{ "best_metric": null, "best_model_checkpoint": null, "epoch": 4.0, "eval_steps": 500, "global_step": 49376, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 8.101101749837978e-05, "grad_norm": 0.02385132946074009, "learning_rate": 4.0502227622519245e-08, "loss": 1.0388, "step": 1 }, { "epoch": 0.00016202203499675956, "grad_norm": 0.022809118032455444, "learning_rate": 8.100445524503849e-08, "loss": 1.169, "step": 2 }, { "epoch": 0.00024303305249513934, "grad_norm": 0.021291494369506836, "learning_rate": 1.215066828675577e-07, "loss": 1.1149, "step": 3 }, { "epoch": 0.0003240440699935191, "grad_norm": 0.020570585504174232, "learning_rate": 1.6200891049007698e-07, "loss": 0.9499, "step": 4 }, { "epoch": 0.0004050550874918989, "grad_norm": 0.023432889953255653, "learning_rate": 2.025111381125962e-07, "loss": 1.0296, "step": 5 }, { "epoch": 0.0004860661049902787, "grad_norm": 0.02114630490541458, "learning_rate": 2.430133657351154e-07, "loss": 1.1114, "step": 6 }, { "epoch": 0.0005670771224886585, "grad_norm": 0.02415238879621029, "learning_rate": 2.8351559335763466e-07, "loss": 1.001, "step": 7 }, { "epoch": 0.0006480881399870382, "grad_norm": 0.021928859874606133, "learning_rate": 3.2401782098015396e-07, "loss": 0.9555, "step": 8 }, { "epoch": 0.000729099157485418, "grad_norm": 0.022112445905804634, "learning_rate": 3.6452004860267315e-07, "loss": 1.0955, "step": 9 }, { "epoch": 0.0008101101749837978, "grad_norm": 0.022664247080683708, "learning_rate": 4.050222762251924e-07, "loss": 1.1065, "step": 10 }, { "epoch": 0.0008911211924821776, "grad_norm": 0.021883847191929817, "learning_rate": 4.455245038477117e-07, "loss": 1.1031, "step": 11 }, { "epoch": 0.0009721322099805574, "grad_norm": 0.024452999234199524, "learning_rate": 4.860267314702308e-07, "loss": 1.1181, "step": 12 }, { "epoch": 0.001053143227478937, "grad_norm": 0.023982515558600426, "learning_rate": 5.265289590927501e-07, "loss": 1.0201, "step": 13 }, { "epoch": 0.001134154244977317, "grad_norm": 0.020815137773752213, "learning_rate": 5.670311867152693e-07, "loss": 1.0554, "step": 14 }, { "epoch": 0.0012151652624756966, "grad_norm": 0.023049861192703247, "learning_rate": 6.075334143377886e-07, "loss": 1.0279, "step": 15 }, { "epoch": 0.0012961762799740765, "grad_norm": 0.025142598897218704, "learning_rate": 6.480356419603079e-07, "loss": 1.0198, "step": 16 }, { "epoch": 0.0013771872974724562, "grad_norm": 0.02580203115940094, "learning_rate": 6.885378695828271e-07, "loss": 1.0425, "step": 17 }, { "epoch": 0.001458198314970836, "grad_norm": 0.02373248152434826, "learning_rate": 7.290400972053463e-07, "loss": 0.9958, "step": 18 }, { "epoch": 0.0015392093324692157, "grad_norm": 0.025345822796225548, "learning_rate": 7.695423248278656e-07, "loss": 1.0466, "step": 19 }, { "epoch": 0.0016202203499675956, "grad_norm": 0.024625582620501518, "learning_rate": 8.100445524503848e-07, "loss": 1.0204, "step": 20 }, { "epoch": 0.0017012313674659753, "grad_norm": 0.023450065404176712, "learning_rate": 8.50546780072904e-07, "loss": 1.1077, "step": 21 }, { "epoch": 0.0017822423849643552, "grad_norm": 0.01932292804121971, "learning_rate": 8.910490076954234e-07, "loss": 1.0104, "step": 22 }, { "epoch": 0.0018632534024627348, "grad_norm": 0.020872652530670166, "learning_rate": 9.315512353179425e-07, "loss": 1.1566, "step": 23 }, { "epoch": 0.0019442644199611147, "grad_norm": 0.020221302285790443, "learning_rate": 9.720534629404617e-07, "loss": 1.0174, "step": 24 }, { "epoch": 0.0020252754374594944, "grad_norm": 0.02044861391186714, "learning_rate": 1.0125556905629811e-06, "loss": 1.0797, "step": 25 }, { "epoch": 0.002106286454957874, "grad_norm": 0.021288206800818443, "learning_rate": 1.0530579181855002e-06, "loss": 1.0198, "step": 26 }, { "epoch": 0.002187297472456254, "grad_norm": 0.02114301174879074, "learning_rate": 1.0935601458080196e-06, "loss": 1.1145, "step": 27 }, { "epoch": 0.002268308489954634, "grad_norm": 0.024435359984636307, "learning_rate": 1.1340623734305387e-06, "loss": 1.0083, "step": 28 }, { "epoch": 0.0023493195074530135, "grad_norm": 0.023775780573487282, "learning_rate": 1.174564601053058e-06, "loss": 1.0904, "step": 29 }, { "epoch": 0.002430330524951393, "grad_norm": 0.022898796945810318, "learning_rate": 1.2150668286755771e-06, "loss": 1.1089, "step": 30 }, { "epoch": 0.0025113415424497733, "grad_norm": 0.023763054981827736, "learning_rate": 1.2555690562980964e-06, "loss": 1.0393, "step": 31 }, { "epoch": 0.002592352559948153, "grad_norm": 0.023700254037976265, "learning_rate": 1.2960712839206158e-06, "loss": 0.9731, "step": 32 }, { "epoch": 0.0026733635774465326, "grad_norm": 0.02282639406621456, "learning_rate": 1.3365735115431349e-06, "loss": 1.1776, "step": 33 }, { "epoch": 0.0027543745949449123, "grad_norm": 0.021881787106394768, "learning_rate": 1.3770757391656541e-06, "loss": 1.0377, "step": 34 }, { "epoch": 0.0028353856124432924, "grad_norm": 0.02300041727721691, "learning_rate": 1.4175779667881734e-06, "loss": 1.0286, "step": 35 }, { "epoch": 0.002916396629941672, "grad_norm": 0.025359181687235832, "learning_rate": 1.4580801944106926e-06, "loss": 1.0717, "step": 36 }, { "epoch": 0.0029974076474400518, "grad_norm": 0.020881187170743942, "learning_rate": 1.4985824220332119e-06, "loss": 0.9762, "step": 37 }, { "epoch": 0.0030784186649384314, "grad_norm": 0.02200203947722912, "learning_rate": 1.539084649655731e-06, "loss": 1.0024, "step": 38 }, { "epoch": 0.0031594296824368115, "grad_norm": 0.025778813287615776, "learning_rate": 1.5795868772782503e-06, "loss": 1.1089, "step": 39 }, { "epoch": 0.0032404406999351912, "grad_norm": 0.022604526951909065, "learning_rate": 1.6200891049007696e-06, "loss": 1.0507, "step": 40 }, { "epoch": 0.003321451717433571, "grad_norm": 0.028379792347550392, "learning_rate": 1.660591332523289e-06, "loss": 1.1776, "step": 41 }, { "epoch": 0.0034024627349319506, "grad_norm": 0.02317628636956215, "learning_rate": 1.701093560145808e-06, "loss": 1.1075, "step": 42 }, { "epoch": 0.0034834737524303307, "grad_norm": 0.02414836548268795, "learning_rate": 1.7415957877683273e-06, "loss": 1.1208, "step": 43 }, { "epoch": 0.0035644847699287103, "grad_norm": 0.020418209955096245, "learning_rate": 1.7820980153908468e-06, "loss": 0.9636, "step": 44 }, { "epoch": 0.00364549578742709, "grad_norm": 0.020706145092844963, "learning_rate": 1.8226002430133656e-06, "loss": 1.0209, "step": 45 }, { "epoch": 0.0037265068049254697, "grad_norm": 0.022698137909173965, "learning_rate": 1.863102470635885e-06, "loss": 0.9973, "step": 46 }, { "epoch": 0.00380751782242385, "grad_norm": 0.030558334663510323, "learning_rate": 1.9036046982584043e-06, "loss": 0.978, "step": 47 }, { "epoch": 0.0038885288399222295, "grad_norm": 0.024986794218420982, "learning_rate": 1.9441069258809233e-06, "loss": 1.0603, "step": 48 }, { "epoch": 0.0039695398574206096, "grad_norm": 0.024735961109399796, "learning_rate": 1.9846091535034426e-06, "loss": 1.0594, "step": 49 }, { "epoch": 0.004050550874918989, "grad_norm": 0.02292553521692753, "learning_rate": 2.0251113811259623e-06, "loss": 1.0657, "step": 50 }, { "epoch": 0.004131561892417369, "grad_norm": 0.027018576860427856, "learning_rate": 2.0656136087484815e-06, "loss": 1.0674, "step": 51 }, { "epoch": 0.004212572909915748, "grad_norm": 0.0235233623534441, "learning_rate": 2.1061158363710003e-06, "loss": 1.0473, "step": 52 }, { "epoch": 0.004293583927414128, "grad_norm": 0.022738710045814514, "learning_rate": 2.1466180639935196e-06, "loss": 1.0892, "step": 53 }, { "epoch": 0.004374594944912508, "grad_norm": 0.022287271916866302, "learning_rate": 2.1871202916160392e-06, "loss": 0.9848, "step": 54 }, { "epoch": 0.004455605962410888, "grad_norm": 0.025856684893369675, "learning_rate": 2.227622519238558e-06, "loss": 1.0407, "step": 55 }, { "epoch": 0.004536616979909268, "grad_norm": 0.0249209925532341, "learning_rate": 2.2681247468610773e-06, "loss": 1.1791, "step": 56 }, { "epoch": 0.004617627997407648, "grad_norm": 0.028566813096404076, "learning_rate": 2.308626974483597e-06, "loss": 1.1377, "step": 57 }, { "epoch": 0.004698639014906027, "grad_norm": 0.0300540030002594, "learning_rate": 2.349129202106116e-06, "loss": 1.0835, "step": 58 }, { "epoch": 0.004779650032404407, "grad_norm": 0.020815081894397736, "learning_rate": 2.389631429728635e-06, "loss": 1.1043, "step": 59 }, { "epoch": 0.004860661049902786, "grad_norm": 0.023698071017861366, "learning_rate": 2.4301336573511543e-06, "loss": 1.0009, "step": 60 }, { "epoch": 0.0049416720674011665, "grad_norm": 0.023796427994966507, "learning_rate": 2.470635884973674e-06, "loss": 1.0231, "step": 61 }, { "epoch": 0.005022683084899547, "grad_norm": 0.02444332465529442, "learning_rate": 2.5111381125961928e-06, "loss": 1.0623, "step": 62 }, { "epoch": 0.005103694102397926, "grad_norm": 0.024619588628411293, "learning_rate": 2.551640340218712e-06, "loss": 1.0212, "step": 63 }, { "epoch": 0.005184705119896306, "grad_norm": 0.0225670225918293, "learning_rate": 2.5921425678412317e-06, "loss": 0.9564, "step": 64 }, { "epoch": 0.005265716137394686, "grad_norm": 0.02799377217888832, "learning_rate": 2.6326447954637505e-06, "loss": 1.0529, "step": 65 }, { "epoch": 0.005346727154893065, "grad_norm": 0.02838836796581745, "learning_rate": 2.6731470230862698e-06, "loss": 1.0972, "step": 66 }, { "epoch": 0.005427738172391445, "grad_norm": 0.027125639840960503, "learning_rate": 2.713649250708789e-06, "loss": 0.9969, "step": 67 }, { "epoch": 0.005508749189889825, "grad_norm": 0.027286173775792122, "learning_rate": 2.7541514783313082e-06, "loss": 1.0246, "step": 68 }, { "epoch": 0.005589760207388205, "grad_norm": 0.029833434149622917, "learning_rate": 2.7946537059538275e-06, "loss": 1.1968, "step": 69 }, { "epoch": 0.005670771224886585, "grad_norm": 0.025817332789301872, "learning_rate": 2.8351559335763467e-06, "loss": 1.175, "step": 70 }, { "epoch": 0.005751782242384964, "grad_norm": 0.03145721182227135, "learning_rate": 2.8756581611988664e-06, "loss": 1.0967, "step": 71 }, { "epoch": 0.005832793259883344, "grad_norm": 0.03033231757581234, "learning_rate": 2.9161603888213852e-06, "loss": 1.0946, "step": 72 }, { "epoch": 0.005913804277381724, "grad_norm": 0.02514854073524475, "learning_rate": 2.9566626164439045e-06, "loss": 1.1251, "step": 73 }, { "epoch": 0.0059948152948801035, "grad_norm": 0.02949725091457367, "learning_rate": 2.9971648440664237e-06, "loss": 0.9468, "step": 74 }, { "epoch": 0.006075826312378484, "grad_norm": 0.02892262302339077, "learning_rate": 3.037667071688943e-06, "loss": 1.0442, "step": 75 }, { "epoch": 0.006156837329876863, "grad_norm": 0.026688704267144203, "learning_rate": 3.078169299311462e-06, "loss": 0.934, "step": 76 }, { "epoch": 0.006237848347375243, "grad_norm": 0.024869117885828018, "learning_rate": 3.1186715269339815e-06, "loss": 0.9493, "step": 77 }, { "epoch": 0.006318859364873623, "grad_norm": 0.026760468259453773, "learning_rate": 3.1591737545565007e-06, "loss": 1.0778, "step": 78 }, { "epoch": 0.006399870382372002, "grad_norm": 0.02856779471039772, "learning_rate": 3.19967598217902e-06, "loss": 1.0456, "step": 79 }, { "epoch": 0.0064808813998703824, "grad_norm": 0.023248232901096344, "learning_rate": 3.240178209801539e-06, "loss": 0.9863, "step": 80 }, { "epoch": 0.0065618924173687625, "grad_norm": 0.029149865731596947, "learning_rate": 3.280680437424059e-06, "loss": 1.0313, "step": 81 }, { "epoch": 0.006642903434867142, "grad_norm": 0.02813745103776455, "learning_rate": 3.321182665046578e-06, "loss": 1.0753, "step": 82 }, { "epoch": 0.006723914452365522, "grad_norm": 0.027179231867194176, "learning_rate": 3.3616848926690965e-06, "loss": 1.0573, "step": 83 }, { "epoch": 0.006804925469863901, "grad_norm": 0.027503957971930504, "learning_rate": 3.402187120291616e-06, "loss": 0.9619, "step": 84 }, { "epoch": 0.006885936487362281, "grad_norm": 0.027507808059453964, "learning_rate": 3.4426893479141354e-06, "loss": 0.965, "step": 85 }, { "epoch": 0.006966947504860661, "grad_norm": 0.03333299234509468, "learning_rate": 3.4831915755366547e-06, "loss": 0.9754, "step": 86 }, { "epoch": 0.007047958522359041, "grad_norm": 0.02706284075975418, "learning_rate": 3.523693803159174e-06, "loss": 1.0152, "step": 87 }, { "epoch": 0.007128969539857421, "grad_norm": 0.024948805570602417, "learning_rate": 3.5641960307816936e-06, "loss": 0.9234, "step": 88 }, { "epoch": 0.007209980557355801, "grad_norm": 0.0278322733938694, "learning_rate": 3.604698258404213e-06, "loss": 1.2218, "step": 89 }, { "epoch": 0.00729099157485418, "grad_norm": 0.02799849770963192, "learning_rate": 3.6452004860267312e-06, "loss": 1.1309, "step": 90 }, { "epoch": 0.00737200259235256, "grad_norm": 0.030282633379101753, "learning_rate": 3.685702713649251e-06, "loss": 1.0145, "step": 91 }, { "epoch": 0.007453013609850939, "grad_norm": 0.02803598903119564, "learning_rate": 3.72620494127177e-06, "loss": 1.059, "step": 92 }, { "epoch": 0.0075340246273493195, "grad_norm": 0.031542301177978516, "learning_rate": 3.7667071688942894e-06, "loss": 1.0055, "step": 93 }, { "epoch": 0.0076150356448477, "grad_norm": 0.03627784922719002, "learning_rate": 3.8072093965168086e-06, "loss": 0.9798, "step": 94 }, { "epoch": 0.007696046662346079, "grad_norm": 0.029904387891292572, "learning_rate": 3.847711624139328e-06, "loss": 1.1077, "step": 95 }, { "epoch": 0.007777057679844459, "grad_norm": 0.03554466366767883, "learning_rate": 3.888213851761847e-06, "loss": 1.0548, "step": 96 }, { "epoch": 0.007858068697342839, "grad_norm": 0.029148144647479057, "learning_rate": 3.928716079384366e-06, "loss": 0.9938, "step": 97 }, { "epoch": 0.007939079714841219, "grad_norm": 0.03140278160572052, "learning_rate": 3.969218307006885e-06, "loss": 1.0229, "step": 98 }, { "epoch": 0.008020090732339598, "grad_norm": 0.031796544790267944, "learning_rate": 4.009720534629405e-06, "loss": 1.0887, "step": 99 }, { "epoch": 0.008101101749837978, "grad_norm": 0.030875643715262413, "learning_rate": 4.0502227622519245e-06, "loss": 1.1028, "step": 100 }, { "epoch": 0.008182112767336358, "grad_norm": 0.03191255405545235, "learning_rate": 4.090724989874443e-06, "loss": 1.0444, "step": 101 }, { "epoch": 0.008263123784834738, "grad_norm": 0.035354603081941605, "learning_rate": 4.131227217496963e-06, "loss": 1.053, "step": 102 }, { "epoch": 0.008344134802333118, "grad_norm": 0.034191109240055084, "learning_rate": 4.171729445119482e-06, "loss": 1.0567, "step": 103 }, { "epoch": 0.008425145819831496, "grad_norm": 0.028501426801085472, "learning_rate": 4.212231672742001e-06, "loss": 1.0091, "step": 104 }, { "epoch": 0.008506156837329876, "grad_norm": 0.03529810532927513, "learning_rate": 4.25273390036452e-06, "loss": 1.117, "step": 105 }, { "epoch": 0.008587167854828257, "grad_norm": 0.038472309708595276, "learning_rate": 4.293236127987039e-06, "loss": 1.0489, "step": 106 }, { "epoch": 0.008668178872326637, "grad_norm": 0.034638624638319016, "learning_rate": 4.333738355609559e-06, "loss": 1.0371, "step": 107 }, { "epoch": 0.008749189889825017, "grad_norm": 0.03382338955998421, "learning_rate": 4.3742405832320785e-06, "loss": 1.0435, "step": 108 }, { "epoch": 0.008830200907323397, "grad_norm": 0.031995080411434174, "learning_rate": 4.414742810854597e-06, "loss": 0.9459, "step": 109 }, { "epoch": 0.008911211924821775, "grad_norm": 0.03637147694826126, "learning_rate": 4.455245038477116e-06, "loss": 0.995, "step": 110 }, { "epoch": 0.008992222942320155, "grad_norm": 0.03417530655860901, "learning_rate": 4.495747266099636e-06, "loss": 1.0156, "step": 111 }, { "epoch": 0.009073233959818535, "grad_norm": 0.03398241102695465, "learning_rate": 4.536249493722155e-06, "loss": 1.1755, "step": 112 }, { "epoch": 0.009154244977316916, "grad_norm": 0.039991866797208786, "learning_rate": 4.576751721344674e-06, "loss": 1.0777, "step": 113 }, { "epoch": 0.009235255994815296, "grad_norm": 0.03602704033255577, "learning_rate": 4.617253948967194e-06, "loss": 0.9658, "step": 114 }, { "epoch": 0.009316267012313674, "grad_norm": 0.040713947266340256, "learning_rate": 4.657756176589713e-06, "loss": 1.1282, "step": 115 }, { "epoch": 0.009397278029812054, "grad_norm": 0.036449410021305084, "learning_rate": 4.698258404212232e-06, "loss": 1.0613, "step": 116 }, { "epoch": 0.009478289047310434, "grad_norm": 0.03143557533621788, "learning_rate": 4.738760631834751e-06, "loss": 1.0661, "step": 117 }, { "epoch": 0.009559300064808814, "grad_norm": 0.03461870178580284, "learning_rate": 4.77926285945727e-06, "loss": 1.0569, "step": 118 }, { "epoch": 0.009640311082307194, "grad_norm": 0.03391311317682266, "learning_rate": 4.81976508707979e-06, "loss": 0.9362, "step": 119 }, { "epoch": 0.009721322099805573, "grad_norm": 0.03787930682301521, "learning_rate": 4.8602673147023086e-06, "loss": 1.0893, "step": 120 }, { "epoch": 0.009802333117303953, "grad_norm": 0.03874959051609039, "learning_rate": 4.900769542324828e-06, "loss": 0.9632, "step": 121 }, { "epoch": 0.009883344134802333, "grad_norm": 0.03478735685348511, "learning_rate": 4.941271769947348e-06, "loss": 0.9535, "step": 122 }, { "epoch": 0.009964355152300713, "grad_norm": 0.039458271116018295, "learning_rate": 4.981773997569866e-06, "loss": 1.1773, "step": 123 }, { "epoch": 0.010045366169799093, "grad_norm": 0.03747010603547096, "learning_rate": 5.0222762251923855e-06, "loss": 1.0859, "step": 124 }, { "epoch": 0.010126377187297473, "grad_norm": 0.047416865825653076, "learning_rate": 5.062778452814905e-06, "loss": 1.0857, "step": 125 }, { "epoch": 0.010207388204795852, "grad_norm": 0.03998400643467903, "learning_rate": 5.103280680437424e-06, "loss": 0.9918, "step": 126 }, { "epoch": 0.010288399222294232, "grad_norm": 0.03821217268705368, "learning_rate": 5.143782908059944e-06, "loss": 0.9687, "step": 127 }, { "epoch": 0.010369410239792612, "grad_norm": 0.03778871148824692, "learning_rate": 5.184285135682463e-06, "loss": 0.9259, "step": 128 }, { "epoch": 0.010450421257290992, "grad_norm": 0.043181974440813065, "learning_rate": 5.224787363304982e-06, "loss": 1.0013, "step": 129 }, { "epoch": 0.010531432274789372, "grad_norm": 0.03853442519903183, "learning_rate": 5.265289590927501e-06, "loss": 1.0027, "step": 130 }, { "epoch": 0.01061244329228775, "grad_norm": 0.03264236822724342, "learning_rate": 5.305791818550021e-06, "loss": 1.0482, "step": 131 }, { "epoch": 0.01069345430978613, "grad_norm": 0.03932544216513634, "learning_rate": 5.3462940461725395e-06, "loss": 1.2041, "step": 132 }, { "epoch": 0.01077446532728451, "grad_norm": 0.03828361630439758, "learning_rate": 5.386796273795059e-06, "loss": 0.9825, "step": 133 }, { "epoch": 0.01085547634478289, "grad_norm": 0.04001957178115845, "learning_rate": 5.427298501417578e-06, "loss": 1.0606, "step": 134 }, { "epoch": 0.010936487362281271, "grad_norm": 0.040740400552749634, "learning_rate": 5.467800729040098e-06, "loss": 1.0208, "step": 135 }, { "epoch": 0.01101749837977965, "grad_norm": 0.03935074433684349, "learning_rate": 5.5083029566626165e-06, "loss": 1.0871, "step": 136 }, { "epoch": 0.01109850939727803, "grad_norm": 0.0420953705906868, "learning_rate": 5.548805184285136e-06, "loss": 1.0619, "step": 137 }, { "epoch": 0.01117952041477641, "grad_norm": 0.033982839435338974, "learning_rate": 5.589307411907655e-06, "loss": 1.024, "step": 138 }, { "epoch": 0.01126053143227479, "grad_norm": 0.03850257024168968, "learning_rate": 5.629809639530175e-06, "loss": 1.0603, "step": 139 }, { "epoch": 0.01134154244977317, "grad_norm": 0.040236424654722214, "learning_rate": 5.6703118671526935e-06, "loss": 1.0393, "step": 140 }, { "epoch": 0.011422553467271548, "grad_norm": 0.04402274265885353, "learning_rate": 5.710814094775213e-06, "loss": 1.0713, "step": 141 }, { "epoch": 0.011503564484769928, "grad_norm": 0.04175622761249542, "learning_rate": 5.751316322397733e-06, "loss": 1.0241, "step": 142 }, { "epoch": 0.011584575502268308, "grad_norm": 0.03973923996090889, "learning_rate": 5.791818550020251e-06, "loss": 1.0643, "step": 143 }, { "epoch": 0.011665586519766688, "grad_norm": 0.04083230346441269, "learning_rate": 5.8323207776427705e-06, "loss": 0.963, "step": 144 }, { "epoch": 0.011746597537265068, "grad_norm": 0.03818622976541519, "learning_rate": 5.87282300526529e-06, "loss": 1.0514, "step": 145 }, { "epoch": 0.011827608554763449, "grad_norm": 0.03518862649798393, "learning_rate": 5.913325232887809e-06, "loss": 1.0711, "step": 146 }, { "epoch": 0.011908619572261827, "grad_norm": 0.040990497916936874, "learning_rate": 5.953827460510329e-06, "loss": 0.9552, "step": 147 }, { "epoch": 0.011989630589760207, "grad_norm": 0.037080589681863785, "learning_rate": 5.9943296881328474e-06, "loss": 1.0958, "step": 148 }, { "epoch": 0.012070641607258587, "grad_norm": 0.043971672654151917, "learning_rate": 6.034831915755366e-06, "loss": 1.0015, "step": 149 }, { "epoch": 0.012151652624756967, "grad_norm": 0.0402100533246994, "learning_rate": 6.075334143377886e-06, "loss": 0.9715, "step": 150 }, { "epoch": 0.012232663642255347, "grad_norm": 0.03516768664121628, "learning_rate": 6.115836371000406e-06, "loss": 0.9676, "step": 151 }, { "epoch": 0.012313674659753726, "grad_norm": 0.03923693671822548, "learning_rate": 6.156338598622924e-06, "loss": 1.0169, "step": 152 }, { "epoch": 0.012394685677252106, "grad_norm": 0.044282715767621994, "learning_rate": 6.196840826245444e-06, "loss": 1.106, "step": 153 }, { "epoch": 0.012475696694750486, "grad_norm": 0.03654364496469498, "learning_rate": 6.237343053867963e-06, "loss": 1.0134, "step": 154 }, { "epoch": 0.012556707712248866, "grad_norm": 0.039169635623693466, "learning_rate": 6.2778452814904826e-06, "loss": 0.9492, "step": 155 }, { "epoch": 0.012637718729747246, "grad_norm": 0.03776717185974121, "learning_rate": 6.318347509113001e-06, "loss": 1.0097, "step": 156 }, { "epoch": 0.012718729747245625, "grad_norm": 0.03706394135951996, "learning_rate": 6.358849736735521e-06, "loss": 0.9956, "step": 157 }, { "epoch": 0.012799740764744005, "grad_norm": 0.03906858712434769, "learning_rate": 6.39935196435804e-06, "loss": 1.1265, "step": 158 }, { "epoch": 0.012880751782242385, "grad_norm": 0.0432046577334404, "learning_rate": 6.439854191980559e-06, "loss": 1.0441, "step": 159 }, { "epoch": 0.012961762799740765, "grad_norm": 0.04111461341381073, "learning_rate": 6.480356419603078e-06, "loss": 1.1336, "step": 160 }, { "epoch": 0.013042773817239145, "grad_norm": 0.04539615660905838, "learning_rate": 6.520858647225597e-06, "loss": 1.0992, "step": 161 }, { "epoch": 0.013123784834737525, "grad_norm": 0.03695986419916153, "learning_rate": 6.561360874848118e-06, "loss": 1.0151, "step": 162 }, { "epoch": 0.013204795852235903, "grad_norm": 0.038577646017074585, "learning_rate": 6.601863102470636e-06, "loss": 0.9178, "step": 163 }, { "epoch": 0.013285806869734284, "grad_norm": 0.039863359183073044, "learning_rate": 6.642365330093156e-06, "loss": 1.0743, "step": 164 }, { "epoch": 0.013366817887232664, "grad_norm": 0.04418211802840233, "learning_rate": 6.682867557715675e-06, "loss": 1.0468, "step": 165 }, { "epoch": 0.013447828904731044, "grad_norm": 0.040715545415878296, "learning_rate": 6.723369785338193e-06, "loss": 0.9407, "step": 166 }, { "epoch": 0.013528839922229424, "grad_norm": 0.03955593705177307, "learning_rate": 6.7638720129607135e-06, "loss": 0.9896, "step": 167 }, { "epoch": 0.013609850939727802, "grad_norm": 0.04149999842047691, "learning_rate": 6.804374240583232e-06, "loss": 1.1123, "step": 168 }, { "epoch": 0.013690861957226182, "grad_norm": 0.03756433352828026, "learning_rate": 6.844876468205752e-06, "loss": 0.9447, "step": 169 }, { "epoch": 0.013771872974724562, "grad_norm": 0.04297390207648277, "learning_rate": 6.885378695828271e-06, "loss": 0.9698, "step": 170 }, { "epoch": 0.013852883992222943, "grad_norm": 0.044178616255521774, "learning_rate": 6.9258809234507905e-06, "loss": 1.0236, "step": 171 }, { "epoch": 0.013933895009721323, "grad_norm": 0.04240819066762924, "learning_rate": 6.966383151073309e-06, "loss": 0.9572, "step": 172 }, { "epoch": 0.014014906027219701, "grad_norm": 0.03693396970629692, "learning_rate": 7.006885378695828e-06, "loss": 0.9087, "step": 173 }, { "epoch": 0.014095917044718081, "grad_norm": 0.03896639496088028, "learning_rate": 7.047387606318348e-06, "loss": 0.9924, "step": 174 }, { "epoch": 0.014176928062216461, "grad_norm": 0.03346260264515877, "learning_rate": 7.087889833940867e-06, "loss": 0.9482, "step": 175 }, { "epoch": 0.014257939079714841, "grad_norm": 0.0437021479010582, "learning_rate": 7.128392061563387e-06, "loss": 1.0355, "step": 176 }, { "epoch": 0.014338950097213221, "grad_norm": 0.05387115105986595, "learning_rate": 7.168894289185905e-06, "loss": 1.0333, "step": 177 }, { "epoch": 0.014419961114711602, "grad_norm": 0.043737273663282394, "learning_rate": 7.209396516808426e-06, "loss": 0.969, "step": 178 }, { "epoch": 0.01450097213220998, "grad_norm": 0.043181467801332474, "learning_rate": 7.2498987444309445e-06, "loss": 0.9959, "step": 179 }, { "epoch": 0.01458198314970836, "grad_norm": 0.03788682073354721, "learning_rate": 7.2904009720534624e-06, "loss": 1.0632, "step": 180 }, { "epoch": 0.01466299416720674, "grad_norm": 0.049245551228523254, "learning_rate": 7.330903199675983e-06, "loss": 1.1571, "step": 181 }, { "epoch": 0.01474400518470512, "grad_norm": 0.043730925768613815, "learning_rate": 7.371405427298502e-06, "loss": 1.0468, "step": 182 }, { "epoch": 0.0148250162022035, "grad_norm": 0.041232235729694366, "learning_rate": 7.4119076549210214e-06, "loss": 1.0773, "step": 183 }, { "epoch": 0.014906027219701879, "grad_norm": 0.03509649634361267, "learning_rate": 7.45240988254354e-06, "loss": 0.9525, "step": 184 }, { "epoch": 0.014987038237200259, "grad_norm": 0.033452972769737244, "learning_rate": 7.49291211016606e-06, "loss": 0.9441, "step": 185 }, { "epoch": 0.015068049254698639, "grad_norm": 0.044284090399742126, "learning_rate": 7.533414337788579e-06, "loss": 0.9824, "step": 186 }, { "epoch": 0.015149060272197019, "grad_norm": 0.0432291179895401, "learning_rate": 7.5739165654110976e-06, "loss": 1.0314, "step": 187 }, { "epoch": 0.0152300712896954, "grad_norm": 0.03911181911826134, "learning_rate": 7.614418793033617e-06, "loss": 0.9796, "step": 188 }, { "epoch": 0.015311082307193778, "grad_norm": 0.04108152911067009, "learning_rate": 7.654921020656136e-06, "loss": 0.961, "step": 189 }, { "epoch": 0.015392093324692158, "grad_norm": 0.04519292339682579, "learning_rate": 7.695423248278656e-06, "loss": 1.1185, "step": 190 }, { "epoch": 0.015473104342190538, "grad_norm": 0.04229548200964928, "learning_rate": 7.735925475901175e-06, "loss": 1.0708, "step": 191 }, { "epoch": 0.015554115359688918, "grad_norm": 0.038502808660268784, "learning_rate": 7.776427703523693e-06, "loss": 0.9103, "step": 192 }, { "epoch": 0.015635126377187298, "grad_norm": 0.03859207034111023, "learning_rate": 7.816929931146213e-06, "loss": 0.9856, "step": 193 }, { "epoch": 0.015716137394685678, "grad_norm": 0.03727702423930168, "learning_rate": 7.857432158768733e-06, "loss": 0.8671, "step": 194 }, { "epoch": 0.015797148412184058, "grad_norm": 0.03756839036941528, "learning_rate": 7.897934386391252e-06, "loss": 1.0028, "step": 195 }, { "epoch": 0.015878159429682438, "grad_norm": 0.040489424020051956, "learning_rate": 7.93843661401377e-06, "loss": 1.0696, "step": 196 }, { "epoch": 0.015959170447180815, "grad_norm": 0.031826749444007874, "learning_rate": 7.978938841636292e-06, "loss": 0.9044, "step": 197 }, { "epoch": 0.016040181464679195, "grad_norm": 0.03558851778507233, "learning_rate": 8.01944106925881e-06, "loss": 0.9084, "step": 198 }, { "epoch": 0.016121192482177575, "grad_norm": 0.036237046122550964, "learning_rate": 8.059943296881328e-06, "loss": 0.941, "step": 199 }, { "epoch": 0.016202203499675955, "grad_norm": 0.046845950186252594, "learning_rate": 8.100445524503849e-06, "loss": 1.0429, "step": 200 }, { "epoch": 0.016283214517174335, "grad_norm": 0.042282190173864365, "learning_rate": 8.140947752126367e-06, "loss": 1.0032, "step": 201 }, { "epoch": 0.016364225534672715, "grad_norm": 0.039992641657590866, "learning_rate": 8.181449979748887e-06, "loss": 1.0286, "step": 202 }, { "epoch": 0.016445236552171096, "grad_norm": 0.03947419673204422, "learning_rate": 8.221952207371406e-06, "loss": 1.0194, "step": 203 }, { "epoch": 0.016526247569669476, "grad_norm": 0.03853017836809158, "learning_rate": 8.262454434993926e-06, "loss": 1.0542, "step": 204 }, { "epoch": 0.016607258587167856, "grad_norm": 0.0387580543756485, "learning_rate": 8.302956662616444e-06, "loss": 0.9822, "step": 205 }, { "epoch": 0.016688269604666236, "grad_norm": 0.0381411649286747, "learning_rate": 8.343458890238964e-06, "loss": 0.9213, "step": 206 }, { "epoch": 0.016769280622164616, "grad_norm": 0.04278302192687988, "learning_rate": 8.383961117861483e-06, "loss": 1.0441, "step": 207 }, { "epoch": 0.016850291639662993, "grad_norm": 0.03946967422962189, "learning_rate": 8.424463345484001e-06, "loss": 0.9348, "step": 208 }, { "epoch": 0.016931302657161373, "grad_norm": 0.03685486316680908, "learning_rate": 8.464965573106521e-06, "loss": 1.0595, "step": 209 }, { "epoch": 0.017012313674659753, "grad_norm": 0.04094025865197182, "learning_rate": 8.50546780072904e-06, "loss": 0.9907, "step": 210 }, { "epoch": 0.017093324692158133, "grad_norm": 0.04092978313565254, "learning_rate": 8.54597002835156e-06, "loss": 0.9561, "step": 211 }, { "epoch": 0.017174335709656513, "grad_norm": 0.0413689911365509, "learning_rate": 8.586472255974078e-06, "loss": 1.0234, "step": 212 }, { "epoch": 0.017255346727154893, "grad_norm": 0.038415905088186264, "learning_rate": 8.626974483596598e-06, "loss": 0.9763, "step": 213 }, { "epoch": 0.017336357744653273, "grad_norm": 0.04191512241959572, "learning_rate": 8.667476711219118e-06, "loss": 1.0653, "step": 214 }, { "epoch": 0.017417368762151653, "grad_norm": 0.03985489904880524, "learning_rate": 8.707978938841636e-06, "loss": 1.0275, "step": 215 }, { "epoch": 0.017498379779650033, "grad_norm": 0.040544137358665466, "learning_rate": 8.748481166464157e-06, "loss": 0.9499, "step": 216 }, { "epoch": 0.017579390797148414, "grad_norm": 0.041784144937992096, "learning_rate": 8.788983394086675e-06, "loss": 1.0257, "step": 217 }, { "epoch": 0.017660401814646794, "grad_norm": 0.0427846759557724, "learning_rate": 8.829485621709195e-06, "loss": 1.022, "step": 218 }, { "epoch": 0.01774141283214517, "grad_norm": 0.03604327514767647, "learning_rate": 8.869987849331714e-06, "loss": 0.9539, "step": 219 }, { "epoch": 0.01782242384964355, "grad_norm": 0.03896184265613556, "learning_rate": 8.910490076954232e-06, "loss": 1.0411, "step": 220 }, { "epoch": 0.01790343486714193, "grad_norm": 0.04511598125100136, "learning_rate": 8.950992304576752e-06, "loss": 1.0978, "step": 221 }, { "epoch": 0.01798444588464031, "grad_norm": 0.04048030450940132, "learning_rate": 8.991494532199272e-06, "loss": 0.9711, "step": 222 }, { "epoch": 0.01806545690213869, "grad_norm": 0.04436124861240387, "learning_rate": 9.031996759821791e-06, "loss": 0.9871, "step": 223 }, { "epoch": 0.01814646791963707, "grad_norm": 0.039108771830797195, "learning_rate": 9.07249898744431e-06, "loss": 0.9747, "step": 224 }, { "epoch": 0.01822747893713545, "grad_norm": 0.04072122648358345, "learning_rate": 9.11300121506683e-06, "loss": 0.9828, "step": 225 }, { "epoch": 0.01830848995463383, "grad_norm": 0.03978056088089943, "learning_rate": 9.153503442689349e-06, "loss": 0.9255, "step": 226 }, { "epoch": 0.01838950097213221, "grad_norm": 0.04174983873963356, "learning_rate": 9.194005670311867e-06, "loss": 1.0139, "step": 227 }, { "epoch": 0.01847051198963059, "grad_norm": 0.043365299701690674, "learning_rate": 9.234507897934388e-06, "loss": 1.0303, "step": 228 }, { "epoch": 0.018551523007128968, "grad_norm": 0.03832549601793289, "learning_rate": 9.275010125556906e-06, "loss": 0.9125, "step": 229 }, { "epoch": 0.018632534024627348, "grad_norm": 0.04062044620513916, "learning_rate": 9.315512353179426e-06, "loss": 1.0105, "step": 230 }, { "epoch": 0.018713545042125728, "grad_norm": 0.04038373380899429, "learning_rate": 9.356014580801945e-06, "loss": 1.0133, "step": 231 }, { "epoch": 0.018794556059624108, "grad_norm": 0.04146847873926163, "learning_rate": 9.396516808424463e-06, "loss": 0.988, "step": 232 }, { "epoch": 0.01887556707712249, "grad_norm": 0.040415383875370026, "learning_rate": 9.437019036046983e-06, "loss": 0.9543, "step": 233 }, { "epoch": 0.01895657809462087, "grad_norm": 0.04330156370997429, "learning_rate": 9.477521263669503e-06, "loss": 0.9775, "step": 234 }, { "epoch": 0.01903758911211925, "grad_norm": 0.043947651982307434, "learning_rate": 9.518023491292022e-06, "loss": 0.8902, "step": 235 }, { "epoch": 0.01911860012961763, "grad_norm": 0.04477566108107567, "learning_rate": 9.55852571891454e-06, "loss": 1.0621, "step": 236 }, { "epoch": 0.01919961114711601, "grad_norm": 0.039206307381391525, "learning_rate": 9.59902794653706e-06, "loss": 0.9572, "step": 237 }, { "epoch": 0.01928062216461439, "grad_norm": 0.03924980387091637, "learning_rate": 9.63953017415958e-06, "loss": 0.9539, "step": 238 }, { "epoch": 0.01936163318211277, "grad_norm": 0.040131330490112305, "learning_rate": 9.680032401782097e-06, "loss": 0.9643, "step": 239 }, { "epoch": 0.019442644199611146, "grad_norm": 0.0443425178527832, "learning_rate": 9.720534629404617e-06, "loss": 0.9767, "step": 240 }, { "epoch": 0.019523655217109526, "grad_norm": 0.04038490727543831, "learning_rate": 9.761036857027137e-06, "loss": 0.9597, "step": 241 }, { "epoch": 0.019604666234607906, "grad_norm": 0.03880751505494118, "learning_rate": 9.801539084649656e-06, "loss": 0.9, "step": 242 }, { "epoch": 0.019685677252106286, "grad_norm": 0.03484981507062912, "learning_rate": 9.842041312272174e-06, "loss": 0.861, "step": 243 }, { "epoch": 0.019766688269604666, "grad_norm": 0.04652680829167366, "learning_rate": 9.882543539894696e-06, "loss": 1.046, "step": 244 }, { "epoch": 0.019847699287103046, "grad_norm": 0.045620325952768326, "learning_rate": 9.923045767517214e-06, "loss": 1.0687, "step": 245 }, { "epoch": 0.019928710304601426, "grad_norm": 0.0337100550532341, "learning_rate": 9.963547995139732e-06, "loss": 0.8199, "step": 246 }, { "epoch": 0.020009721322099806, "grad_norm": 0.044301003217697144, "learning_rate": 1.0004050222762253e-05, "loss": 0.9616, "step": 247 }, { "epoch": 0.020090732339598186, "grad_norm": 0.042871661484241486, "learning_rate": 1.0044552450384771e-05, "loss": 0.9569, "step": 248 }, { "epoch": 0.020171743357096567, "grad_norm": 0.043442253023386, "learning_rate": 1.008505467800729e-05, "loss": 0.9999, "step": 249 }, { "epoch": 0.020252754374594947, "grad_norm": 0.039713114500045776, "learning_rate": 1.012555690562981e-05, "loss": 0.8564, "step": 250 }, { "epoch": 0.020333765392093323, "grad_norm": 0.047681573778390884, "learning_rate": 1.016605913325233e-05, "loss": 1.0182, "step": 251 }, { "epoch": 0.020414776409591703, "grad_norm": 0.04213854670524597, "learning_rate": 1.0206561360874848e-05, "loss": 0.9129, "step": 252 }, { "epoch": 0.020495787427090083, "grad_norm": 0.045345891267061234, "learning_rate": 1.0247063588497368e-05, "loss": 0.9895, "step": 253 }, { "epoch": 0.020576798444588464, "grad_norm": 0.042612865567207336, "learning_rate": 1.0287565816119887e-05, "loss": 0.9485, "step": 254 }, { "epoch": 0.020657809462086844, "grad_norm": 0.041252050548791885, "learning_rate": 1.0328068043742405e-05, "loss": 0.8881, "step": 255 }, { "epoch": 0.020738820479585224, "grad_norm": 0.044315893203020096, "learning_rate": 1.0368570271364927e-05, "loss": 0.93, "step": 256 }, { "epoch": 0.020819831497083604, "grad_norm": 0.04035898670554161, "learning_rate": 1.0409072498987445e-05, "loss": 0.9263, "step": 257 }, { "epoch": 0.020900842514581984, "grad_norm": 0.03786478936672211, "learning_rate": 1.0449574726609964e-05, "loss": 0.8736, "step": 258 }, { "epoch": 0.020981853532080364, "grad_norm": 0.04340916872024536, "learning_rate": 1.0490076954232484e-05, "loss": 0.8948, "step": 259 }, { "epoch": 0.021062864549578744, "grad_norm": 0.04343588650226593, "learning_rate": 1.0530579181855002e-05, "loss": 0.9546, "step": 260 }, { "epoch": 0.02114387556707712, "grad_norm": 0.049006178975105286, "learning_rate": 1.0571081409477522e-05, "loss": 0.9688, "step": 261 }, { "epoch": 0.0212248865845755, "grad_norm": 0.04548350349068642, "learning_rate": 1.0611583637100041e-05, "loss": 0.9232, "step": 262 }, { "epoch": 0.02130589760207388, "grad_norm": 0.04764221981167793, "learning_rate": 1.0652085864722561e-05, "loss": 0.9413, "step": 263 }, { "epoch": 0.02138690861957226, "grad_norm": 0.05102779343724251, "learning_rate": 1.0692588092345079e-05, "loss": 1.0695, "step": 264 }, { "epoch": 0.02146791963707064, "grad_norm": 0.04845573008060455, "learning_rate": 1.0733090319967599e-05, "loss": 0.9992, "step": 265 }, { "epoch": 0.02154893065456902, "grad_norm": 0.04806605353951454, "learning_rate": 1.0773592547590118e-05, "loss": 0.9287, "step": 266 }, { "epoch": 0.0216299416720674, "grad_norm": 0.04713521897792816, "learning_rate": 1.0814094775212636e-05, "loss": 0.9274, "step": 267 }, { "epoch": 0.02171095268956578, "grad_norm": 0.041815537959337234, "learning_rate": 1.0854597002835156e-05, "loss": 0.8218, "step": 268 }, { "epoch": 0.02179196370706416, "grad_norm": 0.04000954329967499, "learning_rate": 1.0895099230457676e-05, "loss": 0.8657, "step": 269 }, { "epoch": 0.021872974724562542, "grad_norm": 0.03801000490784645, "learning_rate": 1.0935601458080195e-05, "loss": 0.8065, "step": 270 }, { "epoch": 0.021953985742060922, "grad_norm": 0.04805195331573486, "learning_rate": 1.0976103685702713e-05, "loss": 0.9453, "step": 271 }, { "epoch": 0.0220349967595593, "grad_norm": 0.04761458933353424, "learning_rate": 1.1016605913325233e-05, "loss": 0.9711, "step": 272 }, { "epoch": 0.02211600777705768, "grad_norm": 0.05013057217001915, "learning_rate": 1.1057108140947753e-05, "loss": 0.9825, "step": 273 }, { "epoch": 0.02219701879455606, "grad_norm": 0.04019589349627495, "learning_rate": 1.1097610368570272e-05, "loss": 0.7704, "step": 274 }, { "epoch": 0.02227802981205444, "grad_norm": 0.05667426809668541, "learning_rate": 1.1138112596192792e-05, "loss": 1.0435, "step": 275 }, { "epoch": 0.02235904082955282, "grad_norm": 0.04651724174618721, "learning_rate": 1.117861482381531e-05, "loss": 0.8615, "step": 276 }, { "epoch": 0.0224400518470512, "grad_norm": 0.04452743008732796, "learning_rate": 1.121911705143783e-05, "loss": 0.9388, "step": 277 }, { "epoch": 0.02252106286454958, "grad_norm": 0.04375988990068436, "learning_rate": 1.125961927906035e-05, "loss": 0.83, "step": 278 }, { "epoch": 0.02260207388204796, "grad_norm": 0.04938081279397011, "learning_rate": 1.1300121506682867e-05, "loss": 0.8963, "step": 279 }, { "epoch": 0.02268308489954634, "grad_norm": 0.039991993457078934, "learning_rate": 1.1340623734305387e-05, "loss": 0.7539, "step": 280 }, { "epoch": 0.02276409591704472, "grad_norm": 0.044255875051021576, "learning_rate": 1.1381125961927907e-05, "loss": 0.8487, "step": 281 }, { "epoch": 0.022845106934543096, "grad_norm": 0.05448516830801964, "learning_rate": 1.1421628189550426e-05, "loss": 0.9822, "step": 282 }, { "epoch": 0.022926117952041476, "grad_norm": 0.04801035299897194, "learning_rate": 1.1462130417172944e-05, "loss": 0.8966, "step": 283 }, { "epoch": 0.023007128969539856, "grad_norm": 0.052658502012491226, "learning_rate": 1.1502632644795466e-05, "loss": 0.8962, "step": 284 }, { "epoch": 0.023088139987038236, "grad_norm": 0.05240850895643234, "learning_rate": 1.1543134872417984e-05, "loss": 0.8753, "step": 285 }, { "epoch": 0.023169151004536617, "grad_norm": 0.05047140643000603, "learning_rate": 1.1583637100040502e-05, "loss": 0.9311, "step": 286 }, { "epoch": 0.023250162022034997, "grad_norm": 0.04640672728419304, "learning_rate": 1.1624139327663023e-05, "loss": 0.8523, "step": 287 }, { "epoch": 0.023331173039533377, "grad_norm": 0.04837445169687271, "learning_rate": 1.1664641555285541e-05, "loss": 0.9191, "step": 288 }, { "epoch": 0.023412184057031757, "grad_norm": 0.04194364324212074, "learning_rate": 1.170514378290806e-05, "loss": 0.8187, "step": 289 }, { "epoch": 0.023493195074530137, "grad_norm": 0.04633034020662308, "learning_rate": 1.174564601053058e-05, "loss": 0.8054, "step": 290 }, { "epoch": 0.023574206092028517, "grad_norm": 0.04540485143661499, "learning_rate": 1.17861482381531e-05, "loss": 0.8684, "step": 291 }, { "epoch": 0.023655217109526897, "grad_norm": 0.05126497149467468, "learning_rate": 1.1826650465775618e-05, "loss": 0.8609, "step": 292 }, { "epoch": 0.023736228127025274, "grad_norm": 0.052500225603580475, "learning_rate": 1.1867152693398138e-05, "loss": 0.9471, "step": 293 }, { "epoch": 0.023817239144523654, "grad_norm": 0.04483199119567871, "learning_rate": 1.1907654921020657e-05, "loss": 0.888, "step": 294 }, { "epoch": 0.023898250162022034, "grad_norm": 0.04261082038283348, "learning_rate": 1.1948157148643175e-05, "loss": 0.7984, "step": 295 }, { "epoch": 0.023979261179520414, "grad_norm": 0.04691658914089203, "learning_rate": 1.1988659376265695e-05, "loss": 0.9147, "step": 296 }, { "epoch": 0.024060272197018794, "grad_norm": 0.039187829941511154, "learning_rate": 1.2029161603888215e-05, "loss": 0.787, "step": 297 }, { "epoch": 0.024141283214517174, "grad_norm": 0.041846975684165955, "learning_rate": 1.2069663831510733e-05, "loss": 0.7899, "step": 298 }, { "epoch": 0.024222294232015554, "grad_norm": 0.041736699640750885, "learning_rate": 1.2110166059133254e-05, "loss": 0.8537, "step": 299 }, { "epoch": 0.024303305249513935, "grad_norm": 0.04296275973320007, "learning_rate": 1.2150668286755772e-05, "loss": 0.7563, "step": 300 }, { "epoch": 0.024384316267012315, "grad_norm": 0.0476207509636879, "learning_rate": 1.2191170514378292e-05, "loss": 0.8532, "step": 301 }, { "epoch": 0.024465327284510695, "grad_norm": 0.0477546826004982, "learning_rate": 1.2231672742000811e-05, "loss": 0.8733, "step": 302 }, { "epoch": 0.024546338302009075, "grad_norm": 0.05017669498920441, "learning_rate": 1.2272174969623331e-05, "loss": 0.9032, "step": 303 }, { "epoch": 0.02462734931950745, "grad_norm": 0.048817798495292664, "learning_rate": 1.2312677197245849e-05, "loss": 0.8685, "step": 304 }, { "epoch": 0.02470836033700583, "grad_norm": 0.046138327568769455, "learning_rate": 1.2353179424868368e-05, "loss": 0.884, "step": 305 }, { "epoch": 0.02478937135450421, "grad_norm": 0.046168919652700424, "learning_rate": 1.2393681652490888e-05, "loss": 0.8913, "step": 306 }, { "epoch": 0.024870382372002592, "grad_norm": 0.043708957731723785, "learning_rate": 1.2434183880113406e-05, "loss": 0.8526, "step": 307 }, { "epoch": 0.024951393389500972, "grad_norm": 0.043650537729263306, "learning_rate": 1.2474686107735926e-05, "loss": 0.8083, "step": 308 }, { "epoch": 0.025032404406999352, "grad_norm": 0.04286431893706322, "learning_rate": 1.2515188335358447e-05, "loss": 0.7903, "step": 309 }, { "epoch": 0.025113415424497732, "grad_norm": 0.03427548334002495, "learning_rate": 1.2555690562980965e-05, "loss": 0.7333, "step": 310 }, { "epoch": 0.025194426441996112, "grad_norm": 0.04491386190056801, "learning_rate": 1.2596192790603483e-05, "loss": 0.8457, "step": 311 }, { "epoch": 0.025275437459494492, "grad_norm": 0.0491580106317997, "learning_rate": 1.2636695018226003e-05, "loss": 0.8856, "step": 312 }, { "epoch": 0.025356448476992872, "grad_norm": 0.040008701384067535, "learning_rate": 1.267719724584852e-05, "loss": 0.8176, "step": 313 }, { "epoch": 0.02543745949449125, "grad_norm": 0.05017147958278656, "learning_rate": 1.2717699473471042e-05, "loss": 0.9175, "step": 314 }, { "epoch": 0.02551847051198963, "grad_norm": 0.04300964996218681, "learning_rate": 1.2758201701093562e-05, "loss": 0.8511, "step": 315 }, { "epoch": 0.02559948152948801, "grad_norm": 0.03991328552365303, "learning_rate": 1.279870392871608e-05, "loss": 0.7562, "step": 316 }, { "epoch": 0.02568049254698639, "grad_norm": 0.04305849224328995, "learning_rate": 1.2839206156338598e-05, "loss": 0.8165, "step": 317 }, { "epoch": 0.02576150356448477, "grad_norm": 0.04361141473054886, "learning_rate": 1.2879708383961117e-05, "loss": 0.8114, "step": 318 }, { "epoch": 0.02584251458198315, "grad_norm": 0.04171218350529671, "learning_rate": 1.2920210611583639e-05, "loss": 0.8169, "step": 319 }, { "epoch": 0.02592352559948153, "grad_norm": 0.034734416753053665, "learning_rate": 1.2960712839206157e-05, "loss": 0.7547, "step": 320 }, { "epoch": 0.02600453661697991, "grad_norm": 0.04020668566226959, "learning_rate": 1.3001215066828676e-05, "loss": 0.829, "step": 321 }, { "epoch": 0.02608554763447829, "grad_norm": 0.03808221593499184, "learning_rate": 1.3041717294451194e-05, "loss": 0.7703, "step": 322 }, { "epoch": 0.02616655865197667, "grad_norm": 0.04488734155893326, "learning_rate": 1.3082219522073716e-05, "loss": 0.8324, "step": 323 }, { "epoch": 0.02624756966947505, "grad_norm": 0.04443804547190666, "learning_rate": 1.3122721749696235e-05, "loss": 0.8446, "step": 324 }, { "epoch": 0.026328580686973427, "grad_norm": 0.04030577838420868, "learning_rate": 1.3163223977318753e-05, "loss": 0.7798, "step": 325 }, { "epoch": 0.026409591704471807, "grad_norm": 0.04477348551154137, "learning_rate": 1.3203726204941271e-05, "loss": 0.8708, "step": 326 }, { "epoch": 0.026490602721970187, "grad_norm": 0.040085140615701675, "learning_rate": 1.3244228432563791e-05, "loss": 0.8228, "step": 327 }, { "epoch": 0.026571613739468567, "grad_norm": 0.03322353959083557, "learning_rate": 1.3284730660186312e-05, "loss": 0.6957, "step": 328 }, { "epoch": 0.026652624756966947, "grad_norm": 0.04367142170667648, "learning_rate": 1.332523288780883e-05, "loss": 0.8452, "step": 329 }, { "epoch": 0.026733635774465327, "grad_norm": 0.03859969228506088, "learning_rate": 1.336573511543135e-05, "loss": 0.7426, "step": 330 }, { "epoch": 0.026814646791963707, "grad_norm": 0.036489035934209824, "learning_rate": 1.3406237343053868e-05, "loss": 0.7271, "step": 331 }, { "epoch": 0.026895657809462088, "grad_norm": 0.04094213247299194, "learning_rate": 1.3446739570676386e-05, "loss": 0.8086, "step": 332 }, { "epoch": 0.026976668826960468, "grad_norm": 0.043221861124038696, "learning_rate": 1.3487241798298907e-05, "loss": 0.8558, "step": 333 }, { "epoch": 0.027057679844458848, "grad_norm": 0.044967714697122574, "learning_rate": 1.3527744025921427e-05, "loss": 0.848, "step": 334 }, { "epoch": 0.027138690861957228, "grad_norm": 0.04468412697315216, "learning_rate": 1.3568246253543945e-05, "loss": 0.849, "step": 335 }, { "epoch": 0.027219701879455604, "grad_norm": 0.04480603709816933, "learning_rate": 1.3608748481166465e-05, "loss": 0.9005, "step": 336 }, { "epoch": 0.027300712896953985, "grad_norm": 0.03967758268117905, "learning_rate": 1.3649250708788986e-05, "loss": 0.8326, "step": 337 }, { "epoch": 0.027381723914452365, "grad_norm": 0.04579153284430504, "learning_rate": 1.3689752936411504e-05, "loss": 0.8577, "step": 338 }, { "epoch": 0.027462734931950745, "grad_norm": 0.03907260298728943, "learning_rate": 1.3730255164034022e-05, "loss": 0.811, "step": 339 }, { "epoch": 0.027543745949449125, "grad_norm": 0.04092499241232872, "learning_rate": 1.3770757391656542e-05, "loss": 0.8279, "step": 340 }, { "epoch": 0.027624756966947505, "grad_norm": 0.046532005071640015, "learning_rate": 1.381125961927906e-05, "loss": 0.8455, "step": 341 }, { "epoch": 0.027705767984445885, "grad_norm": 0.04032495245337486, "learning_rate": 1.3851761846901581e-05, "loss": 0.8789, "step": 342 }, { "epoch": 0.027786779001944265, "grad_norm": 0.04253386706113815, "learning_rate": 1.38922640745241e-05, "loss": 0.8235, "step": 343 }, { "epoch": 0.027867790019442645, "grad_norm": 0.04008479788899422, "learning_rate": 1.3932766302146619e-05, "loss": 0.8336, "step": 344 }, { "epoch": 0.027948801036941025, "grad_norm": 0.04024027660489082, "learning_rate": 1.3973268529769137e-05, "loss": 0.7889, "step": 345 }, { "epoch": 0.028029812054439402, "grad_norm": 0.04010341316461563, "learning_rate": 1.4013770757391656e-05, "loss": 0.7946, "step": 346 }, { "epoch": 0.028110823071937782, "grad_norm": 0.03629535436630249, "learning_rate": 1.4054272985014178e-05, "loss": 0.7274, "step": 347 }, { "epoch": 0.028191834089436162, "grad_norm": 0.04157916083931923, "learning_rate": 1.4094775212636696e-05, "loss": 0.7929, "step": 348 }, { "epoch": 0.028272845106934542, "grad_norm": 0.04346117004752159, "learning_rate": 1.4135277440259215e-05, "loss": 0.7551, "step": 349 }, { "epoch": 0.028353856124432922, "grad_norm": 0.03737987205386162, "learning_rate": 1.4175779667881733e-05, "loss": 0.7418, "step": 350 }, { "epoch": 0.028434867141931303, "grad_norm": 0.040639728307724, "learning_rate": 1.4216281895504251e-05, "loss": 0.7781, "step": 351 }, { "epoch": 0.028515878159429683, "grad_norm": 0.04043082147836685, "learning_rate": 1.4256784123126774e-05, "loss": 0.8111, "step": 352 }, { "epoch": 0.028596889176928063, "grad_norm": 0.041902732104063034, "learning_rate": 1.4297286350749292e-05, "loss": 0.8684, "step": 353 }, { "epoch": 0.028677900194426443, "grad_norm": 0.04072404280304909, "learning_rate": 1.433778857837181e-05, "loss": 0.8067, "step": 354 }, { "epoch": 0.028758911211924823, "grad_norm": 0.04357956349849701, "learning_rate": 1.437829080599433e-05, "loss": 0.7638, "step": 355 }, { "epoch": 0.028839922229423203, "grad_norm": 0.042490243911743164, "learning_rate": 1.4418793033616851e-05, "loss": 0.816, "step": 356 }, { "epoch": 0.02892093324692158, "grad_norm": 0.04168788343667984, "learning_rate": 1.445929526123937e-05, "loss": 0.8139, "step": 357 }, { "epoch": 0.02900194426441996, "grad_norm": 0.041793275624513626, "learning_rate": 1.4499797488861889e-05, "loss": 0.7767, "step": 358 }, { "epoch": 0.02908295528191834, "grad_norm": 0.036548249423503876, "learning_rate": 1.4540299716484407e-05, "loss": 0.7338, "step": 359 }, { "epoch": 0.02916396629941672, "grad_norm": 0.03943818062543869, "learning_rate": 1.4580801944106925e-05, "loss": 0.8113, "step": 360 }, { "epoch": 0.0292449773169151, "grad_norm": 0.039916470646858215, "learning_rate": 1.4621304171729446e-05, "loss": 0.785, "step": 361 }, { "epoch": 0.02932598833441348, "grad_norm": 0.04223480448126793, "learning_rate": 1.4661806399351966e-05, "loss": 0.7892, "step": 362 }, { "epoch": 0.02940699935191186, "grad_norm": 0.03503810241818428, "learning_rate": 1.4702308626974484e-05, "loss": 0.6974, "step": 363 }, { "epoch": 0.02948801036941024, "grad_norm": 0.04188969358801842, "learning_rate": 1.4742810854597004e-05, "loss": 0.8068, "step": 364 }, { "epoch": 0.02956902138690862, "grad_norm": 0.039196666330099106, "learning_rate": 1.4783313082219522e-05, "loss": 0.7942, "step": 365 }, { "epoch": 0.029650032404407, "grad_norm": 0.03726887330412865, "learning_rate": 1.4823815309842043e-05, "loss": 0.7629, "step": 366 }, { "epoch": 0.02973104342190538, "grad_norm": 0.04150981456041336, "learning_rate": 1.486431753746456e-05, "loss": 0.7888, "step": 367 }, { "epoch": 0.029812054439403757, "grad_norm": 0.041392356157302856, "learning_rate": 1.490481976508708e-05, "loss": 0.8023, "step": 368 }, { "epoch": 0.029893065456902138, "grad_norm": 0.04221396520733833, "learning_rate": 1.4945321992709598e-05, "loss": 0.8325, "step": 369 }, { "epoch": 0.029974076474400518, "grad_norm": 0.03871150687336922, "learning_rate": 1.498582422033212e-05, "loss": 0.7942, "step": 370 }, { "epoch": 0.030055087491898898, "grad_norm": 0.04151451215147972, "learning_rate": 1.502632644795464e-05, "loss": 0.8003, "step": 371 }, { "epoch": 0.030136098509397278, "grad_norm": 0.03538280725479126, "learning_rate": 1.5066828675577157e-05, "loss": 0.7003, "step": 372 }, { "epoch": 0.030217109526895658, "grad_norm": 0.03889065608382225, "learning_rate": 1.5107330903199675e-05, "loss": 0.7495, "step": 373 }, { "epoch": 0.030298120544394038, "grad_norm": 0.030184775590896606, "learning_rate": 1.5147833130822195e-05, "loss": 0.6278, "step": 374 }, { "epoch": 0.030379131561892418, "grad_norm": 0.04202747344970703, "learning_rate": 1.5188335358444717e-05, "loss": 0.8286, "step": 375 }, { "epoch": 0.0304601425793908, "grad_norm": 0.04680528864264488, "learning_rate": 1.5228837586067234e-05, "loss": 0.867, "step": 376 }, { "epoch": 0.03054115359688918, "grad_norm": 0.04027519375085831, "learning_rate": 1.5269339813689752e-05, "loss": 0.7637, "step": 377 }, { "epoch": 0.030622164614387555, "grad_norm": 0.041619181632995605, "learning_rate": 1.5309842041312272e-05, "loss": 0.7716, "step": 378 }, { "epoch": 0.030703175631885935, "grad_norm": 0.03543030843138695, "learning_rate": 1.5350344268934792e-05, "loss": 0.757, "step": 379 }, { "epoch": 0.030784186649384315, "grad_norm": 0.03332729637622833, "learning_rate": 1.539084649655731e-05, "loss": 0.6851, "step": 380 }, { "epoch": 0.030865197666882695, "grad_norm": 0.03877364099025726, "learning_rate": 1.543134872417983e-05, "loss": 0.7643, "step": 381 }, { "epoch": 0.030946208684381075, "grad_norm": 0.034454673528671265, "learning_rate": 1.547185095180235e-05, "loss": 0.6828, "step": 382 }, { "epoch": 0.031027219701879456, "grad_norm": 0.039795245975255966, "learning_rate": 1.5512353179424867e-05, "loss": 0.7524, "step": 383 }, { "epoch": 0.031108230719377836, "grad_norm": 0.03474024310708046, "learning_rate": 1.5552855407047387e-05, "loss": 0.6882, "step": 384 }, { "epoch": 0.031189241736876216, "grad_norm": 0.04128913953900337, "learning_rate": 1.559335763466991e-05, "loss": 0.7806, "step": 385 }, { "epoch": 0.031270252754374596, "grad_norm": 0.03762802109122276, "learning_rate": 1.5633859862292426e-05, "loss": 0.7746, "step": 386 }, { "epoch": 0.03135126377187297, "grad_norm": 0.03340327739715576, "learning_rate": 1.5674362089914946e-05, "loss": 0.7158, "step": 387 }, { "epoch": 0.031432274789371356, "grad_norm": 0.036030713468790054, "learning_rate": 1.5714864317537465e-05, "loss": 0.6745, "step": 388 }, { "epoch": 0.03151328580686973, "grad_norm": 0.04570871591567993, "learning_rate": 1.5755366545159985e-05, "loss": 0.8154, "step": 389 }, { "epoch": 0.031594296824368116, "grad_norm": 0.04245030879974365, "learning_rate": 1.5795868772782505e-05, "loss": 0.7874, "step": 390 }, { "epoch": 0.03167530784186649, "grad_norm": 0.0361122190952301, "learning_rate": 1.5836371000405024e-05, "loss": 0.7843, "step": 391 }, { "epoch": 0.031756318859364877, "grad_norm": 0.04195176810026169, "learning_rate": 1.587687322802754e-05, "loss": 0.7749, "step": 392 }, { "epoch": 0.03183732987686325, "grad_norm": 0.0431600846350193, "learning_rate": 1.591737545565006e-05, "loss": 0.8219, "step": 393 }, { "epoch": 0.03191834089436163, "grad_norm": 0.03839649632573128, "learning_rate": 1.5957877683272583e-05, "loss": 0.716, "step": 394 }, { "epoch": 0.03199935191186001, "grad_norm": 0.039989449083805084, "learning_rate": 1.59983799108951e-05, "loss": 0.7706, "step": 395 }, { "epoch": 0.03208036292935839, "grad_norm": 0.04222527891397476, "learning_rate": 1.603888213851762e-05, "loss": 0.7206, "step": 396 }, { "epoch": 0.032161373946856774, "grad_norm": 0.04178968816995621, "learning_rate": 1.607938436614014e-05, "loss": 0.8058, "step": 397 }, { "epoch": 0.03224238496435515, "grad_norm": 0.03837030008435249, "learning_rate": 1.6119886593762655e-05, "loss": 0.7037, "step": 398 }, { "epoch": 0.032323395981853534, "grad_norm": 0.03963199257850647, "learning_rate": 1.616038882138518e-05, "loss": 0.7843, "step": 399 }, { "epoch": 0.03240440699935191, "grad_norm": 0.03307361900806427, "learning_rate": 1.6200891049007698e-05, "loss": 0.6755, "step": 400 }, { "epoch": 0.032485418016850294, "grad_norm": 0.03694215416908264, "learning_rate": 1.6241393276630214e-05, "loss": 0.7467, "step": 401 }, { "epoch": 0.03256642903434867, "grad_norm": 0.041869282722473145, "learning_rate": 1.6281895504252734e-05, "loss": 0.7941, "step": 402 }, { "epoch": 0.032647440051847054, "grad_norm": 0.043812256306409836, "learning_rate": 1.6322397731875254e-05, "loss": 0.7621, "step": 403 }, { "epoch": 0.03272845106934543, "grad_norm": 0.0400121733546257, "learning_rate": 1.6362899959497773e-05, "loss": 0.718, "step": 404 }, { "epoch": 0.03280946208684381, "grad_norm": 0.04324105381965637, "learning_rate": 1.6403402187120293e-05, "loss": 0.7901, "step": 405 }, { "epoch": 0.03289047310434219, "grad_norm": 0.03996644914150238, "learning_rate": 1.6443904414742813e-05, "loss": 0.737, "step": 406 }, { "epoch": 0.03297148412184057, "grad_norm": 0.04127480834722519, "learning_rate": 1.648440664236533e-05, "loss": 0.74, "step": 407 }, { "epoch": 0.03305249513933895, "grad_norm": 0.04006365314126015, "learning_rate": 1.6524908869987852e-05, "loss": 0.7189, "step": 408 }, { "epoch": 0.03313350615683733, "grad_norm": 0.036010291427373886, "learning_rate": 1.6565411097610368e-05, "loss": 0.6999, "step": 409 }, { "epoch": 0.03321451717433571, "grad_norm": 0.04041115194559097, "learning_rate": 1.6605913325232888e-05, "loss": 0.7223, "step": 410 }, { "epoch": 0.03329552819183409, "grad_norm": 0.03830122947692871, "learning_rate": 1.6646415552855408e-05, "loss": 0.6951, "step": 411 }, { "epoch": 0.03337653920933247, "grad_norm": 0.03762373328208923, "learning_rate": 1.6686917780477927e-05, "loss": 0.7426, "step": 412 }, { "epoch": 0.03345755022683085, "grad_norm": 0.04263002797961235, "learning_rate": 1.6727420008100447e-05, "loss": 0.7428, "step": 413 }, { "epoch": 0.03353856124432923, "grad_norm": 0.03778361529111862, "learning_rate": 1.6767922235722967e-05, "loss": 0.722, "step": 414 }, { "epoch": 0.03361957226182761, "grad_norm": 0.041179753839969635, "learning_rate": 1.6808424463345483e-05, "loss": 0.7686, "step": 415 }, { "epoch": 0.033700583279325985, "grad_norm": 0.03923264890909195, "learning_rate": 1.6848926690968003e-05, "loss": 0.6927, "step": 416 }, { "epoch": 0.03378159429682437, "grad_norm": 0.03404265269637108, "learning_rate": 1.6889428918590526e-05, "loss": 0.6726, "step": 417 }, { "epoch": 0.033862605314322745, "grad_norm": 0.039949920028448105, "learning_rate": 1.6929931146213042e-05, "loss": 0.7377, "step": 418 }, { "epoch": 0.03394361633182113, "grad_norm": 0.038464561104774475, "learning_rate": 1.697043337383556e-05, "loss": 0.7441, "step": 419 }, { "epoch": 0.034024627349319506, "grad_norm": 0.037392206490039825, "learning_rate": 1.701093560145808e-05, "loss": 0.6999, "step": 420 }, { "epoch": 0.03410563836681789, "grad_norm": 0.04498305171728134, "learning_rate": 1.70514378290806e-05, "loss": 0.6328, "step": 421 }, { "epoch": 0.034186649384316266, "grad_norm": 0.03619861975312233, "learning_rate": 1.709194005670312e-05, "loss": 0.6997, "step": 422 }, { "epoch": 0.03426766040181465, "grad_norm": 0.0377713143825531, "learning_rate": 1.713244228432564e-05, "loss": 0.6791, "step": 423 }, { "epoch": 0.034348671419313026, "grad_norm": 0.042271509766578674, "learning_rate": 1.7172944511948157e-05, "loss": 0.7267, "step": 424 }, { "epoch": 0.03442968243681141, "grad_norm": 0.03935857117176056, "learning_rate": 1.7213446739570676e-05, "loss": 0.6378, "step": 425 }, { "epoch": 0.034510693454309786, "grad_norm": 0.04291445389389992, "learning_rate": 1.7253948967193196e-05, "loss": 0.7408, "step": 426 }, { "epoch": 0.03459170447180816, "grad_norm": 0.04915785416960716, "learning_rate": 1.7294451194815716e-05, "loss": 0.6971, "step": 427 }, { "epoch": 0.034672715489306546, "grad_norm": 0.042455509305000305, "learning_rate": 1.7334953422438235e-05, "loss": 0.6855, "step": 428 }, { "epoch": 0.03475372650680492, "grad_norm": 0.03784140944480896, "learning_rate": 1.7375455650060755e-05, "loss": 0.6268, "step": 429 }, { "epoch": 0.03483473752430331, "grad_norm": 0.04316165670752525, "learning_rate": 1.741595787768327e-05, "loss": 0.647, "step": 430 }, { "epoch": 0.03491574854180168, "grad_norm": 0.04066496342420578, "learning_rate": 1.745646010530579e-05, "loss": 0.6596, "step": 431 }, { "epoch": 0.03499675955930007, "grad_norm": 0.0413605198264122, "learning_rate": 1.7496962332928314e-05, "loss": 0.6529, "step": 432 }, { "epoch": 0.035077770576798444, "grad_norm": 0.0477905236184597, "learning_rate": 1.753746456055083e-05, "loss": 0.7452, "step": 433 }, { "epoch": 0.03515878159429683, "grad_norm": 0.04964848607778549, "learning_rate": 1.757796678817335e-05, "loss": 0.707, "step": 434 }, { "epoch": 0.035239792611795204, "grad_norm": 0.03816310688853264, "learning_rate": 1.761846901579587e-05, "loss": 0.6183, "step": 435 }, { "epoch": 0.03532080362929359, "grad_norm": 0.0438305027782917, "learning_rate": 1.765897124341839e-05, "loss": 0.6684, "step": 436 }, { "epoch": 0.035401814646791964, "grad_norm": 0.042699217796325684, "learning_rate": 1.769947347104091e-05, "loss": 0.6744, "step": 437 }, { "epoch": 0.03548282566429034, "grad_norm": 0.04368191957473755, "learning_rate": 1.773997569866343e-05, "loss": 0.6804, "step": 438 }, { "epoch": 0.035563836681788724, "grad_norm": 0.04724517837166786, "learning_rate": 1.7780477926285945e-05, "loss": 0.7299, "step": 439 }, { "epoch": 0.0356448476992871, "grad_norm": 0.04736173152923584, "learning_rate": 1.7820980153908464e-05, "loss": 0.697, "step": 440 }, { "epoch": 0.035725858716785484, "grad_norm": 0.04788941144943237, "learning_rate": 1.7861482381530988e-05, "loss": 0.7005, "step": 441 }, { "epoch": 0.03580686973428386, "grad_norm": 0.041798755526542664, "learning_rate": 1.7901984609153504e-05, "loss": 0.6847, "step": 442 }, { "epoch": 0.035887880751782245, "grad_norm": 0.05260884389281273, "learning_rate": 1.7942486836776023e-05, "loss": 0.6841, "step": 443 }, { "epoch": 0.03596889176928062, "grad_norm": 0.046023592352867126, "learning_rate": 1.7982989064398543e-05, "loss": 0.7487, "step": 444 }, { "epoch": 0.036049902786779005, "grad_norm": 0.04284362122416496, "learning_rate": 1.802349129202106e-05, "loss": 0.6566, "step": 445 }, { "epoch": 0.03613091380427738, "grad_norm": 0.04734259471297264, "learning_rate": 1.8063993519643582e-05, "loss": 0.7172, "step": 446 }, { "epoch": 0.03621192482177576, "grad_norm": 0.03644348680973053, "learning_rate": 1.8104495747266102e-05, "loss": 0.6219, "step": 447 }, { "epoch": 0.03629293583927414, "grad_norm": 0.04391570761799812, "learning_rate": 1.814499797488862e-05, "loss": 0.6723, "step": 448 }, { "epoch": 0.03637394685677252, "grad_norm": 0.04355219379067421, "learning_rate": 1.8185500202511138e-05, "loss": 0.6287, "step": 449 }, { "epoch": 0.0364549578742709, "grad_norm": 0.04065849632024765, "learning_rate": 1.822600243013366e-05, "loss": 0.6504, "step": 450 }, { "epoch": 0.03653596889176928, "grad_norm": 0.04802323877811432, "learning_rate": 1.8266504657756177e-05, "loss": 0.6465, "step": 451 }, { "epoch": 0.03661697990926766, "grad_norm": 0.04284946620464325, "learning_rate": 1.8307006885378697e-05, "loss": 0.6613, "step": 452 }, { "epoch": 0.03669799092676604, "grad_norm": 0.039490021765232086, "learning_rate": 1.8347509113001217e-05, "loss": 0.6396, "step": 453 }, { "epoch": 0.03677900194426442, "grad_norm": 0.03865380212664604, "learning_rate": 1.8388011340623733e-05, "loss": 0.6547, "step": 454 }, { "epoch": 0.0368600129617628, "grad_norm": 0.048680275678634644, "learning_rate": 1.8428513568246256e-05, "loss": 0.6325, "step": 455 }, { "epoch": 0.03694102397926118, "grad_norm": 0.047213200479745865, "learning_rate": 1.8469015795868776e-05, "loss": 0.7057, "step": 456 }, { "epoch": 0.03702203499675956, "grad_norm": 0.04423484206199646, "learning_rate": 1.8509518023491292e-05, "loss": 0.639, "step": 457 }, { "epoch": 0.037103046014257936, "grad_norm": 0.04309452697634697, "learning_rate": 1.8550020251113812e-05, "loss": 0.5955, "step": 458 }, { "epoch": 0.03718405703175632, "grad_norm": 0.04167764261364937, "learning_rate": 1.859052247873633e-05, "loss": 0.6394, "step": 459 }, { "epoch": 0.037265068049254696, "grad_norm": 0.04568406194448471, "learning_rate": 1.863102470635885e-05, "loss": 0.6784, "step": 460 }, { "epoch": 0.03734607906675308, "grad_norm": 0.040312498807907104, "learning_rate": 1.867152693398137e-05, "loss": 0.6277, "step": 461 }, { "epoch": 0.037427090084251456, "grad_norm": 0.04599596560001373, "learning_rate": 1.871202916160389e-05, "loss": 0.6869, "step": 462 }, { "epoch": 0.03750810110174984, "grad_norm": 0.04221928119659424, "learning_rate": 1.8752531389226407e-05, "loss": 0.626, "step": 463 }, { "epoch": 0.037589112119248216, "grad_norm": 0.042346443980932236, "learning_rate": 1.8793033616848926e-05, "loss": 0.6522, "step": 464 }, { "epoch": 0.0376701231367466, "grad_norm": 0.04198833554983139, "learning_rate": 1.8833535844471446e-05, "loss": 0.6207, "step": 465 }, { "epoch": 0.03775113415424498, "grad_norm": 0.03679432347416878, "learning_rate": 1.8874038072093966e-05, "loss": 0.5898, "step": 466 }, { "epoch": 0.03783214517174336, "grad_norm": 0.04612481966614723, "learning_rate": 1.8914540299716485e-05, "loss": 0.6411, "step": 467 }, { "epoch": 0.03791315618924174, "grad_norm": 0.04428162798285484, "learning_rate": 1.8955042527339005e-05, "loss": 0.6973, "step": 468 }, { "epoch": 0.03799416720674011, "grad_norm": 0.04129292443394661, "learning_rate": 1.8995544754961525e-05, "loss": 0.5986, "step": 469 }, { "epoch": 0.0380751782242385, "grad_norm": 0.04522555693984032, "learning_rate": 1.9036046982584044e-05, "loss": 0.6681, "step": 470 }, { "epoch": 0.038156189241736874, "grad_norm": 0.04063719883561134, "learning_rate": 1.9076549210206564e-05, "loss": 0.6732, "step": 471 }, { "epoch": 0.03823720025923526, "grad_norm": 0.03796732425689697, "learning_rate": 1.911705143782908e-05, "loss": 0.5774, "step": 472 }, { "epoch": 0.038318211276733634, "grad_norm": 0.047407280653715134, "learning_rate": 1.91575536654516e-05, "loss": 0.6766, "step": 473 }, { "epoch": 0.03839922229423202, "grad_norm": 0.042836595326662064, "learning_rate": 1.919805589307412e-05, "loss": 0.6184, "step": 474 }, { "epoch": 0.038480233311730394, "grad_norm": 0.03992742300033569, "learning_rate": 1.923855812069664e-05, "loss": 0.568, "step": 475 }, { "epoch": 0.03856124432922878, "grad_norm": 0.046161893755197525, "learning_rate": 1.927906034831916e-05, "loss": 0.634, "step": 476 }, { "epoch": 0.038642255346727154, "grad_norm": 0.042704734951257706, "learning_rate": 1.931956257594168e-05, "loss": 0.6076, "step": 477 }, { "epoch": 0.03872326636422554, "grad_norm": 0.04184344783425331, "learning_rate": 1.9360064803564195e-05, "loss": 0.5935, "step": 478 }, { "epoch": 0.038804277381723914, "grad_norm": 0.04119317978620529, "learning_rate": 1.9400567031186718e-05, "loss": 0.6022, "step": 479 }, { "epoch": 0.03888528839922229, "grad_norm": 0.04500815272331238, "learning_rate": 1.9441069258809234e-05, "loss": 0.6759, "step": 480 }, { "epoch": 0.038966299416720675, "grad_norm": 0.04669018089771271, "learning_rate": 1.9481571486431754e-05, "loss": 0.5904, "step": 481 }, { "epoch": 0.03904731043421905, "grad_norm": 0.07213626056909561, "learning_rate": 1.9522073714054274e-05, "loss": 0.7376, "step": 482 }, { "epoch": 0.039128321451717435, "grad_norm": 0.04742096737027168, "learning_rate": 1.9562575941676793e-05, "loss": 0.5984, "step": 483 }, { "epoch": 0.03920933246921581, "grad_norm": 0.03930765762925148, "learning_rate": 1.9603078169299313e-05, "loss": 0.6301, "step": 484 }, { "epoch": 0.039290343486714195, "grad_norm": 0.049007635563611984, "learning_rate": 1.9643580396921833e-05, "loss": 0.6962, "step": 485 }, { "epoch": 0.03937135450421257, "grad_norm": 0.049863554537296295, "learning_rate": 1.968408262454435e-05, "loss": 0.6641, "step": 486 }, { "epoch": 0.039452365521710955, "grad_norm": 0.05560353770852089, "learning_rate": 1.972458485216687e-05, "loss": 0.6403, "step": 487 }, { "epoch": 0.03953337653920933, "grad_norm": 0.04843142628669739, "learning_rate": 1.976508707978939e-05, "loss": 0.6101, "step": 488 }, { "epoch": 0.039614387556707716, "grad_norm": 0.04507589340209961, "learning_rate": 1.9805589307411908e-05, "loss": 0.6062, "step": 489 }, { "epoch": 0.03969539857420609, "grad_norm": 0.05050964653491974, "learning_rate": 1.9846091535034428e-05, "loss": 0.6674, "step": 490 }, { "epoch": 0.03977640959170447, "grad_norm": 0.0509161613881588, "learning_rate": 1.9886593762656947e-05, "loss": 0.5875, "step": 491 }, { "epoch": 0.03985742060920285, "grad_norm": 0.05824290215969086, "learning_rate": 1.9927095990279464e-05, "loss": 0.6179, "step": 492 }, { "epoch": 0.03993843162670123, "grad_norm": 0.05500331521034241, "learning_rate": 1.9967598217901987e-05, "loss": 0.6085, "step": 493 }, { "epoch": 0.04001944264419961, "grad_norm": 0.06655135005712509, "learning_rate": 2.0008100445524506e-05, "loss": 0.6541, "step": 494 }, { "epoch": 0.04010045366169799, "grad_norm": 0.05266350507736206, "learning_rate": 2.0048602673147023e-05, "loss": 0.601, "step": 495 }, { "epoch": 0.04018146467919637, "grad_norm": 0.045727360993623734, "learning_rate": 2.0089104900769542e-05, "loss": 0.573, "step": 496 }, { "epoch": 0.04026247569669475, "grad_norm": 0.0533706471323967, "learning_rate": 2.0129607128392062e-05, "loss": 0.5564, "step": 497 }, { "epoch": 0.04034348671419313, "grad_norm": 0.06397935003042221, "learning_rate": 2.017010935601458e-05, "loss": 0.6389, "step": 498 }, { "epoch": 0.04042449773169151, "grad_norm": 0.056940142065286636, "learning_rate": 2.02106115836371e-05, "loss": 0.6105, "step": 499 }, { "epoch": 0.04050550874918989, "grad_norm": 0.05736251175403595, "learning_rate": 2.025111381125962e-05, "loss": 0.5387, "step": 500 }, { "epoch": 0.04058651976668827, "grad_norm": 0.058200638741254807, "learning_rate": 2.0291616038882137e-05, "loss": 0.6367, "step": 501 }, { "epoch": 0.040667530784186647, "grad_norm": 0.05528083071112633, "learning_rate": 2.033211826650466e-05, "loss": 0.5223, "step": 502 }, { "epoch": 0.04074854180168503, "grad_norm": 0.05075810104608536, "learning_rate": 2.037262049412718e-05, "loss": 0.6069, "step": 503 }, { "epoch": 0.04082955281918341, "grad_norm": 0.05383017659187317, "learning_rate": 2.0413122721749696e-05, "loss": 0.5804, "step": 504 }, { "epoch": 0.04091056383668179, "grad_norm": 0.052171312272548676, "learning_rate": 2.0453624949372216e-05, "loss": 0.5599, "step": 505 }, { "epoch": 0.04099157485418017, "grad_norm": 0.06255053728818893, "learning_rate": 2.0494127176994735e-05, "loss": 0.5705, "step": 506 }, { "epoch": 0.04107258587167855, "grad_norm": 0.05635403096675873, "learning_rate": 2.0534629404617255e-05, "loss": 0.604, "step": 507 }, { "epoch": 0.04115359688917693, "grad_norm": 0.05404305458068848, "learning_rate": 2.0575131632239775e-05, "loss": 0.62, "step": 508 }, { "epoch": 0.04123460790667531, "grad_norm": 0.05946921557188034, "learning_rate": 2.0615633859862295e-05, "loss": 0.5949, "step": 509 }, { "epoch": 0.04131561892417369, "grad_norm": 0.04885870963335037, "learning_rate": 2.065613608748481e-05, "loss": 0.5022, "step": 510 }, { "epoch": 0.041396629941672064, "grad_norm": 0.053143661469221115, "learning_rate": 2.069663831510733e-05, "loss": 0.5709, "step": 511 }, { "epoch": 0.04147764095917045, "grad_norm": 0.0510605163872242, "learning_rate": 2.0737140542729854e-05, "loss": 0.5656, "step": 512 }, { "epoch": 0.041558651976668824, "grad_norm": 0.053504448384046555, "learning_rate": 2.077764277035237e-05, "loss": 0.5462, "step": 513 }, { "epoch": 0.04163966299416721, "grad_norm": 0.06571054458618164, "learning_rate": 2.081814499797489e-05, "loss": 0.527, "step": 514 }, { "epoch": 0.041720674011665584, "grad_norm": 0.05549018830060959, "learning_rate": 2.085864722559741e-05, "loss": 0.5494, "step": 515 }, { "epoch": 0.04180168502916397, "grad_norm": 0.05811379849910736, "learning_rate": 2.089914945321993e-05, "loss": 0.515, "step": 516 }, { "epoch": 0.041882696046662345, "grad_norm": 0.06510355323553085, "learning_rate": 2.093965168084245e-05, "loss": 0.588, "step": 517 }, { "epoch": 0.04196370706416073, "grad_norm": 0.055551595985889435, "learning_rate": 2.0980153908464968e-05, "loss": 0.5697, "step": 518 }, { "epoch": 0.042044718081659105, "grad_norm": 0.05914895981550217, "learning_rate": 2.1020656136087484e-05, "loss": 0.5064, "step": 519 }, { "epoch": 0.04212572909915749, "grad_norm": 0.07579497992992401, "learning_rate": 2.1061158363710004e-05, "loss": 0.535, "step": 520 }, { "epoch": 0.042206740116655865, "grad_norm": 0.04847273975610733, "learning_rate": 2.1101660591332527e-05, "loss": 0.5059, "step": 521 }, { "epoch": 0.04228775113415424, "grad_norm": 0.05634573474526405, "learning_rate": 2.1142162818955043e-05, "loss": 0.528, "step": 522 }, { "epoch": 0.042368762151652625, "grad_norm": 0.0529668815433979, "learning_rate": 2.1182665046577563e-05, "loss": 0.5497, "step": 523 }, { "epoch": 0.042449773169151, "grad_norm": 0.04280909150838852, "learning_rate": 2.1223167274200083e-05, "loss": 0.5208, "step": 524 }, { "epoch": 0.042530784186649385, "grad_norm": 0.05029316246509552, "learning_rate": 2.12636695018226e-05, "loss": 0.5829, "step": 525 }, { "epoch": 0.04261179520414776, "grad_norm": 0.052403755486011505, "learning_rate": 2.1304171729445122e-05, "loss": 0.5172, "step": 526 }, { "epoch": 0.042692806221646146, "grad_norm": 0.055489446967840195, "learning_rate": 2.1344673957067642e-05, "loss": 0.5782, "step": 527 }, { "epoch": 0.04277381723914452, "grad_norm": 0.05629388242959976, "learning_rate": 2.1385176184690158e-05, "loss": 0.5884, "step": 528 }, { "epoch": 0.042854828256642906, "grad_norm": 0.047733061015605927, "learning_rate": 2.1425678412312678e-05, "loss": 0.515, "step": 529 }, { "epoch": 0.04293583927414128, "grad_norm": 0.053567174822092056, "learning_rate": 2.1466180639935197e-05, "loss": 0.5624, "step": 530 }, { "epoch": 0.043016850291639666, "grad_norm": 0.05233658850193024, "learning_rate": 2.1506682867557717e-05, "loss": 0.5617, "step": 531 }, { "epoch": 0.04309786130913804, "grad_norm": 0.04258202761411667, "learning_rate": 2.1547185095180237e-05, "loss": 0.4907, "step": 532 }, { "epoch": 0.04317887232663642, "grad_norm": 0.04429875314235687, "learning_rate": 2.1587687322802756e-05, "loss": 0.5367, "step": 533 }, { "epoch": 0.0432598833441348, "grad_norm": 0.04000532627105713, "learning_rate": 2.1628189550425273e-05, "loss": 0.5274, "step": 534 }, { "epoch": 0.04334089436163318, "grad_norm": 0.04910857975482941, "learning_rate": 2.1668691778047796e-05, "loss": 0.5301, "step": 535 }, { "epoch": 0.04342190537913156, "grad_norm": 0.04290639981627464, "learning_rate": 2.1709194005670312e-05, "loss": 0.5091, "step": 536 }, { "epoch": 0.04350291639662994, "grad_norm": 0.047956433147192, "learning_rate": 2.174969623329283e-05, "loss": 0.4973, "step": 537 }, { "epoch": 0.04358392741412832, "grad_norm": 0.04619165509939194, "learning_rate": 2.179019846091535e-05, "loss": 0.5907, "step": 538 }, { "epoch": 0.0436649384316267, "grad_norm": 0.05486949533224106, "learning_rate": 2.183070068853787e-05, "loss": 0.5225, "step": 539 }, { "epoch": 0.043745949449125084, "grad_norm": 0.04930966719985008, "learning_rate": 2.187120291616039e-05, "loss": 0.5284, "step": 540 }, { "epoch": 0.04382696046662346, "grad_norm": 0.06125545874238014, "learning_rate": 2.191170514378291e-05, "loss": 0.5276, "step": 541 }, { "epoch": 0.043907971484121844, "grad_norm": 0.048369333148002625, "learning_rate": 2.1952207371405427e-05, "loss": 0.5033, "step": 542 }, { "epoch": 0.04398898250162022, "grad_norm": 0.04628164693713188, "learning_rate": 2.1992709599027946e-05, "loss": 0.4926, "step": 543 }, { "epoch": 0.0440699935191186, "grad_norm": 0.06109035015106201, "learning_rate": 2.2033211826650466e-05, "loss": 0.5382, "step": 544 }, { "epoch": 0.04415100453661698, "grad_norm": 0.04685697704553604, "learning_rate": 2.2073714054272986e-05, "loss": 0.5546, "step": 545 }, { "epoch": 0.04423201555411536, "grad_norm": 0.06735414266586304, "learning_rate": 2.2114216281895505e-05, "loss": 0.5866, "step": 546 }, { "epoch": 0.04431302657161374, "grad_norm": 0.050085365772247314, "learning_rate": 2.2154718509518025e-05, "loss": 0.5075, "step": 547 }, { "epoch": 0.04439403758911212, "grad_norm": 0.04673994705080986, "learning_rate": 2.2195220737140545e-05, "loss": 0.5104, "step": 548 }, { "epoch": 0.0444750486066105, "grad_norm": 0.05349628999829292, "learning_rate": 2.2235722964763064e-05, "loss": 0.4982, "step": 549 }, { "epoch": 0.04455605962410888, "grad_norm": 0.05613791197538376, "learning_rate": 2.2276225192385584e-05, "loss": 0.5439, "step": 550 }, { "epoch": 0.04463707064160726, "grad_norm": 0.04818318039178848, "learning_rate": 2.23167274200081e-05, "loss": 0.5558, "step": 551 }, { "epoch": 0.04471808165910564, "grad_norm": 0.047487739473581314, "learning_rate": 2.235722964763062e-05, "loss": 0.5025, "step": 552 }, { "epoch": 0.04479909267660402, "grad_norm": 0.0547555573284626, "learning_rate": 2.239773187525314e-05, "loss": 0.4781, "step": 553 }, { "epoch": 0.0448801036941024, "grad_norm": 0.05158519372344017, "learning_rate": 2.243823410287566e-05, "loss": 0.5309, "step": 554 }, { "epoch": 0.044961114711600775, "grad_norm": 0.04126646742224693, "learning_rate": 2.247873633049818e-05, "loss": 0.4664, "step": 555 }, { "epoch": 0.04504212572909916, "grad_norm": 0.06769046187400818, "learning_rate": 2.25192385581207e-05, "loss": 0.5093, "step": 556 }, { "epoch": 0.045123136746597535, "grad_norm": 0.050162386149168015, "learning_rate": 2.2559740785743215e-05, "loss": 0.4988, "step": 557 }, { "epoch": 0.04520414776409592, "grad_norm": 0.041536845266819, "learning_rate": 2.2600243013365735e-05, "loss": 0.5398, "step": 558 }, { "epoch": 0.045285158781594295, "grad_norm": 0.0459623746573925, "learning_rate": 2.2640745240988258e-05, "loss": 0.4807, "step": 559 }, { "epoch": 0.04536616979909268, "grad_norm": 0.044929806143045425, "learning_rate": 2.2681247468610774e-05, "loss": 0.483, "step": 560 }, { "epoch": 0.045447180816591055, "grad_norm": 0.04633745923638344, "learning_rate": 2.2721749696233294e-05, "loss": 0.4784, "step": 561 }, { "epoch": 0.04552819183408944, "grad_norm": 0.05819401517510414, "learning_rate": 2.2762251923855813e-05, "loss": 0.5187, "step": 562 }, { "epoch": 0.045609202851587816, "grad_norm": 0.04854605346918106, "learning_rate": 2.280275415147833e-05, "loss": 0.5054, "step": 563 }, { "epoch": 0.04569021386908619, "grad_norm": 0.05877658352255821, "learning_rate": 2.2843256379100853e-05, "loss": 0.4914, "step": 564 }, { "epoch": 0.045771224886584576, "grad_norm": 0.04236457124352455, "learning_rate": 2.2883758606723372e-05, "loss": 0.4927, "step": 565 }, { "epoch": 0.04585223590408295, "grad_norm": 0.04844123125076294, "learning_rate": 2.292426083434589e-05, "loss": 0.5393, "step": 566 }, { "epoch": 0.045933246921581336, "grad_norm": 0.0493597574532032, "learning_rate": 2.2964763061968408e-05, "loss": 0.543, "step": 567 }, { "epoch": 0.04601425793907971, "grad_norm": 0.044169507920742035, "learning_rate": 2.300526528959093e-05, "loss": 0.5062, "step": 568 }, { "epoch": 0.046095268956578096, "grad_norm": 0.05171835795044899, "learning_rate": 2.3045767517213448e-05, "loss": 0.5232, "step": 569 }, { "epoch": 0.04617627997407647, "grad_norm": 0.04781988635659218, "learning_rate": 2.3086269744835967e-05, "loss": 0.4988, "step": 570 }, { "epoch": 0.046257290991574856, "grad_norm": 0.05599427595734596, "learning_rate": 2.3126771972458487e-05, "loss": 0.5655, "step": 571 }, { "epoch": 0.04633830200907323, "grad_norm": 0.04716041311621666, "learning_rate": 2.3167274200081003e-05, "loss": 0.4983, "step": 572 }, { "epoch": 0.04641931302657162, "grad_norm": 0.04140276089310646, "learning_rate": 2.3207776427703526e-05, "loss": 0.4953, "step": 573 }, { "epoch": 0.04650032404406999, "grad_norm": 0.04450197145342827, "learning_rate": 2.3248278655326046e-05, "loss": 0.4608, "step": 574 }, { "epoch": 0.04658133506156837, "grad_norm": 0.047172173857688904, "learning_rate": 2.3288780882948562e-05, "loss": 0.538, "step": 575 }, { "epoch": 0.046662346079066754, "grad_norm": 0.06201828271150589, "learning_rate": 2.3329283110571082e-05, "loss": 0.5433, "step": 576 }, { "epoch": 0.04674335709656513, "grad_norm": 0.05522223934531212, "learning_rate": 2.33697853381936e-05, "loss": 0.5024, "step": 577 }, { "epoch": 0.046824368114063514, "grad_norm": 0.08238446712493896, "learning_rate": 2.341028756581612e-05, "loss": 0.5263, "step": 578 }, { "epoch": 0.04690537913156189, "grad_norm": 0.07169990241527557, "learning_rate": 2.345078979343864e-05, "loss": 0.4711, "step": 579 }, { "epoch": 0.046986390149060274, "grad_norm": 0.04631124436855316, "learning_rate": 2.349129202106116e-05, "loss": 0.5079, "step": 580 }, { "epoch": 0.04706740116655865, "grad_norm": 0.05277573689818382, "learning_rate": 2.3531794248683677e-05, "loss": 0.5469, "step": 581 }, { "epoch": 0.047148412184057034, "grad_norm": 0.0452033132314682, "learning_rate": 2.35722964763062e-05, "loss": 0.4745, "step": 582 }, { "epoch": 0.04722942320155541, "grad_norm": 0.050169870257377625, "learning_rate": 2.361279870392872e-05, "loss": 0.4768, "step": 583 }, { "epoch": 0.047310434219053794, "grad_norm": 0.04421820119023323, "learning_rate": 2.3653300931551236e-05, "loss": 0.4852, "step": 584 }, { "epoch": 0.04739144523655217, "grad_norm": 0.04202679544687271, "learning_rate": 2.3693803159173755e-05, "loss": 0.4853, "step": 585 }, { "epoch": 0.04747245625405055, "grad_norm": 0.07291880995035172, "learning_rate": 2.3734305386796275e-05, "loss": 0.5294, "step": 586 }, { "epoch": 0.04755346727154893, "grad_norm": 0.05519851669669151, "learning_rate": 2.3774807614418795e-05, "loss": 0.505, "step": 587 }, { "epoch": 0.04763447828904731, "grad_norm": 0.04110843688249588, "learning_rate": 2.3815309842041314e-05, "loss": 0.5181, "step": 588 }, { "epoch": 0.04771548930654569, "grad_norm": 0.044017914682626724, "learning_rate": 2.3855812069663834e-05, "loss": 0.5272, "step": 589 }, { "epoch": 0.04779650032404407, "grad_norm": 0.07405832409858704, "learning_rate": 2.389631429728635e-05, "loss": 0.4221, "step": 590 }, { "epoch": 0.04787751134154245, "grad_norm": 0.05474500358104706, "learning_rate": 2.393681652490887e-05, "loss": 0.4942, "step": 591 }, { "epoch": 0.04795852235904083, "grad_norm": 0.04653024300932884, "learning_rate": 2.397731875253139e-05, "loss": 0.4566, "step": 592 }, { "epoch": 0.04803953337653921, "grad_norm": 0.052719101309776306, "learning_rate": 2.401782098015391e-05, "loss": 0.4965, "step": 593 }, { "epoch": 0.04812054439403759, "grad_norm": 0.05099957436323166, "learning_rate": 2.405832320777643e-05, "loss": 0.4876, "step": 594 }, { "epoch": 0.04820155541153597, "grad_norm": 0.061169691383838654, "learning_rate": 2.409882543539895e-05, "loss": 0.4586, "step": 595 }, { "epoch": 0.04828256642903435, "grad_norm": 0.06282297521829605, "learning_rate": 2.4139327663021465e-05, "loss": 0.4801, "step": 596 }, { "epoch": 0.048363577446532725, "grad_norm": 0.05508417636156082, "learning_rate": 2.4179829890643988e-05, "loss": 0.5072, "step": 597 }, { "epoch": 0.04844458846403111, "grad_norm": 0.052037712186574936, "learning_rate": 2.4220332118266508e-05, "loss": 0.528, "step": 598 }, { "epoch": 0.048525599481529486, "grad_norm": 0.06128312274813652, "learning_rate": 2.4260834345889024e-05, "loss": 0.4564, "step": 599 }, { "epoch": 0.04860661049902787, "grad_norm": 0.04434271529316902, "learning_rate": 2.4301336573511544e-05, "loss": 0.4989, "step": 600 }, { "epoch": 0.048687621516526246, "grad_norm": 0.05338647589087486, "learning_rate": 2.4341838801134063e-05, "loss": 0.5295, "step": 601 }, { "epoch": 0.04876863253402463, "grad_norm": 0.051930706948041916, "learning_rate": 2.4382341028756583e-05, "loss": 0.4958, "step": 602 }, { "epoch": 0.048849643551523006, "grad_norm": 0.06045117601752281, "learning_rate": 2.4422843256379103e-05, "loss": 0.4752, "step": 603 }, { "epoch": 0.04893065456902139, "grad_norm": 0.050316449254751205, "learning_rate": 2.4463345484001622e-05, "loss": 0.4702, "step": 604 }, { "epoch": 0.049011665586519766, "grad_norm": 0.044821321964263916, "learning_rate": 2.450384771162414e-05, "loss": 0.452, "step": 605 }, { "epoch": 0.04909267660401815, "grad_norm": 0.07491052150726318, "learning_rate": 2.4544349939246662e-05, "loss": 0.5094, "step": 606 }, { "epoch": 0.049173687621516526, "grad_norm": 0.05308236926794052, "learning_rate": 2.4584852166869178e-05, "loss": 0.4962, "step": 607 }, { "epoch": 0.0492546986390149, "grad_norm": 0.04302395507693291, "learning_rate": 2.4625354394491698e-05, "loss": 0.4967, "step": 608 }, { "epoch": 0.04933570965651329, "grad_norm": 0.04931635782122612, "learning_rate": 2.4665856622114217e-05, "loss": 0.4631, "step": 609 }, { "epoch": 0.04941672067401166, "grad_norm": 0.04895971342921257, "learning_rate": 2.4706358849736737e-05, "loss": 0.4652, "step": 610 }, { "epoch": 0.04949773169151005, "grad_norm": 0.046912480145692825, "learning_rate": 2.4746861077359257e-05, "loss": 0.5166, "step": 611 }, { "epoch": 0.04957874270900842, "grad_norm": 0.0499003529548645, "learning_rate": 2.4787363304981776e-05, "loss": 0.5191, "step": 612 }, { "epoch": 0.04965975372650681, "grad_norm": 0.05203314870595932, "learning_rate": 2.4827865532604293e-05, "loss": 0.477, "step": 613 }, { "epoch": 0.049740764744005184, "grad_norm": 0.07039269059896469, "learning_rate": 2.4868367760226812e-05, "loss": 0.5298, "step": 614 }, { "epoch": 0.04982177576150357, "grad_norm": 0.05096956714987755, "learning_rate": 2.4908869987849335e-05, "loss": 0.4775, "step": 615 }, { "epoch": 0.049902786779001944, "grad_norm": 0.05297008156776428, "learning_rate": 2.494937221547185e-05, "loss": 0.5042, "step": 616 }, { "epoch": 0.04998379779650033, "grad_norm": 0.046072036027908325, "learning_rate": 2.498987444309437e-05, "loss": 0.4911, "step": 617 }, { "epoch": 0.050064808813998704, "grad_norm": 0.044839046895504, "learning_rate": 2.5030376670716894e-05, "loss": 0.4661, "step": 618 }, { "epoch": 0.05014581983149708, "grad_norm": 0.06291180104017258, "learning_rate": 2.507087889833941e-05, "loss": 0.497, "step": 619 }, { "epoch": 0.050226830848995464, "grad_norm": 0.049912694841623306, "learning_rate": 2.511138112596193e-05, "loss": 0.4384, "step": 620 }, { "epoch": 0.05030784186649384, "grad_norm": 0.0411076582968235, "learning_rate": 2.515188335358445e-05, "loss": 0.5415, "step": 621 }, { "epoch": 0.050388852883992225, "grad_norm": 0.046500127762556076, "learning_rate": 2.5192385581206966e-05, "loss": 0.4896, "step": 622 }, { "epoch": 0.0504698639014906, "grad_norm": 0.05132298171520233, "learning_rate": 2.5232887808829486e-05, "loss": 0.4807, "step": 623 }, { "epoch": 0.050550874918988985, "grad_norm": 0.05340069532394409, "learning_rate": 2.5273390036452006e-05, "loss": 0.4442, "step": 624 }, { "epoch": 0.05063188593648736, "grad_norm": 0.05164061486721039, "learning_rate": 2.5313892264074522e-05, "loss": 0.4896, "step": 625 }, { "epoch": 0.050712896953985745, "grad_norm": 0.05592244863510132, "learning_rate": 2.535439449169704e-05, "loss": 0.5025, "step": 626 }, { "epoch": 0.05079390797148412, "grad_norm": 0.050286903977394104, "learning_rate": 2.5394896719319568e-05, "loss": 0.4409, "step": 627 }, { "epoch": 0.0508749189889825, "grad_norm": 0.06757903099060059, "learning_rate": 2.5435398946942084e-05, "loss": 0.4934, "step": 628 }, { "epoch": 0.05095593000648088, "grad_norm": 0.05722956359386444, "learning_rate": 2.5475901174564604e-05, "loss": 0.501, "step": 629 }, { "epoch": 0.05103694102397926, "grad_norm": 0.05500578135251999, "learning_rate": 2.5516403402187124e-05, "loss": 0.488, "step": 630 }, { "epoch": 0.05111795204147764, "grad_norm": 0.06366024166345596, "learning_rate": 2.555690562980964e-05, "loss": 0.5071, "step": 631 }, { "epoch": 0.05119896305897602, "grad_norm": 0.050802573561668396, "learning_rate": 2.559740785743216e-05, "loss": 0.5274, "step": 632 }, { "epoch": 0.0512799740764744, "grad_norm": 0.070594422519207, "learning_rate": 2.563791008505468e-05, "loss": 0.5142, "step": 633 }, { "epoch": 0.05136098509397278, "grad_norm": 0.05826390162110329, "learning_rate": 2.5678412312677195e-05, "loss": 0.5286, "step": 634 }, { "epoch": 0.05144199611147116, "grad_norm": 0.046842195093631744, "learning_rate": 2.5718914540299715e-05, "loss": 0.48, "step": 635 }, { "epoch": 0.05152300712896954, "grad_norm": 0.08180008828639984, "learning_rate": 2.5759416767922235e-05, "loss": 0.5028, "step": 636 }, { "epoch": 0.05160401814646792, "grad_norm": 0.05418627709150314, "learning_rate": 2.5799918995544758e-05, "loss": 0.4879, "step": 637 }, { "epoch": 0.0516850291639663, "grad_norm": 0.053439754992723465, "learning_rate": 2.5840421223167278e-05, "loss": 0.4461, "step": 638 }, { "epoch": 0.051766040181464676, "grad_norm": 0.04994548112154007, "learning_rate": 2.5880923450789797e-05, "loss": 0.4158, "step": 639 }, { "epoch": 0.05184705119896306, "grad_norm": 0.052046455442905426, "learning_rate": 2.5921425678412313e-05, "loss": 0.4741, "step": 640 }, { "epoch": 0.051928062216461436, "grad_norm": 0.0547301284968853, "learning_rate": 2.5961927906034833e-05, "loss": 0.4777, "step": 641 }, { "epoch": 0.05200907323395982, "grad_norm": 0.06931430846452713, "learning_rate": 2.6002430133657353e-05, "loss": 0.4783, "step": 642 }, { "epoch": 0.052090084251458196, "grad_norm": 0.06401245296001434, "learning_rate": 2.604293236127987e-05, "loss": 0.5112, "step": 643 }, { "epoch": 0.05217109526895658, "grad_norm": 0.054302409291267395, "learning_rate": 2.608343458890239e-05, "loss": 0.4491, "step": 644 }, { "epoch": 0.052252106286454957, "grad_norm": 0.07285508513450623, "learning_rate": 2.612393681652491e-05, "loss": 0.4729, "step": 645 }, { "epoch": 0.05233311730395334, "grad_norm": 0.050434838980436325, "learning_rate": 2.616443904414743e-05, "loss": 0.4898, "step": 646 }, { "epoch": 0.05241412832145172, "grad_norm": 0.07490405440330505, "learning_rate": 2.620494127176995e-05, "loss": 0.4827, "step": 647 }, { "epoch": 0.0524951393389501, "grad_norm": 0.0542641319334507, "learning_rate": 2.624544349939247e-05, "loss": 0.4467, "step": 648 }, { "epoch": 0.05257615035644848, "grad_norm": 0.05059095472097397, "learning_rate": 2.6285945727014987e-05, "loss": 0.4108, "step": 649 }, { "epoch": 0.052657161373946854, "grad_norm": 0.06154976785182953, "learning_rate": 2.6326447954637507e-05, "loss": 0.4863, "step": 650 }, { "epoch": 0.05273817239144524, "grad_norm": 0.05898062512278557, "learning_rate": 2.6366950182260026e-05, "loss": 0.44, "step": 651 }, { "epoch": 0.052819183408943614, "grad_norm": 0.06105419248342514, "learning_rate": 2.6407452409882543e-05, "loss": 0.4522, "step": 652 }, { "epoch": 0.052900194426442, "grad_norm": 0.07445076107978821, "learning_rate": 2.6447954637505062e-05, "loss": 0.4753, "step": 653 }, { "epoch": 0.052981205443940374, "grad_norm": 0.08417477458715439, "learning_rate": 2.6488456865127582e-05, "loss": 0.4927, "step": 654 }, { "epoch": 0.05306221646143876, "grad_norm": 0.04969826713204384, "learning_rate": 2.6528959092750105e-05, "loss": 0.4787, "step": 655 }, { "epoch": 0.053143227478937134, "grad_norm": 0.059925880283117294, "learning_rate": 2.6569461320372625e-05, "loss": 0.4539, "step": 656 }, { "epoch": 0.05322423849643552, "grad_norm": 0.0699731782078743, "learning_rate": 2.660996354799514e-05, "loss": 0.4452, "step": 657 }, { "epoch": 0.053305249513933894, "grad_norm": 0.06968280673027039, "learning_rate": 2.665046577561766e-05, "loss": 0.4414, "step": 658 }, { "epoch": 0.05338626053143228, "grad_norm": 0.052930060774087906, "learning_rate": 2.669096800324018e-05, "loss": 0.5047, "step": 659 }, { "epoch": 0.053467271548930655, "grad_norm": 0.06393340229988098, "learning_rate": 2.67314702308627e-05, "loss": 0.5045, "step": 660 }, { "epoch": 0.05354828256642903, "grad_norm": 0.08847188204526901, "learning_rate": 2.6771972458485216e-05, "loss": 0.4623, "step": 661 }, { "epoch": 0.053629293583927415, "grad_norm": 0.037499427795410156, "learning_rate": 2.6812474686107736e-05, "loss": 0.3986, "step": 662 }, { "epoch": 0.05371030460142579, "grad_norm": 0.10235746949911118, "learning_rate": 2.6852976913730256e-05, "loss": 0.5048, "step": 663 }, { "epoch": 0.053791315618924175, "grad_norm": 0.07281772047281265, "learning_rate": 2.6893479141352772e-05, "loss": 0.4631, "step": 664 }, { "epoch": 0.05387232663642255, "grad_norm": 0.06468793749809265, "learning_rate": 2.69339813689753e-05, "loss": 0.4814, "step": 665 }, { "epoch": 0.053953337653920935, "grad_norm": 0.058206070214509964, "learning_rate": 2.6974483596597815e-05, "loss": 0.4306, "step": 666 }, { "epoch": 0.05403434867141931, "grad_norm": 0.05109895393252373, "learning_rate": 2.7014985824220334e-05, "loss": 0.4726, "step": 667 }, { "epoch": 0.054115359688917695, "grad_norm": 0.06766542047262192, "learning_rate": 2.7055488051842854e-05, "loss": 0.4929, "step": 668 }, { "epoch": 0.05419637070641607, "grad_norm": 0.06401512026786804, "learning_rate": 2.709599027946537e-05, "loss": 0.4568, "step": 669 }, { "epoch": 0.054277381723914456, "grad_norm": 0.09650994092226028, "learning_rate": 2.713649250708789e-05, "loss": 0.4668, "step": 670 }, { "epoch": 0.05435839274141283, "grad_norm": 0.060593217611312866, "learning_rate": 2.717699473471041e-05, "loss": 0.4951, "step": 671 }, { "epoch": 0.05443940375891121, "grad_norm": 0.0636834129691124, "learning_rate": 2.721749696233293e-05, "loss": 0.5109, "step": 672 }, { "epoch": 0.05452041477640959, "grad_norm": 0.07222723215818405, "learning_rate": 2.7257999189955446e-05, "loss": 0.4741, "step": 673 }, { "epoch": 0.05460142579390797, "grad_norm": 0.04111061245203018, "learning_rate": 2.7298501417577972e-05, "loss": 0.4789, "step": 674 }, { "epoch": 0.05468243681140635, "grad_norm": 0.07527043670415878, "learning_rate": 2.733900364520049e-05, "loss": 0.4688, "step": 675 }, { "epoch": 0.05476344782890473, "grad_norm": 0.058275461196899414, "learning_rate": 2.7379505872823008e-05, "loss": 0.4358, "step": 676 }, { "epoch": 0.05484445884640311, "grad_norm": 0.07854972034692764, "learning_rate": 2.7420008100445528e-05, "loss": 0.4898, "step": 677 }, { "epoch": 0.05492546986390149, "grad_norm": 0.06151697412133217, "learning_rate": 2.7460510328068044e-05, "loss": 0.4815, "step": 678 }, { "epoch": 0.05500648088139987, "grad_norm": 0.06732051074504852, "learning_rate": 2.7501012555690564e-05, "loss": 0.4921, "step": 679 }, { "epoch": 0.05508749189889825, "grad_norm": 0.05637775734066963, "learning_rate": 2.7541514783313083e-05, "loss": 0.481, "step": 680 }, { "epoch": 0.05516850291639663, "grad_norm": 0.050734519958496094, "learning_rate": 2.7582017010935603e-05, "loss": 0.4436, "step": 681 }, { "epoch": 0.05524951393389501, "grad_norm": 0.05997519567608833, "learning_rate": 2.762251923855812e-05, "loss": 0.4468, "step": 682 }, { "epoch": 0.05533052495139339, "grad_norm": 0.07706957310438156, "learning_rate": 2.766302146618064e-05, "loss": 0.4777, "step": 683 }, { "epoch": 0.05541153596889177, "grad_norm": 0.10084830969572067, "learning_rate": 2.7703523693803162e-05, "loss": 0.4774, "step": 684 }, { "epoch": 0.05549254698639015, "grad_norm": 0.08028780668973923, "learning_rate": 2.774402592142568e-05, "loss": 0.441, "step": 685 }, { "epoch": 0.05557355800388853, "grad_norm": 0.09915050864219666, "learning_rate": 2.77845281490482e-05, "loss": 0.3905, "step": 686 }, { "epoch": 0.05565456902138691, "grad_norm": 0.05721239000558853, "learning_rate": 2.7825030376670718e-05, "loss": 0.48, "step": 687 }, { "epoch": 0.05573558003888529, "grad_norm": 0.07557759433984756, "learning_rate": 2.7865532604293237e-05, "loss": 0.4644, "step": 688 }, { "epoch": 0.05581659105638367, "grad_norm": 0.05306036397814751, "learning_rate": 2.7906034831915757e-05, "loss": 0.4579, "step": 689 }, { "epoch": 0.05589760207388205, "grad_norm": 0.04879166930913925, "learning_rate": 2.7946537059538273e-05, "loss": 0.4459, "step": 690 }, { "epoch": 0.05597861309138043, "grad_norm": 0.06670980900526047, "learning_rate": 2.7987039287160793e-05, "loss": 0.4797, "step": 691 }, { "epoch": 0.056059624108878804, "grad_norm": 0.06885267049074173, "learning_rate": 2.8027541514783313e-05, "loss": 0.4272, "step": 692 }, { "epoch": 0.05614063512637719, "grad_norm": 0.06158503517508507, "learning_rate": 2.8068043742405836e-05, "loss": 0.4645, "step": 693 }, { "epoch": 0.056221646143875564, "grad_norm": 0.05785226821899414, "learning_rate": 2.8108545970028355e-05, "loss": 0.4371, "step": 694 }, { "epoch": 0.05630265716137395, "grad_norm": 0.06631193310022354, "learning_rate": 2.8149048197650875e-05, "loss": 0.4346, "step": 695 }, { "epoch": 0.056383668178872325, "grad_norm": 0.04557311534881592, "learning_rate": 2.818955042527339e-05, "loss": 0.3764, "step": 696 }, { "epoch": 0.05646467919637071, "grad_norm": 0.07891491055488586, "learning_rate": 2.823005265289591e-05, "loss": 0.5242, "step": 697 }, { "epoch": 0.056545690213869085, "grad_norm": 0.04715527594089508, "learning_rate": 2.827055488051843e-05, "loss": 0.4351, "step": 698 }, { "epoch": 0.05662670123136747, "grad_norm": 0.07520975172519684, "learning_rate": 2.8311057108140947e-05, "loss": 0.4603, "step": 699 }, { "epoch": 0.056707712248865845, "grad_norm": 0.0787554606795311, "learning_rate": 2.8351559335763467e-05, "loss": 0.495, "step": 700 }, { "epoch": 0.05678872326636423, "grad_norm": 0.07464078068733215, "learning_rate": 2.8392061563385986e-05, "loss": 0.495, "step": 701 }, { "epoch": 0.056869734283862605, "grad_norm": 0.062342628836631775, "learning_rate": 2.8432563791008502e-05, "loss": 0.4516, "step": 702 }, { "epoch": 0.05695074530136098, "grad_norm": 0.06254198402166367, "learning_rate": 2.847306601863103e-05, "loss": 0.5159, "step": 703 }, { "epoch": 0.057031756318859365, "grad_norm": 0.07052873075008392, "learning_rate": 2.851356824625355e-05, "loss": 0.4405, "step": 704 }, { "epoch": 0.05711276733635774, "grad_norm": 0.058935195207595825, "learning_rate": 2.8554070473876065e-05, "loss": 0.5252, "step": 705 }, { "epoch": 0.057193778353856126, "grad_norm": 0.06088797003030777, "learning_rate": 2.8594572701498585e-05, "loss": 0.4408, "step": 706 }, { "epoch": 0.0572747893713545, "grad_norm": 0.05880602449178696, "learning_rate": 2.8635074929121104e-05, "loss": 0.4783, "step": 707 }, { "epoch": 0.057355800388852886, "grad_norm": 0.05036713927984238, "learning_rate": 2.867557715674362e-05, "loss": 0.4618, "step": 708 }, { "epoch": 0.05743681140635126, "grad_norm": 0.06838516145944595, "learning_rate": 2.871607938436614e-05, "loss": 0.4824, "step": 709 }, { "epoch": 0.057517822423849646, "grad_norm": 0.07706855237483978, "learning_rate": 2.875658161198866e-05, "loss": 0.4507, "step": 710 }, { "epoch": 0.05759883344134802, "grad_norm": 0.061234429478645325, "learning_rate": 2.8797083839611176e-05, "loss": 0.3989, "step": 711 }, { "epoch": 0.057679844458846406, "grad_norm": 0.06529439985752106, "learning_rate": 2.8837586067233703e-05, "loss": 0.4465, "step": 712 }, { "epoch": 0.05776085547634478, "grad_norm": 0.06635624170303345, "learning_rate": 2.887808829485622e-05, "loss": 0.519, "step": 713 }, { "epoch": 0.05784186649384316, "grad_norm": 0.06093122810125351, "learning_rate": 2.891859052247874e-05, "loss": 0.4298, "step": 714 }, { "epoch": 0.05792287751134154, "grad_norm": 0.06917279958724976, "learning_rate": 2.8959092750101258e-05, "loss": 0.4774, "step": 715 }, { "epoch": 0.05800388852883992, "grad_norm": 0.059787567704916, "learning_rate": 2.8999594977723778e-05, "loss": 0.4112, "step": 716 }, { "epoch": 0.0580848995463383, "grad_norm": 0.0676770806312561, "learning_rate": 2.9040097205346294e-05, "loss": 0.5195, "step": 717 }, { "epoch": 0.05816591056383668, "grad_norm": 0.08954107761383057, "learning_rate": 2.9080599432968814e-05, "loss": 0.4654, "step": 718 }, { "epoch": 0.058246921581335064, "grad_norm": 0.10148172080516815, "learning_rate": 2.9121101660591333e-05, "loss": 0.4899, "step": 719 }, { "epoch": 0.05832793259883344, "grad_norm": 0.07054968923330307, "learning_rate": 2.916160388821385e-05, "loss": 0.4395, "step": 720 }, { "epoch": 0.058408943616331824, "grad_norm": 0.06562081724405289, "learning_rate": 2.9202106115836376e-05, "loss": 0.4206, "step": 721 }, { "epoch": 0.0584899546338302, "grad_norm": 0.07018006592988968, "learning_rate": 2.9242608343458892e-05, "loss": 0.4295, "step": 722 }, { "epoch": 0.058570965651328584, "grad_norm": 0.09009187668561935, "learning_rate": 2.9283110571081412e-05, "loss": 0.4158, "step": 723 }, { "epoch": 0.05865197666882696, "grad_norm": 0.08497653156518936, "learning_rate": 2.9323612798703932e-05, "loss": 0.4775, "step": 724 }, { "epoch": 0.05873298768632534, "grad_norm": 0.07111209630966187, "learning_rate": 2.9364115026326448e-05, "loss": 0.4699, "step": 725 }, { "epoch": 0.05881399870382372, "grad_norm": 0.059641528874635696, "learning_rate": 2.9404617253948968e-05, "loss": 0.4656, "step": 726 }, { "epoch": 0.0588950097213221, "grad_norm": 0.07775542885065079, "learning_rate": 2.9445119481571487e-05, "loss": 0.4342, "step": 727 }, { "epoch": 0.05897602073882048, "grad_norm": 0.08325430750846863, "learning_rate": 2.9485621709194007e-05, "loss": 0.4743, "step": 728 }, { "epoch": 0.05905703175631886, "grad_norm": 0.05315934866666794, "learning_rate": 2.9526123936816523e-05, "loss": 0.4698, "step": 729 }, { "epoch": 0.05913804277381724, "grad_norm": 0.06893842667341232, "learning_rate": 2.9566626164439043e-05, "loss": 0.4809, "step": 730 }, { "epoch": 0.05921905379131562, "grad_norm": 0.07211416959762573, "learning_rate": 2.9607128392061566e-05, "loss": 0.4471, "step": 731 }, { "epoch": 0.059300064808814, "grad_norm": 0.07356251776218414, "learning_rate": 2.9647630619684086e-05, "loss": 0.4359, "step": 732 }, { "epoch": 0.05938107582631238, "grad_norm": 0.05225740373134613, "learning_rate": 2.9688132847306605e-05, "loss": 0.4097, "step": 733 }, { "epoch": 0.05946208684381076, "grad_norm": 0.05501040071249008, "learning_rate": 2.972863507492912e-05, "loss": 0.48, "step": 734 }, { "epoch": 0.05954309786130914, "grad_norm": 0.06707523763179779, "learning_rate": 2.976913730255164e-05, "loss": 0.4669, "step": 735 }, { "epoch": 0.059624108878807515, "grad_norm": 0.048934899270534515, "learning_rate": 2.980963953017416e-05, "loss": 0.429, "step": 736 }, { "epoch": 0.0597051198963059, "grad_norm": 0.07759115099906921, "learning_rate": 2.985014175779668e-05, "loss": 0.5033, "step": 737 }, { "epoch": 0.059786130913804275, "grad_norm": 0.0644378662109375, "learning_rate": 2.9890643985419197e-05, "loss": 0.4323, "step": 738 }, { "epoch": 0.05986714193130266, "grad_norm": 0.06665252149105072, "learning_rate": 2.9931146213041717e-05, "loss": 0.526, "step": 739 }, { "epoch": 0.059948152948801035, "grad_norm": 0.0821952298283577, "learning_rate": 2.997164844066424e-05, "loss": 0.4263, "step": 740 }, { "epoch": 0.06002916396629942, "grad_norm": 0.09428700804710388, "learning_rate": 3.001215066828676e-05, "loss": 0.4644, "step": 741 }, { "epoch": 0.060110174983797796, "grad_norm": 0.07214829325675964, "learning_rate": 3.005265289590928e-05, "loss": 0.4371, "step": 742 }, { "epoch": 0.06019118600129618, "grad_norm": 0.0625072717666626, "learning_rate": 3.0093155123531795e-05, "loss": 0.4382, "step": 743 }, { "epoch": 0.060272197018794556, "grad_norm": 0.07718163728713989, "learning_rate": 3.0133657351154315e-05, "loss": 0.4709, "step": 744 }, { "epoch": 0.06035320803629293, "grad_norm": 0.08825402706861496, "learning_rate": 3.0174159578776835e-05, "loss": 0.4502, "step": 745 }, { "epoch": 0.060434219053791316, "grad_norm": 0.08489922434091568, "learning_rate": 3.021466180639935e-05, "loss": 0.4801, "step": 746 }, { "epoch": 0.06051523007128969, "grad_norm": 0.06677371263504028, "learning_rate": 3.025516403402187e-05, "loss": 0.4335, "step": 747 }, { "epoch": 0.060596241088788076, "grad_norm": 0.06519728899002075, "learning_rate": 3.029566626164439e-05, "loss": 0.4419, "step": 748 }, { "epoch": 0.06067725210628645, "grad_norm": 0.07708553224802017, "learning_rate": 3.033616848926691e-05, "loss": 0.489, "step": 749 }, { "epoch": 0.060758263123784836, "grad_norm": 0.06495978683233261, "learning_rate": 3.0376670716889433e-05, "loss": 0.4339, "step": 750 }, { "epoch": 0.06083927414128321, "grad_norm": 0.07273128628730774, "learning_rate": 3.0417172944511953e-05, "loss": 0.4651, "step": 751 }, { "epoch": 0.0609202851587816, "grad_norm": 0.06824234127998352, "learning_rate": 3.045767517213447e-05, "loss": 0.4655, "step": 752 }, { "epoch": 0.06100129617627997, "grad_norm": 0.08591991662979126, "learning_rate": 3.049817739975699e-05, "loss": 0.4789, "step": 753 }, { "epoch": 0.06108230719377836, "grad_norm": 0.05332494154572487, "learning_rate": 3.0538679627379505e-05, "loss": 0.4561, "step": 754 }, { "epoch": 0.06116331821127673, "grad_norm": 0.0581384003162384, "learning_rate": 3.0579181855002025e-05, "loss": 0.4882, "step": 755 }, { "epoch": 0.06124432922877511, "grad_norm": 0.07281646877527237, "learning_rate": 3.0619684082624544e-05, "loss": 0.4728, "step": 756 }, { "epoch": 0.061325340246273494, "grad_norm": 0.06750751286745071, "learning_rate": 3.0660186310247064e-05, "loss": 0.4936, "step": 757 }, { "epoch": 0.06140635126377187, "grad_norm": 0.08174298703670502, "learning_rate": 3.0700688537869584e-05, "loss": 0.5091, "step": 758 }, { "epoch": 0.061487362281270254, "grad_norm": 0.061219800263643265, "learning_rate": 3.07411907654921e-05, "loss": 0.4394, "step": 759 }, { "epoch": 0.06156837329876863, "grad_norm": 0.06408350169658661, "learning_rate": 3.078169299311462e-05, "loss": 0.4342, "step": 760 }, { "epoch": 0.061649384316267014, "grad_norm": 0.06699630618095398, "learning_rate": 3.082219522073714e-05, "loss": 0.4705, "step": 761 }, { "epoch": 0.06173039533376539, "grad_norm": 0.05425221472978592, "learning_rate": 3.086269744835966e-05, "loss": 0.454, "step": 762 }, { "epoch": 0.061811406351263774, "grad_norm": 0.07127571105957031, "learning_rate": 3.090319967598218e-05, "loss": 0.4268, "step": 763 }, { "epoch": 0.06189241736876215, "grad_norm": 0.07275000214576721, "learning_rate": 3.09437019036047e-05, "loss": 0.5141, "step": 764 }, { "epoch": 0.061973428386260535, "grad_norm": 0.054138775914907455, "learning_rate": 3.098420413122722e-05, "loss": 0.3759, "step": 765 }, { "epoch": 0.06205443940375891, "grad_norm": 0.10909716784954071, "learning_rate": 3.1024706358849734e-05, "loss": 0.4651, "step": 766 }, { "epoch": 0.06213545042125729, "grad_norm": 0.05660035461187363, "learning_rate": 3.1065208586472254e-05, "loss": 0.4307, "step": 767 }, { "epoch": 0.06221646143875567, "grad_norm": 0.06873264163732529, "learning_rate": 3.1105710814094773e-05, "loss": 0.3971, "step": 768 }, { "epoch": 0.06229747245625405, "grad_norm": 0.06368529796600342, "learning_rate": 3.11462130417173e-05, "loss": 0.4432, "step": 769 }, { "epoch": 0.06237848347375243, "grad_norm": 0.07408467680215836, "learning_rate": 3.118671526933982e-05, "loss": 0.454, "step": 770 }, { "epoch": 0.06245949449125081, "grad_norm": 0.053322043269872665, "learning_rate": 3.122721749696233e-05, "loss": 0.4304, "step": 771 }, { "epoch": 0.06254050550874919, "grad_norm": 0.07185545563697815, "learning_rate": 3.126771972458485e-05, "loss": 0.428, "step": 772 }, { "epoch": 0.06262151652624758, "grad_norm": 0.05358489975333214, "learning_rate": 3.130822195220737e-05, "loss": 0.4019, "step": 773 }, { "epoch": 0.06270252754374595, "grad_norm": 0.06032414361834526, "learning_rate": 3.134872417982989e-05, "loss": 0.4216, "step": 774 }, { "epoch": 0.06278353856124433, "grad_norm": 0.08076735585927963, "learning_rate": 3.138922640745241e-05, "loss": 0.4212, "step": 775 }, { "epoch": 0.06286454957874271, "grad_norm": 0.06268110126256943, "learning_rate": 3.142972863507493e-05, "loss": 0.4413, "step": 776 }, { "epoch": 0.06294556059624108, "grad_norm": 0.06503452360630035, "learning_rate": 3.147023086269745e-05, "loss": 0.4647, "step": 777 }, { "epoch": 0.06302657161373947, "grad_norm": 0.048685915768146515, "learning_rate": 3.151073309031997e-05, "loss": 0.4504, "step": 778 }, { "epoch": 0.06310758263123785, "grad_norm": 0.074101522564888, "learning_rate": 3.155123531794249e-05, "loss": 0.4652, "step": 779 }, { "epoch": 0.06318859364873623, "grad_norm": 0.05946307256817818, "learning_rate": 3.159173754556501e-05, "loss": 0.4551, "step": 780 }, { "epoch": 0.0632696046662346, "grad_norm": 0.06036413088440895, "learning_rate": 3.163223977318753e-05, "loss": 0.44, "step": 781 }, { "epoch": 0.06335061568373299, "grad_norm": 0.06359696388244629, "learning_rate": 3.167274200081005e-05, "loss": 0.4049, "step": 782 }, { "epoch": 0.06343162670123137, "grad_norm": 0.09583299607038498, "learning_rate": 3.171324422843256e-05, "loss": 0.4147, "step": 783 }, { "epoch": 0.06351263771872975, "grad_norm": 0.07221318781375885, "learning_rate": 3.175374645605508e-05, "loss": 0.5164, "step": 784 }, { "epoch": 0.06359364873622812, "grad_norm": 0.07264591008424759, "learning_rate": 3.17942486836776e-05, "loss": 0.4692, "step": 785 }, { "epoch": 0.0636746597537265, "grad_norm": 0.07795893400907516, "learning_rate": 3.183475091130012e-05, "loss": 0.4213, "step": 786 }, { "epoch": 0.06375567077122489, "grad_norm": 0.07821632921695709, "learning_rate": 3.187525313892265e-05, "loss": 0.4439, "step": 787 }, { "epoch": 0.06383668178872326, "grad_norm": 0.062484800815582275, "learning_rate": 3.191575536654517e-05, "loss": 0.4532, "step": 788 }, { "epoch": 0.06391769280622164, "grad_norm": 0.07076051086187363, "learning_rate": 3.195625759416768e-05, "loss": 0.478, "step": 789 }, { "epoch": 0.06399870382372003, "grad_norm": 0.06853286176919937, "learning_rate": 3.19967598217902e-05, "loss": 0.4268, "step": 790 }, { "epoch": 0.06407971484121841, "grad_norm": 0.06096012890338898, "learning_rate": 3.203726204941272e-05, "loss": 0.4349, "step": 791 }, { "epoch": 0.06416072585871678, "grad_norm": 0.08760856091976166, "learning_rate": 3.207776427703524e-05, "loss": 0.4645, "step": 792 }, { "epoch": 0.06424173687621516, "grad_norm": 0.10125716030597687, "learning_rate": 3.211826650465776e-05, "loss": 0.4728, "step": 793 }, { "epoch": 0.06432274789371355, "grad_norm": 0.067176952958107, "learning_rate": 3.215876873228028e-05, "loss": 0.4516, "step": 794 }, { "epoch": 0.06440375891121193, "grad_norm": 0.06922373175621033, "learning_rate": 3.219927095990279e-05, "loss": 0.4323, "step": 795 }, { "epoch": 0.0644847699287103, "grad_norm": 0.04995464161038399, "learning_rate": 3.223977318752531e-05, "loss": 0.4426, "step": 796 }, { "epoch": 0.06456578094620868, "grad_norm": 0.07050605118274689, "learning_rate": 3.228027541514784e-05, "loss": 0.4209, "step": 797 }, { "epoch": 0.06464679196370707, "grad_norm": 0.05389539897441864, "learning_rate": 3.232077764277036e-05, "loss": 0.4226, "step": 798 }, { "epoch": 0.06472780298120544, "grad_norm": 0.07226435095071793, "learning_rate": 3.2361279870392876e-05, "loss": 0.4906, "step": 799 }, { "epoch": 0.06480881399870382, "grad_norm": 0.0973285585641861, "learning_rate": 3.2401782098015396e-05, "loss": 0.4134, "step": 800 }, { "epoch": 0.0648898250162022, "grad_norm": 0.06966665387153625, "learning_rate": 3.244228432563791e-05, "loss": 0.4223, "step": 801 }, { "epoch": 0.06497083603370059, "grad_norm": 0.08046171069145203, "learning_rate": 3.248278655326043e-05, "loss": 0.4279, "step": 802 }, { "epoch": 0.06505184705119896, "grad_norm": 0.06887330114841461, "learning_rate": 3.252328878088295e-05, "loss": 0.4018, "step": 803 }, { "epoch": 0.06513285806869734, "grad_norm": 0.0826861634850502, "learning_rate": 3.256379100850547e-05, "loss": 0.4329, "step": 804 }, { "epoch": 0.06521386908619572, "grad_norm": 0.062017880380153656, "learning_rate": 3.260429323612799e-05, "loss": 0.3822, "step": 805 }, { "epoch": 0.06529488010369411, "grad_norm": 0.08863137662410736, "learning_rate": 3.264479546375051e-05, "loss": 0.4338, "step": 806 }, { "epoch": 0.06537589112119248, "grad_norm": 0.08286241441965103, "learning_rate": 3.268529769137303e-05, "loss": 0.4763, "step": 807 }, { "epoch": 0.06545690213869086, "grad_norm": 0.07620840519666672, "learning_rate": 3.272579991899555e-05, "loss": 0.3837, "step": 808 }, { "epoch": 0.06553791315618925, "grad_norm": 0.07819268852472305, "learning_rate": 3.2766302146618066e-05, "loss": 0.4757, "step": 809 }, { "epoch": 0.06561892417368761, "grad_norm": 0.052711982280015945, "learning_rate": 3.2806804374240586e-05, "loss": 0.4293, "step": 810 }, { "epoch": 0.065699935191186, "grad_norm": 0.0609319843351841, "learning_rate": 3.2847306601863106e-05, "loss": 0.4521, "step": 811 }, { "epoch": 0.06578094620868438, "grad_norm": 0.07290962338447571, "learning_rate": 3.2887808829485625e-05, "loss": 0.4798, "step": 812 }, { "epoch": 0.06586195722618277, "grad_norm": 0.07731480151414871, "learning_rate": 3.292831105710814e-05, "loss": 0.4529, "step": 813 }, { "epoch": 0.06594296824368114, "grad_norm": 0.05770780146121979, "learning_rate": 3.296881328473066e-05, "loss": 0.4183, "step": 814 }, { "epoch": 0.06602397926117952, "grad_norm": 0.08070331811904907, "learning_rate": 3.300931551235318e-05, "loss": 0.4718, "step": 815 }, { "epoch": 0.0661049902786779, "grad_norm": 0.09154818207025528, "learning_rate": 3.3049817739975704e-05, "loss": 0.4276, "step": 816 }, { "epoch": 0.06618600129617629, "grad_norm": 0.05981754884123802, "learning_rate": 3.3090319967598224e-05, "loss": 0.4542, "step": 817 }, { "epoch": 0.06626701231367466, "grad_norm": 0.06444111466407776, "learning_rate": 3.3130822195220737e-05, "loss": 0.4409, "step": 818 }, { "epoch": 0.06634802333117304, "grad_norm": 0.0756894052028656, "learning_rate": 3.3171324422843256e-05, "loss": 0.458, "step": 819 }, { "epoch": 0.06642903434867142, "grad_norm": 0.0637042224407196, "learning_rate": 3.3211826650465776e-05, "loss": 0.4301, "step": 820 }, { "epoch": 0.06651004536616979, "grad_norm": 0.05820478871464729, "learning_rate": 3.3252328878088296e-05, "loss": 0.4502, "step": 821 }, { "epoch": 0.06659105638366818, "grad_norm": 0.07865587621927261, "learning_rate": 3.3292831105710815e-05, "loss": 0.4105, "step": 822 }, { "epoch": 0.06667206740116656, "grad_norm": 0.06652259081602097, "learning_rate": 3.3333333333333335e-05, "loss": 0.4158, "step": 823 }, { "epoch": 0.06675307841866494, "grad_norm": 0.06405427306890488, "learning_rate": 3.3373835560955855e-05, "loss": 0.4431, "step": 824 }, { "epoch": 0.06683408943616331, "grad_norm": 0.04830295965075493, "learning_rate": 3.3414337788578374e-05, "loss": 0.4088, "step": 825 }, { "epoch": 0.0669151004536617, "grad_norm": 0.06837856024503708, "learning_rate": 3.3454840016200894e-05, "loss": 0.4458, "step": 826 }, { "epoch": 0.06699611147116008, "grad_norm": 0.07393626123666763, "learning_rate": 3.3495342243823414e-05, "loss": 0.4726, "step": 827 }, { "epoch": 0.06707712248865846, "grad_norm": 0.11590786278247833, "learning_rate": 3.353584447144593e-05, "loss": 0.4804, "step": 828 }, { "epoch": 0.06715813350615683, "grad_norm": 0.05595247447490692, "learning_rate": 3.357634669906845e-05, "loss": 0.4547, "step": 829 }, { "epoch": 0.06723914452365522, "grad_norm": 0.07072945684194565, "learning_rate": 3.3616848926690966e-05, "loss": 0.421, "step": 830 }, { "epoch": 0.0673201555411536, "grad_norm": 0.07533926516771317, "learning_rate": 3.3657351154313486e-05, "loss": 0.4573, "step": 831 }, { "epoch": 0.06740116655865197, "grad_norm": 0.06634481251239777, "learning_rate": 3.3697853381936005e-05, "loss": 0.4423, "step": 832 }, { "epoch": 0.06748217757615035, "grad_norm": 0.08560493588447571, "learning_rate": 3.3738355609558525e-05, "loss": 0.4801, "step": 833 }, { "epoch": 0.06756318859364874, "grad_norm": 0.07027285546064377, "learning_rate": 3.377885783718105e-05, "loss": 0.4304, "step": 834 }, { "epoch": 0.06764419961114712, "grad_norm": 0.09453713148832321, "learning_rate": 3.381936006480357e-05, "loss": 0.4349, "step": 835 }, { "epoch": 0.06772521062864549, "grad_norm": 0.06721215695142746, "learning_rate": 3.3859862292426084e-05, "loss": 0.4018, "step": 836 }, { "epoch": 0.06780622164614387, "grad_norm": 0.09632536768913269, "learning_rate": 3.3900364520048604e-05, "loss": 0.4856, "step": 837 }, { "epoch": 0.06788723266364226, "grad_norm": 0.08073796331882477, "learning_rate": 3.394086674767112e-05, "loss": 0.4082, "step": 838 }, { "epoch": 0.06796824368114064, "grad_norm": 0.061706796288490295, "learning_rate": 3.398136897529364e-05, "loss": 0.3996, "step": 839 }, { "epoch": 0.06804925469863901, "grad_norm": 0.046711515635252, "learning_rate": 3.402187120291616e-05, "loss": 0.5058, "step": 840 }, { "epoch": 0.0681302657161374, "grad_norm": 0.057807352393865585, "learning_rate": 3.406237343053868e-05, "loss": 0.4455, "step": 841 }, { "epoch": 0.06821127673363578, "grad_norm": 0.08929713815450668, "learning_rate": 3.41028756581612e-05, "loss": 0.4552, "step": 842 }, { "epoch": 0.06829228775113415, "grad_norm": 0.06823332607746124, "learning_rate": 3.4143377885783715e-05, "loss": 0.4267, "step": 843 }, { "epoch": 0.06837329876863253, "grad_norm": 0.07023243606090546, "learning_rate": 3.418388011340624e-05, "loss": 0.4496, "step": 844 }, { "epoch": 0.06845430978613092, "grad_norm": 0.08038611710071564, "learning_rate": 3.422438234102876e-05, "loss": 0.4268, "step": 845 }, { "epoch": 0.0685353208036293, "grad_norm": 0.09449832141399384, "learning_rate": 3.426488456865128e-05, "loss": 0.398, "step": 846 }, { "epoch": 0.06861633182112767, "grad_norm": 0.08607659488916397, "learning_rate": 3.43053867962738e-05, "loss": 0.4441, "step": 847 }, { "epoch": 0.06869734283862605, "grad_norm": 0.06817245483398438, "learning_rate": 3.434588902389631e-05, "loss": 0.4163, "step": 848 }, { "epoch": 0.06877835385612444, "grad_norm": 0.07797311991453171, "learning_rate": 3.438639125151883e-05, "loss": 0.4569, "step": 849 }, { "epoch": 0.06885936487362282, "grad_norm": 0.08795508742332458, "learning_rate": 3.442689347914135e-05, "loss": 0.4423, "step": 850 }, { "epoch": 0.06894037589112119, "grad_norm": 0.08801967650651932, "learning_rate": 3.446739570676387e-05, "loss": 0.4253, "step": 851 }, { "epoch": 0.06902138690861957, "grad_norm": 0.07507802546024323, "learning_rate": 3.450789793438639e-05, "loss": 0.39, "step": 852 }, { "epoch": 0.06910239792611796, "grad_norm": 0.09158620238304138, "learning_rate": 3.454840016200892e-05, "loss": 0.4405, "step": 853 }, { "epoch": 0.06918340894361633, "grad_norm": 0.07361084222793579, "learning_rate": 3.458890238963143e-05, "loss": 0.4276, "step": 854 }, { "epoch": 0.06926441996111471, "grad_norm": 0.07052503526210785, "learning_rate": 3.462940461725395e-05, "loss": 0.4412, "step": 855 }, { "epoch": 0.06934543097861309, "grad_norm": 0.08575800061225891, "learning_rate": 3.466990684487647e-05, "loss": 0.4585, "step": 856 }, { "epoch": 0.06942644199611148, "grad_norm": 0.10039302706718445, "learning_rate": 3.471040907249899e-05, "loss": 0.4771, "step": 857 }, { "epoch": 0.06950745301360985, "grad_norm": 0.08235925436019897, "learning_rate": 3.475091130012151e-05, "loss": 0.4749, "step": 858 }, { "epoch": 0.06958846403110823, "grad_norm": 0.06944045424461365, "learning_rate": 3.479141352774403e-05, "loss": 0.4516, "step": 859 }, { "epoch": 0.06966947504860661, "grad_norm": 0.05629153922200203, "learning_rate": 3.483191575536654e-05, "loss": 0.4347, "step": 860 }, { "epoch": 0.069750486066105, "grad_norm": 0.0687924176454544, "learning_rate": 3.487241798298906e-05, "loss": 0.4505, "step": 861 }, { "epoch": 0.06983149708360337, "grad_norm": 0.07229902595281601, "learning_rate": 3.491292021061158e-05, "loss": 0.4455, "step": 862 }, { "epoch": 0.06991250810110175, "grad_norm": 0.07076973468065262, "learning_rate": 3.495342243823411e-05, "loss": 0.3961, "step": 863 }, { "epoch": 0.06999351911860013, "grad_norm": 0.07589247077703476, "learning_rate": 3.499392466585663e-05, "loss": 0.4375, "step": 864 }, { "epoch": 0.0700745301360985, "grad_norm": 0.09876564145088196, "learning_rate": 3.503442689347915e-05, "loss": 0.4497, "step": 865 }, { "epoch": 0.07015554115359689, "grad_norm": 0.09799569845199585, "learning_rate": 3.507492912110166e-05, "loss": 0.3982, "step": 866 }, { "epoch": 0.07023655217109527, "grad_norm": 0.07569810003042221, "learning_rate": 3.511543134872418e-05, "loss": 0.4153, "step": 867 }, { "epoch": 0.07031756318859365, "grad_norm": 0.07665053009986877, "learning_rate": 3.51559335763467e-05, "loss": 0.4038, "step": 868 }, { "epoch": 0.07039857420609202, "grad_norm": 0.07011052966117859, "learning_rate": 3.519643580396922e-05, "loss": 0.4528, "step": 869 }, { "epoch": 0.07047958522359041, "grad_norm": 0.10473107546567917, "learning_rate": 3.523693803159174e-05, "loss": 0.4633, "step": 870 }, { "epoch": 0.07056059624108879, "grad_norm": 0.07926227152347565, "learning_rate": 3.527744025921426e-05, "loss": 0.4294, "step": 871 }, { "epoch": 0.07064160725858717, "grad_norm": 0.09561335295438766, "learning_rate": 3.531794248683678e-05, "loss": 0.407, "step": 872 }, { "epoch": 0.07072261827608554, "grad_norm": 0.06910093128681183, "learning_rate": 3.53584447144593e-05, "loss": 0.4598, "step": 873 }, { "epoch": 0.07080362929358393, "grad_norm": 0.07909014821052551, "learning_rate": 3.539894694208182e-05, "loss": 0.4522, "step": 874 }, { "epoch": 0.07088464031108231, "grad_norm": 0.0774121955037117, "learning_rate": 3.543944916970434e-05, "loss": 0.3741, "step": 875 }, { "epoch": 0.07096565132858068, "grad_norm": 0.07421425729990005, "learning_rate": 3.547995139732686e-05, "loss": 0.4193, "step": 876 }, { "epoch": 0.07104666234607906, "grad_norm": 0.0737396851181984, "learning_rate": 3.552045362494938e-05, "loss": 0.466, "step": 877 }, { "epoch": 0.07112767336357745, "grad_norm": 0.0909152403473854, "learning_rate": 3.556095585257189e-05, "loss": 0.4667, "step": 878 }, { "epoch": 0.07120868438107583, "grad_norm": 0.07740772515535355, "learning_rate": 3.560145808019441e-05, "loss": 0.4708, "step": 879 }, { "epoch": 0.0712896953985742, "grad_norm": 0.09760919213294983, "learning_rate": 3.564196030781693e-05, "loss": 0.4607, "step": 880 }, { "epoch": 0.07137070641607259, "grad_norm": 0.07436461001634598, "learning_rate": 3.568246253543945e-05, "loss": 0.385, "step": 881 }, { "epoch": 0.07145171743357097, "grad_norm": 0.09642557799816132, "learning_rate": 3.5722964763061975e-05, "loss": 0.4432, "step": 882 }, { "epoch": 0.07153272845106935, "grad_norm": 0.07746947556734085, "learning_rate": 3.576346699068449e-05, "loss": 0.3736, "step": 883 }, { "epoch": 0.07161373946856772, "grad_norm": 0.07410507649183273, "learning_rate": 3.580396921830701e-05, "loss": 0.3902, "step": 884 }, { "epoch": 0.0716947504860661, "grad_norm": 0.08017998188734055, "learning_rate": 3.584447144592953e-05, "loss": 0.4489, "step": 885 }, { "epoch": 0.07177576150356449, "grad_norm": 0.08439603447914124, "learning_rate": 3.588497367355205e-05, "loss": 0.4222, "step": 886 }, { "epoch": 0.07185677252106286, "grad_norm": 0.06829867511987686, "learning_rate": 3.592547590117457e-05, "loss": 0.4425, "step": 887 }, { "epoch": 0.07193778353856124, "grad_norm": 0.0824725404381752, "learning_rate": 3.5965978128797086e-05, "loss": 0.4839, "step": 888 }, { "epoch": 0.07201879455605963, "grad_norm": 0.08847682923078537, "learning_rate": 3.6006480356419606e-05, "loss": 0.4496, "step": 889 }, { "epoch": 0.07209980557355801, "grad_norm": 0.08362285047769547, "learning_rate": 3.604698258404212e-05, "loss": 0.3894, "step": 890 }, { "epoch": 0.07218081659105638, "grad_norm": 0.13280020654201508, "learning_rate": 3.6087484811664645e-05, "loss": 0.4776, "step": 891 }, { "epoch": 0.07226182760855476, "grad_norm": 0.0723172277212143, "learning_rate": 3.6127987039287165e-05, "loss": 0.4236, "step": 892 }, { "epoch": 0.07234283862605315, "grad_norm": 0.08192502707242966, "learning_rate": 3.6168489266909685e-05, "loss": 0.484, "step": 893 }, { "epoch": 0.07242384964355152, "grad_norm": 0.07791081815958023, "learning_rate": 3.6208991494532204e-05, "loss": 0.4526, "step": 894 }, { "epoch": 0.0725048606610499, "grad_norm": 0.0736956000328064, "learning_rate": 3.624949372215472e-05, "loss": 0.4567, "step": 895 }, { "epoch": 0.07258587167854828, "grad_norm": 0.08527030050754547, "learning_rate": 3.628999594977724e-05, "loss": 0.4406, "step": 896 }, { "epoch": 0.07266688269604667, "grad_norm": 0.08626183122396469, "learning_rate": 3.6330498177399757e-05, "loss": 0.4275, "step": 897 }, { "epoch": 0.07274789371354504, "grad_norm": 0.07522546499967575, "learning_rate": 3.6371000405022276e-05, "loss": 0.4164, "step": 898 }, { "epoch": 0.07282890473104342, "grad_norm": 0.06778135895729065, "learning_rate": 3.6411502632644796e-05, "loss": 0.4287, "step": 899 }, { "epoch": 0.0729099157485418, "grad_norm": 0.07022549957036972, "learning_rate": 3.645200486026732e-05, "loss": 0.41, "step": 900 }, { "epoch": 0.07299092676604019, "grad_norm": 0.08264485746622086, "learning_rate": 3.6492507087889835e-05, "loss": 0.4176, "step": 901 }, { "epoch": 0.07307193778353856, "grad_norm": 0.08415798097848892, "learning_rate": 3.6533009315512355e-05, "loss": 0.4389, "step": 902 }, { "epoch": 0.07315294880103694, "grad_norm": 0.07540509849786758, "learning_rate": 3.6573511543134875e-05, "loss": 0.403, "step": 903 }, { "epoch": 0.07323395981853532, "grad_norm": 0.07358894497156143, "learning_rate": 3.6614013770757394e-05, "loss": 0.4001, "step": 904 }, { "epoch": 0.0733149708360337, "grad_norm": 0.06488876789808273, "learning_rate": 3.6654515998379914e-05, "loss": 0.4223, "step": 905 }, { "epoch": 0.07339598185353208, "grad_norm": 0.07415130734443665, "learning_rate": 3.6695018226002434e-05, "loss": 0.435, "step": 906 }, { "epoch": 0.07347699287103046, "grad_norm": 0.06136851757764816, "learning_rate": 3.6735520453624946e-05, "loss": 0.3851, "step": 907 }, { "epoch": 0.07355800388852884, "grad_norm": 0.08178620040416718, "learning_rate": 3.6776022681247466e-05, "loss": 0.4772, "step": 908 }, { "epoch": 0.07363901490602721, "grad_norm": 0.07980084419250488, "learning_rate": 3.6816524908869986e-05, "loss": 0.4604, "step": 909 }, { "epoch": 0.0737200259235256, "grad_norm": 0.08928181976079941, "learning_rate": 3.685702713649251e-05, "loss": 0.4252, "step": 910 }, { "epoch": 0.07380103694102398, "grad_norm": 0.0732206255197525, "learning_rate": 3.689752936411503e-05, "loss": 0.4495, "step": 911 }, { "epoch": 0.07388204795852236, "grad_norm": 0.08373378217220306, "learning_rate": 3.693803159173755e-05, "loss": 0.4705, "step": 912 }, { "epoch": 0.07396305897602073, "grad_norm": 0.08140382915735245, "learning_rate": 3.6978533819360064e-05, "loss": 0.3726, "step": 913 }, { "epoch": 0.07404406999351912, "grad_norm": 0.07321371883153915, "learning_rate": 3.7019036046982584e-05, "loss": 0.3908, "step": 914 }, { "epoch": 0.0741250810110175, "grad_norm": 0.0650591105222702, "learning_rate": 3.7059538274605104e-05, "loss": 0.4489, "step": 915 }, { "epoch": 0.07420609202851587, "grad_norm": 0.06934455037117004, "learning_rate": 3.7100040502227623e-05, "loss": 0.3858, "step": 916 }, { "epoch": 0.07428710304601426, "grad_norm": 0.060079026967287064, "learning_rate": 3.714054272985014e-05, "loss": 0.4396, "step": 917 }, { "epoch": 0.07436811406351264, "grad_norm": 0.0832790955901146, "learning_rate": 3.718104495747266e-05, "loss": 0.4441, "step": 918 }, { "epoch": 0.07444912508101102, "grad_norm": 0.08325023204088211, "learning_rate": 3.722154718509518e-05, "loss": 0.4077, "step": 919 }, { "epoch": 0.07453013609850939, "grad_norm": 0.0719456821680069, "learning_rate": 3.72620494127177e-05, "loss": 0.4646, "step": 920 }, { "epoch": 0.07461114711600778, "grad_norm": 0.0605672225356102, "learning_rate": 3.730255164034022e-05, "loss": 0.4334, "step": 921 }, { "epoch": 0.07469215813350616, "grad_norm": 0.06589698046445847, "learning_rate": 3.734305386796274e-05, "loss": 0.3967, "step": 922 }, { "epoch": 0.07477316915100454, "grad_norm": 0.06026124954223633, "learning_rate": 3.738355609558526e-05, "loss": 0.3843, "step": 923 }, { "epoch": 0.07485418016850291, "grad_norm": 0.07374893873929977, "learning_rate": 3.742405832320778e-05, "loss": 0.4454, "step": 924 }, { "epoch": 0.0749351911860013, "grad_norm": 0.09211552143096924, "learning_rate": 3.7464560550830294e-05, "loss": 0.4193, "step": 925 }, { "epoch": 0.07501620220349968, "grad_norm": 0.09048813581466675, "learning_rate": 3.750506277845281e-05, "loss": 0.4556, "step": 926 }, { "epoch": 0.07509721322099805, "grad_norm": 0.06524454802274704, "learning_rate": 3.754556500607533e-05, "loss": 0.4074, "step": 927 }, { "epoch": 0.07517822423849643, "grad_norm": 0.09637756645679474, "learning_rate": 3.758606723369785e-05, "loss": 0.5207, "step": 928 }, { "epoch": 0.07525923525599482, "grad_norm": 0.07778630405664444, "learning_rate": 3.762656946132038e-05, "loss": 0.4447, "step": 929 }, { "epoch": 0.0753402462734932, "grad_norm": 0.07583451271057129, "learning_rate": 3.766707168894289e-05, "loss": 0.4346, "step": 930 }, { "epoch": 0.07542125729099157, "grad_norm": 0.06921878457069397, "learning_rate": 3.770757391656541e-05, "loss": 0.4793, "step": 931 }, { "epoch": 0.07550226830848995, "grad_norm": 0.057142313569784164, "learning_rate": 3.774807614418793e-05, "loss": 0.4345, "step": 932 }, { "epoch": 0.07558327932598834, "grad_norm": 0.06628698110580444, "learning_rate": 3.778857837181045e-05, "loss": 0.393, "step": 933 }, { "epoch": 0.07566429034348672, "grad_norm": 0.07588442414999008, "learning_rate": 3.782908059943297e-05, "loss": 0.4236, "step": 934 }, { "epoch": 0.07574530136098509, "grad_norm": 0.0772482231259346, "learning_rate": 3.786958282705549e-05, "loss": 0.3927, "step": 935 }, { "epoch": 0.07582631237848347, "grad_norm": 0.07052788883447647, "learning_rate": 3.791008505467801e-05, "loss": 0.4326, "step": 936 }, { "epoch": 0.07590732339598186, "grad_norm": 0.06511484831571579, "learning_rate": 3.795058728230052e-05, "loss": 0.4243, "step": 937 }, { "epoch": 0.07598833441348023, "grad_norm": 0.13957849144935608, "learning_rate": 3.799108950992305e-05, "loss": 0.403, "step": 938 }, { "epoch": 0.07606934543097861, "grad_norm": 0.07951390743255615, "learning_rate": 3.803159173754557e-05, "loss": 0.4815, "step": 939 }, { "epoch": 0.076150356448477, "grad_norm": 0.07832984626293182, "learning_rate": 3.807209396516809e-05, "loss": 0.4766, "step": 940 }, { "epoch": 0.07623136746597538, "grad_norm": 0.09598967432975769, "learning_rate": 3.811259619279061e-05, "loss": 0.4121, "step": 941 }, { "epoch": 0.07631237848347375, "grad_norm": 0.09158296138048172, "learning_rate": 3.815309842041313e-05, "loss": 0.3938, "step": 942 }, { "epoch": 0.07639338950097213, "grad_norm": 0.09140002727508545, "learning_rate": 3.819360064803564e-05, "loss": 0.4612, "step": 943 }, { "epoch": 0.07647440051847051, "grad_norm": 0.06446819752454758, "learning_rate": 3.823410287565816e-05, "loss": 0.3948, "step": 944 }, { "epoch": 0.0765554115359689, "grad_norm": 0.09059543907642365, "learning_rate": 3.827460510328068e-05, "loss": 0.4104, "step": 945 }, { "epoch": 0.07663642255346727, "grad_norm": 0.09735757112503052, "learning_rate": 3.83151073309032e-05, "loss": 0.4497, "step": 946 }, { "epoch": 0.07671743357096565, "grad_norm": 0.05969754979014397, "learning_rate": 3.835560955852572e-05, "loss": 0.4121, "step": 947 }, { "epoch": 0.07679844458846403, "grad_norm": 0.07921057939529419, "learning_rate": 3.839611178614824e-05, "loss": 0.4194, "step": 948 }, { "epoch": 0.0768794556059624, "grad_norm": 0.08428069949150085, "learning_rate": 3.843661401377076e-05, "loss": 0.4155, "step": 949 }, { "epoch": 0.07696046662346079, "grad_norm": 0.09198450297117233, "learning_rate": 3.847711624139328e-05, "loss": 0.4305, "step": 950 }, { "epoch": 0.07704147764095917, "grad_norm": 0.05633941665291786, "learning_rate": 3.85176184690158e-05, "loss": 0.3704, "step": 951 }, { "epoch": 0.07712248865845756, "grad_norm": 0.0748986154794693, "learning_rate": 3.855812069663832e-05, "loss": 0.4343, "step": 952 }, { "epoch": 0.07720349967595592, "grad_norm": 0.09329600632190704, "learning_rate": 3.859862292426084e-05, "loss": 0.4409, "step": 953 }, { "epoch": 0.07728451069345431, "grad_norm": 0.059523507952690125, "learning_rate": 3.863912515188336e-05, "loss": 0.4032, "step": 954 }, { "epoch": 0.07736552171095269, "grad_norm": 0.09699185192584991, "learning_rate": 3.867962737950587e-05, "loss": 0.4062, "step": 955 }, { "epoch": 0.07744653272845108, "grad_norm": 0.08740203082561493, "learning_rate": 3.872012960712839e-05, "loss": 0.428, "step": 956 }, { "epoch": 0.07752754374594945, "grad_norm": 0.08077336847782135, "learning_rate": 3.8760631834750916e-05, "loss": 0.4207, "step": 957 }, { "epoch": 0.07760855476344783, "grad_norm": 0.08518598228693008, "learning_rate": 3.8801134062373436e-05, "loss": 0.4363, "step": 958 }, { "epoch": 0.07768956578094621, "grad_norm": 0.09720475971698761, "learning_rate": 3.8841636289995956e-05, "loss": 0.4176, "step": 959 }, { "epoch": 0.07777057679844458, "grad_norm": 0.05982989817857742, "learning_rate": 3.888213851761847e-05, "loss": 0.3733, "step": 960 }, { "epoch": 0.07785158781594297, "grad_norm": 0.1074175164103508, "learning_rate": 3.892264074524099e-05, "loss": 0.4032, "step": 961 }, { "epoch": 0.07793259883344135, "grad_norm": 0.0886072888970375, "learning_rate": 3.896314297286351e-05, "loss": 0.431, "step": 962 }, { "epoch": 0.07801360985093973, "grad_norm": 0.08959907293319702, "learning_rate": 3.900364520048603e-05, "loss": 0.4562, "step": 963 }, { "epoch": 0.0780946208684381, "grad_norm": 0.0632220134139061, "learning_rate": 3.904414742810855e-05, "loss": 0.4105, "step": 964 }, { "epoch": 0.07817563188593649, "grad_norm": 0.0738753005862236, "learning_rate": 3.908464965573107e-05, "loss": 0.4092, "step": 965 }, { "epoch": 0.07825664290343487, "grad_norm": 0.07675029337406158, "learning_rate": 3.9125151883353587e-05, "loss": 0.4765, "step": 966 }, { "epoch": 0.07833765392093325, "grad_norm": 0.07345636188983917, "learning_rate": 3.9165654110976106e-05, "loss": 0.4274, "step": 967 }, { "epoch": 0.07841866493843162, "grad_norm": 0.08332125842571259, "learning_rate": 3.9206156338598626e-05, "loss": 0.3674, "step": 968 }, { "epoch": 0.07849967595593, "grad_norm": 0.09537743031978607, "learning_rate": 3.9246658566221146e-05, "loss": 0.4072, "step": 969 }, { "epoch": 0.07858068697342839, "grad_norm": 0.06771516054868698, "learning_rate": 3.9287160793843665e-05, "loss": 0.42, "step": 970 }, { "epoch": 0.07866169799092676, "grad_norm": 0.07258981466293335, "learning_rate": 3.9327663021466185e-05, "loss": 0.3905, "step": 971 }, { "epoch": 0.07874270900842514, "grad_norm": 0.07407287508249283, "learning_rate": 3.93681652490887e-05, "loss": 0.4602, "step": 972 }, { "epoch": 0.07882372002592353, "grad_norm": 0.07650595903396606, "learning_rate": 3.940866747671122e-05, "loss": 0.3946, "step": 973 }, { "epoch": 0.07890473104342191, "grad_norm": 0.08994881808757782, "learning_rate": 3.944916970433374e-05, "loss": 0.4315, "step": 974 }, { "epoch": 0.07898574206092028, "grad_norm": 0.0726025402545929, "learning_rate": 3.948967193195626e-05, "loss": 0.4127, "step": 975 }, { "epoch": 0.07906675307841866, "grad_norm": 0.0902196392416954, "learning_rate": 3.953017415957878e-05, "loss": 0.3589, "step": 976 }, { "epoch": 0.07914776409591705, "grad_norm": 0.07093965262174606, "learning_rate": 3.95706763872013e-05, "loss": 0.3824, "step": 977 }, { "epoch": 0.07922877511341543, "grad_norm": 0.07622726261615753, "learning_rate": 3.9611178614823816e-05, "loss": 0.3884, "step": 978 }, { "epoch": 0.0793097861309138, "grad_norm": 0.07749678194522858, "learning_rate": 3.9651680842446335e-05, "loss": 0.4681, "step": 979 }, { "epoch": 0.07939079714841218, "grad_norm": 0.0761314406991005, "learning_rate": 3.9692183070068855e-05, "loss": 0.4281, "step": 980 }, { "epoch": 0.07947180816591057, "grad_norm": 0.07970507442951202, "learning_rate": 3.9732685297691375e-05, "loss": 0.4163, "step": 981 }, { "epoch": 0.07955281918340894, "grad_norm": 0.07264453172683716, "learning_rate": 3.9773187525313894e-05, "loss": 0.4161, "step": 982 }, { "epoch": 0.07963383020090732, "grad_norm": 0.09370779246091843, "learning_rate": 3.9813689752936414e-05, "loss": 0.458, "step": 983 }, { "epoch": 0.0797148412184057, "grad_norm": 0.09485704451799393, "learning_rate": 3.985419198055893e-05, "loss": 0.4437, "step": 984 }, { "epoch": 0.07979585223590409, "grad_norm": 0.06327967345714569, "learning_rate": 3.9894694208181453e-05, "loss": 0.4401, "step": 985 }, { "epoch": 0.07987686325340246, "grad_norm": 0.05429242178797722, "learning_rate": 3.993519643580397e-05, "loss": 0.4138, "step": 986 }, { "epoch": 0.07995787427090084, "grad_norm": 0.08034180104732513, "learning_rate": 3.997569866342649e-05, "loss": 0.4735, "step": 987 }, { "epoch": 0.08003888528839923, "grad_norm": 0.12071143090724945, "learning_rate": 4.001620089104901e-05, "loss": 0.477, "step": 988 }, { "epoch": 0.08011989630589761, "grad_norm": 0.08465772867202759, "learning_rate": 4.005670311867153e-05, "loss": 0.417, "step": 989 }, { "epoch": 0.08020090732339598, "grad_norm": 0.05626750737428665, "learning_rate": 4.0097205346294045e-05, "loss": 0.3891, "step": 990 }, { "epoch": 0.08028191834089436, "grad_norm": 0.09615819156169891, "learning_rate": 4.0137707573916565e-05, "loss": 0.4099, "step": 991 }, { "epoch": 0.08036292935839275, "grad_norm": 0.09798921644687653, "learning_rate": 4.0178209801539084e-05, "loss": 0.4212, "step": 992 }, { "epoch": 0.08044394037589112, "grad_norm": 0.07209984958171844, "learning_rate": 4.0218712029161604e-05, "loss": 0.3817, "step": 993 }, { "epoch": 0.0805249513933895, "grad_norm": 0.07342953979969025, "learning_rate": 4.0259214256784124e-05, "loss": 0.4143, "step": 994 }, { "epoch": 0.08060596241088788, "grad_norm": 0.07241872698068619, "learning_rate": 4.0299716484406643e-05, "loss": 0.4504, "step": 995 }, { "epoch": 0.08068697342838627, "grad_norm": 0.08157894760370255, "learning_rate": 4.034021871202916e-05, "loss": 0.4215, "step": 996 }, { "epoch": 0.08076798444588464, "grad_norm": 0.07313471287488937, "learning_rate": 4.038072093965168e-05, "loss": 0.4457, "step": 997 }, { "epoch": 0.08084899546338302, "grad_norm": 0.09657979011535645, "learning_rate": 4.04212231672742e-05, "loss": 0.4337, "step": 998 }, { "epoch": 0.0809300064808814, "grad_norm": 0.0723256841301918, "learning_rate": 4.046172539489672e-05, "loss": 0.4305, "step": 999 }, { "epoch": 0.08101101749837979, "grad_norm": 0.10510041564702988, "learning_rate": 4.050222762251924e-05, "loss": 0.4376, "step": 1000 }, { "epoch": 0.08109202851587816, "grad_norm": 0.07272931188344955, "learning_rate": 4.054272985014176e-05, "loss": 0.4248, "step": 1001 }, { "epoch": 0.08117303953337654, "grad_norm": 0.08269768953323364, "learning_rate": 4.0583232077764274e-05, "loss": 0.4025, "step": 1002 }, { "epoch": 0.08125405055087492, "grad_norm": 0.054768819361925125, "learning_rate": 4.0623734305386794e-05, "loss": 0.4148, "step": 1003 }, { "epoch": 0.08133506156837329, "grad_norm": 0.12584498524665833, "learning_rate": 4.066423653300932e-05, "loss": 0.4429, "step": 1004 }, { "epoch": 0.08141607258587168, "grad_norm": 0.05965457484126091, "learning_rate": 4.070473876063184e-05, "loss": 0.3961, "step": 1005 }, { "epoch": 0.08149708360337006, "grad_norm": 0.07491681724786758, "learning_rate": 4.074524098825436e-05, "loss": 0.432, "step": 1006 }, { "epoch": 0.08157809462086844, "grad_norm": 0.06580981612205505, "learning_rate": 4.078574321587687e-05, "loss": 0.3893, "step": 1007 }, { "epoch": 0.08165910563836681, "grad_norm": 0.07918661832809448, "learning_rate": 4.082624544349939e-05, "loss": 0.4332, "step": 1008 }, { "epoch": 0.0817401166558652, "grad_norm": 0.07634010910987854, "learning_rate": 4.086674767112191e-05, "loss": 0.377, "step": 1009 }, { "epoch": 0.08182112767336358, "grad_norm": 0.0872841626405716, "learning_rate": 4.090724989874443e-05, "loss": 0.4351, "step": 1010 }, { "epoch": 0.08190213869086195, "grad_norm": 0.08204904943704605, "learning_rate": 4.094775212636695e-05, "loss": 0.4632, "step": 1011 }, { "epoch": 0.08198314970836033, "grad_norm": 0.06768794357776642, "learning_rate": 4.098825435398947e-05, "loss": 0.4524, "step": 1012 }, { "epoch": 0.08206416072585872, "grad_norm": 0.07469814270734787, "learning_rate": 4.102875658161199e-05, "loss": 0.3856, "step": 1013 }, { "epoch": 0.0821451717433571, "grad_norm": 0.0659903958439827, "learning_rate": 4.106925880923451e-05, "loss": 0.4345, "step": 1014 }, { "epoch": 0.08222618276085547, "grad_norm": 0.06834937632083893, "learning_rate": 4.110976103685703e-05, "loss": 0.4568, "step": 1015 }, { "epoch": 0.08230719377835385, "grad_norm": 0.08034605532884598, "learning_rate": 4.115026326447955e-05, "loss": 0.3623, "step": 1016 }, { "epoch": 0.08238820479585224, "grad_norm": 0.08727092295885086, "learning_rate": 4.119076549210207e-05, "loss": 0.425, "step": 1017 }, { "epoch": 0.08246921581335062, "grad_norm": 0.06007024645805359, "learning_rate": 4.123126771972459e-05, "loss": 0.3768, "step": 1018 }, { "epoch": 0.08255022683084899, "grad_norm": 0.09328009933233261, "learning_rate": 4.127176994734711e-05, "loss": 0.415, "step": 1019 }, { "epoch": 0.08263123784834737, "grad_norm": 0.0731268972158432, "learning_rate": 4.131227217496962e-05, "loss": 0.4157, "step": 1020 }, { "epoch": 0.08271224886584576, "grad_norm": 0.08149100095033646, "learning_rate": 4.135277440259214e-05, "loss": 0.39, "step": 1021 }, { "epoch": 0.08279325988334413, "grad_norm": 0.07745902240276337, "learning_rate": 4.139327663021466e-05, "loss": 0.4899, "step": 1022 }, { "epoch": 0.08287427090084251, "grad_norm": 0.08260829001665115, "learning_rate": 4.143377885783719e-05, "loss": 0.4592, "step": 1023 }, { "epoch": 0.0829552819183409, "grad_norm": 0.08275768160820007, "learning_rate": 4.147428108545971e-05, "loss": 0.4523, "step": 1024 }, { "epoch": 0.08303629293583928, "grad_norm": 0.08937709033489227, "learning_rate": 4.151478331308222e-05, "loss": 0.4551, "step": 1025 }, { "epoch": 0.08311730395333765, "grad_norm": 0.0691104531288147, "learning_rate": 4.155528554070474e-05, "loss": 0.4338, "step": 1026 }, { "epoch": 0.08319831497083603, "grad_norm": 0.061282236129045486, "learning_rate": 4.159578776832726e-05, "loss": 0.4415, "step": 1027 }, { "epoch": 0.08327932598833442, "grad_norm": 0.08525071293115616, "learning_rate": 4.163628999594978e-05, "loss": 0.4987, "step": 1028 }, { "epoch": 0.0833603370058328, "grad_norm": 0.07696589827537537, "learning_rate": 4.16767922235723e-05, "loss": 0.409, "step": 1029 }, { "epoch": 0.08344134802333117, "grad_norm": 0.06403271853923798, "learning_rate": 4.171729445119482e-05, "loss": 0.3939, "step": 1030 }, { "epoch": 0.08352235904082955, "grad_norm": 0.11858736723661423, "learning_rate": 4.175779667881734e-05, "loss": 0.4389, "step": 1031 }, { "epoch": 0.08360337005832794, "grad_norm": 0.1112351268529892, "learning_rate": 4.179829890643986e-05, "loss": 0.4359, "step": 1032 }, { "epoch": 0.0836843810758263, "grad_norm": 0.07795192301273346, "learning_rate": 4.183880113406238e-05, "loss": 0.3926, "step": 1033 }, { "epoch": 0.08376539209332469, "grad_norm": 0.08661824464797974, "learning_rate": 4.18793033616849e-05, "loss": 0.452, "step": 1034 }, { "epoch": 0.08384640311082307, "grad_norm": 0.07108978182077408, "learning_rate": 4.1919805589307417e-05, "loss": 0.4535, "step": 1035 }, { "epoch": 0.08392741412832146, "grad_norm": 0.08247314393520355, "learning_rate": 4.1960307816929936e-05, "loss": 0.4366, "step": 1036 }, { "epoch": 0.08400842514581983, "grad_norm": 0.07033973187208176, "learning_rate": 4.200081004455245e-05, "loss": 0.3718, "step": 1037 }, { "epoch": 0.08408943616331821, "grad_norm": 0.10139517486095428, "learning_rate": 4.204131227217497e-05, "loss": 0.4682, "step": 1038 }, { "epoch": 0.0841704471808166, "grad_norm": 0.06558583676815033, "learning_rate": 4.208181449979749e-05, "loss": 0.4123, "step": 1039 }, { "epoch": 0.08425145819831498, "grad_norm": 0.06659422814846039, "learning_rate": 4.212231672742001e-05, "loss": 0.4624, "step": 1040 }, { "epoch": 0.08433246921581335, "grad_norm": 0.09026381373405457, "learning_rate": 4.216281895504253e-05, "loss": 0.3827, "step": 1041 }, { "epoch": 0.08441348023331173, "grad_norm": 0.08064718544483185, "learning_rate": 4.2203321182665054e-05, "loss": 0.4509, "step": 1042 }, { "epoch": 0.08449449125081011, "grad_norm": 0.07609561085700989, "learning_rate": 4.224382341028757e-05, "loss": 0.3976, "step": 1043 }, { "epoch": 0.08457550226830848, "grad_norm": 0.07682601362466812, "learning_rate": 4.228432563791009e-05, "loss": 0.418, "step": 1044 }, { "epoch": 0.08465651328580687, "grad_norm": 0.07300432026386261, "learning_rate": 4.2324827865532607e-05, "loss": 0.4237, "step": 1045 }, { "epoch": 0.08473752430330525, "grad_norm": 0.08021645992994308, "learning_rate": 4.2365330093155126e-05, "loss": 0.4395, "step": 1046 }, { "epoch": 0.08481853532080363, "grad_norm": 0.07140376418828964, "learning_rate": 4.2405832320777646e-05, "loss": 0.4582, "step": 1047 }, { "epoch": 0.084899546338302, "grad_norm": 0.08937831968069077, "learning_rate": 4.2446334548400166e-05, "loss": 0.4022, "step": 1048 }, { "epoch": 0.08498055735580039, "grad_norm": 0.0884295105934143, "learning_rate": 4.248683677602268e-05, "loss": 0.4502, "step": 1049 }, { "epoch": 0.08506156837329877, "grad_norm": 0.0966046079993248, "learning_rate": 4.25273390036452e-05, "loss": 0.3892, "step": 1050 }, { "epoch": 0.08514257939079715, "grad_norm": 0.07256580144166946, "learning_rate": 4.2567841231267725e-05, "loss": 0.4142, "step": 1051 }, { "epoch": 0.08522359040829552, "grad_norm": 0.08426011353731155, "learning_rate": 4.2608343458890244e-05, "loss": 0.4438, "step": 1052 }, { "epoch": 0.08530460142579391, "grad_norm": 0.07426105439662933, "learning_rate": 4.2648845686512764e-05, "loss": 0.403, "step": 1053 }, { "epoch": 0.08538561244329229, "grad_norm": 0.09439975768327713, "learning_rate": 4.2689347914135284e-05, "loss": 0.3901, "step": 1054 }, { "epoch": 0.08546662346079066, "grad_norm": 0.07137623429298401, "learning_rate": 4.2729850141757796e-05, "loss": 0.4224, "step": 1055 }, { "epoch": 0.08554763447828904, "grad_norm": 0.07066964358091354, "learning_rate": 4.2770352369380316e-05, "loss": 0.4394, "step": 1056 }, { "epoch": 0.08562864549578743, "grad_norm": 0.06699734926223755, "learning_rate": 4.2810854597002836e-05, "loss": 0.4161, "step": 1057 }, { "epoch": 0.08570965651328581, "grad_norm": 0.06020607799291611, "learning_rate": 4.2851356824625355e-05, "loss": 0.4373, "step": 1058 }, { "epoch": 0.08579066753078418, "grad_norm": 0.06886182725429535, "learning_rate": 4.2891859052247875e-05, "loss": 0.433, "step": 1059 }, { "epoch": 0.08587167854828257, "grad_norm": 0.07165521383285522, "learning_rate": 4.2932361279870395e-05, "loss": 0.382, "step": 1060 }, { "epoch": 0.08595268956578095, "grad_norm": 0.05116075277328491, "learning_rate": 4.2972863507492914e-05, "loss": 0.4373, "step": 1061 }, { "epoch": 0.08603370058327933, "grad_norm": 0.060705091804265976, "learning_rate": 4.3013365735115434e-05, "loss": 0.4095, "step": 1062 }, { "epoch": 0.0861147116007777, "grad_norm": 0.0898757055401802, "learning_rate": 4.3053867962737954e-05, "loss": 0.4073, "step": 1063 }, { "epoch": 0.08619572261827609, "grad_norm": 0.0634063258767128, "learning_rate": 4.3094370190360473e-05, "loss": 0.3874, "step": 1064 }, { "epoch": 0.08627673363577447, "grad_norm": 0.07108429074287415, "learning_rate": 4.313487241798299e-05, "loss": 0.3907, "step": 1065 }, { "epoch": 0.08635774465327284, "grad_norm": 0.06919017434120178, "learning_rate": 4.317537464560551e-05, "loss": 0.405, "step": 1066 }, { "epoch": 0.08643875567077122, "grad_norm": 0.08269021660089493, "learning_rate": 4.3215876873228026e-05, "loss": 0.4392, "step": 1067 }, { "epoch": 0.0865197666882696, "grad_norm": 0.07313472777605057, "learning_rate": 4.3256379100850545e-05, "loss": 0.449, "step": 1068 }, { "epoch": 0.08660077770576799, "grad_norm": 0.08952949196100235, "learning_rate": 4.3296881328473065e-05, "loss": 0.3906, "step": 1069 }, { "epoch": 0.08668178872326636, "grad_norm": 0.06147094443440437, "learning_rate": 4.333738355609559e-05, "loss": 0.3965, "step": 1070 }, { "epoch": 0.08676279974076474, "grad_norm": 0.05797317251563072, "learning_rate": 4.337788578371811e-05, "loss": 0.4425, "step": 1071 }, { "epoch": 0.08684381075826313, "grad_norm": 0.07006549835205078, "learning_rate": 4.3418388011340624e-05, "loss": 0.42, "step": 1072 }, { "epoch": 0.08692482177576151, "grad_norm": 0.0779939591884613, "learning_rate": 4.3458890238963144e-05, "loss": 0.4091, "step": 1073 }, { "epoch": 0.08700583279325988, "grad_norm": 0.05775775760412216, "learning_rate": 4.349939246658566e-05, "loss": 0.4438, "step": 1074 }, { "epoch": 0.08708684381075826, "grad_norm": 0.054523251950740814, "learning_rate": 4.353989469420818e-05, "loss": 0.4324, "step": 1075 }, { "epoch": 0.08716785482825665, "grad_norm": 0.07762283086776733, "learning_rate": 4.35803969218307e-05, "loss": 0.4263, "step": 1076 }, { "epoch": 0.08724886584575502, "grad_norm": 0.08139238506555557, "learning_rate": 4.362089914945322e-05, "loss": 0.3948, "step": 1077 }, { "epoch": 0.0873298768632534, "grad_norm": 0.07066658139228821, "learning_rate": 4.366140137707574e-05, "loss": 0.4454, "step": 1078 }, { "epoch": 0.08741088788075178, "grad_norm": 0.06936241686344147, "learning_rate": 4.3701903604698255e-05, "loss": 0.3634, "step": 1079 }, { "epoch": 0.08749189889825017, "grad_norm": 0.08730461448431015, "learning_rate": 4.374240583232078e-05, "loss": 0.4485, "step": 1080 }, { "epoch": 0.08757290991574854, "grad_norm": 0.05085871368646622, "learning_rate": 4.37829080599433e-05, "loss": 0.4426, "step": 1081 }, { "epoch": 0.08765392093324692, "grad_norm": 0.07138410210609436, "learning_rate": 4.382341028756582e-05, "loss": 0.3827, "step": 1082 }, { "epoch": 0.0877349319507453, "grad_norm": 0.06872845441102982, "learning_rate": 4.386391251518834e-05, "loss": 0.4263, "step": 1083 }, { "epoch": 0.08781594296824369, "grad_norm": 0.09864786267280579, "learning_rate": 4.390441474281085e-05, "loss": 0.3815, "step": 1084 }, { "epoch": 0.08789695398574206, "grad_norm": 0.1010093167424202, "learning_rate": 4.394491697043337e-05, "loss": 0.4514, "step": 1085 }, { "epoch": 0.08797796500324044, "grad_norm": 0.07918153703212738, "learning_rate": 4.398541919805589e-05, "loss": 0.425, "step": 1086 }, { "epoch": 0.08805897602073882, "grad_norm": 0.0776391550898552, "learning_rate": 4.402592142567841e-05, "loss": 0.406, "step": 1087 }, { "epoch": 0.0881399870382372, "grad_norm": 0.07044905424118042, "learning_rate": 4.406642365330093e-05, "loss": 0.4271, "step": 1088 }, { "epoch": 0.08822099805573558, "grad_norm": 0.07022466510534286, "learning_rate": 4.410692588092346e-05, "loss": 0.4309, "step": 1089 }, { "epoch": 0.08830200907323396, "grad_norm": 0.08486749976873398, "learning_rate": 4.414742810854597e-05, "loss": 0.4455, "step": 1090 }, { "epoch": 0.08838302009073234, "grad_norm": 0.057559676468372345, "learning_rate": 4.418793033616849e-05, "loss": 0.3676, "step": 1091 }, { "epoch": 0.08846403110823071, "grad_norm": 0.07805667817592621, "learning_rate": 4.422843256379101e-05, "loss": 0.4804, "step": 1092 }, { "epoch": 0.0885450421257291, "grad_norm": 0.12310101091861725, "learning_rate": 4.426893479141353e-05, "loss": 0.4195, "step": 1093 }, { "epoch": 0.08862605314322748, "grad_norm": 0.07303401827812195, "learning_rate": 4.430943701903605e-05, "loss": 0.4732, "step": 1094 }, { "epoch": 0.08870706416072587, "grad_norm": 0.08019717782735825, "learning_rate": 4.434993924665857e-05, "loss": 0.4216, "step": 1095 }, { "epoch": 0.08878807517822424, "grad_norm": 0.08238019049167633, "learning_rate": 4.439044147428109e-05, "loss": 0.4106, "step": 1096 }, { "epoch": 0.08886908619572262, "grad_norm": 0.06461286544799805, "learning_rate": 4.44309437019036e-05, "loss": 0.4242, "step": 1097 }, { "epoch": 0.088950097213221, "grad_norm": 0.09898345917463303, "learning_rate": 4.447144592952613e-05, "loss": 0.4189, "step": 1098 }, { "epoch": 0.08903110823071937, "grad_norm": 0.060210444033145905, "learning_rate": 4.451194815714865e-05, "loss": 0.3702, "step": 1099 }, { "epoch": 0.08911211924821776, "grad_norm": 0.0533088743686676, "learning_rate": 4.455245038477117e-05, "loss": 0.3527, "step": 1100 }, { "epoch": 0.08919313026571614, "grad_norm": 0.08914104104042053, "learning_rate": 4.459295261239369e-05, "loss": 0.4156, "step": 1101 }, { "epoch": 0.08927414128321452, "grad_norm": 0.07332563400268555, "learning_rate": 4.46334548400162e-05, "loss": 0.377, "step": 1102 }, { "epoch": 0.08935515230071289, "grad_norm": 0.07546962052583694, "learning_rate": 4.467395706763872e-05, "loss": 0.4239, "step": 1103 }, { "epoch": 0.08943616331821128, "grad_norm": 0.06037290766835213, "learning_rate": 4.471445929526124e-05, "loss": 0.3883, "step": 1104 }, { "epoch": 0.08951717433570966, "grad_norm": 0.059730976819992065, "learning_rate": 4.475496152288376e-05, "loss": 0.4102, "step": 1105 }, { "epoch": 0.08959818535320804, "grad_norm": 0.07452671229839325, "learning_rate": 4.479546375050628e-05, "loss": 0.4174, "step": 1106 }, { "epoch": 0.08967919637070641, "grad_norm": 0.06664783507585526, "learning_rate": 4.48359659781288e-05, "loss": 0.433, "step": 1107 }, { "epoch": 0.0897602073882048, "grad_norm": 0.06502564251422882, "learning_rate": 4.487646820575132e-05, "loss": 0.3682, "step": 1108 }, { "epoch": 0.08984121840570318, "grad_norm": 0.08823461830615997, "learning_rate": 4.491697043337384e-05, "loss": 0.4112, "step": 1109 }, { "epoch": 0.08992222942320155, "grad_norm": 0.06355852633714676, "learning_rate": 4.495747266099636e-05, "loss": 0.3712, "step": 1110 }, { "epoch": 0.09000324044069993, "grad_norm": 0.09234008938074112, "learning_rate": 4.499797488861888e-05, "loss": 0.3959, "step": 1111 }, { "epoch": 0.09008425145819832, "grad_norm": 0.07149260491132736, "learning_rate": 4.50384771162414e-05, "loss": 0.4581, "step": 1112 }, { "epoch": 0.0901652624756967, "grad_norm": 0.0520872138440609, "learning_rate": 4.507897934386392e-05, "loss": 0.3969, "step": 1113 }, { "epoch": 0.09024627349319507, "grad_norm": 0.06375659257173538, "learning_rate": 4.511948157148643e-05, "loss": 0.4179, "step": 1114 }, { "epoch": 0.09032728451069345, "grad_norm": 0.05911070480942726, "learning_rate": 4.515998379910895e-05, "loss": 0.3514, "step": 1115 }, { "epoch": 0.09040829552819184, "grad_norm": 0.07041165977716446, "learning_rate": 4.520048602673147e-05, "loss": 0.3959, "step": 1116 }, { "epoch": 0.09048930654569022, "grad_norm": 0.07625393569469452, "learning_rate": 4.5240988254353996e-05, "loss": 0.4026, "step": 1117 }, { "epoch": 0.09057031756318859, "grad_norm": 0.06732834130525589, "learning_rate": 4.5281490481976515e-05, "loss": 0.4292, "step": 1118 }, { "epoch": 0.09065132858068697, "grad_norm": 0.07233322411775589, "learning_rate": 4.5321992709599035e-05, "loss": 0.4127, "step": 1119 }, { "epoch": 0.09073233959818536, "grad_norm": 0.09195321798324585, "learning_rate": 4.536249493722155e-05, "loss": 0.4276, "step": 1120 }, { "epoch": 0.09081335061568373, "grad_norm": 0.06543777137994766, "learning_rate": 4.540299716484407e-05, "loss": 0.4317, "step": 1121 }, { "epoch": 0.09089436163318211, "grad_norm": 0.08684305101633072, "learning_rate": 4.544349939246659e-05, "loss": 0.4126, "step": 1122 }, { "epoch": 0.0909753726506805, "grad_norm": 0.07450434565544128, "learning_rate": 4.548400162008911e-05, "loss": 0.4115, "step": 1123 }, { "epoch": 0.09105638366817888, "grad_norm": 0.06927470862865448, "learning_rate": 4.5524503847711626e-05, "loss": 0.411, "step": 1124 }, { "epoch": 0.09113739468567725, "grad_norm": 0.08960287272930145, "learning_rate": 4.5565006075334146e-05, "loss": 0.4878, "step": 1125 }, { "epoch": 0.09121840570317563, "grad_norm": 0.09957445412874222, "learning_rate": 4.560550830295666e-05, "loss": 0.3939, "step": 1126 }, { "epoch": 0.09129941672067401, "grad_norm": 0.0676618292927742, "learning_rate": 4.5646010530579185e-05, "loss": 0.4266, "step": 1127 }, { "epoch": 0.09138042773817238, "grad_norm": 0.07924690842628479, "learning_rate": 4.5686512758201705e-05, "loss": 0.4755, "step": 1128 }, { "epoch": 0.09146143875567077, "grad_norm": 0.08089061081409454, "learning_rate": 4.5727014985824225e-05, "loss": 0.4054, "step": 1129 }, { "epoch": 0.09154244977316915, "grad_norm": 0.05947039648890495, "learning_rate": 4.5767517213446744e-05, "loss": 0.3883, "step": 1130 }, { "epoch": 0.09162346079066754, "grad_norm": 0.06786283105611801, "learning_rate": 4.5808019441069264e-05, "loss": 0.4051, "step": 1131 }, { "epoch": 0.0917044718081659, "grad_norm": 0.07103536278009415, "learning_rate": 4.584852166869178e-05, "loss": 0.4103, "step": 1132 }, { "epoch": 0.09178548282566429, "grad_norm": 0.09525204449892044, "learning_rate": 4.58890238963143e-05, "loss": 0.4124, "step": 1133 }, { "epoch": 0.09186649384316267, "grad_norm": 0.08623608201742172, "learning_rate": 4.5929526123936816e-05, "loss": 0.4231, "step": 1134 }, { "epoch": 0.09194750486066106, "grad_norm": 0.06909052282571793, "learning_rate": 4.5970028351559336e-05, "loss": 0.3996, "step": 1135 }, { "epoch": 0.09202851587815943, "grad_norm": 0.07997187972068787, "learning_rate": 4.601053057918186e-05, "loss": 0.3815, "step": 1136 }, { "epoch": 0.09210952689565781, "grad_norm": 0.07907916605472565, "learning_rate": 4.6051032806804375e-05, "loss": 0.3531, "step": 1137 }, { "epoch": 0.09219053791315619, "grad_norm": 0.08305371552705765, "learning_rate": 4.6091535034426895e-05, "loss": 0.4223, "step": 1138 }, { "epoch": 0.09227154893065456, "grad_norm": 0.05640506371855736, "learning_rate": 4.6132037262049415e-05, "loss": 0.417, "step": 1139 }, { "epoch": 0.09235255994815295, "grad_norm": 0.07259729504585266, "learning_rate": 4.6172539489671934e-05, "loss": 0.4169, "step": 1140 }, { "epoch": 0.09243357096565133, "grad_norm": 0.062133122235536575, "learning_rate": 4.6213041717294454e-05, "loss": 0.3546, "step": 1141 }, { "epoch": 0.09251458198314971, "grad_norm": 0.05108145251870155, "learning_rate": 4.6253543944916974e-05, "loss": 0.3647, "step": 1142 }, { "epoch": 0.09259559300064808, "grad_norm": 0.07486524432897568, "learning_rate": 4.629404617253949e-05, "loss": 0.4012, "step": 1143 }, { "epoch": 0.09267660401814647, "grad_norm": 0.08537513017654419, "learning_rate": 4.6334548400162006e-05, "loss": 0.4385, "step": 1144 }, { "epoch": 0.09275761503564485, "grad_norm": 0.07322200387716293, "learning_rate": 4.637505062778453e-05, "loss": 0.398, "step": 1145 }, { "epoch": 0.09283862605314323, "grad_norm": 0.06534241139888763, "learning_rate": 4.641555285540705e-05, "loss": 0.3882, "step": 1146 }, { "epoch": 0.0929196370706416, "grad_norm": 0.10034750401973724, "learning_rate": 4.645605508302957e-05, "loss": 0.4191, "step": 1147 }, { "epoch": 0.09300064808813999, "grad_norm": 0.0938630998134613, "learning_rate": 4.649655731065209e-05, "loss": 0.4167, "step": 1148 }, { "epoch": 0.09308165910563837, "grad_norm": 0.06524375826120377, "learning_rate": 4.6537059538274605e-05, "loss": 0.3875, "step": 1149 }, { "epoch": 0.09316267012313674, "grad_norm": 0.07733950018882751, "learning_rate": 4.6577561765897124e-05, "loss": 0.4724, "step": 1150 }, { "epoch": 0.09324368114063512, "grad_norm": 0.08383625745773315, "learning_rate": 4.6618063993519644e-05, "loss": 0.4273, "step": 1151 }, { "epoch": 0.09332469215813351, "grad_norm": 0.07422877848148346, "learning_rate": 4.6658566221142164e-05, "loss": 0.4515, "step": 1152 }, { "epoch": 0.09340570317563189, "grad_norm": 0.06479012966156006, "learning_rate": 4.669906844876468e-05, "loss": 0.4212, "step": 1153 }, { "epoch": 0.09348671419313026, "grad_norm": 0.055432505905628204, "learning_rate": 4.67395706763872e-05, "loss": 0.417, "step": 1154 }, { "epoch": 0.09356772521062864, "grad_norm": 0.09282301366329193, "learning_rate": 4.678007290400972e-05, "loss": 0.4056, "step": 1155 }, { "epoch": 0.09364873622812703, "grad_norm": 0.08054335415363312, "learning_rate": 4.682057513163224e-05, "loss": 0.3989, "step": 1156 }, { "epoch": 0.09372974724562541, "grad_norm": 0.0536513514816761, "learning_rate": 4.686107735925476e-05, "loss": 0.4002, "step": 1157 }, { "epoch": 0.09381075826312378, "grad_norm": 0.071620412170887, "learning_rate": 4.690157958687728e-05, "loss": 0.4133, "step": 1158 }, { "epoch": 0.09389176928062216, "grad_norm": 0.06727361679077148, "learning_rate": 4.69420818144998e-05, "loss": 0.4043, "step": 1159 }, { "epoch": 0.09397278029812055, "grad_norm": 0.07755836099386215, "learning_rate": 4.698258404212232e-05, "loss": 0.415, "step": 1160 }, { "epoch": 0.09405379131561892, "grad_norm": 0.06299854069948196, "learning_rate": 4.7023086269744834e-05, "loss": 0.406, "step": 1161 }, { "epoch": 0.0941348023331173, "grad_norm": 0.07159342616796494, "learning_rate": 4.7063588497367354e-05, "loss": 0.414, "step": 1162 }, { "epoch": 0.09421581335061568, "grad_norm": 0.07070232182741165, "learning_rate": 4.710409072498987e-05, "loss": 0.3701, "step": 1163 }, { "epoch": 0.09429682436811407, "grad_norm": 0.06699605286121368, "learning_rate": 4.71445929526124e-05, "loss": 0.4029, "step": 1164 }, { "epoch": 0.09437783538561244, "grad_norm": 0.05861575901508331, "learning_rate": 4.718509518023492e-05, "loss": 0.4001, "step": 1165 }, { "epoch": 0.09445884640311082, "grad_norm": 0.06890154629945755, "learning_rate": 4.722559740785744e-05, "loss": 0.4163, "step": 1166 }, { "epoch": 0.0945398574206092, "grad_norm": 0.07200920581817627, "learning_rate": 4.726609963547995e-05, "loss": 0.416, "step": 1167 }, { "epoch": 0.09462086843810759, "grad_norm": 0.08826032280921936, "learning_rate": 4.730660186310247e-05, "loss": 0.3949, "step": 1168 }, { "epoch": 0.09470187945560596, "grad_norm": 0.08327112346887589, "learning_rate": 4.734710409072499e-05, "loss": 0.4567, "step": 1169 }, { "epoch": 0.09478289047310434, "grad_norm": 0.06406053900718689, "learning_rate": 4.738760631834751e-05, "loss": 0.3839, "step": 1170 }, { "epoch": 0.09486390149060273, "grad_norm": 0.09097456187009811, "learning_rate": 4.742810854597003e-05, "loss": 0.402, "step": 1171 }, { "epoch": 0.0949449125081011, "grad_norm": 0.11253924667835236, "learning_rate": 4.746861077359255e-05, "loss": 0.4074, "step": 1172 }, { "epoch": 0.09502592352559948, "grad_norm": 0.08323650807142258, "learning_rate": 4.750911300121506e-05, "loss": 0.4308, "step": 1173 }, { "epoch": 0.09510693454309786, "grad_norm": 0.05829388275742531, "learning_rate": 4.754961522883759e-05, "loss": 0.393, "step": 1174 }, { "epoch": 0.09518794556059625, "grad_norm": 0.08917537331581116, "learning_rate": 4.759011745646011e-05, "loss": 0.4074, "step": 1175 }, { "epoch": 0.09526895657809462, "grad_norm": 0.0945352092385292, "learning_rate": 4.763061968408263e-05, "loss": 0.4483, "step": 1176 }, { "epoch": 0.095349967595593, "grad_norm": 0.08760906010866165, "learning_rate": 4.767112191170515e-05, "loss": 0.4784, "step": 1177 }, { "epoch": 0.09543097861309138, "grad_norm": 0.08544166386127472, "learning_rate": 4.771162413932767e-05, "loss": 0.4429, "step": 1178 }, { "epoch": 0.09551198963058977, "grad_norm": 0.07088976353406906, "learning_rate": 4.775212636695018e-05, "loss": 0.4216, "step": 1179 }, { "epoch": 0.09559300064808814, "grad_norm": 0.08196429163217545, "learning_rate": 4.77926285945727e-05, "loss": 0.4091, "step": 1180 }, { "epoch": 0.09567401166558652, "grad_norm": 0.06955184787511826, "learning_rate": 4.783313082219522e-05, "loss": 0.4191, "step": 1181 }, { "epoch": 0.0957550226830849, "grad_norm": 0.06988652795553207, "learning_rate": 4.787363304981774e-05, "loss": 0.4125, "step": 1182 }, { "epoch": 0.09583603370058327, "grad_norm": 0.10521332174539566, "learning_rate": 4.7914135277440267e-05, "loss": 0.4901, "step": 1183 }, { "epoch": 0.09591704471808166, "grad_norm": 0.05970345810055733, "learning_rate": 4.795463750506278e-05, "loss": 0.428, "step": 1184 }, { "epoch": 0.09599805573558004, "grad_norm": 0.07127997279167175, "learning_rate": 4.79951397326853e-05, "loss": 0.3929, "step": 1185 }, { "epoch": 0.09607906675307842, "grad_norm": 0.07283204048871994, "learning_rate": 4.803564196030782e-05, "loss": 0.4243, "step": 1186 }, { "epoch": 0.0961600777705768, "grad_norm": 0.10053546726703644, "learning_rate": 4.807614418793034e-05, "loss": 0.4163, "step": 1187 }, { "epoch": 0.09624108878807518, "grad_norm": 0.126708984375, "learning_rate": 4.811664641555286e-05, "loss": 0.4715, "step": 1188 }, { "epoch": 0.09632209980557356, "grad_norm": 0.10233563184738159, "learning_rate": 4.815714864317538e-05, "loss": 0.4774, "step": 1189 }, { "epoch": 0.09640311082307194, "grad_norm": 0.08582703024148941, "learning_rate": 4.81976508707979e-05, "loss": 0.4193, "step": 1190 }, { "epoch": 0.09648412184057031, "grad_norm": 0.07314405590295792, "learning_rate": 4.823815309842041e-05, "loss": 0.3819, "step": 1191 }, { "epoch": 0.0965651328580687, "grad_norm": 0.09516175836324692, "learning_rate": 4.827865532604293e-05, "loss": 0.4087, "step": 1192 }, { "epoch": 0.09664614387556708, "grad_norm": 0.08461283892393112, "learning_rate": 4.8319157553665456e-05, "loss": 0.408, "step": 1193 }, { "epoch": 0.09672715489306545, "grad_norm": 0.0769420638680458, "learning_rate": 4.8359659781287976e-05, "loss": 0.401, "step": 1194 }, { "epoch": 0.09680816591056383, "grad_norm": 0.06576257199048996, "learning_rate": 4.8400162008910496e-05, "loss": 0.4162, "step": 1195 }, { "epoch": 0.09688917692806222, "grad_norm": 0.06271419674158096, "learning_rate": 4.8440664236533015e-05, "loss": 0.3753, "step": 1196 }, { "epoch": 0.0969701879455606, "grad_norm": 0.07287538051605225, "learning_rate": 4.848116646415553e-05, "loss": 0.3884, "step": 1197 }, { "epoch": 0.09705119896305897, "grad_norm": 0.07314209640026093, "learning_rate": 4.852166869177805e-05, "loss": 0.4365, "step": 1198 }, { "epoch": 0.09713220998055735, "grad_norm": 0.09039682894945145, "learning_rate": 4.856217091940057e-05, "loss": 0.4272, "step": 1199 }, { "epoch": 0.09721322099805574, "grad_norm": 0.07149705290794373, "learning_rate": 4.860267314702309e-05, "loss": 0.4285, "step": 1200 }, { "epoch": 0.09729423201555412, "grad_norm": 0.07689498364925385, "learning_rate": 4.864317537464561e-05, "loss": 0.3871, "step": 1201 }, { "epoch": 0.09737524303305249, "grad_norm": 0.0991893783211708, "learning_rate": 4.868367760226813e-05, "loss": 0.406, "step": 1202 }, { "epoch": 0.09745625405055088, "grad_norm": 0.07114807516336441, "learning_rate": 4.8724179829890646e-05, "loss": 0.4091, "step": 1203 }, { "epoch": 0.09753726506804926, "grad_norm": 0.09959740936756134, "learning_rate": 4.8764682057513166e-05, "loss": 0.4136, "step": 1204 }, { "epoch": 0.09761827608554763, "grad_norm": 0.07426830381155014, "learning_rate": 4.8805184285135686e-05, "loss": 0.3902, "step": 1205 }, { "epoch": 0.09769928710304601, "grad_norm": 0.06713051348924637, "learning_rate": 4.8845686512758205e-05, "loss": 0.4007, "step": 1206 }, { "epoch": 0.0977802981205444, "grad_norm": 0.06639891117811203, "learning_rate": 4.8886188740380725e-05, "loss": 0.4468, "step": 1207 }, { "epoch": 0.09786130913804278, "grad_norm": 0.07029426842927933, "learning_rate": 4.8926690968003245e-05, "loss": 0.3641, "step": 1208 }, { "epoch": 0.09794232015554115, "grad_norm": 0.07336651533842087, "learning_rate": 4.896719319562576e-05, "loss": 0.4148, "step": 1209 }, { "epoch": 0.09802333117303953, "grad_norm": 0.07670346647500992, "learning_rate": 4.900769542324828e-05, "loss": 0.438, "step": 1210 }, { "epoch": 0.09810434219053792, "grad_norm": 0.07110664993524551, "learning_rate": 4.9048197650870804e-05, "loss": 0.4076, "step": 1211 }, { "epoch": 0.0981853532080363, "grad_norm": 0.06885383278131485, "learning_rate": 4.9088699878493323e-05, "loss": 0.3902, "step": 1212 }, { "epoch": 0.09826636422553467, "grad_norm": 0.07384192943572998, "learning_rate": 4.912920210611584e-05, "loss": 0.3999, "step": 1213 }, { "epoch": 0.09834737524303305, "grad_norm": 0.06060566008090973, "learning_rate": 4.9169704333738356e-05, "loss": 0.4097, "step": 1214 }, { "epoch": 0.09842838626053144, "grad_norm": 0.06468156725168228, "learning_rate": 4.9210206561360876e-05, "loss": 0.3903, "step": 1215 }, { "epoch": 0.0985093972780298, "grad_norm": 0.0653204470872879, "learning_rate": 4.9250708788983395e-05, "loss": 0.413, "step": 1216 }, { "epoch": 0.09859040829552819, "grad_norm": 0.07760292291641235, "learning_rate": 4.9291211016605915e-05, "loss": 0.3773, "step": 1217 }, { "epoch": 0.09867141931302657, "grad_norm": 0.06723463535308838, "learning_rate": 4.9331713244228435e-05, "loss": 0.4027, "step": 1218 }, { "epoch": 0.09875243033052496, "grad_norm": 0.07288457453250885, "learning_rate": 4.9372215471850954e-05, "loss": 0.4557, "step": 1219 }, { "epoch": 0.09883344134802333, "grad_norm": 0.06699313223361969, "learning_rate": 4.9412717699473474e-05, "loss": 0.3996, "step": 1220 }, { "epoch": 0.09891445236552171, "grad_norm": 0.07510928064584732, "learning_rate": 4.9453219927095994e-05, "loss": 0.4223, "step": 1221 }, { "epoch": 0.0989954633830201, "grad_norm": 0.07308734208345413, "learning_rate": 4.949372215471851e-05, "loss": 0.4433, "step": 1222 }, { "epoch": 0.09907647440051848, "grad_norm": 0.0717131718993187, "learning_rate": 4.953422438234103e-05, "loss": 0.4068, "step": 1223 }, { "epoch": 0.09915748541801685, "grad_norm": 0.07816499471664429, "learning_rate": 4.957472660996355e-05, "loss": 0.4064, "step": 1224 }, { "epoch": 0.09923849643551523, "grad_norm": 0.0869022086262703, "learning_rate": 4.961522883758607e-05, "loss": 0.3962, "step": 1225 }, { "epoch": 0.09931950745301361, "grad_norm": 0.06032872572541237, "learning_rate": 4.9655731065208585e-05, "loss": 0.3723, "step": 1226 }, { "epoch": 0.09940051847051198, "grad_norm": 0.09066958725452423, "learning_rate": 4.9696233292831105e-05, "loss": 0.4219, "step": 1227 }, { "epoch": 0.09948152948801037, "grad_norm": 0.09620976448059082, "learning_rate": 4.9736735520453625e-05, "loss": 0.4514, "step": 1228 }, { "epoch": 0.09956254050550875, "grad_norm": 0.07409602403640747, "learning_rate": 4.9777237748076144e-05, "loss": 0.4768, "step": 1229 }, { "epoch": 0.09964355152300713, "grad_norm": 0.06351660937070847, "learning_rate": 4.981773997569867e-05, "loss": 0.3369, "step": 1230 }, { "epoch": 0.0997245625405055, "grad_norm": 0.06918103992938995, "learning_rate": 4.985824220332119e-05, "loss": 0.4126, "step": 1231 }, { "epoch": 0.09980557355800389, "grad_norm": 0.07381061464548111, "learning_rate": 4.98987444309437e-05, "loss": 0.4248, "step": 1232 }, { "epoch": 0.09988658457550227, "grad_norm": 0.05555868148803711, "learning_rate": 4.993924665856622e-05, "loss": 0.3771, "step": 1233 }, { "epoch": 0.09996759559300065, "grad_norm": 0.13505244255065918, "learning_rate": 4.997974888618874e-05, "loss": 0.4016, "step": 1234 }, { "epoch": 0.10004860661049902, "grad_norm": 0.061966948211193085, "learning_rate": 5.002025111381127e-05, "loss": 0.4484, "step": 1235 }, { "epoch": 0.10012961762799741, "grad_norm": 0.07690034806728363, "learning_rate": 5.006075334143379e-05, "loss": 0.391, "step": 1236 }, { "epoch": 0.10021062864549579, "grad_norm": 0.0610932819545269, "learning_rate": 5.01012555690563e-05, "loss": 0.4042, "step": 1237 }, { "epoch": 0.10029163966299416, "grad_norm": 0.0714229866862297, "learning_rate": 5.014175779667882e-05, "loss": 0.3876, "step": 1238 }, { "epoch": 0.10037265068049255, "grad_norm": 0.055647656321525574, "learning_rate": 5.018226002430134e-05, "loss": 0.4291, "step": 1239 }, { "epoch": 0.10045366169799093, "grad_norm": 0.09826336801052094, "learning_rate": 5.022276225192386e-05, "loss": 0.4639, "step": 1240 }, { "epoch": 0.10053467271548931, "grad_norm": 0.06438162177801132, "learning_rate": 5.026326447954638e-05, "loss": 0.4241, "step": 1241 }, { "epoch": 0.10061568373298768, "grad_norm": 0.05960644781589508, "learning_rate": 5.03037667071689e-05, "loss": 0.4309, "step": 1242 }, { "epoch": 0.10069669475048607, "grad_norm": 0.08444269001483917, "learning_rate": 5.034426893479142e-05, "loss": 0.4588, "step": 1243 }, { "epoch": 0.10077770576798445, "grad_norm": 0.05898994579911232, "learning_rate": 5.038477116241393e-05, "loss": 0.4007, "step": 1244 }, { "epoch": 0.10085871678548283, "grad_norm": 0.0722171887755394, "learning_rate": 5.042527339003645e-05, "loss": 0.4289, "step": 1245 }, { "epoch": 0.1009397278029812, "grad_norm": 0.07884074747562408, "learning_rate": 5.046577561765897e-05, "loss": 0.4367, "step": 1246 }, { "epoch": 0.10102073882047959, "grad_norm": 0.07166250795125961, "learning_rate": 5.050627784528149e-05, "loss": 0.4291, "step": 1247 }, { "epoch": 0.10110174983797797, "grad_norm": 0.07508683204650879, "learning_rate": 5.054678007290401e-05, "loss": 0.3988, "step": 1248 }, { "epoch": 0.10118276085547634, "grad_norm": 0.08329222351312637, "learning_rate": 5.058728230052653e-05, "loss": 0.3927, "step": 1249 }, { "epoch": 0.10126377187297472, "grad_norm": 0.05996193736791611, "learning_rate": 5.0627784528149044e-05, "loss": 0.4289, "step": 1250 }, { "epoch": 0.1013447828904731, "grad_norm": 0.07019893079996109, "learning_rate": 5.066828675577156e-05, "loss": 0.4024, "step": 1251 }, { "epoch": 0.10142579390797149, "grad_norm": 0.07923491299152374, "learning_rate": 5.070878898339408e-05, "loss": 0.3712, "step": 1252 }, { "epoch": 0.10150680492546986, "grad_norm": 0.06898915022611618, "learning_rate": 5.07492912110166e-05, "loss": 0.3843, "step": 1253 }, { "epoch": 0.10158781594296824, "grad_norm": 0.05755610764026642, "learning_rate": 5.0789793438639136e-05, "loss": 0.4135, "step": 1254 }, { "epoch": 0.10166882696046663, "grad_norm": 0.08014683425426483, "learning_rate": 5.083029566626165e-05, "loss": 0.4377, "step": 1255 }, { "epoch": 0.101749837977965, "grad_norm": 0.07435602694749832, "learning_rate": 5.087079789388417e-05, "loss": 0.393, "step": 1256 }, { "epoch": 0.10183084899546338, "grad_norm": 0.07429254055023193, "learning_rate": 5.091130012150669e-05, "loss": 0.4, "step": 1257 }, { "epoch": 0.10191186001296176, "grad_norm": 0.06816524267196655, "learning_rate": 5.095180234912921e-05, "loss": 0.4017, "step": 1258 }, { "epoch": 0.10199287103046015, "grad_norm": 0.07542555779218674, "learning_rate": 5.099230457675173e-05, "loss": 0.4366, "step": 1259 }, { "epoch": 0.10207388204795852, "grad_norm": 0.055631767958402634, "learning_rate": 5.103280680437425e-05, "loss": 0.3886, "step": 1260 }, { "epoch": 0.1021548930654569, "grad_norm": 0.07585558295249939, "learning_rate": 5.107330903199676e-05, "loss": 0.381, "step": 1261 }, { "epoch": 0.10223590408295528, "grad_norm": 0.06207391247153282, "learning_rate": 5.111381125961928e-05, "loss": 0.4269, "step": 1262 }, { "epoch": 0.10231691510045367, "grad_norm": 0.07599161565303802, "learning_rate": 5.11543134872418e-05, "loss": 0.4273, "step": 1263 }, { "epoch": 0.10239792611795204, "grad_norm": 0.07198527455329895, "learning_rate": 5.119481571486432e-05, "loss": 0.4296, "step": 1264 }, { "epoch": 0.10247893713545042, "grad_norm": 0.06384332478046417, "learning_rate": 5.123531794248684e-05, "loss": 0.3764, "step": 1265 }, { "epoch": 0.1025599481529488, "grad_norm": 0.0781475305557251, "learning_rate": 5.127582017010936e-05, "loss": 0.4283, "step": 1266 }, { "epoch": 0.10264095917044717, "grad_norm": 0.05831428989768028, "learning_rate": 5.131632239773188e-05, "loss": 0.3982, "step": 1267 }, { "epoch": 0.10272197018794556, "grad_norm": 0.06163075193762779, "learning_rate": 5.135682462535439e-05, "loss": 0.4392, "step": 1268 }, { "epoch": 0.10280298120544394, "grad_norm": 0.07373479008674622, "learning_rate": 5.139732685297691e-05, "loss": 0.4261, "step": 1269 }, { "epoch": 0.10288399222294232, "grad_norm": 0.07836463302373886, "learning_rate": 5.143782908059943e-05, "loss": 0.4043, "step": 1270 }, { "epoch": 0.1029650032404407, "grad_norm": 0.05384104326367378, "learning_rate": 5.147833130822195e-05, "loss": 0.4128, "step": 1271 }, { "epoch": 0.10304601425793908, "grad_norm": 0.10287515819072723, "learning_rate": 5.151883353584447e-05, "loss": 0.3915, "step": 1272 }, { "epoch": 0.10312702527543746, "grad_norm": 0.06811502575874329, "learning_rate": 5.1559335763466996e-05, "loss": 0.4398, "step": 1273 }, { "epoch": 0.10320803629293585, "grad_norm": 0.06112295761704445, "learning_rate": 5.1599837991089516e-05, "loss": 0.4399, "step": 1274 }, { "epoch": 0.10328904731043421, "grad_norm": 0.07518140226602554, "learning_rate": 5.1640340218712035e-05, "loss": 0.3912, "step": 1275 }, { "epoch": 0.1033700583279326, "grad_norm": 0.07181891798973083, "learning_rate": 5.1680842446334555e-05, "loss": 0.3966, "step": 1276 }, { "epoch": 0.10345106934543098, "grad_norm": 0.07075998187065125, "learning_rate": 5.1721344673957075e-05, "loss": 0.4301, "step": 1277 }, { "epoch": 0.10353208036292935, "grad_norm": 0.059220463037490845, "learning_rate": 5.1761846901579594e-05, "loss": 0.3807, "step": 1278 }, { "epoch": 0.10361309138042774, "grad_norm": 0.06967325508594513, "learning_rate": 5.180234912920211e-05, "loss": 0.4307, "step": 1279 }, { "epoch": 0.10369410239792612, "grad_norm": 0.06643428653478622, "learning_rate": 5.184285135682463e-05, "loss": 0.4111, "step": 1280 }, { "epoch": 0.1037751134154245, "grad_norm": 0.07665630429983139, "learning_rate": 5.188335358444715e-05, "loss": 0.4827, "step": 1281 }, { "epoch": 0.10385612443292287, "grad_norm": 0.06388570368289948, "learning_rate": 5.1923855812069666e-05, "loss": 0.3923, "step": 1282 }, { "epoch": 0.10393713545042126, "grad_norm": 0.07032448053359985, "learning_rate": 5.1964358039692186e-05, "loss": 0.4156, "step": 1283 }, { "epoch": 0.10401814646791964, "grad_norm": 0.06190132722258568, "learning_rate": 5.2004860267314706e-05, "loss": 0.3803, "step": 1284 }, { "epoch": 0.10409915748541802, "grad_norm": 0.07370159029960632, "learning_rate": 5.2045362494937225e-05, "loss": 0.3936, "step": 1285 }, { "epoch": 0.10418016850291639, "grad_norm": 0.054596032947301865, "learning_rate": 5.208586472255974e-05, "loss": 0.3749, "step": 1286 }, { "epoch": 0.10426117952041478, "grad_norm": 0.07030202448368073, "learning_rate": 5.212636695018226e-05, "loss": 0.394, "step": 1287 }, { "epoch": 0.10434219053791316, "grad_norm": 0.060638271272182465, "learning_rate": 5.216686917780478e-05, "loss": 0.413, "step": 1288 }, { "epoch": 0.10442320155541153, "grad_norm": 0.0664840042591095, "learning_rate": 5.22073714054273e-05, "loss": 0.3936, "step": 1289 }, { "epoch": 0.10450421257290991, "grad_norm": 0.060060229152441025, "learning_rate": 5.224787363304982e-05, "loss": 0.4421, "step": 1290 }, { "epoch": 0.1045852235904083, "grad_norm": 0.08365281671285629, "learning_rate": 5.2288375860672337e-05, "loss": 0.3978, "step": 1291 }, { "epoch": 0.10466623460790668, "grad_norm": 0.07517527043819427, "learning_rate": 5.232887808829486e-05, "loss": 0.4183, "step": 1292 }, { "epoch": 0.10474724562540505, "grad_norm": 0.06062887981534004, "learning_rate": 5.236938031591738e-05, "loss": 0.4059, "step": 1293 }, { "epoch": 0.10482825664290343, "grad_norm": 0.07648169249296188, "learning_rate": 5.24098825435399e-05, "loss": 0.4105, "step": 1294 }, { "epoch": 0.10490926766040182, "grad_norm": 0.0700891986489296, "learning_rate": 5.245038477116242e-05, "loss": 0.3848, "step": 1295 }, { "epoch": 0.1049902786779002, "grad_norm": 0.05411629378795624, "learning_rate": 5.249088699878494e-05, "loss": 0.3797, "step": 1296 }, { "epoch": 0.10507128969539857, "grad_norm": 0.057578980922698975, "learning_rate": 5.2531389226407455e-05, "loss": 0.4323, "step": 1297 }, { "epoch": 0.10515230071289695, "grad_norm": 0.08588584512472153, "learning_rate": 5.2571891454029974e-05, "loss": 0.4319, "step": 1298 }, { "epoch": 0.10523331173039534, "grad_norm": 0.08884284645318985, "learning_rate": 5.2612393681652494e-05, "loss": 0.439, "step": 1299 }, { "epoch": 0.10531432274789371, "grad_norm": 0.0666431188583374, "learning_rate": 5.2652895909275014e-05, "loss": 0.3792, "step": 1300 }, { "epoch": 0.10539533376539209, "grad_norm": 0.06067335978150368, "learning_rate": 5.269339813689753e-05, "loss": 0.3713, "step": 1301 }, { "epoch": 0.10547634478289047, "grad_norm": 0.06495458632707596, "learning_rate": 5.273390036452005e-05, "loss": 0.442, "step": 1302 }, { "epoch": 0.10555735580038886, "grad_norm": 0.0760221853852272, "learning_rate": 5.2774402592142566e-05, "loss": 0.4034, "step": 1303 }, { "epoch": 0.10563836681788723, "grad_norm": 0.07905784994363785, "learning_rate": 5.2814904819765085e-05, "loss": 0.4143, "step": 1304 }, { "epoch": 0.10571937783538561, "grad_norm": 0.08168292790651321, "learning_rate": 5.2855407047387605e-05, "loss": 0.4194, "step": 1305 }, { "epoch": 0.105800388852884, "grad_norm": 0.06710600107908249, "learning_rate": 5.2895909275010125e-05, "loss": 0.3838, "step": 1306 }, { "epoch": 0.10588139987038238, "grad_norm": 0.06214667111635208, "learning_rate": 5.2936411502632644e-05, "loss": 0.4564, "step": 1307 }, { "epoch": 0.10596241088788075, "grad_norm": 0.11926735192537308, "learning_rate": 5.2976913730255164e-05, "loss": 0.4303, "step": 1308 }, { "epoch": 0.10604342190537913, "grad_norm": 0.08118419349193573, "learning_rate": 5.3017415957877684e-05, "loss": 0.3886, "step": 1309 }, { "epoch": 0.10612443292287752, "grad_norm": 0.06788288801908493, "learning_rate": 5.305791818550021e-05, "loss": 0.3587, "step": 1310 }, { "epoch": 0.10620544394037588, "grad_norm": 0.09914708882570267, "learning_rate": 5.309842041312273e-05, "loss": 0.4239, "step": 1311 }, { "epoch": 0.10628645495787427, "grad_norm": 0.08624789863824844, "learning_rate": 5.313892264074525e-05, "loss": 0.4397, "step": 1312 }, { "epoch": 0.10636746597537265, "grad_norm": 0.09602291136980057, "learning_rate": 5.317942486836777e-05, "loss": 0.3927, "step": 1313 }, { "epoch": 0.10644847699287104, "grad_norm": 0.06951847672462463, "learning_rate": 5.321992709599028e-05, "loss": 0.4475, "step": 1314 }, { "epoch": 0.1065294880103694, "grad_norm": 0.09299685060977936, "learning_rate": 5.32604293236128e-05, "loss": 0.371, "step": 1315 }, { "epoch": 0.10661049902786779, "grad_norm": 0.0918346717953682, "learning_rate": 5.330093155123532e-05, "loss": 0.4075, "step": 1316 }, { "epoch": 0.10669151004536617, "grad_norm": 0.07095564901828766, "learning_rate": 5.334143377885784e-05, "loss": 0.4471, "step": 1317 }, { "epoch": 0.10677252106286456, "grad_norm": 0.08020664006471634, "learning_rate": 5.338193600648036e-05, "loss": 0.3802, "step": 1318 }, { "epoch": 0.10685353208036293, "grad_norm": 0.08093412220478058, "learning_rate": 5.342243823410288e-05, "loss": 0.3666, "step": 1319 }, { "epoch": 0.10693454309786131, "grad_norm": 0.09224852919578552, "learning_rate": 5.34629404617254e-05, "loss": 0.4185, "step": 1320 }, { "epoch": 0.10701555411535969, "grad_norm": 0.10123410820960999, "learning_rate": 5.350344268934791e-05, "loss": 0.4172, "step": 1321 }, { "epoch": 0.10709656513285806, "grad_norm": 0.08183860778808594, "learning_rate": 5.354394491697043e-05, "loss": 0.3952, "step": 1322 }, { "epoch": 0.10717757615035645, "grad_norm": 0.06311725080013275, "learning_rate": 5.358444714459295e-05, "loss": 0.3537, "step": 1323 }, { "epoch": 0.10725858716785483, "grad_norm": 0.06070149317383766, "learning_rate": 5.362494937221547e-05, "loss": 0.4073, "step": 1324 }, { "epoch": 0.10733959818535321, "grad_norm": 0.06825286895036697, "learning_rate": 5.366545159983799e-05, "loss": 0.4092, "step": 1325 }, { "epoch": 0.10742060920285158, "grad_norm": 0.08379079401493073, "learning_rate": 5.370595382746051e-05, "loss": 0.4024, "step": 1326 }, { "epoch": 0.10750162022034997, "grad_norm": 0.06500423699617386, "learning_rate": 5.3746456055083024e-05, "loss": 0.3803, "step": 1327 }, { "epoch": 0.10758263123784835, "grad_norm": 0.07769917696714401, "learning_rate": 5.3786958282705544e-05, "loss": 0.3683, "step": 1328 }, { "epoch": 0.10766364225534673, "grad_norm": 0.13604873418807983, "learning_rate": 5.382746051032808e-05, "loss": 0.3725, "step": 1329 }, { "epoch": 0.1077446532728451, "grad_norm": 0.08064484596252441, "learning_rate": 5.38679627379506e-05, "loss": 0.4188, "step": 1330 }, { "epoch": 0.10782566429034349, "grad_norm": 0.07017720490694046, "learning_rate": 5.3908464965573117e-05, "loss": 0.3579, "step": 1331 }, { "epoch": 0.10790667530784187, "grad_norm": 0.07512283325195312, "learning_rate": 5.394896719319563e-05, "loss": 0.4195, "step": 1332 }, { "epoch": 0.10798768632534024, "grad_norm": 0.07279182970523834, "learning_rate": 5.398946942081815e-05, "loss": 0.4237, "step": 1333 }, { "epoch": 0.10806869734283862, "grad_norm": 0.07479194551706314, "learning_rate": 5.402997164844067e-05, "loss": 0.4228, "step": 1334 }, { "epoch": 0.10814970836033701, "grad_norm": 0.06885584443807602, "learning_rate": 5.407047387606319e-05, "loss": 0.3907, "step": 1335 }, { "epoch": 0.10823071937783539, "grad_norm": 0.08084303885698318, "learning_rate": 5.411097610368571e-05, "loss": 0.3847, "step": 1336 }, { "epoch": 0.10831173039533376, "grad_norm": 0.0600086972117424, "learning_rate": 5.415147833130823e-05, "loss": 0.4101, "step": 1337 }, { "epoch": 0.10839274141283214, "grad_norm": 0.07234185189008713, "learning_rate": 5.419198055893074e-05, "loss": 0.4056, "step": 1338 }, { "epoch": 0.10847375243033053, "grad_norm": 0.07753019034862518, "learning_rate": 5.423248278655326e-05, "loss": 0.4525, "step": 1339 }, { "epoch": 0.10855476344782891, "grad_norm": 0.08446196466684341, "learning_rate": 5.427298501417578e-05, "loss": 0.3609, "step": 1340 }, { "epoch": 0.10863577446532728, "grad_norm": 0.05372902750968933, "learning_rate": 5.43134872417983e-05, "loss": 0.3903, "step": 1341 }, { "epoch": 0.10871678548282566, "grad_norm": 0.06935431808233261, "learning_rate": 5.435398946942082e-05, "loss": 0.445, "step": 1342 }, { "epoch": 0.10879779650032405, "grad_norm": 0.07019354403018951, "learning_rate": 5.439449169704334e-05, "loss": 0.4459, "step": 1343 }, { "epoch": 0.10887880751782242, "grad_norm": 0.07519102841615677, "learning_rate": 5.443499392466586e-05, "loss": 0.4252, "step": 1344 }, { "epoch": 0.1089598185353208, "grad_norm": 0.07183311134576797, "learning_rate": 5.447549615228837e-05, "loss": 0.4769, "step": 1345 }, { "epoch": 0.10904082955281919, "grad_norm": 0.0855363979935646, "learning_rate": 5.451599837991089e-05, "loss": 0.4073, "step": 1346 }, { "epoch": 0.10912184057031757, "grad_norm": 0.06138298660516739, "learning_rate": 5.455650060753341e-05, "loss": 0.4301, "step": 1347 }, { "epoch": 0.10920285158781594, "grad_norm": 0.09503749012947083, "learning_rate": 5.4597002835155944e-05, "loss": 0.4495, "step": 1348 }, { "epoch": 0.10928386260531432, "grad_norm": 0.07598304003477097, "learning_rate": 5.463750506277846e-05, "loss": 0.3855, "step": 1349 }, { "epoch": 0.1093648736228127, "grad_norm": 0.05978058651089668, "learning_rate": 5.467800729040098e-05, "loss": 0.3343, "step": 1350 }, { "epoch": 0.10944588464031109, "grad_norm": 0.0791708379983902, "learning_rate": 5.4718509518023496e-05, "loss": 0.3953, "step": 1351 }, { "epoch": 0.10952689565780946, "grad_norm": 0.07386378198862076, "learning_rate": 5.4759011745646016e-05, "loss": 0.3758, "step": 1352 }, { "epoch": 0.10960790667530784, "grad_norm": 0.0669383704662323, "learning_rate": 5.4799513973268536e-05, "loss": 0.4606, "step": 1353 }, { "epoch": 0.10968891769280623, "grad_norm": 0.05895500257611275, "learning_rate": 5.4840016200891055e-05, "loss": 0.4016, "step": 1354 }, { "epoch": 0.1097699287103046, "grad_norm": 0.0767853856086731, "learning_rate": 5.4880518428513575e-05, "loss": 0.3933, "step": 1355 }, { "epoch": 0.10985093972780298, "grad_norm": 0.055138807743787766, "learning_rate": 5.492102065613609e-05, "loss": 0.4144, "step": 1356 }, { "epoch": 0.10993195074530136, "grad_norm": 0.06357793509960175, "learning_rate": 5.496152288375861e-05, "loss": 0.3813, "step": 1357 }, { "epoch": 0.11001296176279975, "grad_norm": 0.062212321907281876, "learning_rate": 5.500202511138113e-05, "loss": 0.3552, "step": 1358 }, { "epoch": 0.11009397278029812, "grad_norm": 0.07156316190958023, "learning_rate": 5.504252733900365e-05, "loss": 0.3622, "step": 1359 }, { "epoch": 0.1101749837977965, "grad_norm": 0.06475205719470978, "learning_rate": 5.5083029566626167e-05, "loss": 0.3744, "step": 1360 }, { "epoch": 0.11025599481529488, "grad_norm": 0.07238809019327164, "learning_rate": 5.5123531794248686e-05, "loss": 0.4212, "step": 1361 }, { "epoch": 0.11033700583279327, "grad_norm": 0.0972784087061882, "learning_rate": 5.5164034021871206e-05, "loss": 0.3891, "step": 1362 }, { "epoch": 0.11041801685029164, "grad_norm": 0.07933227717876434, "learning_rate": 5.520453624949372e-05, "loss": 0.4464, "step": 1363 }, { "epoch": 0.11049902786779002, "grad_norm": 0.06054377183318138, "learning_rate": 5.524503847711624e-05, "loss": 0.3636, "step": 1364 }, { "epoch": 0.1105800388852884, "grad_norm": 0.051436055451631546, "learning_rate": 5.528554070473876e-05, "loss": 0.3919, "step": 1365 }, { "epoch": 0.11066104990278677, "grad_norm": 0.07529158145189285, "learning_rate": 5.532604293236128e-05, "loss": 0.3801, "step": 1366 }, { "epoch": 0.11074206092028516, "grad_norm": 0.08469279110431671, "learning_rate": 5.5366545159983804e-05, "loss": 0.451, "step": 1367 }, { "epoch": 0.11082307193778354, "grad_norm": 0.073479562997818, "learning_rate": 5.5407047387606324e-05, "loss": 0.3786, "step": 1368 }, { "epoch": 0.11090408295528192, "grad_norm": 0.060560815036296844, "learning_rate": 5.5447549615228844e-05, "loss": 0.4208, "step": 1369 }, { "epoch": 0.1109850939727803, "grad_norm": 0.10111406445503235, "learning_rate": 5.548805184285136e-05, "loss": 0.3602, "step": 1370 }, { "epoch": 0.11106610499027868, "grad_norm": 0.05426943302154541, "learning_rate": 5.552855407047388e-05, "loss": 0.3702, "step": 1371 }, { "epoch": 0.11114711600777706, "grad_norm": 0.04940052330493927, "learning_rate": 5.55690562980964e-05, "loss": 0.3639, "step": 1372 }, { "epoch": 0.11122812702527543, "grad_norm": 0.07840988785028458, "learning_rate": 5.5609558525718916e-05, "loss": 0.4495, "step": 1373 }, { "epoch": 0.11130913804277381, "grad_norm": 0.06023367494344711, "learning_rate": 5.5650060753341435e-05, "loss": 0.4355, "step": 1374 }, { "epoch": 0.1113901490602722, "grad_norm": 0.06036898493766785, "learning_rate": 5.5690562980963955e-05, "loss": 0.4203, "step": 1375 }, { "epoch": 0.11147116007777058, "grad_norm": 0.05843716487288475, "learning_rate": 5.5731065208586475e-05, "loss": 0.3917, "step": 1376 }, { "epoch": 0.11155217109526895, "grad_norm": 0.06300392746925354, "learning_rate": 5.5771567436208994e-05, "loss": 0.4065, "step": 1377 }, { "epoch": 0.11163318211276733, "grad_norm": 0.06990660727024078, "learning_rate": 5.5812069663831514e-05, "loss": 0.4314, "step": 1378 }, { "epoch": 0.11171419313026572, "grad_norm": 0.060971733182668686, "learning_rate": 5.5852571891454034e-05, "loss": 0.342, "step": 1379 }, { "epoch": 0.1117952041477641, "grad_norm": 0.05048093572258949, "learning_rate": 5.5893074119076546e-05, "loss": 0.3855, "step": 1380 }, { "epoch": 0.11187621516526247, "grad_norm": 0.06826608628034592, "learning_rate": 5.5933576346699066e-05, "loss": 0.3843, "step": 1381 }, { "epoch": 0.11195722618276086, "grad_norm": 0.06443291902542114, "learning_rate": 5.5974078574321586e-05, "loss": 0.3698, "step": 1382 }, { "epoch": 0.11203823720025924, "grad_norm": 0.06290264427661896, "learning_rate": 5.6014580801944105e-05, "loss": 0.4046, "step": 1383 }, { "epoch": 0.11211924821775761, "grad_norm": 0.06185779720544815, "learning_rate": 5.6055083029566625e-05, "loss": 0.4099, "step": 1384 }, { "epoch": 0.11220025923525599, "grad_norm": 0.07247399538755417, "learning_rate": 5.6095585257189145e-05, "loss": 0.4091, "step": 1385 }, { "epoch": 0.11228127025275438, "grad_norm": 0.05858607590198517, "learning_rate": 5.613608748481167e-05, "loss": 0.42, "step": 1386 }, { "epoch": 0.11236228127025276, "grad_norm": 0.05494888499379158, "learning_rate": 5.617658971243419e-05, "loss": 0.3484, "step": 1387 }, { "epoch": 0.11244329228775113, "grad_norm": 0.06474845111370087, "learning_rate": 5.621709194005671e-05, "loss": 0.3846, "step": 1388 }, { "epoch": 0.11252430330524951, "grad_norm": 0.07955454289913177, "learning_rate": 5.625759416767923e-05, "loss": 0.429, "step": 1389 }, { "epoch": 0.1126053143227479, "grad_norm": 0.07667361944913864, "learning_rate": 5.629809639530175e-05, "loss": 0.4158, "step": 1390 }, { "epoch": 0.11268632534024628, "grad_norm": 0.06198974698781967, "learning_rate": 5.633859862292426e-05, "loss": 0.4174, "step": 1391 }, { "epoch": 0.11276733635774465, "grad_norm": 0.07477148622274399, "learning_rate": 5.637910085054678e-05, "loss": 0.4521, "step": 1392 }, { "epoch": 0.11284834737524303, "grad_norm": 0.06670566648244858, "learning_rate": 5.64196030781693e-05, "loss": 0.3652, "step": 1393 }, { "epoch": 0.11292935839274142, "grad_norm": 0.09626266360282898, "learning_rate": 5.646010530579182e-05, "loss": 0.4482, "step": 1394 }, { "epoch": 0.11301036941023979, "grad_norm": 0.06237015873193741, "learning_rate": 5.650060753341434e-05, "loss": 0.4102, "step": 1395 }, { "epoch": 0.11309138042773817, "grad_norm": 0.054151054471731186, "learning_rate": 5.654110976103686e-05, "loss": 0.4147, "step": 1396 }, { "epoch": 0.11317239144523655, "grad_norm": 0.07566509395837784, "learning_rate": 5.658161198865938e-05, "loss": 0.4318, "step": 1397 }, { "epoch": 0.11325340246273494, "grad_norm": 0.05651364475488663, "learning_rate": 5.6622114216281894e-05, "loss": 0.3687, "step": 1398 }, { "epoch": 0.1133344134802333, "grad_norm": 0.06089472398161888, "learning_rate": 5.666261644390441e-05, "loss": 0.3987, "step": 1399 }, { "epoch": 0.11341542449773169, "grad_norm": 0.08430080115795135, "learning_rate": 5.670311867152693e-05, "loss": 0.4169, "step": 1400 }, { "epoch": 0.11349643551523007, "grad_norm": 0.05499950423836708, "learning_rate": 5.674362089914945e-05, "loss": 0.4104, "step": 1401 }, { "epoch": 0.11357744653272846, "grad_norm": 0.07133946567773819, "learning_rate": 5.678412312677197e-05, "loss": 0.3948, "step": 1402 }, { "epoch": 0.11365845755022683, "grad_norm": 0.07891767472028732, "learning_rate": 5.682462535439449e-05, "loss": 0.3846, "step": 1403 }, { "epoch": 0.11373946856772521, "grad_norm": 0.09289713948965073, "learning_rate": 5.6865127582017005e-05, "loss": 0.3524, "step": 1404 }, { "epoch": 0.1138204795852236, "grad_norm": 0.0759500190615654, "learning_rate": 5.690562980963954e-05, "loss": 0.3903, "step": 1405 }, { "epoch": 0.11390149060272196, "grad_norm": 0.05667630583047867, "learning_rate": 5.694613203726206e-05, "loss": 0.3905, "step": 1406 }, { "epoch": 0.11398250162022035, "grad_norm": 0.07564699649810791, "learning_rate": 5.698663426488458e-05, "loss": 0.4242, "step": 1407 }, { "epoch": 0.11406351263771873, "grad_norm": 0.05298449099063873, "learning_rate": 5.70271364925071e-05, "loss": 0.3956, "step": 1408 }, { "epoch": 0.11414452365521711, "grad_norm": 0.08333901315927505, "learning_rate": 5.706763872012961e-05, "loss": 0.3865, "step": 1409 }, { "epoch": 0.11422553467271548, "grad_norm": 0.05635182932019234, "learning_rate": 5.710814094775213e-05, "loss": 0.407, "step": 1410 }, { "epoch": 0.11430654569021387, "grad_norm": 0.07674378901720047, "learning_rate": 5.714864317537465e-05, "loss": 0.4479, "step": 1411 }, { "epoch": 0.11438755670771225, "grad_norm": 0.07845453172922134, "learning_rate": 5.718914540299717e-05, "loss": 0.3823, "step": 1412 }, { "epoch": 0.11446856772521063, "grad_norm": 0.08581458032131195, "learning_rate": 5.722964763061969e-05, "loss": 0.3887, "step": 1413 }, { "epoch": 0.114549578742709, "grad_norm": 0.08316148072481155, "learning_rate": 5.727014985824221e-05, "loss": 0.3622, "step": 1414 }, { "epoch": 0.11463058976020739, "grad_norm": 0.11354885250329971, "learning_rate": 5.731065208586472e-05, "loss": 0.3941, "step": 1415 }, { "epoch": 0.11471160077770577, "grad_norm": 0.07128278911113739, "learning_rate": 5.735115431348724e-05, "loss": 0.4047, "step": 1416 }, { "epoch": 0.11479261179520414, "grad_norm": 0.07349122315645218, "learning_rate": 5.739165654110976e-05, "loss": 0.4173, "step": 1417 }, { "epoch": 0.11487362281270252, "grad_norm": 0.06401881575584412, "learning_rate": 5.743215876873228e-05, "loss": 0.416, "step": 1418 }, { "epoch": 0.11495463383020091, "grad_norm": 0.06177844852209091, "learning_rate": 5.74726609963548e-05, "loss": 0.4033, "step": 1419 }, { "epoch": 0.11503564484769929, "grad_norm": 0.07423469424247742, "learning_rate": 5.751316322397732e-05, "loss": 0.3603, "step": 1420 }, { "epoch": 0.11511665586519766, "grad_norm": 0.06056517735123634, "learning_rate": 5.755366545159984e-05, "loss": 0.3945, "step": 1421 }, { "epoch": 0.11519766688269605, "grad_norm": 0.06560319662094116, "learning_rate": 5.759416767922235e-05, "loss": 0.4216, "step": 1422 }, { "epoch": 0.11527867790019443, "grad_norm": 0.05511828511953354, "learning_rate": 5.7634669906844885e-05, "loss": 0.3935, "step": 1423 }, { "epoch": 0.11535968891769281, "grad_norm": 0.0775819942355156, "learning_rate": 5.7675172134467405e-05, "loss": 0.4025, "step": 1424 }, { "epoch": 0.11544069993519118, "grad_norm": 0.05826788395643234, "learning_rate": 5.7715674362089925e-05, "loss": 0.3966, "step": 1425 }, { "epoch": 0.11552171095268957, "grad_norm": 0.059229105710983276, "learning_rate": 5.775617658971244e-05, "loss": 0.3551, "step": 1426 }, { "epoch": 0.11560272197018795, "grad_norm": 0.054622627794742584, "learning_rate": 5.779667881733496e-05, "loss": 0.3783, "step": 1427 }, { "epoch": 0.11568373298768632, "grad_norm": 0.07204500585794449, "learning_rate": 5.783718104495748e-05, "loss": 0.3871, "step": 1428 }, { "epoch": 0.1157647440051847, "grad_norm": 0.062203872948884964, "learning_rate": 5.787768327258e-05, "loss": 0.3686, "step": 1429 }, { "epoch": 0.11584575502268309, "grad_norm": 0.07069511711597443, "learning_rate": 5.7918185500202516e-05, "loss": 0.4034, "step": 1430 }, { "epoch": 0.11592676604018147, "grad_norm": 0.053734250366687775, "learning_rate": 5.7958687727825036e-05, "loss": 0.3774, "step": 1431 }, { "epoch": 0.11600777705767984, "grad_norm": 0.06350980699062347, "learning_rate": 5.7999189955447556e-05, "loss": 0.357, "step": 1432 }, { "epoch": 0.11608878807517822, "grad_norm": 0.06450559943914413, "learning_rate": 5.803969218307007e-05, "loss": 0.373, "step": 1433 }, { "epoch": 0.1161697990926766, "grad_norm": 0.05774838477373123, "learning_rate": 5.808019441069259e-05, "loss": 0.3711, "step": 1434 }, { "epoch": 0.11625081011017499, "grad_norm": 0.05953490734100342, "learning_rate": 5.812069663831511e-05, "loss": 0.375, "step": 1435 }, { "epoch": 0.11633182112767336, "grad_norm": 0.06642217189073563, "learning_rate": 5.816119886593763e-05, "loss": 0.4207, "step": 1436 }, { "epoch": 0.11641283214517174, "grad_norm": 0.06046362221240997, "learning_rate": 5.820170109356015e-05, "loss": 0.3919, "step": 1437 }, { "epoch": 0.11649384316267013, "grad_norm": 0.07783038169145584, "learning_rate": 5.824220332118267e-05, "loss": 0.3518, "step": 1438 }, { "epoch": 0.1165748541801685, "grad_norm": 0.0818445235490799, "learning_rate": 5.8282705548805187e-05, "loss": 0.4278, "step": 1439 }, { "epoch": 0.11665586519766688, "grad_norm": 0.08483424782752991, "learning_rate": 5.83232077764277e-05, "loss": 0.4215, "step": 1440 }, { "epoch": 0.11673687621516526, "grad_norm": 0.06911425292491913, "learning_rate": 5.836371000405022e-05, "loss": 0.4033, "step": 1441 }, { "epoch": 0.11681788723266365, "grad_norm": 0.0859529972076416, "learning_rate": 5.840421223167275e-05, "loss": 0.4568, "step": 1442 }, { "epoch": 0.11689889825016202, "grad_norm": 0.07996172457933426, "learning_rate": 5.844471445929527e-05, "loss": 0.4261, "step": 1443 }, { "epoch": 0.1169799092676604, "grad_norm": 0.07805225998163223, "learning_rate": 5.8485216686917785e-05, "loss": 0.3527, "step": 1444 }, { "epoch": 0.11706092028515878, "grad_norm": 0.053170718252658844, "learning_rate": 5.8525718914540305e-05, "loss": 0.4232, "step": 1445 }, { "epoch": 0.11714193130265717, "grad_norm": 0.056831154972314835, "learning_rate": 5.8566221142162824e-05, "loss": 0.3995, "step": 1446 }, { "epoch": 0.11722294232015554, "grad_norm": 0.07470972090959549, "learning_rate": 5.8606723369785344e-05, "loss": 0.4433, "step": 1447 }, { "epoch": 0.11730395333765392, "grad_norm": 0.08808553218841553, "learning_rate": 5.8647225597407864e-05, "loss": 0.4363, "step": 1448 }, { "epoch": 0.1173849643551523, "grad_norm": 0.059553466737270355, "learning_rate": 5.868772782503038e-05, "loss": 0.3869, "step": 1449 }, { "epoch": 0.11746597537265067, "grad_norm": 0.06513705104589462, "learning_rate": 5.8728230052652896e-05, "loss": 0.3305, "step": 1450 }, { "epoch": 0.11754698639014906, "grad_norm": 0.07553792744874954, "learning_rate": 5.8768732280275416e-05, "loss": 0.4072, "step": 1451 }, { "epoch": 0.11762799740764744, "grad_norm": 0.07356736063957214, "learning_rate": 5.8809234507897935e-05, "loss": 0.422, "step": 1452 }, { "epoch": 0.11770900842514583, "grad_norm": 0.08110546320676804, "learning_rate": 5.8849736735520455e-05, "loss": 0.4178, "step": 1453 }, { "epoch": 0.1177900194426442, "grad_norm": 0.07067373394966125, "learning_rate": 5.8890238963142975e-05, "loss": 0.3834, "step": 1454 }, { "epoch": 0.11787103046014258, "grad_norm": 0.06569874286651611, "learning_rate": 5.8930741190765494e-05, "loss": 0.4695, "step": 1455 }, { "epoch": 0.11795204147764096, "grad_norm": 0.06837479025125504, "learning_rate": 5.8971243418388014e-05, "loss": 0.4075, "step": 1456 }, { "epoch": 0.11803305249513935, "grad_norm": 0.06939905881881714, "learning_rate": 5.901174564601053e-05, "loss": 0.4364, "step": 1457 }, { "epoch": 0.11811406351263772, "grad_norm": 0.05386871099472046, "learning_rate": 5.905224787363305e-05, "loss": 0.3447, "step": 1458 }, { "epoch": 0.1181950745301361, "grad_norm": 0.06182454526424408, "learning_rate": 5.9092750101255566e-05, "loss": 0.4063, "step": 1459 }, { "epoch": 0.11827608554763448, "grad_norm": 0.08313605934381485, "learning_rate": 5.9133252328878086e-05, "loss": 0.4423, "step": 1460 }, { "epoch": 0.11835709656513285, "grad_norm": 0.058940161019563675, "learning_rate": 5.917375455650061e-05, "loss": 0.3887, "step": 1461 }, { "epoch": 0.11843810758263124, "grad_norm": 0.06423253566026688, "learning_rate": 5.921425678412313e-05, "loss": 0.4242, "step": 1462 }, { "epoch": 0.11851911860012962, "grad_norm": 0.0520365871489048, "learning_rate": 5.925475901174565e-05, "loss": 0.3587, "step": 1463 }, { "epoch": 0.118600129617628, "grad_norm": 0.06681492924690247, "learning_rate": 5.929526123936817e-05, "loss": 0.378, "step": 1464 }, { "epoch": 0.11868114063512637, "grad_norm": 0.07512427121400833, "learning_rate": 5.933576346699069e-05, "loss": 0.4253, "step": 1465 }, { "epoch": 0.11876215165262476, "grad_norm": 0.06821638345718384, "learning_rate": 5.937626569461321e-05, "loss": 0.3874, "step": 1466 }, { "epoch": 0.11884316267012314, "grad_norm": 0.09401658922433853, "learning_rate": 5.941676792223573e-05, "loss": 0.4082, "step": 1467 }, { "epoch": 0.11892417368762152, "grad_norm": 0.08017772436141968, "learning_rate": 5.945727014985824e-05, "loss": 0.4195, "step": 1468 }, { "epoch": 0.11900518470511989, "grad_norm": 0.0690443366765976, "learning_rate": 5.949777237748076e-05, "loss": 0.4214, "step": 1469 }, { "epoch": 0.11908619572261828, "grad_norm": 0.07056768983602524, "learning_rate": 5.953827460510328e-05, "loss": 0.4009, "step": 1470 }, { "epoch": 0.11916720674011666, "grad_norm": 0.07003843039274216, "learning_rate": 5.95787768327258e-05, "loss": 0.4205, "step": 1471 }, { "epoch": 0.11924821775761503, "grad_norm": 0.1017618402838707, "learning_rate": 5.961927906034832e-05, "loss": 0.4004, "step": 1472 }, { "epoch": 0.11932922877511341, "grad_norm": 0.07712042331695557, "learning_rate": 5.965978128797084e-05, "loss": 0.3657, "step": 1473 }, { "epoch": 0.1194102397926118, "grad_norm": 0.06559691578149796, "learning_rate": 5.970028351559336e-05, "loss": 0.4154, "step": 1474 }, { "epoch": 0.11949125081011018, "grad_norm": 0.06711196154356003, "learning_rate": 5.9740785743215874e-05, "loss": 0.4071, "step": 1475 }, { "epoch": 0.11957226182760855, "grad_norm": 0.06897864490747452, "learning_rate": 5.9781287970838394e-05, "loss": 0.4112, "step": 1476 }, { "epoch": 0.11965327284510693, "grad_norm": 0.053040411323308945, "learning_rate": 5.9821790198460914e-05, "loss": 0.363, "step": 1477 }, { "epoch": 0.11973428386260532, "grad_norm": 0.06178470328450203, "learning_rate": 5.986229242608343e-05, "loss": 0.3897, "step": 1478 }, { "epoch": 0.1198152948801037, "grad_norm": 0.0642366036772728, "learning_rate": 5.990279465370595e-05, "loss": 0.3915, "step": 1479 }, { "epoch": 0.11989630589760207, "grad_norm": 0.061436790972948074, "learning_rate": 5.994329688132848e-05, "loss": 0.3906, "step": 1480 }, { "epoch": 0.11997731691510045, "grad_norm": 0.064246267080307, "learning_rate": 5.9983799108951e-05, "loss": 0.3562, "step": 1481 }, { "epoch": 0.12005832793259884, "grad_norm": 0.07403618097305298, "learning_rate": 6.002430133657352e-05, "loss": 0.4127, "step": 1482 }, { "epoch": 0.12013933895009721, "grad_norm": 0.06679215282201767, "learning_rate": 6.006480356419604e-05, "loss": 0.4089, "step": 1483 }, { "epoch": 0.12022034996759559, "grad_norm": 0.08910468965768814, "learning_rate": 6.010530579181856e-05, "loss": 0.4234, "step": 1484 }, { "epoch": 0.12030136098509397, "grad_norm": 0.07021404802799225, "learning_rate": 6.014580801944108e-05, "loss": 0.3685, "step": 1485 }, { "epoch": 0.12038237200259236, "grad_norm": 0.07262944430112839, "learning_rate": 6.018631024706359e-05, "loss": 0.4486, "step": 1486 }, { "epoch": 0.12046338302009073, "grad_norm": 0.060715995728969574, "learning_rate": 6.022681247468611e-05, "loss": 0.4298, "step": 1487 }, { "epoch": 0.12054439403758911, "grad_norm": 0.07092267274856567, "learning_rate": 6.026731470230863e-05, "loss": 0.3316, "step": 1488 }, { "epoch": 0.1206254050550875, "grad_norm": 0.055634964257478714, "learning_rate": 6.030781692993115e-05, "loss": 0.3747, "step": 1489 }, { "epoch": 0.12070641607258586, "grad_norm": 0.06697001308202744, "learning_rate": 6.034831915755367e-05, "loss": 0.3923, "step": 1490 }, { "epoch": 0.12078742709008425, "grad_norm": 0.068814717233181, "learning_rate": 6.038882138517619e-05, "loss": 0.3987, "step": 1491 }, { "epoch": 0.12086843810758263, "grad_norm": 0.06390635669231415, "learning_rate": 6.04293236127987e-05, "loss": 0.3724, "step": 1492 }, { "epoch": 0.12094944912508102, "grad_norm": 0.06354351341724396, "learning_rate": 6.046982584042122e-05, "loss": 0.3693, "step": 1493 }, { "epoch": 0.12103046014257939, "grad_norm": 0.06260344386100769, "learning_rate": 6.051032806804374e-05, "loss": 0.406, "step": 1494 }, { "epoch": 0.12111147116007777, "grad_norm": 0.06350363790988922, "learning_rate": 6.055083029566626e-05, "loss": 0.3977, "step": 1495 }, { "epoch": 0.12119248217757615, "grad_norm": 0.06784169375896454, "learning_rate": 6.059133252328878e-05, "loss": 0.4385, "step": 1496 }, { "epoch": 0.12127349319507454, "grad_norm": 0.06894693523645401, "learning_rate": 6.06318347509113e-05, "loss": 0.4927, "step": 1497 }, { "epoch": 0.1213545042125729, "grad_norm": 0.059703197330236435, "learning_rate": 6.067233697853382e-05, "loss": 0.3762, "step": 1498 }, { "epoch": 0.12143551523007129, "grad_norm": 0.0626472756266594, "learning_rate": 6.0712839206156346e-05, "loss": 0.4345, "step": 1499 }, { "epoch": 0.12151652624756967, "grad_norm": 0.073844313621521, "learning_rate": 6.0753341433778866e-05, "loss": 0.418, "step": 1500 }, { "epoch": 0.12159753726506804, "grad_norm": 0.07876653969287872, "learning_rate": 6.0793843661401386e-05, "loss": 0.3884, "step": 1501 }, { "epoch": 0.12167854828256643, "grad_norm": 0.06772121787071228, "learning_rate": 6.0834345889023905e-05, "loss": 0.4145, "step": 1502 }, { "epoch": 0.12175955930006481, "grad_norm": 0.0799356997013092, "learning_rate": 6.087484811664642e-05, "loss": 0.4178, "step": 1503 }, { "epoch": 0.1218405703175632, "grad_norm": 0.06637130677700043, "learning_rate": 6.091535034426894e-05, "loss": 0.4, "step": 1504 }, { "epoch": 0.12192158133506156, "grad_norm": 0.08216597139835358, "learning_rate": 6.095585257189146e-05, "loss": 0.4349, "step": 1505 }, { "epoch": 0.12200259235255995, "grad_norm": 0.07377637922763824, "learning_rate": 6.099635479951398e-05, "loss": 0.4112, "step": 1506 }, { "epoch": 0.12208360337005833, "grad_norm": 0.06952842324972153, "learning_rate": 6.103685702713649e-05, "loss": 0.3819, "step": 1507 }, { "epoch": 0.12216461438755671, "grad_norm": 0.07549799978733063, "learning_rate": 6.107735925475901e-05, "loss": 0.4019, "step": 1508 }, { "epoch": 0.12224562540505508, "grad_norm": 0.062323398888111115, "learning_rate": 6.111786148238153e-05, "loss": 0.382, "step": 1509 }, { "epoch": 0.12232663642255347, "grad_norm": 0.07245488464832306, "learning_rate": 6.115836371000405e-05, "loss": 0.3724, "step": 1510 }, { "epoch": 0.12240764744005185, "grad_norm": 0.07224228233098984, "learning_rate": 6.119886593762657e-05, "loss": 0.3675, "step": 1511 }, { "epoch": 0.12248865845755022, "grad_norm": 0.06970011442899704, "learning_rate": 6.123936816524909e-05, "loss": 0.3884, "step": 1512 }, { "epoch": 0.1225696694750486, "grad_norm": 0.06573334336280823, "learning_rate": 6.127987039287161e-05, "loss": 0.3984, "step": 1513 }, { "epoch": 0.12265068049254699, "grad_norm": 0.07179760187864304, "learning_rate": 6.132037262049413e-05, "loss": 0.4381, "step": 1514 }, { "epoch": 0.12273169151004537, "grad_norm": 0.06900203227996826, "learning_rate": 6.136087484811665e-05, "loss": 0.4239, "step": 1515 }, { "epoch": 0.12281270252754374, "grad_norm": 0.08503764122724533, "learning_rate": 6.140137707573917e-05, "loss": 0.4344, "step": 1516 }, { "epoch": 0.12289371354504212, "grad_norm": 0.06342464685440063, "learning_rate": 6.144187930336169e-05, "loss": 0.4055, "step": 1517 }, { "epoch": 0.12297472456254051, "grad_norm": 0.07056683301925659, "learning_rate": 6.14823815309842e-05, "loss": 0.4432, "step": 1518 }, { "epoch": 0.12305573558003889, "grad_norm": 0.05167962238192558, "learning_rate": 6.152288375860673e-05, "loss": 0.3766, "step": 1519 }, { "epoch": 0.12313674659753726, "grad_norm": 0.06147640198469162, "learning_rate": 6.156338598622925e-05, "loss": 0.37, "step": 1520 }, { "epoch": 0.12321775761503564, "grad_norm": 0.0592205636203289, "learning_rate": 6.160388821385177e-05, "loss": 0.3506, "step": 1521 }, { "epoch": 0.12329876863253403, "grad_norm": 0.05324317514896393, "learning_rate": 6.164439044147429e-05, "loss": 0.3264, "step": 1522 }, { "epoch": 0.1233797796500324, "grad_norm": 0.07853440195322037, "learning_rate": 6.16848926690968e-05, "loss": 0.3834, "step": 1523 }, { "epoch": 0.12346079066753078, "grad_norm": 0.06663885712623596, "learning_rate": 6.172539489671932e-05, "loss": 0.4007, "step": 1524 }, { "epoch": 0.12354180168502917, "grad_norm": 0.0634799674153328, "learning_rate": 6.176589712434184e-05, "loss": 0.4035, "step": 1525 }, { "epoch": 0.12362281270252755, "grad_norm": 0.05222257971763611, "learning_rate": 6.180639935196436e-05, "loss": 0.3598, "step": 1526 }, { "epoch": 0.12370382372002592, "grad_norm": 0.051549945026636124, "learning_rate": 6.184690157958688e-05, "loss": 0.4021, "step": 1527 }, { "epoch": 0.1237848347375243, "grad_norm": 0.05802503600716591, "learning_rate": 6.18874038072094e-05, "loss": 0.417, "step": 1528 }, { "epoch": 0.12386584575502269, "grad_norm": 0.08867768198251724, "learning_rate": 6.192790603483192e-05, "loss": 0.4079, "step": 1529 }, { "epoch": 0.12394685677252107, "grad_norm": 0.06496446579694748, "learning_rate": 6.196840826245444e-05, "loss": 0.4484, "step": 1530 }, { "epoch": 0.12402786779001944, "grad_norm": 0.057406000792980194, "learning_rate": 6.200891049007695e-05, "loss": 0.4442, "step": 1531 }, { "epoch": 0.12410887880751782, "grad_norm": 0.054245512932538986, "learning_rate": 6.204941271769947e-05, "loss": 0.3731, "step": 1532 }, { "epoch": 0.1241898898250162, "grad_norm": 0.0568988211452961, "learning_rate": 6.208991494532199e-05, "loss": 0.4182, "step": 1533 }, { "epoch": 0.12427090084251458, "grad_norm": 0.05035136267542839, "learning_rate": 6.213041717294451e-05, "loss": 0.3707, "step": 1534 }, { "epoch": 0.12435191186001296, "grad_norm": 0.059908732771873474, "learning_rate": 6.217091940056703e-05, "loss": 0.4289, "step": 1535 }, { "epoch": 0.12443292287751134, "grad_norm": 0.05112859234213829, "learning_rate": 6.221142162818955e-05, "loss": 0.4052, "step": 1536 }, { "epoch": 0.12451393389500973, "grad_norm": 0.05995730310678482, "learning_rate": 6.225192385581208e-05, "loss": 0.3902, "step": 1537 }, { "epoch": 0.1245949449125081, "grad_norm": 0.06803309172391891, "learning_rate": 6.22924260834346e-05, "loss": 0.436, "step": 1538 }, { "epoch": 0.12467595593000648, "grad_norm": 0.04792184755206108, "learning_rate": 6.233292831105712e-05, "loss": 0.3511, "step": 1539 }, { "epoch": 0.12475696694750486, "grad_norm": 0.07876551896333694, "learning_rate": 6.237343053867964e-05, "loss": 0.4269, "step": 1540 }, { "epoch": 0.12483797796500325, "grad_norm": 0.054781924933195114, "learning_rate": 6.241393276630216e-05, "loss": 0.4205, "step": 1541 }, { "epoch": 0.12491898898250162, "grad_norm": 0.08007676899433136, "learning_rate": 6.245443499392466e-05, "loss": 0.3985, "step": 1542 }, { "epoch": 0.125, "grad_norm": 0.05014176294207573, "learning_rate": 6.249493722154718e-05, "loss": 0.3702, "step": 1543 }, { "epoch": 0.12508101101749838, "grad_norm": 0.05353700742125511, "learning_rate": 6.25354394491697e-05, "loss": 0.3644, "step": 1544 }, { "epoch": 0.12516202203499677, "grad_norm": 0.06442080438137054, "learning_rate": 6.257594167679222e-05, "loss": 0.3492, "step": 1545 }, { "epoch": 0.12524303305249515, "grad_norm": 0.06315489113330841, "learning_rate": 6.261644390441474e-05, "loss": 0.4156, "step": 1546 }, { "epoch": 0.1253240440699935, "grad_norm": 0.06708311289548874, "learning_rate": 6.265694613203726e-05, "loss": 0.3946, "step": 1547 }, { "epoch": 0.1254050550874919, "grad_norm": 0.049992773681879044, "learning_rate": 6.269744835965978e-05, "loss": 0.3638, "step": 1548 }, { "epoch": 0.12548606610499027, "grad_norm": 0.0535244345664978, "learning_rate": 6.27379505872823e-05, "loss": 0.4226, "step": 1549 }, { "epoch": 0.12556707712248866, "grad_norm": 0.055212512612342834, "learning_rate": 6.277845281490482e-05, "loss": 0.4012, "step": 1550 }, { "epoch": 0.12564808813998704, "grad_norm": 0.11156027764081955, "learning_rate": 6.281895504252734e-05, "loss": 0.3796, "step": 1551 }, { "epoch": 0.12572909915748542, "grad_norm": 0.0516376718878746, "learning_rate": 6.285945727014986e-05, "loss": 0.3988, "step": 1552 }, { "epoch": 0.1258101101749838, "grad_norm": 0.06597688049077988, "learning_rate": 6.289995949777238e-05, "loss": 0.436, "step": 1553 }, { "epoch": 0.12589112119248216, "grad_norm": 0.05716634914278984, "learning_rate": 6.29404617253949e-05, "loss": 0.3742, "step": 1554 }, { "epoch": 0.12597213220998055, "grad_norm": 0.06599877029657364, "learning_rate": 6.298096395301742e-05, "loss": 0.4098, "step": 1555 }, { "epoch": 0.12605314322747893, "grad_norm": 0.07445321977138519, "learning_rate": 6.302146618063994e-05, "loss": 0.445, "step": 1556 }, { "epoch": 0.12613415424497731, "grad_norm": 0.05665164813399315, "learning_rate": 6.306196840826246e-05, "loss": 0.4132, "step": 1557 }, { "epoch": 0.1262151652624757, "grad_norm": 0.053392160683870316, "learning_rate": 6.310247063588498e-05, "loss": 0.3849, "step": 1558 }, { "epoch": 0.12629617627997408, "grad_norm": 0.07199081778526306, "learning_rate": 6.31429728635075e-05, "loss": 0.4211, "step": 1559 }, { "epoch": 0.12637718729747247, "grad_norm": 0.07637181133031845, "learning_rate": 6.318347509113002e-05, "loss": 0.4031, "step": 1560 }, { "epoch": 0.12645819831497085, "grad_norm": 0.057522937655448914, "learning_rate": 6.322397731875254e-05, "loss": 0.3851, "step": 1561 }, { "epoch": 0.1265392093324692, "grad_norm": 0.06370987743139267, "learning_rate": 6.326447954637506e-05, "loss": 0.4303, "step": 1562 }, { "epoch": 0.1266202203499676, "grad_norm": 0.05618150904774666, "learning_rate": 6.330498177399758e-05, "loss": 0.4337, "step": 1563 }, { "epoch": 0.12670123136746597, "grad_norm": 0.0655088871717453, "learning_rate": 6.33454840016201e-05, "loss": 0.3722, "step": 1564 }, { "epoch": 0.12678224238496436, "grad_norm": 0.05627769976854324, "learning_rate": 6.338598622924262e-05, "loss": 0.3779, "step": 1565 }, { "epoch": 0.12686325340246274, "grad_norm": 0.06236083433032036, "learning_rate": 6.342648845686512e-05, "loss": 0.369, "step": 1566 }, { "epoch": 0.12694426441996112, "grad_norm": 0.057616692036390305, "learning_rate": 6.346699068448764e-05, "loss": 0.3754, "step": 1567 }, { "epoch": 0.1270252754374595, "grad_norm": 0.06232890859246254, "learning_rate": 6.350749291211016e-05, "loss": 0.3878, "step": 1568 }, { "epoch": 0.12710628645495786, "grad_norm": 0.06295885890722275, "learning_rate": 6.354799513973268e-05, "loss": 0.4025, "step": 1569 }, { "epoch": 0.12718729747245625, "grad_norm": 0.061489179730415344, "learning_rate": 6.35884973673552e-05, "loss": 0.3467, "step": 1570 }, { "epoch": 0.12726830848995463, "grad_norm": 0.06776545196771622, "learning_rate": 6.362899959497772e-05, "loss": 0.4573, "step": 1571 }, { "epoch": 0.127349319507453, "grad_norm": 0.06524661928415298, "learning_rate": 6.366950182260024e-05, "loss": 0.4187, "step": 1572 }, { "epoch": 0.1274303305249514, "grad_norm": 0.061771683394908905, "learning_rate": 6.371000405022276e-05, "loss": 0.4127, "step": 1573 }, { "epoch": 0.12751134154244978, "grad_norm": 0.057876091450452805, "learning_rate": 6.37505062778453e-05, "loss": 0.3942, "step": 1574 }, { "epoch": 0.12759235255994816, "grad_norm": 0.054396193474531174, "learning_rate": 6.379100850546781e-05, "loss": 0.4281, "step": 1575 }, { "epoch": 0.12767336357744652, "grad_norm": 0.07029586285352707, "learning_rate": 6.383151073309033e-05, "loss": 0.3947, "step": 1576 }, { "epoch": 0.1277543745949449, "grad_norm": 0.07588616758584976, "learning_rate": 6.387201296071284e-05, "loss": 0.4575, "step": 1577 }, { "epoch": 0.1278353856124433, "grad_norm": 0.06628143042325974, "learning_rate": 6.391251518833536e-05, "loss": 0.3918, "step": 1578 }, { "epoch": 0.12791639662994167, "grad_norm": 0.06898647546768188, "learning_rate": 6.395301741595788e-05, "loss": 0.3896, "step": 1579 }, { "epoch": 0.12799740764744005, "grad_norm": 0.07129407674074173, "learning_rate": 6.39935196435804e-05, "loss": 0.3751, "step": 1580 }, { "epoch": 0.12807841866493844, "grad_norm": 0.06237871199846268, "learning_rate": 6.403402187120292e-05, "loss": 0.4224, "step": 1581 }, { "epoch": 0.12815942968243682, "grad_norm": 0.08748257905244827, "learning_rate": 6.407452409882544e-05, "loss": 0.4231, "step": 1582 }, { "epoch": 0.1282404406999352, "grad_norm": 0.062178194522857666, "learning_rate": 6.411502632644796e-05, "loss": 0.4333, "step": 1583 }, { "epoch": 0.12832145171743356, "grad_norm": 0.07100991159677505, "learning_rate": 6.415552855407048e-05, "loss": 0.4207, "step": 1584 }, { "epoch": 0.12840246273493194, "grad_norm": 0.0815022736787796, "learning_rate": 6.4196030781693e-05, "loss": 0.4191, "step": 1585 }, { "epoch": 0.12848347375243033, "grad_norm": 0.07659836113452911, "learning_rate": 6.423653300931552e-05, "loss": 0.4201, "step": 1586 }, { "epoch": 0.1285644847699287, "grad_norm": 0.07788506895303726, "learning_rate": 6.427703523693804e-05, "loss": 0.4051, "step": 1587 }, { "epoch": 0.1286454957874271, "grad_norm": 0.09391462057828903, "learning_rate": 6.431753746456056e-05, "loss": 0.4634, "step": 1588 }, { "epoch": 0.12872650680492548, "grad_norm": 0.054652221500873566, "learning_rate": 6.435803969218308e-05, "loss": 0.3983, "step": 1589 }, { "epoch": 0.12880751782242386, "grad_norm": 0.06090129539370537, "learning_rate": 6.439854191980558e-05, "loss": 0.4262, "step": 1590 }, { "epoch": 0.12888852883992222, "grad_norm": 0.05298708379268646, "learning_rate": 6.44390441474281e-05, "loss": 0.3402, "step": 1591 }, { "epoch": 0.1289695398574206, "grad_norm": 0.06067481264472008, "learning_rate": 6.447954637505062e-05, "loss": 0.3825, "step": 1592 }, { "epoch": 0.12905055087491898, "grad_norm": 0.08918090164661407, "learning_rate": 6.452004860267315e-05, "loss": 0.4773, "step": 1593 }, { "epoch": 0.12913156189241737, "grad_norm": 0.06664161384105682, "learning_rate": 6.456055083029567e-05, "loss": 0.383, "step": 1594 }, { "epoch": 0.12921257290991575, "grad_norm": 0.07685840129852295, "learning_rate": 6.46010530579182e-05, "loss": 0.3905, "step": 1595 }, { "epoch": 0.12929358392741414, "grad_norm": 0.07122089713811874, "learning_rate": 6.464155528554071e-05, "loss": 0.3543, "step": 1596 }, { "epoch": 0.12937459494491252, "grad_norm": 0.056531671434640884, "learning_rate": 6.468205751316323e-05, "loss": 0.4529, "step": 1597 }, { "epoch": 0.12945560596241087, "grad_norm": 0.06631804257631302, "learning_rate": 6.472255974078575e-05, "loss": 0.39, "step": 1598 }, { "epoch": 0.12953661697990926, "grad_norm": 0.08155537396669388, "learning_rate": 6.476306196840827e-05, "loss": 0.4113, "step": 1599 }, { "epoch": 0.12961762799740764, "grad_norm": 0.0701952800154686, "learning_rate": 6.480356419603079e-05, "loss": 0.3384, "step": 1600 }, { "epoch": 0.12969863901490603, "grad_norm": 0.07322200387716293, "learning_rate": 6.48440664236533e-05, "loss": 0.376, "step": 1601 }, { "epoch": 0.1297796500324044, "grad_norm": 0.06151268631219864, "learning_rate": 6.488456865127582e-05, "loss": 0.398, "step": 1602 }, { "epoch": 0.1298606610499028, "grad_norm": 0.06803981959819794, "learning_rate": 6.492507087889834e-05, "loss": 0.439, "step": 1603 }, { "epoch": 0.12994167206740118, "grad_norm": 0.05600098520517349, "learning_rate": 6.496557310652086e-05, "loss": 0.3622, "step": 1604 }, { "epoch": 0.13002268308489956, "grad_norm": 0.05879819765686989, "learning_rate": 6.500607533414338e-05, "loss": 0.4124, "step": 1605 }, { "epoch": 0.13010369410239792, "grad_norm": 0.05631718039512634, "learning_rate": 6.50465775617659e-05, "loss": 0.3549, "step": 1606 }, { "epoch": 0.1301847051198963, "grad_norm": 0.05812705308198929, "learning_rate": 6.508707978938842e-05, "loss": 0.368, "step": 1607 }, { "epoch": 0.13026571613739468, "grad_norm": 0.061224520206451416, "learning_rate": 6.512758201701094e-05, "loss": 0.3571, "step": 1608 }, { "epoch": 0.13034672715489307, "grad_norm": 0.06485003232955933, "learning_rate": 6.516808424463346e-05, "loss": 0.3681, "step": 1609 }, { "epoch": 0.13042773817239145, "grad_norm": 0.062425050884485245, "learning_rate": 6.520858647225598e-05, "loss": 0.4119, "step": 1610 }, { "epoch": 0.13050874918988983, "grad_norm": 0.073312908411026, "learning_rate": 6.52490886998785e-05, "loss": 0.392, "step": 1611 }, { "epoch": 0.13058976020738822, "grad_norm": 0.07332398742437363, "learning_rate": 6.528959092750101e-05, "loss": 0.377, "step": 1612 }, { "epoch": 0.13067077122488657, "grad_norm": 0.06517259031534195, "learning_rate": 6.533009315512353e-05, "loss": 0.4136, "step": 1613 }, { "epoch": 0.13075178224238496, "grad_norm": 0.05453259125351906, "learning_rate": 6.537059538274605e-05, "loss": 0.3647, "step": 1614 }, { "epoch": 0.13083279325988334, "grad_norm": 0.06073718145489693, "learning_rate": 6.541109761036857e-05, "loss": 0.4809, "step": 1615 }, { "epoch": 0.13091380427738172, "grad_norm": 0.0551639199256897, "learning_rate": 6.54515998379911e-05, "loss": 0.401, "step": 1616 }, { "epoch": 0.1309948152948801, "grad_norm": 0.04859033226966858, "learning_rate": 6.549210206561361e-05, "loss": 0.3624, "step": 1617 }, { "epoch": 0.1310758263123785, "grad_norm": 0.0692361369729042, "learning_rate": 6.553260429323613e-05, "loss": 0.3955, "step": 1618 }, { "epoch": 0.13115683732987687, "grad_norm": 0.07203768193721771, "learning_rate": 6.557310652085865e-05, "loss": 0.3639, "step": 1619 }, { "epoch": 0.13123784834737523, "grad_norm": 0.06581790000200272, "learning_rate": 6.561360874848117e-05, "loss": 0.3937, "step": 1620 }, { "epoch": 0.1313188593648736, "grad_norm": 0.06830241531133652, "learning_rate": 6.565411097610369e-05, "loss": 0.4286, "step": 1621 }, { "epoch": 0.131399870382372, "grad_norm": 0.05630087852478027, "learning_rate": 6.569461320372621e-05, "loss": 0.3946, "step": 1622 }, { "epoch": 0.13148088139987038, "grad_norm": 0.07321779429912567, "learning_rate": 6.573511543134873e-05, "loss": 0.4141, "step": 1623 }, { "epoch": 0.13156189241736876, "grad_norm": 0.055487010627985, "learning_rate": 6.577561765897125e-05, "loss": 0.4224, "step": 1624 }, { "epoch": 0.13164290343486715, "grad_norm": 0.06711199134588242, "learning_rate": 6.581611988659376e-05, "loss": 0.3823, "step": 1625 }, { "epoch": 0.13172391445236553, "grad_norm": 0.06719058007001877, "learning_rate": 6.585662211421628e-05, "loss": 0.3954, "step": 1626 }, { "epoch": 0.13180492546986391, "grad_norm": 0.06506709009408951, "learning_rate": 6.58971243418388e-05, "loss": 0.4039, "step": 1627 }, { "epoch": 0.13188593648736227, "grad_norm": 0.07303988933563232, "learning_rate": 6.593762656946132e-05, "loss": 0.3961, "step": 1628 }, { "epoch": 0.13196694750486065, "grad_norm": 0.05539443716406822, "learning_rate": 6.597812879708384e-05, "loss": 0.3972, "step": 1629 }, { "epoch": 0.13204795852235904, "grad_norm": 0.07812444865703583, "learning_rate": 6.601863102470636e-05, "loss": 0.4308, "step": 1630 }, { "epoch": 0.13212896953985742, "grad_norm": 0.049702536314725876, "learning_rate": 6.605913325232889e-05, "loss": 0.397, "step": 1631 }, { "epoch": 0.1322099805573558, "grad_norm": 0.06067189574241638, "learning_rate": 6.609963547995141e-05, "loss": 0.3935, "step": 1632 }, { "epoch": 0.1322909915748542, "grad_norm": 0.04642423987388611, "learning_rate": 6.614013770757393e-05, "loss": 0.3677, "step": 1633 }, { "epoch": 0.13237200259235257, "grad_norm": 0.049967169761657715, "learning_rate": 6.618063993519645e-05, "loss": 0.333, "step": 1634 }, { "epoch": 0.13245301360985093, "grad_norm": 0.07911203056573868, "learning_rate": 6.622114216281897e-05, "loss": 0.3841, "step": 1635 }, { "epoch": 0.1325340246273493, "grad_norm": 0.05701819062232971, "learning_rate": 6.626164439044147e-05, "loss": 0.3792, "step": 1636 }, { "epoch": 0.1326150356448477, "grad_norm": 0.071646548807621, "learning_rate": 6.630214661806399e-05, "loss": 0.4173, "step": 1637 }, { "epoch": 0.13269604666234608, "grad_norm": 0.06087390333414078, "learning_rate": 6.634264884568651e-05, "loss": 0.3695, "step": 1638 }, { "epoch": 0.13277705767984446, "grad_norm": 0.06627701967954636, "learning_rate": 6.638315107330903e-05, "loss": 0.3858, "step": 1639 }, { "epoch": 0.13285806869734285, "grad_norm": 0.061554260551929474, "learning_rate": 6.642365330093155e-05, "loss": 0.4465, "step": 1640 }, { "epoch": 0.13293907971484123, "grad_norm": 0.06847324967384338, "learning_rate": 6.646415552855407e-05, "loss": 0.3974, "step": 1641 }, { "epoch": 0.13302009073233959, "grad_norm": 0.07292938232421875, "learning_rate": 6.650465775617659e-05, "loss": 0.4101, "step": 1642 }, { "epoch": 0.13310110174983797, "grad_norm": 0.05951497703790665, "learning_rate": 6.654515998379911e-05, "loss": 0.375, "step": 1643 }, { "epoch": 0.13318211276733635, "grad_norm": 0.0688377097249031, "learning_rate": 6.658566221142163e-05, "loss": 0.4024, "step": 1644 }, { "epoch": 0.13326312378483474, "grad_norm": 0.06519949436187744, "learning_rate": 6.662616443904415e-05, "loss": 0.4162, "step": 1645 }, { "epoch": 0.13334413480233312, "grad_norm": 0.061955176293849945, "learning_rate": 6.666666666666667e-05, "loss": 0.4252, "step": 1646 }, { "epoch": 0.1334251458198315, "grad_norm": 0.07237394899129868, "learning_rate": 6.670716889428919e-05, "loss": 0.4233, "step": 1647 }, { "epoch": 0.1335061568373299, "grad_norm": 0.05980387330055237, "learning_rate": 6.674767112191171e-05, "loss": 0.4072, "step": 1648 }, { "epoch": 0.13358716785482827, "grad_norm": 0.06416431814432144, "learning_rate": 6.678817334953423e-05, "loss": 0.434, "step": 1649 }, { "epoch": 0.13366817887232663, "grad_norm": 0.06477854400873184, "learning_rate": 6.682867557715675e-05, "loss": 0.3973, "step": 1650 }, { "epoch": 0.133749189889825, "grad_norm": 0.07490331679582596, "learning_rate": 6.686917780477927e-05, "loss": 0.4374, "step": 1651 }, { "epoch": 0.1338302009073234, "grad_norm": 0.06125623732805252, "learning_rate": 6.690968003240179e-05, "loss": 0.3869, "step": 1652 }, { "epoch": 0.13391121192482178, "grad_norm": 0.06214214488863945, "learning_rate": 6.695018226002431e-05, "loss": 0.3977, "step": 1653 }, { "epoch": 0.13399222294232016, "grad_norm": 0.046827007085084915, "learning_rate": 6.699068448764683e-05, "loss": 0.3905, "step": 1654 }, { "epoch": 0.13407323395981854, "grad_norm": 0.05726904422044754, "learning_rate": 6.703118671526935e-05, "loss": 0.4151, "step": 1655 }, { "epoch": 0.13415424497731693, "grad_norm": 0.07240765541791916, "learning_rate": 6.707168894289187e-05, "loss": 0.3689, "step": 1656 }, { "epoch": 0.13423525599481528, "grad_norm": 0.049259696155786514, "learning_rate": 6.711219117051439e-05, "loss": 0.3673, "step": 1657 }, { "epoch": 0.13431626701231367, "grad_norm": 0.05210607126355171, "learning_rate": 6.71526933981369e-05, "loss": 0.3938, "step": 1658 }, { "epoch": 0.13439727802981205, "grad_norm": 0.05046253278851509, "learning_rate": 6.719319562575943e-05, "loss": 0.3753, "step": 1659 }, { "epoch": 0.13447828904731043, "grad_norm": 0.052167586982250214, "learning_rate": 6.723369785338193e-05, "loss": 0.3874, "step": 1660 }, { "epoch": 0.13455930006480882, "grad_norm": 0.06064862012863159, "learning_rate": 6.727420008100445e-05, "loss": 0.3736, "step": 1661 }, { "epoch": 0.1346403110823072, "grad_norm": 0.07566836476325989, "learning_rate": 6.731470230862697e-05, "loss": 0.4438, "step": 1662 }, { "epoch": 0.13472132209980558, "grad_norm": 0.058606453239917755, "learning_rate": 6.735520453624949e-05, "loss": 0.4132, "step": 1663 }, { "epoch": 0.13480233311730394, "grad_norm": 0.059201929718256, "learning_rate": 6.739570676387201e-05, "loss": 0.4312, "step": 1664 }, { "epoch": 0.13488334413480232, "grad_norm": 0.05796176567673683, "learning_rate": 6.743620899149453e-05, "loss": 0.3889, "step": 1665 }, { "epoch": 0.1349643551523007, "grad_norm": 0.058323007076978683, "learning_rate": 6.747671121911705e-05, "loss": 0.3573, "step": 1666 }, { "epoch": 0.1350453661697991, "grad_norm": 0.06881019473075867, "learning_rate": 6.751721344673957e-05, "loss": 0.3813, "step": 1667 }, { "epoch": 0.13512637718729748, "grad_norm": 0.06973112374544144, "learning_rate": 6.75577156743621e-05, "loss": 0.3706, "step": 1668 }, { "epoch": 0.13520738820479586, "grad_norm": 0.06532883644104004, "learning_rate": 6.759821790198462e-05, "loss": 0.3911, "step": 1669 }, { "epoch": 0.13528839922229424, "grad_norm": 0.06697031110525131, "learning_rate": 6.763872012960714e-05, "loss": 0.3457, "step": 1670 }, { "epoch": 0.1353694102397926, "grad_norm": 0.05248725041747093, "learning_rate": 6.767922235722965e-05, "loss": 0.3672, "step": 1671 }, { "epoch": 0.13545042125729098, "grad_norm": 0.06255452334880829, "learning_rate": 6.771972458485217e-05, "loss": 0.384, "step": 1672 }, { "epoch": 0.13553143227478937, "grad_norm": 0.04622405394911766, "learning_rate": 6.776022681247469e-05, "loss": 0.3759, "step": 1673 }, { "epoch": 0.13561244329228775, "grad_norm": 0.07297157496213913, "learning_rate": 6.780072904009721e-05, "loss": 0.3776, "step": 1674 }, { "epoch": 0.13569345430978613, "grad_norm": 0.06406359374523163, "learning_rate": 6.784123126771973e-05, "loss": 0.3921, "step": 1675 }, { "epoch": 0.13577446532728452, "grad_norm": 0.053780291229486465, "learning_rate": 6.788173349534225e-05, "loss": 0.3156, "step": 1676 }, { "epoch": 0.1358554763447829, "grad_norm": 0.06514348834753036, "learning_rate": 6.792223572296477e-05, "loss": 0.3968, "step": 1677 }, { "epoch": 0.13593648736228128, "grad_norm": 0.06725168228149414, "learning_rate": 6.796273795058729e-05, "loss": 0.4387, "step": 1678 }, { "epoch": 0.13601749837977964, "grad_norm": 0.0678589791059494, "learning_rate": 6.80032401782098e-05, "loss": 0.4239, "step": 1679 }, { "epoch": 0.13609850939727802, "grad_norm": 0.06807240843772888, "learning_rate": 6.804374240583233e-05, "loss": 0.4178, "step": 1680 }, { "epoch": 0.1361795204147764, "grad_norm": 0.06269272416830063, "learning_rate": 6.808424463345484e-05, "loss": 0.4557, "step": 1681 }, { "epoch": 0.1362605314322748, "grad_norm": 0.05374554917216301, "learning_rate": 6.812474686107736e-05, "loss": 0.4185, "step": 1682 }, { "epoch": 0.13634154244977317, "grad_norm": 0.06106915697455406, "learning_rate": 6.816524908869988e-05, "loss": 0.3768, "step": 1683 }, { "epoch": 0.13642255346727156, "grad_norm": 0.08892907202243805, "learning_rate": 6.82057513163224e-05, "loss": 0.4065, "step": 1684 }, { "epoch": 0.13650356448476994, "grad_norm": 0.04702616110444069, "learning_rate": 6.824625354394491e-05, "loss": 0.3306, "step": 1685 }, { "epoch": 0.1365845755022683, "grad_norm": 0.051397860050201416, "learning_rate": 6.828675577156743e-05, "loss": 0.3856, "step": 1686 }, { "epoch": 0.13666558651976668, "grad_norm": 0.07031011581420898, "learning_rate": 6.832725799918996e-05, "loss": 0.3844, "step": 1687 }, { "epoch": 0.13674659753726506, "grad_norm": 0.057391006499528885, "learning_rate": 6.836776022681248e-05, "loss": 0.3764, "step": 1688 }, { "epoch": 0.13682760855476345, "grad_norm": 0.05652105435729027, "learning_rate": 6.8408262454435e-05, "loss": 0.4324, "step": 1689 }, { "epoch": 0.13690861957226183, "grad_norm": 0.06434738636016846, "learning_rate": 6.844876468205752e-05, "loss": 0.4152, "step": 1690 }, { "epoch": 0.13698963058976021, "grad_norm": 0.07498002797365189, "learning_rate": 6.848926690968004e-05, "loss": 0.3906, "step": 1691 }, { "epoch": 0.1370706416072586, "grad_norm": 0.074516162276268, "learning_rate": 6.852976913730256e-05, "loss": 0.3704, "step": 1692 }, { "epoch": 0.13715165262475695, "grad_norm": 0.05279865488409996, "learning_rate": 6.857027136492508e-05, "loss": 0.3765, "step": 1693 }, { "epoch": 0.13723266364225534, "grad_norm": 0.051047589629888535, "learning_rate": 6.86107735925476e-05, "loss": 0.3428, "step": 1694 }, { "epoch": 0.13731367465975372, "grad_norm": 0.057483524084091187, "learning_rate": 6.865127582017012e-05, "loss": 0.4172, "step": 1695 }, { "epoch": 0.1373946856772521, "grad_norm": 0.0812988355755806, "learning_rate": 6.869177804779263e-05, "loss": 0.4141, "step": 1696 }, { "epoch": 0.1374756966947505, "grad_norm": 0.08455239981412888, "learning_rate": 6.873228027541515e-05, "loss": 0.3761, "step": 1697 }, { "epoch": 0.13755670771224887, "grad_norm": 0.06918775290250778, "learning_rate": 6.877278250303767e-05, "loss": 0.4699, "step": 1698 }, { "epoch": 0.13763771872974725, "grad_norm": 0.06675770878791809, "learning_rate": 6.881328473066019e-05, "loss": 0.4157, "step": 1699 }, { "epoch": 0.13771872974724564, "grad_norm": 0.05350131541490555, "learning_rate": 6.88537869582827e-05, "loss": 0.378, "step": 1700 }, { "epoch": 0.137799740764744, "grad_norm": 0.07368238270282745, "learning_rate": 6.889428918590522e-05, "loss": 0.4242, "step": 1701 }, { "epoch": 0.13788075178224238, "grad_norm": 0.051566384732723236, "learning_rate": 6.893479141352774e-05, "loss": 0.4016, "step": 1702 }, { "epoch": 0.13796176279974076, "grad_norm": 0.05574240908026695, "learning_rate": 6.897529364115026e-05, "loss": 0.4242, "step": 1703 }, { "epoch": 0.13804277381723914, "grad_norm": 0.06958450376987457, "learning_rate": 6.901579586877278e-05, "loss": 0.4175, "step": 1704 }, { "epoch": 0.13812378483473753, "grad_norm": 0.0514814667403698, "learning_rate": 6.90562980963953e-05, "loss": 0.3715, "step": 1705 }, { "epoch": 0.1382047958522359, "grad_norm": 0.05750512704253197, "learning_rate": 6.909680032401784e-05, "loss": 0.4226, "step": 1706 }, { "epoch": 0.1382858068697343, "grad_norm": 0.08278968930244446, "learning_rate": 6.913730255164034e-05, "loss": 0.4605, "step": 1707 }, { "epoch": 0.13836681788723265, "grad_norm": 0.050866756588220596, "learning_rate": 6.917780477926286e-05, "loss": 0.4151, "step": 1708 }, { "epoch": 0.13844782890473104, "grad_norm": 0.07240499556064606, "learning_rate": 6.921830700688538e-05, "loss": 0.3758, "step": 1709 }, { "epoch": 0.13852883992222942, "grad_norm": 0.061994630843400955, "learning_rate": 6.92588092345079e-05, "loss": 0.3862, "step": 1710 }, { "epoch": 0.1386098509397278, "grad_norm": 0.06153041124343872, "learning_rate": 6.929931146213042e-05, "loss": 0.4123, "step": 1711 }, { "epoch": 0.13869086195722619, "grad_norm": 0.06387482583522797, "learning_rate": 6.933981368975294e-05, "loss": 0.4057, "step": 1712 }, { "epoch": 0.13877187297472457, "grad_norm": 0.06365308165550232, "learning_rate": 6.938031591737546e-05, "loss": 0.3397, "step": 1713 }, { "epoch": 0.13885288399222295, "grad_norm": 0.05819310247898102, "learning_rate": 6.942081814499798e-05, "loss": 0.3572, "step": 1714 }, { "epoch": 0.1389338950097213, "grad_norm": 0.06904326379299164, "learning_rate": 6.94613203726205e-05, "loss": 0.4026, "step": 1715 }, { "epoch": 0.1390149060272197, "grad_norm": 0.10969147086143494, "learning_rate": 6.950182260024302e-05, "loss": 0.4163, "step": 1716 }, { "epoch": 0.13909591704471808, "grad_norm": 0.07195261120796204, "learning_rate": 6.954232482786554e-05, "loss": 0.4684, "step": 1717 }, { "epoch": 0.13917692806221646, "grad_norm": 0.06505865603685379, "learning_rate": 6.958282705548806e-05, "loss": 0.384, "step": 1718 }, { "epoch": 0.13925793907971484, "grad_norm": 0.05413194000720978, "learning_rate": 6.962332928311058e-05, "loss": 0.3935, "step": 1719 }, { "epoch": 0.13933895009721323, "grad_norm": 0.05856451764702797, "learning_rate": 6.966383151073308e-05, "loss": 0.4345, "step": 1720 }, { "epoch": 0.1394199611147116, "grad_norm": 0.05400045961141586, "learning_rate": 6.97043337383556e-05, "loss": 0.3794, "step": 1721 }, { "epoch": 0.13950097213221, "grad_norm": 0.0684712827205658, "learning_rate": 6.974483596597812e-05, "loss": 0.3592, "step": 1722 }, { "epoch": 0.13958198314970835, "grad_norm": 0.05927879735827446, "learning_rate": 6.978533819360064e-05, "loss": 0.4145, "step": 1723 }, { "epoch": 0.13966299416720673, "grad_norm": 0.06611039489507675, "learning_rate": 6.982584042122316e-05, "loss": 0.4255, "step": 1724 }, { "epoch": 0.13974400518470512, "grad_norm": 0.05736701190471649, "learning_rate": 6.98663426488457e-05, "loss": 0.4155, "step": 1725 }, { "epoch": 0.1398250162022035, "grad_norm": 0.054496392607688904, "learning_rate": 6.990684487646822e-05, "loss": 0.3923, "step": 1726 }, { "epoch": 0.13990602721970188, "grad_norm": 0.07339916378259659, "learning_rate": 6.994734710409074e-05, "loss": 0.386, "step": 1727 }, { "epoch": 0.13998703823720027, "grad_norm": 0.08990602940320969, "learning_rate": 6.998784933171326e-05, "loss": 0.3809, "step": 1728 }, { "epoch": 0.14006804925469865, "grad_norm": 0.05522826686501503, "learning_rate": 7.002835155933578e-05, "loss": 0.3997, "step": 1729 }, { "epoch": 0.140149060272197, "grad_norm": 0.06750888377428055, "learning_rate": 7.00688537869583e-05, "loss": 0.4567, "step": 1730 }, { "epoch": 0.1402300712896954, "grad_norm": 0.07188330590724945, "learning_rate": 7.01093560145808e-05, "loss": 0.3631, "step": 1731 }, { "epoch": 0.14031108230719377, "grad_norm": 0.06761690974235535, "learning_rate": 7.014985824220332e-05, "loss": 0.3815, "step": 1732 }, { "epoch": 0.14039209332469216, "grad_norm": 0.07259739190340042, "learning_rate": 7.019036046982584e-05, "loss": 0.3993, "step": 1733 }, { "epoch": 0.14047310434219054, "grad_norm": 0.05059479549527168, "learning_rate": 7.023086269744836e-05, "loss": 0.4039, "step": 1734 }, { "epoch": 0.14055411535968892, "grad_norm": 0.06526746600866318, "learning_rate": 7.027136492507088e-05, "loss": 0.4439, "step": 1735 }, { "epoch": 0.1406351263771873, "grad_norm": 0.06921422481536865, "learning_rate": 7.03118671526934e-05, "loss": 0.4067, "step": 1736 }, { "epoch": 0.14071613739468566, "grad_norm": 0.0597330667078495, "learning_rate": 7.035236938031592e-05, "loss": 0.4079, "step": 1737 }, { "epoch": 0.14079714841218405, "grad_norm": 0.06214408203959465, "learning_rate": 7.039287160793844e-05, "loss": 0.394, "step": 1738 }, { "epoch": 0.14087815942968243, "grad_norm": 0.05894998461008072, "learning_rate": 7.043337383556096e-05, "loss": 0.3823, "step": 1739 }, { "epoch": 0.14095917044718081, "grad_norm": 0.07503427565097809, "learning_rate": 7.047387606318348e-05, "loss": 0.4185, "step": 1740 }, { "epoch": 0.1410401814646792, "grad_norm": 0.05956784635782242, "learning_rate": 7.0514378290806e-05, "loss": 0.4074, "step": 1741 }, { "epoch": 0.14112119248217758, "grad_norm": 0.07910850644111633, "learning_rate": 7.055488051842852e-05, "loss": 0.4129, "step": 1742 }, { "epoch": 0.14120220349967597, "grad_norm": 0.056844715029001236, "learning_rate": 7.059538274605104e-05, "loss": 0.3862, "step": 1743 }, { "epoch": 0.14128321451717435, "grad_norm": 0.08414852619171143, "learning_rate": 7.063588497367356e-05, "loss": 0.4196, "step": 1744 }, { "epoch": 0.1413642255346727, "grad_norm": 0.06702537089586258, "learning_rate": 7.067638720129608e-05, "loss": 0.4098, "step": 1745 }, { "epoch": 0.1414452365521711, "grad_norm": 0.06511709839105606, "learning_rate": 7.07168894289186e-05, "loss": 0.418, "step": 1746 }, { "epoch": 0.14152624756966947, "grad_norm": 0.054429568350315094, "learning_rate": 7.075739165654112e-05, "loss": 0.3404, "step": 1747 }, { "epoch": 0.14160725858716786, "grad_norm": 0.047777604311704636, "learning_rate": 7.079789388416364e-05, "loss": 0.3381, "step": 1748 }, { "epoch": 0.14168826960466624, "grad_norm": 0.05889793112874031, "learning_rate": 7.083839611178616e-05, "loss": 0.4082, "step": 1749 }, { "epoch": 0.14176928062216462, "grad_norm": 0.05489526316523552, "learning_rate": 7.087889833940867e-05, "loss": 0.4125, "step": 1750 }, { "epoch": 0.141850291639663, "grad_norm": 0.06594657152891159, "learning_rate": 7.09194005670312e-05, "loss": 0.3895, "step": 1751 }, { "epoch": 0.14193130265716136, "grad_norm": 0.0674719288945198, "learning_rate": 7.095990279465371e-05, "loss": 0.4384, "step": 1752 }, { "epoch": 0.14201231367465975, "grad_norm": 0.05686030164361, "learning_rate": 7.100040502227623e-05, "loss": 0.3342, "step": 1753 }, { "epoch": 0.14209332469215813, "grad_norm": 0.05702396109700203, "learning_rate": 7.104090724989875e-05, "loss": 0.4137, "step": 1754 }, { "epoch": 0.1421743357096565, "grad_norm": 0.07312499731779099, "learning_rate": 7.108140947752126e-05, "loss": 0.3709, "step": 1755 }, { "epoch": 0.1422553467271549, "grad_norm": 0.04934091120958328, "learning_rate": 7.112191170514378e-05, "loss": 0.3619, "step": 1756 }, { "epoch": 0.14233635774465328, "grad_norm": 0.05569101497530937, "learning_rate": 7.11624139327663e-05, "loss": 0.4096, "step": 1757 }, { "epoch": 0.14241736876215166, "grad_norm": 0.05229457840323448, "learning_rate": 7.120291616038882e-05, "loss": 0.3803, "step": 1758 }, { "epoch": 0.14249837977965002, "grad_norm": 0.05280294641852379, "learning_rate": 7.124341838801134e-05, "loss": 0.4167, "step": 1759 }, { "epoch": 0.1425793907971484, "grad_norm": 0.05248962715268135, "learning_rate": 7.128392061563386e-05, "loss": 0.4203, "step": 1760 }, { "epoch": 0.1426604018146468, "grad_norm": 0.05805622413754463, "learning_rate": 7.132442284325638e-05, "loss": 0.3952, "step": 1761 }, { "epoch": 0.14274141283214517, "grad_norm": 0.06868473440408707, "learning_rate": 7.13649250708789e-05, "loss": 0.3603, "step": 1762 }, { "epoch": 0.14282242384964355, "grad_norm": 0.05611317232251167, "learning_rate": 7.140542729850143e-05, "loss": 0.3756, "step": 1763 }, { "epoch": 0.14290343486714194, "grad_norm": 0.05748287960886955, "learning_rate": 7.144592952612395e-05, "loss": 0.428, "step": 1764 }, { "epoch": 0.14298444588464032, "grad_norm": 0.0716121718287468, "learning_rate": 7.148643175374647e-05, "loss": 0.4295, "step": 1765 }, { "epoch": 0.1430654569021387, "grad_norm": 0.0687982439994812, "learning_rate": 7.152693398136898e-05, "loss": 0.4295, "step": 1766 }, { "epoch": 0.14314646791963706, "grad_norm": 0.06397130340337753, "learning_rate": 7.15674362089915e-05, "loss": 0.442, "step": 1767 }, { "epoch": 0.14322747893713544, "grad_norm": 0.06169519200921059, "learning_rate": 7.160793843661402e-05, "loss": 0.4189, "step": 1768 }, { "epoch": 0.14330848995463383, "grad_norm": 0.04765634983778, "learning_rate": 7.164844066423653e-05, "loss": 0.381, "step": 1769 }, { "epoch": 0.1433895009721322, "grad_norm": 0.04898335412144661, "learning_rate": 7.168894289185905e-05, "loss": 0.4064, "step": 1770 }, { "epoch": 0.1434705119896306, "grad_norm": 0.0570511594414711, "learning_rate": 7.172944511948157e-05, "loss": 0.3865, "step": 1771 }, { "epoch": 0.14355152300712898, "grad_norm": 0.0582033172249794, "learning_rate": 7.17699473471041e-05, "loss": 0.3971, "step": 1772 }, { "epoch": 0.14363253402462736, "grad_norm": 0.056540779769420624, "learning_rate": 7.181044957472661e-05, "loss": 0.4527, "step": 1773 }, { "epoch": 0.14371354504212572, "grad_norm": 0.056025274097919464, "learning_rate": 7.185095180234913e-05, "loss": 0.3989, "step": 1774 }, { "epoch": 0.1437945560596241, "grad_norm": 0.048998527228832245, "learning_rate": 7.189145402997165e-05, "loss": 0.3978, "step": 1775 }, { "epoch": 0.14387556707712248, "grad_norm": 0.06863243877887726, "learning_rate": 7.193195625759417e-05, "loss": 0.4469, "step": 1776 }, { "epoch": 0.14395657809462087, "grad_norm": 0.06901993602514267, "learning_rate": 7.197245848521669e-05, "loss": 0.4687, "step": 1777 }, { "epoch": 0.14403758911211925, "grad_norm": 0.06378109008073807, "learning_rate": 7.201296071283921e-05, "loss": 0.3789, "step": 1778 }, { "epoch": 0.14411860012961764, "grad_norm": 0.053304869681596756, "learning_rate": 7.205346294046172e-05, "loss": 0.4167, "step": 1779 }, { "epoch": 0.14419961114711602, "grad_norm": 0.04796311631798744, "learning_rate": 7.209396516808424e-05, "loss": 0.4195, "step": 1780 }, { "epoch": 0.14428062216461437, "grad_norm": 0.061010587960481644, "learning_rate": 7.213446739570676e-05, "loss": 0.425, "step": 1781 }, { "epoch": 0.14436163318211276, "grad_norm": 0.05330296978354454, "learning_rate": 7.217496962332929e-05, "loss": 0.3903, "step": 1782 }, { "epoch": 0.14444264419961114, "grad_norm": 0.06527654826641083, "learning_rate": 7.221547185095181e-05, "loss": 0.3622, "step": 1783 }, { "epoch": 0.14452365521710953, "grad_norm": 0.05822371691465378, "learning_rate": 7.225597407857433e-05, "loss": 0.3866, "step": 1784 }, { "epoch": 0.1446046662346079, "grad_norm": 0.05615841597318649, "learning_rate": 7.229647630619685e-05, "loss": 0.3653, "step": 1785 }, { "epoch": 0.1446856772521063, "grad_norm": 0.06061498448252678, "learning_rate": 7.233697853381937e-05, "loss": 0.4049, "step": 1786 }, { "epoch": 0.14476668826960468, "grad_norm": 0.06993502378463745, "learning_rate": 7.237748076144189e-05, "loss": 0.3763, "step": 1787 }, { "epoch": 0.14484769928710303, "grad_norm": 0.07220494002103806, "learning_rate": 7.241798298906441e-05, "loss": 0.4207, "step": 1788 }, { "epoch": 0.14492871030460142, "grad_norm": 0.06628101319074631, "learning_rate": 7.245848521668693e-05, "loss": 0.4357, "step": 1789 }, { "epoch": 0.1450097213220998, "grad_norm": 0.06409826874732971, "learning_rate": 7.249898744430943e-05, "loss": 0.423, "step": 1790 }, { "epoch": 0.14509073233959818, "grad_norm": 0.07170279324054718, "learning_rate": 7.253948967193195e-05, "loss": 0.4523, "step": 1791 }, { "epoch": 0.14517174335709657, "grad_norm": 0.04600429907441139, "learning_rate": 7.257999189955447e-05, "loss": 0.3896, "step": 1792 }, { "epoch": 0.14525275437459495, "grad_norm": 0.06032882630825043, "learning_rate": 7.2620494127177e-05, "loss": 0.3646, "step": 1793 }, { "epoch": 0.14533376539209333, "grad_norm": 0.07741162925958633, "learning_rate": 7.266099635479951e-05, "loss": 0.3783, "step": 1794 }, { "epoch": 0.14541477640959172, "grad_norm": 0.08997105062007904, "learning_rate": 7.270149858242203e-05, "loss": 0.4193, "step": 1795 }, { "epoch": 0.14549578742709007, "grad_norm": 0.05801474675536156, "learning_rate": 7.274200081004455e-05, "loss": 0.3841, "step": 1796 }, { "epoch": 0.14557679844458846, "grad_norm": 0.07002715021371841, "learning_rate": 7.278250303766707e-05, "loss": 0.4065, "step": 1797 }, { "epoch": 0.14565780946208684, "grad_norm": 0.05423908308148384, "learning_rate": 7.282300526528959e-05, "loss": 0.4072, "step": 1798 }, { "epoch": 0.14573882047958522, "grad_norm": 0.050748929381370544, "learning_rate": 7.286350749291211e-05, "loss": 0.3967, "step": 1799 }, { "epoch": 0.1458198314970836, "grad_norm": 0.05518374964594841, "learning_rate": 7.290400972053464e-05, "loss": 0.3719, "step": 1800 }, { "epoch": 0.145900842514582, "grad_norm": 0.06590072810649872, "learning_rate": 7.294451194815715e-05, "loss": 0.4037, "step": 1801 }, { "epoch": 0.14598185353208037, "grad_norm": 0.06755327433347702, "learning_rate": 7.298501417577967e-05, "loss": 0.4461, "step": 1802 }, { "epoch": 0.14606286454957873, "grad_norm": 0.058261655271053314, "learning_rate": 7.302551640340219e-05, "loss": 0.3815, "step": 1803 }, { "epoch": 0.14614387556707711, "grad_norm": 0.05535726994276047, "learning_rate": 7.306601863102471e-05, "loss": 0.4019, "step": 1804 }, { "epoch": 0.1462248865845755, "grad_norm": 0.05837560072541237, "learning_rate": 7.310652085864723e-05, "loss": 0.4141, "step": 1805 }, { "epoch": 0.14630589760207388, "grad_norm": 0.059157032519578934, "learning_rate": 7.314702308626975e-05, "loss": 0.3868, "step": 1806 }, { "epoch": 0.14638690861957226, "grad_norm": 0.06888743489980698, "learning_rate": 7.318752531389227e-05, "loss": 0.3937, "step": 1807 }, { "epoch": 0.14646791963707065, "grad_norm": 0.06422577798366547, "learning_rate": 7.322802754151479e-05, "loss": 0.4016, "step": 1808 }, { "epoch": 0.14654893065456903, "grad_norm": 0.05708504468202591, "learning_rate": 7.326852976913731e-05, "loss": 0.375, "step": 1809 }, { "epoch": 0.1466299416720674, "grad_norm": 0.06336402148008347, "learning_rate": 7.330903199675983e-05, "loss": 0.4152, "step": 1810 }, { "epoch": 0.14671095268956577, "grad_norm": 0.06681369990110397, "learning_rate": 7.334953422438235e-05, "loss": 0.3972, "step": 1811 }, { "epoch": 0.14679196370706415, "grad_norm": 0.06833192706108093, "learning_rate": 7.339003645200487e-05, "loss": 0.4259, "step": 1812 }, { "epoch": 0.14687297472456254, "grad_norm": 0.06117531284689903, "learning_rate": 7.343053867962739e-05, "loss": 0.4279, "step": 1813 }, { "epoch": 0.14695398574206092, "grad_norm": 0.061122827231884, "learning_rate": 7.347104090724989e-05, "loss": 0.4019, "step": 1814 }, { "epoch": 0.1470349967595593, "grad_norm": 0.06589484959840775, "learning_rate": 7.351154313487241e-05, "loss": 0.372, "step": 1815 }, { "epoch": 0.1471160077770577, "grad_norm": 0.0601775124669075, "learning_rate": 7.355204536249493e-05, "loss": 0.4704, "step": 1816 }, { "epoch": 0.14719701879455607, "grad_norm": 0.05938103049993515, "learning_rate": 7.359254759011745e-05, "loss": 0.4103, "step": 1817 }, { "epoch": 0.14727802981205443, "grad_norm": 0.052586477249860764, "learning_rate": 7.363304981773997e-05, "loss": 0.4259, "step": 1818 }, { "epoch": 0.1473590408295528, "grad_norm": 0.056762926280498505, "learning_rate": 7.36735520453625e-05, "loss": 0.4044, "step": 1819 }, { "epoch": 0.1474400518470512, "grad_norm": 0.04729248583316803, "learning_rate": 7.371405427298502e-05, "loss": 0.4058, "step": 1820 }, { "epoch": 0.14752106286454958, "grad_norm": 0.05358875170350075, "learning_rate": 7.375455650060754e-05, "loss": 0.3731, "step": 1821 }, { "epoch": 0.14760207388204796, "grad_norm": 0.05654335767030716, "learning_rate": 7.379505872823006e-05, "loss": 0.3702, "step": 1822 }, { "epoch": 0.14768308489954635, "grad_norm": 0.055869363248348236, "learning_rate": 7.383556095585258e-05, "loss": 0.3869, "step": 1823 }, { "epoch": 0.14776409591704473, "grad_norm": 0.05027586966753006, "learning_rate": 7.38760631834751e-05, "loss": 0.4121, "step": 1824 }, { "epoch": 0.14784510693454309, "grad_norm": 0.056683965027332306, "learning_rate": 7.391656541109761e-05, "loss": 0.3917, "step": 1825 }, { "epoch": 0.14792611795204147, "grad_norm": 0.05755713954567909, "learning_rate": 7.395706763872013e-05, "loss": 0.4402, "step": 1826 }, { "epoch": 0.14800712896953985, "grad_norm": 0.060089223086833954, "learning_rate": 7.399756986634265e-05, "loss": 0.3635, "step": 1827 }, { "epoch": 0.14808813998703824, "grad_norm": 0.060978520661592484, "learning_rate": 7.403807209396517e-05, "loss": 0.4458, "step": 1828 }, { "epoch": 0.14816915100453662, "grad_norm": 0.06925841420888901, "learning_rate": 7.407857432158769e-05, "loss": 0.4283, "step": 1829 }, { "epoch": 0.148250162022035, "grad_norm": 0.06608008593320847, "learning_rate": 7.411907654921021e-05, "loss": 0.3925, "step": 1830 }, { "epoch": 0.1483311730395334, "grad_norm": 0.06768857687711716, "learning_rate": 7.415957877683273e-05, "loss": 0.3769, "step": 1831 }, { "epoch": 0.14841218405703174, "grad_norm": 0.05477285385131836, "learning_rate": 7.420008100445525e-05, "loss": 0.3388, "step": 1832 }, { "epoch": 0.14849319507453013, "grad_norm": 0.05085244029760361, "learning_rate": 7.424058323207777e-05, "loss": 0.4125, "step": 1833 }, { "epoch": 0.1485742060920285, "grad_norm": 0.05243555083870888, "learning_rate": 7.428108545970029e-05, "loss": 0.3376, "step": 1834 }, { "epoch": 0.1486552171095269, "grad_norm": 0.05747552961111069, "learning_rate": 7.43215876873228e-05, "loss": 0.4099, "step": 1835 }, { "epoch": 0.14873622812702528, "grad_norm": 0.05261996015906334, "learning_rate": 7.436208991494533e-05, "loss": 0.4171, "step": 1836 }, { "epoch": 0.14881723914452366, "grad_norm": 0.07032431662082672, "learning_rate": 7.440259214256785e-05, "loss": 0.4228, "step": 1837 }, { "epoch": 0.14889825016202204, "grad_norm": 0.0585196353495121, "learning_rate": 7.444309437019036e-05, "loss": 0.3887, "step": 1838 }, { "epoch": 0.14897926117952043, "grad_norm": 0.04458057880401611, "learning_rate": 7.448359659781288e-05, "loss": 0.3939, "step": 1839 }, { "epoch": 0.14906027219701878, "grad_norm": 0.04972951114177704, "learning_rate": 7.45240988254354e-05, "loss": 0.3625, "step": 1840 }, { "epoch": 0.14914128321451717, "grad_norm": 0.05577665939927101, "learning_rate": 7.456460105305792e-05, "loss": 0.407, "step": 1841 }, { "epoch": 0.14922229423201555, "grad_norm": 0.05278479680418968, "learning_rate": 7.460510328068044e-05, "loss": 0.4324, "step": 1842 }, { "epoch": 0.14930330524951393, "grad_norm": 0.05005854740738869, "learning_rate": 7.464560550830296e-05, "loss": 0.4089, "step": 1843 }, { "epoch": 0.14938431626701232, "grad_norm": 0.0569162480533123, "learning_rate": 7.468610773592548e-05, "loss": 0.3852, "step": 1844 }, { "epoch": 0.1494653272845107, "grad_norm": 0.04794647544622421, "learning_rate": 7.4726609963548e-05, "loss": 0.3865, "step": 1845 }, { "epoch": 0.14954633830200909, "grad_norm": 0.04714139923453331, "learning_rate": 7.476711219117052e-05, "loss": 0.3557, "step": 1846 }, { "epoch": 0.14962734931950744, "grad_norm": 0.0505010262131691, "learning_rate": 7.480761441879304e-05, "loss": 0.3681, "step": 1847 }, { "epoch": 0.14970836033700582, "grad_norm": 0.04720376804471016, "learning_rate": 7.484811664641556e-05, "loss": 0.3652, "step": 1848 }, { "epoch": 0.1497893713545042, "grad_norm": 0.06479644775390625, "learning_rate": 7.488861887403808e-05, "loss": 0.4114, "step": 1849 }, { "epoch": 0.1498703823720026, "grad_norm": 0.056275542825460434, "learning_rate": 7.492912110166059e-05, "loss": 0.3694, "step": 1850 }, { "epoch": 0.14995139338950098, "grad_norm": 0.06178649142384529, "learning_rate": 7.496962332928311e-05, "loss": 0.4472, "step": 1851 }, { "epoch": 0.15003240440699936, "grad_norm": 0.06014366075396538, "learning_rate": 7.501012555690563e-05, "loss": 0.38, "step": 1852 }, { "epoch": 0.15011341542449774, "grad_norm": 0.06340664625167847, "learning_rate": 7.505062778452815e-05, "loss": 0.3629, "step": 1853 }, { "epoch": 0.1501944264419961, "grad_norm": 0.05327383428812027, "learning_rate": 7.509113001215067e-05, "loss": 0.376, "step": 1854 }, { "epoch": 0.15027543745949448, "grad_norm": 0.058881551027297974, "learning_rate": 7.513163223977319e-05, "loss": 0.3972, "step": 1855 }, { "epoch": 0.15035644847699287, "grad_norm": 0.06142482906579971, "learning_rate": 7.51721344673957e-05, "loss": 0.4119, "step": 1856 }, { "epoch": 0.15043745949449125, "grad_norm": 0.06492027640342712, "learning_rate": 7.521263669501824e-05, "loss": 0.4008, "step": 1857 }, { "epoch": 0.15051847051198963, "grad_norm": 0.0463237501680851, "learning_rate": 7.525313892264076e-05, "loss": 0.3546, "step": 1858 }, { "epoch": 0.15059948152948802, "grad_norm": 0.054810382425785065, "learning_rate": 7.529364115026328e-05, "loss": 0.4539, "step": 1859 }, { "epoch": 0.1506804925469864, "grad_norm": 0.05126664787530899, "learning_rate": 7.533414337788578e-05, "loss": 0.3981, "step": 1860 }, { "epoch": 0.15076150356448478, "grad_norm": 0.12834858894348145, "learning_rate": 7.53746456055083e-05, "loss": 0.4163, "step": 1861 }, { "epoch": 0.15084251458198314, "grad_norm": 0.06354403495788574, "learning_rate": 7.541514783313082e-05, "loss": 0.3773, "step": 1862 }, { "epoch": 0.15092352559948152, "grad_norm": 0.053905289620161057, "learning_rate": 7.545565006075334e-05, "loss": 0.4318, "step": 1863 }, { "epoch": 0.1510045366169799, "grad_norm": 0.05705507472157478, "learning_rate": 7.549615228837586e-05, "loss": 0.3674, "step": 1864 }, { "epoch": 0.1510855476344783, "grad_norm": 0.05759746581315994, "learning_rate": 7.553665451599838e-05, "loss": 0.3508, "step": 1865 }, { "epoch": 0.15116655865197667, "grad_norm": 0.04640937224030495, "learning_rate": 7.55771567436209e-05, "loss": 0.4149, "step": 1866 }, { "epoch": 0.15124756966947506, "grad_norm": 0.06412570178508759, "learning_rate": 7.561765897124342e-05, "loss": 0.4117, "step": 1867 }, { "epoch": 0.15132858068697344, "grad_norm": 0.06056508794426918, "learning_rate": 7.565816119886594e-05, "loss": 0.3966, "step": 1868 }, { "epoch": 0.1514095917044718, "grad_norm": 0.06665674597024918, "learning_rate": 7.569866342648846e-05, "loss": 0.4389, "step": 1869 }, { "epoch": 0.15149060272197018, "grad_norm": 0.060271285474300385, "learning_rate": 7.573916565411098e-05, "loss": 0.4256, "step": 1870 }, { "epoch": 0.15157161373946856, "grad_norm": 0.062425002455711365, "learning_rate": 7.57796678817335e-05, "loss": 0.3871, "step": 1871 }, { "epoch": 0.15165262475696695, "grad_norm": 0.07723111659288406, "learning_rate": 7.582017010935602e-05, "loss": 0.4202, "step": 1872 }, { "epoch": 0.15173363577446533, "grad_norm": 0.06984084099531174, "learning_rate": 7.586067233697854e-05, "loss": 0.4244, "step": 1873 }, { "epoch": 0.15181464679196371, "grad_norm": 0.05606261268258095, "learning_rate": 7.590117456460105e-05, "loss": 0.3444, "step": 1874 }, { "epoch": 0.1518956578094621, "grad_norm": 0.05358389392495155, "learning_rate": 7.594167679222357e-05, "loss": 0.3726, "step": 1875 }, { "epoch": 0.15197666882696045, "grad_norm": 0.052349768579006195, "learning_rate": 7.59821790198461e-05, "loss": 0.394, "step": 1876 }, { "epoch": 0.15205767984445884, "grad_norm": 0.05732255429029465, "learning_rate": 7.602268124746862e-05, "loss": 0.3717, "step": 1877 }, { "epoch": 0.15213869086195722, "grad_norm": 0.05306144058704376, "learning_rate": 7.606318347509114e-05, "loss": 0.4004, "step": 1878 }, { "epoch": 0.1522197018794556, "grad_norm": 0.05637253820896149, "learning_rate": 7.610368570271366e-05, "loss": 0.3899, "step": 1879 }, { "epoch": 0.152300712896954, "grad_norm": 0.07494375109672546, "learning_rate": 7.614418793033618e-05, "loss": 0.3769, "step": 1880 }, { "epoch": 0.15238172391445237, "grad_norm": 0.06573886424303055, "learning_rate": 7.61846901579587e-05, "loss": 0.3921, "step": 1881 }, { "epoch": 0.15246273493195076, "grad_norm": 0.056376952677965164, "learning_rate": 7.622519238558122e-05, "loss": 0.4119, "step": 1882 }, { "epoch": 0.15254374594944914, "grad_norm": 0.05358589440584183, "learning_rate": 7.626569461320374e-05, "loss": 0.3883, "step": 1883 }, { "epoch": 0.1526247569669475, "grad_norm": 0.06232666224241257, "learning_rate": 7.630619684082626e-05, "loss": 0.395, "step": 1884 }, { "epoch": 0.15270576798444588, "grad_norm": 0.07566624134778976, "learning_rate": 7.634669906844876e-05, "loss": 0.3768, "step": 1885 }, { "epoch": 0.15278677900194426, "grad_norm": 0.07797633856534958, "learning_rate": 7.638720129607128e-05, "loss": 0.357, "step": 1886 }, { "epoch": 0.15286779001944265, "grad_norm": 0.04703294858336449, "learning_rate": 7.64277035236938e-05, "loss": 0.3907, "step": 1887 }, { "epoch": 0.15294880103694103, "grad_norm": 0.07149975746870041, "learning_rate": 7.646820575131632e-05, "loss": 0.4132, "step": 1888 }, { "epoch": 0.1530298120544394, "grad_norm": 0.05562804639339447, "learning_rate": 7.650870797893884e-05, "loss": 0.3588, "step": 1889 }, { "epoch": 0.1531108230719378, "grad_norm": 0.0702008605003357, "learning_rate": 7.654921020656136e-05, "loss": 0.41, "step": 1890 }, { "epoch": 0.15319183408943615, "grad_norm": 0.0702863484621048, "learning_rate": 7.658971243418388e-05, "loss": 0.4284, "step": 1891 }, { "epoch": 0.15327284510693454, "grad_norm": 0.11156198382377625, "learning_rate": 7.66302146618064e-05, "loss": 0.4401, "step": 1892 }, { "epoch": 0.15335385612443292, "grad_norm": 0.06580785661935806, "learning_rate": 7.667071688942892e-05, "loss": 0.3719, "step": 1893 }, { "epoch": 0.1534348671419313, "grad_norm": 0.058176323771476746, "learning_rate": 7.671121911705144e-05, "loss": 0.3352, "step": 1894 }, { "epoch": 0.1535158781594297, "grad_norm": 0.05421845242381096, "learning_rate": 7.675172134467397e-05, "loss": 0.4187, "step": 1895 }, { "epoch": 0.15359688917692807, "grad_norm": 0.04730819910764694, "learning_rate": 7.679222357229648e-05, "loss": 0.3854, "step": 1896 }, { "epoch": 0.15367790019442645, "grad_norm": 0.05933893471956253, "learning_rate": 7.6832725799919e-05, "loss": 0.3833, "step": 1897 }, { "epoch": 0.1537589112119248, "grad_norm": 0.058625269681215286, "learning_rate": 7.687322802754152e-05, "loss": 0.3672, "step": 1898 }, { "epoch": 0.1538399222294232, "grad_norm": 0.05623107776045799, "learning_rate": 7.691373025516404e-05, "loss": 0.3706, "step": 1899 }, { "epoch": 0.15392093324692158, "grad_norm": 0.06870344281196594, "learning_rate": 7.695423248278656e-05, "loss": 0.4005, "step": 1900 }, { "epoch": 0.15400194426441996, "grad_norm": 0.06046906113624573, "learning_rate": 7.699473471040908e-05, "loss": 0.349, "step": 1901 }, { "epoch": 0.15408295528191834, "grad_norm": 0.060264572501182556, "learning_rate": 7.70352369380316e-05, "loss": 0.3927, "step": 1902 }, { "epoch": 0.15416396629941673, "grad_norm": 0.06525126844644547, "learning_rate": 7.707573916565412e-05, "loss": 0.374, "step": 1903 }, { "epoch": 0.1542449773169151, "grad_norm": 0.05228757858276367, "learning_rate": 7.711624139327664e-05, "loss": 0.4138, "step": 1904 }, { "epoch": 0.15432598833441347, "grad_norm": 0.05303167179226875, "learning_rate": 7.715674362089916e-05, "loss": 0.399, "step": 1905 }, { "epoch": 0.15440699935191185, "grad_norm": 0.06746599823236465, "learning_rate": 7.719724584852168e-05, "loss": 0.4202, "step": 1906 }, { "epoch": 0.15448801036941023, "grad_norm": 0.0527813620865345, "learning_rate": 7.72377480761442e-05, "loss": 0.3833, "step": 1907 }, { "epoch": 0.15456902138690862, "grad_norm": 0.07892647385597229, "learning_rate": 7.727825030376671e-05, "loss": 0.4008, "step": 1908 }, { "epoch": 0.154650032404407, "grad_norm": 0.05395408719778061, "learning_rate": 7.731875253138922e-05, "loss": 0.3503, "step": 1909 }, { "epoch": 0.15473104342190538, "grad_norm": 0.060144975781440735, "learning_rate": 7.735925475901174e-05, "loss": 0.3815, "step": 1910 }, { "epoch": 0.15481205443940377, "grad_norm": 0.07419056445360184, "learning_rate": 7.739975698663426e-05, "loss": 0.3393, "step": 1911 }, { "epoch": 0.15489306545690215, "grad_norm": 0.07367844879627228, "learning_rate": 7.744025921425678e-05, "loss": 0.4157, "step": 1912 }, { "epoch": 0.1549740764744005, "grad_norm": 0.0585818886756897, "learning_rate": 7.74807614418793e-05, "loss": 0.3414, "step": 1913 }, { "epoch": 0.1550550874918989, "grad_norm": 0.05288233235478401, "learning_rate": 7.752126366950183e-05, "loss": 0.4007, "step": 1914 }, { "epoch": 0.15513609850939727, "grad_norm": 0.054131995886564255, "learning_rate": 7.756176589712435e-05, "loss": 0.4007, "step": 1915 }, { "epoch": 0.15521710952689566, "grad_norm": 0.05087273567914963, "learning_rate": 7.760226812474687e-05, "loss": 0.4121, "step": 1916 }, { "epoch": 0.15529812054439404, "grad_norm": 0.0774221420288086, "learning_rate": 7.764277035236939e-05, "loss": 0.4083, "step": 1917 }, { "epoch": 0.15537913156189243, "grad_norm": 0.08393000066280365, "learning_rate": 7.768327257999191e-05, "loss": 0.4433, "step": 1918 }, { "epoch": 0.1554601425793908, "grad_norm": 0.04874301701784134, "learning_rate": 7.772377480761443e-05, "loss": 0.4229, "step": 1919 }, { "epoch": 0.15554115359688916, "grad_norm": 0.05467551574110985, "learning_rate": 7.776427703523694e-05, "loss": 0.4047, "step": 1920 }, { "epoch": 0.15562216461438755, "grad_norm": 0.063243567943573, "learning_rate": 7.780477926285946e-05, "loss": 0.4126, "step": 1921 }, { "epoch": 0.15570317563188593, "grad_norm": 0.05471186712384224, "learning_rate": 7.784528149048198e-05, "loss": 0.395, "step": 1922 }, { "epoch": 0.15578418664938432, "grad_norm": 0.06775137037038803, "learning_rate": 7.78857837181045e-05, "loss": 0.4176, "step": 1923 }, { "epoch": 0.1558651976668827, "grad_norm": 0.06087944284081459, "learning_rate": 7.792628594572702e-05, "loss": 0.4275, "step": 1924 }, { "epoch": 0.15594620868438108, "grad_norm": 0.0486009307205677, "learning_rate": 7.796678817334954e-05, "loss": 0.3679, "step": 1925 }, { "epoch": 0.15602721970187947, "grad_norm": 0.05688975751399994, "learning_rate": 7.800729040097206e-05, "loss": 0.3961, "step": 1926 }, { "epoch": 0.15610823071937782, "grad_norm": 0.05588873103260994, "learning_rate": 7.804779262859457e-05, "loss": 0.4191, "step": 1927 }, { "epoch": 0.1561892417368762, "grad_norm": 0.04377869516611099, "learning_rate": 7.80882948562171e-05, "loss": 0.37, "step": 1928 }, { "epoch": 0.1562702527543746, "grad_norm": 0.05242636427283287, "learning_rate": 7.812879708383961e-05, "loss": 0.4042, "step": 1929 }, { "epoch": 0.15635126377187297, "grad_norm": 0.05123385041952133, "learning_rate": 7.816929931146213e-05, "loss": 0.4155, "step": 1930 }, { "epoch": 0.15643227478937136, "grad_norm": 0.05295446887612343, "learning_rate": 7.820980153908465e-05, "loss": 0.3773, "step": 1931 }, { "epoch": 0.15651328580686974, "grad_norm": 0.04457692801952362, "learning_rate": 7.825030376670717e-05, "loss": 0.4005, "step": 1932 }, { "epoch": 0.15659429682436812, "grad_norm": 0.04625699669122696, "learning_rate": 7.829080599432969e-05, "loss": 0.34, "step": 1933 }, { "epoch": 0.1566753078418665, "grad_norm": 0.049518827348947525, "learning_rate": 7.833130822195221e-05, "loss": 0.3716, "step": 1934 }, { "epoch": 0.15675631885936486, "grad_norm": 0.06226338446140289, "learning_rate": 7.837181044957473e-05, "loss": 0.4163, "step": 1935 }, { "epoch": 0.15683732987686325, "grad_norm": 0.05170482024550438, "learning_rate": 7.841231267719725e-05, "loss": 0.3994, "step": 1936 }, { "epoch": 0.15691834089436163, "grad_norm": 0.06363697350025177, "learning_rate": 7.845281490481977e-05, "loss": 0.4003, "step": 1937 }, { "epoch": 0.15699935191186, "grad_norm": 0.05502576008439064, "learning_rate": 7.849331713244229e-05, "loss": 0.4276, "step": 1938 }, { "epoch": 0.1570803629293584, "grad_norm": 0.05680760741233826, "learning_rate": 7.853381936006481e-05, "loss": 0.4005, "step": 1939 }, { "epoch": 0.15716137394685678, "grad_norm": 0.05502947419881821, "learning_rate": 7.857432158768733e-05, "loss": 0.378, "step": 1940 }, { "epoch": 0.15724238496435516, "grad_norm": 0.04791278392076492, "learning_rate": 7.861482381530985e-05, "loss": 0.4095, "step": 1941 }, { "epoch": 0.15732339598185352, "grad_norm": 0.05494358763098717, "learning_rate": 7.865532604293237e-05, "loss": 0.4119, "step": 1942 }, { "epoch": 0.1574044069993519, "grad_norm": 0.061576370149850845, "learning_rate": 7.869582827055489e-05, "loss": 0.4125, "step": 1943 }, { "epoch": 0.1574854180168503, "grad_norm": 0.05027461051940918, "learning_rate": 7.87363304981774e-05, "loss": 0.3575, "step": 1944 }, { "epoch": 0.15756642903434867, "grad_norm": 0.07733383774757385, "learning_rate": 7.877683272579992e-05, "loss": 0.4461, "step": 1945 }, { "epoch": 0.15764744005184705, "grad_norm": 0.06613076478242874, "learning_rate": 7.881733495342243e-05, "loss": 0.4225, "step": 1946 }, { "epoch": 0.15772845106934544, "grad_norm": 0.05832867696881294, "learning_rate": 7.885783718104495e-05, "loss": 0.3247, "step": 1947 }, { "epoch": 0.15780946208684382, "grad_norm": 0.04764074459671974, "learning_rate": 7.889833940866747e-05, "loss": 0.37, "step": 1948 }, { "epoch": 0.15789047310434218, "grad_norm": 0.05749349296092987, "learning_rate": 7.893884163629e-05, "loss": 0.3437, "step": 1949 }, { "epoch": 0.15797148412184056, "grad_norm": 0.05542978271842003, "learning_rate": 7.897934386391251e-05, "loss": 0.4011, "step": 1950 }, { "epoch": 0.15805249513933894, "grad_norm": 0.043319717049598694, "learning_rate": 7.901984609153505e-05, "loss": 0.403, "step": 1951 }, { "epoch": 0.15813350615683733, "grad_norm": 0.06057552993297577, "learning_rate": 7.906034831915757e-05, "loss": 0.4068, "step": 1952 }, { "epoch": 0.1582145171743357, "grad_norm": 0.06298187375068665, "learning_rate": 7.910085054678009e-05, "loss": 0.4251, "step": 1953 }, { "epoch": 0.1582955281918341, "grad_norm": 0.06181003153324127, "learning_rate": 7.91413527744026e-05, "loss": 0.3637, "step": 1954 }, { "epoch": 0.15837653920933248, "grad_norm": 0.05909266695380211, "learning_rate": 7.918185500202511e-05, "loss": 0.4113, "step": 1955 }, { "epoch": 0.15845755022683086, "grad_norm": 0.055890344083309174, "learning_rate": 7.922235722964763e-05, "loss": 0.3859, "step": 1956 }, { "epoch": 0.15853856124432922, "grad_norm": 0.055468108505010605, "learning_rate": 7.926285945727015e-05, "loss": 0.3716, "step": 1957 }, { "epoch": 0.1586195722618276, "grad_norm": 0.06345760822296143, "learning_rate": 7.930336168489267e-05, "loss": 0.376, "step": 1958 }, { "epoch": 0.15870058327932599, "grad_norm": 0.08457563072443008, "learning_rate": 7.934386391251519e-05, "loss": 0.385, "step": 1959 }, { "epoch": 0.15878159429682437, "grad_norm": 0.06403377652168274, "learning_rate": 7.938436614013771e-05, "loss": 0.3838, "step": 1960 }, { "epoch": 0.15886260531432275, "grad_norm": 0.06183291971683502, "learning_rate": 7.942486836776023e-05, "loss": 0.3982, "step": 1961 }, { "epoch": 0.15894361633182114, "grad_norm": 0.0550561286509037, "learning_rate": 7.946537059538275e-05, "loss": 0.3767, "step": 1962 }, { "epoch": 0.15902462734931952, "grad_norm": 0.058353181928396225, "learning_rate": 7.950587282300527e-05, "loss": 0.3592, "step": 1963 }, { "epoch": 0.15910563836681788, "grad_norm": 0.06605188548564911, "learning_rate": 7.954637505062779e-05, "loss": 0.4754, "step": 1964 }, { "epoch": 0.15918664938431626, "grad_norm": 0.05959831550717354, "learning_rate": 7.958687727825031e-05, "loss": 0.4033, "step": 1965 }, { "epoch": 0.15926766040181464, "grad_norm": 0.0478963702917099, "learning_rate": 7.962737950587283e-05, "loss": 0.3678, "step": 1966 }, { "epoch": 0.15934867141931303, "grad_norm": 0.058596860617399216, "learning_rate": 7.966788173349535e-05, "loss": 0.3687, "step": 1967 }, { "epoch": 0.1594296824368114, "grad_norm": 0.0579354427754879, "learning_rate": 7.970838396111785e-05, "loss": 0.39, "step": 1968 }, { "epoch": 0.1595106934543098, "grad_norm": 0.06575972586870193, "learning_rate": 7.974888618874037e-05, "loss": 0.4387, "step": 1969 }, { "epoch": 0.15959170447180818, "grad_norm": 0.06601779907941818, "learning_rate": 7.978938841636291e-05, "loss": 0.3899, "step": 1970 }, { "epoch": 0.15967271548930653, "grad_norm": 0.08127705752849579, "learning_rate": 7.982989064398543e-05, "loss": 0.4376, "step": 1971 }, { "epoch": 0.15975372650680492, "grad_norm": 0.05637722462415695, "learning_rate": 7.987039287160795e-05, "loss": 0.3444, "step": 1972 }, { "epoch": 0.1598347375243033, "grad_norm": 0.04732414335012436, "learning_rate": 7.991089509923047e-05, "loss": 0.3893, "step": 1973 }, { "epoch": 0.15991574854180168, "grad_norm": 0.056384116411209106, "learning_rate": 7.995139732685299e-05, "loss": 0.3915, "step": 1974 }, { "epoch": 0.15999675955930007, "grad_norm": 0.059659332036972046, "learning_rate": 7.99918995544755e-05, "loss": 0.4129, "step": 1975 }, { "epoch": 0.16007777057679845, "grad_norm": 0.0602417029440403, "learning_rate": 8.003240178209802e-05, "loss": 0.3395, "step": 1976 }, { "epoch": 0.16015878159429683, "grad_norm": 0.04651299864053726, "learning_rate": 8.007290400972054e-05, "loss": 0.367, "step": 1977 }, { "epoch": 0.16023979261179522, "grad_norm": 0.05572040379047394, "learning_rate": 8.011340623734306e-05, "loss": 0.4314, "step": 1978 }, { "epoch": 0.16032080362929357, "grad_norm": 0.06522223353385925, "learning_rate": 8.015390846496557e-05, "loss": 0.3829, "step": 1979 }, { "epoch": 0.16040181464679196, "grad_norm": 0.07616066187620163, "learning_rate": 8.019441069258809e-05, "loss": 0.4696, "step": 1980 }, { "epoch": 0.16048282566429034, "grad_norm": 0.04531807824969292, "learning_rate": 8.023491292021061e-05, "loss": 0.3622, "step": 1981 }, { "epoch": 0.16056383668178872, "grad_norm": 0.05485687404870987, "learning_rate": 8.027541514783313e-05, "loss": 0.4081, "step": 1982 }, { "epoch": 0.1606448476992871, "grad_norm": 0.05889255553483963, "learning_rate": 8.031591737545565e-05, "loss": 0.3924, "step": 1983 }, { "epoch": 0.1607258587167855, "grad_norm": 0.0493583045899868, "learning_rate": 8.035641960307817e-05, "loss": 0.37, "step": 1984 }, { "epoch": 0.16080686973428387, "grad_norm": 0.04861397668719292, "learning_rate": 8.039692183070069e-05, "loss": 0.3602, "step": 1985 }, { "epoch": 0.16088788075178223, "grad_norm": 0.04801037162542343, "learning_rate": 8.043742405832321e-05, "loss": 0.3762, "step": 1986 }, { "epoch": 0.16096889176928061, "grad_norm": 0.055991508066654205, "learning_rate": 8.047792628594573e-05, "loss": 0.4076, "step": 1987 }, { "epoch": 0.161049902786779, "grad_norm": 0.06045025214552879, "learning_rate": 8.051842851356825e-05, "loss": 0.4172, "step": 1988 }, { "epoch": 0.16113091380427738, "grad_norm": 0.04982104152441025, "learning_rate": 8.055893074119078e-05, "loss": 0.3778, "step": 1989 }, { "epoch": 0.16121192482177576, "grad_norm": 0.0629178062081337, "learning_rate": 8.059943296881329e-05, "loss": 0.4051, "step": 1990 }, { "epoch": 0.16129293583927415, "grad_norm": 0.05481969192624092, "learning_rate": 8.06399351964358e-05, "loss": 0.3788, "step": 1991 }, { "epoch": 0.16137394685677253, "grad_norm": 0.07303039729595184, "learning_rate": 8.068043742405833e-05, "loss": 0.3684, "step": 1992 }, { "epoch": 0.1614549578742709, "grad_norm": 0.0633680671453476, "learning_rate": 8.072093965168085e-05, "loss": 0.356, "step": 1993 }, { "epoch": 0.16153596889176927, "grad_norm": 0.04789692908525467, "learning_rate": 8.076144187930337e-05, "loss": 0.4002, "step": 1994 }, { "epoch": 0.16161697990926766, "grad_norm": 0.05857124179601669, "learning_rate": 8.080194410692589e-05, "loss": 0.3854, "step": 1995 }, { "epoch": 0.16169799092676604, "grad_norm": 0.04986008629202843, "learning_rate": 8.08424463345484e-05, "loss": 0.3633, "step": 1996 }, { "epoch": 0.16177900194426442, "grad_norm": 0.055833835154771805, "learning_rate": 8.088294856217092e-05, "loss": 0.3494, "step": 1997 }, { "epoch": 0.1618600129617628, "grad_norm": 0.057907700538635254, "learning_rate": 8.092345078979344e-05, "loss": 0.4351, "step": 1998 }, { "epoch": 0.1619410239792612, "grad_norm": 0.06546240299940109, "learning_rate": 8.096395301741596e-05, "loss": 0.4461, "step": 1999 }, { "epoch": 0.16202203499675957, "grad_norm": 0.052776411175727844, "learning_rate": 8.100445524503848e-05, "loss": 0.3789, "step": 2000 }, { "epoch": 0.16210304601425793, "grad_norm": 0.05560920014977455, "learning_rate": 8.1044957472661e-05, "loss": 0.4568, "step": 2001 }, { "epoch": 0.1621840570317563, "grad_norm": 0.06929999589920044, "learning_rate": 8.108545970028352e-05, "loss": 0.3983, "step": 2002 }, { "epoch": 0.1622650680492547, "grad_norm": 0.051275137811899185, "learning_rate": 8.112596192790603e-05, "loss": 0.4339, "step": 2003 }, { "epoch": 0.16234607906675308, "grad_norm": 0.061996664851903915, "learning_rate": 8.116646415552855e-05, "loss": 0.3624, "step": 2004 }, { "epoch": 0.16242709008425146, "grad_norm": 0.06525516510009766, "learning_rate": 8.120696638315107e-05, "loss": 0.3954, "step": 2005 }, { "epoch": 0.16250810110174985, "grad_norm": 0.04934125021100044, "learning_rate": 8.124746861077359e-05, "loss": 0.3958, "step": 2006 }, { "epoch": 0.16258911211924823, "grad_norm": 0.05546298250555992, "learning_rate": 8.128797083839611e-05, "loss": 0.3855, "step": 2007 }, { "epoch": 0.16267012313674659, "grad_norm": 0.057532209903001785, "learning_rate": 8.132847306601864e-05, "loss": 0.3938, "step": 2008 }, { "epoch": 0.16275113415424497, "grad_norm": 0.06330697983503342, "learning_rate": 8.136897529364116e-05, "loss": 0.3873, "step": 2009 }, { "epoch": 0.16283214517174335, "grad_norm": 0.06267868727445602, "learning_rate": 8.140947752126368e-05, "loss": 0.4103, "step": 2010 }, { "epoch": 0.16291315618924174, "grad_norm": 0.06294619292020798, "learning_rate": 8.14499797488862e-05, "loss": 0.4097, "step": 2011 }, { "epoch": 0.16299416720674012, "grad_norm": 0.046896860003471375, "learning_rate": 8.149048197650872e-05, "loss": 0.3275, "step": 2012 }, { "epoch": 0.1630751782242385, "grad_norm": 0.05417289212346077, "learning_rate": 8.153098420413124e-05, "loss": 0.3654, "step": 2013 }, { "epoch": 0.1631561892417369, "grad_norm": 0.06072888895869255, "learning_rate": 8.157148643175375e-05, "loss": 0.3878, "step": 2014 }, { "epoch": 0.16323720025923524, "grad_norm": 0.08595926314592361, "learning_rate": 8.161198865937626e-05, "loss": 0.4025, "step": 2015 }, { "epoch": 0.16331821127673363, "grad_norm": 0.061273351311683655, "learning_rate": 8.165249088699878e-05, "loss": 0.386, "step": 2016 }, { "epoch": 0.163399222294232, "grad_norm": 0.05186104774475098, "learning_rate": 8.16929931146213e-05, "loss": 0.3795, "step": 2017 }, { "epoch": 0.1634802333117304, "grad_norm": 0.053132180124521255, "learning_rate": 8.173349534224382e-05, "loss": 0.387, "step": 2018 }, { "epoch": 0.16356124432922878, "grad_norm": 0.07236981391906738, "learning_rate": 8.177399756986634e-05, "loss": 0.3965, "step": 2019 }, { "epoch": 0.16364225534672716, "grad_norm": 0.0509367398917675, "learning_rate": 8.181449979748886e-05, "loss": 0.3846, "step": 2020 }, { "epoch": 0.16372326636422554, "grad_norm": 0.07419263571500778, "learning_rate": 8.185500202511138e-05, "loss": 0.4159, "step": 2021 }, { "epoch": 0.1638042773817239, "grad_norm": 0.05094772204756737, "learning_rate": 8.18955042527339e-05, "loss": 0.316, "step": 2022 }, { "epoch": 0.16388528839922228, "grad_norm": 0.054001349955797195, "learning_rate": 8.193600648035642e-05, "loss": 0.3751, "step": 2023 }, { "epoch": 0.16396629941672067, "grad_norm": 0.047819335013628006, "learning_rate": 8.197650870797894e-05, "loss": 0.3919, "step": 2024 }, { "epoch": 0.16404731043421905, "grad_norm": 0.056271765381097794, "learning_rate": 8.201701093560146e-05, "loss": 0.4111, "step": 2025 }, { "epoch": 0.16412832145171743, "grad_norm": 0.04949527233839035, "learning_rate": 8.205751316322398e-05, "loss": 0.4071, "step": 2026 }, { "epoch": 0.16420933246921582, "grad_norm": 0.05989866331219673, "learning_rate": 8.20980153908465e-05, "loss": 0.3947, "step": 2027 }, { "epoch": 0.1642903434867142, "grad_norm": 0.0578424446284771, "learning_rate": 8.213851761846902e-05, "loss": 0.4411, "step": 2028 }, { "epoch": 0.16437135450421259, "grad_norm": 0.05793230980634689, "learning_rate": 8.217901984609154e-05, "loss": 0.3714, "step": 2029 }, { "epoch": 0.16445236552171094, "grad_norm": 0.05304427817463875, "learning_rate": 8.221952207371406e-05, "loss": 0.3843, "step": 2030 }, { "epoch": 0.16453337653920933, "grad_norm": 0.050228215754032135, "learning_rate": 8.226002430133658e-05, "loss": 0.4134, "step": 2031 }, { "epoch": 0.1646143875567077, "grad_norm": 0.053763143718242645, "learning_rate": 8.23005265289591e-05, "loss": 0.3539, "step": 2032 }, { "epoch": 0.1646953985742061, "grad_norm": 0.06726308166980743, "learning_rate": 8.234102875658162e-05, "loss": 0.4218, "step": 2033 }, { "epoch": 0.16477640959170448, "grad_norm": 0.06924933940172195, "learning_rate": 8.238153098420414e-05, "loss": 0.3621, "step": 2034 }, { "epoch": 0.16485742060920286, "grad_norm": 0.057229988276958466, "learning_rate": 8.242203321182666e-05, "loss": 0.3866, "step": 2035 }, { "epoch": 0.16493843162670124, "grad_norm": 0.09982109814882278, "learning_rate": 8.246253543944918e-05, "loss": 0.4047, "step": 2036 }, { "epoch": 0.1650194426441996, "grad_norm": 0.07248137146234512, "learning_rate": 8.25030376670717e-05, "loss": 0.458, "step": 2037 }, { "epoch": 0.16510045366169798, "grad_norm": 0.05413713678717613, "learning_rate": 8.254353989469422e-05, "loss": 0.3775, "step": 2038 }, { "epoch": 0.16518146467919637, "grad_norm": 0.058229487389326096, "learning_rate": 8.258404212231672e-05, "loss": 0.371, "step": 2039 }, { "epoch": 0.16526247569669475, "grad_norm": 0.06540478765964508, "learning_rate": 8.262454434993924e-05, "loss": 0.4214, "step": 2040 }, { "epoch": 0.16534348671419313, "grad_norm": 0.050467513501644135, "learning_rate": 8.266504657756176e-05, "loss": 0.3416, "step": 2041 }, { "epoch": 0.16542449773169152, "grad_norm": 0.11017223447561264, "learning_rate": 8.270554880518428e-05, "loss": 0.3417, "step": 2042 }, { "epoch": 0.1655055087491899, "grad_norm": 0.061050888150930405, "learning_rate": 8.27460510328068e-05, "loss": 0.3726, "step": 2043 }, { "epoch": 0.16558651976668826, "grad_norm": 0.058097269386053085, "learning_rate": 8.278655326042932e-05, "loss": 0.4059, "step": 2044 }, { "epoch": 0.16566753078418664, "grad_norm": 0.0401441790163517, "learning_rate": 8.282705548805186e-05, "loss": 0.3885, "step": 2045 }, { "epoch": 0.16574854180168502, "grad_norm": 0.05544157698750496, "learning_rate": 8.286755771567437e-05, "loss": 0.3818, "step": 2046 }, { "epoch": 0.1658295528191834, "grad_norm": 0.06059475243091583, "learning_rate": 8.29080599432969e-05, "loss": 0.3719, "step": 2047 }, { "epoch": 0.1659105638366818, "grad_norm": 0.05821231007575989, "learning_rate": 8.294856217091941e-05, "loss": 0.3742, "step": 2048 }, { "epoch": 0.16599157485418017, "grad_norm": 0.0714845061302185, "learning_rate": 8.298906439854193e-05, "loss": 0.467, "step": 2049 }, { "epoch": 0.16607258587167856, "grad_norm": 0.06715501844882965, "learning_rate": 8.302956662616444e-05, "loss": 0.4076, "step": 2050 }, { "epoch": 0.16615359688917694, "grad_norm": 0.07473434507846832, "learning_rate": 8.307006885378696e-05, "loss": 0.4136, "step": 2051 }, { "epoch": 0.1662346079066753, "grad_norm": 0.0510735958814621, "learning_rate": 8.311057108140948e-05, "loss": 0.3626, "step": 2052 }, { "epoch": 0.16631561892417368, "grad_norm": 0.046676717698574066, "learning_rate": 8.3151073309032e-05, "loss": 0.4257, "step": 2053 }, { "epoch": 0.16639662994167206, "grad_norm": 0.04868389293551445, "learning_rate": 8.319157553665452e-05, "loss": 0.3741, "step": 2054 }, { "epoch": 0.16647764095917045, "grad_norm": 0.051401641219854355, "learning_rate": 8.323207776427704e-05, "loss": 0.3741, "step": 2055 }, { "epoch": 0.16655865197666883, "grad_norm": 0.04849712923169136, "learning_rate": 8.327257999189956e-05, "loss": 0.4219, "step": 2056 }, { "epoch": 0.16663966299416721, "grad_norm": 0.04904567822813988, "learning_rate": 8.331308221952208e-05, "loss": 0.3797, "step": 2057 }, { "epoch": 0.1667206740116656, "grad_norm": 0.06897692382335663, "learning_rate": 8.33535844471446e-05, "loss": 0.3907, "step": 2058 }, { "epoch": 0.16680168502916395, "grad_norm": 0.057387739419937134, "learning_rate": 8.339408667476712e-05, "loss": 0.364, "step": 2059 }, { "epoch": 0.16688269604666234, "grad_norm": 0.056419409811496735, "learning_rate": 8.343458890238964e-05, "loss": 0.3966, "step": 2060 }, { "epoch": 0.16696370706416072, "grad_norm": 0.05423840880393982, "learning_rate": 8.347509113001216e-05, "loss": 0.378, "step": 2061 }, { "epoch": 0.1670447180816591, "grad_norm": 0.05476713925600052, "learning_rate": 8.351559335763468e-05, "loss": 0.3787, "step": 2062 }, { "epoch": 0.1671257290991575, "grad_norm": 0.0587175115942955, "learning_rate": 8.355609558525718e-05, "loss": 0.3939, "step": 2063 }, { "epoch": 0.16720674011665587, "grad_norm": 0.059848107397556305, "learning_rate": 8.359659781287972e-05, "loss": 0.386, "step": 2064 }, { "epoch": 0.16728775113415426, "grad_norm": 0.05978243052959442, "learning_rate": 8.363710004050223e-05, "loss": 0.3865, "step": 2065 }, { "epoch": 0.1673687621516526, "grad_norm": 0.052778005599975586, "learning_rate": 8.367760226812475e-05, "loss": 0.379, "step": 2066 }, { "epoch": 0.167449773169151, "grad_norm": 0.05640120059251785, "learning_rate": 8.371810449574727e-05, "loss": 0.4059, "step": 2067 }, { "epoch": 0.16753078418664938, "grad_norm": 0.06634902209043503, "learning_rate": 8.37586067233698e-05, "loss": 0.3989, "step": 2068 }, { "epoch": 0.16761179520414776, "grad_norm": 0.06636673957109451, "learning_rate": 8.379910895099231e-05, "loss": 0.4047, "step": 2069 }, { "epoch": 0.16769280622164615, "grad_norm": 0.060244470834732056, "learning_rate": 8.383961117861483e-05, "loss": 0.3811, "step": 2070 }, { "epoch": 0.16777381723914453, "grad_norm": 0.05000202730298042, "learning_rate": 8.388011340623735e-05, "loss": 0.4119, "step": 2071 }, { "epoch": 0.1678548282566429, "grad_norm": 0.05411381646990776, "learning_rate": 8.392061563385987e-05, "loss": 0.3634, "step": 2072 }, { "epoch": 0.1679358392741413, "grad_norm": 0.046290017664432526, "learning_rate": 8.396111786148239e-05, "loss": 0.4702, "step": 2073 }, { "epoch": 0.16801685029163965, "grad_norm": 0.05964218080043793, "learning_rate": 8.40016200891049e-05, "loss": 0.4264, "step": 2074 }, { "epoch": 0.16809786130913804, "grad_norm": 0.06505851447582245, "learning_rate": 8.404212231672742e-05, "loss": 0.388, "step": 2075 }, { "epoch": 0.16817887232663642, "grad_norm": 0.07671922445297241, "learning_rate": 8.408262454434994e-05, "loss": 0.4455, "step": 2076 }, { "epoch": 0.1682598833441348, "grad_norm": 0.0595112070441246, "learning_rate": 8.412312677197246e-05, "loss": 0.3761, "step": 2077 }, { "epoch": 0.1683408943616332, "grad_norm": 0.055115941911935806, "learning_rate": 8.416362899959498e-05, "loss": 0.3811, "step": 2078 }, { "epoch": 0.16842190537913157, "grad_norm": 0.04717046394944191, "learning_rate": 8.42041312272175e-05, "loss": 0.353, "step": 2079 }, { "epoch": 0.16850291639662995, "grad_norm": 0.0552898645401001, "learning_rate": 8.424463345484002e-05, "loss": 0.3661, "step": 2080 }, { "epoch": 0.1685839274141283, "grad_norm": 0.06371273100376129, "learning_rate": 8.428513568246254e-05, "loss": 0.4038, "step": 2081 }, { "epoch": 0.1686649384316267, "grad_norm": 0.06050824373960495, "learning_rate": 8.432563791008506e-05, "loss": 0.3478, "step": 2082 }, { "epoch": 0.16874594944912508, "grad_norm": 0.05126861855387688, "learning_rate": 8.436614013770759e-05, "loss": 0.4055, "step": 2083 }, { "epoch": 0.16882696046662346, "grad_norm": 0.05001140385866165, "learning_rate": 8.440664236533011e-05, "loss": 0.4168, "step": 2084 }, { "epoch": 0.16890797148412184, "grad_norm": 0.07154504209756851, "learning_rate": 8.444714459295261e-05, "loss": 0.3987, "step": 2085 }, { "epoch": 0.16898898250162023, "grad_norm": 0.06982825696468353, "learning_rate": 8.448764682057513e-05, "loss": 0.4167, "step": 2086 }, { "epoch": 0.1690699935191186, "grad_norm": 0.051283951848745346, "learning_rate": 8.452814904819765e-05, "loss": 0.3803, "step": 2087 }, { "epoch": 0.16915100453661697, "grad_norm": 0.054312046617269516, "learning_rate": 8.456865127582017e-05, "loss": 0.408, "step": 2088 }, { "epoch": 0.16923201555411535, "grad_norm": 0.061378683894872665, "learning_rate": 8.46091535034427e-05, "loss": 0.4033, "step": 2089 }, { "epoch": 0.16931302657161373, "grad_norm": 0.0683789774775505, "learning_rate": 8.464965573106521e-05, "loss": 0.4117, "step": 2090 }, { "epoch": 0.16939403758911212, "grad_norm": 0.06293641030788422, "learning_rate": 8.469015795868773e-05, "loss": 0.3856, "step": 2091 }, { "epoch": 0.1694750486066105, "grad_norm": 0.060117315500974655, "learning_rate": 8.473066018631025e-05, "loss": 0.453, "step": 2092 }, { "epoch": 0.16955605962410888, "grad_norm": 0.05961222946643829, "learning_rate": 8.477116241393277e-05, "loss": 0.4213, "step": 2093 }, { "epoch": 0.16963707064160727, "grad_norm": 0.05129764601588249, "learning_rate": 8.481166464155529e-05, "loss": 0.3548, "step": 2094 }, { "epoch": 0.16971808165910565, "grad_norm": 0.06676856428384781, "learning_rate": 8.485216686917781e-05, "loss": 0.425, "step": 2095 }, { "epoch": 0.169799092676604, "grad_norm": 0.05692289024591446, "learning_rate": 8.489266909680033e-05, "loss": 0.3769, "step": 2096 }, { "epoch": 0.1698801036941024, "grad_norm": 0.056856393814086914, "learning_rate": 8.493317132442285e-05, "loss": 0.3897, "step": 2097 }, { "epoch": 0.16996111471160077, "grad_norm": 0.05395682156085968, "learning_rate": 8.497367355204536e-05, "loss": 0.3646, "step": 2098 }, { "epoch": 0.17004212572909916, "grad_norm": 0.051694948226213455, "learning_rate": 8.501417577966788e-05, "loss": 0.3813, "step": 2099 }, { "epoch": 0.17012313674659754, "grad_norm": 0.05395714193582535, "learning_rate": 8.50546780072904e-05, "loss": 0.38, "step": 2100 }, { "epoch": 0.17020414776409593, "grad_norm": 0.061914216727018356, "learning_rate": 8.509518023491292e-05, "loss": 0.3776, "step": 2101 }, { "epoch": 0.1702851587815943, "grad_norm": 0.05741438642144203, "learning_rate": 8.513568246253545e-05, "loss": 0.3861, "step": 2102 }, { "epoch": 0.17036616979909266, "grad_norm": 0.0539533868432045, "learning_rate": 8.517618469015797e-05, "loss": 0.3972, "step": 2103 }, { "epoch": 0.17044718081659105, "grad_norm": 0.06513522565364838, "learning_rate": 8.521668691778049e-05, "loss": 0.3707, "step": 2104 }, { "epoch": 0.17052819183408943, "grad_norm": 0.04477247595787048, "learning_rate": 8.525718914540301e-05, "loss": 0.3624, "step": 2105 }, { "epoch": 0.17060920285158782, "grad_norm": 0.058546360582113266, "learning_rate": 8.529769137302553e-05, "loss": 0.3551, "step": 2106 }, { "epoch": 0.1706902138690862, "grad_norm": 0.08391832560300827, "learning_rate": 8.533819360064805e-05, "loss": 0.4142, "step": 2107 }, { "epoch": 0.17077122488658458, "grad_norm": 0.0546516515314579, "learning_rate": 8.537869582827057e-05, "loss": 0.3747, "step": 2108 }, { "epoch": 0.17085223590408297, "grad_norm": 0.06025971099734306, "learning_rate": 8.541919805589307e-05, "loss": 0.4133, "step": 2109 }, { "epoch": 0.17093324692158132, "grad_norm": 0.05930865556001663, "learning_rate": 8.545970028351559e-05, "loss": 0.3902, "step": 2110 }, { "epoch": 0.1710142579390797, "grad_norm": 0.047234680503606796, "learning_rate": 8.550020251113811e-05, "loss": 0.3234, "step": 2111 }, { "epoch": 0.1710952689565781, "grad_norm": 0.05951959267258644, "learning_rate": 8.554070473876063e-05, "loss": 0.3898, "step": 2112 }, { "epoch": 0.17117627997407647, "grad_norm": 0.04036698862910271, "learning_rate": 8.558120696638315e-05, "loss": 0.3518, "step": 2113 }, { "epoch": 0.17125729099157486, "grad_norm": 0.05052487924695015, "learning_rate": 8.562170919400567e-05, "loss": 0.4164, "step": 2114 }, { "epoch": 0.17133830200907324, "grad_norm": 0.06837450712919235, "learning_rate": 8.566221142162819e-05, "loss": 0.4064, "step": 2115 }, { "epoch": 0.17141931302657162, "grad_norm": 0.05384335294365883, "learning_rate": 8.570271364925071e-05, "loss": 0.3549, "step": 2116 }, { "epoch": 0.17150032404407, "grad_norm": 0.06057494506239891, "learning_rate": 8.574321587687323e-05, "loss": 0.4043, "step": 2117 }, { "epoch": 0.17158133506156836, "grad_norm": 0.06762439012527466, "learning_rate": 8.578371810449575e-05, "loss": 0.3724, "step": 2118 }, { "epoch": 0.17166234607906675, "grad_norm": 0.05397611856460571, "learning_rate": 8.582422033211827e-05, "loss": 0.4143, "step": 2119 }, { "epoch": 0.17174335709656513, "grad_norm": 0.051856379956007004, "learning_rate": 8.586472255974079e-05, "loss": 0.4403, "step": 2120 }, { "epoch": 0.1718243681140635, "grad_norm": 0.052403032779693604, "learning_rate": 8.590522478736331e-05, "loss": 0.3789, "step": 2121 }, { "epoch": 0.1719053791315619, "grad_norm": 0.07184526324272156, "learning_rate": 8.594572701498583e-05, "loss": 0.4007, "step": 2122 }, { "epoch": 0.17198639014906028, "grad_norm": 0.05395899713039398, "learning_rate": 8.598622924260835e-05, "loss": 0.4117, "step": 2123 }, { "epoch": 0.17206740116655866, "grad_norm": 0.05061892420053482, "learning_rate": 8.602673147023087e-05, "loss": 0.3984, "step": 2124 }, { "epoch": 0.17214841218405702, "grad_norm": 0.05283121392130852, "learning_rate": 8.606723369785339e-05, "loss": 0.4138, "step": 2125 }, { "epoch": 0.1722294232015554, "grad_norm": 0.07489524781703949, "learning_rate": 8.610773592547591e-05, "loss": 0.4387, "step": 2126 }, { "epoch": 0.1723104342190538, "grad_norm": 0.05126599967479706, "learning_rate": 8.614823815309843e-05, "loss": 0.363, "step": 2127 }, { "epoch": 0.17239144523655217, "grad_norm": 0.04971858859062195, "learning_rate": 8.618874038072095e-05, "loss": 0.4093, "step": 2128 }, { "epoch": 0.17247245625405055, "grad_norm": 0.04760098457336426, "learning_rate": 8.622924260834347e-05, "loss": 0.4135, "step": 2129 }, { "epoch": 0.17255346727154894, "grad_norm": 0.05287618190050125, "learning_rate": 8.626974483596599e-05, "loss": 0.4061, "step": 2130 }, { "epoch": 0.17263447828904732, "grad_norm": 0.06464332342147827, "learning_rate": 8.63102470635885e-05, "loss": 0.3367, "step": 2131 }, { "epoch": 0.17271548930654568, "grad_norm": 0.070146843791008, "learning_rate": 8.635074929121103e-05, "loss": 0.3794, "step": 2132 }, { "epoch": 0.17279650032404406, "grad_norm": 0.06114206090569496, "learning_rate": 8.639125151883353e-05, "loss": 0.415, "step": 2133 }, { "epoch": 0.17287751134154244, "grad_norm": 0.061411190778017044, "learning_rate": 8.643175374645605e-05, "loss": 0.3569, "step": 2134 }, { "epoch": 0.17295852235904083, "grad_norm": 0.062324997037649155, "learning_rate": 8.647225597407857e-05, "loss": 0.4484, "step": 2135 }, { "epoch": 0.1730395333765392, "grad_norm": 0.04799463227391243, "learning_rate": 8.651275820170109e-05, "loss": 0.3216, "step": 2136 }, { "epoch": 0.1731205443940376, "grad_norm": 0.05411509796977043, "learning_rate": 8.655326042932361e-05, "loss": 0.3415, "step": 2137 }, { "epoch": 0.17320155541153598, "grad_norm": 0.05922120809555054, "learning_rate": 8.659376265694613e-05, "loss": 0.4232, "step": 2138 }, { "epoch": 0.17328256642903433, "grad_norm": 0.052514318376779556, "learning_rate": 8.663426488456865e-05, "loss": 0.406, "step": 2139 }, { "epoch": 0.17336357744653272, "grad_norm": 0.06161622330546379, "learning_rate": 8.667476711219118e-05, "loss": 0.3945, "step": 2140 }, { "epoch": 0.1734445884640311, "grad_norm": 0.05787722021341324, "learning_rate": 8.67152693398137e-05, "loss": 0.4028, "step": 2141 }, { "epoch": 0.17352559948152949, "grad_norm": 0.05399668589234352, "learning_rate": 8.675577156743622e-05, "loss": 0.3698, "step": 2142 }, { "epoch": 0.17360661049902787, "grad_norm": 0.0524609200656414, "learning_rate": 8.679627379505874e-05, "loss": 0.328, "step": 2143 }, { "epoch": 0.17368762151652625, "grad_norm": 0.04540975019335747, "learning_rate": 8.683677602268125e-05, "loss": 0.3855, "step": 2144 }, { "epoch": 0.17376863253402464, "grad_norm": 0.049933046102523804, "learning_rate": 8.687727825030377e-05, "loss": 0.3817, "step": 2145 }, { "epoch": 0.17384964355152302, "grad_norm": 0.06545059382915497, "learning_rate": 8.691778047792629e-05, "loss": 0.383, "step": 2146 }, { "epoch": 0.17393065456902138, "grad_norm": 0.05309139937162399, "learning_rate": 8.695828270554881e-05, "loss": 0.351, "step": 2147 }, { "epoch": 0.17401166558651976, "grad_norm": 0.06722250580787659, "learning_rate": 8.699878493317133e-05, "loss": 0.4386, "step": 2148 }, { "epoch": 0.17409267660401814, "grad_norm": 0.06491897255182266, "learning_rate": 8.703928716079385e-05, "loss": 0.417, "step": 2149 }, { "epoch": 0.17417368762151653, "grad_norm": 0.04083773121237755, "learning_rate": 8.707978938841637e-05, "loss": 0.3465, "step": 2150 }, { "epoch": 0.1742546986390149, "grad_norm": 0.06629964709281921, "learning_rate": 8.712029161603889e-05, "loss": 0.3801, "step": 2151 }, { "epoch": 0.1743357096565133, "grad_norm": 0.045432791113853455, "learning_rate": 8.71607938436614e-05, "loss": 0.4369, "step": 2152 }, { "epoch": 0.17441672067401168, "grad_norm": 0.045189496129751205, "learning_rate": 8.720129607128393e-05, "loss": 0.3773, "step": 2153 }, { "epoch": 0.17449773169151003, "grad_norm": 0.04340081661939621, "learning_rate": 8.724179829890644e-05, "loss": 0.3656, "step": 2154 }, { "epoch": 0.17457874270900842, "grad_norm": 0.05498838797211647, "learning_rate": 8.728230052652896e-05, "loss": 0.3915, "step": 2155 }, { "epoch": 0.1746597537265068, "grad_norm": 0.0468340627849102, "learning_rate": 8.732280275415148e-05, "loss": 0.3468, "step": 2156 }, { "epoch": 0.17474076474400518, "grad_norm": 0.05144199728965759, "learning_rate": 8.736330498177399e-05, "loss": 0.3562, "step": 2157 }, { "epoch": 0.17482177576150357, "grad_norm": 0.05355091020464897, "learning_rate": 8.740380720939651e-05, "loss": 0.4315, "step": 2158 }, { "epoch": 0.17490278677900195, "grad_norm": 0.04960298538208008, "learning_rate": 8.744430943701904e-05, "loss": 0.3555, "step": 2159 }, { "epoch": 0.17498379779650033, "grad_norm": 0.05432707816362381, "learning_rate": 8.748481166464156e-05, "loss": 0.3638, "step": 2160 }, { "epoch": 0.1750648088139987, "grad_norm": 0.047047730535268784, "learning_rate": 8.752531389226408e-05, "loss": 0.3595, "step": 2161 }, { "epoch": 0.17514581983149707, "grad_norm": 0.04662622511386871, "learning_rate": 8.75658161198866e-05, "loss": 0.3562, "step": 2162 }, { "epoch": 0.17522683084899546, "grad_norm": 0.05988886579871178, "learning_rate": 8.760631834750912e-05, "loss": 0.3803, "step": 2163 }, { "epoch": 0.17530784186649384, "grad_norm": 0.06446512788534164, "learning_rate": 8.764682057513164e-05, "loss": 0.3907, "step": 2164 }, { "epoch": 0.17538885288399222, "grad_norm": 0.05944246053695679, "learning_rate": 8.768732280275416e-05, "loss": 0.3786, "step": 2165 }, { "epoch": 0.1754698639014906, "grad_norm": 0.06325705349445343, "learning_rate": 8.772782503037668e-05, "loss": 0.3958, "step": 2166 }, { "epoch": 0.175550874918989, "grad_norm": 0.05642695724964142, "learning_rate": 8.77683272579992e-05, "loss": 0.3966, "step": 2167 }, { "epoch": 0.17563188593648738, "grad_norm": 0.06587480753660202, "learning_rate": 8.78088294856217e-05, "loss": 0.3944, "step": 2168 }, { "epoch": 0.17571289695398573, "grad_norm": 0.07901154458522797, "learning_rate": 8.784933171324423e-05, "loss": 0.3428, "step": 2169 }, { "epoch": 0.17579390797148411, "grad_norm": 0.05240624397993088, "learning_rate": 8.788983394086675e-05, "loss": 0.3707, "step": 2170 }, { "epoch": 0.1758749189889825, "grad_norm": 0.05577824264764786, "learning_rate": 8.793033616848927e-05, "loss": 0.4076, "step": 2171 }, { "epoch": 0.17595593000648088, "grad_norm": 0.05159473046660423, "learning_rate": 8.797083839611179e-05, "loss": 0.3711, "step": 2172 }, { "epoch": 0.17603694102397927, "grad_norm": 0.06700492650270462, "learning_rate": 8.80113406237343e-05, "loss": 0.4164, "step": 2173 }, { "epoch": 0.17611795204147765, "grad_norm": 0.05094028636813164, "learning_rate": 8.805184285135682e-05, "loss": 0.4056, "step": 2174 }, { "epoch": 0.17619896305897603, "grad_norm": 0.04142050817608833, "learning_rate": 8.809234507897934e-05, "loss": 0.3825, "step": 2175 }, { "epoch": 0.1762799740764744, "grad_norm": 0.05937556177377701, "learning_rate": 8.813284730660186e-05, "loss": 0.3708, "step": 2176 }, { "epoch": 0.17636098509397277, "grad_norm": 0.06152129918336868, "learning_rate": 8.81733495342244e-05, "loss": 0.4548, "step": 2177 }, { "epoch": 0.17644199611147116, "grad_norm": 0.057044580578804016, "learning_rate": 8.821385176184692e-05, "loss": 0.424, "step": 2178 }, { "epoch": 0.17652300712896954, "grad_norm": 0.04587271809577942, "learning_rate": 8.825435398946942e-05, "loss": 0.4239, "step": 2179 }, { "epoch": 0.17660401814646792, "grad_norm": 0.05933641269803047, "learning_rate": 8.829485621709194e-05, "loss": 0.4028, "step": 2180 }, { "epoch": 0.1766850291639663, "grad_norm": 0.051318321377038956, "learning_rate": 8.833535844471446e-05, "loss": 0.3685, "step": 2181 }, { "epoch": 0.1767660401814647, "grad_norm": 0.06208323687314987, "learning_rate": 8.837586067233698e-05, "loss": 0.3273, "step": 2182 }, { "epoch": 0.17684705119896305, "grad_norm": 0.06081528961658478, "learning_rate": 8.84163628999595e-05, "loss": 0.4095, "step": 2183 }, { "epoch": 0.17692806221646143, "grad_norm": 0.04740947484970093, "learning_rate": 8.845686512758202e-05, "loss": 0.3657, "step": 2184 }, { "epoch": 0.1770090732339598, "grad_norm": 0.04662710428237915, "learning_rate": 8.849736735520454e-05, "loss": 0.3934, "step": 2185 }, { "epoch": 0.1770900842514582, "grad_norm": 0.05747944489121437, "learning_rate": 8.853786958282706e-05, "loss": 0.4094, "step": 2186 }, { "epoch": 0.17717109526895658, "grad_norm": 0.04955825209617615, "learning_rate": 8.857837181044958e-05, "loss": 0.3559, "step": 2187 }, { "epoch": 0.17725210628645496, "grad_norm": 0.05781328305602074, "learning_rate": 8.86188740380721e-05, "loss": 0.3837, "step": 2188 }, { "epoch": 0.17733311730395335, "grad_norm": 0.0525483638048172, "learning_rate": 8.865937626569462e-05, "loss": 0.3811, "step": 2189 }, { "epoch": 0.17741412832145173, "grad_norm": 0.05579186975955963, "learning_rate": 8.869987849331714e-05, "loss": 0.3574, "step": 2190 }, { "epoch": 0.1774951393389501, "grad_norm": 0.04814240708947182, "learning_rate": 8.874038072093966e-05, "loss": 0.4373, "step": 2191 }, { "epoch": 0.17757615035644847, "grad_norm": 0.04550255835056305, "learning_rate": 8.878088294856218e-05, "loss": 0.4166, "step": 2192 }, { "epoch": 0.17765716137394685, "grad_norm": 0.056230008602142334, "learning_rate": 8.882138517618468e-05, "loss": 0.4454, "step": 2193 }, { "epoch": 0.17773817239144524, "grad_norm": 0.04420791566371918, "learning_rate": 8.88618874038072e-05, "loss": 0.3212, "step": 2194 }, { "epoch": 0.17781918340894362, "grad_norm": 0.06241984665393829, "learning_rate": 8.890238963142972e-05, "loss": 0.3696, "step": 2195 }, { "epoch": 0.177900194426442, "grad_norm": 0.050221629440784454, "learning_rate": 8.894289185905226e-05, "loss": 0.4101, "step": 2196 }, { "epoch": 0.1779812054439404, "grad_norm": 0.050246626138687134, "learning_rate": 8.898339408667478e-05, "loss": 0.3766, "step": 2197 }, { "epoch": 0.17806221646143874, "grad_norm": 0.06250311434268951, "learning_rate": 8.90238963142973e-05, "loss": 0.3499, "step": 2198 }, { "epoch": 0.17814322747893713, "grad_norm": 0.06322769820690155, "learning_rate": 8.906439854191982e-05, "loss": 0.3916, "step": 2199 }, { "epoch": 0.1782242384964355, "grad_norm": 0.06526292115449905, "learning_rate": 8.910490076954234e-05, "loss": 0.3825, "step": 2200 }, { "epoch": 0.1783052495139339, "grad_norm": 0.06252694875001907, "learning_rate": 8.914540299716486e-05, "loss": 0.3992, "step": 2201 }, { "epoch": 0.17838626053143228, "grad_norm": 0.055992960929870605, "learning_rate": 8.918590522478738e-05, "loss": 0.3745, "step": 2202 }, { "epoch": 0.17846727154893066, "grad_norm": 0.04313978552818298, "learning_rate": 8.922640745240988e-05, "loss": 0.3145, "step": 2203 }, { "epoch": 0.17854828256642905, "grad_norm": 0.05175900086760521, "learning_rate": 8.92669096800324e-05, "loss": 0.4054, "step": 2204 }, { "epoch": 0.1786292935839274, "grad_norm": 0.05246730521321297, "learning_rate": 8.930741190765492e-05, "loss": 0.3126, "step": 2205 }, { "epoch": 0.17871030460142578, "grad_norm": 0.06548149883747101, "learning_rate": 8.934791413527744e-05, "loss": 0.4067, "step": 2206 }, { "epoch": 0.17879131561892417, "grad_norm": 0.060931019484996796, "learning_rate": 8.938841636289996e-05, "loss": 0.4066, "step": 2207 }, { "epoch": 0.17887232663642255, "grad_norm": 0.05262281745672226, "learning_rate": 8.942891859052248e-05, "loss": 0.4086, "step": 2208 }, { "epoch": 0.17895333765392094, "grad_norm": 0.050549279898405075, "learning_rate": 8.9469420818145e-05, "loss": 0.3758, "step": 2209 }, { "epoch": 0.17903434867141932, "grad_norm": 0.05054941028356552, "learning_rate": 8.950992304576752e-05, "loss": 0.3482, "step": 2210 }, { "epoch": 0.1791153596889177, "grad_norm": 0.05035468190908432, "learning_rate": 8.955042527339004e-05, "loss": 0.4008, "step": 2211 }, { "epoch": 0.17919637070641609, "grad_norm": 0.05484228581190109, "learning_rate": 8.959092750101256e-05, "loss": 0.3867, "step": 2212 }, { "epoch": 0.17927738172391444, "grad_norm": 0.049987874925136566, "learning_rate": 8.963142972863508e-05, "loss": 0.4075, "step": 2213 }, { "epoch": 0.17935839274141283, "grad_norm": 0.05101454257965088, "learning_rate": 8.96719319562576e-05, "loss": 0.367, "step": 2214 }, { "epoch": 0.1794394037589112, "grad_norm": 0.05252314731478691, "learning_rate": 8.971243418388012e-05, "loss": 0.3951, "step": 2215 }, { "epoch": 0.1795204147764096, "grad_norm": 0.05595763400197029, "learning_rate": 8.975293641150264e-05, "loss": 0.4107, "step": 2216 }, { "epoch": 0.17960142579390798, "grad_norm": 0.051620274782180786, "learning_rate": 8.979343863912516e-05, "loss": 0.3786, "step": 2217 }, { "epoch": 0.17968243681140636, "grad_norm": 0.04558439925312996, "learning_rate": 8.983394086674768e-05, "loss": 0.3614, "step": 2218 }, { "epoch": 0.17976344782890474, "grad_norm": 0.05854353681206703, "learning_rate": 8.98744430943702e-05, "loss": 0.3866, "step": 2219 }, { "epoch": 0.1798444588464031, "grad_norm": 0.06507213413715363, "learning_rate": 8.991494532199272e-05, "loss": 0.337, "step": 2220 }, { "epoch": 0.17992546986390148, "grad_norm": 0.054253242909908295, "learning_rate": 8.995544754961524e-05, "loss": 0.378, "step": 2221 }, { "epoch": 0.18000648088139987, "grad_norm": 0.05274822190403938, "learning_rate": 8.999594977723776e-05, "loss": 0.4095, "step": 2222 }, { "epoch": 0.18008749189889825, "grad_norm": 0.04596314951777458, "learning_rate": 9.003645200486027e-05, "loss": 0.3689, "step": 2223 }, { "epoch": 0.18016850291639663, "grad_norm": 0.04373635724186897, "learning_rate": 9.00769542324828e-05, "loss": 0.3374, "step": 2224 }, { "epoch": 0.18024951393389502, "grad_norm": 0.0654408186674118, "learning_rate": 9.011745646010531e-05, "loss": 0.3679, "step": 2225 }, { "epoch": 0.1803305249513934, "grad_norm": 0.04936238005757332, "learning_rate": 9.015795868772783e-05, "loss": 0.4143, "step": 2226 }, { "epoch": 0.18041153596889176, "grad_norm": 0.05110304057598114, "learning_rate": 9.019846091535035e-05, "loss": 0.3529, "step": 2227 }, { "epoch": 0.18049254698639014, "grad_norm": 0.05016132444143295, "learning_rate": 9.023896314297286e-05, "loss": 0.3847, "step": 2228 }, { "epoch": 0.18057355800388852, "grad_norm": 0.04777160659432411, "learning_rate": 9.027946537059538e-05, "loss": 0.3529, "step": 2229 }, { "epoch": 0.1806545690213869, "grad_norm": 0.06519763171672821, "learning_rate": 9.03199675982179e-05, "loss": 0.3865, "step": 2230 }, { "epoch": 0.1807355800388853, "grad_norm": 0.06276557594537735, "learning_rate": 9.036046982584042e-05, "loss": 0.4392, "step": 2231 }, { "epoch": 0.18081659105638367, "grad_norm": 0.05355183780193329, "learning_rate": 9.040097205346294e-05, "loss": 0.3734, "step": 2232 }, { "epoch": 0.18089760207388206, "grad_norm": 0.041655007749795914, "learning_rate": 9.044147428108546e-05, "loss": 0.3352, "step": 2233 }, { "epoch": 0.18097861309138044, "grad_norm": 0.054301269352436066, "learning_rate": 9.048197650870799e-05, "loss": 0.3925, "step": 2234 }, { "epoch": 0.1810596241088788, "grad_norm": 0.049007266759872437, "learning_rate": 9.052247873633051e-05, "loss": 0.4126, "step": 2235 }, { "epoch": 0.18114063512637718, "grad_norm": 0.04472389444708824, "learning_rate": 9.056298096395303e-05, "loss": 0.3172, "step": 2236 }, { "epoch": 0.18122164614387556, "grad_norm": 0.04902403801679611, "learning_rate": 9.060348319157555e-05, "loss": 0.3517, "step": 2237 }, { "epoch": 0.18130265716137395, "grad_norm": 0.05876627936959267, "learning_rate": 9.064398541919807e-05, "loss": 0.3968, "step": 2238 }, { "epoch": 0.18138366817887233, "grad_norm": 0.0561637207865715, "learning_rate": 9.068448764682058e-05, "loss": 0.415, "step": 2239 }, { "epoch": 0.18146467919637072, "grad_norm": 0.04728609696030617, "learning_rate": 9.07249898744431e-05, "loss": 0.3665, "step": 2240 }, { "epoch": 0.1815456902138691, "grad_norm": 0.056595578789711, "learning_rate": 9.076549210206562e-05, "loss": 0.4514, "step": 2241 }, { "epoch": 0.18162670123136745, "grad_norm": 0.06155933067202568, "learning_rate": 9.080599432968813e-05, "loss": 0.3642, "step": 2242 }, { "epoch": 0.18170771224886584, "grad_norm": 0.045497674494981766, "learning_rate": 9.084649655731065e-05, "loss": 0.367, "step": 2243 }, { "epoch": 0.18178872326636422, "grad_norm": 0.06236834451556206, "learning_rate": 9.088699878493317e-05, "loss": 0.3618, "step": 2244 }, { "epoch": 0.1818697342838626, "grad_norm": 0.0569576732814312, "learning_rate": 9.09275010125557e-05, "loss": 0.4281, "step": 2245 }, { "epoch": 0.181950745301361, "grad_norm": 0.04748576879501343, "learning_rate": 9.096800324017821e-05, "loss": 0.4056, "step": 2246 }, { "epoch": 0.18203175631885937, "grad_norm": 0.04922928288578987, "learning_rate": 9.100850546780073e-05, "loss": 0.3859, "step": 2247 }, { "epoch": 0.18211276733635776, "grad_norm": 0.05819029361009598, "learning_rate": 9.104900769542325e-05, "loss": 0.4301, "step": 2248 }, { "epoch": 0.1821937783538561, "grad_norm": 0.05144455283880234, "learning_rate": 9.108950992304577e-05, "loss": 0.355, "step": 2249 }, { "epoch": 0.1822747893713545, "grad_norm": 0.056657761335372925, "learning_rate": 9.113001215066829e-05, "loss": 0.3997, "step": 2250 }, { "epoch": 0.18235580038885288, "grad_norm": 0.05661802738904953, "learning_rate": 9.117051437829081e-05, "loss": 0.3802, "step": 2251 }, { "epoch": 0.18243681140635126, "grad_norm": 0.05875290557742119, "learning_rate": 9.121101660591332e-05, "loss": 0.4077, "step": 2252 }, { "epoch": 0.18251782242384965, "grad_norm": 0.05324438959360123, "learning_rate": 9.125151883353585e-05, "loss": 0.3975, "step": 2253 }, { "epoch": 0.18259883344134803, "grad_norm": 0.06660490483045578, "learning_rate": 9.129202106115837e-05, "loss": 0.3673, "step": 2254 }, { "epoch": 0.1826798444588464, "grad_norm": 0.05302351340651512, "learning_rate": 9.133252328878089e-05, "loss": 0.3425, "step": 2255 }, { "epoch": 0.18276085547634477, "grad_norm": 0.042320359498262405, "learning_rate": 9.137302551640341e-05, "loss": 0.3621, "step": 2256 }, { "epoch": 0.18284186649384315, "grad_norm": 0.0677284523844719, "learning_rate": 9.141352774402593e-05, "loss": 0.3821, "step": 2257 }, { "epoch": 0.18292287751134154, "grad_norm": 0.05299816280603409, "learning_rate": 9.145402997164845e-05, "loss": 0.4133, "step": 2258 }, { "epoch": 0.18300388852883992, "grad_norm": 0.057131148874759674, "learning_rate": 9.149453219927097e-05, "loss": 0.3976, "step": 2259 }, { "epoch": 0.1830848995463383, "grad_norm": 0.04714860022068024, "learning_rate": 9.153503442689349e-05, "loss": 0.3569, "step": 2260 }, { "epoch": 0.1831659105638367, "grad_norm": 0.04154291749000549, "learning_rate": 9.157553665451601e-05, "loss": 0.3975, "step": 2261 }, { "epoch": 0.18324692158133507, "grad_norm": 0.06255395710468292, "learning_rate": 9.161603888213853e-05, "loss": 0.4208, "step": 2262 }, { "epoch": 0.18332793259883345, "grad_norm": 0.04732973128557205, "learning_rate": 9.165654110976103e-05, "loss": 0.429, "step": 2263 }, { "epoch": 0.1834089436163318, "grad_norm": 0.0500243715941906, "learning_rate": 9.169704333738355e-05, "loss": 0.3555, "step": 2264 }, { "epoch": 0.1834899546338302, "grad_norm": 0.047191690653562546, "learning_rate": 9.173754556500607e-05, "loss": 0.3791, "step": 2265 }, { "epoch": 0.18357096565132858, "grad_norm": 0.04734690859913826, "learning_rate": 9.17780477926286e-05, "loss": 0.3663, "step": 2266 }, { "epoch": 0.18365197666882696, "grad_norm": 0.0568060502409935, "learning_rate": 9.181855002025111e-05, "loss": 0.3479, "step": 2267 }, { "epoch": 0.18373298768632534, "grad_norm": 0.061953410506248474, "learning_rate": 9.185905224787363e-05, "loss": 0.3681, "step": 2268 }, { "epoch": 0.18381399870382373, "grad_norm": 0.04263650253415108, "learning_rate": 9.189955447549615e-05, "loss": 0.2988, "step": 2269 }, { "epoch": 0.1838950097213221, "grad_norm": 0.04538256675004959, "learning_rate": 9.194005670311867e-05, "loss": 0.3543, "step": 2270 }, { "epoch": 0.18397602073882047, "grad_norm": 0.04849075525999069, "learning_rate": 9.198055893074119e-05, "loss": 0.3667, "step": 2271 }, { "epoch": 0.18405703175631885, "grad_norm": 0.04478795453906059, "learning_rate": 9.202106115836372e-05, "loss": 0.4318, "step": 2272 }, { "epoch": 0.18413804277381723, "grad_norm": 0.04313032701611519, "learning_rate": 9.206156338598624e-05, "loss": 0.3399, "step": 2273 }, { "epoch": 0.18421905379131562, "grad_norm": 0.047500211745500565, "learning_rate": 9.210206561360875e-05, "loss": 0.3399, "step": 2274 }, { "epoch": 0.184300064808814, "grad_norm": 0.05286448076367378, "learning_rate": 9.214256784123127e-05, "loss": 0.3759, "step": 2275 }, { "epoch": 0.18438107582631239, "grad_norm": 0.05197947472333908, "learning_rate": 9.218307006885379e-05, "loss": 0.346, "step": 2276 }, { "epoch": 0.18446208684381077, "grad_norm": 0.06517504900693893, "learning_rate": 9.222357229647631e-05, "loss": 0.4529, "step": 2277 }, { "epoch": 0.18454309786130912, "grad_norm": 0.04489710554480553, "learning_rate": 9.226407452409883e-05, "loss": 0.4124, "step": 2278 }, { "epoch": 0.1846241088788075, "grad_norm": 0.04965034872293472, "learning_rate": 9.230457675172135e-05, "loss": 0.3523, "step": 2279 }, { "epoch": 0.1847051198963059, "grad_norm": 0.053209736943244934, "learning_rate": 9.234507897934387e-05, "loss": 0.3404, "step": 2280 }, { "epoch": 0.18478613091380428, "grad_norm": 0.06462380290031433, "learning_rate": 9.238558120696639e-05, "loss": 0.436, "step": 2281 }, { "epoch": 0.18486714193130266, "grad_norm": 0.04681272432208061, "learning_rate": 9.242608343458891e-05, "loss": 0.3605, "step": 2282 }, { "epoch": 0.18494815294880104, "grad_norm": 0.047515030950307846, "learning_rate": 9.246658566221143e-05, "loss": 0.3561, "step": 2283 }, { "epoch": 0.18502916396629943, "grad_norm": 0.04604284465312958, "learning_rate": 9.250708788983395e-05, "loss": 0.3581, "step": 2284 }, { "epoch": 0.1851101749837978, "grad_norm": 0.05426971614360809, "learning_rate": 9.254759011745647e-05, "loss": 0.3875, "step": 2285 }, { "epoch": 0.18519118600129617, "grad_norm": 0.048005685210227966, "learning_rate": 9.258809234507899e-05, "loss": 0.3615, "step": 2286 }, { "epoch": 0.18527219701879455, "grad_norm": 0.05091318115592003, "learning_rate": 9.262859457270149e-05, "loss": 0.4587, "step": 2287 }, { "epoch": 0.18535320803629293, "grad_norm": 0.0451776497066021, "learning_rate": 9.266909680032401e-05, "loss": 0.3651, "step": 2288 }, { "epoch": 0.18543421905379132, "grad_norm": 0.051855940371751785, "learning_rate": 9.270959902794653e-05, "loss": 0.3906, "step": 2289 }, { "epoch": 0.1855152300712897, "grad_norm": 0.04242299869656563, "learning_rate": 9.275010125556907e-05, "loss": 0.351, "step": 2290 }, { "epoch": 0.18559624108878808, "grad_norm": 0.04861443117260933, "learning_rate": 9.279060348319159e-05, "loss": 0.3527, "step": 2291 }, { "epoch": 0.18567725210628647, "grad_norm": 0.04748028144240379, "learning_rate": 9.28311057108141e-05, "loss": 0.4083, "step": 2292 }, { "epoch": 0.18575826312378482, "grad_norm": 0.0490681454539299, "learning_rate": 9.287160793843662e-05, "loss": 0.4009, "step": 2293 }, { "epoch": 0.1858392741412832, "grad_norm": 0.05255401134490967, "learning_rate": 9.291211016605914e-05, "loss": 0.4057, "step": 2294 }, { "epoch": 0.1859202851587816, "grad_norm": 0.05210600048303604, "learning_rate": 9.295261239368166e-05, "loss": 0.3714, "step": 2295 }, { "epoch": 0.18600129617627997, "grad_norm": 0.0676577240228653, "learning_rate": 9.299311462130418e-05, "loss": 0.3882, "step": 2296 }, { "epoch": 0.18608230719377836, "grad_norm": 0.06080527976155281, "learning_rate": 9.30336168489267e-05, "loss": 0.3907, "step": 2297 }, { "epoch": 0.18616331821127674, "grad_norm": 0.04721786454319954, "learning_rate": 9.307411907654921e-05, "loss": 0.3615, "step": 2298 }, { "epoch": 0.18624432922877512, "grad_norm": 0.04322146624326706, "learning_rate": 9.311462130417173e-05, "loss": 0.3894, "step": 2299 }, { "epoch": 0.18632534024627348, "grad_norm": 0.04705704003572464, "learning_rate": 9.315512353179425e-05, "loss": 0.3722, "step": 2300 }, { "epoch": 0.18640635126377186, "grad_norm": 0.05449768900871277, "learning_rate": 9.319562575941677e-05, "loss": 0.3898, "step": 2301 }, { "epoch": 0.18648736228127025, "grad_norm": 0.05563833937048912, "learning_rate": 9.323612798703929e-05, "loss": 0.4292, "step": 2302 }, { "epoch": 0.18656837329876863, "grad_norm": 0.05745968222618103, "learning_rate": 9.327663021466181e-05, "loss": 0.4307, "step": 2303 }, { "epoch": 0.18664938431626701, "grad_norm": 0.06325607001781464, "learning_rate": 9.331713244228433e-05, "loss": 0.3989, "step": 2304 }, { "epoch": 0.1867303953337654, "grad_norm": 0.04417753964662552, "learning_rate": 9.335763466990685e-05, "loss": 0.4082, "step": 2305 }, { "epoch": 0.18681140635126378, "grad_norm": 0.05200282856822014, "learning_rate": 9.339813689752937e-05, "loss": 0.3822, "step": 2306 }, { "epoch": 0.18689241736876216, "grad_norm": 0.036817897111177444, "learning_rate": 9.343863912515189e-05, "loss": 0.3125, "step": 2307 }, { "epoch": 0.18697342838626052, "grad_norm": 0.053293824195861816, "learning_rate": 9.34791413527744e-05, "loss": 0.3794, "step": 2308 }, { "epoch": 0.1870544394037589, "grad_norm": 0.0653097853064537, "learning_rate": 9.351964358039693e-05, "loss": 0.461, "step": 2309 }, { "epoch": 0.1871354504212573, "grad_norm": 0.050535351037979126, "learning_rate": 9.356014580801945e-05, "loss": 0.3984, "step": 2310 }, { "epoch": 0.18721646143875567, "grad_norm": 0.04054562374949455, "learning_rate": 9.360064803564196e-05, "loss": 0.3642, "step": 2311 }, { "epoch": 0.18729747245625405, "grad_norm": 0.04133705049753189, "learning_rate": 9.364115026326448e-05, "loss": 0.3811, "step": 2312 }, { "epoch": 0.18737848347375244, "grad_norm": 0.046158090233802795, "learning_rate": 9.3681652490887e-05, "loss": 0.3797, "step": 2313 }, { "epoch": 0.18745949449125082, "grad_norm": 0.0477442741394043, "learning_rate": 9.372215471850952e-05, "loss": 0.3778, "step": 2314 }, { "epoch": 0.18754050550874918, "grad_norm": 0.051181282848119736, "learning_rate": 9.376265694613204e-05, "loss": 0.4524, "step": 2315 }, { "epoch": 0.18762151652624756, "grad_norm": 0.047320883721113205, "learning_rate": 9.380315917375456e-05, "loss": 0.3927, "step": 2316 }, { "epoch": 0.18770252754374595, "grad_norm": 0.06091245263814926, "learning_rate": 9.384366140137708e-05, "loss": 0.3817, "step": 2317 }, { "epoch": 0.18778353856124433, "grad_norm": 0.043418072164058685, "learning_rate": 9.38841636289996e-05, "loss": 0.3928, "step": 2318 }, { "epoch": 0.1878645495787427, "grad_norm": 0.05807121470570564, "learning_rate": 9.392466585662212e-05, "loss": 0.3893, "step": 2319 }, { "epoch": 0.1879455605962411, "grad_norm": 0.04889382794499397, "learning_rate": 9.396516808424464e-05, "loss": 0.3647, "step": 2320 }, { "epoch": 0.18802657161373948, "grad_norm": 0.05064339190721512, "learning_rate": 9.400567031186716e-05, "loss": 0.4005, "step": 2321 }, { "epoch": 0.18810758263123784, "grad_norm": 0.049906641244888306, "learning_rate": 9.404617253948967e-05, "loss": 0.3851, "step": 2322 }, { "epoch": 0.18818859364873622, "grad_norm": 0.061403222382068634, "learning_rate": 9.408667476711219e-05, "loss": 0.4124, "step": 2323 }, { "epoch": 0.1882696046662346, "grad_norm": 0.049313317984342575, "learning_rate": 9.412717699473471e-05, "loss": 0.3853, "step": 2324 }, { "epoch": 0.18835061568373299, "grad_norm": 0.04512112960219383, "learning_rate": 9.416767922235723e-05, "loss": 0.3849, "step": 2325 }, { "epoch": 0.18843162670123137, "grad_norm": 0.06029899790883064, "learning_rate": 9.420818144997975e-05, "loss": 0.4307, "step": 2326 }, { "epoch": 0.18851263771872975, "grad_norm": 0.05402587726712227, "learning_rate": 9.424868367760227e-05, "loss": 0.4108, "step": 2327 }, { "epoch": 0.18859364873622814, "grad_norm": 0.05312091484665871, "learning_rate": 9.42891859052248e-05, "loss": 0.3826, "step": 2328 }, { "epoch": 0.18867465975372652, "grad_norm": 0.04519422724843025, "learning_rate": 9.432968813284732e-05, "loss": 0.4146, "step": 2329 }, { "epoch": 0.18875567077122488, "grad_norm": 0.04941098019480705, "learning_rate": 9.437019036046984e-05, "loss": 0.3499, "step": 2330 }, { "epoch": 0.18883668178872326, "grad_norm": 0.06194014847278595, "learning_rate": 9.441069258809236e-05, "loss": 0.4409, "step": 2331 }, { "epoch": 0.18891769280622164, "grad_norm": 0.054493311792612076, "learning_rate": 9.445119481571488e-05, "loss": 0.3915, "step": 2332 }, { "epoch": 0.18899870382372003, "grad_norm": 0.05225975066423416, "learning_rate": 9.449169704333738e-05, "loss": 0.3641, "step": 2333 }, { "epoch": 0.1890797148412184, "grad_norm": 0.05683285742998123, "learning_rate": 9.45321992709599e-05, "loss": 0.3665, "step": 2334 }, { "epoch": 0.1891607258587168, "grad_norm": 0.047711968421936035, "learning_rate": 9.457270149858242e-05, "loss": 0.4671, "step": 2335 }, { "epoch": 0.18924173687621518, "grad_norm": 0.04842852056026459, "learning_rate": 9.461320372620494e-05, "loss": 0.3622, "step": 2336 }, { "epoch": 0.18932274789371353, "grad_norm": 0.04394889995455742, "learning_rate": 9.465370595382746e-05, "loss": 0.3606, "step": 2337 }, { "epoch": 0.18940375891121192, "grad_norm": 0.06385563313961029, "learning_rate": 9.469420818144998e-05, "loss": 0.4298, "step": 2338 }, { "epoch": 0.1894847699287103, "grad_norm": 0.05741212144494057, "learning_rate": 9.47347104090725e-05, "loss": 0.3639, "step": 2339 }, { "epoch": 0.18956578094620868, "grad_norm": 0.044967882335186005, "learning_rate": 9.477521263669502e-05, "loss": 0.3775, "step": 2340 }, { "epoch": 0.18964679196370707, "grad_norm": 0.05367691069841385, "learning_rate": 9.481571486431754e-05, "loss": 0.4057, "step": 2341 }, { "epoch": 0.18972780298120545, "grad_norm": 0.05288249999284744, "learning_rate": 9.485621709194006e-05, "loss": 0.3901, "step": 2342 }, { "epoch": 0.18980881399870383, "grad_norm": 0.04440377652645111, "learning_rate": 9.489671931956258e-05, "loss": 0.3894, "step": 2343 }, { "epoch": 0.1898898250162022, "grad_norm": 0.05041750520467758, "learning_rate": 9.49372215471851e-05, "loss": 0.4323, "step": 2344 }, { "epoch": 0.18997083603370057, "grad_norm": 0.05050059035420418, "learning_rate": 9.497772377480762e-05, "loss": 0.393, "step": 2345 }, { "epoch": 0.19005184705119896, "grad_norm": 0.04902779310941696, "learning_rate": 9.501822600243013e-05, "loss": 0.3747, "step": 2346 }, { "epoch": 0.19013285806869734, "grad_norm": 0.06620379537343979, "learning_rate": 9.505872823005266e-05, "loss": 0.4253, "step": 2347 }, { "epoch": 0.19021386908619572, "grad_norm": 0.05406967177987099, "learning_rate": 9.509923045767518e-05, "loss": 0.3722, "step": 2348 }, { "epoch": 0.1902948801036941, "grad_norm": 0.04907216131687164, "learning_rate": 9.51397326852977e-05, "loss": 0.4089, "step": 2349 }, { "epoch": 0.1903758911211925, "grad_norm": 0.05281197652220726, "learning_rate": 9.518023491292022e-05, "loss": 0.3484, "step": 2350 }, { "epoch": 0.19045690213869088, "grad_norm": 0.06806964427232742, "learning_rate": 9.522073714054274e-05, "loss": 0.4212, "step": 2351 }, { "epoch": 0.19053791315618923, "grad_norm": 0.05582256242632866, "learning_rate": 9.526123936816526e-05, "loss": 0.4001, "step": 2352 }, { "epoch": 0.19061892417368761, "grad_norm": 0.053403329104185104, "learning_rate": 9.530174159578778e-05, "loss": 0.4009, "step": 2353 }, { "epoch": 0.190699935191186, "grad_norm": 0.05009522661566734, "learning_rate": 9.53422438234103e-05, "loss": 0.3614, "step": 2354 }, { "epoch": 0.19078094620868438, "grad_norm": 0.05327250063419342, "learning_rate": 9.538274605103282e-05, "loss": 0.4264, "step": 2355 }, { "epoch": 0.19086195722618277, "grad_norm": 0.054642241448163986, "learning_rate": 9.542324827865534e-05, "loss": 0.3857, "step": 2356 }, { "epoch": 0.19094296824368115, "grad_norm": 0.050950486212968826, "learning_rate": 9.546375050627784e-05, "loss": 0.3573, "step": 2357 }, { "epoch": 0.19102397926117953, "grad_norm": 0.05052249878644943, "learning_rate": 9.550425273390036e-05, "loss": 0.3971, "step": 2358 }, { "epoch": 0.1911049902786779, "grad_norm": 0.04672643914818764, "learning_rate": 9.554475496152288e-05, "loss": 0.3903, "step": 2359 }, { "epoch": 0.19118600129617627, "grad_norm": 0.052174076437950134, "learning_rate": 9.55852571891454e-05, "loss": 0.3908, "step": 2360 }, { "epoch": 0.19126701231367466, "grad_norm": 0.058124348521232605, "learning_rate": 9.562575941676792e-05, "loss": 0.4022, "step": 2361 }, { "epoch": 0.19134802333117304, "grad_norm": 0.05619959160685539, "learning_rate": 9.566626164439044e-05, "loss": 0.3924, "step": 2362 }, { "epoch": 0.19142903434867142, "grad_norm": 0.04171370714902878, "learning_rate": 9.570676387201296e-05, "loss": 0.3773, "step": 2363 }, { "epoch": 0.1915100453661698, "grad_norm": 0.0523751825094223, "learning_rate": 9.574726609963548e-05, "loss": 0.3713, "step": 2364 }, { "epoch": 0.1915910563836682, "grad_norm": 0.06082122027873993, "learning_rate": 9.5787768327258e-05, "loss": 0.3853, "step": 2365 }, { "epoch": 0.19167206740116655, "grad_norm": 0.05510568991303444, "learning_rate": 9.582827055488053e-05, "loss": 0.3692, "step": 2366 }, { "epoch": 0.19175307841866493, "grad_norm": 0.04889333248138428, "learning_rate": 9.586877278250305e-05, "loss": 0.3786, "step": 2367 }, { "epoch": 0.1918340894361633, "grad_norm": 0.049090322107076645, "learning_rate": 9.590927501012556e-05, "loss": 0.3905, "step": 2368 }, { "epoch": 0.1919151004536617, "grad_norm": 0.053586363792419434, "learning_rate": 9.594977723774808e-05, "loss": 0.3731, "step": 2369 }, { "epoch": 0.19199611147116008, "grad_norm": 0.04378211498260498, "learning_rate": 9.59902794653706e-05, "loss": 0.3731, "step": 2370 }, { "epoch": 0.19207712248865846, "grad_norm": 0.046975620090961456, "learning_rate": 9.603078169299312e-05, "loss": 0.3849, "step": 2371 }, { "epoch": 0.19215813350615685, "grad_norm": 0.05281860753893852, "learning_rate": 9.607128392061564e-05, "loss": 0.3375, "step": 2372 }, { "epoch": 0.19223914452365523, "grad_norm": 0.038259562104940414, "learning_rate": 9.611178614823816e-05, "loss": 0.3518, "step": 2373 }, { "epoch": 0.1923201555411536, "grad_norm": 0.05231066793203354, "learning_rate": 9.615228837586068e-05, "loss": 0.4345, "step": 2374 }, { "epoch": 0.19240116655865197, "grad_norm": 0.054934676736593246, "learning_rate": 9.61927906034832e-05, "loss": 0.3924, "step": 2375 }, { "epoch": 0.19248217757615035, "grad_norm": 0.06908360868692398, "learning_rate": 9.623329283110572e-05, "loss": 0.3992, "step": 2376 }, { "epoch": 0.19256318859364874, "grad_norm": 0.04846476763486862, "learning_rate": 9.627379505872824e-05, "loss": 0.3915, "step": 2377 }, { "epoch": 0.19264419961114712, "grad_norm": 0.04592437669634819, "learning_rate": 9.631429728635076e-05, "loss": 0.3585, "step": 2378 }, { "epoch": 0.1927252106286455, "grad_norm": 0.054542385041713715, "learning_rate": 9.635479951397328e-05, "loss": 0.4254, "step": 2379 }, { "epoch": 0.1928062216461439, "grad_norm": 0.0487421490252018, "learning_rate": 9.63953017415958e-05, "loss": 0.3718, "step": 2380 }, { "epoch": 0.19288723266364224, "grad_norm": 0.04887447506189346, "learning_rate": 9.643580396921831e-05, "loss": 0.4017, "step": 2381 }, { "epoch": 0.19296824368114063, "grad_norm": 0.0589744932949543, "learning_rate": 9.647630619684082e-05, "loss": 0.374, "step": 2382 }, { "epoch": 0.193049254698639, "grad_norm": 0.05274634808301926, "learning_rate": 9.651680842446334e-05, "loss": 0.3185, "step": 2383 }, { "epoch": 0.1931302657161374, "grad_norm": 0.0514618456363678, "learning_rate": 9.655731065208586e-05, "loss": 0.3948, "step": 2384 }, { "epoch": 0.19321127673363578, "grad_norm": 0.05362550541758537, "learning_rate": 9.65978128797084e-05, "loss": 0.3651, "step": 2385 }, { "epoch": 0.19329228775113416, "grad_norm": 0.05524744093418121, "learning_rate": 9.663831510733091e-05, "loss": 0.4032, "step": 2386 }, { "epoch": 0.19337329876863255, "grad_norm": 0.046348828822374344, "learning_rate": 9.667881733495343e-05, "loss": 0.3832, "step": 2387 }, { "epoch": 0.1934543097861309, "grad_norm": 0.05013475939631462, "learning_rate": 9.671931956257595e-05, "loss": 0.3749, "step": 2388 }, { "epoch": 0.19353532080362928, "grad_norm": 0.05070723965764046, "learning_rate": 9.675982179019847e-05, "loss": 0.4044, "step": 2389 }, { "epoch": 0.19361633182112767, "grad_norm": 0.044254641979932785, "learning_rate": 9.680032401782099e-05, "loss": 0.3564, "step": 2390 }, { "epoch": 0.19369734283862605, "grad_norm": 0.05322520062327385, "learning_rate": 9.684082624544351e-05, "loss": 0.3379, "step": 2391 }, { "epoch": 0.19377835385612444, "grad_norm": 0.0782526433467865, "learning_rate": 9.688132847306603e-05, "loss": 0.3541, "step": 2392 }, { "epoch": 0.19385936487362282, "grad_norm": 0.06899959594011307, "learning_rate": 9.692183070068854e-05, "loss": 0.3742, "step": 2393 }, { "epoch": 0.1939403758911212, "grad_norm": 0.043857067823410034, "learning_rate": 9.696233292831106e-05, "loss": 0.3907, "step": 2394 }, { "epoch": 0.19402138690861956, "grad_norm": 0.06296835094690323, "learning_rate": 9.700283515593358e-05, "loss": 0.353, "step": 2395 }, { "epoch": 0.19410239792611794, "grad_norm": 0.060926903039216995, "learning_rate": 9.70433373835561e-05, "loss": 0.3564, "step": 2396 }, { "epoch": 0.19418340894361633, "grad_norm": 0.052972421050071716, "learning_rate": 9.708383961117862e-05, "loss": 0.4146, "step": 2397 }, { "epoch": 0.1942644199611147, "grad_norm": 0.0610865093767643, "learning_rate": 9.712434183880114e-05, "loss": 0.3754, "step": 2398 }, { "epoch": 0.1943454309786131, "grad_norm": 0.045094795525074005, "learning_rate": 9.716484406642366e-05, "loss": 0.369, "step": 2399 }, { "epoch": 0.19442644199611148, "grad_norm": 0.059093985706567764, "learning_rate": 9.720534629404617e-05, "loss": 0.3982, "step": 2400 }, { "epoch": 0.19450745301360986, "grad_norm": 0.04496023431420326, "learning_rate": 9.72458485216687e-05, "loss": 0.3582, "step": 2401 }, { "epoch": 0.19458846403110824, "grad_norm": 0.04400349035859108, "learning_rate": 9.728635074929121e-05, "loss": 0.38, "step": 2402 }, { "epoch": 0.1946694750486066, "grad_norm": 0.05442773923277855, "learning_rate": 9.732685297691373e-05, "loss": 0.3755, "step": 2403 }, { "epoch": 0.19475048606610498, "grad_norm": 0.05021575838327408, "learning_rate": 9.736735520453625e-05, "loss": 0.4224, "step": 2404 }, { "epoch": 0.19483149708360337, "grad_norm": 0.04044508561491966, "learning_rate": 9.740785743215877e-05, "loss": 0.3635, "step": 2405 }, { "epoch": 0.19491250810110175, "grad_norm": 0.054432496428489685, "learning_rate": 9.744835965978129e-05, "loss": 0.4186, "step": 2406 }, { "epoch": 0.19499351911860013, "grad_norm": 0.044115956872701645, "learning_rate": 9.748886188740381e-05, "loss": 0.3876, "step": 2407 }, { "epoch": 0.19507453013609852, "grad_norm": 0.04487552493810654, "learning_rate": 9.752936411502633e-05, "loss": 0.4089, "step": 2408 }, { "epoch": 0.1951555411535969, "grad_norm": 0.04776868224143982, "learning_rate": 9.756986634264885e-05, "loss": 0.3757, "step": 2409 }, { "epoch": 0.19523655217109526, "grad_norm": 0.056760191917419434, "learning_rate": 9.761036857027137e-05, "loss": 0.3678, "step": 2410 }, { "epoch": 0.19531756318859364, "grad_norm": 0.06094193831086159, "learning_rate": 9.765087079789389e-05, "loss": 0.3973, "step": 2411 }, { "epoch": 0.19539857420609202, "grad_norm": 0.05493546649813652, "learning_rate": 9.769137302551641e-05, "loss": 0.3608, "step": 2412 }, { "epoch": 0.1954795852235904, "grad_norm": 0.04134117811918259, "learning_rate": 9.773187525313893e-05, "loss": 0.3641, "step": 2413 }, { "epoch": 0.1955605962410888, "grad_norm": 0.05286389961838722, "learning_rate": 9.777237748076145e-05, "loss": 0.3864, "step": 2414 }, { "epoch": 0.19564160725858717, "grad_norm": 0.055073704570531845, "learning_rate": 9.781287970838397e-05, "loss": 0.4151, "step": 2415 }, { "epoch": 0.19572261827608556, "grad_norm": 0.07006943225860596, "learning_rate": 9.785338193600649e-05, "loss": 0.3754, "step": 2416 }, { "epoch": 0.19580362929358391, "grad_norm": 0.05164717510342598, "learning_rate": 9.7893884163629e-05, "loss": 0.4127, "step": 2417 }, { "epoch": 0.1958846403110823, "grad_norm": 0.051379360258579254, "learning_rate": 9.793438639125152e-05, "loss": 0.405, "step": 2418 }, { "epoch": 0.19596565132858068, "grad_norm": 0.056827612221241, "learning_rate": 9.797488861887403e-05, "loss": 0.3474, "step": 2419 }, { "epoch": 0.19604666234607906, "grad_norm": 0.0616968534886837, "learning_rate": 9.801539084649655e-05, "loss": 0.3876, "step": 2420 }, { "epoch": 0.19612767336357745, "grad_norm": 0.0510423481464386, "learning_rate": 9.805589307411907e-05, "loss": 0.3849, "step": 2421 }, { "epoch": 0.19620868438107583, "grad_norm": 0.05379074066877365, "learning_rate": 9.809639530174161e-05, "loss": 0.416, "step": 2422 }, { "epoch": 0.19628969539857422, "grad_norm": 0.04857899248600006, "learning_rate": 9.813689752936413e-05, "loss": 0.4007, "step": 2423 }, { "epoch": 0.1963707064160726, "grad_norm": 0.04664013907313347, "learning_rate": 9.817739975698665e-05, "loss": 0.3865, "step": 2424 }, { "epoch": 0.19645171743357095, "grad_norm": 0.0516962967813015, "learning_rate": 9.821790198460917e-05, "loss": 0.4013, "step": 2425 }, { "epoch": 0.19653272845106934, "grad_norm": 0.059693820774555206, "learning_rate": 9.825840421223169e-05, "loss": 0.3976, "step": 2426 }, { "epoch": 0.19661373946856772, "grad_norm": 0.05525093898177147, "learning_rate": 9.82989064398542e-05, "loss": 0.3586, "step": 2427 }, { "epoch": 0.1966947504860661, "grad_norm": 0.07122207432985306, "learning_rate": 9.833940866747671e-05, "loss": 0.374, "step": 2428 }, { "epoch": 0.1967757615035645, "grad_norm": 0.03935431316494942, "learning_rate": 9.837991089509923e-05, "loss": 0.3822, "step": 2429 }, { "epoch": 0.19685677252106287, "grad_norm": 0.05369625613093376, "learning_rate": 9.842041312272175e-05, "loss": 0.4008, "step": 2430 }, { "epoch": 0.19693778353856126, "grad_norm": 0.05163230001926422, "learning_rate": 9.846091535034427e-05, "loss": 0.4191, "step": 2431 }, { "epoch": 0.1970187945560596, "grad_norm": 0.0478132963180542, "learning_rate": 9.850141757796679e-05, "loss": 0.397, "step": 2432 }, { "epoch": 0.197099805573558, "grad_norm": 0.04648119583725929, "learning_rate": 9.854191980558931e-05, "loss": 0.364, "step": 2433 }, { "epoch": 0.19718081659105638, "grad_norm": 0.04318595305085182, "learning_rate": 9.858242203321183e-05, "loss": 0.3196, "step": 2434 }, { "epoch": 0.19726182760855476, "grad_norm": 0.049725234508514404, "learning_rate": 9.862292426083435e-05, "loss": 0.4059, "step": 2435 }, { "epoch": 0.19734283862605315, "grad_norm": 0.059837911278009415, "learning_rate": 9.866342648845687e-05, "loss": 0.3796, "step": 2436 }, { "epoch": 0.19742384964355153, "grad_norm": 0.05953565239906311, "learning_rate": 9.870392871607939e-05, "loss": 0.4661, "step": 2437 }, { "epoch": 0.1975048606610499, "grad_norm": 0.06108205392956734, "learning_rate": 9.874443094370191e-05, "loss": 0.3892, "step": 2438 }, { "epoch": 0.19758587167854827, "grad_norm": 0.04766416177153587, "learning_rate": 9.878493317132443e-05, "loss": 0.3453, "step": 2439 }, { "epoch": 0.19766688269604665, "grad_norm": 0.04841725900769234, "learning_rate": 9.882543539894695e-05, "loss": 0.4404, "step": 2440 }, { "epoch": 0.19774789371354504, "grad_norm": 0.05051546171307564, "learning_rate": 9.886593762656947e-05, "loss": 0.3734, "step": 2441 }, { "epoch": 0.19782890473104342, "grad_norm": 0.06493677943944931, "learning_rate": 9.890643985419199e-05, "loss": 0.4166, "step": 2442 }, { "epoch": 0.1979099157485418, "grad_norm": 0.044552478939294815, "learning_rate": 9.894694208181451e-05, "loss": 0.3468, "step": 2443 }, { "epoch": 0.1979909267660402, "grad_norm": 0.04488362744450569, "learning_rate": 9.898744430943703e-05, "loss": 0.3645, "step": 2444 }, { "epoch": 0.19807193778353857, "grad_norm": 0.04239024221897125, "learning_rate": 9.902794653705955e-05, "loss": 0.3579, "step": 2445 }, { "epoch": 0.19815294880103695, "grad_norm": 0.05800511687994003, "learning_rate": 9.906844876468207e-05, "loss": 0.3979, "step": 2446 }, { "epoch": 0.1982339598185353, "grad_norm": 0.05241608992218971, "learning_rate": 9.910895099230459e-05, "loss": 0.4038, "step": 2447 }, { "epoch": 0.1983149708360337, "grad_norm": 0.04767435044050217, "learning_rate": 9.91494532199271e-05, "loss": 0.3741, "step": 2448 }, { "epoch": 0.19839598185353208, "grad_norm": 0.05435354635119438, "learning_rate": 9.918995544754962e-05, "loss": 0.4271, "step": 2449 }, { "epoch": 0.19847699287103046, "grad_norm": 0.04715365171432495, "learning_rate": 9.923045767517214e-05, "loss": 0.3608, "step": 2450 }, { "epoch": 0.19855800388852884, "grad_norm": 0.05234677344560623, "learning_rate": 9.927095990279466e-05, "loss": 0.369, "step": 2451 }, { "epoch": 0.19863901490602723, "grad_norm": 0.050974685698747635, "learning_rate": 9.931146213041717e-05, "loss": 0.3251, "step": 2452 }, { "epoch": 0.1987200259235256, "grad_norm": 0.059354811906814575, "learning_rate": 9.935196435803969e-05, "loss": 0.4558, "step": 2453 }, { "epoch": 0.19880103694102397, "grad_norm": 0.057084355503320694, "learning_rate": 9.939246658566221e-05, "loss": 0.4074, "step": 2454 }, { "epoch": 0.19888204795852235, "grad_norm": 0.05420767143368721, "learning_rate": 9.943296881328473e-05, "loss": 0.3669, "step": 2455 }, { "epoch": 0.19896305897602073, "grad_norm": 0.04995109885931015, "learning_rate": 9.947347104090725e-05, "loss": 0.3623, "step": 2456 }, { "epoch": 0.19904406999351912, "grad_norm": 0.07834792882204056, "learning_rate": 9.951397326852977e-05, "loss": 0.3814, "step": 2457 }, { "epoch": 0.1991250810110175, "grad_norm": 0.04483048617839813, "learning_rate": 9.955447549615229e-05, "loss": 0.3305, "step": 2458 }, { "epoch": 0.19920609202851589, "grad_norm": 0.05417317524552345, "learning_rate": 9.959497772377481e-05, "loss": 0.3865, "step": 2459 }, { "epoch": 0.19928710304601427, "grad_norm": 0.041050031781196594, "learning_rate": 9.963547995139734e-05, "loss": 0.3489, "step": 2460 }, { "epoch": 0.19936811406351262, "grad_norm": 0.03944450989365578, "learning_rate": 9.967598217901986e-05, "loss": 0.3544, "step": 2461 }, { "epoch": 0.199449125081011, "grad_norm": 0.05919930338859558, "learning_rate": 9.971648440664238e-05, "loss": 0.3941, "step": 2462 }, { "epoch": 0.1995301360985094, "grad_norm": 0.04712875559926033, "learning_rate": 9.975698663426489e-05, "loss": 0.4015, "step": 2463 }, { "epoch": 0.19961114711600778, "grad_norm": 0.04720218479633331, "learning_rate": 9.97974888618874e-05, "loss": 0.3927, "step": 2464 }, { "epoch": 0.19969215813350616, "grad_norm": 0.0454418770968914, "learning_rate": 9.983799108950993e-05, "loss": 0.3821, "step": 2465 }, { "epoch": 0.19977316915100454, "grad_norm": 0.03888606280088425, "learning_rate": 9.987849331713245e-05, "loss": 0.3211, "step": 2466 }, { "epoch": 0.19985418016850293, "grad_norm": 0.05296216905117035, "learning_rate": 9.991899554475497e-05, "loss": 0.4288, "step": 2467 }, { "epoch": 0.1999351911860013, "grad_norm": 0.04445220157504082, "learning_rate": 9.995949777237749e-05, "loss": 0.3433, "step": 2468 }, { "epoch": 0.20001620220349967, "grad_norm": 0.04101482406258583, "learning_rate": 0.0001, "loss": 0.3738, "step": 2469 }, { "epoch": 0.20009721322099805, "grad_norm": 0.05536132678389549, "learning_rate": 0.00010004050222762254, "loss": 0.416, "step": 2470 }, { "epoch": 0.20017822423849643, "grad_norm": 0.05537095293402672, "learning_rate": 0.00010008100445524504, "loss": 0.4103, "step": 2471 }, { "epoch": 0.20025923525599482, "grad_norm": 0.04526954144239426, "learning_rate": 0.00010012150668286758, "loss": 0.3716, "step": 2472 }, { "epoch": 0.2003402462734932, "grad_norm": 0.04359053820371628, "learning_rate": 0.00010016200891049008, "loss": 0.3563, "step": 2473 }, { "epoch": 0.20042125729099158, "grad_norm": 0.04995141550898552, "learning_rate": 0.0001002025111381126, "loss": 0.3854, "step": 2474 }, { "epoch": 0.20050226830848997, "grad_norm": 0.04640784114599228, "learning_rate": 0.00010024301336573512, "loss": 0.4231, "step": 2475 }, { "epoch": 0.20058327932598832, "grad_norm": 0.045250989496707916, "learning_rate": 0.00010028351559335764, "loss": 0.4278, "step": 2476 }, { "epoch": 0.2006642903434867, "grad_norm": 0.053471554070711136, "learning_rate": 0.00010032401782098015, "loss": 0.3881, "step": 2477 }, { "epoch": 0.2007453013609851, "grad_norm": 0.054227881133556366, "learning_rate": 0.00010036452004860268, "loss": 0.4368, "step": 2478 }, { "epoch": 0.20082631237848347, "grad_norm": 0.05022569000720978, "learning_rate": 0.00010040502227622519, "loss": 0.372, "step": 2479 }, { "epoch": 0.20090732339598186, "grad_norm": 0.06383142620325089, "learning_rate": 0.00010044552450384772, "loss": 0.4035, "step": 2480 }, { "epoch": 0.20098833441348024, "grad_norm": 0.0433533675968647, "learning_rate": 0.00010048602673147023, "loss": 0.3546, "step": 2481 }, { "epoch": 0.20106934543097862, "grad_norm": 0.05223929136991501, "learning_rate": 0.00010052652895909276, "loss": 0.4119, "step": 2482 }, { "epoch": 0.20115035644847698, "grad_norm": 0.04103383049368858, "learning_rate": 0.00010056703118671527, "loss": 0.3788, "step": 2483 }, { "epoch": 0.20123136746597536, "grad_norm": 0.05163447558879852, "learning_rate": 0.0001006075334143378, "loss": 0.4652, "step": 2484 }, { "epoch": 0.20131237848347375, "grad_norm": 0.05344129726290703, "learning_rate": 0.0001006480356419603, "loss": 0.4269, "step": 2485 }, { "epoch": 0.20139338950097213, "grad_norm": 0.056302350014448166, "learning_rate": 0.00010068853786958284, "loss": 0.3922, "step": 2486 }, { "epoch": 0.20147440051847051, "grad_norm": 0.05290120095014572, "learning_rate": 0.00010072904009720535, "loss": 0.3809, "step": 2487 }, { "epoch": 0.2015554115359689, "grad_norm": 0.038659267127513885, "learning_rate": 0.00010076954232482786, "loss": 0.4318, "step": 2488 }, { "epoch": 0.20163642255346728, "grad_norm": 0.06425322592258453, "learning_rate": 0.0001008100445524504, "loss": 0.3526, "step": 2489 }, { "epoch": 0.20171743357096567, "grad_norm": 0.04753356799483299, "learning_rate": 0.0001008505467800729, "loss": 0.3346, "step": 2490 }, { "epoch": 0.20179844458846402, "grad_norm": 0.043934766203165054, "learning_rate": 0.00010089104900769544, "loss": 0.371, "step": 2491 }, { "epoch": 0.2018794556059624, "grad_norm": 0.049425188452005386, "learning_rate": 0.00010093155123531794, "loss": 0.3821, "step": 2492 }, { "epoch": 0.2019604666234608, "grad_norm": 0.059526655822992325, "learning_rate": 0.00010097205346294048, "loss": 0.3777, "step": 2493 }, { "epoch": 0.20204147764095917, "grad_norm": 0.054595060646533966, "learning_rate": 0.00010101255569056298, "loss": 0.45, "step": 2494 }, { "epoch": 0.20212248865845756, "grad_norm": 0.04631621390581131, "learning_rate": 0.00010105305791818552, "loss": 0.3896, "step": 2495 }, { "epoch": 0.20220349967595594, "grad_norm": 0.051338374614715576, "learning_rate": 0.00010109356014580802, "loss": 0.3814, "step": 2496 }, { "epoch": 0.20228451069345432, "grad_norm": 0.046590592712163925, "learning_rate": 0.00010113406237343056, "loss": 0.4024, "step": 2497 }, { "epoch": 0.20236552171095268, "grad_norm": 0.04861883446574211, "learning_rate": 0.00010117456460105306, "loss": 0.3794, "step": 2498 }, { "epoch": 0.20244653272845106, "grad_norm": 0.045799244195222855, "learning_rate": 0.00010121506682867558, "loss": 0.3595, "step": 2499 }, { "epoch": 0.20252754374594945, "grad_norm": 0.0462491437792778, "learning_rate": 0.00010125556905629809, "loss": 0.4529, "step": 2500 }, { "epoch": 0.20260855476344783, "grad_norm": 0.05045001208782196, "learning_rate": 0.00010129607128392062, "loss": 0.378, "step": 2501 }, { "epoch": 0.2026895657809462, "grad_norm": 0.04670971632003784, "learning_rate": 0.00010133657351154313, "loss": 0.3551, "step": 2502 }, { "epoch": 0.2027705767984446, "grad_norm": 0.04216444492340088, "learning_rate": 0.00010137707573916566, "loss": 0.4095, "step": 2503 }, { "epoch": 0.20285158781594298, "grad_norm": 0.0527469776570797, "learning_rate": 0.00010141757796678817, "loss": 0.3529, "step": 2504 }, { "epoch": 0.20293259883344134, "grad_norm": 0.04359910637140274, "learning_rate": 0.0001014580801944107, "loss": 0.3884, "step": 2505 }, { "epoch": 0.20301360985093972, "grad_norm": 0.05967698618769646, "learning_rate": 0.0001014985824220332, "loss": 0.4167, "step": 2506 }, { "epoch": 0.2030946208684381, "grad_norm": 0.048827651888132095, "learning_rate": 0.00010153908464965574, "loss": 0.4089, "step": 2507 }, { "epoch": 0.2031756318859365, "grad_norm": 0.04760623350739479, "learning_rate": 0.00010157958687727827, "loss": 0.3515, "step": 2508 }, { "epoch": 0.20325664290343487, "grad_norm": 0.038198892027139664, "learning_rate": 0.00010162008910490078, "loss": 0.3312, "step": 2509 }, { "epoch": 0.20333765392093325, "grad_norm": 0.04905112832784653, "learning_rate": 0.0001016605913325233, "loss": 0.3888, "step": 2510 }, { "epoch": 0.20341866493843164, "grad_norm": 0.04764381796121597, "learning_rate": 0.0001017010935601458, "loss": 0.3204, "step": 2511 }, { "epoch": 0.20349967595593, "grad_norm": 0.045229025185108185, "learning_rate": 0.00010174159578776834, "loss": 0.382, "step": 2512 }, { "epoch": 0.20358068697342838, "grad_norm": 0.053234439343214035, "learning_rate": 0.00010178209801539084, "loss": 0.3822, "step": 2513 }, { "epoch": 0.20366169799092676, "grad_norm": 0.053509242832660675, "learning_rate": 0.00010182260024301338, "loss": 0.4125, "step": 2514 }, { "epoch": 0.20374270900842514, "grad_norm": 0.04826676845550537, "learning_rate": 0.00010186310247063588, "loss": 0.3603, "step": 2515 }, { "epoch": 0.20382372002592353, "grad_norm": 0.04815275967121124, "learning_rate": 0.00010190360469825842, "loss": 0.3774, "step": 2516 }, { "epoch": 0.2039047310434219, "grad_norm": 0.04670335352420807, "learning_rate": 0.00010194410692588092, "loss": 0.4279, "step": 2517 }, { "epoch": 0.2039857420609203, "grad_norm": 0.050155483186244965, "learning_rate": 0.00010198460915350346, "loss": 0.3811, "step": 2518 }, { "epoch": 0.20406675307841868, "grad_norm": 0.03997861593961716, "learning_rate": 0.00010202511138112596, "loss": 0.3826, "step": 2519 }, { "epoch": 0.20414776409591703, "grad_norm": 0.04606309160590172, "learning_rate": 0.0001020656136087485, "loss": 0.4141, "step": 2520 }, { "epoch": 0.20422877511341542, "grad_norm": 0.048883430659770966, "learning_rate": 0.000102106115836371, "loss": 0.3719, "step": 2521 }, { "epoch": 0.2043097861309138, "grad_norm": 0.04640788212418556, "learning_rate": 0.00010214661806399352, "loss": 0.4113, "step": 2522 }, { "epoch": 0.20439079714841218, "grad_norm": 0.05309394747018814, "learning_rate": 0.00010218712029161604, "loss": 0.4066, "step": 2523 }, { "epoch": 0.20447180816591057, "grad_norm": 0.04602646827697754, "learning_rate": 0.00010222762251923856, "loss": 0.3612, "step": 2524 }, { "epoch": 0.20455281918340895, "grad_norm": 0.04883800819516182, "learning_rate": 0.00010226812474686107, "loss": 0.4093, "step": 2525 }, { "epoch": 0.20463383020090734, "grad_norm": 0.044395849108695984, "learning_rate": 0.0001023086269744836, "loss": 0.3983, "step": 2526 }, { "epoch": 0.2047148412184057, "grad_norm": 0.050485894083976746, "learning_rate": 0.00010234912920210613, "loss": 0.4242, "step": 2527 }, { "epoch": 0.20479585223590407, "grad_norm": 0.0483909510076046, "learning_rate": 0.00010238963142972864, "loss": 0.3771, "step": 2528 }, { "epoch": 0.20487686325340246, "grad_norm": 0.04684009775519371, "learning_rate": 0.00010243013365735117, "loss": 0.3917, "step": 2529 }, { "epoch": 0.20495787427090084, "grad_norm": 0.0491405613720417, "learning_rate": 0.00010247063588497368, "loss": 0.3377, "step": 2530 }, { "epoch": 0.20503888528839923, "grad_norm": 0.04853895306587219, "learning_rate": 0.00010251113811259621, "loss": 0.4153, "step": 2531 }, { "epoch": 0.2051198963058976, "grad_norm": 0.05164919048547745, "learning_rate": 0.00010255164034021872, "loss": 0.3867, "step": 2532 }, { "epoch": 0.205200907323396, "grad_norm": 0.04687311127781868, "learning_rate": 0.00010259214256784124, "loss": 0.3784, "step": 2533 }, { "epoch": 0.20528191834089435, "grad_norm": 0.034922413527965546, "learning_rate": 0.00010263264479546376, "loss": 0.3459, "step": 2534 }, { "epoch": 0.20536292935839273, "grad_norm": 0.047675810754299164, "learning_rate": 0.00010267314702308628, "loss": 0.3675, "step": 2535 }, { "epoch": 0.20544394037589112, "grad_norm": 0.05016015097498894, "learning_rate": 0.00010271364925070878, "loss": 0.3978, "step": 2536 }, { "epoch": 0.2055249513933895, "grad_norm": 0.03888197988271713, "learning_rate": 0.00010275415147833132, "loss": 0.3539, "step": 2537 }, { "epoch": 0.20560596241088788, "grad_norm": 0.06236858665943146, "learning_rate": 0.00010279465370595382, "loss": 0.4093, "step": 2538 }, { "epoch": 0.20568697342838627, "grad_norm": 0.038991302251815796, "learning_rate": 0.00010283515593357635, "loss": 0.3799, "step": 2539 }, { "epoch": 0.20576798444588465, "grad_norm": 0.049997709691524506, "learning_rate": 0.00010287565816119886, "loss": 0.3985, "step": 2540 }, { "epoch": 0.20584899546338303, "grad_norm": 0.05341407284140587, "learning_rate": 0.0001029161603888214, "loss": 0.3524, "step": 2541 }, { "epoch": 0.2059300064808814, "grad_norm": 0.03703078255057335, "learning_rate": 0.0001029566626164439, "loss": 0.4079, "step": 2542 }, { "epoch": 0.20601101749837977, "grad_norm": 0.06276031583547592, "learning_rate": 0.00010299716484406643, "loss": 0.3747, "step": 2543 }, { "epoch": 0.20609202851587816, "grad_norm": 0.04427820071578026, "learning_rate": 0.00010303766707168894, "loss": 0.3774, "step": 2544 }, { "epoch": 0.20617303953337654, "grad_norm": 0.051968708634376526, "learning_rate": 0.00010307816929931147, "loss": 0.3683, "step": 2545 }, { "epoch": 0.20625405055087492, "grad_norm": 0.0557091124355793, "learning_rate": 0.00010311867152693399, "loss": 0.3791, "step": 2546 }, { "epoch": 0.2063350615683733, "grad_norm": 0.046910978853702545, "learning_rate": 0.0001031591737545565, "loss": 0.4094, "step": 2547 }, { "epoch": 0.2064160725858717, "grad_norm": 0.05115199834108353, "learning_rate": 0.00010319967598217903, "loss": 0.4132, "step": 2548 }, { "epoch": 0.20649708360337005, "grad_norm": 0.049847543239593506, "learning_rate": 0.00010324017820980154, "loss": 0.3635, "step": 2549 }, { "epoch": 0.20657809462086843, "grad_norm": 0.055366579443216324, "learning_rate": 0.00010328068043742407, "loss": 0.4279, "step": 2550 }, { "epoch": 0.2066591056383668, "grad_norm": 0.05920335277915001, "learning_rate": 0.00010332118266504658, "loss": 0.4094, "step": 2551 }, { "epoch": 0.2067401166558652, "grad_norm": 0.04443211480975151, "learning_rate": 0.00010336168489266911, "loss": 0.3963, "step": 2552 }, { "epoch": 0.20682112767336358, "grad_norm": 0.046224404126405716, "learning_rate": 0.00010340218712029162, "loss": 0.4038, "step": 2553 }, { "epoch": 0.20690213869086196, "grad_norm": 0.05442473664879799, "learning_rate": 0.00010344268934791415, "loss": 0.4074, "step": 2554 }, { "epoch": 0.20698314970836035, "grad_norm": 0.04899989441037178, "learning_rate": 0.00010348319157553666, "loss": 0.3474, "step": 2555 }, { "epoch": 0.2070641607258587, "grad_norm": 0.054443079978227615, "learning_rate": 0.00010352369380315919, "loss": 0.4292, "step": 2556 }, { "epoch": 0.2071451717433571, "grad_norm": 0.054214950650930405, "learning_rate": 0.0001035641960307817, "loss": 0.3893, "step": 2557 }, { "epoch": 0.20722618276085547, "grad_norm": 0.048906613141298294, "learning_rate": 0.00010360469825840421, "loss": 0.4234, "step": 2558 }, { "epoch": 0.20730719377835385, "grad_norm": 0.05881531909108162, "learning_rate": 0.00010364520048602673, "loss": 0.4157, "step": 2559 }, { "epoch": 0.20738820479585224, "grad_norm": 0.05360455438494682, "learning_rate": 0.00010368570271364925, "loss": 0.4134, "step": 2560 }, { "epoch": 0.20746921581335062, "grad_norm": 0.046132009476423264, "learning_rate": 0.00010372620494127176, "loss": 0.3317, "step": 2561 }, { "epoch": 0.207550226830849, "grad_norm": 0.05172186344861984, "learning_rate": 0.0001037667071688943, "loss": 0.4097, "step": 2562 }, { "epoch": 0.2076312378483474, "grad_norm": 0.06279409676790237, "learning_rate": 0.0001038072093965168, "loss": 0.4337, "step": 2563 }, { "epoch": 0.20771224886584574, "grad_norm": 0.05410788580775261, "learning_rate": 0.00010384771162413933, "loss": 0.351, "step": 2564 }, { "epoch": 0.20779325988334413, "grad_norm": 0.04601385444402695, "learning_rate": 0.00010388821385176187, "loss": 0.428, "step": 2565 }, { "epoch": 0.2078742709008425, "grad_norm": 0.04627078399062157, "learning_rate": 0.00010392871607938437, "loss": 0.3752, "step": 2566 }, { "epoch": 0.2079552819183409, "grad_norm": 0.06033918634057045, "learning_rate": 0.0001039692183070069, "loss": 0.3628, "step": 2567 }, { "epoch": 0.20803629293583928, "grad_norm": 0.046143606305122375, "learning_rate": 0.00010400972053462941, "loss": 0.3764, "step": 2568 }, { "epoch": 0.20811730395333766, "grad_norm": 0.04848916456103325, "learning_rate": 0.00010405022276225193, "loss": 0.3636, "step": 2569 }, { "epoch": 0.20819831497083605, "grad_norm": 0.04305524379014969, "learning_rate": 0.00010409072498987445, "loss": 0.3783, "step": 2570 }, { "epoch": 0.2082793259883344, "grad_norm": 0.05727194994688034, "learning_rate": 0.00010413122721749697, "loss": 0.4555, "step": 2571 }, { "epoch": 0.20836033700583279, "grad_norm": 0.05420166626572609, "learning_rate": 0.00010417172944511948, "loss": 0.3874, "step": 2572 }, { "epoch": 0.20844134802333117, "grad_norm": 0.05022074654698372, "learning_rate": 0.00010421223167274201, "loss": 0.352, "step": 2573 }, { "epoch": 0.20852235904082955, "grad_norm": 0.0534265972673893, "learning_rate": 0.00010425273390036452, "loss": 0.4138, "step": 2574 }, { "epoch": 0.20860337005832794, "grad_norm": 0.03776722401380539, "learning_rate": 0.00010429323612798705, "loss": 0.3459, "step": 2575 }, { "epoch": 0.20868438107582632, "grad_norm": 0.04339131712913513, "learning_rate": 0.00010433373835560956, "loss": 0.4046, "step": 2576 }, { "epoch": 0.2087653920933247, "grad_norm": 0.050997912883758545, "learning_rate": 0.00010437424058323209, "loss": 0.3805, "step": 2577 }, { "epoch": 0.20884640311082306, "grad_norm": 0.04996298626065254, "learning_rate": 0.0001044147428108546, "loss": 0.4127, "step": 2578 }, { "epoch": 0.20892741412832144, "grad_norm": 0.06990036368370056, "learning_rate": 0.00010445524503847713, "loss": 0.426, "step": 2579 }, { "epoch": 0.20900842514581983, "grad_norm": 0.053124912083148956, "learning_rate": 0.00010449574726609963, "loss": 0.4036, "step": 2580 }, { "epoch": 0.2090894361633182, "grad_norm": 0.047472499310970306, "learning_rate": 0.00010453624949372217, "loss": 0.4067, "step": 2581 }, { "epoch": 0.2091704471808166, "grad_norm": 0.05586513504385948, "learning_rate": 0.00010457675172134467, "loss": 0.3958, "step": 2582 }, { "epoch": 0.20925145819831498, "grad_norm": 0.048053573817014694, "learning_rate": 0.00010461725394896719, "loss": 0.3987, "step": 2583 }, { "epoch": 0.20933246921581336, "grad_norm": 0.043412040919065475, "learning_rate": 0.00010465775617658973, "loss": 0.3462, "step": 2584 }, { "epoch": 0.20941348023331174, "grad_norm": 0.05099153518676758, "learning_rate": 0.00010469825840421223, "loss": 0.4066, "step": 2585 }, { "epoch": 0.2094944912508101, "grad_norm": 0.04131011292338371, "learning_rate": 0.00010473876063183477, "loss": 0.4256, "step": 2586 }, { "epoch": 0.20957550226830848, "grad_norm": 0.04058512672781944, "learning_rate": 0.00010477926285945727, "loss": 0.3686, "step": 2587 }, { "epoch": 0.20965651328580687, "grad_norm": 0.04154278337955475, "learning_rate": 0.0001048197650870798, "loss": 0.3233, "step": 2588 }, { "epoch": 0.20973752430330525, "grad_norm": 0.07527300715446472, "learning_rate": 0.00010486026731470231, "loss": 0.3186, "step": 2589 }, { "epoch": 0.20981853532080363, "grad_norm": 0.042909786105155945, "learning_rate": 0.00010490076954232484, "loss": 0.4065, "step": 2590 }, { "epoch": 0.20989954633830202, "grad_norm": 0.04452323168516159, "learning_rate": 0.00010494127176994735, "loss": 0.4012, "step": 2591 }, { "epoch": 0.2099805573558004, "grad_norm": 0.06063823029398918, "learning_rate": 0.00010498177399756988, "loss": 0.4393, "step": 2592 }, { "epoch": 0.21006156837329876, "grad_norm": 0.044901590794324875, "learning_rate": 0.00010502227622519239, "loss": 0.3851, "step": 2593 }, { "epoch": 0.21014257939079714, "grad_norm": 0.04395297169685364, "learning_rate": 0.00010506277845281491, "loss": 0.392, "step": 2594 }, { "epoch": 0.21022359040829552, "grad_norm": 0.04686028137803078, "learning_rate": 0.00010510328068043742, "loss": 0.351, "step": 2595 }, { "epoch": 0.2103046014257939, "grad_norm": 0.05416541174054146, "learning_rate": 0.00010514378290805995, "loss": 0.3642, "step": 2596 }, { "epoch": 0.2103856124432923, "grad_norm": 0.053576841950416565, "learning_rate": 0.00010518428513568245, "loss": 0.412, "step": 2597 }, { "epoch": 0.21046662346079067, "grad_norm": 0.05176137387752533, "learning_rate": 0.00010522478736330499, "loss": 0.3755, "step": 2598 }, { "epoch": 0.21054763447828906, "grad_norm": 0.04937148466706276, "learning_rate": 0.0001052652895909275, "loss": 0.3539, "step": 2599 }, { "epoch": 0.21062864549578741, "grad_norm": 0.044205427169799805, "learning_rate": 0.00010530579181855003, "loss": 0.3502, "step": 2600 }, { "epoch": 0.2107096565132858, "grad_norm": 0.04475211724638939, "learning_rate": 0.00010534629404617256, "loss": 0.3376, "step": 2601 }, { "epoch": 0.21079066753078418, "grad_norm": 0.05387239158153534, "learning_rate": 0.00010538679627379507, "loss": 0.3748, "step": 2602 }, { "epoch": 0.21087167854828257, "grad_norm": 0.04502122476696968, "learning_rate": 0.00010542729850141759, "loss": 0.3698, "step": 2603 }, { "epoch": 0.21095268956578095, "grad_norm": 0.04435496777296066, "learning_rate": 0.0001054678007290401, "loss": 0.3881, "step": 2604 }, { "epoch": 0.21103370058327933, "grad_norm": 0.054403528571128845, "learning_rate": 0.00010550830295666263, "loss": 0.3588, "step": 2605 }, { "epoch": 0.21111471160077772, "grad_norm": 0.047774478793144226, "learning_rate": 0.00010554880518428513, "loss": 0.3836, "step": 2606 }, { "epoch": 0.2111957226182761, "grad_norm": 0.04820839688181877, "learning_rate": 0.00010558930741190766, "loss": 0.3701, "step": 2607 }, { "epoch": 0.21127673363577446, "grad_norm": 0.05031566321849823, "learning_rate": 0.00010562980963953017, "loss": 0.3585, "step": 2608 }, { "epoch": 0.21135774465327284, "grad_norm": 0.06141781806945801, "learning_rate": 0.0001056703118671527, "loss": 0.4669, "step": 2609 }, { "epoch": 0.21143875567077122, "grad_norm": 0.056281477212905884, "learning_rate": 0.00010571081409477521, "loss": 0.4542, "step": 2610 }, { "epoch": 0.2115197666882696, "grad_norm": 0.04627525433897972, "learning_rate": 0.00010575131632239774, "loss": 0.4101, "step": 2611 }, { "epoch": 0.211600777705768, "grad_norm": 0.05587709695100784, "learning_rate": 0.00010579181855002025, "loss": 0.4177, "step": 2612 }, { "epoch": 0.21168178872326637, "grad_norm": 0.04824918136000633, "learning_rate": 0.00010583232077764278, "loss": 0.3946, "step": 2613 }, { "epoch": 0.21176279974076476, "grad_norm": 0.05462539941072464, "learning_rate": 0.00010587282300526529, "loss": 0.3867, "step": 2614 }, { "epoch": 0.2118438107582631, "grad_norm": 0.049656134098768234, "learning_rate": 0.00010591332523288782, "loss": 0.3657, "step": 2615 }, { "epoch": 0.2119248217757615, "grad_norm": 0.048310454934835434, "learning_rate": 0.00010595382746051033, "loss": 0.4064, "step": 2616 }, { "epoch": 0.21200583279325988, "grad_norm": 0.0601760558784008, "learning_rate": 0.00010599432968813285, "loss": 0.4552, "step": 2617 }, { "epoch": 0.21208684381075826, "grad_norm": 0.042523082345724106, "learning_rate": 0.00010603483191575537, "loss": 0.3503, "step": 2618 }, { "epoch": 0.21216785482825665, "grad_norm": 0.04929207265377045, "learning_rate": 0.00010607533414337789, "loss": 0.4063, "step": 2619 }, { "epoch": 0.21224886584575503, "grad_norm": 0.04285252466797829, "learning_rate": 0.00010611583637100042, "loss": 0.3896, "step": 2620 }, { "epoch": 0.21232987686325341, "grad_norm": 0.05070970952510834, "learning_rate": 0.00010615633859862293, "loss": 0.4165, "step": 2621 }, { "epoch": 0.21241088788075177, "grad_norm": 0.05116109549999237, "learning_rate": 0.00010619684082624546, "loss": 0.3549, "step": 2622 }, { "epoch": 0.21249189889825015, "grad_norm": 0.06008598208427429, "learning_rate": 0.00010623734305386797, "loss": 0.3841, "step": 2623 }, { "epoch": 0.21257290991574854, "grad_norm": 0.04959864169359207, "learning_rate": 0.0001062778452814905, "loss": 0.4246, "step": 2624 }, { "epoch": 0.21265392093324692, "grad_norm": 0.04739030823111534, "learning_rate": 0.000106318347509113, "loss": 0.3482, "step": 2625 }, { "epoch": 0.2127349319507453, "grad_norm": 0.04815612733364105, "learning_rate": 0.00010635884973673554, "loss": 0.372, "step": 2626 }, { "epoch": 0.2128159429682437, "grad_norm": 0.05172804743051529, "learning_rate": 0.00010639935196435804, "loss": 0.3737, "step": 2627 }, { "epoch": 0.21289695398574207, "grad_norm": 0.05179368704557419, "learning_rate": 0.00010643985419198056, "loss": 0.4017, "step": 2628 }, { "epoch": 0.21297796500324043, "grad_norm": 0.05360709875822067, "learning_rate": 0.00010648035641960308, "loss": 0.3869, "step": 2629 }, { "epoch": 0.2130589760207388, "grad_norm": 0.04751502722501755, "learning_rate": 0.0001065208586472256, "loss": 0.4248, "step": 2630 }, { "epoch": 0.2131399870382372, "grad_norm": 0.046408139169216156, "learning_rate": 0.00010656136087484811, "loss": 0.3668, "step": 2631 }, { "epoch": 0.21322099805573558, "grad_norm": 0.05516528710722923, "learning_rate": 0.00010660186310247064, "loss": 0.4057, "step": 2632 }, { "epoch": 0.21330200907323396, "grad_norm": 0.06695632636547089, "learning_rate": 0.00010664236533009315, "loss": 0.4126, "step": 2633 }, { "epoch": 0.21338302009073234, "grad_norm": 0.04524846002459526, "learning_rate": 0.00010668286755771568, "loss": 0.4305, "step": 2634 }, { "epoch": 0.21346403110823073, "grad_norm": 0.043575726449489594, "learning_rate": 0.00010672336978533819, "loss": 0.3572, "step": 2635 }, { "epoch": 0.2135450421257291, "grad_norm": 0.04118606820702553, "learning_rate": 0.00010676387201296072, "loss": 0.337, "step": 2636 }, { "epoch": 0.21362605314322747, "grad_norm": 0.03964829444885254, "learning_rate": 0.00010680437424058323, "loss": 0.3562, "step": 2637 }, { "epoch": 0.21370706416072585, "grad_norm": 0.04576268047094345, "learning_rate": 0.00010684487646820576, "loss": 0.4024, "step": 2638 }, { "epoch": 0.21378807517822424, "grad_norm": 0.04380327835679054, "learning_rate": 0.00010688537869582828, "loss": 0.4258, "step": 2639 }, { "epoch": 0.21386908619572262, "grad_norm": 0.04862361401319504, "learning_rate": 0.0001069258809234508, "loss": 0.3582, "step": 2640 }, { "epoch": 0.213950097213221, "grad_norm": 0.04417659342288971, "learning_rate": 0.00010696638315107332, "loss": 0.4156, "step": 2641 }, { "epoch": 0.21403110823071939, "grad_norm": 0.048604417592287064, "learning_rate": 0.00010700688537869583, "loss": 0.3986, "step": 2642 }, { "epoch": 0.21411211924821777, "grad_norm": 0.0473223477602005, "learning_rate": 0.00010704738760631836, "loss": 0.3804, "step": 2643 }, { "epoch": 0.21419313026571613, "grad_norm": 0.049184344708919525, "learning_rate": 0.00010708788983394087, "loss": 0.3635, "step": 2644 }, { "epoch": 0.2142741412832145, "grad_norm": 0.03883194550871849, "learning_rate": 0.0001071283920615634, "loss": 0.3426, "step": 2645 }, { "epoch": 0.2143551523007129, "grad_norm": 0.04144468158483505, "learning_rate": 0.0001071688942891859, "loss": 0.3702, "step": 2646 }, { "epoch": 0.21443616331821128, "grad_norm": 0.04413948208093643, "learning_rate": 0.00010720939651680844, "loss": 0.3699, "step": 2647 }, { "epoch": 0.21451717433570966, "grad_norm": 0.05277412384748459, "learning_rate": 0.00010724989874443094, "loss": 0.4364, "step": 2648 }, { "epoch": 0.21459818535320804, "grad_norm": 0.05613725632429123, "learning_rate": 0.00010729040097205348, "loss": 0.4129, "step": 2649 }, { "epoch": 0.21467919637070643, "grad_norm": 0.04522144794464111, "learning_rate": 0.00010733090319967598, "loss": 0.4004, "step": 2650 }, { "epoch": 0.21476020738820478, "grad_norm": 0.053900957107543945, "learning_rate": 0.00010737140542729852, "loss": 0.4037, "step": 2651 }, { "epoch": 0.21484121840570317, "grad_norm": 0.04681387543678284, "learning_rate": 0.00010741190765492102, "loss": 0.3747, "step": 2652 }, { "epoch": 0.21492222942320155, "grad_norm": 0.05777775123715401, "learning_rate": 0.00010745240988254354, "loss": 0.3672, "step": 2653 }, { "epoch": 0.21500324044069993, "grad_norm": 0.058414071798324585, "learning_rate": 0.00010749291211016605, "loss": 0.4225, "step": 2654 }, { "epoch": 0.21508425145819832, "grad_norm": 0.03851136565208435, "learning_rate": 0.00010753341433778858, "loss": 0.3504, "step": 2655 }, { "epoch": 0.2151652624756967, "grad_norm": 0.03778860345482826, "learning_rate": 0.00010757391656541109, "loss": 0.3751, "step": 2656 }, { "epoch": 0.21524627349319508, "grad_norm": 0.04218238964676857, "learning_rate": 0.00010761441879303362, "loss": 0.3385, "step": 2657 }, { "epoch": 0.21532728451069347, "grad_norm": 0.04938570782542229, "learning_rate": 0.00010765492102065615, "loss": 0.3746, "step": 2658 }, { "epoch": 0.21540829552819182, "grad_norm": 0.05085742846131325, "learning_rate": 0.00010769542324827866, "loss": 0.3804, "step": 2659 }, { "epoch": 0.2154893065456902, "grad_norm": 0.05732310190796852, "learning_rate": 0.0001077359254759012, "loss": 0.4115, "step": 2660 }, { "epoch": 0.2155703175631886, "grad_norm": 0.055434245616197586, "learning_rate": 0.0001077764277035237, "loss": 0.3893, "step": 2661 }, { "epoch": 0.21565132858068697, "grad_norm": 0.042372528463602066, "learning_rate": 0.00010781692993114623, "loss": 0.3624, "step": 2662 }, { "epoch": 0.21573233959818536, "grad_norm": 0.04685783386230469, "learning_rate": 0.00010785743215876874, "loss": 0.4375, "step": 2663 }, { "epoch": 0.21581335061568374, "grad_norm": 0.04215598478913307, "learning_rate": 0.00010789793438639126, "loss": 0.3682, "step": 2664 }, { "epoch": 0.21589436163318212, "grad_norm": 0.049776118248701096, "learning_rate": 0.00010793843661401376, "loss": 0.3488, "step": 2665 }, { "epoch": 0.21597537265068048, "grad_norm": 0.04941713809967041, "learning_rate": 0.0001079789388416363, "loss": 0.3578, "step": 2666 }, { "epoch": 0.21605638366817886, "grad_norm": 0.04695756733417511, "learning_rate": 0.0001080194410692588, "loss": 0.3845, "step": 2667 }, { "epoch": 0.21613739468567725, "grad_norm": 0.0699956864118576, "learning_rate": 0.00010805994329688134, "loss": 0.4633, "step": 2668 }, { "epoch": 0.21621840570317563, "grad_norm": 0.05027562007308006, "learning_rate": 0.00010810044552450384, "loss": 0.4073, "step": 2669 }, { "epoch": 0.21629941672067401, "grad_norm": 0.043147969990968704, "learning_rate": 0.00010814094775212638, "loss": 0.3883, "step": 2670 }, { "epoch": 0.2163804277381724, "grad_norm": 0.06733971834182739, "learning_rate": 0.00010818144997974888, "loss": 0.3542, "step": 2671 }, { "epoch": 0.21646143875567078, "grad_norm": 0.04482163116335869, "learning_rate": 0.00010822195220737142, "loss": 0.3922, "step": 2672 }, { "epoch": 0.21654244977316914, "grad_norm": 0.04736680909991264, "learning_rate": 0.00010826245443499392, "loss": 0.3624, "step": 2673 }, { "epoch": 0.21662346079066752, "grad_norm": 0.04092049226164818, "learning_rate": 0.00010830295666261646, "loss": 0.3695, "step": 2674 }, { "epoch": 0.2167044718081659, "grad_norm": 0.048764027655124664, "learning_rate": 0.00010834345889023896, "loss": 0.3841, "step": 2675 }, { "epoch": 0.2167854828256643, "grad_norm": 0.04987451061606407, "learning_rate": 0.00010838396111786148, "loss": 0.4004, "step": 2676 }, { "epoch": 0.21686649384316267, "grad_norm": 0.044140033423900604, "learning_rate": 0.00010842446334548401, "loss": 0.3409, "step": 2677 }, { "epoch": 0.21694750486066106, "grad_norm": 0.05619576945900917, "learning_rate": 0.00010846496557310652, "loss": 0.4316, "step": 2678 }, { "epoch": 0.21702851587815944, "grad_norm": 0.05569642409682274, "learning_rate": 0.00010850546780072905, "loss": 0.343, "step": 2679 }, { "epoch": 0.21710952689565782, "grad_norm": 0.04952612519264221, "learning_rate": 0.00010854597002835156, "loss": 0.3723, "step": 2680 }, { "epoch": 0.21719053791315618, "grad_norm": 0.050588030368089676, "learning_rate": 0.0001085864722559741, "loss": 0.401, "step": 2681 }, { "epoch": 0.21727154893065456, "grad_norm": 0.036035146564245224, "learning_rate": 0.0001086269744835966, "loss": 0.3753, "step": 2682 }, { "epoch": 0.21735255994815295, "grad_norm": 0.04765566438436508, "learning_rate": 0.00010866747671121913, "loss": 0.4035, "step": 2683 }, { "epoch": 0.21743357096565133, "grad_norm": 0.04165409505367279, "learning_rate": 0.00010870797893884164, "loss": 0.459, "step": 2684 }, { "epoch": 0.2175145819831497, "grad_norm": 0.05602968856692314, "learning_rate": 0.00010874848116646417, "loss": 0.3788, "step": 2685 }, { "epoch": 0.2175955930006481, "grad_norm": 0.04455839842557907, "learning_rate": 0.00010878898339408668, "loss": 0.3706, "step": 2686 }, { "epoch": 0.21767660401814648, "grad_norm": 0.048621680587530136, "learning_rate": 0.0001088294856217092, "loss": 0.4115, "step": 2687 }, { "epoch": 0.21775761503564484, "grad_norm": 0.04829535260796547, "learning_rate": 0.00010886998784933172, "loss": 0.4108, "step": 2688 }, { "epoch": 0.21783862605314322, "grad_norm": 0.04750366136431694, "learning_rate": 0.00010891049007695424, "loss": 0.3978, "step": 2689 }, { "epoch": 0.2179196370706416, "grad_norm": 0.050557080656290054, "learning_rate": 0.00010895099230457674, "loss": 0.3803, "step": 2690 }, { "epoch": 0.21800064808814, "grad_norm": 0.04632573202252388, "learning_rate": 0.00010899149453219928, "loss": 0.378, "step": 2691 }, { "epoch": 0.21808165910563837, "grad_norm": 0.054422810673713684, "learning_rate": 0.00010903199675982178, "loss": 0.3912, "step": 2692 }, { "epoch": 0.21816267012313675, "grad_norm": 0.044331423938274384, "learning_rate": 0.00010907249898744432, "loss": 0.3682, "step": 2693 }, { "epoch": 0.21824368114063514, "grad_norm": 0.04680316522717476, "learning_rate": 0.00010911300121506682, "loss": 0.4347, "step": 2694 }, { "epoch": 0.2183246921581335, "grad_norm": 0.0582558698952198, "learning_rate": 0.00010915350344268936, "loss": 0.3916, "step": 2695 }, { "epoch": 0.21840570317563188, "grad_norm": 0.043212030082941055, "learning_rate": 0.00010919400567031189, "loss": 0.3624, "step": 2696 }, { "epoch": 0.21848671419313026, "grad_norm": 0.04562610760331154, "learning_rate": 0.0001092345078979344, "loss": 0.3665, "step": 2697 }, { "epoch": 0.21856772521062864, "grad_norm": 0.03662271425127983, "learning_rate": 0.00010927501012555691, "loss": 0.354, "step": 2698 }, { "epoch": 0.21864873622812703, "grad_norm": 0.043365396559238434, "learning_rate": 0.00010931551235317943, "loss": 0.4155, "step": 2699 }, { "epoch": 0.2187297472456254, "grad_norm": 0.04387480020523071, "learning_rate": 0.00010935601458080195, "loss": 0.3916, "step": 2700 }, { "epoch": 0.2188107582631238, "grad_norm": 0.051516756415367126, "learning_rate": 0.00010939651680842446, "loss": 0.3896, "step": 2701 }, { "epoch": 0.21889176928062218, "grad_norm": 0.04242309555411339, "learning_rate": 0.00010943701903604699, "loss": 0.3803, "step": 2702 }, { "epoch": 0.21897278029812053, "grad_norm": 0.04661116003990173, "learning_rate": 0.0001094775212636695, "loss": 0.3575, "step": 2703 }, { "epoch": 0.21905379131561892, "grad_norm": 0.04585146903991699, "learning_rate": 0.00010951802349129203, "loss": 0.398, "step": 2704 }, { "epoch": 0.2191348023331173, "grad_norm": 0.04462905600667, "learning_rate": 0.00010955852571891454, "loss": 0.3762, "step": 2705 }, { "epoch": 0.21921581335061568, "grad_norm": 0.05082102492451668, "learning_rate": 0.00010959902794653707, "loss": 0.3943, "step": 2706 }, { "epoch": 0.21929682436811407, "grad_norm": 0.05209215730428696, "learning_rate": 0.00010963953017415958, "loss": 0.4001, "step": 2707 }, { "epoch": 0.21937783538561245, "grad_norm": 0.040937576442956924, "learning_rate": 0.00010968003240178211, "loss": 0.3479, "step": 2708 }, { "epoch": 0.21945884640311084, "grad_norm": 0.054162561893463135, "learning_rate": 0.00010972053462940462, "loss": 0.3737, "step": 2709 }, { "epoch": 0.2195398574206092, "grad_norm": 0.04428819939494133, "learning_rate": 0.00010976103685702715, "loss": 0.3754, "step": 2710 }, { "epoch": 0.21962086843810757, "grad_norm": 0.04762909933924675, "learning_rate": 0.00010980153908464966, "loss": 0.359, "step": 2711 }, { "epoch": 0.21970187945560596, "grad_norm": 0.0502607524394989, "learning_rate": 0.00010984204131227218, "loss": 0.3484, "step": 2712 }, { "epoch": 0.21978289047310434, "grad_norm": 0.04703029245138168, "learning_rate": 0.0001098825435398947, "loss": 0.3873, "step": 2713 }, { "epoch": 0.21986390149060273, "grad_norm": 0.036031436175107956, "learning_rate": 0.00010992304576751722, "loss": 0.3486, "step": 2714 }, { "epoch": 0.2199449125081011, "grad_norm": 0.050215840339660645, "learning_rate": 0.00010996354799513975, "loss": 0.3577, "step": 2715 }, { "epoch": 0.2200259235255995, "grad_norm": 0.04181578382849693, "learning_rate": 0.00011000405022276225, "loss": 0.3569, "step": 2716 }, { "epoch": 0.22010693454309785, "grad_norm": 0.048212967813014984, "learning_rate": 0.00011004455245038479, "loss": 0.412, "step": 2717 }, { "epoch": 0.22018794556059623, "grad_norm": 0.040041908621788025, "learning_rate": 0.0001100850546780073, "loss": 0.3738, "step": 2718 }, { "epoch": 0.22026895657809462, "grad_norm": 0.03832246735692024, "learning_rate": 0.00011012555690562983, "loss": 0.3477, "step": 2719 }, { "epoch": 0.220349967595593, "grad_norm": 0.04586288332939148, "learning_rate": 0.00011016605913325233, "loss": 0.3495, "step": 2720 }, { "epoch": 0.22043097861309138, "grad_norm": 0.04519755020737648, "learning_rate": 0.00011020656136087487, "loss": 0.3569, "step": 2721 }, { "epoch": 0.22051198963058977, "grad_norm": 0.04331675544381142, "learning_rate": 0.00011024706358849737, "loss": 0.3464, "step": 2722 }, { "epoch": 0.22059300064808815, "grad_norm": 0.049206074327230453, "learning_rate": 0.00011028756581611989, "loss": 0.3763, "step": 2723 }, { "epoch": 0.22067401166558653, "grad_norm": 0.05203338712453842, "learning_rate": 0.00011032806804374241, "loss": 0.3811, "step": 2724 }, { "epoch": 0.2207550226830849, "grad_norm": 0.05231618136167526, "learning_rate": 0.00011036857027136493, "loss": 0.3838, "step": 2725 }, { "epoch": 0.22083603370058327, "grad_norm": 0.04322638362646103, "learning_rate": 0.00011040907249898744, "loss": 0.3613, "step": 2726 }, { "epoch": 0.22091704471808166, "grad_norm": 0.048019472509622574, "learning_rate": 0.00011044957472660997, "loss": 0.4003, "step": 2727 }, { "epoch": 0.22099805573558004, "grad_norm": 0.05193858593702316, "learning_rate": 0.00011049007695423248, "loss": 0.4298, "step": 2728 }, { "epoch": 0.22107906675307842, "grad_norm": 0.04562699794769287, "learning_rate": 0.00011053057918185501, "loss": 0.4184, "step": 2729 }, { "epoch": 0.2211600777705768, "grad_norm": 0.043055713176727295, "learning_rate": 0.00011057108140947752, "loss": 0.4453, "step": 2730 }, { "epoch": 0.2212410887880752, "grad_norm": 0.042347393929958344, "learning_rate": 0.00011061158363710005, "loss": 0.3841, "step": 2731 }, { "epoch": 0.22132209980557355, "grad_norm": 0.048777904361486435, "learning_rate": 0.00011065208586472256, "loss": 0.3981, "step": 2732 }, { "epoch": 0.22140311082307193, "grad_norm": 0.048668425530195236, "learning_rate": 0.00011069258809234509, "loss": 0.4039, "step": 2733 }, { "epoch": 0.2214841218405703, "grad_norm": 0.052155207842588425, "learning_rate": 0.00011073309031996761, "loss": 0.3668, "step": 2734 }, { "epoch": 0.2215651328580687, "grad_norm": 0.04615851119160652, "learning_rate": 0.00011077359254759013, "loss": 0.3642, "step": 2735 }, { "epoch": 0.22164614387556708, "grad_norm": 0.04468770697712898, "learning_rate": 0.00011081409477521265, "loss": 0.4079, "step": 2736 }, { "epoch": 0.22172715489306546, "grad_norm": 0.05454346165060997, "learning_rate": 0.00011085459700283515, "loss": 0.3461, "step": 2737 }, { "epoch": 0.22180816591056385, "grad_norm": 0.049995217472314835, "learning_rate": 0.00011089509923045769, "loss": 0.3601, "step": 2738 }, { "epoch": 0.2218891769280622, "grad_norm": 0.06284768134355545, "learning_rate": 0.0001109356014580802, "loss": 0.4296, "step": 2739 }, { "epoch": 0.2219701879455606, "grad_norm": 0.04179792478680611, "learning_rate": 0.00011097610368570273, "loss": 0.3439, "step": 2740 }, { "epoch": 0.22205119896305897, "grad_norm": 0.056236620992422104, "learning_rate": 0.00011101660591332523, "loss": 0.3928, "step": 2741 }, { "epoch": 0.22213220998055735, "grad_norm": 0.05494439974427223, "learning_rate": 0.00011105710814094777, "loss": 0.3415, "step": 2742 }, { "epoch": 0.22221322099805574, "grad_norm": 0.04557449743151665, "learning_rate": 0.00011109761036857027, "loss": 0.3638, "step": 2743 }, { "epoch": 0.22229423201555412, "grad_norm": 0.039909373968839645, "learning_rate": 0.0001111381125961928, "loss": 0.3681, "step": 2744 }, { "epoch": 0.2223752430330525, "grad_norm": 0.04111361876130104, "learning_rate": 0.00011117861482381531, "loss": 0.3306, "step": 2745 }, { "epoch": 0.22245625405055086, "grad_norm": 0.05619952082633972, "learning_rate": 0.00011121911705143783, "loss": 0.4013, "step": 2746 }, { "epoch": 0.22253726506804924, "grad_norm": 0.0419950969517231, "learning_rate": 0.00011125961927906035, "loss": 0.3853, "step": 2747 }, { "epoch": 0.22261827608554763, "grad_norm": 0.046850599348545074, "learning_rate": 0.00011130012150668287, "loss": 0.3279, "step": 2748 }, { "epoch": 0.222699287103046, "grad_norm": 0.042228203266859055, "learning_rate": 0.00011134062373430538, "loss": 0.3912, "step": 2749 }, { "epoch": 0.2227802981205444, "grad_norm": 0.0419267974793911, "learning_rate": 0.00011138112596192791, "loss": 0.3359, "step": 2750 }, { "epoch": 0.22286130913804278, "grad_norm": 0.05071806535124779, "learning_rate": 0.00011142162818955042, "loss": 0.4229, "step": 2751 }, { "epoch": 0.22294232015554116, "grad_norm": 0.04096124693751335, "learning_rate": 0.00011146213041717295, "loss": 0.3433, "step": 2752 }, { "epoch": 0.22302333117303955, "grad_norm": 0.043501559644937515, "learning_rate": 0.00011150263264479548, "loss": 0.3647, "step": 2753 }, { "epoch": 0.2231043421905379, "grad_norm": 0.044449783861637115, "learning_rate": 0.00011154313487241799, "loss": 0.4118, "step": 2754 }, { "epoch": 0.22318535320803629, "grad_norm": 0.05392434075474739, "learning_rate": 0.00011158363710004052, "loss": 0.4326, "step": 2755 }, { "epoch": 0.22326636422553467, "grad_norm": 0.051186319440603256, "learning_rate": 0.00011162413932766303, "loss": 0.4005, "step": 2756 }, { "epoch": 0.22334737524303305, "grad_norm": 0.04222441464662552, "learning_rate": 0.00011166464155528555, "loss": 0.3626, "step": 2757 }, { "epoch": 0.22342838626053144, "grad_norm": 0.045014429837465286, "learning_rate": 0.00011170514378290807, "loss": 0.3478, "step": 2758 }, { "epoch": 0.22350939727802982, "grad_norm": 0.05044560879468918, "learning_rate": 0.00011174564601053059, "loss": 0.3998, "step": 2759 }, { "epoch": 0.2235904082955282, "grad_norm": 0.04669976606965065, "learning_rate": 0.00011178614823815309, "loss": 0.3786, "step": 2760 }, { "epoch": 0.22367141931302656, "grad_norm": 0.06014898791909218, "learning_rate": 0.00011182665046577563, "loss": 0.385, "step": 2761 }, { "epoch": 0.22375243033052494, "grad_norm": 0.04031899571418762, "learning_rate": 0.00011186715269339813, "loss": 0.4309, "step": 2762 }, { "epoch": 0.22383344134802333, "grad_norm": 0.052987974137067795, "learning_rate": 0.00011190765492102067, "loss": 0.3789, "step": 2763 }, { "epoch": 0.2239144523655217, "grad_norm": 0.043287429958581924, "learning_rate": 0.00011194815714864317, "loss": 0.3664, "step": 2764 }, { "epoch": 0.2239954633830201, "grad_norm": 0.044800058007240295, "learning_rate": 0.0001119886593762657, "loss": 0.368, "step": 2765 }, { "epoch": 0.22407647440051848, "grad_norm": 0.04879983142018318, "learning_rate": 0.00011202916160388821, "loss": 0.3831, "step": 2766 }, { "epoch": 0.22415748541801686, "grad_norm": 0.04009270668029785, "learning_rate": 0.00011206966383151074, "loss": 0.3842, "step": 2767 }, { "epoch": 0.22423849643551522, "grad_norm": 0.047250065952539444, "learning_rate": 0.00011211016605913325, "loss": 0.4083, "step": 2768 }, { "epoch": 0.2243195074530136, "grad_norm": 0.03695997968316078, "learning_rate": 0.00011215066828675578, "loss": 0.3762, "step": 2769 }, { "epoch": 0.22440051847051198, "grad_norm": 0.04160401225090027, "learning_rate": 0.00011219117051437829, "loss": 0.3743, "step": 2770 }, { "epoch": 0.22448152948801037, "grad_norm": 0.041437141597270966, "learning_rate": 0.00011223167274200081, "loss": 0.3421, "step": 2771 }, { "epoch": 0.22456254050550875, "grad_norm": 0.0401858426630497, "learning_rate": 0.00011227217496962334, "loss": 0.3826, "step": 2772 }, { "epoch": 0.22464355152300713, "grad_norm": 0.06117672845721245, "learning_rate": 0.00011231267719724585, "loss": 0.4798, "step": 2773 }, { "epoch": 0.22472456254050552, "grad_norm": 0.045669130980968475, "learning_rate": 0.00011235317942486838, "loss": 0.403, "step": 2774 }, { "epoch": 0.2248055735580039, "grad_norm": 0.05554332211613655, "learning_rate": 0.00011239368165249089, "loss": 0.3553, "step": 2775 }, { "epoch": 0.22488658457550226, "grad_norm": 0.048306893557310104, "learning_rate": 0.00011243418388011342, "loss": 0.3933, "step": 2776 }, { "epoch": 0.22496759559300064, "grad_norm": 0.046369053423404694, "learning_rate": 0.00011247468610773593, "loss": 0.383, "step": 2777 }, { "epoch": 0.22504860661049902, "grad_norm": 0.0498780757188797, "learning_rate": 0.00011251518833535846, "loss": 0.4161, "step": 2778 }, { "epoch": 0.2251296176279974, "grad_norm": 0.04474148526787758, "learning_rate": 0.00011255569056298097, "loss": 0.3793, "step": 2779 }, { "epoch": 0.2252106286454958, "grad_norm": 0.04136667028069496, "learning_rate": 0.0001125961927906035, "loss": 0.3628, "step": 2780 }, { "epoch": 0.22529163966299418, "grad_norm": 0.040542151778936386, "learning_rate": 0.000112636695018226, "loss": 0.3953, "step": 2781 }, { "epoch": 0.22537265068049256, "grad_norm": 0.041604239493608475, "learning_rate": 0.00011267719724584853, "loss": 0.3645, "step": 2782 }, { "epoch": 0.22545366169799091, "grad_norm": 0.039977479726076126, "learning_rate": 0.00011271769947347105, "loss": 0.3775, "step": 2783 }, { "epoch": 0.2255346727154893, "grad_norm": 0.04203964024782181, "learning_rate": 0.00011275820170109356, "loss": 0.3811, "step": 2784 }, { "epoch": 0.22561568373298768, "grad_norm": 0.042460497468709946, "learning_rate": 0.00011279870392871607, "loss": 0.3768, "step": 2785 }, { "epoch": 0.22569669475048607, "grad_norm": 0.04054595157504082, "learning_rate": 0.0001128392061563386, "loss": 0.4022, "step": 2786 }, { "epoch": 0.22577770576798445, "grad_norm": 0.05155777558684349, "learning_rate": 0.00011287970838396111, "loss": 0.4044, "step": 2787 }, { "epoch": 0.22585871678548283, "grad_norm": 0.037050679326057434, "learning_rate": 0.00011292021061158364, "loss": 0.3806, "step": 2788 }, { "epoch": 0.22593972780298122, "grad_norm": 0.050555843859910965, "learning_rate": 0.00011296071283920615, "loss": 0.468, "step": 2789 }, { "epoch": 0.22602073882047957, "grad_norm": 0.04230424389243126, "learning_rate": 0.00011300121506682868, "loss": 0.3509, "step": 2790 }, { "epoch": 0.22610174983797796, "grad_norm": 0.05011545121669769, "learning_rate": 0.00011304171729445122, "loss": 0.3929, "step": 2791 }, { "epoch": 0.22618276085547634, "grad_norm": 0.04369264468550682, "learning_rate": 0.00011308221952207372, "loss": 0.4351, "step": 2792 }, { "epoch": 0.22626377187297472, "grad_norm": 0.0465107187628746, "learning_rate": 0.00011312272174969624, "loss": 0.3824, "step": 2793 }, { "epoch": 0.2263447828904731, "grad_norm": 0.043736957013607025, "learning_rate": 0.00011316322397731876, "loss": 0.3363, "step": 2794 }, { "epoch": 0.2264257939079715, "grad_norm": 0.056683193892240524, "learning_rate": 0.00011320372620494128, "loss": 0.3998, "step": 2795 }, { "epoch": 0.22650680492546987, "grad_norm": 0.05034268647432327, "learning_rate": 0.00011324422843256379, "loss": 0.4284, "step": 2796 }, { "epoch": 0.22658781594296826, "grad_norm": 0.03823321685194969, "learning_rate": 0.00011328473066018632, "loss": 0.3417, "step": 2797 }, { "epoch": 0.2266688269604666, "grad_norm": 0.040836721658706665, "learning_rate": 0.00011332523288780883, "loss": 0.3479, "step": 2798 }, { "epoch": 0.226749837977965, "grad_norm": 0.044317472726106644, "learning_rate": 0.00011336573511543136, "loss": 0.3861, "step": 2799 }, { "epoch": 0.22683084899546338, "grad_norm": 0.05670579895377159, "learning_rate": 0.00011340623734305387, "loss": 0.4012, "step": 2800 }, { "epoch": 0.22691186001296176, "grad_norm": 0.038431454449892044, "learning_rate": 0.0001134467395706764, "loss": 0.3231, "step": 2801 }, { "epoch": 0.22699287103046015, "grad_norm": 0.04589037969708443, "learning_rate": 0.0001134872417982989, "loss": 0.3879, "step": 2802 }, { "epoch": 0.22707388204795853, "grad_norm": 0.04616350680589676, "learning_rate": 0.00011352774402592144, "loss": 0.3994, "step": 2803 }, { "epoch": 0.22715489306545691, "grad_norm": 0.037241414189338684, "learning_rate": 0.00011356824625354394, "loss": 0.3305, "step": 2804 }, { "epoch": 0.22723590408295527, "grad_norm": 0.037454795092344284, "learning_rate": 0.00011360874848116648, "loss": 0.3414, "step": 2805 }, { "epoch": 0.22731691510045365, "grad_norm": 0.04402243718504906, "learning_rate": 0.00011364925070878898, "loss": 0.3993, "step": 2806 }, { "epoch": 0.22739792611795204, "grad_norm": 0.04333695396780968, "learning_rate": 0.0001136897529364115, "loss": 0.4096, "step": 2807 }, { "epoch": 0.22747893713545042, "grad_norm": 0.04681192710995674, "learning_rate": 0.00011373025516403401, "loss": 0.4094, "step": 2808 }, { "epoch": 0.2275599481529488, "grad_norm": 0.055429939180612564, "learning_rate": 0.00011377075739165654, "loss": 0.4254, "step": 2809 }, { "epoch": 0.2276409591704472, "grad_norm": 0.06147387996315956, "learning_rate": 0.00011381125961927908, "loss": 0.4017, "step": 2810 }, { "epoch": 0.22772197018794557, "grad_norm": 0.04066922888159752, "learning_rate": 0.00011385176184690158, "loss": 0.3356, "step": 2811 }, { "epoch": 0.22780298120544393, "grad_norm": 0.05167434737086296, "learning_rate": 0.00011389226407452412, "loss": 0.3007, "step": 2812 }, { "epoch": 0.2278839922229423, "grad_norm": 0.05443723127245903, "learning_rate": 0.00011393276630214662, "loss": 0.4414, "step": 2813 }, { "epoch": 0.2279650032404407, "grad_norm": 0.044885192066431046, "learning_rate": 0.00011397326852976915, "loss": 0.3484, "step": 2814 }, { "epoch": 0.22804601425793908, "grad_norm": 0.040323249995708466, "learning_rate": 0.00011401377075739166, "loss": 0.3723, "step": 2815 }, { "epoch": 0.22812702527543746, "grad_norm": 0.038147564977407455, "learning_rate": 0.0001140542729850142, "loss": 0.406, "step": 2816 }, { "epoch": 0.22820803629293585, "grad_norm": 0.05138280987739563, "learning_rate": 0.0001140947752126367, "loss": 0.3594, "step": 2817 }, { "epoch": 0.22828904731043423, "grad_norm": 0.0651860162615776, "learning_rate": 0.00011413527744025922, "loss": 0.4064, "step": 2818 }, { "epoch": 0.2283700583279326, "grad_norm": 0.042356159538030624, "learning_rate": 0.00011417577966788173, "loss": 0.3587, "step": 2819 }, { "epoch": 0.22845106934543097, "grad_norm": 0.051314469426870346, "learning_rate": 0.00011421628189550426, "loss": 0.4061, "step": 2820 }, { "epoch": 0.22853208036292935, "grad_norm": 0.05304783210158348, "learning_rate": 0.00011425678412312677, "loss": 0.4175, "step": 2821 }, { "epoch": 0.22861309138042774, "grad_norm": 0.04307013750076294, "learning_rate": 0.0001142972863507493, "loss": 0.405, "step": 2822 }, { "epoch": 0.22869410239792612, "grad_norm": 0.04428693279623985, "learning_rate": 0.0001143377885783718, "loss": 0.3473, "step": 2823 }, { "epoch": 0.2287751134154245, "grad_norm": 0.04833827540278435, "learning_rate": 0.00011437829080599434, "loss": 0.3711, "step": 2824 }, { "epoch": 0.22885612443292289, "grad_norm": 0.04875127598643303, "learning_rate": 0.00011441879303361684, "loss": 0.3986, "step": 2825 }, { "epoch": 0.22893713545042127, "grad_norm": 0.04338555783033371, "learning_rate": 0.00011445929526123938, "loss": 0.3913, "step": 2826 }, { "epoch": 0.22901814646791963, "grad_norm": 0.03709283098578453, "learning_rate": 0.00011449979748886188, "loss": 0.3625, "step": 2827 }, { "epoch": 0.229099157485418, "grad_norm": 0.040035828948020935, "learning_rate": 0.00011454029971648442, "loss": 0.3766, "step": 2828 }, { "epoch": 0.2291801685029164, "grad_norm": 0.045361414551734924, "learning_rate": 0.00011458080194410694, "loss": 0.3784, "step": 2829 }, { "epoch": 0.22926117952041478, "grad_norm": 0.05100679025053978, "learning_rate": 0.00011462130417172944, "loss": 0.3821, "step": 2830 }, { "epoch": 0.22934219053791316, "grad_norm": 0.04482673481106758, "learning_rate": 0.00011466180639935198, "loss": 0.3936, "step": 2831 }, { "epoch": 0.22942320155541154, "grad_norm": 0.04993167892098427, "learning_rate": 0.00011470230862697448, "loss": 0.4575, "step": 2832 }, { "epoch": 0.22950421257290993, "grad_norm": 0.04634513333439827, "learning_rate": 0.00011474281085459702, "loss": 0.3786, "step": 2833 }, { "epoch": 0.22958522359040828, "grad_norm": 0.044133465737104416, "learning_rate": 0.00011478331308221952, "loss": 0.4102, "step": 2834 }, { "epoch": 0.22966623460790667, "grad_norm": 0.056882306933403015, "learning_rate": 0.00011482381530984205, "loss": 0.3638, "step": 2835 }, { "epoch": 0.22974724562540505, "grad_norm": 0.05236594006419182, "learning_rate": 0.00011486431753746456, "loss": 0.361, "step": 2836 }, { "epoch": 0.22982825664290343, "grad_norm": 0.049694813787937164, "learning_rate": 0.0001149048197650871, "loss": 0.4428, "step": 2837 }, { "epoch": 0.22990926766040182, "grad_norm": 0.04660965874791145, "learning_rate": 0.0001149453219927096, "loss": 0.4022, "step": 2838 }, { "epoch": 0.2299902786779002, "grad_norm": 0.05099980905652046, "learning_rate": 0.00011498582422033213, "loss": 0.4409, "step": 2839 }, { "epoch": 0.23007128969539858, "grad_norm": 0.051555100828409195, "learning_rate": 0.00011502632644795464, "loss": 0.3945, "step": 2840 }, { "epoch": 0.23015230071289697, "grad_norm": 0.037023525685071945, "learning_rate": 0.00011506682867557716, "loss": 0.3314, "step": 2841 }, { "epoch": 0.23023331173039532, "grad_norm": 0.0383535772562027, "learning_rate": 0.00011510733090319968, "loss": 0.3609, "step": 2842 }, { "epoch": 0.2303143227478937, "grad_norm": 0.03986530005931854, "learning_rate": 0.0001151478331308222, "loss": 0.381, "step": 2843 }, { "epoch": 0.2303953337653921, "grad_norm": 0.045942630618810654, "learning_rate": 0.0001151883353584447, "loss": 0.4098, "step": 2844 }, { "epoch": 0.23047634478289047, "grad_norm": 0.04153173789381981, "learning_rate": 0.00011522883758606724, "loss": 0.3453, "step": 2845 }, { "epoch": 0.23055735580038886, "grad_norm": 0.04518669471144676, "learning_rate": 0.00011526933981368977, "loss": 0.3895, "step": 2846 }, { "epoch": 0.23063836681788724, "grad_norm": 0.0531323216855526, "learning_rate": 0.00011530984204131228, "loss": 0.3566, "step": 2847 }, { "epoch": 0.23071937783538563, "grad_norm": 0.04103211313486099, "learning_rate": 0.00011535034426893481, "loss": 0.3482, "step": 2848 }, { "epoch": 0.23080038885288398, "grad_norm": 0.037041306495666504, "learning_rate": 0.00011539084649655732, "loss": 0.3532, "step": 2849 }, { "epoch": 0.23088139987038236, "grad_norm": 0.04363298416137695, "learning_rate": 0.00011543134872417985, "loss": 0.3321, "step": 2850 }, { "epoch": 0.23096241088788075, "grad_norm": 0.04319039359688759, "learning_rate": 0.00011547185095180236, "loss": 0.3919, "step": 2851 }, { "epoch": 0.23104342190537913, "grad_norm": 0.044030141085386276, "learning_rate": 0.00011551235317942488, "loss": 0.3925, "step": 2852 }, { "epoch": 0.23112443292287752, "grad_norm": 0.04883784055709839, "learning_rate": 0.0001155528554070474, "loss": 0.3755, "step": 2853 }, { "epoch": 0.2312054439403759, "grad_norm": 0.042822521179914474, "learning_rate": 0.00011559335763466991, "loss": 0.4036, "step": 2854 }, { "epoch": 0.23128645495787428, "grad_norm": 0.039836104959249496, "learning_rate": 0.00011563385986229242, "loss": 0.4299, "step": 2855 }, { "epoch": 0.23136746597537264, "grad_norm": 0.05859369412064552, "learning_rate": 0.00011567436208991495, "loss": 0.3835, "step": 2856 }, { "epoch": 0.23144847699287102, "grad_norm": 0.039807382971048355, "learning_rate": 0.00011571486431753746, "loss": 0.3918, "step": 2857 }, { "epoch": 0.2315294880103694, "grad_norm": 0.04248964414000511, "learning_rate": 0.00011575536654516, "loss": 0.3577, "step": 2858 }, { "epoch": 0.2316104990278678, "grad_norm": 0.04504331946372986, "learning_rate": 0.0001157958687727825, "loss": 0.3758, "step": 2859 }, { "epoch": 0.23169151004536617, "grad_norm": 0.04224937781691551, "learning_rate": 0.00011583637100040503, "loss": 0.3899, "step": 2860 }, { "epoch": 0.23177252106286456, "grad_norm": 0.04513216391205788, "learning_rate": 0.00011587687322802754, "loss": 0.4145, "step": 2861 }, { "epoch": 0.23185353208036294, "grad_norm": 0.05136607587337494, "learning_rate": 0.00011591737545565007, "loss": 0.4014, "step": 2862 }, { "epoch": 0.2319345430978613, "grad_norm": 0.042943354696035385, "learning_rate": 0.00011595787768327258, "loss": 0.353, "step": 2863 }, { "epoch": 0.23201555411535968, "grad_norm": 0.04254595562815666, "learning_rate": 0.00011599837991089511, "loss": 0.3895, "step": 2864 }, { "epoch": 0.23209656513285806, "grad_norm": 0.039488524198532104, "learning_rate": 0.00011603888213851763, "loss": 0.3684, "step": 2865 }, { "epoch": 0.23217757615035645, "grad_norm": 0.03824709355831146, "learning_rate": 0.00011607938436614014, "loss": 0.3316, "step": 2866 }, { "epoch": 0.23225858716785483, "grad_norm": 0.04050895944237709, "learning_rate": 0.00011611988659376267, "loss": 0.3971, "step": 2867 }, { "epoch": 0.2323395981853532, "grad_norm": 0.04249674081802368, "learning_rate": 0.00011616038882138518, "loss": 0.371, "step": 2868 }, { "epoch": 0.2324206092028516, "grad_norm": 0.04510214179754257, "learning_rate": 0.00011620089104900771, "loss": 0.3866, "step": 2869 }, { "epoch": 0.23250162022034998, "grad_norm": 0.048988454043865204, "learning_rate": 0.00011624139327663022, "loss": 0.3157, "step": 2870 }, { "epoch": 0.23258263123784834, "grad_norm": 0.03871070221066475, "learning_rate": 0.00011628189550425275, "loss": 0.3896, "step": 2871 }, { "epoch": 0.23266364225534672, "grad_norm": 0.04526352509856224, "learning_rate": 0.00011632239773187526, "loss": 0.3846, "step": 2872 }, { "epoch": 0.2327446532728451, "grad_norm": 0.049733031541109085, "learning_rate": 0.00011636289995949779, "loss": 0.3702, "step": 2873 }, { "epoch": 0.2328256642903435, "grad_norm": 0.059630136936903, "learning_rate": 0.0001164034021871203, "loss": 0.3536, "step": 2874 }, { "epoch": 0.23290667530784187, "grad_norm": 0.05297769978642464, "learning_rate": 0.00011644390441474283, "loss": 0.3572, "step": 2875 }, { "epoch": 0.23298768632534025, "grad_norm": 0.0500696562230587, "learning_rate": 0.00011648440664236533, "loss": 0.3934, "step": 2876 }, { "epoch": 0.23306869734283864, "grad_norm": 0.045815713703632355, "learning_rate": 0.00011652490886998785, "loss": 0.4009, "step": 2877 }, { "epoch": 0.233149708360337, "grad_norm": 0.044900525361299515, "learning_rate": 0.00011656541109761037, "loss": 0.3715, "step": 2878 }, { "epoch": 0.23323071937783538, "grad_norm": 0.05272326618432999, "learning_rate": 0.00011660591332523289, "loss": 0.3457, "step": 2879 }, { "epoch": 0.23331173039533376, "grad_norm": 0.05682436376810074, "learning_rate": 0.0001166464155528554, "loss": 0.3678, "step": 2880 }, { "epoch": 0.23339274141283214, "grad_norm": 0.05235380306839943, "learning_rate": 0.00011668691778047793, "loss": 0.3432, "step": 2881 }, { "epoch": 0.23347375243033053, "grad_norm": 0.04751413315534592, "learning_rate": 0.00011672742000810044, "loss": 0.3745, "step": 2882 }, { "epoch": 0.2335547634478289, "grad_norm": 0.06046335771679878, "learning_rate": 0.00011676792223572297, "loss": 0.4513, "step": 2883 }, { "epoch": 0.2336357744653273, "grad_norm": 0.04304825887084007, "learning_rate": 0.0001168084244633455, "loss": 0.3731, "step": 2884 }, { "epoch": 0.23371678548282565, "grad_norm": 0.04588375613093376, "learning_rate": 0.00011684892669096801, "loss": 0.4134, "step": 2885 }, { "epoch": 0.23379779650032403, "grad_norm": 0.046558551490306854, "learning_rate": 0.00011688942891859054, "loss": 0.4054, "step": 2886 }, { "epoch": 0.23387880751782242, "grad_norm": 0.05705951899290085, "learning_rate": 0.00011692993114621305, "loss": 0.369, "step": 2887 }, { "epoch": 0.2339598185353208, "grad_norm": 0.04509139806032181, "learning_rate": 0.00011697043337383557, "loss": 0.3661, "step": 2888 }, { "epoch": 0.23404082955281919, "grad_norm": 0.052528828382492065, "learning_rate": 0.00011701093560145808, "loss": 0.3595, "step": 2889 }, { "epoch": 0.23412184057031757, "grad_norm": 0.04576665163040161, "learning_rate": 0.00011705143782908061, "loss": 0.3703, "step": 2890 }, { "epoch": 0.23420285158781595, "grad_norm": 0.04580291360616684, "learning_rate": 0.00011709194005670312, "loss": 0.3458, "step": 2891 }, { "epoch": 0.23428386260531434, "grad_norm": 0.06401868909597397, "learning_rate": 0.00011713244228432565, "loss": 0.3658, "step": 2892 }, { "epoch": 0.2343648736228127, "grad_norm": 0.03928394988179207, "learning_rate": 0.00011717294451194815, "loss": 0.3226, "step": 2893 }, { "epoch": 0.23444588464031108, "grad_norm": 0.0432765930891037, "learning_rate": 0.00011721344673957069, "loss": 0.3945, "step": 2894 }, { "epoch": 0.23452689565780946, "grad_norm": 0.040293481200933456, "learning_rate": 0.0001172539489671932, "loss": 0.3825, "step": 2895 }, { "epoch": 0.23460790667530784, "grad_norm": 0.0494161918759346, "learning_rate": 0.00011729445119481573, "loss": 0.3802, "step": 2896 }, { "epoch": 0.23468891769280623, "grad_norm": 0.03994528949260712, "learning_rate": 0.00011733495342243823, "loss": 0.3772, "step": 2897 }, { "epoch": 0.2347699287103046, "grad_norm": 0.05831955000758171, "learning_rate": 0.00011737545565006077, "loss": 0.3916, "step": 2898 }, { "epoch": 0.234850939727803, "grad_norm": 0.04238951951265335, "learning_rate": 0.00011741595787768327, "loss": 0.359, "step": 2899 }, { "epoch": 0.23493195074530135, "grad_norm": 0.05210850015282631, "learning_rate": 0.00011745646010530579, "loss": 0.381, "step": 2900 }, { "epoch": 0.23501296176279973, "grad_norm": 0.0391576923429966, "learning_rate": 0.00011749696233292831, "loss": 0.3605, "step": 2901 }, { "epoch": 0.23509397278029812, "grad_norm": 0.05010760948061943, "learning_rate": 0.00011753746456055083, "loss": 0.3643, "step": 2902 }, { "epoch": 0.2351749837977965, "grad_norm": 0.05513544753193855, "learning_rate": 0.00011757796678817336, "loss": 0.4352, "step": 2903 }, { "epoch": 0.23525599481529488, "grad_norm": 0.04464849829673767, "learning_rate": 0.00011761846901579587, "loss": 0.3811, "step": 2904 }, { "epoch": 0.23533700583279327, "grad_norm": 0.04738510400056839, "learning_rate": 0.0001176589712434184, "loss": 0.4235, "step": 2905 }, { "epoch": 0.23541801685029165, "grad_norm": 0.04617168754339218, "learning_rate": 0.00011769947347104091, "loss": 0.395, "step": 2906 }, { "epoch": 0.23549902786779, "grad_norm": 0.05911537632346153, "learning_rate": 0.00011773997569866344, "loss": 0.3439, "step": 2907 }, { "epoch": 0.2355800388852884, "grad_norm": 0.0571928471326828, "learning_rate": 0.00011778047792628595, "loss": 0.3754, "step": 2908 }, { "epoch": 0.23566104990278677, "grad_norm": 0.04202379658818245, "learning_rate": 0.00011782098015390848, "loss": 0.3798, "step": 2909 }, { "epoch": 0.23574206092028516, "grad_norm": 0.04209866374731064, "learning_rate": 0.00011786148238153099, "loss": 0.376, "step": 2910 }, { "epoch": 0.23582307193778354, "grad_norm": 0.045575957745313644, "learning_rate": 0.00011790198460915351, "loss": 0.4286, "step": 2911 }, { "epoch": 0.23590408295528192, "grad_norm": 0.03941948339343071, "learning_rate": 0.00011794248683677603, "loss": 0.3605, "step": 2912 }, { "epoch": 0.2359850939727803, "grad_norm": 0.04388366639614105, "learning_rate": 0.00011798298906439855, "loss": 0.3692, "step": 2913 }, { "epoch": 0.2360661049902787, "grad_norm": 0.0419548898935318, "learning_rate": 0.00011802349129202105, "loss": 0.3752, "step": 2914 }, { "epoch": 0.23614711600777705, "grad_norm": 0.048258326947689056, "learning_rate": 0.00011806399351964359, "loss": 0.3246, "step": 2915 }, { "epoch": 0.23622812702527543, "grad_norm": 0.043853510171175, "learning_rate": 0.0001181044957472661, "loss": 0.4175, "step": 2916 }, { "epoch": 0.23630913804277381, "grad_norm": 0.034705597907304764, "learning_rate": 0.00011814499797488863, "loss": 0.4249, "step": 2917 }, { "epoch": 0.2363901490602722, "grad_norm": 0.04022218659520149, "learning_rate": 0.00011818550020251113, "loss": 0.3845, "step": 2918 }, { "epoch": 0.23647116007777058, "grad_norm": 0.04778154939413071, "learning_rate": 0.00011822600243013367, "loss": 0.3716, "step": 2919 }, { "epoch": 0.23655217109526896, "grad_norm": 0.04581722244620323, "learning_rate": 0.00011826650465775617, "loss": 0.3863, "step": 2920 }, { "epoch": 0.23663318211276735, "grad_norm": 0.045739613473415375, "learning_rate": 0.0001183070068853787, "loss": 0.4155, "step": 2921 }, { "epoch": 0.2367141931302657, "grad_norm": 0.0473037026822567, "learning_rate": 0.00011834750911300122, "loss": 0.3698, "step": 2922 }, { "epoch": 0.2367952041477641, "grad_norm": 0.0326833575963974, "learning_rate": 0.00011838801134062374, "loss": 0.3233, "step": 2923 }, { "epoch": 0.23687621516526247, "grad_norm": 0.0429866723716259, "learning_rate": 0.00011842851356824626, "loss": 0.4209, "step": 2924 }, { "epoch": 0.23695722618276086, "grad_norm": 0.03465301916003227, "learning_rate": 0.00011846901579586877, "loss": 0.3538, "step": 2925 }, { "epoch": 0.23703823720025924, "grad_norm": 0.03621894493699074, "learning_rate": 0.0001185095180234913, "loss": 0.3601, "step": 2926 }, { "epoch": 0.23711924821775762, "grad_norm": 0.04385453090071678, "learning_rate": 0.00011855002025111381, "loss": 0.3191, "step": 2927 }, { "epoch": 0.237200259235256, "grad_norm": 0.041872210800647736, "learning_rate": 0.00011859052247873634, "loss": 0.3737, "step": 2928 }, { "epoch": 0.23728127025275436, "grad_norm": 0.043836288154125214, "learning_rate": 0.00011863102470635885, "loss": 0.3315, "step": 2929 }, { "epoch": 0.23736228127025275, "grad_norm": 0.05111682042479515, "learning_rate": 0.00011867152693398138, "loss": 0.3472, "step": 2930 }, { "epoch": 0.23744329228775113, "grad_norm": 0.045502860099077225, "learning_rate": 0.00011871202916160389, "loss": 0.3504, "step": 2931 }, { "epoch": 0.2375243033052495, "grad_norm": 0.050450120121240616, "learning_rate": 0.00011875253138922642, "loss": 0.4085, "step": 2932 }, { "epoch": 0.2376053143227479, "grad_norm": 0.0447453111410141, "learning_rate": 0.00011879303361684893, "loss": 0.3852, "step": 2933 }, { "epoch": 0.23768632534024628, "grad_norm": 0.03878478333353996, "learning_rate": 0.00011883353584447146, "loss": 0.2904, "step": 2934 }, { "epoch": 0.23776733635774466, "grad_norm": 0.04184164106845856, "learning_rate": 0.00011887403807209397, "loss": 0.3481, "step": 2935 }, { "epoch": 0.23784834737524305, "grad_norm": 0.04044514149427414, "learning_rate": 0.00011891454029971649, "loss": 0.4507, "step": 2936 }, { "epoch": 0.2379293583927414, "grad_norm": 0.044943299144506454, "learning_rate": 0.000118955042527339, "loss": 0.3847, "step": 2937 }, { "epoch": 0.23801036941023979, "grad_norm": 0.04254603385925293, "learning_rate": 0.00011899554475496153, "loss": 0.3852, "step": 2938 }, { "epoch": 0.23809138042773817, "grad_norm": 0.042455434799194336, "learning_rate": 0.00011903604698258403, "loss": 0.3802, "step": 2939 }, { "epoch": 0.23817239144523655, "grad_norm": 0.04601847007870674, "learning_rate": 0.00011907654921020657, "loss": 0.3877, "step": 2940 }, { "epoch": 0.23825340246273494, "grad_norm": 0.05261930823326111, "learning_rate": 0.0001191170514378291, "loss": 0.3801, "step": 2941 }, { "epoch": 0.23833441348023332, "grad_norm": 0.06650017946958542, "learning_rate": 0.0001191575536654516, "loss": 0.4182, "step": 2942 }, { "epoch": 0.2384154244977317, "grad_norm": 0.04565853625535965, "learning_rate": 0.00011919805589307414, "loss": 0.3418, "step": 2943 }, { "epoch": 0.23849643551523006, "grad_norm": 0.04410654678940773, "learning_rate": 0.00011923855812069664, "loss": 0.4302, "step": 2944 }, { "epoch": 0.23857744653272844, "grad_norm": 0.04739765822887421, "learning_rate": 0.00011927906034831918, "loss": 0.4081, "step": 2945 }, { "epoch": 0.23865845755022683, "grad_norm": 0.04107481986284256, "learning_rate": 0.00011931956257594168, "loss": 0.4017, "step": 2946 }, { "epoch": 0.2387394685677252, "grad_norm": 0.04466596618294716, "learning_rate": 0.0001193600648035642, "loss": 0.3583, "step": 2947 }, { "epoch": 0.2388204795852236, "grad_norm": 0.04515571519732475, "learning_rate": 0.00011940056703118672, "loss": 0.3896, "step": 2948 }, { "epoch": 0.23890149060272198, "grad_norm": 0.045190826058387756, "learning_rate": 0.00011944106925880924, "loss": 0.3659, "step": 2949 }, { "epoch": 0.23898250162022036, "grad_norm": 0.041392821818590164, "learning_rate": 0.00011948157148643175, "loss": 0.4046, "step": 2950 }, { "epoch": 0.23906351263771872, "grad_norm": 0.04082554206252098, "learning_rate": 0.00011952207371405428, "loss": 0.4052, "step": 2951 }, { "epoch": 0.2391445236552171, "grad_norm": 0.048217855393886566, "learning_rate": 0.00011956257594167679, "loss": 0.3632, "step": 2952 }, { "epoch": 0.23922553467271548, "grad_norm": 0.04387287795543671, "learning_rate": 0.00011960307816929932, "loss": 0.3655, "step": 2953 }, { "epoch": 0.23930654569021387, "grad_norm": 0.04062773659825325, "learning_rate": 0.00011964358039692183, "loss": 0.3817, "step": 2954 }, { "epoch": 0.23938755670771225, "grad_norm": 0.04514176398515701, "learning_rate": 0.00011968408262454436, "loss": 0.3887, "step": 2955 }, { "epoch": 0.23946856772521063, "grad_norm": 0.04475953057408333, "learning_rate": 0.00011972458485216687, "loss": 0.4148, "step": 2956 }, { "epoch": 0.23954957874270902, "grad_norm": 0.042864445596933365, "learning_rate": 0.0001197650870797894, "loss": 0.3809, "step": 2957 }, { "epoch": 0.2396305897602074, "grad_norm": 0.04250786826014519, "learning_rate": 0.0001198055893074119, "loss": 0.398, "step": 2958 }, { "epoch": 0.23971160077770576, "grad_norm": 0.04754723981022835, "learning_rate": 0.00011984609153503444, "loss": 0.4056, "step": 2959 }, { "epoch": 0.23979261179520414, "grad_norm": 0.04678526893258095, "learning_rate": 0.00011988659376265696, "loss": 0.3673, "step": 2960 }, { "epoch": 0.23987362281270252, "grad_norm": 0.045734815299510956, "learning_rate": 0.00011992709599027946, "loss": 0.3905, "step": 2961 }, { "epoch": 0.2399546338302009, "grad_norm": 0.038129664957523346, "learning_rate": 0.000119967598217902, "loss": 0.3594, "step": 2962 }, { "epoch": 0.2400356448476993, "grad_norm": 0.04983527213335037, "learning_rate": 0.0001200081004455245, "loss": 0.3623, "step": 2963 }, { "epoch": 0.24011665586519768, "grad_norm": 0.04684574529528618, "learning_rate": 0.00012004860267314704, "loss": 0.4494, "step": 2964 }, { "epoch": 0.24019766688269606, "grad_norm": 0.04936652630567551, "learning_rate": 0.00012008910490076954, "loss": 0.4044, "step": 2965 }, { "epoch": 0.24027867790019442, "grad_norm": 0.04301603138446808, "learning_rate": 0.00012012960712839208, "loss": 0.3792, "step": 2966 }, { "epoch": 0.2403596889176928, "grad_norm": 0.044206999242305756, "learning_rate": 0.00012017010935601458, "loss": 0.4301, "step": 2967 }, { "epoch": 0.24044069993519118, "grad_norm": 0.04447254166007042, "learning_rate": 0.00012021061158363712, "loss": 0.3812, "step": 2968 }, { "epoch": 0.24052171095268957, "grad_norm": 0.038800474256277084, "learning_rate": 0.00012025111381125962, "loss": 0.4229, "step": 2969 }, { "epoch": 0.24060272197018795, "grad_norm": 0.04933485761284828, "learning_rate": 0.00012029161603888216, "loss": 0.3904, "step": 2970 }, { "epoch": 0.24068373298768633, "grad_norm": 0.04177100211381912, "learning_rate": 0.00012033211826650466, "loss": 0.3652, "step": 2971 }, { "epoch": 0.24076474400518472, "grad_norm": 0.04116995632648468, "learning_rate": 0.00012037262049412718, "loss": 0.3841, "step": 2972 }, { "epoch": 0.24084575502268307, "grad_norm": 0.04211720451712608, "learning_rate": 0.00012041312272174969, "loss": 0.3557, "step": 2973 }, { "epoch": 0.24092676604018146, "grad_norm": 0.04911576583981514, "learning_rate": 0.00012045362494937222, "loss": 0.373, "step": 2974 }, { "epoch": 0.24100777705767984, "grad_norm": 0.04702078551054001, "learning_rate": 0.00012049412717699473, "loss": 0.4155, "step": 2975 }, { "epoch": 0.24108878807517822, "grad_norm": 0.03862113133072853, "learning_rate": 0.00012053462940461726, "loss": 0.3473, "step": 2976 }, { "epoch": 0.2411697990926766, "grad_norm": 0.037792373448610306, "learning_rate": 0.00012057513163223977, "loss": 0.3565, "step": 2977 }, { "epoch": 0.241250810110175, "grad_norm": 0.0489872545003891, "learning_rate": 0.0001206156338598623, "loss": 0.4076, "step": 2978 }, { "epoch": 0.24133182112767337, "grad_norm": 0.05115858465433121, "learning_rate": 0.00012065613608748483, "loss": 0.3641, "step": 2979 }, { "epoch": 0.24141283214517173, "grad_norm": 0.04305893927812576, "learning_rate": 0.00012069663831510734, "loss": 0.4047, "step": 2980 }, { "epoch": 0.2414938431626701, "grad_norm": 0.05037764459848404, "learning_rate": 0.00012073714054272987, "loss": 0.3834, "step": 2981 }, { "epoch": 0.2415748541801685, "grad_norm": 0.03909236937761307, "learning_rate": 0.00012077764277035238, "loss": 0.3717, "step": 2982 }, { "epoch": 0.24165586519766688, "grad_norm": 0.041752856224775314, "learning_rate": 0.0001208181449979749, "loss": 0.3594, "step": 2983 }, { "epoch": 0.24173687621516526, "grad_norm": 0.050167832523584366, "learning_rate": 0.0001208586472255974, "loss": 0.3732, "step": 2984 }, { "epoch": 0.24181788723266365, "grad_norm": 0.03796180710196495, "learning_rate": 0.00012089914945321994, "loss": 0.3425, "step": 2985 }, { "epoch": 0.24189889825016203, "grad_norm": 0.04057871922850609, "learning_rate": 0.00012093965168084244, "loss": 0.3774, "step": 2986 }, { "epoch": 0.24197990926766041, "grad_norm": 0.04229074716567993, "learning_rate": 0.00012098015390846498, "loss": 0.3542, "step": 2987 }, { "epoch": 0.24206092028515877, "grad_norm": 0.03807763010263443, "learning_rate": 0.00012102065613608748, "loss": 0.3493, "step": 2988 }, { "epoch": 0.24214193130265715, "grad_norm": 0.0359298512339592, "learning_rate": 0.00012106115836371002, "loss": 0.3518, "step": 2989 }, { "epoch": 0.24222294232015554, "grad_norm": 0.04807816445827484, "learning_rate": 0.00012110166059133252, "loss": 0.3222, "step": 2990 }, { "epoch": 0.24230395333765392, "grad_norm": 0.04524649307131767, "learning_rate": 0.00012114216281895505, "loss": 0.3751, "step": 2991 }, { "epoch": 0.2423849643551523, "grad_norm": 0.0531078465282917, "learning_rate": 0.00012118266504657756, "loss": 0.3387, "step": 2992 }, { "epoch": 0.2424659753726507, "grad_norm": 0.044817257672548294, "learning_rate": 0.0001212231672742001, "loss": 0.3881, "step": 2993 }, { "epoch": 0.24254698639014907, "grad_norm": 0.046434782445430756, "learning_rate": 0.0001212636695018226, "loss": 0.4195, "step": 2994 }, { "epoch": 0.24262799740764743, "grad_norm": 0.047300081700086594, "learning_rate": 0.00012130417172944512, "loss": 0.3725, "step": 2995 }, { "epoch": 0.2427090084251458, "grad_norm": 0.03659592196345329, "learning_rate": 0.00012134467395706764, "loss": 0.3658, "step": 2996 }, { "epoch": 0.2427900194426442, "grad_norm": 0.04757637158036232, "learning_rate": 0.00012138517618469016, "loss": 0.3528, "step": 2997 }, { "epoch": 0.24287103046014258, "grad_norm": 0.035950496792793274, "learning_rate": 0.00012142567841231269, "loss": 0.3559, "step": 2998 }, { "epoch": 0.24295204147764096, "grad_norm": 0.050411324948072433, "learning_rate": 0.0001214661806399352, "loss": 0.4086, "step": 2999 }, { "epoch": 0.24303305249513935, "grad_norm": 0.05020357668399811, "learning_rate": 0.00012150668286755773, "loss": 0.3628, "step": 3000 }, { "epoch": 0.24311406351263773, "grad_norm": 0.04406118765473366, "learning_rate": 0.00012154718509518024, "loss": 0.3785, "step": 3001 }, { "epoch": 0.24319507453013609, "grad_norm": 0.04498102515935898, "learning_rate": 0.00012158768732280277, "loss": 0.355, "step": 3002 }, { "epoch": 0.24327608554763447, "grad_norm": 0.043865758925676346, "learning_rate": 0.00012162818955042528, "loss": 0.3933, "step": 3003 }, { "epoch": 0.24335709656513285, "grad_norm": 0.03579156845808029, "learning_rate": 0.00012166869177804781, "loss": 0.3243, "step": 3004 }, { "epoch": 0.24343810758263124, "grad_norm": 0.046793438494205475, "learning_rate": 0.00012170919400567032, "loss": 0.4052, "step": 3005 }, { "epoch": 0.24351911860012962, "grad_norm": 0.04385807737708092, "learning_rate": 0.00012174969623329284, "loss": 0.3982, "step": 3006 }, { "epoch": 0.243600129617628, "grad_norm": 0.044661667197942734, "learning_rate": 0.00012179019846091536, "loss": 0.3769, "step": 3007 }, { "epoch": 0.2436811406351264, "grad_norm": 0.04510461539030075, "learning_rate": 0.00012183070068853788, "loss": 0.3756, "step": 3008 }, { "epoch": 0.24376215165262477, "grad_norm": 0.03899111598730087, "learning_rate": 0.00012187120291616038, "loss": 0.3859, "step": 3009 }, { "epoch": 0.24384316267012313, "grad_norm": 0.04161735624074936, "learning_rate": 0.00012191170514378292, "loss": 0.3634, "step": 3010 }, { "epoch": 0.2439241736876215, "grad_norm": 0.046930018812417984, "learning_rate": 0.00012195220737140542, "loss": 0.3986, "step": 3011 }, { "epoch": 0.2440051847051199, "grad_norm": 0.04540453106164932, "learning_rate": 0.00012199270959902795, "loss": 0.4051, "step": 3012 }, { "epoch": 0.24408619572261828, "grad_norm": 0.04622745141386986, "learning_rate": 0.00012203321182665046, "loss": 0.3568, "step": 3013 }, { "epoch": 0.24416720674011666, "grad_norm": 0.042839668691158295, "learning_rate": 0.00012207371405427298, "loss": 0.3686, "step": 3014 }, { "epoch": 0.24424821775761504, "grad_norm": 0.05041300132870674, "learning_rate": 0.00012211421628189549, "loss": 0.4411, "step": 3015 }, { "epoch": 0.24432922877511343, "grad_norm": 0.046316828578710556, "learning_rate": 0.00012215471850951802, "loss": 0.3958, "step": 3016 }, { "epoch": 0.24441023979261178, "grad_norm": 0.044460829347372055, "learning_rate": 0.00012219522073714055, "loss": 0.3939, "step": 3017 }, { "epoch": 0.24449125081011017, "grad_norm": 0.04973135143518448, "learning_rate": 0.00012223572296476306, "loss": 0.376, "step": 3018 }, { "epoch": 0.24457226182760855, "grad_norm": 0.039541829377412796, "learning_rate": 0.0001222762251923856, "loss": 0.3998, "step": 3019 }, { "epoch": 0.24465327284510693, "grad_norm": 0.03787733614444733, "learning_rate": 0.0001223167274200081, "loss": 0.3652, "step": 3020 }, { "epoch": 0.24473428386260532, "grad_norm": 0.0433904230594635, "learning_rate": 0.00012235722964763063, "loss": 0.3448, "step": 3021 }, { "epoch": 0.2448152948801037, "grad_norm": 0.036354780197143555, "learning_rate": 0.00012239773187525314, "loss": 0.3932, "step": 3022 }, { "epoch": 0.24489630589760208, "grad_norm": 0.03718483820557594, "learning_rate": 0.00012243823410287567, "loss": 0.3671, "step": 3023 }, { "epoch": 0.24497731691510044, "grad_norm": 0.04530775547027588, "learning_rate": 0.00012247873633049818, "loss": 0.3855, "step": 3024 }, { "epoch": 0.24505832793259882, "grad_norm": 0.03824467584490776, "learning_rate": 0.0001225192385581207, "loss": 0.3913, "step": 3025 }, { "epoch": 0.2451393389500972, "grad_norm": 0.040557969361543655, "learning_rate": 0.00012255974078574322, "loss": 0.391, "step": 3026 }, { "epoch": 0.2452203499675956, "grad_norm": 0.04104351997375488, "learning_rate": 0.00012260024301336575, "loss": 0.3543, "step": 3027 }, { "epoch": 0.24530136098509397, "grad_norm": 0.043880265206098557, "learning_rate": 0.00012264074524098826, "loss": 0.3496, "step": 3028 }, { "epoch": 0.24538237200259236, "grad_norm": 0.04171985760331154, "learning_rate": 0.0001226812474686108, "loss": 0.3563, "step": 3029 }, { "epoch": 0.24546338302009074, "grad_norm": 0.03931222856044769, "learning_rate": 0.0001227217496962333, "loss": 0.4155, "step": 3030 }, { "epoch": 0.24554439403758913, "grad_norm": 0.039511002600193024, "learning_rate": 0.00012276225192385583, "loss": 0.3587, "step": 3031 }, { "epoch": 0.24562540505508748, "grad_norm": 0.05257326364517212, "learning_rate": 0.00012280275415147833, "loss": 0.3488, "step": 3032 }, { "epoch": 0.24570641607258586, "grad_norm": 0.04402840510010719, "learning_rate": 0.00012284325637910087, "loss": 0.3926, "step": 3033 }, { "epoch": 0.24578742709008425, "grad_norm": 0.04026148095726967, "learning_rate": 0.00012288375860672337, "loss": 0.3613, "step": 3034 }, { "epoch": 0.24586843810758263, "grad_norm": 0.05136657878756523, "learning_rate": 0.0001229242608343459, "loss": 0.3644, "step": 3035 }, { "epoch": 0.24594944912508102, "grad_norm": 0.045099638402462006, "learning_rate": 0.0001229647630619684, "loss": 0.3997, "step": 3036 }, { "epoch": 0.2460304601425794, "grad_norm": 0.04942428320646286, "learning_rate": 0.00012300526528959092, "loss": 0.3895, "step": 3037 }, { "epoch": 0.24611147116007778, "grad_norm": 0.04704645648598671, "learning_rate": 0.00012304576751721345, "loss": 0.3687, "step": 3038 }, { "epoch": 0.24619248217757614, "grad_norm": 0.044770658016204834, "learning_rate": 0.00012308626974483596, "loss": 0.413, "step": 3039 }, { "epoch": 0.24627349319507452, "grad_norm": 0.053014349192380905, "learning_rate": 0.0001231267719724585, "loss": 0.4557, "step": 3040 }, { "epoch": 0.2463545042125729, "grad_norm": 0.0513080395758152, "learning_rate": 0.000123167274200081, "loss": 0.4115, "step": 3041 }, { "epoch": 0.2464355152300713, "grad_norm": 0.06259527057409286, "learning_rate": 0.00012320777642770353, "loss": 0.4064, "step": 3042 }, { "epoch": 0.24651652624756967, "grad_norm": 0.0488407164812088, "learning_rate": 0.00012324827865532604, "loss": 0.3817, "step": 3043 }, { "epoch": 0.24659753726506806, "grad_norm": 0.03926355019211769, "learning_rate": 0.00012328878088294857, "loss": 0.4017, "step": 3044 }, { "epoch": 0.24667854828256644, "grad_norm": 0.03465135395526886, "learning_rate": 0.00012332928311057108, "loss": 0.3564, "step": 3045 }, { "epoch": 0.2467595593000648, "grad_norm": 0.04638232663273811, "learning_rate": 0.0001233697853381936, "loss": 0.3726, "step": 3046 }, { "epoch": 0.24684057031756318, "grad_norm": 0.06023021414875984, "learning_rate": 0.00012341028756581612, "loss": 0.3579, "step": 3047 }, { "epoch": 0.24692158133506156, "grad_norm": 0.03787575662136078, "learning_rate": 0.00012345078979343865, "loss": 0.3638, "step": 3048 }, { "epoch": 0.24700259235255995, "grad_norm": 0.04475943744182587, "learning_rate": 0.00012349129202106116, "loss": 0.3742, "step": 3049 }, { "epoch": 0.24708360337005833, "grad_norm": 0.038167525082826614, "learning_rate": 0.0001235317942486837, "loss": 0.3695, "step": 3050 }, { "epoch": 0.2471646143875567, "grad_norm": 0.04315280169248581, "learning_rate": 0.0001235722964763062, "loss": 0.4386, "step": 3051 }, { "epoch": 0.2472456254050551, "grad_norm": 0.0408618189394474, "learning_rate": 0.00012361279870392873, "loss": 0.3583, "step": 3052 }, { "epoch": 0.24732663642255348, "grad_norm": 0.0395033024251461, "learning_rate": 0.00012365330093155123, "loss": 0.3228, "step": 3053 }, { "epoch": 0.24740764744005184, "grad_norm": 0.042332377284765244, "learning_rate": 0.00012369380315917377, "loss": 0.3667, "step": 3054 }, { "epoch": 0.24748865845755022, "grad_norm": 0.04684600234031677, "learning_rate": 0.0001237343053867963, "loss": 0.4264, "step": 3055 }, { "epoch": 0.2475696694750486, "grad_norm": 0.039548370987176895, "learning_rate": 0.0001237748076144188, "loss": 0.3568, "step": 3056 }, { "epoch": 0.247650680492547, "grad_norm": 0.053158797323703766, "learning_rate": 0.00012381530984204134, "loss": 0.4191, "step": 3057 }, { "epoch": 0.24773169151004537, "grad_norm": 0.05239748954772949, "learning_rate": 0.00012385581206966385, "loss": 0.3636, "step": 3058 }, { "epoch": 0.24781270252754375, "grad_norm": 0.03982961177825928, "learning_rate": 0.00012389631429728635, "loss": 0.3994, "step": 3059 }, { "epoch": 0.24789371354504214, "grad_norm": 0.041404590010643005, "learning_rate": 0.00012393681652490889, "loss": 0.3498, "step": 3060 }, { "epoch": 0.2479747245625405, "grad_norm": 0.045367904007434845, "learning_rate": 0.0001239773187525314, "loss": 0.3847, "step": 3061 }, { "epoch": 0.24805573558003888, "grad_norm": 0.041434478014707565, "learning_rate": 0.0001240178209801539, "loss": 0.3941, "step": 3062 }, { "epoch": 0.24813674659753726, "grad_norm": 0.040743254125118256, "learning_rate": 0.00012405832320777643, "loss": 0.4037, "step": 3063 }, { "epoch": 0.24821775761503564, "grad_norm": 0.04002142325043678, "learning_rate": 0.00012409882543539894, "loss": 0.4078, "step": 3064 }, { "epoch": 0.24829876863253403, "grad_norm": 0.057170569896698, "learning_rate": 0.00012413932766302147, "loss": 0.3649, "step": 3065 }, { "epoch": 0.2483797796500324, "grad_norm": 0.054762836545705795, "learning_rate": 0.00012417982989064398, "loss": 0.3933, "step": 3066 }, { "epoch": 0.2484607906675308, "grad_norm": 0.03908546268939972, "learning_rate": 0.0001242203321182665, "loss": 0.3379, "step": 3067 }, { "epoch": 0.24854180168502915, "grad_norm": 0.043949905782938004, "learning_rate": 0.00012426083434588902, "loss": 0.425, "step": 3068 }, { "epoch": 0.24862281270252753, "grad_norm": 0.0349920354783535, "learning_rate": 0.00012430133657351155, "loss": 0.3924, "step": 3069 }, { "epoch": 0.24870382372002592, "grad_norm": 0.04423988610506058, "learning_rate": 0.00012434183880113405, "loss": 0.3753, "step": 3070 }, { "epoch": 0.2487848347375243, "grad_norm": 0.0439189188182354, "learning_rate": 0.0001243823410287566, "loss": 0.3584, "step": 3071 }, { "epoch": 0.24886584575502269, "grad_norm": 0.05119139328598976, "learning_rate": 0.0001244228432563791, "loss": 0.4317, "step": 3072 }, { "epoch": 0.24894685677252107, "grad_norm": 0.04475146904587746, "learning_rate": 0.00012446334548400163, "loss": 0.3936, "step": 3073 }, { "epoch": 0.24902786779001945, "grad_norm": 0.03774057701230049, "learning_rate": 0.00012450384771162416, "loss": 0.3627, "step": 3074 }, { "epoch": 0.24910887880751784, "grad_norm": 0.04917656630277634, "learning_rate": 0.00012454434993924667, "loss": 0.4298, "step": 3075 }, { "epoch": 0.2491898898250162, "grad_norm": 0.04121607169508934, "learning_rate": 0.0001245848521668692, "loss": 0.3785, "step": 3076 }, { "epoch": 0.24927090084251458, "grad_norm": 0.03795452415943146, "learning_rate": 0.0001246253543944917, "loss": 0.3869, "step": 3077 }, { "epoch": 0.24935191186001296, "grad_norm": 0.03608179837465286, "learning_rate": 0.00012466585662211424, "loss": 0.3885, "step": 3078 }, { "epoch": 0.24943292287751134, "grad_norm": 0.04807370528578758, "learning_rate": 0.00012470635884973675, "loss": 0.403, "step": 3079 }, { "epoch": 0.24951393389500973, "grad_norm": 0.05438527837395668, "learning_rate": 0.00012474686107735928, "loss": 0.3725, "step": 3080 }, { "epoch": 0.2495949449125081, "grad_norm": 0.039910171180963516, "learning_rate": 0.00012478736330498178, "loss": 0.3373, "step": 3081 }, { "epoch": 0.2496759559300065, "grad_norm": 0.037223588675260544, "learning_rate": 0.00012482786553260432, "loss": 0.3578, "step": 3082 }, { "epoch": 0.24975696694750485, "grad_norm": 0.04596159979701042, "learning_rate": 0.00012486836776022682, "loss": 0.3981, "step": 3083 }, { "epoch": 0.24983797796500323, "grad_norm": 0.04447013884782791, "learning_rate": 0.00012490886998784933, "loss": 0.4163, "step": 3084 }, { "epoch": 0.24991898898250162, "grad_norm": 0.04586843401193619, "learning_rate": 0.00012494937221547184, "loss": 0.3653, "step": 3085 }, { "epoch": 0.25, "grad_norm": 0.04713024944067001, "learning_rate": 0.00012498987444309437, "loss": 0.435, "step": 3086 }, { "epoch": 0.25008101101749836, "grad_norm": 0.04586999490857124, "learning_rate": 0.00012503037667071688, "loss": 0.4236, "step": 3087 }, { "epoch": 0.25016202203499677, "grad_norm": 0.04212983697652817, "learning_rate": 0.0001250708788983394, "loss": 0.3092, "step": 3088 }, { "epoch": 0.2502430330524951, "grad_norm": 0.05363105237483978, "learning_rate": 0.00012511138112596191, "loss": 0.4238, "step": 3089 }, { "epoch": 0.25032404406999353, "grad_norm": 0.042278800159692764, "learning_rate": 0.00012515188335358445, "loss": 0.4017, "step": 3090 }, { "epoch": 0.2504050550874919, "grad_norm": 0.03694623336195946, "learning_rate": 0.00012519238558120698, "loss": 0.3607, "step": 3091 }, { "epoch": 0.2504860661049903, "grad_norm": 0.0348326712846756, "learning_rate": 0.0001252328878088295, "loss": 0.392, "step": 3092 }, { "epoch": 0.25056707712248866, "grad_norm": 0.05313507094979286, "learning_rate": 0.00012527339003645202, "loss": 0.386, "step": 3093 }, { "epoch": 0.250648088139987, "grad_norm": 0.043994393199682236, "learning_rate": 0.00012531389226407453, "loss": 0.424, "step": 3094 }, { "epoch": 0.2507290991574854, "grad_norm": 0.045557476580142975, "learning_rate": 0.00012535439449169706, "loss": 0.4057, "step": 3095 }, { "epoch": 0.2508101101749838, "grad_norm": 0.0556633435189724, "learning_rate": 0.00012539489671931957, "loss": 0.3665, "step": 3096 }, { "epoch": 0.2508911211924822, "grad_norm": 0.03474360331892967, "learning_rate": 0.0001254353989469421, "loss": 0.3409, "step": 3097 }, { "epoch": 0.25097213220998055, "grad_norm": 0.03593473881483078, "learning_rate": 0.0001254759011745646, "loss": 0.4017, "step": 3098 }, { "epoch": 0.25105314322747896, "grad_norm": 0.04045382887125015, "learning_rate": 0.00012551640340218714, "loss": 0.3781, "step": 3099 }, { "epoch": 0.2511341542449773, "grad_norm": 0.05407486483454704, "learning_rate": 0.00012555690562980964, "loss": 0.4013, "step": 3100 }, { "epoch": 0.25121516526247567, "grad_norm": 0.048889655619859695, "learning_rate": 0.00012559740785743218, "loss": 0.3784, "step": 3101 }, { "epoch": 0.2512961762799741, "grad_norm": 0.04563205689191818, "learning_rate": 0.00012563791008505468, "loss": 0.3982, "step": 3102 }, { "epoch": 0.25137718729747244, "grad_norm": 0.04439222067594528, "learning_rate": 0.00012567841231267722, "loss": 0.3797, "step": 3103 }, { "epoch": 0.25145819831497085, "grad_norm": 0.048667896538972855, "learning_rate": 0.00012571891454029972, "loss": 0.4266, "step": 3104 }, { "epoch": 0.2515392093324692, "grad_norm": 0.057649172842502594, "learning_rate": 0.00012575941676792226, "loss": 0.3974, "step": 3105 }, { "epoch": 0.2516202203499676, "grad_norm": 0.04143094643950462, "learning_rate": 0.00012579991899554476, "loss": 0.3832, "step": 3106 }, { "epoch": 0.25170123136746597, "grad_norm": 0.044324059039354324, "learning_rate": 0.00012584042122316727, "loss": 0.4323, "step": 3107 }, { "epoch": 0.2517822423849643, "grad_norm": 0.03827289119362831, "learning_rate": 0.0001258809234507898, "loss": 0.3605, "step": 3108 }, { "epoch": 0.25186325340246274, "grad_norm": 0.039986081421375275, "learning_rate": 0.0001259214256784123, "loss": 0.3701, "step": 3109 }, { "epoch": 0.2519442644199611, "grad_norm": 0.04025157168507576, "learning_rate": 0.00012596192790603484, "loss": 0.3573, "step": 3110 }, { "epoch": 0.2520252754374595, "grad_norm": 0.042288873344659805, "learning_rate": 0.00012600243013365735, "loss": 0.3594, "step": 3111 }, { "epoch": 0.25210628645495786, "grad_norm": 0.0396200492978096, "learning_rate": 0.00012604293236127988, "loss": 0.3739, "step": 3112 }, { "epoch": 0.2521872974724563, "grad_norm": 0.03899937868118286, "learning_rate": 0.0001260834345889024, "loss": 0.3855, "step": 3113 }, { "epoch": 0.25226830848995463, "grad_norm": 0.04981239512562752, "learning_rate": 0.00012612393681652492, "loss": 0.3627, "step": 3114 }, { "epoch": 0.25234931950745304, "grad_norm": 0.04627585783600807, "learning_rate": 0.00012616443904414743, "loss": 0.345, "step": 3115 }, { "epoch": 0.2524303305249514, "grad_norm": 0.03675699606537819, "learning_rate": 0.00012620494127176996, "loss": 0.318, "step": 3116 }, { "epoch": 0.25251134154244975, "grad_norm": 0.04463730379939079, "learning_rate": 0.00012624544349939247, "loss": 0.3599, "step": 3117 }, { "epoch": 0.25259235255994816, "grad_norm": 0.04861759394407272, "learning_rate": 0.000126285945727015, "loss": 0.3645, "step": 3118 }, { "epoch": 0.2526733635774465, "grad_norm": 0.039907146245241165, "learning_rate": 0.0001263264479546375, "loss": 0.3723, "step": 3119 }, { "epoch": 0.25275437459494493, "grad_norm": 0.051097266376018524, "learning_rate": 0.00012636695018226004, "loss": 0.394, "step": 3120 }, { "epoch": 0.2528353856124433, "grad_norm": 0.04374748468399048, "learning_rate": 0.00012640745240988254, "loss": 0.4085, "step": 3121 }, { "epoch": 0.2529163966299417, "grad_norm": 0.04951639473438263, "learning_rate": 0.00012644795463750508, "loss": 0.3515, "step": 3122 }, { "epoch": 0.25299740764744005, "grad_norm": 0.04277408868074417, "learning_rate": 0.00012648845686512758, "loss": 0.3368, "step": 3123 }, { "epoch": 0.2530784186649384, "grad_norm": 0.04922161623835564, "learning_rate": 0.00012652895909275012, "loss": 0.3734, "step": 3124 }, { "epoch": 0.2531594296824368, "grad_norm": 0.05067862197756767, "learning_rate": 0.00012656946132037262, "loss": 0.381, "step": 3125 }, { "epoch": 0.2532404406999352, "grad_norm": 0.03622680902481079, "learning_rate": 0.00012660996354799516, "loss": 0.3465, "step": 3126 }, { "epoch": 0.2533214517174336, "grad_norm": 0.04435792192816734, "learning_rate": 0.00012665046577561766, "loss": 0.3514, "step": 3127 }, { "epoch": 0.25340246273493194, "grad_norm": 0.047936275601387024, "learning_rate": 0.0001266909680032402, "loss": 0.3669, "step": 3128 }, { "epoch": 0.25348347375243035, "grad_norm": 0.050081074237823486, "learning_rate": 0.0001267314702308627, "loss": 0.3585, "step": 3129 }, { "epoch": 0.2535644847699287, "grad_norm": 0.0505104586482048, "learning_rate": 0.00012677197245848523, "loss": 0.3788, "step": 3130 }, { "epoch": 0.25364549578742707, "grad_norm": 0.03991609066724777, "learning_rate": 0.00012681247468610774, "loss": 0.3467, "step": 3131 }, { "epoch": 0.2537265068049255, "grad_norm": 0.04814030975103378, "learning_rate": 0.00012685297691373025, "loss": 0.3656, "step": 3132 }, { "epoch": 0.25380751782242383, "grad_norm": 0.04745684564113617, "learning_rate": 0.00012689347914135278, "loss": 0.3873, "step": 3133 }, { "epoch": 0.25388852883992225, "grad_norm": 0.047067102044820786, "learning_rate": 0.00012693398136897529, "loss": 0.3876, "step": 3134 }, { "epoch": 0.2539695398574206, "grad_norm": 0.041653912514448166, "learning_rate": 0.00012697448359659782, "loss": 0.3832, "step": 3135 }, { "epoch": 0.254050550874919, "grad_norm": 0.04571259766817093, "learning_rate": 0.00012701498582422033, "loss": 0.3589, "step": 3136 }, { "epoch": 0.25413156189241737, "grad_norm": 0.038450032472610474, "learning_rate": 0.00012705548805184286, "loss": 0.3516, "step": 3137 }, { "epoch": 0.2542125729099157, "grad_norm": 0.04408708214759827, "learning_rate": 0.00012709599027946536, "loss": 0.3606, "step": 3138 }, { "epoch": 0.25429358392741414, "grad_norm": 0.05931435525417328, "learning_rate": 0.0001271364925070879, "loss": 0.3971, "step": 3139 }, { "epoch": 0.2543745949449125, "grad_norm": 0.04115188121795654, "learning_rate": 0.0001271769947347104, "loss": 0.352, "step": 3140 }, { "epoch": 0.2544556059624109, "grad_norm": 0.052127905189991, "learning_rate": 0.00012721749696233294, "loss": 0.3807, "step": 3141 }, { "epoch": 0.25453661697990926, "grad_norm": 0.04414311423897743, "learning_rate": 0.00012725799918995544, "loss": 0.4141, "step": 3142 }, { "epoch": 0.25461762799740767, "grad_norm": 0.049567751586437225, "learning_rate": 0.00012729850141757798, "loss": 0.3739, "step": 3143 }, { "epoch": 0.254698639014906, "grad_norm": 0.056248169392347336, "learning_rate": 0.00012733900364520048, "loss": 0.359, "step": 3144 }, { "epoch": 0.2547796500324044, "grad_norm": 0.04483194649219513, "learning_rate": 0.00012737950587282302, "loss": 0.3429, "step": 3145 }, { "epoch": 0.2548606610499028, "grad_norm": 0.04335392266511917, "learning_rate": 0.00012742000810044552, "loss": 0.3925, "step": 3146 }, { "epoch": 0.25494167206740115, "grad_norm": 0.03910553455352783, "learning_rate": 0.00012746051032806806, "loss": 0.3952, "step": 3147 }, { "epoch": 0.25502268308489956, "grad_norm": 0.05573083087801933, "learning_rate": 0.0001275010125556906, "loss": 0.3531, "step": 3148 }, { "epoch": 0.2551036941023979, "grad_norm": 0.05946796387434006, "learning_rate": 0.0001275415147833131, "loss": 0.373, "step": 3149 }, { "epoch": 0.2551847051198963, "grad_norm": 0.04649536684155464, "learning_rate": 0.00012758201701093563, "loss": 0.415, "step": 3150 }, { "epoch": 0.2552657161373947, "grad_norm": 0.04201361909508705, "learning_rate": 0.00012762251923855813, "loss": 0.3631, "step": 3151 }, { "epoch": 0.25534672715489304, "grad_norm": 0.04001443833112717, "learning_rate": 0.00012766302146618067, "loss": 0.3667, "step": 3152 }, { "epoch": 0.25542773817239145, "grad_norm": 0.051107991486787796, "learning_rate": 0.00012770352369380317, "loss": 0.4328, "step": 3153 }, { "epoch": 0.2555087491898898, "grad_norm": 0.04720538482069969, "learning_rate": 0.00012774402592142568, "loss": 0.3522, "step": 3154 }, { "epoch": 0.2555897602073882, "grad_norm": 0.0412609800696373, "learning_rate": 0.0001277845281490482, "loss": 0.4278, "step": 3155 }, { "epoch": 0.2556707712248866, "grad_norm": 0.03820064663887024, "learning_rate": 0.00012782503037667072, "loss": 0.3855, "step": 3156 }, { "epoch": 0.255751782242385, "grad_norm": 0.04344452545046806, "learning_rate": 0.00012786553260429323, "loss": 0.4136, "step": 3157 }, { "epoch": 0.25583279325988334, "grad_norm": 0.04224742576479912, "learning_rate": 0.00012790603483191576, "loss": 0.3729, "step": 3158 }, { "epoch": 0.25591380427738175, "grad_norm": 0.039598360657691956, "learning_rate": 0.00012794653705953826, "loss": 0.3878, "step": 3159 }, { "epoch": 0.2559948152948801, "grad_norm": 0.035558078438043594, "learning_rate": 0.0001279870392871608, "loss": 0.3357, "step": 3160 }, { "epoch": 0.25607582631237846, "grad_norm": 0.04091186448931694, "learning_rate": 0.0001280275415147833, "loss": 0.3201, "step": 3161 }, { "epoch": 0.2561568373298769, "grad_norm": 0.056138601154088974, "learning_rate": 0.00012806804374240584, "loss": 0.4365, "step": 3162 }, { "epoch": 0.25623784834737523, "grad_norm": 0.041428547352552414, "learning_rate": 0.00012810854597002834, "loss": 0.3792, "step": 3163 }, { "epoch": 0.25631885936487364, "grad_norm": 0.04135297238826752, "learning_rate": 0.00012814904819765088, "loss": 0.3543, "step": 3164 }, { "epoch": 0.256399870382372, "grad_norm": 0.041687510907649994, "learning_rate": 0.00012818955042527338, "loss": 0.3958, "step": 3165 }, { "epoch": 0.2564808813998704, "grad_norm": 0.0362611822783947, "learning_rate": 0.00012823005265289592, "loss": 0.366, "step": 3166 }, { "epoch": 0.25656189241736876, "grad_norm": 0.04926367476582527, "learning_rate": 0.00012827055488051845, "loss": 0.436, "step": 3167 }, { "epoch": 0.2566429034348671, "grad_norm": 0.04708395525813103, "learning_rate": 0.00012831105710814096, "loss": 0.4092, "step": 3168 }, { "epoch": 0.25672391445236553, "grad_norm": 0.03967064991593361, "learning_rate": 0.0001283515593357635, "loss": 0.3539, "step": 3169 }, { "epoch": 0.2568049254698639, "grad_norm": 0.04242156818509102, "learning_rate": 0.000128392061563386, "loss": 0.3838, "step": 3170 }, { "epoch": 0.2568859364873623, "grad_norm": 0.03780914098024368, "learning_rate": 0.00012843256379100853, "loss": 0.3519, "step": 3171 }, { "epoch": 0.25696694750486065, "grad_norm": 0.04463619738817215, "learning_rate": 0.00012847306601863103, "loss": 0.3899, "step": 3172 }, { "epoch": 0.25704795852235907, "grad_norm": 0.040854331105947495, "learning_rate": 0.00012851356824625357, "loss": 0.3554, "step": 3173 }, { "epoch": 0.2571289695398574, "grad_norm": 0.04449096694588661, "learning_rate": 0.00012855407047387607, "loss": 0.309, "step": 3174 }, { "epoch": 0.2572099805573558, "grad_norm": 0.04881928488612175, "learning_rate": 0.0001285945727014986, "loss": 0.4015, "step": 3175 }, { "epoch": 0.2572909915748542, "grad_norm": 0.041136372834444046, "learning_rate": 0.0001286350749291211, "loss": 0.3598, "step": 3176 }, { "epoch": 0.25737200259235254, "grad_norm": 0.04056499898433685, "learning_rate": 0.00012867557715674362, "loss": 0.3847, "step": 3177 }, { "epoch": 0.25745301360985096, "grad_norm": 0.047216638922691345, "learning_rate": 0.00012871607938436615, "loss": 0.3789, "step": 3178 }, { "epoch": 0.2575340246273493, "grad_norm": 0.0441555492579937, "learning_rate": 0.00012875658161198866, "loss": 0.3974, "step": 3179 }, { "epoch": 0.2576150356448477, "grad_norm": 0.03761187940835953, "learning_rate": 0.00012879708383961116, "loss": 0.42, "step": 3180 }, { "epoch": 0.2576960466623461, "grad_norm": 0.043480463325977325, "learning_rate": 0.0001288375860672337, "loss": 0.3676, "step": 3181 }, { "epoch": 0.25777705767984443, "grad_norm": 0.0372801311314106, "learning_rate": 0.0001288780882948562, "loss": 0.3773, "step": 3182 }, { "epoch": 0.25785806869734285, "grad_norm": 0.04209472984075546, "learning_rate": 0.00012891859052247874, "loss": 0.3822, "step": 3183 }, { "epoch": 0.2579390797148412, "grad_norm": 0.04223468527197838, "learning_rate": 0.00012895909275010124, "loss": 0.3481, "step": 3184 }, { "epoch": 0.2580200907323396, "grad_norm": 0.051124799996614456, "learning_rate": 0.00012899959497772378, "loss": 0.4014, "step": 3185 }, { "epoch": 0.25810110174983797, "grad_norm": 0.040176596492528915, "learning_rate": 0.0001290400972053463, "loss": 0.352, "step": 3186 }, { "epoch": 0.2581821127673364, "grad_norm": 0.04070594534277916, "learning_rate": 0.00012908059943296882, "loss": 0.3603, "step": 3187 }, { "epoch": 0.25826312378483474, "grad_norm": 0.03919408470392227, "learning_rate": 0.00012912110166059135, "loss": 0.3743, "step": 3188 }, { "epoch": 0.2583441348023331, "grad_norm": 0.043664198368787766, "learning_rate": 0.00012916160388821385, "loss": 0.4041, "step": 3189 }, { "epoch": 0.2584251458198315, "grad_norm": 0.04055207595229149, "learning_rate": 0.0001292021061158364, "loss": 0.3676, "step": 3190 }, { "epoch": 0.25850615683732986, "grad_norm": 0.041981372982263565, "learning_rate": 0.0001292426083434589, "loss": 0.3562, "step": 3191 }, { "epoch": 0.25858716785482827, "grad_norm": 0.04531414806842804, "learning_rate": 0.00012928311057108143, "loss": 0.3404, "step": 3192 }, { "epoch": 0.2586681788723266, "grad_norm": 0.043773435056209564, "learning_rate": 0.00012932361279870393, "loss": 0.3807, "step": 3193 }, { "epoch": 0.25874918988982504, "grad_norm": 0.04385381191968918, "learning_rate": 0.00012936411502632647, "loss": 0.3815, "step": 3194 }, { "epoch": 0.2588302009073234, "grad_norm": 0.03920425474643707, "learning_rate": 0.00012940461725394897, "loss": 0.3725, "step": 3195 }, { "epoch": 0.25891121192482175, "grad_norm": 0.04236259311437607, "learning_rate": 0.0001294451194815715, "loss": 0.3781, "step": 3196 }, { "epoch": 0.25899222294232016, "grad_norm": 0.05604299530386925, "learning_rate": 0.000129485621709194, "loss": 0.3814, "step": 3197 }, { "epoch": 0.2590732339598185, "grad_norm": 0.04819274693727493, "learning_rate": 0.00012952612393681655, "loss": 0.3885, "step": 3198 }, { "epoch": 0.2591542449773169, "grad_norm": 0.04288613796234131, "learning_rate": 0.00012956662616443905, "loss": 0.4185, "step": 3199 }, { "epoch": 0.2592352559948153, "grad_norm": 0.040658704936504364, "learning_rate": 0.00012960712839206158, "loss": 0.3725, "step": 3200 }, { "epoch": 0.2593162670123137, "grad_norm": 0.03994179889559746, "learning_rate": 0.0001296476306196841, "loss": 0.3481, "step": 3201 }, { "epoch": 0.25939727802981205, "grad_norm": 0.04214403033256531, "learning_rate": 0.0001296881328473066, "loss": 0.3794, "step": 3202 }, { "epoch": 0.25947828904731046, "grad_norm": 0.0443929024040699, "learning_rate": 0.00012972863507492913, "loss": 0.377, "step": 3203 }, { "epoch": 0.2595593000648088, "grad_norm": 0.04427499696612358, "learning_rate": 0.00012976913730255164, "loss": 0.4113, "step": 3204 }, { "epoch": 0.2596403110823072, "grad_norm": 0.03917337581515312, "learning_rate": 0.00012980963953017417, "loss": 0.3133, "step": 3205 }, { "epoch": 0.2597213220998056, "grad_norm": 0.040803954005241394, "learning_rate": 0.00012985014175779668, "loss": 0.3581, "step": 3206 }, { "epoch": 0.25980233311730394, "grad_norm": 0.048127301037311554, "learning_rate": 0.0001298906439854192, "loss": 0.3959, "step": 3207 }, { "epoch": 0.25988334413480235, "grad_norm": 0.051993049681186676, "learning_rate": 0.00012993114621304171, "loss": 0.4089, "step": 3208 }, { "epoch": 0.2599643551523007, "grad_norm": 0.03847181797027588, "learning_rate": 0.00012997164844066425, "loss": 0.3845, "step": 3209 }, { "epoch": 0.2600453661697991, "grad_norm": 0.043846599757671356, "learning_rate": 0.00013001215066828675, "loss": 0.4003, "step": 3210 }, { "epoch": 0.2601263771872975, "grad_norm": 0.04759734496474266, "learning_rate": 0.0001300526528959093, "loss": 0.3772, "step": 3211 }, { "epoch": 0.26020738820479583, "grad_norm": 0.03929856792092323, "learning_rate": 0.0001300931551235318, "loss": 0.403, "step": 3212 }, { "epoch": 0.26028839922229424, "grad_norm": 0.036084044724702835, "learning_rate": 0.00013013365735115433, "loss": 0.3519, "step": 3213 }, { "epoch": 0.2603694102397926, "grad_norm": 0.040780842304229736, "learning_rate": 0.00013017415957877683, "loss": 0.333, "step": 3214 }, { "epoch": 0.260450421257291, "grad_norm": 0.03689270839095116, "learning_rate": 0.00013021466180639937, "loss": 0.3211, "step": 3215 }, { "epoch": 0.26053143227478937, "grad_norm": 0.04343465343117714, "learning_rate": 0.00013025516403402187, "loss": 0.4017, "step": 3216 }, { "epoch": 0.2606124432922878, "grad_norm": 0.04075007885694504, "learning_rate": 0.0001302956662616444, "loss": 0.3772, "step": 3217 }, { "epoch": 0.26069345430978613, "grad_norm": 0.04069703072309494, "learning_rate": 0.0001303361684892669, "loss": 0.3754, "step": 3218 }, { "epoch": 0.2607744653272845, "grad_norm": 0.05063739791512489, "learning_rate": 0.00013037667071688944, "loss": 0.3919, "step": 3219 }, { "epoch": 0.2608554763447829, "grad_norm": 0.04554930701851845, "learning_rate": 0.00013041717294451195, "loss": 0.376, "step": 3220 }, { "epoch": 0.26093648736228126, "grad_norm": 0.03898908942937851, "learning_rate": 0.00013045767517213448, "loss": 0.3209, "step": 3221 }, { "epoch": 0.26101749837977967, "grad_norm": 0.04082167148590088, "learning_rate": 0.000130498177399757, "loss": 0.3895, "step": 3222 }, { "epoch": 0.261098509397278, "grad_norm": 0.04319079592823982, "learning_rate": 0.00013053867962737952, "loss": 0.3846, "step": 3223 }, { "epoch": 0.26117952041477643, "grad_norm": 0.037946876138448715, "learning_rate": 0.00013057918185500203, "loss": 0.3475, "step": 3224 }, { "epoch": 0.2612605314322748, "grad_norm": 0.047004539519548416, "learning_rate": 0.00013061968408262456, "loss": 0.3756, "step": 3225 }, { "epoch": 0.26134154244977315, "grad_norm": 0.046914055943489075, "learning_rate": 0.00013066018631024707, "loss": 0.3843, "step": 3226 }, { "epoch": 0.26142255346727156, "grad_norm": 0.03881968930363655, "learning_rate": 0.00013070068853786957, "loss": 0.3565, "step": 3227 }, { "epoch": 0.2615035644847699, "grad_norm": 0.04270708188414574, "learning_rate": 0.0001307411907654921, "loss": 0.4095, "step": 3228 }, { "epoch": 0.2615845755022683, "grad_norm": 0.044253475964069366, "learning_rate": 0.00013078169299311461, "loss": 0.3677, "step": 3229 }, { "epoch": 0.2616655865197667, "grad_norm": 0.04407831281423569, "learning_rate": 0.00013082219522073715, "loss": 0.3748, "step": 3230 }, { "epoch": 0.2617465975372651, "grad_norm": 0.04300897940993309, "learning_rate": 0.00013086269744835965, "loss": 0.4075, "step": 3231 }, { "epoch": 0.26182760855476345, "grad_norm": 0.03860275447368622, "learning_rate": 0.0001309031996759822, "loss": 0.3697, "step": 3232 }, { "epoch": 0.2619086195722618, "grad_norm": 0.04201575368642807, "learning_rate": 0.0001309437019036047, "loss": 0.3104, "step": 3233 }, { "epoch": 0.2619896305897602, "grad_norm": 0.042958687990903854, "learning_rate": 0.00013098420413122723, "loss": 0.3594, "step": 3234 }, { "epoch": 0.26207064160725857, "grad_norm": 0.05025588348507881, "learning_rate": 0.00013102470635884973, "loss": 0.4544, "step": 3235 }, { "epoch": 0.262151652624757, "grad_norm": 0.04615491256117821, "learning_rate": 0.00013106520858647227, "loss": 0.3719, "step": 3236 }, { "epoch": 0.26223266364225534, "grad_norm": 0.04639549180865288, "learning_rate": 0.00013110571081409477, "loss": 0.3913, "step": 3237 }, { "epoch": 0.26231367465975375, "grad_norm": 0.04262283071875572, "learning_rate": 0.0001311462130417173, "loss": 0.3961, "step": 3238 }, { "epoch": 0.2623946856772521, "grad_norm": 0.04736074432730675, "learning_rate": 0.0001311867152693398, "loss": 0.3343, "step": 3239 }, { "epoch": 0.26247569669475046, "grad_norm": 0.046176109462976456, "learning_rate": 0.00013122721749696234, "loss": 0.3841, "step": 3240 }, { "epoch": 0.26255670771224887, "grad_norm": 0.038287967443466187, "learning_rate": 0.00013126771972458485, "loss": 0.4, "step": 3241 }, { "epoch": 0.2626377187297472, "grad_norm": 0.04353439062833786, "learning_rate": 0.00013130822195220738, "loss": 0.3573, "step": 3242 }, { "epoch": 0.26271872974724564, "grad_norm": 0.042398590594530106, "learning_rate": 0.00013134872417982992, "loss": 0.3832, "step": 3243 }, { "epoch": 0.262799740764744, "grad_norm": 0.03516731783747673, "learning_rate": 0.00013138922640745242, "loss": 0.3693, "step": 3244 }, { "epoch": 0.2628807517822424, "grad_norm": 0.04480927065014839, "learning_rate": 0.00013142972863507496, "loss": 0.3878, "step": 3245 }, { "epoch": 0.26296176279974076, "grad_norm": 0.03937854990363121, "learning_rate": 0.00013147023086269746, "loss": 0.3739, "step": 3246 }, { "epoch": 0.2630427738172391, "grad_norm": 0.0367438830435276, "learning_rate": 0.00013151073309032, "loss": 0.4108, "step": 3247 }, { "epoch": 0.26312378483473753, "grad_norm": 0.04266749322414398, "learning_rate": 0.0001315512353179425, "loss": 0.3796, "step": 3248 }, { "epoch": 0.2632047958522359, "grad_norm": 0.040220484137535095, "learning_rate": 0.000131591737545565, "loss": 0.341, "step": 3249 }, { "epoch": 0.2632858068697343, "grad_norm": 0.0418410487473011, "learning_rate": 0.00013163223977318751, "loss": 0.3756, "step": 3250 }, { "epoch": 0.26336681788723265, "grad_norm": 0.04273224249482155, "learning_rate": 0.00013167274200081005, "loss": 0.3772, "step": 3251 }, { "epoch": 0.26344782890473106, "grad_norm": 0.04679445922374725, "learning_rate": 0.00013171324422843255, "loss": 0.4407, "step": 3252 }, { "epoch": 0.2635288399222294, "grad_norm": 0.03873904049396515, "learning_rate": 0.00013175374645605509, "loss": 0.354, "step": 3253 }, { "epoch": 0.26360985093972783, "grad_norm": 0.04092535004019737, "learning_rate": 0.0001317942486836776, "loss": 0.3934, "step": 3254 }, { "epoch": 0.2636908619572262, "grad_norm": 0.04127156734466553, "learning_rate": 0.00013183475091130013, "loss": 0.379, "step": 3255 }, { "epoch": 0.26377187297472454, "grad_norm": 0.03976596146821976, "learning_rate": 0.00013187525313892263, "loss": 0.3379, "step": 3256 }, { "epoch": 0.26385288399222295, "grad_norm": 0.04563805088400841, "learning_rate": 0.00013191575536654516, "loss": 0.3389, "step": 3257 }, { "epoch": 0.2639338950097213, "grad_norm": 0.03627234324812889, "learning_rate": 0.00013195625759416767, "loss": 0.3842, "step": 3258 }, { "epoch": 0.2640149060272197, "grad_norm": 0.03891756385564804, "learning_rate": 0.0001319967598217902, "loss": 0.3434, "step": 3259 }, { "epoch": 0.2640959170447181, "grad_norm": 0.0457911491394043, "learning_rate": 0.0001320372620494127, "loss": 0.3799, "step": 3260 }, { "epoch": 0.2641769280622165, "grad_norm": 0.04247802123427391, "learning_rate": 0.00013207776427703524, "loss": 0.3573, "step": 3261 }, { "epoch": 0.26425793907971484, "grad_norm": 0.03541119396686554, "learning_rate": 0.00013211826650465778, "loss": 0.3333, "step": 3262 }, { "epoch": 0.2643389500972132, "grad_norm": 0.039114829152822495, "learning_rate": 0.00013215876873228028, "loss": 0.3675, "step": 3263 }, { "epoch": 0.2644199611147116, "grad_norm": 0.04546349123120308, "learning_rate": 0.00013219927095990282, "loss": 0.3912, "step": 3264 }, { "epoch": 0.26450097213220997, "grad_norm": 0.042006999254226685, "learning_rate": 0.00013223977318752532, "loss": 0.3663, "step": 3265 }, { "epoch": 0.2645819831497084, "grad_norm": 0.0530223585665226, "learning_rate": 0.00013228027541514786, "loss": 0.3815, "step": 3266 }, { "epoch": 0.26466299416720673, "grad_norm": 0.046276140958070755, "learning_rate": 0.00013232077764277036, "loss": 0.4073, "step": 3267 }, { "epoch": 0.26474400518470514, "grad_norm": 0.03913923352956772, "learning_rate": 0.0001323612798703929, "loss": 0.3715, "step": 3268 }, { "epoch": 0.2648250162022035, "grad_norm": 0.03895312175154686, "learning_rate": 0.0001324017820980154, "loss": 0.3929, "step": 3269 }, { "epoch": 0.26490602721970186, "grad_norm": 0.03699490427970886, "learning_rate": 0.00013244228432563793, "loss": 0.3966, "step": 3270 }, { "epoch": 0.26498703823720027, "grad_norm": 0.042337387800216675, "learning_rate": 0.00013248278655326044, "loss": 0.3573, "step": 3271 }, { "epoch": 0.2650680492546986, "grad_norm": 0.03894384205341339, "learning_rate": 0.00013252328878088295, "loss": 0.3855, "step": 3272 }, { "epoch": 0.26514906027219703, "grad_norm": 0.05220978334546089, "learning_rate": 0.00013256379100850548, "loss": 0.3908, "step": 3273 }, { "epoch": 0.2652300712896954, "grad_norm": 0.046401623636484146, "learning_rate": 0.00013260429323612799, "loss": 0.43, "step": 3274 }, { "epoch": 0.2653110823071938, "grad_norm": 0.04030894115567207, "learning_rate": 0.0001326447954637505, "loss": 0.4457, "step": 3275 }, { "epoch": 0.26539209332469216, "grad_norm": 0.03204037621617317, "learning_rate": 0.00013268529769137303, "loss": 0.3711, "step": 3276 }, { "epoch": 0.2654731043421905, "grad_norm": 0.03618483617901802, "learning_rate": 0.00013272579991899553, "loss": 0.3423, "step": 3277 }, { "epoch": 0.2655541153596889, "grad_norm": 0.040721192955970764, "learning_rate": 0.00013276630214661806, "loss": 0.3368, "step": 3278 }, { "epoch": 0.2656351263771873, "grad_norm": 0.04458589851856232, "learning_rate": 0.00013280680437424057, "loss": 0.4423, "step": 3279 }, { "epoch": 0.2657161373946857, "grad_norm": 0.048048872500658035, "learning_rate": 0.0001328473066018631, "loss": 0.3879, "step": 3280 }, { "epoch": 0.26579714841218405, "grad_norm": 0.04069700837135315, "learning_rate": 0.00013288780882948564, "loss": 0.3699, "step": 3281 }, { "epoch": 0.26587815942968246, "grad_norm": 0.04361570626497269, "learning_rate": 0.00013292831105710814, "loss": 0.4226, "step": 3282 }, { "epoch": 0.2659591704471808, "grad_norm": 0.03607351705431938, "learning_rate": 0.00013296881328473068, "loss": 0.3625, "step": 3283 }, { "epoch": 0.26604018146467917, "grad_norm": 0.0401526503264904, "learning_rate": 0.00013300931551235318, "loss": 0.379, "step": 3284 }, { "epoch": 0.2661211924821776, "grad_norm": 0.04371299222111702, "learning_rate": 0.00013304981773997572, "loss": 0.3464, "step": 3285 }, { "epoch": 0.26620220349967594, "grad_norm": 0.04439733177423477, "learning_rate": 0.00013309031996759822, "loss": 0.3851, "step": 3286 }, { "epoch": 0.26628321451717435, "grad_norm": 0.04283667728304863, "learning_rate": 0.00013313082219522075, "loss": 0.3645, "step": 3287 }, { "epoch": 0.2663642255346727, "grad_norm": 0.04181892052292824, "learning_rate": 0.00013317132442284326, "loss": 0.3553, "step": 3288 }, { "epoch": 0.2664452365521711, "grad_norm": 0.045632556080818176, "learning_rate": 0.0001332118266504658, "loss": 0.3707, "step": 3289 }, { "epoch": 0.26652624756966947, "grad_norm": 0.041404347866773605, "learning_rate": 0.0001332523288780883, "loss": 0.3905, "step": 3290 }, { "epoch": 0.26660725858716783, "grad_norm": 0.04776446893811226, "learning_rate": 0.00013329283110571083, "loss": 0.4064, "step": 3291 }, { "epoch": 0.26668826960466624, "grad_norm": 0.05265938863158226, "learning_rate": 0.00013333333333333334, "loss": 0.3756, "step": 3292 }, { "epoch": 0.2667692806221646, "grad_norm": 0.04272965341806412, "learning_rate": 0.00013337383556095587, "loss": 0.4049, "step": 3293 }, { "epoch": 0.266850291639663, "grad_norm": 0.04434579610824585, "learning_rate": 0.00013341433778857838, "loss": 0.3837, "step": 3294 }, { "epoch": 0.26693130265716136, "grad_norm": 0.04281012713909149, "learning_rate": 0.0001334548400162009, "loss": 0.405, "step": 3295 }, { "epoch": 0.2670123136746598, "grad_norm": 0.04999249801039696, "learning_rate": 0.00013349534224382342, "loss": 0.3927, "step": 3296 }, { "epoch": 0.26709332469215813, "grad_norm": 0.03942561522126198, "learning_rate": 0.00013353584447144592, "loss": 0.3599, "step": 3297 }, { "epoch": 0.26717433570965654, "grad_norm": 0.04372606426477432, "learning_rate": 0.00013357634669906846, "loss": 0.3304, "step": 3298 }, { "epoch": 0.2672553467271549, "grad_norm": 0.03771752864122391, "learning_rate": 0.00013361684892669096, "loss": 0.3816, "step": 3299 }, { "epoch": 0.26733635774465325, "grad_norm": 0.047414857894182205, "learning_rate": 0.0001336573511543135, "loss": 0.3672, "step": 3300 }, { "epoch": 0.26741736876215166, "grad_norm": 0.04105841740965843, "learning_rate": 0.000133697853381936, "loss": 0.3384, "step": 3301 }, { "epoch": 0.26749837977965, "grad_norm": 0.03685571625828743, "learning_rate": 0.00013373835560955854, "loss": 0.3882, "step": 3302 }, { "epoch": 0.26757939079714843, "grad_norm": 0.05935465544462204, "learning_rate": 0.00013377885783718104, "loss": 0.4173, "step": 3303 }, { "epoch": 0.2676604018146468, "grad_norm": 0.03994208946824074, "learning_rate": 0.00013381936006480358, "loss": 0.3625, "step": 3304 }, { "epoch": 0.2677414128321452, "grad_norm": 0.04886750131845474, "learning_rate": 0.00013385986229242608, "loss": 0.4518, "step": 3305 }, { "epoch": 0.26782242384964355, "grad_norm": 0.03798381984233856, "learning_rate": 0.00013390036452004862, "loss": 0.367, "step": 3306 }, { "epoch": 0.2679034348671419, "grad_norm": 0.04732591658830643, "learning_rate": 0.00013394086674767112, "loss": 0.3887, "step": 3307 }, { "epoch": 0.2679844458846403, "grad_norm": 0.034767456352710724, "learning_rate": 0.00013398136897529365, "loss": 0.361, "step": 3308 }, { "epoch": 0.2680654569021387, "grad_norm": 0.036919280886650085, "learning_rate": 0.00013402187120291616, "loss": 0.3588, "step": 3309 }, { "epoch": 0.2681464679196371, "grad_norm": 0.04431672394275665, "learning_rate": 0.0001340623734305387, "loss": 0.3589, "step": 3310 }, { "epoch": 0.26822747893713544, "grad_norm": 0.04340912401676178, "learning_rate": 0.0001341028756581612, "loss": 0.343, "step": 3311 }, { "epoch": 0.26830848995463386, "grad_norm": 0.03599447011947632, "learning_rate": 0.00013414337788578373, "loss": 0.3526, "step": 3312 }, { "epoch": 0.2683895009721322, "grad_norm": 0.04156935214996338, "learning_rate": 0.00013418388011340624, "loss": 0.3797, "step": 3313 }, { "epoch": 0.26847051198963057, "grad_norm": 0.036646392196416855, "learning_rate": 0.00013422438234102877, "loss": 0.3596, "step": 3314 }, { "epoch": 0.268551523007129, "grad_norm": 0.043701015412807465, "learning_rate": 0.00013426488456865128, "loss": 0.4103, "step": 3315 }, { "epoch": 0.26863253402462733, "grad_norm": 0.04093540832400322, "learning_rate": 0.0001343053867962738, "loss": 0.3778, "step": 3316 }, { "epoch": 0.26871354504212575, "grad_norm": 0.043862100690603256, "learning_rate": 0.00013434588902389632, "loss": 0.3737, "step": 3317 }, { "epoch": 0.2687945560596241, "grad_norm": 0.04786309227347374, "learning_rate": 0.00013438639125151885, "loss": 0.3606, "step": 3318 }, { "epoch": 0.2688755670771225, "grad_norm": 0.03975916653871536, "learning_rate": 0.00013442689347914136, "loss": 0.3923, "step": 3319 }, { "epoch": 0.26895657809462087, "grad_norm": 0.040104031562805176, "learning_rate": 0.00013446739570676386, "loss": 0.3744, "step": 3320 }, { "epoch": 0.2690375891121192, "grad_norm": 0.045201487839221954, "learning_rate": 0.0001345078979343864, "loss": 0.3496, "step": 3321 }, { "epoch": 0.26911860012961764, "grad_norm": 0.042033299803733826, "learning_rate": 0.0001345484001620089, "loss": 0.3831, "step": 3322 }, { "epoch": 0.269199611147116, "grad_norm": 0.03977862000465393, "learning_rate": 0.00013458890238963144, "loss": 0.3882, "step": 3323 }, { "epoch": 0.2692806221646144, "grad_norm": 0.03551046550273895, "learning_rate": 0.00013462940461725394, "loss": 0.323, "step": 3324 }, { "epoch": 0.26936163318211276, "grad_norm": 0.03731973096728325, "learning_rate": 0.00013466990684487648, "loss": 0.3815, "step": 3325 }, { "epoch": 0.26944264419961117, "grad_norm": 0.04608595743775368, "learning_rate": 0.00013471040907249898, "loss": 0.4272, "step": 3326 }, { "epoch": 0.2695236552171095, "grad_norm": 0.04043601453304291, "learning_rate": 0.00013475091130012151, "loss": 0.4027, "step": 3327 }, { "epoch": 0.2696046662346079, "grad_norm": 0.03765019401907921, "learning_rate": 0.00013479141352774402, "loss": 0.4161, "step": 3328 }, { "epoch": 0.2696856772521063, "grad_norm": 0.04062202200293541, "learning_rate": 0.00013483191575536655, "loss": 0.3838, "step": 3329 }, { "epoch": 0.26976668826960465, "grad_norm": 0.04692335054278374, "learning_rate": 0.00013487241798298906, "loss": 0.3961, "step": 3330 }, { "epoch": 0.26984769928710306, "grad_norm": 0.03722019121050835, "learning_rate": 0.0001349129202106116, "loss": 0.3208, "step": 3331 }, { "epoch": 0.2699287103046014, "grad_norm": 0.03446126729249954, "learning_rate": 0.0001349534224382341, "loss": 0.3415, "step": 3332 }, { "epoch": 0.2700097213220998, "grad_norm": 0.039892058819532394, "learning_rate": 0.00013499392466585663, "loss": 0.3001, "step": 3333 }, { "epoch": 0.2700907323395982, "grad_norm": 0.04407581314444542, "learning_rate": 0.00013503442689347914, "loss": 0.3266, "step": 3334 }, { "epoch": 0.27017174335709654, "grad_norm": 0.042377736419439316, "learning_rate": 0.00013507492912110167, "loss": 0.3708, "step": 3335 }, { "epoch": 0.27025275437459495, "grad_norm": 0.036600060760974884, "learning_rate": 0.0001351154313487242, "loss": 0.3425, "step": 3336 }, { "epoch": 0.2703337653920933, "grad_norm": 0.04431897774338722, "learning_rate": 0.0001351559335763467, "loss": 0.3732, "step": 3337 }, { "epoch": 0.2704147764095917, "grad_norm": 0.04788152873516083, "learning_rate": 0.00013519643580396924, "loss": 0.382, "step": 3338 }, { "epoch": 0.2704957874270901, "grad_norm": 0.04281443729996681, "learning_rate": 0.00013523693803159175, "loss": 0.3487, "step": 3339 }, { "epoch": 0.2705767984445885, "grad_norm": 0.04157177358865738, "learning_rate": 0.00013527744025921428, "loss": 0.3783, "step": 3340 }, { "epoch": 0.27065780946208684, "grad_norm": 0.046408966183662415, "learning_rate": 0.0001353179424868368, "loss": 0.3998, "step": 3341 }, { "epoch": 0.2707388204795852, "grad_norm": 0.04052488133311272, "learning_rate": 0.0001353584447144593, "loss": 0.4103, "step": 3342 }, { "epoch": 0.2708198314970836, "grad_norm": 0.041967444121837616, "learning_rate": 0.00013539894694208183, "loss": 0.3815, "step": 3343 }, { "epoch": 0.27090084251458196, "grad_norm": 0.041668497025966644, "learning_rate": 0.00013543944916970434, "loss": 0.3921, "step": 3344 }, { "epoch": 0.2709818535320804, "grad_norm": 0.042255863547325134, "learning_rate": 0.00013547995139732684, "loss": 0.3733, "step": 3345 }, { "epoch": 0.27106286454957873, "grad_norm": 0.052985310554504395, "learning_rate": 0.00013552045362494937, "loss": 0.3898, "step": 3346 }, { "epoch": 0.27114387556707714, "grad_norm": 0.03639228269457817, "learning_rate": 0.00013556095585257188, "loss": 0.3232, "step": 3347 }, { "epoch": 0.2712248865845755, "grad_norm": 0.041970837861299515, "learning_rate": 0.00013560145808019441, "loss": 0.3487, "step": 3348 }, { "epoch": 0.2713058976020739, "grad_norm": 0.041541244834661484, "learning_rate": 0.00013564196030781692, "loss": 0.3511, "step": 3349 }, { "epoch": 0.27138690861957226, "grad_norm": 0.03761943429708481, "learning_rate": 0.00013568246253543945, "loss": 0.3305, "step": 3350 }, { "epoch": 0.2714679196370706, "grad_norm": 0.03963864594697952, "learning_rate": 0.00013572296476306196, "loss": 0.3451, "step": 3351 }, { "epoch": 0.27154893065456903, "grad_norm": 0.040368348360061646, "learning_rate": 0.0001357634669906845, "loss": 0.371, "step": 3352 }, { "epoch": 0.2716299416720674, "grad_norm": 0.038501329720020294, "learning_rate": 0.000135803969218307, "loss": 0.4093, "step": 3353 }, { "epoch": 0.2717109526895658, "grad_norm": 0.039939314126968384, "learning_rate": 0.00013584447144592953, "loss": 0.3919, "step": 3354 }, { "epoch": 0.27179196370706415, "grad_norm": 0.04205890744924545, "learning_rate": 0.00013588497367355207, "loss": 0.3552, "step": 3355 }, { "epoch": 0.27187297472456257, "grad_norm": 0.04411284998059273, "learning_rate": 0.00013592547590117457, "loss": 0.3334, "step": 3356 }, { "epoch": 0.2719539857420609, "grad_norm": 0.05139146000146866, "learning_rate": 0.0001359659781287971, "loss": 0.324, "step": 3357 }, { "epoch": 0.2720349967595593, "grad_norm": 0.04077056795358658, "learning_rate": 0.0001360064803564196, "loss": 0.3724, "step": 3358 }, { "epoch": 0.2721160077770577, "grad_norm": 0.04681459814310074, "learning_rate": 0.00013604698258404214, "loss": 0.372, "step": 3359 }, { "epoch": 0.27219701879455604, "grad_norm": 0.0463474839925766, "learning_rate": 0.00013608748481166465, "loss": 0.3591, "step": 3360 }, { "epoch": 0.27227802981205446, "grad_norm": 0.03969384729862213, "learning_rate": 0.00013612798703928718, "loss": 0.3452, "step": 3361 }, { "epoch": 0.2723590408295528, "grad_norm": 0.03769201785326004, "learning_rate": 0.0001361684892669097, "loss": 0.4277, "step": 3362 }, { "epoch": 0.2724400518470512, "grad_norm": 0.039927735924720764, "learning_rate": 0.00013620899149453222, "loss": 0.3531, "step": 3363 }, { "epoch": 0.2725210628645496, "grad_norm": 0.05112633854150772, "learning_rate": 0.00013624949372215473, "loss": 0.3901, "step": 3364 }, { "epoch": 0.27260207388204793, "grad_norm": 0.049192748963832855, "learning_rate": 0.00013628999594977726, "loss": 0.4118, "step": 3365 }, { "epoch": 0.27268308489954635, "grad_norm": 0.04921245574951172, "learning_rate": 0.00013633049817739977, "loss": 0.4416, "step": 3366 }, { "epoch": 0.2727640959170447, "grad_norm": 0.034451164305210114, "learning_rate": 0.00013637100040502227, "loss": 0.4156, "step": 3367 }, { "epoch": 0.2728451069345431, "grad_norm": 0.03713851794600487, "learning_rate": 0.0001364115026326448, "loss": 0.3796, "step": 3368 }, { "epoch": 0.27292611795204147, "grad_norm": 0.04367047920823097, "learning_rate": 0.0001364520048602673, "loss": 0.3143, "step": 3369 }, { "epoch": 0.2730071289695399, "grad_norm": 0.039611510932445526, "learning_rate": 0.00013649250708788982, "loss": 0.3549, "step": 3370 }, { "epoch": 0.27308813998703824, "grad_norm": 0.043952010571956635, "learning_rate": 0.00013653300931551235, "loss": 0.4153, "step": 3371 }, { "epoch": 0.2731691510045366, "grad_norm": 0.04876040667295456, "learning_rate": 0.00013657351154313486, "loss": 0.3853, "step": 3372 }, { "epoch": 0.273250162022035, "grad_norm": 0.04519908130168915, "learning_rate": 0.0001366140137707574, "loss": 0.3651, "step": 3373 }, { "epoch": 0.27333117303953336, "grad_norm": 0.050599344074726105, "learning_rate": 0.00013665451599837993, "loss": 0.3816, "step": 3374 }, { "epoch": 0.27341218405703177, "grad_norm": 0.04185096547007561, "learning_rate": 0.00013669501822600243, "loss": 0.3821, "step": 3375 }, { "epoch": 0.2734931950745301, "grad_norm": 0.0364995002746582, "learning_rate": 0.00013673552045362496, "loss": 0.3356, "step": 3376 }, { "epoch": 0.27357420609202854, "grad_norm": 0.044956009835004807, "learning_rate": 0.00013677602268124747, "loss": 0.3437, "step": 3377 }, { "epoch": 0.2736552171095269, "grad_norm": 0.04424003139138222, "learning_rate": 0.00013681652490887, "loss": 0.3639, "step": 3378 }, { "epoch": 0.27373622812702525, "grad_norm": 0.042500704526901245, "learning_rate": 0.0001368570271364925, "loss": 0.3316, "step": 3379 }, { "epoch": 0.27381723914452366, "grad_norm": 0.03931257873773575, "learning_rate": 0.00013689752936411504, "loss": 0.3817, "step": 3380 }, { "epoch": 0.273898250162022, "grad_norm": 0.0466313473880291, "learning_rate": 0.00013693803159173755, "loss": 0.3709, "step": 3381 }, { "epoch": 0.27397926117952043, "grad_norm": 0.04171553999185562, "learning_rate": 0.00013697853381936008, "loss": 0.3918, "step": 3382 }, { "epoch": 0.2740602721970188, "grad_norm": 0.04538581892848015, "learning_rate": 0.0001370190360469826, "loss": 0.3781, "step": 3383 }, { "epoch": 0.2741412832145172, "grad_norm": 0.03303420916199684, "learning_rate": 0.00013705953827460512, "loss": 0.3072, "step": 3384 }, { "epoch": 0.27422229423201555, "grad_norm": 0.03682176396250725, "learning_rate": 0.00013710004050222763, "loss": 0.4019, "step": 3385 }, { "epoch": 0.2743033052495139, "grad_norm": 0.040339164435863495, "learning_rate": 0.00013714054272985016, "loss": 0.4232, "step": 3386 }, { "epoch": 0.2743843162670123, "grad_norm": 0.053271178156137466, "learning_rate": 0.00013718104495747267, "loss": 0.3692, "step": 3387 }, { "epoch": 0.2744653272845107, "grad_norm": 0.045034825801849365, "learning_rate": 0.0001372215471850952, "loss": 0.4105, "step": 3388 }, { "epoch": 0.2745463383020091, "grad_norm": 0.04269041493535042, "learning_rate": 0.0001372620494127177, "loss": 0.38, "step": 3389 }, { "epoch": 0.27462734931950744, "grad_norm": 0.0487322062253952, "learning_rate": 0.00013730255164034024, "loss": 0.4426, "step": 3390 }, { "epoch": 0.27470836033700585, "grad_norm": 0.041726551949977875, "learning_rate": 0.00013734305386796275, "loss": 0.3755, "step": 3391 }, { "epoch": 0.2747893713545042, "grad_norm": 0.04072955250740051, "learning_rate": 0.00013738355609558525, "loss": 0.3748, "step": 3392 }, { "epoch": 0.2748703823720026, "grad_norm": 0.04266681149601936, "learning_rate": 0.00013742405832320779, "loss": 0.349, "step": 3393 }, { "epoch": 0.274951393389501, "grad_norm": 0.05282905325293541, "learning_rate": 0.0001374645605508303, "loss": 0.3642, "step": 3394 }, { "epoch": 0.27503240440699933, "grad_norm": 0.04154253751039505, "learning_rate": 0.00013750506277845282, "loss": 0.4091, "step": 3395 }, { "epoch": 0.27511341542449774, "grad_norm": 0.03817356377840042, "learning_rate": 0.00013754556500607533, "loss": 0.3445, "step": 3396 }, { "epoch": 0.2751944264419961, "grad_norm": 0.04182487726211548, "learning_rate": 0.00013758606723369786, "loss": 0.3667, "step": 3397 }, { "epoch": 0.2752754374594945, "grad_norm": 0.042500101029872894, "learning_rate": 0.00013762656946132037, "loss": 0.3824, "step": 3398 }, { "epoch": 0.27535644847699287, "grad_norm": 0.03695574402809143, "learning_rate": 0.0001376670716889429, "loss": 0.3493, "step": 3399 }, { "epoch": 0.2754374594944913, "grad_norm": 0.049295760691165924, "learning_rate": 0.0001377075739165654, "loss": 0.3522, "step": 3400 }, { "epoch": 0.27551847051198963, "grad_norm": 0.04231920465826988, "learning_rate": 0.00013774807614418794, "loss": 0.3436, "step": 3401 }, { "epoch": 0.275599481529488, "grad_norm": 0.040712591260671616, "learning_rate": 0.00013778857837181045, "loss": 0.395, "step": 3402 }, { "epoch": 0.2756804925469864, "grad_norm": 0.034782666712999344, "learning_rate": 0.00013782908059943298, "loss": 0.3343, "step": 3403 }, { "epoch": 0.27576150356448476, "grad_norm": 0.03873579949140549, "learning_rate": 0.0001378695828270555, "loss": 0.3334, "step": 3404 }, { "epoch": 0.27584251458198317, "grad_norm": 0.03855769336223602, "learning_rate": 0.00013791008505467802, "loss": 0.3417, "step": 3405 }, { "epoch": 0.2759235255994815, "grad_norm": 0.042153116315603256, "learning_rate": 0.00013795058728230053, "loss": 0.4225, "step": 3406 }, { "epoch": 0.27600453661697993, "grad_norm": 0.04109833016991615, "learning_rate": 0.00013799108950992306, "loss": 0.3462, "step": 3407 }, { "epoch": 0.2760855476344783, "grad_norm": 0.03828966245055199, "learning_rate": 0.00013803159173754557, "loss": 0.367, "step": 3408 }, { "epoch": 0.27616655865197665, "grad_norm": 0.05456683784723282, "learning_rate": 0.0001380720939651681, "loss": 0.3989, "step": 3409 }, { "epoch": 0.27624756966947506, "grad_norm": 0.04691299423575401, "learning_rate": 0.0001381125961927906, "loss": 0.3572, "step": 3410 }, { "epoch": 0.2763285806869734, "grad_norm": 0.046030715107917786, "learning_rate": 0.00013815309842041314, "loss": 0.3414, "step": 3411 }, { "epoch": 0.2764095917044718, "grad_norm": 0.03669194132089615, "learning_rate": 0.00013819360064803567, "loss": 0.3463, "step": 3412 }, { "epoch": 0.2764906027219702, "grad_norm": 0.03543637692928314, "learning_rate": 0.00013823410287565818, "loss": 0.3356, "step": 3413 }, { "epoch": 0.2765716137394686, "grad_norm": 0.051583707332611084, "learning_rate": 0.00013827460510328069, "loss": 0.3421, "step": 3414 }, { "epoch": 0.27665262475696695, "grad_norm": 0.039491068571805954, "learning_rate": 0.0001383151073309032, "loss": 0.3587, "step": 3415 }, { "epoch": 0.2767336357744653, "grad_norm": 0.0397016741335392, "learning_rate": 0.00013835560955852572, "loss": 0.3307, "step": 3416 }, { "epoch": 0.2768146467919637, "grad_norm": 0.04070290923118591, "learning_rate": 0.00013839611178614823, "loss": 0.3835, "step": 3417 }, { "epoch": 0.27689565780946207, "grad_norm": 0.04390476271510124, "learning_rate": 0.00013843661401377076, "loss": 0.3459, "step": 3418 }, { "epoch": 0.2769766688269605, "grad_norm": 0.04806474596261978, "learning_rate": 0.00013847711624139327, "loss": 0.3261, "step": 3419 }, { "epoch": 0.27705767984445884, "grad_norm": 0.03897551819682121, "learning_rate": 0.0001385176184690158, "loss": 0.3872, "step": 3420 }, { "epoch": 0.27713869086195725, "grad_norm": 0.035828590393066406, "learning_rate": 0.0001385581206966383, "loss": 0.4061, "step": 3421 }, { "epoch": 0.2772197018794556, "grad_norm": 0.03442465886473656, "learning_rate": 0.00013859862292426084, "loss": 0.3272, "step": 3422 }, { "epoch": 0.27730071289695396, "grad_norm": 0.04492630809545517, "learning_rate": 0.00013863912515188335, "loss": 0.3583, "step": 3423 }, { "epoch": 0.27738172391445237, "grad_norm": 0.04906081035733223, "learning_rate": 0.00013867962737950588, "loss": 0.4207, "step": 3424 }, { "epoch": 0.2774627349319507, "grad_norm": 0.04144575819373131, "learning_rate": 0.0001387201296071284, "loss": 0.4113, "step": 3425 }, { "epoch": 0.27754374594944914, "grad_norm": 0.03677285090088844, "learning_rate": 0.00013876063183475092, "loss": 0.3383, "step": 3426 }, { "epoch": 0.2776247569669475, "grad_norm": 0.0371638722717762, "learning_rate": 0.00013880113406237343, "loss": 0.3519, "step": 3427 }, { "epoch": 0.2777057679844459, "grad_norm": 0.04500902071595192, "learning_rate": 0.00013884163628999596, "loss": 0.3751, "step": 3428 }, { "epoch": 0.27778677900194426, "grad_norm": 0.03653738275170326, "learning_rate": 0.00013888213851761847, "loss": 0.3316, "step": 3429 }, { "epoch": 0.2778677900194426, "grad_norm": 0.047400642186403275, "learning_rate": 0.000138922640745241, "loss": 0.4025, "step": 3430 }, { "epoch": 0.27794880103694103, "grad_norm": 0.043980009853839874, "learning_rate": 0.00013896314297286353, "loss": 0.3988, "step": 3431 }, { "epoch": 0.2780298120544394, "grad_norm": 0.03693951293826103, "learning_rate": 0.00013900364520048604, "loss": 0.3071, "step": 3432 }, { "epoch": 0.2781108230719378, "grad_norm": 0.045300330966711044, "learning_rate": 0.00013904414742810857, "loss": 0.4204, "step": 3433 }, { "epoch": 0.27819183408943615, "grad_norm": 0.04068627208471298, "learning_rate": 0.00013908464965573108, "loss": 0.3378, "step": 3434 }, { "epoch": 0.27827284510693456, "grad_norm": 0.036370884627103806, "learning_rate": 0.0001391251518833536, "loss": 0.3359, "step": 3435 }, { "epoch": 0.2783538561244329, "grad_norm": 0.039670251309871674, "learning_rate": 0.00013916565411097612, "loss": 0.3862, "step": 3436 }, { "epoch": 0.27843486714193133, "grad_norm": 0.04585297778248787, "learning_rate": 0.00013920615633859862, "loss": 0.4285, "step": 3437 }, { "epoch": 0.2785158781594297, "grad_norm": 0.048730622977018356, "learning_rate": 0.00013924665856622116, "loss": 0.4015, "step": 3438 }, { "epoch": 0.27859688917692804, "grad_norm": 0.05057798698544502, "learning_rate": 0.00013928716079384366, "loss": 0.3831, "step": 3439 }, { "epoch": 0.27867790019442645, "grad_norm": 0.03900301083922386, "learning_rate": 0.00013932766302146617, "loss": 0.3171, "step": 3440 }, { "epoch": 0.2787589112119248, "grad_norm": 0.03645486384630203, "learning_rate": 0.0001393681652490887, "loss": 0.3732, "step": 3441 }, { "epoch": 0.2788399222294232, "grad_norm": 0.038289736956357956, "learning_rate": 0.0001394086674767112, "loss": 0.3929, "step": 3442 }, { "epoch": 0.2789209332469216, "grad_norm": 0.05204898864030838, "learning_rate": 0.00013944916970433374, "loss": 0.4195, "step": 3443 }, { "epoch": 0.27900194426442, "grad_norm": 0.03858143091201782, "learning_rate": 0.00013948967193195625, "loss": 0.3292, "step": 3444 }, { "epoch": 0.27908295528191834, "grad_norm": 0.04965558275580406, "learning_rate": 0.00013953017415957878, "loss": 0.3984, "step": 3445 }, { "epoch": 0.2791639662994167, "grad_norm": 0.03543282300233841, "learning_rate": 0.0001395706763872013, "loss": 0.4242, "step": 3446 }, { "epoch": 0.2792449773169151, "grad_norm": 0.038583435118198395, "learning_rate": 0.00013961117861482382, "loss": 0.3807, "step": 3447 }, { "epoch": 0.27932598833441347, "grad_norm": 0.034903813153505325, "learning_rate": 0.00013965168084244633, "loss": 0.3902, "step": 3448 }, { "epoch": 0.2794069993519119, "grad_norm": 0.047700364142656326, "learning_rate": 0.00013969218307006886, "loss": 0.373, "step": 3449 }, { "epoch": 0.27948801036941023, "grad_norm": 0.04197010025382042, "learning_rate": 0.0001397326852976914, "loss": 0.3786, "step": 3450 }, { "epoch": 0.27956902138690864, "grad_norm": 0.041849054396152496, "learning_rate": 0.0001397731875253139, "loss": 0.4107, "step": 3451 }, { "epoch": 0.279650032404407, "grad_norm": 0.04604052007198334, "learning_rate": 0.00013981368975293643, "loss": 0.3422, "step": 3452 }, { "epoch": 0.27973104342190536, "grad_norm": 0.04134983569383621, "learning_rate": 0.00013985419198055894, "loss": 0.3852, "step": 3453 }, { "epoch": 0.27981205443940377, "grad_norm": 0.043265946209430695, "learning_rate": 0.00013989469420818147, "loss": 0.3609, "step": 3454 }, { "epoch": 0.2798930654569021, "grad_norm": 0.037968236953020096, "learning_rate": 0.00013993519643580398, "loss": 0.3342, "step": 3455 }, { "epoch": 0.27997407647440054, "grad_norm": 0.04364189878106117, "learning_rate": 0.0001399756986634265, "loss": 0.3908, "step": 3456 }, { "epoch": 0.2800550874918989, "grad_norm": 0.04440903291106224, "learning_rate": 0.00014001620089104902, "loss": 0.4043, "step": 3457 }, { "epoch": 0.2801360985093973, "grad_norm": 0.04480652138590813, "learning_rate": 0.00014005670311867155, "loss": 0.388, "step": 3458 }, { "epoch": 0.28021710952689566, "grad_norm": 0.03758074343204498, "learning_rate": 0.00014009720534629406, "loss": 0.3902, "step": 3459 }, { "epoch": 0.280298120544394, "grad_norm": 0.0437944270670414, "learning_rate": 0.0001401377075739166, "loss": 0.3714, "step": 3460 }, { "epoch": 0.2803791315618924, "grad_norm": 0.039681438356637955, "learning_rate": 0.0001401782098015391, "loss": 0.3775, "step": 3461 }, { "epoch": 0.2804601425793908, "grad_norm": 0.04409261420369148, "learning_rate": 0.0001402187120291616, "loss": 0.3569, "step": 3462 }, { "epoch": 0.2805411535968892, "grad_norm": 0.03767713904380798, "learning_rate": 0.0001402592142567841, "loss": 0.3602, "step": 3463 }, { "epoch": 0.28062216461438755, "grad_norm": 0.04003945365548134, "learning_rate": 0.00014029971648440664, "loss": 0.3751, "step": 3464 }, { "epoch": 0.28070317563188596, "grad_norm": 0.0402999185025692, "learning_rate": 0.00014034021871202915, "loss": 0.422, "step": 3465 }, { "epoch": 0.2807841866493843, "grad_norm": 0.04214511439204216, "learning_rate": 0.00014038072093965168, "loss": 0.3954, "step": 3466 }, { "epoch": 0.28086519766688267, "grad_norm": 0.03766317665576935, "learning_rate": 0.0001404212231672742, "loss": 0.3376, "step": 3467 }, { "epoch": 0.2809462086843811, "grad_norm": 0.044028155505657196, "learning_rate": 0.00014046172539489672, "loss": 0.3901, "step": 3468 }, { "epoch": 0.28102721970187944, "grad_norm": 0.04517464339733124, "learning_rate": 0.00014050222762251925, "loss": 0.3351, "step": 3469 }, { "epoch": 0.28110823071937785, "grad_norm": 0.044333942234516144, "learning_rate": 0.00014054272985014176, "loss": 0.3802, "step": 3470 }, { "epoch": 0.2811892417368762, "grad_norm": 0.04904649406671524, "learning_rate": 0.0001405832320777643, "loss": 0.3873, "step": 3471 }, { "epoch": 0.2812702527543746, "grad_norm": 0.03742432966828346, "learning_rate": 0.0001406237343053868, "loss": 0.4129, "step": 3472 }, { "epoch": 0.281351263771873, "grad_norm": 0.037451375275850296, "learning_rate": 0.00014066423653300933, "loss": 0.3418, "step": 3473 }, { "epoch": 0.28143227478937133, "grad_norm": 0.036673497408628464, "learning_rate": 0.00014070473876063184, "loss": 0.3392, "step": 3474 }, { "epoch": 0.28151328580686974, "grad_norm": 0.035846445709466934, "learning_rate": 0.00014074524098825437, "loss": 0.3608, "step": 3475 }, { "epoch": 0.2815942968243681, "grad_norm": 0.04174364358186722, "learning_rate": 0.00014078574321587688, "loss": 0.344, "step": 3476 }, { "epoch": 0.2816753078418665, "grad_norm": 0.03926561772823334, "learning_rate": 0.0001408262454434994, "loss": 0.3958, "step": 3477 }, { "epoch": 0.28175631885936486, "grad_norm": 0.044232893735170364, "learning_rate": 0.00014086674767112192, "loss": 0.3692, "step": 3478 }, { "epoch": 0.2818373298768633, "grad_norm": 0.03457103297114372, "learning_rate": 0.00014090724989874445, "loss": 0.3502, "step": 3479 }, { "epoch": 0.28191834089436163, "grad_norm": 0.03710518032312393, "learning_rate": 0.00014094775212636696, "loss": 0.4032, "step": 3480 }, { "epoch": 0.28199935191186, "grad_norm": 0.03896922618150711, "learning_rate": 0.0001409882543539895, "loss": 0.3837, "step": 3481 }, { "epoch": 0.2820803629293584, "grad_norm": 0.03973960876464844, "learning_rate": 0.000141028756581612, "loss": 0.378, "step": 3482 }, { "epoch": 0.28216137394685675, "grad_norm": 0.03989710286259651, "learning_rate": 0.00014106925880923453, "loss": 0.3804, "step": 3483 }, { "epoch": 0.28224238496435516, "grad_norm": 0.035204801708459854, "learning_rate": 0.00014110976103685703, "loss": 0.378, "step": 3484 }, { "epoch": 0.2823233959818535, "grad_norm": 0.052764181047677994, "learning_rate": 0.00014115026326447954, "loss": 0.4182, "step": 3485 }, { "epoch": 0.28240440699935193, "grad_norm": 0.03714631870388985, "learning_rate": 0.00014119076549210207, "loss": 0.3522, "step": 3486 }, { "epoch": 0.2824854180168503, "grad_norm": 0.03593859076499939, "learning_rate": 0.00014123126771972458, "loss": 0.3286, "step": 3487 }, { "epoch": 0.2825664290343487, "grad_norm": 0.040704239159822464, "learning_rate": 0.0001412717699473471, "loss": 0.3842, "step": 3488 }, { "epoch": 0.28264744005184705, "grad_norm": 0.03508979082107544, "learning_rate": 0.00014131227217496962, "loss": 0.3497, "step": 3489 }, { "epoch": 0.2827284510693454, "grad_norm": 0.03818688914179802, "learning_rate": 0.00014135277440259215, "loss": 0.4074, "step": 3490 }, { "epoch": 0.2828094620868438, "grad_norm": 0.048654261976480484, "learning_rate": 0.00014139327663021466, "loss": 0.3559, "step": 3491 }, { "epoch": 0.2828904731043422, "grad_norm": 0.03858116269111633, "learning_rate": 0.0001414337788578372, "loss": 0.3417, "step": 3492 }, { "epoch": 0.2829714841218406, "grad_norm": 0.04757966473698616, "learning_rate": 0.0001414742810854597, "loss": 0.391, "step": 3493 }, { "epoch": 0.28305249513933894, "grad_norm": 0.04076710715889931, "learning_rate": 0.00014151478331308223, "loss": 0.3532, "step": 3494 }, { "epoch": 0.28313350615683736, "grad_norm": 0.046661194413900375, "learning_rate": 0.00014155528554070474, "loss": 0.3996, "step": 3495 }, { "epoch": 0.2832145171743357, "grad_norm": 0.04634448140859604, "learning_rate": 0.00014159578776832727, "loss": 0.403, "step": 3496 }, { "epoch": 0.28329552819183407, "grad_norm": 0.044653963297605515, "learning_rate": 0.00014163628999594978, "loss": 0.3725, "step": 3497 }, { "epoch": 0.2833765392093325, "grad_norm": 0.037522487342357635, "learning_rate": 0.0001416767922235723, "loss": 0.3723, "step": 3498 }, { "epoch": 0.28345755022683083, "grad_norm": 0.04664743319153786, "learning_rate": 0.00014171729445119482, "loss": 0.4247, "step": 3499 }, { "epoch": 0.28353856124432925, "grad_norm": 0.04447102174162865, "learning_rate": 0.00014175779667881735, "loss": 0.3465, "step": 3500 }, { "epoch": 0.2836195722618276, "grad_norm": 0.04960550740361214, "learning_rate": 0.00014179829890643986, "loss": 0.3827, "step": 3501 }, { "epoch": 0.283700583279326, "grad_norm": 0.046506986021995544, "learning_rate": 0.0001418388011340624, "loss": 0.4013, "step": 3502 }, { "epoch": 0.28378159429682437, "grad_norm": 0.04039178788661957, "learning_rate": 0.0001418793033616849, "loss": 0.4076, "step": 3503 }, { "epoch": 0.2838626053143227, "grad_norm": 0.03963049128651619, "learning_rate": 0.00014191980558930743, "loss": 0.3529, "step": 3504 }, { "epoch": 0.28394361633182114, "grad_norm": 0.04509516805410385, "learning_rate": 0.00014196030781692993, "loss": 0.3771, "step": 3505 }, { "epoch": 0.2840246273493195, "grad_norm": 0.04358714073896408, "learning_rate": 0.00014200081004455247, "loss": 0.3809, "step": 3506 }, { "epoch": 0.2841056383668179, "grad_norm": 0.036663755774497986, "learning_rate": 0.00014204131227217497, "loss": 0.3424, "step": 3507 }, { "epoch": 0.28418664938431626, "grad_norm": 0.03686859831213951, "learning_rate": 0.0001420818144997975, "loss": 0.3423, "step": 3508 }, { "epoch": 0.28426766040181467, "grad_norm": 0.03790473937988281, "learning_rate": 0.00014212231672742, "loss": 0.3514, "step": 3509 }, { "epoch": 0.284348671419313, "grad_norm": 0.043648287653923035, "learning_rate": 0.00014216281895504252, "loss": 0.3904, "step": 3510 }, { "epoch": 0.2844296824368114, "grad_norm": 0.03174104541540146, "learning_rate": 0.00014220332118266505, "loss": 0.3637, "step": 3511 }, { "epoch": 0.2845106934543098, "grad_norm": 0.03682335466146469, "learning_rate": 0.00014224382341028756, "loss": 0.375, "step": 3512 }, { "epoch": 0.28459170447180815, "grad_norm": 0.0405367873609066, "learning_rate": 0.0001422843256379101, "loss": 0.3642, "step": 3513 }, { "epoch": 0.28467271548930656, "grad_norm": 0.03693538159132004, "learning_rate": 0.0001423248278655326, "loss": 0.3858, "step": 3514 }, { "epoch": 0.2847537265068049, "grad_norm": 0.04037932679057121, "learning_rate": 0.00014236533009315513, "loss": 0.3701, "step": 3515 }, { "epoch": 0.2848347375243033, "grad_norm": 0.038718000054359436, "learning_rate": 0.00014240583232077764, "loss": 0.3506, "step": 3516 }, { "epoch": 0.2849157485418017, "grad_norm": 0.04396659508347511, "learning_rate": 0.00014244633454840017, "loss": 0.4199, "step": 3517 }, { "epoch": 0.28499675955930004, "grad_norm": 0.035912420600652695, "learning_rate": 0.00014248683677602268, "loss": 0.3609, "step": 3518 }, { "epoch": 0.28507777057679845, "grad_norm": 0.052159637212753296, "learning_rate": 0.0001425273390036452, "loss": 0.3734, "step": 3519 }, { "epoch": 0.2851587815942968, "grad_norm": 0.04180484637618065, "learning_rate": 0.00014256784123126772, "loss": 0.3505, "step": 3520 }, { "epoch": 0.2852397926117952, "grad_norm": 0.03229363262653351, "learning_rate": 0.00014260834345889025, "loss": 0.3691, "step": 3521 }, { "epoch": 0.2853208036292936, "grad_norm": 0.04348330572247505, "learning_rate": 0.00014264884568651276, "loss": 0.3871, "step": 3522 }, { "epoch": 0.285401814646792, "grad_norm": 0.04351454973220825, "learning_rate": 0.0001426893479141353, "loss": 0.3836, "step": 3523 }, { "epoch": 0.28548282566429034, "grad_norm": 0.04140999913215637, "learning_rate": 0.0001427298501417578, "loss": 0.4107, "step": 3524 }, { "epoch": 0.2855638366817887, "grad_norm": 0.04897570237517357, "learning_rate": 0.00014277035236938033, "loss": 0.4028, "step": 3525 }, { "epoch": 0.2856448476992871, "grad_norm": 0.03690803796052933, "learning_rate": 0.00014281085459700286, "loss": 0.3223, "step": 3526 }, { "epoch": 0.28572585871678546, "grad_norm": 0.05218454450368881, "learning_rate": 0.00014285135682462537, "loss": 0.398, "step": 3527 }, { "epoch": 0.2858068697342839, "grad_norm": 0.04127073287963867, "learning_rate": 0.0001428918590522479, "loss": 0.4329, "step": 3528 }, { "epoch": 0.28588788075178223, "grad_norm": 0.03908568620681763, "learning_rate": 0.0001429323612798704, "loss": 0.3761, "step": 3529 }, { "epoch": 0.28596889176928064, "grad_norm": 0.03365986794233322, "learning_rate": 0.00014297286350749294, "loss": 0.3516, "step": 3530 }, { "epoch": 0.286049902786779, "grad_norm": 0.03684350103139877, "learning_rate": 0.00014301336573511545, "loss": 0.3762, "step": 3531 }, { "epoch": 0.2861309138042774, "grad_norm": 0.03770121559500694, "learning_rate": 0.00014305386796273795, "loss": 0.3836, "step": 3532 }, { "epoch": 0.28621192482177576, "grad_norm": 0.03829558193683624, "learning_rate": 0.00014309437019036048, "loss": 0.3485, "step": 3533 }, { "epoch": 0.2862929358392741, "grad_norm": 0.052036408334970474, "learning_rate": 0.000143134872417983, "loss": 0.3988, "step": 3534 }, { "epoch": 0.28637394685677253, "grad_norm": 0.048078227788209915, "learning_rate": 0.0001431753746456055, "loss": 0.3805, "step": 3535 }, { "epoch": 0.2864549578742709, "grad_norm": 0.043900419026613235, "learning_rate": 0.00014321587687322803, "loss": 0.4089, "step": 3536 }, { "epoch": 0.2865359688917693, "grad_norm": 0.03973248600959778, "learning_rate": 0.00014325637910085054, "loss": 0.3965, "step": 3537 }, { "epoch": 0.28661697990926766, "grad_norm": 0.04060791805386543, "learning_rate": 0.00014329688132847307, "loss": 0.3785, "step": 3538 }, { "epoch": 0.28669799092676607, "grad_norm": 0.035248950123786926, "learning_rate": 0.00014333738355609558, "loss": 0.3761, "step": 3539 }, { "epoch": 0.2867790019442644, "grad_norm": 0.03733890876173973, "learning_rate": 0.0001433778857837181, "loss": 0.3458, "step": 3540 }, { "epoch": 0.2868600129617628, "grad_norm": 0.04270927235484123, "learning_rate": 0.00014341838801134062, "loss": 0.3731, "step": 3541 }, { "epoch": 0.2869410239792612, "grad_norm": 0.0395025797188282, "learning_rate": 0.00014345889023896315, "loss": 0.3688, "step": 3542 }, { "epoch": 0.28702203499675955, "grad_norm": 0.03573182225227356, "learning_rate": 0.00014349939246658565, "loss": 0.3082, "step": 3543 }, { "epoch": 0.28710304601425796, "grad_norm": 0.03845391795039177, "learning_rate": 0.0001435398946942082, "loss": 0.3705, "step": 3544 }, { "epoch": 0.2871840570317563, "grad_norm": 0.03716522082686424, "learning_rate": 0.00014358039692183072, "loss": 0.3207, "step": 3545 }, { "epoch": 0.2872650680492547, "grad_norm": 0.046315036714076996, "learning_rate": 0.00014362089914945323, "loss": 0.3454, "step": 3546 }, { "epoch": 0.2873460790667531, "grad_norm": 0.033960383385419846, "learning_rate": 0.00014366140137707576, "loss": 0.3123, "step": 3547 }, { "epoch": 0.28742709008425144, "grad_norm": 0.04001070186495781, "learning_rate": 0.00014370190360469827, "loss": 0.4092, "step": 3548 }, { "epoch": 0.28750810110174985, "grad_norm": 0.04841066151857376, "learning_rate": 0.0001437424058323208, "loss": 0.3717, "step": 3549 }, { "epoch": 0.2875891121192482, "grad_norm": 0.04550325497984886, "learning_rate": 0.0001437829080599433, "loss": 0.3799, "step": 3550 }, { "epoch": 0.2876701231367466, "grad_norm": 0.04212433844804764, "learning_rate": 0.00014382341028756584, "loss": 0.4029, "step": 3551 }, { "epoch": 0.28775113415424497, "grad_norm": 0.046040620654821396, "learning_rate": 0.00014386391251518835, "loss": 0.4278, "step": 3552 }, { "epoch": 0.2878321451717434, "grad_norm": 0.0427493192255497, "learning_rate": 0.00014390441474281088, "loss": 0.3561, "step": 3553 }, { "epoch": 0.28791315618924174, "grad_norm": 0.036486729979515076, "learning_rate": 0.00014394491697043338, "loss": 0.3879, "step": 3554 }, { "epoch": 0.2879941672067401, "grad_norm": 0.03568176552653313, "learning_rate": 0.00014398541919805592, "loss": 0.4286, "step": 3555 }, { "epoch": 0.2880751782242385, "grad_norm": 0.04736393690109253, "learning_rate": 0.00014402592142567842, "loss": 0.4214, "step": 3556 }, { "epoch": 0.28815618924173686, "grad_norm": 0.04134480282664299, "learning_rate": 0.00014406642365330093, "loss": 0.3721, "step": 3557 }, { "epoch": 0.28823720025923527, "grad_norm": 0.03446532040834427, "learning_rate": 0.00014410692588092344, "loss": 0.3385, "step": 3558 }, { "epoch": 0.2883182112767336, "grad_norm": 0.03532950207591057, "learning_rate": 0.00014414742810854597, "loss": 0.371, "step": 3559 }, { "epoch": 0.28839922229423204, "grad_norm": 0.04255358502268791, "learning_rate": 0.00014418793033616848, "loss": 0.424, "step": 3560 }, { "epoch": 0.2884802333117304, "grad_norm": 0.035087406635284424, "learning_rate": 0.000144228432563791, "loss": 0.3308, "step": 3561 }, { "epoch": 0.28856124432922875, "grad_norm": 0.049905095249414444, "learning_rate": 0.00014426893479141351, "loss": 0.3809, "step": 3562 }, { "epoch": 0.28864225534672716, "grad_norm": 0.040026336908340454, "learning_rate": 0.00014430943701903605, "loss": 0.3896, "step": 3563 }, { "epoch": 0.2887232663642255, "grad_norm": 0.03831109777092934, "learning_rate": 0.00014434993924665858, "loss": 0.4104, "step": 3564 }, { "epoch": 0.28880427738172393, "grad_norm": 0.04056220501661301, "learning_rate": 0.0001443904414742811, "loss": 0.3361, "step": 3565 }, { "epoch": 0.2888852883992223, "grad_norm": 0.038314368575811386, "learning_rate": 0.00014443094370190362, "loss": 0.3596, "step": 3566 }, { "epoch": 0.2889662994167207, "grad_norm": 0.04543676599860191, "learning_rate": 0.00014447144592952613, "loss": 0.4056, "step": 3567 }, { "epoch": 0.28904731043421905, "grad_norm": 0.04003027454018593, "learning_rate": 0.00014451194815714866, "loss": 0.3556, "step": 3568 }, { "epoch": 0.2891283214517174, "grad_norm": 0.04414728283882141, "learning_rate": 0.00014455245038477117, "loss": 0.4268, "step": 3569 }, { "epoch": 0.2892093324692158, "grad_norm": 0.0389229953289032, "learning_rate": 0.0001445929526123937, "loss": 0.3166, "step": 3570 }, { "epoch": 0.2892903434867142, "grad_norm": 0.0376911461353302, "learning_rate": 0.0001446334548400162, "loss": 0.3885, "step": 3571 }, { "epoch": 0.2893713545042126, "grad_norm": 0.04160469397902489, "learning_rate": 0.00014467395706763874, "loss": 0.3855, "step": 3572 }, { "epoch": 0.28945236552171094, "grad_norm": 0.041869085282087326, "learning_rate": 0.00014471445929526124, "loss": 0.3559, "step": 3573 }, { "epoch": 0.28953337653920935, "grad_norm": 0.04308614507317543, "learning_rate": 0.00014475496152288378, "loss": 0.3946, "step": 3574 }, { "epoch": 0.2896143875567077, "grad_norm": 0.036581553518772125, "learning_rate": 0.00014479546375050628, "loss": 0.3732, "step": 3575 }, { "epoch": 0.28969539857420606, "grad_norm": 0.050230905413627625, "learning_rate": 0.00014483596597812882, "loss": 0.3364, "step": 3576 }, { "epoch": 0.2897764095917045, "grad_norm": 0.04288823902606964, "learning_rate": 0.00014487646820575132, "loss": 0.3683, "step": 3577 }, { "epoch": 0.28985742060920283, "grad_norm": 0.03672194108366966, "learning_rate": 0.00014491697043337386, "loss": 0.3776, "step": 3578 }, { "epoch": 0.28993843162670124, "grad_norm": 0.04550372064113617, "learning_rate": 0.00014495747266099636, "loss": 0.4514, "step": 3579 }, { "epoch": 0.2900194426441996, "grad_norm": 0.04061241075396538, "learning_rate": 0.00014499797488861887, "loss": 0.3659, "step": 3580 }, { "epoch": 0.290100453661698, "grad_norm": 0.039126452058553696, "learning_rate": 0.0001450384771162414, "loss": 0.4203, "step": 3581 }, { "epoch": 0.29018146467919637, "grad_norm": 0.0516708642244339, "learning_rate": 0.0001450789793438639, "loss": 0.384, "step": 3582 }, { "epoch": 0.2902624756966948, "grad_norm": 0.045026857405900955, "learning_rate": 0.00014511948157148644, "loss": 0.3631, "step": 3583 }, { "epoch": 0.29034348671419313, "grad_norm": 0.036989931017160416, "learning_rate": 0.00014515998379910895, "loss": 0.3612, "step": 3584 }, { "epoch": 0.2904244977316915, "grad_norm": 0.03131851181387901, "learning_rate": 0.00014520048602673148, "loss": 0.3642, "step": 3585 }, { "epoch": 0.2905055087491899, "grad_norm": 0.03495854511857033, "learning_rate": 0.000145240988254354, "loss": 0.3532, "step": 3586 }, { "epoch": 0.29058651976668826, "grad_norm": 0.03899826854467392, "learning_rate": 0.00014528149048197652, "loss": 0.3829, "step": 3587 }, { "epoch": 0.29066753078418667, "grad_norm": 0.04118049889802933, "learning_rate": 0.00014532199270959903, "loss": 0.3684, "step": 3588 }, { "epoch": 0.290748541801685, "grad_norm": 0.04020068049430847, "learning_rate": 0.00014536249493722156, "loss": 0.3875, "step": 3589 }, { "epoch": 0.29082955281918343, "grad_norm": 0.04532919079065323, "learning_rate": 0.00014540299716484407, "loss": 0.403, "step": 3590 }, { "epoch": 0.2909105638366818, "grad_norm": 0.03378492221236229, "learning_rate": 0.0001454434993924666, "loss": 0.4048, "step": 3591 }, { "epoch": 0.29099157485418015, "grad_norm": 0.04663817957043648, "learning_rate": 0.0001454840016200891, "loss": 0.375, "step": 3592 }, { "epoch": 0.29107258587167856, "grad_norm": 0.051759760826826096, "learning_rate": 0.00014552450384771164, "loss": 0.3474, "step": 3593 }, { "epoch": 0.2911535968891769, "grad_norm": 0.043322253972291946, "learning_rate": 0.00014556500607533414, "loss": 0.4434, "step": 3594 }, { "epoch": 0.2912346079066753, "grad_norm": 0.04209831729531288, "learning_rate": 0.00014560550830295668, "loss": 0.3423, "step": 3595 }, { "epoch": 0.2913156189241737, "grad_norm": 0.04809553548693657, "learning_rate": 0.00014564601053057918, "loss": 0.4065, "step": 3596 }, { "epoch": 0.2913966299416721, "grad_norm": 0.04254837706685066, "learning_rate": 0.00014568651275820172, "loss": 0.391, "step": 3597 }, { "epoch": 0.29147764095917045, "grad_norm": 0.050005342811346054, "learning_rate": 0.00014572701498582422, "loss": 0.3765, "step": 3598 }, { "epoch": 0.2915586519766688, "grad_norm": 0.04318535327911377, "learning_rate": 0.00014576751721344676, "loss": 0.3803, "step": 3599 }, { "epoch": 0.2916396629941672, "grad_norm": 0.042721282690763474, "learning_rate": 0.0001458080194410693, "loss": 0.3554, "step": 3600 }, { "epoch": 0.29172067401166557, "grad_norm": 0.03852958604693413, "learning_rate": 0.0001458485216686918, "loss": 0.4072, "step": 3601 }, { "epoch": 0.291801685029164, "grad_norm": 0.048130251467227936, "learning_rate": 0.0001458890238963143, "loss": 0.3778, "step": 3602 }, { "epoch": 0.29188269604666234, "grad_norm": 0.03864138573408127, "learning_rate": 0.00014592952612393683, "loss": 0.3391, "step": 3603 }, { "epoch": 0.29196370706416075, "grad_norm": 0.044405851513147354, "learning_rate": 0.00014597002835155934, "loss": 0.376, "step": 3604 }, { "epoch": 0.2920447180816591, "grad_norm": 0.0400206632912159, "learning_rate": 0.00014601053057918185, "loss": 0.3613, "step": 3605 }, { "epoch": 0.29212572909915746, "grad_norm": 0.05284254252910614, "learning_rate": 0.00014605103280680438, "loss": 0.4416, "step": 3606 }, { "epoch": 0.29220674011665587, "grad_norm": 0.044001657515764236, "learning_rate": 0.00014609153503442689, "loss": 0.3975, "step": 3607 }, { "epoch": 0.29228775113415423, "grad_norm": 0.04004145413637161, "learning_rate": 0.00014613203726204942, "loss": 0.3787, "step": 3608 }, { "epoch": 0.29236876215165264, "grad_norm": 0.04128222540020943, "learning_rate": 0.00014617253948967193, "loss": 0.3683, "step": 3609 }, { "epoch": 0.292449773169151, "grad_norm": 0.03241799399256706, "learning_rate": 0.00014621304171729446, "loss": 0.3784, "step": 3610 }, { "epoch": 0.2925307841866494, "grad_norm": 0.046559903770685196, "learning_rate": 0.00014625354394491696, "loss": 0.3505, "step": 3611 }, { "epoch": 0.29261179520414776, "grad_norm": 0.04115467518568039, "learning_rate": 0.0001462940461725395, "loss": 0.3923, "step": 3612 }, { "epoch": 0.2926928062216461, "grad_norm": 0.04463135451078415, "learning_rate": 0.000146334548400162, "loss": 0.3951, "step": 3613 }, { "epoch": 0.29277381723914453, "grad_norm": 0.04419780522584915, "learning_rate": 0.00014637505062778454, "loss": 0.3886, "step": 3614 }, { "epoch": 0.2928548282566429, "grad_norm": 0.040862735360860825, "learning_rate": 0.00014641555285540704, "loss": 0.4113, "step": 3615 }, { "epoch": 0.2929358392741413, "grad_norm": 0.03600708395242691, "learning_rate": 0.00014645605508302958, "loss": 0.3534, "step": 3616 }, { "epoch": 0.29301685029163965, "grad_norm": 0.03524504974484444, "learning_rate": 0.00014649655731065208, "loss": 0.3842, "step": 3617 }, { "epoch": 0.29309786130913806, "grad_norm": 0.039181362837553024, "learning_rate": 0.00014653705953827462, "loss": 0.3386, "step": 3618 }, { "epoch": 0.2931788723266364, "grad_norm": 0.04047534987330437, "learning_rate": 0.00014657756176589715, "loss": 0.405, "step": 3619 }, { "epoch": 0.2932598833441348, "grad_norm": 0.04122704267501831, "learning_rate": 0.00014661806399351966, "loss": 0.338, "step": 3620 }, { "epoch": 0.2933408943616332, "grad_norm": 0.03426510840654373, "learning_rate": 0.0001466585662211422, "loss": 0.3272, "step": 3621 }, { "epoch": 0.29342190537913154, "grad_norm": 0.049452658742666245, "learning_rate": 0.0001466990684487647, "loss": 0.3257, "step": 3622 }, { "epoch": 0.29350291639662995, "grad_norm": 0.04260660335421562, "learning_rate": 0.00014673957067638723, "loss": 0.3875, "step": 3623 }, { "epoch": 0.2935839274141283, "grad_norm": 0.039224427193403244, "learning_rate": 0.00014678007290400973, "loss": 0.3489, "step": 3624 }, { "epoch": 0.2936649384316267, "grad_norm": 0.04074981436133385, "learning_rate": 0.00014682057513163227, "loss": 0.3824, "step": 3625 }, { "epoch": 0.2937459494491251, "grad_norm": 0.04070234298706055, "learning_rate": 0.00014686107735925477, "loss": 0.3139, "step": 3626 }, { "epoch": 0.2938269604666235, "grad_norm": 0.03818705305457115, "learning_rate": 0.00014690157958687728, "loss": 0.3318, "step": 3627 }, { "epoch": 0.29390797148412184, "grad_norm": 0.03919083997607231, "learning_rate": 0.00014694208181449979, "loss": 0.3666, "step": 3628 }, { "epoch": 0.2939889825016202, "grad_norm": 0.052625805139541626, "learning_rate": 0.00014698258404212232, "loss": 0.3837, "step": 3629 }, { "epoch": 0.2940699935191186, "grad_norm": 0.048445843160152435, "learning_rate": 0.00014702308626974483, "loss": 0.3527, "step": 3630 }, { "epoch": 0.29415100453661697, "grad_norm": 0.04346831887960434, "learning_rate": 0.00014706358849736736, "loss": 0.393, "step": 3631 }, { "epoch": 0.2942320155541154, "grad_norm": 0.06405261904001236, "learning_rate": 0.00014710409072498986, "loss": 0.3996, "step": 3632 }, { "epoch": 0.29431302657161373, "grad_norm": 0.03609946370124817, "learning_rate": 0.0001471445929526124, "loss": 0.3749, "step": 3633 }, { "epoch": 0.29439403758911215, "grad_norm": 0.0333322137594223, "learning_rate": 0.0001471850951802349, "loss": 0.3139, "step": 3634 }, { "epoch": 0.2944750486066105, "grad_norm": 0.036384835839271545, "learning_rate": 0.00014722559740785744, "loss": 0.3566, "step": 3635 }, { "epoch": 0.29455605962410886, "grad_norm": 0.04768802970647812, "learning_rate": 0.00014726609963547994, "loss": 0.3948, "step": 3636 }, { "epoch": 0.29463707064160727, "grad_norm": 0.03785340115427971, "learning_rate": 0.00014730660186310248, "loss": 0.3791, "step": 3637 }, { "epoch": 0.2947180816591056, "grad_norm": 0.03505001962184906, "learning_rate": 0.000147347104090725, "loss": 0.3427, "step": 3638 }, { "epoch": 0.29479909267660404, "grad_norm": 0.03778628259897232, "learning_rate": 0.00014738760631834752, "loss": 0.4328, "step": 3639 }, { "epoch": 0.2948801036941024, "grad_norm": 0.0557037778198719, "learning_rate": 0.00014742810854597005, "loss": 0.393, "step": 3640 }, { "epoch": 0.2949611147116008, "grad_norm": 0.036547672003507614, "learning_rate": 0.00014746861077359256, "loss": 0.372, "step": 3641 }, { "epoch": 0.29504212572909916, "grad_norm": 0.03863165155053139, "learning_rate": 0.0001475091130012151, "loss": 0.4083, "step": 3642 }, { "epoch": 0.2951231367465975, "grad_norm": 0.030831417068839073, "learning_rate": 0.0001475496152288376, "loss": 0.3538, "step": 3643 }, { "epoch": 0.2952041477640959, "grad_norm": 0.0388491228222847, "learning_rate": 0.00014759011745646013, "loss": 0.3774, "step": 3644 }, { "epoch": 0.2952851587815943, "grad_norm": 0.04144338145852089, "learning_rate": 0.00014763061968408263, "loss": 0.3155, "step": 3645 }, { "epoch": 0.2953661697990927, "grad_norm": 0.03875422850251198, "learning_rate": 0.00014767112191170517, "loss": 0.3804, "step": 3646 }, { "epoch": 0.29544718081659105, "grad_norm": 0.03554854914546013, "learning_rate": 0.00014771162413932767, "loss": 0.334, "step": 3647 }, { "epoch": 0.29552819183408946, "grad_norm": 0.03913655877113342, "learning_rate": 0.0001477521263669502, "loss": 0.3686, "step": 3648 }, { "epoch": 0.2956092028515878, "grad_norm": 0.033935628831386566, "learning_rate": 0.0001477926285945727, "loss": 0.3666, "step": 3649 }, { "epoch": 0.29569021386908617, "grad_norm": 0.040643028914928436, "learning_rate": 0.00014783313082219522, "loss": 0.3867, "step": 3650 }, { "epoch": 0.2957712248865846, "grad_norm": 0.040583305060863495, "learning_rate": 0.00014787363304981775, "loss": 0.3971, "step": 3651 }, { "epoch": 0.29585223590408294, "grad_norm": 0.043748460710048676, "learning_rate": 0.00014791413527744026, "loss": 0.3837, "step": 3652 }, { "epoch": 0.29593324692158135, "grad_norm": 0.032871171832084656, "learning_rate": 0.00014795463750506276, "loss": 0.3959, "step": 3653 }, { "epoch": 0.2960142579390797, "grad_norm": 0.03821488469839096, "learning_rate": 0.0001479951397326853, "loss": 0.3946, "step": 3654 }, { "epoch": 0.2960952689565781, "grad_norm": 0.03509106859564781, "learning_rate": 0.0001480356419603078, "loss": 0.3799, "step": 3655 }, { "epoch": 0.2961762799740765, "grad_norm": 0.03787700831890106, "learning_rate": 0.00014807614418793034, "loss": 0.3385, "step": 3656 }, { "epoch": 0.29625729099157483, "grad_norm": 0.03202836588025093, "learning_rate": 0.00014811664641555287, "loss": 0.3546, "step": 3657 }, { "epoch": 0.29633830200907324, "grad_norm": 0.03317293897271156, "learning_rate": 0.00014815714864317538, "loss": 0.3755, "step": 3658 }, { "epoch": 0.2964193130265716, "grad_norm": 0.03745601698756218, "learning_rate": 0.0001481976508707979, "loss": 0.3894, "step": 3659 }, { "epoch": 0.29650032404407, "grad_norm": 0.04062475264072418, "learning_rate": 0.00014823815309842042, "loss": 0.377, "step": 3660 }, { "epoch": 0.29658133506156836, "grad_norm": 0.043611712753772736, "learning_rate": 0.00014827865532604295, "loss": 0.3956, "step": 3661 }, { "epoch": 0.2966623460790668, "grad_norm": 0.04180651530623436, "learning_rate": 0.00014831915755366545, "loss": 0.4128, "step": 3662 }, { "epoch": 0.29674335709656513, "grad_norm": 0.03399563580751419, "learning_rate": 0.000148359659781288, "loss": 0.3598, "step": 3663 }, { "epoch": 0.2968243681140635, "grad_norm": 0.03948217257857323, "learning_rate": 0.0001484001620089105, "loss": 0.3433, "step": 3664 }, { "epoch": 0.2969053791315619, "grad_norm": 0.040393419563770294, "learning_rate": 0.00014844066423653303, "loss": 0.3689, "step": 3665 }, { "epoch": 0.29698639014906025, "grad_norm": 0.037186894565820694, "learning_rate": 0.00014848116646415553, "loss": 0.3551, "step": 3666 }, { "epoch": 0.29706740116655866, "grad_norm": 0.04067966714501381, "learning_rate": 0.00014852166869177807, "loss": 0.3772, "step": 3667 }, { "epoch": 0.297148412184057, "grad_norm": 0.03943729028105736, "learning_rate": 0.00014856217091940057, "loss": 0.3924, "step": 3668 }, { "epoch": 0.29722942320155543, "grad_norm": 0.045668501406908035, "learning_rate": 0.0001486026731470231, "loss": 0.3949, "step": 3669 }, { "epoch": 0.2973104342190538, "grad_norm": 0.03306059166789055, "learning_rate": 0.0001486431753746456, "loss": 0.3779, "step": 3670 }, { "epoch": 0.2973914452365522, "grad_norm": 0.045515839010477066, "learning_rate": 0.00014868367760226815, "loss": 0.3821, "step": 3671 }, { "epoch": 0.29747245625405055, "grad_norm": 0.03392757847905159, "learning_rate": 0.00014872417982989065, "loss": 0.3475, "step": 3672 }, { "epoch": 0.2975534672715489, "grad_norm": 0.04137163981795311, "learning_rate": 0.00014876468205751318, "loss": 0.4049, "step": 3673 }, { "epoch": 0.2976344782890473, "grad_norm": 0.040442898869514465, "learning_rate": 0.0001488051842851357, "loss": 0.3604, "step": 3674 }, { "epoch": 0.2977154893065457, "grad_norm": 0.042486123740673065, "learning_rate": 0.0001488456865127582, "loss": 0.3766, "step": 3675 }, { "epoch": 0.2977965003240441, "grad_norm": 0.04691873863339424, "learning_rate": 0.00014888618874038073, "loss": 0.3714, "step": 3676 }, { "epoch": 0.29787751134154244, "grad_norm": 0.04484069347381592, "learning_rate": 0.00014892669096800324, "loss": 0.3795, "step": 3677 }, { "epoch": 0.29795852235904086, "grad_norm": 0.04399680346250534, "learning_rate": 0.00014896719319562577, "loss": 0.3808, "step": 3678 }, { "epoch": 0.2980395333765392, "grad_norm": 0.041292715817689896, "learning_rate": 0.00014900769542324828, "loss": 0.375, "step": 3679 }, { "epoch": 0.29812054439403757, "grad_norm": 0.042164355516433716, "learning_rate": 0.0001490481976508708, "loss": 0.379, "step": 3680 }, { "epoch": 0.298201555411536, "grad_norm": 0.04407854005694389, "learning_rate": 0.00014908869987849331, "loss": 0.4312, "step": 3681 }, { "epoch": 0.29828256642903433, "grad_norm": 0.03977717086672783, "learning_rate": 0.00014912920210611585, "loss": 0.433, "step": 3682 }, { "epoch": 0.29836357744653275, "grad_norm": 0.044105205684900284, "learning_rate": 0.00014916970433373835, "loss": 0.4145, "step": 3683 }, { "epoch": 0.2984445884640311, "grad_norm": 0.03839416801929474, "learning_rate": 0.0001492102065613609, "loss": 0.3969, "step": 3684 }, { "epoch": 0.2985255994815295, "grad_norm": 0.040000759065151215, "learning_rate": 0.0001492507087889834, "loss": 0.431, "step": 3685 }, { "epoch": 0.29860661049902787, "grad_norm": 0.04719092324376106, "learning_rate": 0.00014929121101660593, "loss": 0.3822, "step": 3686 }, { "epoch": 0.2986876215165262, "grad_norm": 0.03640539199113846, "learning_rate": 0.00014933171324422843, "loss": 0.4155, "step": 3687 }, { "epoch": 0.29876863253402464, "grad_norm": 0.0411471463739872, "learning_rate": 0.00014937221547185097, "loss": 0.3914, "step": 3688 }, { "epoch": 0.298849643551523, "grad_norm": 0.041285451501607895, "learning_rate": 0.00014941271769947347, "loss": 0.3989, "step": 3689 }, { "epoch": 0.2989306545690214, "grad_norm": 0.034479282796382904, "learning_rate": 0.000149453219927096, "loss": 0.3202, "step": 3690 }, { "epoch": 0.29901166558651976, "grad_norm": 0.03484316170215607, "learning_rate": 0.0001494937221547185, "loss": 0.3759, "step": 3691 }, { "epoch": 0.29909267660401817, "grad_norm": 0.04065540432929993, "learning_rate": 0.00014953422438234104, "loss": 0.407, "step": 3692 }, { "epoch": 0.2991736876215165, "grad_norm": 0.03859071433544159, "learning_rate": 0.00014957472660996355, "loss": 0.3799, "step": 3693 }, { "epoch": 0.2992546986390149, "grad_norm": 0.038158584386110306, "learning_rate": 0.00014961522883758608, "loss": 0.3492, "step": 3694 }, { "epoch": 0.2993357096565133, "grad_norm": 0.03658697009086609, "learning_rate": 0.00014965573106520862, "loss": 0.3138, "step": 3695 }, { "epoch": 0.29941672067401165, "grad_norm": 0.03887635096907616, "learning_rate": 0.00014969623329283112, "loss": 0.3794, "step": 3696 }, { "epoch": 0.29949773169151006, "grad_norm": 0.036319054663181305, "learning_rate": 0.00014973673552045363, "loss": 0.3726, "step": 3697 }, { "epoch": 0.2995787427090084, "grad_norm": 0.03450790420174599, "learning_rate": 0.00014977723774807616, "loss": 0.3809, "step": 3698 }, { "epoch": 0.29965975372650683, "grad_norm": 0.04092950001358986, "learning_rate": 0.00014981773997569867, "loss": 0.3833, "step": 3699 }, { "epoch": 0.2997407647440052, "grad_norm": 0.03737466782331467, "learning_rate": 0.00014985824220332117, "loss": 0.3669, "step": 3700 }, { "epoch": 0.29982177576150354, "grad_norm": 0.03429148718714714, "learning_rate": 0.0001498987444309437, "loss": 0.3875, "step": 3701 }, { "epoch": 0.29990278677900195, "grad_norm": 0.038960050791502, "learning_rate": 0.00014993924665856621, "loss": 0.3762, "step": 3702 }, { "epoch": 0.2999837977965003, "grad_norm": 0.03236428275704384, "learning_rate": 0.00014997974888618875, "loss": 0.3574, "step": 3703 }, { "epoch": 0.3000648088139987, "grad_norm": 0.035337019711732864, "learning_rate": 0.00015002025111381125, "loss": 0.3855, "step": 3704 }, { "epoch": 0.3001458198314971, "grad_norm": 0.043556008487939835, "learning_rate": 0.0001500607533414338, "loss": 0.3833, "step": 3705 }, { "epoch": 0.3002268308489955, "grad_norm": 0.03529440239071846, "learning_rate": 0.0001501012555690563, "loss": 0.3947, "step": 3706 }, { "epoch": 0.30030784186649384, "grad_norm": 0.03917783871293068, "learning_rate": 0.00015014175779667883, "loss": 0.3524, "step": 3707 }, { "epoch": 0.3003888528839922, "grad_norm": 0.03778640553355217, "learning_rate": 0.00015018226002430133, "loss": 0.395, "step": 3708 }, { "epoch": 0.3004698639014906, "grad_norm": 0.041695136576890945, "learning_rate": 0.00015022276225192387, "loss": 0.4077, "step": 3709 }, { "epoch": 0.30055087491898896, "grad_norm": 0.03826890140771866, "learning_rate": 0.00015026326447954637, "loss": 0.3684, "step": 3710 }, { "epoch": 0.3006318859364874, "grad_norm": 0.040991995483636856, "learning_rate": 0.0001503037667071689, "loss": 0.4126, "step": 3711 }, { "epoch": 0.30071289695398573, "grad_norm": 0.034812260419130325, "learning_rate": 0.0001503442689347914, "loss": 0.3823, "step": 3712 }, { "epoch": 0.30079390797148414, "grad_norm": 0.034525495022535324, "learning_rate": 0.00015038477116241394, "loss": 0.3554, "step": 3713 }, { "epoch": 0.3008749189889825, "grad_norm": 0.03014807216823101, "learning_rate": 0.00015042527339003648, "loss": 0.3261, "step": 3714 }, { "epoch": 0.30095593000648085, "grad_norm": 0.03565245866775513, "learning_rate": 0.00015046577561765898, "loss": 0.324, "step": 3715 }, { "epoch": 0.30103694102397927, "grad_norm": 0.04224449023604393, "learning_rate": 0.00015050627784528152, "loss": 0.3788, "step": 3716 }, { "epoch": 0.3011179520414776, "grad_norm": 0.035440728068351746, "learning_rate": 0.00015054678007290402, "loss": 0.344, "step": 3717 }, { "epoch": 0.30119896305897603, "grad_norm": 0.03690610080957413, "learning_rate": 0.00015058728230052656, "loss": 0.3534, "step": 3718 }, { "epoch": 0.3012799740764744, "grad_norm": 0.04340109974145889, "learning_rate": 0.00015062778452814906, "loss": 0.4237, "step": 3719 }, { "epoch": 0.3013609850939728, "grad_norm": 0.03343253582715988, "learning_rate": 0.00015066828675577157, "loss": 0.3615, "step": 3720 }, { "epoch": 0.30144199611147116, "grad_norm": 0.03364843130111694, "learning_rate": 0.0001507087889833941, "loss": 0.3531, "step": 3721 }, { "epoch": 0.30152300712896957, "grad_norm": 0.033783357590436935, "learning_rate": 0.0001507492912110166, "loss": 0.3652, "step": 3722 }, { "epoch": 0.3016040181464679, "grad_norm": 0.04219111055135727, "learning_rate": 0.00015078979343863911, "loss": 0.3457, "step": 3723 }, { "epoch": 0.3016850291639663, "grad_norm": 0.03568140044808388, "learning_rate": 0.00015083029566626165, "loss": 0.3636, "step": 3724 }, { "epoch": 0.3017660401814647, "grad_norm": 0.037624381482601166, "learning_rate": 0.00015087079789388415, "loss": 0.4275, "step": 3725 }, { "epoch": 0.30184705119896305, "grad_norm": 0.036071497946977615, "learning_rate": 0.00015091130012150669, "loss": 0.3749, "step": 3726 }, { "epoch": 0.30192806221646146, "grad_norm": 0.04090581461787224, "learning_rate": 0.0001509518023491292, "loss": 0.3617, "step": 3727 }, { "epoch": 0.3020090732339598, "grad_norm": 0.04076239466667175, "learning_rate": 0.00015099230457675173, "loss": 0.3988, "step": 3728 }, { "epoch": 0.3020900842514582, "grad_norm": 0.04235999286174774, "learning_rate": 0.00015103280680437423, "loss": 0.3757, "step": 3729 }, { "epoch": 0.3021710952689566, "grad_norm": 0.0365556962788105, "learning_rate": 0.00015107330903199676, "loss": 0.4298, "step": 3730 }, { "epoch": 0.30225210628645494, "grad_norm": 0.043695688247680664, "learning_rate": 0.00015111381125961927, "loss": 0.3601, "step": 3731 }, { "epoch": 0.30233311730395335, "grad_norm": 0.04236026108264923, "learning_rate": 0.0001511543134872418, "loss": 0.4221, "step": 3732 }, { "epoch": 0.3024141283214517, "grad_norm": 0.040135957300662994, "learning_rate": 0.00015119481571486434, "loss": 0.4264, "step": 3733 }, { "epoch": 0.3024951393389501, "grad_norm": 0.036353833973407745, "learning_rate": 0.00015123531794248684, "loss": 0.3314, "step": 3734 }, { "epoch": 0.30257615035644847, "grad_norm": 0.03830006346106529, "learning_rate": 0.00015127582017010938, "loss": 0.4432, "step": 3735 }, { "epoch": 0.3026571613739469, "grad_norm": 0.03320148587226868, "learning_rate": 0.00015131632239773188, "loss": 0.3652, "step": 3736 }, { "epoch": 0.30273817239144524, "grad_norm": 0.0449746735394001, "learning_rate": 0.00015135682462535442, "loss": 0.4141, "step": 3737 }, { "epoch": 0.3028191834089436, "grad_norm": 0.04558303579688072, "learning_rate": 0.00015139732685297692, "loss": 0.3729, "step": 3738 }, { "epoch": 0.302900194426442, "grad_norm": 0.037199411541223526, "learning_rate": 0.00015143782908059946, "loss": 0.4127, "step": 3739 }, { "epoch": 0.30298120544394036, "grad_norm": 0.036756481975317, "learning_rate": 0.00015147833130822196, "loss": 0.3719, "step": 3740 }, { "epoch": 0.30306221646143877, "grad_norm": 0.037477146834135056, "learning_rate": 0.0001515188335358445, "loss": 0.3545, "step": 3741 }, { "epoch": 0.3031432274789371, "grad_norm": 0.03502131998538971, "learning_rate": 0.000151559335763467, "loss": 0.3462, "step": 3742 }, { "epoch": 0.30322423849643554, "grad_norm": 0.04525633901357651, "learning_rate": 0.00015159983799108953, "loss": 0.3617, "step": 3743 }, { "epoch": 0.3033052495139339, "grad_norm": 0.03657715395092964, "learning_rate": 0.00015164034021871204, "loss": 0.3848, "step": 3744 }, { "epoch": 0.30338626053143225, "grad_norm": 0.030081717297434807, "learning_rate": 0.00015168084244633455, "loss": 0.3351, "step": 3745 }, { "epoch": 0.30346727154893066, "grad_norm": 0.04054329916834831, "learning_rate": 0.00015172134467395708, "loss": 0.4144, "step": 3746 }, { "epoch": 0.303548282566429, "grad_norm": 0.03237282857298851, "learning_rate": 0.00015176184690157959, "loss": 0.3311, "step": 3747 }, { "epoch": 0.30362929358392743, "grad_norm": 0.04375077411532402, "learning_rate": 0.0001518023491292021, "loss": 0.3587, "step": 3748 }, { "epoch": 0.3037103046014258, "grad_norm": 0.03343536704778671, "learning_rate": 0.00015184285135682463, "loss": 0.337, "step": 3749 }, { "epoch": 0.3037913156189242, "grad_norm": 0.03777144104242325, "learning_rate": 0.00015188335358444713, "loss": 0.3863, "step": 3750 }, { "epoch": 0.30387232663642255, "grad_norm": 0.030147891491651535, "learning_rate": 0.00015192385581206966, "loss": 0.3014, "step": 3751 }, { "epoch": 0.3039533376539209, "grad_norm": 0.048893216997385025, "learning_rate": 0.0001519643580396922, "loss": 0.4196, "step": 3752 }, { "epoch": 0.3040343486714193, "grad_norm": 0.03945691138505936, "learning_rate": 0.0001520048602673147, "loss": 0.386, "step": 3753 }, { "epoch": 0.3041153596889177, "grad_norm": 0.03786306828260422, "learning_rate": 0.00015204536249493724, "loss": 0.3315, "step": 3754 }, { "epoch": 0.3041963707064161, "grad_norm": 0.03841707110404968, "learning_rate": 0.00015208586472255974, "loss": 0.3924, "step": 3755 }, { "epoch": 0.30427738172391444, "grad_norm": 0.035001907497644424, "learning_rate": 0.00015212636695018228, "loss": 0.4017, "step": 3756 }, { "epoch": 0.30435839274141285, "grad_norm": 0.03870251774787903, "learning_rate": 0.00015216686917780478, "loss": 0.4184, "step": 3757 }, { "epoch": 0.3044394037589112, "grad_norm": 0.03757096081972122, "learning_rate": 0.00015220737140542732, "loss": 0.3805, "step": 3758 }, { "epoch": 0.30452041477640956, "grad_norm": 0.044028136879205704, "learning_rate": 0.00015224787363304982, "loss": 0.3585, "step": 3759 }, { "epoch": 0.304601425793908, "grad_norm": 0.041459329426288605, "learning_rate": 0.00015228837586067235, "loss": 0.3789, "step": 3760 }, { "epoch": 0.30468243681140633, "grad_norm": 0.045845143496990204, "learning_rate": 0.00015232887808829486, "loss": 0.3638, "step": 3761 }, { "epoch": 0.30476344782890474, "grad_norm": 0.0462605319917202, "learning_rate": 0.0001523693803159174, "loss": 0.3344, "step": 3762 }, { "epoch": 0.3048444588464031, "grad_norm": 0.03516705334186554, "learning_rate": 0.0001524098825435399, "loss": 0.3918, "step": 3763 }, { "epoch": 0.3049254698639015, "grad_norm": 0.05478263646364212, "learning_rate": 0.00015245038477116243, "loss": 0.3916, "step": 3764 }, { "epoch": 0.30500648088139987, "grad_norm": 0.0416891872882843, "learning_rate": 0.00015249088699878494, "loss": 0.339, "step": 3765 }, { "epoch": 0.3050874918988983, "grad_norm": 0.03643078729510307, "learning_rate": 0.00015253138922640747, "loss": 0.3638, "step": 3766 }, { "epoch": 0.30516850291639663, "grad_norm": 0.040709856897592545, "learning_rate": 0.00015257189145402998, "loss": 0.3849, "step": 3767 }, { "epoch": 0.305249513933895, "grad_norm": 0.041243139654397964, "learning_rate": 0.0001526123936816525, "loss": 0.358, "step": 3768 }, { "epoch": 0.3053305249513934, "grad_norm": 0.03741239011287689, "learning_rate": 0.00015265289590927502, "loss": 0.395, "step": 3769 }, { "epoch": 0.30541153596889176, "grad_norm": 0.033558666706085205, "learning_rate": 0.00015269339813689752, "loss": 0.3565, "step": 3770 }, { "epoch": 0.30549254698639017, "grad_norm": 0.03725985065102577, "learning_rate": 0.00015273390036452006, "loss": 0.3383, "step": 3771 }, { "epoch": 0.3055735580038885, "grad_norm": 0.03618418425321579, "learning_rate": 0.00015277440259214256, "loss": 0.3852, "step": 3772 }, { "epoch": 0.30565456902138693, "grad_norm": 0.03546826168894768, "learning_rate": 0.0001528149048197651, "loss": 0.3507, "step": 3773 }, { "epoch": 0.3057355800388853, "grad_norm": 0.040979351848363876, "learning_rate": 0.0001528554070473876, "loss": 0.4016, "step": 3774 }, { "epoch": 0.30581659105638365, "grad_norm": 0.04222219064831734, "learning_rate": 0.00015289590927501014, "loss": 0.396, "step": 3775 }, { "epoch": 0.30589760207388206, "grad_norm": 0.03376726806163788, "learning_rate": 0.00015293641150263264, "loss": 0.3172, "step": 3776 }, { "epoch": 0.3059786130913804, "grad_norm": 0.04466283321380615, "learning_rate": 0.00015297691373025518, "loss": 0.3875, "step": 3777 }, { "epoch": 0.3060596241088788, "grad_norm": 0.039065901190042496, "learning_rate": 0.00015301741595787768, "loss": 0.3869, "step": 3778 }, { "epoch": 0.3061406351263772, "grad_norm": 0.036004744470119476, "learning_rate": 0.00015305791818550022, "loss": 0.3518, "step": 3779 }, { "epoch": 0.3062216461438756, "grad_norm": 0.05747569352388382, "learning_rate": 0.00015309842041312272, "loss": 0.3464, "step": 3780 }, { "epoch": 0.30630265716137395, "grad_norm": 0.03801568225026131, "learning_rate": 0.00015313892264074525, "loss": 0.3534, "step": 3781 }, { "epoch": 0.3063836681788723, "grad_norm": 0.0325610414147377, "learning_rate": 0.00015317942486836776, "loss": 0.3749, "step": 3782 }, { "epoch": 0.3064646791963707, "grad_norm": 0.049797795712947845, "learning_rate": 0.0001532199270959903, "loss": 0.3833, "step": 3783 }, { "epoch": 0.30654569021386907, "grad_norm": 0.03561552241444588, "learning_rate": 0.0001532604293236128, "loss": 0.385, "step": 3784 }, { "epoch": 0.3066267012313675, "grad_norm": 0.03801201283931732, "learning_rate": 0.00015330093155123533, "loss": 0.3636, "step": 3785 }, { "epoch": 0.30670771224886584, "grad_norm": 0.03518911823630333, "learning_rate": 0.00015334143377885784, "loss": 0.3582, "step": 3786 }, { "epoch": 0.30678872326636425, "grad_norm": 0.03534886613488197, "learning_rate": 0.00015338193600648037, "loss": 0.3912, "step": 3787 }, { "epoch": 0.3068697342838626, "grad_norm": 0.032708004117012024, "learning_rate": 0.00015342243823410288, "loss": 0.3523, "step": 3788 }, { "epoch": 0.30695074530136096, "grad_norm": 0.04191884770989418, "learning_rate": 0.0001534629404617254, "loss": 0.3536, "step": 3789 }, { "epoch": 0.3070317563188594, "grad_norm": 0.04125582426786423, "learning_rate": 0.00015350344268934794, "loss": 0.3842, "step": 3790 }, { "epoch": 0.30711276733635773, "grad_norm": 0.035285379737615585, "learning_rate": 0.00015354394491697045, "loss": 0.3603, "step": 3791 }, { "epoch": 0.30719377835385614, "grad_norm": 0.04442056640982628, "learning_rate": 0.00015358444714459296, "loss": 0.386, "step": 3792 }, { "epoch": 0.3072747893713545, "grad_norm": 0.03986095264554024, "learning_rate": 0.00015362494937221546, "loss": 0.3747, "step": 3793 }, { "epoch": 0.3073558003888529, "grad_norm": 0.03432997688651085, "learning_rate": 0.000153665451599838, "loss": 0.3494, "step": 3794 }, { "epoch": 0.30743681140635126, "grad_norm": 0.047092802822589874, "learning_rate": 0.0001537059538274605, "loss": 0.3552, "step": 3795 }, { "epoch": 0.3075178224238496, "grad_norm": 0.039662785828113556, "learning_rate": 0.00015374645605508304, "loss": 0.3976, "step": 3796 }, { "epoch": 0.30759883344134803, "grad_norm": 0.04702027142047882, "learning_rate": 0.00015378695828270554, "loss": 0.4191, "step": 3797 }, { "epoch": 0.3076798444588464, "grad_norm": 0.03785824775695801, "learning_rate": 0.00015382746051032808, "loss": 0.345, "step": 3798 }, { "epoch": 0.3077608554763448, "grad_norm": 0.042051903903484344, "learning_rate": 0.00015386796273795058, "loss": 0.4369, "step": 3799 }, { "epoch": 0.30784186649384315, "grad_norm": 0.04059034585952759, "learning_rate": 0.00015390846496557311, "loss": 0.3913, "step": 3800 }, { "epoch": 0.30792287751134156, "grad_norm": 0.04255729168653488, "learning_rate": 0.00015394896719319562, "loss": 0.3709, "step": 3801 }, { "epoch": 0.3080038885288399, "grad_norm": 0.03430137783288956, "learning_rate": 0.00015398946942081815, "loss": 0.3481, "step": 3802 }, { "epoch": 0.3080848995463383, "grad_norm": 0.037832751870155334, "learning_rate": 0.00015402997164844066, "loss": 0.3803, "step": 3803 }, { "epoch": 0.3081659105638367, "grad_norm": 0.03891238570213318, "learning_rate": 0.0001540704738760632, "loss": 0.352, "step": 3804 }, { "epoch": 0.30824692158133504, "grad_norm": 0.03535735234618187, "learning_rate": 0.0001541109761036857, "loss": 0.3866, "step": 3805 }, { "epoch": 0.30832793259883345, "grad_norm": 0.034961897879838943, "learning_rate": 0.00015415147833130823, "loss": 0.3778, "step": 3806 }, { "epoch": 0.3084089436163318, "grad_norm": 0.042050305753946304, "learning_rate": 0.00015419198055893074, "loss": 0.3674, "step": 3807 }, { "epoch": 0.3084899546338302, "grad_norm": 0.0314970538020134, "learning_rate": 0.00015423248278655327, "loss": 0.3533, "step": 3808 }, { "epoch": 0.3085709656513286, "grad_norm": 0.03608867898583412, "learning_rate": 0.0001542729850141758, "loss": 0.3695, "step": 3809 }, { "epoch": 0.30865197666882693, "grad_norm": 0.04926412180066109, "learning_rate": 0.0001543134872417983, "loss": 0.4015, "step": 3810 }, { "epoch": 0.30873298768632534, "grad_norm": 0.03634179010987282, "learning_rate": 0.00015435398946942084, "loss": 0.3532, "step": 3811 }, { "epoch": 0.3088139987038237, "grad_norm": 0.037701528519392014, "learning_rate": 0.00015439449169704335, "loss": 0.3515, "step": 3812 }, { "epoch": 0.3088950097213221, "grad_norm": 0.040494389832019806, "learning_rate": 0.00015443499392466588, "loss": 0.3561, "step": 3813 }, { "epoch": 0.30897602073882047, "grad_norm": 0.03875404968857765, "learning_rate": 0.0001544754961522884, "loss": 0.354, "step": 3814 }, { "epoch": 0.3090570317563189, "grad_norm": 0.040258169174194336, "learning_rate": 0.0001545159983799109, "loss": 0.3379, "step": 3815 }, { "epoch": 0.30913804277381723, "grad_norm": 0.03131493180990219, "learning_rate": 0.00015455650060753343, "loss": 0.3281, "step": 3816 }, { "epoch": 0.30921905379131565, "grad_norm": 0.038832154124975204, "learning_rate": 0.00015459700283515594, "loss": 0.3928, "step": 3817 }, { "epoch": 0.309300064808814, "grad_norm": 0.06491102278232574, "learning_rate": 0.00015463750506277844, "loss": 0.3753, "step": 3818 }, { "epoch": 0.30938107582631236, "grad_norm": 0.037673432379961014, "learning_rate": 0.00015467800729040097, "loss": 0.4006, "step": 3819 }, { "epoch": 0.30946208684381077, "grad_norm": 0.0383102111518383, "learning_rate": 0.00015471850951802348, "loss": 0.3528, "step": 3820 }, { "epoch": 0.3095430978613091, "grad_norm": 0.03696411848068237, "learning_rate": 0.00015475901174564601, "loss": 0.3814, "step": 3821 }, { "epoch": 0.30962410887880754, "grad_norm": 0.03863132745027542, "learning_rate": 0.00015479951397326852, "loss": 0.4174, "step": 3822 }, { "epoch": 0.3097051198963059, "grad_norm": 0.034970369189977646, "learning_rate": 0.00015484001620089105, "loss": 0.3798, "step": 3823 }, { "epoch": 0.3097861309138043, "grad_norm": 0.051043085753917694, "learning_rate": 0.00015488051842851356, "loss": 0.4075, "step": 3824 }, { "epoch": 0.30986714193130266, "grad_norm": 0.04437202587723732, "learning_rate": 0.0001549210206561361, "loss": 0.3273, "step": 3825 }, { "epoch": 0.309948152948801, "grad_norm": 0.03810075297951698, "learning_rate": 0.0001549615228837586, "loss": 0.375, "step": 3826 }, { "epoch": 0.3100291639662994, "grad_norm": 0.03678411617875099, "learning_rate": 0.00015500202511138113, "loss": 0.3642, "step": 3827 }, { "epoch": 0.3101101749837978, "grad_norm": 0.03821595013141632, "learning_rate": 0.00015504252733900367, "loss": 0.3331, "step": 3828 }, { "epoch": 0.3101911860012962, "grad_norm": 0.038664739578962326, "learning_rate": 0.00015508302956662617, "loss": 0.3971, "step": 3829 }, { "epoch": 0.31027219701879455, "grad_norm": 0.03899373114109039, "learning_rate": 0.0001551235317942487, "loss": 0.3777, "step": 3830 }, { "epoch": 0.31035320803629296, "grad_norm": 0.039425078779459, "learning_rate": 0.0001551640340218712, "loss": 0.3583, "step": 3831 }, { "epoch": 0.3104342190537913, "grad_norm": 0.03491853550076485, "learning_rate": 0.00015520453624949374, "loss": 0.3701, "step": 3832 }, { "epoch": 0.31051523007128967, "grad_norm": 0.03618711233139038, "learning_rate": 0.00015524503847711625, "loss": 0.4148, "step": 3833 }, { "epoch": 0.3105962410887881, "grad_norm": 0.03505758196115494, "learning_rate": 0.00015528554070473878, "loss": 0.3773, "step": 3834 }, { "epoch": 0.31067725210628644, "grad_norm": 0.038697127252817154, "learning_rate": 0.0001553260429323613, "loss": 0.3366, "step": 3835 }, { "epoch": 0.31075826312378485, "grad_norm": 0.03813067823648453, "learning_rate": 0.00015536654515998382, "loss": 0.3935, "step": 3836 }, { "epoch": 0.3108392741412832, "grad_norm": 0.032286062836647034, "learning_rate": 0.00015540704738760633, "loss": 0.3588, "step": 3837 }, { "epoch": 0.3109202851587816, "grad_norm": 0.03338897228240967, "learning_rate": 0.00015544754961522886, "loss": 0.3776, "step": 3838 }, { "epoch": 0.31100129617628, "grad_norm": 0.03878074884414673, "learning_rate": 0.00015548805184285137, "loss": 0.3242, "step": 3839 }, { "epoch": 0.31108230719377833, "grad_norm": 0.03827903792262077, "learning_rate": 0.00015552855407047387, "loss": 0.4061, "step": 3840 }, { "epoch": 0.31116331821127674, "grad_norm": 0.04468392953276634, "learning_rate": 0.0001555690562980964, "loss": 0.3881, "step": 3841 }, { "epoch": 0.3112443292287751, "grad_norm": 0.033204734325408936, "learning_rate": 0.0001556095585257189, "loss": 0.3998, "step": 3842 }, { "epoch": 0.3113253402462735, "grad_norm": 0.04107440635561943, "learning_rate": 0.00015565006075334142, "loss": 0.3947, "step": 3843 }, { "epoch": 0.31140635126377186, "grad_norm": 0.04109537973999977, "learning_rate": 0.00015569056298096395, "loss": 0.367, "step": 3844 }, { "epoch": 0.3114873622812703, "grad_norm": 0.04015589505434036, "learning_rate": 0.00015573106520858649, "loss": 0.359, "step": 3845 }, { "epoch": 0.31156837329876863, "grad_norm": 0.03611072152853012, "learning_rate": 0.000155771567436209, "loss": 0.3701, "step": 3846 }, { "epoch": 0.311649384316267, "grad_norm": 0.03249693661928177, "learning_rate": 0.00015581206966383153, "loss": 0.3294, "step": 3847 }, { "epoch": 0.3117303953337654, "grad_norm": 0.03942349553108215, "learning_rate": 0.00015585257189145403, "loss": 0.4098, "step": 3848 }, { "epoch": 0.31181140635126375, "grad_norm": 0.034818992018699646, "learning_rate": 0.00015589307411907656, "loss": 0.3704, "step": 3849 }, { "epoch": 0.31189241736876216, "grad_norm": 0.03547549620270729, "learning_rate": 0.00015593357634669907, "loss": 0.3751, "step": 3850 }, { "epoch": 0.3119734283862605, "grad_norm": 0.04353281483054161, "learning_rate": 0.0001559740785743216, "loss": 0.3593, "step": 3851 }, { "epoch": 0.31205443940375893, "grad_norm": 0.0328286774456501, "learning_rate": 0.0001560145808019441, "loss": 0.3672, "step": 3852 }, { "epoch": 0.3121354504212573, "grad_norm": 0.0367732048034668, "learning_rate": 0.00015605508302956664, "loss": 0.3434, "step": 3853 }, { "epoch": 0.31221646143875564, "grad_norm": 0.032217882573604584, "learning_rate": 0.00015609558525718915, "loss": 0.3289, "step": 3854 }, { "epoch": 0.31229747245625405, "grad_norm": 0.04093978926539421, "learning_rate": 0.00015613608748481168, "loss": 0.4299, "step": 3855 }, { "epoch": 0.3123784834737524, "grad_norm": 0.042412422597408295, "learning_rate": 0.0001561765897124342, "loss": 0.3585, "step": 3856 }, { "epoch": 0.3124594944912508, "grad_norm": 0.03818630427122116, "learning_rate": 0.00015621709194005672, "loss": 0.3846, "step": 3857 }, { "epoch": 0.3125405055087492, "grad_norm": 0.04645620286464691, "learning_rate": 0.00015625759416767923, "loss": 0.4463, "step": 3858 }, { "epoch": 0.3126215165262476, "grad_norm": 0.04581103101372719, "learning_rate": 0.00015629809639530176, "loss": 0.4104, "step": 3859 }, { "epoch": 0.31270252754374595, "grad_norm": 0.042016733437776566, "learning_rate": 0.00015633859862292427, "loss": 0.374, "step": 3860 }, { "epoch": 0.31278353856124436, "grad_norm": 0.0401945486664772, "learning_rate": 0.0001563791008505468, "loss": 0.3817, "step": 3861 }, { "epoch": 0.3128645495787427, "grad_norm": 0.030451439321041107, "learning_rate": 0.0001564196030781693, "loss": 0.3348, "step": 3862 }, { "epoch": 0.31294556059624107, "grad_norm": 0.035469312220811844, "learning_rate": 0.0001564601053057918, "loss": 0.371, "step": 3863 }, { "epoch": 0.3130265716137395, "grad_norm": 0.03374820947647095, "learning_rate": 0.00015650060753341435, "loss": 0.3808, "step": 3864 }, { "epoch": 0.31310758263123784, "grad_norm": 0.03543124347925186, "learning_rate": 0.00015654110976103685, "loss": 0.374, "step": 3865 }, { "epoch": 0.31318859364873625, "grad_norm": 0.04282199963927269, "learning_rate": 0.00015658161198865939, "loss": 0.3263, "step": 3866 }, { "epoch": 0.3132696046662346, "grad_norm": 0.03741493076086044, "learning_rate": 0.0001566221142162819, "loss": 0.3565, "step": 3867 }, { "epoch": 0.313350615683733, "grad_norm": 0.03533174842596054, "learning_rate": 0.00015666261644390442, "loss": 0.3678, "step": 3868 }, { "epoch": 0.31343162670123137, "grad_norm": 0.03229209780693054, "learning_rate": 0.00015670311867152693, "loss": 0.3772, "step": 3869 }, { "epoch": 0.3135126377187297, "grad_norm": 0.03186071664094925, "learning_rate": 0.00015674362089914946, "loss": 0.386, "step": 3870 }, { "epoch": 0.31359364873622814, "grad_norm": 0.04570740461349487, "learning_rate": 0.00015678412312677197, "loss": 0.4336, "step": 3871 }, { "epoch": 0.3136746597537265, "grad_norm": 0.030013004317879677, "learning_rate": 0.0001568246253543945, "loss": 0.3536, "step": 3872 }, { "epoch": 0.3137556707712249, "grad_norm": 0.03668045252561569, "learning_rate": 0.000156865127582017, "loss": 0.3605, "step": 3873 }, { "epoch": 0.31383668178872326, "grad_norm": 0.04096108675003052, "learning_rate": 0.00015690562980963954, "loss": 0.3905, "step": 3874 }, { "epoch": 0.31391769280622167, "grad_norm": 0.03962705284357071, "learning_rate": 0.00015694613203726205, "loss": 0.4093, "step": 3875 }, { "epoch": 0.31399870382372, "grad_norm": 0.03714505955576897, "learning_rate": 0.00015698663426488458, "loss": 0.4513, "step": 3876 }, { "epoch": 0.3140797148412184, "grad_norm": 0.03829257935285568, "learning_rate": 0.0001570271364925071, "loss": 0.3441, "step": 3877 }, { "epoch": 0.3141607258587168, "grad_norm": 0.03213474899530411, "learning_rate": 0.00015706763872012962, "loss": 0.3869, "step": 3878 }, { "epoch": 0.31424173687621515, "grad_norm": 0.047674331814050674, "learning_rate": 0.00015710814094775213, "loss": 0.4068, "step": 3879 }, { "epoch": 0.31432274789371356, "grad_norm": 0.049570657312870026, "learning_rate": 0.00015714864317537466, "loss": 0.394, "step": 3880 }, { "epoch": 0.3144037589112119, "grad_norm": 0.03454206883907318, "learning_rate": 0.00015718914540299717, "loss": 0.3608, "step": 3881 }, { "epoch": 0.31448476992871033, "grad_norm": 0.03465542942285538, "learning_rate": 0.0001572296476306197, "loss": 0.3472, "step": 3882 }, { "epoch": 0.3145657809462087, "grad_norm": 0.03724677115678787, "learning_rate": 0.00015727014985824223, "loss": 0.4236, "step": 3883 }, { "epoch": 0.31464679196370704, "grad_norm": 0.036393288522958755, "learning_rate": 0.00015731065208586474, "loss": 0.3341, "step": 3884 }, { "epoch": 0.31472780298120545, "grad_norm": 0.033561281859874725, "learning_rate": 0.00015735115431348725, "loss": 0.354, "step": 3885 }, { "epoch": 0.3148088139987038, "grad_norm": 0.041037797927856445, "learning_rate": 0.00015739165654110978, "loss": 0.3653, "step": 3886 }, { "epoch": 0.3148898250162022, "grad_norm": 0.03577622398734093, "learning_rate": 0.00015743215876873229, "loss": 0.4372, "step": 3887 }, { "epoch": 0.3149708360337006, "grad_norm": 0.03454362228512764, "learning_rate": 0.0001574726609963548, "loss": 0.3424, "step": 3888 }, { "epoch": 0.315051847051199, "grad_norm": 0.04236502945423126, "learning_rate": 0.00015751316322397732, "loss": 0.3823, "step": 3889 }, { "epoch": 0.31513285806869734, "grad_norm": 0.0397837869822979, "learning_rate": 0.00015755366545159983, "loss": 0.358, "step": 3890 }, { "epoch": 0.3152138690861957, "grad_norm": 0.0396885946393013, "learning_rate": 0.00015759416767922236, "loss": 0.403, "step": 3891 }, { "epoch": 0.3152948801036941, "grad_norm": 0.0386836864054203, "learning_rate": 0.00015763466990684487, "loss": 0.3255, "step": 3892 }, { "epoch": 0.31537589112119246, "grad_norm": 0.047269124537706375, "learning_rate": 0.0001576751721344674, "loss": 0.4151, "step": 3893 }, { "epoch": 0.3154569021386909, "grad_norm": 0.04643293470144272, "learning_rate": 0.0001577156743620899, "loss": 0.427, "step": 3894 }, { "epoch": 0.31553791315618923, "grad_norm": 0.0444369800388813, "learning_rate": 0.00015775617658971244, "loss": 0.4181, "step": 3895 }, { "epoch": 0.31561892417368764, "grad_norm": 0.03646974265575409, "learning_rate": 0.00015779667881733495, "loss": 0.4078, "step": 3896 }, { "epoch": 0.315699935191186, "grad_norm": 0.04198101535439491, "learning_rate": 0.00015783718104495748, "loss": 0.361, "step": 3897 }, { "epoch": 0.31578094620868435, "grad_norm": 0.040488485246896744, "learning_rate": 0.00015787768327258, "loss": 0.3837, "step": 3898 }, { "epoch": 0.31586195722618277, "grad_norm": 0.03984922170639038, "learning_rate": 0.00015791818550020252, "loss": 0.4125, "step": 3899 }, { "epoch": 0.3159429682436811, "grad_norm": 0.03467942401766777, "learning_rate": 0.00015795868772782503, "loss": 0.3673, "step": 3900 }, { "epoch": 0.31602397926117953, "grad_norm": 0.03277461603283882, "learning_rate": 0.00015799918995544756, "loss": 0.3796, "step": 3901 }, { "epoch": 0.3161049902786779, "grad_norm": 0.03881349042057991, "learning_rate": 0.0001580396921830701, "loss": 0.3427, "step": 3902 }, { "epoch": 0.3161860012961763, "grad_norm": 0.03951621428132057, "learning_rate": 0.0001580801944106926, "loss": 0.4245, "step": 3903 }, { "epoch": 0.31626701231367466, "grad_norm": 0.037890203297138214, "learning_rate": 0.00015812069663831513, "loss": 0.3822, "step": 3904 }, { "epoch": 0.31634802333117307, "grad_norm": 0.038001008331775665, "learning_rate": 0.00015816119886593764, "loss": 0.3829, "step": 3905 }, { "epoch": 0.3164290343486714, "grad_norm": 0.04266396909952164, "learning_rate": 0.00015820170109356017, "loss": 0.3751, "step": 3906 }, { "epoch": 0.3165100453661698, "grad_norm": 0.03496110811829567, "learning_rate": 0.00015824220332118268, "loss": 0.3971, "step": 3907 }, { "epoch": 0.3165910563836682, "grad_norm": 0.032627735286951065, "learning_rate": 0.0001582827055488052, "loss": 0.3925, "step": 3908 }, { "epoch": 0.31667206740116655, "grad_norm": 0.040596555918455124, "learning_rate": 0.00015832320777642772, "loss": 0.3649, "step": 3909 }, { "epoch": 0.31675307841866496, "grad_norm": 0.04176980257034302, "learning_rate": 0.00015836371000405022, "loss": 0.4309, "step": 3910 }, { "epoch": 0.3168340894361633, "grad_norm": 0.03294534981250763, "learning_rate": 0.00015840421223167276, "loss": 0.3871, "step": 3911 }, { "epoch": 0.3169151004536617, "grad_norm": 0.032855693250894547, "learning_rate": 0.00015844471445929526, "loss": 0.3522, "step": 3912 }, { "epoch": 0.3169961114711601, "grad_norm": 0.04674731567502022, "learning_rate": 0.00015848521668691777, "loss": 0.3731, "step": 3913 }, { "epoch": 0.31707712248865844, "grad_norm": 0.04186466336250305, "learning_rate": 0.0001585257189145403, "loss": 0.3498, "step": 3914 }, { "epoch": 0.31715813350615685, "grad_norm": 0.03885754197835922, "learning_rate": 0.0001585662211421628, "loss": 0.3419, "step": 3915 }, { "epoch": 0.3172391445236552, "grad_norm": 0.04161924123764038, "learning_rate": 0.00015860672336978534, "loss": 0.3811, "step": 3916 }, { "epoch": 0.3173201555411536, "grad_norm": 0.034731023013591766, "learning_rate": 0.00015864722559740785, "loss": 0.3516, "step": 3917 }, { "epoch": 0.31740116655865197, "grad_norm": 0.041832007467746735, "learning_rate": 0.00015868772782503038, "loss": 0.3688, "step": 3918 }, { "epoch": 0.3174821775761504, "grad_norm": 0.03571493178606033, "learning_rate": 0.0001587282300526529, "loss": 0.3345, "step": 3919 }, { "epoch": 0.31756318859364874, "grad_norm": 0.04684751108288765, "learning_rate": 0.00015876873228027542, "loss": 0.4023, "step": 3920 }, { "epoch": 0.3176441996111471, "grad_norm": 0.032821930944919586, "learning_rate": 0.00015880923450789795, "loss": 0.3578, "step": 3921 }, { "epoch": 0.3177252106286455, "grad_norm": 0.035076357424259186, "learning_rate": 0.00015884973673552046, "loss": 0.3392, "step": 3922 }, { "epoch": 0.31780622164614386, "grad_norm": 0.050397682934999466, "learning_rate": 0.000158890238963143, "loss": 0.3661, "step": 3923 }, { "epoch": 0.31788723266364227, "grad_norm": 0.038197655230760574, "learning_rate": 0.0001589307411907655, "loss": 0.3239, "step": 3924 }, { "epoch": 0.3179682436811406, "grad_norm": 0.03825785592198372, "learning_rate": 0.00015897124341838803, "loss": 0.3271, "step": 3925 }, { "epoch": 0.31804925469863904, "grad_norm": 0.03636878728866577, "learning_rate": 0.00015901174564601054, "loss": 0.3732, "step": 3926 }, { "epoch": 0.3181302657161374, "grad_norm": 0.03680877014994621, "learning_rate": 0.00015905224787363307, "loss": 0.3994, "step": 3927 }, { "epoch": 0.31821127673363575, "grad_norm": 0.03805559128522873, "learning_rate": 0.00015909275010125558, "loss": 0.3787, "step": 3928 }, { "epoch": 0.31829228775113416, "grad_norm": 0.03796125948429108, "learning_rate": 0.0001591332523288781, "loss": 0.3882, "step": 3929 }, { "epoch": 0.3183732987686325, "grad_norm": 0.0325532890856266, "learning_rate": 0.00015917375455650062, "loss": 0.3429, "step": 3930 }, { "epoch": 0.31845430978613093, "grad_norm": 0.034440673887729645, "learning_rate": 0.00015921425678412315, "loss": 0.3795, "step": 3931 }, { "epoch": 0.3185353208036293, "grad_norm": 0.0457146018743515, "learning_rate": 0.00015925475901174566, "loss": 0.3864, "step": 3932 }, { "epoch": 0.3186163318211277, "grad_norm": 0.030668552964925766, "learning_rate": 0.0001592952612393682, "loss": 0.3374, "step": 3933 }, { "epoch": 0.31869734283862605, "grad_norm": 0.04022758826613426, "learning_rate": 0.0001593357634669907, "loss": 0.4046, "step": 3934 }, { "epoch": 0.3187783538561244, "grad_norm": 0.0356454961001873, "learning_rate": 0.0001593762656946132, "loss": 0.3657, "step": 3935 }, { "epoch": 0.3188593648736228, "grad_norm": 0.055281862616539, "learning_rate": 0.0001594167679222357, "loss": 0.3932, "step": 3936 }, { "epoch": 0.3189403758911212, "grad_norm": 0.03439437970519066, "learning_rate": 0.00015945727014985824, "loss": 0.3297, "step": 3937 }, { "epoch": 0.3190213869086196, "grad_norm": 0.035919319838285446, "learning_rate": 0.00015949777237748075, "loss": 0.325, "step": 3938 }, { "epoch": 0.31910239792611794, "grad_norm": 0.043670397251844406, "learning_rate": 0.00015953827460510328, "loss": 0.4025, "step": 3939 }, { "epoch": 0.31918340894361635, "grad_norm": 0.04012720659375191, "learning_rate": 0.00015957877683272581, "loss": 0.3902, "step": 3940 }, { "epoch": 0.3192644199611147, "grad_norm": 0.042554520070552826, "learning_rate": 0.00015961927906034832, "loss": 0.403, "step": 3941 }, { "epoch": 0.31934543097861307, "grad_norm": 0.04239801689982414, "learning_rate": 0.00015965978128797085, "loss": 0.389, "step": 3942 }, { "epoch": 0.3194264419961115, "grad_norm": 0.03829963505268097, "learning_rate": 0.00015970028351559336, "loss": 0.3512, "step": 3943 }, { "epoch": 0.31950745301360983, "grad_norm": 0.03850733861327171, "learning_rate": 0.0001597407857432159, "loss": 0.3895, "step": 3944 }, { "epoch": 0.31958846403110824, "grad_norm": 0.04022325202822685, "learning_rate": 0.0001597812879708384, "loss": 0.3582, "step": 3945 }, { "epoch": 0.3196694750486066, "grad_norm": 0.03183432295918465, "learning_rate": 0.00015982179019846093, "loss": 0.3445, "step": 3946 }, { "epoch": 0.319750486066105, "grad_norm": 0.04546622559428215, "learning_rate": 0.00015986229242608344, "loss": 0.3393, "step": 3947 }, { "epoch": 0.31983149708360337, "grad_norm": 0.031236806884407997, "learning_rate": 0.00015990279465370597, "loss": 0.3326, "step": 3948 }, { "epoch": 0.3199125081011017, "grad_norm": 0.033743731677532196, "learning_rate": 0.00015994329688132848, "loss": 0.3756, "step": 3949 }, { "epoch": 0.31999351911860013, "grad_norm": 0.044623322784900665, "learning_rate": 0.000159983799108951, "loss": 0.3892, "step": 3950 }, { "epoch": 0.3200745301360985, "grad_norm": 0.03815013915300369, "learning_rate": 0.00016002430133657352, "loss": 0.3768, "step": 3951 }, { "epoch": 0.3201555411535969, "grad_norm": 0.031806398183107376, "learning_rate": 0.00016006480356419605, "loss": 0.3566, "step": 3952 }, { "epoch": 0.32023655217109526, "grad_norm": 0.04002038761973381, "learning_rate": 0.00016010530579181856, "loss": 0.3515, "step": 3953 }, { "epoch": 0.32031756318859367, "grad_norm": 0.03920595347881317, "learning_rate": 0.0001601458080194411, "loss": 0.411, "step": 3954 }, { "epoch": 0.320398574206092, "grad_norm": 0.038479164242744446, "learning_rate": 0.0001601863102470636, "loss": 0.4116, "step": 3955 }, { "epoch": 0.32047958522359044, "grad_norm": 0.02915555238723755, "learning_rate": 0.00016022681247468613, "loss": 0.3619, "step": 3956 }, { "epoch": 0.3205605962410888, "grad_norm": 0.032505497336387634, "learning_rate": 0.00016026731470230863, "loss": 0.3693, "step": 3957 }, { "epoch": 0.32064160725858715, "grad_norm": 0.039211973547935486, "learning_rate": 0.00016030781692993114, "loss": 0.3704, "step": 3958 }, { "epoch": 0.32072261827608556, "grad_norm": 0.03348521515727043, "learning_rate": 0.00016034831915755367, "loss": 0.3629, "step": 3959 }, { "epoch": 0.3208036292935839, "grad_norm": 0.04027702286839485, "learning_rate": 0.00016038882138517618, "loss": 0.3444, "step": 3960 }, { "epoch": 0.3208846403110823, "grad_norm": 0.04102419316768646, "learning_rate": 0.0001604293236127987, "loss": 0.3598, "step": 3961 }, { "epoch": 0.3209656513285807, "grad_norm": 0.032008182257413864, "learning_rate": 0.00016046982584042122, "loss": 0.3424, "step": 3962 }, { "epoch": 0.3210466623460791, "grad_norm": 0.03532759100198746, "learning_rate": 0.00016051032806804375, "loss": 0.3722, "step": 3963 }, { "epoch": 0.32112767336357745, "grad_norm": 0.039972130209207535, "learning_rate": 0.00016055083029566626, "loss": 0.3641, "step": 3964 }, { "epoch": 0.3212086843810758, "grad_norm": 0.03748741000890732, "learning_rate": 0.0001605913325232888, "loss": 0.3447, "step": 3965 }, { "epoch": 0.3212896953985742, "grad_norm": 0.03745513781905174, "learning_rate": 0.0001606318347509113, "loss": 0.3469, "step": 3966 }, { "epoch": 0.32137070641607257, "grad_norm": 0.03754321113228798, "learning_rate": 0.00016067233697853383, "loss": 0.3861, "step": 3967 }, { "epoch": 0.321451717433571, "grad_norm": 0.0451197624206543, "learning_rate": 0.00016071283920615634, "loss": 0.4456, "step": 3968 }, { "epoch": 0.32153272845106934, "grad_norm": 0.040234439074993134, "learning_rate": 0.00016075334143377887, "loss": 0.4076, "step": 3969 }, { "epoch": 0.32161373946856775, "grad_norm": 0.03272630274295807, "learning_rate": 0.00016079384366140138, "loss": 0.3299, "step": 3970 }, { "epoch": 0.3216947504860661, "grad_norm": 0.03154432773590088, "learning_rate": 0.0001608343458890239, "loss": 0.3517, "step": 3971 }, { "epoch": 0.32177576150356446, "grad_norm": 0.042832449078559875, "learning_rate": 0.00016087484811664642, "loss": 0.4232, "step": 3972 }, { "epoch": 0.3218567725210629, "grad_norm": 0.042939815670251846, "learning_rate": 0.00016091535034426895, "loss": 0.3594, "step": 3973 }, { "epoch": 0.32193778353856123, "grad_norm": 0.034177325665950775, "learning_rate": 0.00016095585257189146, "loss": 0.3972, "step": 3974 }, { "epoch": 0.32201879455605964, "grad_norm": 0.03674381598830223, "learning_rate": 0.000160996354799514, "loss": 0.3461, "step": 3975 }, { "epoch": 0.322099805573558, "grad_norm": 0.0347549170255661, "learning_rate": 0.0001610368570271365, "loss": 0.3703, "step": 3976 }, { "epoch": 0.3221808165910564, "grad_norm": 0.03207762539386749, "learning_rate": 0.00016107735925475903, "loss": 0.3475, "step": 3977 }, { "epoch": 0.32226182760855476, "grad_norm": 0.03411857783794403, "learning_rate": 0.00016111786148238156, "loss": 0.3955, "step": 3978 }, { "epoch": 0.3223428386260531, "grad_norm": 0.04447000101208687, "learning_rate": 0.00016115836371000407, "loss": 0.4138, "step": 3979 }, { "epoch": 0.32242384964355153, "grad_norm": 0.038355935364961624, "learning_rate": 0.00016119886593762657, "loss": 0.3929, "step": 3980 }, { "epoch": 0.3225048606610499, "grad_norm": 0.03359310328960419, "learning_rate": 0.0001612393681652491, "loss": 0.356, "step": 3981 }, { "epoch": 0.3225858716785483, "grad_norm": 0.03186175972223282, "learning_rate": 0.0001612798703928716, "loss": 0.3308, "step": 3982 }, { "epoch": 0.32266688269604665, "grad_norm": 0.03117132931947708, "learning_rate": 0.00016132037262049412, "loss": 0.3371, "step": 3983 }, { "epoch": 0.32274789371354506, "grad_norm": 0.039014607667922974, "learning_rate": 0.00016136087484811665, "loss": 0.4021, "step": 3984 }, { "epoch": 0.3228289047310434, "grad_norm": 0.03741605579853058, "learning_rate": 0.00016140137707573916, "loss": 0.3816, "step": 3985 }, { "epoch": 0.3229099157485418, "grad_norm": 0.03936758264899254, "learning_rate": 0.0001614418793033617, "loss": 0.4038, "step": 3986 }, { "epoch": 0.3229909267660402, "grad_norm": 0.03849122300744057, "learning_rate": 0.0001614823815309842, "loss": 0.3726, "step": 3987 }, { "epoch": 0.32307193778353854, "grad_norm": 0.034238673746585846, "learning_rate": 0.00016152288375860673, "loss": 0.3438, "step": 3988 }, { "epoch": 0.32315294880103695, "grad_norm": 0.034184083342552185, "learning_rate": 0.00016156338598622924, "loss": 0.3556, "step": 3989 }, { "epoch": 0.3232339598185353, "grad_norm": 0.039354074746370316, "learning_rate": 0.00016160388821385177, "loss": 0.4, "step": 3990 }, { "epoch": 0.3233149708360337, "grad_norm": 0.03062828630208969, "learning_rate": 0.00016164439044147428, "loss": 0.3365, "step": 3991 }, { "epoch": 0.3233959818535321, "grad_norm": 0.0330023393034935, "learning_rate": 0.0001616848926690968, "loss": 0.3612, "step": 3992 }, { "epoch": 0.32347699287103043, "grad_norm": 0.0405740812420845, "learning_rate": 0.00016172539489671932, "loss": 0.3907, "step": 3993 }, { "epoch": 0.32355800388852884, "grad_norm": 0.03357372432947159, "learning_rate": 0.00016176589712434185, "loss": 0.3836, "step": 3994 }, { "epoch": 0.3236390149060272, "grad_norm": 0.03729019686579704, "learning_rate": 0.00016180639935196436, "loss": 0.372, "step": 3995 }, { "epoch": 0.3237200259235256, "grad_norm": 0.04074510186910629, "learning_rate": 0.0001618469015795869, "loss": 0.399, "step": 3996 }, { "epoch": 0.32380103694102397, "grad_norm": 0.03839709237217903, "learning_rate": 0.00016188740380720942, "loss": 0.3793, "step": 3997 }, { "epoch": 0.3238820479585224, "grad_norm": 0.03876428306102753, "learning_rate": 0.00016192790603483193, "loss": 0.366, "step": 3998 }, { "epoch": 0.32396305897602073, "grad_norm": 0.03858571499586105, "learning_rate": 0.00016196840826245446, "loss": 0.3614, "step": 3999 }, { "epoch": 0.32404406999351915, "grad_norm": 0.037236109375953674, "learning_rate": 0.00016200891049007697, "loss": 0.361, "step": 4000 }, { "epoch": 0.3241250810110175, "grad_norm": 0.0351792648434639, "learning_rate": 0.0001620494127176995, "loss": 0.4234, "step": 4001 }, { "epoch": 0.32420609202851586, "grad_norm": 0.03616362810134888, "learning_rate": 0.000162089914945322, "loss": 0.4013, "step": 4002 }, { "epoch": 0.32428710304601427, "grad_norm": 0.0375266894698143, "learning_rate": 0.00016213041717294454, "loss": 0.4115, "step": 4003 }, { "epoch": 0.3243681140635126, "grad_norm": 0.03920578584074974, "learning_rate": 0.00016217091940056705, "loss": 0.3409, "step": 4004 }, { "epoch": 0.32444912508101104, "grad_norm": 0.036732036620378494, "learning_rate": 0.00016221142162818955, "loss": 0.3824, "step": 4005 }, { "epoch": 0.3245301360985094, "grad_norm": 0.03974968567490578, "learning_rate": 0.00016225192385581206, "loss": 0.3146, "step": 4006 }, { "epoch": 0.3246111471160078, "grad_norm": 0.042769160121679306, "learning_rate": 0.0001622924260834346, "loss": 0.388, "step": 4007 }, { "epoch": 0.32469215813350616, "grad_norm": 0.03568337485194206, "learning_rate": 0.0001623329283110571, "loss": 0.3803, "step": 4008 }, { "epoch": 0.3247731691510045, "grad_norm": 0.03258265182375908, "learning_rate": 0.00016237343053867963, "loss": 0.3972, "step": 4009 }, { "epoch": 0.3248541801685029, "grad_norm": 0.036132700741291046, "learning_rate": 0.00016241393276630214, "loss": 0.3571, "step": 4010 }, { "epoch": 0.3249351911860013, "grad_norm": 0.03624130040407181, "learning_rate": 0.00016245443499392467, "loss": 0.3414, "step": 4011 }, { "epoch": 0.3250162022034997, "grad_norm": 0.04750201851129532, "learning_rate": 0.00016249493722154718, "loss": 0.3793, "step": 4012 }, { "epoch": 0.32509721322099805, "grad_norm": 0.037769172340631485, "learning_rate": 0.0001625354394491697, "loss": 0.3086, "step": 4013 }, { "epoch": 0.32517822423849646, "grad_norm": 0.03758542984724045, "learning_rate": 0.00016257594167679222, "loss": 0.3603, "step": 4014 }, { "epoch": 0.3252592352559948, "grad_norm": 0.03945527970790863, "learning_rate": 0.00016261644390441475, "loss": 0.3936, "step": 4015 }, { "epoch": 0.32534024627349317, "grad_norm": 0.03996698558330536, "learning_rate": 0.00016265694613203728, "loss": 0.4186, "step": 4016 }, { "epoch": 0.3254212572909916, "grad_norm": 0.03950809687376022, "learning_rate": 0.0001626974483596598, "loss": 0.393, "step": 4017 }, { "epoch": 0.32550226830848994, "grad_norm": 0.04291021078824997, "learning_rate": 0.00016273795058728232, "loss": 0.492, "step": 4018 }, { "epoch": 0.32558327932598835, "grad_norm": 0.035584136843681335, "learning_rate": 0.00016277845281490483, "loss": 0.3314, "step": 4019 }, { "epoch": 0.3256642903434867, "grad_norm": 0.04221056029200554, "learning_rate": 0.00016281895504252736, "loss": 0.4079, "step": 4020 }, { "epoch": 0.3257453013609851, "grad_norm": 0.03662978485226631, "learning_rate": 0.00016285945727014987, "loss": 0.3372, "step": 4021 }, { "epoch": 0.3258263123784835, "grad_norm": 0.03927822783589363, "learning_rate": 0.0001628999594977724, "loss": 0.3701, "step": 4022 }, { "epoch": 0.32590732339598183, "grad_norm": 0.03549426048994064, "learning_rate": 0.0001629404617253949, "loss": 0.3976, "step": 4023 }, { "epoch": 0.32598833441348024, "grad_norm": 0.035459041595458984, "learning_rate": 0.00016298096395301744, "loss": 0.37, "step": 4024 }, { "epoch": 0.3260693454309786, "grad_norm": 0.03579239174723625, "learning_rate": 0.00016302146618063995, "loss": 0.3874, "step": 4025 }, { "epoch": 0.326150356448477, "grad_norm": 0.03891357406973839, "learning_rate": 0.00016306196840826248, "loss": 0.3551, "step": 4026 }, { "epoch": 0.32623136746597536, "grad_norm": 0.040021754801273346, "learning_rate": 0.00016310247063588498, "loss": 0.3913, "step": 4027 }, { "epoch": 0.3263123784834738, "grad_norm": 0.03605050593614578, "learning_rate": 0.0001631429728635075, "loss": 0.3339, "step": 4028 }, { "epoch": 0.32639338950097213, "grad_norm": 0.03774379566311836, "learning_rate": 0.00016318347509113002, "loss": 0.3455, "step": 4029 }, { "epoch": 0.3264744005184705, "grad_norm": 0.037987157702445984, "learning_rate": 0.00016322397731875253, "loss": 0.3608, "step": 4030 }, { "epoch": 0.3265554115359689, "grad_norm": 0.03648436442017555, "learning_rate": 0.00016326447954637504, "loss": 0.323, "step": 4031 }, { "epoch": 0.32663642255346725, "grad_norm": 0.03928240388631821, "learning_rate": 0.00016330498177399757, "loss": 0.3906, "step": 4032 }, { "epoch": 0.32671743357096567, "grad_norm": 0.0331883542239666, "learning_rate": 0.00016334548400162008, "loss": 0.3736, "step": 4033 }, { "epoch": 0.326798444588464, "grad_norm": 0.03794945031404495, "learning_rate": 0.0001633859862292426, "loss": 0.3839, "step": 4034 }, { "epoch": 0.32687945560596243, "grad_norm": 0.03638019412755966, "learning_rate": 0.00016342648845686514, "loss": 0.38, "step": 4035 }, { "epoch": 0.3269604666234608, "grad_norm": 0.035750579088926315, "learning_rate": 0.00016346699068448765, "loss": 0.3653, "step": 4036 }, { "epoch": 0.32704147764095914, "grad_norm": 0.03391675651073456, "learning_rate": 0.00016350749291211018, "loss": 0.3206, "step": 4037 }, { "epoch": 0.32712248865845756, "grad_norm": 0.03558258339762688, "learning_rate": 0.0001635479951397327, "loss": 0.3528, "step": 4038 }, { "epoch": 0.3272034996759559, "grad_norm": 0.03626623749732971, "learning_rate": 0.00016358849736735522, "loss": 0.3612, "step": 4039 }, { "epoch": 0.3272845106934543, "grad_norm": 0.04127888381481171, "learning_rate": 0.00016362899959497773, "loss": 0.4088, "step": 4040 }, { "epoch": 0.3273655217109527, "grad_norm": 0.03882889822125435, "learning_rate": 0.00016366950182260026, "loss": 0.37, "step": 4041 }, { "epoch": 0.3274465327284511, "grad_norm": 0.03618205338716507, "learning_rate": 0.00016371000405022277, "loss": 0.3634, "step": 4042 }, { "epoch": 0.32752754374594945, "grad_norm": 0.0353274866938591, "learning_rate": 0.0001637505062778453, "loss": 0.3593, "step": 4043 }, { "epoch": 0.3276085547634478, "grad_norm": 0.040717270225286484, "learning_rate": 0.0001637910085054678, "loss": 0.3824, "step": 4044 }, { "epoch": 0.3276895657809462, "grad_norm": 0.03363925591111183, "learning_rate": 0.00016383151073309034, "loss": 0.3533, "step": 4045 }, { "epoch": 0.32777057679844457, "grad_norm": 0.033195581287145615, "learning_rate": 0.00016387201296071284, "loss": 0.3516, "step": 4046 }, { "epoch": 0.327851587815943, "grad_norm": 0.045429445803165436, "learning_rate": 0.00016391251518833538, "loss": 0.3696, "step": 4047 }, { "epoch": 0.32793259883344134, "grad_norm": 0.032288793474435806, "learning_rate": 0.00016395301741595788, "loss": 0.3922, "step": 4048 }, { "epoch": 0.32801360985093975, "grad_norm": 0.03578419238328934, "learning_rate": 0.00016399351964358042, "loss": 0.4296, "step": 4049 }, { "epoch": 0.3280946208684381, "grad_norm": 0.037038251757621765, "learning_rate": 0.00016403402187120292, "loss": 0.37, "step": 4050 }, { "epoch": 0.3281756318859365, "grad_norm": 0.03831426426768303, "learning_rate": 0.00016407452409882546, "loss": 0.4149, "step": 4051 }, { "epoch": 0.32825664290343487, "grad_norm": 0.03701657056808472, "learning_rate": 0.00016411502632644796, "loss": 0.3257, "step": 4052 }, { "epoch": 0.3283376539209332, "grad_norm": 0.03409591317176819, "learning_rate": 0.00016415552855407047, "loss": 0.3546, "step": 4053 }, { "epoch": 0.32841866493843164, "grad_norm": 0.03310379013419151, "learning_rate": 0.000164196030781693, "loss": 0.3481, "step": 4054 }, { "epoch": 0.32849967595593, "grad_norm": 0.03756807744503021, "learning_rate": 0.0001642365330093155, "loss": 0.4197, "step": 4055 }, { "epoch": 0.3285806869734284, "grad_norm": 0.03222097083926201, "learning_rate": 0.00016427703523693804, "loss": 0.3418, "step": 4056 }, { "epoch": 0.32866169799092676, "grad_norm": 0.039320822805166245, "learning_rate": 0.00016431753746456055, "loss": 0.3667, "step": 4057 }, { "epoch": 0.32874270900842517, "grad_norm": 0.03398220241069794, "learning_rate": 0.00016435803969218308, "loss": 0.347, "step": 4058 }, { "epoch": 0.3288237200259235, "grad_norm": 0.03665045648813248, "learning_rate": 0.0001643985419198056, "loss": 0.3903, "step": 4059 }, { "epoch": 0.3289047310434219, "grad_norm": 0.03421606495976448, "learning_rate": 0.00016443904414742812, "loss": 0.3649, "step": 4060 }, { "epoch": 0.3289857420609203, "grad_norm": 0.03205835446715355, "learning_rate": 0.00016447954637505063, "loss": 0.3511, "step": 4061 }, { "epoch": 0.32906675307841865, "grad_norm": 0.03892917558550835, "learning_rate": 0.00016452004860267316, "loss": 0.4036, "step": 4062 }, { "epoch": 0.32914776409591706, "grad_norm": 0.05253162235021591, "learning_rate": 0.00016456055083029567, "loss": 0.3281, "step": 4063 }, { "epoch": 0.3292287751134154, "grad_norm": 0.03342265635728836, "learning_rate": 0.0001646010530579182, "loss": 0.3494, "step": 4064 }, { "epoch": 0.32930978613091383, "grad_norm": 0.04515873268246651, "learning_rate": 0.0001646415552855407, "loss": 0.3868, "step": 4065 }, { "epoch": 0.3293907971484122, "grad_norm": 0.038879964500665665, "learning_rate": 0.00016468205751316324, "loss": 0.37, "step": 4066 }, { "epoch": 0.32947180816591054, "grad_norm": 0.034797415137290955, "learning_rate": 0.00016472255974078574, "loss": 0.379, "step": 4067 }, { "epoch": 0.32955281918340895, "grad_norm": 0.039061516523361206, "learning_rate": 0.00016476306196840828, "loss": 0.3781, "step": 4068 }, { "epoch": 0.3296338302009073, "grad_norm": 0.03718171268701553, "learning_rate": 0.00016480356419603078, "loss": 0.3805, "step": 4069 }, { "epoch": 0.3297148412184057, "grad_norm": 0.03436566889286041, "learning_rate": 0.00016484406642365332, "loss": 0.4011, "step": 4070 }, { "epoch": 0.3297958522359041, "grad_norm": 0.03371388092637062, "learning_rate": 0.00016488456865127582, "loss": 0.3144, "step": 4071 }, { "epoch": 0.3298768632534025, "grad_norm": 0.03688603639602661, "learning_rate": 0.00016492507087889836, "loss": 0.309, "step": 4072 }, { "epoch": 0.32995787427090084, "grad_norm": 0.03432590141892433, "learning_rate": 0.0001649655731065209, "loss": 0.349, "step": 4073 }, { "epoch": 0.3300388852883992, "grad_norm": 0.03731279820203781, "learning_rate": 0.0001650060753341434, "loss": 0.3977, "step": 4074 }, { "epoch": 0.3301198963058976, "grad_norm": 0.040071602910757065, "learning_rate": 0.0001650465775617659, "loss": 0.3577, "step": 4075 }, { "epoch": 0.33020090732339596, "grad_norm": 0.04492799565196037, "learning_rate": 0.00016508707978938843, "loss": 0.3521, "step": 4076 }, { "epoch": 0.3302819183408944, "grad_norm": 0.04404788836836815, "learning_rate": 0.00016512758201701094, "loss": 0.4375, "step": 4077 }, { "epoch": 0.33036292935839273, "grad_norm": 0.03406739979982376, "learning_rate": 0.00016516808424463345, "loss": 0.3423, "step": 4078 }, { "epoch": 0.33044394037589114, "grad_norm": 0.04167330637574196, "learning_rate": 0.00016520858647225598, "loss": 0.3892, "step": 4079 }, { "epoch": 0.3305249513933895, "grad_norm": 0.03561322018504143, "learning_rate": 0.00016524908869987849, "loss": 0.3382, "step": 4080 }, { "epoch": 0.33060596241088785, "grad_norm": 0.03386366367340088, "learning_rate": 0.00016528959092750102, "loss": 0.3811, "step": 4081 }, { "epoch": 0.33068697342838627, "grad_norm": 0.03760869801044464, "learning_rate": 0.00016533009315512353, "loss": 0.3442, "step": 4082 }, { "epoch": 0.3307679844458846, "grad_norm": 0.036848295480012894, "learning_rate": 0.00016537059538274606, "loss": 0.406, "step": 4083 }, { "epoch": 0.33084899546338303, "grad_norm": 0.039902713149785995, "learning_rate": 0.00016541109761036856, "loss": 0.4254, "step": 4084 }, { "epoch": 0.3309300064808814, "grad_norm": 0.04576299339532852, "learning_rate": 0.0001654515998379911, "loss": 0.3806, "step": 4085 }, { "epoch": 0.3310110174983798, "grad_norm": 0.03668006509542465, "learning_rate": 0.0001654921020656136, "loss": 0.365, "step": 4086 }, { "epoch": 0.33109202851587816, "grad_norm": 0.038494616746902466, "learning_rate": 0.00016553260429323614, "loss": 0.4095, "step": 4087 }, { "epoch": 0.3311730395333765, "grad_norm": 0.042895376682281494, "learning_rate": 0.00016557310652085864, "loss": 0.359, "step": 4088 }, { "epoch": 0.3312540505508749, "grad_norm": 0.038043245673179626, "learning_rate": 0.00016561360874848118, "loss": 0.3424, "step": 4089 }, { "epoch": 0.3313350615683733, "grad_norm": 0.040794432163238525, "learning_rate": 0.0001656541109761037, "loss": 0.3786, "step": 4090 }, { "epoch": 0.3314160725858717, "grad_norm": 0.04780365899205208, "learning_rate": 0.00016569461320372622, "loss": 0.3424, "step": 4091 }, { "epoch": 0.33149708360337005, "grad_norm": 0.039480097591876984, "learning_rate": 0.00016573511543134875, "loss": 0.3805, "step": 4092 }, { "epoch": 0.33157809462086846, "grad_norm": 0.03293336182832718, "learning_rate": 0.00016577561765897126, "loss": 0.3511, "step": 4093 }, { "epoch": 0.3316591056383668, "grad_norm": 0.03346782550215721, "learning_rate": 0.0001658161198865938, "loss": 0.3501, "step": 4094 }, { "epoch": 0.3317401166558652, "grad_norm": 0.03275993466377258, "learning_rate": 0.0001658566221142163, "loss": 0.3694, "step": 4095 }, { "epoch": 0.3318211276733636, "grad_norm": 0.03415694832801819, "learning_rate": 0.00016589712434183883, "loss": 0.3533, "step": 4096 }, { "epoch": 0.33190213869086194, "grad_norm": 0.03276718035340309, "learning_rate": 0.00016593762656946133, "loss": 0.3577, "step": 4097 }, { "epoch": 0.33198314970836035, "grad_norm": 0.03801753744482994, "learning_rate": 0.00016597812879708387, "loss": 0.4198, "step": 4098 }, { "epoch": 0.3320641607258587, "grad_norm": 0.03857783228158951, "learning_rate": 0.00016601863102470637, "loss": 0.4047, "step": 4099 }, { "epoch": 0.3321451717433571, "grad_norm": 0.03730057552456856, "learning_rate": 0.00016605913325232888, "loss": 0.3675, "step": 4100 }, { "epoch": 0.33222618276085547, "grad_norm": 0.03350397199392319, "learning_rate": 0.00016609963547995139, "loss": 0.3975, "step": 4101 }, { "epoch": 0.3323071937783539, "grad_norm": 0.03293445333838463, "learning_rate": 0.00016614013770757392, "loss": 0.3638, "step": 4102 }, { "epoch": 0.33238820479585224, "grad_norm": 0.030500883236527443, "learning_rate": 0.00016618063993519643, "loss": 0.3162, "step": 4103 }, { "epoch": 0.3324692158133506, "grad_norm": 0.030487844720482826, "learning_rate": 0.00016622114216281896, "loss": 0.3563, "step": 4104 }, { "epoch": 0.332550226830849, "grad_norm": 0.03776392340660095, "learning_rate": 0.00016626164439044146, "loss": 0.3087, "step": 4105 }, { "epoch": 0.33263123784834736, "grad_norm": 0.04220743849873543, "learning_rate": 0.000166302146618064, "loss": 0.4032, "step": 4106 }, { "epoch": 0.33271224886584577, "grad_norm": 0.03415783867239952, "learning_rate": 0.0001663426488456865, "loss": 0.3666, "step": 4107 }, { "epoch": 0.33279325988334413, "grad_norm": 0.04167284071445465, "learning_rate": 0.00016638315107330904, "loss": 0.3416, "step": 4108 }, { "epoch": 0.33287427090084254, "grad_norm": 0.03049684315919876, "learning_rate": 0.00016642365330093157, "loss": 0.3758, "step": 4109 }, { "epoch": 0.3329552819183409, "grad_norm": 0.03721962496638298, "learning_rate": 0.00016646415552855408, "loss": 0.356, "step": 4110 }, { "epoch": 0.33303629293583925, "grad_norm": 0.03811046853661537, "learning_rate": 0.0001665046577561766, "loss": 0.3641, "step": 4111 }, { "epoch": 0.33311730395333766, "grad_norm": 0.033855751156806946, "learning_rate": 0.00016654515998379912, "loss": 0.4055, "step": 4112 }, { "epoch": 0.333198314970836, "grad_norm": 0.04019276052713394, "learning_rate": 0.00016658566221142165, "loss": 0.3109, "step": 4113 }, { "epoch": 0.33327932598833443, "grad_norm": 0.030369669198989868, "learning_rate": 0.00016662616443904415, "loss": 0.3483, "step": 4114 }, { "epoch": 0.3333603370058328, "grad_norm": 0.036614250391721725, "learning_rate": 0.0001666666666666667, "loss": 0.3964, "step": 4115 }, { "epoch": 0.3334413480233312, "grad_norm": 0.039595827460289, "learning_rate": 0.0001667071688942892, "loss": 0.3832, "step": 4116 }, { "epoch": 0.33352235904082955, "grad_norm": 0.03889375925064087, "learning_rate": 0.00016674767112191173, "loss": 0.3758, "step": 4117 }, { "epoch": 0.3336033700583279, "grad_norm": 0.03840089589357376, "learning_rate": 0.00016678817334953423, "loss": 0.3581, "step": 4118 }, { "epoch": 0.3336843810758263, "grad_norm": 0.030346790328621864, "learning_rate": 0.00016682867557715677, "loss": 0.3402, "step": 4119 }, { "epoch": 0.3337653920933247, "grad_norm": 0.03762712702155113, "learning_rate": 0.00016686917780477927, "loss": 0.4095, "step": 4120 }, { "epoch": 0.3338464031108231, "grad_norm": 0.0357481949031353, "learning_rate": 0.0001669096800324018, "loss": 0.3672, "step": 4121 }, { "epoch": 0.33392741412832144, "grad_norm": 0.03390078991651535, "learning_rate": 0.0001669501822600243, "loss": 0.3526, "step": 4122 }, { "epoch": 0.33400842514581985, "grad_norm": 0.04207165166735649, "learning_rate": 0.00016699068448764682, "loss": 0.4223, "step": 4123 }, { "epoch": 0.3340894361633182, "grad_norm": 0.042306751012802124, "learning_rate": 0.00016703118671526935, "loss": 0.384, "step": 4124 }, { "epoch": 0.33417044718081657, "grad_norm": 0.03514132276177406, "learning_rate": 0.00016707168894289186, "loss": 0.3652, "step": 4125 }, { "epoch": 0.334251458198315, "grad_norm": 0.03561227768659592, "learning_rate": 0.00016711219117051436, "loss": 0.4069, "step": 4126 }, { "epoch": 0.33433246921581333, "grad_norm": 0.027793284505605698, "learning_rate": 0.0001671526933981369, "loss": 0.2562, "step": 4127 }, { "epoch": 0.33441348023331174, "grad_norm": 0.03717358410358429, "learning_rate": 0.00016719319562575943, "loss": 0.4096, "step": 4128 }, { "epoch": 0.3344944912508101, "grad_norm": 0.036481596529483795, "learning_rate": 0.00016723369785338194, "loss": 0.3614, "step": 4129 }, { "epoch": 0.3345755022683085, "grad_norm": 0.044352419674396515, "learning_rate": 0.00016727420008100447, "loss": 0.3769, "step": 4130 }, { "epoch": 0.33465651328580687, "grad_norm": 0.037814266979694366, "learning_rate": 0.00016731470230862698, "loss": 0.3389, "step": 4131 }, { "epoch": 0.3347375243033052, "grad_norm": 0.03270898014307022, "learning_rate": 0.0001673552045362495, "loss": 0.3526, "step": 4132 }, { "epoch": 0.33481853532080363, "grad_norm": 0.03235417231917381, "learning_rate": 0.00016739570676387202, "loss": 0.3298, "step": 4133 }, { "epoch": 0.334899546338302, "grad_norm": 0.038817260414361954, "learning_rate": 0.00016743620899149455, "loss": 0.4, "step": 4134 }, { "epoch": 0.3349805573558004, "grad_norm": 0.04076307639479637, "learning_rate": 0.00016747671121911705, "loss": 0.3628, "step": 4135 }, { "epoch": 0.33506156837329876, "grad_norm": 0.03437405079603195, "learning_rate": 0.0001675172134467396, "loss": 0.4204, "step": 4136 }, { "epoch": 0.33514257939079717, "grad_norm": 0.03410829231142998, "learning_rate": 0.0001675577156743621, "loss": 0.3609, "step": 4137 }, { "epoch": 0.3352235904082955, "grad_norm": 0.04926472529768944, "learning_rate": 0.00016759821790198463, "loss": 0.3613, "step": 4138 }, { "epoch": 0.33530460142579394, "grad_norm": 0.03411533683538437, "learning_rate": 0.00016763872012960713, "loss": 0.3933, "step": 4139 }, { "epoch": 0.3353856124432923, "grad_norm": 0.03284559026360512, "learning_rate": 0.00016767922235722967, "loss": 0.3586, "step": 4140 }, { "epoch": 0.33546662346079065, "grad_norm": 0.040767405182123184, "learning_rate": 0.00016771972458485217, "loss": 0.3632, "step": 4141 }, { "epoch": 0.33554763447828906, "grad_norm": 0.034310050308704376, "learning_rate": 0.0001677602268124747, "loss": 0.3488, "step": 4142 }, { "epoch": 0.3356286454957874, "grad_norm": 0.03712863475084305, "learning_rate": 0.0001678007290400972, "loss": 0.4369, "step": 4143 }, { "epoch": 0.3357096565132858, "grad_norm": 0.03386729210615158, "learning_rate": 0.00016784123126771975, "loss": 0.3479, "step": 4144 }, { "epoch": 0.3357906675307842, "grad_norm": 0.039073631167411804, "learning_rate": 0.00016788173349534225, "loss": 0.3928, "step": 4145 }, { "epoch": 0.3358716785482826, "grad_norm": 0.039460305124521255, "learning_rate": 0.00016792223572296478, "loss": 0.3976, "step": 4146 }, { "epoch": 0.33595268956578095, "grad_norm": 0.03876733034849167, "learning_rate": 0.0001679627379505873, "loss": 0.3755, "step": 4147 }, { "epoch": 0.3360337005832793, "grad_norm": 0.031978458166122437, "learning_rate": 0.0001680032401782098, "loss": 0.3225, "step": 4148 }, { "epoch": 0.3361147116007777, "grad_norm": 0.0404481403529644, "learning_rate": 0.00016804374240583233, "loss": 0.3893, "step": 4149 }, { "epoch": 0.33619572261827607, "grad_norm": 0.034730345010757446, "learning_rate": 0.00016808424463345484, "loss": 0.3538, "step": 4150 }, { "epoch": 0.3362767336357745, "grad_norm": 0.03582317754626274, "learning_rate": 0.00016812474686107737, "loss": 0.3475, "step": 4151 }, { "epoch": 0.33635774465327284, "grad_norm": 0.035376086831092834, "learning_rate": 0.00016816524908869988, "loss": 0.3683, "step": 4152 }, { "epoch": 0.33643875567077125, "grad_norm": 0.033756762742996216, "learning_rate": 0.0001682057513163224, "loss": 0.3522, "step": 4153 }, { "epoch": 0.3365197666882696, "grad_norm": 0.03765640780329704, "learning_rate": 0.00016824625354394491, "loss": 0.3725, "step": 4154 }, { "epoch": 0.33660077770576796, "grad_norm": 0.03699144721031189, "learning_rate": 0.00016828675577156745, "loss": 0.3676, "step": 4155 }, { "epoch": 0.3366817887232664, "grad_norm": 0.033394955098629, "learning_rate": 0.00016832725799918995, "loss": 0.3527, "step": 4156 }, { "epoch": 0.33676279974076473, "grad_norm": 0.045186761766672134, "learning_rate": 0.0001683677602268125, "loss": 0.422, "step": 4157 }, { "epoch": 0.33684381075826314, "grad_norm": 0.040306515991687775, "learning_rate": 0.000168408262454435, "loss": 0.3672, "step": 4158 }, { "epoch": 0.3369248217757615, "grad_norm": 0.02902974747121334, "learning_rate": 0.00016844876468205753, "loss": 0.3542, "step": 4159 }, { "epoch": 0.3370058327932599, "grad_norm": 0.037141066044569016, "learning_rate": 0.00016848926690968003, "loss": 0.3852, "step": 4160 }, { "epoch": 0.33708684381075826, "grad_norm": 0.050520069897174835, "learning_rate": 0.00016852976913730257, "loss": 0.373, "step": 4161 }, { "epoch": 0.3371678548282566, "grad_norm": 0.041232600808143616, "learning_rate": 0.00016857027136492507, "loss": 0.3563, "step": 4162 }, { "epoch": 0.33724886584575503, "grad_norm": 0.034661248326301575, "learning_rate": 0.0001686107735925476, "loss": 0.3394, "step": 4163 }, { "epoch": 0.3373298768632534, "grad_norm": 0.04212620481848717, "learning_rate": 0.0001686512758201701, "loss": 0.4088, "step": 4164 }, { "epoch": 0.3374108878807518, "grad_norm": 0.03127884119749069, "learning_rate": 0.00016869177804779264, "loss": 0.331, "step": 4165 }, { "epoch": 0.33749189889825015, "grad_norm": 0.03282209485769272, "learning_rate": 0.00016873228027541518, "loss": 0.3882, "step": 4166 }, { "epoch": 0.33757290991574856, "grad_norm": 0.03402530401945114, "learning_rate": 0.00016877278250303768, "loss": 0.4194, "step": 4167 }, { "epoch": 0.3376539209332469, "grad_norm": 0.03680253401398659, "learning_rate": 0.00016881328473066022, "loss": 0.393, "step": 4168 }, { "epoch": 0.3377349319507453, "grad_norm": 0.03531409054994583, "learning_rate": 0.00016885378695828272, "loss": 0.3438, "step": 4169 }, { "epoch": 0.3378159429682437, "grad_norm": 0.039557069540023804, "learning_rate": 0.00016889428918590523, "loss": 0.372, "step": 4170 }, { "epoch": 0.33789695398574204, "grad_norm": 0.041093405336141586, "learning_rate": 0.00016893479141352774, "loss": 0.3918, "step": 4171 }, { "epoch": 0.33797796500324045, "grad_norm": 0.035452280193567276, "learning_rate": 0.00016897529364115027, "loss": 0.365, "step": 4172 }, { "epoch": 0.3380589760207388, "grad_norm": 0.03799006715416908, "learning_rate": 0.00016901579586877277, "loss": 0.3629, "step": 4173 }, { "epoch": 0.3381399870382372, "grad_norm": 0.03691485896706581, "learning_rate": 0.0001690562980963953, "loss": 0.3645, "step": 4174 }, { "epoch": 0.3382209980557356, "grad_norm": 0.036314696073532104, "learning_rate": 0.00016909680032401781, "loss": 0.3775, "step": 4175 }, { "epoch": 0.33830200907323393, "grad_norm": 0.04029175639152527, "learning_rate": 0.00016913730255164035, "loss": 0.4537, "step": 4176 }, { "epoch": 0.33838302009073234, "grad_norm": 0.030173292383551598, "learning_rate": 0.00016917780477926285, "loss": 0.337, "step": 4177 }, { "epoch": 0.3384640311082307, "grad_norm": 0.04181046038866043, "learning_rate": 0.0001692183070068854, "loss": 0.3995, "step": 4178 }, { "epoch": 0.3385450421257291, "grad_norm": 0.03693336248397827, "learning_rate": 0.0001692588092345079, "loss": 0.3401, "step": 4179 }, { "epoch": 0.33862605314322747, "grad_norm": 0.04609256982803345, "learning_rate": 0.00016929931146213043, "loss": 0.385, "step": 4180 }, { "epoch": 0.3387070641607259, "grad_norm": 0.036621615290641785, "learning_rate": 0.00016933981368975293, "loss": 0.3583, "step": 4181 }, { "epoch": 0.33878807517822424, "grad_norm": 0.03672129288315773, "learning_rate": 0.00016938031591737547, "loss": 0.4042, "step": 4182 }, { "epoch": 0.3388690861957226, "grad_norm": 0.038371678441762924, "learning_rate": 0.00016942081814499797, "loss": 0.3672, "step": 4183 }, { "epoch": 0.338950097213221, "grad_norm": 0.0406721830368042, "learning_rate": 0.0001694613203726205, "loss": 0.383, "step": 4184 }, { "epoch": 0.33903110823071936, "grad_norm": 0.03535905107855797, "learning_rate": 0.00016950182260024304, "loss": 0.3518, "step": 4185 }, { "epoch": 0.33911211924821777, "grad_norm": 0.03706865385174751, "learning_rate": 0.00016954232482786554, "loss": 0.3521, "step": 4186 }, { "epoch": 0.3391931302657161, "grad_norm": 0.04128368943929672, "learning_rate": 0.00016958282705548808, "loss": 0.397, "step": 4187 }, { "epoch": 0.33927414128321454, "grad_norm": 0.03312550112605095, "learning_rate": 0.00016962332928311058, "loss": 0.3685, "step": 4188 }, { "epoch": 0.3393551523007129, "grad_norm": 0.034005653113126755, "learning_rate": 0.00016966383151073312, "loss": 0.3365, "step": 4189 }, { "epoch": 0.3394361633182113, "grad_norm": 0.03641623631119728, "learning_rate": 0.00016970433373835562, "loss": 0.3931, "step": 4190 }, { "epoch": 0.33951717433570966, "grad_norm": 0.03957383707165718, "learning_rate": 0.00016974483596597816, "loss": 0.4141, "step": 4191 }, { "epoch": 0.339598185353208, "grad_norm": 0.04036073386669159, "learning_rate": 0.00016978533819360066, "loss": 0.3924, "step": 4192 }, { "epoch": 0.3396791963707064, "grad_norm": 0.033038701862096786, "learning_rate": 0.00016982584042122317, "loss": 0.296, "step": 4193 }, { "epoch": 0.3397602073882048, "grad_norm": 0.027858994901180267, "learning_rate": 0.0001698663426488457, "loss": 0.3356, "step": 4194 }, { "epoch": 0.3398412184057032, "grad_norm": 0.04070800542831421, "learning_rate": 0.0001699068448764682, "loss": 0.3525, "step": 4195 }, { "epoch": 0.33992222942320155, "grad_norm": 0.03459759056568146, "learning_rate": 0.00016994734710409071, "loss": 0.3693, "step": 4196 }, { "epoch": 0.34000324044069996, "grad_norm": 0.03300487622618675, "learning_rate": 0.00016998784933171325, "loss": 0.3489, "step": 4197 }, { "epoch": 0.3400842514581983, "grad_norm": 0.03482555225491524, "learning_rate": 0.00017002835155933575, "loss": 0.306, "step": 4198 }, { "epoch": 0.3401652624756967, "grad_norm": 0.039338547736406326, "learning_rate": 0.00017006885378695829, "loss": 0.3555, "step": 4199 }, { "epoch": 0.3402462734931951, "grad_norm": 0.041121240705251694, "learning_rate": 0.0001701093560145808, "loss": 0.3355, "step": 4200 }, { "epoch": 0.34032728451069344, "grad_norm": 0.03414642810821533, "learning_rate": 0.00017014985824220333, "loss": 0.3546, "step": 4201 }, { "epoch": 0.34040829552819185, "grad_norm": 0.039132002741098404, "learning_rate": 0.00017019036046982583, "loss": 0.3546, "step": 4202 }, { "epoch": 0.3404893065456902, "grad_norm": 0.036015842109918594, "learning_rate": 0.00017023086269744836, "loss": 0.3876, "step": 4203 }, { "epoch": 0.3405703175631886, "grad_norm": 0.04213041439652443, "learning_rate": 0.0001702713649250709, "loss": 0.379, "step": 4204 }, { "epoch": 0.340651328580687, "grad_norm": 0.04193120822310448, "learning_rate": 0.0001703118671526934, "loss": 0.3266, "step": 4205 }, { "epoch": 0.34073233959818533, "grad_norm": 0.035873111337423325, "learning_rate": 0.00017035236938031594, "loss": 0.3738, "step": 4206 }, { "epoch": 0.34081335061568374, "grad_norm": 0.03832458332180977, "learning_rate": 0.00017039287160793844, "loss": 0.3866, "step": 4207 }, { "epoch": 0.3408943616331821, "grad_norm": 0.032247111201286316, "learning_rate": 0.00017043337383556098, "loss": 0.3546, "step": 4208 }, { "epoch": 0.3409753726506805, "grad_norm": 0.03261726349592209, "learning_rate": 0.00017047387606318348, "loss": 0.3558, "step": 4209 }, { "epoch": 0.34105638366817886, "grad_norm": 0.030771298334002495, "learning_rate": 0.00017051437829080602, "loss": 0.3082, "step": 4210 }, { "epoch": 0.3411373946856773, "grad_norm": 0.03943264111876488, "learning_rate": 0.00017055488051842852, "loss": 0.3681, "step": 4211 }, { "epoch": 0.34121840570317563, "grad_norm": 0.03512781485915184, "learning_rate": 0.00017059538274605106, "loss": 0.342, "step": 4212 }, { "epoch": 0.341299416720674, "grad_norm": 0.03673945739865303, "learning_rate": 0.00017063588497367356, "loss": 0.3391, "step": 4213 }, { "epoch": 0.3413804277381724, "grad_norm": 0.03736485540866852, "learning_rate": 0.0001706763872012961, "loss": 0.3542, "step": 4214 }, { "epoch": 0.34146143875567075, "grad_norm": 0.0473208986222744, "learning_rate": 0.0001707168894289186, "loss": 0.3504, "step": 4215 }, { "epoch": 0.34154244977316917, "grad_norm": 0.033738020807504654, "learning_rate": 0.00017075739165654113, "loss": 0.3571, "step": 4216 }, { "epoch": 0.3416234607906675, "grad_norm": 0.033597659319639206, "learning_rate": 0.00017079789388416364, "loss": 0.3311, "step": 4217 }, { "epoch": 0.34170447180816593, "grad_norm": 0.038350872695446014, "learning_rate": 0.00017083839611178615, "loss": 0.3237, "step": 4218 }, { "epoch": 0.3417854828256643, "grad_norm": 0.033569056540727615, "learning_rate": 0.00017087889833940868, "loss": 0.3653, "step": 4219 }, { "epoch": 0.34186649384316264, "grad_norm": 0.039351075887680054, "learning_rate": 0.00017091940056703119, "loss": 0.3981, "step": 4220 }, { "epoch": 0.34194750486066106, "grad_norm": 0.03401686251163483, "learning_rate": 0.0001709599027946537, "loss": 0.3414, "step": 4221 }, { "epoch": 0.3420285158781594, "grad_norm": 0.0369318425655365, "learning_rate": 0.00017100040502227622, "loss": 0.4034, "step": 4222 }, { "epoch": 0.3421095268956578, "grad_norm": 0.03724020719528198, "learning_rate": 0.00017104090724989876, "loss": 0.3716, "step": 4223 }, { "epoch": 0.3421905379131562, "grad_norm": 0.03206992894411087, "learning_rate": 0.00017108140947752126, "loss": 0.3711, "step": 4224 }, { "epoch": 0.3422715489306546, "grad_norm": 0.032597772777080536, "learning_rate": 0.0001711219117051438, "loss": 0.3333, "step": 4225 }, { "epoch": 0.34235255994815295, "grad_norm": 0.0400988794863224, "learning_rate": 0.0001711624139327663, "loss": 0.4322, "step": 4226 }, { "epoch": 0.3424335709656513, "grad_norm": 0.03737347200512886, "learning_rate": 0.00017120291616038884, "loss": 0.3767, "step": 4227 }, { "epoch": 0.3425145819831497, "grad_norm": 0.06823983043432236, "learning_rate": 0.00017124341838801134, "loss": 0.3987, "step": 4228 }, { "epoch": 0.34259559300064807, "grad_norm": 0.03661317750811577, "learning_rate": 0.00017128392061563388, "loss": 0.3714, "step": 4229 }, { "epoch": 0.3426766040181465, "grad_norm": 0.04608812928199768, "learning_rate": 0.00017132442284325638, "loss": 0.3924, "step": 4230 }, { "epoch": 0.34275761503564484, "grad_norm": 0.03493238613009453, "learning_rate": 0.00017136492507087892, "loss": 0.3719, "step": 4231 }, { "epoch": 0.34283862605314325, "grad_norm": 0.03279491513967514, "learning_rate": 0.00017140542729850142, "loss": 0.3655, "step": 4232 }, { "epoch": 0.3429196370706416, "grad_norm": 0.038346197456121445, "learning_rate": 0.00017144592952612395, "loss": 0.3979, "step": 4233 }, { "epoch": 0.34300064808814, "grad_norm": 0.03517588600516319, "learning_rate": 0.00017148643175374646, "loss": 0.3248, "step": 4234 }, { "epoch": 0.34308165910563837, "grad_norm": 0.03253260254859924, "learning_rate": 0.000171526933981369, "loss": 0.3423, "step": 4235 }, { "epoch": 0.3431626701231367, "grad_norm": 0.03733436390757561, "learning_rate": 0.0001715674362089915, "loss": 0.3809, "step": 4236 }, { "epoch": 0.34324368114063514, "grad_norm": 0.03323622792959213, "learning_rate": 0.00017160793843661403, "loss": 0.3964, "step": 4237 }, { "epoch": 0.3433246921581335, "grad_norm": 0.03212364390492439, "learning_rate": 0.00017164844066423654, "loss": 0.361, "step": 4238 }, { "epoch": 0.3434057031756319, "grad_norm": 0.03472090885043144, "learning_rate": 0.00017168894289185907, "loss": 0.3455, "step": 4239 }, { "epoch": 0.34348671419313026, "grad_norm": 0.036414679139852524, "learning_rate": 0.00017172944511948158, "loss": 0.3286, "step": 4240 }, { "epoch": 0.34356772521062867, "grad_norm": 0.034054234623909, "learning_rate": 0.0001717699473471041, "loss": 0.3388, "step": 4241 }, { "epoch": 0.343648736228127, "grad_norm": 0.036633871495723724, "learning_rate": 0.00017181044957472662, "loss": 0.3605, "step": 4242 }, { "epoch": 0.3437297472456254, "grad_norm": 0.03781089931726456, "learning_rate": 0.00017185095180234912, "loss": 0.395, "step": 4243 }, { "epoch": 0.3438107582631238, "grad_norm": 0.03504957631230354, "learning_rate": 0.00017189145402997166, "loss": 0.3678, "step": 4244 }, { "epoch": 0.34389176928062215, "grad_norm": 0.03233807906508446, "learning_rate": 0.00017193195625759416, "loss": 0.3344, "step": 4245 }, { "epoch": 0.34397278029812056, "grad_norm": 0.03948920965194702, "learning_rate": 0.0001719724584852167, "loss": 0.45, "step": 4246 }, { "epoch": 0.3440537913156189, "grad_norm": 0.03502481058239937, "learning_rate": 0.0001720129607128392, "loss": 0.359, "step": 4247 }, { "epoch": 0.34413480233311733, "grad_norm": 0.03225164860486984, "learning_rate": 0.00017205346294046174, "loss": 0.332, "step": 4248 }, { "epoch": 0.3442158133506157, "grad_norm": 0.038138873875141144, "learning_rate": 0.00017209396516808424, "loss": 0.3711, "step": 4249 }, { "epoch": 0.34429682436811404, "grad_norm": 0.0346660241484642, "learning_rate": 0.00017213446739570678, "loss": 0.3786, "step": 4250 }, { "epoch": 0.34437783538561245, "grad_norm": 0.032906532287597656, "learning_rate": 0.00017217496962332928, "loss": 0.3661, "step": 4251 }, { "epoch": 0.3444588464031108, "grad_norm": 0.03140093386173248, "learning_rate": 0.00017221547185095182, "loss": 0.3831, "step": 4252 }, { "epoch": 0.3445398574206092, "grad_norm": 0.037232931703329086, "learning_rate": 0.00017225597407857432, "loss": 0.3661, "step": 4253 }, { "epoch": 0.3446208684381076, "grad_norm": 0.03418324142694473, "learning_rate": 0.00017229647630619685, "loss": 0.3599, "step": 4254 }, { "epoch": 0.344701879455606, "grad_norm": 0.03270847722887993, "learning_rate": 0.00017233697853381936, "loss": 0.4305, "step": 4255 }, { "epoch": 0.34478289047310434, "grad_norm": 0.04176706075668335, "learning_rate": 0.0001723774807614419, "loss": 0.403, "step": 4256 }, { "epoch": 0.3448639014906027, "grad_norm": 0.030672620981931686, "learning_rate": 0.0001724179829890644, "loss": 0.3659, "step": 4257 }, { "epoch": 0.3449449125081011, "grad_norm": 0.031542904675006866, "learning_rate": 0.00017245848521668693, "loss": 0.3779, "step": 4258 }, { "epoch": 0.34502592352559946, "grad_norm": 0.03651117905974388, "learning_rate": 0.00017249898744430944, "loss": 0.3581, "step": 4259 }, { "epoch": 0.3451069345430979, "grad_norm": 0.03164476901292801, "learning_rate": 0.00017253948967193197, "loss": 0.3565, "step": 4260 }, { "epoch": 0.34518794556059623, "grad_norm": 0.03185632824897766, "learning_rate": 0.0001725799918995545, "loss": 0.3787, "step": 4261 }, { "epoch": 0.34526895657809464, "grad_norm": 0.03777198866009712, "learning_rate": 0.000172620494127177, "loss": 0.4305, "step": 4262 }, { "epoch": 0.345349967595593, "grad_norm": 0.034228693693876266, "learning_rate": 0.00017266099635479952, "loss": 0.3546, "step": 4263 }, { "epoch": 0.34543097861309136, "grad_norm": 0.035769667476415634, "learning_rate": 0.00017270149858242205, "loss": 0.3961, "step": 4264 }, { "epoch": 0.34551198963058977, "grad_norm": 0.034536611288785934, "learning_rate": 0.00017274200081004456, "loss": 0.3816, "step": 4265 }, { "epoch": 0.3455930006480881, "grad_norm": 0.04174252972006798, "learning_rate": 0.00017278250303766706, "loss": 0.3716, "step": 4266 }, { "epoch": 0.34567401166558653, "grad_norm": 0.04082999750971794, "learning_rate": 0.0001728230052652896, "loss": 0.3954, "step": 4267 }, { "epoch": 0.3457550226830849, "grad_norm": 0.05414394289255142, "learning_rate": 0.0001728635074929121, "loss": 0.404, "step": 4268 }, { "epoch": 0.3458360337005833, "grad_norm": 0.037421610206365585, "learning_rate": 0.00017290400972053464, "loss": 0.3687, "step": 4269 }, { "epoch": 0.34591704471808166, "grad_norm": 0.03721112385392189, "learning_rate": 0.00017294451194815714, "loss": 0.3903, "step": 4270 }, { "epoch": 0.34599805573558, "grad_norm": 0.040057696402072906, "learning_rate": 0.00017298501417577968, "loss": 0.4476, "step": 4271 }, { "epoch": 0.3460790667530784, "grad_norm": 0.03950329124927521, "learning_rate": 0.00017302551640340218, "loss": 0.3874, "step": 4272 }, { "epoch": 0.3461600777705768, "grad_norm": 0.04660689830780029, "learning_rate": 0.00017306601863102471, "loss": 0.4252, "step": 4273 }, { "epoch": 0.3462410887880752, "grad_norm": 0.0310058556497097, "learning_rate": 0.00017310652085864722, "loss": 0.3315, "step": 4274 }, { "epoch": 0.34632209980557355, "grad_norm": 0.035095661878585815, "learning_rate": 0.00017314702308626975, "loss": 0.3643, "step": 4275 }, { "epoch": 0.34640311082307196, "grad_norm": 0.04145737737417221, "learning_rate": 0.00017318752531389226, "loss": 0.3888, "step": 4276 }, { "epoch": 0.3464841218405703, "grad_norm": 0.03940679132938385, "learning_rate": 0.0001732280275415148, "loss": 0.3694, "step": 4277 }, { "epoch": 0.34656513285806867, "grad_norm": 0.03774067014455795, "learning_rate": 0.0001732685297691373, "loss": 0.3623, "step": 4278 }, { "epoch": 0.3466461438755671, "grad_norm": 0.039981309324502945, "learning_rate": 0.00017330903199675983, "loss": 0.4002, "step": 4279 }, { "epoch": 0.34672715489306544, "grad_norm": 0.036382727324962616, "learning_rate": 0.00017334953422438237, "loss": 0.4025, "step": 4280 }, { "epoch": 0.34680816591056385, "grad_norm": 0.03269738331437111, "learning_rate": 0.00017339003645200487, "loss": 0.3438, "step": 4281 }, { "epoch": 0.3468891769280622, "grad_norm": 0.03966363146901131, "learning_rate": 0.0001734305386796274, "loss": 0.3557, "step": 4282 }, { "epoch": 0.3469701879455606, "grad_norm": 0.02861810475587845, "learning_rate": 0.0001734710409072499, "loss": 0.3333, "step": 4283 }, { "epoch": 0.34705119896305897, "grad_norm": 0.04468153789639473, "learning_rate": 0.00017351154313487244, "loss": 0.3887, "step": 4284 }, { "epoch": 0.3471322099805574, "grad_norm": 0.036037784069776535, "learning_rate": 0.00017355204536249495, "loss": 0.356, "step": 4285 }, { "epoch": 0.34721322099805574, "grad_norm": 0.034804608672857285, "learning_rate": 0.00017359254759011748, "loss": 0.3769, "step": 4286 }, { "epoch": 0.3472942320155541, "grad_norm": 0.04255205765366554, "learning_rate": 0.00017363304981774, "loss": 0.3766, "step": 4287 }, { "epoch": 0.3473752430330525, "grad_norm": 0.030785469338297844, "learning_rate": 0.0001736735520453625, "loss": 0.3586, "step": 4288 }, { "epoch": 0.34745625405055086, "grad_norm": 0.030791115015745163, "learning_rate": 0.00017371405427298503, "loss": 0.3536, "step": 4289 }, { "epoch": 0.3475372650680493, "grad_norm": 0.03815629705786705, "learning_rate": 0.00017375455650060754, "loss": 0.4058, "step": 4290 }, { "epoch": 0.34761827608554763, "grad_norm": 0.04502563923597336, "learning_rate": 0.00017379505872823004, "loss": 0.4145, "step": 4291 }, { "epoch": 0.34769928710304604, "grad_norm": 0.03694465756416321, "learning_rate": 0.00017383556095585257, "loss": 0.3686, "step": 4292 }, { "epoch": 0.3477802981205444, "grad_norm": 0.03498321771621704, "learning_rate": 0.00017387606318347508, "loss": 0.402, "step": 4293 }, { "epoch": 0.34786130913804275, "grad_norm": 0.033133365213871, "learning_rate": 0.00017391656541109761, "loss": 0.4, "step": 4294 }, { "epoch": 0.34794232015554116, "grad_norm": 0.036578428000211716, "learning_rate": 0.00017395706763872012, "loss": 0.3368, "step": 4295 }, { "epoch": 0.3480233311730395, "grad_norm": 0.041449353098869324, "learning_rate": 0.00017399756986634265, "loss": 0.3596, "step": 4296 }, { "epoch": 0.34810434219053793, "grad_norm": 0.031675804406404495, "learning_rate": 0.00017403807209396516, "loss": 0.3637, "step": 4297 }, { "epoch": 0.3481853532080363, "grad_norm": 0.03366389870643616, "learning_rate": 0.0001740785743215877, "loss": 0.3615, "step": 4298 }, { "epoch": 0.3482663642255347, "grad_norm": 0.04206422343850136, "learning_rate": 0.00017411907654921023, "loss": 0.4181, "step": 4299 }, { "epoch": 0.34834737524303305, "grad_norm": 0.037293173372745514, "learning_rate": 0.00017415957877683273, "loss": 0.3176, "step": 4300 }, { "epoch": 0.3484283862605314, "grad_norm": 0.04146702215075493, "learning_rate": 0.00017420008100445527, "loss": 0.3659, "step": 4301 }, { "epoch": 0.3485093972780298, "grad_norm": 0.04191764444112778, "learning_rate": 0.00017424058323207777, "loss": 0.3824, "step": 4302 }, { "epoch": 0.3485904082955282, "grad_norm": 0.03564842417836189, "learning_rate": 0.0001742810854597003, "loss": 0.3736, "step": 4303 }, { "epoch": 0.3486714193130266, "grad_norm": 0.028532709926366806, "learning_rate": 0.0001743215876873228, "loss": 0.3697, "step": 4304 }, { "epoch": 0.34875243033052494, "grad_norm": 0.033898431807756424, "learning_rate": 0.00017436208991494534, "loss": 0.3598, "step": 4305 }, { "epoch": 0.34883344134802335, "grad_norm": 0.03289264813065529, "learning_rate": 0.00017440259214256785, "loss": 0.377, "step": 4306 }, { "epoch": 0.3489144523655217, "grad_norm": 0.02952691726386547, "learning_rate": 0.00017444309437019038, "loss": 0.3408, "step": 4307 }, { "epoch": 0.34899546338302007, "grad_norm": 0.035252220928668976, "learning_rate": 0.0001744835965978129, "loss": 0.3984, "step": 4308 }, { "epoch": 0.3490764744005185, "grad_norm": 0.033801451325416565, "learning_rate": 0.00017452409882543542, "loss": 0.3344, "step": 4309 }, { "epoch": 0.34915748541801683, "grad_norm": 0.04563166946172714, "learning_rate": 0.00017456460105305793, "loss": 0.3895, "step": 4310 }, { "epoch": 0.34923849643551524, "grad_norm": 0.03591470420360565, "learning_rate": 0.00017460510328068046, "loss": 0.3608, "step": 4311 }, { "epoch": 0.3493195074530136, "grad_norm": 0.035211559385061264, "learning_rate": 0.00017464560550830297, "loss": 0.3281, "step": 4312 }, { "epoch": 0.349400518470512, "grad_norm": 0.03434130921959877, "learning_rate": 0.00017468610773592547, "loss": 0.3557, "step": 4313 }, { "epoch": 0.34948152948801037, "grad_norm": 0.039995063096284866, "learning_rate": 0.00017472660996354798, "loss": 0.4288, "step": 4314 }, { "epoch": 0.3495625405055087, "grad_norm": 0.03357269987463951, "learning_rate": 0.0001747671121911705, "loss": 0.3522, "step": 4315 }, { "epoch": 0.34964355152300713, "grad_norm": 0.03886905312538147, "learning_rate": 0.00017480761441879302, "loss": 0.3746, "step": 4316 }, { "epoch": 0.3497245625405055, "grad_norm": 0.033615898340940475, "learning_rate": 0.00017484811664641555, "loss": 0.3593, "step": 4317 }, { "epoch": 0.3498055735580039, "grad_norm": 0.037499893456697464, "learning_rate": 0.00017488861887403809, "loss": 0.379, "step": 4318 }, { "epoch": 0.34988658457550226, "grad_norm": 0.03338012099266052, "learning_rate": 0.0001749291211016606, "loss": 0.3761, "step": 4319 }, { "epoch": 0.34996759559300067, "grad_norm": 0.03335127979516983, "learning_rate": 0.00017496962332928313, "loss": 0.3602, "step": 4320 }, { "epoch": 0.350048606610499, "grad_norm": 0.03429269418120384, "learning_rate": 0.00017501012555690563, "loss": 0.3741, "step": 4321 }, { "epoch": 0.3501296176279974, "grad_norm": 0.037369150668382645, "learning_rate": 0.00017505062778452816, "loss": 0.3561, "step": 4322 }, { "epoch": 0.3502106286454958, "grad_norm": 0.031715746968984604, "learning_rate": 0.00017509113001215067, "loss": 0.3575, "step": 4323 }, { "epoch": 0.35029163966299415, "grad_norm": 0.03233131021261215, "learning_rate": 0.0001751316322397732, "loss": 0.3526, "step": 4324 }, { "epoch": 0.35037265068049256, "grad_norm": 0.03732204809784889, "learning_rate": 0.0001751721344673957, "loss": 0.3946, "step": 4325 }, { "epoch": 0.3504536616979909, "grad_norm": 0.0344877764582634, "learning_rate": 0.00017521263669501824, "loss": 0.374, "step": 4326 }, { "epoch": 0.3505346727154893, "grad_norm": 0.03300429508090019, "learning_rate": 0.00017525313892264075, "loss": 0.3765, "step": 4327 }, { "epoch": 0.3506156837329877, "grad_norm": 0.04378129914402962, "learning_rate": 0.00017529364115026328, "loss": 0.3132, "step": 4328 }, { "epoch": 0.3506966947504861, "grad_norm": 0.037889618426561356, "learning_rate": 0.0001753341433778858, "loss": 0.4394, "step": 4329 }, { "epoch": 0.35077770576798445, "grad_norm": 0.05033688247203827, "learning_rate": 0.00017537464560550832, "loss": 0.4018, "step": 4330 }, { "epoch": 0.3508587167854828, "grad_norm": 0.033001694828271866, "learning_rate": 0.00017541514783313083, "loss": 0.3496, "step": 4331 }, { "epoch": 0.3509397278029812, "grad_norm": 0.030153915286064148, "learning_rate": 0.00017545565006075336, "loss": 0.3689, "step": 4332 }, { "epoch": 0.35102073882047957, "grad_norm": 0.03007018193602562, "learning_rate": 0.00017549615228837587, "loss": 0.3636, "step": 4333 }, { "epoch": 0.351101749837978, "grad_norm": 0.036428261548280716, "learning_rate": 0.0001755366545159984, "loss": 0.3656, "step": 4334 }, { "epoch": 0.35118276085547634, "grad_norm": 0.0325998030602932, "learning_rate": 0.0001755771567436209, "loss": 0.3421, "step": 4335 }, { "epoch": 0.35126377187297475, "grad_norm": 0.03554887697100639, "learning_rate": 0.0001756176589712434, "loss": 0.3741, "step": 4336 }, { "epoch": 0.3513447828904731, "grad_norm": 0.03348753973841667, "learning_rate": 0.00017565816119886595, "loss": 0.3319, "step": 4337 }, { "epoch": 0.35142579390797146, "grad_norm": 0.03808234632015228, "learning_rate": 0.00017569866342648845, "loss": 0.3794, "step": 4338 }, { "epoch": 0.3515068049254699, "grad_norm": 0.037807319313287735, "learning_rate": 0.00017573916565411099, "loss": 0.4035, "step": 4339 }, { "epoch": 0.35158781594296823, "grad_norm": 0.030004501342773438, "learning_rate": 0.0001757796678817335, "loss": 0.3414, "step": 4340 }, { "epoch": 0.35166882696046664, "grad_norm": 0.0362929105758667, "learning_rate": 0.00017582017010935602, "loss": 0.398, "step": 4341 }, { "epoch": 0.351749837977965, "grad_norm": 0.03204100951552391, "learning_rate": 0.00017586067233697853, "loss": 0.3171, "step": 4342 }, { "epoch": 0.3518308489954634, "grad_norm": 0.032586175948381424, "learning_rate": 0.00017590117456460106, "loss": 0.3528, "step": 4343 }, { "epoch": 0.35191186001296176, "grad_norm": 0.035550639033317566, "learning_rate": 0.00017594167679222357, "loss": 0.3487, "step": 4344 }, { "epoch": 0.3519928710304601, "grad_norm": 0.03612900897860527, "learning_rate": 0.0001759821790198461, "loss": 0.3612, "step": 4345 }, { "epoch": 0.35207388204795853, "grad_norm": 0.03829009085893631, "learning_rate": 0.0001760226812474686, "loss": 0.3444, "step": 4346 }, { "epoch": 0.3521548930654569, "grad_norm": 0.03981778025627136, "learning_rate": 0.00017606318347509114, "loss": 0.3354, "step": 4347 }, { "epoch": 0.3522359040829553, "grad_norm": 0.03323979303240776, "learning_rate": 0.00017610368570271365, "loss": 0.3847, "step": 4348 }, { "epoch": 0.35231691510045365, "grad_norm": 0.037503600120544434, "learning_rate": 0.00017614418793033618, "loss": 0.3923, "step": 4349 }, { "epoch": 0.35239792611795207, "grad_norm": 0.03373998403549194, "learning_rate": 0.0001761846901579587, "loss": 0.4081, "step": 4350 }, { "epoch": 0.3524789371354504, "grad_norm": 0.035595640540122986, "learning_rate": 0.00017622519238558122, "loss": 0.3839, "step": 4351 }, { "epoch": 0.3525599481529488, "grad_norm": 0.03282087668776512, "learning_rate": 0.00017626569461320373, "loss": 0.3699, "step": 4352 }, { "epoch": 0.3526409591704472, "grad_norm": 0.038674503564834595, "learning_rate": 0.00017630619684082626, "loss": 0.4181, "step": 4353 }, { "epoch": 0.35272197018794554, "grad_norm": 0.04119935631752014, "learning_rate": 0.0001763466990684488, "loss": 0.4143, "step": 4354 }, { "epoch": 0.35280298120544396, "grad_norm": 0.03823656216263771, "learning_rate": 0.0001763872012960713, "loss": 0.3594, "step": 4355 }, { "epoch": 0.3528839922229423, "grad_norm": 0.03402746096253395, "learning_rate": 0.00017642770352369383, "loss": 0.3849, "step": 4356 }, { "epoch": 0.3529650032404407, "grad_norm": 0.032122448086738586, "learning_rate": 0.00017646820575131634, "loss": 0.3509, "step": 4357 }, { "epoch": 0.3530460142579391, "grad_norm": 0.04331757873296738, "learning_rate": 0.00017650870797893885, "loss": 0.4486, "step": 4358 }, { "epoch": 0.35312702527543743, "grad_norm": 0.03687101975083351, "learning_rate": 0.00017654921020656138, "loss": 0.402, "step": 4359 }, { "epoch": 0.35320803629293585, "grad_norm": 0.03533836454153061, "learning_rate": 0.00017658971243418389, "loss": 0.3699, "step": 4360 }, { "epoch": 0.3532890473104342, "grad_norm": 0.03400861471891403, "learning_rate": 0.0001766302146618064, "loss": 0.3589, "step": 4361 }, { "epoch": 0.3533700583279326, "grad_norm": 0.038683127611875534, "learning_rate": 0.00017667071688942892, "loss": 0.361, "step": 4362 }, { "epoch": 0.35345106934543097, "grad_norm": 0.03580750152468681, "learning_rate": 0.00017671121911705143, "loss": 0.3218, "step": 4363 }, { "epoch": 0.3535320803629294, "grad_norm": 0.042962536215782166, "learning_rate": 0.00017675172134467396, "loss": 0.3851, "step": 4364 }, { "epoch": 0.35361309138042774, "grad_norm": 0.03469430282711983, "learning_rate": 0.00017679222357229647, "loss": 0.329, "step": 4365 }, { "epoch": 0.3536941023979261, "grad_norm": 0.04712754860520363, "learning_rate": 0.000176832725799919, "loss": 0.3624, "step": 4366 }, { "epoch": 0.3537751134154245, "grad_norm": 0.042234018445014954, "learning_rate": 0.0001768732280275415, "loss": 0.3715, "step": 4367 }, { "epoch": 0.35385612443292286, "grad_norm": 0.034240856766700745, "learning_rate": 0.00017691373025516404, "loss": 0.3689, "step": 4368 }, { "epoch": 0.35393713545042127, "grad_norm": 0.03210359439253807, "learning_rate": 0.00017695423248278655, "loss": 0.3568, "step": 4369 }, { "epoch": 0.3540181464679196, "grad_norm": 0.036989495158195496, "learning_rate": 0.00017699473471040908, "loss": 0.3943, "step": 4370 }, { "epoch": 0.35409915748541804, "grad_norm": 0.041003771126270294, "learning_rate": 0.0001770352369380316, "loss": 0.4186, "step": 4371 }, { "epoch": 0.3541801685029164, "grad_norm": 0.03730255737900734, "learning_rate": 0.00017707573916565412, "loss": 0.3636, "step": 4372 }, { "epoch": 0.3542611795204148, "grad_norm": 0.036754168570041656, "learning_rate": 0.00017711624139327665, "loss": 0.3896, "step": 4373 }, { "epoch": 0.35434219053791316, "grad_norm": 0.037548258900642395, "learning_rate": 0.00017715674362089916, "loss": 0.3794, "step": 4374 }, { "epoch": 0.3544232015554115, "grad_norm": 0.049613919109106064, "learning_rate": 0.0001771972458485217, "loss": 0.3303, "step": 4375 }, { "epoch": 0.3545042125729099, "grad_norm": 0.03377959504723549, "learning_rate": 0.0001772377480761442, "loss": 0.3296, "step": 4376 }, { "epoch": 0.3545852235904083, "grad_norm": 0.04323616996407509, "learning_rate": 0.00017727825030376673, "loss": 0.3667, "step": 4377 }, { "epoch": 0.3546662346079067, "grad_norm": 0.04176010936498642, "learning_rate": 0.00017731875253138924, "loss": 0.407, "step": 4378 }, { "epoch": 0.35474724562540505, "grad_norm": 0.042668696492910385, "learning_rate": 0.00017735925475901177, "loss": 0.3787, "step": 4379 }, { "epoch": 0.35482825664290346, "grad_norm": 0.035692185163497925, "learning_rate": 0.00017739975698663428, "loss": 0.3701, "step": 4380 }, { "epoch": 0.3549092676604018, "grad_norm": 0.034370869398117065, "learning_rate": 0.0001774402592142568, "loss": 0.3917, "step": 4381 }, { "epoch": 0.3549902786779002, "grad_norm": 0.043179381638765335, "learning_rate": 0.00017748076144187932, "loss": 0.3479, "step": 4382 }, { "epoch": 0.3550712896953986, "grad_norm": 0.032561879605054855, "learning_rate": 0.00017752126366950182, "loss": 0.3171, "step": 4383 }, { "epoch": 0.35515230071289694, "grad_norm": 0.035728905349969864, "learning_rate": 0.00017756176589712436, "loss": 0.3248, "step": 4384 }, { "epoch": 0.35523331173039535, "grad_norm": 0.029100000858306885, "learning_rate": 0.00017760226812474686, "loss": 0.3176, "step": 4385 }, { "epoch": 0.3553143227478937, "grad_norm": 0.03463733196258545, "learning_rate": 0.00017764277035236937, "loss": 0.3507, "step": 4386 }, { "epoch": 0.3553953337653921, "grad_norm": 0.03186383098363876, "learning_rate": 0.0001776832725799919, "loss": 0.3626, "step": 4387 }, { "epoch": 0.3554763447828905, "grad_norm": 0.03267962113022804, "learning_rate": 0.0001777237748076144, "loss": 0.3582, "step": 4388 }, { "epoch": 0.35555735580038883, "grad_norm": 0.03787730634212494, "learning_rate": 0.00017776427703523694, "loss": 0.3739, "step": 4389 }, { "epoch": 0.35563836681788724, "grad_norm": 0.042798951268196106, "learning_rate": 0.00017780477926285945, "loss": 0.4089, "step": 4390 }, { "epoch": 0.3557193778353856, "grad_norm": 0.03359090909361839, "learning_rate": 0.00017784528149048198, "loss": 0.3766, "step": 4391 }, { "epoch": 0.355800388852884, "grad_norm": 0.03374600037932396, "learning_rate": 0.00017788578371810451, "loss": 0.384, "step": 4392 }, { "epoch": 0.35588139987038236, "grad_norm": 0.037167515605688095, "learning_rate": 0.00017792628594572702, "loss": 0.3613, "step": 4393 }, { "epoch": 0.3559624108878808, "grad_norm": 0.03484749794006348, "learning_rate": 0.00017796678817334955, "loss": 0.373, "step": 4394 }, { "epoch": 0.35604342190537913, "grad_norm": 0.040365755558013916, "learning_rate": 0.00017800729040097206, "loss": 0.4126, "step": 4395 }, { "epoch": 0.3561244329228775, "grad_norm": 0.033233847469091415, "learning_rate": 0.0001780477926285946, "loss": 0.4013, "step": 4396 }, { "epoch": 0.3562054439403759, "grad_norm": 0.03474245220422745, "learning_rate": 0.0001780882948562171, "loss": 0.4215, "step": 4397 }, { "epoch": 0.35628645495787425, "grad_norm": 0.03786751255393028, "learning_rate": 0.00017812879708383963, "loss": 0.3619, "step": 4398 }, { "epoch": 0.35636746597537267, "grad_norm": 0.034605786204338074, "learning_rate": 0.00017816929931146214, "loss": 0.3443, "step": 4399 }, { "epoch": 0.356448476992871, "grad_norm": 0.03127652406692505, "learning_rate": 0.00017820980153908467, "loss": 0.3953, "step": 4400 }, { "epoch": 0.35652948801036943, "grad_norm": 0.04228046536445618, "learning_rate": 0.00017825030376670718, "loss": 0.3826, "step": 4401 }, { "epoch": 0.3566104990278678, "grad_norm": 0.036720506846904755, "learning_rate": 0.0001782908059943297, "loss": 0.3821, "step": 4402 }, { "epoch": 0.35669151004536614, "grad_norm": 0.03216755390167236, "learning_rate": 0.00017833130822195222, "loss": 0.4051, "step": 4403 }, { "epoch": 0.35677252106286456, "grad_norm": 0.033246368169784546, "learning_rate": 0.00017837181044957475, "loss": 0.3691, "step": 4404 }, { "epoch": 0.3568535320803629, "grad_norm": 0.031144283711910248, "learning_rate": 0.00017841231267719726, "loss": 0.3615, "step": 4405 }, { "epoch": 0.3569345430978613, "grad_norm": 0.03426123782992363, "learning_rate": 0.00017845281490481976, "loss": 0.3484, "step": 4406 }, { "epoch": 0.3570155541153597, "grad_norm": 0.031132718548178673, "learning_rate": 0.0001784933171324423, "loss": 0.3619, "step": 4407 }, { "epoch": 0.3570965651328581, "grad_norm": 0.04052342474460602, "learning_rate": 0.0001785338193600648, "loss": 0.3621, "step": 4408 }, { "epoch": 0.35717757615035645, "grad_norm": 0.055709172040224075, "learning_rate": 0.0001785743215876873, "loss": 0.3982, "step": 4409 }, { "epoch": 0.3572585871678548, "grad_norm": 0.031802013516426086, "learning_rate": 0.00017861482381530984, "loss": 0.3618, "step": 4410 }, { "epoch": 0.3573395981853532, "grad_norm": 0.03412245586514473, "learning_rate": 0.00017865532604293237, "loss": 0.3946, "step": 4411 }, { "epoch": 0.35742060920285157, "grad_norm": 0.03505575656890869, "learning_rate": 0.00017869582827055488, "loss": 0.3698, "step": 4412 }, { "epoch": 0.35750162022035, "grad_norm": 0.03713294491171837, "learning_rate": 0.00017873633049817741, "loss": 0.3686, "step": 4413 }, { "epoch": 0.35758263123784834, "grad_norm": 0.03343082591891289, "learning_rate": 0.00017877683272579992, "loss": 0.3212, "step": 4414 }, { "epoch": 0.35766364225534675, "grad_norm": 0.03919597715139389, "learning_rate": 0.00017881733495342245, "loss": 0.355, "step": 4415 }, { "epoch": 0.3577446532728451, "grad_norm": 0.03567646071314812, "learning_rate": 0.00017885783718104496, "loss": 0.3724, "step": 4416 }, { "epoch": 0.35782566429034346, "grad_norm": 0.03884551301598549, "learning_rate": 0.0001788983394086675, "loss": 0.3947, "step": 4417 }, { "epoch": 0.35790667530784187, "grad_norm": 0.03751397132873535, "learning_rate": 0.00017893884163629, "loss": 0.3752, "step": 4418 }, { "epoch": 0.3579876863253402, "grad_norm": 0.03351527824997902, "learning_rate": 0.00017897934386391253, "loss": 0.3422, "step": 4419 }, { "epoch": 0.35806869734283864, "grad_norm": 0.03406880795955658, "learning_rate": 0.00017901984609153504, "loss": 0.3083, "step": 4420 }, { "epoch": 0.358149708360337, "grad_norm": 0.0321863517165184, "learning_rate": 0.00017906034831915757, "loss": 0.3498, "step": 4421 }, { "epoch": 0.3582307193778354, "grad_norm": 0.032903872430324554, "learning_rate": 0.00017910085054678008, "loss": 0.3447, "step": 4422 }, { "epoch": 0.35831173039533376, "grad_norm": 0.032026126980781555, "learning_rate": 0.0001791413527744026, "loss": 0.3368, "step": 4423 }, { "epoch": 0.35839274141283217, "grad_norm": 0.031434398144483566, "learning_rate": 0.00017918185500202512, "loss": 0.3541, "step": 4424 }, { "epoch": 0.35847375243033053, "grad_norm": 0.03229876980185509, "learning_rate": 0.00017922235722964765, "loss": 0.3212, "step": 4425 }, { "epoch": 0.3585547634478289, "grad_norm": 0.03334301337599754, "learning_rate": 0.00017926285945727016, "loss": 0.3369, "step": 4426 }, { "epoch": 0.3586357744653273, "grad_norm": 0.033178362995386124, "learning_rate": 0.0001793033616848927, "loss": 0.3791, "step": 4427 }, { "epoch": 0.35871678548282565, "grad_norm": 0.03164026886224747, "learning_rate": 0.0001793438639125152, "loss": 0.3518, "step": 4428 }, { "epoch": 0.35879779650032406, "grad_norm": 0.03897491469979286, "learning_rate": 0.00017938436614013773, "loss": 0.3846, "step": 4429 }, { "epoch": 0.3588788075178224, "grad_norm": 0.034208353608846664, "learning_rate": 0.00017942486836776023, "loss": 0.2991, "step": 4430 }, { "epoch": 0.35895981853532083, "grad_norm": 0.03465808555483818, "learning_rate": 0.00017946537059538274, "loss": 0.3682, "step": 4431 }, { "epoch": 0.3590408295528192, "grad_norm": 0.039521731436252594, "learning_rate": 0.00017950587282300527, "loss": 0.3649, "step": 4432 }, { "epoch": 0.35912184057031754, "grad_norm": 0.03066393733024597, "learning_rate": 0.00017954637505062778, "loss": 0.3466, "step": 4433 }, { "epoch": 0.35920285158781595, "grad_norm": 0.03277844935655594, "learning_rate": 0.0001795868772782503, "loss": 0.3776, "step": 4434 }, { "epoch": 0.3592838626053143, "grad_norm": 0.031138377264142036, "learning_rate": 0.00017962737950587282, "loss": 0.381, "step": 4435 }, { "epoch": 0.3593648736228127, "grad_norm": 0.03991644084453583, "learning_rate": 0.00017966788173349535, "loss": 0.4098, "step": 4436 }, { "epoch": 0.3594458846403111, "grad_norm": 0.037129972130060196, "learning_rate": 0.00017970838396111786, "loss": 0.3501, "step": 4437 }, { "epoch": 0.3595268956578095, "grad_norm": 0.03298862650990486, "learning_rate": 0.0001797488861887404, "loss": 0.3319, "step": 4438 }, { "epoch": 0.35960790667530784, "grad_norm": 0.03268022462725639, "learning_rate": 0.0001797893884163629, "loss": 0.3242, "step": 4439 }, { "epoch": 0.3596889176928062, "grad_norm": 0.03326528146862984, "learning_rate": 0.00017982989064398543, "loss": 0.3843, "step": 4440 }, { "epoch": 0.3597699287103046, "grad_norm": 0.03307091072201729, "learning_rate": 0.00017987039287160794, "loss": 0.3218, "step": 4441 }, { "epoch": 0.35985093972780297, "grad_norm": 0.0321352519094944, "learning_rate": 0.00017991089509923047, "loss": 0.3213, "step": 4442 }, { "epoch": 0.3599319507453014, "grad_norm": 0.03353230282664299, "learning_rate": 0.00017995139732685298, "loss": 0.3459, "step": 4443 }, { "epoch": 0.36001296176279973, "grad_norm": 0.036084793508052826, "learning_rate": 0.0001799918995544755, "loss": 0.3738, "step": 4444 }, { "epoch": 0.36009397278029814, "grad_norm": 0.03896293416619301, "learning_rate": 0.00018003240178209802, "loss": 0.3596, "step": 4445 }, { "epoch": 0.3601749837977965, "grad_norm": 0.03121447004377842, "learning_rate": 0.00018007290400972055, "loss": 0.3664, "step": 4446 }, { "epoch": 0.36025599481529486, "grad_norm": 0.036111436784267426, "learning_rate": 0.00018011340623734306, "loss": 0.4038, "step": 4447 }, { "epoch": 0.36033700583279327, "grad_norm": 0.030647702515125275, "learning_rate": 0.0001801539084649656, "loss": 0.3517, "step": 4448 }, { "epoch": 0.3604180168502916, "grad_norm": 0.03785083442926407, "learning_rate": 0.00018019441069258812, "loss": 0.3894, "step": 4449 }, { "epoch": 0.36049902786779003, "grad_norm": 0.037119291722774506, "learning_rate": 0.00018023491292021063, "loss": 0.3726, "step": 4450 }, { "epoch": 0.3605800388852884, "grad_norm": 0.0395854189991951, "learning_rate": 0.00018027541514783316, "loss": 0.4297, "step": 4451 }, { "epoch": 0.3606610499027868, "grad_norm": 0.03590099513530731, "learning_rate": 0.00018031591737545567, "loss": 0.3236, "step": 4452 }, { "epoch": 0.36074206092028516, "grad_norm": 0.04296981170773506, "learning_rate": 0.00018035641960307817, "loss": 0.3651, "step": 4453 }, { "epoch": 0.3608230719377835, "grad_norm": 0.039156656712293625, "learning_rate": 0.0001803969218307007, "loss": 0.3636, "step": 4454 }, { "epoch": 0.3609040829552819, "grad_norm": 0.040239349007606506, "learning_rate": 0.0001804374240583232, "loss": 0.3535, "step": 4455 }, { "epoch": 0.3609850939727803, "grad_norm": 0.03353238105773926, "learning_rate": 0.00018047792628594572, "loss": 0.382, "step": 4456 }, { "epoch": 0.3610661049902787, "grad_norm": 0.03441842272877693, "learning_rate": 0.00018051842851356825, "loss": 0.3489, "step": 4457 }, { "epoch": 0.36114711600777705, "grad_norm": 0.030106976628303528, "learning_rate": 0.00018055893074119076, "loss": 0.3159, "step": 4458 }, { "epoch": 0.36122812702527546, "grad_norm": 0.03584039956331253, "learning_rate": 0.0001805994329688133, "loss": 0.3966, "step": 4459 }, { "epoch": 0.3613091380427738, "grad_norm": 0.034742627292871475, "learning_rate": 0.0001806399351964358, "loss": 0.3637, "step": 4460 }, { "epoch": 0.36139014906027217, "grad_norm": 0.03912995010614395, "learning_rate": 0.00018068043742405833, "loss": 0.3234, "step": 4461 }, { "epoch": 0.3614711600777706, "grad_norm": 0.03890937939286232, "learning_rate": 0.00018072093965168084, "loss": 0.3684, "step": 4462 }, { "epoch": 0.36155217109526894, "grad_norm": 0.0334155298769474, "learning_rate": 0.00018076144187930337, "loss": 0.3663, "step": 4463 }, { "epoch": 0.36163318211276735, "grad_norm": 0.039641525596380234, "learning_rate": 0.00018080194410692588, "loss": 0.3978, "step": 4464 }, { "epoch": 0.3617141931302657, "grad_norm": 0.036285150796175, "learning_rate": 0.0001808424463345484, "loss": 0.4287, "step": 4465 }, { "epoch": 0.3617952041477641, "grad_norm": 0.034932248294353485, "learning_rate": 0.00018088294856217092, "loss": 0.3866, "step": 4466 }, { "epoch": 0.36187621516526247, "grad_norm": 0.03337856009602547, "learning_rate": 0.00018092345078979345, "loss": 0.3974, "step": 4467 }, { "epoch": 0.3619572261827609, "grad_norm": 0.031295645982027054, "learning_rate": 0.00018096395301741598, "loss": 0.3226, "step": 4468 }, { "epoch": 0.36203823720025924, "grad_norm": 0.03899591416120529, "learning_rate": 0.0001810044552450385, "loss": 0.3586, "step": 4469 }, { "epoch": 0.3621192482177576, "grad_norm": 0.03353521227836609, "learning_rate": 0.00018104495747266102, "loss": 0.3386, "step": 4470 }, { "epoch": 0.362200259235256, "grad_norm": 0.04231454059481621, "learning_rate": 0.00018108545970028353, "loss": 0.3845, "step": 4471 }, { "epoch": 0.36228127025275436, "grad_norm": 0.030955109745264053, "learning_rate": 0.00018112596192790606, "loss": 0.3506, "step": 4472 }, { "epoch": 0.3623622812702528, "grad_norm": 0.036558013409376144, "learning_rate": 0.00018116646415552857, "loss": 0.4057, "step": 4473 }, { "epoch": 0.36244329228775113, "grad_norm": 0.031068559736013412, "learning_rate": 0.0001812069663831511, "loss": 0.3392, "step": 4474 }, { "epoch": 0.36252430330524954, "grad_norm": 0.03186468034982681, "learning_rate": 0.0001812474686107736, "loss": 0.3352, "step": 4475 }, { "epoch": 0.3626053143227479, "grad_norm": 0.03339767083525658, "learning_rate": 0.00018128797083839614, "loss": 0.3896, "step": 4476 }, { "epoch": 0.36268632534024625, "grad_norm": 0.03032582812011242, "learning_rate": 0.00018132847306601865, "loss": 0.347, "step": 4477 }, { "epoch": 0.36276733635774466, "grad_norm": 0.03296176716685295, "learning_rate": 0.00018136897529364115, "loss": 0.3254, "step": 4478 }, { "epoch": 0.362848347375243, "grad_norm": 0.0313357375562191, "learning_rate": 0.00018140947752126366, "loss": 0.4099, "step": 4479 }, { "epoch": 0.36292935839274143, "grad_norm": 0.03426244109869003, "learning_rate": 0.0001814499797488862, "loss": 0.3294, "step": 4480 }, { "epoch": 0.3630103694102398, "grad_norm": 0.03199993073940277, "learning_rate": 0.0001814904819765087, "loss": 0.3469, "step": 4481 }, { "epoch": 0.3630913804277382, "grad_norm": 0.0293571837246418, "learning_rate": 0.00018153098420413123, "loss": 0.3434, "step": 4482 }, { "epoch": 0.36317239144523655, "grad_norm": 0.03131209686398506, "learning_rate": 0.00018157148643175374, "loss": 0.3375, "step": 4483 }, { "epoch": 0.3632534024627349, "grad_norm": 0.03087998926639557, "learning_rate": 0.00018161198865937627, "loss": 0.355, "step": 4484 }, { "epoch": 0.3633344134802333, "grad_norm": 0.030083077028393745, "learning_rate": 0.00018165249088699878, "loss": 0.3811, "step": 4485 }, { "epoch": 0.3634154244977317, "grad_norm": 0.03256846219301224, "learning_rate": 0.0001816929931146213, "loss": 0.3512, "step": 4486 }, { "epoch": 0.3634964355152301, "grad_norm": 0.038921091705560684, "learning_rate": 0.00018173349534224384, "loss": 0.4197, "step": 4487 }, { "epoch": 0.36357744653272844, "grad_norm": 0.03630004823207855, "learning_rate": 0.00018177399756986635, "loss": 0.3657, "step": 4488 }, { "epoch": 0.36365845755022685, "grad_norm": 0.03309519216418266, "learning_rate": 0.00018181449979748888, "loss": 0.3578, "step": 4489 }, { "epoch": 0.3637394685677252, "grad_norm": 0.03494489565491676, "learning_rate": 0.0001818550020251114, "loss": 0.3547, "step": 4490 }, { "epoch": 0.36382047958522357, "grad_norm": 0.0350724458694458, "learning_rate": 0.00018189550425273392, "loss": 0.3861, "step": 4491 }, { "epoch": 0.363901490602722, "grad_norm": 0.03464026749134064, "learning_rate": 0.00018193600648035643, "loss": 0.3652, "step": 4492 }, { "epoch": 0.36398250162022033, "grad_norm": 0.030455349013209343, "learning_rate": 0.00018197650870797896, "loss": 0.3767, "step": 4493 }, { "epoch": 0.36406351263771874, "grad_norm": 0.03616175055503845, "learning_rate": 0.00018201701093560147, "loss": 0.3474, "step": 4494 }, { "epoch": 0.3641445236552171, "grad_norm": 0.037097640335559845, "learning_rate": 0.000182057513163224, "loss": 0.4007, "step": 4495 }, { "epoch": 0.3642255346727155, "grad_norm": 0.04117016866803169, "learning_rate": 0.0001820980153908465, "loss": 0.3695, "step": 4496 }, { "epoch": 0.36430654569021387, "grad_norm": 0.034477025270462036, "learning_rate": 0.00018213851761846904, "loss": 0.3537, "step": 4497 }, { "epoch": 0.3643875567077122, "grad_norm": 0.03152913972735405, "learning_rate": 0.00018217901984609155, "loss": 0.3469, "step": 4498 }, { "epoch": 0.36446856772521063, "grad_norm": 0.03316012769937515, "learning_rate": 0.00018221952207371408, "loss": 0.3431, "step": 4499 }, { "epoch": 0.364549578742709, "grad_norm": 0.03767377510666847, "learning_rate": 0.00018226002430133658, "loss": 0.4001, "step": 4500 }, { "epoch": 0.3646305897602074, "grad_norm": 0.03182438388466835, "learning_rate": 0.0001823005265289591, "loss": 0.3346, "step": 4501 }, { "epoch": 0.36471160077770576, "grad_norm": 0.03710455447435379, "learning_rate": 0.00018234102875658162, "loss": 0.4222, "step": 4502 }, { "epoch": 0.36479261179520417, "grad_norm": 0.03305591270327568, "learning_rate": 0.00018238153098420413, "loss": 0.3964, "step": 4503 }, { "epoch": 0.3648736228127025, "grad_norm": 0.0305950790643692, "learning_rate": 0.00018242203321182664, "loss": 0.3369, "step": 4504 }, { "epoch": 0.3649546338302009, "grad_norm": 0.03193674981594086, "learning_rate": 0.00018246253543944917, "loss": 0.3641, "step": 4505 }, { "epoch": 0.3650356448476993, "grad_norm": 0.03355922922492027, "learning_rate": 0.0001825030376670717, "loss": 0.4064, "step": 4506 }, { "epoch": 0.36511665586519765, "grad_norm": 0.028800426051020622, "learning_rate": 0.0001825435398946942, "loss": 0.3606, "step": 4507 }, { "epoch": 0.36519766688269606, "grad_norm": 0.039104342460632324, "learning_rate": 0.00018258404212231674, "loss": 0.3716, "step": 4508 }, { "epoch": 0.3652786779001944, "grad_norm": 0.03672458603978157, "learning_rate": 0.00018262454434993925, "loss": 0.3535, "step": 4509 }, { "epoch": 0.3653596889176928, "grad_norm": 0.03032134473323822, "learning_rate": 0.00018266504657756178, "loss": 0.3762, "step": 4510 }, { "epoch": 0.3654406999351912, "grad_norm": 0.03428445756435394, "learning_rate": 0.0001827055488051843, "loss": 0.3917, "step": 4511 }, { "epoch": 0.36552171095268954, "grad_norm": 0.03314060717821121, "learning_rate": 0.00018274605103280682, "loss": 0.3586, "step": 4512 }, { "epoch": 0.36560272197018795, "grad_norm": 0.03430025652050972, "learning_rate": 0.00018278655326042933, "loss": 0.3634, "step": 4513 }, { "epoch": 0.3656837329876863, "grad_norm": 0.030363822355866432, "learning_rate": 0.00018282705548805186, "loss": 0.363, "step": 4514 }, { "epoch": 0.3657647440051847, "grad_norm": 0.038539864122867584, "learning_rate": 0.00018286755771567437, "loss": 0.4033, "step": 4515 }, { "epoch": 0.3658457550226831, "grad_norm": 0.03524131700396538, "learning_rate": 0.0001829080599432969, "loss": 0.3802, "step": 4516 }, { "epoch": 0.3659267660401815, "grad_norm": 0.03943591192364693, "learning_rate": 0.0001829485621709194, "loss": 0.4098, "step": 4517 }, { "epoch": 0.36600777705767984, "grad_norm": 0.03730255737900734, "learning_rate": 0.00018298906439854194, "loss": 0.346, "step": 4518 }, { "epoch": 0.36608878807517825, "grad_norm": 0.033342789858579636, "learning_rate": 0.00018302956662616444, "loss": 0.3728, "step": 4519 }, { "epoch": 0.3661697990926766, "grad_norm": 0.038131117820739746, "learning_rate": 0.00018307006885378698, "loss": 0.3615, "step": 4520 }, { "epoch": 0.36625081011017496, "grad_norm": 0.04024023562669754, "learning_rate": 0.00018311057108140948, "loss": 0.369, "step": 4521 }, { "epoch": 0.3663318211276734, "grad_norm": 0.03590629994869232, "learning_rate": 0.00018315107330903202, "loss": 0.3693, "step": 4522 }, { "epoch": 0.36641283214517173, "grad_norm": 0.03583737835288048, "learning_rate": 0.00018319157553665452, "loss": 0.3642, "step": 4523 }, { "epoch": 0.36649384316267014, "grad_norm": 0.03601490706205368, "learning_rate": 0.00018323207776427706, "loss": 0.3483, "step": 4524 }, { "epoch": 0.3665748541801685, "grad_norm": 0.0373217836022377, "learning_rate": 0.00018327257999189956, "loss": 0.4122, "step": 4525 }, { "epoch": 0.3666558651976669, "grad_norm": 0.035433217883110046, "learning_rate": 0.00018331308221952207, "loss": 0.3168, "step": 4526 }, { "epoch": 0.36673687621516526, "grad_norm": 0.03406085819005966, "learning_rate": 0.0001833535844471446, "loss": 0.3609, "step": 4527 }, { "epoch": 0.3668178872326636, "grad_norm": 0.03262537717819214, "learning_rate": 0.0001833940866747671, "loss": 0.3474, "step": 4528 }, { "epoch": 0.36689889825016203, "grad_norm": 0.038104861974716187, "learning_rate": 0.00018343458890238964, "loss": 0.376, "step": 4529 }, { "epoch": 0.3669799092676604, "grad_norm": 0.037595901638269424, "learning_rate": 0.00018347509113001215, "loss": 0.4158, "step": 4530 }, { "epoch": 0.3670609202851588, "grad_norm": 0.03141307085752487, "learning_rate": 0.00018351559335763468, "loss": 0.3483, "step": 4531 }, { "epoch": 0.36714193130265715, "grad_norm": 0.037604913115501404, "learning_rate": 0.0001835560955852572, "loss": 0.3513, "step": 4532 }, { "epoch": 0.36722294232015557, "grad_norm": 0.03441972658038139, "learning_rate": 0.00018359659781287972, "loss": 0.364, "step": 4533 }, { "epoch": 0.3673039533376539, "grad_norm": 0.032113075256347656, "learning_rate": 0.00018363710004050223, "loss": 0.3318, "step": 4534 }, { "epoch": 0.3673849643551523, "grad_norm": 0.03355928510427475, "learning_rate": 0.00018367760226812476, "loss": 0.3813, "step": 4535 }, { "epoch": 0.3674659753726507, "grad_norm": 0.03720551356673241, "learning_rate": 0.00018371810449574727, "loss": 0.3893, "step": 4536 }, { "epoch": 0.36754698639014904, "grad_norm": 0.04136877879500389, "learning_rate": 0.0001837586067233698, "loss": 0.3667, "step": 4537 }, { "epoch": 0.36762799740764746, "grad_norm": 0.03302746266126633, "learning_rate": 0.0001837991089509923, "loss": 0.3675, "step": 4538 }, { "epoch": 0.3677090084251458, "grad_norm": 0.034325793385505676, "learning_rate": 0.00018383961117861484, "loss": 0.3442, "step": 4539 }, { "epoch": 0.3677900194426442, "grad_norm": 0.03347557410597801, "learning_rate": 0.00018388011340623734, "loss": 0.352, "step": 4540 }, { "epoch": 0.3678710304601426, "grad_norm": 0.03631537780165672, "learning_rate": 0.00018392061563385988, "loss": 0.3562, "step": 4541 }, { "epoch": 0.36795204147764093, "grad_norm": 0.031340762972831726, "learning_rate": 0.00018396111786148238, "loss": 0.3775, "step": 4542 }, { "epoch": 0.36803305249513935, "grad_norm": 0.029904751107096672, "learning_rate": 0.00018400162008910492, "loss": 0.338, "step": 4543 }, { "epoch": 0.3681140635126377, "grad_norm": 0.03467988595366478, "learning_rate": 0.00018404212231672745, "loss": 0.3809, "step": 4544 }, { "epoch": 0.3681950745301361, "grad_norm": 0.03151930868625641, "learning_rate": 0.00018408262454434996, "loss": 0.3551, "step": 4545 }, { "epoch": 0.36827608554763447, "grad_norm": 0.036787740886211395, "learning_rate": 0.0001841231267719725, "loss": 0.3439, "step": 4546 }, { "epoch": 0.3683570965651329, "grad_norm": 0.03223686292767525, "learning_rate": 0.000184163628999595, "loss": 0.3669, "step": 4547 }, { "epoch": 0.36843810758263124, "grad_norm": 0.03125021234154701, "learning_rate": 0.0001842041312272175, "loss": 0.3613, "step": 4548 }, { "epoch": 0.3685191186001296, "grad_norm": 0.03878352791070938, "learning_rate": 0.00018424463345484, "loss": 0.4096, "step": 4549 }, { "epoch": 0.368600129617628, "grad_norm": 0.03440506383776665, "learning_rate": 0.00018428513568246254, "loss": 0.3913, "step": 4550 }, { "epoch": 0.36868114063512636, "grad_norm": 0.03337797522544861, "learning_rate": 0.00018432563791008505, "loss": 0.3498, "step": 4551 }, { "epoch": 0.36876215165262477, "grad_norm": 0.035269659012556076, "learning_rate": 0.00018436614013770758, "loss": 0.3618, "step": 4552 }, { "epoch": 0.3688431626701231, "grad_norm": 0.036833252757787704, "learning_rate": 0.00018440664236533009, "loss": 0.4147, "step": 4553 }, { "epoch": 0.36892417368762154, "grad_norm": 0.03536658361554146, "learning_rate": 0.00018444714459295262, "loss": 0.353, "step": 4554 }, { "epoch": 0.3690051847051199, "grad_norm": 0.028857829049229622, "learning_rate": 0.00018448764682057513, "loss": 0.3742, "step": 4555 }, { "epoch": 0.36908619572261825, "grad_norm": 0.035467978566884995, "learning_rate": 0.00018452814904819766, "loss": 0.341, "step": 4556 }, { "epoch": 0.36916720674011666, "grad_norm": 0.031414639204740524, "learning_rate": 0.00018456865127582016, "loss": 0.3664, "step": 4557 }, { "epoch": 0.369248217757615, "grad_norm": 0.03707354515790939, "learning_rate": 0.0001846091535034427, "loss": 0.398, "step": 4558 }, { "epoch": 0.3693292287751134, "grad_norm": 0.03519902005791664, "learning_rate": 0.0001846496557310652, "loss": 0.3708, "step": 4559 }, { "epoch": 0.3694102397926118, "grad_norm": 0.03694036602973938, "learning_rate": 0.00018469015795868774, "loss": 0.3665, "step": 4560 }, { "epoch": 0.3694912508101102, "grad_norm": 0.029678206890821457, "learning_rate": 0.00018473066018631024, "loss": 0.3253, "step": 4561 }, { "epoch": 0.36957226182760855, "grad_norm": 0.030835380777716637, "learning_rate": 0.00018477116241393278, "loss": 0.3339, "step": 4562 }, { "epoch": 0.36965327284510696, "grad_norm": 0.03632909059524536, "learning_rate": 0.0001848116646415553, "loss": 0.4066, "step": 4563 }, { "epoch": 0.3697342838626053, "grad_norm": 0.031888432800769806, "learning_rate": 0.00018485216686917782, "loss": 0.3753, "step": 4564 }, { "epoch": 0.3698152948801037, "grad_norm": 0.03251507505774498, "learning_rate": 0.00018489266909680035, "loss": 0.3445, "step": 4565 }, { "epoch": 0.3698963058976021, "grad_norm": 0.03139140456914902, "learning_rate": 0.00018493317132442286, "loss": 0.3425, "step": 4566 }, { "epoch": 0.36997731691510044, "grad_norm": 0.031516510993242264, "learning_rate": 0.0001849736735520454, "loss": 0.3523, "step": 4567 }, { "epoch": 0.37005832793259885, "grad_norm": 0.03723737969994545, "learning_rate": 0.0001850141757796679, "loss": 0.4167, "step": 4568 }, { "epoch": 0.3701393389500972, "grad_norm": 0.03188415989279747, "learning_rate": 0.00018505467800729043, "loss": 0.3622, "step": 4569 }, { "epoch": 0.3702203499675956, "grad_norm": 0.030478963628411293, "learning_rate": 0.00018509518023491293, "loss": 0.3232, "step": 4570 }, { "epoch": 0.370301360985094, "grad_norm": 0.0350758358836174, "learning_rate": 0.00018513568246253544, "loss": 0.3963, "step": 4571 }, { "epoch": 0.37038237200259233, "grad_norm": 0.03737777844071388, "learning_rate": 0.00018517618469015797, "loss": 0.38, "step": 4572 }, { "epoch": 0.37046338302009074, "grad_norm": 0.03170812129974365, "learning_rate": 0.00018521668691778048, "loss": 0.339, "step": 4573 }, { "epoch": 0.3705443940375891, "grad_norm": 0.03591955453157425, "learning_rate": 0.00018525718914540299, "loss": 0.4212, "step": 4574 }, { "epoch": 0.3706254050550875, "grad_norm": 0.03319928050041199, "learning_rate": 0.00018529769137302552, "loss": 0.3712, "step": 4575 }, { "epoch": 0.37070641607258586, "grad_norm": 0.030675271525979042, "learning_rate": 0.00018533819360064803, "loss": 0.3147, "step": 4576 }, { "epoch": 0.3707874270900843, "grad_norm": 0.029982060194015503, "learning_rate": 0.00018537869582827056, "loss": 0.3431, "step": 4577 }, { "epoch": 0.37086843810758263, "grad_norm": 0.03718116879463196, "learning_rate": 0.00018541919805589306, "loss": 0.3588, "step": 4578 }, { "epoch": 0.370949449125081, "grad_norm": 0.032471753656864166, "learning_rate": 0.0001854597002835156, "loss": 0.3329, "step": 4579 }, { "epoch": 0.3710304601425794, "grad_norm": 0.03818058222532272, "learning_rate": 0.00018550020251113813, "loss": 0.4274, "step": 4580 }, { "epoch": 0.37111147116007775, "grad_norm": 0.03520652651786804, "learning_rate": 0.00018554070473876064, "loss": 0.4192, "step": 4581 }, { "epoch": 0.37119248217757617, "grad_norm": 0.03470182791352272, "learning_rate": 0.00018558120696638317, "loss": 0.3832, "step": 4582 }, { "epoch": 0.3712734931950745, "grad_norm": 0.033301129937171936, "learning_rate": 0.00018562170919400568, "loss": 0.344, "step": 4583 }, { "epoch": 0.37135450421257293, "grad_norm": 0.033769186586141586, "learning_rate": 0.0001856622114216282, "loss": 0.3479, "step": 4584 }, { "epoch": 0.3714355152300713, "grad_norm": 0.03508080914616585, "learning_rate": 0.00018570271364925072, "loss": 0.3664, "step": 4585 }, { "epoch": 0.37151652624756965, "grad_norm": 0.033902063965797424, "learning_rate": 0.00018574321587687325, "loss": 0.4061, "step": 4586 }, { "epoch": 0.37159753726506806, "grad_norm": 0.036879245191812515, "learning_rate": 0.00018578371810449575, "loss": 0.3981, "step": 4587 }, { "epoch": 0.3716785482825664, "grad_norm": 0.03260815516114235, "learning_rate": 0.0001858242203321183, "loss": 0.3279, "step": 4588 }, { "epoch": 0.3717595593000648, "grad_norm": 0.029037103056907654, "learning_rate": 0.0001858647225597408, "loss": 0.3237, "step": 4589 }, { "epoch": 0.3718405703175632, "grad_norm": 0.03038596175611019, "learning_rate": 0.00018590522478736333, "loss": 0.3839, "step": 4590 }, { "epoch": 0.3719215813350616, "grad_norm": 0.031168343499302864, "learning_rate": 0.00018594572701498583, "loss": 0.3289, "step": 4591 }, { "epoch": 0.37200259235255995, "grad_norm": 0.03584503382444382, "learning_rate": 0.00018598622924260837, "loss": 0.3892, "step": 4592 }, { "epoch": 0.3720836033700583, "grad_norm": 0.034035272896289825, "learning_rate": 0.00018602673147023087, "loss": 0.3566, "step": 4593 }, { "epoch": 0.3721646143875567, "grad_norm": 0.033572208136320114, "learning_rate": 0.0001860672336978534, "loss": 0.4105, "step": 4594 }, { "epoch": 0.37224562540505507, "grad_norm": 0.03304578363895416, "learning_rate": 0.0001861077359254759, "loss": 0.3542, "step": 4595 }, { "epoch": 0.3723266364225535, "grad_norm": 0.03381304442882538, "learning_rate": 0.00018614823815309842, "loss": 0.3696, "step": 4596 }, { "epoch": 0.37240764744005184, "grad_norm": 0.03615789860486984, "learning_rate": 0.00018618874038072095, "loss": 0.3894, "step": 4597 }, { "epoch": 0.37248865845755025, "grad_norm": 0.029093435034155846, "learning_rate": 0.00018622924260834346, "loss": 0.3533, "step": 4598 }, { "epoch": 0.3725696694750486, "grad_norm": 0.0371689610183239, "learning_rate": 0.000186269744835966, "loss": 0.4334, "step": 4599 }, { "epoch": 0.37265068049254696, "grad_norm": 0.0313064381480217, "learning_rate": 0.0001863102470635885, "loss": 0.3863, "step": 4600 }, { "epoch": 0.37273169151004537, "grad_norm": 0.03435458242893219, "learning_rate": 0.00018635074929121103, "loss": 0.3883, "step": 4601 }, { "epoch": 0.3728127025275437, "grad_norm": 0.04193079471588135, "learning_rate": 0.00018639125151883354, "loss": 0.408, "step": 4602 }, { "epoch": 0.37289371354504214, "grad_norm": 0.03607318922877312, "learning_rate": 0.00018643175374645607, "loss": 0.3325, "step": 4603 }, { "epoch": 0.3729747245625405, "grad_norm": 0.03750342130661011, "learning_rate": 0.00018647225597407858, "loss": 0.3663, "step": 4604 }, { "epoch": 0.3730557355800389, "grad_norm": 0.034173641353845596, "learning_rate": 0.0001865127582017011, "loss": 0.3383, "step": 4605 }, { "epoch": 0.37313674659753726, "grad_norm": 0.028621068224310875, "learning_rate": 0.00018655326042932362, "loss": 0.3288, "step": 4606 }, { "epoch": 0.3732177576150357, "grad_norm": 0.038024332374334335, "learning_rate": 0.00018659376265694615, "loss": 0.3744, "step": 4607 }, { "epoch": 0.37329876863253403, "grad_norm": 0.03312882408499718, "learning_rate": 0.00018663426488456865, "loss": 0.4204, "step": 4608 }, { "epoch": 0.3733797796500324, "grad_norm": 0.035135842859745026, "learning_rate": 0.0001866747671121912, "loss": 0.3515, "step": 4609 }, { "epoch": 0.3734607906675308, "grad_norm": 0.037961844354867935, "learning_rate": 0.0001867152693398137, "loss": 0.3565, "step": 4610 }, { "epoch": 0.37354180168502915, "grad_norm": 0.034696970134973526, "learning_rate": 0.00018675577156743623, "loss": 0.3688, "step": 4611 }, { "epoch": 0.37362281270252756, "grad_norm": 0.03243953734636307, "learning_rate": 0.00018679627379505873, "loss": 0.3631, "step": 4612 }, { "epoch": 0.3737038237200259, "grad_norm": 0.032312992960214615, "learning_rate": 0.00018683677602268127, "loss": 0.3574, "step": 4613 }, { "epoch": 0.37378483473752433, "grad_norm": 0.03381352126598358, "learning_rate": 0.00018687727825030377, "loss": 0.3624, "step": 4614 }, { "epoch": 0.3738658457550227, "grad_norm": 0.02955443412065506, "learning_rate": 0.0001869177804779263, "loss": 0.3256, "step": 4615 }, { "epoch": 0.37394685677252104, "grad_norm": 0.03173473849892616, "learning_rate": 0.0001869582827055488, "loss": 0.3171, "step": 4616 }, { "epoch": 0.37402786779001945, "grad_norm": 0.03345518559217453, "learning_rate": 0.00018699878493317135, "loss": 0.4112, "step": 4617 }, { "epoch": 0.3741088788075178, "grad_norm": 0.03448694944381714, "learning_rate": 0.00018703928716079385, "loss": 0.4033, "step": 4618 }, { "epoch": 0.3741898898250162, "grad_norm": 0.03236488997936249, "learning_rate": 0.00018707978938841638, "loss": 0.3619, "step": 4619 }, { "epoch": 0.3742709008425146, "grad_norm": 0.031700026243925095, "learning_rate": 0.0001871202916160389, "loss": 0.3548, "step": 4620 }, { "epoch": 0.374351911860013, "grad_norm": 0.03522234782576561, "learning_rate": 0.0001871607938436614, "loss": 0.3816, "step": 4621 }, { "epoch": 0.37443292287751134, "grad_norm": 0.042682599276304245, "learning_rate": 0.00018720129607128393, "loss": 0.4125, "step": 4622 }, { "epoch": 0.3745139338950097, "grad_norm": 0.031941335648298264, "learning_rate": 0.00018724179829890644, "loss": 0.3205, "step": 4623 }, { "epoch": 0.3745949449125081, "grad_norm": 0.0366583988070488, "learning_rate": 0.00018728230052652897, "loss": 0.3329, "step": 4624 }, { "epoch": 0.37467595593000647, "grad_norm": 0.03480471670627594, "learning_rate": 0.00018732280275415148, "loss": 0.3354, "step": 4625 }, { "epoch": 0.3747569669475049, "grad_norm": 0.038374267518520355, "learning_rate": 0.000187363304981774, "loss": 0.3454, "step": 4626 }, { "epoch": 0.37483797796500323, "grad_norm": 0.04138772934675217, "learning_rate": 0.00018740380720939651, "loss": 0.3712, "step": 4627 }, { "epoch": 0.37491898898250164, "grad_norm": 0.03101898916065693, "learning_rate": 0.00018744430943701905, "loss": 0.3845, "step": 4628 }, { "epoch": 0.375, "grad_norm": 0.0356474332511425, "learning_rate": 0.00018748481166464155, "loss": 0.3657, "step": 4629 }, { "epoch": 0.37508101101749836, "grad_norm": 0.03429270163178444, "learning_rate": 0.0001875253138922641, "loss": 0.3168, "step": 4630 }, { "epoch": 0.37516202203499677, "grad_norm": 0.028021546080708504, "learning_rate": 0.0001875658161198866, "loss": 0.3497, "step": 4631 }, { "epoch": 0.3752430330524951, "grad_norm": 0.03524024412035942, "learning_rate": 0.00018760631834750913, "loss": 0.4036, "step": 4632 }, { "epoch": 0.37532404406999353, "grad_norm": 0.038602109998464584, "learning_rate": 0.00018764682057513163, "loss": 0.3592, "step": 4633 }, { "epoch": 0.3754050550874919, "grad_norm": 0.027118699625134468, "learning_rate": 0.00018768732280275417, "loss": 0.3417, "step": 4634 }, { "epoch": 0.3754860661049903, "grad_norm": 0.031041573733091354, "learning_rate": 0.00018772782503037667, "loss": 0.3874, "step": 4635 }, { "epoch": 0.37556707712248866, "grad_norm": 0.03967761993408203, "learning_rate": 0.0001877683272579992, "loss": 0.3585, "step": 4636 }, { "epoch": 0.375648088139987, "grad_norm": 0.036579083651304245, "learning_rate": 0.00018780882948562174, "loss": 0.3883, "step": 4637 }, { "epoch": 0.3757290991574854, "grad_norm": 0.031344201415777206, "learning_rate": 0.00018784933171324424, "loss": 0.36, "step": 4638 }, { "epoch": 0.3758101101749838, "grad_norm": 0.033904679119586945, "learning_rate": 0.00018788983394086678, "loss": 0.3798, "step": 4639 }, { "epoch": 0.3758911211924822, "grad_norm": 0.031038541346788406, "learning_rate": 0.00018793033616848928, "loss": 0.3629, "step": 4640 }, { "epoch": 0.37597213220998055, "grad_norm": 0.035065874457359314, "learning_rate": 0.00018797083839611182, "loss": 0.3545, "step": 4641 }, { "epoch": 0.37605314322747896, "grad_norm": 0.03886274993419647, "learning_rate": 0.00018801134062373432, "loss": 0.4592, "step": 4642 }, { "epoch": 0.3761341542449773, "grad_norm": 0.04013144597411156, "learning_rate": 0.00018805184285135683, "loss": 0.4282, "step": 4643 }, { "epoch": 0.37621516526247567, "grad_norm": 0.044672705233097076, "learning_rate": 0.00018809234507897934, "loss": 0.4349, "step": 4644 }, { "epoch": 0.3762961762799741, "grad_norm": 0.032288748770952225, "learning_rate": 0.00018813284730660187, "loss": 0.3905, "step": 4645 }, { "epoch": 0.37637718729747244, "grad_norm": 0.039334528148174286, "learning_rate": 0.00018817334953422437, "loss": 0.3949, "step": 4646 }, { "epoch": 0.37645819831497085, "grad_norm": 0.04137732461094856, "learning_rate": 0.0001882138517618469, "loss": 0.3766, "step": 4647 }, { "epoch": 0.3765392093324692, "grad_norm": 0.04196125641465187, "learning_rate": 0.00018825435398946941, "loss": 0.4214, "step": 4648 }, { "epoch": 0.3766202203499676, "grad_norm": 0.04401590675115585, "learning_rate": 0.00018829485621709195, "loss": 0.3992, "step": 4649 }, { "epoch": 0.37670123136746597, "grad_norm": 0.03296400606632233, "learning_rate": 0.00018833535844471445, "loss": 0.3302, "step": 4650 }, { "epoch": 0.3767822423849643, "grad_norm": 0.041027676314115524, "learning_rate": 0.000188375860672337, "loss": 0.4114, "step": 4651 }, { "epoch": 0.37686325340246274, "grad_norm": 0.030237749218940735, "learning_rate": 0.0001884163628999595, "loss": 0.3457, "step": 4652 }, { "epoch": 0.3769442644199611, "grad_norm": 0.03659079596400261, "learning_rate": 0.00018845686512758203, "loss": 0.3633, "step": 4653 }, { "epoch": 0.3770252754374595, "grad_norm": 0.03513143211603165, "learning_rate": 0.00018849736735520453, "loss": 0.4143, "step": 4654 }, { "epoch": 0.37710628645495786, "grad_norm": 0.033116623759269714, "learning_rate": 0.00018853786958282707, "loss": 0.3731, "step": 4655 }, { "epoch": 0.3771872974724563, "grad_norm": 0.03905783221125603, "learning_rate": 0.0001885783718104496, "loss": 0.373, "step": 4656 }, { "epoch": 0.37726830848995463, "grad_norm": 0.02860383875668049, "learning_rate": 0.0001886188740380721, "loss": 0.3334, "step": 4657 }, { "epoch": 0.37734931950745304, "grad_norm": 0.039546336978673935, "learning_rate": 0.00018865937626569464, "loss": 0.3569, "step": 4658 }, { "epoch": 0.3774303305249514, "grad_norm": 0.03063320182263851, "learning_rate": 0.00018869987849331714, "loss": 0.3908, "step": 4659 }, { "epoch": 0.37751134154244975, "grad_norm": 0.03542062267661095, "learning_rate": 0.00018874038072093968, "loss": 0.4156, "step": 4660 }, { "epoch": 0.37759235255994816, "grad_norm": 0.03663628175854683, "learning_rate": 0.00018878088294856218, "loss": 0.3556, "step": 4661 }, { "epoch": 0.3776733635774465, "grad_norm": 0.039453811943531036, "learning_rate": 0.00018882138517618472, "loss": 0.4064, "step": 4662 }, { "epoch": 0.37775437459494493, "grad_norm": 0.03887701779603958, "learning_rate": 0.00018886188740380722, "loss": 0.3667, "step": 4663 }, { "epoch": 0.3778353856124433, "grad_norm": 0.03565892577171326, "learning_rate": 0.00018890238963142976, "loss": 0.3649, "step": 4664 }, { "epoch": 0.3779163966299417, "grad_norm": 0.03139740228652954, "learning_rate": 0.00018894289185905226, "loss": 0.3656, "step": 4665 }, { "epoch": 0.37799740764744005, "grad_norm": 0.04097292944788933, "learning_rate": 0.00018898339408667477, "loss": 0.3594, "step": 4666 }, { "epoch": 0.3780784186649384, "grad_norm": 0.04518847167491913, "learning_rate": 0.0001890238963142973, "loss": 0.3792, "step": 4667 }, { "epoch": 0.3781594296824368, "grad_norm": 0.036011647433042526, "learning_rate": 0.0001890643985419198, "loss": 0.3838, "step": 4668 }, { "epoch": 0.3782404406999352, "grad_norm": 0.040306150913238525, "learning_rate": 0.00018910490076954231, "loss": 0.4105, "step": 4669 }, { "epoch": 0.3783214517174336, "grad_norm": 0.03284032270312309, "learning_rate": 0.00018914540299716485, "loss": 0.4012, "step": 4670 }, { "epoch": 0.37840246273493194, "grad_norm": 0.03616539016366005, "learning_rate": 0.00018918590522478735, "loss": 0.33, "step": 4671 }, { "epoch": 0.37848347375243035, "grad_norm": 0.04062451422214508, "learning_rate": 0.00018922640745240989, "loss": 0.4352, "step": 4672 }, { "epoch": 0.3785644847699287, "grad_norm": 0.03321415185928345, "learning_rate": 0.0001892669096800324, "loss": 0.4466, "step": 4673 }, { "epoch": 0.37864549578742707, "grad_norm": 0.03490709885954857, "learning_rate": 0.00018930741190765493, "loss": 0.3705, "step": 4674 }, { "epoch": 0.3787265068049255, "grad_norm": 0.03743315488100052, "learning_rate": 0.00018934791413527746, "loss": 0.3305, "step": 4675 }, { "epoch": 0.37880751782242383, "grad_norm": 0.038661107420921326, "learning_rate": 0.00018938841636289996, "loss": 0.3704, "step": 4676 }, { "epoch": 0.37888852883992225, "grad_norm": 0.033372364938259125, "learning_rate": 0.0001894289185905225, "loss": 0.3775, "step": 4677 }, { "epoch": 0.3789695398574206, "grad_norm": 0.031249843537807465, "learning_rate": 0.000189469420818145, "loss": 0.3183, "step": 4678 }, { "epoch": 0.379050550874919, "grad_norm": 0.034112848341464996, "learning_rate": 0.00018950992304576754, "loss": 0.3702, "step": 4679 }, { "epoch": 0.37913156189241737, "grad_norm": 0.02745191939175129, "learning_rate": 0.00018955042527339004, "loss": 0.3374, "step": 4680 }, { "epoch": 0.3792125729099157, "grad_norm": 0.03565063700079918, "learning_rate": 0.00018959092750101258, "loss": 0.3862, "step": 4681 }, { "epoch": 0.37929358392741414, "grad_norm": 0.030555015429854393, "learning_rate": 0.00018963142972863508, "loss": 0.3539, "step": 4682 }, { "epoch": 0.3793745949449125, "grad_norm": 0.032852426171302795, "learning_rate": 0.00018967193195625762, "loss": 0.3389, "step": 4683 }, { "epoch": 0.3794556059624109, "grad_norm": 0.036056555807590485, "learning_rate": 0.00018971243418388012, "loss": 0.3736, "step": 4684 }, { "epoch": 0.37953661697990926, "grad_norm": 0.03699024021625519, "learning_rate": 0.00018975293641150266, "loss": 0.3956, "step": 4685 }, { "epoch": 0.37961762799740767, "grad_norm": 0.03291408717632294, "learning_rate": 0.00018979343863912516, "loss": 0.3524, "step": 4686 }, { "epoch": 0.379698639014906, "grad_norm": 0.03387143090367317, "learning_rate": 0.0001898339408667477, "loss": 0.3661, "step": 4687 }, { "epoch": 0.3797796500324044, "grad_norm": 0.03629806637763977, "learning_rate": 0.0001898744430943702, "loss": 0.3653, "step": 4688 }, { "epoch": 0.3798606610499028, "grad_norm": 0.036638397723436356, "learning_rate": 0.00018991494532199273, "loss": 0.3592, "step": 4689 }, { "epoch": 0.37994167206740115, "grad_norm": 0.03318271040916443, "learning_rate": 0.00018995544754961524, "loss": 0.3674, "step": 4690 }, { "epoch": 0.38002268308489956, "grad_norm": 0.03417496010661125, "learning_rate": 0.00018999594977723775, "loss": 0.345, "step": 4691 }, { "epoch": 0.3801036941023979, "grad_norm": 0.029663294553756714, "learning_rate": 0.00019003645200486025, "loss": 0.3648, "step": 4692 }, { "epoch": 0.3801847051198963, "grad_norm": 0.03385043144226074, "learning_rate": 0.00019007695423248279, "loss": 0.3936, "step": 4693 }, { "epoch": 0.3802657161373947, "grad_norm": 0.03586571291089058, "learning_rate": 0.00019011745646010532, "loss": 0.4184, "step": 4694 }, { "epoch": 0.38034672715489304, "grad_norm": 0.04350770264863968, "learning_rate": 0.00019015795868772782, "loss": 0.4485, "step": 4695 }, { "epoch": 0.38042773817239145, "grad_norm": 0.03400348499417305, "learning_rate": 0.00019019846091535036, "loss": 0.3484, "step": 4696 }, { "epoch": 0.3805087491898898, "grad_norm": 0.03798680007457733, "learning_rate": 0.00019023896314297286, "loss": 0.3386, "step": 4697 }, { "epoch": 0.3805897602073882, "grad_norm": 0.031218959018588066, "learning_rate": 0.0001902794653705954, "loss": 0.3039, "step": 4698 }, { "epoch": 0.3806707712248866, "grad_norm": 0.031623970717191696, "learning_rate": 0.0001903199675982179, "loss": 0.3518, "step": 4699 }, { "epoch": 0.380751782242385, "grad_norm": 0.034070685505867004, "learning_rate": 0.00019036046982584044, "loss": 0.3822, "step": 4700 }, { "epoch": 0.38083279325988334, "grad_norm": 0.03226622939109802, "learning_rate": 0.00019040097205346294, "loss": 0.4141, "step": 4701 }, { "epoch": 0.38091380427738175, "grad_norm": 0.04006161540746689, "learning_rate": 0.00019044147428108548, "loss": 0.3833, "step": 4702 }, { "epoch": 0.3809948152948801, "grad_norm": 0.03199142962694168, "learning_rate": 0.00019048197650870798, "loss": 0.3583, "step": 4703 }, { "epoch": 0.38107582631237846, "grad_norm": 0.033569540828466415, "learning_rate": 0.00019052247873633052, "loss": 0.3401, "step": 4704 }, { "epoch": 0.3811568373298769, "grad_norm": 0.03711489588022232, "learning_rate": 0.00019056298096395302, "loss": 0.4158, "step": 4705 }, { "epoch": 0.38123784834737523, "grad_norm": 0.035843729972839355, "learning_rate": 0.00019060348319157555, "loss": 0.4146, "step": 4706 }, { "epoch": 0.38131885936487364, "grad_norm": 0.03303459659218788, "learning_rate": 0.00019064398541919806, "loss": 0.311, "step": 4707 }, { "epoch": 0.381399870382372, "grad_norm": 0.03796025365591049, "learning_rate": 0.0001906844876468206, "loss": 0.3822, "step": 4708 }, { "epoch": 0.3814808813998704, "grad_norm": 0.0355183407664299, "learning_rate": 0.0001907249898744431, "loss": 0.4254, "step": 4709 }, { "epoch": 0.38156189241736876, "grad_norm": 0.034124333411455154, "learning_rate": 0.00019076549210206563, "loss": 0.3445, "step": 4710 }, { "epoch": 0.3816429034348671, "grad_norm": 0.03657494857907295, "learning_rate": 0.00019080599432968814, "loss": 0.3568, "step": 4711 }, { "epoch": 0.38172391445236553, "grad_norm": 0.03229145705699921, "learning_rate": 0.00019084649655731067, "loss": 0.338, "step": 4712 }, { "epoch": 0.3818049254698639, "grad_norm": 0.03623109310865402, "learning_rate": 0.00019088699878493318, "loss": 0.3769, "step": 4713 }, { "epoch": 0.3818859364873623, "grad_norm": 0.03175680711865425, "learning_rate": 0.00019092750101255569, "loss": 0.3516, "step": 4714 }, { "epoch": 0.38196694750486065, "grad_norm": 0.033988941460847855, "learning_rate": 0.00019096800324017822, "loss": 0.35, "step": 4715 }, { "epoch": 0.38204795852235907, "grad_norm": 0.03612534701824188, "learning_rate": 0.00019100850546780072, "loss": 0.3905, "step": 4716 }, { "epoch": 0.3821289695398574, "grad_norm": 0.03572618588805199, "learning_rate": 0.00019104900769542326, "loss": 0.365, "step": 4717 }, { "epoch": 0.3822099805573558, "grad_norm": 0.02998768351972103, "learning_rate": 0.00019108950992304576, "loss": 0.3414, "step": 4718 }, { "epoch": 0.3822909915748542, "grad_norm": 0.0337430015206337, "learning_rate": 0.0001911300121506683, "loss": 0.3381, "step": 4719 }, { "epoch": 0.38237200259235254, "grad_norm": 0.034730587154626846, "learning_rate": 0.0001911705143782908, "loss": 0.4033, "step": 4720 }, { "epoch": 0.38245301360985096, "grad_norm": 0.03520103171467781, "learning_rate": 0.00019121101660591334, "loss": 0.3705, "step": 4721 }, { "epoch": 0.3825340246273493, "grad_norm": 0.03200915828347206, "learning_rate": 0.00019125151883353584, "loss": 0.3908, "step": 4722 }, { "epoch": 0.3826150356448477, "grad_norm": 0.04115212708711624, "learning_rate": 0.00019129202106115838, "loss": 0.3934, "step": 4723 }, { "epoch": 0.3826960466623461, "grad_norm": 0.0342140719294548, "learning_rate": 0.00019133252328878088, "loss": 0.3553, "step": 4724 }, { "epoch": 0.38277705767984443, "grad_norm": 0.04393256828188896, "learning_rate": 0.00019137302551640342, "loss": 0.3451, "step": 4725 }, { "epoch": 0.38285806869734285, "grad_norm": 0.032705143094062805, "learning_rate": 0.00019141352774402592, "loss": 0.3812, "step": 4726 }, { "epoch": 0.3829390797148412, "grad_norm": 0.028380325064063072, "learning_rate": 0.00019145402997164845, "loss": 0.3414, "step": 4727 }, { "epoch": 0.3830200907323396, "grad_norm": 0.03669360280036926, "learning_rate": 0.00019149453219927096, "loss": 0.3819, "step": 4728 }, { "epoch": 0.38310110174983797, "grad_norm": 0.030903344973921776, "learning_rate": 0.0001915350344268935, "loss": 0.3984, "step": 4729 }, { "epoch": 0.3831821127673364, "grad_norm": 0.035275667905807495, "learning_rate": 0.000191575536654516, "loss": 0.3246, "step": 4730 }, { "epoch": 0.38326312378483474, "grad_norm": 0.03556999936699867, "learning_rate": 0.00019161603888213853, "loss": 0.379, "step": 4731 }, { "epoch": 0.3833441348023331, "grad_norm": 0.0267938245087862, "learning_rate": 0.00019165654110976107, "loss": 0.3089, "step": 4732 }, { "epoch": 0.3834251458198315, "grad_norm": 0.03239806741476059, "learning_rate": 0.00019169704333738357, "loss": 0.336, "step": 4733 }, { "epoch": 0.38350615683732986, "grad_norm": 0.027912812307476997, "learning_rate": 0.0001917375455650061, "loss": 0.3173, "step": 4734 }, { "epoch": 0.38358716785482827, "grad_norm": 0.034143056720495224, "learning_rate": 0.0001917780477926286, "loss": 0.3243, "step": 4735 }, { "epoch": 0.3836681788723266, "grad_norm": 0.03252992406487465, "learning_rate": 0.00019181855002025112, "loss": 0.3815, "step": 4736 }, { "epoch": 0.38374918988982504, "grad_norm": 0.03119749017059803, "learning_rate": 0.00019185905224787365, "loss": 0.3537, "step": 4737 }, { "epoch": 0.3838302009073234, "grad_norm": 0.033663660287857056, "learning_rate": 0.00019189955447549616, "loss": 0.3263, "step": 4738 }, { "epoch": 0.38391121192482175, "grad_norm": 0.03443240374326706, "learning_rate": 0.00019194005670311866, "loss": 0.4121, "step": 4739 }, { "epoch": 0.38399222294232016, "grad_norm": 0.031480759382247925, "learning_rate": 0.0001919805589307412, "loss": 0.3269, "step": 4740 }, { "epoch": 0.3840732339598185, "grad_norm": 0.03234820440411568, "learning_rate": 0.0001920210611583637, "loss": 0.3758, "step": 4741 }, { "epoch": 0.3841542449773169, "grad_norm": 0.03727741912007332, "learning_rate": 0.00019206156338598624, "loss": 0.3401, "step": 4742 }, { "epoch": 0.3842352559948153, "grad_norm": 0.03509011119604111, "learning_rate": 0.00019210206561360874, "loss": 0.3712, "step": 4743 }, { "epoch": 0.3843162670123137, "grad_norm": 0.03658130764961243, "learning_rate": 0.00019214256784123128, "loss": 0.3807, "step": 4744 }, { "epoch": 0.38439727802981205, "grad_norm": 0.03258894011378288, "learning_rate": 0.00019218307006885378, "loss": 0.3832, "step": 4745 }, { "epoch": 0.38447828904731046, "grad_norm": 0.03701847419142723, "learning_rate": 0.00019222357229647631, "loss": 0.3734, "step": 4746 }, { "epoch": 0.3845593000648088, "grad_norm": 0.028350885957479477, "learning_rate": 0.00019226407452409882, "loss": 0.3574, "step": 4747 }, { "epoch": 0.3846403110823072, "grad_norm": 0.03285471722483635, "learning_rate": 0.00019230457675172135, "loss": 0.3566, "step": 4748 }, { "epoch": 0.3847213220998056, "grad_norm": 0.03530566021800041, "learning_rate": 0.00019234507897934386, "loss": 0.4205, "step": 4749 }, { "epoch": 0.38480233311730394, "grad_norm": 0.037856727838516235, "learning_rate": 0.0001923855812069664, "loss": 0.3725, "step": 4750 }, { "epoch": 0.38488334413480235, "grad_norm": 0.033076632767915726, "learning_rate": 0.00019242608343458893, "loss": 0.3962, "step": 4751 }, { "epoch": 0.3849643551523007, "grad_norm": 0.027378113940358162, "learning_rate": 0.00019246658566221143, "loss": 0.3608, "step": 4752 }, { "epoch": 0.3850453661697991, "grad_norm": 0.03405110165476799, "learning_rate": 0.00019250708788983397, "loss": 0.3823, "step": 4753 }, { "epoch": 0.3851263771872975, "grad_norm": 0.032293081283569336, "learning_rate": 0.00019254759011745647, "loss": 0.3116, "step": 4754 }, { "epoch": 0.38520738820479583, "grad_norm": 0.02823660336434841, "learning_rate": 0.000192588092345079, "loss": 0.3263, "step": 4755 }, { "epoch": 0.38528839922229424, "grad_norm": 0.03242744132876396, "learning_rate": 0.0001926285945727015, "loss": 0.3583, "step": 4756 }, { "epoch": 0.3853694102397926, "grad_norm": 0.03275972232222557, "learning_rate": 0.00019266909680032404, "loss": 0.386, "step": 4757 }, { "epoch": 0.385450421257291, "grad_norm": 0.03530082106590271, "learning_rate": 0.00019270959902794655, "loss": 0.3903, "step": 4758 }, { "epoch": 0.38553143227478937, "grad_norm": 0.03208938613533974, "learning_rate": 0.00019275010125556908, "loss": 0.4056, "step": 4759 }, { "epoch": 0.3856124432922878, "grad_norm": 0.024658476933836937, "learning_rate": 0.0001927906034831916, "loss": 0.3006, "step": 4760 }, { "epoch": 0.38569345430978613, "grad_norm": 0.028631461784243584, "learning_rate": 0.0001928311057108141, "loss": 0.3247, "step": 4761 }, { "epoch": 0.3857744653272845, "grad_norm": 0.03463675081729889, "learning_rate": 0.00019287160793843663, "loss": 0.361, "step": 4762 }, { "epoch": 0.3858554763447829, "grad_norm": 0.03447446972131729, "learning_rate": 0.00019291211016605914, "loss": 0.3156, "step": 4763 }, { "epoch": 0.38593648736228126, "grad_norm": 0.03354298695921898, "learning_rate": 0.00019295261239368164, "loss": 0.3552, "step": 4764 }, { "epoch": 0.38601749837977967, "grad_norm": 0.033569224178791046, "learning_rate": 0.00019299311462130417, "loss": 0.3036, "step": 4765 }, { "epoch": 0.386098509397278, "grad_norm": 0.0391860231757164, "learning_rate": 0.00019303361684892668, "loss": 0.3781, "step": 4766 }, { "epoch": 0.38617952041477643, "grad_norm": 0.03534352034330368, "learning_rate": 0.00019307411907654921, "loss": 0.3552, "step": 4767 }, { "epoch": 0.3862605314322748, "grad_norm": 0.03565150871872902, "learning_rate": 0.00019311462130417172, "loss": 0.3377, "step": 4768 }, { "epoch": 0.38634154244977315, "grad_norm": 0.037752967327833176, "learning_rate": 0.00019315512353179425, "loss": 0.3448, "step": 4769 }, { "epoch": 0.38642255346727156, "grad_norm": 0.035720087587833405, "learning_rate": 0.0001931956257594168, "loss": 0.3834, "step": 4770 }, { "epoch": 0.3865035644847699, "grad_norm": 0.03224213048815727, "learning_rate": 0.0001932361279870393, "loss": 0.3471, "step": 4771 }, { "epoch": 0.3865845755022683, "grad_norm": 0.03377991542220116, "learning_rate": 0.00019327663021466183, "loss": 0.3652, "step": 4772 }, { "epoch": 0.3866655865197667, "grad_norm": 0.0403682179749012, "learning_rate": 0.00019331713244228433, "loss": 0.3504, "step": 4773 }, { "epoch": 0.3867465975372651, "grad_norm": 0.030859939754009247, "learning_rate": 0.00019335763466990687, "loss": 0.3318, "step": 4774 }, { "epoch": 0.38682760855476345, "grad_norm": 0.032264601439237595, "learning_rate": 0.00019339813689752937, "loss": 0.3543, "step": 4775 }, { "epoch": 0.3869086195722618, "grad_norm": 0.035585030913352966, "learning_rate": 0.0001934386391251519, "loss": 0.3883, "step": 4776 }, { "epoch": 0.3869896305897602, "grad_norm": 0.03770442306995392, "learning_rate": 0.0001934791413527744, "loss": 0.3422, "step": 4777 }, { "epoch": 0.38707064160725857, "grad_norm": 0.033499933779239655, "learning_rate": 0.00019351964358039694, "loss": 0.3829, "step": 4778 }, { "epoch": 0.387151652624757, "grad_norm": 0.03047235682606697, "learning_rate": 0.00019356014580801945, "loss": 0.3219, "step": 4779 }, { "epoch": 0.38723266364225534, "grad_norm": 0.032883550971746445, "learning_rate": 0.00019360064803564198, "loss": 0.339, "step": 4780 }, { "epoch": 0.38731367465975375, "grad_norm": 0.03074515610933304, "learning_rate": 0.0001936411502632645, "loss": 0.3819, "step": 4781 }, { "epoch": 0.3873946856772521, "grad_norm": 0.03032887913286686, "learning_rate": 0.00019368165249088702, "loss": 0.4051, "step": 4782 }, { "epoch": 0.38747569669475046, "grad_norm": 0.03172651305794716, "learning_rate": 0.00019372215471850953, "loss": 0.3666, "step": 4783 }, { "epoch": 0.38755670771224887, "grad_norm": 0.036670222878456116, "learning_rate": 0.00019376265694613206, "loss": 0.4072, "step": 4784 }, { "epoch": 0.3876377187297472, "grad_norm": 0.03976541385054588, "learning_rate": 0.00019380315917375457, "loss": 0.4105, "step": 4785 }, { "epoch": 0.38771872974724564, "grad_norm": 0.03309793770313263, "learning_rate": 0.00019384366140137707, "loss": 0.3866, "step": 4786 }, { "epoch": 0.387799740764744, "grad_norm": 0.044504258781671524, "learning_rate": 0.00019388416362899958, "loss": 0.3373, "step": 4787 }, { "epoch": 0.3878807517822424, "grad_norm": 0.02997519262135029, "learning_rate": 0.0001939246658566221, "loss": 0.3295, "step": 4788 }, { "epoch": 0.38796176279974076, "grad_norm": 0.03902506083250046, "learning_rate": 0.00019396516808424465, "loss": 0.3407, "step": 4789 }, { "epoch": 0.3880427738172391, "grad_norm": 0.02696751430630684, "learning_rate": 0.00019400567031186715, "loss": 0.3298, "step": 4790 }, { "epoch": 0.38812378483473753, "grad_norm": 0.03264418616890907, "learning_rate": 0.00019404617253948969, "loss": 0.3433, "step": 4791 }, { "epoch": 0.3882047958522359, "grad_norm": 0.029885264113545418, "learning_rate": 0.0001940866747671122, "loss": 0.3605, "step": 4792 }, { "epoch": 0.3882858068697343, "grad_norm": 0.03691829741001129, "learning_rate": 0.00019412717699473473, "loss": 0.3941, "step": 4793 }, { "epoch": 0.38836681788723265, "grad_norm": 0.02847517654299736, "learning_rate": 0.00019416767922235723, "loss": 0.3088, "step": 4794 }, { "epoch": 0.38844782890473106, "grad_norm": 0.03323289379477501, "learning_rate": 0.00019420818144997976, "loss": 0.3716, "step": 4795 }, { "epoch": 0.3885288399222294, "grad_norm": 0.04099169000983238, "learning_rate": 0.00019424868367760227, "loss": 0.3842, "step": 4796 }, { "epoch": 0.38860985093972783, "grad_norm": 0.03556416183710098, "learning_rate": 0.0001942891859052248, "loss": 0.3681, "step": 4797 }, { "epoch": 0.3886908619572262, "grad_norm": 0.029316971078515053, "learning_rate": 0.0001943296881328473, "loss": 0.3185, "step": 4798 }, { "epoch": 0.38877187297472454, "grad_norm": 0.034768927842378616, "learning_rate": 0.00019437019036046984, "loss": 0.3992, "step": 4799 }, { "epoch": 0.38885288399222295, "grad_norm": 0.03491413965821266, "learning_rate": 0.00019441069258809235, "loss": 0.3631, "step": 4800 }, { "epoch": 0.3889338950097213, "grad_norm": 0.0341835580766201, "learning_rate": 0.00019445119481571488, "loss": 0.3734, "step": 4801 }, { "epoch": 0.3890149060272197, "grad_norm": 0.0318155474960804, "learning_rate": 0.0001944916970433374, "loss": 0.3113, "step": 4802 }, { "epoch": 0.3890959170447181, "grad_norm": 0.030088962987065315, "learning_rate": 0.00019453219927095992, "loss": 0.3346, "step": 4803 }, { "epoch": 0.3891769280622165, "grad_norm": 0.03774889186024666, "learning_rate": 0.00019457270149858243, "loss": 0.3834, "step": 4804 }, { "epoch": 0.38925793907971484, "grad_norm": 0.036289505660533905, "learning_rate": 0.00019461320372620496, "loss": 0.3579, "step": 4805 }, { "epoch": 0.3893389500972132, "grad_norm": 0.03278936818242073, "learning_rate": 0.00019465370595382747, "loss": 0.3482, "step": 4806 }, { "epoch": 0.3894199611147116, "grad_norm": 0.032946351915597916, "learning_rate": 0.00019469420818145, "loss": 0.3933, "step": 4807 }, { "epoch": 0.38950097213220997, "grad_norm": 0.031581562012434006, "learning_rate": 0.0001947347104090725, "loss": 0.3418, "step": 4808 }, { "epoch": 0.3895819831497084, "grad_norm": 0.03233380988240242, "learning_rate": 0.000194775212636695, "loss": 0.3888, "step": 4809 }, { "epoch": 0.38966299416720673, "grad_norm": 0.031648050993680954, "learning_rate": 0.00019481571486431755, "loss": 0.3344, "step": 4810 }, { "epoch": 0.38974400518470514, "grad_norm": 0.029684975743293762, "learning_rate": 0.00019485621709194005, "loss": 0.3455, "step": 4811 }, { "epoch": 0.3898250162022035, "grad_norm": 0.03547152504324913, "learning_rate": 0.00019489671931956259, "loss": 0.3635, "step": 4812 }, { "epoch": 0.38990602721970186, "grad_norm": 0.031869180500507355, "learning_rate": 0.0001949372215471851, "loss": 0.3015, "step": 4813 }, { "epoch": 0.38998703823720027, "grad_norm": 0.031538963317871094, "learning_rate": 0.00019497772377480762, "loss": 0.3453, "step": 4814 }, { "epoch": 0.3900680492546986, "grad_norm": 0.02930673398077488, "learning_rate": 0.00019501822600243013, "loss": 0.3513, "step": 4815 }, { "epoch": 0.39014906027219703, "grad_norm": 0.028427084907889366, "learning_rate": 0.00019505872823005266, "loss": 0.3621, "step": 4816 }, { "epoch": 0.3902300712896954, "grad_norm": 0.028040811419487, "learning_rate": 0.00019509923045767517, "loss": 0.3447, "step": 4817 }, { "epoch": 0.3903110823071938, "grad_norm": 0.033528126776218414, "learning_rate": 0.0001951397326852977, "loss": 0.3731, "step": 4818 }, { "epoch": 0.39039209332469216, "grad_norm": 0.03092779777944088, "learning_rate": 0.0001951802349129202, "loss": 0.3436, "step": 4819 }, { "epoch": 0.3904731043421905, "grad_norm": 0.032316166907548904, "learning_rate": 0.00019522073714054274, "loss": 0.3815, "step": 4820 }, { "epoch": 0.3905541153596889, "grad_norm": 0.03477739542722702, "learning_rate": 0.00019526123936816525, "loss": 0.372, "step": 4821 }, { "epoch": 0.3906351263771873, "grad_norm": 0.03641634061932564, "learning_rate": 0.00019530174159578778, "loss": 0.3085, "step": 4822 }, { "epoch": 0.3907161373946857, "grad_norm": 0.03308749943971634, "learning_rate": 0.0001953422438234103, "loss": 0.3569, "step": 4823 }, { "epoch": 0.39079714841218405, "grad_norm": 0.03331739082932472, "learning_rate": 0.00019538274605103282, "loss": 0.4018, "step": 4824 }, { "epoch": 0.39087815942968246, "grad_norm": 0.030000533908605576, "learning_rate": 0.00019542324827865535, "loss": 0.375, "step": 4825 }, { "epoch": 0.3909591704471808, "grad_norm": 0.03186730295419693, "learning_rate": 0.00019546375050627786, "loss": 0.3682, "step": 4826 }, { "epoch": 0.39104018146467917, "grad_norm": 0.030771762132644653, "learning_rate": 0.0001955042527339004, "loss": 0.4015, "step": 4827 }, { "epoch": 0.3911211924821776, "grad_norm": 0.031149568036198616, "learning_rate": 0.0001955447549615229, "loss": 0.38, "step": 4828 }, { "epoch": 0.39120220349967594, "grad_norm": 0.030003225430846214, "learning_rate": 0.00019558525718914543, "loss": 0.3638, "step": 4829 }, { "epoch": 0.39128321451717435, "grad_norm": 0.03363025560975075, "learning_rate": 0.00019562575941676794, "loss": 0.3597, "step": 4830 }, { "epoch": 0.3913642255346727, "grad_norm": 0.03597572073340416, "learning_rate": 0.00019566626164439045, "loss": 0.375, "step": 4831 }, { "epoch": 0.3914452365521711, "grad_norm": 0.03270312398672104, "learning_rate": 0.00019570676387201298, "loss": 0.3947, "step": 4832 }, { "epoch": 0.39152624756966947, "grad_norm": 0.03423533961176872, "learning_rate": 0.00019574726609963549, "loss": 0.3833, "step": 4833 }, { "epoch": 0.39160725858716783, "grad_norm": 0.03765803575515747, "learning_rate": 0.000195787768327258, "loss": 0.3912, "step": 4834 }, { "epoch": 0.39168826960466624, "grad_norm": 0.03126109763979912, "learning_rate": 0.00019582827055488052, "loss": 0.3802, "step": 4835 }, { "epoch": 0.3917692806221646, "grad_norm": 0.029166357591748238, "learning_rate": 0.00019586877278250303, "loss": 0.3504, "step": 4836 }, { "epoch": 0.391850291639663, "grad_norm": 0.042333848774433136, "learning_rate": 0.00019590927501012556, "loss": 0.3482, "step": 4837 }, { "epoch": 0.39193130265716136, "grad_norm": 0.03157378360629082, "learning_rate": 0.00019594977723774807, "loss": 0.3392, "step": 4838 }, { "epoch": 0.3920123136746598, "grad_norm": 0.03886628523468971, "learning_rate": 0.0001959902794653706, "loss": 0.407, "step": 4839 }, { "epoch": 0.39209332469215813, "grad_norm": 0.0372137650847435, "learning_rate": 0.0001960307816929931, "loss": 0.3892, "step": 4840 }, { "epoch": 0.39217433570965654, "grad_norm": 0.03133906424045563, "learning_rate": 0.00019607128392061564, "loss": 0.3363, "step": 4841 }, { "epoch": 0.3922553467271549, "grad_norm": 0.031031839549541473, "learning_rate": 0.00019611178614823815, "loss": 0.3423, "step": 4842 }, { "epoch": 0.39233635774465325, "grad_norm": 0.031718261539936066, "learning_rate": 0.00019615228837586068, "loss": 0.3545, "step": 4843 }, { "epoch": 0.39241736876215166, "grad_norm": 0.03188610449433327, "learning_rate": 0.00019619279060348321, "loss": 0.3426, "step": 4844 }, { "epoch": 0.39249837977965, "grad_norm": 0.039292700588703156, "learning_rate": 0.00019623329283110572, "loss": 0.3579, "step": 4845 }, { "epoch": 0.39257939079714843, "grad_norm": 0.03364202752709389, "learning_rate": 0.00019627379505872825, "loss": 0.4005, "step": 4846 }, { "epoch": 0.3926604018146468, "grad_norm": 0.03354499489068985, "learning_rate": 0.00019631429728635076, "loss": 0.3804, "step": 4847 }, { "epoch": 0.3927414128321452, "grad_norm": 0.032901547849178314, "learning_rate": 0.0001963547995139733, "loss": 0.3656, "step": 4848 }, { "epoch": 0.39282242384964355, "grad_norm": 0.042210184037685394, "learning_rate": 0.0001963953017415958, "loss": 0.3377, "step": 4849 }, { "epoch": 0.3929034348671419, "grad_norm": 0.034993741661310196, "learning_rate": 0.00019643580396921833, "loss": 0.3423, "step": 4850 }, { "epoch": 0.3929844458846403, "grad_norm": 0.03247562050819397, "learning_rate": 0.00019647630619684084, "loss": 0.3361, "step": 4851 }, { "epoch": 0.3930654569021387, "grad_norm": 0.0300375297665596, "learning_rate": 0.00019651680842446337, "loss": 0.3598, "step": 4852 }, { "epoch": 0.3931464679196371, "grad_norm": 0.037401050329208374, "learning_rate": 0.00019655731065208588, "loss": 0.432, "step": 4853 }, { "epoch": 0.39322747893713544, "grad_norm": 0.02842889353632927, "learning_rate": 0.0001965978128797084, "loss": 0.388, "step": 4854 }, { "epoch": 0.39330848995463386, "grad_norm": 0.0326065756380558, "learning_rate": 0.00019663831510733092, "loss": 0.3725, "step": 4855 }, { "epoch": 0.3933895009721322, "grad_norm": 0.02978591062128544, "learning_rate": 0.00019667881733495342, "loss": 0.3362, "step": 4856 }, { "epoch": 0.39347051198963057, "grad_norm": 0.03818797692656517, "learning_rate": 0.00019671931956257593, "loss": 0.3322, "step": 4857 }, { "epoch": 0.393551523007129, "grad_norm": 0.03465355560183525, "learning_rate": 0.00019675982179019846, "loss": 0.3494, "step": 4858 }, { "epoch": 0.39363253402462733, "grad_norm": 0.03535063937306404, "learning_rate": 0.00019680032401782097, "loss": 0.3486, "step": 4859 }, { "epoch": 0.39371354504212575, "grad_norm": 0.03481747955083847, "learning_rate": 0.0001968408262454435, "loss": 0.3884, "step": 4860 }, { "epoch": 0.3937945560596241, "grad_norm": 0.03376191481947899, "learning_rate": 0.000196881328473066, "loss": 0.3584, "step": 4861 }, { "epoch": 0.3938755670771225, "grad_norm": 0.03966887295246124, "learning_rate": 0.00019692183070068854, "loss": 0.3825, "step": 4862 }, { "epoch": 0.39395657809462087, "grad_norm": 0.03059571236371994, "learning_rate": 0.00019696233292831108, "loss": 0.3963, "step": 4863 }, { "epoch": 0.3940375891121192, "grad_norm": 0.03555373474955559, "learning_rate": 0.00019700283515593358, "loss": 0.3657, "step": 4864 }, { "epoch": 0.39411860012961764, "grad_norm": 0.034829266369342804, "learning_rate": 0.00019704333738355611, "loss": 0.3778, "step": 4865 }, { "epoch": 0.394199611147116, "grad_norm": 0.04059548303484917, "learning_rate": 0.00019708383961117862, "loss": 0.3893, "step": 4866 }, { "epoch": 0.3942806221646144, "grad_norm": 0.03126801922917366, "learning_rate": 0.00019712434183880115, "loss": 0.3812, "step": 4867 }, { "epoch": 0.39436163318211276, "grad_norm": 0.04134489223361015, "learning_rate": 0.00019716484406642366, "loss": 0.4021, "step": 4868 }, { "epoch": 0.39444264419961117, "grad_norm": 0.03955182060599327, "learning_rate": 0.0001972053462940462, "loss": 0.3436, "step": 4869 }, { "epoch": 0.3945236552171095, "grad_norm": 0.04063299670815468, "learning_rate": 0.0001972458485216687, "loss": 0.4133, "step": 4870 }, { "epoch": 0.3946046662346079, "grad_norm": 0.03148097172379494, "learning_rate": 0.00019728635074929123, "loss": 0.3633, "step": 4871 }, { "epoch": 0.3946856772521063, "grad_norm": 0.03044656105339527, "learning_rate": 0.00019732685297691374, "loss": 0.3714, "step": 4872 }, { "epoch": 0.39476668826960465, "grad_norm": 0.032794658094644547, "learning_rate": 0.00019736735520453627, "loss": 0.3534, "step": 4873 }, { "epoch": 0.39484769928710306, "grad_norm": 0.033350620418787, "learning_rate": 0.00019740785743215878, "loss": 0.401, "step": 4874 }, { "epoch": 0.3949287103046014, "grad_norm": 0.030544232577085495, "learning_rate": 0.0001974483596597813, "loss": 0.3758, "step": 4875 }, { "epoch": 0.3950097213220998, "grad_norm": 0.03313911333680153, "learning_rate": 0.00019748886188740382, "loss": 0.3789, "step": 4876 }, { "epoch": 0.3950907323395982, "grad_norm": 0.02834252268075943, "learning_rate": 0.00019752936411502635, "loss": 0.3096, "step": 4877 }, { "epoch": 0.39517174335709654, "grad_norm": 0.03375416249036789, "learning_rate": 0.00019756986634264886, "loss": 0.3144, "step": 4878 }, { "epoch": 0.39525275437459495, "grad_norm": 0.033389367163181305, "learning_rate": 0.00019761036857027136, "loss": 0.3339, "step": 4879 }, { "epoch": 0.3953337653920933, "grad_norm": 0.03308767452836037, "learning_rate": 0.0001976508707978939, "loss": 0.3632, "step": 4880 }, { "epoch": 0.3954147764095917, "grad_norm": 0.03741266205906868, "learning_rate": 0.0001976913730255164, "loss": 0.375, "step": 4881 }, { "epoch": 0.3954957874270901, "grad_norm": 0.03020811639726162, "learning_rate": 0.00019773187525313894, "loss": 0.357, "step": 4882 }, { "epoch": 0.3955767984445885, "grad_norm": 0.042745232582092285, "learning_rate": 0.00019777237748076144, "loss": 0.3791, "step": 4883 }, { "epoch": 0.39565780946208684, "grad_norm": 0.03163204714655876, "learning_rate": 0.00019781287970838397, "loss": 0.3984, "step": 4884 }, { "epoch": 0.3957388204795852, "grad_norm": 0.037180181592702866, "learning_rate": 0.00019785338193600648, "loss": 0.3619, "step": 4885 }, { "epoch": 0.3958198314970836, "grad_norm": 0.033253274857997894, "learning_rate": 0.00019789388416362901, "loss": 0.3699, "step": 4886 }, { "epoch": 0.39590084251458196, "grad_norm": 0.03679104149341583, "learning_rate": 0.00019793438639125152, "loss": 0.3738, "step": 4887 }, { "epoch": 0.3959818535320804, "grad_norm": 0.033668775111436844, "learning_rate": 0.00019797488861887405, "loss": 0.3771, "step": 4888 }, { "epoch": 0.39606286454957873, "grad_norm": 0.03718390315771103, "learning_rate": 0.00019801539084649656, "loss": 0.3656, "step": 4889 }, { "epoch": 0.39614387556707714, "grad_norm": 0.03134991228580475, "learning_rate": 0.0001980558930741191, "loss": 0.369, "step": 4890 }, { "epoch": 0.3962248865845755, "grad_norm": 0.029683059081435204, "learning_rate": 0.0001980963953017416, "loss": 0.3571, "step": 4891 }, { "epoch": 0.3963058976020739, "grad_norm": 0.03865545615553856, "learning_rate": 0.00019813689752936413, "loss": 0.3787, "step": 4892 }, { "epoch": 0.39638690861957226, "grad_norm": 0.031773313879966736, "learning_rate": 0.00019817739975698664, "loss": 0.3539, "step": 4893 }, { "epoch": 0.3964679196370706, "grad_norm": 0.03601973503828049, "learning_rate": 0.00019821790198460917, "loss": 0.3613, "step": 4894 }, { "epoch": 0.39654893065456903, "grad_norm": 0.034716833382844925, "learning_rate": 0.00019825840421223168, "loss": 0.3715, "step": 4895 }, { "epoch": 0.3966299416720674, "grad_norm": 0.036405500024557114, "learning_rate": 0.0001982989064398542, "loss": 0.3478, "step": 4896 }, { "epoch": 0.3967109526895658, "grad_norm": 0.0369153767824173, "learning_rate": 0.00019833940866747672, "loss": 0.402, "step": 4897 }, { "epoch": 0.39679196370706415, "grad_norm": 0.03298967704176903, "learning_rate": 0.00019837991089509925, "loss": 0.3493, "step": 4898 }, { "epoch": 0.39687297472456257, "grad_norm": 0.0389755517244339, "learning_rate": 0.00019842041312272176, "loss": 0.3571, "step": 4899 }, { "epoch": 0.3969539857420609, "grad_norm": 0.030207008123397827, "learning_rate": 0.0001984609153503443, "loss": 0.3154, "step": 4900 }, { "epoch": 0.3970349967595593, "grad_norm": 0.032016366720199585, "learning_rate": 0.0001985014175779668, "loss": 0.386, "step": 4901 }, { "epoch": 0.3971160077770577, "grad_norm": 0.03134904429316521, "learning_rate": 0.00019854191980558933, "loss": 0.3251, "step": 4902 }, { "epoch": 0.39719701879455604, "grad_norm": 0.048683155328035355, "learning_rate": 0.00019858242203321183, "loss": 0.3897, "step": 4903 }, { "epoch": 0.39727802981205446, "grad_norm": 0.0321296788752079, "learning_rate": 0.00019862292426083434, "loss": 0.3607, "step": 4904 }, { "epoch": 0.3973590408295528, "grad_norm": 0.03436422720551491, "learning_rate": 0.00019866342648845687, "loss": 0.344, "step": 4905 }, { "epoch": 0.3974400518470512, "grad_norm": 0.03357069194316864, "learning_rate": 0.00019870392871607938, "loss": 0.3771, "step": 4906 }, { "epoch": 0.3975210628645496, "grad_norm": 0.035833247005939484, "learning_rate": 0.0001987444309437019, "loss": 0.3856, "step": 4907 }, { "epoch": 0.39760207388204793, "grad_norm": 0.032385412603616714, "learning_rate": 0.00019878493317132442, "loss": 0.3829, "step": 4908 }, { "epoch": 0.39768308489954635, "grad_norm": 0.05283757671713829, "learning_rate": 0.00019882543539894695, "loss": 0.4404, "step": 4909 }, { "epoch": 0.3977640959170447, "grad_norm": 0.03171170875430107, "learning_rate": 0.00019886593762656946, "loss": 0.314, "step": 4910 }, { "epoch": 0.3978451069345431, "grad_norm": 0.04072034731507301, "learning_rate": 0.000198906439854192, "loss": 0.378, "step": 4911 }, { "epoch": 0.39792611795204147, "grad_norm": 0.03641786426305771, "learning_rate": 0.0001989469420818145, "loss": 0.3439, "step": 4912 }, { "epoch": 0.3980071289695399, "grad_norm": 0.03854703903198242, "learning_rate": 0.00019898744430943703, "loss": 0.3843, "step": 4913 }, { "epoch": 0.39808813998703824, "grad_norm": 0.031505096703767776, "learning_rate": 0.00019902794653705954, "loss": 0.3507, "step": 4914 }, { "epoch": 0.3981691510045366, "grad_norm": 0.03177189454436302, "learning_rate": 0.00019906844876468207, "loss": 0.3834, "step": 4915 }, { "epoch": 0.398250162022035, "grad_norm": 0.03535529226064682, "learning_rate": 0.00019910895099230458, "loss": 0.3542, "step": 4916 }, { "epoch": 0.39833117303953336, "grad_norm": 0.029137831181287766, "learning_rate": 0.0001991494532199271, "loss": 0.3283, "step": 4917 }, { "epoch": 0.39841218405703177, "grad_norm": 0.032600197941064835, "learning_rate": 0.00019918995544754962, "loss": 0.4159, "step": 4918 }, { "epoch": 0.3984931950745301, "grad_norm": 0.03417445346713066, "learning_rate": 0.00019923045767517215, "loss": 0.3524, "step": 4919 }, { "epoch": 0.39857420609202854, "grad_norm": 0.037261370569467545, "learning_rate": 0.00019927095990279468, "loss": 0.3115, "step": 4920 }, { "epoch": 0.3986552171095269, "grad_norm": 0.036581240594387054, "learning_rate": 0.0001993114621304172, "loss": 0.3512, "step": 4921 }, { "epoch": 0.39873622812702525, "grad_norm": 0.03378806263208389, "learning_rate": 0.00019935196435803972, "loss": 0.3433, "step": 4922 }, { "epoch": 0.39881723914452366, "grad_norm": 0.0315818227827549, "learning_rate": 0.00019939246658566223, "loss": 0.348, "step": 4923 }, { "epoch": 0.398898250162022, "grad_norm": 0.03489783778786659, "learning_rate": 0.00019943296881328476, "loss": 0.3581, "step": 4924 }, { "epoch": 0.39897926117952043, "grad_norm": 0.030317939817905426, "learning_rate": 0.00019947347104090727, "loss": 0.3354, "step": 4925 }, { "epoch": 0.3990602721970188, "grad_norm": 0.03686142712831497, "learning_rate": 0.00019951397326852977, "loss": 0.3672, "step": 4926 }, { "epoch": 0.3991412832145172, "grad_norm": 0.028810808435082436, "learning_rate": 0.0001995544754961523, "loss": 0.3068, "step": 4927 }, { "epoch": 0.39922229423201555, "grad_norm": 0.043715715408325195, "learning_rate": 0.0001995949777237748, "loss": 0.394, "step": 4928 }, { "epoch": 0.3993033052495139, "grad_norm": 0.034423165023326874, "learning_rate": 0.00019963547995139732, "loss": 0.3658, "step": 4929 }, { "epoch": 0.3993843162670123, "grad_norm": 0.03472477197647095, "learning_rate": 0.00019967598217901985, "loss": 0.3751, "step": 4930 }, { "epoch": 0.3994653272845107, "grad_norm": 0.03477395325899124, "learning_rate": 0.00019971648440664236, "loss": 0.3521, "step": 4931 }, { "epoch": 0.3995463383020091, "grad_norm": 0.03423608839511871, "learning_rate": 0.0001997569866342649, "loss": 0.366, "step": 4932 }, { "epoch": 0.39962734931950744, "grad_norm": 0.030516432598233223, "learning_rate": 0.0001997974888618874, "loss": 0.3782, "step": 4933 }, { "epoch": 0.39970836033700585, "grad_norm": 0.041002050042152405, "learning_rate": 0.00019983799108950993, "loss": 0.3048, "step": 4934 }, { "epoch": 0.3997893713545042, "grad_norm": 0.03749099746346474, "learning_rate": 0.00019987849331713244, "loss": 0.3872, "step": 4935 }, { "epoch": 0.3998703823720026, "grad_norm": 0.031427595764398575, "learning_rate": 0.00019991899554475497, "loss": 0.3599, "step": 4936 }, { "epoch": 0.399951393389501, "grad_norm": 0.027828801423311234, "learning_rate": 0.00019995949777237748, "loss": 0.3048, "step": 4937 }, { "epoch": 0.40003240440699933, "grad_norm": 0.03560652583837509, "learning_rate": 0.0002, "loss": 0.3507, "step": 4938 }, { "epoch": 0.40011341542449774, "grad_norm": 0.029362550005316734, "learning_rate": 0.00019999549934740537, "loss": 0.3621, "step": 4939 }, { "epoch": 0.4001944264419961, "grad_norm": 0.03609123453497887, "learning_rate": 0.00019999099869481076, "loss": 0.3837, "step": 4940 }, { "epoch": 0.4002754374594945, "grad_norm": 0.03292112052440643, "learning_rate": 0.00019998649804221614, "loss": 0.3704, "step": 4941 }, { "epoch": 0.40035644847699287, "grad_norm": 0.03446466848254204, "learning_rate": 0.0001999819973896215, "loss": 0.3422, "step": 4942 }, { "epoch": 0.4004374594944913, "grad_norm": 0.0325990729033947, "learning_rate": 0.0001999774967370269, "loss": 0.3621, "step": 4943 }, { "epoch": 0.40051847051198963, "grad_norm": 0.0336809903383255, "learning_rate": 0.00019997299608443225, "loss": 0.3424, "step": 4944 }, { "epoch": 0.400599481529488, "grad_norm": 0.034309420734643936, "learning_rate": 0.0001999684954318376, "loss": 0.3742, "step": 4945 }, { "epoch": 0.4006804925469864, "grad_norm": 0.034966353327035904, "learning_rate": 0.000199963994779243, "loss": 0.331, "step": 4946 }, { "epoch": 0.40076150356448476, "grad_norm": 0.03615720197558403, "learning_rate": 0.00019995949412664839, "loss": 0.4266, "step": 4947 }, { "epoch": 0.40084251458198317, "grad_norm": 0.03168513625860214, "learning_rate": 0.00019995499347405375, "loss": 0.3954, "step": 4948 }, { "epoch": 0.4009235255994815, "grad_norm": 0.031109081581234932, "learning_rate": 0.00019995049282145913, "loss": 0.3868, "step": 4949 }, { "epoch": 0.40100453661697993, "grad_norm": 0.03687392175197601, "learning_rate": 0.0001999459921688645, "loss": 0.3249, "step": 4950 }, { "epoch": 0.4010855476344783, "grad_norm": 0.033434923738241196, "learning_rate": 0.00019994149151626985, "loss": 0.339, "step": 4951 }, { "epoch": 0.40116655865197665, "grad_norm": 0.03710939735174179, "learning_rate": 0.00019993699086367524, "loss": 0.3879, "step": 4952 }, { "epoch": 0.40124756966947506, "grad_norm": 0.03508399426937103, "learning_rate": 0.00019993249021108063, "loss": 0.3097, "step": 4953 }, { "epoch": 0.4013285806869734, "grad_norm": 0.035740409046411514, "learning_rate": 0.000199927989558486, "loss": 0.3372, "step": 4954 }, { "epoch": 0.4014095917044718, "grad_norm": 0.03336473926901817, "learning_rate": 0.00019992348890589137, "loss": 0.4151, "step": 4955 }, { "epoch": 0.4014906027219702, "grad_norm": 0.03121289797127247, "learning_rate": 0.00019991898825329673, "loss": 0.3833, "step": 4956 }, { "epoch": 0.4015716137394686, "grad_norm": 0.028473064303398132, "learning_rate": 0.0001999144876007021, "loss": 0.3705, "step": 4957 }, { "epoch": 0.40165262475696695, "grad_norm": 0.03372488543391228, "learning_rate": 0.00019990998694810748, "loss": 0.3342, "step": 4958 }, { "epoch": 0.4017336357744653, "grad_norm": 0.049180999398231506, "learning_rate": 0.00019990548629551287, "loss": 0.4041, "step": 4959 }, { "epoch": 0.4018146467919637, "grad_norm": 0.03879746422171593, "learning_rate": 0.00019990098564291823, "loss": 0.3849, "step": 4960 }, { "epoch": 0.40189565780946207, "grad_norm": 0.037787068635225296, "learning_rate": 0.00019989648499032362, "loss": 0.389, "step": 4961 }, { "epoch": 0.4019766688269605, "grad_norm": 0.03139461576938629, "learning_rate": 0.00019989198433772898, "loss": 0.3873, "step": 4962 }, { "epoch": 0.40205767984445884, "grad_norm": 0.02805621363222599, "learning_rate": 0.00019988748368513434, "loss": 0.3795, "step": 4963 }, { "epoch": 0.40213869086195725, "grad_norm": 0.03285028785467148, "learning_rate": 0.00019988298303253972, "loss": 0.39, "step": 4964 }, { "epoch": 0.4022197018794556, "grad_norm": 0.03046387806534767, "learning_rate": 0.0001998784823799451, "loss": 0.3732, "step": 4965 }, { "epoch": 0.40230071289695396, "grad_norm": 0.0408925786614418, "learning_rate": 0.00019987398172735047, "loss": 0.3773, "step": 4966 }, { "epoch": 0.40238172391445237, "grad_norm": 0.03856538608670235, "learning_rate": 0.00019986948107475586, "loss": 0.3739, "step": 4967 }, { "epoch": 0.4024627349319507, "grad_norm": 0.032258667051792145, "learning_rate": 0.00019986498042216122, "loss": 0.3399, "step": 4968 }, { "epoch": 0.40254374594944914, "grad_norm": 0.03434364125132561, "learning_rate": 0.00019986047976956658, "loss": 0.3555, "step": 4969 }, { "epoch": 0.4026247569669475, "grad_norm": 0.030233683064579964, "learning_rate": 0.000199855979116972, "loss": 0.4375, "step": 4970 }, { "epoch": 0.4027057679844459, "grad_norm": 0.03484868258237839, "learning_rate": 0.00019985147846437735, "loss": 0.3555, "step": 4971 }, { "epoch": 0.40278677900194426, "grad_norm": 0.03328513354063034, "learning_rate": 0.0001998469778117827, "loss": 0.3359, "step": 4972 }, { "epoch": 0.4028677900194426, "grad_norm": 0.03617887571454048, "learning_rate": 0.0001998424771591881, "loss": 0.3908, "step": 4973 }, { "epoch": 0.40294880103694103, "grad_norm": 0.03129265829920769, "learning_rate": 0.00019983797650659346, "loss": 0.3506, "step": 4974 }, { "epoch": 0.4030298120544394, "grad_norm": 0.030116554349660873, "learning_rate": 0.00019983347585399882, "loss": 0.3632, "step": 4975 }, { "epoch": 0.4031108230719378, "grad_norm": 0.03023291938006878, "learning_rate": 0.00019982897520140423, "loss": 0.3086, "step": 4976 }, { "epoch": 0.40319183408943615, "grad_norm": 0.026817413046956062, "learning_rate": 0.0001998244745488096, "loss": 0.3218, "step": 4977 }, { "epoch": 0.40327284510693456, "grad_norm": 0.03046896867454052, "learning_rate": 0.00019981997389621495, "loss": 0.3745, "step": 4978 }, { "epoch": 0.4033538561244329, "grad_norm": 0.02875666506588459, "learning_rate": 0.00019981547324362034, "loss": 0.3823, "step": 4979 }, { "epoch": 0.40343486714193133, "grad_norm": 0.03567210212349892, "learning_rate": 0.0001998109725910257, "loss": 0.3305, "step": 4980 }, { "epoch": 0.4035158781594297, "grad_norm": 0.039444200694561005, "learning_rate": 0.00019980647193843106, "loss": 0.3671, "step": 4981 }, { "epoch": 0.40359688917692804, "grad_norm": 0.03511333838105202, "learning_rate": 0.00019980197128583648, "loss": 0.339, "step": 4982 }, { "epoch": 0.40367790019442645, "grad_norm": 0.03776915743947029, "learning_rate": 0.00019979747063324184, "loss": 0.4184, "step": 4983 }, { "epoch": 0.4037589112119248, "grad_norm": 0.03581022098660469, "learning_rate": 0.0001997929699806472, "loss": 0.3786, "step": 4984 }, { "epoch": 0.4038399222294232, "grad_norm": 0.03828386217355728, "learning_rate": 0.00019978846932805258, "loss": 0.3468, "step": 4985 }, { "epoch": 0.4039209332469216, "grad_norm": 0.02726336009800434, "learning_rate": 0.00019978396867545794, "loss": 0.368, "step": 4986 }, { "epoch": 0.40400194426442, "grad_norm": 0.02940969169139862, "learning_rate": 0.0001997794680228633, "loss": 0.3332, "step": 4987 }, { "epoch": 0.40408295528191834, "grad_norm": 0.0344647541642189, "learning_rate": 0.00019977496737026872, "loss": 0.3311, "step": 4988 }, { "epoch": 0.4041639662994167, "grad_norm": 0.037162743508815765, "learning_rate": 0.00019977046671767408, "loss": 0.3748, "step": 4989 }, { "epoch": 0.4042449773169151, "grad_norm": 0.03121703863143921, "learning_rate": 0.00019976596606507944, "loss": 0.3355, "step": 4990 }, { "epoch": 0.40432598833441347, "grad_norm": 0.033019792288541794, "learning_rate": 0.00019976146541248482, "loss": 0.3221, "step": 4991 }, { "epoch": 0.4044069993519119, "grad_norm": 0.03607960045337677, "learning_rate": 0.00019975696475989018, "loss": 0.3599, "step": 4992 }, { "epoch": 0.40448801036941023, "grad_norm": 0.03203876316547394, "learning_rate": 0.00019975246410729557, "loss": 0.4015, "step": 4993 }, { "epoch": 0.40456902138690864, "grad_norm": 0.031239103525877, "learning_rate": 0.00019974796345470096, "loss": 0.3448, "step": 4994 }, { "epoch": 0.404650032404407, "grad_norm": 0.04195361211895943, "learning_rate": 0.00019974346280210632, "loss": 0.4139, "step": 4995 }, { "epoch": 0.40473104342190536, "grad_norm": 0.042518921196460724, "learning_rate": 0.00019973896214951168, "loss": 0.4325, "step": 4996 }, { "epoch": 0.40481205443940377, "grad_norm": 0.027694914489984512, "learning_rate": 0.00019973446149691707, "loss": 0.3278, "step": 4997 }, { "epoch": 0.4048930654569021, "grad_norm": 0.027236877009272575, "learning_rate": 0.00019972996084432243, "loss": 0.3331, "step": 4998 }, { "epoch": 0.40497407647440054, "grad_norm": 0.030211325734853745, "learning_rate": 0.0001997254601917278, "loss": 0.3351, "step": 4999 }, { "epoch": 0.4050550874918989, "grad_norm": 0.03629770874977112, "learning_rate": 0.0001997209595391332, "loss": 0.3644, "step": 5000 }, { "epoch": 0.4051360985093973, "grad_norm": 0.033565860241651535, "learning_rate": 0.00019971645888653856, "loss": 0.3223, "step": 5001 }, { "epoch": 0.40521710952689566, "grad_norm": 0.030630042776465416, "learning_rate": 0.00019971195823394392, "loss": 0.3548, "step": 5002 }, { "epoch": 0.405298120544394, "grad_norm": 0.028197648003697395, "learning_rate": 0.0001997074575813493, "loss": 0.3707, "step": 5003 }, { "epoch": 0.4053791315618924, "grad_norm": 0.03611261025071144, "learning_rate": 0.00019970295692875467, "loss": 0.3407, "step": 5004 }, { "epoch": 0.4054601425793908, "grad_norm": 0.031974319368600845, "learning_rate": 0.00019969845627616005, "loss": 0.3836, "step": 5005 }, { "epoch": 0.4055411535968892, "grad_norm": 0.030820896849036217, "learning_rate": 0.00019969395562356544, "loss": 0.3428, "step": 5006 }, { "epoch": 0.40562216461438755, "grad_norm": 0.030217768624424934, "learning_rate": 0.0001996894549709708, "loss": 0.3382, "step": 5007 }, { "epoch": 0.40570317563188596, "grad_norm": 0.031406279653310776, "learning_rate": 0.00019968495431837616, "loss": 0.3348, "step": 5008 }, { "epoch": 0.4057841866493843, "grad_norm": 0.039374373853206635, "learning_rate": 0.00019968045366578155, "loss": 0.3772, "step": 5009 }, { "epoch": 0.40586519766688267, "grad_norm": 0.030828993767499924, "learning_rate": 0.0001996759530131869, "loss": 0.3263, "step": 5010 }, { "epoch": 0.4059462086843811, "grad_norm": 0.035990871489048004, "learning_rate": 0.0001996714523605923, "loss": 0.3575, "step": 5011 }, { "epoch": 0.40602721970187944, "grad_norm": 0.034174595028162, "learning_rate": 0.00019966695170799768, "loss": 0.33, "step": 5012 }, { "epoch": 0.40610823071937785, "grad_norm": 0.029588427394628525, "learning_rate": 0.00019966245105540304, "loss": 0.3219, "step": 5013 }, { "epoch": 0.4061892417368762, "grad_norm": 0.030147623270750046, "learning_rate": 0.0001996579504028084, "loss": 0.2707, "step": 5014 }, { "epoch": 0.4062702527543746, "grad_norm": 0.035346563905477524, "learning_rate": 0.0001996534497502138, "loss": 0.3642, "step": 5015 }, { "epoch": 0.406351263771873, "grad_norm": 0.03422393649816513, "learning_rate": 0.00019964894909761915, "loss": 0.3599, "step": 5016 }, { "epoch": 0.40643227478937133, "grad_norm": 0.03663076460361481, "learning_rate": 0.00019964444844502454, "loss": 0.3705, "step": 5017 }, { "epoch": 0.40651328580686974, "grad_norm": 0.03565813973546028, "learning_rate": 0.00019963994779242992, "loss": 0.4063, "step": 5018 }, { "epoch": 0.4065942968243681, "grad_norm": 0.030144767835736275, "learning_rate": 0.00019963544713983529, "loss": 0.2991, "step": 5019 }, { "epoch": 0.4066753078418665, "grad_norm": 0.030031368136405945, "learning_rate": 0.00019963094648724065, "loss": 0.3289, "step": 5020 }, { "epoch": 0.40675631885936486, "grad_norm": 0.02851344645023346, "learning_rate": 0.00019962644583464603, "loss": 0.3241, "step": 5021 }, { "epoch": 0.4068373298768633, "grad_norm": 0.03419654816389084, "learning_rate": 0.00019962194518205142, "loss": 0.3518, "step": 5022 }, { "epoch": 0.40691834089436163, "grad_norm": 0.02915254980325699, "learning_rate": 0.00019961744452945678, "loss": 0.3757, "step": 5023 }, { "epoch": 0.40699935191186, "grad_norm": 0.030751507729291916, "learning_rate": 0.00019961294387686217, "loss": 0.314, "step": 5024 }, { "epoch": 0.4070803629293584, "grad_norm": 0.032417912036180496, "learning_rate": 0.00019960844322426753, "loss": 0.3757, "step": 5025 }, { "epoch": 0.40716137394685675, "grad_norm": 0.03586619719862938, "learning_rate": 0.0001996039425716729, "loss": 0.3446, "step": 5026 }, { "epoch": 0.40724238496435516, "grad_norm": 0.028922390192747116, "learning_rate": 0.00019959944191907827, "loss": 0.3286, "step": 5027 }, { "epoch": 0.4073233959818535, "grad_norm": 0.029639948159456253, "learning_rate": 0.00019959494126648366, "loss": 0.3598, "step": 5028 }, { "epoch": 0.40740440699935193, "grad_norm": 0.03211815655231476, "learning_rate": 0.00019959044061388902, "loss": 0.3573, "step": 5029 }, { "epoch": 0.4074854180168503, "grad_norm": 0.03042125515639782, "learning_rate": 0.0001995859399612944, "loss": 0.3311, "step": 5030 }, { "epoch": 0.4075664290343487, "grad_norm": 0.03370456397533417, "learning_rate": 0.00019958143930869977, "loss": 0.3613, "step": 5031 }, { "epoch": 0.40764744005184705, "grad_norm": 0.03500419110059738, "learning_rate": 0.00019957693865610513, "loss": 0.3668, "step": 5032 }, { "epoch": 0.4077284510693454, "grad_norm": 0.031201496720314026, "learning_rate": 0.00019957243800351052, "loss": 0.3271, "step": 5033 }, { "epoch": 0.4078094620868438, "grad_norm": 0.03223037347197533, "learning_rate": 0.0001995679373509159, "loss": 0.3222, "step": 5034 }, { "epoch": 0.4078904731043422, "grad_norm": 0.032849233597517014, "learning_rate": 0.00019956343669832126, "loss": 0.3859, "step": 5035 }, { "epoch": 0.4079714841218406, "grad_norm": 0.031805459409952164, "learning_rate": 0.00019955893604572665, "loss": 0.3616, "step": 5036 }, { "epoch": 0.40805249513933894, "grad_norm": 0.030916647985577583, "learning_rate": 0.000199554435393132, "loss": 0.3589, "step": 5037 }, { "epoch": 0.40813350615683736, "grad_norm": 0.03671705722808838, "learning_rate": 0.00019954993474053737, "loss": 0.3622, "step": 5038 }, { "epoch": 0.4082145171743357, "grad_norm": 0.032508958131074905, "learning_rate": 0.00019954543408794276, "loss": 0.3363, "step": 5039 }, { "epoch": 0.40829552819183407, "grad_norm": 0.03839171677827835, "learning_rate": 0.00019954093343534814, "loss": 0.4027, "step": 5040 }, { "epoch": 0.4083765392093325, "grad_norm": 0.03205114230513573, "learning_rate": 0.0001995364327827535, "loss": 0.3773, "step": 5041 }, { "epoch": 0.40845755022683083, "grad_norm": 0.03220202773809433, "learning_rate": 0.0001995319321301589, "loss": 0.3162, "step": 5042 }, { "epoch": 0.40853856124432925, "grad_norm": 0.035653892904520035, "learning_rate": 0.00019952743147756425, "loss": 0.3868, "step": 5043 }, { "epoch": 0.4086195722618276, "grad_norm": 0.03405566141009331, "learning_rate": 0.0001995229308249696, "loss": 0.393, "step": 5044 }, { "epoch": 0.408700583279326, "grad_norm": 0.03670091927051544, "learning_rate": 0.00019951843017237503, "loss": 0.393, "step": 5045 }, { "epoch": 0.40878159429682437, "grad_norm": 0.026954319328069687, "learning_rate": 0.00019951392951978039, "loss": 0.3394, "step": 5046 }, { "epoch": 0.4088626053143227, "grad_norm": 0.029613513499498367, "learning_rate": 0.00019950942886718575, "loss": 0.3559, "step": 5047 }, { "epoch": 0.40894361633182114, "grad_norm": 0.03285623714327812, "learning_rate": 0.00019950492821459113, "loss": 0.3565, "step": 5048 }, { "epoch": 0.4090246273493195, "grad_norm": 0.046118028461933136, "learning_rate": 0.0001995004275619965, "loss": 0.3902, "step": 5049 }, { "epoch": 0.4091056383668179, "grad_norm": 0.0518961064517498, "learning_rate": 0.00019949592690940185, "loss": 0.3158, "step": 5050 }, { "epoch": 0.40918664938431626, "grad_norm": 0.04242045804858208, "learning_rate": 0.00019949142625680727, "loss": 0.3933, "step": 5051 }, { "epoch": 0.40926766040181467, "grad_norm": 0.032364003360271454, "learning_rate": 0.00019948692560421263, "loss": 0.3962, "step": 5052 }, { "epoch": 0.409348671419313, "grad_norm": 0.0313120037317276, "learning_rate": 0.000199482424951618, "loss": 0.3484, "step": 5053 }, { "epoch": 0.4094296824368114, "grad_norm": 0.026692088693380356, "learning_rate": 0.00019947792429902337, "loss": 0.3498, "step": 5054 }, { "epoch": 0.4095106934543098, "grad_norm": 0.03651599958539009, "learning_rate": 0.00019947342364642873, "loss": 0.3543, "step": 5055 }, { "epoch": 0.40959170447180815, "grad_norm": 0.036102522164583206, "learning_rate": 0.0001994689229938341, "loss": 0.3928, "step": 5056 }, { "epoch": 0.40967271548930656, "grad_norm": 0.03446866199374199, "learning_rate": 0.0001994644223412395, "loss": 0.3483, "step": 5057 }, { "epoch": 0.4097537265068049, "grad_norm": 0.029560396447777748, "learning_rate": 0.00019945992168864487, "loss": 0.3484, "step": 5058 }, { "epoch": 0.4098347375243033, "grad_norm": 0.045518580824136734, "learning_rate": 0.00019945542103605023, "loss": 0.3436, "step": 5059 }, { "epoch": 0.4099157485418017, "grad_norm": 0.03489755094051361, "learning_rate": 0.00019945092038345562, "loss": 0.4013, "step": 5060 }, { "epoch": 0.40999675955930004, "grad_norm": 0.029958384111523628, "learning_rate": 0.00019944641973086098, "loss": 0.306, "step": 5061 }, { "epoch": 0.41007777057679845, "grad_norm": 0.03341478854417801, "learning_rate": 0.00019944191907826634, "loss": 0.3578, "step": 5062 }, { "epoch": 0.4101587815942968, "grad_norm": 0.03505893051624298, "learning_rate": 0.00019943741842567175, "loss": 0.3299, "step": 5063 }, { "epoch": 0.4102397926117952, "grad_norm": 0.03374209254980087, "learning_rate": 0.0001994329177730771, "loss": 0.3581, "step": 5064 }, { "epoch": 0.4103208036292936, "grad_norm": 0.029370833188295364, "learning_rate": 0.00019942841712048247, "loss": 0.3853, "step": 5065 }, { "epoch": 0.410401814646792, "grad_norm": 0.037649236619472504, "learning_rate": 0.00019942391646788786, "loss": 0.3855, "step": 5066 }, { "epoch": 0.41048282566429034, "grad_norm": 0.0331161729991436, "learning_rate": 0.00019941941581529322, "loss": 0.3959, "step": 5067 }, { "epoch": 0.4105638366817887, "grad_norm": 0.028420720249414444, "learning_rate": 0.00019941491516269858, "loss": 0.3214, "step": 5068 }, { "epoch": 0.4106448476992871, "grad_norm": 0.028488215059041977, "learning_rate": 0.000199410414510104, "loss": 0.3575, "step": 5069 }, { "epoch": 0.41072585871678546, "grad_norm": 0.03867751732468605, "learning_rate": 0.00019940591385750935, "loss": 0.3763, "step": 5070 }, { "epoch": 0.4108068697342839, "grad_norm": 0.03423899784684181, "learning_rate": 0.0001994014132049147, "loss": 0.3847, "step": 5071 }, { "epoch": 0.41088788075178223, "grad_norm": 0.02935202606022358, "learning_rate": 0.0001993969125523201, "loss": 0.3605, "step": 5072 }, { "epoch": 0.41096889176928064, "grad_norm": 0.0323471873998642, "learning_rate": 0.00019939241189972546, "loss": 0.3426, "step": 5073 }, { "epoch": 0.411049902786779, "grad_norm": 0.03729783371090889, "learning_rate": 0.00019938791124713085, "loss": 0.353, "step": 5074 }, { "epoch": 0.4111309138042774, "grad_norm": 0.04616546258330345, "learning_rate": 0.00019938341059453623, "loss": 0.3813, "step": 5075 }, { "epoch": 0.41121192482177576, "grad_norm": 0.03555573895573616, "learning_rate": 0.0001993789099419416, "loss": 0.3102, "step": 5076 }, { "epoch": 0.4112929358392741, "grad_norm": 0.03330931067466736, "learning_rate": 0.00019937440928934695, "loss": 0.3645, "step": 5077 }, { "epoch": 0.41137394685677253, "grad_norm": 0.03230508789420128, "learning_rate": 0.00019936990863675234, "loss": 0.3492, "step": 5078 }, { "epoch": 0.4114549578742709, "grad_norm": 0.03547259420156479, "learning_rate": 0.0001993654079841577, "loss": 0.3662, "step": 5079 }, { "epoch": 0.4115359688917693, "grad_norm": 0.03292231634259224, "learning_rate": 0.0001993609073315631, "loss": 0.3901, "step": 5080 }, { "epoch": 0.41161697990926766, "grad_norm": 0.029186615720391273, "learning_rate": 0.00019935640667896848, "loss": 0.3055, "step": 5081 }, { "epoch": 0.41169799092676607, "grad_norm": 0.033885449171066284, "learning_rate": 0.00019935190602637384, "loss": 0.3886, "step": 5082 }, { "epoch": 0.4117790019442644, "grad_norm": 0.03691324219107628, "learning_rate": 0.0001993474053737792, "loss": 0.3715, "step": 5083 }, { "epoch": 0.4118600129617628, "grad_norm": 0.03496171906590462, "learning_rate": 0.00019934290472118458, "loss": 0.3795, "step": 5084 }, { "epoch": 0.4119410239792612, "grad_norm": 0.03149078041315079, "learning_rate": 0.00019933840406858994, "loss": 0.3449, "step": 5085 }, { "epoch": 0.41202203499675955, "grad_norm": 0.02938619628548622, "learning_rate": 0.00019933390341599533, "loss": 0.3181, "step": 5086 }, { "epoch": 0.41210304601425796, "grad_norm": 0.03057067282497883, "learning_rate": 0.00019932940276340072, "loss": 0.3412, "step": 5087 }, { "epoch": 0.4121840570317563, "grad_norm": 0.03213992714881897, "learning_rate": 0.00019932490211080608, "loss": 0.3871, "step": 5088 }, { "epoch": 0.4122650680492547, "grad_norm": 0.029805082827806473, "learning_rate": 0.00019932040145821144, "loss": 0.3548, "step": 5089 }, { "epoch": 0.4123460790667531, "grad_norm": 0.028804264962673187, "learning_rate": 0.00019931590080561682, "loss": 0.3005, "step": 5090 }, { "epoch": 0.41242709008425144, "grad_norm": 0.030629368498921394, "learning_rate": 0.00019931140015302218, "loss": 0.3645, "step": 5091 }, { "epoch": 0.41250810110174985, "grad_norm": 0.03798553720116615, "learning_rate": 0.00019930689950042757, "loss": 0.3771, "step": 5092 }, { "epoch": 0.4125891121192482, "grad_norm": 0.031806670129299164, "learning_rate": 0.00019930239884783296, "loss": 0.4109, "step": 5093 }, { "epoch": 0.4126701231367466, "grad_norm": 0.037300970405340195, "learning_rate": 0.00019929789819523832, "loss": 0.398, "step": 5094 }, { "epoch": 0.41275113415424497, "grad_norm": 0.033359628170728683, "learning_rate": 0.00019929339754264368, "loss": 0.3486, "step": 5095 }, { "epoch": 0.4128321451717434, "grad_norm": 0.031194040551781654, "learning_rate": 0.00019928889689004907, "loss": 0.3496, "step": 5096 }, { "epoch": 0.41291315618924174, "grad_norm": 0.029555628076195717, "learning_rate": 0.00019928439623745445, "loss": 0.3629, "step": 5097 }, { "epoch": 0.4129941672067401, "grad_norm": 0.03267825022339821, "learning_rate": 0.0001992798955848598, "loss": 0.361, "step": 5098 }, { "epoch": 0.4130751782242385, "grad_norm": 0.037240467965602875, "learning_rate": 0.0001992753949322652, "loss": 0.3751, "step": 5099 }, { "epoch": 0.41315618924173686, "grad_norm": 0.03187297657132149, "learning_rate": 0.00019927089427967056, "loss": 0.3345, "step": 5100 }, { "epoch": 0.41323720025923527, "grad_norm": 0.02669236622750759, "learning_rate": 0.00019926639362707592, "loss": 0.3153, "step": 5101 }, { "epoch": 0.4133182112767336, "grad_norm": 0.03205592930316925, "learning_rate": 0.0001992618929744813, "loss": 0.2747, "step": 5102 }, { "epoch": 0.41339922229423204, "grad_norm": 0.030281927436590195, "learning_rate": 0.0001992573923218867, "loss": 0.3505, "step": 5103 }, { "epoch": 0.4134802333117304, "grad_norm": 0.03139625862240791, "learning_rate": 0.00019925289166929205, "loss": 0.3535, "step": 5104 }, { "epoch": 0.41356124432922875, "grad_norm": 0.03957032784819603, "learning_rate": 0.00019924839101669744, "loss": 0.3793, "step": 5105 }, { "epoch": 0.41364225534672716, "grad_norm": 0.032334841787815094, "learning_rate": 0.0001992438903641028, "loss": 0.3739, "step": 5106 }, { "epoch": 0.4137232663642255, "grad_norm": 0.032936934381723404, "learning_rate": 0.00019923938971150816, "loss": 0.342, "step": 5107 }, { "epoch": 0.41380427738172393, "grad_norm": 0.03231712430715561, "learning_rate": 0.00019923488905891355, "loss": 0.3294, "step": 5108 }, { "epoch": 0.4138852883992223, "grad_norm": 0.033003080636262894, "learning_rate": 0.00019923038840631894, "loss": 0.3452, "step": 5109 }, { "epoch": 0.4139662994167207, "grad_norm": 0.04072129353880882, "learning_rate": 0.0001992258877537243, "loss": 0.3528, "step": 5110 }, { "epoch": 0.41404731043421905, "grad_norm": 0.03586127609014511, "learning_rate": 0.00019922138710112968, "loss": 0.3862, "step": 5111 }, { "epoch": 0.4141283214517174, "grad_norm": 0.03363611176609993, "learning_rate": 0.00019921688644853504, "loss": 0.3756, "step": 5112 }, { "epoch": 0.4142093324692158, "grad_norm": 0.03447722643613815, "learning_rate": 0.0001992123857959404, "loss": 0.3653, "step": 5113 }, { "epoch": 0.4142903434867142, "grad_norm": 0.034334730356931686, "learning_rate": 0.0001992078851433458, "loss": 0.3608, "step": 5114 }, { "epoch": 0.4143713545042126, "grad_norm": 0.03418297320604324, "learning_rate": 0.00019920338449075118, "loss": 0.3868, "step": 5115 }, { "epoch": 0.41445236552171094, "grad_norm": 0.03272546827793121, "learning_rate": 0.00019919888383815654, "loss": 0.3903, "step": 5116 }, { "epoch": 0.41453337653920935, "grad_norm": 0.03231920301914215, "learning_rate": 0.00019919438318556193, "loss": 0.335, "step": 5117 }, { "epoch": 0.4146143875567077, "grad_norm": 0.039603229612112045, "learning_rate": 0.00019918988253296729, "loss": 0.3478, "step": 5118 }, { "epoch": 0.41469539857420606, "grad_norm": 0.03109239600598812, "learning_rate": 0.00019918538188037265, "loss": 0.3458, "step": 5119 }, { "epoch": 0.4147764095917045, "grad_norm": 0.029578372836112976, "learning_rate": 0.00019918088122777803, "loss": 0.3372, "step": 5120 }, { "epoch": 0.41485742060920283, "grad_norm": 0.03594496101140976, "learning_rate": 0.00019917638057518342, "loss": 0.4146, "step": 5121 }, { "epoch": 0.41493843162670124, "grad_norm": 0.030784275382757187, "learning_rate": 0.00019917187992258878, "loss": 0.4104, "step": 5122 }, { "epoch": 0.4150194426441996, "grad_norm": 0.03049563802778721, "learning_rate": 0.00019916737926999417, "loss": 0.355, "step": 5123 }, { "epoch": 0.415100453661698, "grad_norm": 0.032201237976551056, "learning_rate": 0.00019916287861739953, "loss": 0.3582, "step": 5124 }, { "epoch": 0.41518146467919637, "grad_norm": 0.038234151899814606, "learning_rate": 0.0001991583779648049, "loss": 0.3957, "step": 5125 }, { "epoch": 0.4152624756966948, "grad_norm": 0.03416343033313751, "learning_rate": 0.0001991538773122103, "loss": 0.3751, "step": 5126 }, { "epoch": 0.41534348671419313, "grad_norm": 0.036890894174575806, "learning_rate": 0.00019914937665961566, "loss": 0.4049, "step": 5127 }, { "epoch": 0.4154244977316915, "grad_norm": 0.03059619665145874, "learning_rate": 0.00019914487600702102, "loss": 0.3943, "step": 5128 }, { "epoch": 0.4155055087491899, "grad_norm": 0.03785393759608269, "learning_rate": 0.0001991403753544264, "loss": 0.4098, "step": 5129 }, { "epoch": 0.41558651976668826, "grad_norm": 0.03127528354525566, "learning_rate": 0.00019913587470183177, "loss": 0.3484, "step": 5130 }, { "epoch": 0.41566753078418667, "grad_norm": 0.030445709824562073, "learning_rate": 0.00019913137404923713, "loss": 0.3255, "step": 5131 }, { "epoch": 0.415748541801685, "grad_norm": 0.03678746148943901, "learning_rate": 0.00019912687339664254, "loss": 0.3528, "step": 5132 }, { "epoch": 0.41582955281918343, "grad_norm": 0.03211822733283043, "learning_rate": 0.0001991223727440479, "loss": 0.3421, "step": 5133 }, { "epoch": 0.4159105638366818, "grad_norm": 0.034807238727808, "learning_rate": 0.00019911787209145326, "loss": 0.3781, "step": 5134 }, { "epoch": 0.41599157485418015, "grad_norm": 0.03515045717358589, "learning_rate": 0.00019911337143885865, "loss": 0.3803, "step": 5135 }, { "epoch": 0.41607258587167856, "grad_norm": 0.03006439283490181, "learning_rate": 0.000199108870786264, "loss": 0.316, "step": 5136 }, { "epoch": 0.4161535968891769, "grad_norm": 0.03301999717950821, "learning_rate": 0.00019910437013366937, "loss": 0.3341, "step": 5137 }, { "epoch": 0.4162346079066753, "grad_norm": 0.045103173702955246, "learning_rate": 0.00019909986948107478, "loss": 0.3515, "step": 5138 }, { "epoch": 0.4163156189241737, "grad_norm": 0.03581353649497032, "learning_rate": 0.00019909536882848014, "loss": 0.3232, "step": 5139 }, { "epoch": 0.4163966299416721, "grad_norm": 0.03379884734749794, "learning_rate": 0.0001990908681758855, "loss": 0.3655, "step": 5140 }, { "epoch": 0.41647764095917045, "grad_norm": 0.03726637735962868, "learning_rate": 0.0001990863675232909, "loss": 0.4216, "step": 5141 }, { "epoch": 0.4165586519766688, "grad_norm": 0.0365687757730484, "learning_rate": 0.00019908186687069625, "loss": 0.3847, "step": 5142 }, { "epoch": 0.4166396629941672, "grad_norm": 0.032937657088041306, "learning_rate": 0.0001990773662181016, "loss": 0.4113, "step": 5143 }, { "epoch": 0.41672067401166557, "grad_norm": 0.05359840393066406, "learning_rate": 0.00019907286556550703, "loss": 0.3892, "step": 5144 }, { "epoch": 0.416801685029164, "grad_norm": 0.03107147105038166, "learning_rate": 0.00019906836491291239, "loss": 0.3961, "step": 5145 }, { "epoch": 0.41688269604666234, "grad_norm": 0.0421084500849247, "learning_rate": 0.00019906386426031775, "loss": 0.3866, "step": 5146 }, { "epoch": 0.41696370706416075, "grad_norm": 0.028859462589025497, "learning_rate": 0.00019905936360772313, "loss": 0.3486, "step": 5147 }, { "epoch": 0.4170447180816591, "grad_norm": 0.032266031950712204, "learning_rate": 0.0001990548629551285, "loss": 0.3932, "step": 5148 }, { "epoch": 0.41712572909915746, "grad_norm": 0.04529944807291031, "learning_rate": 0.00019905036230253385, "loss": 0.3467, "step": 5149 }, { "epoch": 0.41720674011665587, "grad_norm": 0.02860589511692524, "learning_rate": 0.00019904586164993927, "loss": 0.3618, "step": 5150 }, { "epoch": 0.41728775113415423, "grad_norm": 0.02880844473838806, "learning_rate": 0.00019904136099734463, "loss": 0.3826, "step": 5151 }, { "epoch": 0.41736876215165264, "grad_norm": 0.03666391596198082, "learning_rate": 0.00019903686034475, "loss": 0.3662, "step": 5152 }, { "epoch": 0.417449773169151, "grad_norm": 0.02866402640938759, "learning_rate": 0.00019903235969215537, "loss": 0.3376, "step": 5153 }, { "epoch": 0.4175307841866494, "grad_norm": 0.03218850865960121, "learning_rate": 0.00019902785903956074, "loss": 0.3204, "step": 5154 }, { "epoch": 0.41761179520414776, "grad_norm": 0.035164568573236465, "learning_rate": 0.00019902335838696612, "loss": 0.3467, "step": 5155 }, { "epoch": 0.4176928062216461, "grad_norm": 0.03248968720436096, "learning_rate": 0.0001990188577343715, "loss": 0.3291, "step": 5156 }, { "epoch": 0.41777381723914453, "grad_norm": 0.03146893158555031, "learning_rate": 0.00019901435708177687, "loss": 0.3784, "step": 5157 }, { "epoch": 0.4178548282566429, "grad_norm": 0.03342106193304062, "learning_rate": 0.00019900985642918223, "loss": 0.3677, "step": 5158 }, { "epoch": 0.4179358392741413, "grad_norm": 0.03008284978568554, "learning_rate": 0.00019900535577658762, "loss": 0.355, "step": 5159 }, { "epoch": 0.41801685029163965, "grad_norm": 0.031455010175704956, "learning_rate": 0.00019900085512399298, "loss": 0.3984, "step": 5160 }, { "epoch": 0.41809786130913806, "grad_norm": 0.03575517609715462, "learning_rate": 0.00019899635447139836, "loss": 0.376, "step": 5161 }, { "epoch": 0.4181788723266364, "grad_norm": 0.03251372277736664, "learning_rate": 0.00019899185381880375, "loss": 0.3479, "step": 5162 }, { "epoch": 0.4182598833441348, "grad_norm": 0.03510560840368271, "learning_rate": 0.0001989873531662091, "loss": 0.3822, "step": 5163 }, { "epoch": 0.4183408943616332, "grad_norm": 0.03543466702103615, "learning_rate": 0.00019898285251361447, "loss": 0.3961, "step": 5164 }, { "epoch": 0.41842190537913154, "grad_norm": 0.033629365265369415, "learning_rate": 0.00019897835186101986, "loss": 0.3436, "step": 5165 }, { "epoch": 0.41850291639662995, "grad_norm": 0.03533930331468582, "learning_rate": 0.00019897385120842522, "loss": 0.3597, "step": 5166 }, { "epoch": 0.4185839274141283, "grad_norm": 0.034922514110803604, "learning_rate": 0.0001989693505558306, "loss": 0.362, "step": 5167 }, { "epoch": 0.4186649384316267, "grad_norm": 0.03290367126464844, "learning_rate": 0.000198964849903236, "loss": 0.377, "step": 5168 }, { "epoch": 0.4187459494491251, "grad_norm": 0.03566797077655792, "learning_rate": 0.00019896034925064135, "loss": 0.4076, "step": 5169 }, { "epoch": 0.4188269604666235, "grad_norm": 0.03208748996257782, "learning_rate": 0.0001989558485980467, "loss": 0.314, "step": 5170 }, { "epoch": 0.41890797148412184, "grad_norm": 0.029782714322209358, "learning_rate": 0.0001989513479454521, "loss": 0.3556, "step": 5171 }, { "epoch": 0.4189889825016202, "grad_norm": 0.033525895327329636, "learning_rate": 0.00019894684729285746, "loss": 0.4088, "step": 5172 }, { "epoch": 0.4190699935191186, "grad_norm": 0.029556160792708397, "learning_rate": 0.00019894234664026285, "loss": 0.3591, "step": 5173 }, { "epoch": 0.41915100453661697, "grad_norm": 0.03858495503664017, "learning_rate": 0.00019893784598766823, "loss": 0.3699, "step": 5174 }, { "epoch": 0.4192320155541154, "grad_norm": 0.03810839727520943, "learning_rate": 0.0001989333453350736, "loss": 0.3474, "step": 5175 }, { "epoch": 0.41931302657161373, "grad_norm": 0.030418671667575836, "learning_rate": 0.00019892884468247895, "loss": 0.3507, "step": 5176 }, { "epoch": 0.41939403758911215, "grad_norm": 0.031051823869347572, "learning_rate": 0.00019892434402988434, "loss": 0.3568, "step": 5177 }, { "epoch": 0.4194750486066105, "grad_norm": 0.03632812947034836, "learning_rate": 0.00019891984337728973, "loss": 0.3884, "step": 5178 }, { "epoch": 0.41955605962410886, "grad_norm": 0.034061893820762634, "learning_rate": 0.0001989153427246951, "loss": 0.3541, "step": 5179 }, { "epoch": 0.41963707064160727, "grad_norm": 0.03530232980847359, "learning_rate": 0.00019891084207210048, "loss": 0.387, "step": 5180 }, { "epoch": 0.4197180816591056, "grad_norm": 0.030564704909920692, "learning_rate": 0.00019890634141950584, "loss": 0.3974, "step": 5181 }, { "epoch": 0.41979909267660404, "grad_norm": 0.029824761673808098, "learning_rate": 0.0001989018407669112, "loss": 0.2902, "step": 5182 }, { "epoch": 0.4198801036941024, "grad_norm": 0.0386199913918972, "learning_rate": 0.00019889734011431658, "loss": 0.3861, "step": 5183 }, { "epoch": 0.4199611147116008, "grad_norm": 0.030515290796756744, "learning_rate": 0.00019889283946172197, "loss": 0.3376, "step": 5184 }, { "epoch": 0.42004212572909916, "grad_norm": 0.03223884105682373, "learning_rate": 0.00019888833880912733, "loss": 0.4042, "step": 5185 }, { "epoch": 0.4201231367465975, "grad_norm": 0.03462700545787811, "learning_rate": 0.00019888383815653272, "loss": 0.3469, "step": 5186 }, { "epoch": 0.4202041477640959, "grad_norm": 0.030687794089317322, "learning_rate": 0.00019887933750393808, "loss": 0.3238, "step": 5187 }, { "epoch": 0.4202851587815943, "grad_norm": 0.03233848139643669, "learning_rate": 0.00019887483685134344, "loss": 0.3399, "step": 5188 }, { "epoch": 0.4203661697990927, "grad_norm": 0.033750370144844055, "learning_rate": 0.00019887033619874882, "loss": 0.4309, "step": 5189 }, { "epoch": 0.42044718081659105, "grad_norm": 0.027981825172901154, "learning_rate": 0.0001988658355461542, "loss": 0.3105, "step": 5190 }, { "epoch": 0.42052819183408946, "grad_norm": 0.03219735622406006, "learning_rate": 0.00019886133489355957, "loss": 0.3925, "step": 5191 }, { "epoch": 0.4206092028515878, "grad_norm": 0.02849307656288147, "learning_rate": 0.00019885683424096496, "loss": 0.3735, "step": 5192 }, { "epoch": 0.42069021386908617, "grad_norm": 0.031731121242046356, "learning_rate": 0.00019885233358837032, "loss": 0.3774, "step": 5193 }, { "epoch": 0.4207712248865846, "grad_norm": 0.03386862203478813, "learning_rate": 0.00019884783293577568, "loss": 0.3527, "step": 5194 }, { "epoch": 0.42085223590408294, "grad_norm": 0.03693525865674019, "learning_rate": 0.00019884333228318107, "loss": 0.3667, "step": 5195 }, { "epoch": 0.42093324692158135, "grad_norm": 0.03634597361087799, "learning_rate": 0.00019883883163058645, "loss": 0.4062, "step": 5196 }, { "epoch": 0.4210142579390797, "grad_norm": 0.03302176296710968, "learning_rate": 0.0001988343309779918, "loss": 0.3638, "step": 5197 }, { "epoch": 0.4210952689565781, "grad_norm": 0.03496898338198662, "learning_rate": 0.0001988298303253972, "loss": 0.3592, "step": 5198 }, { "epoch": 0.4211762799740765, "grad_norm": 0.032557763159275055, "learning_rate": 0.00019882532967280256, "loss": 0.3665, "step": 5199 }, { "epoch": 0.42125729099157483, "grad_norm": 0.030008163303136826, "learning_rate": 0.00019882082902020792, "loss": 0.3572, "step": 5200 }, { "epoch": 0.42133830200907324, "grad_norm": 0.03188272565603256, "learning_rate": 0.0001988163283676133, "loss": 0.3361, "step": 5201 }, { "epoch": 0.4214193130265716, "grad_norm": 0.03313596546649933, "learning_rate": 0.0001988118277150187, "loss": 0.3759, "step": 5202 }, { "epoch": 0.42150032404407, "grad_norm": 0.032859593629837036, "learning_rate": 0.00019880732706242406, "loss": 0.379, "step": 5203 }, { "epoch": 0.42158133506156836, "grad_norm": 0.029185116291046143, "learning_rate": 0.00019880282640982944, "loss": 0.3275, "step": 5204 }, { "epoch": 0.4216623460790668, "grad_norm": 0.029771940782666206, "learning_rate": 0.0001987983257572348, "loss": 0.3778, "step": 5205 }, { "epoch": 0.42174335709656513, "grad_norm": 0.029124047607183456, "learning_rate": 0.00019879382510464016, "loss": 0.3467, "step": 5206 }, { "epoch": 0.4218243681140635, "grad_norm": 0.03311445564031601, "learning_rate": 0.00019878932445204558, "loss": 0.3557, "step": 5207 }, { "epoch": 0.4219053791315619, "grad_norm": 0.029799094423651695, "learning_rate": 0.00019878482379945094, "loss": 0.3477, "step": 5208 }, { "epoch": 0.42198639014906025, "grad_norm": 0.03274417296051979, "learning_rate": 0.0001987803231468563, "loss": 0.3755, "step": 5209 }, { "epoch": 0.42206740116655866, "grad_norm": 0.03296559676527977, "learning_rate": 0.00019877582249426168, "loss": 0.3356, "step": 5210 }, { "epoch": 0.422148412184057, "grad_norm": 0.03630368784070015, "learning_rate": 0.00019877132184166704, "loss": 0.4121, "step": 5211 }, { "epoch": 0.42222942320155543, "grad_norm": 0.03400547802448273, "learning_rate": 0.0001987668211890724, "loss": 0.3499, "step": 5212 }, { "epoch": 0.4223104342190538, "grad_norm": 0.03906463086605072, "learning_rate": 0.00019876232053647782, "loss": 0.3674, "step": 5213 }, { "epoch": 0.4223914452365522, "grad_norm": 0.031435608863830566, "learning_rate": 0.00019875781988388318, "loss": 0.3705, "step": 5214 }, { "epoch": 0.42247245625405055, "grad_norm": 0.03501134365797043, "learning_rate": 0.00019875331923128854, "loss": 0.3594, "step": 5215 }, { "epoch": 0.4225534672715489, "grad_norm": 0.029576752334833145, "learning_rate": 0.00019874881857869393, "loss": 0.3737, "step": 5216 }, { "epoch": 0.4226344782890473, "grad_norm": 0.03144533187150955, "learning_rate": 0.00019874431792609929, "loss": 0.3741, "step": 5217 }, { "epoch": 0.4227154893065457, "grad_norm": 0.03286955505609512, "learning_rate": 0.00019873981727350465, "loss": 0.3791, "step": 5218 }, { "epoch": 0.4227965003240441, "grad_norm": 0.02476130612194538, "learning_rate": 0.00019873531662091006, "loss": 0.3316, "step": 5219 }, { "epoch": 0.42287751134154244, "grad_norm": 0.03551604598760605, "learning_rate": 0.00019873081596831542, "loss": 0.3734, "step": 5220 }, { "epoch": 0.42295852235904086, "grad_norm": 0.03408364579081535, "learning_rate": 0.00019872631531572078, "loss": 0.3709, "step": 5221 }, { "epoch": 0.4230395333765392, "grad_norm": 0.0335887186229229, "learning_rate": 0.00019872181466312617, "loss": 0.3626, "step": 5222 }, { "epoch": 0.42312054439403757, "grad_norm": 0.03350028395652771, "learning_rate": 0.00019871731401053153, "loss": 0.364, "step": 5223 }, { "epoch": 0.423201555411536, "grad_norm": 0.03685237467288971, "learning_rate": 0.0001987128133579369, "loss": 0.3232, "step": 5224 }, { "epoch": 0.42328256642903433, "grad_norm": 0.03553393483161926, "learning_rate": 0.0001987083127053423, "loss": 0.358, "step": 5225 }, { "epoch": 0.42336357744653275, "grad_norm": 0.034406695514917374, "learning_rate": 0.00019870381205274766, "loss": 0.3339, "step": 5226 }, { "epoch": 0.4234445884640311, "grad_norm": 0.03492816910147667, "learning_rate": 0.00019869931140015302, "loss": 0.381, "step": 5227 }, { "epoch": 0.4235255994815295, "grad_norm": 0.028665270656347275, "learning_rate": 0.0001986948107475584, "loss": 0.3527, "step": 5228 }, { "epoch": 0.42360661049902787, "grad_norm": 0.04024519771337509, "learning_rate": 0.00019869031009496377, "loss": 0.3728, "step": 5229 }, { "epoch": 0.4236876215165262, "grad_norm": 0.032281264662742615, "learning_rate": 0.00019868580944236916, "loss": 0.3872, "step": 5230 }, { "epoch": 0.42376863253402464, "grad_norm": 0.03774891793727875, "learning_rate": 0.00019868130878977454, "loss": 0.4118, "step": 5231 }, { "epoch": 0.423849643551523, "grad_norm": 0.032088082283735275, "learning_rate": 0.0001986768081371799, "loss": 0.3465, "step": 5232 }, { "epoch": 0.4239306545690214, "grad_norm": 0.030594127252697945, "learning_rate": 0.00019867230748458526, "loss": 0.3673, "step": 5233 }, { "epoch": 0.42401166558651976, "grad_norm": 0.03421124815940857, "learning_rate": 0.00019866780683199065, "loss": 0.3336, "step": 5234 }, { "epoch": 0.42409267660401817, "grad_norm": 0.03351978585124016, "learning_rate": 0.000198663306179396, "loss": 0.3349, "step": 5235 }, { "epoch": 0.4241736876215165, "grad_norm": 0.03628509119153023, "learning_rate": 0.0001986588055268014, "loss": 0.3036, "step": 5236 }, { "epoch": 0.4242546986390149, "grad_norm": 0.03526661545038223, "learning_rate": 0.00019865430487420678, "loss": 0.3514, "step": 5237 }, { "epoch": 0.4243357096565133, "grad_norm": 0.029476439580321312, "learning_rate": 0.00019864980422161214, "loss": 0.3813, "step": 5238 }, { "epoch": 0.42441672067401165, "grad_norm": 0.03853764757514, "learning_rate": 0.0001986453035690175, "loss": 0.3639, "step": 5239 }, { "epoch": 0.42449773169151006, "grad_norm": 0.033753901720047, "learning_rate": 0.0001986408029164229, "loss": 0.3765, "step": 5240 }, { "epoch": 0.4245787427090084, "grad_norm": 0.035833194851875305, "learning_rate": 0.00019863630226382825, "loss": 0.3427, "step": 5241 }, { "epoch": 0.42465975372650683, "grad_norm": 0.03638903796672821, "learning_rate": 0.00019863180161123364, "loss": 0.3804, "step": 5242 }, { "epoch": 0.4247407647440052, "grad_norm": 0.029470784589648247, "learning_rate": 0.00019862730095863903, "loss": 0.3455, "step": 5243 }, { "epoch": 0.42482177576150354, "grad_norm": 0.03390977531671524, "learning_rate": 0.00019862280030604439, "loss": 0.3588, "step": 5244 }, { "epoch": 0.42490278677900195, "grad_norm": 0.03637409582734108, "learning_rate": 0.00019861829965344975, "loss": 0.4302, "step": 5245 }, { "epoch": 0.4249837977965003, "grad_norm": 0.0375310555100441, "learning_rate": 0.00019861379900085513, "loss": 0.3476, "step": 5246 }, { "epoch": 0.4250648088139987, "grad_norm": 0.027415787801146507, "learning_rate": 0.0001986092983482605, "loss": 0.3223, "step": 5247 }, { "epoch": 0.4251458198314971, "grad_norm": 0.03121158853173256, "learning_rate": 0.00019860479769566588, "loss": 0.376, "step": 5248 }, { "epoch": 0.4252268308489955, "grad_norm": 0.03210705518722534, "learning_rate": 0.00019860029704307127, "loss": 0.4036, "step": 5249 }, { "epoch": 0.42530784186649384, "grad_norm": 0.03374101221561432, "learning_rate": 0.00019859579639047663, "loss": 0.4263, "step": 5250 }, { "epoch": 0.4253888528839922, "grad_norm": 0.029598388820886612, "learning_rate": 0.000198591295737882, "loss": 0.362, "step": 5251 }, { "epoch": 0.4254698639014906, "grad_norm": 0.032661404460668564, "learning_rate": 0.00019858679508528738, "loss": 0.3719, "step": 5252 }, { "epoch": 0.42555087491898896, "grad_norm": 0.028480835258960724, "learning_rate": 0.00019858229443269274, "loss": 0.3359, "step": 5253 }, { "epoch": 0.4256318859364874, "grad_norm": 0.03154754266142845, "learning_rate": 0.00019857779378009812, "loss": 0.3264, "step": 5254 }, { "epoch": 0.42571289695398573, "grad_norm": 0.032808415591716766, "learning_rate": 0.0001985732931275035, "loss": 0.3761, "step": 5255 }, { "epoch": 0.42579390797148414, "grad_norm": 0.03287159278988838, "learning_rate": 0.00019856879247490887, "loss": 0.348, "step": 5256 }, { "epoch": 0.4258749189889825, "grad_norm": 0.02578105963766575, "learning_rate": 0.00019856429182231423, "loss": 0.3518, "step": 5257 }, { "epoch": 0.42595593000648085, "grad_norm": 0.02951761521399021, "learning_rate": 0.00019855979116971962, "loss": 0.3867, "step": 5258 }, { "epoch": 0.42603694102397927, "grad_norm": 0.03331972658634186, "learning_rate": 0.000198555290517125, "loss": 0.3304, "step": 5259 }, { "epoch": 0.4261179520414776, "grad_norm": 0.028827041387557983, "learning_rate": 0.00019855078986453036, "loss": 0.3001, "step": 5260 }, { "epoch": 0.42619896305897603, "grad_norm": 0.031208734959363937, "learning_rate": 0.00019854628921193575, "loss": 0.3473, "step": 5261 }, { "epoch": 0.4262799740764744, "grad_norm": 0.028891805559396744, "learning_rate": 0.0001985417885593411, "loss": 0.3395, "step": 5262 }, { "epoch": 0.4263609850939728, "grad_norm": 0.031027546152472496, "learning_rate": 0.00019853728790674647, "loss": 0.3426, "step": 5263 }, { "epoch": 0.42644199611147116, "grad_norm": 0.04123328626155853, "learning_rate": 0.00019853278725415186, "loss": 0.4216, "step": 5264 }, { "epoch": 0.42652300712896957, "grad_norm": 0.032555144280195236, "learning_rate": 0.00019852828660155725, "loss": 0.3763, "step": 5265 }, { "epoch": 0.4266040181464679, "grad_norm": 0.032303549349308014, "learning_rate": 0.0001985237859489626, "loss": 0.3624, "step": 5266 }, { "epoch": 0.4266850291639663, "grad_norm": 0.03178047016263008, "learning_rate": 0.000198519285296368, "loss": 0.3765, "step": 5267 }, { "epoch": 0.4267660401814647, "grad_norm": 0.02855534851551056, "learning_rate": 0.00019851478464377335, "loss": 0.3523, "step": 5268 }, { "epoch": 0.42684705119896305, "grad_norm": 0.04090893268585205, "learning_rate": 0.0001985102839911787, "loss": 0.3015, "step": 5269 }, { "epoch": 0.42692806221646146, "grad_norm": 0.033113036304712296, "learning_rate": 0.0001985057833385841, "loss": 0.3791, "step": 5270 }, { "epoch": 0.4270090732339598, "grad_norm": 0.029150884598493576, "learning_rate": 0.0001985012826859895, "loss": 0.3366, "step": 5271 }, { "epoch": 0.4270900842514582, "grad_norm": 0.029976367950439453, "learning_rate": 0.00019849678203339485, "loss": 0.3095, "step": 5272 }, { "epoch": 0.4271710952689566, "grad_norm": 0.029154542833566666, "learning_rate": 0.00019849228138080023, "loss": 0.3309, "step": 5273 }, { "epoch": 0.42725210628645494, "grad_norm": 0.030596930533647537, "learning_rate": 0.0001984877807282056, "loss": 0.3795, "step": 5274 }, { "epoch": 0.42733311730395335, "grad_norm": 0.03367812559008598, "learning_rate": 0.00019848328007561095, "loss": 0.3595, "step": 5275 }, { "epoch": 0.4274141283214517, "grad_norm": 0.029898041859269142, "learning_rate": 0.00019847877942301634, "loss": 0.364, "step": 5276 }, { "epoch": 0.4274951393389501, "grad_norm": 0.02948545664548874, "learning_rate": 0.00019847427877042173, "loss": 0.3455, "step": 5277 }, { "epoch": 0.42757615035644847, "grad_norm": 0.03094439208507538, "learning_rate": 0.0001984697781178271, "loss": 0.3427, "step": 5278 }, { "epoch": 0.4276571613739469, "grad_norm": 0.03338692709803581, "learning_rate": 0.00019846527746523248, "loss": 0.3592, "step": 5279 }, { "epoch": 0.42773817239144524, "grad_norm": 0.02944916859269142, "learning_rate": 0.00019846077681263784, "loss": 0.3383, "step": 5280 }, { "epoch": 0.4278191834089436, "grad_norm": 0.033850450068712234, "learning_rate": 0.0001984562761600432, "loss": 0.3489, "step": 5281 }, { "epoch": 0.427900194426442, "grad_norm": 0.03082362376153469, "learning_rate": 0.00019845177550744858, "loss": 0.3363, "step": 5282 }, { "epoch": 0.42798120544394036, "grad_norm": 0.03181496635079384, "learning_rate": 0.00019844727485485397, "loss": 0.3723, "step": 5283 }, { "epoch": 0.42806221646143877, "grad_norm": 0.03152613341808319, "learning_rate": 0.00019844277420225933, "loss": 0.322, "step": 5284 }, { "epoch": 0.4281432274789371, "grad_norm": 0.03608822450041771, "learning_rate": 0.00019843827354966472, "loss": 0.3746, "step": 5285 }, { "epoch": 0.42822423849643554, "grad_norm": 0.033797916024923325, "learning_rate": 0.00019843377289707008, "loss": 0.3856, "step": 5286 }, { "epoch": 0.4283052495139339, "grad_norm": 0.029529495164752007, "learning_rate": 0.00019842927224447544, "loss": 0.3359, "step": 5287 }, { "epoch": 0.42838626053143225, "grad_norm": 0.03147159516811371, "learning_rate": 0.00019842477159188085, "loss": 0.3501, "step": 5288 }, { "epoch": 0.42846727154893066, "grad_norm": 0.03382951766252518, "learning_rate": 0.0001984202709392862, "loss": 0.3169, "step": 5289 }, { "epoch": 0.428548282566429, "grad_norm": 0.030625872313976288, "learning_rate": 0.00019841577028669157, "loss": 0.3695, "step": 5290 }, { "epoch": 0.42862929358392743, "grad_norm": 0.03295085206627846, "learning_rate": 0.00019841126963409696, "loss": 0.385, "step": 5291 }, { "epoch": 0.4287103046014258, "grad_norm": 0.03444168344140053, "learning_rate": 0.00019840676898150232, "loss": 0.3397, "step": 5292 }, { "epoch": 0.4287913156189242, "grad_norm": 0.03426263481378555, "learning_rate": 0.00019840226832890768, "loss": 0.3454, "step": 5293 }, { "epoch": 0.42887232663642255, "grad_norm": 0.03757169842720032, "learning_rate": 0.0001983977676763131, "loss": 0.3777, "step": 5294 }, { "epoch": 0.4289533376539209, "grad_norm": 0.03087097406387329, "learning_rate": 0.00019839326702371845, "loss": 0.3677, "step": 5295 }, { "epoch": 0.4290343486714193, "grad_norm": 0.03303956612944603, "learning_rate": 0.00019838876637112381, "loss": 0.3769, "step": 5296 }, { "epoch": 0.4291153596889177, "grad_norm": 0.03492473065853119, "learning_rate": 0.0001983842657185292, "loss": 0.386, "step": 5297 }, { "epoch": 0.4291963707064161, "grad_norm": 0.03316411003470421, "learning_rate": 0.00019837976506593456, "loss": 0.3447, "step": 5298 }, { "epoch": 0.42927738172391444, "grad_norm": 0.03256673365831375, "learning_rate": 0.00019837526441333992, "loss": 0.3063, "step": 5299 }, { "epoch": 0.42935839274141285, "grad_norm": 0.03335904702544212, "learning_rate": 0.00019837076376074534, "loss": 0.3417, "step": 5300 }, { "epoch": 0.4294394037589112, "grad_norm": 0.030818233266472816, "learning_rate": 0.0001983662631081507, "loss": 0.3476, "step": 5301 }, { "epoch": 0.42952041477640956, "grad_norm": 0.03463114798069, "learning_rate": 0.00019836176245555606, "loss": 0.3799, "step": 5302 }, { "epoch": 0.429601425793908, "grad_norm": 0.03285285457968712, "learning_rate": 0.00019835726180296144, "loss": 0.387, "step": 5303 }, { "epoch": 0.42968243681140633, "grad_norm": 0.03394316881895065, "learning_rate": 0.0001983527611503668, "loss": 0.3592, "step": 5304 }, { "epoch": 0.42976344782890474, "grad_norm": 0.02860003523528576, "learning_rate": 0.00019834826049777216, "loss": 0.3425, "step": 5305 }, { "epoch": 0.4298444588464031, "grad_norm": 0.031108953058719635, "learning_rate": 0.00019834375984517758, "loss": 0.3452, "step": 5306 }, { "epoch": 0.4299254698639015, "grad_norm": 0.03230966255068779, "learning_rate": 0.00019833925919258294, "loss": 0.3784, "step": 5307 }, { "epoch": 0.43000648088139987, "grad_norm": 0.03210729360580444, "learning_rate": 0.0001983347585399883, "loss": 0.3496, "step": 5308 }, { "epoch": 0.4300874918988983, "grad_norm": 0.029922574758529663, "learning_rate": 0.00019833025788739368, "loss": 0.3956, "step": 5309 }, { "epoch": 0.43016850291639663, "grad_norm": 0.03001343458890915, "learning_rate": 0.00019832575723479904, "loss": 0.3706, "step": 5310 }, { "epoch": 0.430249513933895, "grad_norm": 0.030526647344231606, "learning_rate": 0.00019832125658220443, "loss": 0.3644, "step": 5311 }, { "epoch": 0.4303305249513934, "grad_norm": 0.03420925512909889, "learning_rate": 0.00019831675592960982, "loss": 0.3832, "step": 5312 }, { "epoch": 0.43041153596889176, "grad_norm": 0.03520764037966728, "learning_rate": 0.00019831225527701518, "loss": 0.3439, "step": 5313 }, { "epoch": 0.43049254698639017, "grad_norm": 0.03564382344484329, "learning_rate": 0.00019830775462442054, "loss": 0.3472, "step": 5314 }, { "epoch": 0.4305735580038885, "grad_norm": 0.03275568038225174, "learning_rate": 0.00019830325397182593, "loss": 0.408, "step": 5315 }, { "epoch": 0.43065456902138693, "grad_norm": 0.03222983330488205, "learning_rate": 0.00019829875331923129, "loss": 0.3672, "step": 5316 }, { "epoch": 0.4307355800388853, "grad_norm": 0.034696005284786224, "learning_rate": 0.00019829425266663667, "loss": 0.3478, "step": 5317 }, { "epoch": 0.43081659105638365, "grad_norm": 0.031081615015864372, "learning_rate": 0.00019828975201404206, "loss": 0.3881, "step": 5318 }, { "epoch": 0.43089760207388206, "grad_norm": 0.04065845534205437, "learning_rate": 0.00019828525136144742, "loss": 0.3613, "step": 5319 }, { "epoch": 0.4309786130913804, "grad_norm": 0.03409722447395325, "learning_rate": 0.00019828075070885278, "loss": 0.4114, "step": 5320 }, { "epoch": 0.4310596241088788, "grad_norm": 0.03448516130447388, "learning_rate": 0.00019827625005625817, "loss": 0.3711, "step": 5321 }, { "epoch": 0.4311406351263772, "grad_norm": 0.03240002319216728, "learning_rate": 0.00019827174940366353, "loss": 0.3646, "step": 5322 }, { "epoch": 0.4312216461438756, "grad_norm": 0.031075075268745422, "learning_rate": 0.00019826724875106891, "loss": 0.3832, "step": 5323 }, { "epoch": 0.43130265716137395, "grad_norm": 0.03438475355505943, "learning_rate": 0.0001982627480984743, "loss": 0.3838, "step": 5324 }, { "epoch": 0.4313836681788723, "grad_norm": 0.03652133792638779, "learning_rate": 0.00019825824744587966, "loss": 0.421, "step": 5325 }, { "epoch": 0.4314646791963707, "grad_norm": 0.029609760269522667, "learning_rate": 0.00019825374679328502, "loss": 0.3324, "step": 5326 }, { "epoch": 0.43154569021386907, "grad_norm": 0.03364776447415352, "learning_rate": 0.0001982492461406904, "loss": 0.4306, "step": 5327 }, { "epoch": 0.4316267012313675, "grad_norm": 0.03062380850315094, "learning_rate": 0.00019824474548809577, "loss": 0.3697, "step": 5328 }, { "epoch": 0.43170771224886584, "grad_norm": 0.03233860060572624, "learning_rate": 0.00019824024483550116, "loss": 0.347, "step": 5329 }, { "epoch": 0.43178872326636425, "grad_norm": 0.03236883133649826, "learning_rate": 0.00019823574418290654, "loss": 0.3684, "step": 5330 }, { "epoch": 0.4318697342838626, "grad_norm": 0.029878897592425346, "learning_rate": 0.0001982312435303119, "loss": 0.3848, "step": 5331 }, { "epoch": 0.43195074530136096, "grad_norm": 0.03530362248420715, "learning_rate": 0.00019822674287771726, "loss": 0.3679, "step": 5332 }, { "epoch": 0.4320317563188594, "grad_norm": 0.03218994662165642, "learning_rate": 0.00019822224222512265, "loss": 0.3433, "step": 5333 }, { "epoch": 0.43211276733635773, "grad_norm": 0.030317038297653198, "learning_rate": 0.000198217741572528, "loss": 0.3517, "step": 5334 }, { "epoch": 0.43219377835385614, "grad_norm": 0.03286263346672058, "learning_rate": 0.0001982132409199334, "loss": 0.3677, "step": 5335 }, { "epoch": 0.4322747893713545, "grad_norm": 0.03247006982564926, "learning_rate": 0.00019820874026733879, "loss": 0.3826, "step": 5336 }, { "epoch": 0.4323558003888529, "grad_norm": 0.030288146808743477, "learning_rate": 0.00019820423961474415, "loss": 0.3334, "step": 5337 }, { "epoch": 0.43243681140635126, "grad_norm": 0.034478385001420975, "learning_rate": 0.0001981997389621495, "loss": 0.3603, "step": 5338 }, { "epoch": 0.4325178224238496, "grad_norm": 0.03148753196001053, "learning_rate": 0.0001981952383095549, "loss": 0.4004, "step": 5339 }, { "epoch": 0.43259883344134803, "grad_norm": 0.03236803039908409, "learning_rate": 0.00019819073765696028, "loss": 0.3352, "step": 5340 }, { "epoch": 0.4326798444588464, "grad_norm": 0.03081342577934265, "learning_rate": 0.00019818623700436564, "loss": 0.3771, "step": 5341 }, { "epoch": 0.4327608554763448, "grad_norm": 0.0314449742436409, "learning_rate": 0.00019818173635177103, "loss": 0.362, "step": 5342 }, { "epoch": 0.43284186649384315, "grad_norm": 0.03407454490661621, "learning_rate": 0.0001981772356991764, "loss": 0.3476, "step": 5343 }, { "epoch": 0.43292287751134156, "grad_norm": 0.03543641045689583, "learning_rate": 0.00019817273504658175, "loss": 0.3954, "step": 5344 }, { "epoch": 0.4330038885288399, "grad_norm": 0.03256583213806152, "learning_rate": 0.00019816823439398713, "loss": 0.3339, "step": 5345 }, { "epoch": 0.4330848995463383, "grad_norm": 0.0349603109061718, "learning_rate": 0.00019816373374139252, "loss": 0.3841, "step": 5346 }, { "epoch": 0.4331659105638367, "grad_norm": 0.03182327374815941, "learning_rate": 0.00019815923308879788, "loss": 0.418, "step": 5347 }, { "epoch": 0.43324692158133504, "grad_norm": 0.03906280919909477, "learning_rate": 0.00019815473243620327, "loss": 0.3678, "step": 5348 }, { "epoch": 0.43332793259883345, "grad_norm": 0.02743501029908657, "learning_rate": 0.00019815023178360863, "loss": 0.3212, "step": 5349 }, { "epoch": 0.4334089436163318, "grad_norm": 0.034895461052656174, "learning_rate": 0.000198145731131014, "loss": 0.3985, "step": 5350 }, { "epoch": 0.4334899546338302, "grad_norm": 0.030332963913679123, "learning_rate": 0.00019814123047841938, "loss": 0.3461, "step": 5351 }, { "epoch": 0.4335709656513286, "grad_norm": 0.03224902227520943, "learning_rate": 0.00019813672982582476, "loss": 0.3443, "step": 5352 }, { "epoch": 0.43365197666882693, "grad_norm": 0.03049425408244133, "learning_rate": 0.00019813222917323012, "loss": 0.3866, "step": 5353 }, { "epoch": 0.43373298768632534, "grad_norm": 0.036649126559495926, "learning_rate": 0.0001981277285206355, "loss": 0.3809, "step": 5354 }, { "epoch": 0.4338139987038237, "grad_norm": 0.03557087481021881, "learning_rate": 0.00019812322786804087, "loss": 0.3624, "step": 5355 }, { "epoch": 0.4338950097213221, "grad_norm": 0.04648400843143463, "learning_rate": 0.00019811872721544623, "loss": 0.4313, "step": 5356 }, { "epoch": 0.43397602073882047, "grad_norm": 0.03537077084183693, "learning_rate": 0.00019811422656285162, "loss": 0.3906, "step": 5357 }, { "epoch": 0.4340570317563189, "grad_norm": 0.03709874302148819, "learning_rate": 0.000198109725910257, "loss": 0.3984, "step": 5358 }, { "epoch": 0.43413804277381723, "grad_norm": 0.03712714463472366, "learning_rate": 0.00019810522525766236, "loss": 0.365, "step": 5359 }, { "epoch": 0.43421905379131565, "grad_norm": 0.03208884969353676, "learning_rate": 0.00019810072460506775, "loss": 0.3303, "step": 5360 }, { "epoch": 0.434300064808814, "grad_norm": 0.03099757432937622, "learning_rate": 0.0001980962239524731, "loss": 0.3606, "step": 5361 }, { "epoch": 0.43438107582631236, "grad_norm": 0.0307605043053627, "learning_rate": 0.00019809172329987847, "loss": 0.3973, "step": 5362 }, { "epoch": 0.43446208684381077, "grad_norm": 0.036221764981746674, "learning_rate": 0.00019808722264728389, "loss": 0.3629, "step": 5363 }, { "epoch": 0.4345430978613091, "grad_norm": 0.03021487593650818, "learning_rate": 0.00019808272199468925, "loss": 0.3539, "step": 5364 }, { "epoch": 0.43462410887880754, "grad_norm": 0.038944393396377563, "learning_rate": 0.0001980782213420946, "loss": 0.3857, "step": 5365 }, { "epoch": 0.4347051198963059, "grad_norm": 0.029537489637732506, "learning_rate": 0.0001980737206895, "loss": 0.3449, "step": 5366 }, { "epoch": 0.4347861309138043, "grad_norm": 0.0289238803088665, "learning_rate": 0.00019806922003690535, "loss": 0.3115, "step": 5367 }, { "epoch": 0.43486714193130266, "grad_norm": 0.027230558916926384, "learning_rate": 0.0001980647193843107, "loss": 0.3351, "step": 5368 }, { "epoch": 0.434948152948801, "grad_norm": 0.032325826585292816, "learning_rate": 0.00019806021873171613, "loss": 0.3646, "step": 5369 }, { "epoch": 0.4350291639662994, "grad_norm": 0.03655783087015152, "learning_rate": 0.0001980557180791215, "loss": 0.4564, "step": 5370 }, { "epoch": 0.4351101749837978, "grad_norm": 0.028337333351373672, "learning_rate": 0.00019805121742652685, "loss": 0.3312, "step": 5371 }, { "epoch": 0.4351911860012962, "grad_norm": 0.037807732820510864, "learning_rate": 0.00019804671677393223, "loss": 0.401, "step": 5372 }, { "epoch": 0.43527219701879455, "grad_norm": 0.030689479783177376, "learning_rate": 0.0001980422161213376, "loss": 0.3411, "step": 5373 }, { "epoch": 0.43535320803629296, "grad_norm": 0.03371885046362877, "learning_rate": 0.00019803771546874295, "loss": 0.3303, "step": 5374 }, { "epoch": 0.4354342190537913, "grad_norm": 0.03178364410996437, "learning_rate": 0.00019803321481614837, "loss": 0.3628, "step": 5375 }, { "epoch": 0.43551523007128967, "grad_norm": 0.03330547362565994, "learning_rate": 0.00019802871416355373, "loss": 0.3534, "step": 5376 }, { "epoch": 0.4355962410887881, "grad_norm": 0.03564752638339996, "learning_rate": 0.0001980242135109591, "loss": 0.3938, "step": 5377 }, { "epoch": 0.43567725210628644, "grad_norm": 0.038495369255542755, "learning_rate": 0.00019801971285836448, "loss": 0.34, "step": 5378 }, { "epoch": 0.43575826312378485, "grad_norm": 0.03451234847307205, "learning_rate": 0.00019801521220576984, "loss": 0.3928, "step": 5379 }, { "epoch": 0.4358392741412832, "grad_norm": 0.03760962560772896, "learning_rate": 0.0001980107115531752, "loss": 0.4047, "step": 5380 }, { "epoch": 0.4359202851587816, "grad_norm": 0.034070443361997604, "learning_rate": 0.0001980062109005806, "loss": 0.3554, "step": 5381 }, { "epoch": 0.43600129617628, "grad_norm": 0.03605116531252861, "learning_rate": 0.00019800171024798597, "loss": 0.3732, "step": 5382 }, { "epoch": 0.43608230719377833, "grad_norm": 0.04791965335607529, "learning_rate": 0.00019799720959539133, "loss": 0.3819, "step": 5383 }, { "epoch": 0.43616331821127674, "grad_norm": 0.03060603328049183, "learning_rate": 0.00019799270894279672, "loss": 0.3421, "step": 5384 }, { "epoch": 0.4362443292287751, "grad_norm": 0.04278833419084549, "learning_rate": 0.00019798820829020208, "loss": 0.3558, "step": 5385 }, { "epoch": 0.4363253402462735, "grad_norm": 0.037542033940553665, "learning_rate": 0.00019798370763760747, "loss": 0.4201, "step": 5386 }, { "epoch": 0.43640635126377186, "grad_norm": 0.033243872225284576, "learning_rate": 0.00019797920698501285, "loss": 0.391, "step": 5387 }, { "epoch": 0.4364873622812703, "grad_norm": 0.032972801476716995, "learning_rate": 0.0001979747063324182, "loss": 0.3856, "step": 5388 }, { "epoch": 0.43656837329876863, "grad_norm": 0.034915633499622345, "learning_rate": 0.00019797020567982357, "loss": 0.3655, "step": 5389 }, { "epoch": 0.436649384316267, "grad_norm": 0.031369417905807495, "learning_rate": 0.00019796570502722896, "loss": 0.359, "step": 5390 }, { "epoch": 0.4367303953337654, "grad_norm": 0.03334236517548561, "learning_rate": 0.00019796120437463432, "loss": 0.4029, "step": 5391 }, { "epoch": 0.43681140635126375, "grad_norm": 0.030649734660983086, "learning_rate": 0.0001979567037220397, "loss": 0.343, "step": 5392 }, { "epoch": 0.43689241736876216, "grad_norm": 0.034339819103479385, "learning_rate": 0.0001979522030694451, "loss": 0.3765, "step": 5393 }, { "epoch": 0.4369734283862605, "grad_norm": 0.033588845282793045, "learning_rate": 0.00019794770241685045, "loss": 0.3586, "step": 5394 }, { "epoch": 0.43705443940375893, "grad_norm": 0.029606353491544724, "learning_rate": 0.00019794320176425581, "loss": 0.3613, "step": 5395 }, { "epoch": 0.4371354504212573, "grad_norm": 0.0318920835852623, "learning_rate": 0.0001979387011116612, "loss": 0.388, "step": 5396 }, { "epoch": 0.43721646143875564, "grad_norm": 0.03575948253273964, "learning_rate": 0.00019793420045906656, "loss": 0.3707, "step": 5397 }, { "epoch": 0.43729747245625405, "grad_norm": 0.02881302498281002, "learning_rate": 0.00019792969980647195, "loss": 0.3507, "step": 5398 }, { "epoch": 0.4373784834737524, "grad_norm": 0.031063402071595192, "learning_rate": 0.00019792519915387734, "loss": 0.3456, "step": 5399 }, { "epoch": 0.4374594944912508, "grad_norm": 0.030241835862398148, "learning_rate": 0.0001979206985012827, "loss": 0.3449, "step": 5400 }, { "epoch": 0.4375405055087492, "grad_norm": 0.03187227621674538, "learning_rate": 0.00019791619784868806, "loss": 0.3276, "step": 5401 }, { "epoch": 0.4376215165262476, "grad_norm": 0.032396234571933746, "learning_rate": 0.00019791169719609344, "loss": 0.333, "step": 5402 }, { "epoch": 0.43770252754374595, "grad_norm": 0.033492933958768845, "learning_rate": 0.0001979071965434988, "loss": 0.3664, "step": 5403 }, { "epoch": 0.43778353856124436, "grad_norm": 0.03617965802550316, "learning_rate": 0.0001979026958909042, "loss": 0.3862, "step": 5404 }, { "epoch": 0.4378645495787427, "grad_norm": 0.03024153970181942, "learning_rate": 0.00019789819523830958, "loss": 0.3379, "step": 5405 }, { "epoch": 0.43794556059624107, "grad_norm": 0.0315060131251812, "learning_rate": 0.00019789369458571494, "loss": 0.3679, "step": 5406 }, { "epoch": 0.4380265716137395, "grad_norm": 0.03162820264697075, "learning_rate": 0.0001978891939331203, "loss": 0.3864, "step": 5407 }, { "epoch": 0.43810758263123784, "grad_norm": 0.029816798865795135, "learning_rate": 0.00019788469328052568, "loss": 0.3392, "step": 5408 }, { "epoch": 0.43818859364873625, "grad_norm": 0.03811095282435417, "learning_rate": 0.00019788019262793104, "loss": 0.3207, "step": 5409 }, { "epoch": 0.4382696046662346, "grad_norm": 0.03211852163076401, "learning_rate": 0.00019787569197533643, "loss": 0.3174, "step": 5410 }, { "epoch": 0.438350615683733, "grad_norm": 0.043832749128341675, "learning_rate": 0.00019787119132274182, "loss": 0.3684, "step": 5411 }, { "epoch": 0.43843162670123137, "grad_norm": 0.032680824398994446, "learning_rate": 0.00019786669067014718, "loss": 0.3895, "step": 5412 }, { "epoch": 0.4385126377187297, "grad_norm": 0.03802061453461647, "learning_rate": 0.00019786219001755254, "loss": 0.3778, "step": 5413 }, { "epoch": 0.43859364873622814, "grad_norm": 0.03763697296380997, "learning_rate": 0.00019785768936495793, "loss": 0.3794, "step": 5414 }, { "epoch": 0.4386746597537265, "grad_norm": 0.027806898579001427, "learning_rate": 0.0001978531887123633, "loss": 0.3265, "step": 5415 }, { "epoch": 0.4387556707712249, "grad_norm": 0.03902803733944893, "learning_rate": 0.00019784868805976867, "loss": 0.3538, "step": 5416 }, { "epoch": 0.43883668178872326, "grad_norm": 0.03295661136507988, "learning_rate": 0.00019784418740717406, "loss": 0.3642, "step": 5417 }, { "epoch": 0.43891769280622167, "grad_norm": 0.03418942913413048, "learning_rate": 0.00019783968675457942, "loss": 0.3758, "step": 5418 }, { "epoch": 0.43899870382372, "grad_norm": 0.028170818462967873, "learning_rate": 0.00019783518610198478, "loss": 0.3357, "step": 5419 }, { "epoch": 0.4390797148412184, "grad_norm": 0.0327889621257782, "learning_rate": 0.00019783068544939017, "loss": 0.3371, "step": 5420 }, { "epoch": 0.4391607258587168, "grad_norm": 0.03259824588894844, "learning_rate": 0.00019782618479679555, "loss": 0.3319, "step": 5421 }, { "epoch": 0.43924173687621515, "grad_norm": 0.030759098008275032, "learning_rate": 0.00019782168414420091, "loss": 0.3241, "step": 5422 }, { "epoch": 0.43932274789371356, "grad_norm": 0.029848387464880943, "learning_rate": 0.0001978171834916063, "loss": 0.3527, "step": 5423 }, { "epoch": 0.4394037589112119, "grad_norm": 0.02862711437046528, "learning_rate": 0.00019781268283901166, "loss": 0.3428, "step": 5424 }, { "epoch": 0.43948476992871033, "grad_norm": 0.030342362821102142, "learning_rate": 0.00019780818218641702, "loss": 0.3593, "step": 5425 }, { "epoch": 0.4395657809462087, "grad_norm": 0.03511106222867966, "learning_rate": 0.0001978036815338224, "loss": 0.4057, "step": 5426 }, { "epoch": 0.43964679196370704, "grad_norm": 0.03200792893767357, "learning_rate": 0.0001977991808812278, "loss": 0.3096, "step": 5427 }, { "epoch": 0.43972780298120545, "grad_norm": 0.03348712623119354, "learning_rate": 0.00019779468022863316, "loss": 0.3458, "step": 5428 }, { "epoch": 0.4398088139987038, "grad_norm": 0.02808743715286255, "learning_rate": 0.00019779017957603854, "loss": 0.3225, "step": 5429 }, { "epoch": 0.4398898250162022, "grad_norm": 0.037103742361068726, "learning_rate": 0.0001977856789234439, "loss": 0.3741, "step": 5430 }, { "epoch": 0.4399708360337006, "grad_norm": 0.032084204256534576, "learning_rate": 0.00019778117827084926, "loss": 0.3507, "step": 5431 }, { "epoch": 0.440051847051199, "grad_norm": 0.037166427820920944, "learning_rate": 0.00019777667761825465, "loss": 0.4298, "step": 5432 }, { "epoch": 0.44013285806869734, "grad_norm": 0.03572225570678711, "learning_rate": 0.00019777217696566004, "loss": 0.3389, "step": 5433 }, { "epoch": 0.4402138690861957, "grad_norm": 0.030854439362883568, "learning_rate": 0.0001977676763130654, "loss": 0.3668, "step": 5434 }, { "epoch": 0.4402948801036941, "grad_norm": 0.029217591509222984, "learning_rate": 0.00019776317566047079, "loss": 0.3482, "step": 5435 }, { "epoch": 0.44037589112119246, "grad_norm": 0.035711340606212616, "learning_rate": 0.00019775867500787615, "loss": 0.3429, "step": 5436 }, { "epoch": 0.4404569021386909, "grad_norm": 0.03293585032224655, "learning_rate": 0.0001977541743552815, "loss": 0.3604, "step": 5437 }, { "epoch": 0.44053791315618923, "grad_norm": 0.034553904086351395, "learning_rate": 0.0001977496737026869, "loss": 0.3921, "step": 5438 }, { "epoch": 0.44061892417368764, "grad_norm": 0.03484650328755379, "learning_rate": 0.00019774517305009228, "loss": 0.418, "step": 5439 }, { "epoch": 0.440699935191186, "grad_norm": 0.03234095871448517, "learning_rate": 0.00019774067239749764, "loss": 0.3329, "step": 5440 }, { "epoch": 0.44078094620868435, "grad_norm": 0.03310469910502434, "learning_rate": 0.00019773617174490303, "loss": 0.3264, "step": 5441 }, { "epoch": 0.44086195722618277, "grad_norm": 0.03455287218093872, "learning_rate": 0.0001977316710923084, "loss": 0.363, "step": 5442 }, { "epoch": 0.4409429682436811, "grad_norm": 0.03075864352285862, "learning_rate": 0.00019772717043971375, "loss": 0.3325, "step": 5443 }, { "epoch": 0.44102397926117953, "grad_norm": 0.04186461120843887, "learning_rate": 0.00019772266978711916, "loss": 0.396, "step": 5444 }, { "epoch": 0.4411049902786779, "grad_norm": 0.03264939412474632, "learning_rate": 0.00019771816913452452, "loss": 0.3436, "step": 5445 }, { "epoch": 0.4411860012961763, "grad_norm": 0.02961140312254429, "learning_rate": 0.00019771366848192988, "loss": 0.337, "step": 5446 }, { "epoch": 0.44126701231367466, "grad_norm": 0.030168673023581505, "learning_rate": 0.00019770916782933527, "loss": 0.3311, "step": 5447 }, { "epoch": 0.44134802333117307, "grad_norm": 0.029339507222175598, "learning_rate": 0.00019770466717674063, "loss": 0.3735, "step": 5448 }, { "epoch": 0.4414290343486714, "grad_norm": 0.03168919309973717, "learning_rate": 0.000197700166524146, "loss": 0.383, "step": 5449 }, { "epoch": 0.4415100453661698, "grad_norm": 0.031165389344096184, "learning_rate": 0.0001976956658715514, "loss": 0.3793, "step": 5450 }, { "epoch": 0.4415910563836682, "grad_norm": 0.032787322998046875, "learning_rate": 0.00019769116521895676, "loss": 0.4, "step": 5451 }, { "epoch": 0.44167206740116655, "grad_norm": 0.029846899211406708, "learning_rate": 0.00019768666456636212, "loss": 0.3531, "step": 5452 }, { "epoch": 0.44175307841866496, "grad_norm": 0.034375566989183426, "learning_rate": 0.0001976821639137675, "loss": 0.3749, "step": 5453 }, { "epoch": 0.4418340894361633, "grad_norm": 0.03918051719665527, "learning_rate": 0.00019767766326117287, "loss": 0.4003, "step": 5454 }, { "epoch": 0.4419151004536617, "grad_norm": 0.03416171669960022, "learning_rate": 0.00019767316260857826, "loss": 0.3508, "step": 5455 }, { "epoch": 0.4419961114711601, "grad_norm": 0.029899321496486664, "learning_rate": 0.00019766866195598364, "loss": 0.3539, "step": 5456 }, { "epoch": 0.44207712248865844, "grad_norm": 0.033006105571985245, "learning_rate": 0.000197664161303389, "loss": 0.3613, "step": 5457 }, { "epoch": 0.44215813350615685, "grad_norm": 0.03142609819769859, "learning_rate": 0.00019765966065079436, "loss": 0.3513, "step": 5458 }, { "epoch": 0.4422391445236552, "grad_norm": 0.03870227932929993, "learning_rate": 0.00019765515999819975, "loss": 0.3846, "step": 5459 }, { "epoch": 0.4423201555411536, "grad_norm": 0.03481871262192726, "learning_rate": 0.0001976506593456051, "loss": 0.3803, "step": 5460 }, { "epoch": 0.44240116655865197, "grad_norm": 0.030058806762099266, "learning_rate": 0.0001976461586930105, "loss": 0.3983, "step": 5461 }, { "epoch": 0.4424821775761504, "grad_norm": 0.032486896961927414, "learning_rate": 0.00019764165804041589, "loss": 0.3837, "step": 5462 }, { "epoch": 0.44256318859364874, "grad_norm": 0.03169577196240425, "learning_rate": 0.00019763715738782125, "loss": 0.3636, "step": 5463 }, { "epoch": 0.4426441996111471, "grad_norm": 0.029622867703437805, "learning_rate": 0.0001976326567352266, "loss": 0.3682, "step": 5464 }, { "epoch": 0.4427252106286455, "grad_norm": 0.030483927577733994, "learning_rate": 0.000197628156082632, "loss": 0.3773, "step": 5465 }, { "epoch": 0.44280622164614386, "grad_norm": 0.034242622554302216, "learning_rate": 0.00019762365543003735, "loss": 0.3745, "step": 5466 }, { "epoch": 0.44288723266364227, "grad_norm": 0.03172431141138077, "learning_rate": 0.00019761915477744274, "loss": 0.3659, "step": 5467 }, { "epoch": 0.4429682436811406, "grad_norm": 0.03193086385726929, "learning_rate": 0.00019761465412484813, "loss": 0.3697, "step": 5468 }, { "epoch": 0.44304925469863904, "grad_norm": 0.02869819663465023, "learning_rate": 0.0001976101534722535, "loss": 0.3666, "step": 5469 }, { "epoch": 0.4431302657161374, "grad_norm": 0.03176020458340645, "learning_rate": 0.00019760565281965885, "loss": 0.3943, "step": 5470 }, { "epoch": 0.44321127673363575, "grad_norm": 0.03435826301574707, "learning_rate": 0.00019760115216706424, "loss": 0.3416, "step": 5471 }, { "epoch": 0.44329228775113416, "grad_norm": 0.033849213272333145, "learning_rate": 0.0001975966515144696, "loss": 0.3227, "step": 5472 }, { "epoch": 0.4433732987686325, "grad_norm": 0.032498378306627274, "learning_rate": 0.00019759215086187498, "loss": 0.3608, "step": 5473 }, { "epoch": 0.44345430978613093, "grad_norm": 0.029758907854557037, "learning_rate": 0.00019758765020928037, "loss": 0.3676, "step": 5474 }, { "epoch": 0.4435353208036293, "grad_norm": 0.032321639358997345, "learning_rate": 0.00019758314955668573, "loss": 0.3969, "step": 5475 }, { "epoch": 0.4436163318211277, "grad_norm": 0.03752026706933975, "learning_rate": 0.0001975786489040911, "loss": 0.4244, "step": 5476 }, { "epoch": 0.44369734283862605, "grad_norm": 0.035951949656009674, "learning_rate": 0.00019757414825149648, "loss": 0.3564, "step": 5477 }, { "epoch": 0.4437783538561244, "grad_norm": 0.03439652919769287, "learning_rate": 0.00019756964759890184, "loss": 0.3896, "step": 5478 }, { "epoch": 0.4438593648736228, "grad_norm": 0.028514515608549118, "learning_rate": 0.00019756514694630722, "loss": 0.3491, "step": 5479 }, { "epoch": 0.4439403758911212, "grad_norm": 0.03369169309735298, "learning_rate": 0.0001975606462937126, "loss": 0.3839, "step": 5480 }, { "epoch": 0.4440213869086196, "grad_norm": 0.034968357533216476, "learning_rate": 0.00019755614564111797, "loss": 0.3321, "step": 5481 }, { "epoch": 0.44410239792611794, "grad_norm": 0.041739922016859055, "learning_rate": 0.00019755164498852333, "loss": 0.3806, "step": 5482 }, { "epoch": 0.44418340894361635, "grad_norm": 0.031739480793476105, "learning_rate": 0.00019754714433592872, "loss": 0.3084, "step": 5483 }, { "epoch": 0.4442644199611147, "grad_norm": 0.035275254398584366, "learning_rate": 0.00019754264368333408, "loss": 0.3541, "step": 5484 }, { "epoch": 0.44434543097861307, "grad_norm": 0.03895600512623787, "learning_rate": 0.00019753814303073947, "loss": 0.3913, "step": 5485 }, { "epoch": 0.4444264419961115, "grad_norm": 0.03006908856332302, "learning_rate": 0.00019753364237814485, "loss": 0.3521, "step": 5486 }, { "epoch": 0.44450745301360983, "grad_norm": 0.030135368928313255, "learning_rate": 0.0001975291417255502, "loss": 0.3438, "step": 5487 }, { "epoch": 0.44458846403110824, "grad_norm": 0.02975146844983101, "learning_rate": 0.00019752464107295557, "loss": 0.3699, "step": 5488 }, { "epoch": 0.4446694750486066, "grad_norm": 0.033303115516901016, "learning_rate": 0.00019752014042036096, "loss": 0.3425, "step": 5489 }, { "epoch": 0.444750486066105, "grad_norm": 0.03164275363087654, "learning_rate": 0.00019751563976776632, "loss": 0.382, "step": 5490 }, { "epoch": 0.44483149708360337, "grad_norm": 0.032723914831876755, "learning_rate": 0.0001975111391151717, "loss": 0.374, "step": 5491 }, { "epoch": 0.4449125081011017, "grad_norm": 0.02998311258852482, "learning_rate": 0.0001975066384625771, "loss": 0.3281, "step": 5492 }, { "epoch": 0.44499351911860013, "grad_norm": 0.0313972532749176, "learning_rate": 0.00019750213780998245, "loss": 0.3308, "step": 5493 }, { "epoch": 0.4450745301360985, "grad_norm": 0.03089817613363266, "learning_rate": 0.00019749763715738781, "loss": 0.3538, "step": 5494 }, { "epoch": 0.4451555411535969, "grad_norm": 0.03646509349346161, "learning_rate": 0.0001974931365047932, "loss": 0.3647, "step": 5495 }, { "epoch": 0.44523655217109526, "grad_norm": 0.03091086447238922, "learning_rate": 0.0001974886358521986, "loss": 0.337, "step": 5496 }, { "epoch": 0.44531756318859367, "grad_norm": 0.03153252229094505, "learning_rate": 0.00019748413519960395, "loss": 0.3539, "step": 5497 }, { "epoch": 0.445398574206092, "grad_norm": 0.030094187706708908, "learning_rate": 0.00019747963454700934, "loss": 0.3462, "step": 5498 }, { "epoch": 0.44547958522359044, "grad_norm": 0.03305754065513611, "learning_rate": 0.0001974751338944147, "loss": 0.3711, "step": 5499 }, { "epoch": 0.4455605962410888, "grad_norm": 0.02947155386209488, "learning_rate": 0.00019747063324182006, "loss": 0.3559, "step": 5500 }, { "epoch": 0.44564160725858715, "grad_norm": 0.037813130766153336, "learning_rate": 0.00019746613258922544, "loss": 0.4117, "step": 5501 }, { "epoch": 0.44572261827608556, "grad_norm": 0.0321708545088768, "learning_rate": 0.00019746163193663083, "loss": 0.3622, "step": 5502 }, { "epoch": 0.4458036292935839, "grad_norm": 0.03237539157271385, "learning_rate": 0.0001974571312840362, "loss": 0.3718, "step": 5503 }, { "epoch": 0.4458846403110823, "grad_norm": 0.036956895142793655, "learning_rate": 0.00019745263063144158, "loss": 0.3655, "step": 5504 }, { "epoch": 0.4459656513285807, "grad_norm": 0.03339963033795357, "learning_rate": 0.00019744812997884694, "loss": 0.3751, "step": 5505 }, { "epoch": 0.4460466623460791, "grad_norm": 0.030577365309000015, "learning_rate": 0.0001974436293262523, "loss": 0.3382, "step": 5506 }, { "epoch": 0.44612767336357745, "grad_norm": 0.030274648219347, "learning_rate": 0.00019743912867365768, "loss": 0.3601, "step": 5507 }, { "epoch": 0.4462086843810758, "grad_norm": 0.027877582237124443, "learning_rate": 0.00019743462802106307, "loss": 0.3226, "step": 5508 }, { "epoch": 0.4462896953985742, "grad_norm": 0.029135365039110184, "learning_rate": 0.00019743012736846843, "loss": 0.3288, "step": 5509 }, { "epoch": 0.44637070641607257, "grad_norm": 0.030737759545445442, "learning_rate": 0.00019742562671587382, "loss": 0.3263, "step": 5510 }, { "epoch": 0.446451717433571, "grad_norm": 0.03417934849858284, "learning_rate": 0.00019742112606327918, "loss": 0.3272, "step": 5511 }, { "epoch": 0.44653272845106934, "grad_norm": 0.03573647886514664, "learning_rate": 0.00019741662541068454, "loss": 0.3576, "step": 5512 }, { "epoch": 0.44661373946856775, "grad_norm": 0.02828175760805607, "learning_rate": 0.00019741212475808993, "loss": 0.367, "step": 5513 }, { "epoch": 0.4466947504860661, "grad_norm": 0.03438275679945946, "learning_rate": 0.0001974076241054953, "loss": 0.3431, "step": 5514 }, { "epoch": 0.44677576150356446, "grad_norm": 0.029411716386675835, "learning_rate": 0.00019740312345290067, "loss": 0.3404, "step": 5515 }, { "epoch": 0.4468567725210629, "grad_norm": 0.030819594860076904, "learning_rate": 0.00019739862280030606, "loss": 0.3567, "step": 5516 }, { "epoch": 0.44693778353856123, "grad_norm": 0.036897122859954834, "learning_rate": 0.00019739412214771142, "loss": 0.4223, "step": 5517 }, { "epoch": 0.44701879455605964, "grad_norm": 0.0320262610912323, "learning_rate": 0.00019738962149511678, "loss": 0.3399, "step": 5518 }, { "epoch": 0.447099805573558, "grad_norm": 0.03475802764296532, "learning_rate": 0.00019738512084252217, "loss": 0.3839, "step": 5519 }, { "epoch": 0.4471808165910564, "grad_norm": 0.031090576201677322, "learning_rate": 0.00019738062018992756, "loss": 0.3841, "step": 5520 }, { "epoch": 0.44726182760855476, "grad_norm": 0.03029099851846695, "learning_rate": 0.00019737611953733292, "loss": 0.33, "step": 5521 }, { "epoch": 0.4473428386260531, "grad_norm": 0.03051835112273693, "learning_rate": 0.0001973716188847383, "loss": 0.368, "step": 5522 }, { "epoch": 0.44742384964355153, "grad_norm": 0.02957882173359394, "learning_rate": 0.00019736711823214366, "loss": 0.3619, "step": 5523 }, { "epoch": 0.4475048606610499, "grad_norm": 0.027077723294496536, "learning_rate": 0.00019736261757954905, "loss": 0.3441, "step": 5524 }, { "epoch": 0.4475858716785483, "grad_norm": 0.029595671221613884, "learning_rate": 0.00019735811692695444, "loss": 0.3311, "step": 5525 }, { "epoch": 0.44766688269604665, "grad_norm": 0.02908790111541748, "learning_rate": 0.0001973536162743598, "loss": 0.3529, "step": 5526 }, { "epoch": 0.44774789371354506, "grad_norm": 0.030402570962905884, "learning_rate": 0.00019734911562176516, "loss": 0.3176, "step": 5527 }, { "epoch": 0.4478289047310434, "grad_norm": 0.035793375223875046, "learning_rate": 0.00019734461496917054, "loss": 0.4044, "step": 5528 }, { "epoch": 0.4479099157485418, "grad_norm": 0.03458891436457634, "learning_rate": 0.0001973401143165759, "loss": 0.3557, "step": 5529 }, { "epoch": 0.4479909267660402, "grad_norm": 0.031998954713344574, "learning_rate": 0.0001973356136639813, "loss": 0.3524, "step": 5530 }, { "epoch": 0.44807193778353854, "grad_norm": 0.03016434609889984, "learning_rate": 0.00019733111301138668, "loss": 0.3728, "step": 5531 }, { "epoch": 0.44815294880103695, "grad_norm": 0.042123984545469284, "learning_rate": 0.00019732661235879204, "loss": 0.3771, "step": 5532 }, { "epoch": 0.4482339598185353, "grad_norm": 0.028418881818652153, "learning_rate": 0.0001973221117061974, "loss": 0.3518, "step": 5533 }, { "epoch": 0.4483149708360337, "grad_norm": 0.03634253889322281, "learning_rate": 0.00019731761105360279, "loss": 0.3659, "step": 5534 }, { "epoch": 0.4483959818535321, "grad_norm": 0.033422552049160004, "learning_rate": 0.00019731311040100815, "loss": 0.3763, "step": 5535 }, { "epoch": 0.44847699287103043, "grad_norm": 0.03268659487366676, "learning_rate": 0.00019730860974841353, "loss": 0.3353, "step": 5536 }, { "epoch": 0.44855800388852884, "grad_norm": 0.038906682282686234, "learning_rate": 0.00019730410909581892, "loss": 0.4272, "step": 5537 }, { "epoch": 0.4486390149060272, "grad_norm": 0.03435641527175903, "learning_rate": 0.00019729960844322428, "loss": 0.4166, "step": 5538 }, { "epoch": 0.4487200259235256, "grad_norm": 0.03522764891386032, "learning_rate": 0.00019729510779062964, "loss": 0.3679, "step": 5539 }, { "epoch": 0.44880103694102397, "grad_norm": 0.03225326165556908, "learning_rate": 0.00019729060713803503, "loss": 0.3456, "step": 5540 }, { "epoch": 0.4488820479585224, "grad_norm": 0.029571063816547394, "learning_rate": 0.0001972861064854404, "loss": 0.3183, "step": 5541 }, { "epoch": 0.44896305897602073, "grad_norm": 0.0386439673602581, "learning_rate": 0.00019728160583284577, "loss": 0.438, "step": 5542 }, { "epoch": 0.44904406999351915, "grad_norm": 0.035687077790498734, "learning_rate": 0.00019727710518025116, "loss": 0.3606, "step": 5543 }, { "epoch": 0.4491250810110175, "grad_norm": 0.03311741352081299, "learning_rate": 0.00019727260452765652, "loss": 0.3893, "step": 5544 }, { "epoch": 0.44920609202851586, "grad_norm": 0.0325443334877491, "learning_rate": 0.00019726810387506188, "loss": 0.3824, "step": 5545 }, { "epoch": 0.44928710304601427, "grad_norm": 0.03307313844561577, "learning_rate": 0.00019726360322246727, "loss": 0.3303, "step": 5546 }, { "epoch": 0.4493681140635126, "grad_norm": 0.03364656865596771, "learning_rate": 0.00019725910256987263, "loss": 0.3653, "step": 5547 }, { "epoch": 0.44944912508101104, "grad_norm": 0.03391062840819359, "learning_rate": 0.00019725460191727802, "loss": 0.346, "step": 5548 }, { "epoch": 0.4495301360985094, "grad_norm": 0.03271664306521416, "learning_rate": 0.0001972501012646834, "loss": 0.3862, "step": 5549 }, { "epoch": 0.4496111471160078, "grad_norm": 0.031478166580200195, "learning_rate": 0.00019724560061208876, "loss": 0.3557, "step": 5550 }, { "epoch": 0.44969215813350616, "grad_norm": 0.03432717174291611, "learning_rate": 0.00019724109995949412, "loss": 0.4148, "step": 5551 }, { "epoch": 0.4497731691510045, "grad_norm": 0.031578414142131805, "learning_rate": 0.0001972365993068995, "loss": 0.3852, "step": 5552 }, { "epoch": 0.4498541801685029, "grad_norm": 0.03257657587528229, "learning_rate": 0.00019723209865430487, "loss": 0.3939, "step": 5553 }, { "epoch": 0.4499351911860013, "grad_norm": 0.029145896434783936, "learning_rate": 0.00019722759800171026, "loss": 0.3018, "step": 5554 }, { "epoch": 0.4500162022034997, "grad_norm": 0.030488284304738045, "learning_rate": 0.00019722309734911564, "loss": 0.3805, "step": 5555 }, { "epoch": 0.45009721322099805, "grad_norm": 0.03530951589345932, "learning_rate": 0.000197218596696521, "loss": 0.3785, "step": 5556 }, { "epoch": 0.45017822423849646, "grad_norm": 0.029178744181990623, "learning_rate": 0.00019721409604392636, "loss": 0.3517, "step": 5557 }, { "epoch": 0.4502592352559948, "grad_norm": 0.037996888160705566, "learning_rate": 0.00019720959539133175, "loss": 0.4228, "step": 5558 }, { "epoch": 0.45034024627349317, "grad_norm": 0.03181913122534752, "learning_rate": 0.0001972050947387371, "loss": 0.4349, "step": 5559 }, { "epoch": 0.4504212572909916, "grad_norm": 0.03445330634713173, "learning_rate": 0.0001972005940861425, "loss": 0.4087, "step": 5560 }, { "epoch": 0.45050226830848994, "grad_norm": 0.0322774238884449, "learning_rate": 0.00019719609343354789, "loss": 0.3158, "step": 5561 }, { "epoch": 0.45058327932598835, "grad_norm": 0.03479272872209549, "learning_rate": 0.00019719159278095325, "loss": 0.3869, "step": 5562 }, { "epoch": 0.4506642903434867, "grad_norm": 0.03306402638554573, "learning_rate": 0.0001971870921283586, "loss": 0.3676, "step": 5563 }, { "epoch": 0.4507453013609851, "grad_norm": 0.031178992241621017, "learning_rate": 0.000197182591475764, "loss": 0.3454, "step": 5564 }, { "epoch": 0.4508263123784835, "grad_norm": 0.03359023854136467, "learning_rate": 0.00019717809082316935, "loss": 0.3874, "step": 5565 }, { "epoch": 0.45090732339598183, "grad_norm": 0.0333256796002388, "learning_rate": 0.00019717359017057474, "loss": 0.3842, "step": 5566 }, { "epoch": 0.45098833441348024, "grad_norm": 0.029935095459222794, "learning_rate": 0.00019716908951798013, "loss": 0.3551, "step": 5567 }, { "epoch": 0.4510693454309786, "grad_norm": 0.029074864462018013, "learning_rate": 0.0001971645888653855, "loss": 0.3303, "step": 5568 }, { "epoch": 0.451150356448477, "grad_norm": 0.03021335043013096, "learning_rate": 0.00019716008821279085, "loss": 0.3553, "step": 5569 }, { "epoch": 0.45123136746597536, "grad_norm": 0.036455187946558, "learning_rate": 0.00019715558756019624, "loss": 0.3459, "step": 5570 }, { "epoch": 0.4513123784834738, "grad_norm": 0.0361262671649456, "learning_rate": 0.0001971510869076016, "loss": 0.3783, "step": 5571 }, { "epoch": 0.45139338950097213, "grad_norm": 0.0377480685710907, "learning_rate": 0.00019714658625500698, "loss": 0.3911, "step": 5572 }, { "epoch": 0.4514744005184705, "grad_norm": 0.03418326750397682, "learning_rate": 0.00019714208560241237, "loss": 0.3826, "step": 5573 }, { "epoch": 0.4515554115359689, "grad_norm": 0.035968709737062454, "learning_rate": 0.00019713758494981773, "loss": 0.3411, "step": 5574 }, { "epoch": 0.45163642255346725, "grad_norm": 0.03250642493367195, "learning_rate": 0.0001971330842972231, "loss": 0.4007, "step": 5575 }, { "epoch": 0.45171743357096567, "grad_norm": 0.03724129498004913, "learning_rate": 0.00019712858364462848, "loss": 0.3675, "step": 5576 }, { "epoch": 0.451798444588464, "grad_norm": 0.034993384033441544, "learning_rate": 0.00019712408299203386, "loss": 0.3965, "step": 5577 }, { "epoch": 0.45187945560596243, "grad_norm": 0.03404005616903305, "learning_rate": 0.00019711958233943922, "loss": 0.3469, "step": 5578 }, { "epoch": 0.4519604666234608, "grad_norm": 0.033056486397981644, "learning_rate": 0.0001971150816868446, "loss": 0.3306, "step": 5579 }, { "epoch": 0.45204147764095914, "grad_norm": 0.04045993462204933, "learning_rate": 0.00019711058103424997, "loss": 0.3976, "step": 5580 }, { "epoch": 0.45212248865845756, "grad_norm": 0.03009396232664585, "learning_rate": 0.00019710608038165533, "loss": 0.3595, "step": 5581 }, { "epoch": 0.4522034996759559, "grad_norm": 0.03585132956504822, "learning_rate": 0.00019710157972906072, "loss": 0.3745, "step": 5582 }, { "epoch": 0.4522845106934543, "grad_norm": 0.03336992487311363, "learning_rate": 0.0001970970790764661, "loss": 0.371, "step": 5583 }, { "epoch": 0.4523655217109527, "grad_norm": 0.03080323152244091, "learning_rate": 0.00019709257842387147, "loss": 0.347, "step": 5584 }, { "epoch": 0.4524465327284511, "grad_norm": 0.031140008941292763, "learning_rate": 0.00019708807777127685, "loss": 0.3609, "step": 5585 }, { "epoch": 0.45252754374594945, "grad_norm": 0.029711594805121422, "learning_rate": 0.0001970835771186822, "loss": 0.3359, "step": 5586 }, { "epoch": 0.4526085547634478, "grad_norm": 0.03211309760808945, "learning_rate": 0.0001970790764660876, "loss": 0.3801, "step": 5587 }, { "epoch": 0.4526895657809462, "grad_norm": 0.028747573494911194, "learning_rate": 0.00019707457581349296, "loss": 0.3278, "step": 5588 }, { "epoch": 0.45277057679844457, "grad_norm": 0.02893175184726715, "learning_rate": 0.00019707007516089835, "loss": 0.3294, "step": 5589 }, { "epoch": 0.452851587815943, "grad_norm": 0.030056646093726158, "learning_rate": 0.0001970655745083037, "loss": 0.3809, "step": 5590 }, { "epoch": 0.45293259883344134, "grad_norm": 0.03192294016480446, "learning_rate": 0.0001970610738557091, "loss": 0.3171, "step": 5591 }, { "epoch": 0.45301360985093975, "grad_norm": 0.03785628080368042, "learning_rate": 0.00019705657320311445, "loss": 0.3788, "step": 5592 }, { "epoch": 0.4530946208684381, "grad_norm": 0.032300058752298355, "learning_rate": 0.00019705207255051984, "loss": 0.373, "step": 5593 }, { "epoch": 0.4531756318859365, "grad_norm": 0.03178001567721367, "learning_rate": 0.0001970475718979252, "loss": 0.343, "step": 5594 }, { "epoch": 0.45325664290343487, "grad_norm": 0.028133099898695946, "learning_rate": 0.0001970430712453306, "loss": 0.3536, "step": 5595 }, { "epoch": 0.4533376539209332, "grad_norm": 0.03809162229299545, "learning_rate": 0.00019703857059273595, "loss": 0.352, "step": 5596 }, { "epoch": 0.45341866493843164, "grad_norm": 0.028956517577171326, "learning_rate": 0.00019703406994014134, "loss": 0.3468, "step": 5597 }, { "epoch": 0.45349967595593, "grad_norm": 0.033848222345113754, "learning_rate": 0.0001970295692875467, "loss": 0.3263, "step": 5598 }, { "epoch": 0.4535806869734284, "grad_norm": 0.029715241864323616, "learning_rate": 0.00019702506863495208, "loss": 0.2873, "step": 5599 }, { "epoch": 0.45366169799092676, "grad_norm": 0.037406545132398605, "learning_rate": 0.00019702056798235744, "loss": 0.3849, "step": 5600 }, { "epoch": 0.45374270900842517, "grad_norm": 0.030025694519281387, "learning_rate": 0.00019701606732976283, "loss": 0.313, "step": 5601 }, { "epoch": 0.4538237200259235, "grad_norm": 0.03170083835721016, "learning_rate": 0.0001970115666771682, "loss": 0.4076, "step": 5602 }, { "epoch": 0.4539047310434219, "grad_norm": 0.03326018899679184, "learning_rate": 0.00019700706602457358, "loss": 0.3813, "step": 5603 }, { "epoch": 0.4539857420609203, "grad_norm": 0.029004201292991638, "learning_rate": 0.00019700256537197894, "loss": 0.3126, "step": 5604 }, { "epoch": 0.45406675307841865, "grad_norm": 0.030437350273132324, "learning_rate": 0.00019699806471938432, "loss": 0.3349, "step": 5605 }, { "epoch": 0.45414776409591706, "grad_norm": 0.031204288825392723, "learning_rate": 0.0001969935640667897, "loss": 0.3757, "step": 5606 }, { "epoch": 0.4542287751134154, "grad_norm": 0.03240903466939926, "learning_rate": 0.00019698906341419507, "loss": 0.35, "step": 5607 }, { "epoch": 0.45430978613091383, "grad_norm": 0.03153548762202263, "learning_rate": 0.00019698456276160043, "loss": 0.3715, "step": 5608 }, { "epoch": 0.4543907971484122, "grad_norm": 0.028369799256324768, "learning_rate": 0.00019698006210900582, "loss": 0.3137, "step": 5609 }, { "epoch": 0.45447180816591054, "grad_norm": 0.03753526136279106, "learning_rate": 0.00019697556145641118, "loss": 0.4511, "step": 5610 }, { "epoch": 0.45455281918340895, "grad_norm": 0.03225935623049736, "learning_rate": 0.00019697106080381657, "loss": 0.3959, "step": 5611 }, { "epoch": 0.4546338302009073, "grad_norm": 0.03187123313546181, "learning_rate": 0.00019696656015122195, "loss": 0.3559, "step": 5612 }, { "epoch": 0.4547148412184057, "grad_norm": 0.031548816710710526, "learning_rate": 0.00019696205949862731, "loss": 0.3414, "step": 5613 }, { "epoch": 0.4547958522359041, "grad_norm": 0.027448706328868866, "learning_rate": 0.00019695755884603267, "loss": 0.3235, "step": 5614 }, { "epoch": 0.4548768632534025, "grad_norm": 0.03475405275821686, "learning_rate": 0.00019695305819343806, "loss": 0.3632, "step": 5615 }, { "epoch": 0.45495787427090084, "grad_norm": 0.03420334309339523, "learning_rate": 0.00019694855754084342, "loss": 0.3965, "step": 5616 }, { "epoch": 0.4550388852883992, "grad_norm": 0.03267541527748108, "learning_rate": 0.0001969440568882488, "loss": 0.3534, "step": 5617 }, { "epoch": 0.4551198963058976, "grad_norm": 0.03327197954058647, "learning_rate": 0.0001969395562356542, "loss": 0.3453, "step": 5618 }, { "epoch": 0.45520090732339596, "grad_norm": 0.03551128879189491, "learning_rate": 0.00019693505558305956, "loss": 0.3759, "step": 5619 }, { "epoch": 0.4552819183408944, "grad_norm": 0.03346748650074005, "learning_rate": 0.00019693055493046492, "loss": 0.3887, "step": 5620 }, { "epoch": 0.45536292935839273, "grad_norm": 0.03371153026819229, "learning_rate": 0.0001969260542778703, "loss": 0.3671, "step": 5621 }, { "epoch": 0.45544394037589114, "grad_norm": 0.030512280762195587, "learning_rate": 0.00019692155362527566, "loss": 0.3943, "step": 5622 }, { "epoch": 0.4555249513933895, "grad_norm": 0.033492594957351685, "learning_rate": 0.00019691705297268105, "loss": 0.3823, "step": 5623 }, { "epoch": 0.45560596241088785, "grad_norm": 0.033078644424676895, "learning_rate": 0.00019691255232008644, "loss": 0.3974, "step": 5624 }, { "epoch": 0.45568697342838627, "grad_norm": 0.036155492067337036, "learning_rate": 0.0001969080516674918, "loss": 0.3953, "step": 5625 }, { "epoch": 0.4557679844458846, "grad_norm": 0.03399474173784256, "learning_rate": 0.00019690355101489716, "loss": 0.4092, "step": 5626 }, { "epoch": 0.45584899546338303, "grad_norm": 0.03158540651202202, "learning_rate": 0.00019689905036230254, "loss": 0.385, "step": 5627 }, { "epoch": 0.4559300064808814, "grad_norm": 0.03434424847364426, "learning_rate": 0.0001968945497097079, "loss": 0.3455, "step": 5628 }, { "epoch": 0.4560110174983798, "grad_norm": 0.033775173127651215, "learning_rate": 0.0001968900490571133, "loss": 0.3619, "step": 5629 }, { "epoch": 0.45609202851587816, "grad_norm": 0.0323668047785759, "learning_rate": 0.00019688554840451868, "loss": 0.3301, "step": 5630 }, { "epoch": 0.4561730395333765, "grad_norm": 0.031544625759124756, "learning_rate": 0.00019688104775192404, "loss": 0.3736, "step": 5631 }, { "epoch": 0.4562540505508749, "grad_norm": 0.03242504969239235, "learning_rate": 0.0001968765470993294, "loss": 0.3973, "step": 5632 }, { "epoch": 0.4563350615683733, "grad_norm": 0.03165287524461746, "learning_rate": 0.00019687204644673479, "loss": 0.3867, "step": 5633 }, { "epoch": 0.4564160725858717, "grad_norm": 0.030601616948843002, "learning_rate": 0.00019686754579414015, "loss": 0.3521, "step": 5634 }, { "epoch": 0.45649708360337005, "grad_norm": 0.029937176033854485, "learning_rate": 0.00019686304514154553, "loss": 0.4054, "step": 5635 }, { "epoch": 0.45657809462086846, "grad_norm": 0.030966006219387054, "learning_rate": 0.00019685854448895092, "loss": 0.332, "step": 5636 }, { "epoch": 0.4566591056383668, "grad_norm": 0.03530475124716759, "learning_rate": 0.00019685404383635628, "loss": 0.3658, "step": 5637 }, { "epoch": 0.4567401166558652, "grad_norm": 0.02700497955083847, "learning_rate": 0.00019684954318376164, "loss": 0.3259, "step": 5638 }, { "epoch": 0.4568211276733636, "grad_norm": 0.03353603556752205, "learning_rate": 0.00019684504253116703, "loss": 0.3527, "step": 5639 }, { "epoch": 0.45690213869086194, "grad_norm": 0.029281500726938248, "learning_rate": 0.0001968405418785724, "loss": 0.353, "step": 5640 }, { "epoch": 0.45698314970836035, "grad_norm": 0.032181404531002045, "learning_rate": 0.00019683604122597777, "loss": 0.379, "step": 5641 }, { "epoch": 0.4570641607258587, "grad_norm": 0.03690697252750397, "learning_rate": 0.00019683154057338316, "loss": 0.4451, "step": 5642 }, { "epoch": 0.4571451717433571, "grad_norm": 0.029781976714730263, "learning_rate": 0.00019682703992078852, "loss": 0.3974, "step": 5643 }, { "epoch": 0.45722618276085547, "grad_norm": 0.03640514239668846, "learning_rate": 0.00019682253926819388, "loss": 0.3696, "step": 5644 }, { "epoch": 0.4573071937783539, "grad_norm": 0.03272904083132744, "learning_rate": 0.00019681803861559927, "loss": 0.3926, "step": 5645 }, { "epoch": 0.45738820479585224, "grad_norm": 0.026668280363082886, "learning_rate": 0.00019681353796300463, "loss": 0.3005, "step": 5646 }, { "epoch": 0.4574692158133506, "grad_norm": 0.028446605429053307, "learning_rate": 0.00019680903731041002, "loss": 0.3242, "step": 5647 }, { "epoch": 0.457550226830849, "grad_norm": 0.028360169380903244, "learning_rate": 0.0001968045366578154, "loss": 0.3312, "step": 5648 }, { "epoch": 0.45763123784834736, "grad_norm": 0.02998286299407482, "learning_rate": 0.00019680003600522076, "loss": 0.3738, "step": 5649 }, { "epoch": 0.45771224886584577, "grad_norm": 0.029424121603369713, "learning_rate": 0.00019679553535262612, "loss": 0.3192, "step": 5650 }, { "epoch": 0.45779325988334413, "grad_norm": 0.03551146760582924, "learning_rate": 0.0001967910347000315, "loss": 0.3893, "step": 5651 }, { "epoch": 0.45787427090084254, "grad_norm": 0.027763670310378075, "learning_rate": 0.00019678653404743687, "loss": 0.3126, "step": 5652 }, { "epoch": 0.4579552819183409, "grad_norm": 0.030606647953391075, "learning_rate": 0.00019678203339484226, "loss": 0.3541, "step": 5653 }, { "epoch": 0.45803629293583925, "grad_norm": 0.029099253937602043, "learning_rate": 0.00019677753274224765, "loss": 0.3799, "step": 5654 }, { "epoch": 0.45811730395333766, "grad_norm": 0.030575638636946678, "learning_rate": 0.000196773032089653, "loss": 0.3879, "step": 5655 }, { "epoch": 0.458198314970836, "grad_norm": 0.027737515047192574, "learning_rate": 0.0001967685314370584, "loss": 0.3454, "step": 5656 }, { "epoch": 0.45827932598833443, "grad_norm": 0.03216322883963585, "learning_rate": 0.00019676403078446375, "loss": 0.4085, "step": 5657 }, { "epoch": 0.4583603370058328, "grad_norm": 0.032551418989896774, "learning_rate": 0.00019675953013186914, "loss": 0.3722, "step": 5658 }, { "epoch": 0.4584413480233312, "grad_norm": 0.03434035927057266, "learning_rate": 0.0001967550294792745, "loss": 0.3455, "step": 5659 }, { "epoch": 0.45852235904082955, "grad_norm": 0.03802090883255005, "learning_rate": 0.0001967505288266799, "loss": 0.4339, "step": 5660 }, { "epoch": 0.4586033700583279, "grad_norm": 0.03537151589989662, "learning_rate": 0.00019674602817408525, "loss": 0.3903, "step": 5661 }, { "epoch": 0.4586843810758263, "grad_norm": 0.03610273450613022, "learning_rate": 0.00019674152752149063, "loss": 0.3985, "step": 5662 }, { "epoch": 0.4587653920933247, "grad_norm": 0.0331452339887619, "learning_rate": 0.000196737026868896, "loss": 0.2959, "step": 5663 }, { "epoch": 0.4588464031108231, "grad_norm": 0.037477314472198486, "learning_rate": 0.00019673252621630138, "loss": 0.3811, "step": 5664 }, { "epoch": 0.45892741412832144, "grad_norm": 0.037166204303503036, "learning_rate": 0.00019672802556370674, "loss": 0.3617, "step": 5665 }, { "epoch": 0.45900842514581985, "grad_norm": 0.03388550877571106, "learning_rate": 0.00019672352491111213, "loss": 0.3591, "step": 5666 }, { "epoch": 0.4590894361633182, "grad_norm": 0.029943333938717842, "learning_rate": 0.0001967190242585175, "loss": 0.3568, "step": 5667 }, { "epoch": 0.45917044718081657, "grad_norm": 0.031574398279190063, "learning_rate": 0.00019671452360592288, "loss": 0.3161, "step": 5668 }, { "epoch": 0.459251458198315, "grad_norm": 0.032210998237133026, "learning_rate": 0.00019671002295332824, "loss": 0.4043, "step": 5669 }, { "epoch": 0.45933246921581333, "grad_norm": 0.0306962039321661, "learning_rate": 0.00019670552230073362, "loss": 0.3695, "step": 5670 }, { "epoch": 0.45941348023331174, "grad_norm": 0.03281569853425026, "learning_rate": 0.00019670102164813898, "loss": 0.3459, "step": 5671 }, { "epoch": 0.4594944912508101, "grad_norm": 0.03295455873012543, "learning_rate": 0.00019669652099554437, "loss": 0.3773, "step": 5672 }, { "epoch": 0.4595755022683085, "grad_norm": 0.03259165585041046, "learning_rate": 0.00019669202034294973, "loss": 0.365, "step": 5673 }, { "epoch": 0.45965651328580687, "grad_norm": 0.03317999839782715, "learning_rate": 0.00019668751969035512, "loss": 0.3842, "step": 5674 }, { "epoch": 0.4597375243033052, "grad_norm": 0.028184987604618073, "learning_rate": 0.00019668301903776048, "loss": 0.3677, "step": 5675 }, { "epoch": 0.45981853532080363, "grad_norm": 0.03515704721212387, "learning_rate": 0.00019667851838516586, "loss": 0.3957, "step": 5676 }, { "epoch": 0.459899546338302, "grad_norm": 0.03550994396209717, "learning_rate": 0.00019667401773257122, "loss": 0.3295, "step": 5677 }, { "epoch": 0.4599805573558004, "grad_norm": 0.034394558519124985, "learning_rate": 0.0001966695170799766, "loss": 0.363, "step": 5678 }, { "epoch": 0.46006156837329876, "grad_norm": 0.02899022586643696, "learning_rate": 0.00019666501642738197, "loss": 0.3353, "step": 5679 }, { "epoch": 0.46014257939079717, "grad_norm": 0.029689837247133255, "learning_rate": 0.00019666051577478736, "loss": 0.3633, "step": 5680 }, { "epoch": 0.4602235904082955, "grad_norm": 0.030063379555940628, "learning_rate": 0.00019665601512219275, "loss": 0.3576, "step": 5681 }, { "epoch": 0.46030460142579394, "grad_norm": 0.028926603496074677, "learning_rate": 0.0001966515144695981, "loss": 0.374, "step": 5682 }, { "epoch": 0.4603856124432923, "grad_norm": 0.02919875644147396, "learning_rate": 0.00019664701381700347, "loss": 0.3595, "step": 5683 }, { "epoch": 0.46046662346079065, "grad_norm": 0.03260861337184906, "learning_rate": 0.00019664251316440885, "loss": 0.3483, "step": 5684 }, { "epoch": 0.46054763447828906, "grad_norm": 0.03058563731610775, "learning_rate": 0.0001966380125118142, "loss": 0.3436, "step": 5685 }, { "epoch": 0.4606286454957874, "grad_norm": 0.034815624356269836, "learning_rate": 0.0001966335118592196, "loss": 0.3441, "step": 5686 }, { "epoch": 0.4607096565132858, "grad_norm": 0.030256694182753563, "learning_rate": 0.000196629011206625, "loss": 0.3332, "step": 5687 }, { "epoch": 0.4607906675307842, "grad_norm": 0.0370088554918766, "learning_rate": 0.00019662451055403035, "loss": 0.4123, "step": 5688 }, { "epoch": 0.4608716785482826, "grad_norm": 0.0312041062861681, "learning_rate": 0.0001966200099014357, "loss": 0.3864, "step": 5689 }, { "epoch": 0.46095268956578095, "grad_norm": 0.03293803706765175, "learning_rate": 0.0001966155092488411, "loss": 0.3534, "step": 5690 }, { "epoch": 0.4610337005832793, "grad_norm": 0.03456052392721176, "learning_rate": 0.00019661100859624645, "loss": 0.3744, "step": 5691 }, { "epoch": 0.4611147116007777, "grad_norm": 0.027828998863697052, "learning_rate": 0.00019660650794365184, "loss": 0.3319, "step": 5692 }, { "epoch": 0.46119572261827607, "grad_norm": 0.0315227285027504, "learning_rate": 0.00019660200729105723, "loss": 0.3482, "step": 5693 }, { "epoch": 0.4612767336357745, "grad_norm": 0.032845206558704376, "learning_rate": 0.0001965975066384626, "loss": 0.3641, "step": 5694 }, { "epoch": 0.46135774465327284, "grad_norm": 0.036846909672021866, "learning_rate": 0.00019659300598586795, "loss": 0.4032, "step": 5695 }, { "epoch": 0.46143875567077125, "grad_norm": 0.03248875215649605, "learning_rate": 0.00019658850533327334, "loss": 0.3364, "step": 5696 }, { "epoch": 0.4615197666882696, "grad_norm": 0.029342476278543472, "learning_rate": 0.0001965840046806787, "loss": 0.3364, "step": 5697 }, { "epoch": 0.46160077770576796, "grad_norm": 0.03533339127898216, "learning_rate": 0.00019657950402808408, "loss": 0.3959, "step": 5698 }, { "epoch": 0.4616817887232664, "grad_norm": 0.03511074557900429, "learning_rate": 0.00019657500337548947, "loss": 0.3542, "step": 5699 }, { "epoch": 0.46176279974076473, "grad_norm": 0.03128790482878685, "learning_rate": 0.00019657050272289483, "loss": 0.3457, "step": 5700 }, { "epoch": 0.46184381075826314, "grad_norm": 0.03102973848581314, "learning_rate": 0.0001965660020703002, "loss": 0.3221, "step": 5701 }, { "epoch": 0.4619248217757615, "grad_norm": 0.03910433501005173, "learning_rate": 0.00019656150141770558, "loss": 0.3338, "step": 5702 }, { "epoch": 0.4620058327932599, "grad_norm": 0.030722856521606445, "learning_rate": 0.00019655700076511094, "loss": 0.3936, "step": 5703 }, { "epoch": 0.46208684381075826, "grad_norm": 0.0336427241563797, "learning_rate": 0.00019655250011251633, "loss": 0.3546, "step": 5704 }, { "epoch": 0.4621678548282566, "grad_norm": 0.033403877168893814, "learning_rate": 0.0001965479994599217, "loss": 0.3971, "step": 5705 }, { "epoch": 0.46224886584575503, "grad_norm": 0.03144184499979019, "learning_rate": 0.00019654349880732707, "loss": 0.3449, "step": 5706 }, { "epoch": 0.4623298768632534, "grad_norm": 0.03269330784678459, "learning_rate": 0.00019653899815473243, "loss": 0.3501, "step": 5707 }, { "epoch": 0.4624108878807518, "grad_norm": 0.030189063400030136, "learning_rate": 0.00019653449750213782, "loss": 0.3574, "step": 5708 }, { "epoch": 0.46249189889825015, "grad_norm": 0.03226552903652191, "learning_rate": 0.00019652999684954318, "loss": 0.3588, "step": 5709 }, { "epoch": 0.46257290991574856, "grad_norm": 0.03771393373608589, "learning_rate": 0.00019652549619694857, "loss": 0.375, "step": 5710 }, { "epoch": 0.4626539209332469, "grad_norm": 0.028019536286592484, "learning_rate": 0.00019652099554435395, "loss": 0.3059, "step": 5711 }, { "epoch": 0.4627349319507453, "grad_norm": 0.03134125843644142, "learning_rate": 0.00019651649489175931, "loss": 0.3717, "step": 5712 }, { "epoch": 0.4628159429682437, "grad_norm": 0.031846798956394196, "learning_rate": 0.00019651199423916467, "loss": 0.3623, "step": 5713 }, { "epoch": 0.46289695398574204, "grad_norm": 0.030864115804433823, "learning_rate": 0.00019650749358657006, "loss": 0.3826, "step": 5714 }, { "epoch": 0.46297796500324045, "grad_norm": 0.0309914480894804, "learning_rate": 0.00019650299293397542, "loss": 0.3777, "step": 5715 }, { "epoch": 0.4630589760207388, "grad_norm": 0.05822967737913132, "learning_rate": 0.0001964984922813808, "loss": 0.4019, "step": 5716 }, { "epoch": 0.4631399870382372, "grad_norm": 0.034782227128744125, "learning_rate": 0.0001964939916287862, "loss": 0.3833, "step": 5717 }, { "epoch": 0.4632209980557356, "grad_norm": 0.0274979081004858, "learning_rate": 0.00019648949097619156, "loss": 0.3377, "step": 5718 }, { "epoch": 0.46330200907323393, "grad_norm": 0.04161456227302551, "learning_rate": 0.00019648499032359692, "loss": 0.3934, "step": 5719 }, { "epoch": 0.46338302009073234, "grad_norm": 0.028479766100645065, "learning_rate": 0.0001964804896710023, "loss": 0.3514, "step": 5720 }, { "epoch": 0.4634640311082307, "grad_norm": 0.03187427669763565, "learning_rate": 0.00019647598901840766, "loss": 0.3481, "step": 5721 }, { "epoch": 0.4635450421257291, "grad_norm": 0.030612220987677574, "learning_rate": 0.00019647148836581305, "loss": 0.3584, "step": 5722 }, { "epoch": 0.46362605314322747, "grad_norm": 0.0349239706993103, "learning_rate": 0.00019646698771321844, "loss": 0.3635, "step": 5723 }, { "epoch": 0.4637070641607259, "grad_norm": 0.03317857161164284, "learning_rate": 0.0001964624870606238, "loss": 0.4113, "step": 5724 }, { "epoch": 0.46378807517822424, "grad_norm": 0.030748484656214714, "learning_rate": 0.00019645798640802918, "loss": 0.3788, "step": 5725 }, { "epoch": 0.4638690861957226, "grad_norm": 0.028064358979463577, "learning_rate": 0.00019645348575543454, "loss": 0.3366, "step": 5726 }, { "epoch": 0.463950097213221, "grad_norm": 0.0314946211874485, "learning_rate": 0.0001964489851028399, "loss": 0.3536, "step": 5727 }, { "epoch": 0.46403110823071936, "grad_norm": 0.03007393144071102, "learning_rate": 0.0001964444844502453, "loss": 0.3389, "step": 5728 }, { "epoch": 0.46411211924821777, "grad_norm": 0.031874869018793106, "learning_rate": 0.00019643998379765068, "loss": 0.3696, "step": 5729 }, { "epoch": 0.4641931302657161, "grad_norm": 0.033363260328769684, "learning_rate": 0.00019643548314505604, "loss": 0.3965, "step": 5730 }, { "epoch": 0.46427414128321454, "grad_norm": 0.03128187730908394, "learning_rate": 0.00019643098249246143, "loss": 0.3264, "step": 5731 }, { "epoch": 0.4643551523007129, "grad_norm": 0.03325207158923149, "learning_rate": 0.00019642648183986679, "loss": 0.3524, "step": 5732 }, { "epoch": 0.4644361633182113, "grad_norm": 0.02750324457883835, "learning_rate": 0.00019642198118727217, "loss": 0.3199, "step": 5733 }, { "epoch": 0.46451717433570966, "grad_norm": 0.03271065652370453, "learning_rate": 0.00019641748053467753, "loss": 0.3883, "step": 5734 }, { "epoch": 0.464598185353208, "grad_norm": 0.03460625186562538, "learning_rate": 0.00019641297988208292, "loss": 0.3595, "step": 5735 }, { "epoch": 0.4646791963707064, "grad_norm": 0.03765340894460678, "learning_rate": 0.00019640847922948828, "loss": 0.3955, "step": 5736 }, { "epoch": 0.4647602073882048, "grad_norm": 0.03118179738521576, "learning_rate": 0.00019640397857689367, "loss": 0.3503, "step": 5737 }, { "epoch": 0.4648412184057032, "grad_norm": 0.030739959329366684, "learning_rate": 0.00019639947792429903, "loss": 0.3483, "step": 5738 }, { "epoch": 0.46492222942320155, "grad_norm": 0.03348294273018837, "learning_rate": 0.00019639497727170441, "loss": 0.3952, "step": 5739 }, { "epoch": 0.46500324044069996, "grad_norm": 0.03235217183828354, "learning_rate": 0.00019639047661910977, "loss": 0.4024, "step": 5740 }, { "epoch": 0.4650842514581983, "grad_norm": 0.03641180694103241, "learning_rate": 0.00019638597596651516, "loss": 0.3705, "step": 5741 }, { "epoch": 0.4651652624756967, "grad_norm": 0.029355797916650772, "learning_rate": 0.00019638147531392052, "loss": 0.3022, "step": 5742 }, { "epoch": 0.4652462734931951, "grad_norm": 0.033131200820207596, "learning_rate": 0.0001963769746613259, "loss": 0.3295, "step": 5743 }, { "epoch": 0.46532728451069344, "grad_norm": 0.0363098680973053, "learning_rate": 0.00019637247400873127, "loss": 0.4094, "step": 5744 }, { "epoch": 0.46540829552819185, "grad_norm": 0.03575873747467995, "learning_rate": 0.00019636797335613666, "loss": 0.3672, "step": 5745 }, { "epoch": 0.4654893065456902, "grad_norm": 0.030971111729741096, "learning_rate": 0.00019636347270354202, "loss": 0.3718, "step": 5746 }, { "epoch": 0.4655703175631886, "grad_norm": 0.030273951590061188, "learning_rate": 0.0001963589720509474, "loss": 0.3134, "step": 5747 }, { "epoch": 0.465651328580687, "grad_norm": 0.03387882933020592, "learning_rate": 0.00019635447139835276, "loss": 0.3703, "step": 5748 }, { "epoch": 0.46573233959818533, "grad_norm": 0.03747584670782089, "learning_rate": 0.00019634997074575815, "loss": 0.4081, "step": 5749 }, { "epoch": 0.46581335061568374, "grad_norm": 0.030774252489209175, "learning_rate": 0.0001963454700931635, "loss": 0.3737, "step": 5750 }, { "epoch": 0.4658943616331821, "grad_norm": 0.03460761159658432, "learning_rate": 0.0001963409694405689, "loss": 0.3649, "step": 5751 }, { "epoch": 0.4659753726506805, "grad_norm": 0.0365169532597065, "learning_rate": 0.00019633646878797426, "loss": 0.3755, "step": 5752 }, { "epoch": 0.46605638366817886, "grad_norm": 0.03159398213028908, "learning_rate": 0.00019633196813537965, "loss": 0.3296, "step": 5753 }, { "epoch": 0.4661373946856773, "grad_norm": 0.03654010221362114, "learning_rate": 0.000196327467482785, "loss": 0.3659, "step": 5754 }, { "epoch": 0.46621840570317563, "grad_norm": 0.030611230060458183, "learning_rate": 0.0001963229668301904, "loss": 0.3642, "step": 5755 }, { "epoch": 0.466299416720674, "grad_norm": 0.031239870935678482, "learning_rate": 0.00019631846617759575, "loss": 0.3329, "step": 5756 }, { "epoch": 0.4663804277381724, "grad_norm": 0.03149476647377014, "learning_rate": 0.00019631396552500114, "loss": 0.3521, "step": 5757 }, { "epoch": 0.46646143875567075, "grad_norm": 0.03362254425883293, "learning_rate": 0.0001963094648724065, "loss": 0.3426, "step": 5758 }, { "epoch": 0.46654244977316917, "grad_norm": 0.03327491879463196, "learning_rate": 0.0001963049642198119, "loss": 0.3881, "step": 5759 }, { "epoch": 0.4666234607906675, "grad_norm": 0.03225428983569145, "learning_rate": 0.00019630046356721725, "loss": 0.3441, "step": 5760 }, { "epoch": 0.46670447180816593, "grad_norm": 0.031021784991025925, "learning_rate": 0.00019629596291462263, "loss": 0.3446, "step": 5761 }, { "epoch": 0.4667854828256643, "grad_norm": 0.02941407449543476, "learning_rate": 0.00019629146226202802, "loss": 0.3714, "step": 5762 }, { "epoch": 0.46686649384316264, "grad_norm": 0.02663205750286579, "learning_rate": 0.00019628696160943338, "loss": 0.3546, "step": 5763 }, { "epoch": 0.46694750486066106, "grad_norm": 0.032318115234375, "learning_rate": 0.00019628246095683874, "loss": 0.3536, "step": 5764 }, { "epoch": 0.4670285158781594, "grad_norm": 0.029012607410550117, "learning_rate": 0.00019627796030424413, "loss": 0.3302, "step": 5765 }, { "epoch": 0.4671095268956578, "grad_norm": 0.03587726131081581, "learning_rate": 0.0001962734596516495, "loss": 0.4192, "step": 5766 }, { "epoch": 0.4671905379131562, "grad_norm": 0.03222670033574104, "learning_rate": 0.00019626895899905488, "loss": 0.3345, "step": 5767 }, { "epoch": 0.4672715489306546, "grad_norm": 0.03298739716410637, "learning_rate": 0.00019626445834646026, "loss": 0.3644, "step": 5768 }, { "epoch": 0.46735255994815295, "grad_norm": 0.030183693394064903, "learning_rate": 0.00019625995769386562, "loss": 0.3478, "step": 5769 }, { "epoch": 0.4674335709656513, "grad_norm": 0.03015521913766861, "learning_rate": 0.00019625545704127098, "loss": 0.3734, "step": 5770 }, { "epoch": 0.4675145819831497, "grad_norm": 0.033700130879879, "learning_rate": 0.00019625095638867637, "loss": 0.3917, "step": 5771 }, { "epoch": 0.46759559300064807, "grad_norm": 0.034100547432899475, "learning_rate": 0.00019624645573608173, "loss": 0.3633, "step": 5772 }, { "epoch": 0.4676766040181465, "grad_norm": 0.028375964611768723, "learning_rate": 0.00019624195508348712, "loss": 0.3957, "step": 5773 }, { "epoch": 0.46775761503564484, "grad_norm": 0.03054911084473133, "learning_rate": 0.0001962374544308925, "loss": 0.36, "step": 5774 }, { "epoch": 0.46783862605314325, "grad_norm": 0.029022803530097008, "learning_rate": 0.00019623295377829786, "loss": 0.3553, "step": 5775 }, { "epoch": 0.4679196370706416, "grad_norm": 0.029407450929284096, "learning_rate": 0.00019622845312570322, "loss": 0.3464, "step": 5776 }, { "epoch": 0.46800064808814, "grad_norm": 0.029288267716765404, "learning_rate": 0.0001962239524731086, "loss": 0.3328, "step": 5777 }, { "epoch": 0.46808165910563837, "grad_norm": 0.028497010469436646, "learning_rate": 0.00019621945182051397, "loss": 0.3166, "step": 5778 }, { "epoch": 0.4681626701231367, "grad_norm": 0.03194279596209526, "learning_rate": 0.00019621495116791936, "loss": 0.3442, "step": 5779 }, { "epoch": 0.46824368114063514, "grad_norm": 0.026724031195044518, "learning_rate": 0.00019621045051532475, "loss": 0.3245, "step": 5780 }, { "epoch": 0.4683246921581335, "grad_norm": 0.03415639325976372, "learning_rate": 0.0001962059498627301, "loss": 0.35, "step": 5781 }, { "epoch": 0.4684057031756319, "grad_norm": 0.03118039481341839, "learning_rate": 0.00019620144921013547, "loss": 0.3783, "step": 5782 }, { "epoch": 0.46848671419313026, "grad_norm": 0.035469118505716324, "learning_rate": 0.00019619694855754085, "loss": 0.3465, "step": 5783 }, { "epoch": 0.46856772521062867, "grad_norm": 0.03357202932238579, "learning_rate": 0.0001961924479049462, "loss": 0.4097, "step": 5784 }, { "epoch": 0.468648736228127, "grad_norm": 0.03250733017921448, "learning_rate": 0.0001961879472523516, "loss": 0.3894, "step": 5785 }, { "epoch": 0.4687297472456254, "grad_norm": 0.03183455392718315, "learning_rate": 0.000196183446599757, "loss": 0.3643, "step": 5786 }, { "epoch": 0.4688107582631238, "grad_norm": 0.035170722752809525, "learning_rate": 0.00019617894594716235, "loss": 0.339, "step": 5787 }, { "epoch": 0.46889176928062215, "grad_norm": 0.034206192940473557, "learning_rate": 0.0001961744452945677, "loss": 0.4072, "step": 5788 }, { "epoch": 0.46897278029812056, "grad_norm": 0.036689676344394684, "learning_rate": 0.0001961699446419731, "loss": 0.4151, "step": 5789 }, { "epoch": 0.4690537913156189, "grad_norm": 0.031407617032527924, "learning_rate": 0.00019616544398937846, "loss": 0.3366, "step": 5790 }, { "epoch": 0.46913480233311733, "grad_norm": 0.03071746416389942, "learning_rate": 0.00019616094333678384, "loss": 0.3637, "step": 5791 }, { "epoch": 0.4692158133506157, "grad_norm": 0.030194265767931938, "learning_rate": 0.00019615644268418923, "loss": 0.3516, "step": 5792 }, { "epoch": 0.46929682436811404, "grad_norm": 0.02895018830895424, "learning_rate": 0.0001961519420315946, "loss": 0.3574, "step": 5793 }, { "epoch": 0.46937783538561245, "grad_norm": 0.03663880005478859, "learning_rate": 0.00019614744137899998, "loss": 0.3274, "step": 5794 }, { "epoch": 0.4694588464031108, "grad_norm": 0.03252609446644783, "learning_rate": 0.00019614294072640534, "loss": 0.33, "step": 5795 }, { "epoch": 0.4695398574206092, "grad_norm": 0.035485222935676575, "learning_rate": 0.0001961384400738107, "loss": 0.3711, "step": 5796 }, { "epoch": 0.4696208684381076, "grad_norm": 0.0342998169362545, "learning_rate": 0.00019613393942121608, "loss": 0.3839, "step": 5797 }, { "epoch": 0.469701879455606, "grad_norm": 0.032619740813970566, "learning_rate": 0.00019612943876862147, "loss": 0.3822, "step": 5798 }, { "epoch": 0.46978289047310434, "grad_norm": 0.03156382218003273, "learning_rate": 0.00019612493811602683, "loss": 0.3753, "step": 5799 }, { "epoch": 0.4698639014906027, "grad_norm": 0.02982146479189396, "learning_rate": 0.00019612043746343222, "loss": 0.3427, "step": 5800 }, { "epoch": 0.4699449125081011, "grad_norm": 0.03202043101191521, "learning_rate": 0.00019611593681083758, "loss": 0.3244, "step": 5801 }, { "epoch": 0.47002592352559946, "grad_norm": 0.031166845932602882, "learning_rate": 0.00019611143615824294, "loss": 0.3527, "step": 5802 }, { "epoch": 0.4701069345430979, "grad_norm": 0.028590986505150795, "learning_rate": 0.00019610693550564833, "loss": 0.3551, "step": 5803 }, { "epoch": 0.47018794556059623, "grad_norm": 0.03039284236729145, "learning_rate": 0.0001961024348530537, "loss": 0.3772, "step": 5804 }, { "epoch": 0.47026895657809464, "grad_norm": 0.03211115673184395, "learning_rate": 0.00019609793420045907, "loss": 0.3392, "step": 5805 }, { "epoch": 0.470349967595593, "grad_norm": 0.03226887807250023, "learning_rate": 0.00019609343354786446, "loss": 0.3898, "step": 5806 }, { "epoch": 0.47043097861309136, "grad_norm": 0.03120870143175125, "learning_rate": 0.00019608893289526982, "loss": 0.3335, "step": 5807 }, { "epoch": 0.47051198963058977, "grad_norm": 0.03307618573307991, "learning_rate": 0.00019608443224267518, "loss": 0.4147, "step": 5808 }, { "epoch": 0.4705930006480881, "grad_norm": 0.029730724170804024, "learning_rate": 0.00019607993159008057, "loss": 0.3283, "step": 5809 }, { "epoch": 0.47067401166558653, "grad_norm": 0.030490990728139877, "learning_rate": 0.00019607543093748595, "loss": 0.3674, "step": 5810 }, { "epoch": 0.4707550226830849, "grad_norm": 0.03562087193131447, "learning_rate": 0.00019607093028489131, "loss": 0.3826, "step": 5811 }, { "epoch": 0.4708360337005833, "grad_norm": 0.033215828239917755, "learning_rate": 0.0001960664296322967, "loss": 0.3294, "step": 5812 }, { "epoch": 0.47091704471808166, "grad_norm": 0.030647827312350273, "learning_rate": 0.00019606192897970206, "loss": 0.3758, "step": 5813 }, { "epoch": 0.47099805573558, "grad_norm": 0.0350574292242527, "learning_rate": 0.00019605742832710745, "loss": 0.3849, "step": 5814 }, { "epoch": 0.4710790667530784, "grad_norm": 0.03518437221646309, "learning_rate": 0.0001960529276745128, "loss": 0.3768, "step": 5815 }, { "epoch": 0.4711600777705768, "grad_norm": 0.03374134749174118, "learning_rate": 0.0001960484270219182, "loss": 0.3908, "step": 5816 }, { "epoch": 0.4712410887880752, "grad_norm": 0.02921205572783947, "learning_rate": 0.00019604392636932356, "loss": 0.3264, "step": 5817 }, { "epoch": 0.47132209980557355, "grad_norm": 0.03081241063773632, "learning_rate": 0.00019603942571672894, "loss": 0.3214, "step": 5818 }, { "epoch": 0.47140311082307196, "grad_norm": 0.0298855472356081, "learning_rate": 0.0001960349250641343, "loss": 0.3989, "step": 5819 }, { "epoch": 0.4714841218405703, "grad_norm": 0.03593145310878754, "learning_rate": 0.0001960304244115397, "loss": 0.3994, "step": 5820 }, { "epoch": 0.47156513285806867, "grad_norm": 0.03263883292675018, "learning_rate": 0.00019602592375894505, "loss": 0.4075, "step": 5821 }, { "epoch": 0.4716461438755671, "grad_norm": 0.030939802527427673, "learning_rate": 0.00019602142310635044, "loss": 0.3339, "step": 5822 }, { "epoch": 0.47172715489306544, "grad_norm": 0.032803893089294434, "learning_rate": 0.0001960169224537558, "loss": 0.4188, "step": 5823 }, { "epoch": 0.47180816591056385, "grad_norm": 0.03152771666646004, "learning_rate": 0.00019601242180116118, "loss": 0.3906, "step": 5824 }, { "epoch": 0.4718891769280622, "grad_norm": 0.030248772352933884, "learning_rate": 0.00019600792114856654, "loss": 0.3348, "step": 5825 }, { "epoch": 0.4719701879455606, "grad_norm": 0.03298931196331978, "learning_rate": 0.00019600342049597193, "loss": 0.3274, "step": 5826 }, { "epoch": 0.47205119896305897, "grad_norm": 0.030176391825079918, "learning_rate": 0.0001959989198433773, "loss": 0.3597, "step": 5827 }, { "epoch": 0.4721322099805574, "grad_norm": 0.03578178584575653, "learning_rate": 0.00019599441919078268, "loss": 0.3713, "step": 5828 }, { "epoch": 0.47221322099805574, "grad_norm": 0.035716280341148376, "learning_rate": 0.00019598991853818804, "loss": 0.3836, "step": 5829 }, { "epoch": 0.4722942320155541, "grad_norm": 0.03593599051237106, "learning_rate": 0.00019598541788559343, "loss": 0.3151, "step": 5830 }, { "epoch": 0.4723752430330525, "grad_norm": 0.03321794047951698, "learning_rate": 0.00019598091723299879, "loss": 0.3865, "step": 5831 }, { "epoch": 0.47245625405055086, "grad_norm": 0.03270499035716057, "learning_rate": 0.00019597641658040417, "loss": 0.3526, "step": 5832 }, { "epoch": 0.4725372650680493, "grad_norm": 0.03048735298216343, "learning_rate": 0.00019597191592780953, "loss": 0.3733, "step": 5833 }, { "epoch": 0.47261827608554763, "grad_norm": 0.034730520099401474, "learning_rate": 0.00019596741527521492, "loss": 0.4253, "step": 5834 }, { "epoch": 0.47269928710304604, "grad_norm": 0.028934409841895103, "learning_rate": 0.00019596291462262028, "loss": 0.3417, "step": 5835 }, { "epoch": 0.4727802981205444, "grad_norm": 0.02618395909667015, "learning_rate": 0.00019595841397002567, "loss": 0.3397, "step": 5836 }, { "epoch": 0.47286130913804275, "grad_norm": 0.0328238420188427, "learning_rate": 0.00019595391331743103, "loss": 0.349, "step": 5837 }, { "epoch": 0.47294232015554116, "grad_norm": 0.03482384979724884, "learning_rate": 0.00019594941266483642, "loss": 0.33, "step": 5838 }, { "epoch": 0.4730233311730395, "grad_norm": 0.030215738341212273, "learning_rate": 0.00019594491201224178, "loss": 0.3656, "step": 5839 }, { "epoch": 0.47310434219053793, "grad_norm": 0.03280828148126602, "learning_rate": 0.00019594041135964716, "loss": 0.364, "step": 5840 }, { "epoch": 0.4731853532080363, "grad_norm": 0.03489303216338158, "learning_rate": 0.00019593591070705252, "loss": 0.3834, "step": 5841 }, { "epoch": 0.4732663642255347, "grad_norm": 0.029000435024499893, "learning_rate": 0.0001959314100544579, "loss": 0.3423, "step": 5842 }, { "epoch": 0.47334737524303305, "grad_norm": 0.028401605784893036, "learning_rate": 0.0001959269094018633, "loss": 0.3591, "step": 5843 }, { "epoch": 0.4734283862605314, "grad_norm": 0.031942471861839294, "learning_rate": 0.00019592240874926866, "loss": 0.3582, "step": 5844 }, { "epoch": 0.4735093972780298, "grad_norm": 0.03147265687584877, "learning_rate": 0.00019591790809667402, "loss": 0.3321, "step": 5845 }, { "epoch": 0.4735904082955282, "grad_norm": 0.030731940641999245, "learning_rate": 0.0001959134074440794, "loss": 0.3143, "step": 5846 }, { "epoch": 0.4736714193130266, "grad_norm": 0.02728288061916828, "learning_rate": 0.00019590890679148476, "loss": 0.3787, "step": 5847 }, { "epoch": 0.47375243033052494, "grad_norm": 0.02671802043914795, "learning_rate": 0.00019590440613889015, "loss": 0.3523, "step": 5848 }, { "epoch": 0.47383344134802335, "grad_norm": 0.02398890070617199, "learning_rate": 0.00019589990548629554, "loss": 0.3107, "step": 5849 }, { "epoch": 0.4739144523655217, "grad_norm": 0.028565047308802605, "learning_rate": 0.0001958954048337009, "loss": 0.355, "step": 5850 }, { "epoch": 0.47399546338302007, "grad_norm": 0.026737749576568604, "learning_rate": 0.00019589090418110626, "loss": 0.3599, "step": 5851 }, { "epoch": 0.4740764744005185, "grad_norm": 0.03038250282406807, "learning_rate": 0.00019588640352851165, "loss": 0.3882, "step": 5852 }, { "epoch": 0.47415748541801683, "grad_norm": 0.036394696682691574, "learning_rate": 0.000195881902875917, "loss": 0.4081, "step": 5853 }, { "epoch": 0.47423849643551524, "grad_norm": 0.031558968126773834, "learning_rate": 0.0001958774022233224, "loss": 0.3896, "step": 5854 }, { "epoch": 0.4743195074530136, "grad_norm": 0.029992422088980675, "learning_rate": 0.00019587290157072778, "loss": 0.3306, "step": 5855 }, { "epoch": 0.474400518470512, "grad_norm": 0.02885795570909977, "learning_rate": 0.00019586840091813314, "loss": 0.3439, "step": 5856 }, { "epoch": 0.47448152948801037, "grad_norm": 0.03568622097373009, "learning_rate": 0.0001958639002655385, "loss": 0.3697, "step": 5857 }, { "epoch": 0.4745625405055087, "grad_norm": 0.03252142295241356, "learning_rate": 0.0001958593996129439, "loss": 0.3378, "step": 5858 }, { "epoch": 0.47464355152300713, "grad_norm": 0.032284434884786606, "learning_rate": 0.00019585489896034925, "loss": 0.3845, "step": 5859 }, { "epoch": 0.4747245625405055, "grad_norm": 0.0372265949845314, "learning_rate": 0.00019585039830775463, "loss": 0.3658, "step": 5860 }, { "epoch": 0.4748055735580039, "grad_norm": 0.03087705560028553, "learning_rate": 0.00019584589765516002, "loss": 0.3932, "step": 5861 }, { "epoch": 0.47488658457550226, "grad_norm": 0.032511867582798004, "learning_rate": 0.00019584139700256538, "loss": 0.3503, "step": 5862 }, { "epoch": 0.47496759559300067, "grad_norm": 0.028408238664269447, "learning_rate": 0.00019583689634997077, "loss": 0.3346, "step": 5863 }, { "epoch": 0.475048606610499, "grad_norm": 0.031963370740413666, "learning_rate": 0.00019583239569737613, "loss": 0.3491, "step": 5864 }, { "epoch": 0.4751296176279974, "grad_norm": 0.031659066677093506, "learning_rate": 0.0001958278950447815, "loss": 0.363, "step": 5865 }, { "epoch": 0.4752106286454958, "grad_norm": 0.033491283655166626, "learning_rate": 0.00019582339439218688, "loss": 0.3356, "step": 5866 }, { "epoch": 0.47529163966299415, "grad_norm": 0.030474351719021797, "learning_rate": 0.00019581889373959226, "loss": 0.4148, "step": 5867 }, { "epoch": 0.47537265068049256, "grad_norm": 0.0345035158097744, "learning_rate": 0.00019581439308699762, "loss": 0.3473, "step": 5868 }, { "epoch": 0.4754536616979909, "grad_norm": 0.030190279707312584, "learning_rate": 0.000195809892434403, "loss": 0.3364, "step": 5869 }, { "epoch": 0.4755346727154893, "grad_norm": 0.034467823803424835, "learning_rate": 0.00019580539178180837, "loss": 0.3863, "step": 5870 }, { "epoch": 0.4756156837329877, "grad_norm": 0.034213580191135406, "learning_rate": 0.00019580089112921373, "loss": 0.3569, "step": 5871 }, { "epoch": 0.4756966947504861, "grad_norm": 0.03731187433004379, "learning_rate": 0.00019579639047661912, "loss": 0.3884, "step": 5872 }, { "epoch": 0.47577770576798445, "grad_norm": 0.030831869691610336, "learning_rate": 0.0001957918898240245, "loss": 0.3383, "step": 5873 }, { "epoch": 0.4758587167854828, "grad_norm": 0.02833954058587551, "learning_rate": 0.00019578738917142986, "loss": 0.297, "step": 5874 }, { "epoch": 0.4759397278029812, "grad_norm": 0.035140588879585266, "learning_rate": 0.00019578288851883525, "loss": 0.3608, "step": 5875 }, { "epoch": 0.47602073882047957, "grad_norm": 0.029223179444670677, "learning_rate": 0.0001957783878662406, "loss": 0.3978, "step": 5876 }, { "epoch": 0.476101749837978, "grad_norm": 0.02845843695104122, "learning_rate": 0.00019577388721364597, "loss": 0.3281, "step": 5877 }, { "epoch": 0.47618276085547634, "grad_norm": 0.03213419020175934, "learning_rate": 0.00019576938656105136, "loss": 0.3228, "step": 5878 }, { "epoch": 0.47626377187297475, "grad_norm": 0.03073774464428425, "learning_rate": 0.00019576488590845675, "loss": 0.3533, "step": 5879 }, { "epoch": 0.4763447828904731, "grad_norm": 0.028143148869276047, "learning_rate": 0.0001957603852558621, "loss": 0.3223, "step": 5880 }, { "epoch": 0.47642579390797146, "grad_norm": 0.03042803891003132, "learning_rate": 0.0001957558846032675, "loss": 0.3501, "step": 5881 }, { "epoch": 0.4765068049254699, "grad_norm": 0.02732650376856327, "learning_rate": 0.00019575138395067285, "loss": 0.3089, "step": 5882 }, { "epoch": 0.47658781594296823, "grad_norm": 0.026975180953741074, "learning_rate": 0.00019574688329807821, "loss": 0.338, "step": 5883 }, { "epoch": 0.47666882696046664, "grad_norm": 0.028704164549708366, "learning_rate": 0.0001957423826454836, "loss": 0.3402, "step": 5884 }, { "epoch": 0.476749837977965, "grad_norm": 0.031123366206884384, "learning_rate": 0.000195737881992889, "loss": 0.3004, "step": 5885 }, { "epoch": 0.4768308489954634, "grad_norm": 0.032670848071575165, "learning_rate": 0.00019573338134029435, "loss": 0.3459, "step": 5886 }, { "epoch": 0.47691186001296176, "grad_norm": 0.027524948120117188, "learning_rate": 0.00019572888068769974, "loss": 0.3338, "step": 5887 }, { "epoch": 0.4769928710304601, "grad_norm": 0.033073924481868744, "learning_rate": 0.0001957243800351051, "loss": 0.3398, "step": 5888 }, { "epoch": 0.47707388204795853, "grad_norm": 0.033785343170166016, "learning_rate": 0.00019571987938251046, "loss": 0.3872, "step": 5889 }, { "epoch": 0.4771548930654569, "grad_norm": 0.0337032824754715, "learning_rate": 0.00019571537872991584, "loss": 0.382, "step": 5890 }, { "epoch": 0.4772359040829553, "grad_norm": 0.03290359675884247, "learning_rate": 0.00019571087807732123, "loss": 0.3664, "step": 5891 }, { "epoch": 0.47731691510045365, "grad_norm": 0.03508547320961952, "learning_rate": 0.0001957063774247266, "loss": 0.398, "step": 5892 }, { "epoch": 0.47739792611795207, "grad_norm": 0.026666609570384026, "learning_rate": 0.00019570187677213198, "loss": 0.3114, "step": 5893 }, { "epoch": 0.4774789371354504, "grad_norm": 0.0331806018948555, "learning_rate": 0.00019569737611953734, "loss": 0.3685, "step": 5894 }, { "epoch": 0.4775599481529488, "grad_norm": 0.03472977504134178, "learning_rate": 0.00019569287546694272, "loss": 0.3646, "step": 5895 }, { "epoch": 0.4776409591704472, "grad_norm": 0.03430356830358505, "learning_rate": 0.00019568837481434808, "loss": 0.3676, "step": 5896 }, { "epoch": 0.47772197018794554, "grad_norm": 0.0349530354142189, "learning_rate": 0.00019568387416175347, "loss": 0.3811, "step": 5897 }, { "epoch": 0.47780298120544396, "grad_norm": 0.03207635134458542, "learning_rate": 0.00019567937350915883, "loss": 0.3917, "step": 5898 }, { "epoch": 0.4778839922229423, "grad_norm": 0.03403360769152641, "learning_rate": 0.00019567487285656422, "loss": 0.3919, "step": 5899 }, { "epoch": 0.4779650032404407, "grad_norm": 0.03717518225312233, "learning_rate": 0.00019567037220396958, "loss": 0.4059, "step": 5900 }, { "epoch": 0.4780460142579391, "grad_norm": 0.029602613300085068, "learning_rate": 0.00019566587155137497, "loss": 0.3616, "step": 5901 }, { "epoch": 0.47812702527543743, "grad_norm": 0.03548922762274742, "learning_rate": 0.00019566137089878033, "loss": 0.387, "step": 5902 }, { "epoch": 0.47820803629293585, "grad_norm": 0.030909236520528793, "learning_rate": 0.0001956568702461857, "loss": 0.3152, "step": 5903 }, { "epoch": 0.4782890473104342, "grad_norm": 0.033036008477211, "learning_rate": 0.00019565236959359107, "loss": 0.3995, "step": 5904 }, { "epoch": 0.4783700583279326, "grad_norm": 0.028769217431545258, "learning_rate": 0.00019564786894099646, "loss": 0.3319, "step": 5905 }, { "epoch": 0.47845106934543097, "grad_norm": 0.03641336411237717, "learning_rate": 0.00019564336828840182, "loss": 0.3804, "step": 5906 }, { "epoch": 0.4785320803629294, "grad_norm": 0.032285552471876144, "learning_rate": 0.0001956388676358072, "loss": 0.3149, "step": 5907 }, { "epoch": 0.47861309138042774, "grad_norm": 0.028594225645065308, "learning_rate": 0.00019563436698321257, "loss": 0.3349, "step": 5908 }, { "epoch": 0.4786941023979261, "grad_norm": 0.038507889956235886, "learning_rate": 0.00019562986633061795, "loss": 0.3708, "step": 5909 }, { "epoch": 0.4787751134154245, "grad_norm": 0.033712759613990784, "learning_rate": 0.00019562536567802331, "loss": 0.3981, "step": 5910 }, { "epoch": 0.47885612443292286, "grad_norm": 0.03205728530883789, "learning_rate": 0.0001956208650254287, "loss": 0.3889, "step": 5911 }, { "epoch": 0.47893713545042127, "grad_norm": 0.032320015132427216, "learning_rate": 0.00019561636437283406, "loss": 0.3299, "step": 5912 }, { "epoch": 0.4790181464679196, "grad_norm": 0.03484988957643509, "learning_rate": 0.00019561186372023945, "loss": 0.3899, "step": 5913 }, { "epoch": 0.47909915748541804, "grad_norm": 0.036610767245292664, "learning_rate": 0.0001956073630676448, "loss": 0.3949, "step": 5914 }, { "epoch": 0.4791801685029164, "grad_norm": 0.030673658475279808, "learning_rate": 0.0001956028624150502, "loss": 0.3216, "step": 5915 }, { "epoch": 0.4792611795204148, "grad_norm": 0.03905686363577843, "learning_rate": 0.00019559836176245556, "loss": 0.3362, "step": 5916 }, { "epoch": 0.47934219053791316, "grad_norm": 0.03621886670589447, "learning_rate": 0.00019559386110986094, "loss": 0.3495, "step": 5917 }, { "epoch": 0.4794232015554115, "grad_norm": 0.03008181042969227, "learning_rate": 0.00019558936045726633, "loss": 0.36, "step": 5918 }, { "epoch": 0.4795042125729099, "grad_norm": 0.02908833883702755, "learning_rate": 0.0001955848598046717, "loss": 0.3452, "step": 5919 }, { "epoch": 0.4795852235904083, "grad_norm": 0.033341579139232635, "learning_rate": 0.00019558035915207705, "loss": 0.3276, "step": 5920 }, { "epoch": 0.4796662346079067, "grad_norm": 0.028378089889883995, "learning_rate": 0.00019557585849948244, "loss": 0.3221, "step": 5921 }, { "epoch": 0.47974724562540505, "grad_norm": 0.03122635930776596, "learning_rate": 0.0001955713578468878, "loss": 0.3407, "step": 5922 }, { "epoch": 0.47982825664290346, "grad_norm": 0.03359805792570114, "learning_rate": 0.00019556685719429319, "loss": 0.3254, "step": 5923 }, { "epoch": 0.4799092676604018, "grad_norm": 0.03142951428890228, "learning_rate": 0.00019556235654169857, "loss": 0.3226, "step": 5924 }, { "epoch": 0.4799902786779002, "grad_norm": 0.02958759479224682, "learning_rate": 0.00019555785588910393, "loss": 0.3679, "step": 5925 }, { "epoch": 0.4800712896953986, "grad_norm": 0.03471509367227554, "learning_rate": 0.0001955533552365093, "loss": 0.365, "step": 5926 }, { "epoch": 0.48015230071289694, "grad_norm": 0.03234826773405075, "learning_rate": 0.00019554885458391468, "loss": 0.4251, "step": 5927 }, { "epoch": 0.48023331173039535, "grad_norm": 0.03339417651295662, "learning_rate": 0.00019554435393132004, "loss": 0.3869, "step": 5928 }, { "epoch": 0.4803143227478937, "grad_norm": 0.03371240943670273, "learning_rate": 0.00019553985327872543, "loss": 0.3468, "step": 5929 }, { "epoch": 0.4803953337653921, "grad_norm": 0.03137330710887909, "learning_rate": 0.00019553535262613081, "loss": 0.3755, "step": 5930 }, { "epoch": 0.4804763447828905, "grad_norm": 0.03640046343207359, "learning_rate": 0.00019553085197353617, "loss": 0.4203, "step": 5931 }, { "epoch": 0.48055735580038883, "grad_norm": 0.030032861977815628, "learning_rate": 0.00019552635132094156, "loss": 0.3377, "step": 5932 }, { "epoch": 0.48063836681788724, "grad_norm": 0.031163305044174194, "learning_rate": 0.00019552185066834692, "loss": 0.348, "step": 5933 }, { "epoch": 0.4807193778353856, "grad_norm": 0.02940339781343937, "learning_rate": 0.00019551735001575228, "loss": 0.3285, "step": 5934 }, { "epoch": 0.480800388852884, "grad_norm": 0.03373560681939125, "learning_rate": 0.00019551284936315767, "loss": 0.4155, "step": 5935 }, { "epoch": 0.48088139987038236, "grad_norm": 0.03505878895521164, "learning_rate": 0.00019550834871056306, "loss": 0.3487, "step": 5936 }, { "epoch": 0.4809624108878808, "grad_norm": 0.028804706409573555, "learning_rate": 0.00019550384805796842, "loss": 0.356, "step": 5937 }, { "epoch": 0.48104342190537913, "grad_norm": 0.030923301354050636, "learning_rate": 0.0001954993474053738, "loss": 0.3703, "step": 5938 }, { "epoch": 0.4811244329228775, "grad_norm": 0.035969726741313934, "learning_rate": 0.00019549484675277916, "loss": 0.3699, "step": 5939 }, { "epoch": 0.4812054439403759, "grad_norm": 0.030231349170207977, "learning_rate": 0.00019549034610018452, "loss": 0.339, "step": 5940 }, { "epoch": 0.48128645495787425, "grad_norm": 0.026125887408852577, "learning_rate": 0.0001954858454475899, "loss": 0.3463, "step": 5941 }, { "epoch": 0.48136746597537267, "grad_norm": 0.03523105010390282, "learning_rate": 0.0001954813447949953, "loss": 0.3409, "step": 5942 }, { "epoch": 0.481448476992871, "grad_norm": 0.03209773078560829, "learning_rate": 0.00019547684414240066, "loss": 0.3911, "step": 5943 }, { "epoch": 0.48152948801036943, "grad_norm": 0.028588753193616867, "learning_rate": 0.00019547234348980604, "loss": 0.3157, "step": 5944 }, { "epoch": 0.4816104990278678, "grad_norm": 0.03126561641693115, "learning_rate": 0.0001954678428372114, "loss": 0.3308, "step": 5945 }, { "epoch": 0.48169151004536614, "grad_norm": 0.030155273154377937, "learning_rate": 0.00019546334218461676, "loss": 0.3458, "step": 5946 }, { "epoch": 0.48177252106286456, "grad_norm": 0.03246188163757324, "learning_rate": 0.00019545884153202215, "loss": 0.3463, "step": 5947 }, { "epoch": 0.4818535320803629, "grad_norm": 0.03119421936571598, "learning_rate": 0.00019545434087942754, "loss": 0.3742, "step": 5948 }, { "epoch": 0.4819345430978613, "grad_norm": 0.03162112459540367, "learning_rate": 0.0001954498402268329, "loss": 0.3589, "step": 5949 }, { "epoch": 0.4820155541153597, "grad_norm": 0.02846745029091835, "learning_rate": 0.00019544533957423829, "loss": 0.3648, "step": 5950 }, { "epoch": 0.4820965651328581, "grad_norm": 0.03139728680253029, "learning_rate": 0.00019544083892164365, "loss": 0.3772, "step": 5951 }, { "epoch": 0.48217757615035645, "grad_norm": 0.028640342876315117, "learning_rate": 0.000195436338269049, "loss": 0.3831, "step": 5952 }, { "epoch": 0.4822585871678548, "grad_norm": 0.030841778963804245, "learning_rate": 0.0001954318376164544, "loss": 0.3822, "step": 5953 }, { "epoch": 0.4823395981853532, "grad_norm": 0.02715679071843624, "learning_rate": 0.00019542733696385978, "loss": 0.3489, "step": 5954 }, { "epoch": 0.48242060920285157, "grad_norm": 0.03216562792658806, "learning_rate": 0.00019542283631126514, "loss": 0.3706, "step": 5955 }, { "epoch": 0.48250162022035, "grad_norm": 0.03239520639181137, "learning_rate": 0.00019541833565867053, "loss": 0.3817, "step": 5956 }, { "epoch": 0.48258263123784834, "grad_norm": 0.0378153957426548, "learning_rate": 0.0001954138350060759, "loss": 0.3877, "step": 5957 }, { "epoch": 0.48266364225534675, "grad_norm": 0.037669822573661804, "learning_rate": 0.00019540933435348125, "loss": 0.3689, "step": 5958 }, { "epoch": 0.4827446532728451, "grad_norm": 0.029966356232762337, "learning_rate": 0.00019540483370088663, "loss": 0.3522, "step": 5959 }, { "epoch": 0.48282566429034346, "grad_norm": 0.03512732312083244, "learning_rate": 0.00019540033304829202, "loss": 0.3842, "step": 5960 }, { "epoch": 0.48290667530784187, "grad_norm": 0.028936125338077545, "learning_rate": 0.00019539583239569738, "loss": 0.3423, "step": 5961 }, { "epoch": 0.4829876863253402, "grad_norm": 0.029880812391638756, "learning_rate": 0.00019539133174310277, "loss": 0.362, "step": 5962 }, { "epoch": 0.48306869734283864, "grad_norm": 0.030505992472171783, "learning_rate": 0.00019538683109050813, "loss": 0.3325, "step": 5963 }, { "epoch": 0.483149708360337, "grad_norm": 0.030406080186367035, "learning_rate": 0.0001953823304379135, "loss": 0.3366, "step": 5964 }, { "epoch": 0.4832307193778354, "grad_norm": 0.03137945011258125, "learning_rate": 0.00019537782978531888, "loss": 0.3203, "step": 5965 }, { "epoch": 0.48331173039533376, "grad_norm": 0.03132305294275284, "learning_rate": 0.00019537332913272426, "loss": 0.3632, "step": 5966 }, { "epoch": 0.48339274141283217, "grad_norm": 0.02978110872209072, "learning_rate": 0.00019536882848012962, "loss": 0.344, "step": 5967 }, { "epoch": 0.48347375243033053, "grad_norm": 0.03157349303364754, "learning_rate": 0.000195364327827535, "loss": 0.3475, "step": 5968 }, { "epoch": 0.4835547634478289, "grad_norm": 0.03231159225106239, "learning_rate": 0.00019535982717494037, "loss": 0.4046, "step": 5969 }, { "epoch": 0.4836357744653273, "grad_norm": 0.03151550516486168, "learning_rate": 0.00019535532652234573, "loss": 0.382, "step": 5970 }, { "epoch": 0.48371678548282565, "grad_norm": 0.028865037485957146, "learning_rate": 0.00019535082586975112, "loss": 0.3196, "step": 5971 }, { "epoch": 0.48379779650032406, "grad_norm": 0.028897427022457123, "learning_rate": 0.0001953463252171565, "loss": 0.2927, "step": 5972 }, { "epoch": 0.4838788075178224, "grad_norm": 0.028442172333598137, "learning_rate": 0.00019534182456456187, "loss": 0.3318, "step": 5973 }, { "epoch": 0.48395981853532083, "grad_norm": 0.02451622672379017, "learning_rate": 0.00019533732391196725, "loss": 0.3253, "step": 5974 }, { "epoch": 0.4840408295528192, "grad_norm": 0.032599471509456635, "learning_rate": 0.0001953328232593726, "loss": 0.3427, "step": 5975 }, { "epoch": 0.48412184057031754, "grad_norm": 0.030640697106719017, "learning_rate": 0.000195328322606778, "loss": 0.3665, "step": 5976 }, { "epoch": 0.48420285158781595, "grad_norm": 0.02785688452422619, "learning_rate": 0.00019532382195418336, "loss": 0.3404, "step": 5977 }, { "epoch": 0.4842838626053143, "grad_norm": 0.030281536281108856, "learning_rate": 0.00019531932130158875, "loss": 0.3297, "step": 5978 }, { "epoch": 0.4843648736228127, "grad_norm": 0.0328233428299427, "learning_rate": 0.0001953148206489941, "loss": 0.3297, "step": 5979 }, { "epoch": 0.4844458846403111, "grad_norm": 0.027729716151952744, "learning_rate": 0.0001953103199963995, "loss": 0.3383, "step": 5980 }, { "epoch": 0.4845268956578095, "grad_norm": 0.03164516016840935, "learning_rate": 0.00019530581934380485, "loss": 0.3865, "step": 5981 }, { "epoch": 0.48460790667530784, "grad_norm": 0.03192558512091637, "learning_rate": 0.00019530131869121024, "loss": 0.3368, "step": 5982 }, { "epoch": 0.4846889176928062, "grad_norm": 0.03171934187412262, "learning_rate": 0.0001952968180386156, "loss": 0.3394, "step": 5983 }, { "epoch": 0.4847699287103046, "grad_norm": 0.034574296325445175, "learning_rate": 0.000195292317386021, "loss": 0.4038, "step": 5984 }, { "epoch": 0.48485093972780297, "grad_norm": 0.032755058258771896, "learning_rate": 0.00019528781673342635, "loss": 0.3144, "step": 5985 }, { "epoch": 0.4849319507453014, "grad_norm": 0.03499835729598999, "learning_rate": 0.00019528331608083174, "loss": 0.3517, "step": 5986 }, { "epoch": 0.48501296176279973, "grad_norm": 0.031813375651836395, "learning_rate": 0.0001952788154282371, "loss": 0.3153, "step": 5987 }, { "epoch": 0.48509397278029814, "grad_norm": 0.03329386189579964, "learning_rate": 0.00019527431477564248, "loss": 0.3723, "step": 5988 }, { "epoch": 0.4851749837977965, "grad_norm": 0.031179320067167282, "learning_rate": 0.00019526981412304784, "loss": 0.3379, "step": 5989 }, { "epoch": 0.48525599481529486, "grad_norm": 0.03648926317691803, "learning_rate": 0.00019526531347045323, "loss": 0.403, "step": 5990 }, { "epoch": 0.48533700583279327, "grad_norm": 0.0387888066470623, "learning_rate": 0.0001952608128178586, "loss": 0.4047, "step": 5991 }, { "epoch": 0.4854180168502916, "grad_norm": 0.03588513657450676, "learning_rate": 0.00019525631216526398, "loss": 0.3659, "step": 5992 }, { "epoch": 0.48549902786779003, "grad_norm": 0.0332111194729805, "learning_rate": 0.00019525181151266934, "loss": 0.3611, "step": 5993 }, { "epoch": 0.4855800388852884, "grad_norm": 0.03178248181939125, "learning_rate": 0.00019524731086007472, "loss": 0.3137, "step": 5994 }, { "epoch": 0.4856610499027868, "grad_norm": 0.03442096710205078, "learning_rate": 0.00019524281020748008, "loss": 0.3655, "step": 5995 }, { "epoch": 0.48574206092028516, "grad_norm": 0.028393248096108437, "learning_rate": 0.00019523830955488547, "loss": 0.3141, "step": 5996 }, { "epoch": 0.4858230719377835, "grad_norm": 0.030575979501008987, "learning_rate": 0.00019523380890229083, "loss": 0.3729, "step": 5997 }, { "epoch": 0.4859040829552819, "grad_norm": 0.0336354598402977, "learning_rate": 0.00019522930824969622, "loss": 0.4075, "step": 5998 }, { "epoch": 0.4859850939727803, "grad_norm": 0.03110545314848423, "learning_rate": 0.0001952248075971016, "loss": 0.3841, "step": 5999 }, { "epoch": 0.4860661049902787, "grad_norm": 0.03151804953813553, "learning_rate": 0.00019522030694450697, "loss": 0.3684, "step": 6000 }, { "epoch": 0.48614711600777705, "grad_norm": 0.03389604762196541, "learning_rate": 0.00019521580629191235, "loss": 0.4194, "step": 6001 }, { "epoch": 0.48622812702527546, "grad_norm": 0.02937939018011093, "learning_rate": 0.0001952113056393177, "loss": 0.3757, "step": 6002 }, { "epoch": 0.4863091380427738, "grad_norm": 0.03905171900987625, "learning_rate": 0.00019520680498672307, "loss": 0.4182, "step": 6003 }, { "epoch": 0.48639014906027217, "grad_norm": 0.02655964158475399, "learning_rate": 0.00019520230433412846, "loss": 0.3227, "step": 6004 }, { "epoch": 0.4864711600777706, "grad_norm": 0.03764092177152634, "learning_rate": 0.00019519780368153385, "loss": 0.3575, "step": 6005 }, { "epoch": 0.48655217109526894, "grad_norm": 0.031080730259418488, "learning_rate": 0.0001951933030289392, "loss": 0.3592, "step": 6006 }, { "epoch": 0.48663318211276735, "grad_norm": 0.03204982727766037, "learning_rate": 0.0001951888023763446, "loss": 0.281, "step": 6007 }, { "epoch": 0.4867141931302657, "grad_norm": 0.03170899301767349, "learning_rate": 0.00019518430172374995, "loss": 0.3886, "step": 6008 }, { "epoch": 0.4867952041477641, "grad_norm": 0.0295356884598732, "learning_rate": 0.00019517980107115531, "loss": 0.3828, "step": 6009 }, { "epoch": 0.48687621516526247, "grad_norm": 0.03186435624957085, "learning_rate": 0.0001951753004185607, "loss": 0.3921, "step": 6010 }, { "epoch": 0.4869572261827609, "grad_norm": 0.0325162410736084, "learning_rate": 0.0001951707997659661, "loss": 0.3469, "step": 6011 }, { "epoch": 0.48703823720025924, "grad_norm": 0.031725071370601654, "learning_rate": 0.00019516629911337145, "loss": 0.3444, "step": 6012 }, { "epoch": 0.4871192482177576, "grad_norm": 0.029257534071803093, "learning_rate": 0.00019516179846077684, "loss": 0.3549, "step": 6013 }, { "epoch": 0.487200259235256, "grad_norm": 0.029020339250564575, "learning_rate": 0.0001951572978081822, "loss": 0.3602, "step": 6014 }, { "epoch": 0.48728127025275436, "grad_norm": 0.030635559931397438, "learning_rate": 0.00019515279715558756, "loss": 0.3246, "step": 6015 }, { "epoch": 0.4873622812702528, "grad_norm": 0.029000451788306236, "learning_rate": 0.00019514829650299294, "loss": 0.3085, "step": 6016 }, { "epoch": 0.48744329228775113, "grad_norm": 0.03026675619184971, "learning_rate": 0.00019514379585039833, "loss": 0.3281, "step": 6017 }, { "epoch": 0.48752430330524954, "grad_norm": 0.03477197140455246, "learning_rate": 0.0001951392951978037, "loss": 0.3268, "step": 6018 }, { "epoch": 0.4876053143227479, "grad_norm": 0.031906504184007645, "learning_rate": 0.00019513479454520908, "loss": 0.3835, "step": 6019 }, { "epoch": 0.48768632534024625, "grad_norm": 0.03229931741952896, "learning_rate": 0.00019513029389261444, "loss": 0.3055, "step": 6020 }, { "epoch": 0.48776733635774466, "grad_norm": 0.039322223514318466, "learning_rate": 0.0001951257932400198, "loss": 0.3938, "step": 6021 }, { "epoch": 0.487848347375243, "grad_norm": 0.030645597726106644, "learning_rate": 0.00019512129258742519, "loss": 0.3629, "step": 6022 }, { "epoch": 0.48792935839274143, "grad_norm": 0.03114427626132965, "learning_rate": 0.00019511679193483057, "loss": 0.4017, "step": 6023 }, { "epoch": 0.4880103694102398, "grad_norm": 0.027782771736383438, "learning_rate": 0.00019511229128223593, "loss": 0.3442, "step": 6024 }, { "epoch": 0.4880913804277382, "grad_norm": 0.035519011318683624, "learning_rate": 0.00019510779062964132, "loss": 0.3857, "step": 6025 }, { "epoch": 0.48817239144523655, "grad_norm": 0.036083195358514786, "learning_rate": 0.00019510328997704668, "loss": 0.4052, "step": 6026 }, { "epoch": 0.4882534024627349, "grad_norm": 0.03073258511722088, "learning_rate": 0.00019509878932445204, "loss": 0.3382, "step": 6027 }, { "epoch": 0.4883344134802333, "grad_norm": 0.02905021235346794, "learning_rate": 0.00019509428867185743, "loss": 0.3685, "step": 6028 }, { "epoch": 0.4884154244977317, "grad_norm": 0.028884712606668472, "learning_rate": 0.00019508978801926281, "loss": 0.3323, "step": 6029 }, { "epoch": 0.4884964355152301, "grad_norm": 0.032662998884916306, "learning_rate": 0.00019508528736666817, "loss": 0.3487, "step": 6030 }, { "epoch": 0.48857744653272844, "grad_norm": 0.03282221034169197, "learning_rate": 0.00019508078671407356, "loss": 0.3565, "step": 6031 }, { "epoch": 0.48865845755022685, "grad_norm": 0.03624732792377472, "learning_rate": 0.00019507628606147892, "loss": 0.371, "step": 6032 }, { "epoch": 0.4887394685677252, "grad_norm": 0.037391260266304016, "learning_rate": 0.00019507178540888428, "loss": 0.3782, "step": 6033 }, { "epoch": 0.48882047958522357, "grad_norm": 0.029727550223469734, "learning_rate": 0.00019506728475628967, "loss": 0.3644, "step": 6034 }, { "epoch": 0.488901490602722, "grad_norm": 0.03331521898508072, "learning_rate": 0.00019506278410369506, "loss": 0.3554, "step": 6035 }, { "epoch": 0.48898250162022033, "grad_norm": 0.030349144712090492, "learning_rate": 0.00019505828345110042, "loss": 0.3938, "step": 6036 }, { "epoch": 0.48906351263771874, "grad_norm": 0.033913373947143555, "learning_rate": 0.0001950537827985058, "loss": 0.3725, "step": 6037 }, { "epoch": 0.4891445236552171, "grad_norm": 0.031365133821964264, "learning_rate": 0.00019504928214591116, "loss": 0.3856, "step": 6038 }, { "epoch": 0.4892255346727155, "grad_norm": 0.027343137189745903, "learning_rate": 0.00019504478149331652, "loss": 0.3286, "step": 6039 }, { "epoch": 0.48930654569021387, "grad_norm": 0.035865604877471924, "learning_rate": 0.0001950402808407219, "loss": 0.3906, "step": 6040 }, { "epoch": 0.4893875567077122, "grad_norm": 0.030563244596123695, "learning_rate": 0.0001950357801881273, "loss": 0.3393, "step": 6041 }, { "epoch": 0.48946856772521063, "grad_norm": 0.0307852104306221, "learning_rate": 0.00019503127953553266, "loss": 0.3799, "step": 6042 }, { "epoch": 0.489549578742709, "grad_norm": 0.03347328305244446, "learning_rate": 0.00019502677888293804, "loss": 0.3705, "step": 6043 }, { "epoch": 0.4896305897602074, "grad_norm": 0.03165578097105026, "learning_rate": 0.0001950222782303434, "loss": 0.3788, "step": 6044 }, { "epoch": 0.48971160077770576, "grad_norm": 0.02846166305243969, "learning_rate": 0.00019501777757774876, "loss": 0.3469, "step": 6045 }, { "epoch": 0.48979261179520417, "grad_norm": 0.03398086130619049, "learning_rate": 0.00019501327692515415, "loss": 0.4594, "step": 6046 }, { "epoch": 0.4898736228127025, "grad_norm": 0.0365835502743721, "learning_rate": 0.00019500877627255954, "loss": 0.3812, "step": 6047 }, { "epoch": 0.4899546338302009, "grad_norm": 0.02915862761437893, "learning_rate": 0.0001950042756199649, "loss": 0.3811, "step": 6048 }, { "epoch": 0.4900356448476993, "grad_norm": 0.029644619673490524, "learning_rate": 0.00019499977496737029, "loss": 0.3548, "step": 6049 }, { "epoch": 0.49011665586519765, "grad_norm": 0.030091730877757072, "learning_rate": 0.00019499527431477565, "loss": 0.3554, "step": 6050 }, { "epoch": 0.49019766688269606, "grad_norm": 0.030911961570382118, "learning_rate": 0.00019499077366218103, "loss": 0.3683, "step": 6051 }, { "epoch": 0.4902786779001944, "grad_norm": 0.031337589025497437, "learning_rate": 0.0001949862730095864, "loss": 0.3439, "step": 6052 }, { "epoch": 0.4903596889176928, "grad_norm": 0.027552342042326927, "learning_rate": 0.00019498177235699178, "loss": 0.3247, "step": 6053 }, { "epoch": 0.4904406999351912, "grad_norm": 0.0306177269667387, "learning_rate": 0.00019497727170439714, "loss": 0.3879, "step": 6054 }, { "epoch": 0.49052171095268954, "grad_norm": 0.030022302642464638, "learning_rate": 0.00019497277105180253, "loss": 0.3767, "step": 6055 }, { "epoch": 0.49060272197018795, "grad_norm": 0.03156706318259239, "learning_rate": 0.0001949682703992079, "loss": 0.3303, "step": 6056 }, { "epoch": 0.4906837329876863, "grad_norm": 0.031678952276706696, "learning_rate": 0.00019496376974661327, "loss": 0.3664, "step": 6057 }, { "epoch": 0.4907647440051847, "grad_norm": 0.029925771057605743, "learning_rate": 0.00019495926909401864, "loss": 0.3658, "step": 6058 }, { "epoch": 0.4908457550226831, "grad_norm": 0.03150847181677818, "learning_rate": 0.00019495476844142402, "loss": 0.3788, "step": 6059 }, { "epoch": 0.4909267660401815, "grad_norm": 0.03222969174385071, "learning_rate": 0.00019495026778882938, "loss": 0.4201, "step": 6060 }, { "epoch": 0.49100777705767984, "grad_norm": 0.036666642874479294, "learning_rate": 0.00019494576713623477, "loss": 0.4088, "step": 6061 }, { "epoch": 0.49108878807517825, "grad_norm": 0.031550489366054535, "learning_rate": 0.00019494126648364013, "loss": 0.3477, "step": 6062 }, { "epoch": 0.4911697990926766, "grad_norm": 0.02660016529262066, "learning_rate": 0.00019493676583104552, "loss": 0.3173, "step": 6063 }, { "epoch": 0.49125081011017496, "grad_norm": 0.03121725469827652, "learning_rate": 0.00019493226517845088, "loss": 0.3218, "step": 6064 }, { "epoch": 0.4913318211276734, "grad_norm": 0.03398605063557625, "learning_rate": 0.00019492776452585626, "loss": 0.4122, "step": 6065 }, { "epoch": 0.49141283214517173, "grad_norm": 0.03151047229766846, "learning_rate": 0.00019492326387326162, "loss": 0.3821, "step": 6066 }, { "epoch": 0.49149384316267014, "grad_norm": 0.03958398848772049, "learning_rate": 0.000194918763220667, "loss": 0.4201, "step": 6067 }, { "epoch": 0.4915748541801685, "grad_norm": 0.03347938135266304, "learning_rate": 0.00019491426256807237, "loss": 0.3793, "step": 6068 }, { "epoch": 0.4916558651976669, "grad_norm": 0.03312998265028, "learning_rate": 0.00019490976191547776, "loss": 0.3665, "step": 6069 }, { "epoch": 0.49173687621516526, "grad_norm": 0.031087879091501236, "learning_rate": 0.00019490526126288315, "loss": 0.3767, "step": 6070 }, { "epoch": 0.4918178872326636, "grad_norm": 0.03423725813627243, "learning_rate": 0.0001949007606102885, "loss": 0.4474, "step": 6071 }, { "epoch": 0.49189889825016203, "grad_norm": 0.03192099183797836, "learning_rate": 0.00019489625995769387, "loss": 0.4102, "step": 6072 }, { "epoch": 0.4919799092676604, "grad_norm": 0.03476814553141594, "learning_rate": 0.00019489175930509925, "loss": 0.414, "step": 6073 }, { "epoch": 0.4920609202851588, "grad_norm": 0.03009425476193428, "learning_rate": 0.0001948872586525046, "loss": 0.3568, "step": 6074 }, { "epoch": 0.49214193130265715, "grad_norm": 0.02842790260910988, "learning_rate": 0.00019488275799991, "loss": 0.344, "step": 6075 }, { "epoch": 0.49222294232015557, "grad_norm": 0.03067297860980034, "learning_rate": 0.0001948782573473154, "loss": 0.3728, "step": 6076 }, { "epoch": 0.4923039533376539, "grad_norm": 0.030920876190066338, "learning_rate": 0.00019487375669472075, "loss": 0.305, "step": 6077 }, { "epoch": 0.4923849643551523, "grad_norm": 0.02858106978237629, "learning_rate": 0.0001948692560421261, "loss": 0.3724, "step": 6078 }, { "epoch": 0.4924659753726507, "grad_norm": 0.03146613389253616, "learning_rate": 0.0001948647553895315, "loss": 0.3369, "step": 6079 }, { "epoch": 0.49254698639014904, "grad_norm": 0.03171651437878609, "learning_rate": 0.00019486025473693688, "loss": 0.3992, "step": 6080 }, { "epoch": 0.49262799740764746, "grad_norm": 0.027577511966228485, "learning_rate": 0.00019485575408434224, "loss": 0.3301, "step": 6081 }, { "epoch": 0.4927090084251458, "grad_norm": 0.0313507542014122, "learning_rate": 0.00019485125343174763, "loss": 0.369, "step": 6082 }, { "epoch": 0.4927900194426442, "grad_norm": 0.0450977124273777, "learning_rate": 0.000194846752779153, "loss": 0.3491, "step": 6083 }, { "epoch": 0.4928710304601426, "grad_norm": 0.029169466346502304, "learning_rate": 0.00019484225212655835, "loss": 0.3506, "step": 6084 }, { "epoch": 0.49295204147764093, "grad_norm": 0.026984231546521187, "learning_rate": 0.00019483775147396374, "loss": 0.3564, "step": 6085 }, { "epoch": 0.49303305249513935, "grad_norm": 0.027847252786159515, "learning_rate": 0.00019483325082136912, "loss": 0.3293, "step": 6086 }, { "epoch": 0.4931140635126377, "grad_norm": 0.03478049486875534, "learning_rate": 0.00019482875016877448, "loss": 0.4064, "step": 6087 }, { "epoch": 0.4931950745301361, "grad_norm": 0.032629888504743576, "learning_rate": 0.00019482424951617987, "loss": 0.3692, "step": 6088 }, { "epoch": 0.49327608554763447, "grad_norm": 0.032630983740091324, "learning_rate": 0.00019481974886358523, "loss": 0.342, "step": 6089 }, { "epoch": 0.4933570965651329, "grad_norm": 0.030905557796359062, "learning_rate": 0.0001948152482109906, "loss": 0.3336, "step": 6090 }, { "epoch": 0.49343810758263124, "grad_norm": 0.03159333020448685, "learning_rate": 0.00019481074755839598, "loss": 0.3174, "step": 6091 }, { "epoch": 0.4935191186001296, "grad_norm": 0.035348713397979736, "learning_rate": 0.00019480624690580136, "loss": 0.3726, "step": 6092 }, { "epoch": 0.493600129617628, "grad_norm": 0.029344551265239716, "learning_rate": 0.00019480174625320672, "loss": 0.3581, "step": 6093 }, { "epoch": 0.49368114063512636, "grad_norm": 0.033776115626096725, "learning_rate": 0.0001947972456006121, "loss": 0.3734, "step": 6094 }, { "epoch": 0.49376215165262477, "grad_norm": 0.032239265739917755, "learning_rate": 0.00019479274494801747, "loss": 0.3807, "step": 6095 }, { "epoch": 0.4938431626701231, "grad_norm": 0.035125527530908585, "learning_rate": 0.00019478824429542283, "loss": 0.3738, "step": 6096 }, { "epoch": 0.49392417368762154, "grad_norm": 0.028467318043112755, "learning_rate": 0.00019478374364282822, "loss": 0.3764, "step": 6097 }, { "epoch": 0.4940051847051199, "grad_norm": 0.029696613550186157, "learning_rate": 0.0001947792429902336, "loss": 0.3522, "step": 6098 }, { "epoch": 0.49408619572261825, "grad_norm": 0.035356346517801285, "learning_rate": 0.00019477474233763897, "loss": 0.3416, "step": 6099 }, { "epoch": 0.49416720674011666, "grad_norm": 0.03714694082736969, "learning_rate": 0.00019477024168504435, "loss": 0.3707, "step": 6100 }, { "epoch": 0.494248217757615, "grad_norm": 0.027115581557154655, "learning_rate": 0.0001947657410324497, "loss": 0.3327, "step": 6101 }, { "epoch": 0.4943292287751134, "grad_norm": 0.033466219902038574, "learning_rate": 0.00019476124037985507, "loss": 0.3518, "step": 6102 }, { "epoch": 0.4944102397926118, "grad_norm": 0.030489971861243248, "learning_rate": 0.00019475673972726046, "loss": 0.3641, "step": 6103 }, { "epoch": 0.4944912508101102, "grad_norm": 0.03086225688457489, "learning_rate": 0.00019475223907466585, "loss": 0.3086, "step": 6104 }, { "epoch": 0.49457226182760855, "grad_norm": 0.033797189593315125, "learning_rate": 0.0001947477384220712, "loss": 0.3786, "step": 6105 }, { "epoch": 0.49465327284510696, "grad_norm": 0.028506649658083916, "learning_rate": 0.0001947432377694766, "loss": 0.3215, "step": 6106 }, { "epoch": 0.4947342838626053, "grad_norm": 0.029441583901643753, "learning_rate": 0.00019473873711688196, "loss": 0.3412, "step": 6107 }, { "epoch": 0.4948152948801037, "grad_norm": 0.029824761673808098, "learning_rate": 0.00019473423646428732, "loss": 0.391, "step": 6108 }, { "epoch": 0.4948963058976021, "grad_norm": 0.031150689348578453, "learning_rate": 0.0001947297358116927, "loss": 0.3726, "step": 6109 }, { "epoch": 0.49497731691510044, "grad_norm": 0.03557276353240013, "learning_rate": 0.0001947252351590981, "loss": 0.3477, "step": 6110 }, { "epoch": 0.49505832793259885, "grad_norm": 0.03375493362545967, "learning_rate": 0.00019472073450650345, "loss": 0.4136, "step": 6111 }, { "epoch": 0.4951393389500972, "grad_norm": 0.030482076108455658, "learning_rate": 0.00019471623385390884, "loss": 0.3726, "step": 6112 }, { "epoch": 0.4952203499675956, "grad_norm": 0.03187844157218933, "learning_rate": 0.0001947117332013142, "loss": 0.4042, "step": 6113 }, { "epoch": 0.495301360985094, "grad_norm": 0.0333397351205349, "learning_rate": 0.00019470723254871956, "loss": 0.3641, "step": 6114 }, { "epoch": 0.49538237200259233, "grad_norm": 0.033241838216781616, "learning_rate": 0.00019470273189612494, "loss": 0.36, "step": 6115 }, { "epoch": 0.49546338302009074, "grad_norm": 0.03735223412513733, "learning_rate": 0.00019469823124353033, "loss": 0.3981, "step": 6116 }, { "epoch": 0.4955443940375891, "grad_norm": 0.0269224364310503, "learning_rate": 0.0001946937305909357, "loss": 0.2883, "step": 6117 }, { "epoch": 0.4956254050550875, "grad_norm": 0.029367348179221153, "learning_rate": 0.00019468922993834108, "loss": 0.3345, "step": 6118 }, { "epoch": 0.49570641607258586, "grad_norm": 0.02894587628543377, "learning_rate": 0.00019468472928574644, "loss": 0.326, "step": 6119 }, { "epoch": 0.4957874270900843, "grad_norm": 0.029514474794268608, "learning_rate": 0.0001946802286331518, "loss": 0.3831, "step": 6120 }, { "epoch": 0.49586843810758263, "grad_norm": 0.03847199305891991, "learning_rate": 0.00019467572798055719, "loss": 0.3559, "step": 6121 }, { "epoch": 0.495949449125081, "grad_norm": 0.03183659166097641, "learning_rate": 0.00019467122732796257, "loss": 0.3779, "step": 6122 }, { "epoch": 0.4960304601425794, "grad_norm": 0.031036654487252235, "learning_rate": 0.00019466672667536793, "loss": 0.3158, "step": 6123 }, { "epoch": 0.49611147116007775, "grad_norm": 0.034427400678396225, "learning_rate": 0.00019466222602277332, "loss": 0.3801, "step": 6124 }, { "epoch": 0.49619248217757617, "grad_norm": 0.03311762586236, "learning_rate": 0.00019465772537017868, "loss": 0.3867, "step": 6125 }, { "epoch": 0.4962734931950745, "grad_norm": 0.03220412880182266, "learning_rate": 0.00019465322471758404, "loss": 0.3672, "step": 6126 }, { "epoch": 0.49635450421257293, "grad_norm": 0.03264900669455528, "learning_rate": 0.00019464872406498943, "loss": 0.3424, "step": 6127 }, { "epoch": 0.4964355152300713, "grad_norm": 0.033384453505277634, "learning_rate": 0.00019464422341239481, "loss": 0.3283, "step": 6128 }, { "epoch": 0.49651652624756965, "grad_norm": 0.029015664011240005, "learning_rate": 0.00019463972275980017, "loss": 0.3281, "step": 6129 }, { "epoch": 0.49659753726506806, "grad_norm": 0.033148493617773056, "learning_rate": 0.00019463522210720556, "loss": 0.3696, "step": 6130 }, { "epoch": 0.4966785482825664, "grad_norm": 0.02691441774368286, "learning_rate": 0.00019463072145461092, "loss": 0.3583, "step": 6131 }, { "epoch": 0.4967595593000648, "grad_norm": 0.03129500895738602, "learning_rate": 0.0001946262208020163, "loss": 0.3397, "step": 6132 }, { "epoch": 0.4968405703175632, "grad_norm": 0.028962457552552223, "learning_rate": 0.0001946217201494217, "loss": 0.3609, "step": 6133 }, { "epoch": 0.4969215813350616, "grad_norm": 0.030767876654863358, "learning_rate": 0.00019461721949682706, "loss": 0.3421, "step": 6134 }, { "epoch": 0.49700259235255995, "grad_norm": 0.03145063295960426, "learning_rate": 0.00019461271884423242, "loss": 0.387, "step": 6135 }, { "epoch": 0.4970836033700583, "grad_norm": 0.033778853714466095, "learning_rate": 0.0001946082181916378, "loss": 0.3792, "step": 6136 }, { "epoch": 0.4971646143875567, "grad_norm": 0.028447460383176804, "learning_rate": 0.00019460371753904316, "loss": 0.3694, "step": 6137 }, { "epoch": 0.49724562540505507, "grad_norm": 0.027313074097037315, "learning_rate": 0.00019459921688644855, "loss": 0.334, "step": 6138 }, { "epoch": 0.4973266364225535, "grad_norm": 0.027841804549098015, "learning_rate": 0.00019459471623385394, "loss": 0.3851, "step": 6139 }, { "epoch": 0.49740764744005184, "grad_norm": 0.03338708356022835, "learning_rate": 0.0001945902155812593, "loss": 0.3966, "step": 6140 }, { "epoch": 0.49748865845755025, "grad_norm": 0.031054025515913963, "learning_rate": 0.00019458571492866466, "loss": 0.3282, "step": 6141 }, { "epoch": 0.4975696694750486, "grad_norm": 0.031216377392411232, "learning_rate": 0.00019458121427607004, "loss": 0.3998, "step": 6142 }, { "epoch": 0.49765068049254696, "grad_norm": 0.03344246372580528, "learning_rate": 0.0001945767136234754, "loss": 0.3644, "step": 6143 }, { "epoch": 0.49773169151004537, "grad_norm": 0.029684685170650482, "learning_rate": 0.0001945722129708808, "loss": 0.3209, "step": 6144 }, { "epoch": 0.4978127025275437, "grad_norm": 0.02766270935535431, "learning_rate": 0.00019456771231828618, "loss": 0.3444, "step": 6145 }, { "epoch": 0.49789371354504214, "grad_norm": 0.032080575823783875, "learning_rate": 0.00019456321166569154, "loss": 0.3416, "step": 6146 }, { "epoch": 0.4979747245625405, "grad_norm": 0.030261723324656487, "learning_rate": 0.0001945587110130969, "loss": 0.3732, "step": 6147 }, { "epoch": 0.4980557355800389, "grad_norm": 0.03317071497440338, "learning_rate": 0.00019455421036050229, "loss": 0.3109, "step": 6148 }, { "epoch": 0.49813674659753726, "grad_norm": 0.03357579931616783, "learning_rate": 0.00019454970970790765, "loss": 0.3439, "step": 6149 }, { "epoch": 0.4982177576150357, "grad_norm": 0.03235941380262375, "learning_rate": 0.00019454520905531303, "loss": 0.3818, "step": 6150 }, { "epoch": 0.49829876863253403, "grad_norm": 0.035714928060770035, "learning_rate": 0.00019454070840271842, "loss": 0.3561, "step": 6151 }, { "epoch": 0.4983797796500324, "grad_norm": 0.02831859700381756, "learning_rate": 0.00019453620775012378, "loss": 0.3232, "step": 6152 }, { "epoch": 0.4984607906675308, "grad_norm": 0.03555682301521301, "learning_rate": 0.00019453170709752914, "loss": 0.3169, "step": 6153 }, { "epoch": 0.49854180168502915, "grad_norm": 0.03351519629359245, "learning_rate": 0.00019452720644493453, "loss": 0.3638, "step": 6154 }, { "epoch": 0.49862281270252756, "grad_norm": 0.03014187328517437, "learning_rate": 0.0001945227057923399, "loss": 0.3477, "step": 6155 }, { "epoch": 0.4987038237200259, "grad_norm": 0.0321093387901783, "learning_rate": 0.00019451820513974528, "loss": 0.3541, "step": 6156 }, { "epoch": 0.49878483473752433, "grad_norm": 0.04117025434970856, "learning_rate": 0.00019451370448715066, "loss": 0.3738, "step": 6157 }, { "epoch": 0.4988658457550227, "grad_norm": 0.029803169891238213, "learning_rate": 0.00019450920383455602, "loss": 0.3473, "step": 6158 }, { "epoch": 0.49894685677252104, "grad_norm": 0.030828144401311874, "learning_rate": 0.00019450470318196138, "loss": 0.3796, "step": 6159 }, { "epoch": 0.49902786779001945, "grad_norm": 0.03853604570031166, "learning_rate": 0.00019450020252936677, "loss": 0.37, "step": 6160 }, { "epoch": 0.4991088788075178, "grad_norm": 0.03002386912703514, "learning_rate": 0.00019449570187677216, "loss": 0.347, "step": 6161 }, { "epoch": 0.4991898898250162, "grad_norm": 0.0349125862121582, "learning_rate": 0.00019449120122417752, "loss": 0.4022, "step": 6162 }, { "epoch": 0.4992709008425146, "grad_norm": 0.03078773058950901, "learning_rate": 0.0001944867005715829, "loss": 0.3651, "step": 6163 }, { "epoch": 0.499351911860013, "grad_norm": 0.03070513904094696, "learning_rate": 0.00019448219991898826, "loss": 0.3402, "step": 6164 }, { "epoch": 0.49943292287751134, "grad_norm": 0.03130587190389633, "learning_rate": 0.00019447769926639362, "loss": 0.3446, "step": 6165 }, { "epoch": 0.4995139338950097, "grad_norm": 0.028303734958171844, "learning_rate": 0.000194473198613799, "loss": 0.3357, "step": 6166 }, { "epoch": 0.4995949449125081, "grad_norm": 0.030292222276329994, "learning_rate": 0.0001944686979612044, "loss": 0.3322, "step": 6167 }, { "epoch": 0.49967595593000647, "grad_norm": 0.03299104794859886, "learning_rate": 0.00019446419730860976, "loss": 0.3517, "step": 6168 }, { "epoch": 0.4997569669475049, "grad_norm": 0.031113620847463608, "learning_rate": 0.00019445969665601515, "loss": 0.3741, "step": 6169 }, { "epoch": 0.49983797796500323, "grad_norm": 0.031416017562150955, "learning_rate": 0.0001944551960034205, "loss": 0.3427, "step": 6170 }, { "epoch": 0.49991898898250164, "grad_norm": 0.02874842658638954, "learning_rate": 0.00019445069535082587, "loss": 0.3353, "step": 6171 }, { "epoch": 0.5, "grad_norm": 0.028396783396601677, "learning_rate": 0.00019444619469823125, "loss": 0.3563, "step": 6172 }, { "epoch": 0.5000810110174984, "grad_norm": 0.029108745977282524, "learning_rate": 0.00019444169404563664, "loss": 0.3556, "step": 6173 }, { "epoch": 0.5001620220349967, "grad_norm": 0.03376203402876854, "learning_rate": 0.000194437193393042, "loss": 0.3347, "step": 6174 }, { "epoch": 0.5002430330524952, "grad_norm": 0.03200310841202736, "learning_rate": 0.0001944326927404474, "loss": 0.3773, "step": 6175 }, { "epoch": 0.5003240440699935, "grad_norm": 0.03190843015909195, "learning_rate": 0.00019442819208785275, "loss": 0.3494, "step": 6176 }, { "epoch": 0.5004050550874919, "grad_norm": 0.037339869886636734, "learning_rate": 0.0001944236914352581, "loss": 0.3953, "step": 6177 }, { "epoch": 0.5004860661049902, "grad_norm": 0.028407022356987, "learning_rate": 0.0001944191907826635, "loss": 0.3594, "step": 6178 }, { "epoch": 0.5005670771224887, "grad_norm": 0.02792937681078911, "learning_rate": 0.00019441469013006888, "loss": 0.3103, "step": 6179 }, { "epoch": 0.5006480881399871, "grad_norm": 0.03023194707930088, "learning_rate": 0.00019441018947747424, "loss": 0.3372, "step": 6180 }, { "epoch": 0.5007290991574854, "grad_norm": 0.030646204948425293, "learning_rate": 0.00019440568882487963, "loss": 0.3651, "step": 6181 }, { "epoch": 0.5008101101749838, "grad_norm": 0.03154953196644783, "learning_rate": 0.000194401188172285, "loss": 0.3742, "step": 6182 }, { "epoch": 0.5008911211924821, "grad_norm": 0.03270383179187775, "learning_rate": 0.00019439668751969035, "loss": 0.3629, "step": 6183 }, { "epoch": 0.5009721322099806, "grad_norm": 0.029822640120983124, "learning_rate": 0.00019439218686709574, "loss": 0.3848, "step": 6184 }, { "epoch": 0.501053143227479, "grad_norm": 0.02989530935883522, "learning_rate": 0.00019438768621450112, "loss": 0.3755, "step": 6185 }, { "epoch": 0.5011341542449773, "grad_norm": 0.029338372871279716, "learning_rate": 0.00019438318556190648, "loss": 0.3114, "step": 6186 }, { "epoch": 0.5012151652624757, "grad_norm": 0.029134294018149376, "learning_rate": 0.00019437868490931187, "loss": 0.3457, "step": 6187 }, { "epoch": 0.501296176279974, "grad_norm": 0.04486939311027527, "learning_rate": 0.00019437418425671723, "loss": 0.3917, "step": 6188 }, { "epoch": 0.5013771872974725, "grad_norm": 0.03448181971907616, "learning_rate": 0.0001943696836041226, "loss": 0.3609, "step": 6189 }, { "epoch": 0.5014581983149708, "grad_norm": 0.030938809737563133, "learning_rate": 0.00019436518295152798, "loss": 0.3559, "step": 6190 }, { "epoch": 0.5015392093324692, "grad_norm": 0.030646713450551033, "learning_rate": 0.00019436068229893336, "loss": 0.3587, "step": 6191 }, { "epoch": 0.5016202203499676, "grad_norm": 0.03244081139564514, "learning_rate": 0.00019435618164633872, "loss": 0.3761, "step": 6192 }, { "epoch": 0.501701231367466, "grad_norm": 0.035014357417821884, "learning_rate": 0.0001943516809937441, "loss": 0.3652, "step": 6193 }, { "epoch": 0.5017822423849644, "grad_norm": 0.031360477209091187, "learning_rate": 0.00019434718034114947, "loss": 0.3517, "step": 6194 }, { "epoch": 0.5018632534024627, "grad_norm": 0.028555797412991524, "learning_rate": 0.00019434267968855483, "loss": 0.363, "step": 6195 }, { "epoch": 0.5019442644199611, "grad_norm": 0.03647736459970474, "learning_rate": 0.00019433817903596022, "loss": 0.3547, "step": 6196 }, { "epoch": 0.5020252754374595, "grad_norm": 0.03341953083872795, "learning_rate": 0.0001943336783833656, "loss": 0.4141, "step": 6197 }, { "epoch": 0.5021062864549579, "grad_norm": 0.027791619300842285, "learning_rate": 0.00019432917773077097, "loss": 0.3209, "step": 6198 }, { "epoch": 0.5021872974724563, "grad_norm": 0.03126957640051842, "learning_rate": 0.00019432467707817635, "loss": 0.3297, "step": 6199 }, { "epoch": 0.5022683084899546, "grad_norm": 0.028604375198483467, "learning_rate": 0.00019432017642558171, "loss": 0.3534, "step": 6200 }, { "epoch": 0.502349319507453, "grad_norm": 0.03173546493053436, "learning_rate": 0.00019431567577298707, "loss": 0.377, "step": 6201 }, { "epoch": 0.5024303305249513, "grad_norm": 0.030360376462340355, "learning_rate": 0.0001943111751203925, "loss": 0.3215, "step": 6202 }, { "epoch": 0.5025113415424498, "grad_norm": 0.030665067955851555, "learning_rate": 0.00019430667446779785, "loss": 0.323, "step": 6203 }, { "epoch": 0.5025923525599482, "grad_norm": 0.02950088307261467, "learning_rate": 0.0001943021738152032, "loss": 0.3481, "step": 6204 }, { "epoch": 0.5026733635774465, "grad_norm": 0.026497744023799896, "learning_rate": 0.0001942976731626086, "loss": 0.338, "step": 6205 }, { "epoch": 0.5027543745949449, "grad_norm": 0.032492347061634064, "learning_rate": 0.00019429317251001396, "loss": 0.3416, "step": 6206 }, { "epoch": 0.5028353856124433, "grad_norm": 0.0336732417345047, "learning_rate": 0.00019428867185741932, "loss": 0.3219, "step": 6207 }, { "epoch": 0.5029163966299417, "grad_norm": 0.029335761442780495, "learning_rate": 0.00019428417120482473, "loss": 0.2768, "step": 6208 }, { "epoch": 0.50299740764744, "grad_norm": 0.03476308658719063, "learning_rate": 0.0001942796705522301, "loss": 0.3831, "step": 6209 }, { "epoch": 0.5030784186649384, "grad_norm": 0.03170355036854744, "learning_rate": 0.00019427516989963545, "loss": 0.3257, "step": 6210 }, { "epoch": 0.5031594296824368, "grad_norm": 0.035959310829639435, "learning_rate": 0.00019427066924704084, "loss": 0.3323, "step": 6211 }, { "epoch": 0.5032404406999352, "grad_norm": 0.029416095465421677, "learning_rate": 0.0001942661685944462, "loss": 0.3845, "step": 6212 }, { "epoch": 0.5033214517174336, "grad_norm": 0.03393389657139778, "learning_rate": 0.00019426166794185158, "loss": 0.3568, "step": 6213 }, { "epoch": 0.5034024627349319, "grad_norm": 0.032064225524663925, "learning_rate": 0.00019425716728925697, "loss": 0.4063, "step": 6214 }, { "epoch": 0.5034834737524303, "grad_norm": 0.03695201501250267, "learning_rate": 0.00019425266663666233, "loss": 0.3833, "step": 6215 }, { "epoch": 0.5035644847699287, "grad_norm": 0.027572009712457657, "learning_rate": 0.0001942481659840677, "loss": 0.3293, "step": 6216 }, { "epoch": 0.5036454957874271, "grad_norm": 0.03562445938587189, "learning_rate": 0.00019424366533147308, "loss": 0.3812, "step": 6217 }, { "epoch": 0.5037265068049255, "grad_norm": 0.03267737105488777, "learning_rate": 0.00019423916467887844, "loss": 0.4063, "step": 6218 }, { "epoch": 0.5038075178224238, "grad_norm": 0.03409668803215027, "learning_rate": 0.00019423466402628383, "loss": 0.4145, "step": 6219 }, { "epoch": 0.5038885288399222, "grad_norm": 0.02712850458920002, "learning_rate": 0.0001942301633736892, "loss": 0.3519, "step": 6220 }, { "epoch": 0.5039695398574207, "grad_norm": 0.03325193002820015, "learning_rate": 0.00019422566272109457, "loss": 0.3376, "step": 6221 }, { "epoch": 0.504050550874919, "grad_norm": 0.031705718487501144, "learning_rate": 0.00019422116206849993, "loss": 0.3336, "step": 6222 }, { "epoch": 0.5041315618924174, "grad_norm": 0.02508116513490677, "learning_rate": 0.00019421666141590532, "loss": 0.3163, "step": 6223 }, { "epoch": 0.5042125729099157, "grad_norm": 0.03339695557951927, "learning_rate": 0.00019421216076331068, "loss": 0.4402, "step": 6224 }, { "epoch": 0.5042935839274141, "grad_norm": 0.028711862862110138, "learning_rate": 0.00019420766011071607, "loss": 0.337, "step": 6225 }, { "epoch": 0.5043745949449125, "grad_norm": 0.030767934396862984, "learning_rate": 0.00019420315945812145, "loss": 0.365, "step": 6226 }, { "epoch": 0.5044556059624109, "grad_norm": 0.030805392190814018, "learning_rate": 0.00019419865880552681, "loss": 0.3305, "step": 6227 }, { "epoch": 0.5045366169799093, "grad_norm": 0.029501011595129967, "learning_rate": 0.00019419415815293217, "loss": 0.3484, "step": 6228 }, { "epoch": 0.5046176279974076, "grad_norm": 0.027413569390773773, "learning_rate": 0.00019418965750033756, "loss": 0.3342, "step": 6229 }, { "epoch": 0.5046986390149061, "grad_norm": 0.031460799276828766, "learning_rate": 0.00019418515684774292, "loss": 0.3617, "step": 6230 }, { "epoch": 0.5047796500324044, "grad_norm": 0.031566593796014786, "learning_rate": 0.0001941806561951483, "loss": 0.3821, "step": 6231 }, { "epoch": 0.5048606610499028, "grad_norm": 0.029631730169057846, "learning_rate": 0.0001941761555425537, "loss": 0.3189, "step": 6232 }, { "epoch": 0.5049416720674011, "grad_norm": 0.036956410855054855, "learning_rate": 0.00019417165488995906, "loss": 0.3916, "step": 6233 }, { "epoch": 0.5050226830848995, "grad_norm": 0.029559465125203133, "learning_rate": 0.00019416715423736442, "loss": 0.3444, "step": 6234 }, { "epoch": 0.505103694102398, "grad_norm": 0.02766140177845955, "learning_rate": 0.0001941626535847698, "loss": 0.3127, "step": 6235 }, { "epoch": 0.5051847051198963, "grad_norm": 0.030541831627488136, "learning_rate": 0.0001941581529321752, "loss": 0.3341, "step": 6236 }, { "epoch": 0.5052657161373947, "grad_norm": 0.029187265783548355, "learning_rate": 0.00019415365227958055, "loss": 0.3098, "step": 6237 }, { "epoch": 0.505346727154893, "grad_norm": 0.03717184439301491, "learning_rate": 0.00019414915162698594, "loss": 0.3742, "step": 6238 }, { "epoch": 0.5054277381723914, "grad_norm": 0.03266545757651329, "learning_rate": 0.0001941446509743913, "loss": 0.3678, "step": 6239 }, { "epoch": 0.5055087491898899, "grad_norm": 0.03215375915169716, "learning_rate": 0.00019414015032179666, "loss": 0.3236, "step": 6240 }, { "epoch": 0.5055897602073882, "grad_norm": 0.02944401279091835, "learning_rate": 0.00019413564966920205, "loss": 0.3693, "step": 6241 }, { "epoch": 0.5056707712248866, "grad_norm": 0.030675627291202545, "learning_rate": 0.00019413114901660743, "loss": 0.3741, "step": 6242 }, { "epoch": 0.5057517822423849, "grad_norm": 0.035757292062044144, "learning_rate": 0.0001941266483640128, "loss": 0.3646, "step": 6243 }, { "epoch": 0.5058327932598834, "grad_norm": 0.032462820410728455, "learning_rate": 0.00019412214771141818, "loss": 0.3472, "step": 6244 }, { "epoch": 0.5059138042773818, "grad_norm": 0.03389735147356987, "learning_rate": 0.00019411764705882354, "loss": 0.3818, "step": 6245 }, { "epoch": 0.5059948152948801, "grad_norm": 0.028163554146885872, "learning_rate": 0.0001941131464062289, "loss": 0.3447, "step": 6246 }, { "epoch": 0.5060758263123785, "grad_norm": 0.030725527554750443, "learning_rate": 0.0001941086457536343, "loss": 0.2915, "step": 6247 }, { "epoch": 0.5061568373298768, "grad_norm": 0.030973976477980614, "learning_rate": 0.00019410414510103967, "loss": 0.3482, "step": 6248 }, { "epoch": 0.5062378483473753, "grad_norm": 0.030928470194339752, "learning_rate": 0.00019409964444844503, "loss": 0.3567, "step": 6249 }, { "epoch": 0.5063188593648736, "grad_norm": 0.030242808163166046, "learning_rate": 0.00019409514379585042, "loss": 0.3557, "step": 6250 }, { "epoch": 0.506399870382372, "grad_norm": 0.02889467217028141, "learning_rate": 0.00019409064314325578, "loss": 0.3522, "step": 6251 }, { "epoch": 0.5064808813998704, "grad_norm": 0.030975107103586197, "learning_rate": 0.00019408614249066114, "loss": 0.3302, "step": 6252 }, { "epoch": 0.5065618924173687, "grad_norm": 0.03164440020918846, "learning_rate": 0.00019408164183806653, "loss": 0.3643, "step": 6253 }, { "epoch": 0.5066429034348672, "grad_norm": 0.03547287359833717, "learning_rate": 0.00019407714118547192, "loss": 0.3688, "step": 6254 }, { "epoch": 0.5067239144523655, "grad_norm": 0.031200233846902847, "learning_rate": 0.00019407264053287728, "loss": 0.3747, "step": 6255 }, { "epoch": 0.5068049254698639, "grad_norm": 0.028289861977100372, "learning_rate": 0.00019406813988028266, "loss": 0.3329, "step": 6256 }, { "epoch": 0.5068859364873622, "grad_norm": 0.03076610527932644, "learning_rate": 0.00019406363922768802, "loss": 0.3163, "step": 6257 }, { "epoch": 0.5069669475048607, "grad_norm": 0.03446902707219124, "learning_rate": 0.00019405913857509338, "loss": 0.3595, "step": 6258 }, { "epoch": 0.5070479585223591, "grad_norm": 0.03079809434711933, "learning_rate": 0.00019405463792249877, "loss": 0.3493, "step": 6259 }, { "epoch": 0.5071289695398574, "grad_norm": 0.03474215790629387, "learning_rate": 0.00019405013726990416, "loss": 0.4053, "step": 6260 }, { "epoch": 0.5072099805573558, "grad_norm": 0.04012686014175415, "learning_rate": 0.00019404563661730952, "loss": 0.3085, "step": 6261 }, { "epoch": 0.5072909915748541, "grad_norm": 0.029809720814228058, "learning_rate": 0.0001940411359647149, "loss": 0.3449, "step": 6262 }, { "epoch": 0.5073720025923526, "grad_norm": 0.028845548629760742, "learning_rate": 0.00019403663531212026, "loss": 0.3286, "step": 6263 }, { "epoch": 0.507453013609851, "grad_norm": 0.03084278292953968, "learning_rate": 0.00019403213465952562, "loss": 0.3731, "step": 6264 }, { "epoch": 0.5075340246273493, "grad_norm": 0.0335809662938118, "learning_rate": 0.000194027634006931, "loss": 0.3481, "step": 6265 }, { "epoch": 0.5076150356448477, "grad_norm": 0.04117157682776451, "learning_rate": 0.0001940231333543364, "loss": 0.3492, "step": 6266 }, { "epoch": 0.5076960466623461, "grad_norm": 0.034561336040496826, "learning_rate": 0.00019401863270174176, "loss": 0.3986, "step": 6267 }, { "epoch": 0.5077770576798445, "grad_norm": 0.033314213156700134, "learning_rate": 0.00019401413204914715, "loss": 0.412, "step": 6268 }, { "epoch": 0.5078580686973428, "grad_norm": 0.029534365981817245, "learning_rate": 0.0001940096313965525, "loss": 0.3524, "step": 6269 }, { "epoch": 0.5079390797148412, "grad_norm": 0.032147347927093506, "learning_rate": 0.00019400513074395787, "loss": 0.4007, "step": 6270 }, { "epoch": 0.5080200907323396, "grad_norm": 0.02948775887489319, "learning_rate": 0.00019400063009136328, "loss": 0.3446, "step": 6271 }, { "epoch": 0.508101101749838, "grad_norm": 0.03933773934841156, "learning_rate": 0.00019399612943876864, "loss": 0.3851, "step": 6272 }, { "epoch": 0.5081821127673364, "grad_norm": 0.02769440785050392, "learning_rate": 0.000193991628786174, "loss": 0.3081, "step": 6273 }, { "epoch": 0.5082631237848347, "grad_norm": 0.03911726921796799, "learning_rate": 0.0001939871281335794, "loss": 0.3732, "step": 6274 }, { "epoch": 0.5083441348023331, "grad_norm": 0.029643390327692032, "learning_rate": 0.00019398262748098475, "loss": 0.3612, "step": 6275 }, { "epoch": 0.5084251458198314, "grad_norm": 0.036275800317525864, "learning_rate": 0.0001939781268283901, "loss": 0.3405, "step": 6276 }, { "epoch": 0.5085061568373299, "grad_norm": 0.03233199194073677, "learning_rate": 0.00019397362617579552, "loss": 0.3421, "step": 6277 }, { "epoch": 0.5085871678548283, "grad_norm": 0.03278407081961632, "learning_rate": 0.00019396912552320088, "loss": 0.3852, "step": 6278 }, { "epoch": 0.5086681788723266, "grad_norm": 0.03374721109867096, "learning_rate": 0.00019396462487060624, "loss": 0.3024, "step": 6279 }, { "epoch": 0.508749189889825, "grad_norm": 0.032265424728393555, "learning_rate": 0.00019396012421801163, "loss": 0.373, "step": 6280 }, { "epoch": 0.5088302009073234, "grad_norm": 0.03495476022362709, "learning_rate": 0.000193955623565417, "loss": 0.3255, "step": 6281 }, { "epoch": 0.5089112119248218, "grad_norm": 0.03319685533642769, "learning_rate": 0.00019395112291282235, "loss": 0.3884, "step": 6282 }, { "epoch": 0.5089922229423202, "grad_norm": 0.030925067141652107, "learning_rate": 0.00019394662226022776, "loss": 0.3777, "step": 6283 }, { "epoch": 0.5090732339598185, "grad_norm": 0.03134768828749657, "learning_rate": 0.00019394212160763312, "loss": 0.3875, "step": 6284 }, { "epoch": 0.5091542449773169, "grad_norm": 0.034195996820926666, "learning_rate": 0.00019393762095503848, "loss": 0.3083, "step": 6285 }, { "epoch": 0.5092352559948153, "grad_norm": 0.039229799062013626, "learning_rate": 0.00019393312030244387, "loss": 0.3646, "step": 6286 }, { "epoch": 0.5093162670123137, "grad_norm": 0.03241065889596939, "learning_rate": 0.00019392861964984923, "loss": 0.3777, "step": 6287 }, { "epoch": 0.509397278029812, "grad_norm": 0.031153660267591476, "learning_rate": 0.0001939241189972546, "loss": 0.3651, "step": 6288 }, { "epoch": 0.5094782890473104, "grad_norm": 0.031294070184230804, "learning_rate": 0.00019391961834466, "loss": 0.3527, "step": 6289 }, { "epoch": 0.5095593000648088, "grad_norm": 0.03369227051734924, "learning_rate": 0.00019391511769206537, "loss": 0.3764, "step": 6290 }, { "epoch": 0.5096403110823072, "grad_norm": 0.03187112510204315, "learning_rate": 0.00019391061703947073, "loss": 0.3501, "step": 6291 }, { "epoch": 0.5097213220998056, "grad_norm": 0.03397297486662865, "learning_rate": 0.0001939061163868761, "loss": 0.3805, "step": 6292 }, { "epoch": 0.5098023331173039, "grad_norm": 0.027571003884077072, "learning_rate": 0.00019390161573428147, "loss": 0.3033, "step": 6293 }, { "epoch": 0.5098833441348023, "grad_norm": 0.03471944108605385, "learning_rate": 0.00019389711508168686, "loss": 0.3677, "step": 6294 }, { "epoch": 0.5099643551523008, "grad_norm": 0.032311148941516876, "learning_rate": 0.00019389261442909225, "loss": 0.3641, "step": 6295 }, { "epoch": 0.5100453661697991, "grad_norm": 0.03428216651082039, "learning_rate": 0.0001938881137764976, "loss": 0.3387, "step": 6296 }, { "epoch": 0.5101263771872975, "grad_norm": 0.03166080266237259, "learning_rate": 0.00019388361312390297, "loss": 0.3805, "step": 6297 }, { "epoch": 0.5102073882047958, "grad_norm": 0.02960868738591671, "learning_rate": 0.00019387911247130835, "loss": 0.3455, "step": 6298 }, { "epoch": 0.5102883992222942, "grad_norm": 0.031040215864777565, "learning_rate": 0.00019387461181871371, "loss": 0.3222, "step": 6299 }, { "epoch": 0.5103694102397927, "grad_norm": 0.02788618393242359, "learning_rate": 0.0001938701111661191, "loss": 0.3552, "step": 6300 }, { "epoch": 0.510450421257291, "grad_norm": 0.03101874515414238, "learning_rate": 0.0001938656105135245, "loss": 0.3212, "step": 6301 }, { "epoch": 0.5105314322747894, "grad_norm": 0.03254443407058716, "learning_rate": 0.00019386110986092985, "loss": 0.3084, "step": 6302 }, { "epoch": 0.5106124432922877, "grad_norm": 0.030638394877314568, "learning_rate": 0.0001938566092083352, "loss": 0.3793, "step": 6303 }, { "epoch": 0.5106934543097861, "grad_norm": 0.03493942320346832, "learning_rate": 0.0001938521085557406, "loss": 0.3468, "step": 6304 }, { "epoch": 0.5107744653272845, "grad_norm": 0.03019285574555397, "learning_rate": 0.00019384760790314596, "loss": 0.3434, "step": 6305 }, { "epoch": 0.5108554763447829, "grad_norm": 0.03360366448760033, "learning_rate": 0.00019384310725055134, "loss": 0.3754, "step": 6306 }, { "epoch": 0.5109364873622813, "grad_norm": 0.03600335493683815, "learning_rate": 0.00019383860659795673, "loss": 0.3806, "step": 6307 }, { "epoch": 0.5110174983797796, "grad_norm": 0.027877481654286385, "learning_rate": 0.0001938341059453621, "loss": 0.3768, "step": 6308 }, { "epoch": 0.5110985093972781, "grad_norm": 0.03358413279056549, "learning_rate": 0.00019382960529276745, "loss": 0.3755, "step": 6309 }, { "epoch": 0.5111795204147764, "grad_norm": 0.02829788252711296, "learning_rate": 0.00019382510464017284, "loss": 0.338, "step": 6310 }, { "epoch": 0.5112605314322748, "grad_norm": 0.03067375160753727, "learning_rate": 0.0001938206039875782, "loss": 0.3577, "step": 6311 }, { "epoch": 0.5113415424497731, "grad_norm": 0.03363307565450668, "learning_rate": 0.00019381610333498358, "loss": 0.3685, "step": 6312 }, { "epoch": 0.5114225534672715, "grad_norm": 0.03025786019861698, "learning_rate": 0.00019381160268238897, "loss": 0.3675, "step": 6313 }, { "epoch": 0.51150356448477, "grad_norm": 0.028437191620469093, "learning_rate": 0.00019380710202979433, "loss": 0.3162, "step": 6314 }, { "epoch": 0.5115845755022683, "grad_norm": 0.032357107847929, "learning_rate": 0.0001938026013771997, "loss": 0.3861, "step": 6315 }, { "epoch": 0.5116655865197667, "grad_norm": 0.030493425205349922, "learning_rate": 0.00019379810072460508, "loss": 0.3729, "step": 6316 }, { "epoch": 0.511746597537265, "grad_norm": 0.03158330172300339, "learning_rate": 0.00019379360007201047, "loss": 0.3112, "step": 6317 }, { "epoch": 0.5118276085547635, "grad_norm": 0.029356788843870163, "learning_rate": 0.00019378909941941583, "loss": 0.4015, "step": 6318 }, { "epoch": 0.5119086195722619, "grad_norm": 0.028847860172390938, "learning_rate": 0.0001937845987668212, "loss": 0.3186, "step": 6319 }, { "epoch": 0.5119896305897602, "grad_norm": 0.036853089928627014, "learning_rate": 0.00019378009811422657, "loss": 0.4413, "step": 6320 }, { "epoch": 0.5120706416072586, "grad_norm": 0.02763395383954048, "learning_rate": 0.00019377559746163193, "loss": 0.3072, "step": 6321 }, { "epoch": 0.5121516526247569, "grad_norm": 0.03145894780755043, "learning_rate": 0.00019377109680903732, "loss": 0.395, "step": 6322 }, { "epoch": 0.5122326636422554, "grad_norm": 0.02779548056423664, "learning_rate": 0.0001937665961564427, "loss": 0.3195, "step": 6323 }, { "epoch": 0.5123136746597537, "grad_norm": 0.028829006478190422, "learning_rate": 0.00019376209550384807, "loss": 0.4011, "step": 6324 }, { "epoch": 0.5123946856772521, "grad_norm": 0.03402452915906906, "learning_rate": 0.00019375759485125345, "loss": 0.369, "step": 6325 }, { "epoch": 0.5124756966947505, "grad_norm": 0.03007756732404232, "learning_rate": 0.00019375309419865881, "loss": 0.3516, "step": 6326 }, { "epoch": 0.5125567077122488, "grad_norm": 0.032619405537843704, "learning_rate": 0.00019374859354606417, "loss": 0.3723, "step": 6327 }, { "epoch": 0.5126377187297473, "grad_norm": 0.029638774693012238, "learning_rate": 0.00019374409289346956, "loss": 0.3269, "step": 6328 }, { "epoch": 0.5127187297472456, "grad_norm": 0.03590411692857742, "learning_rate": 0.00019373959224087495, "loss": 0.4164, "step": 6329 }, { "epoch": 0.512799740764744, "grad_norm": 0.031840380281209946, "learning_rate": 0.0001937350915882803, "loss": 0.3834, "step": 6330 }, { "epoch": 0.5128807517822424, "grad_norm": 0.03522587567567825, "learning_rate": 0.0001937305909356857, "loss": 0.3833, "step": 6331 }, { "epoch": 0.5129617627997408, "grad_norm": 0.029918646439909935, "learning_rate": 0.00019372609028309106, "loss": 0.3557, "step": 6332 }, { "epoch": 0.5130427738172392, "grad_norm": 0.0331931971013546, "learning_rate": 0.00019372158963049642, "loss": 0.3398, "step": 6333 }, { "epoch": 0.5131237848347375, "grad_norm": 0.031502824276685715, "learning_rate": 0.0001937170889779018, "loss": 0.3731, "step": 6334 }, { "epoch": 0.5132047958522359, "grad_norm": 0.029885951429605484, "learning_rate": 0.0001937125883253072, "loss": 0.3634, "step": 6335 }, { "epoch": 0.5132858068697342, "grad_norm": 0.02981005609035492, "learning_rate": 0.00019370808767271255, "loss": 0.3435, "step": 6336 }, { "epoch": 0.5133668178872327, "grad_norm": 0.032016005367040634, "learning_rate": 0.00019370358702011794, "loss": 0.3851, "step": 6337 }, { "epoch": 0.5134478289047311, "grad_norm": 0.034068964421749115, "learning_rate": 0.0001936990863675233, "loss": 0.3678, "step": 6338 }, { "epoch": 0.5135288399222294, "grad_norm": 0.031114788725972176, "learning_rate": 0.00019369458571492866, "loss": 0.3791, "step": 6339 }, { "epoch": 0.5136098509397278, "grad_norm": 0.03359213471412659, "learning_rate": 0.00019369008506233405, "loss": 0.3876, "step": 6340 }, { "epoch": 0.5136908619572261, "grad_norm": 0.03328750655055046, "learning_rate": 0.00019368558440973943, "loss": 0.3568, "step": 6341 }, { "epoch": 0.5137718729747246, "grad_norm": 0.028766583651304245, "learning_rate": 0.0001936810837571448, "loss": 0.3137, "step": 6342 }, { "epoch": 0.513852883992223, "grad_norm": 0.027417849749326706, "learning_rate": 0.00019367658310455018, "loss": 0.3123, "step": 6343 }, { "epoch": 0.5139338950097213, "grad_norm": 0.030872000381350517, "learning_rate": 0.00019367208245195554, "loss": 0.3476, "step": 6344 }, { "epoch": 0.5140149060272197, "grad_norm": 0.03216738626360893, "learning_rate": 0.0001936675817993609, "loss": 0.376, "step": 6345 }, { "epoch": 0.5140959170447181, "grad_norm": 0.030741369351744652, "learning_rate": 0.00019366308114676631, "loss": 0.3195, "step": 6346 }, { "epoch": 0.5141769280622165, "grad_norm": 0.029521172866225243, "learning_rate": 0.00019365858049417167, "loss": 0.3565, "step": 6347 }, { "epoch": 0.5142579390797148, "grad_norm": 0.03402463719248772, "learning_rate": 0.00019365407984157703, "loss": 0.3921, "step": 6348 }, { "epoch": 0.5143389500972132, "grad_norm": 0.033727966248989105, "learning_rate": 0.00019364957918898242, "loss": 0.3609, "step": 6349 }, { "epoch": 0.5144199611147116, "grad_norm": 0.029165782034397125, "learning_rate": 0.00019364507853638778, "loss": 0.3594, "step": 6350 }, { "epoch": 0.51450097213221, "grad_norm": 0.029388075694441795, "learning_rate": 0.00019364057788379314, "loss": 0.3183, "step": 6351 }, { "epoch": 0.5145819831497084, "grad_norm": 0.03150533139705658, "learning_rate": 0.00019363607723119856, "loss": 0.3508, "step": 6352 }, { "epoch": 0.5146629941672067, "grad_norm": 0.028366554528474808, "learning_rate": 0.00019363157657860392, "loss": 0.3234, "step": 6353 }, { "epoch": 0.5147440051847051, "grad_norm": 0.0290334802120924, "learning_rate": 0.00019362707592600928, "loss": 0.3214, "step": 6354 }, { "epoch": 0.5148250162022034, "grad_norm": 0.03059317171573639, "learning_rate": 0.00019362257527341466, "loss": 0.2869, "step": 6355 }, { "epoch": 0.5149060272197019, "grad_norm": 0.03641689568758011, "learning_rate": 0.00019361807462082002, "loss": 0.4037, "step": 6356 }, { "epoch": 0.5149870382372003, "grad_norm": 0.030488573014736176, "learning_rate": 0.00019361357396822538, "loss": 0.386, "step": 6357 }, { "epoch": 0.5150680492546986, "grad_norm": 0.03700888529419899, "learning_rate": 0.0001936090733156308, "loss": 0.3534, "step": 6358 }, { "epoch": 0.515149060272197, "grad_norm": 0.03261464461684227, "learning_rate": 0.00019360457266303616, "loss": 0.3377, "step": 6359 }, { "epoch": 0.5152300712896954, "grad_norm": 0.036103613674640656, "learning_rate": 0.00019360007201044152, "loss": 0.3113, "step": 6360 }, { "epoch": 0.5153110823071938, "grad_norm": 0.029434196650981903, "learning_rate": 0.0001935955713578469, "loss": 0.3724, "step": 6361 }, { "epoch": 0.5153920933246922, "grad_norm": 0.03052251972258091, "learning_rate": 0.00019359107070525226, "loss": 0.3562, "step": 6362 }, { "epoch": 0.5154731043421905, "grad_norm": 0.03182889148592949, "learning_rate": 0.00019358657005265762, "loss": 0.3399, "step": 6363 }, { "epoch": 0.5155541153596889, "grad_norm": 0.031418126076459885, "learning_rate": 0.00019358206940006304, "loss": 0.337, "step": 6364 }, { "epoch": 0.5156351263771873, "grad_norm": 0.028636738657951355, "learning_rate": 0.0001935775687474684, "loss": 0.349, "step": 6365 }, { "epoch": 0.5157161373946857, "grad_norm": 0.03573239594697952, "learning_rate": 0.00019357306809487376, "loss": 0.2985, "step": 6366 }, { "epoch": 0.515797148412184, "grad_norm": 0.03263181447982788, "learning_rate": 0.00019356856744227915, "loss": 0.3869, "step": 6367 }, { "epoch": 0.5158781594296824, "grad_norm": 0.03369957208633423, "learning_rate": 0.0001935640667896845, "loss": 0.365, "step": 6368 }, { "epoch": 0.5159591704471809, "grad_norm": 0.02732839621603489, "learning_rate": 0.0001935595661370899, "loss": 0.3822, "step": 6369 }, { "epoch": 0.5160401814646792, "grad_norm": 0.0374213382601738, "learning_rate": 0.00019355506548449528, "loss": 0.3375, "step": 6370 }, { "epoch": 0.5161211924821776, "grad_norm": 0.03227487951517105, "learning_rate": 0.00019355056483190064, "loss": 0.3982, "step": 6371 }, { "epoch": 0.5162022034996759, "grad_norm": 0.030356882140040398, "learning_rate": 0.000193546064179306, "loss": 0.3526, "step": 6372 }, { "epoch": 0.5162832145171743, "grad_norm": 0.03413158282637596, "learning_rate": 0.0001935415635267114, "loss": 0.3768, "step": 6373 }, { "epoch": 0.5163642255346728, "grad_norm": 0.02953726425766945, "learning_rate": 0.00019353706287411675, "loss": 0.3178, "step": 6374 }, { "epoch": 0.5164452365521711, "grad_norm": 0.029602529481053352, "learning_rate": 0.00019353256222152214, "loss": 0.3066, "step": 6375 }, { "epoch": 0.5165262475696695, "grad_norm": 0.03132502734661102, "learning_rate": 0.00019352806156892752, "loss": 0.3302, "step": 6376 }, { "epoch": 0.5166072585871678, "grad_norm": 0.029297152534127235, "learning_rate": 0.00019352356091633288, "loss": 0.3797, "step": 6377 }, { "epoch": 0.5166882696046662, "grad_norm": 0.030203981325030327, "learning_rate": 0.00019351906026373824, "loss": 0.3137, "step": 6378 }, { "epoch": 0.5167692806221647, "grad_norm": 0.03112986497581005, "learning_rate": 0.00019351455961114363, "loss": 0.3897, "step": 6379 }, { "epoch": 0.516850291639663, "grad_norm": 0.030600905418395996, "learning_rate": 0.000193510058958549, "loss": 0.406, "step": 6380 }, { "epoch": 0.5169313026571614, "grad_norm": 0.03712606802582741, "learning_rate": 0.00019350555830595438, "loss": 0.3828, "step": 6381 }, { "epoch": 0.5170123136746597, "grad_norm": 0.03237859159708023, "learning_rate": 0.00019350105765335976, "loss": 0.3284, "step": 6382 }, { "epoch": 0.5170933246921582, "grad_norm": 0.033010292798280716, "learning_rate": 0.00019349655700076512, "loss": 0.3705, "step": 6383 }, { "epoch": 0.5171743357096565, "grad_norm": 0.035950496792793274, "learning_rate": 0.00019349205634817048, "loss": 0.3146, "step": 6384 }, { "epoch": 0.5172553467271549, "grad_norm": 0.03335532546043396, "learning_rate": 0.00019348755569557587, "loss": 0.3776, "step": 6385 }, { "epoch": 0.5173363577446533, "grad_norm": 0.03218366205692291, "learning_rate": 0.00019348305504298123, "loss": 0.3743, "step": 6386 }, { "epoch": 0.5174173687621516, "grad_norm": 0.03454633429646492, "learning_rate": 0.00019347855439038662, "loss": 0.371, "step": 6387 }, { "epoch": 0.5174983797796501, "grad_norm": 0.03251352906227112, "learning_rate": 0.000193474053737792, "loss": 0.4115, "step": 6388 }, { "epoch": 0.5175793907971484, "grad_norm": 0.03137648105621338, "learning_rate": 0.00019346955308519737, "loss": 0.3467, "step": 6389 }, { "epoch": 0.5176604018146468, "grad_norm": 0.03319603577256203, "learning_rate": 0.00019346505243260273, "loss": 0.4092, "step": 6390 }, { "epoch": 0.5177414128321451, "grad_norm": 0.02964252606034279, "learning_rate": 0.0001934605517800081, "loss": 0.3554, "step": 6391 }, { "epoch": 0.5178224238496435, "grad_norm": 0.03132447227835655, "learning_rate": 0.00019345605112741347, "loss": 0.3331, "step": 6392 }, { "epoch": 0.517903434867142, "grad_norm": 0.033342160284519196, "learning_rate": 0.00019345155047481886, "loss": 0.36, "step": 6393 }, { "epoch": 0.5179844458846403, "grad_norm": 0.03459905460476875, "learning_rate": 0.00019344704982222425, "loss": 0.3617, "step": 6394 }, { "epoch": 0.5180654569021387, "grad_norm": 0.03589166700839996, "learning_rate": 0.0001934425491696296, "loss": 0.4322, "step": 6395 }, { "epoch": 0.518146467919637, "grad_norm": 0.03061509132385254, "learning_rate": 0.00019343804851703497, "loss": 0.3421, "step": 6396 }, { "epoch": 0.5182274789371355, "grad_norm": 0.029686959460377693, "learning_rate": 0.00019343354786444035, "loss": 0.3151, "step": 6397 }, { "epoch": 0.5183084899546339, "grad_norm": 0.03125001862645149, "learning_rate": 0.00019342904721184574, "loss": 0.32, "step": 6398 }, { "epoch": 0.5183895009721322, "grad_norm": 0.03100123070180416, "learning_rate": 0.0001934245465592511, "loss": 0.3775, "step": 6399 }, { "epoch": 0.5184705119896306, "grad_norm": 0.03699709102511406, "learning_rate": 0.0001934200459066565, "loss": 0.4467, "step": 6400 }, { "epoch": 0.5185515230071289, "grad_norm": 0.029937749728560448, "learning_rate": 0.00019341554525406185, "loss": 0.3409, "step": 6401 }, { "epoch": 0.5186325340246274, "grad_norm": 0.026631394401192665, "learning_rate": 0.0001934110446014672, "loss": 0.3464, "step": 6402 }, { "epoch": 0.5187135450421257, "grad_norm": 0.033784594386816025, "learning_rate": 0.0001934065439488726, "loss": 0.3718, "step": 6403 }, { "epoch": 0.5187945560596241, "grad_norm": 0.03262433782219887, "learning_rate": 0.00019340204329627798, "loss": 0.3505, "step": 6404 }, { "epoch": 0.5188755670771225, "grad_norm": 0.03220927715301514, "learning_rate": 0.00019339754264368334, "loss": 0.3633, "step": 6405 }, { "epoch": 0.5189565780946209, "grad_norm": 0.02535347267985344, "learning_rate": 0.00019339304199108873, "loss": 0.3258, "step": 6406 }, { "epoch": 0.5190375891121193, "grad_norm": 0.034237343817949295, "learning_rate": 0.0001933885413384941, "loss": 0.3755, "step": 6407 }, { "epoch": 0.5191186001296176, "grad_norm": 0.03497825190424919, "learning_rate": 0.00019338404068589945, "loss": 0.3463, "step": 6408 }, { "epoch": 0.519199611147116, "grad_norm": 0.03145504742860794, "learning_rate": 0.00019337954003330484, "loss": 0.3484, "step": 6409 }, { "epoch": 0.5192806221646143, "grad_norm": 0.03155725449323654, "learning_rate": 0.00019337503938071022, "loss": 0.3341, "step": 6410 }, { "epoch": 0.5193616331821128, "grad_norm": 0.03286818042397499, "learning_rate": 0.00019337053872811558, "loss": 0.3598, "step": 6411 }, { "epoch": 0.5194426441996112, "grad_norm": 0.029384689405560493, "learning_rate": 0.00019336603807552097, "loss": 0.3601, "step": 6412 }, { "epoch": 0.5195236552171095, "grad_norm": 0.03224647790193558, "learning_rate": 0.00019336153742292633, "loss": 0.3387, "step": 6413 }, { "epoch": 0.5196046662346079, "grad_norm": 0.03096454218029976, "learning_rate": 0.0001933570367703317, "loss": 0.3579, "step": 6414 }, { "epoch": 0.5196856772521062, "grad_norm": 0.03217236325144768, "learning_rate": 0.00019335253611773708, "loss": 0.3518, "step": 6415 }, { "epoch": 0.5197666882696047, "grad_norm": 0.030055413022637367, "learning_rate": 0.00019334803546514247, "loss": 0.3254, "step": 6416 }, { "epoch": 0.5198476992871031, "grad_norm": 0.03201695904135704, "learning_rate": 0.00019334353481254783, "loss": 0.4164, "step": 6417 }, { "epoch": 0.5199287103046014, "grad_norm": 0.033270809799432755, "learning_rate": 0.0001933390341599532, "loss": 0.3886, "step": 6418 }, { "epoch": 0.5200097213220998, "grad_norm": 0.032824356108903885, "learning_rate": 0.00019333453350735857, "loss": 0.3481, "step": 6419 }, { "epoch": 0.5200907323395982, "grad_norm": 0.03067057952284813, "learning_rate": 0.00019333003285476393, "loss": 0.3551, "step": 6420 }, { "epoch": 0.5201717433570966, "grad_norm": 0.03185015544295311, "learning_rate": 0.00019332553220216932, "loss": 0.3296, "step": 6421 }, { "epoch": 0.520252754374595, "grad_norm": 0.02917659282684326, "learning_rate": 0.0001933210315495747, "loss": 0.3056, "step": 6422 }, { "epoch": 0.5203337653920933, "grad_norm": 0.02973158471286297, "learning_rate": 0.00019331653089698007, "loss": 0.3248, "step": 6423 }, { "epoch": 0.5204147764095917, "grad_norm": 0.02465933933854103, "learning_rate": 0.00019331203024438546, "loss": 0.2779, "step": 6424 }, { "epoch": 0.5204957874270901, "grad_norm": 0.02845362387597561, "learning_rate": 0.00019330752959179082, "loss": 0.3187, "step": 6425 }, { "epoch": 0.5205767984445885, "grad_norm": 0.031649742275476456, "learning_rate": 0.00019330302893919618, "loss": 0.3582, "step": 6426 }, { "epoch": 0.5206578094620868, "grad_norm": 0.029350902885198593, "learning_rate": 0.0001932985282866016, "loss": 0.3251, "step": 6427 }, { "epoch": 0.5207388204795852, "grad_norm": 0.03055676445364952, "learning_rate": 0.00019329402763400695, "loss": 0.3295, "step": 6428 }, { "epoch": 0.5208198314970836, "grad_norm": 0.03613782301545143, "learning_rate": 0.0001932895269814123, "loss": 0.384, "step": 6429 }, { "epoch": 0.520900842514582, "grad_norm": 0.03135627508163452, "learning_rate": 0.0001932850263288177, "loss": 0.384, "step": 6430 }, { "epoch": 0.5209818535320804, "grad_norm": 0.031382665038108826, "learning_rate": 0.00019328052567622306, "loss": 0.3862, "step": 6431 }, { "epoch": 0.5210628645495787, "grad_norm": 0.031073955819010735, "learning_rate": 0.00019327602502362842, "loss": 0.374, "step": 6432 }, { "epoch": 0.5211438755670771, "grad_norm": 0.030740221962332726, "learning_rate": 0.00019327152437103383, "loss": 0.4147, "step": 6433 }, { "epoch": 0.5212248865845756, "grad_norm": 0.028593573719263077, "learning_rate": 0.0001932670237184392, "loss": 0.3231, "step": 6434 }, { "epoch": 0.5213058976020739, "grad_norm": 0.028780462220311165, "learning_rate": 0.00019326252306584455, "loss": 0.3409, "step": 6435 }, { "epoch": 0.5213869086195723, "grad_norm": 0.030874744057655334, "learning_rate": 0.00019325802241324994, "loss": 0.3895, "step": 6436 }, { "epoch": 0.5214679196370706, "grad_norm": 0.03160393610596657, "learning_rate": 0.0001932535217606553, "loss": 0.3526, "step": 6437 }, { "epoch": 0.521548930654569, "grad_norm": 0.03185877203941345, "learning_rate": 0.00019324902110806066, "loss": 0.3005, "step": 6438 }, { "epoch": 0.5216299416720674, "grad_norm": 0.03563063219189644, "learning_rate": 0.00019324452045546607, "loss": 0.3619, "step": 6439 }, { "epoch": 0.5217109526895658, "grad_norm": 0.032077718526124954, "learning_rate": 0.00019324001980287143, "loss": 0.3913, "step": 6440 }, { "epoch": 0.5217919637070642, "grad_norm": 0.028801945969462395, "learning_rate": 0.0001932355191502768, "loss": 0.3164, "step": 6441 }, { "epoch": 0.5218729747245625, "grad_norm": 0.036385100334882736, "learning_rate": 0.00019323101849768218, "loss": 0.3347, "step": 6442 }, { "epoch": 0.5219539857420609, "grad_norm": 0.02876608446240425, "learning_rate": 0.00019322651784508754, "loss": 0.35, "step": 6443 }, { "epoch": 0.5220349967595593, "grad_norm": 0.031699031591415405, "learning_rate": 0.0001932220171924929, "loss": 0.3678, "step": 6444 }, { "epoch": 0.5221160077770577, "grad_norm": 0.02782803401350975, "learning_rate": 0.00019321751653989831, "loss": 0.3406, "step": 6445 }, { "epoch": 0.522197018794556, "grad_norm": 0.030642272904515266, "learning_rate": 0.00019321301588730367, "loss": 0.3659, "step": 6446 }, { "epoch": 0.5222780298120544, "grad_norm": 0.033031292259693146, "learning_rate": 0.00019320851523470903, "loss": 0.3806, "step": 6447 }, { "epoch": 0.5223590408295529, "grad_norm": 0.03382324054837227, "learning_rate": 0.00019320401458211442, "loss": 0.3542, "step": 6448 }, { "epoch": 0.5224400518470512, "grad_norm": 0.03232893347740173, "learning_rate": 0.00019319951392951978, "loss": 0.3765, "step": 6449 }, { "epoch": 0.5225210628645496, "grad_norm": 0.03362128511071205, "learning_rate": 0.00019319501327692517, "loss": 0.3558, "step": 6450 }, { "epoch": 0.5226020738820479, "grad_norm": 0.033634036779403687, "learning_rate": 0.00019319051262433056, "loss": 0.3799, "step": 6451 }, { "epoch": 0.5226830848995463, "grad_norm": 0.03131242096424103, "learning_rate": 0.00019318601197173592, "loss": 0.3444, "step": 6452 }, { "epoch": 0.5227640959170448, "grad_norm": 0.028830870985984802, "learning_rate": 0.00019318151131914128, "loss": 0.3426, "step": 6453 }, { "epoch": 0.5228451069345431, "grad_norm": 0.03265361115336418, "learning_rate": 0.00019317701066654666, "loss": 0.3594, "step": 6454 }, { "epoch": 0.5229261179520415, "grad_norm": 0.028458425775170326, "learning_rate": 0.00019317251001395202, "loss": 0.3271, "step": 6455 }, { "epoch": 0.5230071289695398, "grad_norm": 0.03274376317858696, "learning_rate": 0.0001931680093613574, "loss": 0.3295, "step": 6456 }, { "epoch": 0.5230881399870383, "grad_norm": 0.029262587428092957, "learning_rate": 0.0001931635087087628, "loss": 0.3618, "step": 6457 }, { "epoch": 0.5231691510045366, "grad_norm": 0.03436655178666115, "learning_rate": 0.00019315900805616816, "loss": 0.3552, "step": 6458 }, { "epoch": 0.523250162022035, "grad_norm": 0.030990226194262505, "learning_rate": 0.00019315450740357352, "loss": 0.3752, "step": 6459 }, { "epoch": 0.5233311730395334, "grad_norm": 0.028373638167977333, "learning_rate": 0.0001931500067509789, "loss": 0.3136, "step": 6460 }, { "epoch": 0.5234121840570317, "grad_norm": 0.030110273510217667, "learning_rate": 0.00019314550609838426, "loss": 0.3447, "step": 6461 }, { "epoch": 0.5234931950745302, "grad_norm": 0.03270327299833298, "learning_rate": 0.00019314100544578965, "loss": 0.4455, "step": 6462 }, { "epoch": 0.5235742060920285, "grad_norm": 0.025852186605334282, "learning_rate": 0.00019313650479319504, "loss": 0.3235, "step": 6463 }, { "epoch": 0.5236552171095269, "grad_norm": 0.027759717777371407, "learning_rate": 0.0001931320041406004, "loss": 0.3391, "step": 6464 }, { "epoch": 0.5237362281270252, "grad_norm": 0.03059726394712925, "learning_rate": 0.00019312750348800576, "loss": 0.4047, "step": 6465 }, { "epoch": 0.5238172391445236, "grad_norm": 0.03364939242601395, "learning_rate": 0.00019312300283541115, "loss": 0.4049, "step": 6466 }, { "epoch": 0.5238982501620221, "grad_norm": 0.030493125319480896, "learning_rate": 0.0001931185021828165, "loss": 0.3328, "step": 6467 }, { "epoch": 0.5239792611795204, "grad_norm": 0.031018385663628578, "learning_rate": 0.0001931140015302219, "loss": 0.3816, "step": 6468 }, { "epoch": 0.5240602721970188, "grad_norm": 0.03358032926917076, "learning_rate": 0.00019310950087762728, "loss": 0.3498, "step": 6469 }, { "epoch": 0.5241412832145171, "grad_norm": 0.033210817724466324, "learning_rate": 0.00019310500022503264, "loss": 0.3131, "step": 6470 }, { "epoch": 0.5242222942320156, "grad_norm": 0.029688436537981033, "learning_rate": 0.000193100499572438, "loss": 0.3267, "step": 6471 }, { "epoch": 0.524303305249514, "grad_norm": 0.035529136657714844, "learning_rate": 0.0001930959989198434, "loss": 0.3478, "step": 6472 }, { "epoch": 0.5243843162670123, "grad_norm": 0.03089592605829239, "learning_rate": 0.00019309149826724875, "loss": 0.3441, "step": 6473 }, { "epoch": 0.5244653272845107, "grad_norm": 0.03198695927858353, "learning_rate": 0.00019308699761465414, "loss": 0.3006, "step": 6474 }, { "epoch": 0.524546338302009, "grad_norm": 0.03530554845929146, "learning_rate": 0.00019308249696205952, "loss": 0.4121, "step": 6475 }, { "epoch": 0.5246273493195075, "grad_norm": 0.032105859369039536, "learning_rate": 0.00019307799630946488, "loss": 0.3807, "step": 6476 }, { "epoch": 0.5247083603370059, "grad_norm": 0.028861409053206444, "learning_rate": 0.00019307349565687024, "loss": 0.3244, "step": 6477 }, { "epoch": 0.5247893713545042, "grad_norm": 0.032058510929346085, "learning_rate": 0.00019306899500427563, "loss": 0.3674, "step": 6478 }, { "epoch": 0.5248703823720026, "grad_norm": 0.03300195559859276, "learning_rate": 0.00019306449435168102, "loss": 0.384, "step": 6479 }, { "epoch": 0.5249513933895009, "grad_norm": 0.03070252574980259, "learning_rate": 0.00019305999369908638, "loss": 0.3162, "step": 6480 }, { "epoch": 0.5250324044069994, "grad_norm": 0.029741229489445686, "learning_rate": 0.00019305549304649176, "loss": 0.3501, "step": 6481 }, { "epoch": 0.5251134154244977, "grad_norm": 0.0281755980104208, "learning_rate": 0.00019305099239389712, "loss": 0.3101, "step": 6482 }, { "epoch": 0.5251944264419961, "grad_norm": 0.032033056020736694, "learning_rate": 0.00019304649174130248, "loss": 0.3721, "step": 6483 }, { "epoch": 0.5252754374594945, "grad_norm": 0.030394721776247025, "learning_rate": 0.00019304199108870787, "loss": 0.3891, "step": 6484 }, { "epoch": 0.5253564484769929, "grad_norm": 0.0320914126932621, "learning_rate": 0.00019303749043611326, "loss": 0.374, "step": 6485 }, { "epoch": 0.5254374594944913, "grad_norm": 0.02836509793996811, "learning_rate": 0.00019303298978351862, "loss": 0.3517, "step": 6486 }, { "epoch": 0.5255184705119896, "grad_norm": 0.03872750699520111, "learning_rate": 0.000193028489130924, "loss": 0.4366, "step": 6487 }, { "epoch": 0.525599481529488, "grad_norm": 0.028343653306365013, "learning_rate": 0.00019302398847832937, "loss": 0.3436, "step": 6488 }, { "epoch": 0.5256804925469863, "grad_norm": 0.030183296650648117, "learning_rate": 0.00019301948782573473, "loss": 0.338, "step": 6489 }, { "epoch": 0.5257615035644848, "grad_norm": 0.03298697993159294, "learning_rate": 0.0001930149871731401, "loss": 0.3999, "step": 6490 }, { "epoch": 0.5258425145819832, "grad_norm": 0.02960672415792942, "learning_rate": 0.0001930104865205455, "loss": 0.3683, "step": 6491 }, { "epoch": 0.5259235255994815, "grad_norm": 0.02848297730088234, "learning_rate": 0.00019300598586795086, "loss": 0.3567, "step": 6492 }, { "epoch": 0.5260045366169799, "grad_norm": 0.026230011135339737, "learning_rate": 0.00019300148521535625, "loss": 0.3399, "step": 6493 }, { "epoch": 0.5260855476344782, "grad_norm": 0.032105475664138794, "learning_rate": 0.0001929969845627616, "loss": 0.3805, "step": 6494 }, { "epoch": 0.5261665586519767, "grad_norm": 0.036830004304647446, "learning_rate": 0.00019299248391016697, "loss": 0.3794, "step": 6495 }, { "epoch": 0.5262475696694751, "grad_norm": 0.0314621739089489, "learning_rate": 0.00019298798325757235, "loss": 0.364, "step": 6496 }, { "epoch": 0.5263285806869734, "grad_norm": 0.035781923681497574, "learning_rate": 0.00019298348260497774, "loss": 0.3669, "step": 6497 }, { "epoch": 0.5264095917044718, "grad_norm": 0.03248453512787819, "learning_rate": 0.0001929789819523831, "loss": 0.3862, "step": 6498 }, { "epoch": 0.5264906027219702, "grad_norm": 0.030936148017644882, "learning_rate": 0.0001929744812997885, "loss": 0.317, "step": 6499 }, { "epoch": 0.5265716137394686, "grad_norm": 0.03752376511693001, "learning_rate": 0.00019296998064719385, "loss": 0.3976, "step": 6500 }, { "epoch": 0.526652624756967, "grad_norm": 0.028330406174063683, "learning_rate": 0.0001929654799945992, "loss": 0.3245, "step": 6501 }, { "epoch": 0.5267336357744653, "grad_norm": 0.031165296211838722, "learning_rate": 0.00019296097934200462, "loss": 0.3627, "step": 6502 }, { "epoch": 0.5268146467919637, "grad_norm": 0.0313921794295311, "learning_rate": 0.00019295647868940998, "loss": 0.3257, "step": 6503 }, { "epoch": 0.5268956578094621, "grad_norm": 0.027600150555372238, "learning_rate": 0.00019295197803681534, "loss": 0.311, "step": 6504 }, { "epoch": 0.5269766688269605, "grad_norm": 0.031687039881944656, "learning_rate": 0.00019294747738422073, "loss": 0.3638, "step": 6505 }, { "epoch": 0.5270576798444588, "grad_norm": 0.031069139018654823, "learning_rate": 0.0001929429767316261, "loss": 0.3466, "step": 6506 }, { "epoch": 0.5271386908619572, "grad_norm": 0.033416252583265305, "learning_rate": 0.00019293847607903145, "loss": 0.393, "step": 6507 }, { "epoch": 0.5272197018794557, "grad_norm": 0.03066958859562874, "learning_rate": 0.00019293397542643686, "loss": 0.352, "step": 6508 }, { "epoch": 0.527300712896954, "grad_norm": 0.0345117412507534, "learning_rate": 0.00019292947477384222, "loss": 0.3584, "step": 6509 }, { "epoch": 0.5273817239144524, "grad_norm": 0.031254980713129044, "learning_rate": 0.00019292497412124759, "loss": 0.3252, "step": 6510 }, { "epoch": 0.5274627349319507, "grad_norm": 0.04023100808262825, "learning_rate": 0.00019292047346865297, "loss": 0.3742, "step": 6511 }, { "epoch": 0.5275437459494491, "grad_norm": 0.03604496642947197, "learning_rate": 0.00019291597281605833, "loss": 0.4139, "step": 6512 }, { "epoch": 0.5276247569669476, "grad_norm": 0.02851884625852108, "learning_rate": 0.0001929114721634637, "loss": 0.3424, "step": 6513 }, { "epoch": 0.5277057679844459, "grad_norm": 0.029921604320406914, "learning_rate": 0.0001929069715108691, "loss": 0.3254, "step": 6514 }, { "epoch": 0.5277867790019443, "grad_norm": 0.03419069945812225, "learning_rate": 0.00019290247085827447, "loss": 0.416, "step": 6515 }, { "epoch": 0.5278677900194426, "grad_norm": 0.034494705498218536, "learning_rate": 0.00019289797020567983, "loss": 0.355, "step": 6516 }, { "epoch": 0.527948801036941, "grad_norm": 0.02677858993411064, "learning_rate": 0.00019289346955308521, "loss": 0.2843, "step": 6517 }, { "epoch": 0.5280298120544394, "grad_norm": 0.03140726312994957, "learning_rate": 0.00019288896890049057, "loss": 0.3116, "step": 6518 }, { "epoch": 0.5281108230719378, "grad_norm": 0.030908901244401932, "learning_rate": 0.00019288446824789593, "loss": 0.3422, "step": 6519 }, { "epoch": 0.5281918340894362, "grad_norm": 0.030447175726294518, "learning_rate": 0.00019287996759530135, "loss": 0.3347, "step": 6520 }, { "epoch": 0.5282728451069345, "grad_norm": 0.030320830643177032, "learning_rate": 0.0001928754669427067, "loss": 0.3295, "step": 6521 }, { "epoch": 0.528353856124433, "grad_norm": 0.03139973431825638, "learning_rate": 0.00019287096629011207, "loss": 0.363, "step": 6522 }, { "epoch": 0.5284348671419313, "grad_norm": 0.03257003426551819, "learning_rate": 0.00019286646563751746, "loss": 0.3209, "step": 6523 }, { "epoch": 0.5285158781594297, "grad_norm": 0.029077529907226562, "learning_rate": 0.00019286196498492282, "loss": 0.2955, "step": 6524 }, { "epoch": 0.528596889176928, "grad_norm": 0.035417500883340836, "learning_rate": 0.00019285746433232818, "loss": 0.3413, "step": 6525 }, { "epoch": 0.5286779001944264, "grad_norm": 0.031534593552351, "learning_rate": 0.0001928529636797336, "loss": 0.3356, "step": 6526 }, { "epoch": 0.5287589112119249, "grad_norm": 0.0340360589325428, "learning_rate": 0.00019284846302713895, "loss": 0.3489, "step": 6527 }, { "epoch": 0.5288399222294232, "grad_norm": 0.033144641667604446, "learning_rate": 0.0001928439623745443, "loss": 0.373, "step": 6528 }, { "epoch": 0.5289209332469216, "grad_norm": 0.0342666432261467, "learning_rate": 0.0001928394617219497, "loss": 0.38, "step": 6529 }, { "epoch": 0.5290019442644199, "grad_norm": 0.03981400281190872, "learning_rate": 0.00019283496106935506, "loss": 0.4201, "step": 6530 }, { "epoch": 0.5290829552819183, "grad_norm": 0.03236046060919762, "learning_rate": 0.00019283046041676044, "loss": 0.3718, "step": 6531 }, { "epoch": 0.5291639662994168, "grad_norm": 0.029104501008987427, "learning_rate": 0.00019282595976416583, "loss": 0.3399, "step": 6532 }, { "epoch": 0.5292449773169151, "grad_norm": 0.02896728925406933, "learning_rate": 0.0001928214591115712, "loss": 0.3597, "step": 6533 }, { "epoch": 0.5293259883344135, "grad_norm": 0.028998127207159996, "learning_rate": 0.00019281695845897655, "loss": 0.2995, "step": 6534 }, { "epoch": 0.5294069993519118, "grad_norm": 0.030537080019712448, "learning_rate": 0.00019281245780638194, "loss": 0.367, "step": 6535 }, { "epoch": 0.5294880103694103, "grad_norm": 0.030855638906359673, "learning_rate": 0.0001928079571537873, "loss": 0.3648, "step": 6536 }, { "epoch": 0.5295690213869086, "grad_norm": 0.03002474457025528, "learning_rate": 0.00019280345650119269, "loss": 0.3456, "step": 6537 }, { "epoch": 0.529650032404407, "grad_norm": 0.03153291717171669, "learning_rate": 0.00019279895584859807, "loss": 0.362, "step": 6538 }, { "epoch": 0.5297310434219054, "grad_norm": 0.035764485597610474, "learning_rate": 0.00019279445519600343, "loss": 0.3527, "step": 6539 }, { "epoch": 0.5298120544394037, "grad_norm": 0.028985558077692986, "learning_rate": 0.0001927899545434088, "loss": 0.3014, "step": 6540 }, { "epoch": 0.5298930654569022, "grad_norm": 0.03560829162597656, "learning_rate": 0.00019278545389081418, "loss": 0.3515, "step": 6541 }, { "epoch": 0.5299740764744005, "grad_norm": 0.034974757581949234, "learning_rate": 0.00019278095323821954, "loss": 0.3566, "step": 6542 }, { "epoch": 0.5300550874918989, "grad_norm": 0.0336134247481823, "learning_rate": 0.00019277645258562493, "loss": 0.3807, "step": 6543 }, { "epoch": 0.5301360985093972, "grad_norm": 0.031493667513132095, "learning_rate": 0.00019277195193303031, "loss": 0.3494, "step": 6544 }, { "epoch": 0.5302171095268956, "grad_norm": 0.033709052950143814, "learning_rate": 0.00019276745128043567, "loss": 0.3652, "step": 6545 }, { "epoch": 0.5302981205443941, "grad_norm": 0.033176809549331665, "learning_rate": 0.00019276295062784103, "loss": 0.4018, "step": 6546 }, { "epoch": 0.5303791315618924, "grad_norm": 0.03512312099337578, "learning_rate": 0.00019275844997524642, "loss": 0.396, "step": 6547 }, { "epoch": 0.5304601425793908, "grad_norm": 0.03072085976600647, "learning_rate": 0.00019275394932265178, "loss": 0.3562, "step": 6548 }, { "epoch": 0.5305411535968891, "grad_norm": 0.02936788834631443, "learning_rate": 0.00019274944867005717, "loss": 0.3802, "step": 6549 }, { "epoch": 0.5306221646143876, "grad_norm": 0.028300996869802475, "learning_rate": 0.00019274494801746256, "loss": 0.3461, "step": 6550 }, { "epoch": 0.530703175631886, "grad_norm": 0.032628510147333145, "learning_rate": 0.00019274044736486792, "loss": 0.3159, "step": 6551 }, { "epoch": 0.5307841866493843, "grad_norm": 0.028229277580976486, "learning_rate": 0.00019273594671227328, "loss": 0.3754, "step": 6552 }, { "epoch": 0.5308651976668827, "grad_norm": 0.029050737619400024, "learning_rate": 0.00019273144605967866, "loss": 0.3378, "step": 6553 }, { "epoch": 0.530946208684381, "grad_norm": 0.034775856882333755, "learning_rate": 0.00019272694540708405, "loss": 0.3415, "step": 6554 }, { "epoch": 0.5310272197018795, "grad_norm": 0.03148787468671799, "learning_rate": 0.0001927224447544894, "loss": 0.3355, "step": 6555 }, { "epoch": 0.5311082307193778, "grad_norm": 0.032840415835380554, "learning_rate": 0.0001927179441018948, "loss": 0.3908, "step": 6556 }, { "epoch": 0.5311892417368762, "grad_norm": 0.03288496658205986, "learning_rate": 0.00019271344344930016, "loss": 0.3472, "step": 6557 }, { "epoch": 0.5312702527543746, "grad_norm": 0.032925862818956375, "learning_rate": 0.00019270894279670552, "loss": 0.3843, "step": 6558 }, { "epoch": 0.531351263771873, "grad_norm": 0.02724250592291355, "learning_rate": 0.0001927044421441109, "loss": 0.3, "step": 6559 }, { "epoch": 0.5314322747893714, "grad_norm": 0.03701140731573105, "learning_rate": 0.0001926999414915163, "loss": 0.4249, "step": 6560 }, { "epoch": 0.5315132858068697, "grad_norm": 0.02940862812101841, "learning_rate": 0.00019269544083892165, "loss": 0.3136, "step": 6561 }, { "epoch": 0.5315942968243681, "grad_norm": 0.03185543790459633, "learning_rate": 0.00019269094018632704, "loss": 0.3823, "step": 6562 }, { "epoch": 0.5316753078418665, "grad_norm": 0.035884223878383636, "learning_rate": 0.0001926864395337324, "loss": 0.3259, "step": 6563 }, { "epoch": 0.5317563188593649, "grad_norm": 0.0321180559694767, "learning_rate": 0.00019268193888113776, "loss": 0.4205, "step": 6564 }, { "epoch": 0.5318373298768633, "grad_norm": 0.029376648366451263, "learning_rate": 0.00019267743822854315, "loss": 0.3507, "step": 6565 }, { "epoch": 0.5319183408943616, "grad_norm": 0.030484110116958618, "learning_rate": 0.00019267293757594853, "loss": 0.3006, "step": 6566 }, { "epoch": 0.53199935191186, "grad_norm": 0.031638842076063156, "learning_rate": 0.0001926684369233539, "loss": 0.3321, "step": 6567 }, { "epoch": 0.5320803629293583, "grad_norm": 0.02748614177107811, "learning_rate": 0.00019266393627075928, "loss": 0.323, "step": 6568 }, { "epoch": 0.5321613739468568, "grad_norm": 0.03108358010649681, "learning_rate": 0.00019265943561816464, "loss": 0.3184, "step": 6569 }, { "epoch": 0.5322423849643552, "grad_norm": 0.033117424696683884, "learning_rate": 0.00019265493496557, "loss": 0.3431, "step": 6570 }, { "epoch": 0.5323233959818535, "grad_norm": 0.03042810969054699, "learning_rate": 0.0001926504343129754, "loss": 0.3439, "step": 6571 }, { "epoch": 0.5324044069993519, "grad_norm": 0.029681673273444176, "learning_rate": 0.00019264593366038078, "loss": 0.3546, "step": 6572 }, { "epoch": 0.5324854180168503, "grad_norm": 0.03175319731235504, "learning_rate": 0.00019264143300778614, "loss": 0.3422, "step": 6573 }, { "epoch": 0.5325664290343487, "grad_norm": 0.02874058112502098, "learning_rate": 0.00019263693235519152, "loss": 0.3849, "step": 6574 }, { "epoch": 0.532647440051847, "grad_norm": 0.030932355672121048, "learning_rate": 0.00019263243170259688, "loss": 0.3795, "step": 6575 }, { "epoch": 0.5327284510693454, "grad_norm": 0.029642682522535324, "learning_rate": 0.00019262793105000224, "loss": 0.3304, "step": 6576 }, { "epoch": 0.5328094620868438, "grad_norm": 0.029917187988758087, "learning_rate": 0.00019262343039740763, "loss": 0.3417, "step": 6577 }, { "epoch": 0.5328904731043422, "grad_norm": 0.03342309966683388, "learning_rate": 0.00019261892974481302, "loss": 0.3096, "step": 6578 }, { "epoch": 0.5329714841218406, "grad_norm": 0.03146572411060333, "learning_rate": 0.00019261442909221838, "loss": 0.327, "step": 6579 }, { "epoch": 0.5330524951393389, "grad_norm": 0.030505899339914322, "learning_rate": 0.00019260992843962376, "loss": 0.3738, "step": 6580 }, { "epoch": 0.5331335061568373, "grad_norm": 0.032242707908153534, "learning_rate": 0.00019260542778702912, "loss": 0.3503, "step": 6581 }, { "epoch": 0.5332145171743357, "grad_norm": 0.029962383210659027, "learning_rate": 0.00019260092713443448, "loss": 0.3407, "step": 6582 }, { "epoch": 0.5332955281918341, "grad_norm": 0.03191268444061279, "learning_rate": 0.0001925964264818399, "loss": 0.3599, "step": 6583 }, { "epoch": 0.5333765392093325, "grad_norm": 0.032974135130643845, "learning_rate": 0.00019259192582924526, "loss": 0.3547, "step": 6584 }, { "epoch": 0.5334575502268308, "grad_norm": 0.03472721949219704, "learning_rate": 0.00019258742517665062, "loss": 0.3788, "step": 6585 }, { "epoch": 0.5335385612443292, "grad_norm": 0.03367713838815689, "learning_rate": 0.000192582924524056, "loss": 0.3431, "step": 6586 }, { "epoch": 0.5336195722618277, "grad_norm": 0.0349598303437233, "learning_rate": 0.00019257842387146137, "loss": 0.3453, "step": 6587 }, { "epoch": 0.533700583279326, "grad_norm": 0.03054990991950035, "learning_rate": 0.00019257392321886673, "loss": 0.3561, "step": 6588 }, { "epoch": 0.5337815942968244, "grad_norm": 0.0322737917304039, "learning_rate": 0.00019256942256627214, "loss": 0.3745, "step": 6589 }, { "epoch": 0.5338626053143227, "grad_norm": 0.03227555379271507, "learning_rate": 0.0001925649219136775, "loss": 0.3677, "step": 6590 }, { "epoch": 0.5339436163318211, "grad_norm": 0.031967077404260635, "learning_rate": 0.00019256042126108286, "loss": 0.3392, "step": 6591 }, { "epoch": 0.5340246273493195, "grad_norm": 0.03309684991836548, "learning_rate": 0.00019255592060848825, "loss": 0.3521, "step": 6592 }, { "epoch": 0.5341056383668179, "grad_norm": 0.033072203397750854, "learning_rate": 0.0001925514199558936, "loss": 0.3997, "step": 6593 }, { "epoch": 0.5341866493843163, "grad_norm": 0.031067723408341408, "learning_rate": 0.00019254691930329897, "loss": 0.3181, "step": 6594 }, { "epoch": 0.5342676604018146, "grad_norm": 0.030648205429315567, "learning_rate": 0.00019254241865070438, "loss": 0.3506, "step": 6595 }, { "epoch": 0.5343486714193131, "grad_norm": 0.030988864600658417, "learning_rate": 0.00019253791799810974, "loss": 0.3297, "step": 6596 }, { "epoch": 0.5344296824368114, "grad_norm": 0.03229081630706787, "learning_rate": 0.0001925334173455151, "loss": 0.3435, "step": 6597 }, { "epoch": 0.5345106934543098, "grad_norm": 0.036464571952819824, "learning_rate": 0.0001925289166929205, "loss": 0.3873, "step": 6598 }, { "epoch": 0.5345917044718081, "grad_norm": 0.03050421178340912, "learning_rate": 0.00019252441604032585, "loss": 0.3157, "step": 6599 }, { "epoch": 0.5346727154893065, "grad_norm": 0.032148249447345734, "learning_rate": 0.0001925199153877312, "loss": 0.3353, "step": 6600 }, { "epoch": 0.534753726506805, "grad_norm": 0.038223471492528915, "learning_rate": 0.00019251541473513662, "loss": 0.3748, "step": 6601 }, { "epoch": 0.5348347375243033, "grad_norm": 0.035033948719501495, "learning_rate": 0.00019251091408254198, "loss": 0.3619, "step": 6602 }, { "epoch": 0.5349157485418017, "grad_norm": 0.028229834511876106, "learning_rate": 0.00019250641342994734, "loss": 0.341, "step": 6603 }, { "epoch": 0.5349967595593, "grad_norm": 0.030927540734410286, "learning_rate": 0.00019250191277735273, "loss": 0.3958, "step": 6604 }, { "epoch": 0.5350777705767984, "grad_norm": 0.03158433735370636, "learning_rate": 0.0001924974121247581, "loss": 0.3543, "step": 6605 }, { "epoch": 0.5351587815942969, "grad_norm": 0.03204088285565376, "learning_rate": 0.00019249291147216345, "loss": 0.3538, "step": 6606 }, { "epoch": 0.5352397926117952, "grad_norm": 0.028660694137215614, "learning_rate": 0.00019248841081956887, "loss": 0.3721, "step": 6607 }, { "epoch": 0.5353208036292936, "grad_norm": 0.030562499538064003, "learning_rate": 0.00019248391016697423, "loss": 0.337, "step": 6608 }, { "epoch": 0.5354018146467919, "grad_norm": 0.03120999038219452, "learning_rate": 0.00019247940951437959, "loss": 0.3575, "step": 6609 }, { "epoch": 0.5354828256642904, "grad_norm": 0.030365686863660812, "learning_rate": 0.00019247490886178497, "loss": 0.3578, "step": 6610 }, { "epoch": 0.5355638366817888, "grad_norm": 0.03199993818998337, "learning_rate": 0.00019247040820919033, "loss": 0.3185, "step": 6611 }, { "epoch": 0.5356448476992871, "grad_norm": 0.030325790867209435, "learning_rate": 0.00019246590755659572, "loss": 0.3804, "step": 6612 }, { "epoch": 0.5357258587167855, "grad_norm": 0.030955428257584572, "learning_rate": 0.0001924614069040011, "loss": 0.361, "step": 6613 }, { "epoch": 0.5358068697342838, "grad_norm": 0.030837949365377426, "learning_rate": 0.00019245690625140647, "loss": 0.3887, "step": 6614 }, { "epoch": 0.5358878807517823, "grad_norm": 0.029817679896950722, "learning_rate": 0.00019245240559881183, "loss": 0.363, "step": 6615 }, { "epoch": 0.5359688917692806, "grad_norm": 0.029662759974598885, "learning_rate": 0.00019244790494621721, "loss": 0.3621, "step": 6616 }, { "epoch": 0.536049902786779, "grad_norm": 0.028333280235528946, "learning_rate": 0.00019244340429362257, "loss": 0.3288, "step": 6617 }, { "epoch": 0.5361309138042774, "grad_norm": 0.03501308336853981, "learning_rate": 0.00019243890364102796, "loss": 0.3274, "step": 6618 }, { "epoch": 0.5362119248217757, "grad_norm": 0.03427104651927948, "learning_rate": 0.00019243440298843335, "loss": 0.3607, "step": 6619 }, { "epoch": 0.5362929358392742, "grad_norm": 0.028717700392007828, "learning_rate": 0.0001924299023358387, "loss": 0.333, "step": 6620 }, { "epoch": 0.5363739468567725, "grad_norm": 0.03295782208442688, "learning_rate": 0.00019242540168324407, "loss": 0.3858, "step": 6621 }, { "epoch": 0.5364549578742709, "grad_norm": 0.035240571945905685, "learning_rate": 0.00019242090103064946, "loss": 0.3862, "step": 6622 }, { "epoch": 0.5365359688917692, "grad_norm": 0.032001230865716934, "learning_rate": 0.00019241640037805482, "loss": 0.3435, "step": 6623 }, { "epoch": 0.5366169799092677, "grad_norm": 0.031990185379981995, "learning_rate": 0.0001924118997254602, "loss": 0.3606, "step": 6624 }, { "epoch": 0.5366979909267661, "grad_norm": 0.03834180533885956, "learning_rate": 0.0001924073990728656, "loss": 0.355, "step": 6625 }, { "epoch": 0.5367790019442644, "grad_norm": 0.030459698289632797, "learning_rate": 0.00019240289842027095, "loss": 0.3294, "step": 6626 }, { "epoch": 0.5368600129617628, "grad_norm": 0.034062933176755905, "learning_rate": 0.0001923983977676763, "loss": 0.4105, "step": 6627 }, { "epoch": 0.5369410239792611, "grad_norm": 0.02938479371368885, "learning_rate": 0.0001923938971150817, "loss": 0.3429, "step": 6628 }, { "epoch": 0.5370220349967596, "grad_norm": 0.03130931779742241, "learning_rate": 0.00019238939646248706, "loss": 0.3225, "step": 6629 }, { "epoch": 0.537103046014258, "grad_norm": 0.03414647653698921, "learning_rate": 0.00019238489580989244, "loss": 0.3901, "step": 6630 }, { "epoch": 0.5371840570317563, "grad_norm": 0.029339836910367012, "learning_rate": 0.00019238039515729783, "loss": 0.3331, "step": 6631 }, { "epoch": 0.5372650680492547, "grad_norm": 0.028171516954898834, "learning_rate": 0.0001923758945047032, "loss": 0.308, "step": 6632 }, { "epoch": 0.537346079066753, "grad_norm": 0.03288857266306877, "learning_rate": 0.00019237139385210855, "loss": 0.3761, "step": 6633 }, { "epoch": 0.5374270900842515, "grad_norm": 0.028602857142686844, "learning_rate": 0.00019236689319951394, "loss": 0.3238, "step": 6634 }, { "epoch": 0.5375081011017498, "grad_norm": 0.02909966930747032, "learning_rate": 0.00019236239254691933, "loss": 0.3295, "step": 6635 }, { "epoch": 0.5375891121192482, "grad_norm": 0.03496485948562622, "learning_rate": 0.00019235789189432469, "loss": 0.3576, "step": 6636 }, { "epoch": 0.5376701231367466, "grad_norm": 0.03171485289931297, "learning_rate": 0.00019235339124173007, "loss": 0.346, "step": 6637 }, { "epoch": 0.537751134154245, "grad_norm": 0.028812715783715248, "learning_rate": 0.00019234889058913543, "loss": 0.2998, "step": 6638 }, { "epoch": 0.5378321451717434, "grad_norm": 0.030826063826680183, "learning_rate": 0.0001923443899365408, "loss": 0.3188, "step": 6639 }, { "epoch": 0.5379131561892417, "grad_norm": 0.03311021998524666, "learning_rate": 0.00019233988928394618, "loss": 0.3608, "step": 6640 }, { "epoch": 0.5379941672067401, "grad_norm": 0.030214812606573105, "learning_rate": 0.00019233538863135157, "loss": 0.373, "step": 6641 }, { "epoch": 0.5380751782242384, "grad_norm": 0.030127253383398056, "learning_rate": 0.00019233088797875693, "loss": 0.3482, "step": 6642 }, { "epoch": 0.5381561892417369, "grad_norm": 0.030809765681624413, "learning_rate": 0.00019232638732616231, "loss": 0.336, "step": 6643 }, { "epoch": 0.5382372002592353, "grad_norm": 0.02976158633828163, "learning_rate": 0.00019232188667356767, "loss": 0.3253, "step": 6644 }, { "epoch": 0.5383182112767336, "grad_norm": 0.03155311197042465, "learning_rate": 0.00019231738602097304, "loss": 0.3899, "step": 6645 }, { "epoch": 0.538399222294232, "grad_norm": 0.02847321890294552, "learning_rate": 0.00019231288536837842, "loss": 0.3457, "step": 6646 }, { "epoch": 0.5384802333117304, "grad_norm": 0.025865985080599785, "learning_rate": 0.0001923083847157838, "loss": 0.3404, "step": 6647 }, { "epoch": 0.5385612443292288, "grad_norm": 0.03417167067527771, "learning_rate": 0.00019230388406318917, "loss": 0.3644, "step": 6648 }, { "epoch": 0.5386422553467272, "grad_norm": 0.03334147855639458, "learning_rate": 0.00019229938341059456, "loss": 0.3967, "step": 6649 }, { "epoch": 0.5387232663642255, "grad_norm": 0.03236551582813263, "learning_rate": 0.00019229488275799992, "loss": 0.3303, "step": 6650 }, { "epoch": 0.5388042773817239, "grad_norm": 0.030227910727262497, "learning_rate": 0.00019229038210540528, "loss": 0.3705, "step": 6651 }, { "epoch": 0.5388852883992223, "grad_norm": 0.03478400781750679, "learning_rate": 0.00019228588145281066, "loss": 0.3776, "step": 6652 }, { "epoch": 0.5389662994167207, "grad_norm": 0.03594156354665756, "learning_rate": 0.00019228138080021605, "loss": 0.3548, "step": 6653 }, { "epoch": 0.539047310434219, "grad_norm": 0.02861880138516426, "learning_rate": 0.0001922768801476214, "loss": 0.3769, "step": 6654 }, { "epoch": 0.5391283214517174, "grad_norm": 0.047353651374578476, "learning_rate": 0.0001922723794950268, "loss": 0.3352, "step": 6655 }, { "epoch": 0.5392093324692158, "grad_norm": 0.030897874385118484, "learning_rate": 0.00019226787884243216, "loss": 0.3666, "step": 6656 }, { "epoch": 0.5392903434867142, "grad_norm": 0.031044932082295418, "learning_rate": 0.00019226337818983752, "loss": 0.3569, "step": 6657 }, { "epoch": 0.5393713545042126, "grad_norm": 0.02874862030148506, "learning_rate": 0.0001922588775372429, "loss": 0.302, "step": 6658 }, { "epoch": 0.5394523655217109, "grad_norm": 0.032438814640045166, "learning_rate": 0.0001922543768846483, "loss": 0.3438, "step": 6659 }, { "epoch": 0.5395333765392093, "grad_norm": 0.03133894130587578, "learning_rate": 0.00019224987623205365, "loss": 0.3543, "step": 6660 }, { "epoch": 0.5396143875567078, "grad_norm": 0.029364844784140587, "learning_rate": 0.00019224537557945904, "loss": 0.351, "step": 6661 }, { "epoch": 0.5396953985742061, "grad_norm": 0.024655230343341827, "learning_rate": 0.0001922408749268644, "loss": 0.2798, "step": 6662 }, { "epoch": 0.5397764095917045, "grad_norm": 0.03577417880296707, "learning_rate": 0.00019223637427426976, "loss": 0.3507, "step": 6663 }, { "epoch": 0.5398574206092028, "grad_norm": 0.028531793504953384, "learning_rate": 0.00019223187362167517, "loss": 0.3693, "step": 6664 }, { "epoch": 0.5399384316267012, "grad_norm": 0.027541454881429672, "learning_rate": 0.00019222737296908053, "loss": 0.3099, "step": 6665 }, { "epoch": 0.5400194426441997, "grad_norm": 0.02836167998611927, "learning_rate": 0.0001922228723164859, "loss": 0.3692, "step": 6666 }, { "epoch": 0.540100453661698, "grad_norm": 0.030694272369146347, "learning_rate": 0.00019221837166389128, "loss": 0.3601, "step": 6667 }, { "epoch": 0.5401814646791964, "grad_norm": 0.028306683525443077, "learning_rate": 0.00019221387101129664, "loss": 0.3149, "step": 6668 }, { "epoch": 0.5402624756966947, "grad_norm": 0.0326666533946991, "learning_rate": 0.000192209370358702, "loss": 0.3668, "step": 6669 }, { "epoch": 0.5403434867141931, "grad_norm": 0.02991878241300583, "learning_rate": 0.00019220486970610742, "loss": 0.306, "step": 6670 }, { "epoch": 0.5404244977316915, "grad_norm": 0.03173675015568733, "learning_rate": 0.00019220036905351278, "loss": 0.3454, "step": 6671 }, { "epoch": 0.5405055087491899, "grad_norm": 0.030415697023272514, "learning_rate": 0.00019219586840091814, "loss": 0.3276, "step": 6672 }, { "epoch": 0.5405865197666883, "grad_norm": 0.0356595441699028, "learning_rate": 0.00019219136774832352, "loss": 0.3416, "step": 6673 }, { "epoch": 0.5406675307841866, "grad_norm": 0.03170002996921539, "learning_rate": 0.00019218686709572888, "loss": 0.308, "step": 6674 }, { "epoch": 0.5407485418016851, "grad_norm": 0.03190978989005089, "learning_rate": 0.00019218236644313424, "loss": 0.3746, "step": 6675 }, { "epoch": 0.5408295528191834, "grad_norm": 0.029476001858711243, "learning_rate": 0.00019217786579053966, "loss": 0.3553, "step": 6676 }, { "epoch": 0.5409105638366818, "grad_norm": 0.02875671535730362, "learning_rate": 0.00019217336513794502, "loss": 0.333, "step": 6677 }, { "epoch": 0.5409915748541801, "grad_norm": 0.03290131688117981, "learning_rate": 0.00019216886448535038, "loss": 0.3558, "step": 6678 }, { "epoch": 0.5410725858716785, "grad_norm": 0.0369555726647377, "learning_rate": 0.00019216436383275576, "loss": 0.4028, "step": 6679 }, { "epoch": 0.541153596889177, "grad_norm": 0.03486994653940201, "learning_rate": 0.00019215986318016112, "loss": 0.346, "step": 6680 }, { "epoch": 0.5412346079066753, "grad_norm": 0.030410174280405045, "learning_rate": 0.00019215536252756648, "loss": 0.3199, "step": 6681 }, { "epoch": 0.5413156189241737, "grad_norm": 0.031043974682688713, "learning_rate": 0.0001921508618749719, "loss": 0.3585, "step": 6682 }, { "epoch": 0.541396629941672, "grad_norm": 0.03518590331077576, "learning_rate": 0.00019214636122237726, "loss": 0.3196, "step": 6683 }, { "epoch": 0.5414776409591704, "grad_norm": 0.030820732936263084, "learning_rate": 0.00019214186056978262, "loss": 0.338, "step": 6684 }, { "epoch": 0.5415586519766689, "grad_norm": 0.02911275252699852, "learning_rate": 0.000192137359917188, "loss": 0.3443, "step": 6685 }, { "epoch": 0.5416396629941672, "grad_norm": 0.02568489871919155, "learning_rate": 0.00019213285926459337, "loss": 0.2971, "step": 6686 }, { "epoch": 0.5417206740116656, "grad_norm": 0.031865790486335754, "learning_rate": 0.00019212835861199875, "loss": 0.3521, "step": 6687 }, { "epoch": 0.5418016850291639, "grad_norm": 0.02974405325949192, "learning_rate": 0.00019212385795940414, "loss": 0.3538, "step": 6688 }, { "epoch": 0.5418826960466624, "grad_norm": 0.034740883857011795, "learning_rate": 0.0001921193573068095, "loss": 0.3696, "step": 6689 }, { "epoch": 0.5419637070641607, "grad_norm": 0.03494054079055786, "learning_rate": 0.00019211485665421486, "loss": 0.3256, "step": 6690 }, { "epoch": 0.5420447180816591, "grad_norm": 0.031966958194971085, "learning_rate": 0.00019211035600162025, "loss": 0.3667, "step": 6691 }, { "epoch": 0.5421257290991575, "grad_norm": 0.029948875308036804, "learning_rate": 0.0001921058553490256, "loss": 0.3557, "step": 6692 }, { "epoch": 0.5422067401166558, "grad_norm": 0.02980083040893078, "learning_rate": 0.000192101354696431, "loss": 0.3299, "step": 6693 }, { "epoch": 0.5422877511341543, "grad_norm": 0.029759561643004417, "learning_rate": 0.00019209685404383638, "loss": 0.3587, "step": 6694 }, { "epoch": 0.5423687621516526, "grad_norm": 0.031510476022958755, "learning_rate": 0.00019209235339124174, "loss": 0.3741, "step": 6695 }, { "epoch": 0.542449773169151, "grad_norm": 0.03240029513835907, "learning_rate": 0.0001920878527386471, "loss": 0.3901, "step": 6696 }, { "epoch": 0.5425307841866494, "grad_norm": 0.03050781786441803, "learning_rate": 0.0001920833520860525, "loss": 0.3457, "step": 6697 }, { "epoch": 0.5426117952041478, "grad_norm": 0.030108438804745674, "learning_rate": 0.00019207885143345785, "loss": 0.3498, "step": 6698 }, { "epoch": 0.5426928062216462, "grad_norm": 0.0339641310274601, "learning_rate": 0.00019207435078086324, "loss": 0.3551, "step": 6699 }, { "epoch": 0.5427738172391445, "grad_norm": 0.030506672337651253, "learning_rate": 0.00019206985012826862, "loss": 0.343, "step": 6700 }, { "epoch": 0.5428548282566429, "grad_norm": 0.03556285798549652, "learning_rate": 0.00019206534947567398, "loss": 0.3685, "step": 6701 }, { "epoch": 0.5429358392741412, "grad_norm": 0.033519964665174484, "learning_rate": 0.00019206084882307934, "loss": 0.3473, "step": 6702 }, { "epoch": 0.5430168502916397, "grad_norm": 0.030523715540766716, "learning_rate": 0.00019205634817048473, "loss": 0.3449, "step": 6703 }, { "epoch": 0.5430978613091381, "grad_norm": 0.03125949949026108, "learning_rate": 0.0001920518475178901, "loss": 0.3239, "step": 6704 }, { "epoch": 0.5431788723266364, "grad_norm": 0.03124059922993183, "learning_rate": 0.00019204734686529548, "loss": 0.3408, "step": 6705 }, { "epoch": 0.5432598833441348, "grad_norm": 0.029732635244727135, "learning_rate": 0.00019204284621270087, "loss": 0.3434, "step": 6706 }, { "epoch": 0.5433408943616331, "grad_norm": 0.03286097198724747, "learning_rate": 0.00019203834556010623, "loss": 0.3496, "step": 6707 }, { "epoch": 0.5434219053791316, "grad_norm": 0.03427884355187416, "learning_rate": 0.00019203384490751159, "loss": 0.3767, "step": 6708 }, { "epoch": 0.54350291639663, "grad_norm": 0.02995665930211544, "learning_rate": 0.00019202934425491697, "loss": 0.3233, "step": 6709 }, { "epoch": 0.5435839274141283, "grad_norm": 0.031802188605070114, "learning_rate": 0.00019202484360232233, "loss": 0.2948, "step": 6710 }, { "epoch": 0.5436649384316267, "grad_norm": 0.03708088397979736, "learning_rate": 0.00019202034294972772, "loss": 0.3585, "step": 6711 }, { "epoch": 0.5437459494491251, "grad_norm": 0.03856736421585083, "learning_rate": 0.0001920158422971331, "loss": 0.3743, "step": 6712 }, { "epoch": 0.5438269604666235, "grad_norm": 0.027673332020640373, "learning_rate": 0.00019201134164453847, "loss": 0.3266, "step": 6713 }, { "epoch": 0.5439079714841218, "grad_norm": 0.02969091199338436, "learning_rate": 0.00019200684099194383, "loss": 0.3063, "step": 6714 }, { "epoch": 0.5439889825016202, "grad_norm": 0.03220966458320618, "learning_rate": 0.00019200234033934921, "loss": 0.375, "step": 6715 }, { "epoch": 0.5440699935191186, "grad_norm": 0.032075174152851105, "learning_rate": 0.0001919978396867546, "loss": 0.3759, "step": 6716 }, { "epoch": 0.544151004536617, "grad_norm": 0.029770556837320328, "learning_rate": 0.00019199333903415996, "loss": 0.3245, "step": 6717 }, { "epoch": 0.5442320155541154, "grad_norm": 0.030717231333255768, "learning_rate": 0.00019198883838156535, "loss": 0.3231, "step": 6718 }, { "epoch": 0.5443130265716137, "grad_norm": 0.037240952253341675, "learning_rate": 0.0001919843377289707, "loss": 0.3672, "step": 6719 }, { "epoch": 0.5443940375891121, "grad_norm": 0.029282161965966225, "learning_rate": 0.00019197983707637607, "loss": 0.3592, "step": 6720 }, { "epoch": 0.5444750486066104, "grad_norm": 0.03133406862616539, "learning_rate": 0.00019197533642378146, "loss": 0.3544, "step": 6721 }, { "epoch": 0.5445560596241089, "grad_norm": 0.03557344898581505, "learning_rate": 0.00019197083577118684, "loss": 0.3298, "step": 6722 }, { "epoch": 0.5446370706416073, "grad_norm": 0.03574608638882637, "learning_rate": 0.0001919663351185922, "loss": 0.3927, "step": 6723 }, { "epoch": 0.5447180816591056, "grad_norm": 0.03001011349260807, "learning_rate": 0.0001919618344659976, "loss": 0.3612, "step": 6724 }, { "epoch": 0.544799092676604, "grad_norm": 0.030254319310188293, "learning_rate": 0.00019195733381340295, "loss": 0.3542, "step": 6725 }, { "epoch": 0.5448801036941024, "grad_norm": 0.032271239906549454, "learning_rate": 0.0001919528331608083, "loss": 0.3552, "step": 6726 }, { "epoch": 0.5449611147116008, "grad_norm": 0.03296198323369026, "learning_rate": 0.0001919483325082137, "loss": 0.3746, "step": 6727 }, { "epoch": 0.5450421257290992, "grad_norm": 0.03257940709590912, "learning_rate": 0.00019194383185561908, "loss": 0.3433, "step": 6728 }, { "epoch": 0.5451231367465975, "grad_norm": 0.03006012551486492, "learning_rate": 0.00019193933120302444, "loss": 0.3734, "step": 6729 }, { "epoch": 0.5452041477640959, "grad_norm": 0.032573532313108444, "learning_rate": 0.00019193483055042983, "loss": 0.3438, "step": 6730 }, { "epoch": 0.5452851587815943, "grad_norm": 0.05190266668796539, "learning_rate": 0.0001919303298978352, "loss": 0.3493, "step": 6731 }, { "epoch": 0.5453661697990927, "grad_norm": 0.029056159779429436, "learning_rate": 0.00019192582924524055, "loss": 0.3305, "step": 6732 }, { "epoch": 0.545447180816591, "grad_norm": 0.02668183483183384, "learning_rate": 0.00019192132859264594, "loss": 0.3358, "step": 6733 }, { "epoch": 0.5455281918340894, "grad_norm": 0.035193026065826416, "learning_rate": 0.00019191682794005133, "loss": 0.3606, "step": 6734 }, { "epoch": 0.5456092028515879, "grad_norm": 0.031068945303559303, "learning_rate": 0.0001919123272874567, "loss": 0.3423, "step": 6735 }, { "epoch": 0.5456902138690862, "grad_norm": 0.03662171959877014, "learning_rate": 0.00019190782663486207, "loss": 0.4027, "step": 6736 }, { "epoch": 0.5457712248865846, "grad_norm": 0.02608422189950943, "learning_rate": 0.00019190332598226743, "loss": 0.3066, "step": 6737 }, { "epoch": 0.5458522359040829, "grad_norm": 0.02757170982658863, "learning_rate": 0.0001918988253296728, "loss": 0.3084, "step": 6738 }, { "epoch": 0.5459332469215813, "grad_norm": 0.03273205831646919, "learning_rate": 0.0001918943246770782, "loss": 0.3567, "step": 6739 }, { "epoch": 0.5460142579390798, "grad_norm": 0.035114750266075134, "learning_rate": 0.00019188982402448357, "loss": 0.3655, "step": 6740 }, { "epoch": 0.5460952689565781, "grad_norm": 0.027389245107769966, "learning_rate": 0.00019188532337188893, "loss": 0.3515, "step": 6741 }, { "epoch": 0.5461762799740765, "grad_norm": 0.03340989723801613, "learning_rate": 0.00019188082271929432, "loss": 0.3579, "step": 6742 }, { "epoch": 0.5462572909915748, "grad_norm": 0.029114002361893654, "learning_rate": 0.00019187632206669968, "loss": 0.3715, "step": 6743 }, { "epoch": 0.5463383020090732, "grad_norm": 0.028086459264159203, "learning_rate": 0.00019187182141410504, "loss": 0.3447, "step": 6744 }, { "epoch": 0.5464193130265717, "grad_norm": 0.03181225061416626, "learning_rate": 0.00019186732076151045, "loss": 0.3363, "step": 6745 }, { "epoch": 0.54650032404407, "grad_norm": 0.03511791676282883, "learning_rate": 0.0001918628201089158, "loss": 0.378, "step": 6746 }, { "epoch": 0.5465813350615684, "grad_norm": 0.031299296766519547, "learning_rate": 0.00019185831945632117, "loss": 0.3503, "step": 6747 }, { "epoch": 0.5466623460790667, "grad_norm": 0.025090057402849197, "learning_rate": 0.00019185381880372656, "loss": 0.2959, "step": 6748 }, { "epoch": 0.5467433570965652, "grad_norm": 0.030407389625906944, "learning_rate": 0.00019184931815113192, "loss": 0.3149, "step": 6749 }, { "epoch": 0.5468243681140635, "grad_norm": 0.03297053650021553, "learning_rate": 0.00019184481749853728, "loss": 0.3589, "step": 6750 }, { "epoch": 0.5469053791315619, "grad_norm": 0.02942829765379429, "learning_rate": 0.0001918403168459427, "loss": 0.3376, "step": 6751 }, { "epoch": 0.5469863901490603, "grad_norm": 0.027623575180768967, "learning_rate": 0.00019183581619334805, "loss": 0.3155, "step": 6752 }, { "epoch": 0.5470674011665586, "grad_norm": 0.029620088636875153, "learning_rate": 0.0001918313155407534, "loss": 0.3602, "step": 6753 }, { "epoch": 0.5471484121840571, "grad_norm": 0.035274140536785126, "learning_rate": 0.0001918268148881588, "loss": 0.3609, "step": 6754 }, { "epoch": 0.5472294232015554, "grad_norm": 0.039904288947582245, "learning_rate": 0.00019182231423556416, "loss": 0.4022, "step": 6755 }, { "epoch": 0.5473104342190538, "grad_norm": 0.028668256476521492, "learning_rate": 0.00019181781358296952, "loss": 0.3206, "step": 6756 }, { "epoch": 0.5473914452365521, "grad_norm": 0.030775291845202446, "learning_rate": 0.00019181331293037493, "loss": 0.3432, "step": 6757 }, { "epoch": 0.5474724562540505, "grad_norm": 0.03100411407649517, "learning_rate": 0.0001918088122777803, "loss": 0.3562, "step": 6758 }, { "epoch": 0.547553467271549, "grad_norm": 0.03083074279129505, "learning_rate": 0.00019180431162518565, "loss": 0.3479, "step": 6759 }, { "epoch": 0.5476344782890473, "grad_norm": 0.029746398329734802, "learning_rate": 0.00019179981097259104, "loss": 0.3299, "step": 6760 }, { "epoch": 0.5477154893065457, "grad_norm": 0.029062218964099884, "learning_rate": 0.0001917953103199964, "loss": 0.3753, "step": 6761 }, { "epoch": 0.547796500324044, "grad_norm": 0.03341421112418175, "learning_rate": 0.00019179080966740176, "loss": 0.3101, "step": 6762 }, { "epoch": 0.5478775113415425, "grad_norm": 0.03206118568778038, "learning_rate": 0.00019178630901480717, "loss": 0.3661, "step": 6763 }, { "epoch": 0.5479585223590409, "grad_norm": 0.029552146792411804, "learning_rate": 0.00019178180836221253, "loss": 0.3376, "step": 6764 }, { "epoch": 0.5480395333765392, "grad_norm": 0.028868060559034348, "learning_rate": 0.0001917773077096179, "loss": 0.3225, "step": 6765 }, { "epoch": 0.5481205443940376, "grad_norm": 0.02880750596523285, "learning_rate": 0.00019177280705702328, "loss": 0.3137, "step": 6766 }, { "epoch": 0.5482015554115359, "grad_norm": 0.03132180869579315, "learning_rate": 0.00019176830640442864, "loss": 0.3225, "step": 6767 }, { "epoch": 0.5482825664290344, "grad_norm": 0.030915534123778343, "learning_rate": 0.00019176380575183403, "loss": 0.337, "step": 6768 }, { "epoch": 0.5483635774465327, "grad_norm": 0.02946346439421177, "learning_rate": 0.00019175930509923942, "loss": 0.3765, "step": 6769 }, { "epoch": 0.5484445884640311, "grad_norm": 0.029647020623087883, "learning_rate": 0.00019175480444664478, "loss": 0.3635, "step": 6770 }, { "epoch": 0.5485255994815295, "grad_norm": 0.03362290561199188, "learning_rate": 0.00019175030379405014, "loss": 0.3469, "step": 6771 }, { "epoch": 0.5486066104990278, "grad_norm": 0.034770358353853226, "learning_rate": 0.00019174580314145552, "loss": 0.3524, "step": 6772 }, { "epoch": 0.5486876215165263, "grad_norm": 0.04310256242752075, "learning_rate": 0.00019174130248886088, "loss": 0.3756, "step": 6773 }, { "epoch": 0.5487686325340246, "grad_norm": 0.03369515761733055, "learning_rate": 0.00019173680183626627, "loss": 0.382, "step": 6774 }, { "epoch": 0.548849643551523, "grad_norm": 0.032771673053503036, "learning_rate": 0.00019173230118367166, "loss": 0.3968, "step": 6775 }, { "epoch": 0.5489306545690213, "grad_norm": 0.03280429169535637, "learning_rate": 0.00019172780053107702, "loss": 0.3491, "step": 6776 }, { "epoch": 0.5490116655865198, "grad_norm": 0.031371165066957474, "learning_rate": 0.00019172329987848238, "loss": 0.3224, "step": 6777 }, { "epoch": 0.5490926766040182, "grad_norm": 0.029993250966072083, "learning_rate": 0.00019171879922588776, "loss": 0.3669, "step": 6778 }, { "epoch": 0.5491736876215165, "grad_norm": 0.03484036773443222, "learning_rate": 0.00019171429857329312, "loss": 0.3361, "step": 6779 }, { "epoch": 0.5492546986390149, "grad_norm": 0.03162126988172531, "learning_rate": 0.0001917097979206985, "loss": 0.3849, "step": 6780 }, { "epoch": 0.5493357096565132, "grad_norm": 0.035372983664274216, "learning_rate": 0.0001917052972681039, "loss": 0.3978, "step": 6781 }, { "epoch": 0.5494167206740117, "grad_norm": 0.032826296985149384, "learning_rate": 0.00019170079661550926, "loss": 0.3882, "step": 6782 }, { "epoch": 0.5494977316915101, "grad_norm": 0.027257384732365608, "learning_rate": 0.00019169629596291462, "loss": 0.3338, "step": 6783 }, { "epoch": 0.5495787427090084, "grad_norm": 0.0283949077129364, "learning_rate": 0.00019169179531032, "loss": 0.2983, "step": 6784 }, { "epoch": 0.5496597537265068, "grad_norm": 0.03267160430550575, "learning_rate": 0.00019168729465772537, "loss": 0.3423, "step": 6785 }, { "epoch": 0.5497407647440052, "grad_norm": 0.03289078548550606, "learning_rate": 0.00019168279400513075, "loss": 0.311, "step": 6786 }, { "epoch": 0.5498217757615036, "grad_norm": 0.03308109566569328, "learning_rate": 0.00019167829335253614, "loss": 0.3191, "step": 6787 }, { "epoch": 0.549902786779002, "grad_norm": 0.034293193370103836, "learning_rate": 0.0001916737926999415, "loss": 0.3856, "step": 6788 }, { "epoch": 0.5499837977965003, "grad_norm": 0.03392151743173599, "learning_rate": 0.00019166929204734686, "loss": 0.3347, "step": 6789 }, { "epoch": 0.5500648088139987, "grad_norm": 0.031559936702251434, "learning_rate": 0.00019166479139475225, "loss": 0.3762, "step": 6790 }, { "epoch": 0.5501458198314971, "grad_norm": 0.03242883458733559, "learning_rate": 0.0001916602907421576, "loss": 0.3907, "step": 6791 }, { "epoch": 0.5502268308489955, "grad_norm": 0.02734988182783127, "learning_rate": 0.000191655790089563, "loss": 0.3148, "step": 6792 }, { "epoch": 0.5503078418664938, "grad_norm": 0.032763585448265076, "learning_rate": 0.00019165128943696838, "loss": 0.4039, "step": 6793 }, { "epoch": 0.5503888528839922, "grad_norm": 0.03262895345687866, "learning_rate": 0.00019164678878437374, "loss": 0.3682, "step": 6794 }, { "epoch": 0.5504698639014906, "grad_norm": 0.03161659091711044, "learning_rate": 0.0001916422881317791, "loss": 0.3162, "step": 6795 }, { "epoch": 0.550550874918989, "grad_norm": 0.03022010624408722, "learning_rate": 0.0001916377874791845, "loss": 0.3902, "step": 6796 }, { "epoch": 0.5506318859364874, "grad_norm": 0.03609905764460564, "learning_rate": 0.00019163328682658988, "loss": 0.3424, "step": 6797 }, { "epoch": 0.5507128969539857, "grad_norm": 0.03280745819211006, "learning_rate": 0.00019162878617399524, "loss": 0.4093, "step": 6798 }, { "epoch": 0.5507939079714841, "grad_norm": 0.03054327517747879, "learning_rate": 0.00019162428552140062, "loss": 0.3166, "step": 6799 }, { "epoch": 0.5508749189889826, "grad_norm": 0.030223997309803963, "learning_rate": 0.00019161978486880598, "loss": 0.3185, "step": 6800 }, { "epoch": 0.5509559300064809, "grad_norm": 0.029921215027570724, "learning_rate": 0.00019161528421621134, "loss": 0.3583, "step": 6801 }, { "epoch": 0.5510369410239793, "grad_norm": 0.030760707333683968, "learning_rate": 0.00019161078356361673, "loss": 0.3484, "step": 6802 }, { "epoch": 0.5511179520414776, "grad_norm": 0.02772986702620983, "learning_rate": 0.00019160628291102212, "loss": 0.3056, "step": 6803 }, { "epoch": 0.551198963058976, "grad_norm": 0.032765943557024, "learning_rate": 0.00019160178225842748, "loss": 0.3425, "step": 6804 }, { "epoch": 0.5512799740764744, "grad_norm": 0.028970535844564438, "learning_rate": 0.00019159728160583287, "loss": 0.3272, "step": 6805 }, { "epoch": 0.5513609850939728, "grad_norm": 0.03425658121705055, "learning_rate": 0.00019159278095323823, "loss": 0.3468, "step": 6806 }, { "epoch": 0.5514419961114712, "grad_norm": 0.03469391167163849, "learning_rate": 0.00019158828030064359, "loss": 0.4086, "step": 6807 }, { "epoch": 0.5515230071289695, "grad_norm": 0.031067317351698875, "learning_rate": 0.00019158377964804897, "loss": 0.3424, "step": 6808 }, { "epoch": 0.5516040181464679, "grad_norm": 0.03129751980304718, "learning_rate": 0.00019157927899545436, "loss": 0.3564, "step": 6809 }, { "epoch": 0.5516850291639663, "grad_norm": 0.02611689455807209, "learning_rate": 0.00019157477834285972, "loss": 0.3438, "step": 6810 }, { "epoch": 0.5517660401814647, "grad_norm": 0.030698176473379135, "learning_rate": 0.0001915702776902651, "loss": 0.3252, "step": 6811 }, { "epoch": 0.551847051198963, "grad_norm": 0.029681215062737465, "learning_rate": 0.00019156577703767047, "loss": 0.3413, "step": 6812 }, { "epoch": 0.5519280622164614, "grad_norm": 0.03175154700875282, "learning_rate": 0.00019156127638507583, "loss": 0.3418, "step": 6813 }, { "epoch": 0.5520090732339599, "grad_norm": 0.03310735896229744, "learning_rate": 0.00019155677573248121, "loss": 0.3601, "step": 6814 }, { "epoch": 0.5520900842514582, "grad_norm": 0.02929147146642208, "learning_rate": 0.0001915522750798866, "loss": 0.3651, "step": 6815 }, { "epoch": 0.5521710952689566, "grad_norm": 0.03329383581876755, "learning_rate": 0.00019154777442729196, "loss": 0.3508, "step": 6816 }, { "epoch": 0.5522521062864549, "grad_norm": 0.027802016586065292, "learning_rate": 0.00019154327377469735, "loss": 0.3346, "step": 6817 }, { "epoch": 0.5523331173039533, "grad_norm": 0.03057532198727131, "learning_rate": 0.0001915387731221027, "loss": 0.3249, "step": 6818 }, { "epoch": 0.5524141283214518, "grad_norm": 0.030911043286323547, "learning_rate": 0.00019153427246950807, "loss": 0.3036, "step": 6819 }, { "epoch": 0.5524951393389501, "grad_norm": 0.028215307742357254, "learning_rate": 0.00019152977181691348, "loss": 0.3631, "step": 6820 }, { "epoch": 0.5525761503564485, "grad_norm": 0.03021448478102684, "learning_rate": 0.00019152527116431884, "loss": 0.3646, "step": 6821 }, { "epoch": 0.5526571613739468, "grad_norm": 0.03648442029953003, "learning_rate": 0.0001915207705117242, "loss": 0.3783, "step": 6822 }, { "epoch": 0.5527381723914452, "grad_norm": 0.030310718342661858, "learning_rate": 0.0001915162698591296, "loss": 0.3367, "step": 6823 }, { "epoch": 0.5528191834089436, "grad_norm": 0.030238375067710876, "learning_rate": 0.00019151176920653495, "loss": 0.3696, "step": 6824 }, { "epoch": 0.552900194426442, "grad_norm": 0.03566858544945717, "learning_rate": 0.0001915072685539403, "loss": 0.4048, "step": 6825 }, { "epoch": 0.5529812054439404, "grad_norm": 0.030715545639395714, "learning_rate": 0.00019150276790134573, "loss": 0.354, "step": 6826 }, { "epoch": 0.5530622164614387, "grad_norm": 0.028971727937459946, "learning_rate": 0.00019149826724875109, "loss": 0.319, "step": 6827 }, { "epoch": 0.5531432274789372, "grad_norm": 0.028171943500638008, "learning_rate": 0.00019149376659615645, "loss": 0.3677, "step": 6828 }, { "epoch": 0.5532242384964355, "grad_norm": 0.03259630873799324, "learning_rate": 0.00019148926594356183, "loss": 0.3665, "step": 6829 }, { "epoch": 0.5533052495139339, "grad_norm": 0.034019727259874344, "learning_rate": 0.0001914847652909672, "loss": 0.362, "step": 6830 }, { "epoch": 0.5533862605314323, "grad_norm": 0.027019208297133446, "learning_rate": 0.00019148026463837255, "loss": 0.3097, "step": 6831 }, { "epoch": 0.5534672715489306, "grad_norm": 0.029250169172883034, "learning_rate": 0.00019147576398577797, "loss": 0.3312, "step": 6832 }, { "epoch": 0.5535482825664291, "grad_norm": 0.03057018667459488, "learning_rate": 0.00019147126333318333, "loss": 0.3799, "step": 6833 }, { "epoch": 0.5536292935839274, "grad_norm": 0.03147272393107414, "learning_rate": 0.0001914667626805887, "loss": 0.3241, "step": 6834 }, { "epoch": 0.5537103046014258, "grad_norm": 0.029593253508210182, "learning_rate": 0.00019146226202799407, "loss": 0.3662, "step": 6835 }, { "epoch": 0.5537913156189241, "grad_norm": 0.03245329111814499, "learning_rate": 0.00019145776137539943, "loss": 0.3119, "step": 6836 }, { "epoch": 0.5538723266364226, "grad_norm": 0.03435903042554855, "learning_rate": 0.0001914532607228048, "loss": 0.3854, "step": 6837 }, { "epoch": 0.553953337653921, "grad_norm": 0.036790501326322556, "learning_rate": 0.0001914487600702102, "loss": 0.4024, "step": 6838 }, { "epoch": 0.5540343486714193, "grad_norm": 0.02854119800031185, "learning_rate": 0.00019144425941761557, "loss": 0.3576, "step": 6839 }, { "epoch": 0.5541153596889177, "grad_norm": 0.032489001750946045, "learning_rate": 0.00019143975876502093, "loss": 0.3809, "step": 6840 }, { "epoch": 0.554196370706416, "grad_norm": 0.03234725818037987, "learning_rate": 0.00019143525811242632, "loss": 0.3146, "step": 6841 }, { "epoch": 0.5542773817239145, "grad_norm": 0.031654514372348785, "learning_rate": 0.00019143075745983168, "loss": 0.3793, "step": 6842 }, { "epoch": 0.5543583927414129, "grad_norm": 0.02953651174902916, "learning_rate": 0.00019142625680723704, "loss": 0.3314, "step": 6843 }, { "epoch": 0.5544394037589112, "grad_norm": 0.03533756360411644, "learning_rate": 0.00019142175615464245, "loss": 0.3854, "step": 6844 }, { "epoch": 0.5545204147764096, "grad_norm": 0.029549822211265564, "learning_rate": 0.0001914172555020478, "loss": 0.3541, "step": 6845 }, { "epoch": 0.5546014257939079, "grad_norm": 0.0319569893181324, "learning_rate": 0.00019141275484945317, "loss": 0.3656, "step": 6846 }, { "epoch": 0.5546824368114064, "grad_norm": 0.026602884754538536, "learning_rate": 0.00019140825419685856, "loss": 0.3507, "step": 6847 }, { "epoch": 0.5547634478289047, "grad_norm": 0.03057567961513996, "learning_rate": 0.00019140375354426392, "loss": 0.3551, "step": 6848 }, { "epoch": 0.5548444588464031, "grad_norm": 0.029442768543958664, "learning_rate": 0.0001913992528916693, "loss": 0.3521, "step": 6849 }, { "epoch": 0.5549254698639015, "grad_norm": 0.0324246883392334, "learning_rate": 0.0001913947522390747, "loss": 0.3345, "step": 6850 }, { "epoch": 0.5550064808813999, "grad_norm": 0.03182468190789223, "learning_rate": 0.00019139025158648005, "loss": 0.3794, "step": 6851 }, { "epoch": 0.5550874918988983, "grad_norm": 0.033127471804618835, "learning_rate": 0.0001913857509338854, "loss": 0.3408, "step": 6852 }, { "epoch": 0.5551685029163966, "grad_norm": 0.027491910383105278, "learning_rate": 0.0001913812502812908, "loss": 0.336, "step": 6853 }, { "epoch": 0.555249513933895, "grad_norm": 0.031573060899972916, "learning_rate": 0.00019137674962869616, "loss": 0.3205, "step": 6854 }, { "epoch": 0.5553305249513933, "grad_norm": 0.02980121038854122, "learning_rate": 0.00019137224897610155, "loss": 0.3579, "step": 6855 }, { "epoch": 0.5554115359688918, "grad_norm": 0.0305420383810997, "learning_rate": 0.00019136774832350693, "loss": 0.3647, "step": 6856 }, { "epoch": 0.5554925469863902, "grad_norm": 0.02846667729318142, "learning_rate": 0.0001913632476709123, "loss": 0.3339, "step": 6857 }, { "epoch": 0.5555735580038885, "grad_norm": 0.03440249711275101, "learning_rate": 0.00019135874701831765, "loss": 0.3948, "step": 6858 }, { "epoch": 0.5556545690213869, "grad_norm": 0.030161675065755844, "learning_rate": 0.00019135424636572304, "loss": 0.3686, "step": 6859 }, { "epoch": 0.5557355800388852, "grad_norm": 0.030002431944012642, "learning_rate": 0.0001913497457131284, "loss": 0.3303, "step": 6860 }, { "epoch": 0.5558165910563837, "grad_norm": 0.029472051188349724, "learning_rate": 0.0001913452450605338, "loss": 0.3622, "step": 6861 }, { "epoch": 0.5558976020738821, "grad_norm": 0.02902107685804367, "learning_rate": 0.00019134074440793917, "loss": 0.3077, "step": 6862 }, { "epoch": 0.5559786130913804, "grad_norm": 0.031501494348049164, "learning_rate": 0.00019133624375534453, "loss": 0.3397, "step": 6863 }, { "epoch": 0.5560596241088788, "grad_norm": 0.0340961217880249, "learning_rate": 0.0001913317431027499, "loss": 0.3326, "step": 6864 }, { "epoch": 0.5561406351263772, "grad_norm": 0.032292790710926056, "learning_rate": 0.00019132724245015528, "loss": 0.3664, "step": 6865 }, { "epoch": 0.5562216461438756, "grad_norm": 0.03301149979233742, "learning_rate": 0.00019132274179756064, "loss": 0.3626, "step": 6866 }, { "epoch": 0.556302657161374, "grad_norm": 0.02956380322575569, "learning_rate": 0.00019131824114496603, "loss": 0.335, "step": 6867 }, { "epoch": 0.5563836681788723, "grad_norm": 0.03247414156794548, "learning_rate": 0.00019131374049237142, "loss": 0.4058, "step": 6868 }, { "epoch": 0.5564646791963707, "grad_norm": 0.028093554079532623, "learning_rate": 0.00019130923983977678, "loss": 0.3335, "step": 6869 }, { "epoch": 0.5565456902138691, "grad_norm": 0.02879849076271057, "learning_rate": 0.00019130473918718214, "loss": 0.3434, "step": 6870 }, { "epoch": 0.5566267012313675, "grad_norm": 0.02984016016125679, "learning_rate": 0.00019130023853458752, "loss": 0.3348, "step": 6871 }, { "epoch": 0.5567077122488658, "grad_norm": 0.026220617815852165, "learning_rate": 0.0001912957378819929, "loss": 0.2983, "step": 6872 }, { "epoch": 0.5567887232663642, "grad_norm": 0.03234969452023506, "learning_rate": 0.00019129123722939827, "loss": 0.3404, "step": 6873 }, { "epoch": 0.5568697342838627, "grad_norm": 0.030391644686460495, "learning_rate": 0.00019128673657680366, "loss": 0.3228, "step": 6874 }, { "epoch": 0.556950745301361, "grad_norm": 0.031873755156993866, "learning_rate": 0.00019128223592420902, "loss": 0.3734, "step": 6875 }, { "epoch": 0.5570317563188594, "grad_norm": 0.02982131578028202, "learning_rate": 0.00019127773527161438, "loss": 0.3209, "step": 6876 }, { "epoch": 0.5571127673363577, "grad_norm": 0.029721610248088837, "learning_rate": 0.00019127323461901977, "loss": 0.3013, "step": 6877 }, { "epoch": 0.5571937783538561, "grad_norm": 0.04121321812272072, "learning_rate": 0.00019126873396642515, "loss": 0.3376, "step": 6878 }, { "epoch": 0.5572747893713546, "grad_norm": 0.032671697437763214, "learning_rate": 0.0001912642333138305, "loss": 0.3472, "step": 6879 }, { "epoch": 0.5573558003888529, "grad_norm": 0.03484325855970383, "learning_rate": 0.0001912597326612359, "loss": 0.3852, "step": 6880 }, { "epoch": 0.5574368114063513, "grad_norm": 0.02832927368581295, "learning_rate": 0.00019125523200864126, "loss": 0.3094, "step": 6881 }, { "epoch": 0.5575178224238496, "grad_norm": 0.03815742954611778, "learning_rate": 0.00019125073135604662, "loss": 0.3853, "step": 6882 }, { "epoch": 0.557598833441348, "grad_norm": 0.03191991522908211, "learning_rate": 0.000191246230703452, "loss": 0.3514, "step": 6883 }, { "epoch": 0.5576798444588464, "grad_norm": 0.03721702843904495, "learning_rate": 0.0001912417300508574, "loss": 0.3489, "step": 6884 }, { "epoch": 0.5577608554763448, "grad_norm": 0.02670007012784481, "learning_rate": 0.00019123722939826275, "loss": 0.282, "step": 6885 }, { "epoch": 0.5578418664938432, "grad_norm": 0.03530779480934143, "learning_rate": 0.00019123272874566814, "loss": 0.3499, "step": 6886 }, { "epoch": 0.5579228775113415, "grad_norm": 0.028200553730130196, "learning_rate": 0.0001912282280930735, "loss": 0.3153, "step": 6887 }, { "epoch": 0.55800388852884, "grad_norm": 0.034294601529836655, "learning_rate": 0.00019122372744047886, "loss": 0.3672, "step": 6888 }, { "epoch": 0.5580848995463383, "grad_norm": 0.03616178408265114, "learning_rate": 0.00019121922678788425, "loss": 0.425, "step": 6889 }, { "epoch": 0.5581659105638367, "grad_norm": 0.03213857114315033, "learning_rate": 0.00019121472613528964, "loss": 0.316, "step": 6890 }, { "epoch": 0.558246921581335, "grad_norm": 0.031516559422016144, "learning_rate": 0.000191210225482695, "loss": 0.3513, "step": 6891 }, { "epoch": 0.5583279325988334, "grad_norm": 0.03412635996937752, "learning_rate": 0.00019120572483010038, "loss": 0.3695, "step": 6892 }, { "epoch": 0.5584089436163319, "grad_norm": 0.032149460166692734, "learning_rate": 0.00019120122417750574, "loss": 0.4148, "step": 6893 }, { "epoch": 0.5584899546338302, "grad_norm": 0.03549889475107193, "learning_rate": 0.0001911967235249111, "loss": 0.3817, "step": 6894 }, { "epoch": 0.5585709656513286, "grad_norm": 0.027554932981729507, "learning_rate": 0.0001911922228723165, "loss": 0.2947, "step": 6895 }, { "epoch": 0.5586519766688269, "grad_norm": 0.040073104202747345, "learning_rate": 0.00019118772221972188, "loss": 0.3383, "step": 6896 }, { "epoch": 0.5587329876863253, "grad_norm": 0.03099329210817814, "learning_rate": 0.00019118322156712724, "loss": 0.2951, "step": 6897 }, { "epoch": 0.5588139987038238, "grad_norm": 0.029820909723639488, "learning_rate": 0.00019117872091453262, "loss": 0.3435, "step": 6898 }, { "epoch": 0.5588950097213221, "grad_norm": 0.030790315940976143, "learning_rate": 0.00019117422026193798, "loss": 0.3651, "step": 6899 }, { "epoch": 0.5589760207388205, "grad_norm": 0.03345898538827896, "learning_rate": 0.00019116971960934334, "loss": 0.3387, "step": 6900 }, { "epoch": 0.5590570317563188, "grad_norm": 0.031085064634680748, "learning_rate": 0.00019116521895674876, "loss": 0.3167, "step": 6901 }, { "epoch": 0.5591380427738173, "grad_norm": 0.03324053809046745, "learning_rate": 0.00019116071830415412, "loss": 0.3762, "step": 6902 }, { "epoch": 0.5592190537913156, "grad_norm": 0.03265579044818878, "learning_rate": 0.00019115621765155948, "loss": 0.3682, "step": 6903 }, { "epoch": 0.559300064808814, "grad_norm": 0.03241958096623421, "learning_rate": 0.00019115171699896487, "loss": 0.3431, "step": 6904 }, { "epoch": 0.5593810758263124, "grad_norm": 0.027653567492961884, "learning_rate": 0.00019114721634637023, "loss": 0.3514, "step": 6905 }, { "epoch": 0.5594620868438107, "grad_norm": 0.03009861335158348, "learning_rate": 0.00019114271569377559, "loss": 0.3885, "step": 6906 }, { "epoch": 0.5595430978613092, "grad_norm": 0.030470533296465874, "learning_rate": 0.000191138215041181, "loss": 0.3478, "step": 6907 }, { "epoch": 0.5596241088788075, "grad_norm": 0.03243311867117882, "learning_rate": 0.00019113371438858636, "loss": 0.3719, "step": 6908 }, { "epoch": 0.5597051198963059, "grad_norm": 0.03046790510416031, "learning_rate": 0.00019112921373599172, "loss": 0.3193, "step": 6909 }, { "epoch": 0.5597861309138042, "grad_norm": 0.02883930876851082, "learning_rate": 0.0001911247130833971, "loss": 0.3559, "step": 6910 }, { "epoch": 0.5598671419313026, "grad_norm": 0.03061991184949875, "learning_rate": 0.00019112021243080247, "loss": 0.3398, "step": 6911 }, { "epoch": 0.5599481529488011, "grad_norm": 0.027453241869807243, "learning_rate": 0.00019111571177820783, "loss": 0.3302, "step": 6912 }, { "epoch": 0.5600291639662994, "grad_norm": 0.034282706677913666, "learning_rate": 0.00019111121112561324, "loss": 0.3877, "step": 6913 }, { "epoch": 0.5601101749837978, "grad_norm": 0.028397291898727417, "learning_rate": 0.0001911067104730186, "loss": 0.3199, "step": 6914 }, { "epoch": 0.5601911860012961, "grad_norm": 0.029234793037176132, "learning_rate": 0.00019110220982042396, "loss": 0.3374, "step": 6915 }, { "epoch": 0.5602721970187946, "grad_norm": 0.02638567052781582, "learning_rate": 0.00019109770916782935, "loss": 0.3036, "step": 6916 }, { "epoch": 0.560353208036293, "grad_norm": 0.03524412959814072, "learning_rate": 0.0001910932085152347, "loss": 0.3744, "step": 6917 }, { "epoch": 0.5604342190537913, "grad_norm": 0.02868202142417431, "learning_rate": 0.00019108870786264007, "loss": 0.3432, "step": 6918 }, { "epoch": 0.5605152300712897, "grad_norm": 0.03504109010100365, "learning_rate": 0.00019108420721004548, "loss": 0.3756, "step": 6919 }, { "epoch": 0.560596241088788, "grad_norm": 0.03579915314912796, "learning_rate": 0.00019107970655745084, "loss": 0.3528, "step": 6920 }, { "epoch": 0.5606772521062865, "grad_norm": 0.03431423008441925, "learning_rate": 0.0001910752059048562, "loss": 0.3997, "step": 6921 }, { "epoch": 0.5607582631237849, "grad_norm": 0.02899775095283985, "learning_rate": 0.0001910707052522616, "loss": 0.3264, "step": 6922 }, { "epoch": 0.5608392741412832, "grad_norm": 0.031370386481285095, "learning_rate": 0.00019106620459966695, "loss": 0.3265, "step": 6923 }, { "epoch": 0.5609202851587816, "grad_norm": 0.02682410180568695, "learning_rate": 0.0001910617039470723, "loss": 0.3327, "step": 6924 }, { "epoch": 0.56100129617628, "grad_norm": 0.041649095714092255, "learning_rate": 0.00019105720329447773, "loss": 0.4209, "step": 6925 }, { "epoch": 0.5610823071937784, "grad_norm": 0.026437275111675262, "learning_rate": 0.00019105270264188309, "loss": 0.2907, "step": 6926 }, { "epoch": 0.5611633182112767, "grad_norm": 0.029267890378832817, "learning_rate": 0.00019104820198928845, "loss": 0.3051, "step": 6927 }, { "epoch": 0.5612443292287751, "grad_norm": 0.029865281656384468, "learning_rate": 0.00019104370133669383, "loss": 0.364, "step": 6928 }, { "epoch": 0.5613253402462735, "grad_norm": 0.03844697028398514, "learning_rate": 0.0001910392006840992, "loss": 0.3835, "step": 6929 }, { "epoch": 0.5614063512637719, "grad_norm": 0.03238433972001076, "learning_rate": 0.00019103470003150458, "loss": 0.346, "step": 6930 }, { "epoch": 0.5614873622812703, "grad_norm": 0.032621171325445175, "learning_rate": 0.00019103019937890997, "loss": 0.3792, "step": 6931 }, { "epoch": 0.5615683732987686, "grad_norm": 0.030204012989997864, "learning_rate": 0.00019102569872631533, "loss": 0.2853, "step": 6932 }, { "epoch": 0.561649384316267, "grad_norm": 0.03270819038152695, "learning_rate": 0.0001910211980737207, "loss": 0.3452, "step": 6933 }, { "epoch": 0.5617303953337653, "grad_norm": 0.028208116069436073, "learning_rate": 0.00019101669742112607, "loss": 0.3446, "step": 6934 }, { "epoch": 0.5618114063512638, "grad_norm": 0.0340554341673851, "learning_rate": 0.00019101219676853143, "loss": 0.4057, "step": 6935 }, { "epoch": 0.5618924173687622, "grad_norm": 0.033202871680259705, "learning_rate": 0.00019100769611593682, "loss": 0.3742, "step": 6936 }, { "epoch": 0.5619734283862605, "grad_norm": 0.035153090953826904, "learning_rate": 0.0001910031954633422, "loss": 0.3307, "step": 6937 }, { "epoch": 0.5620544394037589, "grad_norm": 0.03547315672039986, "learning_rate": 0.00019099869481074757, "loss": 0.4135, "step": 6938 }, { "epoch": 0.5621354504212573, "grad_norm": 0.03163422644138336, "learning_rate": 0.00019099419415815293, "loss": 0.3767, "step": 6939 }, { "epoch": 0.5622164614387557, "grad_norm": 0.030638709664344788, "learning_rate": 0.00019098969350555832, "loss": 0.3512, "step": 6940 }, { "epoch": 0.562297472456254, "grad_norm": 0.033417392522096634, "learning_rate": 0.00019098519285296368, "loss": 0.311, "step": 6941 }, { "epoch": 0.5623784834737524, "grad_norm": 0.032544467598199844, "learning_rate": 0.00019098069220036906, "loss": 0.3455, "step": 6942 }, { "epoch": 0.5624594944912508, "grad_norm": 0.03068172000348568, "learning_rate": 0.00019097619154777445, "loss": 0.3728, "step": 6943 }, { "epoch": 0.5625405055087492, "grad_norm": 0.026925476267933846, "learning_rate": 0.0001909716908951798, "loss": 0.3251, "step": 6944 }, { "epoch": 0.5626215165262476, "grad_norm": 0.03054945543408394, "learning_rate": 0.00019096719024258517, "loss": 0.3691, "step": 6945 }, { "epoch": 0.562702527543746, "grad_norm": 0.030231181532144547, "learning_rate": 0.00019096268958999056, "loss": 0.3419, "step": 6946 }, { "epoch": 0.5627835385612443, "grad_norm": 0.029685189947485924, "learning_rate": 0.00019095818893739592, "loss": 0.2949, "step": 6947 }, { "epoch": 0.5628645495787427, "grad_norm": 0.030578652396798134, "learning_rate": 0.0001909536882848013, "loss": 0.3367, "step": 6948 }, { "epoch": 0.5629455605962411, "grad_norm": 0.029063817113637924, "learning_rate": 0.0001909491876322067, "loss": 0.3408, "step": 6949 }, { "epoch": 0.5630265716137395, "grad_norm": 0.03256330266594887, "learning_rate": 0.00019094468697961205, "loss": 0.3879, "step": 6950 }, { "epoch": 0.5631075826312378, "grad_norm": 0.0308829378336668, "learning_rate": 0.0001909401863270174, "loss": 0.3672, "step": 6951 }, { "epoch": 0.5631885936487362, "grad_norm": 0.029668381437659264, "learning_rate": 0.0001909356856744228, "loss": 0.3685, "step": 6952 }, { "epoch": 0.5632696046662347, "grad_norm": 0.030312536284327507, "learning_rate": 0.00019093118502182819, "loss": 0.336, "step": 6953 }, { "epoch": 0.563350615683733, "grad_norm": 0.031126992776989937, "learning_rate": 0.00019092668436923355, "loss": 0.3314, "step": 6954 }, { "epoch": 0.5634316267012314, "grad_norm": 0.03099069371819496, "learning_rate": 0.00019092218371663893, "loss": 0.3329, "step": 6955 }, { "epoch": 0.5635126377187297, "grad_norm": 0.03418287634849548, "learning_rate": 0.0001909176830640443, "loss": 0.3692, "step": 6956 }, { "epoch": 0.5635936487362281, "grad_norm": 0.03272226080298424, "learning_rate": 0.00019091318241144965, "loss": 0.3692, "step": 6957 }, { "epoch": 0.5636746597537265, "grad_norm": 0.031897593289613724, "learning_rate": 0.00019090868175885504, "loss": 0.3494, "step": 6958 }, { "epoch": 0.5637556707712249, "grad_norm": 0.02550220675766468, "learning_rate": 0.00019090418110626043, "loss": 0.3067, "step": 6959 }, { "epoch": 0.5638366817887233, "grad_norm": 0.03189469128847122, "learning_rate": 0.0001908996804536658, "loss": 0.3767, "step": 6960 }, { "epoch": 0.5639176928062216, "grad_norm": 0.02977280505001545, "learning_rate": 0.00019089517980107118, "loss": 0.3379, "step": 6961 }, { "epoch": 0.56399870382372, "grad_norm": 0.029872236773371696, "learning_rate": 0.00019089067914847654, "loss": 0.3221, "step": 6962 }, { "epoch": 0.5640797148412184, "grad_norm": 0.03274345397949219, "learning_rate": 0.0001908861784958819, "loss": 0.4027, "step": 6963 }, { "epoch": 0.5641607258587168, "grad_norm": 0.03244532644748688, "learning_rate": 0.00019088167784328728, "loss": 0.3528, "step": 6964 }, { "epoch": 0.5642417368762151, "grad_norm": 0.03851190581917763, "learning_rate": 0.00019087717719069267, "loss": 0.3393, "step": 6965 }, { "epoch": 0.5643227478937135, "grad_norm": 0.032855067402124405, "learning_rate": 0.00019087267653809803, "loss": 0.4021, "step": 6966 }, { "epoch": 0.564403758911212, "grad_norm": 0.029516946524381638, "learning_rate": 0.00019086817588550342, "loss": 0.3405, "step": 6967 }, { "epoch": 0.5644847699287103, "grad_norm": 0.027933668345212936, "learning_rate": 0.00019086367523290878, "loss": 0.3783, "step": 6968 }, { "epoch": 0.5645657809462087, "grad_norm": 0.032324567437171936, "learning_rate": 0.00019085917458031414, "loss": 0.2918, "step": 6969 }, { "epoch": 0.564646791963707, "grad_norm": 0.03222862258553505, "learning_rate": 0.00019085467392771952, "loss": 0.3378, "step": 6970 }, { "epoch": 0.5647278029812054, "grad_norm": 0.030286898836493492, "learning_rate": 0.0001908501732751249, "loss": 0.3657, "step": 6971 }, { "epoch": 0.5648088139987039, "grad_norm": 0.0321059413254261, "learning_rate": 0.00019084567262253027, "loss": 0.364, "step": 6972 }, { "epoch": 0.5648898250162022, "grad_norm": 0.02939450554549694, "learning_rate": 0.00019084117196993566, "loss": 0.3024, "step": 6973 }, { "epoch": 0.5649708360337006, "grad_norm": 0.030884001404047012, "learning_rate": 0.00019083667131734102, "loss": 0.3682, "step": 6974 }, { "epoch": 0.5650518470511989, "grad_norm": 0.026744620874524117, "learning_rate": 0.00019083217066474638, "loss": 0.3452, "step": 6975 }, { "epoch": 0.5651328580686974, "grad_norm": 0.03520316630601883, "learning_rate": 0.00019082767001215177, "loss": 0.391, "step": 6976 }, { "epoch": 0.5652138690861958, "grad_norm": 0.030200695618987083, "learning_rate": 0.00019082316935955715, "loss": 0.3403, "step": 6977 }, { "epoch": 0.5652948801036941, "grad_norm": 0.03265073522925377, "learning_rate": 0.0001908186687069625, "loss": 0.3498, "step": 6978 }, { "epoch": 0.5653758911211925, "grad_norm": 0.030164245516061783, "learning_rate": 0.0001908141680543679, "loss": 0.3537, "step": 6979 }, { "epoch": 0.5654569021386908, "grad_norm": 0.031043274328112602, "learning_rate": 0.00019080966740177326, "loss": 0.3244, "step": 6980 }, { "epoch": 0.5655379131561893, "grad_norm": 0.03142424300312996, "learning_rate": 0.00019080516674917862, "loss": 0.3523, "step": 6981 }, { "epoch": 0.5656189241736876, "grad_norm": 0.03224541246891022, "learning_rate": 0.00019080066609658403, "loss": 0.4044, "step": 6982 }, { "epoch": 0.565699935191186, "grad_norm": 0.03293000906705856, "learning_rate": 0.0001907961654439894, "loss": 0.3533, "step": 6983 }, { "epoch": 0.5657809462086844, "grad_norm": 0.028464820235967636, "learning_rate": 0.00019079166479139475, "loss": 0.3517, "step": 6984 }, { "epoch": 0.5658619572261827, "grad_norm": 0.029161132872104645, "learning_rate": 0.00019078716413880014, "loss": 0.2946, "step": 6985 }, { "epoch": 0.5659429682436812, "grad_norm": 0.031095337122678757, "learning_rate": 0.0001907826634862055, "loss": 0.2974, "step": 6986 }, { "epoch": 0.5660239792611795, "grad_norm": 0.03502573445439339, "learning_rate": 0.00019077816283361086, "loss": 0.3453, "step": 6987 }, { "epoch": 0.5661049902786779, "grad_norm": 0.03055300936102867, "learning_rate": 0.00019077366218101628, "loss": 0.3246, "step": 6988 }, { "epoch": 0.5661860012961762, "grad_norm": 0.029000548645853996, "learning_rate": 0.00019076916152842164, "loss": 0.3267, "step": 6989 }, { "epoch": 0.5662670123136747, "grad_norm": 0.03340502828359604, "learning_rate": 0.000190764660875827, "loss": 0.3524, "step": 6990 }, { "epoch": 0.5663480233311731, "grad_norm": 0.028636228293180466, "learning_rate": 0.00019076016022323238, "loss": 0.3332, "step": 6991 }, { "epoch": 0.5664290343486714, "grad_norm": 0.02928844839334488, "learning_rate": 0.00019075565957063774, "loss": 0.349, "step": 6992 }, { "epoch": 0.5665100453661698, "grad_norm": 0.030798735097050667, "learning_rate": 0.0001907511589180431, "loss": 0.3215, "step": 6993 }, { "epoch": 0.5665910563836681, "grad_norm": 0.030463317409157753, "learning_rate": 0.00019074665826544852, "loss": 0.3882, "step": 6994 }, { "epoch": 0.5666720674011666, "grad_norm": 0.029138967394828796, "learning_rate": 0.00019074215761285388, "loss": 0.3598, "step": 6995 }, { "epoch": 0.566753078418665, "grad_norm": 0.032293178141117096, "learning_rate": 0.00019073765696025924, "loss": 0.3721, "step": 6996 }, { "epoch": 0.5668340894361633, "grad_norm": 0.029973942786455154, "learning_rate": 0.00019073315630766462, "loss": 0.3442, "step": 6997 }, { "epoch": 0.5669151004536617, "grad_norm": 0.03126822039484978, "learning_rate": 0.00019072865565506998, "loss": 0.3765, "step": 6998 }, { "epoch": 0.56699611147116, "grad_norm": 0.030398758128285408, "learning_rate": 0.00019072415500247534, "loss": 0.3642, "step": 6999 }, { "epoch": 0.5670771224886585, "grad_norm": 0.03205035626888275, "learning_rate": 0.00019071965434988076, "loss": 0.3284, "step": 7000 }, { "epoch": 0.5671581335061568, "grad_norm": 0.027481509372591972, "learning_rate": 0.00019071515369728612, "loss": 0.3222, "step": 7001 }, { "epoch": 0.5672391445236552, "grad_norm": 0.03299596533179283, "learning_rate": 0.00019071065304469148, "loss": 0.352, "step": 7002 }, { "epoch": 0.5673201555411536, "grad_norm": 0.03149527683854103, "learning_rate": 0.00019070615239209687, "loss": 0.3751, "step": 7003 }, { "epoch": 0.567401166558652, "grad_norm": 0.030225474387407303, "learning_rate": 0.00019070165173950223, "loss": 0.3298, "step": 7004 }, { "epoch": 0.5674821775761504, "grad_norm": 0.028560619801282883, "learning_rate": 0.00019069715108690761, "loss": 0.2978, "step": 7005 }, { "epoch": 0.5675631885936487, "grad_norm": 0.03127693012356758, "learning_rate": 0.000190692650434313, "loss": 0.3485, "step": 7006 }, { "epoch": 0.5676441996111471, "grad_norm": 0.032159753143787384, "learning_rate": 0.00019068814978171836, "loss": 0.3145, "step": 7007 }, { "epoch": 0.5677252106286454, "grad_norm": 0.027168098837137222, "learning_rate": 0.00019068364912912372, "loss": 0.3509, "step": 7008 }, { "epoch": 0.5678062216461439, "grad_norm": 0.03163224831223488, "learning_rate": 0.0001906791484765291, "loss": 0.3411, "step": 7009 }, { "epoch": 0.5678872326636423, "grad_norm": 0.034824687987565994, "learning_rate": 0.00019067464782393447, "loss": 0.4088, "step": 7010 }, { "epoch": 0.5679682436811406, "grad_norm": 0.028746243566274643, "learning_rate": 0.00019067014717133986, "loss": 0.3432, "step": 7011 }, { "epoch": 0.568049254698639, "grad_norm": 0.029679277911782265, "learning_rate": 0.00019066564651874524, "loss": 0.3717, "step": 7012 }, { "epoch": 0.5681302657161373, "grad_norm": 0.03528760373592377, "learning_rate": 0.0001906611458661506, "loss": 0.3522, "step": 7013 }, { "epoch": 0.5682112767336358, "grad_norm": 0.027924789115786552, "learning_rate": 0.00019065664521355596, "loss": 0.3783, "step": 7014 }, { "epoch": 0.5682922877511342, "grad_norm": 0.027431324124336243, "learning_rate": 0.00019065214456096135, "loss": 0.3478, "step": 7015 }, { "epoch": 0.5683732987686325, "grad_norm": 0.027628622949123383, "learning_rate": 0.0001906476439083667, "loss": 0.3212, "step": 7016 }, { "epoch": 0.5684543097861309, "grad_norm": 0.03062502108514309, "learning_rate": 0.0001906431432557721, "loss": 0.3789, "step": 7017 }, { "epoch": 0.5685353208036293, "grad_norm": 0.03195822983980179, "learning_rate": 0.00019063864260317748, "loss": 0.3883, "step": 7018 }, { "epoch": 0.5686163318211277, "grad_norm": 0.028229979798197746, "learning_rate": 0.00019063414195058284, "loss": 0.3258, "step": 7019 }, { "epoch": 0.568697342838626, "grad_norm": 0.027966110035777092, "learning_rate": 0.0001906296412979882, "loss": 0.328, "step": 7020 }, { "epoch": 0.5687783538561244, "grad_norm": 0.03365035727620125, "learning_rate": 0.0001906251406453936, "loss": 0.3187, "step": 7021 }, { "epoch": 0.5688593648736228, "grad_norm": 0.030285336077213287, "learning_rate": 0.00019062063999279895, "loss": 0.351, "step": 7022 }, { "epoch": 0.5689403758911212, "grad_norm": 0.03437206521630287, "learning_rate": 0.00019061613934020434, "loss": 0.385, "step": 7023 }, { "epoch": 0.5690213869086196, "grad_norm": 0.02868664637207985, "learning_rate": 0.00019061163868760973, "loss": 0.3158, "step": 7024 }, { "epoch": 0.5691023979261179, "grad_norm": 0.02950209379196167, "learning_rate": 0.00019060713803501509, "loss": 0.3166, "step": 7025 }, { "epoch": 0.5691834089436163, "grad_norm": 0.03166711702942848, "learning_rate": 0.00019060263738242045, "loss": 0.3657, "step": 7026 }, { "epoch": 0.5692644199611148, "grad_norm": 0.030810615047812462, "learning_rate": 0.00019059813672982583, "loss": 0.3119, "step": 7027 }, { "epoch": 0.5693454309786131, "grad_norm": 0.03254614397883415, "learning_rate": 0.0001905936360772312, "loss": 0.3126, "step": 7028 }, { "epoch": 0.5694264419961115, "grad_norm": 0.03156366944313049, "learning_rate": 0.00019058913542463658, "loss": 0.3276, "step": 7029 }, { "epoch": 0.5695074530136098, "grad_norm": 0.0318727120757103, "learning_rate": 0.00019058463477204197, "loss": 0.3988, "step": 7030 }, { "epoch": 0.5695884640311082, "grad_norm": 0.033699046820402145, "learning_rate": 0.00019058013411944733, "loss": 0.3496, "step": 7031 }, { "epoch": 0.5696694750486067, "grad_norm": 0.02864360436797142, "learning_rate": 0.0001905756334668527, "loss": 0.3686, "step": 7032 }, { "epoch": 0.569750486066105, "grad_norm": 0.03384806588292122, "learning_rate": 0.00019057113281425807, "loss": 0.3722, "step": 7033 }, { "epoch": 0.5698314970836034, "grad_norm": 0.032431185245513916, "learning_rate": 0.00019056663216166346, "loss": 0.3765, "step": 7034 }, { "epoch": 0.5699125081011017, "grad_norm": 0.02844301052391529, "learning_rate": 0.00019056213150906882, "loss": 0.3759, "step": 7035 }, { "epoch": 0.5699935191186001, "grad_norm": 0.03484733775258064, "learning_rate": 0.0001905576308564742, "loss": 0.3515, "step": 7036 }, { "epoch": 0.5700745301360985, "grad_norm": 0.03190077468752861, "learning_rate": 0.00019055313020387957, "loss": 0.3424, "step": 7037 }, { "epoch": 0.5701555411535969, "grad_norm": 0.030969643965363503, "learning_rate": 0.00019054862955128493, "loss": 0.3274, "step": 7038 }, { "epoch": 0.5702365521710953, "grad_norm": 0.029254410415887833, "learning_rate": 0.00019054412889869032, "loss": 0.3644, "step": 7039 }, { "epoch": 0.5703175631885936, "grad_norm": 0.028428638353943825, "learning_rate": 0.0001905396282460957, "loss": 0.3494, "step": 7040 }, { "epoch": 0.5703985742060921, "grad_norm": 0.02732779271900654, "learning_rate": 0.00019053512759350106, "loss": 0.3039, "step": 7041 }, { "epoch": 0.5704795852235904, "grad_norm": 0.03077746368944645, "learning_rate": 0.00019053062694090645, "loss": 0.342, "step": 7042 }, { "epoch": 0.5705605962410888, "grad_norm": 0.0320584736764431, "learning_rate": 0.0001905261262883118, "loss": 0.3728, "step": 7043 }, { "epoch": 0.5706416072585871, "grad_norm": 0.030684838071465492, "learning_rate": 0.00019052162563571717, "loss": 0.3453, "step": 7044 }, { "epoch": 0.5707226182760855, "grad_norm": 0.03280287981033325, "learning_rate": 0.00019051712498312256, "loss": 0.321, "step": 7045 }, { "epoch": 0.570803629293584, "grad_norm": 0.030638406053185463, "learning_rate": 0.00019051262433052794, "loss": 0.3576, "step": 7046 }, { "epoch": 0.5708846403110823, "grad_norm": 0.029015205800533295, "learning_rate": 0.0001905081236779333, "loss": 0.3577, "step": 7047 }, { "epoch": 0.5709656513285807, "grad_norm": 0.03286876901984215, "learning_rate": 0.0001905036230253387, "loss": 0.3475, "step": 7048 }, { "epoch": 0.571046662346079, "grad_norm": 0.029815932735800743, "learning_rate": 0.00019049912237274405, "loss": 0.3719, "step": 7049 }, { "epoch": 0.5711276733635774, "grad_norm": 0.0388667918741703, "learning_rate": 0.0001904946217201494, "loss": 0.3749, "step": 7050 }, { "epoch": 0.5712086843810759, "grad_norm": 0.04013926908373833, "learning_rate": 0.0001904901210675548, "loss": 0.4188, "step": 7051 }, { "epoch": 0.5712896953985742, "grad_norm": 0.030169228091835976, "learning_rate": 0.0001904856204149602, "loss": 0.3483, "step": 7052 }, { "epoch": 0.5713707064160726, "grad_norm": 0.032555319368839264, "learning_rate": 0.00019048111976236555, "loss": 0.357, "step": 7053 }, { "epoch": 0.5714517174335709, "grad_norm": 0.029092267155647278, "learning_rate": 0.00019047661910977093, "loss": 0.3342, "step": 7054 }, { "epoch": 0.5715327284510694, "grad_norm": 0.032974276691675186, "learning_rate": 0.0001904721184571763, "loss": 0.3531, "step": 7055 }, { "epoch": 0.5716137394685677, "grad_norm": 0.030939916148781776, "learning_rate": 0.00019046761780458165, "loss": 0.363, "step": 7056 }, { "epoch": 0.5716947504860661, "grad_norm": 0.02596089616417885, "learning_rate": 0.00019046311715198707, "loss": 0.2696, "step": 7057 }, { "epoch": 0.5717757615035645, "grad_norm": 0.029358698055148125, "learning_rate": 0.00019045861649939243, "loss": 0.3477, "step": 7058 }, { "epoch": 0.5718567725210628, "grad_norm": 0.02909529209136963, "learning_rate": 0.0001904541158467978, "loss": 0.3049, "step": 7059 }, { "epoch": 0.5719377835385613, "grad_norm": 0.0325615294277668, "learning_rate": 0.00019044961519420318, "loss": 0.4295, "step": 7060 }, { "epoch": 0.5720187945560596, "grad_norm": 0.027235547080636024, "learning_rate": 0.00019044511454160854, "loss": 0.2808, "step": 7061 }, { "epoch": 0.572099805573558, "grad_norm": 0.03443726897239685, "learning_rate": 0.0001904406138890139, "loss": 0.3784, "step": 7062 }, { "epoch": 0.5721808165910564, "grad_norm": 0.02712845802307129, "learning_rate": 0.0001904361132364193, "loss": 0.35, "step": 7063 }, { "epoch": 0.5722618276085548, "grad_norm": 0.0366659052670002, "learning_rate": 0.00019043161258382467, "loss": 0.3964, "step": 7064 }, { "epoch": 0.5723428386260532, "grad_norm": 0.029586222022771835, "learning_rate": 0.00019042711193123003, "loss": 0.361, "step": 7065 }, { "epoch": 0.5724238496435515, "grad_norm": 0.027487218379974365, "learning_rate": 0.00019042261127863542, "loss": 0.3015, "step": 7066 }, { "epoch": 0.5725048606610499, "grad_norm": 0.031158898025751114, "learning_rate": 0.00019041811062604078, "loss": 0.3368, "step": 7067 }, { "epoch": 0.5725858716785482, "grad_norm": 0.031523216515779495, "learning_rate": 0.00019041360997344614, "loss": 0.3424, "step": 7068 }, { "epoch": 0.5726668826960467, "grad_norm": 0.035301659256219864, "learning_rate": 0.00019040910932085155, "loss": 0.3952, "step": 7069 }, { "epoch": 0.5727478937135451, "grad_norm": 0.03992548957467079, "learning_rate": 0.0001904046086682569, "loss": 0.4048, "step": 7070 }, { "epoch": 0.5728289047310434, "grad_norm": 0.030019249767065048, "learning_rate": 0.00019040010801566227, "loss": 0.3521, "step": 7071 }, { "epoch": 0.5729099157485418, "grad_norm": 0.030296877026557922, "learning_rate": 0.00019039560736306766, "loss": 0.3257, "step": 7072 }, { "epoch": 0.5729909267660401, "grad_norm": 0.029138630256056786, "learning_rate": 0.00019039110671047302, "loss": 0.3359, "step": 7073 }, { "epoch": 0.5730719377835386, "grad_norm": 0.03025815263390541, "learning_rate": 0.00019038660605787838, "loss": 0.3194, "step": 7074 }, { "epoch": 0.573152948801037, "grad_norm": 0.03255913779139519, "learning_rate": 0.0001903821054052838, "loss": 0.3692, "step": 7075 }, { "epoch": 0.5732339598185353, "grad_norm": 0.031642988324165344, "learning_rate": 0.00019037760475268915, "loss": 0.3636, "step": 7076 }, { "epoch": 0.5733149708360337, "grad_norm": 0.02967303805053234, "learning_rate": 0.0001903731041000945, "loss": 0.3692, "step": 7077 }, { "epoch": 0.5733959818535321, "grad_norm": 0.029209032654762268, "learning_rate": 0.0001903686034474999, "loss": 0.3398, "step": 7078 }, { "epoch": 0.5734769928710305, "grad_norm": 0.03341130539774895, "learning_rate": 0.00019036410279490526, "loss": 0.3902, "step": 7079 }, { "epoch": 0.5735580038885288, "grad_norm": 0.03325894474983215, "learning_rate": 0.00019035960214231062, "loss": 0.3536, "step": 7080 }, { "epoch": 0.5736390149060272, "grad_norm": 0.0343037024140358, "learning_rate": 0.00019035510148971603, "loss": 0.4056, "step": 7081 }, { "epoch": 0.5737200259235256, "grad_norm": 0.028654051944613457, "learning_rate": 0.0001903506008371214, "loss": 0.3126, "step": 7082 }, { "epoch": 0.573801036941024, "grad_norm": 0.03184157609939575, "learning_rate": 0.00019034610018452675, "loss": 0.3597, "step": 7083 }, { "epoch": 0.5738820479585224, "grad_norm": 0.03419782966375351, "learning_rate": 0.00019034159953193214, "loss": 0.3551, "step": 7084 }, { "epoch": 0.5739630589760207, "grad_norm": 0.029133254662156105, "learning_rate": 0.0001903370988793375, "loss": 0.2879, "step": 7085 }, { "epoch": 0.5740440699935191, "grad_norm": 0.031131476163864136, "learning_rate": 0.0001903325982267429, "loss": 0.3761, "step": 7086 }, { "epoch": 0.5741250810110174, "grad_norm": 0.031101131811738014, "learning_rate": 0.00019032809757414828, "loss": 0.3361, "step": 7087 }, { "epoch": 0.5742060920285159, "grad_norm": 0.02945782244205475, "learning_rate": 0.00019032359692155364, "loss": 0.3903, "step": 7088 }, { "epoch": 0.5742871030460143, "grad_norm": 0.029181165620684624, "learning_rate": 0.000190319096268959, "loss": 0.3134, "step": 7089 }, { "epoch": 0.5743681140635126, "grad_norm": 0.028031928464770317, "learning_rate": 0.00019031459561636438, "loss": 0.3439, "step": 7090 }, { "epoch": 0.574449125081011, "grad_norm": 0.032897673547267914, "learning_rate": 0.00019031009496376974, "loss": 0.396, "step": 7091 }, { "epoch": 0.5745301360985094, "grad_norm": 0.031826216727495193, "learning_rate": 0.00019030559431117513, "loss": 0.3312, "step": 7092 }, { "epoch": 0.5746111471160078, "grad_norm": 0.028905490413308144, "learning_rate": 0.00019030109365858052, "loss": 0.3671, "step": 7093 }, { "epoch": 0.5746921581335062, "grad_norm": 0.03372209519147873, "learning_rate": 0.00019029659300598588, "loss": 0.3251, "step": 7094 }, { "epoch": 0.5747731691510045, "grad_norm": 0.03266318887472153, "learning_rate": 0.00019029209235339124, "loss": 0.3785, "step": 7095 }, { "epoch": 0.5748541801685029, "grad_norm": 0.034305647015571594, "learning_rate": 0.00019028759170079663, "loss": 0.3506, "step": 7096 }, { "epoch": 0.5749351911860013, "grad_norm": 0.031909115612506866, "learning_rate": 0.00019028309104820199, "loss": 0.3645, "step": 7097 }, { "epoch": 0.5750162022034997, "grad_norm": 0.029492471367120743, "learning_rate": 0.00019027859039560737, "loss": 0.3546, "step": 7098 }, { "epoch": 0.575097213220998, "grad_norm": 0.027777908369898796, "learning_rate": 0.00019027408974301276, "loss": 0.3045, "step": 7099 }, { "epoch": 0.5751782242384964, "grad_norm": 0.029632898047566414, "learning_rate": 0.00019026958909041812, "loss": 0.376, "step": 7100 }, { "epoch": 0.5752592352559948, "grad_norm": 0.031298041343688965, "learning_rate": 0.00019026508843782348, "loss": 0.3796, "step": 7101 }, { "epoch": 0.5753402462734932, "grad_norm": 0.031095469370484352, "learning_rate": 0.00019026058778522887, "loss": 0.2957, "step": 7102 }, { "epoch": 0.5754212572909916, "grad_norm": 0.02930424176156521, "learning_rate": 0.00019025608713263423, "loss": 0.3761, "step": 7103 }, { "epoch": 0.5755022683084899, "grad_norm": 0.030215157195925713, "learning_rate": 0.00019025158648003961, "loss": 0.3833, "step": 7104 }, { "epoch": 0.5755832793259883, "grad_norm": 0.033876512199640274, "learning_rate": 0.000190247085827445, "loss": 0.3075, "step": 7105 }, { "epoch": 0.5756642903434868, "grad_norm": 0.03203409165143967, "learning_rate": 0.00019024258517485036, "loss": 0.3537, "step": 7106 }, { "epoch": 0.5757453013609851, "grad_norm": 0.029567353427410126, "learning_rate": 0.00019023808452225572, "loss": 0.3328, "step": 7107 }, { "epoch": 0.5758263123784835, "grad_norm": 0.0335480161011219, "learning_rate": 0.0001902335838696611, "loss": 0.3695, "step": 7108 }, { "epoch": 0.5759073233959818, "grad_norm": 0.03027655929327011, "learning_rate": 0.00019022908321706647, "loss": 0.2899, "step": 7109 }, { "epoch": 0.5759883344134802, "grad_norm": 0.03336027264595032, "learning_rate": 0.00019022458256447186, "loss": 0.3664, "step": 7110 }, { "epoch": 0.5760693454309787, "grad_norm": 0.03019268438220024, "learning_rate": 0.00019022008191187724, "loss": 0.3169, "step": 7111 }, { "epoch": 0.576150356448477, "grad_norm": 0.02925017476081848, "learning_rate": 0.0001902155812592826, "loss": 0.3746, "step": 7112 }, { "epoch": 0.5762313674659754, "grad_norm": 0.03173941746354103, "learning_rate": 0.00019021108060668796, "loss": 0.3079, "step": 7113 }, { "epoch": 0.5763123784834737, "grad_norm": 0.027620505541563034, "learning_rate": 0.00019020657995409335, "loss": 0.3396, "step": 7114 }, { "epoch": 0.5763933895009722, "grad_norm": 0.032209865748882294, "learning_rate": 0.00019020207930149874, "loss": 0.3093, "step": 7115 }, { "epoch": 0.5764744005184705, "grad_norm": 0.033973708748817444, "learning_rate": 0.0001901975786489041, "loss": 0.3483, "step": 7116 }, { "epoch": 0.5765554115359689, "grad_norm": 0.03276333212852478, "learning_rate": 0.00019019307799630948, "loss": 0.373, "step": 7117 }, { "epoch": 0.5766364225534673, "grad_norm": 0.02871558628976345, "learning_rate": 0.00019018857734371484, "loss": 0.3484, "step": 7118 }, { "epoch": 0.5767174335709656, "grad_norm": 0.03591687232255936, "learning_rate": 0.0001901840766911202, "loss": 0.3226, "step": 7119 }, { "epoch": 0.5767984445884641, "grad_norm": 0.02873329259455204, "learning_rate": 0.0001901795760385256, "loss": 0.3407, "step": 7120 }, { "epoch": 0.5768794556059624, "grad_norm": 0.02535742148756981, "learning_rate": 0.00019017507538593098, "loss": 0.3217, "step": 7121 }, { "epoch": 0.5769604666234608, "grad_norm": 0.03068891353905201, "learning_rate": 0.00019017057473333634, "loss": 0.3338, "step": 7122 }, { "epoch": 0.5770414776409591, "grad_norm": 0.033300936222076416, "learning_rate": 0.00019016607408074173, "loss": 0.3581, "step": 7123 }, { "epoch": 0.5771224886584575, "grad_norm": 0.029896607622504234, "learning_rate": 0.00019016157342814709, "loss": 0.3226, "step": 7124 }, { "epoch": 0.577203499675956, "grad_norm": 0.03473919630050659, "learning_rate": 0.00019015707277555245, "loss": 0.3508, "step": 7125 }, { "epoch": 0.5772845106934543, "grad_norm": 0.03545752912759781, "learning_rate": 0.00019015257212295783, "loss": 0.3609, "step": 7126 }, { "epoch": 0.5773655217109527, "grad_norm": 0.03342561423778534, "learning_rate": 0.00019014807147036322, "loss": 0.3596, "step": 7127 }, { "epoch": 0.577446532728451, "grad_norm": 0.032791558653116226, "learning_rate": 0.00019014357081776858, "loss": 0.3319, "step": 7128 }, { "epoch": 0.5775275437459495, "grad_norm": 0.029370320960879326, "learning_rate": 0.00019013907016517397, "loss": 0.355, "step": 7129 }, { "epoch": 0.5776085547634479, "grad_norm": 0.03259564936161041, "learning_rate": 0.00019013456951257933, "loss": 0.386, "step": 7130 }, { "epoch": 0.5776895657809462, "grad_norm": 0.03333577513694763, "learning_rate": 0.0001901300688599847, "loss": 0.3386, "step": 7131 }, { "epoch": 0.5777705767984446, "grad_norm": 0.029573451727628708, "learning_rate": 0.00019012556820739007, "loss": 0.3498, "step": 7132 }, { "epoch": 0.5778515878159429, "grad_norm": 0.030514564365148544, "learning_rate": 0.00019012106755479546, "loss": 0.3264, "step": 7133 }, { "epoch": 0.5779325988334414, "grad_norm": 0.029826374724507332, "learning_rate": 0.00019011656690220082, "loss": 0.3528, "step": 7134 }, { "epoch": 0.5780136098509397, "grad_norm": 0.02787317894399166, "learning_rate": 0.0001901120662496062, "loss": 0.3244, "step": 7135 }, { "epoch": 0.5780946208684381, "grad_norm": 0.03055379167199135, "learning_rate": 0.00019010756559701157, "loss": 0.3541, "step": 7136 }, { "epoch": 0.5781756318859365, "grad_norm": 0.02805292047560215, "learning_rate": 0.00019010306494441693, "loss": 0.3204, "step": 7137 }, { "epoch": 0.5782566429034348, "grad_norm": 0.036902476102113724, "learning_rate": 0.00019009856429182234, "loss": 0.3539, "step": 7138 }, { "epoch": 0.5783376539209333, "grad_norm": 0.031508274376392365, "learning_rate": 0.0001900940636392277, "loss": 0.3322, "step": 7139 }, { "epoch": 0.5784186649384316, "grad_norm": 0.03548916056752205, "learning_rate": 0.00019008956298663306, "loss": 0.3459, "step": 7140 }, { "epoch": 0.57849967595593, "grad_norm": 0.03395074978470802, "learning_rate": 0.00019008506233403845, "loss": 0.3777, "step": 7141 }, { "epoch": 0.5785806869734283, "grad_norm": 0.029324263334274292, "learning_rate": 0.0001900805616814438, "loss": 0.3407, "step": 7142 }, { "epoch": 0.5786616979909268, "grad_norm": 0.033915892243385315, "learning_rate": 0.00019007606102884917, "loss": 0.3588, "step": 7143 }, { "epoch": 0.5787427090084252, "grad_norm": 0.03298439085483551, "learning_rate": 0.00019007156037625459, "loss": 0.3362, "step": 7144 }, { "epoch": 0.5788237200259235, "grad_norm": 0.034808751195669174, "learning_rate": 0.00019006705972365995, "loss": 0.3884, "step": 7145 }, { "epoch": 0.5789047310434219, "grad_norm": 0.03707476705312729, "learning_rate": 0.0001900625590710653, "loss": 0.4259, "step": 7146 }, { "epoch": 0.5789857420609202, "grad_norm": 0.03482748568058014, "learning_rate": 0.0001900580584184707, "loss": 0.355, "step": 7147 }, { "epoch": 0.5790667530784187, "grad_norm": 0.028616994619369507, "learning_rate": 0.00019005355776587605, "loss": 0.3333, "step": 7148 }, { "epoch": 0.5791477640959171, "grad_norm": 0.03026905469596386, "learning_rate": 0.0001900490571132814, "loss": 0.2797, "step": 7149 }, { "epoch": 0.5792287751134154, "grad_norm": 0.03337970748543739, "learning_rate": 0.00019004455646068683, "loss": 0.3674, "step": 7150 }, { "epoch": 0.5793097861309138, "grad_norm": 0.030933627858757973, "learning_rate": 0.0001900400558080922, "loss": 0.3401, "step": 7151 }, { "epoch": 0.5793907971484121, "grad_norm": 0.03672805428504944, "learning_rate": 0.00019003555515549755, "loss": 0.3895, "step": 7152 }, { "epoch": 0.5794718081659106, "grad_norm": 0.034799739718437195, "learning_rate": 0.00019003105450290293, "loss": 0.3476, "step": 7153 }, { "epoch": 0.579552819183409, "grad_norm": 0.02711411938071251, "learning_rate": 0.0001900265538503083, "loss": 0.3195, "step": 7154 }, { "epoch": 0.5796338302009073, "grad_norm": 0.026923276484012604, "learning_rate": 0.00019002205319771365, "loss": 0.3521, "step": 7155 }, { "epoch": 0.5797148412184057, "grad_norm": 0.03141951188445091, "learning_rate": 0.00019001755254511907, "loss": 0.3422, "step": 7156 }, { "epoch": 0.5797958522359041, "grad_norm": 0.02795341983437538, "learning_rate": 0.00019001305189252443, "loss": 0.3449, "step": 7157 }, { "epoch": 0.5798768632534025, "grad_norm": 0.028946880251169205, "learning_rate": 0.0001900085512399298, "loss": 0.3549, "step": 7158 }, { "epoch": 0.5799578742709008, "grad_norm": 0.029874345287680626, "learning_rate": 0.00019000405058733518, "loss": 0.3188, "step": 7159 }, { "epoch": 0.5800388852883992, "grad_norm": 0.029896339401602745, "learning_rate": 0.00018999954993474054, "loss": 0.3976, "step": 7160 }, { "epoch": 0.5801198963058976, "grad_norm": 0.030478930100798607, "learning_rate": 0.00018999504928214592, "loss": 0.3394, "step": 7161 }, { "epoch": 0.580200907323396, "grad_norm": 0.02973802201449871, "learning_rate": 0.0001899905486295513, "loss": 0.3528, "step": 7162 }, { "epoch": 0.5802819183408944, "grad_norm": 0.02996090054512024, "learning_rate": 0.00018998604797695667, "loss": 0.3468, "step": 7163 }, { "epoch": 0.5803629293583927, "grad_norm": 0.026388095691800117, "learning_rate": 0.00018998154732436203, "loss": 0.3132, "step": 7164 }, { "epoch": 0.5804439403758911, "grad_norm": 0.029814952984452248, "learning_rate": 0.00018997704667176742, "loss": 0.3203, "step": 7165 }, { "epoch": 0.5805249513933896, "grad_norm": 0.033563531935214996, "learning_rate": 0.00018997254601917278, "loss": 0.3301, "step": 7166 }, { "epoch": 0.5806059624108879, "grad_norm": 0.03099728748202324, "learning_rate": 0.00018996804536657816, "loss": 0.3577, "step": 7167 }, { "epoch": 0.5806869734283863, "grad_norm": 0.03393007069826126, "learning_rate": 0.00018996354471398355, "loss": 0.3309, "step": 7168 }, { "epoch": 0.5807679844458846, "grad_norm": 0.02811267040669918, "learning_rate": 0.0001899590440613889, "loss": 0.3344, "step": 7169 }, { "epoch": 0.580848995463383, "grad_norm": 0.030411504209041595, "learning_rate": 0.00018995454340879427, "loss": 0.3786, "step": 7170 }, { "epoch": 0.5809300064808814, "grad_norm": 0.0271066315472126, "learning_rate": 0.00018995004275619966, "loss": 0.3355, "step": 7171 }, { "epoch": 0.5810110174983798, "grad_norm": 0.03240623325109482, "learning_rate": 0.00018994554210360502, "loss": 0.3215, "step": 7172 }, { "epoch": 0.5810920285158782, "grad_norm": 0.03191445395350456, "learning_rate": 0.0001899410414510104, "loss": 0.3229, "step": 7173 }, { "epoch": 0.5811730395333765, "grad_norm": 0.029774196445941925, "learning_rate": 0.0001899365407984158, "loss": 0.3283, "step": 7174 }, { "epoch": 0.5812540505508749, "grad_norm": 0.03459349274635315, "learning_rate": 0.00018993204014582115, "loss": 0.3482, "step": 7175 }, { "epoch": 0.5813350615683733, "grad_norm": 0.033518653362989426, "learning_rate": 0.0001899275394932265, "loss": 0.3709, "step": 7176 }, { "epoch": 0.5814160725858717, "grad_norm": 0.03485652804374695, "learning_rate": 0.0001899230388406319, "loss": 0.3757, "step": 7177 }, { "epoch": 0.58149708360337, "grad_norm": 0.03507082536816597, "learning_rate": 0.00018991853818803726, "loss": 0.3428, "step": 7178 }, { "epoch": 0.5815780946208684, "grad_norm": 0.028515536338090897, "learning_rate": 0.00018991403753544265, "loss": 0.3469, "step": 7179 }, { "epoch": 0.5816591056383669, "grad_norm": 0.0331462137401104, "learning_rate": 0.00018990953688284803, "loss": 0.3641, "step": 7180 }, { "epoch": 0.5817401166558652, "grad_norm": 0.028841599822044373, "learning_rate": 0.0001899050362302534, "loss": 0.3821, "step": 7181 }, { "epoch": 0.5818211276733636, "grad_norm": 0.03024439513683319, "learning_rate": 0.00018990053557765875, "loss": 0.4041, "step": 7182 }, { "epoch": 0.5819021386908619, "grad_norm": 0.03342823311686516, "learning_rate": 0.00018989603492506414, "loss": 0.3355, "step": 7183 }, { "epoch": 0.5819831497083603, "grad_norm": 0.0266738161444664, "learning_rate": 0.0001898915342724695, "loss": 0.3015, "step": 7184 }, { "epoch": 0.5820641607258588, "grad_norm": 0.03139263391494751, "learning_rate": 0.0001898870336198749, "loss": 0.3446, "step": 7185 }, { "epoch": 0.5821451717433571, "grad_norm": 0.029045145958662033, "learning_rate": 0.00018988253296728028, "loss": 0.3601, "step": 7186 }, { "epoch": 0.5822261827608555, "grad_norm": 0.028520744293928146, "learning_rate": 0.00018987803231468564, "loss": 0.3602, "step": 7187 }, { "epoch": 0.5823071937783538, "grad_norm": 0.02972353622317314, "learning_rate": 0.000189873531662091, "loss": 0.3549, "step": 7188 }, { "epoch": 0.5823882047958522, "grad_norm": 0.029112640768289566, "learning_rate": 0.00018986903100949638, "loss": 0.2799, "step": 7189 }, { "epoch": 0.5824692158133506, "grad_norm": 0.029921172186732292, "learning_rate": 0.00018986453035690177, "loss": 0.353, "step": 7190 }, { "epoch": 0.582550226830849, "grad_norm": 0.03464297205209732, "learning_rate": 0.00018986002970430713, "loss": 0.3872, "step": 7191 }, { "epoch": 0.5826312378483474, "grad_norm": 0.028278376907110214, "learning_rate": 0.00018985552905171252, "loss": 0.3366, "step": 7192 }, { "epoch": 0.5827122488658457, "grad_norm": 0.0291961207985878, "learning_rate": 0.00018985102839911788, "loss": 0.3643, "step": 7193 }, { "epoch": 0.5827932598833442, "grad_norm": 0.029754353687167168, "learning_rate": 0.00018984652774652324, "loss": 0.2975, "step": 7194 }, { "epoch": 0.5828742709008425, "grad_norm": 0.036267850548028946, "learning_rate": 0.00018984202709392863, "loss": 0.405, "step": 7195 }, { "epoch": 0.5829552819183409, "grad_norm": 0.03386032581329346, "learning_rate": 0.000189837526441334, "loss": 0.396, "step": 7196 }, { "epoch": 0.5830362929358393, "grad_norm": 0.030160658061504364, "learning_rate": 0.00018983302578873937, "loss": 0.3308, "step": 7197 }, { "epoch": 0.5831173039533376, "grad_norm": 0.03126070648431778, "learning_rate": 0.00018982852513614476, "loss": 0.377, "step": 7198 }, { "epoch": 0.5831983149708361, "grad_norm": 0.031703200191259384, "learning_rate": 0.00018982402448355012, "loss": 0.3651, "step": 7199 }, { "epoch": 0.5832793259883344, "grad_norm": 0.03930109739303589, "learning_rate": 0.00018981952383095548, "loss": 0.3751, "step": 7200 }, { "epoch": 0.5833603370058328, "grad_norm": 0.033295638859272, "learning_rate": 0.00018981502317836087, "loss": 0.3573, "step": 7201 }, { "epoch": 0.5834413480233311, "grad_norm": 0.030004773288965225, "learning_rate": 0.00018981052252576625, "loss": 0.3387, "step": 7202 }, { "epoch": 0.5835223590408296, "grad_norm": 0.028721129521727562, "learning_rate": 0.00018980602187317161, "loss": 0.3108, "step": 7203 }, { "epoch": 0.583603370058328, "grad_norm": 0.030161743983626366, "learning_rate": 0.000189801521220577, "loss": 0.3435, "step": 7204 }, { "epoch": 0.5836843810758263, "grad_norm": 0.02853168360888958, "learning_rate": 0.00018979702056798236, "loss": 0.3471, "step": 7205 }, { "epoch": 0.5837653920933247, "grad_norm": 0.030113236978650093, "learning_rate": 0.00018979251991538772, "loss": 0.3293, "step": 7206 }, { "epoch": 0.583846403110823, "grad_norm": 0.03496495261788368, "learning_rate": 0.0001897880192627931, "loss": 0.389, "step": 7207 }, { "epoch": 0.5839274141283215, "grad_norm": 0.03152168542146683, "learning_rate": 0.0001897835186101985, "loss": 0.3555, "step": 7208 }, { "epoch": 0.5840084251458199, "grad_norm": 0.03420482948422432, "learning_rate": 0.00018977901795760386, "loss": 0.3499, "step": 7209 }, { "epoch": 0.5840894361633182, "grad_norm": 0.02827419713139534, "learning_rate": 0.00018977451730500924, "loss": 0.3431, "step": 7210 }, { "epoch": 0.5841704471808166, "grad_norm": 0.032837990671396255, "learning_rate": 0.0001897700166524146, "loss": 0.3529, "step": 7211 }, { "epoch": 0.5842514581983149, "grad_norm": 0.0367714948952198, "learning_rate": 0.00018976551599981996, "loss": 0.3061, "step": 7212 }, { "epoch": 0.5843324692158134, "grad_norm": 0.02832326851785183, "learning_rate": 0.00018976101534722535, "loss": 0.312, "step": 7213 }, { "epoch": 0.5844134802333117, "grad_norm": 0.03139325976371765, "learning_rate": 0.00018975651469463074, "loss": 0.3408, "step": 7214 }, { "epoch": 0.5844944912508101, "grad_norm": 0.03237767145037651, "learning_rate": 0.0001897520140420361, "loss": 0.3624, "step": 7215 }, { "epoch": 0.5845755022683085, "grad_norm": 0.03246236592531204, "learning_rate": 0.00018974751338944148, "loss": 0.344, "step": 7216 }, { "epoch": 0.5846565132858069, "grad_norm": 0.033114735037088394, "learning_rate": 0.00018974301273684684, "loss": 0.3782, "step": 7217 }, { "epoch": 0.5847375243033053, "grad_norm": 0.037286460399627686, "learning_rate": 0.0001897385120842522, "loss": 0.3727, "step": 7218 }, { "epoch": 0.5848185353208036, "grad_norm": 0.03592041879892349, "learning_rate": 0.00018973401143165762, "loss": 0.4344, "step": 7219 }, { "epoch": 0.584899546338302, "grad_norm": 0.03247380256652832, "learning_rate": 0.00018972951077906298, "loss": 0.3834, "step": 7220 }, { "epoch": 0.5849805573558003, "grad_norm": 0.03495294228196144, "learning_rate": 0.00018972501012646834, "loss": 0.4064, "step": 7221 }, { "epoch": 0.5850615683732988, "grad_norm": 0.028913801535964012, "learning_rate": 0.00018972050947387373, "loss": 0.3313, "step": 7222 }, { "epoch": 0.5851425793907972, "grad_norm": 0.028750916942954063, "learning_rate": 0.00018971600882127909, "loss": 0.3515, "step": 7223 }, { "epoch": 0.5852235904082955, "grad_norm": 0.03291678428649902, "learning_rate": 0.00018971150816868445, "loss": 0.3918, "step": 7224 }, { "epoch": 0.5853046014257939, "grad_norm": 0.030185990035533905, "learning_rate": 0.00018970700751608986, "loss": 0.3017, "step": 7225 }, { "epoch": 0.5853856124432922, "grad_norm": 0.03135082498192787, "learning_rate": 0.00018970250686349522, "loss": 0.3407, "step": 7226 }, { "epoch": 0.5854666234607907, "grad_norm": 0.03142891451716423, "learning_rate": 0.00018969800621090058, "loss": 0.3675, "step": 7227 }, { "epoch": 0.5855476344782891, "grad_norm": 0.03312395513057709, "learning_rate": 0.00018969350555830597, "loss": 0.3129, "step": 7228 }, { "epoch": 0.5856286454957874, "grad_norm": 0.030316771939396858, "learning_rate": 0.00018968900490571133, "loss": 0.3024, "step": 7229 }, { "epoch": 0.5857096565132858, "grad_norm": 0.03373432531952858, "learning_rate": 0.00018968450425311671, "loss": 0.3566, "step": 7230 }, { "epoch": 0.5857906675307842, "grad_norm": 0.03295702487230301, "learning_rate": 0.0001896800036005221, "loss": 0.3486, "step": 7231 }, { "epoch": 0.5858716785482826, "grad_norm": 0.03206154331564903, "learning_rate": 0.00018967550294792746, "loss": 0.3414, "step": 7232 }, { "epoch": 0.585952689565781, "grad_norm": 0.036674872040748596, "learning_rate": 0.00018967100229533282, "loss": 0.3827, "step": 7233 }, { "epoch": 0.5860337005832793, "grad_norm": 0.028908833861351013, "learning_rate": 0.0001896665016427382, "loss": 0.3596, "step": 7234 }, { "epoch": 0.5861147116007777, "grad_norm": 0.03456667810678482, "learning_rate": 0.00018966200099014357, "loss": 0.3575, "step": 7235 }, { "epoch": 0.5861957226182761, "grad_norm": 0.02778259664773941, "learning_rate": 0.00018965750033754896, "loss": 0.3151, "step": 7236 }, { "epoch": 0.5862767336357745, "grad_norm": 0.0309758223593235, "learning_rate": 0.00018965299968495434, "loss": 0.3201, "step": 7237 }, { "epoch": 0.5863577446532728, "grad_norm": 0.02835778519511223, "learning_rate": 0.0001896484990323597, "loss": 0.3391, "step": 7238 }, { "epoch": 0.5864387556707712, "grad_norm": 0.03552369028329849, "learning_rate": 0.00018964399837976506, "loss": 0.4067, "step": 7239 }, { "epoch": 0.5865197666882696, "grad_norm": 0.03288602828979492, "learning_rate": 0.00018963949772717045, "loss": 0.3515, "step": 7240 }, { "epoch": 0.586600777705768, "grad_norm": 0.03445067256689072, "learning_rate": 0.0001896349970745758, "loss": 0.3746, "step": 7241 }, { "epoch": 0.5866817887232664, "grad_norm": 0.02727990224957466, "learning_rate": 0.0001896304964219812, "loss": 0.3459, "step": 7242 }, { "epoch": 0.5867627997407647, "grad_norm": 0.027717137709259987, "learning_rate": 0.00018962599576938659, "loss": 0.3723, "step": 7243 }, { "epoch": 0.5868438107582631, "grad_norm": 0.03080059587955475, "learning_rate": 0.00018962149511679195, "loss": 0.3556, "step": 7244 }, { "epoch": 0.5869248217757616, "grad_norm": 0.03022826835513115, "learning_rate": 0.0001896169944641973, "loss": 0.3416, "step": 7245 }, { "epoch": 0.5870058327932599, "grad_norm": 0.03362521901726723, "learning_rate": 0.0001896124938116027, "loss": 0.3765, "step": 7246 }, { "epoch": 0.5870868438107583, "grad_norm": 0.02809925563633442, "learning_rate": 0.00018960799315900805, "loss": 0.3707, "step": 7247 }, { "epoch": 0.5871678548282566, "grad_norm": 0.02911153994500637, "learning_rate": 0.00018960349250641344, "loss": 0.326, "step": 7248 }, { "epoch": 0.587248865845755, "grad_norm": 0.027382275089621544, "learning_rate": 0.00018959899185381883, "loss": 0.2986, "step": 7249 }, { "epoch": 0.5873298768632534, "grad_norm": 0.03484692424535751, "learning_rate": 0.0001895944912012242, "loss": 0.3613, "step": 7250 }, { "epoch": 0.5874108878807518, "grad_norm": 0.03420795127749443, "learning_rate": 0.00018958999054862955, "loss": 0.3925, "step": 7251 }, { "epoch": 0.5874918988982502, "grad_norm": 0.03072218969464302, "learning_rate": 0.00018958548989603493, "loss": 0.3445, "step": 7252 }, { "epoch": 0.5875729099157485, "grad_norm": 0.028487669304013252, "learning_rate": 0.0001895809892434403, "loss": 0.3237, "step": 7253 }, { "epoch": 0.587653920933247, "grad_norm": 0.030667198821902275, "learning_rate": 0.00018957648859084568, "loss": 0.3768, "step": 7254 }, { "epoch": 0.5877349319507453, "grad_norm": 0.03093591146171093, "learning_rate": 0.00018957198793825107, "loss": 0.3227, "step": 7255 }, { "epoch": 0.5878159429682437, "grad_norm": 0.02597019262611866, "learning_rate": 0.00018956748728565643, "loss": 0.3214, "step": 7256 }, { "epoch": 0.587896953985742, "grad_norm": 0.02928873524069786, "learning_rate": 0.0001895629866330618, "loss": 0.35, "step": 7257 }, { "epoch": 0.5879779650032404, "grad_norm": 0.03613502159714699, "learning_rate": 0.00018955848598046718, "loss": 0.3712, "step": 7258 }, { "epoch": 0.5880589760207389, "grad_norm": 0.0315786674618721, "learning_rate": 0.00018955398532787254, "loss": 0.3383, "step": 7259 }, { "epoch": 0.5881399870382372, "grad_norm": 0.03728878125548363, "learning_rate": 0.00018954948467527792, "loss": 0.3634, "step": 7260 }, { "epoch": 0.5882209980557356, "grad_norm": 0.03164689242839813, "learning_rate": 0.0001895449840226833, "loss": 0.3308, "step": 7261 }, { "epoch": 0.5883020090732339, "grad_norm": 0.03446730971336365, "learning_rate": 0.00018954048337008867, "loss": 0.4361, "step": 7262 }, { "epoch": 0.5883830200907323, "grad_norm": 0.036188770085573196, "learning_rate": 0.00018953598271749403, "loss": 0.4244, "step": 7263 }, { "epoch": 0.5884640311082308, "grad_norm": 0.031748078763484955, "learning_rate": 0.00018953148206489942, "loss": 0.365, "step": 7264 }, { "epoch": 0.5885450421257291, "grad_norm": 0.03347034379839897, "learning_rate": 0.00018952698141230478, "loss": 0.3854, "step": 7265 }, { "epoch": 0.5886260531432275, "grad_norm": 0.03278379887342453, "learning_rate": 0.00018952248075971016, "loss": 0.3878, "step": 7266 }, { "epoch": 0.5887070641607258, "grad_norm": 0.032228413969278336, "learning_rate": 0.00018951798010711555, "loss": 0.3348, "step": 7267 }, { "epoch": 0.5887880751782243, "grad_norm": 0.03474995121359825, "learning_rate": 0.0001895134794545209, "loss": 0.4198, "step": 7268 }, { "epoch": 0.5888690861957226, "grad_norm": 0.03140547126531601, "learning_rate": 0.00018950897880192627, "loss": 0.3601, "step": 7269 }, { "epoch": 0.588950097213221, "grad_norm": 0.02940104901790619, "learning_rate": 0.00018950447814933166, "loss": 0.3501, "step": 7270 }, { "epoch": 0.5890311082307194, "grad_norm": 0.044192470610141754, "learning_rate": 0.00018949997749673705, "loss": 0.3985, "step": 7271 }, { "epoch": 0.5891121192482177, "grad_norm": 0.030684489756822586, "learning_rate": 0.0001894954768441424, "loss": 0.359, "step": 7272 }, { "epoch": 0.5891931302657162, "grad_norm": 0.0336628220975399, "learning_rate": 0.0001894909761915478, "loss": 0.3745, "step": 7273 }, { "epoch": 0.5892741412832145, "grad_norm": 0.028922492638230324, "learning_rate": 0.00018948647553895315, "loss": 0.2946, "step": 7274 }, { "epoch": 0.5893551523007129, "grad_norm": 0.032071251422166824, "learning_rate": 0.00018948197488635851, "loss": 0.3145, "step": 7275 }, { "epoch": 0.5894361633182112, "grad_norm": 0.03068896383047104, "learning_rate": 0.0001894774742337639, "loss": 0.3355, "step": 7276 }, { "epoch": 0.5895171743357096, "grad_norm": 0.03323281556367874, "learning_rate": 0.0001894729735811693, "loss": 0.3663, "step": 7277 }, { "epoch": 0.5895981853532081, "grad_norm": 0.035299479961395264, "learning_rate": 0.00018946847292857465, "loss": 0.3555, "step": 7278 }, { "epoch": 0.5896791963707064, "grad_norm": 0.03173135221004486, "learning_rate": 0.00018946397227598004, "loss": 0.3478, "step": 7279 }, { "epoch": 0.5897602073882048, "grad_norm": 0.03322529047727585, "learning_rate": 0.0001894594716233854, "loss": 0.399, "step": 7280 }, { "epoch": 0.5898412184057031, "grad_norm": 0.031474243849515915, "learning_rate": 0.00018945497097079076, "loss": 0.3601, "step": 7281 }, { "epoch": 0.5899222294232016, "grad_norm": 0.03509430214762688, "learning_rate": 0.00018945047031819614, "loss": 0.3971, "step": 7282 }, { "epoch": 0.5900032404407, "grad_norm": 0.033451810479164124, "learning_rate": 0.00018944596966560153, "loss": 0.3539, "step": 7283 }, { "epoch": 0.5900842514581983, "grad_norm": 0.028561068698763847, "learning_rate": 0.0001894414690130069, "loss": 0.3101, "step": 7284 }, { "epoch": 0.5901652624756967, "grad_norm": 0.0287350844591856, "learning_rate": 0.00018943696836041228, "loss": 0.3632, "step": 7285 }, { "epoch": 0.590246273493195, "grad_norm": 0.030074043199419975, "learning_rate": 0.00018943246770781764, "loss": 0.334, "step": 7286 }, { "epoch": 0.5903272845106935, "grad_norm": 0.03051457181572914, "learning_rate": 0.000189427967055223, "loss": 0.341, "step": 7287 }, { "epoch": 0.5904082955281919, "grad_norm": 0.028257975354790688, "learning_rate": 0.00018942346640262838, "loss": 0.3435, "step": 7288 }, { "epoch": 0.5904893065456902, "grad_norm": 0.026414060965180397, "learning_rate": 0.00018941896575003377, "loss": 0.2944, "step": 7289 }, { "epoch": 0.5905703175631886, "grad_norm": 0.029365338385105133, "learning_rate": 0.00018941446509743913, "loss": 0.3395, "step": 7290 }, { "epoch": 0.5906513285806869, "grad_norm": 0.03021685592830181, "learning_rate": 0.00018940996444484452, "loss": 0.3159, "step": 7291 }, { "epoch": 0.5907323395981854, "grad_norm": 0.026979271322488785, "learning_rate": 0.00018940546379224988, "loss": 0.3154, "step": 7292 }, { "epoch": 0.5908133506156837, "grad_norm": 0.03209391236305237, "learning_rate": 0.00018940096313965524, "loss": 0.368, "step": 7293 }, { "epoch": 0.5908943616331821, "grad_norm": 0.032004281878471375, "learning_rate": 0.00018939646248706063, "loss": 0.3188, "step": 7294 }, { "epoch": 0.5909753726506805, "grad_norm": 0.032680317759513855, "learning_rate": 0.000189391961834466, "loss": 0.3584, "step": 7295 }, { "epoch": 0.5910563836681789, "grad_norm": 0.03193636238574982, "learning_rate": 0.00018938746118187137, "loss": 0.3295, "step": 7296 }, { "epoch": 0.5911373946856773, "grad_norm": 0.029368802905082703, "learning_rate": 0.00018938296052927676, "loss": 0.3596, "step": 7297 }, { "epoch": 0.5912184057031756, "grad_norm": 0.030950840562582016, "learning_rate": 0.00018937845987668212, "loss": 0.3813, "step": 7298 }, { "epoch": 0.591299416720674, "grad_norm": 0.03713708743453026, "learning_rate": 0.0001893739592240875, "loss": 0.415, "step": 7299 }, { "epoch": 0.5913804277381723, "grad_norm": 0.02990836836397648, "learning_rate": 0.0001893694585714929, "loss": 0.3518, "step": 7300 }, { "epoch": 0.5914614387556708, "grad_norm": 0.029787639155983925, "learning_rate": 0.00018936495791889825, "loss": 0.3212, "step": 7301 }, { "epoch": 0.5915424497731692, "grad_norm": 0.03178705275058746, "learning_rate": 0.00018936045726630361, "loss": 0.3802, "step": 7302 }, { "epoch": 0.5916234607906675, "grad_norm": 0.034910041838884354, "learning_rate": 0.000189355956613709, "loss": 0.3656, "step": 7303 }, { "epoch": 0.5917044718081659, "grad_norm": 0.02986535057425499, "learning_rate": 0.00018935145596111436, "loss": 0.322, "step": 7304 }, { "epoch": 0.5917854828256643, "grad_norm": 0.03602313622832298, "learning_rate": 0.00018934695530851975, "loss": 0.3598, "step": 7305 }, { "epoch": 0.5918664938431627, "grad_norm": 0.028011471033096313, "learning_rate": 0.00018934245465592514, "loss": 0.3045, "step": 7306 }, { "epoch": 0.5919475048606611, "grad_norm": 0.03489091992378235, "learning_rate": 0.0001893379540033305, "loss": 0.3552, "step": 7307 }, { "epoch": 0.5920285158781594, "grad_norm": 0.033708952367305756, "learning_rate": 0.00018933345335073586, "loss": 0.3914, "step": 7308 }, { "epoch": 0.5921095268956578, "grad_norm": 0.03242769464850426, "learning_rate": 0.00018932895269814124, "loss": 0.35, "step": 7309 }, { "epoch": 0.5921905379131562, "grad_norm": 0.03306904435157776, "learning_rate": 0.0001893244520455466, "loss": 0.3893, "step": 7310 }, { "epoch": 0.5922715489306546, "grad_norm": 0.033809393644332886, "learning_rate": 0.000189319951392952, "loss": 0.4143, "step": 7311 }, { "epoch": 0.592352559948153, "grad_norm": 0.030917203053832054, "learning_rate": 0.00018931545074035738, "loss": 0.3322, "step": 7312 }, { "epoch": 0.5924335709656513, "grad_norm": 0.03382820636034012, "learning_rate": 0.00018931095008776274, "loss": 0.3669, "step": 7313 }, { "epoch": 0.5925145819831497, "grad_norm": 0.03001822717487812, "learning_rate": 0.0001893064494351681, "loss": 0.3484, "step": 7314 }, { "epoch": 0.5925955930006481, "grad_norm": 0.03102373518049717, "learning_rate": 0.00018930194878257348, "loss": 0.4064, "step": 7315 }, { "epoch": 0.5926766040181465, "grad_norm": 0.03309882804751396, "learning_rate": 0.00018929744812997884, "loss": 0.3638, "step": 7316 }, { "epoch": 0.5927576150356448, "grad_norm": 0.03323843702673912, "learning_rate": 0.00018929294747738423, "loss": 0.4001, "step": 7317 }, { "epoch": 0.5928386260531432, "grad_norm": 0.030602650716900826, "learning_rate": 0.00018928844682478962, "loss": 0.3165, "step": 7318 }, { "epoch": 0.5929196370706417, "grad_norm": 0.025571037083864212, "learning_rate": 0.00018928394617219498, "loss": 0.2903, "step": 7319 }, { "epoch": 0.59300064808814, "grad_norm": 0.03153804689645767, "learning_rate": 0.00018927944551960034, "loss": 0.3626, "step": 7320 }, { "epoch": 0.5930816591056384, "grad_norm": 0.030782267451286316, "learning_rate": 0.00018927494486700573, "loss": 0.3972, "step": 7321 }, { "epoch": 0.5931626701231367, "grad_norm": 0.030006734654307365, "learning_rate": 0.0001892704442144111, "loss": 0.3545, "step": 7322 }, { "epoch": 0.5932436811406351, "grad_norm": 0.028132524341344833, "learning_rate": 0.00018926594356181647, "loss": 0.2896, "step": 7323 }, { "epoch": 0.5933246921581335, "grad_norm": 0.033880989998579025, "learning_rate": 0.00018926144290922186, "loss": 0.4088, "step": 7324 }, { "epoch": 0.5934057031756319, "grad_norm": 0.03421669453382492, "learning_rate": 0.00018925694225662722, "loss": 0.3562, "step": 7325 }, { "epoch": 0.5934867141931303, "grad_norm": 0.03325583040714264, "learning_rate": 0.00018925244160403258, "loss": 0.3458, "step": 7326 }, { "epoch": 0.5935677252106286, "grad_norm": 0.030235623940825462, "learning_rate": 0.00018924794095143797, "loss": 0.3524, "step": 7327 }, { "epoch": 0.593648736228127, "grad_norm": 0.02768390066921711, "learning_rate": 0.00018924344029884333, "loss": 0.3424, "step": 7328 }, { "epoch": 0.5937297472456254, "grad_norm": 0.03422306105494499, "learning_rate": 0.00018923893964624872, "loss": 0.3298, "step": 7329 }, { "epoch": 0.5938107582631238, "grad_norm": 0.029942166060209274, "learning_rate": 0.0001892344389936541, "loss": 0.3097, "step": 7330 }, { "epoch": 0.5938917692806222, "grad_norm": 0.03228619322180748, "learning_rate": 0.00018922993834105946, "loss": 0.3228, "step": 7331 }, { "epoch": 0.5939727802981205, "grad_norm": 0.029120970517396927, "learning_rate": 0.00018922543768846482, "loss": 0.3278, "step": 7332 }, { "epoch": 0.594053791315619, "grad_norm": 0.03116501122713089, "learning_rate": 0.0001892209370358702, "loss": 0.3299, "step": 7333 }, { "epoch": 0.5941348023331173, "grad_norm": 0.026393789798021317, "learning_rate": 0.00018921643638327557, "loss": 0.296, "step": 7334 }, { "epoch": 0.5942158133506157, "grad_norm": 0.03156831115484238, "learning_rate": 0.00018921193573068096, "loss": 0.3492, "step": 7335 }, { "epoch": 0.594296824368114, "grad_norm": 0.028718816116452217, "learning_rate": 0.00018920743507808634, "loss": 0.3266, "step": 7336 }, { "epoch": 0.5943778353856124, "grad_norm": 0.032878655940294266, "learning_rate": 0.0001892029344254917, "loss": 0.3585, "step": 7337 }, { "epoch": 0.5944588464031109, "grad_norm": 0.028278445824980736, "learning_rate": 0.00018919843377289706, "loss": 0.3349, "step": 7338 }, { "epoch": 0.5945398574206092, "grad_norm": 0.03184044361114502, "learning_rate": 0.00018919393312030245, "loss": 0.345, "step": 7339 }, { "epoch": 0.5946208684381076, "grad_norm": 0.034407783299684525, "learning_rate": 0.0001891894324677078, "loss": 0.3698, "step": 7340 }, { "epoch": 0.5947018794556059, "grad_norm": 0.02938602864742279, "learning_rate": 0.0001891849318151132, "loss": 0.322, "step": 7341 }, { "epoch": 0.5947828904731044, "grad_norm": 0.02893815189599991, "learning_rate": 0.00018918043116251859, "loss": 0.3289, "step": 7342 }, { "epoch": 0.5948639014906028, "grad_norm": 0.03124549798667431, "learning_rate": 0.00018917593050992395, "loss": 0.3528, "step": 7343 }, { "epoch": 0.5949449125081011, "grad_norm": 0.03118320368230343, "learning_rate": 0.0001891714298573293, "loss": 0.3395, "step": 7344 }, { "epoch": 0.5950259235255995, "grad_norm": 0.034015022218227386, "learning_rate": 0.0001891669292047347, "loss": 0.3687, "step": 7345 }, { "epoch": 0.5951069345430978, "grad_norm": 0.028784219175577164, "learning_rate": 0.00018916242855214005, "loss": 0.3328, "step": 7346 }, { "epoch": 0.5951879455605963, "grad_norm": 0.030091838911175728, "learning_rate": 0.00018915792789954544, "loss": 0.3041, "step": 7347 }, { "epoch": 0.5952689565780946, "grad_norm": 0.03068556636571884, "learning_rate": 0.00018915342724695083, "loss": 0.3472, "step": 7348 }, { "epoch": 0.595349967595593, "grad_norm": 0.031809356063604355, "learning_rate": 0.0001891489265943562, "loss": 0.3515, "step": 7349 }, { "epoch": 0.5954309786130914, "grad_norm": 0.03569292649626732, "learning_rate": 0.00018914442594176155, "loss": 0.3772, "step": 7350 }, { "epoch": 0.5955119896305897, "grad_norm": 0.029809661209583282, "learning_rate": 0.00018913992528916693, "loss": 0.3893, "step": 7351 }, { "epoch": 0.5955930006480882, "grad_norm": 0.03562194108963013, "learning_rate": 0.00018913542463657232, "loss": 0.3478, "step": 7352 }, { "epoch": 0.5956740116655865, "grad_norm": 0.030969291925430298, "learning_rate": 0.00018913092398397768, "loss": 0.3484, "step": 7353 }, { "epoch": 0.5957550226830849, "grad_norm": 0.02741086110472679, "learning_rate": 0.00018912642333138307, "loss": 0.3232, "step": 7354 }, { "epoch": 0.5958360337005832, "grad_norm": 0.03458785638213158, "learning_rate": 0.00018912192267878843, "loss": 0.3574, "step": 7355 }, { "epoch": 0.5959170447180817, "grad_norm": 0.03107859566807747, "learning_rate": 0.0001891174220261938, "loss": 0.3316, "step": 7356 }, { "epoch": 0.5959980557355801, "grad_norm": 0.032016459852457047, "learning_rate": 0.00018911292137359918, "loss": 0.3892, "step": 7357 }, { "epoch": 0.5960790667530784, "grad_norm": 0.02998381108045578, "learning_rate": 0.00018910842072100456, "loss": 0.3514, "step": 7358 }, { "epoch": 0.5961600777705768, "grad_norm": 0.03130162134766579, "learning_rate": 0.00018910392006840992, "loss": 0.3637, "step": 7359 }, { "epoch": 0.5962410887880751, "grad_norm": 0.030972221866250038, "learning_rate": 0.0001890994194158153, "loss": 0.3611, "step": 7360 }, { "epoch": 0.5963220998055736, "grad_norm": 0.03162587434053421, "learning_rate": 0.00018909491876322067, "loss": 0.4011, "step": 7361 }, { "epoch": 0.596403110823072, "grad_norm": 0.030603982508182526, "learning_rate": 0.00018909041811062606, "loss": 0.3557, "step": 7362 }, { "epoch": 0.5964841218405703, "grad_norm": 0.029461462050676346, "learning_rate": 0.00018908591745803142, "loss": 0.3157, "step": 7363 }, { "epoch": 0.5965651328580687, "grad_norm": 0.03398451954126358, "learning_rate": 0.0001890814168054368, "loss": 0.346, "step": 7364 }, { "epoch": 0.596646143875567, "grad_norm": 0.02900012768805027, "learning_rate": 0.00018907691615284216, "loss": 0.321, "step": 7365 }, { "epoch": 0.5967271548930655, "grad_norm": 0.03144826740026474, "learning_rate": 0.00018907241550024755, "loss": 0.3189, "step": 7366 }, { "epoch": 0.5968081659105638, "grad_norm": 0.02788534387946129, "learning_rate": 0.0001890679148476529, "loss": 0.3219, "step": 7367 }, { "epoch": 0.5968891769280622, "grad_norm": 0.030930208042263985, "learning_rate": 0.0001890634141950583, "loss": 0.3584, "step": 7368 }, { "epoch": 0.5969701879455606, "grad_norm": 0.03033963218331337, "learning_rate": 0.00018905891354246366, "loss": 0.3441, "step": 7369 }, { "epoch": 0.597051198963059, "grad_norm": 0.0332016684114933, "learning_rate": 0.00018905441288986905, "loss": 0.3553, "step": 7370 }, { "epoch": 0.5971322099805574, "grad_norm": 0.028269365429878235, "learning_rate": 0.0001890499122372744, "loss": 0.2937, "step": 7371 }, { "epoch": 0.5972132209980557, "grad_norm": 0.029664890840649605, "learning_rate": 0.0001890454115846798, "loss": 0.3122, "step": 7372 }, { "epoch": 0.5972942320155541, "grad_norm": 0.032333459705114365, "learning_rate": 0.00018904091093208515, "loss": 0.3575, "step": 7373 }, { "epoch": 0.5973752430330524, "grad_norm": 0.03276979178190231, "learning_rate": 0.00018903641027949054, "loss": 0.3543, "step": 7374 }, { "epoch": 0.5974562540505509, "grad_norm": 0.030932294204831123, "learning_rate": 0.00018903190962689593, "loss": 0.3839, "step": 7375 }, { "epoch": 0.5975372650680493, "grad_norm": 0.034941308200359344, "learning_rate": 0.0001890274089743013, "loss": 0.3879, "step": 7376 }, { "epoch": 0.5976182760855476, "grad_norm": 0.03369393199682236, "learning_rate": 0.00018902290832170665, "loss": 0.347, "step": 7377 }, { "epoch": 0.597699287103046, "grad_norm": 0.035528942942619324, "learning_rate": 0.00018901840766911204, "loss": 0.38, "step": 7378 }, { "epoch": 0.5977802981205443, "grad_norm": 0.03295198827981949, "learning_rate": 0.0001890139070165174, "loss": 0.3525, "step": 7379 }, { "epoch": 0.5978613091380428, "grad_norm": 0.029161540791392326, "learning_rate": 0.00018900940636392278, "loss": 0.3543, "step": 7380 }, { "epoch": 0.5979423201555412, "grad_norm": 0.026549329981207848, "learning_rate": 0.00018900490571132817, "loss": 0.2638, "step": 7381 }, { "epoch": 0.5980233311730395, "grad_norm": 0.03204452246427536, "learning_rate": 0.00018900040505873353, "loss": 0.3266, "step": 7382 }, { "epoch": 0.5981043421905379, "grad_norm": 0.02657189778983593, "learning_rate": 0.0001889959044061389, "loss": 0.304, "step": 7383 }, { "epoch": 0.5981853532080363, "grad_norm": 0.03293665871024132, "learning_rate": 0.00018899140375354428, "loss": 0.3721, "step": 7384 }, { "epoch": 0.5982663642255347, "grad_norm": 0.032227154821157455, "learning_rate": 0.00018898690310094964, "loss": 0.3504, "step": 7385 }, { "epoch": 0.598347375243033, "grad_norm": 0.03022935800254345, "learning_rate": 0.00018898240244835502, "loss": 0.388, "step": 7386 }, { "epoch": 0.5984283862605314, "grad_norm": 0.032245948910713196, "learning_rate": 0.0001889779017957604, "loss": 0.3542, "step": 7387 }, { "epoch": 0.5985093972780298, "grad_norm": 0.029357144609093666, "learning_rate": 0.00018897340114316577, "loss": 0.3575, "step": 7388 }, { "epoch": 0.5985904082955282, "grad_norm": 0.03233001008629799, "learning_rate": 0.00018896890049057113, "loss": 0.388, "step": 7389 }, { "epoch": 0.5986714193130266, "grad_norm": 0.033273451030254364, "learning_rate": 0.00018896439983797652, "loss": 0.3334, "step": 7390 }, { "epoch": 0.5987524303305249, "grad_norm": 0.03183024749159813, "learning_rate": 0.00018895989918538188, "loss": 0.3427, "step": 7391 }, { "epoch": 0.5988334413480233, "grad_norm": 0.0347750298678875, "learning_rate": 0.00018895539853278727, "loss": 0.3704, "step": 7392 }, { "epoch": 0.5989144523655218, "grad_norm": 0.03242835775017738, "learning_rate": 0.00018895089788019265, "loss": 0.369, "step": 7393 }, { "epoch": 0.5989954633830201, "grad_norm": 0.026880014687776566, "learning_rate": 0.000188946397227598, "loss": 0.3017, "step": 7394 }, { "epoch": 0.5990764744005185, "grad_norm": 0.03177474066615105, "learning_rate": 0.00018894189657500337, "loss": 0.3304, "step": 7395 }, { "epoch": 0.5991574854180168, "grad_norm": 0.03396380692720413, "learning_rate": 0.00018893739592240876, "loss": 0.3638, "step": 7396 }, { "epoch": 0.5992384964355152, "grad_norm": 0.02995561808347702, "learning_rate": 0.00018893289526981412, "loss": 0.3706, "step": 7397 }, { "epoch": 0.5993195074530137, "grad_norm": 0.030224697664380074, "learning_rate": 0.0001889283946172195, "loss": 0.3403, "step": 7398 }, { "epoch": 0.599400518470512, "grad_norm": 0.0329534150660038, "learning_rate": 0.0001889238939646249, "loss": 0.327, "step": 7399 }, { "epoch": 0.5994815294880104, "grad_norm": 0.03500063344836235, "learning_rate": 0.00018891939331203025, "loss": 0.365, "step": 7400 }, { "epoch": 0.5995625405055087, "grad_norm": 0.03050912730395794, "learning_rate": 0.00018891489265943561, "loss": 0.3489, "step": 7401 }, { "epoch": 0.5996435515230071, "grad_norm": 0.03011389449238777, "learning_rate": 0.000188910392006841, "loss": 0.3636, "step": 7402 }, { "epoch": 0.5997245625405055, "grad_norm": 0.03238969296216965, "learning_rate": 0.00018890589135424636, "loss": 0.3729, "step": 7403 }, { "epoch": 0.5998055735580039, "grad_norm": 0.02938798815011978, "learning_rate": 0.00018890139070165175, "loss": 0.346, "step": 7404 }, { "epoch": 0.5998865845755023, "grad_norm": 0.03192545101046562, "learning_rate": 0.00018889689004905714, "loss": 0.3871, "step": 7405 }, { "epoch": 0.5999675955930006, "grad_norm": 0.03070968948304653, "learning_rate": 0.0001888923893964625, "loss": 0.3447, "step": 7406 }, { "epoch": 0.6000486066104991, "grad_norm": 0.0291475597769022, "learning_rate": 0.00018888788874386786, "loss": 0.3284, "step": 7407 }, { "epoch": 0.6001296176279974, "grad_norm": 0.029424374923110008, "learning_rate": 0.00018888338809127324, "loss": 0.3387, "step": 7408 }, { "epoch": 0.6002106286454958, "grad_norm": 0.029002133756875992, "learning_rate": 0.0001888788874386786, "loss": 0.3533, "step": 7409 }, { "epoch": 0.6002916396629941, "grad_norm": 0.03278941661119461, "learning_rate": 0.000188874386786084, "loss": 0.3748, "step": 7410 }, { "epoch": 0.6003726506804925, "grad_norm": 0.02870086394250393, "learning_rate": 0.00018886988613348938, "loss": 0.3564, "step": 7411 }, { "epoch": 0.600453661697991, "grad_norm": 0.033800091594457626, "learning_rate": 0.00018886538548089474, "loss": 0.416, "step": 7412 }, { "epoch": 0.6005346727154893, "grad_norm": 0.02814219333231449, "learning_rate": 0.0001888608848283001, "loss": 0.3087, "step": 7413 }, { "epoch": 0.6006156837329877, "grad_norm": 0.031001700088381767, "learning_rate": 0.00018885638417570549, "loss": 0.3408, "step": 7414 }, { "epoch": 0.600696694750486, "grad_norm": 0.030194900929927826, "learning_rate": 0.00018885188352311085, "loss": 0.3389, "step": 7415 }, { "epoch": 0.6007777057679844, "grad_norm": 0.03206025809049606, "learning_rate": 0.00018884738287051623, "loss": 0.3463, "step": 7416 }, { "epoch": 0.6008587167854829, "grad_norm": 0.0302756205201149, "learning_rate": 0.00018884288221792162, "loss": 0.3369, "step": 7417 }, { "epoch": 0.6009397278029812, "grad_norm": 0.0310045275837183, "learning_rate": 0.00018883838156532698, "loss": 0.4104, "step": 7418 }, { "epoch": 0.6010207388204796, "grad_norm": 0.029064510017633438, "learning_rate": 0.00018883388091273234, "loss": 0.3764, "step": 7419 }, { "epoch": 0.6011017498379779, "grad_norm": 0.02953030914068222, "learning_rate": 0.00018882938026013773, "loss": 0.3594, "step": 7420 }, { "epoch": 0.6011827608554764, "grad_norm": 0.03259488195180893, "learning_rate": 0.0001888248796075431, "loss": 0.3665, "step": 7421 }, { "epoch": 0.6012637718729748, "grad_norm": 0.03570137545466423, "learning_rate": 0.00018882037895494847, "loss": 0.3738, "step": 7422 }, { "epoch": 0.6013447828904731, "grad_norm": 0.03246882185339928, "learning_rate": 0.00018881587830235386, "loss": 0.3403, "step": 7423 }, { "epoch": 0.6014257939079715, "grad_norm": 0.03044661320745945, "learning_rate": 0.00018881137764975922, "loss": 0.3397, "step": 7424 }, { "epoch": 0.6015068049254698, "grad_norm": 0.033912573009729385, "learning_rate": 0.00018880687699716458, "loss": 0.3188, "step": 7425 }, { "epoch": 0.6015878159429683, "grad_norm": 0.03139291703701019, "learning_rate": 0.00018880237634456997, "loss": 0.4062, "step": 7426 }, { "epoch": 0.6016688269604666, "grad_norm": 0.025072293356060982, "learning_rate": 0.00018879787569197533, "loss": 0.2751, "step": 7427 }, { "epoch": 0.601749837977965, "grad_norm": 0.0371062196791172, "learning_rate": 0.00018879337503938072, "loss": 0.3948, "step": 7428 }, { "epoch": 0.6018308489954634, "grad_norm": 0.03248792514204979, "learning_rate": 0.0001887888743867861, "loss": 0.3191, "step": 7429 }, { "epoch": 0.6019118600129617, "grad_norm": 0.029657980427145958, "learning_rate": 0.00018878437373419146, "loss": 0.3028, "step": 7430 }, { "epoch": 0.6019928710304602, "grad_norm": 0.029270123690366745, "learning_rate": 0.00018877987308159685, "loss": 0.3518, "step": 7431 }, { "epoch": 0.6020738820479585, "grad_norm": 0.032467812299728394, "learning_rate": 0.0001887753724290022, "loss": 0.3462, "step": 7432 }, { "epoch": 0.6021548930654569, "grad_norm": 0.028843361884355545, "learning_rate": 0.0001887708717764076, "loss": 0.317, "step": 7433 }, { "epoch": 0.6022359040829552, "grad_norm": 0.029680926352739334, "learning_rate": 0.00018876637112381296, "loss": 0.3395, "step": 7434 }, { "epoch": 0.6023169151004537, "grad_norm": 0.03853990137577057, "learning_rate": 0.00018876187047121834, "loss": 0.39, "step": 7435 }, { "epoch": 0.6023979261179521, "grad_norm": 0.034090928733348846, "learning_rate": 0.0001887573698186237, "loss": 0.355, "step": 7436 }, { "epoch": 0.6024789371354504, "grad_norm": 0.030918769538402557, "learning_rate": 0.0001887528691660291, "loss": 0.3414, "step": 7437 }, { "epoch": 0.6025599481529488, "grad_norm": 0.029954658821225166, "learning_rate": 0.00018874836851343445, "loss": 0.3191, "step": 7438 }, { "epoch": 0.6026409591704471, "grad_norm": 0.03110465221107006, "learning_rate": 0.00018874386786083984, "loss": 0.3761, "step": 7439 }, { "epoch": 0.6027219701879456, "grad_norm": 0.029768571257591248, "learning_rate": 0.0001887393672082452, "loss": 0.3439, "step": 7440 }, { "epoch": 0.602802981205444, "grad_norm": 0.029363000765442848, "learning_rate": 0.00018873486655565059, "loss": 0.3499, "step": 7441 }, { "epoch": 0.6028839922229423, "grad_norm": 0.03165175020694733, "learning_rate": 0.00018873036590305595, "loss": 0.4128, "step": 7442 }, { "epoch": 0.6029650032404407, "grad_norm": 0.03060912899672985, "learning_rate": 0.00018872586525046133, "loss": 0.3343, "step": 7443 }, { "epoch": 0.6030460142579391, "grad_norm": 0.03013191744685173, "learning_rate": 0.0001887213645978667, "loss": 0.3377, "step": 7444 }, { "epoch": 0.6031270252754375, "grad_norm": 0.030229419469833374, "learning_rate": 0.00018871686394527208, "loss": 0.3538, "step": 7445 }, { "epoch": 0.6032080362929358, "grad_norm": 0.028185758739709854, "learning_rate": 0.00018871236329267744, "loss": 0.3352, "step": 7446 }, { "epoch": 0.6032890473104342, "grad_norm": 0.029758667573332787, "learning_rate": 0.00018870786264008283, "loss": 0.3567, "step": 7447 }, { "epoch": 0.6033700583279326, "grad_norm": 0.028832679614424706, "learning_rate": 0.0001887033619874882, "loss": 0.3317, "step": 7448 }, { "epoch": 0.603451069345431, "grad_norm": 0.03557024523615837, "learning_rate": 0.00018869886133489357, "loss": 0.3251, "step": 7449 }, { "epoch": 0.6035320803629294, "grad_norm": 0.0323876328766346, "learning_rate": 0.00018869436068229893, "loss": 0.35, "step": 7450 }, { "epoch": 0.6036130913804277, "grad_norm": 0.031438153237104416, "learning_rate": 0.00018868986002970432, "loss": 0.3505, "step": 7451 }, { "epoch": 0.6036941023979261, "grad_norm": 0.031943872570991516, "learning_rate": 0.00018868535937710968, "loss": 0.3453, "step": 7452 }, { "epoch": 0.6037751134154244, "grad_norm": 0.03199175372719765, "learning_rate": 0.00018868085872451507, "loss": 0.3101, "step": 7453 }, { "epoch": 0.6038561244329229, "grad_norm": 0.03276806324720383, "learning_rate": 0.00018867635807192043, "loss": 0.3513, "step": 7454 }, { "epoch": 0.6039371354504213, "grad_norm": 0.03356426954269409, "learning_rate": 0.00018867185741932582, "loss": 0.3261, "step": 7455 }, { "epoch": 0.6040181464679196, "grad_norm": 0.03232236206531525, "learning_rate": 0.0001886673567667312, "loss": 0.3478, "step": 7456 }, { "epoch": 0.604099157485418, "grad_norm": 0.0322866290807724, "learning_rate": 0.00018866285611413656, "loss": 0.3869, "step": 7457 }, { "epoch": 0.6041801685029164, "grad_norm": 0.03561306744813919, "learning_rate": 0.00018865835546154192, "loss": 0.3321, "step": 7458 }, { "epoch": 0.6042611795204148, "grad_norm": 0.027576128020882607, "learning_rate": 0.0001886538548089473, "loss": 0.3066, "step": 7459 }, { "epoch": 0.6043421905379132, "grad_norm": 0.036662038415670395, "learning_rate": 0.00018864935415635267, "loss": 0.3476, "step": 7460 }, { "epoch": 0.6044232015554115, "grad_norm": 0.032418131828308105, "learning_rate": 0.00018864485350375806, "loss": 0.3525, "step": 7461 }, { "epoch": 0.6045042125729099, "grad_norm": 0.03187812119722366, "learning_rate": 0.00018864035285116345, "loss": 0.3518, "step": 7462 }, { "epoch": 0.6045852235904083, "grad_norm": 0.03083401545882225, "learning_rate": 0.0001886358521985688, "loss": 0.346, "step": 7463 }, { "epoch": 0.6046662346079067, "grad_norm": 0.030447332188487053, "learning_rate": 0.00018863135154597417, "loss": 0.3357, "step": 7464 }, { "epoch": 0.604747245625405, "grad_norm": 0.02773471362888813, "learning_rate": 0.00018862685089337955, "loss": 0.3616, "step": 7465 }, { "epoch": 0.6048282566429034, "grad_norm": 0.029380742460489273, "learning_rate": 0.0001886223502407849, "loss": 0.3325, "step": 7466 }, { "epoch": 0.6049092676604018, "grad_norm": 0.03036758117377758, "learning_rate": 0.0001886178495881903, "loss": 0.3378, "step": 7467 }, { "epoch": 0.6049902786779002, "grad_norm": 0.028923461213707924, "learning_rate": 0.0001886133489355957, "loss": 0.3138, "step": 7468 }, { "epoch": 0.6050712896953986, "grad_norm": 0.02898426540195942, "learning_rate": 0.00018860884828300105, "loss": 0.3135, "step": 7469 }, { "epoch": 0.6051523007128969, "grad_norm": 0.0326344259083271, "learning_rate": 0.0001886043476304064, "loss": 0.406, "step": 7470 }, { "epoch": 0.6052333117303953, "grad_norm": 0.029875773936510086, "learning_rate": 0.0001885998469778118, "loss": 0.355, "step": 7471 }, { "epoch": 0.6053143227478938, "grad_norm": 0.03648769482970238, "learning_rate": 0.00018859534632521715, "loss": 0.4147, "step": 7472 }, { "epoch": 0.6053953337653921, "grad_norm": 0.02821452170610428, "learning_rate": 0.00018859084567262254, "loss": 0.3285, "step": 7473 }, { "epoch": 0.6054763447828905, "grad_norm": 0.034381620585918427, "learning_rate": 0.00018858634502002793, "loss": 0.39, "step": 7474 }, { "epoch": 0.6055573558003888, "grad_norm": 0.032561976462602615, "learning_rate": 0.0001885818443674333, "loss": 0.3234, "step": 7475 }, { "epoch": 0.6056383668178872, "grad_norm": 0.030314048752188683, "learning_rate": 0.00018857734371483865, "loss": 0.3129, "step": 7476 }, { "epoch": 0.6057193778353857, "grad_norm": 0.027895336970686913, "learning_rate": 0.00018857284306224404, "loss": 0.3218, "step": 7477 }, { "epoch": 0.605800388852884, "grad_norm": 0.029156718403100967, "learning_rate": 0.0001885683424096494, "loss": 0.2981, "step": 7478 }, { "epoch": 0.6058813998703824, "grad_norm": 0.030389677733182907, "learning_rate": 0.00018856384175705478, "loss": 0.3639, "step": 7479 }, { "epoch": 0.6059624108878807, "grad_norm": 0.03531801328063011, "learning_rate": 0.00018855934110446017, "loss": 0.3279, "step": 7480 }, { "epoch": 0.6060434219053791, "grad_norm": 0.03205801919102669, "learning_rate": 0.00018855484045186553, "loss": 0.3109, "step": 7481 }, { "epoch": 0.6061244329228775, "grad_norm": 0.02922491915524006, "learning_rate": 0.0001885503397992709, "loss": 0.3292, "step": 7482 }, { "epoch": 0.6062054439403759, "grad_norm": 0.03221955522894859, "learning_rate": 0.00018854583914667628, "loss": 0.3552, "step": 7483 }, { "epoch": 0.6062864549578743, "grad_norm": 0.035640884190797806, "learning_rate": 0.00018854133849408164, "loss": 0.381, "step": 7484 }, { "epoch": 0.6063674659753726, "grad_norm": 0.03220059350132942, "learning_rate": 0.00018853683784148702, "loss": 0.3554, "step": 7485 }, { "epoch": 0.6064484769928711, "grad_norm": 0.02993176132440567, "learning_rate": 0.0001885323371888924, "loss": 0.3594, "step": 7486 }, { "epoch": 0.6065294880103694, "grad_norm": 0.03339843451976776, "learning_rate": 0.00018852783653629777, "loss": 0.3492, "step": 7487 }, { "epoch": 0.6066104990278678, "grad_norm": 0.032218873500823975, "learning_rate": 0.00018852333588370313, "loss": 0.3189, "step": 7488 }, { "epoch": 0.6066915100453661, "grad_norm": 0.02685026451945305, "learning_rate": 0.00018851883523110852, "loss": 0.3296, "step": 7489 }, { "epoch": 0.6067725210628645, "grad_norm": 0.027700411155819893, "learning_rate": 0.00018851433457851388, "loss": 0.3303, "step": 7490 }, { "epoch": 0.606853532080363, "grad_norm": 0.04086620360612869, "learning_rate": 0.00018850983392591927, "loss": 0.429, "step": 7491 }, { "epoch": 0.6069345430978613, "grad_norm": 0.036509573459625244, "learning_rate": 0.00018850533327332465, "loss": 0.3302, "step": 7492 }, { "epoch": 0.6070155541153597, "grad_norm": 0.03572225570678711, "learning_rate": 0.00018850083262073, "loss": 0.3754, "step": 7493 }, { "epoch": 0.607096565132858, "grad_norm": 0.029395904392004013, "learning_rate": 0.00018849633196813537, "loss": 0.3518, "step": 7494 }, { "epoch": 0.6071775761503565, "grad_norm": 0.03418650105595589, "learning_rate": 0.00018849183131554076, "loss": 0.3434, "step": 7495 }, { "epoch": 0.6072585871678549, "grad_norm": 0.030631721019744873, "learning_rate": 0.00018848733066294612, "loss": 0.3968, "step": 7496 }, { "epoch": 0.6073395981853532, "grad_norm": 0.035159096121788025, "learning_rate": 0.0001884828300103515, "loss": 0.392, "step": 7497 }, { "epoch": 0.6074206092028516, "grad_norm": 0.03384539857506752, "learning_rate": 0.0001884783293577569, "loss": 0.3973, "step": 7498 }, { "epoch": 0.6075016202203499, "grad_norm": 0.03203663229942322, "learning_rate": 0.00018847382870516225, "loss": 0.3499, "step": 7499 }, { "epoch": 0.6075826312378484, "grad_norm": 0.03463354334235191, "learning_rate": 0.00018846932805256764, "loss": 0.371, "step": 7500 }, { "epoch": 0.6076636422553467, "grad_norm": 0.034394148737192154, "learning_rate": 0.000188464827399973, "loss": 0.3824, "step": 7501 }, { "epoch": 0.6077446532728451, "grad_norm": 0.03237493708729744, "learning_rate": 0.00018846032674737836, "loss": 0.338, "step": 7502 }, { "epoch": 0.6078256642903435, "grad_norm": 0.032026149332523346, "learning_rate": 0.00018845582609478375, "loss": 0.3727, "step": 7503 }, { "epoch": 0.6079066753078418, "grad_norm": 0.0315285362303257, "learning_rate": 0.00018845132544218914, "loss": 0.343, "step": 7504 }, { "epoch": 0.6079876863253403, "grad_norm": 0.028412126004695892, "learning_rate": 0.0001884468247895945, "loss": 0.3156, "step": 7505 }, { "epoch": 0.6080686973428386, "grad_norm": 0.03015442192554474, "learning_rate": 0.00018844232413699988, "loss": 0.3419, "step": 7506 }, { "epoch": 0.608149708360337, "grad_norm": 0.03320285677909851, "learning_rate": 0.00018843782348440524, "loss": 0.4232, "step": 7507 }, { "epoch": 0.6082307193778353, "grad_norm": 0.03172954171895981, "learning_rate": 0.00018843332283181063, "loss": 0.3471, "step": 7508 }, { "epoch": 0.6083117303953338, "grad_norm": 0.03579653054475784, "learning_rate": 0.000188428822179216, "loss": 0.3192, "step": 7509 }, { "epoch": 0.6083927414128322, "grad_norm": 0.031022492796182632, "learning_rate": 0.00018842432152662138, "loss": 0.3419, "step": 7510 }, { "epoch": 0.6084737524303305, "grad_norm": 0.029089458286762238, "learning_rate": 0.00018841982087402674, "loss": 0.3166, "step": 7511 }, { "epoch": 0.6085547634478289, "grad_norm": 0.02824055217206478, "learning_rate": 0.00018841532022143213, "loss": 0.3633, "step": 7512 }, { "epoch": 0.6086357744653272, "grad_norm": 0.03523353487253189, "learning_rate": 0.00018841081956883749, "loss": 0.3655, "step": 7513 }, { "epoch": 0.6087167854828257, "grad_norm": 0.03349660709500313, "learning_rate": 0.00018840631891624287, "loss": 0.3749, "step": 7514 }, { "epoch": 0.6087977965003241, "grad_norm": 0.02907554619014263, "learning_rate": 0.00018840181826364823, "loss": 0.3263, "step": 7515 }, { "epoch": 0.6088788075178224, "grad_norm": 0.03144342452287674, "learning_rate": 0.00018839731761105362, "loss": 0.4028, "step": 7516 }, { "epoch": 0.6089598185353208, "grad_norm": 0.037257906049489975, "learning_rate": 0.00018839281695845898, "loss": 0.3366, "step": 7517 }, { "epoch": 0.6090408295528191, "grad_norm": 0.03802880272269249, "learning_rate": 0.00018838831630586437, "loss": 0.366, "step": 7518 }, { "epoch": 0.6091218405703176, "grad_norm": 0.03249386325478554, "learning_rate": 0.00018838381565326973, "loss": 0.3743, "step": 7519 }, { "epoch": 0.609202851587816, "grad_norm": 0.031599901616573334, "learning_rate": 0.00018837931500067511, "loss": 0.3632, "step": 7520 }, { "epoch": 0.6092838626053143, "grad_norm": 0.032178837805986404, "learning_rate": 0.00018837481434808047, "loss": 0.3521, "step": 7521 }, { "epoch": 0.6093648736228127, "grad_norm": 0.042940713465213776, "learning_rate": 0.00018837031369548586, "loss": 0.3405, "step": 7522 }, { "epoch": 0.6094458846403111, "grad_norm": 0.03091159276664257, "learning_rate": 0.00018836581304289122, "loss": 0.3694, "step": 7523 }, { "epoch": 0.6095268956578095, "grad_norm": 0.03217526152729988, "learning_rate": 0.0001883613123902966, "loss": 0.3212, "step": 7524 }, { "epoch": 0.6096079066753078, "grad_norm": 0.0323089137673378, "learning_rate": 0.00018835681173770197, "loss": 0.3987, "step": 7525 }, { "epoch": 0.6096889176928062, "grad_norm": 0.033394478261470795, "learning_rate": 0.00018835231108510736, "loss": 0.3771, "step": 7526 }, { "epoch": 0.6097699287103046, "grad_norm": 0.02519734390079975, "learning_rate": 0.00018834781043251272, "loss": 0.3115, "step": 7527 }, { "epoch": 0.609850939727803, "grad_norm": 0.030933715403079987, "learning_rate": 0.0001883433097799181, "loss": 0.3129, "step": 7528 }, { "epoch": 0.6099319507453014, "grad_norm": 0.03571024537086487, "learning_rate": 0.00018833880912732346, "loss": 0.3683, "step": 7529 }, { "epoch": 0.6100129617627997, "grad_norm": 0.03200255334377289, "learning_rate": 0.00018833430847472885, "loss": 0.3812, "step": 7530 }, { "epoch": 0.6100939727802981, "grad_norm": 0.02796841971576214, "learning_rate": 0.0001883298078221342, "loss": 0.3288, "step": 7531 }, { "epoch": 0.6101749837977966, "grad_norm": 0.037334516644477844, "learning_rate": 0.0001883253071695396, "loss": 0.3238, "step": 7532 }, { "epoch": 0.6102559948152949, "grad_norm": 0.03164404258131981, "learning_rate": 0.00018832080651694496, "loss": 0.306, "step": 7533 }, { "epoch": 0.6103370058327933, "grad_norm": 0.028371304273605347, "learning_rate": 0.00018831630586435034, "loss": 0.3295, "step": 7534 }, { "epoch": 0.6104180168502916, "grad_norm": 0.03163844719529152, "learning_rate": 0.0001883118052117557, "loss": 0.3692, "step": 7535 }, { "epoch": 0.61049902786779, "grad_norm": 0.029623981565237045, "learning_rate": 0.0001883073045591611, "loss": 0.3555, "step": 7536 }, { "epoch": 0.6105800388852884, "grad_norm": 0.0332329086959362, "learning_rate": 0.00018830280390656648, "loss": 0.2943, "step": 7537 }, { "epoch": 0.6106610499027868, "grad_norm": 0.031672000885009766, "learning_rate": 0.00018829830325397184, "loss": 0.3936, "step": 7538 }, { "epoch": 0.6107420609202852, "grad_norm": 0.030289702117443085, "learning_rate": 0.0001882938026013772, "loss": 0.3335, "step": 7539 }, { "epoch": 0.6108230719377835, "grad_norm": 0.03449969366192818, "learning_rate": 0.00018828930194878259, "loss": 0.378, "step": 7540 }, { "epoch": 0.6109040829552819, "grad_norm": 0.03260310739278793, "learning_rate": 0.00018828480129618795, "loss": 0.3875, "step": 7541 }, { "epoch": 0.6109850939727803, "grad_norm": 0.026728922501206398, "learning_rate": 0.00018828030064359333, "loss": 0.287, "step": 7542 }, { "epoch": 0.6110661049902787, "grad_norm": 0.02870047651231289, "learning_rate": 0.00018827579999099872, "loss": 0.3006, "step": 7543 }, { "epoch": 0.611147116007777, "grad_norm": 0.030876463279128075, "learning_rate": 0.00018827129933840408, "loss": 0.3794, "step": 7544 }, { "epoch": 0.6112281270252754, "grad_norm": 0.034999217838048935, "learning_rate": 0.00018826679868580944, "loss": 0.3356, "step": 7545 }, { "epoch": 0.6113091380427739, "grad_norm": 0.03449104726314545, "learning_rate": 0.00018826229803321483, "loss": 0.4207, "step": 7546 }, { "epoch": 0.6113901490602722, "grad_norm": 0.03226851671934128, "learning_rate": 0.0001882577973806202, "loss": 0.3652, "step": 7547 }, { "epoch": 0.6114711600777706, "grad_norm": 0.027200650423765182, "learning_rate": 0.00018825329672802558, "loss": 0.3235, "step": 7548 }, { "epoch": 0.6115521710952689, "grad_norm": 0.029985858127474785, "learning_rate": 0.00018824879607543096, "loss": 0.3626, "step": 7549 }, { "epoch": 0.6116331821127673, "grad_norm": 0.033420026302337646, "learning_rate": 0.00018824429542283632, "loss": 0.3958, "step": 7550 }, { "epoch": 0.6117141931302658, "grad_norm": 0.028016239404678345, "learning_rate": 0.00018823979477024168, "loss": 0.2803, "step": 7551 }, { "epoch": 0.6117952041477641, "grad_norm": 0.034927189350128174, "learning_rate": 0.00018823529411764707, "loss": 0.3053, "step": 7552 }, { "epoch": 0.6118762151652625, "grad_norm": 0.031104566529393196, "learning_rate": 0.00018823079346505243, "loss": 0.325, "step": 7553 }, { "epoch": 0.6119572261827608, "grad_norm": 0.03127651661634445, "learning_rate": 0.00018822629281245782, "loss": 0.3617, "step": 7554 }, { "epoch": 0.6120382372002592, "grad_norm": 0.03370172902941704, "learning_rate": 0.0001882217921598632, "loss": 0.3424, "step": 7555 }, { "epoch": 0.6121192482177576, "grad_norm": 0.03209322318434715, "learning_rate": 0.00018821729150726856, "loss": 0.3977, "step": 7556 }, { "epoch": 0.612200259235256, "grad_norm": 0.030257215723395348, "learning_rate": 0.00018821279085467392, "loss": 0.3375, "step": 7557 }, { "epoch": 0.6122812702527544, "grad_norm": 0.03481970727443695, "learning_rate": 0.0001882082902020793, "loss": 0.3502, "step": 7558 }, { "epoch": 0.6123622812702527, "grad_norm": 0.03415451943874359, "learning_rate": 0.00018820378954948467, "loss": 0.3839, "step": 7559 }, { "epoch": 0.6124432922877512, "grad_norm": 0.02852708287537098, "learning_rate": 0.00018819928889689006, "loss": 0.328, "step": 7560 }, { "epoch": 0.6125243033052495, "grad_norm": 0.03239717334508896, "learning_rate": 0.00018819478824429545, "loss": 0.3375, "step": 7561 }, { "epoch": 0.6126053143227479, "grad_norm": 0.03367842733860016, "learning_rate": 0.0001881902875917008, "loss": 0.3581, "step": 7562 }, { "epoch": 0.6126863253402463, "grad_norm": 0.029296424239873886, "learning_rate": 0.00018818578693910617, "loss": 0.2953, "step": 7563 }, { "epoch": 0.6127673363577446, "grad_norm": 0.028935760259628296, "learning_rate": 0.00018818128628651155, "loss": 0.3405, "step": 7564 }, { "epoch": 0.6128483473752431, "grad_norm": 0.04053902253508568, "learning_rate": 0.0001881767856339169, "loss": 0.394, "step": 7565 }, { "epoch": 0.6129293583927414, "grad_norm": 0.03760660067200661, "learning_rate": 0.0001881722849813223, "loss": 0.3728, "step": 7566 }, { "epoch": 0.6130103694102398, "grad_norm": 0.03172369301319122, "learning_rate": 0.0001881677843287277, "loss": 0.3449, "step": 7567 }, { "epoch": 0.6130913804277381, "grad_norm": 0.037709664553403854, "learning_rate": 0.00018816328367613305, "loss": 0.3574, "step": 7568 }, { "epoch": 0.6131723914452365, "grad_norm": 0.03204488754272461, "learning_rate": 0.00018815878302353843, "loss": 0.3516, "step": 7569 }, { "epoch": 0.613253402462735, "grad_norm": 0.03243907913565636, "learning_rate": 0.0001881542823709438, "loss": 0.3128, "step": 7570 }, { "epoch": 0.6133344134802333, "grad_norm": 0.030570013448596, "learning_rate": 0.00018814978171834915, "loss": 0.3163, "step": 7571 }, { "epoch": 0.6134154244977317, "grad_norm": 0.03219159319996834, "learning_rate": 0.00018814528106575454, "loss": 0.3558, "step": 7572 }, { "epoch": 0.61349643551523, "grad_norm": 0.04272877797484398, "learning_rate": 0.00018814078041315993, "loss": 0.3984, "step": 7573 }, { "epoch": 0.6135774465327285, "grad_norm": 0.030328383669257164, "learning_rate": 0.0001881362797605653, "loss": 0.3269, "step": 7574 }, { "epoch": 0.6136584575502269, "grad_norm": 0.03045385703444481, "learning_rate": 0.00018813177910797068, "loss": 0.3806, "step": 7575 }, { "epoch": 0.6137394685677252, "grad_norm": 0.027790486812591553, "learning_rate": 0.00018812727845537604, "loss": 0.3117, "step": 7576 }, { "epoch": 0.6138204795852236, "grad_norm": 0.03376387804746628, "learning_rate": 0.0001881227778027814, "loss": 0.3243, "step": 7577 }, { "epoch": 0.6139014906027219, "grad_norm": 0.03374413028359413, "learning_rate": 0.00018811827715018678, "loss": 0.3735, "step": 7578 }, { "epoch": 0.6139825016202204, "grad_norm": 0.025805898010730743, "learning_rate": 0.00018811377649759217, "loss": 0.3219, "step": 7579 }, { "epoch": 0.6140635126377187, "grad_norm": 0.031635358929634094, "learning_rate": 0.00018810927584499753, "loss": 0.3219, "step": 7580 }, { "epoch": 0.6141445236552171, "grad_norm": 0.029126780107617378, "learning_rate": 0.00018810477519240292, "loss": 0.3237, "step": 7581 }, { "epoch": 0.6142255346727155, "grad_norm": 0.026430238038301468, "learning_rate": 0.00018810027453980828, "loss": 0.2978, "step": 7582 }, { "epoch": 0.6143065456902139, "grad_norm": 0.03805132582783699, "learning_rate": 0.00018809577388721364, "loss": 0.4294, "step": 7583 }, { "epoch": 0.6143875567077123, "grad_norm": 0.03655115142464638, "learning_rate": 0.00018809127323461902, "loss": 0.3913, "step": 7584 }, { "epoch": 0.6144685677252106, "grad_norm": 0.03136638179421425, "learning_rate": 0.0001880867725820244, "loss": 0.405, "step": 7585 }, { "epoch": 0.614549578742709, "grad_norm": 0.03049628436565399, "learning_rate": 0.00018808227192942977, "loss": 0.3337, "step": 7586 }, { "epoch": 0.6146305897602073, "grad_norm": 0.030468054115772247, "learning_rate": 0.00018807777127683516, "loss": 0.343, "step": 7587 }, { "epoch": 0.6147116007777058, "grad_norm": 0.031013580039143562, "learning_rate": 0.00018807327062424052, "loss": 0.3543, "step": 7588 }, { "epoch": 0.6147926117952042, "grad_norm": 0.030559582635760307, "learning_rate": 0.0001880687699716459, "loss": 0.3742, "step": 7589 }, { "epoch": 0.6148736228127025, "grad_norm": 0.03500194847583771, "learning_rate": 0.00018806426931905127, "loss": 0.3978, "step": 7590 }, { "epoch": 0.6149546338302009, "grad_norm": 0.029537200927734375, "learning_rate": 0.00018805976866645665, "loss": 0.3563, "step": 7591 }, { "epoch": 0.6150356448476992, "grad_norm": 0.03477493301033974, "learning_rate": 0.00018805526801386201, "loss": 0.3144, "step": 7592 }, { "epoch": 0.6151166558651977, "grad_norm": 0.02951614372432232, "learning_rate": 0.0001880507673612674, "loss": 0.3141, "step": 7593 }, { "epoch": 0.6151976668826961, "grad_norm": 0.03100944310426712, "learning_rate": 0.00018804626670867276, "loss": 0.3837, "step": 7594 }, { "epoch": 0.6152786779001944, "grad_norm": 0.041775528341531754, "learning_rate": 0.00018804176605607815, "loss": 0.3651, "step": 7595 }, { "epoch": 0.6153596889176928, "grad_norm": 0.031040530651807785, "learning_rate": 0.0001880372654034835, "loss": 0.3317, "step": 7596 }, { "epoch": 0.6154406999351912, "grad_norm": 0.0322146899998188, "learning_rate": 0.0001880327647508889, "loss": 0.3203, "step": 7597 }, { "epoch": 0.6155217109526896, "grad_norm": 0.02917400188744068, "learning_rate": 0.00018802826409829426, "loss": 0.3457, "step": 7598 }, { "epoch": 0.615602721970188, "grad_norm": 0.028559932485222816, "learning_rate": 0.00018802376344569964, "loss": 0.3112, "step": 7599 }, { "epoch": 0.6156837329876863, "grad_norm": 0.030218252912163734, "learning_rate": 0.000188019262793105, "loss": 0.3319, "step": 7600 }, { "epoch": 0.6157647440051847, "grad_norm": 0.03289555385708809, "learning_rate": 0.0001880147621405104, "loss": 0.3458, "step": 7601 }, { "epoch": 0.6158457550226831, "grad_norm": 0.031023845076560974, "learning_rate": 0.00018801026148791575, "loss": 0.345, "step": 7602 }, { "epoch": 0.6159267660401815, "grad_norm": 0.02872307039797306, "learning_rate": 0.00018800576083532114, "loss": 0.3196, "step": 7603 }, { "epoch": 0.6160077770576798, "grad_norm": 0.030780525878071785, "learning_rate": 0.0001880012601827265, "loss": 0.3173, "step": 7604 }, { "epoch": 0.6160887880751782, "grad_norm": 0.033606596291065216, "learning_rate": 0.00018799675953013188, "loss": 0.3704, "step": 7605 }, { "epoch": 0.6161697990926766, "grad_norm": 0.028881927952170372, "learning_rate": 0.00018799225887753724, "loss": 0.3254, "step": 7606 }, { "epoch": 0.616250810110175, "grad_norm": 0.033225275576114655, "learning_rate": 0.00018798775822494263, "loss": 0.3329, "step": 7607 }, { "epoch": 0.6163318211276734, "grad_norm": 0.03254341334104538, "learning_rate": 0.000187983257572348, "loss": 0.358, "step": 7608 }, { "epoch": 0.6164128321451717, "grad_norm": 0.033052753657102585, "learning_rate": 0.00018797875691975338, "loss": 0.341, "step": 7609 }, { "epoch": 0.6164938431626701, "grad_norm": 0.03443504869937897, "learning_rate": 0.00018797425626715874, "loss": 0.3831, "step": 7610 }, { "epoch": 0.6165748541801686, "grad_norm": 0.02747352235019207, "learning_rate": 0.00018796975561456413, "loss": 0.3274, "step": 7611 }, { "epoch": 0.6166558651976669, "grad_norm": 0.03181469440460205, "learning_rate": 0.00018796525496196949, "loss": 0.3884, "step": 7612 }, { "epoch": 0.6167368762151653, "grad_norm": 0.033690501004457474, "learning_rate": 0.00018796075430937487, "loss": 0.398, "step": 7613 }, { "epoch": 0.6168178872326636, "grad_norm": 0.03067837283015251, "learning_rate": 0.00018795625365678023, "loss": 0.3083, "step": 7614 }, { "epoch": 0.616898898250162, "grad_norm": 0.027984514832496643, "learning_rate": 0.00018795175300418562, "loss": 0.2947, "step": 7615 }, { "epoch": 0.6169799092676604, "grad_norm": 0.02972647361457348, "learning_rate": 0.00018794725235159098, "loss": 0.3436, "step": 7616 }, { "epoch": 0.6170609202851588, "grad_norm": 0.031133973971009254, "learning_rate": 0.00018794275169899637, "loss": 0.353, "step": 7617 }, { "epoch": 0.6171419313026572, "grad_norm": 0.031696900725364685, "learning_rate": 0.00018793825104640175, "loss": 0.3642, "step": 7618 }, { "epoch": 0.6172229423201555, "grad_norm": 0.03247181698679924, "learning_rate": 0.00018793375039380711, "loss": 0.3375, "step": 7619 }, { "epoch": 0.6173039533376539, "grad_norm": 0.030099336057901382, "learning_rate": 0.00018792924974121247, "loss": 0.31, "step": 7620 }, { "epoch": 0.6173849643551523, "grad_norm": 0.028563443571329117, "learning_rate": 0.00018792474908861786, "loss": 0.339, "step": 7621 }, { "epoch": 0.6174659753726507, "grad_norm": 0.030126718804240227, "learning_rate": 0.00018792024843602322, "loss": 0.3602, "step": 7622 }, { "epoch": 0.617546986390149, "grad_norm": 0.029762936756014824, "learning_rate": 0.0001879157477834286, "loss": 0.3322, "step": 7623 }, { "epoch": 0.6176279974076474, "grad_norm": 0.029128240421414375, "learning_rate": 0.000187911247130834, "loss": 0.2891, "step": 7624 }, { "epoch": 0.6177090084251459, "grad_norm": 0.028977826237678528, "learning_rate": 0.00018790674647823936, "loss": 0.3313, "step": 7625 }, { "epoch": 0.6177900194426442, "grad_norm": 0.029257269576191902, "learning_rate": 0.00018790224582564472, "loss": 0.2927, "step": 7626 }, { "epoch": 0.6178710304601426, "grad_norm": 0.03232651203870773, "learning_rate": 0.0001878977451730501, "loss": 0.3648, "step": 7627 }, { "epoch": 0.6179520414776409, "grad_norm": 0.02883930690586567, "learning_rate": 0.00018789324452045546, "loss": 0.3391, "step": 7628 }, { "epoch": 0.6180330524951393, "grad_norm": 0.03152291476726532, "learning_rate": 0.00018788874386786085, "loss": 0.319, "step": 7629 }, { "epoch": 0.6181140635126378, "grad_norm": 0.03525060415267944, "learning_rate": 0.00018788424321526624, "loss": 0.3852, "step": 7630 }, { "epoch": 0.6181950745301361, "grad_norm": 0.03459348902106285, "learning_rate": 0.0001878797425626716, "loss": 0.3838, "step": 7631 }, { "epoch": 0.6182760855476345, "grad_norm": 0.030856028199195862, "learning_rate": 0.00018787524191007696, "loss": 0.3459, "step": 7632 }, { "epoch": 0.6183570965651328, "grad_norm": 0.029476845636963844, "learning_rate": 0.00018787074125748234, "loss": 0.3599, "step": 7633 }, { "epoch": 0.6184381075826313, "grad_norm": 0.029246686026453972, "learning_rate": 0.0001878662406048877, "loss": 0.3057, "step": 7634 }, { "epoch": 0.6185191186001296, "grad_norm": 0.03888120874762535, "learning_rate": 0.0001878617399522931, "loss": 0.4396, "step": 7635 }, { "epoch": 0.618600129617628, "grad_norm": 0.03742235526442528, "learning_rate": 0.00018785723929969848, "loss": 0.3666, "step": 7636 }, { "epoch": 0.6186811406351264, "grad_norm": 0.029268227517604828, "learning_rate": 0.00018785273864710384, "loss": 0.3696, "step": 7637 }, { "epoch": 0.6187621516526247, "grad_norm": 0.02961529977619648, "learning_rate": 0.00018784823799450923, "loss": 0.3066, "step": 7638 }, { "epoch": 0.6188431626701232, "grad_norm": 0.03180916979908943, "learning_rate": 0.0001878437373419146, "loss": 0.3737, "step": 7639 }, { "epoch": 0.6189241736876215, "grad_norm": 0.027828671038150787, "learning_rate": 0.00018783923668931995, "loss": 0.2885, "step": 7640 }, { "epoch": 0.6190051847051199, "grad_norm": 0.034832458943128586, "learning_rate": 0.00018783473603672533, "loss": 0.3623, "step": 7641 }, { "epoch": 0.6190861957226182, "grad_norm": 0.03153369948267937, "learning_rate": 0.00018783023538413072, "loss": 0.3357, "step": 7642 }, { "epoch": 0.6191672067401166, "grad_norm": 0.032440654933452606, "learning_rate": 0.00018782573473153608, "loss": 0.321, "step": 7643 }, { "epoch": 0.6192482177576151, "grad_norm": 0.03241625428199768, "learning_rate": 0.00018782123407894147, "loss": 0.3742, "step": 7644 }, { "epoch": 0.6193292287751134, "grad_norm": 0.032071325927972794, "learning_rate": 0.00018781673342634683, "loss": 0.3665, "step": 7645 }, { "epoch": 0.6194102397926118, "grad_norm": 0.030852077528834343, "learning_rate": 0.0001878122327737522, "loss": 0.3976, "step": 7646 }, { "epoch": 0.6194912508101101, "grad_norm": 0.027692748233675957, "learning_rate": 0.00018780773212115758, "loss": 0.3431, "step": 7647 }, { "epoch": 0.6195722618276086, "grad_norm": 0.03268222138285637, "learning_rate": 0.00018780323146856296, "loss": 0.3486, "step": 7648 }, { "epoch": 0.619653272845107, "grad_norm": 0.03294394537806511, "learning_rate": 0.00018779873081596832, "loss": 0.3759, "step": 7649 }, { "epoch": 0.6197342838626053, "grad_norm": 0.0347847044467926, "learning_rate": 0.0001877942301633737, "loss": 0.3716, "step": 7650 }, { "epoch": 0.6198152948801037, "grad_norm": 0.03194839507341385, "learning_rate": 0.00018778972951077907, "loss": 0.3021, "step": 7651 }, { "epoch": 0.619896305897602, "grad_norm": 0.038782477378845215, "learning_rate": 0.00018778522885818443, "loss": 0.3673, "step": 7652 }, { "epoch": 0.6199773169151005, "grad_norm": 0.029995804652571678, "learning_rate": 0.00018778072820558982, "loss": 0.324, "step": 7653 }, { "epoch": 0.6200583279325989, "grad_norm": 0.03394217789173126, "learning_rate": 0.0001877762275529952, "loss": 0.3304, "step": 7654 }, { "epoch": 0.6201393389500972, "grad_norm": 0.03123002126812935, "learning_rate": 0.00018777172690040056, "loss": 0.3457, "step": 7655 }, { "epoch": 0.6202203499675956, "grad_norm": 0.029245445504784584, "learning_rate": 0.00018776722624780595, "loss": 0.3354, "step": 7656 }, { "epoch": 0.6203013609850939, "grad_norm": 0.03261977806687355, "learning_rate": 0.0001877627255952113, "loss": 0.3578, "step": 7657 }, { "epoch": 0.6203823720025924, "grad_norm": 0.03168657794594765, "learning_rate": 0.00018775822494261667, "loss": 0.3391, "step": 7658 }, { "epoch": 0.6204633830200907, "grad_norm": 0.02707383781671524, "learning_rate": 0.00018775372429002206, "loss": 0.3067, "step": 7659 }, { "epoch": 0.6205443940375891, "grad_norm": 0.034147847443819046, "learning_rate": 0.00018774922363742745, "loss": 0.3351, "step": 7660 }, { "epoch": 0.6206254050550875, "grad_norm": 0.02819540537893772, "learning_rate": 0.0001877447229848328, "loss": 0.3439, "step": 7661 }, { "epoch": 0.6207064160725859, "grad_norm": 0.032641153782606125, "learning_rate": 0.0001877402223322382, "loss": 0.3474, "step": 7662 }, { "epoch": 0.6207874270900843, "grad_norm": 0.031857844442129135, "learning_rate": 0.00018773572167964355, "loss": 0.3122, "step": 7663 }, { "epoch": 0.6208684381075826, "grad_norm": 0.03519831597805023, "learning_rate": 0.0001877312210270489, "loss": 0.3792, "step": 7664 }, { "epoch": 0.620949449125081, "grad_norm": 0.028352493420243263, "learning_rate": 0.0001877267203744543, "loss": 0.334, "step": 7665 }, { "epoch": 0.6210304601425793, "grad_norm": 0.029478255659341812, "learning_rate": 0.0001877222197218597, "loss": 0.3336, "step": 7666 }, { "epoch": 0.6211114711600778, "grad_norm": 0.030945241451263428, "learning_rate": 0.00018771771906926505, "loss": 0.3041, "step": 7667 }, { "epoch": 0.6211924821775762, "grad_norm": 0.03310845419764519, "learning_rate": 0.00018771321841667043, "loss": 0.3457, "step": 7668 }, { "epoch": 0.6212734931950745, "grad_norm": 0.030136924237012863, "learning_rate": 0.0001877087177640758, "loss": 0.3792, "step": 7669 }, { "epoch": 0.6213545042125729, "grad_norm": 0.035356342792510986, "learning_rate": 0.00018770421711148118, "loss": 0.3677, "step": 7670 }, { "epoch": 0.6214355152300713, "grad_norm": 0.028500456362962723, "learning_rate": 0.00018769971645888654, "loss": 0.3355, "step": 7671 }, { "epoch": 0.6215165262475697, "grad_norm": 0.032192058861255646, "learning_rate": 0.00018769521580629193, "loss": 0.3773, "step": 7672 }, { "epoch": 0.6215975372650681, "grad_norm": 0.031182562932372093, "learning_rate": 0.0001876907151536973, "loss": 0.3782, "step": 7673 }, { "epoch": 0.6216785482825664, "grad_norm": 0.027648283168673515, "learning_rate": 0.00018768621450110268, "loss": 0.3275, "step": 7674 }, { "epoch": 0.6217595593000648, "grad_norm": 0.02733623795211315, "learning_rate": 0.00018768171384850804, "loss": 0.3194, "step": 7675 }, { "epoch": 0.6218405703175632, "grad_norm": 0.031033562496304512, "learning_rate": 0.00018767721319591342, "loss": 0.3608, "step": 7676 }, { "epoch": 0.6219215813350616, "grad_norm": 0.031006228178739548, "learning_rate": 0.00018767271254331878, "loss": 0.3551, "step": 7677 }, { "epoch": 0.62200259235256, "grad_norm": 0.029379108920693398, "learning_rate": 0.00018766821189072417, "loss": 0.3467, "step": 7678 }, { "epoch": 0.6220836033700583, "grad_norm": 0.02919275499880314, "learning_rate": 0.00018766371123812953, "loss": 0.3358, "step": 7679 }, { "epoch": 0.6221646143875567, "grad_norm": 0.028002485632896423, "learning_rate": 0.00018765921058553492, "loss": 0.3253, "step": 7680 }, { "epoch": 0.6222456254050551, "grad_norm": 0.03162265196442604, "learning_rate": 0.00018765470993294028, "loss": 0.3578, "step": 7681 }, { "epoch": 0.6223266364225535, "grad_norm": 0.0298593882471323, "learning_rate": 0.00018765020928034566, "loss": 0.3134, "step": 7682 }, { "epoch": 0.6224076474400518, "grad_norm": 0.03213672339916229, "learning_rate": 0.00018764570862775103, "loss": 0.3407, "step": 7683 }, { "epoch": 0.6224886584575502, "grad_norm": 0.02828054688870907, "learning_rate": 0.0001876412079751564, "loss": 0.3089, "step": 7684 }, { "epoch": 0.6225696694750487, "grad_norm": 0.03332474082708359, "learning_rate": 0.00018763670732256177, "loss": 0.3777, "step": 7685 }, { "epoch": 0.622650680492547, "grad_norm": 0.035733021795749664, "learning_rate": 0.00018763220666996716, "loss": 0.3843, "step": 7686 }, { "epoch": 0.6227316915100454, "grad_norm": 0.03247293457388878, "learning_rate": 0.00018762770601737252, "loss": 0.3606, "step": 7687 }, { "epoch": 0.6228127025275437, "grad_norm": 0.034030482172966, "learning_rate": 0.0001876232053647779, "loss": 0.4188, "step": 7688 }, { "epoch": 0.6228937135450421, "grad_norm": 0.03167286515235901, "learning_rate": 0.00018761870471218327, "loss": 0.3326, "step": 7689 }, { "epoch": 0.6229747245625405, "grad_norm": 0.03559541329741478, "learning_rate": 0.00018761420405958865, "loss": 0.4577, "step": 7690 }, { "epoch": 0.6230557355800389, "grad_norm": 0.03855305537581444, "learning_rate": 0.00018760970340699401, "loss": 0.3574, "step": 7691 }, { "epoch": 0.6231367465975373, "grad_norm": 0.03338480740785599, "learning_rate": 0.0001876052027543994, "loss": 0.347, "step": 7692 }, { "epoch": 0.6232177576150356, "grad_norm": 0.032319311052560806, "learning_rate": 0.0001876007021018048, "loss": 0.321, "step": 7693 }, { "epoch": 0.623298768632534, "grad_norm": 0.03374495357275009, "learning_rate": 0.00018759620144921015, "loss": 0.3338, "step": 7694 }, { "epoch": 0.6233797796500324, "grad_norm": 0.030150065198540688, "learning_rate": 0.0001875917007966155, "loss": 0.4023, "step": 7695 }, { "epoch": 0.6234607906675308, "grad_norm": 0.02881564199924469, "learning_rate": 0.0001875872001440209, "loss": 0.3131, "step": 7696 }, { "epoch": 0.6235418016850292, "grad_norm": 0.03331762179732323, "learning_rate": 0.00018758269949142626, "loss": 0.377, "step": 7697 }, { "epoch": 0.6236228127025275, "grad_norm": 0.031027859076857567, "learning_rate": 0.00018757819883883164, "loss": 0.3222, "step": 7698 }, { "epoch": 0.623703823720026, "grad_norm": 0.03061489760875702, "learning_rate": 0.00018757369818623703, "loss": 0.3708, "step": 7699 }, { "epoch": 0.6237848347375243, "grad_norm": 0.02916574850678444, "learning_rate": 0.0001875691975336424, "loss": 0.3638, "step": 7700 }, { "epoch": 0.6238658457550227, "grad_norm": 0.0307163055986166, "learning_rate": 0.00018756469688104775, "loss": 0.3371, "step": 7701 }, { "epoch": 0.623946856772521, "grad_norm": 0.02987971156835556, "learning_rate": 0.00018756019622845314, "loss": 0.3574, "step": 7702 }, { "epoch": 0.6240278677900194, "grad_norm": 0.043917465955019, "learning_rate": 0.0001875556955758585, "loss": 0.3741, "step": 7703 }, { "epoch": 0.6241088788075179, "grad_norm": 0.0349520780146122, "learning_rate": 0.00018755119492326388, "loss": 0.3873, "step": 7704 }, { "epoch": 0.6241898898250162, "grad_norm": 0.03076990880072117, "learning_rate": 0.00018754669427066927, "loss": 0.3318, "step": 7705 }, { "epoch": 0.6242709008425146, "grad_norm": 0.031307514756917953, "learning_rate": 0.00018754219361807463, "loss": 0.3338, "step": 7706 }, { "epoch": 0.6243519118600129, "grad_norm": 0.033534422516822815, "learning_rate": 0.00018753769296548002, "loss": 0.368, "step": 7707 }, { "epoch": 0.6244329228775113, "grad_norm": 0.03319765254855156, "learning_rate": 0.00018753319231288538, "loss": 0.3658, "step": 7708 }, { "epoch": 0.6245139338950098, "grad_norm": 0.03517692908644676, "learning_rate": 0.00018752869166029074, "loss": 0.4121, "step": 7709 }, { "epoch": 0.6245949449125081, "grad_norm": 0.044076353311538696, "learning_rate": 0.00018752419100769613, "loss": 0.3644, "step": 7710 }, { "epoch": 0.6246759559300065, "grad_norm": 0.03173117712140083, "learning_rate": 0.0001875196903551015, "loss": 0.3017, "step": 7711 }, { "epoch": 0.6247569669475048, "grad_norm": 0.02849593758583069, "learning_rate": 0.00018751518970250687, "loss": 0.3117, "step": 7712 }, { "epoch": 0.6248379779650033, "grad_norm": 0.036401744931936264, "learning_rate": 0.00018751068904991226, "loss": 0.3876, "step": 7713 }, { "epoch": 0.6249189889825016, "grad_norm": 0.034901734441518784, "learning_rate": 0.00018750618839731762, "loss": 0.4069, "step": 7714 }, { "epoch": 0.625, "grad_norm": 0.031746748834848404, "learning_rate": 0.00018750168774472298, "loss": 0.3244, "step": 7715 }, { "epoch": 0.6250810110174984, "grad_norm": 0.03301462158560753, "learning_rate": 0.00018749718709212837, "loss": 0.3879, "step": 7716 }, { "epoch": 0.6251620220349967, "grad_norm": 0.029027685523033142, "learning_rate": 0.00018749268643953375, "loss": 0.3395, "step": 7717 }, { "epoch": 0.6252430330524952, "grad_norm": 0.02682817541062832, "learning_rate": 0.00018748818578693911, "loss": 0.319, "step": 7718 }, { "epoch": 0.6253240440699935, "grad_norm": 0.02966170758008957, "learning_rate": 0.0001874836851343445, "loss": 0.3461, "step": 7719 }, { "epoch": 0.6254050550874919, "grad_norm": 0.029544176533818245, "learning_rate": 0.00018747918448174986, "loss": 0.3465, "step": 7720 }, { "epoch": 0.6254860661049902, "grad_norm": 0.03321243077516556, "learning_rate": 0.00018747468382915522, "loss": 0.3737, "step": 7721 }, { "epoch": 0.6255670771224887, "grad_norm": 0.03554457798600197, "learning_rate": 0.0001874701831765606, "loss": 0.3616, "step": 7722 }, { "epoch": 0.6256480881399871, "grad_norm": 0.030386896803975105, "learning_rate": 0.000187465682523966, "loss": 0.3075, "step": 7723 }, { "epoch": 0.6257290991574854, "grad_norm": 0.02811279334127903, "learning_rate": 0.00018746118187137136, "loss": 0.3463, "step": 7724 }, { "epoch": 0.6258101101749838, "grad_norm": 0.033893097192049026, "learning_rate": 0.00018745668121877674, "loss": 0.4574, "step": 7725 }, { "epoch": 0.6258911211924821, "grad_norm": 0.030976947396993637, "learning_rate": 0.0001874521805661821, "loss": 0.3689, "step": 7726 }, { "epoch": 0.6259721322099806, "grad_norm": 0.03269078955054283, "learning_rate": 0.00018744767991358746, "loss": 0.421, "step": 7727 }, { "epoch": 0.626053143227479, "grad_norm": 0.028709085658192635, "learning_rate": 0.00018744317926099285, "loss": 0.3167, "step": 7728 }, { "epoch": 0.6261341542449773, "grad_norm": 0.02881304919719696, "learning_rate": 0.00018743867860839824, "loss": 0.3373, "step": 7729 }, { "epoch": 0.6262151652624757, "grad_norm": 0.030857166275382042, "learning_rate": 0.0001874341779558036, "loss": 0.3754, "step": 7730 }, { "epoch": 0.626296176279974, "grad_norm": 0.0322621688246727, "learning_rate": 0.00018742967730320899, "loss": 0.3685, "step": 7731 }, { "epoch": 0.6263771872974725, "grad_norm": 0.03219965472817421, "learning_rate": 0.00018742517665061435, "loss": 0.3557, "step": 7732 }, { "epoch": 0.6264581983149708, "grad_norm": 0.033386070281267166, "learning_rate": 0.0001874206759980197, "loss": 0.3894, "step": 7733 }, { "epoch": 0.6265392093324692, "grad_norm": 0.031703755259513855, "learning_rate": 0.0001874161753454251, "loss": 0.3368, "step": 7734 }, { "epoch": 0.6266202203499676, "grad_norm": 0.03309629485011101, "learning_rate": 0.00018741167469283048, "loss": 0.4076, "step": 7735 }, { "epoch": 0.626701231367466, "grad_norm": 0.030072150751948357, "learning_rate": 0.00018740717404023584, "loss": 0.3714, "step": 7736 }, { "epoch": 0.6267822423849644, "grad_norm": 0.03196774795651436, "learning_rate": 0.00018740267338764123, "loss": 0.3403, "step": 7737 }, { "epoch": 0.6268632534024627, "grad_norm": 0.03893179073929787, "learning_rate": 0.0001873981727350466, "loss": 0.3679, "step": 7738 }, { "epoch": 0.6269442644199611, "grad_norm": 0.027131741866469383, "learning_rate": 0.00018739367208245195, "loss": 0.3135, "step": 7739 }, { "epoch": 0.6270252754374595, "grad_norm": 0.03447889909148216, "learning_rate": 0.00018738917142985733, "loss": 0.3686, "step": 7740 }, { "epoch": 0.6271062864549579, "grad_norm": 0.0332777164876461, "learning_rate": 0.00018738467077726272, "loss": 0.3708, "step": 7741 }, { "epoch": 0.6271872974724563, "grad_norm": 0.03280436247587204, "learning_rate": 0.00018738017012466808, "loss": 0.3166, "step": 7742 }, { "epoch": 0.6272683084899546, "grad_norm": 0.031332600861787796, "learning_rate": 0.00018737566947207347, "loss": 0.315, "step": 7743 }, { "epoch": 0.627349319507453, "grad_norm": 0.03338506817817688, "learning_rate": 0.00018737116881947883, "loss": 0.3947, "step": 7744 }, { "epoch": 0.6274303305249513, "grad_norm": 0.03453171253204346, "learning_rate": 0.0001873666681668842, "loss": 0.3335, "step": 7745 }, { "epoch": 0.6275113415424498, "grad_norm": 0.032016437500715256, "learning_rate": 0.00018736216751428958, "loss": 0.3425, "step": 7746 }, { "epoch": 0.6275923525599482, "grad_norm": 0.026858022436499596, "learning_rate": 0.00018735766686169496, "loss": 0.3468, "step": 7747 }, { "epoch": 0.6276733635774465, "grad_norm": 0.028182942420244217, "learning_rate": 0.00018735316620910032, "loss": 0.3507, "step": 7748 }, { "epoch": 0.6277543745949449, "grad_norm": 0.027926893904805183, "learning_rate": 0.0001873486655565057, "loss": 0.2843, "step": 7749 }, { "epoch": 0.6278353856124433, "grad_norm": 0.028946882113814354, "learning_rate": 0.00018734416490391107, "loss": 0.3046, "step": 7750 }, { "epoch": 0.6279163966299417, "grad_norm": 0.03570643439888954, "learning_rate": 0.00018733966425131646, "loss": 0.3791, "step": 7751 }, { "epoch": 0.62799740764744, "grad_norm": 0.03203314542770386, "learning_rate": 0.00018733516359872182, "loss": 0.357, "step": 7752 }, { "epoch": 0.6280784186649384, "grad_norm": 0.029860027134418488, "learning_rate": 0.0001873306629461272, "loss": 0.3278, "step": 7753 }, { "epoch": 0.6281594296824368, "grad_norm": 0.033992551267147064, "learning_rate": 0.00018732616229353256, "loss": 0.3851, "step": 7754 }, { "epoch": 0.6282404406999352, "grad_norm": 0.03233917057514191, "learning_rate": 0.00018732166164093795, "loss": 0.3922, "step": 7755 }, { "epoch": 0.6283214517174336, "grad_norm": 0.03380677103996277, "learning_rate": 0.0001873171609883433, "loss": 0.3704, "step": 7756 }, { "epoch": 0.6284024627349319, "grad_norm": 0.03246676176786423, "learning_rate": 0.0001873126603357487, "loss": 0.3935, "step": 7757 }, { "epoch": 0.6284834737524303, "grad_norm": 0.029080143198370934, "learning_rate": 0.00018730815968315406, "loss": 0.3525, "step": 7758 }, { "epoch": 0.6285644847699287, "grad_norm": 0.028809476643800735, "learning_rate": 0.00018730365903055945, "loss": 0.2819, "step": 7759 }, { "epoch": 0.6286454957874271, "grad_norm": 0.028122996911406517, "learning_rate": 0.0001872991583779648, "loss": 0.3095, "step": 7760 }, { "epoch": 0.6287265068049255, "grad_norm": 0.0307499747723341, "learning_rate": 0.0001872946577253702, "loss": 0.36, "step": 7761 }, { "epoch": 0.6288075178224238, "grad_norm": 0.03517741337418556, "learning_rate": 0.00018729015707277555, "loss": 0.3515, "step": 7762 }, { "epoch": 0.6288885288399222, "grad_norm": 0.03172396123409271, "learning_rate": 0.00018728565642018094, "loss": 0.3623, "step": 7763 }, { "epoch": 0.6289695398574207, "grad_norm": 0.02997780777513981, "learning_rate": 0.0001872811557675863, "loss": 0.3266, "step": 7764 }, { "epoch": 0.629050550874919, "grad_norm": 0.02853599190711975, "learning_rate": 0.0001872766551149917, "loss": 0.3123, "step": 7765 }, { "epoch": 0.6291315618924174, "grad_norm": 0.02735970728099346, "learning_rate": 0.00018727215446239705, "loss": 0.3058, "step": 7766 }, { "epoch": 0.6292125729099157, "grad_norm": 0.03487173095345497, "learning_rate": 0.00018726765380980243, "loss": 0.3259, "step": 7767 }, { "epoch": 0.6292935839274141, "grad_norm": 0.032258134335279465, "learning_rate": 0.0001872631531572078, "loss": 0.3393, "step": 7768 }, { "epoch": 0.6293745949449125, "grad_norm": 0.034025609493255615, "learning_rate": 0.00018725865250461318, "loss": 0.3721, "step": 7769 }, { "epoch": 0.6294556059624109, "grad_norm": 0.03420078381896019, "learning_rate": 0.00018725415185201854, "loss": 0.3673, "step": 7770 }, { "epoch": 0.6295366169799093, "grad_norm": 0.034589920192956924, "learning_rate": 0.00018724965119942393, "loss": 0.3744, "step": 7771 }, { "epoch": 0.6296176279974076, "grad_norm": 0.029632601886987686, "learning_rate": 0.0001872451505468293, "loss": 0.3097, "step": 7772 }, { "epoch": 0.6296986390149061, "grad_norm": 0.030544817447662354, "learning_rate": 0.00018724064989423468, "loss": 0.2801, "step": 7773 }, { "epoch": 0.6297796500324044, "grad_norm": 0.029614605009555817, "learning_rate": 0.00018723614924164006, "loss": 0.3636, "step": 7774 }, { "epoch": 0.6298606610499028, "grad_norm": 0.030968619510531425, "learning_rate": 0.00018723164858904542, "loss": 0.3664, "step": 7775 }, { "epoch": 0.6299416720674011, "grad_norm": 0.026602793484926224, "learning_rate": 0.0001872271479364508, "loss": 0.3372, "step": 7776 }, { "epoch": 0.6300226830848995, "grad_norm": 0.033030539751052856, "learning_rate": 0.00018722264728385617, "loss": 0.3602, "step": 7777 }, { "epoch": 0.630103694102398, "grad_norm": 0.03224138543009758, "learning_rate": 0.00018721814663126153, "loss": 0.3378, "step": 7778 }, { "epoch": 0.6301847051198963, "grad_norm": 0.032139744609594345, "learning_rate": 0.00018721364597866692, "loss": 0.3274, "step": 7779 }, { "epoch": 0.6302657161373947, "grad_norm": 0.0309866052120924, "learning_rate": 0.0001872091453260723, "loss": 0.3763, "step": 7780 }, { "epoch": 0.630346727154893, "grad_norm": 0.030568985268473625, "learning_rate": 0.00018720464467347767, "loss": 0.3292, "step": 7781 }, { "epoch": 0.6304277381723914, "grad_norm": 0.036407891660928726, "learning_rate": 0.00018720014402088305, "loss": 0.3756, "step": 7782 }, { "epoch": 0.6305087491898899, "grad_norm": 0.029157212004065514, "learning_rate": 0.0001871956433682884, "loss": 0.3273, "step": 7783 }, { "epoch": 0.6305897602073882, "grad_norm": 0.031077854335308075, "learning_rate": 0.00018719114271569377, "loss": 0.3667, "step": 7784 }, { "epoch": 0.6306707712248866, "grad_norm": 0.027137432247400284, "learning_rate": 0.00018718664206309916, "loss": 0.2917, "step": 7785 }, { "epoch": 0.6307517822423849, "grad_norm": 0.03677370026707649, "learning_rate": 0.00018718214141050455, "loss": 0.3672, "step": 7786 }, { "epoch": 0.6308327932598834, "grad_norm": 0.029398785904049873, "learning_rate": 0.0001871776407579099, "loss": 0.3165, "step": 7787 }, { "epoch": 0.6309138042773818, "grad_norm": 0.028446868062019348, "learning_rate": 0.0001871731401053153, "loss": 0.3239, "step": 7788 }, { "epoch": 0.6309948152948801, "grad_norm": 0.032294776290655136, "learning_rate": 0.00018716863945272065, "loss": 0.3201, "step": 7789 }, { "epoch": 0.6310758263123785, "grad_norm": 0.030610181391239166, "learning_rate": 0.00018716413880012601, "loss": 0.3521, "step": 7790 }, { "epoch": 0.6311568373298768, "grad_norm": 0.029071614146232605, "learning_rate": 0.0001871596381475314, "loss": 0.3449, "step": 7791 }, { "epoch": 0.6312378483473753, "grad_norm": 0.0313870944082737, "learning_rate": 0.0001871551374949368, "loss": 0.327, "step": 7792 }, { "epoch": 0.6313188593648736, "grad_norm": 0.028792880475521088, "learning_rate": 0.00018715063684234215, "loss": 0.3437, "step": 7793 }, { "epoch": 0.631399870382372, "grad_norm": 0.02864512801170349, "learning_rate": 0.00018714613618974754, "loss": 0.3288, "step": 7794 }, { "epoch": 0.6314808813998704, "grad_norm": 0.0337587371468544, "learning_rate": 0.0001871416355371529, "loss": 0.3454, "step": 7795 }, { "epoch": 0.6315618924173687, "grad_norm": 0.03164323791861534, "learning_rate": 0.00018713713488455826, "loss": 0.3309, "step": 7796 }, { "epoch": 0.6316429034348672, "grad_norm": 0.032779909670352936, "learning_rate": 0.00018713263423196364, "loss": 0.3963, "step": 7797 }, { "epoch": 0.6317239144523655, "grad_norm": 0.031031692400574684, "learning_rate": 0.00018712813357936903, "loss": 0.3399, "step": 7798 }, { "epoch": 0.6318049254698639, "grad_norm": 0.028976168483495712, "learning_rate": 0.0001871236329267744, "loss": 0.3275, "step": 7799 }, { "epoch": 0.6318859364873622, "grad_norm": 0.03551590442657471, "learning_rate": 0.00018711913227417978, "loss": 0.3604, "step": 7800 }, { "epoch": 0.6319669475048607, "grad_norm": 0.0324922576546669, "learning_rate": 0.00018711463162158514, "loss": 0.4056, "step": 7801 }, { "epoch": 0.6320479585223591, "grad_norm": 0.030459264293313026, "learning_rate": 0.0001871101309689905, "loss": 0.3151, "step": 7802 }, { "epoch": 0.6321289695398574, "grad_norm": 0.03610960766673088, "learning_rate": 0.00018710563031639588, "loss": 0.3772, "step": 7803 }, { "epoch": 0.6322099805573558, "grad_norm": 0.033341750502586365, "learning_rate": 0.00018710112966380127, "loss": 0.3688, "step": 7804 }, { "epoch": 0.6322909915748541, "grad_norm": 0.029451124370098114, "learning_rate": 0.00018709662901120663, "loss": 0.3213, "step": 7805 }, { "epoch": 0.6323720025923526, "grad_norm": 0.03107476606965065, "learning_rate": 0.00018709212835861202, "loss": 0.351, "step": 7806 }, { "epoch": 0.632453013609851, "grad_norm": 0.03220190480351448, "learning_rate": 0.00018708762770601738, "loss": 0.3563, "step": 7807 }, { "epoch": 0.6325340246273493, "grad_norm": 0.028155187144875526, "learning_rate": 0.00018708312705342274, "loss": 0.3067, "step": 7808 }, { "epoch": 0.6326150356448477, "grad_norm": 0.03451511636376381, "learning_rate": 0.00018707862640082813, "loss": 0.406, "step": 7809 }, { "epoch": 0.6326960466623461, "grad_norm": 0.03255141153931618, "learning_rate": 0.0001870741257482335, "loss": 0.3816, "step": 7810 }, { "epoch": 0.6327770576798445, "grad_norm": 0.027588123455643654, "learning_rate": 0.00018706962509563887, "loss": 0.3374, "step": 7811 }, { "epoch": 0.6328580686973428, "grad_norm": 0.03637088090181351, "learning_rate": 0.00018706512444304426, "loss": 0.3403, "step": 7812 }, { "epoch": 0.6329390797148412, "grad_norm": 0.0299672894179821, "learning_rate": 0.00018706062379044962, "loss": 0.3079, "step": 7813 }, { "epoch": 0.6330200907323396, "grad_norm": 0.03295118361711502, "learning_rate": 0.00018705612313785498, "loss": 0.3351, "step": 7814 }, { "epoch": 0.633101101749838, "grad_norm": 0.034902893006801605, "learning_rate": 0.00018705162248526037, "loss": 0.354, "step": 7815 }, { "epoch": 0.6331821127673364, "grad_norm": 0.03641962260007858, "learning_rate": 0.00018704712183266575, "loss": 0.3717, "step": 7816 }, { "epoch": 0.6332631237848347, "grad_norm": 0.028225794434547424, "learning_rate": 0.00018704262118007111, "loss": 0.3003, "step": 7817 }, { "epoch": 0.6333441348023331, "grad_norm": 0.033281516283750534, "learning_rate": 0.0001870381205274765, "loss": 0.3613, "step": 7818 }, { "epoch": 0.6334251458198314, "grad_norm": 0.03376254066824913, "learning_rate": 0.00018703361987488186, "loss": 0.3418, "step": 7819 }, { "epoch": 0.6335061568373299, "grad_norm": 0.02990681864321232, "learning_rate": 0.00018702911922228722, "loss": 0.35, "step": 7820 }, { "epoch": 0.6335871678548283, "grad_norm": 0.029667438939213753, "learning_rate": 0.0001870246185696926, "loss": 0.3329, "step": 7821 }, { "epoch": 0.6336681788723266, "grad_norm": 0.0334978923201561, "learning_rate": 0.000187020117917098, "loss": 0.353, "step": 7822 }, { "epoch": 0.633749189889825, "grad_norm": 0.033578719943761826, "learning_rate": 0.00018701561726450336, "loss": 0.3596, "step": 7823 }, { "epoch": 0.6338302009073234, "grad_norm": 0.033971913158893585, "learning_rate": 0.00018701111661190874, "loss": 0.3467, "step": 7824 }, { "epoch": 0.6339112119248218, "grad_norm": 0.034550849348306656, "learning_rate": 0.0001870066159593141, "loss": 0.3418, "step": 7825 }, { "epoch": 0.6339922229423202, "grad_norm": 0.02897421084344387, "learning_rate": 0.0001870021153067195, "loss": 0.3507, "step": 7826 }, { "epoch": 0.6340732339598185, "grad_norm": 0.028931085020303726, "learning_rate": 0.00018699761465412485, "loss": 0.3433, "step": 7827 }, { "epoch": 0.6341542449773169, "grad_norm": 0.034571584314107895, "learning_rate": 0.00018699311400153024, "loss": 0.3595, "step": 7828 }, { "epoch": 0.6342352559948153, "grad_norm": 0.03314541280269623, "learning_rate": 0.0001869886133489356, "loss": 0.3272, "step": 7829 }, { "epoch": 0.6343162670123137, "grad_norm": 0.031858861446380615, "learning_rate": 0.00018698411269634099, "loss": 0.3978, "step": 7830 }, { "epoch": 0.634397278029812, "grad_norm": 0.031506266444921494, "learning_rate": 0.00018697961204374635, "loss": 0.3604, "step": 7831 }, { "epoch": 0.6344782890473104, "grad_norm": 0.028984401375055313, "learning_rate": 0.00018697511139115173, "loss": 0.3421, "step": 7832 }, { "epoch": 0.6345593000648088, "grad_norm": 0.029323618859052658, "learning_rate": 0.0001869706107385571, "loss": 0.324, "step": 7833 }, { "epoch": 0.6346403110823072, "grad_norm": 0.02964533492922783, "learning_rate": 0.00018696611008596248, "loss": 0.371, "step": 7834 }, { "epoch": 0.6347213220998056, "grad_norm": 0.029131043702363968, "learning_rate": 0.00018696160943336784, "loss": 0.3247, "step": 7835 }, { "epoch": 0.6348023331173039, "grad_norm": 0.032547157257795334, "learning_rate": 0.00018695710878077323, "loss": 0.3574, "step": 7836 }, { "epoch": 0.6348833441348023, "grad_norm": 0.03153744339942932, "learning_rate": 0.0001869526081281786, "loss": 0.363, "step": 7837 }, { "epoch": 0.6349643551523008, "grad_norm": 0.02874799631536007, "learning_rate": 0.00018694810747558397, "loss": 0.3587, "step": 7838 }, { "epoch": 0.6350453661697991, "grad_norm": 0.02552749775350094, "learning_rate": 0.00018694360682298936, "loss": 0.3196, "step": 7839 }, { "epoch": 0.6351263771872975, "grad_norm": 0.03303792327642441, "learning_rate": 0.00018693910617039472, "loss": 0.4216, "step": 7840 }, { "epoch": 0.6352073882047958, "grad_norm": 0.03018331155180931, "learning_rate": 0.00018693460551780008, "loss": 0.3297, "step": 7841 }, { "epoch": 0.6352883992222942, "grad_norm": 0.034510936588048935, "learning_rate": 0.00018693010486520547, "loss": 0.3494, "step": 7842 }, { "epoch": 0.6353694102397927, "grad_norm": 0.031153760850429535, "learning_rate": 0.00018692560421261083, "loss": 0.3, "step": 7843 }, { "epoch": 0.635450421257291, "grad_norm": 0.03127124160528183, "learning_rate": 0.00018692110356001622, "loss": 0.3681, "step": 7844 }, { "epoch": 0.6355314322747894, "grad_norm": 0.028756583109498024, "learning_rate": 0.0001869166029074216, "loss": 0.3521, "step": 7845 }, { "epoch": 0.6356124432922877, "grad_norm": 0.03337053209543228, "learning_rate": 0.00018691210225482696, "loss": 0.3818, "step": 7846 }, { "epoch": 0.6356934543097861, "grad_norm": 0.032148126512765884, "learning_rate": 0.00018690760160223232, "loss": 0.3168, "step": 7847 }, { "epoch": 0.6357744653272845, "grad_norm": 0.03170749545097351, "learning_rate": 0.0001869031009496377, "loss": 0.3426, "step": 7848 }, { "epoch": 0.6358554763447829, "grad_norm": 0.03537492826581001, "learning_rate": 0.00018689860029704307, "loss": 0.41, "step": 7849 }, { "epoch": 0.6359364873622813, "grad_norm": 0.032313790172338486, "learning_rate": 0.00018689409964444846, "loss": 0.3755, "step": 7850 }, { "epoch": 0.6360174983797796, "grad_norm": 0.029278071597218513, "learning_rate": 0.00018688959899185384, "loss": 0.349, "step": 7851 }, { "epoch": 0.6360985093972781, "grad_norm": 0.032198451459407806, "learning_rate": 0.0001868850983392592, "loss": 0.3568, "step": 7852 }, { "epoch": 0.6361795204147764, "grad_norm": 0.03112160414457321, "learning_rate": 0.00018688059768666456, "loss": 0.3429, "step": 7853 }, { "epoch": 0.6362605314322748, "grad_norm": 0.03611839562654495, "learning_rate": 0.00018687609703406995, "loss": 0.3426, "step": 7854 }, { "epoch": 0.6363415424497731, "grad_norm": 0.030980095267295837, "learning_rate": 0.00018687159638147534, "loss": 0.3306, "step": 7855 }, { "epoch": 0.6364225534672715, "grad_norm": 0.028781326487660408, "learning_rate": 0.0001868670957288807, "loss": 0.324, "step": 7856 }, { "epoch": 0.63650356448477, "grad_norm": 0.033035773783922195, "learning_rate": 0.00018686259507628609, "loss": 0.3372, "step": 7857 }, { "epoch": 0.6365845755022683, "grad_norm": 0.03632812201976776, "learning_rate": 0.00018685809442369145, "loss": 0.4069, "step": 7858 }, { "epoch": 0.6366655865197667, "grad_norm": 0.03556077554821968, "learning_rate": 0.0001868535937710968, "loss": 0.3416, "step": 7859 }, { "epoch": 0.636746597537265, "grad_norm": 0.04521572217345238, "learning_rate": 0.0001868490931185022, "loss": 0.3711, "step": 7860 }, { "epoch": 0.6368276085547635, "grad_norm": 0.031015830114483833, "learning_rate": 0.00018684459246590758, "loss": 0.3253, "step": 7861 }, { "epoch": 0.6369086195722619, "grad_norm": 0.03212182968854904, "learning_rate": 0.00018684009181331294, "loss": 0.3253, "step": 7862 }, { "epoch": 0.6369896305897602, "grad_norm": 0.02970615215599537, "learning_rate": 0.00018683559116071833, "loss": 0.3117, "step": 7863 }, { "epoch": 0.6370706416072586, "grad_norm": 0.03084675595164299, "learning_rate": 0.0001868310905081237, "loss": 0.3269, "step": 7864 }, { "epoch": 0.6371516526247569, "grad_norm": 0.029871240258216858, "learning_rate": 0.00018682658985552905, "loss": 0.3584, "step": 7865 }, { "epoch": 0.6372326636422554, "grad_norm": 0.02989710122346878, "learning_rate": 0.00018682208920293444, "loss": 0.3301, "step": 7866 }, { "epoch": 0.6373136746597537, "grad_norm": 0.0319591723382473, "learning_rate": 0.00018681758855033982, "loss": 0.3443, "step": 7867 }, { "epoch": 0.6373946856772521, "grad_norm": 0.03004080429673195, "learning_rate": 0.00018681308789774518, "loss": 0.3612, "step": 7868 }, { "epoch": 0.6374756966947505, "grad_norm": 0.0289496760815382, "learning_rate": 0.00018680858724515057, "loss": 0.3276, "step": 7869 }, { "epoch": 0.6375567077122488, "grad_norm": 0.032016411423683167, "learning_rate": 0.00018680408659255593, "loss": 0.3369, "step": 7870 }, { "epoch": 0.6376377187297473, "grad_norm": 0.031566351652145386, "learning_rate": 0.0001867995859399613, "loss": 0.2975, "step": 7871 }, { "epoch": 0.6377187297472456, "grad_norm": 0.030370811000466347, "learning_rate": 0.00018679508528736668, "loss": 0.3555, "step": 7872 }, { "epoch": 0.637799740764744, "grad_norm": 0.031356330960989, "learning_rate": 0.00018679058463477206, "loss": 0.3226, "step": 7873 }, { "epoch": 0.6378807517822424, "grad_norm": 0.025103889405727386, "learning_rate": 0.00018678608398217742, "loss": 0.3158, "step": 7874 }, { "epoch": 0.6379617627997408, "grad_norm": 0.036442261189222336, "learning_rate": 0.0001867815833295828, "loss": 0.3843, "step": 7875 }, { "epoch": 0.6380427738172392, "grad_norm": 0.03667015582323074, "learning_rate": 0.00018677708267698817, "loss": 0.4306, "step": 7876 }, { "epoch": 0.6381237848347375, "grad_norm": 0.03109908103942871, "learning_rate": 0.00018677258202439353, "loss": 0.344, "step": 7877 }, { "epoch": 0.6382047958522359, "grad_norm": 0.028864892199635506, "learning_rate": 0.00018676808137179892, "loss": 0.3103, "step": 7878 }, { "epoch": 0.6382858068697342, "grad_norm": 0.031213095411658287, "learning_rate": 0.0001867635807192043, "loss": 0.343, "step": 7879 }, { "epoch": 0.6383668178872327, "grad_norm": 0.030226996168494225, "learning_rate": 0.00018675908006660967, "loss": 0.3656, "step": 7880 }, { "epoch": 0.6384478289047311, "grad_norm": 0.031217820942401886, "learning_rate": 0.00018675457941401505, "loss": 0.3583, "step": 7881 }, { "epoch": 0.6385288399222294, "grad_norm": 0.032496050000190735, "learning_rate": 0.0001867500787614204, "loss": 0.3667, "step": 7882 }, { "epoch": 0.6386098509397278, "grad_norm": 0.02922358550131321, "learning_rate": 0.00018674557810882577, "loss": 0.3303, "step": 7883 }, { "epoch": 0.6386908619572261, "grad_norm": 0.03338933363556862, "learning_rate": 0.00018674107745623116, "loss": 0.3584, "step": 7884 }, { "epoch": 0.6387718729747246, "grad_norm": 0.033961571753025055, "learning_rate": 0.00018673657680363655, "loss": 0.3591, "step": 7885 }, { "epoch": 0.638852883992223, "grad_norm": 0.028723198920488358, "learning_rate": 0.0001867320761510419, "loss": 0.3674, "step": 7886 }, { "epoch": 0.6389338950097213, "grad_norm": 0.036117613315582275, "learning_rate": 0.0001867275754984473, "loss": 0.4079, "step": 7887 }, { "epoch": 0.6390149060272197, "grad_norm": 0.028472524136304855, "learning_rate": 0.00018672307484585265, "loss": 0.3171, "step": 7888 }, { "epoch": 0.6390959170447181, "grad_norm": 0.03445472568273544, "learning_rate": 0.00018671857419325801, "loss": 0.3583, "step": 7889 }, { "epoch": 0.6391769280622165, "grad_norm": 0.030528537929058075, "learning_rate": 0.0001867140735406634, "loss": 0.3694, "step": 7890 }, { "epoch": 0.6392579390797148, "grad_norm": 0.029722632840275764, "learning_rate": 0.0001867095728880688, "loss": 0.3617, "step": 7891 }, { "epoch": 0.6393389500972132, "grad_norm": 0.03173793479800224, "learning_rate": 0.00018670507223547415, "loss": 0.3909, "step": 7892 }, { "epoch": 0.6394199611147116, "grad_norm": 0.030716672539711, "learning_rate": 0.00018670057158287954, "loss": 0.3438, "step": 7893 }, { "epoch": 0.63950097213221, "grad_norm": 0.02941414900124073, "learning_rate": 0.0001866960709302849, "loss": 0.3159, "step": 7894 }, { "epoch": 0.6395819831497084, "grad_norm": 0.0353083461523056, "learning_rate": 0.00018669157027769026, "loss": 0.2895, "step": 7895 }, { "epoch": 0.6396629941672067, "grad_norm": 0.03264469653367996, "learning_rate": 0.00018668706962509564, "loss": 0.3612, "step": 7896 }, { "epoch": 0.6397440051847051, "grad_norm": 0.02999110147356987, "learning_rate": 0.00018668256897250103, "loss": 0.3251, "step": 7897 }, { "epoch": 0.6398250162022034, "grad_norm": 0.029968377202749252, "learning_rate": 0.0001866780683199064, "loss": 0.3717, "step": 7898 }, { "epoch": 0.6399060272197019, "grad_norm": 0.030061427503824234, "learning_rate": 0.00018667356766731178, "loss": 0.3565, "step": 7899 }, { "epoch": 0.6399870382372003, "grad_norm": 0.029621459543704987, "learning_rate": 0.00018666906701471714, "loss": 0.3551, "step": 7900 }, { "epoch": 0.6400680492546986, "grad_norm": 0.03125537186861038, "learning_rate": 0.0001866645663621225, "loss": 0.3238, "step": 7901 }, { "epoch": 0.640149060272197, "grad_norm": 0.030191486701369286, "learning_rate": 0.00018666006570952788, "loss": 0.3346, "step": 7902 }, { "epoch": 0.6402300712896954, "grad_norm": 0.03128298744559288, "learning_rate": 0.00018665556505693327, "loss": 0.2971, "step": 7903 }, { "epoch": 0.6403110823071938, "grad_norm": 0.03299170359969139, "learning_rate": 0.00018665106440433863, "loss": 0.329, "step": 7904 }, { "epoch": 0.6403920933246922, "grad_norm": 0.03404494374990463, "learning_rate": 0.00018664656375174402, "loss": 0.3708, "step": 7905 }, { "epoch": 0.6404731043421905, "grad_norm": 0.031427644193172455, "learning_rate": 0.00018664206309914938, "loss": 0.329, "step": 7906 }, { "epoch": 0.6405541153596889, "grad_norm": 0.030701419338583946, "learning_rate": 0.00018663756244655477, "loss": 0.37, "step": 7907 }, { "epoch": 0.6406351263771873, "grad_norm": 0.030969882383942604, "learning_rate": 0.00018663306179396015, "loss": 0.3344, "step": 7908 }, { "epoch": 0.6407161373946857, "grad_norm": 0.03479425981640816, "learning_rate": 0.00018662856114136551, "loss": 0.3623, "step": 7909 }, { "epoch": 0.640797148412184, "grad_norm": 0.03328912705183029, "learning_rate": 0.00018662406048877087, "loss": 0.3736, "step": 7910 }, { "epoch": 0.6408781594296824, "grad_norm": 0.029649941250681877, "learning_rate": 0.00018661955983617626, "loss": 0.3327, "step": 7911 }, { "epoch": 0.6409591704471809, "grad_norm": 0.03194352239370346, "learning_rate": 0.00018661505918358162, "loss": 0.3356, "step": 7912 }, { "epoch": 0.6410401814646792, "grad_norm": 0.029047805815935135, "learning_rate": 0.000186610558530987, "loss": 0.3159, "step": 7913 }, { "epoch": 0.6411211924821776, "grad_norm": 0.02954118326306343, "learning_rate": 0.0001866060578783924, "loss": 0.3533, "step": 7914 }, { "epoch": 0.6412022034996759, "grad_norm": 0.03956909477710724, "learning_rate": 0.00018660155722579776, "loss": 0.3732, "step": 7915 }, { "epoch": 0.6412832145171743, "grad_norm": 0.03225688263773918, "learning_rate": 0.00018659705657320312, "loss": 0.3361, "step": 7916 }, { "epoch": 0.6413642255346728, "grad_norm": 0.03178594261407852, "learning_rate": 0.0001865925559206085, "loss": 0.3506, "step": 7917 }, { "epoch": 0.6414452365521711, "grad_norm": 0.025829335674643517, "learning_rate": 0.00018658805526801386, "loss": 0.2901, "step": 7918 }, { "epoch": 0.6415262475696695, "grad_norm": 0.031244834885001183, "learning_rate": 0.00018658355461541925, "loss": 0.3615, "step": 7919 }, { "epoch": 0.6416072585871678, "grad_norm": 0.03178013116121292, "learning_rate": 0.00018657905396282464, "loss": 0.314, "step": 7920 }, { "epoch": 0.6416882696046662, "grad_norm": 0.03225788101553917, "learning_rate": 0.00018657455331023, "loss": 0.3678, "step": 7921 }, { "epoch": 0.6417692806221647, "grad_norm": 0.03708713874220848, "learning_rate": 0.00018657005265763536, "loss": 0.4256, "step": 7922 }, { "epoch": 0.641850291639663, "grad_norm": 0.029338078573346138, "learning_rate": 0.00018656555200504074, "loss": 0.3137, "step": 7923 }, { "epoch": 0.6419313026571614, "grad_norm": 0.032095056027173996, "learning_rate": 0.0001865610513524461, "loss": 0.3916, "step": 7924 }, { "epoch": 0.6420123136746597, "grad_norm": 0.03248443081974983, "learning_rate": 0.0001865565506998515, "loss": 0.3505, "step": 7925 }, { "epoch": 0.6420933246921582, "grad_norm": 0.03844140097498894, "learning_rate": 0.00018655205004725688, "loss": 0.3834, "step": 7926 }, { "epoch": 0.6421743357096565, "grad_norm": 0.032976362854242325, "learning_rate": 0.00018654754939466224, "loss": 0.3248, "step": 7927 }, { "epoch": 0.6422553467271549, "grad_norm": 0.03275357559323311, "learning_rate": 0.0001865430487420676, "loss": 0.327, "step": 7928 }, { "epoch": 0.6423363577446533, "grad_norm": 0.03425171598792076, "learning_rate": 0.00018653854808947299, "loss": 0.3443, "step": 7929 }, { "epoch": 0.6424173687621516, "grad_norm": 0.03208903223276138, "learning_rate": 0.00018653404743687835, "loss": 0.3217, "step": 7930 }, { "epoch": 0.6424983797796501, "grad_norm": 0.028323756530880928, "learning_rate": 0.00018652954678428373, "loss": 0.3016, "step": 7931 }, { "epoch": 0.6425793907971484, "grad_norm": 0.0322665199637413, "learning_rate": 0.00018652504613168912, "loss": 0.3503, "step": 7932 }, { "epoch": 0.6426604018146468, "grad_norm": 0.02915794961154461, "learning_rate": 0.00018652054547909448, "loss": 0.305, "step": 7933 }, { "epoch": 0.6427414128321451, "grad_norm": 0.03422217071056366, "learning_rate": 0.00018651604482649984, "loss": 0.3062, "step": 7934 }, { "epoch": 0.6428224238496435, "grad_norm": 0.032783396542072296, "learning_rate": 0.00018651154417390523, "loss": 0.3768, "step": 7935 }, { "epoch": 0.642903434867142, "grad_norm": 0.045881237834692, "learning_rate": 0.00018650704352131061, "loss": 0.3574, "step": 7936 }, { "epoch": 0.6429844458846403, "grad_norm": 0.031124461442232132, "learning_rate": 0.00018650254286871597, "loss": 0.3339, "step": 7937 }, { "epoch": 0.6430654569021387, "grad_norm": 0.03581704944372177, "learning_rate": 0.00018649804221612136, "loss": 0.3366, "step": 7938 }, { "epoch": 0.643146467919637, "grad_norm": 0.03543675318360329, "learning_rate": 0.00018649354156352672, "loss": 0.412, "step": 7939 }, { "epoch": 0.6432274789371355, "grad_norm": 0.0327875129878521, "learning_rate": 0.00018648904091093208, "loss": 0.3712, "step": 7940 }, { "epoch": 0.6433084899546339, "grad_norm": 0.033418960869312286, "learning_rate": 0.00018648454025833747, "loss": 0.3482, "step": 7941 }, { "epoch": 0.6433895009721322, "grad_norm": 0.029584798961877823, "learning_rate": 0.00018648003960574286, "loss": 0.3146, "step": 7942 }, { "epoch": 0.6434705119896306, "grad_norm": 0.033126283437013626, "learning_rate": 0.00018647553895314822, "loss": 0.3722, "step": 7943 }, { "epoch": 0.6435515230071289, "grad_norm": 0.03137566149234772, "learning_rate": 0.0001864710383005536, "loss": 0.3713, "step": 7944 }, { "epoch": 0.6436325340246274, "grad_norm": 0.03218172490596771, "learning_rate": 0.00018646653764795896, "loss": 0.3373, "step": 7945 }, { "epoch": 0.6437135450421257, "grad_norm": 0.03332442045211792, "learning_rate": 0.00018646203699536432, "loss": 0.3969, "step": 7946 }, { "epoch": 0.6437945560596241, "grad_norm": 0.029476812109351158, "learning_rate": 0.0001864575363427697, "loss": 0.3237, "step": 7947 }, { "epoch": 0.6438755670771225, "grad_norm": 0.029915448278188705, "learning_rate": 0.0001864530356901751, "loss": 0.3233, "step": 7948 }, { "epoch": 0.6439565780946209, "grad_norm": 0.028523625805974007, "learning_rate": 0.00018644853503758046, "loss": 0.3583, "step": 7949 }, { "epoch": 0.6440375891121193, "grad_norm": 0.03365233168005943, "learning_rate": 0.00018644403438498584, "loss": 0.3135, "step": 7950 }, { "epoch": 0.6441186001296176, "grad_norm": 0.029672134667634964, "learning_rate": 0.0001864395337323912, "loss": 0.3326, "step": 7951 }, { "epoch": 0.644199611147116, "grad_norm": 0.03292734920978546, "learning_rate": 0.00018643503307979656, "loss": 0.3933, "step": 7952 }, { "epoch": 0.6442806221646143, "grad_norm": 0.02732773683965206, "learning_rate": 0.00018643053242720195, "loss": 0.3157, "step": 7953 }, { "epoch": 0.6443616331821128, "grad_norm": 0.02848062478005886, "learning_rate": 0.00018642603177460734, "loss": 0.3228, "step": 7954 }, { "epoch": 0.6444426441996112, "grad_norm": 0.03194410353899002, "learning_rate": 0.0001864215311220127, "loss": 0.3175, "step": 7955 }, { "epoch": 0.6445236552171095, "grad_norm": 0.031859688460826874, "learning_rate": 0.0001864170304694181, "loss": 0.3438, "step": 7956 }, { "epoch": 0.6446046662346079, "grad_norm": 0.03289076313376427, "learning_rate": 0.00018641252981682345, "loss": 0.3058, "step": 7957 }, { "epoch": 0.6446856772521062, "grad_norm": 0.02861196920275688, "learning_rate": 0.0001864080291642288, "loss": 0.3345, "step": 7958 }, { "epoch": 0.6447666882696047, "grad_norm": 0.03252119943499565, "learning_rate": 0.0001864035285116342, "loss": 0.3565, "step": 7959 }, { "epoch": 0.6448476992871031, "grad_norm": 0.03106110729277134, "learning_rate": 0.00018639902785903958, "loss": 0.3154, "step": 7960 }, { "epoch": 0.6449287103046014, "grad_norm": 0.032401468604803085, "learning_rate": 0.00018639452720644494, "loss": 0.3781, "step": 7961 }, { "epoch": 0.6450097213220998, "grad_norm": 0.03558249771595001, "learning_rate": 0.00018639002655385033, "loss": 0.3246, "step": 7962 }, { "epoch": 0.6450907323395982, "grad_norm": 0.02981211617588997, "learning_rate": 0.0001863855259012557, "loss": 0.3168, "step": 7963 }, { "epoch": 0.6451717433570966, "grad_norm": 0.031262945383787155, "learning_rate": 0.00018638102524866105, "loss": 0.3403, "step": 7964 }, { "epoch": 0.645252754374595, "grad_norm": 0.036666229367256165, "learning_rate": 0.00018637652459606644, "loss": 0.3721, "step": 7965 }, { "epoch": 0.6453337653920933, "grad_norm": 0.03510688245296478, "learning_rate": 0.00018637202394347182, "loss": 0.3607, "step": 7966 }, { "epoch": 0.6454147764095917, "grad_norm": 0.032817598432302475, "learning_rate": 0.00018636752329087718, "loss": 0.3372, "step": 7967 }, { "epoch": 0.6454957874270901, "grad_norm": 0.02903442457318306, "learning_rate": 0.00018636302263828257, "loss": 0.3263, "step": 7968 }, { "epoch": 0.6455767984445885, "grad_norm": 0.03062974475324154, "learning_rate": 0.00018635852198568793, "loss": 0.3696, "step": 7969 }, { "epoch": 0.6456578094620868, "grad_norm": 0.032218314707279205, "learning_rate": 0.0001863540213330933, "loss": 0.3409, "step": 7970 }, { "epoch": 0.6457388204795852, "grad_norm": 0.031189072877168655, "learning_rate": 0.00018634952068049868, "loss": 0.3022, "step": 7971 }, { "epoch": 0.6458198314970836, "grad_norm": 0.02876521833240986, "learning_rate": 0.00018634502002790406, "loss": 0.3585, "step": 7972 }, { "epoch": 0.645900842514582, "grad_norm": 0.029905641451478004, "learning_rate": 0.00018634051937530942, "loss": 0.3379, "step": 7973 }, { "epoch": 0.6459818535320804, "grad_norm": 0.038259658962488174, "learning_rate": 0.0001863360187227148, "loss": 0.4316, "step": 7974 }, { "epoch": 0.6460628645495787, "grad_norm": 0.03205183893442154, "learning_rate": 0.00018633151807012017, "loss": 0.3666, "step": 7975 }, { "epoch": 0.6461438755670771, "grad_norm": 0.031414229422807693, "learning_rate": 0.00018632701741752553, "loss": 0.3861, "step": 7976 }, { "epoch": 0.6462248865845756, "grad_norm": 0.03359837830066681, "learning_rate": 0.00018632251676493095, "loss": 0.4069, "step": 7977 }, { "epoch": 0.6463058976020739, "grad_norm": 0.03183357045054436, "learning_rate": 0.0001863180161123363, "loss": 0.3205, "step": 7978 }, { "epoch": 0.6463869086195723, "grad_norm": 0.03323815017938614, "learning_rate": 0.00018631351545974167, "loss": 0.3296, "step": 7979 }, { "epoch": 0.6464679196370706, "grad_norm": 0.0322708860039711, "learning_rate": 0.00018630901480714705, "loss": 0.4201, "step": 7980 }, { "epoch": 0.646548930654569, "grad_norm": 0.025975298136472702, "learning_rate": 0.0001863045141545524, "loss": 0.2796, "step": 7981 }, { "epoch": 0.6466299416720674, "grad_norm": 0.03103083372116089, "learning_rate": 0.00018630001350195777, "loss": 0.3357, "step": 7982 }, { "epoch": 0.6467109526895658, "grad_norm": 0.03107629343867302, "learning_rate": 0.0001862955128493632, "loss": 0.3631, "step": 7983 }, { "epoch": 0.6467919637070642, "grad_norm": 0.028790367767214775, "learning_rate": 0.00018629101219676855, "loss": 0.3155, "step": 7984 }, { "epoch": 0.6468729747245625, "grad_norm": 0.029491981491446495, "learning_rate": 0.0001862865115441739, "loss": 0.3385, "step": 7985 }, { "epoch": 0.6469539857420609, "grad_norm": 0.03248732164502144, "learning_rate": 0.0001862820108915793, "loss": 0.3698, "step": 7986 }, { "epoch": 0.6470349967595593, "grad_norm": 0.030972251668572426, "learning_rate": 0.00018627751023898465, "loss": 0.3182, "step": 7987 }, { "epoch": 0.6471160077770577, "grad_norm": 0.029722899198532104, "learning_rate": 0.00018627300958639004, "loss": 0.2785, "step": 7988 }, { "epoch": 0.647197018794556, "grad_norm": 0.03773869574069977, "learning_rate": 0.00018626850893379543, "loss": 0.3493, "step": 7989 }, { "epoch": 0.6472780298120544, "grad_norm": 0.031442657113075256, "learning_rate": 0.0001862640082812008, "loss": 0.3414, "step": 7990 }, { "epoch": 0.6473590408295529, "grad_norm": 0.030141377821564674, "learning_rate": 0.00018625950762860615, "loss": 0.2964, "step": 7991 }, { "epoch": 0.6474400518470512, "grad_norm": 0.03201432153582573, "learning_rate": 0.00018625500697601154, "loss": 0.355, "step": 7992 }, { "epoch": 0.6475210628645496, "grad_norm": 0.027599439024925232, "learning_rate": 0.0001862505063234169, "loss": 0.2955, "step": 7993 }, { "epoch": 0.6476020738820479, "grad_norm": 0.035235144197940826, "learning_rate": 0.00018624600567082228, "loss": 0.343, "step": 7994 }, { "epoch": 0.6476830848995463, "grad_norm": 0.03464788198471069, "learning_rate": 0.00018624150501822767, "loss": 0.3726, "step": 7995 }, { "epoch": 0.6477640959170448, "grad_norm": 0.03147656470537186, "learning_rate": 0.00018623700436563303, "loss": 0.3669, "step": 7996 }, { "epoch": 0.6478451069345431, "grad_norm": 0.030429191887378693, "learning_rate": 0.0001862325037130384, "loss": 0.3533, "step": 7997 }, { "epoch": 0.6479261179520415, "grad_norm": 0.03295878693461418, "learning_rate": 0.00018622800306044378, "loss": 0.298, "step": 7998 }, { "epoch": 0.6480071289695398, "grad_norm": 0.03022496961057186, "learning_rate": 0.00018622350240784914, "loss": 0.3597, "step": 7999 }, { "epoch": 0.6480881399870383, "grad_norm": 0.03181470185518265, "learning_rate": 0.00018621900175525453, "loss": 0.3484, "step": 8000 }, { "epoch": 0.6481691510045366, "grad_norm": 0.040238749235868454, "learning_rate": 0.0001862145011026599, "loss": 0.2987, "step": 8001 }, { "epoch": 0.648250162022035, "grad_norm": 0.027749789878726006, "learning_rate": 0.00018621000045006527, "loss": 0.3122, "step": 8002 }, { "epoch": 0.6483311730395334, "grad_norm": 0.028344471007585526, "learning_rate": 0.00018620549979747063, "loss": 0.2646, "step": 8003 }, { "epoch": 0.6484121840570317, "grad_norm": 0.028361354023218155, "learning_rate": 0.00018620099914487602, "loss": 0.3586, "step": 8004 }, { "epoch": 0.6484931950745302, "grad_norm": 0.030784590169787407, "learning_rate": 0.00018619649849228138, "loss": 0.336, "step": 8005 }, { "epoch": 0.6485742060920285, "grad_norm": 0.027588283643126488, "learning_rate": 0.00018619199783968677, "loss": 0.297, "step": 8006 }, { "epoch": 0.6486552171095269, "grad_norm": 0.03066411241889, "learning_rate": 0.00018618749718709215, "loss": 0.3539, "step": 8007 }, { "epoch": 0.6487362281270252, "grad_norm": 0.033937420696020126, "learning_rate": 0.00018618299653449751, "loss": 0.3963, "step": 8008 }, { "epoch": 0.6488172391445236, "grad_norm": 0.03501052036881447, "learning_rate": 0.00018617849588190287, "loss": 0.3579, "step": 8009 }, { "epoch": 0.6488982501620221, "grad_norm": 0.03133450448513031, "learning_rate": 0.00018617399522930826, "loss": 0.3442, "step": 8010 }, { "epoch": 0.6489792611795204, "grad_norm": 0.03345509245991707, "learning_rate": 0.00018616949457671365, "loss": 0.3604, "step": 8011 }, { "epoch": 0.6490602721970188, "grad_norm": 0.033844854682683945, "learning_rate": 0.000186164993924119, "loss": 0.3981, "step": 8012 }, { "epoch": 0.6491412832145171, "grad_norm": 0.03028370812535286, "learning_rate": 0.0001861604932715244, "loss": 0.3166, "step": 8013 }, { "epoch": 0.6492222942320156, "grad_norm": 0.026143895462155342, "learning_rate": 0.00018615599261892976, "loss": 0.3192, "step": 8014 }, { "epoch": 0.649303305249514, "grad_norm": 0.034095458686351776, "learning_rate": 0.00018615149196633512, "loss": 0.4142, "step": 8015 }, { "epoch": 0.6493843162670123, "grad_norm": 0.03332599624991417, "learning_rate": 0.0001861469913137405, "loss": 0.3668, "step": 8016 }, { "epoch": 0.6494653272845107, "grad_norm": 0.03390030190348625, "learning_rate": 0.0001861424906611459, "loss": 0.3198, "step": 8017 }, { "epoch": 0.649546338302009, "grad_norm": 0.029879910871386528, "learning_rate": 0.00018613799000855125, "loss": 0.3186, "step": 8018 }, { "epoch": 0.6496273493195075, "grad_norm": 0.03314916417002678, "learning_rate": 0.00018613348935595664, "loss": 0.3611, "step": 8019 }, { "epoch": 0.6497083603370059, "grad_norm": 0.03120088204741478, "learning_rate": 0.000186128988703362, "loss": 0.3322, "step": 8020 }, { "epoch": 0.6497893713545042, "grad_norm": 0.029262244701385498, "learning_rate": 0.00018612448805076736, "loss": 0.3151, "step": 8021 }, { "epoch": 0.6498703823720026, "grad_norm": 0.03213749825954437, "learning_rate": 0.00018611998739817274, "loss": 0.3447, "step": 8022 }, { "epoch": 0.6499513933895009, "grad_norm": 0.030877763405442238, "learning_rate": 0.00018611548674557813, "loss": 0.3584, "step": 8023 }, { "epoch": 0.6500324044069994, "grad_norm": 0.031971901655197144, "learning_rate": 0.0001861109860929835, "loss": 0.3481, "step": 8024 }, { "epoch": 0.6501134154244977, "grad_norm": 0.03156515210866928, "learning_rate": 0.00018610648544038888, "loss": 0.3788, "step": 8025 }, { "epoch": 0.6501944264419961, "grad_norm": 0.03007367067039013, "learning_rate": 0.00018610198478779424, "loss": 0.3403, "step": 8026 }, { "epoch": 0.6502754374594945, "grad_norm": 0.0311642587184906, "learning_rate": 0.0001860974841351996, "loss": 0.3279, "step": 8027 }, { "epoch": 0.6503564484769929, "grad_norm": 0.03929786756634712, "learning_rate": 0.00018609298348260499, "loss": 0.3465, "step": 8028 }, { "epoch": 0.6504374594944913, "grad_norm": 0.02747124247252941, "learning_rate": 0.00018608848283001037, "loss": 0.2891, "step": 8029 }, { "epoch": 0.6505184705119896, "grad_norm": 0.03623297065496445, "learning_rate": 0.00018608398217741573, "loss": 0.418, "step": 8030 }, { "epoch": 0.650599481529488, "grad_norm": 0.03151100128889084, "learning_rate": 0.00018607948152482112, "loss": 0.3487, "step": 8031 }, { "epoch": 0.6506804925469863, "grad_norm": 0.028096545487642288, "learning_rate": 0.00018607498087222648, "loss": 0.3142, "step": 8032 }, { "epoch": 0.6507615035644848, "grad_norm": 0.03158478066325188, "learning_rate": 0.00018607048021963184, "loss": 0.3316, "step": 8033 }, { "epoch": 0.6508425145819832, "grad_norm": 0.029952632263302803, "learning_rate": 0.00018606597956703723, "loss": 0.339, "step": 8034 }, { "epoch": 0.6509235255994815, "grad_norm": 0.03018800914287567, "learning_rate": 0.00018606147891444261, "loss": 0.3186, "step": 8035 }, { "epoch": 0.6510045366169799, "grad_norm": 0.028747960925102234, "learning_rate": 0.00018605697826184797, "loss": 0.3322, "step": 8036 }, { "epoch": 0.6510855476344782, "grad_norm": 0.030452804639935493, "learning_rate": 0.00018605247760925336, "loss": 0.3833, "step": 8037 }, { "epoch": 0.6511665586519767, "grad_norm": 0.030978774651885033, "learning_rate": 0.00018604797695665872, "loss": 0.3378, "step": 8038 }, { "epoch": 0.6512475696694751, "grad_norm": 0.030736364424228668, "learning_rate": 0.00018604347630406408, "loss": 0.3492, "step": 8039 }, { "epoch": 0.6513285806869734, "grad_norm": 0.03373675048351288, "learning_rate": 0.00018603897565146947, "loss": 0.3894, "step": 8040 }, { "epoch": 0.6514095917044718, "grad_norm": 0.03460337966680527, "learning_rate": 0.00018603447499887486, "loss": 0.3471, "step": 8041 }, { "epoch": 0.6514906027219702, "grad_norm": 0.031132934615015984, "learning_rate": 0.00018602997434628022, "loss": 0.3235, "step": 8042 }, { "epoch": 0.6515716137394686, "grad_norm": 0.03173597529530525, "learning_rate": 0.0001860254736936856, "loss": 0.3369, "step": 8043 }, { "epoch": 0.651652624756967, "grad_norm": 0.030315032228827477, "learning_rate": 0.00018602097304109096, "loss": 0.3247, "step": 8044 }, { "epoch": 0.6517336357744653, "grad_norm": 0.034832924604415894, "learning_rate": 0.00018601647238849632, "loss": 0.3624, "step": 8045 }, { "epoch": 0.6518146467919637, "grad_norm": 0.031159505248069763, "learning_rate": 0.00018601197173590174, "loss": 0.3391, "step": 8046 }, { "epoch": 0.6518956578094621, "grad_norm": 0.03149305656552315, "learning_rate": 0.0001860074710833071, "loss": 0.3688, "step": 8047 }, { "epoch": 0.6519766688269605, "grad_norm": 0.03061858005821705, "learning_rate": 0.00018600297043071246, "loss": 0.3645, "step": 8048 }, { "epoch": 0.6520576798444588, "grad_norm": 0.03019680269062519, "learning_rate": 0.00018599846977811785, "loss": 0.3284, "step": 8049 }, { "epoch": 0.6521386908619572, "grad_norm": 0.029862701892852783, "learning_rate": 0.0001859939691255232, "loss": 0.3286, "step": 8050 }, { "epoch": 0.6522197018794557, "grad_norm": 0.03851095587015152, "learning_rate": 0.00018598946847292857, "loss": 0.3709, "step": 8051 }, { "epoch": 0.652300712896954, "grad_norm": 0.034326620399951935, "learning_rate": 0.00018598496782033398, "loss": 0.3327, "step": 8052 }, { "epoch": 0.6523817239144524, "grad_norm": 0.02795341983437538, "learning_rate": 0.00018598046716773934, "loss": 0.3188, "step": 8053 }, { "epoch": 0.6524627349319507, "grad_norm": 0.03119768388569355, "learning_rate": 0.0001859759665151447, "loss": 0.3795, "step": 8054 }, { "epoch": 0.6525437459494491, "grad_norm": 0.031861960887908936, "learning_rate": 0.0001859714658625501, "loss": 0.3837, "step": 8055 }, { "epoch": 0.6526247569669476, "grad_norm": 0.028594138100743294, "learning_rate": 0.00018596696520995545, "loss": 0.3313, "step": 8056 }, { "epoch": 0.6527057679844459, "grad_norm": 0.033589012920856476, "learning_rate": 0.0001859624645573608, "loss": 0.354, "step": 8057 }, { "epoch": 0.6527867790019443, "grad_norm": 0.03334236517548561, "learning_rate": 0.00018595796390476622, "loss": 0.3248, "step": 8058 }, { "epoch": 0.6528677900194426, "grad_norm": 0.03135516494512558, "learning_rate": 0.00018595346325217158, "loss": 0.3385, "step": 8059 }, { "epoch": 0.652948801036941, "grad_norm": 0.03384825587272644, "learning_rate": 0.00018594896259957694, "loss": 0.3494, "step": 8060 }, { "epoch": 0.6530298120544394, "grad_norm": 0.02878384292125702, "learning_rate": 0.00018594446194698233, "loss": 0.3105, "step": 8061 }, { "epoch": 0.6531108230719378, "grad_norm": 0.03048752248287201, "learning_rate": 0.0001859399612943877, "loss": 0.3408, "step": 8062 }, { "epoch": 0.6531918340894362, "grad_norm": 0.03223764896392822, "learning_rate": 0.00018593546064179308, "loss": 0.3665, "step": 8063 }, { "epoch": 0.6532728451069345, "grad_norm": 0.03150539472699165, "learning_rate": 0.00018593095998919846, "loss": 0.3255, "step": 8064 }, { "epoch": 0.653353856124433, "grad_norm": 0.03526121377944946, "learning_rate": 0.00018592645933660382, "loss": 0.4035, "step": 8065 }, { "epoch": 0.6534348671419313, "grad_norm": 0.028970615938305855, "learning_rate": 0.00018592195868400918, "loss": 0.365, "step": 8066 }, { "epoch": 0.6535158781594297, "grad_norm": 0.034498874098062515, "learning_rate": 0.00018591745803141457, "loss": 0.3666, "step": 8067 }, { "epoch": 0.653596889176928, "grad_norm": 0.030491981655359268, "learning_rate": 0.00018591295737881993, "loss": 0.3711, "step": 8068 }, { "epoch": 0.6536779001944264, "grad_norm": 0.029132137075066566, "learning_rate": 0.00018590845672622532, "loss": 0.303, "step": 8069 }, { "epoch": 0.6537589112119249, "grad_norm": 0.02738095261156559, "learning_rate": 0.0001859039560736307, "loss": 0.3228, "step": 8070 }, { "epoch": 0.6538399222294232, "grad_norm": 0.03282042592763901, "learning_rate": 0.00018589945542103606, "loss": 0.3893, "step": 8071 }, { "epoch": 0.6539209332469216, "grad_norm": 0.03241070732474327, "learning_rate": 0.00018589495476844142, "loss": 0.3724, "step": 8072 }, { "epoch": 0.6540019442644199, "grad_norm": 0.032008569687604904, "learning_rate": 0.0001858904541158468, "loss": 0.3184, "step": 8073 }, { "epoch": 0.6540829552819183, "grad_norm": 0.03105410560965538, "learning_rate": 0.00018588595346325217, "loss": 0.354, "step": 8074 }, { "epoch": 0.6541639662994168, "grad_norm": 0.028940342366695404, "learning_rate": 0.00018588145281065756, "loss": 0.3252, "step": 8075 }, { "epoch": 0.6542449773169151, "grad_norm": 0.03389149159193039, "learning_rate": 0.00018587695215806295, "loss": 0.3761, "step": 8076 }, { "epoch": 0.6543259883344135, "grad_norm": 0.028859373182058334, "learning_rate": 0.0001858724515054683, "loss": 0.3609, "step": 8077 }, { "epoch": 0.6544069993519118, "grad_norm": 0.03037380985915661, "learning_rate": 0.00018586795085287367, "loss": 0.3317, "step": 8078 }, { "epoch": 0.6544880103694103, "grad_norm": 0.03146057575941086, "learning_rate": 0.00018586345020027905, "loss": 0.3699, "step": 8079 }, { "epoch": 0.6545690213869086, "grad_norm": 0.031758084893226624, "learning_rate": 0.0001858589495476844, "loss": 0.3537, "step": 8080 }, { "epoch": 0.654650032404407, "grad_norm": 0.03247644007205963, "learning_rate": 0.0001858544488950898, "loss": 0.3575, "step": 8081 }, { "epoch": 0.6547310434219054, "grad_norm": 0.03290760517120361, "learning_rate": 0.0001858499482424952, "loss": 0.3372, "step": 8082 }, { "epoch": 0.6548120544394037, "grad_norm": 0.029254132881760597, "learning_rate": 0.00018584544758990055, "loss": 0.3084, "step": 8083 }, { "epoch": 0.6548930654569022, "grad_norm": 0.03385884314775467, "learning_rate": 0.0001858409469373059, "loss": 0.3641, "step": 8084 }, { "epoch": 0.6549740764744005, "grad_norm": 0.032351624220609665, "learning_rate": 0.0001858364462847113, "loss": 0.3702, "step": 8085 }, { "epoch": 0.6550550874918989, "grad_norm": 0.03416949883103371, "learning_rate": 0.00018583194563211665, "loss": 0.3482, "step": 8086 }, { "epoch": 0.6551360985093972, "grad_norm": 0.03164816275238991, "learning_rate": 0.00018582744497952204, "loss": 0.3747, "step": 8087 }, { "epoch": 0.6552171095268956, "grad_norm": 0.029934266582131386, "learning_rate": 0.00018582294432692743, "loss": 0.3367, "step": 8088 }, { "epoch": 0.6552981205443941, "grad_norm": 0.03290180861949921, "learning_rate": 0.0001858184436743328, "loss": 0.3688, "step": 8089 }, { "epoch": 0.6553791315618924, "grad_norm": 0.032323382794857025, "learning_rate": 0.00018581394302173815, "loss": 0.3525, "step": 8090 }, { "epoch": 0.6554601425793908, "grad_norm": 0.033882077783346176, "learning_rate": 0.00018580944236914354, "loss": 0.3368, "step": 8091 }, { "epoch": 0.6555411535968891, "grad_norm": 0.02703363448381424, "learning_rate": 0.00018580494171654892, "loss": 0.3239, "step": 8092 }, { "epoch": 0.6556221646143876, "grad_norm": 0.03567137196660042, "learning_rate": 0.00018580044106395428, "loss": 0.349, "step": 8093 }, { "epoch": 0.655703175631886, "grad_norm": 0.03596005216240883, "learning_rate": 0.00018579594041135967, "loss": 0.3806, "step": 8094 }, { "epoch": 0.6557841866493843, "grad_norm": 0.02762463130056858, "learning_rate": 0.00018579143975876503, "loss": 0.3162, "step": 8095 }, { "epoch": 0.6558651976668827, "grad_norm": 0.02889867126941681, "learning_rate": 0.0001857869391061704, "loss": 0.3202, "step": 8096 }, { "epoch": 0.655946208684381, "grad_norm": 0.03050253912806511, "learning_rate": 0.00018578243845357578, "loss": 0.3443, "step": 8097 }, { "epoch": 0.6560272197018795, "grad_norm": 0.03690246492624283, "learning_rate": 0.00018577793780098117, "loss": 0.3787, "step": 8098 }, { "epoch": 0.6561082307193778, "grad_norm": 0.03827010095119476, "learning_rate": 0.00018577343714838653, "loss": 0.346, "step": 8099 }, { "epoch": 0.6561892417368762, "grad_norm": 0.031673092395067215, "learning_rate": 0.0001857689364957919, "loss": 0.3205, "step": 8100 }, { "epoch": 0.6562702527543746, "grad_norm": 0.027814095839858055, "learning_rate": 0.00018576443584319727, "loss": 0.3237, "step": 8101 }, { "epoch": 0.656351263771873, "grad_norm": 0.035531893372535706, "learning_rate": 0.00018575993519060263, "loss": 0.3353, "step": 8102 }, { "epoch": 0.6564322747893714, "grad_norm": 0.029679805040359497, "learning_rate": 0.00018575543453800802, "loss": 0.2917, "step": 8103 }, { "epoch": 0.6565132858068697, "grad_norm": 0.035042133182287216, "learning_rate": 0.0001857509338854134, "loss": 0.3663, "step": 8104 }, { "epoch": 0.6565942968243681, "grad_norm": 0.03472837433218956, "learning_rate": 0.00018574643323281877, "loss": 0.3746, "step": 8105 }, { "epoch": 0.6566753078418665, "grad_norm": 0.033048685640096664, "learning_rate": 0.00018574193258022415, "loss": 0.3643, "step": 8106 }, { "epoch": 0.6567563188593649, "grad_norm": 0.028987662866711617, "learning_rate": 0.00018573743192762951, "loss": 0.367, "step": 8107 }, { "epoch": 0.6568373298768633, "grad_norm": 0.03321503847837448, "learning_rate": 0.00018573293127503487, "loss": 0.3682, "step": 8108 }, { "epoch": 0.6569183408943616, "grad_norm": 0.02941848896443844, "learning_rate": 0.00018572843062244026, "loss": 0.3408, "step": 8109 }, { "epoch": 0.65699935191186, "grad_norm": 0.03230408951640129, "learning_rate": 0.00018572392996984565, "loss": 0.3347, "step": 8110 }, { "epoch": 0.6570803629293583, "grad_norm": 0.028914272785186768, "learning_rate": 0.000185719429317251, "loss": 0.3017, "step": 8111 }, { "epoch": 0.6571613739468568, "grad_norm": 0.03766142949461937, "learning_rate": 0.0001857149286646564, "loss": 0.3322, "step": 8112 }, { "epoch": 0.6572423849643552, "grad_norm": 0.03311333432793617, "learning_rate": 0.00018571042801206176, "loss": 0.3262, "step": 8113 }, { "epoch": 0.6573233959818535, "grad_norm": 0.02609996497631073, "learning_rate": 0.00018570592735946712, "loss": 0.3018, "step": 8114 }, { "epoch": 0.6574044069993519, "grad_norm": 0.03328759968280792, "learning_rate": 0.0001857014267068725, "loss": 0.3557, "step": 8115 }, { "epoch": 0.6574854180168503, "grad_norm": 0.0315190814435482, "learning_rate": 0.0001856969260542779, "loss": 0.3634, "step": 8116 }, { "epoch": 0.6575664290343487, "grad_norm": 0.03723594918847084, "learning_rate": 0.00018569242540168325, "loss": 0.3691, "step": 8117 }, { "epoch": 0.657647440051847, "grad_norm": 0.03286529704928398, "learning_rate": 0.00018568792474908864, "loss": 0.3265, "step": 8118 }, { "epoch": 0.6577284510693454, "grad_norm": 0.026647770777344704, "learning_rate": 0.000185683424096494, "loss": 0.3176, "step": 8119 }, { "epoch": 0.6578094620868438, "grad_norm": 0.02880423702299595, "learning_rate": 0.00018567892344389936, "loss": 0.3488, "step": 8120 }, { "epoch": 0.6578904731043422, "grad_norm": 0.03130809962749481, "learning_rate": 0.00018567442279130477, "loss": 0.3071, "step": 8121 }, { "epoch": 0.6579714841218406, "grad_norm": 0.03377383574843407, "learning_rate": 0.00018566992213871013, "loss": 0.3177, "step": 8122 }, { "epoch": 0.6580524951393389, "grad_norm": 0.03247836232185364, "learning_rate": 0.0001856654214861155, "loss": 0.31, "step": 8123 }, { "epoch": 0.6581335061568373, "grad_norm": 0.031876444816589355, "learning_rate": 0.00018566092083352088, "loss": 0.3618, "step": 8124 }, { "epoch": 0.6582145171743357, "grad_norm": 0.03695908561348915, "learning_rate": 0.00018565642018092624, "loss": 0.3867, "step": 8125 }, { "epoch": 0.6582955281918341, "grad_norm": 0.032040636986494064, "learning_rate": 0.0001856519195283316, "loss": 0.3652, "step": 8126 }, { "epoch": 0.6583765392093325, "grad_norm": 0.02838469110429287, "learning_rate": 0.000185647418875737, "loss": 0.3076, "step": 8127 }, { "epoch": 0.6584575502268308, "grad_norm": 0.030958110466599464, "learning_rate": 0.00018564291822314237, "loss": 0.3494, "step": 8128 }, { "epoch": 0.6585385612443292, "grad_norm": 0.0332995280623436, "learning_rate": 0.00018563841757054773, "loss": 0.3514, "step": 8129 }, { "epoch": 0.6586195722618277, "grad_norm": 0.03220449760556221, "learning_rate": 0.00018563391691795312, "loss": 0.379, "step": 8130 }, { "epoch": 0.658700583279326, "grad_norm": 0.03334110975265503, "learning_rate": 0.00018562941626535848, "loss": 0.3494, "step": 8131 }, { "epoch": 0.6587815942968244, "grad_norm": 0.0317775122821331, "learning_rate": 0.00018562491561276384, "loss": 0.3264, "step": 8132 }, { "epoch": 0.6588626053143227, "grad_norm": 0.03261169418692589, "learning_rate": 0.00018562041496016925, "loss": 0.3918, "step": 8133 }, { "epoch": 0.6589436163318211, "grad_norm": 0.033578574657440186, "learning_rate": 0.00018561591430757461, "loss": 0.3693, "step": 8134 }, { "epoch": 0.6590246273493195, "grad_norm": 0.02868012897670269, "learning_rate": 0.00018561141365497998, "loss": 0.3405, "step": 8135 }, { "epoch": 0.6591056383668179, "grad_norm": 0.03563758358359337, "learning_rate": 0.00018560691300238536, "loss": 0.3384, "step": 8136 }, { "epoch": 0.6591866493843163, "grad_norm": 0.030504295602440834, "learning_rate": 0.00018560241234979072, "loss": 0.3529, "step": 8137 }, { "epoch": 0.6592676604018146, "grad_norm": 0.030688825994729996, "learning_rate": 0.00018559791169719608, "loss": 0.3393, "step": 8138 }, { "epoch": 0.6593486714193131, "grad_norm": 0.02721015177667141, "learning_rate": 0.0001855934110446015, "loss": 0.3117, "step": 8139 }, { "epoch": 0.6594296824368114, "grad_norm": 0.0309552401304245, "learning_rate": 0.00018558891039200686, "loss": 0.3516, "step": 8140 }, { "epoch": 0.6595106934543098, "grad_norm": 0.03438500314950943, "learning_rate": 0.00018558440973941222, "loss": 0.2714, "step": 8141 }, { "epoch": 0.6595917044718081, "grad_norm": 0.0357445664703846, "learning_rate": 0.0001855799090868176, "loss": 0.3761, "step": 8142 }, { "epoch": 0.6596727154893065, "grad_norm": 0.031231852248311043, "learning_rate": 0.00018557540843422296, "loss": 0.3256, "step": 8143 }, { "epoch": 0.659753726506805, "grad_norm": 0.03136920928955078, "learning_rate": 0.00018557090778162835, "loss": 0.3326, "step": 8144 }, { "epoch": 0.6598347375243033, "grad_norm": 0.030037716031074524, "learning_rate": 0.00018556640712903374, "loss": 0.3229, "step": 8145 }, { "epoch": 0.6599157485418017, "grad_norm": 0.031204765662550926, "learning_rate": 0.0001855619064764391, "loss": 0.3507, "step": 8146 }, { "epoch": 0.6599967595593, "grad_norm": 0.030983639881014824, "learning_rate": 0.00018555740582384446, "loss": 0.3427, "step": 8147 }, { "epoch": 0.6600777705767984, "grad_norm": 0.03538487106561661, "learning_rate": 0.00018555290517124985, "loss": 0.3719, "step": 8148 }, { "epoch": 0.6601587815942969, "grad_norm": 0.036955807358026505, "learning_rate": 0.0001855484045186552, "loss": 0.3721, "step": 8149 }, { "epoch": 0.6602397926117952, "grad_norm": 0.03064347244799137, "learning_rate": 0.0001855439038660606, "loss": 0.3213, "step": 8150 }, { "epoch": 0.6603208036292936, "grad_norm": 0.036659225821495056, "learning_rate": 0.00018553940321346598, "loss": 0.3554, "step": 8151 }, { "epoch": 0.6604018146467919, "grad_norm": 0.0313510037958622, "learning_rate": 0.00018553490256087134, "loss": 0.3794, "step": 8152 }, { "epoch": 0.6604828256642904, "grad_norm": 0.03193637356162071, "learning_rate": 0.0001855304019082767, "loss": 0.3221, "step": 8153 }, { "epoch": 0.6605638366817888, "grad_norm": 0.031372442841529846, "learning_rate": 0.0001855259012556821, "loss": 0.2984, "step": 8154 }, { "epoch": 0.6606448476992871, "grad_norm": 0.034660547971725464, "learning_rate": 0.00018552140060308745, "loss": 0.3699, "step": 8155 }, { "epoch": 0.6607258587167855, "grad_norm": 0.03529341146349907, "learning_rate": 0.00018551689995049283, "loss": 0.3348, "step": 8156 }, { "epoch": 0.6608068697342838, "grad_norm": 0.02971823327243328, "learning_rate": 0.00018551239929789822, "loss": 0.3282, "step": 8157 }, { "epoch": 0.6608878807517823, "grad_norm": 0.03183675929903984, "learning_rate": 0.00018550789864530358, "loss": 0.3902, "step": 8158 }, { "epoch": 0.6609688917692806, "grad_norm": 0.0275438129901886, "learning_rate": 0.00018550339799270894, "loss": 0.3099, "step": 8159 }, { "epoch": 0.661049902786779, "grad_norm": 0.036063600331544876, "learning_rate": 0.00018549889734011433, "loss": 0.4058, "step": 8160 }, { "epoch": 0.6611309138042774, "grad_norm": 0.028650248423218727, "learning_rate": 0.0001854943966875197, "loss": 0.3305, "step": 8161 }, { "epoch": 0.6612119248217757, "grad_norm": 0.03210330381989479, "learning_rate": 0.00018548989603492508, "loss": 0.3486, "step": 8162 }, { "epoch": 0.6612929358392742, "grad_norm": 0.02795662172138691, "learning_rate": 0.00018548539538233046, "loss": 0.3106, "step": 8163 }, { "epoch": 0.6613739468567725, "grad_norm": 0.03249041736125946, "learning_rate": 0.00018548089472973582, "loss": 0.3766, "step": 8164 }, { "epoch": 0.6614549578742709, "grad_norm": 0.0343630425632, "learning_rate": 0.00018547639407714118, "loss": 0.3537, "step": 8165 }, { "epoch": 0.6615359688917692, "grad_norm": 0.030740728601813316, "learning_rate": 0.00018547189342454657, "loss": 0.3559, "step": 8166 }, { "epoch": 0.6616169799092677, "grad_norm": 0.029429970309138298, "learning_rate": 0.00018546739277195193, "loss": 0.3559, "step": 8167 }, { "epoch": 0.6616979909267661, "grad_norm": 0.03364703431725502, "learning_rate": 0.00018546289211935732, "loss": 0.3855, "step": 8168 }, { "epoch": 0.6617790019442644, "grad_norm": 0.028997313231229782, "learning_rate": 0.0001854583914667627, "loss": 0.3238, "step": 8169 }, { "epoch": 0.6618600129617628, "grad_norm": 0.027220774441957474, "learning_rate": 0.00018545389081416806, "loss": 0.2873, "step": 8170 }, { "epoch": 0.6619410239792611, "grad_norm": 0.036889396607875824, "learning_rate": 0.00018544939016157342, "loss": 0.3443, "step": 8171 }, { "epoch": 0.6620220349967596, "grad_norm": 0.03279150277376175, "learning_rate": 0.0001854448895089788, "loss": 0.3325, "step": 8172 }, { "epoch": 0.662103046014258, "grad_norm": 0.029903072863817215, "learning_rate": 0.0001854403888563842, "loss": 0.3398, "step": 8173 }, { "epoch": 0.6621840570317563, "grad_norm": 0.03309585154056549, "learning_rate": 0.00018543588820378956, "loss": 0.3411, "step": 8174 }, { "epoch": 0.6622650680492547, "grad_norm": 0.027920817956328392, "learning_rate": 0.00018543138755119495, "loss": 0.3086, "step": 8175 }, { "epoch": 0.662346079066753, "grad_norm": 0.03215247392654419, "learning_rate": 0.0001854268868986003, "loss": 0.3609, "step": 8176 }, { "epoch": 0.6624270900842515, "grad_norm": 0.032950956374406815, "learning_rate": 0.00018542238624600567, "loss": 0.3389, "step": 8177 }, { "epoch": 0.6625081011017498, "grad_norm": 0.03412731736898422, "learning_rate": 0.00018541788559341105, "loss": 0.3539, "step": 8178 }, { "epoch": 0.6625891121192482, "grad_norm": 0.029734715819358826, "learning_rate": 0.00018541338494081644, "loss": 0.3468, "step": 8179 }, { "epoch": 0.6626701231367466, "grad_norm": 0.0334324948489666, "learning_rate": 0.0001854088842882218, "loss": 0.3548, "step": 8180 }, { "epoch": 0.662751134154245, "grad_norm": 0.029621506109833717, "learning_rate": 0.0001854043836356272, "loss": 0.3672, "step": 8181 }, { "epoch": 0.6628321451717434, "grad_norm": 0.033380456268787384, "learning_rate": 0.00018539988298303255, "loss": 0.3399, "step": 8182 }, { "epoch": 0.6629131561892417, "grad_norm": 0.032133206725120544, "learning_rate": 0.0001853953823304379, "loss": 0.4174, "step": 8183 }, { "epoch": 0.6629941672067401, "grad_norm": 0.03078743815422058, "learning_rate": 0.0001853908816778433, "loss": 0.3531, "step": 8184 }, { "epoch": 0.6630751782242384, "grad_norm": 0.03292972594499588, "learning_rate": 0.00018538638102524868, "loss": 0.3256, "step": 8185 }, { "epoch": 0.6631561892417369, "grad_norm": 0.03613421693444252, "learning_rate": 0.00018538188037265404, "loss": 0.3266, "step": 8186 }, { "epoch": 0.6632372002592353, "grad_norm": 0.030362753197550774, "learning_rate": 0.00018537737972005943, "loss": 0.3285, "step": 8187 }, { "epoch": 0.6633182112767336, "grad_norm": 0.03654602915048599, "learning_rate": 0.0001853728790674648, "loss": 0.3572, "step": 8188 }, { "epoch": 0.663399222294232, "grad_norm": 0.028821973130106926, "learning_rate": 0.00018536837841487015, "loss": 0.3269, "step": 8189 }, { "epoch": 0.6634802333117304, "grad_norm": 0.03146751970052719, "learning_rate": 0.00018536387776227554, "loss": 0.3292, "step": 8190 }, { "epoch": 0.6635612443292288, "grad_norm": 0.026077449321746826, "learning_rate": 0.00018535937710968092, "loss": 0.3013, "step": 8191 }, { "epoch": 0.6636422553467272, "grad_norm": 0.02873549982905388, "learning_rate": 0.00018535487645708628, "loss": 0.3095, "step": 8192 }, { "epoch": 0.6637232663642255, "grad_norm": 0.03518807142972946, "learning_rate": 0.00018535037580449167, "loss": 0.3682, "step": 8193 }, { "epoch": 0.6638042773817239, "grad_norm": 0.02819172665476799, "learning_rate": 0.00018534587515189703, "loss": 0.3173, "step": 8194 }, { "epoch": 0.6638852883992223, "grad_norm": 0.0349283330142498, "learning_rate": 0.0001853413744993024, "loss": 0.3611, "step": 8195 }, { "epoch": 0.6639662994167207, "grad_norm": 0.03043026104569435, "learning_rate": 0.0001853368738467078, "loss": 0.3297, "step": 8196 }, { "epoch": 0.664047310434219, "grad_norm": 0.03040027804672718, "learning_rate": 0.00018533237319411317, "loss": 0.3362, "step": 8197 }, { "epoch": 0.6641283214517174, "grad_norm": 0.030086055397987366, "learning_rate": 0.00018532787254151853, "loss": 0.3285, "step": 8198 }, { "epoch": 0.6642093324692158, "grad_norm": 0.03521738573908806, "learning_rate": 0.0001853233718889239, "loss": 0.3297, "step": 8199 }, { "epoch": 0.6642903434867142, "grad_norm": 0.029127212241292, "learning_rate": 0.00018531887123632927, "loss": 0.2985, "step": 8200 }, { "epoch": 0.6643713545042126, "grad_norm": 0.031439512968063354, "learning_rate": 0.00018531437058373463, "loss": 0.3424, "step": 8201 }, { "epoch": 0.6644523655217109, "grad_norm": 0.028876887634396553, "learning_rate": 0.00018530986993114005, "loss": 0.3458, "step": 8202 }, { "epoch": 0.6645333765392093, "grad_norm": 0.027514170855283737, "learning_rate": 0.0001853053692785454, "loss": 0.3167, "step": 8203 }, { "epoch": 0.6646143875567078, "grad_norm": 0.031886518001556396, "learning_rate": 0.00018530086862595077, "loss": 0.3359, "step": 8204 }, { "epoch": 0.6646953985742061, "grad_norm": 0.032745279371738434, "learning_rate": 0.00018529636797335615, "loss": 0.3414, "step": 8205 }, { "epoch": 0.6647764095917045, "grad_norm": 0.03147970885038376, "learning_rate": 0.00018529186732076151, "loss": 0.3224, "step": 8206 }, { "epoch": 0.6648574206092028, "grad_norm": 0.030254552140831947, "learning_rate": 0.00018528736666816687, "loss": 0.355, "step": 8207 }, { "epoch": 0.6649384316267012, "grad_norm": 0.03358277678489685, "learning_rate": 0.0001852828660155723, "loss": 0.4045, "step": 8208 }, { "epoch": 0.6650194426441997, "grad_norm": 0.028438769280910492, "learning_rate": 0.00018527836536297765, "loss": 0.3173, "step": 8209 }, { "epoch": 0.665100453661698, "grad_norm": 0.03194088488817215, "learning_rate": 0.000185273864710383, "loss": 0.3578, "step": 8210 }, { "epoch": 0.6651814646791964, "grad_norm": 0.034081485122442245, "learning_rate": 0.0001852693640577884, "loss": 0.3131, "step": 8211 }, { "epoch": 0.6652624756966947, "grad_norm": 0.027561400085687637, "learning_rate": 0.00018526486340519376, "loss": 0.3138, "step": 8212 }, { "epoch": 0.6653434867141931, "grad_norm": 0.030994897708296776, "learning_rate": 0.00018526036275259912, "loss": 0.3247, "step": 8213 }, { "epoch": 0.6654244977316915, "grad_norm": 0.031196480616927147, "learning_rate": 0.00018525586210000453, "loss": 0.3538, "step": 8214 }, { "epoch": 0.6655055087491899, "grad_norm": 0.030344417318701744, "learning_rate": 0.0001852513614474099, "loss": 0.334, "step": 8215 }, { "epoch": 0.6655865197666883, "grad_norm": 0.03306497633457184, "learning_rate": 0.00018524686079481525, "loss": 0.3763, "step": 8216 }, { "epoch": 0.6656675307841866, "grad_norm": 0.03008945658802986, "learning_rate": 0.00018524236014222064, "loss": 0.3221, "step": 8217 }, { "epoch": 0.6657485418016851, "grad_norm": 0.03661755844950676, "learning_rate": 0.000185237859489626, "loss": 0.367, "step": 8218 }, { "epoch": 0.6658295528191834, "grad_norm": 0.031068217009305954, "learning_rate": 0.00018523335883703136, "loss": 0.3296, "step": 8219 }, { "epoch": 0.6659105638366818, "grad_norm": 0.032785579562187195, "learning_rate": 0.00018522885818443677, "loss": 0.3532, "step": 8220 }, { "epoch": 0.6659915748541801, "grad_norm": 0.035862911492586136, "learning_rate": 0.00018522435753184213, "loss": 0.3114, "step": 8221 }, { "epoch": 0.6660725858716785, "grad_norm": 0.030160140246152878, "learning_rate": 0.0001852198568792475, "loss": 0.3415, "step": 8222 }, { "epoch": 0.666153596889177, "grad_norm": 0.029396269470453262, "learning_rate": 0.00018521535622665288, "loss": 0.3189, "step": 8223 }, { "epoch": 0.6662346079066753, "grad_norm": 0.02842693403363228, "learning_rate": 0.00018521085557405824, "loss": 0.3364, "step": 8224 }, { "epoch": 0.6663156189241737, "grad_norm": 0.03332171589136124, "learning_rate": 0.00018520635492146363, "loss": 0.3405, "step": 8225 }, { "epoch": 0.666396629941672, "grad_norm": 0.030777165666222572, "learning_rate": 0.00018520185426886901, "loss": 0.3495, "step": 8226 }, { "epoch": 0.6664776409591704, "grad_norm": 0.031115055084228516, "learning_rate": 0.00018519735361627437, "loss": 0.2807, "step": 8227 }, { "epoch": 0.6665586519766689, "grad_norm": 0.029170986264944077, "learning_rate": 0.00018519285296367973, "loss": 0.3295, "step": 8228 }, { "epoch": 0.6666396629941672, "grad_norm": 0.030726497992873192, "learning_rate": 0.00018518835231108512, "loss": 0.3405, "step": 8229 }, { "epoch": 0.6667206740116656, "grad_norm": 0.031001776456832886, "learning_rate": 0.00018518385165849048, "loss": 0.3136, "step": 8230 }, { "epoch": 0.6668016850291639, "grad_norm": 0.03498663753271103, "learning_rate": 0.00018517935100589587, "loss": 0.3649, "step": 8231 }, { "epoch": 0.6668826960466624, "grad_norm": 0.029227035120129585, "learning_rate": 0.00018517485035330126, "loss": 0.3397, "step": 8232 }, { "epoch": 0.6669637070641607, "grad_norm": 0.034446362406015396, "learning_rate": 0.00018517034970070662, "loss": 0.3718, "step": 8233 }, { "epoch": 0.6670447180816591, "grad_norm": 0.03119107149541378, "learning_rate": 0.00018516584904811198, "loss": 0.3188, "step": 8234 }, { "epoch": 0.6671257290991575, "grad_norm": 0.036731645464897156, "learning_rate": 0.00018516134839551736, "loss": 0.3484, "step": 8235 }, { "epoch": 0.6672067401166558, "grad_norm": 0.031873635947704315, "learning_rate": 0.00018515684774292272, "loss": 0.356, "step": 8236 }, { "epoch": 0.6672877511341543, "grad_norm": 0.03385842964053154, "learning_rate": 0.0001851523470903281, "loss": 0.3968, "step": 8237 }, { "epoch": 0.6673687621516526, "grad_norm": 0.03451808542013168, "learning_rate": 0.0001851478464377335, "loss": 0.347, "step": 8238 }, { "epoch": 0.667449773169151, "grad_norm": 0.03075961396098137, "learning_rate": 0.00018514334578513886, "loss": 0.3388, "step": 8239 }, { "epoch": 0.6675307841866494, "grad_norm": 0.028750881552696228, "learning_rate": 0.00018513884513254422, "loss": 0.3216, "step": 8240 }, { "epoch": 0.6676117952041478, "grad_norm": 0.028194140642881393, "learning_rate": 0.0001851343444799496, "loss": 0.3215, "step": 8241 }, { "epoch": 0.6676928062216462, "grad_norm": 0.028071299195289612, "learning_rate": 0.00018512984382735496, "loss": 0.3221, "step": 8242 }, { "epoch": 0.6677738172391445, "grad_norm": 0.03381093963980675, "learning_rate": 0.00018512534317476035, "loss": 0.3777, "step": 8243 }, { "epoch": 0.6678548282566429, "grad_norm": 0.03282523527741432, "learning_rate": 0.00018512084252216574, "loss": 0.3485, "step": 8244 }, { "epoch": 0.6679358392741412, "grad_norm": 0.030734248459339142, "learning_rate": 0.0001851163418695711, "loss": 0.3075, "step": 8245 }, { "epoch": 0.6680168502916397, "grad_norm": 0.031647514551877975, "learning_rate": 0.00018511184121697646, "loss": 0.3644, "step": 8246 }, { "epoch": 0.6680978613091381, "grad_norm": 0.03423544764518738, "learning_rate": 0.00018510734056438185, "loss": 0.3365, "step": 8247 }, { "epoch": 0.6681788723266364, "grad_norm": 0.03142764791846275, "learning_rate": 0.0001851028399117872, "loss": 0.3527, "step": 8248 }, { "epoch": 0.6682598833441348, "grad_norm": 0.03567124903202057, "learning_rate": 0.0001850983392591926, "loss": 0.323, "step": 8249 }, { "epoch": 0.6683408943616331, "grad_norm": 0.033891335129737854, "learning_rate": 0.00018509383860659798, "loss": 0.3747, "step": 8250 }, { "epoch": 0.6684219053791316, "grad_norm": 0.02757412940263748, "learning_rate": 0.00018508933795400334, "loss": 0.3186, "step": 8251 }, { "epoch": 0.66850291639663, "grad_norm": 0.030500739812850952, "learning_rate": 0.0001850848373014087, "loss": 0.3387, "step": 8252 }, { "epoch": 0.6685839274141283, "grad_norm": 0.03229675069451332, "learning_rate": 0.0001850803366488141, "loss": 0.333, "step": 8253 }, { "epoch": 0.6686649384316267, "grad_norm": 0.031556446105241776, "learning_rate": 0.00018507583599621947, "loss": 0.3234, "step": 8254 }, { "epoch": 0.6687459494491251, "grad_norm": 0.03277357295155525, "learning_rate": 0.00018507133534362483, "loss": 0.3282, "step": 8255 }, { "epoch": 0.6688269604666235, "grad_norm": 0.030226023867726326, "learning_rate": 0.00018506683469103022, "loss": 0.3975, "step": 8256 }, { "epoch": 0.6689079714841218, "grad_norm": 0.03333505243062973, "learning_rate": 0.00018506233403843558, "loss": 0.3657, "step": 8257 }, { "epoch": 0.6689889825016202, "grad_norm": 0.030513428151607513, "learning_rate": 0.00018505783338584094, "loss": 0.2682, "step": 8258 }, { "epoch": 0.6690699935191186, "grad_norm": 0.02662210538983345, "learning_rate": 0.00018505333273324633, "loss": 0.3014, "step": 8259 }, { "epoch": 0.669151004536617, "grad_norm": 0.033432211726903915, "learning_rate": 0.00018504883208065172, "loss": 0.3837, "step": 8260 }, { "epoch": 0.6692320155541154, "grad_norm": 0.028870683163404465, "learning_rate": 0.00018504433142805708, "loss": 0.3765, "step": 8261 }, { "epoch": 0.6693130265716137, "grad_norm": 0.03151526302099228, "learning_rate": 0.00018503983077546246, "loss": 0.3777, "step": 8262 }, { "epoch": 0.6693940375891121, "grad_norm": 0.029690612107515335, "learning_rate": 0.00018503533012286782, "loss": 0.3425, "step": 8263 }, { "epoch": 0.6694750486066104, "grad_norm": 0.03496913984417915, "learning_rate": 0.00018503082947027318, "loss": 0.3598, "step": 8264 }, { "epoch": 0.6695560596241089, "grad_norm": 0.03096066601574421, "learning_rate": 0.00018502632881767857, "loss": 0.33, "step": 8265 }, { "epoch": 0.6696370706416073, "grad_norm": 0.032060880213975906, "learning_rate": 0.00018502182816508396, "loss": 0.3653, "step": 8266 }, { "epoch": 0.6697180816591056, "grad_norm": 0.031135201454162598, "learning_rate": 0.00018501732751248932, "loss": 0.3451, "step": 8267 }, { "epoch": 0.669799092676604, "grad_norm": 0.03153124824166298, "learning_rate": 0.0001850128268598947, "loss": 0.3761, "step": 8268 }, { "epoch": 0.6698801036941024, "grad_norm": 0.03391728922724724, "learning_rate": 0.00018500832620730006, "loss": 0.3559, "step": 8269 }, { "epoch": 0.6699611147116008, "grad_norm": 0.033236369490623474, "learning_rate": 0.00018500382555470543, "loss": 0.3162, "step": 8270 }, { "epoch": 0.6700421257290992, "grad_norm": 0.041579004377126694, "learning_rate": 0.0001849993249021108, "loss": 0.4065, "step": 8271 }, { "epoch": 0.6701231367465975, "grad_norm": 0.02709999307990074, "learning_rate": 0.0001849948242495162, "loss": 0.338, "step": 8272 }, { "epoch": 0.6702041477640959, "grad_norm": 0.033023085445165634, "learning_rate": 0.00018499032359692156, "loss": 0.3523, "step": 8273 }, { "epoch": 0.6702851587815943, "grad_norm": 0.026633795350790024, "learning_rate": 0.00018498582294432695, "loss": 0.2917, "step": 8274 }, { "epoch": 0.6703661697990927, "grad_norm": 0.030886471271514893, "learning_rate": 0.0001849813222917323, "loss": 0.3144, "step": 8275 }, { "epoch": 0.670447180816591, "grad_norm": 0.02772926539182663, "learning_rate": 0.00018497682163913767, "loss": 0.311, "step": 8276 }, { "epoch": 0.6705281918340894, "grad_norm": 0.03047368861734867, "learning_rate": 0.00018497232098654308, "loss": 0.3737, "step": 8277 }, { "epoch": 0.6706092028515879, "grad_norm": 0.030798176303505898, "learning_rate": 0.00018496782033394844, "loss": 0.3343, "step": 8278 }, { "epoch": 0.6706902138690862, "grad_norm": 0.03067445196211338, "learning_rate": 0.0001849633196813538, "loss": 0.3801, "step": 8279 }, { "epoch": 0.6707712248865846, "grad_norm": 0.03117203153669834, "learning_rate": 0.0001849588190287592, "loss": 0.3651, "step": 8280 }, { "epoch": 0.6708522359040829, "grad_norm": 0.031838901340961456, "learning_rate": 0.00018495431837616455, "loss": 0.3705, "step": 8281 }, { "epoch": 0.6709332469215813, "grad_norm": 0.031705450266599655, "learning_rate": 0.0001849498177235699, "loss": 0.399, "step": 8282 }, { "epoch": 0.6710142579390798, "grad_norm": 0.030952507629990578, "learning_rate": 0.00018494531707097532, "loss": 0.3718, "step": 8283 }, { "epoch": 0.6710952689565781, "grad_norm": 0.03643830493092537, "learning_rate": 0.00018494081641838068, "loss": 0.3151, "step": 8284 }, { "epoch": 0.6711762799740765, "grad_norm": 0.030212044715881348, "learning_rate": 0.00018493631576578604, "loss": 0.3665, "step": 8285 }, { "epoch": 0.6712572909915748, "grad_norm": 0.0286360252648592, "learning_rate": 0.00018493181511319143, "loss": 0.2955, "step": 8286 }, { "epoch": 0.6713383020090732, "grad_norm": 0.0322282612323761, "learning_rate": 0.0001849273144605968, "loss": 0.3847, "step": 8287 }, { "epoch": 0.6714193130265717, "grad_norm": 0.03221067413687706, "learning_rate": 0.00018492281380800215, "loss": 0.3419, "step": 8288 }, { "epoch": 0.67150032404407, "grad_norm": 0.03293781727552414, "learning_rate": 0.00018491831315540756, "loss": 0.3746, "step": 8289 }, { "epoch": 0.6715813350615684, "grad_norm": 0.03397240489721298, "learning_rate": 0.00018491381250281292, "loss": 0.326, "step": 8290 }, { "epoch": 0.6716623460790667, "grad_norm": 0.027765223756432533, "learning_rate": 0.00018490931185021828, "loss": 0.2949, "step": 8291 }, { "epoch": 0.6717433570965652, "grad_norm": 0.030565418303012848, "learning_rate": 0.00018490481119762367, "loss": 0.3155, "step": 8292 }, { "epoch": 0.6718243681140635, "grad_norm": 0.02972385101020336, "learning_rate": 0.00018490031054502903, "loss": 0.3434, "step": 8293 }, { "epoch": 0.6719053791315619, "grad_norm": 0.03217386454343796, "learning_rate": 0.0001848958098924344, "loss": 0.3643, "step": 8294 }, { "epoch": 0.6719863901490603, "grad_norm": 0.03395373001694679, "learning_rate": 0.0001848913092398398, "loss": 0.3811, "step": 8295 }, { "epoch": 0.6720674011665586, "grad_norm": 0.0331270694732666, "learning_rate": 0.00018488680858724517, "loss": 0.312, "step": 8296 }, { "epoch": 0.6721484121840571, "grad_norm": 0.032217953354120255, "learning_rate": 0.00018488230793465053, "loss": 0.3978, "step": 8297 }, { "epoch": 0.6722294232015554, "grad_norm": 0.03126753121614456, "learning_rate": 0.0001848778072820559, "loss": 0.3478, "step": 8298 }, { "epoch": 0.6723104342190538, "grad_norm": 0.030235685408115387, "learning_rate": 0.00018487330662946127, "loss": 0.3219, "step": 8299 }, { "epoch": 0.6723914452365521, "grad_norm": 0.032127559185028076, "learning_rate": 0.00018486880597686663, "loss": 0.3244, "step": 8300 }, { "epoch": 0.6724724562540505, "grad_norm": 0.029689908027648926, "learning_rate": 0.00018486430532427205, "loss": 0.3231, "step": 8301 }, { "epoch": 0.672553467271549, "grad_norm": 0.032204680144786835, "learning_rate": 0.0001848598046716774, "loss": 0.3704, "step": 8302 }, { "epoch": 0.6726344782890473, "grad_norm": 0.033696506172418594, "learning_rate": 0.00018485530401908277, "loss": 0.3683, "step": 8303 }, { "epoch": 0.6727154893065457, "grad_norm": 0.030479637905955315, "learning_rate": 0.00018485080336648815, "loss": 0.3205, "step": 8304 }, { "epoch": 0.672796500324044, "grad_norm": 0.02611781284213066, "learning_rate": 0.00018484630271389351, "loss": 0.298, "step": 8305 }, { "epoch": 0.6728775113415425, "grad_norm": 0.03304387629032135, "learning_rate": 0.0001848418020612989, "loss": 0.3671, "step": 8306 }, { "epoch": 0.6729585223590409, "grad_norm": 0.030400149524211884, "learning_rate": 0.0001848373014087043, "loss": 0.3655, "step": 8307 }, { "epoch": 0.6730395333765392, "grad_norm": 0.035930197685956955, "learning_rate": 0.00018483280075610965, "loss": 0.3748, "step": 8308 }, { "epoch": 0.6731205443940376, "grad_norm": 0.028552010655403137, "learning_rate": 0.000184828300103515, "loss": 0.3544, "step": 8309 }, { "epoch": 0.6732015554115359, "grad_norm": 0.031653061509132385, "learning_rate": 0.0001848237994509204, "loss": 0.3633, "step": 8310 }, { "epoch": 0.6732825664290344, "grad_norm": 0.029851393774151802, "learning_rate": 0.00018481929879832576, "loss": 0.3013, "step": 8311 }, { "epoch": 0.6733635774465327, "grad_norm": 0.034095648676157, "learning_rate": 0.00018481479814573114, "loss": 0.3643, "step": 8312 }, { "epoch": 0.6734445884640311, "grad_norm": 0.029064346104860306, "learning_rate": 0.00018481029749313653, "loss": 0.3238, "step": 8313 }, { "epoch": 0.6735255994815295, "grad_norm": 0.03601328283548355, "learning_rate": 0.0001848057968405419, "loss": 0.3288, "step": 8314 }, { "epoch": 0.6736066104990278, "grad_norm": 0.033166661858558655, "learning_rate": 0.00018480129618794725, "loss": 0.3303, "step": 8315 }, { "epoch": 0.6736876215165263, "grad_norm": 0.03911403939127922, "learning_rate": 0.00018479679553535264, "loss": 0.3487, "step": 8316 }, { "epoch": 0.6737686325340246, "grad_norm": 0.03566893935203552, "learning_rate": 0.000184792294882758, "loss": 0.3874, "step": 8317 }, { "epoch": 0.673849643551523, "grad_norm": 0.03268427029252052, "learning_rate": 0.00018478779423016339, "loss": 0.3461, "step": 8318 }, { "epoch": 0.6739306545690213, "grad_norm": 0.03220139816403389, "learning_rate": 0.00018478329357756877, "loss": 0.3425, "step": 8319 }, { "epoch": 0.6740116655865198, "grad_norm": 0.033977918326854706, "learning_rate": 0.00018477879292497413, "loss": 0.3653, "step": 8320 }, { "epoch": 0.6740926766040182, "grad_norm": 0.0333586148917675, "learning_rate": 0.0001847742922723795, "loss": 0.3588, "step": 8321 }, { "epoch": 0.6741736876215165, "grad_norm": 0.03474782779812813, "learning_rate": 0.00018476979161978488, "loss": 0.3246, "step": 8322 }, { "epoch": 0.6742546986390149, "grad_norm": 0.03482431918382645, "learning_rate": 0.00018476529096719024, "loss": 0.3209, "step": 8323 }, { "epoch": 0.6743357096565132, "grad_norm": 0.03185975179076195, "learning_rate": 0.00018476079031459563, "loss": 0.347, "step": 8324 }, { "epoch": 0.6744167206740117, "grad_norm": 0.03250424191355705, "learning_rate": 0.00018475628966200101, "loss": 0.3224, "step": 8325 }, { "epoch": 0.6744977316915101, "grad_norm": 0.03220516815781593, "learning_rate": 0.00018475178900940637, "loss": 0.3783, "step": 8326 }, { "epoch": 0.6745787427090084, "grad_norm": 0.03147347643971443, "learning_rate": 0.00018474728835681173, "loss": 0.3235, "step": 8327 }, { "epoch": 0.6746597537265068, "grad_norm": 0.03596566244959831, "learning_rate": 0.00018474278770421712, "loss": 0.3858, "step": 8328 }, { "epoch": 0.6747407647440052, "grad_norm": 0.03046397678554058, "learning_rate": 0.0001847382870516225, "loss": 0.3356, "step": 8329 }, { "epoch": 0.6748217757615036, "grad_norm": 0.030585501343011856, "learning_rate": 0.00018473378639902787, "loss": 0.3516, "step": 8330 }, { "epoch": 0.674902786779002, "grad_norm": 0.03178463876247406, "learning_rate": 0.00018472928574643326, "loss": 0.3332, "step": 8331 }, { "epoch": 0.6749837977965003, "grad_norm": 0.03212091699242592, "learning_rate": 0.00018472478509383862, "loss": 0.3483, "step": 8332 }, { "epoch": 0.6750648088139987, "grad_norm": 0.03113105148077011, "learning_rate": 0.00018472028444124398, "loss": 0.313, "step": 8333 }, { "epoch": 0.6751458198314971, "grad_norm": 0.03049953654408455, "learning_rate": 0.00018471578378864936, "loss": 0.3194, "step": 8334 }, { "epoch": 0.6752268308489955, "grad_norm": 0.03434020280838013, "learning_rate": 0.00018471128313605475, "loss": 0.3494, "step": 8335 }, { "epoch": 0.6753078418664938, "grad_norm": 0.03446441516280174, "learning_rate": 0.0001847067824834601, "loss": 0.3756, "step": 8336 }, { "epoch": 0.6753888528839922, "grad_norm": 0.02943461760878563, "learning_rate": 0.0001847022818308655, "loss": 0.3164, "step": 8337 }, { "epoch": 0.6754698639014906, "grad_norm": 0.03737198933959007, "learning_rate": 0.00018469778117827086, "loss": 0.3463, "step": 8338 }, { "epoch": 0.675550874918989, "grad_norm": 0.026089653372764587, "learning_rate": 0.00018469328052567622, "loss": 0.2937, "step": 8339 }, { "epoch": 0.6756318859364874, "grad_norm": 0.03368382155895233, "learning_rate": 0.0001846887798730816, "loss": 0.3888, "step": 8340 }, { "epoch": 0.6757128969539857, "grad_norm": 0.034756697714328766, "learning_rate": 0.000184684279220487, "loss": 0.3741, "step": 8341 }, { "epoch": 0.6757939079714841, "grad_norm": 0.035592664033174515, "learning_rate": 0.00018467977856789235, "loss": 0.3442, "step": 8342 }, { "epoch": 0.6758749189889826, "grad_norm": 0.028692577034235, "learning_rate": 0.00018467527791529774, "loss": 0.3846, "step": 8343 }, { "epoch": 0.6759559300064809, "grad_norm": 0.03535209223628044, "learning_rate": 0.0001846707772627031, "loss": 0.3569, "step": 8344 }, { "epoch": 0.6760369410239793, "grad_norm": 0.03144112229347229, "learning_rate": 0.00018466627661010846, "loss": 0.3458, "step": 8345 }, { "epoch": 0.6761179520414776, "grad_norm": 0.03163455054163933, "learning_rate": 0.00018466177595751385, "loss": 0.3408, "step": 8346 }, { "epoch": 0.676198963058976, "grad_norm": 0.03496427834033966, "learning_rate": 0.00018465727530491923, "loss": 0.3751, "step": 8347 }, { "epoch": 0.6762799740764744, "grad_norm": 0.029395341873168945, "learning_rate": 0.0001846527746523246, "loss": 0.3297, "step": 8348 }, { "epoch": 0.6763609850939728, "grad_norm": 0.03403710573911667, "learning_rate": 0.00018464827399972998, "loss": 0.3831, "step": 8349 }, { "epoch": 0.6764419961114712, "grad_norm": 0.028789136558771133, "learning_rate": 0.00018464377334713534, "loss": 0.3725, "step": 8350 }, { "epoch": 0.6765230071289695, "grad_norm": 0.03976523131132126, "learning_rate": 0.0001846392726945407, "loss": 0.4335, "step": 8351 }, { "epoch": 0.6766040181464679, "grad_norm": 0.02864331193268299, "learning_rate": 0.0001846347720419461, "loss": 0.3131, "step": 8352 }, { "epoch": 0.6766850291639663, "grad_norm": 0.030334942042827606, "learning_rate": 0.00018463027138935147, "loss": 0.3114, "step": 8353 }, { "epoch": 0.6767660401814647, "grad_norm": 0.03436267375946045, "learning_rate": 0.00018462577073675683, "loss": 0.3356, "step": 8354 }, { "epoch": 0.676847051198963, "grad_norm": 0.032568447291851044, "learning_rate": 0.00018462127008416222, "loss": 0.3685, "step": 8355 }, { "epoch": 0.6769280622164614, "grad_norm": 0.03440425917506218, "learning_rate": 0.00018461676943156758, "loss": 0.3552, "step": 8356 }, { "epoch": 0.6770090732339599, "grad_norm": 0.03378186374902725, "learning_rate": 0.00018461226877897294, "loss": 0.3257, "step": 8357 }, { "epoch": 0.6770900842514582, "grad_norm": 0.03300986438989639, "learning_rate": 0.00018460776812637836, "loss": 0.3587, "step": 8358 }, { "epoch": 0.6771710952689566, "grad_norm": 0.03187122195959091, "learning_rate": 0.00018460326747378372, "loss": 0.3304, "step": 8359 }, { "epoch": 0.6772521062864549, "grad_norm": 0.030382633209228516, "learning_rate": 0.00018459876682118908, "loss": 0.3617, "step": 8360 }, { "epoch": 0.6773331173039533, "grad_norm": 0.030665893107652664, "learning_rate": 0.00018459426616859446, "loss": 0.3531, "step": 8361 }, { "epoch": 0.6774141283214518, "grad_norm": 0.032478105276823044, "learning_rate": 0.00018458976551599982, "loss": 0.3567, "step": 8362 }, { "epoch": 0.6774951393389501, "grad_norm": 0.031920671463012695, "learning_rate": 0.00018458526486340518, "loss": 0.3899, "step": 8363 }, { "epoch": 0.6775761503564485, "grad_norm": 0.0325034074485302, "learning_rate": 0.0001845807642108106, "loss": 0.3315, "step": 8364 }, { "epoch": 0.6776571613739468, "grad_norm": 0.028699731454253197, "learning_rate": 0.00018457626355821596, "loss": 0.2991, "step": 8365 }, { "epoch": 0.6777381723914452, "grad_norm": 0.031557776033878326, "learning_rate": 0.00018457176290562132, "loss": 0.385, "step": 8366 }, { "epoch": 0.6778191834089436, "grad_norm": 0.031747668981552124, "learning_rate": 0.0001845672622530267, "loss": 0.3662, "step": 8367 }, { "epoch": 0.677900194426442, "grad_norm": 0.03107340633869171, "learning_rate": 0.00018456276160043207, "loss": 0.3672, "step": 8368 }, { "epoch": 0.6779812054439404, "grad_norm": 0.029906675219535828, "learning_rate": 0.00018455826094783743, "loss": 0.3334, "step": 8369 }, { "epoch": 0.6780622164614387, "grad_norm": 0.029467299580574036, "learning_rate": 0.00018455376029524284, "loss": 0.3618, "step": 8370 }, { "epoch": 0.6781432274789372, "grad_norm": 0.027794750407338142, "learning_rate": 0.0001845492596426482, "loss": 0.2963, "step": 8371 }, { "epoch": 0.6782242384964355, "grad_norm": 0.029678959399461746, "learning_rate": 0.00018454475899005356, "loss": 0.3364, "step": 8372 }, { "epoch": 0.6783052495139339, "grad_norm": 0.029389753937721252, "learning_rate": 0.00018454025833745895, "loss": 0.3207, "step": 8373 }, { "epoch": 0.6783862605314323, "grad_norm": 0.031182603910565376, "learning_rate": 0.0001845357576848643, "loss": 0.3015, "step": 8374 }, { "epoch": 0.6784672715489306, "grad_norm": 0.0318613238632679, "learning_rate": 0.00018453125703226967, "loss": 0.3365, "step": 8375 }, { "epoch": 0.6785482825664291, "grad_norm": 0.03271263837814331, "learning_rate": 0.00018452675637967508, "loss": 0.382, "step": 8376 }, { "epoch": 0.6786292935839274, "grad_norm": 0.03947818651795387, "learning_rate": 0.00018452225572708044, "loss": 0.3519, "step": 8377 }, { "epoch": 0.6787103046014258, "grad_norm": 0.029119271785020828, "learning_rate": 0.0001845177550744858, "loss": 0.3464, "step": 8378 }, { "epoch": 0.6787913156189241, "grad_norm": 0.026837065815925598, "learning_rate": 0.0001845132544218912, "loss": 0.3269, "step": 8379 }, { "epoch": 0.6788723266364226, "grad_norm": 0.03194974735379219, "learning_rate": 0.00018450875376929655, "loss": 0.3412, "step": 8380 }, { "epoch": 0.678953337653921, "grad_norm": 0.031717248260974884, "learning_rate": 0.00018450425311670194, "loss": 0.3267, "step": 8381 }, { "epoch": 0.6790343486714193, "grad_norm": 0.03393028676509857, "learning_rate": 0.00018449975246410732, "loss": 0.3378, "step": 8382 }, { "epoch": 0.6791153596889177, "grad_norm": 0.02875995635986328, "learning_rate": 0.00018449525181151268, "loss": 0.2964, "step": 8383 }, { "epoch": 0.679196370706416, "grad_norm": 0.03428420424461365, "learning_rate": 0.00018449075115891804, "loss": 0.345, "step": 8384 }, { "epoch": 0.6792773817239145, "grad_norm": 0.033055152744054794, "learning_rate": 0.00018448625050632343, "loss": 0.334, "step": 8385 }, { "epoch": 0.6793583927414129, "grad_norm": 0.02540542371571064, "learning_rate": 0.0001844817498537288, "loss": 0.3016, "step": 8386 }, { "epoch": 0.6794394037589112, "grad_norm": 0.027096103876829147, "learning_rate": 0.00018447724920113418, "loss": 0.3058, "step": 8387 }, { "epoch": 0.6795204147764096, "grad_norm": 0.033959295600652695, "learning_rate": 0.00018447274854853956, "loss": 0.3308, "step": 8388 }, { "epoch": 0.6796014257939079, "grad_norm": 0.037247296422719955, "learning_rate": 0.00018446824789594492, "loss": 0.4068, "step": 8389 }, { "epoch": 0.6796824368114064, "grad_norm": 0.029535872861742973, "learning_rate": 0.00018446374724335028, "loss": 0.3042, "step": 8390 }, { "epoch": 0.6797634478289047, "grad_norm": 0.030849486589431763, "learning_rate": 0.00018445924659075567, "loss": 0.3305, "step": 8391 }, { "epoch": 0.6798444588464031, "grad_norm": 0.036509912461042404, "learning_rate": 0.00018445474593816103, "loss": 0.3492, "step": 8392 }, { "epoch": 0.6799254698639015, "grad_norm": 0.036684028804302216, "learning_rate": 0.00018445024528556642, "loss": 0.4093, "step": 8393 }, { "epoch": 0.6800064808813999, "grad_norm": 0.029528088867664337, "learning_rate": 0.0001844457446329718, "loss": 0.3526, "step": 8394 }, { "epoch": 0.6800874918988983, "grad_norm": 0.029075436294078827, "learning_rate": 0.00018444124398037717, "loss": 0.3485, "step": 8395 }, { "epoch": 0.6801685029163966, "grad_norm": 0.02978142723441124, "learning_rate": 0.00018443674332778253, "loss": 0.2908, "step": 8396 }, { "epoch": 0.680249513933895, "grad_norm": 0.034966953098773956, "learning_rate": 0.0001844322426751879, "loss": 0.3829, "step": 8397 }, { "epoch": 0.6803305249513933, "grad_norm": 0.03151193633675575, "learning_rate": 0.00018442774202259327, "loss": 0.3413, "step": 8398 }, { "epoch": 0.6804115359688918, "grad_norm": 0.0359109602868557, "learning_rate": 0.00018442324136999866, "loss": 0.3949, "step": 8399 }, { "epoch": 0.6804925469863902, "grad_norm": 0.03561898693442345, "learning_rate": 0.00018441874071740405, "loss": 0.3485, "step": 8400 }, { "epoch": 0.6805735580038885, "grad_norm": 0.027923088520765305, "learning_rate": 0.0001844142400648094, "loss": 0.3571, "step": 8401 }, { "epoch": 0.6806545690213869, "grad_norm": 0.03303304314613342, "learning_rate": 0.00018440973941221477, "loss": 0.3832, "step": 8402 }, { "epoch": 0.6807355800388852, "grad_norm": 0.028605502098798752, "learning_rate": 0.00018440523875962015, "loss": 0.3472, "step": 8403 }, { "epoch": 0.6808165910563837, "grad_norm": 0.03851553052663803, "learning_rate": 0.00018440073810702551, "loss": 0.3674, "step": 8404 }, { "epoch": 0.6808976020738821, "grad_norm": 0.03283935785293579, "learning_rate": 0.0001843962374544309, "loss": 0.3333, "step": 8405 }, { "epoch": 0.6809786130913804, "grad_norm": 0.03001883625984192, "learning_rate": 0.0001843917368018363, "loss": 0.3492, "step": 8406 }, { "epoch": 0.6810596241088788, "grad_norm": 0.03826170414686203, "learning_rate": 0.00018438723614924165, "loss": 0.3458, "step": 8407 }, { "epoch": 0.6811406351263772, "grad_norm": 0.033792685717344284, "learning_rate": 0.000184382735496647, "loss": 0.3444, "step": 8408 }, { "epoch": 0.6812216461438756, "grad_norm": 0.032256945967674255, "learning_rate": 0.0001843782348440524, "loss": 0.3727, "step": 8409 }, { "epoch": 0.681302657161374, "grad_norm": 0.032892536371946335, "learning_rate": 0.00018437373419145778, "loss": 0.3385, "step": 8410 }, { "epoch": 0.6813836681788723, "grad_norm": 0.03750142082571983, "learning_rate": 0.00018436923353886314, "loss": 0.373, "step": 8411 }, { "epoch": 0.6814646791963707, "grad_norm": 0.03716817870736122, "learning_rate": 0.00018436473288626853, "loss": 0.3735, "step": 8412 }, { "epoch": 0.6815456902138691, "grad_norm": 0.02930922619998455, "learning_rate": 0.0001843602322336739, "loss": 0.3086, "step": 8413 }, { "epoch": 0.6816267012313675, "grad_norm": 0.03268182650208473, "learning_rate": 0.00018435573158107925, "loss": 0.349, "step": 8414 }, { "epoch": 0.6817077122488658, "grad_norm": 0.03252032399177551, "learning_rate": 0.00018435123092848464, "loss": 0.3054, "step": 8415 }, { "epoch": 0.6817887232663642, "grad_norm": 0.03493228182196617, "learning_rate": 0.00018434673027589003, "loss": 0.3688, "step": 8416 }, { "epoch": 0.6818697342838627, "grad_norm": 0.033659689128398895, "learning_rate": 0.00018434222962329539, "loss": 0.3069, "step": 8417 }, { "epoch": 0.681950745301361, "grad_norm": 0.0300731398165226, "learning_rate": 0.00018433772897070077, "loss": 0.2972, "step": 8418 }, { "epoch": 0.6820317563188594, "grad_norm": 0.035994041711091995, "learning_rate": 0.00018433322831810613, "loss": 0.324, "step": 8419 }, { "epoch": 0.6821127673363577, "grad_norm": 0.03320501744747162, "learning_rate": 0.0001843287276655115, "loss": 0.3607, "step": 8420 }, { "epoch": 0.6821937783538561, "grad_norm": 0.02966352365911007, "learning_rate": 0.00018432422701291688, "loss": 0.3363, "step": 8421 }, { "epoch": 0.6822747893713546, "grad_norm": 0.034290432929992676, "learning_rate": 0.00018431972636032227, "loss": 0.351, "step": 8422 }, { "epoch": 0.6823558003888529, "grad_norm": 0.031076081097126007, "learning_rate": 0.00018431522570772763, "loss": 0.3386, "step": 8423 }, { "epoch": 0.6824368114063513, "grad_norm": 0.04111815616488457, "learning_rate": 0.00018431072505513301, "loss": 0.3651, "step": 8424 }, { "epoch": 0.6825178224238496, "grad_norm": 0.0348864309489727, "learning_rate": 0.00018430622440253837, "loss": 0.4214, "step": 8425 }, { "epoch": 0.682598833441348, "grad_norm": 0.030616192147135735, "learning_rate": 0.00018430172374994373, "loss": 0.3693, "step": 8426 }, { "epoch": 0.6826798444588464, "grad_norm": 0.030621033161878586, "learning_rate": 0.00018429722309734912, "loss": 0.3228, "step": 8427 }, { "epoch": 0.6827608554763448, "grad_norm": 0.032740604132413864, "learning_rate": 0.0001842927224447545, "loss": 0.3253, "step": 8428 }, { "epoch": 0.6828418664938432, "grad_norm": 0.032946109771728516, "learning_rate": 0.00018428822179215987, "loss": 0.3695, "step": 8429 }, { "epoch": 0.6829228775113415, "grad_norm": 0.03482314199209213, "learning_rate": 0.00018428372113956526, "loss": 0.3573, "step": 8430 }, { "epoch": 0.68300388852884, "grad_norm": 0.03567078337073326, "learning_rate": 0.00018427922048697062, "loss": 0.3514, "step": 8431 }, { "epoch": 0.6830848995463383, "grad_norm": 0.040290411561727524, "learning_rate": 0.00018427471983437598, "loss": 0.3861, "step": 8432 }, { "epoch": 0.6831659105638367, "grad_norm": 0.039115242660045624, "learning_rate": 0.00018427021918178136, "loss": 0.3529, "step": 8433 }, { "epoch": 0.683246921581335, "grad_norm": 0.03234872967004776, "learning_rate": 0.00018426571852918675, "loss": 0.3497, "step": 8434 }, { "epoch": 0.6833279325988334, "grad_norm": 0.029645444825291634, "learning_rate": 0.0001842612178765921, "loss": 0.3215, "step": 8435 }, { "epoch": 0.6834089436163319, "grad_norm": 0.03823678195476532, "learning_rate": 0.0001842567172239975, "loss": 0.3486, "step": 8436 }, { "epoch": 0.6834899546338302, "grad_norm": 0.033858608454465866, "learning_rate": 0.00018425221657140286, "loss": 0.3571, "step": 8437 }, { "epoch": 0.6835709656513286, "grad_norm": 0.029137929901480675, "learning_rate": 0.00018424771591880822, "loss": 0.3132, "step": 8438 }, { "epoch": 0.6836519766688269, "grad_norm": 0.030431587249040604, "learning_rate": 0.00018424321526621363, "loss": 0.3677, "step": 8439 }, { "epoch": 0.6837329876863253, "grad_norm": 0.03393100947141647, "learning_rate": 0.000184238714613619, "loss": 0.3538, "step": 8440 }, { "epoch": 0.6838139987038238, "grad_norm": 0.033213693648576736, "learning_rate": 0.00018423421396102435, "loss": 0.3299, "step": 8441 }, { "epoch": 0.6838950097213221, "grad_norm": 0.03575395420193672, "learning_rate": 0.00018422971330842974, "loss": 0.3084, "step": 8442 }, { "epoch": 0.6839760207388205, "grad_norm": 0.033678457140922546, "learning_rate": 0.0001842252126558351, "loss": 0.3386, "step": 8443 }, { "epoch": 0.6840570317563188, "grad_norm": 0.030759470537304878, "learning_rate": 0.00018422071200324046, "loss": 0.3371, "step": 8444 }, { "epoch": 0.6841380427738173, "grad_norm": 0.033590931445360184, "learning_rate": 0.00018421621135064587, "loss": 0.3781, "step": 8445 }, { "epoch": 0.6842190537913156, "grad_norm": 0.03153382986783981, "learning_rate": 0.00018421171069805123, "loss": 0.3207, "step": 8446 }, { "epoch": 0.684300064808814, "grad_norm": 0.03318261355161667, "learning_rate": 0.0001842072100454566, "loss": 0.3428, "step": 8447 }, { "epoch": 0.6843810758263124, "grad_norm": 0.03142682835459709, "learning_rate": 0.00018420270939286198, "loss": 0.3678, "step": 8448 }, { "epoch": 0.6844620868438107, "grad_norm": 0.03981975093483925, "learning_rate": 0.00018419820874026734, "loss": 0.3752, "step": 8449 }, { "epoch": 0.6845430978613092, "grad_norm": 0.027852242812514305, "learning_rate": 0.0001841937080876727, "loss": 0.3113, "step": 8450 }, { "epoch": 0.6846241088788075, "grad_norm": 0.03192290663719177, "learning_rate": 0.00018418920743507812, "loss": 0.3583, "step": 8451 }, { "epoch": 0.6847051198963059, "grad_norm": 0.03634326532483101, "learning_rate": 0.00018418470678248348, "loss": 0.3149, "step": 8452 }, { "epoch": 0.6847861309138042, "grad_norm": 0.03109004907310009, "learning_rate": 0.00018418020612988884, "loss": 0.3133, "step": 8453 }, { "epoch": 0.6848671419313026, "grad_norm": 0.03400532156229019, "learning_rate": 0.00018417570547729422, "loss": 0.3254, "step": 8454 }, { "epoch": 0.6849481529488011, "grad_norm": 0.029681047424674034, "learning_rate": 0.00018417120482469958, "loss": 0.3686, "step": 8455 }, { "epoch": 0.6850291639662994, "grad_norm": 0.029582427814602852, "learning_rate": 0.00018416670417210494, "loss": 0.308, "step": 8456 }, { "epoch": 0.6851101749837978, "grad_norm": 0.030207743868231773, "learning_rate": 0.00018416220351951036, "loss": 0.3149, "step": 8457 }, { "epoch": 0.6851911860012961, "grad_norm": 0.02820783481001854, "learning_rate": 0.00018415770286691572, "loss": 0.2731, "step": 8458 }, { "epoch": 0.6852721970187946, "grad_norm": 0.030728664249181747, "learning_rate": 0.00018415320221432108, "loss": 0.3038, "step": 8459 }, { "epoch": 0.685353208036293, "grad_norm": 0.03349175676703453, "learning_rate": 0.00018414870156172646, "loss": 0.39, "step": 8460 }, { "epoch": 0.6854342190537913, "grad_norm": 0.03066192753612995, "learning_rate": 0.00018414420090913182, "loss": 0.3464, "step": 8461 }, { "epoch": 0.6855152300712897, "grad_norm": 0.030717190355062485, "learning_rate": 0.0001841397002565372, "loss": 0.3424, "step": 8462 }, { "epoch": 0.685596241088788, "grad_norm": 0.028941521421074867, "learning_rate": 0.0001841351996039426, "loss": 0.3517, "step": 8463 }, { "epoch": 0.6856772521062865, "grad_norm": 0.031130949035286903, "learning_rate": 0.00018413069895134796, "loss": 0.3115, "step": 8464 }, { "epoch": 0.6857582631237849, "grad_norm": 0.029154805466532707, "learning_rate": 0.00018412619829875332, "loss": 0.3008, "step": 8465 }, { "epoch": 0.6858392741412832, "grad_norm": 0.03485098108649254, "learning_rate": 0.0001841216976461587, "loss": 0.3451, "step": 8466 }, { "epoch": 0.6859202851587816, "grad_norm": 0.030184783041477203, "learning_rate": 0.00018411719699356407, "loss": 0.3554, "step": 8467 }, { "epoch": 0.68600129617628, "grad_norm": 0.02812212146818638, "learning_rate": 0.00018411269634096945, "loss": 0.33, "step": 8468 }, { "epoch": 0.6860823071937784, "grad_norm": 0.0370754636824131, "learning_rate": 0.00018410819568837484, "loss": 0.3428, "step": 8469 }, { "epoch": 0.6861633182112767, "grad_norm": 0.032499175518751144, "learning_rate": 0.0001841036950357802, "loss": 0.3263, "step": 8470 }, { "epoch": 0.6862443292287751, "grad_norm": 0.028472188860177994, "learning_rate": 0.00018409919438318556, "loss": 0.3552, "step": 8471 }, { "epoch": 0.6863253402462735, "grad_norm": 0.0350298248231411, "learning_rate": 0.00018409469373059095, "loss": 0.3764, "step": 8472 }, { "epoch": 0.6864063512637719, "grad_norm": 0.0346914604306221, "learning_rate": 0.0001840901930779963, "loss": 0.3325, "step": 8473 }, { "epoch": 0.6864873622812703, "grad_norm": 0.031484149396419525, "learning_rate": 0.0001840856924254017, "loss": 0.3588, "step": 8474 }, { "epoch": 0.6865683732987686, "grad_norm": 0.03626801818609238, "learning_rate": 0.00018408119177280708, "loss": 0.3355, "step": 8475 }, { "epoch": 0.686649384316267, "grad_norm": 0.0336102731525898, "learning_rate": 0.00018407669112021244, "loss": 0.408, "step": 8476 }, { "epoch": 0.6867303953337653, "grad_norm": 0.0309014730155468, "learning_rate": 0.0001840721904676178, "loss": 0.3403, "step": 8477 }, { "epoch": 0.6868114063512638, "grad_norm": 0.035775888711214066, "learning_rate": 0.0001840676898150232, "loss": 0.3254, "step": 8478 }, { "epoch": 0.6868924173687622, "grad_norm": 0.03134749084711075, "learning_rate": 0.00018406318916242855, "loss": 0.3703, "step": 8479 }, { "epoch": 0.6869734283862605, "grad_norm": 0.03138517588376999, "learning_rate": 0.00018405868850983394, "loss": 0.3614, "step": 8480 }, { "epoch": 0.6870544394037589, "grad_norm": 0.030462482944130898, "learning_rate": 0.00018405418785723932, "loss": 0.2867, "step": 8481 }, { "epoch": 0.6871354504212573, "grad_norm": 0.029852738603949547, "learning_rate": 0.00018404968720464468, "loss": 0.312, "step": 8482 }, { "epoch": 0.6872164614387557, "grad_norm": 0.033276185393333435, "learning_rate": 0.00018404518655205004, "loss": 0.3144, "step": 8483 }, { "epoch": 0.687297472456254, "grad_norm": 0.03628126531839371, "learning_rate": 0.00018404068589945543, "loss": 0.3954, "step": 8484 }, { "epoch": 0.6873784834737524, "grad_norm": 0.03308907151222229, "learning_rate": 0.0001840361852468608, "loss": 0.3578, "step": 8485 }, { "epoch": 0.6874594944912508, "grad_norm": 0.029518576338887215, "learning_rate": 0.00018403168459426618, "loss": 0.3736, "step": 8486 }, { "epoch": 0.6875405055087492, "grad_norm": 0.031972698867321014, "learning_rate": 0.00018402718394167156, "loss": 0.3777, "step": 8487 }, { "epoch": 0.6876215165262476, "grad_norm": 0.036778468638658524, "learning_rate": 0.00018402268328907692, "loss": 0.409, "step": 8488 }, { "epoch": 0.687702527543746, "grad_norm": 0.033233892172575, "learning_rate": 0.00018401818263648228, "loss": 0.4315, "step": 8489 }, { "epoch": 0.6877835385612443, "grad_norm": 0.033360805362463, "learning_rate": 0.00018401368198388767, "loss": 0.3999, "step": 8490 }, { "epoch": 0.6878645495787427, "grad_norm": 0.035713110119104385, "learning_rate": 0.00018400918133129306, "loss": 0.3866, "step": 8491 }, { "epoch": 0.6879455605962411, "grad_norm": 0.029528697952628136, "learning_rate": 0.00018400468067869842, "loss": 0.2948, "step": 8492 }, { "epoch": 0.6880265716137395, "grad_norm": 0.03073948062956333, "learning_rate": 0.0001840001800261038, "loss": 0.4063, "step": 8493 }, { "epoch": 0.6881075826312378, "grad_norm": 0.033134352415800095, "learning_rate": 0.00018399567937350917, "loss": 0.3205, "step": 8494 }, { "epoch": 0.6881885936487362, "grad_norm": 0.03518693521618843, "learning_rate": 0.00018399117872091453, "loss": 0.4068, "step": 8495 }, { "epoch": 0.6882696046662347, "grad_norm": 0.030890299007296562, "learning_rate": 0.00018398667806831991, "loss": 0.3325, "step": 8496 }, { "epoch": 0.688350615683733, "grad_norm": 0.03359353542327881, "learning_rate": 0.0001839821774157253, "loss": 0.3043, "step": 8497 }, { "epoch": 0.6884316267012314, "grad_norm": 0.028195347636938095, "learning_rate": 0.00018397767676313066, "loss": 0.2898, "step": 8498 }, { "epoch": 0.6885126377187297, "grad_norm": 0.031246118247509003, "learning_rate": 0.00018397317611053605, "loss": 0.373, "step": 8499 }, { "epoch": 0.6885936487362281, "grad_norm": 0.035277605056762695, "learning_rate": 0.0001839686754579414, "loss": 0.329, "step": 8500 }, { "epoch": 0.6886746597537265, "grad_norm": 0.030519159510731697, "learning_rate": 0.00018396417480534677, "loss": 0.3366, "step": 8501 }, { "epoch": 0.6887556707712249, "grad_norm": 0.03541049733757973, "learning_rate": 0.00018395967415275216, "loss": 0.3487, "step": 8502 }, { "epoch": 0.6888366817887233, "grad_norm": 0.03722112998366356, "learning_rate": 0.00018395517350015754, "loss": 0.358, "step": 8503 }, { "epoch": 0.6889176928062216, "grad_norm": 0.028488805517554283, "learning_rate": 0.0001839506728475629, "loss": 0.289, "step": 8504 }, { "epoch": 0.68899870382372, "grad_norm": 0.030860982835292816, "learning_rate": 0.0001839461721949683, "loss": 0.353, "step": 8505 }, { "epoch": 0.6890797148412184, "grad_norm": 0.0341181643307209, "learning_rate": 0.00018394167154237365, "loss": 0.3434, "step": 8506 }, { "epoch": 0.6891607258587168, "grad_norm": 0.030584245920181274, "learning_rate": 0.000183937170889779, "loss": 0.3334, "step": 8507 }, { "epoch": 0.6892417368762151, "grad_norm": 0.027770398184657097, "learning_rate": 0.0001839326702371844, "loss": 0.3031, "step": 8508 }, { "epoch": 0.6893227478937135, "grad_norm": 0.030859703198075294, "learning_rate": 0.00018392816958458978, "loss": 0.3319, "step": 8509 }, { "epoch": 0.689403758911212, "grad_norm": 0.03638660907745361, "learning_rate": 0.00018392366893199514, "loss": 0.4019, "step": 8510 }, { "epoch": 0.6894847699287103, "grad_norm": 0.028055081143975258, "learning_rate": 0.00018391916827940053, "loss": 0.3395, "step": 8511 }, { "epoch": 0.6895657809462087, "grad_norm": 0.031972650438547134, "learning_rate": 0.0001839146676268059, "loss": 0.3476, "step": 8512 }, { "epoch": 0.689646791963707, "grad_norm": 0.03158603608608246, "learning_rate": 0.00018391016697421125, "loss": 0.3258, "step": 8513 }, { "epoch": 0.6897278029812054, "grad_norm": 0.03295688331127167, "learning_rate": 0.00018390566632161667, "loss": 0.3516, "step": 8514 }, { "epoch": 0.6898088139987039, "grad_norm": 0.03116844780743122, "learning_rate": 0.00018390116566902203, "loss": 0.3671, "step": 8515 }, { "epoch": 0.6898898250162022, "grad_norm": 0.02899783104658127, "learning_rate": 0.00018389666501642739, "loss": 0.3082, "step": 8516 }, { "epoch": 0.6899708360337006, "grad_norm": 0.030172912403941154, "learning_rate": 0.00018389216436383277, "loss": 0.3322, "step": 8517 }, { "epoch": 0.6900518470511989, "grad_norm": 0.030508432537317276, "learning_rate": 0.00018388766371123813, "loss": 0.3723, "step": 8518 }, { "epoch": 0.6901328580686974, "grad_norm": 0.032022520899772644, "learning_rate": 0.0001838831630586435, "loss": 0.3673, "step": 8519 }, { "epoch": 0.6902138690861958, "grad_norm": 0.0314970389008522, "learning_rate": 0.0001838786624060489, "loss": 0.3344, "step": 8520 }, { "epoch": 0.6902948801036941, "grad_norm": 0.027550017461180687, "learning_rate": 0.00018387416175345427, "loss": 0.2939, "step": 8521 }, { "epoch": 0.6903758911211925, "grad_norm": 0.035641346126794815, "learning_rate": 0.00018386966110085963, "loss": 0.3855, "step": 8522 }, { "epoch": 0.6904569021386908, "grad_norm": 0.03057931363582611, "learning_rate": 0.00018386516044826501, "loss": 0.3432, "step": 8523 }, { "epoch": 0.6905379131561893, "grad_norm": 0.03537129983305931, "learning_rate": 0.00018386065979567037, "loss": 0.3375, "step": 8524 }, { "epoch": 0.6906189241736876, "grad_norm": 0.03352898359298706, "learning_rate": 0.00018385615914307573, "loss": 0.3071, "step": 8525 }, { "epoch": 0.690699935191186, "grad_norm": 0.028735622763633728, "learning_rate": 0.00018385165849048115, "loss": 0.3063, "step": 8526 }, { "epoch": 0.6907809462086844, "grad_norm": 0.027944544330239296, "learning_rate": 0.0001838471578378865, "loss": 0.3412, "step": 8527 }, { "epoch": 0.6908619572261827, "grad_norm": 0.026370206847786903, "learning_rate": 0.00018384265718529187, "loss": 0.3208, "step": 8528 }, { "epoch": 0.6909429682436812, "grad_norm": 0.035538118332624435, "learning_rate": 0.00018383815653269726, "loss": 0.3605, "step": 8529 }, { "epoch": 0.6910239792611795, "grad_norm": 0.030197838321328163, "learning_rate": 0.00018383365588010262, "loss": 0.36, "step": 8530 }, { "epoch": 0.6911049902786779, "grad_norm": 0.026188869029283524, "learning_rate": 0.00018382915522750798, "loss": 0.297, "step": 8531 }, { "epoch": 0.6911860012961762, "grad_norm": 0.032884180545806885, "learning_rate": 0.0001838246545749134, "loss": 0.3684, "step": 8532 }, { "epoch": 0.6912670123136747, "grad_norm": 0.027330821380019188, "learning_rate": 0.00018382015392231875, "loss": 0.2778, "step": 8533 }, { "epoch": 0.6913480233311731, "grad_norm": 0.03134836256504059, "learning_rate": 0.0001838156532697241, "loss": 0.3519, "step": 8534 }, { "epoch": 0.6914290343486714, "grad_norm": 0.03451668098568916, "learning_rate": 0.0001838111526171295, "loss": 0.408, "step": 8535 }, { "epoch": 0.6915100453661698, "grad_norm": 0.037174392491579056, "learning_rate": 0.00018380665196453486, "loss": 0.3307, "step": 8536 }, { "epoch": 0.6915910563836681, "grad_norm": 0.030446277931332588, "learning_rate": 0.00018380215131194022, "loss": 0.3321, "step": 8537 }, { "epoch": 0.6916720674011666, "grad_norm": 0.031310826539993286, "learning_rate": 0.00018379765065934563, "loss": 0.3356, "step": 8538 }, { "epoch": 0.691753078418665, "grad_norm": 0.036476824432611465, "learning_rate": 0.000183793150006751, "loss": 0.3875, "step": 8539 }, { "epoch": 0.6918340894361633, "grad_norm": 0.027982639148831367, "learning_rate": 0.00018378864935415635, "loss": 0.3096, "step": 8540 }, { "epoch": 0.6919151004536617, "grad_norm": 0.02935284748673439, "learning_rate": 0.00018378414870156174, "loss": 0.3416, "step": 8541 }, { "epoch": 0.69199611147116, "grad_norm": 0.03340727463364601, "learning_rate": 0.0001837796480489671, "loss": 0.3885, "step": 8542 }, { "epoch": 0.6920771224886585, "grad_norm": 0.030995968729257584, "learning_rate": 0.0001837751473963725, "loss": 0.3612, "step": 8543 }, { "epoch": 0.6921581335061568, "grad_norm": 0.030012935400009155, "learning_rate": 0.00018377064674377787, "loss": 0.3599, "step": 8544 }, { "epoch": 0.6922391445236552, "grad_norm": 0.033467911183834076, "learning_rate": 0.00018376614609118323, "loss": 0.3333, "step": 8545 }, { "epoch": 0.6923201555411536, "grad_norm": 0.03058590553700924, "learning_rate": 0.0001837616454385886, "loss": 0.3182, "step": 8546 }, { "epoch": 0.692401166558652, "grad_norm": 0.028448492288589478, "learning_rate": 0.00018375714478599398, "loss": 0.3114, "step": 8547 }, { "epoch": 0.6924821775761504, "grad_norm": 0.04189586266875267, "learning_rate": 0.00018375264413339934, "loss": 0.378, "step": 8548 }, { "epoch": 0.6925631885936487, "grad_norm": 0.032369352877140045, "learning_rate": 0.00018374814348080473, "loss": 0.3884, "step": 8549 }, { "epoch": 0.6926441996111471, "grad_norm": 0.02977856807410717, "learning_rate": 0.00018374364282821012, "loss": 0.3471, "step": 8550 }, { "epoch": 0.6927252106286454, "grad_norm": 0.0340099073946476, "learning_rate": 0.00018373914217561548, "loss": 0.3973, "step": 8551 }, { "epoch": 0.6928062216461439, "grad_norm": 0.026700271293520927, "learning_rate": 0.00018373464152302084, "loss": 0.3254, "step": 8552 }, { "epoch": 0.6928872326636423, "grad_norm": 0.03509001433849335, "learning_rate": 0.00018373014087042622, "loss": 0.3497, "step": 8553 }, { "epoch": 0.6929682436811406, "grad_norm": 0.03207175433635712, "learning_rate": 0.00018372564021783158, "loss": 0.3451, "step": 8554 }, { "epoch": 0.693049254698639, "grad_norm": 0.032138824462890625, "learning_rate": 0.00018372113956523697, "loss": 0.3531, "step": 8555 }, { "epoch": 0.6931302657161373, "grad_norm": 0.033208414912223816, "learning_rate": 0.00018371663891264236, "loss": 0.348, "step": 8556 }, { "epoch": 0.6932112767336358, "grad_norm": 0.03227568790316582, "learning_rate": 0.00018371213826004772, "loss": 0.3497, "step": 8557 }, { "epoch": 0.6932922877511342, "grad_norm": 0.03891857713460922, "learning_rate": 0.00018370763760745308, "loss": 0.3617, "step": 8558 }, { "epoch": 0.6933732987686325, "grad_norm": 0.031320828944444656, "learning_rate": 0.00018370313695485846, "loss": 0.3262, "step": 8559 }, { "epoch": 0.6934543097861309, "grad_norm": 0.03595731779932976, "learning_rate": 0.00018369863630226382, "loss": 0.3573, "step": 8560 }, { "epoch": 0.6935353208036293, "grad_norm": 0.0347558967769146, "learning_rate": 0.0001836941356496692, "loss": 0.3578, "step": 8561 }, { "epoch": 0.6936163318211277, "grad_norm": 0.02745945379137993, "learning_rate": 0.0001836896349970746, "loss": 0.3237, "step": 8562 }, { "epoch": 0.693697342838626, "grad_norm": 0.031957414001226425, "learning_rate": 0.00018368513434447996, "loss": 0.3488, "step": 8563 }, { "epoch": 0.6937783538561244, "grad_norm": 0.03347107395529747, "learning_rate": 0.00018368063369188532, "loss": 0.4016, "step": 8564 }, { "epoch": 0.6938593648736228, "grad_norm": 0.03430968523025513, "learning_rate": 0.0001836761330392907, "loss": 0.3403, "step": 8565 }, { "epoch": 0.6939403758911212, "grad_norm": 0.03299398720264435, "learning_rate": 0.00018367163238669607, "loss": 0.3568, "step": 8566 }, { "epoch": 0.6940213869086196, "grad_norm": 0.03152777627110481, "learning_rate": 0.00018366713173410145, "loss": 0.3231, "step": 8567 }, { "epoch": 0.6941023979261179, "grad_norm": 0.0307911466807127, "learning_rate": 0.00018366263108150684, "loss": 0.3596, "step": 8568 }, { "epoch": 0.6941834089436163, "grad_norm": 0.029098432511091232, "learning_rate": 0.0001836581304289122, "loss": 0.3651, "step": 8569 }, { "epoch": 0.6942644199611148, "grad_norm": 0.02972140535712242, "learning_rate": 0.00018365362977631756, "loss": 0.3355, "step": 8570 }, { "epoch": 0.6943454309786131, "grad_norm": 0.027684060856699944, "learning_rate": 0.00018364912912372295, "loss": 0.3261, "step": 8571 }, { "epoch": 0.6944264419961115, "grad_norm": 0.034204453229904175, "learning_rate": 0.00018364462847112833, "loss": 0.4012, "step": 8572 }, { "epoch": 0.6945074530136098, "grad_norm": 0.03092847764492035, "learning_rate": 0.0001836401278185337, "loss": 0.3891, "step": 8573 }, { "epoch": 0.6945884640311082, "grad_norm": 0.0319262258708477, "learning_rate": 0.00018363562716593908, "loss": 0.3884, "step": 8574 }, { "epoch": 0.6946694750486067, "grad_norm": 0.030752461403608322, "learning_rate": 0.00018363112651334444, "loss": 0.3154, "step": 8575 }, { "epoch": 0.694750486066105, "grad_norm": 0.03386900946497917, "learning_rate": 0.0001836266258607498, "loss": 0.3706, "step": 8576 }, { "epoch": 0.6948314970836034, "grad_norm": 0.034681811928749084, "learning_rate": 0.0001836221252081552, "loss": 0.3592, "step": 8577 }, { "epoch": 0.6949125081011017, "grad_norm": 0.027019526809453964, "learning_rate": 0.00018361762455556058, "loss": 0.3067, "step": 8578 }, { "epoch": 0.6949935191186001, "grad_norm": 0.031102588400244713, "learning_rate": 0.00018361312390296594, "loss": 0.3359, "step": 8579 }, { "epoch": 0.6950745301360985, "grad_norm": 0.030616452917456627, "learning_rate": 0.00018360862325037132, "loss": 0.3852, "step": 8580 }, { "epoch": 0.6951555411535969, "grad_norm": 0.044032927602529526, "learning_rate": 0.00018360412259777668, "loss": 0.3905, "step": 8581 }, { "epoch": 0.6952365521710953, "grad_norm": 0.029699688777327538, "learning_rate": 0.00018359962194518204, "loss": 0.3783, "step": 8582 }, { "epoch": 0.6953175631885936, "grad_norm": 0.026341859251260757, "learning_rate": 0.00018359512129258743, "loss": 0.2967, "step": 8583 }, { "epoch": 0.6953985742060921, "grad_norm": 0.035602036863565445, "learning_rate": 0.00018359062063999282, "loss": 0.3264, "step": 8584 }, { "epoch": 0.6954795852235904, "grad_norm": 0.03353574126958847, "learning_rate": 0.00018358611998739818, "loss": 0.3803, "step": 8585 }, { "epoch": 0.6955605962410888, "grad_norm": 0.0328943133354187, "learning_rate": 0.00018358161933480357, "loss": 0.3333, "step": 8586 }, { "epoch": 0.6956416072585871, "grad_norm": 0.032131899148225784, "learning_rate": 0.00018357711868220893, "loss": 0.3782, "step": 8587 }, { "epoch": 0.6957226182760855, "grad_norm": 0.02701735869050026, "learning_rate": 0.00018357261802961429, "loss": 0.3116, "step": 8588 }, { "epoch": 0.695803629293584, "grad_norm": 0.03216202184557915, "learning_rate": 0.00018356811737701967, "loss": 0.3545, "step": 8589 }, { "epoch": 0.6958846403110823, "grad_norm": 0.028192881494760513, "learning_rate": 0.00018356361672442506, "loss": 0.306, "step": 8590 }, { "epoch": 0.6959656513285807, "grad_norm": 0.02953306771814823, "learning_rate": 0.00018355911607183042, "loss": 0.3099, "step": 8591 }, { "epoch": 0.696046662346079, "grad_norm": 0.03247429430484772, "learning_rate": 0.0001835546154192358, "loss": 0.3325, "step": 8592 }, { "epoch": 0.6961276733635774, "grad_norm": 0.032983459532260895, "learning_rate": 0.00018355011476664117, "loss": 0.3769, "step": 8593 }, { "epoch": 0.6962086843810759, "grad_norm": 0.03049176186323166, "learning_rate": 0.00018354561411404653, "loss": 0.2843, "step": 8594 }, { "epoch": 0.6962896953985742, "grad_norm": 0.025875264778733253, "learning_rate": 0.00018354111346145194, "loss": 0.3501, "step": 8595 }, { "epoch": 0.6963707064160726, "grad_norm": 0.030669327825307846, "learning_rate": 0.0001835366128088573, "loss": 0.3537, "step": 8596 }, { "epoch": 0.6964517174335709, "grad_norm": 0.028857417404651642, "learning_rate": 0.00018353211215626266, "loss": 0.2993, "step": 8597 }, { "epoch": 0.6965327284510694, "grad_norm": 0.030767953023314476, "learning_rate": 0.00018352761150366805, "loss": 0.3266, "step": 8598 }, { "epoch": 0.6966137394685677, "grad_norm": 0.035894282162189484, "learning_rate": 0.0001835231108510734, "loss": 0.3871, "step": 8599 }, { "epoch": 0.6966947504860661, "grad_norm": 0.03249775990843773, "learning_rate": 0.00018351861019847877, "loss": 0.345, "step": 8600 }, { "epoch": 0.6967757615035645, "grad_norm": 0.03632766380906105, "learning_rate": 0.00018351410954588418, "loss": 0.405, "step": 8601 }, { "epoch": 0.6968567725210628, "grad_norm": 0.029767895117402077, "learning_rate": 0.00018350960889328954, "loss": 0.3026, "step": 8602 }, { "epoch": 0.6969377835385613, "grad_norm": 0.03096391074359417, "learning_rate": 0.0001835051082406949, "loss": 0.2842, "step": 8603 }, { "epoch": 0.6970187945560596, "grad_norm": 0.03562683239579201, "learning_rate": 0.0001835006075881003, "loss": 0.3859, "step": 8604 }, { "epoch": 0.697099805573558, "grad_norm": 0.030624883249402046, "learning_rate": 0.00018349610693550565, "loss": 0.3152, "step": 8605 }, { "epoch": 0.6971808165910564, "grad_norm": 0.035233303904533386, "learning_rate": 0.000183491606282911, "loss": 0.3304, "step": 8606 }, { "epoch": 0.6972618276085548, "grad_norm": 0.031249161809682846, "learning_rate": 0.00018348710563031642, "loss": 0.3441, "step": 8607 }, { "epoch": 0.6973428386260532, "grad_norm": 0.0314306803047657, "learning_rate": 0.00018348260497772178, "loss": 0.3646, "step": 8608 }, { "epoch": 0.6974238496435515, "grad_norm": 0.03200050815939903, "learning_rate": 0.00018347810432512714, "loss": 0.3381, "step": 8609 }, { "epoch": 0.6975048606610499, "grad_norm": 0.04529730975627899, "learning_rate": 0.00018347360367253253, "loss": 0.3428, "step": 8610 }, { "epoch": 0.6975858716785482, "grad_norm": 0.029167592525482178, "learning_rate": 0.0001834691030199379, "loss": 0.2873, "step": 8611 }, { "epoch": 0.6976668826960467, "grad_norm": 0.03315237909555435, "learning_rate": 0.00018346460236734325, "loss": 0.3004, "step": 8612 }, { "epoch": 0.6977478937135451, "grad_norm": 0.033364132046699524, "learning_rate": 0.00018346010171474867, "loss": 0.3412, "step": 8613 }, { "epoch": 0.6978289047310434, "grad_norm": 0.03286145254969597, "learning_rate": 0.00018345560106215403, "loss": 0.3819, "step": 8614 }, { "epoch": 0.6979099157485418, "grad_norm": 0.030075784772634506, "learning_rate": 0.00018345110040955939, "loss": 0.3545, "step": 8615 }, { "epoch": 0.6979909267660401, "grad_norm": 0.0326153002679348, "learning_rate": 0.00018344659975696477, "loss": 0.2943, "step": 8616 }, { "epoch": 0.6980719377835386, "grad_norm": 0.031880997121334076, "learning_rate": 0.00018344209910437013, "loss": 0.3736, "step": 8617 }, { "epoch": 0.698152948801037, "grad_norm": 0.034937191754579544, "learning_rate": 0.0001834375984517755, "loss": 0.3137, "step": 8618 }, { "epoch": 0.6982339598185353, "grad_norm": 0.03716542199254036, "learning_rate": 0.0001834330977991809, "loss": 0.3653, "step": 8619 }, { "epoch": 0.6983149708360337, "grad_norm": 0.03413591533899307, "learning_rate": 0.00018342859714658627, "loss": 0.3028, "step": 8620 }, { "epoch": 0.6983959818535321, "grad_norm": 0.030354049056768417, "learning_rate": 0.00018342409649399163, "loss": 0.3092, "step": 8621 }, { "epoch": 0.6984769928710305, "grad_norm": 0.026400646194815636, "learning_rate": 0.00018341959584139701, "loss": 0.3403, "step": 8622 }, { "epoch": 0.6985580038885288, "grad_norm": 0.03018624521791935, "learning_rate": 0.00018341509518880237, "loss": 0.3382, "step": 8623 }, { "epoch": 0.6986390149060272, "grad_norm": 0.028660321608185768, "learning_rate": 0.00018341059453620776, "loss": 0.3172, "step": 8624 }, { "epoch": 0.6987200259235256, "grad_norm": 0.03087625838816166, "learning_rate": 0.00018340609388361315, "loss": 0.3266, "step": 8625 }, { "epoch": 0.698801036941024, "grad_norm": 0.030530283227562904, "learning_rate": 0.0001834015932310185, "loss": 0.3811, "step": 8626 }, { "epoch": 0.6988820479585224, "grad_norm": 0.030472485348582268, "learning_rate": 0.00018339709257842387, "loss": 0.3513, "step": 8627 }, { "epoch": 0.6989630589760207, "grad_norm": 0.035994235426187515, "learning_rate": 0.00018339259192582926, "loss": 0.4433, "step": 8628 }, { "epoch": 0.6990440699935191, "grad_norm": 0.031035959720611572, "learning_rate": 0.00018338809127323462, "loss": 0.319, "step": 8629 }, { "epoch": 0.6991250810110174, "grad_norm": 0.03235156834125519, "learning_rate": 0.00018338359062064, "loss": 0.3806, "step": 8630 }, { "epoch": 0.6992060920285159, "grad_norm": 0.027372000738978386, "learning_rate": 0.0001833790899680454, "loss": 0.3305, "step": 8631 }, { "epoch": 0.6992871030460143, "grad_norm": 0.032422930002212524, "learning_rate": 0.00018337458931545075, "loss": 0.3677, "step": 8632 }, { "epoch": 0.6993681140635126, "grad_norm": 0.028228141367435455, "learning_rate": 0.0001833700886628561, "loss": 0.3047, "step": 8633 }, { "epoch": 0.699449125081011, "grad_norm": 0.033268511295318604, "learning_rate": 0.0001833655880102615, "loss": 0.3476, "step": 8634 }, { "epoch": 0.6995301360985094, "grad_norm": 0.03443459793925285, "learning_rate": 0.00018336108735766686, "loss": 0.3696, "step": 8635 }, { "epoch": 0.6996111471160078, "grad_norm": 0.03373868018388748, "learning_rate": 0.00018335658670507225, "loss": 0.3547, "step": 8636 }, { "epoch": 0.6996921581335062, "grad_norm": 0.03315461426973343, "learning_rate": 0.00018335208605247763, "loss": 0.3661, "step": 8637 }, { "epoch": 0.6997731691510045, "grad_norm": 0.034669872373342514, "learning_rate": 0.000183347585399883, "loss": 0.3788, "step": 8638 }, { "epoch": 0.6998541801685029, "grad_norm": 0.03502920642495155, "learning_rate": 0.00018334308474728835, "loss": 0.3149, "step": 8639 }, { "epoch": 0.6999351911860013, "grad_norm": 0.02984611876308918, "learning_rate": 0.00018333858409469374, "loss": 0.3106, "step": 8640 }, { "epoch": 0.7000162022034997, "grad_norm": 0.03439144045114517, "learning_rate": 0.0001833340834420991, "loss": 0.3083, "step": 8641 }, { "epoch": 0.700097213220998, "grad_norm": 0.03035905957221985, "learning_rate": 0.0001833295827895045, "loss": 0.3185, "step": 8642 }, { "epoch": 0.7001782242384964, "grad_norm": 0.02940438501536846, "learning_rate": 0.00018332508213690987, "loss": 0.3162, "step": 8643 }, { "epoch": 0.7002592352559948, "grad_norm": 0.031967442482709885, "learning_rate": 0.00018332058148431523, "loss": 0.3732, "step": 8644 }, { "epoch": 0.7003402462734932, "grad_norm": 0.03425975888967514, "learning_rate": 0.0001833160808317206, "loss": 0.3661, "step": 8645 }, { "epoch": 0.7004212572909916, "grad_norm": 0.03647081181406975, "learning_rate": 0.00018331158017912598, "loss": 0.3241, "step": 8646 }, { "epoch": 0.7005022683084899, "grad_norm": 0.03272273391485214, "learning_rate": 0.00018330707952653137, "loss": 0.3178, "step": 8647 }, { "epoch": 0.7005832793259883, "grad_norm": 0.03189370036125183, "learning_rate": 0.00018330257887393673, "loss": 0.3577, "step": 8648 }, { "epoch": 0.7006642903434868, "grad_norm": 0.03181673586368561, "learning_rate": 0.00018329807822134212, "loss": 0.355, "step": 8649 }, { "epoch": 0.7007453013609851, "grad_norm": 0.034718107432127, "learning_rate": 0.00018329357756874748, "loss": 0.3608, "step": 8650 }, { "epoch": 0.7008263123784835, "grad_norm": 0.027240954339504242, "learning_rate": 0.00018328907691615284, "loss": 0.3099, "step": 8651 }, { "epoch": 0.7009073233959818, "grad_norm": 0.03302300348877907, "learning_rate": 0.00018328457626355822, "loss": 0.3498, "step": 8652 }, { "epoch": 0.7009883344134802, "grad_norm": 0.031916722655296326, "learning_rate": 0.0001832800756109636, "loss": 0.3537, "step": 8653 }, { "epoch": 0.7010693454309787, "grad_norm": 0.03751882165670395, "learning_rate": 0.00018327557495836897, "loss": 0.3693, "step": 8654 }, { "epoch": 0.701150356448477, "grad_norm": 0.03253519907593727, "learning_rate": 0.00018327107430577436, "loss": 0.3914, "step": 8655 }, { "epoch": 0.7012313674659754, "grad_norm": 0.029587239027023315, "learning_rate": 0.00018326657365317972, "loss": 0.3344, "step": 8656 }, { "epoch": 0.7013123784834737, "grad_norm": 0.025954367592930794, "learning_rate": 0.00018326207300058508, "loss": 0.3117, "step": 8657 }, { "epoch": 0.7013933895009722, "grad_norm": 0.030775019899010658, "learning_rate": 0.00018325757234799046, "loss": 0.3515, "step": 8658 }, { "epoch": 0.7014744005184705, "grad_norm": 0.0295817069709301, "learning_rate": 0.00018325307169539585, "loss": 0.3184, "step": 8659 }, { "epoch": 0.7015554115359689, "grad_norm": 0.03209137171506882, "learning_rate": 0.0001832485710428012, "loss": 0.3163, "step": 8660 }, { "epoch": 0.7016364225534673, "grad_norm": 0.03405177965760231, "learning_rate": 0.0001832440703902066, "loss": 0.3536, "step": 8661 }, { "epoch": 0.7017174335709656, "grad_norm": 0.034785494208335876, "learning_rate": 0.00018323956973761196, "loss": 0.3315, "step": 8662 }, { "epoch": 0.7017984445884641, "grad_norm": 0.030162710696458817, "learning_rate": 0.00018323506908501732, "loss": 0.3357, "step": 8663 }, { "epoch": 0.7018794556059624, "grad_norm": 0.03082972951233387, "learning_rate": 0.0001832305684324227, "loss": 0.3536, "step": 8664 }, { "epoch": 0.7019604666234608, "grad_norm": 0.03256779909133911, "learning_rate": 0.0001832260677798281, "loss": 0.3666, "step": 8665 }, { "epoch": 0.7020414776409591, "grad_norm": 0.035849783569574356, "learning_rate": 0.00018322156712723345, "loss": 0.3553, "step": 8666 }, { "epoch": 0.7021224886584575, "grad_norm": 0.030329683795571327, "learning_rate": 0.00018321706647463884, "loss": 0.3572, "step": 8667 }, { "epoch": 0.702203499675956, "grad_norm": 0.03349778801202774, "learning_rate": 0.0001832125658220442, "loss": 0.391, "step": 8668 }, { "epoch": 0.7022845106934543, "grad_norm": 0.029244735836982727, "learning_rate": 0.00018320806516944956, "loss": 0.3374, "step": 8669 }, { "epoch": 0.7023655217109527, "grad_norm": 0.03602052479982376, "learning_rate": 0.00018320356451685495, "loss": 0.3542, "step": 8670 }, { "epoch": 0.702446532728451, "grad_norm": 0.032265808433294296, "learning_rate": 0.00018319906386426033, "loss": 0.3379, "step": 8671 }, { "epoch": 0.7025275437459495, "grad_norm": 0.03190753981471062, "learning_rate": 0.0001831945632116657, "loss": 0.3775, "step": 8672 }, { "epoch": 0.7026085547634479, "grad_norm": 0.03543262183666229, "learning_rate": 0.00018319006255907108, "loss": 0.3369, "step": 8673 }, { "epoch": 0.7026895657809462, "grad_norm": 0.030671080574393272, "learning_rate": 0.00018318556190647644, "loss": 0.3364, "step": 8674 }, { "epoch": 0.7027705767984446, "grad_norm": 0.031106283888220787, "learning_rate": 0.0001831810612538818, "loss": 0.2999, "step": 8675 }, { "epoch": 0.7028515878159429, "grad_norm": 0.03163905814290047, "learning_rate": 0.00018317656060128722, "loss": 0.3724, "step": 8676 }, { "epoch": 0.7029325988334414, "grad_norm": 0.02911280281841755, "learning_rate": 0.00018317205994869258, "loss": 0.3077, "step": 8677 }, { "epoch": 0.7030136098509397, "grad_norm": 0.0325298085808754, "learning_rate": 0.00018316755929609794, "loss": 0.3562, "step": 8678 }, { "epoch": 0.7030946208684381, "grad_norm": 0.027824802324175835, "learning_rate": 0.00018316305864350332, "loss": 0.317, "step": 8679 }, { "epoch": 0.7031756318859365, "grad_norm": 0.027131253853440285, "learning_rate": 0.00018315855799090868, "loss": 0.3124, "step": 8680 }, { "epoch": 0.7032566429034348, "grad_norm": 0.033741287887096405, "learning_rate": 0.00018315405733831404, "loss": 0.3374, "step": 8681 }, { "epoch": 0.7033376539209333, "grad_norm": 0.03055637702345848, "learning_rate": 0.00018314955668571946, "loss": 0.3231, "step": 8682 }, { "epoch": 0.7034186649384316, "grad_norm": 0.03051404468715191, "learning_rate": 0.00018314505603312482, "loss": 0.3192, "step": 8683 }, { "epoch": 0.70349967595593, "grad_norm": 0.029856974259018898, "learning_rate": 0.00018314055538053018, "loss": 0.3274, "step": 8684 }, { "epoch": 0.7035806869734283, "grad_norm": 0.0327875092625618, "learning_rate": 0.00018313605472793557, "loss": 0.3225, "step": 8685 }, { "epoch": 0.7036616979909268, "grad_norm": 0.03562135621905327, "learning_rate": 0.00018313155407534093, "loss": 0.3994, "step": 8686 }, { "epoch": 0.7037427090084252, "grad_norm": 0.03183677792549133, "learning_rate": 0.00018312705342274629, "loss": 0.3577, "step": 8687 }, { "epoch": 0.7038237200259235, "grad_norm": 0.034511812031269073, "learning_rate": 0.0001831225527701517, "loss": 0.3383, "step": 8688 }, { "epoch": 0.7039047310434219, "grad_norm": 0.034360211342573166, "learning_rate": 0.00018311805211755706, "loss": 0.3884, "step": 8689 }, { "epoch": 0.7039857420609202, "grad_norm": 0.03103705681860447, "learning_rate": 0.00018311355146496242, "loss": 0.2922, "step": 8690 }, { "epoch": 0.7040667530784187, "grad_norm": 0.029840853065252304, "learning_rate": 0.0001831090508123678, "loss": 0.3547, "step": 8691 }, { "epoch": 0.7041477640959171, "grad_norm": 0.03040078841149807, "learning_rate": 0.00018310455015977317, "loss": 0.3452, "step": 8692 }, { "epoch": 0.7042287751134154, "grad_norm": 0.030439404770731926, "learning_rate": 0.00018310004950717853, "loss": 0.3216, "step": 8693 }, { "epoch": 0.7043097861309138, "grad_norm": 0.0319373719394207, "learning_rate": 0.00018309554885458394, "loss": 0.3025, "step": 8694 }, { "epoch": 0.7043907971484121, "grad_norm": 0.02826685458421707, "learning_rate": 0.0001830910482019893, "loss": 0.3383, "step": 8695 }, { "epoch": 0.7044718081659106, "grad_norm": 0.030664505437016487, "learning_rate": 0.00018308654754939466, "loss": 0.3165, "step": 8696 }, { "epoch": 0.704552819183409, "grad_norm": 0.03440213203430176, "learning_rate": 0.00018308204689680005, "loss": 0.3911, "step": 8697 }, { "epoch": 0.7046338302009073, "grad_norm": 0.03921644017100334, "learning_rate": 0.0001830775462442054, "loss": 0.3432, "step": 8698 }, { "epoch": 0.7047148412184057, "grad_norm": 0.033602550625801086, "learning_rate": 0.0001830730455916108, "loss": 0.3138, "step": 8699 }, { "epoch": 0.7047958522359041, "grad_norm": 0.03248998522758484, "learning_rate": 0.00018306854493901618, "loss": 0.3435, "step": 8700 }, { "epoch": 0.7048768632534025, "grad_norm": 0.030209265649318695, "learning_rate": 0.00018306404428642154, "loss": 0.3108, "step": 8701 }, { "epoch": 0.7049578742709008, "grad_norm": 0.0281781367957592, "learning_rate": 0.0001830595436338269, "loss": 0.2978, "step": 8702 }, { "epoch": 0.7050388852883992, "grad_norm": 0.03777993470430374, "learning_rate": 0.0001830550429812323, "loss": 0.4091, "step": 8703 }, { "epoch": 0.7051198963058976, "grad_norm": 0.030923016369342804, "learning_rate": 0.00018305054232863765, "loss": 0.3174, "step": 8704 }, { "epoch": 0.705200907323396, "grad_norm": 0.03093315102159977, "learning_rate": 0.00018304604167604304, "loss": 0.324, "step": 8705 }, { "epoch": 0.7052819183408944, "grad_norm": 0.03139530122280121, "learning_rate": 0.00018304154102344842, "loss": 0.3232, "step": 8706 }, { "epoch": 0.7053629293583927, "grad_norm": 0.029654931277036667, "learning_rate": 0.00018303704037085378, "loss": 0.3483, "step": 8707 }, { "epoch": 0.7054439403758911, "grad_norm": 0.03465942665934563, "learning_rate": 0.00018303253971825914, "loss": 0.3657, "step": 8708 }, { "epoch": 0.7055249513933896, "grad_norm": 0.028565427288413048, "learning_rate": 0.00018302803906566453, "loss": 0.2924, "step": 8709 }, { "epoch": 0.7056059624108879, "grad_norm": 0.03348225727677345, "learning_rate": 0.0001830235384130699, "loss": 0.3582, "step": 8710 }, { "epoch": 0.7056869734283863, "grad_norm": 0.032475586980581284, "learning_rate": 0.00018301903776047528, "loss": 0.377, "step": 8711 }, { "epoch": 0.7057679844458846, "grad_norm": 0.03190094232559204, "learning_rate": 0.00018301453710788067, "loss": 0.3596, "step": 8712 }, { "epoch": 0.705848995463383, "grad_norm": 0.03844151273369789, "learning_rate": 0.00018301003645528603, "loss": 0.3868, "step": 8713 }, { "epoch": 0.7059300064808814, "grad_norm": 0.03228616714477539, "learning_rate": 0.00018300553580269139, "loss": 0.3538, "step": 8714 }, { "epoch": 0.7060110174983798, "grad_norm": 0.02916380949318409, "learning_rate": 0.00018300103515009677, "loss": 0.34, "step": 8715 }, { "epoch": 0.7060920285158782, "grad_norm": 0.03413008525967598, "learning_rate": 0.00018299653449750213, "loss": 0.3049, "step": 8716 }, { "epoch": 0.7061730395333765, "grad_norm": 0.033210255205631256, "learning_rate": 0.00018299203384490752, "loss": 0.3668, "step": 8717 }, { "epoch": 0.7062540505508749, "grad_norm": 0.033286821097135544, "learning_rate": 0.0001829875331923129, "loss": 0.3584, "step": 8718 }, { "epoch": 0.7063350615683733, "grad_norm": 0.029751164838671684, "learning_rate": 0.00018298303253971827, "loss": 0.301, "step": 8719 }, { "epoch": 0.7064160725858717, "grad_norm": 0.030733181163668633, "learning_rate": 0.00018297853188712363, "loss": 0.3435, "step": 8720 }, { "epoch": 0.70649708360337, "grad_norm": 0.03550855442881584, "learning_rate": 0.00018297403123452902, "loss": 0.4085, "step": 8721 }, { "epoch": 0.7065780946208684, "grad_norm": 0.0320737287402153, "learning_rate": 0.00018296953058193438, "loss": 0.325, "step": 8722 }, { "epoch": 0.7066591056383669, "grad_norm": 0.03291591629385948, "learning_rate": 0.00018296502992933976, "loss": 0.4128, "step": 8723 }, { "epoch": 0.7067401166558652, "grad_norm": 0.031303029507398605, "learning_rate": 0.00018296052927674515, "loss": 0.3524, "step": 8724 }, { "epoch": 0.7068211276733636, "grad_norm": 0.0403035506606102, "learning_rate": 0.0001829560286241505, "loss": 0.4219, "step": 8725 }, { "epoch": 0.7069021386908619, "grad_norm": 0.03615202009677887, "learning_rate": 0.00018295152797155587, "loss": 0.4069, "step": 8726 }, { "epoch": 0.7069831497083603, "grad_norm": 0.03636353090405464, "learning_rate": 0.00018294702731896126, "loss": 0.3371, "step": 8727 }, { "epoch": 0.7070641607258588, "grad_norm": 0.03489084169268608, "learning_rate": 0.00018294252666636664, "loss": 0.4238, "step": 8728 }, { "epoch": 0.7071451717433571, "grad_norm": 0.028424890711903572, "learning_rate": 0.000182938026013772, "loss": 0.3169, "step": 8729 }, { "epoch": 0.7072261827608555, "grad_norm": 0.03434450924396515, "learning_rate": 0.0001829335253611774, "loss": 0.3473, "step": 8730 }, { "epoch": 0.7073071937783538, "grad_norm": 0.02949521876871586, "learning_rate": 0.00018292902470858275, "loss": 0.312, "step": 8731 }, { "epoch": 0.7073882047958522, "grad_norm": 0.03468482196331024, "learning_rate": 0.0001829245240559881, "loss": 0.3502, "step": 8732 }, { "epoch": 0.7074692158133506, "grad_norm": 0.03378097712993622, "learning_rate": 0.0001829200234033935, "loss": 0.3398, "step": 8733 }, { "epoch": 0.707550226830849, "grad_norm": 0.030609462410211563, "learning_rate": 0.00018291552275079889, "loss": 0.3552, "step": 8734 }, { "epoch": 0.7076312378483474, "grad_norm": 0.028447598218917847, "learning_rate": 0.00018291102209820425, "loss": 0.3073, "step": 8735 }, { "epoch": 0.7077122488658457, "grad_norm": 0.02937387302517891, "learning_rate": 0.00018290652144560963, "loss": 0.3475, "step": 8736 }, { "epoch": 0.7077932598833442, "grad_norm": 0.03563931584358215, "learning_rate": 0.000182902020793015, "loss": 0.3395, "step": 8737 }, { "epoch": 0.7078742709008425, "grad_norm": 0.02817721478641033, "learning_rate": 0.00018289752014042035, "loss": 0.33, "step": 8738 }, { "epoch": 0.7079552819183409, "grad_norm": 0.03269148990511894, "learning_rate": 0.00018289301948782574, "loss": 0.3608, "step": 8739 }, { "epoch": 0.7080362929358393, "grad_norm": 0.03341267630457878, "learning_rate": 0.00018288851883523113, "loss": 0.393, "step": 8740 }, { "epoch": 0.7081173039533376, "grad_norm": 0.03010784648358822, "learning_rate": 0.0001828840181826365, "loss": 0.3202, "step": 8741 }, { "epoch": 0.7081983149708361, "grad_norm": 0.04232911765575409, "learning_rate": 0.00018287951753004187, "loss": 0.3817, "step": 8742 }, { "epoch": 0.7082793259883344, "grad_norm": 0.03095560148358345, "learning_rate": 0.00018287501687744723, "loss": 0.346, "step": 8743 }, { "epoch": 0.7083603370058328, "grad_norm": 0.031654082238674164, "learning_rate": 0.0001828705162248526, "loss": 0.3696, "step": 8744 }, { "epoch": 0.7084413480233311, "grad_norm": 0.03190073370933533, "learning_rate": 0.00018286601557225798, "loss": 0.3623, "step": 8745 }, { "epoch": 0.7085223590408296, "grad_norm": 0.03220773860812187, "learning_rate": 0.00018286151491966337, "loss": 0.36, "step": 8746 }, { "epoch": 0.708603370058328, "grad_norm": 0.031856559216976166, "learning_rate": 0.00018285701426706873, "loss": 0.3149, "step": 8747 }, { "epoch": 0.7086843810758263, "grad_norm": 0.03256354480981827, "learning_rate": 0.00018285251361447412, "loss": 0.3331, "step": 8748 }, { "epoch": 0.7087653920933247, "grad_norm": 0.03743598982691765, "learning_rate": 0.00018284801296187948, "loss": 0.3936, "step": 8749 }, { "epoch": 0.708846403110823, "grad_norm": 0.03272002190351486, "learning_rate": 0.00018284351230928484, "loss": 0.3475, "step": 8750 }, { "epoch": 0.7089274141283215, "grad_norm": 0.03147174045443535, "learning_rate": 0.00018283901165669022, "loss": 0.3302, "step": 8751 }, { "epoch": 0.7090084251458199, "grad_norm": 0.030662400647997856, "learning_rate": 0.0001828345110040956, "loss": 0.3505, "step": 8752 }, { "epoch": 0.7090894361633182, "grad_norm": 0.029921991750597954, "learning_rate": 0.00018283001035150097, "loss": 0.3221, "step": 8753 }, { "epoch": 0.7091704471808166, "grad_norm": 0.03617072105407715, "learning_rate": 0.00018282550969890636, "loss": 0.3553, "step": 8754 }, { "epoch": 0.7092514581983149, "grad_norm": 0.030982332304120064, "learning_rate": 0.00018282100904631172, "loss": 0.3824, "step": 8755 }, { "epoch": 0.7093324692158134, "grad_norm": 0.03520418331027031, "learning_rate": 0.00018281650839371708, "loss": 0.3946, "step": 8756 }, { "epoch": 0.7094134802333117, "grad_norm": 0.03270168974995613, "learning_rate": 0.0001828120077411225, "loss": 0.3001, "step": 8757 }, { "epoch": 0.7094944912508101, "grad_norm": 0.033834751695394516, "learning_rate": 0.00018280750708852785, "loss": 0.412, "step": 8758 }, { "epoch": 0.7095755022683085, "grad_norm": 0.028466802090406418, "learning_rate": 0.0001828030064359332, "loss": 0.3104, "step": 8759 }, { "epoch": 0.7096565132858069, "grad_norm": 0.032070502638816833, "learning_rate": 0.0001827985057833386, "loss": 0.3134, "step": 8760 }, { "epoch": 0.7097375243033053, "grad_norm": 0.03090481273829937, "learning_rate": 0.00018279400513074396, "loss": 0.3136, "step": 8761 }, { "epoch": 0.7098185353208036, "grad_norm": 0.030255356803536415, "learning_rate": 0.00018278950447814932, "loss": 0.3208, "step": 8762 }, { "epoch": 0.709899546338302, "grad_norm": 0.03436191380023956, "learning_rate": 0.00018278500382555473, "loss": 0.3745, "step": 8763 }, { "epoch": 0.7099805573558003, "grad_norm": 0.03800062835216522, "learning_rate": 0.0001827805031729601, "loss": 0.3415, "step": 8764 }, { "epoch": 0.7100615683732988, "grad_norm": 0.03259582817554474, "learning_rate": 0.00018277600252036545, "loss": 0.3683, "step": 8765 }, { "epoch": 0.7101425793907972, "grad_norm": 0.030449820682406425, "learning_rate": 0.00018277150186777084, "loss": 0.2866, "step": 8766 }, { "epoch": 0.7102235904082955, "grad_norm": 0.035687729716300964, "learning_rate": 0.0001827670012151762, "loss": 0.321, "step": 8767 }, { "epoch": 0.7103046014257939, "grad_norm": 0.0287131629884243, "learning_rate": 0.00018276250056258156, "loss": 0.3534, "step": 8768 }, { "epoch": 0.7103856124432922, "grad_norm": 0.030978770926594734, "learning_rate": 0.00018275799990998698, "loss": 0.3558, "step": 8769 }, { "epoch": 0.7104666234607907, "grad_norm": 0.03859172388911247, "learning_rate": 0.00018275349925739234, "loss": 0.3639, "step": 8770 }, { "epoch": 0.7105476344782891, "grad_norm": 0.029355809092521667, "learning_rate": 0.0001827489986047977, "loss": 0.3119, "step": 8771 }, { "epoch": 0.7106286454957874, "grad_norm": 0.03254703804850578, "learning_rate": 0.00018274449795220308, "loss": 0.3357, "step": 8772 }, { "epoch": 0.7107096565132858, "grad_norm": 0.029273660853505135, "learning_rate": 0.00018273999729960844, "loss": 0.3378, "step": 8773 }, { "epoch": 0.7107906675307842, "grad_norm": 0.034787289798259735, "learning_rate": 0.0001827354966470138, "loss": 0.3994, "step": 8774 }, { "epoch": 0.7108716785482826, "grad_norm": 0.034370459616184235, "learning_rate": 0.00018273099599441922, "loss": 0.3461, "step": 8775 }, { "epoch": 0.710952689565781, "grad_norm": 0.03280269354581833, "learning_rate": 0.00018272649534182458, "loss": 0.3364, "step": 8776 }, { "epoch": 0.7110337005832793, "grad_norm": 0.03019985742866993, "learning_rate": 0.00018272199468922994, "loss": 0.3456, "step": 8777 }, { "epoch": 0.7111147116007777, "grad_norm": 0.038414500653743744, "learning_rate": 0.00018271749403663532, "loss": 0.3545, "step": 8778 }, { "epoch": 0.7111957226182761, "grad_norm": 0.03398662805557251, "learning_rate": 0.00018271299338404068, "loss": 0.3426, "step": 8779 }, { "epoch": 0.7112767336357745, "grad_norm": 0.03139416128396988, "learning_rate": 0.00018270849273144607, "loss": 0.3325, "step": 8780 }, { "epoch": 0.7113577446532728, "grad_norm": 0.027758194133639336, "learning_rate": 0.00018270399207885146, "loss": 0.299, "step": 8781 }, { "epoch": 0.7114387556707712, "grad_norm": 0.03087412379682064, "learning_rate": 0.00018269949142625682, "loss": 0.3143, "step": 8782 }, { "epoch": 0.7115197666882696, "grad_norm": 0.03145689144730568, "learning_rate": 0.00018269499077366218, "loss": 0.3453, "step": 8783 }, { "epoch": 0.711600777705768, "grad_norm": 0.035226672887802124, "learning_rate": 0.00018269049012106757, "loss": 0.375, "step": 8784 }, { "epoch": 0.7116817887232664, "grad_norm": 0.03201188147068024, "learning_rate": 0.00018268598946847293, "loss": 0.3434, "step": 8785 }, { "epoch": 0.7117627997407647, "grad_norm": 0.03402809053659439, "learning_rate": 0.0001826814888158783, "loss": 0.3468, "step": 8786 }, { "epoch": 0.7118438107582631, "grad_norm": 0.034564413130283356, "learning_rate": 0.0001826769881632837, "loss": 0.3548, "step": 8787 }, { "epoch": 0.7119248217757616, "grad_norm": 0.030737025663256645, "learning_rate": 0.00018267248751068906, "loss": 0.3013, "step": 8788 }, { "epoch": 0.7120058327932599, "grad_norm": 0.02977253682911396, "learning_rate": 0.00018266798685809442, "loss": 0.3244, "step": 8789 }, { "epoch": 0.7120868438107583, "grad_norm": 0.028753027319908142, "learning_rate": 0.0001826634862054998, "loss": 0.321, "step": 8790 }, { "epoch": 0.7121678548282566, "grad_norm": 0.028307564556598663, "learning_rate": 0.00018265898555290517, "loss": 0.3137, "step": 8791 }, { "epoch": 0.712248865845755, "grad_norm": 0.030974021181464195, "learning_rate": 0.00018265448490031055, "loss": 0.3008, "step": 8792 }, { "epoch": 0.7123298768632534, "grad_norm": 0.028313852846622467, "learning_rate": 0.00018264998424771594, "loss": 0.3281, "step": 8793 }, { "epoch": 0.7124108878807518, "grad_norm": 0.03233834356069565, "learning_rate": 0.0001826454835951213, "loss": 0.302, "step": 8794 }, { "epoch": 0.7124918988982502, "grad_norm": 0.030246617272496223, "learning_rate": 0.00018264098294252666, "loss": 0.3111, "step": 8795 }, { "epoch": 0.7125729099157485, "grad_norm": 0.03288370370864868, "learning_rate": 0.00018263648228993205, "loss": 0.3461, "step": 8796 }, { "epoch": 0.712653920933247, "grad_norm": 0.03088957816362381, "learning_rate": 0.0001826319816373374, "loss": 0.34, "step": 8797 }, { "epoch": 0.7127349319507453, "grad_norm": 0.032008424401283264, "learning_rate": 0.0001826274809847428, "loss": 0.3632, "step": 8798 }, { "epoch": 0.7128159429682437, "grad_norm": 0.03967854380607605, "learning_rate": 0.00018262298033214818, "loss": 0.3617, "step": 8799 }, { "epoch": 0.712896953985742, "grad_norm": 0.03548341989517212, "learning_rate": 0.00018261847967955354, "loss": 0.3606, "step": 8800 }, { "epoch": 0.7129779650032404, "grad_norm": 0.033855367451906204, "learning_rate": 0.0001826139790269589, "loss": 0.3144, "step": 8801 }, { "epoch": 0.7130589760207389, "grad_norm": 0.0331764742732048, "learning_rate": 0.0001826094783743643, "loss": 0.4024, "step": 8802 }, { "epoch": 0.7131399870382372, "grad_norm": 0.036456841975450516, "learning_rate": 0.00018260497772176965, "loss": 0.3421, "step": 8803 }, { "epoch": 0.7132209980557356, "grad_norm": 0.03295794501900673, "learning_rate": 0.00018260047706917504, "loss": 0.3488, "step": 8804 }, { "epoch": 0.7133020090732339, "grad_norm": 0.03683172166347504, "learning_rate": 0.00018259597641658042, "loss": 0.3197, "step": 8805 }, { "epoch": 0.7133830200907323, "grad_norm": 0.02898084558546543, "learning_rate": 0.00018259147576398578, "loss": 0.3493, "step": 8806 }, { "epoch": 0.7134640311082308, "grad_norm": 0.03242986276745796, "learning_rate": 0.00018258697511139114, "loss": 0.3182, "step": 8807 }, { "epoch": 0.7135450421257291, "grad_norm": 0.03240397945046425, "learning_rate": 0.00018258247445879653, "loss": 0.3224, "step": 8808 }, { "epoch": 0.7136260531432275, "grad_norm": 0.03218116611242294, "learning_rate": 0.00018257797380620192, "loss": 0.3398, "step": 8809 }, { "epoch": 0.7137070641607258, "grad_norm": 0.03495160490274429, "learning_rate": 0.00018257347315360728, "loss": 0.3756, "step": 8810 }, { "epoch": 0.7137880751782243, "grad_norm": 0.035370394587516785, "learning_rate": 0.00018256897250101267, "loss": 0.3639, "step": 8811 }, { "epoch": 0.7138690861957226, "grad_norm": 0.029185624793171883, "learning_rate": 0.00018256447184841803, "loss": 0.34, "step": 8812 }, { "epoch": 0.713950097213221, "grad_norm": 0.034185972064733505, "learning_rate": 0.0001825599711958234, "loss": 0.3777, "step": 8813 }, { "epoch": 0.7140311082307194, "grad_norm": 0.03374910727143288, "learning_rate": 0.00018255547054322877, "loss": 0.3375, "step": 8814 }, { "epoch": 0.7141121192482177, "grad_norm": 0.029371364042162895, "learning_rate": 0.00018255096989063416, "loss": 0.3024, "step": 8815 }, { "epoch": 0.7141931302657162, "grad_norm": 0.03439253568649292, "learning_rate": 0.00018254646923803952, "loss": 0.3091, "step": 8816 }, { "epoch": 0.7142741412832145, "grad_norm": 0.042450908571481705, "learning_rate": 0.0001825419685854449, "loss": 0.3589, "step": 8817 }, { "epoch": 0.7143551523007129, "grad_norm": 0.03298235312104225, "learning_rate": 0.00018253746793285027, "loss": 0.3122, "step": 8818 }, { "epoch": 0.7144361633182112, "grad_norm": 0.028749145567417145, "learning_rate": 0.00018253296728025563, "loss": 0.3337, "step": 8819 }, { "epoch": 0.7145171743357096, "grad_norm": 0.02993011847138405, "learning_rate": 0.00018252846662766102, "loss": 0.354, "step": 8820 }, { "epoch": 0.7145981853532081, "grad_norm": 0.030420931056141853, "learning_rate": 0.0001825239659750664, "loss": 0.3231, "step": 8821 }, { "epoch": 0.7146791963707064, "grad_norm": 0.03501799702644348, "learning_rate": 0.00018251946532247176, "loss": 0.3781, "step": 8822 }, { "epoch": 0.7147602073882048, "grad_norm": 0.030919726938009262, "learning_rate": 0.00018251496466987715, "loss": 0.3437, "step": 8823 }, { "epoch": 0.7148412184057031, "grad_norm": 0.03802415356040001, "learning_rate": 0.0001825104640172825, "loss": 0.3688, "step": 8824 }, { "epoch": 0.7149222294232016, "grad_norm": 0.03371911495923996, "learning_rate": 0.00018250596336468787, "loss": 0.3584, "step": 8825 }, { "epoch": 0.7150032404407, "grad_norm": 0.03014080785214901, "learning_rate": 0.00018250146271209326, "loss": 0.3006, "step": 8826 }, { "epoch": 0.7150842514581983, "grad_norm": 0.032793935388326645, "learning_rate": 0.00018249696205949864, "loss": 0.3479, "step": 8827 }, { "epoch": 0.7151652624756967, "grad_norm": 0.02692987769842148, "learning_rate": 0.000182492461406904, "loss": 0.3172, "step": 8828 }, { "epoch": 0.715246273493195, "grad_norm": 0.031958647072315216, "learning_rate": 0.0001824879607543094, "loss": 0.34, "step": 8829 }, { "epoch": 0.7153272845106935, "grad_norm": 0.03293073922395706, "learning_rate": 0.00018248346010171475, "loss": 0.3599, "step": 8830 }, { "epoch": 0.7154082955281919, "grad_norm": 0.037021856755018234, "learning_rate": 0.0001824789594491201, "loss": 0.3591, "step": 8831 }, { "epoch": 0.7154893065456902, "grad_norm": 0.034067876636981964, "learning_rate": 0.00018247445879652553, "loss": 0.3729, "step": 8832 }, { "epoch": 0.7155703175631886, "grad_norm": 0.030054716393351555, "learning_rate": 0.00018246995814393089, "loss": 0.3042, "step": 8833 }, { "epoch": 0.7156513285806869, "grad_norm": 0.030154969543218613, "learning_rate": 0.00018246545749133625, "loss": 0.334, "step": 8834 }, { "epoch": 0.7157323395981854, "grad_norm": 0.0348099060356617, "learning_rate": 0.00018246095683874163, "loss": 0.3847, "step": 8835 }, { "epoch": 0.7158133506156837, "grad_norm": 0.029983345419168472, "learning_rate": 0.000182456456186147, "loss": 0.2999, "step": 8836 }, { "epoch": 0.7158943616331821, "grad_norm": 0.03360939770936966, "learning_rate": 0.00018245195553355235, "loss": 0.3884, "step": 8837 }, { "epoch": 0.7159753726506805, "grad_norm": 0.03326816111803055, "learning_rate": 0.00018244745488095777, "loss": 0.3523, "step": 8838 }, { "epoch": 0.7160563836681789, "grad_norm": 0.0402449294924736, "learning_rate": 0.00018244295422836313, "loss": 0.4038, "step": 8839 }, { "epoch": 0.7161373946856773, "grad_norm": 0.031454093754291534, "learning_rate": 0.0001824384535757685, "loss": 0.3471, "step": 8840 }, { "epoch": 0.7162184057031756, "grad_norm": 0.03565017133951187, "learning_rate": 0.00018243395292317387, "loss": 0.341, "step": 8841 }, { "epoch": 0.716299416720674, "grad_norm": 0.03517875447869301, "learning_rate": 0.00018242945227057923, "loss": 0.3271, "step": 8842 }, { "epoch": 0.7163804277381723, "grad_norm": 0.03396892547607422, "learning_rate": 0.0001824249516179846, "loss": 0.3345, "step": 8843 }, { "epoch": 0.7164614387556708, "grad_norm": 0.03428976982831955, "learning_rate": 0.00018242045096539, "loss": 0.3868, "step": 8844 }, { "epoch": 0.7165424497731692, "grad_norm": 0.03712880238890648, "learning_rate": 0.00018241595031279537, "loss": 0.3605, "step": 8845 }, { "epoch": 0.7166234607906675, "grad_norm": 0.030677849426865578, "learning_rate": 0.00018241144966020073, "loss": 0.289, "step": 8846 }, { "epoch": 0.7167044718081659, "grad_norm": 0.03206459432840347, "learning_rate": 0.00018240694900760612, "loss": 0.3164, "step": 8847 }, { "epoch": 0.7167854828256643, "grad_norm": 0.033618632704019547, "learning_rate": 0.00018240244835501148, "loss": 0.3338, "step": 8848 }, { "epoch": 0.7168664938431627, "grad_norm": 0.031833041459321976, "learning_rate": 0.00018239794770241684, "loss": 0.3573, "step": 8849 }, { "epoch": 0.7169475048606611, "grad_norm": 0.03609463945031166, "learning_rate": 0.00018239344704982225, "loss": 0.3515, "step": 8850 }, { "epoch": 0.7170285158781594, "grad_norm": 0.030079221352934837, "learning_rate": 0.0001823889463972276, "loss": 0.3107, "step": 8851 }, { "epoch": 0.7171095268956578, "grad_norm": 0.03663382679224014, "learning_rate": 0.00018238444574463297, "loss": 0.3602, "step": 8852 }, { "epoch": 0.7171905379131562, "grad_norm": 0.03408436477184296, "learning_rate": 0.00018237994509203836, "loss": 0.3563, "step": 8853 }, { "epoch": 0.7172715489306546, "grad_norm": 0.033193428069353104, "learning_rate": 0.00018237544443944372, "loss": 0.3681, "step": 8854 }, { "epoch": 0.717352559948153, "grad_norm": 0.031171130016446114, "learning_rate": 0.00018237094378684908, "loss": 0.3607, "step": 8855 }, { "epoch": 0.7174335709656513, "grad_norm": 0.034352436661720276, "learning_rate": 0.0001823664431342545, "loss": 0.384, "step": 8856 }, { "epoch": 0.7175145819831497, "grad_norm": 0.0330151803791523, "learning_rate": 0.00018236194248165985, "loss": 0.3193, "step": 8857 }, { "epoch": 0.7175955930006481, "grad_norm": 0.027999212965369225, "learning_rate": 0.0001823574418290652, "loss": 0.2807, "step": 8858 }, { "epoch": 0.7176766040181465, "grad_norm": 0.02855718694627285, "learning_rate": 0.0001823529411764706, "loss": 0.3139, "step": 8859 }, { "epoch": 0.7177576150356448, "grad_norm": 0.03285132721066475, "learning_rate": 0.00018234844052387596, "loss": 0.3573, "step": 8860 }, { "epoch": 0.7178386260531432, "grad_norm": 0.030333518981933594, "learning_rate": 0.00018234393987128135, "loss": 0.3014, "step": 8861 }, { "epoch": 0.7179196370706417, "grad_norm": 0.03320077806711197, "learning_rate": 0.00018233943921868673, "loss": 0.3319, "step": 8862 }, { "epoch": 0.71800064808814, "grad_norm": 0.031354501843452454, "learning_rate": 0.0001823349385660921, "loss": 0.3028, "step": 8863 }, { "epoch": 0.7180816591056384, "grad_norm": 0.03711242601275444, "learning_rate": 0.00018233043791349745, "loss": 0.357, "step": 8864 }, { "epoch": 0.7181626701231367, "grad_norm": 0.030047696083784103, "learning_rate": 0.00018232593726090284, "loss": 0.315, "step": 8865 }, { "epoch": 0.7182436811406351, "grad_norm": 0.03306373208761215, "learning_rate": 0.0001823214366083082, "loss": 0.3628, "step": 8866 }, { "epoch": 0.7183246921581335, "grad_norm": 0.03267044574022293, "learning_rate": 0.0001823169359557136, "loss": 0.2989, "step": 8867 }, { "epoch": 0.7184057031756319, "grad_norm": 0.02751380018889904, "learning_rate": 0.00018231243530311898, "loss": 0.3077, "step": 8868 }, { "epoch": 0.7184867141931303, "grad_norm": 0.029321538284420967, "learning_rate": 0.00018230793465052434, "loss": 0.3114, "step": 8869 }, { "epoch": 0.7185677252106286, "grad_norm": 0.03440944105386734, "learning_rate": 0.0001823034339979297, "loss": 0.3438, "step": 8870 }, { "epoch": 0.718648736228127, "grad_norm": 0.03138351812958717, "learning_rate": 0.00018229893334533508, "loss": 0.296, "step": 8871 }, { "epoch": 0.7187297472456254, "grad_norm": 0.034056250005960464, "learning_rate": 0.00018229443269274044, "loss": 0.3643, "step": 8872 }, { "epoch": 0.7188107582631238, "grad_norm": 0.032820384949445724, "learning_rate": 0.00018228993204014583, "loss": 0.3449, "step": 8873 }, { "epoch": 0.7188917692806222, "grad_norm": 0.030815161764621735, "learning_rate": 0.00018228543138755122, "loss": 0.3456, "step": 8874 }, { "epoch": 0.7189727802981205, "grad_norm": 0.03418358787894249, "learning_rate": 0.00018228093073495658, "loss": 0.3523, "step": 8875 }, { "epoch": 0.719053791315619, "grad_norm": 0.029444202780723572, "learning_rate": 0.00018227643008236194, "loss": 0.3286, "step": 8876 }, { "epoch": 0.7191348023331173, "grad_norm": 0.047734808176755905, "learning_rate": 0.00018227192942976732, "loss": 0.4268, "step": 8877 }, { "epoch": 0.7192158133506157, "grad_norm": 0.03228252753615379, "learning_rate": 0.00018226742877717268, "loss": 0.349, "step": 8878 }, { "epoch": 0.719296824368114, "grad_norm": 0.034021634608507156, "learning_rate": 0.00018226292812457807, "loss": 0.3693, "step": 8879 }, { "epoch": 0.7193778353856124, "grad_norm": 0.03035876527428627, "learning_rate": 0.00018225842747198346, "loss": 0.3519, "step": 8880 }, { "epoch": 0.7194588464031109, "grad_norm": 0.02635454386472702, "learning_rate": 0.00018225392681938882, "loss": 0.3025, "step": 8881 }, { "epoch": 0.7195398574206092, "grad_norm": 0.028250424191355705, "learning_rate": 0.00018224942616679418, "loss": 0.3595, "step": 8882 }, { "epoch": 0.7196208684381076, "grad_norm": 0.02870979905128479, "learning_rate": 0.00018224492551419957, "loss": 0.3139, "step": 8883 }, { "epoch": 0.7197018794556059, "grad_norm": 0.032601818442344666, "learning_rate": 0.00018224042486160495, "loss": 0.3251, "step": 8884 }, { "epoch": 0.7197828904731044, "grad_norm": 0.03433837369084358, "learning_rate": 0.0001822359242090103, "loss": 0.3472, "step": 8885 }, { "epoch": 0.7198639014906028, "grad_norm": 0.03213446959853172, "learning_rate": 0.0001822314235564157, "loss": 0.3386, "step": 8886 }, { "epoch": 0.7199449125081011, "grad_norm": 0.03527262806892395, "learning_rate": 0.00018222692290382106, "loss": 0.3702, "step": 8887 }, { "epoch": 0.7200259235255995, "grad_norm": 0.032607708126306534, "learning_rate": 0.00018222242225122642, "loss": 0.332, "step": 8888 }, { "epoch": 0.7201069345430978, "grad_norm": 0.03458176553249359, "learning_rate": 0.0001822179215986318, "loss": 0.3398, "step": 8889 }, { "epoch": 0.7201879455605963, "grad_norm": 0.033175062388181686, "learning_rate": 0.0001822134209460372, "loss": 0.3496, "step": 8890 }, { "epoch": 0.7202689565780946, "grad_norm": 0.03210841119289398, "learning_rate": 0.00018220892029344255, "loss": 0.3456, "step": 8891 }, { "epoch": 0.720349967595593, "grad_norm": 0.03000030666589737, "learning_rate": 0.00018220441964084794, "loss": 0.3228, "step": 8892 }, { "epoch": 0.7204309786130914, "grad_norm": 0.037756647914648056, "learning_rate": 0.0001821999189882533, "loss": 0.3243, "step": 8893 }, { "epoch": 0.7205119896305897, "grad_norm": 0.03188977390527725, "learning_rate": 0.00018219541833565866, "loss": 0.2873, "step": 8894 }, { "epoch": 0.7205930006480882, "grad_norm": 0.03612763434648514, "learning_rate": 0.00018219091768306405, "loss": 0.3652, "step": 8895 }, { "epoch": 0.7206740116655865, "grad_norm": 0.032409533858299255, "learning_rate": 0.00018218641703046944, "loss": 0.3268, "step": 8896 }, { "epoch": 0.7207550226830849, "grad_norm": 0.03179197385907173, "learning_rate": 0.0001821819163778748, "loss": 0.3328, "step": 8897 }, { "epoch": 0.7208360337005832, "grad_norm": 0.03408501669764519, "learning_rate": 0.00018217741572528018, "loss": 0.3547, "step": 8898 }, { "epoch": 0.7209170447180817, "grad_norm": 0.031727708876132965, "learning_rate": 0.00018217291507268554, "loss": 0.3515, "step": 8899 }, { "epoch": 0.7209980557355801, "grad_norm": 0.033311303704977036, "learning_rate": 0.0001821684144200909, "loss": 0.3462, "step": 8900 }, { "epoch": 0.7210790667530784, "grad_norm": 0.03319597244262695, "learning_rate": 0.0001821639137674963, "loss": 0.288, "step": 8901 }, { "epoch": 0.7211600777705768, "grad_norm": 0.03720049187541008, "learning_rate": 0.00018215941311490168, "loss": 0.3932, "step": 8902 }, { "epoch": 0.7212410887880751, "grad_norm": 0.034940432757139206, "learning_rate": 0.00018215491246230704, "loss": 0.3307, "step": 8903 }, { "epoch": 0.7213220998055736, "grad_norm": 0.030946683138608932, "learning_rate": 0.00018215041180971243, "loss": 0.3118, "step": 8904 }, { "epoch": 0.721403110823072, "grad_norm": 0.03591233864426613, "learning_rate": 0.00018214591115711779, "loss": 0.3601, "step": 8905 }, { "epoch": 0.7214841218405703, "grad_norm": 0.03742532059550285, "learning_rate": 0.00018214141050452315, "loss": 0.334, "step": 8906 }, { "epoch": 0.7215651328580687, "grad_norm": 0.032043810933828354, "learning_rate": 0.00018213690985192853, "loss": 0.3403, "step": 8907 }, { "epoch": 0.721646143875567, "grad_norm": 0.031024204567074776, "learning_rate": 0.00018213240919933392, "loss": 0.3284, "step": 8908 }, { "epoch": 0.7217271548930655, "grad_norm": 0.03355749696493149, "learning_rate": 0.00018212790854673928, "loss": 0.3646, "step": 8909 }, { "epoch": 0.7218081659105638, "grad_norm": 0.033181991428136826, "learning_rate": 0.00018212340789414467, "loss": 0.3336, "step": 8910 }, { "epoch": 0.7218891769280622, "grad_norm": 0.0307527594268322, "learning_rate": 0.00018211890724155003, "loss": 0.3282, "step": 8911 }, { "epoch": 0.7219701879455606, "grad_norm": 0.035663995891809464, "learning_rate": 0.0001821144065889554, "loss": 0.3281, "step": 8912 }, { "epoch": 0.722051198963059, "grad_norm": 0.030169153586030006, "learning_rate": 0.0001821099059363608, "loss": 0.293, "step": 8913 }, { "epoch": 0.7221322099805574, "grad_norm": 0.03254105895757675, "learning_rate": 0.00018210540528376616, "loss": 0.3449, "step": 8914 }, { "epoch": 0.7222132209980557, "grad_norm": 0.033432308584451675, "learning_rate": 0.00018210090463117152, "loss": 0.3908, "step": 8915 }, { "epoch": 0.7222942320155541, "grad_norm": 0.03857411816716194, "learning_rate": 0.0001820964039785769, "loss": 0.3751, "step": 8916 }, { "epoch": 0.7223752430330524, "grad_norm": 0.03752148523926735, "learning_rate": 0.00018209190332598227, "loss": 0.3848, "step": 8917 }, { "epoch": 0.7224562540505509, "grad_norm": 0.03653761371970177, "learning_rate": 0.00018208740267338763, "loss": 0.3679, "step": 8918 }, { "epoch": 0.7225372650680493, "grad_norm": 0.034680552780628204, "learning_rate": 0.00018208290202079304, "loss": 0.3571, "step": 8919 }, { "epoch": 0.7226182760855476, "grad_norm": 0.028377162292599678, "learning_rate": 0.0001820784013681984, "loss": 0.337, "step": 8920 }, { "epoch": 0.722699287103046, "grad_norm": 0.029166093096137047, "learning_rate": 0.00018207390071560376, "loss": 0.312, "step": 8921 }, { "epoch": 0.7227802981205443, "grad_norm": 0.034398071467876434, "learning_rate": 0.00018206940006300915, "loss": 0.3587, "step": 8922 }, { "epoch": 0.7228613091380428, "grad_norm": 0.03137093037366867, "learning_rate": 0.0001820648994104145, "loss": 0.3204, "step": 8923 }, { "epoch": 0.7229423201555412, "grad_norm": 0.029238324612379074, "learning_rate": 0.00018206039875781987, "loss": 0.3021, "step": 8924 }, { "epoch": 0.7230233311730395, "grad_norm": 0.028114039450883865, "learning_rate": 0.00018205589810522528, "loss": 0.3014, "step": 8925 }, { "epoch": 0.7231043421905379, "grad_norm": 0.03557706996798515, "learning_rate": 0.00018205139745263064, "loss": 0.3371, "step": 8926 }, { "epoch": 0.7231853532080363, "grad_norm": 0.03201264515519142, "learning_rate": 0.000182046896800036, "loss": 0.3082, "step": 8927 }, { "epoch": 0.7232663642255347, "grad_norm": 0.03449628874659538, "learning_rate": 0.0001820423961474414, "loss": 0.3353, "step": 8928 }, { "epoch": 0.723347375243033, "grad_norm": 0.029499752447009087, "learning_rate": 0.00018203789549484675, "loss": 0.3292, "step": 8929 }, { "epoch": 0.7234283862605314, "grad_norm": 0.0337161123752594, "learning_rate": 0.0001820333948422521, "loss": 0.408, "step": 8930 }, { "epoch": 0.7235093972780298, "grad_norm": 0.02877146005630493, "learning_rate": 0.00018202889418965753, "loss": 0.3312, "step": 8931 }, { "epoch": 0.7235904082955282, "grad_norm": 0.03432083502411842, "learning_rate": 0.00018202439353706289, "loss": 0.3652, "step": 8932 }, { "epoch": 0.7236714193130266, "grad_norm": 0.03515870124101639, "learning_rate": 0.00018201989288446825, "loss": 0.3677, "step": 8933 }, { "epoch": 0.7237524303305249, "grad_norm": 0.028151020407676697, "learning_rate": 0.00018201539223187363, "loss": 0.3109, "step": 8934 }, { "epoch": 0.7238334413480233, "grad_norm": 0.0321088470518589, "learning_rate": 0.000182010891579279, "loss": 0.34, "step": 8935 }, { "epoch": 0.7239144523655218, "grad_norm": 0.030212152749300003, "learning_rate": 0.00018200639092668438, "loss": 0.33, "step": 8936 }, { "epoch": 0.7239954633830201, "grad_norm": 0.030875565484166145, "learning_rate": 0.00018200189027408977, "loss": 0.3162, "step": 8937 }, { "epoch": 0.7240764744005185, "grad_norm": 0.02767636999487877, "learning_rate": 0.00018199738962149513, "loss": 0.2755, "step": 8938 }, { "epoch": 0.7241574854180168, "grad_norm": 0.03224732726812363, "learning_rate": 0.0001819928889689005, "loss": 0.3359, "step": 8939 }, { "epoch": 0.7242384964355152, "grad_norm": 0.027454540133476257, "learning_rate": 0.00018198838831630587, "loss": 0.3014, "step": 8940 }, { "epoch": 0.7243195074530137, "grad_norm": 0.028916843235492706, "learning_rate": 0.00018198388766371123, "loss": 0.3156, "step": 8941 }, { "epoch": 0.724400518470512, "grad_norm": 0.0313744843006134, "learning_rate": 0.00018197938701111662, "loss": 0.3091, "step": 8942 }, { "epoch": 0.7244815294880104, "grad_norm": 0.029769916087388992, "learning_rate": 0.000181974886358522, "loss": 0.304, "step": 8943 }, { "epoch": 0.7245625405055087, "grad_norm": 0.028017953038215637, "learning_rate": 0.00018197038570592737, "loss": 0.3212, "step": 8944 }, { "epoch": 0.7246435515230071, "grad_norm": 0.02983788400888443, "learning_rate": 0.00018196588505333273, "loss": 0.3384, "step": 8945 }, { "epoch": 0.7247245625405055, "grad_norm": 0.03687024116516113, "learning_rate": 0.00018196138440073812, "loss": 0.4235, "step": 8946 }, { "epoch": 0.7248055735580039, "grad_norm": 0.02993650548160076, "learning_rate": 0.00018195688374814348, "loss": 0.3374, "step": 8947 }, { "epoch": 0.7248865845755023, "grad_norm": 0.038119856268167496, "learning_rate": 0.00018195238309554886, "loss": 0.4032, "step": 8948 }, { "epoch": 0.7249675955930006, "grad_norm": 0.02985922433435917, "learning_rate": 0.00018194788244295425, "loss": 0.3356, "step": 8949 }, { "epoch": 0.7250486066104991, "grad_norm": 0.03387674316763878, "learning_rate": 0.0001819433817903596, "loss": 0.3578, "step": 8950 }, { "epoch": 0.7251296176279974, "grad_norm": 0.03529488295316696, "learning_rate": 0.00018193888113776497, "loss": 0.3207, "step": 8951 }, { "epoch": 0.7252106286454958, "grad_norm": 0.03560759872198105, "learning_rate": 0.00018193438048517036, "loss": 0.3062, "step": 8952 }, { "epoch": 0.7252916396629941, "grad_norm": 0.02996155433356762, "learning_rate": 0.00018192987983257572, "loss": 0.3398, "step": 8953 }, { "epoch": 0.7253726506804925, "grad_norm": 0.036760371178388596, "learning_rate": 0.0001819253791799811, "loss": 0.3428, "step": 8954 }, { "epoch": 0.725453661697991, "grad_norm": 0.03608694672584534, "learning_rate": 0.0001819208785273865, "loss": 0.3686, "step": 8955 }, { "epoch": 0.7255346727154893, "grad_norm": 0.03458326682448387, "learning_rate": 0.00018191637787479185, "loss": 0.3414, "step": 8956 }, { "epoch": 0.7256156837329877, "grad_norm": 0.032023146748542786, "learning_rate": 0.0001819118772221972, "loss": 0.3464, "step": 8957 }, { "epoch": 0.725696694750486, "grad_norm": 0.03176487609744072, "learning_rate": 0.0001819073765696026, "loss": 0.3124, "step": 8958 }, { "epoch": 0.7257777057679844, "grad_norm": 0.0323118157684803, "learning_rate": 0.00018190287591700796, "loss": 0.3714, "step": 8959 }, { "epoch": 0.7258587167854829, "grad_norm": 0.034501854330301285, "learning_rate": 0.00018189837526441335, "loss": 0.3555, "step": 8960 }, { "epoch": 0.7259397278029812, "grad_norm": 0.032414842396974564, "learning_rate": 0.00018189387461181873, "loss": 0.3498, "step": 8961 }, { "epoch": 0.7260207388204796, "grad_norm": 0.03302488103508949, "learning_rate": 0.0001818893739592241, "loss": 0.3768, "step": 8962 }, { "epoch": 0.7261017498379779, "grad_norm": 0.027776505798101425, "learning_rate": 0.00018188487330662945, "loss": 0.3008, "step": 8963 }, { "epoch": 0.7261827608554764, "grad_norm": 0.03728478029370308, "learning_rate": 0.00018188037265403484, "loss": 0.4108, "step": 8964 }, { "epoch": 0.7262637718729748, "grad_norm": 0.03290198743343353, "learning_rate": 0.00018187587200144023, "loss": 0.3348, "step": 8965 }, { "epoch": 0.7263447828904731, "grad_norm": 0.03009003959596157, "learning_rate": 0.0001818713713488456, "loss": 0.3328, "step": 8966 }, { "epoch": 0.7264257939079715, "grad_norm": 0.041864633560180664, "learning_rate": 0.00018186687069625098, "loss": 0.3214, "step": 8967 }, { "epoch": 0.7265068049254698, "grad_norm": 0.03529899939894676, "learning_rate": 0.00018186237004365634, "loss": 0.3512, "step": 8968 }, { "epoch": 0.7265878159429683, "grad_norm": 0.030811816453933716, "learning_rate": 0.0001818578693910617, "loss": 0.3534, "step": 8969 }, { "epoch": 0.7266688269604666, "grad_norm": 0.02578016184270382, "learning_rate": 0.00018185336873846708, "loss": 0.3255, "step": 8970 }, { "epoch": 0.726749837977965, "grad_norm": 0.03245696797966957, "learning_rate": 0.00018184886808587247, "loss": 0.2953, "step": 8971 }, { "epoch": 0.7268308489954634, "grad_norm": 0.03241654485464096, "learning_rate": 0.00018184436743327783, "loss": 0.3336, "step": 8972 }, { "epoch": 0.7269118600129617, "grad_norm": 0.032060008496046066, "learning_rate": 0.00018183986678068322, "loss": 0.3119, "step": 8973 }, { "epoch": 0.7269928710304602, "grad_norm": 0.03207783028483391, "learning_rate": 0.00018183536612808858, "loss": 0.3818, "step": 8974 }, { "epoch": 0.7270738820479585, "grad_norm": 0.03175025433301926, "learning_rate": 0.00018183086547549394, "loss": 0.3239, "step": 8975 }, { "epoch": 0.7271548930654569, "grad_norm": 0.030761821195483208, "learning_rate": 0.00018182636482289932, "loss": 0.3228, "step": 8976 }, { "epoch": 0.7272359040829552, "grad_norm": 0.03948160260915756, "learning_rate": 0.0001818218641703047, "loss": 0.4037, "step": 8977 }, { "epoch": 0.7273169151004537, "grad_norm": 0.032243337482213974, "learning_rate": 0.00018181736351771007, "loss": 0.3502, "step": 8978 }, { "epoch": 0.7273979261179521, "grad_norm": 0.03181379660964012, "learning_rate": 0.00018181286286511546, "loss": 0.3384, "step": 8979 }, { "epoch": 0.7274789371354504, "grad_norm": 0.034360308200120926, "learning_rate": 0.00018180836221252082, "loss": 0.3473, "step": 8980 }, { "epoch": 0.7275599481529488, "grad_norm": 0.03012571483850479, "learning_rate": 0.00018180386155992618, "loss": 0.3076, "step": 8981 }, { "epoch": 0.7276409591704471, "grad_norm": 0.03172267973423004, "learning_rate": 0.00018179936090733157, "loss": 0.3409, "step": 8982 }, { "epoch": 0.7277219701879456, "grad_norm": 0.03266200050711632, "learning_rate": 0.00018179486025473695, "loss": 0.3604, "step": 8983 }, { "epoch": 0.727802981205444, "grad_norm": 0.03298585116863251, "learning_rate": 0.0001817903596021423, "loss": 0.3856, "step": 8984 }, { "epoch": 0.7278839922229423, "grad_norm": 0.032270126044750214, "learning_rate": 0.0001817858589495477, "loss": 0.3444, "step": 8985 }, { "epoch": 0.7279650032404407, "grad_norm": 0.030016066506505013, "learning_rate": 0.00018178135829695306, "loss": 0.3222, "step": 8986 }, { "epoch": 0.7280460142579391, "grad_norm": 0.038993485271930695, "learning_rate": 0.00018177685764435842, "loss": 0.3631, "step": 8987 }, { "epoch": 0.7281270252754375, "grad_norm": 0.03430574759840965, "learning_rate": 0.0001817723569917638, "loss": 0.338, "step": 8988 }, { "epoch": 0.7282080362929358, "grad_norm": 0.031583987176418304, "learning_rate": 0.0001817678563391692, "loss": 0.3672, "step": 8989 }, { "epoch": 0.7282890473104342, "grad_norm": 0.02773544006049633, "learning_rate": 0.00018176335568657455, "loss": 0.3033, "step": 8990 }, { "epoch": 0.7283700583279326, "grad_norm": 0.03437868878245354, "learning_rate": 0.00018175885503397994, "loss": 0.3621, "step": 8991 }, { "epoch": 0.728451069345431, "grad_norm": 0.03022439405322075, "learning_rate": 0.0001817543543813853, "loss": 0.3064, "step": 8992 }, { "epoch": 0.7285320803629294, "grad_norm": 0.032135121524333954, "learning_rate": 0.00018174985372879066, "loss": 0.3562, "step": 8993 }, { "epoch": 0.7286130913804277, "grad_norm": 0.03385743498802185, "learning_rate": 0.00018174535307619608, "loss": 0.3733, "step": 8994 }, { "epoch": 0.7286941023979261, "grad_norm": 0.031596675515174866, "learning_rate": 0.00018174085242360144, "loss": 0.3298, "step": 8995 }, { "epoch": 0.7287751134154244, "grad_norm": 0.029566364362835884, "learning_rate": 0.0001817363517710068, "loss": 0.3548, "step": 8996 }, { "epoch": 0.7288561244329229, "grad_norm": 0.031259965151548386, "learning_rate": 0.00018173185111841218, "loss": 0.3474, "step": 8997 }, { "epoch": 0.7289371354504213, "grad_norm": 0.03048304282128811, "learning_rate": 0.00018172735046581754, "loss": 0.3289, "step": 8998 }, { "epoch": 0.7290181464679196, "grad_norm": 0.03218503296375275, "learning_rate": 0.0001817228498132229, "loss": 0.2826, "step": 8999 }, { "epoch": 0.729099157485418, "grad_norm": 0.030204620212316513, "learning_rate": 0.00018171834916062832, "loss": 0.3565, "step": 9000 }, { "epoch": 0.7291801685029164, "grad_norm": 0.03479839116334915, "learning_rate": 0.00018171384850803368, "loss": 0.3533, "step": 9001 }, { "epoch": 0.7292611795204148, "grad_norm": 0.035329870879650116, "learning_rate": 0.00018170934785543904, "loss": 0.3319, "step": 9002 }, { "epoch": 0.7293421905379132, "grad_norm": 0.03221248835325241, "learning_rate": 0.00018170484720284443, "loss": 0.3336, "step": 9003 }, { "epoch": 0.7294232015554115, "grad_norm": 0.031972408294677734, "learning_rate": 0.00018170034655024979, "loss": 0.3342, "step": 9004 }, { "epoch": 0.7295042125729099, "grad_norm": 0.03126239404082298, "learning_rate": 0.00018169584589765517, "loss": 0.3445, "step": 9005 }, { "epoch": 0.7295852235904083, "grad_norm": 0.030894698575139046, "learning_rate": 0.00018169134524506056, "loss": 0.3464, "step": 9006 }, { "epoch": 0.7296662346079067, "grad_norm": 0.032019056379795074, "learning_rate": 0.00018168684459246592, "loss": 0.3407, "step": 9007 }, { "epoch": 0.729747245625405, "grad_norm": 0.03275280445814133, "learning_rate": 0.00018168234393987128, "loss": 0.3025, "step": 9008 }, { "epoch": 0.7298282566429034, "grad_norm": 0.02743900939822197, "learning_rate": 0.00018167784328727667, "loss": 0.3116, "step": 9009 }, { "epoch": 0.7299092676604018, "grad_norm": 0.02820882946252823, "learning_rate": 0.00018167334263468203, "loss": 0.2914, "step": 9010 }, { "epoch": 0.7299902786779002, "grad_norm": 0.03033355250954628, "learning_rate": 0.00018166884198208741, "loss": 0.3626, "step": 9011 }, { "epoch": 0.7300712896953986, "grad_norm": 0.03902474045753479, "learning_rate": 0.0001816643413294928, "loss": 0.3859, "step": 9012 }, { "epoch": 0.7301523007128969, "grad_norm": 0.03890910744667053, "learning_rate": 0.00018165984067689816, "loss": 0.3857, "step": 9013 }, { "epoch": 0.7302333117303953, "grad_norm": 0.03715941682457924, "learning_rate": 0.00018165534002430352, "loss": 0.3821, "step": 9014 }, { "epoch": 0.7303143227478938, "grad_norm": 0.039741143584251404, "learning_rate": 0.0001816508393717089, "loss": 0.3778, "step": 9015 }, { "epoch": 0.7303953337653921, "grad_norm": 0.03291528299450874, "learning_rate": 0.00018164633871911427, "loss": 0.3162, "step": 9016 }, { "epoch": 0.7304763447828905, "grad_norm": 0.03132300078868866, "learning_rate": 0.00018164183806651966, "loss": 0.3653, "step": 9017 }, { "epoch": 0.7305573558003888, "grad_norm": 0.03031282313168049, "learning_rate": 0.00018163733741392504, "loss": 0.3354, "step": 9018 }, { "epoch": 0.7306383668178872, "grad_norm": 0.03178204596042633, "learning_rate": 0.0001816328367613304, "loss": 0.324, "step": 9019 }, { "epoch": 0.7307193778353857, "grad_norm": 0.03401080146431923, "learning_rate": 0.00018162833610873576, "loss": 0.3569, "step": 9020 }, { "epoch": 0.730800388852884, "grad_norm": 0.03225165233016014, "learning_rate": 0.00018162383545614115, "loss": 0.3675, "step": 9021 }, { "epoch": 0.7308813998703824, "grad_norm": 0.03994085639715195, "learning_rate": 0.0001816193348035465, "loss": 0.3501, "step": 9022 }, { "epoch": 0.7309624108878807, "grad_norm": 0.03080485202372074, "learning_rate": 0.0001816148341509519, "loss": 0.3166, "step": 9023 }, { "epoch": 0.7310434219053791, "grad_norm": 0.03139037266373634, "learning_rate": 0.00018161033349835728, "loss": 0.3216, "step": 9024 }, { "epoch": 0.7311244329228775, "grad_norm": 0.03789443522691727, "learning_rate": 0.00018160583284576264, "loss": 0.3481, "step": 9025 }, { "epoch": 0.7312054439403759, "grad_norm": 0.029310574755072594, "learning_rate": 0.000181601332193168, "loss": 0.2941, "step": 9026 }, { "epoch": 0.7312864549578743, "grad_norm": 0.03206866607069969, "learning_rate": 0.0001815968315405734, "loss": 0.3174, "step": 9027 }, { "epoch": 0.7313674659753726, "grad_norm": 0.031277187168598175, "learning_rate": 0.00018159233088797875, "loss": 0.2838, "step": 9028 }, { "epoch": 0.7314484769928711, "grad_norm": 0.03462287411093712, "learning_rate": 0.00018158783023538414, "loss": 0.367, "step": 9029 }, { "epoch": 0.7315294880103694, "grad_norm": 0.031755078583955765, "learning_rate": 0.00018158332958278953, "loss": 0.3271, "step": 9030 }, { "epoch": 0.7316104990278678, "grad_norm": 0.03572135418653488, "learning_rate": 0.00018157882893019489, "loss": 0.3138, "step": 9031 }, { "epoch": 0.7316915100453661, "grad_norm": 0.03189965337514877, "learning_rate": 0.00018157432827760025, "loss": 0.2958, "step": 9032 }, { "epoch": 0.7317725210628645, "grad_norm": 0.033261414617300034, "learning_rate": 0.00018156982762500563, "loss": 0.3294, "step": 9033 }, { "epoch": 0.731853532080363, "grad_norm": 0.0299600288271904, "learning_rate": 0.000181565326972411, "loss": 0.3578, "step": 9034 }, { "epoch": 0.7319345430978613, "grad_norm": 0.0337442122399807, "learning_rate": 0.00018156082631981638, "loss": 0.3425, "step": 9035 }, { "epoch": 0.7320155541153597, "grad_norm": 0.0325690433382988, "learning_rate": 0.00018155632566722177, "loss": 0.3504, "step": 9036 }, { "epoch": 0.732096565132858, "grad_norm": 0.029540935531258583, "learning_rate": 0.00018155182501462713, "loss": 0.3249, "step": 9037 }, { "epoch": 0.7321775761503565, "grad_norm": 0.03266843780875206, "learning_rate": 0.0001815473243620325, "loss": 0.3712, "step": 9038 }, { "epoch": 0.7322585871678549, "grad_norm": 0.031022358685731888, "learning_rate": 0.00018154282370943788, "loss": 0.3624, "step": 9039 }, { "epoch": 0.7323395981853532, "grad_norm": 0.03409833088517189, "learning_rate": 0.00018153832305684324, "loss": 0.3533, "step": 9040 }, { "epoch": 0.7324206092028516, "grad_norm": 0.02747887372970581, "learning_rate": 0.00018153382240424862, "loss": 0.2489, "step": 9041 }, { "epoch": 0.7325016202203499, "grad_norm": 0.03128967061638832, "learning_rate": 0.000181529321751654, "loss": 0.3403, "step": 9042 }, { "epoch": 0.7325826312378484, "grad_norm": 0.03193770721554756, "learning_rate": 0.00018152482109905937, "loss": 0.3551, "step": 9043 }, { "epoch": 0.7326636422553467, "grad_norm": 0.03336536884307861, "learning_rate": 0.00018152032044646473, "loss": 0.3658, "step": 9044 }, { "epoch": 0.7327446532728451, "grad_norm": 0.029813924804329872, "learning_rate": 0.00018151581979387012, "loss": 0.3186, "step": 9045 }, { "epoch": 0.7328256642903435, "grad_norm": 0.03379664942622185, "learning_rate": 0.0001815113191412755, "loss": 0.3077, "step": 9046 }, { "epoch": 0.7329066753078418, "grad_norm": 0.032446689903736115, "learning_rate": 0.00018150681848868086, "loss": 0.3538, "step": 9047 }, { "epoch": 0.7329876863253403, "grad_norm": 0.03147300332784653, "learning_rate": 0.00018150231783608625, "loss": 0.3358, "step": 9048 }, { "epoch": 0.7330686973428386, "grad_norm": 0.031243788078427315, "learning_rate": 0.0001814978171834916, "loss": 0.324, "step": 9049 }, { "epoch": 0.733149708360337, "grad_norm": 0.031456124037504196, "learning_rate": 0.00018149331653089697, "loss": 0.354, "step": 9050 }, { "epoch": 0.7332307193778353, "grad_norm": 0.030385632067918777, "learning_rate": 0.00018148881587830236, "loss": 0.3145, "step": 9051 }, { "epoch": 0.7333117303953338, "grad_norm": 0.03331984207034111, "learning_rate": 0.00018148431522570775, "loss": 0.3716, "step": 9052 }, { "epoch": 0.7333927414128322, "grad_norm": 0.02952803298830986, "learning_rate": 0.0001814798145731131, "loss": 0.3105, "step": 9053 }, { "epoch": 0.7334737524303305, "grad_norm": 0.031981199979782104, "learning_rate": 0.0001814753139205185, "loss": 0.369, "step": 9054 }, { "epoch": 0.7335547634478289, "grad_norm": 0.03284860774874687, "learning_rate": 0.00018147081326792385, "loss": 0.3362, "step": 9055 }, { "epoch": 0.7336357744653272, "grad_norm": 0.03138614073395729, "learning_rate": 0.0001814663126153292, "loss": 0.3132, "step": 9056 }, { "epoch": 0.7337167854828257, "grad_norm": 0.032481104135513306, "learning_rate": 0.0001814618119627346, "loss": 0.3668, "step": 9057 }, { "epoch": 0.7337977965003241, "grad_norm": 0.0347520187497139, "learning_rate": 0.00018145731131014, "loss": 0.327, "step": 9058 }, { "epoch": 0.7338788075178224, "grad_norm": 0.034793466329574585, "learning_rate": 0.00018145281065754535, "loss": 0.3724, "step": 9059 }, { "epoch": 0.7339598185353208, "grad_norm": 0.04012209549546242, "learning_rate": 0.00018144831000495073, "loss": 0.346, "step": 9060 }, { "epoch": 0.7340408295528191, "grad_norm": 0.029469860717654228, "learning_rate": 0.0001814438093523561, "loss": 0.3149, "step": 9061 }, { "epoch": 0.7341218405703176, "grad_norm": 0.028441384434700012, "learning_rate": 0.00018143930869976145, "loss": 0.2956, "step": 9062 }, { "epoch": 0.734202851587816, "grad_norm": 0.03293197229504585, "learning_rate": 0.00018143480804716684, "loss": 0.3505, "step": 9063 }, { "epoch": 0.7342838626053143, "grad_norm": 0.03378473222255707, "learning_rate": 0.00018143030739457223, "loss": 0.3419, "step": 9064 }, { "epoch": 0.7343648736228127, "grad_norm": 0.0314219705760479, "learning_rate": 0.0001814258067419776, "loss": 0.3001, "step": 9065 }, { "epoch": 0.7344458846403111, "grad_norm": 0.029219215735793114, "learning_rate": 0.00018142130608938298, "loss": 0.3235, "step": 9066 }, { "epoch": 0.7345268956578095, "grad_norm": 0.029307449236512184, "learning_rate": 0.00018141680543678834, "loss": 0.3082, "step": 9067 }, { "epoch": 0.7346079066753078, "grad_norm": 0.029544701799750328, "learning_rate": 0.0001814123047841937, "loss": 0.3379, "step": 9068 }, { "epoch": 0.7346889176928062, "grad_norm": 0.039076659828424454, "learning_rate": 0.00018140780413159908, "loss": 0.377, "step": 9069 }, { "epoch": 0.7347699287103046, "grad_norm": 0.029530270025134087, "learning_rate": 0.00018140330347900447, "loss": 0.3219, "step": 9070 }, { "epoch": 0.734850939727803, "grad_norm": 0.03346060961484909, "learning_rate": 0.00018139880282640983, "loss": 0.3302, "step": 9071 }, { "epoch": 0.7349319507453014, "grad_norm": 0.03790099173784256, "learning_rate": 0.00018139430217381522, "loss": 0.3431, "step": 9072 }, { "epoch": 0.7350129617627997, "grad_norm": 0.031614601612091064, "learning_rate": 0.00018138980152122058, "loss": 0.3386, "step": 9073 }, { "epoch": 0.7350939727802981, "grad_norm": 0.031407468020915985, "learning_rate": 0.00018138530086862596, "loss": 0.3031, "step": 9074 }, { "epoch": 0.7351749837977966, "grad_norm": 0.03491174429655075, "learning_rate": 0.00018138080021603135, "loss": 0.3829, "step": 9075 }, { "epoch": 0.7352559948152949, "grad_norm": 0.03396368399262428, "learning_rate": 0.0001813762995634367, "loss": 0.3787, "step": 9076 }, { "epoch": 0.7353370058327933, "grad_norm": 0.037365756928920746, "learning_rate": 0.00018137179891084207, "loss": 0.3225, "step": 9077 }, { "epoch": 0.7354180168502916, "grad_norm": 0.031209466978907585, "learning_rate": 0.00018136729825824746, "loss": 0.3593, "step": 9078 }, { "epoch": 0.73549902786779, "grad_norm": 0.0321301631629467, "learning_rate": 0.00018136279760565282, "loss": 0.3472, "step": 9079 }, { "epoch": 0.7355800388852884, "grad_norm": 0.031743042171001434, "learning_rate": 0.0001813582969530582, "loss": 0.3606, "step": 9080 }, { "epoch": 0.7356610499027868, "grad_norm": 0.030633676797151566, "learning_rate": 0.0001813537963004636, "loss": 0.3498, "step": 9081 }, { "epoch": 0.7357420609202852, "grad_norm": 0.031029678881168365, "learning_rate": 0.00018134929564786895, "loss": 0.3216, "step": 9082 }, { "epoch": 0.7358230719377835, "grad_norm": 0.030585667118430138, "learning_rate": 0.00018134479499527431, "loss": 0.3398, "step": 9083 }, { "epoch": 0.7359040829552819, "grad_norm": 0.031699955463409424, "learning_rate": 0.0001813402943426797, "loss": 0.3434, "step": 9084 }, { "epoch": 0.7359850939727803, "grad_norm": 0.03134704753756523, "learning_rate": 0.00018133579369008506, "loss": 0.3384, "step": 9085 }, { "epoch": 0.7360661049902787, "grad_norm": 0.03340228646993637, "learning_rate": 0.00018133129303749045, "loss": 0.3712, "step": 9086 }, { "epoch": 0.736147116007777, "grad_norm": 0.03207501396536827, "learning_rate": 0.00018132679238489584, "loss": 0.3361, "step": 9087 }, { "epoch": 0.7362281270252754, "grad_norm": 0.03203867748379707, "learning_rate": 0.0001813222917323012, "loss": 0.3264, "step": 9088 }, { "epoch": 0.7363091380427739, "grad_norm": 0.028404178097844124, "learning_rate": 0.00018131779107970656, "loss": 0.3341, "step": 9089 }, { "epoch": 0.7363901490602722, "grad_norm": 0.028629284352064133, "learning_rate": 0.00018131329042711194, "loss": 0.3268, "step": 9090 }, { "epoch": 0.7364711600777706, "grad_norm": 0.03248591721057892, "learning_rate": 0.0001813087897745173, "loss": 0.3821, "step": 9091 }, { "epoch": 0.7365521710952689, "grad_norm": 0.0325838178396225, "learning_rate": 0.0001813042891219227, "loss": 0.359, "step": 9092 }, { "epoch": 0.7366331821127673, "grad_norm": 0.034078460186719894, "learning_rate": 0.00018129978846932808, "loss": 0.3479, "step": 9093 }, { "epoch": 0.7367141931302658, "grad_norm": 0.04527908191084862, "learning_rate": 0.00018129528781673344, "loss": 0.37, "step": 9094 }, { "epoch": 0.7367952041477641, "grad_norm": 0.031279996037483215, "learning_rate": 0.0001812907871641388, "loss": 0.3571, "step": 9095 }, { "epoch": 0.7368762151652625, "grad_norm": 0.02940979227423668, "learning_rate": 0.00018128628651154418, "loss": 0.3211, "step": 9096 }, { "epoch": 0.7369572261827608, "grad_norm": 0.03120758943259716, "learning_rate": 0.00018128178585894954, "loss": 0.365, "step": 9097 }, { "epoch": 0.7370382372002592, "grad_norm": 0.029934510588645935, "learning_rate": 0.00018127728520635493, "loss": 0.3587, "step": 9098 }, { "epoch": 0.7371192482177576, "grad_norm": 0.02927226759493351, "learning_rate": 0.00018127278455376032, "loss": 0.3283, "step": 9099 }, { "epoch": 0.737200259235256, "grad_norm": 0.03264165297150612, "learning_rate": 0.00018126828390116568, "loss": 0.3582, "step": 9100 }, { "epoch": 0.7372812702527544, "grad_norm": 0.027434570714831352, "learning_rate": 0.00018126378324857104, "loss": 0.3311, "step": 9101 }, { "epoch": 0.7373622812702527, "grad_norm": 0.02903290092945099, "learning_rate": 0.00018125928259597643, "loss": 0.3428, "step": 9102 }, { "epoch": 0.7374432922877512, "grad_norm": 0.03369829058647156, "learning_rate": 0.00018125478194338179, "loss": 0.3104, "step": 9103 }, { "epoch": 0.7375243033052495, "grad_norm": 0.03176895156502724, "learning_rate": 0.00018125028129078717, "loss": 0.3222, "step": 9104 }, { "epoch": 0.7376053143227479, "grad_norm": 0.03280607610940933, "learning_rate": 0.00018124578063819256, "loss": 0.3583, "step": 9105 }, { "epoch": 0.7376863253402463, "grad_norm": 0.033629160374403, "learning_rate": 0.00018124127998559792, "loss": 0.3244, "step": 9106 }, { "epoch": 0.7377673363577446, "grad_norm": 0.03247423097491264, "learning_rate": 0.00018123677933300328, "loss": 0.3439, "step": 9107 }, { "epoch": 0.7378483473752431, "grad_norm": 0.04204316437244415, "learning_rate": 0.00018123227868040867, "loss": 0.3505, "step": 9108 }, { "epoch": 0.7379293583927414, "grad_norm": 0.03633909299969673, "learning_rate": 0.00018122777802781403, "loss": 0.3615, "step": 9109 }, { "epoch": 0.7380103694102398, "grad_norm": 0.031753137707710266, "learning_rate": 0.00018122327737521941, "loss": 0.3329, "step": 9110 }, { "epoch": 0.7380913804277381, "grad_norm": 0.033632081001996994, "learning_rate": 0.0001812187767226248, "loss": 0.3854, "step": 9111 }, { "epoch": 0.7381723914452365, "grad_norm": 0.03247777372598648, "learning_rate": 0.00018121427607003016, "loss": 0.3689, "step": 9112 }, { "epoch": 0.738253402462735, "grad_norm": 0.033727072179317474, "learning_rate": 0.00018120977541743552, "loss": 0.39, "step": 9113 }, { "epoch": 0.7383344134802333, "grad_norm": 0.03229176253080368, "learning_rate": 0.0001812052747648409, "loss": 0.3513, "step": 9114 }, { "epoch": 0.7384154244977317, "grad_norm": 0.028008291497826576, "learning_rate": 0.00018120077411224627, "loss": 0.3261, "step": 9115 }, { "epoch": 0.73849643551523, "grad_norm": 0.03333825245499611, "learning_rate": 0.00018119627345965166, "loss": 0.3434, "step": 9116 }, { "epoch": 0.7385774465327285, "grad_norm": 0.033771634101867676, "learning_rate": 0.00018119177280705704, "loss": 0.3979, "step": 9117 }, { "epoch": 0.7386584575502269, "grad_norm": 0.02980533428490162, "learning_rate": 0.0001811872721544624, "loss": 0.3241, "step": 9118 }, { "epoch": 0.7387394685677252, "grad_norm": 0.030567575246095657, "learning_rate": 0.00018118277150186776, "loss": 0.3496, "step": 9119 }, { "epoch": 0.7388204795852236, "grad_norm": 0.03218156844377518, "learning_rate": 0.00018117827084927315, "loss": 0.3302, "step": 9120 }, { "epoch": 0.7389014906027219, "grad_norm": 0.02872970513999462, "learning_rate": 0.0001811737701966785, "loss": 0.2925, "step": 9121 }, { "epoch": 0.7389825016202204, "grad_norm": 0.0280693881213665, "learning_rate": 0.0001811692695440839, "loss": 0.2888, "step": 9122 }, { "epoch": 0.7390635126377187, "grad_norm": 0.03380297124385834, "learning_rate": 0.00018116476889148928, "loss": 0.3334, "step": 9123 }, { "epoch": 0.7391445236552171, "grad_norm": 0.029702387750148773, "learning_rate": 0.00018116026823889464, "loss": 0.3116, "step": 9124 }, { "epoch": 0.7392255346727155, "grad_norm": 0.03500824049115181, "learning_rate": 0.0001811557675863, "loss": 0.3944, "step": 9125 }, { "epoch": 0.7393065456902139, "grad_norm": 0.03508531674742699, "learning_rate": 0.0001811512669337054, "loss": 0.3393, "step": 9126 }, { "epoch": 0.7393875567077123, "grad_norm": 0.03019407019019127, "learning_rate": 0.00018114676628111078, "loss": 0.3625, "step": 9127 }, { "epoch": 0.7394685677252106, "grad_norm": 0.02897045575082302, "learning_rate": 0.00018114226562851614, "loss": 0.3062, "step": 9128 }, { "epoch": 0.739549578742709, "grad_norm": 0.03262624889612198, "learning_rate": 0.00018113776497592153, "loss": 0.3678, "step": 9129 }, { "epoch": 0.7396305897602073, "grad_norm": 0.031078308820724487, "learning_rate": 0.0001811332643233269, "loss": 0.3602, "step": 9130 }, { "epoch": 0.7397116007777058, "grad_norm": 0.034019969403743744, "learning_rate": 0.00018112876367073225, "loss": 0.33, "step": 9131 }, { "epoch": 0.7397926117952042, "grad_norm": 0.033825770020484924, "learning_rate": 0.00018112426301813763, "loss": 0.3672, "step": 9132 }, { "epoch": 0.7398736228127025, "grad_norm": 0.03215663880109787, "learning_rate": 0.00018111976236554302, "loss": 0.3426, "step": 9133 }, { "epoch": 0.7399546338302009, "grad_norm": 0.031370263546705246, "learning_rate": 0.00018111526171294838, "loss": 0.3459, "step": 9134 }, { "epoch": 0.7400356448476992, "grad_norm": 0.031213058158755302, "learning_rate": 0.00018111076106035377, "loss": 0.3464, "step": 9135 }, { "epoch": 0.7401166558651977, "grad_norm": 0.031317610293626785, "learning_rate": 0.00018110626040775913, "loss": 0.3533, "step": 9136 }, { "epoch": 0.7401976668826961, "grad_norm": 0.028887102380394936, "learning_rate": 0.00018110175975516452, "loss": 0.2923, "step": 9137 }, { "epoch": 0.7402786779001944, "grad_norm": 0.030203672125935555, "learning_rate": 0.00018109725910256988, "loss": 0.3149, "step": 9138 }, { "epoch": 0.7403596889176928, "grad_norm": 0.03250570967793465, "learning_rate": 0.00018109275844997526, "loss": 0.3381, "step": 9139 }, { "epoch": 0.7404406999351912, "grad_norm": 0.02843714877963066, "learning_rate": 0.00018108825779738062, "loss": 0.2859, "step": 9140 }, { "epoch": 0.7405217109526896, "grad_norm": 0.038352809846401215, "learning_rate": 0.000181083757144786, "loss": 0.3449, "step": 9141 }, { "epoch": 0.740602721970188, "grad_norm": 0.030800314620137215, "learning_rate": 0.00018107925649219137, "loss": 0.3251, "step": 9142 }, { "epoch": 0.7406837329876863, "grad_norm": 0.031433526426553726, "learning_rate": 0.00018107475583959676, "loss": 0.3338, "step": 9143 }, { "epoch": 0.7407647440051847, "grad_norm": 0.038461629301309586, "learning_rate": 0.00018107025518700212, "loss": 0.3632, "step": 9144 }, { "epoch": 0.7408457550226831, "grad_norm": 0.03290737792849541, "learning_rate": 0.0001810657545344075, "loss": 0.3538, "step": 9145 }, { "epoch": 0.7409267660401815, "grad_norm": 0.02991539239883423, "learning_rate": 0.00018106125388181286, "loss": 0.3025, "step": 9146 }, { "epoch": 0.7410077770576798, "grad_norm": 0.03495603799819946, "learning_rate": 0.00018105675322921825, "loss": 0.3859, "step": 9147 }, { "epoch": 0.7410887880751782, "grad_norm": 0.031578708440065384, "learning_rate": 0.0001810522525766236, "loss": 0.3463, "step": 9148 }, { "epoch": 0.7411697990926766, "grad_norm": 0.031590063124895096, "learning_rate": 0.000181047751924029, "loss": 0.3333, "step": 9149 }, { "epoch": 0.741250810110175, "grad_norm": 0.03381989896297455, "learning_rate": 0.00018104325127143439, "loss": 0.3665, "step": 9150 }, { "epoch": 0.7413318211276734, "grad_norm": 0.03611378371715546, "learning_rate": 0.00018103875061883975, "loss": 0.3665, "step": 9151 }, { "epoch": 0.7414128321451717, "grad_norm": 0.03055768832564354, "learning_rate": 0.0001810342499662451, "loss": 0.3569, "step": 9152 }, { "epoch": 0.7414938431626701, "grad_norm": 0.03822949901223183, "learning_rate": 0.0001810297493136505, "loss": 0.3644, "step": 9153 }, { "epoch": 0.7415748541801686, "grad_norm": 0.03513355553150177, "learning_rate": 0.00018102524866105585, "loss": 0.3741, "step": 9154 }, { "epoch": 0.7416558651976669, "grad_norm": 0.028875524178147316, "learning_rate": 0.00018102074800846124, "loss": 0.2756, "step": 9155 }, { "epoch": 0.7417368762151653, "grad_norm": 0.03183237463235855, "learning_rate": 0.00018101624735586663, "loss": 0.3582, "step": 9156 }, { "epoch": 0.7418178872326636, "grad_norm": 0.03398773819208145, "learning_rate": 0.000181011746703272, "loss": 0.362, "step": 9157 }, { "epoch": 0.741898898250162, "grad_norm": 0.03188676759600639, "learning_rate": 0.00018100724605067735, "loss": 0.3328, "step": 9158 }, { "epoch": 0.7419799092676604, "grad_norm": 0.03117707185447216, "learning_rate": 0.00018100274539808273, "loss": 0.3177, "step": 9159 }, { "epoch": 0.7420609202851588, "grad_norm": 0.03517724573612213, "learning_rate": 0.0001809982447454881, "loss": 0.3785, "step": 9160 }, { "epoch": 0.7421419313026572, "grad_norm": 0.03325748071074486, "learning_rate": 0.00018099374409289348, "loss": 0.3567, "step": 9161 }, { "epoch": 0.7422229423201555, "grad_norm": 0.03816870599985123, "learning_rate": 0.00018098924344029887, "loss": 0.4106, "step": 9162 }, { "epoch": 0.7423039533376539, "grad_norm": 0.032118070870637894, "learning_rate": 0.00018098474278770423, "loss": 0.399, "step": 9163 }, { "epoch": 0.7423849643551523, "grad_norm": 0.032386768609285355, "learning_rate": 0.0001809802421351096, "loss": 0.3135, "step": 9164 }, { "epoch": 0.7424659753726507, "grad_norm": 0.0341571681201458, "learning_rate": 0.00018097574148251498, "loss": 0.3677, "step": 9165 }, { "epoch": 0.742546986390149, "grad_norm": 0.03333856537938118, "learning_rate": 0.00018097124082992034, "loss": 0.3186, "step": 9166 }, { "epoch": 0.7426279974076474, "grad_norm": 0.028618505224585533, "learning_rate": 0.00018096674017732572, "loss": 0.3031, "step": 9167 }, { "epoch": 0.7427090084251459, "grad_norm": 0.029666319489479065, "learning_rate": 0.0001809622395247311, "loss": 0.3142, "step": 9168 }, { "epoch": 0.7427900194426442, "grad_norm": 0.036109957844018936, "learning_rate": 0.00018095773887213647, "loss": 0.3375, "step": 9169 }, { "epoch": 0.7428710304601426, "grad_norm": 0.03363575413823128, "learning_rate": 0.00018095323821954183, "loss": 0.3346, "step": 9170 }, { "epoch": 0.7429520414776409, "grad_norm": 0.03813802823424339, "learning_rate": 0.00018094873756694722, "loss": 0.3885, "step": 9171 }, { "epoch": 0.7430330524951393, "grad_norm": 0.03305772319436073, "learning_rate": 0.00018094423691435258, "loss": 0.3521, "step": 9172 }, { "epoch": 0.7431140635126378, "grad_norm": 0.031648457050323486, "learning_rate": 0.00018093973626175797, "loss": 0.3652, "step": 9173 }, { "epoch": 0.7431950745301361, "grad_norm": 0.03677235171198845, "learning_rate": 0.00018093523560916335, "loss": 0.3655, "step": 9174 }, { "epoch": 0.7432760855476345, "grad_norm": 0.032715436071157455, "learning_rate": 0.0001809307349565687, "loss": 0.3218, "step": 9175 }, { "epoch": 0.7433570965651328, "grad_norm": 0.028857214376330376, "learning_rate": 0.00018092623430397407, "loss": 0.3114, "step": 9176 }, { "epoch": 0.7434381075826313, "grad_norm": 0.029268058016896248, "learning_rate": 0.00018092173365137946, "loss": 0.3041, "step": 9177 }, { "epoch": 0.7435191186001296, "grad_norm": 0.03330998495221138, "learning_rate": 0.00018091723299878482, "loss": 0.3391, "step": 9178 }, { "epoch": 0.743600129617628, "grad_norm": 0.038648154586553574, "learning_rate": 0.0001809127323461902, "loss": 0.4163, "step": 9179 }, { "epoch": 0.7436811406351264, "grad_norm": 0.028641097247600555, "learning_rate": 0.0001809082316935956, "loss": 0.3762, "step": 9180 }, { "epoch": 0.7437621516526247, "grad_norm": 0.032774269580841064, "learning_rate": 0.00018090373104100095, "loss": 0.3515, "step": 9181 }, { "epoch": 0.7438431626701232, "grad_norm": 0.029507745057344437, "learning_rate": 0.00018089923038840631, "loss": 0.3251, "step": 9182 }, { "epoch": 0.7439241736876215, "grad_norm": 0.030811132863163948, "learning_rate": 0.0001808947297358117, "loss": 0.305, "step": 9183 }, { "epoch": 0.7440051847051199, "grad_norm": 0.029300684109330177, "learning_rate": 0.00018089022908321706, "loss": 0.2801, "step": 9184 }, { "epoch": 0.7440861957226182, "grad_norm": 0.03540259227156639, "learning_rate": 0.00018088572843062245, "loss": 0.3535, "step": 9185 }, { "epoch": 0.7441672067401166, "grad_norm": 0.03168850019574165, "learning_rate": 0.00018088122777802784, "loss": 0.3148, "step": 9186 }, { "epoch": 0.7442482177576151, "grad_norm": 0.035167254507541656, "learning_rate": 0.0001808767271254332, "loss": 0.3692, "step": 9187 }, { "epoch": 0.7443292287751134, "grad_norm": 0.032082077115774155, "learning_rate": 0.00018087222647283856, "loss": 0.357, "step": 9188 }, { "epoch": 0.7444102397926118, "grad_norm": 0.037227094173431396, "learning_rate": 0.00018086772582024394, "loss": 0.4317, "step": 9189 }, { "epoch": 0.7444912508101101, "grad_norm": 0.029042791575193405, "learning_rate": 0.0001808632251676493, "loss": 0.3345, "step": 9190 }, { "epoch": 0.7445722618276086, "grad_norm": 0.031363196671009064, "learning_rate": 0.0001808587245150547, "loss": 0.2798, "step": 9191 }, { "epoch": 0.744653272845107, "grad_norm": 0.03424050286412239, "learning_rate": 0.00018085422386246008, "loss": 0.4305, "step": 9192 }, { "epoch": 0.7447342838626053, "grad_norm": 0.03143888711929321, "learning_rate": 0.00018084972320986544, "loss": 0.3572, "step": 9193 }, { "epoch": 0.7448152948801037, "grad_norm": 0.02935538813471794, "learning_rate": 0.0001808452225572708, "loss": 0.319, "step": 9194 }, { "epoch": 0.744896305897602, "grad_norm": 0.029158979654312134, "learning_rate": 0.00018084072190467618, "loss": 0.3371, "step": 9195 }, { "epoch": 0.7449773169151005, "grad_norm": 0.030298082157969475, "learning_rate": 0.00018083622125208154, "loss": 0.3047, "step": 9196 }, { "epoch": 0.7450583279325989, "grad_norm": 0.034861814230680466, "learning_rate": 0.00018083172059948693, "loss": 0.3294, "step": 9197 }, { "epoch": 0.7451393389500972, "grad_norm": 0.027019919827580452, "learning_rate": 0.00018082721994689232, "loss": 0.2687, "step": 9198 }, { "epoch": 0.7452203499675956, "grad_norm": 0.028821589425206184, "learning_rate": 0.00018082271929429768, "loss": 0.3444, "step": 9199 }, { "epoch": 0.7453013609850939, "grad_norm": 0.03376943618059158, "learning_rate": 0.00018081821864170304, "loss": 0.3363, "step": 9200 }, { "epoch": 0.7453823720025924, "grad_norm": 0.029261022806167603, "learning_rate": 0.00018081371798910843, "loss": 0.3549, "step": 9201 }, { "epoch": 0.7454633830200907, "grad_norm": 0.03731764853000641, "learning_rate": 0.0001808092173365138, "loss": 0.3923, "step": 9202 }, { "epoch": 0.7455443940375891, "grad_norm": 0.03250044956803322, "learning_rate": 0.00018080471668391917, "loss": 0.3124, "step": 9203 }, { "epoch": 0.7456254050550875, "grad_norm": 0.032679181545972824, "learning_rate": 0.00018080021603132456, "loss": 0.3426, "step": 9204 }, { "epoch": 0.7457064160725859, "grad_norm": 0.034462928771972656, "learning_rate": 0.00018079571537872992, "loss": 0.343, "step": 9205 }, { "epoch": 0.7457874270900843, "grad_norm": 0.03392226994037628, "learning_rate": 0.0001807912147261353, "loss": 0.3827, "step": 9206 }, { "epoch": 0.7458684381075826, "grad_norm": 0.02565544657409191, "learning_rate": 0.00018078671407354067, "loss": 0.2889, "step": 9207 }, { "epoch": 0.745949449125081, "grad_norm": 0.03116597980260849, "learning_rate": 0.00018078221342094605, "loss": 0.353, "step": 9208 }, { "epoch": 0.7460304601425793, "grad_norm": 0.037584736943244934, "learning_rate": 0.00018077771276835141, "loss": 0.2985, "step": 9209 }, { "epoch": 0.7461114711600778, "grad_norm": 0.03558696433901787, "learning_rate": 0.0001807732121157568, "loss": 0.3611, "step": 9210 }, { "epoch": 0.7461924821775762, "grad_norm": 0.027648333460092545, "learning_rate": 0.00018076871146316216, "loss": 0.3073, "step": 9211 }, { "epoch": 0.7462734931950745, "grad_norm": 0.0350969135761261, "learning_rate": 0.00018076421081056755, "loss": 0.3525, "step": 9212 }, { "epoch": 0.7463545042125729, "grad_norm": 0.04231669008731842, "learning_rate": 0.0001807597101579729, "loss": 0.3325, "step": 9213 }, { "epoch": 0.7464355152300713, "grad_norm": 0.02886810339987278, "learning_rate": 0.0001807552095053783, "loss": 0.33, "step": 9214 }, { "epoch": 0.7465165262475697, "grad_norm": 0.035336822271347046, "learning_rate": 0.00018075070885278366, "loss": 0.3424, "step": 9215 }, { "epoch": 0.7465975372650681, "grad_norm": 0.026189319789409637, "learning_rate": 0.00018074620820018904, "loss": 0.301, "step": 9216 }, { "epoch": 0.7466785482825664, "grad_norm": 0.029928090050816536, "learning_rate": 0.0001807417075475944, "loss": 0.3352, "step": 9217 }, { "epoch": 0.7467595593000648, "grad_norm": 0.030840082094073296, "learning_rate": 0.0001807372068949998, "loss": 0.3578, "step": 9218 }, { "epoch": 0.7468405703175632, "grad_norm": 0.03217003121972084, "learning_rate": 0.00018073270624240515, "loss": 0.3904, "step": 9219 }, { "epoch": 0.7469215813350616, "grad_norm": 0.028606245294213295, "learning_rate": 0.00018072820558981054, "loss": 0.3082, "step": 9220 }, { "epoch": 0.74700259235256, "grad_norm": 0.028930075466632843, "learning_rate": 0.0001807237049372159, "loss": 0.3107, "step": 9221 }, { "epoch": 0.7470836033700583, "grad_norm": 0.03044108673930168, "learning_rate": 0.00018071920428462129, "loss": 0.3099, "step": 9222 }, { "epoch": 0.7471646143875567, "grad_norm": 0.02919185906648636, "learning_rate": 0.00018071470363202665, "loss": 0.3367, "step": 9223 }, { "epoch": 0.7472456254050551, "grad_norm": 0.03341258689761162, "learning_rate": 0.00018071020297943203, "loss": 0.3635, "step": 9224 }, { "epoch": 0.7473266364225535, "grad_norm": 0.03471376374363899, "learning_rate": 0.0001807057023268374, "loss": 0.3618, "step": 9225 }, { "epoch": 0.7474076474400518, "grad_norm": 0.03482995182275772, "learning_rate": 0.00018070120167424278, "loss": 0.3518, "step": 9226 }, { "epoch": 0.7474886584575502, "grad_norm": 0.033793773502111435, "learning_rate": 0.00018069670102164814, "loss": 0.3788, "step": 9227 }, { "epoch": 0.7475696694750487, "grad_norm": 0.03680787235498428, "learning_rate": 0.00018069220036905353, "loss": 0.3649, "step": 9228 }, { "epoch": 0.747650680492547, "grad_norm": 0.03281555324792862, "learning_rate": 0.0001806876997164589, "loss": 0.3062, "step": 9229 }, { "epoch": 0.7477316915100454, "grad_norm": 0.03725738078355789, "learning_rate": 0.00018068319906386427, "loss": 0.3573, "step": 9230 }, { "epoch": 0.7478127025275437, "grad_norm": 0.02526703290641308, "learning_rate": 0.00018067869841126966, "loss": 0.2774, "step": 9231 }, { "epoch": 0.7478937135450421, "grad_norm": 0.030975675210356712, "learning_rate": 0.00018067419775867502, "loss": 0.3356, "step": 9232 }, { "epoch": 0.7479747245625405, "grad_norm": 0.030292998999357224, "learning_rate": 0.00018066969710608038, "loss": 0.3183, "step": 9233 }, { "epoch": 0.7480557355800389, "grad_norm": 0.03010285459458828, "learning_rate": 0.00018066519645348577, "loss": 0.3192, "step": 9234 }, { "epoch": 0.7481367465975373, "grad_norm": 0.03180127218365669, "learning_rate": 0.00018066069580089113, "loss": 0.3286, "step": 9235 }, { "epoch": 0.7482177576150356, "grad_norm": 0.03309743478894234, "learning_rate": 0.00018065619514829652, "loss": 0.3836, "step": 9236 }, { "epoch": 0.748298768632534, "grad_norm": 0.02979223243892193, "learning_rate": 0.0001806516944957019, "loss": 0.2981, "step": 9237 }, { "epoch": 0.7483797796500324, "grad_norm": 0.0348726287484169, "learning_rate": 0.00018064719384310726, "loss": 0.3966, "step": 9238 }, { "epoch": 0.7484607906675308, "grad_norm": 0.03552888706326485, "learning_rate": 0.00018064269319051262, "loss": 0.3175, "step": 9239 }, { "epoch": 0.7485418016850292, "grad_norm": 0.03508378192782402, "learning_rate": 0.000180638192537918, "loss": 0.3756, "step": 9240 }, { "epoch": 0.7486228127025275, "grad_norm": 0.03183341398835182, "learning_rate": 0.00018063369188532337, "loss": 0.3585, "step": 9241 }, { "epoch": 0.748703823720026, "grad_norm": 0.035623688250780106, "learning_rate": 0.00018062919123272876, "loss": 0.3623, "step": 9242 }, { "epoch": 0.7487848347375243, "grad_norm": 0.03101072646677494, "learning_rate": 0.00018062469058013414, "loss": 0.3117, "step": 9243 }, { "epoch": 0.7488658457550227, "grad_norm": 0.03312772512435913, "learning_rate": 0.0001806201899275395, "loss": 0.315, "step": 9244 }, { "epoch": 0.748946856772521, "grad_norm": 0.03041679970920086, "learning_rate": 0.00018061568927494486, "loss": 0.3006, "step": 9245 }, { "epoch": 0.7490278677900194, "grad_norm": 0.03130163997411728, "learning_rate": 0.00018061118862235025, "loss": 0.3769, "step": 9246 }, { "epoch": 0.7491088788075179, "grad_norm": 0.031065113842487335, "learning_rate": 0.0001806066879697556, "loss": 0.314, "step": 9247 }, { "epoch": 0.7491898898250162, "grad_norm": 0.0288732647895813, "learning_rate": 0.000180602187317161, "loss": 0.2995, "step": 9248 }, { "epoch": 0.7492709008425146, "grad_norm": 0.036610234528779984, "learning_rate": 0.00018059768666456639, "loss": 0.3862, "step": 9249 }, { "epoch": 0.7493519118600129, "grad_norm": 0.029909661039710045, "learning_rate": 0.00018059318601197175, "loss": 0.2936, "step": 9250 }, { "epoch": 0.7494329228775113, "grad_norm": 0.029548456892371178, "learning_rate": 0.0001805886853593771, "loss": 0.3241, "step": 9251 }, { "epoch": 0.7495139338950098, "grad_norm": 0.03577359765768051, "learning_rate": 0.0001805841847067825, "loss": 0.3205, "step": 9252 }, { "epoch": 0.7495949449125081, "grad_norm": 0.02838653326034546, "learning_rate": 0.00018057968405418785, "loss": 0.3168, "step": 9253 }, { "epoch": 0.7496759559300065, "grad_norm": 0.034319330006837845, "learning_rate": 0.00018057518340159324, "loss": 0.354, "step": 9254 }, { "epoch": 0.7497569669475048, "grad_norm": 0.033737730234861374, "learning_rate": 0.00018057068274899863, "loss": 0.392, "step": 9255 }, { "epoch": 0.7498379779650033, "grad_norm": 0.029997693374753, "learning_rate": 0.000180566182096404, "loss": 0.3458, "step": 9256 }, { "epoch": 0.7499189889825016, "grad_norm": 0.030124330893158913, "learning_rate": 0.00018056168144380935, "loss": 0.3336, "step": 9257 }, { "epoch": 0.75, "grad_norm": 0.03392377868294716, "learning_rate": 0.00018055718079121473, "loss": 0.3515, "step": 9258 }, { "epoch": 0.7500810110174984, "grad_norm": 0.03187362104654312, "learning_rate": 0.0001805526801386201, "loss": 0.3582, "step": 9259 }, { "epoch": 0.7501620220349967, "grad_norm": 0.03177661448717117, "learning_rate": 0.00018054817948602548, "loss": 0.3308, "step": 9260 }, { "epoch": 0.7502430330524952, "grad_norm": 0.02912510186433792, "learning_rate": 0.00018054367883343087, "loss": 0.3492, "step": 9261 }, { "epoch": 0.7503240440699935, "grad_norm": 0.027753859758377075, "learning_rate": 0.00018053917818083623, "loss": 0.3043, "step": 9262 }, { "epoch": 0.7504050550874919, "grad_norm": 0.03045106679201126, "learning_rate": 0.0001805346775282416, "loss": 0.3586, "step": 9263 }, { "epoch": 0.7504860661049902, "grad_norm": 0.03325005993247032, "learning_rate": 0.00018053017687564698, "loss": 0.3423, "step": 9264 }, { "epoch": 0.7505670771224887, "grad_norm": 0.029894059523940086, "learning_rate": 0.00018052567622305234, "loss": 0.3433, "step": 9265 }, { "epoch": 0.7506480881399871, "grad_norm": 0.03508707880973816, "learning_rate": 0.00018052117557045772, "loss": 0.3774, "step": 9266 }, { "epoch": 0.7507290991574854, "grad_norm": 0.02966720052063465, "learning_rate": 0.0001805166749178631, "loss": 0.2835, "step": 9267 }, { "epoch": 0.7508101101749838, "grad_norm": 0.03271722048521042, "learning_rate": 0.00018051217426526847, "loss": 0.3535, "step": 9268 }, { "epoch": 0.7508911211924821, "grad_norm": 0.03025810234248638, "learning_rate": 0.00018050767361267383, "loss": 0.3233, "step": 9269 }, { "epoch": 0.7509721322099806, "grad_norm": 0.030399255454540253, "learning_rate": 0.00018050317296007922, "loss": 0.3071, "step": 9270 }, { "epoch": 0.751053143227479, "grad_norm": 0.034914761781692505, "learning_rate": 0.00018049867230748458, "loss": 0.3435, "step": 9271 }, { "epoch": 0.7511341542449773, "grad_norm": 0.028581790626049042, "learning_rate": 0.00018049417165488997, "loss": 0.3035, "step": 9272 }, { "epoch": 0.7512151652624757, "grad_norm": 0.034693315625190735, "learning_rate": 0.00018048967100229535, "loss": 0.3154, "step": 9273 }, { "epoch": 0.751296176279974, "grad_norm": 0.030193015933036804, "learning_rate": 0.0001804851703497007, "loss": 0.3567, "step": 9274 }, { "epoch": 0.7513771872974725, "grad_norm": 0.03147919476032257, "learning_rate": 0.0001804806696971061, "loss": 0.3303, "step": 9275 }, { "epoch": 0.7514581983149708, "grad_norm": 0.037018414586782455, "learning_rate": 0.00018047616904451146, "loss": 0.351, "step": 9276 }, { "epoch": 0.7515392093324692, "grad_norm": 0.028499385342001915, "learning_rate": 0.00018047166839191682, "loss": 0.2793, "step": 9277 }, { "epoch": 0.7516202203499676, "grad_norm": 0.029187791049480438, "learning_rate": 0.0001804671677393222, "loss": 0.3322, "step": 9278 }, { "epoch": 0.751701231367466, "grad_norm": 0.034434039145708084, "learning_rate": 0.0001804626670867276, "loss": 0.3262, "step": 9279 }, { "epoch": 0.7517822423849644, "grad_norm": 0.033068154007196426, "learning_rate": 0.00018045816643413295, "loss": 0.3139, "step": 9280 }, { "epoch": 0.7518632534024627, "grad_norm": 0.03756197914481163, "learning_rate": 0.00018045366578153834, "loss": 0.3722, "step": 9281 }, { "epoch": 0.7519442644199611, "grad_norm": 0.03321055322885513, "learning_rate": 0.0001804491651289437, "loss": 0.319, "step": 9282 }, { "epoch": 0.7520252754374595, "grad_norm": 0.03404540941119194, "learning_rate": 0.0001804446644763491, "loss": 0.3463, "step": 9283 }, { "epoch": 0.7521062864549579, "grad_norm": 0.03360103443264961, "learning_rate": 0.00018044016382375445, "loss": 0.3457, "step": 9284 }, { "epoch": 0.7521872974724563, "grad_norm": 0.03434904292225838, "learning_rate": 0.00018043566317115984, "loss": 0.333, "step": 9285 }, { "epoch": 0.7522683084899546, "grad_norm": 0.029737526550889015, "learning_rate": 0.0001804311625185652, "loss": 0.3167, "step": 9286 }, { "epoch": 0.752349319507453, "grad_norm": 0.03503742441534996, "learning_rate": 0.00018042666186597058, "loss": 0.3429, "step": 9287 }, { "epoch": 0.7524303305249513, "grad_norm": 0.030850261449813843, "learning_rate": 0.00018042216121337594, "loss": 0.3236, "step": 9288 }, { "epoch": 0.7525113415424498, "grad_norm": 0.03333286568522453, "learning_rate": 0.00018041766056078133, "loss": 0.3372, "step": 9289 }, { "epoch": 0.7525923525599482, "grad_norm": 0.03200223669409752, "learning_rate": 0.0001804131599081867, "loss": 0.3486, "step": 9290 }, { "epoch": 0.7526733635774465, "grad_norm": 0.0319368951022625, "learning_rate": 0.00018040865925559208, "loss": 0.3156, "step": 9291 }, { "epoch": 0.7527543745949449, "grad_norm": 0.030817804858088493, "learning_rate": 0.00018040415860299744, "loss": 0.3128, "step": 9292 }, { "epoch": 0.7528353856124433, "grad_norm": 0.03215906769037247, "learning_rate": 0.00018039965795040282, "loss": 0.3327, "step": 9293 }, { "epoch": 0.7529163966299417, "grad_norm": 0.03907868638634682, "learning_rate": 0.00018039515729780818, "loss": 0.3626, "step": 9294 }, { "epoch": 0.75299740764744, "grad_norm": 0.02961132489144802, "learning_rate": 0.00018039065664521357, "loss": 0.3254, "step": 9295 }, { "epoch": 0.7530784186649384, "grad_norm": 0.03202762454748154, "learning_rate": 0.00018038615599261893, "loss": 0.3301, "step": 9296 }, { "epoch": 0.7531594296824368, "grad_norm": 0.036983706057071686, "learning_rate": 0.00018038165534002432, "loss": 0.3997, "step": 9297 }, { "epoch": 0.7532404406999352, "grad_norm": 0.031749822199344635, "learning_rate": 0.00018037715468742968, "loss": 0.3464, "step": 9298 }, { "epoch": 0.7533214517174336, "grad_norm": 0.030583178624510765, "learning_rate": 0.00018037265403483507, "loss": 0.3031, "step": 9299 }, { "epoch": 0.7534024627349319, "grad_norm": 0.02886773645877838, "learning_rate": 0.00018036815338224043, "loss": 0.3287, "step": 9300 }, { "epoch": 0.7534834737524303, "grad_norm": 0.03687857836484909, "learning_rate": 0.0001803636527296458, "loss": 0.354, "step": 9301 }, { "epoch": 0.7535644847699287, "grad_norm": 0.03391767665743828, "learning_rate": 0.00018035915207705117, "loss": 0.3836, "step": 9302 }, { "epoch": 0.7536454957874271, "grad_norm": 0.02986670285463333, "learning_rate": 0.00018035465142445656, "loss": 0.2838, "step": 9303 }, { "epoch": 0.7537265068049255, "grad_norm": 0.034182414412498474, "learning_rate": 0.00018035015077186192, "loss": 0.3159, "step": 9304 }, { "epoch": 0.7538075178224238, "grad_norm": 0.03301328048110008, "learning_rate": 0.0001803456501192673, "loss": 0.3639, "step": 9305 }, { "epoch": 0.7538885288399222, "grad_norm": 0.030668480321764946, "learning_rate": 0.00018034114946667267, "loss": 0.3159, "step": 9306 }, { "epoch": 0.7539695398574207, "grad_norm": 0.03611016646027565, "learning_rate": 0.00018033664881407805, "loss": 0.3313, "step": 9307 }, { "epoch": 0.754050550874919, "grad_norm": 0.030725689604878426, "learning_rate": 0.00018033214816148342, "loss": 0.3352, "step": 9308 }, { "epoch": 0.7541315618924174, "grad_norm": 0.0315089076757431, "learning_rate": 0.0001803276475088888, "loss": 0.3145, "step": 9309 }, { "epoch": 0.7542125729099157, "grad_norm": 0.02995385229587555, "learning_rate": 0.00018032314685629416, "loss": 0.3134, "step": 9310 }, { "epoch": 0.7542935839274141, "grad_norm": 0.02921799197793007, "learning_rate": 0.00018031864620369955, "loss": 0.361, "step": 9311 }, { "epoch": 0.7543745949449125, "grad_norm": 0.033073678612709045, "learning_rate": 0.00018031414555110494, "loss": 0.3352, "step": 9312 }, { "epoch": 0.7544556059624109, "grad_norm": 0.03068387322127819, "learning_rate": 0.0001803096448985103, "loss": 0.3258, "step": 9313 }, { "epoch": 0.7545366169799093, "grad_norm": 0.031815964728593826, "learning_rate": 0.00018030514424591566, "loss": 0.3541, "step": 9314 }, { "epoch": 0.7546176279974076, "grad_norm": 0.03938791900873184, "learning_rate": 0.00018030064359332104, "loss": 0.3434, "step": 9315 }, { "epoch": 0.7546986390149061, "grad_norm": 0.03221413120627403, "learning_rate": 0.0001802961429407264, "loss": 0.3123, "step": 9316 }, { "epoch": 0.7547796500324044, "grad_norm": 0.030221721157431602, "learning_rate": 0.0001802916422881318, "loss": 0.3206, "step": 9317 }, { "epoch": 0.7548606610499028, "grad_norm": 0.029876358807086945, "learning_rate": 0.00018028714163553718, "loss": 0.3194, "step": 9318 }, { "epoch": 0.7549416720674011, "grad_norm": 0.03315850347280502, "learning_rate": 0.00018028264098294254, "loss": 0.3337, "step": 9319 }, { "epoch": 0.7550226830848995, "grad_norm": 0.03478917479515076, "learning_rate": 0.0001802781403303479, "loss": 0.3457, "step": 9320 }, { "epoch": 0.755103694102398, "grad_norm": 0.03456339240074158, "learning_rate": 0.00018027363967775329, "loss": 0.3242, "step": 9321 }, { "epoch": 0.7551847051198963, "grad_norm": 0.03251144289970398, "learning_rate": 0.00018026913902515865, "loss": 0.3594, "step": 9322 }, { "epoch": 0.7552657161373947, "grad_norm": 0.0323588103055954, "learning_rate": 0.00018026463837256403, "loss": 0.3321, "step": 9323 }, { "epoch": 0.755346727154893, "grad_norm": 0.034761857241392136, "learning_rate": 0.00018026013771996942, "loss": 0.3711, "step": 9324 }, { "epoch": 0.7554277381723914, "grad_norm": 0.032338641583919525, "learning_rate": 0.00018025563706737478, "loss": 0.3045, "step": 9325 }, { "epoch": 0.7555087491898899, "grad_norm": 0.030599815770983696, "learning_rate": 0.00018025113641478014, "loss": 0.3054, "step": 9326 }, { "epoch": 0.7555897602073882, "grad_norm": 0.03238639235496521, "learning_rate": 0.00018024663576218553, "loss": 0.3283, "step": 9327 }, { "epoch": 0.7556707712248866, "grad_norm": 0.0294541846960783, "learning_rate": 0.0001802421351095909, "loss": 0.3159, "step": 9328 }, { "epoch": 0.7557517822423849, "grad_norm": 0.03155055642127991, "learning_rate": 0.00018023763445699627, "loss": 0.2916, "step": 9329 }, { "epoch": 0.7558327932598834, "grad_norm": 0.02897864580154419, "learning_rate": 0.00018023313380440166, "loss": 0.2919, "step": 9330 }, { "epoch": 0.7559138042773818, "grad_norm": 0.03341088071465492, "learning_rate": 0.00018022863315180702, "loss": 0.3239, "step": 9331 }, { "epoch": 0.7559948152948801, "grad_norm": 0.03133474290370941, "learning_rate": 0.00018022413249921238, "loss": 0.371, "step": 9332 }, { "epoch": 0.7560758263123785, "grad_norm": 0.034195493906736374, "learning_rate": 0.00018021963184661777, "loss": 0.3424, "step": 9333 }, { "epoch": 0.7561568373298768, "grad_norm": 0.026915594935417175, "learning_rate": 0.00018021513119402313, "loss": 0.2903, "step": 9334 }, { "epoch": 0.7562378483473753, "grad_norm": 0.030386967584490776, "learning_rate": 0.00018021063054142852, "loss": 0.334, "step": 9335 }, { "epoch": 0.7563188593648736, "grad_norm": 0.029541438445448875, "learning_rate": 0.0001802061298888339, "loss": 0.2776, "step": 9336 }, { "epoch": 0.756399870382372, "grad_norm": 0.03158644214272499, "learning_rate": 0.00018020162923623926, "loss": 0.3009, "step": 9337 }, { "epoch": 0.7564808813998704, "grad_norm": 0.03073226846754551, "learning_rate": 0.00018019712858364462, "loss": 0.3201, "step": 9338 }, { "epoch": 0.7565618924173687, "grad_norm": 0.0377579927444458, "learning_rate": 0.00018019262793105, "loss": 0.3696, "step": 9339 }, { "epoch": 0.7566429034348672, "grad_norm": 0.03365955501794815, "learning_rate": 0.00018018812727845537, "loss": 0.3081, "step": 9340 }, { "epoch": 0.7567239144523655, "grad_norm": 0.032374121248722076, "learning_rate": 0.00018018362662586076, "loss": 0.343, "step": 9341 }, { "epoch": 0.7568049254698639, "grad_norm": 0.03342792019248009, "learning_rate": 0.00018017912597326614, "loss": 0.3669, "step": 9342 }, { "epoch": 0.7568859364873622, "grad_norm": 0.03184104710817337, "learning_rate": 0.0001801746253206715, "loss": 0.3532, "step": 9343 }, { "epoch": 0.7569669475048607, "grad_norm": 0.03416401892900467, "learning_rate": 0.0001801701246680769, "loss": 0.4207, "step": 9344 }, { "epoch": 0.7570479585223591, "grad_norm": 0.03579508140683174, "learning_rate": 0.00018016562401548225, "loss": 0.3759, "step": 9345 }, { "epoch": 0.7571289695398574, "grad_norm": 0.03344917297363281, "learning_rate": 0.0001801611233628876, "loss": 0.3761, "step": 9346 }, { "epoch": 0.7572099805573558, "grad_norm": 0.03486419841647148, "learning_rate": 0.000180156622710293, "loss": 0.3805, "step": 9347 }, { "epoch": 0.7572909915748541, "grad_norm": 0.03962663188576698, "learning_rate": 0.00018015212205769839, "loss": 0.3267, "step": 9348 }, { "epoch": 0.7573720025923526, "grad_norm": 0.03494442626833916, "learning_rate": 0.00018014762140510375, "loss": 0.3187, "step": 9349 }, { "epoch": 0.757453013609851, "grad_norm": 0.02679494582116604, "learning_rate": 0.00018014312075250913, "loss": 0.292, "step": 9350 }, { "epoch": 0.7575340246273493, "grad_norm": 0.03419162333011627, "learning_rate": 0.0001801386200999145, "loss": 0.3537, "step": 9351 }, { "epoch": 0.7576150356448477, "grad_norm": 0.031362127512693405, "learning_rate": 0.00018013411944731985, "loss": 0.3032, "step": 9352 }, { "epoch": 0.7576960466623461, "grad_norm": 0.03217475116252899, "learning_rate": 0.00018012961879472524, "loss": 0.3378, "step": 9353 }, { "epoch": 0.7577770576798445, "grad_norm": 0.030293717980384827, "learning_rate": 0.00018012511814213063, "loss": 0.3262, "step": 9354 }, { "epoch": 0.7578580686973428, "grad_norm": 0.03205201402306557, "learning_rate": 0.000180120617489536, "loss": 0.3296, "step": 9355 }, { "epoch": 0.7579390797148412, "grad_norm": 0.03602740168571472, "learning_rate": 0.00018011611683694138, "loss": 0.3174, "step": 9356 }, { "epoch": 0.7580200907323396, "grad_norm": 0.029463456943631172, "learning_rate": 0.00018011161618434674, "loss": 0.2948, "step": 9357 }, { "epoch": 0.758101101749838, "grad_norm": 0.03281380981206894, "learning_rate": 0.0001801071155317521, "loss": 0.3417, "step": 9358 }, { "epoch": 0.7581821127673364, "grad_norm": 0.031789641827344894, "learning_rate": 0.00018010261487915748, "loss": 0.3238, "step": 9359 }, { "epoch": 0.7582631237848347, "grad_norm": 0.03468538448214531, "learning_rate": 0.00018009811422656287, "loss": 0.3624, "step": 9360 }, { "epoch": 0.7583441348023331, "grad_norm": 0.02855679579079151, "learning_rate": 0.00018009361357396823, "loss": 0.2951, "step": 9361 }, { "epoch": 0.7584251458198314, "grad_norm": 0.03410002589225769, "learning_rate": 0.00018008911292137362, "loss": 0.3651, "step": 9362 }, { "epoch": 0.7585061568373299, "grad_norm": 0.037260934710502625, "learning_rate": 0.00018008461226877898, "loss": 0.3393, "step": 9363 }, { "epoch": 0.7585871678548283, "grad_norm": 0.03792154788970947, "learning_rate": 0.00018008011161618436, "loss": 0.3471, "step": 9364 }, { "epoch": 0.7586681788723266, "grad_norm": 0.030598971992731094, "learning_rate": 0.00018007561096358972, "loss": 0.3217, "step": 9365 }, { "epoch": 0.758749189889825, "grad_norm": 0.0321931317448616, "learning_rate": 0.0001800711103109951, "loss": 0.3284, "step": 9366 }, { "epoch": 0.7588302009073234, "grad_norm": 0.032547708600759506, "learning_rate": 0.00018006660965840047, "loss": 0.3484, "step": 9367 }, { "epoch": 0.7589112119248218, "grad_norm": 0.034850314259529114, "learning_rate": 0.00018006210900580586, "loss": 0.3422, "step": 9368 }, { "epoch": 0.7589922229423202, "grad_norm": 0.03074449487030506, "learning_rate": 0.00018005760835321122, "loss": 0.3437, "step": 9369 }, { "epoch": 0.7590732339598185, "grad_norm": 0.03898514434695244, "learning_rate": 0.0001800531077006166, "loss": 0.3742, "step": 9370 }, { "epoch": 0.7591542449773169, "grad_norm": 0.033423904329538345, "learning_rate": 0.00018004860704802197, "loss": 0.3415, "step": 9371 }, { "epoch": 0.7592352559948153, "grad_norm": 0.030887460336089134, "learning_rate": 0.00018004410639542735, "loss": 0.3329, "step": 9372 }, { "epoch": 0.7593162670123137, "grad_norm": 0.031972624361515045, "learning_rate": 0.0001800396057428327, "loss": 0.3321, "step": 9373 }, { "epoch": 0.759397278029812, "grad_norm": 0.03002900630235672, "learning_rate": 0.0001800351050902381, "loss": 0.3058, "step": 9374 }, { "epoch": 0.7594782890473104, "grad_norm": 0.03169652074575424, "learning_rate": 0.00018003060443764346, "loss": 0.3284, "step": 9375 }, { "epoch": 0.7595593000648088, "grad_norm": 0.030031373724341393, "learning_rate": 0.00018002610378504885, "loss": 0.2913, "step": 9376 }, { "epoch": 0.7596403110823072, "grad_norm": 0.029552794992923737, "learning_rate": 0.0001800216031324542, "loss": 0.3228, "step": 9377 }, { "epoch": 0.7597213220998056, "grad_norm": 0.029546670615673065, "learning_rate": 0.0001800171024798596, "loss": 0.3027, "step": 9378 }, { "epoch": 0.7598023331173039, "grad_norm": 0.028050288558006287, "learning_rate": 0.00018001260182726495, "loss": 0.2832, "step": 9379 }, { "epoch": 0.7598833441348023, "grad_norm": 0.03491856902837753, "learning_rate": 0.00018000810117467034, "loss": 0.3216, "step": 9380 }, { "epoch": 0.7599643551523008, "grad_norm": 0.027808241546154022, "learning_rate": 0.0001800036005220757, "loss": 0.2929, "step": 9381 }, { "epoch": 0.7600453661697991, "grad_norm": 0.031897857785224915, "learning_rate": 0.0001799990998694811, "loss": 0.3254, "step": 9382 }, { "epoch": 0.7601263771872975, "grad_norm": 0.03805245831608772, "learning_rate": 0.00017999459921688645, "loss": 0.4039, "step": 9383 }, { "epoch": 0.7602073882047958, "grad_norm": 0.03206348046660423, "learning_rate": 0.00017999009856429184, "loss": 0.3588, "step": 9384 }, { "epoch": 0.7602883992222942, "grad_norm": 0.03059943951666355, "learning_rate": 0.0001799855979116972, "loss": 0.2983, "step": 9385 }, { "epoch": 0.7603694102397927, "grad_norm": 0.03752979636192322, "learning_rate": 0.00017998109725910258, "loss": 0.3583, "step": 9386 }, { "epoch": 0.760450421257291, "grad_norm": 0.03344959393143654, "learning_rate": 0.00017997659660650794, "loss": 0.3581, "step": 9387 }, { "epoch": 0.7605314322747894, "grad_norm": 0.039618462324142456, "learning_rate": 0.00017997209595391333, "loss": 0.3975, "step": 9388 }, { "epoch": 0.7606124432922877, "grad_norm": 0.030214224010705948, "learning_rate": 0.0001799675953013187, "loss": 0.353, "step": 9389 }, { "epoch": 0.7606934543097861, "grad_norm": 0.03098473511636257, "learning_rate": 0.00017996309464872408, "loss": 0.3169, "step": 9390 }, { "epoch": 0.7607744653272845, "grad_norm": 0.029453137889504433, "learning_rate": 0.00017995859399612944, "loss": 0.3098, "step": 9391 }, { "epoch": 0.7608554763447829, "grad_norm": 0.03248664364218712, "learning_rate": 0.00017995409334353482, "loss": 0.3471, "step": 9392 }, { "epoch": 0.7609364873622813, "grad_norm": 0.03373479098081589, "learning_rate": 0.0001799495926909402, "loss": 0.3884, "step": 9393 }, { "epoch": 0.7610174983797796, "grad_norm": 0.03235981985926628, "learning_rate": 0.00017994509203834557, "loss": 0.3485, "step": 9394 }, { "epoch": 0.7610985093972781, "grad_norm": 0.0278353039175272, "learning_rate": 0.00017994059138575093, "loss": 0.3181, "step": 9395 }, { "epoch": 0.7611795204147764, "grad_norm": 0.03313388302922249, "learning_rate": 0.00017993609073315632, "loss": 0.3985, "step": 9396 }, { "epoch": 0.7612605314322748, "grad_norm": 0.0328248105943203, "learning_rate": 0.00017993159008056168, "loss": 0.3568, "step": 9397 }, { "epoch": 0.7613415424497731, "grad_norm": 0.032912254333496094, "learning_rate": 0.00017992708942796707, "loss": 0.3182, "step": 9398 }, { "epoch": 0.7614225534672715, "grad_norm": 0.03135635331273079, "learning_rate": 0.00017992258877537245, "loss": 0.3237, "step": 9399 }, { "epoch": 0.76150356448477, "grad_norm": 0.02806262858211994, "learning_rate": 0.00017991808812277781, "loss": 0.3339, "step": 9400 }, { "epoch": 0.7615845755022683, "grad_norm": 0.033621639013290405, "learning_rate": 0.00017991358747018317, "loss": 0.3376, "step": 9401 }, { "epoch": 0.7616655865197667, "grad_norm": 0.03141626715660095, "learning_rate": 0.00017990908681758856, "loss": 0.3296, "step": 9402 }, { "epoch": 0.761746597537265, "grad_norm": 0.030434491112828255, "learning_rate": 0.00017990458616499392, "loss": 0.2825, "step": 9403 }, { "epoch": 0.7618276085547635, "grad_norm": 0.029175328090786934, "learning_rate": 0.0001799000855123993, "loss": 0.3109, "step": 9404 }, { "epoch": 0.7619086195722619, "grad_norm": 0.031339529901742935, "learning_rate": 0.0001798955848598047, "loss": 0.3267, "step": 9405 }, { "epoch": 0.7619896305897602, "grad_norm": 0.031228726729750633, "learning_rate": 0.00017989108420721006, "loss": 0.3275, "step": 9406 }, { "epoch": 0.7620706416072586, "grad_norm": 0.032652441412210464, "learning_rate": 0.00017988658355461542, "loss": 0.3489, "step": 9407 }, { "epoch": 0.7621516526247569, "grad_norm": 0.03801897168159485, "learning_rate": 0.0001798820829020208, "loss": 0.3678, "step": 9408 }, { "epoch": 0.7622326636422554, "grad_norm": 0.03199564665555954, "learning_rate": 0.00017987758224942616, "loss": 0.3903, "step": 9409 }, { "epoch": 0.7623136746597537, "grad_norm": 0.03358980268239975, "learning_rate": 0.00017987308159683155, "loss": 0.3262, "step": 9410 }, { "epoch": 0.7623946856772521, "grad_norm": 0.03218737989664078, "learning_rate": 0.00017986858094423694, "loss": 0.351, "step": 9411 }, { "epoch": 0.7624756966947505, "grad_norm": 0.028905585408210754, "learning_rate": 0.0001798640802916423, "loss": 0.3422, "step": 9412 }, { "epoch": 0.7625567077122488, "grad_norm": 0.03379735350608826, "learning_rate": 0.00017985957963904768, "loss": 0.3237, "step": 9413 }, { "epoch": 0.7626377187297473, "grad_norm": 0.034211091697216034, "learning_rate": 0.00017985507898645304, "loss": 0.3419, "step": 9414 }, { "epoch": 0.7627187297472456, "grad_norm": 0.03187450021505356, "learning_rate": 0.0001798505783338584, "loss": 0.3476, "step": 9415 }, { "epoch": 0.762799740764744, "grad_norm": 0.031442102044820786, "learning_rate": 0.0001798460776812638, "loss": 0.3068, "step": 9416 }, { "epoch": 0.7628807517822424, "grad_norm": 0.03673580288887024, "learning_rate": 0.00017984157702866918, "loss": 0.3134, "step": 9417 }, { "epoch": 0.7629617627997408, "grad_norm": 0.028350602835416794, "learning_rate": 0.00017983707637607454, "loss": 0.3164, "step": 9418 }, { "epoch": 0.7630427738172392, "grad_norm": 0.03699628636240959, "learning_rate": 0.00017983257572347993, "loss": 0.3638, "step": 9419 }, { "epoch": 0.7631237848347375, "grad_norm": 0.03153945133090019, "learning_rate": 0.00017982807507088529, "loss": 0.3471, "step": 9420 }, { "epoch": 0.7632047958522359, "grad_norm": 0.03328992798924446, "learning_rate": 0.00017982357441829065, "loss": 0.3345, "step": 9421 }, { "epoch": 0.7632858068697342, "grad_norm": 0.03313197195529938, "learning_rate": 0.00017981907376569603, "loss": 0.3277, "step": 9422 }, { "epoch": 0.7633668178872327, "grad_norm": 0.033376265317201614, "learning_rate": 0.00017981457311310142, "loss": 0.3647, "step": 9423 }, { "epoch": 0.7634478289047311, "grad_norm": 0.029434161260724068, "learning_rate": 0.00017981007246050678, "loss": 0.3253, "step": 9424 }, { "epoch": 0.7635288399222294, "grad_norm": 0.035335421562194824, "learning_rate": 0.00017980557180791217, "loss": 0.3435, "step": 9425 }, { "epoch": 0.7636098509397278, "grad_norm": 0.033159878104925156, "learning_rate": 0.00017980107115531753, "loss": 0.3497, "step": 9426 }, { "epoch": 0.7636908619572261, "grad_norm": 0.035425253212451935, "learning_rate": 0.0001797965705027229, "loss": 0.3333, "step": 9427 }, { "epoch": 0.7637718729747246, "grad_norm": 0.032743070274591446, "learning_rate": 0.00017979206985012827, "loss": 0.3062, "step": 9428 }, { "epoch": 0.763852883992223, "grad_norm": 0.032274626195430756, "learning_rate": 0.00017978756919753366, "loss": 0.3587, "step": 9429 }, { "epoch": 0.7639338950097213, "grad_norm": 0.032194703817367554, "learning_rate": 0.00017978306854493902, "loss": 0.3454, "step": 9430 }, { "epoch": 0.7640149060272197, "grad_norm": 0.028805602341890335, "learning_rate": 0.0001797785678923444, "loss": 0.3055, "step": 9431 }, { "epoch": 0.7640959170447181, "grad_norm": 0.03278293088078499, "learning_rate": 0.00017977406723974977, "loss": 0.3308, "step": 9432 }, { "epoch": 0.7641769280622165, "grad_norm": 0.03008844517171383, "learning_rate": 0.00017976956658715513, "loss": 0.3352, "step": 9433 }, { "epoch": 0.7642579390797148, "grad_norm": 0.03664538636803627, "learning_rate": 0.00017976506593456052, "loss": 0.3172, "step": 9434 }, { "epoch": 0.7643389500972132, "grad_norm": 0.034973543137311935, "learning_rate": 0.0001797605652819659, "loss": 0.3243, "step": 9435 }, { "epoch": 0.7644199611147116, "grad_norm": 0.02906326949596405, "learning_rate": 0.00017975606462937126, "loss": 0.3168, "step": 9436 }, { "epoch": 0.76450097213221, "grad_norm": 0.028848815709352493, "learning_rate": 0.00017975156397677665, "loss": 0.3766, "step": 9437 }, { "epoch": 0.7645819831497084, "grad_norm": 0.033257681876420975, "learning_rate": 0.000179747063324182, "loss": 0.3484, "step": 9438 }, { "epoch": 0.7646629941672067, "grad_norm": 0.03166605159640312, "learning_rate": 0.00017974256267158737, "loss": 0.3636, "step": 9439 }, { "epoch": 0.7647440051847051, "grad_norm": 0.03185746818780899, "learning_rate": 0.00017973806201899276, "loss": 0.3032, "step": 9440 }, { "epoch": 0.7648250162022034, "grad_norm": 0.028180088847875595, "learning_rate": 0.00017973356136639814, "loss": 0.3359, "step": 9441 }, { "epoch": 0.7649060272197019, "grad_norm": 0.03395479917526245, "learning_rate": 0.0001797290607138035, "loss": 0.3566, "step": 9442 }, { "epoch": 0.7649870382372003, "grad_norm": 0.03671841323375702, "learning_rate": 0.0001797245600612089, "loss": 0.3645, "step": 9443 }, { "epoch": 0.7650680492546986, "grad_norm": 0.03072306700050831, "learning_rate": 0.00017972005940861425, "loss": 0.295, "step": 9444 }, { "epoch": 0.765149060272197, "grad_norm": 0.03294285759329796, "learning_rate": 0.00017971555875601964, "loss": 0.355, "step": 9445 }, { "epoch": 0.7652300712896954, "grad_norm": 0.03324880078434944, "learning_rate": 0.000179711058103425, "loss": 0.3283, "step": 9446 }, { "epoch": 0.7653110823071938, "grad_norm": 0.02983265370130539, "learning_rate": 0.0001797065574508304, "loss": 0.2818, "step": 9447 }, { "epoch": 0.7653920933246922, "grad_norm": 0.03002902865409851, "learning_rate": 0.00017970205679823575, "loss": 0.3334, "step": 9448 }, { "epoch": 0.7654731043421905, "grad_norm": 0.03378603234887123, "learning_rate": 0.00017969755614564113, "loss": 0.3495, "step": 9449 }, { "epoch": 0.7655541153596889, "grad_norm": 0.02799426205456257, "learning_rate": 0.0001796930554930465, "loss": 0.293, "step": 9450 }, { "epoch": 0.7656351263771873, "grad_norm": 0.0365104004740715, "learning_rate": 0.00017968855484045188, "loss": 0.3561, "step": 9451 }, { "epoch": 0.7657161373946857, "grad_norm": 0.030740009620785713, "learning_rate": 0.00017968405418785724, "loss": 0.3388, "step": 9452 }, { "epoch": 0.765797148412184, "grad_norm": 0.029759861528873444, "learning_rate": 0.00017967955353526263, "loss": 0.2985, "step": 9453 }, { "epoch": 0.7658781594296824, "grad_norm": 0.03421476483345032, "learning_rate": 0.000179675052882668, "loss": 0.3903, "step": 9454 }, { "epoch": 0.7659591704471809, "grad_norm": 0.03636905923485756, "learning_rate": 0.00017967055223007338, "loss": 0.3207, "step": 9455 }, { "epoch": 0.7660401814646792, "grad_norm": 0.030458511784672737, "learning_rate": 0.00017966605157747874, "loss": 0.3277, "step": 9456 }, { "epoch": 0.7661211924821776, "grad_norm": 0.03039832040667534, "learning_rate": 0.00017966155092488412, "loss": 0.3439, "step": 9457 }, { "epoch": 0.7662022034996759, "grad_norm": 0.03035557270050049, "learning_rate": 0.00017965705027228948, "loss": 0.3679, "step": 9458 }, { "epoch": 0.7662832145171743, "grad_norm": 0.028992289677262306, "learning_rate": 0.00017965254961969487, "loss": 0.3216, "step": 9459 }, { "epoch": 0.7663642255346728, "grad_norm": 0.029640663415193558, "learning_rate": 0.00017964804896710023, "loss": 0.3101, "step": 9460 }, { "epoch": 0.7664452365521711, "grad_norm": 0.03412988781929016, "learning_rate": 0.00017964354831450562, "loss": 0.3398, "step": 9461 }, { "epoch": 0.7665262475696695, "grad_norm": 0.02864237129688263, "learning_rate": 0.00017963904766191098, "loss": 0.3201, "step": 9462 }, { "epoch": 0.7666072585871678, "grad_norm": 0.03264410048723221, "learning_rate": 0.00017963454700931636, "loss": 0.3465, "step": 9463 }, { "epoch": 0.7666882696046662, "grad_norm": 0.03233474865555763, "learning_rate": 0.00017963004635672172, "loss": 0.3573, "step": 9464 }, { "epoch": 0.7667692806221647, "grad_norm": 0.03645838797092438, "learning_rate": 0.0001796255457041271, "loss": 0.3595, "step": 9465 }, { "epoch": 0.766850291639663, "grad_norm": 0.03317071124911308, "learning_rate": 0.00017962104505153247, "loss": 0.348, "step": 9466 }, { "epoch": 0.7669313026571614, "grad_norm": 0.02817850187420845, "learning_rate": 0.00017961654439893786, "loss": 0.2987, "step": 9467 }, { "epoch": 0.7670123136746597, "grad_norm": 0.028615185990929604, "learning_rate": 0.00017961204374634325, "loss": 0.29, "step": 9468 }, { "epoch": 0.7670933246921582, "grad_norm": 0.039165839552879333, "learning_rate": 0.0001796075430937486, "loss": 0.3669, "step": 9469 }, { "epoch": 0.7671743357096565, "grad_norm": 0.0342554971575737, "learning_rate": 0.00017960304244115397, "loss": 0.3504, "step": 9470 }, { "epoch": 0.7672553467271549, "grad_norm": 0.030537201091647148, "learning_rate": 0.00017959854178855935, "loss": 0.2925, "step": 9471 }, { "epoch": 0.7673363577446533, "grad_norm": 0.033227209001779556, "learning_rate": 0.0001795940411359647, "loss": 0.3262, "step": 9472 }, { "epoch": 0.7674173687621516, "grad_norm": 0.03859534487128258, "learning_rate": 0.0001795895404833701, "loss": 0.3416, "step": 9473 }, { "epoch": 0.7674983797796501, "grad_norm": 0.031956154853105545, "learning_rate": 0.0001795850398307755, "loss": 0.3614, "step": 9474 }, { "epoch": 0.7675793907971484, "grad_norm": 0.032025307416915894, "learning_rate": 0.00017958053917818085, "loss": 0.3267, "step": 9475 }, { "epoch": 0.7676604018146468, "grad_norm": 0.03213995695114136, "learning_rate": 0.0001795760385255862, "loss": 0.3687, "step": 9476 }, { "epoch": 0.7677414128321451, "grad_norm": 0.03206200152635574, "learning_rate": 0.0001795715378729916, "loss": 0.3219, "step": 9477 }, { "epoch": 0.7678224238496435, "grad_norm": 0.03265627846121788, "learning_rate": 0.00017956703722039695, "loss": 0.3215, "step": 9478 }, { "epoch": 0.767903434867142, "grad_norm": 0.03244243562221527, "learning_rate": 0.00017956253656780234, "loss": 0.3481, "step": 9479 }, { "epoch": 0.7679844458846403, "grad_norm": 0.037454694509506226, "learning_rate": 0.00017955803591520773, "loss": 0.4056, "step": 9480 }, { "epoch": 0.7680654569021387, "grad_norm": 0.03103182651102543, "learning_rate": 0.0001795535352626131, "loss": 0.3104, "step": 9481 }, { "epoch": 0.768146467919637, "grad_norm": 0.03464951366186142, "learning_rate": 0.00017954903461001848, "loss": 0.3621, "step": 9482 }, { "epoch": 0.7682274789371355, "grad_norm": 0.03253443166613579, "learning_rate": 0.00017954453395742384, "loss": 0.3356, "step": 9483 }, { "epoch": 0.7683084899546339, "grad_norm": 0.03156166151165962, "learning_rate": 0.0001795400333048292, "loss": 0.3175, "step": 9484 }, { "epoch": 0.7683895009721322, "grad_norm": 0.030760960653424263, "learning_rate": 0.00017953553265223458, "loss": 0.3421, "step": 9485 }, { "epoch": 0.7684705119896306, "grad_norm": 0.031788572669029236, "learning_rate": 0.00017953103199963997, "loss": 0.2998, "step": 9486 }, { "epoch": 0.7685515230071289, "grad_norm": 0.02940155379474163, "learning_rate": 0.00017952653134704533, "loss": 0.301, "step": 9487 }, { "epoch": 0.7686325340246274, "grad_norm": 0.03467442840337753, "learning_rate": 0.00017952203069445072, "loss": 0.3603, "step": 9488 }, { "epoch": 0.7687135450421257, "grad_norm": 0.0318903923034668, "learning_rate": 0.00017951753004185608, "loss": 0.3347, "step": 9489 }, { "epoch": 0.7687945560596241, "grad_norm": 0.03219975158572197, "learning_rate": 0.00017951302938926144, "loss": 0.3164, "step": 9490 }, { "epoch": 0.7688755670771225, "grad_norm": 0.03198402747511864, "learning_rate": 0.00017950852873666683, "loss": 0.3344, "step": 9491 }, { "epoch": 0.7689565780946209, "grad_norm": 0.029385024681687355, "learning_rate": 0.0001795040280840722, "loss": 0.2955, "step": 9492 }, { "epoch": 0.7690375891121193, "grad_norm": 0.03244990482926369, "learning_rate": 0.00017949952743147757, "loss": 0.3284, "step": 9493 }, { "epoch": 0.7691186001296176, "grad_norm": 0.0280169527977705, "learning_rate": 0.00017949502677888296, "loss": 0.2783, "step": 9494 }, { "epoch": 0.769199611147116, "grad_norm": 0.035642508417367935, "learning_rate": 0.00017949052612628832, "loss": 0.3739, "step": 9495 }, { "epoch": 0.7692806221646143, "grad_norm": 0.03269866853952408, "learning_rate": 0.00017948602547369368, "loss": 0.315, "step": 9496 }, { "epoch": 0.7693616331821128, "grad_norm": 0.03235873579978943, "learning_rate": 0.00017948152482109907, "loss": 0.3678, "step": 9497 }, { "epoch": 0.7694426441996112, "grad_norm": 0.03456562012434006, "learning_rate": 0.00017947702416850445, "loss": 0.322, "step": 9498 }, { "epoch": 0.7695236552171095, "grad_norm": 0.036083102226257324, "learning_rate": 0.00017947252351590981, "loss": 0.3906, "step": 9499 }, { "epoch": 0.7696046662346079, "grad_norm": 0.03269893676042557, "learning_rate": 0.0001794680228633152, "loss": 0.334, "step": 9500 }, { "epoch": 0.7696856772521062, "grad_norm": 0.033791083842515945, "learning_rate": 0.00017946352221072056, "loss": 0.3306, "step": 9501 }, { "epoch": 0.7697666882696047, "grad_norm": 0.03444517031311989, "learning_rate": 0.00017945902155812592, "loss": 0.3183, "step": 9502 }, { "epoch": 0.7698476992871031, "grad_norm": 0.03153405338525772, "learning_rate": 0.0001794545209055313, "loss": 0.3229, "step": 9503 }, { "epoch": 0.7699287103046014, "grad_norm": 0.03601398691534996, "learning_rate": 0.0001794500202529367, "loss": 0.3615, "step": 9504 }, { "epoch": 0.7700097213220998, "grad_norm": 0.03581347316503525, "learning_rate": 0.00017944551960034206, "loss": 0.3464, "step": 9505 }, { "epoch": 0.7700907323395982, "grad_norm": 0.031124750152230263, "learning_rate": 0.00017944101894774744, "loss": 0.3338, "step": 9506 }, { "epoch": 0.7701717433570966, "grad_norm": 0.031031455844640732, "learning_rate": 0.0001794365182951528, "loss": 0.3398, "step": 9507 }, { "epoch": 0.770252754374595, "grad_norm": 0.031076917424798012, "learning_rate": 0.00017943201764255816, "loss": 0.31, "step": 9508 }, { "epoch": 0.7703337653920933, "grad_norm": 0.037329282611608505, "learning_rate": 0.00017942751698996355, "loss": 0.3188, "step": 9509 }, { "epoch": 0.7704147764095917, "grad_norm": 0.036704570055007935, "learning_rate": 0.00017942301633736894, "loss": 0.3305, "step": 9510 }, { "epoch": 0.7704957874270901, "grad_norm": 0.036286912858486176, "learning_rate": 0.0001794185156847743, "loss": 0.3318, "step": 9511 }, { "epoch": 0.7705767984445885, "grad_norm": 0.033339180052280426, "learning_rate": 0.00017941401503217968, "loss": 0.3735, "step": 9512 }, { "epoch": 0.7706578094620868, "grad_norm": 0.026896296069025993, "learning_rate": 0.00017940951437958504, "loss": 0.274, "step": 9513 }, { "epoch": 0.7707388204795852, "grad_norm": 0.03523967042565346, "learning_rate": 0.0001794050137269904, "loss": 0.3305, "step": 9514 }, { "epoch": 0.7708198314970836, "grad_norm": 0.034618232399225235, "learning_rate": 0.0001794005130743958, "loss": 0.3455, "step": 9515 }, { "epoch": 0.770900842514582, "grad_norm": 0.034029290080070496, "learning_rate": 0.00017939601242180118, "loss": 0.3354, "step": 9516 }, { "epoch": 0.7709818535320804, "grad_norm": 0.034780219197273254, "learning_rate": 0.00017939151176920654, "loss": 0.365, "step": 9517 }, { "epoch": 0.7710628645495787, "grad_norm": 0.03206856921315193, "learning_rate": 0.00017938701111661193, "loss": 0.367, "step": 9518 }, { "epoch": 0.7711438755670771, "grad_norm": 0.03862106427550316, "learning_rate": 0.00017938251046401729, "loss": 0.384, "step": 9519 }, { "epoch": 0.7712248865845756, "grad_norm": 0.03470539674162865, "learning_rate": 0.00017937800981142267, "loss": 0.3311, "step": 9520 }, { "epoch": 0.7713058976020739, "grad_norm": 0.02929665707051754, "learning_rate": 0.00017937350915882803, "loss": 0.3018, "step": 9521 }, { "epoch": 0.7713869086195723, "grad_norm": 0.03172649070620537, "learning_rate": 0.00017936900850623342, "loss": 0.3377, "step": 9522 }, { "epoch": 0.7714679196370706, "grad_norm": 0.03501424938440323, "learning_rate": 0.00017936450785363878, "loss": 0.388, "step": 9523 }, { "epoch": 0.771548930654569, "grad_norm": 0.03126747906208038, "learning_rate": 0.00017936000720104417, "loss": 0.3486, "step": 9524 }, { "epoch": 0.7716299416720674, "grad_norm": 0.02895498462021351, "learning_rate": 0.00017935550654844953, "loss": 0.3092, "step": 9525 }, { "epoch": 0.7717109526895658, "grad_norm": 0.03243519365787506, "learning_rate": 0.00017935100589585491, "loss": 0.3233, "step": 9526 }, { "epoch": 0.7717919637070642, "grad_norm": 0.03405696526169777, "learning_rate": 0.00017934650524326027, "loss": 0.2988, "step": 9527 }, { "epoch": 0.7718729747245625, "grad_norm": 0.03410395607352257, "learning_rate": 0.00017934200459066566, "loss": 0.3578, "step": 9528 }, { "epoch": 0.7719539857420609, "grad_norm": 0.030598022043704987, "learning_rate": 0.00017933750393807102, "loss": 0.3091, "step": 9529 }, { "epoch": 0.7720349967595593, "grad_norm": 0.0366281159222126, "learning_rate": 0.0001793330032854764, "loss": 0.3863, "step": 9530 }, { "epoch": 0.7721160077770577, "grad_norm": 0.036652810871601105, "learning_rate": 0.00017932850263288177, "loss": 0.3833, "step": 9531 }, { "epoch": 0.772197018794556, "grad_norm": 0.03007068671286106, "learning_rate": 0.00017932400198028716, "loss": 0.3465, "step": 9532 }, { "epoch": 0.7722780298120544, "grad_norm": 0.031867507845163345, "learning_rate": 0.00017931950132769252, "loss": 0.3191, "step": 9533 }, { "epoch": 0.7723590408295529, "grad_norm": 0.031599272042512894, "learning_rate": 0.0001793150006750979, "loss": 0.3866, "step": 9534 }, { "epoch": 0.7724400518470512, "grad_norm": 0.03572344034910202, "learning_rate": 0.00017931050002250326, "loss": 0.3925, "step": 9535 }, { "epoch": 0.7725210628645496, "grad_norm": 0.033920932561159134, "learning_rate": 0.00017930599936990865, "loss": 0.3545, "step": 9536 }, { "epoch": 0.7726020738820479, "grad_norm": 0.032732460647821426, "learning_rate": 0.000179301498717314, "loss": 0.3351, "step": 9537 }, { "epoch": 0.7726830848995463, "grad_norm": 0.030768388882279396, "learning_rate": 0.0001792969980647194, "loss": 0.3133, "step": 9538 }, { "epoch": 0.7727640959170448, "grad_norm": 0.028435923159122467, "learning_rate": 0.00017929249741212476, "loss": 0.325, "step": 9539 }, { "epoch": 0.7728451069345431, "grad_norm": 0.030626384541392326, "learning_rate": 0.00017928799675953015, "loss": 0.2976, "step": 9540 }, { "epoch": 0.7729261179520415, "grad_norm": 0.03307020291686058, "learning_rate": 0.0001792834961069355, "loss": 0.3515, "step": 9541 }, { "epoch": 0.7730071289695398, "grad_norm": 0.030811350792646408, "learning_rate": 0.0001792789954543409, "loss": 0.3202, "step": 9542 }, { "epoch": 0.7730881399870383, "grad_norm": 0.03180959075689316, "learning_rate": 0.00017927449480174625, "loss": 0.3461, "step": 9543 }, { "epoch": 0.7731691510045366, "grad_norm": 0.03422219678759575, "learning_rate": 0.00017926999414915164, "loss": 0.3214, "step": 9544 }, { "epoch": 0.773250162022035, "grad_norm": 0.03370879590511322, "learning_rate": 0.000179265493496557, "loss": 0.3632, "step": 9545 }, { "epoch": 0.7733311730395334, "grad_norm": 0.028467241674661636, "learning_rate": 0.0001792609928439624, "loss": 0.2876, "step": 9546 }, { "epoch": 0.7734121840570317, "grad_norm": 0.034185174852609634, "learning_rate": 0.00017925649219136775, "loss": 0.3264, "step": 9547 }, { "epoch": 0.7734931950745302, "grad_norm": 0.030525004491209984, "learning_rate": 0.00017925199153877313, "loss": 0.3245, "step": 9548 }, { "epoch": 0.7735742060920285, "grad_norm": 0.03793109208345413, "learning_rate": 0.00017924749088617852, "loss": 0.3661, "step": 9549 }, { "epoch": 0.7736552171095269, "grad_norm": 0.03561738505959511, "learning_rate": 0.00017924299023358388, "loss": 0.397, "step": 9550 }, { "epoch": 0.7737362281270252, "grad_norm": 0.029582394286990166, "learning_rate": 0.00017923848958098927, "loss": 0.2889, "step": 9551 }, { "epoch": 0.7738172391445236, "grad_norm": 0.030550727620720863, "learning_rate": 0.00017923398892839463, "loss": 0.3313, "step": 9552 }, { "epoch": 0.7738982501620221, "grad_norm": 0.040079787373542786, "learning_rate": 0.0001792294882758, "loss": 0.3366, "step": 9553 }, { "epoch": 0.7739792611795204, "grad_norm": 0.033145103603601456, "learning_rate": 0.00017922498762320538, "loss": 0.3652, "step": 9554 }, { "epoch": 0.7740602721970188, "grad_norm": 0.03565409779548645, "learning_rate": 0.00017922048697061076, "loss": 0.3709, "step": 9555 }, { "epoch": 0.7741412832145171, "grad_norm": 0.03129686042666435, "learning_rate": 0.00017921598631801612, "loss": 0.3, "step": 9556 }, { "epoch": 0.7742222942320156, "grad_norm": 0.033627647906541824, "learning_rate": 0.0001792114856654215, "loss": 0.3353, "step": 9557 }, { "epoch": 0.774303305249514, "grad_norm": 0.03130074590444565, "learning_rate": 0.00017920698501282687, "loss": 0.3693, "step": 9558 }, { "epoch": 0.7743843162670123, "grad_norm": 0.031756918877363205, "learning_rate": 0.00017920248436023223, "loss": 0.3226, "step": 9559 }, { "epoch": 0.7744653272845107, "grad_norm": 0.027761923149228096, "learning_rate": 0.00017919798370763762, "loss": 0.3002, "step": 9560 }, { "epoch": 0.774546338302009, "grad_norm": 0.030556004494428635, "learning_rate": 0.000179193483055043, "loss": 0.3502, "step": 9561 }, { "epoch": 0.7746273493195075, "grad_norm": 0.03501565754413605, "learning_rate": 0.00017918898240244836, "loss": 0.3477, "step": 9562 }, { "epoch": 0.7747083603370059, "grad_norm": 0.035923443734645844, "learning_rate": 0.00017918448174985375, "loss": 0.3512, "step": 9563 }, { "epoch": 0.7747893713545042, "grad_norm": 0.037229202687740326, "learning_rate": 0.0001791799810972591, "loss": 0.3648, "step": 9564 }, { "epoch": 0.7748703823720026, "grad_norm": 0.029280126094818115, "learning_rate": 0.00017917548044466447, "loss": 0.3377, "step": 9565 }, { "epoch": 0.7749513933895009, "grad_norm": 0.03008767031133175, "learning_rate": 0.00017917097979206986, "loss": 0.3225, "step": 9566 }, { "epoch": 0.7750324044069994, "grad_norm": 0.030362514778971672, "learning_rate": 0.00017916647913947525, "loss": 0.3643, "step": 9567 }, { "epoch": 0.7751134154244977, "grad_norm": 0.029189782217144966, "learning_rate": 0.0001791619784868806, "loss": 0.3192, "step": 9568 }, { "epoch": 0.7751944264419961, "grad_norm": 0.030873114243149757, "learning_rate": 0.000179157477834286, "loss": 0.3158, "step": 9569 }, { "epoch": 0.7752754374594945, "grad_norm": 0.03154481202363968, "learning_rate": 0.00017915297718169135, "loss": 0.3176, "step": 9570 }, { "epoch": 0.7753564484769929, "grad_norm": 0.03424935042858124, "learning_rate": 0.0001791484765290967, "loss": 0.3161, "step": 9571 }, { "epoch": 0.7754374594944913, "grad_norm": 0.0363914854824543, "learning_rate": 0.0001791439758765021, "loss": 0.3239, "step": 9572 }, { "epoch": 0.7755184705119896, "grad_norm": 0.031175851821899414, "learning_rate": 0.0001791394752239075, "loss": 0.3578, "step": 9573 }, { "epoch": 0.775599481529488, "grad_norm": 0.033968258649110794, "learning_rate": 0.00017913497457131285, "loss": 0.3488, "step": 9574 }, { "epoch": 0.7756804925469863, "grad_norm": 0.03248978033661842, "learning_rate": 0.00017913047391871823, "loss": 0.3245, "step": 9575 }, { "epoch": 0.7757615035644848, "grad_norm": 0.036548539996147156, "learning_rate": 0.0001791259732661236, "loss": 0.3343, "step": 9576 }, { "epoch": 0.7758425145819832, "grad_norm": 0.031858112663030624, "learning_rate": 0.00017912147261352895, "loss": 0.3543, "step": 9577 }, { "epoch": 0.7759235255994815, "grad_norm": 0.032914452254772186, "learning_rate": 0.00017911697196093434, "loss": 0.3456, "step": 9578 }, { "epoch": 0.7760045366169799, "grad_norm": 0.02853773906826973, "learning_rate": 0.00017911247130833973, "loss": 0.3068, "step": 9579 }, { "epoch": 0.7760855476344782, "grad_norm": 0.036762382835149765, "learning_rate": 0.0001791079706557451, "loss": 0.3823, "step": 9580 }, { "epoch": 0.7761665586519767, "grad_norm": 0.0352318175137043, "learning_rate": 0.00017910347000315048, "loss": 0.3595, "step": 9581 }, { "epoch": 0.7762475696694751, "grad_norm": 0.03461692854762077, "learning_rate": 0.00017909896935055584, "loss": 0.3355, "step": 9582 }, { "epoch": 0.7763285806869734, "grad_norm": 0.036282241344451904, "learning_rate": 0.0001790944686979612, "loss": 0.3133, "step": 9583 }, { "epoch": 0.7764095917044718, "grad_norm": 0.03350308537483215, "learning_rate": 0.00017908996804536658, "loss": 0.3294, "step": 9584 }, { "epoch": 0.7764906027219702, "grad_norm": 0.029508352279663086, "learning_rate": 0.00017908546739277197, "loss": 0.325, "step": 9585 }, { "epoch": 0.7765716137394686, "grad_norm": 0.03445051982998848, "learning_rate": 0.00017908096674017733, "loss": 0.3428, "step": 9586 }, { "epoch": 0.776652624756967, "grad_norm": 0.029148763045668602, "learning_rate": 0.00017907646608758272, "loss": 0.3175, "step": 9587 }, { "epoch": 0.7767336357744653, "grad_norm": 0.037063565105199814, "learning_rate": 0.00017907196543498808, "loss": 0.3849, "step": 9588 }, { "epoch": 0.7768146467919637, "grad_norm": 0.033602166920900345, "learning_rate": 0.00017906746478239344, "loss": 0.3114, "step": 9589 }, { "epoch": 0.7768956578094621, "grad_norm": 0.03480584919452667, "learning_rate": 0.00017906296412979883, "loss": 0.3049, "step": 9590 }, { "epoch": 0.7769766688269605, "grad_norm": 0.02770129404962063, "learning_rate": 0.0001790584634772042, "loss": 0.34, "step": 9591 }, { "epoch": 0.7770576798444588, "grad_norm": 0.03278804570436478, "learning_rate": 0.00017905396282460957, "loss": 0.3414, "step": 9592 }, { "epoch": 0.7771386908619572, "grad_norm": 0.02992059662938118, "learning_rate": 0.00017904946217201496, "loss": 0.3476, "step": 9593 }, { "epoch": 0.7772197018794557, "grad_norm": 0.028599180281162262, "learning_rate": 0.00017904496151942032, "loss": 0.3124, "step": 9594 }, { "epoch": 0.777300712896954, "grad_norm": 0.033986471593379974, "learning_rate": 0.00017904046086682568, "loss": 0.3397, "step": 9595 }, { "epoch": 0.7773817239144524, "grad_norm": 0.032153479754924774, "learning_rate": 0.00017903596021423107, "loss": 0.3471, "step": 9596 }, { "epoch": 0.7774627349319507, "grad_norm": 0.030729763209819794, "learning_rate": 0.00017903145956163645, "loss": 0.2989, "step": 9597 }, { "epoch": 0.7775437459494491, "grad_norm": 0.0319899320602417, "learning_rate": 0.00017902695890904181, "loss": 0.3418, "step": 9598 }, { "epoch": 0.7776247569669476, "grad_norm": 0.042746610939502716, "learning_rate": 0.0001790224582564472, "loss": 0.3612, "step": 9599 }, { "epoch": 0.7777057679844459, "grad_norm": 0.031451623886823654, "learning_rate": 0.00017901795760385256, "loss": 0.3429, "step": 9600 }, { "epoch": 0.7777867790019443, "grad_norm": 0.034506361931562424, "learning_rate": 0.00017901345695125795, "loss": 0.3292, "step": 9601 }, { "epoch": 0.7778677900194426, "grad_norm": 0.027968307957053185, "learning_rate": 0.0001790089562986633, "loss": 0.2688, "step": 9602 }, { "epoch": 0.777948801036941, "grad_norm": 0.035632938146591187, "learning_rate": 0.0001790044556460687, "loss": 0.3485, "step": 9603 }, { "epoch": 0.7780298120544394, "grad_norm": 0.03224572911858559, "learning_rate": 0.00017899995499347406, "loss": 0.3222, "step": 9604 }, { "epoch": 0.7781108230719378, "grad_norm": 0.03422819823026657, "learning_rate": 0.00017899545434087944, "loss": 0.3058, "step": 9605 }, { "epoch": 0.7781918340894362, "grad_norm": 0.032302357256412506, "learning_rate": 0.0001789909536882848, "loss": 0.3048, "step": 9606 }, { "epoch": 0.7782728451069345, "grad_norm": 0.04375520721077919, "learning_rate": 0.0001789864530356902, "loss": 0.3714, "step": 9607 }, { "epoch": 0.778353856124433, "grad_norm": 0.031402699649333954, "learning_rate": 0.00017898195238309555, "loss": 0.3035, "step": 9608 }, { "epoch": 0.7784348671419313, "grad_norm": 0.03309689834713936, "learning_rate": 0.00017897745173050094, "loss": 0.3285, "step": 9609 }, { "epoch": 0.7785158781594297, "grad_norm": 0.03338422253727913, "learning_rate": 0.0001789729510779063, "loss": 0.3332, "step": 9610 }, { "epoch": 0.778596889176928, "grad_norm": 0.03191295266151428, "learning_rate": 0.00017896845042531168, "loss": 0.3559, "step": 9611 }, { "epoch": 0.7786779001944264, "grad_norm": 0.02891041338443756, "learning_rate": 0.00017896394977271704, "loss": 0.319, "step": 9612 }, { "epoch": 0.7787589112119249, "grad_norm": 0.03670676797628403, "learning_rate": 0.00017895944912012243, "loss": 0.3447, "step": 9613 }, { "epoch": 0.7788399222294232, "grad_norm": 0.03295731917023659, "learning_rate": 0.00017895494846752782, "loss": 0.3487, "step": 9614 }, { "epoch": 0.7789209332469216, "grad_norm": 0.032904211431741714, "learning_rate": 0.00017895044781493318, "loss": 0.3538, "step": 9615 }, { "epoch": 0.7790019442644199, "grad_norm": 0.040504857897758484, "learning_rate": 0.00017894594716233854, "loss": 0.374, "step": 9616 }, { "epoch": 0.7790829552819183, "grad_norm": 0.03470660373568535, "learning_rate": 0.00017894144650974393, "loss": 0.3646, "step": 9617 }, { "epoch": 0.7791639662994168, "grad_norm": 0.03370751813054085, "learning_rate": 0.00017893694585714929, "loss": 0.3769, "step": 9618 }, { "epoch": 0.7792449773169151, "grad_norm": 0.03140348568558693, "learning_rate": 0.00017893244520455467, "loss": 0.2578, "step": 9619 }, { "epoch": 0.7793259883344135, "grad_norm": 0.033045411109924316, "learning_rate": 0.00017892794455196006, "loss": 0.3683, "step": 9620 }, { "epoch": 0.7794069993519118, "grad_norm": 0.030752673745155334, "learning_rate": 0.00017892344389936542, "loss": 0.3476, "step": 9621 }, { "epoch": 0.7794880103694103, "grad_norm": 0.035168688744306564, "learning_rate": 0.00017891894324677078, "loss": 0.3032, "step": 9622 }, { "epoch": 0.7795690213869086, "grad_norm": 0.03096243180334568, "learning_rate": 0.00017891444259417617, "loss": 0.3188, "step": 9623 }, { "epoch": 0.779650032404407, "grad_norm": 0.0341077521443367, "learning_rate": 0.00017890994194158153, "loss": 0.358, "step": 9624 }, { "epoch": 0.7797310434219054, "grad_norm": 0.031084155663847923, "learning_rate": 0.00017890544128898692, "loss": 0.3123, "step": 9625 }, { "epoch": 0.7798120544394037, "grad_norm": 0.032854244112968445, "learning_rate": 0.0001789009406363923, "loss": 0.3659, "step": 9626 }, { "epoch": 0.7798930654569022, "grad_norm": 0.033259496092796326, "learning_rate": 0.00017889643998379766, "loss": 0.3104, "step": 9627 }, { "epoch": 0.7799740764744005, "grad_norm": 0.03488700091838837, "learning_rate": 0.00017889193933120302, "loss": 0.2932, "step": 9628 }, { "epoch": 0.7800550874918989, "grad_norm": 0.03404168039560318, "learning_rate": 0.0001788874386786084, "loss": 0.3103, "step": 9629 }, { "epoch": 0.7801360985093972, "grad_norm": 0.03553805127739906, "learning_rate": 0.0001788829380260138, "loss": 0.3551, "step": 9630 }, { "epoch": 0.7802171095268956, "grad_norm": 0.03256218135356903, "learning_rate": 0.00017887843737341916, "loss": 0.3292, "step": 9631 }, { "epoch": 0.7802981205443941, "grad_norm": 0.031110748648643494, "learning_rate": 0.00017887393672082454, "loss": 0.3527, "step": 9632 }, { "epoch": 0.7803791315618924, "grad_norm": 0.03429475054144859, "learning_rate": 0.0001788694360682299, "loss": 0.2781, "step": 9633 }, { "epoch": 0.7804601425793908, "grad_norm": 0.03137778490781784, "learning_rate": 0.00017886493541563526, "loss": 0.3287, "step": 9634 }, { "epoch": 0.7805411535968891, "grad_norm": 0.03139874339103699, "learning_rate": 0.00017886043476304065, "loss": 0.3391, "step": 9635 }, { "epoch": 0.7806221646143876, "grad_norm": 0.03607549890875816, "learning_rate": 0.00017885593411044604, "loss": 0.3364, "step": 9636 }, { "epoch": 0.780703175631886, "grad_norm": 0.03316293656826019, "learning_rate": 0.0001788514334578514, "loss": 0.3261, "step": 9637 }, { "epoch": 0.7807841866493843, "grad_norm": 0.031096961349248886, "learning_rate": 0.00017884693280525679, "loss": 0.325, "step": 9638 }, { "epoch": 0.7808651976668827, "grad_norm": 0.029852962121367455, "learning_rate": 0.00017884243215266215, "loss": 0.3336, "step": 9639 }, { "epoch": 0.780946208684381, "grad_norm": 0.035916849970817566, "learning_rate": 0.0001788379315000675, "loss": 0.3333, "step": 9640 }, { "epoch": 0.7810272197018795, "grad_norm": 0.03139684349298477, "learning_rate": 0.0001788334308474729, "loss": 0.3193, "step": 9641 }, { "epoch": 0.7811082307193778, "grad_norm": 0.030681351199746132, "learning_rate": 0.00017882893019487828, "loss": 0.3389, "step": 9642 }, { "epoch": 0.7811892417368762, "grad_norm": 0.033301860094070435, "learning_rate": 0.00017882442954228364, "loss": 0.3235, "step": 9643 }, { "epoch": 0.7812702527543746, "grad_norm": 0.03338354825973511, "learning_rate": 0.00017881992888968903, "loss": 0.348, "step": 9644 }, { "epoch": 0.781351263771873, "grad_norm": 0.04342193156480789, "learning_rate": 0.0001788154282370944, "loss": 0.3586, "step": 9645 }, { "epoch": 0.7814322747893714, "grad_norm": 0.035788752138614655, "learning_rate": 0.00017881092758449975, "loss": 0.2832, "step": 9646 }, { "epoch": 0.7815132858068697, "grad_norm": 0.032872676849365234, "learning_rate": 0.00017880642693190513, "loss": 0.3529, "step": 9647 }, { "epoch": 0.7815942968243681, "grad_norm": 0.03165189176797867, "learning_rate": 0.00017880192627931052, "loss": 0.3199, "step": 9648 }, { "epoch": 0.7816753078418665, "grad_norm": 0.0314420610666275, "learning_rate": 0.00017879742562671588, "loss": 0.348, "step": 9649 }, { "epoch": 0.7817563188593649, "grad_norm": 0.030172783881425858, "learning_rate": 0.00017879292497412127, "loss": 0.2972, "step": 9650 }, { "epoch": 0.7818373298768633, "grad_norm": 0.029220450669527054, "learning_rate": 0.00017878842432152663, "loss": 0.332, "step": 9651 }, { "epoch": 0.7819183408943616, "grad_norm": 0.030133794993162155, "learning_rate": 0.000178783923668932, "loss": 0.319, "step": 9652 }, { "epoch": 0.78199935191186, "grad_norm": 0.0334329754114151, "learning_rate": 0.00017877942301633738, "loss": 0.3412, "step": 9653 }, { "epoch": 0.7820803629293583, "grad_norm": 0.03315896913409233, "learning_rate": 0.00017877492236374276, "loss": 0.3074, "step": 9654 }, { "epoch": 0.7821613739468568, "grad_norm": 0.03713025152683258, "learning_rate": 0.00017877042171114812, "loss": 0.3226, "step": 9655 }, { "epoch": 0.7822423849643552, "grad_norm": 0.03378988802433014, "learning_rate": 0.0001787659210585535, "loss": 0.359, "step": 9656 }, { "epoch": 0.7823233959818535, "grad_norm": 0.03350520133972168, "learning_rate": 0.00017876142040595887, "loss": 0.3346, "step": 9657 }, { "epoch": 0.7824044069993519, "grad_norm": 0.03147077187895775, "learning_rate": 0.00017875691975336423, "loss": 0.3423, "step": 9658 }, { "epoch": 0.7824854180168503, "grad_norm": 0.02769390493631363, "learning_rate": 0.00017875241910076962, "loss": 0.3126, "step": 9659 }, { "epoch": 0.7825664290343487, "grad_norm": 0.03711729869246483, "learning_rate": 0.000178747918448175, "loss": 0.2779, "step": 9660 }, { "epoch": 0.782647440051847, "grad_norm": 0.03212438523769379, "learning_rate": 0.00017874341779558036, "loss": 0.2873, "step": 9661 }, { "epoch": 0.7827284510693454, "grad_norm": 0.03071577101945877, "learning_rate": 0.00017873891714298575, "loss": 0.2942, "step": 9662 }, { "epoch": 0.7828094620868438, "grad_norm": 0.03193732351064682, "learning_rate": 0.0001787344164903911, "loss": 0.3417, "step": 9663 }, { "epoch": 0.7828904731043422, "grad_norm": 0.03078223206102848, "learning_rate": 0.00017872991583779647, "loss": 0.3429, "step": 9664 }, { "epoch": 0.7829714841218406, "grad_norm": 0.034121815115213394, "learning_rate": 0.00017872541518520186, "loss": 0.3843, "step": 9665 }, { "epoch": 0.7830524951393389, "grad_norm": 0.03557079657912254, "learning_rate": 0.00017872091453260725, "loss": 0.3732, "step": 9666 }, { "epoch": 0.7831335061568373, "grad_norm": 0.032793350517749786, "learning_rate": 0.0001787164138800126, "loss": 0.3637, "step": 9667 }, { "epoch": 0.7832145171743357, "grad_norm": 0.03170783817768097, "learning_rate": 0.000178711913227418, "loss": 0.3299, "step": 9668 }, { "epoch": 0.7832955281918341, "grad_norm": 0.03139970824122429, "learning_rate": 0.00017870741257482335, "loss": 0.3519, "step": 9669 }, { "epoch": 0.7833765392093325, "grad_norm": 0.03657853603363037, "learning_rate": 0.00017870291192222871, "loss": 0.3497, "step": 9670 }, { "epoch": 0.7834575502268308, "grad_norm": 0.03443003445863724, "learning_rate": 0.0001786984112696341, "loss": 0.3633, "step": 9671 }, { "epoch": 0.7835385612443292, "grad_norm": 0.03483427315950394, "learning_rate": 0.0001786939106170395, "loss": 0.3797, "step": 9672 }, { "epoch": 0.7836195722618277, "grad_norm": 0.031927600502967834, "learning_rate": 0.00017868940996444485, "loss": 0.301, "step": 9673 }, { "epoch": 0.783700583279326, "grad_norm": 0.03260060027241707, "learning_rate": 0.00017868490931185024, "loss": 0.3489, "step": 9674 }, { "epoch": 0.7837815942968244, "grad_norm": 0.027573350816965103, "learning_rate": 0.0001786804086592556, "loss": 0.2931, "step": 9675 }, { "epoch": 0.7838626053143227, "grad_norm": 0.02995600365102291, "learning_rate": 0.00017867590800666096, "loss": 0.2981, "step": 9676 }, { "epoch": 0.7839436163318211, "grad_norm": 0.03223692625761032, "learning_rate": 0.00017867140735406634, "loss": 0.3589, "step": 9677 }, { "epoch": 0.7840246273493195, "grad_norm": 0.03705068677663803, "learning_rate": 0.00017866690670147173, "loss": 0.3344, "step": 9678 }, { "epoch": 0.7841056383668179, "grad_norm": 0.03044736385345459, "learning_rate": 0.0001786624060488771, "loss": 0.3734, "step": 9679 }, { "epoch": 0.7841866493843163, "grad_norm": 0.031635627150535583, "learning_rate": 0.00017865790539628248, "loss": 0.342, "step": 9680 }, { "epoch": 0.7842676604018146, "grad_norm": 0.028439925983548164, "learning_rate": 0.00017865340474368784, "loss": 0.2895, "step": 9681 }, { "epoch": 0.7843486714193131, "grad_norm": 0.03232768177986145, "learning_rate": 0.00017864890409109322, "loss": 0.3541, "step": 9682 }, { "epoch": 0.7844296824368114, "grad_norm": 0.030741017311811447, "learning_rate": 0.0001786444034384986, "loss": 0.3588, "step": 9683 }, { "epoch": 0.7845106934543098, "grad_norm": 0.03578881546854973, "learning_rate": 0.00017863990278590397, "loss": 0.3798, "step": 9684 }, { "epoch": 0.7845917044718081, "grad_norm": 0.029170949012041092, "learning_rate": 0.00017863540213330933, "loss": 0.3481, "step": 9685 }, { "epoch": 0.7846727154893065, "grad_norm": 0.031627245247364044, "learning_rate": 0.00017863090148071472, "loss": 0.3582, "step": 9686 }, { "epoch": 0.784753726506805, "grad_norm": 0.03248753771185875, "learning_rate": 0.00017862640082812008, "loss": 0.3353, "step": 9687 }, { "epoch": 0.7848347375243033, "grad_norm": 0.030072558671236038, "learning_rate": 0.00017862190017552547, "loss": 0.3435, "step": 9688 }, { "epoch": 0.7849157485418017, "grad_norm": 0.032075896859169006, "learning_rate": 0.00017861739952293085, "loss": 0.3313, "step": 9689 }, { "epoch": 0.7849967595593, "grad_norm": 0.032127607613801956, "learning_rate": 0.0001786128988703362, "loss": 0.3554, "step": 9690 }, { "epoch": 0.7850777705767984, "grad_norm": 0.028677096590399742, "learning_rate": 0.00017860839821774157, "loss": 0.2903, "step": 9691 }, { "epoch": 0.7851587815942969, "grad_norm": 0.030471572652459145, "learning_rate": 0.00017860389756514696, "loss": 0.34, "step": 9692 }, { "epoch": 0.7852397926117952, "grad_norm": 0.035030148923397064, "learning_rate": 0.00017859939691255232, "loss": 0.3296, "step": 9693 }, { "epoch": 0.7853208036292936, "grad_norm": 0.03215445205569267, "learning_rate": 0.0001785948962599577, "loss": 0.3515, "step": 9694 }, { "epoch": 0.7854018146467919, "grad_norm": 0.0335715189576149, "learning_rate": 0.0001785903956073631, "loss": 0.3452, "step": 9695 }, { "epoch": 0.7854828256642904, "grad_norm": 0.03462142124772072, "learning_rate": 0.00017858589495476845, "loss": 0.3536, "step": 9696 }, { "epoch": 0.7855638366817888, "grad_norm": 0.030049508437514305, "learning_rate": 0.00017858139430217381, "loss": 0.306, "step": 9697 }, { "epoch": 0.7856448476992871, "grad_norm": 0.03550144284963608, "learning_rate": 0.0001785768936495792, "loss": 0.3499, "step": 9698 }, { "epoch": 0.7857258587167855, "grad_norm": 0.031812943518161774, "learning_rate": 0.00017857239299698456, "loss": 0.3201, "step": 9699 }, { "epoch": 0.7858068697342838, "grad_norm": 0.03390384465456009, "learning_rate": 0.00017856789234438995, "loss": 0.3668, "step": 9700 }, { "epoch": 0.7858878807517823, "grad_norm": 0.03422324359416962, "learning_rate": 0.00017856339169179534, "loss": 0.3161, "step": 9701 }, { "epoch": 0.7859688917692806, "grad_norm": 0.037070129066705704, "learning_rate": 0.0001785588910392007, "loss": 0.3561, "step": 9702 }, { "epoch": 0.786049902786779, "grad_norm": 0.032369211316108704, "learning_rate": 0.00017855439038660606, "loss": 0.3626, "step": 9703 }, { "epoch": 0.7861309138042774, "grad_norm": 0.03542504832148552, "learning_rate": 0.00017854988973401144, "loss": 0.4142, "step": 9704 }, { "epoch": 0.7862119248217757, "grad_norm": 0.03197111189365387, "learning_rate": 0.00017854538908141683, "loss": 0.3509, "step": 9705 }, { "epoch": 0.7862929358392742, "grad_norm": 0.032501548528671265, "learning_rate": 0.0001785408884288222, "loss": 0.353, "step": 9706 }, { "epoch": 0.7863739468567725, "grad_norm": 0.03382599353790283, "learning_rate": 0.00017853638777622758, "loss": 0.3428, "step": 9707 }, { "epoch": 0.7864549578742709, "grad_norm": 0.035178374499082565, "learning_rate": 0.00017853188712363294, "loss": 0.3423, "step": 9708 }, { "epoch": 0.7865359688917692, "grad_norm": 0.0341215543448925, "learning_rate": 0.0001785273864710383, "loss": 0.3263, "step": 9709 }, { "epoch": 0.7866169799092677, "grad_norm": 0.030356815084815025, "learning_rate": 0.00017852288581844368, "loss": 0.3085, "step": 9710 }, { "epoch": 0.7866979909267661, "grad_norm": 0.03421403467655182, "learning_rate": 0.00017851838516584907, "loss": 0.3651, "step": 9711 }, { "epoch": 0.7867790019442644, "grad_norm": 0.03532508388161659, "learning_rate": 0.00017851388451325443, "loss": 0.3753, "step": 9712 }, { "epoch": 0.7868600129617628, "grad_norm": 0.03950536251068115, "learning_rate": 0.00017850938386065982, "loss": 0.3683, "step": 9713 }, { "epoch": 0.7869410239792611, "grad_norm": 0.031308356672525406, "learning_rate": 0.00017850488320806518, "loss": 0.3372, "step": 9714 }, { "epoch": 0.7870220349967596, "grad_norm": 0.03673476725816727, "learning_rate": 0.00017850038255547054, "loss": 0.3931, "step": 9715 }, { "epoch": 0.787103046014258, "grad_norm": 0.03149571642279625, "learning_rate": 0.00017849588190287593, "loss": 0.3008, "step": 9716 }, { "epoch": 0.7871840570317563, "grad_norm": 0.02885325625538826, "learning_rate": 0.00017849138125028131, "loss": 0.3077, "step": 9717 }, { "epoch": 0.7872650680492547, "grad_norm": 0.031650837510824203, "learning_rate": 0.00017848688059768667, "loss": 0.3456, "step": 9718 }, { "epoch": 0.787346079066753, "grad_norm": 0.03272511065006256, "learning_rate": 0.00017848237994509206, "loss": 0.3169, "step": 9719 }, { "epoch": 0.7874270900842515, "grad_norm": 0.028613831847906113, "learning_rate": 0.00017847787929249742, "loss": 0.2844, "step": 9720 }, { "epoch": 0.7875081011017498, "grad_norm": 0.0399998277425766, "learning_rate": 0.00017847337863990278, "loss": 0.3512, "step": 9721 }, { "epoch": 0.7875891121192482, "grad_norm": 0.03618674352765083, "learning_rate": 0.00017846887798730817, "loss": 0.3317, "step": 9722 }, { "epoch": 0.7876701231367466, "grad_norm": 0.03203833848237991, "learning_rate": 0.00017846437733471356, "loss": 0.3709, "step": 9723 }, { "epoch": 0.787751134154245, "grad_norm": 0.03079145960509777, "learning_rate": 0.00017845987668211892, "loss": 0.3337, "step": 9724 }, { "epoch": 0.7878321451717434, "grad_norm": 0.031215449795126915, "learning_rate": 0.0001784553760295243, "loss": 0.301, "step": 9725 }, { "epoch": 0.7879131561892417, "grad_norm": 0.032910317182540894, "learning_rate": 0.00017845087537692966, "loss": 0.3394, "step": 9726 }, { "epoch": 0.7879941672067401, "grad_norm": 0.029228439554572105, "learning_rate": 0.00017844637472433502, "loss": 0.3028, "step": 9727 }, { "epoch": 0.7880751782242384, "grad_norm": 0.03681560978293419, "learning_rate": 0.0001784418740717404, "loss": 0.366, "step": 9728 }, { "epoch": 0.7881561892417369, "grad_norm": 0.02952171303331852, "learning_rate": 0.0001784373734191458, "loss": 0.3399, "step": 9729 }, { "epoch": 0.7882372002592353, "grad_norm": 0.03275004029273987, "learning_rate": 0.00017843287276655116, "loss": 0.345, "step": 9730 }, { "epoch": 0.7883182112767336, "grad_norm": 0.0315512977540493, "learning_rate": 0.00017842837211395654, "loss": 0.3281, "step": 9731 }, { "epoch": 0.788399222294232, "grad_norm": 0.03237222507596016, "learning_rate": 0.0001784238714613619, "loss": 0.3266, "step": 9732 }, { "epoch": 0.7884802333117304, "grad_norm": 0.03108234703540802, "learning_rate": 0.00017841937080876726, "loss": 0.3206, "step": 9733 }, { "epoch": 0.7885612443292288, "grad_norm": 0.03223662078380585, "learning_rate": 0.00017841487015617265, "loss": 0.3309, "step": 9734 }, { "epoch": 0.7886422553467272, "grad_norm": 0.03800920397043228, "learning_rate": 0.00017841036950357804, "loss": 0.3842, "step": 9735 }, { "epoch": 0.7887232663642255, "grad_norm": 0.03151298314332962, "learning_rate": 0.0001784058688509834, "loss": 0.314, "step": 9736 }, { "epoch": 0.7888042773817239, "grad_norm": 0.03089182637631893, "learning_rate": 0.00017840136819838879, "loss": 0.3261, "step": 9737 }, { "epoch": 0.7888852883992223, "grad_norm": 0.03249596431851387, "learning_rate": 0.00017839686754579415, "loss": 0.2848, "step": 9738 }, { "epoch": 0.7889662994167207, "grad_norm": 0.03219173103570938, "learning_rate": 0.0001783923668931995, "loss": 0.3275, "step": 9739 }, { "epoch": 0.789047310434219, "grad_norm": 0.033864930272102356, "learning_rate": 0.0001783878662406049, "loss": 0.3618, "step": 9740 }, { "epoch": 0.7891283214517174, "grad_norm": 0.031739283353090286, "learning_rate": 0.00017838336558801028, "loss": 0.3355, "step": 9741 }, { "epoch": 0.7892093324692158, "grad_norm": 0.029978927224874496, "learning_rate": 0.00017837886493541564, "loss": 0.3614, "step": 9742 }, { "epoch": 0.7892903434867142, "grad_norm": 0.03132997453212738, "learning_rate": 0.00017837436428282103, "loss": 0.3458, "step": 9743 }, { "epoch": 0.7893713545042126, "grad_norm": 0.0318823978304863, "learning_rate": 0.0001783698636302264, "loss": 0.3637, "step": 9744 }, { "epoch": 0.7894523655217109, "grad_norm": 0.031721170991659164, "learning_rate": 0.00017836536297763175, "loss": 0.3446, "step": 9745 }, { "epoch": 0.7895333765392093, "grad_norm": 0.036005645990371704, "learning_rate": 0.00017836086232503713, "loss": 0.3616, "step": 9746 }, { "epoch": 0.7896143875567078, "grad_norm": 0.029349252581596375, "learning_rate": 0.00017835636167244252, "loss": 0.3128, "step": 9747 }, { "epoch": 0.7896953985742061, "grad_norm": 0.03366193547844887, "learning_rate": 0.00017835186101984788, "loss": 0.3768, "step": 9748 }, { "epoch": 0.7897764095917045, "grad_norm": 0.03656681627035141, "learning_rate": 0.00017834736036725327, "loss": 0.3455, "step": 9749 }, { "epoch": 0.7898574206092028, "grad_norm": 0.035946715623140335, "learning_rate": 0.00017834285971465863, "loss": 0.3624, "step": 9750 }, { "epoch": 0.7899384316267012, "grad_norm": 0.033776670694351196, "learning_rate": 0.000178338359062064, "loss": 0.416, "step": 9751 }, { "epoch": 0.7900194426441997, "grad_norm": 0.033469364047050476, "learning_rate": 0.0001783338584094694, "loss": 0.4144, "step": 9752 }, { "epoch": 0.790100453661698, "grad_norm": 0.029603907838463783, "learning_rate": 0.00017832935775687476, "loss": 0.3114, "step": 9753 }, { "epoch": 0.7901814646791964, "grad_norm": 0.031154030933976173, "learning_rate": 0.00017832485710428012, "loss": 0.3165, "step": 9754 }, { "epoch": 0.7902624756966947, "grad_norm": 0.03153345733880997, "learning_rate": 0.0001783203564516855, "loss": 0.3285, "step": 9755 }, { "epoch": 0.7903434867141931, "grad_norm": 0.033831704407930374, "learning_rate": 0.00017831585579909087, "loss": 0.3407, "step": 9756 }, { "epoch": 0.7904244977316915, "grad_norm": 0.02914523147046566, "learning_rate": 0.00017831135514649623, "loss": 0.3039, "step": 9757 }, { "epoch": 0.7905055087491899, "grad_norm": 0.02872067503631115, "learning_rate": 0.00017830685449390164, "loss": 0.3161, "step": 9758 }, { "epoch": 0.7905865197666883, "grad_norm": 0.03430357947945595, "learning_rate": 0.000178302353841307, "loss": 0.3809, "step": 9759 }, { "epoch": 0.7906675307841866, "grad_norm": 0.0303533636033535, "learning_rate": 0.00017829785318871237, "loss": 0.3368, "step": 9760 }, { "epoch": 0.7907485418016851, "grad_norm": 0.03114994615316391, "learning_rate": 0.00017829335253611775, "loss": 0.3374, "step": 9761 }, { "epoch": 0.7908295528191834, "grad_norm": 0.032797671854496, "learning_rate": 0.0001782888518835231, "loss": 0.3391, "step": 9762 }, { "epoch": 0.7909105638366818, "grad_norm": 0.034057628363370895, "learning_rate": 0.0001782843512309285, "loss": 0.3473, "step": 9763 }, { "epoch": 0.7909915748541801, "grad_norm": 0.03768150880932808, "learning_rate": 0.0001782798505783339, "loss": 0.3216, "step": 9764 }, { "epoch": 0.7910725858716785, "grad_norm": 0.03195664659142494, "learning_rate": 0.00017827534992573925, "loss": 0.3239, "step": 9765 }, { "epoch": 0.791153596889177, "grad_norm": 0.034110911190509796, "learning_rate": 0.0001782708492731446, "loss": 0.2988, "step": 9766 }, { "epoch": 0.7912346079066753, "grad_norm": 0.03776692971587181, "learning_rate": 0.00017826634862055, "loss": 0.3188, "step": 9767 }, { "epoch": 0.7913156189241737, "grad_norm": 0.03164956718683243, "learning_rate": 0.00017826184796795535, "loss": 0.3367, "step": 9768 }, { "epoch": 0.791396629941672, "grad_norm": 0.034085437655448914, "learning_rate": 0.00017825734731536074, "loss": 0.3762, "step": 9769 }, { "epoch": 0.7914776409591704, "grad_norm": 0.059056442230939865, "learning_rate": 0.00017825284666276613, "loss": 0.3315, "step": 9770 }, { "epoch": 0.7915586519766689, "grad_norm": 0.03333378955721855, "learning_rate": 0.0001782483460101715, "loss": 0.3224, "step": 9771 }, { "epoch": 0.7916396629941672, "grad_norm": 0.035385195165872574, "learning_rate": 0.00017824384535757685, "loss": 0.3564, "step": 9772 }, { "epoch": 0.7917206740116656, "grad_norm": 0.03095993772149086, "learning_rate": 0.00017823934470498224, "loss": 0.306, "step": 9773 }, { "epoch": 0.7918016850291639, "grad_norm": 0.035980224609375, "learning_rate": 0.0001782348440523876, "loss": 0.321, "step": 9774 }, { "epoch": 0.7918826960466624, "grad_norm": 0.03179408237338066, "learning_rate": 0.00017823034339979298, "loss": 0.3073, "step": 9775 }, { "epoch": 0.7919637070641607, "grad_norm": 0.0315295048058033, "learning_rate": 0.00017822584274719837, "loss": 0.2791, "step": 9776 }, { "epoch": 0.7920447180816591, "grad_norm": 0.03435314819216728, "learning_rate": 0.00017822134209460373, "loss": 0.3752, "step": 9777 }, { "epoch": 0.7921257290991575, "grad_norm": 0.0332770012319088, "learning_rate": 0.0001782168414420091, "loss": 0.3101, "step": 9778 }, { "epoch": 0.7922067401166558, "grad_norm": 0.031906016170978546, "learning_rate": 0.00017821234078941448, "loss": 0.2801, "step": 9779 }, { "epoch": 0.7922877511341543, "grad_norm": 0.03557458519935608, "learning_rate": 0.00017820784013681984, "loss": 0.3043, "step": 9780 }, { "epoch": 0.7923687621516526, "grad_norm": 0.031249122694134712, "learning_rate": 0.00017820333948422522, "loss": 0.323, "step": 9781 }, { "epoch": 0.792449773169151, "grad_norm": 0.027938146144151688, "learning_rate": 0.0001781988388316306, "loss": 0.2701, "step": 9782 }, { "epoch": 0.7925307841866494, "grad_norm": 0.038791898638010025, "learning_rate": 0.00017819433817903597, "loss": 0.3682, "step": 9783 }, { "epoch": 0.7926117952041478, "grad_norm": 0.03866951912641525, "learning_rate": 0.00017818983752644133, "loss": 0.3229, "step": 9784 }, { "epoch": 0.7926928062216462, "grad_norm": 0.03439750149846077, "learning_rate": 0.00017818533687384672, "loss": 0.3426, "step": 9785 }, { "epoch": 0.7927738172391445, "grad_norm": 0.03489131107926369, "learning_rate": 0.0001781808362212521, "loss": 0.3889, "step": 9786 }, { "epoch": 0.7928548282566429, "grad_norm": 0.031454797834157944, "learning_rate": 0.00017817633556865747, "loss": 0.3119, "step": 9787 }, { "epoch": 0.7929358392741412, "grad_norm": 0.0329572819173336, "learning_rate": 0.00017817183491606285, "loss": 0.3671, "step": 9788 }, { "epoch": 0.7930168502916397, "grad_norm": 0.03671346977353096, "learning_rate": 0.0001781673342634682, "loss": 0.3855, "step": 9789 }, { "epoch": 0.7930978613091381, "grad_norm": 0.03178151696920395, "learning_rate": 0.00017816283361087357, "loss": 0.326, "step": 9790 }, { "epoch": 0.7931788723266364, "grad_norm": 0.03052881546318531, "learning_rate": 0.00017815833295827896, "loss": 0.3398, "step": 9791 }, { "epoch": 0.7932598833441348, "grad_norm": 0.04047226905822754, "learning_rate": 0.00017815383230568435, "loss": 0.3886, "step": 9792 }, { "epoch": 0.7933408943616331, "grad_norm": 0.034657519310712814, "learning_rate": 0.0001781493316530897, "loss": 0.3747, "step": 9793 }, { "epoch": 0.7934219053791316, "grad_norm": 0.0302236657589674, "learning_rate": 0.0001781448310004951, "loss": 0.3105, "step": 9794 }, { "epoch": 0.79350291639663, "grad_norm": 0.035705894231796265, "learning_rate": 0.00017814033034790045, "loss": 0.4141, "step": 9795 }, { "epoch": 0.7935839274141283, "grad_norm": 0.03531669080257416, "learning_rate": 0.00017813582969530581, "loss": 0.3131, "step": 9796 }, { "epoch": 0.7936649384316267, "grad_norm": 0.03963521495461464, "learning_rate": 0.0001781313290427112, "loss": 0.3832, "step": 9797 }, { "epoch": 0.7937459494491251, "grad_norm": 0.03380154073238373, "learning_rate": 0.0001781268283901166, "loss": 0.3527, "step": 9798 }, { "epoch": 0.7938269604666235, "grad_norm": 0.03466390073299408, "learning_rate": 0.00017812232773752195, "loss": 0.3342, "step": 9799 }, { "epoch": 0.7939079714841218, "grad_norm": 0.031100409105420113, "learning_rate": 0.00017811782708492734, "loss": 0.2889, "step": 9800 }, { "epoch": 0.7939889825016202, "grad_norm": 0.03908621892333031, "learning_rate": 0.0001781133264323327, "loss": 0.3044, "step": 9801 }, { "epoch": 0.7940699935191186, "grad_norm": 0.03332590311765671, "learning_rate": 0.00017810882577973806, "loss": 0.3317, "step": 9802 }, { "epoch": 0.794151004536617, "grad_norm": 0.03381446376442909, "learning_rate": 0.00017810432512714344, "loss": 0.3451, "step": 9803 }, { "epoch": 0.7942320155541154, "grad_norm": 0.032552119344472885, "learning_rate": 0.00017809982447454883, "loss": 0.3047, "step": 9804 }, { "epoch": 0.7943130265716137, "grad_norm": 0.03203838691115379, "learning_rate": 0.0001780953238219542, "loss": 0.3456, "step": 9805 }, { "epoch": 0.7943940375891121, "grad_norm": 0.03360417112708092, "learning_rate": 0.00017809082316935958, "loss": 0.3442, "step": 9806 }, { "epoch": 0.7944750486066104, "grad_norm": 0.03208596259355545, "learning_rate": 0.00017808632251676494, "loss": 0.329, "step": 9807 }, { "epoch": 0.7945560596241089, "grad_norm": 0.032233938574790955, "learning_rate": 0.0001780818218641703, "loss": 0.3261, "step": 9808 }, { "epoch": 0.7946370706416073, "grad_norm": 0.034315407276153564, "learning_rate": 0.00017807732121157569, "loss": 0.3596, "step": 9809 }, { "epoch": 0.7947180816591056, "grad_norm": 0.033394258469343185, "learning_rate": 0.00017807282055898107, "loss": 0.3577, "step": 9810 }, { "epoch": 0.794799092676604, "grad_norm": 0.0287623293697834, "learning_rate": 0.00017806831990638643, "loss": 0.3068, "step": 9811 }, { "epoch": 0.7948801036941024, "grad_norm": 0.03077227808535099, "learning_rate": 0.00017806381925379182, "loss": 0.2974, "step": 9812 }, { "epoch": 0.7949611147116008, "grad_norm": 0.0382005013525486, "learning_rate": 0.00017805931860119718, "loss": 0.4317, "step": 9813 }, { "epoch": 0.7950421257290992, "grad_norm": 0.03552140295505524, "learning_rate": 0.00017805481794860254, "loss": 0.3376, "step": 9814 }, { "epoch": 0.7951231367465975, "grad_norm": 0.03667037934064865, "learning_rate": 0.00017805031729600793, "loss": 0.3729, "step": 9815 }, { "epoch": 0.7952041477640959, "grad_norm": 0.030753176659345627, "learning_rate": 0.00017804581664341331, "loss": 0.3105, "step": 9816 }, { "epoch": 0.7952851587815943, "grad_norm": 0.03592974320054054, "learning_rate": 0.00017804131599081867, "loss": 0.3686, "step": 9817 }, { "epoch": 0.7953661697990927, "grad_norm": 0.034059733152389526, "learning_rate": 0.00017803681533822406, "loss": 0.3527, "step": 9818 }, { "epoch": 0.795447180816591, "grad_norm": 0.03542809560894966, "learning_rate": 0.00017803231468562942, "loss": 0.3431, "step": 9819 }, { "epoch": 0.7955281918340894, "grad_norm": 0.03323250636458397, "learning_rate": 0.00017802781403303478, "loss": 0.3434, "step": 9820 }, { "epoch": 0.7956092028515879, "grad_norm": 0.03204082325100899, "learning_rate": 0.0001780233133804402, "loss": 0.3056, "step": 9821 }, { "epoch": 0.7956902138690862, "grad_norm": 0.031638313084840775, "learning_rate": 0.00017801881272784556, "loss": 0.3473, "step": 9822 }, { "epoch": 0.7957712248865846, "grad_norm": 0.027752108871936798, "learning_rate": 0.00017801431207525092, "loss": 0.3222, "step": 9823 }, { "epoch": 0.7958522359040829, "grad_norm": 0.037686675786972046, "learning_rate": 0.0001780098114226563, "loss": 0.3285, "step": 9824 }, { "epoch": 0.7959332469215813, "grad_norm": 0.040860336273908615, "learning_rate": 0.00017800531077006166, "loss": 0.3929, "step": 9825 }, { "epoch": 0.7960142579390798, "grad_norm": 0.03692979738116264, "learning_rate": 0.00017800081011746702, "loss": 0.3687, "step": 9826 }, { "epoch": 0.7960952689565781, "grad_norm": 0.03794914856553078, "learning_rate": 0.00017799630946487244, "loss": 0.4021, "step": 9827 }, { "epoch": 0.7961762799740765, "grad_norm": 0.032314639538526535, "learning_rate": 0.0001779918088122778, "loss": 0.384, "step": 9828 }, { "epoch": 0.7962572909915748, "grad_norm": 0.03454400226473808, "learning_rate": 0.00017798730815968316, "loss": 0.3652, "step": 9829 }, { "epoch": 0.7963383020090732, "grad_norm": 0.033590447157621384, "learning_rate": 0.00017798280750708854, "loss": 0.3331, "step": 9830 }, { "epoch": 0.7964193130265717, "grad_norm": 0.032915979623794556, "learning_rate": 0.0001779783068544939, "loss": 0.3188, "step": 9831 }, { "epoch": 0.79650032404407, "grad_norm": 0.034886036068201065, "learning_rate": 0.00017797380620189926, "loss": 0.3538, "step": 9832 }, { "epoch": 0.7965813350615684, "grad_norm": 0.03149457648396492, "learning_rate": 0.00017796930554930468, "loss": 0.2962, "step": 9833 }, { "epoch": 0.7966623460790667, "grad_norm": 0.03190891072154045, "learning_rate": 0.00017796480489671004, "loss": 0.3518, "step": 9834 }, { "epoch": 0.7967433570965652, "grad_norm": 0.03555048629641533, "learning_rate": 0.0001779603042441154, "loss": 0.3769, "step": 9835 }, { "epoch": 0.7968243681140635, "grad_norm": 0.03308168426156044, "learning_rate": 0.00017795580359152079, "loss": 0.3265, "step": 9836 }, { "epoch": 0.7969053791315619, "grad_norm": 0.0301455520093441, "learning_rate": 0.00017795130293892615, "loss": 0.3352, "step": 9837 }, { "epoch": 0.7969863901490603, "grad_norm": 0.0303630530834198, "learning_rate": 0.00017794680228633153, "loss": 0.3216, "step": 9838 }, { "epoch": 0.7970674011665586, "grad_norm": 0.03650288283824921, "learning_rate": 0.00017794230163373692, "loss": 0.3274, "step": 9839 }, { "epoch": 0.7971484121840571, "grad_norm": 0.03212815895676613, "learning_rate": 0.00017793780098114228, "loss": 0.3295, "step": 9840 }, { "epoch": 0.7972294232015554, "grad_norm": 0.02993020787835121, "learning_rate": 0.00017793330032854764, "loss": 0.3319, "step": 9841 }, { "epoch": 0.7973104342190538, "grad_norm": 0.0330897755920887, "learning_rate": 0.00017792879967595303, "loss": 0.3578, "step": 9842 }, { "epoch": 0.7973914452365521, "grad_norm": 0.033456459641456604, "learning_rate": 0.0001779242990233584, "loss": 0.3331, "step": 9843 }, { "epoch": 0.7974724562540505, "grad_norm": 0.030742675065994263, "learning_rate": 0.00017791979837076377, "loss": 0.3283, "step": 9844 }, { "epoch": 0.797553467271549, "grad_norm": 0.03512030839920044, "learning_rate": 0.00017791529771816916, "loss": 0.3325, "step": 9845 }, { "epoch": 0.7976344782890473, "grad_norm": 0.03695352002978325, "learning_rate": 0.00017791079706557452, "loss": 0.3491, "step": 9846 }, { "epoch": 0.7977154893065457, "grad_norm": 0.03346193954348564, "learning_rate": 0.00017790629641297988, "loss": 0.3334, "step": 9847 }, { "epoch": 0.797796500324044, "grad_norm": 0.032957058399915695, "learning_rate": 0.00017790179576038527, "loss": 0.3567, "step": 9848 }, { "epoch": 0.7978775113415425, "grad_norm": 0.034393198788166046, "learning_rate": 0.00017789729510779063, "loss": 0.3555, "step": 9849 }, { "epoch": 0.7979585223590409, "grad_norm": 0.03429095074534416, "learning_rate": 0.00017789279445519602, "loss": 0.344, "step": 9850 }, { "epoch": 0.7980395333765392, "grad_norm": 0.03974846377968788, "learning_rate": 0.0001778882938026014, "loss": 0.3608, "step": 9851 }, { "epoch": 0.7981205443940376, "grad_norm": 0.02912759967148304, "learning_rate": 0.00017788379315000676, "loss": 0.2945, "step": 9852 }, { "epoch": 0.7982015554115359, "grad_norm": 0.03488336130976677, "learning_rate": 0.00017787929249741212, "loss": 0.3598, "step": 9853 }, { "epoch": 0.7982825664290344, "grad_norm": 0.0349576361477375, "learning_rate": 0.0001778747918448175, "loss": 0.3569, "step": 9854 }, { "epoch": 0.7983635774465327, "grad_norm": 0.031084474176168442, "learning_rate": 0.00017787029119222287, "loss": 0.2779, "step": 9855 }, { "epoch": 0.7984445884640311, "grad_norm": 0.04295002669095993, "learning_rate": 0.00017786579053962826, "loss": 0.3446, "step": 9856 }, { "epoch": 0.7985255994815295, "grad_norm": 0.03482022136449814, "learning_rate": 0.00017786128988703365, "loss": 0.2966, "step": 9857 }, { "epoch": 0.7986066104990278, "grad_norm": 0.032117944210767746, "learning_rate": 0.000177856789234439, "loss": 0.3177, "step": 9858 }, { "epoch": 0.7986876215165263, "grad_norm": 0.03325260058045387, "learning_rate": 0.00017785228858184437, "loss": 0.3332, "step": 9859 }, { "epoch": 0.7987686325340246, "grad_norm": 0.033402219414711, "learning_rate": 0.00017784778792924975, "loss": 0.356, "step": 9860 }, { "epoch": 0.798849643551523, "grad_norm": 0.02923320233821869, "learning_rate": 0.0001778432872766551, "loss": 0.2805, "step": 9861 }, { "epoch": 0.7989306545690213, "grad_norm": 0.03429705277085304, "learning_rate": 0.0001778387866240605, "loss": 0.3445, "step": 9862 }, { "epoch": 0.7990116655865198, "grad_norm": 0.03572859242558479, "learning_rate": 0.0001778342859714659, "loss": 0.3237, "step": 9863 }, { "epoch": 0.7990926766040182, "grad_norm": 0.03505443409085274, "learning_rate": 0.00017782978531887125, "loss": 0.3418, "step": 9864 }, { "epoch": 0.7991736876215165, "grad_norm": 0.03291371092200279, "learning_rate": 0.0001778252846662766, "loss": 0.3022, "step": 9865 }, { "epoch": 0.7992546986390149, "grad_norm": 0.04195602238178253, "learning_rate": 0.000177820784013682, "loss": 0.3914, "step": 9866 }, { "epoch": 0.7993357096565132, "grad_norm": 0.030259612947702408, "learning_rate": 0.00017781628336108738, "loss": 0.3484, "step": 9867 }, { "epoch": 0.7994167206740117, "grad_norm": 0.03401730954647064, "learning_rate": 0.00017781178270849274, "loss": 0.3492, "step": 9868 }, { "epoch": 0.7994977316915101, "grad_norm": 0.02929941564798355, "learning_rate": 0.00017780728205589813, "loss": 0.3135, "step": 9869 }, { "epoch": 0.7995787427090084, "grad_norm": 0.03494355082511902, "learning_rate": 0.0001778027814033035, "loss": 0.3343, "step": 9870 }, { "epoch": 0.7996597537265068, "grad_norm": 0.035004131495952606, "learning_rate": 0.00017779828075070885, "loss": 0.3092, "step": 9871 }, { "epoch": 0.7997407647440052, "grad_norm": 0.03391312435269356, "learning_rate": 0.00017779378009811424, "loss": 0.3313, "step": 9872 }, { "epoch": 0.7998217757615036, "grad_norm": 0.032676223665475845, "learning_rate": 0.00017778927944551962, "loss": 0.3053, "step": 9873 }, { "epoch": 0.799902786779002, "grad_norm": 0.03063894808292389, "learning_rate": 0.00017778477879292498, "loss": 0.3541, "step": 9874 }, { "epoch": 0.7999837977965003, "grad_norm": 0.030848519876599312, "learning_rate": 0.00017778027814033037, "loss": 0.3774, "step": 9875 }, { "epoch": 0.8000648088139987, "grad_norm": 0.03999854996800423, "learning_rate": 0.00017777577748773573, "loss": 0.3959, "step": 9876 }, { "epoch": 0.8001458198314971, "grad_norm": 0.031490426510572433, "learning_rate": 0.0001777712768351411, "loss": 0.3219, "step": 9877 }, { "epoch": 0.8002268308489955, "grad_norm": 0.03349410369992256, "learning_rate": 0.00017776677618254648, "loss": 0.3838, "step": 9878 }, { "epoch": 0.8003078418664938, "grad_norm": 0.03521084040403366, "learning_rate": 0.00017776227552995186, "loss": 0.3527, "step": 9879 }, { "epoch": 0.8003888528839922, "grad_norm": 0.03576532378792763, "learning_rate": 0.00017775777487735722, "loss": 0.3284, "step": 9880 }, { "epoch": 0.8004698639014906, "grad_norm": 0.033710140734910965, "learning_rate": 0.0001777532742247626, "loss": 0.3252, "step": 9881 }, { "epoch": 0.800550874918989, "grad_norm": 0.03096635453402996, "learning_rate": 0.00017774877357216797, "loss": 0.3208, "step": 9882 }, { "epoch": 0.8006318859364874, "grad_norm": 0.03235981613397598, "learning_rate": 0.00017774427291957333, "loss": 0.3158, "step": 9883 }, { "epoch": 0.8007128969539857, "grad_norm": 0.03289040923118591, "learning_rate": 0.00017773977226697872, "loss": 0.3225, "step": 9884 }, { "epoch": 0.8007939079714841, "grad_norm": 0.03425959497690201, "learning_rate": 0.0001777352716143841, "loss": 0.3607, "step": 9885 }, { "epoch": 0.8008749189889826, "grad_norm": 0.03126666322350502, "learning_rate": 0.00017773077096178947, "loss": 0.3311, "step": 9886 }, { "epoch": 0.8009559300064809, "grad_norm": 0.03363567590713501, "learning_rate": 0.00017772627030919485, "loss": 0.3225, "step": 9887 }, { "epoch": 0.8010369410239793, "grad_norm": 0.03323504328727722, "learning_rate": 0.0001777217696566002, "loss": 0.3588, "step": 9888 }, { "epoch": 0.8011179520414776, "grad_norm": 0.03272292762994766, "learning_rate": 0.00017771726900400557, "loss": 0.3388, "step": 9889 }, { "epoch": 0.801198963058976, "grad_norm": 0.035594481974840164, "learning_rate": 0.00017771276835141096, "loss": 0.3581, "step": 9890 }, { "epoch": 0.8012799740764744, "grad_norm": 0.03276817500591278, "learning_rate": 0.00017770826769881635, "loss": 0.3742, "step": 9891 }, { "epoch": 0.8013609850939728, "grad_norm": 0.03029584512114525, "learning_rate": 0.0001777037670462217, "loss": 0.2998, "step": 9892 }, { "epoch": 0.8014419961114712, "grad_norm": 0.030790219083428383, "learning_rate": 0.0001776992663936271, "loss": 0.3391, "step": 9893 }, { "epoch": 0.8015230071289695, "grad_norm": 0.040925610810518265, "learning_rate": 0.00017769476574103245, "loss": 0.3516, "step": 9894 }, { "epoch": 0.8016040181464679, "grad_norm": 0.03036440722644329, "learning_rate": 0.00017769026508843782, "loss": 0.339, "step": 9895 }, { "epoch": 0.8016850291639663, "grad_norm": 0.0375761054456234, "learning_rate": 0.00017768576443584323, "loss": 0.3519, "step": 9896 }, { "epoch": 0.8017660401814647, "grad_norm": 0.0340602844953537, "learning_rate": 0.0001776812637832486, "loss": 0.3145, "step": 9897 }, { "epoch": 0.801847051198963, "grad_norm": 0.033066585659980774, "learning_rate": 0.00017767676313065395, "loss": 0.3557, "step": 9898 }, { "epoch": 0.8019280622164614, "grad_norm": 0.0324365571141243, "learning_rate": 0.00017767226247805934, "loss": 0.3625, "step": 9899 }, { "epoch": 0.8020090732339599, "grad_norm": 0.02973266690969467, "learning_rate": 0.0001776677618254647, "loss": 0.3006, "step": 9900 }, { "epoch": 0.8020900842514582, "grad_norm": 0.032307885587215424, "learning_rate": 0.00017766326117287006, "loss": 0.3152, "step": 9901 }, { "epoch": 0.8021710952689566, "grad_norm": 0.029426628723740578, "learning_rate": 0.00017765876052027547, "loss": 0.2689, "step": 9902 }, { "epoch": 0.8022521062864549, "grad_norm": 0.03942330554127693, "learning_rate": 0.00017765425986768083, "loss": 0.3758, "step": 9903 }, { "epoch": 0.8023331173039533, "grad_norm": 0.03324083238840103, "learning_rate": 0.0001776497592150862, "loss": 0.3391, "step": 9904 }, { "epoch": 0.8024141283214518, "grad_norm": 0.03555333986878395, "learning_rate": 0.00017764525856249158, "loss": 0.3283, "step": 9905 }, { "epoch": 0.8024951393389501, "grad_norm": 0.03296968713402748, "learning_rate": 0.00017764075790989694, "loss": 0.3561, "step": 9906 }, { "epoch": 0.8025761503564485, "grad_norm": 0.0356278233230114, "learning_rate": 0.0001776362572573023, "loss": 0.3368, "step": 9907 }, { "epoch": 0.8026571613739468, "grad_norm": 0.03892393410205841, "learning_rate": 0.0001776317566047077, "loss": 0.4099, "step": 9908 }, { "epoch": 0.8027381723914452, "grad_norm": 0.035411980003118515, "learning_rate": 0.00017762725595211307, "loss": 0.377, "step": 9909 }, { "epoch": 0.8028191834089436, "grad_norm": 0.03715914860367775, "learning_rate": 0.00017762275529951843, "loss": 0.3227, "step": 9910 }, { "epoch": 0.802900194426442, "grad_norm": 0.036011308431625366, "learning_rate": 0.00017761825464692382, "loss": 0.3568, "step": 9911 }, { "epoch": 0.8029812054439404, "grad_norm": 0.03165072947740555, "learning_rate": 0.00017761375399432918, "loss": 0.3277, "step": 9912 }, { "epoch": 0.8030622164614387, "grad_norm": 0.02398240566253662, "learning_rate": 0.00017760925334173454, "loss": 0.2622, "step": 9913 }, { "epoch": 0.8031432274789372, "grad_norm": 0.036116186529397964, "learning_rate": 0.00017760475268913995, "loss": 0.3896, "step": 9914 }, { "epoch": 0.8032242384964355, "grad_norm": 0.033894285559654236, "learning_rate": 0.00017760025203654531, "loss": 0.3327, "step": 9915 }, { "epoch": 0.8033052495139339, "grad_norm": 0.030235815793275833, "learning_rate": 0.00017759575138395067, "loss": 0.342, "step": 9916 }, { "epoch": 0.8033862605314323, "grad_norm": 0.031925395131111145, "learning_rate": 0.00017759125073135606, "loss": 0.3369, "step": 9917 }, { "epoch": 0.8034672715489306, "grad_norm": 0.032986488193273544, "learning_rate": 0.00017758675007876142, "loss": 0.333, "step": 9918 }, { "epoch": 0.8035482825664291, "grad_norm": 0.030651472508907318, "learning_rate": 0.0001775822494261668, "loss": 0.3445, "step": 9919 }, { "epoch": 0.8036292935839274, "grad_norm": 0.03186076134443283, "learning_rate": 0.0001775777487735722, "loss": 0.3913, "step": 9920 }, { "epoch": 0.8037103046014258, "grad_norm": 0.03048551455140114, "learning_rate": 0.00017757324812097756, "loss": 0.31, "step": 9921 }, { "epoch": 0.8037913156189241, "grad_norm": 0.028865564614534378, "learning_rate": 0.00017756874746838292, "loss": 0.3382, "step": 9922 }, { "epoch": 0.8038723266364226, "grad_norm": 0.032060034573078156, "learning_rate": 0.0001775642468157883, "loss": 0.3355, "step": 9923 }, { "epoch": 0.803953337653921, "grad_norm": 0.03471523895859718, "learning_rate": 0.00017755974616319366, "loss": 0.3643, "step": 9924 }, { "epoch": 0.8040343486714193, "grad_norm": 0.030344653874635696, "learning_rate": 0.00017755524551059905, "loss": 0.3015, "step": 9925 }, { "epoch": 0.8041153596889177, "grad_norm": 0.033976975828409195, "learning_rate": 0.00017755074485800444, "loss": 0.3253, "step": 9926 }, { "epoch": 0.804196370706416, "grad_norm": 0.034605782479047775, "learning_rate": 0.0001775462442054098, "loss": 0.3234, "step": 9927 }, { "epoch": 0.8042773817239145, "grad_norm": 0.03288121148943901, "learning_rate": 0.00017754174355281516, "loss": 0.3525, "step": 9928 }, { "epoch": 0.8043583927414129, "grad_norm": 0.0387084074318409, "learning_rate": 0.00017753724290022054, "loss": 0.3987, "step": 9929 }, { "epoch": 0.8044394037589112, "grad_norm": 0.02676405757665634, "learning_rate": 0.0001775327422476259, "loss": 0.2951, "step": 9930 }, { "epoch": 0.8045204147764096, "grad_norm": 0.032487448304891586, "learning_rate": 0.0001775282415950313, "loss": 0.3576, "step": 9931 }, { "epoch": 0.8046014257939079, "grad_norm": 0.03143838793039322, "learning_rate": 0.00017752374094243668, "loss": 0.3081, "step": 9932 }, { "epoch": 0.8046824368114064, "grad_norm": 0.030801311135292053, "learning_rate": 0.00017751924028984204, "loss": 0.3141, "step": 9933 }, { "epoch": 0.8047634478289047, "grad_norm": 0.033451713621616364, "learning_rate": 0.0001775147396372474, "loss": 0.3355, "step": 9934 }, { "epoch": 0.8048444588464031, "grad_norm": 0.03344229608774185, "learning_rate": 0.00017751023898465279, "loss": 0.3176, "step": 9935 }, { "epoch": 0.8049254698639015, "grad_norm": 0.037789855152368546, "learning_rate": 0.00017750573833205815, "loss": 0.3356, "step": 9936 }, { "epoch": 0.8050064808813999, "grad_norm": 0.027795903384685516, "learning_rate": 0.00017750123767946353, "loss": 0.2775, "step": 9937 }, { "epoch": 0.8050874918988983, "grad_norm": 0.035118166357278824, "learning_rate": 0.00017749673702686892, "loss": 0.3245, "step": 9938 }, { "epoch": 0.8051685029163966, "grad_norm": 0.03690321743488312, "learning_rate": 0.00017749223637427428, "loss": 0.3588, "step": 9939 }, { "epoch": 0.805249513933895, "grad_norm": 0.03635449334979057, "learning_rate": 0.00017748773572167964, "loss": 0.3713, "step": 9940 }, { "epoch": 0.8053305249513933, "grad_norm": 0.038405247032642365, "learning_rate": 0.00017748323506908503, "loss": 0.3332, "step": 9941 }, { "epoch": 0.8054115359688918, "grad_norm": 0.03047071024775505, "learning_rate": 0.0001774787344164904, "loss": 0.309, "step": 9942 }, { "epoch": 0.8054925469863902, "grad_norm": 0.03196856752038002, "learning_rate": 0.00017747423376389578, "loss": 0.3169, "step": 9943 }, { "epoch": 0.8055735580038885, "grad_norm": 0.032038189470767975, "learning_rate": 0.00017746973311130116, "loss": 0.3235, "step": 9944 }, { "epoch": 0.8056545690213869, "grad_norm": 0.03425036743283272, "learning_rate": 0.00017746523245870652, "loss": 0.3052, "step": 9945 }, { "epoch": 0.8057355800388852, "grad_norm": 0.03385873883962631, "learning_rate": 0.00017746073180611188, "loss": 0.3353, "step": 9946 }, { "epoch": 0.8058165910563837, "grad_norm": 0.030192499980330467, "learning_rate": 0.00017745623115351727, "loss": 0.3524, "step": 9947 }, { "epoch": 0.8058976020738821, "grad_norm": 0.0328400619328022, "learning_rate": 0.00017745173050092266, "loss": 0.2959, "step": 9948 }, { "epoch": 0.8059786130913804, "grad_norm": 0.036807384341955185, "learning_rate": 0.00017744722984832802, "loss": 0.3319, "step": 9949 }, { "epoch": 0.8060596241088788, "grad_norm": 0.038014866411685944, "learning_rate": 0.0001774427291957334, "loss": 0.3558, "step": 9950 }, { "epoch": 0.8061406351263772, "grad_norm": 0.04583004489541054, "learning_rate": 0.00017743822854313876, "loss": 0.3558, "step": 9951 }, { "epoch": 0.8062216461438756, "grad_norm": 0.03124105930328369, "learning_rate": 0.00017743372789054412, "loss": 0.2939, "step": 9952 }, { "epoch": 0.806302657161374, "grad_norm": 0.03650376573204994, "learning_rate": 0.0001774292272379495, "loss": 0.3778, "step": 9953 }, { "epoch": 0.8063836681788723, "grad_norm": 0.038504041731357574, "learning_rate": 0.0001774247265853549, "loss": 0.4135, "step": 9954 }, { "epoch": 0.8064646791963707, "grad_norm": 0.02942444011569023, "learning_rate": 0.00017742022593276026, "loss": 0.2947, "step": 9955 }, { "epoch": 0.8065456902138691, "grad_norm": 0.03135211765766144, "learning_rate": 0.00017741572528016565, "loss": 0.3236, "step": 9956 }, { "epoch": 0.8066267012313675, "grad_norm": 0.026941142976284027, "learning_rate": 0.000177411224627571, "loss": 0.2815, "step": 9957 }, { "epoch": 0.8067077122488658, "grad_norm": 0.03467633202672005, "learning_rate": 0.00017740672397497637, "loss": 0.329, "step": 9958 }, { "epoch": 0.8067887232663642, "grad_norm": 0.034031517803668976, "learning_rate": 0.00017740222332238175, "loss": 0.3436, "step": 9959 }, { "epoch": 0.8068697342838627, "grad_norm": 0.041298530995845795, "learning_rate": 0.00017739772266978714, "loss": 0.3865, "step": 9960 }, { "epoch": 0.806950745301361, "grad_norm": 0.029256189242005348, "learning_rate": 0.0001773932220171925, "loss": 0.2955, "step": 9961 }, { "epoch": 0.8070317563188594, "grad_norm": 0.033907074481248856, "learning_rate": 0.0001773887213645979, "loss": 0.3672, "step": 9962 }, { "epoch": 0.8071127673363577, "grad_norm": 0.03336393088102341, "learning_rate": 0.00017738422071200325, "loss": 0.382, "step": 9963 }, { "epoch": 0.8071937783538561, "grad_norm": 0.03110332041978836, "learning_rate": 0.0001773797200594086, "loss": 0.3467, "step": 9964 }, { "epoch": 0.8072747893713546, "grad_norm": 0.0371004119515419, "learning_rate": 0.000177375219406814, "loss": 0.3596, "step": 9965 }, { "epoch": 0.8073558003888529, "grad_norm": 0.033193279057741165, "learning_rate": 0.00017737071875421938, "loss": 0.31, "step": 9966 }, { "epoch": 0.8074368114063513, "grad_norm": 0.030885837972164154, "learning_rate": 0.00017736621810162474, "loss": 0.3003, "step": 9967 }, { "epoch": 0.8075178224238496, "grad_norm": 0.033284615725278854, "learning_rate": 0.00017736171744903013, "loss": 0.3384, "step": 9968 }, { "epoch": 0.807598833441348, "grad_norm": 0.02768583968281746, "learning_rate": 0.0001773572167964355, "loss": 0.2796, "step": 9969 }, { "epoch": 0.8076798444588464, "grad_norm": 0.03884153440594673, "learning_rate": 0.00017735271614384085, "loss": 0.3976, "step": 9970 }, { "epoch": 0.8077608554763448, "grad_norm": 0.03400607034564018, "learning_rate": 0.00017734821549124626, "loss": 0.3267, "step": 9971 }, { "epoch": 0.8078418664938432, "grad_norm": 0.030479488894343376, "learning_rate": 0.00017734371483865162, "loss": 0.3018, "step": 9972 }, { "epoch": 0.8079228775113415, "grad_norm": 0.032650839537382126, "learning_rate": 0.00017733921418605698, "loss": 0.3255, "step": 9973 }, { "epoch": 0.80800388852884, "grad_norm": 0.03217211365699768, "learning_rate": 0.00017733471353346237, "loss": 0.3618, "step": 9974 }, { "epoch": 0.8080848995463383, "grad_norm": 0.030187807977199554, "learning_rate": 0.00017733021288086773, "loss": 0.3099, "step": 9975 }, { "epoch": 0.8081659105638367, "grad_norm": 0.027642568573355675, "learning_rate": 0.0001773257122282731, "loss": 0.3518, "step": 9976 }, { "epoch": 0.808246921581335, "grad_norm": 0.036619480699300766, "learning_rate": 0.0001773212115756785, "loss": 0.381, "step": 9977 }, { "epoch": 0.8083279325988334, "grad_norm": 0.03958069160580635, "learning_rate": 0.00017731671092308386, "loss": 0.4041, "step": 9978 }, { "epoch": 0.8084089436163319, "grad_norm": 0.03649488463997841, "learning_rate": 0.00017731221027048922, "loss": 0.3567, "step": 9979 }, { "epoch": 0.8084899546338302, "grad_norm": 0.03155497834086418, "learning_rate": 0.0001773077096178946, "loss": 0.3163, "step": 9980 }, { "epoch": 0.8085709656513286, "grad_norm": 0.02980252169072628, "learning_rate": 0.00017730320896529997, "loss": 0.3462, "step": 9981 }, { "epoch": 0.8086519766688269, "grad_norm": 0.03588375821709633, "learning_rate": 0.00017729870831270533, "loss": 0.3528, "step": 9982 }, { "epoch": 0.8087329876863253, "grad_norm": 0.031652361154556274, "learning_rate": 0.00017729420766011075, "loss": 0.3122, "step": 9983 }, { "epoch": 0.8088139987038238, "grad_norm": 0.03213665261864662, "learning_rate": 0.0001772897070075161, "loss": 0.3334, "step": 9984 }, { "epoch": 0.8088950097213221, "grad_norm": 0.03248204290866852, "learning_rate": 0.00017728520635492147, "loss": 0.3086, "step": 9985 }, { "epoch": 0.8089760207388205, "grad_norm": 0.03270760923624039, "learning_rate": 0.00017728070570232685, "loss": 0.3195, "step": 9986 }, { "epoch": 0.8090570317563188, "grad_norm": 0.035143230110406876, "learning_rate": 0.00017727620504973221, "loss": 0.3604, "step": 9987 }, { "epoch": 0.8091380427738173, "grad_norm": 0.030242938548326492, "learning_rate": 0.00017727170439713757, "loss": 0.3452, "step": 9988 }, { "epoch": 0.8092190537913156, "grad_norm": 0.031948208808898926, "learning_rate": 0.000177267203744543, "loss": 0.311, "step": 9989 }, { "epoch": 0.809300064808814, "grad_norm": 0.03433237969875336, "learning_rate": 0.00017726270309194835, "loss": 0.352, "step": 9990 }, { "epoch": 0.8093810758263124, "grad_norm": 0.036170594394207, "learning_rate": 0.0001772582024393537, "loss": 0.3987, "step": 9991 }, { "epoch": 0.8094620868438107, "grad_norm": 0.029423857107758522, "learning_rate": 0.0001772537017867591, "loss": 0.2831, "step": 9992 }, { "epoch": 0.8095430978613092, "grad_norm": 0.03154407814145088, "learning_rate": 0.00017724920113416446, "loss": 0.3293, "step": 9993 }, { "epoch": 0.8096241088788075, "grad_norm": 0.03517995402216911, "learning_rate": 0.00017724470048156982, "loss": 0.3704, "step": 9994 }, { "epoch": 0.8097051198963059, "grad_norm": 0.03546958789229393, "learning_rate": 0.00017724019982897523, "loss": 0.3842, "step": 9995 }, { "epoch": 0.8097861309138042, "grad_norm": 0.03441493958234787, "learning_rate": 0.0001772356991763806, "loss": 0.3228, "step": 9996 }, { "epoch": 0.8098671419313026, "grad_norm": 0.03107467293739319, "learning_rate": 0.00017723119852378595, "loss": 0.2757, "step": 9997 }, { "epoch": 0.8099481529488011, "grad_norm": 0.02883220836520195, "learning_rate": 0.00017722669787119134, "loss": 0.296, "step": 9998 }, { "epoch": 0.8100291639662994, "grad_norm": 0.030075686052441597, "learning_rate": 0.0001772221972185967, "loss": 0.3419, "step": 9999 }, { "epoch": 0.8101101749837978, "grad_norm": 0.03298826888203621, "learning_rate": 0.00017721769656600208, "loss": 0.3751, "step": 10000 }, { "epoch": 0.8101911860012961, "grad_norm": 0.03761919215321541, "learning_rate": 0.00017721319591340747, "loss": 0.3478, "step": 10001 }, { "epoch": 0.8102721970187946, "grad_norm": 0.03876710310578346, "learning_rate": 0.00017720869526081283, "loss": 0.3734, "step": 10002 }, { "epoch": 0.810353208036293, "grad_norm": 0.02835431881248951, "learning_rate": 0.0001772041946082182, "loss": 0.2926, "step": 10003 }, { "epoch": 0.8104342190537913, "grad_norm": 0.03384508937597275, "learning_rate": 0.00017719969395562358, "loss": 0.3082, "step": 10004 }, { "epoch": 0.8105152300712897, "grad_norm": 0.0412626713514328, "learning_rate": 0.00017719519330302894, "loss": 0.3824, "step": 10005 }, { "epoch": 0.810596241088788, "grad_norm": 0.027186481282114983, "learning_rate": 0.00017719069265043433, "loss": 0.2845, "step": 10006 }, { "epoch": 0.8106772521062865, "grad_norm": 0.03608010709285736, "learning_rate": 0.0001771861919978397, "loss": 0.343, "step": 10007 }, { "epoch": 0.8107582631237849, "grad_norm": 0.034267593175172806, "learning_rate": 0.00017718169134524507, "loss": 0.3317, "step": 10008 }, { "epoch": 0.8108392741412832, "grad_norm": 0.03193562477827072, "learning_rate": 0.00017717719069265043, "loss": 0.3081, "step": 10009 }, { "epoch": 0.8109202851587816, "grad_norm": 0.029407281428575516, "learning_rate": 0.00017717269004005582, "loss": 0.3008, "step": 10010 }, { "epoch": 0.81100129617628, "grad_norm": 0.0316649004817009, "learning_rate": 0.00017716818938746118, "loss": 0.3247, "step": 10011 }, { "epoch": 0.8110823071937784, "grad_norm": 0.03278086706995964, "learning_rate": 0.00017716368873486657, "loss": 0.3495, "step": 10012 }, { "epoch": 0.8111633182112767, "grad_norm": 0.02849617227911949, "learning_rate": 0.00017715918808227195, "loss": 0.3042, "step": 10013 }, { "epoch": 0.8112443292287751, "grad_norm": 0.03393520787358284, "learning_rate": 0.00017715468742967731, "loss": 0.3218, "step": 10014 }, { "epoch": 0.8113253402462735, "grad_norm": 0.03271503746509552, "learning_rate": 0.00017715018677708267, "loss": 0.329, "step": 10015 }, { "epoch": 0.8114063512637719, "grad_norm": 0.035968679934740067, "learning_rate": 0.00017714568612448806, "loss": 0.3237, "step": 10016 }, { "epoch": 0.8114873622812703, "grad_norm": 0.03288400173187256, "learning_rate": 0.00017714118547189342, "loss": 0.3547, "step": 10017 }, { "epoch": 0.8115683732987686, "grad_norm": 0.032152384519577026, "learning_rate": 0.0001771366848192988, "loss": 0.3109, "step": 10018 }, { "epoch": 0.811649384316267, "grad_norm": 0.03296571597456932, "learning_rate": 0.0001771321841667042, "loss": 0.3286, "step": 10019 }, { "epoch": 0.8117303953337653, "grad_norm": 0.03662797063589096, "learning_rate": 0.00017712768351410956, "loss": 0.3494, "step": 10020 }, { "epoch": 0.8118114063512638, "grad_norm": 0.030709531158208847, "learning_rate": 0.00017712318286151492, "loss": 0.3459, "step": 10021 }, { "epoch": 0.8118924173687622, "grad_norm": 0.03615014627575874, "learning_rate": 0.0001771186822089203, "loss": 0.3693, "step": 10022 }, { "epoch": 0.8119734283862605, "grad_norm": 0.034624602645635605, "learning_rate": 0.0001771141815563257, "loss": 0.3282, "step": 10023 }, { "epoch": 0.8120544394037589, "grad_norm": 0.03422831743955612, "learning_rate": 0.00017710968090373105, "loss": 0.3027, "step": 10024 }, { "epoch": 0.8121354504212573, "grad_norm": 0.03537493571639061, "learning_rate": 0.00017710518025113644, "loss": 0.357, "step": 10025 }, { "epoch": 0.8122164614387557, "grad_norm": 0.03175731748342514, "learning_rate": 0.0001771006795985418, "loss": 0.3397, "step": 10026 }, { "epoch": 0.812297472456254, "grad_norm": 0.0324130654335022, "learning_rate": 0.00017709617894594716, "loss": 0.3181, "step": 10027 }, { "epoch": 0.8123784834737524, "grad_norm": 0.03641660511493683, "learning_rate": 0.00017709167829335254, "loss": 0.3697, "step": 10028 }, { "epoch": 0.8124594944912508, "grad_norm": 0.028983507305383682, "learning_rate": 0.00017708717764075793, "loss": 0.2776, "step": 10029 }, { "epoch": 0.8125405055087492, "grad_norm": 0.02874094247817993, "learning_rate": 0.0001770826769881633, "loss": 0.3062, "step": 10030 }, { "epoch": 0.8126215165262476, "grad_norm": 0.03256585821509361, "learning_rate": 0.00017707817633556868, "loss": 0.2878, "step": 10031 }, { "epoch": 0.812702527543746, "grad_norm": 0.03371914476156235, "learning_rate": 0.00017707367568297404, "loss": 0.3744, "step": 10032 }, { "epoch": 0.8127835385612443, "grad_norm": 0.03364307060837746, "learning_rate": 0.0001770691750303794, "loss": 0.3373, "step": 10033 }, { "epoch": 0.8128645495787427, "grad_norm": 0.03251344710588455, "learning_rate": 0.0001770646743777848, "loss": 0.3503, "step": 10034 }, { "epoch": 0.8129455605962411, "grad_norm": 0.03772956505417824, "learning_rate": 0.00017706017372519017, "loss": 0.3693, "step": 10035 }, { "epoch": 0.8130265716137395, "grad_norm": 0.03383497893810272, "learning_rate": 0.00017705567307259553, "loss": 0.3337, "step": 10036 }, { "epoch": 0.8131075826312378, "grad_norm": 0.030101200565695763, "learning_rate": 0.00017705117242000092, "loss": 0.3006, "step": 10037 }, { "epoch": 0.8131885936487362, "grad_norm": 0.03550754860043526, "learning_rate": 0.00017704667176740628, "loss": 0.3391, "step": 10038 }, { "epoch": 0.8132696046662347, "grad_norm": 0.033610474318265915, "learning_rate": 0.00017704217111481164, "loss": 0.3198, "step": 10039 }, { "epoch": 0.813350615683733, "grad_norm": 0.033679138869047165, "learning_rate": 0.00017703767046221703, "loss": 0.3716, "step": 10040 }, { "epoch": 0.8134316267012314, "grad_norm": 0.03023621067404747, "learning_rate": 0.00017703316980962242, "loss": 0.3359, "step": 10041 }, { "epoch": 0.8135126377187297, "grad_norm": 0.030499301850795746, "learning_rate": 0.00017702866915702778, "loss": 0.2694, "step": 10042 }, { "epoch": 0.8135936487362281, "grad_norm": 0.03403441235423088, "learning_rate": 0.00017702416850443316, "loss": 0.352, "step": 10043 }, { "epoch": 0.8136746597537265, "grad_norm": 0.02972903847694397, "learning_rate": 0.00017701966785183852, "loss": 0.298, "step": 10044 }, { "epoch": 0.8137556707712249, "grad_norm": 0.03293878212571144, "learning_rate": 0.00017701516719924388, "loss": 0.3617, "step": 10045 }, { "epoch": 0.8138366817887233, "grad_norm": 0.03338270261883736, "learning_rate": 0.00017701066654664927, "loss": 0.3154, "step": 10046 }, { "epoch": 0.8139176928062216, "grad_norm": 0.03914094343781471, "learning_rate": 0.00017700616589405466, "loss": 0.3046, "step": 10047 }, { "epoch": 0.81399870382372, "grad_norm": 0.03613367676734924, "learning_rate": 0.00017700166524146002, "loss": 0.3574, "step": 10048 }, { "epoch": 0.8140797148412184, "grad_norm": 0.04005276784300804, "learning_rate": 0.0001769971645888654, "loss": 0.3526, "step": 10049 }, { "epoch": 0.8141607258587168, "grad_norm": 0.032530419528484344, "learning_rate": 0.00017699266393627076, "loss": 0.2935, "step": 10050 }, { "epoch": 0.8142417368762151, "grad_norm": 0.02893023192882538, "learning_rate": 0.00017698816328367612, "loss": 0.2801, "step": 10051 }, { "epoch": 0.8143227478937135, "grad_norm": 0.03815500810742378, "learning_rate": 0.00017698366263108154, "loss": 0.3827, "step": 10052 }, { "epoch": 0.814403758911212, "grad_norm": 0.03357556834816933, "learning_rate": 0.0001769791619784869, "loss": 0.3487, "step": 10053 }, { "epoch": 0.8144847699287103, "grad_norm": 0.03304484114050865, "learning_rate": 0.00017697466132589226, "loss": 0.36, "step": 10054 }, { "epoch": 0.8145657809462087, "grad_norm": 0.03409358486533165, "learning_rate": 0.00017697016067329765, "loss": 0.3653, "step": 10055 }, { "epoch": 0.814646791963707, "grad_norm": 0.0298635121434927, "learning_rate": 0.000176965660020703, "loss": 0.2991, "step": 10056 }, { "epoch": 0.8147278029812054, "grad_norm": 0.03272094205021858, "learning_rate": 0.00017696115936810837, "loss": 0.3561, "step": 10057 }, { "epoch": 0.8148088139987039, "grad_norm": 0.031532127410173416, "learning_rate": 0.00017695665871551378, "loss": 0.3426, "step": 10058 }, { "epoch": 0.8148898250162022, "grad_norm": 0.029812676832079887, "learning_rate": 0.00017695215806291914, "loss": 0.3313, "step": 10059 }, { "epoch": 0.8149708360337006, "grad_norm": 0.032686151564121246, "learning_rate": 0.0001769476574103245, "loss": 0.3582, "step": 10060 }, { "epoch": 0.8150518470511989, "grad_norm": 0.02958156354725361, "learning_rate": 0.0001769431567577299, "loss": 0.3346, "step": 10061 }, { "epoch": 0.8151328580686974, "grad_norm": 0.03275395929813385, "learning_rate": 0.00017693865610513525, "loss": 0.3527, "step": 10062 }, { "epoch": 0.8152138690861958, "grad_norm": 0.03696022182703018, "learning_rate": 0.0001769341554525406, "loss": 0.3439, "step": 10063 }, { "epoch": 0.8152948801036941, "grad_norm": 0.036947984248399734, "learning_rate": 0.00017692965479994602, "loss": 0.3293, "step": 10064 }, { "epoch": 0.8153758911211925, "grad_norm": 0.034265387803316116, "learning_rate": 0.00017692515414735138, "loss": 0.3234, "step": 10065 }, { "epoch": 0.8154569021386908, "grad_norm": 0.03201863914728165, "learning_rate": 0.00017692065349475674, "loss": 0.3489, "step": 10066 }, { "epoch": 0.8155379131561893, "grad_norm": 0.0339871384203434, "learning_rate": 0.00017691615284216213, "loss": 0.3307, "step": 10067 }, { "epoch": 0.8156189241736876, "grad_norm": 0.0365617498755455, "learning_rate": 0.0001769116521895675, "loss": 0.3246, "step": 10068 }, { "epoch": 0.815699935191186, "grad_norm": 0.032406214624643326, "learning_rate": 0.00017690715153697285, "loss": 0.3279, "step": 10069 }, { "epoch": 0.8157809462086844, "grad_norm": 0.034895364195108414, "learning_rate": 0.00017690265088437826, "loss": 0.3606, "step": 10070 }, { "epoch": 0.8158619572261827, "grad_norm": 0.03705539554357529, "learning_rate": 0.00017689815023178362, "loss": 0.3373, "step": 10071 }, { "epoch": 0.8159429682436812, "grad_norm": 0.03331300616264343, "learning_rate": 0.00017689364957918898, "loss": 0.3623, "step": 10072 }, { "epoch": 0.8160239792611795, "grad_norm": 0.03506195545196533, "learning_rate": 0.00017688914892659437, "loss": 0.3565, "step": 10073 }, { "epoch": 0.8161049902786779, "grad_norm": 0.033705681562423706, "learning_rate": 0.00017688464827399973, "loss": 0.3304, "step": 10074 }, { "epoch": 0.8161860012961762, "grad_norm": 0.03693459555506706, "learning_rate": 0.0001768801476214051, "loss": 0.4013, "step": 10075 }, { "epoch": 0.8162670123136747, "grad_norm": 0.034374382346868515, "learning_rate": 0.0001768756469688105, "loss": 0.3793, "step": 10076 }, { "epoch": 0.8163480233311731, "grad_norm": 0.029740922152996063, "learning_rate": 0.00017687114631621587, "loss": 0.3023, "step": 10077 }, { "epoch": 0.8164290343486714, "grad_norm": 0.03397730737924576, "learning_rate": 0.00017686664566362123, "loss": 0.3225, "step": 10078 }, { "epoch": 0.8165100453661698, "grad_norm": 0.03439175710082054, "learning_rate": 0.0001768621450110266, "loss": 0.3202, "step": 10079 }, { "epoch": 0.8165910563836681, "grad_norm": 0.03943687304854393, "learning_rate": 0.00017685764435843197, "loss": 0.3415, "step": 10080 }, { "epoch": 0.8166720674011666, "grad_norm": 0.03075510449707508, "learning_rate": 0.00017685314370583736, "loss": 0.3429, "step": 10081 }, { "epoch": 0.816753078418665, "grad_norm": 0.03713309392333031, "learning_rate": 0.00017684864305324275, "loss": 0.3386, "step": 10082 }, { "epoch": 0.8168340894361633, "grad_norm": 0.03184698894619942, "learning_rate": 0.0001768441424006481, "loss": 0.3186, "step": 10083 }, { "epoch": 0.8169151004536617, "grad_norm": 0.028865061700344086, "learning_rate": 0.00017683964174805347, "loss": 0.287, "step": 10084 }, { "epoch": 0.81699611147116, "grad_norm": 0.02944747731089592, "learning_rate": 0.00017683514109545885, "loss": 0.2863, "step": 10085 }, { "epoch": 0.8170771224886585, "grad_norm": 0.03916551172733307, "learning_rate": 0.00017683064044286421, "loss": 0.3541, "step": 10086 }, { "epoch": 0.8171581335061568, "grad_norm": 0.030422579497098923, "learning_rate": 0.0001768261397902696, "loss": 0.3448, "step": 10087 }, { "epoch": 0.8172391445236552, "grad_norm": 0.03345503285527229, "learning_rate": 0.000176821639137675, "loss": 0.3954, "step": 10088 }, { "epoch": 0.8173201555411536, "grad_norm": 0.03584202006459236, "learning_rate": 0.00017681713848508035, "loss": 0.3315, "step": 10089 }, { "epoch": 0.817401166558652, "grad_norm": 0.03900199756026268, "learning_rate": 0.0001768126378324857, "loss": 0.382, "step": 10090 }, { "epoch": 0.8174821775761504, "grad_norm": 0.03155552223324776, "learning_rate": 0.0001768081371798911, "loss": 0.3376, "step": 10091 }, { "epoch": 0.8175631885936487, "grad_norm": 0.03717714920639992, "learning_rate": 0.00017680363652729646, "loss": 0.3707, "step": 10092 }, { "epoch": 0.8176441996111471, "grad_norm": 0.036984365433454514, "learning_rate": 0.00017679913587470184, "loss": 0.3643, "step": 10093 }, { "epoch": 0.8177252106286454, "grad_norm": 0.03007912077009678, "learning_rate": 0.00017679463522210723, "loss": 0.3161, "step": 10094 }, { "epoch": 0.8178062216461439, "grad_norm": 0.030122673138976097, "learning_rate": 0.0001767901345695126, "loss": 0.3519, "step": 10095 }, { "epoch": 0.8178872326636423, "grad_norm": 0.033813752233982086, "learning_rate": 0.00017678563391691795, "loss": 0.3217, "step": 10096 }, { "epoch": 0.8179682436811406, "grad_norm": 0.03976140543818474, "learning_rate": 0.00017678113326432334, "loss": 0.3755, "step": 10097 }, { "epoch": 0.818049254698639, "grad_norm": 0.02783932164311409, "learning_rate": 0.0001767766326117287, "loss": 0.2846, "step": 10098 }, { "epoch": 0.8181302657161373, "grad_norm": 0.03258772939443588, "learning_rate": 0.00017677213195913408, "loss": 0.329, "step": 10099 }, { "epoch": 0.8182112767336358, "grad_norm": 0.02766154520213604, "learning_rate": 0.00017676763130653947, "loss": 0.3437, "step": 10100 }, { "epoch": 0.8182922877511342, "grad_norm": 0.03032521903514862, "learning_rate": 0.00017676313065394483, "loss": 0.3026, "step": 10101 }, { "epoch": 0.8183732987686325, "grad_norm": 0.03306029364466667, "learning_rate": 0.0001767586300013502, "loss": 0.3731, "step": 10102 }, { "epoch": 0.8184543097861309, "grad_norm": 0.03311099484562874, "learning_rate": 0.00017675412934875558, "loss": 0.3349, "step": 10103 }, { "epoch": 0.8185353208036293, "grad_norm": 0.03536633029580116, "learning_rate": 0.00017674962869616097, "loss": 0.3336, "step": 10104 }, { "epoch": 0.8186163318211277, "grad_norm": 0.034599754959344864, "learning_rate": 0.00017674512804356633, "loss": 0.3893, "step": 10105 }, { "epoch": 0.818697342838626, "grad_norm": 0.035283416509628296, "learning_rate": 0.0001767406273909717, "loss": 0.3381, "step": 10106 }, { "epoch": 0.8187783538561244, "grad_norm": 0.035050179809331894, "learning_rate": 0.00017673612673837707, "loss": 0.356, "step": 10107 }, { "epoch": 0.8188593648736228, "grad_norm": 0.032310303300619125, "learning_rate": 0.00017673162608578243, "loss": 0.3146, "step": 10108 }, { "epoch": 0.8189403758911212, "grad_norm": 0.03521250560879707, "learning_rate": 0.00017672712543318782, "loss": 0.3217, "step": 10109 }, { "epoch": 0.8190213869086196, "grad_norm": 0.0328909195959568, "learning_rate": 0.0001767226247805932, "loss": 0.3146, "step": 10110 }, { "epoch": 0.8191023979261179, "grad_norm": 0.027794158086180687, "learning_rate": 0.00017671812412799857, "loss": 0.3128, "step": 10111 }, { "epoch": 0.8191834089436163, "grad_norm": 0.032471973448991776, "learning_rate": 0.00017671362347540395, "loss": 0.3042, "step": 10112 }, { "epoch": 0.8192644199611148, "grad_norm": 0.03750569745898247, "learning_rate": 0.00017670912282280931, "loss": 0.3869, "step": 10113 }, { "epoch": 0.8193454309786131, "grad_norm": 0.035073671489953995, "learning_rate": 0.00017670462217021467, "loss": 0.3423, "step": 10114 }, { "epoch": 0.8194264419961115, "grad_norm": 0.033087559044361115, "learning_rate": 0.00017670012151762006, "loss": 0.3588, "step": 10115 }, { "epoch": 0.8195074530136098, "grad_norm": 0.030937805771827698, "learning_rate": 0.00017669562086502545, "loss": 0.3604, "step": 10116 }, { "epoch": 0.8195884640311082, "grad_norm": 0.03352980315685272, "learning_rate": 0.0001766911202124308, "loss": 0.3375, "step": 10117 }, { "epoch": 0.8196694750486067, "grad_norm": 0.030550308525562286, "learning_rate": 0.0001766866195598362, "loss": 0.316, "step": 10118 }, { "epoch": 0.819750486066105, "grad_norm": 0.030858267098665237, "learning_rate": 0.00017668211890724156, "loss": 0.3223, "step": 10119 }, { "epoch": 0.8198314970836034, "grad_norm": 0.03076436184346676, "learning_rate": 0.00017667761825464692, "loss": 0.3202, "step": 10120 }, { "epoch": 0.8199125081011017, "grad_norm": 0.029977362602949142, "learning_rate": 0.0001766731176020523, "loss": 0.3316, "step": 10121 }, { "epoch": 0.8199935191186001, "grad_norm": 0.03315039351582527, "learning_rate": 0.0001766686169494577, "loss": 0.3452, "step": 10122 }, { "epoch": 0.8200745301360985, "grad_norm": 0.0308857299387455, "learning_rate": 0.00017666411629686305, "loss": 0.3231, "step": 10123 }, { "epoch": 0.8201555411535969, "grad_norm": 0.036206457763910294, "learning_rate": 0.00017665961564426844, "loss": 0.358, "step": 10124 }, { "epoch": 0.8202365521710953, "grad_norm": 0.037194930016994476, "learning_rate": 0.0001766551149916738, "loss": 0.3359, "step": 10125 }, { "epoch": 0.8203175631885936, "grad_norm": 0.032011616975069046, "learning_rate": 0.00017665061433907916, "loss": 0.3539, "step": 10126 }, { "epoch": 0.8203985742060921, "grad_norm": 0.03428258001804352, "learning_rate": 0.00017664611368648455, "loss": 0.3126, "step": 10127 }, { "epoch": 0.8204795852235904, "grad_norm": 0.03403123840689659, "learning_rate": 0.00017664161303388993, "loss": 0.3002, "step": 10128 }, { "epoch": 0.8205605962410888, "grad_norm": 0.03198724612593651, "learning_rate": 0.0001766371123812953, "loss": 0.3712, "step": 10129 }, { "epoch": 0.8206416072585871, "grad_norm": 0.036207620054483414, "learning_rate": 0.00017663261172870068, "loss": 0.3321, "step": 10130 }, { "epoch": 0.8207226182760855, "grad_norm": 0.029472071677446365, "learning_rate": 0.00017662811107610604, "loss": 0.3188, "step": 10131 }, { "epoch": 0.820803629293584, "grad_norm": 0.035696543753147125, "learning_rate": 0.0001766236104235114, "loss": 0.3607, "step": 10132 }, { "epoch": 0.8208846403110823, "grad_norm": 0.03706861287355423, "learning_rate": 0.00017661910977091681, "loss": 0.3192, "step": 10133 }, { "epoch": 0.8209656513285807, "grad_norm": 0.03445323556661606, "learning_rate": 0.00017661460911832217, "loss": 0.3363, "step": 10134 }, { "epoch": 0.821046662346079, "grad_norm": 0.0338570699095726, "learning_rate": 0.00017661010846572753, "loss": 0.4025, "step": 10135 }, { "epoch": 0.8211276733635774, "grad_norm": 0.032541487365961075, "learning_rate": 0.00017660560781313292, "loss": 0.3042, "step": 10136 }, { "epoch": 0.8212086843810759, "grad_norm": 0.03224986419081688, "learning_rate": 0.00017660110716053828, "loss": 0.3385, "step": 10137 }, { "epoch": 0.8212896953985742, "grad_norm": 0.03629942238330841, "learning_rate": 0.00017659660650794364, "loss": 0.3329, "step": 10138 }, { "epoch": 0.8213707064160726, "grad_norm": 0.02894359454512596, "learning_rate": 0.00017659210585534906, "loss": 0.2813, "step": 10139 }, { "epoch": 0.8214517174335709, "grad_norm": 0.033627405762672424, "learning_rate": 0.00017658760520275442, "loss": 0.3437, "step": 10140 }, { "epoch": 0.8215327284510694, "grad_norm": 0.03281831368803978, "learning_rate": 0.00017658310455015978, "loss": 0.3184, "step": 10141 }, { "epoch": 0.8216137394685677, "grad_norm": 0.03470015153288841, "learning_rate": 0.00017657860389756516, "loss": 0.3477, "step": 10142 }, { "epoch": 0.8216947504860661, "grad_norm": 0.03491885960102081, "learning_rate": 0.00017657410324497052, "loss": 0.3619, "step": 10143 }, { "epoch": 0.8217757615035645, "grad_norm": 0.03157167136669159, "learning_rate": 0.00017656960259237588, "loss": 0.2922, "step": 10144 }, { "epoch": 0.8218567725210628, "grad_norm": 0.030484763905405998, "learning_rate": 0.0001765651019397813, "loss": 0.3078, "step": 10145 }, { "epoch": 0.8219377835385613, "grad_norm": 0.030474934726953506, "learning_rate": 0.00017656060128718666, "loss": 0.3291, "step": 10146 }, { "epoch": 0.8220187945560596, "grad_norm": 0.030692746862769127, "learning_rate": 0.00017655610063459202, "loss": 0.3524, "step": 10147 }, { "epoch": 0.822099805573558, "grad_norm": 0.0413612499833107, "learning_rate": 0.0001765515999819974, "loss": 0.3379, "step": 10148 }, { "epoch": 0.8221808165910564, "grad_norm": 0.03265791013836861, "learning_rate": 0.00017654709932940276, "loss": 0.3039, "step": 10149 }, { "epoch": 0.8222618276085548, "grad_norm": 0.03477988764643669, "learning_rate": 0.00017654259867680812, "loss": 0.4187, "step": 10150 }, { "epoch": 0.8223428386260532, "grad_norm": 0.03015865385532379, "learning_rate": 0.00017653809802421354, "loss": 0.2651, "step": 10151 }, { "epoch": 0.8224238496435515, "grad_norm": 0.03432740271091461, "learning_rate": 0.0001765335973716189, "loss": 0.3499, "step": 10152 }, { "epoch": 0.8225048606610499, "grad_norm": 0.033863767981529236, "learning_rate": 0.00017652909671902426, "loss": 0.3242, "step": 10153 }, { "epoch": 0.8225858716785482, "grad_norm": 0.03144434094429016, "learning_rate": 0.00017652459606642965, "loss": 0.3114, "step": 10154 }, { "epoch": 0.8226668826960467, "grad_norm": 0.03368362784385681, "learning_rate": 0.000176520095413835, "loss": 0.3216, "step": 10155 }, { "epoch": 0.8227478937135451, "grad_norm": 0.032564952969551086, "learning_rate": 0.0001765155947612404, "loss": 0.3055, "step": 10156 }, { "epoch": 0.8228289047310434, "grad_norm": 0.03535742685198784, "learning_rate": 0.00017651109410864578, "loss": 0.3472, "step": 10157 }, { "epoch": 0.8229099157485418, "grad_norm": 0.033608000725507736, "learning_rate": 0.00017650659345605114, "loss": 0.3403, "step": 10158 }, { "epoch": 0.8229909267660401, "grad_norm": 0.03379244729876518, "learning_rate": 0.0001765020928034565, "loss": 0.3519, "step": 10159 }, { "epoch": 0.8230719377835386, "grad_norm": 0.03284695744514465, "learning_rate": 0.0001764975921508619, "loss": 0.3404, "step": 10160 }, { "epoch": 0.823152948801037, "grad_norm": 0.03292253240942955, "learning_rate": 0.00017649309149826725, "loss": 0.3426, "step": 10161 }, { "epoch": 0.8232339598185353, "grad_norm": 0.03824080526828766, "learning_rate": 0.00017648859084567263, "loss": 0.3025, "step": 10162 }, { "epoch": 0.8233149708360337, "grad_norm": 0.033393364399671555, "learning_rate": 0.00017648409019307802, "loss": 0.3542, "step": 10163 }, { "epoch": 0.8233959818535321, "grad_norm": 0.03220822662115097, "learning_rate": 0.00017647958954048338, "loss": 0.3412, "step": 10164 }, { "epoch": 0.8234769928710305, "grad_norm": 0.033468231558799744, "learning_rate": 0.00017647508888788874, "loss": 0.3682, "step": 10165 }, { "epoch": 0.8235580038885288, "grad_norm": 0.03393976017832756, "learning_rate": 0.00017647058823529413, "loss": 0.3251, "step": 10166 }, { "epoch": 0.8236390149060272, "grad_norm": 0.03448769450187683, "learning_rate": 0.0001764660875826995, "loss": 0.3865, "step": 10167 }, { "epoch": 0.8237200259235256, "grad_norm": 0.03279362991452217, "learning_rate": 0.00017646158693010488, "loss": 0.3293, "step": 10168 }, { "epoch": 0.823801036941024, "grad_norm": 0.03309918940067291, "learning_rate": 0.00017645708627751026, "loss": 0.3241, "step": 10169 }, { "epoch": 0.8238820479585224, "grad_norm": 0.03760819882154465, "learning_rate": 0.00017645258562491562, "loss": 0.3705, "step": 10170 }, { "epoch": 0.8239630589760207, "grad_norm": 0.03316948935389519, "learning_rate": 0.00017644808497232098, "loss": 0.3367, "step": 10171 }, { "epoch": 0.8240440699935191, "grad_norm": 0.034577999264001846, "learning_rate": 0.00017644358431972637, "loss": 0.3283, "step": 10172 }, { "epoch": 0.8241250810110174, "grad_norm": 0.031563322991132736, "learning_rate": 0.00017643908366713173, "loss": 0.3797, "step": 10173 }, { "epoch": 0.8242060920285159, "grad_norm": 0.03220498561859131, "learning_rate": 0.00017643458301453712, "loss": 0.2818, "step": 10174 }, { "epoch": 0.8242871030460143, "grad_norm": 0.036133624613285065, "learning_rate": 0.0001764300823619425, "loss": 0.3076, "step": 10175 }, { "epoch": 0.8243681140635126, "grad_norm": 0.0324624739587307, "learning_rate": 0.00017642558170934787, "loss": 0.3597, "step": 10176 }, { "epoch": 0.824449125081011, "grad_norm": 0.0329521968960762, "learning_rate": 0.00017642108105675323, "loss": 0.3029, "step": 10177 }, { "epoch": 0.8245301360985094, "grad_norm": 0.03469759225845337, "learning_rate": 0.0001764165804041586, "loss": 0.3496, "step": 10178 }, { "epoch": 0.8246111471160078, "grad_norm": 0.03664855659008026, "learning_rate": 0.00017641207975156397, "loss": 0.3336, "step": 10179 }, { "epoch": 0.8246921581335062, "grad_norm": 0.03370808809995651, "learning_rate": 0.00017640757909896936, "loss": 0.3523, "step": 10180 }, { "epoch": 0.8247731691510045, "grad_norm": 0.029433516785502434, "learning_rate": 0.00017640307844637475, "loss": 0.3281, "step": 10181 }, { "epoch": 0.8248541801685029, "grad_norm": 0.029438281431794167, "learning_rate": 0.0001763985777937801, "loss": 0.2875, "step": 10182 }, { "epoch": 0.8249351911860013, "grad_norm": 0.03132937103509903, "learning_rate": 0.00017639407714118547, "loss": 0.3136, "step": 10183 }, { "epoch": 0.8250162022034997, "grad_norm": 0.03620680794119835, "learning_rate": 0.00017638957648859085, "loss": 0.3434, "step": 10184 }, { "epoch": 0.825097213220998, "grad_norm": 0.031460925936698914, "learning_rate": 0.00017638507583599624, "loss": 0.3506, "step": 10185 }, { "epoch": 0.8251782242384964, "grad_norm": 0.03671254590153694, "learning_rate": 0.0001763805751834016, "loss": 0.3396, "step": 10186 }, { "epoch": 0.8252592352559948, "grad_norm": 0.040558792650699615, "learning_rate": 0.000176376074530807, "loss": 0.371, "step": 10187 }, { "epoch": 0.8253402462734932, "grad_norm": 0.040832262486219406, "learning_rate": 0.00017637157387821235, "loss": 0.3513, "step": 10188 }, { "epoch": 0.8254212572909916, "grad_norm": 0.030765267089009285, "learning_rate": 0.0001763670732256177, "loss": 0.3731, "step": 10189 }, { "epoch": 0.8255022683084899, "grad_norm": 0.032495588064193726, "learning_rate": 0.0001763625725730231, "loss": 0.3384, "step": 10190 }, { "epoch": 0.8255832793259883, "grad_norm": 0.035736821591854095, "learning_rate": 0.00017635807192042848, "loss": 0.3452, "step": 10191 }, { "epoch": 0.8256642903434868, "grad_norm": 0.03253974765539169, "learning_rate": 0.00017635357126783384, "loss": 0.3396, "step": 10192 }, { "epoch": 0.8257453013609851, "grad_norm": 0.033621449023485184, "learning_rate": 0.00017634907061523923, "loss": 0.3445, "step": 10193 }, { "epoch": 0.8258263123784835, "grad_norm": 0.035593949258327484, "learning_rate": 0.0001763445699626446, "loss": 0.3359, "step": 10194 }, { "epoch": 0.8259073233959818, "grad_norm": 0.03543344885110855, "learning_rate": 0.00017634006931004995, "loss": 0.3429, "step": 10195 }, { "epoch": 0.8259883344134802, "grad_norm": 0.03207477554678917, "learning_rate": 0.00017633556865745534, "loss": 0.3537, "step": 10196 }, { "epoch": 0.8260693454309787, "grad_norm": 0.03806965425610542, "learning_rate": 0.00017633106800486072, "loss": 0.317, "step": 10197 }, { "epoch": 0.826150356448477, "grad_norm": 0.048110056668519974, "learning_rate": 0.00017632656735226608, "loss": 0.3788, "step": 10198 }, { "epoch": 0.8262313674659754, "grad_norm": 0.034069642424583435, "learning_rate": 0.00017632206669967147, "loss": 0.3338, "step": 10199 }, { "epoch": 0.8263123784834737, "grad_norm": 0.03619742393493652, "learning_rate": 0.00017631756604707683, "loss": 0.344, "step": 10200 }, { "epoch": 0.8263933895009722, "grad_norm": 0.03356582671403885, "learning_rate": 0.0001763130653944822, "loss": 0.3025, "step": 10201 }, { "epoch": 0.8264744005184705, "grad_norm": 0.030345069244503975, "learning_rate": 0.00017630856474188758, "loss": 0.3266, "step": 10202 }, { "epoch": 0.8265554115359689, "grad_norm": 0.029872342944145203, "learning_rate": 0.00017630406408929297, "loss": 0.3091, "step": 10203 }, { "epoch": 0.8266364225534673, "grad_norm": 0.03376317024230957, "learning_rate": 0.00017629956343669833, "loss": 0.3665, "step": 10204 }, { "epoch": 0.8267174335709656, "grad_norm": 0.03272249549627304, "learning_rate": 0.0001762950627841037, "loss": 0.3275, "step": 10205 }, { "epoch": 0.8267984445884641, "grad_norm": 0.035184647887945175, "learning_rate": 0.00017629056213150907, "loss": 0.367, "step": 10206 }, { "epoch": 0.8268794556059624, "grad_norm": 0.02668853849172592, "learning_rate": 0.00017628606147891443, "loss": 0.2825, "step": 10207 }, { "epoch": 0.8269604666234608, "grad_norm": 0.0330558642745018, "learning_rate": 0.00017628156082631982, "loss": 0.3166, "step": 10208 }, { "epoch": 0.8270414776409591, "grad_norm": 0.03391389548778534, "learning_rate": 0.0001762770601737252, "loss": 0.3426, "step": 10209 }, { "epoch": 0.8271224886584575, "grad_norm": 0.03699477016925812, "learning_rate": 0.00017627255952113057, "loss": 0.3551, "step": 10210 }, { "epoch": 0.827203499675956, "grad_norm": 0.028039144352078438, "learning_rate": 0.00017626805886853595, "loss": 0.2904, "step": 10211 }, { "epoch": 0.8272845106934543, "grad_norm": 0.031306397169828415, "learning_rate": 0.00017626355821594132, "loss": 0.3135, "step": 10212 }, { "epoch": 0.8273655217109527, "grad_norm": 0.032856930047273636, "learning_rate": 0.00017625905756334668, "loss": 0.3002, "step": 10213 }, { "epoch": 0.827446532728451, "grad_norm": 0.032040588557720184, "learning_rate": 0.0001762545569107521, "loss": 0.329, "step": 10214 }, { "epoch": 0.8275275437459495, "grad_norm": 0.03887049853801727, "learning_rate": 0.00017625005625815745, "loss": 0.3489, "step": 10215 }, { "epoch": 0.8276085547634479, "grad_norm": 0.03505093976855278, "learning_rate": 0.0001762455556055628, "loss": 0.3306, "step": 10216 }, { "epoch": 0.8276895657809462, "grad_norm": 0.032911516726017, "learning_rate": 0.0001762410549529682, "loss": 0.3399, "step": 10217 }, { "epoch": 0.8277705767984446, "grad_norm": 0.029951702803373337, "learning_rate": 0.00017623655430037356, "loss": 0.2972, "step": 10218 }, { "epoch": 0.8278515878159429, "grad_norm": 0.039747558534145355, "learning_rate": 0.00017623205364777892, "loss": 0.3619, "step": 10219 }, { "epoch": 0.8279325988334414, "grad_norm": 0.03002556785941124, "learning_rate": 0.00017622755299518433, "loss": 0.3075, "step": 10220 }, { "epoch": 0.8280136098509397, "grad_norm": 0.031569890677928925, "learning_rate": 0.0001762230523425897, "loss": 0.2906, "step": 10221 }, { "epoch": 0.8280946208684381, "grad_norm": 0.036268286406993866, "learning_rate": 0.00017621855168999505, "loss": 0.365, "step": 10222 }, { "epoch": 0.8281756318859365, "grad_norm": 0.0369952954351902, "learning_rate": 0.00017621405103740044, "loss": 0.341, "step": 10223 }, { "epoch": 0.8282566429034348, "grad_norm": 0.03524234518408775, "learning_rate": 0.0001762095503848058, "loss": 0.3623, "step": 10224 }, { "epoch": 0.8283376539209333, "grad_norm": 0.03595670685172081, "learning_rate": 0.00017620504973221116, "loss": 0.3357, "step": 10225 }, { "epoch": 0.8284186649384316, "grad_norm": 0.03600858896970749, "learning_rate": 0.00017620054907961657, "loss": 0.3593, "step": 10226 }, { "epoch": 0.82849967595593, "grad_norm": 0.0327393002808094, "learning_rate": 0.00017619604842702193, "loss": 0.3542, "step": 10227 }, { "epoch": 0.8285806869734283, "grad_norm": 0.03379761055111885, "learning_rate": 0.0001761915477744273, "loss": 0.3669, "step": 10228 }, { "epoch": 0.8286616979909268, "grad_norm": 0.03441119194030762, "learning_rate": 0.00017618704712183268, "loss": 0.3492, "step": 10229 }, { "epoch": 0.8287427090084252, "grad_norm": 0.03550002723932266, "learning_rate": 0.00017618254646923804, "loss": 0.3634, "step": 10230 }, { "epoch": 0.8288237200259235, "grad_norm": 0.029466258361935616, "learning_rate": 0.0001761780458166434, "loss": 0.3013, "step": 10231 }, { "epoch": 0.8289047310434219, "grad_norm": 0.03521445021033287, "learning_rate": 0.00017617354516404881, "loss": 0.3302, "step": 10232 }, { "epoch": 0.8289857420609202, "grad_norm": 0.03384638950228691, "learning_rate": 0.00017616904451145417, "loss": 0.3117, "step": 10233 }, { "epoch": 0.8290667530784187, "grad_norm": 0.03205539658665657, "learning_rate": 0.00017616454385885953, "loss": 0.2758, "step": 10234 }, { "epoch": 0.8291477640959171, "grad_norm": 0.034657254815101624, "learning_rate": 0.00017616004320626492, "loss": 0.3505, "step": 10235 }, { "epoch": 0.8292287751134154, "grad_norm": 0.03520963340997696, "learning_rate": 0.00017615554255367028, "loss": 0.3589, "step": 10236 }, { "epoch": 0.8293097861309138, "grad_norm": 0.03619067370891571, "learning_rate": 0.00017615104190107567, "loss": 0.3584, "step": 10237 }, { "epoch": 0.8293907971484121, "grad_norm": 0.03255670145153999, "learning_rate": 0.00017614654124848106, "loss": 0.3254, "step": 10238 }, { "epoch": 0.8294718081659106, "grad_norm": 0.037719033658504486, "learning_rate": 0.00017614204059588642, "loss": 0.3668, "step": 10239 }, { "epoch": 0.829552819183409, "grad_norm": 0.03547034412622452, "learning_rate": 0.00017613753994329178, "loss": 0.348, "step": 10240 }, { "epoch": 0.8296338302009073, "grad_norm": 0.03199911117553711, "learning_rate": 0.00017613303929069716, "loss": 0.346, "step": 10241 }, { "epoch": 0.8297148412184057, "grad_norm": 0.032698921859264374, "learning_rate": 0.00017612853863810252, "loss": 0.3688, "step": 10242 }, { "epoch": 0.8297958522359041, "grad_norm": 0.03255463391542435, "learning_rate": 0.0001761240379855079, "loss": 0.3056, "step": 10243 }, { "epoch": 0.8298768632534025, "grad_norm": 0.03685171157121658, "learning_rate": 0.0001761195373329133, "loss": 0.3088, "step": 10244 }, { "epoch": 0.8299578742709008, "grad_norm": 0.035410698503255844, "learning_rate": 0.00017611503668031866, "loss": 0.3413, "step": 10245 }, { "epoch": 0.8300388852883992, "grad_norm": 0.036075409501791, "learning_rate": 0.00017611053602772402, "loss": 0.3515, "step": 10246 }, { "epoch": 0.8301198963058976, "grad_norm": 0.04027034714818001, "learning_rate": 0.0001761060353751294, "loss": 0.3424, "step": 10247 }, { "epoch": 0.830200907323396, "grad_norm": 0.03788850083947182, "learning_rate": 0.00017610153472253476, "loss": 0.3113, "step": 10248 }, { "epoch": 0.8302819183408944, "grad_norm": 0.030312340706586838, "learning_rate": 0.00017609703406994015, "loss": 0.3107, "step": 10249 }, { "epoch": 0.8303629293583927, "grad_norm": 0.0325654000043869, "learning_rate": 0.00017609253341734554, "loss": 0.3605, "step": 10250 }, { "epoch": 0.8304439403758911, "grad_norm": 0.032030895352363586, "learning_rate": 0.0001760880327647509, "loss": 0.3274, "step": 10251 }, { "epoch": 0.8305249513933896, "grad_norm": 0.03195015341043472, "learning_rate": 0.00017608353211215626, "loss": 0.3199, "step": 10252 }, { "epoch": 0.8306059624108879, "grad_norm": 0.03378359600901604, "learning_rate": 0.00017607903145956165, "loss": 0.3425, "step": 10253 }, { "epoch": 0.8306869734283863, "grad_norm": 0.030721347779035568, "learning_rate": 0.000176074530806967, "loss": 0.3134, "step": 10254 }, { "epoch": 0.8307679844458846, "grad_norm": 0.037971507757902145, "learning_rate": 0.0001760700301543724, "loss": 0.3284, "step": 10255 }, { "epoch": 0.830848995463383, "grad_norm": 0.03493460640311241, "learning_rate": 0.00017606552950177778, "loss": 0.3524, "step": 10256 }, { "epoch": 0.8309300064808814, "grad_norm": 0.026955394074320793, "learning_rate": 0.00017606102884918314, "loss": 0.2959, "step": 10257 }, { "epoch": 0.8310110174983798, "grad_norm": 0.03566323220729828, "learning_rate": 0.0001760565281965885, "loss": 0.3519, "step": 10258 }, { "epoch": 0.8310920285158782, "grad_norm": 0.035410210490226746, "learning_rate": 0.0001760520275439939, "loss": 0.3128, "step": 10259 }, { "epoch": 0.8311730395333765, "grad_norm": 0.037130143493413925, "learning_rate": 0.00017604752689139925, "loss": 0.3271, "step": 10260 }, { "epoch": 0.8312540505508749, "grad_norm": 0.0340542234480381, "learning_rate": 0.00017604302623880464, "loss": 0.3247, "step": 10261 }, { "epoch": 0.8313350615683733, "grad_norm": 0.03176279738545418, "learning_rate": 0.00017603852558621002, "loss": 0.3623, "step": 10262 }, { "epoch": 0.8314160725858717, "grad_norm": 0.034670326858758926, "learning_rate": 0.00017603402493361538, "loss": 0.3571, "step": 10263 }, { "epoch": 0.83149708360337, "grad_norm": 0.03411025553941727, "learning_rate": 0.00017602952428102074, "loss": 0.3358, "step": 10264 }, { "epoch": 0.8315780946208684, "grad_norm": 0.03430251032114029, "learning_rate": 0.00017602502362842613, "loss": 0.3704, "step": 10265 }, { "epoch": 0.8316591056383669, "grad_norm": 0.034683723002672195, "learning_rate": 0.00017602052297583152, "loss": 0.3604, "step": 10266 }, { "epoch": 0.8317401166558652, "grad_norm": 0.027798432856798172, "learning_rate": 0.00017601602232323688, "loss": 0.2718, "step": 10267 }, { "epoch": 0.8318211276733636, "grad_norm": 0.029978135600686073, "learning_rate": 0.00017601152167064226, "loss": 0.3384, "step": 10268 }, { "epoch": 0.8319021386908619, "grad_norm": 0.03305796906352043, "learning_rate": 0.00017600702101804762, "loss": 0.3121, "step": 10269 }, { "epoch": 0.8319831497083603, "grad_norm": 0.036289576441049576, "learning_rate": 0.00017600252036545298, "loss": 0.3989, "step": 10270 }, { "epoch": 0.8320641607258588, "grad_norm": 0.0353555865585804, "learning_rate": 0.00017599801971285837, "loss": 0.3777, "step": 10271 }, { "epoch": 0.8321451717433571, "grad_norm": 0.030521580949425697, "learning_rate": 0.00017599351906026376, "loss": 0.367, "step": 10272 }, { "epoch": 0.8322261827608555, "grad_norm": 0.04173492640256882, "learning_rate": 0.00017598901840766912, "loss": 0.3639, "step": 10273 }, { "epoch": 0.8323071937783538, "grad_norm": 0.02986193262040615, "learning_rate": 0.0001759845177550745, "loss": 0.3024, "step": 10274 }, { "epoch": 0.8323882047958522, "grad_norm": 0.032112687826156616, "learning_rate": 0.00017598001710247987, "loss": 0.3203, "step": 10275 }, { "epoch": 0.8324692158133506, "grad_norm": 0.03689032047986984, "learning_rate": 0.00017597551644988523, "loss": 0.3443, "step": 10276 }, { "epoch": 0.832550226830849, "grad_norm": 0.0335177518427372, "learning_rate": 0.0001759710157972906, "loss": 0.3579, "step": 10277 }, { "epoch": 0.8326312378483474, "grad_norm": 0.03220372647047043, "learning_rate": 0.000175966515144696, "loss": 0.3264, "step": 10278 }, { "epoch": 0.8327122488658457, "grad_norm": 0.03158966824412346, "learning_rate": 0.00017596201449210136, "loss": 0.3068, "step": 10279 }, { "epoch": 0.8327932598833442, "grad_norm": 0.035905398428440094, "learning_rate": 0.00017595751383950675, "loss": 0.3317, "step": 10280 }, { "epoch": 0.8328742709008425, "grad_norm": 0.04016716405749321, "learning_rate": 0.0001759530131869121, "loss": 0.372, "step": 10281 }, { "epoch": 0.8329552819183409, "grad_norm": 0.033283233642578125, "learning_rate": 0.00017594851253431747, "loss": 0.3443, "step": 10282 }, { "epoch": 0.8330362929358393, "grad_norm": 0.04536040499806404, "learning_rate": 0.00017594401188172285, "loss": 0.3507, "step": 10283 }, { "epoch": 0.8331173039533376, "grad_norm": 0.037573862820863724, "learning_rate": 0.00017593951122912824, "loss": 0.3726, "step": 10284 }, { "epoch": 0.8331983149708361, "grad_norm": 0.036852750927209854, "learning_rate": 0.0001759350105765336, "loss": 0.3693, "step": 10285 }, { "epoch": 0.8332793259883344, "grad_norm": 0.036361176520586014, "learning_rate": 0.000175930509923939, "loss": 0.3116, "step": 10286 }, { "epoch": 0.8333603370058328, "grad_norm": 0.032356224954128265, "learning_rate": 0.00017592600927134435, "loss": 0.3175, "step": 10287 }, { "epoch": 0.8334413480233311, "grad_norm": 0.03756551444530487, "learning_rate": 0.0001759215086187497, "loss": 0.3124, "step": 10288 }, { "epoch": 0.8335223590408296, "grad_norm": 0.029808249324560165, "learning_rate": 0.00017591700796615512, "loss": 0.3145, "step": 10289 }, { "epoch": 0.833603370058328, "grad_norm": 0.03625890985131264, "learning_rate": 0.00017591250731356048, "loss": 0.3583, "step": 10290 }, { "epoch": 0.8336843810758263, "grad_norm": 0.030652465298771858, "learning_rate": 0.00017590800666096584, "loss": 0.3323, "step": 10291 }, { "epoch": 0.8337653920933247, "grad_norm": 0.03474270552396774, "learning_rate": 0.00017590350600837123, "loss": 0.3724, "step": 10292 }, { "epoch": 0.833846403110823, "grad_norm": 0.03331577777862549, "learning_rate": 0.0001758990053557766, "loss": 0.3337, "step": 10293 }, { "epoch": 0.8339274141283215, "grad_norm": 0.034953150898218155, "learning_rate": 0.00017589450470318195, "loss": 0.3337, "step": 10294 }, { "epoch": 0.8340084251458199, "grad_norm": 0.0396929569542408, "learning_rate": 0.00017589000405058736, "loss": 0.378, "step": 10295 }, { "epoch": 0.8340894361633182, "grad_norm": 0.03195977956056595, "learning_rate": 0.00017588550339799272, "loss": 0.3093, "step": 10296 }, { "epoch": 0.8341704471808166, "grad_norm": 0.03176313266158104, "learning_rate": 0.00017588100274539808, "loss": 0.35, "step": 10297 }, { "epoch": 0.8342514581983149, "grad_norm": 0.03105923719704151, "learning_rate": 0.00017587650209280347, "loss": 0.3063, "step": 10298 }, { "epoch": 0.8343324692158134, "grad_norm": 0.03520163521170616, "learning_rate": 0.00017587200144020883, "loss": 0.3734, "step": 10299 }, { "epoch": 0.8344134802333117, "grad_norm": 0.0330432653427124, "learning_rate": 0.0001758675007876142, "loss": 0.3141, "step": 10300 }, { "epoch": 0.8344944912508101, "grad_norm": 0.02842598967254162, "learning_rate": 0.0001758630001350196, "loss": 0.312, "step": 10301 }, { "epoch": 0.8345755022683085, "grad_norm": 0.036401763558387756, "learning_rate": 0.00017585849948242497, "loss": 0.4048, "step": 10302 }, { "epoch": 0.8346565132858069, "grad_norm": 0.03183615207672119, "learning_rate": 0.00017585399882983033, "loss": 0.3077, "step": 10303 }, { "epoch": 0.8347375243033053, "grad_norm": 0.030855121091008186, "learning_rate": 0.00017584949817723571, "loss": 0.3145, "step": 10304 }, { "epoch": 0.8348185353208036, "grad_norm": 0.0320095457136631, "learning_rate": 0.00017584499752464107, "loss": 0.3457, "step": 10305 }, { "epoch": 0.834899546338302, "grad_norm": 0.0305408276617527, "learning_rate": 0.00017584049687204643, "loss": 0.3296, "step": 10306 }, { "epoch": 0.8349805573558003, "grad_norm": 0.03183217719197273, "learning_rate": 0.00017583599621945185, "loss": 0.3173, "step": 10307 }, { "epoch": 0.8350615683732988, "grad_norm": 0.03041781298816204, "learning_rate": 0.0001758314955668572, "loss": 0.3178, "step": 10308 }, { "epoch": 0.8351425793907972, "grad_norm": 0.035926688462495804, "learning_rate": 0.00017582699491426257, "loss": 0.3723, "step": 10309 }, { "epoch": 0.8352235904082955, "grad_norm": 0.034372445195913315, "learning_rate": 0.00017582249426166796, "loss": 0.3116, "step": 10310 }, { "epoch": 0.8353046014257939, "grad_norm": 0.03208107501268387, "learning_rate": 0.00017581799360907332, "loss": 0.2975, "step": 10311 }, { "epoch": 0.8353856124432922, "grad_norm": 0.0312693789601326, "learning_rate": 0.00017581349295647868, "loss": 0.3534, "step": 10312 }, { "epoch": 0.8354666234607907, "grad_norm": 0.02982800267636776, "learning_rate": 0.0001758089923038841, "loss": 0.2737, "step": 10313 }, { "epoch": 0.8355476344782891, "grad_norm": 0.03189459815621376, "learning_rate": 0.00017580449165128945, "loss": 0.325, "step": 10314 }, { "epoch": 0.8356286454957874, "grad_norm": 0.032958321273326874, "learning_rate": 0.0001757999909986948, "loss": 0.3049, "step": 10315 }, { "epoch": 0.8357096565132858, "grad_norm": 0.0360761396586895, "learning_rate": 0.0001757954903461002, "loss": 0.3337, "step": 10316 }, { "epoch": 0.8357906675307842, "grad_norm": 0.03744465112686157, "learning_rate": 0.00017579098969350556, "loss": 0.3731, "step": 10317 }, { "epoch": 0.8358716785482826, "grad_norm": 0.02999812364578247, "learning_rate": 0.00017578648904091094, "loss": 0.3192, "step": 10318 }, { "epoch": 0.835952689565781, "grad_norm": 0.03587619215250015, "learning_rate": 0.00017578198838831633, "loss": 0.3333, "step": 10319 }, { "epoch": 0.8360337005832793, "grad_norm": 0.03604312986135483, "learning_rate": 0.0001757774877357217, "loss": 0.3697, "step": 10320 }, { "epoch": 0.8361147116007777, "grad_norm": 0.03261552378535271, "learning_rate": 0.00017577298708312705, "loss": 0.3659, "step": 10321 }, { "epoch": 0.8361957226182761, "grad_norm": 0.03550964221358299, "learning_rate": 0.00017576848643053244, "loss": 0.3168, "step": 10322 }, { "epoch": 0.8362767336357745, "grad_norm": 0.03809160739183426, "learning_rate": 0.0001757639857779378, "loss": 0.3518, "step": 10323 }, { "epoch": 0.8363577446532728, "grad_norm": 0.03714146092534065, "learning_rate": 0.00017575948512534319, "loss": 0.3622, "step": 10324 }, { "epoch": 0.8364387556707712, "grad_norm": 0.03425988182425499, "learning_rate": 0.00017575498447274857, "loss": 0.3196, "step": 10325 }, { "epoch": 0.8365197666882696, "grad_norm": 0.03243013843894005, "learning_rate": 0.00017575048382015393, "loss": 0.2836, "step": 10326 }, { "epoch": 0.836600777705768, "grad_norm": 0.03315865993499756, "learning_rate": 0.0001757459831675593, "loss": 0.3028, "step": 10327 }, { "epoch": 0.8366817887232664, "grad_norm": 0.03174262493848801, "learning_rate": 0.00017574148251496468, "loss": 0.3402, "step": 10328 }, { "epoch": 0.8367627997407647, "grad_norm": 0.03199068829417229, "learning_rate": 0.00017573698186237004, "loss": 0.3028, "step": 10329 }, { "epoch": 0.8368438107582631, "grad_norm": 0.03462492674589157, "learning_rate": 0.00017573248120977543, "loss": 0.3441, "step": 10330 }, { "epoch": 0.8369248217757616, "grad_norm": 0.031907789409160614, "learning_rate": 0.00017572798055718081, "loss": 0.3094, "step": 10331 }, { "epoch": 0.8370058327932599, "grad_norm": 0.03425981476902962, "learning_rate": 0.00017572347990458617, "loss": 0.3765, "step": 10332 }, { "epoch": 0.8370868438107583, "grad_norm": 0.030974196270108223, "learning_rate": 0.00017571897925199153, "loss": 0.3041, "step": 10333 }, { "epoch": 0.8371678548282566, "grad_norm": 0.033227961510419846, "learning_rate": 0.00017571447859939692, "loss": 0.3493, "step": 10334 }, { "epoch": 0.837248865845755, "grad_norm": 0.03223063424229622, "learning_rate": 0.00017570997794680228, "loss": 0.3668, "step": 10335 }, { "epoch": 0.8373298768632534, "grad_norm": 0.03262501582503319, "learning_rate": 0.00017570547729420767, "loss": 0.3485, "step": 10336 }, { "epoch": 0.8374108878807518, "grad_norm": 0.03690466657280922, "learning_rate": 0.00017570097664161306, "loss": 0.3207, "step": 10337 }, { "epoch": 0.8374918988982502, "grad_norm": 0.042168475687503815, "learning_rate": 0.00017569647598901842, "loss": 0.326, "step": 10338 }, { "epoch": 0.8375729099157485, "grad_norm": 0.03320693597197533, "learning_rate": 0.00017569197533642378, "loss": 0.3514, "step": 10339 }, { "epoch": 0.837653920933247, "grad_norm": 0.03084513545036316, "learning_rate": 0.00017568747468382916, "loss": 0.3072, "step": 10340 }, { "epoch": 0.8377349319507453, "grad_norm": 0.03049416095018387, "learning_rate": 0.00017568297403123455, "loss": 0.3292, "step": 10341 }, { "epoch": 0.8378159429682437, "grad_norm": 0.03144636005163193, "learning_rate": 0.0001756784733786399, "loss": 0.3068, "step": 10342 }, { "epoch": 0.837896953985742, "grad_norm": 0.032478101551532745, "learning_rate": 0.0001756739727260453, "loss": 0.3345, "step": 10343 }, { "epoch": 0.8379779650032404, "grad_norm": 0.034912481904029846, "learning_rate": 0.00017566947207345066, "loss": 0.3603, "step": 10344 }, { "epoch": 0.8380589760207389, "grad_norm": 0.02963745966553688, "learning_rate": 0.00017566497142085602, "loss": 0.3777, "step": 10345 }, { "epoch": 0.8381399870382372, "grad_norm": 0.029963036999106407, "learning_rate": 0.0001756604707682614, "loss": 0.3091, "step": 10346 }, { "epoch": 0.8382209980557356, "grad_norm": 0.033851027488708496, "learning_rate": 0.0001756559701156668, "loss": 0.3792, "step": 10347 }, { "epoch": 0.8383020090732339, "grad_norm": 0.03498721122741699, "learning_rate": 0.00017565146946307215, "loss": 0.3297, "step": 10348 }, { "epoch": 0.8383830200907323, "grad_norm": 0.032017070800065994, "learning_rate": 0.00017564696881047754, "loss": 0.3312, "step": 10349 }, { "epoch": 0.8384640311082308, "grad_norm": 0.036637600511312485, "learning_rate": 0.0001756424681578829, "loss": 0.3858, "step": 10350 }, { "epoch": 0.8385450421257291, "grad_norm": 0.034795694053173065, "learning_rate": 0.00017563796750528826, "loss": 0.3326, "step": 10351 }, { "epoch": 0.8386260531432275, "grad_norm": 0.04814450815320015, "learning_rate": 0.00017563346685269365, "loss": 0.362, "step": 10352 }, { "epoch": 0.8387070641607258, "grad_norm": 0.034067779779434204, "learning_rate": 0.00017562896620009903, "loss": 0.3023, "step": 10353 }, { "epoch": 0.8387880751782243, "grad_norm": 0.04440617933869362, "learning_rate": 0.0001756244655475044, "loss": 0.3306, "step": 10354 }, { "epoch": 0.8388690861957226, "grad_norm": 0.033861372619867325, "learning_rate": 0.00017561996489490978, "loss": 0.3611, "step": 10355 }, { "epoch": 0.838950097213221, "grad_norm": 0.0317358635365963, "learning_rate": 0.00017561546424231514, "loss": 0.3187, "step": 10356 }, { "epoch": 0.8390311082307194, "grad_norm": 0.030743295326828957, "learning_rate": 0.0001756109635897205, "loss": 0.2925, "step": 10357 }, { "epoch": 0.8391121192482177, "grad_norm": 0.03173130378127098, "learning_rate": 0.0001756064629371259, "loss": 0.341, "step": 10358 }, { "epoch": 0.8391931302657162, "grad_norm": 0.03562208637595177, "learning_rate": 0.00017560196228453128, "loss": 0.2951, "step": 10359 }, { "epoch": 0.8392741412832145, "grad_norm": 0.0299212709069252, "learning_rate": 0.00017559746163193664, "loss": 0.37, "step": 10360 }, { "epoch": 0.8393551523007129, "grad_norm": 0.03546148166060448, "learning_rate": 0.00017559296097934202, "loss": 0.3246, "step": 10361 }, { "epoch": 0.8394361633182112, "grad_norm": 0.027700217440724373, "learning_rate": 0.00017558846032674738, "loss": 0.2963, "step": 10362 }, { "epoch": 0.8395171743357096, "grad_norm": 0.0363149456679821, "learning_rate": 0.00017558395967415274, "loss": 0.3417, "step": 10363 }, { "epoch": 0.8395981853532081, "grad_norm": 0.032922759652137756, "learning_rate": 0.00017557945902155813, "loss": 0.3704, "step": 10364 }, { "epoch": 0.8396791963707064, "grad_norm": 0.0318986251950264, "learning_rate": 0.00017557495836896352, "loss": 0.3125, "step": 10365 }, { "epoch": 0.8397602073882048, "grad_norm": 0.03699880093336105, "learning_rate": 0.00017557045771636888, "loss": 0.3382, "step": 10366 }, { "epoch": 0.8398412184057031, "grad_norm": 0.03683356195688248, "learning_rate": 0.00017556595706377426, "loss": 0.3457, "step": 10367 }, { "epoch": 0.8399222294232016, "grad_norm": 0.0369548536837101, "learning_rate": 0.00017556145641117962, "loss": 0.3256, "step": 10368 }, { "epoch": 0.8400032404407, "grad_norm": 0.03477805480360985, "learning_rate": 0.00017555695575858498, "loss": 0.3288, "step": 10369 }, { "epoch": 0.8400842514581983, "grad_norm": 0.03249835595488548, "learning_rate": 0.0001755524551059904, "loss": 0.3157, "step": 10370 }, { "epoch": 0.8401652624756967, "grad_norm": 0.035522498190402985, "learning_rate": 0.00017554795445339576, "loss": 0.3019, "step": 10371 }, { "epoch": 0.840246273493195, "grad_norm": 0.039500270038843155, "learning_rate": 0.00017554345380080112, "loss": 0.3576, "step": 10372 }, { "epoch": 0.8403272845106935, "grad_norm": 0.03279853239655495, "learning_rate": 0.0001755389531482065, "loss": 0.3433, "step": 10373 }, { "epoch": 0.8404082955281919, "grad_norm": 0.03178710490465164, "learning_rate": 0.00017553445249561187, "loss": 0.3494, "step": 10374 }, { "epoch": 0.8404893065456902, "grad_norm": 0.04652286320924759, "learning_rate": 0.00017552995184301723, "loss": 0.3443, "step": 10375 }, { "epoch": 0.8405703175631886, "grad_norm": 0.03587408363819122, "learning_rate": 0.00017552545119042264, "loss": 0.366, "step": 10376 }, { "epoch": 0.8406513285806869, "grad_norm": 0.03234461694955826, "learning_rate": 0.000175520950537828, "loss": 0.3429, "step": 10377 }, { "epoch": 0.8407323395981854, "grad_norm": 0.028930282220244408, "learning_rate": 0.00017551644988523336, "loss": 0.252, "step": 10378 }, { "epoch": 0.8408133506156837, "grad_norm": 0.03449437767267227, "learning_rate": 0.00017551194923263875, "loss": 0.3081, "step": 10379 }, { "epoch": 0.8408943616331821, "grad_norm": 0.037620704621076584, "learning_rate": 0.0001755074485800441, "loss": 0.3037, "step": 10380 }, { "epoch": 0.8409753726506805, "grad_norm": 0.035372767597436905, "learning_rate": 0.00017550294792744947, "loss": 0.3238, "step": 10381 }, { "epoch": 0.8410563836681789, "grad_norm": 0.037627965211868286, "learning_rate": 0.00017549844727485488, "loss": 0.3703, "step": 10382 }, { "epoch": 0.8411373946856773, "grad_norm": 0.037020884454250336, "learning_rate": 0.00017549394662226024, "loss": 0.3649, "step": 10383 }, { "epoch": 0.8412184057031756, "grad_norm": 0.0348907932639122, "learning_rate": 0.0001754894459696656, "loss": 0.331, "step": 10384 }, { "epoch": 0.841299416720674, "grad_norm": 0.0314050018787384, "learning_rate": 0.000175484945317071, "loss": 0.349, "step": 10385 }, { "epoch": 0.8413804277381723, "grad_norm": 0.02945929765701294, "learning_rate": 0.00017548044466447635, "loss": 0.3174, "step": 10386 }, { "epoch": 0.8414614387556708, "grad_norm": 0.03195718303322792, "learning_rate": 0.0001754759440118817, "loss": 0.3399, "step": 10387 }, { "epoch": 0.8415424497731692, "grad_norm": 0.029619552195072174, "learning_rate": 0.00017547144335928712, "loss": 0.3145, "step": 10388 }, { "epoch": 0.8416234607906675, "grad_norm": 0.033388081938028336, "learning_rate": 0.00017546694270669248, "loss": 0.371, "step": 10389 }, { "epoch": 0.8417044718081659, "grad_norm": 0.034243419766426086, "learning_rate": 0.00017546244205409784, "loss": 0.3269, "step": 10390 }, { "epoch": 0.8417854828256643, "grad_norm": 0.03303029388189316, "learning_rate": 0.00017545794140150323, "loss": 0.361, "step": 10391 }, { "epoch": 0.8418664938431627, "grad_norm": 0.03691716119647026, "learning_rate": 0.0001754534407489086, "loss": 0.3715, "step": 10392 }, { "epoch": 0.8419475048606611, "grad_norm": 0.03396623581647873, "learning_rate": 0.00017544894009631395, "loss": 0.3227, "step": 10393 }, { "epoch": 0.8420285158781594, "grad_norm": 0.03716912865638733, "learning_rate": 0.00017544443944371937, "loss": 0.3289, "step": 10394 }, { "epoch": 0.8421095268956578, "grad_norm": 0.038433950394392014, "learning_rate": 0.00017543993879112473, "loss": 0.3227, "step": 10395 }, { "epoch": 0.8421905379131562, "grad_norm": 0.03316536918282509, "learning_rate": 0.00017543543813853009, "loss": 0.3503, "step": 10396 }, { "epoch": 0.8422715489306546, "grad_norm": 0.03443404287099838, "learning_rate": 0.00017543093748593547, "loss": 0.3407, "step": 10397 }, { "epoch": 0.842352559948153, "grad_norm": 0.032896075397729874, "learning_rate": 0.00017542643683334083, "loss": 0.356, "step": 10398 }, { "epoch": 0.8424335709656513, "grad_norm": 0.03154495730996132, "learning_rate": 0.00017542193618074622, "loss": 0.2861, "step": 10399 }, { "epoch": 0.8425145819831497, "grad_norm": 0.031888049095869064, "learning_rate": 0.0001754174355281516, "loss": 0.3499, "step": 10400 }, { "epoch": 0.8425955930006481, "grad_norm": 0.035155389457941055, "learning_rate": 0.00017541293487555697, "loss": 0.3075, "step": 10401 }, { "epoch": 0.8426766040181465, "grad_norm": 0.03185368701815605, "learning_rate": 0.00017540843422296233, "loss": 0.3378, "step": 10402 }, { "epoch": 0.8427576150356448, "grad_norm": 0.03206276893615723, "learning_rate": 0.00017540393357036771, "loss": 0.2845, "step": 10403 }, { "epoch": 0.8428386260531432, "grad_norm": 0.033448535948991776, "learning_rate": 0.00017539943291777307, "loss": 0.3136, "step": 10404 }, { "epoch": 0.8429196370706417, "grad_norm": 0.03267358988523483, "learning_rate": 0.00017539493226517846, "loss": 0.2805, "step": 10405 }, { "epoch": 0.84300064808814, "grad_norm": 0.031093450263142586, "learning_rate": 0.00017539043161258385, "loss": 0.3217, "step": 10406 }, { "epoch": 0.8430816591056384, "grad_norm": 0.03250857815146446, "learning_rate": 0.0001753859309599892, "loss": 0.3299, "step": 10407 }, { "epoch": 0.8431626701231367, "grad_norm": 0.030597645789384842, "learning_rate": 0.00017538143030739457, "loss": 0.2808, "step": 10408 }, { "epoch": 0.8432436811406351, "grad_norm": 0.032161563634872437, "learning_rate": 0.00017537692965479996, "loss": 0.321, "step": 10409 }, { "epoch": 0.8433246921581335, "grad_norm": 0.03388415649533272, "learning_rate": 0.00017537242900220532, "loss": 0.3399, "step": 10410 }, { "epoch": 0.8434057031756319, "grad_norm": 0.038028065115213394, "learning_rate": 0.0001753679283496107, "loss": 0.3773, "step": 10411 }, { "epoch": 0.8434867141931303, "grad_norm": 0.030943255871534348, "learning_rate": 0.0001753634276970161, "loss": 0.326, "step": 10412 }, { "epoch": 0.8435677252106286, "grad_norm": 0.0353950560092926, "learning_rate": 0.00017535892704442145, "loss": 0.3228, "step": 10413 }, { "epoch": 0.843648736228127, "grad_norm": 0.034129682928323746, "learning_rate": 0.0001753544263918268, "loss": 0.3608, "step": 10414 }, { "epoch": 0.8437297472456254, "grad_norm": 0.031029662117362022, "learning_rate": 0.0001753499257392322, "loss": 0.3247, "step": 10415 }, { "epoch": 0.8438107582631238, "grad_norm": 0.0346672423183918, "learning_rate": 0.00017534542508663756, "loss": 0.3631, "step": 10416 }, { "epoch": 0.8438917692806222, "grad_norm": 0.03463343158364296, "learning_rate": 0.00017534092443404294, "loss": 0.3119, "step": 10417 }, { "epoch": 0.8439727802981205, "grad_norm": 0.027508914470672607, "learning_rate": 0.00017533642378144833, "loss": 0.3191, "step": 10418 }, { "epoch": 0.844053791315619, "grad_norm": 0.032727956771850586, "learning_rate": 0.0001753319231288537, "loss": 0.3611, "step": 10419 }, { "epoch": 0.8441348023331173, "grad_norm": 0.038039255887269974, "learning_rate": 0.00017532742247625905, "loss": 0.3386, "step": 10420 }, { "epoch": 0.8442158133506157, "grad_norm": 0.029720397666096687, "learning_rate": 0.00017532292182366444, "loss": 0.3256, "step": 10421 }, { "epoch": 0.844296824368114, "grad_norm": 0.033293940126895905, "learning_rate": 0.00017531842117106983, "loss": 0.357, "step": 10422 }, { "epoch": 0.8443778353856124, "grad_norm": 0.03200975060462952, "learning_rate": 0.00017531392051847519, "loss": 0.2927, "step": 10423 }, { "epoch": 0.8444588464031109, "grad_norm": 0.034486331045627594, "learning_rate": 0.00017530941986588057, "loss": 0.3625, "step": 10424 }, { "epoch": 0.8445398574206092, "grad_norm": 0.03146517276763916, "learning_rate": 0.00017530491921328593, "loss": 0.2829, "step": 10425 }, { "epoch": 0.8446208684381076, "grad_norm": 0.03595639392733574, "learning_rate": 0.0001753004185606913, "loss": 0.3401, "step": 10426 }, { "epoch": 0.8447018794556059, "grad_norm": 0.031279925256967545, "learning_rate": 0.00017529591790809668, "loss": 0.3248, "step": 10427 }, { "epoch": 0.8447828904731044, "grad_norm": 0.02898409217596054, "learning_rate": 0.00017529141725550207, "loss": 0.3023, "step": 10428 }, { "epoch": 0.8448639014906028, "grad_norm": 0.03126189485192299, "learning_rate": 0.00017528691660290743, "loss": 0.3171, "step": 10429 }, { "epoch": 0.8449449125081011, "grad_norm": 0.03708004206418991, "learning_rate": 0.00017528241595031281, "loss": 0.3685, "step": 10430 }, { "epoch": 0.8450259235255995, "grad_norm": 0.03363499045372009, "learning_rate": 0.00017527791529771817, "loss": 0.3127, "step": 10431 }, { "epoch": 0.8451069345430978, "grad_norm": 0.031809255480766296, "learning_rate": 0.00017527341464512353, "loss": 0.2751, "step": 10432 }, { "epoch": 0.8451879455605963, "grad_norm": 0.03433963283896446, "learning_rate": 0.00017526891399252892, "loss": 0.3465, "step": 10433 }, { "epoch": 0.8452689565780946, "grad_norm": 0.030995117500424385, "learning_rate": 0.0001752644133399343, "loss": 0.3213, "step": 10434 }, { "epoch": 0.845349967595593, "grad_norm": 0.035280533134937286, "learning_rate": 0.00017525991268733967, "loss": 0.3344, "step": 10435 }, { "epoch": 0.8454309786130914, "grad_norm": 0.030559923499822617, "learning_rate": 0.00017525541203474506, "loss": 0.3134, "step": 10436 }, { "epoch": 0.8455119896305897, "grad_norm": 0.04106120392680168, "learning_rate": 0.00017525091138215042, "loss": 0.3434, "step": 10437 }, { "epoch": 0.8455930006480882, "grad_norm": 0.0386023111641407, "learning_rate": 0.00017524641072955578, "loss": 0.3193, "step": 10438 }, { "epoch": 0.8456740116655865, "grad_norm": 0.03373211249709129, "learning_rate": 0.00017524191007696116, "loss": 0.3165, "step": 10439 }, { "epoch": 0.8457550226830849, "grad_norm": 0.03274044767022133, "learning_rate": 0.00017523740942436655, "loss": 0.3494, "step": 10440 }, { "epoch": 0.8458360337005832, "grad_norm": 0.036142315715551376, "learning_rate": 0.0001752329087717719, "loss": 0.324, "step": 10441 }, { "epoch": 0.8459170447180817, "grad_norm": 0.03499079868197441, "learning_rate": 0.0001752284081191773, "loss": 0.3474, "step": 10442 }, { "epoch": 0.8459980557355801, "grad_norm": 0.03600572049617767, "learning_rate": 0.00017522390746658266, "loss": 0.3471, "step": 10443 }, { "epoch": 0.8460790667530784, "grad_norm": 0.031942158937454224, "learning_rate": 0.00017521940681398802, "loss": 0.3209, "step": 10444 }, { "epoch": 0.8461600777705768, "grad_norm": 0.03473243489861488, "learning_rate": 0.0001752149061613934, "loss": 0.306, "step": 10445 }, { "epoch": 0.8462410887880751, "grad_norm": 0.033768992871046066, "learning_rate": 0.0001752104055087988, "loss": 0.354, "step": 10446 }, { "epoch": 0.8463220998055736, "grad_norm": 0.03379128873348236, "learning_rate": 0.00017520590485620415, "loss": 0.3339, "step": 10447 }, { "epoch": 0.846403110823072, "grad_norm": 0.035574495792388916, "learning_rate": 0.00017520140420360954, "loss": 0.3418, "step": 10448 }, { "epoch": 0.8464841218405703, "grad_norm": 0.03196391090750694, "learning_rate": 0.0001751969035510149, "loss": 0.3601, "step": 10449 }, { "epoch": 0.8465651328580687, "grad_norm": 0.03568727895617485, "learning_rate": 0.00017519240289842026, "loss": 0.3679, "step": 10450 }, { "epoch": 0.846646143875567, "grad_norm": 0.034467652440071106, "learning_rate": 0.00017518790224582567, "loss": 0.3713, "step": 10451 }, { "epoch": 0.8467271548930655, "grad_norm": 0.029677096754312515, "learning_rate": 0.00017518340159323103, "loss": 0.3436, "step": 10452 }, { "epoch": 0.8468081659105638, "grad_norm": 0.03185281902551651, "learning_rate": 0.0001751789009406364, "loss": 0.3117, "step": 10453 }, { "epoch": 0.8468891769280622, "grad_norm": 0.0332181490957737, "learning_rate": 0.00017517440028804178, "loss": 0.3082, "step": 10454 }, { "epoch": 0.8469701879455606, "grad_norm": 0.034928590059280396, "learning_rate": 0.00017516989963544714, "loss": 0.284, "step": 10455 }, { "epoch": 0.847051198963059, "grad_norm": 0.036926280707120895, "learning_rate": 0.0001751653989828525, "loss": 0.3399, "step": 10456 }, { "epoch": 0.8471322099805574, "grad_norm": 0.033530738204717636, "learning_rate": 0.00017516089833025792, "loss": 0.3319, "step": 10457 }, { "epoch": 0.8472132209980557, "grad_norm": 0.03474058955907822, "learning_rate": 0.00017515639767766328, "loss": 0.3459, "step": 10458 }, { "epoch": 0.8472942320155541, "grad_norm": 0.03582526743412018, "learning_rate": 0.00017515189702506864, "loss": 0.3388, "step": 10459 }, { "epoch": 0.8473752430330524, "grad_norm": 0.030622687190771103, "learning_rate": 0.00017514739637247402, "loss": 0.2943, "step": 10460 }, { "epoch": 0.8474562540505509, "grad_norm": 0.03136473521590233, "learning_rate": 0.00017514289571987938, "loss": 0.3152, "step": 10461 }, { "epoch": 0.8475372650680493, "grad_norm": 0.03528090938925743, "learning_rate": 0.00017513839506728474, "loss": 0.3494, "step": 10462 }, { "epoch": 0.8476182760855476, "grad_norm": 0.03588859736919403, "learning_rate": 0.00017513389441469016, "loss": 0.3291, "step": 10463 }, { "epoch": 0.847699287103046, "grad_norm": 0.031587012112140656, "learning_rate": 0.00017512939376209552, "loss": 0.3038, "step": 10464 }, { "epoch": 0.8477802981205443, "grad_norm": 0.03372107073664665, "learning_rate": 0.00017512489310950088, "loss": 0.3606, "step": 10465 }, { "epoch": 0.8478613091380428, "grad_norm": 0.03304535523056984, "learning_rate": 0.00017512039245690626, "loss": 0.3513, "step": 10466 }, { "epoch": 0.8479423201555412, "grad_norm": 0.03352010250091553, "learning_rate": 0.00017511589180431162, "loss": 0.3307, "step": 10467 }, { "epoch": 0.8480233311730395, "grad_norm": 0.03563792631030083, "learning_rate": 0.00017511139115171698, "loss": 0.2981, "step": 10468 }, { "epoch": 0.8481043421905379, "grad_norm": 0.031774427741765976, "learning_rate": 0.0001751068904991224, "loss": 0.3627, "step": 10469 }, { "epoch": 0.8481853532080363, "grad_norm": 0.030490538105368614, "learning_rate": 0.00017510238984652776, "loss": 0.3148, "step": 10470 }, { "epoch": 0.8482663642255347, "grad_norm": 0.03514517843723297, "learning_rate": 0.00017509788919393312, "loss": 0.3667, "step": 10471 }, { "epoch": 0.848347375243033, "grad_norm": 0.035855576395988464, "learning_rate": 0.0001750933885413385, "loss": 0.3683, "step": 10472 }, { "epoch": 0.8484283862605314, "grad_norm": 0.03173260763287544, "learning_rate": 0.00017508888788874387, "loss": 0.3434, "step": 10473 }, { "epoch": 0.8485093972780298, "grad_norm": 0.029711991548538208, "learning_rate": 0.00017508438723614925, "loss": 0.3222, "step": 10474 }, { "epoch": 0.8485904082955282, "grad_norm": 0.038392074406147, "learning_rate": 0.00017507988658355464, "loss": 0.329, "step": 10475 }, { "epoch": 0.8486714193130266, "grad_norm": 0.03413744643330574, "learning_rate": 0.00017507538593096, "loss": 0.2911, "step": 10476 }, { "epoch": 0.8487524303305249, "grad_norm": 0.03350900858640671, "learning_rate": 0.00017507088527836536, "loss": 0.3408, "step": 10477 }, { "epoch": 0.8488334413480233, "grad_norm": 0.03474394232034683, "learning_rate": 0.00017506638462577075, "loss": 0.3367, "step": 10478 }, { "epoch": 0.8489144523655218, "grad_norm": 0.032444316893815994, "learning_rate": 0.0001750618839731761, "loss": 0.3016, "step": 10479 }, { "epoch": 0.8489954633830201, "grad_norm": 0.035130154341459274, "learning_rate": 0.0001750573833205815, "loss": 0.3083, "step": 10480 }, { "epoch": 0.8490764744005185, "grad_norm": 0.03369656205177307, "learning_rate": 0.00017505288266798688, "loss": 0.3756, "step": 10481 }, { "epoch": 0.8491574854180168, "grad_norm": 0.03448232635855675, "learning_rate": 0.00017504838201539224, "loss": 0.3186, "step": 10482 }, { "epoch": 0.8492384964355152, "grad_norm": 0.031114330515265465, "learning_rate": 0.0001750438813627976, "loss": 0.3067, "step": 10483 }, { "epoch": 0.8493195074530137, "grad_norm": 0.039805684238672256, "learning_rate": 0.000175039380710203, "loss": 0.3406, "step": 10484 }, { "epoch": 0.849400518470512, "grad_norm": 0.03329307585954666, "learning_rate": 0.00017503488005760835, "loss": 0.3063, "step": 10485 }, { "epoch": 0.8494815294880104, "grad_norm": 0.03293229639530182, "learning_rate": 0.00017503037940501374, "loss": 0.302, "step": 10486 }, { "epoch": 0.8495625405055087, "grad_norm": 0.03178049623966217, "learning_rate": 0.00017502587875241912, "loss": 0.3079, "step": 10487 }, { "epoch": 0.8496435515230071, "grad_norm": 0.04478497803211212, "learning_rate": 0.00017502137809982448, "loss": 0.3592, "step": 10488 }, { "epoch": 0.8497245625405055, "grad_norm": 0.03788404539227486, "learning_rate": 0.00017501687744722984, "loss": 0.3441, "step": 10489 }, { "epoch": 0.8498055735580039, "grad_norm": 0.031607985496520996, "learning_rate": 0.00017501237679463523, "loss": 0.3114, "step": 10490 }, { "epoch": 0.8498865845755023, "grad_norm": 0.03063138946890831, "learning_rate": 0.0001750078761420406, "loss": 0.2909, "step": 10491 }, { "epoch": 0.8499675955930006, "grad_norm": 0.030662069097161293, "learning_rate": 0.00017500337548944598, "loss": 0.3031, "step": 10492 }, { "epoch": 0.8500486066104991, "grad_norm": 0.031277742236852646, "learning_rate": 0.00017499887483685137, "loss": 0.3337, "step": 10493 }, { "epoch": 0.8501296176279974, "grad_norm": 0.029651375487446785, "learning_rate": 0.00017499437418425673, "loss": 0.3225, "step": 10494 }, { "epoch": 0.8502106286454958, "grad_norm": 0.03209708631038666, "learning_rate": 0.00017498987353166209, "loss": 0.2684, "step": 10495 }, { "epoch": 0.8502916396629941, "grad_norm": 0.03412383422255516, "learning_rate": 0.00017498537287906747, "loss": 0.3399, "step": 10496 }, { "epoch": 0.8503726506804925, "grad_norm": 0.037482064217329025, "learning_rate": 0.00017498087222647283, "loss": 0.3399, "step": 10497 }, { "epoch": 0.850453661697991, "grad_norm": 0.030939724296331406, "learning_rate": 0.00017497637157387822, "loss": 0.3098, "step": 10498 }, { "epoch": 0.8505346727154893, "grad_norm": 0.034733328968286514, "learning_rate": 0.0001749718709212836, "loss": 0.3347, "step": 10499 }, { "epoch": 0.8506156837329877, "grad_norm": 0.027981078252196312, "learning_rate": 0.00017496737026868897, "loss": 0.2766, "step": 10500 }, { "epoch": 0.850696694750486, "grad_norm": 0.03607220947742462, "learning_rate": 0.00017496286961609433, "loss": 0.3655, "step": 10501 }, { "epoch": 0.8507777057679844, "grad_norm": 0.030028050765395164, "learning_rate": 0.00017495836896349971, "loss": 0.3004, "step": 10502 }, { "epoch": 0.8508587167854829, "grad_norm": 0.03431127220392227, "learning_rate": 0.0001749538683109051, "loss": 0.2987, "step": 10503 }, { "epoch": 0.8509397278029812, "grad_norm": 0.035340793430805206, "learning_rate": 0.00017494936765831046, "loss": 0.3555, "step": 10504 }, { "epoch": 0.8510207388204796, "grad_norm": 0.034876782447099686, "learning_rate": 0.00017494486700571585, "loss": 0.3196, "step": 10505 }, { "epoch": 0.8511017498379779, "grad_norm": 0.03613532707095146, "learning_rate": 0.0001749403663531212, "loss": 0.2934, "step": 10506 }, { "epoch": 0.8511827608554764, "grad_norm": 0.03943546861410141, "learning_rate": 0.00017493586570052657, "loss": 0.3606, "step": 10507 }, { "epoch": 0.8512637718729748, "grad_norm": 0.028644273057579994, "learning_rate": 0.00017493136504793196, "loss": 0.2971, "step": 10508 }, { "epoch": 0.8513447828904731, "grad_norm": 0.03636188432574272, "learning_rate": 0.00017492686439533734, "loss": 0.3565, "step": 10509 }, { "epoch": 0.8514257939079715, "grad_norm": 0.03777918219566345, "learning_rate": 0.0001749223637427427, "loss": 0.3487, "step": 10510 }, { "epoch": 0.8515068049254698, "grad_norm": 0.03311832621693611, "learning_rate": 0.0001749178630901481, "loss": 0.3344, "step": 10511 }, { "epoch": 0.8515878159429683, "grad_norm": 0.037984661757946014, "learning_rate": 0.00017491336243755345, "loss": 0.3333, "step": 10512 }, { "epoch": 0.8516688269604666, "grad_norm": 0.034046683460474014, "learning_rate": 0.0001749088617849588, "loss": 0.3901, "step": 10513 }, { "epoch": 0.851749837977965, "grad_norm": 0.03571656718850136, "learning_rate": 0.0001749043611323642, "loss": 0.335, "step": 10514 }, { "epoch": 0.8518308489954634, "grad_norm": 0.030205728486180305, "learning_rate": 0.00017489986047976958, "loss": 0.3063, "step": 10515 }, { "epoch": 0.8519118600129617, "grad_norm": 0.029302062466740608, "learning_rate": 0.00017489535982717494, "loss": 0.2921, "step": 10516 }, { "epoch": 0.8519928710304602, "grad_norm": 0.035452794283628464, "learning_rate": 0.00017489085917458033, "loss": 0.3215, "step": 10517 }, { "epoch": 0.8520738820479585, "grad_norm": 0.02976333722472191, "learning_rate": 0.0001748863585219857, "loss": 0.2924, "step": 10518 }, { "epoch": 0.8521548930654569, "grad_norm": 0.03485938161611557, "learning_rate": 0.00017488185786939105, "loss": 0.3418, "step": 10519 }, { "epoch": 0.8522359040829552, "grad_norm": 0.03175254538655281, "learning_rate": 0.00017487735721679644, "loss": 0.3056, "step": 10520 }, { "epoch": 0.8523169151004537, "grad_norm": 0.029901329427957535, "learning_rate": 0.00017487285656420183, "loss": 0.2967, "step": 10521 }, { "epoch": 0.8523979261179521, "grad_norm": 0.029504723846912384, "learning_rate": 0.00017486835591160719, "loss": 0.2951, "step": 10522 }, { "epoch": 0.8524789371354504, "grad_norm": 0.03407463803887367, "learning_rate": 0.00017486385525901257, "loss": 0.3007, "step": 10523 }, { "epoch": 0.8525599481529488, "grad_norm": 0.029503922909498215, "learning_rate": 0.00017485935460641793, "loss": 0.2701, "step": 10524 }, { "epoch": 0.8526409591704471, "grad_norm": 0.03525906056165695, "learning_rate": 0.0001748548539538233, "loss": 0.3396, "step": 10525 }, { "epoch": 0.8527219701879456, "grad_norm": 0.035471223294734955, "learning_rate": 0.0001748503533012287, "loss": 0.3647, "step": 10526 }, { "epoch": 0.852802981205444, "grad_norm": 0.031278256326913834, "learning_rate": 0.00017484585264863407, "loss": 0.2727, "step": 10527 }, { "epoch": 0.8528839922229423, "grad_norm": 0.0368519127368927, "learning_rate": 0.00017484135199603943, "loss": 0.2909, "step": 10528 }, { "epoch": 0.8529650032404407, "grad_norm": 0.034553319215774536, "learning_rate": 0.00017483685134344482, "loss": 0.3483, "step": 10529 }, { "epoch": 0.8530460142579391, "grad_norm": 0.03882437199354172, "learning_rate": 0.00017483235069085018, "loss": 0.3794, "step": 10530 }, { "epoch": 0.8531270252754375, "grad_norm": 0.029881006106734276, "learning_rate": 0.00017482785003825554, "loss": 0.3027, "step": 10531 }, { "epoch": 0.8532080362929358, "grad_norm": 0.03130098059773445, "learning_rate": 0.00017482334938566095, "loss": 0.3028, "step": 10532 }, { "epoch": 0.8532890473104342, "grad_norm": 0.032947756350040436, "learning_rate": 0.0001748188487330663, "loss": 0.328, "step": 10533 }, { "epoch": 0.8533700583279326, "grad_norm": 0.03265698254108429, "learning_rate": 0.00017481434808047167, "loss": 0.289, "step": 10534 }, { "epoch": 0.853451069345431, "grad_norm": 0.0320311076939106, "learning_rate": 0.00017480984742787706, "loss": 0.3247, "step": 10535 }, { "epoch": 0.8535320803629294, "grad_norm": 0.03273462876677513, "learning_rate": 0.00017480534677528242, "loss": 0.303, "step": 10536 }, { "epoch": 0.8536130913804277, "grad_norm": 0.03394000604748726, "learning_rate": 0.00017480084612268778, "loss": 0.3492, "step": 10537 }, { "epoch": 0.8536941023979261, "grad_norm": 0.03348904103040695, "learning_rate": 0.0001747963454700932, "loss": 0.3686, "step": 10538 }, { "epoch": 0.8537751134154244, "grad_norm": 0.03144761547446251, "learning_rate": 0.00017479184481749855, "loss": 0.2992, "step": 10539 }, { "epoch": 0.8538561244329229, "grad_norm": 0.03692207485437393, "learning_rate": 0.0001747873441649039, "loss": 0.3173, "step": 10540 }, { "epoch": 0.8539371354504213, "grad_norm": 0.032168686389923096, "learning_rate": 0.0001747828435123093, "loss": 0.2916, "step": 10541 }, { "epoch": 0.8540181464679196, "grad_norm": 0.03592012822628021, "learning_rate": 0.00017477834285971466, "loss": 0.3574, "step": 10542 }, { "epoch": 0.854099157485418, "grad_norm": 0.03215634450316429, "learning_rate": 0.00017477384220712002, "loss": 0.3042, "step": 10543 }, { "epoch": 0.8541801685029164, "grad_norm": 0.030979136005043983, "learning_rate": 0.00017476934155452543, "loss": 0.312, "step": 10544 }, { "epoch": 0.8542611795204148, "grad_norm": 0.03511403873562813, "learning_rate": 0.0001747648409019308, "loss": 0.3502, "step": 10545 }, { "epoch": 0.8543421905379132, "grad_norm": 0.03832389414310455, "learning_rate": 0.00017476034024933615, "loss": 0.3195, "step": 10546 }, { "epoch": 0.8544232015554115, "grad_norm": 0.036246124655008316, "learning_rate": 0.00017475583959674154, "loss": 0.3676, "step": 10547 }, { "epoch": 0.8545042125729099, "grad_norm": 0.0348796620965004, "learning_rate": 0.0001747513389441469, "loss": 0.3575, "step": 10548 }, { "epoch": 0.8545852235904083, "grad_norm": 0.031146438792347908, "learning_rate": 0.00017474683829155226, "loss": 0.3296, "step": 10549 }, { "epoch": 0.8546662346079067, "grad_norm": 0.03914908692240715, "learning_rate": 0.00017474233763895767, "loss": 0.3367, "step": 10550 }, { "epoch": 0.854747245625405, "grad_norm": 0.045957714319229126, "learning_rate": 0.00017473783698636303, "loss": 0.4161, "step": 10551 }, { "epoch": 0.8548282566429034, "grad_norm": 0.03508540242910385, "learning_rate": 0.0001747333363337684, "loss": 0.3005, "step": 10552 }, { "epoch": 0.8549092676604018, "grad_norm": 0.03551001101732254, "learning_rate": 0.00017472883568117378, "loss": 0.3527, "step": 10553 }, { "epoch": 0.8549902786779002, "grad_norm": 0.03131183981895447, "learning_rate": 0.00017472433502857914, "loss": 0.3136, "step": 10554 }, { "epoch": 0.8550712896953986, "grad_norm": 0.02973882667720318, "learning_rate": 0.00017471983437598453, "loss": 0.2885, "step": 10555 }, { "epoch": 0.8551523007128969, "grad_norm": 0.03016551584005356, "learning_rate": 0.00017471533372338992, "loss": 0.3037, "step": 10556 }, { "epoch": 0.8552333117303953, "grad_norm": 0.033437248319387436, "learning_rate": 0.00017471083307079528, "loss": 0.3456, "step": 10557 }, { "epoch": 0.8553143227478938, "grad_norm": 0.03692424297332764, "learning_rate": 0.00017470633241820064, "loss": 0.3644, "step": 10558 }, { "epoch": 0.8553953337653921, "grad_norm": 0.03292136266827583, "learning_rate": 0.00017470183176560602, "loss": 0.3453, "step": 10559 }, { "epoch": 0.8554763447828905, "grad_norm": 0.03032582625746727, "learning_rate": 0.00017469733111301138, "loss": 0.3173, "step": 10560 }, { "epoch": 0.8555573558003888, "grad_norm": 0.03443504869937897, "learning_rate": 0.00017469283046041677, "loss": 0.349, "step": 10561 }, { "epoch": 0.8556383668178872, "grad_norm": 0.03508608415722847, "learning_rate": 0.00017468832980782216, "loss": 0.3351, "step": 10562 }, { "epoch": 0.8557193778353857, "grad_norm": 0.03486845642328262, "learning_rate": 0.00017468382915522752, "loss": 0.3078, "step": 10563 }, { "epoch": 0.855800388852884, "grad_norm": 0.036465246230363846, "learning_rate": 0.00017467932850263288, "loss": 0.4078, "step": 10564 }, { "epoch": 0.8558813998703824, "grad_norm": 0.03374059870839119, "learning_rate": 0.00017467482785003826, "loss": 0.3448, "step": 10565 }, { "epoch": 0.8559624108878807, "grad_norm": 0.03241916373372078, "learning_rate": 0.00017467032719744362, "loss": 0.2998, "step": 10566 }, { "epoch": 0.8560434219053791, "grad_norm": 0.029919980093836784, "learning_rate": 0.000174665826544849, "loss": 0.3075, "step": 10567 }, { "epoch": 0.8561244329228775, "grad_norm": 0.037165362387895584, "learning_rate": 0.0001746613258922544, "loss": 0.437, "step": 10568 }, { "epoch": 0.8562054439403759, "grad_norm": 0.03804047033190727, "learning_rate": 0.00017465682523965976, "loss": 0.3562, "step": 10569 }, { "epoch": 0.8562864549578743, "grad_norm": 0.029782762750983238, "learning_rate": 0.00017465232458706512, "loss": 0.3238, "step": 10570 }, { "epoch": 0.8563674659753726, "grad_norm": 0.03304661065340042, "learning_rate": 0.0001746478239344705, "loss": 0.3193, "step": 10571 }, { "epoch": 0.8564484769928711, "grad_norm": 0.03296075761318207, "learning_rate": 0.00017464332328187587, "loss": 0.3256, "step": 10572 }, { "epoch": 0.8565294880103694, "grad_norm": 0.0316382572054863, "learning_rate": 0.00017463882262928125, "loss": 0.3024, "step": 10573 }, { "epoch": 0.8566104990278678, "grad_norm": 0.032189108431339264, "learning_rate": 0.00017463432197668664, "loss": 0.3169, "step": 10574 }, { "epoch": 0.8566915100453661, "grad_norm": 0.03579109162092209, "learning_rate": 0.000174629821324092, "loss": 0.3472, "step": 10575 }, { "epoch": 0.8567725210628645, "grad_norm": 0.037844136357307434, "learning_rate": 0.00017462532067149736, "loss": 0.305, "step": 10576 }, { "epoch": 0.856853532080363, "grad_norm": 0.030680108815431595, "learning_rate": 0.00017462082001890275, "loss": 0.3346, "step": 10577 }, { "epoch": 0.8569345430978613, "grad_norm": 0.030500588938593864, "learning_rate": 0.0001746163193663081, "loss": 0.3188, "step": 10578 }, { "epoch": 0.8570155541153597, "grad_norm": 0.031004825606942177, "learning_rate": 0.0001746118187137135, "loss": 0.3341, "step": 10579 }, { "epoch": 0.857096565132858, "grad_norm": 0.038353081792593, "learning_rate": 0.00017460731806111888, "loss": 0.3641, "step": 10580 }, { "epoch": 0.8571775761503565, "grad_norm": 0.03716079890727997, "learning_rate": 0.00017460281740852424, "loss": 0.3545, "step": 10581 }, { "epoch": 0.8572585871678549, "grad_norm": 0.038967959582805634, "learning_rate": 0.0001745983167559296, "loss": 0.3494, "step": 10582 }, { "epoch": 0.8573395981853532, "grad_norm": 0.032367292791604996, "learning_rate": 0.000174593816103335, "loss": 0.3105, "step": 10583 }, { "epoch": 0.8574206092028516, "grad_norm": 0.03238913044333458, "learning_rate": 0.00017458931545074038, "loss": 0.3067, "step": 10584 }, { "epoch": 0.8575016202203499, "grad_norm": 0.03460938110947609, "learning_rate": 0.00017458481479814574, "loss": 0.3403, "step": 10585 }, { "epoch": 0.8575826312378484, "grad_norm": 0.031412165611982346, "learning_rate": 0.00017458031414555112, "loss": 0.3467, "step": 10586 }, { "epoch": 0.8576636422553467, "grad_norm": 0.03444996848702431, "learning_rate": 0.00017457581349295648, "loss": 0.3632, "step": 10587 }, { "epoch": 0.8577446532728451, "grad_norm": 0.035433437675237656, "learning_rate": 0.00017457131284036184, "loss": 0.3549, "step": 10588 }, { "epoch": 0.8578256642903435, "grad_norm": 0.03140580281615257, "learning_rate": 0.00017456681218776723, "loss": 0.3354, "step": 10589 }, { "epoch": 0.8579066753078418, "grad_norm": 0.03797769546508789, "learning_rate": 0.00017456231153517262, "loss": 0.3699, "step": 10590 }, { "epoch": 0.8579876863253403, "grad_norm": 0.03754464536905289, "learning_rate": 0.00017455781088257798, "loss": 0.3747, "step": 10591 }, { "epoch": 0.8580686973428386, "grad_norm": 0.04102705046534538, "learning_rate": 0.00017455331022998337, "loss": 0.3145, "step": 10592 }, { "epoch": 0.858149708360337, "grad_norm": 0.02819933369755745, "learning_rate": 0.00017454880957738873, "loss": 0.3025, "step": 10593 }, { "epoch": 0.8582307193778353, "grad_norm": 0.03205656632781029, "learning_rate": 0.00017454430892479409, "loss": 0.3274, "step": 10594 }, { "epoch": 0.8583117303953338, "grad_norm": 0.03971799835562706, "learning_rate": 0.00017453980827219947, "loss": 0.3275, "step": 10595 }, { "epoch": 0.8583927414128322, "grad_norm": 0.03544100001454353, "learning_rate": 0.00017453530761960486, "loss": 0.3637, "step": 10596 }, { "epoch": 0.8584737524303305, "grad_norm": 0.03206910938024521, "learning_rate": 0.00017453080696701022, "loss": 0.3333, "step": 10597 }, { "epoch": 0.8585547634478289, "grad_norm": 0.03851524367928505, "learning_rate": 0.0001745263063144156, "loss": 0.3343, "step": 10598 }, { "epoch": 0.8586357744653272, "grad_norm": 0.03511196747422218, "learning_rate": 0.00017452180566182097, "loss": 0.3406, "step": 10599 }, { "epoch": 0.8587167854828257, "grad_norm": 0.033691514283418655, "learning_rate": 0.00017451730500922633, "loss": 0.3267, "step": 10600 }, { "epoch": 0.8587977965003241, "grad_norm": 0.03204326704144478, "learning_rate": 0.00017451280435663171, "loss": 0.3155, "step": 10601 }, { "epoch": 0.8588788075178224, "grad_norm": 0.033211641013622284, "learning_rate": 0.0001745083037040371, "loss": 0.3562, "step": 10602 }, { "epoch": 0.8589598185353208, "grad_norm": 0.03935149312019348, "learning_rate": 0.00017450380305144246, "loss": 0.3606, "step": 10603 }, { "epoch": 0.8590408295528191, "grad_norm": 0.034088894724845886, "learning_rate": 0.00017449930239884785, "loss": 0.3354, "step": 10604 }, { "epoch": 0.8591218405703176, "grad_norm": 0.03058660961687565, "learning_rate": 0.0001744948017462532, "loss": 0.295, "step": 10605 }, { "epoch": 0.859202851587816, "grad_norm": 0.03271281719207764, "learning_rate": 0.00017449030109365857, "loss": 0.2943, "step": 10606 }, { "epoch": 0.8592838626053143, "grad_norm": 0.036279551684856415, "learning_rate": 0.00017448580044106398, "loss": 0.3883, "step": 10607 }, { "epoch": 0.8593648736228127, "grad_norm": 0.03347590193152428, "learning_rate": 0.00017448129978846934, "loss": 0.3241, "step": 10608 }, { "epoch": 0.8594458846403111, "grad_norm": 0.03564797714352608, "learning_rate": 0.0001744767991358747, "loss": 0.3127, "step": 10609 }, { "epoch": 0.8595268956578095, "grad_norm": 0.03396541625261307, "learning_rate": 0.0001744722984832801, "loss": 0.3016, "step": 10610 }, { "epoch": 0.8596079066753078, "grad_norm": 0.028714140877127647, "learning_rate": 0.00017446779783068545, "loss": 0.2933, "step": 10611 }, { "epoch": 0.8596889176928062, "grad_norm": 0.030577784404158592, "learning_rate": 0.0001744632971780908, "loss": 0.3014, "step": 10612 }, { "epoch": 0.8597699287103046, "grad_norm": 0.03310084715485573, "learning_rate": 0.00017445879652549622, "loss": 0.316, "step": 10613 }, { "epoch": 0.859850939727803, "grad_norm": 0.03821340203285217, "learning_rate": 0.00017445429587290158, "loss": 0.3858, "step": 10614 }, { "epoch": 0.8599319507453014, "grad_norm": 0.03301851078867912, "learning_rate": 0.00017444979522030694, "loss": 0.3136, "step": 10615 }, { "epoch": 0.8600129617627997, "grad_norm": 0.03597554191946983, "learning_rate": 0.00017444529456771233, "loss": 0.3253, "step": 10616 }, { "epoch": 0.8600939727802981, "grad_norm": 0.03361279517412186, "learning_rate": 0.0001744407939151177, "loss": 0.3394, "step": 10617 }, { "epoch": 0.8601749837977966, "grad_norm": 0.03018251433968544, "learning_rate": 0.00017443629326252305, "loss": 0.3303, "step": 10618 }, { "epoch": 0.8602559948152949, "grad_norm": 0.03364105150103569, "learning_rate": 0.00017443179260992847, "loss": 0.3443, "step": 10619 }, { "epoch": 0.8603370058327933, "grad_norm": 0.03645896166563034, "learning_rate": 0.00017442729195733383, "loss": 0.3852, "step": 10620 }, { "epoch": 0.8604180168502916, "grad_norm": 0.03501100465655327, "learning_rate": 0.0001744227913047392, "loss": 0.3107, "step": 10621 }, { "epoch": 0.86049902786779, "grad_norm": 0.034452538937330246, "learning_rate": 0.00017441829065214457, "loss": 0.3213, "step": 10622 }, { "epoch": 0.8605800388852884, "grad_norm": 0.030402205884456635, "learning_rate": 0.00017441378999954993, "loss": 0.2975, "step": 10623 }, { "epoch": 0.8606610499027868, "grad_norm": 0.03308364003896713, "learning_rate": 0.0001744092893469553, "loss": 0.3142, "step": 10624 }, { "epoch": 0.8607420609202852, "grad_norm": 0.0385962538421154, "learning_rate": 0.0001744047886943607, "loss": 0.3829, "step": 10625 }, { "epoch": 0.8608230719377835, "grad_norm": 0.034132879227399826, "learning_rate": 0.00017440028804176607, "loss": 0.346, "step": 10626 }, { "epoch": 0.8609040829552819, "grad_norm": 0.039486926048994064, "learning_rate": 0.00017439578738917143, "loss": 0.3325, "step": 10627 }, { "epoch": 0.8609850939727803, "grad_norm": 0.03526204451918602, "learning_rate": 0.00017439128673657682, "loss": 0.3385, "step": 10628 }, { "epoch": 0.8610661049902787, "grad_norm": 0.03386912867426872, "learning_rate": 0.00017438678608398218, "loss": 0.3401, "step": 10629 }, { "epoch": 0.861147116007777, "grad_norm": 0.03569406270980835, "learning_rate": 0.00017438228543138754, "loss": 0.3134, "step": 10630 }, { "epoch": 0.8612281270252754, "grad_norm": 0.031888384371995926, "learning_rate": 0.00017437778477879295, "loss": 0.3116, "step": 10631 }, { "epoch": 0.8613091380427739, "grad_norm": 0.04027692601084709, "learning_rate": 0.0001743732841261983, "loss": 0.3985, "step": 10632 }, { "epoch": 0.8613901490602722, "grad_norm": 0.03750056028366089, "learning_rate": 0.00017436878347360367, "loss": 0.3761, "step": 10633 }, { "epoch": 0.8614711600777706, "grad_norm": 0.035463955253362656, "learning_rate": 0.00017436428282100906, "loss": 0.355, "step": 10634 }, { "epoch": 0.8615521710952689, "grad_norm": 0.03540815785527229, "learning_rate": 0.00017435978216841442, "loss": 0.3559, "step": 10635 }, { "epoch": 0.8616331821127673, "grad_norm": 0.032286386936903, "learning_rate": 0.0001743552815158198, "loss": 0.3134, "step": 10636 }, { "epoch": 0.8617141931302658, "grad_norm": 0.039775971323251724, "learning_rate": 0.0001743507808632252, "loss": 0.379, "step": 10637 }, { "epoch": 0.8617952041477641, "grad_norm": 0.03340979665517807, "learning_rate": 0.00017434628021063055, "loss": 0.3042, "step": 10638 }, { "epoch": 0.8618762151652625, "grad_norm": 0.03356197848916054, "learning_rate": 0.0001743417795580359, "loss": 0.3474, "step": 10639 }, { "epoch": 0.8619572261827608, "grad_norm": 0.030694514513015747, "learning_rate": 0.0001743372789054413, "loss": 0.2946, "step": 10640 }, { "epoch": 0.8620382372002592, "grad_norm": 0.034636642783880234, "learning_rate": 0.00017433277825284666, "loss": 0.3392, "step": 10641 }, { "epoch": 0.8621192482177576, "grad_norm": 0.03530239313840866, "learning_rate": 0.00017432827760025205, "loss": 0.3405, "step": 10642 }, { "epoch": 0.862200259235256, "grad_norm": 0.03424142301082611, "learning_rate": 0.00017432377694765743, "loss": 0.3384, "step": 10643 }, { "epoch": 0.8622812702527544, "grad_norm": 0.02966010756790638, "learning_rate": 0.0001743192762950628, "loss": 0.3191, "step": 10644 }, { "epoch": 0.8623622812702527, "grad_norm": 0.035714924335479736, "learning_rate": 0.00017431477564246815, "loss": 0.3296, "step": 10645 }, { "epoch": 0.8624432922877512, "grad_norm": 0.033053092658519745, "learning_rate": 0.00017431027498987354, "loss": 0.3273, "step": 10646 }, { "epoch": 0.8625243033052495, "grad_norm": 0.030531764030456543, "learning_rate": 0.0001743057743372789, "loss": 0.3126, "step": 10647 }, { "epoch": 0.8626053143227479, "grad_norm": 0.03648407757282257, "learning_rate": 0.0001743012736846843, "loss": 0.3023, "step": 10648 }, { "epoch": 0.8626863253402463, "grad_norm": 0.030021555721759796, "learning_rate": 0.00017429677303208967, "loss": 0.2931, "step": 10649 }, { "epoch": 0.8627673363577446, "grad_norm": 0.03257157281041145, "learning_rate": 0.00017429227237949503, "loss": 0.313, "step": 10650 }, { "epoch": 0.8628483473752431, "grad_norm": 0.03287028893828392, "learning_rate": 0.0001742877717269004, "loss": 0.3271, "step": 10651 }, { "epoch": 0.8629293583927414, "grad_norm": 0.0382518544793129, "learning_rate": 0.00017428327107430578, "loss": 0.2954, "step": 10652 }, { "epoch": 0.8630103694102398, "grad_norm": 0.03195209801197052, "learning_rate": 0.00017427877042171114, "loss": 0.3085, "step": 10653 }, { "epoch": 0.8630913804277381, "grad_norm": 0.028789833188056946, "learning_rate": 0.00017427426976911653, "loss": 0.3043, "step": 10654 }, { "epoch": 0.8631723914452365, "grad_norm": 0.03181454911828041, "learning_rate": 0.00017426976911652192, "loss": 0.2868, "step": 10655 }, { "epoch": 0.863253402462735, "grad_norm": 0.03049142099916935, "learning_rate": 0.00017426526846392728, "loss": 0.3284, "step": 10656 }, { "epoch": 0.8633344134802333, "grad_norm": 0.03163653239607811, "learning_rate": 0.00017426076781133264, "loss": 0.2851, "step": 10657 }, { "epoch": 0.8634154244977317, "grad_norm": 0.032725002616643906, "learning_rate": 0.00017425626715873802, "loss": 0.3044, "step": 10658 }, { "epoch": 0.86349643551523, "grad_norm": 0.03800255060195923, "learning_rate": 0.0001742517665061434, "loss": 0.352, "step": 10659 }, { "epoch": 0.8635774465327285, "grad_norm": 0.043231409043073654, "learning_rate": 0.00017424726585354877, "loss": 0.3637, "step": 10660 }, { "epoch": 0.8636584575502269, "grad_norm": 0.037208881229162216, "learning_rate": 0.00017424276520095416, "loss": 0.3086, "step": 10661 }, { "epoch": 0.8637394685677252, "grad_norm": 0.03458251804113388, "learning_rate": 0.00017423826454835952, "loss": 0.2955, "step": 10662 }, { "epoch": 0.8638204795852236, "grad_norm": 0.03293118625879288, "learning_rate": 0.00017423376389576488, "loss": 0.304, "step": 10663 }, { "epoch": 0.8639014906027219, "grad_norm": 0.034051090478897095, "learning_rate": 0.00017422926324317027, "loss": 0.3041, "step": 10664 }, { "epoch": 0.8639825016202204, "grad_norm": 0.03171685338020325, "learning_rate": 0.00017422476259057565, "loss": 0.3146, "step": 10665 }, { "epoch": 0.8640635126377187, "grad_norm": 0.035040758550167084, "learning_rate": 0.000174220261937981, "loss": 0.3429, "step": 10666 }, { "epoch": 0.8641445236552171, "grad_norm": 0.029530083760619164, "learning_rate": 0.0001742157612853864, "loss": 0.3277, "step": 10667 }, { "epoch": 0.8642255346727155, "grad_norm": 0.036100320518016815, "learning_rate": 0.00017421126063279176, "loss": 0.3547, "step": 10668 }, { "epoch": 0.8643065456902139, "grad_norm": 0.033129528164863586, "learning_rate": 0.00017420675998019712, "loss": 0.3367, "step": 10669 }, { "epoch": 0.8643875567077123, "grad_norm": 0.03324815630912781, "learning_rate": 0.0001742022593276025, "loss": 0.3198, "step": 10670 }, { "epoch": 0.8644685677252106, "grad_norm": 0.03402934595942497, "learning_rate": 0.0001741977586750079, "loss": 0.3392, "step": 10671 }, { "epoch": 0.864549578742709, "grad_norm": 0.03805414214730263, "learning_rate": 0.00017419325802241325, "loss": 0.3836, "step": 10672 }, { "epoch": 0.8646305897602073, "grad_norm": 0.03230415657162666, "learning_rate": 0.00017418875736981864, "loss": 0.3108, "step": 10673 }, { "epoch": 0.8647116007777058, "grad_norm": 0.03507767990231514, "learning_rate": 0.000174184256717224, "loss": 0.2988, "step": 10674 }, { "epoch": 0.8647926117952042, "grad_norm": 0.03984934464097023, "learning_rate": 0.00017417975606462936, "loss": 0.344, "step": 10675 }, { "epoch": 0.8648736228127025, "grad_norm": 0.041479699313640594, "learning_rate": 0.00017417525541203475, "loss": 0.3316, "step": 10676 }, { "epoch": 0.8649546338302009, "grad_norm": 0.0320621095597744, "learning_rate": 0.00017417075475944014, "loss": 0.3287, "step": 10677 }, { "epoch": 0.8650356448476992, "grad_norm": 0.0352802574634552, "learning_rate": 0.0001741662541068455, "loss": 0.3131, "step": 10678 }, { "epoch": 0.8651166558651977, "grad_norm": 0.0304051972925663, "learning_rate": 0.00017416175345425088, "loss": 0.2471, "step": 10679 }, { "epoch": 0.8651976668826961, "grad_norm": 0.036712054163217545, "learning_rate": 0.00017415725280165624, "loss": 0.38, "step": 10680 }, { "epoch": 0.8652786779001944, "grad_norm": 0.03605759143829346, "learning_rate": 0.0001741527521490616, "loss": 0.3202, "step": 10681 }, { "epoch": 0.8653596889176928, "grad_norm": 0.032023441046476364, "learning_rate": 0.000174148251496467, "loss": 0.3106, "step": 10682 }, { "epoch": 0.8654406999351912, "grad_norm": 0.03299464285373688, "learning_rate": 0.00017414375084387238, "loss": 0.2842, "step": 10683 }, { "epoch": 0.8655217109526896, "grad_norm": 0.036962252110242844, "learning_rate": 0.00017413925019127774, "loss": 0.3104, "step": 10684 }, { "epoch": 0.865602721970188, "grad_norm": 0.0329434871673584, "learning_rate": 0.00017413474953868312, "loss": 0.3073, "step": 10685 }, { "epoch": 0.8656837329876863, "grad_norm": 0.050255440175533295, "learning_rate": 0.00017413024888608848, "loss": 0.3358, "step": 10686 }, { "epoch": 0.8657647440051847, "grad_norm": 0.029616905376315117, "learning_rate": 0.00017412574823349384, "loss": 0.306, "step": 10687 }, { "epoch": 0.8658457550226831, "grad_norm": 0.029950309544801712, "learning_rate": 0.00017412124758089926, "loss": 0.3012, "step": 10688 }, { "epoch": 0.8659267660401815, "grad_norm": 0.03012317605316639, "learning_rate": 0.00017411674692830462, "loss": 0.2814, "step": 10689 }, { "epoch": 0.8660077770576798, "grad_norm": 0.03209361433982849, "learning_rate": 0.00017411224627570998, "loss": 0.3218, "step": 10690 }, { "epoch": 0.8660887880751782, "grad_norm": 0.03671686351299286, "learning_rate": 0.00017410774562311537, "loss": 0.3721, "step": 10691 }, { "epoch": 0.8661697990926766, "grad_norm": 0.029738424345850945, "learning_rate": 0.00017410324497052073, "loss": 0.2891, "step": 10692 }, { "epoch": 0.866250810110175, "grad_norm": 0.039622507989406586, "learning_rate": 0.00017409874431792609, "loss": 0.323, "step": 10693 }, { "epoch": 0.8663318211276734, "grad_norm": 0.03439989686012268, "learning_rate": 0.0001740942436653315, "loss": 0.3709, "step": 10694 }, { "epoch": 0.8664128321451717, "grad_norm": 0.03073376975953579, "learning_rate": 0.00017408974301273686, "loss": 0.3305, "step": 10695 }, { "epoch": 0.8664938431626701, "grad_norm": 0.031084010377526283, "learning_rate": 0.00017408524236014222, "loss": 0.3121, "step": 10696 }, { "epoch": 0.8665748541801686, "grad_norm": 0.032315392047166824, "learning_rate": 0.0001740807417075476, "loss": 0.327, "step": 10697 }, { "epoch": 0.8666558651976669, "grad_norm": 0.029009966179728508, "learning_rate": 0.00017407624105495297, "loss": 0.3286, "step": 10698 }, { "epoch": 0.8667368762151653, "grad_norm": 0.03397389501333237, "learning_rate": 0.00017407174040235833, "loss": 0.3512, "step": 10699 }, { "epoch": 0.8668178872326636, "grad_norm": 0.03850146010518074, "learning_rate": 0.00017406723974976374, "loss": 0.3224, "step": 10700 }, { "epoch": 0.866898898250162, "grad_norm": 0.031645797193050385, "learning_rate": 0.0001740627390971691, "loss": 0.3326, "step": 10701 }, { "epoch": 0.8669799092676604, "grad_norm": 0.030226711183786392, "learning_rate": 0.00017405823844457446, "loss": 0.3079, "step": 10702 }, { "epoch": 0.8670609202851588, "grad_norm": 0.03322417661547661, "learning_rate": 0.00017405373779197985, "loss": 0.3504, "step": 10703 }, { "epoch": 0.8671419313026572, "grad_norm": 0.03156125918030739, "learning_rate": 0.0001740492371393852, "loss": 0.3623, "step": 10704 }, { "epoch": 0.8672229423201555, "grad_norm": 0.03021164797246456, "learning_rate": 0.00017404473648679057, "loss": 0.3162, "step": 10705 }, { "epoch": 0.8673039533376539, "grad_norm": 0.028827734291553497, "learning_rate": 0.00017404023583419598, "loss": 0.2772, "step": 10706 }, { "epoch": 0.8673849643551523, "grad_norm": 0.03488578647375107, "learning_rate": 0.00017403573518160134, "loss": 0.3003, "step": 10707 }, { "epoch": 0.8674659753726507, "grad_norm": 0.03198534995317459, "learning_rate": 0.0001740312345290067, "loss": 0.3357, "step": 10708 }, { "epoch": 0.867546986390149, "grad_norm": 0.034626707434654236, "learning_rate": 0.0001740267338764121, "loss": 0.3404, "step": 10709 }, { "epoch": 0.8676279974076474, "grad_norm": 0.033544886857271194, "learning_rate": 0.00017402223322381745, "loss": 0.3644, "step": 10710 }, { "epoch": 0.8677090084251459, "grad_norm": 0.03247756510972977, "learning_rate": 0.00017401773257122284, "loss": 0.2922, "step": 10711 }, { "epoch": 0.8677900194426442, "grad_norm": 0.030314233154058456, "learning_rate": 0.00017401323191862823, "loss": 0.2931, "step": 10712 }, { "epoch": 0.8678710304601426, "grad_norm": 0.032003603875637054, "learning_rate": 0.00017400873126603359, "loss": 0.272, "step": 10713 }, { "epoch": 0.8679520414776409, "grad_norm": 0.034450311213731766, "learning_rate": 0.00017400423061343895, "loss": 0.3421, "step": 10714 }, { "epoch": 0.8680330524951393, "grad_norm": 0.029790136963129044, "learning_rate": 0.00017399972996084433, "loss": 0.2894, "step": 10715 }, { "epoch": 0.8681140635126378, "grad_norm": 0.03776300325989723, "learning_rate": 0.0001739952293082497, "loss": 0.3921, "step": 10716 }, { "epoch": 0.8681950745301361, "grad_norm": 0.03808490186929703, "learning_rate": 0.00017399072865565508, "loss": 0.3456, "step": 10717 }, { "epoch": 0.8682760855476345, "grad_norm": 0.03374411538243294, "learning_rate": 0.00017398622800306047, "loss": 0.3152, "step": 10718 }, { "epoch": 0.8683570965651328, "grad_norm": 0.04357757046818733, "learning_rate": 0.00017398172735046583, "loss": 0.3132, "step": 10719 }, { "epoch": 0.8684381075826313, "grad_norm": 0.032372117042541504, "learning_rate": 0.0001739772266978712, "loss": 0.3417, "step": 10720 }, { "epoch": 0.8685191186001296, "grad_norm": 0.033516012132167816, "learning_rate": 0.00017397272604527657, "loss": 0.3177, "step": 10721 }, { "epoch": 0.868600129617628, "grad_norm": 0.03316143900156021, "learning_rate": 0.00017396822539268193, "loss": 0.3479, "step": 10722 }, { "epoch": 0.8686811406351264, "grad_norm": 0.03665821626782417, "learning_rate": 0.00017396372474008732, "loss": 0.41, "step": 10723 }, { "epoch": 0.8687621516526247, "grad_norm": 0.03130485117435455, "learning_rate": 0.0001739592240874927, "loss": 0.3165, "step": 10724 }, { "epoch": 0.8688431626701232, "grad_norm": 0.040841370820999146, "learning_rate": 0.00017395472343489807, "loss": 0.363, "step": 10725 }, { "epoch": 0.8689241736876215, "grad_norm": 0.03354490175843239, "learning_rate": 0.00017395022278230343, "loss": 0.3516, "step": 10726 }, { "epoch": 0.8690051847051199, "grad_norm": 0.03047320991754532, "learning_rate": 0.00017394572212970882, "loss": 0.2943, "step": 10727 }, { "epoch": 0.8690861957226182, "grad_norm": 0.03401746600866318, "learning_rate": 0.00017394122147711418, "loss": 0.3112, "step": 10728 }, { "epoch": 0.8691672067401166, "grad_norm": 0.03398209437727928, "learning_rate": 0.00017393672082451956, "loss": 0.3462, "step": 10729 }, { "epoch": 0.8692482177576151, "grad_norm": 0.03289905935525894, "learning_rate": 0.00017393222017192495, "loss": 0.3224, "step": 10730 }, { "epoch": 0.8693292287751134, "grad_norm": 0.03407776728272438, "learning_rate": 0.0001739277195193303, "loss": 0.3419, "step": 10731 }, { "epoch": 0.8694102397926118, "grad_norm": 0.035825107246637344, "learning_rate": 0.00017392321886673567, "loss": 0.3768, "step": 10732 }, { "epoch": 0.8694912508101101, "grad_norm": 0.03510098159313202, "learning_rate": 0.00017391871821414106, "loss": 0.3312, "step": 10733 }, { "epoch": 0.8695722618276086, "grad_norm": 0.03410264849662781, "learning_rate": 0.00017391421756154642, "loss": 0.3689, "step": 10734 }, { "epoch": 0.869653272845107, "grad_norm": 0.03720330819487572, "learning_rate": 0.0001739097169089518, "loss": 0.3598, "step": 10735 }, { "epoch": 0.8697342838626053, "grad_norm": 0.03458027541637421, "learning_rate": 0.0001739052162563572, "loss": 0.3532, "step": 10736 }, { "epoch": 0.8698152948801037, "grad_norm": 0.03295762836933136, "learning_rate": 0.00017390071560376255, "loss": 0.3231, "step": 10737 }, { "epoch": 0.869896305897602, "grad_norm": 0.033154476433992386, "learning_rate": 0.0001738962149511679, "loss": 0.3661, "step": 10738 }, { "epoch": 0.8699773169151005, "grad_norm": 0.03453659638762474, "learning_rate": 0.0001738917142985733, "loss": 0.3342, "step": 10739 }, { "epoch": 0.8700583279325989, "grad_norm": 0.037514906376600266, "learning_rate": 0.00017388721364597869, "loss": 0.3413, "step": 10740 }, { "epoch": 0.8701393389500972, "grad_norm": 0.03465067967772484, "learning_rate": 0.00017388271299338405, "loss": 0.3946, "step": 10741 }, { "epoch": 0.8702203499675956, "grad_norm": 0.030446641147136688, "learning_rate": 0.00017387821234078943, "loss": 0.3085, "step": 10742 }, { "epoch": 0.8703013609850939, "grad_norm": 0.03456110879778862, "learning_rate": 0.0001738737116881948, "loss": 0.3391, "step": 10743 }, { "epoch": 0.8703823720025924, "grad_norm": 0.03867906332015991, "learning_rate": 0.00017386921103560015, "loss": 0.346, "step": 10744 }, { "epoch": 0.8704633830200907, "grad_norm": 0.036336660385131836, "learning_rate": 0.00017386471038300554, "loss": 0.3707, "step": 10745 }, { "epoch": 0.8705443940375891, "grad_norm": 0.03098193369805813, "learning_rate": 0.00017386020973041093, "loss": 0.3021, "step": 10746 }, { "epoch": 0.8706254050550875, "grad_norm": 0.03248054161667824, "learning_rate": 0.0001738557090778163, "loss": 0.3275, "step": 10747 }, { "epoch": 0.8707064160725859, "grad_norm": 0.034962303936481476, "learning_rate": 0.00017385120842522167, "loss": 0.3374, "step": 10748 }, { "epoch": 0.8707874270900843, "grad_norm": 0.033861901611089706, "learning_rate": 0.00017384670777262703, "loss": 0.3022, "step": 10749 }, { "epoch": 0.8708684381075826, "grad_norm": 0.03983207419514656, "learning_rate": 0.0001738422071200324, "loss": 0.3946, "step": 10750 }, { "epoch": 0.870949449125081, "grad_norm": 0.032978422939777374, "learning_rate": 0.00017383770646743778, "loss": 0.3489, "step": 10751 }, { "epoch": 0.8710304601425793, "grad_norm": 0.03442319110035896, "learning_rate": 0.00017383320581484317, "loss": 0.3146, "step": 10752 }, { "epoch": 0.8711114711600778, "grad_norm": 0.029209937900304794, "learning_rate": 0.00017382870516224853, "loss": 0.2718, "step": 10753 }, { "epoch": 0.8711924821775762, "grad_norm": 0.03570421040058136, "learning_rate": 0.00017382420450965392, "loss": 0.3383, "step": 10754 }, { "epoch": 0.8712734931950745, "grad_norm": 0.04989160969853401, "learning_rate": 0.00017381970385705928, "loss": 0.3582, "step": 10755 }, { "epoch": 0.8713545042125729, "grad_norm": 0.031460873782634735, "learning_rate": 0.00017381520320446464, "loss": 0.3243, "step": 10756 }, { "epoch": 0.8714355152300713, "grad_norm": 0.03317393362522125, "learning_rate": 0.00017381070255187002, "loss": 0.3197, "step": 10757 }, { "epoch": 0.8715165262475697, "grad_norm": 0.036221351474523544, "learning_rate": 0.0001738062018992754, "loss": 0.3549, "step": 10758 }, { "epoch": 0.8715975372650681, "grad_norm": 0.036588188260793686, "learning_rate": 0.00017380170124668077, "loss": 0.3305, "step": 10759 }, { "epoch": 0.8716785482825664, "grad_norm": 0.038899991661310196, "learning_rate": 0.00017379720059408616, "loss": 0.3229, "step": 10760 }, { "epoch": 0.8717595593000648, "grad_norm": 0.03450969606637955, "learning_rate": 0.00017379269994149152, "loss": 0.3075, "step": 10761 }, { "epoch": 0.8718405703175632, "grad_norm": 0.03251654654741287, "learning_rate": 0.00017378819928889688, "loss": 0.3405, "step": 10762 }, { "epoch": 0.8719215813350616, "grad_norm": 0.033687327057123184, "learning_rate": 0.00017378369863630227, "loss": 0.324, "step": 10763 }, { "epoch": 0.87200259235256, "grad_norm": 0.03482494875788689, "learning_rate": 0.00017377919798370765, "loss": 0.333, "step": 10764 }, { "epoch": 0.8720836033700583, "grad_norm": 0.03824484348297119, "learning_rate": 0.000173774697331113, "loss": 0.3509, "step": 10765 }, { "epoch": 0.8721646143875567, "grad_norm": 0.03036884032189846, "learning_rate": 0.0001737701966785184, "loss": 0.3204, "step": 10766 }, { "epoch": 0.8722456254050551, "grad_norm": 0.03216506168246269, "learning_rate": 0.00017376569602592376, "loss": 0.3263, "step": 10767 }, { "epoch": 0.8723266364225535, "grad_norm": 0.03733062371611595, "learning_rate": 0.00017376119537332912, "loss": 0.3485, "step": 10768 }, { "epoch": 0.8724076474400518, "grad_norm": 0.035098589956760406, "learning_rate": 0.00017375669472073453, "loss": 0.3384, "step": 10769 }, { "epoch": 0.8724886584575502, "grad_norm": 0.032889917492866516, "learning_rate": 0.0001737521940681399, "loss": 0.2968, "step": 10770 }, { "epoch": 0.8725696694750487, "grad_norm": 0.034361012279987335, "learning_rate": 0.00017374769341554525, "loss": 0.3312, "step": 10771 }, { "epoch": 0.872650680492547, "grad_norm": 0.03438268601894379, "learning_rate": 0.00017374319276295064, "loss": 0.3398, "step": 10772 }, { "epoch": 0.8727316915100454, "grad_norm": 0.032490380108356476, "learning_rate": 0.000173738692110356, "loss": 0.3235, "step": 10773 }, { "epoch": 0.8728127025275437, "grad_norm": 0.03424321487545967, "learning_rate": 0.00017373419145776136, "loss": 0.3227, "step": 10774 }, { "epoch": 0.8728937135450421, "grad_norm": 0.03552704304456711, "learning_rate": 0.00017372969080516678, "loss": 0.3065, "step": 10775 }, { "epoch": 0.8729747245625405, "grad_norm": 0.03595755994319916, "learning_rate": 0.00017372519015257214, "loss": 0.3538, "step": 10776 }, { "epoch": 0.8730557355800389, "grad_norm": 0.0303262397646904, "learning_rate": 0.0001737206894999775, "loss": 0.3143, "step": 10777 }, { "epoch": 0.8731367465975373, "grad_norm": 0.02945934422314167, "learning_rate": 0.00017371618884738288, "loss": 0.303, "step": 10778 }, { "epoch": 0.8732177576150356, "grad_norm": 0.03361841291189194, "learning_rate": 0.00017371168819478824, "loss": 0.3539, "step": 10779 }, { "epoch": 0.873298768632534, "grad_norm": 0.02983030490577221, "learning_rate": 0.00017370718754219363, "loss": 0.3095, "step": 10780 }, { "epoch": 0.8733797796500324, "grad_norm": 0.032523952424526215, "learning_rate": 0.00017370268688959902, "loss": 0.3488, "step": 10781 }, { "epoch": 0.8734607906675308, "grad_norm": 0.03318939730525017, "learning_rate": 0.00017369818623700438, "loss": 0.3189, "step": 10782 }, { "epoch": 0.8735418016850292, "grad_norm": 0.03516692668199539, "learning_rate": 0.00017369368558440974, "loss": 0.3287, "step": 10783 }, { "epoch": 0.8736228127025275, "grad_norm": 0.03213590756058693, "learning_rate": 0.00017368918493181512, "loss": 0.2941, "step": 10784 }, { "epoch": 0.873703823720026, "grad_norm": 0.03252600133419037, "learning_rate": 0.00017368468427922048, "loss": 0.3446, "step": 10785 }, { "epoch": 0.8737848347375243, "grad_norm": 0.03503256291151047, "learning_rate": 0.00017368018362662587, "loss": 0.3329, "step": 10786 }, { "epoch": 0.8738658457550227, "grad_norm": 0.033437907695770264, "learning_rate": 0.00017367568297403126, "loss": 0.3219, "step": 10787 }, { "epoch": 0.873946856772521, "grad_norm": 0.03503791615366936, "learning_rate": 0.00017367118232143662, "loss": 0.3627, "step": 10788 }, { "epoch": 0.8740278677900194, "grad_norm": 0.037898220121860504, "learning_rate": 0.00017366668166884198, "loss": 0.3716, "step": 10789 }, { "epoch": 0.8741088788075179, "grad_norm": 0.0397721491754055, "learning_rate": 0.00017366218101624737, "loss": 0.361, "step": 10790 }, { "epoch": 0.8741898898250162, "grad_norm": 0.037444885820150375, "learning_rate": 0.00017365768036365273, "loss": 0.3577, "step": 10791 }, { "epoch": 0.8742709008425146, "grad_norm": 0.036365706473588943, "learning_rate": 0.0001736531797110581, "loss": 0.364, "step": 10792 }, { "epoch": 0.8743519118600129, "grad_norm": 0.03337782248854637, "learning_rate": 0.0001736486790584635, "loss": 0.3322, "step": 10793 }, { "epoch": 0.8744329228775113, "grad_norm": 0.037269555032253265, "learning_rate": 0.00017364417840586886, "loss": 0.3359, "step": 10794 }, { "epoch": 0.8745139338950098, "grad_norm": 0.03035913221538067, "learning_rate": 0.00017363967775327422, "loss": 0.3126, "step": 10795 }, { "epoch": 0.8745949449125081, "grad_norm": 0.028627660125494003, "learning_rate": 0.0001736351771006796, "loss": 0.2996, "step": 10796 }, { "epoch": 0.8746759559300065, "grad_norm": 0.03484364226460457, "learning_rate": 0.00017363067644808497, "loss": 0.3422, "step": 10797 }, { "epoch": 0.8747569669475048, "grad_norm": 0.03468231111764908, "learning_rate": 0.00017362617579549036, "loss": 0.3615, "step": 10798 }, { "epoch": 0.8748379779650033, "grad_norm": 0.03218459337949753, "learning_rate": 0.00017362167514289574, "loss": 0.3283, "step": 10799 }, { "epoch": 0.8749189889825016, "grad_norm": 0.029084214940667152, "learning_rate": 0.0001736171744903011, "loss": 0.3068, "step": 10800 }, { "epoch": 0.875, "grad_norm": 0.03318289667367935, "learning_rate": 0.00017361267383770646, "loss": 0.3468, "step": 10801 }, { "epoch": 0.8750810110174984, "grad_norm": 0.032937172800302505, "learning_rate": 0.00017360817318511185, "loss": 0.3316, "step": 10802 }, { "epoch": 0.8751620220349967, "grad_norm": 0.02804538980126381, "learning_rate": 0.0001736036725325172, "loss": 0.2894, "step": 10803 }, { "epoch": 0.8752430330524952, "grad_norm": 0.03209392726421356, "learning_rate": 0.0001735991718799226, "loss": 0.318, "step": 10804 }, { "epoch": 0.8753240440699935, "grad_norm": 0.0320693776011467, "learning_rate": 0.00017359467122732798, "loss": 0.3094, "step": 10805 }, { "epoch": 0.8754050550874919, "grad_norm": 0.030849037691950798, "learning_rate": 0.00017359017057473334, "loss": 0.3057, "step": 10806 }, { "epoch": 0.8754860661049902, "grad_norm": 0.032688066363334656, "learning_rate": 0.0001735856699221387, "loss": 0.316, "step": 10807 }, { "epoch": 0.8755670771224887, "grad_norm": 0.03459910303354263, "learning_rate": 0.0001735811692695441, "loss": 0.3298, "step": 10808 }, { "epoch": 0.8756480881399871, "grad_norm": 0.02897426299750805, "learning_rate": 0.00017357666861694945, "loss": 0.2906, "step": 10809 }, { "epoch": 0.8757290991574854, "grad_norm": 0.03555037081241608, "learning_rate": 0.00017357216796435484, "loss": 0.3439, "step": 10810 }, { "epoch": 0.8758101101749838, "grad_norm": 0.037332337349653244, "learning_rate": 0.00017356766731176023, "loss": 0.345, "step": 10811 }, { "epoch": 0.8758911211924821, "grad_norm": 0.037951596081256866, "learning_rate": 0.00017356316665916559, "loss": 0.3423, "step": 10812 }, { "epoch": 0.8759721322099806, "grad_norm": 0.030132196843624115, "learning_rate": 0.00017355866600657095, "loss": 0.2823, "step": 10813 }, { "epoch": 0.876053143227479, "grad_norm": 0.041832491755485535, "learning_rate": 0.00017355416535397633, "loss": 0.3397, "step": 10814 }, { "epoch": 0.8761341542449773, "grad_norm": 0.03547552973031998, "learning_rate": 0.0001735496647013817, "loss": 0.3229, "step": 10815 }, { "epoch": 0.8762151652624757, "grad_norm": 0.031897854059934616, "learning_rate": 0.00017354516404878708, "loss": 0.2958, "step": 10816 }, { "epoch": 0.876296176279974, "grad_norm": 0.03446755185723305, "learning_rate": 0.00017354066339619247, "loss": 0.3402, "step": 10817 }, { "epoch": 0.8763771872974725, "grad_norm": 0.03411189094185829, "learning_rate": 0.00017353616274359783, "loss": 0.393, "step": 10818 }, { "epoch": 0.8764581983149708, "grad_norm": 0.03914996236562729, "learning_rate": 0.0001735316620910032, "loss": 0.3346, "step": 10819 }, { "epoch": 0.8765392093324692, "grad_norm": 0.03950195014476776, "learning_rate": 0.00017352716143840857, "loss": 0.3642, "step": 10820 }, { "epoch": 0.8766202203499676, "grad_norm": 0.0327347032725811, "learning_rate": 0.00017352266078581396, "loss": 0.3027, "step": 10821 }, { "epoch": 0.876701231367466, "grad_norm": 0.040154580026865005, "learning_rate": 0.00017351816013321932, "loss": 0.3457, "step": 10822 }, { "epoch": 0.8767822423849644, "grad_norm": 0.03418951481580734, "learning_rate": 0.0001735136594806247, "loss": 0.314, "step": 10823 }, { "epoch": 0.8768632534024627, "grad_norm": 0.03217468410730362, "learning_rate": 0.00017350915882803007, "loss": 0.3392, "step": 10824 }, { "epoch": 0.8769442644199611, "grad_norm": 0.034714192152023315, "learning_rate": 0.00017350465817543543, "loss": 0.3374, "step": 10825 }, { "epoch": 0.8770252754374595, "grad_norm": 0.03474406152963638, "learning_rate": 0.00017350015752284082, "loss": 0.3819, "step": 10826 }, { "epoch": 0.8771062864549579, "grad_norm": 0.03752182424068451, "learning_rate": 0.0001734956568702462, "loss": 0.3416, "step": 10827 }, { "epoch": 0.8771872974724563, "grad_norm": 0.031079022213816643, "learning_rate": 0.00017349115621765156, "loss": 0.3083, "step": 10828 }, { "epoch": 0.8772683084899546, "grad_norm": 0.038465503603219986, "learning_rate": 0.00017348665556505695, "loss": 0.3686, "step": 10829 }, { "epoch": 0.877349319507453, "grad_norm": 0.03562534973025322, "learning_rate": 0.0001734821549124623, "loss": 0.3569, "step": 10830 }, { "epoch": 0.8774303305249513, "grad_norm": 0.030780943110585213, "learning_rate": 0.00017347765425986767, "loss": 0.3092, "step": 10831 }, { "epoch": 0.8775113415424498, "grad_norm": 0.03834667429327965, "learning_rate": 0.00017347315360727306, "loss": 0.3336, "step": 10832 }, { "epoch": 0.8775923525599482, "grad_norm": 0.032464317977428436, "learning_rate": 0.00017346865295467844, "loss": 0.3284, "step": 10833 }, { "epoch": 0.8776733635774465, "grad_norm": 0.033732820302248, "learning_rate": 0.0001734641523020838, "loss": 0.3481, "step": 10834 }, { "epoch": 0.8777543745949449, "grad_norm": 0.0332949161529541, "learning_rate": 0.0001734596516494892, "loss": 0.3244, "step": 10835 }, { "epoch": 0.8778353856124433, "grad_norm": 0.030543554574251175, "learning_rate": 0.00017345515099689455, "loss": 0.3044, "step": 10836 }, { "epoch": 0.8779163966299417, "grad_norm": 0.030538873746991158, "learning_rate": 0.0001734506503442999, "loss": 0.2957, "step": 10837 }, { "epoch": 0.87799740764744, "grad_norm": 0.03430887684226036, "learning_rate": 0.0001734461496917053, "loss": 0.3731, "step": 10838 }, { "epoch": 0.8780784186649384, "grad_norm": 0.02899688109755516, "learning_rate": 0.00017344164903911069, "loss": 0.3316, "step": 10839 }, { "epoch": 0.8781594296824368, "grad_norm": 0.035096172243356705, "learning_rate": 0.00017343714838651605, "loss": 0.3175, "step": 10840 }, { "epoch": 0.8782404406999352, "grad_norm": 0.03392845019698143, "learning_rate": 0.00017343264773392143, "loss": 0.3243, "step": 10841 }, { "epoch": 0.8783214517174336, "grad_norm": 0.04853187873959541, "learning_rate": 0.0001734281470813268, "loss": 0.3504, "step": 10842 }, { "epoch": 0.8784024627349319, "grad_norm": 0.02897292748093605, "learning_rate": 0.00017342364642873218, "loss": 0.307, "step": 10843 }, { "epoch": 0.8784834737524303, "grad_norm": 0.035371195524930954, "learning_rate": 0.00017341914577613757, "loss": 0.3441, "step": 10844 }, { "epoch": 0.8785644847699287, "grad_norm": 0.03544081747531891, "learning_rate": 0.00017341464512354293, "loss": 0.3592, "step": 10845 }, { "epoch": 0.8786454957874271, "grad_norm": 0.029531244188547134, "learning_rate": 0.0001734101444709483, "loss": 0.2998, "step": 10846 }, { "epoch": 0.8787265068049255, "grad_norm": 0.034727614372968674, "learning_rate": 0.00017340564381835368, "loss": 0.3093, "step": 10847 }, { "epoch": 0.8788075178224238, "grad_norm": 0.029829610139131546, "learning_rate": 0.00017340114316575904, "loss": 0.3056, "step": 10848 }, { "epoch": 0.8788885288399222, "grad_norm": 0.026881849393248558, "learning_rate": 0.00017339664251316442, "loss": 0.2613, "step": 10849 }, { "epoch": 0.8789695398574207, "grad_norm": 0.03386076167225838, "learning_rate": 0.0001733921418605698, "loss": 0.3473, "step": 10850 }, { "epoch": 0.879050550874919, "grad_norm": 0.045226048678159714, "learning_rate": 0.00017338764120797517, "loss": 0.3694, "step": 10851 }, { "epoch": 0.8791315618924174, "grad_norm": 0.03687673062086105, "learning_rate": 0.00017338314055538053, "loss": 0.3564, "step": 10852 }, { "epoch": 0.8792125729099157, "grad_norm": 0.03726038336753845, "learning_rate": 0.00017337863990278592, "loss": 0.3299, "step": 10853 }, { "epoch": 0.8792935839274141, "grad_norm": 0.034257080405950546, "learning_rate": 0.00017337413925019128, "loss": 0.3573, "step": 10854 }, { "epoch": 0.8793745949449125, "grad_norm": 0.0355970673263073, "learning_rate": 0.00017336963859759666, "loss": 0.3459, "step": 10855 }, { "epoch": 0.8794556059624109, "grad_norm": 0.03855161368846893, "learning_rate": 0.00017336513794500205, "loss": 0.3271, "step": 10856 }, { "epoch": 0.8795366169799093, "grad_norm": 0.033906761556863785, "learning_rate": 0.0001733606372924074, "loss": 0.3253, "step": 10857 }, { "epoch": 0.8796176279974076, "grad_norm": 0.04081016033887863, "learning_rate": 0.00017335613663981277, "loss": 0.3626, "step": 10858 }, { "epoch": 0.8796986390149061, "grad_norm": 0.03275776654481888, "learning_rate": 0.00017335163598721816, "loss": 0.3439, "step": 10859 }, { "epoch": 0.8797796500324044, "grad_norm": 0.037287477403879166, "learning_rate": 0.00017334713533462352, "loss": 0.3505, "step": 10860 }, { "epoch": 0.8798606610499028, "grad_norm": 0.03666910529136658, "learning_rate": 0.0001733426346820289, "loss": 0.3528, "step": 10861 }, { "epoch": 0.8799416720674011, "grad_norm": 0.03178151696920395, "learning_rate": 0.0001733381340294343, "loss": 0.3151, "step": 10862 }, { "epoch": 0.8800226830848995, "grad_norm": 0.03164476528763771, "learning_rate": 0.00017333363337683965, "loss": 0.3157, "step": 10863 }, { "epoch": 0.880103694102398, "grad_norm": 0.03677202761173248, "learning_rate": 0.000173329132724245, "loss": 0.3508, "step": 10864 }, { "epoch": 0.8801847051198963, "grad_norm": 0.03549559414386749, "learning_rate": 0.0001733246320716504, "loss": 0.325, "step": 10865 }, { "epoch": 0.8802657161373947, "grad_norm": 0.03302009403705597, "learning_rate": 0.00017332013141905576, "loss": 0.3393, "step": 10866 }, { "epoch": 0.880346727154893, "grad_norm": 0.03544938564300537, "learning_rate": 0.00017331563076646115, "loss": 0.3309, "step": 10867 }, { "epoch": 0.8804277381723914, "grad_norm": 0.03859511390328407, "learning_rate": 0.00017331113011386653, "loss": 0.3496, "step": 10868 }, { "epoch": 0.8805087491898899, "grad_norm": 0.03511954843997955, "learning_rate": 0.0001733066294612719, "loss": 0.3354, "step": 10869 }, { "epoch": 0.8805897602073882, "grad_norm": 0.03728478029370308, "learning_rate": 0.00017330212880867725, "loss": 0.344, "step": 10870 }, { "epoch": 0.8806707712248866, "grad_norm": 0.03648219630122185, "learning_rate": 0.00017329762815608264, "loss": 0.3614, "step": 10871 }, { "epoch": 0.8807517822423849, "grad_norm": 0.04043553024530411, "learning_rate": 0.000173293127503488, "loss": 0.3726, "step": 10872 }, { "epoch": 0.8808327932598834, "grad_norm": 0.037512943148612976, "learning_rate": 0.0001732886268508934, "loss": 0.3654, "step": 10873 }, { "epoch": 0.8809138042773818, "grad_norm": 0.03332279995083809, "learning_rate": 0.00017328412619829878, "loss": 0.3645, "step": 10874 }, { "epoch": 0.8809948152948801, "grad_norm": 0.03482624888420105, "learning_rate": 0.00017327962554570414, "loss": 0.3478, "step": 10875 }, { "epoch": 0.8810758263123785, "grad_norm": 0.031530581414699554, "learning_rate": 0.0001732751248931095, "loss": 0.3207, "step": 10876 }, { "epoch": 0.8811568373298768, "grad_norm": 0.03738391399383545, "learning_rate": 0.00017327062424051488, "loss": 0.3668, "step": 10877 }, { "epoch": 0.8812378483473753, "grad_norm": 0.03019733913242817, "learning_rate": 0.00017326612358792024, "loss": 0.3161, "step": 10878 }, { "epoch": 0.8813188593648736, "grad_norm": 0.0378187894821167, "learning_rate": 0.00017326162293532563, "loss": 0.3082, "step": 10879 }, { "epoch": 0.881399870382372, "grad_norm": 0.03067973628640175, "learning_rate": 0.00017325712228273102, "loss": 0.3149, "step": 10880 }, { "epoch": 0.8814808813998704, "grad_norm": 0.03693872317671776, "learning_rate": 0.00017325262163013638, "loss": 0.3524, "step": 10881 }, { "epoch": 0.8815618924173687, "grad_norm": 0.036563705652952194, "learning_rate": 0.00017324812097754174, "loss": 0.3549, "step": 10882 }, { "epoch": 0.8816429034348672, "grad_norm": 0.031392719596624374, "learning_rate": 0.00017324362032494712, "loss": 0.2875, "step": 10883 }, { "epoch": 0.8817239144523655, "grad_norm": 0.035123202949762344, "learning_rate": 0.00017323911967235248, "loss": 0.3157, "step": 10884 }, { "epoch": 0.8818049254698639, "grad_norm": 0.0297453124076128, "learning_rate": 0.00017323461901975787, "loss": 0.3111, "step": 10885 }, { "epoch": 0.8818859364873622, "grad_norm": 0.030868038535118103, "learning_rate": 0.00017323011836716326, "loss": 0.3521, "step": 10886 }, { "epoch": 0.8819669475048607, "grad_norm": 0.03564748540520668, "learning_rate": 0.00017322561771456862, "loss": 0.3233, "step": 10887 }, { "epoch": 0.8820479585223591, "grad_norm": 0.03591104969382286, "learning_rate": 0.00017322111706197398, "loss": 0.3787, "step": 10888 }, { "epoch": 0.8821289695398574, "grad_norm": 0.03601299971342087, "learning_rate": 0.00017321661640937937, "loss": 0.2917, "step": 10889 }, { "epoch": 0.8822099805573558, "grad_norm": 0.03527918457984924, "learning_rate": 0.00017321211575678473, "loss": 0.3529, "step": 10890 }, { "epoch": 0.8822909915748541, "grad_norm": 0.029023386538028717, "learning_rate": 0.00017320761510419011, "loss": 0.2677, "step": 10891 }, { "epoch": 0.8823720025923526, "grad_norm": 0.039927802979946136, "learning_rate": 0.0001732031144515955, "loss": 0.3724, "step": 10892 }, { "epoch": 0.882453013609851, "grad_norm": 0.03533445671200752, "learning_rate": 0.00017319861379900086, "loss": 0.3346, "step": 10893 }, { "epoch": 0.8825340246273493, "grad_norm": 0.033590059727430344, "learning_rate": 0.00017319411314640622, "loss": 0.2974, "step": 10894 }, { "epoch": 0.8826150356448477, "grad_norm": 0.031166430562734604, "learning_rate": 0.0001731896124938116, "loss": 0.3015, "step": 10895 }, { "epoch": 0.8826960466623461, "grad_norm": 0.032051268965005875, "learning_rate": 0.00017318511184121697, "loss": 0.3282, "step": 10896 }, { "epoch": 0.8827770576798445, "grad_norm": 0.03437373787164688, "learning_rate": 0.00017318061118862236, "loss": 0.3441, "step": 10897 }, { "epoch": 0.8828580686973428, "grad_norm": 0.03485589846968651, "learning_rate": 0.00017317611053602774, "loss": 0.3577, "step": 10898 }, { "epoch": 0.8829390797148412, "grad_norm": 0.033468667417764664, "learning_rate": 0.0001731716098834331, "loss": 0.3135, "step": 10899 }, { "epoch": 0.8830200907323396, "grad_norm": 0.036351680755615234, "learning_rate": 0.00017316710923083846, "loss": 0.3505, "step": 10900 }, { "epoch": 0.883101101749838, "grad_norm": 0.03218887001276016, "learning_rate": 0.00017316260857824385, "loss": 0.3124, "step": 10901 }, { "epoch": 0.8831821127673364, "grad_norm": 0.03348303586244583, "learning_rate": 0.00017315810792564924, "loss": 0.3012, "step": 10902 }, { "epoch": 0.8832631237848347, "grad_norm": 0.029188605025410652, "learning_rate": 0.0001731536072730546, "loss": 0.3096, "step": 10903 }, { "epoch": 0.8833441348023331, "grad_norm": 0.033830948173999786, "learning_rate": 0.00017314910662045998, "loss": 0.3506, "step": 10904 }, { "epoch": 0.8834251458198314, "grad_norm": 0.03579388186335564, "learning_rate": 0.00017314460596786534, "loss": 0.3254, "step": 10905 }, { "epoch": 0.8835061568373299, "grad_norm": 0.03170622140169144, "learning_rate": 0.0001731401053152707, "loss": 0.3513, "step": 10906 }, { "epoch": 0.8835871678548283, "grad_norm": 0.03922513499855995, "learning_rate": 0.0001731356046626761, "loss": 0.3259, "step": 10907 }, { "epoch": 0.8836681788723266, "grad_norm": 0.034124623984098434, "learning_rate": 0.00017313110401008148, "loss": 0.4014, "step": 10908 }, { "epoch": 0.883749189889825, "grad_norm": 0.031138645485043526, "learning_rate": 0.00017312660335748684, "loss": 0.3222, "step": 10909 }, { "epoch": 0.8838302009073234, "grad_norm": 0.03142575919628143, "learning_rate": 0.00017312210270489223, "loss": 0.3274, "step": 10910 }, { "epoch": 0.8839112119248218, "grad_norm": 0.036674655973911285, "learning_rate": 0.00017311760205229759, "loss": 0.3872, "step": 10911 }, { "epoch": 0.8839922229423202, "grad_norm": 0.03667711466550827, "learning_rate": 0.00017311310139970297, "loss": 0.3524, "step": 10912 }, { "epoch": 0.8840732339598185, "grad_norm": 0.03456597402691841, "learning_rate": 0.00017310860074710833, "loss": 0.3658, "step": 10913 }, { "epoch": 0.8841542449773169, "grad_norm": 0.0352916345000267, "learning_rate": 0.00017310410009451372, "loss": 0.3236, "step": 10914 }, { "epoch": 0.8842352559948153, "grad_norm": 0.03116772323846817, "learning_rate": 0.00017309959944191908, "loss": 0.2683, "step": 10915 }, { "epoch": 0.8843162670123137, "grad_norm": 0.0329105444252491, "learning_rate": 0.00017309509878932447, "loss": 0.3473, "step": 10916 }, { "epoch": 0.884397278029812, "grad_norm": 0.0340874008834362, "learning_rate": 0.00017309059813672983, "loss": 0.3575, "step": 10917 }, { "epoch": 0.8844782890473104, "grad_norm": 0.03931410610675812, "learning_rate": 0.00017308609748413521, "loss": 0.3495, "step": 10918 }, { "epoch": 0.8845593000648088, "grad_norm": 0.037920136004686356, "learning_rate": 0.00017308159683154057, "loss": 0.3809, "step": 10919 }, { "epoch": 0.8846403110823072, "grad_norm": 0.037753473967313766, "learning_rate": 0.00017307709617894596, "loss": 0.3371, "step": 10920 }, { "epoch": 0.8847213220998056, "grad_norm": 0.03462715074419975, "learning_rate": 0.00017307259552635132, "loss": 0.3513, "step": 10921 }, { "epoch": 0.8848023331173039, "grad_norm": 0.03926094248890877, "learning_rate": 0.0001730680948737567, "loss": 0.365, "step": 10922 }, { "epoch": 0.8848833441348023, "grad_norm": 0.02999107912182808, "learning_rate": 0.00017306359422116207, "loss": 0.3364, "step": 10923 }, { "epoch": 0.8849643551523008, "grad_norm": 0.02969319559633732, "learning_rate": 0.00017305909356856746, "loss": 0.324, "step": 10924 }, { "epoch": 0.8850453661697991, "grad_norm": 0.03290926665067673, "learning_rate": 0.00017305459291597284, "loss": 0.3514, "step": 10925 }, { "epoch": 0.8851263771872975, "grad_norm": 0.034250661730766296, "learning_rate": 0.0001730500922633782, "loss": 0.3367, "step": 10926 }, { "epoch": 0.8852073882047958, "grad_norm": 0.040132924914360046, "learning_rate": 0.00017304559161078356, "loss": 0.3632, "step": 10927 }, { "epoch": 0.8852883992222942, "grad_norm": 0.036323387175798416, "learning_rate": 0.00017304109095818895, "loss": 0.3199, "step": 10928 }, { "epoch": 0.8853694102397927, "grad_norm": 0.03790266811847687, "learning_rate": 0.0001730365903055943, "loss": 0.3318, "step": 10929 }, { "epoch": 0.885450421257291, "grad_norm": 0.03556707128882408, "learning_rate": 0.0001730320896529997, "loss": 0.3132, "step": 10930 }, { "epoch": 0.8855314322747894, "grad_norm": 0.03412071615457535, "learning_rate": 0.00017302758900040508, "loss": 0.3475, "step": 10931 }, { "epoch": 0.8856124432922877, "grad_norm": 0.03974820300936699, "learning_rate": 0.00017302308834781044, "loss": 0.3767, "step": 10932 }, { "epoch": 0.8856934543097861, "grad_norm": 0.03283865377306938, "learning_rate": 0.0001730185876952158, "loss": 0.2992, "step": 10933 }, { "epoch": 0.8857744653272845, "grad_norm": 0.03139305114746094, "learning_rate": 0.0001730140870426212, "loss": 0.3106, "step": 10934 }, { "epoch": 0.8858554763447829, "grad_norm": 0.03446220979094505, "learning_rate": 0.00017300958639002655, "loss": 0.3128, "step": 10935 }, { "epoch": 0.8859364873622813, "grad_norm": 0.03159172087907791, "learning_rate": 0.00017300508573743194, "loss": 0.2882, "step": 10936 }, { "epoch": 0.8860174983797796, "grad_norm": 0.03451040759682655, "learning_rate": 0.00017300058508483733, "loss": 0.3288, "step": 10937 }, { "epoch": 0.8860985093972781, "grad_norm": 0.029484622180461884, "learning_rate": 0.0001729960844322427, "loss": 0.2764, "step": 10938 }, { "epoch": 0.8861795204147764, "grad_norm": 0.0305886659771204, "learning_rate": 0.00017299158377964805, "loss": 0.331, "step": 10939 }, { "epoch": 0.8862605314322748, "grad_norm": 0.03970607370138168, "learning_rate": 0.00017298708312705343, "loss": 0.3571, "step": 10940 }, { "epoch": 0.8863415424497731, "grad_norm": 0.032614629715681076, "learning_rate": 0.0001729825824744588, "loss": 0.3438, "step": 10941 }, { "epoch": 0.8864225534672715, "grad_norm": 0.03078024461865425, "learning_rate": 0.00017297808182186418, "loss": 0.3229, "step": 10942 }, { "epoch": 0.88650356448477, "grad_norm": 0.03623131290078163, "learning_rate": 0.00017297358116926957, "loss": 0.38, "step": 10943 }, { "epoch": 0.8865845755022683, "grad_norm": 0.03395291045308113, "learning_rate": 0.00017296908051667493, "loss": 0.3349, "step": 10944 }, { "epoch": 0.8866655865197667, "grad_norm": 0.031880639493465424, "learning_rate": 0.0001729645798640803, "loss": 0.2841, "step": 10945 }, { "epoch": 0.886746597537265, "grad_norm": 0.033075135201215744, "learning_rate": 0.00017296007921148568, "loss": 0.31, "step": 10946 }, { "epoch": 0.8868276085547635, "grad_norm": 0.03080565668642521, "learning_rate": 0.00017295557855889104, "loss": 0.2977, "step": 10947 }, { "epoch": 0.8869086195722619, "grad_norm": 0.039215441793203354, "learning_rate": 0.00017295107790629642, "loss": 0.3396, "step": 10948 }, { "epoch": 0.8869896305897602, "grad_norm": 0.03834895044565201, "learning_rate": 0.0001729465772537018, "loss": 0.3419, "step": 10949 }, { "epoch": 0.8870706416072586, "grad_norm": 0.03201017901301384, "learning_rate": 0.00017294207660110717, "loss": 0.3251, "step": 10950 }, { "epoch": 0.8871516526247569, "grad_norm": 0.028811967000365257, "learning_rate": 0.00017293757594851253, "loss": 0.3065, "step": 10951 }, { "epoch": 0.8872326636422554, "grad_norm": 0.03520270437002182, "learning_rate": 0.00017293307529591792, "loss": 0.3207, "step": 10952 }, { "epoch": 0.8873136746597537, "grad_norm": 0.033660054206848145, "learning_rate": 0.00017292857464332328, "loss": 0.322, "step": 10953 }, { "epoch": 0.8873946856772521, "grad_norm": 0.029386943206191063, "learning_rate": 0.00017292407399072866, "loss": 0.2527, "step": 10954 }, { "epoch": 0.8874756966947505, "grad_norm": 0.03373291343450546, "learning_rate": 0.00017291957333813405, "loss": 0.3207, "step": 10955 }, { "epoch": 0.8875567077122488, "grad_norm": 0.03560221195220947, "learning_rate": 0.0001729150726855394, "loss": 0.355, "step": 10956 }, { "epoch": 0.8876377187297473, "grad_norm": 0.030335064977407455, "learning_rate": 0.00017291057203294477, "loss": 0.2609, "step": 10957 }, { "epoch": 0.8877187297472456, "grad_norm": 0.027602190151810646, "learning_rate": 0.00017290607138035016, "loss": 0.2777, "step": 10958 }, { "epoch": 0.887799740764744, "grad_norm": 0.040721770375967026, "learning_rate": 0.00017290157072775552, "loss": 0.3634, "step": 10959 }, { "epoch": 0.8878807517822424, "grad_norm": 0.037371281534433365, "learning_rate": 0.0001728970700751609, "loss": 0.3755, "step": 10960 }, { "epoch": 0.8879617627997408, "grad_norm": 0.03600545600056648, "learning_rate": 0.0001728925694225663, "loss": 0.3431, "step": 10961 }, { "epoch": 0.8880427738172392, "grad_norm": 0.03449264541268349, "learning_rate": 0.00017288806876997165, "loss": 0.3277, "step": 10962 }, { "epoch": 0.8881237848347375, "grad_norm": 0.033802565187215805, "learning_rate": 0.000172883568117377, "loss": 0.3102, "step": 10963 }, { "epoch": 0.8882047958522359, "grad_norm": 0.031701382249593735, "learning_rate": 0.0001728790674647824, "loss": 0.2825, "step": 10964 }, { "epoch": 0.8882858068697342, "grad_norm": 0.03377435356378555, "learning_rate": 0.00017287456681218776, "loss": 0.3627, "step": 10965 }, { "epoch": 0.8883668178872327, "grad_norm": 0.029154321178793907, "learning_rate": 0.00017287006615959315, "loss": 0.2998, "step": 10966 }, { "epoch": 0.8884478289047311, "grad_norm": 0.03917527571320534, "learning_rate": 0.00017286556550699853, "loss": 0.3575, "step": 10967 }, { "epoch": 0.8885288399222294, "grad_norm": 0.03809036687016487, "learning_rate": 0.0001728610648544039, "loss": 0.3551, "step": 10968 }, { "epoch": 0.8886098509397278, "grad_norm": 0.028501780703663826, "learning_rate": 0.00017285656420180925, "loss": 0.2913, "step": 10969 }, { "epoch": 0.8886908619572261, "grad_norm": 0.04084382951259613, "learning_rate": 0.00017285206354921464, "loss": 0.3357, "step": 10970 }, { "epoch": 0.8887718729747246, "grad_norm": 0.032277580350637436, "learning_rate": 0.00017284756289662, "loss": 0.3286, "step": 10971 }, { "epoch": 0.888852883992223, "grad_norm": 0.03367283195257187, "learning_rate": 0.0001728430622440254, "loss": 0.2913, "step": 10972 }, { "epoch": 0.8889338950097213, "grad_norm": 0.035349562764167786, "learning_rate": 0.00017283856159143078, "loss": 0.3036, "step": 10973 }, { "epoch": 0.8890149060272197, "grad_norm": 0.035391971468925476, "learning_rate": 0.00017283406093883614, "loss": 0.3499, "step": 10974 }, { "epoch": 0.8890959170447181, "grad_norm": 0.03234266862273216, "learning_rate": 0.0001728295602862415, "loss": 0.3213, "step": 10975 }, { "epoch": 0.8891769280622165, "grad_norm": 0.032877422869205475, "learning_rate": 0.00017282505963364688, "loss": 0.3347, "step": 10976 }, { "epoch": 0.8892579390797148, "grad_norm": 0.030273448675870895, "learning_rate": 0.00017282055898105227, "loss": 0.3127, "step": 10977 }, { "epoch": 0.8893389500972132, "grad_norm": 0.035193730145692825, "learning_rate": 0.00017281605832845763, "loss": 0.3409, "step": 10978 }, { "epoch": 0.8894199611147116, "grad_norm": 0.030384352430701256, "learning_rate": 0.00017281155767586302, "loss": 0.315, "step": 10979 }, { "epoch": 0.88950097213221, "grad_norm": 0.030477603897452354, "learning_rate": 0.00017280705702326838, "loss": 0.2877, "step": 10980 }, { "epoch": 0.8895819831497084, "grad_norm": 0.03655509278178215, "learning_rate": 0.00017280255637067377, "loss": 0.3396, "step": 10981 }, { "epoch": 0.8896629941672067, "grad_norm": 0.03506140410900116, "learning_rate": 0.00017279805571807913, "loss": 0.3275, "step": 10982 }, { "epoch": 0.8897440051847051, "grad_norm": 0.0328826829791069, "learning_rate": 0.0001727935550654845, "loss": 0.3162, "step": 10983 }, { "epoch": 0.8898250162022034, "grad_norm": 0.03526667132973671, "learning_rate": 0.00017278905441288987, "loss": 0.3562, "step": 10984 }, { "epoch": 0.8899060272197019, "grad_norm": 0.031116103753447533, "learning_rate": 0.00017278455376029526, "loss": 0.3126, "step": 10985 }, { "epoch": 0.8899870382372003, "grad_norm": 0.03344593569636345, "learning_rate": 0.00017278005310770062, "loss": 0.3167, "step": 10986 }, { "epoch": 0.8900680492546986, "grad_norm": 0.0322323776781559, "learning_rate": 0.000172775552455106, "loss": 0.2934, "step": 10987 }, { "epoch": 0.890149060272197, "grad_norm": 0.037336114794015884, "learning_rate": 0.00017277105180251137, "loss": 0.3233, "step": 10988 }, { "epoch": 0.8902300712896954, "grad_norm": 0.042128514498472214, "learning_rate": 0.00017276655114991675, "loss": 0.3382, "step": 10989 }, { "epoch": 0.8903110823071938, "grad_norm": 0.042901746928691864, "learning_rate": 0.00017276205049732211, "loss": 0.3601, "step": 10990 }, { "epoch": 0.8903920933246922, "grad_norm": 0.034772779792547226, "learning_rate": 0.0001727575498447275, "loss": 0.3356, "step": 10991 }, { "epoch": 0.8904731043421905, "grad_norm": 0.04229419305920601, "learning_rate": 0.00017275304919213286, "loss": 0.3381, "step": 10992 }, { "epoch": 0.8905541153596889, "grad_norm": 0.0391651950776577, "learning_rate": 0.00017274854853953825, "loss": 0.3807, "step": 10993 }, { "epoch": 0.8906351263771873, "grad_norm": 0.03737068176269531, "learning_rate": 0.0001727440478869436, "loss": 0.3548, "step": 10994 }, { "epoch": 0.8907161373946857, "grad_norm": 0.03788726031780243, "learning_rate": 0.000172739547234349, "loss": 0.3282, "step": 10995 }, { "epoch": 0.890797148412184, "grad_norm": 0.03355458006262779, "learning_rate": 0.00017273504658175436, "loss": 0.3528, "step": 10996 }, { "epoch": 0.8908781594296824, "grad_norm": 0.034730155020952225, "learning_rate": 0.00017273054592915974, "loss": 0.3806, "step": 10997 }, { "epoch": 0.8909591704471809, "grad_norm": 0.034574177116155624, "learning_rate": 0.0001727260452765651, "loss": 0.3267, "step": 10998 }, { "epoch": 0.8910401814646792, "grad_norm": 0.03197081759572029, "learning_rate": 0.0001727215446239705, "loss": 0.3088, "step": 10999 }, { "epoch": 0.8911211924821776, "grad_norm": 0.029353322461247444, "learning_rate": 0.00017271704397137585, "loss": 0.2935, "step": 11000 }, { "epoch": 0.8912022034996759, "grad_norm": 0.031723231077194214, "learning_rate": 0.00017271254331878124, "loss": 0.3083, "step": 11001 }, { "epoch": 0.8912832145171743, "grad_norm": 0.03682200238108635, "learning_rate": 0.0001727080426661866, "loss": 0.3472, "step": 11002 }, { "epoch": 0.8913642255346728, "grad_norm": 0.031014375388622284, "learning_rate": 0.00017270354201359198, "loss": 0.3053, "step": 11003 }, { "epoch": 0.8914452365521711, "grad_norm": 0.030643856152892113, "learning_rate": 0.00017269904136099734, "loss": 0.3239, "step": 11004 }, { "epoch": 0.8915262475696695, "grad_norm": 0.03582141920924187, "learning_rate": 0.00017269454070840273, "loss": 0.3672, "step": 11005 }, { "epoch": 0.8916072585871678, "grad_norm": 0.03675571084022522, "learning_rate": 0.00017269004005580812, "loss": 0.3595, "step": 11006 }, { "epoch": 0.8916882696046662, "grad_norm": 0.03711831569671631, "learning_rate": 0.00017268553940321348, "loss": 0.325, "step": 11007 }, { "epoch": 0.8917692806221647, "grad_norm": 0.031868595629930496, "learning_rate": 0.00017268103875061884, "loss": 0.3189, "step": 11008 }, { "epoch": 0.891850291639663, "grad_norm": 0.03502114117145538, "learning_rate": 0.00017267653809802423, "loss": 0.355, "step": 11009 }, { "epoch": 0.8919313026571614, "grad_norm": 0.032476335763931274, "learning_rate": 0.00017267203744542959, "loss": 0.3506, "step": 11010 }, { "epoch": 0.8920123136746597, "grad_norm": 0.03275424987077713, "learning_rate": 0.00017266753679283497, "loss": 0.2934, "step": 11011 }, { "epoch": 0.8920933246921582, "grad_norm": 0.03534052148461342, "learning_rate": 0.00017266303614024036, "loss": 0.3401, "step": 11012 }, { "epoch": 0.8921743357096565, "grad_norm": 0.03902437165379524, "learning_rate": 0.00017265853548764572, "loss": 0.3376, "step": 11013 }, { "epoch": 0.8922553467271549, "grad_norm": 0.036552559584379196, "learning_rate": 0.00017265403483505108, "loss": 0.3468, "step": 11014 }, { "epoch": 0.8923363577446533, "grad_norm": 0.03513502702116966, "learning_rate": 0.00017264953418245647, "loss": 0.3471, "step": 11015 }, { "epoch": 0.8924173687621516, "grad_norm": 0.033258937299251556, "learning_rate": 0.00017264503352986183, "loss": 0.3179, "step": 11016 }, { "epoch": 0.8924983797796501, "grad_norm": 0.034728024154901505, "learning_rate": 0.00017264053287726721, "loss": 0.3294, "step": 11017 }, { "epoch": 0.8925793907971484, "grad_norm": 0.03324027359485626, "learning_rate": 0.0001726360322246726, "loss": 0.3556, "step": 11018 }, { "epoch": 0.8926604018146468, "grad_norm": 0.03504133224487305, "learning_rate": 0.00017263153157207796, "loss": 0.3237, "step": 11019 }, { "epoch": 0.8927414128321451, "grad_norm": 0.03394149988889694, "learning_rate": 0.00017262703091948332, "loss": 0.3378, "step": 11020 }, { "epoch": 0.8928224238496435, "grad_norm": 0.04117913171648979, "learning_rate": 0.0001726225302668887, "loss": 0.3468, "step": 11021 }, { "epoch": 0.892903434867142, "grad_norm": 0.034105632454156876, "learning_rate": 0.00017261802961429407, "loss": 0.3601, "step": 11022 }, { "epoch": 0.8929844458846403, "grad_norm": 0.03052723966538906, "learning_rate": 0.00017261352896169946, "loss": 0.3062, "step": 11023 }, { "epoch": 0.8930654569021387, "grad_norm": 0.03380640968680382, "learning_rate": 0.00017260902830910484, "loss": 0.3059, "step": 11024 }, { "epoch": 0.893146467919637, "grad_norm": 0.034155409783124924, "learning_rate": 0.0001726045276565102, "loss": 0.2754, "step": 11025 }, { "epoch": 0.8932274789371355, "grad_norm": 0.03457861393690109, "learning_rate": 0.00017260002700391556, "loss": 0.3358, "step": 11026 }, { "epoch": 0.8933084899546339, "grad_norm": 0.03328819200396538, "learning_rate": 0.00017259552635132095, "loss": 0.3069, "step": 11027 }, { "epoch": 0.8933895009721322, "grad_norm": 0.033737119287252426, "learning_rate": 0.0001725910256987263, "loss": 0.3335, "step": 11028 }, { "epoch": 0.8934705119896306, "grad_norm": 0.03014223463833332, "learning_rate": 0.0001725865250461317, "loss": 0.2744, "step": 11029 }, { "epoch": 0.8935515230071289, "grad_norm": 0.030346151441335678, "learning_rate": 0.00017258202439353709, "loss": 0.299, "step": 11030 }, { "epoch": 0.8936325340246274, "grad_norm": 0.03608626872301102, "learning_rate": 0.00017257752374094245, "loss": 0.3276, "step": 11031 }, { "epoch": 0.8937135450421257, "grad_norm": 0.03907999023795128, "learning_rate": 0.0001725730230883478, "loss": 0.3778, "step": 11032 }, { "epoch": 0.8937945560596241, "grad_norm": 0.03341106325387955, "learning_rate": 0.0001725685224357532, "loss": 0.3848, "step": 11033 }, { "epoch": 0.8938755670771225, "grad_norm": 0.04504524916410446, "learning_rate": 0.00017256402178315855, "loss": 0.4034, "step": 11034 }, { "epoch": 0.8939565780946209, "grad_norm": 0.037876032292842865, "learning_rate": 0.00017255952113056394, "loss": 0.3598, "step": 11035 }, { "epoch": 0.8940375891121193, "grad_norm": 0.03509625792503357, "learning_rate": 0.00017255502047796933, "loss": 0.3249, "step": 11036 }, { "epoch": 0.8941186001296176, "grad_norm": 0.03736604377627373, "learning_rate": 0.0001725505198253747, "loss": 0.3802, "step": 11037 }, { "epoch": 0.894199611147116, "grad_norm": 0.03288540616631508, "learning_rate": 0.00017254601917278005, "loss": 0.3542, "step": 11038 }, { "epoch": 0.8942806221646143, "grad_norm": 0.030276447534561157, "learning_rate": 0.00017254151852018543, "loss": 0.3297, "step": 11039 }, { "epoch": 0.8943616331821128, "grad_norm": 0.03415091335773468, "learning_rate": 0.0001725370178675908, "loss": 0.3715, "step": 11040 }, { "epoch": 0.8944426441996112, "grad_norm": 0.035904526710510254, "learning_rate": 0.00017253251721499618, "loss": 0.3552, "step": 11041 }, { "epoch": 0.8945236552171095, "grad_norm": 0.03375767543911934, "learning_rate": 0.00017252801656240157, "loss": 0.346, "step": 11042 }, { "epoch": 0.8946046662346079, "grad_norm": 0.03472788259387016, "learning_rate": 0.00017252351590980693, "loss": 0.3534, "step": 11043 }, { "epoch": 0.8946856772521062, "grad_norm": 0.03359057009220123, "learning_rate": 0.0001725190152572123, "loss": 0.2999, "step": 11044 }, { "epoch": 0.8947666882696047, "grad_norm": 0.03802908584475517, "learning_rate": 0.00017251451460461768, "loss": 0.2986, "step": 11045 }, { "epoch": 0.8948476992871031, "grad_norm": 0.031641487032175064, "learning_rate": 0.00017251001395202304, "loss": 0.3225, "step": 11046 }, { "epoch": 0.8949287103046014, "grad_norm": 0.037392813712358475, "learning_rate": 0.00017250551329942842, "loss": 0.3274, "step": 11047 }, { "epoch": 0.8950097213220998, "grad_norm": 0.032086845487356186, "learning_rate": 0.0001725010126468338, "loss": 0.3255, "step": 11048 }, { "epoch": 0.8950907323395982, "grad_norm": 0.035567983984947205, "learning_rate": 0.00017249651199423917, "loss": 0.3345, "step": 11049 }, { "epoch": 0.8951717433570966, "grad_norm": 0.03568422794342041, "learning_rate": 0.00017249201134164456, "loss": 0.3835, "step": 11050 }, { "epoch": 0.895252754374595, "grad_norm": 0.032751090824604034, "learning_rate": 0.00017248751068904992, "loss": 0.2823, "step": 11051 }, { "epoch": 0.8953337653920933, "grad_norm": 0.03469252213835716, "learning_rate": 0.00017248301003645528, "loss": 0.3056, "step": 11052 }, { "epoch": 0.8954147764095917, "grad_norm": 0.03916336968541145, "learning_rate": 0.00017247850938386066, "loss": 0.3917, "step": 11053 }, { "epoch": 0.8954957874270901, "grad_norm": 0.034609757363796234, "learning_rate": 0.00017247400873126605, "loss": 0.3444, "step": 11054 }, { "epoch": 0.8955767984445885, "grad_norm": 0.03403623029589653, "learning_rate": 0.0001724695080786714, "loss": 0.3524, "step": 11055 }, { "epoch": 0.8956578094620868, "grad_norm": 0.03477954864501953, "learning_rate": 0.0001724650074260768, "loss": 0.3344, "step": 11056 }, { "epoch": 0.8957388204795852, "grad_norm": 0.037678562104701996, "learning_rate": 0.00017246050677348216, "loss": 0.3426, "step": 11057 }, { "epoch": 0.8958198314970836, "grad_norm": 0.029955245554447174, "learning_rate": 0.00017245600612088755, "loss": 0.3056, "step": 11058 }, { "epoch": 0.895900842514582, "grad_norm": 0.03784172981977463, "learning_rate": 0.0001724515054682929, "loss": 0.3624, "step": 11059 }, { "epoch": 0.8959818535320804, "grad_norm": 0.03630274161696434, "learning_rate": 0.0001724470048156983, "loss": 0.3516, "step": 11060 }, { "epoch": 0.8960628645495787, "grad_norm": 0.0336889885365963, "learning_rate": 0.00017244250416310365, "loss": 0.3306, "step": 11061 }, { "epoch": 0.8961438755670771, "grad_norm": 0.036519281566143036, "learning_rate": 0.00017243800351050904, "loss": 0.3459, "step": 11062 }, { "epoch": 0.8962248865845756, "grad_norm": 0.032081425189971924, "learning_rate": 0.0001724335028579144, "loss": 0.3077, "step": 11063 }, { "epoch": 0.8963058976020739, "grad_norm": 0.03331870585680008, "learning_rate": 0.0001724290022053198, "loss": 0.3369, "step": 11064 }, { "epoch": 0.8963869086195723, "grad_norm": 0.032635319977998734, "learning_rate": 0.00017242450155272515, "loss": 0.3273, "step": 11065 }, { "epoch": 0.8964679196370706, "grad_norm": 0.03170187398791313, "learning_rate": 0.00017242000090013053, "loss": 0.2604, "step": 11066 }, { "epoch": 0.896548930654569, "grad_norm": 0.03879852965474129, "learning_rate": 0.0001724155002475359, "loss": 0.3501, "step": 11067 }, { "epoch": 0.8966299416720674, "grad_norm": 0.03534620627760887, "learning_rate": 0.00017241099959494128, "loss": 0.3419, "step": 11068 }, { "epoch": 0.8967109526895658, "grad_norm": 0.03365003690123558, "learning_rate": 0.00017240649894234664, "loss": 0.3471, "step": 11069 }, { "epoch": 0.8967919637070642, "grad_norm": 0.037116698920726776, "learning_rate": 0.00017240199828975203, "loss": 0.3562, "step": 11070 }, { "epoch": 0.8968729747245625, "grad_norm": 0.031580351293087006, "learning_rate": 0.0001723974976371574, "loss": 0.3336, "step": 11071 }, { "epoch": 0.8969539857420609, "grad_norm": 0.03468446806073189, "learning_rate": 0.00017239299698456278, "loss": 0.341, "step": 11072 }, { "epoch": 0.8970349967595593, "grad_norm": 0.03346063196659088, "learning_rate": 0.00017238849633196814, "loss": 0.318, "step": 11073 }, { "epoch": 0.8971160077770577, "grad_norm": 0.030881447717547417, "learning_rate": 0.00017238399567937352, "loss": 0.3021, "step": 11074 }, { "epoch": 0.897197018794556, "grad_norm": 0.03686084598302841, "learning_rate": 0.00017237949502677888, "loss": 0.3768, "step": 11075 }, { "epoch": 0.8972780298120544, "grad_norm": 0.0324772372841835, "learning_rate": 0.00017237499437418427, "loss": 0.3233, "step": 11076 }, { "epoch": 0.8973590408295529, "grad_norm": 0.03122434765100479, "learning_rate": 0.00017237049372158963, "loss": 0.2928, "step": 11077 }, { "epoch": 0.8974400518470512, "grad_norm": 0.03259376436471939, "learning_rate": 0.00017236599306899502, "loss": 0.3438, "step": 11078 }, { "epoch": 0.8975210628645496, "grad_norm": 0.03402949869632721, "learning_rate": 0.00017236149241640038, "loss": 0.3401, "step": 11079 }, { "epoch": 0.8976020738820479, "grad_norm": 0.041615165770053864, "learning_rate": 0.00017235699176380577, "loss": 0.4367, "step": 11080 }, { "epoch": 0.8976830848995463, "grad_norm": 0.034159209579229355, "learning_rate": 0.00017235249111121113, "loss": 0.362, "step": 11081 }, { "epoch": 0.8977640959170448, "grad_norm": 0.03641688451170921, "learning_rate": 0.0001723479904586165, "loss": 0.3262, "step": 11082 }, { "epoch": 0.8978451069345431, "grad_norm": 0.03852277249097824, "learning_rate": 0.00017234348980602187, "loss": 0.3482, "step": 11083 }, { "epoch": 0.8979261179520415, "grad_norm": 0.03586672246456146, "learning_rate": 0.00017233898915342726, "loss": 0.3441, "step": 11084 }, { "epoch": 0.8980071289695398, "grad_norm": 0.034681983292102814, "learning_rate": 0.00017233448850083262, "loss": 0.3424, "step": 11085 }, { "epoch": 0.8980881399870383, "grad_norm": 0.03471839800477028, "learning_rate": 0.000172329987848238, "loss": 0.3411, "step": 11086 }, { "epoch": 0.8981691510045366, "grad_norm": 0.03647003322839737, "learning_rate": 0.0001723254871956434, "loss": 0.3706, "step": 11087 }, { "epoch": 0.898250162022035, "grad_norm": 0.03368555009365082, "learning_rate": 0.00017232098654304875, "loss": 0.3682, "step": 11088 }, { "epoch": 0.8983311730395334, "grad_norm": 0.03130301833152771, "learning_rate": 0.00017231648589045411, "loss": 0.2777, "step": 11089 }, { "epoch": 0.8984121840570317, "grad_norm": 0.03784201294183731, "learning_rate": 0.0001723119852378595, "loss": 0.3645, "step": 11090 }, { "epoch": 0.8984931950745302, "grad_norm": 0.03592813014984131, "learning_rate": 0.00017230748458526486, "loss": 0.35, "step": 11091 }, { "epoch": 0.8985742060920285, "grad_norm": 0.04151960462331772, "learning_rate": 0.00017230298393267025, "loss": 0.296, "step": 11092 }, { "epoch": 0.8986552171095269, "grad_norm": 0.040424298495054245, "learning_rate": 0.00017229848328007564, "loss": 0.3996, "step": 11093 }, { "epoch": 0.8987362281270252, "grad_norm": 0.0319959856569767, "learning_rate": 0.000172293982627481, "loss": 0.2948, "step": 11094 }, { "epoch": 0.8988172391445236, "grad_norm": 0.03344470262527466, "learning_rate": 0.00017228948197488636, "loss": 0.3116, "step": 11095 }, { "epoch": 0.8988982501620221, "grad_norm": 0.035156577825546265, "learning_rate": 0.00017228498132229174, "loss": 0.3508, "step": 11096 }, { "epoch": 0.8989792611795204, "grad_norm": 0.035696208477020264, "learning_rate": 0.0001722804806696971, "loss": 0.3711, "step": 11097 }, { "epoch": 0.8990602721970188, "grad_norm": 0.0365951843559742, "learning_rate": 0.0001722759800171025, "loss": 0.3306, "step": 11098 }, { "epoch": 0.8991412832145171, "grad_norm": 0.034049030393362045, "learning_rate": 0.00017227147936450788, "loss": 0.3341, "step": 11099 }, { "epoch": 0.8992222942320156, "grad_norm": 0.031511444598436356, "learning_rate": 0.00017226697871191324, "loss": 0.2812, "step": 11100 }, { "epoch": 0.899303305249514, "grad_norm": 0.038552649319171906, "learning_rate": 0.0001722624780593186, "loss": 0.308, "step": 11101 }, { "epoch": 0.8993843162670123, "grad_norm": 0.03560515120625496, "learning_rate": 0.00017225797740672398, "loss": 0.341, "step": 11102 }, { "epoch": 0.8994653272845107, "grad_norm": 0.031026704236865044, "learning_rate": 0.00017225347675412934, "loss": 0.2962, "step": 11103 }, { "epoch": 0.899546338302009, "grad_norm": 0.03231107443571091, "learning_rate": 0.00017224897610153473, "loss": 0.3072, "step": 11104 }, { "epoch": 0.8996273493195075, "grad_norm": 0.033269885927438736, "learning_rate": 0.00017224447544894012, "loss": 0.3487, "step": 11105 }, { "epoch": 0.8997083603370059, "grad_norm": 0.03612830489873886, "learning_rate": 0.00017223997479634548, "loss": 0.3468, "step": 11106 }, { "epoch": 0.8997893713545042, "grad_norm": 0.03197575733065605, "learning_rate": 0.00017223547414375084, "loss": 0.3024, "step": 11107 }, { "epoch": 0.8998703823720026, "grad_norm": 0.030702589079737663, "learning_rate": 0.00017223097349115623, "loss": 0.2801, "step": 11108 }, { "epoch": 0.8999513933895009, "grad_norm": 0.03103921003639698, "learning_rate": 0.00017222647283856159, "loss": 0.3345, "step": 11109 }, { "epoch": 0.9000324044069994, "grad_norm": 0.029365181922912598, "learning_rate": 0.00017222197218596697, "loss": 0.3055, "step": 11110 }, { "epoch": 0.9001134154244977, "grad_norm": 0.031475115567445755, "learning_rate": 0.00017221747153337236, "loss": 0.3088, "step": 11111 }, { "epoch": 0.9001944264419961, "grad_norm": 0.032637711614370346, "learning_rate": 0.00017221297088077772, "loss": 0.306, "step": 11112 }, { "epoch": 0.9002754374594945, "grad_norm": 0.0325482040643692, "learning_rate": 0.00017220847022818308, "loss": 0.3099, "step": 11113 }, { "epoch": 0.9003564484769929, "grad_norm": 0.037501901388168335, "learning_rate": 0.00017220396957558847, "loss": 0.3421, "step": 11114 }, { "epoch": 0.9004374594944913, "grad_norm": 0.030759846791625023, "learning_rate": 0.00017219946892299383, "loss": 0.3364, "step": 11115 }, { "epoch": 0.9005184705119896, "grad_norm": 0.0340886265039444, "learning_rate": 0.00017219496827039922, "loss": 0.3079, "step": 11116 }, { "epoch": 0.900599481529488, "grad_norm": 0.03348534554243088, "learning_rate": 0.0001721904676178046, "loss": 0.3003, "step": 11117 }, { "epoch": 0.9006804925469863, "grad_norm": 0.03073684312403202, "learning_rate": 0.00017218596696520996, "loss": 0.297, "step": 11118 }, { "epoch": 0.9007615035644848, "grad_norm": 0.03075311705470085, "learning_rate": 0.00017218146631261535, "loss": 0.3001, "step": 11119 }, { "epoch": 0.9008425145819832, "grad_norm": 0.035695288330316544, "learning_rate": 0.0001721769656600207, "loss": 0.3263, "step": 11120 }, { "epoch": 0.9009235255994815, "grad_norm": 0.03151996061205864, "learning_rate": 0.00017217246500742607, "loss": 0.3215, "step": 11121 }, { "epoch": 0.9010045366169799, "grad_norm": 0.03588097542524338, "learning_rate": 0.00017216796435483146, "loss": 0.3328, "step": 11122 }, { "epoch": 0.9010855476344782, "grad_norm": 0.03737993165850639, "learning_rate": 0.00017216346370223684, "loss": 0.3409, "step": 11123 }, { "epoch": 0.9011665586519767, "grad_norm": 0.03557238727807999, "learning_rate": 0.0001721589630496422, "loss": 0.346, "step": 11124 }, { "epoch": 0.9012475696694751, "grad_norm": 0.030679596588015556, "learning_rate": 0.0001721544623970476, "loss": 0.3092, "step": 11125 }, { "epoch": 0.9013285806869734, "grad_norm": 0.02960105612874031, "learning_rate": 0.00017214996174445295, "loss": 0.2876, "step": 11126 }, { "epoch": 0.9014095917044718, "grad_norm": 0.03361353278160095, "learning_rate": 0.0001721454610918583, "loss": 0.3638, "step": 11127 }, { "epoch": 0.9014906027219702, "grad_norm": 0.039340220391750336, "learning_rate": 0.0001721409604392637, "loss": 0.3688, "step": 11128 }, { "epoch": 0.9015716137394686, "grad_norm": 0.03614228218793869, "learning_rate": 0.00017213645978666909, "loss": 0.3435, "step": 11129 }, { "epoch": 0.901652624756967, "grad_norm": 0.029744107276201248, "learning_rate": 0.00017213195913407445, "loss": 0.2884, "step": 11130 }, { "epoch": 0.9017336357744653, "grad_norm": 0.03261115774512291, "learning_rate": 0.00017212745848147983, "loss": 0.2932, "step": 11131 }, { "epoch": 0.9018146467919637, "grad_norm": 0.03626837581396103, "learning_rate": 0.0001721229578288852, "loss": 0.3324, "step": 11132 }, { "epoch": 0.9018956578094621, "grad_norm": 0.02938266471028328, "learning_rate": 0.00017211845717629055, "loss": 0.3234, "step": 11133 }, { "epoch": 0.9019766688269605, "grad_norm": 0.03389137610793114, "learning_rate": 0.00017211395652369594, "loss": 0.3378, "step": 11134 }, { "epoch": 0.9020576798444588, "grad_norm": 0.03858252987265587, "learning_rate": 0.00017210945587110133, "loss": 0.332, "step": 11135 }, { "epoch": 0.9021386908619572, "grad_norm": 0.036512937396764755, "learning_rate": 0.0001721049552185067, "loss": 0.3257, "step": 11136 }, { "epoch": 0.9022197018794557, "grad_norm": 0.040067460387945175, "learning_rate": 0.00017210045456591207, "loss": 0.4024, "step": 11137 }, { "epoch": 0.902300712896954, "grad_norm": 0.03473897650837898, "learning_rate": 0.00017209595391331743, "loss": 0.3167, "step": 11138 }, { "epoch": 0.9023817239144524, "grad_norm": 0.03343284875154495, "learning_rate": 0.00017209145326072282, "loss": 0.3048, "step": 11139 }, { "epoch": 0.9024627349319507, "grad_norm": 0.03314567729830742, "learning_rate": 0.00017208695260812818, "loss": 0.3384, "step": 11140 }, { "epoch": 0.9025437459494491, "grad_norm": 0.029058843851089478, "learning_rate": 0.00017208245195553357, "loss": 0.2856, "step": 11141 }, { "epoch": 0.9026247569669476, "grad_norm": 0.03524141386151314, "learning_rate": 0.00017207795130293893, "loss": 0.3532, "step": 11142 }, { "epoch": 0.9027057679844459, "grad_norm": 0.03530716150999069, "learning_rate": 0.00017207345065034432, "loss": 0.3738, "step": 11143 }, { "epoch": 0.9027867790019443, "grad_norm": 0.030532442033290863, "learning_rate": 0.00017206894999774968, "loss": 0.3049, "step": 11144 }, { "epoch": 0.9028677900194426, "grad_norm": 0.03170107305049896, "learning_rate": 0.00017206444934515506, "loss": 0.3551, "step": 11145 }, { "epoch": 0.902948801036941, "grad_norm": 0.03420290723443031, "learning_rate": 0.00017205994869256042, "loss": 0.36, "step": 11146 }, { "epoch": 0.9030298120544394, "grad_norm": 0.036962270736694336, "learning_rate": 0.0001720554480399658, "loss": 0.3437, "step": 11147 }, { "epoch": 0.9031108230719378, "grad_norm": 0.0335623174905777, "learning_rate": 0.00017205094738737117, "loss": 0.3373, "step": 11148 }, { "epoch": 0.9031918340894362, "grad_norm": 0.03984922543168068, "learning_rate": 0.00017204644673477656, "loss": 0.3905, "step": 11149 }, { "epoch": 0.9032728451069345, "grad_norm": 0.04073641821742058, "learning_rate": 0.00017204194608218192, "loss": 0.3668, "step": 11150 }, { "epoch": 0.903353856124433, "grad_norm": 0.033864449709653854, "learning_rate": 0.0001720374454295873, "loss": 0.3345, "step": 11151 }, { "epoch": 0.9034348671419313, "grad_norm": 0.036866188049316406, "learning_rate": 0.00017203294477699266, "loss": 0.3107, "step": 11152 }, { "epoch": 0.9035158781594297, "grad_norm": 0.03417964652180672, "learning_rate": 0.00017202844412439805, "loss": 0.3361, "step": 11153 }, { "epoch": 0.903596889176928, "grad_norm": 0.03552941605448723, "learning_rate": 0.0001720239434718034, "loss": 0.3485, "step": 11154 }, { "epoch": 0.9036779001944264, "grad_norm": 0.030915522947907448, "learning_rate": 0.0001720194428192088, "loss": 0.3156, "step": 11155 }, { "epoch": 0.9037589112119249, "grad_norm": 0.03367985785007477, "learning_rate": 0.00017201494216661416, "loss": 0.2865, "step": 11156 }, { "epoch": 0.9038399222294232, "grad_norm": 0.031156057491898537, "learning_rate": 0.00017201044151401955, "loss": 0.318, "step": 11157 }, { "epoch": 0.9039209332469216, "grad_norm": 0.031971532851457596, "learning_rate": 0.0001720059408614249, "loss": 0.3321, "step": 11158 }, { "epoch": 0.9040019442644199, "grad_norm": 0.0335182249546051, "learning_rate": 0.0001720014402088303, "loss": 0.3206, "step": 11159 }, { "epoch": 0.9040829552819183, "grad_norm": 0.0367172434926033, "learning_rate": 0.00017199693955623565, "loss": 0.342, "step": 11160 }, { "epoch": 0.9041639662994168, "grad_norm": 0.030467765405774117, "learning_rate": 0.00017199243890364104, "loss": 0.2973, "step": 11161 }, { "epoch": 0.9042449773169151, "grad_norm": 0.0324619859457016, "learning_rate": 0.00017198793825104643, "loss": 0.3715, "step": 11162 }, { "epoch": 0.9043259883344135, "grad_norm": 0.03537648171186447, "learning_rate": 0.0001719834375984518, "loss": 0.3652, "step": 11163 }, { "epoch": 0.9044069993519118, "grad_norm": 0.03005852736532688, "learning_rate": 0.00017197893694585715, "loss": 0.3367, "step": 11164 }, { "epoch": 0.9044880103694103, "grad_norm": 0.033330272883176804, "learning_rate": 0.00017197443629326254, "loss": 0.3327, "step": 11165 }, { "epoch": 0.9045690213869086, "grad_norm": 0.03628942742943764, "learning_rate": 0.0001719699356406679, "loss": 0.3349, "step": 11166 }, { "epoch": 0.904650032404407, "grad_norm": 0.032023970037698746, "learning_rate": 0.00017196543498807328, "loss": 0.3235, "step": 11167 }, { "epoch": 0.9047310434219054, "grad_norm": 0.03418853133916855, "learning_rate": 0.00017196093433547867, "loss": 0.3044, "step": 11168 }, { "epoch": 0.9048120544394037, "grad_norm": 0.03541678190231323, "learning_rate": 0.00017195643368288403, "loss": 0.3405, "step": 11169 }, { "epoch": 0.9048930654569022, "grad_norm": 0.0324513241648674, "learning_rate": 0.0001719519330302894, "loss": 0.328, "step": 11170 }, { "epoch": 0.9049740764744005, "grad_norm": 0.03757879137992859, "learning_rate": 0.00017194743237769478, "loss": 0.3464, "step": 11171 }, { "epoch": 0.9050550874918989, "grad_norm": 0.03542768955230713, "learning_rate": 0.00017194293172510014, "loss": 0.3238, "step": 11172 }, { "epoch": 0.9051360985093972, "grad_norm": 0.040335994213819504, "learning_rate": 0.00017193843107250552, "loss": 0.3683, "step": 11173 }, { "epoch": 0.9052171095268956, "grad_norm": 0.038330186158418655, "learning_rate": 0.0001719339304199109, "loss": 0.3151, "step": 11174 }, { "epoch": 0.9052981205443941, "grad_norm": 0.0329168438911438, "learning_rate": 0.00017192942976731627, "loss": 0.3149, "step": 11175 }, { "epoch": 0.9053791315618924, "grad_norm": 0.036200057715177536, "learning_rate": 0.00017192492911472163, "loss": 0.3267, "step": 11176 }, { "epoch": 0.9054601425793908, "grad_norm": 0.038981903344392776, "learning_rate": 0.00017192042846212702, "loss": 0.329, "step": 11177 }, { "epoch": 0.9055411535968891, "grad_norm": 0.04097919166088104, "learning_rate": 0.00017191592780953238, "loss": 0.3441, "step": 11178 }, { "epoch": 0.9056221646143876, "grad_norm": 0.03847785294055939, "learning_rate": 0.00017191142715693777, "loss": 0.3578, "step": 11179 }, { "epoch": 0.905703175631886, "grad_norm": 0.03198770061135292, "learning_rate": 0.00017190692650434315, "loss": 0.309, "step": 11180 }, { "epoch": 0.9057841866493843, "grad_norm": 0.037624429911375046, "learning_rate": 0.0001719024258517485, "loss": 0.3443, "step": 11181 }, { "epoch": 0.9058651976668827, "grad_norm": 0.0348743237555027, "learning_rate": 0.00017189792519915387, "loss": 0.3141, "step": 11182 }, { "epoch": 0.905946208684381, "grad_norm": 0.036382030695676804, "learning_rate": 0.00017189342454655926, "loss": 0.3801, "step": 11183 }, { "epoch": 0.9060272197018795, "grad_norm": 0.03109712339937687, "learning_rate": 0.00017188892389396462, "loss": 0.3263, "step": 11184 }, { "epoch": 0.9061082307193778, "grad_norm": 0.029653826728463173, "learning_rate": 0.00017188442324137, "loss": 0.2775, "step": 11185 }, { "epoch": 0.9061892417368762, "grad_norm": 0.0371454581618309, "learning_rate": 0.0001718799225887754, "loss": 0.3024, "step": 11186 }, { "epoch": 0.9062702527543746, "grad_norm": 0.034754831343889236, "learning_rate": 0.00017187542193618075, "loss": 0.3296, "step": 11187 }, { "epoch": 0.906351263771873, "grad_norm": 0.038768380880355835, "learning_rate": 0.00017187092128358614, "loss": 0.374, "step": 11188 }, { "epoch": 0.9064322747893714, "grad_norm": 0.03388087823987007, "learning_rate": 0.0001718664206309915, "loss": 0.3241, "step": 11189 }, { "epoch": 0.9065132858068697, "grad_norm": 0.032058071345090866, "learning_rate": 0.00017186191997839686, "loss": 0.2732, "step": 11190 }, { "epoch": 0.9065942968243681, "grad_norm": 0.03243362531065941, "learning_rate": 0.00017185741932580225, "loss": 0.3347, "step": 11191 }, { "epoch": 0.9066753078418665, "grad_norm": 0.03509004786610603, "learning_rate": 0.00017185291867320764, "loss": 0.3421, "step": 11192 }, { "epoch": 0.9067563188593649, "grad_norm": 0.03373107314109802, "learning_rate": 0.000171848418020613, "loss": 0.332, "step": 11193 }, { "epoch": 0.9068373298768633, "grad_norm": 0.03082781843841076, "learning_rate": 0.00017184391736801838, "loss": 0.3062, "step": 11194 }, { "epoch": 0.9069183408943616, "grad_norm": 0.03341514617204666, "learning_rate": 0.00017183941671542374, "loss": 0.3813, "step": 11195 }, { "epoch": 0.90699935191186, "grad_norm": 0.033063508570194244, "learning_rate": 0.0001718349160628291, "loss": 0.3548, "step": 11196 }, { "epoch": 0.9070803629293583, "grad_norm": 0.033410415053367615, "learning_rate": 0.0001718304154102345, "loss": 0.3382, "step": 11197 }, { "epoch": 0.9071613739468568, "grad_norm": 0.032123420387506485, "learning_rate": 0.00017182591475763988, "loss": 0.316, "step": 11198 }, { "epoch": 0.9072423849643552, "grad_norm": 0.03485981002449989, "learning_rate": 0.00017182141410504524, "loss": 0.3348, "step": 11199 }, { "epoch": 0.9073233959818535, "grad_norm": 0.04091982915997505, "learning_rate": 0.00017181691345245062, "loss": 0.3457, "step": 11200 }, { "epoch": 0.9074044069993519, "grad_norm": 0.029208384454250336, "learning_rate": 0.00017181241279985598, "loss": 0.3125, "step": 11201 }, { "epoch": 0.9074854180168503, "grad_norm": 0.035110436379909515, "learning_rate": 0.00017180791214726134, "loss": 0.3414, "step": 11202 }, { "epoch": 0.9075664290343487, "grad_norm": 0.03841705992817879, "learning_rate": 0.00017180341149466673, "loss": 0.3394, "step": 11203 }, { "epoch": 0.907647440051847, "grad_norm": 0.034956254065036774, "learning_rate": 0.00017179891084207212, "loss": 0.3441, "step": 11204 }, { "epoch": 0.9077284510693454, "grad_norm": 0.03248672932386398, "learning_rate": 0.00017179441018947748, "loss": 0.3605, "step": 11205 }, { "epoch": 0.9078094620868438, "grad_norm": 0.03265668451786041, "learning_rate": 0.00017178990953688287, "loss": 0.362, "step": 11206 }, { "epoch": 0.9078904731043422, "grad_norm": 0.033454276621341705, "learning_rate": 0.00017178540888428823, "loss": 0.3425, "step": 11207 }, { "epoch": 0.9079714841218406, "grad_norm": 0.0342661514878273, "learning_rate": 0.0001717809082316936, "loss": 0.3545, "step": 11208 }, { "epoch": 0.9080524951393389, "grad_norm": 0.02960863895714283, "learning_rate": 0.00017177640757909897, "loss": 0.3217, "step": 11209 }, { "epoch": 0.9081335061568373, "grad_norm": 0.033228784799575806, "learning_rate": 0.00017177190692650436, "loss": 0.2741, "step": 11210 }, { "epoch": 0.9082145171743357, "grad_norm": 0.030154986307024956, "learning_rate": 0.00017176740627390972, "loss": 0.2771, "step": 11211 }, { "epoch": 0.9082955281918341, "grad_norm": 0.03683822229504585, "learning_rate": 0.0001717629056213151, "loss": 0.3257, "step": 11212 }, { "epoch": 0.9083765392093325, "grad_norm": 0.03959393501281738, "learning_rate": 0.00017175840496872047, "loss": 0.3753, "step": 11213 }, { "epoch": 0.9084575502268308, "grad_norm": 0.03272836282849312, "learning_rate": 0.00017175390431612583, "loss": 0.3436, "step": 11214 }, { "epoch": 0.9085385612443292, "grad_norm": 0.03133765608072281, "learning_rate": 0.00017174940366353122, "loss": 0.3141, "step": 11215 }, { "epoch": 0.9086195722618277, "grad_norm": 0.03695807605981827, "learning_rate": 0.0001717449030109366, "loss": 0.3312, "step": 11216 }, { "epoch": 0.908700583279326, "grad_norm": 0.03227134048938751, "learning_rate": 0.00017174040235834196, "loss": 0.3215, "step": 11217 }, { "epoch": 0.9087815942968244, "grad_norm": 0.034273579716682434, "learning_rate": 0.00017173590170574735, "loss": 0.3459, "step": 11218 }, { "epoch": 0.9088626053143227, "grad_norm": 0.041489578783512115, "learning_rate": 0.0001717314010531527, "loss": 0.397, "step": 11219 }, { "epoch": 0.9089436163318211, "grad_norm": 0.033856477588415146, "learning_rate": 0.0001717269004005581, "loss": 0.2967, "step": 11220 }, { "epoch": 0.9090246273493195, "grad_norm": 0.03919477388262749, "learning_rate": 0.00017172239974796346, "loss": 0.3253, "step": 11221 }, { "epoch": 0.9091056383668179, "grad_norm": 0.03546192869544029, "learning_rate": 0.00017171789909536884, "loss": 0.3376, "step": 11222 }, { "epoch": 0.9091866493843163, "grad_norm": 0.03431613743305206, "learning_rate": 0.0001717133984427742, "loss": 0.3302, "step": 11223 }, { "epoch": 0.9092676604018146, "grad_norm": 0.032933030277490616, "learning_rate": 0.0001717088977901796, "loss": 0.3175, "step": 11224 }, { "epoch": 0.9093486714193131, "grad_norm": 0.03571391850709915, "learning_rate": 0.00017170439713758495, "loss": 0.3086, "step": 11225 }, { "epoch": 0.9094296824368114, "grad_norm": 0.03721616417169571, "learning_rate": 0.00017169989648499034, "loss": 0.3678, "step": 11226 }, { "epoch": 0.9095106934543098, "grad_norm": 0.033615391701459885, "learning_rate": 0.0001716953958323957, "loss": 0.3555, "step": 11227 }, { "epoch": 0.9095917044718081, "grad_norm": 0.028835784643888474, "learning_rate": 0.00017169089517980109, "loss": 0.3028, "step": 11228 }, { "epoch": 0.9096727154893065, "grad_norm": 0.03430754318833351, "learning_rate": 0.00017168639452720645, "loss": 0.3465, "step": 11229 }, { "epoch": 0.909753726506805, "grad_norm": 0.03223668411374092, "learning_rate": 0.00017168189387461183, "loss": 0.2622, "step": 11230 }, { "epoch": 0.9098347375243033, "grad_norm": 0.031379371881484985, "learning_rate": 0.0001716773932220172, "loss": 0.3265, "step": 11231 }, { "epoch": 0.9099157485418017, "grad_norm": 0.04087335988879204, "learning_rate": 0.00017167289256942258, "loss": 0.4356, "step": 11232 }, { "epoch": 0.9099967595593, "grad_norm": 0.03372536227107048, "learning_rate": 0.00017166839191682794, "loss": 0.3474, "step": 11233 }, { "epoch": 0.9100777705767984, "grad_norm": 0.0366133376955986, "learning_rate": 0.00017166389126423333, "loss": 0.3522, "step": 11234 }, { "epoch": 0.9101587815942969, "grad_norm": 0.03601552173495293, "learning_rate": 0.0001716593906116387, "loss": 0.2926, "step": 11235 }, { "epoch": 0.9102397926117952, "grad_norm": 0.03612074628472328, "learning_rate": 0.00017165488995904407, "loss": 0.3286, "step": 11236 }, { "epoch": 0.9103208036292936, "grad_norm": 0.032681941986083984, "learning_rate": 0.00017165038930644943, "loss": 0.3076, "step": 11237 }, { "epoch": 0.9104018146467919, "grad_norm": 0.03919201344251633, "learning_rate": 0.00017164588865385482, "loss": 0.3569, "step": 11238 }, { "epoch": 0.9104828256642904, "grad_norm": 0.03580492362380028, "learning_rate": 0.00017164138800126018, "loss": 0.325, "step": 11239 }, { "epoch": 0.9105638366817888, "grad_norm": 0.03724316507577896, "learning_rate": 0.00017163688734866557, "loss": 0.3448, "step": 11240 }, { "epoch": 0.9106448476992871, "grad_norm": 0.0412566177546978, "learning_rate": 0.00017163238669607093, "loss": 0.3547, "step": 11241 }, { "epoch": 0.9107258587167855, "grad_norm": 0.038360945880413055, "learning_rate": 0.00017162788604347632, "loss": 0.3004, "step": 11242 }, { "epoch": 0.9108068697342838, "grad_norm": 0.03706123307347298, "learning_rate": 0.0001716233853908817, "loss": 0.3773, "step": 11243 }, { "epoch": 0.9108878807517823, "grad_norm": 0.03696974739432335, "learning_rate": 0.00017161888473828706, "loss": 0.3565, "step": 11244 }, { "epoch": 0.9109688917692806, "grad_norm": 0.03767175227403641, "learning_rate": 0.00017161438408569242, "loss": 0.3378, "step": 11245 }, { "epoch": 0.911049902786779, "grad_norm": 0.03733237832784653, "learning_rate": 0.0001716098834330978, "loss": 0.3499, "step": 11246 }, { "epoch": 0.9111309138042774, "grad_norm": 0.03460073471069336, "learning_rate": 0.00017160538278050317, "loss": 0.3479, "step": 11247 }, { "epoch": 0.9112119248217757, "grad_norm": 0.037753161042928696, "learning_rate": 0.00017160088212790856, "loss": 0.3499, "step": 11248 }, { "epoch": 0.9112929358392742, "grad_norm": 0.03585457429289818, "learning_rate": 0.00017159638147531394, "loss": 0.3369, "step": 11249 }, { "epoch": 0.9113739468567725, "grad_norm": 0.0369020476937294, "learning_rate": 0.0001715918808227193, "loss": 0.289, "step": 11250 }, { "epoch": 0.9114549578742709, "grad_norm": 0.036454979330301285, "learning_rate": 0.00017158738017012467, "loss": 0.3271, "step": 11251 }, { "epoch": 0.9115359688917692, "grad_norm": 0.034425899386405945, "learning_rate": 0.00017158287951753005, "loss": 0.372, "step": 11252 }, { "epoch": 0.9116169799092677, "grad_norm": 0.03651287779211998, "learning_rate": 0.0001715783788649354, "loss": 0.3525, "step": 11253 }, { "epoch": 0.9116979909267661, "grad_norm": 0.03623446449637413, "learning_rate": 0.0001715738782123408, "loss": 0.3519, "step": 11254 }, { "epoch": 0.9117790019442644, "grad_norm": 0.03538570553064346, "learning_rate": 0.0001715693775597462, "loss": 0.3524, "step": 11255 }, { "epoch": 0.9118600129617628, "grad_norm": 0.038185957819223404, "learning_rate": 0.00017156487690715155, "loss": 0.3425, "step": 11256 }, { "epoch": 0.9119410239792611, "grad_norm": 0.0360933318734169, "learning_rate": 0.00017156037625455693, "loss": 0.3282, "step": 11257 }, { "epoch": 0.9120220349967596, "grad_norm": 0.03948797285556793, "learning_rate": 0.0001715558756019623, "loss": 0.4002, "step": 11258 }, { "epoch": 0.912103046014258, "grad_norm": 0.039010416716337204, "learning_rate": 0.00017155137494936765, "loss": 0.3217, "step": 11259 }, { "epoch": 0.9121840570317563, "grad_norm": 0.03200877457857132, "learning_rate": 0.00017154687429677304, "loss": 0.3044, "step": 11260 }, { "epoch": 0.9122650680492547, "grad_norm": 0.03211471065878868, "learning_rate": 0.00017154237364417843, "loss": 0.3395, "step": 11261 }, { "epoch": 0.912346079066753, "grad_norm": 0.030386080965399742, "learning_rate": 0.0001715378729915838, "loss": 0.2794, "step": 11262 }, { "epoch": 0.9124270900842515, "grad_norm": 0.037416622042655945, "learning_rate": 0.00017153337233898918, "loss": 0.3557, "step": 11263 }, { "epoch": 0.9125081011017498, "grad_norm": 0.03650791198015213, "learning_rate": 0.00017152887168639454, "loss": 0.3542, "step": 11264 }, { "epoch": 0.9125891121192482, "grad_norm": 0.03503980487585068, "learning_rate": 0.0001715243710337999, "loss": 0.3502, "step": 11265 }, { "epoch": 0.9126701231367466, "grad_norm": 0.033888183534145355, "learning_rate": 0.00017151987038120528, "loss": 0.3508, "step": 11266 }, { "epoch": 0.912751134154245, "grad_norm": 0.03710508719086647, "learning_rate": 0.00017151536972861067, "loss": 0.3347, "step": 11267 }, { "epoch": 0.9128321451717434, "grad_norm": 0.03084034100174904, "learning_rate": 0.00017151086907601603, "loss": 0.2781, "step": 11268 }, { "epoch": 0.9129131561892417, "grad_norm": 0.03391794487833977, "learning_rate": 0.00017150636842342142, "loss": 0.3406, "step": 11269 }, { "epoch": 0.9129941672067401, "grad_norm": 0.03140241652727127, "learning_rate": 0.00017150186777082678, "loss": 0.3048, "step": 11270 }, { "epoch": 0.9130751782242384, "grad_norm": 0.04186585173010826, "learning_rate": 0.00017149736711823214, "loss": 0.3957, "step": 11271 }, { "epoch": 0.9131561892417369, "grad_norm": 0.03869124501943588, "learning_rate": 0.00017149286646563752, "loss": 0.3725, "step": 11272 }, { "epoch": 0.9132372002592353, "grad_norm": 0.038467105478048325, "learning_rate": 0.0001714883658130429, "loss": 0.3514, "step": 11273 }, { "epoch": 0.9133182112767336, "grad_norm": 0.03266032412648201, "learning_rate": 0.00017148386516044827, "loss": 0.3406, "step": 11274 }, { "epoch": 0.913399222294232, "grad_norm": 0.03216484189033508, "learning_rate": 0.00017147936450785366, "loss": 0.3249, "step": 11275 }, { "epoch": 0.9134802333117304, "grad_norm": 0.03364231809973717, "learning_rate": 0.00017147486385525902, "loss": 0.309, "step": 11276 }, { "epoch": 0.9135612443292288, "grad_norm": 0.03598964586853981, "learning_rate": 0.00017147036320266438, "loss": 0.3246, "step": 11277 }, { "epoch": 0.9136422553467272, "grad_norm": 0.03309572860598564, "learning_rate": 0.00017146586255006977, "loss": 0.2848, "step": 11278 }, { "epoch": 0.9137232663642255, "grad_norm": 0.03358837962150574, "learning_rate": 0.00017146136189747515, "loss": 0.3364, "step": 11279 }, { "epoch": 0.9138042773817239, "grad_norm": 0.03198292478919029, "learning_rate": 0.0001714568612448805, "loss": 0.2818, "step": 11280 }, { "epoch": 0.9138852883992223, "grad_norm": 0.03653620928525925, "learning_rate": 0.0001714523605922859, "loss": 0.4019, "step": 11281 }, { "epoch": 0.9139662994167207, "grad_norm": 0.031275875866413116, "learning_rate": 0.00017144785993969126, "loss": 0.3208, "step": 11282 }, { "epoch": 0.914047310434219, "grad_norm": 0.040892936289310455, "learning_rate": 0.00017144335928709662, "loss": 0.3516, "step": 11283 }, { "epoch": 0.9141283214517174, "grad_norm": 0.032283004373311996, "learning_rate": 0.000171438858634502, "loss": 0.3202, "step": 11284 }, { "epoch": 0.9142093324692158, "grad_norm": 0.03630262613296509, "learning_rate": 0.0001714343579819074, "loss": 0.3574, "step": 11285 }, { "epoch": 0.9142903434867142, "grad_norm": 0.03689377382397652, "learning_rate": 0.00017142985732931275, "loss": 0.3583, "step": 11286 }, { "epoch": 0.9143713545042126, "grad_norm": 0.0360412560403347, "learning_rate": 0.00017142535667671814, "loss": 0.3132, "step": 11287 }, { "epoch": 0.9144523655217109, "grad_norm": 0.035145342350006104, "learning_rate": 0.0001714208560241235, "loss": 0.3105, "step": 11288 }, { "epoch": 0.9145333765392093, "grad_norm": 0.02984432876110077, "learning_rate": 0.00017141635537152886, "loss": 0.3126, "step": 11289 }, { "epoch": 0.9146143875567078, "grad_norm": 0.03315803408622742, "learning_rate": 0.00017141185471893425, "loss": 0.3362, "step": 11290 }, { "epoch": 0.9146953985742061, "grad_norm": 0.031095709651708603, "learning_rate": 0.00017140735406633964, "loss": 0.3677, "step": 11291 }, { "epoch": 0.9147764095917045, "grad_norm": 0.03344155102968216, "learning_rate": 0.000171402853413745, "loss": 0.3581, "step": 11292 }, { "epoch": 0.9148574206092028, "grad_norm": 0.03367290273308754, "learning_rate": 0.00017139835276115038, "loss": 0.3454, "step": 11293 }, { "epoch": 0.9149384316267012, "grad_norm": 0.035224586725234985, "learning_rate": 0.00017139385210855574, "loss": 0.3567, "step": 11294 }, { "epoch": 0.9150194426441997, "grad_norm": 0.033881694078445435, "learning_rate": 0.00017138935145596113, "loss": 0.2955, "step": 11295 }, { "epoch": 0.915100453661698, "grad_norm": 0.037541892379522324, "learning_rate": 0.0001713848508033665, "loss": 0.3421, "step": 11296 }, { "epoch": 0.9151814646791964, "grad_norm": 0.03299560025334358, "learning_rate": 0.00017138035015077188, "loss": 0.2979, "step": 11297 }, { "epoch": 0.9152624756966947, "grad_norm": 0.04041426256299019, "learning_rate": 0.00017137584949817724, "loss": 0.3696, "step": 11298 }, { "epoch": 0.9153434867141931, "grad_norm": 0.03309905156493187, "learning_rate": 0.00017137134884558263, "loss": 0.2828, "step": 11299 }, { "epoch": 0.9154244977316915, "grad_norm": 0.031124910339713097, "learning_rate": 0.00017136684819298799, "loss": 0.2948, "step": 11300 }, { "epoch": 0.9155055087491899, "grad_norm": 0.038336314260959625, "learning_rate": 0.00017136234754039337, "loss": 0.3329, "step": 11301 }, { "epoch": 0.9155865197666883, "grad_norm": 0.034365057945251465, "learning_rate": 0.00017135784688779873, "loss": 0.3208, "step": 11302 }, { "epoch": 0.9156675307841866, "grad_norm": 0.032800037413835526, "learning_rate": 0.00017135334623520412, "loss": 0.3003, "step": 11303 }, { "epoch": 0.9157485418016851, "grad_norm": 0.03155320882797241, "learning_rate": 0.00017134884558260948, "loss": 0.2989, "step": 11304 }, { "epoch": 0.9158295528191834, "grad_norm": 0.040037475526332855, "learning_rate": 0.00017134434493001487, "loss": 0.3596, "step": 11305 }, { "epoch": 0.9159105638366818, "grad_norm": 0.03675394132733345, "learning_rate": 0.00017133984427742023, "loss": 0.3357, "step": 11306 }, { "epoch": 0.9159915748541801, "grad_norm": 0.03477318212389946, "learning_rate": 0.00017133534362482561, "loss": 0.3089, "step": 11307 }, { "epoch": 0.9160725858716785, "grad_norm": 0.036784540861845016, "learning_rate": 0.00017133084297223097, "loss": 0.3537, "step": 11308 }, { "epoch": 0.916153596889177, "grad_norm": 0.03151548653841019, "learning_rate": 0.00017132634231963636, "loss": 0.3111, "step": 11309 }, { "epoch": 0.9162346079066753, "grad_norm": 0.03540056571364403, "learning_rate": 0.00017132184166704172, "loss": 0.3867, "step": 11310 }, { "epoch": 0.9163156189241737, "grad_norm": 0.035191185772418976, "learning_rate": 0.0001713173410144471, "loss": 0.3107, "step": 11311 }, { "epoch": 0.916396629941672, "grad_norm": 0.03517398610711098, "learning_rate": 0.00017131284036185247, "loss": 0.3428, "step": 11312 }, { "epoch": 0.9164776409591704, "grad_norm": 0.03177648410201073, "learning_rate": 0.00017130833970925786, "loss": 0.296, "step": 11313 }, { "epoch": 0.9165586519766689, "grad_norm": 0.034015148878097534, "learning_rate": 0.00017130383905666322, "loss": 0.3476, "step": 11314 }, { "epoch": 0.9166396629941672, "grad_norm": 0.029152886942029, "learning_rate": 0.0001712993384040686, "loss": 0.2802, "step": 11315 }, { "epoch": 0.9167206740116656, "grad_norm": 0.030134642496705055, "learning_rate": 0.00017129483775147396, "loss": 0.3336, "step": 11316 }, { "epoch": 0.9168016850291639, "grad_norm": 0.037936773151159286, "learning_rate": 0.00017129033709887935, "loss": 0.3464, "step": 11317 }, { "epoch": 0.9168826960466624, "grad_norm": 0.037356019020080566, "learning_rate": 0.0001712858364462847, "loss": 0.4063, "step": 11318 }, { "epoch": 0.9169637070641607, "grad_norm": 0.04031597450375557, "learning_rate": 0.0001712813357936901, "loss": 0.3689, "step": 11319 }, { "epoch": 0.9170447180816591, "grad_norm": 0.035003118216991425, "learning_rate": 0.00017127683514109546, "loss": 0.3517, "step": 11320 }, { "epoch": 0.9171257290991575, "grad_norm": 0.04067349061369896, "learning_rate": 0.00017127233448850084, "loss": 0.3245, "step": 11321 }, { "epoch": 0.9172067401166558, "grad_norm": 0.037310849875211716, "learning_rate": 0.0001712678338359062, "loss": 0.3059, "step": 11322 }, { "epoch": 0.9172877511341543, "grad_norm": 0.033759478479623795, "learning_rate": 0.0001712633331833116, "loss": 0.3194, "step": 11323 }, { "epoch": 0.9173687621516526, "grad_norm": 0.03483540192246437, "learning_rate": 0.00017125883253071698, "loss": 0.3311, "step": 11324 }, { "epoch": 0.917449773169151, "grad_norm": 0.03583363816142082, "learning_rate": 0.00017125433187812234, "loss": 0.3828, "step": 11325 }, { "epoch": 0.9175307841866494, "grad_norm": 0.03208068013191223, "learning_rate": 0.00017124983122552773, "loss": 0.2907, "step": 11326 }, { "epoch": 0.9176117952041478, "grad_norm": 0.03374846279621124, "learning_rate": 0.00017124533057293309, "loss": 0.2758, "step": 11327 }, { "epoch": 0.9176928062216462, "grad_norm": 0.03522154688835144, "learning_rate": 0.00017124082992033845, "loss": 0.3436, "step": 11328 }, { "epoch": 0.9177738172391445, "grad_norm": 0.03557585924863815, "learning_rate": 0.00017123632926774383, "loss": 0.3376, "step": 11329 }, { "epoch": 0.9178548282566429, "grad_norm": 0.03580590337514877, "learning_rate": 0.00017123182861514922, "loss": 0.3489, "step": 11330 }, { "epoch": 0.9179358392741412, "grad_norm": 0.036937177181243896, "learning_rate": 0.00017122732796255458, "loss": 0.2889, "step": 11331 }, { "epoch": 0.9180168502916397, "grad_norm": 0.03509330749511719, "learning_rate": 0.00017122282730995997, "loss": 0.318, "step": 11332 }, { "epoch": 0.9180978613091381, "grad_norm": 0.03411509841680527, "learning_rate": 0.00017121832665736533, "loss": 0.321, "step": 11333 }, { "epoch": 0.9181788723266364, "grad_norm": 0.034354668110609055, "learning_rate": 0.0001712138260047707, "loss": 0.3503, "step": 11334 }, { "epoch": 0.9182598833441348, "grad_norm": 0.03821565955877304, "learning_rate": 0.00017120932535217607, "loss": 0.3174, "step": 11335 }, { "epoch": 0.9183408943616331, "grad_norm": 0.03345862030982971, "learning_rate": 0.00017120482469958146, "loss": 0.3462, "step": 11336 }, { "epoch": 0.9184219053791316, "grad_norm": 0.03985141962766647, "learning_rate": 0.00017120032404698682, "loss": 0.3578, "step": 11337 }, { "epoch": 0.91850291639663, "grad_norm": 0.04216769337654114, "learning_rate": 0.0001711958233943922, "loss": 0.3522, "step": 11338 }, { "epoch": 0.9185839274141283, "grad_norm": 0.030603772029280663, "learning_rate": 0.00017119132274179757, "loss": 0.3129, "step": 11339 }, { "epoch": 0.9186649384316267, "grad_norm": 0.03371666744351387, "learning_rate": 0.00017118682208920293, "loss": 0.3322, "step": 11340 }, { "epoch": 0.9187459494491251, "grad_norm": 0.03615809231996536, "learning_rate": 0.00017118232143660832, "loss": 0.3615, "step": 11341 }, { "epoch": 0.9188269604666235, "grad_norm": 0.03493629768490791, "learning_rate": 0.0001711778207840137, "loss": 0.3396, "step": 11342 }, { "epoch": 0.9189079714841218, "grad_norm": 0.03149208053946495, "learning_rate": 0.00017117332013141906, "loss": 0.3103, "step": 11343 }, { "epoch": 0.9189889825016202, "grad_norm": 0.03813861310482025, "learning_rate": 0.00017116881947882445, "loss": 0.3619, "step": 11344 }, { "epoch": 0.9190699935191186, "grad_norm": 0.034407857805490494, "learning_rate": 0.0001711643188262298, "loss": 0.3453, "step": 11345 }, { "epoch": 0.919151004536617, "grad_norm": 0.034576304256916046, "learning_rate": 0.00017115981817363517, "loss": 0.3239, "step": 11346 }, { "epoch": 0.9192320155541154, "grad_norm": 0.03313819319009781, "learning_rate": 0.00017115531752104056, "loss": 0.2812, "step": 11347 }, { "epoch": 0.9193130265716137, "grad_norm": 0.032415058463811874, "learning_rate": 0.00017115081686844595, "loss": 0.3224, "step": 11348 }, { "epoch": 0.9193940375891121, "grad_norm": 0.03585551306605339, "learning_rate": 0.0001711463162158513, "loss": 0.3718, "step": 11349 }, { "epoch": 0.9194750486066104, "grad_norm": 0.03217494115233421, "learning_rate": 0.0001711418155632567, "loss": 0.3295, "step": 11350 }, { "epoch": 0.9195560596241089, "grad_norm": 0.03694257512688637, "learning_rate": 0.00017113731491066205, "loss": 0.348, "step": 11351 }, { "epoch": 0.9196370706416073, "grad_norm": 0.031091250479221344, "learning_rate": 0.0001711328142580674, "loss": 0.3289, "step": 11352 }, { "epoch": 0.9197180816591056, "grad_norm": 0.034045275300741196, "learning_rate": 0.0001711283136054728, "loss": 0.3563, "step": 11353 }, { "epoch": 0.919799092676604, "grad_norm": 0.03653084114193916, "learning_rate": 0.0001711238129528782, "loss": 0.3752, "step": 11354 }, { "epoch": 0.9198801036941024, "grad_norm": 0.03138238564133644, "learning_rate": 0.00017111931230028355, "loss": 0.3176, "step": 11355 }, { "epoch": 0.9199611147116008, "grad_norm": 0.030700651928782463, "learning_rate": 0.00017111481164768893, "loss": 0.2794, "step": 11356 }, { "epoch": 0.9200421257290992, "grad_norm": 0.03846675902605057, "learning_rate": 0.0001711103109950943, "loss": 0.3192, "step": 11357 }, { "epoch": 0.9201231367465975, "grad_norm": 0.03891396149992943, "learning_rate": 0.00017110581034249965, "loss": 0.3276, "step": 11358 }, { "epoch": 0.9202041477640959, "grad_norm": 0.03765661641955376, "learning_rate": 0.00017110130968990504, "loss": 0.3326, "step": 11359 }, { "epoch": 0.9202851587815943, "grad_norm": 0.03686787560582161, "learning_rate": 0.00017109680903731043, "loss": 0.3327, "step": 11360 }, { "epoch": 0.9203661697990927, "grad_norm": 0.03930029645562172, "learning_rate": 0.0001710923083847158, "loss": 0.3086, "step": 11361 }, { "epoch": 0.920447180816591, "grad_norm": 0.032593511044979095, "learning_rate": 0.00017108780773212118, "loss": 0.2982, "step": 11362 }, { "epoch": 0.9205281918340894, "grad_norm": 0.04143695533275604, "learning_rate": 0.00017108330707952654, "loss": 0.3586, "step": 11363 }, { "epoch": 0.9206092028515879, "grad_norm": 0.0322968028485775, "learning_rate": 0.0001710788064269319, "loss": 0.3224, "step": 11364 }, { "epoch": 0.9206902138690862, "grad_norm": 0.037244465202093124, "learning_rate": 0.00017107430577433728, "loss": 0.3114, "step": 11365 }, { "epoch": 0.9207712248865846, "grad_norm": 0.03762952238321304, "learning_rate": 0.00017106980512174267, "loss": 0.3404, "step": 11366 }, { "epoch": 0.9208522359040829, "grad_norm": 0.03405272588133812, "learning_rate": 0.00017106530446914803, "loss": 0.3496, "step": 11367 }, { "epoch": 0.9209332469215813, "grad_norm": 0.03447031229734421, "learning_rate": 0.00017106080381655342, "loss": 0.3883, "step": 11368 }, { "epoch": 0.9210142579390798, "grad_norm": 0.04121264070272446, "learning_rate": 0.00017105630316395878, "loss": 0.3286, "step": 11369 }, { "epoch": 0.9210952689565781, "grad_norm": 0.03879169002175331, "learning_rate": 0.00017105180251136414, "loss": 0.3051, "step": 11370 }, { "epoch": 0.9211762799740765, "grad_norm": 0.03608115017414093, "learning_rate": 0.00017104730185876952, "loss": 0.318, "step": 11371 }, { "epoch": 0.9212572909915748, "grad_norm": 0.032185833901166916, "learning_rate": 0.0001710428012061749, "loss": 0.3164, "step": 11372 }, { "epoch": 0.9213383020090732, "grad_norm": 0.035732269287109375, "learning_rate": 0.00017103830055358027, "loss": 0.3474, "step": 11373 }, { "epoch": 0.9214193130265717, "grad_norm": 0.03568696603178978, "learning_rate": 0.00017103379990098566, "loss": 0.3533, "step": 11374 }, { "epoch": 0.92150032404407, "grad_norm": 0.03487107902765274, "learning_rate": 0.00017102929924839102, "loss": 0.2939, "step": 11375 }, { "epoch": 0.9215813350615684, "grad_norm": 0.03586133196949959, "learning_rate": 0.0001710247985957964, "loss": 0.3227, "step": 11376 }, { "epoch": 0.9216623460790667, "grad_norm": 0.039440032094717026, "learning_rate": 0.00017102029794320177, "loss": 0.3714, "step": 11377 }, { "epoch": 0.9217433570965652, "grad_norm": 0.03375830128788948, "learning_rate": 0.00017101579729060715, "loss": 0.3298, "step": 11378 }, { "epoch": 0.9218243681140635, "grad_norm": 0.03659455105662346, "learning_rate": 0.0001710112966380125, "loss": 0.3109, "step": 11379 }, { "epoch": 0.9219053791315619, "grad_norm": 0.030281485989689827, "learning_rate": 0.0001710067959854179, "loss": 0.321, "step": 11380 }, { "epoch": 0.9219863901490603, "grad_norm": 0.03286823630332947, "learning_rate": 0.00017100229533282326, "loss": 0.3255, "step": 11381 }, { "epoch": 0.9220674011665586, "grad_norm": 0.02968735620379448, "learning_rate": 0.00017099779468022865, "loss": 0.2788, "step": 11382 }, { "epoch": 0.9221484121840571, "grad_norm": 0.034185122698545456, "learning_rate": 0.000170993294027634, "loss": 0.3215, "step": 11383 }, { "epoch": 0.9222294232015554, "grad_norm": 0.030537785962224007, "learning_rate": 0.0001709887933750394, "loss": 0.3023, "step": 11384 }, { "epoch": 0.9223104342190538, "grad_norm": 0.03613237664103508, "learning_rate": 0.00017098429272244476, "loss": 0.338, "step": 11385 }, { "epoch": 0.9223914452365521, "grad_norm": 0.03522944450378418, "learning_rate": 0.00017097979206985014, "loss": 0.3225, "step": 11386 }, { "epoch": 0.9224724562540505, "grad_norm": 0.0329560749232769, "learning_rate": 0.0001709752914172555, "loss": 0.3556, "step": 11387 }, { "epoch": 0.922553467271549, "grad_norm": 0.03431179001927376, "learning_rate": 0.0001709707907646609, "loss": 0.3191, "step": 11388 }, { "epoch": 0.9226344782890473, "grad_norm": 0.03724316507577896, "learning_rate": 0.00017096629011206628, "loss": 0.3533, "step": 11389 }, { "epoch": 0.9227154893065457, "grad_norm": 0.034799784421920776, "learning_rate": 0.00017096178945947164, "loss": 0.3598, "step": 11390 }, { "epoch": 0.922796500324044, "grad_norm": 0.03583141788840294, "learning_rate": 0.000170957288806877, "loss": 0.3348, "step": 11391 }, { "epoch": 0.9228775113415425, "grad_norm": 0.033143218606710434, "learning_rate": 0.00017095278815428238, "loss": 0.3405, "step": 11392 }, { "epoch": 0.9229585223590409, "grad_norm": 0.03624225780367851, "learning_rate": 0.00017094828750168774, "loss": 0.3299, "step": 11393 }, { "epoch": 0.9230395333765392, "grad_norm": 0.04120873287320137, "learning_rate": 0.00017094378684909313, "loss": 0.3725, "step": 11394 }, { "epoch": 0.9231205443940376, "grad_norm": 0.038186412304639816, "learning_rate": 0.00017093928619649852, "loss": 0.3405, "step": 11395 }, { "epoch": 0.9232015554115359, "grad_norm": 0.03770510479807854, "learning_rate": 0.00017093478554390388, "loss": 0.3575, "step": 11396 }, { "epoch": 0.9232825664290344, "grad_norm": 0.033232200890779495, "learning_rate": 0.00017093028489130924, "loss": 0.2977, "step": 11397 }, { "epoch": 0.9233635774465327, "grad_norm": 0.02936842478811741, "learning_rate": 0.00017092578423871463, "loss": 0.2745, "step": 11398 }, { "epoch": 0.9234445884640311, "grad_norm": 0.03786390647292137, "learning_rate": 0.00017092128358611999, "loss": 0.3699, "step": 11399 }, { "epoch": 0.9235255994815295, "grad_norm": 0.03299250081181526, "learning_rate": 0.00017091678293352537, "loss": 0.3151, "step": 11400 }, { "epoch": 0.9236066104990278, "grad_norm": 0.03264123201370239, "learning_rate": 0.00017091228228093076, "loss": 0.3312, "step": 11401 }, { "epoch": 0.9236876215165263, "grad_norm": 0.039083484560251236, "learning_rate": 0.00017090778162833612, "loss": 0.3418, "step": 11402 }, { "epoch": 0.9237686325340246, "grad_norm": 0.03410564363002777, "learning_rate": 0.00017090328097574148, "loss": 0.345, "step": 11403 }, { "epoch": 0.923849643551523, "grad_norm": 0.036052338778972626, "learning_rate": 0.00017089878032314687, "loss": 0.3506, "step": 11404 }, { "epoch": 0.9239306545690213, "grad_norm": 0.03703179582953453, "learning_rate": 0.00017089427967055225, "loss": 0.3332, "step": 11405 }, { "epoch": 0.9240116655865198, "grad_norm": 0.03270883858203888, "learning_rate": 0.00017088977901795761, "loss": 0.3088, "step": 11406 }, { "epoch": 0.9240926766040182, "grad_norm": 0.03068448416888714, "learning_rate": 0.000170885278365363, "loss": 0.3115, "step": 11407 }, { "epoch": 0.9241736876215165, "grad_norm": 0.037590015679597855, "learning_rate": 0.00017088077771276836, "loss": 0.3389, "step": 11408 }, { "epoch": 0.9242546986390149, "grad_norm": 0.035884857177734375, "learning_rate": 0.00017087627706017372, "loss": 0.3257, "step": 11409 }, { "epoch": 0.9243357096565132, "grad_norm": 0.03558656573295593, "learning_rate": 0.0001708717764075791, "loss": 0.3096, "step": 11410 }, { "epoch": 0.9244167206740117, "grad_norm": 0.03657744452357292, "learning_rate": 0.0001708672757549845, "loss": 0.3602, "step": 11411 }, { "epoch": 0.9244977316915101, "grad_norm": 0.03377017378807068, "learning_rate": 0.00017086277510238986, "loss": 0.253, "step": 11412 }, { "epoch": 0.9245787427090084, "grad_norm": 0.03472224622964859, "learning_rate": 0.00017085827444979524, "loss": 0.3189, "step": 11413 }, { "epoch": 0.9246597537265068, "grad_norm": 0.033812422305345535, "learning_rate": 0.0001708537737972006, "loss": 0.3387, "step": 11414 }, { "epoch": 0.9247407647440052, "grad_norm": 0.03222503140568733, "learning_rate": 0.00017084927314460596, "loss": 0.3508, "step": 11415 }, { "epoch": 0.9248217757615036, "grad_norm": 0.04204294830560684, "learning_rate": 0.00017084477249201135, "loss": 0.3582, "step": 11416 }, { "epoch": 0.924902786779002, "grad_norm": 0.034957461059093475, "learning_rate": 0.00017084027183941674, "loss": 0.3077, "step": 11417 }, { "epoch": 0.9249837977965003, "grad_norm": 0.039951667189598083, "learning_rate": 0.0001708357711868221, "loss": 0.37, "step": 11418 }, { "epoch": 0.9250648088139987, "grad_norm": 0.034192297607660294, "learning_rate": 0.00017083127053422748, "loss": 0.2993, "step": 11419 }, { "epoch": 0.9251458198314971, "grad_norm": 0.03962968662381172, "learning_rate": 0.00017082676988163284, "loss": 0.4001, "step": 11420 }, { "epoch": 0.9252268308489955, "grad_norm": 0.03515026345849037, "learning_rate": 0.0001708222692290382, "loss": 0.3177, "step": 11421 }, { "epoch": 0.9253078418664938, "grad_norm": 0.038349054753780365, "learning_rate": 0.0001708177685764436, "loss": 0.3432, "step": 11422 }, { "epoch": 0.9253888528839922, "grad_norm": 0.035203397274017334, "learning_rate": 0.00017081326792384898, "loss": 0.3304, "step": 11423 }, { "epoch": 0.9254698639014906, "grad_norm": 0.029140474274754524, "learning_rate": 0.00017080876727125434, "loss": 0.2933, "step": 11424 }, { "epoch": 0.925550874918989, "grad_norm": 0.03449895232915878, "learning_rate": 0.00017080426661865973, "loss": 0.3045, "step": 11425 }, { "epoch": 0.9256318859364874, "grad_norm": 0.03640986233949661, "learning_rate": 0.00017079976596606509, "loss": 0.3374, "step": 11426 }, { "epoch": 0.9257128969539857, "grad_norm": 0.043093591928482056, "learning_rate": 0.00017079526531347045, "loss": 0.3548, "step": 11427 }, { "epoch": 0.9257939079714841, "grad_norm": 0.030513029545545578, "learning_rate": 0.00017079076466087583, "loss": 0.2871, "step": 11428 }, { "epoch": 0.9258749189889826, "grad_norm": 0.029281042516231537, "learning_rate": 0.00017078626400828122, "loss": 0.2716, "step": 11429 }, { "epoch": 0.9259559300064809, "grad_norm": 0.04084150120615959, "learning_rate": 0.00017078176335568658, "loss": 0.3118, "step": 11430 }, { "epoch": 0.9260369410239793, "grad_norm": 0.03145955502986908, "learning_rate": 0.00017077726270309197, "loss": 0.319, "step": 11431 }, { "epoch": 0.9261179520414776, "grad_norm": 0.03483365476131439, "learning_rate": 0.00017077276205049733, "loss": 0.3576, "step": 11432 }, { "epoch": 0.926198963058976, "grad_norm": 0.038508594036102295, "learning_rate": 0.0001707682613979027, "loss": 0.3321, "step": 11433 }, { "epoch": 0.9262799740764744, "grad_norm": 0.03275780379772186, "learning_rate": 0.00017076376074530808, "loss": 0.3241, "step": 11434 }, { "epoch": 0.9263609850939728, "grad_norm": 0.028733495622873306, "learning_rate": 0.00017075926009271346, "loss": 0.28, "step": 11435 }, { "epoch": 0.9264419961114712, "grad_norm": 0.03230396658182144, "learning_rate": 0.00017075475944011882, "loss": 0.2993, "step": 11436 }, { "epoch": 0.9265230071289695, "grad_norm": 0.03491473197937012, "learning_rate": 0.0001707502587875242, "loss": 0.3475, "step": 11437 }, { "epoch": 0.9266040181464679, "grad_norm": 0.03464370220899582, "learning_rate": 0.00017074575813492957, "loss": 0.3335, "step": 11438 }, { "epoch": 0.9266850291639663, "grad_norm": 0.032484348863363266, "learning_rate": 0.00017074125748233493, "loss": 0.3357, "step": 11439 }, { "epoch": 0.9267660401814647, "grad_norm": 0.035478755831718445, "learning_rate": 0.00017073675682974032, "loss": 0.3395, "step": 11440 }, { "epoch": 0.926847051198963, "grad_norm": 0.03466634079813957, "learning_rate": 0.0001707322561771457, "loss": 0.3336, "step": 11441 }, { "epoch": 0.9269280622164614, "grad_norm": 0.0307964738458395, "learning_rate": 0.00017072775552455106, "loss": 0.291, "step": 11442 }, { "epoch": 0.9270090732339599, "grad_norm": 0.036242470145225525, "learning_rate": 0.00017072325487195645, "loss": 0.3682, "step": 11443 }, { "epoch": 0.9270900842514582, "grad_norm": 0.03543122857809067, "learning_rate": 0.0001707187542193618, "loss": 0.3305, "step": 11444 }, { "epoch": 0.9271710952689566, "grad_norm": 0.03091447241604328, "learning_rate": 0.00017071425356676717, "loss": 0.3281, "step": 11445 }, { "epoch": 0.9272521062864549, "grad_norm": 0.03639169782400131, "learning_rate": 0.00017070975291417256, "loss": 0.351, "step": 11446 }, { "epoch": 0.9273331173039533, "grad_norm": 0.03385458141565323, "learning_rate": 0.00017070525226157795, "loss": 0.3301, "step": 11447 }, { "epoch": 0.9274141283214518, "grad_norm": 0.034979742020368576, "learning_rate": 0.0001707007516089833, "loss": 0.3356, "step": 11448 }, { "epoch": 0.9274951393389501, "grad_norm": 0.03415704146027565, "learning_rate": 0.0001706962509563887, "loss": 0.3231, "step": 11449 }, { "epoch": 0.9275761503564485, "grad_norm": 0.03785685822367668, "learning_rate": 0.00017069175030379405, "loss": 0.3528, "step": 11450 }, { "epoch": 0.9276571613739468, "grad_norm": 0.03500499576330185, "learning_rate": 0.0001706872496511994, "loss": 0.3426, "step": 11451 }, { "epoch": 0.9277381723914452, "grad_norm": 0.036131519824266434, "learning_rate": 0.0001706827489986048, "loss": 0.3472, "step": 11452 }, { "epoch": 0.9278191834089436, "grad_norm": 0.03363850712776184, "learning_rate": 0.0001706782483460102, "loss": 0.3279, "step": 11453 }, { "epoch": 0.927900194426442, "grad_norm": 0.035742562264204025, "learning_rate": 0.00017067374769341555, "loss": 0.3214, "step": 11454 }, { "epoch": 0.9279812054439404, "grad_norm": 0.03095664642751217, "learning_rate": 0.00017066924704082093, "loss": 0.3056, "step": 11455 }, { "epoch": 0.9280622164614387, "grad_norm": 0.03762781620025635, "learning_rate": 0.0001706647463882263, "loss": 0.3286, "step": 11456 }, { "epoch": 0.9281432274789372, "grad_norm": 0.038630541414022446, "learning_rate": 0.00017066024573563168, "loss": 0.3846, "step": 11457 }, { "epoch": 0.9282242384964355, "grad_norm": 0.038850728422403336, "learning_rate": 0.00017065574508303707, "loss": 0.3631, "step": 11458 }, { "epoch": 0.9283052495139339, "grad_norm": 0.03348441794514656, "learning_rate": 0.00017065124443044243, "loss": 0.3132, "step": 11459 }, { "epoch": 0.9283862605314323, "grad_norm": 0.0356043204665184, "learning_rate": 0.0001706467437778478, "loss": 0.3085, "step": 11460 }, { "epoch": 0.9284672715489306, "grad_norm": 0.03675344958901405, "learning_rate": 0.00017064224312525318, "loss": 0.3378, "step": 11461 }, { "epoch": 0.9285482825664291, "grad_norm": 0.03896648809313774, "learning_rate": 0.00017063774247265854, "loss": 0.3529, "step": 11462 }, { "epoch": 0.9286292935839274, "grad_norm": 0.03709070757031441, "learning_rate": 0.00017063324182006392, "loss": 0.3499, "step": 11463 }, { "epoch": 0.9287103046014258, "grad_norm": 0.03415576368570328, "learning_rate": 0.0001706287411674693, "loss": 0.3342, "step": 11464 }, { "epoch": 0.9287913156189241, "grad_norm": 0.03336158022284508, "learning_rate": 0.00017062424051487467, "loss": 0.3098, "step": 11465 }, { "epoch": 0.9288723266364226, "grad_norm": 0.03390232473611832, "learning_rate": 0.00017061973986228003, "loss": 0.3064, "step": 11466 }, { "epoch": 0.928953337653921, "grad_norm": 0.03544805198907852, "learning_rate": 0.00017061523920968542, "loss": 0.3387, "step": 11467 }, { "epoch": 0.9290343486714193, "grad_norm": 0.03590017184615135, "learning_rate": 0.00017061073855709078, "loss": 0.3063, "step": 11468 }, { "epoch": 0.9291153596889177, "grad_norm": 0.031386904418468475, "learning_rate": 0.00017060623790449616, "loss": 0.2848, "step": 11469 }, { "epoch": 0.929196370706416, "grad_norm": 0.03534477576613426, "learning_rate": 0.00017060173725190155, "loss": 0.36, "step": 11470 }, { "epoch": 0.9292773817239145, "grad_norm": 0.033903393894433975, "learning_rate": 0.0001705972365993069, "loss": 0.2919, "step": 11471 }, { "epoch": 0.9293583927414129, "grad_norm": 0.03623591363430023, "learning_rate": 0.00017059273594671227, "loss": 0.3357, "step": 11472 }, { "epoch": 0.9294394037589112, "grad_norm": 0.03716109320521355, "learning_rate": 0.00017058823529411766, "loss": 0.3357, "step": 11473 }, { "epoch": 0.9295204147764096, "grad_norm": 0.03469749540090561, "learning_rate": 0.00017058373464152302, "loss": 0.3176, "step": 11474 }, { "epoch": 0.9296014257939079, "grad_norm": 0.03799695894122124, "learning_rate": 0.0001705792339889284, "loss": 0.3297, "step": 11475 }, { "epoch": 0.9296824368114064, "grad_norm": 0.03861555457115173, "learning_rate": 0.0001705747333363338, "loss": 0.3193, "step": 11476 }, { "epoch": 0.9297634478289047, "grad_norm": 0.02883889339864254, "learning_rate": 0.00017057023268373915, "loss": 0.2863, "step": 11477 }, { "epoch": 0.9298444588464031, "grad_norm": 0.03888789564371109, "learning_rate": 0.00017056573203114451, "loss": 0.343, "step": 11478 }, { "epoch": 0.9299254698639015, "grad_norm": 0.03772765025496483, "learning_rate": 0.0001705612313785499, "loss": 0.3638, "step": 11479 }, { "epoch": 0.9300064808813999, "grad_norm": 0.03691214695572853, "learning_rate": 0.0001705567307259553, "loss": 0.3235, "step": 11480 }, { "epoch": 0.9300874918988983, "grad_norm": 0.03321418911218643, "learning_rate": 0.00017055223007336065, "loss": 0.3747, "step": 11481 }, { "epoch": 0.9301685029163966, "grad_norm": 0.03231301158666611, "learning_rate": 0.00017054772942076604, "loss": 0.3113, "step": 11482 }, { "epoch": 0.930249513933895, "grad_norm": 0.03366365283727646, "learning_rate": 0.0001705432287681714, "loss": 0.3078, "step": 11483 }, { "epoch": 0.9303305249513933, "grad_norm": 0.03471920266747475, "learning_rate": 0.00017053872811557676, "loss": 0.3472, "step": 11484 }, { "epoch": 0.9304115359688918, "grad_norm": 0.03448682278394699, "learning_rate": 0.00017053422746298214, "loss": 0.3388, "step": 11485 }, { "epoch": 0.9304925469863902, "grad_norm": 0.033348411321640015, "learning_rate": 0.00017052972681038753, "loss": 0.358, "step": 11486 }, { "epoch": 0.9305735580038885, "grad_norm": 0.03339369595050812, "learning_rate": 0.0001705252261577929, "loss": 0.3165, "step": 11487 }, { "epoch": 0.9306545690213869, "grad_norm": 0.033238619565963745, "learning_rate": 0.00017052072550519828, "loss": 0.3264, "step": 11488 }, { "epoch": 0.9307355800388852, "grad_norm": 0.03498007729649544, "learning_rate": 0.00017051622485260364, "loss": 0.3196, "step": 11489 }, { "epoch": 0.9308165910563837, "grad_norm": 0.03177583962678909, "learning_rate": 0.000170511724200009, "loss": 0.3158, "step": 11490 }, { "epoch": 0.9308976020738821, "grad_norm": 0.036919716745615005, "learning_rate": 0.00017050722354741438, "loss": 0.3724, "step": 11491 }, { "epoch": 0.9309786130913804, "grad_norm": 0.03319632634520531, "learning_rate": 0.00017050272289481977, "loss": 0.3294, "step": 11492 }, { "epoch": 0.9310596241088788, "grad_norm": 0.03813881799578667, "learning_rate": 0.00017049822224222513, "loss": 0.3445, "step": 11493 }, { "epoch": 0.9311406351263772, "grad_norm": 0.03665956109762192, "learning_rate": 0.00017049372158963052, "loss": 0.3549, "step": 11494 }, { "epoch": 0.9312216461438756, "grad_norm": 0.033054448664188385, "learning_rate": 0.00017048922093703588, "loss": 0.3153, "step": 11495 }, { "epoch": 0.931302657161374, "grad_norm": 0.033781781792640686, "learning_rate": 0.00017048472028444124, "loss": 0.3292, "step": 11496 }, { "epoch": 0.9313836681788723, "grad_norm": 0.030734725296497345, "learning_rate": 0.00017048021963184663, "loss": 0.2866, "step": 11497 }, { "epoch": 0.9314646791963707, "grad_norm": 0.0317283570766449, "learning_rate": 0.000170475718979252, "loss": 0.3138, "step": 11498 }, { "epoch": 0.9315456902138691, "grad_norm": 0.035663917660713196, "learning_rate": 0.00017047121832665737, "loss": 0.329, "step": 11499 }, { "epoch": 0.9316267012313675, "grad_norm": 0.03555869683623314, "learning_rate": 0.00017046671767406276, "loss": 0.3159, "step": 11500 }, { "epoch": 0.9317077122488658, "grad_norm": 0.035777702927589417, "learning_rate": 0.00017046221702146812, "loss": 0.3305, "step": 11501 }, { "epoch": 0.9317887232663642, "grad_norm": 0.029906559735536575, "learning_rate": 0.00017045771636887348, "loss": 0.3008, "step": 11502 }, { "epoch": 0.9318697342838627, "grad_norm": 0.03649575635790825, "learning_rate": 0.00017045321571627887, "loss": 0.3771, "step": 11503 }, { "epoch": 0.931950745301361, "grad_norm": 0.033256951719522476, "learning_rate": 0.00017044871506368425, "loss": 0.3219, "step": 11504 }, { "epoch": 0.9320317563188594, "grad_norm": 0.0365782156586647, "learning_rate": 0.00017044421441108961, "loss": 0.3143, "step": 11505 }, { "epoch": 0.9321127673363577, "grad_norm": 0.03269030153751373, "learning_rate": 0.000170439713758495, "loss": 0.2937, "step": 11506 }, { "epoch": 0.9321937783538561, "grad_norm": 0.03456766530871391, "learning_rate": 0.00017043521310590036, "loss": 0.3096, "step": 11507 }, { "epoch": 0.9322747893713546, "grad_norm": 0.035860177129507065, "learning_rate": 0.00017043071245330572, "loss": 0.3588, "step": 11508 }, { "epoch": 0.9323558003888529, "grad_norm": 0.034222107380628586, "learning_rate": 0.0001704262118007111, "loss": 0.3318, "step": 11509 }, { "epoch": 0.9324368114063513, "grad_norm": 0.04168862849473953, "learning_rate": 0.0001704217111481165, "loss": 0.3149, "step": 11510 }, { "epoch": 0.9325178224238496, "grad_norm": 0.03862250596284866, "learning_rate": 0.00017041721049552186, "loss": 0.3747, "step": 11511 }, { "epoch": 0.932598833441348, "grad_norm": 0.030834008008241653, "learning_rate": 0.00017041270984292724, "loss": 0.3011, "step": 11512 }, { "epoch": 0.9326798444588464, "grad_norm": 0.03323963284492493, "learning_rate": 0.0001704082091903326, "loss": 0.3053, "step": 11513 }, { "epoch": 0.9327608554763448, "grad_norm": 0.03129046410322189, "learning_rate": 0.00017040370853773796, "loss": 0.3059, "step": 11514 }, { "epoch": 0.9328418664938432, "grad_norm": 0.038254525512456894, "learning_rate": 0.00017039920788514335, "loss": 0.3393, "step": 11515 }, { "epoch": 0.9329228775113415, "grad_norm": 0.033706698566675186, "learning_rate": 0.00017039470723254874, "loss": 0.3213, "step": 11516 }, { "epoch": 0.93300388852884, "grad_norm": 0.032409097999334335, "learning_rate": 0.0001703902065799541, "loss": 0.2629, "step": 11517 }, { "epoch": 0.9330848995463383, "grad_norm": 0.0412583090364933, "learning_rate": 0.00017038570592735948, "loss": 0.397, "step": 11518 }, { "epoch": 0.9331659105638367, "grad_norm": 0.030987797304987907, "learning_rate": 0.00017038120527476484, "loss": 0.3079, "step": 11519 }, { "epoch": 0.933246921581335, "grad_norm": 0.03425063192844391, "learning_rate": 0.0001703767046221702, "loss": 0.3438, "step": 11520 }, { "epoch": 0.9333279325988334, "grad_norm": 0.032528121024370193, "learning_rate": 0.0001703722039695756, "loss": 0.339, "step": 11521 }, { "epoch": 0.9334089436163319, "grad_norm": 0.03559907153248787, "learning_rate": 0.00017036770331698098, "loss": 0.3516, "step": 11522 }, { "epoch": 0.9334899546338302, "grad_norm": 0.03166068717837334, "learning_rate": 0.00017036320266438634, "loss": 0.309, "step": 11523 }, { "epoch": 0.9335709656513286, "grad_norm": 0.034030213952064514, "learning_rate": 0.00017035870201179173, "loss": 0.3057, "step": 11524 }, { "epoch": 0.9336519766688269, "grad_norm": 0.03300134092569351, "learning_rate": 0.0001703542013591971, "loss": 0.3307, "step": 11525 }, { "epoch": 0.9337329876863253, "grad_norm": 0.03915845975279808, "learning_rate": 0.00017034970070660245, "loss": 0.3698, "step": 11526 }, { "epoch": 0.9338139987038238, "grad_norm": 0.032462168484926224, "learning_rate": 0.00017034520005400786, "loss": 0.3215, "step": 11527 }, { "epoch": 0.9338950097213221, "grad_norm": 0.03716924414038658, "learning_rate": 0.00017034069940141322, "loss": 0.3183, "step": 11528 }, { "epoch": 0.9339760207388205, "grad_norm": 0.03721143305301666, "learning_rate": 0.00017033619874881858, "loss": 0.3399, "step": 11529 }, { "epoch": 0.9340570317563188, "grad_norm": 0.034695371985435486, "learning_rate": 0.00017033169809622397, "loss": 0.2887, "step": 11530 }, { "epoch": 0.9341380427738173, "grad_norm": 0.03291356563568115, "learning_rate": 0.00017032719744362933, "loss": 0.3333, "step": 11531 }, { "epoch": 0.9342190537913156, "grad_norm": 0.04289603978395462, "learning_rate": 0.0001703226967910347, "loss": 0.3503, "step": 11532 }, { "epoch": 0.934300064808814, "grad_norm": 0.03919799253344536, "learning_rate": 0.0001703181961384401, "loss": 0.3218, "step": 11533 }, { "epoch": 0.9343810758263124, "grad_norm": 0.03670763596892357, "learning_rate": 0.00017031369548584546, "loss": 0.3216, "step": 11534 }, { "epoch": 0.9344620868438107, "grad_norm": 0.03523210808634758, "learning_rate": 0.00017030919483325082, "loss": 0.2918, "step": 11535 }, { "epoch": 0.9345430978613092, "grad_norm": 0.035426367074251175, "learning_rate": 0.0001703046941806562, "loss": 0.3414, "step": 11536 }, { "epoch": 0.9346241088788075, "grad_norm": 0.034479837864637375, "learning_rate": 0.00017030019352806157, "loss": 0.3218, "step": 11537 }, { "epoch": 0.9347051198963059, "grad_norm": 0.0363641120493412, "learning_rate": 0.00017029569287546696, "loss": 0.3709, "step": 11538 }, { "epoch": 0.9347861309138042, "grad_norm": 0.037798330187797546, "learning_rate": 0.00017029119222287234, "loss": 0.333, "step": 11539 }, { "epoch": 0.9348671419313026, "grad_norm": 0.03117884136736393, "learning_rate": 0.0001702866915702777, "loss": 0.315, "step": 11540 }, { "epoch": 0.9349481529488011, "grad_norm": 0.035172026604413986, "learning_rate": 0.00017028219091768306, "loss": 0.365, "step": 11541 }, { "epoch": 0.9350291639662994, "grad_norm": 0.03705654665827751, "learning_rate": 0.00017027769026508845, "loss": 0.3459, "step": 11542 }, { "epoch": 0.9351101749837978, "grad_norm": 0.03678044676780701, "learning_rate": 0.0001702731896124938, "loss": 0.3268, "step": 11543 }, { "epoch": 0.9351911860012961, "grad_norm": 0.03360627591609955, "learning_rate": 0.0001702686889598992, "loss": 0.3017, "step": 11544 }, { "epoch": 0.9352721970187946, "grad_norm": 0.038903381675481796, "learning_rate": 0.00017026418830730459, "loss": 0.3278, "step": 11545 }, { "epoch": 0.935353208036293, "grad_norm": 0.033496957272291183, "learning_rate": 0.00017025968765470995, "loss": 0.3388, "step": 11546 }, { "epoch": 0.9354342190537913, "grad_norm": 0.03374259173870087, "learning_rate": 0.0001702551870021153, "loss": 0.3332, "step": 11547 }, { "epoch": 0.9355152300712897, "grad_norm": 0.03631717339158058, "learning_rate": 0.0001702506863495207, "loss": 0.3125, "step": 11548 }, { "epoch": 0.935596241088788, "grad_norm": 0.032183315604925156, "learning_rate": 0.00017024618569692605, "loss": 0.3005, "step": 11549 }, { "epoch": 0.9356772521062865, "grad_norm": 0.03625780716538429, "learning_rate": 0.00017024168504433144, "loss": 0.3246, "step": 11550 }, { "epoch": 0.9357582631237849, "grad_norm": 0.03256357088685036, "learning_rate": 0.00017023718439173683, "loss": 0.3119, "step": 11551 }, { "epoch": 0.9358392741412832, "grad_norm": 0.046169064939022064, "learning_rate": 0.0001702326837391422, "loss": 0.4229, "step": 11552 }, { "epoch": 0.9359202851587816, "grad_norm": 0.03766137734055519, "learning_rate": 0.00017022818308654755, "loss": 0.3421, "step": 11553 }, { "epoch": 0.93600129617628, "grad_norm": 0.029175125062465668, "learning_rate": 0.00017022368243395293, "loss": 0.2945, "step": 11554 }, { "epoch": 0.9360823071937784, "grad_norm": 0.03890814259648323, "learning_rate": 0.0001702191817813583, "loss": 0.3493, "step": 11555 }, { "epoch": 0.9361633182112767, "grad_norm": 0.030468717217445374, "learning_rate": 0.00017021468112876368, "loss": 0.2837, "step": 11556 }, { "epoch": 0.9362443292287751, "grad_norm": 0.03299061954021454, "learning_rate": 0.00017021018047616907, "loss": 0.3475, "step": 11557 }, { "epoch": 0.9363253402462735, "grad_norm": 0.033994968980550766, "learning_rate": 0.00017020567982357443, "loss": 0.359, "step": 11558 }, { "epoch": 0.9364063512637719, "grad_norm": 0.03398413211107254, "learning_rate": 0.0001702011791709798, "loss": 0.34, "step": 11559 }, { "epoch": 0.9364873622812703, "grad_norm": 0.03915158659219742, "learning_rate": 0.00017019667851838518, "loss": 0.3156, "step": 11560 }, { "epoch": 0.9365683732987686, "grad_norm": 0.033859699964523315, "learning_rate": 0.00017019217786579056, "loss": 0.3224, "step": 11561 }, { "epoch": 0.936649384316267, "grad_norm": 0.0342167429625988, "learning_rate": 0.00017018767721319592, "loss": 0.3362, "step": 11562 }, { "epoch": 0.9367303953337653, "grad_norm": 0.03582310676574707, "learning_rate": 0.0001701831765606013, "loss": 0.342, "step": 11563 }, { "epoch": 0.9368114063512638, "grad_norm": 0.03299706429243088, "learning_rate": 0.00017017867590800667, "loss": 0.3339, "step": 11564 }, { "epoch": 0.9368924173687622, "grad_norm": 0.03426812216639519, "learning_rate": 0.00017017417525541203, "loss": 0.3237, "step": 11565 }, { "epoch": 0.9369734283862605, "grad_norm": 0.03299387916922569, "learning_rate": 0.00017016967460281742, "loss": 0.305, "step": 11566 }, { "epoch": 0.9370544394037589, "grad_norm": 0.03170327469706535, "learning_rate": 0.0001701651739502228, "loss": 0.3085, "step": 11567 }, { "epoch": 0.9371354504212573, "grad_norm": 0.03617962822318077, "learning_rate": 0.00017016067329762817, "loss": 0.3366, "step": 11568 }, { "epoch": 0.9372164614387557, "grad_norm": 0.03227115795016289, "learning_rate": 0.00017015617264503355, "loss": 0.3222, "step": 11569 }, { "epoch": 0.937297472456254, "grad_norm": 0.03870026767253876, "learning_rate": 0.0001701516719924389, "loss": 0.3413, "step": 11570 }, { "epoch": 0.9373784834737524, "grad_norm": 0.03202787786722183, "learning_rate": 0.00017014717133984427, "loss": 0.3302, "step": 11571 }, { "epoch": 0.9374594944912508, "grad_norm": 0.034028373658657074, "learning_rate": 0.00017014267068724966, "loss": 0.324, "step": 11572 }, { "epoch": 0.9375405055087492, "grad_norm": 0.03828026354312897, "learning_rate": 0.00017013817003465505, "loss": 0.3441, "step": 11573 }, { "epoch": 0.9376215165262476, "grad_norm": 0.035729967057704926, "learning_rate": 0.0001701336693820604, "loss": 0.288, "step": 11574 }, { "epoch": 0.937702527543746, "grad_norm": 0.02852979488670826, "learning_rate": 0.0001701291687294658, "loss": 0.2586, "step": 11575 }, { "epoch": 0.9377835385612443, "grad_norm": 0.03379920497536659, "learning_rate": 0.00017012466807687115, "loss": 0.2936, "step": 11576 }, { "epoch": 0.9378645495787427, "grad_norm": 0.03683913126587868, "learning_rate": 0.00017012016742427651, "loss": 0.3608, "step": 11577 }, { "epoch": 0.9379455605962411, "grad_norm": 0.03738532215356827, "learning_rate": 0.0001701156667716819, "loss": 0.3765, "step": 11578 }, { "epoch": 0.9380265716137395, "grad_norm": 0.03998420760035515, "learning_rate": 0.0001701111661190873, "loss": 0.3014, "step": 11579 }, { "epoch": 0.9381075826312378, "grad_norm": 0.03761008381843567, "learning_rate": 0.00017010666546649265, "loss": 0.3352, "step": 11580 }, { "epoch": 0.9381885936487362, "grad_norm": 0.03799549490213394, "learning_rate": 0.00017010216481389804, "loss": 0.326, "step": 11581 }, { "epoch": 0.9382696046662347, "grad_norm": 0.03644454851746559, "learning_rate": 0.0001700976641613034, "loss": 0.3306, "step": 11582 }, { "epoch": 0.938350615683733, "grad_norm": 0.0304781012237072, "learning_rate": 0.00017009316350870876, "loss": 0.2902, "step": 11583 }, { "epoch": 0.9384316267012314, "grad_norm": 0.0314607247710228, "learning_rate": 0.00017008866285611414, "loss": 0.3029, "step": 11584 }, { "epoch": 0.9385126377187297, "grad_norm": 0.03330124914646149, "learning_rate": 0.00017008416220351953, "loss": 0.3073, "step": 11585 }, { "epoch": 0.9385936487362281, "grad_norm": 0.038247060030698776, "learning_rate": 0.0001700796615509249, "loss": 0.3443, "step": 11586 }, { "epoch": 0.9386746597537265, "grad_norm": 0.03827197477221489, "learning_rate": 0.00017007516089833028, "loss": 0.3741, "step": 11587 }, { "epoch": 0.9387556707712249, "grad_norm": 0.035438474267721176, "learning_rate": 0.00017007066024573564, "loss": 0.3206, "step": 11588 }, { "epoch": 0.9388366817887233, "grad_norm": 0.03596781566739082, "learning_rate": 0.000170066159593141, "loss": 0.3348, "step": 11589 }, { "epoch": 0.9389176928062216, "grad_norm": 0.031203007325530052, "learning_rate": 0.00017006165894054638, "loss": 0.3496, "step": 11590 }, { "epoch": 0.93899870382372, "grad_norm": 0.03926529362797737, "learning_rate": 0.00017005715828795177, "loss": 0.3612, "step": 11591 }, { "epoch": 0.9390797148412184, "grad_norm": 0.04273316264152527, "learning_rate": 0.00017005265763535713, "loss": 0.364, "step": 11592 }, { "epoch": 0.9391607258587168, "grad_norm": 0.033596985042095184, "learning_rate": 0.00017004815698276252, "loss": 0.3308, "step": 11593 }, { "epoch": 0.9392417368762151, "grad_norm": 0.03773493692278862, "learning_rate": 0.00017004365633016788, "loss": 0.3731, "step": 11594 }, { "epoch": 0.9393227478937135, "grad_norm": 0.03566722571849823, "learning_rate": 0.00017003915567757324, "loss": 0.3478, "step": 11595 }, { "epoch": 0.939403758911212, "grad_norm": 0.029825484380126, "learning_rate": 0.00017003465502497865, "loss": 0.2838, "step": 11596 }, { "epoch": 0.9394847699287103, "grad_norm": 0.034685224294662476, "learning_rate": 0.000170030154372384, "loss": 0.3065, "step": 11597 }, { "epoch": 0.9395657809462087, "grad_norm": 0.03669968619942665, "learning_rate": 0.00017002565371978937, "loss": 0.3638, "step": 11598 }, { "epoch": 0.939646791963707, "grad_norm": 0.035428546369075775, "learning_rate": 0.00017002115306719476, "loss": 0.2985, "step": 11599 }, { "epoch": 0.9397278029812054, "grad_norm": 0.032245926558971405, "learning_rate": 0.00017001665241460012, "loss": 0.3037, "step": 11600 }, { "epoch": 0.9398088139987039, "grad_norm": 0.03529680520296097, "learning_rate": 0.00017001215176200548, "loss": 0.3433, "step": 11601 }, { "epoch": 0.9398898250162022, "grad_norm": 0.035892363637685776, "learning_rate": 0.0001700076511094109, "loss": 0.3487, "step": 11602 }, { "epoch": 0.9399708360337006, "grad_norm": 0.03226469084620476, "learning_rate": 0.00017000315045681625, "loss": 0.3295, "step": 11603 }, { "epoch": 0.9400518470511989, "grad_norm": 0.030066296458244324, "learning_rate": 0.00016999864980422161, "loss": 0.2968, "step": 11604 }, { "epoch": 0.9401328580686974, "grad_norm": 0.035130247473716736, "learning_rate": 0.000169994149151627, "loss": 0.3375, "step": 11605 }, { "epoch": 0.9402138690861958, "grad_norm": 0.033153027296066284, "learning_rate": 0.00016998964849903236, "loss": 0.3437, "step": 11606 }, { "epoch": 0.9402948801036941, "grad_norm": 0.03841940313577652, "learning_rate": 0.00016998514784643772, "loss": 0.3726, "step": 11607 }, { "epoch": 0.9403758911211925, "grad_norm": 0.036327630281448364, "learning_rate": 0.00016998064719384314, "loss": 0.332, "step": 11608 }, { "epoch": 0.9404569021386908, "grad_norm": 0.03609196096658707, "learning_rate": 0.0001699761465412485, "loss": 0.3465, "step": 11609 }, { "epoch": 0.9405379131561893, "grad_norm": 0.034757182002067566, "learning_rate": 0.00016997164588865386, "loss": 0.3241, "step": 11610 }, { "epoch": 0.9406189241736876, "grad_norm": 0.04242127388715744, "learning_rate": 0.00016996714523605924, "loss": 0.3737, "step": 11611 }, { "epoch": 0.940699935191186, "grad_norm": 0.037196334451436996, "learning_rate": 0.0001699626445834646, "loss": 0.3549, "step": 11612 }, { "epoch": 0.9407809462086844, "grad_norm": 0.03243549168109894, "learning_rate": 0.00016995814393087, "loss": 0.3365, "step": 11613 }, { "epoch": 0.9408619572261827, "grad_norm": 0.03829599916934967, "learning_rate": 0.00016995364327827538, "loss": 0.3163, "step": 11614 }, { "epoch": 0.9409429682436812, "grad_norm": 0.03744173422455788, "learning_rate": 0.00016994914262568074, "loss": 0.3281, "step": 11615 }, { "epoch": 0.9410239792611795, "grad_norm": 0.035117655992507935, "learning_rate": 0.0001699446419730861, "loss": 0.337, "step": 11616 }, { "epoch": 0.9411049902786779, "grad_norm": 0.037259314209222794, "learning_rate": 0.00016994014132049149, "loss": 0.3937, "step": 11617 }, { "epoch": 0.9411860012961762, "grad_norm": 0.0373586043715477, "learning_rate": 0.00016993564066789685, "loss": 0.3403, "step": 11618 }, { "epoch": 0.9412670123136747, "grad_norm": 0.03547549992799759, "learning_rate": 0.00016993114001530223, "loss": 0.333, "step": 11619 }, { "epoch": 0.9413480233311731, "grad_norm": 0.03412862494587898, "learning_rate": 0.00016992663936270762, "loss": 0.3196, "step": 11620 }, { "epoch": 0.9414290343486714, "grad_norm": 0.03266661614179611, "learning_rate": 0.00016992213871011298, "loss": 0.319, "step": 11621 }, { "epoch": 0.9415100453661698, "grad_norm": 0.035066504031419754, "learning_rate": 0.00016991763805751834, "loss": 0.343, "step": 11622 }, { "epoch": 0.9415910563836681, "grad_norm": 0.03465098515152931, "learning_rate": 0.00016991313740492373, "loss": 0.3424, "step": 11623 }, { "epoch": 0.9416720674011666, "grad_norm": 0.03283773362636566, "learning_rate": 0.0001699086367523291, "loss": 0.3125, "step": 11624 }, { "epoch": 0.941753078418665, "grad_norm": 0.033413149416446686, "learning_rate": 0.00016990413609973447, "loss": 0.3532, "step": 11625 }, { "epoch": 0.9418340894361633, "grad_norm": 0.029487356543540955, "learning_rate": 0.00016989963544713986, "loss": 0.2655, "step": 11626 }, { "epoch": 0.9419151004536617, "grad_norm": 0.04271164536476135, "learning_rate": 0.00016989513479454522, "loss": 0.3881, "step": 11627 }, { "epoch": 0.94199611147116, "grad_norm": 0.03455482795834541, "learning_rate": 0.00016989063414195058, "loss": 0.3244, "step": 11628 }, { "epoch": 0.9420771224886585, "grad_norm": 0.03296886757016182, "learning_rate": 0.00016988613348935597, "loss": 0.3237, "step": 11629 }, { "epoch": 0.9421581335061568, "grad_norm": 0.03511711582541466, "learning_rate": 0.00016988163283676133, "loss": 0.3444, "step": 11630 }, { "epoch": 0.9422391445236552, "grad_norm": 0.03849519416689873, "learning_rate": 0.00016987713218416672, "loss": 0.3313, "step": 11631 }, { "epoch": 0.9423201555411536, "grad_norm": 0.037213683128356934, "learning_rate": 0.0001698726315315721, "loss": 0.32, "step": 11632 }, { "epoch": 0.942401166558652, "grad_norm": 0.03361336514353752, "learning_rate": 0.00016986813087897746, "loss": 0.3153, "step": 11633 }, { "epoch": 0.9424821775761504, "grad_norm": 0.03675851225852966, "learning_rate": 0.00016986363022638282, "loss": 0.3525, "step": 11634 }, { "epoch": 0.9425631885936487, "grad_norm": 0.0376199372112751, "learning_rate": 0.0001698591295737882, "loss": 0.3468, "step": 11635 }, { "epoch": 0.9426441996111471, "grad_norm": 0.0355244055390358, "learning_rate": 0.00016985462892119357, "loss": 0.3516, "step": 11636 }, { "epoch": 0.9427252106286454, "grad_norm": 0.03431635722517967, "learning_rate": 0.00016985012826859896, "loss": 0.2943, "step": 11637 }, { "epoch": 0.9428062216461439, "grad_norm": 0.032604239881038666, "learning_rate": 0.00016984562761600434, "loss": 0.3412, "step": 11638 }, { "epoch": 0.9428872326636423, "grad_norm": 0.0344766266644001, "learning_rate": 0.0001698411269634097, "loss": 0.3224, "step": 11639 }, { "epoch": 0.9429682436811406, "grad_norm": 0.03054865449666977, "learning_rate": 0.00016983662631081506, "loss": 0.284, "step": 11640 }, { "epoch": 0.943049254698639, "grad_norm": 0.034912411123514175, "learning_rate": 0.00016983212565822045, "loss": 0.3106, "step": 11641 }, { "epoch": 0.9431302657161373, "grad_norm": 0.03549661487340927, "learning_rate": 0.00016982762500562584, "loss": 0.3144, "step": 11642 }, { "epoch": 0.9432112767336358, "grad_norm": 0.03743693605065346, "learning_rate": 0.0001698231243530312, "loss": 0.3596, "step": 11643 }, { "epoch": 0.9432922877511342, "grad_norm": 0.03865379840135574, "learning_rate": 0.00016981862370043659, "loss": 0.3242, "step": 11644 }, { "epoch": 0.9433732987686325, "grad_norm": 0.030137626454234123, "learning_rate": 0.00016981412304784195, "loss": 0.2934, "step": 11645 }, { "epoch": 0.9434543097861309, "grad_norm": 0.03273788467049599, "learning_rate": 0.0001698096223952473, "loss": 0.2888, "step": 11646 }, { "epoch": 0.9435353208036293, "grad_norm": 0.03315626084804535, "learning_rate": 0.0001698051217426527, "loss": 0.2767, "step": 11647 }, { "epoch": 0.9436163318211277, "grad_norm": 0.03848462179303169, "learning_rate": 0.00016980062109005808, "loss": 0.3099, "step": 11648 }, { "epoch": 0.943697342838626, "grad_norm": 0.030941886827349663, "learning_rate": 0.00016979612043746344, "loss": 0.2987, "step": 11649 }, { "epoch": 0.9437783538561244, "grad_norm": 0.04029727354645729, "learning_rate": 0.00016979161978486883, "loss": 0.3577, "step": 11650 }, { "epoch": 0.9438593648736228, "grad_norm": 0.041079651564359665, "learning_rate": 0.0001697871191322742, "loss": 0.3579, "step": 11651 }, { "epoch": 0.9439403758911212, "grad_norm": 0.033457282930612564, "learning_rate": 0.00016978261847967955, "loss": 0.3292, "step": 11652 }, { "epoch": 0.9440213869086196, "grad_norm": 0.03081076219677925, "learning_rate": 0.00016977811782708493, "loss": 0.2804, "step": 11653 }, { "epoch": 0.9441023979261179, "grad_norm": 0.034133411943912506, "learning_rate": 0.00016977361717449032, "loss": 0.2907, "step": 11654 }, { "epoch": 0.9441834089436163, "grad_norm": 0.030764104798436165, "learning_rate": 0.00016976911652189568, "loss": 0.2712, "step": 11655 }, { "epoch": 0.9442644199611148, "grad_norm": 0.03566576540470123, "learning_rate": 0.00016976461586930107, "loss": 0.3491, "step": 11656 }, { "epoch": 0.9443454309786131, "grad_norm": 0.038679346442222595, "learning_rate": 0.00016976011521670643, "loss": 0.3152, "step": 11657 }, { "epoch": 0.9444264419961115, "grad_norm": 0.03520083427429199, "learning_rate": 0.0001697556145641118, "loss": 0.2969, "step": 11658 }, { "epoch": 0.9445074530136098, "grad_norm": 0.03682069107890129, "learning_rate": 0.00016975111391151718, "loss": 0.3412, "step": 11659 }, { "epoch": 0.9445884640311082, "grad_norm": 0.034318264573812485, "learning_rate": 0.00016974661325892256, "loss": 0.3412, "step": 11660 }, { "epoch": 0.9446694750486067, "grad_norm": 0.03663177788257599, "learning_rate": 0.00016974211260632792, "loss": 0.3447, "step": 11661 }, { "epoch": 0.944750486066105, "grad_norm": 0.03302030265331268, "learning_rate": 0.0001697376119537333, "loss": 0.3214, "step": 11662 }, { "epoch": 0.9448314970836034, "grad_norm": 0.034949298948049545, "learning_rate": 0.00016973311130113867, "loss": 0.3263, "step": 11663 }, { "epoch": 0.9449125081011017, "grad_norm": 0.03676712140440941, "learning_rate": 0.00016972861064854403, "loss": 0.3366, "step": 11664 }, { "epoch": 0.9449935191186001, "grad_norm": 0.03827500343322754, "learning_rate": 0.00016972410999594945, "loss": 0.3845, "step": 11665 }, { "epoch": 0.9450745301360985, "grad_norm": 0.028249388560652733, "learning_rate": 0.0001697196093433548, "loss": 0.2928, "step": 11666 }, { "epoch": 0.9451555411535969, "grad_norm": 0.035137057304382324, "learning_rate": 0.00016971510869076017, "loss": 0.3038, "step": 11667 }, { "epoch": 0.9452365521710953, "grad_norm": 0.03510432690382004, "learning_rate": 0.00016971060803816555, "loss": 0.3282, "step": 11668 }, { "epoch": 0.9453175631885936, "grad_norm": 0.033443015068769455, "learning_rate": 0.0001697061073855709, "loss": 0.3412, "step": 11669 }, { "epoch": 0.9453985742060921, "grad_norm": 0.03263236582279205, "learning_rate": 0.00016970160673297627, "loss": 0.3158, "step": 11670 }, { "epoch": 0.9454795852235904, "grad_norm": 0.038706421852111816, "learning_rate": 0.0001696971060803817, "loss": 0.3624, "step": 11671 }, { "epoch": 0.9455605962410888, "grad_norm": 0.029902534559369087, "learning_rate": 0.00016969260542778705, "loss": 0.3152, "step": 11672 }, { "epoch": 0.9456416072585871, "grad_norm": 0.0366581529378891, "learning_rate": 0.0001696881047751924, "loss": 0.3915, "step": 11673 }, { "epoch": 0.9457226182760855, "grad_norm": 0.03990044817328453, "learning_rate": 0.0001696836041225978, "loss": 0.3217, "step": 11674 }, { "epoch": 0.945803629293584, "grad_norm": 0.034472111612558365, "learning_rate": 0.00016967910347000315, "loss": 0.3514, "step": 11675 }, { "epoch": 0.9458846403110823, "grad_norm": 0.0318378284573555, "learning_rate": 0.00016967460281740851, "loss": 0.3206, "step": 11676 }, { "epoch": 0.9459656513285807, "grad_norm": 0.03478666767477989, "learning_rate": 0.00016967010216481393, "loss": 0.3141, "step": 11677 }, { "epoch": 0.946046662346079, "grad_norm": 0.03769747540354729, "learning_rate": 0.0001696656015122193, "loss": 0.3969, "step": 11678 }, { "epoch": 0.9461276733635774, "grad_norm": 0.036210544407367706, "learning_rate": 0.00016966110085962465, "loss": 0.3578, "step": 11679 }, { "epoch": 0.9462086843810759, "grad_norm": 0.03335197642445564, "learning_rate": 0.00016965660020703004, "loss": 0.2924, "step": 11680 }, { "epoch": 0.9462896953985742, "grad_norm": 0.031656358391046524, "learning_rate": 0.0001696520995544354, "loss": 0.3034, "step": 11681 }, { "epoch": 0.9463707064160726, "grad_norm": 0.033151499927043915, "learning_rate": 0.00016964759890184076, "loss": 0.3418, "step": 11682 }, { "epoch": 0.9464517174335709, "grad_norm": 0.037764452397823334, "learning_rate": 0.00016964309824924617, "loss": 0.365, "step": 11683 }, { "epoch": 0.9465327284510694, "grad_norm": 0.03604017570614815, "learning_rate": 0.00016963859759665153, "loss": 0.3216, "step": 11684 }, { "epoch": 0.9466137394685677, "grad_norm": 0.03398576378822327, "learning_rate": 0.0001696340969440569, "loss": 0.2693, "step": 11685 }, { "epoch": 0.9466947504860661, "grad_norm": 0.04054470360279083, "learning_rate": 0.00016962959629146228, "loss": 0.381, "step": 11686 }, { "epoch": 0.9467757615035645, "grad_norm": 0.03417252004146576, "learning_rate": 0.00016962509563886764, "loss": 0.2849, "step": 11687 }, { "epoch": 0.9468567725210628, "grad_norm": 0.03717213496565819, "learning_rate": 0.000169620594986273, "loss": 0.3327, "step": 11688 }, { "epoch": 0.9469377835385613, "grad_norm": 0.03692119941115379, "learning_rate": 0.0001696160943336784, "loss": 0.3463, "step": 11689 }, { "epoch": 0.9470187945560596, "grad_norm": 0.03388385847210884, "learning_rate": 0.00016961159368108377, "loss": 0.312, "step": 11690 }, { "epoch": 0.947099805573558, "grad_norm": 0.03554116189479828, "learning_rate": 0.00016960709302848913, "loss": 0.3774, "step": 11691 }, { "epoch": 0.9471808165910564, "grad_norm": 0.032233826816082, "learning_rate": 0.00016960259237589452, "loss": 0.3052, "step": 11692 }, { "epoch": 0.9472618276085548, "grad_norm": 0.03355953097343445, "learning_rate": 0.00016959809172329988, "loss": 0.3092, "step": 11693 }, { "epoch": 0.9473428386260532, "grad_norm": 0.0348745621740818, "learning_rate": 0.00016959359107070527, "loss": 0.2982, "step": 11694 }, { "epoch": 0.9474238496435515, "grad_norm": 0.04244668781757355, "learning_rate": 0.00016958909041811065, "loss": 0.3179, "step": 11695 }, { "epoch": 0.9475048606610499, "grad_norm": 0.03393128514289856, "learning_rate": 0.000169584589765516, "loss": 0.3385, "step": 11696 }, { "epoch": 0.9475858716785482, "grad_norm": 0.03268209099769592, "learning_rate": 0.00016958008911292137, "loss": 0.2917, "step": 11697 }, { "epoch": 0.9476668826960467, "grad_norm": 0.03826176002621651, "learning_rate": 0.00016957558846032676, "loss": 0.3272, "step": 11698 }, { "epoch": 0.9477478937135451, "grad_norm": 0.03487805277109146, "learning_rate": 0.00016957108780773212, "loss": 0.3428, "step": 11699 }, { "epoch": 0.9478289047310434, "grad_norm": 0.029413651674985886, "learning_rate": 0.0001695665871551375, "loss": 0.2864, "step": 11700 }, { "epoch": 0.9479099157485418, "grad_norm": 0.041242972016334534, "learning_rate": 0.0001695620865025429, "loss": 0.3314, "step": 11701 }, { "epoch": 0.9479909267660401, "grad_norm": 0.03674250841140747, "learning_rate": 0.00016955758584994826, "loss": 0.3567, "step": 11702 }, { "epoch": 0.9480719377835386, "grad_norm": 0.0323912687599659, "learning_rate": 0.00016955308519735362, "loss": 0.3049, "step": 11703 }, { "epoch": 0.948152948801037, "grad_norm": 0.03778766468167305, "learning_rate": 0.000169548584544759, "loss": 0.2959, "step": 11704 }, { "epoch": 0.9482339598185353, "grad_norm": 0.034497227519750595, "learning_rate": 0.00016954408389216436, "loss": 0.347, "step": 11705 }, { "epoch": 0.9483149708360337, "grad_norm": 0.03679029643535614, "learning_rate": 0.00016953958323956975, "loss": 0.3651, "step": 11706 }, { "epoch": 0.9483959818535321, "grad_norm": 0.04503096267580986, "learning_rate": 0.00016953508258697514, "loss": 0.378, "step": 11707 }, { "epoch": 0.9484769928710305, "grad_norm": 0.03016662783920765, "learning_rate": 0.0001695305819343805, "loss": 0.2731, "step": 11708 }, { "epoch": 0.9485580038885288, "grad_norm": 0.03268558531999588, "learning_rate": 0.00016952608128178586, "loss": 0.2694, "step": 11709 }, { "epoch": 0.9486390149060272, "grad_norm": 0.03649129718542099, "learning_rate": 0.00016952158062919124, "loss": 0.3221, "step": 11710 }, { "epoch": 0.9487200259235256, "grad_norm": 0.03539815917611122, "learning_rate": 0.0001695170799765966, "loss": 0.3662, "step": 11711 }, { "epoch": 0.948801036941024, "grad_norm": 0.0350770466029644, "learning_rate": 0.000169512579324002, "loss": 0.3065, "step": 11712 }, { "epoch": 0.9488820479585224, "grad_norm": 0.03900324925780296, "learning_rate": 0.00016950807867140738, "loss": 0.3552, "step": 11713 }, { "epoch": 0.9489630589760207, "grad_norm": 0.03432242199778557, "learning_rate": 0.00016950357801881274, "loss": 0.314, "step": 11714 }, { "epoch": 0.9490440699935191, "grad_norm": 0.03255319967865944, "learning_rate": 0.0001694990773662181, "loss": 0.2936, "step": 11715 }, { "epoch": 0.9491250810110174, "grad_norm": 0.03342980891466141, "learning_rate": 0.00016949457671362349, "loss": 0.3205, "step": 11716 }, { "epoch": 0.9492060920285159, "grad_norm": 0.03174217417836189, "learning_rate": 0.00016949007606102885, "loss": 0.3155, "step": 11717 }, { "epoch": 0.9492871030460143, "grad_norm": 0.04021428897976875, "learning_rate": 0.00016948557540843423, "loss": 0.3229, "step": 11718 }, { "epoch": 0.9493681140635126, "grad_norm": 0.03540870547294617, "learning_rate": 0.00016948107475583962, "loss": 0.3245, "step": 11719 }, { "epoch": 0.949449125081011, "grad_norm": 0.039765365421772, "learning_rate": 0.00016947657410324498, "loss": 0.3519, "step": 11720 }, { "epoch": 0.9495301360985094, "grad_norm": 0.039002127945423126, "learning_rate": 0.00016947207345065034, "loss": 0.372, "step": 11721 }, { "epoch": 0.9496111471160078, "grad_norm": 0.031765103340148926, "learning_rate": 0.00016946757279805573, "loss": 0.3012, "step": 11722 }, { "epoch": 0.9496921581335062, "grad_norm": 0.031993210315704346, "learning_rate": 0.00016946307214546111, "loss": 0.3037, "step": 11723 }, { "epoch": 0.9497731691510045, "grad_norm": 0.03384470194578171, "learning_rate": 0.00016945857149286647, "loss": 0.3278, "step": 11724 }, { "epoch": 0.9498541801685029, "grad_norm": 0.03596783056855202, "learning_rate": 0.00016945407084027186, "loss": 0.331, "step": 11725 }, { "epoch": 0.9499351911860013, "grad_norm": 0.029281174764037132, "learning_rate": 0.00016944957018767722, "loss": 0.2366, "step": 11726 }, { "epoch": 0.9500162022034997, "grad_norm": 0.03412824124097824, "learning_rate": 0.00016944506953508258, "loss": 0.3133, "step": 11727 }, { "epoch": 0.950097213220998, "grad_norm": 0.03497035428881645, "learning_rate": 0.00016944056888248797, "loss": 0.3024, "step": 11728 }, { "epoch": 0.9501782242384964, "grad_norm": 0.03496416285634041, "learning_rate": 0.00016943606822989336, "loss": 0.3502, "step": 11729 }, { "epoch": 0.9502592352559948, "grad_norm": 0.036330707371234894, "learning_rate": 0.00016943156757729872, "loss": 0.3328, "step": 11730 }, { "epoch": 0.9503402462734932, "grad_norm": 0.03646592050790787, "learning_rate": 0.0001694270669247041, "loss": 0.3299, "step": 11731 }, { "epoch": 0.9504212572909916, "grad_norm": 0.033516980707645416, "learning_rate": 0.00016942256627210946, "loss": 0.3106, "step": 11732 }, { "epoch": 0.9505022683084899, "grad_norm": 0.03749392181634903, "learning_rate": 0.00016941806561951482, "loss": 0.3687, "step": 11733 }, { "epoch": 0.9505832793259883, "grad_norm": 0.03816765919327736, "learning_rate": 0.0001694135649669202, "loss": 0.3196, "step": 11734 }, { "epoch": 0.9506642903434868, "grad_norm": 0.032571665942668915, "learning_rate": 0.0001694090643143256, "loss": 0.2962, "step": 11735 }, { "epoch": 0.9507453013609851, "grad_norm": 0.034489717334508896, "learning_rate": 0.00016940456366173096, "loss": 0.3183, "step": 11736 }, { "epoch": 0.9508263123784835, "grad_norm": 0.03572354093194008, "learning_rate": 0.00016940006300913634, "loss": 0.3061, "step": 11737 }, { "epoch": 0.9509073233959818, "grad_norm": 0.034814946353435516, "learning_rate": 0.0001693955623565417, "loss": 0.3264, "step": 11738 }, { "epoch": 0.9509883344134802, "grad_norm": 0.03461809828877449, "learning_rate": 0.00016939106170394706, "loss": 0.3138, "step": 11739 }, { "epoch": 0.9510693454309787, "grad_norm": 0.03412418067455292, "learning_rate": 0.00016938656105135245, "loss": 0.3284, "step": 11740 }, { "epoch": 0.951150356448477, "grad_norm": 0.03454767167568207, "learning_rate": 0.00016938206039875784, "loss": 0.3381, "step": 11741 }, { "epoch": 0.9512313674659754, "grad_norm": 0.03485751524567604, "learning_rate": 0.0001693775597461632, "loss": 0.3043, "step": 11742 }, { "epoch": 0.9513123784834737, "grad_norm": 0.03471321612596512, "learning_rate": 0.00016937305909356859, "loss": 0.3475, "step": 11743 }, { "epoch": 0.9513933895009722, "grad_norm": 0.03523659333586693, "learning_rate": 0.00016936855844097395, "loss": 0.3372, "step": 11744 }, { "epoch": 0.9514744005184705, "grad_norm": 0.0338854119181633, "learning_rate": 0.0001693640577883793, "loss": 0.311, "step": 11745 }, { "epoch": 0.9515554115359689, "grad_norm": 0.0323248915374279, "learning_rate": 0.00016935955713578472, "loss": 0.3268, "step": 11746 }, { "epoch": 0.9516364225534673, "grad_norm": 0.03338275104761124, "learning_rate": 0.00016935505648319008, "loss": 0.2595, "step": 11747 }, { "epoch": 0.9517174335709656, "grad_norm": 0.04064786061644554, "learning_rate": 0.00016935055583059544, "loss": 0.3568, "step": 11748 }, { "epoch": 0.9517984445884641, "grad_norm": 0.03257180005311966, "learning_rate": 0.00016934605517800083, "loss": 0.2703, "step": 11749 }, { "epoch": 0.9518794556059624, "grad_norm": 0.03152458369731903, "learning_rate": 0.0001693415545254062, "loss": 0.2934, "step": 11750 }, { "epoch": 0.9519604666234608, "grad_norm": 0.03133175149559975, "learning_rate": 0.00016933705387281155, "loss": 0.2884, "step": 11751 }, { "epoch": 0.9520414776409591, "grad_norm": 0.035321589559316635, "learning_rate": 0.00016933255322021696, "loss": 0.3162, "step": 11752 }, { "epoch": 0.9521224886584575, "grad_norm": 0.04080462455749512, "learning_rate": 0.00016932805256762232, "loss": 0.354, "step": 11753 }, { "epoch": 0.952203499675956, "grad_norm": 0.0328151099383831, "learning_rate": 0.00016932355191502768, "loss": 0.3083, "step": 11754 }, { "epoch": 0.9522845106934543, "grad_norm": 0.03481130301952362, "learning_rate": 0.00016931905126243307, "loss": 0.3128, "step": 11755 }, { "epoch": 0.9523655217109527, "grad_norm": 0.03485405072569847, "learning_rate": 0.00016931455060983843, "loss": 0.3458, "step": 11756 }, { "epoch": 0.952446532728451, "grad_norm": 0.03622061014175415, "learning_rate": 0.0001693100499572438, "loss": 0.3733, "step": 11757 }, { "epoch": 0.9525275437459495, "grad_norm": 0.040508657693862915, "learning_rate": 0.0001693055493046492, "loss": 0.3509, "step": 11758 }, { "epoch": 0.9526085547634479, "grad_norm": 0.03345491737127304, "learning_rate": 0.00016930104865205456, "loss": 0.3284, "step": 11759 }, { "epoch": 0.9526895657809462, "grad_norm": 0.03780923783779144, "learning_rate": 0.00016929654799945992, "loss": 0.3215, "step": 11760 }, { "epoch": 0.9527705767984446, "grad_norm": 0.02839631587266922, "learning_rate": 0.0001692920473468653, "loss": 0.2715, "step": 11761 }, { "epoch": 0.9528515878159429, "grad_norm": 0.0344814732670784, "learning_rate": 0.00016928754669427067, "loss": 0.3353, "step": 11762 }, { "epoch": 0.9529325988334414, "grad_norm": 0.0311529953032732, "learning_rate": 0.00016928304604167603, "loss": 0.2887, "step": 11763 }, { "epoch": 0.9530136098509397, "grad_norm": 0.042994722723960876, "learning_rate": 0.00016927854538908145, "loss": 0.4151, "step": 11764 }, { "epoch": 0.9530946208684381, "grad_norm": 0.030827734619379044, "learning_rate": 0.0001692740447364868, "loss": 0.3259, "step": 11765 }, { "epoch": 0.9531756318859365, "grad_norm": 0.03870192915201187, "learning_rate": 0.00016926954408389217, "loss": 0.3981, "step": 11766 }, { "epoch": 0.9532566429034348, "grad_norm": 0.033833280205726624, "learning_rate": 0.00016926504343129755, "loss": 0.3258, "step": 11767 }, { "epoch": 0.9533376539209333, "grad_norm": 0.03414291888475418, "learning_rate": 0.0001692605427787029, "loss": 0.3437, "step": 11768 }, { "epoch": 0.9534186649384316, "grad_norm": 0.03322215750813484, "learning_rate": 0.00016925604212610827, "loss": 0.3217, "step": 11769 }, { "epoch": 0.95349967595593, "grad_norm": 0.0363927036523819, "learning_rate": 0.0001692515414735137, "loss": 0.322, "step": 11770 }, { "epoch": 0.9535806869734283, "grad_norm": 0.03173397481441498, "learning_rate": 0.00016924704082091905, "loss": 0.311, "step": 11771 }, { "epoch": 0.9536616979909268, "grad_norm": 0.03396917134523392, "learning_rate": 0.0001692425401683244, "loss": 0.3083, "step": 11772 }, { "epoch": 0.9537427090084252, "grad_norm": 0.03490575775504112, "learning_rate": 0.0001692380395157298, "loss": 0.3028, "step": 11773 }, { "epoch": 0.9538237200259235, "grad_norm": 0.03380593657493591, "learning_rate": 0.00016923353886313515, "loss": 0.3213, "step": 11774 }, { "epoch": 0.9539047310434219, "grad_norm": 0.03855833038687706, "learning_rate": 0.00016922903821054054, "loss": 0.3698, "step": 11775 }, { "epoch": 0.9539857420609202, "grad_norm": 0.03649449720978737, "learning_rate": 0.00016922453755794593, "loss": 0.3911, "step": 11776 }, { "epoch": 0.9540667530784187, "grad_norm": 0.032006874680519104, "learning_rate": 0.0001692200369053513, "loss": 0.3117, "step": 11777 }, { "epoch": 0.9541477640959171, "grad_norm": 0.037732262164354324, "learning_rate": 0.00016921553625275665, "loss": 0.328, "step": 11778 }, { "epoch": 0.9542287751134154, "grad_norm": 0.034876108169555664, "learning_rate": 0.00016921103560016204, "loss": 0.3054, "step": 11779 }, { "epoch": 0.9543097861309138, "grad_norm": 0.03947483375668526, "learning_rate": 0.0001692065349475674, "loss": 0.3248, "step": 11780 }, { "epoch": 0.9543907971484121, "grad_norm": 0.033376701176166534, "learning_rate": 0.00016920203429497278, "loss": 0.2932, "step": 11781 }, { "epoch": 0.9544718081659106, "grad_norm": 0.03565700352191925, "learning_rate": 0.00016919753364237817, "loss": 0.3275, "step": 11782 }, { "epoch": 0.954552819183409, "grad_norm": 0.036495938897132874, "learning_rate": 0.00016919303298978353, "loss": 0.2915, "step": 11783 }, { "epoch": 0.9546338302009073, "grad_norm": 0.036930181086063385, "learning_rate": 0.0001691885323371889, "loss": 0.3189, "step": 11784 }, { "epoch": 0.9547148412184057, "grad_norm": 0.03827503323554993, "learning_rate": 0.00016918403168459428, "loss": 0.321, "step": 11785 }, { "epoch": 0.9547958522359041, "grad_norm": 0.03679677098989487, "learning_rate": 0.00016917953103199964, "loss": 0.2892, "step": 11786 }, { "epoch": 0.9548768632534025, "grad_norm": 0.03638848662376404, "learning_rate": 0.00016917503037940502, "loss": 0.3257, "step": 11787 }, { "epoch": 0.9549578742709008, "grad_norm": 0.03703775629401207, "learning_rate": 0.0001691705297268104, "loss": 0.3547, "step": 11788 }, { "epoch": 0.9550388852883992, "grad_norm": 0.03831619769334793, "learning_rate": 0.00016916602907421577, "loss": 0.3986, "step": 11789 }, { "epoch": 0.9551198963058976, "grad_norm": 0.03259553760290146, "learning_rate": 0.00016916152842162113, "loss": 0.3045, "step": 11790 }, { "epoch": 0.955200907323396, "grad_norm": 0.03698387369513512, "learning_rate": 0.00016915702776902652, "loss": 0.2949, "step": 11791 }, { "epoch": 0.9552819183408944, "grad_norm": 0.03987254947423935, "learning_rate": 0.00016915252711643188, "loss": 0.3408, "step": 11792 }, { "epoch": 0.9553629293583927, "grad_norm": 0.0366261750459671, "learning_rate": 0.00016914802646383727, "loss": 0.3705, "step": 11793 }, { "epoch": 0.9554439403758911, "grad_norm": 0.03159358724951744, "learning_rate": 0.00016914352581124265, "loss": 0.2974, "step": 11794 }, { "epoch": 0.9555249513933896, "grad_norm": 0.03561757504940033, "learning_rate": 0.00016913902515864801, "loss": 0.3321, "step": 11795 }, { "epoch": 0.9556059624108879, "grad_norm": 0.030125150457024574, "learning_rate": 0.00016913452450605337, "loss": 0.2685, "step": 11796 }, { "epoch": 0.9556869734283863, "grad_norm": 0.04590509086847305, "learning_rate": 0.00016913002385345876, "loss": 0.3406, "step": 11797 }, { "epoch": 0.9557679844458846, "grad_norm": 0.03866960480809212, "learning_rate": 0.00016912552320086415, "loss": 0.3871, "step": 11798 }, { "epoch": 0.955848995463383, "grad_norm": 0.03353342413902283, "learning_rate": 0.0001691210225482695, "loss": 0.3109, "step": 11799 }, { "epoch": 0.9559300064808814, "grad_norm": 0.03562798351049423, "learning_rate": 0.0001691165218956749, "loss": 0.3502, "step": 11800 }, { "epoch": 0.9560110174983798, "grad_norm": 0.03377343341708183, "learning_rate": 0.00016911202124308026, "loss": 0.3315, "step": 11801 }, { "epoch": 0.9560920285158782, "grad_norm": 0.034913334995508194, "learning_rate": 0.00016910752059048562, "loss": 0.3222, "step": 11802 }, { "epoch": 0.9561730395333765, "grad_norm": 0.03448820114135742, "learning_rate": 0.000169103019937891, "loss": 0.3303, "step": 11803 }, { "epoch": 0.9562540505508749, "grad_norm": 0.028959903866052628, "learning_rate": 0.0001690985192852964, "loss": 0.2945, "step": 11804 }, { "epoch": 0.9563350615683733, "grad_norm": 0.03772129863500595, "learning_rate": 0.00016909401863270175, "loss": 0.3726, "step": 11805 }, { "epoch": 0.9564160725858717, "grad_norm": 0.03504560515284538, "learning_rate": 0.00016908951798010714, "loss": 0.3372, "step": 11806 }, { "epoch": 0.95649708360337, "grad_norm": 0.03733687847852707, "learning_rate": 0.0001690850173275125, "loss": 0.3983, "step": 11807 }, { "epoch": 0.9565780946208684, "grad_norm": 0.03707200288772583, "learning_rate": 0.00016908051667491786, "loss": 0.3513, "step": 11808 }, { "epoch": 0.9566591056383669, "grad_norm": 0.03620739281177521, "learning_rate": 0.00016907601602232324, "loss": 0.3467, "step": 11809 }, { "epoch": 0.9567401166558652, "grad_norm": 0.03168788552284241, "learning_rate": 0.00016907151536972863, "loss": 0.2961, "step": 11810 }, { "epoch": 0.9568211276733636, "grad_norm": 0.03348376229405403, "learning_rate": 0.000169067014717134, "loss": 0.2898, "step": 11811 }, { "epoch": 0.9569021386908619, "grad_norm": 0.03537198528647423, "learning_rate": 0.00016906251406453938, "loss": 0.3066, "step": 11812 }, { "epoch": 0.9569831497083603, "grad_norm": 0.037128619849681854, "learning_rate": 0.00016905801341194474, "loss": 0.3561, "step": 11813 }, { "epoch": 0.9570641607258588, "grad_norm": 0.03736058622598648, "learning_rate": 0.0001690535127593501, "loss": 0.37, "step": 11814 }, { "epoch": 0.9571451717433571, "grad_norm": 0.033690933138132095, "learning_rate": 0.00016904901210675549, "loss": 0.3121, "step": 11815 }, { "epoch": 0.9572261827608555, "grad_norm": 0.0378740020096302, "learning_rate": 0.00016904451145416087, "loss": 0.3614, "step": 11816 }, { "epoch": 0.9573071937783538, "grad_norm": 0.033114880323410034, "learning_rate": 0.00016904001080156623, "loss": 0.2907, "step": 11817 }, { "epoch": 0.9573882047958522, "grad_norm": 0.04144694283604622, "learning_rate": 0.00016903551014897162, "loss": 0.3657, "step": 11818 }, { "epoch": 0.9574692158133506, "grad_norm": 0.035634271800518036, "learning_rate": 0.00016903100949637698, "loss": 0.3649, "step": 11819 }, { "epoch": 0.957550226830849, "grad_norm": 0.03484927490353584, "learning_rate": 0.00016902650884378234, "loss": 0.344, "step": 11820 }, { "epoch": 0.9576312378483474, "grad_norm": 0.03607147932052612, "learning_rate": 0.00016902200819118773, "loss": 0.3054, "step": 11821 }, { "epoch": 0.9577122488658457, "grad_norm": 0.030834076926112175, "learning_rate": 0.00016901750753859311, "loss": 0.3091, "step": 11822 }, { "epoch": 0.9577932598833442, "grad_norm": 0.03185582533478737, "learning_rate": 0.00016901300688599847, "loss": 0.28, "step": 11823 }, { "epoch": 0.9578742709008425, "grad_norm": 0.038030751049518585, "learning_rate": 0.00016900850623340386, "loss": 0.2906, "step": 11824 }, { "epoch": 0.9579552819183409, "grad_norm": 0.034883469343185425, "learning_rate": 0.00016900400558080922, "loss": 0.339, "step": 11825 }, { "epoch": 0.9580362929358393, "grad_norm": 0.04374323785305023, "learning_rate": 0.00016899950492821458, "loss": 0.3658, "step": 11826 }, { "epoch": 0.9581173039533376, "grad_norm": 0.03804289177060127, "learning_rate": 0.00016899500427562, "loss": 0.3321, "step": 11827 }, { "epoch": 0.9581983149708361, "grad_norm": 0.033111900091171265, "learning_rate": 0.00016899050362302536, "loss": 0.3215, "step": 11828 }, { "epoch": 0.9582793259883344, "grad_norm": 0.033424705266952515, "learning_rate": 0.00016898600297043072, "loss": 0.3278, "step": 11829 }, { "epoch": 0.9583603370058328, "grad_norm": 0.032798223197460175, "learning_rate": 0.0001689815023178361, "loss": 0.2936, "step": 11830 }, { "epoch": 0.9584413480233311, "grad_norm": 0.03180072829127312, "learning_rate": 0.00016897700166524146, "loss": 0.3061, "step": 11831 }, { "epoch": 0.9585223590408296, "grad_norm": 0.030326934531331062, "learning_rate": 0.00016897250101264682, "loss": 0.3027, "step": 11832 }, { "epoch": 0.958603370058328, "grad_norm": 0.037974853068590164, "learning_rate": 0.00016896800036005224, "loss": 0.309, "step": 11833 }, { "epoch": 0.9586843810758263, "grad_norm": 0.03329726681113243, "learning_rate": 0.0001689634997074576, "loss": 0.2979, "step": 11834 }, { "epoch": 0.9587653920933247, "grad_norm": 0.03679864853620529, "learning_rate": 0.00016895899905486296, "loss": 0.3325, "step": 11835 }, { "epoch": 0.958846403110823, "grad_norm": 0.03249267116189003, "learning_rate": 0.00016895449840226834, "loss": 0.2676, "step": 11836 }, { "epoch": 0.9589274141283215, "grad_norm": 0.035779163241386414, "learning_rate": 0.0001689499977496737, "loss": 0.3245, "step": 11837 }, { "epoch": 0.9590084251458199, "grad_norm": 0.038208309561014175, "learning_rate": 0.00016894549709707907, "loss": 0.3432, "step": 11838 }, { "epoch": 0.9590894361633182, "grad_norm": 0.034464698284864426, "learning_rate": 0.00016894099644448448, "loss": 0.2977, "step": 11839 }, { "epoch": 0.9591704471808166, "grad_norm": 0.03685634583234787, "learning_rate": 0.00016893649579188984, "loss": 0.3223, "step": 11840 }, { "epoch": 0.9592514581983149, "grad_norm": 0.036837100982666016, "learning_rate": 0.0001689319951392952, "loss": 0.2679, "step": 11841 }, { "epoch": 0.9593324692158134, "grad_norm": 0.03724145144224167, "learning_rate": 0.0001689274944867006, "loss": 0.2949, "step": 11842 }, { "epoch": 0.9594134802333117, "grad_norm": 0.03809111937880516, "learning_rate": 0.00016892299383410595, "loss": 0.3518, "step": 11843 }, { "epoch": 0.9594944912508101, "grad_norm": 0.03469406068325043, "learning_rate": 0.0001689184931815113, "loss": 0.3644, "step": 11844 }, { "epoch": 0.9595755022683085, "grad_norm": 0.03754259645938873, "learning_rate": 0.00016891399252891672, "loss": 0.3545, "step": 11845 }, { "epoch": 0.9596565132858069, "grad_norm": 0.03645508363842964, "learning_rate": 0.00016890949187632208, "loss": 0.3223, "step": 11846 }, { "epoch": 0.9597375243033053, "grad_norm": 0.03522754833102226, "learning_rate": 0.00016890499122372744, "loss": 0.353, "step": 11847 }, { "epoch": 0.9598185353208036, "grad_norm": 0.036083534359931946, "learning_rate": 0.00016890049057113283, "loss": 0.3477, "step": 11848 }, { "epoch": 0.959899546338302, "grad_norm": 0.03646800294518471, "learning_rate": 0.0001688959899185382, "loss": 0.3192, "step": 11849 }, { "epoch": 0.9599805573558003, "grad_norm": 0.03613121807575226, "learning_rate": 0.00016889148926594358, "loss": 0.3567, "step": 11850 }, { "epoch": 0.9600615683732988, "grad_norm": 0.03716088458895683, "learning_rate": 0.00016888698861334896, "loss": 0.3071, "step": 11851 }, { "epoch": 0.9601425793907972, "grad_norm": 0.0373094417154789, "learning_rate": 0.00016888248796075432, "loss": 0.333, "step": 11852 }, { "epoch": 0.9602235904082955, "grad_norm": 0.034615617245435715, "learning_rate": 0.00016887798730815968, "loss": 0.3143, "step": 11853 }, { "epoch": 0.9603046014257939, "grad_norm": 0.041054386645555496, "learning_rate": 0.00016887348665556507, "loss": 0.3148, "step": 11854 }, { "epoch": 0.9603856124432922, "grad_norm": 0.03472714498639107, "learning_rate": 0.00016886898600297043, "loss": 0.3177, "step": 11855 }, { "epoch": 0.9604666234607907, "grad_norm": 0.032427918165922165, "learning_rate": 0.00016886448535037582, "loss": 0.279, "step": 11856 }, { "epoch": 0.9605476344782891, "grad_norm": 0.036566201597452164, "learning_rate": 0.0001688599846977812, "loss": 0.3376, "step": 11857 }, { "epoch": 0.9606286454957874, "grad_norm": 0.030958328396081924, "learning_rate": 0.00016885548404518656, "loss": 0.3252, "step": 11858 }, { "epoch": 0.9607096565132858, "grad_norm": 0.02840495854616165, "learning_rate": 0.00016885098339259192, "loss": 0.2626, "step": 11859 }, { "epoch": 0.9607906675307842, "grad_norm": 0.03243676573038101, "learning_rate": 0.0001688464827399973, "loss": 0.3303, "step": 11860 }, { "epoch": 0.9608716785482826, "grad_norm": 0.034418750554323196, "learning_rate": 0.00016884198208740267, "loss": 0.3564, "step": 11861 }, { "epoch": 0.960952689565781, "grad_norm": 0.038227930665016174, "learning_rate": 0.00016883748143480806, "loss": 0.3468, "step": 11862 }, { "epoch": 0.9610337005832793, "grad_norm": 0.034565817564725876, "learning_rate": 0.00016883298078221345, "loss": 0.2818, "step": 11863 }, { "epoch": 0.9611147116007777, "grad_norm": 0.03184519335627556, "learning_rate": 0.0001688284801296188, "loss": 0.3059, "step": 11864 }, { "epoch": 0.9611957226182761, "grad_norm": 0.03285719081759453, "learning_rate": 0.00016882397947702417, "loss": 0.3113, "step": 11865 }, { "epoch": 0.9612767336357745, "grad_norm": 0.037567757070064545, "learning_rate": 0.00016881947882442955, "loss": 0.2988, "step": 11866 }, { "epoch": 0.9613577446532728, "grad_norm": 0.039061255753040314, "learning_rate": 0.0001688149781718349, "loss": 0.323, "step": 11867 }, { "epoch": 0.9614387556707712, "grad_norm": 0.03476432338356972, "learning_rate": 0.0001688104775192403, "loss": 0.347, "step": 11868 }, { "epoch": 0.9615197666882696, "grad_norm": 0.032409682869911194, "learning_rate": 0.0001688059768666457, "loss": 0.307, "step": 11869 }, { "epoch": 0.961600777705768, "grad_norm": 0.03706350550055504, "learning_rate": 0.00016880147621405105, "loss": 0.3321, "step": 11870 }, { "epoch": 0.9616817887232664, "grad_norm": 0.03343446925282478, "learning_rate": 0.0001687969755614564, "loss": 0.3036, "step": 11871 }, { "epoch": 0.9617627997407647, "grad_norm": 0.031622350215911865, "learning_rate": 0.0001687924749088618, "loss": 0.3016, "step": 11872 }, { "epoch": 0.9618438107582631, "grad_norm": 0.032086245715618134, "learning_rate": 0.00016878797425626715, "loss": 0.3114, "step": 11873 }, { "epoch": 0.9619248217757616, "grad_norm": 0.03357468545436859, "learning_rate": 0.00016878347360367254, "loss": 0.309, "step": 11874 }, { "epoch": 0.9620058327932599, "grad_norm": 0.03183509781956673, "learning_rate": 0.00016877897295107793, "loss": 0.3006, "step": 11875 }, { "epoch": 0.9620868438107583, "grad_norm": 0.03746958449482918, "learning_rate": 0.0001687744722984833, "loss": 0.3443, "step": 11876 }, { "epoch": 0.9621678548282566, "grad_norm": 0.03041190281510353, "learning_rate": 0.00016876997164588865, "loss": 0.3113, "step": 11877 }, { "epoch": 0.962248865845755, "grad_norm": 0.03378698229789734, "learning_rate": 0.00016876547099329404, "loss": 0.3163, "step": 11878 }, { "epoch": 0.9623298768632534, "grad_norm": 0.03692770004272461, "learning_rate": 0.00016876097034069942, "loss": 0.2977, "step": 11879 }, { "epoch": 0.9624108878807518, "grad_norm": 0.038222331553697586, "learning_rate": 0.00016875646968810478, "loss": 0.3146, "step": 11880 }, { "epoch": 0.9624918988982502, "grad_norm": 0.04049944877624512, "learning_rate": 0.00016875196903551017, "loss": 0.3635, "step": 11881 }, { "epoch": 0.9625729099157485, "grad_norm": 0.03273358568549156, "learning_rate": 0.00016874746838291553, "loss": 0.3228, "step": 11882 }, { "epoch": 0.962653920933247, "grad_norm": 0.03491583839058876, "learning_rate": 0.0001687429677303209, "loss": 0.2915, "step": 11883 }, { "epoch": 0.9627349319507453, "grad_norm": 0.037491586059331894, "learning_rate": 0.00016873846707772628, "loss": 0.3297, "step": 11884 }, { "epoch": 0.9628159429682437, "grad_norm": 0.03505074232816696, "learning_rate": 0.00016873396642513167, "loss": 0.3111, "step": 11885 }, { "epoch": 0.962896953985742, "grad_norm": 0.036803267896175385, "learning_rate": 0.00016872946577253703, "loss": 0.3451, "step": 11886 }, { "epoch": 0.9629779650032404, "grad_norm": 0.03782041743397713, "learning_rate": 0.0001687249651199424, "loss": 0.3575, "step": 11887 }, { "epoch": 0.9630589760207389, "grad_norm": 0.0339006632566452, "learning_rate": 0.00016872046446734777, "loss": 0.2886, "step": 11888 }, { "epoch": 0.9631399870382372, "grad_norm": 0.038778334856033325, "learning_rate": 0.00016871596381475313, "loss": 0.3627, "step": 11889 }, { "epoch": 0.9632209980557356, "grad_norm": 0.033467769622802734, "learning_rate": 0.00016871146316215852, "loss": 0.3282, "step": 11890 }, { "epoch": 0.9633020090732339, "grad_norm": 0.04024284705519676, "learning_rate": 0.0001687069625095639, "loss": 0.3378, "step": 11891 }, { "epoch": 0.9633830200907323, "grad_norm": 0.032425012439489365, "learning_rate": 0.00016870246185696927, "loss": 0.318, "step": 11892 }, { "epoch": 0.9634640311082308, "grad_norm": 0.033856865018606186, "learning_rate": 0.00016869796120437465, "loss": 0.3001, "step": 11893 }, { "epoch": 0.9635450421257291, "grad_norm": 0.0412677526473999, "learning_rate": 0.00016869346055178001, "loss": 0.325, "step": 11894 }, { "epoch": 0.9636260531432275, "grad_norm": 0.037516701966524124, "learning_rate": 0.00016868895989918537, "loss": 0.3859, "step": 11895 }, { "epoch": 0.9637070641607258, "grad_norm": 0.03334256261587143, "learning_rate": 0.00016868445924659076, "loss": 0.3469, "step": 11896 }, { "epoch": 0.9637880751782243, "grad_norm": 0.0391136072576046, "learning_rate": 0.00016867995859399615, "loss": 0.3106, "step": 11897 }, { "epoch": 0.9638690861957226, "grad_norm": 0.03461690992116928, "learning_rate": 0.0001686754579414015, "loss": 0.3162, "step": 11898 }, { "epoch": 0.963950097213221, "grad_norm": 0.03258664906024933, "learning_rate": 0.0001686709572888069, "loss": 0.3114, "step": 11899 }, { "epoch": 0.9640311082307194, "grad_norm": 0.03400241583585739, "learning_rate": 0.00016866645663621226, "loss": 0.3152, "step": 11900 }, { "epoch": 0.9641121192482177, "grad_norm": 0.034580402076244354, "learning_rate": 0.00016866195598361762, "loss": 0.3116, "step": 11901 }, { "epoch": 0.9641931302657162, "grad_norm": 0.032911915332078934, "learning_rate": 0.000168657455331023, "loss": 0.326, "step": 11902 }, { "epoch": 0.9642741412832145, "grad_norm": 0.035322003066539764, "learning_rate": 0.0001686529546784284, "loss": 0.3314, "step": 11903 }, { "epoch": 0.9643551523007129, "grad_norm": 0.044158484786748886, "learning_rate": 0.00016864845402583375, "loss": 0.3966, "step": 11904 }, { "epoch": 0.9644361633182112, "grad_norm": 0.03736729547381401, "learning_rate": 0.00016864395337323914, "loss": 0.2828, "step": 11905 }, { "epoch": 0.9645171743357096, "grad_norm": 0.03226647526025772, "learning_rate": 0.0001686394527206445, "loss": 0.3046, "step": 11906 }, { "epoch": 0.9645981853532081, "grad_norm": 0.03562768176198006, "learning_rate": 0.00016863495206804986, "loss": 0.3168, "step": 11907 }, { "epoch": 0.9646791963707064, "grad_norm": 0.03386708348989487, "learning_rate": 0.00016863045141545527, "loss": 0.3114, "step": 11908 }, { "epoch": 0.9647602073882048, "grad_norm": 0.04202236980199814, "learning_rate": 0.00016862595076286063, "loss": 0.3351, "step": 11909 }, { "epoch": 0.9648412184057031, "grad_norm": 0.0363929346203804, "learning_rate": 0.000168621450110266, "loss": 0.3274, "step": 11910 }, { "epoch": 0.9649222294232016, "grad_norm": 0.041646115481853485, "learning_rate": 0.00016861694945767138, "loss": 0.3577, "step": 11911 }, { "epoch": 0.9650032404407, "grad_norm": 0.03489062562584877, "learning_rate": 0.00016861244880507674, "loss": 0.3014, "step": 11912 }, { "epoch": 0.9650842514581983, "grad_norm": 0.03679816424846649, "learning_rate": 0.0001686079481524821, "loss": 0.3269, "step": 11913 }, { "epoch": 0.9651652624756967, "grad_norm": 0.030343173071742058, "learning_rate": 0.0001686034474998875, "loss": 0.2979, "step": 11914 }, { "epoch": 0.965246273493195, "grad_norm": 0.03204835578799248, "learning_rate": 0.00016859894684729287, "loss": 0.2832, "step": 11915 }, { "epoch": 0.9653272845106935, "grad_norm": 0.03680717572569847, "learning_rate": 0.00016859444619469823, "loss": 0.3436, "step": 11916 }, { "epoch": 0.9654082955281919, "grad_norm": 0.0416983887553215, "learning_rate": 0.00016858994554210362, "loss": 0.3516, "step": 11917 }, { "epoch": 0.9654893065456902, "grad_norm": 0.0383131317794323, "learning_rate": 0.00016858544488950898, "loss": 0.3122, "step": 11918 }, { "epoch": 0.9655703175631886, "grad_norm": 0.03952203318476677, "learning_rate": 0.00016858094423691434, "loss": 0.3272, "step": 11919 }, { "epoch": 0.9656513285806869, "grad_norm": 0.03892749920487404, "learning_rate": 0.00016857644358431975, "loss": 0.3626, "step": 11920 }, { "epoch": 0.9657323395981854, "grad_norm": 0.04064110666513443, "learning_rate": 0.00016857194293172511, "loss": 0.347, "step": 11921 }, { "epoch": 0.9658133506156837, "grad_norm": 0.03372594714164734, "learning_rate": 0.00016856744227913047, "loss": 0.294, "step": 11922 }, { "epoch": 0.9658943616331821, "grad_norm": 0.036753252148628235, "learning_rate": 0.00016856294162653586, "loss": 0.3506, "step": 11923 }, { "epoch": 0.9659753726506805, "grad_norm": 0.035451460629701614, "learning_rate": 0.00016855844097394122, "loss": 0.327, "step": 11924 }, { "epoch": 0.9660563836681789, "grad_norm": 0.038048360496759415, "learning_rate": 0.00016855394032134658, "loss": 0.3112, "step": 11925 }, { "epoch": 0.9661373946856773, "grad_norm": 0.032932937145233154, "learning_rate": 0.000168549439668752, "loss": 0.3371, "step": 11926 }, { "epoch": 0.9662184057031756, "grad_norm": 0.034424230456352234, "learning_rate": 0.00016854493901615736, "loss": 0.312, "step": 11927 }, { "epoch": 0.966299416720674, "grad_norm": 0.03948157653212547, "learning_rate": 0.00016854043836356272, "loss": 0.3457, "step": 11928 }, { "epoch": 0.9663804277381723, "grad_norm": 0.03489063307642937, "learning_rate": 0.0001685359377109681, "loss": 0.3272, "step": 11929 }, { "epoch": 0.9664614387556708, "grad_norm": 0.038051407784223557, "learning_rate": 0.00016853143705837346, "loss": 0.3468, "step": 11930 }, { "epoch": 0.9665424497731692, "grad_norm": 0.036160316318273544, "learning_rate": 0.00016852693640577885, "loss": 0.3286, "step": 11931 }, { "epoch": 0.9666234607906675, "grad_norm": 0.043658457696437836, "learning_rate": 0.00016852243575318424, "loss": 0.3146, "step": 11932 }, { "epoch": 0.9667044718081659, "grad_norm": 0.0352136455476284, "learning_rate": 0.0001685179351005896, "loss": 0.2906, "step": 11933 }, { "epoch": 0.9667854828256643, "grad_norm": 0.03503730893135071, "learning_rate": 0.00016851343444799496, "loss": 0.3469, "step": 11934 }, { "epoch": 0.9668664938431627, "grad_norm": 0.03400106355547905, "learning_rate": 0.00016850893379540035, "loss": 0.2711, "step": 11935 }, { "epoch": 0.9669475048606611, "grad_norm": 0.03846592828631401, "learning_rate": 0.0001685044331428057, "loss": 0.343, "step": 11936 }, { "epoch": 0.9670285158781594, "grad_norm": 0.037533823400735855, "learning_rate": 0.0001684999324902111, "loss": 0.3429, "step": 11937 }, { "epoch": 0.9671095268956578, "grad_norm": 0.03524777293205261, "learning_rate": 0.00016849543183761648, "loss": 0.3448, "step": 11938 }, { "epoch": 0.9671905379131562, "grad_norm": 0.0354623906314373, "learning_rate": 0.00016849093118502184, "loss": 0.3177, "step": 11939 }, { "epoch": 0.9672715489306546, "grad_norm": 0.03387176990509033, "learning_rate": 0.0001684864305324272, "loss": 0.3028, "step": 11940 }, { "epoch": 0.967352559948153, "grad_norm": 0.033984046429395676, "learning_rate": 0.0001684819298798326, "loss": 0.3333, "step": 11941 }, { "epoch": 0.9674335709656513, "grad_norm": 0.034461379051208496, "learning_rate": 0.00016847742922723795, "loss": 0.2981, "step": 11942 }, { "epoch": 0.9675145819831497, "grad_norm": 0.0331692174077034, "learning_rate": 0.00016847292857464333, "loss": 0.3362, "step": 11943 }, { "epoch": 0.9675955930006481, "grad_norm": 0.030690953135490417, "learning_rate": 0.00016846842792204872, "loss": 0.2703, "step": 11944 }, { "epoch": 0.9676766040181465, "grad_norm": 0.03597325086593628, "learning_rate": 0.00016846392726945408, "loss": 0.3525, "step": 11945 }, { "epoch": 0.9677576150356448, "grad_norm": 0.03336905315518379, "learning_rate": 0.00016845942661685944, "loss": 0.3418, "step": 11946 }, { "epoch": 0.9678386260531432, "grad_norm": 0.03322875499725342, "learning_rate": 0.00016845492596426483, "loss": 0.3311, "step": 11947 }, { "epoch": 0.9679196370706417, "grad_norm": 0.03448665514588356, "learning_rate": 0.0001684504253116702, "loss": 0.3301, "step": 11948 }, { "epoch": 0.96800064808814, "grad_norm": 0.03418492153286934, "learning_rate": 0.00016844592465907558, "loss": 0.3148, "step": 11949 }, { "epoch": 0.9680816591056384, "grad_norm": 0.040306005626916885, "learning_rate": 0.00016844142400648096, "loss": 0.3654, "step": 11950 }, { "epoch": 0.9681626701231367, "grad_norm": 0.0345134511590004, "learning_rate": 0.00016843692335388632, "loss": 0.324, "step": 11951 }, { "epoch": 0.9682436811406351, "grad_norm": 0.03828649967908859, "learning_rate": 0.00016843242270129168, "loss": 0.312, "step": 11952 }, { "epoch": 0.9683246921581335, "grad_norm": 0.03464636951684952, "learning_rate": 0.00016842792204869707, "loss": 0.3217, "step": 11953 }, { "epoch": 0.9684057031756319, "grad_norm": 0.03320741280913353, "learning_rate": 0.00016842342139610243, "loss": 0.3334, "step": 11954 }, { "epoch": 0.9684867141931303, "grad_norm": 0.03954046592116356, "learning_rate": 0.00016841892074350782, "loss": 0.3285, "step": 11955 }, { "epoch": 0.9685677252106286, "grad_norm": 0.03934956341981888, "learning_rate": 0.0001684144200909132, "loss": 0.3164, "step": 11956 }, { "epoch": 0.968648736228127, "grad_norm": 0.031792640686035156, "learning_rate": 0.00016840991943831856, "loss": 0.2543, "step": 11957 }, { "epoch": 0.9687297472456254, "grad_norm": 0.037689026445150375, "learning_rate": 0.00016840541878572392, "loss": 0.3358, "step": 11958 }, { "epoch": 0.9688107582631238, "grad_norm": 0.03448779508471489, "learning_rate": 0.0001684009181331293, "loss": 0.3659, "step": 11959 }, { "epoch": 0.9688917692806222, "grad_norm": 0.03423665836453438, "learning_rate": 0.0001683964174805347, "loss": 0.314, "step": 11960 }, { "epoch": 0.9689727802981205, "grad_norm": 0.03471982851624489, "learning_rate": 0.00016839191682794006, "loss": 0.3302, "step": 11961 }, { "epoch": 0.969053791315619, "grad_norm": 0.03254758566617966, "learning_rate": 0.00016838741617534545, "loss": 0.3197, "step": 11962 }, { "epoch": 0.9691348023331173, "grad_norm": 0.03508211299777031, "learning_rate": 0.0001683829155227508, "loss": 0.3282, "step": 11963 }, { "epoch": 0.9692158133506157, "grad_norm": 0.037181735038757324, "learning_rate": 0.00016837841487015617, "loss": 0.3467, "step": 11964 }, { "epoch": 0.969296824368114, "grad_norm": 0.03629257529973984, "learning_rate": 0.00016837391421756155, "loss": 0.3154, "step": 11965 }, { "epoch": 0.9693778353856124, "grad_norm": 0.03312021866440773, "learning_rate": 0.00016836941356496694, "loss": 0.3009, "step": 11966 }, { "epoch": 0.9694588464031109, "grad_norm": 0.0314815379679203, "learning_rate": 0.0001683649129123723, "loss": 0.3232, "step": 11967 }, { "epoch": 0.9695398574206092, "grad_norm": 0.03045765683054924, "learning_rate": 0.0001683604122597777, "loss": 0.2831, "step": 11968 }, { "epoch": 0.9696208684381076, "grad_norm": 0.03353416547179222, "learning_rate": 0.00016835591160718305, "loss": 0.2955, "step": 11969 }, { "epoch": 0.9697018794556059, "grad_norm": 0.035733599215745926, "learning_rate": 0.0001683514109545884, "loss": 0.3178, "step": 11970 }, { "epoch": 0.9697828904731044, "grad_norm": 0.03448764607310295, "learning_rate": 0.0001683469103019938, "loss": 0.336, "step": 11971 }, { "epoch": 0.9698639014906028, "grad_norm": 0.035881828516721725, "learning_rate": 0.00016834240964939918, "loss": 0.2854, "step": 11972 }, { "epoch": 0.9699449125081011, "grad_norm": 0.03701227903366089, "learning_rate": 0.00016833790899680454, "loss": 0.3392, "step": 11973 }, { "epoch": 0.9700259235255995, "grad_norm": 0.03427772596478462, "learning_rate": 0.00016833340834420993, "loss": 0.3131, "step": 11974 }, { "epoch": 0.9701069345430978, "grad_norm": 0.03733328357338905, "learning_rate": 0.0001683289076916153, "loss": 0.3246, "step": 11975 }, { "epoch": 0.9701879455605963, "grad_norm": 0.03317997232079506, "learning_rate": 0.00016832440703902065, "loss": 0.3102, "step": 11976 }, { "epoch": 0.9702689565780946, "grad_norm": 0.032740697264671326, "learning_rate": 0.00016831990638642604, "loss": 0.3, "step": 11977 }, { "epoch": 0.970349967595593, "grad_norm": 0.03583207353949547, "learning_rate": 0.00016831540573383142, "loss": 0.3076, "step": 11978 }, { "epoch": 0.9704309786130914, "grad_norm": 0.04081375524401665, "learning_rate": 0.00016831090508123678, "loss": 0.356, "step": 11979 }, { "epoch": 0.9705119896305897, "grad_norm": 0.039508093148469925, "learning_rate": 0.00016830640442864217, "loss": 0.2998, "step": 11980 }, { "epoch": 0.9705930006480882, "grad_norm": 0.03704408183693886, "learning_rate": 0.00016830190377604753, "loss": 0.3403, "step": 11981 }, { "epoch": 0.9706740116655865, "grad_norm": 0.038443513214588165, "learning_rate": 0.0001682974031234529, "loss": 0.3315, "step": 11982 }, { "epoch": 0.9707550226830849, "grad_norm": 0.03290339931845665, "learning_rate": 0.0001682929024708583, "loss": 0.3021, "step": 11983 }, { "epoch": 0.9708360337005832, "grad_norm": 0.03160303458571434, "learning_rate": 0.00016828840181826367, "loss": 0.2866, "step": 11984 }, { "epoch": 0.9709170447180817, "grad_norm": 0.03647925704717636, "learning_rate": 0.00016828390116566903, "loss": 0.3198, "step": 11985 }, { "epoch": 0.9709980557355801, "grad_norm": 0.038130663335323334, "learning_rate": 0.0001682794005130744, "loss": 0.3386, "step": 11986 }, { "epoch": 0.9710790667530784, "grad_norm": 0.036003727465867996, "learning_rate": 0.00016827489986047977, "loss": 0.2976, "step": 11987 }, { "epoch": 0.9711600777705768, "grad_norm": 0.03716365620493889, "learning_rate": 0.00016827039920788513, "loss": 0.369, "step": 11988 }, { "epoch": 0.9712410887880751, "grad_norm": 0.03195514902472496, "learning_rate": 0.00016826589855529055, "loss": 0.3043, "step": 11989 }, { "epoch": 0.9713220998055736, "grad_norm": 0.03346724435687065, "learning_rate": 0.0001682613979026959, "loss": 0.3165, "step": 11990 }, { "epoch": 0.971403110823072, "grad_norm": 0.029482269659638405, "learning_rate": 0.00016825689725010127, "loss": 0.2691, "step": 11991 }, { "epoch": 0.9714841218405703, "grad_norm": 0.031241513788700104, "learning_rate": 0.00016825239659750665, "loss": 0.3227, "step": 11992 }, { "epoch": 0.9715651328580687, "grad_norm": 0.03511514514684677, "learning_rate": 0.00016824789594491201, "loss": 0.3189, "step": 11993 }, { "epoch": 0.971646143875567, "grad_norm": 0.03130088746547699, "learning_rate": 0.00016824339529231737, "loss": 0.3331, "step": 11994 }, { "epoch": 0.9717271548930655, "grad_norm": 0.034547485411167145, "learning_rate": 0.0001682388946397228, "loss": 0.3485, "step": 11995 }, { "epoch": 0.9718081659105638, "grad_norm": 0.04532260447740555, "learning_rate": 0.00016823439398712815, "loss": 0.3239, "step": 11996 }, { "epoch": 0.9718891769280622, "grad_norm": 0.03304055333137512, "learning_rate": 0.0001682298933345335, "loss": 0.3066, "step": 11997 }, { "epoch": 0.9719701879455606, "grad_norm": 0.031891681253910065, "learning_rate": 0.0001682253926819389, "loss": 0.3086, "step": 11998 }, { "epoch": 0.972051198963059, "grad_norm": 0.03132178634405136, "learning_rate": 0.00016822089202934426, "loss": 0.2935, "step": 11999 }, { "epoch": 0.9721322099805574, "grad_norm": 0.03411552309989929, "learning_rate": 0.00016821639137674962, "loss": 0.3179, "step": 12000 }, { "epoch": 0.9722132209980557, "grad_norm": 0.03236962482333183, "learning_rate": 0.00016821189072415503, "loss": 0.3086, "step": 12001 }, { "epoch": 0.9722942320155541, "grad_norm": 0.03681671991944313, "learning_rate": 0.0001682073900715604, "loss": 0.3545, "step": 12002 }, { "epoch": 0.9723752430330524, "grad_norm": 0.0358586311340332, "learning_rate": 0.00016820288941896575, "loss": 0.2965, "step": 12003 }, { "epoch": 0.9724562540505509, "grad_norm": 0.036946721374988556, "learning_rate": 0.00016819838876637114, "loss": 0.3616, "step": 12004 }, { "epoch": 0.9725372650680493, "grad_norm": 0.037640947848558426, "learning_rate": 0.0001681938881137765, "loss": 0.3717, "step": 12005 }, { "epoch": 0.9726182760855476, "grad_norm": 0.039124995470047, "learning_rate": 0.00016818938746118186, "loss": 0.3478, "step": 12006 }, { "epoch": 0.972699287103046, "grad_norm": 0.03698194399476051, "learning_rate": 0.00016818488680858727, "loss": 0.3166, "step": 12007 }, { "epoch": 0.9727802981205443, "grad_norm": 0.0408819317817688, "learning_rate": 0.00016818038615599263, "loss": 0.3266, "step": 12008 }, { "epoch": 0.9728613091380428, "grad_norm": 0.03516923636198044, "learning_rate": 0.000168175885503398, "loss": 0.3507, "step": 12009 }, { "epoch": 0.9729423201555412, "grad_norm": 0.03453219681978226, "learning_rate": 0.00016817138485080338, "loss": 0.2954, "step": 12010 }, { "epoch": 0.9730233311730395, "grad_norm": 0.04005401208996773, "learning_rate": 0.00016816688419820874, "loss": 0.3521, "step": 12011 }, { "epoch": 0.9731043421905379, "grad_norm": 0.03611200675368309, "learning_rate": 0.00016816238354561413, "loss": 0.3156, "step": 12012 }, { "epoch": 0.9731853532080363, "grad_norm": 0.031062688678503036, "learning_rate": 0.0001681578828930195, "loss": 0.2817, "step": 12013 }, { "epoch": 0.9732663642255347, "grad_norm": 0.033956896513700485, "learning_rate": 0.00016815338224042487, "loss": 0.3013, "step": 12014 }, { "epoch": 0.973347375243033, "grad_norm": 0.03631037846207619, "learning_rate": 0.00016814888158783023, "loss": 0.3182, "step": 12015 }, { "epoch": 0.9734283862605314, "grad_norm": 0.035651616752147675, "learning_rate": 0.00016814438093523562, "loss": 0.3212, "step": 12016 }, { "epoch": 0.9735093972780298, "grad_norm": 0.036742936819791794, "learning_rate": 0.00016813988028264098, "loss": 0.3165, "step": 12017 }, { "epoch": 0.9735904082955282, "grad_norm": 0.03688966482877731, "learning_rate": 0.00016813537963004637, "loss": 0.3841, "step": 12018 }, { "epoch": 0.9736714193130266, "grad_norm": 0.03511689230799675, "learning_rate": 0.00016813087897745176, "loss": 0.29, "step": 12019 }, { "epoch": 0.9737524303305249, "grad_norm": 0.0386803075671196, "learning_rate": 0.00016812637832485712, "loss": 0.3592, "step": 12020 }, { "epoch": 0.9738334413480233, "grad_norm": 0.03346274420619011, "learning_rate": 0.00016812187767226248, "loss": 0.2931, "step": 12021 }, { "epoch": 0.9739144523655218, "grad_norm": 0.03622061759233475, "learning_rate": 0.00016811737701966786, "loss": 0.3199, "step": 12022 }, { "epoch": 0.9739954633830201, "grad_norm": 0.043558187782764435, "learning_rate": 0.00016811287636707322, "loss": 0.3601, "step": 12023 }, { "epoch": 0.9740764744005185, "grad_norm": 0.03961658477783203, "learning_rate": 0.0001681083757144786, "loss": 0.3545, "step": 12024 }, { "epoch": 0.9741574854180168, "grad_norm": 0.032546672970056534, "learning_rate": 0.000168103875061884, "loss": 0.2833, "step": 12025 }, { "epoch": 0.9742384964355152, "grad_norm": 0.03565071523189545, "learning_rate": 0.00016809937440928936, "loss": 0.3108, "step": 12026 }, { "epoch": 0.9743195074530137, "grad_norm": 0.03935694321990013, "learning_rate": 0.00016809487375669472, "loss": 0.3424, "step": 12027 }, { "epoch": 0.974400518470512, "grad_norm": 0.03298564627766609, "learning_rate": 0.0001680903731041001, "loss": 0.2936, "step": 12028 }, { "epoch": 0.9744815294880104, "grad_norm": 0.029612941667437553, "learning_rate": 0.00016808587245150546, "loss": 0.2605, "step": 12029 }, { "epoch": 0.9745625405055087, "grad_norm": 0.03617319092154503, "learning_rate": 0.00016808137179891085, "loss": 0.3319, "step": 12030 }, { "epoch": 0.9746435515230071, "grad_norm": 0.036087553948163986, "learning_rate": 0.00016807687114631624, "loss": 0.3346, "step": 12031 }, { "epoch": 0.9747245625405055, "grad_norm": 0.033548567444086075, "learning_rate": 0.0001680723704937216, "loss": 0.3214, "step": 12032 }, { "epoch": 0.9748055735580039, "grad_norm": 0.03468198701739311, "learning_rate": 0.00016806786984112696, "loss": 0.3056, "step": 12033 }, { "epoch": 0.9748865845755023, "grad_norm": 0.03659326583147049, "learning_rate": 0.00016806336918853235, "loss": 0.3159, "step": 12034 }, { "epoch": 0.9749675955930006, "grad_norm": 0.03924743831157684, "learning_rate": 0.0001680588685359377, "loss": 0.3788, "step": 12035 }, { "epoch": 0.9750486066104991, "grad_norm": 0.03203437477350235, "learning_rate": 0.0001680543678833431, "loss": 0.2802, "step": 12036 }, { "epoch": 0.9751296176279974, "grad_norm": 0.034819379448890686, "learning_rate": 0.00016804986723074848, "loss": 0.3311, "step": 12037 }, { "epoch": 0.9752106286454958, "grad_norm": 0.033765386790037155, "learning_rate": 0.00016804536657815384, "loss": 0.3156, "step": 12038 }, { "epoch": 0.9752916396629941, "grad_norm": 0.035257935523986816, "learning_rate": 0.0001680408659255592, "loss": 0.2908, "step": 12039 }, { "epoch": 0.9753726506804925, "grad_norm": 0.03877081349492073, "learning_rate": 0.0001680363652729646, "loss": 0.337, "step": 12040 }, { "epoch": 0.975453661697991, "grad_norm": 0.03562001883983612, "learning_rate": 0.00016803186462036997, "loss": 0.2947, "step": 12041 }, { "epoch": 0.9755346727154893, "grad_norm": 0.036863259971141815, "learning_rate": 0.00016802736396777533, "loss": 0.3313, "step": 12042 }, { "epoch": 0.9756156837329877, "grad_norm": 0.03537532687187195, "learning_rate": 0.00016802286331518072, "loss": 0.3271, "step": 12043 }, { "epoch": 0.975696694750486, "grad_norm": 0.031441546976566315, "learning_rate": 0.00016801836266258608, "loss": 0.2997, "step": 12044 }, { "epoch": 0.9757777057679844, "grad_norm": 0.029675081372261047, "learning_rate": 0.00016801386200999144, "loss": 0.2649, "step": 12045 }, { "epoch": 0.9758587167854829, "grad_norm": 0.038790564984083176, "learning_rate": 0.00016800936135739683, "loss": 0.3773, "step": 12046 }, { "epoch": 0.9759397278029812, "grad_norm": 0.031189288944005966, "learning_rate": 0.00016800486070480222, "loss": 0.3211, "step": 12047 }, { "epoch": 0.9760207388204796, "grad_norm": 0.03335803374648094, "learning_rate": 0.00016800036005220758, "loss": 0.3334, "step": 12048 }, { "epoch": 0.9761017498379779, "grad_norm": 0.033088475465774536, "learning_rate": 0.00016799585939961296, "loss": 0.3224, "step": 12049 }, { "epoch": 0.9761827608554764, "grad_norm": 0.037221070379018784, "learning_rate": 0.00016799135874701832, "loss": 0.3163, "step": 12050 }, { "epoch": 0.9762637718729748, "grad_norm": 0.03499598801136017, "learning_rate": 0.00016798685809442368, "loss": 0.3521, "step": 12051 }, { "epoch": 0.9763447828904731, "grad_norm": 0.03871636092662811, "learning_rate": 0.00016798235744182907, "loss": 0.3824, "step": 12052 }, { "epoch": 0.9764257939079715, "grad_norm": 0.03844713792204857, "learning_rate": 0.00016797785678923446, "loss": 0.4189, "step": 12053 }, { "epoch": 0.9765068049254698, "grad_norm": 0.037115678191185, "learning_rate": 0.00016797335613663982, "loss": 0.3243, "step": 12054 }, { "epoch": 0.9765878159429683, "grad_norm": 0.040090303868055344, "learning_rate": 0.0001679688554840452, "loss": 0.3462, "step": 12055 }, { "epoch": 0.9766688269604666, "grad_norm": 0.030491787940263748, "learning_rate": 0.00016796435483145056, "loss": 0.3018, "step": 12056 }, { "epoch": 0.976749837977965, "grad_norm": 0.033908918499946594, "learning_rate": 0.00016795985417885592, "loss": 0.2919, "step": 12057 }, { "epoch": 0.9768308489954634, "grad_norm": 0.034717120230197906, "learning_rate": 0.0001679553535262613, "loss": 0.2919, "step": 12058 }, { "epoch": 0.9769118600129617, "grad_norm": 0.031752314418554306, "learning_rate": 0.0001679508528736667, "loss": 0.2668, "step": 12059 }, { "epoch": 0.9769928710304602, "grad_norm": 0.03231941908597946, "learning_rate": 0.00016794635222107206, "loss": 0.2891, "step": 12060 }, { "epoch": 0.9770738820479585, "grad_norm": 0.038936734199523926, "learning_rate": 0.00016794185156847745, "loss": 0.3659, "step": 12061 }, { "epoch": 0.9771548930654569, "grad_norm": 0.03846294805407524, "learning_rate": 0.0001679373509158828, "loss": 0.395, "step": 12062 }, { "epoch": 0.9772359040829552, "grad_norm": 0.04152911528944969, "learning_rate": 0.00016793285026328817, "loss": 0.4063, "step": 12063 }, { "epoch": 0.9773169151004537, "grad_norm": 0.03231941536068916, "learning_rate": 0.00016792834961069358, "loss": 0.2817, "step": 12064 }, { "epoch": 0.9773979261179521, "grad_norm": 0.0361601822078228, "learning_rate": 0.00016792384895809894, "loss": 0.3337, "step": 12065 }, { "epoch": 0.9774789371354504, "grad_norm": 0.0319807194173336, "learning_rate": 0.0001679193483055043, "loss": 0.3655, "step": 12066 }, { "epoch": 0.9775599481529488, "grad_norm": 0.039843834936618805, "learning_rate": 0.0001679148476529097, "loss": 0.3795, "step": 12067 }, { "epoch": 0.9776409591704471, "grad_norm": 0.03642282634973526, "learning_rate": 0.00016791034700031505, "loss": 0.3392, "step": 12068 }, { "epoch": 0.9777219701879456, "grad_norm": 0.036994218826293945, "learning_rate": 0.0001679058463477204, "loss": 0.315, "step": 12069 }, { "epoch": 0.977802981205444, "grad_norm": 0.040465034544467926, "learning_rate": 0.00016790134569512582, "loss": 0.3278, "step": 12070 }, { "epoch": 0.9778839922229423, "grad_norm": 0.03564620018005371, "learning_rate": 0.00016789684504253118, "loss": 0.3344, "step": 12071 }, { "epoch": 0.9779650032404407, "grad_norm": 0.03501134738326073, "learning_rate": 0.00016789234438993654, "loss": 0.3137, "step": 12072 }, { "epoch": 0.9780460142579391, "grad_norm": 0.03337648883461952, "learning_rate": 0.00016788784373734193, "loss": 0.3183, "step": 12073 }, { "epoch": 0.9781270252754375, "grad_norm": 0.0345093309879303, "learning_rate": 0.0001678833430847473, "loss": 0.264, "step": 12074 }, { "epoch": 0.9782080362929358, "grad_norm": 0.0420089066028595, "learning_rate": 0.00016787884243215265, "loss": 0.3451, "step": 12075 }, { "epoch": 0.9782890473104342, "grad_norm": 0.02996142767369747, "learning_rate": 0.00016787434177955806, "loss": 0.3257, "step": 12076 }, { "epoch": 0.9783700583279326, "grad_norm": 0.03614578768610954, "learning_rate": 0.00016786984112696342, "loss": 0.3039, "step": 12077 }, { "epoch": 0.978451069345431, "grad_norm": 0.03654995560646057, "learning_rate": 0.00016786534047436878, "loss": 0.3459, "step": 12078 }, { "epoch": 0.9785320803629294, "grad_norm": 0.03231252729892731, "learning_rate": 0.00016786083982177417, "loss": 0.2984, "step": 12079 }, { "epoch": 0.9786130913804277, "grad_norm": 0.03695613890886307, "learning_rate": 0.00016785633916917953, "loss": 0.3186, "step": 12080 }, { "epoch": 0.9786941023979261, "grad_norm": 0.03418729826807976, "learning_rate": 0.0001678518385165849, "loss": 0.304, "step": 12081 }, { "epoch": 0.9787751134154244, "grad_norm": 0.031289901584386826, "learning_rate": 0.0001678473378639903, "loss": 0.3236, "step": 12082 }, { "epoch": 0.9788561244329229, "grad_norm": 0.03405127674341202, "learning_rate": 0.00016784283721139567, "loss": 0.3379, "step": 12083 }, { "epoch": 0.9789371354504213, "grad_norm": 0.040710579603910446, "learning_rate": 0.00016783833655880103, "loss": 0.3391, "step": 12084 }, { "epoch": 0.9790181464679196, "grad_norm": 0.03499474376440048, "learning_rate": 0.0001678338359062064, "loss": 0.3436, "step": 12085 }, { "epoch": 0.979099157485418, "grad_norm": 0.04072335362434387, "learning_rate": 0.00016782933525361177, "loss": 0.3629, "step": 12086 }, { "epoch": 0.9791801685029164, "grad_norm": 0.039114028215408325, "learning_rate": 0.00016782483460101713, "loss": 0.3565, "step": 12087 }, { "epoch": 0.9792611795204148, "grad_norm": 0.03882155194878578, "learning_rate": 0.00016782033394842255, "loss": 0.3352, "step": 12088 }, { "epoch": 0.9793421905379132, "grad_norm": 0.04164344444870949, "learning_rate": 0.0001678158332958279, "loss": 0.3681, "step": 12089 }, { "epoch": 0.9794232015554115, "grad_norm": 0.03964466229081154, "learning_rate": 0.00016781133264323327, "loss": 0.3562, "step": 12090 }, { "epoch": 0.9795042125729099, "grad_norm": 0.03374399617314339, "learning_rate": 0.00016780683199063865, "loss": 0.3174, "step": 12091 }, { "epoch": 0.9795852235904083, "grad_norm": 0.03571087867021561, "learning_rate": 0.00016780233133804401, "loss": 0.3276, "step": 12092 }, { "epoch": 0.9796662346079067, "grad_norm": 0.037918128073215485, "learning_rate": 0.0001677978306854494, "loss": 0.2998, "step": 12093 }, { "epoch": 0.979747245625405, "grad_norm": 0.03598501533269882, "learning_rate": 0.0001677933300328548, "loss": 0.3824, "step": 12094 }, { "epoch": 0.9798282566429034, "grad_norm": 0.030146285891532898, "learning_rate": 0.00016778882938026015, "loss": 0.2893, "step": 12095 }, { "epoch": 0.9799092676604018, "grad_norm": 0.038079775869846344, "learning_rate": 0.0001677843287276655, "loss": 0.3156, "step": 12096 }, { "epoch": 0.9799902786779002, "grad_norm": 0.0377010740339756, "learning_rate": 0.0001677798280750709, "loss": 0.3612, "step": 12097 }, { "epoch": 0.9800712896953986, "grad_norm": 0.03465786576271057, "learning_rate": 0.00016777532742247626, "loss": 0.3, "step": 12098 }, { "epoch": 0.9801523007128969, "grad_norm": 0.03254738450050354, "learning_rate": 0.00016777082676988164, "loss": 0.2917, "step": 12099 }, { "epoch": 0.9802333117303953, "grad_norm": 0.04372397065162659, "learning_rate": 0.00016776632611728703, "loss": 0.3801, "step": 12100 }, { "epoch": 0.9803143227478938, "grad_norm": 0.03171698376536369, "learning_rate": 0.0001677618254646924, "loss": 0.2705, "step": 12101 }, { "epoch": 0.9803953337653921, "grad_norm": 0.03690371662378311, "learning_rate": 0.00016775732481209775, "loss": 0.3376, "step": 12102 }, { "epoch": 0.9804763447828905, "grad_norm": 0.037281621247529984, "learning_rate": 0.00016775282415950314, "loss": 0.3308, "step": 12103 }, { "epoch": 0.9805573558003888, "grad_norm": 0.038886263966560364, "learning_rate": 0.0001677483235069085, "loss": 0.3401, "step": 12104 }, { "epoch": 0.9806383668178872, "grad_norm": 0.032279498875141144, "learning_rate": 0.00016774382285431388, "loss": 0.3005, "step": 12105 }, { "epoch": 0.9807193778353857, "grad_norm": 0.04319687932729721, "learning_rate": 0.00016773932220171927, "loss": 0.3647, "step": 12106 }, { "epoch": 0.980800388852884, "grad_norm": 0.03467544540762901, "learning_rate": 0.00016773482154912463, "loss": 0.3023, "step": 12107 }, { "epoch": 0.9808813998703824, "grad_norm": 0.03290662169456482, "learning_rate": 0.00016773032089653, "loss": 0.3045, "step": 12108 }, { "epoch": 0.9809624108878807, "grad_norm": 0.03559955209493637, "learning_rate": 0.00016772582024393538, "loss": 0.3516, "step": 12109 }, { "epoch": 0.9810434219053791, "grad_norm": 0.035643838346004486, "learning_rate": 0.00016772131959134074, "loss": 0.3338, "step": 12110 }, { "epoch": 0.9811244329228775, "grad_norm": 0.03310045227408409, "learning_rate": 0.00016771681893874613, "loss": 0.3109, "step": 12111 }, { "epoch": 0.9812054439403759, "grad_norm": 0.031548332422971725, "learning_rate": 0.00016771231828615151, "loss": 0.2958, "step": 12112 }, { "epoch": 0.9812864549578743, "grad_norm": 0.0302834864705801, "learning_rate": 0.00016770781763355687, "loss": 0.305, "step": 12113 }, { "epoch": 0.9813674659753726, "grad_norm": 0.045317355543375015, "learning_rate": 0.00016770331698096223, "loss": 0.383, "step": 12114 }, { "epoch": 0.9814484769928711, "grad_norm": 0.03549464792013168, "learning_rate": 0.00016769881632836762, "loss": 0.3062, "step": 12115 }, { "epoch": 0.9815294880103694, "grad_norm": 0.042516469955444336, "learning_rate": 0.000167694315675773, "loss": 0.3586, "step": 12116 }, { "epoch": 0.9816104990278678, "grad_norm": 0.035244572907686234, "learning_rate": 0.00016768981502317837, "loss": 0.3084, "step": 12117 }, { "epoch": 0.9816915100453661, "grad_norm": 0.037299104034900665, "learning_rate": 0.00016768531437058376, "loss": 0.3453, "step": 12118 }, { "epoch": 0.9817725210628645, "grad_norm": 0.042558420449495316, "learning_rate": 0.00016768081371798912, "loss": 0.3437, "step": 12119 }, { "epoch": 0.981853532080363, "grad_norm": 0.034451086074113846, "learning_rate": 0.00016767631306539448, "loss": 0.3098, "step": 12120 }, { "epoch": 0.9819345430978613, "grad_norm": 0.03349757939577103, "learning_rate": 0.00016767181241279986, "loss": 0.3304, "step": 12121 }, { "epoch": 0.9820155541153597, "grad_norm": 0.034323133528232574, "learning_rate": 0.00016766731176020525, "loss": 0.3007, "step": 12122 }, { "epoch": 0.982096565132858, "grad_norm": 0.03829013928771019, "learning_rate": 0.0001676628111076106, "loss": 0.3474, "step": 12123 }, { "epoch": 0.9821775761503565, "grad_norm": 0.03527158126235008, "learning_rate": 0.000167658310455016, "loss": 0.3264, "step": 12124 }, { "epoch": 0.9822585871678549, "grad_norm": 0.03139869496226311, "learning_rate": 0.00016765380980242136, "loss": 0.3016, "step": 12125 }, { "epoch": 0.9823395981853532, "grad_norm": 0.034956566989421844, "learning_rate": 0.00016764930914982672, "loss": 0.3475, "step": 12126 }, { "epoch": 0.9824206092028516, "grad_norm": 0.03701526299118996, "learning_rate": 0.0001676448084972321, "loss": 0.3376, "step": 12127 }, { "epoch": 0.9825016202203499, "grad_norm": 0.03835771232843399, "learning_rate": 0.0001676403078446375, "loss": 0.3733, "step": 12128 }, { "epoch": 0.9825826312378484, "grad_norm": 0.03585449978709221, "learning_rate": 0.00016763580719204285, "loss": 0.3314, "step": 12129 }, { "epoch": 0.9826636422553467, "grad_norm": 0.033534858375787735, "learning_rate": 0.00016763130653944824, "loss": 0.3812, "step": 12130 }, { "epoch": 0.9827446532728451, "grad_norm": 0.03601505607366562, "learning_rate": 0.0001676268058868536, "loss": 0.2844, "step": 12131 }, { "epoch": 0.9828256642903435, "grad_norm": 0.03342147171497345, "learning_rate": 0.00016762230523425896, "loss": 0.3221, "step": 12132 }, { "epoch": 0.9829066753078418, "grad_norm": 0.03414756432175636, "learning_rate": 0.00016761780458166435, "loss": 0.3119, "step": 12133 }, { "epoch": 0.9829876863253403, "grad_norm": 0.033940937370061874, "learning_rate": 0.00016761330392906973, "loss": 0.3078, "step": 12134 }, { "epoch": 0.9830686973428386, "grad_norm": 0.03797852620482445, "learning_rate": 0.0001676088032764751, "loss": 0.3289, "step": 12135 }, { "epoch": 0.983149708360337, "grad_norm": 0.03421001508831978, "learning_rate": 0.00016760430262388048, "loss": 0.3313, "step": 12136 }, { "epoch": 0.9832307193778353, "grad_norm": 0.034913770854473114, "learning_rate": 0.00016759980197128584, "loss": 0.3149, "step": 12137 }, { "epoch": 0.9833117303953338, "grad_norm": 0.03968416526913643, "learning_rate": 0.0001675953013186912, "loss": 0.3632, "step": 12138 }, { "epoch": 0.9833927414128322, "grad_norm": 0.035320572555065155, "learning_rate": 0.0001675908006660966, "loss": 0.3232, "step": 12139 }, { "epoch": 0.9834737524303305, "grad_norm": 0.033722467720508575, "learning_rate": 0.00016758630001350197, "loss": 0.2897, "step": 12140 }, { "epoch": 0.9835547634478289, "grad_norm": 0.03793656826019287, "learning_rate": 0.00016758179936090733, "loss": 0.322, "step": 12141 }, { "epoch": 0.9836357744653272, "grad_norm": 0.032825589179992676, "learning_rate": 0.00016757729870831272, "loss": 0.3085, "step": 12142 }, { "epoch": 0.9837167854828257, "grad_norm": 0.03451994061470032, "learning_rate": 0.00016757279805571808, "loss": 0.3335, "step": 12143 }, { "epoch": 0.9837977965003241, "grad_norm": 0.03874523565173149, "learning_rate": 0.00016756829740312344, "loss": 0.3385, "step": 12144 }, { "epoch": 0.9838788075178224, "grad_norm": 0.038094766438007355, "learning_rate": 0.00016756379675052886, "loss": 0.3322, "step": 12145 }, { "epoch": 0.9839598185353208, "grad_norm": 0.036857735365629196, "learning_rate": 0.00016755929609793422, "loss": 0.3054, "step": 12146 }, { "epoch": 0.9840408295528191, "grad_norm": 0.04062890261411667, "learning_rate": 0.00016755479544533958, "loss": 0.3367, "step": 12147 }, { "epoch": 0.9841218405703176, "grad_norm": 0.032043006271123886, "learning_rate": 0.00016755029479274496, "loss": 0.3227, "step": 12148 }, { "epoch": 0.984202851587816, "grad_norm": 0.0364932045340538, "learning_rate": 0.00016754579414015032, "loss": 0.3285, "step": 12149 }, { "epoch": 0.9842838626053143, "grad_norm": 0.03572333604097366, "learning_rate": 0.00016754129348755568, "loss": 0.3439, "step": 12150 }, { "epoch": 0.9843648736228127, "grad_norm": 0.039901476353406906, "learning_rate": 0.0001675367928349611, "loss": 0.3169, "step": 12151 }, { "epoch": 0.9844458846403111, "grad_norm": 0.042402952909469604, "learning_rate": 0.00016753229218236646, "loss": 0.3442, "step": 12152 }, { "epoch": 0.9845268956578095, "grad_norm": 0.036621030420064926, "learning_rate": 0.00016752779152977182, "loss": 0.2791, "step": 12153 }, { "epoch": 0.9846079066753078, "grad_norm": 0.036967337131500244, "learning_rate": 0.0001675232908771772, "loss": 0.2982, "step": 12154 }, { "epoch": 0.9846889176928062, "grad_norm": 0.039531875401735306, "learning_rate": 0.00016751879022458257, "loss": 0.3337, "step": 12155 }, { "epoch": 0.9847699287103046, "grad_norm": 0.03411232680082321, "learning_rate": 0.00016751428957198793, "loss": 0.3021, "step": 12156 }, { "epoch": 0.984850939727803, "grad_norm": 0.03600526601076126, "learning_rate": 0.00016750978891939334, "loss": 0.3203, "step": 12157 }, { "epoch": 0.9849319507453014, "grad_norm": 0.04176699370145798, "learning_rate": 0.0001675052882667987, "loss": 0.32, "step": 12158 }, { "epoch": 0.9850129617627997, "grad_norm": 0.036995962262153625, "learning_rate": 0.00016750078761420406, "loss": 0.3388, "step": 12159 }, { "epoch": 0.9850939727802981, "grad_norm": 0.039030853658914566, "learning_rate": 0.00016749628696160945, "loss": 0.3352, "step": 12160 }, { "epoch": 0.9851749837977966, "grad_norm": 0.032328519970178604, "learning_rate": 0.0001674917863090148, "loss": 0.276, "step": 12161 }, { "epoch": 0.9852559948152949, "grad_norm": 0.03794671595096588, "learning_rate": 0.00016748728565642017, "loss": 0.3641, "step": 12162 }, { "epoch": 0.9853370058327933, "grad_norm": 0.03538952022790909, "learning_rate": 0.00016748278500382558, "loss": 0.331, "step": 12163 }, { "epoch": 0.9854180168502916, "grad_norm": 0.035941652953624725, "learning_rate": 0.00016747828435123094, "loss": 0.3681, "step": 12164 }, { "epoch": 0.98549902786779, "grad_norm": 0.03243683651089668, "learning_rate": 0.0001674737836986363, "loss": 0.2974, "step": 12165 }, { "epoch": 0.9855800388852884, "grad_norm": 0.03725123032927513, "learning_rate": 0.0001674692830460417, "loss": 0.3333, "step": 12166 }, { "epoch": 0.9856610499027868, "grad_norm": 0.041863132268190384, "learning_rate": 0.00016746478239344705, "loss": 0.3771, "step": 12167 }, { "epoch": 0.9857420609202852, "grad_norm": 0.03277840465307236, "learning_rate": 0.00016746028174085244, "loss": 0.3114, "step": 12168 }, { "epoch": 0.9858230719377835, "grad_norm": 0.035245537757873535, "learning_rate": 0.00016745578108825782, "loss": 0.3384, "step": 12169 }, { "epoch": 0.9859040829552819, "grad_norm": 0.04435291886329651, "learning_rate": 0.00016745128043566318, "loss": 0.3479, "step": 12170 }, { "epoch": 0.9859850939727803, "grad_norm": 0.03915781155228615, "learning_rate": 0.00016744677978306854, "loss": 0.3298, "step": 12171 }, { "epoch": 0.9860661049902787, "grad_norm": 0.037133507430553436, "learning_rate": 0.00016744227913047393, "loss": 0.3671, "step": 12172 }, { "epoch": 0.986147116007777, "grad_norm": 0.03917395696043968, "learning_rate": 0.0001674377784778793, "loss": 0.3151, "step": 12173 }, { "epoch": 0.9862281270252754, "grad_norm": 0.03865097463130951, "learning_rate": 0.00016743327782528468, "loss": 0.3627, "step": 12174 }, { "epoch": 0.9863091380427739, "grad_norm": 0.0351426862180233, "learning_rate": 0.00016742877717269006, "loss": 0.302, "step": 12175 }, { "epoch": 0.9863901490602722, "grad_norm": 0.03547579050064087, "learning_rate": 0.00016742427652009542, "loss": 0.3213, "step": 12176 }, { "epoch": 0.9864711600777706, "grad_norm": 0.034349165856838226, "learning_rate": 0.00016741977586750078, "loss": 0.3177, "step": 12177 }, { "epoch": 0.9865521710952689, "grad_norm": 0.03572181612253189, "learning_rate": 0.00016741527521490617, "loss": 0.2843, "step": 12178 }, { "epoch": 0.9866331821127673, "grad_norm": 0.03393491730093956, "learning_rate": 0.00016741077456231153, "loss": 0.3074, "step": 12179 }, { "epoch": 0.9867141931302658, "grad_norm": 0.037414468824863434, "learning_rate": 0.00016740627390971692, "loss": 0.3719, "step": 12180 }, { "epoch": 0.9867952041477641, "grad_norm": 0.03554844483733177, "learning_rate": 0.0001674017732571223, "loss": 0.3543, "step": 12181 }, { "epoch": 0.9868762151652625, "grad_norm": 0.03349216282367706, "learning_rate": 0.00016739727260452767, "loss": 0.323, "step": 12182 }, { "epoch": 0.9869572261827608, "grad_norm": 0.03510260209441185, "learning_rate": 0.00016739277195193303, "loss": 0.3536, "step": 12183 }, { "epoch": 0.9870382372002592, "grad_norm": 0.033853139728307724, "learning_rate": 0.0001673882712993384, "loss": 0.3127, "step": 12184 }, { "epoch": 0.9871192482177576, "grad_norm": 0.036612339317798615, "learning_rate": 0.00016738377064674377, "loss": 0.3265, "step": 12185 }, { "epoch": 0.987200259235256, "grad_norm": 0.03170229122042656, "learning_rate": 0.00016737926999414916, "loss": 0.3193, "step": 12186 }, { "epoch": 0.9872812702527544, "grad_norm": 0.03500431403517723, "learning_rate": 0.00016737476934155455, "loss": 0.3057, "step": 12187 }, { "epoch": 0.9873622812702527, "grad_norm": 0.032831739634275436, "learning_rate": 0.0001673702686889599, "loss": 0.3013, "step": 12188 }, { "epoch": 0.9874432922877512, "grad_norm": 0.03684018552303314, "learning_rate": 0.00016736576803636527, "loss": 0.33, "step": 12189 }, { "epoch": 0.9875243033052495, "grad_norm": 0.037288084626197815, "learning_rate": 0.00016736126738377065, "loss": 0.3402, "step": 12190 }, { "epoch": 0.9876053143227479, "grad_norm": 0.03490764647722244, "learning_rate": 0.00016735676673117601, "loss": 0.3314, "step": 12191 }, { "epoch": 0.9876863253402463, "grad_norm": 0.03680797666311264, "learning_rate": 0.0001673522660785814, "loss": 0.3295, "step": 12192 }, { "epoch": 0.9877673363577446, "grad_norm": 0.03332938253879547, "learning_rate": 0.0001673477654259868, "loss": 0.3503, "step": 12193 }, { "epoch": 0.9878483473752431, "grad_norm": 0.037796203047037125, "learning_rate": 0.00016734326477339215, "loss": 0.3532, "step": 12194 }, { "epoch": 0.9879293583927414, "grad_norm": 0.035063859075307846, "learning_rate": 0.0001673387641207975, "loss": 0.3151, "step": 12195 }, { "epoch": 0.9880103694102398, "grad_norm": 0.03438085690140724, "learning_rate": 0.0001673342634682029, "loss": 0.3169, "step": 12196 }, { "epoch": 0.9880913804277381, "grad_norm": 0.03508453816175461, "learning_rate": 0.00016732976281560828, "loss": 0.3033, "step": 12197 }, { "epoch": 0.9881723914452365, "grad_norm": 0.036688365042209625, "learning_rate": 0.00016732526216301364, "loss": 0.2952, "step": 12198 }, { "epoch": 0.988253402462735, "grad_norm": 0.03475033864378929, "learning_rate": 0.00016732076151041903, "loss": 0.3235, "step": 12199 }, { "epoch": 0.9883344134802333, "grad_norm": 0.03848425671458244, "learning_rate": 0.0001673162608578244, "loss": 0.254, "step": 12200 }, { "epoch": 0.9884154244977317, "grad_norm": 0.036981042474508286, "learning_rate": 0.00016731176020522975, "loss": 0.3465, "step": 12201 }, { "epoch": 0.98849643551523, "grad_norm": 0.03264646604657173, "learning_rate": 0.00016730725955263514, "loss": 0.2708, "step": 12202 }, { "epoch": 0.9885774465327285, "grad_norm": 0.0368381030857563, "learning_rate": 0.00016730275890004053, "loss": 0.314, "step": 12203 }, { "epoch": 0.9886584575502269, "grad_norm": 0.04556920379400253, "learning_rate": 0.00016729825824744589, "loss": 0.3608, "step": 12204 }, { "epoch": 0.9887394685677252, "grad_norm": 0.03348538279533386, "learning_rate": 0.00016729375759485127, "loss": 0.2967, "step": 12205 }, { "epoch": 0.9888204795852236, "grad_norm": 0.03768174350261688, "learning_rate": 0.00016728925694225663, "loss": 0.3023, "step": 12206 }, { "epoch": 0.9889014906027219, "grad_norm": 0.036302268505096436, "learning_rate": 0.000167284756289662, "loss": 0.32, "step": 12207 }, { "epoch": 0.9889825016202204, "grad_norm": 0.03752124309539795, "learning_rate": 0.00016728025563706738, "loss": 0.3545, "step": 12208 }, { "epoch": 0.9890635126377187, "grad_norm": 0.04945499822497368, "learning_rate": 0.00016727575498447277, "loss": 0.3927, "step": 12209 }, { "epoch": 0.9891445236552171, "grad_norm": 0.03695805370807648, "learning_rate": 0.00016727125433187813, "loss": 0.3258, "step": 12210 }, { "epoch": 0.9892255346727155, "grad_norm": 0.03560155630111694, "learning_rate": 0.00016726675367928351, "loss": 0.3048, "step": 12211 }, { "epoch": 0.9893065456902139, "grad_norm": 0.04134264215826988, "learning_rate": 0.00016726225302668887, "loss": 0.3407, "step": 12212 }, { "epoch": 0.9893875567077123, "grad_norm": 0.035698454827070236, "learning_rate": 0.00016725775237409423, "loss": 0.332, "step": 12213 }, { "epoch": 0.9894685677252106, "grad_norm": 0.03139304742217064, "learning_rate": 0.00016725325172149962, "loss": 0.3244, "step": 12214 }, { "epoch": 0.989549578742709, "grad_norm": 0.03921914100646973, "learning_rate": 0.000167248751068905, "loss": 0.3876, "step": 12215 }, { "epoch": 0.9896305897602073, "grad_norm": 0.036349404603242874, "learning_rate": 0.00016724425041631037, "loss": 0.3254, "step": 12216 }, { "epoch": 0.9897116007777058, "grad_norm": 0.037594377994537354, "learning_rate": 0.00016723974976371576, "loss": 0.3609, "step": 12217 }, { "epoch": 0.9897926117952042, "grad_norm": 0.03646136447787285, "learning_rate": 0.00016723524911112112, "loss": 0.3481, "step": 12218 }, { "epoch": 0.9898736228127025, "grad_norm": 0.031988371163606644, "learning_rate": 0.00016723074845852648, "loss": 0.2894, "step": 12219 }, { "epoch": 0.9899546338302009, "grad_norm": 0.03128928318619728, "learning_rate": 0.00016722624780593186, "loss": 0.3097, "step": 12220 }, { "epoch": 0.9900356448476992, "grad_norm": 0.03760599344968796, "learning_rate": 0.00016722174715333725, "loss": 0.3216, "step": 12221 }, { "epoch": 0.9901166558651977, "grad_norm": 0.034738097339868546, "learning_rate": 0.0001672172465007426, "loss": 0.3486, "step": 12222 }, { "epoch": 0.9901976668826961, "grad_norm": 0.03593483567237854, "learning_rate": 0.000167212745848148, "loss": 0.3223, "step": 12223 }, { "epoch": 0.9902786779001944, "grad_norm": 0.034317515790462494, "learning_rate": 0.00016720824519555336, "loss": 0.3191, "step": 12224 }, { "epoch": 0.9903596889176928, "grad_norm": 0.034238673746585846, "learning_rate": 0.00016720374454295872, "loss": 0.3271, "step": 12225 }, { "epoch": 0.9904406999351912, "grad_norm": 0.040035225450992584, "learning_rate": 0.00016719924389036413, "loss": 0.3203, "step": 12226 }, { "epoch": 0.9905217109526896, "grad_norm": 0.036540232598781586, "learning_rate": 0.0001671947432377695, "loss": 0.307, "step": 12227 }, { "epoch": 0.990602721970188, "grad_norm": 0.0375128798186779, "learning_rate": 0.00016719024258517485, "loss": 0.3247, "step": 12228 }, { "epoch": 0.9906837329876863, "grad_norm": 0.03315060958266258, "learning_rate": 0.00016718574193258024, "loss": 0.3176, "step": 12229 }, { "epoch": 0.9907647440051847, "grad_norm": 0.03492714464664459, "learning_rate": 0.0001671812412799856, "loss": 0.3182, "step": 12230 }, { "epoch": 0.9908457550226831, "grad_norm": 0.036610666662454605, "learning_rate": 0.00016717674062739096, "loss": 0.3409, "step": 12231 }, { "epoch": 0.9909267660401815, "grad_norm": 0.03414812311530113, "learning_rate": 0.00016717223997479637, "loss": 0.3163, "step": 12232 }, { "epoch": 0.9910077770576798, "grad_norm": 0.03545241430401802, "learning_rate": 0.00016716773932220173, "loss": 0.3071, "step": 12233 }, { "epoch": 0.9910887880751782, "grad_norm": 0.04074105620384216, "learning_rate": 0.0001671632386696071, "loss": 0.3231, "step": 12234 }, { "epoch": 0.9911697990926766, "grad_norm": 0.03582175076007843, "learning_rate": 0.00016715873801701248, "loss": 0.3559, "step": 12235 }, { "epoch": 0.991250810110175, "grad_norm": 0.038345787674188614, "learning_rate": 0.00016715423736441784, "loss": 0.3599, "step": 12236 }, { "epoch": 0.9913318211276734, "grad_norm": 0.033684276044368744, "learning_rate": 0.0001671497367118232, "loss": 0.3029, "step": 12237 }, { "epoch": 0.9914128321451717, "grad_norm": 0.03453746438026428, "learning_rate": 0.00016714523605922861, "loss": 0.2979, "step": 12238 }, { "epoch": 0.9914938431626701, "grad_norm": 0.03586553782224655, "learning_rate": 0.00016714073540663397, "loss": 0.3443, "step": 12239 }, { "epoch": 0.9915748541801686, "grad_norm": 0.03529420495033264, "learning_rate": 0.00016713623475403933, "loss": 0.3262, "step": 12240 }, { "epoch": 0.9916558651976669, "grad_norm": 0.0342470221221447, "learning_rate": 0.00016713173410144472, "loss": 0.3247, "step": 12241 }, { "epoch": 0.9917368762151653, "grad_norm": 0.03083527274429798, "learning_rate": 0.00016712723344885008, "loss": 0.2956, "step": 12242 }, { "epoch": 0.9918178872326636, "grad_norm": 0.03758340701460838, "learning_rate": 0.00016712273279625544, "loss": 0.31, "step": 12243 }, { "epoch": 0.991898898250162, "grad_norm": 0.03650803864002228, "learning_rate": 0.00016711823214366086, "loss": 0.3517, "step": 12244 }, { "epoch": 0.9919799092676604, "grad_norm": 0.03343210741877556, "learning_rate": 0.00016711373149106622, "loss": 0.2898, "step": 12245 }, { "epoch": 0.9920609202851588, "grad_norm": 0.03352636843919754, "learning_rate": 0.00016710923083847158, "loss": 0.2622, "step": 12246 }, { "epoch": 0.9921419313026572, "grad_norm": 0.03206267207860947, "learning_rate": 0.00016710473018587696, "loss": 0.2606, "step": 12247 }, { "epoch": 0.9922229423201555, "grad_norm": 0.03664777800440788, "learning_rate": 0.00016710022953328232, "loss": 0.3447, "step": 12248 }, { "epoch": 0.9923039533376539, "grad_norm": 0.0360250361263752, "learning_rate": 0.0001670957288806877, "loss": 0.3173, "step": 12249 }, { "epoch": 0.9923849643551523, "grad_norm": 0.032525595277547836, "learning_rate": 0.0001670912282280931, "loss": 0.2989, "step": 12250 }, { "epoch": 0.9924659753726507, "grad_norm": 0.03685523942112923, "learning_rate": 0.00016708672757549846, "loss": 0.3359, "step": 12251 }, { "epoch": 0.992546986390149, "grad_norm": 0.039908938109874725, "learning_rate": 0.00016708222692290382, "loss": 0.3506, "step": 12252 }, { "epoch": 0.9926279974076474, "grad_norm": 0.03686125949025154, "learning_rate": 0.0001670777262703092, "loss": 0.2967, "step": 12253 }, { "epoch": 0.9927090084251459, "grad_norm": 0.03284912183880806, "learning_rate": 0.00016707322561771457, "loss": 0.3224, "step": 12254 }, { "epoch": 0.9927900194426442, "grad_norm": 0.037038374692201614, "learning_rate": 0.00016706872496511995, "loss": 0.3101, "step": 12255 }, { "epoch": 0.9928710304601426, "grad_norm": 0.03874828293919563, "learning_rate": 0.00016706422431252534, "loss": 0.3035, "step": 12256 }, { "epoch": 0.9929520414776409, "grad_norm": 0.030359134078025818, "learning_rate": 0.0001670597236599307, "loss": 0.3111, "step": 12257 }, { "epoch": 0.9930330524951393, "grad_norm": 0.040259990841150284, "learning_rate": 0.00016705522300733606, "loss": 0.3559, "step": 12258 }, { "epoch": 0.9931140635126378, "grad_norm": 0.03642154484987259, "learning_rate": 0.00016705072235474145, "loss": 0.3568, "step": 12259 }, { "epoch": 0.9931950745301361, "grad_norm": 0.042818669229745865, "learning_rate": 0.0001670462217021468, "loss": 0.409, "step": 12260 }, { "epoch": 0.9932760855476345, "grad_norm": 0.03275144472718239, "learning_rate": 0.0001670417210495522, "loss": 0.2907, "step": 12261 }, { "epoch": 0.9933570965651328, "grad_norm": 0.03294292464852333, "learning_rate": 0.00016703722039695758, "loss": 0.3168, "step": 12262 }, { "epoch": 0.9934381075826313, "grad_norm": 0.03793772682547569, "learning_rate": 0.00016703271974436294, "loss": 0.3324, "step": 12263 }, { "epoch": 0.9935191186001296, "grad_norm": 0.03757007420063019, "learning_rate": 0.0001670282190917683, "loss": 0.3266, "step": 12264 }, { "epoch": 0.993600129617628, "grad_norm": 0.03577961027622223, "learning_rate": 0.0001670237184391737, "loss": 0.2696, "step": 12265 }, { "epoch": 0.9936811406351264, "grad_norm": 0.036166343837976456, "learning_rate": 0.00016701921778657905, "loss": 0.2861, "step": 12266 }, { "epoch": 0.9937621516526247, "grad_norm": 0.03126160427927971, "learning_rate": 0.00016701471713398444, "loss": 0.3055, "step": 12267 }, { "epoch": 0.9938431626701232, "grad_norm": 0.04049358144402504, "learning_rate": 0.00016701021648138982, "loss": 0.3118, "step": 12268 }, { "epoch": 0.9939241736876215, "grad_norm": 0.03834820166230202, "learning_rate": 0.00016700571582879518, "loss": 0.3451, "step": 12269 }, { "epoch": 0.9940051847051199, "grad_norm": 0.03607412055134773, "learning_rate": 0.00016700121517620054, "loss": 0.325, "step": 12270 }, { "epoch": 0.9940861957226182, "grad_norm": 0.03167823702096939, "learning_rate": 0.00016699671452360593, "loss": 0.3014, "step": 12271 }, { "epoch": 0.9941672067401166, "grad_norm": 0.038372207432985306, "learning_rate": 0.0001669922138710113, "loss": 0.3583, "step": 12272 }, { "epoch": 0.9942482177576151, "grad_norm": 0.03487422317266464, "learning_rate": 0.00016698771321841668, "loss": 0.3359, "step": 12273 }, { "epoch": 0.9943292287751134, "grad_norm": 0.03847598284482956, "learning_rate": 0.00016698321256582206, "loss": 0.335, "step": 12274 }, { "epoch": 0.9944102397926118, "grad_norm": 0.03265409916639328, "learning_rate": 0.00016697871191322742, "loss": 0.2944, "step": 12275 }, { "epoch": 0.9944912508101101, "grad_norm": 0.03902866318821907, "learning_rate": 0.00016697421126063278, "loss": 0.3686, "step": 12276 }, { "epoch": 0.9945722618276086, "grad_norm": 0.03776983544230461, "learning_rate": 0.00016696971060803817, "loss": 0.3537, "step": 12277 }, { "epoch": 0.994653272845107, "grad_norm": 0.03842049464583397, "learning_rate": 0.00016696520995544356, "loss": 0.2971, "step": 12278 }, { "epoch": 0.9947342838626053, "grad_norm": 0.03293439745903015, "learning_rate": 0.00016696070930284892, "loss": 0.2864, "step": 12279 }, { "epoch": 0.9948152948801037, "grad_norm": 0.033829886466264725, "learning_rate": 0.0001669562086502543, "loss": 0.2979, "step": 12280 }, { "epoch": 0.994896305897602, "grad_norm": 0.04007561877369881, "learning_rate": 0.00016695170799765967, "loss": 0.298, "step": 12281 }, { "epoch": 0.9949773169151005, "grad_norm": 0.03744842857122421, "learning_rate": 0.00016694720734506503, "loss": 0.299, "step": 12282 }, { "epoch": 0.9950583279325989, "grad_norm": 0.034444358199834824, "learning_rate": 0.0001669427066924704, "loss": 0.2833, "step": 12283 }, { "epoch": 0.9951393389500972, "grad_norm": 0.044794388115406036, "learning_rate": 0.0001669382060398758, "loss": 0.338, "step": 12284 }, { "epoch": 0.9952203499675956, "grad_norm": 0.0318923182785511, "learning_rate": 0.00016693370538728116, "loss": 0.2646, "step": 12285 }, { "epoch": 0.9953013609850939, "grad_norm": 0.032464563846588135, "learning_rate": 0.00016692920473468655, "loss": 0.2779, "step": 12286 }, { "epoch": 0.9953823720025924, "grad_norm": 0.03734520822763443, "learning_rate": 0.0001669247040820919, "loss": 0.3252, "step": 12287 }, { "epoch": 0.9954633830200907, "grad_norm": 0.038119930773973465, "learning_rate": 0.00016692020342949727, "loss": 0.3091, "step": 12288 }, { "epoch": 0.9955443940375891, "grad_norm": 0.03723182529211044, "learning_rate": 0.00016691570277690266, "loss": 0.3309, "step": 12289 }, { "epoch": 0.9956254050550875, "grad_norm": 0.04023761674761772, "learning_rate": 0.00016691120212430804, "loss": 0.3339, "step": 12290 }, { "epoch": 0.9957064160725859, "grad_norm": 0.03981214016675949, "learning_rate": 0.0001669067014717134, "loss": 0.3338, "step": 12291 }, { "epoch": 0.9957874270900843, "grad_norm": 0.04064285010099411, "learning_rate": 0.0001669022008191188, "loss": 0.3596, "step": 12292 }, { "epoch": 0.9958684381075826, "grad_norm": 0.03574245050549507, "learning_rate": 0.00016689770016652415, "loss": 0.2799, "step": 12293 }, { "epoch": 0.995949449125081, "grad_norm": 0.03438296541571617, "learning_rate": 0.0001668931995139295, "loss": 0.3576, "step": 12294 }, { "epoch": 0.9960304601425793, "grad_norm": 0.034820858389139175, "learning_rate": 0.0001668886988613349, "loss": 0.2916, "step": 12295 }, { "epoch": 0.9961114711600778, "grad_norm": 0.0366164855659008, "learning_rate": 0.00016688419820874028, "loss": 0.3207, "step": 12296 }, { "epoch": 0.9961924821775762, "grad_norm": 0.0425511971116066, "learning_rate": 0.00016687969755614564, "loss": 0.348, "step": 12297 }, { "epoch": 0.9962734931950745, "grad_norm": 0.03484644740819931, "learning_rate": 0.00016687519690355103, "loss": 0.3605, "step": 12298 }, { "epoch": 0.9963545042125729, "grad_norm": 0.03128006309270859, "learning_rate": 0.0001668706962509564, "loss": 0.3131, "step": 12299 }, { "epoch": 0.9964355152300713, "grad_norm": 0.039280328899621964, "learning_rate": 0.00016686619559836175, "loss": 0.3344, "step": 12300 }, { "epoch": 0.9965165262475697, "grad_norm": 0.03059135191142559, "learning_rate": 0.00016686169494576717, "loss": 0.3399, "step": 12301 }, { "epoch": 0.9965975372650681, "grad_norm": 0.033593568950891495, "learning_rate": 0.00016685719429317253, "loss": 0.2888, "step": 12302 }, { "epoch": 0.9966785482825664, "grad_norm": 0.037966132164001465, "learning_rate": 0.00016685269364057789, "loss": 0.3288, "step": 12303 }, { "epoch": 0.9967595593000648, "grad_norm": 0.03207383677363396, "learning_rate": 0.00016684819298798327, "loss": 0.2973, "step": 12304 }, { "epoch": 0.9968405703175632, "grad_norm": 0.03187767416238785, "learning_rate": 0.00016684369233538863, "loss": 0.3199, "step": 12305 }, { "epoch": 0.9969215813350616, "grad_norm": 0.03184106573462486, "learning_rate": 0.000166839191682794, "loss": 0.2718, "step": 12306 }, { "epoch": 0.99700259235256, "grad_norm": 0.039083920419216156, "learning_rate": 0.0001668346910301994, "loss": 0.3683, "step": 12307 }, { "epoch": 0.9970836033700583, "grad_norm": 0.033640190958976746, "learning_rate": 0.00016683019037760477, "loss": 0.3077, "step": 12308 }, { "epoch": 0.9971646143875567, "grad_norm": 0.038430314511060715, "learning_rate": 0.00016682568972501013, "loss": 0.315, "step": 12309 }, { "epoch": 0.9972456254050551, "grad_norm": 0.03640434890985489, "learning_rate": 0.00016682118907241551, "loss": 0.3046, "step": 12310 }, { "epoch": 0.9973266364225535, "grad_norm": 0.03566000610589981, "learning_rate": 0.00016681668841982087, "loss": 0.3054, "step": 12311 }, { "epoch": 0.9974076474400518, "grad_norm": 0.04168863222002983, "learning_rate": 0.00016681218776722623, "loss": 0.393, "step": 12312 }, { "epoch": 0.9974886584575502, "grad_norm": 0.03590601310133934, "learning_rate": 0.00016680768711463165, "loss": 0.2891, "step": 12313 }, { "epoch": 0.9975696694750487, "grad_norm": 0.03797917440533638, "learning_rate": 0.000166803186462037, "loss": 0.3614, "step": 12314 }, { "epoch": 0.997650680492547, "grad_norm": 0.03066432662308216, "learning_rate": 0.00016679868580944237, "loss": 0.2477, "step": 12315 }, { "epoch": 0.9977316915100454, "grad_norm": 0.03818430006504059, "learning_rate": 0.00016679418515684776, "loss": 0.3103, "step": 12316 }, { "epoch": 0.9978127025275437, "grad_norm": 0.043079014867544174, "learning_rate": 0.00016678968450425312, "loss": 0.294, "step": 12317 }, { "epoch": 0.9978937135450421, "grad_norm": 0.03821693733334541, "learning_rate": 0.00016678518385165848, "loss": 0.3659, "step": 12318 }, { "epoch": 0.9979747245625405, "grad_norm": 0.03290431946516037, "learning_rate": 0.0001667806831990639, "loss": 0.2696, "step": 12319 }, { "epoch": 0.9980557355800389, "grad_norm": 0.037341225892305374, "learning_rate": 0.00016677618254646925, "loss": 0.3211, "step": 12320 }, { "epoch": 0.9981367465975373, "grad_norm": 0.037481408566236496, "learning_rate": 0.0001667716818938746, "loss": 0.332, "step": 12321 }, { "epoch": 0.9982177576150356, "grad_norm": 0.03560345619916916, "learning_rate": 0.00016676718124128, "loss": 0.3281, "step": 12322 }, { "epoch": 0.998298768632534, "grad_norm": 0.036946386098861694, "learning_rate": 0.00016676268058868536, "loss": 0.326, "step": 12323 }, { "epoch": 0.9983797796500324, "grad_norm": 0.03420798480510712, "learning_rate": 0.00016675817993609072, "loss": 0.3193, "step": 12324 }, { "epoch": 0.9984607906675308, "grad_norm": 0.03938150405883789, "learning_rate": 0.00016675367928349613, "loss": 0.3319, "step": 12325 }, { "epoch": 0.9985418016850292, "grad_norm": 0.03574543818831444, "learning_rate": 0.0001667491786309015, "loss": 0.3217, "step": 12326 }, { "epoch": 0.9986228127025275, "grad_norm": 0.037979912012815475, "learning_rate": 0.00016674467797830685, "loss": 0.3628, "step": 12327 }, { "epoch": 0.998703823720026, "grad_norm": 0.038943979889154434, "learning_rate": 0.00016674017732571224, "loss": 0.3481, "step": 12328 }, { "epoch": 0.9987848347375243, "grad_norm": 0.03603614494204521, "learning_rate": 0.0001667356766731176, "loss": 0.2906, "step": 12329 }, { "epoch": 0.9988658457550227, "grad_norm": 0.030259212478995323, "learning_rate": 0.00016673117602052299, "loss": 0.2556, "step": 12330 }, { "epoch": 0.998946856772521, "grad_norm": 0.03213247284293175, "learning_rate": 0.00016672667536792837, "loss": 0.3307, "step": 12331 }, { "epoch": 0.9990278677900194, "grad_norm": 0.03017602674663067, "learning_rate": 0.00016672217471533373, "loss": 0.2732, "step": 12332 }, { "epoch": 0.9991088788075179, "grad_norm": 0.04762164503335953, "learning_rate": 0.0001667176740627391, "loss": 0.3062, "step": 12333 }, { "epoch": 0.9991898898250162, "grad_norm": 0.03786870837211609, "learning_rate": 0.00016671317341014448, "loss": 0.4087, "step": 12334 }, { "epoch": 0.9992709008425146, "grad_norm": 0.030480682849884033, "learning_rate": 0.00016670867275754984, "loss": 0.2739, "step": 12335 }, { "epoch": 0.9993519118600129, "grad_norm": 0.030645664781332016, "learning_rate": 0.00016670417210495523, "loss": 0.2811, "step": 12336 }, { "epoch": 0.9994329228775113, "grad_norm": 0.033013977110385895, "learning_rate": 0.00016669967145236062, "loss": 0.2941, "step": 12337 }, { "epoch": 0.9995139338950098, "grad_norm": 0.03498419001698494, "learning_rate": 0.00016669517079976598, "loss": 0.3035, "step": 12338 }, { "epoch": 0.9995949449125081, "grad_norm": 0.035238780081272125, "learning_rate": 0.00016669067014717134, "loss": 0.3082, "step": 12339 }, { "epoch": 0.9996759559300065, "grad_norm": 0.036749210208654404, "learning_rate": 0.00016668616949457672, "loss": 0.3347, "step": 12340 }, { "epoch": 0.9997569669475048, "grad_norm": 0.037299081683158875, "learning_rate": 0.00016668166884198208, "loss": 0.3616, "step": 12341 }, { "epoch": 0.9998379779650033, "grad_norm": 0.03310994803905487, "learning_rate": 0.00016667716818938747, "loss": 0.2927, "step": 12342 }, { "epoch": 0.9999189889825016, "grad_norm": 0.03382963687181473, "learning_rate": 0.00016667266753679286, "loss": 0.3183, "step": 12343 }, { "epoch": 1.0, "grad_norm": 0.03378660976886749, "learning_rate": 0.00016666816688419822, "loss": 0.3154, "step": 12344 }, { "epoch": 1.0000810110174985, "grad_norm": 0.03642452135682106, "learning_rate": 0.00016666366623160358, "loss": 0.3174, "step": 12345 }, { "epoch": 1.0001620220349967, "grad_norm": 0.03315473720431328, "learning_rate": 0.00016665916557900896, "loss": 0.2641, "step": 12346 }, { "epoch": 1.0002430330524952, "grad_norm": 0.037134990096092224, "learning_rate": 0.00016665466492641432, "loss": 0.3233, "step": 12347 }, { "epoch": 1.0003240440699934, "grad_norm": 0.03000638820230961, "learning_rate": 0.0001666501642738197, "loss": 0.269, "step": 12348 }, { "epoch": 1.000405055087492, "grad_norm": 0.038361046463251114, "learning_rate": 0.0001666456636212251, "loss": 0.2948, "step": 12349 }, { "epoch": 1.0004860661049904, "grad_norm": 0.03925226256251335, "learning_rate": 0.00016664116296863046, "loss": 0.3089, "step": 12350 }, { "epoch": 1.0005670771224886, "grad_norm": 0.03771889954805374, "learning_rate": 0.00016663666231603582, "loss": 0.3116, "step": 12351 }, { "epoch": 1.000648088139987, "grad_norm": 0.03906197100877762, "learning_rate": 0.0001666321616634412, "loss": 0.3379, "step": 12352 }, { "epoch": 1.0007290991574853, "grad_norm": 0.03280739486217499, "learning_rate": 0.00016662766101084657, "loss": 0.3226, "step": 12353 }, { "epoch": 1.0008101101749838, "grad_norm": 0.03842942789196968, "learning_rate": 0.00016662316035825195, "loss": 0.3435, "step": 12354 }, { "epoch": 1.0008911211924822, "grad_norm": 0.043962206691503525, "learning_rate": 0.00016661865970565734, "loss": 0.3883, "step": 12355 }, { "epoch": 1.0009721322099805, "grad_norm": 0.031128423288464546, "learning_rate": 0.0001666141590530627, "loss": 0.2722, "step": 12356 }, { "epoch": 1.001053143227479, "grad_norm": 0.03812249004840851, "learning_rate": 0.00016660965840046806, "loss": 0.3343, "step": 12357 }, { "epoch": 1.0011341542449774, "grad_norm": 0.035646624863147736, "learning_rate": 0.00016660515774787345, "loss": 0.308, "step": 12358 }, { "epoch": 1.0012151652624757, "grad_norm": 0.032420992851257324, "learning_rate": 0.00016660065709527883, "loss": 0.2966, "step": 12359 }, { "epoch": 1.0012961762799741, "grad_norm": 0.0395088866353035, "learning_rate": 0.0001665961564426842, "loss": 0.387, "step": 12360 }, { "epoch": 1.0013771872974724, "grad_norm": 0.03513415530323982, "learning_rate": 0.00016659165579008958, "loss": 0.3448, "step": 12361 }, { "epoch": 1.0014581983149708, "grad_norm": 0.03443319723010063, "learning_rate": 0.00016658715513749494, "loss": 0.3102, "step": 12362 }, { "epoch": 1.0015392093324693, "grad_norm": 0.03467671200633049, "learning_rate": 0.0001665826544849003, "loss": 0.2769, "step": 12363 }, { "epoch": 1.0016202203499676, "grad_norm": 0.03799883648753166, "learning_rate": 0.0001665781538323057, "loss": 0.336, "step": 12364 }, { "epoch": 1.001701231367466, "grad_norm": 0.046157464385032654, "learning_rate": 0.00016657365317971108, "loss": 0.3336, "step": 12365 }, { "epoch": 1.0017822423849643, "grad_norm": 0.03803658112883568, "learning_rate": 0.00016656915252711644, "loss": 0.2913, "step": 12366 }, { "epoch": 1.0018632534024627, "grad_norm": 0.032749395817518234, "learning_rate": 0.00016656465187452182, "loss": 0.2897, "step": 12367 }, { "epoch": 1.0019442644199612, "grad_norm": 0.04132390394806862, "learning_rate": 0.00016656015122192718, "loss": 0.3353, "step": 12368 }, { "epoch": 1.0020252754374595, "grad_norm": 0.03762146458029747, "learning_rate": 0.00016655565056933254, "loss": 0.3598, "step": 12369 }, { "epoch": 1.002106286454958, "grad_norm": 0.038307737559080124, "learning_rate": 0.00016655114991673793, "loss": 0.3231, "step": 12370 }, { "epoch": 1.0021872974724562, "grad_norm": 0.04003438353538513, "learning_rate": 0.00016654664926414332, "loss": 0.3427, "step": 12371 }, { "epoch": 1.0022683084899546, "grad_norm": 0.040446821600198746, "learning_rate": 0.00016654214861154868, "loss": 0.3412, "step": 12372 }, { "epoch": 1.002349319507453, "grad_norm": 0.02912214770913124, "learning_rate": 0.00016653764795895406, "loss": 0.2351, "step": 12373 }, { "epoch": 1.0024303305249513, "grad_norm": 0.03535939007997513, "learning_rate": 0.00016653314730635942, "loss": 0.289, "step": 12374 }, { "epoch": 1.0025113415424498, "grad_norm": 0.04004836454987526, "learning_rate": 0.00016652864665376478, "loss": 0.3261, "step": 12375 }, { "epoch": 1.002592352559948, "grad_norm": 0.034039877355098724, "learning_rate": 0.00016652414600117017, "loss": 0.2957, "step": 12376 }, { "epoch": 1.0026733635774465, "grad_norm": 0.03624784201383591, "learning_rate": 0.00016651964534857556, "loss": 0.3374, "step": 12377 }, { "epoch": 1.002754374594945, "grad_norm": 0.03206413611769676, "learning_rate": 0.00016651514469598092, "loss": 0.2958, "step": 12378 }, { "epoch": 1.0028353856124432, "grad_norm": 0.04170289263129234, "learning_rate": 0.0001665106440433863, "loss": 0.3392, "step": 12379 }, { "epoch": 1.0029163966299417, "grad_norm": 0.036238156259059906, "learning_rate": 0.00016650614339079167, "loss": 0.3556, "step": 12380 }, { "epoch": 1.00299740764744, "grad_norm": 0.03602059930562973, "learning_rate": 0.00016650164273819703, "loss": 0.3006, "step": 12381 }, { "epoch": 1.0030784186649384, "grad_norm": 0.039142753928899765, "learning_rate": 0.00016649714208560244, "loss": 0.3518, "step": 12382 }, { "epoch": 1.0031594296824369, "grad_norm": 0.03825153782963753, "learning_rate": 0.0001664926414330078, "loss": 0.3141, "step": 12383 }, { "epoch": 1.0032404406999351, "grad_norm": 0.033279530704021454, "learning_rate": 0.00016648814078041316, "loss": 0.2877, "step": 12384 }, { "epoch": 1.0033214517174336, "grad_norm": 0.03614173084497452, "learning_rate": 0.00016648364012781855, "loss": 0.3441, "step": 12385 }, { "epoch": 1.003402462734932, "grad_norm": 0.039306920021772385, "learning_rate": 0.0001664791394752239, "loss": 0.3253, "step": 12386 }, { "epoch": 1.0034834737524303, "grad_norm": 0.03397579491138458, "learning_rate": 0.00016647463882262927, "loss": 0.2791, "step": 12387 }, { "epoch": 1.0035644847699288, "grad_norm": 0.0360901802778244, "learning_rate": 0.00016647013817003468, "loss": 0.3041, "step": 12388 }, { "epoch": 1.003645495787427, "grad_norm": 0.03415064141154289, "learning_rate": 0.00016646563751744004, "loss": 0.2809, "step": 12389 }, { "epoch": 1.0037265068049255, "grad_norm": 0.03701115772128105, "learning_rate": 0.0001664611368648454, "loss": 0.298, "step": 12390 }, { "epoch": 1.003807517822424, "grad_norm": 0.03698696568608284, "learning_rate": 0.0001664566362122508, "loss": 0.3428, "step": 12391 }, { "epoch": 1.0038885288399222, "grad_norm": 0.03689829260110855, "learning_rate": 0.00016645213555965615, "loss": 0.2951, "step": 12392 }, { "epoch": 1.0039695398574207, "grad_norm": 0.03843197971582413, "learning_rate": 0.0001664476349070615, "loss": 0.3324, "step": 12393 }, { "epoch": 1.004050550874919, "grad_norm": 0.03739136457443237, "learning_rate": 0.00016644313425446692, "loss": 0.2794, "step": 12394 }, { "epoch": 1.0041315618924174, "grad_norm": 0.03774438798427582, "learning_rate": 0.00016643863360187228, "loss": 0.3869, "step": 12395 }, { "epoch": 1.0042125729099158, "grad_norm": 0.0323587991297245, "learning_rate": 0.00016643413294927764, "loss": 0.283, "step": 12396 }, { "epoch": 1.004293583927414, "grad_norm": 0.030298395082354546, "learning_rate": 0.00016642963229668303, "loss": 0.2823, "step": 12397 }, { "epoch": 1.0043745949449125, "grad_norm": 0.03403405472636223, "learning_rate": 0.0001664251316440884, "loss": 0.3001, "step": 12398 }, { "epoch": 1.0044556059624108, "grad_norm": 0.03376943618059158, "learning_rate": 0.00016642063099149375, "loss": 0.2832, "step": 12399 }, { "epoch": 1.0045366169799093, "grad_norm": 0.039719048887491226, "learning_rate": 0.00016641613033889917, "loss": 0.3748, "step": 12400 }, { "epoch": 1.0046176279974077, "grad_norm": 0.034429267048835754, "learning_rate": 0.00016641162968630453, "loss": 0.3099, "step": 12401 }, { "epoch": 1.004698639014906, "grad_norm": 0.03809856250882149, "learning_rate": 0.00016640712903370989, "loss": 0.3077, "step": 12402 }, { "epoch": 1.0047796500324044, "grad_norm": 0.035106975585222244, "learning_rate": 0.00016640262838111527, "loss": 0.3042, "step": 12403 }, { "epoch": 1.0048606610499027, "grad_norm": 0.034927934408187866, "learning_rate": 0.00016639812772852063, "loss": 0.3166, "step": 12404 }, { "epoch": 1.0049416720674011, "grad_norm": 0.03464159741997719, "learning_rate": 0.000166393627075926, "loss": 0.3071, "step": 12405 }, { "epoch": 1.0050226830848996, "grad_norm": 0.036465100944042206, "learning_rate": 0.0001663891264233314, "loss": 0.309, "step": 12406 }, { "epoch": 1.0051036941023979, "grad_norm": 0.035587284713983536, "learning_rate": 0.00016638462577073677, "loss": 0.3246, "step": 12407 }, { "epoch": 1.0051847051198963, "grad_norm": 0.03844556584954262, "learning_rate": 0.00016638012511814213, "loss": 0.2976, "step": 12408 }, { "epoch": 1.0052657161373948, "grad_norm": 0.03252662718296051, "learning_rate": 0.00016637562446554751, "loss": 0.271, "step": 12409 }, { "epoch": 1.005346727154893, "grad_norm": 0.03310299292206764, "learning_rate": 0.00016637112381295287, "loss": 0.3237, "step": 12410 }, { "epoch": 1.0054277381723915, "grad_norm": 0.03731595352292061, "learning_rate": 0.00016636662316035826, "loss": 0.2918, "step": 12411 }, { "epoch": 1.0055087491898898, "grad_norm": 0.03584537282586098, "learning_rate": 0.00016636212250776365, "loss": 0.3427, "step": 12412 }, { "epoch": 1.0055897602073882, "grad_norm": 0.03774617984890938, "learning_rate": 0.000166357621855169, "loss": 0.3049, "step": 12413 }, { "epoch": 1.0056707712248867, "grad_norm": 0.03594934195280075, "learning_rate": 0.00016635312120257437, "loss": 0.3343, "step": 12414 }, { "epoch": 1.005751782242385, "grad_norm": 0.03343866392970085, "learning_rate": 0.00016634862054997976, "loss": 0.2872, "step": 12415 }, { "epoch": 1.0058327932598834, "grad_norm": 0.036227621138095856, "learning_rate": 0.00016634411989738512, "loss": 0.3081, "step": 12416 }, { "epoch": 1.0059138042773816, "grad_norm": 0.039095692336559296, "learning_rate": 0.0001663396192447905, "loss": 0.3371, "step": 12417 }, { "epoch": 1.00599481529488, "grad_norm": 0.03556566312909126, "learning_rate": 0.0001663351185921959, "loss": 0.3305, "step": 12418 }, { "epoch": 1.0060758263123786, "grad_norm": 0.04125303030014038, "learning_rate": 0.00016633061793960125, "loss": 0.3408, "step": 12419 }, { "epoch": 1.0061568373298768, "grad_norm": 0.0392603874206543, "learning_rate": 0.0001663261172870066, "loss": 0.304, "step": 12420 }, { "epoch": 1.0062378483473753, "grad_norm": 0.0386536680161953, "learning_rate": 0.000166321616634412, "loss": 0.3502, "step": 12421 }, { "epoch": 1.0063188593648735, "grad_norm": 0.03425775468349457, "learning_rate": 0.00016631711598181736, "loss": 0.3169, "step": 12422 }, { "epoch": 1.006399870382372, "grad_norm": 0.032373424619436264, "learning_rate": 0.00016631261532922275, "loss": 0.2838, "step": 12423 }, { "epoch": 1.0064808813998705, "grad_norm": 0.03774702921509743, "learning_rate": 0.00016630811467662813, "loss": 0.3258, "step": 12424 }, { "epoch": 1.0065618924173687, "grad_norm": 0.03462809696793556, "learning_rate": 0.0001663036140240335, "loss": 0.3146, "step": 12425 }, { "epoch": 1.0066429034348672, "grad_norm": 0.03529775142669678, "learning_rate": 0.00016629911337143885, "loss": 0.3624, "step": 12426 }, { "epoch": 1.0067239144523654, "grad_norm": 0.03785771131515503, "learning_rate": 0.00016629461271884424, "loss": 0.3217, "step": 12427 }, { "epoch": 1.0068049254698639, "grad_norm": 0.03252212703227997, "learning_rate": 0.0001662901120662496, "loss": 0.2931, "step": 12428 }, { "epoch": 1.0068859364873624, "grad_norm": 0.03546149656176567, "learning_rate": 0.000166285611413655, "loss": 0.3254, "step": 12429 }, { "epoch": 1.0069669475048606, "grad_norm": 0.034065522253513336, "learning_rate": 0.00016628111076106037, "loss": 0.3355, "step": 12430 }, { "epoch": 1.007047958522359, "grad_norm": 0.03405478969216347, "learning_rate": 0.00016627661010846573, "loss": 0.2963, "step": 12431 }, { "epoch": 1.0071289695398573, "grad_norm": 0.039432525634765625, "learning_rate": 0.0001662721094558711, "loss": 0.3565, "step": 12432 }, { "epoch": 1.0072099805573558, "grad_norm": 0.042425453662872314, "learning_rate": 0.00016626760880327648, "loss": 0.3517, "step": 12433 }, { "epoch": 1.0072909915748542, "grad_norm": 0.03917066380381584, "learning_rate": 0.00016626310815068187, "loss": 0.3633, "step": 12434 }, { "epoch": 1.0073720025923525, "grad_norm": 0.0367281399667263, "learning_rate": 0.00016625860749808723, "loss": 0.3253, "step": 12435 }, { "epoch": 1.007453013609851, "grad_norm": 0.03884049132466316, "learning_rate": 0.00016625410684549262, "loss": 0.328, "step": 12436 }, { "epoch": 1.0075340246273494, "grad_norm": 0.04218551516532898, "learning_rate": 0.00016624960619289798, "loss": 0.3234, "step": 12437 }, { "epoch": 1.0076150356448477, "grad_norm": 0.03948225453495979, "learning_rate": 0.00016624510554030334, "loss": 0.3129, "step": 12438 }, { "epoch": 1.0076960466623461, "grad_norm": 0.03605709224939346, "learning_rate": 0.00016624060488770872, "loss": 0.3264, "step": 12439 }, { "epoch": 1.0077770576798444, "grad_norm": 0.04017477482557297, "learning_rate": 0.0001662361042351141, "loss": 0.3624, "step": 12440 }, { "epoch": 1.0078580686973428, "grad_norm": 0.03859005868434906, "learning_rate": 0.00016623160358251947, "loss": 0.2997, "step": 12441 }, { "epoch": 1.0079390797148413, "grad_norm": 0.03483903780579567, "learning_rate": 0.00016622710292992486, "loss": 0.2916, "step": 12442 }, { "epoch": 1.0080200907323396, "grad_norm": 0.034934330731630325, "learning_rate": 0.00016622260227733022, "loss": 0.3259, "step": 12443 }, { "epoch": 1.008101101749838, "grad_norm": 0.029897112399339676, "learning_rate": 0.00016621810162473558, "loss": 0.2831, "step": 12444 }, { "epoch": 1.0081821127673363, "grad_norm": 0.03647159785032272, "learning_rate": 0.00016621360097214096, "loss": 0.3101, "step": 12445 }, { "epoch": 1.0082631237848347, "grad_norm": 0.035187117755413055, "learning_rate": 0.00016620910031954635, "loss": 0.3158, "step": 12446 }, { "epoch": 1.0083441348023332, "grad_norm": 0.04031701385974884, "learning_rate": 0.0001662045996669517, "loss": 0.3255, "step": 12447 }, { "epoch": 1.0084251458198314, "grad_norm": 0.041569486260414124, "learning_rate": 0.0001662000990143571, "loss": 0.3324, "step": 12448 }, { "epoch": 1.00850615683733, "grad_norm": 0.04560425877571106, "learning_rate": 0.00016619559836176246, "loss": 0.3304, "step": 12449 }, { "epoch": 1.0085871678548282, "grad_norm": 0.04038703069090843, "learning_rate": 0.00016619109770916782, "loss": 0.313, "step": 12450 }, { "epoch": 1.0086681788723266, "grad_norm": 0.03961900994181633, "learning_rate": 0.0001661865970565732, "loss": 0.3351, "step": 12451 }, { "epoch": 1.008749189889825, "grad_norm": 0.03639005124568939, "learning_rate": 0.0001661820964039786, "loss": 0.3294, "step": 12452 }, { "epoch": 1.0088302009073233, "grad_norm": 0.0363776758313179, "learning_rate": 0.00016617759575138395, "loss": 0.2832, "step": 12453 }, { "epoch": 1.0089112119248218, "grad_norm": 0.033802758902311325, "learning_rate": 0.00016617309509878934, "loss": 0.3063, "step": 12454 }, { "epoch": 1.00899222294232, "grad_norm": 0.04182208329439163, "learning_rate": 0.0001661685944461947, "loss": 0.3325, "step": 12455 }, { "epoch": 1.0090732339598185, "grad_norm": 0.03720999136567116, "learning_rate": 0.00016616409379360006, "loss": 0.3423, "step": 12456 }, { "epoch": 1.009154244977317, "grad_norm": 0.046621162444353104, "learning_rate": 0.00016615959314100545, "loss": 0.3477, "step": 12457 }, { "epoch": 1.0092352559948152, "grad_norm": 0.0410497672855854, "learning_rate": 0.00016615509248841083, "loss": 0.3279, "step": 12458 }, { "epoch": 1.0093162670123137, "grad_norm": 0.032629966735839844, "learning_rate": 0.0001661505918358162, "loss": 0.2904, "step": 12459 }, { "epoch": 1.0093972780298122, "grad_norm": 0.040290966629981995, "learning_rate": 0.00016614609118322158, "loss": 0.3397, "step": 12460 }, { "epoch": 1.0094782890473104, "grad_norm": 0.042767442762851715, "learning_rate": 0.00016614159053062694, "loss": 0.3455, "step": 12461 }, { "epoch": 1.0095593000648089, "grad_norm": 0.03545527160167694, "learning_rate": 0.0001661370898780323, "loss": 0.3493, "step": 12462 }, { "epoch": 1.0096403110823071, "grad_norm": 0.03302915021777153, "learning_rate": 0.00016613258922543772, "loss": 0.2907, "step": 12463 }, { "epoch": 1.0097213220998056, "grad_norm": 0.031950343400239944, "learning_rate": 0.00016612808857284308, "loss": 0.3082, "step": 12464 }, { "epoch": 1.009802333117304, "grad_norm": 0.040452003479003906, "learning_rate": 0.00016612358792024844, "loss": 0.3198, "step": 12465 }, { "epoch": 1.0098833441348023, "grad_norm": 0.04159904643893242, "learning_rate": 0.00016611908726765382, "loss": 0.3603, "step": 12466 }, { "epoch": 1.0099643551523008, "grad_norm": 0.0391816720366478, "learning_rate": 0.00016611458661505918, "loss": 0.3622, "step": 12467 }, { "epoch": 1.010045366169799, "grad_norm": 0.03843311592936516, "learning_rate": 0.00016611008596246454, "loss": 0.3093, "step": 12468 }, { "epoch": 1.0101263771872975, "grad_norm": 0.03490731120109558, "learning_rate": 0.00016610558530986996, "loss": 0.2906, "step": 12469 }, { "epoch": 1.010207388204796, "grad_norm": 0.036694154143333435, "learning_rate": 0.00016610108465727532, "loss": 0.296, "step": 12470 }, { "epoch": 1.0102883992222942, "grad_norm": 0.03656122460961342, "learning_rate": 0.00016609658400468068, "loss": 0.3082, "step": 12471 }, { "epoch": 1.0103694102397927, "grad_norm": 0.03301248699426651, "learning_rate": 0.00016609208335208607, "loss": 0.3047, "step": 12472 }, { "epoch": 1.010450421257291, "grad_norm": 0.03141988813877106, "learning_rate": 0.00016608758269949143, "loss": 0.3005, "step": 12473 }, { "epoch": 1.0105314322747894, "grad_norm": 0.03507707640528679, "learning_rate": 0.00016608308204689679, "loss": 0.3019, "step": 12474 }, { "epoch": 1.0106124432922878, "grad_norm": 0.04133852943778038, "learning_rate": 0.0001660785813943022, "loss": 0.3315, "step": 12475 }, { "epoch": 1.010693454309786, "grad_norm": 0.04655103757977486, "learning_rate": 0.00016607408074170756, "loss": 0.3819, "step": 12476 }, { "epoch": 1.0107744653272845, "grad_norm": 0.03938587009906769, "learning_rate": 0.00016606958008911292, "loss": 0.3146, "step": 12477 }, { "epoch": 1.0108554763447828, "grad_norm": 0.03262874856591225, "learning_rate": 0.0001660650794365183, "loss": 0.2895, "step": 12478 }, { "epoch": 1.0109364873622813, "grad_norm": 0.0301420446485281, "learning_rate": 0.00016606057878392367, "loss": 0.2891, "step": 12479 }, { "epoch": 1.0110174983797797, "grad_norm": 0.03468109294772148, "learning_rate": 0.00016605607813132903, "loss": 0.3424, "step": 12480 }, { "epoch": 1.011098509397278, "grad_norm": 0.03772737830877304, "learning_rate": 0.00016605157747873444, "loss": 0.3303, "step": 12481 }, { "epoch": 1.0111795204147764, "grad_norm": 0.03408652916550636, "learning_rate": 0.0001660470768261398, "loss": 0.3008, "step": 12482 }, { "epoch": 1.0112605314322747, "grad_norm": 0.03782588988542557, "learning_rate": 0.00016604257617354516, "loss": 0.3382, "step": 12483 }, { "epoch": 1.0113415424497731, "grad_norm": 0.03254377096891403, "learning_rate": 0.00016603807552095055, "loss": 0.2517, "step": 12484 }, { "epoch": 1.0114225534672716, "grad_norm": 0.046504005789756775, "learning_rate": 0.0001660335748683559, "loss": 0.3585, "step": 12485 }, { "epoch": 1.0115035644847699, "grad_norm": 0.039096616208553314, "learning_rate": 0.0001660290742157613, "loss": 0.3187, "step": 12486 }, { "epoch": 1.0115845755022683, "grad_norm": 0.04427647963166237, "learning_rate": 0.00016602457356316668, "loss": 0.3409, "step": 12487 }, { "epoch": 1.0116655865197668, "grad_norm": 0.0376652292907238, "learning_rate": 0.00016602007291057204, "loss": 0.3174, "step": 12488 }, { "epoch": 1.011746597537265, "grad_norm": 0.044266387820243835, "learning_rate": 0.0001660155722579774, "loss": 0.3298, "step": 12489 }, { "epoch": 1.0118276085547635, "grad_norm": 0.03599374368786812, "learning_rate": 0.0001660110716053828, "loss": 0.307, "step": 12490 }, { "epoch": 1.0119086195722617, "grad_norm": 0.03235464543104172, "learning_rate": 0.00016600657095278815, "loss": 0.2877, "step": 12491 }, { "epoch": 1.0119896305897602, "grad_norm": 0.0392383337020874, "learning_rate": 0.00016600207030019354, "loss": 0.3156, "step": 12492 }, { "epoch": 1.0120706416072587, "grad_norm": 0.030660009011626244, "learning_rate": 0.00016599756964759892, "loss": 0.277, "step": 12493 }, { "epoch": 1.012151652624757, "grad_norm": 0.03609098121523857, "learning_rate": 0.00016599306899500428, "loss": 0.3084, "step": 12494 }, { "epoch": 1.0122326636422554, "grad_norm": 0.032300084829330444, "learning_rate": 0.00016598856834240964, "loss": 0.281, "step": 12495 }, { "epoch": 1.0123136746597536, "grad_norm": 0.04154512658715248, "learning_rate": 0.00016598406768981503, "loss": 0.3069, "step": 12496 }, { "epoch": 1.012394685677252, "grad_norm": 0.04292042553424835, "learning_rate": 0.0001659795670372204, "loss": 0.3163, "step": 12497 }, { "epoch": 1.0124756966947506, "grad_norm": 0.038710352033376694, "learning_rate": 0.00016597506638462578, "loss": 0.3267, "step": 12498 }, { "epoch": 1.0125567077122488, "grad_norm": 0.040433041751384735, "learning_rate": 0.00016597056573203117, "loss": 0.3697, "step": 12499 }, { "epoch": 1.0126377187297473, "grad_norm": 0.03633313998579979, "learning_rate": 0.00016596606507943653, "loss": 0.3227, "step": 12500 }, { "epoch": 1.0127187297472455, "grad_norm": 0.037605587393045425, "learning_rate": 0.00016596156442684189, "loss": 0.3401, "step": 12501 }, { "epoch": 1.012799740764744, "grad_norm": 0.042344413697719574, "learning_rate": 0.00016595706377424727, "loss": 0.3238, "step": 12502 }, { "epoch": 1.0128807517822425, "grad_norm": 0.038133446127176285, "learning_rate": 0.00016595256312165263, "loss": 0.3116, "step": 12503 }, { "epoch": 1.0129617627997407, "grad_norm": 0.03859379515051842, "learning_rate": 0.00016594806246905802, "loss": 0.2812, "step": 12504 }, { "epoch": 1.0130427738172392, "grad_norm": 0.03487183526158333, "learning_rate": 0.0001659435618164634, "loss": 0.3167, "step": 12505 }, { "epoch": 1.0131237848347374, "grad_norm": 0.03911701962351799, "learning_rate": 0.00016593906116386877, "loss": 0.334, "step": 12506 }, { "epoch": 1.0132047958522359, "grad_norm": 0.03974626958370209, "learning_rate": 0.00016593456051127413, "loss": 0.2963, "step": 12507 }, { "epoch": 1.0132858068697344, "grad_norm": 0.03957384079694748, "learning_rate": 0.00016593005985867951, "loss": 0.3501, "step": 12508 }, { "epoch": 1.0133668178872326, "grad_norm": 0.03603899106383324, "learning_rate": 0.00016592555920608487, "loss": 0.2898, "step": 12509 }, { "epoch": 1.013447828904731, "grad_norm": 0.03210967779159546, "learning_rate": 0.00016592105855349026, "loss": 0.3226, "step": 12510 }, { "epoch": 1.0135288399222295, "grad_norm": 0.03462260589003563, "learning_rate": 0.00016591655790089565, "loss": 0.3106, "step": 12511 }, { "epoch": 1.0136098509397278, "grad_norm": 0.03556382656097412, "learning_rate": 0.000165912057248301, "loss": 0.301, "step": 12512 }, { "epoch": 1.0136908619572262, "grad_norm": 0.03905484452843666, "learning_rate": 0.00016590755659570637, "loss": 0.2933, "step": 12513 }, { "epoch": 1.0137718729747245, "grad_norm": 0.03452567011117935, "learning_rate": 0.00016590305594311176, "loss": 0.3317, "step": 12514 }, { "epoch": 1.013852883992223, "grad_norm": 0.038224294781684875, "learning_rate": 0.00016589855529051714, "loss": 0.3469, "step": 12515 }, { "epoch": 1.0139338950097214, "grad_norm": 0.03787916898727417, "learning_rate": 0.0001658940546379225, "loss": 0.3032, "step": 12516 }, { "epoch": 1.0140149060272197, "grad_norm": 0.0391772985458374, "learning_rate": 0.0001658895539853279, "loss": 0.3541, "step": 12517 }, { "epoch": 1.0140959170447181, "grad_norm": 0.04080815985798836, "learning_rate": 0.00016588505333273325, "loss": 0.3605, "step": 12518 }, { "epoch": 1.0141769280622164, "grad_norm": 0.04079817235469818, "learning_rate": 0.0001658805526801386, "loss": 0.3003, "step": 12519 }, { "epoch": 1.0142579390797148, "grad_norm": 0.035111624747514725, "learning_rate": 0.000165876052027544, "loss": 0.3102, "step": 12520 }, { "epoch": 1.0143389500972133, "grad_norm": 0.03829184174537659, "learning_rate": 0.00016587155137494939, "loss": 0.3625, "step": 12521 }, { "epoch": 1.0144199611147116, "grad_norm": 0.039515420794487, "learning_rate": 0.00016586705072235475, "loss": 0.3352, "step": 12522 }, { "epoch": 1.01450097213221, "grad_norm": 0.035139210522174835, "learning_rate": 0.00016586255006976013, "loss": 0.2576, "step": 12523 }, { "epoch": 1.0145819831497083, "grad_norm": 0.04180475324392319, "learning_rate": 0.0001658580494171655, "loss": 0.3453, "step": 12524 }, { "epoch": 1.0146629941672067, "grad_norm": 0.039726074784994125, "learning_rate": 0.00016585354876457085, "loss": 0.3493, "step": 12525 }, { "epoch": 1.0147440051847052, "grad_norm": 0.03973781317472458, "learning_rate": 0.00016584904811197624, "loss": 0.3633, "step": 12526 }, { "epoch": 1.0148250162022034, "grad_norm": 0.03894086182117462, "learning_rate": 0.00016584454745938163, "loss": 0.3417, "step": 12527 }, { "epoch": 1.014906027219702, "grad_norm": 0.0374528244137764, "learning_rate": 0.000165840046806787, "loss": 0.3058, "step": 12528 }, { "epoch": 1.0149870382372002, "grad_norm": 0.038687460124492645, "learning_rate": 0.00016583554615419237, "loss": 0.3745, "step": 12529 }, { "epoch": 1.0150680492546986, "grad_norm": 0.05107789486646652, "learning_rate": 0.00016583104550159773, "loss": 0.3321, "step": 12530 }, { "epoch": 1.015149060272197, "grad_norm": 0.03942408785223961, "learning_rate": 0.0001658265448490031, "loss": 0.3309, "step": 12531 }, { "epoch": 1.0152300712896953, "grad_norm": 0.037755463272333145, "learning_rate": 0.00016582204419640848, "loss": 0.3305, "step": 12532 }, { "epoch": 1.0153110823071938, "grad_norm": 0.03136523813009262, "learning_rate": 0.00016581754354381387, "loss": 0.2528, "step": 12533 }, { "epoch": 1.0153920933246923, "grad_norm": 0.042500585317611694, "learning_rate": 0.00016581304289121923, "loss": 0.3954, "step": 12534 }, { "epoch": 1.0154731043421905, "grad_norm": 0.03316302224993706, "learning_rate": 0.00016580854223862462, "loss": 0.3228, "step": 12535 }, { "epoch": 1.015554115359689, "grad_norm": 0.03290242329239845, "learning_rate": 0.00016580404158602998, "loss": 0.2799, "step": 12536 }, { "epoch": 1.0156351263771872, "grad_norm": 0.043563112616539, "learning_rate": 0.00016579954093343534, "loss": 0.3374, "step": 12537 }, { "epoch": 1.0157161373946857, "grad_norm": 0.04143699258565903, "learning_rate": 0.00016579504028084072, "loss": 0.3435, "step": 12538 }, { "epoch": 1.0157971484121842, "grad_norm": 0.03597777336835861, "learning_rate": 0.0001657905396282461, "loss": 0.3279, "step": 12539 }, { "epoch": 1.0158781594296824, "grad_norm": 0.03355882689356804, "learning_rate": 0.00016578603897565147, "loss": 0.3118, "step": 12540 }, { "epoch": 1.0159591704471809, "grad_norm": 0.0339498370885849, "learning_rate": 0.00016578153832305686, "loss": 0.2899, "step": 12541 }, { "epoch": 1.0160401814646791, "grad_norm": 0.03829336538910866, "learning_rate": 0.00016577703767046222, "loss": 0.3292, "step": 12542 }, { "epoch": 1.0161211924821776, "grad_norm": 0.034184329211711884, "learning_rate": 0.00016577253701786758, "loss": 0.3025, "step": 12543 }, { "epoch": 1.016202203499676, "grad_norm": 0.03295601159334183, "learning_rate": 0.000165768036365273, "loss": 0.2878, "step": 12544 }, { "epoch": 1.0162832145171743, "grad_norm": 0.03263118490576744, "learning_rate": 0.00016576353571267835, "loss": 0.3457, "step": 12545 }, { "epoch": 1.0163642255346728, "grad_norm": 0.03363572061061859, "learning_rate": 0.0001657590350600837, "loss": 0.2976, "step": 12546 }, { "epoch": 1.016445236552171, "grad_norm": 0.037825170904397964, "learning_rate": 0.0001657545344074891, "loss": 0.3104, "step": 12547 }, { "epoch": 1.0165262475696695, "grad_norm": 0.04601071774959564, "learning_rate": 0.00016575003375489446, "loss": 0.3304, "step": 12548 }, { "epoch": 1.016607258587168, "grad_norm": 0.035257138311862946, "learning_rate": 0.00016574553310229982, "loss": 0.3397, "step": 12549 }, { "epoch": 1.0166882696046662, "grad_norm": 0.03136202692985535, "learning_rate": 0.00016574103244970523, "loss": 0.2956, "step": 12550 }, { "epoch": 1.0167692806221647, "grad_norm": 0.033236436545848846, "learning_rate": 0.0001657365317971106, "loss": 0.3206, "step": 12551 }, { "epoch": 1.016850291639663, "grad_norm": 0.03548416122794151, "learning_rate": 0.00016573203114451595, "loss": 0.3247, "step": 12552 }, { "epoch": 1.0169313026571614, "grad_norm": 0.036083072423934937, "learning_rate": 0.00016572753049192134, "loss": 0.3196, "step": 12553 }, { "epoch": 1.0170123136746598, "grad_norm": 0.03948529064655304, "learning_rate": 0.0001657230298393267, "loss": 0.3812, "step": 12554 }, { "epoch": 1.017093324692158, "grad_norm": 0.03718789666891098, "learning_rate": 0.0001657185291867321, "loss": 0.3371, "step": 12555 }, { "epoch": 1.0171743357096565, "grad_norm": 0.036488890647888184, "learning_rate": 0.00016571402853413747, "loss": 0.3426, "step": 12556 }, { "epoch": 1.0172553467271548, "grad_norm": 0.03828705847263336, "learning_rate": 0.00016570952788154283, "loss": 0.3322, "step": 12557 }, { "epoch": 1.0173363577446533, "grad_norm": 0.03848506510257721, "learning_rate": 0.0001657050272289482, "loss": 0.3401, "step": 12558 }, { "epoch": 1.0174173687621517, "grad_norm": 0.037985723465681076, "learning_rate": 0.00016570052657635358, "loss": 0.3494, "step": 12559 }, { "epoch": 1.01749837977965, "grad_norm": 0.03458062931895256, "learning_rate": 0.00016569602592375894, "loss": 0.3056, "step": 12560 }, { "epoch": 1.0175793907971484, "grad_norm": 0.037043794989585876, "learning_rate": 0.00016569152527116433, "loss": 0.3423, "step": 12561 }, { "epoch": 1.017660401814647, "grad_norm": 0.036882054060697556, "learning_rate": 0.00016568702461856972, "loss": 0.323, "step": 12562 }, { "epoch": 1.0177414128321451, "grad_norm": 0.036688271909952164, "learning_rate": 0.00016568252396597508, "loss": 0.3355, "step": 12563 }, { "epoch": 1.0178224238496436, "grad_norm": 0.03149713948369026, "learning_rate": 0.00016567802331338044, "loss": 0.2675, "step": 12564 }, { "epoch": 1.0179034348671419, "grad_norm": 0.035839568823575974, "learning_rate": 0.00016567352266078582, "loss": 0.3237, "step": 12565 }, { "epoch": 1.0179844458846403, "grad_norm": 0.036895159631967545, "learning_rate": 0.00016566902200819118, "loss": 0.3171, "step": 12566 }, { "epoch": 1.0180654569021388, "grad_norm": 0.033506982028484344, "learning_rate": 0.00016566452135559657, "loss": 0.304, "step": 12567 }, { "epoch": 1.018146467919637, "grad_norm": 0.04358444735407829, "learning_rate": 0.00016566002070300196, "loss": 0.3285, "step": 12568 }, { "epoch": 1.0182274789371355, "grad_norm": 0.03842248022556305, "learning_rate": 0.00016565552005040732, "loss": 0.3214, "step": 12569 }, { "epoch": 1.0183084899546337, "grad_norm": 0.03867550566792488, "learning_rate": 0.00016565101939781268, "loss": 0.3095, "step": 12570 }, { "epoch": 1.0183895009721322, "grad_norm": 0.037699684500694275, "learning_rate": 0.00016564651874521807, "loss": 0.3342, "step": 12571 }, { "epoch": 1.0184705119896307, "grad_norm": 0.033856455236673355, "learning_rate": 0.00016564201809262343, "loss": 0.2795, "step": 12572 }, { "epoch": 1.018551523007129, "grad_norm": 0.03416990861296654, "learning_rate": 0.0001656375174400288, "loss": 0.2877, "step": 12573 }, { "epoch": 1.0186325340246274, "grad_norm": 0.03922758251428604, "learning_rate": 0.0001656330167874342, "loss": 0.351, "step": 12574 }, { "epoch": 1.0187135450421256, "grad_norm": 0.03650622069835663, "learning_rate": 0.00016562851613483956, "loss": 0.3001, "step": 12575 }, { "epoch": 1.018794556059624, "grad_norm": 0.040765952318906784, "learning_rate": 0.00016562401548224492, "loss": 0.3258, "step": 12576 }, { "epoch": 1.0188755670771226, "grad_norm": 0.03666510432958603, "learning_rate": 0.0001656195148296503, "loss": 0.316, "step": 12577 }, { "epoch": 1.0189565780946208, "grad_norm": 0.0341981016099453, "learning_rate": 0.00016561501417705567, "loss": 0.331, "step": 12578 }, { "epoch": 1.0190375891121193, "grad_norm": 0.035523395985364914, "learning_rate": 0.00016561051352446105, "loss": 0.3054, "step": 12579 }, { "epoch": 1.0191186001296175, "grad_norm": 0.0412684865295887, "learning_rate": 0.00016560601287186644, "loss": 0.3498, "step": 12580 }, { "epoch": 1.019199611147116, "grad_norm": 0.04070408269762993, "learning_rate": 0.0001656015122192718, "loss": 0.3212, "step": 12581 }, { "epoch": 1.0192806221646145, "grad_norm": 0.03692404925823212, "learning_rate": 0.00016559701156667716, "loss": 0.2891, "step": 12582 }, { "epoch": 1.0193616331821127, "grad_norm": 0.037462033331394196, "learning_rate": 0.00016559251091408255, "loss": 0.3539, "step": 12583 }, { "epoch": 1.0194426441996112, "grad_norm": 0.03793095424771309, "learning_rate": 0.0001655880102614879, "loss": 0.3357, "step": 12584 }, { "epoch": 1.0195236552171094, "grad_norm": 0.04043465107679367, "learning_rate": 0.0001655835096088933, "loss": 0.3406, "step": 12585 }, { "epoch": 1.0196046662346079, "grad_norm": 0.03590738773345947, "learning_rate": 0.00016557900895629868, "loss": 0.3509, "step": 12586 }, { "epoch": 1.0196856772521063, "grad_norm": 0.04093816131353378, "learning_rate": 0.00016557450830370404, "loss": 0.3627, "step": 12587 }, { "epoch": 1.0197666882696046, "grad_norm": 0.04243616387248039, "learning_rate": 0.0001655700076511094, "loss": 0.3127, "step": 12588 }, { "epoch": 1.019847699287103, "grad_norm": 0.03815116733312607, "learning_rate": 0.0001655655069985148, "loss": 0.306, "step": 12589 }, { "epoch": 1.0199287103046015, "grad_norm": 0.03636857122182846, "learning_rate": 0.00016556100634592015, "loss": 0.3488, "step": 12590 }, { "epoch": 1.0200097213220998, "grad_norm": 0.03637069836258888, "learning_rate": 0.00016555650569332554, "loss": 0.3454, "step": 12591 }, { "epoch": 1.0200907323395982, "grad_norm": 0.04027386009693146, "learning_rate": 0.00016555200504073092, "loss": 0.3202, "step": 12592 }, { "epoch": 1.0201717433570965, "grad_norm": 0.029983578249812126, "learning_rate": 0.00016554750438813628, "loss": 0.2673, "step": 12593 }, { "epoch": 1.020252754374595, "grad_norm": 0.03252333030104637, "learning_rate": 0.00016554300373554164, "loss": 0.2997, "step": 12594 }, { "epoch": 1.0203337653920934, "grad_norm": 0.03685925528407097, "learning_rate": 0.00016553850308294703, "loss": 0.3156, "step": 12595 }, { "epoch": 1.0204147764095917, "grad_norm": 0.04175392538309097, "learning_rate": 0.00016553400243035242, "loss": 0.3562, "step": 12596 }, { "epoch": 1.0204957874270901, "grad_norm": 0.04049062356352806, "learning_rate": 0.00016552950177775778, "loss": 0.3742, "step": 12597 }, { "epoch": 1.0205767984445884, "grad_norm": 0.03493852540850639, "learning_rate": 0.00016552500112516317, "loss": 0.3111, "step": 12598 }, { "epoch": 1.0206578094620868, "grad_norm": 0.04160711541771889, "learning_rate": 0.00016552050047256853, "loss": 0.3593, "step": 12599 }, { "epoch": 1.0207388204795853, "grad_norm": 0.03934788703918457, "learning_rate": 0.00016551599981997389, "loss": 0.3271, "step": 12600 }, { "epoch": 1.0208198314970836, "grad_norm": 0.038866519927978516, "learning_rate": 0.00016551149916737927, "loss": 0.3023, "step": 12601 }, { "epoch": 1.020900842514582, "grad_norm": 0.04214729741215706, "learning_rate": 0.00016550699851478466, "loss": 0.346, "step": 12602 }, { "epoch": 1.0209818535320803, "grad_norm": 0.0387173555791378, "learning_rate": 0.00016550249786219002, "loss": 0.3135, "step": 12603 }, { "epoch": 1.0210628645495787, "grad_norm": 0.035196542739868164, "learning_rate": 0.0001654979972095954, "loss": 0.3233, "step": 12604 }, { "epoch": 1.0211438755670772, "grad_norm": 0.036050982773303986, "learning_rate": 0.00016549349655700077, "loss": 0.2754, "step": 12605 }, { "epoch": 1.0212248865845754, "grad_norm": 0.039573341608047485, "learning_rate": 0.00016548899590440613, "loss": 0.3403, "step": 12606 }, { "epoch": 1.021305897602074, "grad_norm": 0.034855302423238754, "learning_rate": 0.00016548449525181152, "loss": 0.3194, "step": 12607 }, { "epoch": 1.0213869086195722, "grad_norm": 0.042670026421546936, "learning_rate": 0.0001654799945992169, "loss": 0.3411, "step": 12608 }, { "epoch": 1.0214679196370706, "grad_norm": 0.03425101190805435, "learning_rate": 0.00016547549394662226, "loss": 0.294, "step": 12609 }, { "epoch": 1.021548930654569, "grad_norm": 0.035355765372514725, "learning_rate": 0.00016547099329402765, "loss": 0.3115, "step": 12610 }, { "epoch": 1.0216299416720673, "grad_norm": 0.043876733630895615, "learning_rate": 0.000165466492641433, "loss": 0.3033, "step": 12611 }, { "epoch": 1.0217109526895658, "grad_norm": 0.04104451835155487, "learning_rate": 0.00016546199198883837, "loss": 0.3531, "step": 12612 }, { "epoch": 1.0217919637070643, "grad_norm": 0.038133054971694946, "learning_rate": 0.00016545749133624376, "loss": 0.3153, "step": 12613 }, { "epoch": 1.0218729747245625, "grad_norm": 0.039468154311180115, "learning_rate": 0.00016545299068364914, "loss": 0.3387, "step": 12614 }, { "epoch": 1.021953985742061, "grad_norm": 0.04036974534392357, "learning_rate": 0.0001654484900310545, "loss": 0.3256, "step": 12615 }, { "epoch": 1.0220349967595592, "grad_norm": 0.03471231833100319, "learning_rate": 0.0001654439893784599, "loss": 0.2947, "step": 12616 }, { "epoch": 1.0221160077770577, "grad_norm": 0.04138367995619774, "learning_rate": 0.00016543948872586525, "loss": 0.3119, "step": 12617 }, { "epoch": 1.0221970187945562, "grad_norm": 0.04733224958181381, "learning_rate": 0.00016543498807327064, "loss": 0.3432, "step": 12618 }, { "epoch": 1.0222780298120544, "grad_norm": 0.03255488723516464, "learning_rate": 0.00016543048742067603, "loss": 0.2945, "step": 12619 }, { "epoch": 1.0223590408295529, "grad_norm": 0.04019397124648094, "learning_rate": 0.00016542598676808139, "loss": 0.3161, "step": 12620 }, { "epoch": 1.0224400518470511, "grad_norm": 0.03219766542315483, "learning_rate": 0.00016542148611548675, "loss": 0.2848, "step": 12621 }, { "epoch": 1.0225210628645496, "grad_norm": 0.041206423193216324, "learning_rate": 0.00016541698546289213, "loss": 0.3047, "step": 12622 }, { "epoch": 1.022602073882048, "grad_norm": 0.043796516954898834, "learning_rate": 0.0001654124848102975, "loss": 0.3757, "step": 12623 }, { "epoch": 1.0226830848995463, "grad_norm": 0.038199108093976974, "learning_rate": 0.00016540798415770288, "loss": 0.3825, "step": 12624 }, { "epoch": 1.0227640959170448, "grad_norm": 0.03842081502079964, "learning_rate": 0.00016540348350510827, "loss": 0.3104, "step": 12625 }, { "epoch": 1.022845106934543, "grad_norm": 0.03330446034669876, "learning_rate": 0.00016539898285251363, "loss": 0.2845, "step": 12626 }, { "epoch": 1.0229261179520415, "grad_norm": 0.03181296959519386, "learning_rate": 0.000165394482199919, "loss": 0.299, "step": 12627 }, { "epoch": 1.02300712896954, "grad_norm": 0.03798550367355347, "learning_rate": 0.00016538998154732437, "loss": 0.3271, "step": 12628 }, { "epoch": 1.0230881399870382, "grad_norm": 0.035294223576784134, "learning_rate": 0.00016538548089472973, "loss": 0.2878, "step": 12629 }, { "epoch": 1.0231691510045366, "grad_norm": 0.04210999608039856, "learning_rate": 0.00016538098024213512, "loss": 0.3353, "step": 12630 }, { "epoch": 1.023250162022035, "grad_norm": 0.039759013801813126, "learning_rate": 0.0001653764795895405, "loss": 0.4063, "step": 12631 }, { "epoch": 1.0233311730395334, "grad_norm": 0.036347441375255585, "learning_rate": 0.00016537197893694587, "loss": 0.3421, "step": 12632 }, { "epoch": 1.0234121840570318, "grad_norm": 0.032233960926532745, "learning_rate": 0.00016536747828435123, "loss": 0.291, "step": 12633 }, { "epoch": 1.02349319507453, "grad_norm": 0.030994798988103867, "learning_rate": 0.00016536297763175662, "loss": 0.2779, "step": 12634 }, { "epoch": 1.0235742060920285, "grad_norm": 0.03740865737199783, "learning_rate": 0.00016535847697916198, "loss": 0.3453, "step": 12635 }, { "epoch": 1.023655217109527, "grad_norm": 0.03850658982992172, "learning_rate": 0.00016535397632656736, "loss": 0.321, "step": 12636 }, { "epoch": 1.0237362281270252, "grad_norm": 0.03866725414991379, "learning_rate": 0.00016534947567397275, "loss": 0.3467, "step": 12637 }, { "epoch": 1.0238172391445237, "grad_norm": 0.04454237222671509, "learning_rate": 0.0001653449750213781, "loss": 0.3599, "step": 12638 }, { "epoch": 1.023898250162022, "grad_norm": 0.036081261932849884, "learning_rate": 0.00016534047436878347, "loss": 0.2961, "step": 12639 }, { "epoch": 1.0239792611795204, "grad_norm": 0.03776612877845764, "learning_rate": 0.00016533597371618886, "loss": 0.338, "step": 12640 }, { "epoch": 1.024060272197019, "grad_norm": 0.0352783240377903, "learning_rate": 0.00016533147306359422, "loss": 0.3215, "step": 12641 }, { "epoch": 1.0241412832145171, "grad_norm": 0.03597136586904526, "learning_rate": 0.0001653269724109996, "loss": 0.3075, "step": 12642 }, { "epoch": 1.0242222942320156, "grad_norm": 0.037728115916252136, "learning_rate": 0.000165322471758405, "loss": 0.3122, "step": 12643 }, { "epoch": 1.0243033052495139, "grad_norm": 0.039568349719047546, "learning_rate": 0.00016531797110581035, "loss": 0.32, "step": 12644 }, { "epoch": 1.0243843162670123, "grad_norm": 0.039027776569128036, "learning_rate": 0.0001653134704532157, "loss": 0.3213, "step": 12645 }, { "epoch": 1.0244653272845108, "grad_norm": 0.03547472506761551, "learning_rate": 0.0001653089698006211, "loss": 0.3103, "step": 12646 }, { "epoch": 1.024546338302009, "grad_norm": 0.03676692023873329, "learning_rate": 0.00016530446914802646, "loss": 0.3289, "step": 12647 }, { "epoch": 1.0246273493195075, "grad_norm": 0.04158513993024826, "learning_rate": 0.00016529996849543185, "loss": 0.3409, "step": 12648 }, { "epoch": 1.0247083603370057, "grad_norm": 0.040971919894218445, "learning_rate": 0.00016529546784283723, "loss": 0.3207, "step": 12649 }, { "epoch": 1.0247893713545042, "grad_norm": 0.03509372100234032, "learning_rate": 0.0001652909671902426, "loss": 0.3189, "step": 12650 }, { "epoch": 1.0248703823720027, "grad_norm": 0.043991539627313614, "learning_rate": 0.00016528646653764795, "loss": 0.3416, "step": 12651 }, { "epoch": 1.024951393389501, "grad_norm": 0.0418696291744709, "learning_rate": 0.00016528196588505334, "loss": 0.3327, "step": 12652 }, { "epoch": 1.0250324044069994, "grad_norm": 0.03739069774746895, "learning_rate": 0.0001652774652324587, "loss": 0.3865, "step": 12653 }, { "epoch": 1.0251134154244976, "grad_norm": 0.03675004467368126, "learning_rate": 0.0001652729645798641, "loss": 0.3498, "step": 12654 }, { "epoch": 1.025194426441996, "grad_norm": 0.03807086870074272, "learning_rate": 0.00016526846392726948, "loss": 0.331, "step": 12655 }, { "epoch": 1.0252754374594946, "grad_norm": 0.03432943671941757, "learning_rate": 0.00016526396327467484, "loss": 0.3042, "step": 12656 }, { "epoch": 1.0253564484769928, "grad_norm": 0.03586483374238014, "learning_rate": 0.0001652594626220802, "loss": 0.2827, "step": 12657 }, { "epoch": 1.0254374594944913, "grad_norm": 0.03931160271167755, "learning_rate": 0.00016525496196948558, "loss": 0.3446, "step": 12658 }, { "epoch": 1.0255184705119895, "grad_norm": 0.036072421818971634, "learning_rate": 0.00016525046131689094, "loss": 0.3451, "step": 12659 }, { "epoch": 1.025599481529488, "grad_norm": 0.04291589930653572, "learning_rate": 0.00016524596066429633, "loss": 0.3378, "step": 12660 }, { "epoch": 1.0256804925469865, "grad_norm": 0.03836192935705185, "learning_rate": 0.00016524146001170172, "loss": 0.3498, "step": 12661 }, { "epoch": 1.0257615035644847, "grad_norm": 0.038849957287311554, "learning_rate": 0.00016523695935910708, "loss": 0.3155, "step": 12662 }, { "epoch": 1.0258425145819832, "grad_norm": 0.035676270723342896, "learning_rate": 0.00016523245870651244, "loss": 0.3029, "step": 12663 }, { "epoch": 1.0259235255994816, "grad_norm": 0.04012364521622658, "learning_rate": 0.00016522795805391782, "loss": 0.3858, "step": 12664 }, { "epoch": 1.0260045366169799, "grad_norm": 0.037432339042425156, "learning_rate": 0.00016522345740132318, "loss": 0.322, "step": 12665 }, { "epoch": 1.0260855476344783, "grad_norm": 0.036835625767707825, "learning_rate": 0.00016521895674872857, "loss": 0.3359, "step": 12666 }, { "epoch": 1.0261665586519766, "grad_norm": 0.03636383265256882, "learning_rate": 0.00016521445609613396, "loss": 0.2959, "step": 12667 }, { "epoch": 1.026247569669475, "grad_norm": 0.03819280117750168, "learning_rate": 0.00016520995544353932, "loss": 0.2792, "step": 12668 }, { "epoch": 1.0263285806869735, "grad_norm": 0.039029382169246674, "learning_rate": 0.00016520545479094468, "loss": 0.3051, "step": 12669 }, { "epoch": 1.0264095917044718, "grad_norm": 0.03400282561779022, "learning_rate": 0.00016520095413835007, "loss": 0.2967, "step": 12670 }, { "epoch": 1.0264906027219702, "grad_norm": 0.03828096762299538, "learning_rate": 0.00016519645348575545, "loss": 0.3152, "step": 12671 }, { "epoch": 1.0265716137394685, "grad_norm": 0.039998847991228104, "learning_rate": 0.0001651919528331608, "loss": 0.3238, "step": 12672 }, { "epoch": 1.026652624756967, "grad_norm": 0.03841540217399597, "learning_rate": 0.0001651874521805662, "loss": 0.3157, "step": 12673 }, { "epoch": 1.0267336357744654, "grad_norm": 0.03829701989889145, "learning_rate": 0.00016518295152797156, "loss": 0.3399, "step": 12674 }, { "epoch": 1.0268146467919637, "grad_norm": 0.03337425738573074, "learning_rate": 0.00016517845087537692, "loss": 0.3033, "step": 12675 }, { "epoch": 1.0268956578094621, "grad_norm": 0.03805439919233322, "learning_rate": 0.0001651739502227823, "loss": 0.3487, "step": 12676 }, { "epoch": 1.0269766688269604, "grad_norm": 0.039699751883745193, "learning_rate": 0.0001651694495701877, "loss": 0.324, "step": 12677 }, { "epoch": 1.0270576798444588, "grad_norm": 0.03741767629981041, "learning_rate": 0.00016516494891759305, "loss": 0.297, "step": 12678 }, { "epoch": 1.0271386908619573, "grad_norm": 0.03826878219842911, "learning_rate": 0.00016516044826499844, "loss": 0.3311, "step": 12679 }, { "epoch": 1.0272197018794555, "grad_norm": 0.041468605399131775, "learning_rate": 0.0001651559476124038, "loss": 0.3181, "step": 12680 }, { "epoch": 1.027300712896954, "grad_norm": 0.03656802698969841, "learning_rate": 0.00016515144695980916, "loss": 0.3039, "step": 12681 }, { "epoch": 1.0273817239144523, "grad_norm": 0.03783682361245155, "learning_rate": 0.00016514694630721455, "loss": 0.3318, "step": 12682 }, { "epoch": 1.0274627349319507, "grad_norm": 0.042352866381406784, "learning_rate": 0.00016514244565461994, "loss": 0.3418, "step": 12683 }, { "epoch": 1.0275437459494492, "grad_norm": 0.039490267634391785, "learning_rate": 0.0001651379450020253, "loss": 0.3186, "step": 12684 }, { "epoch": 1.0276247569669474, "grad_norm": 0.04535860940814018, "learning_rate": 0.00016513344434943068, "loss": 0.3763, "step": 12685 }, { "epoch": 1.027705767984446, "grad_norm": 0.039086759090423584, "learning_rate": 0.00016512894369683604, "loss": 0.3221, "step": 12686 }, { "epoch": 1.0277867790019444, "grad_norm": 0.03470296785235405, "learning_rate": 0.00016512444304424143, "loss": 0.2955, "step": 12687 }, { "epoch": 1.0278677900194426, "grad_norm": 0.03623901680111885, "learning_rate": 0.0001651199423916468, "loss": 0.3199, "step": 12688 }, { "epoch": 1.027948801036941, "grad_norm": 0.03384559229016304, "learning_rate": 0.00016511544173905218, "loss": 0.3335, "step": 12689 }, { "epoch": 1.0280298120544393, "grad_norm": 0.03289582580327988, "learning_rate": 0.00016511094108645754, "loss": 0.3128, "step": 12690 }, { "epoch": 1.0281108230719378, "grad_norm": 0.042164016515016556, "learning_rate": 0.00016510644043386292, "loss": 0.3708, "step": 12691 }, { "epoch": 1.0281918340894363, "grad_norm": 0.035185497254133224, "learning_rate": 0.00016510193978126828, "loss": 0.3108, "step": 12692 }, { "epoch": 1.0282728451069345, "grad_norm": 0.039614636451005936, "learning_rate": 0.00016509743912867367, "loss": 0.3313, "step": 12693 }, { "epoch": 1.028353856124433, "grad_norm": 0.03454237058758736, "learning_rate": 0.00016509293847607903, "loss": 0.3086, "step": 12694 }, { "epoch": 1.0284348671419312, "grad_norm": 0.043382588773965836, "learning_rate": 0.00016508843782348442, "loss": 0.368, "step": 12695 }, { "epoch": 1.0285158781594297, "grad_norm": 0.034158188849687576, "learning_rate": 0.00016508393717088978, "loss": 0.2759, "step": 12696 }, { "epoch": 1.0285968891769282, "grad_norm": 0.03920743614435196, "learning_rate": 0.00016507943651829517, "loss": 0.3234, "step": 12697 }, { "epoch": 1.0286779001944264, "grad_norm": 0.03635401278734207, "learning_rate": 0.00016507493586570053, "loss": 0.3071, "step": 12698 }, { "epoch": 1.0287589112119249, "grad_norm": 0.037762533873319626, "learning_rate": 0.00016507043521310591, "loss": 0.3268, "step": 12699 }, { "epoch": 1.028839922229423, "grad_norm": 0.034993529319763184, "learning_rate": 0.0001650659345605113, "loss": 0.2665, "step": 12700 }, { "epoch": 1.0289209332469216, "grad_norm": 0.03569914773106575, "learning_rate": 0.00016506143390791666, "loss": 0.2778, "step": 12701 }, { "epoch": 1.02900194426442, "grad_norm": 0.03663048893213272, "learning_rate": 0.00016505693325532202, "loss": 0.3148, "step": 12702 }, { "epoch": 1.0290829552819183, "grad_norm": 0.03235512226819992, "learning_rate": 0.0001650524326027274, "loss": 0.2983, "step": 12703 }, { "epoch": 1.0291639662994168, "grad_norm": 0.03489063307642937, "learning_rate": 0.00016504793195013277, "loss": 0.3131, "step": 12704 }, { "epoch": 1.029244977316915, "grad_norm": 0.03714337944984436, "learning_rate": 0.00016504343129753816, "loss": 0.3165, "step": 12705 }, { "epoch": 1.0293259883344135, "grad_norm": 0.032778069376945496, "learning_rate": 0.00016503893064494354, "loss": 0.2898, "step": 12706 }, { "epoch": 1.029406999351912, "grad_norm": 0.04097514972090721, "learning_rate": 0.0001650344299923489, "loss": 0.3605, "step": 12707 }, { "epoch": 1.0294880103694102, "grad_norm": 0.03973684087395668, "learning_rate": 0.00016502992933975426, "loss": 0.3336, "step": 12708 }, { "epoch": 1.0295690213869086, "grad_norm": 0.03882990777492523, "learning_rate": 0.00016502542868715965, "loss": 0.3164, "step": 12709 }, { "epoch": 1.029650032404407, "grad_norm": 0.03919418901205063, "learning_rate": 0.000165020928034565, "loss": 0.3582, "step": 12710 }, { "epoch": 1.0297310434219054, "grad_norm": 0.03751210868358612, "learning_rate": 0.0001650164273819704, "loss": 0.2969, "step": 12711 }, { "epoch": 1.0298120544394038, "grad_norm": 0.034326836466789246, "learning_rate": 0.00016501192672937578, "loss": 0.3231, "step": 12712 }, { "epoch": 1.029893065456902, "grad_norm": 0.02914053201675415, "learning_rate": 0.00016500742607678114, "loss": 0.2834, "step": 12713 }, { "epoch": 1.0299740764744005, "grad_norm": 0.036301035434007645, "learning_rate": 0.0001650029254241865, "loss": 0.3006, "step": 12714 }, { "epoch": 1.030055087491899, "grad_norm": 0.03592952340841293, "learning_rate": 0.0001649984247715919, "loss": 0.307, "step": 12715 }, { "epoch": 1.0301360985093972, "grad_norm": 0.04360431432723999, "learning_rate": 0.00016499392411899725, "loss": 0.3239, "step": 12716 }, { "epoch": 1.0302171095268957, "grad_norm": 0.036142975091934204, "learning_rate": 0.00016498942346640264, "loss": 0.3437, "step": 12717 }, { "epoch": 1.030298120544394, "grad_norm": 0.03672938793897629, "learning_rate": 0.00016498492281380803, "loss": 0.313, "step": 12718 }, { "epoch": 1.0303791315618924, "grad_norm": 0.033051345497369766, "learning_rate": 0.00016498042216121339, "loss": 0.2892, "step": 12719 }, { "epoch": 1.030460142579391, "grad_norm": 0.036963798105716705, "learning_rate": 0.00016497592150861875, "loss": 0.2768, "step": 12720 }, { "epoch": 1.0305411535968891, "grad_norm": 0.03400468826293945, "learning_rate": 0.00016497142085602413, "loss": 0.2884, "step": 12721 }, { "epoch": 1.0306221646143876, "grad_norm": 0.03542281687259674, "learning_rate": 0.0001649669202034295, "loss": 0.3374, "step": 12722 }, { "epoch": 1.0307031756318858, "grad_norm": 0.03506822884082794, "learning_rate": 0.00016496241955083488, "loss": 0.3056, "step": 12723 }, { "epoch": 1.0307841866493843, "grad_norm": 0.039111554622650146, "learning_rate": 0.00016495791889824027, "loss": 0.3104, "step": 12724 }, { "epoch": 1.0308651976668828, "grad_norm": 0.03314598277211189, "learning_rate": 0.00016495341824564563, "loss": 0.296, "step": 12725 }, { "epoch": 1.030946208684381, "grad_norm": 0.03986050933599472, "learning_rate": 0.000164948917593051, "loss": 0.3249, "step": 12726 }, { "epoch": 1.0310272197018795, "grad_norm": 0.040087323635816574, "learning_rate": 0.00016494441694045637, "loss": 0.303, "step": 12727 }, { "epoch": 1.0311082307193777, "grad_norm": 0.03449118509888649, "learning_rate": 0.00016493991628786173, "loss": 0.3083, "step": 12728 }, { "epoch": 1.0311892417368762, "grad_norm": 0.04075778275728226, "learning_rate": 0.00016493541563526712, "loss": 0.3229, "step": 12729 }, { "epoch": 1.0312702527543747, "grad_norm": 0.03875141218304634, "learning_rate": 0.0001649309149826725, "loss": 0.342, "step": 12730 }, { "epoch": 1.031351263771873, "grad_norm": 0.03614082932472229, "learning_rate": 0.00016492641433007787, "loss": 0.3056, "step": 12731 }, { "epoch": 1.0314322747893714, "grad_norm": 0.03601795807480812, "learning_rate": 0.00016492191367748323, "loss": 0.3039, "step": 12732 }, { "epoch": 1.0315132858068696, "grad_norm": 0.0373661145567894, "learning_rate": 0.00016491741302488862, "loss": 0.3006, "step": 12733 }, { "epoch": 1.031594296824368, "grad_norm": 0.043441496789455414, "learning_rate": 0.00016491291237229398, "loss": 0.3699, "step": 12734 }, { "epoch": 1.0316753078418666, "grad_norm": 0.04167543724179268, "learning_rate": 0.00016490841171969936, "loss": 0.3815, "step": 12735 }, { "epoch": 1.0317563188593648, "grad_norm": 0.03903650864958763, "learning_rate": 0.00016490391106710475, "loss": 0.3061, "step": 12736 }, { "epoch": 1.0318373298768633, "grad_norm": 0.03695125877857208, "learning_rate": 0.0001648994104145101, "loss": 0.3025, "step": 12737 }, { "epoch": 1.0319183408943617, "grad_norm": 0.04857954382896423, "learning_rate": 0.00016489490976191547, "loss": 0.2771, "step": 12738 }, { "epoch": 1.03199935191186, "grad_norm": 0.03504607826471329, "learning_rate": 0.00016489040910932086, "loss": 0.2675, "step": 12739 }, { "epoch": 1.0320803629293585, "grad_norm": 0.03955131024122238, "learning_rate": 0.00016488590845672622, "loss": 0.2919, "step": 12740 }, { "epoch": 1.0321613739468567, "grad_norm": 0.03247314691543579, "learning_rate": 0.0001648814078041316, "loss": 0.3099, "step": 12741 }, { "epoch": 1.0322423849643552, "grad_norm": 0.03682674840092659, "learning_rate": 0.000164876907151537, "loss": 0.3116, "step": 12742 }, { "epoch": 1.0323233959818536, "grad_norm": 0.036403726786375046, "learning_rate": 0.00016487240649894235, "loss": 0.2985, "step": 12743 }, { "epoch": 1.0324044069993519, "grad_norm": 0.03299999237060547, "learning_rate": 0.0001648679058463477, "loss": 0.2904, "step": 12744 }, { "epoch": 1.0324854180168503, "grad_norm": 0.04140744358301163, "learning_rate": 0.0001648634051937531, "loss": 0.3295, "step": 12745 }, { "epoch": 1.0325664290343486, "grad_norm": 0.03611183539032936, "learning_rate": 0.00016485890454115846, "loss": 0.3071, "step": 12746 }, { "epoch": 1.032647440051847, "grad_norm": 0.04135493189096451, "learning_rate": 0.00016485440388856385, "loss": 0.3406, "step": 12747 }, { "epoch": 1.0327284510693455, "grad_norm": 0.03947634994983673, "learning_rate": 0.00016484990323596923, "loss": 0.3157, "step": 12748 }, { "epoch": 1.0328094620868438, "grad_norm": 0.03282932937145233, "learning_rate": 0.0001648454025833746, "loss": 0.2609, "step": 12749 }, { "epoch": 1.0328904731043422, "grad_norm": 0.04158890247344971, "learning_rate": 0.00016484090193077995, "loss": 0.3281, "step": 12750 }, { "epoch": 1.0329714841218405, "grad_norm": 0.0459459125995636, "learning_rate": 0.00016483640127818534, "loss": 0.318, "step": 12751 }, { "epoch": 1.033052495139339, "grad_norm": 0.04458964243531227, "learning_rate": 0.00016483190062559073, "loss": 0.2824, "step": 12752 }, { "epoch": 1.0331335061568374, "grad_norm": 0.03444017842411995, "learning_rate": 0.0001648273999729961, "loss": 0.2451, "step": 12753 }, { "epoch": 1.0332145171743357, "grad_norm": 0.037729572504758835, "learning_rate": 0.00016482289932040148, "loss": 0.2816, "step": 12754 }, { "epoch": 1.0332955281918341, "grad_norm": 0.03754454106092453, "learning_rate": 0.00016481839866780684, "loss": 0.325, "step": 12755 }, { "epoch": 1.0333765392093324, "grad_norm": 0.035541508346796036, "learning_rate": 0.00016481389801521222, "loss": 0.3064, "step": 12756 }, { "epoch": 1.0334575502268308, "grad_norm": 0.040059611201286316, "learning_rate": 0.00016480939736261758, "loss": 0.3018, "step": 12757 }, { "epoch": 1.0335385612443293, "grad_norm": 0.034477706998586655, "learning_rate": 0.00016480489671002297, "loss": 0.2846, "step": 12758 }, { "epoch": 1.0336195722618275, "grad_norm": 0.0396190881729126, "learning_rate": 0.00016480039605742833, "loss": 0.3092, "step": 12759 }, { "epoch": 1.033700583279326, "grad_norm": 0.03748718649148941, "learning_rate": 0.00016479589540483372, "loss": 0.2901, "step": 12760 }, { "epoch": 1.0337815942968245, "grad_norm": 0.03152185305953026, "learning_rate": 0.00016479139475223908, "loss": 0.2671, "step": 12761 }, { "epoch": 1.0338626053143227, "grad_norm": 0.038291048258543015, "learning_rate": 0.00016478689409964446, "loss": 0.3398, "step": 12762 }, { "epoch": 1.0339436163318212, "grad_norm": 0.037285178899765015, "learning_rate": 0.00016478239344704982, "loss": 0.3386, "step": 12763 }, { "epoch": 1.0340246273493194, "grad_norm": 0.041180677711963654, "learning_rate": 0.0001647778927944552, "loss": 0.3311, "step": 12764 }, { "epoch": 1.034105638366818, "grad_norm": 0.044005073606967926, "learning_rate": 0.00016477339214186057, "loss": 0.3778, "step": 12765 }, { "epoch": 1.0341866493843164, "grad_norm": 0.036691803485155106, "learning_rate": 0.00016476889148926596, "loss": 0.2971, "step": 12766 }, { "epoch": 1.0342676604018146, "grad_norm": 0.03426551818847656, "learning_rate": 0.00016476439083667132, "loss": 0.3008, "step": 12767 }, { "epoch": 1.034348671419313, "grad_norm": 0.03679486736655235, "learning_rate": 0.0001647598901840767, "loss": 0.3064, "step": 12768 }, { "epoch": 1.0344296824368113, "grad_norm": 0.04301043227314949, "learning_rate": 0.00016475538953148207, "loss": 0.3404, "step": 12769 }, { "epoch": 1.0345106934543098, "grad_norm": 0.03880010172724724, "learning_rate": 0.00016475088887888745, "loss": 0.3673, "step": 12770 }, { "epoch": 1.0345917044718083, "grad_norm": 0.04446203634142876, "learning_rate": 0.0001647463882262928, "loss": 0.3706, "step": 12771 }, { "epoch": 1.0346727154893065, "grad_norm": 0.03888606280088425, "learning_rate": 0.0001647418875736982, "loss": 0.3157, "step": 12772 }, { "epoch": 1.034753726506805, "grad_norm": 0.03600630164146423, "learning_rate": 0.00016473738692110356, "loss": 0.3125, "step": 12773 }, { "epoch": 1.0348347375243032, "grad_norm": 0.035312626510858536, "learning_rate": 0.00016473288626850895, "loss": 0.2885, "step": 12774 }, { "epoch": 1.0349157485418017, "grad_norm": 0.033152591437101364, "learning_rate": 0.0001647283856159143, "loss": 0.2872, "step": 12775 }, { "epoch": 1.0349967595593002, "grad_norm": 0.040598925203084946, "learning_rate": 0.0001647238849633197, "loss": 0.359, "step": 12776 }, { "epoch": 1.0350777705767984, "grad_norm": 0.03144126012921333, "learning_rate": 0.00016471938431072505, "loss": 0.2809, "step": 12777 }, { "epoch": 1.0351587815942969, "grad_norm": 0.0429706797003746, "learning_rate": 0.00016471488365813044, "loss": 0.3573, "step": 12778 }, { "epoch": 1.035239792611795, "grad_norm": 0.04190703108906746, "learning_rate": 0.0001647103830055358, "loss": 0.3757, "step": 12779 }, { "epoch": 1.0353208036292936, "grad_norm": 0.03837323933839798, "learning_rate": 0.0001647058823529412, "loss": 0.3048, "step": 12780 }, { "epoch": 1.035401814646792, "grad_norm": 0.037326592952013016, "learning_rate": 0.00016470138170034658, "loss": 0.2965, "step": 12781 }, { "epoch": 1.0354828256642903, "grad_norm": 0.04110584780573845, "learning_rate": 0.00016469688104775194, "loss": 0.2806, "step": 12782 }, { "epoch": 1.0355638366817888, "grad_norm": 0.04354546591639519, "learning_rate": 0.0001646923803951573, "loss": 0.3752, "step": 12783 }, { "epoch": 1.035644847699287, "grad_norm": 0.03725029155611992, "learning_rate": 0.00016468787974256268, "loss": 0.3183, "step": 12784 }, { "epoch": 1.0357258587167855, "grad_norm": 0.03914825618267059, "learning_rate": 0.00016468337908996804, "loss": 0.3576, "step": 12785 }, { "epoch": 1.035806869734284, "grad_norm": 0.03538013994693756, "learning_rate": 0.00016467887843737343, "loss": 0.3384, "step": 12786 }, { "epoch": 1.0358878807517822, "grad_norm": 0.040092017501592636, "learning_rate": 0.00016467437778477882, "loss": 0.3358, "step": 12787 }, { "epoch": 1.0359688917692806, "grad_norm": 0.03664885461330414, "learning_rate": 0.00016466987713218418, "loss": 0.3106, "step": 12788 }, { "epoch": 1.036049902786779, "grad_norm": 0.03369804471731186, "learning_rate": 0.00016466537647958954, "loss": 0.3123, "step": 12789 }, { "epoch": 1.0361309138042774, "grad_norm": 0.03672627732157707, "learning_rate": 0.00016466087582699493, "loss": 0.3299, "step": 12790 }, { "epoch": 1.0362119248217758, "grad_norm": 0.03600330650806427, "learning_rate": 0.00016465637517440029, "loss": 0.3311, "step": 12791 }, { "epoch": 1.036292935839274, "grad_norm": 0.03647133335471153, "learning_rate": 0.00016465187452180567, "loss": 0.3326, "step": 12792 }, { "epoch": 1.0363739468567725, "grad_norm": 0.03292451798915863, "learning_rate": 0.00016464737386921106, "loss": 0.2895, "step": 12793 }, { "epoch": 1.036454957874271, "grad_norm": 0.03374708443880081, "learning_rate": 0.00016464287321661642, "loss": 0.315, "step": 12794 }, { "epoch": 1.0365359688917692, "grad_norm": 0.043175119906663895, "learning_rate": 0.00016463837256402178, "loss": 0.387, "step": 12795 }, { "epoch": 1.0366169799092677, "grad_norm": 0.03675583750009537, "learning_rate": 0.00016463387191142717, "loss": 0.3378, "step": 12796 }, { "epoch": 1.036697990926766, "grad_norm": 0.036697644740343094, "learning_rate": 0.00016462937125883253, "loss": 0.3323, "step": 12797 }, { "epoch": 1.0367790019442644, "grad_norm": 0.036025747656822205, "learning_rate": 0.00016462487060623791, "loss": 0.3212, "step": 12798 }, { "epoch": 1.036860012961763, "grad_norm": 0.04477591812610626, "learning_rate": 0.0001646203699536433, "loss": 0.3703, "step": 12799 }, { "epoch": 1.0369410239792611, "grad_norm": 0.03663002327084541, "learning_rate": 0.00016461586930104866, "loss": 0.2979, "step": 12800 }, { "epoch": 1.0370220349967596, "grad_norm": 0.031259819865226746, "learning_rate": 0.00016461136864845402, "loss": 0.3268, "step": 12801 }, { "epoch": 1.0371030460142578, "grad_norm": 0.03488311171531677, "learning_rate": 0.0001646068679958594, "loss": 0.3042, "step": 12802 }, { "epoch": 1.0371840570317563, "grad_norm": 0.03731519356369972, "learning_rate": 0.00016460236734326477, "loss": 0.2905, "step": 12803 }, { "epoch": 1.0372650680492548, "grad_norm": 0.034211575984954834, "learning_rate": 0.00016459786669067016, "loss": 0.2775, "step": 12804 }, { "epoch": 1.037346079066753, "grad_norm": 0.03737207502126694, "learning_rate": 0.00016459336603807554, "loss": 0.3309, "step": 12805 }, { "epoch": 1.0374270900842515, "grad_norm": 0.0373041145503521, "learning_rate": 0.0001645888653854809, "loss": 0.3127, "step": 12806 }, { "epoch": 1.0375081011017497, "grad_norm": 0.03419501706957817, "learning_rate": 0.00016458436473288626, "loss": 0.3012, "step": 12807 }, { "epoch": 1.0375891121192482, "grad_norm": 0.03152981400489807, "learning_rate": 0.00016457986408029165, "loss": 0.2665, "step": 12808 }, { "epoch": 1.0376701231367467, "grad_norm": 0.03565287962555885, "learning_rate": 0.000164575363427697, "loss": 0.2807, "step": 12809 }, { "epoch": 1.037751134154245, "grad_norm": 0.03869590908288956, "learning_rate": 0.0001645708627751024, "loss": 0.3173, "step": 12810 }, { "epoch": 1.0378321451717434, "grad_norm": 0.03857014328241348, "learning_rate": 0.00016456636212250778, "loss": 0.332, "step": 12811 }, { "epoch": 1.0379131561892416, "grad_norm": 0.035806186497211456, "learning_rate": 0.00016456186146991314, "loss": 0.3253, "step": 12812 }, { "epoch": 1.03799416720674, "grad_norm": 0.043626558035612106, "learning_rate": 0.0001645573608173185, "loss": 0.3661, "step": 12813 }, { "epoch": 1.0380751782242386, "grad_norm": 0.04251417517662048, "learning_rate": 0.0001645528601647239, "loss": 0.326, "step": 12814 }, { "epoch": 1.0381561892417368, "grad_norm": 0.035238828510046005, "learning_rate": 0.00016454835951212925, "loss": 0.2567, "step": 12815 }, { "epoch": 1.0382372002592353, "grad_norm": 0.03851184621453285, "learning_rate": 0.00016454385885953464, "loss": 0.276, "step": 12816 }, { "epoch": 1.0383182112767337, "grad_norm": 0.03683389723300934, "learning_rate": 0.00016453935820694003, "loss": 0.3146, "step": 12817 }, { "epoch": 1.038399222294232, "grad_norm": 0.0394960418343544, "learning_rate": 0.00016453485755434539, "loss": 0.3678, "step": 12818 }, { "epoch": 1.0384802333117304, "grad_norm": 0.035948723554611206, "learning_rate": 0.00016453035690175075, "loss": 0.3143, "step": 12819 }, { "epoch": 1.0385612443292287, "grad_norm": 0.03658129274845123, "learning_rate": 0.00016452585624915613, "loss": 0.3083, "step": 12820 }, { "epoch": 1.0386422553467272, "grad_norm": 0.03749111294746399, "learning_rate": 0.0001645213555965615, "loss": 0.3199, "step": 12821 }, { "epoch": 1.0387232663642256, "grad_norm": 0.036379821598529816, "learning_rate": 0.00016451685494396688, "loss": 0.3364, "step": 12822 }, { "epoch": 1.0388042773817239, "grad_norm": 0.03274570032954216, "learning_rate": 0.00016451235429137227, "loss": 0.2769, "step": 12823 }, { "epoch": 1.0388852883992223, "grad_norm": 0.03964697942137718, "learning_rate": 0.00016450785363877763, "loss": 0.3666, "step": 12824 }, { "epoch": 1.0389662994167206, "grad_norm": 0.037210613489151, "learning_rate": 0.00016450335298618301, "loss": 0.2968, "step": 12825 }, { "epoch": 1.039047310434219, "grad_norm": 0.04473162814974785, "learning_rate": 0.00016449885233358837, "loss": 0.3418, "step": 12826 }, { "epoch": 1.0391283214517175, "grad_norm": 0.03509940579533577, "learning_rate": 0.00016449435168099373, "loss": 0.2978, "step": 12827 }, { "epoch": 1.0392093324692158, "grad_norm": 0.03791176155209541, "learning_rate": 0.00016448985102839912, "loss": 0.2901, "step": 12828 }, { "epoch": 1.0392903434867142, "grad_norm": 0.03966226801276207, "learning_rate": 0.0001644853503758045, "loss": 0.32, "step": 12829 }, { "epoch": 1.0393713545042125, "grad_norm": 0.03378706052899361, "learning_rate": 0.00016448084972320987, "loss": 0.2834, "step": 12830 }, { "epoch": 1.039452365521711, "grad_norm": 0.044208187609910965, "learning_rate": 0.00016447634907061526, "loss": 0.3345, "step": 12831 }, { "epoch": 1.0395333765392094, "grad_norm": 0.03585111349821091, "learning_rate": 0.00016447184841802062, "loss": 0.2822, "step": 12832 }, { "epoch": 1.0396143875567077, "grad_norm": 0.03986233100295067, "learning_rate": 0.000164467347765426, "loss": 0.3364, "step": 12833 }, { "epoch": 1.0396953985742061, "grad_norm": 0.048165518790483475, "learning_rate": 0.00016446284711283136, "loss": 0.3471, "step": 12834 }, { "epoch": 1.0397764095917044, "grad_norm": 0.03753822296857834, "learning_rate": 0.00016445834646023675, "loss": 0.3161, "step": 12835 }, { "epoch": 1.0398574206092028, "grad_norm": 0.040102481842041016, "learning_rate": 0.0001644538458076421, "loss": 0.3347, "step": 12836 }, { "epoch": 1.0399384316267013, "grad_norm": 0.041427258402109146, "learning_rate": 0.0001644493451550475, "loss": 0.3632, "step": 12837 }, { "epoch": 1.0400194426441995, "grad_norm": 0.041916172951459885, "learning_rate": 0.00016444484450245286, "loss": 0.3338, "step": 12838 }, { "epoch": 1.040100453661698, "grad_norm": 0.03925507515668869, "learning_rate": 0.00016444034384985825, "loss": 0.3249, "step": 12839 }, { "epoch": 1.0401814646791965, "grad_norm": 0.03893420845270157, "learning_rate": 0.0001644358431972636, "loss": 0.3433, "step": 12840 }, { "epoch": 1.0402624756966947, "grad_norm": 0.04083637148141861, "learning_rate": 0.000164431342544669, "loss": 0.3355, "step": 12841 }, { "epoch": 1.0403434867141932, "grad_norm": 0.04069959744811058, "learning_rate": 0.00016442684189207435, "loss": 0.3214, "step": 12842 }, { "epoch": 1.0404244977316914, "grad_norm": 0.0337250754237175, "learning_rate": 0.00016442234123947974, "loss": 0.3004, "step": 12843 }, { "epoch": 1.04050550874919, "grad_norm": 0.040874969214200974, "learning_rate": 0.0001644178405868851, "loss": 0.3274, "step": 12844 }, { "epoch": 1.0405865197666884, "grad_norm": 0.038601577281951904, "learning_rate": 0.0001644133399342905, "loss": 0.3136, "step": 12845 }, { "epoch": 1.0406675307841866, "grad_norm": 0.04093305021524429, "learning_rate": 0.00016440883928169585, "loss": 0.3371, "step": 12846 }, { "epoch": 1.040748541801685, "grad_norm": 0.036659788340330124, "learning_rate": 0.00016440433862910123, "loss": 0.2797, "step": 12847 }, { "epoch": 1.0408295528191833, "grad_norm": 0.04440654441714287, "learning_rate": 0.0001643998379765066, "loss": 0.3396, "step": 12848 }, { "epoch": 1.0409105638366818, "grad_norm": 0.03522910550236702, "learning_rate": 0.00016439533732391198, "loss": 0.2922, "step": 12849 }, { "epoch": 1.0409915748541803, "grad_norm": 0.03405408933758736, "learning_rate": 0.00016439083667131734, "loss": 0.288, "step": 12850 }, { "epoch": 1.0410725858716785, "grad_norm": 0.03984960913658142, "learning_rate": 0.00016438633601872273, "loss": 0.3072, "step": 12851 }, { "epoch": 1.041153596889177, "grad_norm": 0.03902706876397133, "learning_rate": 0.0001643818353661281, "loss": 0.3158, "step": 12852 }, { "epoch": 1.0412346079066752, "grad_norm": 0.04116075485944748, "learning_rate": 0.00016437733471353348, "loss": 0.3211, "step": 12853 }, { "epoch": 1.0413156189241737, "grad_norm": 0.042279284447431564, "learning_rate": 0.00016437283406093884, "loss": 0.3322, "step": 12854 }, { "epoch": 1.0413966299416721, "grad_norm": 0.03590260446071625, "learning_rate": 0.00016436833340834422, "loss": 0.2918, "step": 12855 }, { "epoch": 1.0414776409591704, "grad_norm": 0.034707460552453995, "learning_rate": 0.00016436383275574958, "loss": 0.2947, "step": 12856 }, { "epoch": 1.0415586519766689, "grad_norm": 0.0405760295689106, "learning_rate": 0.00016435933210315497, "loss": 0.3064, "step": 12857 }, { "epoch": 1.041639662994167, "grad_norm": 0.04545693099498749, "learning_rate": 0.00016435483145056033, "loss": 0.3494, "step": 12858 }, { "epoch": 1.0417206740116656, "grad_norm": 0.03815295919775963, "learning_rate": 0.00016435033079796572, "loss": 0.2945, "step": 12859 }, { "epoch": 1.041801685029164, "grad_norm": 0.03414066880941391, "learning_rate": 0.00016434583014537108, "loss": 0.299, "step": 12860 }, { "epoch": 1.0418826960466623, "grad_norm": 0.03565811365842819, "learning_rate": 0.00016434132949277646, "loss": 0.2928, "step": 12861 }, { "epoch": 1.0419637070641607, "grad_norm": 0.03718414530158043, "learning_rate": 0.00016433682884018185, "loss": 0.2813, "step": 12862 }, { "epoch": 1.0420447180816592, "grad_norm": 0.04182311147451401, "learning_rate": 0.0001643323281875872, "loss": 0.3207, "step": 12863 }, { "epoch": 1.0421257290991575, "grad_norm": 0.032909996807575226, "learning_rate": 0.00016432782753499257, "loss": 0.2821, "step": 12864 }, { "epoch": 1.042206740116656, "grad_norm": 0.039710626006126404, "learning_rate": 0.00016432332688239796, "loss": 0.3407, "step": 12865 }, { "epoch": 1.0422877511341542, "grad_norm": 0.0371675118803978, "learning_rate": 0.00016431882622980332, "loss": 0.3247, "step": 12866 }, { "epoch": 1.0423687621516526, "grad_norm": 0.03714926168322563, "learning_rate": 0.0001643143255772087, "loss": 0.3256, "step": 12867 }, { "epoch": 1.042449773169151, "grad_norm": 0.043942391872406006, "learning_rate": 0.0001643098249246141, "loss": 0.3158, "step": 12868 }, { "epoch": 1.0425307841866494, "grad_norm": 0.04367447271943092, "learning_rate": 0.00016430532427201945, "loss": 0.326, "step": 12869 }, { "epoch": 1.0426117952041478, "grad_norm": 0.03442533686757088, "learning_rate": 0.0001643008236194248, "loss": 0.2983, "step": 12870 }, { "epoch": 1.042692806221646, "grad_norm": 0.04258235916495323, "learning_rate": 0.0001642963229668302, "loss": 0.3203, "step": 12871 }, { "epoch": 1.0427738172391445, "grad_norm": 0.035100020468235016, "learning_rate": 0.00016429182231423556, "loss": 0.3276, "step": 12872 }, { "epoch": 1.042854828256643, "grad_norm": 0.03650961443781853, "learning_rate": 0.00016428732166164095, "loss": 0.3231, "step": 12873 }, { "epoch": 1.0429358392741412, "grad_norm": 0.037116020917892456, "learning_rate": 0.00016428282100904633, "loss": 0.318, "step": 12874 }, { "epoch": 1.0430168502916397, "grad_norm": 0.03844992816448212, "learning_rate": 0.0001642783203564517, "loss": 0.315, "step": 12875 }, { "epoch": 1.043097861309138, "grad_norm": 0.036285724490880966, "learning_rate": 0.00016427381970385706, "loss": 0.2932, "step": 12876 }, { "epoch": 1.0431788723266364, "grad_norm": 0.034318238496780396, "learning_rate": 0.00016426931905126244, "loss": 0.3243, "step": 12877 }, { "epoch": 1.0432598833441349, "grad_norm": 0.03383239731192589, "learning_rate": 0.0001642648183986678, "loss": 0.314, "step": 12878 }, { "epoch": 1.0433408943616331, "grad_norm": 0.034283652901649475, "learning_rate": 0.0001642603177460732, "loss": 0.3085, "step": 12879 }, { "epoch": 1.0434219053791316, "grad_norm": 0.03602326661348343, "learning_rate": 0.00016425581709347858, "loss": 0.312, "step": 12880 }, { "epoch": 1.0435029163966298, "grad_norm": 0.04088228940963745, "learning_rate": 0.00016425131644088394, "loss": 0.3097, "step": 12881 }, { "epoch": 1.0435839274141283, "grad_norm": 0.03683295473456383, "learning_rate": 0.0001642468157882893, "loss": 0.3107, "step": 12882 }, { "epoch": 1.0436649384316268, "grad_norm": 0.03878350928425789, "learning_rate": 0.00016424231513569468, "loss": 0.3135, "step": 12883 }, { "epoch": 1.043745949449125, "grad_norm": 0.03578276187181473, "learning_rate": 0.00016423781448310004, "loss": 0.3065, "step": 12884 }, { "epoch": 1.0438269604666235, "grad_norm": 0.034060198813676834, "learning_rate": 0.00016423331383050543, "loss": 0.3074, "step": 12885 }, { "epoch": 1.043907971484122, "grad_norm": 0.03889356181025505, "learning_rate": 0.00016422881317791082, "loss": 0.3294, "step": 12886 }, { "epoch": 1.0439889825016202, "grad_norm": 0.03599948436021805, "learning_rate": 0.00016422431252531618, "loss": 0.288, "step": 12887 }, { "epoch": 1.0440699935191187, "grad_norm": 0.03581930696964264, "learning_rate": 0.00016421981187272154, "loss": 0.2796, "step": 12888 }, { "epoch": 1.044151004536617, "grad_norm": 0.037626467645168304, "learning_rate": 0.00016421531122012693, "loss": 0.3272, "step": 12889 }, { "epoch": 1.0442320155541154, "grad_norm": 0.04451899230480194, "learning_rate": 0.00016421081056753229, "loss": 0.3691, "step": 12890 }, { "epoch": 1.0443130265716138, "grad_norm": 0.037104591727256775, "learning_rate": 0.00016420630991493767, "loss": 0.3217, "step": 12891 }, { "epoch": 1.044394037589112, "grad_norm": 0.0360608771443367, "learning_rate": 0.00016420180926234306, "loss": 0.305, "step": 12892 }, { "epoch": 1.0444750486066106, "grad_norm": 0.03921990096569061, "learning_rate": 0.00016419730860974842, "loss": 0.3319, "step": 12893 }, { "epoch": 1.0445560596241088, "grad_norm": 0.04052642732858658, "learning_rate": 0.0001641928079571538, "loss": 0.3399, "step": 12894 }, { "epoch": 1.0446370706416073, "grad_norm": 0.0338423028588295, "learning_rate": 0.00016418830730455917, "loss": 0.2775, "step": 12895 }, { "epoch": 1.0447180816591057, "grad_norm": 0.03587032109498978, "learning_rate": 0.00016418380665196453, "loss": 0.3293, "step": 12896 }, { "epoch": 1.044799092676604, "grad_norm": 0.041994258761405945, "learning_rate": 0.00016417930599936991, "loss": 0.3898, "step": 12897 }, { "epoch": 1.0448801036941024, "grad_norm": 0.04043351486325264, "learning_rate": 0.0001641748053467753, "loss": 0.3151, "step": 12898 }, { "epoch": 1.0449611147116007, "grad_norm": 0.03635840490460396, "learning_rate": 0.00016417030469418066, "loss": 0.3521, "step": 12899 }, { "epoch": 1.0450421257290992, "grad_norm": 0.034990232437849045, "learning_rate": 0.00016416580404158605, "loss": 0.2937, "step": 12900 }, { "epoch": 1.0451231367465976, "grad_norm": 0.03458486497402191, "learning_rate": 0.0001641613033889914, "loss": 0.3201, "step": 12901 }, { "epoch": 1.0452041477640959, "grad_norm": 0.04071643203496933, "learning_rate": 0.00016415680273639677, "loss": 0.323, "step": 12902 }, { "epoch": 1.0452851587815943, "grad_norm": 0.03523261845111847, "learning_rate": 0.00016415230208380216, "loss": 0.285, "step": 12903 }, { "epoch": 1.0453661697990926, "grad_norm": 0.035539254546165466, "learning_rate": 0.00016414780143120754, "loss": 0.321, "step": 12904 }, { "epoch": 1.045447180816591, "grad_norm": 0.03952726349234581, "learning_rate": 0.0001641433007786129, "loss": 0.3593, "step": 12905 }, { "epoch": 1.0455281918340895, "grad_norm": 0.03822580352425575, "learning_rate": 0.0001641388001260183, "loss": 0.3165, "step": 12906 }, { "epoch": 1.0456092028515878, "grad_norm": 0.03817811980843544, "learning_rate": 0.00016413429947342365, "loss": 0.3337, "step": 12907 }, { "epoch": 1.0456902138690862, "grad_norm": 0.0448293499648571, "learning_rate": 0.000164129798820829, "loss": 0.3813, "step": 12908 }, { "epoch": 1.0457712248865845, "grad_norm": 0.03960398584604263, "learning_rate": 0.0001641252981682344, "loss": 0.3625, "step": 12909 }, { "epoch": 1.045852235904083, "grad_norm": 0.041171953082084656, "learning_rate": 0.00016412079751563978, "loss": 0.3329, "step": 12910 }, { "epoch": 1.0459332469215814, "grad_norm": 0.03784125670790672, "learning_rate": 0.00016411629686304514, "loss": 0.317, "step": 12911 }, { "epoch": 1.0460142579390797, "grad_norm": 0.034470465034246445, "learning_rate": 0.00016411179621045053, "loss": 0.2741, "step": 12912 }, { "epoch": 1.0460952689565781, "grad_norm": 0.04065170884132385, "learning_rate": 0.0001641072955578559, "loss": 0.3182, "step": 12913 }, { "epoch": 1.0461762799740764, "grad_norm": 0.03109212964773178, "learning_rate": 0.00016410279490526128, "loss": 0.2838, "step": 12914 }, { "epoch": 1.0462572909915748, "grad_norm": 0.03720557689666748, "learning_rate": 0.00016409829425266664, "loss": 0.311, "step": 12915 }, { "epoch": 1.0463383020090733, "grad_norm": 0.03801567479968071, "learning_rate": 0.00016409379360007203, "loss": 0.333, "step": 12916 }, { "epoch": 1.0464193130265715, "grad_norm": 0.037921782582998276, "learning_rate": 0.00016408929294747739, "loss": 0.3161, "step": 12917 }, { "epoch": 1.04650032404407, "grad_norm": 0.03623802587389946, "learning_rate": 0.00016408479229488277, "loss": 0.3286, "step": 12918 }, { "epoch": 1.0465813350615685, "grad_norm": 0.03764597326517105, "learning_rate": 0.00016408029164228813, "loss": 0.3306, "step": 12919 }, { "epoch": 1.0466623460790667, "grad_norm": 0.03784124553203583, "learning_rate": 0.00016407579098969352, "loss": 0.3103, "step": 12920 }, { "epoch": 1.0467433570965652, "grad_norm": 0.038992203772068024, "learning_rate": 0.00016407129033709888, "loss": 0.3061, "step": 12921 }, { "epoch": 1.0468243681140634, "grad_norm": 0.03431059420108795, "learning_rate": 0.00016406678968450427, "loss": 0.2677, "step": 12922 }, { "epoch": 1.046905379131562, "grad_norm": 0.03752612695097923, "learning_rate": 0.00016406228903190963, "loss": 0.3261, "step": 12923 }, { "epoch": 1.0469863901490604, "grad_norm": 0.031996533274650574, "learning_rate": 0.00016405778837931502, "loss": 0.2638, "step": 12924 }, { "epoch": 1.0470674011665586, "grad_norm": 0.03687674179673195, "learning_rate": 0.00016405328772672038, "loss": 0.335, "step": 12925 }, { "epoch": 1.047148412184057, "grad_norm": 0.039893630892038345, "learning_rate": 0.00016404878707412576, "loss": 0.3086, "step": 12926 }, { "epoch": 1.0472294232015553, "grad_norm": 0.038995370268821716, "learning_rate": 0.00016404428642153112, "loss": 0.3669, "step": 12927 }, { "epoch": 1.0473104342190538, "grad_norm": 0.04130131006240845, "learning_rate": 0.0001640397857689365, "loss": 0.3297, "step": 12928 }, { "epoch": 1.0473914452365523, "grad_norm": 0.04454691708087921, "learning_rate": 0.00016403528511634187, "loss": 0.3558, "step": 12929 }, { "epoch": 1.0474724562540505, "grad_norm": 0.03834373503923416, "learning_rate": 0.00016403078446374726, "loss": 0.3092, "step": 12930 }, { "epoch": 1.047553467271549, "grad_norm": 0.037397079169750214, "learning_rate": 0.00016402628381115262, "loss": 0.2944, "step": 12931 }, { "epoch": 1.0476344782890472, "grad_norm": 0.03549044206738472, "learning_rate": 0.000164021783158558, "loss": 0.2894, "step": 12932 }, { "epoch": 1.0477154893065457, "grad_norm": 0.03430061787366867, "learning_rate": 0.00016401728250596336, "loss": 0.3142, "step": 12933 }, { "epoch": 1.0477965003240441, "grad_norm": 0.0349055640399456, "learning_rate": 0.00016401278185336875, "loss": 0.2956, "step": 12934 }, { "epoch": 1.0478775113415424, "grad_norm": 0.038699712604284286, "learning_rate": 0.0001640082812007741, "loss": 0.3227, "step": 12935 }, { "epoch": 1.0479585223590409, "grad_norm": 0.03880271688103676, "learning_rate": 0.0001640037805481795, "loss": 0.293, "step": 12936 }, { "epoch": 1.048039533376539, "grad_norm": 0.03803123161196709, "learning_rate": 0.00016399927989558489, "loss": 0.3085, "step": 12937 }, { "epoch": 1.0481205443940376, "grad_norm": 0.03667737543582916, "learning_rate": 0.00016399477924299025, "loss": 0.2785, "step": 12938 }, { "epoch": 1.048201555411536, "grad_norm": 0.038014575839042664, "learning_rate": 0.0001639902785903956, "loss": 0.3486, "step": 12939 }, { "epoch": 1.0482825664290343, "grad_norm": 0.03516806662082672, "learning_rate": 0.000163985777937801, "loss": 0.3293, "step": 12940 }, { "epoch": 1.0483635774465327, "grad_norm": 0.03823082149028778, "learning_rate": 0.00016398127728520635, "loss": 0.3442, "step": 12941 }, { "epoch": 1.0484445884640312, "grad_norm": 0.03937181457877159, "learning_rate": 0.00016397677663261174, "loss": 0.3099, "step": 12942 }, { "epoch": 1.0485255994815295, "grad_norm": 0.03163138031959534, "learning_rate": 0.00016397227598001713, "loss": 0.2934, "step": 12943 }, { "epoch": 1.048606610499028, "grad_norm": 0.03477585315704346, "learning_rate": 0.0001639677753274225, "loss": 0.2887, "step": 12944 }, { "epoch": 1.0486876215165262, "grad_norm": 0.03753078728914261, "learning_rate": 0.00016396327467482785, "loss": 0.3147, "step": 12945 }, { "epoch": 1.0487686325340246, "grad_norm": 0.04368378594517708, "learning_rate": 0.00016395877402223323, "loss": 0.3497, "step": 12946 }, { "epoch": 1.048849643551523, "grad_norm": 0.04576634243130684, "learning_rate": 0.0001639542733696386, "loss": 0.3426, "step": 12947 }, { "epoch": 1.0489306545690213, "grad_norm": 0.037550900131464005, "learning_rate": 0.00016394977271704398, "loss": 0.3578, "step": 12948 }, { "epoch": 1.0490116655865198, "grad_norm": 0.045421287417411804, "learning_rate": 0.00016394527206444937, "loss": 0.2868, "step": 12949 }, { "epoch": 1.049092676604018, "grad_norm": 0.03243739530444145, "learning_rate": 0.00016394077141185473, "loss": 0.2742, "step": 12950 }, { "epoch": 1.0491736876215165, "grad_norm": 0.039180856198072433, "learning_rate": 0.0001639362707592601, "loss": 0.3233, "step": 12951 }, { "epoch": 1.049254698639015, "grad_norm": 0.04149232432246208, "learning_rate": 0.00016393177010666548, "loss": 0.335, "step": 12952 }, { "epoch": 1.0493357096565132, "grad_norm": 0.04031761735677719, "learning_rate": 0.00016392726945407084, "loss": 0.3435, "step": 12953 }, { "epoch": 1.0494167206740117, "grad_norm": 0.04236694052815437, "learning_rate": 0.00016392276880147622, "loss": 0.3478, "step": 12954 }, { "epoch": 1.04949773169151, "grad_norm": 0.051949284970760345, "learning_rate": 0.0001639182681488816, "loss": 0.3584, "step": 12955 }, { "epoch": 1.0495787427090084, "grad_norm": 0.046114757657051086, "learning_rate": 0.00016391376749628697, "loss": 0.3402, "step": 12956 }, { "epoch": 1.0496597537265069, "grad_norm": 0.03253263607621193, "learning_rate": 0.00016390926684369233, "loss": 0.2886, "step": 12957 }, { "epoch": 1.0497407647440051, "grad_norm": 0.04305479675531387, "learning_rate": 0.00016390476619109772, "loss": 0.2942, "step": 12958 }, { "epoch": 1.0498217757615036, "grad_norm": 0.03444790840148926, "learning_rate": 0.00016390026553850308, "loss": 0.3051, "step": 12959 }, { "epoch": 1.0499027867790018, "grad_norm": 0.03732207044959068, "learning_rate": 0.00016389576488590846, "loss": 0.3095, "step": 12960 }, { "epoch": 1.0499837977965003, "grad_norm": 0.041557539254426956, "learning_rate": 0.00016389126423331385, "loss": 0.3039, "step": 12961 }, { "epoch": 1.0500648088139988, "grad_norm": 0.03193178027868271, "learning_rate": 0.0001638867635807192, "loss": 0.2837, "step": 12962 }, { "epoch": 1.050145819831497, "grad_norm": 0.04071090370416641, "learning_rate": 0.0001638822629281246, "loss": 0.3618, "step": 12963 }, { "epoch": 1.0502268308489955, "grad_norm": 0.036791760474443436, "learning_rate": 0.00016387776227552996, "loss": 0.3014, "step": 12964 }, { "epoch": 1.050307841866494, "grad_norm": 0.04111775755882263, "learning_rate": 0.00016387326162293532, "loss": 0.3258, "step": 12965 }, { "epoch": 1.0503888528839922, "grad_norm": 0.04430720582604408, "learning_rate": 0.0001638687609703407, "loss": 0.3106, "step": 12966 }, { "epoch": 1.0504698639014907, "grad_norm": 0.039440203458070755, "learning_rate": 0.0001638642603177461, "loss": 0.2866, "step": 12967 }, { "epoch": 1.050550874918989, "grad_norm": 0.042060382664203644, "learning_rate": 0.00016385975966515145, "loss": 0.3435, "step": 12968 }, { "epoch": 1.0506318859364874, "grad_norm": 0.03567097708582878, "learning_rate": 0.00016385525901255684, "loss": 0.2904, "step": 12969 }, { "epoch": 1.0507128969539858, "grad_norm": 0.038154810667037964, "learning_rate": 0.0001638507583599622, "loss": 0.3069, "step": 12970 }, { "epoch": 1.050793907971484, "grad_norm": 0.043698426336050034, "learning_rate": 0.00016384625770736756, "loss": 0.3597, "step": 12971 }, { "epoch": 1.0508749189889826, "grad_norm": 0.040363430976867676, "learning_rate": 0.00016384175705477295, "loss": 0.3008, "step": 12972 }, { "epoch": 1.0509559300064808, "grad_norm": 0.03564104810357094, "learning_rate": 0.00016383725640217834, "loss": 0.3045, "step": 12973 }, { "epoch": 1.0510369410239793, "grad_norm": 0.03445684164762497, "learning_rate": 0.0001638327557495837, "loss": 0.29, "step": 12974 }, { "epoch": 1.0511179520414777, "grad_norm": 0.03905816003680229, "learning_rate": 0.00016382825509698908, "loss": 0.2679, "step": 12975 }, { "epoch": 1.051198963058976, "grad_norm": 0.0405798964202404, "learning_rate": 0.00016382375444439444, "loss": 0.3466, "step": 12976 }, { "epoch": 1.0512799740764744, "grad_norm": 0.03701363876461983, "learning_rate": 0.0001638192537917998, "loss": 0.2943, "step": 12977 }, { "epoch": 1.0513609850939727, "grad_norm": 0.042401645332574844, "learning_rate": 0.0001638147531392052, "loss": 0.3147, "step": 12978 }, { "epoch": 1.0514419961114712, "grad_norm": 0.036430634558200836, "learning_rate": 0.00016381025248661058, "loss": 0.3064, "step": 12979 }, { "epoch": 1.0515230071289696, "grad_norm": 0.03717387095093727, "learning_rate": 0.00016380575183401594, "loss": 0.2709, "step": 12980 }, { "epoch": 1.0516040181464679, "grad_norm": 0.04026516526937485, "learning_rate": 0.00016380125118142132, "loss": 0.2824, "step": 12981 }, { "epoch": 1.0516850291639663, "grad_norm": 0.03760538995265961, "learning_rate": 0.00016379675052882668, "loss": 0.3229, "step": 12982 }, { "epoch": 1.0517660401814646, "grad_norm": 0.03460705652832985, "learning_rate": 0.00016379224987623204, "loss": 0.2997, "step": 12983 }, { "epoch": 1.051847051198963, "grad_norm": 0.04178851097822189, "learning_rate": 0.00016378774922363743, "loss": 0.3235, "step": 12984 }, { "epoch": 1.0519280622164615, "grad_norm": 0.04115087911486626, "learning_rate": 0.00016378324857104282, "loss": 0.3357, "step": 12985 }, { "epoch": 1.0520090732339598, "grad_norm": 0.040281862020492554, "learning_rate": 0.00016377874791844818, "loss": 0.3274, "step": 12986 }, { "epoch": 1.0520900842514582, "grad_norm": 0.04082822427153587, "learning_rate": 0.00016377424726585357, "loss": 0.3274, "step": 12987 }, { "epoch": 1.0521710952689567, "grad_norm": 0.03982575237751007, "learning_rate": 0.00016376974661325893, "loss": 0.3402, "step": 12988 }, { "epoch": 1.052252106286455, "grad_norm": 0.03970259800553322, "learning_rate": 0.0001637652459606643, "loss": 0.335, "step": 12989 }, { "epoch": 1.0523331173039534, "grad_norm": 0.034251753240823746, "learning_rate": 0.00016376074530806967, "loss": 0.2573, "step": 12990 }, { "epoch": 1.0524141283214516, "grad_norm": 0.03692222386598587, "learning_rate": 0.00016375624465547506, "loss": 0.2965, "step": 12991 }, { "epoch": 1.0524951393389501, "grad_norm": 0.03736606240272522, "learning_rate": 0.00016375174400288042, "loss": 0.3059, "step": 12992 }, { "epoch": 1.0525761503564486, "grad_norm": 0.043112173676490784, "learning_rate": 0.0001637472433502858, "loss": 0.3189, "step": 12993 }, { "epoch": 1.0526571613739468, "grad_norm": 0.034782927483320236, "learning_rate": 0.00016374274269769117, "loss": 0.2956, "step": 12994 }, { "epoch": 1.0527381723914453, "grad_norm": 0.04126705229282379, "learning_rate": 0.00016373824204509655, "loss": 0.3298, "step": 12995 }, { "epoch": 1.0528191834089435, "grad_norm": 0.037063319236040115, "learning_rate": 0.00016373374139250191, "loss": 0.3403, "step": 12996 }, { "epoch": 1.052900194426442, "grad_norm": 0.042787209153175354, "learning_rate": 0.0001637292407399073, "loss": 0.3404, "step": 12997 }, { "epoch": 1.0529812054439405, "grad_norm": 0.04349652677774429, "learning_rate": 0.00016372474008731266, "loss": 0.3265, "step": 12998 }, { "epoch": 1.0530622164614387, "grad_norm": 0.03636706620454788, "learning_rate": 0.00016372023943471805, "loss": 0.2942, "step": 12999 }, { "epoch": 1.0531432274789372, "grad_norm": 0.05386228486895561, "learning_rate": 0.0001637157387821234, "loss": 0.3552, "step": 13000 }, { "epoch": 1.0532242384964354, "grad_norm": 0.04019056633114815, "learning_rate": 0.0001637112381295288, "loss": 0.3149, "step": 13001 }, { "epoch": 1.053305249513934, "grad_norm": 0.047883372753858566, "learning_rate": 0.00016370673747693416, "loss": 0.3082, "step": 13002 }, { "epoch": 1.0533862605314324, "grad_norm": 0.03943159058690071, "learning_rate": 0.00016370223682433954, "loss": 0.3077, "step": 13003 }, { "epoch": 1.0534672715489306, "grad_norm": 0.038364700973033905, "learning_rate": 0.0001636977361717449, "loss": 0.3107, "step": 13004 }, { "epoch": 1.053548282566429, "grad_norm": 0.03844317048788071, "learning_rate": 0.0001636932355191503, "loss": 0.3027, "step": 13005 }, { "epoch": 1.0536292935839273, "grad_norm": 0.04385514557361603, "learning_rate": 0.00016368873486655565, "loss": 0.3321, "step": 13006 }, { "epoch": 1.0537103046014258, "grad_norm": 0.038291554898023605, "learning_rate": 0.00016368423421396104, "loss": 0.3239, "step": 13007 }, { "epoch": 1.0537913156189243, "grad_norm": 0.03738854452967644, "learning_rate": 0.0001636797335613664, "loss": 0.3322, "step": 13008 }, { "epoch": 1.0538723266364225, "grad_norm": 0.0458126999437809, "learning_rate": 0.00016367523290877178, "loss": 0.3485, "step": 13009 }, { "epoch": 1.053953337653921, "grad_norm": 0.03953162580728531, "learning_rate": 0.00016367073225617715, "loss": 0.2743, "step": 13010 }, { "epoch": 1.0540343486714192, "grad_norm": 0.044255178421735764, "learning_rate": 0.00016366623160358253, "loss": 0.3115, "step": 13011 }, { "epoch": 1.0541153596889177, "grad_norm": 0.03467179089784622, "learning_rate": 0.0001636617309509879, "loss": 0.281, "step": 13012 }, { "epoch": 1.0541963707064161, "grad_norm": 0.038243770599365234, "learning_rate": 0.00016365723029839328, "loss": 0.3054, "step": 13013 }, { "epoch": 1.0542773817239144, "grad_norm": 0.039358288049697876, "learning_rate": 0.00016365272964579864, "loss": 0.3009, "step": 13014 }, { "epoch": 1.0543583927414129, "grad_norm": 0.03707633540034294, "learning_rate": 0.00016364822899320403, "loss": 0.312, "step": 13015 }, { "epoch": 1.054439403758911, "grad_norm": 0.04041290655732155, "learning_rate": 0.0001636437283406094, "loss": 0.3237, "step": 13016 }, { "epoch": 1.0545204147764096, "grad_norm": 0.05521214008331299, "learning_rate": 0.00016363922768801477, "loss": 0.3193, "step": 13017 }, { "epoch": 1.054601425793908, "grad_norm": 0.03907698765397072, "learning_rate": 0.00016363472703542016, "loss": 0.3282, "step": 13018 }, { "epoch": 1.0546824368114063, "grad_norm": 0.0368485264480114, "learning_rate": 0.00016363022638282552, "loss": 0.3078, "step": 13019 }, { "epoch": 1.0547634478289047, "grad_norm": 0.036330461502075195, "learning_rate": 0.00016362572573023088, "loss": 0.2957, "step": 13020 }, { "epoch": 1.0548444588464032, "grad_norm": 0.045567553490400314, "learning_rate": 0.00016362122507763627, "loss": 0.3387, "step": 13021 }, { "epoch": 1.0549254698639015, "grad_norm": 0.035702306777238846, "learning_rate": 0.00016361672442504163, "loss": 0.2929, "step": 13022 }, { "epoch": 1.0550064808814, "grad_norm": 0.042946431785821915, "learning_rate": 0.00016361222377244702, "loss": 0.3737, "step": 13023 }, { "epoch": 1.0550874918988982, "grad_norm": 0.038137488067150116, "learning_rate": 0.0001636077231198524, "loss": 0.317, "step": 13024 }, { "epoch": 1.0551685029163966, "grad_norm": 0.036011893302202225, "learning_rate": 0.00016360322246725776, "loss": 0.3223, "step": 13025 }, { "epoch": 1.055249513933895, "grad_norm": 0.039589326828718185, "learning_rate": 0.00016359872181466312, "loss": 0.3017, "step": 13026 }, { "epoch": 1.0553305249513933, "grad_norm": 0.04104938358068466, "learning_rate": 0.0001635942211620685, "loss": 0.2986, "step": 13027 }, { "epoch": 1.0554115359688918, "grad_norm": 0.03723173215985298, "learning_rate": 0.00016358972050947387, "loss": 0.2994, "step": 13028 }, { "epoch": 1.05549254698639, "grad_norm": 0.039599400013685226, "learning_rate": 0.00016358521985687926, "loss": 0.3278, "step": 13029 }, { "epoch": 1.0555735580038885, "grad_norm": 0.034441448748111725, "learning_rate": 0.00016358071920428464, "loss": 0.3217, "step": 13030 }, { "epoch": 1.055654569021387, "grad_norm": 0.037437498569488525, "learning_rate": 0.00016357621855169, "loss": 0.2911, "step": 13031 }, { "epoch": 1.0557355800388852, "grad_norm": 0.0356329008936882, "learning_rate": 0.0001635717178990954, "loss": 0.3308, "step": 13032 }, { "epoch": 1.0558165910563837, "grad_norm": 0.03364642709493637, "learning_rate": 0.00016356721724650075, "loss": 0.2906, "step": 13033 }, { "epoch": 1.055897602073882, "grad_norm": 0.04249563440680504, "learning_rate": 0.0001635627165939061, "loss": 0.342, "step": 13034 }, { "epoch": 1.0559786130913804, "grad_norm": 0.04023630544543266, "learning_rate": 0.0001635582159413115, "loss": 0.3249, "step": 13035 }, { "epoch": 1.0560596241088789, "grad_norm": 0.03922013193368912, "learning_rate": 0.00016355371528871689, "loss": 0.3284, "step": 13036 }, { "epoch": 1.0561406351263771, "grad_norm": 0.040603943169116974, "learning_rate": 0.00016354921463612225, "loss": 0.3261, "step": 13037 }, { "epoch": 1.0562216461438756, "grad_norm": 0.03869624435901642, "learning_rate": 0.00016354471398352763, "loss": 0.275, "step": 13038 }, { "epoch": 1.0563026571613738, "grad_norm": 0.04690798372030258, "learning_rate": 0.000163540213330933, "loss": 0.3152, "step": 13039 }, { "epoch": 1.0563836681788723, "grad_norm": 0.0407758466899395, "learning_rate": 0.00016353571267833835, "loss": 0.3065, "step": 13040 }, { "epoch": 1.0564646791963708, "grad_norm": 0.03870948776602745, "learning_rate": 0.00016353121202574374, "loss": 0.3174, "step": 13041 }, { "epoch": 1.056545690213869, "grad_norm": 0.0395541675388813, "learning_rate": 0.00016352671137314913, "loss": 0.2982, "step": 13042 }, { "epoch": 1.0566267012313675, "grad_norm": 0.03934469446539879, "learning_rate": 0.0001635222107205545, "loss": 0.3801, "step": 13043 }, { "epoch": 1.056707712248866, "grad_norm": 0.035781171172857285, "learning_rate": 0.00016351771006795987, "loss": 0.2897, "step": 13044 }, { "epoch": 1.0567887232663642, "grad_norm": 0.04096796363592148, "learning_rate": 0.00016351320941536523, "loss": 0.327, "step": 13045 }, { "epoch": 1.0568697342838627, "grad_norm": 0.04043768346309662, "learning_rate": 0.0001635087087627706, "loss": 0.313, "step": 13046 }, { "epoch": 1.056950745301361, "grad_norm": 0.03739682585000992, "learning_rate": 0.00016350420811017598, "loss": 0.2706, "step": 13047 }, { "epoch": 1.0570317563188594, "grad_norm": 0.0397224985063076, "learning_rate": 0.00016349970745758137, "loss": 0.3215, "step": 13048 }, { "epoch": 1.0571127673363578, "grad_norm": 0.04893937706947327, "learning_rate": 0.00016349520680498673, "loss": 0.3143, "step": 13049 }, { "epoch": 1.057193778353856, "grad_norm": 0.03793314844369888, "learning_rate": 0.00016349070615239212, "loss": 0.3362, "step": 13050 }, { "epoch": 1.0572747893713546, "grad_norm": 0.043163977563381195, "learning_rate": 0.00016348620549979748, "loss": 0.3462, "step": 13051 }, { "epoch": 1.0573558003888528, "grad_norm": 0.04014117643237114, "learning_rate": 0.00016348170484720284, "loss": 0.3339, "step": 13052 }, { "epoch": 1.0574368114063513, "grad_norm": 0.04330155998468399, "learning_rate": 0.00016347720419460822, "loss": 0.313, "step": 13053 }, { "epoch": 1.0575178224238497, "grad_norm": 0.0403081513941288, "learning_rate": 0.0001634727035420136, "loss": 0.4026, "step": 13054 }, { "epoch": 1.057598833441348, "grad_norm": 0.03789180889725685, "learning_rate": 0.00016346820288941897, "loss": 0.2955, "step": 13055 }, { "epoch": 1.0576798444588464, "grad_norm": 0.03631223365664482, "learning_rate": 0.00016346370223682436, "loss": 0.3115, "step": 13056 }, { "epoch": 1.0577608554763447, "grad_norm": 0.03948785737156868, "learning_rate": 0.00016345920158422972, "loss": 0.2895, "step": 13057 }, { "epoch": 1.0578418664938432, "grad_norm": 0.03748257830739021, "learning_rate": 0.00016345470093163508, "loss": 0.3001, "step": 13058 }, { "epoch": 1.0579228775113416, "grad_norm": 0.03798984736204147, "learning_rate": 0.00016345020027904047, "loss": 0.3112, "step": 13059 }, { "epoch": 1.0580038885288399, "grad_norm": 0.033647872507572174, "learning_rate": 0.00016344569962644585, "loss": 0.2851, "step": 13060 }, { "epoch": 1.0580848995463383, "grad_norm": 0.036701709032058716, "learning_rate": 0.0001634411989738512, "loss": 0.3051, "step": 13061 }, { "epoch": 1.0581659105638366, "grad_norm": 0.040191926062107086, "learning_rate": 0.0001634366983212566, "loss": 0.3661, "step": 13062 }, { "epoch": 1.058246921581335, "grad_norm": 0.036935754120349884, "learning_rate": 0.00016343219766866196, "loss": 0.3035, "step": 13063 }, { "epoch": 1.0583279325988335, "grad_norm": 0.037047579884529114, "learning_rate": 0.00016342769701606732, "loss": 0.2997, "step": 13064 }, { "epoch": 1.0584089436163318, "grad_norm": 0.0359288714826107, "learning_rate": 0.0001634231963634727, "loss": 0.3031, "step": 13065 }, { "epoch": 1.0584899546338302, "grad_norm": 0.037823259830474854, "learning_rate": 0.0001634186957108781, "loss": 0.3405, "step": 13066 }, { "epoch": 1.0585709656513287, "grad_norm": 0.03662921488285065, "learning_rate": 0.00016341419505828345, "loss": 0.3159, "step": 13067 }, { "epoch": 1.058651976668827, "grad_norm": 0.04636320844292641, "learning_rate": 0.00016340969440568884, "loss": 0.3256, "step": 13068 }, { "epoch": 1.0587329876863254, "grad_norm": 0.04248104989528656, "learning_rate": 0.0001634051937530942, "loss": 0.354, "step": 13069 }, { "epoch": 1.0588139987038236, "grad_norm": 0.03274742141366005, "learning_rate": 0.0001634006931004996, "loss": 0.2699, "step": 13070 }, { "epoch": 1.0588950097213221, "grad_norm": 0.036615077406167984, "learning_rate": 0.00016339619244790495, "loss": 0.3416, "step": 13071 }, { "epoch": 1.0589760207388206, "grad_norm": 0.03806189075112343, "learning_rate": 0.00016339169179531034, "loss": 0.3002, "step": 13072 }, { "epoch": 1.0590570317563188, "grad_norm": 0.041345518082380295, "learning_rate": 0.0001633871911427157, "loss": 0.3322, "step": 13073 }, { "epoch": 1.0591380427738173, "grad_norm": 0.037649959325790405, "learning_rate": 0.00016338269049012108, "loss": 0.3091, "step": 13074 }, { "epoch": 1.0592190537913155, "grad_norm": 0.04146602004766464, "learning_rate": 0.00016337818983752644, "loss": 0.3423, "step": 13075 }, { "epoch": 1.059300064808814, "grad_norm": 0.04146433621644974, "learning_rate": 0.00016337368918493183, "loss": 0.3235, "step": 13076 }, { "epoch": 1.0593810758263125, "grad_norm": 0.04007774218916893, "learning_rate": 0.0001633691885323372, "loss": 0.3634, "step": 13077 }, { "epoch": 1.0594620868438107, "grad_norm": 0.03304166719317436, "learning_rate": 0.00016336468787974258, "loss": 0.3014, "step": 13078 }, { "epoch": 1.0595430978613092, "grad_norm": 0.04017319157719612, "learning_rate": 0.00016336018722714794, "loss": 0.3375, "step": 13079 }, { "epoch": 1.0596241088788074, "grad_norm": 0.04752025380730629, "learning_rate": 0.00016335568657455332, "loss": 0.3722, "step": 13080 }, { "epoch": 1.059705119896306, "grad_norm": 0.03203922510147095, "learning_rate": 0.00016335118592195868, "loss": 0.2696, "step": 13081 }, { "epoch": 1.0597861309138044, "grad_norm": 0.03847889602184296, "learning_rate": 0.00016334668526936407, "loss": 0.3433, "step": 13082 }, { "epoch": 1.0598671419313026, "grad_norm": 0.03569968417286873, "learning_rate": 0.00016334218461676943, "loss": 0.2844, "step": 13083 }, { "epoch": 1.059948152948801, "grad_norm": 0.03317081928253174, "learning_rate": 0.00016333768396417482, "loss": 0.318, "step": 13084 }, { "epoch": 1.0600291639662993, "grad_norm": 0.03200779855251312, "learning_rate": 0.00016333318331158018, "loss": 0.3009, "step": 13085 }, { "epoch": 1.0601101749837978, "grad_norm": 0.04079586640000343, "learning_rate": 0.00016332868265898557, "loss": 0.3539, "step": 13086 }, { "epoch": 1.0601911860012962, "grad_norm": 0.037388890981674194, "learning_rate": 0.00016332418200639093, "loss": 0.3013, "step": 13087 }, { "epoch": 1.0602721970187945, "grad_norm": 0.04083319753408432, "learning_rate": 0.0001633196813537963, "loss": 0.3637, "step": 13088 }, { "epoch": 1.060353208036293, "grad_norm": 0.03597578406333923, "learning_rate": 0.00016331518070120167, "loss": 0.3176, "step": 13089 }, { "epoch": 1.0604342190537914, "grad_norm": 0.03463973477482796, "learning_rate": 0.00016331068004860706, "loss": 0.3161, "step": 13090 }, { "epoch": 1.0605152300712897, "grad_norm": 0.03823839873075485, "learning_rate": 0.00016330617939601242, "loss": 0.3191, "step": 13091 }, { "epoch": 1.0605962410887881, "grad_norm": 0.03756007179617882, "learning_rate": 0.0001633016787434178, "loss": 0.3219, "step": 13092 }, { "epoch": 1.0606772521062864, "grad_norm": 0.0337604284286499, "learning_rate": 0.00016329717809082317, "loss": 0.3246, "step": 13093 }, { "epoch": 1.0607582631237849, "grad_norm": 0.03267901390790939, "learning_rate": 0.00016329267743822855, "loss": 0.2514, "step": 13094 }, { "epoch": 1.0608392741412833, "grad_norm": 0.03957577422261238, "learning_rate": 0.00016328817678563391, "loss": 0.3338, "step": 13095 }, { "epoch": 1.0609202851587816, "grad_norm": 0.04130283370614052, "learning_rate": 0.0001632836761330393, "loss": 0.3544, "step": 13096 }, { "epoch": 1.06100129617628, "grad_norm": 0.03718462213873863, "learning_rate": 0.00016327917548044466, "loss": 0.3132, "step": 13097 }, { "epoch": 1.0610823071937783, "grad_norm": 0.03442040830850601, "learning_rate": 0.00016327467482785005, "loss": 0.3061, "step": 13098 }, { "epoch": 1.0611633182112767, "grad_norm": 0.04067227989435196, "learning_rate": 0.00016327017417525544, "loss": 0.3235, "step": 13099 }, { "epoch": 1.0612443292287752, "grad_norm": 0.035462141036987305, "learning_rate": 0.0001632656735226608, "loss": 0.301, "step": 13100 }, { "epoch": 1.0613253402462735, "grad_norm": 0.040709950029850006, "learning_rate": 0.00016326117287006618, "loss": 0.3091, "step": 13101 }, { "epoch": 1.061406351263772, "grad_norm": 0.040912844240665436, "learning_rate": 0.00016325667221747154, "loss": 0.3084, "step": 13102 }, { "epoch": 1.0614873622812702, "grad_norm": 0.04343321919441223, "learning_rate": 0.0001632521715648769, "loss": 0.3367, "step": 13103 }, { "epoch": 1.0615683732987686, "grad_norm": 0.03998672589659691, "learning_rate": 0.0001632476709122823, "loss": 0.3231, "step": 13104 }, { "epoch": 1.061649384316267, "grad_norm": 0.03875920921564102, "learning_rate": 0.00016324317025968768, "loss": 0.3041, "step": 13105 }, { "epoch": 1.0617303953337653, "grad_norm": 0.03709811344742775, "learning_rate": 0.00016323866960709304, "loss": 0.3094, "step": 13106 }, { "epoch": 1.0618114063512638, "grad_norm": 0.05348275601863861, "learning_rate": 0.00016323416895449843, "loss": 0.2579, "step": 13107 }, { "epoch": 1.061892417368762, "grad_norm": 0.042734403163194656, "learning_rate": 0.00016322966830190379, "loss": 0.3195, "step": 13108 }, { "epoch": 1.0619734283862605, "grad_norm": 0.04072289541363716, "learning_rate": 0.00016322516764930915, "loss": 0.3008, "step": 13109 }, { "epoch": 1.062054439403759, "grad_norm": 0.038316257297992706, "learning_rate": 0.00016322066699671453, "loss": 0.3334, "step": 13110 }, { "epoch": 1.0621354504212572, "grad_norm": 0.031170308589935303, "learning_rate": 0.00016321616634411992, "loss": 0.2796, "step": 13111 }, { "epoch": 1.0622164614387557, "grad_norm": 0.0355224534869194, "learning_rate": 0.00016321166569152528, "loss": 0.3164, "step": 13112 }, { "epoch": 1.062297472456254, "grad_norm": 0.038635775446891785, "learning_rate": 0.00016320716503893067, "loss": 0.3264, "step": 13113 }, { "epoch": 1.0623784834737524, "grad_norm": 0.03472799062728882, "learning_rate": 0.00016320266438633603, "loss": 0.3303, "step": 13114 }, { "epoch": 1.0624594944912509, "grad_norm": 0.033231720328330994, "learning_rate": 0.0001631981637337414, "loss": 0.2996, "step": 13115 }, { "epoch": 1.0625405055087491, "grad_norm": 0.03596806153655052, "learning_rate": 0.00016319366308114677, "loss": 0.3168, "step": 13116 }, { "epoch": 1.0626215165262476, "grad_norm": 0.03601103276014328, "learning_rate": 0.00016318916242855216, "loss": 0.2643, "step": 13117 }, { "epoch": 1.0627025275437458, "grad_norm": 0.03784925863146782, "learning_rate": 0.00016318466177595752, "loss": 0.2973, "step": 13118 }, { "epoch": 1.0627835385612443, "grad_norm": 0.038176700472831726, "learning_rate": 0.0001631801611233629, "loss": 0.3395, "step": 13119 }, { "epoch": 1.0628645495787428, "grad_norm": 0.03922443091869354, "learning_rate": 0.00016317566047076827, "loss": 0.3558, "step": 13120 }, { "epoch": 1.062945560596241, "grad_norm": 0.03958688676357269, "learning_rate": 0.00016317115981817363, "loss": 0.2903, "step": 13121 }, { "epoch": 1.0630265716137395, "grad_norm": 0.04546581208705902, "learning_rate": 0.00016316665916557902, "loss": 0.3679, "step": 13122 }, { "epoch": 1.063107582631238, "grad_norm": 0.04076612740755081, "learning_rate": 0.0001631621585129844, "loss": 0.2901, "step": 13123 }, { "epoch": 1.0631885936487362, "grad_norm": 0.030991926789283752, "learning_rate": 0.00016315765786038976, "loss": 0.2661, "step": 13124 }, { "epoch": 1.0632696046662347, "grad_norm": 0.040161989629268646, "learning_rate": 0.00016315315720779515, "loss": 0.3112, "step": 13125 }, { "epoch": 1.063350615683733, "grad_norm": 0.03585921600461006, "learning_rate": 0.0001631486565552005, "loss": 0.2859, "step": 13126 }, { "epoch": 1.0634316267012314, "grad_norm": 0.037038449198007584, "learning_rate": 0.00016314415590260587, "loss": 0.2949, "step": 13127 }, { "epoch": 1.0635126377187298, "grad_norm": 0.03819497674703598, "learning_rate": 0.00016313965525001126, "loss": 0.355, "step": 13128 }, { "epoch": 1.063593648736228, "grad_norm": 0.03675805404782295, "learning_rate": 0.00016313515459741664, "loss": 0.3155, "step": 13129 }, { "epoch": 1.0636746597537265, "grad_norm": 0.04112779349088669, "learning_rate": 0.000163130653944822, "loss": 0.3144, "step": 13130 }, { "epoch": 1.0637556707712248, "grad_norm": 0.04026306793093681, "learning_rate": 0.0001631261532922274, "loss": 0.3382, "step": 13131 }, { "epoch": 1.0638366817887233, "grad_norm": 0.037816621363162994, "learning_rate": 0.00016312165263963275, "loss": 0.3044, "step": 13132 }, { "epoch": 1.0639176928062217, "grad_norm": 0.04128459095954895, "learning_rate": 0.0001631171519870381, "loss": 0.3149, "step": 13133 }, { "epoch": 1.06399870382372, "grad_norm": 0.036820001900196075, "learning_rate": 0.0001631126513344435, "loss": 0.295, "step": 13134 }, { "epoch": 1.0640797148412184, "grad_norm": 0.040834393352270126, "learning_rate": 0.00016310815068184889, "loss": 0.3369, "step": 13135 }, { "epoch": 1.0641607258587167, "grad_norm": 0.03920666500926018, "learning_rate": 0.00016310365002925425, "loss": 0.3184, "step": 13136 }, { "epoch": 1.0642417368762151, "grad_norm": 0.04006076604127884, "learning_rate": 0.00016309914937665963, "loss": 0.3597, "step": 13137 }, { "epoch": 1.0643227478937136, "grad_norm": 0.03771907463669777, "learning_rate": 0.000163094648724065, "loss": 0.3003, "step": 13138 }, { "epoch": 1.0644037589112119, "grad_norm": 0.04066121578216553, "learning_rate": 0.00016309014807147035, "loss": 0.3441, "step": 13139 }, { "epoch": 1.0644847699287103, "grad_norm": 0.033965013921260834, "learning_rate": 0.00016308564741887574, "loss": 0.3123, "step": 13140 }, { "epoch": 1.0645657809462086, "grad_norm": 0.04005855694413185, "learning_rate": 0.00016308114676628113, "loss": 0.3413, "step": 13141 }, { "epoch": 1.064646791963707, "grad_norm": 0.03749069198966026, "learning_rate": 0.0001630766461136865, "loss": 0.2959, "step": 13142 }, { "epoch": 1.0647278029812055, "grad_norm": 0.04194750636816025, "learning_rate": 0.00016307214546109187, "loss": 0.3204, "step": 13143 }, { "epoch": 1.0648088139987038, "grad_norm": 0.035881157964468, "learning_rate": 0.00016306764480849723, "loss": 0.297, "step": 13144 }, { "epoch": 1.0648898250162022, "grad_norm": 0.03590167313814163, "learning_rate": 0.0001630631441559026, "loss": 0.2872, "step": 13145 }, { "epoch": 1.0649708360337007, "grad_norm": 0.04288012161850929, "learning_rate": 0.00016305864350330798, "loss": 0.3507, "step": 13146 }, { "epoch": 1.065051847051199, "grad_norm": 0.035566214472055435, "learning_rate": 0.00016305414285071337, "loss": 0.3073, "step": 13147 }, { "epoch": 1.0651328580686974, "grad_norm": 0.03152133896946907, "learning_rate": 0.00016304964219811873, "loss": 0.2696, "step": 13148 }, { "epoch": 1.0652138690861956, "grad_norm": 0.04102449119091034, "learning_rate": 0.00016304514154552412, "loss": 0.3524, "step": 13149 }, { "epoch": 1.065294880103694, "grad_norm": 0.03946152701973915, "learning_rate": 0.00016304064089292948, "loss": 0.3044, "step": 13150 }, { "epoch": 1.0653758911211926, "grad_norm": 0.04190444201231003, "learning_rate": 0.00016303614024033486, "loss": 0.3705, "step": 13151 }, { "epoch": 1.0654569021386908, "grad_norm": 0.03652665764093399, "learning_rate": 0.00016303163958774022, "loss": 0.3397, "step": 13152 }, { "epoch": 1.0655379131561893, "grad_norm": 0.04137907177209854, "learning_rate": 0.0001630271389351456, "loss": 0.3439, "step": 13153 }, { "epoch": 1.0656189241736875, "grad_norm": 0.03943207114934921, "learning_rate": 0.00016302263828255097, "loss": 0.2565, "step": 13154 }, { "epoch": 1.065699935191186, "grad_norm": 0.04126992076635361, "learning_rate": 0.00016301813762995636, "loss": 0.3527, "step": 13155 }, { "epoch": 1.0657809462086845, "grad_norm": 0.038512103259563446, "learning_rate": 0.00016301363697736172, "loss": 0.3271, "step": 13156 }, { "epoch": 1.0658619572261827, "grad_norm": 0.03877699747681618, "learning_rate": 0.0001630091363247671, "loss": 0.3409, "step": 13157 }, { "epoch": 1.0659429682436812, "grad_norm": 0.03787248954176903, "learning_rate": 0.00016300463567217247, "loss": 0.3627, "step": 13158 }, { "epoch": 1.0660239792611794, "grad_norm": 0.0397280678153038, "learning_rate": 0.00016300013501957785, "loss": 0.2851, "step": 13159 }, { "epoch": 1.0661049902786779, "grad_norm": 0.0416765958070755, "learning_rate": 0.0001629956343669832, "loss": 0.2748, "step": 13160 }, { "epoch": 1.0661860012961764, "grad_norm": 0.03804435208439827, "learning_rate": 0.0001629911337143886, "loss": 0.3328, "step": 13161 }, { "epoch": 1.0662670123136746, "grad_norm": 0.04016241803765297, "learning_rate": 0.00016298663306179396, "loss": 0.331, "step": 13162 }, { "epoch": 1.066348023331173, "grad_norm": 0.042689867317676544, "learning_rate": 0.00016298213240919935, "loss": 0.344, "step": 13163 }, { "epoch": 1.0664290343486713, "grad_norm": 0.03467041999101639, "learning_rate": 0.00016297763175660473, "loss": 0.2828, "step": 13164 }, { "epoch": 1.0665100453661698, "grad_norm": 0.03881874307990074, "learning_rate": 0.0001629731311040101, "loss": 0.3323, "step": 13165 }, { "epoch": 1.0665910563836682, "grad_norm": 0.04006616026163101, "learning_rate": 0.00016296863045141545, "loss": 0.3439, "step": 13166 }, { "epoch": 1.0666720674011665, "grad_norm": 0.04216201975941658, "learning_rate": 0.00016296412979882084, "loss": 0.3469, "step": 13167 }, { "epoch": 1.066753078418665, "grad_norm": 0.03559141233563423, "learning_rate": 0.0001629596291462262, "loss": 0.2801, "step": 13168 }, { "epoch": 1.0668340894361634, "grad_norm": 0.03447365388274193, "learning_rate": 0.0001629551284936316, "loss": 0.3118, "step": 13169 }, { "epoch": 1.0669151004536617, "grad_norm": 0.031914982944726944, "learning_rate": 0.00016295062784103698, "loss": 0.3054, "step": 13170 }, { "epoch": 1.0669961114711601, "grad_norm": 0.04312901198863983, "learning_rate": 0.00016294612718844234, "loss": 0.3305, "step": 13171 }, { "epoch": 1.0670771224886584, "grad_norm": 0.04013076052069664, "learning_rate": 0.0001629416265358477, "loss": 0.331, "step": 13172 }, { "epoch": 1.0671581335061568, "grad_norm": 0.04022202268242836, "learning_rate": 0.00016293712588325308, "loss": 0.3138, "step": 13173 }, { "epoch": 1.0672391445236553, "grad_norm": 0.036136429756879807, "learning_rate": 0.00016293262523065844, "loss": 0.3198, "step": 13174 }, { "epoch": 1.0673201555411536, "grad_norm": 0.0353563167154789, "learning_rate": 0.00016292812457806383, "loss": 0.2695, "step": 13175 }, { "epoch": 1.067401166558652, "grad_norm": 0.03855662792921066, "learning_rate": 0.00016292362392546922, "loss": 0.2931, "step": 13176 }, { "epoch": 1.0674821775761503, "grad_norm": 0.03579946234822273, "learning_rate": 0.00016291912327287458, "loss": 0.3143, "step": 13177 }, { "epoch": 1.0675631885936487, "grad_norm": 0.038776103407144547, "learning_rate": 0.00016291462262027994, "loss": 0.3107, "step": 13178 }, { "epoch": 1.0676441996111472, "grad_norm": 0.035354942083358765, "learning_rate": 0.00016291012196768532, "loss": 0.2964, "step": 13179 }, { "epoch": 1.0677252106286454, "grad_norm": 0.03727215155959129, "learning_rate": 0.0001629056213150907, "loss": 0.3202, "step": 13180 }, { "epoch": 1.067806221646144, "grad_norm": 0.041301924735307693, "learning_rate": 0.00016290112066249607, "loss": 0.3336, "step": 13181 }, { "epoch": 1.0678872326636422, "grad_norm": 0.03613514080643654, "learning_rate": 0.00016289662000990146, "loss": 0.3225, "step": 13182 }, { "epoch": 1.0679682436811406, "grad_norm": 0.040005408227443695, "learning_rate": 0.00016289211935730682, "loss": 0.3023, "step": 13183 }, { "epoch": 1.068049254698639, "grad_norm": 0.03439958021044731, "learning_rate": 0.00016288761870471218, "loss": 0.2818, "step": 13184 }, { "epoch": 1.0681302657161373, "grad_norm": 0.03871123865246773, "learning_rate": 0.00016288311805211757, "loss": 0.3091, "step": 13185 }, { "epoch": 1.0682112767336358, "grad_norm": 0.04212125390768051, "learning_rate": 0.00016287861739952295, "loss": 0.2852, "step": 13186 }, { "epoch": 1.068292287751134, "grad_norm": 0.03327667713165283, "learning_rate": 0.0001628741167469283, "loss": 0.2623, "step": 13187 }, { "epoch": 1.0683732987686325, "grad_norm": 0.04048386961221695, "learning_rate": 0.0001628696160943337, "loss": 0.3345, "step": 13188 }, { "epoch": 1.068454309786131, "grad_norm": 0.03479389473795891, "learning_rate": 0.00016286511544173906, "loss": 0.2986, "step": 13189 }, { "epoch": 1.0685353208036292, "grad_norm": 0.03682103753089905, "learning_rate": 0.00016286061478914442, "loss": 0.3061, "step": 13190 }, { "epoch": 1.0686163318211277, "grad_norm": 0.043244585394859314, "learning_rate": 0.0001628561141365498, "loss": 0.3194, "step": 13191 }, { "epoch": 1.0686973428386262, "grad_norm": 0.04426548257470131, "learning_rate": 0.0001628516134839552, "loss": 0.3121, "step": 13192 }, { "epoch": 1.0687783538561244, "grad_norm": 0.03656848147511482, "learning_rate": 0.00016284711283136056, "loss": 0.2951, "step": 13193 }, { "epoch": 1.0688593648736229, "grad_norm": 0.039186250418424606, "learning_rate": 0.00016284261217876594, "loss": 0.328, "step": 13194 }, { "epoch": 1.0689403758911211, "grad_norm": 0.041322581470012665, "learning_rate": 0.0001628381115261713, "loss": 0.3287, "step": 13195 }, { "epoch": 1.0690213869086196, "grad_norm": 0.03569335862994194, "learning_rate": 0.00016283361087357666, "loss": 0.2767, "step": 13196 }, { "epoch": 1.069102397926118, "grad_norm": 0.03692762553691864, "learning_rate": 0.00016282911022098205, "loss": 0.3011, "step": 13197 }, { "epoch": 1.0691834089436163, "grad_norm": 0.034691039472818375, "learning_rate": 0.00016282460956838744, "loss": 0.3094, "step": 13198 }, { "epoch": 1.0692644199611148, "grad_norm": 0.037673961371183395, "learning_rate": 0.0001628201089157928, "loss": 0.3295, "step": 13199 }, { "epoch": 1.069345430978613, "grad_norm": 0.03888675570487976, "learning_rate": 0.00016281560826319818, "loss": 0.3302, "step": 13200 }, { "epoch": 1.0694264419961115, "grad_norm": 0.0368889719247818, "learning_rate": 0.00016281110761060354, "loss": 0.3081, "step": 13201 }, { "epoch": 1.06950745301361, "grad_norm": 0.038887474685907364, "learning_rate": 0.0001628066069580089, "loss": 0.351, "step": 13202 }, { "epoch": 1.0695884640311082, "grad_norm": 0.03788851201534271, "learning_rate": 0.0001628021063054143, "loss": 0.3169, "step": 13203 }, { "epoch": 1.0696694750486067, "grad_norm": 0.04103012755513191, "learning_rate": 0.00016279760565281968, "loss": 0.3415, "step": 13204 }, { "epoch": 1.069750486066105, "grad_norm": 0.03659411892294884, "learning_rate": 0.00016279310500022504, "loss": 0.3414, "step": 13205 }, { "epoch": 1.0698314970836034, "grad_norm": 0.03779793530702591, "learning_rate": 0.00016278860434763043, "loss": 0.3365, "step": 13206 }, { "epoch": 1.0699125081011018, "grad_norm": 0.03485560789704323, "learning_rate": 0.00016278410369503579, "loss": 0.279, "step": 13207 }, { "epoch": 1.0699935191186, "grad_norm": 0.04025677591562271, "learning_rate": 0.00016277960304244115, "loss": 0.3753, "step": 13208 }, { "epoch": 1.0700745301360985, "grad_norm": 0.03404794633388519, "learning_rate": 0.00016277510238984653, "loss": 0.2857, "step": 13209 }, { "epoch": 1.0701555411535968, "grad_norm": 0.035707104951143265, "learning_rate": 0.00016277060173725192, "loss": 0.2874, "step": 13210 }, { "epoch": 1.0702365521710953, "grad_norm": 0.04319519177079201, "learning_rate": 0.00016276610108465728, "loss": 0.2729, "step": 13211 }, { "epoch": 1.0703175631885937, "grad_norm": 0.03902759775519371, "learning_rate": 0.00016276160043206267, "loss": 0.3145, "step": 13212 }, { "epoch": 1.070398574206092, "grad_norm": 0.03968610614538193, "learning_rate": 0.00016275709977946803, "loss": 0.3467, "step": 13213 }, { "epoch": 1.0704795852235904, "grad_norm": 0.035142820328474045, "learning_rate": 0.0001627525991268734, "loss": 0.3028, "step": 13214 }, { "epoch": 1.070560596241089, "grad_norm": 0.04717862606048584, "learning_rate": 0.00016274809847427877, "loss": 0.3768, "step": 13215 }, { "epoch": 1.0706416072585871, "grad_norm": 0.04234934598207474, "learning_rate": 0.00016274359782168416, "loss": 0.3106, "step": 13216 }, { "epoch": 1.0707226182760856, "grad_norm": 0.04084145277738571, "learning_rate": 0.00016273909716908952, "loss": 0.3159, "step": 13217 }, { "epoch": 1.0708036292935839, "grad_norm": 0.047838181257247925, "learning_rate": 0.0001627345965164949, "loss": 0.3782, "step": 13218 }, { "epoch": 1.0708846403110823, "grad_norm": 0.03364909067749977, "learning_rate": 0.00016273009586390027, "loss": 0.2973, "step": 13219 }, { "epoch": 1.0709656513285806, "grad_norm": 0.03884413465857506, "learning_rate": 0.00016272559521130563, "loss": 0.3182, "step": 13220 }, { "epoch": 1.071046662346079, "grad_norm": 0.04059368371963501, "learning_rate": 0.00016272109455871102, "loss": 0.3005, "step": 13221 }, { "epoch": 1.0711276733635775, "grad_norm": 0.03587368503212929, "learning_rate": 0.0001627165939061164, "loss": 0.3038, "step": 13222 }, { "epoch": 1.0712086843810757, "grad_norm": 0.04080083593726158, "learning_rate": 0.00016271209325352176, "loss": 0.336, "step": 13223 }, { "epoch": 1.0712896953985742, "grad_norm": 0.03729918226599693, "learning_rate": 0.00016270759260092715, "loss": 0.3002, "step": 13224 }, { "epoch": 1.0713707064160727, "grad_norm": 0.036567870527505875, "learning_rate": 0.0001627030919483325, "loss": 0.3065, "step": 13225 }, { "epoch": 1.071451717433571, "grad_norm": 0.04116319119930267, "learning_rate": 0.00016269859129573787, "loss": 0.3131, "step": 13226 }, { "epoch": 1.0715327284510694, "grad_norm": 0.03758919984102249, "learning_rate": 0.00016269409064314326, "loss": 0.3006, "step": 13227 }, { "epoch": 1.0716137394685676, "grad_norm": 0.039732519537210464, "learning_rate": 0.00016268958999054864, "loss": 0.32, "step": 13228 }, { "epoch": 1.071694750486066, "grad_norm": 0.04335511475801468, "learning_rate": 0.000162685089337954, "loss": 0.3034, "step": 13229 }, { "epoch": 1.0717757615035646, "grad_norm": 0.034147460013628006, "learning_rate": 0.0001626805886853594, "loss": 0.2718, "step": 13230 }, { "epoch": 1.0718567725210628, "grad_norm": 0.03723817691206932, "learning_rate": 0.00016267608803276475, "loss": 0.3082, "step": 13231 }, { "epoch": 1.0719377835385613, "grad_norm": 0.03524041175842285, "learning_rate": 0.00016267158738017014, "loss": 0.2836, "step": 13232 }, { "epoch": 1.0720187945560595, "grad_norm": 0.040226735174655914, "learning_rate": 0.00016266708672757553, "loss": 0.2881, "step": 13233 }, { "epoch": 1.072099805573558, "grad_norm": 0.033649519085884094, "learning_rate": 0.00016266258607498089, "loss": 0.2919, "step": 13234 }, { "epoch": 1.0721808165910565, "grad_norm": 0.03617114946246147, "learning_rate": 0.00016265808542238625, "loss": 0.2749, "step": 13235 }, { "epoch": 1.0722618276085547, "grad_norm": 0.03616161271929741, "learning_rate": 0.00016265358476979163, "loss": 0.2836, "step": 13236 }, { "epoch": 1.0723428386260532, "grad_norm": 0.0450529009103775, "learning_rate": 0.000162649084117197, "loss": 0.3569, "step": 13237 }, { "epoch": 1.0724238496435514, "grad_norm": 0.03892510384321213, "learning_rate": 0.00016264458346460238, "loss": 0.3192, "step": 13238 }, { "epoch": 1.0725048606610499, "grad_norm": 0.04008268192410469, "learning_rate": 0.00016264008281200777, "loss": 0.3062, "step": 13239 }, { "epoch": 1.0725858716785484, "grad_norm": 0.03673470765352249, "learning_rate": 0.00016263558215941313, "loss": 0.3093, "step": 13240 }, { "epoch": 1.0726668826960466, "grad_norm": 0.03691423684358597, "learning_rate": 0.0001626310815068185, "loss": 0.2929, "step": 13241 }, { "epoch": 1.072747893713545, "grad_norm": 0.03831551969051361, "learning_rate": 0.00016262658085422388, "loss": 0.2825, "step": 13242 }, { "epoch": 1.0728289047310433, "grad_norm": 0.036802027374506, "learning_rate": 0.00016262208020162924, "loss": 0.3217, "step": 13243 }, { "epoch": 1.0729099157485418, "grad_norm": 0.04076598584651947, "learning_rate": 0.00016261757954903462, "loss": 0.3348, "step": 13244 }, { "epoch": 1.0729909267660402, "grad_norm": 0.03657490387558937, "learning_rate": 0.00016261307889644, "loss": 0.3152, "step": 13245 }, { "epoch": 1.0730719377835385, "grad_norm": 0.03813399747014046, "learning_rate": 0.00016260857824384537, "loss": 0.307, "step": 13246 }, { "epoch": 1.073152948801037, "grad_norm": 0.03411310538649559, "learning_rate": 0.00016260407759125073, "loss": 0.3038, "step": 13247 }, { "epoch": 1.0732339598185354, "grad_norm": 0.032801553606987, "learning_rate": 0.00016259957693865612, "loss": 0.2617, "step": 13248 }, { "epoch": 1.0733149708360337, "grad_norm": 0.03492745757102966, "learning_rate": 0.00016259507628606148, "loss": 0.2922, "step": 13249 }, { "epoch": 1.0733959818535321, "grad_norm": 0.041702572256326675, "learning_rate": 0.00016259057563346686, "loss": 0.3483, "step": 13250 }, { "epoch": 1.0734769928710304, "grad_norm": 0.03449796885251999, "learning_rate": 0.00016258607498087225, "loss": 0.2842, "step": 13251 }, { "epoch": 1.0735580038885288, "grad_norm": 0.03720291703939438, "learning_rate": 0.0001625815743282776, "loss": 0.2815, "step": 13252 }, { "epoch": 1.0736390149060273, "grad_norm": 0.03792194649577141, "learning_rate": 0.00016257707367568297, "loss": 0.3124, "step": 13253 }, { "epoch": 1.0737200259235256, "grad_norm": 0.03389739990234375, "learning_rate": 0.00016257257302308836, "loss": 0.3081, "step": 13254 }, { "epoch": 1.073801036941024, "grad_norm": 0.0402398556470871, "learning_rate": 0.00016256807237049375, "loss": 0.32, "step": 13255 }, { "epoch": 1.0738820479585223, "grad_norm": 0.03836144134402275, "learning_rate": 0.0001625635717178991, "loss": 0.2938, "step": 13256 }, { "epoch": 1.0739630589760207, "grad_norm": 0.040930040180683136, "learning_rate": 0.0001625590710653045, "loss": 0.2935, "step": 13257 }, { "epoch": 1.0740440699935192, "grad_norm": 0.038079436868429184, "learning_rate": 0.00016255457041270985, "loss": 0.339, "step": 13258 }, { "epoch": 1.0741250810110174, "grad_norm": 0.03508659079670906, "learning_rate": 0.0001625500697601152, "loss": 0.2712, "step": 13259 }, { "epoch": 1.074206092028516, "grad_norm": 0.03754564747214317, "learning_rate": 0.0001625455691075206, "loss": 0.3105, "step": 13260 }, { "epoch": 1.0742871030460142, "grad_norm": 0.03899519145488739, "learning_rate": 0.000162541068454926, "loss": 0.3361, "step": 13261 }, { "epoch": 1.0743681140635126, "grad_norm": 0.043972063809633255, "learning_rate": 0.00016253656780233135, "loss": 0.346, "step": 13262 }, { "epoch": 1.074449125081011, "grad_norm": 0.03968114033341408, "learning_rate": 0.00016253206714973673, "loss": 0.2943, "step": 13263 }, { "epoch": 1.0745301360985093, "grad_norm": 0.042418111115694046, "learning_rate": 0.0001625275664971421, "loss": 0.3463, "step": 13264 }, { "epoch": 1.0746111471160078, "grad_norm": 0.04304884001612663, "learning_rate": 0.00016252306584454745, "loss": 0.3182, "step": 13265 }, { "epoch": 1.074692158133506, "grad_norm": 0.03539090231060982, "learning_rate": 0.00016251856519195284, "loss": 0.2934, "step": 13266 }, { "epoch": 1.0747731691510045, "grad_norm": 0.045006927102804184, "learning_rate": 0.00016251406453935823, "loss": 0.3262, "step": 13267 }, { "epoch": 1.074854180168503, "grad_norm": 0.036517515778541565, "learning_rate": 0.0001625095638867636, "loss": 0.3148, "step": 13268 }, { "epoch": 1.0749351911860012, "grad_norm": 0.04659212380647659, "learning_rate": 0.00016250506323416898, "loss": 0.3616, "step": 13269 }, { "epoch": 1.0750162022034997, "grad_norm": 0.03476553037762642, "learning_rate": 0.00016250056258157434, "loss": 0.2881, "step": 13270 }, { "epoch": 1.0750972132209982, "grad_norm": 0.035790711641311646, "learning_rate": 0.0001624960619289797, "loss": 0.3192, "step": 13271 }, { "epoch": 1.0751782242384964, "grad_norm": 0.039025261998176575, "learning_rate": 0.00016249156127638508, "loss": 0.3133, "step": 13272 }, { "epoch": 1.0752592352559949, "grad_norm": 0.032551951706409454, "learning_rate": 0.00016248706062379047, "loss": 0.2713, "step": 13273 }, { "epoch": 1.0753402462734931, "grad_norm": 0.03767121210694313, "learning_rate": 0.00016248255997119583, "loss": 0.3512, "step": 13274 }, { "epoch": 1.0754212572909916, "grad_norm": 0.03417384251952171, "learning_rate": 0.00016247805931860122, "loss": 0.3119, "step": 13275 }, { "epoch": 1.07550226830849, "grad_norm": 0.0347181111574173, "learning_rate": 0.00016247355866600658, "loss": 0.3036, "step": 13276 }, { "epoch": 1.0755832793259883, "grad_norm": 0.03645511344075203, "learning_rate": 0.00016246905801341194, "loss": 0.3242, "step": 13277 }, { "epoch": 1.0756642903434868, "grad_norm": 0.043344851583242416, "learning_rate": 0.00016246455736081732, "loss": 0.3296, "step": 13278 }, { "epoch": 1.075745301360985, "grad_norm": 0.03987930715084076, "learning_rate": 0.0001624600567082227, "loss": 0.3501, "step": 13279 }, { "epoch": 1.0758263123784835, "grad_norm": 0.03823409229516983, "learning_rate": 0.00016245555605562807, "loss": 0.2972, "step": 13280 }, { "epoch": 1.075907323395982, "grad_norm": 0.041413746774196625, "learning_rate": 0.00016245105540303346, "loss": 0.3113, "step": 13281 }, { "epoch": 1.0759883344134802, "grad_norm": 0.04083913937211037, "learning_rate": 0.00016244655475043882, "loss": 0.3037, "step": 13282 }, { "epoch": 1.0760693454309787, "grad_norm": 0.03827805444598198, "learning_rate": 0.00016244205409784418, "loss": 0.3334, "step": 13283 }, { "epoch": 1.076150356448477, "grad_norm": 0.03913137689232826, "learning_rate": 0.00016243755344524957, "loss": 0.2937, "step": 13284 }, { "epoch": 1.0762313674659754, "grad_norm": 0.03928875923156738, "learning_rate": 0.00016243305279265495, "loss": 0.3421, "step": 13285 }, { "epoch": 1.0763123784834738, "grad_norm": 0.037695445120334625, "learning_rate": 0.00016242855214006031, "loss": 0.3135, "step": 13286 }, { "epoch": 1.076393389500972, "grad_norm": 0.03602603077888489, "learning_rate": 0.0001624240514874657, "loss": 0.3054, "step": 13287 }, { "epoch": 1.0764744005184705, "grad_norm": 0.038182325661182404, "learning_rate": 0.00016241955083487106, "loss": 0.3499, "step": 13288 }, { "epoch": 1.0765554115359688, "grad_norm": 0.0474996492266655, "learning_rate": 0.00016241505018227642, "loss": 0.3918, "step": 13289 }, { "epoch": 1.0766364225534673, "grad_norm": 0.04044167697429657, "learning_rate": 0.0001624105495296818, "loss": 0.3046, "step": 13290 }, { "epoch": 1.0767174335709657, "grad_norm": 0.03613236919045448, "learning_rate": 0.0001624060488770872, "loss": 0.2835, "step": 13291 }, { "epoch": 1.076798444588464, "grad_norm": 0.03918663412332535, "learning_rate": 0.00016240154822449256, "loss": 0.2991, "step": 13292 }, { "epoch": 1.0768794556059624, "grad_norm": 0.043039511889219284, "learning_rate": 0.00016239704757189794, "loss": 0.306, "step": 13293 }, { "epoch": 1.076960466623461, "grad_norm": 0.041325464844703674, "learning_rate": 0.0001623925469193033, "loss": 0.3253, "step": 13294 }, { "epoch": 1.0770414776409591, "grad_norm": 0.038608718663454056, "learning_rate": 0.00016238804626670866, "loss": 0.3157, "step": 13295 }, { "epoch": 1.0771224886584576, "grad_norm": 0.04200710356235504, "learning_rate": 0.00016238354561411405, "loss": 0.3496, "step": 13296 }, { "epoch": 1.0772034996759559, "grad_norm": 0.039992254227399826, "learning_rate": 0.00016237904496151944, "loss": 0.3241, "step": 13297 }, { "epoch": 1.0772845106934543, "grad_norm": 0.03987909108400345, "learning_rate": 0.0001623745443089248, "loss": 0.3189, "step": 13298 }, { "epoch": 1.0773655217109528, "grad_norm": 0.04262173920869827, "learning_rate": 0.00016237004365633018, "loss": 0.3386, "step": 13299 }, { "epoch": 1.077446532728451, "grad_norm": 0.03864779695868492, "learning_rate": 0.00016236554300373554, "loss": 0.3103, "step": 13300 }, { "epoch": 1.0775275437459495, "grad_norm": 0.03877343609929085, "learning_rate": 0.0001623610423511409, "loss": 0.3345, "step": 13301 }, { "epoch": 1.0776085547634477, "grad_norm": 0.04017069935798645, "learning_rate": 0.00016235654169854632, "loss": 0.3255, "step": 13302 }, { "epoch": 1.0776895657809462, "grad_norm": 0.0396341048181057, "learning_rate": 0.00016235204104595168, "loss": 0.3445, "step": 13303 }, { "epoch": 1.0777705767984447, "grad_norm": 0.03865274041891098, "learning_rate": 0.00016234754039335704, "loss": 0.2811, "step": 13304 }, { "epoch": 1.077851587815943, "grad_norm": 0.03512314707040787, "learning_rate": 0.00016234303974076243, "loss": 0.2475, "step": 13305 }, { "epoch": 1.0779325988334414, "grad_norm": 0.04257293790578842, "learning_rate": 0.00016233853908816779, "loss": 0.3237, "step": 13306 }, { "epoch": 1.0780136098509396, "grad_norm": 0.03783601522445679, "learning_rate": 0.00016233403843557317, "loss": 0.2938, "step": 13307 }, { "epoch": 1.078094620868438, "grad_norm": 0.03904139623045921, "learning_rate": 0.00016232953778297856, "loss": 0.2985, "step": 13308 }, { "epoch": 1.0781756318859366, "grad_norm": 0.037982627749443054, "learning_rate": 0.00016232503713038392, "loss": 0.2701, "step": 13309 }, { "epoch": 1.0782566429034348, "grad_norm": 0.03868936002254486, "learning_rate": 0.00016232053647778928, "loss": 0.2822, "step": 13310 }, { "epoch": 1.0783376539209333, "grad_norm": 0.044151484966278076, "learning_rate": 0.00016231603582519467, "loss": 0.3327, "step": 13311 }, { "epoch": 1.0784186649384315, "grad_norm": 0.04237937554717064, "learning_rate": 0.00016231153517260003, "loss": 0.3746, "step": 13312 }, { "epoch": 1.07849967595593, "grad_norm": 0.041088610887527466, "learning_rate": 0.00016230703452000541, "loss": 0.3689, "step": 13313 }, { "epoch": 1.0785806869734285, "grad_norm": 0.039148733019828796, "learning_rate": 0.0001623025338674108, "loss": 0.3415, "step": 13314 }, { "epoch": 1.0786616979909267, "grad_norm": 0.04008744657039642, "learning_rate": 0.00016229803321481616, "loss": 0.3418, "step": 13315 }, { "epoch": 1.0787427090084252, "grad_norm": 0.03863966092467308, "learning_rate": 0.00016229353256222152, "loss": 0.3719, "step": 13316 }, { "epoch": 1.0788237200259236, "grad_norm": 0.04270761087536812, "learning_rate": 0.0001622890319096269, "loss": 0.338, "step": 13317 }, { "epoch": 1.0789047310434219, "grad_norm": 0.03537660092115402, "learning_rate": 0.00016228453125703227, "loss": 0.3262, "step": 13318 }, { "epoch": 1.0789857420609203, "grad_norm": 0.04052428528666496, "learning_rate": 0.00016228003060443766, "loss": 0.3001, "step": 13319 }, { "epoch": 1.0790667530784186, "grad_norm": 0.03691032528877258, "learning_rate": 0.00016227552995184304, "loss": 0.3051, "step": 13320 }, { "epoch": 1.079147764095917, "grad_norm": 0.04182439669966698, "learning_rate": 0.0001622710292992484, "loss": 0.3253, "step": 13321 }, { "epoch": 1.0792287751134155, "grad_norm": 0.04117942601442337, "learning_rate": 0.00016226652864665376, "loss": 0.3312, "step": 13322 }, { "epoch": 1.0793097861309138, "grad_norm": 0.03158815950155258, "learning_rate": 0.00016226202799405915, "loss": 0.2854, "step": 13323 }, { "epoch": 1.0793907971484122, "grad_norm": 0.04201388731598854, "learning_rate": 0.0001622575273414645, "loss": 0.341, "step": 13324 }, { "epoch": 1.0794718081659105, "grad_norm": 0.033538252115249634, "learning_rate": 0.0001622530266888699, "loss": 0.2677, "step": 13325 }, { "epoch": 1.079552819183409, "grad_norm": 0.03603677079081535, "learning_rate": 0.00016224852603627528, "loss": 0.2832, "step": 13326 }, { "epoch": 1.0796338302009074, "grad_norm": 0.04016566649079323, "learning_rate": 0.00016224402538368065, "loss": 0.2885, "step": 13327 }, { "epoch": 1.0797148412184057, "grad_norm": 0.031220566481351852, "learning_rate": 0.000162239524731086, "loss": 0.262, "step": 13328 }, { "epoch": 1.0797958522359041, "grad_norm": 0.044721174985170364, "learning_rate": 0.0001622350240784914, "loss": 0.3727, "step": 13329 }, { "epoch": 1.0798768632534024, "grad_norm": 0.034470535814762115, "learning_rate": 0.00016223052342589675, "loss": 0.291, "step": 13330 }, { "epoch": 1.0799578742709008, "grad_norm": 0.04218203201889992, "learning_rate": 0.00016222602277330214, "loss": 0.3315, "step": 13331 }, { "epoch": 1.0800388852883993, "grad_norm": 0.04010971263051033, "learning_rate": 0.00016222152212070753, "loss": 0.3192, "step": 13332 }, { "epoch": 1.0801198963058976, "grad_norm": 0.03233395144343376, "learning_rate": 0.0001622170214681129, "loss": 0.3063, "step": 13333 }, { "epoch": 1.080200907323396, "grad_norm": 0.037743669003248215, "learning_rate": 0.00016221252081551825, "loss": 0.3124, "step": 13334 }, { "epoch": 1.0802819183408943, "grad_norm": 0.03384163975715637, "learning_rate": 0.00016220802016292363, "loss": 0.2798, "step": 13335 }, { "epoch": 1.0803629293583927, "grad_norm": 0.0341823548078537, "learning_rate": 0.00016220351951032902, "loss": 0.2978, "step": 13336 }, { "epoch": 1.0804439403758912, "grad_norm": 0.03492417186498642, "learning_rate": 0.00016219901885773438, "loss": 0.2919, "step": 13337 }, { "epoch": 1.0805249513933894, "grad_norm": 0.03947907313704491, "learning_rate": 0.00016219451820513977, "loss": 0.3078, "step": 13338 }, { "epoch": 1.080605962410888, "grad_norm": 0.03812957927584648, "learning_rate": 0.00016219001755254513, "loss": 0.3157, "step": 13339 }, { "epoch": 1.0806869734283864, "grad_norm": 0.04574564844369888, "learning_rate": 0.0001621855168999505, "loss": 0.313, "step": 13340 }, { "epoch": 1.0807679844458846, "grad_norm": 0.043251559138298035, "learning_rate": 0.00016218101624735588, "loss": 0.2827, "step": 13341 }, { "epoch": 1.080848995463383, "grad_norm": 0.04355183616280556, "learning_rate": 0.00016217651559476126, "loss": 0.3519, "step": 13342 }, { "epoch": 1.0809300064808813, "grad_norm": 0.039647556841373444, "learning_rate": 0.00016217201494216662, "loss": 0.2988, "step": 13343 }, { "epoch": 1.0810110174983798, "grad_norm": 0.03978102281689644, "learning_rate": 0.000162167514289572, "loss": 0.3303, "step": 13344 }, { "epoch": 1.081092028515878, "grad_norm": 0.03419504687190056, "learning_rate": 0.00016216301363697737, "loss": 0.3003, "step": 13345 }, { "epoch": 1.0811730395333765, "grad_norm": 0.03994619846343994, "learning_rate": 0.00016215851298438273, "loss": 0.3025, "step": 13346 }, { "epoch": 1.081254050550875, "grad_norm": 0.04877599701285362, "learning_rate": 0.00016215401233178812, "loss": 0.3348, "step": 13347 }, { "epoch": 1.0813350615683732, "grad_norm": 0.03815867006778717, "learning_rate": 0.0001621495116791935, "loss": 0.2887, "step": 13348 }, { "epoch": 1.0814160725858717, "grad_norm": 0.04251018539071083, "learning_rate": 0.00016214501102659886, "loss": 0.2907, "step": 13349 }, { "epoch": 1.0814970836033702, "grad_norm": 0.038885097950696945, "learning_rate": 0.00016214051037400425, "loss": 0.3316, "step": 13350 }, { "epoch": 1.0815780946208684, "grad_norm": 0.039103202521800995, "learning_rate": 0.0001621360097214096, "loss": 0.3476, "step": 13351 }, { "epoch": 1.0816591056383669, "grad_norm": 0.036649372428655624, "learning_rate": 0.00016213150906881497, "loss": 0.3336, "step": 13352 }, { "epoch": 1.0817401166558651, "grad_norm": 0.03846638649702072, "learning_rate": 0.00016212700841622036, "loss": 0.2873, "step": 13353 }, { "epoch": 1.0818211276733636, "grad_norm": 0.03785313665866852, "learning_rate": 0.00016212250776362575, "loss": 0.3242, "step": 13354 }, { "epoch": 1.081902138690862, "grad_norm": 0.04096831753849983, "learning_rate": 0.0001621180071110311, "loss": 0.3688, "step": 13355 }, { "epoch": 1.0819831497083603, "grad_norm": 0.03899220749735832, "learning_rate": 0.0001621135064584365, "loss": 0.3206, "step": 13356 }, { "epoch": 1.0820641607258588, "grad_norm": 0.031790584325790405, "learning_rate": 0.00016210900580584185, "loss": 0.2658, "step": 13357 }, { "epoch": 1.082145171743357, "grad_norm": 0.043712154030799866, "learning_rate": 0.0001621045051532472, "loss": 0.3022, "step": 13358 }, { "epoch": 1.0822261827608555, "grad_norm": 0.03479669988155365, "learning_rate": 0.0001621000045006526, "loss": 0.34, "step": 13359 }, { "epoch": 1.082307193778354, "grad_norm": 0.04478292167186737, "learning_rate": 0.000162095503848058, "loss": 0.3535, "step": 13360 }, { "epoch": 1.0823882047958522, "grad_norm": 0.03777686879038811, "learning_rate": 0.00016209100319546335, "loss": 0.3142, "step": 13361 }, { "epoch": 1.0824692158133506, "grad_norm": 0.041430700570344925, "learning_rate": 0.00016208650254286873, "loss": 0.3208, "step": 13362 }, { "epoch": 1.082550226830849, "grad_norm": 0.04191647469997406, "learning_rate": 0.0001620820018902741, "loss": 0.3726, "step": 13363 }, { "epoch": 1.0826312378483474, "grad_norm": 0.03765102103352547, "learning_rate": 0.00016207750123767945, "loss": 0.2909, "step": 13364 }, { "epoch": 1.0827122488658458, "grad_norm": 0.03670511394739151, "learning_rate": 0.00016207300058508484, "loss": 0.3226, "step": 13365 }, { "epoch": 1.082793259883344, "grad_norm": 0.03823100030422211, "learning_rate": 0.00016206849993249023, "loss": 0.294, "step": 13366 }, { "epoch": 1.0828742709008425, "grad_norm": 0.04215855523943901, "learning_rate": 0.0001620639992798956, "loss": 0.2575, "step": 13367 }, { "epoch": 1.0829552819183408, "grad_norm": 0.037356503307819366, "learning_rate": 0.00016205949862730098, "loss": 0.3122, "step": 13368 }, { "epoch": 1.0830362929358393, "grad_norm": 0.03582943230867386, "learning_rate": 0.00016205499797470634, "loss": 0.2976, "step": 13369 }, { "epoch": 1.0831173039533377, "grad_norm": 0.03671623393893242, "learning_rate": 0.0001620504973221117, "loss": 0.3179, "step": 13370 }, { "epoch": 1.083198314970836, "grad_norm": 0.04530152678489685, "learning_rate": 0.0001620459966695171, "loss": 0.3153, "step": 13371 }, { "epoch": 1.0832793259883344, "grad_norm": 0.04155336320400238, "learning_rate": 0.00016204149601692247, "loss": 0.3517, "step": 13372 }, { "epoch": 1.083360337005833, "grad_norm": 0.036439746618270874, "learning_rate": 0.00016203699536432783, "loss": 0.2897, "step": 13373 }, { "epoch": 1.0834413480233311, "grad_norm": 0.03697358816862106, "learning_rate": 0.00016203249471173322, "loss": 0.2568, "step": 13374 }, { "epoch": 1.0835223590408296, "grad_norm": 0.04164021089673042, "learning_rate": 0.00016202799405913858, "loss": 0.3273, "step": 13375 }, { "epoch": 1.0836033700583279, "grad_norm": 0.03787509724497795, "learning_rate": 0.00016202349340654394, "loss": 0.3166, "step": 13376 }, { "epoch": 1.0836843810758263, "grad_norm": 0.0382017083466053, "learning_rate": 0.00016201899275394935, "loss": 0.2994, "step": 13377 }, { "epoch": 1.0837653920933248, "grad_norm": 0.03486346825957298, "learning_rate": 0.0001620144921013547, "loss": 0.2928, "step": 13378 }, { "epoch": 1.083846403110823, "grad_norm": 0.041407354176044464, "learning_rate": 0.00016200999144876007, "loss": 0.3383, "step": 13379 }, { "epoch": 1.0839274141283215, "grad_norm": 0.03394337743520737, "learning_rate": 0.00016200549079616546, "loss": 0.3089, "step": 13380 }, { "epoch": 1.0840084251458197, "grad_norm": 0.030327385291457176, "learning_rate": 0.00016200099014357082, "loss": 0.2834, "step": 13381 }, { "epoch": 1.0840894361633182, "grad_norm": 0.03304734453558922, "learning_rate": 0.00016199648949097618, "loss": 0.3094, "step": 13382 }, { "epoch": 1.0841704471808167, "grad_norm": 0.03882576897740364, "learning_rate": 0.0001619919888383816, "loss": 0.2942, "step": 13383 }, { "epoch": 1.084251458198315, "grad_norm": 0.03737260773777962, "learning_rate": 0.00016198748818578695, "loss": 0.301, "step": 13384 }, { "epoch": 1.0843324692158134, "grad_norm": 0.036476656794548035, "learning_rate": 0.00016198298753319231, "loss": 0.3604, "step": 13385 }, { "epoch": 1.0844134802333116, "grad_norm": 0.042435936629772186, "learning_rate": 0.0001619784868805977, "loss": 0.3552, "step": 13386 }, { "epoch": 1.08449449125081, "grad_norm": 0.04005247727036476, "learning_rate": 0.00016197398622800306, "loss": 0.2998, "step": 13387 }, { "epoch": 1.0845755022683086, "grad_norm": 0.035631150007247925, "learning_rate": 0.00016196948557540845, "loss": 0.2897, "step": 13388 }, { "epoch": 1.0846565132858068, "grad_norm": 0.037405405193567276, "learning_rate": 0.00016196498492281384, "loss": 0.351, "step": 13389 }, { "epoch": 1.0847375243033053, "grad_norm": 0.035022035241127014, "learning_rate": 0.0001619604842702192, "loss": 0.2817, "step": 13390 }, { "epoch": 1.0848185353208035, "grad_norm": 0.03630216792225838, "learning_rate": 0.00016195598361762456, "loss": 0.2864, "step": 13391 }, { "epoch": 1.084899546338302, "grad_norm": 0.03898398578166962, "learning_rate": 0.00016195148296502994, "loss": 0.3124, "step": 13392 }, { "epoch": 1.0849805573558005, "grad_norm": 0.04213237389922142, "learning_rate": 0.0001619469823124353, "loss": 0.3074, "step": 13393 }, { "epoch": 1.0850615683732987, "grad_norm": 0.04674211144447327, "learning_rate": 0.0001619424816598407, "loss": 0.3913, "step": 13394 }, { "epoch": 1.0851425793907972, "grad_norm": 0.04056910052895546, "learning_rate": 0.00016193798100724608, "loss": 0.3173, "step": 13395 }, { "epoch": 1.0852235904082956, "grad_norm": 0.038967233151197433, "learning_rate": 0.00016193348035465144, "loss": 0.3144, "step": 13396 }, { "epoch": 1.0853046014257939, "grad_norm": 0.04120781272649765, "learning_rate": 0.0001619289797020568, "loss": 0.3032, "step": 13397 }, { "epoch": 1.0853856124432923, "grad_norm": 0.04656161367893219, "learning_rate": 0.00016192447904946218, "loss": 0.3592, "step": 13398 }, { "epoch": 1.0854666234607906, "grad_norm": 0.047699473798274994, "learning_rate": 0.00016191997839686754, "loss": 0.3621, "step": 13399 }, { "epoch": 1.085547634478289, "grad_norm": 0.051550593227148056, "learning_rate": 0.00016191547774427293, "loss": 0.3737, "step": 13400 }, { "epoch": 1.0856286454957875, "grad_norm": 0.0335693359375, "learning_rate": 0.00016191097709167832, "loss": 0.2983, "step": 13401 }, { "epoch": 1.0857096565132858, "grad_norm": 0.03921520337462425, "learning_rate": 0.00016190647643908368, "loss": 0.312, "step": 13402 }, { "epoch": 1.0857906675307842, "grad_norm": 0.040765002369880676, "learning_rate": 0.00016190197578648904, "loss": 0.3251, "step": 13403 }, { "epoch": 1.0858716785482825, "grad_norm": 0.04020378366112709, "learning_rate": 0.00016189747513389443, "loss": 0.3174, "step": 13404 }, { "epoch": 1.085952689565781, "grad_norm": 0.035458121448755264, "learning_rate": 0.00016189297448129979, "loss": 0.2916, "step": 13405 }, { "epoch": 1.0860337005832794, "grad_norm": 0.03757420554757118, "learning_rate": 0.00016188847382870517, "loss": 0.3191, "step": 13406 }, { "epoch": 1.0861147116007777, "grad_norm": 0.037379421293735504, "learning_rate": 0.00016188397317611056, "loss": 0.3095, "step": 13407 }, { "epoch": 1.0861957226182761, "grad_norm": 0.039359867572784424, "learning_rate": 0.00016187947252351592, "loss": 0.2978, "step": 13408 }, { "epoch": 1.0862767336357744, "grad_norm": 0.034363653510808945, "learning_rate": 0.00016187497187092128, "loss": 0.2862, "step": 13409 }, { "epoch": 1.0863577446532728, "grad_norm": 0.04078854247927666, "learning_rate": 0.00016187047121832667, "loss": 0.3244, "step": 13410 }, { "epoch": 1.0864387556707713, "grad_norm": 0.04132116585969925, "learning_rate": 0.00016186597056573203, "loss": 0.3263, "step": 13411 }, { "epoch": 1.0865197666882696, "grad_norm": 0.03711467981338501, "learning_rate": 0.00016186146991313741, "loss": 0.2796, "step": 13412 }, { "epoch": 1.086600777705768, "grad_norm": 0.04074230045080185, "learning_rate": 0.0001618569692605428, "loss": 0.3382, "step": 13413 }, { "epoch": 1.0866817887232663, "grad_norm": 0.03637443482875824, "learning_rate": 0.00016185246860794816, "loss": 0.2998, "step": 13414 }, { "epoch": 1.0867627997407647, "grad_norm": 0.03684492036700249, "learning_rate": 0.00016184796795535352, "loss": 0.2826, "step": 13415 }, { "epoch": 1.0868438107582632, "grad_norm": 0.035718128085136414, "learning_rate": 0.0001618434673027589, "loss": 0.3025, "step": 13416 }, { "epoch": 1.0869248217757614, "grad_norm": 0.03895338252186775, "learning_rate": 0.0001618389666501643, "loss": 0.3109, "step": 13417 }, { "epoch": 1.08700583279326, "grad_norm": 0.04166723042726517, "learning_rate": 0.00016183446599756966, "loss": 0.3203, "step": 13418 }, { "epoch": 1.0870868438107584, "grad_norm": 0.04138009995222092, "learning_rate": 0.00016182996534497504, "loss": 0.316, "step": 13419 }, { "epoch": 1.0871678548282566, "grad_norm": 0.04135255515575409, "learning_rate": 0.0001618254646923804, "loss": 0.3207, "step": 13420 }, { "epoch": 1.087248865845755, "grad_norm": 0.03714308515191078, "learning_rate": 0.00016182096403978576, "loss": 0.2905, "step": 13421 }, { "epoch": 1.0873298768632533, "grad_norm": 0.04605689272284508, "learning_rate": 0.00016181646338719115, "loss": 0.3506, "step": 13422 }, { "epoch": 1.0874108878807518, "grad_norm": 0.03826843202114105, "learning_rate": 0.00016181196273459654, "loss": 0.3247, "step": 13423 }, { "epoch": 1.0874918988982503, "grad_norm": 0.04529668018221855, "learning_rate": 0.0001618074620820019, "loss": 0.321, "step": 13424 }, { "epoch": 1.0875729099157485, "grad_norm": 0.03837461769580841, "learning_rate": 0.00016180296142940729, "loss": 0.314, "step": 13425 }, { "epoch": 1.087653920933247, "grad_norm": 0.04195813834667206, "learning_rate": 0.00016179846077681265, "loss": 0.3204, "step": 13426 }, { "epoch": 1.0877349319507452, "grad_norm": 0.03744436800479889, "learning_rate": 0.000161793960124218, "loss": 0.3355, "step": 13427 }, { "epoch": 1.0878159429682437, "grad_norm": 0.04252445697784424, "learning_rate": 0.0001617894594716234, "loss": 0.3436, "step": 13428 }, { "epoch": 1.0878969539857422, "grad_norm": 0.03918018937110901, "learning_rate": 0.00016178495881902878, "loss": 0.316, "step": 13429 }, { "epoch": 1.0879779650032404, "grad_norm": 0.036307670176029205, "learning_rate": 0.00016178045816643414, "loss": 0.2882, "step": 13430 }, { "epoch": 1.0880589760207389, "grad_norm": 0.03624377399682999, "learning_rate": 0.00016177595751383953, "loss": 0.2911, "step": 13431 }, { "epoch": 1.088139987038237, "grad_norm": 0.044146664440631866, "learning_rate": 0.0001617714568612449, "loss": 0.3389, "step": 13432 }, { "epoch": 1.0882209980557356, "grad_norm": 0.0352458581328392, "learning_rate": 0.00016176695620865025, "loss": 0.2871, "step": 13433 }, { "epoch": 1.088302009073234, "grad_norm": 0.038899607956409454, "learning_rate": 0.00016176245555605563, "loss": 0.2902, "step": 13434 }, { "epoch": 1.0883830200907323, "grad_norm": 0.037828993052244186, "learning_rate": 0.00016175795490346102, "loss": 0.3268, "step": 13435 }, { "epoch": 1.0884640311082308, "grad_norm": 0.03922426328063011, "learning_rate": 0.00016175345425086638, "loss": 0.3154, "step": 13436 }, { "epoch": 1.088545042125729, "grad_norm": 0.037640493363142014, "learning_rate": 0.00016174895359827177, "loss": 0.325, "step": 13437 }, { "epoch": 1.0886260531432275, "grad_norm": 0.04123581945896149, "learning_rate": 0.00016174445294567713, "loss": 0.3023, "step": 13438 }, { "epoch": 1.088707064160726, "grad_norm": 0.038537222892045975, "learning_rate": 0.0001617399522930825, "loss": 0.2977, "step": 13439 }, { "epoch": 1.0887880751782242, "grad_norm": 0.037821464240550995, "learning_rate": 0.0001617354516404879, "loss": 0.3106, "step": 13440 }, { "epoch": 1.0888690861957226, "grad_norm": 0.035032059997320175, "learning_rate": 0.00016173095098789326, "loss": 0.3369, "step": 13441 }, { "epoch": 1.0889500972132211, "grad_norm": 0.037530556321144104, "learning_rate": 0.00016172645033529862, "loss": 0.3109, "step": 13442 }, { "epoch": 1.0890311082307194, "grad_norm": 0.042946796864271164, "learning_rate": 0.000161721949682704, "loss": 0.3547, "step": 13443 }, { "epoch": 1.0891121192482178, "grad_norm": 0.0371532179415226, "learning_rate": 0.00016171744903010937, "loss": 0.3108, "step": 13444 }, { "epoch": 1.089193130265716, "grad_norm": 0.03632621839642525, "learning_rate": 0.00016171294837751473, "loss": 0.3438, "step": 13445 }, { "epoch": 1.0892741412832145, "grad_norm": 0.039438121020793915, "learning_rate": 0.00016170844772492014, "loss": 0.3297, "step": 13446 }, { "epoch": 1.0893551523007128, "grad_norm": 0.03756951540708542, "learning_rate": 0.0001617039470723255, "loss": 0.2914, "step": 13447 }, { "epoch": 1.0894361633182112, "grad_norm": 0.0482478141784668, "learning_rate": 0.00016169944641973086, "loss": 0.3197, "step": 13448 }, { "epoch": 1.0895171743357097, "grad_norm": 0.03478642553091049, "learning_rate": 0.00016169494576713625, "loss": 0.3024, "step": 13449 }, { "epoch": 1.089598185353208, "grad_norm": 0.04219109192490578, "learning_rate": 0.0001616904451145416, "loss": 0.3742, "step": 13450 }, { "epoch": 1.0896791963707064, "grad_norm": 0.04149060696363449, "learning_rate": 0.00016168594446194697, "loss": 0.3444, "step": 13451 }, { "epoch": 1.089760207388205, "grad_norm": 0.04618578031659126, "learning_rate": 0.00016168144380935239, "loss": 0.328, "step": 13452 }, { "epoch": 1.0898412184057031, "grad_norm": 0.04148811101913452, "learning_rate": 0.00016167694315675775, "loss": 0.3284, "step": 13453 }, { "epoch": 1.0899222294232016, "grad_norm": 0.03678377345204353, "learning_rate": 0.0001616724425041631, "loss": 0.3237, "step": 13454 }, { "epoch": 1.0900032404406998, "grad_norm": 0.039635706692934036, "learning_rate": 0.0001616679418515685, "loss": 0.3288, "step": 13455 }, { "epoch": 1.0900842514581983, "grad_norm": 0.03519574925303459, "learning_rate": 0.00016166344119897385, "loss": 0.2836, "step": 13456 }, { "epoch": 1.0901652624756968, "grad_norm": 0.032641809433698654, "learning_rate": 0.0001616589405463792, "loss": 0.2589, "step": 13457 }, { "epoch": 1.090246273493195, "grad_norm": 0.037794966250658035, "learning_rate": 0.00016165443989378463, "loss": 0.297, "step": 13458 }, { "epoch": 1.0903272845106935, "grad_norm": 0.031619079411029816, "learning_rate": 0.00016164993924119, "loss": 0.2983, "step": 13459 }, { "epoch": 1.0904082955281917, "grad_norm": 0.03748596832156181, "learning_rate": 0.00016164543858859535, "loss": 0.309, "step": 13460 }, { "epoch": 1.0904893065456902, "grad_norm": 0.039078161120414734, "learning_rate": 0.00016164093793600073, "loss": 0.2891, "step": 13461 }, { "epoch": 1.0905703175631887, "grad_norm": 0.03955300524830818, "learning_rate": 0.0001616364372834061, "loss": 0.3721, "step": 13462 }, { "epoch": 1.090651328580687, "grad_norm": 0.044077832251787186, "learning_rate": 0.00016163193663081146, "loss": 0.3641, "step": 13463 }, { "epoch": 1.0907323395981854, "grad_norm": 0.036638155579566956, "learning_rate": 0.00016162743597821687, "loss": 0.2976, "step": 13464 }, { "epoch": 1.0908133506156836, "grad_norm": 0.040555648505687714, "learning_rate": 0.00016162293532562223, "loss": 0.3259, "step": 13465 }, { "epoch": 1.090894361633182, "grad_norm": 0.03348960727453232, "learning_rate": 0.0001616184346730276, "loss": 0.2584, "step": 13466 }, { "epoch": 1.0909753726506806, "grad_norm": 0.04036477208137512, "learning_rate": 0.00016161393402043298, "loss": 0.298, "step": 13467 }, { "epoch": 1.0910563836681788, "grad_norm": 0.03749343752861023, "learning_rate": 0.00016160943336783834, "loss": 0.3002, "step": 13468 }, { "epoch": 1.0911373946856773, "grad_norm": 0.036856718361377716, "learning_rate": 0.00016160493271524372, "loss": 0.2934, "step": 13469 }, { "epoch": 1.0912184057031755, "grad_norm": 0.03889523446559906, "learning_rate": 0.0001616004320626491, "loss": 0.315, "step": 13470 }, { "epoch": 1.091299416720674, "grad_norm": 0.04326172545552254, "learning_rate": 0.00016159593141005447, "loss": 0.324, "step": 13471 }, { "epoch": 1.0913804277381725, "grad_norm": 0.037450067698955536, "learning_rate": 0.00016159143075745983, "loss": 0.2916, "step": 13472 }, { "epoch": 1.0914614387556707, "grad_norm": 0.03876281529664993, "learning_rate": 0.00016158693010486522, "loss": 0.3034, "step": 13473 }, { "epoch": 1.0915424497731692, "grad_norm": 0.04315292090177536, "learning_rate": 0.00016158242945227058, "loss": 0.3361, "step": 13474 }, { "epoch": 1.0916234607906676, "grad_norm": 0.063300721347332, "learning_rate": 0.00016157792879967597, "loss": 0.3916, "step": 13475 }, { "epoch": 1.0917044718081659, "grad_norm": 0.04203910008072853, "learning_rate": 0.00016157342814708135, "loss": 0.364, "step": 13476 }, { "epoch": 1.0917854828256643, "grad_norm": 0.03829997405409813, "learning_rate": 0.0001615689274944867, "loss": 0.2792, "step": 13477 }, { "epoch": 1.0918664938431626, "grad_norm": 0.035567138344049454, "learning_rate": 0.00016156442684189207, "loss": 0.302, "step": 13478 }, { "epoch": 1.091947504860661, "grad_norm": 0.03505342826247215, "learning_rate": 0.00016155992618929746, "loss": 0.2906, "step": 13479 }, { "epoch": 1.0920285158781595, "grad_norm": 0.03667743504047394, "learning_rate": 0.00016155542553670282, "loss": 0.2909, "step": 13480 }, { "epoch": 1.0921095268956578, "grad_norm": 0.03935784846544266, "learning_rate": 0.0001615509248841082, "loss": 0.3509, "step": 13481 }, { "epoch": 1.0921905379131562, "grad_norm": 0.039088696241378784, "learning_rate": 0.0001615464242315136, "loss": 0.2943, "step": 13482 }, { "epoch": 1.0922715489306545, "grad_norm": 0.04239175096154213, "learning_rate": 0.00016154192357891895, "loss": 0.3225, "step": 13483 }, { "epoch": 1.092352559948153, "grad_norm": 0.03504040092229843, "learning_rate": 0.00016153742292632431, "loss": 0.3069, "step": 13484 }, { "epoch": 1.0924335709656514, "grad_norm": 0.03937983140349388, "learning_rate": 0.0001615329222737297, "loss": 0.3103, "step": 13485 }, { "epoch": 1.0925145819831497, "grad_norm": 0.03920026496052742, "learning_rate": 0.00016152842162113506, "loss": 0.2984, "step": 13486 }, { "epoch": 1.0925955930006481, "grad_norm": 0.038286179304122925, "learning_rate": 0.00016152392096854045, "loss": 0.3066, "step": 13487 }, { "epoch": 1.0926766040181464, "grad_norm": 0.03593042120337486, "learning_rate": 0.00016151942031594584, "loss": 0.3042, "step": 13488 }, { "epoch": 1.0927576150356448, "grad_norm": 0.03297584876418114, "learning_rate": 0.0001615149196633512, "loss": 0.2847, "step": 13489 }, { "epoch": 1.0928386260531433, "grad_norm": 0.047829046845436096, "learning_rate": 0.00016151041901075656, "loss": 0.316, "step": 13490 }, { "epoch": 1.0929196370706415, "grad_norm": 0.043388236314058304, "learning_rate": 0.00016150591835816194, "loss": 0.3403, "step": 13491 }, { "epoch": 1.09300064808814, "grad_norm": 0.04012511670589447, "learning_rate": 0.00016150141770556733, "loss": 0.3023, "step": 13492 }, { "epoch": 1.0930816591056383, "grad_norm": 0.04274914413690567, "learning_rate": 0.0001614969170529727, "loss": 0.3708, "step": 13493 }, { "epoch": 1.0931626701231367, "grad_norm": 0.036317285150289536, "learning_rate": 0.00016149241640037808, "loss": 0.317, "step": 13494 }, { "epoch": 1.0932436811406352, "grad_norm": 0.043395668268203735, "learning_rate": 0.00016148791574778344, "loss": 0.3627, "step": 13495 }, { "epoch": 1.0933246921581334, "grad_norm": 0.038292981684207916, "learning_rate": 0.0001614834150951888, "loss": 0.3077, "step": 13496 }, { "epoch": 1.093405703175632, "grad_norm": 0.040114227682352066, "learning_rate": 0.00016147891444259418, "loss": 0.3092, "step": 13497 }, { "epoch": 1.0934867141931304, "grad_norm": 0.03980846703052521, "learning_rate": 0.00016147441378999957, "loss": 0.3377, "step": 13498 }, { "epoch": 1.0935677252106286, "grad_norm": 0.03343471139669418, "learning_rate": 0.00016146991313740493, "loss": 0.2697, "step": 13499 }, { "epoch": 1.093648736228127, "grad_norm": 0.03758417069911957, "learning_rate": 0.00016146541248481032, "loss": 0.2826, "step": 13500 }, { "epoch": 1.0937297472456253, "grad_norm": 0.03633441776037216, "learning_rate": 0.00016146091183221568, "loss": 0.2761, "step": 13501 }, { "epoch": 1.0938107582631238, "grad_norm": 0.04054310545325279, "learning_rate": 0.00016145641117962104, "loss": 0.303, "step": 13502 }, { "epoch": 1.0938917692806223, "grad_norm": 0.039192259311676025, "learning_rate": 0.00016145191052702643, "loss": 0.3088, "step": 13503 }, { "epoch": 1.0939727802981205, "grad_norm": 0.036303408443927765, "learning_rate": 0.0001614474098744318, "loss": 0.2925, "step": 13504 }, { "epoch": 1.094053791315619, "grad_norm": 0.04174082353711128, "learning_rate": 0.00016144290922183717, "loss": 0.3105, "step": 13505 }, { "epoch": 1.0941348023331172, "grad_norm": 0.03811978921294212, "learning_rate": 0.00016143840856924256, "loss": 0.2958, "step": 13506 }, { "epoch": 1.0942158133506157, "grad_norm": 0.03642289713025093, "learning_rate": 0.00016143390791664792, "loss": 0.3037, "step": 13507 }, { "epoch": 1.0942968243681142, "grad_norm": 0.03437235206365585, "learning_rate": 0.00016142940726405328, "loss": 0.2908, "step": 13508 }, { "epoch": 1.0943778353856124, "grad_norm": 0.03871536627411842, "learning_rate": 0.00016142490661145867, "loss": 0.3089, "step": 13509 }, { "epoch": 1.0944588464031109, "grad_norm": 0.04082522913813591, "learning_rate": 0.00016142040595886406, "loss": 0.3303, "step": 13510 }, { "epoch": 1.094539857420609, "grad_norm": 0.04203804209828377, "learning_rate": 0.00016141590530626942, "loss": 0.3556, "step": 13511 }, { "epoch": 1.0946208684381076, "grad_norm": 0.03674378991127014, "learning_rate": 0.0001614114046536748, "loss": 0.2966, "step": 13512 }, { "epoch": 1.094701879455606, "grad_norm": 0.03520248457789421, "learning_rate": 0.00016140690400108016, "loss": 0.3171, "step": 13513 }, { "epoch": 1.0947828904731043, "grad_norm": 0.03618206828832626, "learning_rate": 0.00016140240334848552, "loss": 0.279, "step": 13514 }, { "epoch": 1.0948639014906028, "grad_norm": 0.04096698388457298, "learning_rate": 0.0001613979026958909, "loss": 0.2968, "step": 13515 }, { "epoch": 1.094944912508101, "grad_norm": 0.03836210444569588, "learning_rate": 0.0001613934020432963, "loss": 0.2983, "step": 13516 }, { "epoch": 1.0950259235255995, "grad_norm": 0.04501728713512421, "learning_rate": 0.00016138890139070166, "loss": 0.3536, "step": 13517 }, { "epoch": 1.095106934543098, "grad_norm": 0.04326443746685982, "learning_rate": 0.00016138440073810704, "loss": 0.3309, "step": 13518 }, { "epoch": 1.0951879455605962, "grad_norm": 0.044201552867889404, "learning_rate": 0.0001613799000855124, "loss": 0.3766, "step": 13519 }, { "epoch": 1.0952689565780946, "grad_norm": 0.037375301122665405, "learning_rate": 0.00016137539943291776, "loss": 0.2803, "step": 13520 }, { "epoch": 1.095349967595593, "grad_norm": 0.04308674857020378, "learning_rate": 0.00016137089878032318, "loss": 0.3287, "step": 13521 }, { "epoch": 1.0954309786130914, "grad_norm": 0.03787890076637268, "learning_rate": 0.00016136639812772854, "loss": 0.2913, "step": 13522 }, { "epoch": 1.0955119896305898, "grad_norm": 0.03910102695226669, "learning_rate": 0.0001613618974751339, "loss": 0.3162, "step": 13523 }, { "epoch": 1.095593000648088, "grad_norm": 0.04268760234117508, "learning_rate": 0.00016135739682253929, "loss": 0.3078, "step": 13524 }, { "epoch": 1.0956740116655865, "grad_norm": 0.03253033012151718, "learning_rate": 0.00016135289616994465, "loss": 0.2824, "step": 13525 }, { "epoch": 1.095755022683085, "grad_norm": 0.04201733320951462, "learning_rate": 0.00016134839551735, "loss": 0.3268, "step": 13526 }, { "epoch": 1.0958360337005832, "grad_norm": 0.03552337363362312, "learning_rate": 0.00016134389486475542, "loss": 0.2847, "step": 13527 }, { "epoch": 1.0959170447180817, "grad_norm": 0.039983950555324554, "learning_rate": 0.00016133939421216078, "loss": 0.336, "step": 13528 }, { "epoch": 1.09599805573558, "grad_norm": 0.0374993234872818, "learning_rate": 0.00016133489355956614, "loss": 0.3133, "step": 13529 }, { "epoch": 1.0960790667530784, "grad_norm": 0.038941461592912674, "learning_rate": 0.00016133039290697153, "loss": 0.3335, "step": 13530 }, { "epoch": 1.096160077770577, "grad_norm": 0.035729698836803436, "learning_rate": 0.0001613258922543769, "loss": 0.3002, "step": 13531 }, { "epoch": 1.0962410887880751, "grad_norm": 0.04123429208993912, "learning_rate": 0.00016132139160178225, "loss": 0.2853, "step": 13532 }, { "epoch": 1.0963220998055736, "grad_norm": 0.04294388368725777, "learning_rate": 0.00016131689094918766, "loss": 0.3547, "step": 13533 }, { "epoch": 1.0964031108230718, "grad_norm": 0.0382094569504261, "learning_rate": 0.00016131239029659302, "loss": 0.3267, "step": 13534 }, { "epoch": 1.0964841218405703, "grad_norm": 0.03548954427242279, "learning_rate": 0.00016130788964399838, "loss": 0.3127, "step": 13535 }, { "epoch": 1.0965651328580688, "grad_norm": 0.03575649857521057, "learning_rate": 0.00016130338899140377, "loss": 0.2671, "step": 13536 }, { "epoch": 1.096646143875567, "grad_norm": 0.04112618416547775, "learning_rate": 0.00016129888833880913, "loss": 0.2985, "step": 13537 }, { "epoch": 1.0967271548930655, "grad_norm": 0.03346872702240944, "learning_rate": 0.0001612943876862145, "loss": 0.2774, "step": 13538 }, { "epoch": 1.0968081659105637, "grad_norm": 0.04214170202612877, "learning_rate": 0.0001612898870336199, "loss": 0.3526, "step": 13539 }, { "epoch": 1.0968891769280622, "grad_norm": 0.03760145977139473, "learning_rate": 0.00016128538638102526, "loss": 0.3031, "step": 13540 }, { "epoch": 1.0969701879455607, "grad_norm": 0.045291200280189514, "learning_rate": 0.00016128088572843062, "loss": 0.3563, "step": 13541 }, { "epoch": 1.097051198963059, "grad_norm": 0.04029682278633118, "learning_rate": 0.000161276385075836, "loss": 0.3036, "step": 13542 }, { "epoch": 1.0971322099805574, "grad_norm": 0.039196692407131195, "learning_rate": 0.00016127188442324137, "loss": 0.3513, "step": 13543 }, { "epoch": 1.0972132209980558, "grad_norm": 0.034414540976285934, "learning_rate": 0.00016126738377064673, "loss": 0.2933, "step": 13544 }, { "epoch": 1.097294232015554, "grad_norm": 0.03912563994526863, "learning_rate": 0.00016126288311805214, "loss": 0.3291, "step": 13545 }, { "epoch": 1.0973752430330526, "grad_norm": 0.035514701157808304, "learning_rate": 0.0001612583824654575, "loss": 0.2644, "step": 13546 }, { "epoch": 1.0974562540505508, "grad_norm": 0.03862490504980087, "learning_rate": 0.00016125388181286286, "loss": 0.2769, "step": 13547 }, { "epoch": 1.0975372650680493, "grad_norm": 0.03533685579895973, "learning_rate": 0.00016124938116026825, "loss": 0.3076, "step": 13548 }, { "epoch": 1.0976182760855475, "grad_norm": 0.034725695848464966, "learning_rate": 0.0001612448805076736, "loss": 0.2808, "step": 13549 }, { "epoch": 1.097699287103046, "grad_norm": 0.03933245316147804, "learning_rate": 0.000161240379855079, "loss": 0.3208, "step": 13550 }, { "epoch": 1.0977802981205445, "grad_norm": 0.03894717991352081, "learning_rate": 0.00016123587920248439, "loss": 0.2945, "step": 13551 }, { "epoch": 1.0978613091380427, "grad_norm": 0.03713586926460266, "learning_rate": 0.00016123137854988975, "loss": 0.3056, "step": 13552 }, { "epoch": 1.0979423201555412, "grad_norm": 0.03865823894739151, "learning_rate": 0.0001612268778972951, "loss": 0.3257, "step": 13553 }, { "epoch": 1.0980233311730396, "grad_norm": 0.03470766171813011, "learning_rate": 0.0001612223772447005, "loss": 0.2865, "step": 13554 }, { "epoch": 1.0981043421905379, "grad_norm": 0.041255801916122437, "learning_rate": 0.00016121787659210585, "loss": 0.3433, "step": 13555 }, { "epoch": 1.0981853532080363, "grad_norm": 0.03621998056769371, "learning_rate": 0.00016121337593951124, "loss": 0.3256, "step": 13556 }, { "epoch": 1.0982663642255346, "grad_norm": 0.039620328694581985, "learning_rate": 0.00016120887528691663, "loss": 0.3068, "step": 13557 }, { "epoch": 1.098347375243033, "grad_norm": 0.04105069488286972, "learning_rate": 0.000161204374634322, "loss": 0.3051, "step": 13558 }, { "epoch": 1.0984283862605315, "grad_norm": 0.039884231984615326, "learning_rate": 0.00016119987398172735, "loss": 0.3234, "step": 13559 }, { "epoch": 1.0985093972780298, "grad_norm": 0.03688472881913185, "learning_rate": 0.00016119537332913274, "loss": 0.2954, "step": 13560 }, { "epoch": 1.0985904082955282, "grad_norm": 0.04016055911779404, "learning_rate": 0.0001611908726765381, "loss": 0.3341, "step": 13561 }, { "epoch": 1.0986714193130265, "grad_norm": 0.042149852961301804, "learning_rate": 0.00016118637202394348, "loss": 0.3075, "step": 13562 }, { "epoch": 1.098752430330525, "grad_norm": 0.037303440272808075, "learning_rate": 0.00016118187137134887, "loss": 0.2749, "step": 13563 }, { "epoch": 1.0988334413480234, "grad_norm": 0.03796634450554848, "learning_rate": 0.00016117737071875423, "loss": 0.2935, "step": 13564 }, { "epoch": 1.0989144523655217, "grad_norm": 0.03543466329574585, "learning_rate": 0.0001611728700661596, "loss": 0.2904, "step": 13565 }, { "epoch": 1.0989954633830201, "grad_norm": 0.04048040136694908, "learning_rate": 0.00016116836941356498, "loss": 0.3239, "step": 13566 }, { "epoch": 1.0990764744005186, "grad_norm": 0.042633056640625, "learning_rate": 0.00016116386876097034, "loss": 0.3353, "step": 13567 }, { "epoch": 1.0991574854180168, "grad_norm": 0.03895155340433121, "learning_rate": 0.00016115936810837572, "loss": 0.3197, "step": 13568 }, { "epoch": 1.0992384964355153, "grad_norm": 0.03703373670578003, "learning_rate": 0.0001611548674557811, "loss": 0.3239, "step": 13569 }, { "epoch": 1.0993195074530135, "grad_norm": 0.03645946457982063, "learning_rate": 0.00016115036680318647, "loss": 0.3097, "step": 13570 }, { "epoch": 1.099400518470512, "grad_norm": 0.036779873073101044, "learning_rate": 0.00016114586615059183, "loss": 0.3182, "step": 13571 }, { "epoch": 1.0994815294880103, "grad_norm": 0.037688739597797394, "learning_rate": 0.00016114136549799722, "loss": 0.2918, "step": 13572 }, { "epoch": 1.0995625405055087, "grad_norm": 0.03933629021048546, "learning_rate": 0.0001611368648454026, "loss": 0.3546, "step": 13573 }, { "epoch": 1.0996435515230072, "grad_norm": 0.041833870112895966, "learning_rate": 0.00016113236419280797, "loss": 0.317, "step": 13574 }, { "epoch": 1.0997245625405054, "grad_norm": 0.036539576947689056, "learning_rate": 0.00016112786354021335, "loss": 0.2978, "step": 13575 }, { "epoch": 1.099805573558004, "grad_norm": 0.03901558741927147, "learning_rate": 0.0001611233628876187, "loss": 0.3173, "step": 13576 }, { "epoch": 1.0998865845755024, "grad_norm": 0.03997525945305824, "learning_rate": 0.00016111886223502407, "loss": 0.3291, "step": 13577 }, { "epoch": 1.0999675955930006, "grad_norm": 0.03499395772814751, "learning_rate": 0.00016111436158242946, "loss": 0.3013, "step": 13578 }, { "epoch": 1.100048606610499, "grad_norm": 0.03280987963080406, "learning_rate": 0.00016110986092983485, "loss": 0.2733, "step": 13579 }, { "epoch": 1.1001296176279973, "grad_norm": 0.0375014953315258, "learning_rate": 0.0001611053602772402, "loss": 0.2918, "step": 13580 }, { "epoch": 1.1002106286454958, "grad_norm": 0.040403205901384354, "learning_rate": 0.0001611008596246456, "loss": 0.3348, "step": 13581 }, { "epoch": 1.1002916396629943, "grad_norm": 0.039181552827358246, "learning_rate": 0.00016109635897205095, "loss": 0.2923, "step": 13582 }, { "epoch": 1.1003726506804925, "grad_norm": 0.03221115842461586, "learning_rate": 0.00016109185831945631, "loss": 0.2504, "step": 13583 }, { "epoch": 1.100453661697991, "grad_norm": 0.042914848774671555, "learning_rate": 0.0001610873576668617, "loss": 0.3013, "step": 13584 }, { "epoch": 1.1005346727154892, "grad_norm": 0.04518318921327591, "learning_rate": 0.0001610828570142671, "loss": 0.318, "step": 13585 }, { "epoch": 1.1006156837329877, "grad_norm": 0.03647695481777191, "learning_rate": 0.00016107835636167245, "loss": 0.2799, "step": 13586 }, { "epoch": 1.1006966947504861, "grad_norm": 0.04397977516055107, "learning_rate": 0.00016107385570907784, "loss": 0.3168, "step": 13587 }, { "epoch": 1.1007777057679844, "grad_norm": 0.03797837346792221, "learning_rate": 0.0001610693550564832, "loss": 0.2605, "step": 13588 }, { "epoch": 1.1008587167854829, "grad_norm": 0.037378404289484024, "learning_rate": 0.00016106485440388856, "loss": 0.2823, "step": 13589 }, { "epoch": 1.100939727802981, "grad_norm": 0.042138807475566864, "learning_rate": 0.00016106035375129394, "loss": 0.3103, "step": 13590 }, { "epoch": 1.1010207388204796, "grad_norm": 0.03948663920164108, "learning_rate": 0.00016105585309869933, "loss": 0.3197, "step": 13591 }, { "epoch": 1.101101749837978, "grad_norm": 0.03886083886027336, "learning_rate": 0.0001610513524461047, "loss": 0.2976, "step": 13592 }, { "epoch": 1.1011827608554763, "grad_norm": 0.047055937349796295, "learning_rate": 0.00016104685179351008, "loss": 0.3242, "step": 13593 }, { "epoch": 1.1012637718729748, "grad_norm": 0.04041250795125961, "learning_rate": 0.00016104235114091544, "loss": 0.3103, "step": 13594 }, { "epoch": 1.101344782890473, "grad_norm": 0.04913514479994774, "learning_rate": 0.0001610378504883208, "loss": 0.3058, "step": 13595 }, { "epoch": 1.1014257939079715, "grad_norm": 0.04520176723599434, "learning_rate": 0.00016103334983572618, "loss": 0.3102, "step": 13596 }, { "epoch": 1.10150680492547, "grad_norm": 0.0359567254781723, "learning_rate": 0.00016102884918313157, "loss": 0.3043, "step": 13597 }, { "epoch": 1.1015878159429682, "grad_norm": 0.034442391246557236, "learning_rate": 0.00016102434853053693, "loss": 0.2853, "step": 13598 }, { "epoch": 1.1016688269604666, "grad_norm": 0.03527701646089554, "learning_rate": 0.00016101984787794232, "loss": 0.2869, "step": 13599 }, { "epoch": 1.101749837977965, "grad_norm": 0.035869915038347244, "learning_rate": 0.00016101534722534768, "loss": 0.3078, "step": 13600 }, { "epoch": 1.1018308489954634, "grad_norm": 0.032918624579906464, "learning_rate": 0.00016101084657275304, "loss": 0.2818, "step": 13601 }, { "epoch": 1.1019118600129618, "grad_norm": 0.03698991239070892, "learning_rate": 0.00016100634592015845, "loss": 0.3051, "step": 13602 }, { "epoch": 1.10199287103046, "grad_norm": 0.03834715113043785, "learning_rate": 0.00016100184526756381, "loss": 0.3272, "step": 13603 }, { "epoch": 1.1020738820479585, "grad_norm": 0.03652876615524292, "learning_rate": 0.00016099734461496917, "loss": 0.3024, "step": 13604 }, { "epoch": 1.102154893065457, "grad_norm": 0.03805585205554962, "learning_rate": 0.00016099284396237456, "loss": 0.27, "step": 13605 }, { "epoch": 1.1022359040829552, "grad_norm": 0.045338697731494904, "learning_rate": 0.00016098834330977992, "loss": 0.3537, "step": 13606 }, { "epoch": 1.1023169151004537, "grad_norm": 0.037921786308288574, "learning_rate": 0.00016098384265718528, "loss": 0.3129, "step": 13607 }, { "epoch": 1.102397926117952, "grad_norm": 0.0399080328643322, "learning_rate": 0.0001609793420045907, "loss": 0.3162, "step": 13608 }, { "epoch": 1.1024789371354504, "grad_norm": 0.04143821448087692, "learning_rate": 0.00016097484135199606, "loss": 0.3199, "step": 13609 }, { "epoch": 1.1025599481529489, "grad_norm": 0.03901328518986702, "learning_rate": 0.00016097034069940142, "loss": 0.3591, "step": 13610 }, { "epoch": 1.1026409591704471, "grad_norm": 0.039841070771217346, "learning_rate": 0.0001609658400468068, "loss": 0.3459, "step": 13611 }, { "epoch": 1.1027219701879456, "grad_norm": 0.03591349720954895, "learning_rate": 0.00016096133939421216, "loss": 0.2803, "step": 13612 }, { "epoch": 1.1028029812054438, "grad_norm": 0.041079454123973846, "learning_rate": 0.00016095683874161752, "loss": 0.3069, "step": 13613 }, { "epoch": 1.1028839922229423, "grad_norm": 0.03818468004465103, "learning_rate": 0.00016095233808902294, "loss": 0.2928, "step": 13614 }, { "epoch": 1.1029650032404408, "grad_norm": 0.03975100442767143, "learning_rate": 0.0001609478374364283, "loss": 0.3156, "step": 13615 }, { "epoch": 1.103046014257939, "grad_norm": 0.03451533615589142, "learning_rate": 0.00016094333678383366, "loss": 0.2956, "step": 13616 }, { "epoch": 1.1031270252754375, "grad_norm": 0.04473431780934334, "learning_rate": 0.00016093883613123904, "loss": 0.2829, "step": 13617 }, { "epoch": 1.1032080362929357, "grad_norm": 0.045738816261291504, "learning_rate": 0.0001609343354786444, "loss": 0.358, "step": 13618 }, { "epoch": 1.1032890473104342, "grad_norm": 0.046089205890893936, "learning_rate": 0.00016092983482604976, "loss": 0.3782, "step": 13619 }, { "epoch": 1.1033700583279327, "grad_norm": 0.03943808749318123, "learning_rate": 0.00016092533417345518, "loss": 0.2896, "step": 13620 }, { "epoch": 1.103451069345431, "grad_norm": 0.04181476682424545, "learning_rate": 0.00016092083352086054, "loss": 0.3226, "step": 13621 }, { "epoch": 1.1035320803629294, "grad_norm": 0.04341307654976845, "learning_rate": 0.0001609163328682659, "loss": 0.3435, "step": 13622 }, { "epoch": 1.1036130913804278, "grad_norm": 0.03791226074099541, "learning_rate": 0.00016091183221567129, "loss": 0.3019, "step": 13623 }, { "epoch": 1.103694102397926, "grad_norm": 0.03913251310586929, "learning_rate": 0.00016090733156307665, "loss": 0.3095, "step": 13624 }, { "epoch": 1.1037751134154246, "grad_norm": 0.03419218584895134, "learning_rate": 0.00016090283091048203, "loss": 0.2964, "step": 13625 }, { "epoch": 1.1038561244329228, "grad_norm": 0.03727216273546219, "learning_rate": 0.00016089833025788742, "loss": 0.281, "step": 13626 }, { "epoch": 1.1039371354504213, "grad_norm": 0.040743958204984665, "learning_rate": 0.00016089382960529278, "loss": 0.3185, "step": 13627 }, { "epoch": 1.1040181464679197, "grad_norm": 0.033087439835071564, "learning_rate": 0.00016088932895269814, "loss": 0.2941, "step": 13628 }, { "epoch": 1.104099157485418, "grad_norm": 0.04077974334359169, "learning_rate": 0.00016088482830010353, "loss": 0.3073, "step": 13629 }, { "epoch": 1.1041801685029164, "grad_norm": 0.043515462428331375, "learning_rate": 0.0001608803276475089, "loss": 0.3272, "step": 13630 }, { "epoch": 1.1042611795204147, "grad_norm": 0.041284892708063126, "learning_rate": 0.00016087582699491427, "loss": 0.3217, "step": 13631 }, { "epoch": 1.1043421905379132, "grad_norm": 0.03675724193453789, "learning_rate": 0.00016087132634231966, "loss": 0.3003, "step": 13632 }, { "epoch": 1.1044232015554116, "grad_norm": 0.038290172815322876, "learning_rate": 0.00016086682568972502, "loss": 0.3122, "step": 13633 }, { "epoch": 1.1045042125729099, "grad_norm": 0.040265701711177826, "learning_rate": 0.00016086232503713038, "loss": 0.3189, "step": 13634 }, { "epoch": 1.1045852235904083, "grad_norm": 0.03757871687412262, "learning_rate": 0.00016085782438453577, "loss": 0.3035, "step": 13635 }, { "epoch": 1.1046662346079066, "grad_norm": 0.03657103329896927, "learning_rate": 0.00016085332373194113, "loss": 0.3059, "step": 13636 }, { "epoch": 1.104747245625405, "grad_norm": 0.03874574601650238, "learning_rate": 0.00016084882307934652, "loss": 0.2982, "step": 13637 }, { "epoch": 1.1048282566429035, "grad_norm": 0.03787166625261307, "learning_rate": 0.0001608443224267519, "loss": 0.3235, "step": 13638 }, { "epoch": 1.1049092676604018, "grad_norm": 0.03862585499882698, "learning_rate": 0.00016083982177415726, "loss": 0.353, "step": 13639 }, { "epoch": 1.1049902786779002, "grad_norm": 0.03860185667872429, "learning_rate": 0.00016083532112156262, "loss": 0.3144, "step": 13640 }, { "epoch": 1.1050712896953985, "grad_norm": 0.03666646033525467, "learning_rate": 0.000160830820468968, "loss": 0.3328, "step": 13641 }, { "epoch": 1.105152300712897, "grad_norm": 0.036767471581697464, "learning_rate": 0.00016082631981637337, "loss": 0.2915, "step": 13642 }, { "epoch": 1.1052333117303954, "grad_norm": 0.03964807093143463, "learning_rate": 0.00016082181916377876, "loss": 0.3609, "step": 13643 }, { "epoch": 1.1053143227478937, "grad_norm": 0.04057459905743599, "learning_rate": 0.00016081731851118415, "loss": 0.3036, "step": 13644 }, { "epoch": 1.1053953337653921, "grad_norm": 0.04356246814131737, "learning_rate": 0.0001608128178585895, "loss": 0.3394, "step": 13645 }, { "epoch": 1.1054763447828906, "grad_norm": 0.03935835137963295, "learning_rate": 0.00016080831720599487, "loss": 0.3399, "step": 13646 }, { "epoch": 1.1055573558003888, "grad_norm": 0.03753943741321564, "learning_rate": 0.00016080381655340025, "loss": 0.2971, "step": 13647 }, { "epoch": 1.1056383668178873, "grad_norm": 0.03873160481452942, "learning_rate": 0.0001607993159008056, "loss": 0.3276, "step": 13648 }, { "epoch": 1.1057193778353855, "grad_norm": 0.0393451452255249, "learning_rate": 0.000160794815248211, "loss": 0.3387, "step": 13649 }, { "epoch": 1.105800388852884, "grad_norm": 0.038663193583488464, "learning_rate": 0.0001607903145956164, "loss": 0.3553, "step": 13650 }, { "epoch": 1.1058813998703825, "grad_norm": 0.04521648958325386, "learning_rate": 0.00016078581394302175, "loss": 0.3386, "step": 13651 }, { "epoch": 1.1059624108878807, "grad_norm": 0.044531892985105515, "learning_rate": 0.0001607813132904271, "loss": 0.3004, "step": 13652 }, { "epoch": 1.1060434219053792, "grad_norm": 0.037462715059518814, "learning_rate": 0.0001607768126378325, "loss": 0.3165, "step": 13653 }, { "epoch": 1.1061244329228774, "grad_norm": 0.041082963347435, "learning_rate": 0.00016077231198523788, "loss": 0.3294, "step": 13654 }, { "epoch": 1.106205443940376, "grad_norm": 0.039622195065021515, "learning_rate": 0.00016076781133264324, "loss": 0.2801, "step": 13655 }, { "epoch": 1.1062864549578744, "grad_norm": 0.04551248252391815, "learning_rate": 0.00016076331068004863, "loss": 0.2985, "step": 13656 }, { "epoch": 1.1063674659753726, "grad_norm": 0.03860415145754814, "learning_rate": 0.000160758810027454, "loss": 0.3012, "step": 13657 }, { "epoch": 1.106448476992871, "grad_norm": 0.03250998258590698, "learning_rate": 0.00016075430937485935, "loss": 0.2973, "step": 13658 }, { "epoch": 1.1065294880103693, "grad_norm": 0.03704884275794029, "learning_rate": 0.00016074980872226474, "loss": 0.2773, "step": 13659 }, { "epoch": 1.1066104990278678, "grad_norm": 0.0459744855761528, "learning_rate": 0.00016074530806967012, "loss": 0.3019, "step": 13660 }, { "epoch": 1.1066915100453663, "grad_norm": 0.03988320380449295, "learning_rate": 0.00016074080741707548, "loss": 0.324, "step": 13661 }, { "epoch": 1.1067725210628645, "grad_norm": 0.03656921163201332, "learning_rate": 0.00016073630676448087, "loss": 0.2909, "step": 13662 }, { "epoch": 1.106853532080363, "grad_norm": 0.03722751513123512, "learning_rate": 0.00016073180611188623, "loss": 0.329, "step": 13663 }, { "epoch": 1.1069345430978612, "grad_norm": 0.03713303059339523, "learning_rate": 0.0001607273054592916, "loss": 0.3059, "step": 13664 }, { "epoch": 1.1070155541153597, "grad_norm": 0.04212358966469765, "learning_rate": 0.00016072280480669698, "loss": 0.3593, "step": 13665 }, { "epoch": 1.1070965651328581, "grad_norm": 0.04464373737573624, "learning_rate": 0.00016071830415410236, "loss": 0.3548, "step": 13666 }, { "epoch": 1.1071775761503564, "grad_norm": 0.040497563779354095, "learning_rate": 0.00016071380350150772, "loss": 0.2827, "step": 13667 }, { "epoch": 1.1072585871678549, "grad_norm": 0.0430973619222641, "learning_rate": 0.0001607093028489131, "loss": 0.2946, "step": 13668 }, { "epoch": 1.1073395981853533, "grad_norm": 0.04000406712293625, "learning_rate": 0.00016070480219631847, "loss": 0.3266, "step": 13669 }, { "epoch": 1.1074206092028516, "grad_norm": 0.04005291685461998, "learning_rate": 0.00016070030154372383, "loss": 0.3224, "step": 13670 }, { "epoch": 1.10750162022035, "grad_norm": 0.039465539157390594, "learning_rate": 0.00016069580089112922, "loss": 0.3165, "step": 13671 }, { "epoch": 1.1075826312378483, "grad_norm": 0.04171768203377724, "learning_rate": 0.0001606913002385346, "loss": 0.2878, "step": 13672 }, { "epoch": 1.1076636422553467, "grad_norm": 0.03892872855067253, "learning_rate": 0.00016068679958593997, "loss": 0.3245, "step": 13673 }, { "epoch": 1.107744653272845, "grad_norm": 0.03817931190133095, "learning_rate": 0.00016068229893334535, "loss": 0.3052, "step": 13674 }, { "epoch": 1.1078256642903435, "grad_norm": 0.040611643344163895, "learning_rate": 0.0001606777982807507, "loss": 0.3325, "step": 13675 }, { "epoch": 1.107906675307842, "grad_norm": 0.03535166755318642, "learning_rate": 0.00016067329762815607, "loss": 0.3035, "step": 13676 }, { "epoch": 1.1079876863253402, "grad_norm": 0.035269174724817276, "learning_rate": 0.00016066879697556146, "loss": 0.2889, "step": 13677 }, { "epoch": 1.1080686973428386, "grad_norm": 0.036208029836416245, "learning_rate": 0.00016066429632296685, "loss": 0.3215, "step": 13678 }, { "epoch": 1.108149708360337, "grad_norm": 0.03432176262140274, "learning_rate": 0.0001606597956703722, "loss": 0.2806, "step": 13679 }, { "epoch": 1.1082307193778353, "grad_norm": 0.048559654504060745, "learning_rate": 0.0001606552950177776, "loss": 0.3267, "step": 13680 }, { "epoch": 1.1083117303953338, "grad_norm": 0.04100848361849785, "learning_rate": 0.00016065079436518295, "loss": 0.3196, "step": 13681 }, { "epoch": 1.108392741412832, "grad_norm": 0.03949177637696266, "learning_rate": 0.00016064629371258831, "loss": 0.3001, "step": 13682 }, { "epoch": 1.1084737524303305, "grad_norm": 0.036436039954423904, "learning_rate": 0.00016064179305999373, "loss": 0.3062, "step": 13683 }, { "epoch": 1.108554763447829, "grad_norm": 0.0427464097738266, "learning_rate": 0.0001606372924073991, "loss": 0.3726, "step": 13684 }, { "epoch": 1.1086357744653272, "grad_norm": 0.050769273191690445, "learning_rate": 0.00016063279175480445, "loss": 0.3005, "step": 13685 }, { "epoch": 1.1087167854828257, "grad_norm": 0.03931155800819397, "learning_rate": 0.00016062829110220984, "loss": 0.2743, "step": 13686 }, { "epoch": 1.108797796500324, "grad_norm": 0.04116865247488022, "learning_rate": 0.0001606237904496152, "loss": 0.3373, "step": 13687 }, { "epoch": 1.1088788075178224, "grad_norm": 0.04374229907989502, "learning_rate": 0.00016061928979702056, "loss": 0.3198, "step": 13688 }, { "epoch": 1.1089598185353209, "grad_norm": 0.045593272894620895, "learning_rate": 0.00016061478914442597, "loss": 0.3158, "step": 13689 }, { "epoch": 1.1090408295528191, "grad_norm": 0.043646588921546936, "learning_rate": 0.00016061028849183133, "loss": 0.3859, "step": 13690 }, { "epoch": 1.1091218405703176, "grad_norm": 0.0321785993874073, "learning_rate": 0.0001606057878392367, "loss": 0.2515, "step": 13691 }, { "epoch": 1.1092028515878158, "grad_norm": 0.04451699182391167, "learning_rate": 0.00016060128718664208, "loss": 0.333, "step": 13692 }, { "epoch": 1.1092838626053143, "grad_norm": 0.04380195960402489, "learning_rate": 0.00016059678653404744, "loss": 0.3268, "step": 13693 }, { "epoch": 1.1093648736228128, "grad_norm": 0.03785150125622749, "learning_rate": 0.0001605922858814528, "loss": 0.3239, "step": 13694 }, { "epoch": 1.109445884640311, "grad_norm": 0.03865273296833038, "learning_rate": 0.0001605877852288582, "loss": 0.2987, "step": 13695 }, { "epoch": 1.1095268956578095, "grad_norm": 0.034965209662914276, "learning_rate": 0.00016058328457626357, "loss": 0.2739, "step": 13696 }, { "epoch": 1.1096079066753077, "grad_norm": 0.034332286566495895, "learning_rate": 0.00016057878392366893, "loss": 0.2636, "step": 13697 }, { "epoch": 1.1096889176928062, "grad_norm": 0.03668433055281639, "learning_rate": 0.00016057428327107432, "loss": 0.3158, "step": 13698 }, { "epoch": 1.1097699287103047, "grad_norm": 0.04302181676030159, "learning_rate": 0.00016056978261847968, "loss": 0.3359, "step": 13699 }, { "epoch": 1.109850939727803, "grad_norm": 0.037046417593955994, "learning_rate": 0.00016056528196588504, "loss": 0.3147, "step": 13700 }, { "epoch": 1.1099319507453014, "grad_norm": 0.037314411252737045, "learning_rate": 0.00016056078131329045, "loss": 0.3443, "step": 13701 }, { "epoch": 1.1100129617627998, "grad_norm": 0.03947966545820236, "learning_rate": 0.00016055628066069581, "loss": 0.3145, "step": 13702 }, { "epoch": 1.110093972780298, "grad_norm": 0.03757372498512268, "learning_rate": 0.00016055178000810117, "loss": 0.3245, "step": 13703 }, { "epoch": 1.1101749837977966, "grad_norm": 0.041169408708810806, "learning_rate": 0.00016054727935550656, "loss": 0.3149, "step": 13704 }, { "epoch": 1.1102559948152948, "grad_norm": 0.04169107601046562, "learning_rate": 0.00016054277870291192, "loss": 0.3571, "step": 13705 }, { "epoch": 1.1103370058327933, "grad_norm": 0.03553405776619911, "learning_rate": 0.0001605382780503173, "loss": 0.2502, "step": 13706 }, { "epoch": 1.1104180168502917, "grad_norm": 0.042865827679634094, "learning_rate": 0.0001605337773977227, "loss": 0.3373, "step": 13707 }, { "epoch": 1.11049902786779, "grad_norm": 0.03908531740307808, "learning_rate": 0.00016052927674512806, "loss": 0.3003, "step": 13708 }, { "epoch": 1.1105800388852884, "grad_norm": 0.037927430123090744, "learning_rate": 0.00016052477609253342, "loss": 0.3063, "step": 13709 }, { "epoch": 1.1106610499027867, "grad_norm": 0.04091384634375572, "learning_rate": 0.0001605202754399388, "loss": 0.3533, "step": 13710 }, { "epoch": 1.1107420609202852, "grad_norm": 0.040006060153245926, "learning_rate": 0.00016051577478734416, "loss": 0.2912, "step": 13711 }, { "epoch": 1.1108230719377836, "grad_norm": 0.041538480669260025, "learning_rate": 0.00016051127413474955, "loss": 0.3069, "step": 13712 }, { "epoch": 1.1109040829552819, "grad_norm": 0.036893781274557114, "learning_rate": 0.00016050677348215494, "loss": 0.3215, "step": 13713 }, { "epoch": 1.1109850939727803, "grad_norm": 0.03629325330257416, "learning_rate": 0.0001605022728295603, "loss": 0.2887, "step": 13714 }, { "epoch": 1.1110661049902786, "grad_norm": 0.04308544471859932, "learning_rate": 0.00016049777217696566, "loss": 0.3463, "step": 13715 }, { "epoch": 1.111147116007777, "grad_norm": 0.03460239991545677, "learning_rate": 0.00016049327152437104, "loss": 0.3038, "step": 13716 }, { "epoch": 1.1112281270252755, "grad_norm": 0.03839297592639923, "learning_rate": 0.0001604887708717764, "loss": 0.3112, "step": 13717 }, { "epoch": 1.1113091380427738, "grad_norm": 0.04267555847764015, "learning_rate": 0.0001604842702191818, "loss": 0.341, "step": 13718 }, { "epoch": 1.1113901490602722, "grad_norm": 0.04148966446518898, "learning_rate": 0.00016047976956658718, "loss": 0.3254, "step": 13719 }, { "epoch": 1.1114711600777705, "grad_norm": 0.0401795469224453, "learning_rate": 0.00016047526891399254, "loss": 0.2806, "step": 13720 }, { "epoch": 1.111552171095269, "grad_norm": 0.0394086129963398, "learning_rate": 0.0001604707682613979, "loss": 0.3138, "step": 13721 }, { "epoch": 1.1116331821127674, "grad_norm": 0.041196051985025406, "learning_rate": 0.00016046626760880329, "loss": 0.3121, "step": 13722 }, { "epoch": 1.1117141931302656, "grad_norm": 0.03980787470936775, "learning_rate": 0.00016046176695620865, "loss": 0.2778, "step": 13723 }, { "epoch": 1.1117952041477641, "grad_norm": 0.045564647763967514, "learning_rate": 0.00016045726630361403, "loss": 0.3361, "step": 13724 }, { "epoch": 1.1118762151652626, "grad_norm": 0.04442529380321503, "learning_rate": 0.00016045276565101942, "loss": 0.3089, "step": 13725 }, { "epoch": 1.1119572261827608, "grad_norm": 0.03888750821352005, "learning_rate": 0.00016044826499842478, "loss": 0.291, "step": 13726 }, { "epoch": 1.1120382372002593, "grad_norm": 0.03487385809421539, "learning_rate": 0.00016044376434583014, "loss": 0.2892, "step": 13727 }, { "epoch": 1.1121192482177575, "grad_norm": 0.046714432537555695, "learning_rate": 0.00016043926369323553, "loss": 0.3259, "step": 13728 }, { "epoch": 1.112200259235256, "grad_norm": 0.04512115195393562, "learning_rate": 0.0001604347630406409, "loss": 0.3052, "step": 13729 }, { "epoch": 1.1122812702527545, "grad_norm": 0.039746955037117004, "learning_rate": 0.00016043026238804627, "loss": 0.3195, "step": 13730 }, { "epoch": 1.1123622812702527, "grad_norm": 0.037748560309410095, "learning_rate": 0.00016042576173545166, "loss": 0.3181, "step": 13731 }, { "epoch": 1.1124432922877512, "grad_norm": 0.04270657151937485, "learning_rate": 0.00016042126108285702, "loss": 0.3559, "step": 13732 }, { "epoch": 1.1125243033052494, "grad_norm": 0.04279698058962822, "learning_rate": 0.00016041676043026238, "loss": 0.311, "step": 13733 }, { "epoch": 1.112605314322748, "grad_norm": 0.04194524139165878, "learning_rate": 0.00016041225977766777, "loss": 0.2672, "step": 13734 }, { "epoch": 1.1126863253402464, "grad_norm": 0.041094232350587845, "learning_rate": 0.00016040775912507316, "loss": 0.2919, "step": 13735 }, { "epoch": 1.1127673363577446, "grad_norm": 0.03560243919491768, "learning_rate": 0.00016040325847247852, "loss": 0.2923, "step": 13736 }, { "epoch": 1.112848347375243, "grad_norm": 0.0350363552570343, "learning_rate": 0.0001603987578198839, "loss": 0.2972, "step": 13737 }, { "epoch": 1.1129293583927413, "grad_norm": 0.03738215193152428, "learning_rate": 0.00016039425716728926, "loss": 0.2931, "step": 13738 }, { "epoch": 1.1130103694102398, "grad_norm": 0.03830656036734581, "learning_rate": 0.00016038975651469462, "loss": 0.3016, "step": 13739 }, { "epoch": 1.1130913804277383, "grad_norm": 0.0368039608001709, "learning_rate": 0.0001603852558621, "loss": 0.2775, "step": 13740 }, { "epoch": 1.1131723914452365, "grad_norm": 0.03418166935443878, "learning_rate": 0.0001603807552095054, "loss": 0.2883, "step": 13741 }, { "epoch": 1.113253402462735, "grad_norm": 0.04323248937726021, "learning_rate": 0.00016037625455691076, "loss": 0.3058, "step": 13742 }, { "epoch": 1.1133344134802332, "grad_norm": 0.03451283648610115, "learning_rate": 0.00016037175390431615, "loss": 0.2643, "step": 13743 }, { "epoch": 1.1134154244977317, "grad_norm": 0.037699535489082336, "learning_rate": 0.0001603672532517215, "loss": 0.2845, "step": 13744 }, { "epoch": 1.1134964355152301, "grad_norm": 0.04381730407476425, "learning_rate": 0.00016036275259912687, "loss": 0.2783, "step": 13745 }, { "epoch": 1.1135774465327284, "grad_norm": 0.04685590788722038, "learning_rate": 0.00016035825194653225, "loss": 0.3031, "step": 13746 }, { "epoch": 1.1136584575502269, "grad_norm": 0.044405192136764526, "learning_rate": 0.00016035375129393764, "loss": 0.3171, "step": 13747 }, { "epoch": 1.1137394685677253, "grad_norm": 0.047562070190906525, "learning_rate": 0.000160349250641343, "loss": 0.3485, "step": 13748 }, { "epoch": 1.1138204795852236, "grad_norm": 0.04554133117198944, "learning_rate": 0.0001603447499887484, "loss": 0.3508, "step": 13749 }, { "epoch": 1.113901490602722, "grad_norm": 0.037589896470308304, "learning_rate": 0.00016034024933615375, "loss": 0.2722, "step": 13750 }, { "epoch": 1.1139825016202203, "grad_norm": 0.04414276033639908, "learning_rate": 0.0001603357486835591, "loss": 0.3225, "step": 13751 }, { "epoch": 1.1140635126377187, "grad_norm": 0.03974426910281181, "learning_rate": 0.0001603312480309645, "loss": 0.296, "step": 13752 }, { "epoch": 1.1141445236552172, "grad_norm": 0.036167360842227936, "learning_rate": 0.00016032674737836988, "loss": 0.3036, "step": 13753 }, { "epoch": 1.1142255346727155, "grad_norm": 0.035503290593624115, "learning_rate": 0.00016032224672577524, "loss": 0.3042, "step": 13754 }, { "epoch": 1.114306545690214, "grad_norm": 0.041777096688747406, "learning_rate": 0.00016031774607318063, "loss": 0.3543, "step": 13755 }, { "epoch": 1.1143875567077122, "grad_norm": 0.037984009832143784, "learning_rate": 0.000160313245420586, "loss": 0.2943, "step": 13756 }, { "epoch": 1.1144685677252106, "grad_norm": 0.03160495311021805, "learning_rate": 0.00016030874476799135, "loss": 0.2878, "step": 13757 }, { "epoch": 1.114549578742709, "grad_norm": 0.04195152595639229, "learning_rate": 0.00016030424411539676, "loss": 0.2704, "step": 13758 }, { "epoch": 1.1146305897602073, "grad_norm": 0.038813620805740356, "learning_rate": 0.00016029974346280212, "loss": 0.3305, "step": 13759 }, { "epoch": 1.1147116007777058, "grad_norm": 0.04088602215051651, "learning_rate": 0.00016029524281020748, "loss": 0.3159, "step": 13760 }, { "epoch": 1.114792611795204, "grad_norm": 0.039398763328790665, "learning_rate": 0.00016029074215761287, "loss": 0.2996, "step": 13761 }, { "epoch": 1.1148736228127025, "grad_norm": 0.03807293251156807, "learning_rate": 0.00016028624150501823, "loss": 0.3218, "step": 13762 }, { "epoch": 1.114954633830201, "grad_norm": 0.04101305827498436, "learning_rate": 0.0001602817408524236, "loss": 0.2992, "step": 13763 }, { "epoch": 1.1150356448476992, "grad_norm": 0.050497643649578094, "learning_rate": 0.000160277240199829, "loss": 0.3338, "step": 13764 }, { "epoch": 1.1151166558651977, "grad_norm": 0.03950982540845871, "learning_rate": 0.00016027273954723436, "loss": 0.2737, "step": 13765 }, { "epoch": 1.115197666882696, "grad_norm": 0.041152723133563995, "learning_rate": 0.00016026823889463972, "loss": 0.3497, "step": 13766 }, { "epoch": 1.1152786779001944, "grad_norm": 0.045656152069568634, "learning_rate": 0.0001602637382420451, "loss": 0.3661, "step": 13767 }, { "epoch": 1.1153596889176929, "grad_norm": 0.035172052681446075, "learning_rate": 0.00016025923758945047, "loss": 0.2839, "step": 13768 }, { "epoch": 1.1154406999351911, "grad_norm": 0.04323038086295128, "learning_rate": 0.00016025473693685583, "loss": 0.3197, "step": 13769 }, { "epoch": 1.1155217109526896, "grad_norm": 0.04258815944194794, "learning_rate": 0.00016025023628426125, "loss": 0.3421, "step": 13770 }, { "epoch": 1.115602721970188, "grad_norm": 0.04450952261686325, "learning_rate": 0.0001602457356316666, "loss": 0.3218, "step": 13771 }, { "epoch": 1.1156837329876863, "grad_norm": 0.03626592829823494, "learning_rate": 0.00016024123497907197, "loss": 0.2947, "step": 13772 }, { "epoch": 1.1157647440051848, "grad_norm": 0.03189956769347191, "learning_rate": 0.00016023673432647735, "loss": 0.2906, "step": 13773 }, { "epoch": 1.115845755022683, "grad_norm": 0.037484340369701385, "learning_rate": 0.0001602322336738827, "loss": 0.2983, "step": 13774 }, { "epoch": 1.1159267660401815, "grad_norm": 0.03559768199920654, "learning_rate": 0.00016022773302128807, "loss": 0.2906, "step": 13775 }, { "epoch": 1.1160077770576797, "grad_norm": 0.046286530792713165, "learning_rate": 0.0001602232323686935, "loss": 0.3647, "step": 13776 }, { "epoch": 1.1160887880751782, "grad_norm": 0.03982963413000107, "learning_rate": 0.00016021873171609885, "loss": 0.3175, "step": 13777 }, { "epoch": 1.1161697990926767, "grad_norm": 0.03955851122736931, "learning_rate": 0.0001602142310635042, "loss": 0.3196, "step": 13778 }, { "epoch": 1.116250810110175, "grad_norm": 0.03698073327541351, "learning_rate": 0.0001602097304109096, "loss": 0.2503, "step": 13779 }, { "epoch": 1.1163318211276734, "grad_norm": 0.03260596841573715, "learning_rate": 0.00016020522975831496, "loss": 0.2944, "step": 13780 }, { "epoch": 1.1164128321451718, "grad_norm": 0.03877520188689232, "learning_rate": 0.00016020072910572032, "loss": 0.3295, "step": 13781 }, { "epoch": 1.11649384316267, "grad_norm": 0.045589517802000046, "learning_rate": 0.00016019622845312573, "loss": 0.3598, "step": 13782 }, { "epoch": 1.1165748541801686, "grad_norm": 0.04367116466164589, "learning_rate": 0.0001601917278005311, "loss": 0.3007, "step": 13783 }, { "epoch": 1.1166558651976668, "grad_norm": 0.03556925430893898, "learning_rate": 0.00016018722714793645, "loss": 0.3085, "step": 13784 }, { "epoch": 1.1167368762151653, "grad_norm": 0.03693739324808121, "learning_rate": 0.00016018272649534184, "loss": 0.3143, "step": 13785 }, { "epoch": 1.1168178872326637, "grad_norm": 0.03582121804356575, "learning_rate": 0.0001601782258427472, "loss": 0.267, "step": 13786 }, { "epoch": 1.116898898250162, "grad_norm": 0.03946210816502571, "learning_rate": 0.00016017372519015258, "loss": 0.2989, "step": 13787 }, { "epoch": 1.1169799092676604, "grad_norm": 0.045077886432409286, "learning_rate": 0.00016016922453755797, "loss": 0.3211, "step": 13788 }, { "epoch": 1.1170609202851587, "grad_norm": 0.03933589905500412, "learning_rate": 0.00016016472388496333, "loss": 0.3248, "step": 13789 }, { "epoch": 1.1171419313026572, "grad_norm": 0.03940128535032272, "learning_rate": 0.0001601602232323687, "loss": 0.3191, "step": 13790 }, { "epoch": 1.1172229423201556, "grad_norm": 0.035947635769844055, "learning_rate": 0.00016015572257977408, "loss": 0.3125, "step": 13791 }, { "epoch": 1.1173039533376539, "grad_norm": 0.038889285176992416, "learning_rate": 0.00016015122192717944, "loss": 0.3005, "step": 13792 }, { "epoch": 1.1173849643551523, "grad_norm": 0.039641644805669785, "learning_rate": 0.00016014672127458483, "loss": 0.3274, "step": 13793 }, { "epoch": 1.1174659753726506, "grad_norm": 0.0386282354593277, "learning_rate": 0.0001601422206219902, "loss": 0.3202, "step": 13794 }, { "epoch": 1.117546986390149, "grad_norm": 0.03944721817970276, "learning_rate": 0.00016013771996939557, "loss": 0.3184, "step": 13795 }, { "epoch": 1.1176279974076475, "grad_norm": 0.04022477939724922, "learning_rate": 0.00016013321931680093, "loss": 0.31, "step": 13796 }, { "epoch": 1.1177090084251458, "grad_norm": 0.04214589670300484, "learning_rate": 0.00016012871866420632, "loss": 0.3351, "step": 13797 }, { "epoch": 1.1177900194426442, "grad_norm": 0.036494381725788116, "learning_rate": 0.00016012421801161168, "loss": 0.2615, "step": 13798 }, { "epoch": 1.1178710304601425, "grad_norm": 0.04439876228570938, "learning_rate": 0.00016011971735901707, "loss": 0.2859, "step": 13799 }, { "epoch": 1.117952041477641, "grad_norm": 0.0420045368373394, "learning_rate": 0.00016011521670642245, "loss": 0.3235, "step": 13800 }, { "epoch": 1.1180330524951394, "grad_norm": 0.037003420293331146, "learning_rate": 0.00016011071605382781, "loss": 0.2992, "step": 13801 }, { "epoch": 1.1181140635126376, "grad_norm": 0.04284486547112465, "learning_rate": 0.00016010621540123317, "loss": 0.3373, "step": 13802 }, { "epoch": 1.1181950745301361, "grad_norm": 0.040728338062763214, "learning_rate": 0.00016010171474863856, "loss": 0.3062, "step": 13803 }, { "epoch": 1.1182760855476346, "grad_norm": 0.039135854691267014, "learning_rate": 0.00016009721409604392, "loss": 0.3061, "step": 13804 }, { "epoch": 1.1183570965651328, "grad_norm": 0.035060565918684006, "learning_rate": 0.0001600927134434493, "loss": 0.2931, "step": 13805 }, { "epoch": 1.1184381075826313, "grad_norm": 0.043133411556482315, "learning_rate": 0.0001600882127908547, "loss": 0.3366, "step": 13806 }, { "epoch": 1.1185191186001295, "grad_norm": 0.0373905710875988, "learning_rate": 0.00016008371213826006, "loss": 0.3236, "step": 13807 }, { "epoch": 1.118600129617628, "grad_norm": 0.04570890590548515, "learning_rate": 0.00016007921148566542, "loss": 0.3283, "step": 13808 }, { "epoch": 1.1186811406351265, "grad_norm": 0.042422425001859665, "learning_rate": 0.0001600747108330708, "loss": 0.345, "step": 13809 }, { "epoch": 1.1187621516526247, "grad_norm": 0.03744294494390488, "learning_rate": 0.0001600702101804762, "loss": 0.2998, "step": 13810 }, { "epoch": 1.1188431626701232, "grad_norm": 0.04061727970838547, "learning_rate": 0.00016006570952788155, "loss": 0.2984, "step": 13811 }, { "epoch": 1.1189241736876214, "grad_norm": 0.041145216673612595, "learning_rate": 0.00016006120887528694, "loss": 0.3325, "step": 13812 }, { "epoch": 1.11900518470512, "grad_norm": 0.03937564790248871, "learning_rate": 0.0001600567082226923, "loss": 0.2948, "step": 13813 }, { "epoch": 1.1190861957226184, "grad_norm": 0.039172567427158356, "learning_rate": 0.00016005220757009766, "loss": 0.3151, "step": 13814 }, { "epoch": 1.1191672067401166, "grad_norm": 0.03841695189476013, "learning_rate": 0.00016004770691750304, "loss": 0.3169, "step": 13815 }, { "epoch": 1.119248217757615, "grad_norm": 0.04288164898753166, "learning_rate": 0.00016004320626490843, "loss": 0.3313, "step": 13816 }, { "epoch": 1.1193292287751133, "grad_norm": 0.036649782210588455, "learning_rate": 0.0001600387056123138, "loss": 0.2685, "step": 13817 }, { "epoch": 1.1194102397926118, "grad_norm": 0.03929361701011658, "learning_rate": 0.00016003420495971918, "loss": 0.359, "step": 13818 }, { "epoch": 1.1194912508101102, "grad_norm": 0.041453681886196136, "learning_rate": 0.00016002970430712454, "loss": 0.3186, "step": 13819 }, { "epoch": 1.1195722618276085, "grad_norm": 0.038998812437057495, "learning_rate": 0.0001600252036545299, "loss": 0.3488, "step": 13820 }, { "epoch": 1.119653272845107, "grad_norm": 0.04024531692266464, "learning_rate": 0.00016002070300193529, "loss": 0.3356, "step": 13821 }, { "epoch": 1.1197342838626052, "grad_norm": 0.037038642913103104, "learning_rate": 0.00016001620234934067, "loss": 0.2813, "step": 13822 }, { "epoch": 1.1198152948801037, "grad_norm": 0.039935849606990814, "learning_rate": 0.00016001170169674603, "loss": 0.3184, "step": 13823 }, { "epoch": 1.1198963058976021, "grad_norm": 0.039462849497795105, "learning_rate": 0.00016000720104415142, "loss": 0.3609, "step": 13824 }, { "epoch": 1.1199773169151004, "grad_norm": 0.04245318844914436, "learning_rate": 0.00016000270039155678, "loss": 0.3234, "step": 13825 }, { "epoch": 1.1200583279325989, "grad_norm": 0.04086868464946747, "learning_rate": 0.00015999819973896214, "loss": 0.3421, "step": 13826 }, { "epoch": 1.1201393389500973, "grad_norm": 0.03381660208106041, "learning_rate": 0.00015999369908636753, "loss": 0.2728, "step": 13827 }, { "epoch": 1.1202203499675956, "grad_norm": 0.0502673014998436, "learning_rate": 0.00015998919843377292, "loss": 0.3618, "step": 13828 }, { "epoch": 1.120301360985094, "grad_norm": 0.04325926676392555, "learning_rate": 0.00015998469778117828, "loss": 0.3295, "step": 13829 }, { "epoch": 1.1203823720025923, "grad_norm": 0.04531802982091904, "learning_rate": 0.00015998019712858366, "loss": 0.3748, "step": 13830 }, { "epoch": 1.1204633830200907, "grad_norm": 0.03567253053188324, "learning_rate": 0.00015997569647598902, "loss": 0.307, "step": 13831 }, { "epoch": 1.1205443940375892, "grad_norm": 0.040204357355833054, "learning_rate": 0.00015997119582339438, "loss": 0.3271, "step": 13832 }, { "epoch": 1.1206254050550875, "grad_norm": 0.03894279897212982, "learning_rate": 0.00015996669517079977, "loss": 0.3309, "step": 13833 }, { "epoch": 1.120706416072586, "grad_norm": 0.0587170273065567, "learning_rate": 0.00015996219451820516, "loss": 0.353, "step": 13834 }, { "epoch": 1.1207874270900842, "grad_norm": 0.03796997666358948, "learning_rate": 0.00015995769386561052, "loss": 0.2953, "step": 13835 }, { "epoch": 1.1208684381075826, "grad_norm": 0.03643454238772392, "learning_rate": 0.0001599531932130159, "loss": 0.2976, "step": 13836 }, { "epoch": 1.120949449125081, "grad_norm": 0.038293469697237015, "learning_rate": 0.00015994869256042126, "loss": 0.3251, "step": 13837 }, { "epoch": 1.1210304601425793, "grad_norm": 0.04205470159649849, "learning_rate": 0.00015994419190782662, "loss": 0.3321, "step": 13838 }, { "epoch": 1.1211114711600778, "grad_norm": 0.04378296434879303, "learning_rate": 0.00015993969125523204, "loss": 0.3149, "step": 13839 }, { "epoch": 1.121192482177576, "grad_norm": 0.03723248094320297, "learning_rate": 0.0001599351906026374, "loss": 0.3334, "step": 13840 }, { "epoch": 1.1212734931950745, "grad_norm": 0.03505038842558861, "learning_rate": 0.00015993068995004276, "loss": 0.2801, "step": 13841 }, { "epoch": 1.121354504212573, "grad_norm": 0.03849254548549652, "learning_rate": 0.00015992618929744815, "loss": 0.3196, "step": 13842 }, { "epoch": 1.1214355152300712, "grad_norm": 0.04109663888812065, "learning_rate": 0.0001599216886448535, "loss": 0.3283, "step": 13843 }, { "epoch": 1.1215165262475697, "grad_norm": 0.04248509556055069, "learning_rate": 0.00015991718799225887, "loss": 0.3455, "step": 13844 }, { "epoch": 1.121597537265068, "grad_norm": 0.03689438849687576, "learning_rate": 0.00015991268733966428, "loss": 0.3069, "step": 13845 }, { "epoch": 1.1216785482825664, "grad_norm": 0.04201400279998779, "learning_rate": 0.00015990818668706964, "loss": 0.3465, "step": 13846 }, { "epoch": 1.1217595593000649, "grad_norm": 0.040428366512060165, "learning_rate": 0.000159903686034475, "loss": 0.3367, "step": 13847 }, { "epoch": 1.1218405703175631, "grad_norm": 0.04223720356822014, "learning_rate": 0.0001598991853818804, "loss": 0.3644, "step": 13848 }, { "epoch": 1.1219215813350616, "grad_norm": 0.0399593859910965, "learning_rate": 0.00015989468472928575, "loss": 0.2838, "step": 13849 }, { "epoch": 1.12200259235256, "grad_norm": 0.038730867207050323, "learning_rate": 0.0001598901840766911, "loss": 0.3084, "step": 13850 }, { "epoch": 1.1220836033700583, "grad_norm": 0.04075966402888298, "learning_rate": 0.00015988568342409652, "loss": 0.3212, "step": 13851 }, { "epoch": 1.1221646143875568, "grad_norm": 0.04116553068161011, "learning_rate": 0.00015988118277150188, "loss": 0.3366, "step": 13852 }, { "epoch": 1.122245625405055, "grad_norm": 0.04146783798933029, "learning_rate": 0.00015987668211890724, "loss": 0.3376, "step": 13853 }, { "epoch": 1.1223266364225535, "grad_norm": 0.038811422884464264, "learning_rate": 0.00015987218146631263, "loss": 0.26, "step": 13854 }, { "epoch": 1.122407647440052, "grad_norm": 0.035753101110458374, "learning_rate": 0.000159867680813718, "loss": 0.338, "step": 13855 }, { "epoch": 1.1224886584575502, "grad_norm": 0.03897230327129364, "learning_rate": 0.00015986318016112335, "loss": 0.2695, "step": 13856 }, { "epoch": 1.1225696694750487, "grad_norm": 0.044433873146772385, "learning_rate": 0.00015985867950852876, "loss": 0.3424, "step": 13857 }, { "epoch": 1.122650680492547, "grad_norm": 0.04034457355737686, "learning_rate": 0.00015985417885593412, "loss": 0.3306, "step": 13858 }, { "epoch": 1.1227316915100454, "grad_norm": 0.04004496708512306, "learning_rate": 0.00015984967820333948, "loss": 0.3126, "step": 13859 }, { "epoch": 1.1228127025275438, "grad_norm": 0.041642673313617706, "learning_rate": 0.00015984517755074487, "loss": 0.3152, "step": 13860 }, { "epoch": 1.122893713545042, "grad_norm": 0.04158201068639755, "learning_rate": 0.00015984067689815023, "loss": 0.3176, "step": 13861 }, { "epoch": 1.1229747245625405, "grad_norm": 0.038879748433828354, "learning_rate": 0.0001598361762455556, "loss": 0.3179, "step": 13862 }, { "epoch": 1.1230557355800388, "grad_norm": 0.038589294999837875, "learning_rate": 0.000159831675592961, "loss": 0.3396, "step": 13863 }, { "epoch": 1.1231367465975373, "grad_norm": 0.042497556656599045, "learning_rate": 0.00015982717494036636, "loss": 0.3039, "step": 13864 }, { "epoch": 1.1232177576150357, "grad_norm": 0.040472209453582764, "learning_rate": 0.00015982267428777172, "loss": 0.3498, "step": 13865 }, { "epoch": 1.123298768632534, "grad_norm": 0.03559787943959236, "learning_rate": 0.0001598181736351771, "loss": 0.3019, "step": 13866 }, { "epoch": 1.1233797796500324, "grad_norm": 0.039029110223054886, "learning_rate": 0.00015981367298258247, "loss": 0.3296, "step": 13867 }, { "epoch": 1.1234607906675307, "grad_norm": 0.03882824629545212, "learning_rate": 0.00015980917232998786, "loss": 0.3076, "step": 13868 }, { "epoch": 1.1235418016850292, "grad_norm": 0.03973846137523651, "learning_rate": 0.00015980467167739325, "loss": 0.2771, "step": 13869 }, { "epoch": 1.1236228127025276, "grad_norm": 0.035050515085458755, "learning_rate": 0.0001598001710247986, "loss": 0.2669, "step": 13870 }, { "epoch": 1.1237038237200259, "grad_norm": 0.04399830475449562, "learning_rate": 0.00015979567037220397, "loss": 0.3435, "step": 13871 }, { "epoch": 1.1237848347375243, "grad_norm": 0.042005930095911026, "learning_rate": 0.00015979116971960935, "loss": 0.323, "step": 13872 }, { "epoch": 1.1238658457550228, "grad_norm": 0.046611249446868896, "learning_rate": 0.00015978666906701471, "loss": 0.3635, "step": 13873 }, { "epoch": 1.123946856772521, "grad_norm": 0.03784230723977089, "learning_rate": 0.0001597821684144201, "loss": 0.2741, "step": 13874 }, { "epoch": 1.1240278677900195, "grad_norm": 0.037510622292757034, "learning_rate": 0.0001597776677618255, "loss": 0.3011, "step": 13875 }, { "epoch": 1.1241088788075178, "grad_norm": 0.03867172822356224, "learning_rate": 0.00015977316710923085, "loss": 0.3401, "step": 13876 }, { "epoch": 1.1241898898250162, "grad_norm": 0.0382206067442894, "learning_rate": 0.0001597686664566362, "loss": 0.3258, "step": 13877 }, { "epoch": 1.1242709008425145, "grad_norm": 0.041581250727176666, "learning_rate": 0.0001597641658040416, "loss": 0.2976, "step": 13878 }, { "epoch": 1.124351911860013, "grad_norm": 0.039854757487773895, "learning_rate": 0.00015975966515144696, "loss": 0.2854, "step": 13879 }, { "epoch": 1.1244329228775114, "grad_norm": 0.04291190207004547, "learning_rate": 0.00015975516449885234, "loss": 0.2964, "step": 13880 }, { "epoch": 1.1245139338950096, "grad_norm": 0.038031816482543945, "learning_rate": 0.00015975066384625773, "loss": 0.2957, "step": 13881 }, { "epoch": 1.124594944912508, "grad_norm": 0.04880787804722786, "learning_rate": 0.0001597461631936631, "loss": 0.3277, "step": 13882 }, { "epoch": 1.1246759559300066, "grad_norm": 0.0424063503742218, "learning_rate": 0.00015974166254106845, "loss": 0.321, "step": 13883 }, { "epoch": 1.1247569669475048, "grad_norm": 0.0411037839949131, "learning_rate": 0.00015973716188847384, "loss": 0.3259, "step": 13884 }, { "epoch": 1.1248379779650033, "grad_norm": 0.03793978691101074, "learning_rate": 0.0001597326612358792, "loss": 0.2871, "step": 13885 }, { "epoch": 1.1249189889825015, "grad_norm": 0.040353622287511826, "learning_rate": 0.00015972816058328458, "loss": 0.2924, "step": 13886 }, { "epoch": 1.125, "grad_norm": 0.042982496321201324, "learning_rate": 0.00015972365993068997, "loss": 0.3395, "step": 13887 }, { "epoch": 1.1250810110174985, "grad_norm": 0.04520965367555618, "learning_rate": 0.00015971915927809533, "loss": 0.3761, "step": 13888 }, { "epoch": 1.1251620220349967, "grad_norm": 0.036293741315603256, "learning_rate": 0.0001597146586255007, "loss": 0.2902, "step": 13889 }, { "epoch": 1.1252430330524952, "grad_norm": 0.0378212071955204, "learning_rate": 0.00015971015797290608, "loss": 0.2862, "step": 13890 }, { "epoch": 1.1253240440699934, "grad_norm": 0.036096226423978806, "learning_rate": 0.00015970565732031147, "loss": 0.3155, "step": 13891 }, { "epoch": 1.125405055087492, "grad_norm": 0.03830007091164589, "learning_rate": 0.00015970115666771683, "loss": 0.3307, "step": 13892 }, { "epoch": 1.1254860661049904, "grad_norm": 0.03723623603582382, "learning_rate": 0.0001596966560151222, "loss": 0.2781, "step": 13893 }, { "epoch": 1.1255670771224886, "grad_norm": 0.039892762899398804, "learning_rate": 0.00015969215536252757, "loss": 0.3453, "step": 13894 }, { "epoch": 1.125648088139987, "grad_norm": 0.03982089087367058, "learning_rate": 0.00015968765470993293, "loss": 0.3028, "step": 13895 }, { "epoch": 1.1257290991574855, "grad_norm": 0.03398760408163071, "learning_rate": 0.00015968315405733832, "loss": 0.2907, "step": 13896 }, { "epoch": 1.1258101101749838, "grad_norm": 0.03256816416978836, "learning_rate": 0.0001596786534047437, "loss": 0.2921, "step": 13897 }, { "epoch": 1.1258911211924822, "grad_norm": 0.03531118854880333, "learning_rate": 0.00015967415275214907, "loss": 0.2794, "step": 13898 }, { "epoch": 1.1259721322099805, "grad_norm": 0.04041526839137077, "learning_rate": 0.00015966965209955445, "loss": 0.3278, "step": 13899 }, { "epoch": 1.126053143227479, "grad_norm": 0.039820361882448196, "learning_rate": 0.00015966515144695981, "loss": 0.3472, "step": 13900 }, { "epoch": 1.1261341542449772, "grad_norm": 0.03759269416332245, "learning_rate": 0.00015966065079436517, "loss": 0.3216, "step": 13901 }, { "epoch": 1.1262151652624757, "grad_norm": 0.03688056394457817, "learning_rate": 0.00015965615014177056, "loss": 0.2979, "step": 13902 }, { "epoch": 1.1262961762799741, "grad_norm": 0.03714337944984436, "learning_rate": 0.00015965164948917595, "loss": 0.3015, "step": 13903 }, { "epoch": 1.1263771872974724, "grad_norm": 0.03759916499257088, "learning_rate": 0.0001596471488365813, "loss": 0.3035, "step": 13904 }, { "epoch": 1.1264581983149708, "grad_norm": 0.04206295683979988, "learning_rate": 0.0001596426481839867, "loss": 0.2859, "step": 13905 }, { "epoch": 1.1265392093324693, "grad_norm": 0.04519706219434738, "learning_rate": 0.00015963814753139206, "loss": 0.3358, "step": 13906 }, { "epoch": 1.1266202203499676, "grad_norm": 0.038674045354127884, "learning_rate": 0.00015963364687879742, "loss": 0.3141, "step": 13907 }, { "epoch": 1.126701231367466, "grad_norm": 0.04178118705749512, "learning_rate": 0.0001596291462262028, "loss": 0.319, "step": 13908 }, { "epoch": 1.1267822423849643, "grad_norm": 0.03776085376739502, "learning_rate": 0.0001596246455736082, "loss": 0.2629, "step": 13909 }, { "epoch": 1.1268632534024627, "grad_norm": 0.035705432295799255, "learning_rate": 0.00015962014492101355, "loss": 0.2704, "step": 13910 }, { "epoch": 1.1269442644199612, "grad_norm": 0.03641466423869133, "learning_rate": 0.00015961564426841894, "loss": 0.2981, "step": 13911 }, { "epoch": 1.1270252754374595, "grad_norm": 0.0384918749332428, "learning_rate": 0.0001596111436158243, "loss": 0.2709, "step": 13912 }, { "epoch": 1.127106286454958, "grad_norm": 0.03596267104148865, "learning_rate": 0.00015960664296322966, "loss": 0.3003, "step": 13913 }, { "epoch": 1.1271872974724562, "grad_norm": 0.0391799733042717, "learning_rate": 0.00015960214231063505, "loss": 0.3269, "step": 13914 }, { "epoch": 1.1272683084899546, "grad_norm": 0.046616896986961365, "learning_rate": 0.00015959764165804043, "loss": 0.3749, "step": 13915 }, { "epoch": 1.127349319507453, "grad_norm": 0.043165434151887894, "learning_rate": 0.0001595931410054458, "loss": 0.3351, "step": 13916 }, { "epoch": 1.1274303305249513, "grad_norm": 0.03316278010606766, "learning_rate": 0.00015958864035285118, "loss": 0.2853, "step": 13917 }, { "epoch": 1.1275113415424498, "grad_norm": 0.0387754887342453, "learning_rate": 0.00015958413970025654, "loss": 0.3071, "step": 13918 }, { "epoch": 1.1275923525599483, "grad_norm": 0.03791152685880661, "learning_rate": 0.0001595796390476619, "loss": 0.3022, "step": 13919 }, { "epoch": 1.1276733635774465, "grad_norm": 0.044122401624917984, "learning_rate": 0.00015957513839506731, "loss": 0.3311, "step": 13920 }, { "epoch": 1.127754374594945, "grad_norm": 0.03224045783281326, "learning_rate": 0.00015957063774247267, "loss": 0.2618, "step": 13921 }, { "epoch": 1.1278353856124432, "grad_norm": 0.04090102016925812, "learning_rate": 0.00015956613708987803, "loss": 0.3309, "step": 13922 }, { "epoch": 1.1279163966299417, "grad_norm": 0.04744292050600052, "learning_rate": 0.00015956163643728342, "loss": 0.3234, "step": 13923 }, { "epoch": 1.12799740764744, "grad_norm": 0.040213972330093384, "learning_rate": 0.00015955713578468878, "loss": 0.3255, "step": 13924 }, { "epoch": 1.1280784186649384, "grad_norm": 0.04174848645925522, "learning_rate": 0.00015955263513209414, "loss": 0.3219, "step": 13925 }, { "epoch": 1.1281594296824369, "grad_norm": 0.04265562444925308, "learning_rate": 0.00015954813447949956, "loss": 0.3139, "step": 13926 }, { "epoch": 1.1282404406999351, "grad_norm": 0.03909571096301079, "learning_rate": 0.00015954363382690492, "loss": 0.2832, "step": 13927 }, { "epoch": 1.1283214517174336, "grad_norm": 0.03760486841201782, "learning_rate": 0.00015953913317431028, "loss": 0.3112, "step": 13928 }, { "epoch": 1.128402462734932, "grad_norm": 0.03686079382896423, "learning_rate": 0.00015953463252171566, "loss": 0.2786, "step": 13929 }, { "epoch": 1.1284834737524303, "grad_norm": 0.03789995238184929, "learning_rate": 0.00015953013186912102, "loss": 0.3289, "step": 13930 }, { "epoch": 1.1285644847699288, "grad_norm": 0.03965533524751663, "learning_rate": 0.00015952563121652638, "loss": 0.3029, "step": 13931 }, { "epoch": 1.128645495787427, "grad_norm": 0.04098164662718773, "learning_rate": 0.0001595211305639318, "loss": 0.2928, "step": 13932 }, { "epoch": 1.1287265068049255, "grad_norm": 0.03985410928726196, "learning_rate": 0.00015951662991133716, "loss": 0.322, "step": 13933 }, { "epoch": 1.128807517822424, "grad_norm": 0.038576241582632065, "learning_rate": 0.00015951212925874252, "loss": 0.3067, "step": 13934 }, { "epoch": 1.1288885288399222, "grad_norm": 0.0459025502204895, "learning_rate": 0.0001595076286061479, "loss": 0.3236, "step": 13935 }, { "epoch": 1.1289695398574207, "grad_norm": 0.044762130826711655, "learning_rate": 0.00015950312795355326, "loss": 0.3272, "step": 13936 }, { "epoch": 1.129050550874919, "grad_norm": 0.035859379917383194, "learning_rate": 0.00015949862730095862, "loss": 0.2561, "step": 13937 }, { "epoch": 1.1291315618924174, "grad_norm": 0.036720190197229385, "learning_rate": 0.00015949412664836404, "loss": 0.2986, "step": 13938 }, { "epoch": 1.1292125729099158, "grad_norm": 0.04893483966588974, "learning_rate": 0.0001594896259957694, "loss": 0.3214, "step": 13939 }, { "epoch": 1.129293583927414, "grad_norm": 0.03709689900279045, "learning_rate": 0.00015948512534317476, "loss": 0.312, "step": 13940 }, { "epoch": 1.1293745949449125, "grad_norm": 0.040259964764118195, "learning_rate": 0.00015948062469058015, "loss": 0.2999, "step": 13941 }, { "epoch": 1.1294556059624108, "grad_norm": 0.044082023203372955, "learning_rate": 0.0001594761240379855, "loss": 0.3319, "step": 13942 }, { "epoch": 1.1295366169799093, "grad_norm": 0.037773314863443375, "learning_rate": 0.0001594716233853909, "loss": 0.3176, "step": 13943 }, { "epoch": 1.1296176279974077, "grad_norm": 0.04064793884754181, "learning_rate": 0.00015946712273279628, "loss": 0.3004, "step": 13944 }, { "epoch": 1.129698639014906, "grad_norm": 0.04040275514125824, "learning_rate": 0.00015946262208020164, "loss": 0.328, "step": 13945 }, { "epoch": 1.1297796500324044, "grad_norm": 0.043435558676719666, "learning_rate": 0.000159458121427607, "loss": 0.3366, "step": 13946 }, { "epoch": 1.1298606610499027, "grad_norm": 0.03831151872873306, "learning_rate": 0.0001594536207750124, "loss": 0.2747, "step": 13947 }, { "epoch": 1.1299416720674011, "grad_norm": 0.037033118307590485, "learning_rate": 0.00015944912012241775, "loss": 0.3001, "step": 13948 }, { "epoch": 1.1300226830848996, "grad_norm": 0.03587247431278229, "learning_rate": 0.00015944461946982313, "loss": 0.275, "step": 13949 }, { "epoch": 1.1301036941023979, "grad_norm": 0.04261304438114166, "learning_rate": 0.00015944011881722852, "loss": 0.2938, "step": 13950 }, { "epoch": 1.1301847051198963, "grad_norm": 0.040578443557024, "learning_rate": 0.00015943561816463388, "loss": 0.3367, "step": 13951 }, { "epoch": 1.1302657161373948, "grad_norm": 0.044319093227386475, "learning_rate": 0.00015943111751203924, "loss": 0.3281, "step": 13952 }, { "epoch": 1.130346727154893, "grad_norm": 0.046974748373031616, "learning_rate": 0.00015942661685944463, "loss": 0.3121, "step": 13953 }, { "epoch": 1.1304277381723915, "grad_norm": 0.04281812161207199, "learning_rate": 0.00015942211620685, "loss": 0.3386, "step": 13954 }, { "epoch": 1.1305087491898898, "grad_norm": 0.03786301240324974, "learning_rate": 0.00015941761555425538, "loss": 0.3311, "step": 13955 }, { "epoch": 1.1305897602073882, "grad_norm": 0.04455956444144249, "learning_rate": 0.00015941311490166076, "loss": 0.3553, "step": 13956 }, { "epoch": 1.1306707712248865, "grad_norm": 0.03868843615055084, "learning_rate": 0.00015940861424906612, "loss": 0.3131, "step": 13957 }, { "epoch": 1.130751782242385, "grad_norm": 0.038352277129888535, "learning_rate": 0.00015940411359647148, "loss": 0.3174, "step": 13958 }, { "epoch": 1.1308327932598834, "grad_norm": 0.047438718378543854, "learning_rate": 0.00015939961294387687, "loss": 0.3297, "step": 13959 }, { "epoch": 1.1309138042773816, "grad_norm": 0.04253394529223442, "learning_rate": 0.00015939511229128223, "loss": 0.2715, "step": 13960 }, { "epoch": 1.13099481529488, "grad_norm": 0.04169415682554245, "learning_rate": 0.00015939061163868762, "loss": 0.3359, "step": 13961 }, { "epoch": 1.1310758263123786, "grad_norm": 0.036576855927705765, "learning_rate": 0.000159386110986093, "loss": 0.2846, "step": 13962 }, { "epoch": 1.1311568373298768, "grad_norm": 0.046189580112695694, "learning_rate": 0.00015938161033349837, "loss": 0.3306, "step": 13963 }, { "epoch": 1.1312378483473753, "grad_norm": 0.03953096270561218, "learning_rate": 0.00015937710968090373, "loss": 0.3131, "step": 13964 }, { "epoch": 1.1313188593648735, "grad_norm": 0.04358987510204315, "learning_rate": 0.0001593726090283091, "loss": 0.3084, "step": 13965 }, { "epoch": 1.131399870382372, "grad_norm": 0.03750518709421158, "learning_rate": 0.00015936810837571447, "loss": 0.3118, "step": 13966 }, { "epoch": 1.1314808813998705, "grad_norm": 0.04085781052708626, "learning_rate": 0.00015936360772311986, "loss": 0.3118, "step": 13967 }, { "epoch": 1.1315618924173687, "grad_norm": 0.04128627851605415, "learning_rate": 0.00015935910707052525, "loss": 0.3291, "step": 13968 }, { "epoch": 1.1316429034348672, "grad_norm": 0.04109803959727287, "learning_rate": 0.0001593546064179306, "loss": 0.3251, "step": 13969 }, { "epoch": 1.1317239144523654, "grad_norm": 0.03611079975962639, "learning_rate": 0.00015935010576533597, "loss": 0.3247, "step": 13970 }, { "epoch": 1.1318049254698639, "grad_norm": 0.04212360829114914, "learning_rate": 0.00015934560511274135, "loss": 0.3064, "step": 13971 }, { "epoch": 1.1318859364873624, "grad_norm": 0.03634580597281456, "learning_rate": 0.00015934110446014674, "loss": 0.2657, "step": 13972 }, { "epoch": 1.1319669475048606, "grad_norm": 0.04142121225595474, "learning_rate": 0.0001593366038075521, "loss": 0.3054, "step": 13973 }, { "epoch": 1.132047958522359, "grad_norm": 0.043657053261995316, "learning_rate": 0.0001593321031549575, "loss": 0.3286, "step": 13974 }, { "epoch": 1.1321289695398575, "grad_norm": 0.03691680356860161, "learning_rate": 0.00015932760250236285, "loss": 0.3089, "step": 13975 }, { "epoch": 1.1322099805573558, "grad_norm": 0.038224101066589355, "learning_rate": 0.0001593231018497682, "loss": 0.2662, "step": 13976 }, { "epoch": 1.1322909915748542, "grad_norm": 0.04033510386943817, "learning_rate": 0.0001593186011971736, "loss": 0.3308, "step": 13977 }, { "epoch": 1.1323720025923525, "grad_norm": 0.037437643855810165, "learning_rate": 0.00015931410054457898, "loss": 0.3141, "step": 13978 }, { "epoch": 1.132453013609851, "grad_norm": 0.03672100603580475, "learning_rate": 0.00015930959989198434, "loss": 0.2986, "step": 13979 }, { "epoch": 1.1325340246273492, "grad_norm": 0.034205105155706406, "learning_rate": 0.00015930509923938973, "loss": 0.294, "step": 13980 }, { "epoch": 1.1326150356448477, "grad_norm": 0.04216152802109718, "learning_rate": 0.0001593005985867951, "loss": 0.3082, "step": 13981 }, { "epoch": 1.1326960466623461, "grad_norm": 0.03836638107895851, "learning_rate": 0.00015929609793420045, "loss": 0.3158, "step": 13982 }, { "epoch": 1.1327770576798444, "grad_norm": 0.042772434651851654, "learning_rate": 0.00015929159728160584, "loss": 0.3331, "step": 13983 }, { "epoch": 1.1328580686973428, "grad_norm": 0.03767096996307373, "learning_rate": 0.00015928709662901122, "loss": 0.3099, "step": 13984 }, { "epoch": 1.1329390797148413, "grad_norm": 0.037372101098299026, "learning_rate": 0.00015928259597641658, "loss": 0.2968, "step": 13985 }, { "epoch": 1.1330200907323396, "grad_norm": 0.03995388001203537, "learning_rate": 0.00015927809532382197, "loss": 0.3296, "step": 13986 }, { "epoch": 1.133101101749838, "grad_norm": 0.03512907400727272, "learning_rate": 0.00015927359467122733, "loss": 0.2998, "step": 13987 }, { "epoch": 1.1331821127673363, "grad_norm": 0.038549166172742844, "learning_rate": 0.0001592690940186327, "loss": 0.343, "step": 13988 }, { "epoch": 1.1332631237848347, "grad_norm": 0.03864383324980736, "learning_rate": 0.00015926459336603808, "loss": 0.3171, "step": 13989 }, { "epoch": 1.1333441348023332, "grad_norm": 0.036088306456804276, "learning_rate": 0.00015926009271344347, "loss": 0.3048, "step": 13990 }, { "epoch": 1.1334251458198314, "grad_norm": 0.042519714683294296, "learning_rate": 0.00015925559206084883, "loss": 0.3058, "step": 13991 }, { "epoch": 1.13350615683733, "grad_norm": 0.036296386271715164, "learning_rate": 0.0001592510914082542, "loss": 0.2861, "step": 13992 }, { "epoch": 1.1335871678548282, "grad_norm": 0.041319336742162704, "learning_rate": 0.00015924659075565957, "loss": 0.2783, "step": 13993 }, { "epoch": 1.1336681788723266, "grad_norm": 0.03771928697824478, "learning_rate": 0.00015924209010306493, "loss": 0.2959, "step": 13994 }, { "epoch": 1.133749189889825, "grad_norm": 0.03731353208422661, "learning_rate": 0.00015923758945047032, "loss": 0.2769, "step": 13995 }, { "epoch": 1.1338302009073233, "grad_norm": 0.04432228207588196, "learning_rate": 0.0001592330887978757, "loss": 0.3052, "step": 13996 }, { "epoch": 1.1339112119248218, "grad_norm": 0.042736418545246124, "learning_rate": 0.00015922858814528107, "loss": 0.3085, "step": 13997 }, { "epoch": 1.1339922229423203, "grad_norm": 0.04248335212469101, "learning_rate": 0.00015922408749268645, "loss": 0.3064, "step": 13998 }, { "epoch": 1.1340732339598185, "grad_norm": 0.04500725120306015, "learning_rate": 0.00015921958684009181, "loss": 0.3088, "step": 13999 }, { "epoch": 1.134154244977317, "grad_norm": 0.050973884761333466, "learning_rate": 0.00015921508618749717, "loss": 0.3918, "step": 14000 }, { "epoch": 1.1342352559948152, "grad_norm": 0.03775111958384514, "learning_rate": 0.0001592105855349026, "loss": 0.2903, "step": 14001 }, { "epoch": 1.1343162670123137, "grad_norm": 0.04045481234788895, "learning_rate": 0.00015920608488230795, "loss": 0.268, "step": 14002 }, { "epoch": 1.134397278029812, "grad_norm": 0.034963369369506836, "learning_rate": 0.0001592015842297133, "loss": 0.2996, "step": 14003 }, { "epoch": 1.1344782890473104, "grad_norm": 0.03908831253647804, "learning_rate": 0.0001591970835771187, "loss": 0.3033, "step": 14004 }, { "epoch": 1.1345593000648089, "grad_norm": 0.038431428372859955, "learning_rate": 0.00015919258292452406, "loss": 0.3054, "step": 14005 }, { "epoch": 1.1346403110823071, "grad_norm": 0.03698623180389404, "learning_rate": 0.00015918808227192942, "loss": 0.2849, "step": 14006 }, { "epoch": 1.1347213220998056, "grad_norm": 0.03957449644804001, "learning_rate": 0.00015918358161933483, "loss": 0.3003, "step": 14007 }, { "epoch": 1.134802333117304, "grad_norm": 0.040528152137994766, "learning_rate": 0.0001591790809667402, "loss": 0.3377, "step": 14008 }, { "epoch": 1.1348833441348023, "grad_norm": 0.03744984418153763, "learning_rate": 0.00015917458031414555, "loss": 0.3261, "step": 14009 }, { "epoch": 1.1349643551523008, "grad_norm": 0.037619199603796005, "learning_rate": 0.00015917007966155094, "loss": 0.2998, "step": 14010 }, { "epoch": 1.135045366169799, "grad_norm": 0.04669422656297684, "learning_rate": 0.0001591655790089563, "loss": 0.3315, "step": 14011 }, { "epoch": 1.1351263771872975, "grad_norm": 0.041102923452854156, "learning_rate": 0.00015916107835636166, "loss": 0.2808, "step": 14012 }, { "epoch": 1.135207388204796, "grad_norm": 0.03753142058849335, "learning_rate": 0.00015915657770376707, "loss": 0.2902, "step": 14013 }, { "epoch": 1.1352883992222942, "grad_norm": 0.03314594924449921, "learning_rate": 0.00015915207705117243, "loss": 0.2415, "step": 14014 }, { "epoch": 1.1353694102397927, "grad_norm": 0.03576765954494476, "learning_rate": 0.0001591475763985778, "loss": 0.2806, "step": 14015 }, { "epoch": 1.135450421257291, "grad_norm": 0.039405256509780884, "learning_rate": 0.00015914307574598318, "loss": 0.3163, "step": 14016 }, { "epoch": 1.1355314322747894, "grad_norm": 0.047493211925029755, "learning_rate": 0.00015913857509338854, "loss": 0.3541, "step": 14017 }, { "epoch": 1.1356124432922878, "grad_norm": 0.032639991492033005, "learning_rate": 0.0001591340744407939, "loss": 0.2691, "step": 14018 }, { "epoch": 1.135693454309786, "grad_norm": 0.03855365887284279, "learning_rate": 0.00015912957378819931, "loss": 0.3026, "step": 14019 }, { "epoch": 1.1357744653272845, "grad_norm": 0.04066522419452667, "learning_rate": 0.00015912507313560467, "loss": 0.326, "step": 14020 }, { "epoch": 1.135855476344783, "grad_norm": 0.037609148770570755, "learning_rate": 0.00015912057248301003, "loss": 0.3111, "step": 14021 }, { "epoch": 1.1359364873622813, "grad_norm": 0.0433291494846344, "learning_rate": 0.00015911607183041542, "loss": 0.3109, "step": 14022 }, { "epoch": 1.1360174983797797, "grad_norm": 0.04275543987751007, "learning_rate": 0.00015911157117782078, "loss": 0.3141, "step": 14023 }, { "epoch": 1.136098509397278, "grad_norm": 0.043383073061704636, "learning_rate": 0.00015910707052522617, "loss": 0.337, "step": 14024 }, { "epoch": 1.1361795204147764, "grad_norm": 0.04801047593355179, "learning_rate": 0.00015910256987263156, "loss": 0.361, "step": 14025 }, { "epoch": 1.1362605314322747, "grad_norm": 0.04143408313393593, "learning_rate": 0.00015909806922003692, "loss": 0.2957, "step": 14026 }, { "epoch": 1.1363415424497731, "grad_norm": 0.039822012186050415, "learning_rate": 0.00015909356856744228, "loss": 0.3023, "step": 14027 }, { "epoch": 1.1364225534672716, "grad_norm": 0.039689600467681885, "learning_rate": 0.00015908906791484766, "loss": 0.2697, "step": 14028 }, { "epoch": 1.1365035644847699, "grad_norm": 0.044523414224386215, "learning_rate": 0.00015908456726225302, "loss": 0.3203, "step": 14029 }, { "epoch": 1.1365845755022683, "grad_norm": 0.046490006148815155, "learning_rate": 0.0001590800666096584, "loss": 0.3288, "step": 14030 }, { "epoch": 1.1366655865197668, "grad_norm": 0.04369664937257767, "learning_rate": 0.0001590755659570638, "loss": 0.3013, "step": 14031 }, { "epoch": 1.136746597537265, "grad_norm": 0.04383647069334984, "learning_rate": 0.00015907106530446916, "loss": 0.2924, "step": 14032 }, { "epoch": 1.1368276085547635, "grad_norm": 0.041048433631658554, "learning_rate": 0.00015906656465187452, "loss": 0.3095, "step": 14033 }, { "epoch": 1.1369086195722617, "grad_norm": 0.04076990857720375, "learning_rate": 0.0001590620639992799, "loss": 0.298, "step": 14034 }, { "epoch": 1.1369896305897602, "grad_norm": 0.03735438734292984, "learning_rate": 0.00015905756334668526, "loss": 0.2903, "step": 14035 }, { "epoch": 1.1370706416072587, "grad_norm": 0.03785363584756851, "learning_rate": 0.00015905306269409065, "loss": 0.2893, "step": 14036 }, { "epoch": 1.137151652624757, "grad_norm": 0.04257530719041824, "learning_rate": 0.00015904856204149604, "loss": 0.2786, "step": 14037 }, { "epoch": 1.1372326636422554, "grad_norm": 0.037066444754600525, "learning_rate": 0.0001590440613889014, "loss": 0.3019, "step": 14038 }, { "epoch": 1.1373136746597536, "grad_norm": 0.03434824198484421, "learning_rate": 0.00015903956073630676, "loss": 0.2924, "step": 14039 }, { "epoch": 1.137394685677252, "grad_norm": 0.03972025588154793, "learning_rate": 0.00015903506008371215, "loss": 0.2738, "step": 14040 }, { "epoch": 1.1374756966947506, "grad_norm": 0.03562634810805321, "learning_rate": 0.0001590305594311175, "loss": 0.308, "step": 14041 }, { "epoch": 1.1375567077122488, "grad_norm": 0.038715023547410965, "learning_rate": 0.0001590260587785229, "loss": 0.3383, "step": 14042 }, { "epoch": 1.1376377187297473, "grad_norm": 0.03528916835784912, "learning_rate": 0.00015902155812592828, "loss": 0.2796, "step": 14043 }, { "epoch": 1.1377187297472457, "grad_norm": 0.037791598588228226, "learning_rate": 0.00015901705747333364, "loss": 0.2826, "step": 14044 }, { "epoch": 1.137799740764744, "grad_norm": 0.03439135104417801, "learning_rate": 0.000159012556820739, "loss": 0.3217, "step": 14045 }, { "epoch": 1.1378807517822425, "grad_norm": 0.03895426541566849, "learning_rate": 0.0001590080561681444, "loss": 0.2836, "step": 14046 }, { "epoch": 1.1379617627997407, "grad_norm": 0.03870061784982681, "learning_rate": 0.00015900355551554975, "loss": 0.3294, "step": 14047 }, { "epoch": 1.1380427738172392, "grad_norm": 0.03975560888648033, "learning_rate": 0.00015899905486295513, "loss": 0.3192, "step": 14048 }, { "epoch": 1.1381237848347374, "grad_norm": 0.034254979342222214, "learning_rate": 0.00015899455421036052, "loss": 0.2719, "step": 14049 }, { "epoch": 1.1382047958522359, "grad_norm": 0.038271915167570114, "learning_rate": 0.00015899005355776588, "loss": 0.326, "step": 14050 }, { "epoch": 1.1382858068697344, "grad_norm": 0.039710208773612976, "learning_rate": 0.00015898555290517124, "loss": 0.3065, "step": 14051 }, { "epoch": 1.1383668178872326, "grad_norm": 0.035166531801223755, "learning_rate": 0.00015898105225257663, "loss": 0.2895, "step": 14052 }, { "epoch": 1.138447828904731, "grad_norm": 0.03941623494029045, "learning_rate": 0.00015897655159998202, "loss": 0.2963, "step": 14053 }, { "epoch": 1.1385288399222295, "grad_norm": 0.038826532661914825, "learning_rate": 0.00015897205094738738, "loss": 0.3084, "step": 14054 }, { "epoch": 1.1386098509397278, "grad_norm": 0.03994929790496826, "learning_rate": 0.00015896755029479276, "loss": 0.3236, "step": 14055 }, { "epoch": 1.1386908619572262, "grad_norm": 0.04244782030582428, "learning_rate": 0.00015896304964219812, "loss": 0.3336, "step": 14056 }, { "epoch": 1.1387718729747245, "grad_norm": 0.042472947388887405, "learning_rate": 0.00015895854898960348, "loss": 0.3268, "step": 14057 }, { "epoch": 1.138852883992223, "grad_norm": 0.03509005531668663, "learning_rate": 0.00015895404833700887, "loss": 0.2596, "step": 14058 }, { "epoch": 1.1389338950097212, "grad_norm": 0.03898772597312927, "learning_rate": 0.00015894954768441426, "loss": 0.3302, "step": 14059 }, { "epoch": 1.1390149060272197, "grad_norm": 0.04091065004467964, "learning_rate": 0.00015894504703181962, "loss": 0.2922, "step": 14060 }, { "epoch": 1.1390959170447181, "grad_norm": 0.038985904306173325, "learning_rate": 0.000158940546379225, "loss": 0.2673, "step": 14061 }, { "epoch": 1.1391769280622164, "grad_norm": 0.04241928085684776, "learning_rate": 0.00015893604572663037, "loss": 0.3198, "step": 14062 }, { "epoch": 1.1392579390797148, "grad_norm": 0.03850384056568146, "learning_rate": 0.00015893154507403573, "loss": 0.2849, "step": 14063 }, { "epoch": 1.1393389500972133, "grad_norm": 0.04147018864750862, "learning_rate": 0.0001589270444214411, "loss": 0.3157, "step": 14064 }, { "epoch": 1.1394199611147116, "grad_norm": 0.04068557918071747, "learning_rate": 0.0001589225437688465, "loss": 0.3169, "step": 14065 }, { "epoch": 1.13950097213221, "grad_norm": 0.03858393803238869, "learning_rate": 0.00015891804311625186, "loss": 0.3044, "step": 14066 }, { "epoch": 1.1395819831497083, "grad_norm": 0.03682612255215645, "learning_rate": 0.00015891354246365725, "loss": 0.2826, "step": 14067 }, { "epoch": 1.1396629941672067, "grad_norm": 0.037353694438934326, "learning_rate": 0.0001589090418110626, "loss": 0.315, "step": 14068 }, { "epoch": 1.1397440051847052, "grad_norm": 0.03581754118204117, "learning_rate": 0.00015890454115846797, "loss": 0.2829, "step": 14069 }, { "epoch": 1.1398250162022034, "grad_norm": 0.03504345193505287, "learning_rate": 0.00015890004050587335, "loss": 0.2922, "step": 14070 }, { "epoch": 1.139906027219702, "grad_norm": 0.03751121088862419, "learning_rate": 0.00015889553985327874, "loss": 0.2826, "step": 14071 }, { "epoch": 1.1399870382372002, "grad_norm": 0.03658273443579674, "learning_rate": 0.0001588910392006841, "loss": 0.3045, "step": 14072 }, { "epoch": 1.1400680492546986, "grad_norm": 0.04107685387134552, "learning_rate": 0.0001588865385480895, "loss": 0.3275, "step": 14073 }, { "epoch": 1.140149060272197, "grad_norm": 0.04228817671537399, "learning_rate": 0.00015888203789549485, "loss": 0.2982, "step": 14074 }, { "epoch": 1.1402300712896953, "grad_norm": 0.0487077496945858, "learning_rate": 0.0001588775372429002, "loss": 0.3327, "step": 14075 }, { "epoch": 1.1403110823071938, "grad_norm": 0.04449397325515747, "learning_rate": 0.00015887303659030562, "loss": 0.3315, "step": 14076 }, { "epoch": 1.1403920933246923, "grad_norm": 0.038302674889564514, "learning_rate": 0.00015886853593771098, "loss": 0.2889, "step": 14077 }, { "epoch": 1.1404731043421905, "grad_norm": 0.039194364100694656, "learning_rate": 0.00015886403528511634, "loss": 0.2758, "step": 14078 }, { "epoch": 1.140554115359689, "grad_norm": 0.04212159290909767, "learning_rate": 0.00015885953463252173, "loss": 0.3066, "step": 14079 }, { "epoch": 1.1406351263771872, "grad_norm": 0.045051414519548416, "learning_rate": 0.0001588550339799271, "loss": 0.2842, "step": 14080 }, { "epoch": 1.1407161373946857, "grad_norm": 0.042434271425008774, "learning_rate": 0.00015885053332733245, "loss": 0.3434, "step": 14081 }, { "epoch": 1.140797148412184, "grad_norm": 0.05168015509843826, "learning_rate": 0.00015884603267473786, "loss": 0.3231, "step": 14082 }, { "epoch": 1.1408781594296824, "grad_norm": 0.04804101586341858, "learning_rate": 0.00015884153202214322, "loss": 0.3347, "step": 14083 }, { "epoch": 1.1409591704471809, "grad_norm": 0.042534634470939636, "learning_rate": 0.00015883703136954858, "loss": 0.3204, "step": 14084 }, { "epoch": 1.1410401814646791, "grad_norm": 0.04687447473406792, "learning_rate": 0.00015883253071695397, "loss": 0.3502, "step": 14085 }, { "epoch": 1.1411211924821776, "grad_norm": 0.04413938149809837, "learning_rate": 0.00015882803006435933, "loss": 0.302, "step": 14086 }, { "epoch": 1.141202203499676, "grad_norm": 0.046513497829437256, "learning_rate": 0.0001588235294117647, "loss": 0.3513, "step": 14087 }, { "epoch": 1.1412832145171743, "grad_norm": 0.04094574972987175, "learning_rate": 0.0001588190287591701, "loss": 0.3386, "step": 14088 }, { "epoch": 1.1413642255346728, "grad_norm": 0.04302053898572922, "learning_rate": 0.00015881452810657547, "loss": 0.3285, "step": 14089 }, { "epoch": 1.141445236552171, "grad_norm": 0.039479270577430725, "learning_rate": 0.00015881002745398083, "loss": 0.309, "step": 14090 }, { "epoch": 1.1415262475696695, "grad_norm": 0.04067131131887436, "learning_rate": 0.0001588055268013862, "loss": 0.3072, "step": 14091 }, { "epoch": 1.141607258587168, "grad_norm": 0.038594089448451996, "learning_rate": 0.00015880102614879157, "loss": 0.3116, "step": 14092 }, { "epoch": 1.1416882696046662, "grad_norm": 0.035079196095466614, "learning_rate": 0.00015879652549619693, "loss": 0.3006, "step": 14093 }, { "epoch": 1.1417692806221647, "grad_norm": 0.03509014472365379, "learning_rate": 0.00015879202484360235, "loss": 0.2823, "step": 14094 }, { "epoch": 1.141850291639663, "grad_norm": 0.04012055695056915, "learning_rate": 0.0001587875241910077, "loss": 0.2946, "step": 14095 }, { "epoch": 1.1419313026571614, "grad_norm": 0.04498868063092232, "learning_rate": 0.00015878302353841307, "loss": 0.3341, "step": 14096 }, { "epoch": 1.1420123136746598, "grad_norm": 0.04122866317629814, "learning_rate": 0.00015877852288581846, "loss": 0.3181, "step": 14097 }, { "epoch": 1.142093324692158, "grad_norm": 0.04573645442724228, "learning_rate": 0.00015877402223322382, "loss": 0.3106, "step": 14098 }, { "epoch": 1.1421743357096565, "grad_norm": 0.03992963582277298, "learning_rate": 0.00015876952158062918, "loss": 0.3174, "step": 14099 }, { "epoch": 1.142255346727155, "grad_norm": 0.03769395500421524, "learning_rate": 0.0001587650209280346, "loss": 0.2803, "step": 14100 }, { "epoch": 1.1423363577446533, "grad_norm": 0.037454042583703995, "learning_rate": 0.00015876052027543995, "loss": 0.337, "step": 14101 }, { "epoch": 1.1424173687621517, "grad_norm": 0.035413019359111786, "learning_rate": 0.0001587560196228453, "loss": 0.2735, "step": 14102 }, { "epoch": 1.14249837977965, "grad_norm": 0.04085841029882431, "learning_rate": 0.0001587515189702507, "loss": 0.2882, "step": 14103 }, { "epoch": 1.1425793907971484, "grad_norm": 0.03542950376868248, "learning_rate": 0.00015874701831765606, "loss": 0.3079, "step": 14104 }, { "epoch": 1.1426604018146467, "grad_norm": 0.0359371043741703, "learning_rate": 0.00015874251766506144, "loss": 0.313, "step": 14105 }, { "epoch": 1.1427414128321451, "grad_norm": 0.04420680180191994, "learning_rate": 0.00015873801701246683, "loss": 0.3043, "step": 14106 }, { "epoch": 1.1428224238496436, "grad_norm": 0.044378090649843216, "learning_rate": 0.0001587335163598722, "loss": 0.3589, "step": 14107 }, { "epoch": 1.1429034348671419, "grad_norm": 0.0452517494559288, "learning_rate": 0.00015872901570727755, "loss": 0.3643, "step": 14108 }, { "epoch": 1.1429844458846403, "grad_norm": 0.042304959148168564, "learning_rate": 0.00015872451505468294, "loss": 0.3275, "step": 14109 }, { "epoch": 1.1430654569021388, "grad_norm": 0.04190251603722572, "learning_rate": 0.0001587200144020883, "loss": 0.3102, "step": 14110 }, { "epoch": 1.143146467919637, "grad_norm": 0.036500733345746994, "learning_rate": 0.00015871551374949369, "loss": 0.3007, "step": 14111 }, { "epoch": 1.1432274789371355, "grad_norm": 0.036681219935417175, "learning_rate": 0.00015871101309689907, "loss": 0.2906, "step": 14112 }, { "epoch": 1.1433084899546337, "grad_norm": 0.049908604472875595, "learning_rate": 0.00015870651244430443, "loss": 0.3585, "step": 14113 }, { "epoch": 1.1433895009721322, "grad_norm": 0.041187454015016556, "learning_rate": 0.0001587020117917098, "loss": 0.3177, "step": 14114 }, { "epoch": 1.1434705119896307, "grad_norm": 0.03634697571396828, "learning_rate": 0.00015869751113911518, "loss": 0.2874, "step": 14115 }, { "epoch": 1.143551523007129, "grad_norm": 0.045490801334381104, "learning_rate": 0.00015869301048652054, "loss": 0.3447, "step": 14116 }, { "epoch": 1.1436325340246274, "grad_norm": 0.04303700476884842, "learning_rate": 0.00015868850983392593, "loss": 0.3525, "step": 14117 }, { "epoch": 1.1437135450421256, "grad_norm": 0.03868240490555763, "learning_rate": 0.00015868400918133131, "loss": 0.3193, "step": 14118 }, { "epoch": 1.143794556059624, "grad_norm": 0.04534424841403961, "learning_rate": 0.00015867950852873667, "loss": 0.3478, "step": 14119 }, { "epoch": 1.1438755670771226, "grad_norm": 0.039549026638269424, "learning_rate": 0.00015867500787614203, "loss": 0.2657, "step": 14120 }, { "epoch": 1.1439565780946208, "grad_norm": 0.04135221242904663, "learning_rate": 0.00015867050722354742, "loss": 0.3023, "step": 14121 }, { "epoch": 1.1440375891121193, "grad_norm": 0.0366831049323082, "learning_rate": 0.00015866600657095278, "loss": 0.2896, "step": 14122 }, { "epoch": 1.1441186001296177, "grad_norm": 0.044443391263484955, "learning_rate": 0.00015866150591835817, "loss": 0.316, "step": 14123 }, { "epoch": 1.144199611147116, "grad_norm": 0.035869430750608444, "learning_rate": 0.00015865700526576356, "loss": 0.2828, "step": 14124 }, { "epoch": 1.1442806221646145, "grad_norm": 0.039250753819942474, "learning_rate": 0.00015865250461316892, "loss": 0.299, "step": 14125 }, { "epoch": 1.1443616331821127, "grad_norm": 0.036866240203380585, "learning_rate": 0.00015864800396057428, "loss": 0.2866, "step": 14126 }, { "epoch": 1.1444426441996112, "grad_norm": 0.0382998064160347, "learning_rate": 0.00015864350330797966, "loss": 0.2939, "step": 14127 }, { "epoch": 1.1445236552171094, "grad_norm": 0.04512723162770271, "learning_rate": 0.00015863900265538505, "loss": 0.3071, "step": 14128 }, { "epoch": 1.1446046662346079, "grad_norm": 0.0422217883169651, "learning_rate": 0.0001586345020027904, "loss": 0.282, "step": 14129 }, { "epoch": 1.1446856772521063, "grad_norm": 0.03788993880152702, "learning_rate": 0.0001586300013501958, "loss": 0.2789, "step": 14130 }, { "epoch": 1.1447666882696046, "grad_norm": 0.03889959305524826, "learning_rate": 0.00015862550069760116, "loss": 0.2998, "step": 14131 }, { "epoch": 1.144847699287103, "grad_norm": 0.03628894314169884, "learning_rate": 0.00015862100004500652, "loss": 0.3067, "step": 14132 }, { "epoch": 1.1449287103046015, "grad_norm": 0.03689340874552727, "learning_rate": 0.0001586164993924119, "loss": 0.2959, "step": 14133 }, { "epoch": 1.1450097213220998, "grad_norm": 0.04336564615368843, "learning_rate": 0.0001586119987398173, "loss": 0.283, "step": 14134 }, { "epoch": 1.1450907323395982, "grad_norm": 0.045776378363370895, "learning_rate": 0.00015860749808722265, "loss": 0.3326, "step": 14135 }, { "epoch": 1.1451717433570965, "grad_norm": 0.03566877543926239, "learning_rate": 0.00015860299743462804, "loss": 0.2991, "step": 14136 }, { "epoch": 1.145252754374595, "grad_norm": 0.04115091264247894, "learning_rate": 0.0001585984967820334, "loss": 0.3198, "step": 14137 }, { "epoch": 1.1453337653920934, "grad_norm": 0.04076642915606499, "learning_rate": 0.00015859399612943876, "loss": 0.3067, "step": 14138 }, { "epoch": 1.1454147764095917, "grad_norm": 0.041703108698129654, "learning_rate": 0.00015858949547684415, "loss": 0.3291, "step": 14139 }, { "epoch": 1.1454957874270901, "grad_norm": 0.037575382739305496, "learning_rate": 0.00015858499482424953, "loss": 0.2899, "step": 14140 }, { "epoch": 1.1455767984445884, "grad_norm": 0.04002415016293526, "learning_rate": 0.0001585804941716549, "loss": 0.3049, "step": 14141 }, { "epoch": 1.1456578094620868, "grad_norm": 0.040226858109235764, "learning_rate": 0.00015857599351906028, "loss": 0.3073, "step": 14142 }, { "epoch": 1.1457388204795853, "grad_norm": 0.03888882324099541, "learning_rate": 0.00015857149286646564, "loss": 0.3226, "step": 14143 }, { "epoch": 1.1458198314970836, "grad_norm": 0.03859817981719971, "learning_rate": 0.000158566992213871, "loss": 0.3056, "step": 14144 }, { "epoch": 1.145900842514582, "grad_norm": 0.04122031852602959, "learning_rate": 0.0001585624915612764, "loss": 0.3412, "step": 14145 }, { "epoch": 1.1459818535320805, "grad_norm": 0.033064428716897964, "learning_rate": 0.00015855799090868178, "loss": 0.2723, "step": 14146 }, { "epoch": 1.1460628645495787, "grad_norm": 0.04350362345576286, "learning_rate": 0.00015855349025608714, "loss": 0.3383, "step": 14147 }, { "epoch": 1.1461438755670772, "grad_norm": 0.03967111557722092, "learning_rate": 0.00015854898960349252, "loss": 0.3021, "step": 14148 }, { "epoch": 1.1462248865845754, "grad_norm": 0.03711658716201782, "learning_rate": 0.00015854448895089788, "loss": 0.3245, "step": 14149 }, { "epoch": 1.146305897602074, "grad_norm": 0.03623461723327637, "learning_rate": 0.00015853998829830324, "loss": 0.296, "step": 14150 }, { "epoch": 1.1463869086195722, "grad_norm": 0.04315349832177162, "learning_rate": 0.00015853548764570863, "loss": 0.272, "step": 14151 }, { "epoch": 1.1464679196370706, "grad_norm": 0.038551878184080124, "learning_rate": 0.00015853098699311402, "loss": 0.3242, "step": 14152 }, { "epoch": 1.146548930654569, "grad_norm": 0.0382794514298439, "learning_rate": 0.00015852648634051938, "loss": 0.3034, "step": 14153 }, { "epoch": 1.1466299416720673, "grad_norm": 0.03740396350622177, "learning_rate": 0.00015852198568792476, "loss": 0.2885, "step": 14154 }, { "epoch": 1.1467109526895658, "grad_norm": 0.046128056943416595, "learning_rate": 0.00015851748503533012, "loss": 0.3699, "step": 14155 }, { "epoch": 1.1467919637070643, "grad_norm": 0.040248434990644455, "learning_rate": 0.00015851298438273548, "loss": 0.2957, "step": 14156 }, { "epoch": 1.1468729747245625, "grad_norm": 0.047767698764801025, "learning_rate": 0.0001585084837301409, "loss": 0.3637, "step": 14157 }, { "epoch": 1.146953985742061, "grad_norm": 0.04790830239653587, "learning_rate": 0.00015850398307754626, "loss": 0.3506, "step": 14158 }, { "epoch": 1.1470349967595592, "grad_norm": 0.03813810646533966, "learning_rate": 0.00015849948242495162, "loss": 0.2931, "step": 14159 }, { "epoch": 1.1471160077770577, "grad_norm": 0.0405675433576107, "learning_rate": 0.000158494981772357, "loss": 0.3258, "step": 14160 }, { "epoch": 1.1471970187945562, "grad_norm": 0.03747477009892464, "learning_rate": 0.00015849048111976237, "loss": 0.2754, "step": 14161 }, { "epoch": 1.1472780298120544, "grad_norm": 0.04514666646718979, "learning_rate": 0.00015848598046716773, "loss": 0.3132, "step": 14162 }, { "epoch": 1.1473590408295529, "grad_norm": 0.03907744958996773, "learning_rate": 0.00015848147981457314, "loss": 0.3237, "step": 14163 }, { "epoch": 1.1474400518470511, "grad_norm": 0.044495511800050735, "learning_rate": 0.0001584769791619785, "loss": 0.3362, "step": 14164 }, { "epoch": 1.1475210628645496, "grad_norm": 0.04200047254562378, "learning_rate": 0.00015847247850938386, "loss": 0.349, "step": 14165 }, { "epoch": 1.147602073882048, "grad_norm": 0.03570564463734627, "learning_rate": 0.00015846797785678925, "loss": 0.2715, "step": 14166 }, { "epoch": 1.1476830848995463, "grad_norm": 0.03856359422206879, "learning_rate": 0.0001584634772041946, "loss": 0.3037, "step": 14167 }, { "epoch": 1.1477640959170448, "grad_norm": 0.03832743689417839, "learning_rate": 0.00015845897655159997, "loss": 0.3179, "step": 14168 }, { "epoch": 1.147845106934543, "grad_norm": 0.044524624943733215, "learning_rate": 0.00015845447589900538, "loss": 0.3458, "step": 14169 }, { "epoch": 1.1479261179520415, "grad_norm": 0.03662329539656639, "learning_rate": 0.00015844997524641074, "loss": 0.2949, "step": 14170 }, { "epoch": 1.14800712896954, "grad_norm": 0.04072437062859535, "learning_rate": 0.0001584454745938161, "loss": 0.2882, "step": 14171 }, { "epoch": 1.1480881399870382, "grad_norm": 0.03684643656015396, "learning_rate": 0.0001584409739412215, "loss": 0.2946, "step": 14172 }, { "epoch": 1.1481691510045366, "grad_norm": 0.03623619303107262, "learning_rate": 0.00015843647328862685, "loss": 0.2823, "step": 14173 }, { "epoch": 1.148250162022035, "grad_norm": 0.04452592134475708, "learning_rate": 0.0001584319726360322, "loss": 0.3427, "step": 14174 }, { "epoch": 1.1483311730395334, "grad_norm": 0.035655438899993896, "learning_rate": 0.00015842747198343762, "loss": 0.2673, "step": 14175 }, { "epoch": 1.1484121840570318, "grad_norm": 0.032316237688064575, "learning_rate": 0.00015842297133084298, "loss": 0.2778, "step": 14176 }, { "epoch": 1.14849319507453, "grad_norm": 0.035419922322034836, "learning_rate": 0.00015841847067824834, "loss": 0.2521, "step": 14177 }, { "epoch": 1.1485742060920285, "grad_norm": 0.03664263337850571, "learning_rate": 0.00015841397002565373, "loss": 0.3023, "step": 14178 }, { "epoch": 1.148655217109527, "grad_norm": 0.04097932577133179, "learning_rate": 0.0001584094693730591, "loss": 0.3242, "step": 14179 }, { "epoch": 1.1487362281270252, "grad_norm": 0.04287213459610939, "learning_rate": 0.00015840496872046445, "loss": 0.3638, "step": 14180 }, { "epoch": 1.1488172391445237, "grad_norm": 0.03930720314383507, "learning_rate": 0.00015840046806786986, "loss": 0.3225, "step": 14181 }, { "epoch": 1.148898250162022, "grad_norm": 0.037118248641490936, "learning_rate": 0.00015839596741527522, "loss": 0.2974, "step": 14182 }, { "epoch": 1.1489792611795204, "grad_norm": 0.04094469174742699, "learning_rate": 0.00015839146676268058, "loss": 0.3339, "step": 14183 }, { "epoch": 1.1490602721970187, "grad_norm": 0.03562622144818306, "learning_rate": 0.00015838696611008597, "loss": 0.2681, "step": 14184 }, { "epoch": 1.1491412832145171, "grad_norm": 0.03689657896757126, "learning_rate": 0.00015838246545749133, "loss": 0.2941, "step": 14185 }, { "epoch": 1.1492222942320156, "grad_norm": 0.04406815394759178, "learning_rate": 0.00015837796480489672, "loss": 0.3157, "step": 14186 }, { "epoch": 1.1493033052495139, "grad_norm": 0.03726741299033165, "learning_rate": 0.0001583734641523021, "loss": 0.3033, "step": 14187 }, { "epoch": 1.1493843162670123, "grad_norm": 0.040153663605451584, "learning_rate": 0.00015836896349970747, "loss": 0.2765, "step": 14188 }, { "epoch": 1.1494653272845108, "grad_norm": 0.04548710212111473, "learning_rate": 0.00015836446284711283, "loss": 0.3308, "step": 14189 }, { "epoch": 1.149546338302009, "grad_norm": 0.03714310750365257, "learning_rate": 0.00015835996219451821, "loss": 0.3235, "step": 14190 }, { "epoch": 1.1496273493195075, "grad_norm": 0.04844687879085541, "learning_rate": 0.00015835546154192357, "loss": 0.3682, "step": 14191 }, { "epoch": 1.1497083603370057, "grad_norm": 0.04365730285644531, "learning_rate": 0.00015835096088932896, "loss": 0.3371, "step": 14192 }, { "epoch": 1.1497893713545042, "grad_norm": 0.040620166808366776, "learning_rate": 0.00015834646023673435, "loss": 0.3111, "step": 14193 }, { "epoch": 1.1498703823720027, "grad_norm": 0.04068024456501007, "learning_rate": 0.0001583419595841397, "loss": 0.2997, "step": 14194 }, { "epoch": 1.149951393389501, "grad_norm": 0.04177763685584068, "learning_rate": 0.00015833745893154507, "loss": 0.2964, "step": 14195 }, { "epoch": 1.1500324044069994, "grad_norm": 0.05007220804691315, "learning_rate": 0.00015833295827895046, "loss": 0.3481, "step": 14196 }, { "epoch": 1.1501134154244976, "grad_norm": 0.04170903190970421, "learning_rate": 0.00015832845762635582, "loss": 0.3339, "step": 14197 }, { "epoch": 1.150194426441996, "grad_norm": 0.03846409544348717, "learning_rate": 0.0001583239569737612, "loss": 0.3054, "step": 14198 }, { "epoch": 1.1502754374594946, "grad_norm": 0.04317399486899376, "learning_rate": 0.0001583194563211666, "loss": 0.3124, "step": 14199 }, { "epoch": 1.1503564484769928, "grad_norm": 0.03954905644059181, "learning_rate": 0.00015831495566857195, "loss": 0.306, "step": 14200 }, { "epoch": 1.1504374594944913, "grad_norm": 0.04700899496674538, "learning_rate": 0.0001583104550159773, "loss": 0.3149, "step": 14201 }, { "epoch": 1.1505184705119897, "grad_norm": 0.04225290194153786, "learning_rate": 0.0001583059543633827, "loss": 0.2912, "step": 14202 }, { "epoch": 1.150599481529488, "grad_norm": 0.039747338742017746, "learning_rate": 0.00015830145371078806, "loss": 0.3522, "step": 14203 }, { "epoch": 1.1506804925469865, "grad_norm": 0.031397707760334015, "learning_rate": 0.00015829695305819344, "loss": 0.2765, "step": 14204 }, { "epoch": 1.1507615035644847, "grad_norm": 0.04136206582188606, "learning_rate": 0.00015829245240559883, "loss": 0.3057, "step": 14205 }, { "epoch": 1.1508425145819832, "grad_norm": 0.03835195675492287, "learning_rate": 0.0001582879517530042, "loss": 0.2947, "step": 14206 }, { "epoch": 1.1509235255994814, "grad_norm": 0.042692024260759354, "learning_rate": 0.00015828345110040955, "loss": 0.3265, "step": 14207 }, { "epoch": 1.1510045366169799, "grad_norm": 0.03989066928625107, "learning_rate": 0.00015827895044781494, "loss": 0.3259, "step": 14208 }, { "epoch": 1.1510855476344783, "grad_norm": 0.036856938153505325, "learning_rate": 0.00015827444979522033, "loss": 0.3095, "step": 14209 }, { "epoch": 1.1511665586519766, "grad_norm": 0.03925073519349098, "learning_rate": 0.00015826994914262569, "loss": 0.318, "step": 14210 }, { "epoch": 1.151247569669475, "grad_norm": 0.03539036586880684, "learning_rate": 0.00015826544849003107, "loss": 0.3117, "step": 14211 }, { "epoch": 1.1513285806869735, "grad_norm": 0.036580659449100494, "learning_rate": 0.00015826094783743643, "loss": 0.2845, "step": 14212 }, { "epoch": 1.1514095917044718, "grad_norm": 0.047995951026678085, "learning_rate": 0.0001582564471848418, "loss": 0.3726, "step": 14213 }, { "epoch": 1.1514906027219702, "grad_norm": 0.03786792606115341, "learning_rate": 0.00015825194653224718, "loss": 0.2974, "step": 14214 }, { "epoch": 1.1515716137394685, "grad_norm": 0.04033001884818077, "learning_rate": 0.00015824744587965257, "loss": 0.3026, "step": 14215 }, { "epoch": 1.151652624756967, "grad_norm": 0.035620976239442825, "learning_rate": 0.00015824294522705793, "loss": 0.3363, "step": 14216 }, { "epoch": 1.1517336357744654, "grad_norm": 0.041697435081005096, "learning_rate": 0.00015823844457446331, "loss": 0.3125, "step": 14217 }, { "epoch": 1.1518146467919637, "grad_norm": 0.044587064534425735, "learning_rate": 0.00015823394392186867, "loss": 0.4001, "step": 14218 }, { "epoch": 1.1518956578094621, "grad_norm": 0.03764927387237549, "learning_rate": 0.00015822944326927403, "loss": 0.2945, "step": 14219 }, { "epoch": 1.1519766688269604, "grad_norm": 0.041600629687309265, "learning_rate": 0.00015822494261667942, "loss": 0.3169, "step": 14220 }, { "epoch": 1.1520576798444588, "grad_norm": 0.040279630571603775, "learning_rate": 0.0001582204419640848, "loss": 0.3132, "step": 14221 }, { "epoch": 1.1521386908619573, "grad_norm": 0.043576162308454514, "learning_rate": 0.00015821594131149017, "loss": 0.3182, "step": 14222 }, { "epoch": 1.1522197018794555, "grad_norm": 0.04142558574676514, "learning_rate": 0.00015821144065889556, "loss": 0.2998, "step": 14223 }, { "epoch": 1.152300712896954, "grad_norm": 0.039318881928920746, "learning_rate": 0.00015820694000630092, "loss": 0.314, "step": 14224 }, { "epoch": 1.1523817239144525, "grad_norm": 0.03945578262209892, "learning_rate": 0.00015820243935370628, "loss": 0.2977, "step": 14225 }, { "epoch": 1.1524627349319507, "grad_norm": 0.0380149707198143, "learning_rate": 0.00015819793870111166, "loss": 0.29, "step": 14226 }, { "epoch": 1.1525437459494492, "grad_norm": 0.03710747882723808, "learning_rate": 0.00015819343804851705, "loss": 0.2713, "step": 14227 }, { "epoch": 1.1526247569669474, "grad_norm": 0.04076594486832619, "learning_rate": 0.0001581889373959224, "loss": 0.305, "step": 14228 }, { "epoch": 1.152705767984446, "grad_norm": 0.03769194334745407, "learning_rate": 0.0001581844367433278, "loss": 0.2675, "step": 14229 }, { "epoch": 1.1527867790019442, "grad_norm": 0.038101229816675186, "learning_rate": 0.00015817993609073316, "loss": 0.2922, "step": 14230 }, { "epoch": 1.1528677900194426, "grad_norm": 0.04154670238494873, "learning_rate": 0.00015817543543813852, "loss": 0.3006, "step": 14231 }, { "epoch": 1.152948801036941, "grad_norm": 0.04486631602048874, "learning_rate": 0.0001581709347855439, "loss": 0.3604, "step": 14232 }, { "epoch": 1.1530298120544393, "grad_norm": 0.04321269318461418, "learning_rate": 0.0001581664341329493, "loss": 0.3726, "step": 14233 }, { "epoch": 1.1531108230719378, "grad_norm": 0.039845582097768784, "learning_rate": 0.00015816193348035465, "loss": 0.2857, "step": 14234 }, { "epoch": 1.1531918340894363, "grad_norm": 0.03998946771025658, "learning_rate": 0.00015815743282776004, "loss": 0.2798, "step": 14235 }, { "epoch": 1.1532728451069345, "grad_norm": 0.03772175684571266, "learning_rate": 0.0001581529321751654, "loss": 0.2998, "step": 14236 }, { "epoch": 1.153353856124433, "grad_norm": 0.038299474865198135, "learning_rate": 0.00015814843152257076, "loss": 0.3004, "step": 14237 }, { "epoch": 1.1534348671419312, "grad_norm": 0.035598307847976685, "learning_rate": 0.00015814393086997617, "loss": 0.2991, "step": 14238 }, { "epoch": 1.1535158781594297, "grad_norm": 0.04178384318947792, "learning_rate": 0.00015813943021738153, "loss": 0.3, "step": 14239 }, { "epoch": 1.1535968891769282, "grad_norm": 0.04785295948386192, "learning_rate": 0.0001581349295647869, "loss": 0.319, "step": 14240 }, { "epoch": 1.1536779001944264, "grad_norm": 0.038838669657707214, "learning_rate": 0.00015813042891219228, "loss": 0.2968, "step": 14241 }, { "epoch": 1.1537589112119249, "grad_norm": 0.034681666642427444, "learning_rate": 0.00015812592825959764, "loss": 0.294, "step": 14242 }, { "epoch": 1.153839922229423, "grad_norm": 0.03547977656126022, "learning_rate": 0.000158121427607003, "loss": 0.2661, "step": 14243 }, { "epoch": 1.1539209332469216, "grad_norm": 0.049016062170267105, "learning_rate": 0.00015811692695440842, "loss": 0.3183, "step": 14244 }, { "epoch": 1.15400194426442, "grad_norm": 0.04250559210777283, "learning_rate": 0.00015811242630181378, "loss": 0.2893, "step": 14245 }, { "epoch": 1.1540829552819183, "grad_norm": 0.03848905861377716, "learning_rate": 0.00015810792564921914, "loss": 0.3089, "step": 14246 }, { "epoch": 1.1541639662994168, "grad_norm": 0.033761803060770035, "learning_rate": 0.00015810342499662452, "loss": 0.2733, "step": 14247 }, { "epoch": 1.1542449773169152, "grad_norm": 0.039992865175008774, "learning_rate": 0.00015809892434402988, "loss": 0.3329, "step": 14248 }, { "epoch": 1.1543259883344135, "grad_norm": 0.04326672852039337, "learning_rate": 0.00015809442369143524, "loss": 0.3033, "step": 14249 }, { "epoch": 1.154406999351912, "grad_norm": 0.04974553734064102, "learning_rate": 0.00015808992303884066, "loss": 0.3068, "step": 14250 }, { "epoch": 1.1544880103694102, "grad_norm": 0.03991048410534859, "learning_rate": 0.00015808542238624602, "loss": 0.328, "step": 14251 }, { "epoch": 1.1545690213869086, "grad_norm": 0.04298831522464752, "learning_rate": 0.00015808092173365138, "loss": 0.2872, "step": 14252 }, { "epoch": 1.154650032404407, "grad_norm": 0.0435427762567997, "learning_rate": 0.00015807642108105676, "loss": 0.336, "step": 14253 }, { "epoch": 1.1547310434219054, "grad_norm": 0.043134018778800964, "learning_rate": 0.00015807192042846212, "loss": 0.2864, "step": 14254 }, { "epoch": 1.1548120544394038, "grad_norm": 0.04340551421046257, "learning_rate": 0.00015806741977586748, "loss": 0.339, "step": 14255 }, { "epoch": 1.154893065456902, "grad_norm": 0.0379253514111042, "learning_rate": 0.0001580629191232729, "loss": 0.3031, "step": 14256 }, { "epoch": 1.1549740764744005, "grad_norm": 0.0484885573387146, "learning_rate": 0.00015805841847067826, "loss": 0.3403, "step": 14257 }, { "epoch": 1.155055087491899, "grad_norm": 0.04308895766735077, "learning_rate": 0.00015805391781808362, "loss": 0.3047, "step": 14258 }, { "epoch": 1.1551360985093972, "grad_norm": 0.04077281430363655, "learning_rate": 0.000158049417165489, "loss": 0.3278, "step": 14259 }, { "epoch": 1.1552171095268957, "grad_norm": 0.038421981036663055, "learning_rate": 0.00015804491651289437, "loss": 0.3179, "step": 14260 }, { "epoch": 1.155298120544394, "grad_norm": 0.036804962903261185, "learning_rate": 0.00015804041586029975, "loss": 0.2898, "step": 14261 }, { "epoch": 1.1553791315618924, "grad_norm": 0.04037683829665184, "learning_rate": 0.00015803591520770514, "loss": 0.3247, "step": 14262 }, { "epoch": 1.155460142579391, "grad_norm": 0.04343143478035927, "learning_rate": 0.0001580314145551105, "loss": 0.3185, "step": 14263 }, { "epoch": 1.1555411535968891, "grad_norm": 0.03817358240485191, "learning_rate": 0.00015802691390251586, "loss": 0.2801, "step": 14264 }, { "epoch": 1.1556221646143876, "grad_norm": 0.0375547930598259, "learning_rate": 0.00015802241324992125, "loss": 0.3564, "step": 14265 }, { "epoch": 1.1557031756318858, "grad_norm": 0.0396089144051075, "learning_rate": 0.0001580179125973266, "loss": 0.307, "step": 14266 }, { "epoch": 1.1557841866493843, "grad_norm": 0.039912160485982895, "learning_rate": 0.000158013411944732, "loss": 0.3099, "step": 14267 }, { "epoch": 1.1558651976668828, "grad_norm": 0.039689574390649796, "learning_rate": 0.00015800891129213738, "loss": 0.2731, "step": 14268 }, { "epoch": 1.155946208684381, "grad_norm": 0.03810999542474747, "learning_rate": 0.00015800441063954274, "loss": 0.3546, "step": 14269 }, { "epoch": 1.1560272197018795, "grad_norm": 0.04423951730132103, "learning_rate": 0.0001579999099869481, "loss": 0.3234, "step": 14270 }, { "epoch": 1.1561082307193777, "grad_norm": 0.047452397644519806, "learning_rate": 0.0001579954093343535, "loss": 0.3106, "step": 14271 }, { "epoch": 1.1561892417368762, "grad_norm": 0.03986579552292824, "learning_rate": 0.00015799090868175885, "loss": 0.3345, "step": 14272 }, { "epoch": 1.1562702527543747, "grad_norm": 0.045032236725091934, "learning_rate": 0.00015798640802916424, "loss": 0.3599, "step": 14273 }, { "epoch": 1.156351263771873, "grad_norm": 0.04494628682732582, "learning_rate": 0.00015798190737656962, "loss": 0.3585, "step": 14274 }, { "epoch": 1.1564322747893714, "grad_norm": 0.035816531628370285, "learning_rate": 0.00015797740672397498, "loss": 0.3082, "step": 14275 }, { "epoch": 1.1565132858068696, "grad_norm": 0.04182044044137001, "learning_rate": 0.00015797290607138034, "loss": 0.3045, "step": 14276 }, { "epoch": 1.156594296824368, "grad_norm": 0.0457390621304512, "learning_rate": 0.00015796840541878573, "loss": 0.3367, "step": 14277 }, { "epoch": 1.1566753078418666, "grad_norm": 0.03820004686713219, "learning_rate": 0.0001579639047661911, "loss": 0.2954, "step": 14278 }, { "epoch": 1.1567563188593648, "grad_norm": 0.03487598896026611, "learning_rate": 0.00015795940411359648, "loss": 0.2639, "step": 14279 }, { "epoch": 1.1568373298768633, "grad_norm": 0.04177277535200119, "learning_rate": 0.00015795490346100187, "loss": 0.3396, "step": 14280 }, { "epoch": 1.1569183408943617, "grad_norm": 0.03737100213766098, "learning_rate": 0.00015795040280840723, "loss": 0.3039, "step": 14281 }, { "epoch": 1.15699935191186, "grad_norm": 0.03937702625989914, "learning_rate": 0.00015794590215581259, "loss": 0.3154, "step": 14282 }, { "epoch": 1.1570803629293585, "grad_norm": 0.04279126599431038, "learning_rate": 0.00015794140150321797, "loss": 0.3449, "step": 14283 }, { "epoch": 1.1571613739468567, "grad_norm": 0.04076807200908661, "learning_rate": 0.00015793690085062333, "loss": 0.3244, "step": 14284 }, { "epoch": 1.1572423849643552, "grad_norm": 0.04064446687698364, "learning_rate": 0.00015793240019802872, "loss": 0.2993, "step": 14285 }, { "epoch": 1.1573233959818534, "grad_norm": 0.03838472068309784, "learning_rate": 0.0001579278995454341, "loss": 0.2863, "step": 14286 }, { "epoch": 1.1574044069993519, "grad_norm": 0.04467492550611496, "learning_rate": 0.00015792339889283947, "loss": 0.3393, "step": 14287 }, { "epoch": 1.1574854180168503, "grad_norm": 0.042215894907712936, "learning_rate": 0.00015791889824024483, "loss": 0.2828, "step": 14288 }, { "epoch": 1.1575664290343486, "grad_norm": 0.048905014991760254, "learning_rate": 0.00015791439758765021, "loss": 0.3662, "step": 14289 }, { "epoch": 1.157647440051847, "grad_norm": 0.033709824085235596, "learning_rate": 0.0001579098969350556, "loss": 0.2772, "step": 14290 }, { "epoch": 1.1577284510693455, "grad_norm": 0.049707137048244476, "learning_rate": 0.00015790539628246096, "loss": 0.3643, "step": 14291 }, { "epoch": 1.1578094620868438, "grad_norm": 0.04020284488797188, "learning_rate": 0.00015790089562986635, "loss": 0.2992, "step": 14292 }, { "epoch": 1.1578904731043422, "grad_norm": 0.04444952309131622, "learning_rate": 0.0001578963949772717, "loss": 0.3155, "step": 14293 }, { "epoch": 1.1579714841218405, "grad_norm": 0.040825799107551575, "learning_rate": 0.00015789189432467707, "loss": 0.2988, "step": 14294 }, { "epoch": 1.158052495139339, "grad_norm": 0.040664974600076675, "learning_rate": 0.00015788739367208246, "loss": 0.3179, "step": 14295 }, { "epoch": 1.1581335061568374, "grad_norm": 0.03809911012649536, "learning_rate": 0.00015788289301948784, "loss": 0.3052, "step": 14296 }, { "epoch": 1.1582145171743357, "grad_norm": 0.03830299898982048, "learning_rate": 0.0001578783923668932, "loss": 0.2803, "step": 14297 }, { "epoch": 1.1582955281918341, "grad_norm": 0.04559274762868881, "learning_rate": 0.0001578738917142986, "loss": 0.2901, "step": 14298 }, { "epoch": 1.1583765392093324, "grad_norm": 0.03717794641852379, "learning_rate": 0.00015786939106170395, "loss": 0.3092, "step": 14299 }, { "epoch": 1.1584575502268308, "grad_norm": 0.03662274405360222, "learning_rate": 0.0001578648904091093, "loss": 0.2942, "step": 14300 }, { "epoch": 1.1585385612443293, "grad_norm": 0.045669075101614, "learning_rate": 0.0001578603897565147, "loss": 0.3138, "step": 14301 }, { "epoch": 1.1586195722618275, "grad_norm": 0.042246490716934204, "learning_rate": 0.00015785588910392008, "loss": 0.3654, "step": 14302 }, { "epoch": 1.158700583279326, "grad_norm": 0.04223867878317833, "learning_rate": 0.00015785138845132544, "loss": 0.3255, "step": 14303 }, { "epoch": 1.1587815942968245, "grad_norm": 0.036577608436346054, "learning_rate": 0.00015784688779873083, "loss": 0.2949, "step": 14304 }, { "epoch": 1.1588626053143227, "grad_norm": 0.04074166342616081, "learning_rate": 0.0001578423871461362, "loss": 0.3073, "step": 14305 }, { "epoch": 1.1589436163318212, "grad_norm": 0.03720313683152199, "learning_rate": 0.00015783788649354155, "loss": 0.2975, "step": 14306 }, { "epoch": 1.1590246273493194, "grad_norm": 0.043085984885692596, "learning_rate": 0.00015783338584094694, "loss": 0.3295, "step": 14307 }, { "epoch": 1.159105638366818, "grad_norm": 0.04072624817490578, "learning_rate": 0.00015782888518835233, "loss": 0.3283, "step": 14308 }, { "epoch": 1.1591866493843161, "grad_norm": 0.03722485899925232, "learning_rate": 0.00015782438453575769, "loss": 0.304, "step": 14309 }, { "epoch": 1.1592676604018146, "grad_norm": 0.036215007305145264, "learning_rate": 0.00015781988388316307, "loss": 0.3029, "step": 14310 }, { "epoch": 1.159348671419313, "grad_norm": 0.04115285351872444, "learning_rate": 0.00015781538323056843, "loss": 0.3074, "step": 14311 }, { "epoch": 1.1594296824368113, "grad_norm": 0.04612356796860695, "learning_rate": 0.0001578108825779738, "loss": 0.2944, "step": 14312 }, { "epoch": 1.1595106934543098, "grad_norm": 0.04167279973626137, "learning_rate": 0.0001578063819253792, "loss": 0.3255, "step": 14313 }, { "epoch": 1.1595917044718083, "grad_norm": 0.04015865549445152, "learning_rate": 0.00015780188127278457, "loss": 0.2837, "step": 14314 }, { "epoch": 1.1596727154893065, "grad_norm": 0.04215192049741745, "learning_rate": 0.00015779738062018993, "loss": 0.3126, "step": 14315 }, { "epoch": 1.159753726506805, "grad_norm": 0.04285452142357826, "learning_rate": 0.00015779287996759531, "loss": 0.3375, "step": 14316 }, { "epoch": 1.1598347375243032, "grad_norm": 0.03980492800474167, "learning_rate": 0.00015778837931500067, "loss": 0.2828, "step": 14317 }, { "epoch": 1.1599157485418017, "grad_norm": 0.03826780244708061, "learning_rate": 0.00015778387866240603, "loss": 0.2739, "step": 14318 }, { "epoch": 1.1599967595593002, "grad_norm": 0.038279950618743896, "learning_rate": 0.00015777937800981145, "loss": 0.2948, "step": 14319 }, { "epoch": 1.1600777705767984, "grad_norm": 0.037352073937654495, "learning_rate": 0.0001577748773572168, "loss": 0.2801, "step": 14320 }, { "epoch": 1.1601587815942969, "grad_norm": 0.03508422523736954, "learning_rate": 0.00015777037670462217, "loss": 0.2843, "step": 14321 }, { "epoch": 1.160239792611795, "grad_norm": 0.03702421486377716, "learning_rate": 0.00015776587605202756, "loss": 0.2824, "step": 14322 }, { "epoch": 1.1603208036292936, "grad_norm": 0.04090304300189018, "learning_rate": 0.00015776137539943292, "loss": 0.2995, "step": 14323 }, { "epoch": 1.160401814646792, "grad_norm": 0.049013279378414154, "learning_rate": 0.00015775687474683828, "loss": 0.2739, "step": 14324 }, { "epoch": 1.1604828256642903, "grad_norm": 0.034894946962594986, "learning_rate": 0.0001577523740942437, "loss": 0.2742, "step": 14325 }, { "epoch": 1.1605638366817888, "grad_norm": 0.04372987896203995, "learning_rate": 0.00015774787344164905, "loss": 0.3516, "step": 14326 }, { "epoch": 1.1606448476992872, "grad_norm": 0.04152096062898636, "learning_rate": 0.0001577433727890544, "loss": 0.3299, "step": 14327 }, { "epoch": 1.1607258587167855, "grad_norm": 0.032177865505218506, "learning_rate": 0.0001577388721364598, "loss": 0.252, "step": 14328 }, { "epoch": 1.160806869734284, "grad_norm": 0.043868500739336014, "learning_rate": 0.00015773437148386516, "loss": 0.325, "step": 14329 }, { "epoch": 1.1608878807517822, "grad_norm": 0.044172488152980804, "learning_rate": 0.00015772987083127055, "loss": 0.3035, "step": 14330 }, { "epoch": 1.1609688917692806, "grad_norm": 0.03395839408040047, "learning_rate": 0.00015772537017867593, "loss": 0.2612, "step": 14331 }, { "epoch": 1.1610499027867789, "grad_norm": 0.03549252077937126, "learning_rate": 0.0001577208695260813, "loss": 0.2882, "step": 14332 }, { "epoch": 1.1611309138042774, "grad_norm": 0.04983199015259743, "learning_rate": 0.00015771636887348665, "loss": 0.3429, "step": 14333 }, { "epoch": 1.1612119248217758, "grad_norm": 0.03520864620804787, "learning_rate": 0.00015771186822089204, "loss": 0.2724, "step": 14334 }, { "epoch": 1.161292935839274, "grad_norm": 0.04003923013806343, "learning_rate": 0.0001577073675682974, "loss": 0.3025, "step": 14335 }, { "epoch": 1.1613739468567725, "grad_norm": 0.037281379103660583, "learning_rate": 0.0001577028669157028, "loss": 0.3092, "step": 14336 }, { "epoch": 1.161454957874271, "grad_norm": 0.03576560318470001, "learning_rate": 0.00015769836626310817, "loss": 0.3162, "step": 14337 }, { "epoch": 1.1615359688917692, "grad_norm": 0.039813894778490067, "learning_rate": 0.00015769386561051353, "loss": 0.3403, "step": 14338 }, { "epoch": 1.1616169799092677, "grad_norm": 0.039217110723257065, "learning_rate": 0.0001576893649579189, "loss": 0.3053, "step": 14339 }, { "epoch": 1.161697990926766, "grad_norm": 0.04114118218421936, "learning_rate": 0.00015768486430532428, "loss": 0.2588, "step": 14340 }, { "epoch": 1.1617790019442644, "grad_norm": 0.04316568747162819, "learning_rate": 0.00015768036365272964, "loss": 0.336, "step": 14341 }, { "epoch": 1.161860012961763, "grad_norm": 0.03854662552475929, "learning_rate": 0.00015767586300013503, "loss": 0.2715, "step": 14342 }, { "epoch": 1.1619410239792611, "grad_norm": 0.041014164686203, "learning_rate": 0.00015767136234754042, "loss": 0.3493, "step": 14343 }, { "epoch": 1.1620220349967596, "grad_norm": 0.04309874773025513, "learning_rate": 0.00015766686169494578, "loss": 0.3256, "step": 14344 }, { "epoch": 1.1621030460142578, "grad_norm": 0.039331283420324326, "learning_rate": 0.00015766236104235114, "loss": 0.2745, "step": 14345 }, { "epoch": 1.1621840570317563, "grad_norm": 0.04590243846178055, "learning_rate": 0.00015765786038975652, "loss": 0.2849, "step": 14346 }, { "epoch": 1.1622650680492548, "grad_norm": 0.04023681581020355, "learning_rate": 0.00015765335973716188, "loss": 0.299, "step": 14347 }, { "epoch": 1.162346079066753, "grad_norm": 0.03895244002342224, "learning_rate": 0.00015764885908456727, "loss": 0.2907, "step": 14348 }, { "epoch": 1.1624270900842515, "grad_norm": 0.03700018674135208, "learning_rate": 0.00015764435843197266, "loss": 0.2937, "step": 14349 }, { "epoch": 1.16250810110175, "grad_norm": 0.038235098123550415, "learning_rate": 0.00015763985777937802, "loss": 0.2813, "step": 14350 }, { "epoch": 1.1625891121192482, "grad_norm": 0.04438399523496628, "learning_rate": 0.00015763535712678338, "loss": 0.329, "step": 14351 }, { "epoch": 1.1626701231367467, "grad_norm": 0.04124677553772926, "learning_rate": 0.00015763085647418876, "loss": 0.3366, "step": 14352 }, { "epoch": 1.162751134154245, "grad_norm": 0.04604765772819519, "learning_rate": 0.00015762635582159412, "loss": 0.3107, "step": 14353 }, { "epoch": 1.1628321451717434, "grad_norm": 0.0400199219584465, "learning_rate": 0.0001576218551689995, "loss": 0.2957, "step": 14354 }, { "epoch": 1.1629131561892416, "grad_norm": 0.03940681368112564, "learning_rate": 0.0001576173545164049, "loss": 0.3156, "step": 14355 }, { "epoch": 1.16299416720674, "grad_norm": 0.043107450008392334, "learning_rate": 0.00015761285386381026, "loss": 0.3307, "step": 14356 }, { "epoch": 1.1630751782242386, "grad_norm": 0.03884882107377052, "learning_rate": 0.00015760835321121562, "loss": 0.2736, "step": 14357 }, { "epoch": 1.1631561892417368, "grad_norm": 0.034172337502241135, "learning_rate": 0.000157603852558621, "loss": 0.2895, "step": 14358 }, { "epoch": 1.1632372002592353, "grad_norm": 0.041414301842451096, "learning_rate": 0.00015759935190602637, "loss": 0.3168, "step": 14359 }, { "epoch": 1.1633182112767337, "grad_norm": 0.03927793353796005, "learning_rate": 0.00015759485125343175, "loss": 0.303, "step": 14360 }, { "epoch": 1.163399222294232, "grad_norm": 0.04739841818809509, "learning_rate": 0.00015759035060083714, "loss": 0.3275, "step": 14361 }, { "epoch": 1.1634802333117304, "grad_norm": 0.0419585183262825, "learning_rate": 0.0001575858499482425, "loss": 0.3686, "step": 14362 }, { "epoch": 1.1635612443292287, "grad_norm": 0.042601704597473145, "learning_rate": 0.00015758134929564786, "loss": 0.3097, "step": 14363 }, { "epoch": 1.1636422553467272, "grad_norm": 0.0376935750246048, "learning_rate": 0.00015757684864305325, "loss": 0.3189, "step": 14364 }, { "epoch": 1.1637232663642256, "grad_norm": 0.03754288703203201, "learning_rate": 0.0001575723479904586, "loss": 0.283, "step": 14365 }, { "epoch": 1.1638042773817239, "grad_norm": 0.03639476001262665, "learning_rate": 0.000157567847337864, "loss": 0.2776, "step": 14366 }, { "epoch": 1.1638852883992223, "grad_norm": 0.04828599467873573, "learning_rate": 0.00015756334668526938, "loss": 0.3138, "step": 14367 }, { "epoch": 1.1639662994167206, "grad_norm": 0.04046766087412834, "learning_rate": 0.00015755884603267474, "loss": 0.305, "step": 14368 }, { "epoch": 1.164047310434219, "grad_norm": 0.03723064810037613, "learning_rate": 0.0001575543453800801, "loss": 0.2456, "step": 14369 }, { "epoch": 1.1641283214517175, "grad_norm": 0.03720134496688843, "learning_rate": 0.0001575498447274855, "loss": 0.2928, "step": 14370 }, { "epoch": 1.1642093324692158, "grad_norm": 0.03799651190638542, "learning_rate": 0.00015754534407489088, "loss": 0.2884, "step": 14371 }, { "epoch": 1.1642903434867142, "grad_norm": 0.04502912238240242, "learning_rate": 0.00015754084342229624, "loss": 0.3492, "step": 14372 }, { "epoch": 1.1643713545042127, "grad_norm": 0.046434637159109116, "learning_rate": 0.00015753634276970162, "loss": 0.3515, "step": 14373 }, { "epoch": 1.164452365521711, "grad_norm": 0.039329420775175095, "learning_rate": 0.00015753184211710698, "loss": 0.3143, "step": 14374 }, { "epoch": 1.1645333765392094, "grad_norm": 0.03723298758268356, "learning_rate": 0.00015752734146451234, "loss": 0.2921, "step": 14375 }, { "epoch": 1.1646143875567077, "grad_norm": 0.04591132700443268, "learning_rate": 0.00015752284081191773, "loss": 0.3352, "step": 14376 }, { "epoch": 1.1646953985742061, "grad_norm": 0.03952774778008461, "learning_rate": 0.00015751834015932312, "loss": 0.2851, "step": 14377 }, { "epoch": 1.1647764095917044, "grad_norm": 0.04761730879545212, "learning_rate": 0.00015751383950672848, "loss": 0.335, "step": 14378 }, { "epoch": 1.1648574206092028, "grad_norm": 0.041464634239673615, "learning_rate": 0.00015750933885413387, "loss": 0.3514, "step": 14379 }, { "epoch": 1.1649384316267013, "grad_norm": 0.03948846831917763, "learning_rate": 0.00015750483820153923, "loss": 0.3129, "step": 14380 }, { "epoch": 1.1650194426441995, "grad_norm": 0.03981523960828781, "learning_rate": 0.00015750033754894459, "loss": 0.3009, "step": 14381 }, { "epoch": 1.165100453661698, "grad_norm": 0.03930754214525223, "learning_rate": 0.00015749583689634997, "loss": 0.2811, "step": 14382 }, { "epoch": 1.1651814646791965, "grad_norm": 0.03473753482103348, "learning_rate": 0.00015749133624375536, "loss": 0.2982, "step": 14383 }, { "epoch": 1.1652624756966947, "grad_norm": 0.040980495512485504, "learning_rate": 0.00015748683559116072, "loss": 0.3249, "step": 14384 }, { "epoch": 1.1653434867141932, "grad_norm": 0.04522862285375595, "learning_rate": 0.0001574823349385661, "loss": 0.3463, "step": 14385 }, { "epoch": 1.1654244977316914, "grad_norm": 0.040367692708969116, "learning_rate": 0.00015747783428597147, "loss": 0.325, "step": 14386 }, { "epoch": 1.16550550874919, "grad_norm": 0.03818906843662262, "learning_rate": 0.00015747333363337683, "loss": 0.3203, "step": 14387 }, { "epoch": 1.1655865197666881, "grad_norm": 0.03720659017562866, "learning_rate": 0.00015746883298078221, "loss": 0.2866, "step": 14388 }, { "epoch": 1.1656675307841866, "grad_norm": 0.0384877473115921, "learning_rate": 0.0001574643323281876, "loss": 0.2803, "step": 14389 }, { "epoch": 1.165748541801685, "grad_norm": 0.04146861657500267, "learning_rate": 0.00015745983167559296, "loss": 0.3333, "step": 14390 }, { "epoch": 1.1658295528191833, "grad_norm": 0.04111800342798233, "learning_rate": 0.00015745533102299835, "loss": 0.2812, "step": 14391 }, { "epoch": 1.1659105638366818, "grad_norm": 0.04299687221646309, "learning_rate": 0.0001574508303704037, "loss": 0.382, "step": 14392 }, { "epoch": 1.1659915748541803, "grad_norm": 0.03818053752183914, "learning_rate": 0.0001574463297178091, "loss": 0.2905, "step": 14393 }, { "epoch": 1.1660725858716785, "grad_norm": 0.043594978749752045, "learning_rate": 0.00015744182906521448, "loss": 0.2981, "step": 14394 }, { "epoch": 1.166153596889177, "grad_norm": 0.04402077943086624, "learning_rate": 0.00015743732841261984, "loss": 0.2602, "step": 14395 }, { "epoch": 1.1662346079066752, "grad_norm": 0.03786975145339966, "learning_rate": 0.0001574328277600252, "loss": 0.2975, "step": 14396 }, { "epoch": 1.1663156189241737, "grad_norm": 0.04601101204752922, "learning_rate": 0.0001574283271074306, "loss": 0.3394, "step": 14397 }, { "epoch": 1.1663966299416721, "grad_norm": 0.04181542620062828, "learning_rate": 0.00015742382645483595, "loss": 0.3478, "step": 14398 }, { "epoch": 1.1664776409591704, "grad_norm": 0.05040718615055084, "learning_rate": 0.00015741932580224134, "loss": 0.313, "step": 14399 }, { "epoch": 1.1665586519766689, "grad_norm": 0.04258178547024727, "learning_rate": 0.00015741482514964672, "loss": 0.3442, "step": 14400 }, { "epoch": 1.166639662994167, "grad_norm": 0.038349322974681854, "learning_rate": 0.00015741032449705208, "loss": 0.3307, "step": 14401 }, { "epoch": 1.1667206740116656, "grad_norm": 0.04305305331945419, "learning_rate": 0.00015740582384445744, "loss": 0.3368, "step": 14402 }, { "epoch": 1.166801685029164, "grad_norm": 0.04231201857328415, "learning_rate": 0.00015740132319186283, "loss": 0.3361, "step": 14403 }, { "epoch": 1.1668826960466623, "grad_norm": 0.04161004349589348, "learning_rate": 0.0001573968225392682, "loss": 0.3189, "step": 14404 }, { "epoch": 1.1669637070641607, "grad_norm": 0.03625449910759926, "learning_rate": 0.00015739232188667358, "loss": 0.3041, "step": 14405 }, { "epoch": 1.1670447180816592, "grad_norm": 0.03723830729722977, "learning_rate": 0.00015738782123407897, "loss": 0.2839, "step": 14406 }, { "epoch": 1.1671257290991575, "grad_norm": 0.04593285173177719, "learning_rate": 0.00015738332058148433, "loss": 0.3139, "step": 14407 }, { "epoch": 1.167206740116656, "grad_norm": 0.044112276285886765, "learning_rate": 0.00015737881992888969, "loss": 0.3136, "step": 14408 }, { "epoch": 1.1672877511341542, "grad_norm": 0.04640216380357742, "learning_rate": 0.00015737431927629507, "loss": 0.3331, "step": 14409 }, { "epoch": 1.1673687621516526, "grad_norm": 0.03842916712164879, "learning_rate": 0.00015736981862370043, "loss": 0.3166, "step": 14410 }, { "epoch": 1.1674497731691509, "grad_norm": 0.03956562653183937, "learning_rate": 0.00015736531797110582, "loss": 0.3116, "step": 14411 }, { "epoch": 1.1675307841866494, "grad_norm": 0.03777385875582695, "learning_rate": 0.0001573608173185112, "loss": 0.2882, "step": 14412 }, { "epoch": 1.1676117952041478, "grad_norm": 0.03600339964032173, "learning_rate": 0.00015735631666591657, "loss": 0.2897, "step": 14413 }, { "epoch": 1.167692806221646, "grad_norm": 0.042145904153585434, "learning_rate": 0.00015735181601332193, "loss": 0.3072, "step": 14414 }, { "epoch": 1.1677738172391445, "grad_norm": 0.04632596671581268, "learning_rate": 0.00015734731536072732, "loss": 0.3458, "step": 14415 }, { "epoch": 1.167854828256643, "grad_norm": 0.04889935255050659, "learning_rate": 0.00015734281470813268, "loss": 0.2927, "step": 14416 }, { "epoch": 1.1679358392741412, "grad_norm": 0.04121779277920723, "learning_rate": 0.00015733831405553806, "loss": 0.3133, "step": 14417 }, { "epoch": 1.1680168502916397, "grad_norm": 0.04164166748523712, "learning_rate": 0.00015733381340294345, "loss": 0.3279, "step": 14418 }, { "epoch": 1.168097861309138, "grad_norm": 0.03872714564204216, "learning_rate": 0.0001573293127503488, "loss": 0.2808, "step": 14419 }, { "epoch": 1.1681788723266364, "grad_norm": 0.04460643604397774, "learning_rate": 0.00015732481209775417, "loss": 0.3182, "step": 14420 }, { "epoch": 1.1682598833441349, "grad_norm": 0.045483093708753586, "learning_rate": 0.00015732031144515956, "loss": 0.3827, "step": 14421 }, { "epoch": 1.1683408943616331, "grad_norm": 0.03746965900063515, "learning_rate": 0.00015731581079256492, "loss": 0.3131, "step": 14422 }, { "epoch": 1.1684219053791316, "grad_norm": 0.03765581175684929, "learning_rate": 0.0001573113101399703, "loss": 0.3005, "step": 14423 }, { "epoch": 1.1685029163966298, "grad_norm": 0.04205656424164772, "learning_rate": 0.0001573068094873757, "loss": 0.2785, "step": 14424 }, { "epoch": 1.1685839274141283, "grad_norm": 0.042121246457099915, "learning_rate": 0.00015730230883478105, "loss": 0.3031, "step": 14425 }, { "epoch": 1.1686649384316268, "grad_norm": 0.03836040943861008, "learning_rate": 0.0001572978081821864, "loss": 0.2686, "step": 14426 }, { "epoch": 1.168745949449125, "grad_norm": 0.03815663978457451, "learning_rate": 0.0001572933075295918, "loss": 0.3018, "step": 14427 }, { "epoch": 1.1688269604666235, "grad_norm": 0.04434993490576744, "learning_rate": 0.00015728880687699716, "loss": 0.3274, "step": 14428 }, { "epoch": 1.168907971484122, "grad_norm": 0.03531665354967117, "learning_rate": 0.00015728430622440255, "loss": 0.2539, "step": 14429 }, { "epoch": 1.1689889825016202, "grad_norm": 0.039583683013916016, "learning_rate": 0.00015727980557180793, "loss": 0.2733, "step": 14430 }, { "epoch": 1.1690699935191187, "grad_norm": 0.04048553854227066, "learning_rate": 0.0001572753049192133, "loss": 0.3116, "step": 14431 }, { "epoch": 1.169151004536617, "grad_norm": 0.04091466963291168, "learning_rate": 0.00015727080426661865, "loss": 0.3222, "step": 14432 }, { "epoch": 1.1692320155541154, "grad_norm": 0.044963400810956955, "learning_rate": 0.00015726630361402404, "loss": 0.3188, "step": 14433 }, { "epoch": 1.1693130265716136, "grad_norm": 0.03848746791481972, "learning_rate": 0.0001572618029614294, "loss": 0.2883, "step": 14434 }, { "epoch": 1.169394037589112, "grad_norm": 0.045976340770721436, "learning_rate": 0.0001572573023088348, "loss": 0.2802, "step": 14435 }, { "epoch": 1.1694750486066106, "grad_norm": 0.04464418813586235, "learning_rate": 0.00015725280165624017, "loss": 0.3392, "step": 14436 }, { "epoch": 1.1695560596241088, "grad_norm": 0.040776655077934265, "learning_rate": 0.00015724830100364553, "loss": 0.2888, "step": 14437 }, { "epoch": 1.1696370706416073, "grad_norm": 0.04193189740180969, "learning_rate": 0.0001572438003510509, "loss": 0.32, "step": 14438 }, { "epoch": 1.1697180816591057, "grad_norm": 0.038984332233667374, "learning_rate": 0.00015723929969845628, "loss": 0.299, "step": 14439 }, { "epoch": 1.169799092676604, "grad_norm": 0.04495490342378616, "learning_rate": 0.00015723479904586164, "loss": 0.2879, "step": 14440 }, { "epoch": 1.1698801036941024, "grad_norm": 0.03746641427278519, "learning_rate": 0.00015723029839326703, "loss": 0.3071, "step": 14441 }, { "epoch": 1.1699611147116007, "grad_norm": 0.04167228192090988, "learning_rate": 0.00015722579774067242, "loss": 0.2975, "step": 14442 }, { "epoch": 1.1700421257290992, "grad_norm": 0.041208457201719284, "learning_rate": 0.00015722129708807778, "loss": 0.3123, "step": 14443 }, { "epoch": 1.1701231367465976, "grad_norm": 0.03645787760615349, "learning_rate": 0.00015721679643548314, "loss": 0.298, "step": 14444 }, { "epoch": 1.1702041477640959, "grad_norm": 0.035555098205804825, "learning_rate": 0.00015721229578288852, "loss": 0.2618, "step": 14445 }, { "epoch": 1.1702851587815943, "grad_norm": 0.04322396591305733, "learning_rate": 0.0001572077951302939, "loss": 0.2824, "step": 14446 }, { "epoch": 1.1703661697990926, "grad_norm": 0.04139561951160431, "learning_rate": 0.00015720329447769927, "loss": 0.3275, "step": 14447 }, { "epoch": 1.170447180816591, "grad_norm": 0.049311134964227676, "learning_rate": 0.00015719879382510466, "loss": 0.3481, "step": 14448 }, { "epoch": 1.1705281918340895, "grad_norm": 0.04070025682449341, "learning_rate": 0.00015719429317251002, "loss": 0.3049, "step": 14449 }, { "epoch": 1.1706092028515878, "grad_norm": 0.04206862300634384, "learning_rate": 0.00015718979251991538, "loss": 0.2965, "step": 14450 }, { "epoch": 1.1706902138690862, "grad_norm": 0.047473225742578506, "learning_rate": 0.00015718529186732076, "loss": 0.3326, "step": 14451 }, { "epoch": 1.1707712248865847, "grad_norm": 0.0422075018286705, "learning_rate": 0.00015718079121472615, "loss": 0.3224, "step": 14452 }, { "epoch": 1.170852235904083, "grad_norm": 0.04035336151719093, "learning_rate": 0.0001571762905621315, "loss": 0.3277, "step": 14453 }, { "epoch": 1.1709332469215814, "grad_norm": 0.041684288531541824, "learning_rate": 0.0001571717899095369, "loss": 0.2951, "step": 14454 }, { "epoch": 1.1710142579390797, "grad_norm": 0.04003993794322014, "learning_rate": 0.00015716728925694226, "loss": 0.2992, "step": 14455 }, { "epoch": 1.1710952689565781, "grad_norm": 0.03539128601551056, "learning_rate": 0.00015716278860434762, "loss": 0.2498, "step": 14456 }, { "epoch": 1.1711762799740764, "grad_norm": 0.03944115713238716, "learning_rate": 0.000157158287951753, "loss": 0.307, "step": 14457 }, { "epoch": 1.1712572909915748, "grad_norm": 0.04354918375611305, "learning_rate": 0.0001571537872991584, "loss": 0.3214, "step": 14458 }, { "epoch": 1.1713383020090733, "grad_norm": 0.03932163119316101, "learning_rate": 0.00015714928664656375, "loss": 0.3064, "step": 14459 }, { "epoch": 1.1714193130265715, "grad_norm": 0.03566412627696991, "learning_rate": 0.00015714478599396914, "loss": 0.2781, "step": 14460 }, { "epoch": 1.17150032404407, "grad_norm": 0.03720712289214134, "learning_rate": 0.0001571402853413745, "loss": 0.302, "step": 14461 }, { "epoch": 1.1715813350615685, "grad_norm": 0.03921864926815033, "learning_rate": 0.0001571357846887799, "loss": 0.2906, "step": 14462 }, { "epoch": 1.1716623460790667, "grad_norm": 0.037216588854789734, "learning_rate": 0.00015713128403618525, "loss": 0.295, "step": 14463 }, { "epoch": 1.1717433570965652, "grad_norm": 0.03720587491989136, "learning_rate": 0.00015712678338359064, "loss": 0.3003, "step": 14464 }, { "epoch": 1.1718243681140634, "grad_norm": 0.03727641701698303, "learning_rate": 0.000157122282730996, "loss": 0.2664, "step": 14465 }, { "epoch": 1.171905379131562, "grad_norm": 0.04216910898685455, "learning_rate": 0.00015711778207840138, "loss": 0.3153, "step": 14466 }, { "epoch": 1.1719863901490604, "grad_norm": 0.034736037254333496, "learning_rate": 0.00015711328142580674, "loss": 0.2879, "step": 14467 }, { "epoch": 1.1720674011665586, "grad_norm": 0.04738327115774155, "learning_rate": 0.00015710878077321213, "loss": 0.316, "step": 14468 }, { "epoch": 1.172148412184057, "grad_norm": 0.05080415681004524, "learning_rate": 0.0001571042801206175, "loss": 0.3563, "step": 14469 }, { "epoch": 1.1722294232015553, "grad_norm": 0.038124267011880875, "learning_rate": 0.00015709977946802288, "loss": 0.3089, "step": 14470 }, { "epoch": 1.1723104342190538, "grad_norm": 0.03749233856797218, "learning_rate": 0.00015709527881542824, "loss": 0.2939, "step": 14471 }, { "epoch": 1.1723914452365523, "grad_norm": 0.0384153351187706, "learning_rate": 0.00015709077816283362, "loss": 0.2987, "step": 14472 }, { "epoch": 1.1724724562540505, "grad_norm": 0.04736333712935448, "learning_rate": 0.00015708627751023898, "loss": 0.3691, "step": 14473 }, { "epoch": 1.172553467271549, "grad_norm": 0.0412851981818676, "learning_rate": 0.00015708177685764437, "loss": 0.3414, "step": 14474 }, { "epoch": 1.1726344782890474, "grad_norm": 0.042260248214006424, "learning_rate": 0.00015707727620504976, "loss": 0.3084, "step": 14475 }, { "epoch": 1.1727154893065457, "grad_norm": 0.03734608739614487, "learning_rate": 0.00015707277555245512, "loss": 0.2988, "step": 14476 }, { "epoch": 1.1727965003240441, "grad_norm": 0.0376180000603199, "learning_rate": 0.00015706827489986048, "loss": 0.2771, "step": 14477 }, { "epoch": 1.1728775113415424, "grad_norm": 0.05453381687402725, "learning_rate": 0.00015706377424726587, "loss": 0.3332, "step": 14478 }, { "epoch": 1.1729585223590409, "grad_norm": 0.04309339076280594, "learning_rate": 0.00015705927359467123, "loss": 0.2636, "step": 14479 }, { "epoch": 1.173039533376539, "grad_norm": 0.04677773267030716, "learning_rate": 0.0001570547729420766, "loss": 0.3339, "step": 14480 }, { "epoch": 1.1731205443940376, "grad_norm": 0.04022238403558731, "learning_rate": 0.000157050272289482, "loss": 0.2977, "step": 14481 }, { "epoch": 1.173201555411536, "grad_norm": 0.04072597250342369, "learning_rate": 0.00015704577163688736, "loss": 0.2985, "step": 14482 }, { "epoch": 1.1732825664290343, "grad_norm": 0.04243917018175125, "learning_rate": 0.00015704127098429272, "loss": 0.3302, "step": 14483 }, { "epoch": 1.1733635774465327, "grad_norm": 0.037157706916332245, "learning_rate": 0.0001570367703316981, "loss": 0.292, "step": 14484 }, { "epoch": 1.1734445884640312, "grad_norm": 0.042577095329761505, "learning_rate": 0.00015703226967910347, "loss": 0.3118, "step": 14485 }, { "epoch": 1.1735255994815295, "grad_norm": 0.040878184139728546, "learning_rate": 0.00015702776902650885, "loss": 0.2686, "step": 14486 }, { "epoch": 1.173606610499028, "grad_norm": 0.042164161801338196, "learning_rate": 0.00015702326837391424, "loss": 0.3169, "step": 14487 }, { "epoch": 1.1736876215165262, "grad_norm": 0.04308300465345383, "learning_rate": 0.0001570187677213196, "loss": 0.2903, "step": 14488 }, { "epoch": 1.1737686325340246, "grad_norm": 0.04266469553112984, "learning_rate": 0.00015701426706872496, "loss": 0.3199, "step": 14489 }, { "epoch": 1.173849643551523, "grad_norm": 0.03647917881608009, "learning_rate": 0.00015700976641613035, "loss": 0.29, "step": 14490 }, { "epoch": 1.1739306545690213, "grad_norm": 0.04074126482009888, "learning_rate": 0.0001570052657635357, "loss": 0.3361, "step": 14491 }, { "epoch": 1.1740116655865198, "grad_norm": 0.03795121610164642, "learning_rate": 0.0001570007651109411, "loss": 0.2605, "step": 14492 }, { "epoch": 1.174092676604018, "grad_norm": 0.0438787043094635, "learning_rate": 0.00015699626445834648, "loss": 0.3447, "step": 14493 }, { "epoch": 1.1741736876215165, "grad_norm": 0.037022512406110764, "learning_rate": 0.00015699176380575184, "loss": 0.2762, "step": 14494 }, { "epoch": 1.174254698639015, "grad_norm": 0.03483128175139427, "learning_rate": 0.0001569872631531572, "loss": 0.268, "step": 14495 }, { "epoch": 1.1743357096565132, "grad_norm": 0.03925696015357971, "learning_rate": 0.0001569827625005626, "loss": 0.3382, "step": 14496 }, { "epoch": 1.1744167206740117, "grad_norm": 0.03809134289622307, "learning_rate": 0.00015697826184796795, "loss": 0.2947, "step": 14497 }, { "epoch": 1.17449773169151, "grad_norm": 0.04280867800116539, "learning_rate": 0.00015697376119537334, "loss": 0.3184, "step": 14498 }, { "epoch": 1.1745787427090084, "grad_norm": 0.039980579167604446, "learning_rate": 0.00015696926054277872, "loss": 0.308, "step": 14499 }, { "epoch": 1.1746597537265069, "grad_norm": 0.03806230053305626, "learning_rate": 0.00015696475989018409, "loss": 0.2924, "step": 14500 }, { "epoch": 1.1747407647440051, "grad_norm": 0.040367115288972855, "learning_rate": 0.00015696025923758945, "loss": 0.3222, "step": 14501 }, { "epoch": 1.1748217757615036, "grad_norm": 0.04814135283231735, "learning_rate": 0.00015695575858499483, "loss": 0.3295, "step": 14502 }, { "epoch": 1.1749027867790018, "grad_norm": 0.03762340918183327, "learning_rate": 0.0001569512579324002, "loss": 0.3057, "step": 14503 }, { "epoch": 1.1749837977965003, "grad_norm": 0.03948173671960831, "learning_rate": 0.00015694675727980558, "loss": 0.2905, "step": 14504 }, { "epoch": 1.1750648088139988, "grad_norm": 0.03911614045500755, "learning_rate": 0.00015694225662721097, "loss": 0.3082, "step": 14505 }, { "epoch": 1.175145819831497, "grad_norm": 0.040515441447496414, "learning_rate": 0.00015693775597461633, "loss": 0.3392, "step": 14506 }, { "epoch": 1.1752268308489955, "grad_norm": 0.037460993975400925, "learning_rate": 0.0001569332553220217, "loss": 0.2566, "step": 14507 }, { "epoch": 1.175307841866494, "grad_norm": 0.04020340368151665, "learning_rate": 0.00015692875466942707, "loss": 0.3127, "step": 14508 }, { "epoch": 1.1753888528839922, "grad_norm": 0.037423986941576004, "learning_rate": 0.00015692425401683243, "loss": 0.3021, "step": 14509 }, { "epoch": 1.1754698639014907, "grad_norm": 0.041398096829652786, "learning_rate": 0.00015691975336423782, "loss": 0.3459, "step": 14510 }, { "epoch": 1.175550874918989, "grad_norm": 0.034351762384176254, "learning_rate": 0.0001569152527116432, "loss": 0.2458, "step": 14511 }, { "epoch": 1.1756318859364874, "grad_norm": 0.04114198684692383, "learning_rate": 0.00015691075205904857, "loss": 0.3298, "step": 14512 }, { "epoch": 1.1757128969539856, "grad_norm": 0.0437115840613842, "learning_rate": 0.00015690625140645393, "loss": 0.3463, "step": 14513 }, { "epoch": 1.175793907971484, "grad_norm": 0.03909998759627342, "learning_rate": 0.00015690175075385932, "loss": 0.3115, "step": 14514 }, { "epoch": 1.1758749189889826, "grad_norm": 0.04081261530518532, "learning_rate": 0.00015689725010126468, "loss": 0.2969, "step": 14515 }, { "epoch": 1.1759559300064808, "grad_norm": 0.039999548345804214, "learning_rate": 0.00015689274944867006, "loss": 0.3258, "step": 14516 }, { "epoch": 1.1760369410239793, "grad_norm": 0.04130193963646889, "learning_rate": 0.00015688824879607545, "loss": 0.3124, "step": 14517 }, { "epoch": 1.1761179520414777, "grad_norm": 0.04370003193616867, "learning_rate": 0.0001568837481434808, "loss": 0.3379, "step": 14518 }, { "epoch": 1.176198963058976, "grad_norm": 0.03728923201560974, "learning_rate": 0.00015687924749088617, "loss": 0.2934, "step": 14519 }, { "epoch": 1.1762799740764744, "grad_norm": 0.043891359120607376, "learning_rate": 0.00015687474683829156, "loss": 0.3174, "step": 14520 }, { "epoch": 1.1763609850939727, "grad_norm": 0.03610162436962128, "learning_rate": 0.00015687024618569692, "loss": 0.3094, "step": 14521 }, { "epoch": 1.1764419961114712, "grad_norm": 0.04091033712029457, "learning_rate": 0.0001568657455331023, "loss": 0.3072, "step": 14522 }, { "epoch": 1.1765230071289696, "grad_norm": 0.038009703159332275, "learning_rate": 0.0001568612448805077, "loss": 0.2712, "step": 14523 }, { "epoch": 1.1766040181464679, "grad_norm": 0.041291069239377975, "learning_rate": 0.00015685674422791305, "loss": 0.3014, "step": 14524 }, { "epoch": 1.1766850291639663, "grad_norm": 0.04737641662359238, "learning_rate": 0.0001568522435753184, "loss": 0.3081, "step": 14525 }, { "epoch": 1.1767660401814646, "grad_norm": 0.038802701979875565, "learning_rate": 0.0001568477429227238, "loss": 0.298, "step": 14526 }, { "epoch": 1.176847051198963, "grad_norm": 0.03882235661149025, "learning_rate": 0.00015684324227012919, "loss": 0.3296, "step": 14527 }, { "epoch": 1.1769280622164615, "grad_norm": 0.041273076087236404, "learning_rate": 0.00015683874161753455, "loss": 0.298, "step": 14528 }, { "epoch": 1.1770090732339598, "grad_norm": 0.03610391914844513, "learning_rate": 0.00015683424096493993, "loss": 0.2757, "step": 14529 }, { "epoch": 1.1770900842514582, "grad_norm": 0.03763338550925255, "learning_rate": 0.0001568297403123453, "loss": 0.2966, "step": 14530 }, { "epoch": 1.1771710952689567, "grad_norm": 0.04555139318108559, "learning_rate": 0.00015682523965975068, "loss": 0.2791, "step": 14531 }, { "epoch": 1.177252106286455, "grad_norm": 0.03985441476106644, "learning_rate": 0.00015682073900715604, "loss": 0.2839, "step": 14532 }, { "epoch": 1.1773331173039534, "grad_norm": 0.03928079083561897, "learning_rate": 0.00015681623835456143, "loss": 0.3061, "step": 14533 }, { "epoch": 1.1774141283214516, "grad_norm": 0.03562358021736145, "learning_rate": 0.0001568117377019668, "loss": 0.2928, "step": 14534 }, { "epoch": 1.1774951393389501, "grad_norm": 0.040488019585609436, "learning_rate": 0.00015680723704937217, "loss": 0.3247, "step": 14535 }, { "epoch": 1.1775761503564484, "grad_norm": 0.04520033299922943, "learning_rate": 0.00015680273639677753, "loss": 0.3615, "step": 14536 }, { "epoch": 1.1776571613739468, "grad_norm": 0.04714178293943405, "learning_rate": 0.00015679823574418292, "loss": 0.2838, "step": 14537 }, { "epoch": 1.1777381723914453, "grad_norm": 0.041020654141902924, "learning_rate": 0.00015679373509158828, "loss": 0.3136, "step": 14538 }, { "epoch": 1.1778191834089435, "grad_norm": 0.044311195611953735, "learning_rate": 0.00015678923443899367, "loss": 0.3027, "step": 14539 }, { "epoch": 1.177900194426442, "grad_norm": 0.04358860105276108, "learning_rate": 0.00015678473378639903, "loss": 0.2881, "step": 14540 }, { "epoch": 1.1779812054439405, "grad_norm": 0.038203202188014984, "learning_rate": 0.00015678023313380442, "loss": 0.2989, "step": 14541 }, { "epoch": 1.1780622164614387, "grad_norm": 0.04246249049901962, "learning_rate": 0.00015677573248120978, "loss": 0.2721, "step": 14542 }, { "epoch": 1.1781432274789372, "grad_norm": 0.04331636056303978, "learning_rate": 0.00015677123182861516, "loss": 0.2921, "step": 14543 }, { "epoch": 1.1782242384964354, "grad_norm": 0.03967595845460892, "learning_rate": 0.00015676673117602052, "loss": 0.2995, "step": 14544 }, { "epoch": 1.178305249513934, "grad_norm": 0.04198235273361206, "learning_rate": 0.0001567622305234259, "loss": 0.3049, "step": 14545 }, { "epoch": 1.1783862605314324, "grad_norm": 0.03865557909011841, "learning_rate": 0.00015675772987083127, "loss": 0.2707, "step": 14546 }, { "epoch": 1.1784672715489306, "grad_norm": 0.03893511742353439, "learning_rate": 0.00015675322921823666, "loss": 0.2957, "step": 14547 }, { "epoch": 1.178548282566429, "grad_norm": 0.045068565756082535, "learning_rate": 0.00015674872856564202, "loss": 0.3531, "step": 14548 }, { "epoch": 1.1786292935839273, "grad_norm": 0.043633587658405304, "learning_rate": 0.0001567442279130474, "loss": 0.2947, "step": 14549 }, { "epoch": 1.1787103046014258, "grad_norm": 0.03821111097931862, "learning_rate": 0.00015673972726045277, "loss": 0.277, "step": 14550 }, { "epoch": 1.1787913156189243, "grad_norm": 0.038753848522901535, "learning_rate": 0.00015673522660785815, "loss": 0.2961, "step": 14551 }, { "epoch": 1.1788723266364225, "grad_norm": 0.0451669916510582, "learning_rate": 0.0001567307259552635, "loss": 0.2882, "step": 14552 }, { "epoch": 1.178953337653921, "grad_norm": 0.050977639853954315, "learning_rate": 0.0001567262253026689, "loss": 0.3457, "step": 14553 }, { "epoch": 1.1790343486714194, "grad_norm": 0.0382966622710228, "learning_rate": 0.00015672172465007426, "loss": 0.3034, "step": 14554 }, { "epoch": 1.1791153596889177, "grad_norm": 0.05075497925281525, "learning_rate": 0.00015671722399747965, "loss": 0.3421, "step": 14555 }, { "epoch": 1.1791963707064161, "grad_norm": 0.0408305898308754, "learning_rate": 0.00015671272334488503, "loss": 0.3074, "step": 14556 }, { "epoch": 1.1792773817239144, "grad_norm": 0.04049469158053398, "learning_rate": 0.0001567082226922904, "loss": 0.2931, "step": 14557 }, { "epoch": 1.1793583927414129, "grad_norm": 0.03187038004398346, "learning_rate": 0.00015670372203969575, "loss": 0.2607, "step": 14558 }, { "epoch": 1.179439403758911, "grad_norm": 0.04170691594481468, "learning_rate": 0.00015669922138710114, "loss": 0.2891, "step": 14559 }, { "epoch": 1.1795204147764096, "grad_norm": 0.04311882704496384, "learning_rate": 0.0001566947207345065, "loss": 0.3076, "step": 14560 }, { "epoch": 1.179601425793908, "grad_norm": 0.04691338539123535, "learning_rate": 0.0001566902200819119, "loss": 0.3463, "step": 14561 }, { "epoch": 1.1796824368114063, "grad_norm": 0.04103444144129753, "learning_rate": 0.00015668571942931728, "loss": 0.3452, "step": 14562 }, { "epoch": 1.1797634478289047, "grad_norm": 0.038828045129776, "learning_rate": 0.00015668121877672264, "loss": 0.2922, "step": 14563 }, { "epoch": 1.1798444588464032, "grad_norm": 0.04108811914920807, "learning_rate": 0.000156676718124128, "loss": 0.3192, "step": 14564 }, { "epoch": 1.1799254698639015, "grad_norm": 0.03875766694545746, "learning_rate": 0.00015667221747153338, "loss": 0.2748, "step": 14565 }, { "epoch": 1.1800064808814, "grad_norm": 0.044968828558921814, "learning_rate": 0.00015666771681893874, "loss": 0.3084, "step": 14566 }, { "epoch": 1.1800874918988982, "grad_norm": 0.03477316349744797, "learning_rate": 0.00015666321616634413, "loss": 0.2727, "step": 14567 }, { "epoch": 1.1801685029163966, "grad_norm": 0.03975457698106766, "learning_rate": 0.00015665871551374952, "loss": 0.3022, "step": 14568 }, { "epoch": 1.180249513933895, "grad_norm": 0.04314548522233963, "learning_rate": 0.00015665421486115488, "loss": 0.3261, "step": 14569 }, { "epoch": 1.1803305249513933, "grad_norm": 0.04482617601752281, "learning_rate": 0.00015664971420856024, "loss": 0.3477, "step": 14570 }, { "epoch": 1.1804115359688918, "grad_norm": 0.037931445986032486, "learning_rate": 0.00015664521355596562, "loss": 0.3109, "step": 14571 }, { "epoch": 1.18049254698639, "grad_norm": 0.03743727505207062, "learning_rate": 0.00015664071290337098, "loss": 0.3048, "step": 14572 }, { "epoch": 1.1805735580038885, "grad_norm": 0.039792127907276154, "learning_rate": 0.00015663621225077637, "loss": 0.2796, "step": 14573 }, { "epoch": 1.180654569021387, "grad_norm": 0.040228910744190216, "learning_rate": 0.00015663171159818176, "loss": 0.3057, "step": 14574 }, { "epoch": 1.1807355800388852, "grad_norm": 0.042243167757987976, "learning_rate": 0.00015662721094558712, "loss": 0.2978, "step": 14575 }, { "epoch": 1.1808165910563837, "grad_norm": 0.041131455451250076, "learning_rate": 0.00015662271029299248, "loss": 0.293, "step": 14576 }, { "epoch": 1.1808976020738822, "grad_norm": 0.04173069819808006, "learning_rate": 0.00015661820964039787, "loss": 0.3145, "step": 14577 }, { "epoch": 1.1809786130913804, "grad_norm": 0.0336281880736351, "learning_rate": 0.00015661370898780323, "loss": 0.2806, "step": 14578 }, { "epoch": 1.1810596241088789, "grad_norm": 0.04197990521788597, "learning_rate": 0.0001566092083352086, "loss": 0.3016, "step": 14579 }, { "epoch": 1.1811406351263771, "grad_norm": 0.04100755229592323, "learning_rate": 0.000156604707682614, "loss": 0.3097, "step": 14580 }, { "epoch": 1.1812216461438756, "grad_norm": 0.040469493716955185, "learning_rate": 0.00015660020703001936, "loss": 0.2729, "step": 14581 }, { "epoch": 1.1813026571613738, "grad_norm": 0.04121621325612068, "learning_rate": 0.00015659570637742472, "loss": 0.3219, "step": 14582 }, { "epoch": 1.1813836681788723, "grad_norm": 0.04582282528281212, "learning_rate": 0.0001565912057248301, "loss": 0.3659, "step": 14583 }, { "epoch": 1.1814646791963708, "grad_norm": 0.04043503850698471, "learning_rate": 0.00015658670507223547, "loss": 0.3278, "step": 14584 }, { "epoch": 1.181545690213869, "grad_norm": 0.04137546941637993, "learning_rate": 0.00015658220441964085, "loss": 0.2734, "step": 14585 }, { "epoch": 1.1816267012313675, "grad_norm": 0.043692342936992645, "learning_rate": 0.00015657770376704624, "loss": 0.2952, "step": 14586 }, { "epoch": 1.181707712248866, "grad_norm": 0.04300142824649811, "learning_rate": 0.0001565732031144516, "loss": 0.3212, "step": 14587 }, { "epoch": 1.1817887232663642, "grad_norm": 0.050188302993774414, "learning_rate": 0.00015656870246185696, "loss": 0.3537, "step": 14588 }, { "epoch": 1.1818697342838627, "grad_norm": 0.04783901944756508, "learning_rate": 0.00015656420180926235, "loss": 0.318, "step": 14589 }, { "epoch": 1.181950745301361, "grad_norm": 0.042725108563899994, "learning_rate": 0.0001565597011566677, "loss": 0.3274, "step": 14590 }, { "epoch": 1.1820317563188594, "grad_norm": 0.03940672427415848, "learning_rate": 0.0001565552005040731, "loss": 0.2916, "step": 14591 }, { "epoch": 1.1821127673363578, "grad_norm": 0.03988233208656311, "learning_rate": 0.00015655069985147848, "loss": 0.2568, "step": 14592 }, { "epoch": 1.182193778353856, "grad_norm": 0.04097625985741615, "learning_rate": 0.00015654619919888384, "loss": 0.3416, "step": 14593 }, { "epoch": 1.1822747893713546, "grad_norm": 0.04196294769644737, "learning_rate": 0.0001565416985462892, "loss": 0.3506, "step": 14594 }, { "epoch": 1.1823558003888528, "grad_norm": 0.04109298437833786, "learning_rate": 0.0001565371978936946, "loss": 0.3081, "step": 14595 }, { "epoch": 1.1824368114063513, "grad_norm": 0.04117002338171005, "learning_rate": 0.00015653269724109995, "loss": 0.3119, "step": 14596 }, { "epoch": 1.1825178224238497, "grad_norm": 0.04104756563901901, "learning_rate": 0.00015652819658850534, "loss": 0.3054, "step": 14597 }, { "epoch": 1.182598833441348, "grad_norm": 0.03538989648222923, "learning_rate": 0.00015652369593591073, "loss": 0.3011, "step": 14598 }, { "epoch": 1.1826798444588464, "grad_norm": 0.04043510556221008, "learning_rate": 0.00015651919528331609, "loss": 0.2906, "step": 14599 }, { "epoch": 1.1827608554763447, "grad_norm": 0.040407996624708176, "learning_rate": 0.00015651469463072147, "loss": 0.3063, "step": 14600 }, { "epoch": 1.1828418664938432, "grad_norm": 0.04454110935330391, "learning_rate": 0.00015651019397812683, "loss": 0.3811, "step": 14601 }, { "epoch": 1.1829228775113416, "grad_norm": 0.04084227234125137, "learning_rate": 0.0001565056933255322, "loss": 0.3268, "step": 14602 }, { "epoch": 1.1830038885288399, "grad_norm": 0.04479276016354561, "learning_rate": 0.00015650119267293758, "loss": 0.344, "step": 14603 }, { "epoch": 1.1830848995463383, "grad_norm": 0.04356138035655022, "learning_rate": 0.00015649669202034297, "loss": 0.3259, "step": 14604 }, { "epoch": 1.1831659105638366, "grad_norm": 0.03863155096769333, "learning_rate": 0.00015649219136774833, "loss": 0.2627, "step": 14605 }, { "epoch": 1.183246921581335, "grad_norm": 0.043492794036865234, "learning_rate": 0.00015648769071515371, "loss": 0.3267, "step": 14606 }, { "epoch": 1.1833279325988335, "grad_norm": 0.03926840052008629, "learning_rate": 0.00015648319006255907, "loss": 0.3278, "step": 14607 }, { "epoch": 1.1834089436163318, "grad_norm": 0.0472257025539875, "learning_rate": 0.00015647868940996446, "loss": 0.3595, "step": 14608 }, { "epoch": 1.1834899546338302, "grad_norm": 0.036868367344141006, "learning_rate": 0.00015647418875736982, "loss": 0.2795, "step": 14609 }, { "epoch": 1.1835709656513287, "grad_norm": 0.04061976820230484, "learning_rate": 0.0001564696881047752, "loss": 0.3147, "step": 14610 }, { "epoch": 1.183651976668827, "grad_norm": 0.048738934099674225, "learning_rate": 0.00015646518745218057, "loss": 0.3379, "step": 14611 }, { "epoch": 1.1837329876863254, "grad_norm": 0.041473086923360825, "learning_rate": 0.00015646068679958596, "loss": 0.3316, "step": 14612 }, { "epoch": 1.1838139987038236, "grad_norm": 0.04210762307047844, "learning_rate": 0.00015645618614699132, "loss": 0.3278, "step": 14613 }, { "epoch": 1.1838950097213221, "grad_norm": 0.04672138765454292, "learning_rate": 0.0001564516854943967, "loss": 0.3275, "step": 14614 }, { "epoch": 1.1839760207388204, "grad_norm": 0.03847205638885498, "learning_rate": 0.00015644718484180206, "loss": 0.2786, "step": 14615 }, { "epoch": 1.1840570317563188, "grad_norm": 0.04784049838781357, "learning_rate": 0.00015644268418920745, "loss": 0.3308, "step": 14616 }, { "epoch": 1.1841380427738173, "grad_norm": 0.046784672886133194, "learning_rate": 0.0001564381835366128, "loss": 0.3287, "step": 14617 }, { "epoch": 1.1842190537913155, "grad_norm": 0.044036705046892166, "learning_rate": 0.0001564336828840182, "loss": 0.3216, "step": 14618 }, { "epoch": 1.184300064808814, "grad_norm": 0.04180029034614563, "learning_rate": 0.00015642918223142356, "loss": 0.2978, "step": 14619 }, { "epoch": 1.1843810758263125, "grad_norm": 0.039441563189029694, "learning_rate": 0.00015642468157882894, "loss": 0.3131, "step": 14620 }, { "epoch": 1.1844620868438107, "grad_norm": 0.037397947162389755, "learning_rate": 0.0001564201809262343, "loss": 0.2865, "step": 14621 }, { "epoch": 1.1845430978613092, "grad_norm": 0.044736817479133606, "learning_rate": 0.0001564156802736397, "loss": 0.3352, "step": 14622 }, { "epoch": 1.1846241088788074, "grad_norm": 0.04054537042975426, "learning_rate": 0.00015641117962104505, "loss": 0.2892, "step": 14623 }, { "epoch": 1.184705119896306, "grad_norm": 0.03535526618361473, "learning_rate": 0.00015640667896845044, "loss": 0.2897, "step": 14624 }, { "epoch": 1.1847861309138044, "grad_norm": 0.03459848091006279, "learning_rate": 0.0001564021783158558, "loss": 0.287, "step": 14625 }, { "epoch": 1.1848671419313026, "grad_norm": 0.044806841760873795, "learning_rate": 0.00015639767766326119, "loss": 0.3095, "step": 14626 }, { "epoch": 1.184948152948801, "grad_norm": 0.040577251464128494, "learning_rate": 0.00015639317701066655, "loss": 0.3046, "step": 14627 }, { "epoch": 1.1850291639662993, "grad_norm": 0.03882734477519989, "learning_rate": 0.00015638867635807193, "loss": 0.281, "step": 14628 }, { "epoch": 1.1851101749837978, "grad_norm": 0.040150970220565796, "learning_rate": 0.0001563841757054773, "loss": 0.3207, "step": 14629 }, { "epoch": 1.1851911860012962, "grad_norm": 0.037906549870967865, "learning_rate": 0.00015637967505288268, "loss": 0.2897, "step": 14630 }, { "epoch": 1.1852721970187945, "grad_norm": 0.042285289615392685, "learning_rate": 0.00015637517440028807, "loss": 0.342, "step": 14631 }, { "epoch": 1.185353208036293, "grad_norm": 0.04037885740399361, "learning_rate": 0.00015637067374769343, "loss": 0.3101, "step": 14632 }, { "epoch": 1.1854342190537914, "grad_norm": 0.03909390792250633, "learning_rate": 0.0001563661730950988, "loss": 0.2753, "step": 14633 }, { "epoch": 1.1855152300712897, "grad_norm": 0.03916388377547264, "learning_rate": 0.00015636167244250417, "loss": 0.2996, "step": 14634 }, { "epoch": 1.1855962410887881, "grad_norm": 0.03810279443860054, "learning_rate": 0.00015635717178990954, "loss": 0.3103, "step": 14635 }, { "epoch": 1.1856772521062864, "grad_norm": 0.04713892191648483, "learning_rate": 0.00015635267113731492, "loss": 0.331, "step": 14636 }, { "epoch": 1.1857582631237849, "grad_norm": 0.041569601744413376, "learning_rate": 0.0001563481704847203, "loss": 0.2999, "step": 14637 }, { "epoch": 1.185839274141283, "grad_norm": 0.03498614951968193, "learning_rate": 0.00015634366983212567, "loss": 0.291, "step": 14638 }, { "epoch": 1.1859202851587816, "grad_norm": 0.038540758192539215, "learning_rate": 0.00015633916917953103, "loss": 0.2772, "step": 14639 }, { "epoch": 1.18600129617628, "grad_norm": 0.04108189418911934, "learning_rate": 0.00015633466852693642, "loss": 0.3065, "step": 14640 }, { "epoch": 1.1860823071937783, "grad_norm": 0.039533913135528564, "learning_rate": 0.00015633016787434178, "loss": 0.3456, "step": 14641 }, { "epoch": 1.1861633182112767, "grad_norm": 0.040663499385118484, "learning_rate": 0.00015632566722174716, "loss": 0.2879, "step": 14642 }, { "epoch": 1.1862443292287752, "grad_norm": 0.04121604189276695, "learning_rate": 0.00015632116656915255, "loss": 0.3075, "step": 14643 }, { "epoch": 1.1863253402462735, "grad_norm": 0.03926914185285568, "learning_rate": 0.0001563166659165579, "loss": 0.3049, "step": 14644 }, { "epoch": 1.186406351263772, "grad_norm": 0.04522679001092911, "learning_rate": 0.00015631216526396327, "loss": 0.3001, "step": 14645 }, { "epoch": 1.1864873622812702, "grad_norm": 0.03877709433436394, "learning_rate": 0.00015630766461136866, "loss": 0.2935, "step": 14646 }, { "epoch": 1.1865683732987686, "grad_norm": 0.03700802102684975, "learning_rate": 0.00015630316395877402, "loss": 0.3181, "step": 14647 }, { "epoch": 1.186649384316267, "grad_norm": 0.05110809579491615, "learning_rate": 0.0001562986633061794, "loss": 0.3649, "step": 14648 }, { "epoch": 1.1867303953337653, "grad_norm": 0.03704257681965828, "learning_rate": 0.0001562941626535848, "loss": 0.2711, "step": 14649 }, { "epoch": 1.1868114063512638, "grad_norm": 0.04234371334314346, "learning_rate": 0.00015628966200099015, "loss": 0.3677, "step": 14650 }, { "epoch": 1.186892417368762, "grad_norm": 0.04498139023780823, "learning_rate": 0.0001562851613483955, "loss": 0.3933, "step": 14651 }, { "epoch": 1.1869734283862605, "grad_norm": 0.041128672659397125, "learning_rate": 0.0001562806606958009, "loss": 0.3123, "step": 14652 }, { "epoch": 1.187054439403759, "grad_norm": 0.04224330186843872, "learning_rate": 0.00015627616004320626, "loss": 0.2788, "step": 14653 }, { "epoch": 1.1871354504212572, "grad_norm": 0.0469592809677124, "learning_rate": 0.00015627165939061165, "loss": 0.3154, "step": 14654 }, { "epoch": 1.1872164614387557, "grad_norm": 0.04049103334546089, "learning_rate": 0.00015626715873801703, "loss": 0.3137, "step": 14655 }, { "epoch": 1.1872974724562542, "grad_norm": 0.04263480752706528, "learning_rate": 0.0001562626580854224, "loss": 0.3052, "step": 14656 }, { "epoch": 1.1873784834737524, "grad_norm": 0.040018241852521896, "learning_rate": 0.00015625815743282775, "loss": 0.273, "step": 14657 }, { "epoch": 1.1874594944912509, "grad_norm": 0.03727559745311737, "learning_rate": 0.00015625365678023314, "loss": 0.3177, "step": 14658 }, { "epoch": 1.1875405055087491, "grad_norm": 0.03743201121687889, "learning_rate": 0.0001562491561276385, "loss": 0.3113, "step": 14659 }, { "epoch": 1.1876215165262476, "grad_norm": 0.0455901138484478, "learning_rate": 0.0001562446554750439, "loss": 0.2975, "step": 14660 }, { "epoch": 1.1877025275437458, "grad_norm": 0.03635476902127266, "learning_rate": 0.00015624015482244928, "loss": 0.2866, "step": 14661 }, { "epoch": 1.1877835385612443, "grad_norm": 0.045002736151218414, "learning_rate": 0.00015623565416985464, "loss": 0.358, "step": 14662 }, { "epoch": 1.1878645495787428, "grad_norm": 0.0431043915450573, "learning_rate": 0.00015623115351726, "loss": 0.3263, "step": 14663 }, { "epoch": 1.187945560596241, "grad_norm": 0.037004776298999786, "learning_rate": 0.00015622665286466538, "loss": 0.2974, "step": 14664 }, { "epoch": 1.1880265716137395, "grad_norm": 0.03819013759493828, "learning_rate": 0.00015622215221207074, "loss": 0.2786, "step": 14665 }, { "epoch": 1.188107582631238, "grad_norm": 0.042751818895339966, "learning_rate": 0.00015621765155947613, "loss": 0.3182, "step": 14666 }, { "epoch": 1.1881885936487362, "grad_norm": 0.04809313639998436, "learning_rate": 0.00015621315090688152, "loss": 0.3312, "step": 14667 }, { "epoch": 1.1882696046662347, "grad_norm": 0.04383840411901474, "learning_rate": 0.00015620865025428688, "loss": 0.321, "step": 14668 }, { "epoch": 1.188350615683733, "grad_norm": 0.05272422730922699, "learning_rate": 0.00015620414960169226, "loss": 0.3385, "step": 14669 }, { "epoch": 1.1884316267012314, "grad_norm": 0.049258504062891006, "learning_rate": 0.00015619964894909762, "loss": 0.3773, "step": 14670 }, { "epoch": 1.1885126377187298, "grad_norm": 0.043628692626953125, "learning_rate": 0.00015619514829650298, "loss": 0.309, "step": 14671 }, { "epoch": 1.188593648736228, "grad_norm": 0.03882945701479912, "learning_rate": 0.00015619064764390837, "loss": 0.268, "step": 14672 }, { "epoch": 1.1886746597537265, "grad_norm": 0.0409681610763073, "learning_rate": 0.00015618614699131376, "loss": 0.3279, "step": 14673 }, { "epoch": 1.1887556707712248, "grad_norm": 0.042711541056632996, "learning_rate": 0.00015618164633871912, "loss": 0.3125, "step": 14674 }, { "epoch": 1.1888366817887233, "grad_norm": 0.046427272260189056, "learning_rate": 0.0001561771456861245, "loss": 0.3627, "step": 14675 }, { "epoch": 1.1889176928062217, "grad_norm": 0.03923366591334343, "learning_rate": 0.00015617264503352987, "loss": 0.2783, "step": 14676 }, { "epoch": 1.18899870382372, "grad_norm": 0.042100224643945694, "learning_rate": 0.00015616814438093523, "loss": 0.3132, "step": 14677 }, { "epoch": 1.1890797148412184, "grad_norm": 0.05138513818383217, "learning_rate": 0.0001561636437283406, "loss": 0.3283, "step": 14678 }, { "epoch": 1.189160725858717, "grad_norm": 0.038992591202259064, "learning_rate": 0.000156159143075746, "loss": 0.2974, "step": 14679 }, { "epoch": 1.1892417368762151, "grad_norm": 0.03872741758823395, "learning_rate": 0.00015615464242315136, "loss": 0.2748, "step": 14680 }, { "epoch": 1.1893227478937136, "grad_norm": 0.03662829101085663, "learning_rate": 0.00015615014177055675, "loss": 0.2969, "step": 14681 }, { "epoch": 1.1894037589112119, "grad_norm": 0.044510021805763245, "learning_rate": 0.0001561456411179621, "loss": 0.3523, "step": 14682 }, { "epoch": 1.1894847699287103, "grad_norm": 0.03760409727692604, "learning_rate": 0.00015614114046536747, "loss": 0.2595, "step": 14683 }, { "epoch": 1.1895657809462086, "grad_norm": 0.03967760503292084, "learning_rate": 0.00015613663981277286, "loss": 0.2953, "step": 14684 }, { "epoch": 1.189646791963707, "grad_norm": 0.043346818536520004, "learning_rate": 0.00015613213916017824, "loss": 0.3033, "step": 14685 }, { "epoch": 1.1897278029812055, "grad_norm": 0.04163350909948349, "learning_rate": 0.0001561276385075836, "loss": 0.3169, "step": 14686 }, { "epoch": 1.1898088139987038, "grad_norm": 0.04359233006834984, "learning_rate": 0.000156123137854989, "loss": 0.3216, "step": 14687 }, { "epoch": 1.1898898250162022, "grad_norm": 0.04172962158918381, "learning_rate": 0.00015611863720239435, "loss": 0.2949, "step": 14688 }, { "epoch": 1.1899708360337007, "grad_norm": 0.0403163880109787, "learning_rate": 0.00015611413654979974, "loss": 0.3082, "step": 14689 }, { "epoch": 1.190051847051199, "grad_norm": 0.03849377855658531, "learning_rate": 0.0001561096358972051, "loss": 0.3316, "step": 14690 }, { "epoch": 1.1901328580686974, "grad_norm": 0.041115447878837585, "learning_rate": 0.00015610513524461048, "loss": 0.2796, "step": 14691 }, { "epoch": 1.1902138690861956, "grad_norm": 0.0451335534453392, "learning_rate": 0.00015610063459201584, "loss": 0.3169, "step": 14692 }, { "epoch": 1.190294880103694, "grad_norm": 0.04398505389690399, "learning_rate": 0.00015609613393942123, "loss": 0.3318, "step": 14693 }, { "epoch": 1.1903758911211926, "grad_norm": 0.034957144409418106, "learning_rate": 0.0001560916332868266, "loss": 0.2525, "step": 14694 }, { "epoch": 1.1904569021386908, "grad_norm": 0.040896087884902954, "learning_rate": 0.00015608713263423198, "loss": 0.3242, "step": 14695 }, { "epoch": 1.1905379131561893, "grad_norm": 0.03556426241993904, "learning_rate": 0.00015608263198163734, "loss": 0.2734, "step": 14696 }, { "epoch": 1.1906189241736875, "grad_norm": 0.04682194069027901, "learning_rate": 0.00015607813132904273, "loss": 0.3236, "step": 14697 }, { "epoch": 1.190699935191186, "grad_norm": 0.044954705983400345, "learning_rate": 0.00015607363067644809, "loss": 0.348, "step": 14698 }, { "epoch": 1.1907809462086845, "grad_norm": 0.04162221774458885, "learning_rate": 0.00015606913002385347, "loss": 0.3005, "step": 14699 }, { "epoch": 1.1908619572261827, "grad_norm": 0.045600537210702896, "learning_rate": 0.00015606462937125883, "loss": 0.3018, "step": 14700 }, { "epoch": 1.1909429682436812, "grad_norm": 0.04428591579198837, "learning_rate": 0.00015606012871866422, "loss": 0.2874, "step": 14701 }, { "epoch": 1.1910239792611796, "grad_norm": 0.04231373220682144, "learning_rate": 0.00015605562806606958, "loss": 0.3303, "step": 14702 }, { "epoch": 1.1911049902786779, "grad_norm": 0.04606911540031433, "learning_rate": 0.00015605112741347497, "loss": 0.3176, "step": 14703 }, { "epoch": 1.1911860012961764, "grad_norm": 0.04126888886094093, "learning_rate": 0.00015604662676088033, "loss": 0.2939, "step": 14704 }, { "epoch": 1.1912670123136746, "grad_norm": 0.050967976450920105, "learning_rate": 0.00015604212610828571, "loss": 0.3263, "step": 14705 }, { "epoch": 1.191348023331173, "grad_norm": 0.04140590876340866, "learning_rate": 0.00015603762545569107, "loss": 0.3294, "step": 14706 }, { "epoch": 1.1914290343486713, "grad_norm": 0.03835592046380043, "learning_rate": 0.00015603312480309646, "loss": 0.2925, "step": 14707 }, { "epoch": 1.1915100453661698, "grad_norm": 0.04171990230679512, "learning_rate": 0.00015602862415050182, "loss": 0.3168, "step": 14708 }, { "epoch": 1.1915910563836682, "grad_norm": 0.04122872278094292, "learning_rate": 0.0001560241234979072, "loss": 0.3165, "step": 14709 }, { "epoch": 1.1916720674011665, "grad_norm": 0.04952973499894142, "learning_rate": 0.00015601962284531257, "loss": 0.3228, "step": 14710 }, { "epoch": 1.191753078418665, "grad_norm": 0.03939840570092201, "learning_rate": 0.00015601512219271796, "loss": 0.3099, "step": 14711 }, { "epoch": 1.1918340894361634, "grad_norm": 0.04720155522227287, "learning_rate": 0.00015601062154012334, "loss": 0.3551, "step": 14712 }, { "epoch": 1.1919151004536617, "grad_norm": 0.04138209670782089, "learning_rate": 0.0001560061208875287, "loss": 0.308, "step": 14713 }, { "epoch": 1.1919961114711601, "grad_norm": 0.042646877467632294, "learning_rate": 0.00015600162023493406, "loss": 0.3357, "step": 14714 }, { "epoch": 1.1920771224886584, "grad_norm": 0.03877168893814087, "learning_rate": 0.00015599711958233945, "loss": 0.3161, "step": 14715 }, { "epoch": 1.1921581335061568, "grad_norm": 0.03586186096072197, "learning_rate": 0.0001559926189297448, "loss": 0.2873, "step": 14716 }, { "epoch": 1.1922391445236553, "grad_norm": 0.039949409663677216, "learning_rate": 0.0001559881182771502, "loss": 0.2944, "step": 14717 }, { "epoch": 1.1923201555411536, "grad_norm": 0.035949863493442535, "learning_rate": 0.00015598361762455558, "loss": 0.2724, "step": 14718 }, { "epoch": 1.192401166558652, "grad_norm": 0.047032542526721954, "learning_rate": 0.00015597911697196094, "loss": 0.2925, "step": 14719 }, { "epoch": 1.1924821775761503, "grad_norm": 0.042840149253606796, "learning_rate": 0.0001559746163193663, "loss": 0.3464, "step": 14720 }, { "epoch": 1.1925631885936487, "grad_norm": 0.03936777636408806, "learning_rate": 0.0001559701156667717, "loss": 0.2892, "step": 14721 }, { "epoch": 1.1926441996111472, "grad_norm": 0.03563765808939934, "learning_rate": 0.00015596561501417705, "loss": 0.271, "step": 14722 }, { "epoch": 1.1927252106286454, "grad_norm": 0.04076451063156128, "learning_rate": 0.00015596111436158244, "loss": 0.3216, "step": 14723 }, { "epoch": 1.192806221646144, "grad_norm": 0.04466822370886803, "learning_rate": 0.00015595661370898783, "loss": 0.3353, "step": 14724 }, { "epoch": 1.1928872326636422, "grad_norm": 0.04330145940184593, "learning_rate": 0.00015595211305639319, "loss": 0.2819, "step": 14725 }, { "epoch": 1.1929682436811406, "grad_norm": 0.04840012267231941, "learning_rate": 0.00015594761240379855, "loss": 0.3132, "step": 14726 }, { "epoch": 1.193049254698639, "grad_norm": 0.052081044763326645, "learning_rate": 0.00015594311175120393, "loss": 0.3, "step": 14727 }, { "epoch": 1.1931302657161373, "grad_norm": 0.04210412874817848, "learning_rate": 0.0001559386110986093, "loss": 0.3137, "step": 14728 }, { "epoch": 1.1932112767336358, "grad_norm": 0.04098066687583923, "learning_rate": 0.00015593411044601468, "loss": 0.301, "step": 14729 }, { "epoch": 1.193292287751134, "grad_norm": 0.043190643191337585, "learning_rate": 0.00015592960979342007, "loss": 0.299, "step": 14730 }, { "epoch": 1.1933732987686325, "grad_norm": 0.041205570101737976, "learning_rate": 0.00015592510914082543, "loss": 0.2928, "step": 14731 }, { "epoch": 1.193454309786131, "grad_norm": 0.04265919327735901, "learning_rate": 0.0001559206084882308, "loss": 0.3183, "step": 14732 }, { "epoch": 1.1935353208036292, "grad_norm": 0.041771385818719864, "learning_rate": 0.00015591610783563618, "loss": 0.2907, "step": 14733 }, { "epoch": 1.1936163318211277, "grad_norm": 0.04530198499560356, "learning_rate": 0.00015591160718304154, "loss": 0.2799, "step": 14734 }, { "epoch": 1.1936973428386262, "grad_norm": 0.03936564549803734, "learning_rate": 0.00015590710653044692, "loss": 0.2737, "step": 14735 }, { "epoch": 1.1937783538561244, "grad_norm": 0.051649149507284164, "learning_rate": 0.0001559026058778523, "loss": 0.3649, "step": 14736 }, { "epoch": 1.1938593648736229, "grad_norm": 0.0410127229988575, "learning_rate": 0.00015589810522525767, "loss": 0.3007, "step": 14737 }, { "epoch": 1.1939403758911211, "grad_norm": 0.03969676047563553, "learning_rate": 0.00015589360457266306, "loss": 0.2863, "step": 14738 }, { "epoch": 1.1940213869086196, "grad_norm": 0.043512873351573944, "learning_rate": 0.00015588910392006842, "loss": 0.3205, "step": 14739 }, { "epoch": 1.1941023979261178, "grad_norm": 0.04225597903132439, "learning_rate": 0.00015588460326747378, "loss": 0.2796, "step": 14740 }, { "epoch": 1.1941834089436163, "grad_norm": 0.04662526398897171, "learning_rate": 0.00015588010261487916, "loss": 0.3338, "step": 14741 }, { "epoch": 1.1942644199611148, "grad_norm": 0.03814233839511871, "learning_rate": 0.00015587560196228455, "loss": 0.3217, "step": 14742 }, { "epoch": 1.194345430978613, "grad_norm": 0.04339953511953354, "learning_rate": 0.0001558711013096899, "loss": 0.3292, "step": 14743 }, { "epoch": 1.1944264419961115, "grad_norm": 0.038826216012239456, "learning_rate": 0.0001558666006570953, "loss": 0.3074, "step": 14744 }, { "epoch": 1.19450745301361, "grad_norm": 0.044687457382678986, "learning_rate": 0.00015586210000450066, "loss": 0.3296, "step": 14745 }, { "epoch": 1.1945884640311082, "grad_norm": 0.04153241962194443, "learning_rate": 0.00015585759935190602, "loss": 0.3272, "step": 14746 }, { "epoch": 1.1946694750486067, "grad_norm": 0.039639074355363846, "learning_rate": 0.0001558530986993114, "loss": 0.2531, "step": 14747 }, { "epoch": 1.194750486066105, "grad_norm": 0.04232201352715492, "learning_rate": 0.0001558485980467168, "loss": 0.3128, "step": 14748 }, { "epoch": 1.1948314970836034, "grad_norm": 0.03908455744385719, "learning_rate": 0.00015584409739412215, "loss": 0.2594, "step": 14749 }, { "epoch": 1.1949125081011018, "grad_norm": 0.04053455963730812, "learning_rate": 0.00015583959674152754, "loss": 0.3064, "step": 14750 }, { "epoch": 1.1949935191186, "grad_norm": 0.04426248371601105, "learning_rate": 0.0001558350960889329, "loss": 0.3536, "step": 14751 }, { "epoch": 1.1950745301360985, "grad_norm": 0.04585438594222069, "learning_rate": 0.00015583059543633826, "loss": 0.2993, "step": 14752 }, { "epoch": 1.1951555411535968, "grad_norm": 0.056634701788425446, "learning_rate": 0.00015582609478374365, "loss": 0.3503, "step": 14753 }, { "epoch": 1.1952365521710953, "grad_norm": 0.0459580272436142, "learning_rate": 0.00015582159413114903, "loss": 0.2628, "step": 14754 }, { "epoch": 1.1953175631885937, "grad_norm": 0.04491327330470085, "learning_rate": 0.0001558170934785544, "loss": 0.3054, "step": 14755 }, { "epoch": 1.195398574206092, "grad_norm": 0.044432204216718674, "learning_rate": 0.00015581259282595978, "loss": 0.3375, "step": 14756 }, { "epoch": 1.1954795852235904, "grad_norm": 0.03885927423834801, "learning_rate": 0.00015580809217336514, "loss": 0.2913, "step": 14757 }, { "epoch": 1.195560596241089, "grad_norm": 0.04088282212615013, "learning_rate": 0.0001558035915207705, "loss": 0.2791, "step": 14758 }, { "epoch": 1.1956416072585871, "grad_norm": 0.041324764490127563, "learning_rate": 0.0001557990908681759, "loss": 0.2867, "step": 14759 }, { "epoch": 1.1957226182760856, "grad_norm": 0.049072910100221634, "learning_rate": 0.00015579459021558128, "loss": 0.3258, "step": 14760 }, { "epoch": 1.1958036292935839, "grad_norm": 0.03818585351109505, "learning_rate": 0.00015579008956298664, "loss": 0.3228, "step": 14761 }, { "epoch": 1.1958846403110823, "grad_norm": 0.051422007381916046, "learning_rate": 0.00015578558891039202, "loss": 0.3233, "step": 14762 }, { "epoch": 1.1959656513285806, "grad_norm": 0.04118951037526131, "learning_rate": 0.00015578108825779738, "loss": 0.3029, "step": 14763 }, { "epoch": 1.196046662346079, "grad_norm": 0.03960846737027168, "learning_rate": 0.00015577658760520277, "loss": 0.2672, "step": 14764 }, { "epoch": 1.1961276733635775, "grad_norm": 0.039535727351903915, "learning_rate": 0.00015577208695260813, "loss": 0.3157, "step": 14765 }, { "epoch": 1.1962086843810757, "grad_norm": 0.042673733085393906, "learning_rate": 0.00015576758630001352, "loss": 0.2838, "step": 14766 }, { "epoch": 1.1962896953985742, "grad_norm": 0.04212876781821251, "learning_rate": 0.00015576308564741888, "loss": 0.2957, "step": 14767 }, { "epoch": 1.1963707064160727, "grad_norm": 0.04549403116106987, "learning_rate": 0.00015575858499482426, "loss": 0.3269, "step": 14768 }, { "epoch": 1.196451717433571, "grad_norm": 0.04399016126990318, "learning_rate": 0.00015575408434222962, "loss": 0.2969, "step": 14769 }, { "epoch": 1.1965327284510694, "grad_norm": 0.044396717101335526, "learning_rate": 0.000155749583689635, "loss": 0.3453, "step": 14770 }, { "epoch": 1.1966137394685676, "grad_norm": 0.03851454332470894, "learning_rate": 0.00015574508303704037, "loss": 0.2775, "step": 14771 }, { "epoch": 1.196694750486066, "grad_norm": 0.043037425726652145, "learning_rate": 0.00015574058238444576, "loss": 0.3044, "step": 14772 }, { "epoch": 1.1967757615035646, "grad_norm": 0.047682128846645355, "learning_rate": 0.00015573608173185112, "loss": 0.3349, "step": 14773 }, { "epoch": 1.1968567725210628, "grad_norm": 0.04036872088909149, "learning_rate": 0.0001557315810792565, "loss": 0.2752, "step": 14774 }, { "epoch": 1.1969377835385613, "grad_norm": 0.041800472885370255, "learning_rate": 0.00015572708042666187, "loss": 0.3147, "step": 14775 }, { "epoch": 1.1970187945560595, "grad_norm": 0.042227502912282944, "learning_rate": 0.00015572257977406725, "loss": 0.318, "step": 14776 }, { "epoch": 1.197099805573558, "grad_norm": 0.03940373286604881, "learning_rate": 0.00015571807912147261, "loss": 0.3208, "step": 14777 }, { "epoch": 1.1971808165910565, "grad_norm": 0.03364204987883568, "learning_rate": 0.000155713578468878, "loss": 0.294, "step": 14778 }, { "epoch": 1.1972618276085547, "grad_norm": 0.03734065592288971, "learning_rate": 0.00015570907781628336, "loss": 0.2902, "step": 14779 }, { "epoch": 1.1973428386260532, "grad_norm": 0.03665858134627342, "learning_rate": 0.00015570457716368875, "loss": 0.2887, "step": 14780 }, { "epoch": 1.1974238496435516, "grad_norm": 0.043738704174757004, "learning_rate": 0.0001557000765110941, "loss": 0.3203, "step": 14781 }, { "epoch": 1.1975048606610499, "grad_norm": 0.04412857070565224, "learning_rate": 0.0001556955758584995, "loss": 0.316, "step": 14782 }, { "epoch": 1.1975858716785484, "grad_norm": 0.03947075083851814, "learning_rate": 0.00015569107520590486, "loss": 0.2555, "step": 14783 }, { "epoch": 1.1976668826960466, "grad_norm": 0.04453642666339874, "learning_rate": 0.00015568657455331024, "loss": 0.3004, "step": 14784 }, { "epoch": 1.197747893713545, "grad_norm": 0.0402337908744812, "learning_rate": 0.0001556820739007156, "loss": 0.2874, "step": 14785 }, { "epoch": 1.1978289047310433, "grad_norm": 0.042971957474946976, "learning_rate": 0.000155677573248121, "loss": 0.3229, "step": 14786 }, { "epoch": 1.1979099157485418, "grad_norm": 0.03841191530227661, "learning_rate": 0.00015567307259552635, "loss": 0.2986, "step": 14787 }, { "epoch": 1.1979909267660402, "grad_norm": 0.04925091192126274, "learning_rate": 0.00015566857194293174, "loss": 0.3171, "step": 14788 }, { "epoch": 1.1980719377835385, "grad_norm": 0.04737703502178192, "learning_rate": 0.0001556640712903371, "loss": 0.299, "step": 14789 }, { "epoch": 1.198152948801037, "grad_norm": 0.03469955176115036, "learning_rate": 0.00015565957063774248, "loss": 0.258, "step": 14790 }, { "epoch": 1.1982339598185354, "grad_norm": 0.03793976455926895, "learning_rate": 0.00015565506998514784, "loss": 0.2769, "step": 14791 }, { "epoch": 1.1983149708360337, "grad_norm": 0.04765570908784866, "learning_rate": 0.00015565056933255323, "loss": 0.3252, "step": 14792 }, { "epoch": 1.1983959818535321, "grad_norm": 0.03665093332529068, "learning_rate": 0.00015564606867995862, "loss": 0.2515, "step": 14793 }, { "epoch": 1.1984769928710304, "grad_norm": 0.04346012324094772, "learning_rate": 0.00015564156802736398, "loss": 0.3131, "step": 14794 }, { "epoch": 1.1985580038885288, "grad_norm": 0.03895379975438118, "learning_rate": 0.00015563706737476934, "loss": 0.2658, "step": 14795 }, { "epoch": 1.1986390149060273, "grad_norm": 0.05214675888419151, "learning_rate": 0.00015563256672217473, "loss": 0.3183, "step": 14796 }, { "epoch": 1.1987200259235256, "grad_norm": 0.043334607034921646, "learning_rate": 0.00015562806606958009, "loss": 0.3181, "step": 14797 }, { "epoch": 1.198801036941024, "grad_norm": 0.04547828808426857, "learning_rate": 0.00015562356541698547, "loss": 0.3412, "step": 14798 }, { "epoch": 1.1988820479585223, "grad_norm": 0.037553027272224426, "learning_rate": 0.00015561906476439086, "loss": 0.3114, "step": 14799 }, { "epoch": 1.1989630589760207, "grad_norm": 0.03953569009900093, "learning_rate": 0.00015561456411179622, "loss": 0.3184, "step": 14800 }, { "epoch": 1.1990440699935192, "grad_norm": 0.03854568675160408, "learning_rate": 0.00015561006345920158, "loss": 0.27, "step": 14801 }, { "epoch": 1.1991250810110174, "grad_norm": 0.03653649240732193, "learning_rate": 0.00015560556280660697, "loss": 0.2947, "step": 14802 }, { "epoch": 1.199206092028516, "grad_norm": 0.0445239320397377, "learning_rate": 0.00015560106215401233, "loss": 0.3404, "step": 14803 }, { "epoch": 1.1992871030460144, "grad_norm": 0.041224073618650436, "learning_rate": 0.00015559656150141771, "loss": 0.3194, "step": 14804 }, { "epoch": 1.1993681140635126, "grad_norm": 0.04126469045877457, "learning_rate": 0.0001555920608488231, "loss": 0.2901, "step": 14805 }, { "epoch": 1.199449125081011, "grad_norm": 0.04383617639541626, "learning_rate": 0.00015558756019622846, "loss": 0.3178, "step": 14806 }, { "epoch": 1.1995301360985093, "grad_norm": 0.040692463517189026, "learning_rate": 0.00015558305954363385, "loss": 0.323, "step": 14807 }, { "epoch": 1.1996111471160078, "grad_norm": 0.04072236642241478, "learning_rate": 0.0001555785588910392, "loss": 0.3049, "step": 14808 }, { "epoch": 1.199692158133506, "grad_norm": 0.04335068538784981, "learning_rate": 0.00015557405823844457, "loss": 0.3169, "step": 14809 }, { "epoch": 1.1997731691510045, "grad_norm": 0.037739019840955734, "learning_rate": 0.00015556955758584996, "loss": 0.2959, "step": 14810 }, { "epoch": 1.199854180168503, "grad_norm": 0.038804128766059875, "learning_rate": 0.00015556505693325534, "loss": 0.3127, "step": 14811 }, { "epoch": 1.1999351911860012, "grad_norm": 0.045990537852048874, "learning_rate": 0.0001555605562806607, "loss": 0.3228, "step": 14812 }, { "epoch": 1.2000162022034997, "grad_norm": 0.040363702923059464, "learning_rate": 0.0001555560556280661, "loss": 0.3352, "step": 14813 }, { "epoch": 1.2000972132209982, "grad_norm": 0.03828015923500061, "learning_rate": 0.00015555155497547145, "loss": 0.3149, "step": 14814 }, { "epoch": 1.2001782242384964, "grad_norm": 0.040625084191560745, "learning_rate": 0.0001555470543228768, "loss": 0.3105, "step": 14815 }, { "epoch": 1.2002592352559949, "grad_norm": 0.03519357740879059, "learning_rate": 0.0001555425536702822, "loss": 0.2812, "step": 14816 }, { "epoch": 1.2003402462734931, "grad_norm": 0.03987620398402214, "learning_rate": 0.00015553805301768759, "loss": 0.3012, "step": 14817 }, { "epoch": 1.2004212572909916, "grad_norm": 0.0429069809615612, "learning_rate": 0.00015553355236509295, "loss": 0.2906, "step": 14818 }, { "epoch": 1.20050226830849, "grad_norm": 0.0450630784034729, "learning_rate": 0.00015552905171249833, "loss": 0.3188, "step": 14819 }, { "epoch": 1.2005832793259883, "grad_norm": 0.03968661651015282, "learning_rate": 0.0001555245510599037, "loss": 0.2709, "step": 14820 }, { "epoch": 1.2006642903434868, "grad_norm": 0.03986947983503342, "learning_rate": 0.00015552005040730905, "loss": 0.2897, "step": 14821 }, { "epoch": 1.200745301360985, "grad_norm": 0.03897624462842941, "learning_rate": 0.00015551554975471444, "loss": 0.2697, "step": 14822 }, { "epoch": 1.2008263123784835, "grad_norm": 0.04822707921266556, "learning_rate": 0.00015551104910211983, "loss": 0.3644, "step": 14823 }, { "epoch": 1.200907323395982, "grad_norm": 0.03778839111328125, "learning_rate": 0.0001555065484495252, "loss": 0.2905, "step": 14824 }, { "epoch": 1.2009883344134802, "grad_norm": 0.047402072697877884, "learning_rate": 0.00015550204779693057, "loss": 0.3363, "step": 14825 }, { "epoch": 1.2010693454309787, "grad_norm": 0.04571527615189552, "learning_rate": 0.00015549754714433593, "loss": 0.3272, "step": 14826 }, { "epoch": 1.201150356448477, "grad_norm": 0.0438561737537384, "learning_rate": 0.0001554930464917413, "loss": 0.2901, "step": 14827 }, { "epoch": 1.2012313674659754, "grad_norm": 0.0397321879863739, "learning_rate": 0.00015548854583914668, "loss": 0.2673, "step": 14828 }, { "epoch": 1.2013123784834738, "grad_norm": 0.04492023214697838, "learning_rate": 0.00015548404518655207, "loss": 0.3245, "step": 14829 }, { "epoch": 1.201393389500972, "grad_norm": 0.04138173535466194, "learning_rate": 0.00015547954453395743, "loss": 0.3149, "step": 14830 }, { "epoch": 1.2014744005184705, "grad_norm": 0.041036274284124374, "learning_rate": 0.00015547504388136282, "loss": 0.2777, "step": 14831 }, { "epoch": 1.2015554115359688, "grad_norm": 0.03809208422899246, "learning_rate": 0.00015547054322876818, "loss": 0.2816, "step": 14832 }, { "epoch": 1.2016364225534673, "grad_norm": 0.047296952456235886, "learning_rate": 0.00015546604257617354, "loss": 0.3102, "step": 14833 }, { "epoch": 1.2017174335709657, "grad_norm": 0.041915081441402435, "learning_rate": 0.00015546154192357892, "loss": 0.3149, "step": 14834 }, { "epoch": 1.201798444588464, "grad_norm": 0.038084451109170914, "learning_rate": 0.0001554570412709843, "loss": 0.2709, "step": 14835 }, { "epoch": 1.2018794556059624, "grad_norm": 0.04211721941828728, "learning_rate": 0.00015545254061838967, "loss": 0.312, "step": 14836 }, { "epoch": 1.201960466623461, "grad_norm": 0.044992879033088684, "learning_rate": 0.00015544803996579506, "loss": 0.3102, "step": 14837 }, { "epoch": 1.2020414776409591, "grad_norm": 0.03669695556163788, "learning_rate": 0.00015544353931320042, "loss": 0.3174, "step": 14838 }, { "epoch": 1.2021224886584576, "grad_norm": 0.036718446761369705, "learning_rate": 0.00015543903866060578, "loss": 0.2909, "step": 14839 }, { "epoch": 1.2022034996759559, "grad_norm": 0.042346637696027756, "learning_rate": 0.00015543453800801116, "loss": 0.3211, "step": 14840 }, { "epoch": 1.2022845106934543, "grad_norm": 0.038561925292015076, "learning_rate": 0.00015543003735541655, "loss": 0.2951, "step": 14841 }, { "epoch": 1.2023655217109526, "grad_norm": 0.039432037621736526, "learning_rate": 0.0001554255367028219, "loss": 0.2802, "step": 14842 }, { "epoch": 1.202446532728451, "grad_norm": 0.04120011627674103, "learning_rate": 0.0001554210360502273, "loss": 0.3294, "step": 14843 }, { "epoch": 1.2025275437459495, "grad_norm": 0.044444065541028976, "learning_rate": 0.00015541653539763266, "loss": 0.3118, "step": 14844 }, { "epoch": 1.2026085547634477, "grad_norm": 0.04563391953706741, "learning_rate": 0.00015541203474503805, "loss": 0.2899, "step": 14845 }, { "epoch": 1.2026895657809462, "grad_norm": 0.03690198436379433, "learning_rate": 0.0001554075340924434, "loss": 0.3212, "step": 14846 }, { "epoch": 1.2027705767984447, "grad_norm": 0.03772515431046486, "learning_rate": 0.0001554030334398488, "loss": 0.2357, "step": 14847 }, { "epoch": 1.202851587815943, "grad_norm": 0.0380142480134964, "learning_rate": 0.00015539853278725415, "loss": 0.2648, "step": 14848 }, { "epoch": 1.2029325988334414, "grad_norm": 0.042475104331970215, "learning_rate": 0.00015539403213465954, "loss": 0.3387, "step": 14849 }, { "epoch": 1.2030136098509396, "grad_norm": 0.0449688620865345, "learning_rate": 0.0001553895314820649, "loss": 0.3302, "step": 14850 }, { "epoch": 1.203094620868438, "grad_norm": 0.03996169567108154, "learning_rate": 0.0001553850308294703, "loss": 0.2959, "step": 14851 }, { "epoch": 1.2031756318859366, "grad_norm": 0.04284268245100975, "learning_rate": 0.00015538053017687565, "loss": 0.3064, "step": 14852 }, { "epoch": 1.2032566429034348, "grad_norm": 0.04294389858841896, "learning_rate": 0.00015537602952428103, "loss": 0.2904, "step": 14853 }, { "epoch": 1.2033376539209333, "grad_norm": 0.04475172236561775, "learning_rate": 0.0001553715288716864, "loss": 0.3193, "step": 14854 }, { "epoch": 1.2034186649384315, "grad_norm": 0.042766768485307693, "learning_rate": 0.00015536702821909178, "loss": 0.3168, "step": 14855 }, { "epoch": 1.20349967595593, "grad_norm": 0.04142007604241371, "learning_rate": 0.00015536252756649714, "loss": 0.2939, "step": 14856 }, { "epoch": 1.2035806869734285, "grad_norm": 0.04174195975065231, "learning_rate": 0.00015535802691390253, "loss": 0.2897, "step": 14857 }, { "epoch": 1.2036616979909267, "grad_norm": 0.04544617980718613, "learning_rate": 0.0001553535262613079, "loss": 0.2798, "step": 14858 }, { "epoch": 1.2037427090084252, "grad_norm": 0.03798126056790352, "learning_rate": 0.00015534902560871328, "loss": 0.2868, "step": 14859 }, { "epoch": 1.2038237200259236, "grad_norm": 0.045044515281915665, "learning_rate": 0.00015534452495611864, "loss": 0.3122, "step": 14860 }, { "epoch": 1.2039047310434219, "grad_norm": 0.044914714992046356, "learning_rate": 0.00015534002430352402, "loss": 0.3229, "step": 14861 }, { "epoch": 1.2039857420609203, "grad_norm": 0.044051643460989, "learning_rate": 0.00015533552365092938, "loss": 0.2951, "step": 14862 }, { "epoch": 1.2040667530784186, "grad_norm": 0.04305178299546242, "learning_rate": 0.00015533102299833477, "loss": 0.3235, "step": 14863 }, { "epoch": 1.204147764095917, "grad_norm": 0.04135410860180855, "learning_rate": 0.00015532652234574013, "loss": 0.3039, "step": 14864 }, { "epoch": 1.2042287751134153, "grad_norm": 0.04174239560961723, "learning_rate": 0.00015532202169314552, "loss": 0.3394, "step": 14865 }, { "epoch": 1.2043097861309138, "grad_norm": 0.041874948889017105, "learning_rate": 0.00015531752104055088, "loss": 0.3091, "step": 14866 }, { "epoch": 1.2043907971484122, "grad_norm": 0.039929281920194626, "learning_rate": 0.00015531302038795627, "loss": 0.3189, "step": 14867 }, { "epoch": 1.2044718081659105, "grad_norm": 0.040334559977054596, "learning_rate": 0.00015530851973536163, "loss": 0.2749, "step": 14868 }, { "epoch": 1.204552819183409, "grad_norm": 0.04389198124408722, "learning_rate": 0.000155304019082767, "loss": 0.3207, "step": 14869 }, { "epoch": 1.2046338302009074, "grad_norm": 0.04084376245737076, "learning_rate": 0.0001552995184301724, "loss": 0.2819, "step": 14870 }, { "epoch": 1.2047148412184057, "grad_norm": 0.03534472733736038, "learning_rate": 0.00015529501777757776, "loss": 0.2908, "step": 14871 }, { "epoch": 1.2047958522359041, "grad_norm": 0.03947620093822479, "learning_rate": 0.00015529051712498312, "loss": 0.2971, "step": 14872 }, { "epoch": 1.2048768632534024, "grad_norm": 0.042341433465480804, "learning_rate": 0.0001552860164723885, "loss": 0.3296, "step": 14873 }, { "epoch": 1.2049578742709008, "grad_norm": 0.04293164238333702, "learning_rate": 0.0001552815158197939, "loss": 0.2891, "step": 14874 }, { "epoch": 1.2050388852883993, "grad_norm": 0.041425734758377075, "learning_rate": 0.00015527701516719925, "loss": 0.3247, "step": 14875 }, { "epoch": 1.2051198963058976, "grad_norm": 0.0458022877573967, "learning_rate": 0.00015527251451460464, "loss": 0.3297, "step": 14876 }, { "epoch": 1.205200907323396, "grad_norm": 0.040675088763237, "learning_rate": 0.00015526801386201, "loss": 0.2815, "step": 14877 }, { "epoch": 1.2052819183408943, "grad_norm": 0.0410967692732811, "learning_rate": 0.00015526351320941536, "loss": 0.2938, "step": 14878 }, { "epoch": 1.2053629293583927, "grad_norm": 0.038196299225091934, "learning_rate": 0.00015525901255682075, "loss": 0.3281, "step": 14879 }, { "epoch": 1.2054439403758912, "grad_norm": 0.04951519891619682, "learning_rate": 0.00015525451190422614, "loss": 0.3125, "step": 14880 }, { "epoch": 1.2055249513933894, "grad_norm": 0.05080864578485489, "learning_rate": 0.0001552500112516315, "loss": 0.3106, "step": 14881 }, { "epoch": 1.205605962410888, "grad_norm": 0.05040706321597099, "learning_rate": 0.00015524551059903688, "loss": 0.3864, "step": 14882 }, { "epoch": 1.2056869734283864, "grad_norm": 0.03856421634554863, "learning_rate": 0.00015524100994644224, "loss": 0.2875, "step": 14883 }, { "epoch": 1.2057679844458846, "grad_norm": 0.041090596467256546, "learning_rate": 0.0001552365092938476, "loss": 0.2899, "step": 14884 }, { "epoch": 1.205848995463383, "grad_norm": 0.05047939345240593, "learning_rate": 0.000155232008641253, "loss": 0.3544, "step": 14885 }, { "epoch": 1.2059300064808813, "grad_norm": 0.04525664448738098, "learning_rate": 0.00015522750798865838, "loss": 0.3289, "step": 14886 }, { "epoch": 1.2060110174983798, "grad_norm": 0.04346325248479843, "learning_rate": 0.00015522300733606374, "loss": 0.3322, "step": 14887 }, { "epoch": 1.206092028515878, "grad_norm": 0.044985491782426834, "learning_rate": 0.00015521850668346912, "loss": 0.3022, "step": 14888 }, { "epoch": 1.2061730395333765, "grad_norm": 0.03676772490143776, "learning_rate": 0.00015521400603087448, "loss": 0.2682, "step": 14889 }, { "epoch": 1.206254050550875, "grad_norm": 0.035457346588373184, "learning_rate": 0.00015520950537827984, "loss": 0.2738, "step": 14890 }, { "epoch": 1.2063350615683732, "grad_norm": 0.04279814288020134, "learning_rate": 0.00015520500472568523, "loss": 0.3924, "step": 14891 }, { "epoch": 1.2064160725858717, "grad_norm": 0.04287739098072052, "learning_rate": 0.00015520050407309062, "loss": 0.3226, "step": 14892 }, { "epoch": 1.2064970836033702, "grad_norm": 0.04235265776515007, "learning_rate": 0.00015519600342049598, "loss": 0.335, "step": 14893 }, { "epoch": 1.2065780946208684, "grad_norm": 0.04114942252635956, "learning_rate": 0.00015519150276790137, "loss": 0.3094, "step": 14894 }, { "epoch": 1.2066591056383669, "grad_norm": 0.04386473447084427, "learning_rate": 0.00015518700211530673, "loss": 0.309, "step": 14895 }, { "epoch": 1.2067401166558651, "grad_norm": 0.04421220347285271, "learning_rate": 0.00015518250146271209, "loss": 0.3256, "step": 14896 }, { "epoch": 1.2068211276733636, "grad_norm": 0.041027799248695374, "learning_rate": 0.00015517800081011747, "loss": 0.3279, "step": 14897 }, { "epoch": 1.206902138690862, "grad_norm": 0.038625966757535934, "learning_rate": 0.00015517350015752286, "loss": 0.2986, "step": 14898 }, { "epoch": 1.2069831497083603, "grad_norm": 0.05030160769820213, "learning_rate": 0.00015516899950492822, "loss": 0.3738, "step": 14899 }, { "epoch": 1.2070641607258588, "grad_norm": 0.03595034033060074, "learning_rate": 0.0001551644988523336, "loss": 0.2843, "step": 14900 }, { "epoch": 1.207145171743357, "grad_norm": 0.04498247429728508, "learning_rate": 0.00015515999819973897, "loss": 0.3449, "step": 14901 }, { "epoch": 1.2072261827608555, "grad_norm": 0.042141638696193695, "learning_rate": 0.00015515549754714433, "loss": 0.321, "step": 14902 }, { "epoch": 1.207307193778354, "grad_norm": 0.04549839720129967, "learning_rate": 0.00015515099689454971, "loss": 0.262, "step": 14903 }, { "epoch": 1.2073882047958522, "grad_norm": 0.047006864100694656, "learning_rate": 0.0001551464962419551, "loss": 0.2865, "step": 14904 }, { "epoch": 1.2074692158133506, "grad_norm": 0.041136160492897034, "learning_rate": 0.00015514199558936046, "loss": 0.3178, "step": 14905 }, { "epoch": 1.2075502268308491, "grad_norm": 0.03632412478327751, "learning_rate": 0.00015513749493676585, "loss": 0.2786, "step": 14906 }, { "epoch": 1.2076312378483474, "grad_norm": 0.03551272302865982, "learning_rate": 0.0001551329942841712, "loss": 0.2647, "step": 14907 }, { "epoch": 1.2077122488658458, "grad_norm": 0.0367630310356617, "learning_rate": 0.00015512849363157657, "loss": 0.292, "step": 14908 }, { "epoch": 1.207793259883344, "grad_norm": 0.03985973820090294, "learning_rate": 0.00015512399297898196, "loss": 0.2996, "step": 14909 }, { "epoch": 1.2078742709008425, "grad_norm": 0.03747883066534996, "learning_rate": 0.00015511949232638734, "loss": 0.2588, "step": 14910 }, { "epoch": 1.2079552819183408, "grad_norm": 0.037703000009059906, "learning_rate": 0.0001551149916737927, "loss": 0.2753, "step": 14911 }, { "epoch": 1.2080362929358393, "grad_norm": 0.046212971210479736, "learning_rate": 0.0001551104910211981, "loss": 0.3252, "step": 14912 }, { "epoch": 1.2081173039533377, "grad_norm": 0.03943370282649994, "learning_rate": 0.00015510599036860345, "loss": 0.2965, "step": 14913 }, { "epoch": 1.208198314970836, "grad_norm": 0.04212084040045738, "learning_rate": 0.0001551014897160088, "loss": 0.3352, "step": 14914 }, { "epoch": 1.2082793259883344, "grad_norm": 0.0414251834154129, "learning_rate": 0.0001550969890634142, "loss": 0.3091, "step": 14915 }, { "epoch": 1.208360337005833, "grad_norm": 0.040891196578741074, "learning_rate": 0.00015509248841081959, "loss": 0.2814, "step": 14916 }, { "epoch": 1.2084413480233311, "grad_norm": 0.03951037675142288, "learning_rate": 0.00015508798775822495, "loss": 0.2863, "step": 14917 }, { "epoch": 1.2085223590408296, "grad_norm": 0.039379850029945374, "learning_rate": 0.00015508348710563033, "loss": 0.3006, "step": 14918 }, { "epoch": 1.2086033700583279, "grad_norm": 0.04138374701142311, "learning_rate": 0.0001550789864530357, "loss": 0.2759, "step": 14919 }, { "epoch": 1.2086843810758263, "grad_norm": 0.059719931334257126, "learning_rate": 0.00015507448580044105, "loss": 0.3148, "step": 14920 }, { "epoch": 1.2087653920933248, "grad_norm": 0.04690566286444664, "learning_rate": 0.00015506998514784644, "loss": 0.3427, "step": 14921 }, { "epoch": 1.208846403110823, "grad_norm": 0.05078961327672005, "learning_rate": 0.00015506548449525183, "loss": 0.3673, "step": 14922 }, { "epoch": 1.2089274141283215, "grad_norm": 0.039885710924863815, "learning_rate": 0.0001550609838426572, "loss": 0.2906, "step": 14923 }, { "epoch": 1.2090084251458197, "grad_norm": 0.03954209014773369, "learning_rate": 0.00015505648319006257, "loss": 0.3098, "step": 14924 }, { "epoch": 1.2090894361633182, "grad_norm": 0.0377873033285141, "learning_rate": 0.00015505198253746793, "loss": 0.2479, "step": 14925 }, { "epoch": 1.2091704471808167, "grad_norm": 0.04104090481996536, "learning_rate": 0.00015504748188487332, "loss": 0.3105, "step": 14926 }, { "epoch": 1.209251458198315, "grad_norm": 0.0429408960044384, "learning_rate": 0.00015504298123227868, "loss": 0.3034, "step": 14927 }, { "epoch": 1.2093324692158134, "grad_norm": 0.03847665339708328, "learning_rate": 0.00015503848057968407, "loss": 0.3016, "step": 14928 }, { "epoch": 1.2094134802333119, "grad_norm": 0.03994767740368843, "learning_rate": 0.00015503397992708943, "loss": 0.2945, "step": 14929 }, { "epoch": 1.20949449125081, "grad_norm": 0.04051990061998367, "learning_rate": 0.00015502947927449482, "loss": 0.3454, "step": 14930 }, { "epoch": 1.2095755022683086, "grad_norm": 0.04037369042634964, "learning_rate": 0.00015502497862190018, "loss": 0.3038, "step": 14931 }, { "epoch": 1.2096565132858068, "grad_norm": 0.0358559787273407, "learning_rate": 0.00015502047796930556, "loss": 0.267, "step": 14932 }, { "epoch": 1.2097375243033053, "grad_norm": 0.039637815207242966, "learning_rate": 0.00015501597731671092, "loss": 0.2757, "step": 14933 }, { "epoch": 1.2098185353208035, "grad_norm": 0.042503852397203445, "learning_rate": 0.0001550114766641163, "loss": 0.3243, "step": 14934 }, { "epoch": 1.209899546338302, "grad_norm": 0.04102681949734688, "learning_rate": 0.00015500697601152167, "loss": 0.3234, "step": 14935 }, { "epoch": 1.2099805573558005, "grad_norm": 0.04349510744214058, "learning_rate": 0.00015500247535892706, "loss": 0.3462, "step": 14936 }, { "epoch": 1.2100615683732987, "grad_norm": 0.037747252732515335, "learning_rate": 0.00015499797470633242, "loss": 0.2838, "step": 14937 }, { "epoch": 1.2101425793907972, "grad_norm": 0.04947254806756973, "learning_rate": 0.0001549934740537378, "loss": 0.2977, "step": 14938 }, { "epoch": 1.2102235904082956, "grad_norm": 0.0345742292702198, "learning_rate": 0.0001549889734011432, "loss": 0.2769, "step": 14939 }, { "epoch": 1.2103046014257939, "grad_norm": 0.04381577670574188, "learning_rate": 0.00015498447274854855, "loss": 0.3068, "step": 14940 }, { "epoch": 1.2103856124432923, "grad_norm": 0.04694143310189247, "learning_rate": 0.0001549799720959539, "loss": 0.2989, "step": 14941 }, { "epoch": 1.2104666234607906, "grad_norm": 0.04614692181348801, "learning_rate": 0.0001549754714433593, "loss": 0.3008, "step": 14942 }, { "epoch": 1.210547634478289, "grad_norm": 0.04504687711596489, "learning_rate": 0.00015497097079076466, "loss": 0.2721, "step": 14943 }, { "epoch": 1.2106286454957873, "grad_norm": 0.04158374294638634, "learning_rate": 0.00015496647013817005, "loss": 0.3073, "step": 14944 }, { "epoch": 1.2107096565132858, "grad_norm": 0.06658129394054413, "learning_rate": 0.00015496196948557543, "loss": 0.3523, "step": 14945 }, { "epoch": 1.2107906675307842, "grad_norm": 0.03513149544596672, "learning_rate": 0.0001549574688329808, "loss": 0.265, "step": 14946 }, { "epoch": 1.2108716785482825, "grad_norm": 0.04534105584025383, "learning_rate": 0.00015495296818038615, "loss": 0.2945, "step": 14947 }, { "epoch": 1.210952689565781, "grad_norm": 0.04596344754099846, "learning_rate": 0.00015494846752779154, "loss": 0.3335, "step": 14948 }, { "epoch": 1.2110337005832794, "grad_norm": 0.042053647339344025, "learning_rate": 0.00015494396687519693, "loss": 0.3075, "step": 14949 }, { "epoch": 1.2111147116007777, "grad_norm": 0.0502544566988945, "learning_rate": 0.0001549394662226023, "loss": 0.3104, "step": 14950 }, { "epoch": 1.2111957226182761, "grad_norm": 0.04380401596426964, "learning_rate": 0.00015493496557000767, "loss": 0.3072, "step": 14951 }, { "epoch": 1.2112767336357744, "grad_norm": 0.043562911450862885, "learning_rate": 0.00015493046491741304, "loss": 0.3615, "step": 14952 }, { "epoch": 1.2113577446532728, "grad_norm": 0.04325402155518532, "learning_rate": 0.0001549259642648184, "loss": 0.3349, "step": 14953 }, { "epoch": 1.2114387556707713, "grad_norm": 0.04529077187180519, "learning_rate": 0.00015492146361222378, "loss": 0.337, "step": 14954 }, { "epoch": 1.2115197666882696, "grad_norm": 0.04269943758845329, "learning_rate": 0.00015491696295962917, "loss": 0.3169, "step": 14955 }, { "epoch": 1.211600777705768, "grad_norm": 0.03706458956003189, "learning_rate": 0.00015491246230703453, "loss": 0.2868, "step": 14956 }, { "epoch": 1.2116817887232663, "grad_norm": 0.04907451570034027, "learning_rate": 0.00015490796165443992, "loss": 0.3273, "step": 14957 }, { "epoch": 1.2117627997407647, "grad_norm": 0.035258274525403976, "learning_rate": 0.00015490346100184528, "loss": 0.2706, "step": 14958 }, { "epoch": 1.2118438107582632, "grad_norm": 0.05509074032306671, "learning_rate": 0.00015489896034925064, "loss": 0.3811, "step": 14959 }, { "epoch": 1.2119248217757614, "grad_norm": 0.038064759224653244, "learning_rate": 0.00015489445969665602, "loss": 0.3257, "step": 14960 }, { "epoch": 1.21200583279326, "grad_norm": 0.038369227200746536, "learning_rate": 0.0001548899590440614, "loss": 0.2874, "step": 14961 }, { "epoch": 1.2120868438107584, "grad_norm": 0.04148680716753006, "learning_rate": 0.00015488545839146677, "loss": 0.3028, "step": 14962 }, { "epoch": 1.2121678548282566, "grad_norm": 0.038149863481521606, "learning_rate": 0.00015488095773887216, "loss": 0.3129, "step": 14963 }, { "epoch": 1.212248865845755, "grad_norm": 0.04594748839735985, "learning_rate": 0.00015487645708627752, "loss": 0.3335, "step": 14964 }, { "epoch": 1.2123298768632533, "grad_norm": 0.042934972792863846, "learning_rate": 0.00015487195643368288, "loss": 0.3348, "step": 14965 }, { "epoch": 1.2124108878807518, "grad_norm": 0.040971312671899796, "learning_rate": 0.00015486745578108827, "loss": 0.3171, "step": 14966 }, { "epoch": 1.21249189889825, "grad_norm": 0.041622862219810486, "learning_rate": 0.00015486295512849365, "loss": 0.2778, "step": 14967 }, { "epoch": 1.2125729099157485, "grad_norm": 0.041777707636356354, "learning_rate": 0.000154858454475899, "loss": 0.2911, "step": 14968 }, { "epoch": 1.212653920933247, "grad_norm": 0.04000363126397133, "learning_rate": 0.0001548539538233044, "loss": 0.3156, "step": 14969 }, { "epoch": 1.2127349319507452, "grad_norm": 0.048961807042360306, "learning_rate": 0.00015484945317070976, "loss": 0.3202, "step": 14970 }, { "epoch": 1.2128159429682437, "grad_norm": 0.03984816372394562, "learning_rate": 0.00015484495251811512, "loss": 0.2502, "step": 14971 }, { "epoch": 1.2128969539857422, "grad_norm": 0.045237038284540176, "learning_rate": 0.0001548404518655205, "loss": 0.3387, "step": 14972 }, { "epoch": 1.2129779650032404, "grad_norm": 0.039658062160015106, "learning_rate": 0.0001548359512129259, "loss": 0.2838, "step": 14973 }, { "epoch": 1.2130589760207389, "grad_norm": 0.03694017976522446, "learning_rate": 0.00015483145056033125, "loss": 0.2642, "step": 14974 }, { "epoch": 1.213139987038237, "grad_norm": 0.03846210986375809, "learning_rate": 0.00015482694990773664, "loss": 0.2934, "step": 14975 }, { "epoch": 1.2132209980557356, "grad_norm": 0.04987949877977371, "learning_rate": 0.000154822449255142, "loss": 0.3406, "step": 14976 }, { "epoch": 1.213302009073234, "grad_norm": 0.03913595527410507, "learning_rate": 0.00015481794860254736, "loss": 0.2639, "step": 14977 }, { "epoch": 1.2133830200907323, "grad_norm": 0.04530547186732292, "learning_rate": 0.00015481344794995275, "loss": 0.3033, "step": 14978 }, { "epoch": 1.2134640311082308, "grad_norm": 0.04652739688754082, "learning_rate": 0.00015480894729735814, "loss": 0.3455, "step": 14979 }, { "epoch": 1.213545042125729, "grad_norm": 0.038609545677900314, "learning_rate": 0.0001548044466447635, "loss": 0.2893, "step": 14980 }, { "epoch": 1.2136260531432275, "grad_norm": 0.03681579604744911, "learning_rate": 0.00015479994599216888, "loss": 0.2949, "step": 14981 }, { "epoch": 1.213707064160726, "grad_norm": 0.035748064517974854, "learning_rate": 0.00015479544533957424, "loss": 0.2806, "step": 14982 }, { "epoch": 1.2137880751782242, "grad_norm": 0.04264812543988228, "learning_rate": 0.0001547909446869796, "loss": 0.3046, "step": 14983 }, { "epoch": 1.2138690861957226, "grad_norm": 0.041751082986593246, "learning_rate": 0.000154786444034385, "loss": 0.3084, "step": 14984 }, { "epoch": 1.2139500972132211, "grad_norm": 0.04104901850223541, "learning_rate": 0.00015478194338179038, "loss": 0.3208, "step": 14985 }, { "epoch": 1.2140311082307194, "grad_norm": 0.05218047276139259, "learning_rate": 0.00015477744272919574, "loss": 0.3657, "step": 14986 }, { "epoch": 1.2141121192482178, "grad_norm": 0.04347262904047966, "learning_rate": 0.00015477294207660112, "loss": 0.3151, "step": 14987 }, { "epoch": 1.214193130265716, "grad_norm": 0.03328379616141319, "learning_rate": 0.00015476844142400648, "loss": 0.2409, "step": 14988 }, { "epoch": 1.2142741412832145, "grad_norm": 0.0384984090924263, "learning_rate": 0.00015476394077141184, "loss": 0.2918, "step": 14989 }, { "epoch": 1.2143551523007128, "grad_norm": 0.03588297218084335, "learning_rate": 0.00015475944011881723, "loss": 0.2647, "step": 14990 }, { "epoch": 1.2144361633182112, "grad_norm": 0.041738592088222504, "learning_rate": 0.00015475493946622262, "loss": 0.3172, "step": 14991 }, { "epoch": 1.2145171743357097, "grad_norm": 0.04426318779587746, "learning_rate": 0.00015475043881362798, "loss": 0.3277, "step": 14992 }, { "epoch": 1.214598185353208, "grad_norm": 0.04357115179300308, "learning_rate": 0.00015474593816103337, "loss": 0.2669, "step": 14993 }, { "epoch": 1.2146791963707064, "grad_norm": 0.03911040350794792, "learning_rate": 0.00015474143750843873, "loss": 0.2868, "step": 14994 }, { "epoch": 1.214760207388205, "grad_norm": 0.04531647264957428, "learning_rate": 0.00015473693685584409, "loss": 0.3478, "step": 14995 }, { "epoch": 1.2148412184057031, "grad_norm": 0.04873902350664139, "learning_rate": 0.00015473243620324947, "loss": 0.3047, "step": 14996 }, { "epoch": 1.2149222294232016, "grad_norm": 0.040747951716184616, "learning_rate": 0.00015472793555065486, "loss": 0.3153, "step": 14997 }, { "epoch": 1.2150032404406998, "grad_norm": 0.04346758872270584, "learning_rate": 0.00015472343489806022, "loss": 0.261, "step": 14998 }, { "epoch": 1.2150842514581983, "grad_norm": 0.045580729842185974, "learning_rate": 0.0001547189342454656, "loss": 0.3188, "step": 14999 }, { "epoch": 1.2151652624756968, "grad_norm": 0.041919250041246414, "learning_rate": 0.00015471443359287097, "loss": 0.2996, "step": 15000 }, { "epoch": 1.215246273493195, "grad_norm": 0.04074418172240257, "learning_rate": 0.00015470993294027633, "loss": 0.2698, "step": 15001 }, { "epoch": 1.2153272845106935, "grad_norm": 0.03675012290477753, "learning_rate": 0.00015470543228768172, "loss": 0.2639, "step": 15002 }, { "epoch": 1.2154082955281917, "grad_norm": 0.0448453314602375, "learning_rate": 0.0001547009316350871, "loss": 0.3048, "step": 15003 }, { "epoch": 1.2154893065456902, "grad_norm": 0.04736652597784996, "learning_rate": 0.00015469643098249246, "loss": 0.3033, "step": 15004 }, { "epoch": 1.2155703175631887, "grad_norm": 0.03896206244826317, "learning_rate": 0.00015469193032989785, "loss": 0.2953, "step": 15005 }, { "epoch": 1.215651328580687, "grad_norm": 0.03998485207557678, "learning_rate": 0.0001546874296773032, "loss": 0.2969, "step": 15006 }, { "epoch": 1.2157323395981854, "grad_norm": 0.040892038494348526, "learning_rate": 0.0001546829290247086, "loss": 0.3103, "step": 15007 }, { "epoch": 1.2158133506156839, "grad_norm": 0.04300196096301079, "learning_rate": 0.00015467842837211398, "loss": 0.2877, "step": 15008 }, { "epoch": 1.215894361633182, "grad_norm": 0.04934476315975189, "learning_rate": 0.00015467392771951934, "loss": 0.3347, "step": 15009 }, { "epoch": 1.2159753726506806, "grad_norm": 0.04549160599708557, "learning_rate": 0.0001546694270669247, "loss": 0.2961, "step": 15010 }, { "epoch": 1.2160563836681788, "grad_norm": 0.03903596103191376, "learning_rate": 0.0001546649264143301, "loss": 0.309, "step": 15011 }, { "epoch": 1.2161373946856773, "grad_norm": 0.0424087792634964, "learning_rate": 0.00015466042576173545, "loss": 0.3197, "step": 15012 }, { "epoch": 1.2162184057031755, "grad_norm": 0.037096478044986725, "learning_rate": 0.00015465592510914084, "loss": 0.29, "step": 15013 }, { "epoch": 1.216299416720674, "grad_norm": 0.034570056945085526, "learning_rate": 0.00015465142445654623, "loss": 0.2682, "step": 15014 }, { "epoch": 1.2163804277381725, "grad_norm": 0.038456372916698456, "learning_rate": 0.00015464692380395159, "loss": 0.3088, "step": 15015 }, { "epoch": 1.2164614387556707, "grad_norm": 0.044545628130435944, "learning_rate": 0.00015464242315135695, "loss": 0.3117, "step": 15016 }, { "epoch": 1.2165424497731692, "grad_norm": 0.043802566826343536, "learning_rate": 0.00015463792249876233, "loss": 0.3331, "step": 15017 }, { "epoch": 1.2166234607906676, "grad_norm": 0.04616328328847885, "learning_rate": 0.0001546334218461677, "loss": 0.3316, "step": 15018 }, { "epoch": 1.2167044718081659, "grad_norm": 0.040806081146001816, "learning_rate": 0.00015462892119357308, "loss": 0.2668, "step": 15019 }, { "epoch": 1.2167854828256643, "grad_norm": 0.05050874873995781, "learning_rate": 0.00015462442054097847, "loss": 0.3035, "step": 15020 }, { "epoch": 1.2168664938431626, "grad_norm": 0.04004824161529541, "learning_rate": 0.00015461991988838383, "loss": 0.3014, "step": 15021 }, { "epoch": 1.216947504860661, "grad_norm": 0.03522452712059021, "learning_rate": 0.0001546154192357892, "loss": 0.2926, "step": 15022 }, { "epoch": 1.2170285158781595, "grad_norm": 0.040806613862514496, "learning_rate": 0.00015461091858319457, "loss": 0.3207, "step": 15023 }, { "epoch": 1.2171095268956578, "grad_norm": 0.04380201920866966, "learning_rate": 0.00015460641793059993, "loss": 0.3041, "step": 15024 }, { "epoch": 1.2171905379131562, "grad_norm": 0.04264390841126442, "learning_rate": 0.00015460191727800532, "loss": 0.3032, "step": 15025 }, { "epoch": 1.2172715489306545, "grad_norm": 0.04169783741235733, "learning_rate": 0.0001545974166254107, "loss": 0.3072, "step": 15026 }, { "epoch": 1.217352559948153, "grad_norm": 0.0487448051571846, "learning_rate": 0.00015459291597281607, "loss": 0.2996, "step": 15027 }, { "epoch": 1.2174335709656514, "grad_norm": 0.0421229712665081, "learning_rate": 0.00015458841532022143, "loss": 0.2939, "step": 15028 }, { "epoch": 1.2175145819831497, "grad_norm": 0.041163280606269836, "learning_rate": 0.00015458391466762682, "loss": 0.3402, "step": 15029 }, { "epoch": 1.2175955930006481, "grad_norm": 0.03774140030145645, "learning_rate": 0.0001545794140150322, "loss": 0.2603, "step": 15030 }, { "epoch": 1.2176766040181466, "grad_norm": 0.03964897617697716, "learning_rate": 0.00015457491336243756, "loss": 0.2829, "step": 15031 }, { "epoch": 1.2177576150356448, "grad_norm": 0.05228666588664055, "learning_rate": 0.00015457041270984295, "loss": 0.3825, "step": 15032 }, { "epoch": 1.2178386260531433, "grad_norm": 0.03329094871878624, "learning_rate": 0.0001545659120572483, "loss": 0.2762, "step": 15033 }, { "epoch": 1.2179196370706415, "grad_norm": 0.041959576308727264, "learning_rate": 0.00015456141140465367, "loss": 0.3409, "step": 15034 }, { "epoch": 1.21800064808814, "grad_norm": 0.04155031964182854, "learning_rate": 0.00015455691075205906, "loss": 0.3188, "step": 15035 }, { "epoch": 1.2180816591056383, "grad_norm": 0.039347093552351, "learning_rate": 0.00015455241009946444, "loss": 0.3165, "step": 15036 }, { "epoch": 1.2181626701231367, "grad_norm": 0.039845991879701614, "learning_rate": 0.0001545479094468698, "loss": 0.2788, "step": 15037 }, { "epoch": 1.2182436811406352, "grad_norm": 0.04731302335858345, "learning_rate": 0.0001545434087942752, "loss": 0.2945, "step": 15038 }, { "epoch": 1.2183246921581334, "grad_norm": 0.04452233016490936, "learning_rate": 0.00015453890814168055, "loss": 0.314, "step": 15039 }, { "epoch": 1.218405703175632, "grad_norm": 0.04520030319690704, "learning_rate": 0.0001545344074890859, "loss": 0.3582, "step": 15040 }, { "epoch": 1.2184867141931304, "grad_norm": 0.04778093472123146, "learning_rate": 0.0001545299068364913, "loss": 0.3604, "step": 15041 }, { "epoch": 1.2185677252106286, "grad_norm": 0.04183390736579895, "learning_rate": 0.00015452540618389669, "loss": 0.3324, "step": 15042 }, { "epoch": 1.218648736228127, "grad_norm": 0.0479646660387516, "learning_rate": 0.00015452090553130205, "loss": 0.3547, "step": 15043 }, { "epoch": 1.2187297472456253, "grad_norm": 0.049034297466278076, "learning_rate": 0.00015451640487870743, "loss": 0.3318, "step": 15044 }, { "epoch": 1.2188107582631238, "grad_norm": 0.034791890531778336, "learning_rate": 0.0001545119042261128, "loss": 0.283, "step": 15045 }, { "epoch": 1.2188917692806223, "grad_norm": 0.04443153366446495, "learning_rate": 0.00015450740357351815, "loss": 0.2866, "step": 15046 }, { "epoch": 1.2189727802981205, "grad_norm": 0.04115704074501991, "learning_rate": 0.00015450290292092354, "loss": 0.2974, "step": 15047 }, { "epoch": 1.219053791315619, "grad_norm": 0.04250843822956085, "learning_rate": 0.00015449840226832893, "loss": 0.2979, "step": 15048 }, { "epoch": 1.2191348023331172, "grad_norm": 0.04619358107447624, "learning_rate": 0.0001544939016157343, "loss": 0.3257, "step": 15049 }, { "epoch": 1.2192158133506157, "grad_norm": 0.04598325490951538, "learning_rate": 0.00015448940096313968, "loss": 0.3024, "step": 15050 }, { "epoch": 1.2192968243681142, "grad_norm": 0.046086326241493225, "learning_rate": 0.00015448490031054504, "loss": 0.3025, "step": 15051 }, { "epoch": 1.2193778353856124, "grad_norm": 0.04900304973125458, "learning_rate": 0.0001544803996579504, "loss": 0.2745, "step": 15052 }, { "epoch": 1.2194588464031109, "grad_norm": 0.048376649618148804, "learning_rate": 0.00015447589900535578, "loss": 0.2921, "step": 15053 }, { "epoch": 1.219539857420609, "grad_norm": 0.04333197697997093, "learning_rate": 0.00015447139835276117, "loss": 0.3155, "step": 15054 }, { "epoch": 1.2196208684381076, "grad_norm": 0.042843107134103775, "learning_rate": 0.00015446689770016653, "loss": 0.3148, "step": 15055 }, { "epoch": 1.219701879455606, "grad_norm": 0.044335655868053436, "learning_rate": 0.00015446239704757192, "loss": 0.3293, "step": 15056 }, { "epoch": 1.2197828904731043, "grad_norm": 0.0423131063580513, "learning_rate": 0.00015445789639497728, "loss": 0.3143, "step": 15057 }, { "epoch": 1.2198639014906028, "grad_norm": 0.0378672294318676, "learning_rate": 0.00015445339574238264, "loss": 0.2685, "step": 15058 }, { "epoch": 1.219944912508101, "grad_norm": 0.04331289604306221, "learning_rate": 0.00015444889508978802, "loss": 0.3153, "step": 15059 }, { "epoch": 1.2200259235255995, "grad_norm": 0.03961778059601784, "learning_rate": 0.0001544443944371934, "loss": 0.2746, "step": 15060 }, { "epoch": 1.220106934543098, "grad_norm": 0.04394698515534401, "learning_rate": 0.00015443989378459877, "loss": 0.3073, "step": 15061 }, { "epoch": 1.2201879455605962, "grad_norm": 0.03818221762776375, "learning_rate": 0.00015443539313200416, "loss": 0.2914, "step": 15062 }, { "epoch": 1.2202689565780946, "grad_norm": 0.04286661371588707, "learning_rate": 0.00015443089247940952, "loss": 0.2921, "step": 15063 }, { "epoch": 1.220349967595593, "grad_norm": 0.04242391511797905, "learning_rate": 0.00015442639182681488, "loss": 0.2607, "step": 15064 }, { "epoch": 1.2204309786130914, "grad_norm": 0.04057682305574417, "learning_rate": 0.00015442189117422027, "loss": 0.3318, "step": 15065 }, { "epoch": 1.2205119896305898, "grad_norm": 0.0383298434317112, "learning_rate": 0.00015441739052162565, "loss": 0.2709, "step": 15066 }, { "epoch": 1.220593000648088, "grad_norm": 0.03954831883311272, "learning_rate": 0.000154412889869031, "loss": 0.2917, "step": 15067 }, { "epoch": 1.2206740116655865, "grad_norm": 0.03849297761917114, "learning_rate": 0.0001544083892164364, "loss": 0.2938, "step": 15068 }, { "epoch": 1.2207550226830848, "grad_norm": 0.04343443736433983, "learning_rate": 0.00015440388856384176, "loss": 0.2971, "step": 15069 }, { "epoch": 1.2208360337005832, "grad_norm": 0.04036508873105049, "learning_rate": 0.00015439938791124712, "loss": 0.3394, "step": 15070 }, { "epoch": 1.2209170447180817, "grad_norm": 0.03749840706586838, "learning_rate": 0.0001543948872586525, "loss": 0.2866, "step": 15071 }, { "epoch": 1.22099805573558, "grad_norm": 0.03948388993740082, "learning_rate": 0.0001543903866060579, "loss": 0.3344, "step": 15072 }, { "epoch": 1.2210790667530784, "grad_norm": 0.04332873970270157, "learning_rate": 0.00015438588595346325, "loss": 0.3372, "step": 15073 }, { "epoch": 1.221160077770577, "grad_norm": 0.032650191336870193, "learning_rate": 0.00015438138530086864, "loss": 0.2635, "step": 15074 }, { "epoch": 1.2212410887880751, "grad_norm": 0.04500601440668106, "learning_rate": 0.000154376884648274, "loss": 0.3153, "step": 15075 }, { "epoch": 1.2213220998055736, "grad_norm": 0.03901544585824013, "learning_rate": 0.00015437238399567936, "loss": 0.3147, "step": 15076 }, { "epoch": 1.2214031108230718, "grad_norm": 0.04170721769332886, "learning_rate": 0.00015436788334308478, "loss": 0.2852, "step": 15077 }, { "epoch": 1.2214841218405703, "grad_norm": 0.04137682914733887, "learning_rate": 0.00015436338269049014, "loss": 0.2999, "step": 15078 }, { "epoch": 1.2215651328580688, "grad_norm": 0.045757956802845, "learning_rate": 0.0001543588820378955, "loss": 0.3354, "step": 15079 }, { "epoch": 1.221646143875567, "grad_norm": 0.04264063015580177, "learning_rate": 0.00015435438138530088, "loss": 0.2822, "step": 15080 }, { "epoch": 1.2217271548930655, "grad_norm": 0.04429285600781441, "learning_rate": 0.00015434988073270624, "loss": 0.3438, "step": 15081 }, { "epoch": 1.2218081659105637, "grad_norm": 0.038446854799985886, "learning_rate": 0.00015434538008011163, "loss": 0.2803, "step": 15082 }, { "epoch": 1.2218891769280622, "grad_norm": 0.0383138470351696, "learning_rate": 0.00015434087942751702, "loss": 0.2487, "step": 15083 }, { "epoch": 1.2219701879455607, "grad_norm": 0.04340263456106186, "learning_rate": 0.00015433637877492238, "loss": 0.3308, "step": 15084 }, { "epoch": 1.222051198963059, "grad_norm": 0.04444803297519684, "learning_rate": 0.00015433187812232774, "loss": 0.3341, "step": 15085 }, { "epoch": 1.2221322099805574, "grad_norm": 0.04100262001156807, "learning_rate": 0.00015432737746973312, "loss": 0.2889, "step": 15086 }, { "epoch": 1.2222132209980558, "grad_norm": 0.04645245522260666, "learning_rate": 0.00015432287681713849, "loss": 0.3037, "step": 15087 }, { "epoch": 1.222294232015554, "grad_norm": 0.0426318496465683, "learning_rate": 0.00015431837616454387, "loss": 0.3033, "step": 15088 }, { "epoch": 1.2223752430330526, "grad_norm": 0.0452832467854023, "learning_rate": 0.00015431387551194926, "loss": 0.3108, "step": 15089 }, { "epoch": 1.2224562540505508, "grad_norm": 0.038874559104442596, "learning_rate": 0.00015430937485935462, "loss": 0.3092, "step": 15090 }, { "epoch": 1.2225372650680493, "grad_norm": 0.04850398376584053, "learning_rate": 0.00015430487420675998, "loss": 0.3435, "step": 15091 }, { "epoch": 1.2226182760855475, "grad_norm": 0.04427666589617729, "learning_rate": 0.00015430037355416537, "loss": 0.3104, "step": 15092 }, { "epoch": 1.222699287103046, "grad_norm": 0.045927468687295914, "learning_rate": 0.00015429587290157073, "loss": 0.3275, "step": 15093 }, { "epoch": 1.2227802981205445, "grad_norm": 0.04388774186372757, "learning_rate": 0.00015429137224897611, "loss": 0.2877, "step": 15094 }, { "epoch": 1.2228613091380427, "grad_norm": 0.040757108479738235, "learning_rate": 0.0001542868715963815, "loss": 0.2963, "step": 15095 }, { "epoch": 1.2229423201555412, "grad_norm": 0.041438765823841095, "learning_rate": 0.00015428237094378686, "loss": 0.2913, "step": 15096 }, { "epoch": 1.2230233311730396, "grad_norm": 0.03743297979235649, "learning_rate": 0.00015427787029119222, "loss": 0.3281, "step": 15097 }, { "epoch": 1.2231043421905379, "grad_norm": 0.048839520663022995, "learning_rate": 0.0001542733696385976, "loss": 0.2851, "step": 15098 }, { "epoch": 1.2231853532080363, "grad_norm": 0.04056665673851967, "learning_rate": 0.00015426886898600297, "loss": 0.2745, "step": 15099 }, { "epoch": 1.2232663642255346, "grad_norm": 0.04060543701052666, "learning_rate": 0.00015426436833340836, "loss": 0.3062, "step": 15100 }, { "epoch": 1.223347375243033, "grad_norm": 0.04008857533335686, "learning_rate": 0.00015425986768081374, "loss": 0.3009, "step": 15101 }, { "epoch": 1.2234283862605315, "grad_norm": 0.04103999212384224, "learning_rate": 0.0001542553670282191, "loss": 0.3311, "step": 15102 }, { "epoch": 1.2235093972780298, "grad_norm": 0.047354403883218765, "learning_rate": 0.00015425086637562446, "loss": 0.3822, "step": 15103 }, { "epoch": 1.2235904082955282, "grad_norm": 0.045010678470134735, "learning_rate": 0.00015424636572302985, "loss": 0.3392, "step": 15104 }, { "epoch": 1.2236714193130265, "grad_norm": 0.04420321062207222, "learning_rate": 0.0001542418650704352, "loss": 0.3222, "step": 15105 }, { "epoch": 1.223752430330525, "grad_norm": 0.04299933463335037, "learning_rate": 0.0001542373644178406, "loss": 0.3307, "step": 15106 }, { "epoch": 1.2238334413480234, "grad_norm": 0.0378875657916069, "learning_rate": 0.00015423286376524598, "loss": 0.2635, "step": 15107 }, { "epoch": 1.2239144523655217, "grad_norm": 0.04010816290974617, "learning_rate": 0.00015422836311265134, "loss": 0.2729, "step": 15108 }, { "epoch": 1.2239954633830201, "grad_norm": 0.04282679781317711, "learning_rate": 0.0001542238624600567, "loss": 0.3142, "step": 15109 }, { "epoch": 1.2240764744005186, "grad_norm": 0.04327215999364853, "learning_rate": 0.0001542193618074621, "loss": 0.2783, "step": 15110 }, { "epoch": 1.2241574854180168, "grad_norm": 0.04052073508501053, "learning_rate": 0.00015421486115486748, "loss": 0.2601, "step": 15111 }, { "epoch": 1.2242384964355153, "grad_norm": 0.049192775040864944, "learning_rate": 0.00015421036050227284, "loss": 0.3162, "step": 15112 }, { "epoch": 1.2243195074530135, "grad_norm": 0.041703008115291595, "learning_rate": 0.00015420585984967823, "loss": 0.3364, "step": 15113 }, { "epoch": 1.224400518470512, "grad_norm": 0.04128837585449219, "learning_rate": 0.00015420135919708359, "loss": 0.3077, "step": 15114 }, { "epoch": 1.2244815294880103, "grad_norm": 0.0411527045071125, "learning_rate": 0.00015419685854448895, "loss": 0.2942, "step": 15115 }, { "epoch": 1.2245625405055087, "grad_norm": 0.03827161341905594, "learning_rate": 0.00015419235789189433, "loss": 0.2545, "step": 15116 }, { "epoch": 1.2246435515230072, "grad_norm": 0.04548992961645126, "learning_rate": 0.00015418785723929972, "loss": 0.3359, "step": 15117 }, { "epoch": 1.2247245625405054, "grad_norm": 0.043934501707553864, "learning_rate": 0.00015418335658670508, "loss": 0.295, "step": 15118 }, { "epoch": 1.224805573558004, "grad_norm": 0.04573789983987808, "learning_rate": 0.00015417885593411047, "loss": 0.3106, "step": 15119 }, { "epoch": 1.2248865845755024, "grad_norm": 0.040008544921875, "learning_rate": 0.00015417435528151583, "loss": 0.3301, "step": 15120 }, { "epoch": 1.2249675955930006, "grad_norm": 0.04708806052803993, "learning_rate": 0.0001541698546289212, "loss": 0.3116, "step": 15121 }, { "epoch": 1.225048606610499, "grad_norm": 0.044105127453804016, "learning_rate": 0.00015416535397632657, "loss": 0.3214, "step": 15122 }, { "epoch": 1.2251296176279973, "grad_norm": 0.04013490676879883, "learning_rate": 0.00015416085332373196, "loss": 0.268, "step": 15123 }, { "epoch": 1.2252106286454958, "grad_norm": 0.0509316511452198, "learning_rate": 0.00015415635267113732, "loss": 0.3329, "step": 15124 }, { "epoch": 1.2252916396629943, "grad_norm": 0.04100751504302025, "learning_rate": 0.0001541518520185427, "loss": 0.3061, "step": 15125 }, { "epoch": 1.2253726506804925, "grad_norm": 0.03576541692018509, "learning_rate": 0.00015414735136594807, "loss": 0.2857, "step": 15126 }, { "epoch": 1.225453661697991, "grad_norm": 0.03365354984998703, "learning_rate": 0.00015414285071335343, "loss": 0.2441, "step": 15127 }, { "epoch": 1.2255346727154892, "grad_norm": 0.04429009184241295, "learning_rate": 0.00015413835006075882, "loss": 0.289, "step": 15128 }, { "epoch": 1.2256156837329877, "grad_norm": 0.04027354344725609, "learning_rate": 0.0001541338494081642, "loss": 0.2798, "step": 15129 }, { "epoch": 1.2256966947504861, "grad_norm": 0.046282071620225906, "learning_rate": 0.00015412934875556956, "loss": 0.3011, "step": 15130 }, { "epoch": 1.2257777057679844, "grad_norm": 0.04794919118285179, "learning_rate": 0.00015412484810297495, "loss": 0.3395, "step": 15131 }, { "epoch": 1.2258587167854829, "grad_norm": 0.04859710484743118, "learning_rate": 0.0001541203474503803, "loss": 0.3479, "step": 15132 }, { "epoch": 1.2259397278029813, "grad_norm": 0.043037887662649155, "learning_rate": 0.00015411584679778567, "loss": 0.2922, "step": 15133 }, { "epoch": 1.2260207388204796, "grad_norm": 0.039615124464035034, "learning_rate": 0.00015411134614519106, "loss": 0.3031, "step": 15134 }, { "epoch": 1.226101749837978, "grad_norm": 0.04278057813644409, "learning_rate": 0.00015410684549259645, "loss": 0.3131, "step": 15135 }, { "epoch": 1.2261827608554763, "grad_norm": 0.0393165685236454, "learning_rate": 0.0001541023448400018, "loss": 0.3058, "step": 15136 }, { "epoch": 1.2262637718729748, "grad_norm": 0.044020235538482666, "learning_rate": 0.0001540978441874072, "loss": 0.3091, "step": 15137 }, { "epoch": 1.226344782890473, "grad_norm": 0.042965419590473175, "learning_rate": 0.00015409334353481255, "loss": 0.3214, "step": 15138 }, { "epoch": 1.2264257939079715, "grad_norm": 0.03816758468747139, "learning_rate": 0.0001540888428822179, "loss": 0.2711, "step": 15139 }, { "epoch": 1.22650680492547, "grad_norm": 0.03608936816453934, "learning_rate": 0.0001540843422296233, "loss": 0.3091, "step": 15140 }, { "epoch": 1.2265878159429682, "grad_norm": 0.0426860935986042, "learning_rate": 0.0001540798415770287, "loss": 0.3157, "step": 15141 }, { "epoch": 1.2266688269604666, "grad_norm": 0.04165487736463547, "learning_rate": 0.00015407534092443405, "loss": 0.3022, "step": 15142 }, { "epoch": 1.226749837977965, "grad_norm": 0.04956432059407234, "learning_rate": 0.00015407084027183943, "loss": 0.2851, "step": 15143 }, { "epoch": 1.2268308489954634, "grad_norm": 0.03930883854627609, "learning_rate": 0.0001540663396192448, "loss": 0.304, "step": 15144 }, { "epoch": 1.2269118600129618, "grad_norm": 0.0448654443025589, "learning_rate": 0.00015406183896665015, "loss": 0.3332, "step": 15145 }, { "epoch": 1.22699287103046, "grad_norm": 0.03653280436992645, "learning_rate": 0.00015405733831405557, "loss": 0.3023, "step": 15146 }, { "epoch": 1.2270738820479585, "grad_norm": 0.04919267073273659, "learning_rate": 0.00015405283766146093, "loss": 0.3538, "step": 15147 }, { "epoch": 1.227154893065457, "grad_norm": 0.04806479811668396, "learning_rate": 0.0001540483370088663, "loss": 0.3123, "step": 15148 }, { "epoch": 1.2272359040829552, "grad_norm": 0.048344314098358154, "learning_rate": 0.00015404383635627168, "loss": 0.3102, "step": 15149 }, { "epoch": 1.2273169151004537, "grad_norm": 0.04515837877988815, "learning_rate": 0.00015403933570367704, "loss": 0.312, "step": 15150 }, { "epoch": 1.227397926117952, "grad_norm": 0.04120798036456108, "learning_rate": 0.0001540348350510824, "loss": 0.3222, "step": 15151 }, { "epoch": 1.2274789371354504, "grad_norm": 0.0508146807551384, "learning_rate": 0.0001540303343984878, "loss": 0.3242, "step": 15152 }, { "epoch": 1.2275599481529489, "grad_norm": 0.04866446927189827, "learning_rate": 0.00015402583374589317, "loss": 0.3112, "step": 15153 }, { "epoch": 1.2276409591704471, "grad_norm": 0.04131367430090904, "learning_rate": 0.00015402133309329853, "loss": 0.3098, "step": 15154 }, { "epoch": 1.2277219701879456, "grad_norm": 0.04007503390312195, "learning_rate": 0.00015401683244070392, "loss": 0.3208, "step": 15155 }, { "epoch": 1.2278029812054438, "grad_norm": 0.04036682844161987, "learning_rate": 0.00015401233178810928, "loss": 0.2761, "step": 15156 }, { "epoch": 1.2278839922229423, "grad_norm": 0.04943064972758293, "learning_rate": 0.00015400783113551464, "loss": 0.3039, "step": 15157 }, { "epoch": 1.2279650032404408, "grad_norm": 0.03702479973435402, "learning_rate": 0.00015400333048292005, "loss": 0.3129, "step": 15158 }, { "epoch": 1.228046014257939, "grad_norm": 0.04905194044113159, "learning_rate": 0.0001539988298303254, "loss": 0.2974, "step": 15159 }, { "epoch": 1.2281270252754375, "grad_norm": 0.04597773775458336, "learning_rate": 0.00015399432917773077, "loss": 0.3339, "step": 15160 }, { "epoch": 1.2282080362929357, "grad_norm": 0.03559258207678795, "learning_rate": 0.00015398982852513616, "loss": 0.2707, "step": 15161 }, { "epoch": 1.2282890473104342, "grad_norm": 0.039929188787937164, "learning_rate": 0.00015398532787254152, "loss": 0.288, "step": 15162 }, { "epoch": 1.2283700583279327, "grad_norm": 0.04652782529592514, "learning_rate": 0.0001539808272199469, "loss": 0.3275, "step": 15163 }, { "epoch": 1.228451069345431, "grad_norm": 0.03702322393655777, "learning_rate": 0.0001539763265673523, "loss": 0.2802, "step": 15164 }, { "epoch": 1.2285320803629294, "grad_norm": 0.04209384322166443, "learning_rate": 0.00015397182591475765, "loss": 0.3511, "step": 15165 }, { "epoch": 1.2286130913804278, "grad_norm": 0.04149395599961281, "learning_rate": 0.000153967325262163, "loss": 0.3299, "step": 15166 }, { "epoch": 1.228694102397926, "grad_norm": 0.046072304248809814, "learning_rate": 0.0001539628246095684, "loss": 0.3295, "step": 15167 }, { "epoch": 1.2287751134154246, "grad_norm": 0.045435693114995956, "learning_rate": 0.00015395832395697376, "loss": 0.3289, "step": 15168 }, { "epoch": 1.2288561244329228, "grad_norm": 0.039649687707424164, "learning_rate": 0.00015395382330437915, "loss": 0.2727, "step": 15169 }, { "epoch": 1.2289371354504213, "grad_norm": 0.049699172377586365, "learning_rate": 0.00015394932265178453, "loss": 0.3213, "step": 15170 }, { "epoch": 1.2290181464679195, "grad_norm": 0.04703816771507263, "learning_rate": 0.0001539448219991899, "loss": 0.3281, "step": 15171 }, { "epoch": 1.229099157485418, "grad_norm": 0.04082157090306282, "learning_rate": 0.00015394032134659525, "loss": 0.2701, "step": 15172 }, { "epoch": 1.2291801685029164, "grad_norm": 0.03887148201465607, "learning_rate": 0.00015393582069400064, "loss": 0.308, "step": 15173 }, { "epoch": 1.2292611795204147, "grad_norm": 0.041878700256347656, "learning_rate": 0.000153931320041406, "loss": 0.3172, "step": 15174 }, { "epoch": 1.2293421905379132, "grad_norm": 0.03661678358912468, "learning_rate": 0.0001539268193888114, "loss": 0.2712, "step": 15175 }, { "epoch": 1.2294232015554116, "grad_norm": 0.05468657985329628, "learning_rate": 0.00015392231873621678, "loss": 0.3144, "step": 15176 }, { "epoch": 1.2295042125729099, "grad_norm": 0.03974331542849541, "learning_rate": 0.00015391781808362214, "loss": 0.3124, "step": 15177 }, { "epoch": 1.2295852235904083, "grad_norm": 0.04192359372973442, "learning_rate": 0.0001539133174310275, "loss": 0.3183, "step": 15178 }, { "epoch": 1.2296662346079066, "grad_norm": 0.04826575890183449, "learning_rate": 0.00015390881677843288, "loss": 0.3606, "step": 15179 }, { "epoch": 1.229747245625405, "grad_norm": 0.04235263168811798, "learning_rate": 0.00015390431612583824, "loss": 0.3161, "step": 15180 }, { "epoch": 1.2298282566429035, "grad_norm": 0.04713137820363045, "learning_rate": 0.00015389981547324363, "loss": 0.3756, "step": 15181 }, { "epoch": 1.2299092676604018, "grad_norm": 0.037279751151800156, "learning_rate": 0.00015389531482064902, "loss": 0.294, "step": 15182 }, { "epoch": 1.2299902786779002, "grad_norm": 0.04957104101777077, "learning_rate": 0.00015389081416805438, "loss": 0.2958, "step": 15183 }, { "epoch": 1.2300712896953985, "grad_norm": 0.04053379222750664, "learning_rate": 0.00015388631351545974, "loss": 0.3146, "step": 15184 }, { "epoch": 1.230152300712897, "grad_norm": 0.04531674087047577, "learning_rate": 0.00015388181286286513, "loss": 0.3101, "step": 15185 }, { "epoch": 1.2302333117303954, "grad_norm": 0.03993414342403412, "learning_rate": 0.00015387731221027049, "loss": 0.2923, "step": 15186 }, { "epoch": 1.2303143227478937, "grad_norm": 0.04393785446882248, "learning_rate": 0.00015387281155767587, "loss": 0.3202, "step": 15187 }, { "epoch": 1.2303953337653921, "grad_norm": 0.03797182813286781, "learning_rate": 0.00015386831090508126, "loss": 0.2721, "step": 15188 }, { "epoch": 1.2304763447828906, "grad_norm": 0.040326736867427826, "learning_rate": 0.00015386381025248662, "loss": 0.3051, "step": 15189 }, { "epoch": 1.2305573558003888, "grad_norm": 0.033600758761167526, "learning_rate": 0.00015385930959989198, "loss": 0.2619, "step": 15190 }, { "epoch": 1.2306383668178873, "grad_norm": 0.04964074492454529, "learning_rate": 0.00015385480894729737, "loss": 0.3344, "step": 15191 }, { "epoch": 1.2307193778353855, "grad_norm": 0.0473080575466156, "learning_rate": 0.00015385030829470275, "loss": 0.3022, "step": 15192 }, { "epoch": 1.230800388852884, "grad_norm": 0.0415453277528286, "learning_rate": 0.00015384580764210811, "loss": 0.3057, "step": 15193 }, { "epoch": 1.2308813998703823, "grad_norm": 0.040875665843486786, "learning_rate": 0.0001538413069895135, "loss": 0.307, "step": 15194 }, { "epoch": 1.2309624108878807, "grad_norm": 0.04135308414697647, "learning_rate": 0.00015383680633691886, "loss": 0.3161, "step": 15195 }, { "epoch": 1.2310434219053792, "grad_norm": 0.04142339527606964, "learning_rate": 0.00015383230568432422, "loss": 0.2891, "step": 15196 }, { "epoch": 1.2311244329228774, "grad_norm": 0.04539525508880615, "learning_rate": 0.0001538278050317296, "loss": 0.3082, "step": 15197 }, { "epoch": 1.231205443940376, "grad_norm": 0.044109832495450974, "learning_rate": 0.000153823304379135, "loss": 0.3335, "step": 15198 }, { "epoch": 1.2312864549578744, "grad_norm": 0.046666741371154785, "learning_rate": 0.00015381880372654036, "loss": 0.3194, "step": 15199 }, { "epoch": 1.2313674659753726, "grad_norm": 0.0451471321284771, "learning_rate": 0.00015381430307394574, "loss": 0.3056, "step": 15200 }, { "epoch": 1.231448476992871, "grad_norm": 0.042114224284887314, "learning_rate": 0.0001538098024213511, "loss": 0.3196, "step": 15201 }, { "epoch": 1.2315294880103693, "grad_norm": 0.044611066579818726, "learning_rate": 0.00015380530176875646, "loss": 0.2855, "step": 15202 }, { "epoch": 1.2316104990278678, "grad_norm": 0.04717979580163956, "learning_rate": 0.00015380080111616185, "loss": 0.3026, "step": 15203 }, { "epoch": 1.2316915100453663, "grad_norm": 0.04977048188447952, "learning_rate": 0.00015379630046356724, "loss": 0.3032, "step": 15204 }, { "epoch": 1.2317725210628645, "grad_norm": 0.0431169793009758, "learning_rate": 0.0001537917998109726, "loss": 0.2813, "step": 15205 }, { "epoch": 1.231853532080363, "grad_norm": 0.04329026862978935, "learning_rate": 0.00015378729915837798, "loss": 0.3119, "step": 15206 }, { "epoch": 1.2319345430978612, "grad_norm": 0.046638138592243195, "learning_rate": 0.00015378279850578334, "loss": 0.3054, "step": 15207 }, { "epoch": 1.2320155541153597, "grad_norm": 0.03704584017395973, "learning_rate": 0.0001537782978531887, "loss": 0.2952, "step": 15208 }, { "epoch": 1.2320965651328581, "grad_norm": 0.04100892320275307, "learning_rate": 0.0001537737972005941, "loss": 0.2741, "step": 15209 }, { "epoch": 1.2321775761503564, "grad_norm": 0.04119610786437988, "learning_rate": 0.00015376929654799948, "loss": 0.2946, "step": 15210 }, { "epoch": 1.2322585871678549, "grad_norm": 0.041077807545661926, "learning_rate": 0.00015376479589540484, "loss": 0.3135, "step": 15211 }, { "epoch": 1.2323395981853533, "grad_norm": 0.043649882078170776, "learning_rate": 0.00015376029524281023, "loss": 0.3032, "step": 15212 }, { "epoch": 1.2324206092028516, "grad_norm": 0.04387475550174713, "learning_rate": 0.00015375579459021559, "loss": 0.2948, "step": 15213 }, { "epoch": 1.23250162022035, "grad_norm": 0.04019913822412491, "learning_rate": 0.00015375129393762095, "loss": 0.305, "step": 15214 }, { "epoch": 1.2325826312378483, "grad_norm": 0.05033343657851219, "learning_rate": 0.00015374679328502636, "loss": 0.3652, "step": 15215 }, { "epoch": 1.2326636422553467, "grad_norm": 0.0421714186668396, "learning_rate": 0.00015374229263243172, "loss": 0.3317, "step": 15216 }, { "epoch": 1.232744653272845, "grad_norm": 0.043096888810396194, "learning_rate": 0.00015373779197983708, "loss": 0.3201, "step": 15217 }, { "epoch": 1.2328256642903435, "grad_norm": 0.04364513233304024, "learning_rate": 0.00015373329132724247, "loss": 0.2738, "step": 15218 }, { "epoch": 1.232906675307842, "grad_norm": 0.04265977814793587, "learning_rate": 0.00015372879067464783, "loss": 0.3075, "step": 15219 }, { "epoch": 1.2329876863253402, "grad_norm": 0.041783396154642105, "learning_rate": 0.0001537242900220532, "loss": 0.3063, "step": 15220 }, { "epoch": 1.2330686973428386, "grad_norm": 0.04352150484919548, "learning_rate": 0.0001537197893694586, "loss": 0.3298, "step": 15221 }, { "epoch": 1.233149708360337, "grad_norm": 0.039487238973379135, "learning_rate": 0.00015371528871686396, "loss": 0.2893, "step": 15222 }, { "epoch": 1.2332307193778353, "grad_norm": 0.0369989238679409, "learning_rate": 0.00015371078806426932, "loss": 0.2741, "step": 15223 }, { "epoch": 1.2333117303953338, "grad_norm": 0.04650986194610596, "learning_rate": 0.0001537062874116747, "loss": 0.3306, "step": 15224 }, { "epoch": 1.233392741412832, "grad_norm": 0.03859679773449898, "learning_rate": 0.00015370178675908007, "loss": 0.2607, "step": 15225 }, { "epoch": 1.2334737524303305, "grad_norm": 0.0406336709856987, "learning_rate": 0.00015369728610648543, "loss": 0.3277, "step": 15226 }, { "epoch": 1.233554763447829, "grad_norm": 0.046244632452726364, "learning_rate": 0.00015369278545389084, "loss": 0.3064, "step": 15227 }, { "epoch": 1.2336357744653272, "grad_norm": 0.04249390587210655, "learning_rate": 0.0001536882848012962, "loss": 0.3184, "step": 15228 }, { "epoch": 1.2337167854828257, "grad_norm": 0.04118916392326355, "learning_rate": 0.00015368378414870156, "loss": 0.2811, "step": 15229 }, { "epoch": 1.233797796500324, "grad_norm": 0.05073089152574539, "learning_rate": 0.00015367928349610695, "loss": 0.3123, "step": 15230 }, { "epoch": 1.2338788075178224, "grad_norm": 0.039565760642290115, "learning_rate": 0.0001536747828435123, "loss": 0.317, "step": 15231 }, { "epoch": 1.2339598185353209, "grad_norm": 0.043358445167541504, "learning_rate": 0.00015367028219091767, "loss": 0.3027, "step": 15232 }, { "epoch": 1.2340408295528191, "grad_norm": 0.041212573647499084, "learning_rate": 0.00015366578153832309, "loss": 0.3226, "step": 15233 }, { "epoch": 1.2341218405703176, "grad_norm": 0.04205805063247681, "learning_rate": 0.00015366128088572845, "loss": 0.3027, "step": 15234 }, { "epoch": 1.234202851587816, "grad_norm": 0.04577530920505524, "learning_rate": 0.0001536567802331338, "loss": 0.3123, "step": 15235 }, { "epoch": 1.2342838626053143, "grad_norm": 0.04439268633723259, "learning_rate": 0.0001536522795805392, "loss": 0.3067, "step": 15236 }, { "epoch": 1.2343648736228128, "grad_norm": 0.04194721207022667, "learning_rate": 0.00015364777892794455, "loss": 0.3329, "step": 15237 }, { "epoch": 1.234445884640311, "grad_norm": 0.040099386125802994, "learning_rate": 0.0001536432782753499, "loss": 0.3056, "step": 15238 }, { "epoch": 1.2345268956578095, "grad_norm": 0.04326711222529411, "learning_rate": 0.00015363877762275533, "loss": 0.3409, "step": 15239 }, { "epoch": 1.2346079066753077, "grad_norm": 0.03979482874274254, "learning_rate": 0.0001536342769701607, "loss": 0.3006, "step": 15240 }, { "epoch": 1.2346889176928062, "grad_norm": 0.05187808722257614, "learning_rate": 0.00015362977631756605, "loss": 0.3089, "step": 15241 }, { "epoch": 1.2347699287103047, "grad_norm": 0.05291687324643135, "learning_rate": 0.00015362527566497143, "loss": 0.3692, "step": 15242 }, { "epoch": 1.234850939727803, "grad_norm": 0.03968328982591629, "learning_rate": 0.0001536207750123768, "loss": 0.2979, "step": 15243 }, { "epoch": 1.2349319507453014, "grad_norm": 0.03818349540233612, "learning_rate": 0.00015361627435978218, "loss": 0.3323, "step": 15244 }, { "epoch": 1.2350129617627998, "grad_norm": 0.04607069119811058, "learning_rate": 0.00015361177370718757, "loss": 0.3302, "step": 15245 }, { "epoch": 1.235093972780298, "grad_norm": 0.04158300533890724, "learning_rate": 0.00015360727305459293, "loss": 0.2774, "step": 15246 }, { "epoch": 1.2351749837977966, "grad_norm": 0.03845590353012085, "learning_rate": 0.0001536027724019983, "loss": 0.3039, "step": 15247 }, { "epoch": 1.2352559948152948, "grad_norm": 0.05031782016158104, "learning_rate": 0.00015359827174940368, "loss": 0.2619, "step": 15248 }, { "epoch": 1.2353370058327933, "grad_norm": 0.036526355892419815, "learning_rate": 0.00015359377109680904, "loss": 0.2565, "step": 15249 }, { "epoch": 1.2354180168502917, "grad_norm": 0.04311860725283623, "learning_rate": 0.00015358927044421442, "loss": 0.2894, "step": 15250 }, { "epoch": 1.23549902786779, "grad_norm": 0.04755273088812828, "learning_rate": 0.0001535847697916198, "loss": 0.3003, "step": 15251 }, { "epoch": 1.2355800388852884, "grad_norm": 0.044981442391872406, "learning_rate": 0.00015358026913902517, "loss": 0.3073, "step": 15252 }, { "epoch": 1.2356610499027867, "grad_norm": 0.051071830093860626, "learning_rate": 0.00015357576848643053, "loss": 0.3178, "step": 15253 }, { "epoch": 1.2357420609202852, "grad_norm": 0.040591999888420105, "learning_rate": 0.00015357126783383592, "loss": 0.2578, "step": 15254 }, { "epoch": 1.2358230719377836, "grad_norm": 0.03376752510666847, "learning_rate": 0.00015356676718124128, "loss": 0.2572, "step": 15255 }, { "epoch": 1.2359040829552819, "grad_norm": 0.04118896648287773, "learning_rate": 0.00015356226652864666, "loss": 0.2724, "step": 15256 }, { "epoch": 1.2359850939727803, "grad_norm": 0.04479838162660599, "learning_rate": 0.00015355776587605205, "loss": 0.2559, "step": 15257 }, { "epoch": 1.2360661049902788, "grad_norm": 0.047844868153333664, "learning_rate": 0.0001535532652234574, "loss": 0.3781, "step": 15258 }, { "epoch": 1.236147116007777, "grad_norm": 0.043366435915231705, "learning_rate": 0.00015354876457086277, "loss": 0.294, "step": 15259 }, { "epoch": 1.2362281270252755, "grad_norm": 0.0474378801882267, "learning_rate": 0.00015354426391826816, "loss": 0.2816, "step": 15260 }, { "epoch": 1.2363091380427738, "grad_norm": 0.05177191272377968, "learning_rate": 0.00015353976326567352, "loss": 0.2862, "step": 15261 }, { "epoch": 1.2363901490602722, "grad_norm": 0.04230916500091553, "learning_rate": 0.0001535352626130789, "loss": 0.3047, "step": 15262 }, { "epoch": 1.2364711600777705, "grad_norm": 0.0430777408182621, "learning_rate": 0.0001535307619604843, "loss": 0.2985, "step": 15263 }, { "epoch": 1.236552171095269, "grad_norm": 0.04171663895249367, "learning_rate": 0.00015352626130788965, "loss": 0.3184, "step": 15264 }, { "epoch": 1.2366331821127674, "grad_norm": 0.040578003972768784, "learning_rate": 0.000153521760655295, "loss": 0.2826, "step": 15265 }, { "epoch": 1.2367141931302656, "grad_norm": 0.0430280901491642, "learning_rate": 0.0001535172600027004, "loss": 0.3106, "step": 15266 }, { "epoch": 1.2367952041477641, "grad_norm": 0.04540487751364708, "learning_rate": 0.0001535127593501058, "loss": 0.3408, "step": 15267 }, { "epoch": 1.2368762151652626, "grad_norm": 0.03872782737016678, "learning_rate": 0.00015350825869751115, "loss": 0.2879, "step": 15268 }, { "epoch": 1.2369572261827608, "grad_norm": 0.041879042983055115, "learning_rate": 0.00015350375804491654, "loss": 0.3497, "step": 15269 }, { "epoch": 1.2370382372002593, "grad_norm": 0.04300076141953468, "learning_rate": 0.0001534992573923219, "loss": 0.3395, "step": 15270 }, { "epoch": 1.2371192482177575, "grad_norm": 0.03565739467740059, "learning_rate": 0.00015349475673972726, "loss": 0.2584, "step": 15271 }, { "epoch": 1.237200259235256, "grad_norm": 0.048258304595947266, "learning_rate": 0.00015349025608713264, "loss": 0.3012, "step": 15272 }, { "epoch": 1.2372812702527543, "grad_norm": 0.03877298906445503, "learning_rate": 0.00015348575543453803, "loss": 0.2592, "step": 15273 }, { "epoch": 1.2373622812702527, "grad_norm": 0.04685317352414131, "learning_rate": 0.0001534812547819434, "loss": 0.3059, "step": 15274 }, { "epoch": 1.2374432922877512, "grad_norm": 0.038681793957948685, "learning_rate": 0.00015347675412934878, "loss": 0.2573, "step": 15275 }, { "epoch": 1.2375243033052494, "grad_norm": 0.04133173078298569, "learning_rate": 0.00015347225347675414, "loss": 0.2834, "step": 15276 }, { "epoch": 1.237605314322748, "grad_norm": 0.04690474644303322, "learning_rate": 0.0001534677528241595, "loss": 0.3354, "step": 15277 }, { "epoch": 1.2376863253402464, "grad_norm": 0.04486876353621483, "learning_rate": 0.00015346325217156488, "loss": 0.3612, "step": 15278 }, { "epoch": 1.2377673363577446, "grad_norm": 0.04361611232161522, "learning_rate": 0.00015345875151897027, "loss": 0.3043, "step": 15279 }, { "epoch": 1.237848347375243, "grad_norm": 0.041666265577077866, "learning_rate": 0.00015345425086637563, "loss": 0.2907, "step": 15280 }, { "epoch": 1.2379293583927413, "grad_norm": 0.04613722488284111, "learning_rate": 0.00015344975021378102, "loss": 0.3166, "step": 15281 }, { "epoch": 1.2380103694102398, "grad_norm": 0.04196156933903694, "learning_rate": 0.00015344524956118638, "loss": 0.2955, "step": 15282 }, { "epoch": 1.2380913804277383, "grad_norm": 0.045780882239341736, "learning_rate": 0.00015344074890859174, "loss": 0.3262, "step": 15283 }, { "epoch": 1.2381723914452365, "grad_norm": 0.049061235040426254, "learning_rate": 0.00015343624825599713, "loss": 0.3049, "step": 15284 }, { "epoch": 1.238253402462735, "grad_norm": 0.044471532106399536, "learning_rate": 0.0001534317476034025, "loss": 0.2768, "step": 15285 }, { "epoch": 1.2383344134802332, "grad_norm": 0.04229315370321274, "learning_rate": 0.00015342724695080787, "loss": 0.299, "step": 15286 }, { "epoch": 1.2384154244977317, "grad_norm": 0.03775394335389137, "learning_rate": 0.00015342274629821326, "loss": 0.3116, "step": 15287 }, { "epoch": 1.2384964355152301, "grad_norm": 0.04446544870734215, "learning_rate": 0.00015341824564561862, "loss": 0.2938, "step": 15288 }, { "epoch": 1.2385774465327284, "grad_norm": 0.0390334390103817, "learning_rate": 0.00015341374499302398, "loss": 0.2985, "step": 15289 }, { "epoch": 1.2386584575502269, "grad_norm": 0.046083953231573105, "learning_rate": 0.00015340924434042937, "loss": 0.3355, "step": 15290 }, { "epoch": 1.2387394685677253, "grad_norm": 0.04037284478545189, "learning_rate": 0.00015340474368783475, "loss": 0.3072, "step": 15291 }, { "epoch": 1.2388204795852236, "grad_norm": 0.04536395147442818, "learning_rate": 0.00015340024303524011, "loss": 0.319, "step": 15292 }, { "epoch": 1.238901490602722, "grad_norm": 0.043972741812467575, "learning_rate": 0.0001533957423826455, "loss": 0.3173, "step": 15293 }, { "epoch": 1.2389825016202203, "grad_norm": 0.04083540290594101, "learning_rate": 0.00015339124173005086, "loss": 0.3294, "step": 15294 }, { "epoch": 1.2390635126377187, "grad_norm": 0.04482501000165939, "learning_rate": 0.00015338674107745622, "loss": 0.3093, "step": 15295 }, { "epoch": 1.239144523655217, "grad_norm": 0.0340910442173481, "learning_rate": 0.00015338224042486164, "loss": 0.2693, "step": 15296 }, { "epoch": 1.2392255346727155, "grad_norm": 0.05177828669548035, "learning_rate": 0.000153377739772267, "loss": 0.3094, "step": 15297 }, { "epoch": 1.239306545690214, "grad_norm": 0.05645804852247238, "learning_rate": 0.00015337323911967236, "loss": 0.3089, "step": 15298 }, { "epoch": 1.2393875567077122, "grad_norm": 0.04944702982902527, "learning_rate": 0.00015336873846707774, "loss": 0.3414, "step": 15299 }, { "epoch": 1.2394685677252106, "grad_norm": 0.04450514167547226, "learning_rate": 0.0001533642378144831, "loss": 0.3182, "step": 15300 }, { "epoch": 1.239549578742709, "grad_norm": 0.04502875730395317, "learning_rate": 0.00015335973716188846, "loss": 0.3041, "step": 15301 }, { "epoch": 1.2396305897602073, "grad_norm": 0.042548295110464096, "learning_rate": 0.00015335523650929388, "loss": 0.2947, "step": 15302 }, { "epoch": 1.2397116007777058, "grad_norm": 0.0360729917883873, "learning_rate": 0.00015335073585669924, "loss": 0.2631, "step": 15303 }, { "epoch": 1.239792611795204, "grad_norm": 0.04980987682938576, "learning_rate": 0.0001533462352041046, "loss": 0.3343, "step": 15304 }, { "epoch": 1.2398736228127025, "grad_norm": 0.041066061705350876, "learning_rate": 0.00015334173455150998, "loss": 0.3111, "step": 15305 }, { "epoch": 1.239954633830201, "grad_norm": 0.03820958361029625, "learning_rate": 0.00015333723389891534, "loss": 0.2949, "step": 15306 }, { "epoch": 1.2400356448476992, "grad_norm": 0.037509698420763016, "learning_rate": 0.0001533327332463207, "loss": 0.3029, "step": 15307 }, { "epoch": 1.2401166558651977, "grad_norm": 0.04517628997564316, "learning_rate": 0.00015332823259372612, "loss": 0.313, "step": 15308 }, { "epoch": 1.240197666882696, "grad_norm": 0.053675659000873566, "learning_rate": 0.00015332373194113148, "loss": 0.2906, "step": 15309 }, { "epoch": 1.2402786779001944, "grad_norm": 0.041454900056123734, "learning_rate": 0.00015331923128853684, "loss": 0.3135, "step": 15310 }, { "epoch": 1.2403596889176929, "grad_norm": 0.03922869265079498, "learning_rate": 0.00015331473063594223, "loss": 0.2857, "step": 15311 }, { "epoch": 1.2404406999351911, "grad_norm": 0.04632218927145004, "learning_rate": 0.00015331022998334759, "loss": 0.296, "step": 15312 }, { "epoch": 1.2405217109526896, "grad_norm": 0.036529503762722015, "learning_rate": 0.00015330572933075295, "loss": 0.2847, "step": 15313 }, { "epoch": 1.240602721970188, "grad_norm": 0.04689698666334152, "learning_rate": 0.00015330122867815836, "loss": 0.3178, "step": 15314 }, { "epoch": 1.2406837329876863, "grad_norm": 0.04123736917972565, "learning_rate": 0.00015329672802556372, "loss": 0.2981, "step": 15315 }, { "epoch": 1.2407647440051848, "grad_norm": 0.041495393961668015, "learning_rate": 0.00015329222737296908, "loss": 0.3112, "step": 15316 }, { "epoch": 1.240845755022683, "grad_norm": 0.04156705364584923, "learning_rate": 0.00015328772672037447, "loss": 0.3111, "step": 15317 }, { "epoch": 1.2409267660401815, "grad_norm": 0.04301241785287857, "learning_rate": 0.00015328322606777983, "loss": 0.3103, "step": 15318 }, { "epoch": 1.2410077770576797, "grad_norm": 0.041196610778570175, "learning_rate": 0.0001532787254151852, "loss": 0.3036, "step": 15319 }, { "epoch": 1.2410887880751782, "grad_norm": 0.04731073975563049, "learning_rate": 0.0001532742247625906, "loss": 0.3079, "step": 15320 }, { "epoch": 1.2411697990926767, "grad_norm": 0.03111322596669197, "learning_rate": 0.00015326972410999596, "loss": 0.235, "step": 15321 }, { "epoch": 1.241250810110175, "grad_norm": 0.04209109768271446, "learning_rate": 0.00015326522345740132, "loss": 0.3478, "step": 15322 }, { "epoch": 1.2413318211276734, "grad_norm": 0.041773680597543716, "learning_rate": 0.0001532607228048067, "loss": 0.2853, "step": 15323 }, { "epoch": 1.2414128321451718, "grad_norm": 0.048851095139980316, "learning_rate": 0.00015325622215221207, "loss": 0.3461, "step": 15324 }, { "epoch": 1.24149384316267, "grad_norm": 0.037057485431432724, "learning_rate": 0.00015325172149961746, "loss": 0.2731, "step": 15325 }, { "epoch": 1.2415748541801686, "grad_norm": 0.046550452709198, "learning_rate": 0.00015324722084702284, "loss": 0.3483, "step": 15326 }, { "epoch": 1.2416558651976668, "grad_norm": 0.05344468355178833, "learning_rate": 0.0001532427201944282, "loss": 0.319, "step": 15327 }, { "epoch": 1.2417368762151653, "grad_norm": 0.05149804428219795, "learning_rate": 0.00015323821954183356, "loss": 0.3447, "step": 15328 }, { "epoch": 1.2418178872326637, "grad_norm": 0.0394924022257328, "learning_rate": 0.00015323371888923895, "loss": 0.2996, "step": 15329 }, { "epoch": 1.241898898250162, "grad_norm": 0.039302125573158264, "learning_rate": 0.0001532292182366443, "loss": 0.2984, "step": 15330 }, { "epoch": 1.2419799092676604, "grad_norm": 0.03921142965555191, "learning_rate": 0.0001532247175840497, "loss": 0.2767, "step": 15331 }, { "epoch": 1.2420609202851587, "grad_norm": 0.04495244100689888, "learning_rate": 0.00015322021693145509, "loss": 0.3193, "step": 15332 }, { "epoch": 1.2421419313026572, "grad_norm": 0.04314647987484932, "learning_rate": 0.00015321571627886045, "loss": 0.2974, "step": 15333 }, { "epoch": 1.2422229423201556, "grad_norm": 0.04043354466557503, "learning_rate": 0.0001532112156262658, "loss": 0.2751, "step": 15334 }, { "epoch": 1.2423039533376539, "grad_norm": 0.03722742944955826, "learning_rate": 0.0001532067149736712, "loss": 0.2962, "step": 15335 }, { "epoch": 1.2423849643551523, "grad_norm": 0.04360896348953247, "learning_rate": 0.00015320221432107655, "loss": 0.3269, "step": 15336 }, { "epoch": 1.2424659753726508, "grad_norm": 0.055030226707458496, "learning_rate": 0.00015319771366848194, "loss": 0.3611, "step": 15337 }, { "epoch": 1.242546986390149, "grad_norm": 0.04381483793258667, "learning_rate": 0.00015319321301588733, "loss": 0.2907, "step": 15338 }, { "epoch": 1.2426279974076475, "grad_norm": 0.036216773092746735, "learning_rate": 0.0001531887123632927, "loss": 0.321, "step": 15339 }, { "epoch": 1.2427090084251458, "grad_norm": 0.03686912730336189, "learning_rate": 0.00015318421171069805, "loss": 0.2704, "step": 15340 }, { "epoch": 1.2427900194426442, "grad_norm": 0.04307206720113754, "learning_rate": 0.00015317971105810343, "loss": 0.3245, "step": 15341 }, { "epoch": 1.2428710304601425, "grad_norm": 0.04554265737533569, "learning_rate": 0.0001531752104055088, "loss": 0.3081, "step": 15342 }, { "epoch": 1.242952041477641, "grad_norm": 0.0367271825671196, "learning_rate": 0.00015317070975291418, "loss": 0.3246, "step": 15343 }, { "epoch": 1.2430330524951394, "grad_norm": 0.037328340113162994, "learning_rate": 0.00015316620910031957, "loss": 0.2989, "step": 15344 }, { "epoch": 1.2431140635126376, "grad_norm": 0.037408363074064255, "learning_rate": 0.00015316170844772493, "loss": 0.3109, "step": 15345 }, { "epoch": 1.2431950745301361, "grad_norm": 0.04492205008864403, "learning_rate": 0.0001531572077951303, "loss": 0.2851, "step": 15346 }, { "epoch": 1.2432760855476346, "grad_norm": 0.052792858332395554, "learning_rate": 0.00015315270714253568, "loss": 0.3778, "step": 15347 }, { "epoch": 1.2433570965651328, "grad_norm": 0.03828036040067673, "learning_rate": 0.00015314820648994106, "loss": 0.2956, "step": 15348 }, { "epoch": 1.2434381075826313, "grad_norm": 0.046718232333660126, "learning_rate": 0.00015314370583734642, "loss": 0.3328, "step": 15349 }, { "epoch": 1.2435191186001295, "grad_norm": 0.04345204308629036, "learning_rate": 0.0001531392051847518, "loss": 0.3214, "step": 15350 }, { "epoch": 1.243600129617628, "grad_norm": 0.03833979368209839, "learning_rate": 0.00015313470453215717, "loss": 0.3186, "step": 15351 }, { "epoch": 1.2436811406351265, "grad_norm": 0.04523218795657158, "learning_rate": 0.00015313020387956253, "loss": 0.3308, "step": 15352 }, { "epoch": 1.2437621516526247, "grad_norm": 0.043142274022102356, "learning_rate": 0.00015312570322696792, "loss": 0.3069, "step": 15353 }, { "epoch": 1.2438431626701232, "grad_norm": 0.043952710926532745, "learning_rate": 0.0001531212025743733, "loss": 0.2971, "step": 15354 }, { "epoch": 1.2439241736876214, "grad_norm": 0.04158525541424751, "learning_rate": 0.00015311670192177866, "loss": 0.3382, "step": 15355 }, { "epoch": 1.24400518470512, "grad_norm": 0.040137652307748795, "learning_rate": 0.00015311220126918405, "loss": 0.317, "step": 15356 }, { "epoch": 1.2440861957226184, "grad_norm": 0.04606618359684944, "learning_rate": 0.0001531077006165894, "loss": 0.3215, "step": 15357 }, { "epoch": 1.2441672067401166, "grad_norm": 0.041263725608587265, "learning_rate": 0.00015310319996399477, "loss": 0.311, "step": 15358 }, { "epoch": 1.244248217757615, "grad_norm": 0.044039517641067505, "learning_rate": 0.00015309869931140016, "loss": 0.3153, "step": 15359 }, { "epoch": 1.2443292287751135, "grad_norm": 0.04090086743235588, "learning_rate": 0.00015309419865880555, "loss": 0.3142, "step": 15360 }, { "epoch": 1.2444102397926118, "grad_norm": 0.04677727445960045, "learning_rate": 0.0001530896980062109, "loss": 0.2843, "step": 15361 }, { "epoch": 1.2444912508101102, "grad_norm": 0.04350362718105316, "learning_rate": 0.0001530851973536163, "loss": 0.3047, "step": 15362 }, { "epoch": 1.2445722618276085, "grad_norm": 0.0443713404238224, "learning_rate": 0.00015308069670102165, "loss": 0.3237, "step": 15363 }, { "epoch": 1.244653272845107, "grad_norm": 0.04677911847829819, "learning_rate": 0.00015307619604842701, "loss": 0.3249, "step": 15364 }, { "epoch": 1.2447342838626052, "grad_norm": 0.04166271165013313, "learning_rate": 0.0001530716953958324, "loss": 0.3289, "step": 15365 }, { "epoch": 1.2448152948801037, "grad_norm": 0.03945012018084526, "learning_rate": 0.0001530671947432378, "loss": 0.2753, "step": 15366 }, { "epoch": 1.2448963058976021, "grad_norm": 0.04785705730319023, "learning_rate": 0.00015306269409064315, "loss": 0.3095, "step": 15367 }, { "epoch": 1.2449773169151004, "grad_norm": 0.04102540388703346, "learning_rate": 0.00015305819343804854, "loss": 0.3083, "step": 15368 }, { "epoch": 1.2450583279325989, "grad_norm": 0.04111190885305405, "learning_rate": 0.0001530536927854539, "loss": 0.3204, "step": 15369 }, { "epoch": 1.2451393389500973, "grad_norm": 0.054699547588825226, "learning_rate": 0.00015304919213285926, "loss": 0.326, "step": 15370 }, { "epoch": 1.2452203499675956, "grad_norm": 0.03741315379738808, "learning_rate": 0.00015304469148026464, "loss": 0.3103, "step": 15371 }, { "epoch": 1.245301360985094, "grad_norm": 0.03876160457730293, "learning_rate": 0.00015304019082767003, "loss": 0.3016, "step": 15372 }, { "epoch": 1.2453823720025923, "grad_norm": 0.03826634958386421, "learning_rate": 0.0001530356901750754, "loss": 0.3104, "step": 15373 }, { "epoch": 1.2454633830200907, "grad_norm": 0.03665310889482498, "learning_rate": 0.00015303118952248078, "loss": 0.2841, "step": 15374 }, { "epoch": 1.2455443940375892, "grad_norm": 0.03642250970005989, "learning_rate": 0.00015302668886988614, "loss": 0.3106, "step": 15375 }, { "epoch": 1.2456254050550875, "grad_norm": 0.042411211878061295, "learning_rate": 0.0001530221882172915, "loss": 0.2864, "step": 15376 }, { "epoch": 1.245706416072586, "grad_norm": 0.03778219595551491, "learning_rate": 0.0001530176875646969, "loss": 0.2864, "step": 15377 }, { "epoch": 1.2457874270900842, "grad_norm": 0.050751928240060806, "learning_rate": 0.00015301318691210227, "loss": 0.3848, "step": 15378 }, { "epoch": 1.2458684381075826, "grad_norm": 0.042749639600515366, "learning_rate": 0.00015300868625950763, "loss": 0.3099, "step": 15379 }, { "epoch": 1.245949449125081, "grad_norm": 0.039685070514678955, "learning_rate": 0.00015300418560691302, "loss": 0.3135, "step": 15380 }, { "epoch": 1.2460304601425793, "grad_norm": 0.04631935432553291, "learning_rate": 0.00015299968495431838, "loss": 0.3155, "step": 15381 }, { "epoch": 1.2461114711600778, "grad_norm": 0.048279713839292526, "learning_rate": 0.00015299518430172374, "loss": 0.3345, "step": 15382 }, { "epoch": 1.246192482177576, "grad_norm": 0.04498374089598656, "learning_rate": 0.00015299068364912915, "loss": 0.3359, "step": 15383 }, { "epoch": 1.2462734931950745, "grad_norm": 0.04783787950873375, "learning_rate": 0.0001529861829965345, "loss": 0.3339, "step": 15384 }, { "epoch": 1.246354504212573, "grad_norm": 0.042057670652866364, "learning_rate": 0.00015298168234393987, "loss": 0.2938, "step": 15385 }, { "epoch": 1.2464355152300712, "grad_norm": 0.03839834779500961, "learning_rate": 0.00015297718169134526, "loss": 0.3028, "step": 15386 }, { "epoch": 1.2465165262475697, "grad_norm": 0.04023413360118866, "learning_rate": 0.00015297268103875062, "loss": 0.2902, "step": 15387 }, { "epoch": 1.246597537265068, "grad_norm": 0.040760256350040436, "learning_rate": 0.00015296818038615598, "loss": 0.3011, "step": 15388 }, { "epoch": 1.2466785482825664, "grad_norm": 0.050573479384183884, "learning_rate": 0.0001529636797335614, "loss": 0.2941, "step": 15389 }, { "epoch": 1.2467595593000649, "grad_norm": 0.04576423019170761, "learning_rate": 0.00015295917908096675, "loss": 0.3508, "step": 15390 }, { "epoch": 1.2468405703175631, "grad_norm": 0.04411442205309868, "learning_rate": 0.00015295467842837211, "loss": 0.2608, "step": 15391 }, { "epoch": 1.2469215813350616, "grad_norm": 0.04792153090238571, "learning_rate": 0.0001529501777757775, "loss": 0.3503, "step": 15392 }, { "epoch": 1.24700259235256, "grad_norm": 0.04913616180419922, "learning_rate": 0.00015294567712318286, "loss": 0.2908, "step": 15393 }, { "epoch": 1.2470836033700583, "grad_norm": 0.038902029395103455, "learning_rate": 0.00015294117647058822, "loss": 0.2665, "step": 15394 }, { "epoch": 1.2471646143875568, "grad_norm": 0.03957482799887657, "learning_rate": 0.00015293667581799364, "loss": 0.2804, "step": 15395 }, { "epoch": 1.247245625405055, "grad_norm": 0.062009721994400024, "learning_rate": 0.000152932175165399, "loss": 0.3534, "step": 15396 }, { "epoch": 1.2473266364225535, "grad_norm": 0.0566871352493763, "learning_rate": 0.00015292767451280436, "loss": 0.3503, "step": 15397 }, { "epoch": 1.2474076474400517, "grad_norm": 0.040789470076560974, "learning_rate": 0.00015292317386020974, "loss": 0.2838, "step": 15398 }, { "epoch": 1.2474886584575502, "grad_norm": 0.038834284991025925, "learning_rate": 0.0001529186732076151, "loss": 0.265, "step": 15399 }, { "epoch": 1.2475696694750487, "grad_norm": 0.044831741601228714, "learning_rate": 0.0001529141725550205, "loss": 0.2864, "step": 15400 }, { "epoch": 1.247650680492547, "grad_norm": 0.05505714565515518, "learning_rate": 0.00015290967190242588, "loss": 0.3743, "step": 15401 }, { "epoch": 1.2477316915100454, "grad_norm": 0.04826575517654419, "learning_rate": 0.00015290517124983124, "loss": 0.3201, "step": 15402 }, { "epoch": 1.2478127025275438, "grad_norm": 0.03487708047032356, "learning_rate": 0.0001529006705972366, "loss": 0.262, "step": 15403 }, { "epoch": 1.247893713545042, "grad_norm": 0.053101979196071625, "learning_rate": 0.00015289616994464199, "loss": 0.3263, "step": 15404 }, { "epoch": 1.2479747245625405, "grad_norm": 0.04719730094075203, "learning_rate": 0.00015289166929204735, "loss": 0.3098, "step": 15405 }, { "epoch": 1.2480557355800388, "grad_norm": 0.049038488417863846, "learning_rate": 0.00015288716863945273, "loss": 0.3135, "step": 15406 }, { "epoch": 1.2481367465975373, "grad_norm": 0.04459191858768463, "learning_rate": 0.00015288266798685812, "loss": 0.3194, "step": 15407 }, { "epoch": 1.2482177576150357, "grad_norm": 0.04248789697885513, "learning_rate": 0.00015287816733426348, "loss": 0.3279, "step": 15408 }, { "epoch": 1.248298768632534, "grad_norm": 0.03661114349961281, "learning_rate": 0.00015287366668166884, "loss": 0.2861, "step": 15409 }, { "epoch": 1.2483797796500324, "grad_norm": 0.03910385072231293, "learning_rate": 0.00015286916602907423, "loss": 0.3059, "step": 15410 }, { "epoch": 1.2484607906675307, "grad_norm": 0.043062131851911545, "learning_rate": 0.0001528646653764796, "loss": 0.2868, "step": 15411 }, { "epoch": 1.2485418016850292, "grad_norm": 0.04848279803991318, "learning_rate": 0.00015286016472388497, "loss": 0.3387, "step": 15412 }, { "epoch": 1.2486228127025276, "grad_norm": 0.045629847794771194, "learning_rate": 0.00015285566407129036, "loss": 0.3147, "step": 15413 }, { "epoch": 1.2487038237200259, "grad_norm": 0.044876888394355774, "learning_rate": 0.00015285116341869572, "loss": 0.309, "step": 15414 }, { "epoch": 1.2487848347375243, "grad_norm": 0.04068077355623245, "learning_rate": 0.00015284666276610108, "loss": 0.2813, "step": 15415 }, { "epoch": 1.2488658457550228, "grad_norm": 0.043438006192445755, "learning_rate": 0.00015284216211350647, "loss": 0.3236, "step": 15416 }, { "epoch": 1.248946856772521, "grad_norm": 0.04731074348092079, "learning_rate": 0.00015283766146091183, "loss": 0.3089, "step": 15417 }, { "epoch": 1.2490278677900195, "grad_norm": 0.04230158403515816, "learning_rate": 0.00015283316080831722, "loss": 0.3025, "step": 15418 }, { "epoch": 1.2491088788075178, "grad_norm": 0.04563254117965698, "learning_rate": 0.0001528286601557226, "loss": 0.3188, "step": 15419 }, { "epoch": 1.2491898898250162, "grad_norm": 0.04034588858485222, "learning_rate": 0.00015282415950312796, "loss": 0.2977, "step": 15420 }, { "epoch": 1.2492709008425145, "grad_norm": 0.045665670186281204, "learning_rate": 0.00015281965885053332, "loss": 0.2815, "step": 15421 }, { "epoch": 1.249351911860013, "grad_norm": 0.04323578253388405, "learning_rate": 0.0001528151581979387, "loss": 0.291, "step": 15422 }, { "epoch": 1.2494329228775114, "grad_norm": 0.03672725334763527, "learning_rate": 0.00015281065754534407, "loss": 0.2454, "step": 15423 }, { "epoch": 1.2495139338950096, "grad_norm": 0.03858471289277077, "learning_rate": 0.00015280615689274946, "loss": 0.3145, "step": 15424 }, { "epoch": 1.249594944912508, "grad_norm": 0.03955140709877014, "learning_rate": 0.00015280165624015484, "loss": 0.3142, "step": 15425 }, { "epoch": 1.2496759559300066, "grad_norm": 0.03527604788541794, "learning_rate": 0.0001527971555875602, "loss": 0.2805, "step": 15426 }, { "epoch": 1.2497569669475048, "grad_norm": 0.04132481664419174, "learning_rate": 0.00015279265493496556, "loss": 0.2937, "step": 15427 }, { "epoch": 1.2498379779650033, "grad_norm": 0.045345671474933624, "learning_rate": 0.00015278815428237095, "loss": 0.3325, "step": 15428 }, { "epoch": 1.2499189889825015, "grad_norm": 0.047520022839307785, "learning_rate": 0.00015278365362977634, "loss": 0.3387, "step": 15429 }, { "epoch": 1.25, "grad_norm": 0.03609742969274521, "learning_rate": 0.0001527791529771817, "loss": 0.297, "step": 15430 }, { "epoch": 1.2500810110174982, "grad_norm": 0.0441846139729023, "learning_rate": 0.00015277465232458709, "loss": 0.324, "step": 15431 }, { "epoch": 1.2501620220349967, "grad_norm": 0.04409998655319214, "learning_rate": 0.00015277015167199245, "loss": 0.3145, "step": 15432 }, { "epoch": 1.2502430330524952, "grad_norm": 0.045291196554899216, "learning_rate": 0.0001527656510193978, "loss": 0.3083, "step": 15433 }, { "epoch": 1.2503240440699934, "grad_norm": 0.043974634259939194, "learning_rate": 0.0001527611503668032, "loss": 0.3109, "step": 15434 }, { "epoch": 1.250405055087492, "grad_norm": 0.04255552589893341, "learning_rate": 0.00015275664971420858, "loss": 0.2912, "step": 15435 }, { "epoch": 1.2504860661049904, "grad_norm": 0.04650450497865677, "learning_rate": 0.00015275214906161394, "loss": 0.3228, "step": 15436 }, { "epoch": 1.2505670771224886, "grad_norm": 0.05076903477311134, "learning_rate": 0.00015274764840901933, "loss": 0.3413, "step": 15437 }, { "epoch": 1.250648088139987, "grad_norm": 0.05367875471711159, "learning_rate": 0.0001527431477564247, "loss": 0.3418, "step": 15438 }, { "epoch": 1.2507290991574855, "grad_norm": 0.04548540711402893, "learning_rate": 0.00015273864710383005, "loss": 0.3201, "step": 15439 }, { "epoch": 1.2508101101749838, "grad_norm": 0.041880495846271515, "learning_rate": 0.00015273414645123543, "loss": 0.3281, "step": 15440 }, { "epoch": 1.2508911211924822, "grad_norm": 0.04110018536448479, "learning_rate": 0.00015272964579864082, "loss": 0.2631, "step": 15441 }, { "epoch": 1.2509721322099805, "grad_norm": 0.04534178227186203, "learning_rate": 0.00015272514514604618, "loss": 0.317, "step": 15442 }, { "epoch": 1.251053143227479, "grad_norm": 0.038438715040683746, "learning_rate": 0.00015272064449345157, "loss": 0.2932, "step": 15443 }, { "epoch": 1.2511341542449772, "grad_norm": 0.04278789460659027, "learning_rate": 0.00015271614384085693, "loss": 0.3121, "step": 15444 }, { "epoch": 1.2512151652624757, "grad_norm": 0.04125070944428444, "learning_rate": 0.0001527116431882623, "loss": 0.3168, "step": 15445 }, { "epoch": 1.2512961762799741, "grad_norm": 0.03827229142189026, "learning_rate": 0.00015270714253566768, "loss": 0.2752, "step": 15446 }, { "epoch": 1.2513771872974724, "grad_norm": 0.0392126627266407, "learning_rate": 0.00015270264188307306, "loss": 0.2835, "step": 15447 }, { "epoch": 1.2514581983149708, "grad_norm": 0.0405149906873703, "learning_rate": 0.00015269814123047842, "loss": 0.2923, "step": 15448 }, { "epoch": 1.2515392093324693, "grad_norm": 0.03790876641869545, "learning_rate": 0.0001526936405778838, "loss": 0.3077, "step": 15449 }, { "epoch": 1.2516202203499676, "grad_norm": 0.05015541613101959, "learning_rate": 0.00015268913992528917, "loss": 0.338, "step": 15450 }, { "epoch": 1.251701231367466, "grad_norm": 0.04771968349814415, "learning_rate": 0.00015268463927269453, "loss": 0.3261, "step": 15451 }, { "epoch": 1.2517822423849643, "grad_norm": 0.042020346969366074, "learning_rate": 0.00015268013862009995, "loss": 0.3063, "step": 15452 }, { "epoch": 1.2518632534024627, "grad_norm": 0.044841647148132324, "learning_rate": 0.0001526756379675053, "loss": 0.3062, "step": 15453 }, { "epoch": 1.251944264419961, "grad_norm": 0.035896364599466324, "learning_rate": 0.00015267113731491067, "loss": 0.2622, "step": 15454 }, { "epoch": 1.2520252754374595, "grad_norm": 0.05748176574707031, "learning_rate": 0.00015266663666231605, "loss": 0.3526, "step": 15455 }, { "epoch": 1.252106286454958, "grad_norm": 0.039488907903432846, "learning_rate": 0.0001526621360097214, "loss": 0.3131, "step": 15456 }, { "epoch": 1.2521872974724562, "grad_norm": 0.04794564098119736, "learning_rate": 0.00015265763535712677, "loss": 0.2788, "step": 15457 }, { "epoch": 1.2522683084899546, "grad_norm": 0.0368480309844017, "learning_rate": 0.0001526531347045322, "loss": 0.2824, "step": 15458 }, { "epoch": 1.252349319507453, "grad_norm": 0.04272850602865219, "learning_rate": 0.00015264863405193755, "loss": 0.2885, "step": 15459 }, { "epoch": 1.2524303305249513, "grad_norm": 0.04270879179239273, "learning_rate": 0.0001526441333993429, "loss": 0.2925, "step": 15460 }, { "epoch": 1.2525113415424498, "grad_norm": 0.04070347175002098, "learning_rate": 0.0001526396327467483, "loss": 0.3141, "step": 15461 }, { "epoch": 1.2525923525599483, "grad_norm": 0.047418802976608276, "learning_rate": 0.00015263513209415365, "loss": 0.3224, "step": 15462 }, { "epoch": 1.2526733635774465, "grad_norm": 0.040149953216314316, "learning_rate": 0.00015263063144155901, "loss": 0.2434, "step": 15463 }, { "epoch": 1.252754374594945, "grad_norm": 0.03160017356276512, "learning_rate": 0.00015262613078896443, "loss": 0.2669, "step": 15464 }, { "epoch": 1.2528353856124432, "grad_norm": 0.043901924043893814, "learning_rate": 0.0001526216301363698, "loss": 0.3326, "step": 15465 }, { "epoch": 1.2529163966299417, "grad_norm": 0.03790322691202164, "learning_rate": 0.00015261712948377515, "loss": 0.2761, "step": 15466 }, { "epoch": 1.25299740764744, "grad_norm": 0.043482448905706406, "learning_rate": 0.00015261262883118054, "loss": 0.3094, "step": 15467 }, { "epoch": 1.2530784186649384, "grad_norm": 0.039540037512779236, "learning_rate": 0.0001526081281785859, "loss": 0.2762, "step": 15468 }, { "epoch": 1.2531594296824369, "grad_norm": 0.03756888955831528, "learning_rate": 0.00015260362752599126, "loss": 0.2782, "step": 15469 }, { "epoch": 1.2532404406999351, "grad_norm": 0.045715007930994034, "learning_rate": 0.00015259912687339667, "loss": 0.2472, "step": 15470 }, { "epoch": 1.2533214517174336, "grad_norm": 0.04127716273069382, "learning_rate": 0.00015259462622080203, "loss": 0.2787, "step": 15471 }, { "epoch": 1.253402462734932, "grad_norm": 0.042705681174993515, "learning_rate": 0.0001525901255682074, "loss": 0.3274, "step": 15472 }, { "epoch": 1.2534834737524303, "grad_norm": 0.04192306101322174, "learning_rate": 0.00015258562491561278, "loss": 0.3108, "step": 15473 }, { "epoch": 1.2535644847699288, "grad_norm": 0.04167857766151428, "learning_rate": 0.00015258112426301814, "loss": 0.2934, "step": 15474 }, { "epoch": 1.253645495787427, "grad_norm": 0.044703904539346695, "learning_rate": 0.0001525766236104235, "loss": 0.3106, "step": 15475 }, { "epoch": 1.2537265068049255, "grad_norm": 0.03742494434118271, "learning_rate": 0.0001525721229578289, "loss": 0.2525, "step": 15476 }, { "epoch": 1.2538075178224237, "grad_norm": 0.04481251910328865, "learning_rate": 0.00015256762230523427, "loss": 0.3179, "step": 15477 }, { "epoch": 1.2538885288399222, "grad_norm": 0.04333583638072014, "learning_rate": 0.00015256312165263963, "loss": 0.3089, "step": 15478 }, { "epoch": 1.2539695398574207, "grad_norm": 0.052331723272800446, "learning_rate": 0.00015255862100004502, "loss": 0.343, "step": 15479 }, { "epoch": 1.254050550874919, "grad_norm": 0.045910757035017014, "learning_rate": 0.00015255412034745038, "loss": 0.3119, "step": 15480 }, { "epoch": 1.2541315618924174, "grad_norm": 0.04972159489989281, "learning_rate": 0.00015254961969485577, "loss": 0.3247, "step": 15481 }, { "epoch": 1.2542125729099158, "grad_norm": 0.045085709542036057, "learning_rate": 0.00015254511904226115, "loss": 0.3349, "step": 15482 }, { "epoch": 1.254293583927414, "grad_norm": 0.13385389745235443, "learning_rate": 0.0001525406183896665, "loss": 0.2876, "step": 15483 }, { "epoch": 1.2543745949449125, "grad_norm": 0.04315098002552986, "learning_rate": 0.00015253611773707187, "loss": 0.3223, "step": 15484 }, { "epoch": 1.254455605962411, "grad_norm": 0.045498088002204895, "learning_rate": 0.00015253161708447726, "loss": 0.3079, "step": 15485 }, { "epoch": 1.2545366169799093, "grad_norm": 0.041511572897434235, "learning_rate": 0.00015252711643188262, "loss": 0.3216, "step": 15486 }, { "epoch": 1.2546176279974077, "grad_norm": 0.0475231409072876, "learning_rate": 0.000152522615779288, "loss": 0.3182, "step": 15487 }, { "epoch": 1.254698639014906, "grad_norm": 0.03755148500204086, "learning_rate": 0.0001525181151266934, "loss": 0.3006, "step": 15488 }, { "epoch": 1.2547796500324044, "grad_norm": 0.045942701399326324, "learning_rate": 0.00015251361447409875, "loss": 0.3557, "step": 15489 }, { "epoch": 1.2548606610499027, "grad_norm": 0.04447175934910774, "learning_rate": 0.00015250911382150411, "loss": 0.2819, "step": 15490 }, { "epoch": 1.2549416720674011, "grad_norm": 0.04260551556944847, "learning_rate": 0.0001525046131689095, "loss": 0.3191, "step": 15491 }, { "epoch": 1.2550226830848996, "grad_norm": 0.04521317034959793, "learning_rate": 0.00015250011251631486, "loss": 0.3028, "step": 15492 }, { "epoch": 1.2551036941023979, "grad_norm": 0.04569809138774872, "learning_rate": 0.00015249561186372025, "loss": 0.3328, "step": 15493 }, { "epoch": 1.2551847051198963, "grad_norm": 0.045397352427244186, "learning_rate": 0.00015249111121112564, "loss": 0.2722, "step": 15494 }, { "epoch": 1.2552657161373948, "grad_norm": 0.04033495858311653, "learning_rate": 0.000152486610558531, "loss": 0.2855, "step": 15495 }, { "epoch": 1.255346727154893, "grad_norm": 0.041850823909044266, "learning_rate": 0.00015248210990593636, "loss": 0.2908, "step": 15496 }, { "epoch": 1.2554277381723915, "grad_norm": 0.042379043996334076, "learning_rate": 0.00015247760925334174, "loss": 0.2709, "step": 15497 }, { "epoch": 1.2555087491898898, "grad_norm": 0.04308145493268967, "learning_rate": 0.0001524731086007471, "loss": 0.3076, "step": 15498 }, { "epoch": 1.2555897602073882, "grad_norm": 0.036246269941329956, "learning_rate": 0.0001524686079481525, "loss": 0.2688, "step": 15499 }, { "epoch": 1.2556707712248865, "grad_norm": 0.04566572234034538, "learning_rate": 0.00015246410729555788, "loss": 0.2818, "step": 15500 }, { "epoch": 1.255751782242385, "grad_norm": 0.04354838281869888, "learning_rate": 0.00015245960664296324, "loss": 0.3119, "step": 15501 }, { "epoch": 1.2558327932598834, "grad_norm": 0.03831060230731964, "learning_rate": 0.0001524551059903686, "loss": 0.2723, "step": 15502 }, { "epoch": 1.2559138042773816, "grad_norm": 0.04013657569885254, "learning_rate": 0.00015245060533777399, "loss": 0.2976, "step": 15503 }, { "epoch": 1.25599481529488, "grad_norm": 0.04500504210591316, "learning_rate": 0.00015244610468517935, "loss": 0.3199, "step": 15504 }, { "epoch": 1.2560758263123786, "grad_norm": 0.04480867460370064, "learning_rate": 0.00015244160403258473, "loss": 0.3475, "step": 15505 }, { "epoch": 1.2561568373298768, "grad_norm": 0.042447056621313095, "learning_rate": 0.00015243710337999012, "loss": 0.2794, "step": 15506 }, { "epoch": 1.2562378483473753, "grad_norm": 0.040769752115011215, "learning_rate": 0.00015243260272739548, "loss": 0.3123, "step": 15507 }, { "epoch": 1.2563188593648738, "grad_norm": 0.04607153683900833, "learning_rate": 0.00015242810207480084, "loss": 0.306, "step": 15508 }, { "epoch": 1.256399870382372, "grad_norm": 0.0423283614218235, "learning_rate": 0.00015242360142220623, "loss": 0.2972, "step": 15509 }, { "epoch": 1.2564808813998705, "grad_norm": 0.04975079372525215, "learning_rate": 0.00015241910076961161, "loss": 0.3243, "step": 15510 }, { "epoch": 1.2565618924173687, "grad_norm": 0.042507603764534, "learning_rate": 0.00015241460011701697, "loss": 0.2612, "step": 15511 }, { "epoch": 1.2566429034348672, "grad_norm": 0.04295576363801956, "learning_rate": 0.00015241009946442236, "loss": 0.3121, "step": 15512 }, { "epoch": 1.2567239144523654, "grad_norm": 0.054205574095249176, "learning_rate": 0.00015240559881182772, "loss": 0.3556, "step": 15513 }, { "epoch": 1.2568049254698639, "grad_norm": 0.041493676602840424, "learning_rate": 0.00015240109815923308, "loss": 0.3016, "step": 15514 }, { "epoch": 1.2568859364873624, "grad_norm": 0.0442705899477005, "learning_rate": 0.00015239659750663847, "loss": 0.2703, "step": 15515 }, { "epoch": 1.2569669475048606, "grad_norm": 0.043185483664274216, "learning_rate": 0.00015239209685404386, "loss": 0.2641, "step": 15516 }, { "epoch": 1.257047958522359, "grad_norm": 0.04261036589741707, "learning_rate": 0.00015238759620144922, "loss": 0.2885, "step": 15517 }, { "epoch": 1.2571289695398575, "grad_norm": 0.03761589154601097, "learning_rate": 0.0001523830955488546, "loss": 0.2964, "step": 15518 }, { "epoch": 1.2572099805573558, "grad_norm": 0.039735615253448486, "learning_rate": 0.00015237859489625996, "loss": 0.3292, "step": 15519 }, { "epoch": 1.2572909915748542, "grad_norm": 0.043260227888822556, "learning_rate": 0.00015237409424366532, "loss": 0.3044, "step": 15520 }, { "epoch": 1.2573720025923525, "grad_norm": 0.046115051954984665, "learning_rate": 0.0001523695935910707, "loss": 0.2867, "step": 15521 }, { "epoch": 1.257453013609851, "grad_norm": 0.05022734776139259, "learning_rate": 0.0001523650929384761, "loss": 0.3176, "step": 15522 }, { "epoch": 1.2575340246273492, "grad_norm": 0.047274477779865265, "learning_rate": 0.00015236059228588146, "loss": 0.3044, "step": 15523 }, { "epoch": 1.2576150356448477, "grad_norm": 0.04098426550626755, "learning_rate": 0.00015235609163328684, "loss": 0.2952, "step": 15524 }, { "epoch": 1.2576960466623461, "grad_norm": 0.04345531016588211, "learning_rate": 0.0001523515909806922, "loss": 0.2888, "step": 15525 }, { "epoch": 1.2577770576798444, "grad_norm": 0.044809646904468536, "learning_rate": 0.00015234709032809756, "loss": 0.287, "step": 15526 }, { "epoch": 1.2578580686973428, "grad_norm": 0.045098863542079926, "learning_rate": 0.00015234258967550295, "loss": 0.3268, "step": 15527 }, { "epoch": 1.2579390797148413, "grad_norm": 0.04287554696202278, "learning_rate": 0.00015233808902290834, "loss": 0.2836, "step": 15528 }, { "epoch": 1.2580200907323396, "grad_norm": 0.04294641315937042, "learning_rate": 0.0001523335883703137, "loss": 0.3327, "step": 15529 }, { "epoch": 1.258101101749838, "grad_norm": 0.04521435499191284, "learning_rate": 0.00015232908771771909, "loss": 0.3041, "step": 15530 }, { "epoch": 1.2581821127673365, "grad_norm": 0.046372584998607635, "learning_rate": 0.00015232458706512445, "loss": 0.315, "step": 15531 }, { "epoch": 1.2582631237848347, "grad_norm": 0.050368066877126694, "learning_rate": 0.0001523200864125298, "loss": 0.3145, "step": 15532 }, { "epoch": 1.258344134802333, "grad_norm": 0.04091639816761017, "learning_rate": 0.00015231558575993522, "loss": 0.2912, "step": 15533 }, { "epoch": 1.2584251458198314, "grad_norm": 0.03936908766627312, "learning_rate": 0.00015231108510734058, "loss": 0.2832, "step": 15534 }, { "epoch": 1.25850615683733, "grad_norm": 0.0416514128446579, "learning_rate": 0.00015230658445474594, "loss": 0.2746, "step": 15535 }, { "epoch": 1.2585871678548282, "grad_norm": 0.04907258227467537, "learning_rate": 0.00015230208380215133, "loss": 0.357, "step": 15536 }, { "epoch": 1.2586681788723266, "grad_norm": 0.04133478179574013, "learning_rate": 0.0001522975831495567, "loss": 0.3154, "step": 15537 }, { "epoch": 1.258749189889825, "grad_norm": 0.03562864661216736, "learning_rate": 0.00015229308249696205, "loss": 0.2747, "step": 15538 }, { "epoch": 1.2588302009073233, "grad_norm": 0.045105401426553726, "learning_rate": 0.00015228858184436746, "loss": 0.3255, "step": 15539 }, { "epoch": 1.2589112119248218, "grad_norm": 0.04034801945090294, "learning_rate": 0.00015228408119177282, "loss": 0.2869, "step": 15540 }, { "epoch": 1.2589922229423203, "grad_norm": 0.0402255542576313, "learning_rate": 0.00015227958053917818, "loss": 0.2629, "step": 15541 }, { "epoch": 1.2590732339598185, "grad_norm": 0.042664676904678345, "learning_rate": 0.00015227507988658357, "loss": 0.3399, "step": 15542 }, { "epoch": 1.259154244977317, "grad_norm": 0.04219324514269829, "learning_rate": 0.00015227057923398893, "loss": 0.2668, "step": 15543 }, { "epoch": 1.2592352559948152, "grad_norm": 0.047899648547172546, "learning_rate": 0.0001522660785813943, "loss": 0.3185, "step": 15544 }, { "epoch": 1.2593162670123137, "grad_norm": 0.04278462007641792, "learning_rate": 0.0001522615779287997, "loss": 0.3015, "step": 15545 }, { "epoch": 1.259397278029812, "grad_norm": 0.04308171570301056, "learning_rate": 0.00015225707727620506, "loss": 0.3054, "step": 15546 }, { "epoch": 1.2594782890473104, "grad_norm": 0.0512288436293602, "learning_rate": 0.00015225257662361042, "loss": 0.3416, "step": 15547 }, { "epoch": 1.2595593000648089, "grad_norm": 0.04132172837853432, "learning_rate": 0.0001522480759710158, "loss": 0.2973, "step": 15548 }, { "epoch": 1.2596403110823071, "grad_norm": 0.046558722853660583, "learning_rate": 0.00015224357531842117, "loss": 0.3055, "step": 15549 }, { "epoch": 1.2597213220998056, "grad_norm": 0.036320433020591736, "learning_rate": 0.00015223907466582653, "loss": 0.3049, "step": 15550 }, { "epoch": 1.259802333117304, "grad_norm": 0.048258814960718155, "learning_rate": 0.00015223457401323195, "loss": 0.3205, "step": 15551 }, { "epoch": 1.2598833441348023, "grad_norm": 0.045856986194849014, "learning_rate": 0.0001522300733606373, "loss": 0.3342, "step": 15552 }, { "epoch": 1.2599643551523008, "grad_norm": 0.04905549809336662, "learning_rate": 0.00015222557270804267, "loss": 0.3411, "step": 15553 }, { "epoch": 1.2600453661697992, "grad_norm": 0.043548863381147385, "learning_rate": 0.00015222107205544805, "loss": 0.2846, "step": 15554 }, { "epoch": 1.2601263771872975, "grad_norm": 0.043483052402734756, "learning_rate": 0.0001522165714028534, "loss": 0.2673, "step": 15555 }, { "epoch": 1.2602073882047957, "grad_norm": 0.043352123349905014, "learning_rate": 0.00015221207075025877, "loss": 0.286, "step": 15556 }, { "epoch": 1.2602883992222942, "grad_norm": 0.04295540973544121, "learning_rate": 0.0001522075700976642, "loss": 0.2844, "step": 15557 }, { "epoch": 1.2603694102397927, "grad_norm": 0.04903509467840195, "learning_rate": 0.00015220306944506955, "loss": 0.3473, "step": 15558 }, { "epoch": 1.260450421257291, "grad_norm": 0.04419170320034027, "learning_rate": 0.0001521985687924749, "loss": 0.2864, "step": 15559 }, { "epoch": 1.2605314322747894, "grad_norm": 0.04279334098100662, "learning_rate": 0.0001521940681398803, "loss": 0.293, "step": 15560 }, { "epoch": 1.2606124432922878, "grad_norm": 0.03789504989981651, "learning_rate": 0.00015218956748728565, "loss": 0.261, "step": 15561 }, { "epoch": 1.260693454309786, "grad_norm": 0.0386795774102211, "learning_rate": 0.00015218506683469104, "loss": 0.2895, "step": 15562 }, { "epoch": 1.2607744653272845, "grad_norm": 0.0440162718296051, "learning_rate": 0.00015218056618209643, "loss": 0.3101, "step": 15563 }, { "epoch": 1.260855476344783, "grad_norm": 0.04073643311858177, "learning_rate": 0.0001521760655295018, "loss": 0.3154, "step": 15564 }, { "epoch": 1.2609364873622813, "grad_norm": 0.036714520305395126, "learning_rate": 0.00015217156487690715, "loss": 0.3254, "step": 15565 }, { "epoch": 1.2610174983797797, "grad_norm": 0.039863135665655136, "learning_rate": 0.00015216706422431254, "loss": 0.2819, "step": 15566 }, { "epoch": 1.261098509397278, "grad_norm": 0.04084132984280586, "learning_rate": 0.0001521625635717179, "loss": 0.2923, "step": 15567 }, { "epoch": 1.2611795204147764, "grad_norm": 0.03947173058986664, "learning_rate": 0.00015215806291912328, "loss": 0.2807, "step": 15568 }, { "epoch": 1.2612605314322747, "grad_norm": 0.04105982184410095, "learning_rate": 0.00015215356226652867, "loss": 0.2919, "step": 15569 }, { "epoch": 1.2613415424497731, "grad_norm": 0.049376294016838074, "learning_rate": 0.00015214906161393403, "loss": 0.3503, "step": 15570 }, { "epoch": 1.2614225534672716, "grad_norm": 0.039907824248075485, "learning_rate": 0.0001521445609613394, "loss": 0.2742, "step": 15571 }, { "epoch": 1.2615035644847699, "grad_norm": 0.03646915778517723, "learning_rate": 0.00015214006030874478, "loss": 0.2981, "step": 15572 }, { "epoch": 1.2615845755022683, "grad_norm": 0.04895637184381485, "learning_rate": 0.00015213555965615014, "loss": 0.3376, "step": 15573 }, { "epoch": 1.2616655865197668, "grad_norm": 0.043007057160139084, "learning_rate": 0.00015213105900355552, "loss": 0.2945, "step": 15574 }, { "epoch": 1.261746597537265, "grad_norm": 0.03746945038437843, "learning_rate": 0.0001521265583509609, "loss": 0.3082, "step": 15575 }, { "epoch": 1.2618276085547635, "grad_norm": 0.0426875576376915, "learning_rate": 0.00015212205769836627, "loss": 0.2913, "step": 15576 }, { "epoch": 1.2619086195722617, "grad_norm": 0.04929223656654358, "learning_rate": 0.00015211755704577163, "loss": 0.3079, "step": 15577 }, { "epoch": 1.2619896305897602, "grad_norm": 0.051895756274461746, "learning_rate": 0.00015211305639317702, "loss": 0.3234, "step": 15578 }, { "epoch": 1.2620706416072585, "grad_norm": 0.036069467663764954, "learning_rate": 0.00015210855574058238, "loss": 0.2533, "step": 15579 }, { "epoch": 1.262151652624757, "grad_norm": 0.04682864993810654, "learning_rate": 0.00015210405508798777, "loss": 0.3029, "step": 15580 }, { "epoch": 1.2622326636422554, "grad_norm": 0.05245593190193176, "learning_rate": 0.00015209955443539315, "loss": 0.2854, "step": 15581 }, { "epoch": 1.2623136746597536, "grad_norm": 0.04711830988526344, "learning_rate": 0.0001520950537827985, "loss": 0.2838, "step": 15582 }, { "epoch": 1.262394685677252, "grad_norm": 0.03892109543085098, "learning_rate": 0.00015209055313020387, "loss": 0.3, "step": 15583 }, { "epoch": 1.2624756966947506, "grad_norm": 0.040280282497406006, "learning_rate": 0.00015208605247760926, "loss": 0.2914, "step": 15584 }, { "epoch": 1.2625567077122488, "grad_norm": 0.04260111600160599, "learning_rate": 0.00015208155182501465, "loss": 0.3323, "step": 15585 }, { "epoch": 1.2626377187297473, "grad_norm": 0.03925931453704834, "learning_rate": 0.00015207705117242, "loss": 0.2601, "step": 15586 }, { "epoch": 1.2627187297472457, "grad_norm": 0.03990313410758972, "learning_rate": 0.0001520725505198254, "loss": 0.3163, "step": 15587 }, { "epoch": 1.262799740764744, "grad_norm": 0.036611080169677734, "learning_rate": 0.00015206804986723076, "loss": 0.3046, "step": 15588 }, { "epoch": 1.2628807517822425, "grad_norm": 0.04936470463871956, "learning_rate": 0.00015206354921463612, "loss": 0.3541, "step": 15589 }, { "epoch": 1.2629617627997407, "grad_norm": 0.04631369933485985, "learning_rate": 0.0001520590485620415, "loss": 0.3364, "step": 15590 }, { "epoch": 1.2630427738172392, "grad_norm": 0.03967304900288582, "learning_rate": 0.0001520545479094469, "loss": 0.2897, "step": 15591 }, { "epoch": 1.2631237848347374, "grad_norm": 0.03701883181929588, "learning_rate": 0.00015205004725685225, "loss": 0.2763, "step": 15592 }, { "epoch": 1.2632047958522359, "grad_norm": 0.04268396273255348, "learning_rate": 0.00015204554660425764, "loss": 0.2927, "step": 15593 }, { "epoch": 1.2632858068697344, "grad_norm": 0.0401676669716835, "learning_rate": 0.000152041045951663, "loss": 0.2805, "step": 15594 }, { "epoch": 1.2633668178872326, "grad_norm": 0.04197492450475693, "learning_rate": 0.00015203654529906836, "loss": 0.2643, "step": 15595 }, { "epoch": 1.263447828904731, "grad_norm": 0.0464635044336319, "learning_rate": 0.00015203204464647374, "loss": 0.3178, "step": 15596 }, { "epoch": 1.2635288399222295, "grad_norm": 0.039722513407468796, "learning_rate": 0.00015202754399387913, "loss": 0.2892, "step": 15597 }, { "epoch": 1.2636098509397278, "grad_norm": 0.049931466579437256, "learning_rate": 0.0001520230433412845, "loss": 0.3061, "step": 15598 }, { "epoch": 1.2636908619572262, "grad_norm": 0.04221179708838463, "learning_rate": 0.00015201854268868988, "loss": 0.3012, "step": 15599 }, { "epoch": 1.2637718729747245, "grad_norm": 0.045019712299108505, "learning_rate": 0.00015201404203609524, "loss": 0.2959, "step": 15600 }, { "epoch": 1.263852883992223, "grad_norm": 0.04281473159790039, "learning_rate": 0.0001520095413835006, "loss": 0.2961, "step": 15601 }, { "epoch": 1.2639338950097212, "grad_norm": 0.04097730293869972, "learning_rate": 0.00015200504073090599, "loss": 0.3082, "step": 15602 }, { "epoch": 1.2640149060272197, "grad_norm": 0.0397987924516201, "learning_rate": 0.00015200054007831137, "loss": 0.2656, "step": 15603 }, { "epoch": 1.2640959170447181, "grad_norm": 0.04969324544072151, "learning_rate": 0.00015199603942571673, "loss": 0.329, "step": 15604 }, { "epoch": 1.2641769280622164, "grad_norm": 0.04034838080406189, "learning_rate": 0.00015199153877312212, "loss": 0.2902, "step": 15605 }, { "epoch": 1.2642579390797148, "grad_norm": 0.0383288599550724, "learning_rate": 0.00015198703812052748, "loss": 0.2864, "step": 15606 }, { "epoch": 1.2643389500972133, "grad_norm": 0.044115833938121796, "learning_rate": 0.00015198253746793284, "loss": 0.2675, "step": 15607 }, { "epoch": 1.2644199611147116, "grad_norm": 0.04049624130129814, "learning_rate": 0.00015197803681533823, "loss": 0.3262, "step": 15608 }, { "epoch": 1.26450097213221, "grad_norm": 0.04395872727036476, "learning_rate": 0.00015197353616274361, "loss": 0.2952, "step": 15609 }, { "epoch": 1.2645819831497085, "grad_norm": 0.04466897249221802, "learning_rate": 0.00015196903551014897, "loss": 0.3386, "step": 15610 }, { "epoch": 1.2646629941672067, "grad_norm": 0.040849585086107254, "learning_rate": 0.00015196453485755436, "loss": 0.2914, "step": 15611 }, { "epoch": 1.2647440051847052, "grad_norm": 0.04917623847723007, "learning_rate": 0.00015196003420495972, "loss": 0.3178, "step": 15612 }, { "epoch": 1.2648250162022034, "grad_norm": 0.04036034271121025, "learning_rate": 0.00015195553355236508, "loss": 0.2835, "step": 15613 }, { "epoch": 1.264906027219702, "grad_norm": 0.044372837990522385, "learning_rate": 0.0001519510328997705, "loss": 0.326, "step": 15614 }, { "epoch": 1.2649870382372002, "grad_norm": 0.048746511340141296, "learning_rate": 0.00015194653224717586, "loss": 0.3475, "step": 15615 }, { "epoch": 1.2650680492546986, "grad_norm": 0.03705955669283867, "learning_rate": 0.00015194203159458122, "loss": 0.2772, "step": 15616 }, { "epoch": 1.265149060272197, "grad_norm": 0.040319714695215225, "learning_rate": 0.0001519375309419866, "loss": 0.3251, "step": 15617 }, { "epoch": 1.2652300712896953, "grad_norm": 0.04241141304373741, "learning_rate": 0.00015193303028939196, "loss": 0.2897, "step": 15618 }, { "epoch": 1.2653110823071938, "grad_norm": 0.03959643840789795, "learning_rate": 0.00015192852963679732, "loss": 0.2967, "step": 15619 }, { "epoch": 1.2653920933246923, "grad_norm": 0.03806019946932793, "learning_rate": 0.00015192402898420274, "loss": 0.2612, "step": 15620 }, { "epoch": 1.2654731043421905, "grad_norm": 0.03872664272785187, "learning_rate": 0.0001519195283316081, "loss": 0.2668, "step": 15621 }, { "epoch": 1.265554115359689, "grad_norm": 0.04202438145875931, "learning_rate": 0.00015191502767901346, "loss": 0.3002, "step": 15622 }, { "epoch": 1.2656351263771872, "grad_norm": 0.03957134857773781, "learning_rate": 0.00015191052702641884, "loss": 0.3285, "step": 15623 }, { "epoch": 1.2657161373946857, "grad_norm": 0.04093240946531296, "learning_rate": 0.0001519060263738242, "loss": 0.3002, "step": 15624 }, { "epoch": 1.265797148412184, "grad_norm": 0.043608762323856354, "learning_rate": 0.00015190152572122956, "loss": 0.2952, "step": 15625 }, { "epoch": 1.2658781594296824, "grad_norm": 0.045039575546979904, "learning_rate": 0.00015189702506863498, "loss": 0.3193, "step": 15626 }, { "epoch": 1.2659591704471809, "grad_norm": 0.04398264363408089, "learning_rate": 0.00015189252441604034, "loss": 0.3487, "step": 15627 }, { "epoch": 1.2660401814646791, "grad_norm": 0.04162249714136124, "learning_rate": 0.0001518880237634457, "loss": 0.3025, "step": 15628 }, { "epoch": 1.2661211924821776, "grad_norm": 0.03937450423836708, "learning_rate": 0.00015188352311085109, "loss": 0.3137, "step": 15629 }, { "epoch": 1.266202203499676, "grad_norm": 0.046762723475694656, "learning_rate": 0.00015187902245825645, "loss": 0.2971, "step": 15630 }, { "epoch": 1.2662832145171743, "grad_norm": 0.03874950483441353, "learning_rate": 0.0001518745218056618, "loss": 0.3116, "step": 15631 }, { "epoch": 1.2663642255346728, "grad_norm": 0.04077541083097458, "learning_rate": 0.00015187002115306722, "loss": 0.2905, "step": 15632 }, { "epoch": 1.2664452365521712, "grad_norm": 0.04175429791212082, "learning_rate": 0.00015186552050047258, "loss": 0.2429, "step": 15633 }, { "epoch": 1.2665262475696695, "grad_norm": 0.04630570858716965, "learning_rate": 0.00015186101984787794, "loss": 0.3291, "step": 15634 }, { "epoch": 1.2666072585871677, "grad_norm": 0.04232180118560791, "learning_rate": 0.00015185651919528333, "loss": 0.2794, "step": 15635 }, { "epoch": 1.2666882696046662, "grad_norm": 0.043028462678194046, "learning_rate": 0.0001518520185426887, "loss": 0.324, "step": 15636 }, { "epoch": 1.2667692806221647, "grad_norm": 0.04590842127799988, "learning_rate": 0.00015184751789009405, "loss": 0.319, "step": 15637 }, { "epoch": 1.266850291639663, "grad_norm": 0.040628597140312195, "learning_rate": 0.00015184301723749946, "loss": 0.2947, "step": 15638 }, { "epoch": 1.2669313026571614, "grad_norm": 0.03495374694466591, "learning_rate": 0.00015183851658490482, "loss": 0.2917, "step": 15639 }, { "epoch": 1.2670123136746598, "grad_norm": 0.04080813005566597, "learning_rate": 0.00015183401593231018, "loss": 0.2997, "step": 15640 }, { "epoch": 1.267093324692158, "grad_norm": 0.038417864590883255, "learning_rate": 0.00015182951527971557, "loss": 0.2355, "step": 15641 }, { "epoch": 1.2671743357096565, "grad_norm": 0.04798012971878052, "learning_rate": 0.00015182501462712093, "loss": 0.2792, "step": 15642 }, { "epoch": 1.267255346727155, "grad_norm": 0.045979537069797516, "learning_rate": 0.00015182051397452632, "loss": 0.2972, "step": 15643 }, { "epoch": 1.2673363577446533, "grad_norm": 0.05009061470627785, "learning_rate": 0.0001518160133219317, "loss": 0.3475, "step": 15644 }, { "epoch": 1.2674173687621517, "grad_norm": 0.046117499470710754, "learning_rate": 0.00015181151266933706, "loss": 0.289, "step": 15645 }, { "epoch": 1.26749837977965, "grad_norm": 0.04126918315887451, "learning_rate": 0.00015180701201674242, "loss": 0.2819, "step": 15646 }, { "epoch": 1.2675793907971484, "grad_norm": 0.041201528161764145, "learning_rate": 0.0001518025113641478, "loss": 0.3306, "step": 15647 }, { "epoch": 1.2676604018146467, "grad_norm": 0.04013405367732048, "learning_rate": 0.00015179801071155317, "loss": 0.2826, "step": 15648 }, { "epoch": 1.2677414128321451, "grad_norm": 0.03912288695573807, "learning_rate": 0.00015179351005895856, "loss": 0.3013, "step": 15649 }, { "epoch": 1.2678224238496436, "grad_norm": 0.04670781269669533, "learning_rate": 0.00015178900940636395, "loss": 0.2704, "step": 15650 }, { "epoch": 1.2679034348671419, "grad_norm": 0.038590800017118454, "learning_rate": 0.0001517845087537693, "loss": 0.2888, "step": 15651 }, { "epoch": 1.2679844458846403, "grad_norm": 0.04906099662184715, "learning_rate": 0.00015178000810117467, "loss": 0.3401, "step": 15652 }, { "epoch": 1.2680654569021388, "grad_norm": 0.04225248098373413, "learning_rate": 0.00015177550744858005, "loss": 0.2836, "step": 15653 }, { "epoch": 1.268146467919637, "grad_norm": 0.038599442690610886, "learning_rate": 0.0001517710067959854, "loss": 0.2802, "step": 15654 }, { "epoch": 1.2682274789371355, "grad_norm": 0.038595061749219894, "learning_rate": 0.0001517665061433908, "loss": 0.2787, "step": 15655 }, { "epoch": 1.268308489954634, "grad_norm": 0.038777854293584824, "learning_rate": 0.0001517620054907962, "loss": 0.3059, "step": 15656 }, { "epoch": 1.2683895009721322, "grad_norm": 0.04319249466061592, "learning_rate": 0.00015175750483820155, "loss": 0.3212, "step": 15657 }, { "epoch": 1.2684705119896305, "grad_norm": 0.04285728558897972, "learning_rate": 0.0001517530041856069, "loss": 0.3142, "step": 15658 }, { "epoch": 1.268551523007129, "grad_norm": 0.045756593346595764, "learning_rate": 0.0001517485035330123, "loss": 0.3022, "step": 15659 }, { "epoch": 1.2686325340246274, "grad_norm": 0.03946535289287567, "learning_rate": 0.00015174400288041765, "loss": 0.28, "step": 15660 }, { "epoch": 1.2687135450421256, "grad_norm": 0.04428525269031525, "learning_rate": 0.00015173950222782304, "loss": 0.2901, "step": 15661 }, { "epoch": 1.268794556059624, "grad_norm": 0.04757576063275337, "learning_rate": 0.00015173500157522843, "loss": 0.3419, "step": 15662 }, { "epoch": 1.2688755670771226, "grad_norm": 0.043652888387441635, "learning_rate": 0.0001517305009226338, "loss": 0.3208, "step": 15663 }, { "epoch": 1.2689565780946208, "grad_norm": 0.044308267533779144, "learning_rate": 0.00015172600027003915, "loss": 0.29, "step": 15664 }, { "epoch": 1.2690375891121193, "grad_norm": 0.04619684815406799, "learning_rate": 0.00015172149961744454, "loss": 0.3121, "step": 15665 }, { "epoch": 1.2691186001296177, "grad_norm": 0.04882878437638283, "learning_rate": 0.00015171699896484992, "loss": 0.3082, "step": 15666 }, { "epoch": 1.269199611147116, "grad_norm": 0.04964202269911766, "learning_rate": 0.00015171249831225528, "loss": 0.3177, "step": 15667 }, { "epoch": 1.2692806221646145, "grad_norm": 0.05015742406249046, "learning_rate": 0.00015170799765966067, "loss": 0.3823, "step": 15668 }, { "epoch": 1.2693616331821127, "grad_norm": 0.047566697001457214, "learning_rate": 0.00015170349700706603, "loss": 0.3273, "step": 15669 }, { "epoch": 1.2694426441996112, "grad_norm": 0.04747958853840828, "learning_rate": 0.0001516989963544714, "loss": 0.2991, "step": 15670 }, { "epoch": 1.2695236552171094, "grad_norm": 0.04064423590898514, "learning_rate": 0.00015169449570187678, "loss": 0.2945, "step": 15671 }, { "epoch": 1.2696046662346079, "grad_norm": 0.04318852722644806, "learning_rate": 0.00015168999504928216, "loss": 0.3127, "step": 15672 }, { "epoch": 1.2696856772521063, "grad_norm": 0.040038831532001495, "learning_rate": 0.00015168549439668752, "loss": 0.2837, "step": 15673 }, { "epoch": 1.2697666882696046, "grad_norm": 0.04599372297525406, "learning_rate": 0.0001516809937440929, "loss": 0.3082, "step": 15674 }, { "epoch": 1.269847699287103, "grad_norm": 0.047460220754146576, "learning_rate": 0.00015167649309149827, "loss": 0.3208, "step": 15675 }, { "epoch": 1.2699287103046015, "grad_norm": 0.03879735246300697, "learning_rate": 0.00015167199243890363, "loss": 0.2752, "step": 15676 }, { "epoch": 1.2700097213220998, "grad_norm": 0.04079611599445343, "learning_rate": 0.00015166749178630902, "loss": 0.2928, "step": 15677 }, { "epoch": 1.2700907323395982, "grad_norm": 0.049497924745082855, "learning_rate": 0.0001516629911337144, "loss": 0.3384, "step": 15678 }, { "epoch": 1.2701717433570965, "grad_norm": 0.03955889493227005, "learning_rate": 0.00015165849048111977, "loss": 0.285, "step": 15679 }, { "epoch": 1.270252754374595, "grad_norm": 0.040690623223781586, "learning_rate": 0.00015165398982852515, "loss": 0.3007, "step": 15680 }, { "epoch": 1.2703337653920932, "grad_norm": 0.047002773731946945, "learning_rate": 0.00015164948917593051, "loss": 0.3075, "step": 15681 }, { "epoch": 1.2704147764095917, "grad_norm": 0.04874710366129875, "learning_rate": 0.00015164498852333587, "loss": 0.324, "step": 15682 }, { "epoch": 1.2704957874270901, "grad_norm": 0.04769693315029144, "learning_rate": 0.00015164048787074126, "loss": 0.3241, "step": 15683 }, { "epoch": 1.2705767984445884, "grad_norm": 0.043867453932762146, "learning_rate": 0.00015163598721814665, "loss": 0.3269, "step": 15684 }, { "epoch": 1.2706578094620868, "grad_norm": 0.04034234583377838, "learning_rate": 0.000151631486565552, "loss": 0.2962, "step": 15685 }, { "epoch": 1.2707388204795853, "grad_norm": 0.04986928403377533, "learning_rate": 0.0001516269859129574, "loss": 0.3145, "step": 15686 }, { "epoch": 1.2708198314970836, "grad_norm": 0.04775730147957802, "learning_rate": 0.00015162248526036276, "loss": 0.3141, "step": 15687 }, { "epoch": 1.270900842514582, "grad_norm": 0.04053473100066185, "learning_rate": 0.00015161798460776812, "loss": 0.2885, "step": 15688 }, { "epoch": 1.2709818535320805, "grad_norm": 0.042710866779088974, "learning_rate": 0.0001516134839551735, "loss": 0.3097, "step": 15689 }, { "epoch": 1.2710628645495787, "grad_norm": 0.03983086347579956, "learning_rate": 0.0001516089833025789, "loss": 0.3149, "step": 15690 }, { "epoch": 1.2711438755670772, "grad_norm": 0.04158749803900719, "learning_rate": 0.00015160448264998425, "loss": 0.2694, "step": 15691 }, { "epoch": 1.2712248865845754, "grad_norm": 0.04513084515929222, "learning_rate": 0.00015159998199738964, "loss": 0.2985, "step": 15692 }, { "epoch": 1.271305897602074, "grad_norm": 0.053826138377189636, "learning_rate": 0.000151595481344795, "loss": 0.3506, "step": 15693 }, { "epoch": 1.2713869086195722, "grad_norm": 0.04571051150560379, "learning_rate": 0.00015159098069220036, "loss": 0.3144, "step": 15694 }, { "epoch": 1.2714679196370706, "grad_norm": 0.04284343495965004, "learning_rate": 0.00015158648003960577, "loss": 0.3118, "step": 15695 }, { "epoch": 1.271548930654569, "grad_norm": 0.04464377835392952, "learning_rate": 0.00015158197938701113, "loss": 0.3267, "step": 15696 }, { "epoch": 1.2716299416720673, "grad_norm": 0.03773884475231171, "learning_rate": 0.0001515774787344165, "loss": 0.2651, "step": 15697 }, { "epoch": 1.2717109526895658, "grad_norm": 0.051023904234170914, "learning_rate": 0.00015157297808182188, "loss": 0.3329, "step": 15698 }, { "epoch": 1.2717919637070643, "grad_norm": 0.040893442928791046, "learning_rate": 0.00015156847742922724, "loss": 0.2699, "step": 15699 }, { "epoch": 1.2718729747245625, "grad_norm": 0.06270833313465118, "learning_rate": 0.0001515639767766326, "loss": 0.3397, "step": 15700 }, { "epoch": 1.271953985742061, "grad_norm": 0.043569017201662064, "learning_rate": 0.000151559476124038, "loss": 0.3259, "step": 15701 }, { "epoch": 1.2720349967595592, "grad_norm": 0.04084454104304314, "learning_rate": 0.00015155497547144337, "loss": 0.3232, "step": 15702 }, { "epoch": 1.2721160077770577, "grad_norm": 0.04653387516736984, "learning_rate": 0.00015155047481884873, "loss": 0.3565, "step": 15703 }, { "epoch": 1.272197018794556, "grad_norm": 0.03843921422958374, "learning_rate": 0.00015154597416625412, "loss": 0.3202, "step": 15704 }, { "epoch": 1.2722780298120544, "grad_norm": 0.04410295560956001, "learning_rate": 0.00015154147351365948, "loss": 0.2891, "step": 15705 }, { "epoch": 1.2723590408295529, "grad_norm": 0.04347636550664902, "learning_rate": 0.00015153697286106484, "loss": 0.3228, "step": 15706 }, { "epoch": 1.2724400518470511, "grad_norm": 0.043565716594457626, "learning_rate": 0.00015153247220847025, "loss": 0.3351, "step": 15707 }, { "epoch": 1.2725210628645496, "grad_norm": 0.040979839861392975, "learning_rate": 0.00015152797155587561, "loss": 0.3374, "step": 15708 }, { "epoch": 1.272602073882048, "grad_norm": 0.04642684757709503, "learning_rate": 0.00015152347090328097, "loss": 0.3309, "step": 15709 }, { "epoch": 1.2726830848995463, "grad_norm": 0.04228156432509422, "learning_rate": 0.00015151897025068636, "loss": 0.3325, "step": 15710 }, { "epoch": 1.2727640959170448, "grad_norm": 0.038930002599954605, "learning_rate": 0.00015151446959809172, "loss": 0.3278, "step": 15711 }, { "epoch": 1.2728451069345432, "grad_norm": 0.04951472580432892, "learning_rate": 0.00015150996894549708, "loss": 0.3123, "step": 15712 }, { "epoch": 1.2729261179520415, "grad_norm": 0.043662507086992264, "learning_rate": 0.0001515054682929025, "loss": 0.2975, "step": 15713 }, { "epoch": 1.27300712896954, "grad_norm": 0.04310872405767441, "learning_rate": 0.00015150096764030786, "loss": 0.2943, "step": 15714 }, { "epoch": 1.2730881399870382, "grad_norm": 0.04810471460223198, "learning_rate": 0.00015149646698771322, "loss": 0.2975, "step": 15715 }, { "epoch": 1.2731691510045366, "grad_norm": 0.043445102870464325, "learning_rate": 0.0001514919663351186, "loss": 0.2829, "step": 15716 }, { "epoch": 1.273250162022035, "grad_norm": 0.03980591893196106, "learning_rate": 0.00015148746568252396, "loss": 0.2977, "step": 15717 }, { "epoch": 1.2733311730395334, "grad_norm": 0.04535724222660065, "learning_rate": 0.00015148296502992935, "loss": 0.332, "step": 15718 }, { "epoch": 1.2734121840570318, "grad_norm": 0.04394984245300293, "learning_rate": 0.00015147846437733474, "loss": 0.2866, "step": 15719 }, { "epoch": 1.27349319507453, "grad_norm": 0.04253106936812401, "learning_rate": 0.0001514739637247401, "loss": 0.3218, "step": 15720 }, { "epoch": 1.2735742060920285, "grad_norm": 0.04656419903039932, "learning_rate": 0.00015146946307214546, "loss": 0.28, "step": 15721 }, { "epoch": 1.273655217109527, "grad_norm": 0.038466498255729675, "learning_rate": 0.00015146496241955085, "loss": 0.2783, "step": 15722 }, { "epoch": 1.2737362281270252, "grad_norm": 0.03624296188354492, "learning_rate": 0.0001514604617669562, "loss": 0.2554, "step": 15723 }, { "epoch": 1.2738172391445237, "grad_norm": 0.048857830464839935, "learning_rate": 0.0001514559611143616, "loss": 0.3126, "step": 15724 }, { "epoch": 1.273898250162022, "grad_norm": 0.04408116638660431, "learning_rate": 0.00015145146046176698, "loss": 0.2878, "step": 15725 }, { "epoch": 1.2739792611795204, "grad_norm": 0.04815554991364479, "learning_rate": 0.00015144695980917234, "loss": 0.3494, "step": 15726 }, { "epoch": 1.2740602721970187, "grad_norm": 0.04634399339556694, "learning_rate": 0.0001514424591565777, "loss": 0.3288, "step": 15727 }, { "epoch": 1.2741412832145171, "grad_norm": 0.044304654002189636, "learning_rate": 0.0001514379585039831, "loss": 0.2857, "step": 15728 }, { "epoch": 1.2742222942320156, "grad_norm": 0.04345221072435379, "learning_rate": 0.00015143345785138845, "loss": 0.3198, "step": 15729 }, { "epoch": 1.2743033052495139, "grad_norm": 0.038415733724832535, "learning_rate": 0.00015142895719879383, "loss": 0.2673, "step": 15730 }, { "epoch": 1.2743843162670123, "grad_norm": 0.047189049422740936, "learning_rate": 0.00015142445654619922, "loss": 0.3257, "step": 15731 }, { "epoch": 1.2744653272845108, "grad_norm": 0.053995609283447266, "learning_rate": 0.00015141995589360458, "loss": 0.3678, "step": 15732 }, { "epoch": 1.274546338302009, "grad_norm": 0.04572221264243126, "learning_rate": 0.00015141545524100994, "loss": 0.2847, "step": 15733 }, { "epoch": 1.2746273493195075, "grad_norm": 0.03478986769914627, "learning_rate": 0.00015141095458841533, "loss": 0.248, "step": 15734 }, { "epoch": 1.274708360337006, "grad_norm": 0.04654289036989212, "learning_rate": 0.0001514064539358207, "loss": 0.3294, "step": 15735 }, { "epoch": 1.2747893713545042, "grad_norm": 0.05372928828001022, "learning_rate": 0.00015140195328322608, "loss": 0.3601, "step": 15736 }, { "epoch": 1.2748703823720027, "grad_norm": 0.044612761586904526, "learning_rate": 0.00015139745263063146, "loss": 0.2979, "step": 15737 }, { "epoch": 1.274951393389501, "grad_norm": 0.04504215344786644, "learning_rate": 0.00015139295197803682, "loss": 0.2803, "step": 15738 }, { "epoch": 1.2750324044069994, "grad_norm": 0.037995193153619766, "learning_rate": 0.00015138845132544218, "loss": 0.296, "step": 15739 }, { "epoch": 1.2751134154244976, "grad_norm": 0.042726241052150726, "learning_rate": 0.00015138395067284757, "loss": 0.2689, "step": 15740 }, { "epoch": 1.275194426441996, "grad_norm": 0.04570639133453369, "learning_rate": 0.00015137945002025293, "loss": 0.2965, "step": 15741 }, { "epoch": 1.2752754374594946, "grad_norm": 0.04122673347592354, "learning_rate": 0.00015137494936765832, "loss": 0.305, "step": 15742 }, { "epoch": 1.2753564484769928, "grad_norm": 0.058693546801805496, "learning_rate": 0.0001513704487150637, "loss": 0.3323, "step": 15743 }, { "epoch": 1.2754374594944913, "grad_norm": 0.03962014988064766, "learning_rate": 0.00015136594806246906, "loss": 0.2568, "step": 15744 }, { "epoch": 1.2755184705119897, "grad_norm": 0.04162263125181198, "learning_rate": 0.00015136144740987442, "loss": 0.2972, "step": 15745 }, { "epoch": 1.275599481529488, "grad_norm": 0.0414293073117733, "learning_rate": 0.0001513569467572798, "loss": 0.2943, "step": 15746 }, { "epoch": 1.2756804925469865, "grad_norm": 0.036952096968889236, "learning_rate": 0.0001513524461046852, "loss": 0.2626, "step": 15747 }, { "epoch": 1.2757615035644847, "grad_norm": 0.03710842505097389, "learning_rate": 0.00015134794545209056, "loss": 0.286, "step": 15748 }, { "epoch": 1.2758425145819832, "grad_norm": 0.04875032976269722, "learning_rate": 0.00015134344479949595, "loss": 0.3319, "step": 15749 }, { "epoch": 1.2759235255994814, "grad_norm": 0.03967723250389099, "learning_rate": 0.0001513389441469013, "loss": 0.2607, "step": 15750 }, { "epoch": 1.2760045366169799, "grad_norm": 0.04703553766012192, "learning_rate": 0.00015133444349430667, "loss": 0.3261, "step": 15751 }, { "epoch": 1.2760855476344783, "grad_norm": 0.04855358600616455, "learning_rate": 0.00015132994284171205, "loss": 0.3398, "step": 15752 }, { "epoch": 1.2761665586519766, "grad_norm": 0.038160208612680435, "learning_rate": 0.00015132544218911744, "loss": 0.284, "step": 15753 }, { "epoch": 1.276247569669475, "grad_norm": 0.05315076559782028, "learning_rate": 0.0001513209415365228, "loss": 0.2767, "step": 15754 }, { "epoch": 1.2763285806869735, "grad_norm": 0.05116632208228111, "learning_rate": 0.0001513164408839282, "loss": 0.319, "step": 15755 }, { "epoch": 1.2764095917044718, "grad_norm": 0.041848476976156235, "learning_rate": 0.00015131194023133355, "loss": 0.2921, "step": 15756 }, { "epoch": 1.2764906027219702, "grad_norm": 0.03994974493980408, "learning_rate": 0.0001513074395787389, "loss": 0.2873, "step": 15757 }, { "epoch": 1.2765716137394687, "grad_norm": 0.04145783931016922, "learning_rate": 0.0001513029389261443, "loss": 0.263, "step": 15758 }, { "epoch": 1.276652624756967, "grad_norm": 0.045399490743875504, "learning_rate": 0.00015129843827354968, "loss": 0.2765, "step": 15759 }, { "epoch": 1.2767336357744652, "grad_norm": 0.041569847613573074, "learning_rate": 0.00015129393762095504, "loss": 0.3211, "step": 15760 }, { "epoch": 1.2768146467919637, "grad_norm": 0.04491395503282547, "learning_rate": 0.00015128943696836043, "loss": 0.2978, "step": 15761 }, { "epoch": 1.2768956578094621, "grad_norm": 0.04015737026929855, "learning_rate": 0.0001512849363157658, "loss": 0.2363, "step": 15762 }, { "epoch": 1.2769766688269604, "grad_norm": 0.05360627546906471, "learning_rate": 0.00015128043566317115, "loss": 0.3579, "step": 15763 }, { "epoch": 1.2770576798444588, "grad_norm": 0.04414328187704086, "learning_rate": 0.00015127593501057654, "loss": 0.3453, "step": 15764 }, { "epoch": 1.2771386908619573, "grad_norm": 0.04586583375930786, "learning_rate": 0.00015127143435798192, "loss": 0.3126, "step": 15765 }, { "epoch": 1.2772197018794555, "grad_norm": 0.04022606834769249, "learning_rate": 0.00015126693370538728, "loss": 0.3004, "step": 15766 }, { "epoch": 1.277300712896954, "grad_norm": 0.04480239376425743, "learning_rate": 0.00015126243305279267, "loss": 0.2437, "step": 15767 }, { "epoch": 1.2773817239144525, "grad_norm": 0.048118509352207184, "learning_rate": 0.00015125793240019803, "loss": 0.358, "step": 15768 }, { "epoch": 1.2774627349319507, "grad_norm": 0.043030571192502975, "learning_rate": 0.0001512534317476034, "loss": 0.2872, "step": 15769 }, { "epoch": 1.2775437459494492, "grad_norm": 0.04102887958288193, "learning_rate": 0.0001512489310950088, "loss": 0.3027, "step": 15770 }, { "epoch": 1.2776247569669474, "grad_norm": 0.04139826446771622, "learning_rate": 0.00015124443044241417, "loss": 0.2893, "step": 15771 }, { "epoch": 1.277705767984446, "grad_norm": 0.045518554747104645, "learning_rate": 0.00015123992978981953, "loss": 0.29, "step": 15772 }, { "epoch": 1.2777867790019442, "grad_norm": 0.04312366247177124, "learning_rate": 0.0001512354291372249, "loss": 0.3286, "step": 15773 }, { "epoch": 1.2778677900194426, "grad_norm": 0.05005163326859474, "learning_rate": 0.00015123092848463027, "loss": 0.2689, "step": 15774 }, { "epoch": 1.277948801036941, "grad_norm": 0.04603840783238411, "learning_rate": 0.00015122642783203563, "loss": 0.3112, "step": 15775 }, { "epoch": 1.2780298120544393, "grad_norm": 0.03954975679516792, "learning_rate": 0.00015122192717944105, "loss": 0.2838, "step": 15776 }, { "epoch": 1.2781108230719378, "grad_norm": 0.04375135526061058, "learning_rate": 0.0001512174265268464, "loss": 0.3219, "step": 15777 }, { "epoch": 1.2781918340894363, "grad_norm": 0.048291079699993134, "learning_rate": 0.00015121292587425177, "loss": 0.3121, "step": 15778 }, { "epoch": 1.2782728451069345, "grad_norm": 0.043258678168058395, "learning_rate": 0.00015120842522165715, "loss": 0.2719, "step": 15779 }, { "epoch": 1.278353856124433, "grad_norm": 0.0422712042927742, "learning_rate": 0.00015120392456906251, "loss": 0.3121, "step": 15780 }, { "epoch": 1.2784348671419314, "grad_norm": 0.04089551419019699, "learning_rate": 0.00015119942391646787, "loss": 0.2963, "step": 15781 }, { "epoch": 1.2785158781594297, "grad_norm": 0.051967184990644455, "learning_rate": 0.0001511949232638733, "loss": 0.3152, "step": 15782 }, { "epoch": 1.278596889176928, "grad_norm": 0.038720134645700455, "learning_rate": 0.00015119042261127865, "loss": 0.2836, "step": 15783 }, { "epoch": 1.2786779001944264, "grad_norm": 0.04551508277654648, "learning_rate": 0.000151185921958684, "loss": 0.3157, "step": 15784 }, { "epoch": 1.2787589112119249, "grad_norm": 0.045961856842041016, "learning_rate": 0.0001511814213060894, "loss": 0.3197, "step": 15785 }, { "epoch": 1.278839922229423, "grad_norm": 0.04323948547244072, "learning_rate": 0.00015117692065349476, "loss": 0.3239, "step": 15786 }, { "epoch": 1.2789209332469216, "grad_norm": 0.04054180532693863, "learning_rate": 0.00015117242000090012, "loss": 0.2835, "step": 15787 }, { "epoch": 1.27900194426442, "grad_norm": 0.04344893619418144, "learning_rate": 0.00015116791934830553, "loss": 0.3076, "step": 15788 }, { "epoch": 1.2790829552819183, "grad_norm": 0.042704347521066666, "learning_rate": 0.0001511634186957109, "loss": 0.3257, "step": 15789 }, { "epoch": 1.2791639662994168, "grad_norm": 0.03959158435463905, "learning_rate": 0.00015115891804311625, "loss": 0.2839, "step": 15790 }, { "epoch": 1.2792449773169152, "grad_norm": 0.04714252054691315, "learning_rate": 0.00015115441739052164, "loss": 0.2966, "step": 15791 }, { "epoch": 1.2793259883344135, "grad_norm": 0.04205985367298126, "learning_rate": 0.000151149916737927, "loss": 0.2981, "step": 15792 }, { "epoch": 1.279406999351912, "grad_norm": 0.05052034929394722, "learning_rate": 0.00015114541608533236, "loss": 0.3106, "step": 15793 }, { "epoch": 1.2794880103694102, "grad_norm": 0.04639451950788498, "learning_rate": 0.00015114091543273777, "loss": 0.3283, "step": 15794 }, { "epoch": 1.2795690213869086, "grad_norm": 0.04097670689225197, "learning_rate": 0.00015113641478014313, "loss": 0.2752, "step": 15795 }, { "epoch": 1.279650032404407, "grad_norm": 0.0460551492869854, "learning_rate": 0.0001511319141275485, "loss": 0.2972, "step": 15796 }, { "epoch": 1.2797310434219054, "grad_norm": 0.03947293758392334, "learning_rate": 0.00015112741347495388, "loss": 0.331, "step": 15797 }, { "epoch": 1.2798120544394038, "grad_norm": 0.039245136082172394, "learning_rate": 0.00015112291282235924, "loss": 0.2845, "step": 15798 }, { "epoch": 1.279893065456902, "grad_norm": 0.058283090591430664, "learning_rate": 0.00015111841216976463, "loss": 0.3352, "step": 15799 }, { "epoch": 1.2799740764744005, "grad_norm": 0.04156630113720894, "learning_rate": 0.00015111391151717, "loss": 0.3177, "step": 15800 }, { "epoch": 1.280055087491899, "grad_norm": 0.04782935976982117, "learning_rate": 0.00015110941086457537, "loss": 0.3109, "step": 15801 }, { "epoch": 1.2801360985093972, "grad_norm": 0.047765400260686874, "learning_rate": 0.00015110491021198073, "loss": 0.3176, "step": 15802 }, { "epoch": 1.2802171095268957, "grad_norm": 0.045515209436416626, "learning_rate": 0.00015110040955938612, "loss": 0.3055, "step": 15803 }, { "epoch": 1.280298120544394, "grad_norm": 0.041018128395080566, "learning_rate": 0.00015109590890679148, "loss": 0.311, "step": 15804 }, { "epoch": 1.2803791315618924, "grad_norm": 0.037294745445251465, "learning_rate": 0.00015109140825419687, "loss": 0.2812, "step": 15805 }, { "epoch": 1.2804601425793907, "grad_norm": 0.0468532033264637, "learning_rate": 0.00015108690760160225, "loss": 0.3211, "step": 15806 }, { "epoch": 1.2805411535968891, "grad_norm": 0.039705317467451096, "learning_rate": 0.00015108240694900761, "loss": 0.2898, "step": 15807 }, { "epoch": 1.2806221646143876, "grad_norm": 0.03788604214787483, "learning_rate": 0.00015107790629641297, "loss": 0.2577, "step": 15808 }, { "epoch": 1.2807031756318858, "grad_norm": 0.04811332747340202, "learning_rate": 0.00015107340564381836, "loss": 0.3519, "step": 15809 }, { "epoch": 1.2807841866493843, "grad_norm": 0.03987358510494232, "learning_rate": 0.00015106890499122372, "loss": 0.2791, "step": 15810 }, { "epoch": 1.2808651976668828, "grad_norm": 0.050667937844991684, "learning_rate": 0.0001510644043386291, "loss": 0.3111, "step": 15811 }, { "epoch": 1.280946208684381, "grad_norm": 0.04332325607538223, "learning_rate": 0.0001510599036860345, "loss": 0.2667, "step": 15812 }, { "epoch": 1.2810272197018795, "grad_norm": 0.041449159383773804, "learning_rate": 0.00015105540303343986, "loss": 0.2847, "step": 15813 }, { "epoch": 1.281108230719378, "grad_norm": 0.03840943053364754, "learning_rate": 0.00015105090238084522, "loss": 0.2873, "step": 15814 }, { "epoch": 1.2811892417368762, "grad_norm": 0.043712034821510315, "learning_rate": 0.0001510464017282506, "loss": 0.3275, "step": 15815 }, { "epoch": 1.2812702527543747, "grad_norm": 0.062320683151483536, "learning_rate": 0.00015104190107565596, "loss": 0.3337, "step": 15816 }, { "epoch": 1.281351263771873, "grad_norm": 0.040366072207689285, "learning_rate": 0.00015103740042306135, "loss": 0.2903, "step": 15817 }, { "epoch": 1.2814322747893714, "grad_norm": 0.05467378348112106, "learning_rate": 0.00015103289977046674, "loss": 0.3829, "step": 15818 }, { "epoch": 1.2815132858068696, "grad_norm": 0.04404067620635033, "learning_rate": 0.0001510283991178721, "loss": 0.3029, "step": 15819 }, { "epoch": 1.281594296824368, "grad_norm": 0.0416707769036293, "learning_rate": 0.00015102389846527746, "loss": 0.2667, "step": 15820 }, { "epoch": 1.2816753078418666, "grad_norm": 0.051012761890888214, "learning_rate": 0.00015101939781268285, "loss": 0.3023, "step": 15821 }, { "epoch": 1.2817563188593648, "grad_norm": 0.04759092628955841, "learning_rate": 0.0001510148971600882, "loss": 0.2905, "step": 15822 }, { "epoch": 1.2818373298768633, "grad_norm": 0.04226645827293396, "learning_rate": 0.0001510103965074936, "loss": 0.3202, "step": 15823 }, { "epoch": 1.2819183408943617, "grad_norm": 0.04447139799594879, "learning_rate": 0.00015100589585489898, "loss": 0.349, "step": 15824 }, { "epoch": 1.28199935191186, "grad_norm": 0.04605408385396004, "learning_rate": 0.00015100139520230434, "loss": 0.311, "step": 15825 }, { "epoch": 1.2820803629293585, "grad_norm": 0.05514140799641609, "learning_rate": 0.0001509968945497097, "loss": 0.3447, "step": 15826 }, { "epoch": 1.2821613739468567, "grad_norm": 0.04618806391954422, "learning_rate": 0.0001509923938971151, "loss": 0.3194, "step": 15827 }, { "epoch": 1.2822423849643552, "grad_norm": 0.04209521412849426, "learning_rate": 0.00015098789324452047, "loss": 0.2996, "step": 15828 }, { "epoch": 1.2823233959818534, "grad_norm": 0.04071464017033577, "learning_rate": 0.00015098339259192583, "loss": 0.291, "step": 15829 }, { "epoch": 1.2824044069993519, "grad_norm": 0.048178721219301224, "learning_rate": 0.00015097889193933122, "loss": 0.3175, "step": 15830 }, { "epoch": 1.2824854180168503, "grad_norm": 0.04599504545331001, "learning_rate": 0.00015097439128673658, "loss": 0.2919, "step": 15831 }, { "epoch": 1.2825664290343486, "grad_norm": 0.04587290808558464, "learning_rate": 0.00015096989063414194, "loss": 0.3469, "step": 15832 }, { "epoch": 1.282647440051847, "grad_norm": 0.04044627398252487, "learning_rate": 0.00015096538998154733, "loss": 0.2704, "step": 15833 }, { "epoch": 1.2827284510693455, "grad_norm": 0.0455966591835022, "learning_rate": 0.00015096088932895272, "loss": 0.3059, "step": 15834 }, { "epoch": 1.2828094620868438, "grad_norm": 0.04464682191610336, "learning_rate": 0.00015095638867635808, "loss": 0.2863, "step": 15835 }, { "epoch": 1.2828904731043422, "grad_norm": 0.04729429632425308, "learning_rate": 0.00015095188802376346, "loss": 0.3079, "step": 15836 }, { "epoch": 1.2829714841218407, "grad_norm": 0.04429822415113449, "learning_rate": 0.00015094738737116882, "loss": 0.2738, "step": 15837 }, { "epoch": 1.283052495139339, "grad_norm": 0.04838932305574417, "learning_rate": 0.00015094288671857418, "loss": 0.3454, "step": 15838 }, { "epoch": 1.2831335061568374, "grad_norm": 0.0450855977833271, "learning_rate": 0.00015093838606597957, "loss": 0.2738, "step": 15839 }, { "epoch": 1.2832145171743357, "grad_norm": 0.044912971556186676, "learning_rate": 0.00015093388541338496, "loss": 0.3287, "step": 15840 }, { "epoch": 1.2832955281918341, "grad_norm": 0.05100622400641441, "learning_rate": 0.00015092938476079032, "loss": 0.337, "step": 15841 }, { "epoch": 1.2833765392093324, "grad_norm": 0.043419577181339264, "learning_rate": 0.0001509248841081957, "loss": 0.2982, "step": 15842 }, { "epoch": 1.2834575502268308, "grad_norm": 0.04492282122373581, "learning_rate": 0.00015092038345560106, "loss": 0.2882, "step": 15843 }, { "epoch": 1.2835385612443293, "grad_norm": 0.04124194756150246, "learning_rate": 0.00015091588280300642, "loss": 0.2766, "step": 15844 }, { "epoch": 1.2836195722618275, "grad_norm": 0.04995120316743851, "learning_rate": 0.0001509113821504118, "loss": 0.3657, "step": 15845 }, { "epoch": 1.283700583279326, "grad_norm": 0.03788880258798599, "learning_rate": 0.0001509068814978172, "loss": 0.2765, "step": 15846 }, { "epoch": 1.2837815942968245, "grad_norm": 0.04601196572184563, "learning_rate": 0.00015090238084522256, "loss": 0.295, "step": 15847 }, { "epoch": 1.2838626053143227, "grad_norm": 0.03844109922647476, "learning_rate": 0.00015089788019262795, "loss": 0.2282, "step": 15848 }, { "epoch": 1.2839436163318212, "grad_norm": 0.03849412500858307, "learning_rate": 0.0001508933795400333, "loss": 0.2936, "step": 15849 }, { "epoch": 1.2840246273493194, "grad_norm": 0.03941682353615761, "learning_rate": 0.00015088887888743867, "loss": 0.2896, "step": 15850 }, { "epoch": 1.284105638366818, "grad_norm": 0.038460344076156616, "learning_rate": 0.00015088437823484408, "loss": 0.3021, "step": 15851 }, { "epoch": 1.2841866493843161, "grad_norm": 0.0497819148004055, "learning_rate": 0.00015087987758224944, "loss": 0.3235, "step": 15852 }, { "epoch": 1.2842676604018146, "grad_norm": 0.048957642167806625, "learning_rate": 0.0001508753769296548, "loss": 0.3431, "step": 15853 }, { "epoch": 1.284348671419313, "grad_norm": 0.051105812191963196, "learning_rate": 0.0001508708762770602, "loss": 0.3352, "step": 15854 }, { "epoch": 1.2844296824368113, "grad_norm": 0.04054543375968933, "learning_rate": 0.00015086637562446555, "loss": 0.2647, "step": 15855 }, { "epoch": 1.2845106934543098, "grad_norm": 0.03664393350481987, "learning_rate": 0.0001508618749718709, "loss": 0.2576, "step": 15856 }, { "epoch": 1.2845917044718083, "grad_norm": 0.04197325557470322, "learning_rate": 0.00015085737431927632, "loss": 0.2832, "step": 15857 }, { "epoch": 1.2846727154893065, "grad_norm": 0.04285228252410889, "learning_rate": 0.00015085287366668168, "loss": 0.2866, "step": 15858 }, { "epoch": 1.284753726506805, "grad_norm": 0.04115356504917145, "learning_rate": 0.00015084837301408704, "loss": 0.3039, "step": 15859 }, { "epoch": 1.2848347375243034, "grad_norm": 0.04148733615875244, "learning_rate": 0.00015084387236149243, "loss": 0.2662, "step": 15860 }, { "epoch": 1.2849157485418017, "grad_norm": 0.04504326730966568, "learning_rate": 0.0001508393717088978, "loss": 0.303, "step": 15861 }, { "epoch": 1.2849967595593, "grad_norm": 0.04083064943552017, "learning_rate": 0.00015083487105630315, "loss": 0.2733, "step": 15862 }, { "epoch": 1.2850777705767984, "grad_norm": 0.049322277307510376, "learning_rate": 0.00015083037040370856, "loss": 0.3014, "step": 15863 }, { "epoch": 1.2851587815942969, "grad_norm": 0.049569688737392426, "learning_rate": 0.00015082586975111392, "loss": 0.2967, "step": 15864 }, { "epoch": 1.285239792611795, "grad_norm": 0.0467229038476944, "learning_rate": 0.00015082136909851928, "loss": 0.3244, "step": 15865 }, { "epoch": 1.2853208036292936, "grad_norm": 0.04364887624979019, "learning_rate": 0.00015081686844592467, "loss": 0.2963, "step": 15866 }, { "epoch": 1.285401814646792, "grad_norm": 0.04608934745192528, "learning_rate": 0.00015081236779333003, "loss": 0.3102, "step": 15867 }, { "epoch": 1.2854828256642903, "grad_norm": 0.055260661989450455, "learning_rate": 0.0001508078671407354, "loss": 0.3336, "step": 15868 }, { "epoch": 1.2855638366817888, "grad_norm": 0.046738460659980774, "learning_rate": 0.0001508033664881408, "loss": 0.3034, "step": 15869 }, { "epoch": 1.2856448476992872, "grad_norm": 0.03964695334434509, "learning_rate": 0.00015079886583554617, "loss": 0.2938, "step": 15870 }, { "epoch": 1.2857258587167855, "grad_norm": 0.041268426924943924, "learning_rate": 0.00015079436518295153, "loss": 0.2884, "step": 15871 }, { "epoch": 1.285806869734284, "grad_norm": 0.04362771660089493, "learning_rate": 0.0001507898645303569, "loss": 0.2964, "step": 15872 }, { "epoch": 1.2858878807517822, "grad_norm": 0.04615508392453194, "learning_rate": 0.00015078536387776227, "loss": 0.2959, "step": 15873 }, { "epoch": 1.2859688917692806, "grad_norm": 0.040400903671979904, "learning_rate": 0.00015078086322516763, "loss": 0.2843, "step": 15874 }, { "epoch": 1.2860499027867789, "grad_norm": 0.05180887505412102, "learning_rate": 0.00015077636257257305, "loss": 0.3766, "step": 15875 }, { "epoch": 1.2861309138042774, "grad_norm": 0.049692705273628235, "learning_rate": 0.0001507718619199784, "loss": 0.3236, "step": 15876 }, { "epoch": 1.2862119248217758, "grad_norm": 0.044623278081417084, "learning_rate": 0.00015076736126738377, "loss": 0.3004, "step": 15877 }, { "epoch": 1.286292935839274, "grad_norm": 0.04762900993227959, "learning_rate": 0.00015076286061478915, "loss": 0.3315, "step": 15878 }, { "epoch": 1.2863739468567725, "grad_norm": 0.054607536643743515, "learning_rate": 0.00015075835996219451, "loss": 0.3493, "step": 15879 }, { "epoch": 1.286454957874271, "grad_norm": 0.044743411242961884, "learning_rate": 0.0001507538593095999, "loss": 0.2617, "step": 15880 }, { "epoch": 1.2865359688917692, "grad_norm": 0.046804748475551605, "learning_rate": 0.0001507493586570053, "loss": 0.3197, "step": 15881 }, { "epoch": 1.2866169799092677, "grad_norm": 0.03905186429619789, "learning_rate": 0.00015074485800441065, "loss": 0.2653, "step": 15882 }, { "epoch": 1.2866979909267662, "grad_norm": 0.04008020833134651, "learning_rate": 0.000150740357351816, "loss": 0.3009, "step": 15883 }, { "epoch": 1.2867790019442644, "grad_norm": 0.039590250700712204, "learning_rate": 0.0001507358566992214, "loss": 0.2968, "step": 15884 }, { "epoch": 1.2868600129617627, "grad_norm": 0.0442819744348526, "learning_rate": 0.00015073135604662676, "loss": 0.3143, "step": 15885 }, { "epoch": 1.2869410239792611, "grad_norm": 0.04467121511697769, "learning_rate": 0.00015072685539403214, "loss": 0.289, "step": 15886 }, { "epoch": 1.2870220349967596, "grad_norm": 0.045479509979486465, "learning_rate": 0.00015072235474143753, "loss": 0.3054, "step": 15887 }, { "epoch": 1.2871030460142578, "grad_norm": 0.043260522186756134, "learning_rate": 0.0001507178540888429, "loss": 0.3363, "step": 15888 }, { "epoch": 1.2871840570317563, "grad_norm": 0.0441555455327034, "learning_rate": 0.00015071335343624825, "loss": 0.295, "step": 15889 }, { "epoch": 1.2872650680492548, "grad_norm": 0.04531952738761902, "learning_rate": 0.00015070885278365364, "loss": 0.3141, "step": 15890 }, { "epoch": 1.287346079066753, "grad_norm": 0.03444262593984604, "learning_rate": 0.000150704352131059, "loss": 0.2738, "step": 15891 }, { "epoch": 1.2874270900842515, "grad_norm": 0.04534309729933739, "learning_rate": 0.00015069985147846438, "loss": 0.2784, "step": 15892 }, { "epoch": 1.28750810110175, "grad_norm": 0.042812563478946686, "learning_rate": 0.00015069535082586977, "loss": 0.2636, "step": 15893 }, { "epoch": 1.2875891121192482, "grad_norm": 0.05558900535106659, "learning_rate": 0.00015069085017327513, "loss": 0.3311, "step": 15894 }, { "epoch": 1.2876701231367467, "grad_norm": 0.046561017632484436, "learning_rate": 0.0001506863495206805, "loss": 0.3049, "step": 15895 }, { "epoch": 1.287751134154245, "grad_norm": 0.04676572233438492, "learning_rate": 0.00015068184886808588, "loss": 0.317, "step": 15896 }, { "epoch": 1.2878321451717434, "grad_norm": 0.04445261508226395, "learning_rate": 0.00015067734821549124, "loss": 0.2855, "step": 15897 }, { "epoch": 1.2879131561892416, "grad_norm": 0.039349205791950226, "learning_rate": 0.00015067284756289663, "loss": 0.2909, "step": 15898 }, { "epoch": 1.28799416720674, "grad_norm": 0.041501231491565704, "learning_rate": 0.000150668346910302, "loss": 0.2833, "step": 15899 }, { "epoch": 1.2880751782242386, "grad_norm": 0.04623178020119667, "learning_rate": 0.00015066384625770737, "loss": 0.2798, "step": 15900 }, { "epoch": 1.2881561892417368, "grad_norm": 0.03733741492033005, "learning_rate": 0.00015065934560511273, "loss": 0.2776, "step": 15901 }, { "epoch": 1.2882372002592353, "grad_norm": 0.0544278547167778, "learning_rate": 0.00015065484495251812, "loss": 0.289, "step": 15902 }, { "epoch": 1.2883182112767337, "grad_norm": 0.0542629212141037, "learning_rate": 0.0001506503442999235, "loss": 0.2831, "step": 15903 }, { "epoch": 1.288399222294232, "grad_norm": 0.04090864211320877, "learning_rate": 0.00015064584364732887, "loss": 0.2856, "step": 15904 }, { "epoch": 1.2884802333117304, "grad_norm": 0.04039837047457695, "learning_rate": 0.00015064134299473426, "loss": 0.2829, "step": 15905 }, { "epoch": 1.2885612443292287, "grad_norm": 0.039326541125774384, "learning_rate": 0.00015063684234213962, "loss": 0.3147, "step": 15906 }, { "epoch": 1.2886422553467272, "grad_norm": 0.040783412754535675, "learning_rate": 0.00015063234168954498, "loss": 0.2949, "step": 15907 }, { "epoch": 1.2887232663642254, "grad_norm": 0.04851400852203369, "learning_rate": 0.00015062784103695036, "loss": 0.3431, "step": 15908 }, { "epoch": 1.2888042773817239, "grad_norm": 0.05781017243862152, "learning_rate": 0.00015062334038435575, "loss": 0.3282, "step": 15909 }, { "epoch": 1.2888852883992223, "grad_norm": 0.047731805592775345, "learning_rate": 0.0001506188397317611, "loss": 0.3127, "step": 15910 }, { "epoch": 1.2889662994167206, "grad_norm": 0.04410161077976227, "learning_rate": 0.0001506143390791665, "loss": 0.3221, "step": 15911 }, { "epoch": 1.289047310434219, "grad_norm": 0.03798040747642517, "learning_rate": 0.00015060983842657186, "loss": 0.275, "step": 15912 }, { "epoch": 1.2891283214517175, "grad_norm": 0.042938366532325745, "learning_rate": 0.00015060533777397722, "loss": 0.3042, "step": 15913 }, { "epoch": 1.2892093324692158, "grad_norm": 0.04302908107638359, "learning_rate": 0.0001506008371213826, "loss": 0.2968, "step": 15914 }, { "epoch": 1.2892903434867142, "grad_norm": 0.04758167266845703, "learning_rate": 0.000150596336468788, "loss": 0.3327, "step": 15915 }, { "epoch": 1.2893713545042127, "grad_norm": 0.04208313673734665, "learning_rate": 0.00015059183581619335, "loss": 0.2942, "step": 15916 }, { "epoch": 1.289452365521711, "grad_norm": 0.039246536791324615, "learning_rate": 0.00015058733516359874, "loss": 0.2958, "step": 15917 }, { "epoch": 1.2895333765392094, "grad_norm": 0.0409688763320446, "learning_rate": 0.0001505828345110041, "loss": 0.3468, "step": 15918 }, { "epoch": 1.2896143875567077, "grad_norm": 0.04577702283859253, "learning_rate": 0.00015057833385840946, "loss": 0.294, "step": 15919 }, { "epoch": 1.2896953985742061, "grad_norm": 0.04426812753081322, "learning_rate": 0.00015057383320581485, "loss": 0.3375, "step": 15920 }, { "epoch": 1.2897764095917044, "grad_norm": 0.03992077335715294, "learning_rate": 0.00015056933255322023, "loss": 0.3062, "step": 15921 }, { "epoch": 1.2898574206092028, "grad_norm": 0.04026995599269867, "learning_rate": 0.0001505648319006256, "loss": 0.2851, "step": 15922 }, { "epoch": 1.2899384316267013, "grad_norm": 0.04487210139632225, "learning_rate": 0.00015056033124803098, "loss": 0.3218, "step": 15923 }, { "epoch": 1.2900194426441995, "grad_norm": 0.03883304074406624, "learning_rate": 0.00015055583059543634, "loss": 0.2588, "step": 15924 }, { "epoch": 1.290100453661698, "grad_norm": 0.04050496220588684, "learning_rate": 0.0001505513299428417, "loss": 0.2928, "step": 15925 }, { "epoch": 1.2901814646791965, "grad_norm": 0.046088963747024536, "learning_rate": 0.0001505468292902471, "loss": 0.3024, "step": 15926 }, { "epoch": 1.2902624756966947, "grad_norm": 0.0416017584502697, "learning_rate": 0.00015054232863765247, "loss": 0.3235, "step": 15927 }, { "epoch": 1.2903434867141932, "grad_norm": 0.04543936997652054, "learning_rate": 0.00015053782798505783, "loss": 0.3001, "step": 15928 }, { "epoch": 1.2904244977316914, "grad_norm": 0.046383414417505264, "learning_rate": 0.00015053332733246322, "loss": 0.3005, "step": 15929 }, { "epoch": 1.29050550874919, "grad_norm": 0.03874411806464195, "learning_rate": 0.00015052882667986858, "loss": 0.2843, "step": 15930 }, { "epoch": 1.2905865197666881, "grad_norm": 0.040499668568372726, "learning_rate": 0.00015052432602727394, "loss": 0.29, "step": 15931 }, { "epoch": 1.2906675307841866, "grad_norm": 0.040241751819849014, "learning_rate": 0.00015051982537467936, "loss": 0.3069, "step": 15932 }, { "epoch": 1.290748541801685, "grad_norm": 0.039121709764003754, "learning_rate": 0.00015051532472208472, "loss": 0.2483, "step": 15933 }, { "epoch": 1.2908295528191833, "grad_norm": 0.04153452441096306, "learning_rate": 0.00015051082406949008, "loss": 0.2819, "step": 15934 }, { "epoch": 1.2909105638366818, "grad_norm": 0.041106656193733215, "learning_rate": 0.00015050632341689546, "loss": 0.317, "step": 15935 }, { "epoch": 1.2909915748541803, "grad_norm": 0.04836004227399826, "learning_rate": 0.00015050182276430082, "loss": 0.2961, "step": 15936 }, { "epoch": 1.2910725858716785, "grad_norm": 0.04820879176259041, "learning_rate": 0.00015049732211170618, "loss": 0.3212, "step": 15937 }, { "epoch": 1.291153596889177, "grad_norm": 0.053317271173000336, "learning_rate": 0.0001504928214591116, "loss": 0.3056, "step": 15938 }, { "epoch": 1.2912346079066754, "grad_norm": 0.04657156392931938, "learning_rate": 0.00015048832080651696, "loss": 0.3214, "step": 15939 }, { "epoch": 1.2913156189241737, "grad_norm": 0.050732627511024475, "learning_rate": 0.00015048382015392232, "loss": 0.2788, "step": 15940 }, { "epoch": 1.2913966299416721, "grad_norm": 0.04055896773934364, "learning_rate": 0.0001504793195013277, "loss": 0.2733, "step": 15941 }, { "epoch": 1.2914776409591704, "grad_norm": 0.04172273725271225, "learning_rate": 0.00015047481884873306, "loss": 0.2872, "step": 15942 }, { "epoch": 1.2915586519766689, "grad_norm": 0.055269479751586914, "learning_rate": 0.00015047031819613842, "loss": 0.3522, "step": 15943 }, { "epoch": 1.291639662994167, "grad_norm": 0.035411085933446884, "learning_rate": 0.00015046581754354384, "loss": 0.2478, "step": 15944 }, { "epoch": 1.2917206740116656, "grad_norm": 0.044676654040813446, "learning_rate": 0.0001504613168909492, "loss": 0.3349, "step": 15945 }, { "epoch": 1.291801685029164, "grad_norm": 0.044342026114463806, "learning_rate": 0.00015045681623835456, "loss": 0.326, "step": 15946 }, { "epoch": 1.2918826960466623, "grad_norm": 0.03670578449964523, "learning_rate": 0.00015045231558575995, "loss": 0.2899, "step": 15947 }, { "epoch": 1.2919637070641607, "grad_norm": 0.04561435803771019, "learning_rate": 0.0001504478149331653, "loss": 0.2935, "step": 15948 }, { "epoch": 1.2920447180816592, "grad_norm": 0.04605408012866974, "learning_rate": 0.00015044331428057067, "loss": 0.2711, "step": 15949 }, { "epoch": 1.2921257290991575, "grad_norm": 0.049196407198905945, "learning_rate": 0.00015043881362797608, "loss": 0.3535, "step": 15950 }, { "epoch": 1.292206740116656, "grad_norm": 0.041564781218767166, "learning_rate": 0.00015043431297538144, "loss": 0.2731, "step": 15951 }, { "epoch": 1.2922877511341542, "grad_norm": 0.04630525782704353, "learning_rate": 0.0001504298123227868, "loss": 0.3286, "step": 15952 }, { "epoch": 1.2923687621516526, "grad_norm": 0.040204476565122604, "learning_rate": 0.0001504253116701922, "loss": 0.2874, "step": 15953 }, { "epoch": 1.2924497731691509, "grad_norm": 0.041800253093242645, "learning_rate": 0.00015042081101759755, "loss": 0.3494, "step": 15954 }, { "epoch": 1.2925307841866494, "grad_norm": 0.04281659051775932, "learning_rate": 0.00015041631036500294, "loss": 0.2867, "step": 15955 }, { "epoch": 1.2926117952041478, "grad_norm": 0.03701747581362724, "learning_rate": 0.00015041180971240832, "loss": 0.3058, "step": 15956 }, { "epoch": 1.292692806221646, "grad_norm": 0.04389016330242157, "learning_rate": 0.00015040730905981368, "loss": 0.3048, "step": 15957 }, { "epoch": 1.2927738172391445, "grad_norm": 0.04472460597753525, "learning_rate": 0.00015040280840721904, "loss": 0.2887, "step": 15958 }, { "epoch": 1.292854828256643, "grad_norm": 0.04361078143119812, "learning_rate": 0.00015039830775462443, "loss": 0.3126, "step": 15959 }, { "epoch": 1.2929358392741412, "grad_norm": 0.050942130386829376, "learning_rate": 0.0001503938071020298, "loss": 0.3615, "step": 15960 }, { "epoch": 1.2930168502916397, "grad_norm": 0.04138937592506409, "learning_rate": 0.00015038930644943518, "loss": 0.2942, "step": 15961 }, { "epoch": 1.2930978613091382, "grad_norm": 0.045624807476997375, "learning_rate": 0.00015038480579684056, "loss": 0.3094, "step": 15962 }, { "epoch": 1.2931788723266364, "grad_norm": 0.0472959503531456, "learning_rate": 0.00015038030514424592, "loss": 0.33, "step": 15963 }, { "epoch": 1.2932598833441347, "grad_norm": 0.043403808027505875, "learning_rate": 0.00015037580449165128, "loss": 0.2962, "step": 15964 }, { "epoch": 1.2933408943616331, "grad_norm": 0.045694395899772644, "learning_rate": 0.00015037130383905667, "loss": 0.3109, "step": 15965 }, { "epoch": 1.2934219053791316, "grad_norm": 0.03987668454647064, "learning_rate": 0.00015036680318646203, "loss": 0.2422, "step": 15966 }, { "epoch": 1.2935029163966298, "grad_norm": 0.04315539449453354, "learning_rate": 0.00015036230253386742, "loss": 0.3088, "step": 15967 }, { "epoch": 1.2935839274141283, "grad_norm": 0.04949916899204254, "learning_rate": 0.0001503578018812728, "loss": 0.3267, "step": 15968 }, { "epoch": 1.2936649384316268, "grad_norm": 0.040975723415613174, "learning_rate": 0.00015035330122867817, "loss": 0.3058, "step": 15969 }, { "epoch": 1.293745949449125, "grad_norm": 0.047277871519327164, "learning_rate": 0.00015034880057608353, "loss": 0.3061, "step": 15970 }, { "epoch": 1.2938269604666235, "grad_norm": 0.04787454754114151, "learning_rate": 0.0001503442999234889, "loss": 0.3157, "step": 15971 }, { "epoch": 1.293907971484122, "grad_norm": 0.0453806146979332, "learning_rate": 0.00015033979927089427, "loss": 0.2985, "step": 15972 }, { "epoch": 1.2939889825016202, "grad_norm": 0.046817343682050705, "learning_rate": 0.00015033529861829966, "loss": 0.3108, "step": 15973 }, { "epoch": 1.2940699935191187, "grad_norm": 0.05633321404457092, "learning_rate": 0.00015033079796570505, "loss": 0.3269, "step": 15974 }, { "epoch": 1.294151004536617, "grad_norm": 0.043721895664930344, "learning_rate": 0.0001503262973131104, "loss": 0.256, "step": 15975 }, { "epoch": 1.2942320155541154, "grad_norm": 0.04535561054944992, "learning_rate": 0.00015032179666051577, "loss": 0.276, "step": 15976 }, { "epoch": 1.2943130265716136, "grad_norm": 0.049087490886449814, "learning_rate": 0.00015031729600792115, "loss": 0.3295, "step": 15977 }, { "epoch": 1.294394037589112, "grad_norm": 0.043317973613739014, "learning_rate": 0.00015031279535532651, "loss": 0.3067, "step": 15978 }, { "epoch": 1.2944750486066106, "grad_norm": 0.04272015765309334, "learning_rate": 0.0001503082947027319, "loss": 0.2662, "step": 15979 }, { "epoch": 1.2945560596241088, "grad_norm": 0.04230866953730583, "learning_rate": 0.0001503037940501373, "loss": 0.2777, "step": 15980 }, { "epoch": 1.2946370706416073, "grad_norm": 0.04598616436123848, "learning_rate": 0.00015029929339754265, "loss": 0.2968, "step": 15981 }, { "epoch": 1.2947180816591057, "grad_norm": 0.042806848883628845, "learning_rate": 0.000150294792744948, "loss": 0.2666, "step": 15982 }, { "epoch": 1.294799092676604, "grad_norm": 0.04042374715209007, "learning_rate": 0.0001502902920923534, "loss": 0.2923, "step": 15983 }, { "epoch": 1.2948801036941024, "grad_norm": 0.05210564658045769, "learning_rate": 0.00015028579143975878, "loss": 0.2913, "step": 15984 }, { "epoch": 1.294961114711601, "grad_norm": 0.0484447255730629, "learning_rate": 0.00015028129078716414, "loss": 0.3355, "step": 15985 }, { "epoch": 1.2950421257290992, "grad_norm": 0.03525340557098389, "learning_rate": 0.00015027679013456953, "loss": 0.2806, "step": 15986 }, { "epoch": 1.2951231367465974, "grad_norm": 0.05046231672167778, "learning_rate": 0.0001502722894819749, "loss": 0.3127, "step": 15987 }, { "epoch": 1.2952041477640959, "grad_norm": 0.03851288929581642, "learning_rate": 0.00015026778882938025, "loss": 0.2616, "step": 15988 }, { "epoch": 1.2952851587815943, "grad_norm": 0.04725256562232971, "learning_rate": 0.00015026328817678564, "loss": 0.3236, "step": 15989 }, { "epoch": 1.2953661697990926, "grad_norm": 0.041266750544309616, "learning_rate": 0.00015025878752419102, "loss": 0.3075, "step": 15990 }, { "epoch": 1.295447180816591, "grad_norm": 0.0488935261964798, "learning_rate": 0.00015025428687159639, "loss": 0.3391, "step": 15991 }, { "epoch": 1.2955281918340895, "grad_norm": 0.04301506653428078, "learning_rate": 0.00015024978621900177, "loss": 0.2989, "step": 15992 }, { "epoch": 1.2956092028515878, "grad_norm": 0.04415897652506828, "learning_rate": 0.00015024528556640713, "loss": 0.3148, "step": 15993 }, { "epoch": 1.2956902138690862, "grad_norm": 0.040060512721538544, "learning_rate": 0.0001502407849138125, "loss": 0.2771, "step": 15994 }, { "epoch": 1.2957712248865847, "grad_norm": 0.04739377275109291, "learning_rate": 0.00015023628426121788, "loss": 0.3172, "step": 15995 }, { "epoch": 1.295852235904083, "grad_norm": 0.045232877135276794, "learning_rate": 0.00015023178360862327, "loss": 0.279, "step": 15996 }, { "epoch": 1.2959332469215814, "grad_norm": 0.04951009899377823, "learning_rate": 0.00015022728295602863, "loss": 0.3251, "step": 15997 }, { "epoch": 1.2960142579390797, "grad_norm": 0.04696754366159439, "learning_rate": 0.00015022278230343401, "loss": 0.2981, "step": 15998 }, { "epoch": 1.2960952689565781, "grad_norm": 0.045662686228752136, "learning_rate": 0.00015021828165083937, "loss": 0.2965, "step": 15999 }, { "epoch": 1.2961762799740764, "grad_norm": 0.04374690353870392, "learning_rate": 0.00015021378099824473, "loss": 0.3251, "step": 16000 }, { "epoch": 1.2962572909915748, "grad_norm": 0.04445065185427666, "learning_rate": 0.00015020928034565012, "loss": 0.2917, "step": 16001 }, { "epoch": 1.2963383020090733, "grad_norm": 0.04260138422250748, "learning_rate": 0.0001502047796930555, "loss": 0.3029, "step": 16002 }, { "epoch": 1.2964193130265715, "grad_norm": 0.037433069199323654, "learning_rate": 0.00015020027904046087, "loss": 0.2683, "step": 16003 }, { "epoch": 1.29650032404407, "grad_norm": 0.04841666668653488, "learning_rate": 0.00015019577838786626, "loss": 0.2876, "step": 16004 }, { "epoch": 1.2965813350615685, "grad_norm": 0.03761151432991028, "learning_rate": 0.00015019127773527162, "loss": 0.2823, "step": 16005 }, { "epoch": 1.2966623460790667, "grad_norm": 0.04309995472431183, "learning_rate": 0.00015018677708267698, "loss": 0.302, "step": 16006 }, { "epoch": 1.2967433570965652, "grad_norm": 0.045569028705358505, "learning_rate": 0.00015018227643008236, "loss": 0.3338, "step": 16007 }, { "epoch": 1.2968243681140634, "grad_norm": 0.04372565820813179, "learning_rate": 0.00015017777577748775, "loss": 0.3042, "step": 16008 }, { "epoch": 1.296905379131562, "grad_norm": 0.043127454817295074, "learning_rate": 0.0001501732751248931, "loss": 0.2757, "step": 16009 }, { "epoch": 1.2969863901490601, "grad_norm": 0.04809211939573288, "learning_rate": 0.0001501687744722985, "loss": 0.3057, "step": 16010 }, { "epoch": 1.2970674011665586, "grad_norm": 0.04196740314364433, "learning_rate": 0.00015016427381970386, "loss": 0.2729, "step": 16011 }, { "epoch": 1.297148412184057, "grad_norm": 0.04643071070313454, "learning_rate": 0.00015015977316710922, "loss": 0.2972, "step": 16012 }, { "epoch": 1.2972294232015553, "grad_norm": 0.045497287064790726, "learning_rate": 0.00015015527251451463, "loss": 0.3334, "step": 16013 }, { "epoch": 1.2973104342190538, "grad_norm": 0.04679165408015251, "learning_rate": 0.00015015077186192, "loss": 0.3005, "step": 16014 }, { "epoch": 1.2973914452365523, "grad_norm": 0.045765623450279236, "learning_rate": 0.00015014627120932535, "loss": 0.3369, "step": 16015 }, { "epoch": 1.2974724562540505, "grad_norm": 0.04222280532121658, "learning_rate": 0.00015014177055673074, "loss": 0.2684, "step": 16016 }, { "epoch": 1.297553467271549, "grad_norm": 0.04675154760479927, "learning_rate": 0.0001501372699041361, "loss": 0.3181, "step": 16017 }, { "epoch": 1.2976344782890474, "grad_norm": 0.04510919377207756, "learning_rate": 0.00015013276925154146, "loss": 0.3513, "step": 16018 }, { "epoch": 1.2977154893065457, "grad_norm": 0.05718798562884331, "learning_rate": 0.00015012826859894687, "loss": 0.3105, "step": 16019 }, { "epoch": 1.2977965003240441, "grad_norm": 0.03958940505981445, "learning_rate": 0.00015012376794635223, "loss": 0.2973, "step": 16020 }, { "epoch": 1.2978775113415424, "grad_norm": 0.036500755697488785, "learning_rate": 0.0001501192672937576, "loss": 0.241, "step": 16021 }, { "epoch": 1.2979585223590409, "grad_norm": 0.03581966832280159, "learning_rate": 0.00015011476664116298, "loss": 0.2634, "step": 16022 }, { "epoch": 1.298039533376539, "grad_norm": 0.04557995870709419, "learning_rate": 0.00015011026598856834, "loss": 0.304, "step": 16023 }, { "epoch": 1.2981205443940376, "grad_norm": 0.04019990190863609, "learning_rate": 0.0001501057653359737, "loss": 0.2557, "step": 16024 }, { "epoch": 1.298201555411536, "grad_norm": 0.050131477415561676, "learning_rate": 0.00015010126468337911, "loss": 0.2861, "step": 16025 }, { "epoch": 1.2982825664290343, "grad_norm": 0.046285029500722885, "learning_rate": 0.00015009676403078447, "loss": 0.2933, "step": 16026 }, { "epoch": 1.2983635774465327, "grad_norm": 0.04279644414782524, "learning_rate": 0.00015009226337818983, "loss": 0.2914, "step": 16027 }, { "epoch": 1.2984445884640312, "grad_norm": 0.049533769488334656, "learning_rate": 0.00015008776272559522, "loss": 0.3532, "step": 16028 }, { "epoch": 1.2985255994815295, "grad_norm": 0.04774849861860275, "learning_rate": 0.00015008326207300058, "loss": 0.2548, "step": 16029 }, { "epoch": 1.298606610499028, "grad_norm": 0.04950843006372452, "learning_rate": 0.00015007876142040594, "loss": 0.3234, "step": 16030 }, { "epoch": 1.2986876215165262, "grad_norm": 0.040186185389757156, "learning_rate": 0.00015007426076781136, "loss": 0.2999, "step": 16031 }, { "epoch": 1.2987686325340246, "grad_norm": 0.04521823674440384, "learning_rate": 0.00015006976011521672, "loss": 0.3095, "step": 16032 }, { "epoch": 1.2988496435515229, "grad_norm": 0.04393572732806206, "learning_rate": 0.00015006525946262208, "loss": 0.2888, "step": 16033 }, { "epoch": 1.2989306545690213, "grad_norm": 0.037971485406160355, "learning_rate": 0.00015006075881002746, "loss": 0.2801, "step": 16034 }, { "epoch": 1.2990116655865198, "grad_norm": 0.04628375545144081, "learning_rate": 0.00015005625815743282, "loss": 0.2941, "step": 16035 }, { "epoch": 1.299092676604018, "grad_norm": 0.04361240938305855, "learning_rate": 0.0001500517575048382, "loss": 0.2815, "step": 16036 }, { "epoch": 1.2991736876215165, "grad_norm": 0.044115811586380005, "learning_rate": 0.0001500472568522436, "loss": 0.3492, "step": 16037 }, { "epoch": 1.299254698639015, "grad_norm": 0.04842967540025711, "learning_rate": 0.00015004275619964896, "loss": 0.326, "step": 16038 }, { "epoch": 1.2993357096565132, "grad_norm": 0.045519985258579254, "learning_rate": 0.00015003825554705432, "loss": 0.3042, "step": 16039 }, { "epoch": 1.2994167206740117, "grad_norm": 0.04397178441286087, "learning_rate": 0.0001500337548944597, "loss": 0.2878, "step": 16040 }, { "epoch": 1.2994977316915102, "grad_norm": 0.040161408483982086, "learning_rate": 0.00015002925424186507, "loss": 0.2637, "step": 16041 }, { "epoch": 1.2995787427090084, "grad_norm": 0.04626128077507019, "learning_rate": 0.00015002475358927045, "loss": 0.2902, "step": 16042 }, { "epoch": 1.2996597537265069, "grad_norm": 0.050964128226041794, "learning_rate": 0.00015002025293667584, "loss": 0.3491, "step": 16043 }, { "epoch": 1.2997407647440051, "grad_norm": 0.04593636468052864, "learning_rate": 0.0001500157522840812, "loss": 0.3294, "step": 16044 }, { "epoch": 1.2998217757615036, "grad_norm": 0.05174465477466583, "learning_rate": 0.00015001125163148656, "loss": 0.3259, "step": 16045 }, { "epoch": 1.2999027867790018, "grad_norm": 0.043847907334566116, "learning_rate": 0.00015000675097889195, "loss": 0.3159, "step": 16046 }, { "epoch": 1.2999837977965003, "grad_norm": 0.047936998307704926, "learning_rate": 0.0001500022503262973, "loss": 0.3451, "step": 16047 }, { "epoch": 1.3000648088139988, "grad_norm": 0.04735151678323746, "learning_rate": 0.0001499977496737027, "loss": 0.3138, "step": 16048 }, { "epoch": 1.300145819831497, "grad_norm": 0.038559772074222565, "learning_rate": 0.00014999324902110808, "loss": 0.2651, "step": 16049 }, { "epoch": 1.3002268308489955, "grad_norm": 0.04015229269862175, "learning_rate": 0.00014998874836851344, "loss": 0.2682, "step": 16050 }, { "epoch": 1.300307841866494, "grad_norm": 0.042013492435216904, "learning_rate": 0.0001499842477159188, "loss": 0.2803, "step": 16051 }, { "epoch": 1.3003888528839922, "grad_norm": 0.043495483696460724, "learning_rate": 0.0001499797470633242, "loss": 0.2651, "step": 16052 }, { "epoch": 1.3004698639014907, "grad_norm": 0.03844980522990227, "learning_rate": 0.00014997524641072955, "loss": 0.2618, "step": 16053 }, { "epoch": 1.300550874918989, "grad_norm": 0.04398927837610245, "learning_rate": 0.00014997074575813494, "loss": 0.2874, "step": 16054 }, { "epoch": 1.3006318859364874, "grad_norm": 0.04379849135875702, "learning_rate": 0.00014996624510554032, "loss": 0.3122, "step": 16055 }, { "epoch": 1.3007128969539856, "grad_norm": 0.044488757848739624, "learning_rate": 0.00014996174445294568, "loss": 0.3078, "step": 16056 }, { "epoch": 1.300793907971484, "grad_norm": 0.04269413650035858, "learning_rate": 0.00014995724380035104, "loss": 0.2786, "step": 16057 }, { "epoch": 1.3008749189889826, "grad_norm": 0.04010821878910065, "learning_rate": 0.00014995274314775643, "loss": 0.252, "step": 16058 }, { "epoch": 1.3009559300064808, "grad_norm": 0.042552195489406586, "learning_rate": 0.0001499482424951618, "loss": 0.3149, "step": 16059 }, { "epoch": 1.3010369410239793, "grad_norm": 0.043433062732219696, "learning_rate": 0.00014994374184256718, "loss": 0.3159, "step": 16060 }, { "epoch": 1.3011179520414777, "grad_norm": 0.0455293282866478, "learning_rate": 0.00014993924118997256, "loss": 0.3078, "step": 16061 }, { "epoch": 1.301198963058976, "grad_norm": 0.04365817457437515, "learning_rate": 0.00014993474053737792, "loss": 0.2855, "step": 16062 }, { "epoch": 1.3012799740764744, "grad_norm": 0.04493528977036476, "learning_rate": 0.00014993023988478328, "loss": 0.3133, "step": 16063 }, { "epoch": 1.301360985093973, "grad_norm": 0.04929269477725029, "learning_rate": 0.00014992573923218867, "loss": 0.3045, "step": 16064 }, { "epoch": 1.3014419961114712, "grad_norm": 0.047745268791913986, "learning_rate": 0.00014992123857959406, "loss": 0.3086, "step": 16065 }, { "epoch": 1.3015230071289696, "grad_norm": 0.0507318489253521, "learning_rate": 0.00014991673792699942, "loss": 0.3284, "step": 16066 }, { "epoch": 1.3016040181464679, "grad_norm": 0.04578344523906708, "learning_rate": 0.0001499122372744048, "loss": 0.289, "step": 16067 }, { "epoch": 1.3016850291639663, "grad_norm": 0.04364114999771118, "learning_rate": 0.00014990773662181017, "loss": 0.3165, "step": 16068 }, { "epoch": 1.3017660401814646, "grad_norm": 0.052346739917993546, "learning_rate": 0.00014990323596921553, "loss": 0.3213, "step": 16069 }, { "epoch": 1.301847051198963, "grad_norm": 0.047455038875341415, "learning_rate": 0.0001498987353166209, "loss": 0.2998, "step": 16070 }, { "epoch": 1.3019280622164615, "grad_norm": 0.046549778431653976, "learning_rate": 0.0001498942346640263, "loss": 0.3323, "step": 16071 }, { "epoch": 1.3020090732339598, "grad_norm": 0.04644530639052391, "learning_rate": 0.00014988973401143166, "loss": 0.2931, "step": 16072 }, { "epoch": 1.3020900842514582, "grad_norm": 0.044147513806819916, "learning_rate": 0.00014988523335883705, "loss": 0.2885, "step": 16073 }, { "epoch": 1.3021710952689567, "grad_norm": 0.05160215497016907, "learning_rate": 0.0001498807327062424, "loss": 0.3206, "step": 16074 }, { "epoch": 1.302252106286455, "grad_norm": 0.04298309236764908, "learning_rate": 0.00014987623205364777, "loss": 0.3082, "step": 16075 }, { "epoch": 1.3023331173039534, "grad_norm": 0.03974214941263199, "learning_rate": 0.00014987173140105315, "loss": 0.3057, "step": 16076 }, { "epoch": 1.3024141283214516, "grad_norm": 0.04440717026591301, "learning_rate": 0.00014986723074845854, "loss": 0.2749, "step": 16077 }, { "epoch": 1.3024951393389501, "grad_norm": 0.043637245893478394, "learning_rate": 0.0001498627300958639, "loss": 0.3224, "step": 16078 }, { "epoch": 1.3025761503564484, "grad_norm": 0.05086296796798706, "learning_rate": 0.0001498582294432693, "loss": 0.3007, "step": 16079 }, { "epoch": 1.3026571613739468, "grad_norm": 0.044451914727687836, "learning_rate": 0.00014985372879067465, "loss": 0.314, "step": 16080 }, { "epoch": 1.3027381723914453, "grad_norm": 0.04223513975739479, "learning_rate": 0.00014984922813808, "loss": 0.2982, "step": 16081 }, { "epoch": 1.3028191834089435, "grad_norm": 0.035806506872177124, "learning_rate": 0.0001498447274854854, "loss": 0.2795, "step": 16082 }, { "epoch": 1.302900194426442, "grad_norm": 0.03943643346428871, "learning_rate": 0.00014984022683289078, "loss": 0.2938, "step": 16083 }, { "epoch": 1.3029812054439405, "grad_norm": 0.0411057323217392, "learning_rate": 0.00014983572618029614, "loss": 0.3254, "step": 16084 }, { "epoch": 1.3030622164614387, "grad_norm": 0.0396103709936142, "learning_rate": 0.00014983122552770153, "loss": 0.2992, "step": 16085 }, { "epoch": 1.3031432274789372, "grad_norm": 0.03847880661487579, "learning_rate": 0.0001498267248751069, "loss": 0.2803, "step": 16086 }, { "epoch": 1.3032242384964356, "grad_norm": 0.044615842401981354, "learning_rate": 0.00014982222422251225, "loss": 0.2959, "step": 16087 }, { "epoch": 1.303305249513934, "grad_norm": 0.046743474900722504, "learning_rate": 0.00014981772356991767, "loss": 0.3133, "step": 16088 }, { "epoch": 1.3033862605314321, "grad_norm": 0.051682259887456894, "learning_rate": 0.00014981322291732303, "loss": 0.2637, "step": 16089 }, { "epoch": 1.3034672715489306, "grad_norm": 0.045838940888643265, "learning_rate": 0.00014980872226472839, "loss": 0.3488, "step": 16090 }, { "epoch": 1.303548282566429, "grad_norm": 0.039805248379707336, "learning_rate": 0.00014980422161213377, "loss": 0.2865, "step": 16091 }, { "epoch": 1.3036292935839273, "grad_norm": 0.04791427031159401, "learning_rate": 0.00014979972095953913, "loss": 0.325, "step": 16092 }, { "epoch": 1.3037103046014258, "grad_norm": 0.04406188055872917, "learning_rate": 0.0001497952203069445, "loss": 0.3095, "step": 16093 }, { "epoch": 1.3037913156189243, "grad_norm": 0.04150623828172684, "learning_rate": 0.0001497907196543499, "loss": 0.2898, "step": 16094 }, { "epoch": 1.3038723266364225, "grad_norm": 0.04663126915693283, "learning_rate": 0.00014978621900175527, "loss": 0.2939, "step": 16095 }, { "epoch": 1.303953337653921, "grad_norm": 0.04955912381410599, "learning_rate": 0.00014978171834916063, "loss": 0.2944, "step": 16096 }, { "epoch": 1.3040343486714194, "grad_norm": 0.044626910239458084, "learning_rate": 0.00014977721769656601, "loss": 0.294, "step": 16097 }, { "epoch": 1.3041153596889177, "grad_norm": 0.047228164970874786, "learning_rate": 0.00014977271704397137, "loss": 0.3016, "step": 16098 }, { "epoch": 1.3041963707064161, "grad_norm": 0.052044130861759186, "learning_rate": 0.00014976821639137673, "loss": 0.2996, "step": 16099 }, { "epoch": 1.3042773817239144, "grad_norm": 0.0400003157556057, "learning_rate": 0.00014976371573878215, "loss": 0.2809, "step": 16100 }, { "epoch": 1.3043583927414129, "grad_norm": 0.042021676898002625, "learning_rate": 0.0001497592150861875, "loss": 0.3078, "step": 16101 }, { "epoch": 1.304439403758911, "grad_norm": 0.04687660560011864, "learning_rate": 0.00014975471443359287, "loss": 0.29, "step": 16102 }, { "epoch": 1.3045204147764096, "grad_norm": 0.04040345549583435, "learning_rate": 0.00014975021378099826, "loss": 0.3057, "step": 16103 }, { "epoch": 1.304601425793908, "grad_norm": 0.037515588104724884, "learning_rate": 0.00014974571312840362, "loss": 0.2705, "step": 16104 }, { "epoch": 1.3046824368114063, "grad_norm": 0.039864424616098404, "learning_rate": 0.000149741212475809, "loss": 0.2901, "step": 16105 }, { "epoch": 1.3047634478289047, "grad_norm": 0.046790868043899536, "learning_rate": 0.0001497367118232144, "loss": 0.3593, "step": 16106 }, { "epoch": 1.3048444588464032, "grad_norm": 0.044947970658540726, "learning_rate": 0.00014973221117061975, "loss": 0.3466, "step": 16107 }, { "epoch": 1.3049254698639015, "grad_norm": 0.04707445576786995, "learning_rate": 0.0001497277105180251, "loss": 0.3203, "step": 16108 }, { "epoch": 1.3050064808814, "grad_norm": 0.04465334489941597, "learning_rate": 0.0001497232098654305, "loss": 0.3207, "step": 16109 }, { "epoch": 1.3050874918988984, "grad_norm": 0.03804957494139671, "learning_rate": 0.00014971870921283586, "loss": 0.2731, "step": 16110 }, { "epoch": 1.3051685029163966, "grad_norm": 0.04358971491456032, "learning_rate": 0.00014971420856024124, "loss": 0.3086, "step": 16111 }, { "epoch": 1.3052495139338949, "grad_norm": 0.040443554520606995, "learning_rate": 0.00014970970790764663, "loss": 0.2703, "step": 16112 }, { "epoch": 1.3053305249513933, "grad_norm": 0.043810319155454636, "learning_rate": 0.000149705207255052, "loss": 0.3033, "step": 16113 }, { "epoch": 1.3054115359688918, "grad_norm": 0.04541864991188049, "learning_rate": 0.00014970070660245735, "loss": 0.2834, "step": 16114 }, { "epoch": 1.30549254698639, "grad_norm": 0.04730004817247391, "learning_rate": 0.00014969620594986274, "loss": 0.357, "step": 16115 }, { "epoch": 1.3055735580038885, "grad_norm": 0.04252275452017784, "learning_rate": 0.0001496917052972681, "loss": 0.3275, "step": 16116 }, { "epoch": 1.305654569021387, "grad_norm": 0.04537322744727135, "learning_rate": 0.00014968720464467349, "loss": 0.318, "step": 16117 }, { "epoch": 1.3057355800388852, "grad_norm": 0.03675117343664169, "learning_rate": 0.00014968270399207887, "loss": 0.2532, "step": 16118 }, { "epoch": 1.3058165910563837, "grad_norm": 0.04434943199157715, "learning_rate": 0.00014967820333948423, "loss": 0.2633, "step": 16119 }, { "epoch": 1.3058976020738822, "grad_norm": 0.04275086894631386, "learning_rate": 0.0001496737026868896, "loss": 0.2996, "step": 16120 }, { "epoch": 1.3059786130913804, "grad_norm": 0.044664327055215836, "learning_rate": 0.00014966920203429498, "loss": 0.2699, "step": 16121 }, { "epoch": 1.3060596241088789, "grad_norm": 0.041324496269226074, "learning_rate": 0.00014966470138170034, "loss": 0.3208, "step": 16122 }, { "epoch": 1.3061406351263771, "grad_norm": 0.052138637751340866, "learning_rate": 0.00014966020072910573, "loss": 0.3199, "step": 16123 }, { "epoch": 1.3062216461438756, "grad_norm": 0.04451883211731911, "learning_rate": 0.00014965570007651111, "loss": 0.2913, "step": 16124 }, { "epoch": 1.3063026571613738, "grad_norm": 0.04054604098200798, "learning_rate": 0.00014965119942391647, "loss": 0.2798, "step": 16125 }, { "epoch": 1.3063836681788723, "grad_norm": 0.04771227017045021, "learning_rate": 0.00014964669877132184, "loss": 0.2989, "step": 16126 }, { "epoch": 1.3064646791963708, "grad_norm": 0.0469462089240551, "learning_rate": 0.00014964219811872722, "loss": 0.3066, "step": 16127 }, { "epoch": 1.306545690213869, "grad_norm": 0.03986061364412308, "learning_rate": 0.00014963769746613258, "loss": 0.2905, "step": 16128 }, { "epoch": 1.3066267012313675, "grad_norm": 0.04885406419634819, "learning_rate": 0.00014963319681353797, "loss": 0.257, "step": 16129 }, { "epoch": 1.306707712248866, "grad_norm": 0.047162119299173355, "learning_rate": 0.00014962869616094336, "loss": 0.3222, "step": 16130 }, { "epoch": 1.3067887232663642, "grad_norm": 0.04074522852897644, "learning_rate": 0.00014962419550834872, "loss": 0.2669, "step": 16131 }, { "epoch": 1.3068697342838627, "grad_norm": 0.03833572939038277, "learning_rate": 0.00014961969485575408, "loss": 0.2841, "step": 16132 }, { "epoch": 1.306950745301361, "grad_norm": 0.04301103204488754, "learning_rate": 0.00014961519420315946, "loss": 0.2464, "step": 16133 }, { "epoch": 1.3070317563188594, "grad_norm": 0.03820543736219406, "learning_rate": 0.00014961069355056482, "loss": 0.2788, "step": 16134 }, { "epoch": 1.3071127673363576, "grad_norm": 0.040909651666879654, "learning_rate": 0.0001496061928979702, "loss": 0.2877, "step": 16135 }, { "epoch": 1.307193778353856, "grad_norm": 0.04465370252728462, "learning_rate": 0.0001496016922453756, "loss": 0.296, "step": 16136 }, { "epoch": 1.3072747893713546, "grad_norm": 0.043238457292318344, "learning_rate": 0.00014959719159278096, "loss": 0.3099, "step": 16137 }, { "epoch": 1.3073558003888528, "grad_norm": 0.05324762687087059, "learning_rate": 0.00014959269094018632, "loss": 0.3171, "step": 16138 }, { "epoch": 1.3074368114063513, "grad_norm": 0.04730679839849472, "learning_rate": 0.0001495881902875917, "loss": 0.2649, "step": 16139 }, { "epoch": 1.3075178224238497, "grad_norm": 0.04472094029188156, "learning_rate": 0.00014958368963499707, "loss": 0.3214, "step": 16140 }, { "epoch": 1.307598833441348, "grad_norm": 0.04622946307063103, "learning_rate": 0.00014957918898240245, "loss": 0.3102, "step": 16141 }, { "epoch": 1.3076798444588464, "grad_norm": 0.04655399173498154, "learning_rate": 0.00014957468832980784, "loss": 0.3093, "step": 16142 }, { "epoch": 1.307760855476345, "grad_norm": 0.05537700280547142, "learning_rate": 0.0001495701876772132, "loss": 0.304, "step": 16143 }, { "epoch": 1.3078418664938432, "grad_norm": 0.04523095861077309, "learning_rate": 0.00014956568702461856, "loss": 0.3066, "step": 16144 }, { "epoch": 1.3079228775113416, "grad_norm": 0.04799005389213562, "learning_rate": 0.00014956118637202395, "loss": 0.3204, "step": 16145 }, { "epoch": 1.3080038885288399, "grad_norm": 0.04450956732034683, "learning_rate": 0.00014955668571942933, "loss": 0.3182, "step": 16146 }, { "epoch": 1.3080848995463383, "grad_norm": 0.045555002987384796, "learning_rate": 0.0001495521850668347, "loss": 0.3486, "step": 16147 }, { "epoch": 1.3081659105638366, "grad_norm": 0.04731540009379387, "learning_rate": 0.00014954768441424008, "loss": 0.3025, "step": 16148 }, { "epoch": 1.308246921581335, "grad_norm": 0.04336179420351982, "learning_rate": 0.00014954318376164544, "loss": 0.2982, "step": 16149 }, { "epoch": 1.3083279325988335, "grad_norm": 0.04941492900252342, "learning_rate": 0.0001495386831090508, "loss": 0.3448, "step": 16150 }, { "epoch": 1.3084089436163318, "grad_norm": 0.04972584545612335, "learning_rate": 0.0001495341824564562, "loss": 0.3649, "step": 16151 }, { "epoch": 1.3084899546338302, "grad_norm": 0.04234988987445831, "learning_rate": 0.00014952968180386158, "loss": 0.3235, "step": 16152 }, { "epoch": 1.3085709656513287, "grad_norm": 0.03894378989934921, "learning_rate": 0.00014952518115126694, "loss": 0.2583, "step": 16153 }, { "epoch": 1.308651976668827, "grad_norm": 0.043541986495256424, "learning_rate": 0.00014952068049867232, "loss": 0.257, "step": 16154 }, { "epoch": 1.3087329876863254, "grad_norm": 0.049867913126945496, "learning_rate": 0.00014951617984607768, "loss": 0.3288, "step": 16155 }, { "epoch": 1.3088139987038236, "grad_norm": 0.044467225670814514, "learning_rate": 0.00014951167919348304, "loss": 0.3199, "step": 16156 }, { "epoch": 1.3088950097213221, "grad_norm": 0.0463348850607872, "learning_rate": 0.00014950717854088843, "loss": 0.3653, "step": 16157 }, { "epoch": 1.3089760207388204, "grad_norm": 0.043573103845119476, "learning_rate": 0.00014950267788829382, "loss": 0.2814, "step": 16158 }, { "epoch": 1.3090570317563188, "grad_norm": 0.04555029794573784, "learning_rate": 0.00014949817723569918, "loss": 0.3303, "step": 16159 }, { "epoch": 1.3091380427738173, "grad_norm": 0.04604102298617363, "learning_rate": 0.00014949367658310456, "loss": 0.2835, "step": 16160 }, { "epoch": 1.3092190537913155, "grad_norm": 0.049761392176151276, "learning_rate": 0.00014948917593050992, "loss": 0.3303, "step": 16161 }, { "epoch": 1.309300064808814, "grad_norm": 0.044333383440971375, "learning_rate": 0.00014948467527791528, "loss": 0.2645, "step": 16162 }, { "epoch": 1.3093810758263125, "grad_norm": 0.04771001636981964, "learning_rate": 0.00014948017462532067, "loss": 0.2924, "step": 16163 }, { "epoch": 1.3094620868438107, "grad_norm": 0.050036001950502396, "learning_rate": 0.00014947567397272606, "loss": 0.307, "step": 16164 }, { "epoch": 1.3095430978613092, "grad_norm": 0.037328965961933136, "learning_rate": 0.00014947117332013142, "loss": 0.2602, "step": 16165 }, { "epoch": 1.3096241088788076, "grad_norm": 0.04898840934038162, "learning_rate": 0.0001494666726675368, "loss": 0.3122, "step": 16166 }, { "epoch": 1.309705119896306, "grad_norm": 0.04634608328342438, "learning_rate": 0.00014946217201494217, "loss": 0.2927, "step": 16167 }, { "epoch": 1.3097861309138044, "grad_norm": 0.040241241455078125, "learning_rate": 0.00014945767136234755, "loss": 0.2361, "step": 16168 }, { "epoch": 1.3098671419313026, "grad_norm": 0.03884800523519516, "learning_rate": 0.00014945317070975294, "loss": 0.2764, "step": 16169 }, { "epoch": 1.309948152948801, "grad_norm": 0.04845311865210533, "learning_rate": 0.0001494486700571583, "loss": 0.2989, "step": 16170 }, { "epoch": 1.3100291639662993, "grad_norm": 0.04241282120347023, "learning_rate": 0.00014944416940456366, "loss": 0.2814, "step": 16171 }, { "epoch": 1.3101101749837978, "grad_norm": 0.045538969337940216, "learning_rate": 0.00014943966875196905, "loss": 0.3064, "step": 16172 }, { "epoch": 1.3101911860012962, "grad_norm": 0.045505356043577194, "learning_rate": 0.0001494351680993744, "loss": 0.3011, "step": 16173 }, { "epoch": 1.3102721970187945, "grad_norm": 0.05274083465337753, "learning_rate": 0.0001494306674467798, "loss": 0.3548, "step": 16174 }, { "epoch": 1.310353208036293, "grad_norm": 0.04924383759498596, "learning_rate": 0.00014942616679418518, "loss": 0.3182, "step": 16175 }, { "epoch": 1.3104342190537914, "grad_norm": 0.04586834833025932, "learning_rate": 0.00014942166614159054, "loss": 0.2625, "step": 16176 }, { "epoch": 1.3105152300712897, "grad_norm": 0.036821745336055756, "learning_rate": 0.0001494171654889959, "loss": 0.2864, "step": 16177 }, { "epoch": 1.3105962410887881, "grad_norm": 0.0499521940946579, "learning_rate": 0.0001494126648364013, "loss": 0.3059, "step": 16178 }, { "epoch": 1.3106772521062864, "grad_norm": 0.046249426901340485, "learning_rate": 0.00014940816418380665, "loss": 0.3361, "step": 16179 }, { "epoch": 1.3107582631237849, "grad_norm": 0.04398550093173981, "learning_rate": 0.00014940366353121204, "loss": 0.2944, "step": 16180 }, { "epoch": 1.310839274141283, "grad_norm": 0.04730512201786041, "learning_rate": 0.00014939916287861742, "loss": 0.2812, "step": 16181 }, { "epoch": 1.3109202851587816, "grad_norm": 0.045634277164936066, "learning_rate": 0.00014939466222602278, "loss": 0.3237, "step": 16182 }, { "epoch": 1.31100129617628, "grad_norm": 0.0385470911860466, "learning_rate": 0.00014939016157342814, "loss": 0.2575, "step": 16183 }, { "epoch": 1.3110823071937783, "grad_norm": 0.0583970732986927, "learning_rate": 0.00014938566092083353, "loss": 0.326, "step": 16184 }, { "epoch": 1.3111633182112767, "grad_norm": 0.0451340489089489, "learning_rate": 0.0001493811602682389, "loss": 0.3356, "step": 16185 }, { "epoch": 1.3112443292287752, "grad_norm": 0.04714914783835411, "learning_rate": 0.00014937665961564428, "loss": 0.3159, "step": 16186 }, { "epoch": 1.3113253402462735, "grad_norm": 0.04920654743909836, "learning_rate": 0.00014937215896304967, "loss": 0.3433, "step": 16187 }, { "epoch": 1.311406351263772, "grad_norm": 0.047340042889118195, "learning_rate": 0.00014936765831045503, "loss": 0.3036, "step": 16188 }, { "epoch": 1.3114873622812704, "grad_norm": 0.04961530491709709, "learning_rate": 0.00014936315765786039, "loss": 0.3524, "step": 16189 }, { "epoch": 1.3115683732987686, "grad_norm": 0.047209519892930984, "learning_rate": 0.00014935865700526577, "loss": 0.283, "step": 16190 }, { "epoch": 1.3116493843162669, "grad_norm": 0.04718087241053581, "learning_rate": 0.00014935415635267113, "loss": 0.3011, "step": 16191 }, { "epoch": 1.3117303953337653, "grad_norm": 0.03914792090654373, "learning_rate": 0.00014934965570007652, "loss": 0.2609, "step": 16192 }, { "epoch": 1.3118114063512638, "grad_norm": 0.0349646620452404, "learning_rate": 0.0001493451550474819, "loss": 0.2508, "step": 16193 }, { "epoch": 1.311892417368762, "grad_norm": 0.04413396492600441, "learning_rate": 0.00014934065439488727, "loss": 0.3095, "step": 16194 }, { "epoch": 1.3119734283862605, "grad_norm": 0.04602505639195442, "learning_rate": 0.00014933615374229263, "loss": 0.3345, "step": 16195 }, { "epoch": 1.312054439403759, "grad_norm": 0.046725794672966, "learning_rate": 0.00014933165308969801, "loss": 0.284, "step": 16196 }, { "epoch": 1.3121354504212572, "grad_norm": 0.04499703273177147, "learning_rate": 0.00014932715243710337, "loss": 0.3166, "step": 16197 }, { "epoch": 1.3122164614387557, "grad_norm": 0.04107973352074623, "learning_rate": 0.00014932265178450876, "loss": 0.2955, "step": 16198 }, { "epoch": 1.3122974724562542, "grad_norm": 0.047366026788949966, "learning_rate": 0.00014931815113191415, "loss": 0.3459, "step": 16199 }, { "epoch": 1.3123784834737524, "grad_norm": 0.046791817992925644, "learning_rate": 0.0001493136504793195, "loss": 0.3169, "step": 16200 }, { "epoch": 1.3124594944912509, "grad_norm": 0.04345640167593956, "learning_rate": 0.00014930914982672487, "loss": 0.3244, "step": 16201 }, { "epoch": 1.3125405055087491, "grad_norm": 0.049602605402469635, "learning_rate": 0.00014930464917413026, "loss": 0.3081, "step": 16202 }, { "epoch": 1.3126215165262476, "grad_norm": 0.038922298699617386, "learning_rate": 0.00014930014852153562, "loss": 0.3138, "step": 16203 }, { "epoch": 1.3127025275437458, "grad_norm": 0.038196027278900146, "learning_rate": 0.000149295647868941, "loss": 0.2738, "step": 16204 }, { "epoch": 1.3127835385612443, "grad_norm": 0.04386325925588608, "learning_rate": 0.0001492911472163464, "loss": 0.2777, "step": 16205 }, { "epoch": 1.3128645495787428, "grad_norm": 0.04231401905417442, "learning_rate": 0.00014928664656375175, "loss": 0.2798, "step": 16206 }, { "epoch": 1.312945560596241, "grad_norm": 0.04036155715584755, "learning_rate": 0.0001492821459111571, "loss": 0.2626, "step": 16207 }, { "epoch": 1.3130265716137395, "grad_norm": 0.044200096279382706, "learning_rate": 0.0001492776452585625, "loss": 0.2679, "step": 16208 }, { "epoch": 1.313107582631238, "grad_norm": 0.050400812178850174, "learning_rate": 0.00014927314460596786, "loss": 0.2955, "step": 16209 }, { "epoch": 1.3131885936487362, "grad_norm": 0.05265969783067703, "learning_rate": 0.00014926864395337324, "loss": 0.3352, "step": 16210 }, { "epoch": 1.3132696046662347, "grad_norm": 0.05643681436777115, "learning_rate": 0.00014926414330077863, "loss": 0.3202, "step": 16211 }, { "epoch": 1.3133506156837331, "grad_norm": 0.044375356286764145, "learning_rate": 0.000149259642648184, "loss": 0.2982, "step": 16212 }, { "epoch": 1.3134316267012314, "grad_norm": 0.04609943926334381, "learning_rate": 0.00014925514199558935, "loss": 0.2706, "step": 16213 }, { "epoch": 1.3135126377187296, "grad_norm": 0.04458443075418472, "learning_rate": 0.00014925064134299474, "loss": 0.3428, "step": 16214 }, { "epoch": 1.313593648736228, "grad_norm": 0.043120864778757095, "learning_rate": 0.0001492461406904001, "loss": 0.314, "step": 16215 }, { "epoch": 1.3136746597537265, "grad_norm": 0.04164094477891922, "learning_rate": 0.0001492416400378055, "loss": 0.2947, "step": 16216 }, { "epoch": 1.3137556707712248, "grad_norm": 0.04162612557411194, "learning_rate": 0.00014923713938521087, "loss": 0.2416, "step": 16217 }, { "epoch": 1.3138366817887233, "grad_norm": 0.04056188836693764, "learning_rate": 0.00014923263873261623, "loss": 0.295, "step": 16218 }, { "epoch": 1.3139176928062217, "grad_norm": 0.04554165154695511, "learning_rate": 0.0001492281380800216, "loss": 0.2958, "step": 16219 }, { "epoch": 1.31399870382372, "grad_norm": 0.03735050931572914, "learning_rate": 0.00014922363742742698, "loss": 0.2633, "step": 16220 }, { "epoch": 1.3140797148412184, "grad_norm": 0.04008159786462784, "learning_rate": 0.00014921913677483237, "loss": 0.2751, "step": 16221 }, { "epoch": 1.314160725858717, "grad_norm": 0.044225722551345825, "learning_rate": 0.00014921463612223773, "loss": 0.3397, "step": 16222 }, { "epoch": 1.3142417368762151, "grad_norm": 0.043361153453588486, "learning_rate": 0.00014921013546964312, "loss": 0.2926, "step": 16223 }, { "epoch": 1.3143227478937136, "grad_norm": 0.052007514983415604, "learning_rate": 0.00014920563481704848, "loss": 0.3254, "step": 16224 }, { "epoch": 1.3144037589112119, "grad_norm": 0.04618023708462715, "learning_rate": 0.00014920113416445384, "loss": 0.297, "step": 16225 }, { "epoch": 1.3144847699287103, "grad_norm": 0.036869119852781296, "learning_rate": 0.00014919663351185922, "loss": 0.2568, "step": 16226 }, { "epoch": 1.3145657809462086, "grad_norm": 0.041353028267621994, "learning_rate": 0.0001491921328592646, "loss": 0.3196, "step": 16227 }, { "epoch": 1.314646791963707, "grad_norm": 0.04122963547706604, "learning_rate": 0.00014918763220666997, "loss": 0.2587, "step": 16228 }, { "epoch": 1.3147278029812055, "grad_norm": 0.04404785856604576, "learning_rate": 0.00014918313155407536, "loss": 0.292, "step": 16229 }, { "epoch": 1.3148088139987038, "grad_norm": 0.04821160435676575, "learning_rate": 0.00014917863090148072, "loss": 0.3102, "step": 16230 }, { "epoch": 1.3148898250162022, "grad_norm": 0.04912686347961426, "learning_rate": 0.00014917413024888608, "loss": 0.3306, "step": 16231 }, { "epoch": 1.3149708360337007, "grad_norm": 0.03853919729590416, "learning_rate": 0.00014916962959629146, "loss": 0.2546, "step": 16232 }, { "epoch": 1.315051847051199, "grad_norm": 0.05155406892299652, "learning_rate": 0.00014916512894369685, "loss": 0.3081, "step": 16233 }, { "epoch": 1.3151328580686974, "grad_norm": 0.04806877300143242, "learning_rate": 0.0001491606282911022, "loss": 0.2985, "step": 16234 }, { "epoch": 1.3152138690861956, "grad_norm": 0.04302647337317467, "learning_rate": 0.0001491561276385076, "loss": 0.2841, "step": 16235 }, { "epoch": 1.315294880103694, "grad_norm": 0.05157895386219025, "learning_rate": 0.00014915162698591296, "loss": 0.3008, "step": 16236 }, { "epoch": 1.3153758911211924, "grad_norm": 0.04015476256608963, "learning_rate": 0.00014914712633331835, "loss": 0.2475, "step": 16237 }, { "epoch": 1.3154569021386908, "grad_norm": 0.04195760563015938, "learning_rate": 0.0001491426256807237, "loss": 0.2656, "step": 16238 }, { "epoch": 1.3155379131561893, "grad_norm": 0.04301521182060242, "learning_rate": 0.0001491381250281291, "loss": 0.2797, "step": 16239 }, { "epoch": 1.3156189241736875, "grad_norm": 0.03678332269191742, "learning_rate": 0.00014913362437553445, "loss": 0.2708, "step": 16240 }, { "epoch": 1.315699935191186, "grad_norm": 0.04338261857628822, "learning_rate": 0.00014912912372293984, "loss": 0.284, "step": 16241 }, { "epoch": 1.3157809462086845, "grad_norm": 0.04346082732081413, "learning_rate": 0.0001491246230703452, "loss": 0.3124, "step": 16242 }, { "epoch": 1.3158619572261827, "grad_norm": 0.046911466866731644, "learning_rate": 0.0001491201224177506, "loss": 0.3336, "step": 16243 }, { "epoch": 1.3159429682436812, "grad_norm": 0.045720525085926056, "learning_rate": 0.00014911562176515595, "loss": 0.3069, "step": 16244 }, { "epoch": 1.3160239792611796, "grad_norm": 0.04304931312799454, "learning_rate": 0.00014911112111256133, "loss": 0.325, "step": 16245 }, { "epoch": 1.3161049902786779, "grad_norm": 0.04115717113018036, "learning_rate": 0.0001491066204599667, "loss": 0.2686, "step": 16246 }, { "epoch": 1.3161860012961764, "grad_norm": 0.04282047972083092, "learning_rate": 0.00014910211980737208, "loss": 0.2918, "step": 16247 }, { "epoch": 1.3162670123136746, "grad_norm": 0.041621021926403046, "learning_rate": 0.00014909761915477744, "loss": 0.2957, "step": 16248 }, { "epoch": 1.316348023331173, "grad_norm": 0.052234623581171036, "learning_rate": 0.00014909311850218283, "loss": 0.3141, "step": 16249 }, { "epoch": 1.3164290343486713, "grad_norm": 0.04725967347621918, "learning_rate": 0.00014908861784958822, "loss": 0.3002, "step": 16250 }, { "epoch": 1.3165100453661698, "grad_norm": 0.04024965688586235, "learning_rate": 0.00014908411719699358, "loss": 0.2501, "step": 16251 }, { "epoch": 1.3165910563836682, "grad_norm": 0.04805604740977287, "learning_rate": 0.00014907961654439894, "loss": 0.3048, "step": 16252 }, { "epoch": 1.3166720674011665, "grad_norm": 0.03625888749957085, "learning_rate": 0.00014907511589180432, "loss": 0.257, "step": 16253 }, { "epoch": 1.316753078418665, "grad_norm": 0.041517432779073715, "learning_rate": 0.00014907061523920968, "loss": 0.2926, "step": 16254 }, { "epoch": 1.3168340894361634, "grad_norm": 0.03820646181702614, "learning_rate": 0.00014906611458661507, "loss": 0.2643, "step": 16255 }, { "epoch": 1.3169151004536617, "grad_norm": 0.04265735298395157, "learning_rate": 0.00014906161393402046, "loss": 0.2882, "step": 16256 }, { "epoch": 1.3169961114711601, "grad_norm": 0.04392039030790329, "learning_rate": 0.00014905711328142582, "loss": 0.2883, "step": 16257 }, { "epoch": 1.3170771224886584, "grad_norm": 0.05036157742142677, "learning_rate": 0.00014905261262883118, "loss": 0.2946, "step": 16258 }, { "epoch": 1.3171581335061568, "grad_norm": 0.042300909757614136, "learning_rate": 0.00014904811197623656, "loss": 0.2485, "step": 16259 }, { "epoch": 1.317239144523655, "grad_norm": 0.04726019129157066, "learning_rate": 0.00014904361132364192, "loss": 0.2971, "step": 16260 }, { "epoch": 1.3173201555411536, "grad_norm": 0.04520502686500549, "learning_rate": 0.0001490391106710473, "loss": 0.3104, "step": 16261 }, { "epoch": 1.317401166558652, "grad_norm": 0.04767908155918121, "learning_rate": 0.0001490346100184527, "loss": 0.3258, "step": 16262 }, { "epoch": 1.3174821775761503, "grad_norm": 0.04345300421118736, "learning_rate": 0.00014903010936585806, "loss": 0.2666, "step": 16263 }, { "epoch": 1.3175631885936487, "grad_norm": 0.0426764152944088, "learning_rate": 0.00014902560871326342, "loss": 0.2775, "step": 16264 }, { "epoch": 1.3176441996111472, "grad_norm": 0.045147597789764404, "learning_rate": 0.0001490211080606688, "loss": 0.3101, "step": 16265 }, { "epoch": 1.3177252106286454, "grad_norm": 0.04169589281082153, "learning_rate": 0.00014901660740807417, "loss": 0.3093, "step": 16266 }, { "epoch": 1.317806221646144, "grad_norm": 0.04596323519945145, "learning_rate": 0.00014901210675547955, "loss": 0.3083, "step": 16267 }, { "epoch": 1.3178872326636424, "grad_norm": 0.040057264268398285, "learning_rate": 0.00014900760610288494, "loss": 0.2925, "step": 16268 }, { "epoch": 1.3179682436811406, "grad_norm": 0.04390899837017059, "learning_rate": 0.0001490031054502903, "loss": 0.3158, "step": 16269 }, { "epoch": 1.318049254698639, "grad_norm": 0.04600910469889641, "learning_rate": 0.00014899860479769566, "loss": 0.2962, "step": 16270 }, { "epoch": 1.3181302657161373, "grad_norm": 0.04342671483755112, "learning_rate": 0.00014899410414510105, "loss": 0.2998, "step": 16271 }, { "epoch": 1.3182112767336358, "grad_norm": 0.0377708300948143, "learning_rate": 0.0001489896034925064, "loss": 0.2635, "step": 16272 }, { "epoch": 1.318292287751134, "grad_norm": 0.04475024342536926, "learning_rate": 0.0001489851028399118, "loss": 0.3128, "step": 16273 }, { "epoch": 1.3183732987686325, "grad_norm": 0.045971859246492386, "learning_rate": 0.00014898060218731718, "loss": 0.2968, "step": 16274 }, { "epoch": 1.318454309786131, "grad_norm": 0.046566497534513474, "learning_rate": 0.00014897610153472254, "loss": 0.3358, "step": 16275 }, { "epoch": 1.3185353208036292, "grad_norm": 0.04151848703622818, "learning_rate": 0.0001489716008821279, "loss": 0.2838, "step": 16276 }, { "epoch": 1.3186163318211277, "grad_norm": 0.050141703337430954, "learning_rate": 0.0001489671002295333, "loss": 0.3329, "step": 16277 }, { "epoch": 1.3186973428386262, "grad_norm": 0.04717142507433891, "learning_rate": 0.00014896259957693865, "loss": 0.2995, "step": 16278 }, { "epoch": 1.3187783538561244, "grad_norm": 0.036442629992961884, "learning_rate": 0.00014895809892434404, "loss": 0.2636, "step": 16279 }, { "epoch": 1.3188593648736229, "grad_norm": 0.04248730465769768, "learning_rate": 0.00014895359827174942, "loss": 0.2475, "step": 16280 }, { "epoch": 1.3189403758911211, "grad_norm": 0.04148675128817558, "learning_rate": 0.00014894909761915478, "loss": 0.3094, "step": 16281 }, { "epoch": 1.3190213869086196, "grad_norm": 0.041609227657318115, "learning_rate": 0.00014894459696656014, "loss": 0.2662, "step": 16282 }, { "epoch": 1.3191023979261178, "grad_norm": 0.037535324692726135, "learning_rate": 0.00014894009631396553, "loss": 0.2697, "step": 16283 }, { "epoch": 1.3191834089436163, "grad_norm": 0.04177939519286156, "learning_rate": 0.0001489355956613709, "loss": 0.2829, "step": 16284 }, { "epoch": 1.3192644199611148, "grad_norm": 0.04366298019886017, "learning_rate": 0.00014893109500877628, "loss": 0.2721, "step": 16285 }, { "epoch": 1.319345430978613, "grad_norm": 0.05289445072412491, "learning_rate": 0.00014892659435618167, "loss": 0.3097, "step": 16286 }, { "epoch": 1.3194264419961115, "grad_norm": 0.05009431391954422, "learning_rate": 0.00014892209370358703, "loss": 0.3314, "step": 16287 }, { "epoch": 1.31950745301361, "grad_norm": 0.044810205698013306, "learning_rate": 0.00014891759305099239, "loss": 0.2782, "step": 16288 }, { "epoch": 1.3195884640311082, "grad_norm": 0.04492989182472229, "learning_rate": 0.00014891309239839777, "loss": 0.2969, "step": 16289 }, { "epoch": 1.3196694750486067, "grad_norm": 0.043226175010204315, "learning_rate": 0.00014890859174580313, "loss": 0.2892, "step": 16290 }, { "epoch": 1.3197504860661051, "grad_norm": 0.043849408626556396, "learning_rate": 0.00014890409109320852, "loss": 0.2898, "step": 16291 }, { "epoch": 1.3198314970836034, "grad_norm": 0.04191071540117264, "learning_rate": 0.0001488995904406139, "loss": 0.2868, "step": 16292 }, { "epoch": 1.3199125081011016, "grad_norm": 0.046212535351514816, "learning_rate": 0.00014889508978801927, "loss": 0.3105, "step": 16293 }, { "epoch": 1.3199935191186, "grad_norm": 0.04912080615758896, "learning_rate": 0.00014889058913542463, "loss": 0.3092, "step": 16294 }, { "epoch": 1.3200745301360985, "grad_norm": 0.039494119584560394, "learning_rate": 0.00014888608848283001, "loss": 0.301, "step": 16295 }, { "epoch": 1.3201555411535968, "grad_norm": 0.04608963057398796, "learning_rate": 0.00014888158783023537, "loss": 0.3144, "step": 16296 }, { "epoch": 1.3202365521710953, "grad_norm": 0.04640983045101166, "learning_rate": 0.00014887708717764076, "loss": 0.319, "step": 16297 }, { "epoch": 1.3203175631885937, "grad_norm": 0.05373993143439293, "learning_rate": 0.00014887258652504615, "loss": 0.2999, "step": 16298 }, { "epoch": 1.320398574206092, "grad_norm": 0.04362735524773598, "learning_rate": 0.0001488680858724515, "loss": 0.2942, "step": 16299 }, { "epoch": 1.3204795852235904, "grad_norm": 0.04731497913599014, "learning_rate": 0.00014886358521985687, "loss": 0.3022, "step": 16300 }, { "epoch": 1.320560596241089, "grad_norm": 0.052373286336660385, "learning_rate": 0.00014885908456726226, "loss": 0.2766, "step": 16301 }, { "epoch": 1.3206416072585871, "grad_norm": 0.04618718847632408, "learning_rate": 0.00014885458391466764, "loss": 0.2711, "step": 16302 }, { "epoch": 1.3207226182760856, "grad_norm": 0.03901274874806404, "learning_rate": 0.000148850083262073, "loss": 0.2865, "step": 16303 }, { "epoch": 1.3208036292935839, "grad_norm": 0.04215940088033676, "learning_rate": 0.0001488455826094784, "loss": 0.2837, "step": 16304 }, { "epoch": 1.3208846403110823, "grad_norm": 0.04067666083574295, "learning_rate": 0.00014884108195688375, "loss": 0.2676, "step": 16305 }, { "epoch": 1.3209656513285806, "grad_norm": 0.042485661804676056, "learning_rate": 0.00014883658130428914, "loss": 0.3269, "step": 16306 }, { "epoch": 1.321046662346079, "grad_norm": 0.042939670383930206, "learning_rate": 0.0001488320806516945, "loss": 0.2639, "step": 16307 }, { "epoch": 1.3211276733635775, "grad_norm": 0.04631086438894272, "learning_rate": 0.00014882757999909989, "loss": 0.3038, "step": 16308 }, { "epoch": 1.3212086843810757, "grad_norm": 0.05879298225045204, "learning_rate": 0.00014882307934650525, "loss": 0.3235, "step": 16309 }, { "epoch": 1.3212896953985742, "grad_norm": 0.04729129374027252, "learning_rate": 0.00014881857869391063, "loss": 0.3623, "step": 16310 }, { "epoch": 1.3213707064160727, "grad_norm": 0.04663609713315964, "learning_rate": 0.000148814078041316, "loss": 0.3376, "step": 16311 }, { "epoch": 1.321451717433571, "grad_norm": 0.03580520674586296, "learning_rate": 0.00014880957738872138, "loss": 0.278, "step": 16312 }, { "epoch": 1.3215327284510694, "grad_norm": 0.04505905508995056, "learning_rate": 0.00014880507673612674, "loss": 0.2813, "step": 16313 }, { "epoch": 1.3216137394685679, "grad_norm": 0.04475625604391098, "learning_rate": 0.00014880057608353213, "loss": 0.2851, "step": 16314 }, { "epoch": 1.321694750486066, "grad_norm": 0.04107343405485153, "learning_rate": 0.0001487960754309375, "loss": 0.2649, "step": 16315 }, { "epoch": 1.3217757615035644, "grad_norm": 0.04097530618309975, "learning_rate": 0.00014879157477834287, "loss": 0.2806, "step": 16316 }, { "epoch": 1.3218567725210628, "grad_norm": 0.04211768880486488, "learning_rate": 0.00014878707412574823, "loss": 0.2702, "step": 16317 }, { "epoch": 1.3219377835385613, "grad_norm": 0.043108146637678146, "learning_rate": 0.00014878257347315362, "loss": 0.2729, "step": 16318 }, { "epoch": 1.3220187945560595, "grad_norm": 0.04378829896450043, "learning_rate": 0.00014877807282055898, "loss": 0.2976, "step": 16319 }, { "epoch": 1.322099805573558, "grad_norm": 0.04426180198788643, "learning_rate": 0.00014877357216796437, "loss": 0.3138, "step": 16320 }, { "epoch": 1.3221808165910565, "grad_norm": 0.05119778588414192, "learning_rate": 0.00014876907151536973, "loss": 0.3341, "step": 16321 }, { "epoch": 1.3222618276085547, "grad_norm": 0.04219551756978035, "learning_rate": 0.00014876457086277512, "loss": 0.304, "step": 16322 }, { "epoch": 1.3223428386260532, "grad_norm": 0.04721548780798912, "learning_rate": 0.00014876007021018048, "loss": 0.3352, "step": 16323 }, { "epoch": 1.3224238496435516, "grad_norm": 0.046636637300252914, "learning_rate": 0.00014875556955758586, "loss": 0.3165, "step": 16324 }, { "epoch": 1.3225048606610499, "grad_norm": 0.04913676157593727, "learning_rate": 0.00014875106890499122, "loss": 0.3119, "step": 16325 }, { "epoch": 1.3225858716785484, "grad_norm": 0.043943922966718674, "learning_rate": 0.0001487465682523966, "loss": 0.2866, "step": 16326 }, { "epoch": 1.3226668826960466, "grad_norm": 0.04472362995147705, "learning_rate": 0.00014874206759980197, "loss": 0.2931, "step": 16327 }, { "epoch": 1.322747893713545, "grad_norm": 0.035618189722299576, "learning_rate": 0.00014873756694720736, "loss": 0.2295, "step": 16328 }, { "epoch": 1.3228289047310433, "grad_norm": 0.04164008051156998, "learning_rate": 0.00014873306629461272, "loss": 0.2838, "step": 16329 }, { "epoch": 1.3229099157485418, "grad_norm": 0.05346325784921646, "learning_rate": 0.0001487285656420181, "loss": 0.3446, "step": 16330 }, { "epoch": 1.3229909267660402, "grad_norm": 0.04301166161894798, "learning_rate": 0.0001487240649894235, "loss": 0.2922, "step": 16331 }, { "epoch": 1.3230719377835385, "grad_norm": 0.04445755109190941, "learning_rate": 0.00014871956433682885, "loss": 0.2521, "step": 16332 }, { "epoch": 1.323152948801037, "grad_norm": 0.04274790361523628, "learning_rate": 0.0001487150636842342, "loss": 0.2775, "step": 16333 }, { "epoch": 1.3232339598185354, "grad_norm": 0.047810930758714676, "learning_rate": 0.0001487105630316396, "loss": 0.3125, "step": 16334 }, { "epoch": 1.3233149708360337, "grad_norm": 0.04470434412360191, "learning_rate": 0.00014870606237904496, "loss": 0.2963, "step": 16335 }, { "epoch": 1.3233959818535321, "grad_norm": 0.05826588347554207, "learning_rate": 0.00014870156172645035, "loss": 0.3744, "step": 16336 }, { "epoch": 1.3234769928710304, "grad_norm": 0.04465784505009651, "learning_rate": 0.00014869706107385573, "loss": 0.2895, "step": 16337 }, { "epoch": 1.3235580038885288, "grad_norm": 0.04440483823418617, "learning_rate": 0.0001486925604212611, "loss": 0.2829, "step": 16338 }, { "epoch": 1.323639014906027, "grad_norm": 0.04333437606692314, "learning_rate": 0.00014868805976866645, "loss": 0.29, "step": 16339 }, { "epoch": 1.3237200259235256, "grad_norm": 0.04430120065808296, "learning_rate": 0.00014868355911607184, "loss": 0.2673, "step": 16340 }, { "epoch": 1.323801036941024, "grad_norm": 0.04287057742476463, "learning_rate": 0.0001486790584634772, "loss": 0.3074, "step": 16341 }, { "epoch": 1.3238820479585223, "grad_norm": 0.045032232999801636, "learning_rate": 0.0001486745578108826, "loss": 0.3032, "step": 16342 }, { "epoch": 1.3239630589760207, "grad_norm": 0.046202123165130615, "learning_rate": 0.00014867005715828797, "loss": 0.2965, "step": 16343 }, { "epoch": 1.3240440699935192, "grad_norm": 0.042404867708683014, "learning_rate": 0.00014866555650569333, "loss": 0.3006, "step": 16344 }, { "epoch": 1.3241250810110174, "grad_norm": 0.045439064502716064, "learning_rate": 0.0001486610558530987, "loss": 0.2766, "step": 16345 }, { "epoch": 1.324206092028516, "grad_norm": 0.045394547283649445, "learning_rate": 0.00014865655520050408, "loss": 0.271, "step": 16346 }, { "epoch": 1.3242871030460144, "grad_norm": 0.04254448786377907, "learning_rate": 0.00014865205454790944, "loss": 0.3006, "step": 16347 }, { "epoch": 1.3243681140635126, "grad_norm": 0.05081197991967201, "learning_rate": 0.00014864755389531483, "loss": 0.3272, "step": 16348 }, { "epoch": 1.324449125081011, "grad_norm": 0.05010443925857544, "learning_rate": 0.00014864305324272022, "loss": 0.3003, "step": 16349 }, { "epoch": 1.3245301360985093, "grad_norm": 0.04016037657856941, "learning_rate": 0.00014863855259012558, "loss": 0.285, "step": 16350 }, { "epoch": 1.3246111471160078, "grad_norm": 0.04254277050495148, "learning_rate": 0.00014863405193753094, "loss": 0.2802, "step": 16351 }, { "epoch": 1.324692158133506, "grad_norm": 0.048468347638845444, "learning_rate": 0.00014862955128493632, "loss": 0.2796, "step": 16352 }, { "epoch": 1.3247731691510045, "grad_norm": 0.03761716187000275, "learning_rate": 0.00014862505063234168, "loss": 0.2681, "step": 16353 }, { "epoch": 1.324854180168503, "grad_norm": 0.04276173934340477, "learning_rate": 0.00014862054997974707, "loss": 0.2996, "step": 16354 }, { "epoch": 1.3249351911860012, "grad_norm": 0.044217273592948914, "learning_rate": 0.00014861604932715246, "loss": 0.2882, "step": 16355 }, { "epoch": 1.3250162022034997, "grad_norm": 0.0482565313577652, "learning_rate": 0.00014861154867455782, "loss": 0.3273, "step": 16356 }, { "epoch": 1.3250972132209982, "grad_norm": 0.04667958989739418, "learning_rate": 0.00014860704802196318, "loss": 0.3081, "step": 16357 }, { "epoch": 1.3251782242384964, "grad_norm": 0.04477810487151146, "learning_rate": 0.00014860254736936857, "loss": 0.2585, "step": 16358 }, { "epoch": 1.3252592352559949, "grad_norm": 0.04654529318213463, "learning_rate": 0.00014859804671677393, "loss": 0.3354, "step": 16359 }, { "epoch": 1.3253402462734931, "grad_norm": 0.04797455668449402, "learning_rate": 0.0001485935460641793, "loss": 0.338, "step": 16360 }, { "epoch": 1.3254212572909916, "grad_norm": 0.039246611297130585, "learning_rate": 0.0001485890454115847, "loss": 0.2936, "step": 16361 }, { "epoch": 1.3255022683084898, "grad_norm": 0.04272114485502243, "learning_rate": 0.00014858454475899006, "loss": 0.2724, "step": 16362 }, { "epoch": 1.3255832793259883, "grad_norm": 0.0355217345058918, "learning_rate": 0.00014858004410639542, "loss": 0.2761, "step": 16363 }, { "epoch": 1.3256642903434868, "grad_norm": 0.040763113647699356, "learning_rate": 0.0001485755434538008, "loss": 0.2998, "step": 16364 }, { "epoch": 1.325745301360985, "grad_norm": 0.04279746860265732, "learning_rate": 0.00014857104280120617, "loss": 0.2933, "step": 16365 }, { "epoch": 1.3258263123784835, "grad_norm": 0.04600545018911362, "learning_rate": 0.00014856654214861155, "loss": 0.3289, "step": 16366 }, { "epoch": 1.325907323395982, "grad_norm": 0.04490230605006218, "learning_rate": 0.00014856204149601694, "loss": 0.3455, "step": 16367 }, { "epoch": 1.3259883344134802, "grad_norm": 0.047660212963819504, "learning_rate": 0.0001485575408434223, "loss": 0.31, "step": 16368 }, { "epoch": 1.3260693454309787, "grad_norm": 0.05434110388159752, "learning_rate": 0.00014855304019082766, "loss": 0.357, "step": 16369 }, { "epoch": 1.3261503564484771, "grad_norm": 0.04194248095154762, "learning_rate": 0.00014854853953823305, "loss": 0.2769, "step": 16370 }, { "epoch": 1.3262313674659754, "grad_norm": 0.04256175085902214, "learning_rate": 0.0001485440388856384, "loss": 0.3126, "step": 16371 }, { "epoch": 1.3263123784834738, "grad_norm": 0.04505886137485504, "learning_rate": 0.0001485395382330438, "loss": 0.3247, "step": 16372 }, { "epoch": 1.326393389500972, "grad_norm": 0.05674450099468231, "learning_rate": 0.00014853503758044918, "loss": 0.3143, "step": 16373 }, { "epoch": 1.3264744005184705, "grad_norm": 0.055465519428253174, "learning_rate": 0.00014853053692785454, "loss": 0.3387, "step": 16374 }, { "epoch": 1.3265554115359688, "grad_norm": 0.04465511441230774, "learning_rate": 0.00014852603627525993, "loss": 0.29, "step": 16375 }, { "epoch": 1.3266364225534673, "grad_norm": 0.04995818808674812, "learning_rate": 0.0001485215356226653, "loss": 0.3619, "step": 16376 }, { "epoch": 1.3267174335709657, "grad_norm": 0.04046409949660301, "learning_rate": 0.00014851703497007065, "loss": 0.3127, "step": 16377 }, { "epoch": 1.326798444588464, "grad_norm": 0.04179410636425018, "learning_rate": 0.00014851253431747604, "loss": 0.2956, "step": 16378 }, { "epoch": 1.3268794556059624, "grad_norm": 0.04889443516731262, "learning_rate": 0.00014850803366488142, "loss": 0.3252, "step": 16379 }, { "epoch": 1.326960466623461, "grad_norm": 0.04586949199438095, "learning_rate": 0.00014850353301228678, "loss": 0.2492, "step": 16380 }, { "epoch": 1.3270414776409591, "grad_norm": 0.046670492738485336, "learning_rate": 0.00014849903235969217, "loss": 0.3469, "step": 16381 }, { "epoch": 1.3271224886584576, "grad_norm": 0.04176964983344078, "learning_rate": 0.00014849453170709753, "loss": 0.2757, "step": 16382 }, { "epoch": 1.3272034996759559, "grad_norm": 0.04027750715613365, "learning_rate": 0.00014849003105450292, "loss": 0.2785, "step": 16383 }, { "epoch": 1.3272845106934543, "grad_norm": 0.04773688316345215, "learning_rate": 0.00014848553040190828, "loss": 0.2931, "step": 16384 }, { "epoch": 1.3273655217109526, "grad_norm": 0.045987844467163086, "learning_rate": 0.00014848102974931367, "loss": 0.2867, "step": 16385 }, { "epoch": 1.327446532728451, "grad_norm": 0.03335455805063248, "learning_rate": 0.00014847652909671903, "loss": 0.2578, "step": 16386 }, { "epoch": 1.3275275437459495, "grad_norm": 0.048070844262838364, "learning_rate": 0.0001484720284441244, "loss": 0.316, "step": 16387 }, { "epoch": 1.3276085547634477, "grad_norm": 0.05480222404003143, "learning_rate": 0.00014846752779152977, "loss": 0.3444, "step": 16388 }, { "epoch": 1.3276895657809462, "grad_norm": 0.05221012607216835, "learning_rate": 0.00014846302713893516, "loss": 0.3302, "step": 16389 }, { "epoch": 1.3277705767984447, "grad_norm": 0.04818179830908775, "learning_rate": 0.00014845852648634052, "loss": 0.305, "step": 16390 }, { "epoch": 1.327851587815943, "grad_norm": 0.04937390983104706, "learning_rate": 0.0001484540258337459, "loss": 0.3346, "step": 16391 }, { "epoch": 1.3279325988334414, "grad_norm": 0.041629184037446976, "learning_rate": 0.00014844952518115127, "loss": 0.3057, "step": 16392 }, { "epoch": 1.3280136098509399, "grad_norm": 0.04728194326162338, "learning_rate": 0.00014844502452855665, "loss": 0.2956, "step": 16393 }, { "epoch": 1.328094620868438, "grad_norm": 0.0420515350997448, "learning_rate": 0.00014844052387596201, "loss": 0.2648, "step": 16394 }, { "epoch": 1.3281756318859366, "grad_norm": 0.045087482780218124, "learning_rate": 0.0001484360232233674, "loss": 0.3079, "step": 16395 }, { "epoch": 1.3282566429034348, "grad_norm": 0.04409553483128548, "learning_rate": 0.00014843152257077276, "loss": 0.3124, "step": 16396 }, { "epoch": 1.3283376539209333, "grad_norm": 0.043479710817337036, "learning_rate": 0.00014842702191817815, "loss": 0.2951, "step": 16397 }, { "epoch": 1.3284186649384315, "grad_norm": 0.049205128103494644, "learning_rate": 0.0001484225212655835, "loss": 0.297, "step": 16398 }, { "epoch": 1.32849967595593, "grad_norm": 0.05884668231010437, "learning_rate": 0.0001484180206129889, "loss": 0.3413, "step": 16399 }, { "epoch": 1.3285806869734285, "grad_norm": 0.04603936895728111, "learning_rate": 0.00014841351996039426, "loss": 0.2662, "step": 16400 }, { "epoch": 1.3286616979909267, "grad_norm": 0.04407604783773422, "learning_rate": 0.00014840901930779964, "loss": 0.3168, "step": 16401 }, { "epoch": 1.3287427090084252, "grad_norm": 0.051175616681575775, "learning_rate": 0.000148404518655205, "loss": 0.3223, "step": 16402 }, { "epoch": 1.3288237200259236, "grad_norm": 0.053574636578559875, "learning_rate": 0.0001484000180026104, "loss": 0.3955, "step": 16403 }, { "epoch": 1.3289047310434219, "grad_norm": 0.0408712662756443, "learning_rate": 0.00014839551735001575, "loss": 0.3262, "step": 16404 }, { "epoch": 1.3289857420609203, "grad_norm": 0.038339763879776, "learning_rate": 0.00014839101669742114, "loss": 0.2753, "step": 16405 }, { "epoch": 1.3290667530784186, "grad_norm": 0.045235779136419296, "learning_rate": 0.00014838651604482653, "loss": 0.3101, "step": 16406 }, { "epoch": 1.329147764095917, "grad_norm": 0.049895305186510086, "learning_rate": 0.00014838201539223189, "loss": 0.3351, "step": 16407 }, { "epoch": 1.3292287751134153, "grad_norm": 0.0396096333861351, "learning_rate": 0.00014837751473963725, "loss": 0.2719, "step": 16408 }, { "epoch": 1.3293097861309138, "grad_norm": 0.041300006210803986, "learning_rate": 0.00014837301408704263, "loss": 0.2878, "step": 16409 }, { "epoch": 1.3293907971484122, "grad_norm": 0.04082218557596207, "learning_rate": 0.000148368513434448, "loss": 0.3059, "step": 16410 }, { "epoch": 1.3294718081659105, "grad_norm": 0.04691319167613983, "learning_rate": 0.00014836401278185338, "loss": 0.3249, "step": 16411 }, { "epoch": 1.329552819183409, "grad_norm": 0.037029873579740524, "learning_rate": 0.00014835951212925877, "loss": 0.2554, "step": 16412 }, { "epoch": 1.3296338302009074, "grad_norm": 0.048528771847486496, "learning_rate": 0.00014835501147666413, "loss": 0.2937, "step": 16413 }, { "epoch": 1.3297148412184057, "grad_norm": 0.04355372115969658, "learning_rate": 0.0001483505108240695, "loss": 0.2976, "step": 16414 }, { "epoch": 1.3297958522359041, "grad_norm": 0.045403700321912766, "learning_rate": 0.00014834601017147487, "loss": 0.3183, "step": 16415 }, { "epoch": 1.3298768632534026, "grad_norm": 0.04810098558664322, "learning_rate": 0.00014834150951888023, "loss": 0.2951, "step": 16416 }, { "epoch": 1.3299578742709008, "grad_norm": 0.04955977946519852, "learning_rate": 0.00014833700886628562, "loss": 0.3336, "step": 16417 }, { "epoch": 1.330038885288399, "grad_norm": 0.047212012112140656, "learning_rate": 0.000148332508213691, "loss": 0.3043, "step": 16418 }, { "epoch": 1.3301198963058976, "grad_norm": 0.05372791364789009, "learning_rate": 0.00014832800756109637, "loss": 0.3067, "step": 16419 }, { "epoch": 1.330200907323396, "grad_norm": 0.03757733106613159, "learning_rate": 0.00014832350690850173, "loss": 0.2678, "step": 16420 }, { "epoch": 1.3302819183408943, "grad_norm": 0.044789623469114304, "learning_rate": 0.00014831900625590712, "loss": 0.3491, "step": 16421 }, { "epoch": 1.3303629293583927, "grad_norm": 0.04268178716301918, "learning_rate": 0.00014831450560331248, "loss": 0.2949, "step": 16422 }, { "epoch": 1.3304439403758912, "grad_norm": 0.041059188544750214, "learning_rate": 0.00014831000495071786, "loss": 0.2526, "step": 16423 }, { "epoch": 1.3305249513933894, "grad_norm": 0.052552729845047, "learning_rate": 0.00014830550429812325, "loss": 0.3152, "step": 16424 }, { "epoch": 1.330605962410888, "grad_norm": 0.04184343293309212, "learning_rate": 0.0001483010036455286, "loss": 0.3075, "step": 16425 }, { "epoch": 1.3306869734283864, "grad_norm": 0.04641185328364372, "learning_rate": 0.00014829650299293397, "loss": 0.3, "step": 16426 }, { "epoch": 1.3307679844458846, "grad_norm": 0.04540396109223366, "learning_rate": 0.00014829200234033936, "loss": 0.2751, "step": 16427 }, { "epoch": 1.330848995463383, "grad_norm": 0.04766688868403435, "learning_rate": 0.00014828750168774472, "loss": 0.2932, "step": 16428 }, { "epoch": 1.3309300064808813, "grad_norm": 0.050059616565704346, "learning_rate": 0.0001482830010351501, "loss": 0.2732, "step": 16429 }, { "epoch": 1.3310110174983798, "grad_norm": 0.055511631071567535, "learning_rate": 0.0001482785003825555, "loss": 0.3117, "step": 16430 }, { "epoch": 1.331092028515878, "grad_norm": 0.04264995455741882, "learning_rate": 0.00014827399972996085, "loss": 0.2903, "step": 16431 }, { "epoch": 1.3311730395333765, "grad_norm": 0.052487634122371674, "learning_rate": 0.0001482694990773662, "loss": 0.3735, "step": 16432 }, { "epoch": 1.331254050550875, "grad_norm": 0.051421862095594406, "learning_rate": 0.0001482649984247716, "loss": 0.332, "step": 16433 }, { "epoch": 1.3313350615683732, "grad_norm": 0.04284625127911568, "learning_rate": 0.00014826049777217696, "loss": 0.266, "step": 16434 }, { "epoch": 1.3314160725858717, "grad_norm": 0.046187713742256165, "learning_rate": 0.00014825599711958235, "loss": 0.2859, "step": 16435 }, { "epoch": 1.3314970836033702, "grad_norm": 0.04239524528384209, "learning_rate": 0.00014825149646698773, "loss": 0.2705, "step": 16436 }, { "epoch": 1.3315780946208684, "grad_norm": 0.04885365813970566, "learning_rate": 0.0001482469958143931, "loss": 0.3089, "step": 16437 }, { "epoch": 1.3316591056383669, "grad_norm": 0.046007949858903885, "learning_rate": 0.00014824249516179845, "loss": 0.3065, "step": 16438 }, { "epoch": 1.3317401166558653, "grad_norm": 0.05193227529525757, "learning_rate": 0.00014823799450920384, "loss": 0.3323, "step": 16439 }, { "epoch": 1.3318211276733636, "grad_norm": 0.04352136328816414, "learning_rate": 0.0001482334938566092, "loss": 0.323, "step": 16440 }, { "epoch": 1.3319021386908618, "grad_norm": 0.04223669320344925, "learning_rate": 0.0001482289932040146, "loss": 0.2823, "step": 16441 }, { "epoch": 1.3319831497083603, "grad_norm": 0.04365150257945061, "learning_rate": 0.00014822449255141998, "loss": 0.2984, "step": 16442 }, { "epoch": 1.3320641607258588, "grad_norm": 0.03911641985177994, "learning_rate": 0.00014821999189882534, "loss": 0.2908, "step": 16443 }, { "epoch": 1.332145171743357, "grad_norm": 0.03676704689860344, "learning_rate": 0.00014821549124623072, "loss": 0.246, "step": 16444 }, { "epoch": 1.3322261827608555, "grad_norm": 0.051078058779239655, "learning_rate": 0.00014821099059363608, "loss": 0.2601, "step": 16445 }, { "epoch": 1.332307193778354, "grad_norm": 0.04659094661474228, "learning_rate": 0.00014820648994104144, "loss": 0.3044, "step": 16446 }, { "epoch": 1.3323882047958522, "grad_norm": 0.04543064907193184, "learning_rate": 0.00014820198928844683, "loss": 0.2749, "step": 16447 }, { "epoch": 1.3324692158133506, "grad_norm": 0.053839847445487976, "learning_rate": 0.00014819748863585222, "loss": 0.3291, "step": 16448 }, { "epoch": 1.3325502268308491, "grad_norm": 0.04643457382917404, "learning_rate": 0.00014819298798325758, "loss": 0.2965, "step": 16449 }, { "epoch": 1.3326312378483474, "grad_norm": 0.05472167581319809, "learning_rate": 0.00014818848733066296, "loss": 0.3555, "step": 16450 }, { "epoch": 1.3327122488658458, "grad_norm": 0.05177174508571625, "learning_rate": 0.00014818398667806832, "loss": 0.2944, "step": 16451 }, { "epoch": 1.332793259883344, "grad_norm": 0.04957205057144165, "learning_rate": 0.00014817948602547368, "loss": 0.2958, "step": 16452 }, { "epoch": 1.3328742709008425, "grad_norm": 0.049657341092824936, "learning_rate": 0.00014817498537287907, "loss": 0.288, "step": 16453 }, { "epoch": 1.3329552819183408, "grad_norm": 0.045000869780778885, "learning_rate": 0.00014817048472028446, "loss": 0.278, "step": 16454 }, { "epoch": 1.3330362929358393, "grad_norm": 0.04302288219332695, "learning_rate": 0.00014816598406768982, "loss": 0.3046, "step": 16455 }, { "epoch": 1.3331173039533377, "grad_norm": 0.04679296538233757, "learning_rate": 0.0001481614834150952, "loss": 0.2865, "step": 16456 }, { "epoch": 1.333198314970836, "grad_norm": 0.04690950736403465, "learning_rate": 0.00014815698276250057, "loss": 0.3157, "step": 16457 }, { "epoch": 1.3332793259883344, "grad_norm": 0.044018328189849854, "learning_rate": 0.00014815248210990595, "loss": 0.3128, "step": 16458 }, { "epoch": 1.333360337005833, "grad_norm": 0.0467769019305706, "learning_rate": 0.0001481479814573113, "loss": 0.3097, "step": 16459 }, { "epoch": 1.3334413480233311, "grad_norm": 0.0482206828892231, "learning_rate": 0.0001481434808047167, "loss": 0.2782, "step": 16460 }, { "epoch": 1.3335223590408296, "grad_norm": 0.04488348215818405, "learning_rate": 0.00014813898015212206, "loss": 0.2825, "step": 16461 }, { "epoch": 1.3336033700583279, "grad_norm": 0.045889075845479965, "learning_rate": 0.00014813447949952745, "loss": 0.3228, "step": 16462 }, { "epoch": 1.3336843810758263, "grad_norm": 0.0443207211792469, "learning_rate": 0.0001481299788469328, "loss": 0.3034, "step": 16463 }, { "epoch": 1.3337653920933246, "grad_norm": 0.035741958767175674, "learning_rate": 0.0001481254781943382, "loss": 0.2797, "step": 16464 }, { "epoch": 1.333846403110823, "grad_norm": 0.04170793667435646, "learning_rate": 0.00014812097754174355, "loss": 0.2595, "step": 16465 }, { "epoch": 1.3339274141283215, "grad_norm": 0.05149725452065468, "learning_rate": 0.00014811647688914894, "loss": 0.3146, "step": 16466 }, { "epoch": 1.3340084251458197, "grad_norm": 0.04214129596948624, "learning_rate": 0.0001481119762365543, "loss": 0.2842, "step": 16467 }, { "epoch": 1.3340894361633182, "grad_norm": 0.04943012818694115, "learning_rate": 0.0001481074755839597, "loss": 0.3227, "step": 16468 }, { "epoch": 1.3341704471808167, "grad_norm": 0.04872075840830803, "learning_rate": 0.00014810297493136505, "loss": 0.3225, "step": 16469 }, { "epoch": 1.334251458198315, "grad_norm": 0.0405779629945755, "learning_rate": 0.00014809847427877044, "loss": 0.2914, "step": 16470 }, { "epoch": 1.3343324692158134, "grad_norm": 0.05061832815408707, "learning_rate": 0.0001480939736261758, "loss": 0.3559, "step": 16471 }, { "epoch": 1.3344134802333119, "grad_norm": 0.04265550896525383, "learning_rate": 0.00014808947297358118, "loss": 0.3212, "step": 16472 }, { "epoch": 1.33449449125081, "grad_norm": 0.04414314031600952, "learning_rate": 0.00014808497232098654, "loss": 0.3507, "step": 16473 }, { "epoch": 1.3345755022683086, "grad_norm": 0.04216546565294266, "learning_rate": 0.00014808047166839193, "loss": 0.276, "step": 16474 }, { "epoch": 1.3346565132858068, "grad_norm": 0.04311543330550194, "learning_rate": 0.0001480759710157973, "loss": 0.2618, "step": 16475 }, { "epoch": 1.3347375243033053, "grad_norm": 0.04496273025870323, "learning_rate": 0.00014807147036320268, "loss": 0.3317, "step": 16476 }, { "epoch": 1.3348185353208035, "grad_norm": 0.04574717953801155, "learning_rate": 0.00014806696971060804, "loss": 0.3539, "step": 16477 }, { "epoch": 1.334899546338302, "grad_norm": 0.04789029061794281, "learning_rate": 0.00014806246905801342, "loss": 0.3045, "step": 16478 }, { "epoch": 1.3349805573558005, "grad_norm": 0.03678739443421364, "learning_rate": 0.00014805796840541878, "loss": 0.2564, "step": 16479 }, { "epoch": 1.3350615683732987, "grad_norm": 0.047425612807273865, "learning_rate": 0.00014805346775282417, "loss": 0.2778, "step": 16480 }, { "epoch": 1.3351425793907972, "grad_norm": 0.03888651356101036, "learning_rate": 0.00014804896710022953, "loss": 0.2499, "step": 16481 }, { "epoch": 1.3352235904082956, "grad_norm": 0.05751119554042816, "learning_rate": 0.00014804446644763492, "loss": 0.3156, "step": 16482 }, { "epoch": 1.3353046014257939, "grad_norm": 0.043744124472141266, "learning_rate": 0.00014803996579504028, "loss": 0.2931, "step": 16483 }, { "epoch": 1.3353856124432923, "grad_norm": 0.04385169968008995, "learning_rate": 0.00014803546514244567, "loss": 0.258, "step": 16484 }, { "epoch": 1.3354666234607906, "grad_norm": 0.04109294340014458, "learning_rate": 0.00014803096448985103, "loss": 0.2515, "step": 16485 }, { "epoch": 1.335547634478289, "grad_norm": 0.05116341635584831, "learning_rate": 0.00014802646383725641, "loss": 0.2932, "step": 16486 }, { "epoch": 1.3356286454957873, "grad_norm": 0.04903048649430275, "learning_rate": 0.0001480219631846618, "loss": 0.3168, "step": 16487 }, { "epoch": 1.3357096565132858, "grad_norm": 0.05178588256239891, "learning_rate": 0.00014801746253206716, "loss": 0.3728, "step": 16488 }, { "epoch": 1.3357906675307842, "grad_norm": 0.04729301482439041, "learning_rate": 0.00014801296187947252, "loss": 0.3108, "step": 16489 }, { "epoch": 1.3358716785482825, "grad_norm": 0.038130391389131546, "learning_rate": 0.0001480084612268779, "loss": 0.2694, "step": 16490 }, { "epoch": 1.335952689565781, "grad_norm": 0.05216986685991287, "learning_rate": 0.00014800396057428327, "loss": 0.3211, "step": 16491 }, { "epoch": 1.3360337005832794, "grad_norm": 0.05172670632600784, "learning_rate": 0.00014799945992168866, "loss": 0.329, "step": 16492 }, { "epoch": 1.3361147116007777, "grad_norm": 0.045888472348451614, "learning_rate": 0.00014799495926909404, "loss": 0.2929, "step": 16493 }, { "epoch": 1.3361957226182761, "grad_norm": 0.0423261895775795, "learning_rate": 0.0001479904586164994, "loss": 0.2788, "step": 16494 }, { "epoch": 1.3362767336357746, "grad_norm": 0.04532453417778015, "learning_rate": 0.00014798595796390476, "loss": 0.2841, "step": 16495 }, { "epoch": 1.3363577446532728, "grad_norm": 0.0453985370695591, "learning_rate": 0.00014798145731131015, "loss": 0.2901, "step": 16496 }, { "epoch": 1.3364387556707713, "grad_norm": 0.049831438809633255, "learning_rate": 0.0001479769566587155, "loss": 0.3611, "step": 16497 }, { "epoch": 1.3365197666882696, "grad_norm": 0.050108373165130615, "learning_rate": 0.0001479724560061209, "loss": 0.2991, "step": 16498 }, { "epoch": 1.336600777705768, "grad_norm": 0.04659736528992653, "learning_rate": 0.00014796795535352628, "loss": 0.2765, "step": 16499 }, { "epoch": 1.3366817887232663, "grad_norm": 0.04281048849225044, "learning_rate": 0.00014796345470093164, "loss": 0.2732, "step": 16500 }, { "epoch": 1.3367627997407647, "grad_norm": 0.04346665367484093, "learning_rate": 0.000147958954048337, "loss": 0.3203, "step": 16501 }, { "epoch": 1.3368438107582632, "grad_norm": 0.04083739593625069, "learning_rate": 0.0001479544533957424, "loss": 0.2664, "step": 16502 }, { "epoch": 1.3369248217757614, "grad_norm": 0.04874040186405182, "learning_rate": 0.00014794995274314775, "loss": 0.3213, "step": 16503 }, { "epoch": 1.33700583279326, "grad_norm": 0.042423397302627563, "learning_rate": 0.00014794545209055314, "loss": 0.2827, "step": 16504 }, { "epoch": 1.3370868438107584, "grad_norm": 0.0455317497253418, "learning_rate": 0.00014794095143795853, "loss": 0.3029, "step": 16505 }, { "epoch": 1.3371678548282566, "grad_norm": 0.05330074205994606, "learning_rate": 0.00014793645078536389, "loss": 0.2914, "step": 16506 }, { "epoch": 1.337248865845755, "grad_norm": 0.04478881135582924, "learning_rate": 0.00014793195013276925, "loss": 0.2997, "step": 16507 }, { "epoch": 1.3373298768632533, "grad_norm": 0.05366770923137665, "learning_rate": 0.00014792744948017463, "loss": 0.2951, "step": 16508 }, { "epoch": 1.3374108878807518, "grad_norm": 0.053794004023075104, "learning_rate": 0.00014792294882758, "loss": 0.2707, "step": 16509 }, { "epoch": 1.33749189889825, "grad_norm": 0.04974134638905525, "learning_rate": 0.00014791844817498538, "loss": 0.3038, "step": 16510 }, { "epoch": 1.3375729099157485, "grad_norm": 0.05007876455783844, "learning_rate": 0.00014791394752239077, "loss": 0.2686, "step": 16511 }, { "epoch": 1.337653920933247, "grad_norm": 0.05567089840769768, "learning_rate": 0.00014790944686979613, "loss": 0.298, "step": 16512 }, { "epoch": 1.3377349319507452, "grad_norm": 0.05598515644669533, "learning_rate": 0.00014790494621720151, "loss": 0.339, "step": 16513 }, { "epoch": 1.3378159429682437, "grad_norm": 0.04248699173331261, "learning_rate": 0.00014790044556460687, "loss": 0.3005, "step": 16514 }, { "epoch": 1.3378969539857422, "grad_norm": 0.039727192372083664, "learning_rate": 0.00014789594491201223, "loss": 0.2775, "step": 16515 }, { "epoch": 1.3379779650032404, "grad_norm": 0.04616788029670715, "learning_rate": 0.00014789144425941762, "loss": 0.305, "step": 16516 }, { "epoch": 1.3380589760207389, "grad_norm": 0.04732425510883331, "learning_rate": 0.000147886943606823, "loss": 0.3015, "step": 16517 }, { "epoch": 1.3381399870382373, "grad_norm": 0.04137314856052399, "learning_rate": 0.00014788244295422837, "loss": 0.2626, "step": 16518 }, { "epoch": 1.3382209980557356, "grad_norm": 0.04044054448604584, "learning_rate": 0.00014787794230163376, "loss": 0.2897, "step": 16519 }, { "epoch": 1.3383020090732338, "grad_norm": 0.05046490207314491, "learning_rate": 0.00014787344164903912, "loss": 0.2944, "step": 16520 }, { "epoch": 1.3383830200907323, "grad_norm": 0.05168326944112778, "learning_rate": 0.00014786894099644448, "loss": 0.2951, "step": 16521 }, { "epoch": 1.3384640311082308, "grad_norm": 0.04427158087491989, "learning_rate": 0.00014786444034384986, "loss": 0.3418, "step": 16522 }, { "epoch": 1.338545042125729, "grad_norm": 0.04621238633990288, "learning_rate": 0.00014785993969125525, "loss": 0.3099, "step": 16523 }, { "epoch": 1.3386260531432275, "grad_norm": 0.04961875081062317, "learning_rate": 0.0001478554390386606, "loss": 0.3417, "step": 16524 }, { "epoch": 1.338707064160726, "grad_norm": 0.041570305824279785, "learning_rate": 0.000147850938386066, "loss": 0.2956, "step": 16525 }, { "epoch": 1.3387880751782242, "grad_norm": 0.04750753566622734, "learning_rate": 0.00014784643773347136, "loss": 0.3531, "step": 16526 }, { "epoch": 1.3388690861957226, "grad_norm": 0.041701339185237885, "learning_rate": 0.00014784193708087672, "loss": 0.2812, "step": 16527 }, { "epoch": 1.3389500972132211, "grad_norm": 0.04579732567071915, "learning_rate": 0.0001478374364282821, "loss": 0.2792, "step": 16528 }, { "epoch": 1.3390311082307194, "grad_norm": 0.048295967280864716, "learning_rate": 0.0001478329357756875, "loss": 0.3072, "step": 16529 }, { "epoch": 1.3391121192482178, "grad_norm": 0.053043704479932785, "learning_rate": 0.00014782843512309285, "loss": 0.3387, "step": 16530 }, { "epoch": 1.339193130265716, "grad_norm": 0.04526020213961601, "learning_rate": 0.00014782393447049824, "loss": 0.2962, "step": 16531 }, { "epoch": 1.3392741412832145, "grad_norm": 0.04586252570152283, "learning_rate": 0.0001478194338179036, "loss": 0.2838, "step": 16532 }, { "epoch": 1.3393551523007128, "grad_norm": 0.043184127658605576, "learning_rate": 0.00014781493316530896, "loss": 0.2821, "step": 16533 }, { "epoch": 1.3394361633182112, "grad_norm": 0.04876738041639328, "learning_rate": 0.00014781043251271435, "loss": 0.3037, "step": 16534 }, { "epoch": 1.3395171743357097, "grad_norm": 0.04843151569366455, "learning_rate": 0.00014780593186011973, "loss": 0.2878, "step": 16535 }, { "epoch": 1.339598185353208, "grad_norm": 0.04222843796014786, "learning_rate": 0.0001478014312075251, "loss": 0.2696, "step": 16536 }, { "epoch": 1.3396791963707064, "grad_norm": 0.04102891683578491, "learning_rate": 0.00014779693055493048, "loss": 0.2983, "step": 16537 }, { "epoch": 1.339760207388205, "grad_norm": 0.04077741503715515, "learning_rate": 0.00014779242990233584, "loss": 0.2883, "step": 16538 }, { "epoch": 1.3398412184057031, "grad_norm": 0.0449368953704834, "learning_rate": 0.00014778792924974123, "loss": 0.2648, "step": 16539 }, { "epoch": 1.3399222294232016, "grad_norm": 0.04294194281101227, "learning_rate": 0.0001477834285971466, "loss": 0.2885, "step": 16540 }, { "epoch": 1.3400032404407, "grad_norm": 0.048157889395952225, "learning_rate": 0.00014777892794455198, "loss": 0.3053, "step": 16541 }, { "epoch": 1.3400842514581983, "grad_norm": 0.043538760393857956, "learning_rate": 0.00014777442729195734, "loss": 0.283, "step": 16542 }, { "epoch": 1.3401652624756966, "grad_norm": 0.04985831677913666, "learning_rate": 0.00014776992663936272, "loss": 0.339, "step": 16543 }, { "epoch": 1.340246273493195, "grad_norm": 0.04058707505464554, "learning_rate": 0.00014776542598676808, "loss": 0.3098, "step": 16544 }, { "epoch": 1.3403272845106935, "grad_norm": 0.04834947735071182, "learning_rate": 0.00014776092533417347, "loss": 0.3083, "step": 16545 }, { "epoch": 1.3404082955281917, "grad_norm": 0.04549410566687584, "learning_rate": 0.00014775642468157883, "loss": 0.2795, "step": 16546 }, { "epoch": 1.3404893065456902, "grad_norm": 0.05225663259625435, "learning_rate": 0.00014775192402898422, "loss": 0.3025, "step": 16547 }, { "epoch": 1.3405703175631887, "grad_norm": 0.04144188389182091, "learning_rate": 0.00014774742337638958, "loss": 0.2712, "step": 16548 }, { "epoch": 1.340651328580687, "grad_norm": 0.04098622500896454, "learning_rate": 0.00014774292272379496, "loss": 0.282, "step": 16549 }, { "epoch": 1.3407323395981854, "grad_norm": 0.0445941723883152, "learning_rate": 0.00014773842207120032, "loss": 0.3265, "step": 16550 }, { "epoch": 1.3408133506156839, "grad_norm": 0.04298444464802742, "learning_rate": 0.0001477339214186057, "loss": 0.2892, "step": 16551 }, { "epoch": 1.340894361633182, "grad_norm": 0.04422687739133835, "learning_rate": 0.00014772942076601107, "loss": 0.289, "step": 16552 }, { "epoch": 1.3409753726506806, "grad_norm": 0.04136590287089348, "learning_rate": 0.00014772492011341646, "loss": 0.2734, "step": 16553 }, { "epoch": 1.3410563836681788, "grad_norm": 0.03996701166033745, "learning_rate": 0.00014772041946082182, "loss": 0.2978, "step": 16554 }, { "epoch": 1.3411373946856773, "grad_norm": 0.03591948375105858, "learning_rate": 0.0001477159188082272, "loss": 0.2585, "step": 16555 }, { "epoch": 1.3412184057031755, "grad_norm": 0.045594893395900726, "learning_rate": 0.00014771141815563257, "loss": 0.3528, "step": 16556 }, { "epoch": 1.341299416720674, "grad_norm": 0.052428532391786575, "learning_rate": 0.00014770691750303795, "loss": 0.3203, "step": 16557 }, { "epoch": 1.3413804277381725, "grad_norm": 0.04628319665789604, "learning_rate": 0.0001477024168504433, "loss": 0.2687, "step": 16558 }, { "epoch": 1.3414614387556707, "grad_norm": 0.04328621178865433, "learning_rate": 0.0001476979161978487, "loss": 0.2834, "step": 16559 }, { "epoch": 1.3415424497731692, "grad_norm": 0.044492077082395554, "learning_rate": 0.00014769341554525406, "loss": 0.317, "step": 16560 }, { "epoch": 1.3416234607906676, "grad_norm": 0.039298199117183685, "learning_rate": 0.00014768891489265945, "loss": 0.2663, "step": 16561 }, { "epoch": 1.3417044718081659, "grad_norm": 0.04610498994588852, "learning_rate": 0.0001476844142400648, "loss": 0.2967, "step": 16562 }, { "epoch": 1.3417854828256643, "grad_norm": 0.04996765777468681, "learning_rate": 0.0001476799135874702, "loss": 0.3204, "step": 16563 }, { "epoch": 1.3418664938431626, "grad_norm": 0.04526286572217941, "learning_rate": 0.00014767541293487555, "loss": 0.3328, "step": 16564 }, { "epoch": 1.341947504860661, "grad_norm": 0.04287734255194664, "learning_rate": 0.00014767091228228094, "loss": 0.2392, "step": 16565 }, { "epoch": 1.3420285158781593, "grad_norm": 0.049837589263916016, "learning_rate": 0.0001476664116296863, "loss": 0.2849, "step": 16566 }, { "epoch": 1.3421095268956578, "grad_norm": 0.04827631637454033, "learning_rate": 0.0001476619109770917, "loss": 0.3377, "step": 16567 }, { "epoch": 1.3421905379131562, "grad_norm": 0.048483602702617645, "learning_rate": 0.00014765741032449708, "loss": 0.3081, "step": 16568 }, { "epoch": 1.3422715489306545, "grad_norm": 0.04682917147874832, "learning_rate": 0.00014765290967190244, "loss": 0.3185, "step": 16569 }, { "epoch": 1.342352559948153, "grad_norm": 0.04458910971879959, "learning_rate": 0.0001476484090193078, "loss": 0.3021, "step": 16570 }, { "epoch": 1.3424335709656514, "grad_norm": 0.04497900605201721, "learning_rate": 0.00014764390836671318, "loss": 0.3189, "step": 16571 }, { "epoch": 1.3425145819831497, "grad_norm": 0.039753060787916183, "learning_rate": 0.00014763940771411854, "loss": 0.2651, "step": 16572 }, { "epoch": 1.3425955930006481, "grad_norm": 0.05053701251745224, "learning_rate": 0.00014763490706152393, "loss": 0.3575, "step": 16573 }, { "epoch": 1.3426766040181466, "grad_norm": 0.04674624651670456, "learning_rate": 0.00014763040640892932, "loss": 0.2702, "step": 16574 }, { "epoch": 1.3427576150356448, "grad_norm": 0.0462510772049427, "learning_rate": 0.00014762590575633468, "loss": 0.2925, "step": 16575 }, { "epoch": 1.3428386260531433, "grad_norm": 0.04673721268773079, "learning_rate": 0.00014762140510374004, "loss": 0.2746, "step": 16576 }, { "epoch": 1.3429196370706415, "grad_norm": 0.043024204671382904, "learning_rate": 0.00014761690445114543, "loss": 0.3132, "step": 16577 }, { "epoch": 1.34300064808814, "grad_norm": 0.040180426090955734, "learning_rate": 0.00014761240379855079, "loss": 0.3054, "step": 16578 }, { "epoch": 1.3430816591056383, "grad_norm": 0.05153704062104225, "learning_rate": 0.00014760790314595617, "loss": 0.3107, "step": 16579 }, { "epoch": 1.3431626701231367, "grad_norm": 0.04208536073565483, "learning_rate": 0.00014760340249336156, "loss": 0.2981, "step": 16580 }, { "epoch": 1.3432436811406352, "grad_norm": 0.04051988944411278, "learning_rate": 0.00014759890184076692, "loss": 0.3113, "step": 16581 }, { "epoch": 1.3433246921581334, "grad_norm": 0.0628843903541565, "learning_rate": 0.0001475944011881723, "loss": 0.2698, "step": 16582 }, { "epoch": 1.343405703175632, "grad_norm": 0.04788912460207939, "learning_rate": 0.00014758990053557767, "loss": 0.3223, "step": 16583 }, { "epoch": 1.3434867141931304, "grad_norm": 0.042176347225904465, "learning_rate": 0.00014758539988298303, "loss": 0.2468, "step": 16584 }, { "epoch": 1.3435677252106286, "grad_norm": 0.03956156224012375, "learning_rate": 0.00014758089923038841, "loss": 0.3001, "step": 16585 }, { "epoch": 1.343648736228127, "grad_norm": 0.04439728707075119, "learning_rate": 0.0001475763985777938, "loss": 0.3086, "step": 16586 }, { "epoch": 1.3437297472456253, "grad_norm": 0.04363232105970383, "learning_rate": 0.00014757189792519916, "loss": 0.2737, "step": 16587 }, { "epoch": 1.3438107582631238, "grad_norm": 0.05417967215180397, "learning_rate": 0.00014756739727260455, "loss": 0.3214, "step": 16588 }, { "epoch": 1.343891769280622, "grad_norm": 0.03835189715027809, "learning_rate": 0.0001475628966200099, "loss": 0.265, "step": 16589 }, { "epoch": 1.3439727802981205, "grad_norm": 0.05207205191254616, "learning_rate": 0.00014755839596741527, "loss": 0.3273, "step": 16590 }, { "epoch": 1.344053791315619, "grad_norm": 0.04198315367102623, "learning_rate": 0.00014755389531482066, "loss": 0.2906, "step": 16591 }, { "epoch": 1.3441348023331172, "grad_norm": 0.04492257907986641, "learning_rate": 0.00014754939466222604, "loss": 0.2879, "step": 16592 }, { "epoch": 1.3442158133506157, "grad_norm": 0.04414338245987892, "learning_rate": 0.0001475448940096314, "loss": 0.2829, "step": 16593 }, { "epoch": 1.3442968243681142, "grad_norm": 0.036125894635915756, "learning_rate": 0.0001475403933570368, "loss": 0.2216, "step": 16594 }, { "epoch": 1.3443778353856124, "grad_norm": 0.04091149568557739, "learning_rate": 0.00014753589270444215, "loss": 0.2783, "step": 16595 }, { "epoch": 1.3444588464031109, "grad_norm": 0.04277289658784866, "learning_rate": 0.0001475313920518475, "loss": 0.279, "step": 16596 }, { "epoch": 1.3445398574206093, "grad_norm": 0.04748170077800751, "learning_rate": 0.0001475268913992529, "loss": 0.2978, "step": 16597 }, { "epoch": 1.3446208684381076, "grad_norm": 0.049579180777072906, "learning_rate": 0.00014752239074665828, "loss": 0.2737, "step": 16598 }, { "epoch": 1.344701879455606, "grad_norm": 0.055698081851005554, "learning_rate": 0.00014751789009406364, "loss": 0.3412, "step": 16599 }, { "epoch": 1.3447828904731043, "grad_norm": 0.04331175982952118, "learning_rate": 0.00014751338944146903, "loss": 0.3004, "step": 16600 }, { "epoch": 1.3448639014906028, "grad_norm": 0.04292542114853859, "learning_rate": 0.0001475088887888744, "loss": 0.2767, "step": 16601 }, { "epoch": 1.344944912508101, "grad_norm": 0.04305851086974144, "learning_rate": 0.00014750438813627975, "loss": 0.2672, "step": 16602 }, { "epoch": 1.3450259235255995, "grad_norm": 0.05051714926958084, "learning_rate": 0.00014749988748368514, "loss": 0.302, "step": 16603 }, { "epoch": 1.345106934543098, "grad_norm": 0.05231226980686188, "learning_rate": 0.00014749538683109053, "loss": 0.3682, "step": 16604 }, { "epoch": 1.3451879455605962, "grad_norm": 0.052741702646017075, "learning_rate": 0.00014749088617849589, "loss": 0.3041, "step": 16605 }, { "epoch": 1.3452689565780946, "grad_norm": 0.041467368602752686, "learning_rate": 0.00014748638552590127, "loss": 0.3073, "step": 16606 }, { "epoch": 1.345349967595593, "grad_norm": 0.048310671001672745, "learning_rate": 0.00014748188487330663, "loss": 0.2952, "step": 16607 }, { "epoch": 1.3454309786130914, "grad_norm": 0.043791867792606354, "learning_rate": 0.000147477384220712, "loss": 0.2843, "step": 16608 }, { "epoch": 1.3455119896305898, "grad_norm": 0.04615228623151779, "learning_rate": 0.00014747288356811738, "loss": 0.3369, "step": 16609 }, { "epoch": 1.345593000648088, "grad_norm": 0.04794533923268318, "learning_rate": 0.00014746838291552277, "loss": 0.2995, "step": 16610 }, { "epoch": 1.3456740116655865, "grad_norm": 0.03745264932513237, "learning_rate": 0.00014746388226292813, "loss": 0.2865, "step": 16611 }, { "epoch": 1.3457550226830848, "grad_norm": 0.04993395879864693, "learning_rate": 0.00014745938161033351, "loss": 0.341, "step": 16612 }, { "epoch": 1.3458360337005832, "grad_norm": 0.053492747247219086, "learning_rate": 0.00014745488095773887, "loss": 0.3089, "step": 16613 }, { "epoch": 1.3459170447180817, "grad_norm": 0.04382877051830292, "learning_rate": 0.00014745038030514423, "loss": 0.2897, "step": 16614 }, { "epoch": 1.34599805573558, "grad_norm": 0.039354801177978516, "learning_rate": 0.00014744587965254962, "loss": 0.2673, "step": 16615 }, { "epoch": 1.3460790667530784, "grad_norm": 0.03777182474732399, "learning_rate": 0.000147441378999955, "loss": 0.2716, "step": 16616 }, { "epoch": 1.346160077770577, "grad_norm": 0.043617118149995804, "learning_rate": 0.00014743687834736037, "loss": 0.2819, "step": 16617 }, { "epoch": 1.3462410887880751, "grad_norm": 0.0481729619204998, "learning_rate": 0.00014743237769476576, "loss": 0.2864, "step": 16618 }, { "epoch": 1.3463220998055736, "grad_norm": 0.048706311732530594, "learning_rate": 0.00014742787704217112, "loss": 0.3245, "step": 16619 }, { "epoch": 1.346403110823072, "grad_norm": 0.046131476759910583, "learning_rate": 0.0001474233763895765, "loss": 0.2945, "step": 16620 }, { "epoch": 1.3464841218405703, "grad_norm": 0.049588076770305634, "learning_rate": 0.00014741887573698186, "loss": 0.2744, "step": 16621 }, { "epoch": 1.3465651328580686, "grad_norm": 0.045150045305490494, "learning_rate": 0.00014741437508438725, "loss": 0.3076, "step": 16622 }, { "epoch": 1.346646143875567, "grad_norm": 0.03973205387592316, "learning_rate": 0.0001474098744317926, "loss": 0.2607, "step": 16623 }, { "epoch": 1.3467271548930655, "grad_norm": 0.042886774986982346, "learning_rate": 0.000147405373779198, "loss": 0.2928, "step": 16624 }, { "epoch": 1.3468081659105637, "grad_norm": 0.046221207827329636, "learning_rate": 0.00014740087312660336, "loss": 0.3033, "step": 16625 }, { "epoch": 1.3468891769280622, "grad_norm": 0.04569321125745773, "learning_rate": 0.00014739637247400875, "loss": 0.3301, "step": 16626 }, { "epoch": 1.3469701879455607, "grad_norm": 0.049562666565179825, "learning_rate": 0.0001473918718214141, "loss": 0.3283, "step": 16627 }, { "epoch": 1.347051198963059, "grad_norm": 0.042315803468227386, "learning_rate": 0.0001473873711688195, "loss": 0.2909, "step": 16628 }, { "epoch": 1.3471322099805574, "grad_norm": 0.04370975121855736, "learning_rate": 0.00014738287051622485, "loss": 0.3323, "step": 16629 }, { "epoch": 1.3472132209980558, "grad_norm": 0.05320056900382042, "learning_rate": 0.00014737836986363024, "loss": 0.3371, "step": 16630 }, { "epoch": 1.347294232015554, "grad_norm": 0.05056382715702057, "learning_rate": 0.0001473738692110356, "loss": 0.3028, "step": 16631 }, { "epoch": 1.3473752430330526, "grad_norm": 0.04404057189822197, "learning_rate": 0.000147369368558441, "loss": 0.3013, "step": 16632 }, { "epoch": 1.3474562540505508, "grad_norm": 0.04282480478286743, "learning_rate": 0.00014736486790584635, "loss": 0.2595, "step": 16633 }, { "epoch": 1.3475372650680493, "grad_norm": 0.04623326659202576, "learning_rate": 0.00014736036725325173, "loss": 0.294, "step": 16634 }, { "epoch": 1.3476182760855475, "grad_norm": 0.050502434372901917, "learning_rate": 0.0001473558666006571, "loss": 0.3359, "step": 16635 }, { "epoch": 1.347699287103046, "grad_norm": 0.04713571071624756, "learning_rate": 0.00014735136594806248, "loss": 0.3093, "step": 16636 }, { "epoch": 1.3477802981205445, "grad_norm": 0.04389585182070732, "learning_rate": 0.00014734686529546784, "loss": 0.3002, "step": 16637 }, { "epoch": 1.3478613091380427, "grad_norm": 0.04367394745349884, "learning_rate": 0.00014734236464287323, "loss": 0.2863, "step": 16638 }, { "epoch": 1.3479423201555412, "grad_norm": 0.04276891425251961, "learning_rate": 0.0001473378639902786, "loss": 0.3023, "step": 16639 }, { "epoch": 1.3480233311730396, "grad_norm": 0.06252908706665039, "learning_rate": 0.00014733336333768398, "loss": 0.353, "step": 16640 }, { "epoch": 1.3481043421905379, "grad_norm": 0.04885753616690636, "learning_rate": 0.00014732886268508934, "loss": 0.2753, "step": 16641 }, { "epoch": 1.3481853532080363, "grad_norm": 0.04044380784034729, "learning_rate": 0.00014732436203249472, "loss": 0.2673, "step": 16642 }, { "epoch": 1.3482663642255348, "grad_norm": 0.04267757385969162, "learning_rate": 0.00014731986137990008, "loss": 0.2644, "step": 16643 }, { "epoch": 1.348347375243033, "grad_norm": 0.04965035989880562, "learning_rate": 0.00014731536072730547, "loss": 0.3132, "step": 16644 }, { "epoch": 1.3484283862605313, "grad_norm": 0.04539056494832039, "learning_rate": 0.00014731086007471086, "loss": 0.2785, "step": 16645 }, { "epoch": 1.3485093972780298, "grad_norm": 0.04558189958333969, "learning_rate": 0.00014730635942211622, "loss": 0.3345, "step": 16646 }, { "epoch": 1.3485904082955282, "grad_norm": 0.048003729432821274, "learning_rate": 0.00014730185876952158, "loss": 0.2813, "step": 16647 }, { "epoch": 1.3486714193130265, "grad_norm": 0.04879504814743996, "learning_rate": 0.00014729735811692696, "loss": 0.3074, "step": 16648 }, { "epoch": 1.348752430330525, "grad_norm": 0.0417785607278347, "learning_rate": 0.00014729285746433235, "loss": 0.2859, "step": 16649 }, { "epoch": 1.3488334413480234, "grad_norm": 0.042382605373859406, "learning_rate": 0.0001472883568117377, "loss": 0.3068, "step": 16650 }, { "epoch": 1.3489144523655217, "grad_norm": 0.05004865303635597, "learning_rate": 0.0001472838561591431, "loss": 0.3222, "step": 16651 }, { "epoch": 1.3489954633830201, "grad_norm": 0.0468045249581337, "learning_rate": 0.00014727935550654846, "loss": 0.315, "step": 16652 }, { "epoch": 1.3490764744005186, "grad_norm": 0.040945470333099365, "learning_rate": 0.00014727485485395382, "loss": 0.3163, "step": 16653 }, { "epoch": 1.3491574854180168, "grad_norm": 0.03802323713898659, "learning_rate": 0.0001472703542013592, "loss": 0.2759, "step": 16654 }, { "epoch": 1.3492384964355153, "grad_norm": 0.04716533049941063, "learning_rate": 0.0001472658535487646, "loss": 0.3159, "step": 16655 }, { "epoch": 1.3493195074530135, "grad_norm": 0.056089192628860474, "learning_rate": 0.00014726135289616995, "loss": 0.3586, "step": 16656 }, { "epoch": 1.349400518470512, "grad_norm": 0.042343322187662125, "learning_rate": 0.00014725685224357534, "loss": 0.3355, "step": 16657 }, { "epoch": 1.3494815294880103, "grad_norm": 0.045325689017772675, "learning_rate": 0.0001472523515909807, "loss": 0.2954, "step": 16658 }, { "epoch": 1.3495625405055087, "grad_norm": 0.04668663069605827, "learning_rate": 0.00014724785093838606, "loss": 0.2666, "step": 16659 }, { "epoch": 1.3496435515230072, "grad_norm": 0.044138941913843155, "learning_rate": 0.00014724335028579145, "loss": 0.2783, "step": 16660 }, { "epoch": 1.3497245625405054, "grad_norm": 0.04154333844780922, "learning_rate": 0.00014723884963319683, "loss": 0.3154, "step": 16661 }, { "epoch": 1.349805573558004, "grad_norm": 0.04226982966065407, "learning_rate": 0.0001472343489806022, "loss": 0.2385, "step": 16662 }, { "epoch": 1.3498865845755024, "grad_norm": 0.040852706879377365, "learning_rate": 0.00014722984832800758, "loss": 0.2797, "step": 16663 }, { "epoch": 1.3499675955930006, "grad_norm": 0.04127061739563942, "learning_rate": 0.00014722534767541294, "loss": 0.3099, "step": 16664 }, { "epoch": 1.350048606610499, "grad_norm": 0.0487934872508049, "learning_rate": 0.0001472208470228183, "loss": 0.2807, "step": 16665 }, { "epoch": 1.3501296176279973, "grad_norm": 0.04939623177051544, "learning_rate": 0.0001472163463702237, "loss": 0.3383, "step": 16666 }, { "epoch": 1.3502106286454958, "grad_norm": 0.05318183824419975, "learning_rate": 0.00014721184571762908, "loss": 0.337, "step": 16667 }, { "epoch": 1.350291639662994, "grad_norm": 0.03577316924929619, "learning_rate": 0.00014720734506503444, "loss": 0.2556, "step": 16668 }, { "epoch": 1.3503726506804925, "grad_norm": 0.04493138939142227, "learning_rate": 0.00014720284441243982, "loss": 0.2872, "step": 16669 }, { "epoch": 1.350453661697991, "grad_norm": 0.03896234929561615, "learning_rate": 0.00014719834375984518, "loss": 0.284, "step": 16670 }, { "epoch": 1.3505346727154892, "grad_norm": 0.04790037125349045, "learning_rate": 0.00014719384310725054, "loss": 0.3177, "step": 16671 }, { "epoch": 1.3506156837329877, "grad_norm": 0.049241382628679276, "learning_rate": 0.00014718934245465593, "loss": 0.2927, "step": 16672 }, { "epoch": 1.3506966947504861, "grad_norm": 0.046655260026454926, "learning_rate": 0.00014718484180206132, "loss": 0.2792, "step": 16673 }, { "epoch": 1.3507777057679844, "grad_norm": 0.04560606926679611, "learning_rate": 0.00014718034114946668, "loss": 0.2893, "step": 16674 }, { "epoch": 1.3508587167854829, "grad_norm": 0.059893637895584106, "learning_rate": 0.00014717584049687207, "loss": 0.3453, "step": 16675 }, { "epoch": 1.3509397278029813, "grad_norm": 0.051331330090761185, "learning_rate": 0.00014717133984427743, "loss": 0.2729, "step": 16676 }, { "epoch": 1.3510207388204796, "grad_norm": 0.04739075154066086, "learning_rate": 0.00014716683919168279, "loss": 0.2542, "step": 16677 }, { "epoch": 1.351101749837978, "grad_norm": 0.05445697903633118, "learning_rate": 0.00014716233853908817, "loss": 0.2887, "step": 16678 }, { "epoch": 1.3511827608554763, "grad_norm": 0.04767782613635063, "learning_rate": 0.00014715783788649356, "loss": 0.3112, "step": 16679 }, { "epoch": 1.3512637718729748, "grad_norm": 0.04585118964314461, "learning_rate": 0.00014715333723389892, "loss": 0.3262, "step": 16680 }, { "epoch": 1.351344782890473, "grad_norm": 0.04599033296108246, "learning_rate": 0.0001471488365813043, "loss": 0.2791, "step": 16681 }, { "epoch": 1.3514257939079715, "grad_norm": 0.041311413049697876, "learning_rate": 0.00014714433592870967, "loss": 0.2836, "step": 16682 }, { "epoch": 1.35150680492547, "grad_norm": 0.04801380634307861, "learning_rate": 0.00014713983527611503, "loss": 0.2978, "step": 16683 }, { "epoch": 1.3515878159429682, "grad_norm": 0.04437221586704254, "learning_rate": 0.00014713533462352041, "loss": 0.3324, "step": 16684 }, { "epoch": 1.3516688269604666, "grad_norm": 0.042100001126527786, "learning_rate": 0.0001471308339709258, "loss": 0.2648, "step": 16685 }, { "epoch": 1.351749837977965, "grad_norm": 0.04318871721625328, "learning_rate": 0.00014712633331833116, "loss": 0.311, "step": 16686 }, { "epoch": 1.3518308489954634, "grad_norm": 0.037165261805057526, "learning_rate": 0.00014712183266573655, "loss": 0.2695, "step": 16687 }, { "epoch": 1.3519118600129618, "grad_norm": 0.05104199796915054, "learning_rate": 0.0001471173320131419, "loss": 0.3237, "step": 16688 }, { "epoch": 1.35199287103046, "grad_norm": 0.05252790078520775, "learning_rate": 0.00014711283136054727, "loss": 0.3422, "step": 16689 }, { "epoch": 1.3520738820479585, "grad_norm": 0.04297966510057449, "learning_rate": 0.00014710833070795266, "loss": 0.3139, "step": 16690 }, { "epoch": 1.3521548930654568, "grad_norm": 0.04057961329817772, "learning_rate": 0.00014710383005535804, "loss": 0.2905, "step": 16691 }, { "epoch": 1.3522359040829552, "grad_norm": 0.043488454073667526, "learning_rate": 0.0001470993294027634, "loss": 0.2831, "step": 16692 }, { "epoch": 1.3523169151004537, "grad_norm": 0.04362538829445839, "learning_rate": 0.0001470948287501688, "loss": 0.31, "step": 16693 }, { "epoch": 1.352397926117952, "grad_norm": 0.05452515929937363, "learning_rate": 0.00014709032809757415, "loss": 0.3327, "step": 16694 }, { "epoch": 1.3524789371354504, "grad_norm": 0.0442318469285965, "learning_rate": 0.0001470858274449795, "loss": 0.2992, "step": 16695 }, { "epoch": 1.3525599481529489, "grad_norm": 0.04387791454792023, "learning_rate": 0.0001470813267923849, "loss": 0.315, "step": 16696 }, { "epoch": 1.3526409591704471, "grad_norm": 0.03960513323545456, "learning_rate": 0.00014707682613979028, "loss": 0.277, "step": 16697 }, { "epoch": 1.3527219701879456, "grad_norm": 0.05234871059656143, "learning_rate": 0.00014707232548719564, "loss": 0.3153, "step": 16698 }, { "epoch": 1.352802981205444, "grad_norm": 0.04601548984646797, "learning_rate": 0.00014706782483460103, "loss": 0.2602, "step": 16699 }, { "epoch": 1.3528839922229423, "grad_norm": 0.0502309650182724, "learning_rate": 0.0001470633241820064, "loss": 0.3246, "step": 16700 }, { "epoch": 1.3529650032404408, "grad_norm": 0.04587402939796448, "learning_rate": 0.00014705882352941178, "loss": 0.2925, "step": 16701 }, { "epoch": 1.353046014257939, "grad_norm": 0.04139650985598564, "learning_rate": 0.00014705432287681714, "loss": 0.305, "step": 16702 }, { "epoch": 1.3531270252754375, "grad_norm": 0.04805470257997513, "learning_rate": 0.00014704982222422253, "loss": 0.3126, "step": 16703 }, { "epoch": 1.3532080362929357, "grad_norm": 0.04224188253283501, "learning_rate": 0.00014704532157162789, "loss": 0.273, "step": 16704 }, { "epoch": 1.3532890473104342, "grad_norm": 0.04937662184238434, "learning_rate": 0.00014704082091903327, "loss": 0.3476, "step": 16705 }, { "epoch": 1.3533700583279327, "grad_norm": 0.0471457913517952, "learning_rate": 0.00014703632026643863, "loss": 0.3152, "step": 16706 }, { "epoch": 1.353451069345431, "grad_norm": 0.04233188554644585, "learning_rate": 0.00014703181961384402, "loss": 0.2936, "step": 16707 }, { "epoch": 1.3535320803629294, "grad_norm": 0.04140879586338997, "learning_rate": 0.00014702731896124938, "loss": 0.2541, "step": 16708 }, { "epoch": 1.3536130913804278, "grad_norm": 0.050857096910476685, "learning_rate": 0.00014702281830865477, "loss": 0.3222, "step": 16709 }, { "epoch": 1.353694102397926, "grad_norm": 0.04600680246949196, "learning_rate": 0.00014701831765606013, "loss": 0.2904, "step": 16710 }, { "epoch": 1.3537751134154246, "grad_norm": 0.04443364217877388, "learning_rate": 0.00014701381700346551, "loss": 0.3104, "step": 16711 }, { "epoch": 1.3538561244329228, "grad_norm": 0.08359038829803467, "learning_rate": 0.00014700931635087088, "loss": 0.3043, "step": 16712 }, { "epoch": 1.3539371354504213, "grad_norm": 0.04045131430029869, "learning_rate": 0.00014700481569827626, "loss": 0.3045, "step": 16713 }, { "epoch": 1.3540181464679195, "grad_norm": 0.04331865534186363, "learning_rate": 0.00014700031504568165, "loss": 0.3371, "step": 16714 }, { "epoch": 1.354099157485418, "grad_norm": 0.039291027933359146, "learning_rate": 0.000146995814393087, "loss": 0.2393, "step": 16715 }, { "epoch": 1.3541801685029164, "grad_norm": 0.043209612369537354, "learning_rate": 0.00014699131374049237, "loss": 0.3281, "step": 16716 }, { "epoch": 1.3542611795204147, "grad_norm": 0.04783690720796585, "learning_rate": 0.00014698681308789776, "loss": 0.3078, "step": 16717 }, { "epoch": 1.3543421905379132, "grad_norm": 0.04343714192509651, "learning_rate": 0.00014698231243530312, "loss": 0.2993, "step": 16718 }, { "epoch": 1.3544232015554116, "grad_norm": 0.04262785241007805, "learning_rate": 0.0001469778117827085, "loss": 0.2941, "step": 16719 }, { "epoch": 1.3545042125729099, "grad_norm": 0.045166414231061935, "learning_rate": 0.0001469733111301139, "loss": 0.2887, "step": 16720 }, { "epoch": 1.3545852235904083, "grad_norm": 0.04084278643131256, "learning_rate": 0.00014696881047751925, "loss": 0.268, "step": 16721 }, { "epoch": 1.3546662346079068, "grad_norm": 0.042416613548994064, "learning_rate": 0.0001469643098249246, "loss": 0.2828, "step": 16722 }, { "epoch": 1.354747245625405, "grad_norm": 0.05023491382598877, "learning_rate": 0.00014695980917233, "loss": 0.2986, "step": 16723 }, { "epoch": 1.3548282566429035, "grad_norm": 0.04192998260259628, "learning_rate": 0.00014695530851973539, "loss": 0.2799, "step": 16724 }, { "epoch": 1.3549092676604018, "grad_norm": 0.04842350259423256, "learning_rate": 0.00014695080786714075, "loss": 0.3371, "step": 16725 }, { "epoch": 1.3549902786779002, "grad_norm": 0.0444394126534462, "learning_rate": 0.00014694630721454613, "loss": 0.3015, "step": 16726 }, { "epoch": 1.3550712896953985, "grad_norm": 0.04761913791298866, "learning_rate": 0.0001469418065619515, "loss": 0.3308, "step": 16727 }, { "epoch": 1.355152300712897, "grad_norm": 0.04710664227604866, "learning_rate": 0.00014693730590935685, "loss": 0.2814, "step": 16728 }, { "epoch": 1.3552333117303954, "grad_norm": 0.04607471823692322, "learning_rate": 0.00014693280525676224, "loss": 0.3356, "step": 16729 }, { "epoch": 1.3553143227478937, "grad_norm": 0.0481552891433239, "learning_rate": 0.00014692830460416763, "loss": 0.291, "step": 16730 }, { "epoch": 1.3553953337653921, "grad_norm": 0.045392464846372604, "learning_rate": 0.000146923803951573, "loss": 0.2821, "step": 16731 }, { "epoch": 1.3554763447828906, "grad_norm": 0.045290619134902954, "learning_rate": 0.00014691930329897837, "loss": 0.3186, "step": 16732 }, { "epoch": 1.3555573558003888, "grad_norm": 0.045965004712343216, "learning_rate": 0.00014691480264638373, "loss": 0.3045, "step": 16733 }, { "epoch": 1.3556383668178873, "grad_norm": 0.054191283881664276, "learning_rate": 0.0001469103019937891, "loss": 0.3299, "step": 16734 }, { "epoch": 1.3557193778353855, "grad_norm": 0.04757121577858925, "learning_rate": 0.00014690580134119448, "loss": 0.3076, "step": 16735 }, { "epoch": 1.355800388852884, "grad_norm": 0.05400211736559868, "learning_rate": 0.00014690130068859987, "loss": 0.3373, "step": 16736 }, { "epoch": 1.3558813998703823, "grad_norm": 0.05310973897576332, "learning_rate": 0.00014689680003600523, "loss": 0.3002, "step": 16737 }, { "epoch": 1.3559624108878807, "grad_norm": 0.03847629949450493, "learning_rate": 0.00014689229938341062, "loss": 0.2417, "step": 16738 }, { "epoch": 1.3560434219053792, "grad_norm": 0.04777615889906883, "learning_rate": 0.00014688779873081598, "loss": 0.2763, "step": 16739 }, { "epoch": 1.3561244329228774, "grad_norm": 0.04702061042189598, "learning_rate": 0.00014688329807822134, "loss": 0.3106, "step": 16740 }, { "epoch": 1.356205443940376, "grad_norm": 0.044273655861616135, "learning_rate": 0.00014687879742562672, "loss": 0.27, "step": 16741 }, { "epoch": 1.3562864549578744, "grad_norm": 0.04390040040016174, "learning_rate": 0.0001468742967730321, "loss": 0.3069, "step": 16742 }, { "epoch": 1.3563674659753726, "grad_norm": 0.05370044335722923, "learning_rate": 0.00014686979612043747, "loss": 0.3169, "step": 16743 }, { "epoch": 1.356448476992871, "grad_norm": 0.05036519095301628, "learning_rate": 0.00014686529546784286, "loss": 0.315, "step": 16744 }, { "epoch": 1.3565294880103695, "grad_norm": 0.04587824270129204, "learning_rate": 0.00014686079481524822, "loss": 0.3091, "step": 16745 }, { "epoch": 1.3566104990278678, "grad_norm": 0.04910546541213989, "learning_rate": 0.00014685629416265358, "loss": 0.281, "step": 16746 }, { "epoch": 1.356691510045366, "grad_norm": 0.040887534618377686, "learning_rate": 0.00014685179351005896, "loss": 0.2888, "step": 16747 }, { "epoch": 1.3567725210628645, "grad_norm": 0.03990871086716652, "learning_rate": 0.00014684729285746435, "loss": 0.2821, "step": 16748 }, { "epoch": 1.356853532080363, "grad_norm": 0.04099421203136444, "learning_rate": 0.0001468427922048697, "loss": 0.2888, "step": 16749 }, { "epoch": 1.3569345430978612, "grad_norm": 0.04588409140706062, "learning_rate": 0.0001468382915522751, "loss": 0.329, "step": 16750 }, { "epoch": 1.3570155541153597, "grad_norm": 0.052914079278707504, "learning_rate": 0.00014683379089968046, "loss": 0.3042, "step": 16751 }, { "epoch": 1.3570965651328581, "grad_norm": 0.04542308300733566, "learning_rate": 0.00014682929024708582, "loss": 0.3289, "step": 16752 }, { "epoch": 1.3571775761503564, "grad_norm": 0.048569608479738235, "learning_rate": 0.0001468247895944912, "loss": 0.3064, "step": 16753 }, { "epoch": 1.3572585871678549, "grad_norm": 0.04433464631438255, "learning_rate": 0.0001468202889418966, "loss": 0.2435, "step": 16754 }, { "epoch": 1.3573395981853533, "grad_norm": 0.052565090358257294, "learning_rate": 0.00014681578828930195, "loss": 0.3285, "step": 16755 }, { "epoch": 1.3574206092028516, "grad_norm": 0.04711638391017914, "learning_rate": 0.00014681128763670734, "loss": 0.2904, "step": 16756 }, { "epoch": 1.35750162022035, "grad_norm": 0.05769209563732147, "learning_rate": 0.0001468067869841127, "loss": 0.3118, "step": 16757 }, { "epoch": 1.3575826312378483, "grad_norm": 0.04438735172152519, "learning_rate": 0.00014680228633151806, "loss": 0.2729, "step": 16758 }, { "epoch": 1.3576636422553467, "grad_norm": 0.053901974111795425, "learning_rate": 0.00014679778567892345, "loss": 0.3609, "step": 16759 }, { "epoch": 1.357744653272845, "grad_norm": 0.04904919117689133, "learning_rate": 0.00014679328502632884, "loss": 0.2664, "step": 16760 }, { "epoch": 1.3578256642903435, "grad_norm": 0.038649268448352814, "learning_rate": 0.0001467887843737342, "loss": 0.2512, "step": 16761 }, { "epoch": 1.357906675307842, "grad_norm": 0.05013785883784294, "learning_rate": 0.00014678428372113958, "loss": 0.275, "step": 16762 }, { "epoch": 1.3579876863253402, "grad_norm": 0.05385733023285866, "learning_rate": 0.00014677978306854494, "loss": 0.2983, "step": 16763 }, { "epoch": 1.3580686973428386, "grad_norm": 0.05296805500984192, "learning_rate": 0.0001467752824159503, "loss": 0.3009, "step": 16764 }, { "epoch": 1.358149708360337, "grad_norm": 0.048822782933712006, "learning_rate": 0.0001467707817633557, "loss": 0.2802, "step": 16765 }, { "epoch": 1.3582307193778353, "grad_norm": 0.04486341029405594, "learning_rate": 0.00014676628111076108, "loss": 0.2856, "step": 16766 }, { "epoch": 1.3583117303953338, "grad_norm": 0.04700850695371628, "learning_rate": 0.00014676178045816644, "loss": 0.2815, "step": 16767 }, { "epoch": 1.3583927414128323, "grad_norm": 0.04619745537638664, "learning_rate": 0.00014675727980557182, "loss": 0.3142, "step": 16768 }, { "epoch": 1.3584737524303305, "grad_norm": 0.045293714851140976, "learning_rate": 0.00014675277915297718, "loss": 0.2886, "step": 16769 }, { "epoch": 1.3585547634478288, "grad_norm": 0.04872259870171547, "learning_rate": 0.00014674827850038254, "loss": 0.3158, "step": 16770 }, { "epoch": 1.3586357744653272, "grad_norm": 0.05652736499905586, "learning_rate": 0.00014674377784778793, "loss": 0.3662, "step": 16771 }, { "epoch": 1.3587167854828257, "grad_norm": 0.04640227183699608, "learning_rate": 0.00014673927719519332, "loss": 0.3093, "step": 16772 }, { "epoch": 1.358797796500324, "grad_norm": 0.04424037039279938, "learning_rate": 0.00014673477654259868, "loss": 0.3359, "step": 16773 }, { "epoch": 1.3588788075178224, "grad_norm": 0.048362359404563904, "learning_rate": 0.00014673027589000407, "loss": 0.2752, "step": 16774 }, { "epoch": 1.3589598185353209, "grad_norm": 0.04164674133062363, "learning_rate": 0.00014672577523740943, "loss": 0.2781, "step": 16775 }, { "epoch": 1.3590408295528191, "grad_norm": 0.04155677929520607, "learning_rate": 0.0001467212745848148, "loss": 0.2752, "step": 16776 }, { "epoch": 1.3591218405703176, "grad_norm": 0.05100404471158981, "learning_rate": 0.00014671677393222017, "loss": 0.3171, "step": 16777 }, { "epoch": 1.359202851587816, "grad_norm": 0.04038337990641594, "learning_rate": 0.00014671227327962556, "loss": 0.2684, "step": 16778 }, { "epoch": 1.3592838626053143, "grad_norm": 0.05007191002368927, "learning_rate": 0.00014670777262703092, "loss": 0.2847, "step": 16779 }, { "epoch": 1.3593648736228128, "grad_norm": 0.04454847052693367, "learning_rate": 0.0001467032719744363, "loss": 0.278, "step": 16780 }, { "epoch": 1.359445884640311, "grad_norm": 0.040483422577381134, "learning_rate": 0.00014669877132184167, "loss": 0.2605, "step": 16781 }, { "epoch": 1.3595268956578095, "grad_norm": 0.04559832438826561, "learning_rate": 0.00014669427066924705, "loss": 0.3204, "step": 16782 }, { "epoch": 1.3596079066753077, "grad_norm": 0.04614581912755966, "learning_rate": 0.00014668977001665244, "loss": 0.3391, "step": 16783 }, { "epoch": 1.3596889176928062, "grad_norm": 0.047624580562114716, "learning_rate": 0.0001466852693640578, "loss": 0.302, "step": 16784 }, { "epoch": 1.3597699287103047, "grad_norm": 0.04650955647230148, "learning_rate": 0.00014668076871146316, "loss": 0.2762, "step": 16785 }, { "epoch": 1.359850939727803, "grad_norm": 0.04590458795428276, "learning_rate": 0.00014667626805886855, "loss": 0.2966, "step": 16786 }, { "epoch": 1.3599319507453014, "grad_norm": 0.04254709929227829, "learning_rate": 0.0001466717674062739, "loss": 0.2759, "step": 16787 }, { "epoch": 1.3600129617627998, "grad_norm": 0.04450596868991852, "learning_rate": 0.0001466672667536793, "loss": 0.2721, "step": 16788 }, { "epoch": 1.360093972780298, "grad_norm": 0.04304325953125954, "learning_rate": 0.00014666276610108468, "loss": 0.2606, "step": 16789 }, { "epoch": 1.3601749837977966, "grad_norm": 0.046191249042749405, "learning_rate": 0.00014665826544849004, "loss": 0.3214, "step": 16790 }, { "epoch": 1.3602559948152948, "grad_norm": 0.04645387455821037, "learning_rate": 0.0001466537647958954, "loss": 0.3186, "step": 16791 }, { "epoch": 1.3603370058327933, "grad_norm": 0.04364481940865517, "learning_rate": 0.0001466492641433008, "loss": 0.314, "step": 16792 }, { "epoch": 1.3604180168502915, "grad_norm": 0.04091165214776993, "learning_rate": 0.00014664476349070615, "loss": 0.2958, "step": 16793 }, { "epoch": 1.36049902786779, "grad_norm": 0.04911810904741287, "learning_rate": 0.00014664026283811154, "loss": 0.322, "step": 16794 }, { "epoch": 1.3605800388852884, "grad_norm": 0.04907684773206711, "learning_rate": 0.00014663576218551692, "loss": 0.2986, "step": 16795 }, { "epoch": 1.3606610499027867, "grad_norm": 0.04427606239914894, "learning_rate": 0.00014663126153292228, "loss": 0.288, "step": 16796 }, { "epoch": 1.3607420609202852, "grad_norm": 0.048113491386175156, "learning_rate": 0.00014662676088032764, "loss": 0.2997, "step": 16797 }, { "epoch": 1.3608230719377836, "grad_norm": 0.04680365324020386, "learning_rate": 0.00014662226022773303, "loss": 0.3007, "step": 16798 }, { "epoch": 1.3609040829552819, "grad_norm": 0.0571950301527977, "learning_rate": 0.0001466177595751384, "loss": 0.2874, "step": 16799 }, { "epoch": 1.3609850939727803, "grad_norm": 0.03910844773054123, "learning_rate": 0.00014661325892254378, "loss": 0.2678, "step": 16800 }, { "epoch": 1.3610661049902788, "grad_norm": 0.04900038614869118, "learning_rate": 0.00014660875826994917, "loss": 0.3503, "step": 16801 }, { "epoch": 1.361147116007777, "grad_norm": 0.047941070050001144, "learning_rate": 0.00014660425761735453, "loss": 0.3336, "step": 16802 }, { "epoch": 1.3612281270252755, "grad_norm": 0.049411624670028687, "learning_rate": 0.0001465997569647599, "loss": 0.2802, "step": 16803 }, { "epoch": 1.3613091380427738, "grad_norm": 0.04363062605261803, "learning_rate": 0.00014659525631216527, "loss": 0.2786, "step": 16804 }, { "epoch": 1.3613901490602722, "grad_norm": 0.0482541024684906, "learning_rate": 0.00014659075565957066, "loss": 0.2847, "step": 16805 }, { "epoch": 1.3614711600777705, "grad_norm": 0.04462805762887001, "learning_rate": 0.00014658625500697602, "loss": 0.3073, "step": 16806 }, { "epoch": 1.361552171095269, "grad_norm": 0.047291915863752365, "learning_rate": 0.0001465817543543814, "loss": 0.2919, "step": 16807 }, { "epoch": 1.3616331821127674, "grad_norm": 0.044917941093444824, "learning_rate": 0.00014657725370178677, "loss": 0.3, "step": 16808 }, { "epoch": 1.3617141931302656, "grad_norm": 0.045333001762628555, "learning_rate": 0.00014657275304919213, "loss": 0.2598, "step": 16809 }, { "epoch": 1.3617952041477641, "grad_norm": 0.052305012941360474, "learning_rate": 0.00014656825239659752, "loss": 0.3113, "step": 16810 }, { "epoch": 1.3618762151652626, "grad_norm": 0.046395689249038696, "learning_rate": 0.0001465637517440029, "loss": 0.3137, "step": 16811 }, { "epoch": 1.3619572261827608, "grad_norm": 0.038306351751089096, "learning_rate": 0.00014655925109140826, "loss": 0.2799, "step": 16812 }, { "epoch": 1.3620382372002593, "grad_norm": 0.04151049628853798, "learning_rate": 0.00014655475043881365, "loss": 0.247, "step": 16813 }, { "epoch": 1.3621192482177575, "grad_norm": 0.05814644321799278, "learning_rate": 0.000146550249786219, "loss": 0.3111, "step": 16814 }, { "epoch": 1.362200259235256, "grad_norm": 0.03642072528600693, "learning_rate": 0.00014654574913362437, "loss": 0.2466, "step": 16815 }, { "epoch": 1.3622812702527543, "grad_norm": 0.05054713413119316, "learning_rate": 0.00014654124848102976, "loss": 0.3172, "step": 16816 }, { "epoch": 1.3623622812702527, "grad_norm": 0.04339751601219177, "learning_rate": 0.00014653674782843514, "loss": 0.2787, "step": 16817 }, { "epoch": 1.3624432922877512, "grad_norm": 0.05909118056297302, "learning_rate": 0.0001465322471758405, "loss": 0.3593, "step": 16818 }, { "epoch": 1.3625243033052494, "grad_norm": 0.04289279133081436, "learning_rate": 0.0001465277465232459, "loss": 0.2902, "step": 16819 }, { "epoch": 1.362605314322748, "grad_norm": 0.04428792744874954, "learning_rate": 0.00014652324587065125, "loss": 0.2842, "step": 16820 }, { "epoch": 1.3626863253402464, "grad_norm": 0.050207529217004776, "learning_rate": 0.0001465187452180566, "loss": 0.3221, "step": 16821 }, { "epoch": 1.3627673363577446, "grad_norm": 0.04290533438324928, "learning_rate": 0.000146514244565462, "loss": 0.2907, "step": 16822 }, { "epoch": 1.362848347375243, "grad_norm": 0.04496665298938751, "learning_rate": 0.00014650974391286739, "loss": 0.3309, "step": 16823 }, { "epoch": 1.3629293583927415, "grad_norm": 0.04096877574920654, "learning_rate": 0.00014650524326027275, "loss": 0.2958, "step": 16824 }, { "epoch": 1.3630103694102398, "grad_norm": 0.05002980679273605, "learning_rate": 0.00014650074260767813, "loss": 0.3151, "step": 16825 }, { "epoch": 1.3630913804277383, "grad_norm": 0.04218354821205139, "learning_rate": 0.0001464962419550835, "loss": 0.3038, "step": 16826 }, { "epoch": 1.3631723914452365, "grad_norm": 0.047836434096097946, "learning_rate": 0.00014649174130248885, "loss": 0.313, "step": 16827 }, { "epoch": 1.363253402462735, "grad_norm": 0.04476723447442055, "learning_rate": 0.00014648724064989424, "loss": 0.3203, "step": 16828 }, { "epoch": 1.3633344134802332, "grad_norm": 0.05513838678598404, "learning_rate": 0.00014648273999729963, "loss": 0.3342, "step": 16829 }, { "epoch": 1.3634154244977317, "grad_norm": 0.05754096433520317, "learning_rate": 0.000146478239344705, "loss": 0.3432, "step": 16830 }, { "epoch": 1.3634964355152301, "grad_norm": 0.04611041024327278, "learning_rate": 0.00014647373869211037, "loss": 0.2919, "step": 16831 }, { "epoch": 1.3635774465327284, "grad_norm": 0.04048772528767586, "learning_rate": 0.00014646923803951573, "loss": 0.272, "step": 16832 }, { "epoch": 1.3636584575502269, "grad_norm": 0.03679872676730156, "learning_rate": 0.0001464647373869211, "loss": 0.2405, "step": 16833 }, { "epoch": 1.3637394685677253, "grad_norm": 0.046283673495054245, "learning_rate": 0.00014646023673432648, "loss": 0.3303, "step": 16834 }, { "epoch": 1.3638204795852236, "grad_norm": 0.047594837844371796, "learning_rate": 0.00014645573608173187, "loss": 0.3108, "step": 16835 }, { "epoch": 1.363901490602722, "grad_norm": 0.04769527167081833, "learning_rate": 0.00014645123542913723, "loss": 0.3136, "step": 16836 }, { "epoch": 1.3639825016202203, "grad_norm": 0.04665999859571457, "learning_rate": 0.00014644673477654262, "loss": 0.2843, "step": 16837 }, { "epoch": 1.3640635126377187, "grad_norm": 0.04001079127192497, "learning_rate": 0.00014644223412394798, "loss": 0.2717, "step": 16838 }, { "epoch": 1.364144523655217, "grad_norm": 0.04948032647371292, "learning_rate": 0.00014643773347135334, "loss": 0.2933, "step": 16839 }, { "epoch": 1.3642255346727155, "grad_norm": 0.04881342127919197, "learning_rate": 0.00014643323281875872, "loss": 0.3302, "step": 16840 }, { "epoch": 1.364306545690214, "grad_norm": 0.04329438880085945, "learning_rate": 0.0001464287321661641, "loss": 0.2974, "step": 16841 }, { "epoch": 1.3643875567077122, "grad_norm": 0.0506560280919075, "learning_rate": 0.00014642423151356947, "loss": 0.3261, "step": 16842 }, { "epoch": 1.3644685677252106, "grad_norm": 0.046005334705114365, "learning_rate": 0.00014641973086097486, "loss": 0.283, "step": 16843 }, { "epoch": 1.364549578742709, "grad_norm": 0.05078957602381706, "learning_rate": 0.00014641523020838022, "loss": 0.3248, "step": 16844 }, { "epoch": 1.3646305897602073, "grad_norm": 0.04549172893166542, "learning_rate": 0.00014641072955578558, "loss": 0.2666, "step": 16845 }, { "epoch": 1.3647116007777058, "grad_norm": 0.04811951890587807, "learning_rate": 0.00014640622890319096, "loss": 0.2956, "step": 16846 }, { "epoch": 1.3647926117952043, "grad_norm": 0.05006801337003708, "learning_rate": 0.00014640172825059635, "loss": 0.314, "step": 16847 }, { "epoch": 1.3648736228127025, "grad_norm": 0.04566046968102455, "learning_rate": 0.0001463972275980017, "loss": 0.2905, "step": 16848 }, { "epoch": 1.3649546338302008, "grad_norm": 0.049143534153699875, "learning_rate": 0.0001463927269454071, "loss": 0.2689, "step": 16849 }, { "epoch": 1.3650356448476992, "grad_norm": 0.050470754504203796, "learning_rate": 0.00014638822629281246, "loss": 0.3447, "step": 16850 }, { "epoch": 1.3651166558651977, "grad_norm": 0.04866886883974075, "learning_rate": 0.00014638372564021782, "loss": 0.2903, "step": 16851 }, { "epoch": 1.365197666882696, "grad_norm": 0.053343262523412704, "learning_rate": 0.00014637922498762323, "loss": 0.2902, "step": 16852 }, { "epoch": 1.3652786779001944, "grad_norm": 0.04238571971654892, "learning_rate": 0.0001463747243350286, "loss": 0.2767, "step": 16853 }, { "epoch": 1.3653596889176929, "grad_norm": 0.054251790046691895, "learning_rate": 0.00014637022368243395, "loss": 0.3201, "step": 16854 }, { "epoch": 1.3654406999351911, "grad_norm": 0.046247709542512894, "learning_rate": 0.00014636572302983934, "loss": 0.3041, "step": 16855 }, { "epoch": 1.3655217109526896, "grad_norm": 0.05009758844971657, "learning_rate": 0.0001463612223772447, "loss": 0.2653, "step": 16856 }, { "epoch": 1.365602721970188, "grad_norm": 0.055453184992074966, "learning_rate": 0.0001463567217246501, "loss": 0.3334, "step": 16857 }, { "epoch": 1.3656837329876863, "grad_norm": 0.04012593999505043, "learning_rate": 0.00014635222107205548, "loss": 0.2727, "step": 16858 }, { "epoch": 1.3657647440051848, "grad_norm": 0.04042156785726547, "learning_rate": 0.00014634772041946084, "loss": 0.2738, "step": 16859 }, { "epoch": 1.365845755022683, "grad_norm": 0.04087120667099953, "learning_rate": 0.0001463432197668662, "loss": 0.2631, "step": 16860 }, { "epoch": 1.3659267660401815, "grad_norm": 0.04135334491729736, "learning_rate": 0.00014633871911427158, "loss": 0.247, "step": 16861 }, { "epoch": 1.3660077770576797, "grad_norm": 0.04643384367227554, "learning_rate": 0.00014633421846167694, "loss": 0.2988, "step": 16862 }, { "epoch": 1.3660887880751782, "grad_norm": 0.04361230880022049, "learning_rate": 0.00014632971780908233, "loss": 0.295, "step": 16863 }, { "epoch": 1.3661697990926767, "grad_norm": 0.053531769663095474, "learning_rate": 0.00014632521715648772, "loss": 0.2676, "step": 16864 }, { "epoch": 1.366250810110175, "grad_norm": 0.04573979601264, "learning_rate": 0.00014632071650389308, "loss": 0.2732, "step": 16865 }, { "epoch": 1.3663318211276734, "grad_norm": 0.05206222087144852, "learning_rate": 0.00014631621585129844, "loss": 0.3256, "step": 16866 }, { "epoch": 1.3664128321451718, "grad_norm": 0.046878959983587265, "learning_rate": 0.00014631171519870382, "loss": 0.3083, "step": 16867 }, { "epoch": 1.36649384316267, "grad_norm": 0.046941112726926804, "learning_rate": 0.00014630721454610918, "loss": 0.2547, "step": 16868 }, { "epoch": 1.3665748541801686, "grad_norm": 0.04700201749801636, "learning_rate": 0.00014630271389351457, "loss": 0.3062, "step": 16869 }, { "epoch": 1.366655865197667, "grad_norm": 0.042983245104551315, "learning_rate": 0.00014629821324091996, "loss": 0.294, "step": 16870 }, { "epoch": 1.3667368762151653, "grad_norm": 0.0427616648375988, "learning_rate": 0.00014629371258832532, "loss": 0.2736, "step": 16871 }, { "epoch": 1.3668178872326635, "grad_norm": 0.04252465441823006, "learning_rate": 0.00014628921193573068, "loss": 0.2706, "step": 16872 }, { "epoch": 1.366898898250162, "grad_norm": 0.04775184020400047, "learning_rate": 0.00014628471128313607, "loss": 0.3251, "step": 16873 }, { "epoch": 1.3669799092676604, "grad_norm": 0.051210030913352966, "learning_rate": 0.00014628021063054143, "loss": 0.301, "step": 16874 }, { "epoch": 1.3670609202851587, "grad_norm": 0.050241678953170776, "learning_rate": 0.0001462757099779468, "loss": 0.3119, "step": 16875 }, { "epoch": 1.3671419313026572, "grad_norm": 0.042821403592824936, "learning_rate": 0.0001462712093253522, "loss": 0.2613, "step": 16876 }, { "epoch": 1.3672229423201556, "grad_norm": 0.04285527765750885, "learning_rate": 0.00014626670867275756, "loss": 0.3185, "step": 16877 }, { "epoch": 1.3673039533376539, "grad_norm": 0.04558612033724785, "learning_rate": 0.00014626220802016292, "loss": 0.2942, "step": 16878 }, { "epoch": 1.3673849643551523, "grad_norm": 0.04869738593697548, "learning_rate": 0.0001462577073675683, "loss": 0.3108, "step": 16879 }, { "epoch": 1.3674659753726508, "grad_norm": 0.04682771861553192, "learning_rate": 0.00014625320671497367, "loss": 0.3039, "step": 16880 }, { "epoch": 1.367546986390149, "grad_norm": 0.04650042578577995, "learning_rate": 0.00014624870606237905, "loss": 0.2919, "step": 16881 }, { "epoch": 1.3676279974076475, "grad_norm": 0.042950332164764404, "learning_rate": 0.00014624420540978444, "loss": 0.2497, "step": 16882 }, { "epoch": 1.3677090084251458, "grad_norm": 0.04653036221861839, "learning_rate": 0.0001462397047571898, "loss": 0.2843, "step": 16883 }, { "epoch": 1.3677900194426442, "grad_norm": 0.049992140382528305, "learning_rate": 0.00014623520410459516, "loss": 0.3237, "step": 16884 }, { "epoch": 1.3678710304601425, "grad_norm": 0.0425015352666378, "learning_rate": 0.00014623070345200055, "loss": 0.3143, "step": 16885 }, { "epoch": 1.367952041477641, "grad_norm": 0.05130867660045624, "learning_rate": 0.00014622620279940594, "loss": 0.3054, "step": 16886 }, { "epoch": 1.3680330524951394, "grad_norm": 0.045671138912439346, "learning_rate": 0.0001462217021468113, "loss": 0.2901, "step": 16887 }, { "epoch": 1.3681140635126376, "grad_norm": 0.04083164036273956, "learning_rate": 0.00014621720149421668, "loss": 0.2918, "step": 16888 }, { "epoch": 1.3681950745301361, "grad_norm": 0.041421759873628616, "learning_rate": 0.00014621270084162204, "loss": 0.2704, "step": 16889 }, { "epoch": 1.3682760855476346, "grad_norm": 0.053979452699422836, "learning_rate": 0.0001462082001890274, "loss": 0.3252, "step": 16890 }, { "epoch": 1.3683570965651328, "grad_norm": 0.0409124456346035, "learning_rate": 0.0001462036995364328, "loss": 0.2791, "step": 16891 }, { "epoch": 1.3684381075826313, "grad_norm": 0.04741591587662697, "learning_rate": 0.00014619919888383818, "loss": 0.288, "step": 16892 }, { "epoch": 1.3685191186001295, "grad_norm": 0.054248884320259094, "learning_rate": 0.00014619469823124354, "loss": 0.3152, "step": 16893 }, { "epoch": 1.368600129617628, "grad_norm": 0.047506216913461685, "learning_rate": 0.00014619019757864893, "loss": 0.2871, "step": 16894 }, { "epoch": 1.3686811406351262, "grad_norm": 0.04755987972021103, "learning_rate": 0.00014618569692605429, "loss": 0.3085, "step": 16895 }, { "epoch": 1.3687621516526247, "grad_norm": 0.04973354935646057, "learning_rate": 0.00014618119627345965, "loss": 0.3267, "step": 16896 }, { "epoch": 1.3688431626701232, "grad_norm": 0.04893682152032852, "learning_rate": 0.00014617669562086503, "loss": 0.3173, "step": 16897 }, { "epoch": 1.3689241736876214, "grad_norm": 0.044737182557582855, "learning_rate": 0.00014617219496827042, "loss": 0.2754, "step": 16898 }, { "epoch": 1.36900518470512, "grad_norm": 0.05379049479961395, "learning_rate": 0.00014616769431567578, "loss": 0.3503, "step": 16899 }, { "epoch": 1.3690861957226184, "grad_norm": 0.04396909475326538, "learning_rate": 0.00014616319366308117, "loss": 0.259, "step": 16900 }, { "epoch": 1.3691672067401166, "grad_norm": 0.04770589992403984, "learning_rate": 0.00014615869301048653, "loss": 0.285, "step": 16901 }, { "epoch": 1.369248217757615, "grad_norm": 0.0434144102036953, "learning_rate": 0.0001461541923578919, "loss": 0.305, "step": 16902 }, { "epoch": 1.3693292287751135, "grad_norm": 0.04375230893492699, "learning_rate": 0.00014614969170529727, "loss": 0.2663, "step": 16903 }, { "epoch": 1.3694102397926118, "grad_norm": 0.05147865414619446, "learning_rate": 0.00014614519105270266, "loss": 0.295, "step": 16904 }, { "epoch": 1.3694912508101102, "grad_norm": 0.047179412096738815, "learning_rate": 0.00014614069040010802, "loss": 0.3113, "step": 16905 }, { "epoch": 1.3695722618276085, "grad_norm": 0.04674246534705162, "learning_rate": 0.0001461361897475134, "loss": 0.2754, "step": 16906 }, { "epoch": 1.369653272845107, "grad_norm": 0.04732316732406616, "learning_rate": 0.00014613168909491877, "loss": 0.3144, "step": 16907 }, { "epoch": 1.3697342838626052, "grad_norm": 0.0458158478140831, "learning_rate": 0.00014612718844232413, "loss": 0.3086, "step": 16908 }, { "epoch": 1.3698152948801037, "grad_norm": 0.049340032041072845, "learning_rate": 0.00014612268778972952, "loss": 0.3215, "step": 16909 }, { "epoch": 1.3698963058976021, "grad_norm": 0.04514643922448158, "learning_rate": 0.0001461181871371349, "loss": 0.3146, "step": 16910 }, { "epoch": 1.3699773169151004, "grad_norm": 0.04190957173705101, "learning_rate": 0.00014611368648454026, "loss": 0.2956, "step": 16911 }, { "epoch": 1.3700583279325989, "grad_norm": 0.04475057125091553, "learning_rate": 0.00014610918583194565, "loss": 0.2908, "step": 16912 }, { "epoch": 1.3701393389500973, "grad_norm": 0.04853258281946182, "learning_rate": 0.000146104685179351, "loss": 0.3459, "step": 16913 }, { "epoch": 1.3702203499675956, "grad_norm": 0.04533115401864052, "learning_rate": 0.00014610018452675637, "loss": 0.3097, "step": 16914 }, { "epoch": 1.370301360985094, "grad_norm": 0.03996393084526062, "learning_rate": 0.00014609568387416176, "loss": 0.2828, "step": 16915 }, { "epoch": 1.3703823720025923, "grad_norm": 0.05056044086813927, "learning_rate": 0.00014609118322156714, "loss": 0.2908, "step": 16916 }, { "epoch": 1.3704633830200907, "grad_norm": 0.04065033793449402, "learning_rate": 0.0001460866825689725, "loss": 0.317, "step": 16917 }, { "epoch": 1.370544394037589, "grad_norm": 0.04022886976599693, "learning_rate": 0.0001460821819163779, "loss": 0.2535, "step": 16918 }, { "epoch": 1.3706254050550875, "grad_norm": 0.04769391939043999, "learning_rate": 0.00014607768126378325, "loss": 0.3187, "step": 16919 }, { "epoch": 1.370706416072586, "grad_norm": 0.043292880058288574, "learning_rate": 0.0001460731806111886, "loss": 0.2666, "step": 16920 }, { "epoch": 1.3707874270900842, "grad_norm": 0.04575731232762337, "learning_rate": 0.00014606867995859403, "loss": 0.3144, "step": 16921 }, { "epoch": 1.3708684381075826, "grad_norm": 0.04306940734386444, "learning_rate": 0.00014606417930599939, "loss": 0.3005, "step": 16922 }, { "epoch": 1.370949449125081, "grad_norm": 0.04159868508577347, "learning_rate": 0.00014605967865340475, "loss": 0.2348, "step": 16923 }, { "epoch": 1.3710304601425793, "grad_norm": 0.04979290813207626, "learning_rate": 0.00014605517800081013, "loss": 0.3427, "step": 16924 }, { "epoch": 1.3711114711600778, "grad_norm": 0.05398887023329735, "learning_rate": 0.0001460506773482155, "loss": 0.3358, "step": 16925 }, { "epoch": 1.3711924821775763, "grad_norm": 0.04066229984164238, "learning_rate": 0.00014604617669562085, "loss": 0.2587, "step": 16926 }, { "epoch": 1.3712734931950745, "grad_norm": 0.046182893216609955, "learning_rate": 0.00014604167604302627, "loss": 0.3177, "step": 16927 }, { "epoch": 1.371354504212573, "grad_norm": 0.03884848207235336, "learning_rate": 0.00014603717539043163, "loss": 0.2703, "step": 16928 }, { "epoch": 1.3714355152300712, "grad_norm": 0.04299648851156235, "learning_rate": 0.000146032674737837, "loss": 0.3059, "step": 16929 }, { "epoch": 1.3715165262475697, "grad_norm": 0.04192721098661423, "learning_rate": 0.00014602817408524237, "loss": 0.2758, "step": 16930 }, { "epoch": 1.371597537265068, "grad_norm": 0.055731527507305145, "learning_rate": 0.00014602367343264773, "loss": 0.3202, "step": 16931 }, { "epoch": 1.3716785482825664, "grad_norm": 0.04261324554681778, "learning_rate": 0.0001460191727800531, "loss": 0.2719, "step": 16932 }, { "epoch": 1.3717595593000649, "grad_norm": 0.04510960727930069, "learning_rate": 0.0001460146721274585, "loss": 0.305, "step": 16933 }, { "epoch": 1.3718405703175631, "grad_norm": 0.04371994361281395, "learning_rate": 0.00014601017147486387, "loss": 0.2983, "step": 16934 }, { "epoch": 1.3719215813350616, "grad_norm": 0.048851270228624344, "learning_rate": 0.00014600567082226923, "loss": 0.3076, "step": 16935 }, { "epoch": 1.37200259235256, "grad_norm": 0.0492849163711071, "learning_rate": 0.00014600117016967462, "loss": 0.3139, "step": 16936 }, { "epoch": 1.3720836033700583, "grad_norm": 0.04994687810540199, "learning_rate": 0.00014599666951707998, "loss": 0.2965, "step": 16937 }, { "epoch": 1.3721646143875568, "grad_norm": 0.044168904423713684, "learning_rate": 0.00014599216886448536, "loss": 0.2867, "step": 16938 }, { "epoch": 1.372245625405055, "grad_norm": 0.047688718885183334, "learning_rate": 0.00014598766821189075, "loss": 0.3327, "step": 16939 }, { "epoch": 1.3723266364225535, "grad_norm": 0.04277990758419037, "learning_rate": 0.0001459831675592961, "loss": 0.2811, "step": 16940 }, { "epoch": 1.3724076474400517, "grad_norm": 0.044236935675144196, "learning_rate": 0.00014597866690670147, "loss": 0.3336, "step": 16941 }, { "epoch": 1.3724886584575502, "grad_norm": 0.042176589369773865, "learning_rate": 0.00014597416625410686, "loss": 0.3254, "step": 16942 }, { "epoch": 1.3725696694750487, "grad_norm": 0.05188947916030884, "learning_rate": 0.00014596966560151222, "loss": 0.3014, "step": 16943 }, { "epoch": 1.372650680492547, "grad_norm": 0.04572753980755806, "learning_rate": 0.0001459651649489176, "loss": 0.3422, "step": 16944 }, { "epoch": 1.3727316915100454, "grad_norm": 0.0529056079685688, "learning_rate": 0.000145960664296323, "loss": 0.3064, "step": 16945 }, { "epoch": 1.3728127025275438, "grad_norm": 0.05411520600318909, "learning_rate": 0.00014595616364372835, "loss": 0.2982, "step": 16946 }, { "epoch": 1.372893713545042, "grad_norm": 0.04950553923845291, "learning_rate": 0.0001459516629911337, "loss": 0.3289, "step": 16947 }, { "epoch": 1.3729747245625405, "grad_norm": 0.04515305906534195, "learning_rate": 0.0001459471623385391, "loss": 0.3142, "step": 16948 }, { "epoch": 1.373055735580039, "grad_norm": 0.05544627085328102, "learning_rate": 0.00014594266168594446, "loss": 0.325, "step": 16949 }, { "epoch": 1.3731367465975373, "grad_norm": 0.04322170093655586, "learning_rate": 0.00014593816103334985, "loss": 0.2928, "step": 16950 }, { "epoch": 1.3732177576150357, "grad_norm": 0.04435882344841957, "learning_rate": 0.00014593366038075523, "loss": 0.279, "step": 16951 }, { "epoch": 1.373298768632534, "grad_norm": 0.04589640349149704, "learning_rate": 0.0001459291597281606, "loss": 0.3156, "step": 16952 }, { "epoch": 1.3733797796500324, "grad_norm": 0.04302145168185234, "learning_rate": 0.00014592465907556595, "loss": 0.3197, "step": 16953 }, { "epoch": 1.3734607906675307, "grad_norm": 0.04616771265864372, "learning_rate": 0.00014592015842297134, "loss": 0.2785, "step": 16954 }, { "epoch": 1.3735418016850292, "grad_norm": 0.04837910830974579, "learning_rate": 0.0001459156577703767, "loss": 0.3168, "step": 16955 }, { "epoch": 1.3736228127025276, "grad_norm": 0.056114088743925095, "learning_rate": 0.0001459111571177821, "loss": 0.2685, "step": 16956 }, { "epoch": 1.3737038237200259, "grad_norm": 0.04780881479382515, "learning_rate": 0.00014590665646518748, "loss": 0.295, "step": 16957 }, { "epoch": 1.3737848347375243, "grad_norm": 0.047661397606134415, "learning_rate": 0.00014590215581259284, "loss": 0.3353, "step": 16958 }, { "epoch": 1.3738658457550228, "grad_norm": 0.052063945680856705, "learning_rate": 0.0001458976551599982, "loss": 0.298, "step": 16959 }, { "epoch": 1.373946856772521, "grad_norm": 0.049559805542230606, "learning_rate": 0.00014589315450740358, "loss": 0.3432, "step": 16960 }, { "epoch": 1.3740278677900195, "grad_norm": 0.04893847927451134, "learning_rate": 0.00014588865385480894, "loss": 0.3224, "step": 16961 }, { "epoch": 1.3741088788075178, "grad_norm": 0.04671037942171097, "learning_rate": 0.00014588415320221433, "loss": 0.3191, "step": 16962 }, { "epoch": 1.3741898898250162, "grad_norm": 0.04712628945708275, "learning_rate": 0.00014587965254961972, "loss": 0.3082, "step": 16963 }, { "epoch": 1.3742709008425145, "grad_norm": 0.037124503403902054, "learning_rate": 0.00014587515189702508, "loss": 0.2676, "step": 16964 }, { "epoch": 1.374351911860013, "grad_norm": 0.04226502776145935, "learning_rate": 0.00014587065124443044, "loss": 0.3052, "step": 16965 }, { "epoch": 1.3744329228775114, "grad_norm": 0.0540158748626709, "learning_rate": 0.00014586615059183582, "loss": 0.2956, "step": 16966 }, { "epoch": 1.3745139338950096, "grad_norm": 0.04997124522924423, "learning_rate": 0.0001458616499392412, "loss": 0.2883, "step": 16967 }, { "epoch": 1.374594944912508, "grad_norm": 0.044050950556993484, "learning_rate": 0.00014585714928664657, "loss": 0.2909, "step": 16968 }, { "epoch": 1.3746759559300066, "grad_norm": 0.04482242837548256, "learning_rate": 0.00014585264863405196, "loss": 0.3312, "step": 16969 }, { "epoch": 1.3747569669475048, "grad_norm": 0.049184177070856094, "learning_rate": 0.00014584814798145732, "loss": 0.2984, "step": 16970 }, { "epoch": 1.3748379779650033, "grad_norm": 0.040518440306186676, "learning_rate": 0.00014584364732886268, "loss": 0.278, "step": 16971 }, { "epoch": 1.3749189889825018, "grad_norm": 0.0521448478102684, "learning_rate": 0.00014583914667626807, "loss": 0.306, "step": 16972 }, { "epoch": 1.375, "grad_norm": 0.04701065644621849, "learning_rate": 0.00014583464602367345, "loss": 0.3253, "step": 16973 }, { "epoch": 1.3750810110174982, "grad_norm": 0.04387321695685387, "learning_rate": 0.0001458301453710788, "loss": 0.2966, "step": 16974 }, { "epoch": 1.3751620220349967, "grad_norm": 0.04575170576572418, "learning_rate": 0.0001458256447184842, "loss": 0.3201, "step": 16975 }, { "epoch": 1.3752430330524952, "grad_norm": 0.04442807659506798, "learning_rate": 0.00014582114406588956, "loss": 0.2826, "step": 16976 }, { "epoch": 1.3753240440699934, "grad_norm": 0.04788399487733841, "learning_rate": 0.00014581664341329492, "loss": 0.2797, "step": 16977 }, { "epoch": 1.375405055087492, "grad_norm": 0.0455004945397377, "learning_rate": 0.0001458121427607003, "loss": 0.3181, "step": 16978 }, { "epoch": 1.3754860661049904, "grad_norm": 0.05180566757917404, "learning_rate": 0.0001458076421081057, "loss": 0.293, "step": 16979 }, { "epoch": 1.3755670771224886, "grad_norm": 0.040193237364292145, "learning_rate": 0.00014580314145551105, "loss": 0.2766, "step": 16980 }, { "epoch": 1.375648088139987, "grad_norm": 0.05090703070163727, "learning_rate": 0.00014579864080291644, "loss": 0.3479, "step": 16981 }, { "epoch": 1.3757290991574855, "grad_norm": 0.04570503905415535, "learning_rate": 0.0001457941401503218, "loss": 0.3072, "step": 16982 }, { "epoch": 1.3758101101749838, "grad_norm": 0.04995448887348175, "learning_rate": 0.00014578963949772716, "loss": 0.3209, "step": 16983 }, { "epoch": 1.3758911211924822, "grad_norm": 0.048252664506435394, "learning_rate": 0.00014578513884513255, "loss": 0.2397, "step": 16984 }, { "epoch": 1.3759721322099805, "grad_norm": 0.04591905698180199, "learning_rate": 0.00014578063819253794, "loss": 0.3117, "step": 16985 }, { "epoch": 1.376053143227479, "grad_norm": 0.04259791970252991, "learning_rate": 0.0001457761375399433, "loss": 0.2689, "step": 16986 }, { "epoch": 1.3761341542449772, "grad_norm": 0.04957650601863861, "learning_rate": 0.00014577163688734868, "loss": 0.3, "step": 16987 }, { "epoch": 1.3762151652624757, "grad_norm": 0.037725962698459625, "learning_rate": 0.00014576713623475404, "loss": 0.2581, "step": 16988 }, { "epoch": 1.3762961762799741, "grad_norm": 0.04181136563420296, "learning_rate": 0.0001457626355821594, "loss": 0.271, "step": 16989 }, { "epoch": 1.3763771872974724, "grad_norm": 0.043500158935785294, "learning_rate": 0.00014575813492956482, "loss": 0.2849, "step": 16990 }, { "epoch": 1.3764581983149708, "grad_norm": 0.04969172552227974, "learning_rate": 0.00014575363427697018, "loss": 0.3, "step": 16991 }, { "epoch": 1.3765392093324693, "grad_norm": 0.04714135825634003, "learning_rate": 0.00014574913362437554, "loss": 0.3405, "step": 16992 }, { "epoch": 1.3766202203499676, "grad_norm": 0.04822403937578201, "learning_rate": 0.00014574463297178093, "loss": 0.2928, "step": 16993 }, { "epoch": 1.376701231367466, "grad_norm": 0.05032119154930115, "learning_rate": 0.00014574013231918629, "loss": 0.3185, "step": 16994 }, { "epoch": 1.3767822423849643, "grad_norm": 0.05615558847784996, "learning_rate": 0.00014573563166659165, "loss": 0.3301, "step": 16995 }, { "epoch": 1.3768632534024627, "grad_norm": 0.06276535987854004, "learning_rate": 0.00014573113101399706, "loss": 0.2663, "step": 16996 }, { "epoch": 1.376944264419961, "grad_norm": 0.05143308266997337, "learning_rate": 0.00014572663036140242, "loss": 0.358, "step": 16997 }, { "epoch": 1.3770252754374595, "grad_norm": 0.04123299568891525, "learning_rate": 0.00014572212970880778, "loss": 0.2668, "step": 16998 }, { "epoch": 1.377106286454958, "grad_norm": 0.047784462571144104, "learning_rate": 0.00014571762905621317, "loss": 0.3215, "step": 16999 }, { "epoch": 1.3771872974724562, "grad_norm": 0.041549235582351685, "learning_rate": 0.00014571312840361853, "loss": 0.2888, "step": 17000 }, { "epoch": 1.3772683084899546, "grad_norm": 0.046233825385570526, "learning_rate": 0.0001457086277510239, "loss": 0.3146, "step": 17001 }, { "epoch": 1.377349319507453, "grad_norm": 0.04736294224858284, "learning_rate": 0.0001457041270984293, "loss": 0.289, "step": 17002 }, { "epoch": 1.3774303305249513, "grad_norm": 0.03976118192076683, "learning_rate": 0.00014569962644583466, "loss": 0.2903, "step": 17003 }, { "epoch": 1.3775113415424498, "grad_norm": 0.04543759301304817, "learning_rate": 0.00014569512579324002, "loss": 0.2915, "step": 17004 }, { "epoch": 1.3775923525599483, "grad_norm": 0.05306227132678032, "learning_rate": 0.0001456906251406454, "loss": 0.3543, "step": 17005 }, { "epoch": 1.3776733635774465, "grad_norm": 0.045866914093494415, "learning_rate": 0.00014568612448805077, "loss": 0.2865, "step": 17006 }, { "epoch": 1.377754374594945, "grad_norm": 0.04696584492921829, "learning_rate": 0.00014568162383545613, "loss": 0.3061, "step": 17007 }, { "epoch": 1.3778353856124432, "grad_norm": 0.04269943758845329, "learning_rate": 0.00014567712318286154, "loss": 0.289, "step": 17008 }, { "epoch": 1.3779163966299417, "grad_norm": 0.04807358980178833, "learning_rate": 0.0001456726225302669, "loss": 0.3005, "step": 17009 }, { "epoch": 1.37799740764744, "grad_norm": 0.04203903675079346, "learning_rate": 0.00014566812187767226, "loss": 0.2712, "step": 17010 }, { "epoch": 1.3780784186649384, "grad_norm": 0.044012703001499176, "learning_rate": 0.00014566362122507765, "loss": 0.2589, "step": 17011 }, { "epoch": 1.3781594296824369, "grad_norm": 0.05308305844664574, "learning_rate": 0.000145659120572483, "loss": 0.2844, "step": 17012 }, { "epoch": 1.3782404406999351, "grad_norm": 0.06335795670747757, "learning_rate": 0.00014565461991988837, "loss": 0.2811, "step": 17013 }, { "epoch": 1.3783214517174336, "grad_norm": 0.049153171479701996, "learning_rate": 0.00014565011926729378, "loss": 0.2922, "step": 17014 }, { "epoch": 1.378402462734932, "grad_norm": 0.04896317794919014, "learning_rate": 0.00014564561861469914, "loss": 0.2884, "step": 17015 }, { "epoch": 1.3784834737524303, "grad_norm": 0.04377609118819237, "learning_rate": 0.0001456411179621045, "loss": 0.3007, "step": 17016 }, { "epoch": 1.3785644847699288, "grad_norm": 0.042761363089084625, "learning_rate": 0.0001456366173095099, "loss": 0.281, "step": 17017 }, { "epoch": 1.378645495787427, "grad_norm": 0.044017307460308075, "learning_rate": 0.00014563211665691525, "loss": 0.2993, "step": 17018 }, { "epoch": 1.3787265068049255, "grad_norm": 0.046515289694070816, "learning_rate": 0.00014562761600432064, "loss": 0.2952, "step": 17019 }, { "epoch": 1.3788075178224237, "grad_norm": 0.050369229167699814, "learning_rate": 0.00014562311535172603, "loss": 0.297, "step": 17020 }, { "epoch": 1.3788885288399222, "grad_norm": 0.045705631375312805, "learning_rate": 0.00014561861469913139, "loss": 0.2727, "step": 17021 }, { "epoch": 1.3789695398574207, "grad_norm": 0.039376430213451385, "learning_rate": 0.00014561411404653675, "loss": 0.2973, "step": 17022 }, { "epoch": 1.379050550874919, "grad_norm": 0.04685184359550476, "learning_rate": 0.00014560961339394213, "loss": 0.2878, "step": 17023 }, { "epoch": 1.3791315618924174, "grad_norm": 0.04315702244639397, "learning_rate": 0.0001456051127413475, "loss": 0.2918, "step": 17024 }, { "epoch": 1.3792125729099158, "grad_norm": 0.051262155175209045, "learning_rate": 0.00014560061208875288, "loss": 0.3369, "step": 17025 }, { "epoch": 1.379293583927414, "grad_norm": 0.041663069278001785, "learning_rate": 0.00014559611143615827, "loss": 0.2638, "step": 17026 }, { "epoch": 1.3793745949449125, "grad_norm": 0.04210826754570007, "learning_rate": 0.00014559161078356363, "loss": 0.2637, "step": 17027 }, { "epoch": 1.379455605962411, "grad_norm": 0.0514579713344574, "learning_rate": 0.000145587110130969, "loss": 0.3166, "step": 17028 }, { "epoch": 1.3795366169799093, "grad_norm": 0.04053298756480217, "learning_rate": 0.00014558260947837438, "loss": 0.2989, "step": 17029 }, { "epoch": 1.3796176279974077, "grad_norm": 0.041419435292482376, "learning_rate": 0.00014557810882577974, "loss": 0.2582, "step": 17030 }, { "epoch": 1.379698639014906, "grad_norm": 0.050826363265514374, "learning_rate": 0.00014557360817318512, "loss": 0.2929, "step": 17031 }, { "epoch": 1.3797796500324044, "grad_norm": 0.0434403158724308, "learning_rate": 0.0001455691075205905, "loss": 0.2849, "step": 17032 }, { "epoch": 1.3798606610499027, "grad_norm": 0.056152284145355225, "learning_rate": 0.00014556460686799587, "loss": 0.2936, "step": 17033 }, { "epoch": 1.3799416720674011, "grad_norm": 0.04322462156414986, "learning_rate": 0.00014556010621540123, "loss": 0.2972, "step": 17034 }, { "epoch": 1.3800226830848996, "grad_norm": 0.04615986347198486, "learning_rate": 0.00014555560556280662, "loss": 0.2901, "step": 17035 }, { "epoch": 1.3801036941023979, "grad_norm": 0.043903838843107224, "learning_rate": 0.00014555110491021198, "loss": 0.2885, "step": 17036 }, { "epoch": 1.3801847051198963, "grad_norm": 0.04112345725297928, "learning_rate": 0.00014554660425761736, "loss": 0.2867, "step": 17037 }, { "epoch": 1.3802657161373948, "grad_norm": 0.04830494895577431, "learning_rate": 0.00014554210360502275, "loss": 0.3119, "step": 17038 }, { "epoch": 1.380346727154893, "grad_norm": 0.053838301450014114, "learning_rate": 0.0001455376029524281, "loss": 0.294, "step": 17039 }, { "epoch": 1.3804277381723915, "grad_norm": 0.041832443326711655, "learning_rate": 0.00014553310229983347, "loss": 0.27, "step": 17040 }, { "epoch": 1.3805087491898898, "grad_norm": 0.04943208023905754, "learning_rate": 0.00014552860164723886, "loss": 0.3196, "step": 17041 }, { "epoch": 1.3805897602073882, "grad_norm": 0.05328208953142166, "learning_rate": 0.00014552410099464425, "loss": 0.3522, "step": 17042 }, { "epoch": 1.3806707712248865, "grad_norm": 0.052018262445926666, "learning_rate": 0.0001455196003420496, "loss": 0.2947, "step": 17043 }, { "epoch": 1.380751782242385, "grad_norm": 0.054234579205513, "learning_rate": 0.000145515099689455, "loss": 0.3148, "step": 17044 }, { "epoch": 1.3808327932598834, "grad_norm": 0.044175803661346436, "learning_rate": 0.00014551059903686035, "loss": 0.2758, "step": 17045 }, { "epoch": 1.3809138042773816, "grad_norm": 0.046540990471839905, "learning_rate": 0.0001455060983842657, "loss": 0.3532, "step": 17046 }, { "epoch": 1.38099481529488, "grad_norm": 0.04491589963436127, "learning_rate": 0.0001455015977316711, "loss": 0.2979, "step": 17047 }, { "epoch": 1.3810758263123786, "grad_norm": 0.044613197445869446, "learning_rate": 0.0001454970970790765, "loss": 0.281, "step": 17048 }, { "epoch": 1.3811568373298768, "grad_norm": 0.043135564774274826, "learning_rate": 0.00014549259642648185, "loss": 0.298, "step": 17049 }, { "epoch": 1.3812378483473753, "grad_norm": 0.046049654483795166, "learning_rate": 0.00014548809577388723, "loss": 0.312, "step": 17050 }, { "epoch": 1.3813188593648738, "grad_norm": 0.05278931185603142, "learning_rate": 0.0001454835951212926, "loss": 0.3036, "step": 17051 }, { "epoch": 1.381399870382372, "grad_norm": 0.03843000903725624, "learning_rate": 0.00014547909446869795, "loss": 0.2619, "step": 17052 }, { "epoch": 1.3814808813998705, "grad_norm": 0.043770916759967804, "learning_rate": 0.00014547459381610334, "loss": 0.284, "step": 17053 }, { "epoch": 1.3815618924173687, "grad_norm": 0.045125383883714676, "learning_rate": 0.00014547009316350873, "loss": 0.2819, "step": 17054 }, { "epoch": 1.3816429034348672, "grad_norm": 0.043314822018146515, "learning_rate": 0.0001454655925109141, "loss": 0.2853, "step": 17055 }, { "epoch": 1.3817239144523654, "grad_norm": 0.05009135231375694, "learning_rate": 0.00014546109185831948, "loss": 0.3329, "step": 17056 }, { "epoch": 1.3818049254698639, "grad_norm": 0.04217385873198509, "learning_rate": 0.00014545659120572484, "loss": 0.3421, "step": 17057 }, { "epoch": 1.3818859364873624, "grad_norm": 0.04606221616268158, "learning_rate": 0.0001454520905531302, "loss": 0.2794, "step": 17058 }, { "epoch": 1.3819669475048606, "grad_norm": 0.04918968677520752, "learning_rate": 0.00014544758990053558, "loss": 0.3144, "step": 17059 }, { "epoch": 1.382047958522359, "grad_norm": 0.04076380655169487, "learning_rate": 0.00014544308924794097, "loss": 0.2515, "step": 17060 }, { "epoch": 1.3821289695398575, "grad_norm": 0.045574650168418884, "learning_rate": 0.00014543858859534633, "loss": 0.2567, "step": 17061 }, { "epoch": 1.3822099805573558, "grad_norm": 0.05103718861937523, "learning_rate": 0.00014543408794275172, "loss": 0.2954, "step": 17062 }, { "epoch": 1.3822909915748542, "grad_norm": 0.048355668783187866, "learning_rate": 0.00014542958729015708, "loss": 0.3115, "step": 17063 }, { "epoch": 1.3823720025923525, "grad_norm": 0.04657682776451111, "learning_rate": 0.00014542508663756244, "loss": 0.2857, "step": 17064 }, { "epoch": 1.382453013609851, "grad_norm": 0.04876073822379112, "learning_rate": 0.00014542058598496782, "loss": 0.2728, "step": 17065 }, { "epoch": 1.3825340246273492, "grad_norm": 0.04318344220519066, "learning_rate": 0.0001454160853323732, "loss": 0.2725, "step": 17066 }, { "epoch": 1.3826150356448477, "grad_norm": 0.04278486222028732, "learning_rate": 0.00014541158467977857, "loss": 0.2927, "step": 17067 }, { "epoch": 1.3826960466623461, "grad_norm": 0.039600007236003876, "learning_rate": 0.00014540708402718396, "loss": 0.2698, "step": 17068 }, { "epoch": 1.3827770576798444, "grad_norm": 0.03894643113017082, "learning_rate": 0.00014540258337458932, "loss": 0.3127, "step": 17069 }, { "epoch": 1.3828580686973428, "grad_norm": 0.05142957344651222, "learning_rate": 0.00014539808272199468, "loss": 0.3259, "step": 17070 }, { "epoch": 1.3829390797148413, "grad_norm": 0.049879662692546844, "learning_rate": 0.0001453935820694001, "loss": 0.37, "step": 17071 }, { "epoch": 1.3830200907323396, "grad_norm": 0.043519534170627594, "learning_rate": 0.00014538908141680545, "loss": 0.2947, "step": 17072 }, { "epoch": 1.383101101749838, "grad_norm": 0.054054081439971924, "learning_rate": 0.00014538458076421081, "loss": 0.3453, "step": 17073 }, { "epoch": 1.3831821127673365, "grad_norm": 0.04072052985429764, "learning_rate": 0.0001453800801116162, "loss": 0.2949, "step": 17074 }, { "epoch": 1.3832631237848347, "grad_norm": 0.052756231278181076, "learning_rate": 0.00014537557945902156, "loss": 0.3266, "step": 17075 }, { "epoch": 1.383344134802333, "grad_norm": 0.05641273781657219, "learning_rate": 0.00014537107880642692, "loss": 0.3461, "step": 17076 }, { "epoch": 1.3834251458198314, "grad_norm": 0.04871641844511032, "learning_rate": 0.00014536657815383234, "loss": 0.2565, "step": 17077 }, { "epoch": 1.38350615683733, "grad_norm": 0.052028074860572815, "learning_rate": 0.0001453620775012377, "loss": 0.3101, "step": 17078 }, { "epoch": 1.3835871678548282, "grad_norm": 0.044042039662599564, "learning_rate": 0.00014535757684864306, "loss": 0.3034, "step": 17079 }, { "epoch": 1.3836681788723266, "grad_norm": 0.040498316287994385, "learning_rate": 0.00014535307619604844, "loss": 0.2616, "step": 17080 }, { "epoch": 1.383749189889825, "grad_norm": 0.057534705847501755, "learning_rate": 0.0001453485755434538, "loss": 0.2903, "step": 17081 }, { "epoch": 1.3838302009073233, "grad_norm": 0.045549727976322174, "learning_rate": 0.00014534407489085916, "loss": 0.348, "step": 17082 }, { "epoch": 1.3839112119248218, "grad_norm": 0.045121192932128906, "learning_rate": 0.00014533957423826458, "loss": 0.299, "step": 17083 }, { "epoch": 1.3839922229423203, "grad_norm": 0.04222647100687027, "learning_rate": 0.00014533507358566994, "loss": 0.3126, "step": 17084 }, { "epoch": 1.3840732339598185, "grad_norm": 0.04713848978281021, "learning_rate": 0.0001453305729330753, "loss": 0.268, "step": 17085 }, { "epoch": 1.384154244977317, "grad_norm": 0.04560885950922966, "learning_rate": 0.00014532607228048068, "loss": 0.2943, "step": 17086 }, { "epoch": 1.3842352559948152, "grad_norm": 0.04805802181363106, "learning_rate": 0.00014532157162788604, "loss": 0.2794, "step": 17087 }, { "epoch": 1.3843162670123137, "grad_norm": 0.052336081862449646, "learning_rate": 0.0001453170709752914, "loss": 0.32, "step": 17088 }, { "epoch": 1.384397278029812, "grad_norm": 0.040716711431741714, "learning_rate": 0.00014531257032269682, "loss": 0.2258, "step": 17089 }, { "epoch": 1.3844782890473104, "grad_norm": 0.04950367286801338, "learning_rate": 0.00014530806967010218, "loss": 0.309, "step": 17090 }, { "epoch": 1.3845593000648089, "grad_norm": 0.045787155628204346, "learning_rate": 0.00014530356901750754, "loss": 0.279, "step": 17091 }, { "epoch": 1.3846403110823071, "grad_norm": 0.04720460996031761, "learning_rate": 0.00014529906836491293, "loss": 0.3106, "step": 17092 }, { "epoch": 1.3847213220998056, "grad_norm": 0.048547353595495224, "learning_rate": 0.00014529456771231829, "loss": 0.2946, "step": 17093 }, { "epoch": 1.384802333117304, "grad_norm": 0.057024236768484116, "learning_rate": 0.00014529006705972367, "loss": 0.2819, "step": 17094 }, { "epoch": 1.3848833441348023, "grad_norm": 0.04217173159122467, "learning_rate": 0.00014528556640712906, "loss": 0.3118, "step": 17095 }, { "epoch": 1.3849643551523008, "grad_norm": 0.04492117837071419, "learning_rate": 0.00014528106575453442, "loss": 0.2932, "step": 17096 }, { "epoch": 1.3850453661697992, "grad_norm": 0.04013582691550255, "learning_rate": 0.00014527656510193978, "loss": 0.2452, "step": 17097 }, { "epoch": 1.3851263771872975, "grad_norm": 0.04117108881473541, "learning_rate": 0.00014527206444934517, "loss": 0.2858, "step": 17098 }, { "epoch": 1.3852073882047957, "grad_norm": 0.05159113183617592, "learning_rate": 0.00014526756379675053, "loss": 0.3098, "step": 17099 }, { "epoch": 1.3852883992222942, "grad_norm": 0.047474876046180725, "learning_rate": 0.00014526306314415591, "loss": 0.3164, "step": 17100 }, { "epoch": 1.3853694102397927, "grad_norm": 0.05002215877175331, "learning_rate": 0.0001452585624915613, "loss": 0.3241, "step": 17101 }, { "epoch": 1.385450421257291, "grad_norm": 0.0520719438791275, "learning_rate": 0.00014525406183896666, "loss": 0.3029, "step": 17102 }, { "epoch": 1.3855314322747894, "grad_norm": 0.05380908399820328, "learning_rate": 0.00014524956118637202, "loss": 0.3065, "step": 17103 }, { "epoch": 1.3856124432922878, "grad_norm": 0.044111158698797226, "learning_rate": 0.0001452450605337774, "loss": 0.2745, "step": 17104 }, { "epoch": 1.385693454309786, "grad_norm": 0.04867509752511978, "learning_rate": 0.00014524055988118277, "loss": 0.2873, "step": 17105 }, { "epoch": 1.3857744653272845, "grad_norm": 0.042329221963882446, "learning_rate": 0.00014523605922858816, "loss": 0.2689, "step": 17106 }, { "epoch": 1.385855476344783, "grad_norm": 0.048590246587991714, "learning_rate": 0.00014523155857599354, "loss": 0.3431, "step": 17107 }, { "epoch": 1.3859364873622813, "grad_norm": 0.05173628404736519, "learning_rate": 0.0001452270579233989, "loss": 0.3024, "step": 17108 }, { "epoch": 1.3860174983797797, "grad_norm": 0.04990382865071297, "learning_rate": 0.00014522255727080426, "loss": 0.3145, "step": 17109 }, { "epoch": 1.386098509397278, "grad_norm": 0.04661140218377113, "learning_rate": 0.00014521805661820965, "loss": 0.3038, "step": 17110 }, { "epoch": 1.3861795204147764, "grad_norm": 0.050237026065588, "learning_rate": 0.000145213555965615, "loss": 0.3157, "step": 17111 }, { "epoch": 1.3862605314322747, "grad_norm": 0.0511772558093071, "learning_rate": 0.0001452090553130204, "loss": 0.2904, "step": 17112 }, { "epoch": 1.3863415424497731, "grad_norm": 0.05320873484015465, "learning_rate": 0.00014520455466042578, "loss": 0.2833, "step": 17113 }, { "epoch": 1.3864225534672716, "grad_norm": 0.05491953343153, "learning_rate": 0.00014520005400783114, "loss": 0.3432, "step": 17114 }, { "epoch": 1.3865035644847699, "grad_norm": 0.0481005497276783, "learning_rate": 0.0001451955533552365, "loss": 0.2956, "step": 17115 }, { "epoch": 1.3865845755022683, "grad_norm": 0.04878426343202591, "learning_rate": 0.0001451910527026419, "loss": 0.3071, "step": 17116 }, { "epoch": 1.3866655865197668, "grad_norm": 0.04745024815201759, "learning_rate": 0.00014518655205004725, "loss": 0.3111, "step": 17117 }, { "epoch": 1.386746597537265, "grad_norm": 0.044038161635398865, "learning_rate": 0.00014518205139745264, "loss": 0.2737, "step": 17118 }, { "epoch": 1.3868276085547635, "grad_norm": 0.04577605426311493, "learning_rate": 0.00014517755074485803, "loss": 0.2782, "step": 17119 }, { "epoch": 1.3869086195722617, "grad_norm": 0.052599694579839706, "learning_rate": 0.0001451730500922634, "loss": 0.3343, "step": 17120 }, { "epoch": 1.3869896305897602, "grad_norm": 0.045472402125597, "learning_rate": 0.00014516854943966875, "loss": 0.2855, "step": 17121 }, { "epoch": 1.3870706416072585, "grad_norm": 0.04352157935500145, "learning_rate": 0.00014516404878707413, "loss": 0.2857, "step": 17122 }, { "epoch": 1.387151652624757, "grad_norm": 0.04844208061695099, "learning_rate": 0.00014515954813447952, "loss": 0.2909, "step": 17123 }, { "epoch": 1.3872326636422554, "grad_norm": 0.041017644107341766, "learning_rate": 0.00014515504748188488, "loss": 0.301, "step": 17124 }, { "epoch": 1.3873136746597536, "grad_norm": 0.0501578226685524, "learning_rate": 0.00014515054682929027, "loss": 0.326, "step": 17125 }, { "epoch": 1.387394685677252, "grad_norm": 0.04330161586403847, "learning_rate": 0.00014514604617669563, "loss": 0.2822, "step": 17126 }, { "epoch": 1.3874756966947506, "grad_norm": 0.046240877360105515, "learning_rate": 0.000145141545524101, "loss": 0.2819, "step": 17127 }, { "epoch": 1.3875567077122488, "grad_norm": 0.04883384704589844, "learning_rate": 0.00014513704487150638, "loss": 0.3387, "step": 17128 }, { "epoch": 1.3876377187297473, "grad_norm": 0.04287239536643028, "learning_rate": 0.00014513254421891176, "loss": 0.2792, "step": 17129 }, { "epoch": 1.3877187297472457, "grad_norm": 0.04883171617984772, "learning_rate": 0.00014512804356631712, "loss": 0.3086, "step": 17130 }, { "epoch": 1.387799740764744, "grad_norm": 0.04150502011179924, "learning_rate": 0.0001451235429137225, "loss": 0.2497, "step": 17131 }, { "epoch": 1.3878807517822425, "grad_norm": 0.048590175807476044, "learning_rate": 0.00014511904226112787, "loss": 0.3063, "step": 17132 }, { "epoch": 1.3879617627997407, "grad_norm": 0.044636547565460205, "learning_rate": 0.00014511454160853323, "loss": 0.2559, "step": 17133 }, { "epoch": 1.3880427738172392, "grad_norm": 0.047890182584524155, "learning_rate": 0.00014511004095593862, "loss": 0.2809, "step": 17134 }, { "epoch": 1.3881237848347374, "grad_norm": 0.04968463256955147, "learning_rate": 0.000145105540303344, "loss": 0.2887, "step": 17135 }, { "epoch": 1.3882047958522359, "grad_norm": 0.04098277539014816, "learning_rate": 0.00014510103965074936, "loss": 0.2905, "step": 17136 }, { "epoch": 1.3882858068697344, "grad_norm": 0.0413254052400589, "learning_rate": 0.00014509653899815475, "loss": 0.2363, "step": 17137 }, { "epoch": 1.3883668178872326, "grad_norm": 0.047378312796354294, "learning_rate": 0.0001450920383455601, "loss": 0.2808, "step": 17138 }, { "epoch": 1.388447828904731, "grad_norm": 0.04449395462870598, "learning_rate": 0.00014508753769296547, "loss": 0.2763, "step": 17139 }, { "epoch": 1.3885288399222295, "grad_norm": 0.04626189172267914, "learning_rate": 0.00014508303704037086, "loss": 0.2849, "step": 17140 }, { "epoch": 1.3886098509397278, "grad_norm": 0.03956826031208038, "learning_rate": 0.00014507853638777625, "loss": 0.2612, "step": 17141 }, { "epoch": 1.3886908619572262, "grad_norm": 0.04964509606361389, "learning_rate": 0.0001450740357351816, "loss": 0.3347, "step": 17142 }, { "epoch": 1.3887718729747245, "grad_norm": 0.04381033033132553, "learning_rate": 0.000145069535082587, "loss": 0.2745, "step": 17143 }, { "epoch": 1.388852883992223, "grad_norm": 0.05008375644683838, "learning_rate": 0.00014506503442999235, "loss": 0.3153, "step": 17144 }, { "epoch": 1.3889338950097212, "grad_norm": 0.053938351571559906, "learning_rate": 0.0001450605337773977, "loss": 0.3015, "step": 17145 }, { "epoch": 1.3890149060272197, "grad_norm": 0.04867379739880562, "learning_rate": 0.0001450560331248031, "loss": 0.3107, "step": 17146 }, { "epoch": 1.3890959170447181, "grad_norm": 0.036837805062532425, "learning_rate": 0.0001450515324722085, "loss": 0.2407, "step": 17147 }, { "epoch": 1.3891769280622164, "grad_norm": 0.04842856153845787, "learning_rate": 0.00014504703181961385, "loss": 0.335, "step": 17148 }, { "epoch": 1.3892579390797148, "grad_norm": 0.050567056983709335, "learning_rate": 0.00014504253116701923, "loss": 0.3484, "step": 17149 }, { "epoch": 1.3893389500972133, "grad_norm": 0.04667452722787857, "learning_rate": 0.0001450380305144246, "loss": 0.3453, "step": 17150 }, { "epoch": 1.3894199611147116, "grad_norm": 0.04436580464243889, "learning_rate": 0.00014503352986182995, "loss": 0.3176, "step": 17151 }, { "epoch": 1.38950097213221, "grad_norm": 0.04287957027554512, "learning_rate": 0.00014502902920923537, "loss": 0.2645, "step": 17152 }, { "epoch": 1.3895819831497085, "grad_norm": 0.04573435336351395, "learning_rate": 0.00014502452855664073, "loss": 0.3097, "step": 17153 }, { "epoch": 1.3896629941672067, "grad_norm": 0.052313655614852905, "learning_rate": 0.0001450200279040461, "loss": 0.3208, "step": 17154 }, { "epoch": 1.3897440051847052, "grad_norm": 0.04121054708957672, "learning_rate": 0.00014501552725145148, "loss": 0.2768, "step": 17155 }, { "epoch": 1.3898250162022034, "grad_norm": 0.03237319737672806, "learning_rate": 0.00014501102659885684, "loss": 0.2415, "step": 17156 }, { "epoch": 1.389906027219702, "grad_norm": 0.04825165122747421, "learning_rate": 0.0001450065259462622, "loss": 0.3146, "step": 17157 }, { "epoch": 1.3899870382372002, "grad_norm": 0.0393325574696064, "learning_rate": 0.0001450020252936676, "loss": 0.3009, "step": 17158 }, { "epoch": 1.3900680492546986, "grad_norm": 0.04445468261837959, "learning_rate": 0.00014499752464107297, "loss": 0.2746, "step": 17159 }, { "epoch": 1.390149060272197, "grad_norm": 0.050849344581365585, "learning_rate": 0.00014499302398847833, "loss": 0.2809, "step": 17160 }, { "epoch": 1.3902300712896953, "grad_norm": 0.04268684610724449, "learning_rate": 0.00014498852333588372, "loss": 0.2856, "step": 17161 }, { "epoch": 1.3903110823071938, "grad_norm": 0.040725164115428925, "learning_rate": 0.00014498402268328908, "loss": 0.2781, "step": 17162 }, { "epoch": 1.3903920933246923, "grad_norm": 0.05028020963072777, "learning_rate": 0.00014497952203069444, "loss": 0.3093, "step": 17163 }, { "epoch": 1.3904731043421905, "grad_norm": 0.04498764127492905, "learning_rate": 0.00014497502137809985, "loss": 0.3043, "step": 17164 }, { "epoch": 1.390554115359689, "grad_norm": 0.0481903962790966, "learning_rate": 0.0001449705207255052, "loss": 0.2977, "step": 17165 }, { "epoch": 1.3906351263771872, "grad_norm": 0.055557865649461746, "learning_rate": 0.00014496602007291057, "loss": 0.3173, "step": 17166 }, { "epoch": 1.3907161373946857, "grad_norm": 0.04860248789191246, "learning_rate": 0.00014496151942031596, "loss": 0.3222, "step": 17167 }, { "epoch": 1.390797148412184, "grad_norm": 0.048381075263023376, "learning_rate": 0.00014495701876772132, "loss": 0.2739, "step": 17168 }, { "epoch": 1.3908781594296824, "grad_norm": 0.0555526427924633, "learning_rate": 0.00014495251811512668, "loss": 0.3217, "step": 17169 }, { "epoch": 1.3909591704471809, "grad_norm": 0.04361342266201973, "learning_rate": 0.0001449480174625321, "loss": 0.3157, "step": 17170 }, { "epoch": 1.3910401814646791, "grad_norm": 0.05354481190443039, "learning_rate": 0.00014494351680993745, "loss": 0.3008, "step": 17171 }, { "epoch": 1.3911211924821776, "grad_norm": 0.046501703560352325, "learning_rate": 0.00014493901615734281, "loss": 0.2904, "step": 17172 }, { "epoch": 1.391202203499676, "grad_norm": 0.0433221235871315, "learning_rate": 0.0001449345155047482, "loss": 0.2671, "step": 17173 }, { "epoch": 1.3912832145171743, "grad_norm": 0.04641888290643692, "learning_rate": 0.00014493001485215356, "loss": 0.2833, "step": 17174 }, { "epoch": 1.3913642255346728, "grad_norm": 0.047434285283088684, "learning_rate": 0.00014492551419955895, "loss": 0.2848, "step": 17175 }, { "epoch": 1.3914452365521712, "grad_norm": 0.04717138037085533, "learning_rate": 0.00014492101354696434, "loss": 0.3092, "step": 17176 }, { "epoch": 1.3915262475696695, "grad_norm": 0.050207510590553284, "learning_rate": 0.0001449165128943697, "loss": 0.2864, "step": 17177 }, { "epoch": 1.3916072585871677, "grad_norm": 0.047605060040950775, "learning_rate": 0.00014491201224177506, "loss": 0.3189, "step": 17178 }, { "epoch": 1.3916882696046662, "grad_norm": 0.04513358324766159, "learning_rate": 0.00014490751158918044, "loss": 0.3065, "step": 17179 }, { "epoch": 1.3917692806221647, "grad_norm": 0.04292263090610504, "learning_rate": 0.0001449030109365858, "loss": 0.3054, "step": 17180 }, { "epoch": 1.391850291639663, "grad_norm": 0.04038042575120926, "learning_rate": 0.0001448985102839912, "loss": 0.2982, "step": 17181 }, { "epoch": 1.3919313026571614, "grad_norm": 0.038698893040418625, "learning_rate": 0.00014489400963139658, "loss": 0.2906, "step": 17182 }, { "epoch": 1.3920123136746598, "grad_norm": 0.04013046249747276, "learning_rate": 0.00014488950897880194, "loss": 0.2854, "step": 17183 }, { "epoch": 1.392093324692158, "grad_norm": 0.0430646613240242, "learning_rate": 0.0001448850083262073, "loss": 0.3223, "step": 17184 }, { "epoch": 1.3921743357096565, "grad_norm": 0.04854704812169075, "learning_rate": 0.00014488050767361268, "loss": 0.3339, "step": 17185 }, { "epoch": 1.392255346727155, "grad_norm": 0.04367974027991295, "learning_rate": 0.00014487600702101804, "loss": 0.3059, "step": 17186 }, { "epoch": 1.3923363577446533, "grad_norm": 0.044980958104133606, "learning_rate": 0.00014487150636842343, "loss": 0.3084, "step": 17187 }, { "epoch": 1.3924173687621517, "grad_norm": 0.04090898111462593, "learning_rate": 0.00014486700571582882, "loss": 0.2755, "step": 17188 }, { "epoch": 1.39249837977965, "grad_norm": 0.04232384264469147, "learning_rate": 0.00014486250506323418, "loss": 0.3006, "step": 17189 }, { "epoch": 1.3925793907971484, "grad_norm": 0.051384977996349335, "learning_rate": 0.00014485800441063954, "loss": 0.342, "step": 17190 }, { "epoch": 1.3926604018146467, "grad_norm": 0.04421504586935043, "learning_rate": 0.00014485350375804493, "loss": 0.2529, "step": 17191 }, { "epoch": 1.3927414128321451, "grad_norm": 0.05006080120801926, "learning_rate": 0.00014484900310545029, "loss": 0.2788, "step": 17192 }, { "epoch": 1.3928224238496436, "grad_norm": 0.04934096336364746, "learning_rate": 0.00014484450245285567, "loss": 0.3027, "step": 17193 }, { "epoch": 1.3929034348671419, "grad_norm": 0.04648890718817711, "learning_rate": 0.00014484000180026106, "loss": 0.2849, "step": 17194 }, { "epoch": 1.3929844458846403, "grad_norm": 0.04589102044701576, "learning_rate": 0.00014483550114766642, "loss": 0.3191, "step": 17195 }, { "epoch": 1.3930654569021388, "grad_norm": 0.05279819667339325, "learning_rate": 0.00014483100049507178, "loss": 0.3106, "step": 17196 }, { "epoch": 1.393146467919637, "grad_norm": 0.04485386237502098, "learning_rate": 0.00014482649984247717, "loss": 0.2625, "step": 17197 }, { "epoch": 1.3932274789371355, "grad_norm": 0.05579359084367752, "learning_rate": 0.00014482199918988253, "loss": 0.3536, "step": 17198 }, { "epoch": 1.393308489954634, "grad_norm": 0.052236780524253845, "learning_rate": 0.00014481749853728791, "loss": 0.302, "step": 17199 }, { "epoch": 1.3933895009721322, "grad_norm": 0.04790930077433586, "learning_rate": 0.0001448129978846933, "loss": 0.2871, "step": 17200 }, { "epoch": 1.3934705119896305, "grad_norm": 0.05095363408327103, "learning_rate": 0.00014480849723209866, "loss": 0.2967, "step": 17201 }, { "epoch": 1.393551523007129, "grad_norm": 0.04160373657941818, "learning_rate": 0.00014480399657950402, "loss": 0.2765, "step": 17202 }, { "epoch": 1.3936325340246274, "grad_norm": 0.058393362909555435, "learning_rate": 0.0001447994959269094, "loss": 0.3585, "step": 17203 }, { "epoch": 1.3937135450421256, "grad_norm": 0.049784865230321884, "learning_rate": 0.0001447949952743148, "loss": 0.2966, "step": 17204 }, { "epoch": 1.393794556059624, "grad_norm": 0.04413321241736412, "learning_rate": 0.00014479049462172016, "loss": 0.2658, "step": 17205 }, { "epoch": 1.3938755670771226, "grad_norm": 0.05727369338274002, "learning_rate": 0.00014478599396912554, "loss": 0.2876, "step": 17206 }, { "epoch": 1.3939565780946208, "grad_norm": 0.05408977344632149, "learning_rate": 0.0001447814933165309, "loss": 0.294, "step": 17207 }, { "epoch": 1.3940375891121193, "grad_norm": 0.056550346314907074, "learning_rate": 0.00014477699266393626, "loss": 0.3085, "step": 17208 }, { "epoch": 1.3941186001296177, "grad_norm": 0.05060124769806862, "learning_rate": 0.00014477249201134165, "loss": 0.297, "step": 17209 }, { "epoch": 1.394199611147116, "grad_norm": 0.04698517918586731, "learning_rate": 0.00014476799135874704, "loss": 0.3272, "step": 17210 }, { "epoch": 1.3942806221646145, "grad_norm": 0.0382196418941021, "learning_rate": 0.0001447634907061524, "loss": 0.2702, "step": 17211 }, { "epoch": 1.3943616331821127, "grad_norm": 0.05150233581662178, "learning_rate": 0.00014475899005355779, "loss": 0.3431, "step": 17212 }, { "epoch": 1.3944426441996112, "grad_norm": 0.05292709171772003, "learning_rate": 0.00014475448940096315, "loss": 0.3246, "step": 17213 }, { "epoch": 1.3945236552171094, "grad_norm": 0.04780266806483269, "learning_rate": 0.0001447499887483685, "loss": 0.3281, "step": 17214 }, { "epoch": 1.3946046662346079, "grad_norm": 0.04817802086472511, "learning_rate": 0.0001447454880957739, "loss": 0.3149, "step": 17215 }, { "epoch": 1.3946856772521063, "grad_norm": 0.04268627613782883, "learning_rate": 0.00014474098744317928, "loss": 0.2797, "step": 17216 }, { "epoch": 1.3947666882696046, "grad_norm": 0.05220309644937515, "learning_rate": 0.00014473648679058464, "loss": 0.3197, "step": 17217 }, { "epoch": 1.394847699287103, "grad_norm": 0.0412866473197937, "learning_rate": 0.00014473198613799003, "loss": 0.2593, "step": 17218 }, { "epoch": 1.3949287103046015, "grad_norm": 0.042804330587387085, "learning_rate": 0.0001447274854853954, "loss": 0.3024, "step": 17219 }, { "epoch": 1.3950097213220998, "grad_norm": 0.049674104899168015, "learning_rate": 0.00014472298483280075, "loss": 0.2912, "step": 17220 }, { "epoch": 1.3950907323395982, "grad_norm": 0.04464632272720337, "learning_rate": 0.00014471848418020613, "loss": 0.29, "step": 17221 }, { "epoch": 1.3951717433570965, "grad_norm": 0.04729987680912018, "learning_rate": 0.00014471398352761152, "loss": 0.3131, "step": 17222 }, { "epoch": 1.395252754374595, "grad_norm": 0.04054408147931099, "learning_rate": 0.00014470948287501688, "loss": 0.2598, "step": 17223 }, { "epoch": 1.3953337653920932, "grad_norm": 0.04292924329638481, "learning_rate": 0.00014470498222242227, "loss": 0.2883, "step": 17224 }, { "epoch": 1.3954147764095917, "grad_norm": 0.05302182585000992, "learning_rate": 0.00014470048156982763, "loss": 0.3552, "step": 17225 }, { "epoch": 1.3954957874270901, "grad_norm": 0.05824055150151253, "learning_rate": 0.000144695980917233, "loss": 0.3049, "step": 17226 }, { "epoch": 1.3955767984445884, "grad_norm": 0.04591545835137367, "learning_rate": 0.0001446914802646384, "loss": 0.3713, "step": 17227 }, { "epoch": 1.3956578094620868, "grad_norm": 0.04642440751194954, "learning_rate": 0.00014468697961204376, "loss": 0.2973, "step": 17228 }, { "epoch": 1.3957388204795853, "grad_norm": 0.050178322941064835, "learning_rate": 0.00014468247895944912, "loss": 0.3178, "step": 17229 }, { "epoch": 1.3958198314970836, "grad_norm": 0.04168196767568588, "learning_rate": 0.0001446779783068545, "loss": 0.2828, "step": 17230 }, { "epoch": 1.395900842514582, "grad_norm": 0.044799406081438065, "learning_rate": 0.00014467347765425987, "loss": 0.2779, "step": 17231 }, { "epoch": 1.3959818535320805, "grad_norm": 0.04037578031420708, "learning_rate": 0.00014466897700166523, "loss": 0.27, "step": 17232 }, { "epoch": 1.3960628645495787, "grad_norm": 0.0469634085893631, "learning_rate": 0.00014466447634907064, "loss": 0.3166, "step": 17233 }, { "epoch": 1.3961438755670772, "grad_norm": 0.04802028089761734, "learning_rate": 0.000144659975696476, "loss": 0.2894, "step": 17234 }, { "epoch": 1.3962248865845754, "grad_norm": 0.05452967435121536, "learning_rate": 0.00014465547504388136, "loss": 0.2915, "step": 17235 }, { "epoch": 1.396305897602074, "grad_norm": 0.05245371535420418, "learning_rate": 0.00014465097439128675, "loss": 0.3184, "step": 17236 }, { "epoch": 1.3963869086195722, "grad_norm": 0.04539962857961655, "learning_rate": 0.0001446464737386921, "loss": 0.2804, "step": 17237 }, { "epoch": 1.3964679196370706, "grad_norm": 0.04864729195833206, "learning_rate": 0.00014464197308609747, "loss": 0.3261, "step": 17238 }, { "epoch": 1.396548930654569, "grad_norm": 0.042114321142435074, "learning_rate": 0.00014463747243350289, "loss": 0.2805, "step": 17239 }, { "epoch": 1.3966299416720673, "grad_norm": 0.045672401785850525, "learning_rate": 0.00014463297178090825, "loss": 0.2741, "step": 17240 }, { "epoch": 1.3967109526895658, "grad_norm": 0.048088978976011276, "learning_rate": 0.0001446284711283136, "loss": 0.3332, "step": 17241 }, { "epoch": 1.3967919637070643, "grad_norm": 0.04908011108636856, "learning_rate": 0.000144623970475719, "loss": 0.3323, "step": 17242 }, { "epoch": 1.3968729747245625, "grad_norm": 0.041393984109163284, "learning_rate": 0.00014461946982312435, "loss": 0.304, "step": 17243 }, { "epoch": 1.396953985742061, "grad_norm": 0.041824113577604294, "learning_rate": 0.0001446149691705297, "loss": 0.2911, "step": 17244 }, { "epoch": 1.3970349967595592, "grad_norm": 0.04793976619839668, "learning_rate": 0.00014461046851793513, "loss": 0.3734, "step": 17245 }, { "epoch": 1.3971160077770577, "grad_norm": 0.04303743317723274, "learning_rate": 0.0001446059678653405, "loss": 0.2384, "step": 17246 }, { "epoch": 1.397197018794556, "grad_norm": 0.04717111960053444, "learning_rate": 0.00014460146721274585, "loss": 0.3152, "step": 17247 }, { "epoch": 1.3972780298120544, "grad_norm": 0.040281910449266434, "learning_rate": 0.00014459696656015123, "loss": 0.2517, "step": 17248 }, { "epoch": 1.3973590408295529, "grad_norm": 0.056516893208026886, "learning_rate": 0.0001445924659075566, "loss": 0.3499, "step": 17249 }, { "epoch": 1.3974400518470511, "grad_norm": 0.04654380679130554, "learning_rate": 0.00014458796525496195, "loss": 0.3169, "step": 17250 }, { "epoch": 1.3975210628645496, "grad_norm": 0.03778151795268059, "learning_rate": 0.00014458346460236737, "loss": 0.248, "step": 17251 }, { "epoch": 1.397602073882048, "grad_norm": 0.040179815143346786, "learning_rate": 0.00014457896394977273, "loss": 0.2426, "step": 17252 }, { "epoch": 1.3976830848995463, "grad_norm": 0.0417088121175766, "learning_rate": 0.0001445744632971781, "loss": 0.2871, "step": 17253 }, { "epoch": 1.3977640959170448, "grad_norm": 0.04187750443816185, "learning_rate": 0.00014456996264458348, "loss": 0.2724, "step": 17254 }, { "epoch": 1.3978451069345432, "grad_norm": 0.04558548703789711, "learning_rate": 0.00014456546199198884, "loss": 0.2547, "step": 17255 }, { "epoch": 1.3979261179520415, "grad_norm": 0.04336896911263466, "learning_rate": 0.00014456096133939422, "loss": 0.2819, "step": 17256 }, { "epoch": 1.39800712896954, "grad_norm": 0.04509134590625763, "learning_rate": 0.0001445564606867996, "loss": 0.2874, "step": 17257 }, { "epoch": 1.3980881399870382, "grad_norm": 0.05202138051390648, "learning_rate": 0.00014455196003420497, "loss": 0.3186, "step": 17258 }, { "epoch": 1.3981691510045366, "grad_norm": 0.04768068343400955, "learning_rate": 0.00014454745938161033, "loss": 0.2865, "step": 17259 }, { "epoch": 1.398250162022035, "grad_norm": 0.050039276480674744, "learning_rate": 0.00014454295872901572, "loss": 0.3219, "step": 17260 }, { "epoch": 1.3983311730395334, "grad_norm": 0.04390815645456314, "learning_rate": 0.00014453845807642108, "loss": 0.2912, "step": 17261 }, { "epoch": 1.3984121840570318, "grad_norm": 0.047734200954437256, "learning_rate": 0.00014453395742382647, "loss": 0.2992, "step": 17262 }, { "epoch": 1.39849319507453, "grad_norm": 0.043062061071395874, "learning_rate": 0.00014452945677123185, "loss": 0.2591, "step": 17263 }, { "epoch": 1.3985742060920285, "grad_norm": 0.057991400361061096, "learning_rate": 0.0001445249561186372, "loss": 0.3354, "step": 17264 }, { "epoch": 1.398655217109527, "grad_norm": 0.05490287020802498, "learning_rate": 0.00014452045546604257, "loss": 0.2914, "step": 17265 }, { "epoch": 1.3987362281270252, "grad_norm": 0.04641677439212799, "learning_rate": 0.00014451595481344796, "loss": 0.2985, "step": 17266 }, { "epoch": 1.3988172391445237, "grad_norm": 0.043556440621614456, "learning_rate": 0.00014451145416085332, "loss": 0.2545, "step": 17267 }, { "epoch": 1.398898250162022, "grad_norm": 0.050701990723609924, "learning_rate": 0.0001445069535082587, "loss": 0.3046, "step": 17268 }, { "epoch": 1.3989792611795204, "grad_norm": 0.04276740923523903, "learning_rate": 0.0001445024528556641, "loss": 0.2597, "step": 17269 }, { "epoch": 1.3990602721970187, "grad_norm": 0.050360411405563354, "learning_rate": 0.00014449795220306945, "loss": 0.298, "step": 17270 }, { "epoch": 1.3991412832145171, "grad_norm": 0.050647344440221786, "learning_rate": 0.00014449345155047481, "loss": 0.29, "step": 17271 }, { "epoch": 1.3992222942320156, "grad_norm": 0.04819323495030403, "learning_rate": 0.0001444889508978802, "loss": 0.2892, "step": 17272 }, { "epoch": 1.3993033052495139, "grad_norm": 0.046121057122945786, "learning_rate": 0.00014448445024528556, "loss": 0.2957, "step": 17273 }, { "epoch": 1.3993843162670123, "grad_norm": 0.042291220277547836, "learning_rate": 0.00014447994959269095, "loss": 0.2936, "step": 17274 }, { "epoch": 1.3994653272845108, "grad_norm": 0.04065663740038872, "learning_rate": 0.00014447544894009634, "loss": 0.2683, "step": 17275 }, { "epoch": 1.399546338302009, "grad_norm": 0.04960739612579346, "learning_rate": 0.0001444709482875017, "loss": 0.3218, "step": 17276 }, { "epoch": 1.3996273493195075, "grad_norm": 0.0449470616877079, "learning_rate": 0.00014446644763490706, "loss": 0.3225, "step": 17277 }, { "epoch": 1.399708360337006, "grad_norm": 0.038688644766807556, "learning_rate": 0.00014446194698231244, "loss": 0.2883, "step": 17278 }, { "epoch": 1.3997893713545042, "grad_norm": 0.04773914813995361, "learning_rate": 0.00014445744632971783, "loss": 0.2974, "step": 17279 }, { "epoch": 1.3998703823720027, "grad_norm": 0.04623384773731232, "learning_rate": 0.0001444529456771232, "loss": 0.2791, "step": 17280 }, { "epoch": 1.399951393389501, "grad_norm": 0.05427708104252815, "learning_rate": 0.00014444844502452858, "loss": 0.3503, "step": 17281 }, { "epoch": 1.4000324044069994, "grad_norm": 0.04826388135552406, "learning_rate": 0.00014444394437193394, "loss": 0.2907, "step": 17282 }, { "epoch": 1.4001134154244976, "grad_norm": 0.04934573173522949, "learning_rate": 0.0001444394437193393, "loss": 0.3046, "step": 17283 }, { "epoch": 1.400194426441996, "grad_norm": 0.053253334015607834, "learning_rate": 0.00014443494306674468, "loss": 0.2954, "step": 17284 }, { "epoch": 1.4002754374594946, "grad_norm": 0.04720380902290344, "learning_rate": 0.00014443044241415007, "loss": 0.2678, "step": 17285 }, { "epoch": 1.4003564484769928, "grad_norm": 0.05413306504487991, "learning_rate": 0.00014442594176155543, "loss": 0.3269, "step": 17286 }, { "epoch": 1.4004374594944913, "grad_norm": 0.04777345061302185, "learning_rate": 0.00014442144110896082, "loss": 0.2964, "step": 17287 }, { "epoch": 1.4005184705119897, "grad_norm": 0.04929986596107483, "learning_rate": 0.00014441694045636618, "loss": 0.3227, "step": 17288 }, { "epoch": 1.400599481529488, "grad_norm": 0.05416525527834892, "learning_rate": 0.00014441243980377154, "loss": 0.342, "step": 17289 }, { "epoch": 1.4006804925469865, "grad_norm": 0.04756501317024231, "learning_rate": 0.00014440793915117693, "loss": 0.2701, "step": 17290 }, { "epoch": 1.4007615035644847, "grad_norm": 0.05230550467967987, "learning_rate": 0.0001444034384985823, "loss": 0.3122, "step": 17291 }, { "epoch": 1.4008425145819832, "grad_norm": 0.04841061308979988, "learning_rate": 0.00014439893784598767, "loss": 0.283, "step": 17292 }, { "epoch": 1.4009235255994814, "grad_norm": 0.054997723549604416, "learning_rate": 0.00014439443719339306, "loss": 0.2914, "step": 17293 }, { "epoch": 1.4010045366169799, "grad_norm": 0.04789207503199577, "learning_rate": 0.00014438993654079842, "loss": 0.3314, "step": 17294 }, { "epoch": 1.4010855476344783, "grad_norm": 0.04890673980116844, "learning_rate": 0.00014438543588820378, "loss": 0.2881, "step": 17295 }, { "epoch": 1.4011665586519766, "grad_norm": 0.052223674952983856, "learning_rate": 0.00014438093523560917, "loss": 0.288, "step": 17296 }, { "epoch": 1.401247569669475, "grad_norm": 0.04447759687900543, "learning_rate": 0.00014437643458301455, "loss": 0.3056, "step": 17297 }, { "epoch": 1.4013285806869735, "grad_norm": 0.05134432390332222, "learning_rate": 0.00014437193393041991, "loss": 0.3211, "step": 17298 }, { "epoch": 1.4014095917044718, "grad_norm": 0.04146159812808037, "learning_rate": 0.0001443674332778253, "loss": 0.2842, "step": 17299 }, { "epoch": 1.4014906027219702, "grad_norm": 0.049241818487644196, "learning_rate": 0.00014436293262523066, "loss": 0.2775, "step": 17300 }, { "epoch": 1.4015716137394687, "grad_norm": 0.044086962938308716, "learning_rate": 0.00014435843197263602, "loss": 0.2868, "step": 17301 }, { "epoch": 1.401652624756967, "grad_norm": 0.04333024471998215, "learning_rate": 0.0001443539313200414, "loss": 0.2454, "step": 17302 }, { "epoch": 1.4017336357744652, "grad_norm": 0.04945259913802147, "learning_rate": 0.0001443494306674468, "loss": 0.3419, "step": 17303 }, { "epoch": 1.4018146467919637, "grad_norm": 0.04579545557498932, "learning_rate": 0.00014434493001485216, "loss": 0.2969, "step": 17304 }, { "epoch": 1.4018956578094621, "grad_norm": 0.04276122525334358, "learning_rate": 0.00014434042936225754, "loss": 0.2742, "step": 17305 }, { "epoch": 1.4019766688269604, "grad_norm": 0.04025482386350632, "learning_rate": 0.0001443359287096629, "loss": 0.2863, "step": 17306 }, { "epoch": 1.4020576798444588, "grad_norm": 0.04628659412264824, "learning_rate": 0.00014433142805706826, "loss": 0.3348, "step": 17307 }, { "epoch": 1.4021386908619573, "grad_norm": 0.04473595693707466, "learning_rate": 0.00014432692740447368, "loss": 0.334, "step": 17308 }, { "epoch": 1.4022197018794555, "grad_norm": 0.04520515352487564, "learning_rate": 0.00014432242675187904, "loss": 0.2613, "step": 17309 }, { "epoch": 1.402300712896954, "grad_norm": 0.04926326498389244, "learning_rate": 0.0001443179260992844, "loss": 0.2874, "step": 17310 }, { "epoch": 1.4023817239144525, "grad_norm": 0.050219591706991196, "learning_rate": 0.00014431342544668979, "loss": 0.3218, "step": 17311 }, { "epoch": 1.4024627349319507, "grad_norm": 0.04277326911687851, "learning_rate": 0.00014430892479409515, "loss": 0.2502, "step": 17312 }, { "epoch": 1.4025437459494492, "grad_norm": 0.04090512916445732, "learning_rate": 0.0001443044241415005, "loss": 0.2855, "step": 17313 }, { "epoch": 1.4026247569669474, "grad_norm": 0.03912261128425598, "learning_rate": 0.00014429992348890592, "loss": 0.2717, "step": 17314 }, { "epoch": 1.402705767984446, "grad_norm": 0.04668138921260834, "learning_rate": 0.00014429542283631128, "loss": 0.2876, "step": 17315 }, { "epoch": 1.4027867790019442, "grad_norm": 0.04623110964894295, "learning_rate": 0.00014429092218371664, "loss": 0.3286, "step": 17316 }, { "epoch": 1.4028677900194426, "grad_norm": 0.052278418093919754, "learning_rate": 0.00014428642153112203, "loss": 0.3155, "step": 17317 }, { "epoch": 1.402948801036941, "grad_norm": 0.04533730074763298, "learning_rate": 0.0001442819208785274, "loss": 0.279, "step": 17318 }, { "epoch": 1.4030298120544393, "grad_norm": 0.047513846307992935, "learning_rate": 0.00014427742022593275, "loss": 0.2908, "step": 17319 }, { "epoch": 1.4031108230719378, "grad_norm": 0.04029323533177376, "learning_rate": 0.00014427291957333816, "loss": 0.2659, "step": 17320 }, { "epoch": 1.4031918340894363, "grad_norm": 0.041442520916461945, "learning_rate": 0.00014426841892074352, "loss": 0.2502, "step": 17321 }, { "epoch": 1.4032728451069345, "grad_norm": 0.039636142551898956, "learning_rate": 0.00014426391826814888, "loss": 0.2842, "step": 17322 }, { "epoch": 1.403353856124433, "grad_norm": 0.04423043504357338, "learning_rate": 0.00014425941761555427, "loss": 0.2613, "step": 17323 }, { "epoch": 1.4034348671419314, "grad_norm": 0.04182843491435051, "learning_rate": 0.00014425491696295963, "loss": 0.2998, "step": 17324 }, { "epoch": 1.4035158781594297, "grad_norm": 0.043021295219659805, "learning_rate": 0.000144250416310365, "loss": 0.2717, "step": 17325 }, { "epoch": 1.403596889176928, "grad_norm": 0.060046643018722534, "learning_rate": 0.0001442459156577704, "loss": 0.313, "step": 17326 }, { "epoch": 1.4036779001944264, "grad_norm": 0.04655013605952263, "learning_rate": 0.00014424141500517576, "loss": 0.2629, "step": 17327 }, { "epoch": 1.4037589112119249, "grad_norm": 0.04855664446949959, "learning_rate": 0.00014423691435258112, "loss": 0.2973, "step": 17328 }, { "epoch": 1.403839922229423, "grad_norm": 0.044373802840709686, "learning_rate": 0.0001442324136999865, "loss": 0.2731, "step": 17329 }, { "epoch": 1.4039209332469216, "grad_norm": 0.045156631618738174, "learning_rate": 0.00014422791304739187, "loss": 0.3102, "step": 17330 }, { "epoch": 1.40400194426442, "grad_norm": 0.048915185034275055, "learning_rate": 0.00014422341239479723, "loss": 0.2817, "step": 17331 }, { "epoch": 1.4040829552819183, "grad_norm": 0.05194459110498428, "learning_rate": 0.00014421891174220264, "loss": 0.2527, "step": 17332 }, { "epoch": 1.4041639662994168, "grad_norm": 0.05551757663488388, "learning_rate": 0.000144214411089608, "loss": 0.3105, "step": 17333 }, { "epoch": 1.4042449773169152, "grad_norm": 0.05318770557641983, "learning_rate": 0.00014420991043701336, "loss": 0.282, "step": 17334 }, { "epoch": 1.4043259883344135, "grad_norm": 0.05146007612347603, "learning_rate": 0.00014420540978441875, "loss": 0.3012, "step": 17335 }, { "epoch": 1.404406999351912, "grad_norm": 0.04602733254432678, "learning_rate": 0.0001442009091318241, "loss": 0.2793, "step": 17336 }, { "epoch": 1.4044880103694102, "grad_norm": 0.048853326588869095, "learning_rate": 0.0001441964084792295, "loss": 0.274, "step": 17337 }, { "epoch": 1.4045690213869086, "grad_norm": 0.0575927197933197, "learning_rate": 0.00014419190782663489, "loss": 0.3131, "step": 17338 }, { "epoch": 1.404650032404407, "grad_norm": 0.04368162155151367, "learning_rate": 0.00014418740717404025, "loss": 0.2708, "step": 17339 }, { "epoch": 1.4047310434219054, "grad_norm": 0.050481297075748444, "learning_rate": 0.0001441829065214456, "loss": 0.3178, "step": 17340 }, { "epoch": 1.4048120544394038, "grad_norm": 0.05853893235325813, "learning_rate": 0.000144178405868851, "loss": 0.2754, "step": 17341 }, { "epoch": 1.404893065456902, "grad_norm": 0.04734257981181145, "learning_rate": 0.00014417390521625635, "loss": 0.2655, "step": 17342 }, { "epoch": 1.4049740764744005, "grad_norm": 0.04163322597742081, "learning_rate": 0.00014416940456366174, "loss": 0.2701, "step": 17343 }, { "epoch": 1.405055087491899, "grad_norm": 0.049195900559425354, "learning_rate": 0.00014416490391106713, "loss": 0.2844, "step": 17344 }, { "epoch": 1.4051360985093972, "grad_norm": 0.04943544417619705, "learning_rate": 0.0001441604032584725, "loss": 0.3052, "step": 17345 }, { "epoch": 1.4052171095268957, "grad_norm": 0.03431745246052742, "learning_rate": 0.00014415590260587785, "loss": 0.2339, "step": 17346 }, { "epoch": 1.405298120544394, "grad_norm": 0.0482507050037384, "learning_rate": 0.00014415140195328324, "loss": 0.3311, "step": 17347 }, { "epoch": 1.4053791315618924, "grad_norm": 0.05462920293211937, "learning_rate": 0.0001441469013006886, "loss": 0.2867, "step": 17348 }, { "epoch": 1.4054601425793907, "grad_norm": 0.04644763469696045, "learning_rate": 0.00014414240064809398, "loss": 0.2695, "step": 17349 }, { "epoch": 1.4055411535968891, "grad_norm": 0.046674180775880814, "learning_rate": 0.00014413789999549937, "loss": 0.2702, "step": 17350 }, { "epoch": 1.4056221646143876, "grad_norm": 0.04269137978553772, "learning_rate": 0.00014413339934290473, "loss": 0.2963, "step": 17351 }, { "epoch": 1.4057031756318858, "grad_norm": 0.05493892356753349, "learning_rate": 0.0001441288986903101, "loss": 0.2937, "step": 17352 }, { "epoch": 1.4057841866493843, "grad_norm": 0.04131404310464859, "learning_rate": 0.00014412439803771548, "loss": 0.2734, "step": 17353 }, { "epoch": 1.4058651976668828, "grad_norm": 0.049556102603673935, "learning_rate": 0.00014411989738512084, "loss": 0.2928, "step": 17354 }, { "epoch": 1.405946208684381, "grad_norm": 0.05011004954576492, "learning_rate": 0.00014411539673252622, "loss": 0.3133, "step": 17355 }, { "epoch": 1.4060272197018795, "grad_norm": 0.044947218149900436, "learning_rate": 0.0001441108960799316, "loss": 0.279, "step": 17356 }, { "epoch": 1.406108230719378, "grad_norm": 0.04014430567622185, "learning_rate": 0.00014410639542733697, "loss": 0.2732, "step": 17357 }, { "epoch": 1.4061892417368762, "grad_norm": 0.044052235782146454, "learning_rate": 0.00014410189477474233, "loss": 0.2701, "step": 17358 }, { "epoch": 1.4062702527543747, "grad_norm": 0.05131961405277252, "learning_rate": 0.00014409739412214772, "loss": 0.2827, "step": 17359 }, { "epoch": 1.406351263771873, "grad_norm": 0.048617325723171234, "learning_rate": 0.0001440928934695531, "loss": 0.2951, "step": 17360 }, { "epoch": 1.4064322747893714, "grad_norm": 0.04641023278236389, "learning_rate": 0.00014408839281695847, "loss": 0.2634, "step": 17361 }, { "epoch": 1.4065132858068696, "grad_norm": 0.04850432276725769, "learning_rate": 0.00014408389216436385, "loss": 0.3197, "step": 17362 }, { "epoch": 1.406594296824368, "grad_norm": 0.052125390619039536, "learning_rate": 0.0001440793915117692, "loss": 0.318, "step": 17363 }, { "epoch": 1.4066753078418666, "grad_norm": 0.05008331313729286, "learning_rate": 0.00014407489085917457, "loss": 0.2845, "step": 17364 }, { "epoch": 1.4067563188593648, "grad_norm": 0.043981775641441345, "learning_rate": 0.00014407039020657996, "loss": 0.2716, "step": 17365 }, { "epoch": 1.4068373298768633, "grad_norm": 0.04916222020983696, "learning_rate": 0.00014406588955398535, "loss": 0.3511, "step": 17366 }, { "epoch": 1.4069183408943617, "grad_norm": 0.05921647697687149, "learning_rate": 0.0001440613889013907, "loss": 0.3094, "step": 17367 }, { "epoch": 1.40699935191186, "grad_norm": 0.059582922607660294, "learning_rate": 0.0001440568882487961, "loss": 0.3039, "step": 17368 }, { "epoch": 1.4070803629293585, "grad_norm": 0.046799056231975555, "learning_rate": 0.00014405238759620145, "loss": 0.2956, "step": 17369 }, { "epoch": 1.4071613739468567, "grad_norm": 0.04210817441344261, "learning_rate": 0.00014404788694360681, "loss": 0.282, "step": 17370 }, { "epoch": 1.4072423849643552, "grad_norm": 0.047428447753190994, "learning_rate": 0.0001440433862910122, "loss": 0.3341, "step": 17371 }, { "epoch": 1.4073233959818534, "grad_norm": 0.04631941020488739, "learning_rate": 0.0001440388856384176, "loss": 0.2893, "step": 17372 }, { "epoch": 1.4074044069993519, "grad_norm": 0.04561125487089157, "learning_rate": 0.00014403438498582295, "loss": 0.2995, "step": 17373 }, { "epoch": 1.4074854180168503, "grad_norm": 0.041558220982551575, "learning_rate": 0.00014402988433322834, "loss": 0.2435, "step": 17374 }, { "epoch": 1.4075664290343486, "grad_norm": 0.04296829551458359, "learning_rate": 0.0001440253836806337, "loss": 0.2909, "step": 17375 }, { "epoch": 1.407647440051847, "grad_norm": 0.04158598557114601, "learning_rate": 0.00014402088302803906, "loss": 0.2772, "step": 17376 }, { "epoch": 1.4077284510693455, "grad_norm": 0.048123203217983246, "learning_rate": 0.00014401638237544444, "loss": 0.317, "step": 17377 }, { "epoch": 1.4078094620868438, "grad_norm": 0.046641673892736435, "learning_rate": 0.00014401188172284983, "loss": 0.2621, "step": 17378 }, { "epoch": 1.4078904731043422, "grad_norm": 0.04805005341768265, "learning_rate": 0.0001440073810702552, "loss": 0.2927, "step": 17379 }, { "epoch": 1.4079714841218407, "grad_norm": 0.042665332555770874, "learning_rate": 0.00014400288041766058, "loss": 0.2765, "step": 17380 }, { "epoch": 1.408052495139339, "grad_norm": 0.040832191705703735, "learning_rate": 0.00014399837976506594, "loss": 0.2718, "step": 17381 }, { "epoch": 1.4081335061568374, "grad_norm": 0.052016451954841614, "learning_rate": 0.0001439938791124713, "loss": 0.3092, "step": 17382 }, { "epoch": 1.4082145171743357, "grad_norm": 0.051831454038619995, "learning_rate": 0.00014398937845987668, "loss": 0.2816, "step": 17383 }, { "epoch": 1.4082955281918341, "grad_norm": 0.05814917013049126, "learning_rate": 0.00014398487780728207, "loss": 0.3025, "step": 17384 }, { "epoch": 1.4083765392093324, "grad_norm": 0.044517673552036285, "learning_rate": 0.00014398037715468743, "loss": 0.302, "step": 17385 }, { "epoch": 1.4084575502268308, "grad_norm": 0.04865877702832222, "learning_rate": 0.00014397587650209282, "loss": 0.2901, "step": 17386 }, { "epoch": 1.4085385612443293, "grad_norm": 0.04901030287146568, "learning_rate": 0.00014397137584949818, "loss": 0.2944, "step": 17387 }, { "epoch": 1.4086195722618275, "grad_norm": 0.049782563000917435, "learning_rate": 0.00014396687519690354, "loss": 0.3291, "step": 17388 }, { "epoch": 1.408700583279326, "grad_norm": 0.04484162852168083, "learning_rate": 0.00014396237454430895, "loss": 0.2685, "step": 17389 }, { "epoch": 1.4087815942968245, "grad_norm": 0.044998399913311005, "learning_rate": 0.00014395787389171431, "loss": 0.3116, "step": 17390 }, { "epoch": 1.4088626053143227, "grad_norm": 0.05644839629530907, "learning_rate": 0.00014395337323911967, "loss": 0.3065, "step": 17391 }, { "epoch": 1.4089436163318212, "grad_norm": 0.04989921674132347, "learning_rate": 0.00014394887258652506, "loss": 0.305, "step": 17392 }, { "epoch": 1.4090246273493194, "grad_norm": 0.04435759410262108, "learning_rate": 0.00014394437193393042, "loss": 0.2975, "step": 17393 }, { "epoch": 1.409105638366818, "grad_norm": 0.03662458434700966, "learning_rate": 0.00014393987128133578, "loss": 0.237, "step": 17394 }, { "epoch": 1.4091866493843161, "grad_norm": 0.04757731780409813, "learning_rate": 0.0001439353706287412, "loss": 0.2674, "step": 17395 }, { "epoch": 1.4092676604018146, "grad_norm": 0.04408986121416092, "learning_rate": 0.00014393086997614656, "loss": 0.2879, "step": 17396 }, { "epoch": 1.409348671419313, "grad_norm": 0.04086359590291977, "learning_rate": 0.00014392636932355192, "loss": 0.2643, "step": 17397 }, { "epoch": 1.4094296824368113, "grad_norm": 0.04684007167816162, "learning_rate": 0.0001439218686709573, "loss": 0.3393, "step": 17398 }, { "epoch": 1.4095106934543098, "grad_norm": 0.05799132585525513, "learning_rate": 0.00014391736801836266, "loss": 0.3512, "step": 17399 }, { "epoch": 1.4095917044718083, "grad_norm": 0.046153489500284195, "learning_rate": 0.00014391286736576802, "loss": 0.2964, "step": 17400 }, { "epoch": 1.4096727154893065, "grad_norm": 0.041574712842702866, "learning_rate": 0.00014390836671317344, "loss": 0.306, "step": 17401 }, { "epoch": 1.409753726506805, "grad_norm": 0.046612098813056946, "learning_rate": 0.0001439038660605788, "loss": 0.3065, "step": 17402 }, { "epoch": 1.4098347375243034, "grad_norm": 0.056140147149562836, "learning_rate": 0.00014389936540798416, "loss": 0.3139, "step": 17403 }, { "epoch": 1.4099157485418017, "grad_norm": 0.04897418990731239, "learning_rate": 0.00014389486475538954, "loss": 0.3261, "step": 17404 }, { "epoch": 1.4099967595593, "grad_norm": 0.046919066458940506, "learning_rate": 0.0001438903641027949, "loss": 0.2988, "step": 17405 }, { "epoch": 1.4100777705767984, "grad_norm": 0.044254470616579056, "learning_rate": 0.00014388586345020026, "loss": 0.2725, "step": 17406 }, { "epoch": 1.4101587815942969, "grad_norm": 0.04446631669998169, "learning_rate": 0.00014388136279760568, "loss": 0.2901, "step": 17407 }, { "epoch": 1.410239792611795, "grad_norm": 0.041478581726551056, "learning_rate": 0.00014387686214501104, "loss": 0.2538, "step": 17408 }, { "epoch": 1.4103208036292936, "grad_norm": 0.04481646791100502, "learning_rate": 0.0001438723614924164, "loss": 0.2382, "step": 17409 }, { "epoch": 1.410401814646792, "grad_norm": 0.05845337361097336, "learning_rate": 0.00014386786083982179, "loss": 0.2876, "step": 17410 }, { "epoch": 1.4104828256642903, "grad_norm": 0.04668666422367096, "learning_rate": 0.00014386336018722715, "loss": 0.2543, "step": 17411 }, { "epoch": 1.4105638366817888, "grad_norm": 0.05035103112459183, "learning_rate": 0.00014385885953463253, "loss": 0.31, "step": 17412 }, { "epoch": 1.4106448476992872, "grad_norm": 0.050838127732276917, "learning_rate": 0.00014385435888203792, "loss": 0.3168, "step": 17413 }, { "epoch": 1.4107258587167855, "grad_norm": 0.045839011669158936, "learning_rate": 0.00014384985822944328, "loss": 0.3024, "step": 17414 }, { "epoch": 1.410806869734284, "grad_norm": 0.05352982133626938, "learning_rate": 0.00014384535757684864, "loss": 0.244, "step": 17415 }, { "epoch": 1.4108878807517822, "grad_norm": 0.03870326653122902, "learning_rate": 0.00014384085692425403, "loss": 0.3024, "step": 17416 }, { "epoch": 1.4109688917692806, "grad_norm": 0.04420241713523865, "learning_rate": 0.0001438363562716594, "loss": 0.3003, "step": 17417 }, { "epoch": 1.4110499027867789, "grad_norm": 0.0470711812376976, "learning_rate": 0.00014383185561906477, "loss": 0.314, "step": 17418 }, { "epoch": 1.4111309138042774, "grad_norm": 0.05128021910786629, "learning_rate": 0.00014382735496647016, "loss": 0.2769, "step": 17419 }, { "epoch": 1.4112119248217758, "grad_norm": 0.05623095855116844, "learning_rate": 0.00014382285431387552, "loss": 0.2973, "step": 17420 }, { "epoch": 1.411292935839274, "grad_norm": 0.04474678263068199, "learning_rate": 0.00014381835366128088, "loss": 0.2599, "step": 17421 }, { "epoch": 1.4113739468567725, "grad_norm": 0.047019362449645996, "learning_rate": 0.00014381385300868627, "loss": 0.3274, "step": 17422 }, { "epoch": 1.411454957874271, "grad_norm": 0.048342298716306686, "learning_rate": 0.00014380935235609163, "loss": 0.2736, "step": 17423 }, { "epoch": 1.4115359688917692, "grad_norm": 0.05731989070773125, "learning_rate": 0.00014380485170349702, "loss": 0.3377, "step": 17424 }, { "epoch": 1.4116169799092677, "grad_norm": 0.05092157796025276, "learning_rate": 0.0001438003510509024, "loss": 0.2952, "step": 17425 }, { "epoch": 1.4116979909267662, "grad_norm": 0.05478844791650772, "learning_rate": 0.00014379585039830776, "loss": 0.2794, "step": 17426 }, { "epoch": 1.4117790019442644, "grad_norm": 0.055522721260786057, "learning_rate": 0.00014379134974571312, "loss": 0.3056, "step": 17427 }, { "epoch": 1.4118600129617627, "grad_norm": 0.058090608566999435, "learning_rate": 0.0001437868490931185, "loss": 0.316, "step": 17428 }, { "epoch": 1.4119410239792611, "grad_norm": 0.050185300409793854, "learning_rate": 0.00014378234844052387, "loss": 0.3293, "step": 17429 }, { "epoch": 1.4120220349967596, "grad_norm": 0.053385164588689804, "learning_rate": 0.00014377784778792926, "loss": 0.273, "step": 17430 }, { "epoch": 1.4121030460142578, "grad_norm": 0.039984747767448425, "learning_rate": 0.00014377334713533464, "loss": 0.2756, "step": 17431 }, { "epoch": 1.4121840570317563, "grad_norm": 0.04153714329004288, "learning_rate": 0.00014376884648274, "loss": 0.2774, "step": 17432 }, { "epoch": 1.4122650680492548, "grad_norm": 0.04483303055167198, "learning_rate": 0.00014376434583014536, "loss": 0.2972, "step": 17433 }, { "epoch": 1.412346079066753, "grad_norm": 0.048719923943281174, "learning_rate": 0.00014375984517755075, "loss": 0.2931, "step": 17434 }, { "epoch": 1.4124270900842515, "grad_norm": 0.05421128123998642, "learning_rate": 0.0001437553445249561, "loss": 0.3014, "step": 17435 }, { "epoch": 1.41250810110175, "grad_norm": 0.04559450224041939, "learning_rate": 0.0001437508438723615, "loss": 0.2765, "step": 17436 }, { "epoch": 1.4125891121192482, "grad_norm": 0.04924327880144119, "learning_rate": 0.0001437463432197669, "loss": 0.2795, "step": 17437 }, { "epoch": 1.4126701231367467, "grad_norm": 0.047822628170251846, "learning_rate": 0.00014374184256717225, "loss": 0.2936, "step": 17438 }, { "epoch": 1.412751134154245, "grad_norm": 0.05012073740363121, "learning_rate": 0.0001437373419145776, "loss": 0.2822, "step": 17439 }, { "epoch": 1.4128321451717434, "grad_norm": 0.04607747495174408, "learning_rate": 0.000143732841261983, "loss": 0.2702, "step": 17440 }, { "epoch": 1.4129131561892416, "grad_norm": 0.0424031987786293, "learning_rate": 0.00014372834060938838, "loss": 0.2642, "step": 17441 }, { "epoch": 1.41299416720674, "grad_norm": 0.04610888659954071, "learning_rate": 0.00014372383995679374, "loss": 0.3052, "step": 17442 }, { "epoch": 1.4130751782242386, "grad_norm": 0.05413646996021271, "learning_rate": 0.00014371933930419913, "loss": 0.3381, "step": 17443 }, { "epoch": 1.4131561892417368, "grad_norm": 0.041760869324207306, "learning_rate": 0.0001437148386516045, "loss": 0.2994, "step": 17444 }, { "epoch": 1.4132372002592353, "grad_norm": 0.05390756204724312, "learning_rate": 0.00014371033799900985, "loss": 0.3173, "step": 17445 }, { "epoch": 1.4133182112767337, "grad_norm": 0.04660959541797638, "learning_rate": 0.00014370583734641524, "loss": 0.2799, "step": 17446 }, { "epoch": 1.413399222294232, "grad_norm": 0.05528154969215393, "learning_rate": 0.00014370133669382062, "loss": 0.2931, "step": 17447 }, { "epoch": 1.4134802333117304, "grad_norm": 0.052793197333812714, "learning_rate": 0.00014369683604122598, "loss": 0.3628, "step": 17448 }, { "epoch": 1.4135612443292287, "grad_norm": 0.059102971106767654, "learning_rate": 0.00014369233538863137, "loss": 0.2787, "step": 17449 }, { "epoch": 1.4136422553467272, "grad_norm": 0.04352337867021561, "learning_rate": 0.00014368783473603673, "loss": 0.2759, "step": 17450 }, { "epoch": 1.4137232663642254, "grad_norm": 0.04289750009775162, "learning_rate": 0.0001436833340834421, "loss": 0.2716, "step": 17451 }, { "epoch": 1.4138042773817239, "grad_norm": 0.04225050285458565, "learning_rate": 0.00014367883343084748, "loss": 0.2609, "step": 17452 }, { "epoch": 1.4138852883992223, "grad_norm": 0.0459427535533905, "learning_rate": 0.00014367433277825286, "loss": 0.3019, "step": 17453 }, { "epoch": 1.4139662994167206, "grad_norm": 0.04791085422039032, "learning_rate": 0.00014366983212565822, "loss": 0.27, "step": 17454 }, { "epoch": 1.414047310434219, "grad_norm": 0.043755799531936646, "learning_rate": 0.0001436653314730636, "loss": 0.2919, "step": 17455 }, { "epoch": 1.4141283214517175, "grad_norm": 0.048999980092048645, "learning_rate": 0.00014366083082046897, "loss": 0.2995, "step": 17456 }, { "epoch": 1.4142093324692158, "grad_norm": 0.05153946951031685, "learning_rate": 0.00014365633016787433, "loss": 0.3496, "step": 17457 }, { "epoch": 1.4142903434867142, "grad_norm": 0.047688182443380356, "learning_rate": 0.00014365182951527972, "loss": 0.2802, "step": 17458 }, { "epoch": 1.4143713545042127, "grad_norm": 0.049873966723680496, "learning_rate": 0.0001436473288626851, "loss": 0.2901, "step": 17459 }, { "epoch": 1.414452365521711, "grad_norm": 0.038455478847026825, "learning_rate": 0.00014364282821009047, "loss": 0.2676, "step": 17460 }, { "epoch": 1.4145333765392094, "grad_norm": 0.045070916414260864, "learning_rate": 0.00014363832755749585, "loss": 0.3148, "step": 17461 }, { "epoch": 1.4146143875567077, "grad_norm": 0.051428597420454025, "learning_rate": 0.0001436338269049012, "loss": 0.3067, "step": 17462 }, { "epoch": 1.4146953985742061, "grad_norm": 0.044932324439287186, "learning_rate": 0.00014362932625230657, "loss": 0.284, "step": 17463 }, { "epoch": 1.4147764095917044, "grad_norm": 0.04706451669335365, "learning_rate": 0.00014362482559971196, "loss": 0.2753, "step": 17464 }, { "epoch": 1.4148574206092028, "grad_norm": 0.045721590518951416, "learning_rate": 0.00014362032494711735, "loss": 0.318, "step": 17465 }, { "epoch": 1.4149384316267013, "grad_norm": 0.04566137120127678, "learning_rate": 0.0001436158242945227, "loss": 0.2943, "step": 17466 }, { "epoch": 1.4150194426441995, "grad_norm": 0.05262121185660362, "learning_rate": 0.0001436113236419281, "loss": 0.2875, "step": 17467 }, { "epoch": 1.415100453661698, "grad_norm": 0.03956965357065201, "learning_rate": 0.00014360682298933345, "loss": 0.2832, "step": 17468 }, { "epoch": 1.4151814646791965, "grad_norm": 0.05302854999899864, "learning_rate": 0.00014360232233673881, "loss": 0.3504, "step": 17469 }, { "epoch": 1.4152624756966947, "grad_norm": 0.04107096791267395, "learning_rate": 0.00014359782168414423, "loss": 0.2568, "step": 17470 }, { "epoch": 1.4153434867141932, "grad_norm": 0.04127948731184006, "learning_rate": 0.0001435933210315496, "loss": 0.2456, "step": 17471 }, { "epoch": 1.4154244977316914, "grad_norm": 0.05177254602313042, "learning_rate": 0.00014358882037895495, "loss": 0.2714, "step": 17472 }, { "epoch": 1.41550550874919, "grad_norm": 0.051889799535274506, "learning_rate": 0.00014358431972636034, "loss": 0.3258, "step": 17473 }, { "epoch": 1.4155865197666881, "grad_norm": 0.05505822226405144, "learning_rate": 0.0001435798190737657, "loss": 0.3176, "step": 17474 }, { "epoch": 1.4156675307841866, "grad_norm": 0.045118533074855804, "learning_rate": 0.00014357531842117106, "loss": 0.2549, "step": 17475 }, { "epoch": 1.415748541801685, "grad_norm": 0.04382086545228958, "learning_rate": 0.00014357081776857647, "loss": 0.2793, "step": 17476 }, { "epoch": 1.4158295528191833, "grad_norm": 0.04673447832465172, "learning_rate": 0.00014356631711598183, "loss": 0.2626, "step": 17477 }, { "epoch": 1.4159105638366818, "grad_norm": 0.05098240077495575, "learning_rate": 0.0001435618164633872, "loss": 0.3066, "step": 17478 }, { "epoch": 1.4159915748541803, "grad_norm": 0.044868770986795425, "learning_rate": 0.00014355731581079258, "loss": 0.3084, "step": 17479 }, { "epoch": 1.4160725858716785, "grad_norm": 0.05144832283258438, "learning_rate": 0.00014355281515819794, "loss": 0.3084, "step": 17480 }, { "epoch": 1.416153596889177, "grad_norm": 0.04495812952518463, "learning_rate": 0.0001435483145056033, "loss": 0.2875, "step": 17481 }, { "epoch": 1.4162346079066754, "grad_norm": 0.0467311292886734, "learning_rate": 0.0001435438138530087, "loss": 0.2889, "step": 17482 }, { "epoch": 1.4163156189241737, "grad_norm": 0.041240394115448, "learning_rate": 0.00014353931320041407, "loss": 0.2478, "step": 17483 }, { "epoch": 1.4163966299416721, "grad_norm": 0.05013001710176468, "learning_rate": 0.00014353481254781943, "loss": 0.2951, "step": 17484 }, { "epoch": 1.4164776409591704, "grad_norm": 0.043379612267017365, "learning_rate": 0.00014353031189522482, "loss": 0.3091, "step": 17485 }, { "epoch": 1.4165586519766689, "grad_norm": 0.043509263545274734, "learning_rate": 0.00014352581124263018, "loss": 0.2935, "step": 17486 }, { "epoch": 1.416639662994167, "grad_norm": 0.0534079447388649, "learning_rate": 0.00014352131059003554, "loss": 0.3182, "step": 17487 }, { "epoch": 1.4167206740116656, "grad_norm": 0.03979664295911789, "learning_rate": 0.00014351680993744095, "loss": 0.2867, "step": 17488 }, { "epoch": 1.416801685029164, "grad_norm": 0.044355884194374084, "learning_rate": 0.00014351230928484631, "loss": 0.2988, "step": 17489 }, { "epoch": 1.4168826960466623, "grad_norm": 0.05444491282105446, "learning_rate": 0.00014350780863225167, "loss": 0.3639, "step": 17490 }, { "epoch": 1.4169637070641607, "grad_norm": 0.04946650564670563, "learning_rate": 0.00014350330797965706, "loss": 0.313, "step": 17491 }, { "epoch": 1.4170447180816592, "grad_norm": 0.04110720753669739, "learning_rate": 0.00014349880732706242, "loss": 0.2727, "step": 17492 }, { "epoch": 1.4171257290991575, "grad_norm": 0.04805957153439522, "learning_rate": 0.0001434943066744678, "loss": 0.2919, "step": 17493 }, { "epoch": 1.417206740116656, "grad_norm": 0.04576924815773964, "learning_rate": 0.0001434898060218732, "loss": 0.3147, "step": 17494 }, { "epoch": 1.4172877511341542, "grad_norm": 0.04689079895615578, "learning_rate": 0.00014348530536927856, "loss": 0.2974, "step": 17495 }, { "epoch": 1.4173687621516526, "grad_norm": 0.04951557517051697, "learning_rate": 0.00014348080471668392, "loss": 0.298, "step": 17496 }, { "epoch": 1.4174497731691509, "grad_norm": 0.051957134157419205, "learning_rate": 0.0001434763040640893, "loss": 0.3061, "step": 17497 }, { "epoch": 1.4175307841866494, "grad_norm": 0.04447033628821373, "learning_rate": 0.00014347180341149466, "loss": 0.3085, "step": 17498 }, { "epoch": 1.4176117952041478, "grad_norm": 0.04421267658472061, "learning_rate": 0.00014346730275890005, "loss": 0.2995, "step": 17499 }, { "epoch": 1.417692806221646, "grad_norm": 0.051344774663448334, "learning_rate": 0.00014346280210630544, "loss": 0.3357, "step": 17500 }, { "epoch": 1.4177738172391445, "grad_norm": 0.04639267548918724, "learning_rate": 0.0001434583014537108, "loss": 0.2876, "step": 17501 }, { "epoch": 1.417854828256643, "grad_norm": 0.052176062017679214, "learning_rate": 0.00014345380080111616, "loss": 0.3093, "step": 17502 }, { "epoch": 1.4179358392741412, "grad_norm": 0.043765075504779816, "learning_rate": 0.00014344930014852154, "loss": 0.2827, "step": 17503 }, { "epoch": 1.4180168502916397, "grad_norm": 0.04381469264626503, "learning_rate": 0.0001434447994959269, "loss": 0.3041, "step": 17504 }, { "epoch": 1.4180978613091382, "grad_norm": 0.04380662366747856, "learning_rate": 0.0001434402988433323, "loss": 0.2814, "step": 17505 }, { "epoch": 1.4181788723266364, "grad_norm": 0.049800705164670944, "learning_rate": 0.00014343579819073768, "loss": 0.2786, "step": 17506 }, { "epoch": 1.4182598833441347, "grad_norm": 0.039393678307533264, "learning_rate": 0.00014343129753814304, "loss": 0.2672, "step": 17507 }, { "epoch": 1.4183408943616331, "grad_norm": 0.04441035911440849, "learning_rate": 0.0001434267968855484, "loss": 0.3138, "step": 17508 }, { "epoch": 1.4184219053791316, "grad_norm": 0.06868467479944229, "learning_rate": 0.00014342229623295379, "loss": 0.3052, "step": 17509 }, { "epoch": 1.4185029163966298, "grad_norm": 0.038274139165878296, "learning_rate": 0.00014341779558035915, "loss": 0.2415, "step": 17510 }, { "epoch": 1.4185839274141283, "grad_norm": 0.04708916321396828, "learning_rate": 0.00014341329492776453, "loss": 0.2847, "step": 17511 }, { "epoch": 1.4186649384316268, "grad_norm": 0.03978331759572029, "learning_rate": 0.00014340879427516992, "loss": 0.2361, "step": 17512 }, { "epoch": 1.418745949449125, "grad_norm": 0.04263559356331825, "learning_rate": 0.00014340429362257528, "loss": 0.278, "step": 17513 }, { "epoch": 1.4188269604666235, "grad_norm": 0.04371342808008194, "learning_rate": 0.00014339979296998064, "loss": 0.2796, "step": 17514 }, { "epoch": 1.418907971484122, "grad_norm": 0.05186539515852928, "learning_rate": 0.00014339529231738603, "loss": 0.2925, "step": 17515 }, { "epoch": 1.4189889825016202, "grad_norm": 0.045742131769657135, "learning_rate": 0.0001433907916647914, "loss": 0.3061, "step": 17516 }, { "epoch": 1.4190699935191187, "grad_norm": 0.05176788941025734, "learning_rate": 0.00014338629101219677, "loss": 0.2963, "step": 17517 }, { "epoch": 1.419151004536617, "grad_norm": 0.05797381326556206, "learning_rate": 0.00014338179035960216, "loss": 0.2565, "step": 17518 }, { "epoch": 1.4192320155541154, "grad_norm": 0.03917957469820976, "learning_rate": 0.00014337728970700752, "loss": 0.25, "step": 17519 }, { "epoch": 1.4193130265716136, "grad_norm": 0.05365482345223427, "learning_rate": 0.00014337278905441288, "loss": 0.2898, "step": 17520 }, { "epoch": 1.419394037589112, "grad_norm": 0.03978351876139641, "learning_rate": 0.00014336828840181827, "loss": 0.2653, "step": 17521 }, { "epoch": 1.4194750486066106, "grad_norm": 0.05424314737319946, "learning_rate": 0.00014336378774922366, "loss": 0.3284, "step": 17522 }, { "epoch": 1.4195560596241088, "grad_norm": 0.05543599650263786, "learning_rate": 0.00014335928709662902, "loss": 0.3098, "step": 17523 }, { "epoch": 1.4196370706416073, "grad_norm": 0.04948757216334343, "learning_rate": 0.0001433547864440344, "loss": 0.3232, "step": 17524 }, { "epoch": 1.4197180816591057, "grad_norm": 0.05123332887887955, "learning_rate": 0.00014335028579143976, "loss": 0.3639, "step": 17525 }, { "epoch": 1.419799092676604, "grad_norm": 0.05080258101224899, "learning_rate": 0.00014334578513884512, "loss": 0.2935, "step": 17526 }, { "epoch": 1.4198801036941024, "grad_norm": 0.0436762310564518, "learning_rate": 0.0001433412844862505, "loss": 0.2761, "step": 17527 }, { "epoch": 1.419961114711601, "grad_norm": 0.04575483128428459, "learning_rate": 0.0001433367838336559, "loss": 0.2589, "step": 17528 }, { "epoch": 1.4200421257290992, "grad_norm": 0.04396912455558777, "learning_rate": 0.00014333228318106126, "loss": 0.298, "step": 17529 }, { "epoch": 1.4201231367465974, "grad_norm": 0.05272604525089264, "learning_rate": 0.00014332778252846665, "loss": 0.3248, "step": 17530 }, { "epoch": 1.4202041477640959, "grad_norm": 0.048338234424591064, "learning_rate": 0.000143323281875872, "loss": 0.3337, "step": 17531 }, { "epoch": 1.4202851587815943, "grad_norm": 0.05578792467713356, "learning_rate": 0.00014331878122327737, "loss": 0.3559, "step": 17532 }, { "epoch": 1.4203661697990926, "grad_norm": 0.04565225541591644, "learning_rate": 0.00014331428057068275, "loss": 0.3109, "step": 17533 }, { "epoch": 1.420447180816591, "grad_norm": 0.04514371603727341, "learning_rate": 0.00014330977991808814, "loss": 0.3046, "step": 17534 }, { "epoch": 1.4205281918340895, "grad_norm": 0.04526843503117561, "learning_rate": 0.0001433052792654935, "loss": 0.2795, "step": 17535 }, { "epoch": 1.4206092028515878, "grad_norm": 0.04618031159043312, "learning_rate": 0.0001433007786128989, "loss": 0.2451, "step": 17536 }, { "epoch": 1.4206902138690862, "grad_norm": 0.05096264183521271, "learning_rate": 0.00014329627796030425, "loss": 0.302, "step": 17537 }, { "epoch": 1.4207712248865847, "grad_norm": 0.048887792974710464, "learning_rate": 0.0001432917773077096, "loss": 0.3095, "step": 17538 }, { "epoch": 1.420852235904083, "grad_norm": 0.04953059181571007, "learning_rate": 0.000143287276655115, "loss": 0.2865, "step": 17539 }, { "epoch": 1.4209332469215814, "grad_norm": 0.050928402692079544, "learning_rate": 0.00014328277600252038, "loss": 0.2743, "step": 17540 }, { "epoch": 1.4210142579390797, "grad_norm": 0.05457817390561104, "learning_rate": 0.00014327827534992574, "loss": 0.2768, "step": 17541 }, { "epoch": 1.4210952689565781, "grad_norm": 0.05337541922926903, "learning_rate": 0.00014327377469733113, "loss": 0.312, "step": 17542 }, { "epoch": 1.4211762799740764, "grad_norm": 0.049907129257917404, "learning_rate": 0.0001432692740447365, "loss": 0.294, "step": 17543 }, { "epoch": 1.4212572909915748, "grad_norm": 0.04843693599104881, "learning_rate": 0.00014326477339214185, "loss": 0.2941, "step": 17544 }, { "epoch": 1.4213383020090733, "grad_norm": 0.05297977849841118, "learning_rate": 0.00014326027273954726, "loss": 0.31, "step": 17545 }, { "epoch": 1.4214193130265715, "grad_norm": 0.051067106425762177, "learning_rate": 0.00014325577208695262, "loss": 0.3499, "step": 17546 }, { "epoch": 1.42150032404407, "grad_norm": 0.05172743275761604, "learning_rate": 0.00014325127143435798, "loss": 0.319, "step": 17547 }, { "epoch": 1.4215813350615685, "grad_norm": 0.04919605702161789, "learning_rate": 0.00014324677078176337, "loss": 0.2782, "step": 17548 }, { "epoch": 1.4216623460790667, "grad_norm": 0.05195494741201401, "learning_rate": 0.00014324227012916873, "loss": 0.3252, "step": 17549 }, { "epoch": 1.4217433570965652, "grad_norm": 0.045493561774492264, "learning_rate": 0.0001432377694765741, "loss": 0.2898, "step": 17550 }, { "epoch": 1.4218243681140634, "grad_norm": 0.04597517102956772, "learning_rate": 0.0001432332688239795, "loss": 0.2682, "step": 17551 }, { "epoch": 1.421905379131562, "grad_norm": 0.04705383628606796, "learning_rate": 0.00014322876817138486, "loss": 0.3145, "step": 17552 }, { "epoch": 1.4219863901490601, "grad_norm": 0.0446794331073761, "learning_rate": 0.00014322426751879022, "loss": 0.2827, "step": 17553 }, { "epoch": 1.4220674011665586, "grad_norm": 0.051160577684640884, "learning_rate": 0.0001432197668661956, "loss": 0.2759, "step": 17554 }, { "epoch": 1.422148412184057, "grad_norm": 0.04348822310566902, "learning_rate": 0.00014321526621360097, "loss": 0.2852, "step": 17555 }, { "epoch": 1.4222294232015553, "grad_norm": 0.053231049329042435, "learning_rate": 0.00014321076556100633, "loss": 0.2997, "step": 17556 }, { "epoch": 1.4223104342190538, "grad_norm": 0.052900660783052444, "learning_rate": 0.00014320626490841175, "loss": 0.3744, "step": 17557 }, { "epoch": 1.4223914452365523, "grad_norm": 0.04624491184949875, "learning_rate": 0.0001432017642558171, "loss": 0.3021, "step": 17558 }, { "epoch": 1.4224724562540505, "grad_norm": 0.04583517462015152, "learning_rate": 0.00014319726360322247, "loss": 0.2795, "step": 17559 }, { "epoch": 1.422553467271549, "grad_norm": 0.049809325486421585, "learning_rate": 0.00014319276295062785, "loss": 0.2634, "step": 17560 }, { "epoch": 1.4226344782890474, "grad_norm": 0.04780822992324829, "learning_rate": 0.0001431882622980332, "loss": 0.2788, "step": 17561 }, { "epoch": 1.4227154893065457, "grad_norm": 0.049446288496255875, "learning_rate": 0.00014318376164543857, "loss": 0.2799, "step": 17562 }, { "epoch": 1.4227965003240441, "grad_norm": 0.0466834157705307, "learning_rate": 0.000143179260992844, "loss": 0.3056, "step": 17563 }, { "epoch": 1.4228775113415424, "grad_norm": 0.05659082531929016, "learning_rate": 0.00014317476034024935, "loss": 0.3108, "step": 17564 }, { "epoch": 1.4229585223590409, "grad_norm": 0.045440223067998886, "learning_rate": 0.0001431702596876547, "loss": 0.3025, "step": 17565 }, { "epoch": 1.423039533376539, "grad_norm": 0.04813716188073158, "learning_rate": 0.0001431657590350601, "loss": 0.2921, "step": 17566 }, { "epoch": 1.4231205443940376, "grad_norm": 0.05100923404097557, "learning_rate": 0.00014316125838246545, "loss": 0.2936, "step": 17567 }, { "epoch": 1.423201555411536, "grad_norm": 0.05094525218009949, "learning_rate": 0.00014315675772987081, "loss": 0.2931, "step": 17568 }, { "epoch": 1.4232825664290343, "grad_norm": 0.042763397097587585, "learning_rate": 0.00014315225707727623, "loss": 0.26, "step": 17569 }, { "epoch": 1.4233635774465327, "grad_norm": 0.0470680370926857, "learning_rate": 0.0001431477564246816, "loss": 0.287, "step": 17570 }, { "epoch": 1.4234445884640312, "grad_norm": 0.04994115233421326, "learning_rate": 0.00014314325577208695, "loss": 0.3198, "step": 17571 }, { "epoch": 1.4235255994815295, "grad_norm": 0.04325047507882118, "learning_rate": 0.00014313875511949234, "loss": 0.3214, "step": 17572 }, { "epoch": 1.423606610499028, "grad_norm": 0.049197446554899216, "learning_rate": 0.0001431342544668977, "loss": 0.3107, "step": 17573 }, { "epoch": 1.4236876215165262, "grad_norm": 0.043233487755060196, "learning_rate": 0.00014312975381430308, "loss": 0.2531, "step": 17574 }, { "epoch": 1.4237686325340246, "grad_norm": 0.045737169682979584, "learning_rate": 0.00014312525316170847, "loss": 0.2693, "step": 17575 }, { "epoch": 1.4238496435515229, "grad_norm": 0.05091886594891548, "learning_rate": 0.00014312075250911383, "loss": 0.2524, "step": 17576 }, { "epoch": 1.4239306545690213, "grad_norm": 0.04994532838463783, "learning_rate": 0.0001431162518565192, "loss": 0.3172, "step": 17577 }, { "epoch": 1.4240116655865198, "grad_norm": 0.044858817011117935, "learning_rate": 0.00014311175120392458, "loss": 0.2679, "step": 17578 }, { "epoch": 1.424092676604018, "grad_norm": 0.05132998526096344, "learning_rate": 0.00014310725055132994, "loss": 0.2839, "step": 17579 }, { "epoch": 1.4241736876215165, "grad_norm": 0.04050062224268913, "learning_rate": 0.00014310274989873533, "loss": 0.2747, "step": 17580 }, { "epoch": 1.424254698639015, "grad_norm": 0.048599354922771454, "learning_rate": 0.0001430982492461407, "loss": 0.3611, "step": 17581 }, { "epoch": 1.4243357096565132, "grad_norm": 0.05495349317789078, "learning_rate": 0.00014309374859354607, "loss": 0.3496, "step": 17582 }, { "epoch": 1.4244167206740117, "grad_norm": 0.046302735805511475, "learning_rate": 0.00014308924794095143, "loss": 0.3011, "step": 17583 }, { "epoch": 1.4244977316915102, "grad_norm": 0.04650885984301567, "learning_rate": 0.00014308474728835682, "loss": 0.2765, "step": 17584 }, { "epoch": 1.4245787427090084, "grad_norm": 0.043730463832616806, "learning_rate": 0.00014308024663576218, "loss": 0.2923, "step": 17585 }, { "epoch": 1.4246597537265069, "grad_norm": 0.040796149522066116, "learning_rate": 0.00014307574598316757, "loss": 0.264, "step": 17586 }, { "epoch": 1.4247407647440051, "grad_norm": 0.05266737937927246, "learning_rate": 0.00014307124533057295, "loss": 0.3034, "step": 17587 }, { "epoch": 1.4248217757615036, "grad_norm": 0.051856864243745804, "learning_rate": 0.00014306674467797831, "loss": 0.3652, "step": 17588 }, { "epoch": 1.4249027867790018, "grad_norm": 0.053262967616319656, "learning_rate": 0.00014306224402538367, "loss": 0.3236, "step": 17589 }, { "epoch": 1.4249837977965003, "grad_norm": 0.05611581355333328, "learning_rate": 0.00014305774337278906, "loss": 0.3119, "step": 17590 }, { "epoch": 1.4250648088139988, "grad_norm": 0.0478108711540699, "learning_rate": 0.00014305324272019442, "loss": 0.2652, "step": 17591 }, { "epoch": 1.425145819831497, "grad_norm": 0.04079686850309372, "learning_rate": 0.0001430487420675998, "loss": 0.2671, "step": 17592 }, { "epoch": 1.4252268308489955, "grad_norm": 0.05292508751153946, "learning_rate": 0.0001430442414150052, "loss": 0.3286, "step": 17593 }, { "epoch": 1.425307841866494, "grad_norm": 0.040990427136421204, "learning_rate": 0.00014303974076241056, "loss": 0.2554, "step": 17594 }, { "epoch": 1.4253888528839922, "grad_norm": 0.044480640441179276, "learning_rate": 0.00014303524010981592, "loss": 0.2897, "step": 17595 }, { "epoch": 1.4254698639014907, "grad_norm": 0.0434565506875515, "learning_rate": 0.0001430307394572213, "loss": 0.2837, "step": 17596 }, { "epoch": 1.425550874918989, "grad_norm": 0.053456861525774, "learning_rate": 0.0001430262388046267, "loss": 0.3343, "step": 17597 }, { "epoch": 1.4256318859364874, "grad_norm": 0.05187445133924484, "learning_rate": 0.00014302173815203205, "loss": 0.3253, "step": 17598 }, { "epoch": 1.4257128969539856, "grad_norm": 0.046234335750341415, "learning_rate": 0.00014301723749943744, "loss": 0.2808, "step": 17599 }, { "epoch": 1.425793907971484, "grad_norm": 0.048222288489341736, "learning_rate": 0.0001430127368468428, "loss": 0.2329, "step": 17600 }, { "epoch": 1.4258749189889826, "grad_norm": 0.05416186898946762, "learning_rate": 0.00014300823619424816, "loss": 0.3028, "step": 17601 }, { "epoch": 1.4259559300064808, "grad_norm": 0.04630477353930473, "learning_rate": 0.00014300373554165354, "loss": 0.3023, "step": 17602 }, { "epoch": 1.4260369410239793, "grad_norm": 0.05828891322016716, "learning_rate": 0.00014299923488905893, "loss": 0.3259, "step": 17603 }, { "epoch": 1.4261179520414777, "grad_norm": 0.04590104520320892, "learning_rate": 0.0001429947342364643, "loss": 0.2762, "step": 17604 }, { "epoch": 1.426198963058976, "grad_norm": 0.050497934222221375, "learning_rate": 0.00014299023358386968, "loss": 0.2782, "step": 17605 }, { "epoch": 1.4262799740764744, "grad_norm": 0.04419134929776192, "learning_rate": 0.00014298573293127504, "loss": 0.2795, "step": 17606 }, { "epoch": 1.426360985093973, "grad_norm": 0.047383926808834076, "learning_rate": 0.0001429812322786804, "loss": 0.2885, "step": 17607 }, { "epoch": 1.4264419961114712, "grad_norm": 0.04415738582611084, "learning_rate": 0.00014297673162608579, "loss": 0.2795, "step": 17608 }, { "epoch": 1.4265230071289696, "grad_norm": 0.048009831458330154, "learning_rate": 0.00014297223097349117, "loss": 0.312, "step": 17609 }, { "epoch": 1.4266040181464679, "grad_norm": 0.04848635196685791, "learning_rate": 0.00014296773032089653, "loss": 0.3205, "step": 17610 }, { "epoch": 1.4266850291639663, "grad_norm": 0.04432026669383049, "learning_rate": 0.00014296322966830192, "loss": 0.277, "step": 17611 }, { "epoch": 1.4267660401814646, "grad_norm": 0.04767496883869171, "learning_rate": 0.00014295872901570728, "loss": 0.3138, "step": 17612 }, { "epoch": 1.426847051198963, "grad_norm": 0.04357754439115524, "learning_rate": 0.00014295422836311264, "loss": 0.2943, "step": 17613 }, { "epoch": 1.4269280622164615, "grad_norm": 0.05366687476634979, "learning_rate": 0.00014294972771051803, "loss": 0.2843, "step": 17614 }, { "epoch": 1.4270090732339598, "grad_norm": 0.03847344592213631, "learning_rate": 0.00014294522705792341, "loss": 0.2662, "step": 17615 }, { "epoch": 1.4270900842514582, "grad_norm": 0.0483989343047142, "learning_rate": 0.00014294072640532878, "loss": 0.2377, "step": 17616 }, { "epoch": 1.4271710952689567, "grad_norm": 0.0436316654086113, "learning_rate": 0.00014293622575273416, "loss": 0.2917, "step": 17617 }, { "epoch": 1.427252106286455, "grad_norm": 0.052246369421482086, "learning_rate": 0.00014293172510013952, "loss": 0.3499, "step": 17618 }, { "epoch": 1.4273331173039534, "grad_norm": 0.058432210236787796, "learning_rate": 0.00014292722444754488, "loss": 0.2948, "step": 17619 }, { "epoch": 1.4274141283214516, "grad_norm": 0.052339889109134674, "learning_rate": 0.00014292272379495027, "loss": 0.3119, "step": 17620 }, { "epoch": 1.4274951393389501, "grad_norm": 0.045229263603687286, "learning_rate": 0.00014291822314235566, "loss": 0.3068, "step": 17621 }, { "epoch": 1.4275761503564484, "grad_norm": 0.04981428384780884, "learning_rate": 0.00014291372248976102, "loss": 0.3281, "step": 17622 }, { "epoch": 1.4276571613739468, "grad_norm": 0.047702182084321976, "learning_rate": 0.0001429092218371664, "loss": 0.3222, "step": 17623 }, { "epoch": 1.4277381723914453, "grad_norm": 0.043431397527456284, "learning_rate": 0.00014290472118457176, "loss": 0.2738, "step": 17624 }, { "epoch": 1.4278191834089435, "grad_norm": 0.037868376821279526, "learning_rate": 0.00014290022053197712, "loss": 0.2335, "step": 17625 }, { "epoch": 1.427900194426442, "grad_norm": 0.04401480779051781, "learning_rate": 0.00014289571987938254, "loss": 0.3073, "step": 17626 }, { "epoch": 1.4279812054439405, "grad_norm": 0.05242225527763367, "learning_rate": 0.0001428912192267879, "loss": 0.352, "step": 17627 }, { "epoch": 1.4280622164614387, "grad_norm": 0.049203623086214066, "learning_rate": 0.00014288671857419326, "loss": 0.2981, "step": 17628 }, { "epoch": 1.4281432274789372, "grad_norm": 0.04736964777112007, "learning_rate": 0.00014288221792159865, "loss": 0.2753, "step": 17629 }, { "epoch": 1.4282242384964356, "grad_norm": 0.053792353719472885, "learning_rate": 0.000142877717269004, "loss": 0.2722, "step": 17630 }, { "epoch": 1.428305249513934, "grad_norm": 0.04648834094405174, "learning_rate": 0.00014287321661640937, "loss": 0.2952, "step": 17631 }, { "epoch": 1.4283862605314321, "grad_norm": 0.04978673905134201, "learning_rate": 0.00014286871596381478, "loss": 0.3041, "step": 17632 }, { "epoch": 1.4284672715489306, "grad_norm": 0.053194884210824966, "learning_rate": 0.00014286421531122014, "loss": 0.2755, "step": 17633 }, { "epoch": 1.428548282566429, "grad_norm": 0.048594143241643906, "learning_rate": 0.0001428597146586255, "loss": 0.3009, "step": 17634 }, { "epoch": 1.4286292935839273, "grad_norm": 0.039179448038339615, "learning_rate": 0.0001428552140060309, "loss": 0.2707, "step": 17635 }, { "epoch": 1.4287103046014258, "grad_norm": 0.052394479513168335, "learning_rate": 0.00014285071335343625, "loss": 0.3635, "step": 17636 }, { "epoch": 1.4287913156189243, "grad_norm": 0.04384802281856537, "learning_rate": 0.0001428462127008416, "loss": 0.2835, "step": 17637 }, { "epoch": 1.4288723266364225, "grad_norm": 0.05427468195557594, "learning_rate": 0.00014284171204824702, "loss": 0.3465, "step": 17638 }, { "epoch": 1.428953337653921, "grad_norm": 0.047030430287122726, "learning_rate": 0.00014283721139565238, "loss": 0.3357, "step": 17639 }, { "epoch": 1.4290343486714194, "grad_norm": 0.04885796457529068, "learning_rate": 0.00014283271074305774, "loss": 0.2944, "step": 17640 }, { "epoch": 1.4291153596889177, "grad_norm": 0.05539722368121147, "learning_rate": 0.00014282821009046313, "loss": 0.2876, "step": 17641 }, { "epoch": 1.4291963707064161, "grad_norm": 0.03706725314259529, "learning_rate": 0.0001428237094378685, "loss": 0.269, "step": 17642 }, { "epoch": 1.4292773817239144, "grad_norm": 0.047710664570331573, "learning_rate": 0.00014281920878527385, "loss": 0.3027, "step": 17643 }, { "epoch": 1.4293583927414129, "grad_norm": 0.04649528115987778, "learning_rate": 0.00014281470813267926, "loss": 0.2662, "step": 17644 }, { "epoch": 1.429439403758911, "grad_norm": 0.052990663796663284, "learning_rate": 0.00014281020748008462, "loss": 0.3236, "step": 17645 }, { "epoch": 1.4295204147764096, "grad_norm": 0.04301619529724121, "learning_rate": 0.00014280570682748998, "loss": 0.2903, "step": 17646 }, { "epoch": 1.429601425793908, "grad_norm": 0.04583217203617096, "learning_rate": 0.00014280120617489537, "loss": 0.3078, "step": 17647 }, { "epoch": 1.4296824368114063, "grad_norm": 0.04579533636569977, "learning_rate": 0.00014279670552230073, "loss": 0.269, "step": 17648 }, { "epoch": 1.4297634478289047, "grad_norm": 0.048855386674404144, "learning_rate": 0.0001427922048697061, "loss": 0.2819, "step": 17649 }, { "epoch": 1.4298444588464032, "grad_norm": 0.045893169939517975, "learning_rate": 0.0001427877042171115, "loss": 0.3297, "step": 17650 }, { "epoch": 1.4299254698639015, "grad_norm": 0.04148982837796211, "learning_rate": 0.00014278320356451686, "loss": 0.2879, "step": 17651 }, { "epoch": 1.4300064808814, "grad_norm": 0.04670432209968567, "learning_rate": 0.00014277870291192222, "loss": 0.2744, "step": 17652 }, { "epoch": 1.4300874918988984, "grad_norm": 0.05058503895998001, "learning_rate": 0.0001427742022593276, "loss": 0.2655, "step": 17653 }, { "epoch": 1.4301685029163966, "grad_norm": 0.04702136293053627, "learning_rate": 0.00014276970160673297, "loss": 0.3089, "step": 17654 }, { "epoch": 1.4302495139338949, "grad_norm": 0.04836324602365494, "learning_rate": 0.00014276520095413836, "loss": 0.2993, "step": 17655 }, { "epoch": 1.4303305249513933, "grad_norm": 0.0631798654794693, "learning_rate": 0.00014276070030154375, "loss": 0.2981, "step": 17656 }, { "epoch": 1.4304115359688918, "grad_norm": 0.06408338248729706, "learning_rate": 0.0001427561996489491, "loss": 0.302, "step": 17657 }, { "epoch": 1.43049254698639, "grad_norm": 0.04548690468072891, "learning_rate": 0.00014275169899635447, "loss": 0.2829, "step": 17658 }, { "epoch": 1.4305735580038885, "grad_norm": 0.053524602204561234, "learning_rate": 0.00014274719834375985, "loss": 0.3103, "step": 17659 }, { "epoch": 1.430654569021387, "grad_norm": 0.05356777086853981, "learning_rate": 0.00014274269769116521, "loss": 0.2945, "step": 17660 }, { "epoch": 1.4307355800388852, "grad_norm": 0.046562310308218, "learning_rate": 0.0001427381970385706, "loss": 0.3177, "step": 17661 }, { "epoch": 1.4308165910563837, "grad_norm": 0.043342217803001404, "learning_rate": 0.000142733696385976, "loss": 0.2433, "step": 17662 }, { "epoch": 1.4308976020738822, "grad_norm": 0.04943990707397461, "learning_rate": 0.00014272919573338135, "loss": 0.2833, "step": 17663 }, { "epoch": 1.4309786130913804, "grad_norm": 0.0495469756424427, "learning_rate": 0.0001427246950807867, "loss": 0.3207, "step": 17664 }, { "epoch": 1.4310596241088789, "grad_norm": 0.041657302528619766, "learning_rate": 0.0001427201944281921, "loss": 0.2768, "step": 17665 }, { "epoch": 1.4311406351263771, "grad_norm": 0.054395124316215515, "learning_rate": 0.00014271569377559746, "loss": 0.32, "step": 17666 }, { "epoch": 1.4312216461438756, "grad_norm": 0.04485293850302696, "learning_rate": 0.00014271119312300284, "loss": 0.3026, "step": 17667 }, { "epoch": 1.4313026571613738, "grad_norm": 0.03999423235654831, "learning_rate": 0.00014270669247040823, "loss": 0.2654, "step": 17668 }, { "epoch": 1.4313836681788723, "grad_norm": 0.05643171817064285, "learning_rate": 0.0001427021918178136, "loss": 0.3557, "step": 17669 }, { "epoch": 1.4314646791963708, "grad_norm": 0.05252460390329361, "learning_rate": 0.00014269769116521895, "loss": 0.3132, "step": 17670 }, { "epoch": 1.431545690213869, "grad_norm": 0.05533226206898689, "learning_rate": 0.00014269319051262434, "loss": 0.3212, "step": 17671 }, { "epoch": 1.4316267012313675, "grad_norm": 0.04171612858772278, "learning_rate": 0.0001426886898600297, "loss": 0.2695, "step": 17672 }, { "epoch": 1.431707712248866, "grad_norm": 0.04758473113179207, "learning_rate": 0.00014268418920743508, "loss": 0.2862, "step": 17673 }, { "epoch": 1.4317887232663642, "grad_norm": 0.03944450989365578, "learning_rate": 0.00014267968855484047, "loss": 0.2567, "step": 17674 }, { "epoch": 1.4318697342838627, "grad_norm": 0.052580054849386215, "learning_rate": 0.00014267518790224583, "loss": 0.2806, "step": 17675 }, { "epoch": 1.431950745301361, "grad_norm": 0.04589229077100754, "learning_rate": 0.0001426706872496512, "loss": 0.2795, "step": 17676 }, { "epoch": 1.4320317563188594, "grad_norm": 0.05369507148861885, "learning_rate": 0.00014266618659705658, "loss": 0.2978, "step": 17677 }, { "epoch": 1.4321127673363576, "grad_norm": 0.06819190084934235, "learning_rate": 0.00014266168594446197, "loss": 0.3306, "step": 17678 }, { "epoch": 1.432193778353856, "grad_norm": 0.04986560717225075, "learning_rate": 0.00014265718529186733, "loss": 0.3172, "step": 17679 }, { "epoch": 1.4322747893713546, "grad_norm": 0.06651494652032852, "learning_rate": 0.0001426526846392727, "loss": 0.3052, "step": 17680 }, { "epoch": 1.4323558003888528, "grad_norm": 0.04793441295623779, "learning_rate": 0.00014264818398667807, "loss": 0.2924, "step": 17681 }, { "epoch": 1.4324368114063513, "grad_norm": 0.04924444481730461, "learning_rate": 0.00014264368333408343, "loss": 0.2638, "step": 17682 }, { "epoch": 1.4325178224238497, "grad_norm": 0.047013383358716965, "learning_rate": 0.00014263918268148882, "loss": 0.3146, "step": 17683 }, { "epoch": 1.432598833441348, "grad_norm": 0.04840421304106712, "learning_rate": 0.0001426346820288942, "loss": 0.2787, "step": 17684 }, { "epoch": 1.4326798444588464, "grad_norm": 0.0506095290184021, "learning_rate": 0.00014263018137629957, "loss": 0.3159, "step": 17685 }, { "epoch": 1.432760855476345, "grad_norm": 0.04708279296755791, "learning_rate": 0.00014262568072370495, "loss": 0.2614, "step": 17686 }, { "epoch": 1.4328418664938432, "grad_norm": 0.05179835110902786, "learning_rate": 0.00014262118007111031, "loss": 0.2564, "step": 17687 }, { "epoch": 1.4329228775113416, "grad_norm": 0.04784620180726051, "learning_rate": 0.00014261667941851567, "loss": 0.3049, "step": 17688 }, { "epoch": 1.4330038885288399, "grad_norm": 0.04964848980307579, "learning_rate": 0.00014261217876592106, "loss": 0.2776, "step": 17689 }, { "epoch": 1.4330848995463383, "grad_norm": 0.049182306975126266, "learning_rate": 0.00014260767811332645, "loss": 0.3259, "step": 17690 }, { "epoch": 1.4331659105638366, "grad_norm": 0.05033789947628975, "learning_rate": 0.0001426031774607318, "loss": 0.3005, "step": 17691 }, { "epoch": 1.433246921581335, "grad_norm": 0.04850676655769348, "learning_rate": 0.0001425986768081372, "loss": 0.2947, "step": 17692 }, { "epoch": 1.4333279325988335, "grad_norm": 0.041882604360580444, "learning_rate": 0.00014259417615554256, "loss": 0.2313, "step": 17693 }, { "epoch": 1.4334089436163318, "grad_norm": 0.053109001368284225, "learning_rate": 0.00014258967550294792, "loss": 0.2912, "step": 17694 }, { "epoch": 1.4334899546338302, "grad_norm": 0.0499955415725708, "learning_rate": 0.0001425851748503533, "loss": 0.3335, "step": 17695 }, { "epoch": 1.4335709656513287, "grad_norm": 0.044821664690971375, "learning_rate": 0.0001425806741977587, "loss": 0.288, "step": 17696 }, { "epoch": 1.433651976668827, "grad_norm": 0.04659029468894005, "learning_rate": 0.00014257617354516405, "loss": 0.2719, "step": 17697 }, { "epoch": 1.4337329876863254, "grad_norm": 0.04900295287370682, "learning_rate": 0.00014257167289256944, "loss": 0.2859, "step": 17698 }, { "epoch": 1.4338139987038236, "grad_norm": 0.04085109010338783, "learning_rate": 0.0001425671722399748, "loss": 0.2879, "step": 17699 }, { "epoch": 1.4338950097213221, "grad_norm": 0.044099897146224976, "learning_rate": 0.00014256267158738016, "loss": 0.2745, "step": 17700 }, { "epoch": 1.4339760207388204, "grad_norm": 0.04478868842124939, "learning_rate": 0.00014255817093478554, "loss": 0.2946, "step": 17701 }, { "epoch": 1.4340570317563188, "grad_norm": 0.0422249473631382, "learning_rate": 0.00014255367028219093, "loss": 0.2904, "step": 17702 }, { "epoch": 1.4341380427738173, "grad_norm": 0.048670295625925064, "learning_rate": 0.0001425491696295963, "loss": 0.3119, "step": 17703 }, { "epoch": 1.4342190537913155, "grad_norm": 0.046880774199962616, "learning_rate": 0.00014254466897700168, "loss": 0.2731, "step": 17704 }, { "epoch": 1.434300064808814, "grad_norm": 0.05380629748106003, "learning_rate": 0.00014254016832440704, "loss": 0.3093, "step": 17705 }, { "epoch": 1.4343810758263125, "grad_norm": 0.05190601944923401, "learning_rate": 0.0001425356676718124, "loss": 0.2951, "step": 17706 }, { "epoch": 1.4344620868438107, "grad_norm": 0.0382777564227581, "learning_rate": 0.00014253116701921781, "loss": 0.2484, "step": 17707 }, { "epoch": 1.4345430978613092, "grad_norm": 0.04491351172327995, "learning_rate": 0.00014252666636662317, "loss": 0.2674, "step": 17708 }, { "epoch": 1.4346241088788076, "grad_norm": 0.05805452540516853, "learning_rate": 0.00014252216571402853, "loss": 0.2843, "step": 17709 }, { "epoch": 1.434705119896306, "grad_norm": 0.03996812179684639, "learning_rate": 0.00014251766506143392, "loss": 0.2579, "step": 17710 }, { "epoch": 1.4347861309138044, "grad_norm": 0.047434549778699875, "learning_rate": 0.00014251316440883928, "loss": 0.2858, "step": 17711 }, { "epoch": 1.4348671419313026, "grad_norm": 0.0471944697201252, "learning_rate": 0.00014250866375624464, "loss": 0.2849, "step": 17712 }, { "epoch": 1.434948152948801, "grad_norm": 0.06150485947728157, "learning_rate": 0.00014250416310365006, "loss": 0.3582, "step": 17713 }, { "epoch": 1.4350291639662993, "grad_norm": 0.050793759524822235, "learning_rate": 0.00014249966245105542, "loss": 0.2473, "step": 17714 }, { "epoch": 1.4351101749837978, "grad_norm": 0.04835623875260353, "learning_rate": 0.00014249516179846078, "loss": 0.3284, "step": 17715 }, { "epoch": 1.4351911860012962, "grad_norm": 0.047694023698568344, "learning_rate": 0.00014249066114586616, "loss": 0.2609, "step": 17716 }, { "epoch": 1.4352721970187945, "grad_norm": 0.0514262430369854, "learning_rate": 0.00014248616049327152, "loss": 0.3275, "step": 17717 }, { "epoch": 1.435353208036293, "grad_norm": 0.046545013785362244, "learning_rate": 0.00014248165984067688, "loss": 0.2731, "step": 17718 }, { "epoch": 1.4354342190537914, "grad_norm": 0.043400492519140244, "learning_rate": 0.0001424771591880823, "loss": 0.2886, "step": 17719 }, { "epoch": 1.4355152300712897, "grad_norm": 0.05241835489869118, "learning_rate": 0.00014247265853548766, "loss": 0.3394, "step": 17720 }, { "epoch": 1.4355962410887881, "grad_norm": 0.046961188316345215, "learning_rate": 0.00014246815788289302, "loss": 0.2782, "step": 17721 }, { "epoch": 1.4356772521062864, "grad_norm": 0.05200467258691788, "learning_rate": 0.0001424636572302984, "loss": 0.3346, "step": 17722 }, { "epoch": 1.4357582631237849, "grad_norm": 0.04980026185512543, "learning_rate": 0.00014245915657770376, "loss": 0.3204, "step": 17723 }, { "epoch": 1.435839274141283, "grad_norm": 0.04377777874469757, "learning_rate": 0.00014245465592510912, "loss": 0.2862, "step": 17724 }, { "epoch": 1.4359202851587816, "grad_norm": 0.04981323331594467, "learning_rate": 0.00014245015527251454, "loss": 0.3012, "step": 17725 }, { "epoch": 1.43600129617628, "grad_norm": 0.049664206802845, "learning_rate": 0.0001424456546199199, "loss": 0.3252, "step": 17726 }, { "epoch": 1.4360823071937783, "grad_norm": 0.05872774124145508, "learning_rate": 0.00014244115396732526, "loss": 0.3248, "step": 17727 }, { "epoch": 1.4361633182112767, "grad_norm": 0.0413481779396534, "learning_rate": 0.00014243665331473065, "loss": 0.2742, "step": 17728 }, { "epoch": 1.4362443292287752, "grad_norm": 0.05045654624700546, "learning_rate": 0.000142432152662136, "loss": 0.299, "step": 17729 }, { "epoch": 1.4363253402462735, "grad_norm": 0.050098661333322525, "learning_rate": 0.0001424276520095414, "loss": 0.2855, "step": 17730 }, { "epoch": 1.436406351263772, "grad_norm": 0.043931033462285995, "learning_rate": 0.00014242315135694678, "loss": 0.2678, "step": 17731 }, { "epoch": 1.4364873622812704, "grad_norm": 0.04665660858154297, "learning_rate": 0.00014241865070435214, "loss": 0.2913, "step": 17732 }, { "epoch": 1.4365683732987686, "grad_norm": 0.04515928775072098, "learning_rate": 0.0001424141500517575, "loss": 0.2995, "step": 17733 }, { "epoch": 1.4366493843162669, "grad_norm": 0.05133425071835518, "learning_rate": 0.0001424096493991629, "loss": 0.3253, "step": 17734 }, { "epoch": 1.4367303953337653, "grad_norm": 0.05313633382320404, "learning_rate": 0.00014240514874656825, "loss": 0.3065, "step": 17735 }, { "epoch": 1.4368114063512638, "grad_norm": 0.04846735671162605, "learning_rate": 0.00014240064809397363, "loss": 0.2985, "step": 17736 }, { "epoch": 1.436892417368762, "grad_norm": 0.04319280758500099, "learning_rate": 0.00014239614744137902, "loss": 0.2498, "step": 17737 }, { "epoch": 1.4369734283862605, "grad_norm": 0.03988509252667427, "learning_rate": 0.00014239164678878438, "loss": 0.2677, "step": 17738 }, { "epoch": 1.437054439403759, "grad_norm": 0.04044670984148979, "learning_rate": 0.00014238714613618974, "loss": 0.2926, "step": 17739 }, { "epoch": 1.4371354504212572, "grad_norm": 0.05485132709145546, "learning_rate": 0.00014238264548359513, "loss": 0.3302, "step": 17740 }, { "epoch": 1.4372164614387557, "grad_norm": 0.04441394284367561, "learning_rate": 0.0001423781448310005, "loss": 0.2643, "step": 17741 }, { "epoch": 1.4372974724562542, "grad_norm": 0.04969778284430504, "learning_rate": 0.00014237364417840588, "loss": 0.2784, "step": 17742 }, { "epoch": 1.4373784834737524, "grad_norm": 0.04707542806863785, "learning_rate": 0.00014236914352581126, "loss": 0.2669, "step": 17743 }, { "epoch": 1.4374594944912509, "grad_norm": 0.04709019884467125, "learning_rate": 0.00014236464287321662, "loss": 0.2504, "step": 17744 }, { "epoch": 1.4375405055087491, "grad_norm": 0.04919714853167534, "learning_rate": 0.00014236014222062198, "loss": 0.2899, "step": 17745 }, { "epoch": 1.4376215165262476, "grad_norm": 0.043658845126628876, "learning_rate": 0.00014235564156802737, "loss": 0.3024, "step": 17746 }, { "epoch": 1.4377025275437458, "grad_norm": 0.04478023573756218, "learning_rate": 0.00014235114091543273, "loss": 0.3129, "step": 17747 }, { "epoch": 1.4377835385612443, "grad_norm": 0.04204754903912544, "learning_rate": 0.00014234664026283812, "loss": 0.2523, "step": 17748 }, { "epoch": 1.4378645495787428, "grad_norm": 0.04705436900258064, "learning_rate": 0.0001423421396102435, "loss": 0.3134, "step": 17749 }, { "epoch": 1.437945560596241, "grad_norm": 0.045828547328710556, "learning_rate": 0.00014233763895764886, "loss": 0.2992, "step": 17750 }, { "epoch": 1.4380265716137395, "grad_norm": 0.04419634863734245, "learning_rate": 0.00014233313830505423, "loss": 0.2926, "step": 17751 }, { "epoch": 1.438107582631238, "grad_norm": 0.047092728316783905, "learning_rate": 0.0001423286376524596, "loss": 0.251, "step": 17752 }, { "epoch": 1.4381885936487362, "grad_norm": 0.0492546483874321, "learning_rate": 0.00014232413699986497, "loss": 0.2937, "step": 17753 }, { "epoch": 1.4382696046662347, "grad_norm": 0.04803192988038063, "learning_rate": 0.00014231963634727036, "loss": 0.2688, "step": 17754 }, { "epoch": 1.4383506156837331, "grad_norm": 0.05287334322929382, "learning_rate": 0.00014231513569467575, "loss": 0.3084, "step": 17755 }, { "epoch": 1.4384316267012314, "grad_norm": 0.04357105493545532, "learning_rate": 0.0001423106350420811, "loss": 0.2838, "step": 17756 }, { "epoch": 1.4385126377187296, "grad_norm": 0.03843948245048523, "learning_rate": 0.00014230613438948647, "loss": 0.2891, "step": 17757 }, { "epoch": 1.438593648736228, "grad_norm": 0.04217882826924324, "learning_rate": 0.00014230163373689185, "loss": 0.278, "step": 17758 }, { "epoch": 1.4386746597537265, "grad_norm": 0.04510761424899101, "learning_rate": 0.00014229713308429724, "loss": 0.2839, "step": 17759 }, { "epoch": 1.4387556707712248, "grad_norm": 0.04868185892701149, "learning_rate": 0.0001422926324317026, "loss": 0.3012, "step": 17760 }, { "epoch": 1.4388366817887233, "grad_norm": 0.04734332486987114, "learning_rate": 0.000142288131779108, "loss": 0.3107, "step": 17761 }, { "epoch": 1.4389176928062217, "grad_norm": 0.05641593411564827, "learning_rate": 0.00014228363112651335, "loss": 0.3275, "step": 17762 }, { "epoch": 1.43899870382372, "grad_norm": 0.04929044097661972, "learning_rate": 0.0001422791304739187, "loss": 0.3088, "step": 17763 }, { "epoch": 1.4390797148412184, "grad_norm": 0.055661290884017944, "learning_rate": 0.0001422746298213241, "loss": 0.2993, "step": 17764 }, { "epoch": 1.439160725858717, "grad_norm": 0.04595927894115448, "learning_rate": 0.00014227012916872948, "loss": 0.2959, "step": 17765 }, { "epoch": 1.4392417368762151, "grad_norm": 0.04587925225496292, "learning_rate": 0.00014226562851613484, "loss": 0.2938, "step": 17766 }, { "epoch": 1.4393227478937136, "grad_norm": 0.043285440653562546, "learning_rate": 0.00014226112786354023, "loss": 0.2862, "step": 17767 }, { "epoch": 1.4394037589112119, "grad_norm": 0.04577759653329849, "learning_rate": 0.0001422566272109456, "loss": 0.284, "step": 17768 }, { "epoch": 1.4394847699287103, "grad_norm": 0.05022251978516579, "learning_rate": 0.00014225212655835095, "loss": 0.2691, "step": 17769 }, { "epoch": 1.4395657809462086, "grad_norm": 0.04994913190603256, "learning_rate": 0.00014224762590575634, "loss": 0.347, "step": 17770 }, { "epoch": 1.439646791963707, "grad_norm": 0.04189305379986763, "learning_rate": 0.00014224312525316172, "loss": 0.2782, "step": 17771 }, { "epoch": 1.4397278029812055, "grad_norm": 0.05979640409350395, "learning_rate": 0.00014223862460056708, "loss": 0.275, "step": 17772 }, { "epoch": 1.4398088139987038, "grad_norm": 0.041147999465465546, "learning_rate": 0.00014223412394797247, "loss": 0.2508, "step": 17773 }, { "epoch": 1.4398898250162022, "grad_norm": 0.05074213445186615, "learning_rate": 0.00014222962329537783, "loss": 0.2646, "step": 17774 }, { "epoch": 1.4399708360337007, "grad_norm": 0.0547347255051136, "learning_rate": 0.0001422251226427832, "loss": 0.2874, "step": 17775 }, { "epoch": 1.440051847051199, "grad_norm": 0.04395623505115509, "learning_rate": 0.00014222062199018858, "loss": 0.2624, "step": 17776 }, { "epoch": 1.4401328580686974, "grad_norm": 0.05009927973151207, "learning_rate": 0.00014221612133759397, "loss": 0.3157, "step": 17777 }, { "epoch": 1.4402138690861956, "grad_norm": 0.05450264737010002, "learning_rate": 0.00014221162068499933, "loss": 0.3138, "step": 17778 }, { "epoch": 1.440294880103694, "grad_norm": 0.05897742137312889, "learning_rate": 0.0001422071200324047, "loss": 0.3221, "step": 17779 }, { "epoch": 1.4403758911211924, "grad_norm": 0.049845121800899506, "learning_rate": 0.00014220261937981007, "loss": 0.2763, "step": 17780 }, { "epoch": 1.4404569021386908, "grad_norm": 0.04709333926439285, "learning_rate": 0.00014219811872721543, "loss": 0.3036, "step": 17781 }, { "epoch": 1.4405379131561893, "grad_norm": 0.0525975339114666, "learning_rate": 0.00014219361807462082, "loss": 0.3123, "step": 17782 }, { "epoch": 1.4406189241736875, "grad_norm": 0.04980793967843056, "learning_rate": 0.0001421891174220262, "loss": 0.3106, "step": 17783 }, { "epoch": 1.440699935191186, "grad_norm": 0.04696754738688469, "learning_rate": 0.00014218461676943157, "loss": 0.3115, "step": 17784 }, { "epoch": 1.4407809462086845, "grad_norm": 0.046658169478178024, "learning_rate": 0.00014218011611683695, "loss": 0.2919, "step": 17785 }, { "epoch": 1.4408619572261827, "grad_norm": 0.058640480041503906, "learning_rate": 0.00014217561546424231, "loss": 0.3256, "step": 17786 }, { "epoch": 1.4409429682436812, "grad_norm": 0.04588627442717552, "learning_rate": 0.00014217111481164767, "loss": 0.2918, "step": 17787 }, { "epoch": 1.4410239792611796, "grad_norm": 0.04578271880745888, "learning_rate": 0.0001421666141590531, "loss": 0.2941, "step": 17788 }, { "epoch": 1.4411049902786779, "grad_norm": 0.05437834560871124, "learning_rate": 0.00014216211350645845, "loss": 0.3152, "step": 17789 }, { "epoch": 1.4411860012961764, "grad_norm": 0.05292617529630661, "learning_rate": 0.0001421576128538638, "loss": 0.3266, "step": 17790 }, { "epoch": 1.4412670123136746, "grad_norm": 0.04186420515179634, "learning_rate": 0.0001421531122012692, "loss": 0.2849, "step": 17791 }, { "epoch": 1.441348023331173, "grad_norm": 0.04509038105607033, "learning_rate": 0.00014214861154867456, "loss": 0.3231, "step": 17792 }, { "epoch": 1.4414290343486713, "grad_norm": 0.053367290645837784, "learning_rate": 0.00014214411089607992, "loss": 0.338, "step": 17793 }, { "epoch": 1.4415100453661698, "grad_norm": 0.04629664123058319, "learning_rate": 0.00014213961024348533, "loss": 0.2907, "step": 17794 }, { "epoch": 1.4415910563836682, "grad_norm": 0.04417245090007782, "learning_rate": 0.0001421351095908907, "loss": 0.2831, "step": 17795 }, { "epoch": 1.4416720674011665, "grad_norm": 0.04873376712203026, "learning_rate": 0.00014213060893829605, "loss": 0.2693, "step": 17796 }, { "epoch": 1.441753078418665, "grad_norm": 0.0454351082444191, "learning_rate": 0.00014212610828570144, "loss": 0.2783, "step": 17797 }, { "epoch": 1.4418340894361634, "grad_norm": 0.052286408841609955, "learning_rate": 0.0001421216076331068, "loss": 0.3376, "step": 17798 }, { "epoch": 1.4419151004536617, "grad_norm": 0.04228602349758148, "learning_rate": 0.00014211710698051216, "loss": 0.257, "step": 17799 }, { "epoch": 1.4419961114711601, "grad_norm": 0.05968720465898514, "learning_rate": 0.00014211260632791757, "loss": 0.3144, "step": 17800 }, { "epoch": 1.4420771224886584, "grad_norm": 0.05366584286093712, "learning_rate": 0.00014210810567532293, "loss": 0.3307, "step": 17801 }, { "epoch": 1.4421581335061568, "grad_norm": 0.05395115166902542, "learning_rate": 0.0001421036050227283, "loss": 0.3175, "step": 17802 }, { "epoch": 1.442239144523655, "grad_norm": 0.05399424210190773, "learning_rate": 0.00014209910437013368, "loss": 0.3105, "step": 17803 }, { "epoch": 1.4423201555411536, "grad_norm": 0.041070666164159775, "learning_rate": 0.00014209460371753904, "loss": 0.2223, "step": 17804 }, { "epoch": 1.442401166558652, "grad_norm": 0.05013779550790787, "learning_rate": 0.0001420901030649444, "loss": 0.3038, "step": 17805 }, { "epoch": 1.4424821775761503, "grad_norm": 0.04620078206062317, "learning_rate": 0.00014208560241234981, "loss": 0.2561, "step": 17806 }, { "epoch": 1.4425631885936487, "grad_norm": 0.050512149930000305, "learning_rate": 0.00014208110175975517, "loss": 0.2422, "step": 17807 }, { "epoch": 1.4426441996111472, "grad_norm": 0.041955072432756424, "learning_rate": 0.00014207660110716053, "loss": 0.3296, "step": 17808 }, { "epoch": 1.4427252106286454, "grad_norm": 0.043352846056222916, "learning_rate": 0.00014207210045456592, "loss": 0.2497, "step": 17809 }, { "epoch": 1.442806221646144, "grad_norm": 0.05135820806026459, "learning_rate": 0.00014206759980197128, "loss": 0.2877, "step": 17810 }, { "epoch": 1.4428872326636424, "grad_norm": 0.04289088025689125, "learning_rate": 0.00014206309914937667, "loss": 0.2951, "step": 17811 }, { "epoch": 1.4429682436811406, "grad_norm": 0.053548287600278854, "learning_rate": 0.00014205859849678206, "loss": 0.2862, "step": 17812 }, { "epoch": 1.443049254698639, "grad_norm": 0.04844128340482712, "learning_rate": 0.00014205409784418742, "loss": 0.2694, "step": 17813 }, { "epoch": 1.4431302657161373, "grad_norm": 0.059893593192100525, "learning_rate": 0.00014204959719159278, "loss": 0.3669, "step": 17814 }, { "epoch": 1.4432112767336358, "grad_norm": 0.04999880865216255, "learning_rate": 0.00014204509653899816, "loss": 0.2994, "step": 17815 }, { "epoch": 1.443292287751134, "grad_norm": 0.054112501442432404, "learning_rate": 0.00014204059588640352, "loss": 0.3448, "step": 17816 }, { "epoch": 1.4433732987686325, "grad_norm": 0.057990577071905136, "learning_rate": 0.0001420360952338089, "loss": 0.3429, "step": 17817 }, { "epoch": 1.443454309786131, "grad_norm": 0.047677554190158844, "learning_rate": 0.0001420315945812143, "loss": 0.3265, "step": 17818 }, { "epoch": 1.4435353208036292, "grad_norm": 0.04634621739387512, "learning_rate": 0.00014202709392861966, "loss": 0.3059, "step": 17819 }, { "epoch": 1.4436163318211277, "grad_norm": 0.05056798458099365, "learning_rate": 0.00014202259327602502, "loss": 0.3319, "step": 17820 }, { "epoch": 1.4436973428386262, "grad_norm": 0.04358064383268356, "learning_rate": 0.0001420180926234304, "loss": 0.3111, "step": 17821 }, { "epoch": 1.4437783538561244, "grad_norm": 0.05352209135890007, "learning_rate": 0.00014201359197083576, "loss": 0.2741, "step": 17822 }, { "epoch": 1.4438593648736229, "grad_norm": 0.05255879834294319, "learning_rate": 0.00014200909131824115, "loss": 0.3207, "step": 17823 }, { "epoch": 1.4439403758911211, "grad_norm": 0.048375453799963, "learning_rate": 0.00014200459066564654, "loss": 0.2694, "step": 17824 }, { "epoch": 1.4440213869086196, "grad_norm": 0.04612603783607483, "learning_rate": 0.0001420000900130519, "loss": 0.2852, "step": 17825 }, { "epoch": 1.4441023979261178, "grad_norm": 0.041807424277067184, "learning_rate": 0.00014199558936045726, "loss": 0.3001, "step": 17826 }, { "epoch": 1.4441834089436163, "grad_norm": 0.04739182069897652, "learning_rate": 0.00014199108870786265, "loss": 0.337, "step": 17827 }, { "epoch": 1.4442644199611148, "grad_norm": 0.04741252213716507, "learning_rate": 0.000141986588055268, "loss": 0.2582, "step": 17828 }, { "epoch": 1.444345430978613, "grad_norm": 0.05981162562966347, "learning_rate": 0.0001419820874026734, "loss": 0.3234, "step": 17829 }, { "epoch": 1.4444264419961115, "grad_norm": 0.04911743104457855, "learning_rate": 0.00014197758675007878, "loss": 0.3054, "step": 17830 }, { "epoch": 1.44450745301361, "grad_norm": 0.04073718190193176, "learning_rate": 0.00014197308609748414, "loss": 0.2575, "step": 17831 }, { "epoch": 1.4445884640311082, "grad_norm": 0.06219786778092384, "learning_rate": 0.0001419685854448895, "loss": 0.3347, "step": 17832 }, { "epoch": 1.4446694750486067, "grad_norm": 0.054439812898635864, "learning_rate": 0.0001419640847922949, "loss": 0.2808, "step": 17833 }, { "epoch": 1.4447504860661051, "grad_norm": 0.05290693789720535, "learning_rate": 0.00014195958413970025, "loss": 0.2597, "step": 17834 }, { "epoch": 1.4448314970836034, "grad_norm": 0.04396319016814232, "learning_rate": 0.00014195508348710563, "loss": 0.287, "step": 17835 }, { "epoch": 1.4449125081011016, "grad_norm": 0.04777374863624573, "learning_rate": 0.00014195058283451102, "loss": 0.3194, "step": 17836 }, { "epoch": 1.4449935191186, "grad_norm": 0.041372284293174744, "learning_rate": 0.00014194608218191638, "loss": 0.2489, "step": 17837 }, { "epoch": 1.4450745301360985, "grad_norm": 0.048189952969551086, "learning_rate": 0.00014194158152932174, "loss": 0.2694, "step": 17838 }, { "epoch": 1.4451555411535968, "grad_norm": 0.04953427240252495, "learning_rate": 0.00014193708087672713, "loss": 0.3139, "step": 17839 }, { "epoch": 1.4452365521710953, "grad_norm": 0.0434766449034214, "learning_rate": 0.00014193258022413252, "loss": 0.3034, "step": 17840 }, { "epoch": 1.4453175631885937, "grad_norm": 0.0509701706469059, "learning_rate": 0.00014192807957153788, "loss": 0.2699, "step": 17841 }, { "epoch": 1.445398574206092, "grad_norm": 0.044415779411792755, "learning_rate": 0.00014192357891894326, "loss": 0.2832, "step": 17842 }, { "epoch": 1.4454795852235904, "grad_norm": 0.048741765320301056, "learning_rate": 0.00014191907826634862, "loss": 0.2821, "step": 17843 }, { "epoch": 1.445560596241089, "grad_norm": 0.043712157756090164, "learning_rate": 0.00014191457761375398, "loss": 0.286, "step": 17844 }, { "epoch": 1.4456416072585871, "grad_norm": 0.051524609327316284, "learning_rate": 0.00014191007696115937, "loss": 0.3056, "step": 17845 }, { "epoch": 1.4457226182760856, "grad_norm": 0.051667921245098114, "learning_rate": 0.00014190557630856476, "loss": 0.278, "step": 17846 }, { "epoch": 1.4458036292935839, "grad_norm": 0.05290085822343826, "learning_rate": 0.00014190107565597012, "loss": 0.2713, "step": 17847 }, { "epoch": 1.4458846403110823, "grad_norm": 0.04937123507261276, "learning_rate": 0.0001418965750033755, "loss": 0.2541, "step": 17848 }, { "epoch": 1.4459656513285806, "grad_norm": 0.04817288741469383, "learning_rate": 0.00014189207435078087, "loss": 0.2999, "step": 17849 }, { "epoch": 1.446046662346079, "grad_norm": 0.061793092638254166, "learning_rate": 0.00014188757369818623, "loss": 0.3782, "step": 17850 }, { "epoch": 1.4461276733635775, "grad_norm": 0.04492030665278435, "learning_rate": 0.0001418830730455916, "loss": 0.2565, "step": 17851 }, { "epoch": 1.4462086843810757, "grad_norm": 0.04402526840567589, "learning_rate": 0.000141878572392997, "loss": 0.2974, "step": 17852 }, { "epoch": 1.4462896953985742, "grad_norm": 0.04675083979964256, "learning_rate": 0.00014187407174040236, "loss": 0.2617, "step": 17853 }, { "epoch": 1.4463707064160727, "grad_norm": 0.046562857925891876, "learning_rate": 0.00014186957108780775, "loss": 0.2916, "step": 17854 }, { "epoch": 1.446451717433571, "grad_norm": 0.05099884793162346, "learning_rate": 0.0001418650704352131, "loss": 0.3285, "step": 17855 }, { "epoch": 1.4465327284510694, "grad_norm": 0.04533091560006142, "learning_rate": 0.00014186056978261847, "loss": 0.2499, "step": 17856 }, { "epoch": 1.4466137394685679, "grad_norm": 0.053434841334819794, "learning_rate": 0.00014185606913002385, "loss": 0.2926, "step": 17857 }, { "epoch": 1.446694750486066, "grad_norm": 0.04376845434308052, "learning_rate": 0.00014185156847742924, "loss": 0.2999, "step": 17858 }, { "epoch": 1.4467757615035644, "grad_norm": 0.046161361038684845, "learning_rate": 0.0001418470678248346, "loss": 0.2671, "step": 17859 }, { "epoch": 1.4468567725210628, "grad_norm": 0.0418320931494236, "learning_rate": 0.00014184256717224, "loss": 0.2765, "step": 17860 }, { "epoch": 1.4469377835385613, "grad_norm": 0.04614844545722008, "learning_rate": 0.00014183806651964535, "loss": 0.2622, "step": 17861 }, { "epoch": 1.4470187945560595, "grad_norm": 0.04278077557682991, "learning_rate": 0.0001418335658670507, "loss": 0.2763, "step": 17862 }, { "epoch": 1.447099805573558, "grad_norm": 0.051653698086738586, "learning_rate": 0.00014182906521445612, "loss": 0.289, "step": 17863 }, { "epoch": 1.4471808165910565, "grad_norm": 0.04692598059773445, "learning_rate": 0.00014182456456186148, "loss": 0.2912, "step": 17864 }, { "epoch": 1.4472618276085547, "grad_norm": 0.05240803584456444, "learning_rate": 0.00014182006390926684, "loss": 0.3039, "step": 17865 }, { "epoch": 1.4473428386260532, "grad_norm": 0.04631833732128143, "learning_rate": 0.00014181556325667223, "loss": 0.2821, "step": 17866 }, { "epoch": 1.4474238496435516, "grad_norm": 0.05302568897604942, "learning_rate": 0.0001418110626040776, "loss": 0.3315, "step": 17867 }, { "epoch": 1.4475048606610499, "grad_norm": 0.0616886280477047, "learning_rate": 0.00014180656195148295, "loss": 0.3201, "step": 17868 }, { "epoch": 1.4475858716785484, "grad_norm": 0.04690217599272728, "learning_rate": 0.00014180206129888836, "loss": 0.2642, "step": 17869 }, { "epoch": 1.4476668826960466, "grad_norm": 0.059810154139995575, "learning_rate": 0.00014179756064629372, "loss": 0.3053, "step": 17870 }, { "epoch": 1.447747893713545, "grad_norm": 0.04747282341122627, "learning_rate": 0.00014179305999369908, "loss": 0.3423, "step": 17871 }, { "epoch": 1.4478289047310433, "grad_norm": 0.05117661878466606, "learning_rate": 0.00014178855934110447, "loss": 0.3011, "step": 17872 }, { "epoch": 1.4479099157485418, "grad_norm": 0.045453134924173355, "learning_rate": 0.00014178405868850983, "loss": 0.2932, "step": 17873 }, { "epoch": 1.4479909267660402, "grad_norm": 0.04899902641773224, "learning_rate": 0.00014177955803591522, "loss": 0.3111, "step": 17874 }, { "epoch": 1.4480719377835385, "grad_norm": 0.04835022613406181, "learning_rate": 0.0001417750573833206, "loss": 0.2965, "step": 17875 }, { "epoch": 1.448152948801037, "grad_norm": 0.04425729438662529, "learning_rate": 0.00014177055673072597, "loss": 0.2807, "step": 17876 }, { "epoch": 1.4482339598185354, "grad_norm": 0.04642229899764061, "learning_rate": 0.00014176605607813133, "loss": 0.2362, "step": 17877 }, { "epoch": 1.4483149708360337, "grad_norm": 0.04590180143713951, "learning_rate": 0.0001417615554255367, "loss": 0.2883, "step": 17878 }, { "epoch": 1.4483959818535321, "grad_norm": 0.046345289796590805, "learning_rate": 0.00014175705477294207, "loss": 0.2911, "step": 17879 }, { "epoch": 1.4484769928710304, "grad_norm": 0.05325405299663544, "learning_rate": 0.00014175255412034746, "loss": 0.2957, "step": 17880 }, { "epoch": 1.4485580038885288, "grad_norm": 0.05683707445859909, "learning_rate": 0.00014174805346775285, "loss": 0.2585, "step": 17881 }, { "epoch": 1.448639014906027, "grad_norm": 0.04651009663939476, "learning_rate": 0.0001417435528151582, "loss": 0.2747, "step": 17882 }, { "epoch": 1.4487200259235256, "grad_norm": 0.04978650435805321, "learning_rate": 0.00014173905216256357, "loss": 0.2927, "step": 17883 }, { "epoch": 1.448801036941024, "grad_norm": 0.05127064511179924, "learning_rate": 0.00014173455150996895, "loss": 0.3347, "step": 17884 }, { "epoch": 1.4488820479585223, "grad_norm": 0.04449443891644478, "learning_rate": 0.00014173005085737431, "loss": 0.2603, "step": 17885 }, { "epoch": 1.4489630589760207, "grad_norm": 0.051162779331207275, "learning_rate": 0.0001417255502047797, "loss": 0.3022, "step": 17886 }, { "epoch": 1.4490440699935192, "grad_norm": 0.04203864932060242, "learning_rate": 0.0001417210495521851, "loss": 0.2839, "step": 17887 }, { "epoch": 1.4491250810110174, "grad_norm": 0.05202589929103851, "learning_rate": 0.00014171654889959045, "loss": 0.2748, "step": 17888 }, { "epoch": 1.449206092028516, "grad_norm": 0.05412520840764046, "learning_rate": 0.0001417120482469958, "loss": 0.3208, "step": 17889 }, { "epoch": 1.4492871030460144, "grad_norm": 0.040271785110235214, "learning_rate": 0.0001417075475944012, "loss": 0.2777, "step": 17890 }, { "epoch": 1.4493681140635126, "grad_norm": 0.04579610377550125, "learning_rate": 0.00014170304694180656, "loss": 0.293, "step": 17891 }, { "epoch": 1.449449125081011, "grad_norm": 0.05482166260480881, "learning_rate": 0.00014169854628921194, "loss": 0.3704, "step": 17892 }, { "epoch": 1.4495301360985093, "grad_norm": 0.05623460188508034, "learning_rate": 0.00014169404563661733, "loss": 0.3182, "step": 17893 }, { "epoch": 1.4496111471160078, "grad_norm": 0.04352636635303497, "learning_rate": 0.0001416895449840227, "loss": 0.2911, "step": 17894 }, { "epoch": 1.449692158133506, "grad_norm": 0.04971605911850929, "learning_rate": 0.00014168504433142805, "loss": 0.3116, "step": 17895 }, { "epoch": 1.4497731691510045, "grad_norm": 0.04764750599861145, "learning_rate": 0.00014168054367883344, "loss": 0.2911, "step": 17896 }, { "epoch": 1.449854180168503, "grad_norm": 0.0546216182410717, "learning_rate": 0.0001416760430262388, "loss": 0.3103, "step": 17897 }, { "epoch": 1.4499351911860012, "grad_norm": 0.046539004892110825, "learning_rate": 0.00014167154237364419, "loss": 0.2938, "step": 17898 }, { "epoch": 1.4500162022034997, "grad_norm": 0.043932583183050156, "learning_rate": 0.00014166704172104957, "loss": 0.2989, "step": 17899 }, { "epoch": 1.4500972132209982, "grad_norm": 0.0462028943002224, "learning_rate": 0.00014166254106845493, "loss": 0.2953, "step": 17900 }, { "epoch": 1.4501782242384964, "grad_norm": 0.04697510227560997, "learning_rate": 0.0001416580404158603, "loss": 0.3176, "step": 17901 }, { "epoch": 1.4502592352559949, "grad_norm": 0.04830094054341316, "learning_rate": 0.00014165353976326568, "loss": 0.2974, "step": 17902 }, { "epoch": 1.4503402462734931, "grad_norm": 0.05398035794496536, "learning_rate": 0.00014164903911067104, "loss": 0.3185, "step": 17903 }, { "epoch": 1.4504212572909916, "grad_norm": 0.04325325787067413, "learning_rate": 0.00014164453845807643, "loss": 0.2901, "step": 17904 }, { "epoch": 1.4505022683084898, "grad_norm": 0.05543939396739006, "learning_rate": 0.00014164003780548181, "loss": 0.299, "step": 17905 }, { "epoch": 1.4505832793259883, "grad_norm": 0.046884406358003616, "learning_rate": 0.00014163553715288717, "loss": 0.3183, "step": 17906 }, { "epoch": 1.4506642903434868, "grad_norm": 0.04522902891039848, "learning_rate": 0.00014163103650029253, "loss": 0.3069, "step": 17907 }, { "epoch": 1.450745301360985, "grad_norm": 0.04732300341129303, "learning_rate": 0.00014162653584769792, "loss": 0.2952, "step": 17908 }, { "epoch": 1.4508263123784835, "grad_norm": 0.04480345919728279, "learning_rate": 0.00014162203519510328, "loss": 0.2469, "step": 17909 }, { "epoch": 1.450907323395982, "grad_norm": 0.04138052836060524, "learning_rate": 0.00014161753454250867, "loss": 0.259, "step": 17910 }, { "epoch": 1.4509883344134802, "grad_norm": 0.04558480530977249, "learning_rate": 0.00014161303388991406, "loss": 0.2778, "step": 17911 }, { "epoch": 1.4510693454309787, "grad_norm": 0.04157237336039543, "learning_rate": 0.00014160853323731942, "loss": 0.2622, "step": 17912 }, { "epoch": 1.4511503564484771, "grad_norm": 0.04621091112494469, "learning_rate": 0.00014160403258472478, "loss": 0.3095, "step": 17913 }, { "epoch": 1.4512313674659754, "grad_norm": 0.04249318689107895, "learning_rate": 0.00014159953193213016, "loss": 0.2948, "step": 17914 }, { "epoch": 1.4513123784834738, "grad_norm": 0.05524726212024689, "learning_rate": 0.00014159503127953555, "loss": 0.3546, "step": 17915 }, { "epoch": 1.451393389500972, "grad_norm": 0.05138620734214783, "learning_rate": 0.0001415905306269409, "loss": 0.2997, "step": 17916 }, { "epoch": 1.4514744005184705, "grad_norm": 0.04627459496259689, "learning_rate": 0.0001415860299743463, "loss": 0.269, "step": 17917 }, { "epoch": 1.4515554115359688, "grad_norm": 0.042437851428985596, "learning_rate": 0.00014158152932175166, "loss": 0.3065, "step": 17918 }, { "epoch": 1.4516364225534673, "grad_norm": 0.048960473388433456, "learning_rate": 0.00014157702866915702, "loss": 0.3007, "step": 17919 }, { "epoch": 1.4517174335709657, "grad_norm": 0.04249783605337143, "learning_rate": 0.0001415725280165624, "loss": 0.2606, "step": 17920 }, { "epoch": 1.451798444588464, "grad_norm": 0.0489497147500515, "learning_rate": 0.0001415680273639678, "loss": 0.3152, "step": 17921 }, { "epoch": 1.4518794556059624, "grad_norm": 0.04541374370455742, "learning_rate": 0.00014156352671137315, "loss": 0.3066, "step": 17922 }, { "epoch": 1.451960466623461, "grad_norm": 0.05575599893927574, "learning_rate": 0.00014155902605877854, "loss": 0.2829, "step": 17923 }, { "epoch": 1.4520414776409591, "grad_norm": 0.05299101397395134, "learning_rate": 0.0001415545254061839, "loss": 0.3202, "step": 17924 }, { "epoch": 1.4521224886584576, "grad_norm": 0.03805427998304367, "learning_rate": 0.00014155002475358926, "loss": 0.247, "step": 17925 }, { "epoch": 1.4522034996759559, "grad_norm": 0.047305621206760406, "learning_rate": 0.00014154552410099465, "loss": 0.2711, "step": 17926 }, { "epoch": 1.4522845106934543, "grad_norm": 0.05216123163700104, "learning_rate": 0.00014154102344840003, "loss": 0.2862, "step": 17927 }, { "epoch": 1.4523655217109526, "grad_norm": 0.04718941077589989, "learning_rate": 0.0001415365227958054, "loss": 0.33, "step": 17928 }, { "epoch": 1.452446532728451, "grad_norm": 0.04569919407367706, "learning_rate": 0.00014153202214321078, "loss": 0.2696, "step": 17929 }, { "epoch": 1.4525275437459495, "grad_norm": 0.03612606227397919, "learning_rate": 0.00014152752149061614, "loss": 0.2425, "step": 17930 }, { "epoch": 1.4526085547634477, "grad_norm": 0.05211987718939781, "learning_rate": 0.0001415230208380215, "loss": 0.2915, "step": 17931 }, { "epoch": 1.4526895657809462, "grad_norm": 0.04864644259214401, "learning_rate": 0.0001415185201854269, "loss": 0.2833, "step": 17932 }, { "epoch": 1.4527705767984447, "grad_norm": 0.05485297366976738, "learning_rate": 0.00014151401953283228, "loss": 0.2679, "step": 17933 }, { "epoch": 1.452851587815943, "grad_norm": 0.05420330911874771, "learning_rate": 0.00014150951888023764, "loss": 0.3259, "step": 17934 }, { "epoch": 1.4529325988334414, "grad_norm": 0.04696902632713318, "learning_rate": 0.00014150501822764302, "loss": 0.2796, "step": 17935 }, { "epoch": 1.4530136098509399, "grad_norm": 0.05307691916823387, "learning_rate": 0.00014150051757504838, "loss": 0.2913, "step": 17936 }, { "epoch": 1.453094620868438, "grad_norm": 0.05445937439799309, "learning_rate": 0.00014149601692245374, "loss": 0.2901, "step": 17937 }, { "epoch": 1.4531756318859366, "grad_norm": 0.0543481819331646, "learning_rate": 0.00014149151626985913, "loss": 0.3006, "step": 17938 }, { "epoch": 1.4532566429034348, "grad_norm": 0.0560697577893734, "learning_rate": 0.00014148701561726452, "loss": 0.2659, "step": 17939 }, { "epoch": 1.4533376539209333, "grad_norm": 0.04972623661160469, "learning_rate": 0.00014148251496466988, "loss": 0.2573, "step": 17940 }, { "epoch": 1.4534186649384315, "grad_norm": 0.05344128608703613, "learning_rate": 0.00014147801431207526, "loss": 0.2826, "step": 17941 }, { "epoch": 1.45349967595593, "grad_norm": 0.04564177617430687, "learning_rate": 0.00014147351365948062, "loss": 0.287, "step": 17942 }, { "epoch": 1.4535806869734285, "grad_norm": 0.04605825990438461, "learning_rate": 0.000141469013006886, "loss": 0.2749, "step": 17943 }, { "epoch": 1.4536616979909267, "grad_norm": 0.04860905557870865, "learning_rate": 0.0001414645123542914, "loss": 0.3092, "step": 17944 }, { "epoch": 1.4537427090084252, "grad_norm": 0.048022761940956116, "learning_rate": 0.00014146001170169676, "loss": 0.2806, "step": 17945 }, { "epoch": 1.4538237200259236, "grad_norm": 0.05513114109635353, "learning_rate": 0.00014145551104910212, "loss": 0.2919, "step": 17946 }, { "epoch": 1.4539047310434219, "grad_norm": 0.04558014124631882, "learning_rate": 0.0001414510103965075, "loss": 0.2748, "step": 17947 }, { "epoch": 1.4539857420609203, "grad_norm": 0.05039425566792488, "learning_rate": 0.00014144650974391287, "loss": 0.2663, "step": 17948 }, { "epoch": 1.4540667530784186, "grad_norm": 0.03847365826368332, "learning_rate": 0.00014144200909131825, "loss": 0.2411, "step": 17949 }, { "epoch": 1.454147764095917, "grad_norm": 0.04872952029109001, "learning_rate": 0.00014143750843872364, "loss": 0.3062, "step": 17950 }, { "epoch": 1.4542287751134153, "grad_norm": 0.04281361401081085, "learning_rate": 0.000141433007786129, "loss": 0.285, "step": 17951 }, { "epoch": 1.4543097861309138, "grad_norm": 0.04651939123868942, "learning_rate": 0.00014142850713353436, "loss": 0.2774, "step": 17952 }, { "epoch": 1.4543907971484122, "grad_norm": 0.04647616669535637, "learning_rate": 0.00014142400648093975, "loss": 0.2742, "step": 17953 }, { "epoch": 1.4544718081659105, "grad_norm": 0.05316271260380745, "learning_rate": 0.0001414195058283451, "loss": 0.3138, "step": 17954 }, { "epoch": 1.454552819183409, "grad_norm": 0.048011794686317444, "learning_rate": 0.0001414150051757505, "loss": 0.3148, "step": 17955 }, { "epoch": 1.4546338302009074, "grad_norm": 0.046368878334760666, "learning_rate": 0.00014141050452315588, "loss": 0.3113, "step": 17956 }, { "epoch": 1.4547148412184057, "grad_norm": 0.05619870498776436, "learning_rate": 0.00014140600387056124, "loss": 0.2799, "step": 17957 }, { "epoch": 1.4547958522359041, "grad_norm": 0.04389476403594017, "learning_rate": 0.0001414015032179666, "loss": 0.2668, "step": 17958 }, { "epoch": 1.4548768632534026, "grad_norm": 0.0636439397931099, "learning_rate": 0.000141397002565372, "loss": 0.3739, "step": 17959 }, { "epoch": 1.4549578742709008, "grad_norm": 0.04439515620470047, "learning_rate": 0.00014139250191277735, "loss": 0.2836, "step": 17960 }, { "epoch": 1.455038885288399, "grad_norm": 0.05382426083087921, "learning_rate": 0.00014138800126018274, "loss": 0.2935, "step": 17961 }, { "epoch": 1.4551198963058976, "grad_norm": 0.04022745415568352, "learning_rate": 0.00014138350060758812, "loss": 0.2283, "step": 17962 }, { "epoch": 1.455200907323396, "grad_norm": 0.047797948122024536, "learning_rate": 0.00014137899995499348, "loss": 0.2851, "step": 17963 }, { "epoch": 1.4552819183408943, "grad_norm": 0.0559130422770977, "learning_rate": 0.00014137449930239884, "loss": 0.3301, "step": 17964 }, { "epoch": 1.4553629293583927, "grad_norm": 0.05886486545205116, "learning_rate": 0.00014136999864980423, "loss": 0.3114, "step": 17965 }, { "epoch": 1.4554439403758912, "grad_norm": 0.04601144418120384, "learning_rate": 0.0001413654979972096, "loss": 0.2874, "step": 17966 }, { "epoch": 1.4555249513933894, "grad_norm": 0.05067246034741402, "learning_rate": 0.00014136099734461498, "loss": 0.3187, "step": 17967 }, { "epoch": 1.455605962410888, "grad_norm": 0.04644129052758217, "learning_rate": 0.00014135649669202036, "loss": 0.2851, "step": 17968 }, { "epoch": 1.4556869734283864, "grad_norm": 0.04704859480261803, "learning_rate": 0.00014135199603942572, "loss": 0.3031, "step": 17969 }, { "epoch": 1.4557679844458846, "grad_norm": 0.04019991680979729, "learning_rate": 0.00014134749538683108, "loss": 0.293, "step": 17970 }, { "epoch": 1.455848995463383, "grad_norm": 0.04760279878973961, "learning_rate": 0.00014134299473423647, "loss": 0.3085, "step": 17971 }, { "epoch": 1.4559300064808813, "grad_norm": 0.03884115442633629, "learning_rate": 0.00014133849408164183, "loss": 0.2446, "step": 17972 }, { "epoch": 1.4560110174983798, "grad_norm": 0.04347531870007515, "learning_rate": 0.00014133399342904722, "loss": 0.2866, "step": 17973 }, { "epoch": 1.456092028515878, "grad_norm": 0.045292872935533524, "learning_rate": 0.0001413294927764526, "loss": 0.305, "step": 17974 }, { "epoch": 1.4561730395333765, "grad_norm": 0.04531371593475342, "learning_rate": 0.00014132499212385797, "loss": 0.3083, "step": 17975 }, { "epoch": 1.456254050550875, "grad_norm": 0.05037960782647133, "learning_rate": 0.00014132049147126333, "loss": 0.3167, "step": 17976 }, { "epoch": 1.4563350615683732, "grad_norm": 0.04448464885354042, "learning_rate": 0.00014131599081866871, "loss": 0.2932, "step": 17977 }, { "epoch": 1.4564160725858717, "grad_norm": 0.06180466711521149, "learning_rate": 0.00014131149016607407, "loss": 0.3675, "step": 17978 }, { "epoch": 1.4564970836033702, "grad_norm": 0.04839387536048889, "learning_rate": 0.00014130698951347946, "loss": 0.2794, "step": 17979 }, { "epoch": 1.4565780946208684, "grad_norm": 0.04605800285935402, "learning_rate": 0.00014130248886088485, "loss": 0.2985, "step": 17980 }, { "epoch": 1.4566591056383669, "grad_norm": 0.04858919233083725, "learning_rate": 0.0001412979882082902, "loss": 0.3068, "step": 17981 }, { "epoch": 1.4567401166558653, "grad_norm": 0.0511137954890728, "learning_rate": 0.00014129348755569557, "loss": 0.3077, "step": 17982 }, { "epoch": 1.4568211276733636, "grad_norm": 0.040559422224760056, "learning_rate": 0.00014128898690310096, "loss": 0.2673, "step": 17983 }, { "epoch": 1.4569021386908618, "grad_norm": 0.05546125769615173, "learning_rate": 0.00014128448625050632, "loss": 0.2827, "step": 17984 }, { "epoch": 1.4569831497083603, "grad_norm": 0.047785788774490356, "learning_rate": 0.0001412799855979117, "loss": 0.2556, "step": 17985 }, { "epoch": 1.4570641607258588, "grad_norm": 0.04613126814365387, "learning_rate": 0.0001412754849453171, "loss": 0.2681, "step": 17986 }, { "epoch": 1.457145171743357, "grad_norm": 0.053333960473537445, "learning_rate": 0.00014127098429272245, "loss": 0.3264, "step": 17987 }, { "epoch": 1.4572261827608555, "grad_norm": 0.0531015619635582, "learning_rate": 0.0001412664836401278, "loss": 0.3412, "step": 17988 }, { "epoch": 1.457307193778354, "grad_norm": 0.04451011121273041, "learning_rate": 0.0001412619829875332, "loss": 0.282, "step": 17989 }, { "epoch": 1.4573882047958522, "grad_norm": 0.04917840659618378, "learning_rate": 0.00014125748233493856, "loss": 0.2605, "step": 17990 }, { "epoch": 1.4574692158133506, "grad_norm": 0.04786407947540283, "learning_rate": 0.00014125298168234394, "loss": 0.2776, "step": 17991 }, { "epoch": 1.4575502268308491, "grad_norm": 0.04443423077464104, "learning_rate": 0.00014124848102974933, "loss": 0.2901, "step": 17992 }, { "epoch": 1.4576312378483474, "grad_norm": 0.05426434054970741, "learning_rate": 0.0001412439803771547, "loss": 0.3013, "step": 17993 }, { "epoch": 1.4577122488658458, "grad_norm": 0.05337275192141533, "learning_rate": 0.00014123947972456005, "loss": 0.2869, "step": 17994 }, { "epoch": 1.457793259883344, "grad_norm": 0.04778950288891792, "learning_rate": 0.00014123497907196544, "loss": 0.2652, "step": 17995 }, { "epoch": 1.4578742709008425, "grad_norm": 0.05062709003686905, "learning_rate": 0.00014123047841937083, "loss": 0.329, "step": 17996 }, { "epoch": 1.4579552819183408, "grad_norm": 0.045372236520051956, "learning_rate": 0.00014122597776677619, "loss": 0.2665, "step": 17997 }, { "epoch": 1.4580362929358393, "grad_norm": 0.058160923421382904, "learning_rate": 0.00014122147711418157, "loss": 0.3171, "step": 17998 }, { "epoch": 1.4581173039533377, "grad_norm": 0.052749160677194595, "learning_rate": 0.00014121697646158693, "loss": 0.2994, "step": 17999 }, { "epoch": 1.458198314970836, "grad_norm": 0.04014609754085541, "learning_rate": 0.0001412124758089923, "loss": 0.2543, "step": 18000 }, { "epoch": 1.4582793259883344, "grad_norm": 0.050106558948755264, "learning_rate": 0.00014120797515639768, "loss": 0.3141, "step": 18001 }, { "epoch": 1.458360337005833, "grad_norm": 0.061030786484479904, "learning_rate": 0.00014120347450380307, "loss": 0.2815, "step": 18002 }, { "epoch": 1.4584413480233311, "grad_norm": 0.047040242701768875, "learning_rate": 0.00014119897385120843, "loss": 0.2907, "step": 18003 }, { "epoch": 1.4585223590408296, "grad_norm": 0.06359859555959702, "learning_rate": 0.00014119447319861381, "loss": 0.34, "step": 18004 }, { "epoch": 1.4586033700583279, "grad_norm": 0.04778430610895157, "learning_rate": 0.00014118997254601917, "loss": 0.3083, "step": 18005 }, { "epoch": 1.4586843810758263, "grad_norm": 0.04784518852829933, "learning_rate": 0.00014118547189342453, "loss": 0.2844, "step": 18006 }, { "epoch": 1.4587653920933246, "grad_norm": 0.04674403741955757, "learning_rate": 0.00014118097124082992, "loss": 0.3182, "step": 18007 }, { "epoch": 1.458846403110823, "grad_norm": 0.04453251138329506, "learning_rate": 0.0001411764705882353, "loss": 0.2985, "step": 18008 }, { "epoch": 1.4589274141283215, "grad_norm": 0.06165555119514465, "learning_rate": 0.00014117196993564067, "loss": 0.3341, "step": 18009 }, { "epoch": 1.4590084251458197, "grad_norm": 0.04634040594100952, "learning_rate": 0.00014116746928304606, "loss": 0.3042, "step": 18010 }, { "epoch": 1.4590894361633182, "grad_norm": 0.04452341049909592, "learning_rate": 0.00014116296863045142, "loss": 0.3252, "step": 18011 }, { "epoch": 1.4591704471808167, "grad_norm": 0.041508760303258896, "learning_rate": 0.0001411584679778568, "loss": 0.2849, "step": 18012 }, { "epoch": 1.459251458198315, "grad_norm": 0.05098029226064682, "learning_rate": 0.00014115396732526216, "loss": 0.2989, "step": 18013 }, { "epoch": 1.4593324692158134, "grad_norm": 0.04109809920191765, "learning_rate": 0.00014114946667266755, "loss": 0.2533, "step": 18014 }, { "epoch": 1.4594134802333119, "grad_norm": 0.043970998376607895, "learning_rate": 0.0001411449660200729, "loss": 0.2887, "step": 18015 }, { "epoch": 1.45949449125081, "grad_norm": 0.043616171926259995, "learning_rate": 0.0001411404653674783, "loss": 0.2799, "step": 18016 }, { "epoch": 1.4595755022683086, "grad_norm": 0.046028878539800644, "learning_rate": 0.00014113596471488366, "loss": 0.2528, "step": 18017 }, { "epoch": 1.4596565132858068, "grad_norm": 0.037560880184173584, "learning_rate": 0.00014113146406228904, "loss": 0.2436, "step": 18018 }, { "epoch": 1.4597375243033053, "grad_norm": 0.059593748301267624, "learning_rate": 0.0001411269634096944, "loss": 0.3001, "step": 18019 }, { "epoch": 1.4598185353208035, "grad_norm": 0.046373482793569565, "learning_rate": 0.0001411224627570998, "loss": 0.232, "step": 18020 }, { "epoch": 1.459899546338302, "grad_norm": 0.05016429349780083, "learning_rate": 0.00014111796210450515, "loss": 0.2768, "step": 18021 }, { "epoch": 1.4599805573558005, "grad_norm": 0.05345659330487251, "learning_rate": 0.00014111346145191054, "loss": 0.2539, "step": 18022 }, { "epoch": 1.4600615683732987, "grad_norm": 0.05080629140138626, "learning_rate": 0.0001411089607993159, "loss": 0.2969, "step": 18023 }, { "epoch": 1.4601425793907972, "grad_norm": 0.049367163330316544, "learning_rate": 0.0001411044601467213, "loss": 0.2725, "step": 18024 }, { "epoch": 1.4602235904082956, "grad_norm": 0.048777006566524506, "learning_rate": 0.00014109995949412667, "loss": 0.2831, "step": 18025 }, { "epoch": 1.4603046014257939, "grad_norm": 0.05571383982896805, "learning_rate": 0.00014109545884153203, "loss": 0.3142, "step": 18026 }, { "epoch": 1.4603856124432923, "grad_norm": 0.051831867545843124, "learning_rate": 0.0001410909581889374, "loss": 0.2662, "step": 18027 }, { "epoch": 1.4604666234607906, "grad_norm": 0.0479249432682991, "learning_rate": 0.00014108645753634278, "loss": 0.279, "step": 18028 }, { "epoch": 1.460547634478289, "grad_norm": 0.05813343822956085, "learning_rate": 0.00014108195688374814, "loss": 0.3218, "step": 18029 }, { "epoch": 1.4606286454957873, "grad_norm": 0.06661345064640045, "learning_rate": 0.00014107745623115353, "loss": 0.3647, "step": 18030 }, { "epoch": 1.4607096565132858, "grad_norm": 0.048136595636606216, "learning_rate": 0.00014107295557855892, "loss": 0.3022, "step": 18031 }, { "epoch": 1.4607906675307842, "grad_norm": 0.046726103872060776, "learning_rate": 0.00014106845492596428, "loss": 0.3248, "step": 18032 }, { "epoch": 1.4608716785482825, "grad_norm": 0.04466182738542557, "learning_rate": 0.00014106395427336964, "loss": 0.2979, "step": 18033 }, { "epoch": 1.460952689565781, "grad_norm": 0.04451071098446846, "learning_rate": 0.00014105945362077502, "loss": 0.2743, "step": 18034 }, { "epoch": 1.4610337005832794, "grad_norm": 0.045306671410799026, "learning_rate": 0.00014105495296818038, "loss": 0.2636, "step": 18035 }, { "epoch": 1.4611147116007777, "grad_norm": 0.044049546122550964, "learning_rate": 0.00014105045231558577, "loss": 0.2532, "step": 18036 }, { "epoch": 1.4611957226182761, "grad_norm": 0.04672233387827873, "learning_rate": 0.00014104595166299116, "loss": 0.2601, "step": 18037 }, { "epoch": 1.4612767336357746, "grad_norm": 0.05258664861321449, "learning_rate": 0.00014104145101039652, "loss": 0.3111, "step": 18038 }, { "epoch": 1.4613577446532728, "grad_norm": 0.04627470672130585, "learning_rate": 0.00014103695035780188, "loss": 0.3116, "step": 18039 }, { "epoch": 1.4614387556707713, "grad_norm": 0.054817069321870804, "learning_rate": 0.00014103244970520726, "loss": 0.3016, "step": 18040 }, { "epoch": 1.4615197666882696, "grad_norm": 0.045628152787685394, "learning_rate": 0.00014102794905261262, "loss": 0.2678, "step": 18041 }, { "epoch": 1.461600777705768, "grad_norm": 0.04165344685316086, "learning_rate": 0.000141023448400018, "loss": 0.3255, "step": 18042 }, { "epoch": 1.4616817887232663, "grad_norm": 0.056146860122680664, "learning_rate": 0.0001410189477474234, "loss": 0.3322, "step": 18043 }, { "epoch": 1.4617627997407647, "grad_norm": 0.04300034046173096, "learning_rate": 0.00014101444709482876, "loss": 0.2555, "step": 18044 }, { "epoch": 1.4618438107582632, "grad_norm": 0.04808373376727104, "learning_rate": 0.00014100994644223412, "loss": 0.3001, "step": 18045 }, { "epoch": 1.4619248217757614, "grad_norm": 0.04786072298884392, "learning_rate": 0.0001410054457896395, "loss": 0.3113, "step": 18046 }, { "epoch": 1.46200583279326, "grad_norm": 0.04329356551170349, "learning_rate": 0.00014100094513704487, "loss": 0.2971, "step": 18047 }, { "epoch": 1.4620868438107584, "grad_norm": 0.0741897001862526, "learning_rate": 0.00014099644448445025, "loss": 0.3311, "step": 18048 }, { "epoch": 1.4621678548282566, "grad_norm": 0.0446975976228714, "learning_rate": 0.00014099194383185564, "loss": 0.2803, "step": 18049 }, { "epoch": 1.462248865845755, "grad_norm": 0.054658737033605576, "learning_rate": 0.000140987443179261, "loss": 0.3161, "step": 18050 }, { "epoch": 1.4623298768632533, "grad_norm": 0.053584374487400055, "learning_rate": 0.00014098294252666636, "loss": 0.2971, "step": 18051 }, { "epoch": 1.4624108878807518, "grad_norm": 0.04646073654294014, "learning_rate": 0.00014097844187407175, "loss": 0.2612, "step": 18052 }, { "epoch": 1.46249189889825, "grad_norm": 0.049993809312582016, "learning_rate": 0.0001409739412214771, "loss": 0.3111, "step": 18053 }, { "epoch": 1.4625729099157485, "grad_norm": 0.05515061691403389, "learning_rate": 0.0001409694405688825, "loss": 0.2937, "step": 18054 }, { "epoch": 1.462653920933247, "grad_norm": 0.042825594544410706, "learning_rate": 0.00014096493991628788, "loss": 0.2597, "step": 18055 }, { "epoch": 1.4627349319507452, "grad_norm": 0.04601466655731201, "learning_rate": 0.00014096043926369324, "loss": 0.2796, "step": 18056 }, { "epoch": 1.4628159429682437, "grad_norm": 0.04693853110074997, "learning_rate": 0.0001409559386110986, "loss": 0.2861, "step": 18057 }, { "epoch": 1.4628969539857422, "grad_norm": 0.05535530298948288, "learning_rate": 0.000140951437958504, "loss": 0.3323, "step": 18058 }, { "epoch": 1.4629779650032404, "grad_norm": 0.047633446753025055, "learning_rate": 0.00014094693730590935, "loss": 0.3258, "step": 18059 }, { "epoch": 1.4630589760207389, "grad_norm": 0.05180913954973221, "learning_rate": 0.00014094243665331474, "loss": 0.2995, "step": 18060 }, { "epoch": 1.4631399870382373, "grad_norm": 0.04927406460046768, "learning_rate": 0.00014093793600072012, "loss": 0.3096, "step": 18061 }, { "epoch": 1.4632209980557356, "grad_norm": 0.04759169742465019, "learning_rate": 0.00014093343534812548, "loss": 0.2886, "step": 18062 }, { "epoch": 1.4633020090732338, "grad_norm": 0.04811166226863861, "learning_rate": 0.00014092893469553084, "loss": 0.2862, "step": 18063 }, { "epoch": 1.4633830200907323, "grad_norm": 0.05206162855029106, "learning_rate": 0.00014092443404293623, "loss": 0.2675, "step": 18064 }, { "epoch": 1.4634640311082308, "grad_norm": 0.051354411989450455, "learning_rate": 0.0001409199333903416, "loss": 0.2809, "step": 18065 }, { "epoch": 1.463545042125729, "grad_norm": 0.05393500253558159, "learning_rate": 0.00014091543273774698, "loss": 0.299, "step": 18066 }, { "epoch": 1.4636260531432275, "grad_norm": 0.048284344375133514, "learning_rate": 0.00014091093208515237, "loss": 0.3017, "step": 18067 }, { "epoch": 1.463707064160726, "grad_norm": 0.05360013619065285, "learning_rate": 0.00014090643143255773, "loss": 0.2872, "step": 18068 }, { "epoch": 1.4637880751782242, "grad_norm": 0.04138386994600296, "learning_rate": 0.00014090193077996309, "loss": 0.2826, "step": 18069 }, { "epoch": 1.4638690861957226, "grad_norm": 0.053617946803569794, "learning_rate": 0.00014089743012736847, "loss": 0.3046, "step": 18070 }, { "epoch": 1.4639500972132211, "grad_norm": 0.04091386869549751, "learning_rate": 0.00014089292947477383, "loss": 0.2847, "step": 18071 }, { "epoch": 1.4640311082307194, "grad_norm": 0.05604439973831177, "learning_rate": 0.00014088842882217922, "loss": 0.3217, "step": 18072 }, { "epoch": 1.4641121192482178, "grad_norm": 0.049055736511945724, "learning_rate": 0.0001408839281695846, "loss": 0.2878, "step": 18073 }, { "epoch": 1.464193130265716, "grad_norm": 0.05496743321418762, "learning_rate": 0.00014087942751698997, "loss": 0.2818, "step": 18074 }, { "epoch": 1.4642741412832145, "grad_norm": 0.05350017547607422, "learning_rate": 0.00014087492686439533, "loss": 0.3004, "step": 18075 }, { "epoch": 1.4643551523007128, "grad_norm": 0.060675207525491714, "learning_rate": 0.00014087042621180071, "loss": 0.2802, "step": 18076 }, { "epoch": 1.4644361633182112, "grad_norm": 0.06121957674622536, "learning_rate": 0.0001408659255592061, "loss": 0.3384, "step": 18077 }, { "epoch": 1.4645171743357097, "grad_norm": 0.05118240416049957, "learning_rate": 0.00014086142490661146, "loss": 0.2998, "step": 18078 }, { "epoch": 1.464598185353208, "grad_norm": 0.05059054493904114, "learning_rate": 0.00014085692425401685, "loss": 0.3121, "step": 18079 }, { "epoch": 1.4646791963707064, "grad_norm": 0.05860761180520058, "learning_rate": 0.0001408524236014222, "loss": 0.3112, "step": 18080 }, { "epoch": 1.464760207388205, "grad_norm": 0.0478266216814518, "learning_rate": 0.0001408479229488276, "loss": 0.2879, "step": 18081 }, { "epoch": 1.4648412184057031, "grad_norm": 0.04629899561405182, "learning_rate": 0.00014084342229623296, "loss": 0.2616, "step": 18082 }, { "epoch": 1.4649222294232016, "grad_norm": 0.05281487852334976, "learning_rate": 0.00014083892164363834, "loss": 0.3508, "step": 18083 }, { "epoch": 1.4650032404407, "grad_norm": 0.04183907061815262, "learning_rate": 0.0001408344209910437, "loss": 0.2721, "step": 18084 }, { "epoch": 1.4650842514581983, "grad_norm": 0.052894480526447296, "learning_rate": 0.0001408299203384491, "loss": 0.2953, "step": 18085 }, { "epoch": 1.4651652624756966, "grad_norm": 0.04844099283218384, "learning_rate": 0.00014082541968585445, "loss": 0.3042, "step": 18086 }, { "epoch": 1.465246273493195, "grad_norm": 0.04348544776439667, "learning_rate": 0.00014082091903325984, "loss": 0.2652, "step": 18087 }, { "epoch": 1.4653272845106935, "grad_norm": 0.04488521069288254, "learning_rate": 0.0001408164183806652, "loss": 0.2853, "step": 18088 }, { "epoch": 1.4654082955281917, "grad_norm": 0.05188070237636566, "learning_rate": 0.00014081191772807058, "loss": 0.2873, "step": 18089 }, { "epoch": 1.4654893065456902, "grad_norm": 0.05340287834405899, "learning_rate": 0.00014080741707547594, "loss": 0.2951, "step": 18090 }, { "epoch": 1.4655703175631887, "grad_norm": 0.05681544169783592, "learning_rate": 0.00014080291642288133, "loss": 0.2965, "step": 18091 }, { "epoch": 1.465651328580687, "grad_norm": 0.043873131275177, "learning_rate": 0.0001407984157702867, "loss": 0.2541, "step": 18092 }, { "epoch": 1.4657323395981854, "grad_norm": 0.058677103370428085, "learning_rate": 0.00014079391511769208, "loss": 0.3375, "step": 18093 }, { "epoch": 1.4658133506156839, "grad_norm": 0.05628986656665802, "learning_rate": 0.00014078941446509744, "loss": 0.3147, "step": 18094 }, { "epoch": 1.465894361633182, "grad_norm": 0.049282487481832504, "learning_rate": 0.00014078491381250283, "loss": 0.2791, "step": 18095 }, { "epoch": 1.4659753726506806, "grad_norm": 0.053728487342596054, "learning_rate": 0.00014078041315990819, "loss": 0.34, "step": 18096 }, { "epoch": 1.4660563836681788, "grad_norm": 0.04620833694934845, "learning_rate": 0.00014077591250731357, "loss": 0.272, "step": 18097 }, { "epoch": 1.4661373946856773, "grad_norm": 0.048192430287599564, "learning_rate": 0.00014077141185471893, "loss": 0.2744, "step": 18098 }, { "epoch": 1.4662184057031755, "grad_norm": 0.0455770380795002, "learning_rate": 0.00014076691120212432, "loss": 0.2665, "step": 18099 }, { "epoch": 1.466299416720674, "grad_norm": 0.04394909366965294, "learning_rate": 0.0001407624105495297, "loss": 0.282, "step": 18100 }, { "epoch": 1.4663804277381725, "grad_norm": 0.054294608533382416, "learning_rate": 0.00014075790989693507, "loss": 0.2904, "step": 18101 }, { "epoch": 1.4664614387556707, "grad_norm": 0.04804273322224617, "learning_rate": 0.00014075340924434043, "loss": 0.3221, "step": 18102 }, { "epoch": 1.4665424497731692, "grad_norm": 0.04664277285337448, "learning_rate": 0.00014074890859174581, "loss": 0.2732, "step": 18103 }, { "epoch": 1.4666234607906676, "grad_norm": 0.04305239021778107, "learning_rate": 0.00014074440793915117, "loss": 0.2846, "step": 18104 }, { "epoch": 1.4667044718081659, "grad_norm": 0.0597095862030983, "learning_rate": 0.00014073990728655656, "loss": 0.3724, "step": 18105 }, { "epoch": 1.4667854828256643, "grad_norm": 0.050561290234327316, "learning_rate": 0.00014073540663396195, "loss": 0.3046, "step": 18106 }, { "epoch": 1.4668664938431626, "grad_norm": 0.039581701159477234, "learning_rate": 0.0001407309059813673, "loss": 0.2853, "step": 18107 }, { "epoch": 1.466947504860661, "grad_norm": 0.05390976369380951, "learning_rate": 0.00014072640532877267, "loss": 0.2613, "step": 18108 }, { "epoch": 1.4670285158781593, "grad_norm": 0.06380044668912888, "learning_rate": 0.00014072190467617806, "loss": 0.2974, "step": 18109 }, { "epoch": 1.4671095268956578, "grad_norm": 0.04904315248131752, "learning_rate": 0.00014071740402358342, "loss": 0.2834, "step": 18110 }, { "epoch": 1.4671905379131562, "grad_norm": 0.048541679978370667, "learning_rate": 0.0001407129033709888, "loss": 0.2899, "step": 18111 }, { "epoch": 1.4672715489306545, "grad_norm": 0.04984492063522339, "learning_rate": 0.0001407084027183942, "loss": 0.2583, "step": 18112 }, { "epoch": 1.467352559948153, "grad_norm": 0.0442902147769928, "learning_rate": 0.00014070390206579955, "loss": 0.2719, "step": 18113 }, { "epoch": 1.4674335709656514, "grad_norm": 0.046335652470588684, "learning_rate": 0.0001406994014132049, "loss": 0.3099, "step": 18114 }, { "epoch": 1.4675145819831497, "grad_norm": 0.051748134195804596, "learning_rate": 0.0001406949007606103, "loss": 0.3051, "step": 18115 }, { "epoch": 1.4675955930006481, "grad_norm": 0.04839160665869713, "learning_rate": 0.00014069040010801566, "loss": 0.2853, "step": 18116 }, { "epoch": 1.4676766040181466, "grad_norm": 0.053315989673137665, "learning_rate": 0.00014068589945542105, "loss": 0.3041, "step": 18117 }, { "epoch": 1.4677576150356448, "grad_norm": 0.048422813415527344, "learning_rate": 0.00014068139880282643, "loss": 0.3255, "step": 18118 }, { "epoch": 1.4678386260531433, "grad_norm": 0.045284856110811234, "learning_rate": 0.0001406768981502318, "loss": 0.3213, "step": 18119 }, { "epoch": 1.4679196370706415, "grad_norm": 0.04589143767952919, "learning_rate": 0.00014067239749763715, "loss": 0.2953, "step": 18120 }, { "epoch": 1.46800064808814, "grad_norm": 0.04163743555545807, "learning_rate": 0.00014066789684504254, "loss": 0.2669, "step": 18121 }, { "epoch": 1.4680816591056383, "grad_norm": 0.050901882350444794, "learning_rate": 0.0001406633961924479, "loss": 0.2972, "step": 18122 }, { "epoch": 1.4681626701231367, "grad_norm": 0.042865533381700516, "learning_rate": 0.0001406588955398533, "loss": 0.2819, "step": 18123 }, { "epoch": 1.4682436811406352, "grad_norm": 0.04150066897273064, "learning_rate": 0.00014065439488725867, "loss": 0.2596, "step": 18124 }, { "epoch": 1.4683246921581334, "grad_norm": 0.053796712309122086, "learning_rate": 0.00014064989423466403, "loss": 0.3035, "step": 18125 }, { "epoch": 1.468405703175632, "grad_norm": 0.045569829642772675, "learning_rate": 0.0001406453935820694, "loss": 0.2902, "step": 18126 }, { "epoch": 1.4684867141931304, "grad_norm": 0.05256570503115654, "learning_rate": 0.00014064089292947478, "loss": 0.307, "step": 18127 }, { "epoch": 1.4685677252106286, "grad_norm": 0.054923005402088165, "learning_rate": 0.00014063639227688014, "loss": 0.3389, "step": 18128 }, { "epoch": 1.468648736228127, "grad_norm": 0.05528505519032478, "learning_rate": 0.00014063189162428553, "loss": 0.2991, "step": 18129 }, { "epoch": 1.4687297472456253, "grad_norm": 0.059665605425834656, "learning_rate": 0.00014062739097169092, "loss": 0.3061, "step": 18130 }, { "epoch": 1.4688107582631238, "grad_norm": 0.058723267167806625, "learning_rate": 0.00014062289031909628, "loss": 0.3146, "step": 18131 }, { "epoch": 1.468891769280622, "grad_norm": 0.055252302438020706, "learning_rate": 0.00014061838966650164, "loss": 0.3149, "step": 18132 }, { "epoch": 1.4689727802981205, "grad_norm": 0.053718701004981995, "learning_rate": 0.00014061388901390702, "loss": 0.3216, "step": 18133 }, { "epoch": 1.469053791315619, "grad_norm": 0.05037958547472954, "learning_rate": 0.00014060938836131238, "loss": 0.2632, "step": 18134 }, { "epoch": 1.4691348023331172, "grad_norm": 0.043746188282966614, "learning_rate": 0.00014060488770871777, "loss": 0.2798, "step": 18135 }, { "epoch": 1.4692158133506157, "grad_norm": 0.06655730307102203, "learning_rate": 0.00014060038705612316, "loss": 0.2882, "step": 18136 }, { "epoch": 1.4692968243681142, "grad_norm": 0.04868955910205841, "learning_rate": 0.00014059588640352852, "loss": 0.2769, "step": 18137 }, { "epoch": 1.4693778353856124, "grad_norm": 0.04965856298804283, "learning_rate": 0.00014059138575093388, "loss": 0.287, "step": 18138 }, { "epoch": 1.4694588464031109, "grad_norm": 0.053828127682209015, "learning_rate": 0.00014058688509833926, "loss": 0.3166, "step": 18139 }, { "epoch": 1.4695398574206093, "grad_norm": 0.045842841267585754, "learning_rate": 0.00014058238444574462, "loss": 0.2803, "step": 18140 }, { "epoch": 1.4696208684381076, "grad_norm": 0.04934623837471008, "learning_rate": 0.00014057788379315, "loss": 0.2944, "step": 18141 }, { "epoch": 1.469701879455606, "grad_norm": 0.051355887204408646, "learning_rate": 0.0001405733831405554, "loss": 0.2672, "step": 18142 }, { "epoch": 1.4697828904731043, "grad_norm": 0.045832522213459015, "learning_rate": 0.00014056888248796076, "loss": 0.2666, "step": 18143 }, { "epoch": 1.4698639014906028, "grad_norm": 0.04589347168803215, "learning_rate": 0.00014056438183536612, "loss": 0.2691, "step": 18144 }, { "epoch": 1.469944912508101, "grad_norm": 0.05721011757850647, "learning_rate": 0.0001405598811827715, "loss": 0.3372, "step": 18145 }, { "epoch": 1.4700259235255995, "grad_norm": 0.04498670995235443, "learning_rate": 0.00014055538053017687, "loss": 0.2897, "step": 18146 }, { "epoch": 1.470106934543098, "grad_norm": 0.04440313205122948, "learning_rate": 0.00014055087987758225, "loss": 0.2951, "step": 18147 }, { "epoch": 1.4701879455605962, "grad_norm": 0.046017423272132874, "learning_rate": 0.00014054637922498764, "loss": 0.2621, "step": 18148 }, { "epoch": 1.4702689565780946, "grad_norm": 0.05513910576701164, "learning_rate": 0.000140541878572393, "loss": 0.3399, "step": 18149 }, { "epoch": 1.470349967595593, "grad_norm": 0.054075244814157486, "learning_rate": 0.0001405373779197984, "loss": 0.3114, "step": 18150 }, { "epoch": 1.4704309786130914, "grad_norm": 0.04777602478861809, "learning_rate": 0.00014053287726720375, "loss": 0.2559, "step": 18151 }, { "epoch": 1.4705119896305898, "grad_norm": 0.03741517290472984, "learning_rate": 0.0001405283766146091, "loss": 0.2615, "step": 18152 }, { "epoch": 1.470593000648088, "grad_norm": 0.045710425823926926, "learning_rate": 0.0001405238759620145, "loss": 0.2705, "step": 18153 }, { "epoch": 1.4706740116655865, "grad_norm": 0.05262228474020958, "learning_rate": 0.00014051937530941988, "loss": 0.2863, "step": 18154 }, { "epoch": 1.4707550226830848, "grad_norm": 0.049241792410612106, "learning_rate": 0.00014051487465682524, "loss": 0.3101, "step": 18155 }, { "epoch": 1.4708360337005832, "grad_norm": 0.047971051186323166, "learning_rate": 0.00014051037400423063, "loss": 0.3168, "step": 18156 }, { "epoch": 1.4709170447180817, "grad_norm": 0.04645678028464317, "learning_rate": 0.000140505873351636, "loss": 0.3053, "step": 18157 }, { "epoch": 1.47099805573558, "grad_norm": 0.044148728251457214, "learning_rate": 0.00014050137269904138, "loss": 0.2624, "step": 18158 }, { "epoch": 1.4710790667530784, "grad_norm": 0.05452611297369003, "learning_rate": 0.00014049687204644674, "loss": 0.3388, "step": 18159 }, { "epoch": 1.471160077770577, "grad_norm": 0.05382085219025612, "learning_rate": 0.00014049237139385212, "loss": 0.309, "step": 18160 }, { "epoch": 1.4712410887880751, "grad_norm": 0.05108357220888138, "learning_rate": 0.00014048787074125748, "loss": 0.275, "step": 18161 }, { "epoch": 1.4713220998055736, "grad_norm": 0.0608837716281414, "learning_rate": 0.00014048337008866287, "loss": 0.3417, "step": 18162 }, { "epoch": 1.471403110823072, "grad_norm": 0.036789074540138245, "learning_rate": 0.00014047886943606823, "loss": 0.2618, "step": 18163 }, { "epoch": 1.4714841218405703, "grad_norm": 0.038801342248916626, "learning_rate": 0.00014047436878347362, "loss": 0.2433, "step": 18164 }, { "epoch": 1.4715651328580686, "grad_norm": 0.04561394080519676, "learning_rate": 0.00014046986813087898, "loss": 0.2788, "step": 18165 }, { "epoch": 1.471646143875567, "grad_norm": 0.04409138858318329, "learning_rate": 0.00014046536747828437, "loss": 0.2491, "step": 18166 }, { "epoch": 1.4717271548930655, "grad_norm": 0.04505815729498863, "learning_rate": 0.00014046086682568973, "loss": 0.2643, "step": 18167 }, { "epoch": 1.4718081659105637, "grad_norm": 0.05399011820554733, "learning_rate": 0.0001404563661730951, "loss": 0.2834, "step": 18168 }, { "epoch": 1.4718891769280622, "grad_norm": 0.049549635499715805, "learning_rate": 0.00014045186552050047, "loss": 0.2641, "step": 18169 }, { "epoch": 1.4719701879455607, "grad_norm": 0.04148612171411514, "learning_rate": 0.00014044736486790586, "loss": 0.3006, "step": 18170 }, { "epoch": 1.472051198963059, "grad_norm": 0.042439449578523636, "learning_rate": 0.00014044286421531122, "loss": 0.2713, "step": 18171 }, { "epoch": 1.4721322099805574, "grad_norm": 0.046045321971178055, "learning_rate": 0.0001404383635627166, "loss": 0.2845, "step": 18172 }, { "epoch": 1.4722132209980558, "grad_norm": 0.04349822551012039, "learning_rate": 0.00014043386291012197, "loss": 0.2893, "step": 18173 }, { "epoch": 1.472294232015554, "grad_norm": 0.04683893546462059, "learning_rate": 0.00014042936225752735, "loss": 0.2782, "step": 18174 }, { "epoch": 1.4723752430330526, "grad_norm": 0.05124137923121452, "learning_rate": 0.00014042486160493271, "loss": 0.3123, "step": 18175 }, { "epoch": 1.4724562540505508, "grad_norm": 0.050686314702034, "learning_rate": 0.0001404203609523381, "loss": 0.3128, "step": 18176 }, { "epoch": 1.4725372650680493, "grad_norm": 0.04599520564079285, "learning_rate": 0.00014041586029974346, "loss": 0.275, "step": 18177 }, { "epoch": 1.4726182760855475, "grad_norm": 0.056231122463941574, "learning_rate": 0.00014041135964714885, "loss": 0.3083, "step": 18178 }, { "epoch": 1.472699287103046, "grad_norm": 0.0451853983104229, "learning_rate": 0.0001404068589945542, "loss": 0.2657, "step": 18179 }, { "epoch": 1.4727802981205445, "grad_norm": 0.04859994724392891, "learning_rate": 0.0001404023583419596, "loss": 0.2684, "step": 18180 }, { "epoch": 1.4728613091380427, "grad_norm": 0.05537387728691101, "learning_rate": 0.00014039785768936498, "loss": 0.309, "step": 18181 }, { "epoch": 1.4729423201555412, "grad_norm": 0.04671725258231163, "learning_rate": 0.00014039335703677034, "loss": 0.3001, "step": 18182 }, { "epoch": 1.4730233311730396, "grad_norm": 0.05779655650258064, "learning_rate": 0.0001403888563841757, "loss": 0.3482, "step": 18183 }, { "epoch": 1.4731043421905379, "grad_norm": 0.04974488168954849, "learning_rate": 0.0001403843557315811, "loss": 0.2858, "step": 18184 }, { "epoch": 1.4731853532080363, "grad_norm": 0.047579389065504074, "learning_rate": 0.00014037985507898645, "loss": 0.2808, "step": 18185 }, { "epoch": 1.4732663642255348, "grad_norm": 0.05018430948257446, "learning_rate": 0.00014037535442639184, "loss": 0.3242, "step": 18186 }, { "epoch": 1.473347375243033, "grad_norm": 0.04664905369281769, "learning_rate": 0.00014037085377379722, "loss": 0.2626, "step": 18187 }, { "epoch": 1.4734283862605313, "grad_norm": 0.043412186205387115, "learning_rate": 0.00014036635312120258, "loss": 0.2443, "step": 18188 }, { "epoch": 1.4735093972780298, "grad_norm": 0.04253380000591278, "learning_rate": 0.00014036185246860794, "loss": 0.2591, "step": 18189 }, { "epoch": 1.4735904082955282, "grad_norm": 0.05158968269824982, "learning_rate": 0.00014035735181601333, "loss": 0.2936, "step": 18190 }, { "epoch": 1.4736714193130265, "grad_norm": 0.05277436599135399, "learning_rate": 0.0001403528511634187, "loss": 0.2995, "step": 18191 }, { "epoch": 1.473752430330525, "grad_norm": 0.047883354127407074, "learning_rate": 0.00014034835051082408, "loss": 0.2825, "step": 18192 }, { "epoch": 1.4738334413480234, "grad_norm": 0.04544740915298462, "learning_rate": 0.00014034384985822947, "loss": 0.2947, "step": 18193 }, { "epoch": 1.4739144523655217, "grad_norm": 0.05491040274500847, "learning_rate": 0.00014033934920563483, "loss": 0.2906, "step": 18194 }, { "epoch": 1.4739954633830201, "grad_norm": 0.057347312569618225, "learning_rate": 0.00014033484855304019, "loss": 0.2865, "step": 18195 }, { "epoch": 1.4740764744005186, "grad_norm": 0.048794638365507126, "learning_rate": 0.00014033034790044557, "loss": 0.2695, "step": 18196 }, { "epoch": 1.4741574854180168, "grad_norm": 0.04354074224829674, "learning_rate": 0.00014032584724785093, "loss": 0.2649, "step": 18197 }, { "epoch": 1.4742384964355153, "grad_norm": 0.05033900961279869, "learning_rate": 0.00014032134659525632, "loss": 0.2949, "step": 18198 }, { "epoch": 1.4743195074530135, "grad_norm": 0.046822357922792435, "learning_rate": 0.0001403168459426617, "loss": 0.2547, "step": 18199 }, { "epoch": 1.474400518470512, "grad_norm": 0.05028039216995239, "learning_rate": 0.00014031234529006707, "loss": 0.3164, "step": 18200 }, { "epoch": 1.4744815294880103, "grad_norm": 0.058645930141210556, "learning_rate": 0.00014030784463747243, "loss": 0.2958, "step": 18201 }, { "epoch": 1.4745625405055087, "grad_norm": 0.05373714864253998, "learning_rate": 0.00014030334398487782, "loss": 0.2801, "step": 18202 }, { "epoch": 1.4746435515230072, "grad_norm": 0.04682587832212448, "learning_rate": 0.00014029884333228318, "loss": 0.2834, "step": 18203 }, { "epoch": 1.4747245625405054, "grad_norm": 0.057389553636312485, "learning_rate": 0.00014029434267968856, "loss": 0.3034, "step": 18204 }, { "epoch": 1.474805573558004, "grad_norm": 0.05309831351041794, "learning_rate": 0.00014028984202709395, "loss": 0.3266, "step": 18205 }, { "epoch": 1.4748865845755024, "grad_norm": 0.05439450964331627, "learning_rate": 0.0001402853413744993, "loss": 0.3163, "step": 18206 }, { "epoch": 1.4749675955930006, "grad_norm": 0.04491687938570976, "learning_rate": 0.00014028084072190467, "loss": 0.2546, "step": 18207 }, { "epoch": 1.475048606610499, "grad_norm": 0.04674319177865982, "learning_rate": 0.00014027634006931006, "loss": 0.2771, "step": 18208 }, { "epoch": 1.4751296176279973, "grad_norm": 0.044882722198963165, "learning_rate": 0.00014027183941671542, "loss": 0.3102, "step": 18209 }, { "epoch": 1.4752106286454958, "grad_norm": 0.049309100955724716, "learning_rate": 0.0001402673387641208, "loss": 0.2976, "step": 18210 }, { "epoch": 1.475291639662994, "grad_norm": 0.04568256065249443, "learning_rate": 0.0001402628381115262, "loss": 0.2624, "step": 18211 }, { "epoch": 1.4753726506804925, "grad_norm": 0.048085831105709076, "learning_rate": 0.00014025833745893155, "loss": 0.2628, "step": 18212 }, { "epoch": 1.475453661697991, "grad_norm": 0.045278891921043396, "learning_rate": 0.0001402538368063369, "loss": 0.2605, "step": 18213 }, { "epoch": 1.4755346727154892, "grad_norm": 0.04354943707585335, "learning_rate": 0.0001402493361537423, "loss": 0.2852, "step": 18214 }, { "epoch": 1.4756156837329877, "grad_norm": 0.051201093941926956, "learning_rate": 0.00014024483550114766, "loss": 0.2679, "step": 18215 }, { "epoch": 1.4756966947504861, "grad_norm": 0.0424734428524971, "learning_rate": 0.00014024033484855305, "loss": 0.2383, "step": 18216 }, { "epoch": 1.4757777057679844, "grad_norm": 0.052323777228593826, "learning_rate": 0.00014023583419595843, "loss": 0.3315, "step": 18217 }, { "epoch": 1.4758587167854829, "grad_norm": 0.04114263877272606, "learning_rate": 0.0001402313335433638, "loss": 0.2762, "step": 18218 }, { "epoch": 1.4759397278029813, "grad_norm": 0.046002596616744995, "learning_rate": 0.00014022683289076918, "loss": 0.2701, "step": 18219 }, { "epoch": 1.4760207388204796, "grad_norm": 0.049378395080566406, "learning_rate": 0.00014022233223817454, "loss": 0.2435, "step": 18220 }, { "epoch": 1.476101749837978, "grad_norm": 0.04540861397981644, "learning_rate": 0.0001402178315855799, "loss": 0.3062, "step": 18221 }, { "epoch": 1.4761827608554763, "grad_norm": 0.047455836087465286, "learning_rate": 0.0001402133309329853, "loss": 0.3133, "step": 18222 }, { "epoch": 1.4762637718729748, "grad_norm": 0.04688198119401932, "learning_rate": 0.00014020883028039067, "loss": 0.2991, "step": 18223 }, { "epoch": 1.476344782890473, "grad_norm": 0.04360269755125046, "learning_rate": 0.00014020432962779603, "loss": 0.2659, "step": 18224 }, { "epoch": 1.4764257939079715, "grad_norm": 0.04572906717658043, "learning_rate": 0.00014019982897520142, "loss": 0.2849, "step": 18225 }, { "epoch": 1.47650680492547, "grad_norm": 0.04466778039932251, "learning_rate": 0.00014019532832260678, "loss": 0.2693, "step": 18226 }, { "epoch": 1.4765878159429682, "grad_norm": 0.04478795453906059, "learning_rate": 0.00014019082767001214, "loss": 0.2815, "step": 18227 }, { "epoch": 1.4766688269604666, "grad_norm": 0.04503241553902626, "learning_rate": 0.00014018632701741753, "loss": 0.2585, "step": 18228 }, { "epoch": 1.476749837977965, "grad_norm": 0.04465152323246002, "learning_rate": 0.00014018182636482292, "loss": 0.2921, "step": 18229 }, { "epoch": 1.4768308489954634, "grad_norm": 0.04942317679524422, "learning_rate": 0.00014017732571222828, "loss": 0.326, "step": 18230 }, { "epoch": 1.4769118600129618, "grad_norm": 0.05225396901369095, "learning_rate": 0.00014017282505963366, "loss": 0.2962, "step": 18231 }, { "epoch": 1.47699287103046, "grad_norm": 0.042505279183387756, "learning_rate": 0.00014016832440703902, "loss": 0.2689, "step": 18232 }, { "epoch": 1.4770738820479585, "grad_norm": 0.055289316922426224, "learning_rate": 0.0001401638237544444, "loss": 0.3329, "step": 18233 }, { "epoch": 1.4771548930654568, "grad_norm": 0.04561949148774147, "learning_rate": 0.00014015932310184977, "loss": 0.3152, "step": 18234 }, { "epoch": 1.4772359040829552, "grad_norm": 0.041787758469581604, "learning_rate": 0.00014015482244925516, "loss": 0.2709, "step": 18235 }, { "epoch": 1.4773169151004537, "grad_norm": 0.055366311222314835, "learning_rate": 0.00014015032179666052, "loss": 0.2999, "step": 18236 }, { "epoch": 1.477397926117952, "grad_norm": 0.047048453241586685, "learning_rate": 0.0001401458211440659, "loss": 0.2852, "step": 18237 }, { "epoch": 1.4774789371354504, "grad_norm": 0.048917848616838455, "learning_rate": 0.00014014132049147126, "loss": 0.2683, "step": 18238 }, { "epoch": 1.4775599481529489, "grad_norm": 0.04917580261826515, "learning_rate": 0.00014013681983887665, "loss": 0.286, "step": 18239 }, { "epoch": 1.4776409591704471, "grad_norm": 0.04912577569484711, "learning_rate": 0.000140132319186282, "loss": 0.261, "step": 18240 }, { "epoch": 1.4777219701879456, "grad_norm": 0.04626433923840523, "learning_rate": 0.0001401278185336874, "loss": 0.2988, "step": 18241 }, { "epoch": 1.477802981205444, "grad_norm": 0.05389011278748512, "learning_rate": 0.00014012331788109276, "loss": 0.3133, "step": 18242 }, { "epoch": 1.4778839922229423, "grad_norm": 0.051110610365867615, "learning_rate": 0.00014011881722849815, "loss": 0.3139, "step": 18243 }, { "epoch": 1.4779650032404408, "grad_norm": 0.04332983121275902, "learning_rate": 0.0001401143165759035, "loss": 0.2587, "step": 18244 }, { "epoch": 1.478046014257939, "grad_norm": 0.04385901615023613, "learning_rate": 0.0001401098159233089, "loss": 0.2861, "step": 18245 }, { "epoch": 1.4781270252754375, "grad_norm": 0.04720534384250641, "learning_rate": 0.00014010531527071425, "loss": 0.3258, "step": 18246 }, { "epoch": 1.4782080362929357, "grad_norm": 0.04445433244109154, "learning_rate": 0.00014010081461811964, "loss": 0.2608, "step": 18247 }, { "epoch": 1.4782890473104342, "grad_norm": 0.04828941076993942, "learning_rate": 0.000140096313965525, "loss": 0.231, "step": 18248 }, { "epoch": 1.4783700583279327, "grad_norm": 0.04584532603621483, "learning_rate": 0.0001400918133129304, "loss": 0.3124, "step": 18249 }, { "epoch": 1.478451069345431, "grad_norm": 0.04655173793435097, "learning_rate": 0.00014008731266033575, "loss": 0.2961, "step": 18250 }, { "epoch": 1.4785320803629294, "grad_norm": 0.06158711016178131, "learning_rate": 0.00014008281200774114, "loss": 0.3231, "step": 18251 }, { "epoch": 1.4786130913804278, "grad_norm": 0.05093095451593399, "learning_rate": 0.0001400783113551465, "loss": 0.2938, "step": 18252 }, { "epoch": 1.478694102397926, "grad_norm": 0.06089789792895317, "learning_rate": 0.00014007381070255188, "loss": 0.3123, "step": 18253 }, { "epoch": 1.4787751134154246, "grad_norm": 0.04535433277487755, "learning_rate": 0.00014006931004995724, "loss": 0.2581, "step": 18254 }, { "epoch": 1.4788561244329228, "grad_norm": 0.04125452786684036, "learning_rate": 0.00014006480939736263, "loss": 0.2553, "step": 18255 }, { "epoch": 1.4789371354504213, "grad_norm": 0.056558143347501755, "learning_rate": 0.000140060308744768, "loss": 0.278, "step": 18256 }, { "epoch": 1.4790181464679195, "grad_norm": 0.04998468607664108, "learning_rate": 0.00014005580809217338, "loss": 0.2844, "step": 18257 }, { "epoch": 1.479099157485418, "grad_norm": 0.05126415938138962, "learning_rate": 0.00014005130743957874, "loss": 0.3271, "step": 18258 }, { "epoch": 1.4791801685029164, "grad_norm": 0.04823947697877884, "learning_rate": 0.00014004680678698412, "loss": 0.2878, "step": 18259 }, { "epoch": 1.4792611795204147, "grad_norm": 0.04105127230286598, "learning_rate": 0.00014004230613438948, "loss": 0.2413, "step": 18260 }, { "epoch": 1.4793421905379132, "grad_norm": 0.0530543178319931, "learning_rate": 0.00014003780548179487, "loss": 0.287, "step": 18261 }, { "epoch": 1.4794232015554116, "grad_norm": 0.04239355027675629, "learning_rate": 0.00014003330482920026, "loss": 0.3181, "step": 18262 }, { "epoch": 1.4795042125729099, "grad_norm": 0.043683599680662155, "learning_rate": 0.00014002880417660562, "loss": 0.2634, "step": 18263 }, { "epoch": 1.4795852235904083, "grad_norm": 0.05439092591404915, "learning_rate": 0.00014002430352401098, "loss": 0.3294, "step": 18264 }, { "epoch": 1.4796662346079068, "grad_norm": 0.04399878531694412, "learning_rate": 0.00014001980287141637, "loss": 0.3174, "step": 18265 }, { "epoch": 1.479747245625405, "grad_norm": 0.04555346071720123, "learning_rate": 0.00014001530221882173, "loss": 0.3127, "step": 18266 }, { "epoch": 1.4798282566429035, "grad_norm": 0.041780564934015274, "learning_rate": 0.0001400108015662271, "loss": 0.2853, "step": 18267 }, { "epoch": 1.4799092676604018, "grad_norm": 0.046299781650304794, "learning_rate": 0.0001400063009136325, "loss": 0.3038, "step": 18268 }, { "epoch": 1.4799902786779002, "grad_norm": 0.04602280631661415, "learning_rate": 0.00014000180026103786, "loss": 0.2824, "step": 18269 }, { "epoch": 1.4800712896953985, "grad_norm": 0.04503730311989784, "learning_rate": 0.00013999729960844322, "loss": 0.27, "step": 18270 }, { "epoch": 1.480152300712897, "grad_norm": 0.04678992182016373, "learning_rate": 0.0001399927989558486, "loss": 0.3059, "step": 18271 }, { "epoch": 1.4802333117303954, "grad_norm": 0.04083758592605591, "learning_rate": 0.00013998829830325397, "loss": 0.3109, "step": 18272 }, { "epoch": 1.4803143227478937, "grad_norm": 0.04555083438754082, "learning_rate": 0.00013998379765065935, "loss": 0.2654, "step": 18273 }, { "epoch": 1.4803953337653921, "grad_norm": 0.04763795807957649, "learning_rate": 0.00013997929699806474, "loss": 0.2976, "step": 18274 }, { "epoch": 1.4804763447828906, "grad_norm": 0.06001761183142662, "learning_rate": 0.0001399747963454701, "loss": 0.3181, "step": 18275 }, { "epoch": 1.4805573558003888, "grad_norm": 0.04997747391462326, "learning_rate": 0.00013997029569287546, "loss": 0.3341, "step": 18276 }, { "epoch": 1.4806383668178873, "grad_norm": 0.05390239134430885, "learning_rate": 0.00013996579504028085, "loss": 0.3128, "step": 18277 }, { "epoch": 1.4807193778353855, "grad_norm": 0.04573750123381615, "learning_rate": 0.0001399612943876862, "loss": 0.2834, "step": 18278 }, { "epoch": 1.480800388852884, "grad_norm": 0.05009123310446739, "learning_rate": 0.0001399567937350916, "loss": 0.2786, "step": 18279 }, { "epoch": 1.4808813998703823, "grad_norm": 0.049583666026592255, "learning_rate": 0.00013995229308249698, "loss": 0.2841, "step": 18280 }, { "epoch": 1.4809624108878807, "grad_norm": 0.04602464661002159, "learning_rate": 0.00013994779242990234, "loss": 0.2817, "step": 18281 }, { "epoch": 1.4810434219053792, "grad_norm": 0.04475036635994911, "learning_rate": 0.0001399432917773077, "loss": 0.272, "step": 18282 }, { "epoch": 1.4811244329228774, "grad_norm": 0.04411635920405388, "learning_rate": 0.0001399387911247131, "loss": 0.2847, "step": 18283 }, { "epoch": 1.481205443940376, "grad_norm": 0.057060644030570984, "learning_rate": 0.00013993429047211845, "loss": 0.3075, "step": 18284 }, { "epoch": 1.4812864549578744, "grad_norm": 0.04523240029811859, "learning_rate": 0.00013992978981952384, "loss": 0.2952, "step": 18285 }, { "epoch": 1.4813674659753726, "grad_norm": 0.059247277677059174, "learning_rate": 0.00013992528916692922, "loss": 0.3341, "step": 18286 }, { "epoch": 1.481448476992871, "grad_norm": 0.043466899544000626, "learning_rate": 0.00013992078851433458, "loss": 0.2429, "step": 18287 }, { "epoch": 1.4815294880103695, "grad_norm": 0.05750332400202751, "learning_rate": 0.00013991628786173997, "loss": 0.2758, "step": 18288 }, { "epoch": 1.4816104990278678, "grad_norm": 0.05220554396510124, "learning_rate": 0.00013991178720914533, "loss": 0.2578, "step": 18289 }, { "epoch": 1.481691510045366, "grad_norm": 0.05209901183843613, "learning_rate": 0.0001399072865565507, "loss": 0.2796, "step": 18290 }, { "epoch": 1.4817725210628645, "grad_norm": 0.043190717697143555, "learning_rate": 0.00013990278590395608, "loss": 0.2821, "step": 18291 }, { "epoch": 1.481853532080363, "grad_norm": 0.04831504821777344, "learning_rate": 0.00013989828525136147, "loss": 0.3245, "step": 18292 }, { "epoch": 1.4819345430978612, "grad_norm": 0.04756138473749161, "learning_rate": 0.00013989378459876683, "loss": 0.3114, "step": 18293 }, { "epoch": 1.4820155541153597, "grad_norm": 0.04918740317225456, "learning_rate": 0.00013988928394617221, "loss": 0.3163, "step": 18294 }, { "epoch": 1.4820965651328581, "grad_norm": 0.04362611472606659, "learning_rate": 0.00013988478329357757, "loss": 0.2815, "step": 18295 }, { "epoch": 1.4821775761503564, "grad_norm": 0.043086010962724686, "learning_rate": 0.00013988028264098293, "loss": 0.2774, "step": 18296 }, { "epoch": 1.4822585871678549, "grad_norm": 0.051327820867300034, "learning_rate": 0.00013987578198838832, "loss": 0.2935, "step": 18297 }, { "epoch": 1.4823395981853533, "grad_norm": 0.0406213216483593, "learning_rate": 0.0001398712813357937, "loss": 0.2591, "step": 18298 }, { "epoch": 1.4824206092028516, "grad_norm": 0.050979651510715485, "learning_rate": 0.00013986678068319907, "loss": 0.2859, "step": 18299 }, { "epoch": 1.48250162022035, "grad_norm": 0.04462830349802971, "learning_rate": 0.00013986228003060446, "loss": 0.2924, "step": 18300 }, { "epoch": 1.4825826312378483, "grad_norm": 0.04354723170399666, "learning_rate": 0.00013985777937800982, "loss": 0.2744, "step": 18301 }, { "epoch": 1.4826636422553467, "grad_norm": 0.05323687568306923, "learning_rate": 0.00013985327872541518, "loss": 0.2964, "step": 18302 }, { "epoch": 1.482744653272845, "grad_norm": 0.046059656888246536, "learning_rate": 0.00013984877807282056, "loss": 0.3088, "step": 18303 }, { "epoch": 1.4828256642903435, "grad_norm": 0.05356958881020546, "learning_rate": 0.00013984427742022595, "loss": 0.3491, "step": 18304 }, { "epoch": 1.482906675307842, "grad_norm": 0.042450789362192154, "learning_rate": 0.0001398397767676313, "loss": 0.2752, "step": 18305 }, { "epoch": 1.4829876863253402, "grad_norm": 0.05261091887950897, "learning_rate": 0.0001398352761150367, "loss": 0.3322, "step": 18306 }, { "epoch": 1.4830686973428386, "grad_norm": 0.04929700866341591, "learning_rate": 0.00013983077546244206, "loss": 0.2929, "step": 18307 }, { "epoch": 1.483149708360337, "grad_norm": 0.044822052121162415, "learning_rate": 0.00013982627480984742, "loss": 0.2821, "step": 18308 }, { "epoch": 1.4832307193778353, "grad_norm": 0.04936167225241661, "learning_rate": 0.0001398217741572528, "loss": 0.2719, "step": 18309 }, { "epoch": 1.4833117303953338, "grad_norm": 0.051214780658483505, "learning_rate": 0.0001398172735046582, "loss": 0.274, "step": 18310 }, { "epoch": 1.4833927414128323, "grad_norm": 0.048769284039735794, "learning_rate": 0.00013981277285206355, "loss": 0.3083, "step": 18311 }, { "epoch": 1.4834737524303305, "grad_norm": 0.050249192863702774, "learning_rate": 0.00013980827219946894, "loss": 0.324, "step": 18312 }, { "epoch": 1.4835547634478288, "grad_norm": 0.042009156197309494, "learning_rate": 0.0001398037715468743, "loss": 0.2717, "step": 18313 }, { "epoch": 1.4836357744653272, "grad_norm": 0.05800290405750275, "learning_rate": 0.00013979927089427969, "loss": 0.3219, "step": 18314 }, { "epoch": 1.4837167854828257, "grad_norm": 0.04696693271398544, "learning_rate": 0.00013979477024168505, "loss": 0.2772, "step": 18315 }, { "epoch": 1.483797796500324, "grad_norm": 0.04265100136399269, "learning_rate": 0.00013979026958909043, "loss": 0.3088, "step": 18316 }, { "epoch": 1.4838788075178224, "grad_norm": 0.05752689763903618, "learning_rate": 0.0001397857689364958, "loss": 0.3209, "step": 18317 }, { "epoch": 1.4839598185353209, "grad_norm": 0.04664149880409241, "learning_rate": 0.00013978126828390118, "loss": 0.3288, "step": 18318 }, { "epoch": 1.4840408295528191, "grad_norm": 0.06104827672243118, "learning_rate": 0.00013977676763130654, "loss": 0.3064, "step": 18319 }, { "epoch": 1.4841218405703176, "grad_norm": 0.055936019867658615, "learning_rate": 0.00013977226697871193, "loss": 0.2811, "step": 18320 }, { "epoch": 1.484202851587816, "grad_norm": 0.0556662380695343, "learning_rate": 0.0001397677663261173, "loss": 0.2917, "step": 18321 }, { "epoch": 1.4842838626053143, "grad_norm": 0.04936107248067856, "learning_rate": 0.00013976326567352267, "loss": 0.2785, "step": 18322 }, { "epoch": 1.4843648736228128, "grad_norm": 0.04691338539123535, "learning_rate": 0.00013975876502092803, "loss": 0.2867, "step": 18323 }, { "epoch": 1.484445884640311, "grad_norm": 0.04699557274580002, "learning_rate": 0.00013975426436833342, "loss": 0.2698, "step": 18324 }, { "epoch": 1.4845268956578095, "grad_norm": 0.046980343759059906, "learning_rate": 0.00013974976371573878, "loss": 0.3092, "step": 18325 }, { "epoch": 1.4846079066753077, "grad_norm": 0.04569510743021965, "learning_rate": 0.00013974526306314417, "loss": 0.292, "step": 18326 }, { "epoch": 1.4846889176928062, "grad_norm": 0.05107861012220383, "learning_rate": 0.00013974076241054953, "loss": 0.3182, "step": 18327 }, { "epoch": 1.4847699287103047, "grad_norm": 0.040237490087747574, "learning_rate": 0.00013973626175795492, "loss": 0.2539, "step": 18328 }, { "epoch": 1.484850939727803, "grad_norm": 0.052254654467105865, "learning_rate": 0.00013973176110536028, "loss": 0.3111, "step": 18329 }, { "epoch": 1.4849319507453014, "grad_norm": 0.03984718397259712, "learning_rate": 0.00013972726045276566, "loss": 0.2724, "step": 18330 }, { "epoch": 1.4850129617627998, "grad_norm": 0.05064450949430466, "learning_rate": 0.00013972275980017102, "loss": 0.2959, "step": 18331 }, { "epoch": 1.485093972780298, "grad_norm": 0.04890420287847519, "learning_rate": 0.0001397182591475764, "loss": 0.2624, "step": 18332 }, { "epoch": 1.4851749837977966, "grad_norm": 0.05717543512582779, "learning_rate": 0.00013971375849498177, "loss": 0.3479, "step": 18333 }, { "epoch": 1.4852559948152948, "grad_norm": 0.05508338287472725, "learning_rate": 0.00013970925784238716, "loss": 0.3087, "step": 18334 }, { "epoch": 1.4853370058327933, "grad_norm": 0.04428192973136902, "learning_rate": 0.00013970475718979252, "loss": 0.2528, "step": 18335 }, { "epoch": 1.4854180168502915, "grad_norm": 0.046977389603853226, "learning_rate": 0.0001397002565371979, "loss": 0.2712, "step": 18336 }, { "epoch": 1.48549902786779, "grad_norm": 0.051317162811756134, "learning_rate": 0.00013969575588460327, "loss": 0.2886, "step": 18337 }, { "epoch": 1.4855800388852884, "grad_norm": 0.04508852958679199, "learning_rate": 0.00013969125523200865, "loss": 0.2637, "step": 18338 }, { "epoch": 1.4856610499027867, "grad_norm": 0.04501248523592949, "learning_rate": 0.000139686754579414, "loss": 0.2792, "step": 18339 }, { "epoch": 1.4857420609202852, "grad_norm": 0.05640905722975731, "learning_rate": 0.0001396822539268194, "loss": 0.3121, "step": 18340 }, { "epoch": 1.4858230719377836, "grad_norm": 0.048217132687568665, "learning_rate": 0.00013967775327422476, "loss": 0.2848, "step": 18341 }, { "epoch": 1.4859040829552819, "grad_norm": 0.04802275076508522, "learning_rate": 0.00013967325262163015, "loss": 0.2975, "step": 18342 }, { "epoch": 1.4859850939727803, "grad_norm": 0.04685540497303009, "learning_rate": 0.00013966875196903553, "loss": 0.2761, "step": 18343 }, { "epoch": 1.4860661049902788, "grad_norm": 0.04356532543897629, "learning_rate": 0.0001396642513164409, "loss": 0.2649, "step": 18344 }, { "epoch": 1.486147116007777, "grad_norm": 0.04948470741510391, "learning_rate": 0.00013965975066384625, "loss": 0.2986, "step": 18345 }, { "epoch": 1.4862281270252755, "grad_norm": 0.04743214324116707, "learning_rate": 0.00013965525001125164, "loss": 0.2842, "step": 18346 }, { "epoch": 1.4863091380427738, "grad_norm": 0.05075887218117714, "learning_rate": 0.000139650749358657, "loss": 0.3015, "step": 18347 }, { "epoch": 1.4863901490602722, "grad_norm": 0.052088137716054916, "learning_rate": 0.0001396462487060624, "loss": 0.2715, "step": 18348 }, { "epoch": 1.4864711600777705, "grad_norm": 0.04909933730959892, "learning_rate": 0.00013964174805346778, "loss": 0.2645, "step": 18349 }, { "epoch": 1.486552171095269, "grad_norm": 0.04945594444870949, "learning_rate": 0.00013963724740087314, "loss": 0.2919, "step": 18350 }, { "epoch": 1.4866331821127674, "grad_norm": 0.05588537082076073, "learning_rate": 0.0001396327467482785, "loss": 0.3082, "step": 18351 }, { "epoch": 1.4867141931302656, "grad_norm": 0.058845289051532745, "learning_rate": 0.00013962824609568388, "loss": 0.3624, "step": 18352 }, { "epoch": 1.4867952041477641, "grad_norm": 0.047321610152721405, "learning_rate": 0.00013962374544308924, "loss": 0.3048, "step": 18353 }, { "epoch": 1.4868762151652626, "grad_norm": 0.046751320362091064, "learning_rate": 0.00013961924479049463, "loss": 0.2997, "step": 18354 }, { "epoch": 1.4869572261827608, "grad_norm": 0.05394889786839485, "learning_rate": 0.00013961474413790002, "loss": 0.3257, "step": 18355 }, { "epoch": 1.4870382372002593, "grad_norm": 0.06627120077610016, "learning_rate": 0.00013961024348530538, "loss": 0.2903, "step": 18356 }, { "epoch": 1.4871192482177575, "grad_norm": 0.05737738311290741, "learning_rate": 0.00013960574283271076, "loss": 0.3475, "step": 18357 }, { "epoch": 1.487200259235256, "grad_norm": 0.05117543041706085, "learning_rate": 0.00013960124218011612, "loss": 0.2446, "step": 18358 }, { "epoch": 1.4872812702527543, "grad_norm": 0.04747667536139488, "learning_rate": 0.00013959674152752148, "loss": 0.2513, "step": 18359 }, { "epoch": 1.4873622812702527, "grad_norm": 0.046115368604660034, "learning_rate": 0.00013959224087492687, "loss": 0.3149, "step": 18360 }, { "epoch": 1.4874432922877512, "grad_norm": 0.05121514946222305, "learning_rate": 0.00013958774022233226, "loss": 0.2996, "step": 18361 }, { "epoch": 1.4875243033052494, "grad_norm": 0.05462023615837097, "learning_rate": 0.00013958323956973762, "loss": 0.2928, "step": 18362 }, { "epoch": 1.487605314322748, "grad_norm": 0.052688416093587875, "learning_rate": 0.000139578738917143, "loss": 0.3073, "step": 18363 }, { "epoch": 1.4876863253402464, "grad_norm": 0.04555663838982582, "learning_rate": 0.00013957423826454837, "loss": 0.2668, "step": 18364 }, { "epoch": 1.4877673363577446, "grad_norm": 0.051788438111543655, "learning_rate": 0.00013956973761195373, "loss": 0.2916, "step": 18365 }, { "epoch": 1.487848347375243, "grad_norm": 0.04320453852415085, "learning_rate": 0.0001395652369593591, "loss": 0.2672, "step": 18366 }, { "epoch": 1.4879293583927415, "grad_norm": 0.055264852941036224, "learning_rate": 0.0001395607363067645, "loss": 0.3008, "step": 18367 }, { "epoch": 1.4880103694102398, "grad_norm": 0.05236586555838585, "learning_rate": 0.00013955623565416986, "loss": 0.282, "step": 18368 }, { "epoch": 1.4880913804277383, "grad_norm": 0.04332033917307854, "learning_rate": 0.00013955173500157525, "loss": 0.2821, "step": 18369 }, { "epoch": 1.4881723914452365, "grad_norm": 0.054549943655729294, "learning_rate": 0.0001395472343489806, "loss": 0.2878, "step": 18370 }, { "epoch": 1.488253402462735, "grad_norm": 0.04600951448082924, "learning_rate": 0.00013954273369638597, "loss": 0.2934, "step": 18371 }, { "epoch": 1.4883344134802332, "grad_norm": 0.042095400393009186, "learning_rate": 0.00013953823304379135, "loss": 0.2413, "step": 18372 }, { "epoch": 1.4884154244977317, "grad_norm": 0.056848566979169846, "learning_rate": 0.00013953373239119674, "loss": 0.3536, "step": 18373 }, { "epoch": 1.4884964355152301, "grad_norm": 0.046874742954969406, "learning_rate": 0.0001395292317386021, "loss": 0.2433, "step": 18374 }, { "epoch": 1.4885774465327284, "grad_norm": 0.053148914128541946, "learning_rate": 0.0001395247310860075, "loss": 0.2906, "step": 18375 }, { "epoch": 1.4886584575502269, "grad_norm": 0.04982958361506462, "learning_rate": 0.00013952023043341285, "loss": 0.3081, "step": 18376 }, { "epoch": 1.4887394685677253, "grad_norm": 0.050669070333242416, "learning_rate": 0.0001395157297808182, "loss": 0.3039, "step": 18377 }, { "epoch": 1.4888204795852236, "grad_norm": 0.05367177352309227, "learning_rate": 0.0001395112291282236, "loss": 0.3278, "step": 18378 }, { "epoch": 1.488901490602722, "grad_norm": 0.04868924245238304, "learning_rate": 0.00013950672847562898, "loss": 0.3139, "step": 18379 }, { "epoch": 1.4889825016202203, "grad_norm": 0.04303275793790817, "learning_rate": 0.00013950222782303434, "loss": 0.2567, "step": 18380 }, { "epoch": 1.4890635126377187, "grad_norm": 0.05001796782016754, "learning_rate": 0.00013949772717043973, "loss": 0.3035, "step": 18381 }, { "epoch": 1.489144523655217, "grad_norm": 0.05062073841691017, "learning_rate": 0.0001394932265178451, "loss": 0.2901, "step": 18382 }, { "epoch": 1.4892255346727155, "grad_norm": 0.04815378040075302, "learning_rate": 0.00013948872586525045, "loss": 0.2813, "step": 18383 }, { "epoch": 1.489306545690214, "grad_norm": 0.04104358330368996, "learning_rate": 0.00013948422521265584, "loss": 0.2862, "step": 18384 }, { "epoch": 1.4893875567077122, "grad_norm": 0.05661662295460701, "learning_rate": 0.00013947972456006123, "loss": 0.3342, "step": 18385 }, { "epoch": 1.4894685677252106, "grad_norm": 0.04493676871061325, "learning_rate": 0.00013947522390746659, "loss": 0.2763, "step": 18386 }, { "epoch": 1.489549578742709, "grad_norm": 0.05062812939286232, "learning_rate": 0.00013947072325487197, "loss": 0.2884, "step": 18387 }, { "epoch": 1.4896305897602073, "grad_norm": 0.043501146137714386, "learning_rate": 0.00013946622260227733, "loss": 0.2742, "step": 18388 }, { "epoch": 1.4897116007777058, "grad_norm": 0.04774140566587448, "learning_rate": 0.0001394617219496827, "loss": 0.2644, "step": 18389 }, { "epoch": 1.4897926117952043, "grad_norm": 0.05543723329901695, "learning_rate": 0.00013945722129708808, "loss": 0.2662, "step": 18390 }, { "epoch": 1.4898736228127025, "grad_norm": 0.04897398129105568, "learning_rate": 0.00013945272064449347, "loss": 0.2799, "step": 18391 }, { "epoch": 1.4899546338302008, "grad_norm": 0.04249687120318413, "learning_rate": 0.00013944821999189883, "loss": 0.2538, "step": 18392 }, { "epoch": 1.4900356448476992, "grad_norm": 0.03824577480554581, "learning_rate": 0.00013944371933930421, "loss": 0.219, "step": 18393 }, { "epoch": 1.4901166558651977, "grad_norm": 0.0580143965780735, "learning_rate": 0.00013943921868670957, "loss": 0.285, "step": 18394 }, { "epoch": 1.490197666882696, "grad_norm": 0.04311606287956238, "learning_rate": 0.00013943471803411496, "loss": 0.2878, "step": 18395 }, { "epoch": 1.4902786779001944, "grad_norm": 0.05549796298146248, "learning_rate": 0.00013943021738152032, "loss": 0.3463, "step": 18396 }, { "epoch": 1.4903596889176929, "grad_norm": 0.05039701983332634, "learning_rate": 0.0001394257167289257, "loss": 0.2625, "step": 18397 }, { "epoch": 1.4904406999351911, "grad_norm": 0.04013196378946304, "learning_rate": 0.00013942121607633107, "loss": 0.2531, "step": 18398 }, { "epoch": 1.4905217109526896, "grad_norm": 0.04465265944600105, "learning_rate": 0.00013941671542373646, "loss": 0.2879, "step": 18399 }, { "epoch": 1.490602721970188, "grad_norm": 0.04678081348538399, "learning_rate": 0.00013941221477114182, "loss": 0.2783, "step": 18400 }, { "epoch": 1.4906837329876863, "grad_norm": 0.04981692135334015, "learning_rate": 0.0001394077141185472, "loss": 0.3072, "step": 18401 }, { "epoch": 1.4907647440051848, "grad_norm": 0.05292908474802971, "learning_rate": 0.00013940321346595256, "loss": 0.3052, "step": 18402 }, { "epoch": 1.490845755022683, "grad_norm": 0.06737762689590454, "learning_rate": 0.00013939871281335795, "loss": 0.307, "step": 18403 }, { "epoch": 1.4909267660401815, "grad_norm": 0.04568171873688698, "learning_rate": 0.0001393942121607633, "loss": 0.2601, "step": 18404 }, { "epoch": 1.4910077770576797, "grad_norm": 0.052503764629364014, "learning_rate": 0.0001393897115081687, "loss": 0.3105, "step": 18405 }, { "epoch": 1.4910887880751782, "grad_norm": 0.046734701842069626, "learning_rate": 0.00013938521085557406, "loss": 0.292, "step": 18406 }, { "epoch": 1.4911697990926767, "grad_norm": 0.042353659868240356, "learning_rate": 0.00013938071020297944, "loss": 0.2559, "step": 18407 }, { "epoch": 1.491250810110175, "grad_norm": 0.04739280045032501, "learning_rate": 0.0001393762095503848, "loss": 0.2775, "step": 18408 }, { "epoch": 1.4913318211276734, "grad_norm": 0.04475112631917, "learning_rate": 0.0001393717088977902, "loss": 0.286, "step": 18409 }, { "epoch": 1.4914128321451718, "grad_norm": 0.047658856958150864, "learning_rate": 0.00013936720824519555, "loss": 0.2744, "step": 18410 }, { "epoch": 1.49149384316267, "grad_norm": 0.0562983863055706, "learning_rate": 0.00013936270759260094, "loss": 0.2906, "step": 18411 }, { "epoch": 1.4915748541801686, "grad_norm": 0.047967586666345596, "learning_rate": 0.0001393582069400063, "loss": 0.3272, "step": 18412 }, { "epoch": 1.491655865197667, "grad_norm": 0.047111138701438904, "learning_rate": 0.00013935370628741169, "loss": 0.2879, "step": 18413 }, { "epoch": 1.4917368762151653, "grad_norm": 0.047560688108205795, "learning_rate": 0.00013934920563481705, "loss": 0.2566, "step": 18414 }, { "epoch": 1.4918178872326635, "grad_norm": 0.04752170667052269, "learning_rate": 0.00013934470498222243, "loss": 0.26, "step": 18415 }, { "epoch": 1.491898898250162, "grad_norm": 0.04229968041181564, "learning_rate": 0.0001393402043296278, "loss": 0.269, "step": 18416 }, { "epoch": 1.4919799092676604, "grad_norm": 0.05088653042912483, "learning_rate": 0.00013933570367703318, "loss": 0.2976, "step": 18417 }, { "epoch": 1.4920609202851587, "grad_norm": 0.04122448340058327, "learning_rate": 0.00013933120302443857, "loss": 0.2717, "step": 18418 }, { "epoch": 1.4921419313026572, "grad_norm": 0.0648375079035759, "learning_rate": 0.00013932670237184393, "loss": 0.2909, "step": 18419 }, { "epoch": 1.4922229423201556, "grad_norm": 0.056340668350458145, "learning_rate": 0.00013932220171924931, "loss": 0.3037, "step": 18420 }, { "epoch": 1.4923039533376539, "grad_norm": 0.043164364993572235, "learning_rate": 0.00013931770106665467, "loss": 0.2863, "step": 18421 }, { "epoch": 1.4923849643551523, "grad_norm": 0.049361515790224075, "learning_rate": 0.00013931320041406003, "loss": 0.2761, "step": 18422 }, { "epoch": 1.4924659753726508, "grad_norm": 0.044015221297740936, "learning_rate": 0.00013930869976146542, "loss": 0.3073, "step": 18423 }, { "epoch": 1.492546986390149, "grad_norm": 0.0538572296500206, "learning_rate": 0.0001393041991088708, "loss": 0.3024, "step": 18424 }, { "epoch": 1.4926279974076475, "grad_norm": 0.04860401153564453, "learning_rate": 0.00013929969845627617, "loss": 0.2891, "step": 18425 }, { "epoch": 1.4927090084251458, "grad_norm": 0.04993215203285217, "learning_rate": 0.00013929519780368156, "loss": 0.2824, "step": 18426 }, { "epoch": 1.4927900194426442, "grad_norm": 0.047310274094343185, "learning_rate": 0.00013929069715108692, "loss": 0.251, "step": 18427 }, { "epoch": 1.4928710304601425, "grad_norm": 0.043921127915382385, "learning_rate": 0.00013928619649849228, "loss": 0.2705, "step": 18428 }, { "epoch": 1.492952041477641, "grad_norm": 0.050883155316114426, "learning_rate": 0.00013928169584589766, "loss": 0.3099, "step": 18429 }, { "epoch": 1.4930330524951394, "grad_norm": 0.0460064597427845, "learning_rate": 0.00013927719519330305, "loss": 0.2787, "step": 18430 }, { "epoch": 1.4931140635126376, "grad_norm": 0.050332728773355484, "learning_rate": 0.0001392726945407084, "loss": 0.285, "step": 18431 }, { "epoch": 1.4931950745301361, "grad_norm": 0.049601174890995026, "learning_rate": 0.0001392681938881138, "loss": 0.2961, "step": 18432 }, { "epoch": 1.4932760855476346, "grad_norm": 0.04399363324046135, "learning_rate": 0.00013926369323551916, "loss": 0.2833, "step": 18433 }, { "epoch": 1.4933570965651328, "grad_norm": 0.044266752898693085, "learning_rate": 0.00013925919258292452, "loss": 0.2742, "step": 18434 }, { "epoch": 1.4934381075826313, "grad_norm": 0.05331169441342354, "learning_rate": 0.0001392546919303299, "loss": 0.2892, "step": 18435 }, { "epoch": 1.4935191186001295, "grad_norm": 0.0475342683494091, "learning_rate": 0.0001392501912777353, "loss": 0.2838, "step": 18436 }, { "epoch": 1.493600129617628, "grad_norm": 0.04593534767627716, "learning_rate": 0.00013924569062514065, "loss": 0.3068, "step": 18437 }, { "epoch": 1.4936811406351262, "grad_norm": 0.04650498926639557, "learning_rate": 0.00013924118997254604, "loss": 0.272, "step": 18438 }, { "epoch": 1.4937621516526247, "grad_norm": 0.04349982365965843, "learning_rate": 0.0001392366893199514, "loss": 0.2705, "step": 18439 }, { "epoch": 1.4938431626701232, "grad_norm": 0.05977478250861168, "learning_rate": 0.00013923218866735676, "loss": 0.313, "step": 18440 }, { "epoch": 1.4939241736876214, "grad_norm": 0.04337127506732941, "learning_rate": 0.00013922768801476215, "loss": 0.2555, "step": 18441 }, { "epoch": 1.49400518470512, "grad_norm": 0.05166735500097275, "learning_rate": 0.00013922318736216753, "loss": 0.3171, "step": 18442 }, { "epoch": 1.4940861957226184, "grad_norm": 0.05477170646190643, "learning_rate": 0.0001392186867095729, "loss": 0.2734, "step": 18443 }, { "epoch": 1.4941672067401166, "grad_norm": 0.05633719637989998, "learning_rate": 0.00013921418605697828, "loss": 0.2861, "step": 18444 }, { "epoch": 1.494248217757615, "grad_norm": 0.05167591571807861, "learning_rate": 0.00013920968540438364, "loss": 0.2632, "step": 18445 }, { "epoch": 1.4943292287751135, "grad_norm": 0.05309131368994713, "learning_rate": 0.000139205184751789, "loss": 0.2883, "step": 18446 }, { "epoch": 1.4944102397926118, "grad_norm": 0.04940500110387802, "learning_rate": 0.0001392006840991944, "loss": 0.2707, "step": 18447 }, { "epoch": 1.4944912508101102, "grad_norm": 0.04576598480343819, "learning_rate": 0.00013919618344659978, "loss": 0.2856, "step": 18448 }, { "epoch": 1.4945722618276085, "grad_norm": 0.04570867866277695, "learning_rate": 0.00013919168279400514, "loss": 0.2681, "step": 18449 }, { "epoch": 1.494653272845107, "grad_norm": 0.04411808401346207, "learning_rate": 0.00013918718214141052, "loss": 0.2846, "step": 18450 }, { "epoch": 1.4947342838626052, "grad_norm": 0.050253257155418396, "learning_rate": 0.00013918268148881588, "loss": 0.2648, "step": 18451 }, { "epoch": 1.4948152948801037, "grad_norm": 0.05088942497968674, "learning_rate": 0.00013917818083622124, "loss": 0.2748, "step": 18452 }, { "epoch": 1.4948963058976021, "grad_norm": 0.045565392822027206, "learning_rate": 0.00013917368018362663, "loss": 0.2779, "step": 18453 }, { "epoch": 1.4949773169151004, "grad_norm": 0.04917675629258156, "learning_rate": 0.00013916917953103202, "loss": 0.3069, "step": 18454 }, { "epoch": 1.4950583279325989, "grad_norm": 0.05241185799241066, "learning_rate": 0.00013916467887843738, "loss": 0.3057, "step": 18455 }, { "epoch": 1.4951393389500973, "grad_norm": 0.045868489891290665, "learning_rate": 0.00013916017822584276, "loss": 0.2771, "step": 18456 }, { "epoch": 1.4952203499675956, "grad_norm": 0.04369146004319191, "learning_rate": 0.00013915567757324812, "loss": 0.2512, "step": 18457 }, { "epoch": 1.495301360985094, "grad_norm": 0.044587358832359314, "learning_rate": 0.00013915117692065348, "loss": 0.2714, "step": 18458 }, { "epoch": 1.4953823720025923, "grad_norm": 0.050855766981840134, "learning_rate": 0.00013914667626805887, "loss": 0.2898, "step": 18459 }, { "epoch": 1.4954633830200907, "grad_norm": 0.05688999593257904, "learning_rate": 0.00013914217561546426, "loss": 0.2924, "step": 18460 }, { "epoch": 1.495544394037589, "grad_norm": 0.05517961457371712, "learning_rate": 0.00013913767496286962, "loss": 0.268, "step": 18461 }, { "epoch": 1.4956254050550875, "grad_norm": 0.05347253754734993, "learning_rate": 0.000139133174310275, "loss": 0.2947, "step": 18462 }, { "epoch": 1.495706416072586, "grad_norm": 0.048167914152145386, "learning_rate": 0.00013912867365768037, "loss": 0.2519, "step": 18463 }, { "epoch": 1.4957874270900842, "grad_norm": 0.045652590692043304, "learning_rate": 0.00013912417300508573, "loss": 0.2295, "step": 18464 }, { "epoch": 1.4958684381075826, "grad_norm": 0.04998354986310005, "learning_rate": 0.0001391196723524911, "loss": 0.2637, "step": 18465 }, { "epoch": 1.495949449125081, "grad_norm": 0.05486319586634636, "learning_rate": 0.0001391151716998965, "loss": 0.334, "step": 18466 }, { "epoch": 1.4960304601425793, "grad_norm": 0.048164885491132736, "learning_rate": 0.00013911067104730186, "loss": 0.3039, "step": 18467 }, { "epoch": 1.4961114711600778, "grad_norm": 0.03973591327667236, "learning_rate": 0.00013910617039470725, "loss": 0.2758, "step": 18468 }, { "epoch": 1.4961924821775763, "grad_norm": 0.05702631548047066, "learning_rate": 0.0001391016697421126, "loss": 0.2889, "step": 18469 }, { "epoch": 1.4962734931950745, "grad_norm": 0.05530570447444916, "learning_rate": 0.00013909716908951797, "loss": 0.2989, "step": 18470 }, { "epoch": 1.496354504212573, "grad_norm": 0.04951141029596329, "learning_rate": 0.00013909266843692335, "loss": 0.3221, "step": 18471 }, { "epoch": 1.4964355152300712, "grad_norm": 0.04481812193989754, "learning_rate": 0.00013908816778432874, "loss": 0.2597, "step": 18472 }, { "epoch": 1.4965165262475697, "grad_norm": 0.05551238730549812, "learning_rate": 0.0001390836671317341, "loss": 0.2723, "step": 18473 }, { "epoch": 1.496597537265068, "grad_norm": 0.052483901381492615, "learning_rate": 0.0001390791664791395, "loss": 0.283, "step": 18474 }, { "epoch": 1.4966785482825664, "grad_norm": 0.0454094335436821, "learning_rate": 0.00013907466582654485, "loss": 0.2775, "step": 18475 }, { "epoch": 1.4967595593000649, "grad_norm": 0.05346366763114929, "learning_rate": 0.00013907016517395024, "loss": 0.3174, "step": 18476 }, { "epoch": 1.4968405703175631, "grad_norm": 0.044364653527736664, "learning_rate": 0.0001390656645213556, "loss": 0.2586, "step": 18477 }, { "epoch": 1.4969215813350616, "grad_norm": 0.04217236861586571, "learning_rate": 0.00013906116386876098, "loss": 0.2678, "step": 18478 }, { "epoch": 1.49700259235256, "grad_norm": 0.04457024112343788, "learning_rate": 0.00013905666321616634, "loss": 0.2623, "step": 18479 }, { "epoch": 1.4970836033700583, "grad_norm": 0.0516212061047554, "learning_rate": 0.00013905216256357173, "loss": 0.2941, "step": 18480 }, { "epoch": 1.4971646143875568, "grad_norm": 0.05036289617419243, "learning_rate": 0.0001390476619109771, "loss": 0.3116, "step": 18481 }, { "epoch": 1.497245625405055, "grad_norm": 0.045464541763067245, "learning_rate": 0.00013904316125838248, "loss": 0.3038, "step": 18482 }, { "epoch": 1.4973266364225535, "grad_norm": 0.05470053851604462, "learning_rate": 0.00013903866060578784, "loss": 0.2821, "step": 18483 }, { "epoch": 1.4974076474400517, "grad_norm": 0.05759792774915695, "learning_rate": 0.00013903415995319323, "loss": 0.3298, "step": 18484 }, { "epoch": 1.4974886584575502, "grad_norm": 0.049921244382858276, "learning_rate": 0.00013902965930059859, "loss": 0.2963, "step": 18485 }, { "epoch": 1.4975696694750487, "grad_norm": 0.04551756754517555, "learning_rate": 0.00013902515864800397, "loss": 0.2899, "step": 18486 }, { "epoch": 1.497650680492547, "grad_norm": 0.053860023617744446, "learning_rate": 0.00013902065799540933, "loss": 0.2752, "step": 18487 }, { "epoch": 1.4977316915100454, "grad_norm": 0.060332879424095154, "learning_rate": 0.00013901615734281472, "loss": 0.3393, "step": 18488 }, { "epoch": 1.4978127025275438, "grad_norm": 0.05206407979130745, "learning_rate": 0.0001390116566902201, "loss": 0.2747, "step": 18489 }, { "epoch": 1.497893713545042, "grad_norm": 0.0485624261200428, "learning_rate": 0.00013900715603762547, "loss": 0.2808, "step": 18490 }, { "epoch": 1.4979747245625405, "grad_norm": 0.059844858944416046, "learning_rate": 0.00013900265538503083, "loss": 0.3153, "step": 18491 }, { "epoch": 1.498055735580039, "grad_norm": 0.04991476610302925, "learning_rate": 0.00013899815473243621, "loss": 0.2955, "step": 18492 }, { "epoch": 1.4981367465975373, "grad_norm": 0.05374494194984436, "learning_rate": 0.00013899365407984157, "loss": 0.3298, "step": 18493 }, { "epoch": 1.4982177576150357, "grad_norm": 0.057440925389528275, "learning_rate": 0.00013898915342724696, "loss": 0.3143, "step": 18494 }, { "epoch": 1.498298768632534, "grad_norm": 0.04277382791042328, "learning_rate": 0.00013898465277465235, "loss": 0.2817, "step": 18495 }, { "epoch": 1.4983797796500324, "grad_norm": 0.04948005452752113, "learning_rate": 0.0001389801521220577, "loss": 0.3178, "step": 18496 }, { "epoch": 1.4984607906675307, "grad_norm": 0.06216001138091087, "learning_rate": 0.00013897565146946307, "loss": 0.3059, "step": 18497 }, { "epoch": 1.4985418016850292, "grad_norm": 0.05815086141228676, "learning_rate": 0.00013897115081686846, "loss": 0.2974, "step": 18498 }, { "epoch": 1.4986228127025276, "grad_norm": 0.04554952308535576, "learning_rate": 0.00013896665016427384, "loss": 0.2602, "step": 18499 }, { "epoch": 1.4987038237200259, "grad_norm": 0.046857476234436035, "learning_rate": 0.0001389621495116792, "loss": 0.2645, "step": 18500 }, { "epoch": 1.4987848347375243, "grad_norm": 0.049220532178878784, "learning_rate": 0.0001389576488590846, "loss": 0.3211, "step": 18501 }, { "epoch": 1.4988658457550228, "grad_norm": 0.04458710551261902, "learning_rate": 0.00013895314820648995, "loss": 0.3119, "step": 18502 }, { "epoch": 1.498946856772521, "grad_norm": 0.05195513367652893, "learning_rate": 0.0001389486475538953, "loss": 0.2737, "step": 18503 }, { "epoch": 1.4990278677900195, "grad_norm": 0.043182797729969025, "learning_rate": 0.0001389441469013007, "loss": 0.2691, "step": 18504 }, { "epoch": 1.4991088788075178, "grad_norm": 0.049426015466451645, "learning_rate": 0.00013893964624870608, "loss": 0.2943, "step": 18505 }, { "epoch": 1.4991898898250162, "grad_norm": 0.04895054176449776, "learning_rate": 0.00013893514559611144, "loss": 0.2965, "step": 18506 }, { "epoch": 1.4992709008425145, "grad_norm": 0.05293383076786995, "learning_rate": 0.00013893064494351683, "loss": 0.3132, "step": 18507 }, { "epoch": 1.499351911860013, "grad_norm": 0.053053028881549835, "learning_rate": 0.0001389261442909222, "loss": 0.2741, "step": 18508 }, { "epoch": 1.4994329228775114, "grad_norm": 0.04623028635978699, "learning_rate": 0.00013892164363832755, "loss": 0.2794, "step": 18509 }, { "epoch": 1.4995139338950096, "grad_norm": 0.05131025239825249, "learning_rate": 0.00013891714298573294, "loss": 0.3244, "step": 18510 }, { "epoch": 1.499594944912508, "grad_norm": 0.05024973303079605, "learning_rate": 0.00013891264233313833, "loss": 0.2817, "step": 18511 }, { "epoch": 1.4996759559300066, "grad_norm": 0.048978786915540695, "learning_rate": 0.00013890814168054369, "loss": 0.2873, "step": 18512 }, { "epoch": 1.4997569669475048, "grad_norm": 0.045770205557346344, "learning_rate": 0.00013890364102794907, "loss": 0.2728, "step": 18513 }, { "epoch": 1.4998379779650033, "grad_norm": 0.05459889397025108, "learning_rate": 0.00013889914037535443, "loss": 0.2726, "step": 18514 }, { "epoch": 1.4999189889825018, "grad_norm": 0.058012884110212326, "learning_rate": 0.0001388946397227598, "loss": 0.2823, "step": 18515 }, { "epoch": 1.5, "grad_norm": 0.06093365326523781, "learning_rate": 0.00013889013907016518, "loss": 0.2882, "step": 18516 }, { "epoch": 1.5000810110174982, "grad_norm": 0.054403156042099, "learning_rate": 0.00013888563841757057, "loss": 0.328, "step": 18517 }, { "epoch": 1.5001620220349967, "grad_norm": 0.05934557691216469, "learning_rate": 0.00013888113776497593, "loss": 0.2826, "step": 18518 }, { "epoch": 1.5002430330524952, "grad_norm": 0.05087953060865402, "learning_rate": 0.00013887663711238132, "loss": 0.2786, "step": 18519 }, { "epoch": 1.5003240440699934, "grad_norm": 0.04411671683192253, "learning_rate": 0.00013887213645978668, "loss": 0.2637, "step": 18520 }, { "epoch": 1.500405055087492, "grad_norm": 0.049634527415037155, "learning_rate": 0.00013886763580719204, "loss": 0.2708, "step": 18521 }, { "epoch": 1.5004860661049904, "grad_norm": 0.0397411547601223, "learning_rate": 0.00013886313515459742, "loss": 0.2411, "step": 18522 }, { "epoch": 1.5005670771224886, "grad_norm": 0.060061804950237274, "learning_rate": 0.0001388586345020028, "loss": 0.3434, "step": 18523 }, { "epoch": 1.500648088139987, "grad_norm": 0.05206239968538284, "learning_rate": 0.00013885413384940817, "loss": 0.2641, "step": 18524 }, { "epoch": 1.5007290991574855, "grad_norm": 0.04855002462863922, "learning_rate": 0.00013884963319681356, "loss": 0.3072, "step": 18525 }, { "epoch": 1.5008101101749838, "grad_norm": 0.0422656387090683, "learning_rate": 0.00013884513254421892, "loss": 0.2511, "step": 18526 }, { "epoch": 1.500891121192482, "grad_norm": 0.05039331316947937, "learning_rate": 0.00013884063189162428, "loss": 0.2744, "step": 18527 }, { "epoch": 1.5009721322099807, "grad_norm": 0.04326654598116875, "learning_rate": 0.00013883613123902966, "loss": 0.2671, "step": 18528 }, { "epoch": 1.501053143227479, "grad_norm": 0.04627027362585068, "learning_rate": 0.00013883163058643505, "loss": 0.2412, "step": 18529 }, { "epoch": 1.5011341542449772, "grad_norm": 0.04790741577744484, "learning_rate": 0.0001388271299338404, "loss": 0.3072, "step": 18530 }, { "epoch": 1.5012151652624757, "grad_norm": 0.05516636371612549, "learning_rate": 0.0001388226292812458, "loss": 0.3146, "step": 18531 }, { "epoch": 1.5012961762799741, "grad_norm": 0.0491386316716671, "learning_rate": 0.00013881812862865116, "loss": 0.2919, "step": 18532 }, { "epoch": 1.5013771872974724, "grad_norm": 0.05206599831581116, "learning_rate": 0.00013881362797605652, "loss": 0.3314, "step": 18533 }, { "epoch": 1.5014581983149708, "grad_norm": 0.05277208983898163, "learning_rate": 0.0001388091273234619, "loss": 0.3017, "step": 18534 }, { "epoch": 1.5015392093324693, "grad_norm": 0.049126457422971725, "learning_rate": 0.0001388046266708673, "loss": 0.2696, "step": 18535 }, { "epoch": 1.5016202203499676, "grad_norm": 0.04647735878825188, "learning_rate": 0.00013880012601827265, "loss": 0.2639, "step": 18536 }, { "epoch": 1.501701231367466, "grad_norm": 0.04798891395330429, "learning_rate": 0.00013879562536567804, "loss": 0.283, "step": 18537 }, { "epoch": 1.5017822423849645, "grad_norm": 0.04328464716672897, "learning_rate": 0.0001387911247130834, "loss": 0.2765, "step": 18538 }, { "epoch": 1.5018632534024627, "grad_norm": 0.05452942103147507, "learning_rate": 0.00013878662406048876, "loss": 0.2826, "step": 18539 }, { "epoch": 1.501944264419961, "grad_norm": 0.045899324119091034, "learning_rate": 0.00013878212340789415, "loss": 0.2913, "step": 18540 }, { "epoch": 1.5020252754374595, "grad_norm": 0.042319826781749725, "learning_rate": 0.00013877762275529953, "loss": 0.2545, "step": 18541 }, { "epoch": 1.502106286454958, "grad_norm": 0.05380789935588837, "learning_rate": 0.0001387731221027049, "loss": 0.2885, "step": 18542 }, { "epoch": 1.5021872974724562, "grad_norm": 0.05620346963405609, "learning_rate": 0.00013876862145011028, "loss": 0.3163, "step": 18543 }, { "epoch": 1.5022683084899546, "grad_norm": 0.05543176829814911, "learning_rate": 0.00013876412079751564, "loss": 0.2977, "step": 18544 }, { "epoch": 1.502349319507453, "grad_norm": 0.047539882361888885, "learning_rate": 0.000138759620144921, "loss": 0.2925, "step": 18545 }, { "epoch": 1.5024303305249513, "grad_norm": 0.05766825005412102, "learning_rate": 0.0001387551194923264, "loss": 0.3395, "step": 18546 }, { "epoch": 1.5025113415424498, "grad_norm": 0.04431447759270668, "learning_rate": 0.00013875061883973178, "loss": 0.2685, "step": 18547 }, { "epoch": 1.5025923525599483, "grad_norm": 0.061709705740213394, "learning_rate": 0.00013874611818713714, "loss": 0.3308, "step": 18548 }, { "epoch": 1.5026733635774465, "grad_norm": 0.043706364929676056, "learning_rate": 0.00013874161753454252, "loss": 0.2554, "step": 18549 }, { "epoch": 1.5027543745949448, "grad_norm": 0.04865025728940964, "learning_rate": 0.00013873711688194788, "loss": 0.2923, "step": 18550 }, { "epoch": 1.5028353856124435, "grad_norm": 0.05347156152129173, "learning_rate": 0.00013873261622935327, "loss": 0.3071, "step": 18551 }, { "epoch": 1.5029163966299417, "grad_norm": 0.055005237460136414, "learning_rate": 0.00013872811557675863, "loss": 0.3183, "step": 18552 }, { "epoch": 1.50299740764744, "grad_norm": 0.049281034618616104, "learning_rate": 0.00013872361492416402, "loss": 0.2497, "step": 18553 }, { "epoch": 1.5030784186649384, "grad_norm": 0.05435588210821152, "learning_rate": 0.00013871911427156938, "loss": 0.3302, "step": 18554 }, { "epoch": 1.5031594296824369, "grad_norm": 0.04774954169988632, "learning_rate": 0.00013871461361897476, "loss": 0.2671, "step": 18555 }, { "epoch": 1.5032404406999351, "grad_norm": 0.05450746417045593, "learning_rate": 0.00013871011296638012, "loss": 0.3222, "step": 18556 }, { "epoch": 1.5033214517174336, "grad_norm": 0.04396272823214531, "learning_rate": 0.0001387056123137855, "loss": 0.2874, "step": 18557 }, { "epoch": 1.503402462734932, "grad_norm": 0.04642244800925255, "learning_rate": 0.0001387011116611909, "loss": 0.3038, "step": 18558 }, { "epoch": 1.5034834737524303, "grad_norm": 0.04344436153769493, "learning_rate": 0.00013869661100859626, "loss": 0.264, "step": 18559 }, { "epoch": 1.5035644847699285, "grad_norm": 0.04258277639746666, "learning_rate": 0.00013869211035600162, "loss": 0.2747, "step": 18560 }, { "epoch": 1.5036454957874272, "grad_norm": 0.05755390226840973, "learning_rate": 0.000138687609703407, "loss": 0.3079, "step": 18561 }, { "epoch": 1.5037265068049255, "grad_norm": 0.046239741146564484, "learning_rate": 0.00013868310905081237, "loss": 0.289, "step": 18562 }, { "epoch": 1.5038075178224237, "grad_norm": 0.053038790822029114, "learning_rate": 0.00013867860839821775, "loss": 0.2833, "step": 18563 }, { "epoch": 1.5038885288399222, "grad_norm": 0.04764937609434128, "learning_rate": 0.00013867410774562314, "loss": 0.2666, "step": 18564 }, { "epoch": 1.5039695398574207, "grad_norm": 0.05545291304588318, "learning_rate": 0.0001386696070930285, "loss": 0.282, "step": 18565 }, { "epoch": 1.504050550874919, "grad_norm": 0.061405330896377563, "learning_rate": 0.00013866510644043386, "loss": 0.3036, "step": 18566 }, { "epoch": 1.5041315618924174, "grad_norm": 0.046580031514167786, "learning_rate": 0.00013866060578783925, "loss": 0.2773, "step": 18567 }, { "epoch": 1.5042125729099158, "grad_norm": 0.055441372096538544, "learning_rate": 0.0001386561051352446, "loss": 0.3012, "step": 18568 }, { "epoch": 1.504293583927414, "grad_norm": 0.06004418060183525, "learning_rate": 0.00013865160448265, "loss": 0.2965, "step": 18569 }, { "epoch": 1.5043745949449125, "grad_norm": 0.05109797790646553, "learning_rate": 0.00013864710383005538, "loss": 0.313, "step": 18570 }, { "epoch": 1.504455605962411, "grad_norm": 0.0582164004445076, "learning_rate": 0.00013864260317746074, "loss": 0.3293, "step": 18571 }, { "epoch": 1.5045366169799093, "grad_norm": 0.05205508694052696, "learning_rate": 0.0001386381025248661, "loss": 0.3073, "step": 18572 }, { "epoch": 1.5046176279974075, "grad_norm": 0.05729497969150543, "learning_rate": 0.0001386336018722715, "loss": 0.2782, "step": 18573 }, { "epoch": 1.5046986390149062, "grad_norm": 0.05498183146119118, "learning_rate": 0.00013862910121967685, "loss": 0.2763, "step": 18574 }, { "epoch": 1.5047796500324044, "grad_norm": 0.05813434720039368, "learning_rate": 0.00013862460056708224, "loss": 0.2855, "step": 18575 }, { "epoch": 1.5048606610499027, "grad_norm": 0.050552502274513245, "learning_rate": 0.00013862009991448762, "loss": 0.3153, "step": 18576 }, { "epoch": 1.5049416720674011, "grad_norm": 0.0465339720249176, "learning_rate": 0.00013861559926189298, "loss": 0.2752, "step": 18577 }, { "epoch": 1.5050226830848996, "grad_norm": 0.05752887949347496, "learning_rate": 0.00013861109860929834, "loss": 0.2684, "step": 18578 }, { "epoch": 1.5051036941023979, "grad_norm": 0.05408168584108353, "learning_rate": 0.00013860659795670373, "loss": 0.3078, "step": 18579 }, { "epoch": 1.5051847051198963, "grad_norm": 0.04079076275229454, "learning_rate": 0.00013860209730410912, "loss": 0.2744, "step": 18580 }, { "epoch": 1.5052657161373948, "grad_norm": 0.04741482436656952, "learning_rate": 0.00013859759665151448, "loss": 0.2624, "step": 18581 }, { "epoch": 1.505346727154893, "grad_norm": 0.04578164219856262, "learning_rate": 0.00013859309599891987, "loss": 0.2917, "step": 18582 }, { "epoch": 1.5054277381723913, "grad_norm": 0.04694436863064766, "learning_rate": 0.00013858859534632523, "loss": 0.3019, "step": 18583 }, { "epoch": 1.50550874918989, "grad_norm": 0.05345213785767555, "learning_rate": 0.00013858409469373059, "loss": 0.2916, "step": 18584 }, { "epoch": 1.5055897602073882, "grad_norm": 0.0502551831305027, "learning_rate": 0.00013857959404113597, "loss": 0.2881, "step": 18585 }, { "epoch": 1.5056707712248865, "grad_norm": 0.04362354055047035, "learning_rate": 0.00013857509338854136, "loss": 0.2879, "step": 18586 }, { "epoch": 1.505751782242385, "grad_norm": 0.061859581619501114, "learning_rate": 0.00013857059273594672, "loss": 0.3341, "step": 18587 }, { "epoch": 1.5058327932598834, "grad_norm": 0.048323486000299454, "learning_rate": 0.0001385660920833521, "loss": 0.3062, "step": 18588 }, { "epoch": 1.5059138042773816, "grad_norm": 0.05235842987895012, "learning_rate": 0.00013856159143075747, "loss": 0.31, "step": 18589 }, { "epoch": 1.50599481529488, "grad_norm": 0.05464842915534973, "learning_rate": 0.00013855709077816283, "loss": 0.3411, "step": 18590 }, { "epoch": 1.5060758263123786, "grad_norm": 0.05268260836601257, "learning_rate": 0.00013855259012556821, "loss": 0.3265, "step": 18591 }, { "epoch": 1.5061568373298768, "grad_norm": 0.05023358017206192, "learning_rate": 0.0001385480894729736, "loss": 0.2897, "step": 18592 }, { "epoch": 1.5062378483473753, "grad_norm": 0.046102315187454224, "learning_rate": 0.00013854358882037896, "loss": 0.2753, "step": 18593 }, { "epoch": 1.5063188593648738, "grad_norm": 0.04473813995718956, "learning_rate": 0.00013853908816778435, "loss": 0.2897, "step": 18594 }, { "epoch": 1.506399870382372, "grad_norm": 0.049142077565193176, "learning_rate": 0.0001385345875151897, "loss": 0.2912, "step": 18595 }, { "epoch": 1.5064808813998702, "grad_norm": 0.05272606760263443, "learning_rate": 0.00013853008686259507, "loss": 0.3167, "step": 18596 }, { "epoch": 1.5065618924173687, "grad_norm": 0.06326889991760254, "learning_rate": 0.00013852558621000046, "loss": 0.3176, "step": 18597 }, { "epoch": 1.5066429034348672, "grad_norm": 0.04475260153412819, "learning_rate": 0.00013852108555740584, "loss": 0.2458, "step": 18598 }, { "epoch": 1.5067239144523654, "grad_norm": 0.051189832389354706, "learning_rate": 0.0001385165849048112, "loss": 0.3195, "step": 18599 }, { "epoch": 1.5068049254698639, "grad_norm": 0.05150541290640831, "learning_rate": 0.0001385120842522166, "loss": 0.2626, "step": 18600 }, { "epoch": 1.5068859364873624, "grad_norm": 0.05144593492150307, "learning_rate": 0.00013850758359962195, "loss": 0.2497, "step": 18601 }, { "epoch": 1.5069669475048606, "grad_norm": 0.04862655699253082, "learning_rate": 0.0001385030829470273, "loss": 0.2796, "step": 18602 }, { "epoch": 1.507047958522359, "grad_norm": 0.03998184576630592, "learning_rate": 0.0001384985822944327, "loss": 0.2604, "step": 18603 }, { "epoch": 1.5071289695398575, "grad_norm": 0.0496944859623909, "learning_rate": 0.00013849408164183808, "loss": 0.3112, "step": 18604 }, { "epoch": 1.5072099805573558, "grad_norm": 0.04353320598602295, "learning_rate": 0.00013848958098924344, "loss": 0.2425, "step": 18605 }, { "epoch": 1.507290991574854, "grad_norm": 0.046205393970012665, "learning_rate": 0.00013848508033664883, "loss": 0.2865, "step": 18606 }, { "epoch": 1.5073720025923527, "grad_norm": 0.06040682643651962, "learning_rate": 0.0001384805796840542, "loss": 0.3052, "step": 18607 }, { "epoch": 1.507453013609851, "grad_norm": 0.05146654695272446, "learning_rate": 0.00013847607903145955, "loss": 0.2685, "step": 18608 }, { "epoch": 1.5075340246273492, "grad_norm": 0.05239380523562431, "learning_rate": 0.00013847157837886494, "loss": 0.2962, "step": 18609 }, { "epoch": 1.5076150356448477, "grad_norm": 0.04868777096271515, "learning_rate": 0.00013846707772627033, "loss": 0.2733, "step": 18610 }, { "epoch": 1.5076960466623461, "grad_norm": 0.06022943556308746, "learning_rate": 0.0001384625770736757, "loss": 0.2893, "step": 18611 }, { "epoch": 1.5077770576798444, "grad_norm": 0.045286670327186584, "learning_rate": 0.00013845807642108107, "loss": 0.3286, "step": 18612 }, { "epoch": 1.5078580686973428, "grad_norm": 0.04287097230553627, "learning_rate": 0.00013845357576848643, "loss": 0.2638, "step": 18613 }, { "epoch": 1.5079390797148413, "grad_norm": 0.058371007442474365, "learning_rate": 0.0001384490751158918, "loss": 0.3051, "step": 18614 }, { "epoch": 1.5080200907323396, "grad_norm": 0.04612262547016144, "learning_rate": 0.00013844457446329718, "loss": 0.2739, "step": 18615 }, { "epoch": 1.508101101749838, "grad_norm": 0.04888421297073364, "learning_rate": 0.00013844007381070257, "loss": 0.2859, "step": 18616 }, { "epoch": 1.5081821127673365, "grad_norm": 0.04447241500020027, "learning_rate": 0.00013843557315810793, "loss": 0.3008, "step": 18617 }, { "epoch": 1.5082631237848347, "grad_norm": 0.047536179423332214, "learning_rate": 0.00013843107250551332, "loss": 0.2653, "step": 18618 }, { "epoch": 1.508344134802333, "grad_norm": 0.052498817443847656, "learning_rate": 0.00013842657185291868, "loss": 0.2943, "step": 18619 }, { "epoch": 1.5084251458198314, "grad_norm": 0.053822360932826996, "learning_rate": 0.00013842207120032404, "loss": 0.322, "step": 18620 }, { "epoch": 1.50850615683733, "grad_norm": 0.058178890496492386, "learning_rate": 0.00013841757054772942, "loss": 0.2767, "step": 18621 }, { "epoch": 1.5085871678548282, "grad_norm": 0.06313327699899673, "learning_rate": 0.0001384130698951348, "loss": 0.3065, "step": 18622 }, { "epoch": 1.5086681788723266, "grad_norm": 0.06296181678771973, "learning_rate": 0.00013840856924254017, "loss": 0.259, "step": 18623 }, { "epoch": 1.508749189889825, "grad_norm": 0.04204053431749344, "learning_rate": 0.00013840406858994556, "loss": 0.2567, "step": 18624 }, { "epoch": 1.5088302009073233, "grad_norm": 0.049650777131319046, "learning_rate": 0.00013839956793735092, "loss": 0.2601, "step": 18625 }, { "epoch": 1.5089112119248218, "grad_norm": 0.050487373024225235, "learning_rate": 0.00013839506728475628, "loss": 0.2811, "step": 18626 }, { "epoch": 1.5089922229423203, "grad_norm": 0.05244138464331627, "learning_rate": 0.0001383905666321617, "loss": 0.3, "step": 18627 }, { "epoch": 1.5090732339598185, "grad_norm": 0.04472174495458603, "learning_rate": 0.00013838606597956705, "loss": 0.2397, "step": 18628 }, { "epoch": 1.5091542449773168, "grad_norm": 0.057056084275245667, "learning_rate": 0.0001383815653269724, "loss": 0.3052, "step": 18629 }, { "epoch": 1.5092352559948155, "grad_norm": 0.0568094402551651, "learning_rate": 0.0001383770646743778, "loss": 0.3133, "step": 18630 }, { "epoch": 1.5093162670123137, "grad_norm": 0.040568944066762924, "learning_rate": 0.00013837256402178316, "loss": 0.2348, "step": 18631 }, { "epoch": 1.509397278029812, "grad_norm": 0.05801571533083916, "learning_rate": 0.00013836806336918855, "loss": 0.3339, "step": 18632 }, { "epoch": 1.5094782890473104, "grad_norm": 0.04366452246904373, "learning_rate": 0.00013836356271659393, "loss": 0.2626, "step": 18633 }, { "epoch": 1.5095593000648089, "grad_norm": 0.05549009516835213, "learning_rate": 0.0001383590620639993, "loss": 0.331, "step": 18634 }, { "epoch": 1.5096403110823071, "grad_norm": 0.0553416907787323, "learning_rate": 0.00013835456141140465, "loss": 0.2957, "step": 18635 }, { "epoch": 1.5097213220998056, "grad_norm": 0.05349811166524887, "learning_rate": 0.00013835006075881004, "loss": 0.2911, "step": 18636 }, { "epoch": 1.509802333117304, "grad_norm": 0.04866451397538185, "learning_rate": 0.0001383455601062154, "loss": 0.2892, "step": 18637 }, { "epoch": 1.5098833441348023, "grad_norm": 0.046243034303188324, "learning_rate": 0.0001383410594536208, "loss": 0.3073, "step": 18638 }, { "epoch": 1.5099643551523008, "grad_norm": 0.053792405873537064, "learning_rate": 0.00013833655880102617, "loss": 0.3153, "step": 18639 }, { "epoch": 1.5100453661697992, "grad_norm": 0.05517613887786865, "learning_rate": 0.00013833205814843153, "loss": 0.3032, "step": 18640 }, { "epoch": 1.5101263771872975, "grad_norm": 0.04403368756175041, "learning_rate": 0.0001383275574958369, "loss": 0.2745, "step": 18641 }, { "epoch": 1.5102073882047957, "grad_norm": 0.04711280018091202, "learning_rate": 0.00013832305684324228, "loss": 0.2498, "step": 18642 }, { "epoch": 1.5102883992222942, "grad_norm": 0.04522211104631424, "learning_rate": 0.00013831855619064764, "loss": 0.2616, "step": 18643 }, { "epoch": 1.5103694102397927, "grad_norm": 0.04840913414955139, "learning_rate": 0.00013831405553805303, "loss": 0.2929, "step": 18644 }, { "epoch": 1.510450421257291, "grad_norm": 0.045142967253923416, "learning_rate": 0.00013830955488545842, "loss": 0.2936, "step": 18645 }, { "epoch": 1.5105314322747894, "grad_norm": 0.04537534341216087, "learning_rate": 0.00013830505423286378, "loss": 0.2616, "step": 18646 }, { "epoch": 1.5106124432922878, "grad_norm": 0.04791952297091484, "learning_rate": 0.00013830055358026914, "loss": 0.2661, "step": 18647 }, { "epoch": 1.510693454309786, "grad_norm": 0.051988594233989716, "learning_rate": 0.00013829605292767452, "loss": 0.2785, "step": 18648 }, { "epoch": 1.5107744653272845, "grad_norm": 0.043577536940574646, "learning_rate": 0.00013829155227507988, "loss": 0.2803, "step": 18649 }, { "epoch": 1.510855476344783, "grad_norm": 0.04489186778664589, "learning_rate": 0.00013828705162248527, "loss": 0.2837, "step": 18650 }, { "epoch": 1.5109364873622813, "grad_norm": 0.04787694290280342, "learning_rate": 0.00013828255096989066, "loss": 0.3055, "step": 18651 }, { "epoch": 1.5110174983797795, "grad_norm": 0.06286022067070007, "learning_rate": 0.00013827805031729602, "loss": 0.3101, "step": 18652 }, { "epoch": 1.5110985093972782, "grad_norm": 0.05875331163406372, "learning_rate": 0.00013827354966470138, "loss": 0.3048, "step": 18653 }, { "epoch": 1.5111795204147764, "grad_norm": 0.050677020102739334, "learning_rate": 0.00013826904901210677, "loss": 0.3073, "step": 18654 }, { "epoch": 1.5112605314322747, "grad_norm": 0.04322303086519241, "learning_rate": 0.00013826454835951213, "loss": 0.2828, "step": 18655 }, { "epoch": 1.5113415424497731, "grad_norm": 0.0534433051943779, "learning_rate": 0.0001382600477069175, "loss": 0.3363, "step": 18656 }, { "epoch": 1.5114225534672716, "grad_norm": 0.046610474586486816, "learning_rate": 0.0001382555470543229, "loss": 0.3099, "step": 18657 }, { "epoch": 1.5115035644847699, "grad_norm": 0.051539093255996704, "learning_rate": 0.00013825104640172826, "loss": 0.2926, "step": 18658 }, { "epoch": 1.5115845755022683, "grad_norm": 0.047712355852127075, "learning_rate": 0.00013824654574913362, "loss": 0.2777, "step": 18659 }, { "epoch": 1.5116655865197668, "grad_norm": 0.0468960702419281, "learning_rate": 0.000138242045096539, "loss": 0.2748, "step": 18660 }, { "epoch": 1.511746597537265, "grad_norm": 0.05287209525704384, "learning_rate": 0.0001382375444439444, "loss": 0.2902, "step": 18661 }, { "epoch": 1.5118276085547635, "grad_norm": 0.04535992816090584, "learning_rate": 0.00013823304379134975, "loss": 0.2915, "step": 18662 }, { "epoch": 1.511908619572262, "grad_norm": 0.05408494547009468, "learning_rate": 0.00013822854313875514, "loss": 0.3081, "step": 18663 }, { "epoch": 1.5119896305897602, "grad_norm": 0.04434020444750786, "learning_rate": 0.0001382240424861605, "loss": 0.281, "step": 18664 }, { "epoch": 1.5120706416072585, "grad_norm": 0.05857977271080017, "learning_rate": 0.00013821954183356586, "loss": 0.255, "step": 18665 }, { "epoch": 1.512151652624757, "grad_norm": 0.04309207201004028, "learning_rate": 0.00013821504118097125, "loss": 0.2897, "step": 18666 }, { "epoch": 1.5122326636422554, "grad_norm": 0.06811527162790298, "learning_rate": 0.00013821054052837664, "loss": 0.2611, "step": 18667 }, { "epoch": 1.5123136746597536, "grad_norm": 0.047615617513656616, "learning_rate": 0.000138206039875782, "loss": 0.31, "step": 18668 }, { "epoch": 1.512394685677252, "grad_norm": 0.05192350596189499, "learning_rate": 0.00013820153922318738, "loss": 0.2876, "step": 18669 }, { "epoch": 1.5124756966947506, "grad_norm": 0.04546111449599266, "learning_rate": 0.00013819703857059274, "loss": 0.2567, "step": 18670 }, { "epoch": 1.5125567077122488, "grad_norm": 0.04693352431058884, "learning_rate": 0.0001381925379179981, "loss": 0.2647, "step": 18671 }, { "epoch": 1.5126377187297473, "grad_norm": 0.0454537570476532, "learning_rate": 0.0001381880372654035, "loss": 0.2871, "step": 18672 }, { "epoch": 1.5127187297472457, "grad_norm": 0.05148732289671898, "learning_rate": 0.00013818353661280888, "loss": 0.3049, "step": 18673 }, { "epoch": 1.512799740764744, "grad_norm": 0.059178005903959274, "learning_rate": 0.00013817903596021424, "loss": 0.303, "step": 18674 }, { "epoch": 1.5128807517822422, "grad_norm": 0.05884253978729248, "learning_rate": 0.00013817453530761962, "loss": 0.3042, "step": 18675 }, { "epoch": 1.512961762799741, "grad_norm": 0.05346214771270752, "learning_rate": 0.00013817003465502498, "loss": 0.3251, "step": 18676 }, { "epoch": 1.5130427738172392, "grad_norm": 0.049198366701602936, "learning_rate": 0.00013816553400243034, "loss": 0.2834, "step": 18677 }, { "epoch": 1.5131237848347374, "grad_norm": 0.054034922271966934, "learning_rate": 0.00013816103334983573, "loss": 0.2889, "step": 18678 }, { "epoch": 1.5132047958522359, "grad_norm": 0.0477595180273056, "learning_rate": 0.00013815653269724112, "loss": 0.2607, "step": 18679 }, { "epoch": 1.5132858068697344, "grad_norm": 0.05005710944533348, "learning_rate": 0.00013815203204464648, "loss": 0.2908, "step": 18680 }, { "epoch": 1.5133668178872326, "grad_norm": 0.055473003536462784, "learning_rate": 0.00013814753139205187, "loss": 0.3023, "step": 18681 }, { "epoch": 1.513447828904731, "grad_norm": 0.05821644887328148, "learning_rate": 0.00013814303073945723, "loss": 0.28, "step": 18682 }, { "epoch": 1.5135288399222295, "grad_norm": 0.047872528433799744, "learning_rate": 0.00013813853008686259, "loss": 0.2639, "step": 18683 }, { "epoch": 1.5136098509397278, "grad_norm": 0.05764663219451904, "learning_rate": 0.00013813402943426797, "loss": 0.3254, "step": 18684 }, { "epoch": 1.513690861957226, "grad_norm": 0.06259345263242722, "learning_rate": 0.00013812952878167336, "loss": 0.3194, "step": 18685 }, { "epoch": 1.5137718729747247, "grad_norm": 0.054814137518405914, "learning_rate": 0.00013812502812907872, "loss": 0.2866, "step": 18686 }, { "epoch": 1.513852883992223, "grad_norm": 0.04742126166820526, "learning_rate": 0.0001381205274764841, "loss": 0.2908, "step": 18687 }, { "epoch": 1.5139338950097212, "grad_norm": 0.04217066615819931, "learning_rate": 0.00013811602682388947, "loss": 0.2882, "step": 18688 }, { "epoch": 1.5140149060272197, "grad_norm": 0.04281475394964218, "learning_rate": 0.00013811152617129483, "loss": 0.3041, "step": 18689 }, { "epoch": 1.5140959170447181, "grad_norm": 0.0419379360973835, "learning_rate": 0.00013810702551870021, "loss": 0.2662, "step": 18690 }, { "epoch": 1.5141769280622164, "grad_norm": 0.050524819642305374, "learning_rate": 0.0001381025248661056, "loss": 0.2754, "step": 18691 }, { "epoch": 1.5142579390797148, "grad_norm": 0.04706466943025589, "learning_rate": 0.00013809802421351096, "loss": 0.2768, "step": 18692 }, { "epoch": 1.5143389500972133, "grad_norm": 0.05239817500114441, "learning_rate": 0.00013809352356091635, "loss": 0.2826, "step": 18693 }, { "epoch": 1.5144199611147116, "grad_norm": 0.04976906254887581, "learning_rate": 0.0001380890229083217, "loss": 0.2582, "step": 18694 }, { "epoch": 1.51450097213221, "grad_norm": 0.05384444817900658, "learning_rate": 0.00013808452225572707, "loss": 0.2548, "step": 18695 }, { "epoch": 1.5145819831497085, "grad_norm": 0.043319009244441986, "learning_rate": 0.00013808002160313248, "loss": 0.2722, "step": 18696 }, { "epoch": 1.5146629941672067, "grad_norm": 0.05653691291809082, "learning_rate": 0.00013807552095053784, "loss": 0.327, "step": 18697 }, { "epoch": 1.514744005184705, "grad_norm": 0.05521196871995926, "learning_rate": 0.0001380710202979432, "loss": 0.3193, "step": 18698 }, { "epoch": 1.5148250162022034, "grad_norm": 0.05279504880309105, "learning_rate": 0.0001380665196453486, "loss": 0.3488, "step": 18699 }, { "epoch": 1.514906027219702, "grad_norm": 0.04920048266649246, "learning_rate": 0.00013806201899275395, "loss": 0.2944, "step": 18700 }, { "epoch": 1.5149870382372002, "grad_norm": 0.05808188021183014, "learning_rate": 0.0001380575183401593, "loss": 0.2877, "step": 18701 }, { "epoch": 1.5150680492546986, "grad_norm": 0.04960033297538757, "learning_rate": 0.00013805301768756473, "loss": 0.3101, "step": 18702 }, { "epoch": 1.515149060272197, "grad_norm": 0.042428918182849884, "learning_rate": 0.00013804851703497009, "loss": 0.2628, "step": 18703 }, { "epoch": 1.5152300712896953, "grad_norm": 0.04198756814002991, "learning_rate": 0.00013804401638237545, "loss": 0.2959, "step": 18704 }, { "epoch": 1.5153110823071938, "grad_norm": 0.04299665987491608, "learning_rate": 0.00013803951572978083, "loss": 0.2875, "step": 18705 }, { "epoch": 1.5153920933246923, "grad_norm": 0.039975300431251526, "learning_rate": 0.0001380350150771862, "loss": 0.2438, "step": 18706 }, { "epoch": 1.5154731043421905, "grad_norm": 0.06994131207466125, "learning_rate": 0.00013803051442459155, "loss": 0.3259, "step": 18707 }, { "epoch": 1.5155541153596888, "grad_norm": 0.044267553836107254, "learning_rate": 0.00013802601377199697, "loss": 0.2604, "step": 18708 }, { "epoch": 1.5156351263771874, "grad_norm": 0.04812091961503029, "learning_rate": 0.00013802151311940233, "loss": 0.2525, "step": 18709 }, { "epoch": 1.5157161373946857, "grad_norm": 0.05721137672662735, "learning_rate": 0.0001380170124668077, "loss": 0.2777, "step": 18710 }, { "epoch": 1.515797148412184, "grad_norm": 0.05760475993156433, "learning_rate": 0.00013801251181421307, "loss": 0.3565, "step": 18711 }, { "epoch": 1.5158781594296824, "grad_norm": 0.04711325094103813, "learning_rate": 0.00013800801116161843, "loss": 0.2974, "step": 18712 }, { "epoch": 1.5159591704471809, "grad_norm": 0.05328639969229698, "learning_rate": 0.00013800351050902382, "loss": 0.3143, "step": 18713 }, { "epoch": 1.5160401814646791, "grad_norm": 0.0565636083483696, "learning_rate": 0.0001379990098564292, "loss": 0.2871, "step": 18714 }, { "epoch": 1.5161211924821776, "grad_norm": 0.05294475704431534, "learning_rate": 0.00013799450920383457, "loss": 0.3358, "step": 18715 }, { "epoch": 1.516202203499676, "grad_norm": 0.047887083142995834, "learning_rate": 0.00013799000855123993, "loss": 0.2812, "step": 18716 }, { "epoch": 1.5162832145171743, "grad_norm": 0.05028738081455231, "learning_rate": 0.00013798550789864532, "loss": 0.2868, "step": 18717 }, { "epoch": 1.5163642255346728, "grad_norm": 0.05214919522404671, "learning_rate": 0.00013798100724605068, "loss": 0.3086, "step": 18718 }, { "epoch": 1.5164452365521712, "grad_norm": 0.047210779041051865, "learning_rate": 0.00013797650659345606, "loss": 0.2988, "step": 18719 }, { "epoch": 1.5165262475696695, "grad_norm": 0.04988611117005348, "learning_rate": 0.00013797200594086145, "loss": 0.2667, "step": 18720 }, { "epoch": 1.5166072585871677, "grad_norm": 0.052436117082834244, "learning_rate": 0.0001379675052882668, "loss": 0.3187, "step": 18721 }, { "epoch": 1.5166882696046662, "grad_norm": 0.05738939344882965, "learning_rate": 0.00013796300463567217, "loss": 0.283, "step": 18722 }, { "epoch": 1.5167692806221647, "grad_norm": 0.04664130136370659, "learning_rate": 0.00013795850398307756, "loss": 0.3041, "step": 18723 }, { "epoch": 1.516850291639663, "grad_norm": 0.044622667133808136, "learning_rate": 0.00013795400333048292, "loss": 0.2655, "step": 18724 }, { "epoch": 1.5169313026571614, "grad_norm": 0.052109766751527786, "learning_rate": 0.0001379495026778883, "loss": 0.2748, "step": 18725 }, { "epoch": 1.5170123136746598, "grad_norm": 0.05363732948899269, "learning_rate": 0.0001379450020252937, "loss": 0.3003, "step": 18726 }, { "epoch": 1.517093324692158, "grad_norm": 0.04929107427597046, "learning_rate": 0.00013794050137269905, "loss": 0.2836, "step": 18727 }, { "epoch": 1.5171743357096565, "grad_norm": 0.04485325142741203, "learning_rate": 0.0001379360007201044, "loss": 0.2683, "step": 18728 }, { "epoch": 1.517255346727155, "grad_norm": 0.05199460685253143, "learning_rate": 0.0001379315000675098, "loss": 0.2702, "step": 18729 }, { "epoch": 1.5173363577446533, "grad_norm": 0.04943700134754181, "learning_rate": 0.00013792699941491516, "loss": 0.2759, "step": 18730 }, { "epoch": 1.5174173687621515, "grad_norm": 0.05386276915669441, "learning_rate": 0.00013792249876232055, "loss": 0.2885, "step": 18731 }, { "epoch": 1.5174983797796502, "grad_norm": 0.04591602087020874, "learning_rate": 0.00013791799810972593, "loss": 0.2807, "step": 18732 }, { "epoch": 1.5175793907971484, "grad_norm": 0.041390374302864075, "learning_rate": 0.0001379134974571313, "loss": 0.2853, "step": 18733 }, { "epoch": 1.5176604018146467, "grad_norm": 0.050120241940021515, "learning_rate": 0.00013790899680453665, "loss": 0.3006, "step": 18734 }, { "epoch": 1.5177414128321451, "grad_norm": 0.04909467324614525, "learning_rate": 0.00013790449615194204, "loss": 0.2909, "step": 18735 }, { "epoch": 1.5178224238496436, "grad_norm": 0.05132598802447319, "learning_rate": 0.00013789999549934743, "loss": 0.2679, "step": 18736 }, { "epoch": 1.5179034348671419, "grad_norm": 0.060010362416505814, "learning_rate": 0.0001378954948467528, "loss": 0.3035, "step": 18737 }, { "epoch": 1.5179844458846403, "grad_norm": 0.07251916080713272, "learning_rate": 0.00013789099419415817, "loss": 0.3148, "step": 18738 }, { "epoch": 1.5180654569021388, "grad_norm": 0.042271800339221954, "learning_rate": 0.00013788649354156353, "loss": 0.2836, "step": 18739 }, { "epoch": 1.518146467919637, "grad_norm": 0.055395521223545074, "learning_rate": 0.0001378819928889689, "loss": 0.3016, "step": 18740 }, { "epoch": 1.5182274789371355, "grad_norm": 0.050871629267930984, "learning_rate": 0.00013787749223637428, "loss": 0.2927, "step": 18741 }, { "epoch": 1.518308489954634, "grad_norm": 0.06203100457787514, "learning_rate": 0.00013787299158377967, "loss": 0.2865, "step": 18742 }, { "epoch": 1.5183895009721322, "grad_norm": 0.042529165744781494, "learning_rate": 0.00013786849093118503, "loss": 0.2828, "step": 18743 }, { "epoch": 1.5184705119896305, "grad_norm": 0.04327407479286194, "learning_rate": 0.00013786399027859042, "loss": 0.29, "step": 18744 }, { "epoch": 1.518551523007129, "grad_norm": 0.05121006444096565, "learning_rate": 0.00013785948962599578, "loss": 0.2783, "step": 18745 }, { "epoch": 1.5186325340246274, "grad_norm": 0.051384586840867996, "learning_rate": 0.00013785498897340114, "loss": 0.3032, "step": 18746 }, { "epoch": 1.5187135450421256, "grad_norm": 0.0496654212474823, "learning_rate": 0.00013785048832080652, "loss": 0.2931, "step": 18747 }, { "epoch": 1.518794556059624, "grad_norm": 0.05933926999568939, "learning_rate": 0.0001378459876682119, "loss": 0.3127, "step": 18748 }, { "epoch": 1.5188755670771226, "grad_norm": 0.045520905405282974, "learning_rate": 0.00013784148701561727, "loss": 0.2372, "step": 18749 }, { "epoch": 1.5189565780946208, "grad_norm": 0.05414096266031265, "learning_rate": 0.00013783698636302266, "loss": 0.2762, "step": 18750 }, { "epoch": 1.5190375891121193, "grad_norm": 0.044945355504751205, "learning_rate": 0.00013783248571042802, "loss": 0.2687, "step": 18751 }, { "epoch": 1.5191186001296177, "grad_norm": 0.04823799431324005, "learning_rate": 0.00013782798505783338, "loss": 0.2872, "step": 18752 }, { "epoch": 1.519199611147116, "grad_norm": 0.05766402184963226, "learning_rate": 0.00013782348440523877, "loss": 0.3247, "step": 18753 }, { "epoch": 1.5192806221646142, "grad_norm": 0.05425436422228813, "learning_rate": 0.00013781898375264415, "loss": 0.3004, "step": 18754 }, { "epoch": 1.519361633182113, "grad_norm": 0.05799352005124092, "learning_rate": 0.0001378144831000495, "loss": 0.2595, "step": 18755 }, { "epoch": 1.5194426441996112, "grad_norm": 0.0714273452758789, "learning_rate": 0.0001378099824474549, "loss": 0.28, "step": 18756 }, { "epoch": 1.5195236552171094, "grad_norm": 0.046737149357795715, "learning_rate": 0.00013780548179486026, "loss": 0.2781, "step": 18757 }, { "epoch": 1.5196046662346079, "grad_norm": 0.06238202005624771, "learning_rate": 0.00013780098114226562, "loss": 0.3323, "step": 18758 }, { "epoch": 1.5196856772521063, "grad_norm": 0.05099009349942207, "learning_rate": 0.000137796480489671, "loss": 0.3094, "step": 18759 }, { "epoch": 1.5197666882696046, "grad_norm": 0.04891882464289665, "learning_rate": 0.0001377919798370764, "loss": 0.2804, "step": 18760 }, { "epoch": 1.519847699287103, "grad_norm": 0.04960760846734047, "learning_rate": 0.00013778747918448175, "loss": 0.2865, "step": 18761 }, { "epoch": 1.5199287103046015, "grad_norm": 0.04393989220261574, "learning_rate": 0.00013778297853188714, "loss": 0.3369, "step": 18762 }, { "epoch": 1.5200097213220998, "grad_norm": 0.05547909438610077, "learning_rate": 0.0001377784778792925, "loss": 0.2722, "step": 18763 }, { "epoch": 1.5200907323395982, "grad_norm": 0.04545421525835991, "learning_rate": 0.00013777397722669786, "loss": 0.2816, "step": 18764 }, { "epoch": 1.5201717433570967, "grad_norm": 0.051297880709171295, "learning_rate": 0.00013776947657410328, "loss": 0.2571, "step": 18765 }, { "epoch": 1.520252754374595, "grad_norm": 0.05652231723070145, "learning_rate": 0.00013776497592150864, "loss": 0.3263, "step": 18766 }, { "epoch": 1.5203337653920932, "grad_norm": 0.041101183742284775, "learning_rate": 0.000137760475268914, "loss": 0.2426, "step": 18767 }, { "epoch": 1.5204147764095917, "grad_norm": 0.047826338559389114, "learning_rate": 0.00013775597461631938, "loss": 0.2619, "step": 18768 }, { "epoch": 1.5204957874270901, "grad_norm": 0.04008450359106064, "learning_rate": 0.00013775147396372474, "loss": 0.2381, "step": 18769 }, { "epoch": 1.5205767984445884, "grad_norm": 0.057629168033599854, "learning_rate": 0.0001377469733111301, "loss": 0.2777, "step": 18770 }, { "epoch": 1.5206578094620868, "grad_norm": 0.050094276666641235, "learning_rate": 0.00013774247265853552, "loss": 0.262, "step": 18771 }, { "epoch": 1.5207388204795853, "grad_norm": 0.05211780220270157, "learning_rate": 0.00013773797200594088, "loss": 0.3132, "step": 18772 }, { "epoch": 1.5208198314970836, "grad_norm": 0.05060528963804245, "learning_rate": 0.00013773347135334624, "loss": 0.2381, "step": 18773 }, { "epoch": 1.520900842514582, "grad_norm": 0.047037769109010696, "learning_rate": 0.00013772897070075162, "loss": 0.3041, "step": 18774 }, { "epoch": 1.5209818535320805, "grad_norm": 0.05610048770904541, "learning_rate": 0.00013772447004815698, "loss": 0.2731, "step": 18775 }, { "epoch": 1.5210628645495787, "grad_norm": 0.05277544632554054, "learning_rate": 0.00013771996939556234, "loss": 0.3047, "step": 18776 }, { "epoch": 1.521143875567077, "grad_norm": 0.047693025320768356, "learning_rate": 0.00013771546874296776, "loss": 0.2823, "step": 18777 }, { "epoch": 1.5212248865845757, "grad_norm": 0.050775110721588135, "learning_rate": 0.00013771096809037312, "loss": 0.2768, "step": 18778 }, { "epoch": 1.521305897602074, "grad_norm": 0.058379460126161575, "learning_rate": 0.00013770646743777848, "loss": 0.2978, "step": 18779 }, { "epoch": 1.5213869086195722, "grad_norm": 0.053059257566928864, "learning_rate": 0.00013770196678518387, "loss": 0.2895, "step": 18780 }, { "epoch": 1.5214679196370706, "grad_norm": 0.04597696289420128, "learning_rate": 0.00013769746613258923, "loss": 0.3044, "step": 18781 }, { "epoch": 1.521548930654569, "grad_norm": 0.04516984522342682, "learning_rate": 0.00013769296547999459, "loss": 0.2924, "step": 18782 }, { "epoch": 1.5216299416720673, "grad_norm": 0.04692981392145157, "learning_rate": 0.0001376884648274, "loss": 0.2911, "step": 18783 }, { "epoch": 1.5217109526895658, "grad_norm": 0.049040842801332474, "learning_rate": 0.00013768396417480536, "loss": 0.2863, "step": 18784 }, { "epoch": 1.5217919637070643, "grad_norm": 0.047845106571912766, "learning_rate": 0.00013767946352221072, "loss": 0.2823, "step": 18785 }, { "epoch": 1.5218729747245625, "grad_norm": 0.047967370599508286, "learning_rate": 0.0001376749628696161, "loss": 0.3035, "step": 18786 }, { "epoch": 1.5219539857420608, "grad_norm": 0.047568053007125854, "learning_rate": 0.00013767046221702147, "loss": 0.2859, "step": 18787 }, { "epoch": 1.5220349967595594, "grad_norm": 0.046472664922475815, "learning_rate": 0.00013766596156442683, "loss": 0.2749, "step": 18788 }, { "epoch": 1.5221160077770577, "grad_norm": 0.04872645437717438, "learning_rate": 0.00013766146091183224, "loss": 0.2833, "step": 18789 }, { "epoch": 1.522197018794556, "grad_norm": 0.05547712370753288, "learning_rate": 0.0001376569602592376, "loss": 0.2678, "step": 18790 }, { "epoch": 1.5222780298120544, "grad_norm": 0.046784743666648865, "learning_rate": 0.00013765245960664296, "loss": 0.2886, "step": 18791 }, { "epoch": 1.5223590408295529, "grad_norm": 0.048536427319049835, "learning_rate": 0.00013764795895404835, "loss": 0.2632, "step": 18792 }, { "epoch": 1.5224400518470511, "grad_norm": 0.05491678789258003, "learning_rate": 0.0001376434583014537, "loss": 0.2646, "step": 18793 }, { "epoch": 1.5225210628645496, "grad_norm": 0.04058699309825897, "learning_rate": 0.0001376389576488591, "loss": 0.2609, "step": 18794 }, { "epoch": 1.522602073882048, "grad_norm": 0.06221689283847809, "learning_rate": 0.00013763445699626448, "loss": 0.2851, "step": 18795 }, { "epoch": 1.5226830848995463, "grad_norm": 0.05283937230706215, "learning_rate": 0.00013762995634366984, "loss": 0.3339, "step": 18796 }, { "epoch": 1.5227640959170448, "grad_norm": 0.05966833978891373, "learning_rate": 0.0001376254556910752, "loss": 0.2732, "step": 18797 }, { "epoch": 1.5228451069345432, "grad_norm": 0.047837648540735245, "learning_rate": 0.0001376209550384806, "loss": 0.2711, "step": 18798 }, { "epoch": 1.5229261179520415, "grad_norm": 0.05150927975773811, "learning_rate": 0.00013761645438588595, "loss": 0.2826, "step": 18799 }, { "epoch": 1.5230071289695397, "grad_norm": 0.053468842059373856, "learning_rate": 0.00013761195373329134, "loss": 0.2756, "step": 18800 }, { "epoch": 1.5230881399870384, "grad_norm": 0.04660942032933235, "learning_rate": 0.00013760745308069673, "loss": 0.2749, "step": 18801 }, { "epoch": 1.5231691510045366, "grad_norm": 0.05439922958612442, "learning_rate": 0.00013760295242810209, "loss": 0.2701, "step": 18802 }, { "epoch": 1.523250162022035, "grad_norm": 0.06845547258853912, "learning_rate": 0.00013759845177550745, "loss": 0.3109, "step": 18803 }, { "epoch": 1.5233311730395334, "grad_norm": 0.07089272886514664, "learning_rate": 0.00013759395112291283, "loss": 0.301, "step": 18804 }, { "epoch": 1.5234121840570318, "grad_norm": 0.05223238095641136, "learning_rate": 0.0001375894504703182, "loss": 0.2846, "step": 18805 }, { "epoch": 1.52349319507453, "grad_norm": 0.06159967929124832, "learning_rate": 0.00013758494981772358, "loss": 0.2898, "step": 18806 }, { "epoch": 1.5235742060920285, "grad_norm": 0.048389732837677, "learning_rate": 0.00013758044916512897, "loss": 0.293, "step": 18807 }, { "epoch": 1.523655217109527, "grad_norm": 0.04446495324373245, "learning_rate": 0.00013757594851253433, "loss": 0.2769, "step": 18808 }, { "epoch": 1.5237362281270252, "grad_norm": 0.05596426874399185, "learning_rate": 0.0001375714478599397, "loss": 0.3366, "step": 18809 }, { "epoch": 1.5238172391445235, "grad_norm": 0.04864346235990524, "learning_rate": 0.00013756694720734507, "loss": 0.3128, "step": 18810 }, { "epoch": 1.5238982501620222, "grad_norm": 0.042249538004398346, "learning_rate": 0.00013756244655475043, "loss": 0.2685, "step": 18811 }, { "epoch": 1.5239792611795204, "grad_norm": 0.05110679939389229, "learning_rate": 0.00013755794590215582, "loss": 0.282, "step": 18812 }, { "epoch": 1.5240602721970187, "grad_norm": 0.05548878759145737, "learning_rate": 0.0001375534452495612, "loss": 0.3002, "step": 18813 }, { "epoch": 1.5241412832145171, "grad_norm": 0.04173737019300461, "learning_rate": 0.00013754894459696657, "loss": 0.2399, "step": 18814 }, { "epoch": 1.5242222942320156, "grad_norm": 0.0537974052131176, "learning_rate": 0.00013754444394437193, "loss": 0.2716, "step": 18815 }, { "epoch": 1.5243033052495139, "grad_norm": 0.05061505362391472, "learning_rate": 0.00013753994329177732, "loss": 0.2845, "step": 18816 }, { "epoch": 1.5243843162670123, "grad_norm": 0.055333398282527924, "learning_rate": 0.0001375354426391827, "loss": 0.3048, "step": 18817 }, { "epoch": 1.5244653272845108, "grad_norm": 0.05376095324754715, "learning_rate": 0.00013753094198658806, "loss": 0.3129, "step": 18818 }, { "epoch": 1.524546338302009, "grad_norm": 0.053078003227710724, "learning_rate": 0.00013752644133399345, "loss": 0.3172, "step": 18819 }, { "epoch": 1.5246273493195075, "grad_norm": 0.05460762605071068, "learning_rate": 0.0001375219406813988, "loss": 0.3039, "step": 18820 }, { "epoch": 1.524708360337006, "grad_norm": 0.055081307888031006, "learning_rate": 0.00013751744002880417, "loss": 0.2923, "step": 18821 }, { "epoch": 1.5247893713545042, "grad_norm": 0.055108651518821716, "learning_rate": 0.00013751293937620956, "loss": 0.2926, "step": 18822 }, { "epoch": 1.5248703823720025, "grad_norm": 0.050001360476017, "learning_rate": 0.00013750843872361494, "loss": 0.2775, "step": 18823 }, { "epoch": 1.524951393389501, "grad_norm": 0.042685020714998245, "learning_rate": 0.0001375039380710203, "loss": 0.2413, "step": 18824 }, { "epoch": 1.5250324044069994, "grad_norm": 0.055367305874824524, "learning_rate": 0.0001374994374184257, "loss": 0.2962, "step": 18825 }, { "epoch": 1.5251134154244976, "grad_norm": 0.05210021138191223, "learning_rate": 0.00013749493676583105, "loss": 0.3033, "step": 18826 }, { "epoch": 1.525194426441996, "grad_norm": 0.04955118149518967, "learning_rate": 0.0001374904361132364, "loss": 0.277, "step": 18827 }, { "epoch": 1.5252754374594946, "grad_norm": 0.04781867936253548, "learning_rate": 0.0001374859354606418, "loss": 0.2971, "step": 18828 }, { "epoch": 1.5253564484769928, "grad_norm": 0.05286426097154617, "learning_rate": 0.00013748143480804719, "loss": 0.2948, "step": 18829 }, { "epoch": 1.5254374594944913, "grad_norm": 0.04718099907040596, "learning_rate": 0.00013747693415545255, "loss": 0.2907, "step": 18830 }, { "epoch": 1.5255184705119897, "grad_norm": 0.05359350144863129, "learning_rate": 0.00013747243350285793, "loss": 0.2927, "step": 18831 }, { "epoch": 1.525599481529488, "grad_norm": 0.06179993227124214, "learning_rate": 0.0001374679328502633, "loss": 0.3047, "step": 18832 }, { "epoch": 1.5256804925469862, "grad_norm": 0.048131536692380905, "learning_rate": 0.00013746343219766865, "loss": 0.294, "step": 18833 }, { "epoch": 1.525761503564485, "grad_norm": 0.04800281301140785, "learning_rate": 0.00013745893154507404, "loss": 0.2949, "step": 18834 }, { "epoch": 1.5258425145819832, "grad_norm": 0.03917567431926727, "learning_rate": 0.00013745443089247943, "loss": 0.2439, "step": 18835 }, { "epoch": 1.5259235255994814, "grad_norm": 0.047527339309453964, "learning_rate": 0.0001374499302398848, "loss": 0.3206, "step": 18836 }, { "epoch": 1.5260045366169799, "grad_norm": 0.04575043171644211, "learning_rate": 0.00013744542958729018, "loss": 0.2978, "step": 18837 }, { "epoch": 1.5260855476344783, "grad_norm": 0.04156474769115448, "learning_rate": 0.00013744092893469554, "loss": 0.2449, "step": 18838 }, { "epoch": 1.5261665586519766, "grad_norm": 0.0502975694835186, "learning_rate": 0.0001374364282821009, "loss": 0.2913, "step": 18839 }, { "epoch": 1.526247569669475, "grad_norm": 0.048595551401376724, "learning_rate": 0.00013743192762950628, "loss": 0.2757, "step": 18840 }, { "epoch": 1.5263285806869735, "grad_norm": 0.049081310629844666, "learning_rate": 0.00013742742697691167, "loss": 0.2851, "step": 18841 }, { "epoch": 1.5264095917044718, "grad_norm": 0.0556858591735363, "learning_rate": 0.00013742292632431703, "loss": 0.2821, "step": 18842 }, { "epoch": 1.5264906027219702, "grad_norm": 0.053837850689888, "learning_rate": 0.00013741842567172242, "loss": 0.3199, "step": 18843 }, { "epoch": 1.5265716137394687, "grad_norm": 0.055824149399995804, "learning_rate": 0.00013741392501912778, "loss": 0.2556, "step": 18844 }, { "epoch": 1.526652624756967, "grad_norm": 0.05074144899845123, "learning_rate": 0.00013740942436653314, "loss": 0.2915, "step": 18845 }, { "epoch": 1.5267336357744652, "grad_norm": 0.057336222380399704, "learning_rate": 0.00013740492371393855, "loss": 0.27, "step": 18846 }, { "epoch": 1.5268146467919637, "grad_norm": 0.054322924464941025, "learning_rate": 0.0001374004230613439, "loss": 0.3092, "step": 18847 }, { "epoch": 1.5268956578094621, "grad_norm": 0.04016140475869179, "learning_rate": 0.00013739592240874927, "loss": 0.2597, "step": 18848 }, { "epoch": 1.5269766688269604, "grad_norm": 0.05268358811736107, "learning_rate": 0.00013739142175615466, "loss": 0.2882, "step": 18849 }, { "epoch": 1.5270576798444588, "grad_norm": 0.059245605021715164, "learning_rate": 0.00013738692110356002, "loss": 0.3328, "step": 18850 }, { "epoch": 1.5271386908619573, "grad_norm": 0.05614931508898735, "learning_rate": 0.00013738242045096538, "loss": 0.383, "step": 18851 }, { "epoch": 1.5272197018794555, "grad_norm": 0.05188550800085068, "learning_rate": 0.0001373779197983708, "loss": 0.2679, "step": 18852 }, { "epoch": 1.527300712896954, "grad_norm": 0.049801215529441833, "learning_rate": 0.00013737341914577615, "loss": 0.2703, "step": 18853 }, { "epoch": 1.5273817239144525, "grad_norm": 0.046407975256443024, "learning_rate": 0.0001373689184931815, "loss": 0.2531, "step": 18854 }, { "epoch": 1.5274627349319507, "grad_norm": 0.051928356289863586, "learning_rate": 0.0001373644178405869, "loss": 0.3049, "step": 18855 }, { "epoch": 1.527543745949449, "grad_norm": 0.04042185842990875, "learning_rate": 0.00013735991718799226, "loss": 0.2441, "step": 18856 }, { "epoch": 1.5276247569669477, "grad_norm": 0.04542936757206917, "learning_rate": 0.00013735541653539762, "loss": 0.3054, "step": 18857 }, { "epoch": 1.527705767984446, "grad_norm": 0.044426001608371735, "learning_rate": 0.00013735091588280303, "loss": 0.3067, "step": 18858 }, { "epoch": 1.5277867790019442, "grad_norm": 0.0555790439248085, "learning_rate": 0.0001373464152302084, "loss": 0.301, "step": 18859 }, { "epoch": 1.5278677900194426, "grad_norm": 0.045266322791576385, "learning_rate": 0.00013734191457761375, "loss": 0.2609, "step": 18860 }, { "epoch": 1.527948801036941, "grad_norm": 0.04733710736036301, "learning_rate": 0.00013733741392501914, "loss": 0.2707, "step": 18861 }, { "epoch": 1.5280298120544393, "grad_norm": 0.051505301147699356, "learning_rate": 0.0001373329132724245, "loss": 0.295, "step": 18862 }, { "epoch": 1.5281108230719378, "grad_norm": 0.04492027685046196, "learning_rate": 0.00013732841261982986, "loss": 0.3219, "step": 18863 }, { "epoch": 1.5281918340894363, "grad_norm": 0.04752475023269653, "learning_rate": 0.00013732391196723528, "loss": 0.2978, "step": 18864 }, { "epoch": 1.5282728451069345, "grad_norm": 0.04412756860256195, "learning_rate": 0.00013731941131464064, "loss": 0.2908, "step": 18865 }, { "epoch": 1.528353856124433, "grad_norm": 0.045019157230854034, "learning_rate": 0.000137314910662046, "loss": 0.2837, "step": 18866 }, { "epoch": 1.5284348671419314, "grad_norm": 0.05006730556488037, "learning_rate": 0.00013731041000945138, "loss": 0.3036, "step": 18867 }, { "epoch": 1.5285158781594297, "grad_norm": 0.04813380166888237, "learning_rate": 0.00013730590935685674, "loss": 0.2999, "step": 18868 }, { "epoch": 1.528596889176928, "grad_norm": 0.052330706268548965, "learning_rate": 0.00013730140870426213, "loss": 0.2947, "step": 18869 }, { "epoch": 1.5286779001944264, "grad_norm": 0.05194742977619171, "learning_rate": 0.00013729690805166752, "loss": 0.2825, "step": 18870 }, { "epoch": 1.5287589112119249, "grad_norm": 0.052682504057884216, "learning_rate": 0.00013729240739907288, "loss": 0.3046, "step": 18871 }, { "epoch": 1.528839922229423, "grad_norm": 0.0457267127931118, "learning_rate": 0.00013728790674647824, "loss": 0.2767, "step": 18872 }, { "epoch": 1.5289209332469216, "grad_norm": 0.05539494380354881, "learning_rate": 0.00013728340609388362, "loss": 0.2687, "step": 18873 }, { "epoch": 1.52900194426442, "grad_norm": 0.04881107062101364, "learning_rate": 0.00013727890544128898, "loss": 0.2965, "step": 18874 }, { "epoch": 1.5290829552819183, "grad_norm": 0.05195486173033714, "learning_rate": 0.00013727440478869437, "loss": 0.3044, "step": 18875 }, { "epoch": 1.5291639662994168, "grad_norm": 0.03716466948390007, "learning_rate": 0.00013726990413609976, "loss": 0.2507, "step": 18876 }, { "epoch": 1.5292449773169152, "grad_norm": 0.05379246175289154, "learning_rate": 0.00013726540348350512, "loss": 0.3013, "step": 18877 }, { "epoch": 1.5293259883344135, "grad_norm": 0.04568881914019585, "learning_rate": 0.00013726090283091048, "loss": 0.2533, "step": 18878 }, { "epoch": 1.5294069993519117, "grad_norm": 0.05620739236474037, "learning_rate": 0.00013725640217831587, "loss": 0.2854, "step": 18879 }, { "epoch": 1.5294880103694104, "grad_norm": 0.053102824836969376, "learning_rate": 0.00013725190152572123, "loss": 0.2861, "step": 18880 }, { "epoch": 1.5295690213869086, "grad_norm": 0.046912338584661484, "learning_rate": 0.00013724740087312661, "loss": 0.2486, "step": 18881 }, { "epoch": 1.529650032404407, "grad_norm": 0.04645490646362305, "learning_rate": 0.000137242900220532, "loss": 0.3039, "step": 18882 }, { "epoch": 1.5297310434219054, "grad_norm": 0.04673031345009804, "learning_rate": 0.00013723839956793736, "loss": 0.2477, "step": 18883 }, { "epoch": 1.5298120544394038, "grad_norm": 0.04913533106446266, "learning_rate": 0.00013723389891534272, "loss": 0.2624, "step": 18884 }, { "epoch": 1.529893065456902, "grad_norm": 0.04520237073302269, "learning_rate": 0.0001372293982627481, "loss": 0.2663, "step": 18885 }, { "epoch": 1.5299740764744005, "grad_norm": 0.048991698771715164, "learning_rate": 0.00013722489761015347, "loss": 0.2824, "step": 18886 }, { "epoch": 1.530055087491899, "grad_norm": 0.05292793735861778, "learning_rate": 0.00013722039695755886, "loss": 0.2912, "step": 18887 }, { "epoch": 1.5301360985093972, "grad_norm": 0.0511743500828743, "learning_rate": 0.00013721589630496424, "loss": 0.3158, "step": 18888 }, { "epoch": 1.5302171095268955, "grad_norm": 0.05098418518900871, "learning_rate": 0.0001372113956523696, "loss": 0.2752, "step": 18889 }, { "epoch": 1.5302981205443942, "grad_norm": 0.04406699538230896, "learning_rate": 0.00013720689499977496, "loss": 0.2852, "step": 18890 }, { "epoch": 1.5303791315618924, "grad_norm": 0.04527914896607399, "learning_rate": 0.00013720239434718035, "loss": 0.317, "step": 18891 }, { "epoch": 1.5304601425793907, "grad_norm": 0.049641575664281845, "learning_rate": 0.0001371978936945857, "loss": 0.2888, "step": 18892 }, { "epoch": 1.5305411535968891, "grad_norm": 0.06409697979688644, "learning_rate": 0.0001371933930419911, "loss": 0.3378, "step": 18893 }, { "epoch": 1.5306221646143876, "grad_norm": 0.04698451608419418, "learning_rate": 0.00013718889238939648, "loss": 0.2883, "step": 18894 }, { "epoch": 1.5307031756318858, "grad_norm": 0.04558132216334343, "learning_rate": 0.00013718439173680184, "loss": 0.2941, "step": 18895 }, { "epoch": 1.5307841866493843, "grad_norm": 0.04831308871507645, "learning_rate": 0.0001371798910842072, "loss": 0.2903, "step": 18896 }, { "epoch": 1.5308651976668828, "grad_norm": 0.05204075947403908, "learning_rate": 0.0001371753904316126, "loss": 0.2744, "step": 18897 }, { "epoch": 1.530946208684381, "grad_norm": 0.043518051505088806, "learning_rate": 0.00013717088977901798, "loss": 0.2612, "step": 18898 }, { "epoch": 1.5310272197018795, "grad_norm": 0.04689870402216911, "learning_rate": 0.00013716638912642334, "loss": 0.2682, "step": 18899 }, { "epoch": 1.531108230719378, "grad_norm": 0.040445707738399506, "learning_rate": 0.00013716188847382873, "loss": 0.2518, "step": 18900 }, { "epoch": 1.5311892417368762, "grad_norm": 0.0630839467048645, "learning_rate": 0.00013715738782123409, "loss": 0.3021, "step": 18901 }, { "epoch": 1.5312702527543745, "grad_norm": 0.05061734840273857, "learning_rate": 0.00013715288716863945, "loss": 0.3078, "step": 18902 }, { "epoch": 1.5313512637718731, "grad_norm": 0.05375457555055618, "learning_rate": 0.00013714838651604483, "loss": 0.2956, "step": 18903 }, { "epoch": 1.5314322747893714, "grad_norm": 0.043118610978126526, "learning_rate": 0.00013714388586345022, "loss": 0.2787, "step": 18904 }, { "epoch": 1.5315132858068696, "grad_norm": 0.04651355743408203, "learning_rate": 0.00013713938521085558, "loss": 0.3131, "step": 18905 }, { "epoch": 1.531594296824368, "grad_norm": 0.04447954148054123, "learning_rate": 0.00013713488455826097, "loss": 0.2437, "step": 18906 }, { "epoch": 1.5316753078418666, "grad_norm": 0.04906405508518219, "learning_rate": 0.00013713038390566633, "loss": 0.3045, "step": 18907 }, { "epoch": 1.5317563188593648, "grad_norm": 0.04233681783080101, "learning_rate": 0.0001371258832530717, "loss": 0.2579, "step": 18908 }, { "epoch": 1.5318373298768633, "grad_norm": 0.05201317369937897, "learning_rate": 0.00013712138260047707, "loss": 0.2673, "step": 18909 }, { "epoch": 1.5319183408943617, "grad_norm": 0.04613606631755829, "learning_rate": 0.00013711688194788246, "loss": 0.2893, "step": 18910 }, { "epoch": 1.53199935191186, "grad_norm": 0.04684216529130936, "learning_rate": 0.00013711238129528782, "loss": 0.2955, "step": 18911 }, { "epoch": 1.5320803629293582, "grad_norm": 0.05115676671266556, "learning_rate": 0.0001371078806426932, "loss": 0.2879, "step": 18912 }, { "epoch": 1.532161373946857, "grad_norm": 0.04565107822418213, "learning_rate": 0.00013710337999009857, "loss": 0.2841, "step": 18913 }, { "epoch": 1.5322423849643552, "grad_norm": 0.0487804040312767, "learning_rate": 0.00013709887933750393, "loss": 0.2481, "step": 18914 }, { "epoch": 1.5323233959818534, "grad_norm": 0.060450732707977295, "learning_rate": 0.00013709437868490932, "loss": 0.3028, "step": 18915 }, { "epoch": 1.5324044069993519, "grad_norm": 0.05281240865588188, "learning_rate": 0.0001370898780323147, "loss": 0.3199, "step": 18916 }, { "epoch": 1.5324854180168503, "grad_norm": 0.053525056689977646, "learning_rate": 0.00013708537737972006, "loss": 0.3291, "step": 18917 }, { "epoch": 1.5325664290343486, "grad_norm": 0.053674425929784775, "learning_rate": 0.00013708087672712545, "loss": 0.2716, "step": 18918 }, { "epoch": 1.532647440051847, "grad_norm": 0.05458691343665123, "learning_rate": 0.0001370763760745308, "loss": 0.2458, "step": 18919 }, { "epoch": 1.5327284510693455, "grad_norm": 0.05632995441555977, "learning_rate": 0.00013707187542193617, "loss": 0.3023, "step": 18920 }, { "epoch": 1.5328094620868438, "grad_norm": 0.04286112263798714, "learning_rate": 0.00013706737476934158, "loss": 0.2662, "step": 18921 }, { "epoch": 1.5328904731043422, "grad_norm": 0.05146702751517296, "learning_rate": 0.00013706287411674694, "loss": 0.2854, "step": 18922 }, { "epoch": 1.5329714841218407, "grad_norm": 0.05799100548028946, "learning_rate": 0.0001370583734641523, "loss": 0.3491, "step": 18923 }, { "epoch": 1.533052495139339, "grad_norm": 0.05911766365170479, "learning_rate": 0.0001370538728115577, "loss": 0.2962, "step": 18924 }, { "epoch": 1.5331335061568372, "grad_norm": 0.047158051282167435, "learning_rate": 0.00013704937215896305, "loss": 0.3015, "step": 18925 }, { "epoch": 1.5332145171743357, "grad_norm": 0.04690094664692879, "learning_rate": 0.0001370448715063684, "loss": 0.2827, "step": 18926 }, { "epoch": 1.5332955281918341, "grad_norm": 0.0472753569483757, "learning_rate": 0.00013704037085377383, "loss": 0.286, "step": 18927 }, { "epoch": 1.5333765392093324, "grad_norm": 0.051584504544734955, "learning_rate": 0.0001370358702011792, "loss": 0.2563, "step": 18928 }, { "epoch": 1.5334575502268308, "grad_norm": 0.04185318574309349, "learning_rate": 0.00013703136954858455, "loss": 0.2376, "step": 18929 }, { "epoch": 1.5335385612443293, "grad_norm": 0.047741927206516266, "learning_rate": 0.00013702686889598993, "loss": 0.2723, "step": 18930 }, { "epoch": 1.5336195722618275, "grad_norm": 0.04596942290663719, "learning_rate": 0.0001370223682433953, "loss": 0.2821, "step": 18931 }, { "epoch": 1.533700583279326, "grad_norm": 0.041286639869213104, "learning_rate": 0.00013701786759080065, "loss": 0.2836, "step": 18932 }, { "epoch": 1.5337815942968245, "grad_norm": 0.048663485795259476, "learning_rate": 0.00013701336693820607, "loss": 0.3046, "step": 18933 }, { "epoch": 1.5338626053143227, "grad_norm": 0.05503995344042778, "learning_rate": 0.00013700886628561143, "loss": 0.2844, "step": 18934 }, { "epoch": 1.533943616331821, "grad_norm": 0.04451589286327362, "learning_rate": 0.0001370043656330168, "loss": 0.2738, "step": 18935 }, { "epoch": 1.5340246273493197, "grad_norm": 0.04549826681613922, "learning_rate": 0.00013699986498042218, "loss": 0.2936, "step": 18936 }, { "epoch": 1.534105638366818, "grad_norm": 0.046200018376111984, "learning_rate": 0.00013699536432782754, "loss": 0.3045, "step": 18937 }, { "epoch": 1.5341866493843161, "grad_norm": 0.05498679727315903, "learning_rate": 0.0001369908636752329, "loss": 0.3277, "step": 18938 }, { "epoch": 1.5342676604018146, "grad_norm": 0.057283565402030945, "learning_rate": 0.0001369863630226383, "loss": 0.2978, "step": 18939 }, { "epoch": 1.534348671419313, "grad_norm": 0.05207530036568642, "learning_rate": 0.00013698186237004367, "loss": 0.2682, "step": 18940 }, { "epoch": 1.5344296824368113, "grad_norm": 0.05777692794799805, "learning_rate": 0.00013697736171744903, "loss": 0.319, "step": 18941 }, { "epoch": 1.5345106934543098, "grad_norm": 0.04652559012174606, "learning_rate": 0.00013697286106485442, "loss": 0.2649, "step": 18942 }, { "epoch": 1.5345917044718083, "grad_norm": 0.05349148064851761, "learning_rate": 0.00013696836041225978, "loss": 0.3342, "step": 18943 }, { "epoch": 1.5346727154893065, "grad_norm": 0.057243578135967255, "learning_rate": 0.00013696385975966514, "loss": 0.3424, "step": 18944 }, { "epoch": 1.534753726506805, "grad_norm": 0.051278337836265564, "learning_rate": 0.00013695935910707055, "loss": 0.294, "step": 18945 }, { "epoch": 1.5348347375243034, "grad_norm": 0.04856126755475998, "learning_rate": 0.0001369548584544759, "loss": 0.2559, "step": 18946 }, { "epoch": 1.5349157485418017, "grad_norm": 0.05090457201004028, "learning_rate": 0.00013695035780188127, "loss": 0.2822, "step": 18947 }, { "epoch": 1.5349967595593, "grad_norm": 0.05697944760322571, "learning_rate": 0.00013694585714928666, "loss": 0.2799, "step": 18948 }, { "epoch": 1.5350777705767984, "grad_norm": 0.04438178613781929, "learning_rate": 0.00013694135649669202, "loss": 0.2901, "step": 18949 }, { "epoch": 1.5351587815942969, "grad_norm": 0.05179465189576149, "learning_rate": 0.0001369368558440974, "loss": 0.2867, "step": 18950 }, { "epoch": 1.535239792611795, "grad_norm": 0.044153694063425064, "learning_rate": 0.0001369323551915028, "loss": 0.2616, "step": 18951 }, { "epoch": 1.5353208036292936, "grad_norm": 0.05822839215397835, "learning_rate": 0.00013692785453890815, "loss": 0.3369, "step": 18952 }, { "epoch": 1.535401814646792, "grad_norm": 0.044818926602602005, "learning_rate": 0.0001369233538863135, "loss": 0.2568, "step": 18953 }, { "epoch": 1.5354828256642903, "grad_norm": 0.050371669232845306, "learning_rate": 0.0001369188532337189, "loss": 0.3114, "step": 18954 }, { "epoch": 1.5355638366817888, "grad_norm": 0.04291696846485138, "learning_rate": 0.00013691435258112426, "loss": 0.2632, "step": 18955 }, { "epoch": 1.5356448476992872, "grad_norm": 0.04437808692455292, "learning_rate": 0.00013690985192852965, "loss": 0.251, "step": 18956 }, { "epoch": 1.5357258587167855, "grad_norm": 0.04808518663048744, "learning_rate": 0.00013690535127593503, "loss": 0.2611, "step": 18957 }, { "epoch": 1.5358068697342837, "grad_norm": 0.0442044772207737, "learning_rate": 0.0001369008506233404, "loss": 0.3265, "step": 18958 }, { "epoch": 1.5358878807517824, "grad_norm": 0.04689866304397583, "learning_rate": 0.00013689634997074575, "loss": 0.307, "step": 18959 }, { "epoch": 1.5359688917692806, "grad_norm": 0.051713429391384125, "learning_rate": 0.00013689184931815114, "loss": 0.3164, "step": 18960 }, { "epoch": 1.5360499027867789, "grad_norm": 0.04567249119281769, "learning_rate": 0.0001368873486655565, "loss": 0.2978, "step": 18961 }, { "epoch": 1.5361309138042774, "grad_norm": 0.045768845826387405, "learning_rate": 0.0001368828480129619, "loss": 0.2633, "step": 18962 }, { "epoch": 1.5362119248217758, "grad_norm": 0.04977133870124817, "learning_rate": 0.00013687834736036728, "loss": 0.2855, "step": 18963 }, { "epoch": 1.536292935839274, "grad_norm": 0.0451236255466938, "learning_rate": 0.00013687384670777264, "loss": 0.3, "step": 18964 }, { "epoch": 1.5363739468567725, "grad_norm": 0.05996592342853546, "learning_rate": 0.000136869346055178, "loss": 0.2787, "step": 18965 }, { "epoch": 1.536454957874271, "grad_norm": 0.052222318947315216, "learning_rate": 0.00013686484540258338, "loss": 0.2754, "step": 18966 }, { "epoch": 1.5365359688917692, "grad_norm": 0.05358374863862991, "learning_rate": 0.00013686034474998874, "loss": 0.3048, "step": 18967 }, { "epoch": 1.5366169799092677, "grad_norm": 0.0475376695394516, "learning_rate": 0.00013685584409739413, "loss": 0.2867, "step": 18968 }, { "epoch": 1.5366979909267662, "grad_norm": 0.04334684833884239, "learning_rate": 0.00013685134344479952, "loss": 0.2459, "step": 18969 }, { "epoch": 1.5367790019442644, "grad_norm": 0.0507650263607502, "learning_rate": 0.00013684684279220488, "loss": 0.2975, "step": 18970 }, { "epoch": 1.5368600129617627, "grad_norm": 0.05293979123234749, "learning_rate": 0.00013684234213961024, "loss": 0.2737, "step": 18971 }, { "epoch": 1.5369410239792611, "grad_norm": 0.051188874989748, "learning_rate": 0.00013683784148701563, "loss": 0.2817, "step": 18972 }, { "epoch": 1.5370220349967596, "grad_norm": 0.05214584991335869, "learning_rate": 0.00013683334083442099, "loss": 0.2927, "step": 18973 }, { "epoch": 1.5371030460142578, "grad_norm": 0.05490950495004654, "learning_rate": 0.00013682884018182637, "loss": 0.3274, "step": 18974 }, { "epoch": 1.5371840570317563, "grad_norm": 0.05883365496993065, "learning_rate": 0.00013682433952923176, "loss": 0.3159, "step": 18975 }, { "epoch": 1.5372650680492548, "grad_norm": 0.058409273624420166, "learning_rate": 0.00013681983887663712, "loss": 0.3258, "step": 18976 }, { "epoch": 1.537346079066753, "grad_norm": 0.05302930995821953, "learning_rate": 0.00013681533822404248, "loss": 0.2753, "step": 18977 }, { "epoch": 1.5374270900842515, "grad_norm": 0.052349865436553955, "learning_rate": 0.00013681083757144787, "loss": 0.2985, "step": 18978 }, { "epoch": 1.53750810110175, "grad_norm": 0.045233648270368576, "learning_rate": 0.00013680633691885325, "loss": 0.2495, "step": 18979 }, { "epoch": 1.5375891121192482, "grad_norm": 0.053387340158224106, "learning_rate": 0.00013680183626625861, "loss": 0.2737, "step": 18980 }, { "epoch": 1.5376701231367464, "grad_norm": 0.05499692261219025, "learning_rate": 0.000136797335613664, "loss": 0.3166, "step": 18981 }, { "epoch": 1.5377511341542451, "grad_norm": 0.049109093844890594, "learning_rate": 0.00013679283496106936, "loss": 0.2929, "step": 18982 }, { "epoch": 1.5378321451717434, "grad_norm": 0.05261330306529999, "learning_rate": 0.00013678833430847472, "loss": 0.3063, "step": 18983 }, { "epoch": 1.5379131561892416, "grad_norm": 0.04580008238554001, "learning_rate": 0.0001367838336558801, "loss": 0.248, "step": 18984 }, { "epoch": 1.53799416720674, "grad_norm": 0.051151324063539505, "learning_rate": 0.0001367793330032855, "loss": 0.3288, "step": 18985 }, { "epoch": 1.5380751782242386, "grad_norm": 0.04817868024110794, "learning_rate": 0.00013677483235069086, "loss": 0.2552, "step": 18986 }, { "epoch": 1.5381561892417368, "grad_norm": 0.06041828915476799, "learning_rate": 0.00013677033169809624, "loss": 0.3565, "step": 18987 }, { "epoch": 1.5382372002592353, "grad_norm": 0.045897360891103745, "learning_rate": 0.0001367658310455016, "loss": 0.2806, "step": 18988 }, { "epoch": 1.5383182112767337, "grad_norm": 0.041819456964731216, "learning_rate": 0.00013676133039290696, "loss": 0.2634, "step": 18989 }, { "epoch": 1.538399222294232, "grad_norm": 0.05615771561861038, "learning_rate": 0.00013675682974031235, "loss": 0.2944, "step": 18990 }, { "epoch": 1.5384802333117304, "grad_norm": 0.047729093581438065, "learning_rate": 0.00013675232908771774, "loss": 0.2856, "step": 18991 }, { "epoch": 1.538561244329229, "grad_norm": 0.041438955813646317, "learning_rate": 0.0001367478284351231, "loss": 0.256, "step": 18992 }, { "epoch": 1.5386422553467272, "grad_norm": 0.04753054305911064, "learning_rate": 0.00013674332778252848, "loss": 0.2723, "step": 18993 }, { "epoch": 1.5387232663642254, "grad_norm": 0.055781301110982895, "learning_rate": 0.00013673882712993384, "loss": 0.2838, "step": 18994 }, { "epoch": 1.5388042773817239, "grad_norm": 0.046481624245643616, "learning_rate": 0.0001367343264773392, "loss": 0.298, "step": 18995 }, { "epoch": 1.5388852883992223, "grad_norm": 0.0620138980448246, "learning_rate": 0.0001367298258247446, "loss": 0.2808, "step": 18996 }, { "epoch": 1.5389662994167206, "grad_norm": 0.05862954258918762, "learning_rate": 0.00013672532517214998, "loss": 0.2687, "step": 18997 }, { "epoch": 1.539047310434219, "grad_norm": 0.051817674189805984, "learning_rate": 0.00013672082451955534, "loss": 0.2347, "step": 18998 }, { "epoch": 1.5391283214517175, "grad_norm": 0.04890400543808937, "learning_rate": 0.00013671632386696073, "loss": 0.2528, "step": 18999 }, { "epoch": 1.5392093324692158, "grad_norm": 0.06076984480023384, "learning_rate": 0.00013671182321436609, "loss": 0.3509, "step": 19000 }, { "epoch": 1.5392903434867142, "grad_norm": 0.046450335532426834, "learning_rate": 0.00013670732256177145, "loss": 0.3023, "step": 19001 }, { "epoch": 1.5393713545042127, "grad_norm": 0.043737445026636124, "learning_rate": 0.00013670282190917686, "loss": 0.2698, "step": 19002 }, { "epoch": 1.539452365521711, "grad_norm": 0.0688759982585907, "learning_rate": 0.00013669832125658222, "loss": 0.3433, "step": 19003 }, { "epoch": 1.5395333765392092, "grad_norm": 0.048470985144376755, "learning_rate": 0.00013669382060398758, "loss": 0.2475, "step": 19004 }, { "epoch": 1.5396143875567079, "grad_norm": 0.04027277231216431, "learning_rate": 0.00013668931995139297, "loss": 0.2401, "step": 19005 }, { "epoch": 1.5396953985742061, "grad_norm": 0.05675550922751427, "learning_rate": 0.00013668481929879833, "loss": 0.317, "step": 19006 }, { "epoch": 1.5397764095917044, "grad_norm": 0.04998868331313133, "learning_rate": 0.0001366803186462037, "loss": 0.3076, "step": 19007 }, { "epoch": 1.5398574206092028, "grad_norm": 0.04275386407971382, "learning_rate": 0.0001366758179936091, "loss": 0.26, "step": 19008 }, { "epoch": 1.5399384316267013, "grad_norm": 0.05047708749771118, "learning_rate": 0.00013667131734101446, "loss": 0.2602, "step": 19009 }, { "epoch": 1.5400194426441995, "grad_norm": 0.05603862553834915, "learning_rate": 0.00013666681668841982, "loss": 0.2823, "step": 19010 }, { "epoch": 1.540100453661698, "grad_norm": 0.0549708716571331, "learning_rate": 0.0001366623160358252, "loss": 0.2937, "step": 19011 }, { "epoch": 1.5401814646791965, "grad_norm": 0.06376846134662628, "learning_rate": 0.00013665781538323057, "loss": 0.3173, "step": 19012 }, { "epoch": 1.5402624756966947, "grad_norm": 0.05238895118236542, "learning_rate": 0.00013665331473063593, "loss": 0.2934, "step": 19013 }, { "epoch": 1.540343486714193, "grad_norm": 0.04790802299976349, "learning_rate": 0.00013664881407804134, "loss": 0.3099, "step": 19014 }, { "epoch": 1.5404244977316917, "grad_norm": 0.05614732950925827, "learning_rate": 0.0001366443134254467, "loss": 0.2684, "step": 19015 }, { "epoch": 1.54050550874919, "grad_norm": 0.044618602842092514, "learning_rate": 0.00013663981277285206, "loss": 0.2466, "step": 19016 }, { "epoch": 1.5405865197666881, "grad_norm": 0.04293638467788696, "learning_rate": 0.00013663531212025745, "loss": 0.2836, "step": 19017 }, { "epoch": 1.5406675307841866, "grad_norm": 0.044261954724788666, "learning_rate": 0.0001366308114676628, "loss": 0.269, "step": 19018 }, { "epoch": 1.540748541801685, "grad_norm": 0.048489660024642944, "learning_rate": 0.00013662631081506817, "loss": 0.2914, "step": 19019 }, { "epoch": 1.5408295528191833, "grad_norm": 0.04978737235069275, "learning_rate": 0.00013662181016247359, "loss": 0.2629, "step": 19020 }, { "epoch": 1.5409105638366818, "grad_norm": 0.04418829083442688, "learning_rate": 0.00013661730950987895, "loss": 0.2401, "step": 19021 }, { "epoch": 1.5409915748541803, "grad_norm": 0.05238080397248268, "learning_rate": 0.0001366128088572843, "loss": 0.2939, "step": 19022 }, { "epoch": 1.5410725858716785, "grad_norm": 0.05178670585155487, "learning_rate": 0.0001366083082046897, "loss": 0.3246, "step": 19023 }, { "epoch": 1.541153596889177, "grad_norm": 0.046030569821596146, "learning_rate": 0.00013660380755209505, "loss": 0.2812, "step": 19024 }, { "epoch": 1.5412346079066754, "grad_norm": 0.03960884362459183, "learning_rate": 0.0001365993068995004, "loss": 0.2723, "step": 19025 }, { "epoch": 1.5413156189241737, "grad_norm": 0.048268433660268784, "learning_rate": 0.00013659480624690583, "loss": 0.3143, "step": 19026 }, { "epoch": 1.541396629941672, "grad_norm": 0.05451720952987671, "learning_rate": 0.0001365903055943112, "loss": 0.2771, "step": 19027 }, { "epoch": 1.5414776409591704, "grad_norm": 0.05472222715616226, "learning_rate": 0.00013658580494171655, "loss": 0.2614, "step": 19028 }, { "epoch": 1.5415586519766689, "grad_norm": 0.05245329067111015, "learning_rate": 0.00013658130428912193, "loss": 0.3151, "step": 19029 }, { "epoch": 1.541639662994167, "grad_norm": 0.059196244925260544, "learning_rate": 0.0001365768036365273, "loss": 0.319, "step": 19030 }, { "epoch": 1.5417206740116656, "grad_norm": 0.05338888615369797, "learning_rate": 0.00013657230298393268, "loss": 0.2921, "step": 19031 }, { "epoch": 1.541801685029164, "grad_norm": 0.04819253832101822, "learning_rate": 0.00013656780233133807, "loss": 0.265, "step": 19032 }, { "epoch": 1.5418826960466623, "grad_norm": 0.05281459912657738, "learning_rate": 0.00013656330167874343, "loss": 0.2933, "step": 19033 }, { "epoch": 1.5419637070641607, "grad_norm": 0.043195176869630814, "learning_rate": 0.0001365588010261488, "loss": 0.2945, "step": 19034 }, { "epoch": 1.5420447180816592, "grad_norm": 0.049393683671951294, "learning_rate": 0.00013655430037355418, "loss": 0.3001, "step": 19035 }, { "epoch": 1.5421257290991575, "grad_norm": 0.04863373190164566, "learning_rate": 0.00013654979972095954, "loss": 0.2658, "step": 19036 }, { "epoch": 1.5422067401166557, "grad_norm": 0.05779989808797836, "learning_rate": 0.00013654529906836492, "loss": 0.2974, "step": 19037 }, { "epoch": 1.5422877511341544, "grad_norm": 0.04628223553299904, "learning_rate": 0.0001365407984157703, "loss": 0.27, "step": 19038 }, { "epoch": 1.5423687621516526, "grad_norm": 0.05051897466182709, "learning_rate": 0.00013653629776317567, "loss": 0.2776, "step": 19039 }, { "epoch": 1.5424497731691509, "grad_norm": 0.05451449379324913, "learning_rate": 0.00013653179711058103, "loss": 0.2728, "step": 19040 }, { "epoch": 1.5425307841866494, "grad_norm": 0.05515547841787338, "learning_rate": 0.00013652729645798642, "loss": 0.2856, "step": 19041 }, { "epoch": 1.5426117952041478, "grad_norm": 0.06005766987800598, "learning_rate": 0.00013652279580539178, "loss": 0.3029, "step": 19042 }, { "epoch": 1.542692806221646, "grad_norm": 0.0674663782119751, "learning_rate": 0.00013651829515279716, "loss": 0.2887, "step": 19043 }, { "epoch": 1.5427738172391445, "grad_norm": 0.0524708591401577, "learning_rate": 0.00013651379450020255, "loss": 0.2841, "step": 19044 }, { "epoch": 1.542854828256643, "grad_norm": 0.04781711474061012, "learning_rate": 0.0001365092938476079, "loss": 0.2882, "step": 19045 }, { "epoch": 1.5429358392741412, "grad_norm": 0.05105120688676834, "learning_rate": 0.00013650479319501327, "loss": 0.3169, "step": 19046 }, { "epoch": 1.5430168502916397, "grad_norm": 0.05355464294552803, "learning_rate": 0.00013650029254241866, "loss": 0.2709, "step": 19047 }, { "epoch": 1.5430978613091382, "grad_norm": 0.045331329107284546, "learning_rate": 0.00013649579188982402, "loss": 0.2627, "step": 19048 }, { "epoch": 1.5431788723266364, "grad_norm": 0.045703526586294174, "learning_rate": 0.0001364912912372294, "loss": 0.2689, "step": 19049 }, { "epoch": 1.5432598833441347, "grad_norm": 0.05270133540034294, "learning_rate": 0.0001364867905846348, "loss": 0.2694, "step": 19050 }, { "epoch": 1.5433408943616331, "grad_norm": 0.05833232030272484, "learning_rate": 0.00013648228993204015, "loss": 0.3095, "step": 19051 }, { "epoch": 1.5434219053791316, "grad_norm": 0.06126684695482254, "learning_rate": 0.0001364777892794455, "loss": 0.2659, "step": 19052 }, { "epoch": 1.5435029163966298, "grad_norm": 0.06940959393978119, "learning_rate": 0.0001364732886268509, "loss": 0.2783, "step": 19053 }, { "epoch": 1.5435839274141283, "grad_norm": 0.06364569813013077, "learning_rate": 0.0001364687879742563, "loss": 0.3155, "step": 19054 }, { "epoch": 1.5436649384316268, "grad_norm": 0.04655701667070389, "learning_rate": 0.00013646428732166165, "loss": 0.2484, "step": 19055 }, { "epoch": 1.543745949449125, "grad_norm": 0.05707899108529091, "learning_rate": 0.00013645978666906703, "loss": 0.2915, "step": 19056 }, { "epoch": 1.5438269604666235, "grad_norm": 0.05672769248485565, "learning_rate": 0.0001364552860164724, "loss": 0.2799, "step": 19057 }, { "epoch": 1.543907971484122, "grad_norm": 0.06137559935450554, "learning_rate": 0.00013645078536387775, "loss": 0.301, "step": 19058 }, { "epoch": 1.5439889825016202, "grad_norm": 0.051159653812646866, "learning_rate": 0.00013644628471128314, "loss": 0.2557, "step": 19059 }, { "epoch": 1.5440699935191184, "grad_norm": 0.047979846596717834, "learning_rate": 0.00013644178405868853, "loss": 0.2928, "step": 19060 }, { "epoch": 1.5441510045366171, "grad_norm": 0.04678960144519806, "learning_rate": 0.0001364372834060939, "loss": 0.3019, "step": 19061 }, { "epoch": 1.5442320155541154, "grad_norm": 0.053505491465330124, "learning_rate": 0.00013643278275349928, "loss": 0.2764, "step": 19062 }, { "epoch": 1.5443130265716136, "grad_norm": 0.04693808779120445, "learning_rate": 0.00013642828210090464, "loss": 0.2854, "step": 19063 }, { "epoch": 1.544394037589112, "grad_norm": 0.0580628328025341, "learning_rate": 0.00013642378144831, "loss": 0.2941, "step": 19064 }, { "epoch": 1.5444750486066106, "grad_norm": 0.06445372849702835, "learning_rate": 0.00013641928079571538, "loss": 0.322, "step": 19065 }, { "epoch": 1.5445560596241088, "grad_norm": 0.04730147495865822, "learning_rate": 0.00013641478014312077, "loss": 0.2475, "step": 19066 }, { "epoch": 1.5446370706416073, "grad_norm": 0.052395354956388474, "learning_rate": 0.00013641027949052613, "loss": 0.3442, "step": 19067 }, { "epoch": 1.5447180816591057, "grad_norm": 0.060794439166784286, "learning_rate": 0.00013640577883793152, "loss": 0.2869, "step": 19068 }, { "epoch": 1.544799092676604, "grad_norm": 0.062032055109739304, "learning_rate": 0.00013640127818533688, "loss": 0.2932, "step": 19069 }, { "epoch": 1.5448801036941024, "grad_norm": 0.04806280508637428, "learning_rate": 0.00013639677753274224, "loss": 0.3021, "step": 19070 }, { "epoch": 1.544961114711601, "grad_norm": 0.05165525898337364, "learning_rate": 0.00013639227688014763, "loss": 0.2776, "step": 19071 }, { "epoch": 1.5450421257290992, "grad_norm": 0.051210954785346985, "learning_rate": 0.000136387776227553, "loss": 0.3015, "step": 19072 }, { "epoch": 1.5451231367465974, "grad_norm": 0.059677209705114365, "learning_rate": 0.00013638327557495837, "loss": 0.2757, "step": 19073 }, { "epoch": 1.5452041477640959, "grad_norm": 0.04551670327782631, "learning_rate": 0.00013637877492236376, "loss": 0.2604, "step": 19074 }, { "epoch": 1.5452851587815943, "grad_norm": 0.05329824611544609, "learning_rate": 0.00013637427426976912, "loss": 0.259, "step": 19075 }, { "epoch": 1.5453661697990926, "grad_norm": 0.05617989972233772, "learning_rate": 0.00013636977361717448, "loss": 0.3109, "step": 19076 }, { "epoch": 1.545447180816591, "grad_norm": 0.04428596794605255, "learning_rate": 0.00013636527296457987, "loss": 0.2748, "step": 19077 }, { "epoch": 1.5455281918340895, "grad_norm": 0.057310551404953, "learning_rate": 0.00013636077231198525, "loss": 0.267, "step": 19078 }, { "epoch": 1.5456092028515878, "grad_norm": 0.043585795909166336, "learning_rate": 0.00013635627165939061, "loss": 0.2734, "step": 19079 }, { "epoch": 1.5456902138690862, "grad_norm": 0.05618955194950104, "learning_rate": 0.000136351771006796, "loss": 0.3172, "step": 19080 }, { "epoch": 1.5457712248865847, "grad_norm": 0.0505533441901207, "learning_rate": 0.00013634727035420136, "loss": 0.2817, "step": 19081 }, { "epoch": 1.545852235904083, "grad_norm": 0.04784523695707321, "learning_rate": 0.00013634276970160672, "loss": 0.2702, "step": 19082 }, { "epoch": 1.5459332469215812, "grad_norm": 0.04198610410094261, "learning_rate": 0.00013633826904901214, "loss": 0.2668, "step": 19083 }, { "epoch": 1.5460142579390799, "grad_norm": 0.05166913568973541, "learning_rate": 0.0001363337683964175, "loss": 0.2733, "step": 19084 }, { "epoch": 1.5460952689565781, "grad_norm": 0.053657166659832, "learning_rate": 0.00013632926774382286, "loss": 0.2881, "step": 19085 }, { "epoch": 1.5461762799740764, "grad_norm": 0.05604879558086395, "learning_rate": 0.00013632476709122824, "loss": 0.287, "step": 19086 }, { "epoch": 1.5462572909915748, "grad_norm": 0.049701027572155, "learning_rate": 0.0001363202664386336, "loss": 0.3058, "step": 19087 }, { "epoch": 1.5463383020090733, "grad_norm": 0.04624010622501373, "learning_rate": 0.00013631576578603896, "loss": 0.2654, "step": 19088 }, { "epoch": 1.5464193130265715, "grad_norm": 0.052133120596408844, "learning_rate": 0.00013631126513344438, "loss": 0.2648, "step": 19089 }, { "epoch": 1.54650032404407, "grad_norm": 0.04207282140851021, "learning_rate": 0.00013630676448084974, "loss": 0.2635, "step": 19090 }, { "epoch": 1.5465813350615685, "grad_norm": 0.04825148731470108, "learning_rate": 0.0001363022638282551, "loss": 0.2842, "step": 19091 }, { "epoch": 1.5466623460790667, "grad_norm": 0.04433571919798851, "learning_rate": 0.00013629776317566048, "loss": 0.2424, "step": 19092 }, { "epoch": 1.5467433570965652, "grad_norm": 0.05187336727976799, "learning_rate": 0.00013629326252306584, "loss": 0.3201, "step": 19093 }, { "epoch": 1.5468243681140637, "grad_norm": 0.05122842639684677, "learning_rate": 0.0001362887618704712, "loss": 0.2831, "step": 19094 }, { "epoch": 1.546905379131562, "grad_norm": 0.042315494269132614, "learning_rate": 0.00013628426121787662, "loss": 0.2557, "step": 19095 }, { "epoch": 1.5469863901490601, "grad_norm": 0.05235358327627182, "learning_rate": 0.00013627976056528198, "loss": 0.2863, "step": 19096 }, { "epoch": 1.5470674011665586, "grad_norm": 0.04973796010017395, "learning_rate": 0.00013627525991268734, "loss": 0.2852, "step": 19097 }, { "epoch": 1.547148412184057, "grad_norm": 0.05600909888744354, "learning_rate": 0.00013627075926009273, "loss": 0.3081, "step": 19098 }, { "epoch": 1.5472294232015553, "grad_norm": 0.05479155853390694, "learning_rate": 0.00013626625860749809, "loss": 0.2887, "step": 19099 }, { "epoch": 1.5473104342190538, "grad_norm": 0.04682200029492378, "learning_rate": 0.00013626175795490345, "loss": 0.2567, "step": 19100 }, { "epoch": 1.5473914452365523, "grad_norm": 0.04580902308225632, "learning_rate": 0.00013625725730230886, "loss": 0.2551, "step": 19101 }, { "epoch": 1.5474724562540505, "grad_norm": 0.047862354665994644, "learning_rate": 0.00013625275664971422, "loss": 0.2854, "step": 19102 }, { "epoch": 1.547553467271549, "grad_norm": 0.058732111006975174, "learning_rate": 0.00013624825599711958, "loss": 0.2954, "step": 19103 }, { "epoch": 1.5476344782890474, "grad_norm": 0.05385908856987953, "learning_rate": 0.00013624375534452497, "loss": 0.2788, "step": 19104 }, { "epoch": 1.5477154893065457, "grad_norm": 0.05026691034436226, "learning_rate": 0.00013623925469193033, "loss": 0.2716, "step": 19105 }, { "epoch": 1.547796500324044, "grad_norm": 0.05301313474774361, "learning_rate": 0.0001362347540393357, "loss": 0.3034, "step": 19106 }, { "epoch": 1.5478775113415426, "grad_norm": 0.0488019734621048, "learning_rate": 0.0001362302533867411, "loss": 0.3002, "step": 19107 }, { "epoch": 1.5479585223590409, "grad_norm": 0.05719370022416115, "learning_rate": 0.00013622575273414646, "loss": 0.2721, "step": 19108 }, { "epoch": 1.548039533376539, "grad_norm": 0.05312721058726311, "learning_rate": 0.00013622125208155182, "loss": 0.3002, "step": 19109 }, { "epoch": 1.5481205443940376, "grad_norm": 0.05826451629400253, "learning_rate": 0.0001362167514289572, "loss": 0.3005, "step": 19110 }, { "epoch": 1.548201555411536, "grad_norm": 0.046084482222795486, "learning_rate": 0.00013621225077636257, "loss": 0.3012, "step": 19111 }, { "epoch": 1.5482825664290343, "grad_norm": 0.05233805999159813, "learning_rate": 0.00013620775012376796, "loss": 0.2818, "step": 19112 }, { "epoch": 1.5483635774465327, "grad_norm": 0.05259188264608383, "learning_rate": 0.00013620324947117334, "loss": 0.2608, "step": 19113 }, { "epoch": 1.5484445884640312, "grad_norm": 0.05448652058839798, "learning_rate": 0.0001361987488185787, "loss": 0.2965, "step": 19114 }, { "epoch": 1.5485255994815295, "grad_norm": 0.055655933916568756, "learning_rate": 0.00013619424816598406, "loss": 0.2867, "step": 19115 }, { "epoch": 1.5486066104990277, "grad_norm": 0.05749504640698433, "learning_rate": 0.00013618974751338945, "loss": 0.3009, "step": 19116 }, { "epoch": 1.5486876215165264, "grad_norm": 0.05507310479879379, "learning_rate": 0.0001361852468607948, "loss": 0.2982, "step": 19117 }, { "epoch": 1.5487686325340246, "grad_norm": 0.0514855682849884, "learning_rate": 0.0001361807462082002, "loss": 0.2551, "step": 19118 }, { "epoch": 1.5488496435515229, "grad_norm": 0.03809289261698723, "learning_rate": 0.00013617624555560559, "loss": 0.2445, "step": 19119 }, { "epoch": 1.5489306545690213, "grad_norm": 0.04524914175271988, "learning_rate": 0.00013617174490301095, "loss": 0.2473, "step": 19120 }, { "epoch": 1.5490116655865198, "grad_norm": 0.04850262776017189, "learning_rate": 0.0001361672442504163, "loss": 0.2869, "step": 19121 }, { "epoch": 1.549092676604018, "grad_norm": 0.04947483539581299, "learning_rate": 0.0001361627435978217, "loss": 0.2706, "step": 19122 }, { "epoch": 1.5491736876215165, "grad_norm": 0.04799078777432442, "learning_rate": 0.00013615824294522705, "loss": 0.2938, "step": 19123 }, { "epoch": 1.549254698639015, "grad_norm": 0.058111388236284256, "learning_rate": 0.00013615374229263244, "loss": 0.2702, "step": 19124 }, { "epoch": 1.5493357096565132, "grad_norm": 0.055344365537166595, "learning_rate": 0.00013614924164003783, "loss": 0.3008, "step": 19125 }, { "epoch": 1.5494167206740117, "grad_norm": 0.04902368038892746, "learning_rate": 0.0001361447409874432, "loss": 0.2692, "step": 19126 }, { "epoch": 1.5494977316915102, "grad_norm": 0.04455145075917244, "learning_rate": 0.00013614024033484855, "loss": 0.3004, "step": 19127 }, { "epoch": 1.5495787427090084, "grad_norm": 0.04834412783384323, "learning_rate": 0.00013613573968225393, "loss": 0.2658, "step": 19128 }, { "epoch": 1.5496597537265067, "grad_norm": 0.04589447006583214, "learning_rate": 0.0001361312390296593, "loss": 0.3171, "step": 19129 }, { "epoch": 1.5497407647440054, "grad_norm": 0.04412047564983368, "learning_rate": 0.00013612673837706468, "loss": 0.299, "step": 19130 }, { "epoch": 1.5498217757615036, "grad_norm": 0.042024120688438416, "learning_rate": 0.00013612223772447007, "loss": 0.2811, "step": 19131 }, { "epoch": 1.5499027867790018, "grad_norm": 0.04573988541960716, "learning_rate": 0.00013611773707187543, "loss": 0.3066, "step": 19132 }, { "epoch": 1.5499837977965003, "grad_norm": 0.044483110308647156, "learning_rate": 0.0001361132364192808, "loss": 0.2522, "step": 19133 }, { "epoch": 1.5500648088139988, "grad_norm": 0.050241101533174515, "learning_rate": 0.00013610873576668618, "loss": 0.3022, "step": 19134 }, { "epoch": 1.550145819831497, "grad_norm": 0.05022579804062843, "learning_rate": 0.00013610423511409156, "loss": 0.3209, "step": 19135 }, { "epoch": 1.5502268308489955, "grad_norm": 0.045545876026153564, "learning_rate": 0.00013609973446149692, "loss": 0.2856, "step": 19136 }, { "epoch": 1.550307841866494, "grad_norm": 0.04494267329573631, "learning_rate": 0.0001360952338089023, "loss": 0.2814, "step": 19137 }, { "epoch": 1.5503888528839922, "grad_norm": 0.050585806369781494, "learning_rate": 0.00013609073315630767, "loss": 0.3115, "step": 19138 }, { "epoch": 1.5504698639014904, "grad_norm": 0.05736057087779045, "learning_rate": 0.00013608623250371303, "loss": 0.2699, "step": 19139 }, { "epoch": 1.5505508749189891, "grad_norm": 0.04995905980467796, "learning_rate": 0.00013608173185111842, "loss": 0.2802, "step": 19140 }, { "epoch": 1.5506318859364874, "grad_norm": 0.05028783157467842, "learning_rate": 0.0001360772311985238, "loss": 0.3247, "step": 19141 }, { "epoch": 1.5507128969539856, "grad_norm": 0.05537936091423035, "learning_rate": 0.00013607273054592916, "loss": 0.2982, "step": 19142 }, { "epoch": 1.550793907971484, "grad_norm": 0.049491897225379944, "learning_rate": 0.00013606822989333455, "loss": 0.3079, "step": 19143 }, { "epoch": 1.5508749189889826, "grad_norm": 0.042341284453868866, "learning_rate": 0.0001360637292407399, "loss": 0.289, "step": 19144 }, { "epoch": 1.5509559300064808, "grad_norm": 0.05065532401204109, "learning_rate": 0.00013605922858814527, "loss": 0.3116, "step": 19145 }, { "epoch": 1.5510369410239793, "grad_norm": 0.0424003005027771, "learning_rate": 0.00013605472793555066, "loss": 0.253, "step": 19146 }, { "epoch": 1.5511179520414777, "grad_norm": 0.05388662964105606, "learning_rate": 0.00013605022728295605, "loss": 0.3141, "step": 19147 }, { "epoch": 1.551198963058976, "grad_norm": 0.06315270066261292, "learning_rate": 0.0001360457266303614, "loss": 0.293, "step": 19148 }, { "epoch": 1.5512799740764744, "grad_norm": 0.05217668414115906, "learning_rate": 0.0001360412259777668, "loss": 0.2742, "step": 19149 }, { "epoch": 1.551360985093973, "grad_norm": 0.04942014440894127, "learning_rate": 0.00013603672532517215, "loss": 0.2802, "step": 19150 }, { "epoch": 1.5514419961114712, "grad_norm": 0.05215258523821831, "learning_rate": 0.00013603222467257751, "loss": 0.2602, "step": 19151 }, { "epoch": 1.5515230071289694, "grad_norm": 0.05975009500980377, "learning_rate": 0.0001360277240199829, "loss": 0.3159, "step": 19152 }, { "epoch": 1.5516040181464679, "grad_norm": 0.05319590866565704, "learning_rate": 0.0001360232233673883, "loss": 0.3004, "step": 19153 }, { "epoch": 1.5516850291639663, "grad_norm": 0.05167746916413307, "learning_rate": 0.00013601872271479365, "loss": 0.2637, "step": 19154 }, { "epoch": 1.5517660401814646, "grad_norm": 0.05901259556412697, "learning_rate": 0.00013601422206219904, "loss": 0.3177, "step": 19155 }, { "epoch": 1.551847051198963, "grad_norm": 0.051348935812711716, "learning_rate": 0.0001360097214096044, "loss": 0.2617, "step": 19156 }, { "epoch": 1.5519280622164615, "grad_norm": 0.055392563343048096, "learning_rate": 0.00013600522075700976, "loss": 0.2699, "step": 19157 }, { "epoch": 1.5520090732339598, "grad_norm": 0.05384603887796402, "learning_rate": 0.00013600072010441514, "loss": 0.2647, "step": 19158 }, { "epoch": 1.5520900842514582, "grad_norm": 0.05610581114888191, "learning_rate": 0.00013599621945182053, "loss": 0.3151, "step": 19159 }, { "epoch": 1.5521710952689567, "grad_norm": 0.04605025053024292, "learning_rate": 0.0001359917187992259, "loss": 0.244, "step": 19160 }, { "epoch": 1.552252106286455, "grad_norm": 0.045179493725299835, "learning_rate": 0.00013598721814663128, "loss": 0.2821, "step": 19161 }, { "epoch": 1.5523331173039532, "grad_norm": 0.04526267200708389, "learning_rate": 0.00013598271749403664, "loss": 0.2885, "step": 19162 }, { "epoch": 1.5524141283214519, "grad_norm": 0.04857548326253891, "learning_rate": 0.000135978216841442, "loss": 0.3125, "step": 19163 }, { "epoch": 1.5524951393389501, "grad_norm": 0.051715102046728134, "learning_rate": 0.0001359737161888474, "loss": 0.2556, "step": 19164 }, { "epoch": 1.5525761503564484, "grad_norm": 0.05461445078253746, "learning_rate": 0.00013596921553625277, "loss": 0.2873, "step": 19165 }, { "epoch": 1.5526571613739468, "grad_norm": 0.044770218431949615, "learning_rate": 0.00013596471488365813, "loss": 0.2829, "step": 19166 }, { "epoch": 1.5527381723914453, "grad_norm": 0.044490501284599304, "learning_rate": 0.00013596021423106352, "loss": 0.3031, "step": 19167 }, { "epoch": 1.5528191834089435, "grad_norm": 0.046804603189229965, "learning_rate": 0.00013595571357846888, "loss": 0.2768, "step": 19168 }, { "epoch": 1.552900194426442, "grad_norm": 0.04840075969696045, "learning_rate": 0.00013595121292587424, "loss": 0.2539, "step": 19169 }, { "epoch": 1.5529812054439405, "grad_norm": 0.05297040939331055, "learning_rate": 0.00013594671227327965, "loss": 0.3076, "step": 19170 }, { "epoch": 1.5530622164614387, "grad_norm": 0.050705526024103165, "learning_rate": 0.000135942211620685, "loss": 0.2785, "step": 19171 }, { "epoch": 1.5531432274789372, "grad_norm": 0.053877465426921844, "learning_rate": 0.00013593771096809037, "loss": 0.2833, "step": 19172 }, { "epoch": 1.5532242384964356, "grad_norm": 0.04980199784040451, "learning_rate": 0.00013593321031549576, "loss": 0.2553, "step": 19173 }, { "epoch": 1.553305249513934, "grad_norm": 0.050213202834129333, "learning_rate": 0.00013592870966290112, "loss": 0.2904, "step": 19174 }, { "epoch": 1.5533862605314321, "grad_norm": 0.04667943716049194, "learning_rate": 0.00013592420901030648, "loss": 0.2767, "step": 19175 }, { "epoch": 1.5534672715489306, "grad_norm": 0.04889126121997833, "learning_rate": 0.0001359197083577119, "loss": 0.2961, "step": 19176 }, { "epoch": 1.553548282566429, "grad_norm": 0.04985184967517853, "learning_rate": 0.00013591520770511725, "loss": 0.2659, "step": 19177 }, { "epoch": 1.5536292935839273, "grad_norm": 0.0674809142947197, "learning_rate": 0.00013591070705252261, "loss": 0.3292, "step": 19178 }, { "epoch": 1.5537103046014258, "grad_norm": 0.05704353377223015, "learning_rate": 0.000135906206399928, "loss": 0.294, "step": 19179 }, { "epoch": 1.5537913156189243, "grad_norm": 0.049726054072380066, "learning_rate": 0.00013590170574733336, "loss": 0.2835, "step": 19180 }, { "epoch": 1.5538723266364225, "grad_norm": 0.044575802981853485, "learning_rate": 0.00013589720509473872, "loss": 0.3227, "step": 19181 }, { "epoch": 1.553953337653921, "grad_norm": 0.04700697213411331, "learning_rate": 0.00013589270444214414, "loss": 0.2811, "step": 19182 }, { "epoch": 1.5540343486714194, "grad_norm": 0.05900681018829346, "learning_rate": 0.0001358882037895495, "loss": 0.3242, "step": 19183 }, { "epoch": 1.5541153596889177, "grad_norm": 0.05375407263636589, "learning_rate": 0.00013588370313695486, "loss": 0.2843, "step": 19184 }, { "epoch": 1.554196370706416, "grad_norm": 0.04696367681026459, "learning_rate": 0.00013587920248436024, "loss": 0.2742, "step": 19185 }, { "epoch": 1.5542773817239146, "grad_norm": 0.051585521548986435, "learning_rate": 0.0001358747018317656, "loss": 0.3225, "step": 19186 }, { "epoch": 1.5543583927414129, "grad_norm": 0.05937698483467102, "learning_rate": 0.000135870201179171, "loss": 0.3203, "step": 19187 }, { "epoch": 1.554439403758911, "grad_norm": 0.05009043589234352, "learning_rate": 0.00013586570052657638, "loss": 0.3167, "step": 19188 }, { "epoch": 1.5545204147764096, "grad_norm": 0.05212171748280525, "learning_rate": 0.00013586119987398174, "loss": 0.2756, "step": 19189 }, { "epoch": 1.554601425793908, "grad_norm": 0.04361918941140175, "learning_rate": 0.0001358566992213871, "loss": 0.2835, "step": 19190 }, { "epoch": 1.5546824368114063, "grad_norm": 0.05416923388838768, "learning_rate": 0.00013585219856879248, "loss": 0.3242, "step": 19191 }, { "epoch": 1.5547634478289047, "grad_norm": 0.0524650439620018, "learning_rate": 0.00013584769791619784, "loss": 0.2967, "step": 19192 }, { "epoch": 1.5548444588464032, "grad_norm": 0.052620600908994675, "learning_rate": 0.00013584319726360323, "loss": 0.2755, "step": 19193 }, { "epoch": 1.5549254698639015, "grad_norm": 0.05024907365441322, "learning_rate": 0.00013583869661100862, "loss": 0.2904, "step": 19194 }, { "epoch": 1.5550064808814, "grad_norm": 0.04997064173221588, "learning_rate": 0.00013583419595841398, "loss": 0.2797, "step": 19195 }, { "epoch": 1.5550874918988984, "grad_norm": 0.05072508379817009, "learning_rate": 0.00013582969530581934, "loss": 0.2913, "step": 19196 }, { "epoch": 1.5551685029163966, "grad_norm": 0.04423803091049194, "learning_rate": 0.00013582519465322473, "loss": 0.237, "step": 19197 }, { "epoch": 1.5552495139338949, "grad_norm": 0.048984941095113754, "learning_rate": 0.0001358206940006301, "loss": 0.2689, "step": 19198 }, { "epoch": 1.5553305249513933, "grad_norm": 0.04901856929063797, "learning_rate": 0.00013581619334803547, "loss": 0.2846, "step": 19199 }, { "epoch": 1.5554115359688918, "grad_norm": 0.05620553344488144, "learning_rate": 0.00013581169269544086, "loss": 0.2786, "step": 19200 }, { "epoch": 1.55549254698639, "grad_norm": 0.06047564372420311, "learning_rate": 0.00013580719204284622, "loss": 0.2953, "step": 19201 }, { "epoch": 1.5555735580038885, "grad_norm": 0.049802668392658234, "learning_rate": 0.00013580269139025158, "loss": 0.282, "step": 19202 }, { "epoch": 1.555654569021387, "grad_norm": 0.04509425163269043, "learning_rate": 0.00013579819073765697, "loss": 0.2552, "step": 19203 }, { "epoch": 1.5557355800388852, "grad_norm": 0.053334422409534454, "learning_rate": 0.00013579369008506233, "loss": 0.2578, "step": 19204 }, { "epoch": 1.5558165910563837, "grad_norm": 0.05020352452993393, "learning_rate": 0.00013578918943246772, "loss": 0.3115, "step": 19205 }, { "epoch": 1.5558976020738822, "grad_norm": 0.05819111317396164, "learning_rate": 0.0001357846887798731, "loss": 0.2912, "step": 19206 }, { "epoch": 1.5559786130913804, "grad_norm": 0.06057784706354141, "learning_rate": 0.00013578018812727846, "loss": 0.3445, "step": 19207 }, { "epoch": 1.5560596241088787, "grad_norm": 0.050647422671318054, "learning_rate": 0.00013577568747468382, "loss": 0.3067, "step": 19208 }, { "epoch": 1.5561406351263773, "grad_norm": 0.05491769313812256, "learning_rate": 0.0001357711868220892, "loss": 0.3065, "step": 19209 }, { "epoch": 1.5562216461438756, "grad_norm": 0.0505046546459198, "learning_rate": 0.00013576668616949457, "loss": 0.2424, "step": 19210 }, { "epoch": 1.5563026571613738, "grad_norm": 0.05184290558099747, "learning_rate": 0.00013576218551689996, "loss": 0.3057, "step": 19211 }, { "epoch": 1.5563836681788723, "grad_norm": 0.050715941935777664, "learning_rate": 0.00013575768486430534, "loss": 0.3058, "step": 19212 }, { "epoch": 1.5564646791963708, "grad_norm": 0.049122605472803116, "learning_rate": 0.0001357531842117107, "loss": 0.3018, "step": 19213 }, { "epoch": 1.556545690213869, "grad_norm": 0.0555947870016098, "learning_rate": 0.00013574868355911606, "loss": 0.3167, "step": 19214 }, { "epoch": 1.5566267012313675, "grad_norm": 0.046853404492139816, "learning_rate": 0.00013574418290652145, "loss": 0.2695, "step": 19215 }, { "epoch": 1.556707712248866, "grad_norm": 0.05228375643491745, "learning_rate": 0.00013573968225392684, "loss": 0.3139, "step": 19216 }, { "epoch": 1.5567887232663642, "grad_norm": 0.05127185583114624, "learning_rate": 0.0001357351816013322, "loss": 0.2837, "step": 19217 }, { "epoch": 1.5568697342838627, "grad_norm": 0.05675850808620453, "learning_rate": 0.00013573068094873759, "loss": 0.3022, "step": 19218 }, { "epoch": 1.5569507453013611, "grad_norm": 0.050446517765522, "learning_rate": 0.00013572618029614295, "loss": 0.2719, "step": 19219 }, { "epoch": 1.5570317563188594, "grad_norm": 0.06610075384378433, "learning_rate": 0.0001357216796435483, "loss": 0.3132, "step": 19220 }, { "epoch": 1.5571127673363576, "grad_norm": 0.046209342777729034, "learning_rate": 0.0001357171789909537, "loss": 0.262, "step": 19221 }, { "epoch": 1.557193778353856, "grad_norm": 0.05675473064184189, "learning_rate": 0.00013571267833835908, "loss": 0.2933, "step": 19222 }, { "epoch": 1.5572747893713546, "grad_norm": 0.05571332573890686, "learning_rate": 0.00013570817768576444, "loss": 0.2948, "step": 19223 }, { "epoch": 1.5573558003888528, "grad_norm": 0.06254886090755463, "learning_rate": 0.00013570367703316983, "loss": 0.3251, "step": 19224 }, { "epoch": 1.5574368114063513, "grad_norm": 0.05003569275140762, "learning_rate": 0.0001356991763805752, "loss": 0.3413, "step": 19225 }, { "epoch": 1.5575178224238497, "grad_norm": 0.048256766051054, "learning_rate": 0.00013569467572798055, "loss": 0.2815, "step": 19226 }, { "epoch": 1.557598833441348, "grad_norm": 0.05164124816656113, "learning_rate": 0.00013569017507538593, "loss": 0.278, "step": 19227 }, { "epoch": 1.5576798444588464, "grad_norm": 0.05035339668393135, "learning_rate": 0.00013568567442279132, "loss": 0.2995, "step": 19228 }, { "epoch": 1.557760855476345, "grad_norm": 0.037863705307245255, "learning_rate": 0.00013568117377019668, "loss": 0.2534, "step": 19229 }, { "epoch": 1.5578418664938432, "grad_norm": 0.048973795026540756, "learning_rate": 0.00013567667311760207, "loss": 0.2748, "step": 19230 }, { "epoch": 1.5579228775113414, "grad_norm": 0.043724387884140015, "learning_rate": 0.00013567217246500743, "loss": 0.2589, "step": 19231 }, { "epoch": 1.55800388852884, "grad_norm": 0.052051693201065063, "learning_rate": 0.0001356676718124128, "loss": 0.3129, "step": 19232 }, { "epoch": 1.5580848995463383, "grad_norm": 0.04797010496258736, "learning_rate": 0.00013566317115981818, "loss": 0.277, "step": 19233 }, { "epoch": 1.5581659105638366, "grad_norm": 0.04128566384315491, "learning_rate": 0.00013565867050722356, "loss": 0.2721, "step": 19234 }, { "epoch": 1.558246921581335, "grad_norm": 0.041017647832632065, "learning_rate": 0.00013565416985462892, "loss": 0.2811, "step": 19235 }, { "epoch": 1.5583279325988335, "grad_norm": 0.04845331236720085, "learning_rate": 0.0001356496692020343, "loss": 0.2946, "step": 19236 }, { "epoch": 1.5584089436163318, "grad_norm": 0.0455239862203598, "learning_rate": 0.00013564516854943967, "loss": 0.2714, "step": 19237 }, { "epoch": 1.5584899546338302, "grad_norm": 0.058418456465005875, "learning_rate": 0.00013564066789684503, "loss": 0.2962, "step": 19238 }, { "epoch": 1.5585709656513287, "grad_norm": 0.046155910938978195, "learning_rate": 0.00013563616724425044, "loss": 0.2602, "step": 19239 }, { "epoch": 1.558651976668827, "grad_norm": 0.05215545743703842, "learning_rate": 0.0001356316665916558, "loss": 0.2988, "step": 19240 }, { "epoch": 1.5587329876863252, "grad_norm": 0.04679625481367111, "learning_rate": 0.00013562716593906117, "loss": 0.2687, "step": 19241 }, { "epoch": 1.5588139987038239, "grad_norm": 0.055831775069236755, "learning_rate": 0.00013562266528646655, "loss": 0.3001, "step": 19242 }, { "epoch": 1.5588950097213221, "grad_norm": 0.05438535660505295, "learning_rate": 0.0001356181646338719, "loss": 0.3026, "step": 19243 }, { "epoch": 1.5589760207388204, "grad_norm": 0.04468968138098717, "learning_rate": 0.00013561366398127727, "loss": 0.2537, "step": 19244 }, { "epoch": 1.5590570317563188, "grad_norm": 0.06018408387899399, "learning_rate": 0.0001356091633286827, "loss": 0.3102, "step": 19245 }, { "epoch": 1.5591380427738173, "grad_norm": 0.050145700573921204, "learning_rate": 0.00013560466267608805, "loss": 0.2801, "step": 19246 }, { "epoch": 1.5592190537913155, "grad_norm": 0.07588254660367966, "learning_rate": 0.0001356001620234934, "loss": 0.3548, "step": 19247 }, { "epoch": 1.559300064808814, "grad_norm": 0.05625752732157707, "learning_rate": 0.0001355956613708988, "loss": 0.3069, "step": 19248 }, { "epoch": 1.5593810758263125, "grad_norm": 0.0515391044318676, "learning_rate": 0.00013559116071830415, "loss": 0.3146, "step": 19249 }, { "epoch": 1.5594620868438107, "grad_norm": 0.05147033929824829, "learning_rate": 0.00013558666006570951, "loss": 0.2481, "step": 19250 }, { "epoch": 1.5595430978613092, "grad_norm": 0.05439019575715065, "learning_rate": 0.00013558215941311493, "loss": 0.3062, "step": 19251 }, { "epoch": 1.5596241088788076, "grad_norm": 0.04817438870668411, "learning_rate": 0.0001355776587605203, "loss": 0.2924, "step": 19252 }, { "epoch": 1.559705119896306, "grad_norm": 0.059530194848775864, "learning_rate": 0.00013557315810792565, "loss": 0.3252, "step": 19253 }, { "epoch": 1.5597861309138041, "grad_norm": 0.051332466304302216, "learning_rate": 0.00013556865745533104, "loss": 0.2866, "step": 19254 }, { "epoch": 1.5598671419313026, "grad_norm": 0.04718302935361862, "learning_rate": 0.0001355641568027364, "loss": 0.2859, "step": 19255 }, { "epoch": 1.559948152948801, "grad_norm": 0.044280778616666794, "learning_rate": 0.00013555965615014176, "loss": 0.2865, "step": 19256 }, { "epoch": 1.5600291639662993, "grad_norm": 0.04140870273113251, "learning_rate": 0.00013555515549754717, "loss": 0.2405, "step": 19257 }, { "epoch": 1.5601101749837978, "grad_norm": 0.046388860791921616, "learning_rate": 0.00013555065484495253, "loss": 0.2829, "step": 19258 }, { "epoch": 1.5601911860012962, "grad_norm": 0.04737057164311409, "learning_rate": 0.0001355461541923579, "loss": 0.2739, "step": 19259 }, { "epoch": 1.5602721970187945, "grad_norm": 0.04849565029144287, "learning_rate": 0.00013554165353976328, "loss": 0.2338, "step": 19260 }, { "epoch": 1.560353208036293, "grad_norm": 0.05050482973456383, "learning_rate": 0.00013553715288716864, "loss": 0.2674, "step": 19261 }, { "epoch": 1.5604342190537914, "grad_norm": 0.04807204753160477, "learning_rate": 0.000135532652234574, "loss": 0.2819, "step": 19262 }, { "epoch": 1.5605152300712897, "grad_norm": 0.05020787939429283, "learning_rate": 0.0001355281515819794, "loss": 0.2694, "step": 19263 }, { "epoch": 1.560596241088788, "grad_norm": 0.044631484895944595, "learning_rate": 0.00013552365092938477, "loss": 0.2222, "step": 19264 }, { "epoch": 1.5606772521062866, "grad_norm": 0.0597679540514946, "learning_rate": 0.00013551915027679013, "loss": 0.3144, "step": 19265 }, { "epoch": 1.5607582631237849, "grad_norm": 0.049190703779459, "learning_rate": 0.00013551464962419552, "loss": 0.2971, "step": 19266 }, { "epoch": 1.560839274141283, "grad_norm": 0.05174947530031204, "learning_rate": 0.00013551014897160088, "loss": 0.2619, "step": 19267 }, { "epoch": 1.5609202851587816, "grad_norm": 0.053329844027757645, "learning_rate": 0.00013550564831900627, "loss": 0.2714, "step": 19268 }, { "epoch": 1.56100129617628, "grad_norm": 0.05406653881072998, "learning_rate": 0.00013550114766641165, "loss": 0.3264, "step": 19269 }, { "epoch": 1.5610823071937783, "grad_norm": 0.05473935604095459, "learning_rate": 0.000135496647013817, "loss": 0.2865, "step": 19270 }, { "epoch": 1.5611633182112767, "grad_norm": 0.05382101982831955, "learning_rate": 0.00013549214636122237, "loss": 0.2634, "step": 19271 }, { "epoch": 1.5612443292287752, "grad_norm": 0.06472937762737274, "learning_rate": 0.00013548764570862776, "loss": 0.2841, "step": 19272 }, { "epoch": 1.5613253402462735, "grad_norm": 0.04545477405190468, "learning_rate": 0.00013548314505603312, "loss": 0.2482, "step": 19273 }, { "epoch": 1.561406351263772, "grad_norm": 0.05186472088098526, "learning_rate": 0.0001354786444034385, "loss": 0.2807, "step": 19274 }, { "epoch": 1.5614873622812704, "grad_norm": 0.062175050377845764, "learning_rate": 0.0001354741437508439, "loss": 0.3009, "step": 19275 }, { "epoch": 1.5615683732987686, "grad_norm": 0.0523802749812603, "learning_rate": 0.00013546964309824925, "loss": 0.2879, "step": 19276 }, { "epoch": 1.5616493843162669, "grad_norm": 0.06917037814855576, "learning_rate": 0.00013546514244565461, "loss": 0.3506, "step": 19277 }, { "epoch": 1.5617303953337653, "grad_norm": 0.05295359715819359, "learning_rate": 0.00013546064179306, "loss": 0.3047, "step": 19278 }, { "epoch": 1.5618114063512638, "grad_norm": 0.05048537999391556, "learning_rate": 0.00013545614114046536, "loss": 0.3138, "step": 19279 }, { "epoch": 1.561892417368762, "grad_norm": 0.0454871729016304, "learning_rate": 0.00013545164048787075, "loss": 0.3103, "step": 19280 }, { "epoch": 1.5619734283862605, "grad_norm": 0.048254676163196564, "learning_rate": 0.00013544713983527614, "loss": 0.2625, "step": 19281 }, { "epoch": 1.562054439403759, "grad_norm": 0.052300721406936646, "learning_rate": 0.0001354426391826815, "loss": 0.2706, "step": 19282 }, { "epoch": 1.5621354504212572, "grad_norm": 0.04279913753271103, "learning_rate": 0.00013543813853008686, "loss": 0.2616, "step": 19283 }, { "epoch": 1.5622164614387557, "grad_norm": 0.04854980483651161, "learning_rate": 0.00013543363787749224, "loss": 0.2711, "step": 19284 }, { "epoch": 1.5622974724562542, "grad_norm": 0.06649527698755264, "learning_rate": 0.0001354291372248976, "loss": 0.2833, "step": 19285 }, { "epoch": 1.5623784834737524, "grad_norm": 0.04639168828725815, "learning_rate": 0.000135424636572303, "loss": 0.2559, "step": 19286 }, { "epoch": 1.5624594944912507, "grad_norm": 0.049177560955286026, "learning_rate": 0.00013542013591970838, "loss": 0.3056, "step": 19287 }, { "epoch": 1.5625405055087493, "grad_norm": 0.05000094324350357, "learning_rate": 0.00013541563526711374, "loss": 0.2694, "step": 19288 }, { "epoch": 1.5626215165262476, "grad_norm": 0.048982229083776474, "learning_rate": 0.0001354111346145191, "loss": 0.2824, "step": 19289 }, { "epoch": 1.5627025275437458, "grad_norm": 0.05096309632062912, "learning_rate": 0.00013540663396192449, "loss": 0.2573, "step": 19290 }, { "epoch": 1.5627835385612443, "grad_norm": 0.04594533517956734, "learning_rate": 0.00013540213330932985, "loss": 0.2853, "step": 19291 }, { "epoch": 1.5628645495787428, "grad_norm": 0.049910467118024826, "learning_rate": 0.00013539763265673523, "loss": 0.2632, "step": 19292 }, { "epoch": 1.562945560596241, "grad_norm": 0.05282336473464966, "learning_rate": 0.00013539313200414062, "loss": 0.2825, "step": 19293 }, { "epoch": 1.5630265716137395, "grad_norm": 0.04863569512963295, "learning_rate": 0.00013538863135154598, "loss": 0.3071, "step": 19294 }, { "epoch": 1.563107582631238, "grad_norm": 0.05358542501926422, "learning_rate": 0.00013538413069895134, "loss": 0.2847, "step": 19295 }, { "epoch": 1.5631885936487362, "grad_norm": 0.0446048267185688, "learning_rate": 0.00013537963004635673, "loss": 0.2695, "step": 19296 }, { "epoch": 1.5632696046662347, "grad_norm": 0.05163099244236946, "learning_rate": 0.00013537512939376211, "loss": 0.2833, "step": 19297 }, { "epoch": 1.5633506156837331, "grad_norm": 0.04648342728614807, "learning_rate": 0.00013537062874116747, "loss": 0.2864, "step": 19298 }, { "epoch": 1.5634316267012314, "grad_norm": 0.059515487402677536, "learning_rate": 0.00013536612808857286, "loss": 0.2898, "step": 19299 }, { "epoch": 1.5635126377187296, "grad_norm": 0.04971949756145477, "learning_rate": 0.00013536162743597822, "loss": 0.2834, "step": 19300 }, { "epoch": 1.563593648736228, "grad_norm": 0.05322429537773132, "learning_rate": 0.00013535712678338358, "loss": 0.2776, "step": 19301 }, { "epoch": 1.5636746597537265, "grad_norm": 0.051670897752046585, "learning_rate": 0.00013535262613078897, "loss": 0.2525, "step": 19302 }, { "epoch": 1.5637556707712248, "grad_norm": 0.043443743139505386, "learning_rate": 0.00013534812547819436, "loss": 0.2671, "step": 19303 }, { "epoch": 1.5638366817887233, "grad_norm": 0.0499313585460186, "learning_rate": 0.00013534362482559972, "loss": 0.3068, "step": 19304 }, { "epoch": 1.5639176928062217, "grad_norm": 0.057684704661369324, "learning_rate": 0.0001353391241730051, "loss": 0.3108, "step": 19305 }, { "epoch": 1.56399870382372, "grad_norm": 0.0660921260714531, "learning_rate": 0.00013533462352041046, "loss": 0.2984, "step": 19306 }, { "epoch": 1.5640797148412184, "grad_norm": 0.048565756529569626, "learning_rate": 0.00013533012286781582, "loss": 0.2767, "step": 19307 }, { "epoch": 1.564160725858717, "grad_norm": 0.04774582386016846, "learning_rate": 0.0001353256222152212, "loss": 0.2724, "step": 19308 }, { "epoch": 1.5642417368762151, "grad_norm": 0.048284415155649185, "learning_rate": 0.0001353211215626266, "loss": 0.2749, "step": 19309 }, { "epoch": 1.5643227478937134, "grad_norm": 0.04952572286128998, "learning_rate": 0.00013531662091003196, "loss": 0.304, "step": 19310 }, { "epoch": 1.564403758911212, "grad_norm": 0.047845348715782166, "learning_rate": 0.00013531212025743734, "loss": 0.2561, "step": 19311 }, { "epoch": 1.5644847699287103, "grad_norm": 0.04719892144203186, "learning_rate": 0.0001353076196048427, "loss": 0.3392, "step": 19312 }, { "epoch": 1.5645657809462086, "grad_norm": 0.045906711369752884, "learning_rate": 0.00013530311895224806, "loss": 0.2525, "step": 19313 }, { "epoch": 1.564646791963707, "grad_norm": 0.0455571748316288, "learning_rate": 0.00013529861829965345, "loss": 0.2435, "step": 19314 }, { "epoch": 1.5647278029812055, "grad_norm": 0.048583488911390305, "learning_rate": 0.00013529411764705884, "loss": 0.3033, "step": 19315 }, { "epoch": 1.5648088139987038, "grad_norm": 0.055878374725580215, "learning_rate": 0.0001352896169944642, "loss": 0.3001, "step": 19316 }, { "epoch": 1.5648898250162022, "grad_norm": 0.04683026298880577, "learning_rate": 0.00013528511634186959, "loss": 0.2882, "step": 19317 }, { "epoch": 1.5649708360337007, "grad_norm": 0.055208344012498856, "learning_rate": 0.00013528061568927495, "loss": 0.3108, "step": 19318 }, { "epoch": 1.565051847051199, "grad_norm": 0.06561359763145447, "learning_rate": 0.0001352761150366803, "loss": 0.2945, "step": 19319 }, { "epoch": 1.5651328580686974, "grad_norm": 0.05128493160009384, "learning_rate": 0.00013527161438408572, "loss": 0.2801, "step": 19320 }, { "epoch": 1.5652138690861959, "grad_norm": 0.04719381034374237, "learning_rate": 0.00013526711373149108, "loss": 0.2506, "step": 19321 }, { "epoch": 1.565294880103694, "grad_norm": 0.05548151209950447, "learning_rate": 0.00013526261307889644, "loss": 0.2729, "step": 19322 }, { "epoch": 1.5653758911211924, "grad_norm": 0.05958663299679756, "learning_rate": 0.00013525811242630183, "loss": 0.2758, "step": 19323 }, { "epoch": 1.5654569021386908, "grad_norm": 0.04910269379615784, "learning_rate": 0.0001352536117737072, "loss": 0.2814, "step": 19324 }, { "epoch": 1.5655379131561893, "grad_norm": 0.05134573206305504, "learning_rate": 0.00013524911112111255, "loss": 0.2896, "step": 19325 }, { "epoch": 1.5656189241736875, "grad_norm": 0.05062694102525711, "learning_rate": 0.00013524461046851796, "loss": 0.2615, "step": 19326 }, { "epoch": 1.565699935191186, "grad_norm": 0.05874665826559067, "learning_rate": 0.00013524010981592332, "loss": 0.2627, "step": 19327 }, { "epoch": 1.5657809462086845, "grad_norm": 0.05786015838384628, "learning_rate": 0.00013523560916332868, "loss": 0.2801, "step": 19328 }, { "epoch": 1.5658619572261827, "grad_norm": 0.05415690690279007, "learning_rate": 0.00013523110851073407, "loss": 0.3098, "step": 19329 }, { "epoch": 1.5659429682436812, "grad_norm": 0.05289224535226822, "learning_rate": 0.00013522660785813943, "loss": 0.3232, "step": 19330 }, { "epoch": 1.5660239792611796, "grad_norm": 0.04391355440020561, "learning_rate": 0.0001352221072055448, "loss": 0.2806, "step": 19331 }, { "epoch": 1.5661049902786779, "grad_norm": 0.04492342099547386, "learning_rate": 0.0001352176065529502, "loss": 0.2628, "step": 19332 }, { "epoch": 1.5661860012961761, "grad_norm": 0.047401949763298035, "learning_rate": 0.00013521310590035556, "loss": 0.3076, "step": 19333 }, { "epoch": 1.5662670123136748, "grad_norm": 0.05914795398712158, "learning_rate": 0.00013520860524776092, "loss": 0.2709, "step": 19334 }, { "epoch": 1.566348023331173, "grad_norm": 0.052210431545972824, "learning_rate": 0.0001352041045951663, "loss": 0.2864, "step": 19335 }, { "epoch": 1.5664290343486713, "grad_norm": 0.0464906245470047, "learning_rate": 0.00013519960394257167, "loss": 0.2522, "step": 19336 }, { "epoch": 1.5665100453661698, "grad_norm": 0.05061884596943855, "learning_rate": 0.00013519510328997703, "loss": 0.2815, "step": 19337 }, { "epoch": 1.5665910563836682, "grad_norm": 0.05345157906413078, "learning_rate": 0.00013519060263738245, "loss": 0.2781, "step": 19338 }, { "epoch": 1.5666720674011665, "grad_norm": 0.048576343804597855, "learning_rate": 0.0001351861019847878, "loss": 0.3225, "step": 19339 }, { "epoch": 1.566753078418665, "grad_norm": 0.05282697454094887, "learning_rate": 0.00013518160133219317, "loss": 0.2956, "step": 19340 }, { "epoch": 1.5668340894361634, "grad_norm": 0.05145876109600067, "learning_rate": 0.00013517710067959855, "loss": 0.2932, "step": 19341 }, { "epoch": 1.5669151004536617, "grad_norm": 0.05020388588309288, "learning_rate": 0.0001351726000270039, "loss": 0.2749, "step": 19342 }, { "epoch": 1.56699611147116, "grad_norm": 0.05083455145359039, "learning_rate": 0.00013516809937440927, "loss": 0.2608, "step": 19343 }, { "epoch": 1.5670771224886586, "grad_norm": 0.04994390159845352, "learning_rate": 0.0001351635987218147, "loss": 0.2783, "step": 19344 }, { "epoch": 1.5671581335061568, "grad_norm": 0.05404075235128403, "learning_rate": 0.00013515909806922005, "loss": 0.2697, "step": 19345 }, { "epoch": 1.567239144523655, "grad_norm": 0.05101686343550682, "learning_rate": 0.0001351545974166254, "loss": 0.2923, "step": 19346 }, { "epoch": 1.5673201555411536, "grad_norm": 0.04621126502752304, "learning_rate": 0.0001351500967640308, "loss": 0.3011, "step": 19347 }, { "epoch": 1.567401166558652, "grad_norm": 0.058542583137750626, "learning_rate": 0.00013514559611143615, "loss": 0.2787, "step": 19348 }, { "epoch": 1.5674821775761503, "grad_norm": 0.055109862238168716, "learning_rate": 0.00013514109545884154, "loss": 0.324, "step": 19349 }, { "epoch": 1.5675631885936487, "grad_norm": 0.042174868285655975, "learning_rate": 0.00013513659480624693, "loss": 0.2538, "step": 19350 }, { "epoch": 1.5676441996111472, "grad_norm": 0.05382600799202919, "learning_rate": 0.0001351320941536523, "loss": 0.283, "step": 19351 }, { "epoch": 1.5677252106286454, "grad_norm": 0.049266114830970764, "learning_rate": 0.00013512759350105765, "loss": 0.2964, "step": 19352 }, { "epoch": 1.567806221646144, "grad_norm": 0.04278041794896126, "learning_rate": 0.00013512309284846304, "loss": 0.2726, "step": 19353 }, { "epoch": 1.5678872326636424, "grad_norm": 0.05372599512338638, "learning_rate": 0.0001351185921958684, "loss": 0.3168, "step": 19354 }, { "epoch": 1.5679682436811406, "grad_norm": 0.05103114992380142, "learning_rate": 0.00013511409154327378, "loss": 0.2903, "step": 19355 }, { "epoch": 1.5680492546986389, "grad_norm": 0.053942613303661346, "learning_rate": 0.00013510959089067917, "loss": 0.2771, "step": 19356 }, { "epoch": 1.5681302657161373, "grad_norm": 0.04800404980778694, "learning_rate": 0.00013510509023808453, "loss": 0.3018, "step": 19357 }, { "epoch": 1.5682112767336358, "grad_norm": 0.04979465529322624, "learning_rate": 0.0001351005895854899, "loss": 0.2866, "step": 19358 }, { "epoch": 1.568292287751134, "grad_norm": 0.047334734350442886, "learning_rate": 0.00013509608893289528, "loss": 0.2854, "step": 19359 }, { "epoch": 1.5683732987686325, "grad_norm": 0.04779450595378876, "learning_rate": 0.00013509158828030064, "loss": 0.2819, "step": 19360 }, { "epoch": 1.568454309786131, "grad_norm": 0.0574072003364563, "learning_rate": 0.00013508708762770602, "loss": 0.2799, "step": 19361 }, { "epoch": 1.5685353208036292, "grad_norm": 0.0440792478621006, "learning_rate": 0.0001350825869751114, "loss": 0.2604, "step": 19362 }, { "epoch": 1.5686163318211277, "grad_norm": 0.06242513284087181, "learning_rate": 0.00013507808632251677, "loss": 0.3305, "step": 19363 }, { "epoch": 1.5686973428386262, "grad_norm": 0.06059674918651581, "learning_rate": 0.00013507358566992213, "loss": 0.2981, "step": 19364 }, { "epoch": 1.5687783538561244, "grad_norm": 0.04905066266655922, "learning_rate": 0.00013506908501732752, "loss": 0.2793, "step": 19365 }, { "epoch": 1.5688593648736227, "grad_norm": 0.04303139075636864, "learning_rate": 0.00013506458436473288, "loss": 0.2432, "step": 19366 }, { "epoch": 1.5689403758911213, "grad_norm": 0.04874027520418167, "learning_rate": 0.00013506008371213827, "loss": 0.2696, "step": 19367 }, { "epoch": 1.5690213869086196, "grad_norm": 0.05774131789803505, "learning_rate": 0.00013505558305954365, "loss": 0.2821, "step": 19368 }, { "epoch": 1.5691023979261178, "grad_norm": 0.04771944135427475, "learning_rate": 0.000135051082406949, "loss": 0.2732, "step": 19369 }, { "epoch": 1.5691834089436163, "grad_norm": 0.054138049483299255, "learning_rate": 0.00013504658175435437, "loss": 0.3147, "step": 19370 }, { "epoch": 1.5692644199611148, "grad_norm": 0.05368463322520256, "learning_rate": 0.00013504208110175976, "loss": 0.2969, "step": 19371 }, { "epoch": 1.569345430978613, "grad_norm": 0.05088932812213898, "learning_rate": 0.00013503758044916515, "loss": 0.2483, "step": 19372 }, { "epoch": 1.5694264419961115, "grad_norm": 0.049424633383750916, "learning_rate": 0.0001350330797965705, "loss": 0.2875, "step": 19373 }, { "epoch": 1.56950745301361, "grad_norm": 0.05756201595067978, "learning_rate": 0.0001350285791439759, "loss": 0.3183, "step": 19374 }, { "epoch": 1.5695884640311082, "grad_norm": 0.047381263226270676, "learning_rate": 0.00013502407849138125, "loss": 0.271, "step": 19375 }, { "epoch": 1.5696694750486067, "grad_norm": 0.053029779344797134, "learning_rate": 0.00013501957783878662, "loss": 0.299, "step": 19376 }, { "epoch": 1.5697504860661051, "grad_norm": 0.053522516041994095, "learning_rate": 0.000135015077186192, "loss": 0.3147, "step": 19377 }, { "epoch": 1.5698314970836034, "grad_norm": 0.04558708518743515, "learning_rate": 0.0001350105765335974, "loss": 0.2693, "step": 19378 }, { "epoch": 1.5699125081011016, "grad_norm": 0.055690888315439224, "learning_rate": 0.00013500607588100275, "loss": 0.3297, "step": 19379 }, { "epoch": 1.5699935191186, "grad_norm": 0.0678800642490387, "learning_rate": 0.00013500157522840814, "loss": 0.3371, "step": 19380 }, { "epoch": 1.5700745301360985, "grad_norm": 0.04769781231880188, "learning_rate": 0.0001349970745758135, "loss": 0.2699, "step": 19381 }, { "epoch": 1.5701555411535968, "grad_norm": 0.05501679331064224, "learning_rate": 0.00013499257392321886, "loss": 0.2611, "step": 19382 }, { "epoch": 1.5702365521710953, "grad_norm": 0.05800247937440872, "learning_rate": 0.00013498807327062424, "loss": 0.312, "step": 19383 }, { "epoch": 1.5703175631885937, "grad_norm": 0.05244293808937073, "learning_rate": 0.00013498357261802963, "loss": 0.2793, "step": 19384 }, { "epoch": 1.570398574206092, "grad_norm": 0.04592202231287956, "learning_rate": 0.000134979071965435, "loss": 0.2493, "step": 19385 }, { "epoch": 1.5704795852235904, "grad_norm": 0.059504128992557526, "learning_rate": 0.00013497457131284038, "loss": 0.3381, "step": 19386 }, { "epoch": 1.570560596241089, "grad_norm": 0.04630604386329651, "learning_rate": 0.00013497007066024574, "loss": 0.3126, "step": 19387 }, { "epoch": 1.5706416072585871, "grad_norm": 0.05985010787844658, "learning_rate": 0.0001349655700076511, "loss": 0.3084, "step": 19388 }, { "epoch": 1.5707226182760854, "grad_norm": 0.04947219043970108, "learning_rate": 0.00013496106935505649, "loss": 0.2638, "step": 19389 }, { "epoch": 1.570803629293584, "grad_norm": 0.05288197100162506, "learning_rate": 0.00013495656870246187, "loss": 0.2529, "step": 19390 }, { "epoch": 1.5708846403110823, "grad_norm": 0.04978443309664726, "learning_rate": 0.00013495206804986723, "loss": 0.2473, "step": 19391 }, { "epoch": 1.5709656513285806, "grad_norm": 0.05209624394774437, "learning_rate": 0.00013494756739727262, "loss": 0.2942, "step": 19392 }, { "epoch": 1.571046662346079, "grad_norm": 0.04603233560919762, "learning_rate": 0.00013494306674467798, "loss": 0.2777, "step": 19393 }, { "epoch": 1.5711276733635775, "grad_norm": 0.052153099328279495, "learning_rate": 0.00013493856609208334, "loss": 0.2729, "step": 19394 }, { "epoch": 1.5712086843810757, "grad_norm": 0.044548213481903076, "learning_rate": 0.00013493406543948873, "loss": 0.2765, "step": 19395 }, { "epoch": 1.5712896953985742, "grad_norm": 0.04247027635574341, "learning_rate": 0.00013492956478689411, "loss": 0.2657, "step": 19396 }, { "epoch": 1.5713707064160727, "grad_norm": 0.05424690991640091, "learning_rate": 0.00013492506413429947, "loss": 0.3083, "step": 19397 }, { "epoch": 1.571451717433571, "grad_norm": 0.05457203835248947, "learning_rate": 0.00013492056348170486, "loss": 0.2551, "step": 19398 }, { "epoch": 1.5715327284510694, "grad_norm": 0.054207317531108856, "learning_rate": 0.00013491606282911022, "loss": 0.3004, "step": 19399 }, { "epoch": 1.5716137394685679, "grad_norm": 0.05946018546819687, "learning_rate": 0.00013491156217651558, "loss": 0.3862, "step": 19400 }, { "epoch": 1.571694750486066, "grad_norm": 0.05369342491030693, "learning_rate": 0.000134907061523921, "loss": 0.2731, "step": 19401 }, { "epoch": 1.5717757615035644, "grad_norm": 0.04194348305463791, "learning_rate": 0.00013490256087132636, "loss": 0.2552, "step": 19402 }, { "epoch": 1.5718567725210628, "grad_norm": 0.048225175589323044, "learning_rate": 0.00013489806021873172, "loss": 0.2676, "step": 19403 }, { "epoch": 1.5719377835385613, "grad_norm": 0.04498755931854248, "learning_rate": 0.0001348935595661371, "loss": 0.2717, "step": 19404 }, { "epoch": 1.5720187945560595, "grad_norm": 0.048275288194417953, "learning_rate": 0.00013488905891354246, "loss": 0.2988, "step": 19405 }, { "epoch": 1.572099805573558, "grad_norm": 0.05003470182418823, "learning_rate": 0.00013488455826094782, "loss": 0.3039, "step": 19406 }, { "epoch": 1.5721808165910565, "grad_norm": 0.04922705143690109, "learning_rate": 0.00013488005760835324, "loss": 0.3035, "step": 19407 }, { "epoch": 1.5722618276085547, "grad_norm": 0.06346455216407776, "learning_rate": 0.0001348755569557586, "loss": 0.3198, "step": 19408 }, { "epoch": 1.5723428386260532, "grad_norm": 0.052734699100255966, "learning_rate": 0.00013487105630316396, "loss": 0.2844, "step": 19409 }, { "epoch": 1.5724238496435516, "grad_norm": 0.05498180910944939, "learning_rate": 0.00013486655565056934, "loss": 0.3042, "step": 19410 }, { "epoch": 1.5725048606610499, "grad_norm": 0.040842171758413315, "learning_rate": 0.0001348620549979747, "loss": 0.2255, "step": 19411 }, { "epoch": 1.5725858716785481, "grad_norm": 0.047706086188554764, "learning_rate": 0.00013485755434538006, "loss": 0.2813, "step": 19412 }, { "epoch": 1.5726668826960468, "grad_norm": 0.04232097789645195, "learning_rate": 0.00013485305369278548, "loss": 0.2824, "step": 19413 }, { "epoch": 1.572747893713545, "grad_norm": 0.04460098221898079, "learning_rate": 0.00013484855304019084, "loss": 0.2744, "step": 19414 }, { "epoch": 1.5728289047310433, "grad_norm": 0.06017705798149109, "learning_rate": 0.0001348440523875962, "loss": 0.2867, "step": 19415 }, { "epoch": 1.5729099157485418, "grad_norm": 0.05444073677062988, "learning_rate": 0.00013483955173500159, "loss": 0.2994, "step": 19416 }, { "epoch": 1.5729909267660402, "grad_norm": 0.047964904457330704, "learning_rate": 0.00013483505108240695, "loss": 0.2811, "step": 19417 }, { "epoch": 1.5730719377835385, "grad_norm": 0.049424637109041214, "learning_rate": 0.0001348305504298123, "loss": 0.2927, "step": 19418 }, { "epoch": 1.573152948801037, "grad_norm": 0.05039997026324272, "learning_rate": 0.00013482604977721772, "loss": 0.2835, "step": 19419 }, { "epoch": 1.5732339598185354, "grad_norm": 0.04230662062764168, "learning_rate": 0.00013482154912462308, "loss": 0.2777, "step": 19420 }, { "epoch": 1.5733149708360337, "grad_norm": 0.05108647048473358, "learning_rate": 0.00013481704847202844, "loss": 0.2719, "step": 19421 }, { "epoch": 1.5733959818535321, "grad_norm": 0.045273154973983765, "learning_rate": 0.00013481254781943383, "loss": 0.2714, "step": 19422 }, { "epoch": 1.5734769928710306, "grad_norm": 0.05585255101323128, "learning_rate": 0.0001348080471668392, "loss": 0.3056, "step": 19423 }, { "epoch": 1.5735580038885288, "grad_norm": 0.04518725723028183, "learning_rate": 0.00013480354651424455, "loss": 0.2783, "step": 19424 }, { "epoch": 1.573639014906027, "grad_norm": 0.0475885234773159, "learning_rate": 0.00013479904586164996, "loss": 0.3019, "step": 19425 }, { "epoch": 1.5737200259235256, "grad_norm": 0.05177724361419678, "learning_rate": 0.00013479454520905532, "loss": 0.3063, "step": 19426 }, { "epoch": 1.573801036941024, "grad_norm": 0.053175684064626694, "learning_rate": 0.00013479004455646068, "loss": 0.3155, "step": 19427 }, { "epoch": 1.5738820479585223, "grad_norm": 0.05215033143758774, "learning_rate": 0.00013478554390386607, "loss": 0.2957, "step": 19428 }, { "epoch": 1.5739630589760207, "grad_norm": 0.04903462529182434, "learning_rate": 0.00013478104325127143, "loss": 0.2765, "step": 19429 }, { "epoch": 1.5740440699935192, "grad_norm": 0.058999255299568176, "learning_rate": 0.00013477654259867682, "loss": 0.3445, "step": 19430 }, { "epoch": 1.5741250810110174, "grad_norm": 0.042476482689380646, "learning_rate": 0.0001347720419460822, "loss": 0.2704, "step": 19431 }, { "epoch": 1.574206092028516, "grad_norm": 0.05020613968372345, "learning_rate": 0.00013476754129348756, "loss": 0.2711, "step": 19432 }, { "epoch": 1.5742871030460144, "grad_norm": 0.05600776523351669, "learning_rate": 0.00013476304064089292, "loss": 0.2579, "step": 19433 }, { "epoch": 1.5743681140635126, "grad_norm": 0.07813206315040588, "learning_rate": 0.0001347585399882983, "loss": 0.393, "step": 19434 }, { "epoch": 1.5744491250810109, "grad_norm": 0.055210404098033905, "learning_rate": 0.00013475403933570367, "loss": 0.3479, "step": 19435 }, { "epoch": 1.5745301360985096, "grad_norm": 0.042050011456012726, "learning_rate": 0.00013474953868310906, "loss": 0.2318, "step": 19436 }, { "epoch": 1.5746111471160078, "grad_norm": 0.05371921509504318, "learning_rate": 0.00013474503803051445, "loss": 0.2841, "step": 19437 }, { "epoch": 1.574692158133506, "grad_norm": 0.05059438198804855, "learning_rate": 0.0001347405373779198, "loss": 0.2818, "step": 19438 }, { "epoch": 1.5747731691510045, "grad_norm": 0.054289672523736954, "learning_rate": 0.00013473603672532517, "loss": 0.3056, "step": 19439 }, { "epoch": 1.574854180168503, "grad_norm": 0.049676887691020966, "learning_rate": 0.00013473153607273055, "loss": 0.2827, "step": 19440 }, { "epoch": 1.5749351911860012, "grad_norm": 0.055509135127067566, "learning_rate": 0.0001347270354201359, "loss": 0.3255, "step": 19441 }, { "epoch": 1.5750162022034997, "grad_norm": 0.04096323624253273, "learning_rate": 0.0001347225347675413, "loss": 0.2607, "step": 19442 }, { "epoch": 1.5750972132209982, "grad_norm": 0.05271543562412262, "learning_rate": 0.0001347180341149467, "loss": 0.2707, "step": 19443 }, { "epoch": 1.5751782242384964, "grad_norm": 0.05153711512684822, "learning_rate": 0.00013471353346235205, "loss": 0.291, "step": 19444 }, { "epoch": 1.5752592352559946, "grad_norm": 0.05824122950434685, "learning_rate": 0.0001347090328097574, "loss": 0.2906, "step": 19445 }, { "epoch": 1.5753402462734933, "grad_norm": 0.055363208055496216, "learning_rate": 0.0001347045321571628, "loss": 0.2877, "step": 19446 }, { "epoch": 1.5754212572909916, "grad_norm": 0.048949237912893295, "learning_rate": 0.00013470003150456815, "loss": 0.289, "step": 19447 }, { "epoch": 1.5755022683084898, "grad_norm": 0.05374382436275482, "learning_rate": 0.00013469553085197354, "loss": 0.2762, "step": 19448 }, { "epoch": 1.5755832793259883, "grad_norm": 0.052326034754514694, "learning_rate": 0.00013469103019937893, "loss": 0.2752, "step": 19449 }, { "epoch": 1.5756642903434868, "grad_norm": 0.052585724741220474, "learning_rate": 0.0001346865295467843, "loss": 0.3296, "step": 19450 }, { "epoch": 1.575745301360985, "grad_norm": 0.06618500500917435, "learning_rate": 0.00013468202889418965, "loss": 0.2835, "step": 19451 }, { "epoch": 1.5758263123784835, "grad_norm": 0.046448152512311935, "learning_rate": 0.00013467752824159504, "loss": 0.3083, "step": 19452 }, { "epoch": 1.575907323395982, "grad_norm": 0.053853485733270645, "learning_rate": 0.00013467302758900042, "loss": 0.3046, "step": 19453 }, { "epoch": 1.5759883344134802, "grad_norm": 0.056492555886507034, "learning_rate": 0.00013466852693640578, "loss": 0.3049, "step": 19454 }, { "epoch": 1.5760693454309787, "grad_norm": 0.06340819597244263, "learning_rate": 0.00013466402628381117, "loss": 0.3437, "step": 19455 }, { "epoch": 1.5761503564484771, "grad_norm": 0.04808042198419571, "learning_rate": 0.00013465952563121653, "loss": 0.2704, "step": 19456 }, { "epoch": 1.5762313674659754, "grad_norm": 0.04911893606185913, "learning_rate": 0.0001346550249786219, "loss": 0.2651, "step": 19457 }, { "epoch": 1.5763123784834736, "grad_norm": 0.05891195312142372, "learning_rate": 0.00013465052432602728, "loss": 0.269, "step": 19458 }, { "epoch": 1.5763933895009723, "grad_norm": 0.06033793464303017, "learning_rate": 0.00013464602367343266, "loss": 0.2942, "step": 19459 }, { "epoch": 1.5764744005184705, "grad_norm": 0.05624804645776749, "learning_rate": 0.00013464152302083802, "loss": 0.2491, "step": 19460 }, { "epoch": 1.5765554115359688, "grad_norm": 0.054478585720062256, "learning_rate": 0.0001346370223682434, "loss": 0.2932, "step": 19461 }, { "epoch": 1.5766364225534673, "grad_norm": 0.050502706319093704, "learning_rate": 0.00013463252171564877, "loss": 0.2737, "step": 19462 }, { "epoch": 1.5767174335709657, "grad_norm": 0.05171595513820648, "learning_rate": 0.00013462802106305413, "loss": 0.2842, "step": 19463 }, { "epoch": 1.576798444588464, "grad_norm": 0.04873314127326012, "learning_rate": 0.00013462352041045952, "loss": 0.2706, "step": 19464 }, { "epoch": 1.5768794556059624, "grad_norm": 0.052232805639505386, "learning_rate": 0.0001346190197578649, "loss": 0.2655, "step": 19465 }, { "epoch": 1.576960466623461, "grad_norm": 0.04653653874993324, "learning_rate": 0.00013461451910527027, "loss": 0.2775, "step": 19466 }, { "epoch": 1.5770414776409591, "grad_norm": 0.055312540382146835, "learning_rate": 0.00013461001845267565, "loss": 0.3084, "step": 19467 }, { "epoch": 1.5771224886584574, "grad_norm": 0.06885188072919846, "learning_rate": 0.00013460551780008101, "loss": 0.2853, "step": 19468 }, { "epoch": 1.577203499675956, "grad_norm": 0.055660393089056015, "learning_rate": 0.00013460101714748637, "loss": 0.3098, "step": 19469 }, { "epoch": 1.5772845106934543, "grad_norm": 0.04958134889602661, "learning_rate": 0.00013459651649489176, "loss": 0.2592, "step": 19470 }, { "epoch": 1.5773655217109526, "grad_norm": 0.04702504724264145, "learning_rate": 0.00013459201584229715, "loss": 0.2825, "step": 19471 }, { "epoch": 1.577446532728451, "grad_norm": 0.04876773804426193, "learning_rate": 0.0001345875151897025, "loss": 0.2493, "step": 19472 }, { "epoch": 1.5775275437459495, "grad_norm": 0.05104687064886093, "learning_rate": 0.0001345830145371079, "loss": 0.3036, "step": 19473 }, { "epoch": 1.5776085547634477, "grad_norm": 0.048596564680337906, "learning_rate": 0.00013457851388451326, "loss": 0.2729, "step": 19474 }, { "epoch": 1.5776895657809462, "grad_norm": 0.05432022362947464, "learning_rate": 0.00013457401323191862, "loss": 0.3191, "step": 19475 }, { "epoch": 1.5777705767984447, "grad_norm": 0.05890238285064697, "learning_rate": 0.000134569512579324, "loss": 0.3281, "step": 19476 }, { "epoch": 1.577851587815943, "grad_norm": 0.0422152616083622, "learning_rate": 0.0001345650119267294, "loss": 0.2609, "step": 19477 }, { "epoch": 1.5779325988334414, "grad_norm": 0.048545897006988525, "learning_rate": 0.00013456051127413475, "loss": 0.2838, "step": 19478 }, { "epoch": 1.5780136098509399, "grad_norm": 0.048626262694597244, "learning_rate": 0.00013455601062154014, "loss": 0.2753, "step": 19479 }, { "epoch": 1.578094620868438, "grad_norm": 0.0606694258749485, "learning_rate": 0.0001345515099689455, "loss": 0.3308, "step": 19480 }, { "epoch": 1.5781756318859363, "grad_norm": 0.04808412119746208, "learning_rate": 0.00013454700931635086, "loss": 0.2622, "step": 19481 }, { "epoch": 1.5782566429034348, "grad_norm": 0.056980349123477936, "learning_rate": 0.00013454250866375627, "loss": 0.3184, "step": 19482 }, { "epoch": 1.5783376539209333, "grad_norm": 0.047187671065330505, "learning_rate": 0.00013453800801116163, "loss": 0.2792, "step": 19483 }, { "epoch": 1.5784186649384315, "grad_norm": 0.05486692488193512, "learning_rate": 0.000134533507358567, "loss": 0.2921, "step": 19484 }, { "epoch": 1.57849967595593, "grad_norm": 0.042787306010723114, "learning_rate": 0.00013452900670597238, "loss": 0.2742, "step": 19485 }, { "epoch": 1.5785806869734285, "grad_norm": 0.04654007405042648, "learning_rate": 0.00013452450605337774, "loss": 0.2752, "step": 19486 }, { "epoch": 1.5786616979909267, "grad_norm": 0.04305479675531387, "learning_rate": 0.0001345200054007831, "loss": 0.2936, "step": 19487 }, { "epoch": 1.5787427090084252, "grad_norm": 0.04199616238474846, "learning_rate": 0.0001345155047481885, "loss": 0.251, "step": 19488 }, { "epoch": 1.5788237200259236, "grad_norm": 0.055879976600408554, "learning_rate": 0.00013451100409559387, "loss": 0.3394, "step": 19489 }, { "epoch": 1.5789047310434219, "grad_norm": 0.04630248621106148, "learning_rate": 0.00013450650344299923, "loss": 0.2817, "step": 19490 }, { "epoch": 1.5789857420609201, "grad_norm": 0.06566954404115677, "learning_rate": 0.00013450200279040462, "loss": 0.2839, "step": 19491 }, { "epoch": 1.5790667530784188, "grad_norm": 0.058554135262966156, "learning_rate": 0.00013449750213780998, "loss": 0.2627, "step": 19492 }, { "epoch": 1.579147764095917, "grad_norm": 0.05380035191774368, "learning_rate": 0.00013449300148521534, "loss": 0.2689, "step": 19493 }, { "epoch": 1.5792287751134153, "grad_norm": 0.042370859533548355, "learning_rate": 0.00013448850083262075, "loss": 0.2608, "step": 19494 }, { "epoch": 1.5793097861309138, "grad_norm": 0.05102236941456795, "learning_rate": 0.00013448400018002611, "loss": 0.2601, "step": 19495 }, { "epoch": 1.5793907971484122, "grad_norm": 0.05376815423369408, "learning_rate": 0.00013447949952743147, "loss": 0.2965, "step": 19496 }, { "epoch": 1.5794718081659105, "grad_norm": 0.055364444851875305, "learning_rate": 0.00013447499887483686, "loss": 0.3266, "step": 19497 }, { "epoch": 1.579552819183409, "grad_norm": 0.0610194057226181, "learning_rate": 0.00013447049822224222, "loss": 0.2914, "step": 19498 }, { "epoch": 1.5796338302009074, "grad_norm": 0.051366176456213, "learning_rate": 0.00013446599756964758, "loss": 0.2758, "step": 19499 }, { "epoch": 1.5797148412184057, "grad_norm": 0.05474573001265526, "learning_rate": 0.000134461496917053, "loss": 0.264, "step": 19500 }, { "epoch": 1.5797958522359041, "grad_norm": 0.058302175253629684, "learning_rate": 0.00013445699626445836, "loss": 0.2763, "step": 19501 }, { "epoch": 1.5798768632534026, "grad_norm": 0.04867679625749588, "learning_rate": 0.00013445249561186372, "loss": 0.2755, "step": 19502 }, { "epoch": 1.5799578742709008, "grad_norm": 0.051118914037942886, "learning_rate": 0.0001344479949592691, "loss": 0.2846, "step": 19503 }, { "epoch": 1.580038885288399, "grad_norm": 0.05177982151508331, "learning_rate": 0.00013444349430667446, "loss": 0.2764, "step": 19504 }, { "epoch": 1.5801198963058976, "grad_norm": 0.050728704780340195, "learning_rate": 0.00013443899365407985, "loss": 0.2439, "step": 19505 }, { "epoch": 1.580200907323396, "grad_norm": 0.055721383541822433, "learning_rate": 0.00013443449300148524, "loss": 0.317, "step": 19506 }, { "epoch": 1.5802819183408943, "grad_norm": 0.0517570935189724, "learning_rate": 0.0001344299923488906, "loss": 0.2538, "step": 19507 }, { "epoch": 1.5803629293583927, "grad_norm": 0.04586503654718399, "learning_rate": 0.00013442549169629596, "loss": 0.308, "step": 19508 }, { "epoch": 1.5804439403758912, "grad_norm": 0.03953614458441734, "learning_rate": 0.00013442099104370134, "loss": 0.2506, "step": 19509 }, { "epoch": 1.5805249513933894, "grad_norm": 0.05850844085216522, "learning_rate": 0.0001344164903911067, "loss": 0.3596, "step": 19510 }, { "epoch": 1.580605962410888, "grad_norm": 0.05324858799576759, "learning_rate": 0.0001344119897385121, "loss": 0.3396, "step": 19511 }, { "epoch": 1.5806869734283864, "grad_norm": 0.04520611837506294, "learning_rate": 0.00013440748908591748, "loss": 0.2488, "step": 19512 }, { "epoch": 1.5807679844458846, "grad_norm": 0.0491422601044178, "learning_rate": 0.00013440298843332284, "loss": 0.2924, "step": 19513 }, { "epoch": 1.5808489954633829, "grad_norm": 0.05382819101214409, "learning_rate": 0.0001343984877807282, "loss": 0.2977, "step": 19514 }, { "epoch": 1.5809300064808816, "grad_norm": 0.04993279650807381, "learning_rate": 0.0001343939871281336, "loss": 0.3074, "step": 19515 }, { "epoch": 1.5810110174983798, "grad_norm": 0.0456116609275341, "learning_rate": 0.00013438948647553895, "loss": 0.3097, "step": 19516 }, { "epoch": 1.581092028515878, "grad_norm": 0.05514628440141678, "learning_rate": 0.00013438498582294433, "loss": 0.3011, "step": 19517 }, { "epoch": 1.5811730395333765, "grad_norm": 0.05581878498196602, "learning_rate": 0.00013438048517034972, "loss": 0.3181, "step": 19518 }, { "epoch": 1.581254050550875, "grad_norm": 0.052173078060150146, "learning_rate": 0.00013437598451775508, "loss": 0.2768, "step": 19519 }, { "epoch": 1.5813350615683732, "grad_norm": 0.04671182110905647, "learning_rate": 0.00013437148386516044, "loss": 0.2637, "step": 19520 }, { "epoch": 1.5814160725858717, "grad_norm": 0.051288314163684845, "learning_rate": 0.00013436698321256583, "loss": 0.2879, "step": 19521 }, { "epoch": 1.5814970836033702, "grad_norm": 0.055354394018650055, "learning_rate": 0.0001343624825599712, "loss": 0.3061, "step": 19522 }, { "epoch": 1.5815780946208684, "grad_norm": 0.05607502534985542, "learning_rate": 0.00013435798190737658, "loss": 0.2576, "step": 19523 }, { "epoch": 1.5816591056383669, "grad_norm": 0.047366395592689514, "learning_rate": 0.00013435348125478196, "loss": 0.2879, "step": 19524 }, { "epoch": 1.5817401166558653, "grad_norm": 0.048803966492414474, "learning_rate": 0.00013434898060218732, "loss": 0.2782, "step": 19525 }, { "epoch": 1.5818211276733636, "grad_norm": 0.05734861642122269, "learning_rate": 0.00013434447994959268, "loss": 0.3287, "step": 19526 }, { "epoch": 1.5819021386908618, "grad_norm": 0.04951030761003494, "learning_rate": 0.00013433997929699807, "loss": 0.2746, "step": 19527 }, { "epoch": 1.5819831497083603, "grad_norm": 0.051852792501449585, "learning_rate": 0.00013433547864440343, "loss": 0.2498, "step": 19528 }, { "epoch": 1.5820641607258588, "grad_norm": 0.05859070271253586, "learning_rate": 0.00013433097799180882, "loss": 0.3057, "step": 19529 }, { "epoch": 1.582145171743357, "grad_norm": 0.0653352215886116, "learning_rate": 0.0001343264773392142, "loss": 0.3034, "step": 19530 }, { "epoch": 1.5822261827608555, "grad_norm": 0.051609303802251816, "learning_rate": 0.00013432197668661956, "loss": 0.2612, "step": 19531 }, { "epoch": 1.582307193778354, "grad_norm": 0.05950654670596123, "learning_rate": 0.00013431747603402492, "loss": 0.3185, "step": 19532 }, { "epoch": 1.5823882047958522, "grad_norm": 0.047630857676267624, "learning_rate": 0.0001343129753814303, "loss": 0.2937, "step": 19533 }, { "epoch": 1.5824692158133506, "grad_norm": 0.04461874067783356, "learning_rate": 0.0001343084747288357, "loss": 0.2714, "step": 19534 }, { "epoch": 1.5825502268308491, "grad_norm": 0.0678873211145401, "learning_rate": 0.00013430397407624106, "loss": 0.3534, "step": 19535 }, { "epoch": 1.5826312378483474, "grad_norm": 0.04869205504655838, "learning_rate": 0.00013429947342364645, "loss": 0.2626, "step": 19536 }, { "epoch": 1.5827122488658456, "grad_norm": 0.044049911201000214, "learning_rate": 0.0001342949727710518, "loss": 0.2942, "step": 19537 }, { "epoch": 1.5827932598833443, "grad_norm": 0.05476945638656616, "learning_rate": 0.00013429047211845717, "loss": 0.2983, "step": 19538 }, { "epoch": 1.5828742709008425, "grad_norm": 0.044789087027311325, "learning_rate": 0.00013428597146586255, "loss": 0.2706, "step": 19539 }, { "epoch": 1.5829552819183408, "grad_norm": 0.0458502434194088, "learning_rate": 0.00013428147081326794, "loss": 0.2838, "step": 19540 }, { "epoch": 1.5830362929358393, "grad_norm": 0.04885415360331535, "learning_rate": 0.0001342769701606733, "loss": 0.2562, "step": 19541 }, { "epoch": 1.5831173039533377, "grad_norm": 0.044314928352832794, "learning_rate": 0.0001342724695080787, "loss": 0.2976, "step": 19542 }, { "epoch": 1.583198314970836, "grad_norm": 0.05950617417693138, "learning_rate": 0.00013426796885548405, "loss": 0.2917, "step": 19543 }, { "epoch": 1.5832793259883344, "grad_norm": 0.06253184378147125, "learning_rate": 0.0001342634682028894, "loss": 0.3023, "step": 19544 }, { "epoch": 1.583360337005833, "grad_norm": 0.047593940049409866, "learning_rate": 0.0001342589675502948, "loss": 0.2945, "step": 19545 }, { "epoch": 1.5834413480233311, "grad_norm": 0.05673489719629288, "learning_rate": 0.00013425446689770018, "loss": 0.3181, "step": 19546 }, { "epoch": 1.5835223590408296, "grad_norm": 0.06391898542642593, "learning_rate": 0.00013424996624510554, "loss": 0.3179, "step": 19547 }, { "epoch": 1.583603370058328, "grad_norm": 0.049235813319683075, "learning_rate": 0.00013424546559251093, "loss": 0.2842, "step": 19548 }, { "epoch": 1.5836843810758263, "grad_norm": 0.048863064497709274, "learning_rate": 0.0001342409649399163, "loss": 0.2508, "step": 19549 }, { "epoch": 1.5837653920933246, "grad_norm": 0.06420667469501495, "learning_rate": 0.00013423646428732165, "loss": 0.2819, "step": 19550 }, { "epoch": 1.583846403110823, "grad_norm": 0.048935070633888245, "learning_rate": 0.00013423196363472704, "loss": 0.2987, "step": 19551 }, { "epoch": 1.5839274141283215, "grad_norm": 0.04862522333860397, "learning_rate": 0.00013422746298213242, "loss": 0.2812, "step": 19552 }, { "epoch": 1.5840084251458197, "grad_norm": 0.053085774183273315, "learning_rate": 0.00013422296232953778, "loss": 0.2789, "step": 19553 }, { "epoch": 1.5840894361633182, "grad_norm": 0.048895008862018585, "learning_rate": 0.00013421846167694317, "loss": 0.2916, "step": 19554 }, { "epoch": 1.5841704471808167, "grad_norm": 0.0500405989587307, "learning_rate": 0.00013421396102434853, "loss": 0.2506, "step": 19555 }, { "epoch": 1.584251458198315, "grad_norm": 0.05650602653622627, "learning_rate": 0.0001342094603717539, "loss": 0.2992, "step": 19556 }, { "epoch": 1.5843324692158134, "grad_norm": 0.050165776163339615, "learning_rate": 0.0001342049597191593, "loss": 0.2973, "step": 19557 }, { "epoch": 1.5844134802333119, "grad_norm": 0.0493108294904232, "learning_rate": 0.00013420045906656467, "loss": 0.2982, "step": 19558 }, { "epoch": 1.58449449125081, "grad_norm": 0.0474187433719635, "learning_rate": 0.00013419595841397003, "loss": 0.262, "step": 19559 }, { "epoch": 1.5845755022683083, "grad_norm": 0.04100741446018219, "learning_rate": 0.0001341914577613754, "loss": 0.2536, "step": 19560 }, { "epoch": 1.584656513285807, "grad_norm": 0.04271881654858589, "learning_rate": 0.00013418695710878077, "loss": 0.2489, "step": 19561 }, { "epoch": 1.5847375243033053, "grad_norm": 0.055077798664569855, "learning_rate": 0.00013418245645618613, "loss": 0.2785, "step": 19562 }, { "epoch": 1.5848185353208035, "grad_norm": 0.05069742351770401, "learning_rate": 0.00013417795580359155, "loss": 0.2351, "step": 19563 }, { "epoch": 1.584899546338302, "grad_norm": 0.053563978523015976, "learning_rate": 0.0001341734551509969, "loss": 0.2767, "step": 19564 }, { "epoch": 1.5849805573558005, "grad_norm": 0.049226801842451096, "learning_rate": 0.00013416895449840227, "loss": 0.2993, "step": 19565 }, { "epoch": 1.5850615683732987, "grad_norm": 0.05453573539853096, "learning_rate": 0.00013416445384580765, "loss": 0.3365, "step": 19566 }, { "epoch": 1.5851425793907972, "grad_norm": 0.05534984543919563, "learning_rate": 0.00013415995319321301, "loss": 0.2636, "step": 19567 }, { "epoch": 1.5852235904082956, "grad_norm": 0.0382307767868042, "learning_rate": 0.00013415545254061837, "loss": 0.2521, "step": 19568 }, { "epoch": 1.5853046014257939, "grad_norm": 0.04936755821108818, "learning_rate": 0.0001341509518880238, "loss": 0.2632, "step": 19569 }, { "epoch": 1.5853856124432921, "grad_norm": 0.047801628708839417, "learning_rate": 0.00013414645123542915, "loss": 0.2908, "step": 19570 }, { "epoch": 1.5854666234607908, "grad_norm": 0.047462377697229385, "learning_rate": 0.0001341419505828345, "loss": 0.2765, "step": 19571 }, { "epoch": 1.585547634478289, "grad_norm": 0.047430410981178284, "learning_rate": 0.0001341374499302399, "loss": 0.2358, "step": 19572 }, { "epoch": 1.5856286454957873, "grad_norm": 0.047434475272893906, "learning_rate": 0.00013413294927764526, "loss": 0.2591, "step": 19573 }, { "epoch": 1.5857096565132858, "grad_norm": 0.044510822743177414, "learning_rate": 0.00013412844862505062, "loss": 0.2849, "step": 19574 }, { "epoch": 1.5857906675307842, "grad_norm": 0.04769321531057358, "learning_rate": 0.00013412394797245603, "loss": 0.2614, "step": 19575 }, { "epoch": 1.5858716785482825, "grad_norm": 0.06467246264219284, "learning_rate": 0.0001341194473198614, "loss": 0.2783, "step": 19576 }, { "epoch": 1.585952689565781, "grad_norm": 0.05072154104709625, "learning_rate": 0.00013411494666726675, "loss": 0.3038, "step": 19577 }, { "epoch": 1.5860337005832794, "grad_norm": 0.057482898235321045, "learning_rate": 0.00013411044601467214, "loss": 0.3215, "step": 19578 }, { "epoch": 1.5861147116007777, "grad_norm": 0.05687712877988815, "learning_rate": 0.0001341059453620775, "loss": 0.319, "step": 19579 }, { "epoch": 1.5861957226182761, "grad_norm": 0.047046370804309845, "learning_rate": 0.00013410144470948286, "loss": 0.2611, "step": 19580 }, { "epoch": 1.5862767336357746, "grad_norm": 0.04093106836080551, "learning_rate": 0.00013409694405688827, "loss": 0.2503, "step": 19581 }, { "epoch": 1.5863577446532728, "grad_norm": 0.0708787590265274, "learning_rate": 0.00013409244340429363, "loss": 0.3086, "step": 19582 }, { "epoch": 1.586438755670771, "grad_norm": 0.05671603977680206, "learning_rate": 0.000134087942751699, "loss": 0.2799, "step": 19583 }, { "epoch": 1.5865197666882696, "grad_norm": 0.051028985530138016, "learning_rate": 0.00013408344209910438, "loss": 0.3173, "step": 19584 }, { "epoch": 1.586600777705768, "grad_norm": 0.05028403177857399, "learning_rate": 0.00013407894144650974, "loss": 0.271, "step": 19585 }, { "epoch": 1.5866817887232663, "grad_norm": 0.056971270591020584, "learning_rate": 0.00013407444079391513, "loss": 0.3144, "step": 19586 }, { "epoch": 1.5867627997407647, "grad_norm": 0.05798279494047165, "learning_rate": 0.0001340699401413205, "loss": 0.297, "step": 19587 }, { "epoch": 1.5868438107582632, "grad_norm": 0.05333030968904495, "learning_rate": 0.00013406543948872587, "loss": 0.2364, "step": 19588 }, { "epoch": 1.5869248217757614, "grad_norm": 0.055028073489665985, "learning_rate": 0.00013406093883613123, "loss": 0.3158, "step": 19589 }, { "epoch": 1.58700583279326, "grad_norm": 0.048676956444978714, "learning_rate": 0.00013405643818353662, "loss": 0.2999, "step": 19590 }, { "epoch": 1.5870868438107584, "grad_norm": 0.06379703432321548, "learning_rate": 0.00013405193753094198, "loss": 0.2945, "step": 19591 }, { "epoch": 1.5871678548282566, "grad_norm": 0.056056320667266846, "learning_rate": 0.00013404743687834737, "loss": 0.2736, "step": 19592 }, { "epoch": 1.5872488658457549, "grad_norm": 0.05605302378535271, "learning_rate": 0.00013404293622575275, "loss": 0.303, "step": 19593 }, { "epoch": 1.5873298768632536, "grad_norm": 0.04912743717432022, "learning_rate": 0.00013403843557315811, "loss": 0.2689, "step": 19594 }, { "epoch": 1.5874108878807518, "grad_norm": 0.06005407124757767, "learning_rate": 0.00013403393492056347, "loss": 0.3133, "step": 19595 }, { "epoch": 1.58749189889825, "grad_norm": 0.05698971077799797, "learning_rate": 0.00013402943426796886, "loss": 0.2711, "step": 19596 }, { "epoch": 1.5875729099157485, "grad_norm": 0.040859419852495193, "learning_rate": 0.00013402493361537422, "loss": 0.2427, "step": 19597 }, { "epoch": 1.587653920933247, "grad_norm": 0.05441635474562645, "learning_rate": 0.0001340204329627796, "loss": 0.2553, "step": 19598 }, { "epoch": 1.5877349319507452, "grad_norm": 0.049378473311662674, "learning_rate": 0.000134015932310185, "loss": 0.2659, "step": 19599 }, { "epoch": 1.5878159429682437, "grad_norm": 0.043783094733953476, "learning_rate": 0.00013401143165759036, "loss": 0.2473, "step": 19600 }, { "epoch": 1.5878969539857422, "grad_norm": 0.05409387871623039, "learning_rate": 0.00013400693100499572, "loss": 0.2935, "step": 19601 }, { "epoch": 1.5879779650032404, "grad_norm": 0.058873020112514496, "learning_rate": 0.0001340024303524011, "loss": 0.3162, "step": 19602 }, { "epoch": 1.5880589760207389, "grad_norm": 0.05617973953485489, "learning_rate": 0.00013399792969980646, "loss": 0.2918, "step": 19603 }, { "epoch": 1.5881399870382373, "grad_norm": 0.046545181423425674, "learning_rate": 0.00013399342904721185, "loss": 0.2495, "step": 19604 }, { "epoch": 1.5882209980557356, "grad_norm": 0.04627370461821556, "learning_rate": 0.00013398892839461724, "loss": 0.2722, "step": 19605 }, { "epoch": 1.5883020090732338, "grad_norm": 0.04813306778669357, "learning_rate": 0.0001339844277420226, "loss": 0.2627, "step": 19606 }, { "epoch": 1.5883830200907323, "grad_norm": 0.04957534372806549, "learning_rate": 0.00013397992708942796, "loss": 0.2822, "step": 19607 }, { "epoch": 1.5884640311082308, "grad_norm": 0.04994957521557808, "learning_rate": 0.00013397542643683335, "loss": 0.2609, "step": 19608 }, { "epoch": 1.588545042125729, "grad_norm": 0.049995407462120056, "learning_rate": 0.0001339709257842387, "loss": 0.2599, "step": 19609 }, { "epoch": 1.5886260531432275, "grad_norm": 0.05555260181427002, "learning_rate": 0.0001339664251316441, "loss": 0.3067, "step": 19610 }, { "epoch": 1.588707064160726, "grad_norm": 0.05339108407497406, "learning_rate": 0.00013396192447904948, "loss": 0.3451, "step": 19611 }, { "epoch": 1.5887880751782242, "grad_norm": 0.04250529035925865, "learning_rate": 0.00013395742382645484, "loss": 0.2602, "step": 19612 }, { "epoch": 1.5888690861957226, "grad_norm": 0.04517268389463425, "learning_rate": 0.0001339529231738602, "loss": 0.2728, "step": 19613 }, { "epoch": 1.5889500972132211, "grad_norm": 0.04415430128574371, "learning_rate": 0.0001339484225212656, "loss": 0.2613, "step": 19614 }, { "epoch": 1.5890311082307194, "grad_norm": 0.03891155496239662, "learning_rate": 0.00013394392186867097, "loss": 0.2542, "step": 19615 }, { "epoch": 1.5891121192482176, "grad_norm": 0.055569179356098175, "learning_rate": 0.00013393942121607633, "loss": 0.3395, "step": 19616 }, { "epoch": 1.5891931302657163, "grad_norm": 0.05368742346763611, "learning_rate": 0.00013393492056348172, "loss": 0.2745, "step": 19617 }, { "epoch": 1.5892741412832145, "grad_norm": 0.051335424184799194, "learning_rate": 0.00013393041991088708, "loss": 0.2891, "step": 19618 }, { "epoch": 1.5893551523007128, "grad_norm": 0.05128169432282448, "learning_rate": 0.00013392591925829244, "loss": 0.2831, "step": 19619 }, { "epoch": 1.5894361633182112, "grad_norm": 0.04808347672224045, "learning_rate": 0.00013392141860569783, "loss": 0.2637, "step": 19620 }, { "epoch": 1.5895171743357097, "grad_norm": 0.04751835763454437, "learning_rate": 0.00013391691795310322, "loss": 0.2618, "step": 19621 }, { "epoch": 1.589598185353208, "grad_norm": 0.04695792868733406, "learning_rate": 0.00013391241730050858, "loss": 0.2314, "step": 19622 }, { "epoch": 1.5896791963707064, "grad_norm": 0.06030832231044769, "learning_rate": 0.00013390791664791396, "loss": 0.2612, "step": 19623 }, { "epoch": 1.589760207388205, "grad_norm": 0.0565250962972641, "learning_rate": 0.00013390341599531932, "loss": 0.2887, "step": 19624 }, { "epoch": 1.5898412184057031, "grad_norm": 0.05243346095085144, "learning_rate": 0.00013389891534272468, "loss": 0.3233, "step": 19625 }, { "epoch": 1.5899222294232016, "grad_norm": 0.058184653520584106, "learning_rate": 0.00013389441469013007, "loss": 0.3108, "step": 19626 }, { "epoch": 1.5900032404407, "grad_norm": 0.06172959506511688, "learning_rate": 0.00013388991403753546, "loss": 0.2936, "step": 19627 }, { "epoch": 1.5900842514581983, "grad_norm": 0.04676850140094757, "learning_rate": 0.00013388541338494082, "loss": 0.2531, "step": 19628 }, { "epoch": 1.5901652624756966, "grad_norm": 0.052669767290353775, "learning_rate": 0.0001338809127323462, "loss": 0.3351, "step": 19629 }, { "epoch": 1.590246273493195, "grad_norm": 0.04854946210980415, "learning_rate": 0.00013387641207975156, "loss": 0.2523, "step": 19630 }, { "epoch": 1.5903272845106935, "grad_norm": 0.0540703609585762, "learning_rate": 0.00013387191142715692, "loss": 0.2923, "step": 19631 }, { "epoch": 1.5904082955281917, "grad_norm": 0.06427466869354248, "learning_rate": 0.0001338674107745623, "loss": 0.3333, "step": 19632 }, { "epoch": 1.5904893065456902, "grad_norm": 0.05325065180659294, "learning_rate": 0.0001338629101219677, "loss": 0.3103, "step": 19633 }, { "epoch": 1.5905703175631887, "grad_norm": 0.047619082033634186, "learning_rate": 0.00013385840946937306, "loss": 0.2728, "step": 19634 }, { "epoch": 1.590651328580687, "grad_norm": 0.057228464633226395, "learning_rate": 0.00013385390881677845, "loss": 0.2492, "step": 19635 }, { "epoch": 1.5907323395981854, "grad_norm": 0.056406740099191666, "learning_rate": 0.0001338494081641838, "loss": 0.2636, "step": 19636 }, { "epoch": 1.5908133506156839, "grad_norm": 0.0460297130048275, "learning_rate": 0.00013384490751158917, "loss": 0.2481, "step": 19637 }, { "epoch": 1.590894361633182, "grad_norm": 0.05521533265709877, "learning_rate": 0.00013384040685899458, "loss": 0.3225, "step": 19638 }, { "epoch": 1.5909753726506803, "grad_norm": 0.06045277416706085, "learning_rate": 0.00013383590620639994, "loss": 0.3055, "step": 19639 }, { "epoch": 1.591056383668179, "grad_norm": 0.06774187088012695, "learning_rate": 0.0001338314055538053, "loss": 0.3209, "step": 19640 }, { "epoch": 1.5911373946856773, "grad_norm": 0.05821792781352997, "learning_rate": 0.0001338269049012107, "loss": 0.3121, "step": 19641 }, { "epoch": 1.5912184057031755, "grad_norm": 0.0538976714015007, "learning_rate": 0.00013382240424861605, "loss": 0.2769, "step": 19642 }, { "epoch": 1.591299416720674, "grad_norm": 0.04965272173285484, "learning_rate": 0.0001338179035960214, "loss": 0.2695, "step": 19643 }, { "epoch": 1.5913804277381725, "grad_norm": 0.04983029142022133, "learning_rate": 0.00013381340294342682, "loss": 0.2999, "step": 19644 }, { "epoch": 1.5914614387556707, "grad_norm": 0.058221280574798584, "learning_rate": 0.00013380890229083218, "loss": 0.2875, "step": 19645 }, { "epoch": 1.5915424497731692, "grad_norm": 0.06419771909713745, "learning_rate": 0.00013380440163823754, "loss": 0.3187, "step": 19646 }, { "epoch": 1.5916234607906676, "grad_norm": 0.05506671965122223, "learning_rate": 0.00013379990098564293, "loss": 0.3442, "step": 19647 }, { "epoch": 1.5917044718081659, "grad_norm": 0.04870070889592171, "learning_rate": 0.0001337954003330483, "loss": 0.2922, "step": 19648 }, { "epoch": 1.5917854828256643, "grad_norm": 0.04817147180438042, "learning_rate": 0.00013379089968045368, "loss": 0.2788, "step": 19649 }, { "epoch": 1.5918664938431628, "grad_norm": 0.0634249746799469, "learning_rate": 0.00013378639902785906, "loss": 0.2812, "step": 19650 }, { "epoch": 1.591947504860661, "grad_norm": 0.05736738443374634, "learning_rate": 0.00013378189837526442, "loss": 0.2951, "step": 19651 }, { "epoch": 1.5920285158781593, "grad_norm": 0.055955443531274796, "learning_rate": 0.00013377739772266978, "loss": 0.3102, "step": 19652 }, { "epoch": 1.5921095268956578, "grad_norm": 0.05353270098567009, "learning_rate": 0.00013377289707007517, "loss": 0.2824, "step": 19653 }, { "epoch": 1.5921905379131562, "grad_norm": 0.054427746683359146, "learning_rate": 0.00013376839641748053, "loss": 0.2642, "step": 19654 }, { "epoch": 1.5922715489306545, "grad_norm": 0.05478702858090401, "learning_rate": 0.00013376389576488592, "loss": 0.2947, "step": 19655 }, { "epoch": 1.592352559948153, "grad_norm": 0.05507731810212135, "learning_rate": 0.0001337593951122913, "loss": 0.3027, "step": 19656 }, { "epoch": 1.5924335709656514, "grad_norm": 0.05065590888261795, "learning_rate": 0.00013375489445969667, "loss": 0.229, "step": 19657 }, { "epoch": 1.5925145819831497, "grad_norm": 0.047295115888118744, "learning_rate": 0.00013375039380710203, "loss": 0.2758, "step": 19658 }, { "epoch": 1.5925955930006481, "grad_norm": 0.050888653844594955, "learning_rate": 0.0001337458931545074, "loss": 0.3103, "step": 19659 }, { "epoch": 1.5926766040181466, "grad_norm": 0.05038614943623543, "learning_rate": 0.00013374139250191277, "loss": 0.2487, "step": 19660 }, { "epoch": 1.5927576150356448, "grad_norm": 0.042133983224630356, "learning_rate": 0.00013373689184931816, "loss": 0.2702, "step": 19661 }, { "epoch": 1.592838626053143, "grad_norm": 0.05329374596476555, "learning_rate": 0.00013373239119672355, "loss": 0.2634, "step": 19662 }, { "epoch": 1.5929196370706418, "grad_norm": 0.06329121440649033, "learning_rate": 0.0001337278905441289, "loss": 0.3362, "step": 19663 }, { "epoch": 1.59300064808814, "grad_norm": 0.05555858090519905, "learning_rate": 0.00013372338989153427, "loss": 0.2875, "step": 19664 }, { "epoch": 1.5930816591056383, "grad_norm": 0.061923980712890625, "learning_rate": 0.00013371888923893965, "loss": 0.3068, "step": 19665 }, { "epoch": 1.5931626701231367, "grad_norm": 0.054352499544620514, "learning_rate": 0.00013371438858634501, "loss": 0.3165, "step": 19666 }, { "epoch": 1.5932436811406352, "grad_norm": 0.06146497651934624, "learning_rate": 0.0001337098879337504, "loss": 0.3136, "step": 19667 }, { "epoch": 1.5933246921581334, "grad_norm": 0.05410310998558998, "learning_rate": 0.0001337053872811558, "loss": 0.2703, "step": 19668 }, { "epoch": 1.593405703175632, "grad_norm": 0.043867308646440506, "learning_rate": 0.00013370088662856115, "loss": 0.2674, "step": 19669 }, { "epoch": 1.5934867141931304, "grad_norm": 0.0502147302031517, "learning_rate": 0.0001336963859759665, "loss": 0.3127, "step": 19670 }, { "epoch": 1.5935677252106286, "grad_norm": 0.05285639315843582, "learning_rate": 0.0001336918853233719, "loss": 0.3093, "step": 19671 }, { "epoch": 1.5936487362281269, "grad_norm": 0.04711088910698891, "learning_rate": 0.00013368738467077726, "loss": 0.2931, "step": 19672 }, { "epoch": 1.5937297472456255, "grad_norm": 0.04537595063447952, "learning_rate": 0.00013368288401818264, "loss": 0.2638, "step": 19673 }, { "epoch": 1.5938107582631238, "grad_norm": 0.05486955866217613, "learning_rate": 0.00013367838336558803, "loss": 0.2727, "step": 19674 }, { "epoch": 1.593891769280622, "grad_norm": 0.06857334822416306, "learning_rate": 0.0001336738827129934, "loss": 0.3224, "step": 19675 }, { "epoch": 1.5939727802981205, "grad_norm": 0.04904793202877045, "learning_rate": 0.00013366938206039875, "loss": 0.2678, "step": 19676 }, { "epoch": 1.594053791315619, "grad_norm": 0.04866638034582138, "learning_rate": 0.00013366488140780414, "loss": 0.2629, "step": 19677 }, { "epoch": 1.5941348023331172, "grad_norm": 0.05716971680521965, "learning_rate": 0.0001336603807552095, "loss": 0.2929, "step": 19678 }, { "epoch": 1.5942158133506157, "grad_norm": 0.058549296110868454, "learning_rate": 0.00013365588010261488, "loss": 0.3016, "step": 19679 }, { "epoch": 1.5942968243681142, "grad_norm": 0.04250664636492729, "learning_rate": 0.00013365137945002027, "loss": 0.29, "step": 19680 }, { "epoch": 1.5943778353856124, "grad_norm": 0.0584678053855896, "learning_rate": 0.00013364687879742563, "loss": 0.2894, "step": 19681 }, { "epoch": 1.5944588464031109, "grad_norm": 0.04140187054872513, "learning_rate": 0.000133642378144831, "loss": 0.2392, "step": 19682 }, { "epoch": 1.5945398574206093, "grad_norm": 0.044475093483924866, "learning_rate": 0.00013363787749223638, "loss": 0.2629, "step": 19683 }, { "epoch": 1.5946208684381076, "grad_norm": 0.05308195948600769, "learning_rate": 0.00013363337683964174, "loss": 0.2899, "step": 19684 }, { "epoch": 1.5947018794556058, "grad_norm": 0.05468233302235603, "learning_rate": 0.00013362887618704713, "loss": 0.3294, "step": 19685 }, { "epoch": 1.5947828904731045, "grad_norm": 0.05329752340912819, "learning_rate": 0.0001336243755344525, "loss": 0.3098, "step": 19686 }, { "epoch": 1.5948639014906028, "grad_norm": 0.05600878596305847, "learning_rate": 0.00013361987488185787, "loss": 0.272, "step": 19687 }, { "epoch": 1.594944912508101, "grad_norm": 0.061277762055397034, "learning_rate": 0.00013361537422926323, "loss": 0.2922, "step": 19688 }, { "epoch": 1.5950259235255995, "grad_norm": 0.045982733368873596, "learning_rate": 0.00013361087357666862, "loss": 0.281, "step": 19689 }, { "epoch": 1.595106934543098, "grad_norm": 0.06412628293037415, "learning_rate": 0.000133606372924074, "loss": 0.2422, "step": 19690 }, { "epoch": 1.5951879455605962, "grad_norm": 0.04596994072198868, "learning_rate": 0.00013360187227147937, "loss": 0.28, "step": 19691 }, { "epoch": 1.5952689565780946, "grad_norm": 0.05474567785859108, "learning_rate": 0.00013359737161888475, "loss": 0.2654, "step": 19692 }, { "epoch": 1.595349967595593, "grad_norm": 0.04921437427401543, "learning_rate": 0.00013359287096629012, "loss": 0.2825, "step": 19693 }, { "epoch": 1.5954309786130914, "grad_norm": 0.05111038312315941, "learning_rate": 0.00013358837031369548, "loss": 0.2894, "step": 19694 }, { "epoch": 1.5955119896305896, "grad_norm": 0.04755771532654762, "learning_rate": 0.00013358386966110086, "loss": 0.2627, "step": 19695 }, { "epoch": 1.5955930006480883, "grad_norm": 0.047934528440237045, "learning_rate": 0.00013357936900850625, "loss": 0.252, "step": 19696 }, { "epoch": 1.5956740116655865, "grad_norm": 0.058660369366407394, "learning_rate": 0.0001335748683559116, "loss": 0.2857, "step": 19697 }, { "epoch": 1.5957550226830848, "grad_norm": 0.049679480493068695, "learning_rate": 0.000133570367703317, "loss": 0.2798, "step": 19698 }, { "epoch": 1.5958360337005832, "grad_norm": 0.05734862759709358, "learning_rate": 0.00013356586705072236, "loss": 0.2779, "step": 19699 }, { "epoch": 1.5959170447180817, "grad_norm": 0.056212328374385834, "learning_rate": 0.00013356136639812772, "loss": 0.2877, "step": 19700 }, { "epoch": 1.59599805573558, "grad_norm": 0.056470245122909546, "learning_rate": 0.0001335568657455331, "loss": 0.2883, "step": 19701 }, { "epoch": 1.5960790667530784, "grad_norm": 0.04747643321752548, "learning_rate": 0.0001335523650929385, "loss": 0.2784, "step": 19702 }, { "epoch": 1.596160077770577, "grad_norm": 0.05902129039168358, "learning_rate": 0.00013354786444034385, "loss": 0.2904, "step": 19703 }, { "epoch": 1.5962410887880751, "grad_norm": 0.057399291545152664, "learning_rate": 0.00013354336378774924, "loss": 0.2585, "step": 19704 }, { "epoch": 1.5963220998055736, "grad_norm": 0.05625062435865402, "learning_rate": 0.0001335388631351546, "loss": 0.3098, "step": 19705 }, { "epoch": 1.596403110823072, "grad_norm": 0.06203244626522064, "learning_rate": 0.00013353436248255996, "loss": 0.3249, "step": 19706 }, { "epoch": 1.5964841218405703, "grad_norm": 0.056399717926979065, "learning_rate": 0.00013352986182996535, "loss": 0.2994, "step": 19707 }, { "epoch": 1.5965651328580686, "grad_norm": 0.05485859513282776, "learning_rate": 0.00013352536117737073, "loss": 0.2942, "step": 19708 }, { "epoch": 1.596646143875567, "grad_norm": 0.047439463436603546, "learning_rate": 0.0001335208605247761, "loss": 0.2994, "step": 19709 }, { "epoch": 1.5967271548930655, "grad_norm": 0.05042650178074837, "learning_rate": 0.00013351635987218148, "loss": 0.2681, "step": 19710 }, { "epoch": 1.5968081659105637, "grad_norm": 0.06080637127161026, "learning_rate": 0.00013351185921958684, "loss": 0.3173, "step": 19711 }, { "epoch": 1.5968891769280622, "grad_norm": 0.06810148060321808, "learning_rate": 0.0001335073585669922, "loss": 0.3148, "step": 19712 }, { "epoch": 1.5969701879455607, "grad_norm": 0.0476883128285408, "learning_rate": 0.0001335028579143976, "loss": 0.2561, "step": 19713 }, { "epoch": 1.597051198963059, "grad_norm": 0.058156874030828476, "learning_rate": 0.00013349835726180297, "loss": 0.2933, "step": 19714 }, { "epoch": 1.5971322099805574, "grad_norm": 0.046778421849012375, "learning_rate": 0.00013349385660920833, "loss": 0.2485, "step": 19715 }, { "epoch": 1.5972132209980558, "grad_norm": 0.06392556428909302, "learning_rate": 0.00013348935595661372, "loss": 0.3113, "step": 19716 }, { "epoch": 1.597294232015554, "grad_norm": 0.04123203828930855, "learning_rate": 0.00013348485530401908, "loss": 0.2385, "step": 19717 }, { "epoch": 1.5973752430330523, "grad_norm": 0.04342777654528618, "learning_rate": 0.00013348035465142447, "loss": 0.2473, "step": 19718 }, { "epoch": 1.597456254050551, "grad_norm": 0.04915757104754448, "learning_rate": 0.00013347585399882986, "loss": 0.283, "step": 19719 }, { "epoch": 1.5975372650680493, "grad_norm": 0.04471985995769501, "learning_rate": 0.00013347135334623522, "loss": 0.2644, "step": 19720 }, { "epoch": 1.5976182760855475, "grad_norm": 0.05780022218823433, "learning_rate": 0.00013346685269364058, "loss": 0.3069, "step": 19721 }, { "epoch": 1.597699287103046, "grad_norm": 0.05209074914455414, "learning_rate": 0.00013346235204104596, "loss": 0.2777, "step": 19722 }, { "epoch": 1.5977802981205445, "grad_norm": 0.05138828232884407, "learning_rate": 0.00013345785138845132, "loss": 0.3253, "step": 19723 }, { "epoch": 1.5978613091380427, "grad_norm": 0.05990460887551308, "learning_rate": 0.0001334533507358567, "loss": 0.3176, "step": 19724 }, { "epoch": 1.5979423201555412, "grad_norm": 0.04816710948944092, "learning_rate": 0.0001334488500832621, "loss": 0.24, "step": 19725 }, { "epoch": 1.5980233311730396, "grad_norm": 0.045972611755132675, "learning_rate": 0.00013344434943066746, "loss": 0.2509, "step": 19726 }, { "epoch": 1.5981043421905379, "grad_norm": 0.04820011183619499, "learning_rate": 0.00013343984877807282, "loss": 0.2836, "step": 19727 }, { "epoch": 1.5981853532080363, "grad_norm": 0.06232719495892525, "learning_rate": 0.0001334353481254782, "loss": 0.3038, "step": 19728 }, { "epoch": 1.5982663642255348, "grad_norm": 0.05783186852931976, "learning_rate": 0.00013343084747288356, "loss": 0.3069, "step": 19729 }, { "epoch": 1.598347375243033, "grad_norm": 0.059091247618198395, "learning_rate": 0.00013342634682028895, "loss": 0.2702, "step": 19730 }, { "epoch": 1.5984283862605313, "grad_norm": 0.05414794012904167, "learning_rate": 0.00013342184616769434, "loss": 0.2929, "step": 19731 }, { "epoch": 1.5985093972780298, "grad_norm": 0.0421329103410244, "learning_rate": 0.0001334173455150997, "loss": 0.2436, "step": 19732 }, { "epoch": 1.5985904082955282, "grad_norm": 0.053817491978406906, "learning_rate": 0.00013341284486250506, "loss": 0.2695, "step": 19733 }, { "epoch": 1.5986714193130265, "grad_norm": 0.059979382902383804, "learning_rate": 0.00013340834420991045, "loss": 0.2983, "step": 19734 }, { "epoch": 1.598752430330525, "grad_norm": 0.04933563247323036, "learning_rate": 0.0001334038435573158, "loss": 0.2356, "step": 19735 }, { "epoch": 1.5988334413480234, "grad_norm": 0.046898871660232544, "learning_rate": 0.0001333993429047212, "loss": 0.273, "step": 19736 }, { "epoch": 1.5989144523655217, "grad_norm": 0.0689791813492775, "learning_rate": 0.00013339484225212658, "loss": 0.3238, "step": 19737 }, { "epoch": 1.5989954633830201, "grad_norm": 0.05081493780016899, "learning_rate": 0.00013339034159953194, "loss": 0.2774, "step": 19738 }, { "epoch": 1.5990764744005186, "grad_norm": 0.052922870963811874, "learning_rate": 0.0001333858409469373, "loss": 0.2425, "step": 19739 }, { "epoch": 1.5991574854180168, "grad_norm": 0.049743879586458206, "learning_rate": 0.0001333813402943427, "loss": 0.2914, "step": 19740 }, { "epoch": 1.599238496435515, "grad_norm": 0.05588243901729584, "learning_rate": 0.00013337683964174805, "loss": 0.2706, "step": 19741 }, { "epoch": 1.5993195074530138, "grad_norm": 0.04906295984983444, "learning_rate": 0.00013337233898915344, "loss": 0.261, "step": 19742 }, { "epoch": 1.599400518470512, "grad_norm": 0.04569677263498306, "learning_rate": 0.00013336783833655882, "loss": 0.2512, "step": 19743 }, { "epoch": 1.5994815294880103, "grad_norm": 0.05717282369732857, "learning_rate": 0.00013336333768396418, "loss": 0.2593, "step": 19744 }, { "epoch": 1.5995625405055087, "grad_norm": 0.05262308940291405, "learning_rate": 0.00013335883703136954, "loss": 0.2638, "step": 19745 }, { "epoch": 1.5996435515230072, "grad_norm": 0.05189024284482002, "learning_rate": 0.00013335433637877493, "loss": 0.3071, "step": 19746 }, { "epoch": 1.5997245625405054, "grad_norm": 0.06069159135222435, "learning_rate": 0.0001333498357261803, "loss": 0.3037, "step": 19747 }, { "epoch": 1.599805573558004, "grad_norm": 0.05159150809049606, "learning_rate": 0.00013334533507358568, "loss": 0.2855, "step": 19748 }, { "epoch": 1.5998865845755024, "grad_norm": 0.055219776928424835, "learning_rate": 0.00013334083442099106, "loss": 0.2991, "step": 19749 }, { "epoch": 1.5999675955930006, "grad_norm": 0.049742020666599274, "learning_rate": 0.00013333633376839642, "loss": 0.2937, "step": 19750 }, { "epoch": 1.600048606610499, "grad_norm": 0.04801604896783829, "learning_rate": 0.00013333183311580178, "loss": 0.2799, "step": 19751 }, { "epoch": 1.6001296176279975, "grad_norm": 0.05257720500230789, "learning_rate": 0.00013332733246320717, "loss": 0.2931, "step": 19752 }, { "epoch": 1.6002106286454958, "grad_norm": 0.05499967560172081, "learning_rate": 0.00013332283181061253, "loss": 0.2917, "step": 19753 }, { "epoch": 1.600291639662994, "grad_norm": 0.04537876695394516, "learning_rate": 0.00013331833115801792, "loss": 0.2593, "step": 19754 }, { "epoch": 1.6003726506804925, "grad_norm": 0.05684947222471237, "learning_rate": 0.0001333138305054233, "loss": 0.2915, "step": 19755 }, { "epoch": 1.600453661697991, "grad_norm": 0.051478736102581024, "learning_rate": 0.00013330932985282867, "loss": 0.2961, "step": 19756 }, { "epoch": 1.6005346727154892, "grad_norm": 0.045026905834674835, "learning_rate": 0.00013330482920023403, "loss": 0.2744, "step": 19757 }, { "epoch": 1.6006156837329877, "grad_norm": 0.05228782445192337, "learning_rate": 0.0001333003285476394, "loss": 0.3082, "step": 19758 }, { "epoch": 1.6006966947504861, "grad_norm": 0.050884321331977844, "learning_rate": 0.00013329582789504477, "loss": 0.2914, "step": 19759 }, { "epoch": 1.6007777057679844, "grad_norm": 0.050620514899492264, "learning_rate": 0.00013329132724245016, "loss": 0.2926, "step": 19760 }, { "epoch": 1.6008587167854829, "grad_norm": 0.061932142823934555, "learning_rate": 0.00013328682658985555, "loss": 0.2993, "step": 19761 }, { "epoch": 1.6009397278029813, "grad_norm": 0.05216047912836075, "learning_rate": 0.0001332823259372609, "loss": 0.2462, "step": 19762 }, { "epoch": 1.6010207388204796, "grad_norm": 0.05384761095046997, "learning_rate": 0.00013327782528466627, "loss": 0.3201, "step": 19763 }, { "epoch": 1.6011017498379778, "grad_norm": 0.048409927636384964, "learning_rate": 0.00013327332463207165, "loss": 0.2529, "step": 19764 }, { "epoch": 1.6011827608554765, "grad_norm": 0.05935129523277283, "learning_rate": 0.00013326882397947701, "loss": 0.2723, "step": 19765 }, { "epoch": 1.6012637718729748, "grad_norm": 0.05299336835741997, "learning_rate": 0.0001332643233268824, "loss": 0.2942, "step": 19766 }, { "epoch": 1.601344782890473, "grad_norm": 0.04570074379444122, "learning_rate": 0.0001332598226742878, "loss": 0.3037, "step": 19767 }, { "epoch": 1.6014257939079715, "grad_norm": 0.05521856248378754, "learning_rate": 0.00013325532202169315, "loss": 0.313, "step": 19768 }, { "epoch": 1.60150680492547, "grad_norm": 0.05584537982940674, "learning_rate": 0.0001332508213690985, "loss": 0.268, "step": 19769 }, { "epoch": 1.6015878159429682, "grad_norm": 0.05476771667599678, "learning_rate": 0.0001332463207165039, "loss": 0.2673, "step": 19770 }, { "epoch": 1.6016688269604666, "grad_norm": 0.047139693051576614, "learning_rate": 0.00013324182006390928, "loss": 0.2551, "step": 19771 }, { "epoch": 1.601749837977965, "grad_norm": 0.053314223885536194, "learning_rate": 0.00013323731941131464, "loss": 0.2799, "step": 19772 }, { "epoch": 1.6018308489954634, "grad_norm": 0.04546458646655083, "learning_rate": 0.00013323281875872003, "loss": 0.2599, "step": 19773 }, { "epoch": 1.6019118600129616, "grad_norm": 0.05735450237989426, "learning_rate": 0.0001332283181061254, "loss": 0.3001, "step": 19774 }, { "epoch": 1.6019928710304603, "grad_norm": 0.049065109342336655, "learning_rate": 0.00013322381745353075, "loss": 0.2525, "step": 19775 }, { "epoch": 1.6020738820479585, "grad_norm": 0.05018934980034828, "learning_rate": 0.00013321931680093614, "loss": 0.3187, "step": 19776 }, { "epoch": 1.6021548930654568, "grad_norm": 0.05642695724964142, "learning_rate": 0.00013321481614834152, "loss": 0.3033, "step": 19777 }, { "epoch": 1.6022359040829552, "grad_norm": 0.058584533631801605, "learning_rate": 0.00013321031549574688, "loss": 0.3525, "step": 19778 }, { "epoch": 1.6023169151004537, "grad_norm": 0.051006946712732315, "learning_rate": 0.00013320581484315227, "loss": 0.2552, "step": 19779 }, { "epoch": 1.602397926117952, "grad_norm": 0.05385277792811394, "learning_rate": 0.00013320131419055763, "loss": 0.2986, "step": 19780 }, { "epoch": 1.6024789371354504, "grad_norm": 0.04713079333305359, "learning_rate": 0.000133196813537963, "loss": 0.246, "step": 19781 }, { "epoch": 1.6025599481529489, "grad_norm": 0.043642837554216385, "learning_rate": 0.00013319231288536838, "loss": 0.2647, "step": 19782 }, { "epoch": 1.6026409591704471, "grad_norm": 0.05561329796910286, "learning_rate": 0.00013318781223277377, "loss": 0.3199, "step": 19783 }, { "epoch": 1.6027219701879456, "grad_norm": 0.05162304639816284, "learning_rate": 0.00013318331158017913, "loss": 0.264, "step": 19784 }, { "epoch": 1.602802981205444, "grad_norm": 0.048306021839380264, "learning_rate": 0.00013317881092758451, "loss": 0.2482, "step": 19785 }, { "epoch": 1.6028839922229423, "grad_norm": 0.053587399423122406, "learning_rate": 0.00013317431027498987, "loss": 0.296, "step": 19786 }, { "epoch": 1.6029650032404406, "grad_norm": 0.06057516857981682, "learning_rate": 0.00013316980962239526, "loss": 0.288, "step": 19787 }, { "epoch": 1.6030460142579392, "grad_norm": 0.05490785837173462, "learning_rate": 0.00013316530896980062, "loss": 0.3423, "step": 19788 }, { "epoch": 1.6031270252754375, "grad_norm": 0.06102534383535385, "learning_rate": 0.000133160808317206, "loss": 0.2629, "step": 19789 }, { "epoch": 1.6032080362929357, "grad_norm": 0.05123463645577431, "learning_rate": 0.00013315630766461137, "loss": 0.2602, "step": 19790 }, { "epoch": 1.6032890473104342, "grad_norm": 0.052425283938646317, "learning_rate": 0.00013315180701201676, "loss": 0.285, "step": 19791 }, { "epoch": 1.6033700583279327, "grad_norm": 0.05635687708854675, "learning_rate": 0.00013314730635942212, "loss": 0.26, "step": 19792 }, { "epoch": 1.603451069345431, "grad_norm": 0.05245514586567879, "learning_rate": 0.0001331428057068275, "loss": 0.3, "step": 19793 }, { "epoch": 1.6035320803629294, "grad_norm": 0.04669433832168579, "learning_rate": 0.00013313830505423286, "loss": 0.2645, "step": 19794 }, { "epoch": 1.6036130913804278, "grad_norm": 0.060190096497535706, "learning_rate": 0.00013313380440163825, "loss": 0.3014, "step": 19795 }, { "epoch": 1.603694102397926, "grad_norm": 0.05049053579568863, "learning_rate": 0.0001331293037490436, "loss": 0.2762, "step": 19796 }, { "epoch": 1.6037751134154243, "grad_norm": 0.052670665085315704, "learning_rate": 0.000133124803096449, "loss": 0.3005, "step": 19797 }, { "epoch": 1.603856124432923, "grad_norm": 0.044620417058467865, "learning_rate": 0.00013312030244385436, "loss": 0.2919, "step": 19798 }, { "epoch": 1.6039371354504213, "grad_norm": 0.04406053200364113, "learning_rate": 0.00013311580179125974, "loss": 0.2886, "step": 19799 }, { "epoch": 1.6040181464679195, "grad_norm": 0.055919043719768524, "learning_rate": 0.00013311130113866513, "loss": 0.3127, "step": 19800 }, { "epoch": 1.604099157485418, "grad_norm": 0.05733250826597214, "learning_rate": 0.0001331068004860705, "loss": 0.2822, "step": 19801 }, { "epoch": 1.6041801685029164, "grad_norm": 0.056601881980895996, "learning_rate": 0.00013310229983347585, "loss": 0.3001, "step": 19802 }, { "epoch": 1.6042611795204147, "grad_norm": 0.056987252086400986, "learning_rate": 0.00013309779918088124, "loss": 0.3274, "step": 19803 }, { "epoch": 1.6043421905379132, "grad_norm": 0.051350705325603485, "learning_rate": 0.0001330932985282866, "loss": 0.2828, "step": 19804 }, { "epoch": 1.6044232015554116, "grad_norm": 0.04618507996201515, "learning_rate": 0.00013308879787569199, "loss": 0.2887, "step": 19805 }, { "epoch": 1.6045042125729099, "grad_norm": 0.04378882050514221, "learning_rate": 0.00013308429722309737, "loss": 0.2404, "step": 19806 }, { "epoch": 1.6045852235904083, "grad_norm": 0.05531271919608116, "learning_rate": 0.00013307979657050273, "loss": 0.3079, "step": 19807 }, { "epoch": 1.6046662346079068, "grad_norm": 0.046200696378946304, "learning_rate": 0.0001330752959179081, "loss": 0.247, "step": 19808 }, { "epoch": 1.604747245625405, "grad_norm": 0.04871489107608795, "learning_rate": 0.00013307079526531348, "loss": 0.2675, "step": 19809 }, { "epoch": 1.6048282566429033, "grad_norm": 0.05007130280137062, "learning_rate": 0.00013306629461271884, "loss": 0.2578, "step": 19810 }, { "epoch": 1.6049092676604018, "grad_norm": 0.04867855831980705, "learning_rate": 0.00013306179396012423, "loss": 0.2535, "step": 19811 }, { "epoch": 1.6049902786779002, "grad_norm": 0.06068941205739975, "learning_rate": 0.00013305729330752961, "loss": 0.3194, "step": 19812 }, { "epoch": 1.6050712896953985, "grad_norm": 0.05833900719881058, "learning_rate": 0.00013305279265493497, "loss": 0.3053, "step": 19813 }, { "epoch": 1.605152300712897, "grad_norm": 0.05458652228116989, "learning_rate": 0.00013304829200234033, "loss": 0.2762, "step": 19814 }, { "epoch": 1.6052333117303954, "grad_norm": 0.04864118620753288, "learning_rate": 0.00013304379134974572, "loss": 0.2749, "step": 19815 }, { "epoch": 1.6053143227478937, "grad_norm": 0.050415292382240295, "learning_rate": 0.00013303929069715108, "loss": 0.2581, "step": 19816 }, { "epoch": 1.6053953337653921, "grad_norm": 0.05480793118476868, "learning_rate": 0.00013303479004455647, "loss": 0.2748, "step": 19817 }, { "epoch": 1.6054763447828906, "grad_norm": 0.06063258647918701, "learning_rate": 0.00013303028939196186, "loss": 0.3672, "step": 19818 }, { "epoch": 1.6055573558003888, "grad_norm": 0.045498237013816833, "learning_rate": 0.00013302578873936722, "loss": 0.2592, "step": 19819 }, { "epoch": 1.605638366817887, "grad_norm": 0.05446869507431984, "learning_rate": 0.00013302128808677258, "loss": 0.3247, "step": 19820 }, { "epoch": 1.6057193778353858, "grad_norm": 0.05108707770705223, "learning_rate": 0.00013301678743417796, "loss": 0.3059, "step": 19821 }, { "epoch": 1.605800388852884, "grad_norm": 0.051279496401548386, "learning_rate": 0.00013301228678158332, "loss": 0.3078, "step": 19822 }, { "epoch": 1.6058813998703823, "grad_norm": 0.04803336411714554, "learning_rate": 0.0001330077861289887, "loss": 0.2683, "step": 19823 }, { "epoch": 1.6059624108878807, "grad_norm": 0.04614466428756714, "learning_rate": 0.0001330032854763941, "loss": 0.3015, "step": 19824 }, { "epoch": 1.6060434219053792, "grad_norm": 0.04936172068119049, "learning_rate": 0.00013299878482379946, "loss": 0.2777, "step": 19825 }, { "epoch": 1.6061244329228774, "grad_norm": 0.05479852110147476, "learning_rate": 0.00013299428417120482, "loss": 0.2587, "step": 19826 }, { "epoch": 1.606205443940376, "grad_norm": 0.05028698593378067, "learning_rate": 0.0001329897835186102, "loss": 0.2459, "step": 19827 }, { "epoch": 1.6062864549578744, "grad_norm": 0.04531967639923096, "learning_rate": 0.00013298528286601557, "loss": 0.2751, "step": 19828 }, { "epoch": 1.6063674659753726, "grad_norm": 0.057223107665777206, "learning_rate": 0.00013298078221342095, "loss": 0.2888, "step": 19829 }, { "epoch": 1.606448476992871, "grad_norm": 0.05254938453435898, "learning_rate": 0.00013297628156082634, "loss": 0.2792, "step": 19830 }, { "epoch": 1.6065294880103695, "grad_norm": 0.04757395759224892, "learning_rate": 0.0001329717809082317, "loss": 0.2652, "step": 19831 }, { "epoch": 1.6066104990278678, "grad_norm": 0.04999027028679848, "learning_rate": 0.00013296728025563706, "loss": 0.253, "step": 19832 }, { "epoch": 1.606691510045366, "grad_norm": 0.04609188064932823, "learning_rate": 0.00013296277960304245, "loss": 0.2631, "step": 19833 }, { "epoch": 1.6067725210628645, "grad_norm": 0.0687926635146141, "learning_rate": 0.0001329582789504478, "loss": 0.2908, "step": 19834 }, { "epoch": 1.606853532080363, "grad_norm": 0.04869447648525238, "learning_rate": 0.0001329537782978532, "loss": 0.2789, "step": 19835 }, { "epoch": 1.6069345430978612, "grad_norm": 0.056731611490249634, "learning_rate": 0.00013294927764525858, "loss": 0.2591, "step": 19836 }, { "epoch": 1.6070155541153597, "grad_norm": 0.04279259964823723, "learning_rate": 0.00013294477699266394, "loss": 0.2566, "step": 19837 }, { "epoch": 1.6070965651328581, "grad_norm": 0.04434427246451378, "learning_rate": 0.0001329402763400693, "loss": 0.2493, "step": 19838 }, { "epoch": 1.6071775761503564, "grad_norm": 0.051402896642684937, "learning_rate": 0.0001329357756874747, "loss": 0.2977, "step": 19839 }, { "epoch": 1.6072585871678549, "grad_norm": 0.043560683727264404, "learning_rate": 0.00013293127503488005, "loss": 0.2336, "step": 19840 }, { "epoch": 1.6073395981853533, "grad_norm": 0.04773182049393654, "learning_rate": 0.00013292677438228544, "loss": 0.2619, "step": 19841 }, { "epoch": 1.6074206092028516, "grad_norm": 0.06585206091403961, "learning_rate": 0.00013292227372969082, "loss": 0.3395, "step": 19842 }, { "epoch": 1.6075016202203498, "grad_norm": 0.05797537788748741, "learning_rate": 0.00013291777307709618, "loss": 0.3004, "step": 19843 }, { "epoch": 1.6075826312378485, "grad_norm": 0.04809495434165001, "learning_rate": 0.00013291327242450154, "loss": 0.2938, "step": 19844 }, { "epoch": 1.6076636422553467, "grad_norm": 0.05231109634041786, "learning_rate": 0.00013290877177190693, "loss": 0.3025, "step": 19845 }, { "epoch": 1.607744653272845, "grad_norm": 0.050500620156526566, "learning_rate": 0.0001329042711193123, "loss": 0.2904, "step": 19846 }, { "epoch": 1.6078256642903435, "grad_norm": 0.04343164339661598, "learning_rate": 0.00013289977046671768, "loss": 0.2609, "step": 19847 }, { "epoch": 1.607906675307842, "grad_norm": 0.047382425516843796, "learning_rate": 0.00013289526981412306, "loss": 0.2377, "step": 19848 }, { "epoch": 1.6079876863253402, "grad_norm": 0.06244940310716629, "learning_rate": 0.00013289076916152842, "loss": 0.3594, "step": 19849 }, { "epoch": 1.6080686973428386, "grad_norm": 0.04549206793308258, "learning_rate": 0.00013288626850893378, "loss": 0.2358, "step": 19850 }, { "epoch": 1.608149708360337, "grad_norm": 0.05260597914457321, "learning_rate": 0.00013288176785633917, "loss": 0.258, "step": 19851 }, { "epoch": 1.6082307193778353, "grad_norm": 0.05020173266530037, "learning_rate": 0.00013287726720374456, "loss": 0.3011, "step": 19852 }, { "epoch": 1.6083117303953338, "grad_norm": 0.0409814827144146, "learning_rate": 0.00013287276655114992, "loss": 0.2598, "step": 19853 }, { "epoch": 1.6083927414128323, "grad_norm": 0.05606376752257347, "learning_rate": 0.0001328682658985553, "loss": 0.2746, "step": 19854 }, { "epoch": 1.6084737524303305, "grad_norm": 0.05606624856591225, "learning_rate": 0.00013286376524596067, "loss": 0.3034, "step": 19855 }, { "epoch": 1.6085547634478288, "grad_norm": 0.04946485906839371, "learning_rate": 0.00013285926459336605, "loss": 0.3051, "step": 19856 }, { "epoch": 1.6086357744653272, "grad_norm": 0.05261649191379547, "learning_rate": 0.0001328547639407714, "loss": 0.2926, "step": 19857 }, { "epoch": 1.6087167854828257, "grad_norm": 0.0560835599899292, "learning_rate": 0.0001328502632881768, "loss": 0.3266, "step": 19858 }, { "epoch": 1.608797796500324, "grad_norm": 0.04868018627166748, "learning_rate": 0.00013284576263558216, "loss": 0.2871, "step": 19859 }, { "epoch": 1.6088788075178224, "grad_norm": 0.051668599247932434, "learning_rate": 0.00013284126198298755, "loss": 0.2739, "step": 19860 }, { "epoch": 1.6089598185353209, "grad_norm": 0.0633268728852272, "learning_rate": 0.0001328367613303929, "loss": 0.2766, "step": 19861 }, { "epoch": 1.6090408295528191, "grad_norm": 0.05197061970829964, "learning_rate": 0.0001328322606777983, "loss": 0.2537, "step": 19862 }, { "epoch": 1.6091218405703176, "grad_norm": 0.051223527640104294, "learning_rate": 0.00013282776002520365, "loss": 0.2814, "step": 19863 }, { "epoch": 1.609202851587816, "grad_norm": 0.05875227227807045, "learning_rate": 0.00013282325937260904, "loss": 0.3074, "step": 19864 }, { "epoch": 1.6092838626053143, "grad_norm": 0.046049121767282486, "learning_rate": 0.0001328187587200144, "loss": 0.2814, "step": 19865 }, { "epoch": 1.6093648736228126, "grad_norm": 0.04564657807350159, "learning_rate": 0.0001328142580674198, "loss": 0.2924, "step": 19866 }, { "epoch": 1.6094458846403112, "grad_norm": 0.058303602039813995, "learning_rate": 0.00013280975741482515, "loss": 0.3222, "step": 19867 }, { "epoch": 1.6095268956578095, "grad_norm": 0.07431328296661377, "learning_rate": 0.00013280525676223054, "loss": 0.3287, "step": 19868 }, { "epoch": 1.6096079066753077, "grad_norm": 0.054437801241874695, "learning_rate": 0.0001328007561096359, "loss": 0.2953, "step": 19869 }, { "epoch": 1.6096889176928062, "grad_norm": 0.04726843908429146, "learning_rate": 0.00013279625545704128, "loss": 0.2388, "step": 19870 }, { "epoch": 1.6097699287103047, "grad_norm": 0.052995022386312485, "learning_rate": 0.00013279175480444664, "loss": 0.2557, "step": 19871 }, { "epoch": 1.609850939727803, "grad_norm": 0.06081311032176018, "learning_rate": 0.00013278725415185203, "loss": 0.3259, "step": 19872 }, { "epoch": 1.6099319507453014, "grad_norm": 0.0430576428771019, "learning_rate": 0.0001327827534992574, "loss": 0.2449, "step": 19873 }, { "epoch": 1.6100129617627998, "grad_norm": 0.050903283059597015, "learning_rate": 0.00013277825284666278, "loss": 0.2343, "step": 19874 }, { "epoch": 1.610093972780298, "grad_norm": 0.053889211267232895, "learning_rate": 0.00013277375219406817, "loss": 0.2699, "step": 19875 }, { "epoch": 1.6101749837977966, "grad_norm": 0.04701241850852966, "learning_rate": 0.00013276925154147353, "loss": 0.2732, "step": 19876 }, { "epoch": 1.610255994815295, "grad_norm": 0.060669828206300735, "learning_rate": 0.00013276475088887889, "loss": 0.3078, "step": 19877 }, { "epoch": 1.6103370058327933, "grad_norm": 0.0490998774766922, "learning_rate": 0.00013276025023628427, "loss": 0.2881, "step": 19878 }, { "epoch": 1.6104180168502915, "grad_norm": 0.04689916968345642, "learning_rate": 0.00013275574958368963, "loss": 0.2816, "step": 19879 }, { "epoch": 1.61049902786779, "grad_norm": 0.04962588846683502, "learning_rate": 0.00013275124893109502, "loss": 0.2833, "step": 19880 }, { "epoch": 1.6105800388852884, "grad_norm": 0.05080372467637062, "learning_rate": 0.0001327467482785004, "loss": 0.2993, "step": 19881 }, { "epoch": 1.6106610499027867, "grad_norm": 0.051091890782117844, "learning_rate": 0.00013274224762590577, "loss": 0.2686, "step": 19882 }, { "epoch": 1.6107420609202852, "grad_norm": 0.04882129281759262, "learning_rate": 0.00013273774697331113, "loss": 0.2645, "step": 19883 }, { "epoch": 1.6108230719377836, "grad_norm": 0.04933195188641548, "learning_rate": 0.00013273324632071651, "loss": 0.2666, "step": 19884 }, { "epoch": 1.6109040829552819, "grad_norm": 0.05477169156074524, "learning_rate": 0.00013272874566812187, "loss": 0.309, "step": 19885 }, { "epoch": 1.6109850939727803, "grad_norm": 0.05535150319337845, "learning_rate": 0.00013272424501552726, "loss": 0.2667, "step": 19886 }, { "epoch": 1.6110661049902788, "grad_norm": 0.06334707140922546, "learning_rate": 0.00013271974436293265, "loss": 0.3023, "step": 19887 }, { "epoch": 1.611147116007777, "grad_norm": 0.054047200828790665, "learning_rate": 0.000132715243710338, "loss": 0.2959, "step": 19888 }, { "epoch": 1.6112281270252753, "grad_norm": 0.058965399861335754, "learning_rate": 0.00013271074305774337, "loss": 0.2971, "step": 19889 }, { "epoch": 1.611309138042774, "grad_norm": 0.054235897958278656, "learning_rate": 0.00013270624240514876, "loss": 0.2655, "step": 19890 }, { "epoch": 1.6113901490602722, "grad_norm": 0.04498407617211342, "learning_rate": 0.00013270174175255412, "loss": 0.2698, "step": 19891 }, { "epoch": 1.6114711600777705, "grad_norm": 0.04842450097203255, "learning_rate": 0.0001326972410999595, "loss": 0.2538, "step": 19892 }, { "epoch": 1.611552171095269, "grad_norm": 0.05681498348712921, "learning_rate": 0.0001326927404473649, "loss": 0.306, "step": 19893 }, { "epoch": 1.6116331821127674, "grad_norm": 0.058336373418569565, "learning_rate": 0.00013268823979477025, "loss": 0.302, "step": 19894 }, { "epoch": 1.6117141931302656, "grad_norm": 0.060595858842134476, "learning_rate": 0.0001326837391421756, "loss": 0.3151, "step": 19895 }, { "epoch": 1.6117952041477641, "grad_norm": 0.05249301716685295, "learning_rate": 0.000132679238489581, "loss": 0.2786, "step": 19896 }, { "epoch": 1.6118762151652626, "grad_norm": 0.05370105430483818, "learning_rate": 0.00013267473783698636, "loss": 0.2927, "step": 19897 }, { "epoch": 1.6119572261827608, "grad_norm": 0.06065599247813225, "learning_rate": 0.00013267023718439174, "loss": 0.3331, "step": 19898 }, { "epoch": 1.612038237200259, "grad_norm": 0.05296957865357399, "learning_rate": 0.00013266573653179713, "loss": 0.3013, "step": 19899 }, { "epoch": 1.6121192482177578, "grad_norm": 0.043871358036994934, "learning_rate": 0.0001326612358792025, "loss": 0.2553, "step": 19900 }, { "epoch": 1.612200259235256, "grad_norm": 0.053302399814128876, "learning_rate": 0.00013265673522660785, "loss": 0.2641, "step": 19901 }, { "epoch": 1.6122812702527543, "grad_norm": 0.05802002549171448, "learning_rate": 0.00013265223457401324, "loss": 0.2746, "step": 19902 }, { "epoch": 1.6123622812702527, "grad_norm": 0.05712159350514412, "learning_rate": 0.0001326477339214186, "loss": 0.3001, "step": 19903 }, { "epoch": 1.6124432922877512, "grad_norm": 0.05771899223327637, "learning_rate": 0.00013264323326882399, "loss": 0.2807, "step": 19904 }, { "epoch": 1.6125243033052494, "grad_norm": 0.0562940388917923, "learning_rate": 0.00013263873261622937, "loss": 0.2796, "step": 19905 }, { "epoch": 1.612605314322748, "grad_norm": 0.054468363523483276, "learning_rate": 0.00013263423196363473, "loss": 0.2586, "step": 19906 }, { "epoch": 1.6126863253402464, "grad_norm": 0.05557860806584358, "learning_rate": 0.0001326297313110401, "loss": 0.2958, "step": 19907 }, { "epoch": 1.6127673363577446, "grad_norm": 0.04691595211625099, "learning_rate": 0.00013262523065844548, "loss": 0.2893, "step": 19908 }, { "epoch": 1.612848347375243, "grad_norm": 0.04499751329421997, "learning_rate": 0.00013262073000585084, "loss": 0.2617, "step": 19909 }, { "epoch": 1.6129293583927415, "grad_norm": 0.05714256316423416, "learning_rate": 0.00013261622935325623, "loss": 0.3022, "step": 19910 }, { "epoch": 1.6130103694102398, "grad_norm": 0.04927900433540344, "learning_rate": 0.00013261172870066161, "loss": 0.2652, "step": 19911 }, { "epoch": 1.613091380427738, "grad_norm": 0.05857254937291145, "learning_rate": 0.00013260722804806697, "loss": 0.28, "step": 19912 }, { "epoch": 1.6131723914452365, "grad_norm": 0.04966528341174126, "learning_rate": 0.00013260272739547233, "loss": 0.2972, "step": 19913 }, { "epoch": 1.613253402462735, "grad_norm": 0.0434284582734108, "learning_rate": 0.00013259822674287772, "loss": 0.2709, "step": 19914 }, { "epoch": 1.6133344134802332, "grad_norm": 0.0510571151971817, "learning_rate": 0.00013259372609028308, "loss": 0.2709, "step": 19915 }, { "epoch": 1.6134154244977317, "grad_norm": 0.04903542995452881, "learning_rate": 0.00013258922543768847, "loss": 0.2988, "step": 19916 }, { "epoch": 1.6134964355152301, "grad_norm": 0.047472693026065826, "learning_rate": 0.00013258472478509386, "loss": 0.2568, "step": 19917 }, { "epoch": 1.6135774465327284, "grad_norm": 0.04385467246174812, "learning_rate": 0.00013258022413249922, "loss": 0.2541, "step": 19918 }, { "epoch": 1.6136584575502269, "grad_norm": 0.05597635731101036, "learning_rate": 0.00013257572347990458, "loss": 0.2855, "step": 19919 }, { "epoch": 1.6137394685677253, "grad_norm": 0.04206670820713043, "learning_rate": 0.00013257122282730996, "loss": 0.2741, "step": 19920 }, { "epoch": 1.6138204795852236, "grad_norm": 0.04178616777062416, "learning_rate": 0.00013256672217471532, "loss": 0.2366, "step": 19921 }, { "epoch": 1.6139014906027218, "grad_norm": 0.046484146267175674, "learning_rate": 0.0001325622215221207, "loss": 0.248, "step": 19922 }, { "epoch": 1.6139825016202205, "grad_norm": 0.05964015796780586, "learning_rate": 0.0001325577208695261, "loss": 0.3048, "step": 19923 }, { "epoch": 1.6140635126377187, "grad_norm": 0.04161230847239494, "learning_rate": 0.00013255322021693146, "loss": 0.2623, "step": 19924 }, { "epoch": 1.614144523655217, "grad_norm": 0.05486117675900459, "learning_rate": 0.00013254871956433685, "loss": 0.2727, "step": 19925 }, { "epoch": 1.6142255346727155, "grad_norm": 0.053675271570682526, "learning_rate": 0.0001325442189117422, "loss": 0.2751, "step": 19926 }, { "epoch": 1.614306545690214, "grad_norm": 0.054313309490680695, "learning_rate": 0.00013253971825914757, "loss": 0.3051, "step": 19927 }, { "epoch": 1.6143875567077122, "grad_norm": 0.06386713683605194, "learning_rate": 0.00013253521760655295, "loss": 0.2903, "step": 19928 }, { "epoch": 1.6144685677252106, "grad_norm": 0.049881864339113235, "learning_rate": 0.00013253071695395834, "loss": 0.2439, "step": 19929 }, { "epoch": 1.614549578742709, "grad_norm": 0.05016208440065384, "learning_rate": 0.0001325262163013637, "loss": 0.2485, "step": 19930 }, { "epoch": 1.6146305897602073, "grad_norm": 0.055789947509765625, "learning_rate": 0.0001325217156487691, "loss": 0.2693, "step": 19931 }, { "epoch": 1.6147116007777058, "grad_norm": 0.04924854636192322, "learning_rate": 0.00013251721499617445, "loss": 0.2696, "step": 19932 }, { "epoch": 1.6147926117952043, "grad_norm": 0.052969809621572495, "learning_rate": 0.00013251271434357983, "loss": 0.2935, "step": 19933 }, { "epoch": 1.6148736228127025, "grad_norm": 0.046944133937358856, "learning_rate": 0.0001325082136909852, "loss": 0.2736, "step": 19934 }, { "epoch": 1.6149546338302008, "grad_norm": 0.05402558296918869, "learning_rate": 0.00013250371303839058, "loss": 0.2807, "step": 19935 }, { "epoch": 1.6150356448476992, "grad_norm": 0.05391369387507439, "learning_rate": 0.00013249921238579594, "loss": 0.3001, "step": 19936 }, { "epoch": 1.6151166558651977, "grad_norm": 0.04467498138546944, "learning_rate": 0.00013249471173320133, "loss": 0.2821, "step": 19937 }, { "epoch": 1.615197666882696, "grad_norm": 0.047838423401117325, "learning_rate": 0.0001324902110806067, "loss": 0.2755, "step": 19938 }, { "epoch": 1.6152786779001944, "grad_norm": 0.04848472401499748, "learning_rate": 0.00013248571042801208, "loss": 0.2691, "step": 19939 }, { "epoch": 1.6153596889176929, "grad_norm": 0.050038691610097885, "learning_rate": 0.00013248120977541744, "loss": 0.2876, "step": 19940 }, { "epoch": 1.6154406999351911, "grad_norm": 0.04588980972766876, "learning_rate": 0.00013247670912282282, "loss": 0.2601, "step": 19941 }, { "epoch": 1.6155217109526896, "grad_norm": 0.0561276376247406, "learning_rate": 0.00013247220847022818, "loss": 0.2882, "step": 19942 }, { "epoch": 1.615602721970188, "grad_norm": 0.05049389973282814, "learning_rate": 0.00013246770781763357, "loss": 0.2887, "step": 19943 }, { "epoch": 1.6156837329876863, "grad_norm": 0.045459870249032974, "learning_rate": 0.00013246320716503893, "loss": 0.2607, "step": 19944 }, { "epoch": 1.6157647440051845, "grad_norm": 0.06378405541181564, "learning_rate": 0.00013245870651244432, "loss": 0.2883, "step": 19945 }, { "epoch": 1.6158457550226832, "grad_norm": 0.042002785950899124, "learning_rate": 0.00013245420585984968, "loss": 0.245, "step": 19946 }, { "epoch": 1.6159267660401815, "grad_norm": 0.058416012674570084, "learning_rate": 0.00013244970520725506, "loss": 0.2586, "step": 19947 }, { "epoch": 1.6160077770576797, "grad_norm": 0.06198345497250557, "learning_rate": 0.00013244520455466042, "loss": 0.2756, "step": 19948 }, { "epoch": 1.6160887880751782, "grad_norm": 0.04815270006656647, "learning_rate": 0.0001324407039020658, "loss": 0.2747, "step": 19949 }, { "epoch": 1.6161697990926767, "grad_norm": 0.05506781488656998, "learning_rate": 0.00013243620324947117, "loss": 0.2645, "step": 19950 }, { "epoch": 1.616250810110175, "grad_norm": 0.053383927792310715, "learning_rate": 0.00013243170259687656, "loss": 0.2612, "step": 19951 }, { "epoch": 1.6163318211276734, "grad_norm": 0.05213800072669983, "learning_rate": 0.00013242720194428192, "loss": 0.3022, "step": 19952 }, { "epoch": 1.6164128321451718, "grad_norm": 0.05713577941060066, "learning_rate": 0.0001324227012916873, "loss": 0.272, "step": 19953 }, { "epoch": 1.61649384316267, "grad_norm": 0.06162557750940323, "learning_rate": 0.00013241820063909267, "loss": 0.2899, "step": 19954 }, { "epoch": 1.6165748541801686, "grad_norm": 0.0560946948826313, "learning_rate": 0.00013241369998649805, "loss": 0.2882, "step": 19955 }, { "epoch": 1.616655865197667, "grad_norm": 0.0490209124982357, "learning_rate": 0.00013240919933390344, "loss": 0.258, "step": 19956 }, { "epoch": 1.6167368762151653, "grad_norm": 0.05507722124457359, "learning_rate": 0.0001324046986813088, "loss": 0.3084, "step": 19957 }, { "epoch": 1.6168178872326635, "grad_norm": 0.05552484840154648, "learning_rate": 0.00013240019802871416, "loss": 0.2671, "step": 19958 }, { "epoch": 1.616898898250162, "grad_norm": 0.05078752711415291, "learning_rate": 0.00013239569737611955, "loss": 0.2699, "step": 19959 }, { "epoch": 1.6169799092676604, "grad_norm": 0.058282483369112015, "learning_rate": 0.0001323911967235249, "loss": 0.2638, "step": 19960 }, { "epoch": 1.6170609202851587, "grad_norm": 0.05486408621072769, "learning_rate": 0.0001323866960709303, "loss": 0.3054, "step": 19961 }, { "epoch": 1.6171419313026572, "grad_norm": 0.06649196892976761, "learning_rate": 0.00013238219541833568, "loss": 0.2942, "step": 19962 }, { "epoch": 1.6172229423201556, "grad_norm": 0.04470723867416382, "learning_rate": 0.00013237769476574104, "loss": 0.2525, "step": 19963 }, { "epoch": 1.6173039533376539, "grad_norm": 0.04812270402908325, "learning_rate": 0.0001323731941131464, "loss": 0.307, "step": 19964 }, { "epoch": 1.6173849643551523, "grad_norm": 0.04237162694334984, "learning_rate": 0.0001323686934605518, "loss": 0.2718, "step": 19965 }, { "epoch": 1.6174659753726508, "grad_norm": 0.05187264829874039, "learning_rate": 0.00013236419280795715, "loss": 0.2814, "step": 19966 }, { "epoch": 1.617546986390149, "grad_norm": 0.04562138766050339, "learning_rate": 0.00013235969215536254, "loss": 0.2613, "step": 19967 }, { "epoch": 1.6176279974076473, "grad_norm": 0.057726096361875534, "learning_rate": 0.00013235519150276792, "loss": 0.2808, "step": 19968 }, { "epoch": 1.617709008425146, "grad_norm": 0.04597414657473564, "learning_rate": 0.00013235069085017328, "loss": 0.2576, "step": 19969 }, { "epoch": 1.6177900194426442, "grad_norm": 0.0467311330139637, "learning_rate": 0.00013234619019757864, "loss": 0.2876, "step": 19970 }, { "epoch": 1.6178710304601425, "grad_norm": 0.05734841153025627, "learning_rate": 0.00013234168954498403, "loss": 0.2855, "step": 19971 }, { "epoch": 1.617952041477641, "grad_norm": 0.05373970791697502, "learning_rate": 0.0001323371888923894, "loss": 0.2681, "step": 19972 }, { "epoch": 1.6180330524951394, "grad_norm": 0.054225221276283264, "learning_rate": 0.00013233268823979478, "loss": 0.2718, "step": 19973 }, { "epoch": 1.6181140635126376, "grad_norm": 0.059341464191675186, "learning_rate": 0.00013232818758720017, "loss": 0.298, "step": 19974 }, { "epoch": 1.6181950745301361, "grad_norm": 0.05167774483561516, "learning_rate": 0.00013232368693460553, "loss": 0.2943, "step": 19975 }, { "epoch": 1.6182760855476346, "grad_norm": 0.04827655479311943, "learning_rate": 0.00013231918628201089, "loss": 0.3108, "step": 19976 }, { "epoch": 1.6183570965651328, "grad_norm": 0.058139413595199585, "learning_rate": 0.00013231468562941627, "loss": 0.3123, "step": 19977 }, { "epoch": 1.6184381075826313, "grad_norm": 0.053694140166044235, "learning_rate": 0.00013231018497682163, "loss": 0.2752, "step": 19978 }, { "epoch": 1.6185191186001298, "grad_norm": 0.06099880859255791, "learning_rate": 0.00013230568432422702, "loss": 0.3095, "step": 19979 }, { "epoch": 1.618600129617628, "grad_norm": 0.04789900779724121, "learning_rate": 0.0001323011836716324, "loss": 0.3085, "step": 19980 }, { "epoch": 1.6186811406351262, "grad_norm": 0.055885620415210724, "learning_rate": 0.00013229668301903777, "loss": 0.2826, "step": 19981 }, { "epoch": 1.6187621516526247, "grad_norm": 0.05799628049135208, "learning_rate": 0.00013229218236644313, "loss": 0.3371, "step": 19982 }, { "epoch": 1.6188431626701232, "grad_norm": 0.06020451337099075, "learning_rate": 0.00013228768171384851, "loss": 0.3252, "step": 19983 }, { "epoch": 1.6189241736876214, "grad_norm": 0.04801985248923302, "learning_rate": 0.00013228318106125387, "loss": 0.2793, "step": 19984 }, { "epoch": 1.61900518470512, "grad_norm": 0.055071376264095306, "learning_rate": 0.00013227868040865926, "loss": 0.2328, "step": 19985 }, { "epoch": 1.6190861957226184, "grad_norm": 0.050464339554309845, "learning_rate": 0.00013227417975606465, "loss": 0.3128, "step": 19986 }, { "epoch": 1.6191672067401166, "grad_norm": 0.048161376267671585, "learning_rate": 0.00013226967910347, "loss": 0.2921, "step": 19987 }, { "epoch": 1.619248217757615, "grad_norm": 0.049764443188905716, "learning_rate": 0.00013226517845087537, "loss": 0.2977, "step": 19988 }, { "epoch": 1.6193292287751135, "grad_norm": 0.04596319422125816, "learning_rate": 0.00013226067779828076, "loss": 0.2582, "step": 19989 }, { "epoch": 1.6194102397926118, "grad_norm": 0.048334065824747086, "learning_rate": 0.00013225617714568612, "loss": 0.2833, "step": 19990 }, { "epoch": 1.61949125081011, "grad_norm": 0.046483226120471954, "learning_rate": 0.0001322516764930915, "loss": 0.2516, "step": 19991 }, { "epoch": 1.6195722618276087, "grad_norm": 0.05041767284274101, "learning_rate": 0.0001322471758404969, "loss": 0.2894, "step": 19992 }, { "epoch": 1.619653272845107, "grad_norm": 0.06127709522843361, "learning_rate": 0.00013224267518790225, "loss": 0.2809, "step": 19993 }, { "epoch": 1.6197342838626052, "grad_norm": 0.04631965979933739, "learning_rate": 0.00013223817453530764, "loss": 0.2726, "step": 19994 }, { "epoch": 1.6198152948801037, "grad_norm": 0.04897136241197586, "learning_rate": 0.000132233673882713, "loss": 0.2937, "step": 19995 }, { "epoch": 1.6198963058976021, "grad_norm": 0.045244332402944565, "learning_rate": 0.00013222917323011836, "loss": 0.2655, "step": 19996 }, { "epoch": 1.6199773169151004, "grad_norm": 0.054183993488550186, "learning_rate": 0.00013222467257752374, "loss": 0.3365, "step": 19997 }, { "epoch": 1.6200583279325989, "grad_norm": 0.047290198504924774, "learning_rate": 0.00013222017192492913, "loss": 0.2607, "step": 19998 }, { "epoch": 1.6201393389500973, "grad_norm": 0.06739253550767899, "learning_rate": 0.0001322156712723345, "loss": 0.3229, "step": 19999 }, { "epoch": 1.6202203499675956, "grad_norm": 0.05656042322516441, "learning_rate": 0.00013221117061973988, "loss": 0.2965, "step": 20000 }, { "epoch": 1.6203013609850938, "grad_norm": 0.05531647056341171, "learning_rate": 0.00013220666996714524, "loss": 0.2771, "step": 20001 }, { "epoch": 1.6203823720025925, "grad_norm": 0.04169746860861778, "learning_rate": 0.0001322021693145506, "loss": 0.2543, "step": 20002 }, { "epoch": 1.6204633830200907, "grad_norm": 0.05271003022789955, "learning_rate": 0.00013219766866195599, "loss": 0.3237, "step": 20003 }, { "epoch": 1.620544394037589, "grad_norm": 0.04714246466755867, "learning_rate": 0.00013219316800936137, "loss": 0.2578, "step": 20004 }, { "epoch": 1.6206254050550875, "grad_norm": 0.04622301459312439, "learning_rate": 0.00013218866735676673, "loss": 0.2473, "step": 20005 }, { "epoch": 1.620706416072586, "grad_norm": 0.06228908896446228, "learning_rate": 0.00013218416670417212, "loss": 0.2811, "step": 20006 }, { "epoch": 1.6207874270900842, "grad_norm": 0.04656405746936798, "learning_rate": 0.00013217966605157748, "loss": 0.2645, "step": 20007 }, { "epoch": 1.6208684381075826, "grad_norm": 0.04981234669685364, "learning_rate": 0.00013217516539898287, "loss": 0.3067, "step": 20008 }, { "epoch": 1.620949449125081, "grad_norm": 0.059026286005973816, "learning_rate": 0.00013217066474638823, "loss": 0.2659, "step": 20009 }, { "epoch": 1.6210304601425793, "grad_norm": 0.05904344469308853, "learning_rate": 0.00013216616409379362, "loss": 0.2942, "step": 20010 }, { "epoch": 1.6211114711600778, "grad_norm": 0.057300008833408356, "learning_rate": 0.00013216166344119898, "loss": 0.363, "step": 20011 }, { "epoch": 1.6211924821775763, "grad_norm": 0.04654074087738991, "learning_rate": 0.00013215716278860436, "loss": 0.2622, "step": 20012 }, { "epoch": 1.6212734931950745, "grad_norm": 0.04676036536693573, "learning_rate": 0.00013215266213600972, "loss": 0.2864, "step": 20013 }, { "epoch": 1.6213545042125728, "grad_norm": 0.05033322051167488, "learning_rate": 0.0001321481614834151, "loss": 0.2451, "step": 20014 }, { "epoch": 1.6214355152300715, "grad_norm": 0.05171377584338188, "learning_rate": 0.00013214366083082047, "loss": 0.2608, "step": 20015 }, { "epoch": 1.6215165262475697, "grad_norm": 0.0688476637005806, "learning_rate": 0.00013213916017822586, "loss": 0.3154, "step": 20016 }, { "epoch": 1.621597537265068, "grad_norm": 0.06028394401073456, "learning_rate": 0.00013213465952563122, "loss": 0.2567, "step": 20017 }, { "epoch": 1.6216785482825664, "grad_norm": 0.05227206274867058, "learning_rate": 0.0001321301588730366, "loss": 0.2897, "step": 20018 }, { "epoch": 1.6217595593000649, "grad_norm": 0.06116931885480881, "learning_rate": 0.00013212565822044196, "loss": 0.2995, "step": 20019 }, { "epoch": 1.6218405703175631, "grad_norm": 0.06551968306303024, "learning_rate": 0.00013212115756784735, "loss": 0.3206, "step": 20020 }, { "epoch": 1.6219215813350616, "grad_norm": 0.052293311804533005, "learning_rate": 0.0001321166569152527, "loss": 0.269, "step": 20021 }, { "epoch": 1.62200259235256, "grad_norm": 0.04668476805090904, "learning_rate": 0.0001321121562626581, "loss": 0.2801, "step": 20022 }, { "epoch": 1.6220836033700583, "grad_norm": 0.044271472841501236, "learning_rate": 0.00013210765561006346, "loss": 0.2416, "step": 20023 }, { "epoch": 1.6221646143875565, "grad_norm": 0.04560382664203644, "learning_rate": 0.00013210315495746885, "loss": 0.256, "step": 20024 }, { "epoch": 1.6222456254050552, "grad_norm": 0.06311310082674026, "learning_rate": 0.0001320986543048742, "loss": 0.3124, "step": 20025 }, { "epoch": 1.6223266364225535, "grad_norm": 0.05169247090816498, "learning_rate": 0.0001320941536522796, "loss": 0.2653, "step": 20026 }, { "epoch": 1.6224076474400517, "grad_norm": 0.05335776507854462, "learning_rate": 0.00013208965299968495, "loss": 0.2366, "step": 20027 }, { "epoch": 1.6224886584575502, "grad_norm": 0.06243913248181343, "learning_rate": 0.00013208515234709034, "loss": 0.3162, "step": 20028 }, { "epoch": 1.6225696694750487, "grad_norm": 0.04636109620332718, "learning_rate": 0.0001320806516944957, "loss": 0.2772, "step": 20029 }, { "epoch": 1.622650680492547, "grad_norm": 0.042730703949928284, "learning_rate": 0.0001320761510419011, "loss": 0.2821, "step": 20030 }, { "epoch": 1.6227316915100454, "grad_norm": 0.05869164690375328, "learning_rate": 0.00013207165038930645, "loss": 0.2794, "step": 20031 }, { "epoch": 1.6228127025275438, "grad_norm": 0.05897437408566475, "learning_rate": 0.00013206714973671183, "loss": 0.2435, "step": 20032 }, { "epoch": 1.622893713545042, "grad_norm": 0.04350670427083969, "learning_rate": 0.0001320626490841172, "loss": 0.2421, "step": 20033 }, { "epoch": 1.6229747245625405, "grad_norm": 0.05823887139558792, "learning_rate": 0.00013205814843152258, "loss": 0.2929, "step": 20034 }, { "epoch": 1.623055735580039, "grad_norm": 0.0536017045378685, "learning_rate": 0.00013205364777892794, "loss": 0.2449, "step": 20035 }, { "epoch": 1.6231367465975373, "grad_norm": 0.05177351087331772, "learning_rate": 0.00013204914712633333, "loss": 0.2554, "step": 20036 }, { "epoch": 1.6232177576150355, "grad_norm": 0.04570000618696213, "learning_rate": 0.00013204464647373872, "loss": 0.2591, "step": 20037 }, { "epoch": 1.623298768632534, "grad_norm": 0.046484798192977905, "learning_rate": 0.00013204014582114408, "loss": 0.258, "step": 20038 }, { "epoch": 1.6233797796500324, "grad_norm": 0.057637009769678116, "learning_rate": 0.00013203564516854944, "loss": 0.2751, "step": 20039 }, { "epoch": 1.6234607906675307, "grad_norm": 0.05245767906308174, "learning_rate": 0.00013203114451595482, "loss": 0.2826, "step": 20040 }, { "epoch": 1.6235418016850292, "grad_norm": 0.047266315668821335, "learning_rate": 0.00013202664386336018, "loss": 0.2923, "step": 20041 }, { "epoch": 1.6236228127025276, "grad_norm": 0.051065824925899506, "learning_rate": 0.00013202214321076557, "loss": 0.3063, "step": 20042 }, { "epoch": 1.6237038237200259, "grad_norm": 0.046526215970516205, "learning_rate": 0.00013201764255817096, "loss": 0.2932, "step": 20043 }, { "epoch": 1.6237848347375243, "grad_norm": 0.04801933839917183, "learning_rate": 0.00013201314190557632, "loss": 0.2875, "step": 20044 }, { "epoch": 1.6238658457550228, "grad_norm": 0.050072457641363144, "learning_rate": 0.00013200864125298168, "loss": 0.2944, "step": 20045 }, { "epoch": 1.623946856772521, "grad_norm": 0.06301888823509216, "learning_rate": 0.00013200414060038706, "loss": 0.28, "step": 20046 }, { "epoch": 1.6240278677900193, "grad_norm": 0.05195078253746033, "learning_rate": 0.00013199963994779242, "loss": 0.2753, "step": 20047 }, { "epoch": 1.624108878807518, "grad_norm": 0.05736146122217178, "learning_rate": 0.0001319951392951978, "loss": 0.2927, "step": 20048 }, { "epoch": 1.6241898898250162, "grad_norm": 0.046378325670957565, "learning_rate": 0.0001319906386426032, "loss": 0.2689, "step": 20049 }, { "epoch": 1.6242709008425145, "grad_norm": 0.046528127044439316, "learning_rate": 0.00013198613799000856, "loss": 0.2705, "step": 20050 }, { "epoch": 1.624351911860013, "grad_norm": 0.06076221540570259, "learning_rate": 0.00013198163733741392, "loss": 0.296, "step": 20051 }, { "epoch": 1.6244329228775114, "grad_norm": 0.060062017291784286, "learning_rate": 0.0001319771366848193, "loss": 0.2803, "step": 20052 }, { "epoch": 1.6245139338950096, "grad_norm": 0.05728905275464058, "learning_rate": 0.00013197263603222467, "loss": 0.2699, "step": 20053 }, { "epoch": 1.624594944912508, "grad_norm": 0.047682132571935654, "learning_rate": 0.00013196813537963005, "loss": 0.3047, "step": 20054 }, { "epoch": 1.6246759559300066, "grad_norm": 0.05280459299683571, "learning_rate": 0.00013196363472703544, "loss": 0.291, "step": 20055 }, { "epoch": 1.6247569669475048, "grad_norm": 0.06270458549261093, "learning_rate": 0.0001319591340744408, "loss": 0.333, "step": 20056 }, { "epoch": 1.6248379779650033, "grad_norm": 0.056303154677152634, "learning_rate": 0.00013195463342184616, "loss": 0.2765, "step": 20057 }, { "epoch": 1.6249189889825018, "grad_norm": 0.046801600605249405, "learning_rate": 0.00013195013276925155, "loss": 0.2613, "step": 20058 }, { "epoch": 1.625, "grad_norm": 0.0449674017727375, "learning_rate": 0.0001319456321166569, "loss": 0.3219, "step": 20059 }, { "epoch": 1.6250810110174982, "grad_norm": 0.05680710822343826, "learning_rate": 0.0001319411314640623, "loss": 0.3005, "step": 20060 }, { "epoch": 1.6251620220349967, "grad_norm": 0.043305426836013794, "learning_rate": 0.00013193663081146768, "loss": 0.2581, "step": 20061 }, { "epoch": 1.6252430330524952, "grad_norm": 0.05441425368189812, "learning_rate": 0.00013193213015887304, "loss": 0.3147, "step": 20062 }, { "epoch": 1.6253240440699934, "grad_norm": 0.05140216648578644, "learning_rate": 0.00013192762950627843, "loss": 0.2828, "step": 20063 }, { "epoch": 1.625405055087492, "grad_norm": 0.06091989949345589, "learning_rate": 0.0001319231288536838, "loss": 0.3423, "step": 20064 }, { "epoch": 1.6254860661049904, "grad_norm": 0.05574041232466698, "learning_rate": 0.00013191862820108915, "loss": 0.2845, "step": 20065 }, { "epoch": 1.6255670771224886, "grad_norm": 0.049806464463472366, "learning_rate": 0.00013191412754849454, "loss": 0.2564, "step": 20066 }, { "epoch": 1.625648088139987, "grad_norm": 0.05343364179134369, "learning_rate": 0.00013190962689589992, "loss": 0.2985, "step": 20067 }, { "epoch": 1.6257290991574855, "grad_norm": 0.04434232786297798, "learning_rate": 0.00013190512624330528, "loss": 0.2855, "step": 20068 }, { "epoch": 1.6258101101749838, "grad_norm": 0.05006261169910431, "learning_rate": 0.00013190062559071067, "loss": 0.2845, "step": 20069 }, { "epoch": 1.625891121192482, "grad_norm": 0.05992110073566437, "learning_rate": 0.00013189612493811603, "loss": 0.2911, "step": 20070 }, { "epoch": 1.6259721322099807, "grad_norm": 0.05126938223838806, "learning_rate": 0.0001318916242855214, "loss": 0.3202, "step": 20071 }, { "epoch": 1.626053143227479, "grad_norm": 0.05510419234633446, "learning_rate": 0.00013188712363292678, "loss": 0.3291, "step": 20072 }, { "epoch": 1.6261341542449772, "grad_norm": 0.05981983244419098, "learning_rate": 0.00013188262298033217, "loss": 0.2688, "step": 20073 }, { "epoch": 1.6262151652624757, "grad_norm": 0.045334115624427795, "learning_rate": 0.00013187812232773753, "loss": 0.257, "step": 20074 }, { "epoch": 1.6262961762799741, "grad_norm": 0.060123879462480545, "learning_rate": 0.0001318736216751429, "loss": 0.308, "step": 20075 }, { "epoch": 1.6263771872974724, "grad_norm": 0.055499814450740814, "learning_rate": 0.00013186912102254827, "loss": 0.2708, "step": 20076 }, { "epoch": 1.6264581983149708, "grad_norm": 0.06412205100059509, "learning_rate": 0.00013186462036995363, "loss": 0.3135, "step": 20077 }, { "epoch": 1.6265392093324693, "grad_norm": 0.05298837646842003, "learning_rate": 0.00013186011971735902, "loss": 0.2762, "step": 20078 }, { "epoch": 1.6266202203499676, "grad_norm": 0.05011403560638428, "learning_rate": 0.0001318556190647644, "loss": 0.2516, "step": 20079 }, { "epoch": 1.626701231367466, "grad_norm": 0.060437921434640884, "learning_rate": 0.00013185111841216977, "loss": 0.3206, "step": 20080 }, { "epoch": 1.6267822423849645, "grad_norm": 0.04542511701583862, "learning_rate": 0.00013184661775957515, "loss": 0.2584, "step": 20081 }, { "epoch": 1.6268632534024627, "grad_norm": 0.059091124683618546, "learning_rate": 0.00013184211710698051, "loss": 0.2532, "step": 20082 }, { "epoch": 1.626944264419961, "grad_norm": 0.04803014174103737, "learning_rate": 0.00013183761645438587, "loss": 0.2554, "step": 20083 }, { "epoch": 1.6270252754374595, "grad_norm": 0.056689485907554626, "learning_rate": 0.00013183311580179126, "loss": 0.29, "step": 20084 }, { "epoch": 1.627106286454958, "grad_norm": 0.05078176409006119, "learning_rate": 0.00013182861514919665, "loss": 0.229, "step": 20085 }, { "epoch": 1.6271872974724562, "grad_norm": 0.0545271635055542, "learning_rate": 0.000131824114496602, "loss": 0.2475, "step": 20086 }, { "epoch": 1.6272683084899546, "grad_norm": 0.052732549607753754, "learning_rate": 0.0001318196138440074, "loss": 0.307, "step": 20087 }, { "epoch": 1.627349319507453, "grad_norm": 0.05406218767166138, "learning_rate": 0.00013181511319141276, "loss": 0.2839, "step": 20088 }, { "epoch": 1.6274303305249513, "grad_norm": 0.05025879293680191, "learning_rate": 0.00013181061253881814, "loss": 0.2795, "step": 20089 }, { "epoch": 1.6275113415424498, "grad_norm": 0.04761442542076111, "learning_rate": 0.0001318061118862235, "loss": 0.266, "step": 20090 }, { "epoch": 1.6275923525599483, "grad_norm": 0.06434717029333115, "learning_rate": 0.0001318016112336289, "loss": 0.303, "step": 20091 }, { "epoch": 1.6276733635774465, "grad_norm": 0.06810448318719864, "learning_rate": 0.00013179711058103425, "loss": 0.3327, "step": 20092 }, { "epoch": 1.6277543745949448, "grad_norm": 0.04770027473568916, "learning_rate": 0.00013179260992843964, "loss": 0.2729, "step": 20093 }, { "epoch": 1.6278353856124435, "grad_norm": 0.056478384882211685, "learning_rate": 0.000131788109275845, "loss": 0.2728, "step": 20094 }, { "epoch": 1.6279163966299417, "grad_norm": 0.04458216577768326, "learning_rate": 0.00013178360862325038, "loss": 0.2847, "step": 20095 }, { "epoch": 1.62799740764744, "grad_norm": 0.04367813467979431, "learning_rate": 0.00013177910797065574, "loss": 0.2731, "step": 20096 }, { "epoch": 1.6280784186649384, "grad_norm": 0.05187131091952324, "learning_rate": 0.00013177460731806113, "loss": 0.2605, "step": 20097 }, { "epoch": 1.6281594296824369, "grad_norm": 0.05495690554380417, "learning_rate": 0.0001317701066654665, "loss": 0.2637, "step": 20098 }, { "epoch": 1.6282404406999351, "grad_norm": 0.055277954787015915, "learning_rate": 0.00013176560601287188, "loss": 0.2953, "step": 20099 }, { "epoch": 1.6283214517174336, "grad_norm": 0.04511606693267822, "learning_rate": 0.00013176110536027724, "loss": 0.2593, "step": 20100 }, { "epoch": 1.628402462734932, "grad_norm": 0.05307050421833992, "learning_rate": 0.00013175660470768263, "loss": 0.2926, "step": 20101 }, { "epoch": 1.6284834737524303, "grad_norm": 0.057604528963565826, "learning_rate": 0.000131752104055088, "loss": 0.2901, "step": 20102 }, { "epoch": 1.6285644847699285, "grad_norm": 0.06348736584186554, "learning_rate": 0.00013174760340249337, "loss": 0.2771, "step": 20103 }, { "epoch": 1.6286454957874272, "grad_norm": 0.04991592839360237, "learning_rate": 0.00013174310274989873, "loss": 0.2991, "step": 20104 }, { "epoch": 1.6287265068049255, "grad_norm": 0.058820828795433044, "learning_rate": 0.00013173860209730412, "loss": 0.2914, "step": 20105 }, { "epoch": 1.6288075178224237, "grad_norm": 0.05092411860823631, "learning_rate": 0.00013173410144470948, "loss": 0.2716, "step": 20106 }, { "epoch": 1.6288885288399222, "grad_norm": 0.05166938900947571, "learning_rate": 0.00013172960079211487, "loss": 0.2684, "step": 20107 }, { "epoch": 1.6289695398574207, "grad_norm": 0.0568322129547596, "learning_rate": 0.00013172510013952023, "loss": 0.2474, "step": 20108 }, { "epoch": 1.629050550874919, "grad_norm": 0.05410854145884514, "learning_rate": 0.00013172059948692562, "loss": 0.3125, "step": 20109 }, { "epoch": 1.6291315618924174, "grad_norm": 0.05433070659637451, "learning_rate": 0.00013171609883433098, "loss": 0.2614, "step": 20110 }, { "epoch": 1.6292125729099158, "grad_norm": 0.05375772714614868, "learning_rate": 0.00013171159818173636, "loss": 0.2543, "step": 20111 }, { "epoch": 1.629293583927414, "grad_norm": 0.05799659341573715, "learning_rate": 0.00013170709752914172, "loss": 0.3079, "step": 20112 }, { "epoch": 1.6293745949449125, "grad_norm": 0.0654786229133606, "learning_rate": 0.0001317025968765471, "loss": 0.3204, "step": 20113 }, { "epoch": 1.629455605962411, "grad_norm": 0.059415362775325775, "learning_rate": 0.00013169809622395247, "loss": 0.2798, "step": 20114 }, { "epoch": 1.6295366169799093, "grad_norm": 0.0585675872862339, "learning_rate": 0.00013169359557135786, "loss": 0.2913, "step": 20115 }, { "epoch": 1.6296176279974075, "grad_norm": 0.05293847620487213, "learning_rate": 0.00013168909491876322, "loss": 0.2866, "step": 20116 }, { "epoch": 1.6296986390149062, "grad_norm": 0.05343810096383095, "learning_rate": 0.0001316845942661686, "loss": 0.2786, "step": 20117 }, { "epoch": 1.6297796500324044, "grad_norm": 0.05106763541698456, "learning_rate": 0.000131680093613574, "loss": 0.2988, "step": 20118 }, { "epoch": 1.6298606610499027, "grad_norm": 0.054290104657411575, "learning_rate": 0.00013167559296097935, "loss": 0.316, "step": 20119 }, { "epoch": 1.6299416720674011, "grad_norm": 0.05953530967235565, "learning_rate": 0.0001316710923083847, "loss": 0.2736, "step": 20120 }, { "epoch": 1.6300226830848996, "grad_norm": 0.05168008804321289, "learning_rate": 0.0001316665916557901, "loss": 0.3099, "step": 20121 }, { "epoch": 1.6301036941023979, "grad_norm": 0.04760206490755081, "learning_rate": 0.00013166209100319546, "loss": 0.2531, "step": 20122 }, { "epoch": 1.6301847051198963, "grad_norm": 0.05584317818284035, "learning_rate": 0.00013165759035060085, "loss": 0.2728, "step": 20123 }, { "epoch": 1.6302657161373948, "grad_norm": 0.05340051278471947, "learning_rate": 0.00013165308969800623, "loss": 0.2668, "step": 20124 }, { "epoch": 1.630346727154893, "grad_norm": 0.04660770669579506, "learning_rate": 0.0001316485890454116, "loss": 0.224, "step": 20125 }, { "epoch": 1.6304277381723913, "grad_norm": 0.05097239837050438, "learning_rate": 0.00013164408839281695, "loss": 0.2676, "step": 20126 }, { "epoch": 1.63050874918989, "grad_norm": 0.04739508777856827, "learning_rate": 0.00013163958774022234, "loss": 0.2661, "step": 20127 }, { "epoch": 1.6305897602073882, "grad_norm": 0.0610460564494133, "learning_rate": 0.0001316350870876277, "loss": 0.266, "step": 20128 }, { "epoch": 1.6306707712248865, "grad_norm": 0.05964686721563339, "learning_rate": 0.0001316305864350331, "loss": 0.2859, "step": 20129 }, { "epoch": 1.630751782242385, "grad_norm": 0.04790445789694786, "learning_rate": 0.00013162608578243847, "loss": 0.2922, "step": 20130 }, { "epoch": 1.6308327932598834, "grad_norm": 0.06071170046925545, "learning_rate": 0.00013162158512984383, "loss": 0.2737, "step": 20131 }, { "epoch": 1.6309138042773816, "grad_norm": 0.05310392007231712, "learning_rate": 0.00013161708447724922, "loss": 0.2589, "step": 20132 }, { "epoch": 1.63099481529488, "grad_norm": 0.05222730338573456, "learning_rate": 0.00013161258382465458, "loss": 0.2686, "step": 20133 }, { "epoch": 1.6310758263123786, "grad_norm": 0.04885387793183327, "learning_rate": 0.00013160808317205994, "loss": 0.2792, "step": 20134 }, { "epoch": 1.6311568373298768, "grad_norm": 0.042033977806568146, "learning_rate": 0.00013160358251946533, "loss": 0.2782, "step": 20135 }, { "epoch": 1.6312378483473753, "grad_norm": 0.0582573227584362, "learning_rate": 0.00013159908186687072, "loss": 0.3166, "step": 20136 }, { "epoch": 1.6313188593648738, "grad_norm": 0.056182943284511566, "learning_rate": 0.00013159458121427608, "loss": 0.296, "step": 20137 }, { "epoch": 1.631399870382372, "grad_norm": 0.06435632705688477, "learning_rate": 0.00013159008056168146, "loss": 0.2847, "step": 20138 }, { "epoch": 1.6314808813998702, "grad_norm": 0.055193573236465454, "learning_rate": 0.00013158557990908682, "loss": 0.3055, "step": 20139 }, { "epoch": 1.6315618924173687, "grad_norm": 0.05577018857002258, "learning_rate": 0.00013158107925649218, "loss": 0.2911, "step": 20140 }, { "epoch": 1.6316429034348672, "grad_norm": 0.04244306683540344, "learning_rate": 0.00013157657860389757, "loss": 0.253, "step": 20141 }, { "epoch": 1.6317239144523654, "grad_norm": 0.04200892895460129, "learning_rate": 0.00013157207795130296, "loss": 0.251, "step": 20142 }, { "epoch": 1.6318049254698639, "grad_norm": 0.05072421580553055, "learning_rate": 0.00013156757729870832, "loss": 0.2786, "step": 20143 }, { "epoch": 1.6318859364873624, "grad_norm": 0.044243331998586655, "learning_rate": 0.0001315630766461137, "loss": 0.2764, "step": 20144 }, { "epoch": 1.6319669475048606, "grad_norm": 0.04605749621987343, "learning_rate": 0.00013155857599351907, "loss": 0.255, "step": 20145 }, { "epoch": 1.632047958522359, "grad_norm": 0.052316538989543915, "learning_rate": 0.00013155407534092443, "loss": 0.2871, "step": 20146 }, { "epoch": 1.6321289695398575, "grad_norm": 0.0470651313662529, "learning_rate": 0.0001315495746883298, "loss": 0.2831, "step": 20147 }, { "epoch": 1.6322099805573558, "grad_norm": 0.053917501121759415, "learning_rate": 0.0001315450740357352, "loss": 0.3259, "step": 20148 }, { "epoch": 1.632290991574854, "grad_norm": 0.06069952994585037, "learning_rate": 0.00013154057338314056, "loss": 0.2908, "step": 20149 }, { "epoch": 1.6323720025923527, "grad_norm": 0.04950543865561485, "learning_rate": 0.00013153607273054595, "loss": 0.3107, "step": 20150 }, { "epoch": 1.632453013609851, "grad_norm": 0.05352965369820595, "learning_rate": 0.0001315315720779513, "loss": 0.2724, "step": 20151 }, { "epoch": 1.6325340246273492, "grad_norm": 0.0512455552816391, "learning_rate": 0.00013152707142535667, "loss": 0.2733, "step": 20152 }, { "epoch": 1.6326150356448477, "grad_norm": 0.046733301132917404, "learning_rate": 0.00013152257077276205, "loss": 0.2442, "step": 20153 }, { "epoch": 1.6326960466623461, "grad_norm": 0.0573970265686512, "learning_rate": 0.00013151807012016744, "loss": 0.2914, "step": 20154 }, { "epoch": 1.6327770576798444, "grad_norm": 0.049615781754255295, "learning_rate": 0.0001315135694675728, "loss": 0.2836, "step": 20155 }, { "epoch": 1.6328580686973428, "grad_norm": 0.05010844022035599, "learning_rate": 0.0001315090688149782, "loss": 0.2526, "step": 20156 }, { "epoch": 1.6329390797148413, "grad_norm": 0.05412207543849945, "learning_rate": 0.00013150456816238355, "loss": 0.2725, "step": 20157 }, { "epoch": 1.6330200907323396, "grad_norm": 0.055283691734075546, "learning_rate": 0.0001315000675097889, "loss": 0.3277, "step": 20158 }, { "epoch": 1.633101101749838, "grad_norm": 0.058319512754678726, "learning_rate": 0.0001314955668571943, "loss": 0.2579, "step": 20159 }, { "epoch": 1.6331821127673365, "grad_norm": 0.05270714685320854, "learning_rate": 0.00013149106620459968, "loss": 0.294, "step": 20160 }, { "epoch": 1.6332631237848347, "grad_norm": 0.05729326605796814, "learning_rate": 0.00013148656555200504, "loss": 0.2738, "step": 20161 }, { "epoch": 1.633344134802333, "grad_norm": 0.04980797320604324, "learning_rate": 0.00013148206489941043, "loss": 0.2803, "step": 20162 }, { "epoch": 1.6334251458198314, "grad_norm": 0.059038545936346054, "learning_rate": 0.0001314775642468158, "loss": 0.2587, "step": 20163 }, { "epoch": 1.63350615683733, "grad_norm": 0.04804708808660507, "learning_rate": 0.00013147306359422115, "loss": 0.2635, "step": 20164 }, { "epoch": 1.6335871678548282, "grad_norm": 0.05786438658833504, "learning_rate": 0.00013146856294162654, "loss": 0.3201, "step": 20165 }, { "epoch": 1.6336681788723266, "grad_norm": 0.05506935343146324, "learning_rate": 0.00013146406228903192, "loss": 0.2825, "step": 20166 }, { "epoch": 1.633749189889825, "grad_norm": 0.04705440625548363, "learning_rate": 0.00013145956163643728, "loss": 0.2446, "step": 20167 }, { "epoch": 1.6338302009073233, "grad_norm": 0.05770174041390419, "learning_rate": 0.00013145506098384267, "loss": 0.2974, "step": 20168 }, { "epoch": 1.6339112119248218, "grad_norm": 0.049085672944784164, "learning_rate": 0.00013145056033124803, "loss": 0.2468, "step": 20169 }, { "epoch": 1.6339922229423203, "grad_norm": 0.053372543305158615, "learning_rate": 0.00013144605967865342, "loss": 0.303, "step": 20170 }, { "epoch": 1.6340732339598185, "grad_norm": 0.06096953526139259, "learning_rate": 0.00013144155902605878, "loss": 0.2802, "step": 20171 }, { "epoch": 1.6341542449773168, "grad_norm": 0.047225162386894226, "learning_rate": 0.00013143705837346417, "loss": 0.2732, "step": 20172 }, { "epoch": 1.6342352559948155, "grad_norm": 0.06632104516029358, "learning_rate": 0.00013143255772086953, "loss": 0.3016, "step": 20173 }, { "epoch": 1.6343162670123137, "grad_norm": 0.05281345173716545, "learning_rate": 0.0001314280570682749, "loss": 0.2807, "step": 20174 }, { "epoch": 1.634397278029812, "grad_norm": 0.048660047352313995, "learning_rate": 0.00013142355641568027, "loss": 0.2648, "step": 20175 }, { "epoch": 1.6344782890473104, "grad_norm": 0.050357669591903687, "learning_rate": 0.00013141905576308566, "loss": 0.2743, "step": 20176 }, { "epoch": 1.6345593000648089, "grad_norm": 0.059288132935762405, "learning_rate": 0.00013141455511049102, "loss": 0.3192, "step": 20177 }, { "epoch": 1.6346403110823071, "grad_norm": 0.04744809865951538, "learning_rate": 0.0001314100544578964, "loss": 0.246, "step": 20178 }, { "epoch": 1.6347213220998056, "grad_norm": 0.0480501614511013, "learning_rate": 0.00013140555380530177, "loss": 0.2612, "step": 20179 }, { "epoch": 1.634802333117304, "grad_norm": 0.04682370275259018, "learning_rate": 0.00013140105315270715, "loss": 0.2773, "step": 20180 }, { "epoch": 1.6348833441348023, "grad_norm": 0.04360784590244293, "learning_rate": 0.00013139655250011251, "loss": 0.2636, "step": 20181 }, { "epoch": 1.6349643551523008, "grad_norm": 0.051908232271671295, "learning_rate": 0.0001313920518475179, "loss": 0.3092, "step": 20182 }, { "epoch": 1.6350453661697992, "grad_norm": 0.051509421318769455, "learning_rate": 0.00013138755119492326, "loss": 0.2803, "step": 20183 }, { "epoch": 1.6351263771872975, "grad_norm": 0.05230452120304108, "learning_rate": 0.00013138305054232865, "loss": 0.3067, "step": 20184 }, { "epoch": 1.6352073882047957, "grad_norm": 0.043822553008794785, "learning_rate": 0.000131378549889734, "loss": 0.2649, "step": 20185 }, { "epoch": 1.6352883992222942, "grad_norm": 0.058286089450120926, "learning_rate": 0.0001313740492371394, "loss": 0.2641, "step": 20186 }, { "epoch": 1.6353694102397927, "grad_norm": 0.051882125437259674, "learning_rate": 0.00013136954858454476, "loss": 0.2829, "step": 20187 }, { "epoch": 1.635450421257291, "grad_norm": 0.055661335587501526, "learning_rate": 0.00013136504793195014, "loss": 0.3405, "step": 20188 }, { "epoch": 1.6355314322747894, "grad_norm": 0.05822267755866051, "learning_rate": 0.0001313605472793555, "loss": 0.2748, "step": 20189 }, { "epoch": 1.6356124432922878, "grad_norm": 0.04890811815857887, "learning_rate": 0.0001313560466267609, "loss": 0.2865, "step": 20190 }, { "epoch": 1.635693454309786, "grad_norm": 0.04698796942830086, "learning_rate": 0.00013135154597416625, "loss": 0.2644, "step": 20191 }, { "epoch": 1.6357744653272845, "grad_norm": 0.04925640672445297, "learning_rate": 0.00013134704532157164, "loss": 0.3161, "step": 20192 }, { "epoch": 1.635855476344783, "grad_norm": 0.0510898195207119, "learning_rate": 0.00013134254466897703, "loss": 0.2785, "step": 20193 }, { "epoch": 1.6359364873622813, "grad_norm": 0.050242744386196136, "learning_rate": 0.00013133804401638239, "loss": 0.2656, "step": 20194 }, { "epoch": 1.6360174983797795, "grad_norm": 0.04777007922530174, "learning_rate": 0.00013133354336378777, "loss": 0.3204, "step": 20195 }, { "epoch": 1.6360985093972782, "grad_norm": 0.05214406177401543, "learning_rate": 0.00013132904271119313, "loss": 0.2745, "step": 20196 }, { "epoch": 1.6361795204147764, "grad_norm": 0.04864123463630676, "learning_rate": 0.0001313245420585985, "loss": 0.2579, "step": 20197 }, { "epoch": 1.6362605314322747, "grad_norm": 0.05020648613572121, "learning_rate": 0.00013132004140600388, "loss": 0.2676, "step": 20198 }, { "epoch": 1.6363415424497731, "grad_norm": 0.056893881410360336, "learning_rate": 0.00013131554075340927, "loss": 0.2787, "step": 20199 }, { "epoch": 1.6364225534672716, "grad_norm": 0.05220147222280502, "learning_rate": 0.00013131104010081463, "loss": 0.2912, "step": 20200 }, { "epoch": 1.6365035644847699, "grad_norm": 0.054814036935567856, "learning_rate": 0.00013130653944822001, "loss": 0.279, "step": 20201 }, { "epoch": 1.6365845755022683, "grad_norm": 0.04547103866934776, "learning_rate": 0.00013130203879562537, "loss": 0.2545, "step": 20202 }, { "epoch": 1.6366655865197668, "grad_norm": 0.05352579429745674, "learning_rate": 0.00013129753814303073, "loss": 0.3117, "step": 20203 }, { "epoch": 1.636746597537265, "grad_norm": 0.058115895837545395, "learning_rate": 0.00013129303749043612, "loss": 0.2895, "step": 20204 }, { "epoch": 1.6368276085547635, "grad_norm": 0.04940159246325493, "learning_rate": 0.0001312885368378415, "loss": 0.2506, "step": 20205 }, { "epoch": 1.636908619572262, "grad_norm": 0.04255246743559837, "learning_rate": 0.00013128403618524687, "loss": 0.2632, "step": 20206 }, { "epoch": 1.6369896305897602, "grad_norm": 0.053756408393383026, "learning_rate": 0.00013127953553265226, "loss": 0.2918, "step": 20207 }, { "epoch": 1.6370706416072585, "grad_norm": 0.05477878823876381, "learning_rate": 0.00013127503488005762, "loss": 0.3305, "step": 20208 }, { "epoch": 1.637151652624757, "grad_norm": 0.04882469028234482, "learning_rate": 0.00013127053422746298, "loss": 0.229, "step": 20209 }, { "epoch": 1.6372326636422554, "grad_norm": 0.049400970339775085, "learning_rate": 0.00013126603357486836, "loss": 0.2977, "step": 20210 }, { "epoch": 1.6373136746597536, "grad_norm": 0.05772639438509941, "learning_rate": 0.00013126153292227375, "loss": 0.3003, "step": 20211 }, { "epoch": 1.637394685677252, "grad_norm": 0.04149458929896355, "learning_rate": 0.0001312570322696791, "loss": 0.223, "step": 20212 }, { "epoch": 1.6374756966947506, "grad_norm": 0.047275107353925705, "learning_rate": 0.0001312525316170845, "loss": 0.2717, "step": 20213 }, { "epoch": 1.6375567077122488, "grad_norm": 0.04378741979598999, "learning_rate": 0.00013124803096448986, "loss": 0.2655, "step": 20214 }, { "epoch": 1.6376377187297473, "grad_norm": 0.04953139275312424, "learning_rate": 0.00013124353031189522, "loss": 0.267, "step": 20215 }, { "epoch": 1.6377187297472457, "grad_norm": 0.04930621385574341, "learning_rate": 0.0001312390296593006, "loss": 0.2901, "step": 20216 }, { "epoch": 1.637799740764744, "grad_norm": 0.056873247027397156, "learning_rate": 0.000131234529006706, "loss": 0.287, "step": 20217 }, { "epoch": 1.6378807517822422, "grad_norm": 0.06545663625001907, "learning_rate": 0.00013123002835411135, "loss": 0.2924, "step": 20218 }, { "epoch": 1.637961762799741, "grad_norm": 0.050055429339408875, "learning_rate": 0.00013122552770151674, "loss": 0.2541, "step": 20219 }, { "epoch": 1.6380427738172392, "grad_norm": 0.05391817167401314, "learning_rate": 0.0001312210270489221, "loss": 0.2993, "step": 20220 }, { "epoch": 1.6381237848347374, "grad_norm": 0.060411565005779266, "learning_rate": 0.00013121652639632746, "loss": 0.2711, "step": 20221 }, { "epoch": 1.6382047958522359, "grad_norm": 0.05216376855969429, "learning_rate": 0.00013121202574373285, "loss": 0.2893, "step": 20222 }, { "epoch": 1.6382858068697344, "grad_norm": 0.058760929852724075, "learning_rate": 0.00013120752509113823, "loss": 0.3197, "step": 20223 }, { "epoch": 1.6383668178872326, "grad_norm": 0.05227070674300194, "learning_rate": 0.0001312030244385436, "loss": 0.2757, "step": 20224 }, { "epoch": 1.638447828904731, "grad_norm": 0.059265658259391785, "learning_rate": 0.00013119852378594898, "loss": 0.3337, "step": 20225 }, { "epoch": 1.6385288399222295, "grad_norm": 0.0466594323515892, "learning_rate": 0.00013119402313335434, "loss": 0.2669, "step": 20226 }, { "epoch": 1.6386098509397278, "grad_norm": 0.05499688908457756, "learning_rate": 0.0001311895224807597, "loss": 0.2749, "step": 20227 }, { "epoch": 1.638690861957226, "grad_norm": 0.05702508985996246, "learning_rate": 0.0001311850218281651, "loss": 0.3247, "step": 20228 }, { "epoch": 1.6387718729747247, "grad_norm": 0.06296319514513016, "learning_rate": 0.00013118052117557047, "loss": 0.3166, "step": 20229 }, { "epoch": 1.638852883992223, "grad_norm": 0.04582956060767174, "learning_rate": 0.00013117602052297583, "loss": 0.2367, "step": 20230 }, { "epoch": 1.6389338950097212, "grad_norm": 0.0633278340101242, "learning_rate": 0.00013117151987038122, "loss": 0.2992, "step": 20231 }, { "epoch": 1.6390149060272197, "grad_norm": 0.05358787998557091, "learning_rate": 0.00013116701921778658, "loss": 0.3292, "step": 20232 }, { "epoch": 1.6390959170447181, "grad_norm": 0.040785521268844604, "learning_rate": 0.00013116251856519194, "loss": 0.2723, "step": 20233 }, { "epoch": 1.6391769280622164, "grad_norm": 0.049127183854579926, "learning_rate": 0.00013115801791259733, "loss": 0.2379, "step": 20234 }, { "epoch": 1.6392579390797148, "grad_norm": 0.05054868385195732, "learning_rate": 0.00013115351726000272, "loss": 0.2747, "step": 20235 }, { "epoch": 1.6393389500972133, "grad_norm": 0.05272538959980011, "learning_rate": 0.00013114901660740808, "loss": 0.2752, "step": 20236 }, { "epoch": 1.6394199611147116, "grad_norm": 0.04339519515633583, "learning_rate": 0.00013114451595481346, "loss": 0.2689, "step": 20237 }, { "epoch": 1.63950097213221, "grad_norm": 0.04754365608096123, "learning_rate": 0.00013114001530221882, "loss": 0.2559, "step": 20238 }, { "epoch": 1.6395819831497085, "grad_norm": 0.058999720960855484, "learning_rate": 0.00013113551464962418, "loss": 0.2714, "step": 20239 }, { "epoch": 1.6396629941672067, "grad_norm": 0.05868087708950043, "learning_rate": 0.00013113101399702957, "loss": 0.2717, "step": 20240 }, { "epoch": 1.639744005184705, "grad_norm": 0.049035605043172836, "learning_rate": 0.00013112651334443496, "loss": 0.2686, "step": 20241 }, { "epoch": 1.6398250162022034, "grad_norm": 0.051105234771966934, "learning_rate": 0.00013112201269184032, "loss": 0.2869, "step": 20242 }, { "epoch": 1.639906027219702, "grad_norm": 0.052198246121406555, "learning_rate": 0.0001311175120392457, "loss": 0.2844, "step": 20243 }, { "epoch": 1.6399870382372002, "grad_norm": 0.05529845878481865, "learning_rate": 0.00013111301138665107, "loss": 0.2922, "step": 20244 }, { "epoch": 1.6400680492546986, "grad_norm": 0.057615384459495544, "learning_rate": 0.00013110851073405643, "loss": 0.281, "step": 20245 }, { "epoch": 1.640149060272197, "grad_norm": 0.05965334177017212, "learning_rate": 0.0001311040100814618, "loss": 0.3059, "step": 20246 }, { "epoch": 1.6402300712896953, "grad_norm": 0.05527438223361969, "learning_rate": 0.0001310995094288672, "loss": 0.2756, "step": 20247 }, { "epoch": 1.6403110823071938, "grad_norm": 0.0498809777200222, "learning_rate": 0.00013109500877627256, "loss": 0.2556, "step": 20248 }, { "epoch": 1.6403920933246923, "grad_norm": 0.0581757053732872, "learning_rate": 0.00013109050812367795, "loss": 0.3066, "step": 20249 }, { "epoch": 1.6404731043421905, "grad_norm": 0.05269348621368408, "learning_rate": 0.0001310860074710833, "loss": 0.2849, "step": 20250 }, { "epoch": 1.6405541153596888, "grad_norm": 0.05979086831212044, "learning_rate": 0.0001310815068184887, "loss": 0.2938, "step": 20251 }, { "epoch": 1.6406351263771874, "grad_norm": 0.04561556130647659, "learning_rate": 0.00013107700616589405, "loss": 0.2656, "step": 20252 }, { "epoch": 1.6407161373946857, "grad_norm": 0.06042017787694931, "learning_rate": 0.00013107250551329944, "loss": 0.3043, "step": 20253 }, { "epoch": 1.640797148412184, "grad_norm": 0.047696713358163834, "learning_rate": 0.0001310680048607048, "loss": 0.272, "step": 20254 }, { "epoch": 1.6408781594296824, "grad_norm": 0.052089713513851166, "learning_rate": 0.0001310635042081102, "loss": 0.3056, "step": 20255 }, { "epoch": 1.6409591704471809, "grad_norm": 0.05706511810421944, "learning_rate": 0.00013105900355551555, "loss": 0.2903, "step": 20256 }, { "epoch": 1.6410401814646791, "grad_norm": 0.0495259165763855, "learning_rate": 0.00013105450290292094, "loss": 0.2448, "step": 20257 }, { "epoch": 1.6411211924821776, "grad_norm": 0.04729805141687393, "learning_rate": 0.0001310500022503263, "loss": 0.2734, "step": 20258 }, { "epoch": 1.641202203499676, "grad_norm": 0.057043250650167465, "learning_rate": 0.00013104550159773168, "loss": 0.2919, "step": 20259 }, { "epoch": 1.6412832145171743, "grad_norm": 0.05842726677656174, "learning_rate": 0.00013104100094513704, "loss": 0.3415, "step": 20260 }, { "epoch": 1.6413642255346728, "grad_norm": 0.048161424696445465, "learning_rate": 0.00013103650029254243, "loss": 0.2791, "step": 20261 }, { "epoch": 1.6414452365521712, "grad_norm": 0.045487869530916214, "learning_rate": 0.0001310319996399478, "loss": 0.2468, "step": 20262 }, { "epoch": 1.6415262475696695, "grad_norm": 0.05410471558570862, "learning_rate": 0.00013102749898735318, "loss": 0.2646, "step": 20263 }, { "epoch": 1.6416072585871677, "grad_norm": 0.04716672748327255, "learning_rate": 0.00013102299833475856, "loss": 0.2815, "step": 20264 }, { "epoch": 1.6416882696046662, "grad_norm": 0.0537717230618, "learning_rate": 0.00013101849768216392, "loss": 0.3024, "step": 20265 }, { "epoch": 1.6417692806221647, "grad_norm": 0.04909708723425865, "learning_rate": 0.00013101399702956928, "loss": 0.2653, "step": 20266 }, { "epoch": 1.641850291639663, "grad_norm": 0.05597676336765289, "learning_rate": 0.00013100949637697467, "loss": 0.2867, "step": 20267 }, { "epoch": 1.6419313026571614, "grad_norm": 0.05452214181423187, "learning_rate": 0.00013100499572438003, "loss": 0.2578, "step": 20268 }, { "epoch": 1.6420123136746598, "grad_norm": 0.05111711472272873, "learning_rate": 0.00013100049507178542, "loss": 0.2473, "step": 20269 }, { "epoch": 1.642093324692158, "grad_norm": 0.059686530381441116, "learning_rate": 0.0001309959944191908, "loss": 0.2795, "step": 20270 }, { "epoch": 1.6421743357096565, "grad_norm": 0.047529637813568115, "learning_rate": 0.00013099149376659617, "loss": 0.2571, "step": 20271 }, { "epoch": 1.642255346727155, "grad_norm": 0.05156063660979271, "learning_rate": 0.00013098699311400153, "loss": 0.287, "step": 20272 }, { "epoch": 1.6423363577446533, "grad_norm": 0.05362094193696976, "learning_rate": 0.0001309824924614069, "loss": 0.2528, "step": 20273 }, { "epoch": 1.6424173687621515, "grad_norm": 0.05864579230546951, "learning_rate": 0.0001309779918088123, "loss": 0.2787, "step": 20274 }, { "epoch": 1.6424983797796502, "grad_norm": 0.04949674382805824, "learning_rate": 0.00013097349115621766, "loss": 0.228, "step": 20275 }, { "epoch": 1.6425793907971484, "grad_norm": 0.0509091354906559, "learning_rate": 0.00013096899050362305, "loss": 0.2809, "step": 20276 }, { "epoch": 1.6426604018146467, "grad_norm": 0.058638475835323334, "learning_rate": 0.0001309644898510284, "loss": 0.332, "step": 20277 }, { "epoch": 1.6427414128321451, "grad_norm": 0.049978744238615036, "learning_rate": 0.00013095998919843377, "loss": 0.2658, "step": 20278 }, { "epoch": 1.6428224238496436, "grad_norm": 0.07554782927036285, "learning_rate": 0.00013095548854583916, "loss": 0.3293, "step": 20279 }, { "epoch": 1.6429034348671419, "grad_norm": 0.06077819690108299, "learning_rate": 0.00013095098789324454, "loss": 0.2941, "step": 20280 }, { "epoch": 1.6429844458846403, "grad_norm": 0.060247037559747696, "learning_rate": 0.0001309464872406499, "loss": 0.314, "step": 20281 }, { "epoch": 1.6430654569021388, "grad_norm": 0.055557847023010254, "learning_rate": 0.0001309419865880553, "loss": 0.3071, "step": 20282 }, { "epoch": 1.643146467919637, "grad_norm": 0.05224480479955673, "learning_rate": 0.00013093748593546065, "loss": 0.2662, "step": 20283 }, { "epoch": 1.6432274789371355, "grad_norm": 0.0507444366812706, "learning_rate": 0.000130932985282866, "loss": 0.2676, "step": 20284 }, { "epoch": 1.643308489954634, "grad_norm": 0.052977025508880615, "learning_rate": 0.0001309284846302714, "loss": 0.2647, "step": 20285 }, { "epoch": 1.6433895009721322, "grad_norm": 0.05128326639533043, "learning_rate": 0.00013092398397767678, "loss": 0.2753, "step": 20286 }, { "epoch": 1.6434705119896305, "grad_norm": 0.048690736293792725, "learning_rate": 0.00013091948332508214, "loss": 0.2403, "step": 20287 }, { "epoch": 1.643551523007129, "grad_norm": 0.05496395751833916, "learning_rate": 0.00013091498267248753, "loss": 0.2716, "step": 20288 }, { "epoch": 1.6436325340246274, "grad_norm": 0.056154485791921616, "learning_rate": 0.0001309104820198929, "loss": 0.309, "step": 20289 }, { "epoch": 1.6437135450421256, "grad_norm": 0.04780879244208336, "learning_rate": 0.00013090598136729825, "loss": 0.2867, "step": 20290 }, { "epoch": 1.643794556059624, "grad_norm": 0.05079265311360359, "learning_rate": 0.00013090148071470364, "loss": 0.2678, "step": 20291 }, { "epoch": 1.6438755670771226, "grad_norm": 0.05555180087685585, "learning_rate": 0.00013089698006210903, "loss": 0.2979, "step": 20292 }, { "epoch": 1.6439565780946208, "grad_norm": 0.05565644055604935, "learning_rate": 0.00013089247940951439, "loss": 0.2806, "step": 20293 }, { "epoch": 1.6440375891121193, "grad_norm": 0.06228690221905708, "learning_rate": 0.00013088797875691977, "loss": 0.3504, "step": 20294 }, { "epoch": 1.6441186001296177, "grad_norm": 0.06760898977518082, "learning_rate": 0.00013088347810432513, "loss": 0.377, "step": 20295 }, { "epoch": 1.644199611147116, "grad_norm": 0.05215907469391823, "learning_rate": 0.0001308789774517305, "loss": 0.3102, "step": 20296 }, { "epoch": 1.6442806221646142, "grad_norm": 0.059776682406663895, "learning_rate": 0.00013087447679913588, "loss": 0.324, "step": 20297 }, { "epoch": 1.644361633182113, "grad_norm": 0.04926168546080589, "learning_rate": 0.00013086997614654127, "loss": 0.2834, "step": 20298 }, { "epoch": 1.6444426441996112, "grad_norm": 0.05643089860677719, "learning_rate": 0.00013086547549394663, "loss": 0.2819, "step": 20299 }, { "epoch": 1.6445236552171094, "grad_norm": 0.0547204464673996, "learning_rate": 0.00013086097484135201, "loss": 0.3061, "step": 20300 }, { "epoch": 1.6446046662346079, "grad_norm": 0.04373621568083763, "learning_rate": 0.00013085647418875737, "loss": 0.2539, "step": 20301 }, { "epoch": 1.6446856772521063, "grad_norm": 0.06481010466814041, "learning_rate": 0.00013085197353616273, "loss": 0.2944, "step": 20302 }, { "epoch": 1.6447666882696046, "grad_norm": 0.047319646924734116, "learning_rate": 0.00013084747288356812, "loss": 0.2712, "step": 20303 }, { "epoch": 1.644847699287103, "grad_norm": 0.04775365814566612, "learning_rate": 0.0001308429722309735, "loss": 0.2608, "step": 20304 }, { "epoch": 1.6449287103046015, "grad_norm": 0.08077125251293182, "learning_rate": 0.00013083847157837887, "loss": 0.2927, "step": 20305 }, { "epoch": 1.6450097213220998, "grad_norm": 0.05693648383021355, "learning_rate": 0.00013083397092578426, "loss": 0.3129, "step": 20306 }, { "epoch": 1.6450907323395982, "grad_norm": 0.06557469815015793, "learning_rate": 0.00013082947027318962, "loss": 0.2719, "step": 20307 }, { "epoch": 1.6451717433570967, "grad_norm": 0.047028761357069016, "learning_rate": 0.00013082496962059498, "loss": 0.2601, "step": 20308 }, { "epoch": 1.645252754374595, "grad_norm": 0.05751359462738037, "learning_rate": 0.00013082046896800036, "loss": 0.3182, "step": 20309 }, { "epoch": 1.6453337653920932, "grad_norm": 0.044193051755428314, "learning_rate": 0.00013081596831540575, "loss": 0.2716, "step": 20310 }, { "epoch": 1.6454147764095917, "grad_norm": 0.05198691040277481, "learning_rate": 0.0001308114676628111, "loss": 0.2334, "step": 20311 }, { "epoch": 1.6454957874270901, "grad_norm": 0.04755130782723427, "learning_rate": 0.0001308069670102165, "loss": 0.2808, "step": 20312 }, { "epoch": 1.6455767984445884, "grad_norm": 0.051487136632204056, "learning_rate": 0.00013080246635762186, "loss": 0.2478, "step": 20313 }, { "epoch": 1.6456578094620868, "grad_norm": 0.0572948083281517, "learning_rate": 0.00013079796570502722, "loss": 0.2927, "step": 20314 }, { "epoch": 1.6457388204795853, "grad_norm": 0.060806773602962494, "learning_rate": 0.0001307934650524326, "loss": 0.2897, "step": 20315 }, { "epoch": 1.6458198314970836, "grad_norm": 0.05558852106332779, "learning_rate": 0.000130788964399838, "loss": 0.3071, "step": 20316 }, { "epoch": 1.645900842514582, "grad_norm": 0.042439140379428864, "learning_rate": 0.00013078446374724335, "loss": 0.2648, "step": 20317 }, { "epoch": 1.6459818535320805, "grad_norm": 0.04296138882637024, "learning_rate": 0.00013077996309464874, "loss": 0.2554, "step": 20318 }, { "epoch": 1.6460628645495787, "grad_norm": 0.060558926314115524, "learning_rate": 0.0001307754624420541, "loss": 0.2917, "step": 20319 }, { "epoch": 1.646143875567077, "grad_norm": 0.058144014328718185, "learning_rate": 0.00013077096178945946, "loss": 0.2486, "step": 20320 }, { "epoch": 1.6462248865845757, "grad_norm": 0.05100826546549797, "learning_rate": 0.00013076646113686485, "loss": 0.2704, "step": 20321 }, { "epoch": 1.646305897602074, "grad_norm": 0.0650191381573677, "learning_rate": 0.00013076196048427023, "loss": 0.3276, "step": 20322 }, { "epoch": 1.6463869086195722, "grad_norm": 0.0722268596291542, "learning_rate": 0.0001307574598316756, "loss": 0.2804, "step": 20323 }, { "epoch": 1.6464679196370706, "grad_norm": 0.06000415235757828, "learning_rate": 0.00013075295917908098, "loss": 0.2547, "step": 20324 }, { "epoch": 1.646548930654569, "grad_norm": 0.06664841622114182, "learning_rate": 0.00013074845852648634, "loss": 0.3037, "step": 20325 }, { "epoch": 1.6466299416720673, "grad_norm": 0.06420580297708511, "learning_rate": 0.00013074395787389173, "loss": 0.2886, "step": 20326 }, { "epoch": 1.6467109526895658, "grad_norm": 0.05521161109209061, "learning_rate": 0.0001307394572212971, "loss": 0.3071, "step": 20327 }, { "epoch": 1.6467919637070643, "grad_norm": 0.05648175626993179, "learning_rate": 0.00013073495656870248, "loss": 0.279, "step": 20328 }, { "epoch": 1.6468729747245625, "grad_norm": 0.055062033236026764, "learning_rate": 0.00013073045591610784, "loss": 0.289, "step": 20329 }, { "epoch": 1.6469539857420608, "grad_norm": 0.04393686354160309, "learning_rate": 0.00013072595526351322, "loss": 0.2747, "step": 20330 }, { "epoch": 1.6470349967595594, "grad_norm": 0.06465456634759903, "learning_rate": 0.00013072145461091858, "loss": 0.3107, "step": 20331 }, { "epoch": 1.6471160077770577, "grad_norm": 0.048442304134368896, "learning_rate": 0.00013071695395832397, "loss": 0.3138, "step": 20332 }, { "epoch": 1.647197018794556, "grad_norm": 0.04944649338722229, "learning_rate": 0.00013071245330572936, "loss": 0.2602, "step": 20333 }, { "epoch": 1.6472780298120544, "grad_norm": 0.05407331883907318, "learning_rate": 0.00013070795265313472, "loss": 0.3204, "step": 20334 }, { "epoch": 1.6473590408295529, "grad_norm": 0.04247947037220001, "learning_rate": 0.00013070345200054008, "loss": 0.2753, "step": 20335 }, { "epoch": 1.6474400518470511, "grad_norm": 0.055948950350284576, "learning_rate": 0.00013069895134794546, "loss": 0.3221, "step": 20336 }, { "epoch": 1.6475210628645496, "grad_norm": 0.04786483943462372, "learning_rate": 0.00013069445069535082, "loss": 0.2718, "step": 20337 }, { "epoch": 1.647602073882048, "grad_norm": 0.05264323949813843, "learning_rate": 0.0001306899500427562, "loss": 0.293, "step": 20338 }, { "epoch": 1.6476830848995463, "grad_norm": 0.0563749223947525, "learning_rate": 0.0001306854493901616, "loss": 0.3238, "step": 20339 }, { "epoch": 1.6477640959170448, "grad_norm": 0.05304846167564392, "learning_rate": 0.00013068094873756696, "loss": 0.2754, "step": 20340 }, { "epoch": 1.6478451069345432, "grad_norm": 0.04724569618701935, "learning_rate": 0.00013067644808497232, "loss": 0.2655, "step": 20341 }, { "epoch": 1.6479261179520415, "grad_norm": 0.05889305844902992, "learning_rate": 0.0001306719474323777, "loss": 0.2864, "step": 20342 }, { "epoch": 1.6480071289695397, "grad_norm": 0.05639440193772316, "learning_rate": 0.00013066744677978307, "loss": 0.2899, "step": 20343 }, { "epoch": 1.6480881399870384, "grad_norm": 0.05589323863387108, "learning_rate": 0.00013066294612718845, "loss": 0.287, "step": 20344 }, { "epoch": 1.6481691510045366, "grad_norm": 0.04943550378084183, "learning_rate": 0.00013065844547459384, "loss": 0.2824, "step": 20345 }, { "epoch": 1.648250162022035, "grad_norm": 0.05775272101163864, "learning_rate": 0.0001306539448219992, "loss": 0.2698, "step": 20346 }, { "epoch": 1.6483311730395334, "grad_norm": 0.05911261960864067, "learning_rate": 0.00013064944416940456, "loss": 0.3123, "step": 20347 }, { "epoch": 1.6484121840570318, "grad_norm": 0.05397322401404381, "learning_rate": 0.00013064494351680995, "loss": 0.2795, "step": 20348 }, { "epoch": 1.64849319507453, "grad_norm": 0.04608829692006111, "learning_rate": 0.0001306404428642153, "loss": 0.3049, "step": 20349 }, { "epoch": 1.6485742060920285, "grad_norm": 0.051778387278318405, "learning_rate": 0.0001306359422116207, "loss": 0.2885, "step": 20350 }, { "epoch": 1.648655217109527, "grad_norm": 0.04542142152786255, "learning_rate": 0.00013063144155902608, "loss": 0.2235, "step": 20351 }, { "epoch": 1.6487362281270252, "grad_norm": 0.04840834438800812, "learning_rate": 0.00013062694090643144, "loss": 0.2946, "step": 20352 }, { "epoch": 1.6488172391445235, "grad_norm": 0.05071964114904404, "learning_rate": 0.0001306224402538368, "loss": 0.271, "step": 20353 }, { "epoch": 1.6488982501620222, "grad_norm": 0.05756606161594391, "learning_rate": 0.0001306179396012422, "loss": 0.2519, "step": 20354 }, { "epoch": 1.6489792611795204, "grad_norm": 0.05126466229557991, "learning_rate": 0.00013061343894864758, "loss": 0.2839, "step": 20355 }, { "epoch": 1.6490602721970187, "grad_norm": 0.06566730886697769, "learning_rate": 0.00013060893829605294, "loss": 0.3015, "step": 20356 }, { "epoch": 1.6491412832145171, "grad_norm": 0.06146741658449173, "learning_rate": 0.00013060443764345832, "loss": 0.2913, "step": 20357 }, { "epoch": 1.6492222942320156, "grad_norm": 0.04742487892508507, "learning_rate": 0.00013059993699086368, "loss": 0.2492, "step": 20358 }, { "epoch": 1.6493033052495139, "grad_norm": 0.051782794296741486, "learning_rate": 0.00013059543633826904, "loss": 0.299, "step": 20359 }, { "epoch": 1.6493843162670123, "grad_norm": 0.056378141045570374, "learning_rate": 0.00013059093568567443, "loss": 0.2448, "step": 20360 }, { "epoch": 1.6494653272845108, "grad_norm": 0.06598246842622757, "learning_rate": 0.00013058643503307982, "loss": 0.3373, "step": 20361 }, { "epoch": 1.649546338302009, "grad_norm": 0.05629661679267883, "learning_rate": 0.00013058193438048518, "loss": 0.2929, "step": 20362 }, { "epoch": 1.6496273493195075, "grad_norm": 0.05046766623854637, "learning_rate": 0.00013057743372789056, "loss": 0.2524, "step": 20363 }, { "epoch": 1.649708360337006, "grad_norm": 0.0475756861269474, "learning_rate": 0.00013057293307529592, "loss": 0.2779, "step": 20364 }, { "epoch": 1.6497893713545042, "grad_norm": 0.045517344027757645, "learning_rate": 0.00013056843242270128, "loss": 0.2772, "step": 20365 }, { "epoch": 1.6498703823720025, "grad_norm": 0.054667480289936066, "learning_rate": 0.00013056393177010667, "loss": 0.2758, "step": 20366 }, { "epoch": 1.649951393389501, "grad_norm": 0.04701853170990944, "learning_rate": 0.00013055943111751206, "loss": 0.2884, "step": 20367 }, { "epoch": 1.6500324044069994, "grad_norm": 0.049440670758485794, "learning_rate": 0.00013055493046491742, "loss": 0.2548, "step": 20368 }, { "epoch": 1.6501134154244976, "grad_norm": 0.05815757438540459, "learning_rate": 0.0001305504298123228, "loss": 0.2529, "step": 20369 }, { "epoch": 1.650194426441996, "grad_norm": 0.049093883484601974, "learning_rate": 0.00013054592915972817, "loss": 0.3112, "step": 20370 }, { "epoch": 1.6502754374594946, "grad_norm": 0.05279753729701042, "learning_rate": 0.00013054142850713353, "loss": 0.2654, "step": 20371 }, { "epoch": 1.6503564484769928, "grad_norm": 0.04632333666086197, "learning_rate": 0.00013053692785453891, "loss": 0.2417, "step": 20372 }, { "epoch": 1.6504374594944913, "grad_norm": 0.043918710201978683, "learning_rate": 0.0001305324272019443, "loss": 0.2442, "step": 20373 }, { "epoch": 1.6505184705119897, "grad_norm": 0.04563755542039871, "learning_rate": 0.00013052792654934966, "loss": 0.2821, "step": 20374 }, { "epoch": 1.650599481529488, "grad_norm": 0.05067136138677597, "learning_rate": 0.00013052342589675505, "loss": 0.236, "step": 20375 }, { "epoch": 1.6506804925469862, "grad_norm": 0.05287901684641838, "learning_rate": 0.0001305189252441604, "loss": 0.2115, "step": 20376 }, { "epoch": 1.650761503564485, "grad_norm": 0.04865317791700363, "learning_rate": 0.00013051442459156577, "loss": 0.2497, "step": 20377 }, { "epoch": 1.6508425145819832, "grad_norm": 0.059927087277173996, "learning_rate": 0.00013050992393897116, "loss": 0.2981, "step": 20378 }, { "epoch": 1.6509235255994814, "grad_norm": 0.044385023415088654, "learning_rate": 0.00013050542328637654, "loss": 0.2481, "step": 20379 }, { "epoch": 1.6510045366169799, "grad_norm": 0.05484599247574806, "learning_rate": 0.0001305009226337819, "loss": 0.2793, "step": 20380 }, { "epoch": 1.6510855476344783, "grad_norm": 0.053661175072193146, "learning_rate": 0.0001304964219811873, "loss": 0.2613, "step": 20381 }, { "epoch": 1.6511665586519766, "grad_norm": 0.050644651055336, "learning_rate": 0.00013049192132859265, "loss": 0.2869, "step": 20382 }, { "epoch": 1.651247569669475, "grad_norm": 0.0510164275765419, "learning_rate": 0.000130487420675998, "loss": 0.2726, "step": 20383 }, { "epoch": 1.6513285806869735, "grad_norm": 0.05315467342734337, "learning_rate": 0.0001304829200234034, "loss": 0.2751, "step": 20384 }, { "epoch": 1.6514095917044718, "grad_norm": 0.05707908421754837, "learning_rate": 0.00013047841937080878, "loss": 0.2527, "step": 20385 }, { "epoch": 1.6514906027219702, "grad_norm": 0.05162080377340317, "learning_rate": 0.00013047391871821414, "loss": 0.2513, "step": 20386 }, { "epoch": 1.6515716137394687, "grad_norm": 0.06012650206685066, "learning_rate": 0.00013046941806561953, "loss": 0.2579, "step": 20387 }, { "epoch": 1.651652624756967, "grad_norm": 0.05233855918049812, "learning_rate": 0.0001304649174130249, "loss": 0.2584, "step": 20388 }, { "epoch": 1.6517336357744652, "grad_norm": 0.06599611043930054, "learning_rate": 0.00013046041676043025, "loss": 0.2751, "step": 20389 }, { "epoch": 1.6518146467919637, "grad_norm": 0.07051976770162582, "learning_rate": 0.00013045591610783564, "loss": 0.3284, "step": 20390 }, { "epoch": 1.6518956578094621, "grad_norm": 0.052024535834789276, "learning_rate": 0.00013045141545524103, "loss": 0.291, "step": 20391 }, { "epoch": 1.6519766688269604, "grad_norm": 0.05041825398802757, "learning_rate": 0.00013044691480264639, "loss": 0.2656, "step": 20392 }, { "epoch": 1.6520576798444588, "grad_norm": 0.059028733521699905, "learning_rate": 0.00013044241415005177, "loss": 0.3197, "step": 20393 }, { "epoch": 1.6521386908619573, "grad_norm": 0.06829122453927994, "learning_rate": 0.00013043791349745713, "loss": 0.2976, "step": 20394 }, { "epoch": 1.6522197018794555, "grad_norm": 0.050032202154397964, "learning_rate": 0.0001304334128448625, "loss": 0.2844, "step": 20395 }, { "epoch": 1.652300712896954, "grad_norm": 0.05230595916509628, "learning_rate": 0.00013042891219226788, "loss": 0.2435, "step": 20396 }, { "epoch": 1.6523817239144525, "grad_norm": 0.048366427421569824, "learning_rate": 0.00013042441153967327, "loss": 0.3016, "step": 20397 }, { "epoch": 1.6524627349319507, "grad_norm": 0.05470036715269089, "learning_rate": 0.00013041991088707863, "loss": 0.2688, "step": 20398 }, { "epoch": 1.652543745949449, "grad_norm": 0.048787783831357956, "learning_rate": 0.00013041541023448401, "loss": 0.3097, "step": 20399 }, { "epoch": 1.6526247569669477, "grad_norm": 0.0499483160674572, "learning_rate": 0.00013041090958188937, "loss": 0.2896, "step": 20400 }, { "epoch": 1.652705767984446, "grad_norm": 0.05546579509973526, "learning_rate": 0.00013040640892929473, "loss": 0.2932, "step": 20401 }, { "epoch": 1.6527867790019442, "grad_norm": 0.05673975497484207, "learning_rate": 0.00013040190827670015, "loss": 0.3236, "step": 20402 }, { "epoch": 1.6528677900194426, "grad_norm": 0.05085534229874611, "learning_rate": 0.0001303974076241055, "loss": 0.2284, "step": 20403 }, { "epoch": 1.652948801036941, "grad_norm": 0.05804755911231041, "learning_rate": 0.00013039290697151087, "loss": 0.3285, "step": 20404 }, { "epoch": 1.6530298120544393, "grad_norm": 0.05439860746264458, "learning_rate": 0.00013038840631891626, "loss": 0.3477, "step": 20405 }, { "epoch": 1.6531108230719378, "grad_norm": 0.04965033009648323, "learning_rate": 0.00013038390566632162, "loss": 0.266, "step": 20406 }, { "epoch": 1.6531918340894363, "grad_norm": 0.05848463624715805, "learning_rate": 0.000130379405013727, "loss": 0.3379, "step": 20407 }, { "epoch": 1.6532728451069345, "grad_norm": 0.05859372764825821, "learning_rate": 0.0001303749043611324, "loss": 0.2968, "step": 20408 }, { "epoch": 1.653353856124433, "grad_norm": 0.05018116161227226, "learning_rate": 0.00013037040370853775, "loss": 0.2596, "step": 20409 }, { "epoch": 1.6534348671419314, "grad_norm": 0.046670421957969666, "learning_rate": 0.0001303659030559431, "loss": 0.2622, "step": 20410 }, { "epoch": 1.6535158781594297, "grad_norm": 0.0573502741754055, "learning_rate": 0.0001303614024033485, "loss": 0.2577, "step": 20411 }, { "epoch": 1.653596889176928, "grad_norm": 0.05708976462483406, "learning_rate": 0.00013035690175075386, "loss": 0.2701, "step": 20412 }, { "epoch": 1.6536779001944264, "grad_norm": 0.05246642231941223, "learning_rate": 0.00013035240109815924, "loss": 0.2621, "step": 20413 }, { "epoch": 1.6537589112119249, "grad_norm": 0.0493154302239418, "learning_rate": 0.00013034790044556463, "loss": 0.2561, "step": 20414 }, { "epoch": 1.653839922229423, "grad_norm": 0.061303410679101944, "learning_rate": 0.00013034339979297, "loss": 0.2716, "step": 20415 }, { "epoch": 1.6539209332469216, "grad_norm": 0.05280701443552971, "learning_rate": 0.00013033889914037535, "loss": 0.2537, "step": 20416 }, { "epoch": 1.65400194426442, "grad_norm": 0.059893831610679626, "learning_rate": 0.00013033439848778074, "loss": 0.2837, "step": 20417 }, { "epoch": 1.6540829552819183, "grad_norm": 0.07370199263095856, "learning_rate": 0.0001303298978351861, "loss": 0.2741, "step": 20418 }, { "epoch": 1.6541639662994168, "grad_norm": 0.06630618870258331, "learning_rate": 0.0001303253971825915, "loss": 0.286, "step": 20419 }, { "epoch": 1.6542449773169152, "grad_norm": 0.053009796887636185, "learning_rate": 0.00013032089652999687, "loss": 0.256, "step": 20420 }, { "epoch": 1.6543259883344135, "grad_norm": 0.05130352824926376, "learning_rate": 0.00013031639587740223, "loss": 0.3012, "step": 20421 }, { "epoch": 1.6544069993519117, "grad_norm": 0.04923132061958313, "learning_rate": 0.0001303118952248076, "loss": 0.2647, "step": 20422 }, { "epoch": 1.6544880103694104, "grad_norm": 0.062321193516254425, "learning_rate": 0.00013030739457221298, "loss": 0.3192, "step": 20423 }, { "epoch": 1.6545690213869086, "grad_norm": 0.05298849195241928, "learning_rate": 0.00013030289391961834, "loss": 0.3241, "step": 20424 }, { "epoch": 1.654650032404407, "grad_norm": 0.057993028312921524, "learning_rate": 0.00013029839326702373, "loss": 0.3059, "step": 20425 }, { "epoch": 1.6547310434219054, "grad_norm": 0.05777161940932274, "learning_rate": 0.00013029389261442912, "loss": 0.308, "step": 20426 }, { "epoch": 1.6548120544394038, "grad_norm": 0.049483541399240494, "learning_rate": 0.00013028939196183448, "loss": 0.2899, "step": 20427 }, { "epoch": 1.654893065456902, "grad_norm": 0.04754691198468208, "learning_rate": 0.00013028489130923984, "loss": 0.2583, "step": 20428 }, { "epoch": 1.6549740764744005, "grad_norm": 0.0468088760972023, "learning_rate": 0.00013028039065664522, "loss": 0.2979, "step": 20429 }, { "epoch": 1.655055087491899, "grad_norm": 0.05034726858139038, "learning_rate": 0.00013027589000405058, "loss": 0.2969, "step": 20430 }, { "epoch": 1.6551360985093972, "grad_norm": 0.05580078065395355, "learning_rate": 0.00013027138935145597, "loss": 0.2764, "step": 20431 }, { "epoch": 1.6552171095268955, "grad_norm": 0.0637517049908638, "learning_rate": 0.00013026688869886136, "loss": 0.3075, "step": 20432 }, { "epoch": 1.6552981205443942, "grad_norm": 0.03854849934577942, "learning_rate": 0.00013026238804626672, "loss": 0.2383, "step": 20433 }, { "epoch": 1.6553791315618924, "grad_norm": 0.04968152195215225, "learning_rate": 0.00013025788739367208, "loss": 0.2882, "step": 20434 }, { "epoch": 1.6554601425793907, "grad_norm": 0.0447208434343338, "learning_rate": 0.00013025338674107746, "loss": 0.3073, "step": 20435 }, { "epoch": 1.6555411535968891, "grad_norm": 0.07282092422246933, "learning_rate": 0.00013024888608848285, "loss": 0.3316, "step": 20436 }, { "epoch": 1.6556221646143876, "grad_norm": 0.06616365164518356, "learning_rate": 0.0001302443854358882, "loss": 0.3561, "step": 20437 }, { "epoch": 1.6557031756318858, "grad_norm": 0.04712116718292236, "learning_rate": 0.0001302398847832936, "loss": 0.2482, "step": 20438 }, { "epoch": 1.6557841866493843, "grad_norm": 0.05836176499724388, "learning_rate": 0.00013023538413069896, "loss": 0.3062, "step": 20439 }, { "epoch": 1.6558651976668828, "grad_norm": 0.05766526237130165, "learning_rate": 0.00013023088347810432, "loss": 0.2625, "step": 20440 }, { "epoch": 1.655946208684381, "grad_norm": 0.04732430353760719, "learning_rate": 0.0001302263828255097, "loss": 0.3104, "step": 20441 }, { "epoch": 1.6560272197018795, "grad_norm": 0.05533413216471672, "learning_rate": 0.0001302218821729151, "loss": 0.332, "step": 20442 }, { "epoch": 1.656108230719378, "grad_norm": 0.04660717025399208, "learning_rate": 0.00013021738152032045, "loss": 0.2553, "step": 20443 }, { "epoch": 1.6561892417368762, "grad_norm": 0.04941389709711075, "learning_rate": 0.00013021288086772584, "loss": 0.2431, "step": 20444 }, { "epoch": 1.6562702527543745, "grad_norm": 0.052802614867687225, "learning_rate": 0.0001302083802151312, "loss": 0.2749, "step": 20445 }, { "epoch": 1.6563512637718731, "grad_norm": 0.059079330414533615, "learning_rate": 0.00013020387956253656, "loss": 0.3094, "step": 20446 }, { "epoch": 1.6564322747893714, "grad_norm": 0.04814727231860161, "learning_rate": 0.00013019937890994195, "loss": 0.2567, "step": 20447 }, { "epoch": 1.6565132858068696, "grad_norm": 0.049222610890865326, "learning_rate": 0.00013019487825734733, "loss": 0.2786, "step": 20448 }, { "epoch": 1.656594296824368, "grad_norm": 0.0641581192612648, "learning_rate": 0.0001301903776047527, "loss": 0.2967, "step": 20449 }, { "epoch": 1.6566753078418666, "grad_norm": 0.0636487528681755, "learning_rate": 0.00013018587695215808, "loss": 0.3429, "step": 20450 }, { "epoch": 1.6567563188593648, "grad_norm": 0.06307125836610794, "learning_rate": 0.00013018137629956344, "loss": 0.306, "step": 20451 }, { "epoch": 1.6568373298768633, "grad_norm": 0.054315872490406036, "learning_rate": 0.0001301768756469688, "loss": 0.283, "step": 20452 }, { "epoch": 1.6569183408943617, "grad_norm": 0.05288204923272133, "learning_rate": 0.0001301723749943742, "loss": 0.2725, "step": 20453 }, { "epoch": 1.65699935191186, "grad_norm": 0.05411860719323158, "learning_rate": 0.00013016787434177958, "loss": 0.2966, "step": 20454 }, { "epoch": 1.6570803629293582, "grad_norm": 0.05066904053092003, "learning_rate": 0.00013016337368918494, "loss": 0.2858, "step": 20455 }, { "epoch": 1.657161373946857, "grad_norm": 0.06001361459493637, "learning_rate": 0.00013015887303659032, "loss": 0.303, "step": 20456 }, { "epoch": 1.6572423849643552, "grad_norm": 0.05340325087308884, "learning_rate": 0.00013015437238399568, "loss": 0.2669, "step": 20457 }, { "epoch": 1.6573233959818534, "grad_norm": 0.050688330084085464, "learning_rate": 0.00013014987173140104, "loss": 0.2609, "step": 20458 }, { "epoch": 1.6574044069993519, "grad_norm": 0.05546950921416283, "learning_rate": 0.00013014537107880643, "loss": 0.3079, "step": 20459 }, { "epoch": 1.6574854180168503, "grad_norm": 0.05770743638277054, "learning_rate": 0.00013014087042621182, "loss": 0.2666, "step": 20460 }, { "epoch": 1.6575664290343486, "grad_norm": 0.05666600912809372, "learning_rate": 0.00013013636977361718, "loss": 0.2775, "step": 20461 }, { "epoch": 1.657647440051847, "grad_norm": 0.05800339952111244, "learning_rate": 0.00013013186912102257, "loss": 0.2936, "step": 20462 }, { "epoch": 1.6577284510693455, "grad_norm": 0.04769282788038254, "learning_rate": 0.00013012736846842793, "loss": 0.2965, "step": 20463 }, { "epoch": 1.6578094620868438, "grad_norm": 0.05040494725108147, "learning_rate": 0.00013012286781583329, "loss": 0.2545, "step": 20464 }, { "epoch": 1.6578904731043422, "grad_norm": 0.058276791125535965, "learning_rate": 0.00013011836716323867, "loss": 0.3043, "step": 20465 }, { "epoch": 1.6579714841218407, "grad_norm": 0.05103198066353798, "learning_rate": 0.00013011386651064406, "loss": 0.293, "step": 20466 }, { "epoch": 1.658052495139339, "grad_norm": 0.050009891390800476, "learning_rate": 0.00013010936585804942, "loss": 0.2961, "step": 20467 }, { "epoch": 1.6581335061568372, "grad_norm": 0.04965313896536827, "learning_rate": 0.0001301048652054548, "loss": 0.2734, "step": 20468 }, { "epoch": 1.6582145171743357, "grad_norm": 0.0604669414460659, "learning_rate": 0.00013010036455286017, "loss": 0.2839, "step": 20469 }, { "epoch": 1.6582955281918341, "grad_norm": 0.052262432873249054, "learning_rate": 0.00013009586390026553, "loss": 0.2666, "step": 20470 }, { "epoch": 1.6583765392093324, "grad_norm": 0.06164982542395592, "learning_rate": 0.00013009136324767094, "loss": 0.27, "step": 20471 }, { "epoch": 1.6584575502268308, "grad_norm": 0.049257468432188034, "learning_rate": 0.0001300868625950763, "loss": 0.2946, "step": 20472 }, { "epoch": 1.6585385612443293, "grad_norm": 0.04953254014253616, "learning_rate": 0.00013008236194248166, "loss": 0.2726, "step": 20473 }, { "epoch": 1.6586195722618275, "grad_norm": 0.047944556921720505, "learning_rate": 0.00013007786128988705, "loss": 0.2686, "step": 20474 }, { "epoch": 1.658700583279326, "grad_norm": 0.0578407347202301, "learning_rate": 0.0001300733606372924, "loss": 0.308, "step": 20475 }, { "epoch": 1.6587815942968245, "grad_norm": 0.048618391156196594, "learning_rate": 0.00013006885998469777, "loss": 0.2911, "step": 20476 }, { "epoch": 1.6588626053143227, "grad_norm": 0.05097212642431259, "learning_rate": 0.00013006435933210318, "loss": 0.2919, "step": 20477 }, { "epoch": 1.658943616331821, "grad_norm": 0.05795508995652199, "learning_rate": 0.00013005985867950854, "loss": 0.2925, "step": 20478 }, { "epoch": 1.6590246273493197, "grad_norm": 0.04863395914435387, "learning_rate": 0.0001300553580269139, "loss": 0.2612, "step": 20479 }, { "epoch": 1.659105638366818, "grad_norm": 0.0567551851272583, "learning_rate": 0.0001300508573743193, "loss": 0.2638, "step": 20480 }, { "epoch": 1.6591866493843161, "grad_norm": 0.05892769247293472, "learning_rate": 0.00013004635672172465, "loss": 0.3301, "step": 20481 }, { "epoch": 1.6592676604018146, "grad_norm": 0.049185313284397125, "learning_rate": 0.00013004185606913, "loss": 0.246, "step": 20482 }, { "epoch": 1.659348671419313, "grad_norm": 0.055416181683540344, "learning_rate": 0.00013003735541653542, "loss": 0.3175, "step": 20483 }, { "epoch": 1.6594296824368113, "grad_norm": 0.06165776774287224, "learning_rate": 0.00013003285476394078, "loss": 0.2874, "step": 20484 }, { "epoch": 1.6595106934543098, "grad_norm": 0.04640460014343262, "learning_rate": 0.00013002835411134614, "loss": 0.257, "step": 20485 }, { "epoch": 1.6595917044718083, "grad_norm": 0.04604089632630348, "learning_rate": 0.00013002385345875153, "loss": 0.2438, "step": 20486 }, { "epoch": 1.6596727154893065, "grad_norm": 0.04631912335753441, "learning_rate": 0.0001300193528061569, "loss": 0.272, "step": 20487 }, { "epoch": 1.659753726506805, "grad_norm": 0.04735549911856651, "learning_rate": 0.00013001485215356228, "loss": 0.2845, "step": 20488 }, { "epoch": 1.6598347375243034, "grad_norm": 0.05785086750984192, "learning_rate": 0.00013001035150096767, "loss": 0.3138, "step": 20489 }, { "epoch": 1.6599157485418017, "grad_norm": 0.046076275408267975, "learning_rate": 0.00013000585084837303, "loss": 0.2419, "step": 20490 }, { "epoch": 1.6599967595593, "grad_norm": 0.04774550348520279, "learning_rate": 0.00013000135019577839, "loss": 0.2504, "step": 20491 }, { "epoch": 1.6600777705767984, "grad_norm": 0.04708302021026611, "learning_rate": 0.00012999684954318377, "loss": 0.2562, "step": 20492 }, { "epoch": 1.6601587815942969, "grad_norm": 0.048026781529188156, "learning_rate": 0.00012999234889058913, "loss": 0.2327, "step": 20493 }, { "epoch": 1.660239792611795, "grad_norm": 0.05444342643022537, "learning_rate": 0.00012998784823799452, "loss": 0.2696, "step": 20494 }, { "epoch": 1.6603208036292936, "grad_norm": 0.05067823454737663, "learning_rate": 0.0001299833475853999, "loss": 0.2818, "step": 20495 }, { "epoch": 1.660401814646792, "grad_norm": 0.051078833639621735, "learning_rate": 0.00012997884693280527, "loss": 0.2722, "step": 20496 }, { "epoch": 1.6604828256642903, "grad_norm": 0.06498068571090698, "learning_rate": 0.00012997434628021063, "loss": 0.2903, "step": 20497 }, { "epoch": 1.6605638366817888, "grad_norm": 0.05589541792869568, "learning_rate": 0.00012996984562761601, "loss": 0.2993, "step": 20498 }, { "epoch": 1.6606448476992872, "grad_norm": 0.06637276709079742, "learning_rate": 0.00012996534497502137, "loss": 0.2834, "step": 20499 }, { "epoch": 1.6607258587167855, "grad_norm": 0.060838427394628525, "learning_rate": 0.00012996084432242676, "loss": 0.2652, "step": 20500 }, { "epoch": 1.6608068697342837, "grad_norm": 0.0554727278649807, "learning_rate": 0.00012995634366983215, "loss": 0.2752, "step": 20501 }, { "epoch": 1.6608878807517824, "grad_norm": 0.06209820508956909, "learning_rate": 0.0001299518430172375, "loss": 0.302, "step": 20502 }, { "epoch": 1.6609688917692806, "grad_norm": 0.05828804150223732, "learning_rate": 0.00012994734236464287, "loss": 0.2439, "step": 20503 }, { "epoch": 1.6610499027867789, "grad_norm": 0.05182472616434097, "learning_rate": 0.00012994284171204826, "loss": 0.2688, "step": 20504 }, { "epoch": 1.6611309138042774, "grad_norm": 0.0592801459133625, "learning_rate": 0.00012993834105945362, "loss": 0.2649, "step": 20505 }, { "epoch": 1.6612119248217758, "grad_norm": 0.047676533460617065, "learning_rate": 0.000129933840406859, "loss": 0.2698, "step": 20506 }, { "epoch": 1.661292935839274, "grad_norm": 0.058383118361234665, "learning_rate": 0.0001299293397542644, "loss": 0.3119, "step": 20507 }, { "epoch": 1.6613739468567725, "grad_norm": 0.05639087036252022, "learning_rate": 0.00012992483910166975, "loss": 0.2971, "step": 20508 }, { "epoch": 1.661454957874271, "grad_norm": 0.05297534912824631, "learning_rate": 0.0001299203384490751, "loss": 0.3108, "step": 20509 }, { "epoch": 1.6615359688917692, "grad_norm": 0.0611218698322773, "learning_rate": 0.0001299158377964805, "loss": 0.3098, "step": 20510 }, { "epoch": 1.6616169799092677, "grad_norm": 0.04799456521868706, "learning_rate": 0.00012991133714388589, "loss": 0.2553, "step": 20511 }, { "epoch": 1.6616979909267662, "grad_norm": 0.0584605410695076, "learning_rate": 0.00012990683649129125, "loss": 0.2777, "step": 20512 }, { "epoch": 1.6617790019442644, "grad_norm": 0.057419463992118835, "learning_rate": 0.00012990233583869663, "loss": 0.2732, "step": 20513 }, { "epoch": 1.6618600129617627, "grad_norm": 0.05271988362073898, "learning_rate": 0.000129897835186102, "loss": 0.2453, "step": 20514 }, { "epoch": 1.6619410239792611, "grad_norm": 0.04664996638894081, "learning_rate": 0.00012989333453350735, "loss": 0.2789, "step": 20515 }, { "epoch": 1.6620220349967596, "grad_norm": 0.05341833829879761, "learning_rate": 0.00012988883388091274, "loss": 0.3222, "step": 20516 }, { "epoch": 1.6621030460142578, "grad_norm": 0.06119184568524361, "learning_rate": 0.00012988433322831813, "loss": 0.3275, "step": 20517 }, { "epoch": 1.6621840570317563, "grad_norm": 0.05141368880867958, "learning_rate": 0.0001298798325757235, "loss": 0.2669, "step": 20518 }, { "epoch": 1.6622650680492548, "grad_norm": 0.05545364320278168, "learning_rate": 0.00012987533192312887, "loss": 0.299, "step": 20519 }, { "epoch": 1.662346079066753, "grad_norm": 0.060117967426776886, "learning_rate": 0.00012987083127053423, "loss": 0.2914, "step": 20520 }, { "epoch": 1.6624270900842515, "grad_norm": 0.04995759576559067, "learning_rate": 0.0001298663306179396, "loss": 0.2584, "step": 20521 }, { "epoch": 1.66250810110175, "grad_norm": 0.061563752591609955, "learning_rate": 0.00012986182996534498, "loss": 0.2928, "step": 20522 }, { "epoch": 1.6625891121192482, "grad_norm": 0.05292750149965286, "learning_rate": 0.00012985732931275037, "loss": 0.277, "step": 20523 }, { "epoch": 1.6626701231367464, "grad_norm": 0.0493243932723999, "learning_rate": 0.00012985282866015573, "loss": 0.2379, "step": 20524 }, { "epoch": 1.6627511341542451, "grad_norm": 0.05584977939724922, "learning_rate": 0.00012984832800756112, "loss": 0.3169, "step": 20525 }, { "epoch": 1.6628321451717434, "grad_norm": 0.05710172653198242, "learning_rate": 0.00012984382735496648, "loss": 0.3137, "step": 20526 }, { "epoch": 1.6629131561892416, "grad_norm": 0.05750979483127594, "learning_rate": 0.00012983932670237184, "loss": 0.2796, "step": 20527 }, { "epoch": 1.66299416720674, "grad_norm": 0.04641050845384598, "learning_rate": 0.00012983482604977722, "loss": 0.2926, "step": 20528 }, { "epoch": 1.6630751782242386, "grad_norm": 0.046793535351753235, "learning_rate": 0.0001298303253971826, "loss": 0.3039, "step": 20529 }, { "epoch": 1.6631561892417368, "grad_norm": 0.06085945665836334, "learning_rate": 0.00012982582474458797, "loss": 0.2852, "step": 20530 }, { "epoch": 1.6632372002592353, "grad_norm": 0.05577259510755539, "learning_rate": 0.00012982132409199336, "loss": 0.2757, "step": 20531 }, { "epoch": 1.6633182112767337, "grad_norm": 0.06003398075699806, "learning_rate": 0.00012981682343939872, "loss": 0.3303, "step": 20532 }, { "epoch": 1.663399222294232, "grad_norm": 0.04969777911901474, "learning_rate": 0.00012981232278680408, "loss": 0.2768, "step": 20533 }, { "epoch": 1.6634802333117304, "grad_norm": 0.04817051440477371, "learning_rate": 0.00012980782213420946, "loss": 0.285, "step": 20534 }, { "epoch": 1.663561244329229, "grad_norm": 0.05781611427664757, "learning_rate": 0.00012980332148161485, "loss": 0.3096, "step": 20535 }, { "epoch": 1.6636422553467272, "grad_norm": 0.05585617944598198, "learning_rate": 0.0001297988208290202, "loss": 0.2738, "step": 20536 }, { "epoch": 1.6637232663642254, "grad_norm": 0.06109308823943138, "learning_rate": 0.0001297943201764256, "loss": 0.3138, "step": 20537 }, { "epoch": 1.6638042773817239, "grad_norm": 0.06365103274583817, "learning_rate": 0.00012978981952383096, "loss": 0.2642, "step": 20538 }, { "epoch": 1.6638852883992223, "grad_norm": 0.05765408277511597, "learning_rate": 0.00012978531887123632, "loss": 0.2954, "step": 20539 }, { "epoch": 1.6639662994167206, "grad_norm": 0.05007627233862877, "learning_rate": 0.00012978081821864173, "loss": 0.2646, "step": 20540 }, { "epoch": 1.664047310434219, "grad_norm": 0.05124497786164284, "learning_rate": 0.0001297763175660471, "loss": 0.2546, "step": 20541 }, { "epoch": 1.6641283214517175, "grad_norm": 0.05661545321345329, "learning_rate": 0.00012977181691345245, "loss": 0.2833, "step": 20542 }, { "epoch": 1.6642093324692158, "grad_norm": 0.05698556825518608, "learning_rate": 0.00012976731626085784, "loss": 0.3098, "step": 20543 }, { "epoch": 1.6642903434867142, "grad_norm": 0.052347540855407715, "learning_rate": 0.0001297628156082632, "loss": 0.2823, "step": 20544 }, { "epoch": 1.6643713545042127, "grad_norm": 0.05601882189512253, "learning_rate": 0.00012975831495566856, "loss": 0.2739, "step": 20545 }, { "epoch": 1.664452365521711, "grad_norm": 0.04795810952782631, "learning_rate": 0.00012975381430307397, "loss": 0.3018, "step": 20546 }, { "epoch": 1.6645333765392092, "grad_norm": 0.053273119032382965, "learning_rate": 0.00012974931365047933, "loss": 0.2787, "step": 20547 }, { "epoch": 1.6646143875567079, "grad_norm": 0.058366890996694565, "learning_rate": 0.0001297448129978847, "loss": 0.2966, "step": 20548 }, { "epoch": 1.6646953985742061, "grad_norm": 0.05622422322630882, "learning_rate": 0.00012974031234529008, "loss": 0.2686, "step": 20549 }, { "epoch": 1.6647764095917044, "grad_norm": 0.07095862179994583, "learning_rate": 0.00012973581169269544, "loss": 0.3119, "step": 20550 }, { "epoch": 1.6648574206092028, "grad_norm": 0.056710973381996155, "learning_rate": 0.0001297313110401008, "loss": 0.2593, "step": 20551 }, { "epoch": 1.6649384316267013, "grad_norm": 0.05565338581800461, "learning_rate": 0.00012972681038750622, "loss": 0.2868, "step": 20552 }, { "epoch": 1.6650194426441995, "grad_norm": 0.05398929491639137, "learning_rate": 0.00012972230973491158, "loss": 0.3339, "step": 20553 }, { "epoch": 1.665100453661698, "grad_norm": 0.05687215179204941, "learning_rate": 0.00012971780908231694, "loss": 0.2824, "step": 20554 }, { "epoch": 1.6651814646791965, "grad_norm": 0.06144799664616585, "learning_rate": 0.00012971330842972232, "loss": 0.2891, "step": 20555 }, { "epoch": 1.6652624756966947, "grad_norm": 0.05557479336857796, "learning_rate": 0.00012970880777712768, "loss": 0.2883, "step": 20556 }, { "epoch": 1.665343486714193, "grad_norm": 0.050593696534633636, "learning_rate": 0.00012970430712453304, "loss": 0.2786, "step": 20557 }, { "epoch": 1.6654244977316917, "grad_norm": 0.049495626240968704, "learning_rate": 0.00012969980647193846, "loss": 0.28, "step": 20558 }, { "epoch": 1.66550550874919, "grad_norm": 0.05402037128806114, "learning_rate": 0.00012969530581934382, "loss": 0.3113, "step": 20559 }, { "epoch": 1.6655865197666881, "grad_norm": 0.06765372306108475, "learning_rate": 0.00012969080516674918, "loss": 0.332, "step": 20560 }, { "epoch": 1.6656675307841866, "grad_norm": 0.056443896144628525, "learning_rate": 0.00012968630451415457, "loss": 0.2614, "step": 20561 }, { "epoch": 1.665748541801685, "grad_norm": 0.058400485664606094, "learning_rate": 0.00012968180386155993, "loss": 0.2766, "step": 20562 }, { "epoch": 1.6658295528191833, "grad_norm": 0.05166015774011612, "learning_rate": 0.0001296773032089653, "loss": 0.2637, "step": 20563 }, { "epoch": 1.6659105638366818, "grad_norm": 0.0569235160946846, "learning_rate": 0.0001296728025563707, "loss": 0.3026, "step": 20564 }, { "epoch": 1.6659915748541803, "grad_norm": 0.06572642177343369, "learning_rate": 0.00012966830190377606, "loss": 0.297, "step": 20565 }, { "epoch": 1.6660725858716785, "grad_norm": 0.04707733914256096, "learning_rate": 0.00012966380125118142, "loss": 0.2663, "step": 20566 }, { "epoch": 1.666153596889177, "grad_norm": 0.06494346261024475, "learning_rate": 0.0001296593005985868, "loss": 0.3112, "step": 20567 }, { "epoch": 1.6662346079066754, "grad_norm": 0.05284969136118889, "learning_rate": 0.00012965479994599217, "loss": 0.2605, "step": 20568 }, { "epoch": 1.6663156189241737, "grad_norm": 0.06947685033082962, "learning_rate": 0.00012965029929339755, "loss": 0.2734, "step": 20569 }, { "epoch": 1.666396629941672, "grad_norm": 0.056318532675504684, "learning_rate": 0.00012964579864080294, "loss": 0.2526, "step": 20570 }, { "epoch": 1.6664776409591704, "grad_norm": 0.05064530298113823, "learning_rate": 0.0001296412979882083, "loss": 0.289, "step": 20571 }, { "epoch": 1.6665586519766689, "grad_norm": 0.04759865626692772, "learning_rate": 0.00012963679733561366, "loss": 0.2512, "step": 20572 }, { "epoch": 1.666639662994167, "grad_norm": 0.04251798987388611, "learning_rate": 0.00012963229668301905, "loss": 0.2449, "step": 20573 }, { "epoch": 1.6667206740116656, "grad_norm": 0.05017111077904701, "learning_rate": 0.0001296277960304244, "loss": 0.2238, "step": 20574 }, { "epoch": 1.666801685029164, "grad_norm": 0.0470963716506958, "learning_rate": 0.0001296232953778298, "loss": 0.2688, "step": 20575 }, { "epoch": 1.6668826960466623, "grad_norm": 0.057013124227523804, "learning_rate": 0.00012961879472523518, "loss": 0.2855, "step": 20576 }, { "epoch": 1.6669637070641607, "grad_norm": 0.0556722991168499, "learning_rate": 0.00012961429407264054, "loss": 0.2772, "step": 20577 }, { "epoch": 1.6670447180816592, "grad_norm": 0.04770893603563309, "learning_rate": 0.0001296097934200459, "loss": 0.2668, "step": 20578 }, { "epoch": 1.6671257290991575, "grad_norm": 0.05699243023991585, "learning_rate": 0.0001296052927674513, "loss": 0.3188, "step": 20579 }, { "epoch": 1.6672067401166557, "grad_norm": 0.055579714477062225, "learning_rate": 0.00012960079211485665, "loss": 0.3047, "step": 20580 }, { "epoch": 1.6672877511341544, "grad_norm": 0.05369487777352333, "learning_rate": 0.00012959629146226204, "loss": 0.2819, "step": 20581 }, { "epoch": 1.6673687621516526, "grad_norm": 0.049619536846876144, "learning_rate": 0.00012959179080966742, "loss": 0.2517, "step": 20582 }, { "epoch": 1.6674497731691509, "grad_norm": 0.04203467816114426, "learning_rate": 0.00012958729015707278, "loss": 0.2601, "step": 20583 }, { "epoch": 1.6675307841866494, "grad_norm": 0.05514657124876976, "learning_rate": 0.00012958278950447814, "loss": 0.2758, "step": 20584 }, { "epoch": 1.6676117952041478, "grad_norm": 0.052887558937072754, "learning_rate": 0.00012957828885188353, "loss": 0.2788, "step": 20585 }, { "epoch": 1.667692806221646, "grad_norm": 0.045983172953128815, "learning_rate": 0.0001295737881992889, "loss": 0.2397, "step": 20586 }, { "epoch": 1.6677738172391445, "grad_norm": 0.062085479497909546, "learning_rate": 0.00012956928754669428, "loss": 0.324, "step": 20587 }, { "epoch": 1.667854828256643, "grad_norm": 0.049464598298072815, "learning_rate": 0.00012956478689409967, "loss": 0.2674, "step": 20588 }, { "epoch": 1.6679358392741412, "grad_norm": 0.053387902677059174, "learning_rate": 0.00012956028624150503, "loss": 0.3101, "step": 20589 }, { "epoch": 1.6680168502916397, "grad_norm": 0.06461282074451447, "learning_rate": 0.00012955578558891039, "loss": 0.3134, "step": 20590 }, { "epoch": 1.6680978613091382, "grad_norm": 0.05711589381098747, "learning_rate": 0.00012955128493631577, "loss": 0.3026, "step": 20591 }, { "epoch": 1.6681788723266364, "grad_norm": 0.0830380916595459, "learning_rate": 0.00012954678428372116, "loss": 0.2814, "step": 20592 }, { "epoch": 1.6682598833441347, "grad_norm": 0.062311798334121704, "learning_rate": 0.00012954228363112652, "loss": 0.3021, "step": 20593 }, { "epoch": 1.6683408943616331, "grad_norm": 0.04709800332784653, "learning_rate": 0.0001295377829785319, "loss": 0.2514, "step": 20594 }, { "epoch": 1.6684219053791316, "grad_norm": 0.05270976200699806, "learning_rate": 0.00012953328232593727, "loss": 0.2879, "step": 20595 }, { "epoch": 1.6685029163966298, "grad_norm": 0.0634923130273819, "learning_rate": 0.00012952878167334263, "loss": 0.2683, "step": 20596 }, { "epoch": 1.6685839274141283, "grad_norm": 0.055968694388866425, "learning_rate": 0.00012952428102074802, "loss": 0.2837, "step": 20597 }, { "epoch": 1.6686649384316268, "grad_norm": 0.04898613691329956, "learning_rate": 0.0001295197803681534, "loss": 0.2484, "step": 20598 }, { "epoch": 1.668745949449125, "grad_norm": 0.05628184974193573, "learning_rate": 0.00012951527971555876, "loss": 0.2705, "step": 20599 }, { "epoch": 1.6688269604666235, "grad_norm": 0.05504019185900688, "learning_rate": 0.00012951077906296415, "loss": 0.3015, "step": 20600 }, { "epoch": 1.668907971484122, "grad_norm": 0.05534826219081879, "learning_rate": 0.0001295062784103695, "loss": 0.2768, "step": 20601 }, { "epoch": 1.6689889825016202, "grad_norm": 0.06548382341861725, "learning_rate": 0.00012950177775777487, "loss": 0.2393, "step": 20602 }, { "epoch": 1.6690699935191184, "grad_norm": 0.0632016733288765, "learning_rate": 0.00012949727710518026, "loss": 0.2924, "step": 20603 }, { "epoch": 1.6691510045366171, "grad_norm": 0.06453005969524384, "learning_rate": 0.00012949277645258564, "loss": 0.3037, "step": 20604 }, { "epoch": 1.6692320155541154, "grad_norm": 0.06613216549158096, "learning_rate": 0.000129488275799991, "loss": 0.3031, "step": 20605 }, { "epoch": 1.6693130265716136, "grad_norm": 0.050734955817461014, "learning_rate": 0.0001294837751473964, "loss": 0.3052, "step": 20606 }, { "epoch": 1.669394037589112, "grad_norm": 0.05854317173361778, "learning_rate": 0.00012947927449480175, "loss": 0.3104, "step": 20607 }, { "epoch": 1.6694750486066106, "grad_norm": 0.060037367045879364, "learning_rate": 0.0001294747738422071, "loss": 0.2905, "step": 20608 }, { "epoch": 1.6695560596241088, "grad_norm": 0.056980401277542114, "learning_rate": 0.0001294702731896125, "loss": 0.2875, "step": 20609 }, { "epoch": 1.6696370706416073, "grad_norm": 0.05283212289214134, "learning_rate": 0.00012946577253701789, "loss": 0.2639, "step": 20610 }, { "epoch": 1.6697180816591057, "grad_norm": 0.06313985586166382, "learning_rate": 0.00012946127188442325, "loss": 0.2819, "step": 20611 }, { "epoch": 1.669799092676604, "grad_norm": 0.05628564953804016, "learning_rate": 0.00012945677123182863, "loss": 0.3137, "step": 20612 }, { "epoch": 1.6698801036941024, "grad_norm": 0.06897181272506714, "learning_rate": 0.000129452270579234, "loss": 0.2928, "step": 20613 }, { "epoch": 1.669961114711601, "grad_norm": 0.05419771000742912, "learning_rate": 0.00012944776992663935, "loss": 0.2446, "step": 20614 }, { "epoch": 1.6700421257290992, "grad_norm": 0.05473244562745094, "learning_rate": 0.00012944326927404474, "loss": 0.2953, "step": 20615 }, { "epoch": 1.6701231367465974, "grad_norm": 0.05732650309801102, "learning_rate": 0.00012943876862145013, "loss": 0.3044, "step": 20616 }, { "epoch": 1.6702041477640959, "grad_norm": 0.052806396037340164, "learning_rate": 0.0001294342679688555, "loss": 0.2676, "step": 20617 }, { "epoch": 1.6702851587815943, "grad_norm": 0.05250782519578934, "learning_rate": 0.00012942976731626087, "loss": 0.2572, "step": 20618 }, { "epoch": 1.6703661697990926, "grad_norm": 0.05007362365722656, "learning_rate": 0.00012942526666366623, "loss": 0.2733, "step": 20619 }, { "epoch": 1.670447180816591, "grad_norm": 0.058031823486089706, "learning_rate": 0.0001294207660110716, "loss": 0.2791, "step": 20620 }, { "epoch": 1.6705281918340895, "grad_norm": 0.04678702726960182, "learning_rate": 0.000129416265358477, "loss": 0.2754, "step": 20621 }, { "epoch": 1.6706092028515878, "grad_norm": 0.05354061350226402, "learning_rate": 0.00012941176470588237, "loss": 0.2984, "step": 20622 }, { "epoch": 1.6706902138690862, "grad_norm": 0.04879770427942276, "learning_rate": 0.00012940726405328773, "loss": 0.2545, "step": 20623 }, { "epoch": 1.6707712248865847, "grad_norm": 0.046921178698539734, "learning_rate": 0.00012940276340069312, "loss": 0.2406, "step": 20624 }, { "epoch": 1.670852235904083, "grad_norm": 0.046346645802259445, "learning_rate": 0.00012939826274809848, "loss": 0.2841, "step": 20625 }, { "epoch": 1.6709332469215812, "grad_norm": 0.054475318640470505, "learning_rate": 0.00012939376209550384, "loss": 0.3229, "step": 20626 }, { "epoch": 1.6710142579390799, "grad_norm": 0.05155748873949051, "learning_rate": 0.00012938926144290925, "loss": 0.2663, "step": 20627 }, { "epoch": 1.6710952689565781, "grad_norm": 0.05086018890142441, "learning_rate": 0.0001293847607903146, "loss": 0.2469, "step": 20628 }, { "epoch": 1.6711762799740764, "grad_norm": 0.05731143429875374, "learning_rate": 0.00012938026013771997, "loss": 0.2846, "step": 20629 }, { "epoch": 1.6712572909915748, "grad_norm": 0.06155941262841225, "learning_rate": 0.00012937575948512536, "loss": 0.2922, "step": 20630 }, { "epoch": 1.6713383020090733, "grad_norm": 0.043573517352342606, "learning_rate": 0.00012937125883253072, "loss": 0.2457, "step": 20631 }, { "epoch": 1.6714193130265715, "grad_norm": 0.04966678470373154, "learning_rate": 0.00012936675817993608, "loss": 0.2833, "step": 20632 }, { "epoch": 1.67150032404407, "grad_norm": 0.05184457451105118, "learning_rate": 0.0001293622575273415, "loss": 0.2837, "step": 20633 }, { "epoch": 1.6715813350615685, "grad_norm": 0.052014995366334915, "learning_rate": 0.00012935775687474685, "loss": 0.2912, "step": 20634 }, { "epoch": 1.6716623460790667, "grad_norm": 0.053042974323034286, "learning_rate": 0.0001293532562221522, "loss": 0.2888, "step": 20635 }, { "epoch": 1.6717433570965652, "grad_norm": 0.0523151159286499, "learning_rate": 0.0001293487555695576, "loss": 0.2608, "step": 20636 }, { "epoch": 1.6718243681140637, "grad_norm": 0.05375725403428078, "learning_rate": 0.00012934425491696296, "loss": 0.2976, "step": 20637 }, { "epoch": 1.671905379131562, "grad_norm": 0.05377538502216339, "learning_rate": 0.00012933975426436832, "loss": 0.2689, "step": 20638 }, { "epoch": 1.6719863901490601, "grad_norm": 0.058378856629133224, "learning_rate": 0.00012933525361177373, "loss": 0.2767, "step": 20639 }, { "epoch": 1.6720674011665586, "grad_norm": 0.05989628657698631, "learning_rate": 0.0001293307529591791, "loss": 0.3311, "step": 20640 }, { "epoch": 1.672148412184057, "grad_norm": 0.05216878652572632, "learning_rate": 0.00012932625230658445, "loss": 0.2917, "step": 20641 }, { "epoch": 1.6722294232015553, "grad_norm": 0.05707364156842232, "learning_rate": 0.00012932175165398984, "loss": 0.3096, "step": 20642 }, { "epoch": 1.6723104342190538, "grad_norm": 0.04194723442196846, "learning_rate": 0.0001293172510013952, "loss": 0.2297, "step": 20643 }, { "epoch": 1.6723914452365523, "grad_norm": 0.04870683327317238, "learning_rate": 0.0001293127503488006, "loss": 0.2698, "step": 20644 }, { "epoch": 1.6724724562540505, "grad_norm": 0.05523337423801422, "learning_rate": 0.00012930824969620598, "loss": 0.3107, "step": 20645 }, { "epoch": 1.672553467271549, "grad_norm": 0.04324561730027199, "learning_rate": 0.00012930374904361134, "loss": 0.2683, "step": 20646 }, { "epoch": 1.6726344782890474, "grad_norm": 0.049787040799856186, "learning_rate": 0.0001292992483910167, "loss": 0.3126, "step": 20647 }, { "epoch": 1.6727154893065457, "grad_norm": 0.059789739549160004, "learning_rate": 0.00012929474773842208, "loss": 0.2588, "step": 20648 }, { "epoch": 1.672796500324044, "grad_norm": 0.05197549983859062, "learning_rate": 0.00012929024708582744, "loss": 0.2771, "step": 20649 }, { "epoch": 1.6728775113415426, "grad_norm": 0.05637570470571518, "learning_rate": 0.00012928574643323283, "loss": 0.2943, "step": 20650 }, { "epoch": 1.6729585223590409, "grad_norm": 0.06383640319108963, "learning_rate": 0.00012928124578063822, "loss": 0.278, "step": 20651 }, { "epoch": 1.673039533376539, "grad_norm": 0.05009127035737038, "learning_rate": 0.00012927674512804358, "loss": 0.2547, "step": 20652 }, { "epoch": 1.6731205443940376, "grad_norm": 0.05628490820527077, "learning_rate": 0.00012927224447544894, "loss": 0.2892, "step": 20653 }, { "epoch": 1.673201555411536, "grad_norm": 0.0458667129278183, "learning_rate": 0.00012926774382285432, "loss": 0.2209, "step": 20654 }, { "epoch": 1.6732825664290343, "grad_norm": 0.049527622759342194, "learning_rate": 0.00012926324317025968, "loss": 0.2369, "step": 20655 }, { "epoch": 1.6733635774465327, "grad_norm": 0.06553801894187927, "learning_rate": 0.00012925874251766507, "loss": 0.294, "step": 20656 }, { "epoch": 1.6734445884640312, "grad_norm": 0.06354054808616638, "learning_rate": 0.00012925424186507046, "loss": 0.294, "step": 20657 }, { "epoch": 1.6735255994815295, "grad_norm": 0.06330928206443787, "learning_rate": 0.00012924974121247582, "loss": 0.3038, "step": 20658 }, { "epoch": 1.6736066104990277, "grad_norm": 0.05782151222229004, "learning_rate": 0.00012924524055988118, "loss": 0.2555, "step": 20659 }, { "epoch": 1.6736876215165264, "grad_norm": 0.05652369186282158, "learning_rate": 0.00012924073990728657, "loss": 0.3064, "step": 20660 }, { "epoch": 1.6737686325340246, "grad_norm": 0.04747147858142853, "learning_rate": 0.00012923623925469193, "loss": 0.2983, "step": 20661 }, { "epoch": 1.6738496435515229, "grad_norm": 0.047677140682935715, "learning_rate": 0.0001292317386020973, "loss": 0.2811, "step": 20662 }, { "epoch": 1.6739306545690213, "grad_norm": 0.048979323357343674, "learning_rate": 0.0001292272379495027, "loss": 0.2795, "step": 20663 }, { "epoch": 1.6740116655865198, "grad_norm": 0.04414774477481842, "learning_rate": 0.00012922273729690806, "loss": 0.2478, "step": 20664 }, { "epoch": 1.674092676604018, "grad_norm": 0.060810793191194534, "learning_rate": 0.00012921823664431342, "loss": 0.2607, "step": 20665 }, { "epoch": 1.6741736876215165, "grad_norm": 0.0502205528318882, "learning_rate": 0.0001292137359917188, "loss": 0.2426, "step": 20666 }, { "epoch": 1.674254698639015, "grad_norm": 0.05981326103210449, "learning_rate": 0.00012920923533912417, "loss": 0.2912, "step": 20667 }, { "epoch": 1.6743357096565132, "grad_norm": 0.05012542009353638, "learning_rate": 0.00012920473468652955, "loss": 0.2721, "step": 20668 }, { "epoch": 1.6744167206740117, "grad_norm": 0.05499397963285446, "learning_rate": 0.00012920023403393494, "loss": 0.3, "step": 20669 }, { "epoch": 1.6744977316915102, "grad_norm": 0.057382624596357346, "learning_rate": 0.0001291957333813403, "loss": 0.278, "step": 20670 }, { "epoch": 1.6745787427090084, "grad_norm": 0.05538110062479973, "learning_rate": 0.00012919123272874566, "loss": 0.2779, "step": 20671 }, { "epoch": 1.6746597537265067, "grad_norm": 0.061685092747211456, "learning_rate": 0.00012918673207615105, "loss": 0.289, "step": 20672 }, { "epoch": 1.6747407647440054, "grad_norm": 0.07038842141628265, "learning_rate": 0.00012918223142355644, "loss": 0.3155, "step": 20673 }, { "epoch": 1.6748217757615036, "grad_norm": 0.060771241784095764, "learning_rate": 0.0001291777307709618, "loss": 0.3103, "step": 20674 }, { "epoch": 1.6749027867790018, "grad_norm": 0.052663542330265045, "learning_rate": 0.00012917323011836718, "loss": 0.2545, "step": 20675 }, { "epoch": 1.6749837977965003, "grad_norm": 0.0837271586060524, "learning_rate": 0.00012916872946577254, "loss": 0.3386, "step": 20676 }, { "epoch": 1.6750648088139988, "grad_norm": 0.045294780284166336, "learning_rate": 0.0001291642288131779, "loss": 0.2472, "step": 20677 }, { "epoch": 1.675145819831497, "grad_norm": 0.05026855319738388, "learning_rate": 0.0001291597281605833, "loss": 0.2859, "step": 20678 }, { "epoch": 1.6752268308489955, "grad_norm": 0.058996520936489105, "learning_rate": 0.00012915522750798868, "loss": 0.2834, "step": 20679 }, { "epoch": 1.675307841866494, "grad_norm": 0.05485003441572189, "learning_rate": 0.00012915072685539404, "loss": 0.2755, "step": 20680 }, { "epoch": 1.6753888528839922, "grad_norm": 0.05620567500591278, "learning_rate": 0.00012914622620279942, "loss": 0.2792, "step": 20681 }, { "epoch": 1.6754698639014904, "grad_norm": 0.051583871245384216, "learning_rate": 0.00012914172555020478, "loss": 0.2537, "step": 20682 }, { "epoch": 1.6755508749189891, "grad_norm": 0.050795476883649826, "learning_rate": 0.00012913722489761014, "loss": 0.308, "step": 20683 }, { "epoch": 1.6756318859364874, "grad_norm": 0.052895255386829376, "learning_rate": 0.00012913272424501553, "loss": 0.28, "step": 20684 }, { "epoch": 1.6757128969539856, "grad_norm": 0.051595091819763184, "learning_rate": 0.00012912822359242092, "loss": 0.3025, "step": 20685 }, { "epoch": 1.675793907971484, "grad_norm": 0.06392206996679306, "learning_rate": 0.00012912372293982628, "loss": 0.3176, "step": 20686 }, { "epoch": 1.6758749189889826, "grad_norm": 0.055683355778455734, "learning_rate": 0.00012911922228723167, "loss": 0.2902, "step": 20687 }, { "epoch": 1.6759559300064808, "grad_norm": 0.05265095829963684, "learning_rate": 0.00012911472163463703, "loss": 0.2988, "step": 20688 }, { "epoch": 1.6760369410239793, "grad_norm": 0.05111074075102806, "learning_rate": 0.0001291102209820424, "loss": 0.2888, "step": 20689 }, { "epoch": 1.6761179520414777, "grad_norm": 0.04726038873195648, "learning_rate": 0.00012910572032944777, "loss": 0.2649, "step": 20690 }, { "epoch": 1.676198963058976, "grad_norm": 0.05326800048351288, "learning_rate": 0.00012910121967685316, "loss": 0.2836, "step": 20691 }, { "epoch": 1.6762799740764744, "grad_norm": 0.061078645288944244, "learning_rate": 0.00012909671902425852, "loss": 0.2928, "step": 20692 }, { "epoch": 1.676360985093973, "grad_norm": 0.051358435302972794, "learning_rate": 0.0001290922183716639, "loss": 0.2802, "step": 20693 }, { "epoch": 1.6764419961114712, "grad_norm": 0.04708100110292435, "learning_rate": 0.00012908771771906927, "loss": 0.3042, "step": 20694 }, { "epoch": 1.6765230071289694, "grad_norm": 0.05056946352124214, "learning_rate": 0.00012908321706647463, "loss": 0.2921, "step": 20695 }, { "epoch": 1.6766040181464679, "grad_norm": 0.05706246197223663, "learning_rate": 0.00012907871641388004, "loss": 0.3176, "step": 20696 }, { "epoch": 1.6766850291639663, "grad_norm": 0.06805767118930817, "learning_rate": 0.0001290742157612854, "loss": 0.2965, "step": 20697 }, { "epoch": 1.6767660401814646, "grad_norm": 0.06454560905694962, "learning_rate": 0.00012906971510869076, "loss": 0.2863, "step": 20698 }, { "epoch": 1.676847051198963, "grad_norm": 0.052051447331905365, "learning_rate": 0.00012906521445609615, "loss": 0.297, "step": 20699 }, { "epoch": 1.6769280622164615, "grad_norm": 0.05402744561433792, "learning_rate": 0.0001290607138035015, "loss": 0.2792, "step": 20700 }, { "epoch": 1.6770090732339598, "grad_norm": 0.0522739440202713, "learning_rate": 0.00012905621315090687, "loss": 0.3004, "step": 20701 }, { "epoch": 1.6770900842514582, "grad_norm": 0.04815400764346123, "learning_rate": 0.00012905171249831228, "loss": 0.2512, "step": 20702 }, { "epoch": 1.6771710952689567, "grad_norm": 0.05232715234160423, "learning_rate": 0.00012904721184571764, "loss": 0.2482, "step": 20703 }, { "epoch": 1.677252106286455, "grad_norm": 0.058511216193437576, "learning_rate": 0.000129042711193123, "loss": 0.2865, "step": 20704 }, { "epoch": 1.6773331173039532, "grad_norm": 0.05682501196861267, "learning_rate": 0.0001290382105405284, "loss": 0.2623, "step": 20705 }, { "epoch": 1.6774141283214519, "grad_norm": 0.05637514218688011, "learning_rate": 0.00012903370988793375, "loss": 0.2702, "step": 20706 }, { "epoch": 1.6774951393389501, "grad_norm": 0.05251268669962883, "learning_rate": 0.0001290292092353391, "loss": 0.3053, "step": 20707 }, { "epoch": 1.6775761503564484, "grad_norm": 0.04838861525058746, "learning_rate": 0.00012902470858274453, "loss": 0.2284, "step": 20708 }, { "epoch": 1.6776571613739468, "grad_norm": 0.05945704132318497, "learning_rate": 0.00012902020793014989, "loss": 0.2693, "step": 20709 }, { "epoch": 1.6777381723914453, "grad_norm": 0.051180288195610046, "learning_rate": 0.00012901570727755525, "loss": 0.248, "step": 20710 }, { "epoch": 1.6778191834089435, "grad_norm": 0.053558263927698135, "learning_rate": 0.00012901120662496063, "loss": 0.282, "step": 20711 }, { "epoch": 1.677900194426442, "grad_norm": 0.05868763476610184, "learning_rate": 0.000129006705972366, "loss": 0.3009, "step": 20712 }, { "epoch": 1.6779812054439405, "grad_norm": 0.05742897465825081, "learning_rate": 0.00012900220531977135, "loss": 0.296, "step": 20713 }, { "epoch": 1.6780622164614387, "grad_norm": 0.05803952366113663, "learning_rate": 0.00012899770466717677, "loss": 0.2632, "step": 20714 }, { "epoch": 1.6781432274789372, "grad_norm": 0.053818389773368835, "learning_rate": 0.00012899320401458213, "loss": 0.2453, "step": 20715 }, { "epoch": 1.6782242384964356, "grad_norm": 0.06133642420172691, "learning_rate": 0.0001289887033619875, "loss": 0.2794, "step": 20716 }, { "epoch": 1.678305249513934, "grad_norm": 0.05301635339856148, "learning_rate": 0.00012898420270939287, "loss": 0.3004, "step": 20717 }, { "epoch": 1.6783862605314321, "grad_norm": 0.05932917818427086, "learning_rate": 0.00012897970205679823, "loss": 0.2614, "step": 20718 }, { "epoch": 1.6784672715489306, "grad_norm": 0.04853160306811333, "learning_rate": 0.0001289752014042036, "loss": 0.2496, "step": 20719 }, { "epoch": 1.678548282566429, "grad_norm": 0.058354031294584274, "learning_rate": 0.000128970700751609, "loss": 0.3044, "step": 20720 }, { "epoch": 1.6786292935839273, "grad_norm": 0.05149131640791893, "learning_rate": 0.00012896620009901437, "loss": 0.2766, "step": 20721 }, { "epoch": 1.6787103046014258, "grad_norm": 0.05151427909731865, "learning_rate": 0.00012896169944641973, "loss": 0.2734, "step": 20722 }, { "epoch": 1.6787913156189243, "grad_norm": 0.05959561467170715, "learning_rate": 0.00012895719879382512, "loss": 0.2876, "step": 20723 }, { "epoch": 1.6788723266364225, "grad_norm": 0.05226140841841698, "learning_rate": 0.00012895269814123048, "loss": 0.261, "step": 20724 }, { "epoch": 1.678953337653921, "grad_norm": 0.044787511229515076, "learning_rate": 0.00012894819748863586, "loss": 0.2482, "step": 20725 }, { "epoch": 1.6790343486714194, "grad_norm": 0.04887351393699646, "learning_rate": 0.00012894369683604125, "loss": 0.2702, "step": 20726 }, { "epoch": 1.6791153596889177, "grad_norm": 0.06846939772367477, "learning_rate": 0.0001289391961834466, "loss": 0.3236, "step": 20727 }, { "epoch": 1.679196370706416, "grad_norm": 0.04261481389403343, "learning_rate": 0.00012893469553085197, "loss": 0.2375, "step": 20728 }, { "epoch": 1.6792773817239146, "grad_norm": 0.05724846199154854, "learning_rate": 0.00012893019487825736, "loss": 0.2916, "step": 20729 }, { "epoch": 1.6793583927414129, "grad_norm": 0.053449541330337524, "learning_rate": 0.00012892569422566272, "loss": 0.2881, "step": 20730 }, { "epoch": 1.679439403758911, "grad_norm": 0.06028129532933235, "learning_rate": 0.0001289211935730681, "loss": 0.3287, "step": 20731 }, { "epoch": 1.6795204147764096, "grad_norm": 0.0621492937207222, "learning_rate": 0.0001289166929204735, "loss": 0.2622, "step": 20732 }, { "epoch": 1.679601425793908, "grad_norm": 0.060144782066345215, "learning_rate": 0.00012891219226787885, "loss": 0.3052, "step": 20733 }, { "epoch": 1.6796824368114063, "grad_norm": 0.05058223754167557, "learning_rate": 0.0001289076916152842, "loss": 0.2842, "step": 20734 }, { "epoch": 1.6797634478289047, "grad_norm": 0.04709825664758682, "learning_rate": 0.0001289031909626896, "loss": 0.2284, "step": 20735 }, { "epoch": 1.6798444588464032, "grad_norm": 0.05158427730202675, "learning_rate": 0.00012889869031009496, "loss": 0.2426, "step": 20736 }, { "epoch": 1.6799254698639015, "grad_norm": 0.07317867130041122, "learning_rate": 0.00012889418965750035, "loss": 0.2963, "step": 20737 }, { "epoch": 1.6800064808814, "grad_norm": 0.05780772864818573, "learning_rate": 0.00012888968900490573, "loss": 0.3051, "step": 20738 }, { "epoch": 1.6800874918988984, "grad_norm": 0.05150393769145012, "learning_rate": 0.0001288851883523111, "loss": 0.2998, "step": 20739 }, { "epoch": 1.6801685029163966, "grad_norm": 0.048123449087142944, "learning_rate": 0.00012888068769971645, "loss": 0.25, "step": 20740 }, { "epoch": 1.6802495139338949, "grad_norm": 0.054436638951301575, "learning_rate": 0.00012887618704712184, "loss": 0.3114, "step": 20741 }, { "epoch": 1.6803305249513933, "grad_norm": 0.05005558952689171, "learning_rate": 0.0001288716863945272, "loss": 0.2566, "step": 20742 }, { "epoch": 1.6804115359688918, "grad_norm": 0.060406871140003204, "learning_rate": 0.0001288671857419326, "loss": 0.2977, "step": 20743 }, { "epoch": 1.68049254698639, "grad_norm": 0.05145399272441864, "learning_rate": 0.00012886268508933798, "loss": 0.2792, "step": 20744 }, { "epoch": 1.6805735580038885, "grad_norm": 0.04409467428922653, "learning_rate": 0.00012885818443674334, "loss": 0.253, "step": 20745 }, { "epoch": 1.680654569021387, "grad_norm": 0.05243460834026337, "learning_rate": 0.0001288536837841487, "loss": 0.2766, "step": 20746 }, { "epoch": 1.6807355800388852, "grad_norm": 0.05014029145240784, "learning_rate": 0.00012884918313155408, "loss": 0.2773, "step": 20747 }, { "epoch": 1.6808165910563837, "grad_norm": 0.05909423530101776, "learning_rate": 0.00012884468247895944, "loss": 0.2925, "step": 20748 }, { "epoch": 1.6808976020738822, "grad_norm": 0.053994834423065186, "learning_rate": 0.00012884018182636483, "loss": 0.2547, "step": 20749 }, { "epoch": 1.6809786130913804, "grad_norm": 0.059154659509658813, "learning_rate": 0.00012883568117377022, "loss": 0.3333, "step": 20750 }, { "epoch": 1.6810596241088787, "grad_norm": 0.0678883045911789, "learning_rate": 0.00012883118052117558, "loss": 0.3044, "step": 20751 }, { "epoch": 1.6811406351263773, "grad_norm": 0.049253012984991074, "learning_rate": 0.00012882667986858094, "loss": 0.2851, "step": 20752 }, { "epoch": 1.6812216461438756, "grad_norm": 0.06529238820075989, "learning_rate": 0.00012882217921598632, "loss": 0.3103, "step": 20753 }, { "epoch": 1.6813026571613738, "grad_norm": 0.05641709268093109, "learning_rate": 0.0001288176785633917, "loss": 0.269, "step": 20754 }, { "epoch": 1.6813836681788723, "grad_norm": 0.05562283471226692, "learning_rate": 0.00012881317791079707, "loss": 0.299, "step": 20755 }, { "epoch": 1.6814646791963708, "grad_norm": 0.042370688170194626, "learning_rate": 0.00012880867725820246, "loss": 0.2386, "step": 20756 }, { "epoch": 1.681545690213869, "grad_norm": 0.05039699375629425, "learning_rate": 0.00012880417660560782, "loss": 0.2541, "step": 20757 }, { "epoch": 1.6816267012313675, "grad_norm": 0.06104245409369469, "learning_rate": 0.00012879967595301318, "loss": 0.2773, "step": 20758 }, { "epoch": 1.681707712248866, "grad_norm": 0.05837586894631386, "learning_rate": 0.00012879517530041857, "loss": 0.295, "step": 20759 }, { "epoch": 1.6817887232663642, "grad_norm": 0.046630583703517914, "learning_rate": 0.00012879067464782395, "loss": 0.264, "step": 20760 }, { "epoch": 1.6818697342838627, "grad_norm": 0.05475366860628128, "learning_rate": 0.0001287861739952293, "loss": 0.2939, "step": 20761 }, { "epoch": 1.6819507453013611, "grad_norm": 0.048055436462163925, "learning_rate": 0.0001287816733426347, "loss": 0.2697, "step": 20762 }, { "epoch": 1.6820317563188594, "grad_norm": 0.055682431906461716, "learning_rate": 0.00012877717269004006, "loss": 0.2667, "step": 20763 }, { "epoch": 1.6821127673363576, "grad_norm": 0.04282234236598015, "learning_rate": 0.00012877267203744542, "loss": 0.2542, "step": 20764 }, { "epoch": 1.682193778353856, "grad_norm": 0.05483906716108322, "learning_rate": 0.0001287681713848508, "loss": 0.3043, "step": 20765 }, { "epoch": 1.6822747893713546, "grad_norm": 0.044364769011735916, "learning_rate": 0.0001287636707322562, "loss": 0.2642, "step": 20766 }, { "epoch": 1.6823558003888528, "grad_norm": 0.06031596660614014, "learning_rate": 0.00012875917007966155, "loss": 0.295, "step": 20767 }, { "epoch": 1.6824368114063513, "grad_norm": 0.04557067155838013, "learning_rate": 0.00012875466942706694, "loss": 0.2715, "step": 20768 }, { "epoch": 1.6825178224238497, "grad_norm": 0.058976978063583374, "learning_rate": 0.0001287501687744723, "loss": 0.2877, "step": 20769 }, { "epoch": 1.682598833441348, "grad_norm": 0.0493365079164505, "learning_rate": 0.00012874566812187766, "loss": 0.2472, "step": 20770 }, { "epoch": 1.6826798444588464, "grad_norm": 0.07643086463212967, "learning_rate": 0.00012874116746928305, "loss": 0.3133, "step": 20771 }, { "epoch": 1.682760855476345, "grad_norm": 0.06138523668050766, "learning_rate": 0.00012873666681668844, "loss": 0.3085, "step": 20772 }, { "epoch": 1.6828418664938432, "grad_norm": 0.049202244728803635, "learning_rate": 0.0001287321661640938, "loss": 0.2776, "step": 20773 }, { "epoch": 1.6829228775113414, "grad_norm": 0.05692125856876373, "learning_rate": 0.00012872766551149918, "loss": 0.2871, "step": 20774 }, { "epoch": 1.68300388852884, "grad_norm": 0.04877998307347298, "learning_rate": 0.00012872316485890454, "loss": 0.291, "step": 20775 }, { "epoch": 1.6830848995463383, "grad_norm": 0.05860499292612076, "learning_rate": 0.0001287186642063099, "loss": 0.3119, "step": 20776 }, { "epoch": 1.6831659105638366, "grad_norm": 0.05695262551307678, "learning_rate": 0.00012871416355371532, "loss": 0.2751, "step": 20777 }, { "epoch": 1.683246921581335, "grad_norm": 0.0618031769990921, "learning_rate": 0.00012870966290112068, "loss": 0.3365, "step": 20778 }, { "epoch": 1.6833279325988335, "grad_norm": 0.05980094522237778, "learning_rate": 0.00012870516224852604, "loss": 0.3221, "step": 20779 }, { "epoch": 1.6834089436163318, "grad_norm": 0.04977751895785332, "learning_rate": 0.00012870066159593143, "loss": 0.3018, "step": 20780 }, { "epoch": 1.6834899546338302, "grad_norm": 0.04806521162390709, "learning_rate": 0.00012869616094333679, "loss": 0.2362, "step": 20781 }, { "epoch": 1.6835709656513287, "grad_norm": 0.06428217142820358, "learning_rate": 0.00012869166029074215, "loss": 0.2974, "step": 20782 }, { "epoch": 1.683651976668827, "grad_norm": 0.048949554562568665, "learning_rate": 0.00012868715963814756, "loss": 0.2726, "step": 20783 }, { "epoch": 1.6837329876863252, "grad_norm": 0.04711005091667175, "learning_rate": 0.00012868265898555292, "loss": 0.2634, "step": 20784 }, { "epoch": 1.6838139987038239, "grad_norm": 0.05052475258708, "learning_rate": 0.00012867815833295828, "loss": 0.2577, "step": 20785 }, { "epoch": 1.6838950097213221, "grad_norm": 0.062074240297079086, "learning_rate": 0.00012867365768036367, "loss": 0.3477, "step": 20786 }, { "epoch": 1.6839760207388204, "grad_norm": 0.05127348750829697, "learning_rate": 0.00012866915702776903, "loss": 0.3066, "step": 20787 }, { "epoch": 1.6840570317563188, "grad_norm": 0.0517883263528347, "learning_rate": 0.0001286646563751744, "loss": 0.2778, "step": 20788 }, { "epoch": 1.6841380427738173, "grad_norm": 0.050224028527736664, "learning_rate": 0.0001286601557225798, "loss": 0.2827, "step": 20789 }, { "epoch": 1.6842190537913155, "grad_norm": 0.05457613989710808, "learning_rate": 0.00012865565506998516, "loss": 0.3157, "step": 20790 }, { "epoch": 1.684300064808814, "grad_norm": 0.05149427428841591, "learning_rate": 0.00012865115441739052, "loss": 0.2896, "step": 20791 }, { "epoch": 1.6843810758263125, "grad_norm": 0.050849854946136475, "learning_rate": 0.0001286466537647959, "loss": 0.311, "step": 20792 }, { "epoch": 1.6844620868438107, "grad_norm": 0.05668675899505615, "learning_rate": 0.00012864215311220127, "loss": 0.326, "step": 20793 }, { "epoch": 1.6845430978613092, "grad_norm": 0.05928093567490578, "learning_rate": 0.00012863765245960663, "loss": 0.3129, "step": 20794 }, { "epoch": 1.6846241088788076, "grad_norm": 0.049157604575157166, "learning_rate": 0.00012863315180701204, "loss": 0.2941, "step": 20795 }, { "epoch": 1.684705119896306, "grad_norm": 0.05986670032143593, "learning_rate": 0.0001286286511544174, "loss": 0.282, "step": 20796 }, { "epoch": 1.6847861309138041, "grad_norm": 0.05676200985908508, "learning_rate": 0.00012862415050182276, "loss": 0.2784, "step": 20797 }, { "epoch": 1.6848671419313026, "grad_norm": 0.0540945827960968, "learning_rate": 0.00012861964984922815, "loss": 0.2402, "step": 20798 }, { "epoch": 1.684948152948801, "grad_norm": 0.055026695132255554, "learning_rate": 0.0001286151491966335, "loss": 0.2869, "step": 20799 }, { "epoch": 1.6850291639662993, "grad_norm": 0.057930923998355865, "learning_rate": 0.00012861064854403887, "loss": 0.2931, "step": 20800 }, { "epoch": 1.6851101749837978, "grad_norm": 0.05763215571641922, "learning_rate": 0.00012860614789144428, "loss": 0.2743, "step": 20801 }, { "epoch": 1.6851911860012962, "grad_norm": 0.061730217188596725, "learning_rate": 0.00012860164723884964, "loss": 0.2572, "step": 20802 }, { "epoch": 1.6852721970187945, "grad_norm": 0.05967223271727562, "learning_rate": 0.000128597146586255, "loss": 0.2968, "step": 20803 }, { "epoch": 1.685353208036293, "grad_norm": 0.06001397594809532, "learning_rate": 0.0001285926459336604, "loss": 0.3049, "step": 20804 }, { "epoch": 1.6854342190537914, "grad_norm": 0.05998661741614342, "learning_rate": 0.00012858814528106575, "loss": 0.2757, "step": 20805 }, { "epoch": 1.6855152300712897, "grad_norm": 0.05021259933710098, "learning_rate": 0.00012858364462847114, "loss": 0.2494, "step": 20806 }, { "epoch": 1.685596241088788, "grad_norm": 0.06738859415054321, "learning_rate": 0.00012857914397587653, "loss": 0.2915, "step": 20807 }, { "epoch": 1.6856772521062866, "grad_norm": 0.04895463213324547, "learning_rate": 0.00012857464332328189, "loss": 0.2639, "step": 20808 }, { "epoch": 1.6857582631237849, "grad_norm": 0.05599305033683777, "learning_rate": 0.00012857014267068725, "loss": 0.2374, "step": 20809 }, { "epoch": 1.685839274141283, "grad_norm": 0.07651516050100327, "learning_rate": 0.00012856564201809263, "loss": 0.2839, "step": 20810 }, { "epoch": 1.6859202851587816, "grad_norm": 0.046530772000551224, "learning_rate": 0.000128561141365498, "loss": 0.2518, "step": 20811 }, { "epoch": 1.68600129617628, "grad_norm": 0.06377851963043213, "learning_rate": 0.00012855664071290338, "loss": 0.2948, "step": 20812 }, { "epoch": 1.6860823071937783, "grad_norm": 0.04535413905978203, "learning_rate": 0.00012855214006030877, "loss": 0.2587, "step": 20813 }, { "epoch": 1.6861633182112767, "grad_norm": 0.06376946717500687, "learning_rate": 0.00012854763940771413, "loss": 0.2953, "step": 20814 }, { "epoch": 1.6862443292287752, "grad_norm": 0.061817001551389694, "learning_rate": 0.0001285431387551195, "loss": 0.2642, "step": 20815 }, { "epoch": 1.6863253402462735, "grad_norm": 0.05439610034227371, "learning_rate": 0.00012853863810252487, "loss": 0.283, "step": 20816 }, { "epoch": 1.686406351263772, "grad_norm": 0.05513634905219078, "learning_rate": 0.00012853413744993023, "loss": 0.2958, "step": 20817 }, { "epoch": 1.6864873622812704, "grad_norm": 0.05512907728552818, "learning_rate": 0.00012852963679733562, "loss": 0.2607, "step": 20818 }, { "epoch": 1.6865683732987686, "grad_norm": 0.046912264078855515, "learning_rate": 0.000128525136144741, "loss": 0.2632, "step": 20819 }, { "epoch": 1.6866493843162669, "grad_norm": 0.06997843831777573, "learning_rate": 0.00012852063549214637, "loss": 0.3013, "step": 20820 }, { "epoch": 1.6867303953337653, "grad_norm": 0.0577542781829834, "learning_rate": 0.00012851613483955173, "loss": 0.3108, "step": 20821 }, { "epoch": 1.6868114063512638, "grad_norm": 0.057242099195718765, "learning_rate": 0.00012851163418695712, "loss": 0.2931, "step": 20822 }, { "epoch": 1.686892417368762, "grad_norm": 0.05348726734519005, "learning_rate": 0.00012850713353436248, "loss": 0.2881, "step": 20823 }, { "epoch": 1.6869734283862605, "grad_norm": 0.05593732371926308, "learning_rate": 0.00012850263288176786, "loss": 0.2489, "step": 20824 }, { "epoch": 1.687054439403759, "grad_norm": 0.05484984442591667, "learning_rate": 0.00012849813222917325, "loss": 0.2887, "step": 20825 }, { "epoch": 1.6871354504212572, "grad_norm": 0.0500054694712162, "learning_rate": 0.0001284936315765786, "loss": 0.2889, "step": 20826 }, { "epoch": 1.6872164614387557, "grad_norm": 0.04467097297310829, "learning_rate": 0.00012848913092398397, "loss": 0.2735, "step": 20827 }, { "epoch": 1.6872974724562542, "grad_norm": 0.05153514817357063, "learning_rate": 0.00012848463027138936, "loss": 0.2521, "step": 20828 }, { "epoch": 1.6873784834737524, "grad_norm": 0.04148275777697563, "learning_rate": 0.00012848012961879475, "loss": 0.2359, "step": 20829 }, { "epoch": 1.6874594944912507, "grad_norm": 0.057666435837745667, "learning_rate": 0.0001284756289662001, "loss": 0.2951, "step": 20830 }, { "epoch": 1.6875405055087493, "grad_norm": 0.05120784044265747, "learning_rate": 0.0001284711283136055, "loss": 0.2797, "step": 20831 }, { "epoch": 1.6876215165262476, "grad_norm": 0.04996515065431595, "learning_rate": 0.00012846662766101085, "loss": 0.2624, "step": 20832 }, { "epoch": 1.6877025275437458, "grad_norm": 0.07200618833303452, "learning_rate": 0.0001284621270084162, "loss": 0.296, "step": 20833 }, { "epoch": 1.6877835385612443, "grad_norm": 0.06274188309907913, "learning_rate": 0.0001284576263558216, "loss": 0.3114, "step": 20834 }, { "epoch": 1.6878645495787428, "grad_norm": 0.058538395911455154, "learning_rate": 0.000128453125703227, "loss": 0.3231, "step": 20835 }, { "epoch": 1.687945560596241, "grad_norm": 0.05318986251950264, "learning_rate": 0.00012844862505063235, "loss": 0.2907, "step": 20836 }, { "epoch": 1.6880265716137395, "grad_norm": 0.04951918497681618, "learning_rate": 0.00012844412439803773, "loss": 0.2628, "step": 20837 }, { "epoch": 1.688107582631238, "grad_norm": 0.03856775164604187, "learning_rate": 0.0001284396237454431, "loss": 0.232, "step": 20838 }, { "epoch": 1.6881885936487362, "grad_norm": 0.051587074995040894, "learning_rate": 0.00012843512309284845, "loss": 0.3128, "step": 20839 }, { "epoch": 1.6882696046662347, "grad_norm": 0.04774363711476326, "learning_rate": 0.00012843062244025384, "loss": 0.2544, "step": 20840 }, { "epoch": 1.6883506156837331, "grad_norm": 0.05891667678952217, "learning_rate": 0.00012842612178765923, "loss": 0.3391, "step": 20841 }, { "epoch": 1.6884316267012314, "grad_norm": 0.04155726358294487, "learning_rate": 0.0001284216211350646, "loss": 0.2679, "step": 20842 }, { "epoch": 1.6885126377187296, "grad_norm": 0.060029737651348114, "learning_rate": 0.00012841712048246998, "loss": 0.2772, "step": 20843 }, { "epoch": 1.688593648736228, "grad_norm": 0.05514264106750488, "learning_rate": 0.00012841261982987534, "loss": 0.3129, "step": 20844 }, { "epoch": 1.6886746597537265, "grad_norm": 0.05384942144155502, "learning_rate": 0.0001284081191772807, "loss": 0.283, "step": 20845 }, { "epoch": 1.6887556707712248, "grad_norm": 0.043618954718112946, "learning_rate": 0.00012840361852468608, "loss": 0.2453, "step": 20846 }, { "epoch": 1.6888366817887233, "grad_norm": 0.05581897497177124, "learning_rate": 0.00012839911787209147, "loss": 0.2889, "step": 20847 }, { "epoch": 1.6889176928062217, "grad_norm": 0.05275612697005272, "learning_rate": 0.00012839461721949683, "loss": 0.2718, "step": 20848 }, { "epoch": 1.68899870382372, "grad_norm": 0.045593030750751495, "learning_rate": 0.00012839011656690222, "loss": 0.2462, "step": 20849 }, { "epoch": 1.6890797148412184, "grad_norm": 0.05088045448064804, "learning_rate": 0.00012838561591430758, "loss": 0.2864, "step": 20850 }, { "epoch": 1.689160725858717, "grad_norm": 0.056569818407297134, "learning_rate": 0.00012838111526171294, "loss": 0.2831, "step": 20851 }, { "epoch": 1.6892417368762151, "grad_norm": 0.06638054549694061, "learning_rate": 0.00012837661460911832, "loss": 0.2928, "step": 20852 }, { "epoch": 1.6893227478937134, "grad_norm": 0.05728859081864357, "learning_rate": 0.0001283721139565237, "loss": 0.3075, "step": 20853 }, { "epoch": 1.689403758911212, "grad_norm": 0.04625353589653969, "learning_rate": 0.00012836761330392907, "loss": 0.2441, "step": 20854 }, { "epoch": 1.6894847699287103, "grad_norm": 0.0503210686147213, "learning_rate": 0.00012836311265133446, "loss": 0.2689, "step": 20855 }, { "epoch": 1.6895657809462086, "grad_norm": 0.054897889494895935, "learning_rate": 0.00012835861199873982, "loss": 0.2731, "step": 20856 }, { "epoch": 1.689646791963707, "grad_norm": 0.04353231564164162, "learning_rate": 0.00012835411134614518, "loss": 0.2226, "step": 20857 }, { "epoch": 1.6897278029812055, "grad_norm": 0.07019881904125214, "learning_rate": 0.0001283496106935506, "loss": 0.308, "step": 20858 }, { "epoch": 1.6898088139987038, "grad_norm": 0.061276018619537354, "learning_rate": 0.00012834511004095595, "loss": 0.2721, "step": 20859 }, { "epoch": 1.6898898250162022, "grad_norm": 0.05636150389909744, "learning_rate": 0.0001283406093883613, "loss": 0.3094, "step": 20860 }, { "epoch": 1.6899708360337007, "grad_norm": 0.05796360224485397, "learning_rate": 0.0001283361087357667, "loss": 0.3013, "step": 20861 }, { "epoch": 1.690051847051199, "grad_norm": 0.06425435841083527, "learning_rate": 0.00012833160808317206, "loss": 0.2917, "step": 20862 }, { "epoch": 1.6901328580686974, "grad_norm": 0.046928420662879944, "learning_rate": 0.00012832710743057742, "loss": 0.254, "step": 20863 }, { "epoch": 1.6902138690861959, "grad_norm": 0.05220559984445572, "learning_rate": 0.00012832260677798283, "loss": 0.2858, "step": 20864 }, { "epoch": 1.690294880103694, "grad_norm": 0.04551084712147713, "learning_rate": 0.0001283181061253882, "loss": 0.2597, "step": 20865 }, { "epoch": 1.6903758911211924, "grad_norm": 0.05507994815707207, "learning_rate": 0.00012831360547279356, "loss": 0.2619, "step": 20866 }, { "epoch": 1.6904569021386908, "grad_norm": 0.06035435572266579, "learning_rate": 0.00012830910482019894, "loss": 0.3125, "step": 20867 }, { "epoch": 1.6905379131561893, "grad_norm": 0.05258706584572792, "learning_rate": 0.0001283046041676043, "loss": 0.2726, "step": 20868 }, { "epoch": 1.6906189241736875, "grad_norm": 0.05856553837656975, "learning_rate": 0.00012830010351500966, "loss": 0.2849, "step": 20869 }, { "epoch": 1.690699935191186, "grad_norm": 0.05880199745297432, "learning_rate": 0.00012829560286241508, "loss": 0.2828, "step": 20870 }, { "epoch": 1.6907809462086845, "grad_norm": 0.049865689128637314, "learning_rate": 0.00012829110220982044, "loss": 0.2599, "step": 20871 }, { "epoch": 1.6908619572261827, "grad_norm": 0.054322708398103714, "learning_rate": 0.0001282866015572258, "loss": 0.2874, "step": 20872 }, { "epoch": 1.6909429682436812, "grad_norm": 0.05771753937005997, "learning_rate": 0.00012828210090463118, "loss": 0.3064, "step": 20873 }, { "epoch": 1.6910239792611796, "grad_norm": 0.057738374918699265, "learning_rate": 0.00012827760025203654, "loss": 0.3114, "step": 20874 }, { "epoch": 1.6911049902786779, "grad_norm": 0.0632070004940033, "learning_rate": 0.0001282730995994419, "loss": 0.2768, "step": 20875 }, { "epoch": 1.6911860012961761, "grad_norm": 0.05157562717795372, "learning_rate": 0.00012826859894684732, "loss": 0.2667, "step": 20876 }, { "epoch": 1.6912670123136748, "grad_norm": 0.06026139110326767, "learning_rate": 0.00012826409829425268, "loss": 0.2678, "step": 20877 }, { "epoch": 1.691348023331173, "grad_norm": 0.05859403684735298, "learning_rate": 0.00012825959764165804, "loss": 0.313, "step": 20878 }, { "epoch": 1.6914290343486713, "grad_norm": 0.06271016597747803, "learning_rate": 0.00012825509698906343, "loss": 0.2845, "step": 20879 }, { "epoch": 1.6915100453661698, "grad_norm": 0.07072103023529053, "learning_rate": 0.00012825059633646879, "loss": 0.266, "step": 20880 }, { "epoch": 1.6915910563836682, "grad_norm": 0.05164683610200882, "learning_rate": 0.00012824609568387417, "loss": 0.2517, "step": 20881 }, { "epoch": 1.6916720674011665, "grad_norm": 0.06269783526659012, "learning_rate": 0.00012824159503127956, "loss": 0.298, "step": 20882 }, { "epoch": 1.691753078418665, "grad_norm": 0.06345339119434357, "learning_rate": 0.00012823709437868492, "loss": 0.2779, "step": 20883 }, { "epoch": 1.6918340894361634, "grad_norm": 0.05781438946723938, "learning_rate": 0.00012823259372609028, "loss": 0.267, "step": 20884 }, { "epoch": 1.6919151004536617, "grad_norm": 0.054122913628816605, "learning_rate": 0.00012822809307349567, "loss": 0.297, "step": 20885 }, { "epoch": 1.69199611147116, "grad_norm": 0.0550195574760437, "learning_rate": 0.00012822359242090103, "loss": 0.2628, "step": 20886 }, { "epoch": 1.6920771224886586, "grad_norm": 0.06621591001749039, "learning_rate": 0.00012821909176830641, "loss": 0.294, "step": 20887 }, { "epoch": 1.6921581335061568, "grad_norm": 0.04980989173054695, "learning_rate": 0.0001282145911157118, "loss": 0.3155, "step": 20888 }, { "epoch": 1.692239144523655, "grad_norm": 0.04660594090819359, "learning_rate": 0.00012821009046311716, "loss": 0.256, "step": 20889 }, { "epoch": 1.6923201555411536, "grad_norm": 0.05284009501338005, "learning_rate": 0.00012820558981052252, "loss": 0.2647, "step": 20890 }, { "epoch": 1.692401166558652, "grad_norm": 0.042124852538108826, "learning_rate": 0.0001282010891579279, "loss": 0.2752, "step": 20891 }, { "epoch": 1.6924821775761503, "grad_norm": 0.05570278316736221, "learning_rate": 0.00012819658850533327, "loss": 0.2909, "step": 20892 }, { "epoch": 1.6925631885936487, "grad_norm": 0.06244548037648201, "learning_rate": 0.00012819208785273866, "loss": 0.3273, "step": 20893 }, { "epoch": 1.6926441996111472, "grad_norm": 0.050186578184366226, "learning_rate": 0.00012818758720014404, "loss": 0.2584, "step": 20894 }, { "epoch": 1.6927252106286454, "grad_norm": 0.055262841284275055, "learning_rate": 0.0001281830865475494, "loss": 0.307, "step": 20895 }, { "epoch": 1.692806221646144, "grad_norm": 0.051545362919569016, "learning_rate": 0.00012817858589495476, "loss": 0.2571, "step": 20896 }, { "epoch": 1.6928872326636424, "grad_norm": 0.04908451810479164, "learning_rate": 0.00012817408524236015, "loss": 0.2695, "step": 20897 }, { "epoch": 1.6929682436811406, "grad_norm": 0.07136844098567963, "learning_rate": 0.0001281695845897655, "loss": 0.3325, "step": 20898 }, { "epoch": 1.6930492546986389, "grad_norm": 0.057610828429460526, "learning_rate": 0.0001281650839371709, "loss": 0.225, "step": 20899 }, { "epoch": 1.6931302657161373, "grad_norm": 0.05767635256052017, "learning_rate": 0.00012816058328457628, "loss": 0.2623, "step": 20900 }, { "epoch": 1.6932112767336358, "grad_norm": 0.04903646931052208, "learning_rate": 0.00012815608263198164, "loss": 0.2907, "step": 20901 }, { "epoch": 1.693292287751134, "grad_norm": 0.04549427703022957, "learning_rate": 0.000128151581979387, "loss": 0.2858, "step": 20902 }, { "epoch": 1.6933732987686325, "grad_norm": 0.06195332109928131, "learning_rate": 0.0001281470813267924, "loss": 0.2713, "step": 20903 }, { "epoch": 1.693454309786131, "grad_norm": 0.04301120340824127, "learning_rate": 0.00012814258067419775, "loss": 0.269, "step": 20904 }, { "epoch": 1.6935353208036292, "grad_norm": 0.05006838217377663, "learning_rate": 0.00012813808002160314, "loss": 0.2541, "step": 20905 }, { "epoch": 1.6936163318211277, "grad_norm": 0.06199532374739647, "learning_rate": 0.00012813357936900853, "loss": 0.2884, "step": 20906 }, { "epoch": 1.6936973428386262, "grad_norm": 0.07248139381408691, "learning_rate": 0.00012812907871641389, "loss": 0.3172, "step": 20907 }, { "epoch": 1.6937783538561244, "grad_norm": 0.05518472567200661, "learning_rate": 0.00012812457806381925, "loss": 0.3236, "step": 20908 }, { "epoch": 1.6938593648736227, "grad_norm": 0.05791405588388443, "learning_rate": 0.00012812007741122463, "loss": 0.2963, "step": 20909 }, { "epoch": 1.6939403758911213, "grad_norm": 0.06537389755249023, "learning_rate": 0.00012811557675863002, "loss": 0.3128, "step": 20910 }, { "epoch": 1.6940213869086196, "grad_norm": 0.05233273655176163, "learning_rate": 0.00012811107610603538, "loss": 0.2617, "step": 20911 }, { "epoch": 1.6941023979261178, "grad_norm": 0.05526323616504669, "learning_rate": 0.00012810657545344077, "loss": 0.2473, "step": 20912 }, { "epoch": 1.6941834089436163, "grad_norm": 0.05334017053246498, "learning_rate": 0.00012810207480084613, "loss": 0.291, "step": 20913 }, { "epoch": 1.6942644199611148, "grad_norm": 0.05555533617734909, "learning_rate": 0.0001280975741482515, "loss": 0.2981, "step": 20914 }, { "epoch": 1.694345430978613, "grad_norm": 0.04688166454434395, "learning_rate": 0.00012809307349565688, "loss": 0.2667, "step": 20915 }, { "epoch": 1.6944264419961115, "grad_norm": 0.054582808166742325, "learning_rate": 0.00012808857284306226, "loss": 0.2771, "step": 20916 }, { "epoch": 1.69450745301361, "grad_norm": 0.05647823214530945, "learning_rate": 0.00012808407219046762, "loss": 0.2949, "step": 20917 }, { "epoch": 1.6945884640311082, "grad_norm": 0.05077538266777992, "learning_rate": 0.000128079571537873, "loss": 0.2537, "step": 20918 }, { "epoch": 1.6946694750486067, "grad_norm": 0.059872034937143326, "learning_rate": 0.00012807507088527837, "loss": 0.2731, "step": 20919 }, { "epoch": 1.6947504860661051, "grad_norm": 0.05296333506703377, "learning_rate": 0.00012807057023268373, "loss": 0.2547, "step": 20920 }, { "epoch": 1.6948314970836034, "grad_norm": 0.0454367958009243, "learning_rate": 0.00012806606958008912, "loss": 0.2442, "step": 20921 }, { "epoch": 1.6949125081011016, "grad_norm": 0.05745544284582138, "learning_rate": 0.0001280615689274945, "loss": 0.2773, "step": 20922 }, { "epoch": 1.6949935191186, "grad_norm": 0.05502451956272125, "learning_rate": 0.00012805706827489986, "loss": 0.2551, "step": 20923 }, { "epoch": 1.6950745301360985, "grad_norm": 0.0566684864461422, "learning_rate": 0.00012805256762230525, "loss": 0.3057, "step": 20924 }, { "epoch": 1.6951555411535968, "grad_norm": 0.0476890504360199, "learning_rate": 0.0001280480669697106, "loss": 0.3012, "step": 20925 }, { "epoch": 1.6952365521710953, "grad_norm": 0.05982305482029915, "learning_rate": 0.00012804356631711597, "loss": 0.304, "step": 20926 }, { "epoch": 1.6953175631885937, "grad_norm": 0.05153541639447212, "learning_rate": 0.00012803906566452136, "loss": 0.2715, "step": 20927 }, { "epoch": 1.695398574206092, "grad_norm": 0.05451453849673271, "learning_rate": 0.00012803456501192675, "loss": 0.2833, "step": 20928 }, { "epoch": 1.6954795852235904, "grad_norm": 0.056824252009391785, "learning_rate": 0.0001280300643593321, "loss": 0.2743, "step": 20929 }, { "epoch": 1.695560596241089, "grad_norm": 0.05149870365858078, "learning_rate": 0.0001280255637067375, "loss": 0.2686, "step": 20930 }, { "epoch": 1.6956416072585871, "grad_norm": 0.06263695657253265, "learning_rate": 0.00012802106305414285, "loss": 0.2669, "step": 20931 }, { "epoch": 1.6957226182760854, "grad_norm": 0.05872713401913643, "learning_rate": 0.0001280165624015482, "loss": 0.3088, "step": 20932 }, { "epoch": 1.695803629293584, "grad_norm": 0.05683526769280434, "learning_rate": 0.0001280120617489536, "loss": 0.3005, "step": 20933 }, { "epoch": 1.6958846403110823, "grad_norm": 0.04795790836215019, "learning_rate": 0.000128007561096359, "loss": 0.2629, "step": 20934 }, { "epoch": 1.6959656513285806, "grad_norm": 0.05641131475567818, "learning_rate": 0.00012800306044376435, "loss": 0.2666, "step": 20935 }, { "epoch": 1.696046662346079, "grad_norm": 0.05589877441525459, "learning_rate": 0.00012799855979116973, "loss": 0.2851, "step": 20936 }, { "epoch": 1.6961276733635775, "grad_norm": 0.0438174232840538, "learning_rate": 0.0001279940591385751, "loss": 0.2198, "step": 20937 }, { "epoch": 1.6962086843810757, "grad_norm": 0.05916321277618408, "learning_rate": 0.00012798955848598045, "loss": 0.2808, "step": 20938 }, { "epoch": 1.6962896953985742, "grad_norm": 0.04272516444325447, "learning_rate": 0.00012798505783338587, "loss": 0.2337, "step": 20939 }, { "epoch": 1.6963707064160727, "grad_norm": 0.05487491562962532, "learning_rate": 0.00012798055718079123, "loss": 0.2671, "step": 20940 }, { "epoch": 1.696451717433571, "grad_norm": 0.042466625571250916, "learning_rate": 0.0001279760565281966, "loss": 0.2555, "step": 20941 }, { "epoch": 1.6965327284510694, "grad_norm": 0.05536244064569473, "learning_rate": 0.00012797155587560198, "loss": 0.2683, "step": 20942 }, { "epoch": 1.6966137394685679, "grad_norm": 0.047094427049160004, "learning_rate": 0.00012796705522300734, "loss": 0.2975, "step": 20943 }, { "epoch": 1.696694750486066, "grad_norm": 0.05219883844256401, "learning_rate": 0.0001279625545704127, "loss": 0.2904, "step": 20944 }, { "epoch": 1.6967757615035644, "grad_norm": 0.04873950034379959, "learning_rate": 0.0001279580539178181, "loss": 0.2689, "step": 20945 }, { "epoch": 1.6968567725210628, "grad_norm": 0.04848574101924896, "learning_rate": 0.00012795355326522347, "loss": 0.2856, "step": 20946 }, { "epoch": 1.6969377835385613, "grad_norm": 0.047113820910453796, "learning_rate": 0.00012794905261262883, "loss": 0.2732, "step": 20947 }, { "epoch": 1.6970187945560595, "grad_norm": 0.05355425179004669, "learning_rate": 0.00012794455196003422, "loss": 0.2782, "step": 20948 }, { "epoch": 1.697099805573558, "grad_norm": 0.05074724927544594, "learning_rate": 0.00012794005130743958, "loss": 0.2804, "step": 20949 }, { "epoch": 1.6971808165910565, "grad_norm": 0.0466444194316864, "learning_rate": 0.00012793555065484494, "loss": 0.2729, "step": 20950 }, { "epoch": 1.6972618276085547, "grad_norm": 0.05626093968749046, "learning_rate": 0.00012793105000225035, "loss": 0.2727, "step": 20951 }, { "epoch": 1.6973428386260532, "grad_norm": 0.05510025471448898, "learning_rate": 0.0001279265493496557, "loss": 0.2864, "step": 20952 }, { "epoch": 1.6974238496435516, "grad_norm": 0.0634729415178299, "learning_rate": 0.00012792204869706107, "loss": 0.3169, "step": 20953 }, { "epoch": 1.6975048606610499, "grad_norm": 0.05881008878350258, "learning_rate": 0.00012791754804446646, "loss": 0.2871, "step": 20954 }, { "epoch": 1.6975858716785481, "grad_norm": 0.05638792738318443, "learning_rate": 0.00012791304739187182, "loss": 0.3048, "step": 20955 }, { "epoch": 1.6976668826960468, "grad_norm": 0.05283451825380325, "learning_rate": 0.00012790854673927718, "loss": 0.2665, "step": 20956 }, { "epoch": 1.697747893713545, "grad_norm": 0.05943385511636734, "learning_rate": 0.0001279040460866826, "loss": 0.2945, "step": 20957 }, { "epoch": 1.6978289047310433, "grad_norm": 0.050613295286893845, "learning_rate": 0.00012789954543408795, "loss": 0.2932, "step": 20958 }, { "epoch": 1.6979099157485418, "grad_norm": 0.0523209348320961, "learning_rate": 0.00012789504478149331, "loss": 0.2743, "step": 20959 }, { "epoch": 1.6979909267660402, "grad_norm": 0.0570591501891613, "learning_rate": 0.0001278905441288987, "loss": 0.3079, "step": 20960 }, { "epoch": 1.6980719377835385, "grad_norm": 0.05599812790751457, "learning_rate": 0.00012788604347630406, "loss": 0.2843, "step": 20961 }, { "epoch": 1.698152948801037, "grad_norm": 0.05278458818793297, "learning_rate": 0.00012788154282370945, "loss": 0.2517, "step": 20962 }, { "epoch": 1.6982339598185354, "grad_norm": 0.051319461315870285, "learning_rate": 0.00012787704217111484, "loss": 0.2643, "step": 20963 }, { "epoch": 1.6983149708360337, "grad_norm": 0.05669070780277252, "learning_rate": 0.0001278725415185202, "loss": 0.2677, "step": 20964 }, { "epoch": 1.6983959818535321, "grad_norm": 0.046938829123973846, "learning_rate": 0.00012786804086592556, "loss": 0.2722, "step": 20965 }, { "epoch": 1.6984769928710306, "grad_norm": 0.05684944614768028, "learning_rate": 0.00012786354021333094, "loss": 0.2751, "step": 20966 }, { "epoch": 1.6985580038885288, "grad_norm": 0.05575013533234596, "learning_rate": 0.0001278590395607363, "loss": 0.2971, "step": 20967 }, { "epoch": 1.698639014906027, "grad_norm": 0.052737586200237274, "learning_rate": 0.0001278545389081417, "loss": 0.2252, "step": 20968 }, { "epoch": 1.6987200259235256, "grad_norm": 0.05768506973981857, "learning_rate": 0.00012785003825554708, "loss": 0.261, "step": 20969 }, { "epoch": 1.698801036941024, "grad_norm": 0.04958980903029442, "learning_rate": 0.00012784553760295244, "loss": 0.2788, "step": 20970 }, { "epoch": 1.6988820479585223, "grad_norm": 0.05882476642727852, "learning_rate": 0.0001278410369503578, "loss": 0.2598, "step": 20971 }, { "epoch": 1.6989630589760207, "grad_norm": 0.052960388362407684, "learning_rate": 0.00012783653629776318, "loss": 0.2885, "step": 20972 }, { "epoch": 1.6990440699935192, "grad_norm": 0.06784273684024811, "learning_rate": 0.00012783203564516854, "loss": 0.3332, "step": 20973 }, { "epoch": 1.6991250810110174, "grad_norm": 0.05525969713926315, "learning_rate": 0.00012782753499257393, "loss": 0.3079, "step": 20974 }, { "epoch": 1.699206092028516, "grad_norm": 0.06042307987809181, "learning_rate": 0.00012782303433997932, "loss": 0.327, "step": 20975 }, { "epoch": 1.6992871030460144, "grad_norm": 0.0597507506608963, "learning_rate": 0.00012781853368738468, "loss": 0.2855, "step": 20976 }, { "epoch": 1.6993681140635126, "grad_norm": 0.05462580919265747, "learning_rate": 0.00012781403303479004, "loss": 0.2772, "step": 20977 }, { "epoch": 1.6994491250810109, "grad_norm": 0.04822581261396408, "learning_rate": 0.00012780953238219543, "loss": 0.2503, "step": 20978 }, { "epoch": 1.6995301360985096, "grad_norm": 0.061402879655361176, "learning_rate": 0.00012780503172960079, "loss": 0.3026, "step": 20979 }, { "epoch": 1.6996111471160078, "grad_norm": 0.057321321219205856, "learning_rate": 0.00012780053107700617, "loss": 0.264, "step": 20980 }, { "epoch": 1.699692158133506, "grad_norm": 0.05499900504946709, "learning_rate": 0.00012779603042441156, "loss": 0.288, "step": 20981 }, { "epoch": 1.6997731691510045, "grad_norm": 0.059335388243198395, "learning_rate": 0.00012779152977181692, "loss": 0.2493, "step": 20982 }, { "epoch": 1.699854180168503, "grad_norm": 0.04036781191825867, "learning_rate": 0.00012778702911922228, "loss": 0.2461, "step": 20983 }, { "epoch": 1.6999351911860012, "grad_norm": 0.057941023260354996, "learning_rate": 0.00012778252846662767, "loss": 0.2883, "step": 20984 }, { "epoch": 1.7000162022034997, "grad_norm": 0.050086475908756256, "learning_rate": 0.00012777802781403303, "loss": 0.26, "step": 20985 }, { "epoch": 1.7000972132209982, "grad_norm": 0.06684888154268265, "learning_rate": 0.00012777352716143841, "loss": 0.3185, "step": 20986 }, { "epoch": 1.7001782242384964, "grad_norm": 0.05445714667439461, "learning_rate": 0.0001277690265088438, "loss": 0.2718, "step": 20987 }, { "epoch": 1.7002592352559946, "grad_norm": 0.054723452776670456, "learning_rate": 0.00012776452585624916, "loss": 0.2237, "step": 20988 }, { "epoch": 1.7003402462734933, "grad_norm": 0.06936323642730713, "learning_rate": 0.00012776002520365452, "loss": 0.3069, "step": 20989 }, { "epoch": 1.7004212572909916, "grad_norm": 0.059990838170051575, "learning_rate": 0.0001277555245510599, "loss": 0.2868, "step": 20990 }, { "epoch": 1.7005022683084898, "grad_norm": 0.05394691601395607, "learning_rate": 0.0001277510238984653, "loss": 0.2586, "step": 20991 }, { "epoch": 1.7005832793259883, "grad_norm": 0.05593397095799446, "learning_rate": 0.00012774652324587066, "loss": 0.2665, "step": 20992 }, { "epoch": 1.7006642903434868, "grad_norm": 0.052219316363334656, "learning_rate": 0.00012774202259327604, "loss": 0.2339, "step": 20993 }, { "epoch": 1.700745301360985, "grad_norm": 0.06903288513422012, "learning_rate": 0.0001277375219406814, "loss": 0.2955, "step": 20994 }, { "epoch": 1.7008263123784835, "grad_norm": 0.06040949001908302, "learning_rate": 0.00012773302128808676, "loss": 0.3226, "step": 20995 }, { "epoch": 1.700907323395982, "grad_norm": 0.05372748523950577, "learning_rate": 0.00012772852063549215, "loss": 0.2893, "step": 20996 }, { "epoch": 1.7009883344134802, "grad_norm": 0.05498870834708214, "learning_rate": 0.00012772401998289754, "loss": 0.2618, "step": 20997 }, { "epoch": 1.7010693454309787, "grad_norm": 0.0433398000895977, "learning_rate": 0.0001277195193303029, "loss": 0.2516, "step": 20998 }, { "epoch": 1.7011503564484771, "grad_norm": 0.042253199964761734, "learning_rate": 0.00012771501867770828, "loss": 0.2629, "step": 20999 }, { "epoch": 1.7012313674659754, "grad_norm": 0.060787223279476166, "learning_rate": 0.00012771051802511364, "loss": 0.3057, "step": 21000 }, { "epoch": 1.7013123784834736, "grad_norm": 0.08020738512277603, "learning_rate": 0.000127706017372519, "loss": 0.3053, "step": 21001 }, { "epoch": 1.7013933895009723, "grad_norm": 0.058047182857990265, "learning_rate": 0.0001277015167199244, "loss": 0.2418, "step": 21002 }, { "epoch": 1.7014744005184705, "grad_norm": 0.052321989089250565, "learning_rate": 0.00012769701606732978, "loss": 0.2736, "step": 21003 }, { "epoch": 1.7015554115359688, "grad_norm": 0.052635811269283295, "learning_rate": 0.00012769251541473514, "loss": 0.3232, "step": 21004 }, { "epoch": 1.7016364225534673, "grad_norm": 0.050415851175785065, "learning_rate": 0.00012768801476214053, "loss": 0.2611, "step": 21005 }, { "epoch": 1.7017174335709657, "grad_norm": 0.05760395526885986, "learning_rate": 0.0001276835141095459, "loss": 0.2937, "step": 21006 }, { "epoch": 1.701798444588464, "grad_norm": 0.05417398735880852, "learning_rate": 0.00012767901345695125, "loss": 0.2681, "step": 21007 }, { "epoch": 1.7018794556059624, "grad_norm": 0.05250127241015434, "learning_rate": 0.00012767451280435663, "loss": 0.2809, "step": 21008 }, { "epoch": 1.701960466623461, "grad_norm": 0.05703813210129738, "learning_rate": 0.00012767001215176202, "loss": 0.274, "step": 21009 }, { "epoch": 1.7020414776409591, "grad_norm": 0.050707802176475525, "learning_rate": 0.00012766551149916738, "loss": 0.2451, "step": 21010 }, { "epoch": 1.7021224886584574, "grad_norm": 0.06028733402490616, "learning_rate": 0.00012766101084657277, "loss": 0.2741, "step": 21011 }, { "epoch": 1.702203499675956, "grad_norm": 0.059985797852277756, "learning_rate": 0.00012765651019397813, "loss": 0.3051, "step": 21012 }, { "epoch": 1.7022845106934543, "grad_norm": 0.05215641111135483, "learning_rate": 0.0001276520095413835, "loss": 0.2691, "step": 21013 }, { "epoch": 1.7023655217109526, "grad_norm": 0.055474214255809784, "learning_rate": 0.0001276475088887889, "loss": 0.2895, "step": 21014 }, { "epoch": 1.702446532728451, "grad_norm": 0.05044548586010933, "learning_rate": 0.00012764300823619426, "loss": 0.3022, "step": 21015 }, { "epoch": 1.7025275437459495, "grad_norm": 0.053043123334646225, "learning_rate": 0.00012763850758359962, "loss": 0.2758, "step": 21016 }, { "epoch": 1.7026085547634477, "grad_norm": 0.0499655157327652, "learning_rate": 0.000127634006931005, "loss": 0.2501, "step": 21017 }, { "epoch": 1.7026895657809462, "grad_norm": 0.05497471243143082, "learning_rate": 0.00012762950627841037, "loss": 0.2631, "step": 21018 }, { "epoch": 1.7027705767984447, "grad_norm": 0.05439502373337746, "learning_rate": 0.00012762500562581573, "loss": 0.285, "step": 21019 }, { "epoch": 1.702851587815943, "grad_norm": 0.05842931941151619, "learning_rate": 0.00012762050497322114, "loss": 0.2786, "step": 21020 }, { "epoch": 1.7029325988334414, "grad_norm": 0.05893060192465782, "learning_rate": 0.0001276160043206265, "loss": 0.3007, "step": 21021 }, { "epoch": 1.7030136098509399, "grad_norm": 0.05791548639535904, "learning_rate": 0.00012761150366803186, "loss": 0.2726, "step": 21022 }, { "epoch": 1.703094620868438, "grad_norm": 0.041637811809778214, "learning_rate": 0.00012760700301543725, "loss": 0.2336, "step": 21023 }, { "epoch": 1.7031756318859363, "grad_norm": 0.05405241623520851, "learning_rate": 0.0001276025023628426, "loss": 0.2907, "step": 21024 }, { "epoch": 1.7032566429034348, "grad_norm": 0.047728147357702255, "learning_rate": 0.00012759800171024797, "loss": 0.2697, "step": 21025 }, { "epoch": 1.7033376539209333, "grad_norm": 0.05584067478775978, "learning_rate": 0.00012759350105765339, "loss": 0.3092, "step": 21026 }, { "epoch": 1.7034186649384315, "grad_norm": 0.05352660268545151, "learning_rate": 0.00012758900040505875, "loss": 0.2769, "step": 21027 }, { "epoch": 1.70349967595593, "grad_norm": 0.052968524396419525, "learning_rate": 0.0001275844997524641, "loss": 0.3083, "step": 21028 }, { "epoch": 1.7035806869734285, "grad_norm": 0.06345254927873611, "learning_rate": 0.0001275799990998695, "loss": 0.2946, "step": 21029 }, { "epoch": 1.7036616979909267, "grad_norm": 0.058345187455415726, "learning_rate": 0.00012757549844727485, "loss": 0.3017, "step": 21030 }, { "epoch": 1.7037427090084252, "grad_norm": 0.06407354027032852, "learning_rate": 0.0001275709977946802, "loss": 0.2447, "step": 21031 }, { "epoch": 1.7038237200259236, "grad_norm": 0.049316179007291794, "learning_rate": 0.00012756649714208563, "loss": 0.2748, "step": 21032 }, { "epoch": 1.7039047310434219, "grad_norm": 0.06267198920249939, "learning_rate": 0.000127561996489491, "loss": 0.2882, "step": 21033 }, { "epoch": 1.7039857420609201, "grad_norm": 0.0582430474460125, "learning_rate": 0.00012755749583689635, "loss": 0.2848, "step": 21034 }, { "epoch": 1.7040667530784188, "grad_norm": 0.0576762817800045, "learning_rate": 0.00012755299518430173, "loss": 0.2623, "step": 21035 }, { "epoch": 1.704147764095917, "grad_norm": 0.05833594501018524, "learning_rate": 0.0001275484945317071, "loss": 0.2756, "step": 21036 }, { "epoch": 1.7042287751134153, "grad_norm": 0.061021964997053146, "learning_rate": 0.00012754399387911245, "loss": 0.2743, "step": 21037 }, { "epoch": 1.7043097861309138, "grad_norm": 0.05440381169319153, "learning_rate": 0.00012753949322651787, "loss": 0.2968, "step": 21038 }, { "epoch": 1.7043907971484122, "grad_norm": 0.048326920717954636, "learning_rate": 0.00012753499257392323, "loss": 0.2562, "step": 21039 }, { "epoch": 1.7044718081659105, "grad_norm": 0.05831613019108772, "learning_rate": 0.0001275304919213286, "loss": 0.2828, "step": 21040 }, { "epoch": 1.704552819183409, "grad_norm": 0.057130247354507446, "learning_rate": 0.00012752599126873398, "loss": 0.2707, "step": 21041 }, { "epoch": 1.7046338302009074, "grad_norm": 0.04975264146924019, "learning_rate": 0.00012752149061613934, "loss": 0.2518, "step": 21042 }, { "epoch": 1.7047148412184057, "grad_norm": 0.05217575281858444, "learning_rate": 0.00012751698996354472, "loss": 0.2548, "step": 21043 }, { "epoch": 1.7047958522359041, "grad_norm": 0.07900968939065933, "learning_rate": 0.0001275124893109501, "loss": 0.2852, "step": 21044 }, { "epoch": 1.7048768632534026, "grad_norm": 0.058848753571510315, "learning_rate": 0.00012750798865835547, "loss": 0.3091, "step": 21045 }, { "epoch": 1.7049578742709008, "grad_norm": 0.052460964769124985, "learning_rate": 0.00012750348800576083, "loss": 0.2722, "step": 21046 }, { "epoch": 1.705038885288399, "grad_norm": 0.052080314606428146, "learning_rate": 0.00012749898735316622, "loss": 0.297, "step": 21047 }, { "epoch": 1.7051198963058976, "grad_norm": 0.05389990657567978, "learning_rate": 0.00012749448670057158, "loss": 0.2982, "step": 21048 }, { "epoch": 1.705200907323396, "grad_norm": 0.0437735840678215, "learning_rate": 0.00012748998604797697, "loss": 0.2651, "step": 21049 }, { "epoch": 1.7052819183408943, "grad_norm": 0.05131865665316582, "learning_rate": 0.00012748548539538235, "loss": 0.2609, "step": 21050 }, { "epoch": 1.7053629293583927, "grad_norm": 0.04363410547375679, "learning_rate": 0.0001274809847427877, "loss": 0.2436, "step": 21051 }, { "epoch": 1.7054439403758912, "grad_norm": 0.05448685958981514, "learning_rate": 0.00012747648409019307, "loss": 0.3368, "step": 21052 }, { "epoch": 1.7055249513933894, "grad_norm": 0.055532172322273254, "learning_rate": 0.00012747198343759846, "loss": 0.2764, "step": 21053 }, { "epoch": 1.705605962410888, "grad_norm": 0.05029525235295296, "learning_rate": 0.00012746748278500382, "loss": 0.268, "step": 21054 }, { "epoch": 1.7056869734283864, "grad_norm": 0.058983251452445984, "learning_rate": 0.0001274629821324092, "loss": 0.2468, "step": 21055 }, { "epoch": 1.7057679844458846, "grad_norm": 0.05035523325204849, "learning_rate": 0.0001274584814798146, "loss": 0.2519, "step": 21056 }, { "epoch": 1.7058489954633829, "grad_norm": 0.05484575033187866, "learning_rate": 0.00012745398082721995, "loss": 0.3013, "step": 21057 }, { "epoch": 1.7059300064808816, "grad_norm": 0.05905700847506523, "learning_rate": 0.00012744948017462531, "loss": 0.274, "step": 21058 }, { "epoch": 1.7060110174983798, "grad_norm": 0.04861075058579445, "learning_rate": 0.0001274449795220307, "loss": 0.2592, "step": 21059 }, { "epoch": 1.706092028515878, "grad_norm": 0.056762732565402985, "learning_rate": 0.00012744047886943606, "loss": 0.2554, "step": 21060 }, { "epoch": 1.7061730395333765, "grad_norm": 0.0626283660531044, "learning_rate": 0.00012743597821684145, "loss": 0.2907, "step": 21061 }, { "epoch": 1.706254050550875, "grad_norm": 0.056917816400527954, "learning_rate": 0.00012743147756424684, "loss": 0.2593, "step": 21062 }, { "epoch": 1.7063350615683732, "grad_norm": 0.055883992463350296, "learning_rate": 0.0001274269769116522, "loss": 0.2603, "step": 21063 }, { "epoch": 1.7064160725858717, "grad_norm": 0.07410957664251328, "learning_rate": 0.00012742247625905756, "loss": 0.3146, "step": 21064 }, { "epoch": 1.7064970836033702, "grad_norm": 0.06119558587670326, "learning_rate": 0.00012741797560646294, "loss": 0.3055, "step": 21065 }, { "epoch": 1.7065780946208684, "grad_norm": 0.056091416627168655, "learning_rate": 0.00012741347495386833, "loss": 0.3078, "step": 21066 }, { "epoch": 1.7066591056383669, "grad_norm": 0.06130021810531616, "learning_rate": 0.0001274089743012737, "loss": 0.3076, "step": 21067 }, { "epoch": 1.7067401166558653, "grad_norm": 0.053993724286556244, "learning_rate": 0.00012740447364867908, "loss": 0.2357, "step": 21068 }, { "epoch": 1.7068211276733636, "grad_norm": 0.06275437027215958, "learning_rate": 0.00012739997299608444, "loss": 0.3032, "step": 21069 }, { "epoch": 1.7069021386908618, "grad_norm": 0.05278387293219566, "learning_rate": 0.0001273954723434898, "loss": 0.2554, "step": 21070 }, { "epoch": 1.7069831497083603, "grad_norm": 0.05508885532617569, "learning_rate": 0.00012739097169089518, "loss": 0.2938, "step": 21071 }, { "epoch": 1.7070641607258588, "grad_norm": 0.05782973766326904, "learning_rate": 0.00012738647103830057, "loss": 0.2583, "step": 21072 }, { "epoch": 1.707145171743357, "grad_norm": 0.0582059770822525, "learning_rate": 0.00012738197038570593, "loss": 0.3355, "step": 21073 }, { "epoch": 1.7072261827608555, "grad_norm": 0.055913038551807404, "learning_rate": 0.00012737746973311132, "loss": 0.2826, "step": 21074 }, { "epoch": 1.707307193778354, "grad_norm": 0.04921237379312515, "learning_rate": 0.00012737296908051668, "loss": 0.2739, "step": 21075 }, { "epoch": 1.7073882047958522, "grad_norm": 0.056289199739694595, "learning_rate": 0.00012736846842792204, "loss": 0.3051, "step": 21076 }, { "epoch": 1.7074692158133506, "grad_norm": 0.05416957661509514, "learning_rate": 0.00012736396777532743, "loss": 0.2725, "step": 21077 }, { "epoch": 1.7075502268308491, "grad_norm": 0.05085984617471695, "learning_rate": 0.0001273594671227328, "loss": 0.2759, "step": 21078 }, { "epoch": 1.7076312378483474, "grad_norm": 0.049407776445150375, "learning_rate": 0.00012735496647013817, "loss": 0.294, "step": 21079 }, { "epoch": 1.7077122488658456, "grad_norm": 0.06257401406764984, "learning_rate": 0.00012735046581754356, "loss": 0.2922, "step": 21080 }, { "epoch": 1.7077932598833443, "grad_norm": 0.046495795249938965, "learning_rate": 0.00012734596516494892, "loss": 0.2558, "step": 21081 }, { "epoch": 1.7078742709008425, "grad_norm": 0.06849236786365509, "learning_rate": 0.00012734146451235428, "loss": 0.2532, "step": 21082 }, { "epoch": 1.7079552819183408, "grad_norm": 0.05374249443411827, "learning_rate": 0.00012733696385975967, "loss": 0.2892, "step": 21083 }, { "epoch": 1.7080362929358393, "grad_norm": 0.05467557534575462, "learning_rate": 0.00012733246320716505, "loss": 0.2718, "step": 21084 }, { "epoch": 1.7081173039533377, "grad_norm": 0.05555357038974762, "learning_rate": 0.00012732796255457041, "loss": 0.289, "step": 21085 }, { "epoch": 1.708198314970836, "grad_norm": 0.05055621638894081, "learning_rate": 0.0001273234619019758, "loss": 0.266, "step": 21086 }, { "epoch": 1.7082793259883344, "grad_norm": 0.05175207182765007, "learning_rate": 0.00012731896124938116, "loss": 0.268, "step": 21087 }, { "epoch": 1.708360337005833, "grad_norm": 0.05274539813399315, "learning_rate": 0.00012731446059678652, "loss": 0.2707, "step": 21088 }, { "epoch": 1.7084413480233311, "grad_norm": 0.058866798877716064, "learning_rate": 0.0001273099599441919, "loss": 0.2413, "step": 21089 }, { "epoch": 1.7085223590408296, "grad_norm": 0.059128813445568085, "learning_rate": 0.0001273054592915973, "loss": 0.2794, "step": 21090 }, { "epoch": 1.708603370058328, "grad_norm": 0.05331343784928322, "learning_rate": 0.00012730095863900266, "loss": 0.2991, "step": 21091 }, { "epoch": 1.7086843810758263, "grad_norm": 0.052910611033439636, "learning_rate": 0.00012729645798640804, "loss": 0.2632, "step": 21092 }, { "epoch": 1.7087653920933246, "grad_norm": 0.04627866670489311, "learning_rate": 0.0001272919573338134, "loss": 0.2509, "step": 21093 }, { "epoch": 1.708846403110823, "grad_norm": 0.053217582404613495, "learning_rate": 0.00012728745668121876, "loss": 0.2715, "step": 21094 }, { "epoch": 1.7089274141283215, "grad_norm": 0.06612826138734818, "learning_rate": 0.00012728295602862418, "loss": 0.3198, "step": 21095 }, { "epoch": 1.7090084251458197, "grad_norm": 0.053610753268003464, "learning_rate": 0.00012727845537602954, "loss": 0.2619, "step": 21096 }, { "epoch": 1.7090894361633182, "grad_norm": 0.043761976063251495, "learning_rate": 0.0001272739547234349, "loss": 0.2487, "step": 21097 }, { "epoch": 1.7091704471808167, "grad_norm": 0.04693054035305977, "learning_rate": 0.00012726945407084029, "loss": 0.2636, "step": 21098 }, { "epoch": 1.709251458198315, "grad_norm": 0.05634808540344238, "learning_rate": 0.00012726495341824565, "loss": 0.2397, "step": 21099 }, { "epoch": 1.7093324692158134, "grad_norm": 0.062135498970746994, "learning_rate": 0.000127260452765651, "loss": 0.3048, "step": 21100 }, { "epoch": 1.7094134802333119, "grad_norm": 0.060942426323890686, "learning_rate": 0.00012725595211305642, "loss": 0.3448, "step": 21101 }, { "epoch": 1.70949449125081, "grad_norm": 0.05401855334639549, "learning_rate": 0.00012725145146046178, "loss": 0.2581, "step": 21102 }, { "epoch": 1.7095755022683083, "grad_norm": 0.050278641283512115, "learning_rate": 0.00012724695080786714, "loss": 0.2573, "step": 21103 }, { "epoch": 1.709656513285807, "grad_norm": 0.05164032429456711, "learning_rate": 0.00012724245015527253, "loss": 0.2528, "step": 21104 }, { "epoch": 1.7097375243033053, "grad_norm": 0.04855099692940712, "learning_rate": 0.0001272379495026779, "loss": 0.2553, "step": 21105 }, { "epoch": 1.7098185353208035, "grad_norm": 0.04928193241357803, "learning_rate": 0.00012723344885008325, "loss": 0.286, "step": 21106 }, { "epoch": 1.709899546338302, "grad_norm": 0.05188463255763054, "learning_rate": 0.00012722894819748866, "loss": 0.2621, "step": 21107 }, { "epoch": 1.7099805573558005, "grad_norm": 0.05235172063112259, "learning_rate": 0.00012722444754489402, "loss": 0.31, "step": 21108 }, { "epoch": 1.7100615683732987, "grad_norm": 0.056308262050151825, "learning_rate": 0.00012721994689229938, "loss": 0.2707, "step": 21109 }, { "epoch": 1.7101425793907972, "grad_norm": 0.044385965913534164, "learning_rate": 0.00012721544623970477, "loss": 0.2338, "step": 21110 }, { "epoch": 1.7102235904082956, "grad_norm": 0.04382346197962761, "learning_rate": 0.00012721094558711013, "loss": 0.2703, "step": 21111 }, { "epoch": 1.7103046014257939, "grad_norm": 0.053613610565662384, "learning_rate": 0.0001272064449345155, "loss": 0.2488, "step": 21112 }, { "epoch": 1.7103856124432921, "grad_norm": 0.0616731233894825, "learning_rate": 0.0001272019442819209, "loss": 0.2805, "step": 21113 }, { "epoch": 1.7104666234607908, "grad_norm": 0.046198032796382904, "learning_rate": 0.00012719744362932626, "loss": 0.2395, "step": 21114 }, { "epoch": 1.710547634478289, "grad_norm": 0.06092916801571846, "learning_rate": 0.00012719294297673162, "loss": 0.3167, "step": 21115 }, { "epoch": 1.7106286454957873, "grad_norm": 0.06428792327642441, "learning_rate": 0.000127188442324137, "loss": 0.3205, "step": 21116 }, { "epoch": 1.7107096565132858, "grad_norm": 0.055565331131219864, "learning_rate": 0.00012718394167154237, "loss": 0.2697, "step": 21117 }, { "epoch": 1.7107906675307842, "grad_norm": 0.05767647922039032, "learning_rate": 0.00012717944101894773, "loss": 0.2547, "step": 21118 }, { "epoch": 1.7108716785482825, "grad_norm": 0.060147978365421295, "learning_rate": 0.00012717494036635314, "loss": 0.2619, "step": 21119 }, { "epoch": 1.710952689565781, "grad_norm": 0.05821974575519562, "learning_rate": 0.0001271704397137585, "loss": 0.2597, "step": 21120 }, { "epoch": 1.7110337005832794, "grad_norm": 0.05406796559691429, "learning_rate": 0.00012716593906116386, "loss": 0.2909, "step": 21121 }, { "epoch": 1.7111147116007777, "grad_norm": 0.05455823615193367, "learning_rate": 0.00012716143840856925, "loss": 0.2887, "step": 21122 }, { "epoch": 1.7111957226182761, "grad_norm": 0.055845241993665695, "learning_rate": 0.0001271569377559746, "loss": 0.2761, "step": 21123 }, { "epoch": 1.7112767336357746, "grad_norm": 0.04812893643975258, "learning_rate": 0.00012715243710338, "loss": 0.2447, "step": 21124 }, { "epoch": 1.7113577446532728, "grad_norm": 0.04706289619207382, "learning_rate": 0.00012714793645078539, "loss": 0.2817, "step": 21125 }, { "epoch": 1.711438755670771, "grad_norm": 0.044318217784166336, "learning_rate": 0.00012714343579819075, "loss": 0.2766, "step": 21126 }, { "epoch": 1.7115197666882696, "grad_norm": 0.05528712272644043, "learning_rate": 0.0001271389351455961, "loss": 0.2571, "step": 21127 }, { "epoch": 1.711600777705768, "grad_norm": 0.051084212958812714, "learning_rate": 0.0001271344344930015, "loss": 0.2526, "step": 21128 }, { "epoch": 1.7116817887232663, "grad_norm": 0.05133767053484917, "learning_rate": 0.00012712993384040685, "loss": 0.2854, "step": 21129 }, { "epoch": 1.7117627997407647, "grad_norm": 0.04583508148789406, "learning_rate": 0.00012712543318781224, "loss": 0.23, "step": 21130 }, { "epoch": 1.7118438107582632, "grad_norm": 0.06236959248781204, "learning_rate": 0.00012712093253521763, "loss": 0.2824, "step": 21131 }, { "epoch": 1.7119248217757614, "grad_norm": 0.05616011098027229, "learning_rate": 0.000127116431882623, "loss": 0.2727, "step": 21132 }, { "epoch": 1.71200583279326, "grad_norm": 0.051108259707689285, "learning_rate": 0.00012711193123002835, "loss": 0.2703, "step": 21133 }, { "epoch": 1.7120868438107584, "grad_norm": 0.07083071768283844, "learning_rate": 0.00012710743057743373, "loss": 0.2931, "step": 21134 }, { "epoch": 1.7121678548282566, "grad_norm": 0.040604688227176666, "learning_rate": 0.0001271029299248391, "loss": 0.2411, "step": 21135 }, { "epoch": 1.7122488658457549, "grad_norm": 0.05927375331521034, "learning_rate": 0.00012709842927224448, "loss": 0.2877, "step": 21136 }, { "epoch": 1.7123298768632536, "grad_norm": 0.05343782901763916, "learning_rate": 0.00012709392861964987, "loss": 0.2738, "step": 21137 }, { "epoch": 1.7124108878807518, "grad_norm": 0.06753844767808914, "learning_rate": 0.00012708942796705523, "loss": 0.3114, "step": 21138 }, { "epoch": 1.71249189889825, "grad_norm": 0.06401346623897552, "learning_rate": 0.0001270849273144606, "loss": 0.2567, "step": 21139 }, { "epoch": 1.7125729099157485, "grad_norm": 0.054790738970041275, "learning_rate": 0.00012708042666186598, "loss": 0.2751, "step": 21140 }, { "epoch": 1.712653920933247, "grad_norm": 0.06184843182563782, "learning_rate": 0.00012707592600927134, "loss": 0.2772, "step": 21141 }, { "epoch": 1.7127349319507452, "grad_norm": 0.046240661293268204, "learning_rate": 0.00012707142535667672, "loss": 0.281, "step": 21142 }, { "epoch": 1.7128159429682437, "grad_norm": 0.057482510805130005, "learning_rate": 0.0001270669247040821, "loss": 0.2881, "step": 21143 }, { "epoch": 1.7128969539857422, "grad_norm": 0.0650155320763588, "learning_rate": 0.00012706242405148747, "loss": 0.3222, "step": 21144 }, { "epoch": 1.7129779650032404, "grad_norm": 0.05420124903321266, "learning_rate": 0.00012705792339889283, "loss": 0.2459, "step": 21145 }, { "epoch": 1.7130589760207389, "grad_norm": 0.05785535275936127, "learning_rate": 0.00012705342274629822, "loss": 0.3015, "step": 21146 }, { "epoch": 1.7131399870382373, "grad_norm": 0.0573989674448967, "learning_rate": 0.0001270489220937036, "loss": 0.3189, "step": 21147 }, { "epoch": 1.7132209980557356, "grad_norm": 0.05121051147580147, "learning_rate": 0.00012704442144110897, "loss": 0.2484, "step": 21148 }, { "epoch": 1.7133020090732338, "grad_norm": 0.0666457861661911, "learning_rate": 0.00012703992078851435, "loss": 0.3251, "step": 21149 }, { "epoch": 1.7133830200907323, "grad_norm": 0.042105793952941895, "learning_rate": 0.0001270354201359197, "loss": 0.2847, "step": 21150 }, { "epoch": 1.7134640311082308, "grad_norm": 0.05290107801556587, "learning_rate": 0.00012703091948332507, "loss": 0.2648, "step": 21151 }, { "epoch": 1.713545042125729, "grad_norm": 0.04666683450341225, "learning_rate": 0.00012702641883073046, "loss": 0.305, "step": 21152 }, { "epoch": 1.7136260531432275, "grad_norm": 0.07131568342447281, "learning_rate": 0.00012702191817813585, "loss": 0.2696, "step": 21153 }, { "epoch": 1.713707064160726, "grad_norm": 0.04963874816894531, "learning_rate": 0.0001270174175255412, "loss": 0.2648, "step": 21154 }, { "epoch": 1.7137880751782242, "grad_norm": 0.0497848279774189, "learning_rate": 0.0001270129168729466, "loss": 0.2732, "step": 21155 }, { "epoch": 1.7138690861957226, "grad_norm": 0.06622777879238129, "learning_rate": 0.00012700841622035195, "loss": 0.2878, "step": 21156 }, { "epoch": 1.7139500972132211, "grad_norm": 0.050237640738487244, "learning_rate": 0.00012700391556775731, "loss": 0.2419, "step": 21157 }, { "epoch": 1.7140311082307194, "grad_norm": 0.06899549067020416, "learning_rate": 0.0001269994149151627, "loss": 0.314, "step": 21158 }, { "epoch": 1.7141121192482176, "grad_norm": 0.05233056843280792, "learning_rate": 0.0001269949142625681, "loss": 0.2767, "step": 21159 }, { "epoch": 1.7141931302657163, "grad_norm": 0.06126559525728226, "learning_rate": 0.00012699041360997345, "loss": 0.26, "step": 21160 }, { "epoch": 1.7142741412832145, "grad_norm": 0.060388606041669846, "learning_rate": 0.00012698591295737884, "loss": 0.2673, "step": 21161 }, { "epoch": 1.7143551523007128, "grad_norm": 0.06104142591357231, "learning_rate": 0.0001269814123047842, "loss": 0.2809, "step": 21162 }, { "epoch": 1.7144361633182112, "grad_norm": 0.06470298022031784, "learning_rate": 0.00012697691165218956, "loss": 0.2765, "step": 21163 }, { "epoch": 1.7145171743357097, "grad_norm": 0.048042502254247665, "learning_rate": 0.00012697241099959494, "loss": 0.2597, "step": 21164 }, { "epoch": 1.714598185353208, "grad_norm": 0.05685548484325409, "learning_rate": 0.00012696791034700033, "loss": 0.3263, "step": 21165 }, { "epoch": 1.7146791963707064, "grad_norm": 0.059012167155742645, "learning_rate": 0.0001269634096944057, "loss": 0.2723, "step": 21166 }, { "epoch": 1.714760207388205, "grad_norm": 0.058648496866226196, "learning_rate": 0.00012695890904181108, "loss": 0.3046, "step": 21167 }, { "epoch": 1.7148412184057031, "grad_norm": 0.04576666280627251, "learning_rate": 0.00012695440838921644, "loss": 0.2526, "step": 21168 }, { "epoch": 1.7149222294232016, "grad_norm": 0.054590385407209396, "learning_rate": 0.0001269499077366218, "loss": 0.2629, "step": 21169 }, { "epoch": 1.7150032404407, "grad_norm": 0.06748390197753906, "learning_rate": 0.00012694540708402718, "loss": 0.295, "step": 21170 }, { "epoch": 1.7150842514581983, "grad_norm": 0.05063813179731369, "learning_rate": 0.00012694090643143257, "loss": 0.2501, "step": 21171 }, { "epoch": 1.7151652624756966, "grad_norm": 0.05567986145615578, "learning_rate": 0.00012693640577883793, "loss": 0.2925, "step": 21172 }, { "epoch": 1.715246273493195, "grad_norm": 0.050066396594047546, "learning_rate": 0.00012693190512624332, "loss": 0.2446, "step": 21173 }, { "epoch": 1.7153272845106935, "grad_norm": 0.055447474122047424, "learning_rate": 0.00012692740447364868, "loss": 0.2916, "step": 21174 }, { "epoch": 1.7154082955281917, "grad_norm": 0.06282706558704376, "learning_rate": 0.00012692290382105404, "loss": 0.3065, "step": 21175 }, { "epoch": 1.7154893065456902, "grad_norm": 0.04857902228832245, "learning_rate": 0.00012691840316845945, "loss": 0.2397, "step": 21176 }, { "epoch": 1.7155703175631887, "grad_norm": 0.04898069053888321, "learning_rate": 0.0001269139025158648, "loss": 0.2604, "step": 21177 }, { "epoch": 1.715651328580687, "grad_norm": 0.04747678339481354, "learning_rate": 0.00012690940186327017, "loss": 0.2599, "step": 21178 }, { "epoch": 1.7157323395981854, "grad_norm": 0.05100768059492111, "learning_rate": 0.00012690490121067556, "loss": 0.2213, "step": 21179 }, { "epoch": 1.7158133506156839, "grad_norm": 0.06935994327068329, "learning_rate": 0.00012690040055808092, "loss": 0.2827, "step": 21180 }, { "epoch": 1.715894361633182, "grad_norm": 0.05088813230395317, "learning_rate": 0.00012689589990548628, "loss": 0.3469, "step": 21181 }, { "epoch": 1.7159753726506803, "grad_norm": 0.04963594675064087, "learning_rate": 0.0001268913992528917, "loss": 0.2108, "step": 21182 }, { "epoch": 1.716056383668179, "grad_norm": 0.06332466751337051, "learning_rate": 0.00012688689860029706, "loss": 0.2625, "step": 21183 }, { "epoch": 1.7161373946856773, "grad_norm": 0.05151652917265892, "learning_rate": 0.00012688239794770242, "loss": 0.2509, "step": 21184 }, { "epoch": 1.7162184057031755, "grad_norm": 0.05739894509315491, "learning_rate": 0.0001268778972951078, "loss": 0.3009, "step": 21185 }, { "epoch": 1.716299416720674, "grad_norm": 0.06268756836652756, "learning_rate": 0.00012687339664251316, "loss": 0.2762, "step": 21186 }, { "epoch": 1.7163804277381725, "grad_norm": 0.04882485046982765, "learning_rate": 0.00012686889598991852, "loss": 0.2938, "step": 21187 }, { "epoch": 1.7164614387556707, "grad_norm": 0.04620731621980667, "learning_rate": 0.00012686439533732394, "loss": 0.2605, "step": 21188 }, { "epoch": 1.7165424497731692, "grad_norm": 0.04816884547472, "learning_rate": 0.0001268598946847293, "loss": 0.2662, "step": 21189 }, { "epoch": 1.7166234607906676, "grad_norm": 0.04603031650185585, "learning_rate": 0.00012685539403213466, "loss": 0.2633, "step": 21190 }, { "epoch": 1.7167044718081659, "grad_norm": 0.05901835113763809, "learning_rate": 0.00012685089337954004, "loss": 0.2855, "step": 21191 }, { "epoch": 1.7167854828256643, "grad_norm": 0.05132526904344559, "learning_rate": 0.0001268463927269454, "loss": 0.2198, "step": 21192 }, { "epoch": 1.7168664938431628, "grad_norm": 0.06519816070795059, "learning_rate": 0.00012684189207435076, "loss": 0.2984, "step": 21193 }, { "epoch": 1.716947504860661, "grad_norm": 0.057430390268564224, "learning_rate": 0.00012683739142175618, "loss": 0.2549, "step": 21194 }, { "epoch": 1.7170285158781593, "grad_norm": 0.04745124652981758, "learning_rate": 0.00012683289076916154, "loss": 0.2417, "step": 21195 }, { "epoch": 1.7171095268956578, "grad_norm": 0.05713975429534912, "learning_rate": 0.0001268283901165669, "loss": 0.2787, "step": 21196 }, { "epoch": 1.7171905379131562, "grad_norm": 0.054375555366277695, "learning_rate": 0.00012682388946397229, "loss": 0.2505, "step": 21197 }, { "epoch": 1.7172715489306545, "grad_norm": 0.05335111916065216, "learning_rate": 0.00012681938881137765, "loss": 0.2593, "step": 21198 }, { "epoch": 1.717352559948153, "grad_norm": 0.0593978688120842, "learning_rate": 0.00012681488815878303, "loss": 0.2923, "step": 21199 }, { "epoch": 1.7174335709656514, "grad_norm": 0.061143700033426285, "learning_rate": 0.00012681038750618842, "loss": 0.2594, "step": 21200 }, { "epoch": 1.7175145819831497, "grad_norm": 0.06332940608263016, "learning_rate": 0.00012680588685359378, "loss": 0.3053, "step": 21201 }, { "epoch": 1.7175955930006481, "grad_norm": 0.04996534436941147, "learning_rate": 0.00012680138620099914, "loss": 0.2429, "step": 21202 }, { "epoch": 1.7176766040181466, "grad_norm": 0.05738399922847748, "learning_rate": 0.00012679688554840453, "loss": 0.3105, "step": 21203 }, { "epoch": 1.7177576150356448, "grad_norm": 0.0711902305483818, "learning_rate": 0.0001267923848958099, "loss": 0.3255, "step": 21204 }, { "epoch": 1.717838626053143, "grad_norm": 0.05467765033245087, "learning_rate": 0.00012678788424321527, "loss": 0.2763, "step": 21205 }, { "epoch": 1.7179196370706418, "grad_norm": 0.0483541339635849, "learning_rate": 0.00012678338359062066, "loss": 0.2314, "step": 21206 }, { "epoch": 1.71800064808814, "grad_norm": 0.05823575705289841, "learning_rate": 0.00012677888293802602, "loss": 0.2994, "step": 21207 }, { "epoch": 1.7180816591056383, "grad_norm": 0.049455784261226654, "learning_rate": 0.00012677438228543138, "loss": 0.305, "step": 21208 }, { "epoch": 1.7181626701231367, "grad_norm": 0.05627220496535301, "learning_rate": 0.00012676988163283677, "loss": 0.2779, "step": 21209 }, { "epoch": 1.7182436811406352, "grad_norm": 0.05031132698059082, "learning_rate": 0.00012676538098024213, "loss": 0.2967, "step": 21210 }, { "epoch": 1.7183246921581334, "grad_norm": 0.04256313294172287, "learning_rate": 0.00012676088032764752, "loss": 0.2588, "step": 21211 }, { "epoch": 1.718405703175632, "grad_norm": 0.057716649025678635, "learning_rate": 0.0001267563796750529, "loss": 0.293, "step": 21212 }, { "epoch": 1.7184867141931304, "grad_norm": 0.048773281276226044, "learning_rate": 0.00012675187902245826, "loss": 0.271, "step": 21213 }, { "epoch": 1.7185677252106286, "grad_norm": 0.052334122359752655, "learning_rate": 0.00012674737836986362, "loss": 0.2585, "step": 21214 }, { "epoch": 1.7186487362281269, "grad_norm": 0.058943185955286026, "learning_rate": 0.000126742877717269, "loss": 0.2928, "step": 21215 }, { "epoch": 1.7187297472456255, "grad_norm": 0.05530468001961708, "learning_rate": 0.00012673837706467437, "loss": 0.267, "step": 21216 }, { "epoch": 1.7188107582631238, "grad_norm": 0.05892956256866455, "learning_rate": 0.00012673387641207976, "loss": 0.2927, "step": 21217 }, { "epoch": 1.718891769280622, "grad_norm": 0.06373018026351929, "learning_rate": 0.00012672937575948514, "loss": 0.3176, "step": 21218 }, { "epoch": 1.7189727802981205, "grad_norm": 0.05370737612247467, "learning_rate": 0.0001267248751068905, "loss": 0.2462, "step": 21219 }, { "epoch": 1.719053791315619, "grad_norm": 0.053730808198451996, "learning_rate": 0.00012672037445429586, "loss": 0.2505, "step": 21220 }, { "epoch": 1.7191348023331172, "grad_norm": 0.05969685688614845, "learning_rate": 0.00012671587380170125, "loss": 0.2559, "step": 21221 }, { "epoch": 1.7192158133506157, "grad_norm": 0.051641613245010376, "learning_rate": 0.0001267113731491066, "loss": 0.2581, "step": 21222 }, { "epoch": 1.7192968243681142, "grad_norm": 0.06095960736274719, "learning_rate": 0.000126706872496512, "loss": 0.2565, "step": 21223 }, { "epoch": 1.7193778353856124, "grad_norm": 0.05608683079481125, "learning_rate": 0.00012670237184391739, "loss": 0.2934, "step": 21224 }, { "epoch": 1.7194588464031109, "grad_norm": 0.0689942017197609, "learning_rate": 0.00012669787119132275, "loss": 0.2505, "step": 21225 }, { "epoch": 1.7195398574206093, "grad_norm": 0.056214869022369385, "learning_rate": 0.0001266933705387281, "loss": 0.2552, "step": 21226 }, { "epoch": 1.7196208684381076, "grad_norm": 0.04623086377978325, "learning_rate": 0.0001266888698861335, "loss": 0.2916, "step": 21227 }, { "epoch": 1.7197018794556058, "grad_norm": 0.06557148694992065, "learning_rate": 0.00012668436923353888, "loss": 0.2901, "step": 21228 }, { "epoch": 1.7197828904731045, "grad_norm": 0.05431944876909256, "learning_rate": 0.00012667986858094424, "loss": 0.2534, "step": 21229 }, { "epoch": 1.7198639014906028, "grad_norm": 0.060326166450977325, "learning_rate": 0.00012667536792834963, "loss": 0.3101, "step": 21230 }, { "epoch": 1.719944912508101, "grad_norm": 0.055251408368349075, "learning_rate": 0.000126670867275755, "loss": 0.3074, "step": 21231 }, { "epoch": 1.7200259235255995, "grad_norm": 0.05667218938469887, "learning_rate": 0.00012666636662316035, "loss": 0.3127, "step": 21232 }, { "epoch": 1.720106934543098, "grad_norm": 0.06880618631839752, "learning_rate": 0.00012666186597056574, "loss": 0.2992, "step": 21233 }, { "epoch": 1.7201879455605962, "grad_norm": 0.04143282026052475, "learning_rate": 0.00012665736531797112, "loss": 0.2202, "step": 21234 }, { "epoch": 1.7202689565780946, "grad_norm": 0.064445361495018, "learning_rate": 0.00012665286466537648, "loss": 0.2515, "step": 21235 }, { "epoch": 1.720349967595593, "grad_norm": 0.051159944385290146, "learning_rate": 0.00012664836401278187, "loss": 0.2752, "step": 21236 }, { "epoch": 1.7204309786130914, "grad_norm": 0.053983334451913834, "learning_rate": 0.00012664386336018723, "loss": 0.2789, "step": 21237 }, { "epoch": 1.7205119896305896, "grad_norm": 0.05558694526553154, "learning_rate": 0.0001266393627075926, "loss": 0.2809, "step": 21238 }, { "epoch": 1.7205930006480883, "grad_norm": 0.05079452320933342, "learning_rate": 0.00012663486205499798, "loss": 0.2771, "step": 21239 }, { "epoch": 1.7206740116655865, "grad_norm": 0.054594215005636215, "learning_rate": 0.00012663036140240336, "loss": 0.2361, "step": 21240 }, { "epoch": 1.7207550226830848, "grad_norm": 0.052411917597055435, "learning_rate": 0.00012662586074980872, "loss": 0.2781, "step": 21241 }, { "epoch": 1.7208360337005832, "grad_norm": 0.06520796567201614, "learning_rate": 0.0001266213600972141, "loss": 0.2616, "step": 21242 }, { "epoch": 1.7209170447180817, "grad_norm": 0.05025561898946762, "learning_rate": 0.00012661685944461947, "loss": 0.2573, "step": 21243 }, { "epoch": 1.72099805573558, "grad_norm": 0.060252200812101364, "learning_rate": 0.00012661235879202483, "loss": 0.2936, "step": 21244 }, { "epoch": 1.7210790667530784, "grad_norm": 0.0471336767077446, "learning_rate": 0.00012660785813943022, "loss": 0.2514, "step": 21245 }, { "epoch": 1.721160077770577, "grad_norm": 0.05307598039507866, "learning_rate": 0.0001266033574868356, "loss": 0.2775, "step": 21246 }, { "epoch": 1.7212410887880751, "grad_norm": 0.05043487623333931, "learning_rate": 0.00012659885683424097, "loss": 0.2566, "step": 21247 }, { "epoch": 1.7213220998055736, "grad_norm": 0.055017393082380295, "learning_rate": 0.00012659435618164635, "loss": 0.2598, "step": 21248 }, { "epoch": 1.721403110823072, "grad_norm": 0.058290716260671616, "learning_rate": 0.0001265898555290517, "loss": 0.3069, "step": 21249 }, { "epoch": 1.7214841218405703, "grad_norm": 0.06543973088264465, "learning_rate": 0.00012658535487645707, "loss": 0.2883, "step": 21250 }, { "epoch": 1.7215651328580686, "grad_norm": 0.07460454106330872, "learning_rate": 0.00012658085422386246, "loss": 0.3185, "step": 21251 }, { "epoch": 1.721646143875567, "grad_norm": 0.051480378955602646, "learning_rate": 0.00012657635357126785, "loss": 0.2741, "step": 21252 }, { "epoch": 1.7217271548930655, "grad_norm": 0.04900343716144562, "learning_rate": 0.0001265718529186732, "loss": 0.3012, "step": 21253 }, { "epoch": 1.7218081659105637, "grad_norm": 0.06127531826496124, "learning_rate": 0.0001265673522660786, "loss": 0.3124, "step": 21254 }, { "epoch": 1.7218891769280622, "grad_norm": 0.056050658226013184, "learning_rate": 0.00012656285161348395, "loss": 0.2646, "step": 21255 }, { "epoch": 1.7219701879455607, "grad_norm": 0.05161750316619873, "learning_rate": 0.00012655835096088931, "loss": 0.2661, "step": 21256 }, { "epoch": 1.722051198963059, "grad_norm": 0.05862666293978691, "learning_rate": 0.00012655385030829473, "loss": 0.3148, "step": 21257 }, { "epoch": 1.7221322099805574, "grad_norm": 0.053529586642980576, "learning_rate": 0.0001265493496557001, "loss": 0.2803, "step": 21258 }, { "epoch": 1.7222132209980558, "grad_norm": 0.05283923074603081, "learning_rate": 0.00012654484900310545, "loss": 0.2592, "step": 21259 }, { "epoch": 1.722294232015554, "grad_norm": 0.04250847548246384, "learning_rate": 0.00012654034835051084, "loss": 0.2548, "step": 21260 }, { "epoch": 1.7223752430330523, "grad_norm": 0.05477706715464592, "learning_rate": 0.0001265358476979162, "loss": 0.2956, "step": 21261 }, { "epoch": 1.722456254050551, "grad_norm": 0.04699578508734703, "learning_rate": 0.00012653134704532156, "loss": 0.2689, "step": 21262 }, { "epoch": 1.7225372650680493, "grad_norm": 0.048608992248773575, "learning_rate": 0.00012652684639272697, "loss": 0.2915, "step": 21263 }, { "epoch": 1.7226182760855475, "grad_norm": 0.050241369754076004, "learning_rate": 0.00012652234574013233, "loss": 0.2958, "step": 21264 }, { "epoch": 1.722699287103046, "grad_norm": 0.05378851667046547, "learning_rate": 0.0001265178450875377, "loss": 0.2629, "step": 21265 }, { "epoch": 1.7227802981205445, "grad_norm": 0.05486956611275673, "learning_rate": 0.00012651334443494308, "loss": 0.253, "step": 21266 }, { "epoch": 1.7228613091380427, "grad_norm": 0.04376472532749176, "learning_rate": 0.00012650884378234844, "loss": 0.2434, "step": 21267 }, { "epoch": 1.7229423201555412, "grad_norm": 0.05031654238700867, "learning_rate": 0.0001265043431297538, "loss": 0.2712, "step": 21268 }, { "epoch": 1.7230233311730396, "grad_norm": 0.05202542617917061, "learning_rate": 0.0001264998424771592, "loss": 0.3016, "step": 21269 }, { "epoch": 1.7231043421905379, "grad_norm": 0.06132112443447113, "learning_rate": 0.00012649534182456457, "loss": 0.3207, "step": 21270 }, { "epoch": 1.7231853532080363, "grad_norm": 0.0606006421148777, "learning_rate": 0.00012649084117196993, "loss": 0.2808, "step": 21271 }, { "epoch": 1.7232663642255348, "grad_norm": 0.05827093496918678, "learning_rate": 0.00012648634051937532, "loss": 0.2759, "step": 21272 }, { "epoch": 1.723347375243033, "grad_norm": 0.060442544519901276, "learning_rate": 0.00012648183986678068, "loss": 0.2757, "step": 21273 }, { "epoch": 1.7234283862605313, "grad_norm": 0.055618658661842346, "learning_rate": 0.00012647733921418604, "loss": 0.2845, "step": 21274 }, { "epoch": 1.7235093972780298, "grad_norm": 0.05738990753889084, "learning_rate": 0.00012647283856159145, "loss": 0.2523, "step": 21275 }, { "epoch": 1.7235904082955282, "grad_norm": 0.061367228627204895, "learning_rate": 0.00012646833790899681, "loss": 0.2478, "step": 21276 }, { "epoch": 1.7236714193130265, "grad_norm": 0.0630490705370903, "learning_rate": 0.00012646383725640217, "loss": 0.2989, "step": 21277 }, { "epoch": 1.723752430330525, "grad_norm": 0.05338551476597786, "learning_rate": 0.00012645933660380756, "loss": 0.2698, "step": 21278 }, { "epoch": 1.7238334413480234, "grad_norm": 0.04899115115404129, "learning_rate": 0.00012645483595121292, "loss": 0.2392, "step": 21279 }, { "epoch": 1.7239144523655217, "grad_norm": 0.05391428619623184, "learning_rate": 0.0001264503352986183, "loss": 0.2887, "step": 21280 }, { "epoch": 1.7239954633830201, "grad_norm": 0.0557735338807106, "learning_rate": 0.0001264458346460237, "loss": 0.2377, "step": 21281 }, { "epoch": 1.7240764744005186, "grad_norm": 0.05717554688453674, "learning_rate": 0.00012644133399342906, "loss": 0.3062, "step": 21282 }, { "epoch": 1.7241574854180168, "grad_norm": 0.04615462198853493, "learning_rate": 0.00012643683334083442, "loss": 0.2586, "step": 21283 }, { "epoch": 1.724238496435515, "grad_norm": 0.04595242440700531, "learning_rate": 0.0001264323326882398, "loss": 0.2649, "step": 21284 }, { "epoch": 1.7243195074530138, "grad_norm": 0.06683401763439178, "learning_rate": 0.00012642783203564516, "loss": 0.346, "step": 21285 }, { "epoch": 1.724400518470512, "grad_norm": 0.061918869614601135, "learning_rate": 0.00012642333138305055, "loss": 0.26, "step": 21286 }, { "epoch": 1.7244815294880103, "grad_norm": 0.04123949259519577, "learning_rate": 0.00012641883073045594, "loss": 0.254, "step": 21287 }, { "epoch": 1.7245625405055087, "grad_norm": 0.058282338082790375, "learning_rate": 0.0001264143300778613, "loss": 0.2479, "step": 21288 }, { "epoch": 1.7246435515230072, "grad_norm": 0.06941943615674973, "learning_rate": 0.00012640982942526666, "loss": 0.2915, "step": 21289 }, { "epoch": 1.7247245625405054, "grad_norm": 0.06298353523015976, "learning_rate": 0.00012640532877267204, "loss": 0.2921, "step": 21290 }, { "epoch": 1.724805573558004, "grad_norm": 0.066420778632164, "learning_rate": 0.0001264008281200774, "loss": 0.3566, "step": 21291 }, { "epoch": 1.7248865845755024, "grad_norm": 0.051810406148433685, "learning_rate": 0.0001263963274674828, "loss": 0.2673, "step": 21292 }, { "epoch": 1.7249675955930006, "grad_norm": 0.06557074189186096, "learning_rate": 0.00012639182681488818, "loss": 0.3117, "step": 21293 }, { "epoch": 1.725048606610499, "grad_norm": 0.05122154951095581, "learning_rate": 0.00012638732616229354, "loss": 0.2707, "step": 21294 }, { "epoch": 1.7251296176279975, "grad_norm": 0.0532211996614933, "learning_rate": 0.0001263828255096989, "loss": 0.2401, "step": 21295 }, { "epoch": 1.7252106286454958, "grad_norm": 0.053490567952394485, "learning_rate": 0.00012637832485710429, "loss": 0.3249, "step": 21296 }, { "epoch": 1.725291639662994, "grad_norm": 0.05019410327076912, "learning_rate": 0.00012637382420450965, "loss": 0.2748, "step": 21297 }, { "epoch": 1.7253726506804925, "grad_norm": 0.06262815743684769, "learning_rate": 0.00012636932355191503, "loss": 0.2599, "step": 21298 }, { "epoch": 1.725453661697991, "grad_norm": 0.04839923605322838, "learning_rate": 0.00012636482289932042, "loss": 0.2763, "step": 21299 }, { "epoch": 1.7255346727154892, "grad_norm": 0.05693989619612694, "learning_rate": 0.00012636032224672578, "loss": 0.2895, "step": 21300 }, { "epoch": 1.7256156837329877, "grad_norm": 0.057713817805051804, "learning_rate": 0.00012635582159413114, "loss": 0.2793, "step": 21301 }, { "epoch": 1.7256966947504861, "grad_norm": 0.05226074531674385, "learning_rate": 0.00012635132094153653, "loss": 0.257, "step": 21302 }, { "epoch": 1.7257777057679844, "grad_norm": 0.0561804436147213, "learning_rate": 0.0001263468202889419, "loss": 0.2764, "step": 21303 }, { "epoch": 1.7258587167854829, "grad_norm": 0.05232897773385048, "learning_rate": 0.00012634231963634727, "loss": 0.2765, "step": 21304 }, { "epoch": 1.7259397278029813, "grad_norm": 0.05661951005458832, "learning_rate": 0.00012633781898375266, "loss": 0.3073, "step": 21305 }, { "epoch": 1.7260207388204796, "grad_norm": 0.05507947504520416, "learning_rate": 0.00012633331833115802, "loss": 0.3042, "step": 21306 }, { "epoch": 1.7261017498379778, "grad_norm": 0.049649450927972794, "learning_rate": 0.00012632881767856338, "loss": 0.2322, "step": 21307 }, { "epoch": 1.7261827608554765, "grad_norm": 0.0585404708981514, "learning_rate": 0.00012632431702596877, "loss": 0.2929, "step": 21308 }, { "epoch": 1.7262637718729748, "grad_norm": 0.05449943616986275, "learning_rate": 0.00012631981637337416, "loss": 0.2779, "step": 21309 }, { "epoch": 1.726344782890473, "grad_norm": 0.05993398651480675, "learning_rate": 0.00012631531572077952, "loss": 0.2702, "step": 21310 }, { "epoch": 1.7264257939079715, "grad_norm": 0.06984718143939972, "learning_rate": 0.0001263108150681849, "loss": 0.2983, "step": 21311 }, { "epoch": 1.72650680492547, "grad_norm": 0.0720304548740387, "learning_rate": 0.00012630631441559026, "loss": 0.3044, "step": 21312 }, { "epoch": 1.7265878159429682, "grad_norm": 0.06032625213265419, "learning_rate": 0.00012630181376299562, "loss": 0.2737, "step": 21313 }, { "epoch": 1.7266688269604666, "grad_norm": 0.056495506316423416, "learning_rate": 0.000126297313110401, "loss": 0.2768, "step": 21314 }, { "epoch": 1.726749837977965, "grad_norm": 0.058696284890174866, "learning_rate": 0.0001262928124578064, "loss": 0.3179, "step": 21315 }, { "epoch": 1.7268308489954634, "grad_norm": 0.050367239862680435, "learning_rate": 0.00012628831180521176, "loss": 0.2669, "step": 21316 }, { "epoch": 1.7269118600129616, "grad_norm": 0.05638197809457779, "learning_rate": 0.00012628381115261714, "loss": 0.2807, "step": 21317 }, { "epoch": 1.7269928710304603, "grad_norm": 0.048176951706409454, "learning_rate": 0.0001262793105000225, "loss": 0.2527, "step": 21318 }, { "epoch": 1.7270738820479585, "grad_norm": 0.05604461580514908, "learning_rate": 0.00012627480984742787, "loss": 0.2796, "step": 21319 }, { "epoch": 1.7271548930654568, "grad_norm": 0.06629718095064163, "learning_rate": 0.00012627030919483325, "loss": 0.2862, "step": 21320 }, { "epoch": 1.7272359040829552, "grad_norm": 0.0682777464389801, "learning_rate": 0.00012626580854223864, "loss": 0.2772, "step": 21321 }, { "epoch": 1.7273169151004537, "grad_norm": 0.060906145721673965, "learning_rate": 0.000126261307889644, "loss": 0.2877, "step": 21322 }, { "epoch": 1.727397926117952, "grad_norm": 0.0503285676240921, "learning_rate": 0.0001262568072370494, "loss": 0.2496, "step": 21323 }, { "epoch": 1.7274789371354504, "grad_norm": 0.05899946019053459, "learning_rate": 0.00012625230658445475, "loss": 0.2627, "step": 21324 }, { "epoch": 1.7275599481529489, "grad_norm": 0.06631388515233994, "learning_rate": 0.0001262478059318601, "loss": 0.2691, "step": 21325 }, { "epoch": 1.7276409591704471, "grad_norm": 0.06886113435029984, "learning_rate": 0.0001262433052792655, "loss": 0.2729, "step": 21326 }, { "epoch": 1.7277219701879456, "grad_norm": 0.05362020060420036, "learning_rate": 0.00012623880462667088, "loss": 0.2615, "step": 21327 }, { "epoch": 1.727802981205444, "grad_norm": 0.04796961322426796, "learning_rate": 0.00012623430397407624, "loss": 0.2526, "step": 21328 }, { "epoch": 1.7278839922229423, "grad_norm": 0.04980582743883133, "learning_rate": 0.00012622980332148163, "loss": 0.283, "step": 21329 }, { "epoch": 1.7279650032404406, "grad_norm": 0.06693169474601746, "learning_rate": 0.000126225302668887, "loss": 0.2906, "step": 21330 }, { "epoch": 1.7280460142579392, "grad_norm": 0.06786801666021347, "learning_rate": 0.00012622080201629235, "loss": 0.2715, "step": 21331 }, { "epoch": 1.7281270252754375, "grad_norm": 0.051919642835855484, "learning_rate": 0.00012621630136369776, "loss": 0.2674, "step": 21332 }, { "epoch": 1.7282080362929357, "grad_norm": 0.06100764498114586, "learning_rate": 0.00012621180071110312, "loss": 0.251, "step": 21333 }, { "epoch": 1.7282890473104342, "grad_norm": 0.060728829354047775, "learning_rate": 0.00012620730005850848, "loss": 0.2945, "step": 21334 }, { "epoch": 1.7283700583279327, "grad_norm": 0.078863225877285, "learning_rate": 0.00012620279940591387, "loss": 0.3004, "step": 21335 }, { "epoch": 1.728451069345431, "grad_norm": 0.06468357890844345, "learning_rate": 0.00012619829875331923, "loss": 0.2717, "step": 21336 }, { "epoch": 1.7285320803629294, "grad_norm": 0.05709939822554588, "learning_rate": 0.0001261937981007246, "loss": 0.2574, "step": 21337 }, { "epoch": 1.7286130913804278, "grad_norm": 0.058976322412490845, "learning_rate": 0.00012618929744813, "loss": 0.272, "step": 21338 }, { "epoch": 1.728694102397926, "grad_norm": 0.04370775446295738, "learning_rate": 0.00012618479679553536, "loss": 0.2272, "step": 21339 }, { "epoch": 1.7287751134154243, "grad_norm": 0.05771368741989136, "learning_rate": 0.00012618029614294072, "loss": 0.2465, "step": 21340 }, { "epoch": 1.728856124432923, "grad_norm": 0.04556897655129433, "learning_rate": 0.0001261757954903461, "loss": 0.287, "step": 21341 }, { "epoch": 1.7289371354504213, "grad_norm": 0.05363953858613968, "learning_rate": 0.00012617129483775147, "loss": 0.2829, "step": 21342 }, { "epoch": 1.7290181464679195, "grad_norm": 0.05831297114491463, "learning_rate": 0.00012616679418515683, "loss": 0.2739, "step": 21343 }, { "epoch": 1.729099157485418, "grad_norm": 0.063968226313591, "learning_rate": 0.00012616229353256225, "loss": 0.2947, "step": 21344 }, { "epoch": 1.7291801685029164, "grad_norm": 0.05874845013022423, "learning_rate": 0.0001261577928799676, "loss": 0.2953, "step": 21345 }, { "epoch": 1.7292611795204147, "grad_norm": 0.05044439807534218, "learning_rate": 0.00012615329222737297, "loss": 0.3164, "step": 21346 }, { "epoch": 1.7293421905379132, "grad_norm": 0.06613511592149734, "learning_rate": 0.00012614879157477835, "loss": 0.3009, "step": 21347 }, { "epoch": 1.7294232015554116, "grad_norm": 0.05768156796693802, "learning_rate": 0.0001261442909221837, "loss": 0.3061, "step": 21348 }, { "epoch": 1.7295042125729099, "grad_norm": 0.051086124032735825, "learning_rate": 0.00012613979026958907, "loss": 0.2436, "step": 21349 }, { "epoch": 1.7295852235904083, "grad_norm": 0.05139658600091934, "learning_rate": 0.0001261352896169945, "loss": 0.2624, "step": 21350 }, { "epoch": 1.7296662346079068, "grad_norm": 0.05288831517100334, "learning_rate": 0.00012613078896439985, "loss": 0.2851, "step": 21351 }, { "epoch": 1.729747245625405, "grad_norm": 0.054695550352334976, "learning_rate": 0.0001261262883118052, "loss": 0.2491, "step": 21352 }, { "epoch": 1.7298282566429033, "grad_norm": 0.055590204894542694, "learning_rate": 0.0001261217876592106, "loss": 0.2727, "step": 21353 }, { "epoch": 1.7299092676604018, "grad_norm": 0.052829891443252563, "learning_rate": 0.00012611728700661595, "loss": 0.3052, "step": 21354 }, { "epoch": 1.7299902786779002, "grad_norm": 0.06215556710958481, "learning_rate": 0.00012611278635402131, "loss": 0.2647, "step": 21355 }, { "epoch": 1.7300712896953985, "grad_norm": 0.05681098252534866, "learning_rate": 0.00012610828570142673, "loss": 0.291, "step": 21356 }, { "epoch": 1.730152300712897, "grad_norm": 0.06204934045672417, "learning_rate": 0.0001261037850488321, "loss": 0.304, "step": 21357 }, { "epoch": 1.7302333117303954, "grad_norm": 0.05352215841412544, "learning_rate": 0.00012609928439623745, "loss": 0.2687, "step": 21358 }, { "epoch": 1.7303143227478937, "grad_norm": 0.048397473990917206, "learning_rate": 0.00012609478374364284, "loss": 0.2847, "step": 21359 }, { "epoch": 1.7303953337653921, "grad_norm": 0.055357180535793304, "learning_rate": 0.0001260902830910482, "loss": 0.297, "step": 21360 }, { "epoch": 1.7304763447828906, "grad_norm": 0.05453452840447426, "learning_rate": 0.00012608578243845358, "loss": 0.3122, "step": 21361 }, { "epoch": 1.7305573558003888, "grad_norm": 0.05287787318229675, "learning_rate": 0.00012608128178585897, "loss": 0.2694, "step": 21362 }, { "epoch": 1.730638366817887, "grad_norm": 0.045254334807395935, "learning_rate": 0.00012607678113326433, "loss": 0.2705, "step": 21363 }, { "epoch": 1.7307193778353858, "grad_norm": 0.05910949781537056, "learning_rate": 0.0001260722804806697, "loss": 0.2813, "step": 21364 }, { "epoch": 1.730800388852884, "grad_norm": 0.04513555392622948, "learning_rate": 0.00012606777982807508, "loss": 0.2551, "step": 21365 }, { "epoch": 1.7308813998703823, "grad_norm": 0.047856781631708145, "learning_rate": 0.00012606327917548044, "loss": 0.245, "step": 21366 }, { "epoch": 1.7309624108878807, "grad_norm": 0.06202912703156471, "learning_rate": 0.00012605877852288583, "loss": 0.2554, "step": 21367 }, { "epoch": 1.7310434219053792, "grad_norm": 0.06017141789197922, "learning_rate": 0.0001260542778702912, "loss": 0.2724, "step": 21368 }, { "epoch": 1.7311244329228774, "grad_norm": 0.049296289682388306, "learning_rate": 0.00012604977721769657, "loss": 0.2767, "step": 21369 }, { "epoch": 1.731205443940376, "grad_norm": 0.05470065027475357, "learning_rate": 0.00012604527656510193, "loss": 0.2262, "step": 21370 }, { "epoch": 1.7312864549578744, "grad_norm": 0.05836993455886841, "learning_rate": 0.00012604077591250732, "loss": 0.2958, "step": 21371 }, { "epoch": 1.7313674659753726, "grad_norm": 0.0632636696100235, "learning_rate": 0.00012603627525991268, "loss": 0.228, "step": 21372 }, { "epoch": 1.731448476992871, "grad_norm": 0.05101475864648819, "learning_rate": 0.00012603177460731807, "loss": 0.2638, "step": 21373 }, { "epoch": 1.7315294880103695, "grad_norm": 0.06122167408466339, "learning_rate": 0.00012602727395472345, "loss": 0.3064, "step": 21374 }, { "epoch": 1.7316104990278678, "grad_norm": 0.06043681502342224, "learning_rate": 0.00012602277330212881, "loss": 0.3018, "step": 21375 }, { "epoch": 1.731691510045366, "grad_norm": 0.05266754329204559, "learning_rate": 0.00012601827264953417, "loss": 0.2605, "step": 21376 }, { "epoch": 1.7317725210628645, "grad_norm": 0.06432856619358063, "learning_rate": 0.00012601377199693956, "loss": 0.305, "step": 21377 }, { "epoch": 1.731853532080363, "grad_norm": 0.05559670925140381, "learning_rate": 0.00012600927134434492, "loss": 0.2968, "step": 21378 }, { "epoch": 1.7319345430978612, "grad_norm": 0.05518518015742302, "learning_rate": 0.0001260047706917503, "loss": 0.2731, "step": 21379 }, { "epoch": 1.7320155541153597, "grad_norm": 0.06477613002061844, "learning_rate": 0.0001260002700391557, "loss": 0.2575, "step": 21380 }, { "epoch": 1.7320965651328581, "grad_norm": 0.0499190092086792, "learning_rate": 0.00012599576938656106, "loss": 0.2738, "step": 21381 }, { "epoch": 1.7321775761503564, "grad_norm": 0.048186007887125015, "learning_rate": 0.00012599126873396642, "loss": 0.2514, "step": 21382 }, { "epoch": 1.7322585871678549, "grad_norm": 0.047534551471471786, "learning_rate": 0.0001259867680813718, "loss": 0.279, "step": 21383 }, { "epoch": 1.7323395981853533, "grad_norm": 0.053618431091308594, "learning_rate": 0.0001259822674287772, "loss": 0.2716, "step": 21384 }, { "epoch": 1.7324206092028516, "grad_norm": 0.051217447966337204, "learning_rate": 0.00012597776677618255, "loss": 0.2633, "step": 21385 }, { "epoch": 1.7325016202203498, "grad_norm": 0.05521465465426445, "learning_rate": 0.00012597326612358794, "loss": 0.2639, "step": 21386 }, { "epoch": 1.7325826312378485, "grad_norm": 0.04279023036360741, "learning_rate": 0.0001259687654709933, "loss": 0.2439, "step": 21387 }, { "epoch": 1.7326636422553467, "grad_norm": 0.057732559740543365, "learning_rate": 0.00012596426481839866, "loss": 0.2406, "step": 21388 }, { "epoch": 1.732744653272845, "grad_norm": 0.06207024306058884, "learning_rate": 0.00012595976416580404, "loss": 0.2888, "step": 21389 }, { "epoch": 1.7328256642903435, "grad_norm": 0.05113453418016434, "learning_rate": 0.00012595526351320943, "loss": 0.2911, "step": 21390 }, { "epoch": 1.732906675307842, "grad_norm": 0.050154559314250946, "learning_rate": 0.0001259507628606148, "loss": 0.2753, "step": 21391 }, { "epoch": 1.7329876863253402, "grad_norm": 0.06298622488975525, "learning_rate": 0.00012594626220802018, "loss": 0.2862, "step": 21392 }, { "epoch": 1.7330686973428386, "grad_norm": 0.047300562262535095, "learning_rate": 0.00012594176155542554, "loss": 0.2832, "step": 21393 }, { "epoch": 1.733149708360337, "grad_norm": 0.0653621256351471, "learning_rate": 0.0001259372609028309, "loss": 0.2837, "step": 21394 }, { "epoch": 1.7332307193778353, "grad_norm": 0.05709261819720268, "learning_rate": 0.00012593276025023629, "loss": 0.2791, "step": 21395 }, { "epoch": 1.7333117303953338, "grad_norm": 0.0497063584625721, "learning_rate": 0.00012592825959764167, "loss": 0.2478, "step": 21396 }, { "epoch": 1.7333927414128323, "grad_norm": 0.0732504278421402, "learning_rate": 0.00012592375894504703, "loss": 0.2567, "step": 21397 }, { "epoch": 1.7334737524303305, "grad_norm": 0.06695054471492767, "learning_rate": 0.00012591925829245242, "loss": 0.2997, "step": 21398 }, { "epoch": 1.7335547634478288, "grad_norm": 0.06759504228830338, "learning_rate": 0.00012591475763985778, "loss": 0.3161, "step": 21399 }, { "epoch": 1.7336357744653272, "grad_norm": 0.05656924098730087, "learning_rate": 0.00012591025698726314, "loss": 0.2954, "step": 21400 }, { "epoch": 1.7337167854828257, "grad_norm": 0.06173291429877281, "learning_rate": 0.00012590575633466853, "loss": 0.3053, "step": 21401 }, { "epoch": 1.733797796500324, "grad_norm": 0.056654833257198334, "learning_rate": 0.00012590125568207391, "loss": 0.2904, "step": 21402 }, { "epoch": 1.7338788075178224, "grad_norm": 0.050510480999946594, "learning_rate": 0.00012589675502947927, "loss": 0.2386, "step": 21403 }, { "epoch": 1.7339598185353209, "grad_norm": 0.05793347209692001, "learning_rate": 0.00012589225437688466, "loss": 0.2795, "step": 21404 }, { "epoch": 1.7340408295528191, "grad_norm": 0.05403798446059227, "learning_rate": 0.00012588775372429002, "loss": 0.2917, "step": 21405 }, { "epoch": 1.7341218405703176, "grad_norm": 0.05910486355423927, "learning_rate": 0.00012588325307169538, "loss": 0.3166, "step": 21406 }, { "epoch": 1.734202851587816, "grad_norm": 0.05428627133369446, "learning_rate": 0.00012587875241910077, "loss": 0.2969, "step": 21407 }, { "epoch": 1.7342838626053143, "grad_norm": 0.0675114244222641, "learning_rate": 0.00012587425176650616, "loss": 0.3124, "step": 21408 }, { "epoch": 1.7343648736228126, "grad_norm": 0.060393840074539185, "learning_rate": 0.00012586975111391152, "loss": 0.3345, "step": 21409 }, { "epoch": 1.7344458846403112, "grad_norm": 0.05710717663168907, "learning_rate": 0.0001258652504613169, "loss": 0.2679, "step": 21410 }, { "epoch": 1.7345268956578095, "grad_norm": 0.05533914640545845, "learning_rate": 0.00012586074980872226, "loss": 0.3104, "step": 21411 }, { "epoch": 1.7346079066753077, "grad_norm": 0.057103849947452545, "learning_rate": 0.00012585624915612762, "loss": 0.3109, "step": 21412 }, { "epoch": 1.7346889176928062, "grad_norm": 0.04883692413568497, "learning_rate": 0.00012585174850353304, "loss": 0.2732, "step": 21413 }, { "epoch": 1.7347699287103047, "grad_norm": 0.056350503116846085, "learning_rate": 0.0001258472478509384, "loss": 0.3007, "step": 21414 }, { "epoch": 1.734850939727803, "grad_norm": 0.05377821624279022, "learning_rate": 0.00012584274719834376, "loss": 0.2833, "step": 21415 }, { "epoch": 1.7349319507453014, "grad_norm": 0.06554847955703735, "learning_rate": 0.00012583824654574915, "loss": 0.3022, "step": 21416 }, { "epoch": 1.7350129617627998, "grad_norm": 0.054987650364637375, "learning_rate": 0.0001258337458931545, "loss": 0.2718, "step": 21417 }, { "epoch": 1.735093972780298, "grad_norm": 0.046197839081287384, "learning_rate": 0.00012582924524055987, "loss": 0.2457, "step": 21418 }, { "epoch": 1.7351749837977966, "grad_norm": 0.055975694209337234, "learning_rate": 0.00012582474458796528, "loss": 0.2791, "step": 21419 }, { "epoch": 1.735255994815295, "grad_norm": 0.06019577011466026, "learning_rate": 0.00012582024393537064, "loss": 0.3089, "step": 21420 }, { "epoch": 1.7353370058327933, "grad_norm": 0.05753401666879654, "learning_rate": 0.000125815743282776, "loss": 0.2629, "step": 21421 }, { "epoch": 1.7354180168502915, "grad_norm": 0.05200933665037155, "learning_rate": 0.0001258112426301814, "loss": 0.2343, "step": 21422 }, { "epoch": 1.73549902786779, "grad_norm": 0.05833006277680397, "learning_rate": 0.00012580674197758675, "loss": 0.2695, "step": 21423 }, { "epoch": 1.7355800388852884, "grad_norm": 0.05324200913310051, "learning_rate": 0.00012580224132499213, "loss": 0.2712, "step": 21424 }, { "epoch": 1.7356610499027867, "grad_norm": 0.05839523673057556, "learning_rate": 0.00012579774067239752, "loss": 0.2812, "step": 21425 }, { "epoch": 1.7357420609202852, "grad_norm": 0.04021824151277542, "learning_rate": 0.00012579324001980288, "loss": 0.2395, "step": 21426 }, { "epoch": 1.7358230719377836, "grad_norm": 0.07867847383022308, "learning_rate": 0.00012578873936720824, "loss": 0.2878, "step": 21427 }, { "epoch": 1.7359040829552819, "grad_norm": 0.05782991647720337, "learning_rate": 0.00012578423871461363, "loss": 0.2531, "step": 21428 }, { "epoch": 1.7359850939727803, "grad_norm": 0.0577692985534668, "learning_rate": 0.000125779738062019, "loss": 0.2641, "step": 21429 }, { "epoch": 1.7360661049902788, "grad_norm": 0.05876109004020691, "learning_rate": 0.00012577523740942438, "loss": 0.2589, "step": 21430 }, { "epoch": 1.736147116007777, "grad_norm": 0.05080138146877289, "learning_rate": 0.00012577073675682976, "loss": 0.2635, "step": 21431 }, { "epoch": 1.7362281270252753, "grad_norm": 0.06783095002174377, "learning_rate": 0.00012576623610423512, "loss": 0.2848, "step": 21432 }, { "epoch": 1.736309138042774, "grad_norm": 0.059699445962905884, "learning_rate": 0.00012576173545164048, "loss": 0.3167, "step": 21433 }, { "epoch": 1.7363901490602722, "grad_norm": 0.060464613139629364, "learning_rate": 0.00012575723479904587, "loss": 0.2712, "step": 21434 }, { "epoch": 1.7364711600777705, "grad_norm": 0.05330680310726166, "learning_rate": 0.00012575273414645123, "loss": 0.2919, "step": 21435 }, { "epoch": 1.736552171095269, "grad_norm": 0.04967203736305237, "learning_rate": 0.00012574823349385662, "loss": 0.2596, "step": 21436 }, { "epoch": 1.7366331821127674, "grad_norm": 0.054990921169519424, "learning_rate": 0.000125743732841262, "loss": 0.2646, "step": 21437 }, { "epoch": 1.7367141931302656, "grad_norm": 0.052907802164554596, "learning_rate": 0.00012573923218866736, "loss": 0.2712, "step": 21438 }, { "epoch": 1.7367952041477641, "grad_norm": 0.05345708131790161, "learning_rate": 0.00012573473153607272, "loss": 0.2591, "step": 21439 }, { "epoch": 1.7368762151652626, "grad_norm": 0.0520918108522892, "learning_rate": 0.0001257302308834781, "loss": 0.2642, "step": 21440 }, { "epoch": 1.7369572261827608, "grad_norm": 0.056074172258377075, "learning_rate": 0.00012572573023088347, "loss": 0.2667, "step": 21441 }, { "epoch": 1.737038237200259, "grad_norm": 0.046990297734737396, "learning_rate": 0.00012572122957828886, "loss": 0.2657, "step": 21442 }, { "epoch": 1.7371192482177578, "grad_norm": 0.056683171540498734, "learning_rate": 0.00012571672892569425, "loss": 0.2743, "step": 21443 }, { "epoch": 1.737200259235256, "grad_norm": 0.048928774893283844, "learning_rate": 0.0001257122282730996, "loss": 0.274, "step": 21444 }, { "epoch": 1.7372812702527543, "grad_norm": 0.055514540523290634, "learning_rate": 0.00012570772762050497, "loss": 0.2615, "step": 21445 }, { "epoch": 1.7373622812702527, "grad_norm": 0.06316448748111725, "learning_rate": 0.00012570322696791035, "loss": 0.3063, "step": 21446 }, { "epoch": 1.7374432922877512, "grad_norm": 0.054943282157182693, "learning_rate": 0.0001256987263153157, "loss": 0.2994, "step": 21447 }, { "epoch": 1.7375243033052494, "grad_norm": 0.05165160447359085, "learning_rate": 0.0001256942256627211, "loss": 0.2552, "step": 21448 }, { "epoch": 1.737605314322748, "grad_norm": 0.050460558384656906, "learning_rate": 0.0001256897250101265, "loss": 0.2646, "step": 21449 }, { "epoch": 1.7376863253402464, "grad_norm": 0.05472885072231293, "learning_rate": 0.00012568522435753185, "loss": 0.285, "step": 21450 }, { "epoch": 1.7377673363577446, "grad_norm": 0.06435894221067429, "learning_rate": 0.0001256807237049372, "loss": 0.2957, "step": 21451 }, { "epoch": 1.737848347375243, "grad_norm": 0.060838378965854645, "learning_rate": 0.0001256762230523426, "loss": 0.2639, "step": 21452 }, { "epoch": 1.7379293583927415, "grad_norm": 0.05674123764038086, "learning_rate": 0.00012567172239974796, "loss": 0.309, "step": 21453 }, { "epoch": 1.7380103694102398, "grad_norm": 0.06330017745494843, "learning_rate": 0.00012566722174715334, "loss": 0.2949, "step": 21454 }, { "epoch": 1.738091380427738, "grad_norm": 0.05393190309405327, "learning_rate": 0.00012566272109455873, "loss": 0.2424, "step": 21455 }, { "epoch": 1.7381723914452365, "grad_norm": 0.042466968297958374, "learning_rate": 0.0001256582204419641, "loss": 0.2584, "step": 21456 }, { "epoch": 1.738253402462735, "grad_norm": 0.05222369357943535, "learning_rate": 0.00012565371978936945, "loss": 0.2665, "step": 21457 }, { "epoch": 1.7383344134802332, "grad_norm": 0.04583865404129028, "learning_rate": 0.00012564921913677484, "loss": 0.2534, "step": 21458 }, { "epoch": 1.7384154244977317, "grad_norm": 0.05196063220500946, "learning_rate": 0.0001256447184841802, "loss": 0.2622, "step": 21459 }, { "epoch": 1.7384964355152301, "grad_norm": 0.055964767932891846, "learning_rate": 0.00012564021783158558, "loss": 0.2734, "step": 21460 }, { "epoch": 1.7385774465327284, "grad_norm": 0.0522189661860466, "learning_rate": 0.00012563571717899097, "loss": 0.2734, "step": 21461 }, { "epoch": 1.7386584575502269, "grad_norm": 0.05658310279250145, "learning_rate": 0.00012563121652639633, "loss": 0.2604, "step": 21462 }, { "epoch": 1.7387394685677253, "grad_norm": 0.05104950815439224, "learning_rate": 0.0001256267158738017, "loss": 0.296, "step": 21463 }, { "epoch": 1.7388204795852236, "grad_norm": 0.0631055012345314, "learning_rate": 0.00012562221522120708, "loss": 0.2642, "step": 21464 }, { "epoch": 1.7389014906027218, "grad_norm": 0.05673608556389809, "learning_rate": 0.00012561771456861247, "loss": 0.302, "step": 21465 }, { "epoch": 1.7389825016202205, "grad_norm": 0.04988215118646622, "learning_rate": 0.00012561321391601783, "loss": 0.256, "step": 21466 }, { "epoch": 1.7390635126377187, "grad_norm": 0.053265515714883804, "learning_rate": 0.0001256087132634232, "loss": 0.2588, "step": 21467 }, { "epoch": 1.739144523655217, "grad_norm": 0.04829196259379387, "learning_rate": 0.00012560421261082857, "loss": 0.2508, "step": 21468 }, { "epoch": 1.7392255346727155, "grad_norm": 0.06507780402898788, "learning_rate": 0.00012559971195823393, "loss": 0.3162, "step": 21469 }, { "epoch": 1.739306545690214, "grad_norm": 0.05773133784532547, "learning_rate": 0.00012559521130563932, "loss": 0.2679, "step": 21470 }, { "epoch": 1.7393875567077122, "grad_norm": 0.055706221610307693, "learning_rate": 0.0001255907106530447, "loss": 0.2997, "step": 21471 }, { "epoch": 1.7394685677252106, "grad_norm": 0.05406045913696289, "learning_rate": 0.00012558621000045007, "loss": 0.2501, "step": 21472 }, { "epoch": 1.739549578742709, "grad_norm": 0.05830344930291176, "learning_rate": 0.00012558170934785545, "loss": 0.3011, "step": 21473 }, { "epoch": 1.7396305897602073, "grad_norm": 0.05689018964767456, "learning_rate": 0.00012557720869526081, "loss": 0.2535, "step": 21474 }, { "epoch": 1.7397116007777058, "grad_norm": 0.05018286034464836, "learning_rate": 0.00012557270804266617, "loss": 0.2671, "step": 21475 }, { "epoch": 1.7397926117952043, "grad_norm": 0.052559975534677505, "learning_rate": 0.00012556820739007156, "loss": 0.2961, "step": 21476 }, { "epoch": 1.7398736228127025, "grad_norm": 0.06947226077318192, "learning_rate": 0.00012556370673747695, "loss": 0.3438, "step": 21477 }, { "epoch": 1.7399546338302008, "grad_norm": 0.05610761046409607, "learning_rate": 0.0001255592060848823, "loss": 0.2982, "step": 21478 }, { "epoch": 1.7400356448476992, "grad_norm": 0.060067109763622284, "learning_rate": 0.0001255547054322877, "loss": 0.2656, "step": 21479 }, { "epoch": 1.7401166558651977, "grad_norm": 0.05526788532733917, "learning_rate": 0.00012555020477969306, "loss": 0.2666, "step": 21480 }, { "epoch": 1.740197666882696, "grad_norm": 0.05276999995112419, "learning_rate": 0.00012554570412709842, "loss": 0.2765, "step": 21481 }, { "epoch": 1.7402786779001944, "grad_norm": 0.054623451083898544, "learning_rate": 0.0001255412034745038, "loss": 0.2761, "step": 21482 }, { "epoch": 1.7403596889176929, "grad_norm": 0.04773684963583946, "learning_rate": 0.0001255367028219092, "loss": 0.2923, "step": 21483 }, { "epoch": 1.7404406999351911, "grad_norm": 0.053866446018218994, "learning_rate": 0.00012553220216931455, "loss": 0.2435, "step": 21484 }, { "epoch": 1.7405217109526896, "grad_norm": 0.049850448966026306, "learning_rate": 0.00012552770151671994, "loss": 0.2933, "step": 21485 }, { "epoch": 1.740602721970188, "grad_norm": 0.059759512543678284, "learning_rate": 0.0001255232008641253, "loss": 0.2967, "step": 21486 }, { "epoch": 1.7406837329876863, "grad_norm": 0.04076186940073967, "learning_rate": 0.00012551870021153066, "loss": 0.2648, "step": 21487 }, { "epoch": 1.7407647440051845, "grad_norm": 0.05791622772812843, "learning_rate": 0.00012551419955893604, "loss": 0.2974, "step": 21488 }, { "epoch": 1.7408457550226832, "grad_norm": 0.05344267562031746, "learning_rate": 0.00012550969890634143, "loss": 0.2804, "step": 21489 }, { "epoch": 1.7409267660401815, "grad_norm": 0.05635475367307663, "learning_rate": 0.0001255051982537468, "loss": 0.3058, "step": 21490 }, { "epoch": 1.7410077770576797, "grad_norm": 0.07314221560955048, "learning_rate": 0.00012550069760115218, "loss": 0.3249, "step": 21491 }, { "epoch": 1.7410887880751782, "grad_norm": 0.05568360164761543, "learning_rate": 0.00012549619694855754, "loss": 0.2766, "step": 21492 }, { "epoch": 1.7411697990926767, "grad_norm": 0.04688483104109764, "learning_rate": 0.00012549169629596293, "loss": 0.2597, "step": 21493 }, { "epoch": 1.741250810110175, "grad_norm": 0.047986626625061035, "learning_rate": 0.0001254871956433683, "loss": 0.2538, "step": 21494 }, { "epoch": 1.7413318211276734, "grad_norm": 0.05898340418934822, "learning_rate": 0.00012548269499077367, "loss": 0.3109, "step": 21495 }, { "epoch": 1.7414128321451718, "grad_norm": 0.062086861580610275, "learning_rate": 0.00012547819433817903, "loss": 0.2648, "step": 21496 }, { "epoch": 1.74149384316267, "grad_norm": 0.05001772567629814, "learning_rate": 0.00012547369368558442, "loss": 0.2583, "step": 21497 }, { "epoch": 1.7415748541801686, "grad_norm": 0.0589098185300827, "learning_rate": 0.00012546919303298978, "loss": 0.2771, "step": 21498 }, { "epoch": 1.741655865197667, "grad_norm": 0.06397674232721329, "learning_rate": 0.00012546469238039517, "loss": 0.2931, "step": 21499 }, { "epoch": 1.7417368762151653, "grad_norm": 0.059052225202322006, "learning_rate": 0.00012546019172780056, "loss": 0.2862, "step": 21500 }, { "epoch": 1.7418178872326635, "grad_norm": 0.05825584754347801, "learning_rate": 0.00012545569107520592, "loss": 0.3101, "step": 21501 }, { "epoch": 1.741898898250162, "grad_norm": 0.05482599884271622, "learning_rate": 0.00012545119042261128, "loss": 0.2734, "step": 21502 }, { "epoch": 1.7419799092676604, "grad_norm": 0.06473010033369064, "learning_rate": 0.00012544668977001666, "loss": 0.2913, "step": 21503 }, { "epoch": 1.7420609202851587, "grad_norm": 0.05840296298265457, "learning_rate": 0.00012544218911742202, "loss": 0.2555, "step": 21504 }, { "epoch": 1.7421419313026572, "grad_norm": 0.06706037372350693, "learning_rate": 0.0001254376884648274, "loss": 0.2609, "step": 21505 }, { "epoch": 1.7422229423201556, "grad_norm": 0.052381958812475204, "learning_rate": 0.0001254331878122328, "loss": 0.2467, "step": 21506 }, { "epoch": 1.7423039533376539, "grad_norm": 0.04638232663273811, "learning_rate": 0.00012542868715963816, "loss": 0.2378, "step": 21507 }, { "epoch": 1.7423849643551523, "grad_norm": 0.051347095519304276, "learning_rate": 0.00012542418650704352, "loss": 0.2355, "step": 21508 }, { "epoch": 1.7424659753726508, "grad_norm": 0.06403027474880219, "learning_rate": 0.0001254196858544489, "loss": 0.3214, "step": 21509 }, { "epoch": 1.742546986390149, "grad_norm": 0.049564070999622345, "learning_rate": 0.00012541518520185426, "loss": 0.2531, "step": 21510 }, { "epoch": 1.7426279974076473, "grad_norm": 0.06536975502967834, "learning_rate": 0.00012541068454925965, "loss": 0.3425, "step": 21511 }, { "epoch": 1.742709008425146, "grad_norm": 0.048163220286369324, "learning_rate": 0.00012540618389666504, "loss": 0.2339, "step": 21512 }, { "epoch": 1.7427900194426442, "grad_norm": 0.049840670078992844, "learning_rate": 0.0001254016832440704, "loss": 0.2806, "step": 21513 }, { "epoch": 1.7428710304601425, "grad_norm": 0.0577770434319973, "learning_rate": 0.00012539718259147576, "loss": 0.2912, "step": 21514 }, { "epoch": 1.742952041477641, "grad_norm": 0.05158194527029991, "learning_rate": 0.00012539268193888115, "loss": 0.2386, "step": 21515 }, { "epoch": 1.7430330524951394, "grad_norm": 0.0499204657971859, "learning_rate": 0.0001253881812862865, "loss": 0.251, "step": 21516 }, { "epoch": 1.7431140635126376, "grad_norm": 0.048412248492240906, "learning_rate": 0.0001253836806336919, "loss": 0.2594, "step": 21517 }, { "epoch": 1.7431950745301361, "grad_norm": 0.04767746105790138, "learning_rate": 0.00012537917998109728, "loss": 0.2352, "step": 21518 }, { "epoch": 1.7432760855476346, "grad_norm": 0.04811496660113335, "learning_rate": 0.00012537467932850264, "loss": 0.2538, "step": 21519 }, { "epoch": 1.7433570965651328, "grad_norm": 0.054618820548057556, "learning_rate": 0.000125370178675908, "loss": 0.2469, "step": 21520 }, { "epoch": 1.7434381075826313, "grad_norm": 0.055555637925863266, "learning_rate": 0.0001253656780233134, "loss": 0.2955, "step": 21521 }, { "epoch": 1.7435191186001298, "grad_norm": 0.04947299137711525, "learning_rate": 0.00012536117737071875, "loss": 0.27, "step": 21522 }, { "epoch": 1.743600129617628, "grad_norm": 0.05181947723031044, "learning_rate": 0.00012535667671812413, "loss": 0.2476, "step": 21523 }, { "epoch": 1.7436811406351262, "grad_norm": 0.055776968598365784, "learning_rate": 0.00012535217606552952, "loss": 0.2784, "step": 21524 }, { "epoch": 1.7437621516526247, "grad_norm": 0.0568946897983551, "learning_rate": 0.00012534767541293488, "loss": 0.2588, "step": 21525 }, { "epoch": 1.7438431626701232, "grad_norm": 0.06434997171163559, "learning_rate": 0.00012534317476034024, "loss": 0.2989, "step": 21526 }, { "epoch": 1.7439241736876214, "grad_norm": 0.059009552001953125, "learning_rate": 0.00012533867410774563, "loss": 0.3146, "step": 21527 }, { "epoch": 1.74400518470512, "grad_norm": 0.05180266872048378, "learning_rate": 0.000125334173455151, "loss": 0.2498, "step": 21528 }, { "epoch": 1.7440861957226184, "grad_norm": 0.0628051683306694, "learning_rate": 0.00012532967280255638, "loss": 0.2804, "step": 21529 }, { "epoch": 1.7441672067401166, "grad_norm": 0.07517543435096741, "learning_rate": 0.00012532517214996176, "loss": 0.2747, "step": 21530 }, { "epoch": 1.744248217757615, "grad_norm": 0.06764915585517883, "learning_rate": 0.00012532067149736712, "loss": 0.2639, "step": 21531 }, { "epoch": 1.7443292287751135, "grad_norm": 0.07854727655649185, "learning_rate": 0.00012531617084477248, "loss": 0.3395, "step": 21532 }, { "epoch": 1.7444102397926118, "grad_norm": 0.05045973137021065, "learning_rate": 0.00012531167019217787, "loss": 0.3025, "step": 21533 }, { "epoch": 1.74449125081011, "grad_norm": 0.054208904504776, "learning_rate": 0.00012530716953958323, "loss": 0.2857, "step": 21534 }, { "epoch": 1.7445722618276087, "grad_norm": 0.04966440424323082, "learning_rate": 0.00012530266888698862, "loss": 0.2402, "step": 21535 }, { "epoch": 1.744653272845107, "grad_norm": 0.05039123818278313, "learning_rate": 0.000125298168234394, "loss": 0.2566, "step": 21536 }, { "epoch": 1.7447342838626052, "grad_norm": 0.04494204372167587, "learning_rate": 0.00012529366758179936, "loss": 0.2608, "step": 21537 }, { "epoch": 1.7448152948801037, "grad_norm": 0.05890116095542908, "learning_rate": 0.00012528916692920472, "loss": 0.2842, "step": 21538 }, { "epoch": 1.7448963058976021, "grad_norm": 0.049471139907836914, "learning_rate": 0.0001252846662766101, "loss": 0.3005, "step": 21539 }, { "epoch": 1.7449773169151004, "grad_norm": 0.060563184320926666, "learning_rate": 0.00012528016562401547, "loss": 0.2672, "step": 21540 }, { "epoch": 1.7450583279325989, "grad_norm": 0.051124975085258484, "learning_rate": 0.00012527566497142086, "loss": 0.2533, "step": 21541 }, { "epoch": 1.7451393389500973, "grad_norm": 0.04515647143125534, "learning_rate": 0.00012527116431882625, "loss": 0.2504, "step": 21542 }, { "epoch": 1.7452203499675956, "grad_norm": 0.046405233442783356, "learning_rate": 0.0001252666636662316, "loss": 0.2772, "step": 21543 }, { "epoch": 1.7453013609850938, "grad_norm": 0.051825813949108124, "learning_rate": 0.00012526216301363697, "loss": 0.2524, "step": 21544 }, { "epoch": 1.7453823720025925, "grad_norm": 0.052078425884246826, "learning_rate": 0.00012525766236104235, "loss": 0.2721, "step": 21545 }, { "epoch": 1.7454633830200907, "grad_norm": 0.06504826992750168, "learning_rate": 0.00012525316170844774, "loss": 0.2336, "step": 21546 }, { "epoch": 1.745544394037589, "grad_norm": 0.056292686611413956, "learning_rate": 0.0001252486610558531, "loss": 0.2983, "step": 21547 }, { "epoch": 1.7456254050550875, "grad_norm": 0.062441788613796234, "learning_rate": 0.0001252441604032585, "loss": 0.3117, "step": 21548 }, { "epoch": 1.745706416072586, "grad_norm": 0.05644739791750908, "learning_rate": 0.00012523965975066385, "loss": 0.2612, "step": 21549 }, { "epoch": 1.7457874270900842, "grad_norm": 0.05442476272583008, "learning_rate": 0.0001252351590980692, "loss": 0.2705, "step": 21550 }, { "epoch": 1.7458684381075826, "grad_norm": 0.05974116176366806, "learning_rate": 0.0001252306584454746, "loss": 0.2695, "step": 21551 }, { "epoch": 1.745949449125081, "grad_norm": 0.05387002229690552, "learning_rate": 0.00012522615779287998, "loss": 0.2855, "step": 21552 }, { "epoch": 1.7460304601425793, "grad_norm": 0.05770234391093254, "learning_rate": 0.00012522165714028534, "loss": 0.3043, "step": 21553 }, { "epoch": 1.7461114711600778, "grad_norm": 0.0670681819319725, "learning_rate": 0.00012521715648769073, "loss": 0.2737, "step": 21554 }, { "epoch": 1.7461924821775763, "grad_norm": 0.07340843975543976, "learning_rate": 0.0001252126558350961, "loss": 0.2869, "step": 21555 }, { "epoch": 1.7462734931950745, "grad_norm": 0.059023816138505936, "learning_rate": 0.00012520815518250145, "loss": 0.2981, "step": 21556 }, { "epoch": 1.7463545042125728, "grad_norm": 0.062153011560440063, "learning_rate": 0.00012520365452990684, "loss": 0.2648, "step": 21557 }, { "epoch": 1.7464355152300715, "grad_norm": 0.05502208322286606, "learning_rate": 0.00012519915387731222, "loss": 0.2592, "step": 21558 }, { "epoch": 1.7465165262475697, "grad_norm": 0.0649203211069107, "learning_rate": 0.00012519465322471758, "loss": 0.2915, "step": 21559 }, { "epoch": 1.746597537265068, "grad_norm": 0.05525941774249077, "learning_rate": 0.00012519015257212297, "loss": 0.2721, "step": 21560 }, { "epoch": 1.7466785482825664, "grad_norm": 0.062348414212465286, "learning_rate": 0.00012518565191952833, "loss": 0.3265, "step": 21561 }, { "epoch": 1.7467595593000649, "grad_norm": 0.061026740819215775, "learning_rate": 0.00012518115126693372, "loss": 0.3212, "step": 21562 }, { "epoch": 1.7468405703175631, "grad_norm": 0.05713002756237984, "learning_rate": 0.00012517665061433908, "loss": 0.2846, "step": 21563 }, { "epoch": 1.7469215813350616, "grad_norm": 0.04872580245137215, "learning_rate": 0.00012517214996174447, "loss": 0.2637, "step": 21564 }, { "epoch": 1.74700259235256, "grad_norm": 0.05415729805827141, "learning_rate": 0.00012516764930914983, "loss": 0.2787, "step": 21565 }, { "epoch": 1.7470836033700583, "grad_norm": 0.054065488278865814, "learning_rate": 0.0001251631486565552, "loss": 0.3008, "step": 21566 }, { "epoch": 1.7471646143875565, "grad_norm": 0.05611393228173256, "learning_rate": 0.00012515864800396057, "loss": 0.2688, "step": 21567 }, { "epoch": 1.7472456254050552, "grad_norm": 0.05508045852184296, "learning_rate": 0.00012515414735136596, "loss": 0.2431, "step": 21568 }, { "epoch": 1.7473266364225535, "grad_norm": 0.05065127834677696, "learning_rate": 0.00012514964669877132, "loss": 0.3143, "step": 21569 }, { "epoch": 1.7474076474400517, "grad_norm": 0.04621940851211548, "learning_rate": 0.0001251451460461767, "loss": 0.2991, "step": 21570 }, { "epoch": 1.7474886584575502, "grad_norm": 0.04384281113743782, "learning_rate": 0.00012514064539358207, "loss": 0.2465, "step": 21571 }, { "epoch": 1.7475696694750487, "grad_norm": 0.05889362469315529, "learning_rate": 0.00012513614474098745, "loss": 0.2807, "step": 21572 }, { "epoch": 1.747650680492547, "grad_norm": 0.05274774879217148, "learning_rate": 0.00012513164408839281, "loss": 0.2664, "step": 21573 }, { "epoch": 1.7477316915100454, "grad_norm": 0.059176184237003326, "learning_rate": 0.0001251271434357982, "loss": 0.2772, "step": 21574 }, { "epoch": 1.7478127025275438, "grad_norm": 0.05871306732296944, "learning_rate": 0.0001251226427832036, "loss": 0.2698, "step": 21575 }, { "epoch": 1.747893713545042, "grad_norm": 0.052813295274972916, "learning_rate": 0.00012511814213060895, "loss": 0.2481, "step": 21576 }, { "epoch": 1.7479747245625405, "grad_norm": 0.06352829933166504, "learning_rate": 0.0001251136414780143, "loss": 0.3312, "step": 21577 }, { "epoch": 1.748055735580039, "grad_norm": 0.046175677329301834, "learning_rate": 0.0001251091408254197, "loss": 0.2593, "step": 21578 }, { "epoch": 1.7481367465975373, "grad_norm": 0.06400038301944733, "learning_rate": 0.00012510464017282506, "loss": 0.3031, "step": 21579 }, { "epoch": 1.7482177576150355, "grad_norm": 0.06217432767152786, "learning_rate": 0.00012510013952023044, "loss": 0.2741, "step": 21580 }, { "epoch": 1.748298768632534, "grad_norm": 0.06696798652410507, "learning_rate": 0.00012509563886763583, "loss": 0.3025, "step": 21581 }, { "epoch": 1.7483797796500324, "grad_norm": 0.0568382665514946, "learning_rate": 0.0001250911382150412, "loss": 0.3002, "step": 21582 }, { "epoch": 1.7484607906675307, "grad_norm": 0.06164439395070076, "learning_rate": 0.00012508663756244655, "loss": 0.2556, "step": 21583 }, { "epoch": 1.7485418016850292, "grad_norm": 0.05397419631481171, "learning_rate": 0.00012508213690985194, "loss": 0.2729, "step": 21584 }, { "epoch": 1.7486228127025276, "grad_norm": 0.05543617904186249, "learning_rate": 0.0001250776362572573, "loss": 0.305, "step": 21585 }, { "epoch": 1.7487038237200259, "grad_norm": 0.05711469426751137, "learning_rate": 0.00012507313560466268, "loss": 0.2885, "step": 21586 }, { "epoch": 1.7487848347375243, "grad_norm": 0.061703696846961975, "learning_rate": 0.00012506863495206807, "loss": 0.2795, "step": 21587 }, { "epoch": 1.7488658457550228, "grad_norm": 0.053626950830221176, "learning_rate": 0.00012506413429947343, "loss": 0.2935, "step": 21588 }, { "epoch": 1.748946856772521, "grad_norm": 0.0490972176194191, "learning_rate": 0.0001250596336468788, "loss": 0.2555, "step": 21589 }, { "epoch": 1.7490278677900193, "grad_norm": 0.05172901600599289, "learning_rate": 0.00012505513299428418, "loss": 0.295, "step": 21590 }, { "epoch": 1.749108878807518, "grad_norm": 0.05565916746854782, "learning_rate": 0.00012505063234168954, "loss": 0.268, "step": 21591 }, { "epoch": 1.7491898898250162, "grad_norm": 0.06226447597146034, "learning_rate": 0.00012504613168909493, "loss": 0.3178, "step": 21592 }, { "epoch": 1.7492709008425145, "grad_norm": 0.051055144518613815, "learning_rate": 0.00012504163103650031, "loss": 0.2619, "step": 21593 }, { "epoch": 1.749351911860013, "grad_norm": 0.05917363241314888, "learning_rate": 0.00012503713038390567, "loss": 0.2706, "step": 21594 }, { "epoch": 1.7494329228775114, "grad_norm": 0.05820334702730179, "learning_rate": 0.00012503262973131103, "loss": 0.2764, "step": 21595 }, { "epoch": 1.7495139338950096, "grad_norm": 0.06279566884040833, "learning_rate": 0.00012502812907871642, "loss": 0.2629, "step": 21596 }, { "epoch": 1.749594944912508, "grad_norm": 0.05095963552594185, "learning_rate": 0.00012502362842612178, "loss": 0.2827, "step": 21597 }, { "epoch": 1.7496759559300066, "grad_norm": 0.05769157037138939, "learning_rate": 0.00012501912777352717, "loss": 0.2404, "step": 21598 }, { "epoch": 1.7497569669475048, "grad_norm": 0.05776160582900047, "learning_rate": 0.00012501462712093256, "loss": 0.3016, "step": 21599 }, { "epoch": 1.7498379779650033, "grad_norm": 0.059620942920446396, "learning_rate": 0.00012501012646833792, "loss": 0.2767, "step": 21600 }, { "epoch": 1.7499189889825018, "grad_norm": 0.051635947078466415, "learning_rate": 0.00012500562581574328, "loss": 0.2555, "step": 21601 }, { "epoch": 1.75, "grad_norm": 0.061463210731744766, "learning_rate": 0.00012500112516314866, "loss": 0.2926, "step": 21602 }, { "epoch": 1.7500810110174982, "grad_norm": 0.05254808068275452, "learning_rate": 0.00012499662451055402, "loss": 0.2554, "step": 21603 }, { "epoch": 1.7501620220349967, "grad_norm": 0.05625847354531288, "learning_rate": 0.0001249921238579594, "loss": 0.2881, "step": 21604 }, { "epoch": 1.7502430330524952, "grad_norm": 0.05712534114718437, "learning_rate": 0.0001249876232053648, "loss": 0.2898, "step": 21605 }, { "epoch": 1.7503240440699934, "grad_norm": 0.05910136178135872, "learning_rate": 0.00012498312255277016, "loss": 0.2484, "step": 21606 }, { "epoch": 1.750405055087492, "grad_norm": 0.05727291479706764, "learning_rate": 0.00012497862190017552, "loss": 0.2755, "step": 21607 }, { "epoch": 1.7504860661049904, "grad_norm": 0.05712650343775749, "learning_rate": 0.0001249741212475809, "loss": 0.2926, "step": 21608 }, { "epoch": 1.7505670771224886, "grad_norm": 0.04607681557536125, "learning_rate": 0.00012496962059498626, "loss": 0.2639, "step": 21609 }, { "epoch": 1.750648088139987, "grad_norm": 0.06823698431253433, "learning_rate": 0.00012496511994239165, "loss": 0.2575, "step": 21610 }, { "epoch": 1.7507290991574855, "grad_norm": 0.04931235313415527, "learning_rate": 0.00012496061928979704, "loss": 0.2968, "step": 21611 }, { "epoch": 1.7508101101749838, "grad_norm": 0.05911986157298088, "learning_rate": 0.0001249561186372024, "loss": 0.3131, "step": 21612 }, { "epoch": 1.750891121192482, "grad_norm": 0.054763313382864, "learning_rate": 0.00012495161798460776, "loss": 0.2549, "step": 21613 }, { "epoch": 1.7509721322099807, "grad_norm": 0.048321280628442764, "learning_rate": 0.00012494711733201315, "loss": 0.2996, "step": 21614 }, { "epoch": 1.751053143227479, "grad_norm": 0.05358283594250679, "learning_rate": 0.0001249426166794185, "loss": 0.2784, "step": 21615 }, { "epoch": 1.7511341542449772, "grad_norm": 0.052051570266485214, "learning_rate": 0.0001249381160268239, "loss": 0.2844, "step": 21616 }, { "epoch": 1.7512151652624757, "grad_norm": 0.04813947528600693, "learning_rate": 0.00012493361537422928, "loss": 0.2421, "step": 21617 }, { "epoch": 1.7512961762799741, "grad_norm": 0.06113769859075546, "learning_rate": 0.00012492911472163464, "loss": 0.2889, "step": 21618 }, { "epoch": 1.7513771872974724, "grad_norm": 0.06924999505281448, "learning_rate": 0.00012492461406904, "loss": 0.284, "step": 21619 }, { "epoch": 1.7514581983149708, "grad_norm": 0.05498860776424408, "learning_rate": 0.0001249201134164454, "loss": 0.2622, "step": 21620 }, { "epoch": 1.7515392093324693, "grad_norm": 0.044673047959804535, "learning_rate": 0.00012491561276385075, "loss": 0.2655, "step": 21621 }, { "epoch": 1.7516202203499676, "grad_norm": 0.05713099613785744, "learning_rate": 0.00012491111211125613, "loss": 0.2567, "step": 21622 }, { "epoch": 1.751701231367466, "grad_norm": 0.04379938170313835, "learning_rate": 0.00012490661145866152, "loss": 0.2534, "step": 21623 }, { "epoch": 1.7517822423849645, "grad_norm": 0.055278170853853226, "learning_rate": 0.00012490211080606688, "loss": 0.2903, "step": 21624 }, { "epoch": 1.7518632534024627, "grad_norm": 0.05559024214744568, "learning_rate": 0.00012489761015347224, "loss": 0.2833, "step": 21625 }, { "epoch": 1.751944264419961, "grad_norm": 0.04830685257911682, "learning_rate": 0.00012489310950087763, "loss": 0.2773, "step": 21626 }, { "epoch": 1.7520252754374595, "grad_norm": 0.04247341677546501, "learning_rate": 0.00012488860884828302, "loss": 0.3078, "step": 21627 }, { "epoch": 1.752106286454958, "grad_norm": 0.05888223275542259, "learning_rate": 0.00012488410819568838, "loss": 0.2679, "step": 21628 }, { "epoch": 1.7521872974724562, "grad_norm": 0.045269403606653214, "learning_rate": 0.00012487960754309376, "loss": 0.2646, "step": 21629 }, { "epoch": 1.7522683084899546, "grad_norm": 0.05917354300618172, "learning_rate": 0.00012487510689049912, "loss": 0.277, "step": 21630 }, { "epoch": 1.752349319507453, "grad_norm": 0.06229737028479576, "learning_rate": 0.0001248706062379045, "loss": 0.2812, "step": 21631 }, { "epoch": 1.7524303305249513, "grad_norm": 0.05246162414550781, "learning_rate": 0.00012486610558530987, "loss": 0.2297, "step": 21632 }, { "epoch": 1.7525113415424498, "grad_norm": 0.060903310775756836, "learning_rate": 0.00012486160493271526, "loss": 0.2643, "step": 21633 }, { "epoch": 1.7525923525599483, "grad_norm": 0.05659567192196846, "learning_rate": 0.00012485710428012062, "loss": 0.2648, "step": 21634 }, { "epoch": 1.7526733635774465, "grad_norm": 0.058088213205337524, "learning_rate": 0.000124852603627526, "loss": 0.2457, "step": 21635 }, { "epoch": 1.7527543745949448, "grad_norm": 0.06439466029405594, "learning_rate": 0.00012484810297493137, "loss": 0.2749, "step": 21636 }, { "epoch": 1.7528353856124435, "grad_norm": 0.056994613260030746, "learning_rate": 0.00012484360232233675, "loss": 0.275, "step": 21637 }, { "epoch": 1.7529163966299417, "grad_norm": 0.05250370502471924, "learning_rate": 0.0001248391016697421, "loss": 0.28, "step": 21638 }, { "epoch": 1.75299740764744, "grad_norm": 0.0519607812166214, "learning_rate": 0.0001248346010171475, "loss": 0.2656, "step": 21639 }, { "epoch": 1.7530784186649384, "grad_norm": 0.0754295364022255, "learning_rate": 0.00012483010036455286, "loss": 0.3041, "step": 21640 }, { "epoch": 1.7531594296824369, "grad_norm": 0.07012224197387695, "learning_rate": 0.00012482559971195825, "loss": 0.3193, "step": 21641 }, { "epoch": 1.7532404406999351, "grad_norm": 0.050598498433828354, "learning_rate": 0.0001248210990593636, "loss": 0.3008, "step": 21642 }, { "epoch": 1.7533214517174336, "grad_norm": 0.05320628732442856, "learning_rate": 0.000124816598406769, "loss": 0.2498, "step": 21643 }, { "epoch": 1.753402462734932, "grad_norm": 0.05083597078919411, "learning_rate": 0.00012481209775417435, "loss": 0.2969, "step": 21644 }, { "epoch": 1.7534834737524303, "grad_norm": 0.06301596015691757, "learning_rate": 0.00012480759710157974, "loss": 0.3163, "step": 21645 }, { "epoch": 1.7535644847699285, "grad_norm": 0.05585845932364464, "learning_rate": 0.0001248030964489851, "loss": 0.2774, "step": 21646 }, { "epoch": 1.7536454957874272, "grad_norm": 0.0647146999835968, "learning_rate": 0.0001247985957963905, "loss": 0.3017, "step": 21647 }, { "epoch": 1.7537265068049255, "grad_norm": 0.08223205804824829, "learning_rate": 0.00012479409514379585, "loss": 0.2949, "step": 21648 }, { "epoch": 1.7538075178224237, "grad_norm": 0.05526803061366081, "learning_rate": 0.00012478959449120124, "loss": 0.3022, "step": 21649 }, { "epoch": 1.7538885288399222, "grad_norm": 0.04275866970419884, "learning_rate": 0.00012478509383860662, "loss": 0.2533, "step": 21650 }, { "epoch": 1.7539695398574207, "grad_norm": 0.05600523203611374, "learning_rate": 0.00012478059318601198, "loss": 0.2719, "step": 21651 }, { "epoch": 1.754050550874919, "grad_norm": 0.053683191537857056, "learning_rate": 0.00012477609253341734, "loss": 0.2669, "step": 21652 }, { "epoch": 1.7541315618924174, "grad_norm": 0.05389159545302391, "learning_rate": 0.00012477159188082273, "loss": 0.2572, "step": 21653 }, { "epoch": 1.7542125729099158, "grad_norm": 0.057773686945438385, "learning_rate": 0.0001247670912282281, "loss": 0.2924, "step": 21654 }, { "epoch": 1.754293583927414, "grad_norm": 0.05425887927412987, "learning_rate": 0.00012476259057563348, "loss": 0.2633, "step": 21655 }, { "epoch": 1.7543745949449125, "grad_norm": 0.05559500679373741, "learning_rate": 0.00012475808992303886, "loss": 0.2678, "step": 21656 }, { "epoch": 1.754455605962411, "grad_norm": 0.05029148608446121, "learning_rate": 0.00012475358927044422, "loss": 0.2664, "step": 21657 }, { "epoch": 1.7545366169799093, "grad_norm": 0.053293295204639435, "learning_rate": 0.00012474908861784958, "loss": 0.2915, "step": 21658 }, { "epoch": 1.7546176279974075, "grad_norm": 0.04977700486779213, "learning_rate": 0.00012474458796525497, "loss": 0.2921, "step": 21659 }, { "epoch": 1.7546986390149062, "grad_norm": 0.05130726844072342, "learning_rate": 0.00012474008731266033, "loss": 0.2822, "step": 21660 }, { "epoch": 1.7547796500324044, "grad_norm": 0.04962471127510071, "learning_rate": 0.00012473558666006572, "loss": 0.2613, "step": 21661 }, { "epoch": 1.7548606610499027, "grad_norm": 0.05113421380519867, "learning_rate": 0.0001247310860074711, "loss": 0.2958, "step": 21662 }, { "epoch": 1.7549416720674011, "grad_norm": 0.05285904183983803, "learning_rate": 0.00012472658535487647, "loss": 0.2974, "step": 21663 }, { "epoch": 1.7550226830848996, "grad_norm": 0.05132170394062996, "learning_rate": 0.00012472208470228183, "loss": 0.2654, "step": 21664 }, { "epoch": 1.7551036941023979, "grad_norm": 0.054761797189712524, "learning_rate": 0.0001247175840496872, "loss": 0.3014, "step": 21665 }, { "epoch": 1.7551847051198963, "grad_norm": 0.060858312994241714, "learning_rate": 0.00012471308339709257, "loss": 0.2973, "step": 21666 }, { "epoch": 1.7552657161373948, "grad_norm": 0.0532330721616745, "learning_rate": 0.00012470858274449796, "loss": 0.2534, "step": 21667 }, { "epoch": 1.755346727154893, "grad_norm": 0.05100312829017639, "learning_rate": 0.00012470408209190335, "loss": 0.2647, "step": 21668 }, { "epoch": 1.7554277381723913, "grad_norm": 0.06273045390844345, "learning_rate": 0.0001246995814393087, "loss": 0.2898, "step": 21669 }, { "epoch": 1.75550874918989, "grad_norm": 0.05596870183944702, "learning_rate": 0.00012469508078671407, "loss": 0.3194, "step": 21670 }, { "epoch": 1.7555897602073882, "grad_norm": 0.057777270674705505, "learning_rate": 0.00012469058013411945, "loss": 0.26, "step": 21671 }, { "epoch": 1.7556707712248865, "grad_norm": 0.07411547005176544, "learning_rate": 0.00012468607948152481, "loss": 0.3289, "step": 21672 }, { "epoch": 1.755751782242385, "grad_norm": 0.051789868623018265, "learning_rate": 0.0001246815788289302, "loss": 0.2488, "step": 21673 }, { "epoch": 1.7558327932598834, "grad_norm": 0.049402810633182526, "learning_rate": 0.0001246770781763356, "loss": 0.2336, "step": 21674 }, { "epoch": 1.7559138042773816, "grad_norm": 0.05984452739357948, "learning_rate": 0.00012467257752374095, "loss": 0.2871, "step": 21675 }, { "epoch": 1.75599481529488, "grad_norm": 0.050105199217796326, "learning_rate": 0.0001246680768711463, "loss": 0.251, "step": 21676 }, { "epoch": 1.7560758263123786, "grad_norm": 0.05107852816581726, "learning_rate": 0.0001246635762185517, "loss": 0.268, "step": 21677 }, { "epoch": 1.7561568373298768, "grad_norm": 0.05321214348077774, "learning_rate": 0.00012465907556595706, "loss": 0.2779, "step": 21678 }, { "epoch": 1.7562378483473753, "grad_norm": 0.048276063054800034, "learning_rate": 0.00012465457491336244, "loss": 0.2588, "step": 21679 }, { "epoch": 1.7563188593648738, "grad_norm": 0.0636172741651535, "learning_rate": 0.00012465007426076783, "loss": 0.2582, "step": 21680 }, { "epoch": 1.756399870382372, "grad_norm": 0.05773542821407318, "learning_rate": 0.0001246455736081732, "loss": 0.2962, "step": 21681 }, { "epoch": 1.7564808813998702, "grad_norm": 0.0537274032831192, "learning_rate": 0.00012464107295557855, "loss": 0.2705, "step": 21682 }, { "epoch": 1.7565618924173687, "grad_norm": 0.06137649714946747, "learning_rate": 0.00012463657230298394, "loss": 0.2856, "step": 21683 }, { "epoch": 1.7566429034348672, "grad_norm": 0.05260062217712402, "learning_rate": 0.0001246320716503893, "loss": 0.2577, "step": 21684 }, { "epoch": 1.7567239144523654, "grad_norm": 0.0494031198322773, "learning_rate": 0.00012462757099779469, "loss": 0.2966, "step": 21685 }, { "epoch": 1.7568049254698639, "grad_norm": 0.0548611544072628, "learning_rate": 0.00012462307034520007, "loss": 0.2493, "step": 21686 }, { "epoch": 1.7568859364873624, "grad_norm": 0.0449042022228241, "learning_rate": 0.00012461856969260543, "loss": 0.2628, "step": 21687 }, { "epoch": 1.7569669475048606, "grad_norm": 0.063282310962677, "learning_rate": 0.0001246140690400108, "loss": 0.266, "step": 21688 }, { "epoch": 1.757047958522359, "grad_norm": 0.06522276997566223, "learning_rate": 0.00012460956838741618, "loss": 0.3117, "step": 21689 }, { "epoch": 1.7571289695398575, "grad_norm": 0.05102567747235298, "learning_rate": 0.00012460506773482154, "loss": 0.2384, "step": 21690 }, { "epoch": 1.7572099805573558, "grad_norm": 0.05092918500304222, "learning_rate": 0.00012460056708222693, "loss": 0.3042, "step": 21691 }, { "epoch": 1.757290991574854, "grad_norm": 0.048716794699430466, "learning_rate": 0.00012459606642963231, "loss": 0.2535, "step": 21692 }, { "epoch": 1.7573720025923527, "grad_norm": 0.05294806882739067, "learning_rate": 0.00012459156577703767, "loss": 0.2843, "step": 21693 }, { "epoch": 1.757453013609851, "grad_norm": 0.04733522981405258, "learning_rate": 0.00012458706512444303, "loss": 0.2403, "step": 21694 }, { "epoch": 1.7575340246273492, "grad_norm": 0.04993963986635208, "learning_rate": 0.00012458256447184842, "loss": 0.2286, "step": 21695 }, { "epoch": 1.7576150356448477, "grad_norm": 0.059795740991830826, "learning_rate": 0.00012457806381925378, "loss": 0.3168, "step": 21696 }, { "epoch": 1.7576960466623461, "grad_norm": 0.054991334676742554, "learning_rate": 0.00012457356316665917, "loss": 0.2736, "step": 21697 }, { "epoch": 1.7577770576798444, "grad_norm": 0.06179995834827423, "learning_rate": 0.00012456906251406456, "loss": 0.2566, "step": 21698 }, { "epoch": 1.7578580686973428, "grad_norm": 0.04543781280517578, "learning_rate": 0.00012456456186146992, "loss": 0.256, "step": 21699 }, { "epoch": 1.7579390797148413, "grad_norm": 0.06514349579811096, "learning_rate": 0.0001245600612088753, "loss": 0.2745, "step": 21700 }, { "epoch": 1.7580200907323396, "grad_norm": 0.062302350997924805, "learning_rate": 0.00012455556055628066, "loss": 0.2294, "step": 21701 }, { "epoch": 1.758101101749838, "grad_norm": 0.04693538323044777, "learning_rate": 0.00012455105990368605, "loss": 0.2805, "step": 21702 }, { "epoch": 1.7581821127673365, "grad_norm": 0.0487443283200264, "learning_rate": 0.0001245465592510914, "loss": 0.2562, "step": 21703 }, { "epoch": 1.7582631237848347, "grad_norm": 0.05158422887325287, "learning_rate": 0.0001245420585984968, "loss": 0.2566, "step": 21704 }, { "epoch": 1.758344134802333, "grad_norm": 0.056412965059280396, "learning_rate": 0.00012453755794590216, "loss": 0.2555, "step": 21705 }, { "epoch": 1.7584251458198314, "grad_norm": 0.05551760271191597, "learning_rate": 0.00012453305729330754, "loss": 0.2889, "step": 21706 }, { "epoch": 1.75850615683733, "grad_norm": 0.07467301934957504, "learning_rate": 0.0001245285566407129, "loss": 0.2919, "step": 21707 }, { "epoch": 1.7585871678548282, "grad_norm": 0.05988988280296326, "learning_rate": 0.0001245240559881183, "loss": 0.2587, "step": 21708 }, { "epoch": 1.7586681788723266, "grad_norm": 0.047254081815481186, "learning_rate": 0.00012451955533552365, "loss": 0.2535, "step": 21709 }, { "epoch": 1.758749189889825, "grad_norm": 0.060444850474596024, "learning_rate": 0.00012451505468292904, "loss": 0.271, "step": 21710 }, { "epoch": 1.7588302009073233, "grad_norm": 0.05495789274573326, "learning_rate": 0.0001245105540303344, "loss": 0.277, "step": 21711 }, { "epoch": 1.7589112119248218, "grad_norm": 0.05612171068787575, "learning_rate": 0.00012450605337773979, "loss": 0.2637, "step": 21712 }, { "epoch": 1.7589922229423203, "grad_norm": 0.05255566164851189, "learning_rate": 0.00012450155272514515, "loss": 0.2884, "step": 21713 }, { "epoch": 1.7590732339598185, "grad_norm": 0.053682245314121246, "learning_rate": 0.00012449705207255053, "loss": 0.2974, "step": 21714 }, { "epoch": 1.7591542449773168, "grad_norm": 0.05426940321922302, "learning_rate": 0.0001244925514199559, "loss": 0.2631, "step": 21715 }, { "epoch": 1.7592352559948155, "grad_norm": 0.07131505757570267, "learning_rate": 0.00012448805076736128, "loss": 0.2999, "step": 21716 }, { "epoch": 1.7593162670123137, "grad_norm": 0.06050477921962738, "learning_rate": 0.00012448355011476664, "loss": 0.2971, "step": 21717 }, { "epoch": 1.759397278029812, "grad_norm": 0.049917127937078476, "learning_rate": 0.00012447904946217203, "loss": 0.2515, "step": 21718 }, { "epoch": 1.7594782890473104, "grad_norm": 0.055279117077589035, "learning_rate": 0.0001244745488095774, "loss": 0.2781, "step": 21719 }, { "epoch": 1.7595593000648089, "grad_norm": 0.051920387893915176, "learning_rate": 0.00012447004815698277, "loss": 0.2621, "step": 21720 }, { "epoch": 1.7596403110823071, "grad_norm": 0.06155714765191078, "learning_rate": 0.00012446554750438813, "loss": 0.2914, "step": 21721 }, { "epoch": 1.7597213220998056, "grad_norm": 0.0600089430809021, "learning_rate": 0.00012446104685179352, "loss": 0.275, "step": 21722 }, { "epoch": 1.759802333117304, "grad_norm": 0.05426739528775215, "learning_rate": 0.00012445654619919888, "loss": 0.2991, "step": 21723 }, { "epoch": 1.7598833441348023, "grad_norm": 0.04382641613483429, "learning_rate": 0.00012445204554660427, "loss": 0.2369, "step": 21724 }, { "epoch": 1.7599643551523008, "grad_norm": 0.05670471489429474, "learning_rate": 0.00012444754489400963, "loss": 0.2556, "step": 21725 }, { "epoch": 1.7600453661697992, "grad_norm": 0.05996417999267578, "learning_rate": 0.00012444304424141502, "loss": 0.2899, "step": 21726 }, { "epoch": 1.7601263771872975, "grad_norm": 0.05591500923037529, "learning_rate": 0.00012443854358882038, "loss": 0.2728, "step": 21727 }, { "epoch": 1.7602073882047957, "grad_norm": 0.050028007477521896, "learning_rate": 0.00012443404293622576, "loss": 0.2995, "step": 21728 }, { "epoch": 1.7602883992222942, "grad_norm": 0.05522231385111809, "learning_rate": 0.00012442954228363112, "loss": 0.2866, "step": 21729 }, { "epoch": 1.7603694102397927, "grad_norm": 0.055305320769548416, "learning_rate": 0.0001244250416310365, "loss": 0.2995, "step": 21730 }, { "epoch": 1.760450421257291, "grad_norm": 0.05761830508708954, "learning_rate": 0.0001244205409784419, "loss": 0.3019, "step": 21731 }, { "epoch": 1.7605314322747894, "grad_norm": 0.04823017120361328, "learning_rate": 0.00012441604032584726, "loss": 0.2601, "step": 21732 }, { "epoch": 1.7606124432922878, "grad_norm": 0.05907674878835678, "learning_rate": 0.00012441153967325262, "loss": 0.2999, "step": 21733 }, { "epoch": 1.760693454309786, "grad_norm": 0.050825346261262894, "learning_rate": 0.000124407039020658, "loss": 0.2835, "step": 21734 }, { "epoch": 1.7607744653272845, "grad_norm": 0.06334000825881958, "learning_rate": 0.00012440253836806337, "loss": 0.2775, "step": 21735 }, { "epoch": 1.760855476344783, "grad_norm": 0.05819164589047432, "learning_rate": 0.00012439803771546875, "loss": 0.2616, "step": 21736 }, { "epoch": 1.7609364873622813, "grad_norm": 0.04646517336368561, "learning_rate": 0.00012439353706287414, "loss": 0.244, "step": 21737 }, { "epoch": 1.7610174983797795, "grad_norm": 0.0714096948504448, "learning_rate": 0.0001243890364102795, "loss": 0.2959, "step": 21738 }, { "epoch": 1.7610985093972782, "grad_norm": 0.04353515803813934, "learning_rate": 0.00012438453575768486, "loss": 0.264, "step": 21739 }, { "epoch": 1.7611795204147764, "grad_norm": 0.05200977995991707, "learning_rate": 0.00012438003510509025, "loss": 0.2807, "step": 21740 }, { "epoch": 1.7612605314322747, "grad_norm": 0.06344801187515259, "learning_rate": 0.0001243755344524956, "loss": 0.286, "step": 21741 }, { "epoch": 1.7613415424497731, "grad_norm": 0.05688883364200592, "learning_rate": 0.000124371033799901, "loss": 0.2627, "step": 21742 }, { "epoch": 1.7614225534672716, "grad_norm": 0.06464666873216629, "learning_rate": 0.00012436653314730638, "loss": 0.3134, "step": 21743 }, { "epoch": 1.7615035644847699, "grad_norm": 0.053593527525663376, "learning_rate": 0.00012436203249471174, "loss": 0.3007, "step": 21744 }, { "epoch": 1.7615845755022683, "grad_norm": 0.038453370332717896, "learning_rate": 0.0001243575318421171, "loss": 0.2245, "step": 21745 }, { "epoch": 1.7616655865197668, "grad_norm": 0.05070507526397705, "learning_rate": 0.0001243530311895225, "loss": 0.2719, "step": 21746 }, { "epoch": 1.761746597537265, "grad_norm": 0.04365245997905731, "learning_rate": 0.00012434853053692785, "loss": 0.2587, "step": 21747 }, { "epoch": 1.7618276085547635, "grad_norm": 0.053488414734601974, "learning_rate": 0.00012434402988433324, "loss": 0.2996, "step": 21748 }, { "epoch": 1.761908619572262, "grad_norm": 0.053450509905815125, "learning_rate": 0.00012433952923173862, "loss": 0.2702, "step": 21749 }, { "epoch": 1.7619896305897602, "grad_norm": 0.046528153121471405, "learning_rate": 0.00012433502857914398, "loss": 0.2456, "step": 21750 }, { "epoch": 1.7620706416072585, "grad_norm": 0.0616072341799736, "learning_rate": 0.00012433052792654934, "loss": 0.2709, "step": 21751 }, { "epoch": 1.762151652624757, "grad_norm": 0.050160426646471024, "learning_rate": 0.00012432602727395473, "loss": 0.2721, "step": 21752 }, { "epoch": 1.7622326636422554, "grad_norm": 0.05370565503835678, "learning_rate": 0.0001243215266213601, "loss": 0.2803, "step": 21753 }, { "epoch": 1.7623136746597536, "grad_norm": 0.05295388028025627, "learning_rate": 0.00012431702596876548, "loss": 0.286, "step": 21754 }, { "epoch": 1.762394685677252, "grad_norm": 0.06044628098607063, "learning_rate": 0.00012431252531617086, "loss": 0.2924, "step": 21755 }, { "epoch": 1.7624756966947506, "grad_norm": 0.05766693502664566, "learning_rate": 0.00012430802466357622, "loss": 0.321, "step": 21756 }, { "epoch": 1.7625567077122488, "grad_norm": 0.04584493860602379, "learning_rate": 0.00012430352401098158, "loss": 0.2753, "step": 21757 }, { "epoch": 1.7626377187297473, "grad_norm": 0.05395643785595894, "learning_rate": 0.00012429902335838697, "loss": 0.2543, "step": 21758 }, { "epoch": 1.7627187297472457, "grad_norm": 0.0653577521443367, "learning_rate": 0.00012429452270579233, "loss": 0.3025, "step": 21759 }, { "epoch": 1.762799740764744, "grad_norm": 0.054335497319698334, "learning_rate": 0.00012429002205319772, "loss": 0.2753, "step": 21760 }, { "epoch": 1.7628807517822422, "grad_norm": 0.05730433762073517, "learning_rate": 0.0001242855214006031, "loss": 0.3175, "step": 21761 }, { "epoch": 1.762961762799741, "grad_norm": 0.04931226372718811, "learning_rate": 0.00012428102074800847, "loss": 0.2591, "step": 21762 }, { "epoch": 1.7630427738172392, "grad_norm": 0.056040529161691666, "learning_rate": 0.00012427652009541383, "loss": 0.2592, "step": 21763 }, { "epoch": 1.7631237848347374, "grad_norm": 0.05200287699699402, "learning_rate": 0.0001242720194428192, "loss": 0.2913, "step": 21764 }, { "epoch": 1.7632047958522359, "grad_norm": 0.06672247499227524, "learning_rate": 0.00012426751879022457, "loss": 0.2925, "step": 21765 }, { "epoch": 1.7632858068697344, "grad_norm": 0.05634213238954544, "learning_rate": 0.00012426301813762996, "loss": 0.2776, "step": 21766 }, { "epoch": 1.7633668178872326, "grad_norm": 0.05127370357513428, "learning_rate": 0.00012425851748503535, "loss": 0.2567, "step": 21767 }, { "epoch": 1.763447828904731, "grad_norm": 0.05843063443899155, "learning_rate": 0.0001242540168324407, "loss": 0.2992, "step": 21768 }, { "epoch": 1.7635288399222295, "grad_norm": 0.07764243334531784, "learning_rate": 0.0001242495161798461, "loss": 0.3162, "step": 21769 }, { "epoch": 1.7636098509397278, "grad_norm": 0.05555042251944542, "learning_rate": 0.00012424501552725146, "loss": 0.283, "step": 21770 }, { "epoch": 1.763690861957226, "grad_norm": 0.05444716289639473, "learning_rate": 0.00012424051487465682, "loss": 0.3248, "step": 21771 }, { "epoch": 1.7637718729747247, "grad_norm": 0.0517263300716877, "learning_rate": 0.0001242360142220622, "loss": 0.2594, "step": 21772 }, { "epoch": 1.763852883992223, "grad_norm": 0.06352398544549942, "learning_rate": 0.0001242315135694676, "loss": 0.2863, "step": 21773 }, { "epoch": 1.7639338950097212, "grad_norm": 0.04792700335383415, "learning_rate": 0.00012422701291687295, "loss": 0.2934, "step": 21774 }, { "epoch": 1.7640149060272197, "grad_norm": 0.05038423463702202, "learning_rate": 0.00012422251226427834, "loss": 0.2686, "step": 21775 }, { "epoch": 1.7640959170447181, "grad_norm": 0.054630909115076065, "learning_rate": 0.0001242180116116837, "loss": 0.2918, "step": 21776 }, { "epoch": 1.7641769280622164, "grad_norm": 0.05571306124329567, "learning_rate": 0.00012421351095908906, "loss": 0.2747, "step": 21777 }, { "epoch": 1.7642579390797148, "grad_norm": 0.04750450700521469, "learning_rate": 0.00012420901030649444, "loss": 0.242, "step": 21778 }, { "epoch": 1.7643389500972133, "grad_norm": 0.060028474777936935, "learning_rate": 0.00012420450965389983, "loss": 0.2681, "step": 21779 }, { "epoch": 1.7644199611147116, "grad_norm": 0.05656592175364494, "learning_rate": 0.0001242000090013052, "loss": 0.2401, "step": 21780 }, { "epoch": 1.76450097213221, "grad_norm": 0.05683549866080284, "learning_rate": 0.00012419550834871058, "loss": 0.2718, "step": 21781 }, { "epoch": 1.7645819831497085, "grad_norm": 0.055099744349718094, "learning_rate": 0.00012419100769611594, "loss": 0.2593, "step": 21782 }, { "epoch": 1.7646629941672067, "grad_norm": 0.0482744500041008, "learning_rate": 0.00012418650704352133, "loss": 0.2658, "step": 21783 }, { "epoch": 1.764744005184705, "grad_norm": 0.05808193236589432, "learning_rate": 0.00012418200639092669, "loss": 0.2852, "step": 21784 }, { "epoch": 1.7648250162022034, "grad_norm": 0.06865742057561874, "learning_rate": 0.00012417750573833207, "loss": 0.3209, "step": 21785 }, { "epoch": 1.764906027219702, "grad_norm": 0.07194961607456207, "learning_rate": 0.00012417300508573743, "loss": 0.3156, "step": 21786 }, { "epoch": 1.7649870382372002, "grad_norm": 0.06195151433348656, "learning_rate": 0.00012416850443314282, "loss": 0.2867, "step": 21787 }, { "epoch": 1.7650680492546986, "grad_norm": 0.051346033811569214, "learning_rate": 0.00012416400378054818, "loss": 0.2669, "step": 21788 }, { "epoch": 1.765149060272197, "grad_norm": 0.0620853491127491, "learning_rate": 0.00012415950312795357, "loss": 0.2745, "step": 21789 }, { "epoch": 1.7652300712896953, "grad_norm": 0.06049535796046257, "learning_rate": 0.00012415500247535893, "loss": 0.2704, "step": 21790 }, { "epoch": 1.7653110823071938, "grad_norm": 0.06817255169153214, "learning_rate": 0.00012415050182276431, "loss": 0.2856, "step": 21791 }, { "epoch": 1.7653920933246923, "grad_norm": 0.05738371983170509, "learning_rate": 0.00012414600117016967, "loss": 0.2306, "step": 21792 }, { "epoch": 1.7654731043421905, "grad_norm": 0.05548636242747307, "learning_rate": 0.00012414150051757506, "loss": 0.2965, "step": 21793 }, { "epoch": 1.7655541153596888, "grad_norm": 0.04842370003461838, "learning_rate": 0.00012413699986498042, "loss": 0.2579, "step": 21794 }, { "epoch": 1.7656351263771874, "grad_norm": 0.05322146788239479, "learning_rate": 0.0001241324992123858, "loss": 0.257, "step": 21795 }, { "epoch": 1.7657161373946857, "grad_norm": 0.055748313665390015, "learning_rate": 0.00012412799855979117, "loss": 0.2782, "step": 21796 }, { "epoch": 1.765797148412184, "grad_norm": 0.05629035830497742, "learning_rate": 0.00012412349790719656, "loss": 0.2514, "step": 21797 }, { "epoch": 1.7658781594296824, "grad_norm": 0.06065845862030983, "learning_rate": 0.00012411899725460192, "loss": 0.2579, "step": 21798 }, { "epoch": 1.7659591704471809, "grad_norm": 0.05282067880034447, "learning_rate": 0.0001241144966020073, "loss": 0.2431, "step": 21799 }, { "epoch": 1.7660401814646791, "grad_norm": 0.06059788167476654, "learning_rate": 0.00012410999594941266, "loss": 0.2792, "step": 21800 }, { "epoch": 1.7661211924821776, "grad_norm": 0.04922725260257721, "learning_rate": 0.00012410549529681805, "loss": 0.3058, "step": 21801 }, { "epoch": 1.766202203499676, "grad_norm": 0.05176448076963425, "learning_rate": 0.0001241009946442234, "loss": 0.2627, "step": 21802 }, { "epoch": 1.7662832145171743, "grad_norm": 0.06321108341217041, "learning_rate": 0.0001240964939916288, "loss": 0.2944, "step": 21803 }, { "epoch": 1.7663642255346728, "grad_norm": 0.04272797331213951, "learning_rate": 0.00012409199333903416, "loss": 0.2438, "step": 21804 }, { "epoch": 1.7664452365521712, "grad_norm": 0.06427010148763657, "learning_rate": 0.00012408749268643954, "loss": 0.2724, "step": 21805 }, { "epoch": 1.7665262475696695, "grad_norm": 0.058782003819942474, "learning_rate": 0.0001240829920338449, "loss": 0.2592, "step": 21806 }, { "epoch": 1.7666072585871677, "grad_norm": 0.046798817813396454, "learning_rate": 0.0001240784913812503, "loss": 0.2545, "step": 21807 }, { "epoch": 1.7666882696046662, "grad_norm": 0.05633607506752014, "learning_rate": 0.00012407399072865565, "loss": 0.2996, "step": 21808 }, { "epoch": 1.7667692806221647, "grad_norm": 0.05841773748397827, "learning_rate": 0.00012406949007606104, "loss": 0.2579, "step": 21809 }, { "epoch": 1.766850291639663, "grad_norm": 0.061462823301553726, "learning_rate": 0.0001240649894234664, "loss": 0.2993, "step": 21810 }, { "epoch": 1.7669313026571614, "grad_norm": 0.06246998906135559, "learning_rate": 0.00012406048877087179, "loss": 0.2886, "step": 21811 }, { "epoch": 1.7670123136746598, "grad_norm": 0.05299236625432968, "learning_rate": 0.00012405598811827717, "loss": 0.2694, "step": 21812 }, { "epoch": 1.767093324692158, "grad_norm": 0.05541946738958359, "learning_rate": 0.00012405148746568253, "loss": 0.2743, "step": 21813 }, { "epoch": 1.7671743357096565, "grad_norm": 0.07307790964841843, "learning_rate": 0.0001240469868130879, "loss": 0.2877, "step": 21814 }, { "epoch": 1.767255346727155, "grad_norm": 0.04940599203109741, "learning_rate": 0.00012404248616049328, "loss": 0.2501, "step": 21815 }, { "epoch": 1.7673363577446533, "grad_norm": 0.0704098492860794, "learning_rate": 0.00012403798550789864, "loss": 0.2915, "step": 21816 }, { "epoch": 1.7674173687621515, "grad_norm": 0.049868229776620865, "learning_rate": 0.00012403348485530403, "loss": 0.2508, "step": 21817 }, { "epoch": 1.7674983797796502, "grad_norm": 0.0582960806787014, "learning_rate": 0.00012402898420270942, "loss": 0.3132, "step": 21818 }, { "epoch": 1.7675793907971484, "grad_norm": 0.05498791113495827, "learning_rate": 0.00012402448355011478, "loss": 0.2758, "step": 21819 }, { "epoch": 1.7676604018146467, "grad_norm": 0.062076665461063385, "learning_rate": 0.00012401998289752014, "loss": 0.3287, "step": 21820 }, { "epoch": 1.7677414128321451, "grad_norm": 0.053407374769449234, "learning_rate": 0.00012401548224492552, "loss": 0.2525, "step": 21821 }, { "epoch": 1.7678224238496436, "grad_norm": 0.05482996627688408, "learning_rate": 0.00012401098159233088, "loss": 0.3037, "step": 21822 }, { "epoch": 1.7679034348671419, "grad_norm": 0.05839741975069046, "learning_rate": 0.00012400648093973627, "loss": 0.3059, "step": 21823 }, { "epoch": 1.7679844458846403, "grad_norm": 0.04174405336380005, "learning_rate": 0.00012400198028714166, "loss": 0.2515, "step": 21824 }, { "epoch": 1.7680654569021388, "grad_norm": 0.05571863055229187, "learning_rate": 0.00012399747963454702, "loss": 0.2738, "step": 21825 }, { "epoch": 1.768146467919637, "grad_norm": 0.062394920736551285, "learning_rate": 0.00012399297898195238, "loss": 0.2931, "step": 21826 }, { "epoch": 1.7682274789371355, "grad_norm": 0.05461779236793518, "learning_rate": 0.00012398847832935776, "loss": 0.2779, "step": 21827 }, { "epoch": 1.768308489954634, "grad_norm": 0.0779416486620903, "learning_rate": 0.00012398397767676312, "loss": 0.2979, "step": 21828 }, { "epoch": 1.7683895009721322, "grad_norm": 0.05171630531549454, "learning_rate": 0.0001239794770241685, "loss": 0.3044, "step": 21829 }, { "epoch": 1.7684705119896305, "grad_norm": 0.06141390651464462, "learning_rate": 0.0001239749763715739, "loss": 0.3078, "step": 21830 }, { "epoch": 1.768551523007129, "grad_norm": 0.05810209736227989, "learning_rate": 0.00012397047571897926, "loss": 0.2944, "step": 21831 }, { "epoch": 1.7686325340246274, "grad_norm": 0.054347582161426544, "learning_rate": 0.00012396597506638462, "loss": 0.2931, "step": 21832 }, { "epoch": 1.7687135450421256, "grad_norm": 0.058234091848134995, "learning_rate": 0.00012396147441379, "loss": 0.2658, "step": 21833 }, { "epoch": 1.768794556059624, "grad_norm": 0.07164759188890457, "learning_rate": 0.00012395697376119537, "loss": 0.2929, "step": 21834 }, { "epoch": 1.7688755670771226, "grad_norm": 0.0488571897149086, "learning_rate": 0.00012395247310860075, "loss": 0.2444, "step": 21835 }, { "epoch": 1.7689565780946208, "grad_norm": 0.05238921195268631, "learning_rate": 0.00012394797245600614, "loss": 0.302, "step": 21836 }, { "epoch": 1.7690375891121193, "grad_norm": 0.057009343057870865, "learning_rate": 0.0001239434718034115, "loss": 0.2741, "step": 21837 }, { "epoch": 1.7691186001296177, "grad_norm": 0.04948584735393524, "learning_rate": 0.0001239389711508169, "loss": 0.3014, "step": 21838 }, { "epoch": 1.769199611147116, "grad_norm": 0.04947437718510628, "learning_rate": 0.00012393447049822225, "loss": 0.293, "step": 21839 }, { "epoch": 1.7692806221646142, "grad_norm": 0.04769972339272499, "learning_rate": 0.0001239299698456276, "loss": 0.2778, "step": 21840 }, { "epoch": 1.769361633182113, "grad_norm": 0.06893935054540634, "learning_rate": 0.000123925469193033, "loss": 0.2989, "step": 21841 }, { "epoch": 1.7694426441996112, "grad_norm": 0.06745298951864243, "learning_rate": 0.00012392096854043838, "loss": 0.2669, "step": 21842 }, { "epoch": 1.7695236552171094, "grad_norm": 0.04827446863055229, "learning_rate": 0.00012391646788784374, "loss": 0.2794, "step": 21843 }, { "epoch": 1.7696046662346079, "grad_norm": 0.05109957233071327, "learning_rate": 0.00012391196723524913, "loss": 0.2207, "step": 21844 }, { "epoch": 1.7696856772521063, "grad_norm": 0.04495451971888542, "learning_rate": 0.0001239074665826545, "loss": 0.2511, "step": 21845 }, { "epoch": 1.7697666882696046, "grad_norm": 0.05750264972448349, "learning_rate": 0.00012390296593005985, "loss": 0.2837, "step": 21846 }, { "epoch": 1.769847699287103, "grad_norm": 0.04733563959598541, "learning_rate": 0.00012389846527746524, "loss": 0.2341, "step": 21847 }, { "epoch": 1.7699287103046015, "grad_norm": 0.054377175867557526, "learning_rate": 0.00012389396462487062, "loss": 0.2818, "step": 21848 }, { "epoch": 1.7700097213220998, "grad_norm": 0.06388578563928604, "learning_rate": 0.00012388946397227598, "loss": 0.2798, "step": 21849 }, { "epoch": 1.7700907323395982, "grad_norm": 0.07040360569953918, "learning_rate": 0.00012388496331968137, "loss": 0.2625, "step": 21850 }, { "epoch": 1.7701717433570967, "grad_norm": 0.07146581262350082, "learning_rate": 0.00012388046266708673, "loss": 0.3742, "step": 21851 }, { "epoch": 1.770252754374595, "grad_norm": 0.05052236095070839, "learning_rate": 0.0001238759620144921, "loss": 0.2262, "step": 21852 }, { "epoch": 1.7703337653920932, "grad_norm": 0.05325867608189583, "learning_rate": 0.00012387146136189748, "loss": 0.26, "step": 21853 }, { "epoch": 1.7704147764095917, "grad_norm": 0.057723164558410645, "learning_rate": 0.00012386696070930286, "loss": 0.2815, "step": 21854 }, { "epoch": 1.7704957874270901, "grad_norm": 0.06256304681301117, "learning_rate": 0.00012386246005670822, "loss": 0.2963, "step": 21855 }, { "epoch": 1.7705767984445884, "grad_norm": 0.0576959066092968, "learning_rate": 0.0001238579594041136, "loss": 0.3087, "step": 21856 }, { "epoch": 1.7706578094620868, "grad_norm": 0.07264872640371323, "learning_rate": 0.00012385345875151897, "loss": 0.3027, "step": 21857 }, { "epoch": 1.7707388204795853, "grad_norm": 0.05662224814295769, "learning_rate": 0.00012384895809892433, "loss": 0.2873, "step": 21858 }, { "epoch": 1.7708198314970836, "grad_norm": 0.050668567419052124, "learning_rate": 0.00012384445744632972, "loss": 0.2247, "step": 21859 }, { "epoch": 1.770900842514582, "grad_norm": 0.04291321709752083, "learning_rate": 0.0001238399567937351, "loss": 0.2235, "step": 21860 }, { "epoch": 1.7709818535320805, "grad_norm": 0.06177298352122307, "learning_rate": 0.00012383545614114047, "loss": 0.2879, "step": 21861 }, { "epoch": 1.7710628645495787, "grad_norm": 0.06481605023145676, "learning_rate": 0.00012383095548854585, "loss": 0.2883, "step": 21862 }, { "epoch": 1.771143875567077, "grad_norm": 0.05115228146314621, "learning_rate": 0.00012382645483595121, "loss": 0.2964, "step": 21863 }, { "epoch": 1.7712248865845757, "grad_norm": 0.05299142003059387, "learning_rate": 0.0001238219541833566, "loss": 0.2637, "step": 21864 }, { "epoch": 1.771305897602074, "grad_norm": 0.05036075785756111, "learning_rate": 0.00012381745353076196, "loss": 0.2516, "step": 21865 }, { "epoch": 1.7713869086195722, "grad_norm": 0.059381112456321716, "learning_rate": 0.00012381295287816735, "loss": 0.278, "step": 21866 }, { "epoch": 1.7714679196370706, "grad_norm": 0.04535800218582153, "learning_rate": 0.0001238084522255727, "loss": 0.2224, "step": 21867 }, { "epoch": 1.771548930654569, "grad_norm": 0.05935453623533249, "learning_rate": 0.0001238039515729781, "loss": 0.3078, "step": 21868 }, { "epoch": 1.7716299416720673, "grad_norm": 0.057842377573251724, "learning_rate": 0.00012379945092038346, "loss": 0.2609, "step": 21869 }, { "epoch": 1.7717109526895658, "grad_norm": 0.05465754121541977, "learning_rate": 0.00012379495026778884, "loss": 0.3241, "step": 21870 }, { "epoch": 1.7717919637070643, "grad_norm": 0.05112515017390251, "learning_rate": 0.0001237904496151942, "loss": 0.2686, "step": 21871 }, { "epoch": 1.7718729747245625, "grad_norm": 0.05687601864337921, "learning_rate": 0.0001237859489625996, "loss": 0.3015, "step": 21872 }, { "epoch": 1.7719539857420608, "grad_norm": 0.053299058228731155, "learning_rate": 0.00012378144831000495, "loss": 0.2401, "step": 21873 }, { "epoch": 1.7720349967595594, "grad_norm": 0.06761613488197327, "learning_rate": 0.00012377694765741034, "loss": 0.2663, "step": 21874 }, { "epoch": 1.7721160077770577, "grad_norm": 0.04997372627258301, "learning_rate": 0.0001237724470048157, "loss": 0.275, "step": 21875 }, { "epoch": 1.772197018794556, "grad_norm": 0.04821192845702171, "learning_rate": 0.00012376794635222108, "loss": 0.2531, "step": 21876 }, { "epoch": 1.7722780298120544, "grad_norm": 0.06274958699941635, "learning_rate": 0.00012376344569962644, "loss": 0.2769, "step": 21877 }, { "epoch": 1.7723590408295529, "grad_norm": 0.04555333033204079, "learning_rate": 0.00012375894504703183, "loss": 0.2701, "step": 21878 }, { "epoch": 1.7724400518470511, "grad_norm": 0.062332700937986374, "learning_rate": 0.0001237544443944372, "loss": 0.2862, "step": 21879 }, { "epoch": 1.7725210628645496, "grad_norm": 0.05699625611305237, "learning_rate": 0.00012374994374184258, "loss": 0.2738, "step": 21880 }, { "epoch": 1.772602073882048, "grad_norm": 0.06004469096660614, "learning_rate": 0.00012374544308924794, "loss": 0.2787, "step": 21881 }, { "epoch": 1.7726830848995463, "grad_norm": 0.058492615818977356, "learning_rate": 0.00012374094243665333, "loss": 0.2883, "step": 21882 }, { "epoch": 1.7727640959170448, "grad_norm": 0.051991090178489685, "learning_rate": 0.00012373644178405869, "loss": 0.3014, "step": 21883 }, { "epoch": 1.7728451069345432, "grad_norm": 0.053816016763448715, "learning_rate": 0.00012373194113146407, "loss": 0.2341, "step": 21884 }, { "epoch": 1.7729261179520415, "grad_norm": 0.0617956668138504, "learning_rate": 0.00012372744047886943, "loss": 0.3296, "step": 21885 }, { "epoch": 1.7730071289695397, "grad_norm": 0.06008381024003029, "learning_rate": 0.00012372293982627482, "loss": 0.2914, "step": 21886 }, { "epoch": 1.7730881399870384, "grad_norm": 0.04700002446770668, "learning_rate": 0.0001237184391736802, "loss": 0.2774, "step": 21887 }, { "epoch": 1.7731691510045366, "grad_norm": 0.06209200248122215, "learning_rate": 0.00012371393852108557, "loss": 0.2875, "step": 21888 }, { "epoch": 1.773250162022035, "grad_norm": 0.05754224210977554, "learning_rate": 0.00012370943786849093, "loss": 0.2831, "step": 21889 }, { "epoch": 1.7733311730395334, "grad_norm": 0.06822988390922546, "learning_rate": 0.00012370493721589631, "loss": 0.2533, "step": 21890 }, { "epoch": 1.7734121840570318, "grad_norm": 0.05238701403141022, "learning_rate": 0.00012370043656330167, "loss": 0.2797, "step": 21891 }, { "epoch": 1.77349319507453, "grad_norm": 0.05083979293704033, "learning_rate": 0.00012369593591070706, "loss": 0.2718, "step": 21892 }, { "epoch": 1.7735742060920285, "grad_norm": 0.047063197940588, "learning_rate": 0.00012369143525811245, "loss": 0.2665, "step": 21893 }, { "epoch": 1.773655217109527, "grad_norm": 0.05058155208826065, "learning_rate": 0.0001236869346055178, "loss": 0.2259, "step": 21894 }, { "epoch": 1.7737362281270252, "grad_norm": 0.054182857275009155, "learning_rate": 0.00012368243395292317, "loss": 0.2337, "step": 21895 }, { "epoch": 1.7738172391445235, "grad_norm": 0.058875586837530136, "learning_rate": 0.00012367793330032856, "loss": 0.3079, "step": 21896 }, { "epoch": 1.7738982501620222, "grad_norm": 0.04809205234050751, "learning_rate": 0.00012367343264773392, "loss": 0.2898, "step": 21897 }, { "epoch": 1.7739792611795204, "grad_norm": 0.04986289516091347, "learning_rate": 0.0001236689319951393, "loss": 0.285, "step": 21898 }, { "epoch": 1.7740602721970187, "grad_norm": 0.057436879724264145, "learning_rate": 0.0001236644313425447, "loss": 0.2737, "step": 21899 }, { "epoch": 1.7741412832145171, "grad_norm": 0.060833774507045746, "learning_rate": 0.00012365993068995005, "loss": 0.2656, "step": 21900 }, { "epoch": 1.7742222942320156, "grad_norm": 0.05589460954070091, "learning_rate": 0.00012365543003735544, "loss": 0.2759, "step": 21901 }, { "epoch": 1.7743033052495139, "grad_norm": 0.045339442789554596, "learning_rate": 0.0001236509293847608, "loss": 0.2471, "step": 21902 }, { "epoch": 1.7743843162670123, "grad_norm": 0.05823316425085068, "learning_rate": 0.00012364642873216616, "loss": 0.2818, "step": 21903 }, { "epoch": 1.7744653272845108, "grad_norm": 0.047641415148973465, "learning_rate": 0.00012364192807957155, "loss": 0.2875, "step": 21904 }, { "epoch": 1.774546338302009, "grad_norm": 0.05201798304915428, "learning_rate": 0.00012363742742697693, "loss": 0.2797, "step": 21905 }, { "epoch": 1.7746273493195075, "grad_norm": 0.05504504218697548, "learning_rate": 0.0001236329267743823, "loss": 0.2578, "step": 21906 }, { "epoch": 1.774708360337006, "grad_norm": 0.0527983233332634, "learning_rate": 0.00012362842612178768, "loss": 0.2803, "step": 21907 }, { "epoch": 1.7747893713545042, "grad_norm": 0.06161266937851906, "learning_rate": 0.00012362392546919304, "loss": 0.26, "step": 21908 }, { "epoch": 1.7748703823720025, "grad_norm": 0.06064946949481964, "learning_rate": 0.0001236194248165984, "loss": 0.2583, "step": 21909 }, { "epoch": 1.774951393389501, "grad_norm": 0.05657583102583885, "learning_rate": 0.0001236149241640038, "loss": 0.262, "step": 21910 }, { "epoch": 1.7750324044069994, "grad_norm": 0.06834515929222107, "learning_rate": 0.00012361042351140917, "loss": 0.2948, "step": 21911 }, { "epoch": 1.7751134154244976, "grad_norm": 0.05012359470129013, "learning_rate": 0.00012360592285881453, "loss": 0.2707, "step": 21912 }, { "epoch": 1.775194426441996, "grad_norm": 0.05883803591132164, "learning_rate": 0.00012360142220621992, "loss": 0.2749, "step": 21913 }, { "epoch": 1.7752754374594946, "grad_norm": 0.06639762222766876, "learning_rate": 0.00012359692155362528, "loss": 0.3117, "step": 21914 }, { "epoch": 1.7753564484769928, "grad_norm": 0.0774698555469513, "learning_rate": 0.00012359242090103064, "loss": 0.2825, "step": 21915 }, { "epoch": 1.7754374594944913, "grad_norm": 0.05529485270380974, "learning_rate": 0.00012358792024843603, "loss": 0.2406, "step": 21916 }, { "epoch": 1.7755184705119897, "grad_norm": 0.06073933467268944, "learning_rate": 0.00012358341959584142, "loss": 0.2711, "step": 21917 }, { "epoch": 1.775599481529488, "grad_norm": 0.06365562230348587, "learning_rate": 0.00012357891894324678, "loss": 0.3226, "step": 21918 }, { "epoch": 1.7756804925469862, "grad_norm": 0.06966453790664673, "learning_rate": 0.00012357441829065216, "loss": 0.3034, "step": 21919 }, { "epoch": 1.775761503564485, "grad_norm": 0.05651353299617767, "learning_rate": 0.00012356991763805752, "loss": 0.2836, "step": 21920 }, { "epoch": 1.7758425145819832, "grad_norm": 0.049086764454841614, "learning_rate": 0.00012356541698546288, "loss": 0.2772, "step": 21921 }, { "epoch": 1.7759235255994814, "grad_norm": 0.05163026601076126, "learning_rate": 0.00012356091633286827, "loss": 0.2773, "step": 21922 }, { "epoch": 1.7760045366169799, "grad_norm": 0.05657094717025757, "learning_rate": 0.00012355641568027366, "loss": 0.2571, "step": 21923 }, { "epoch": 1.7760855476344783, "grad_norm": 0.04952248930931091, "learning_rate": 0.00012355191502767902, "loss": 0.241, "step": 21924 }, { "epoch": 1.7761665586519766, "grad_norm": 0.059065740555524826, "learning_rate": 0.0001235474143750844, "loss": 0.2768, "step": 21925 }, { "epoch": 1.776247569669475, "grad_norm": 0.05678890645503998, "learning_rate": 0.00012354291372248976, "loss": 0.2842, "step": 21926 }, { "epoch": 1.7763285806869735, "grad_norm": 0.050089024007320404, "learning_rate": 0.00012353841306989512, "loss": 0.2733, "step": 21927 }, { "epoch": 1.7764095917044718, "grad_norm": 0.05137062445282936, "learning_rate": 0.0001235339124173005, "loss": 0.2842, "step": 21928 }, { "epoch": 1.7764906027219702, "grad_norm": 0.05186197906732559, "learning_rate": 0.0001235294117647059, "loss": 0.2657, "step": 21929 }, { "epoch": 1.7765716137394687, "grad_norm": 0.060374390333890915, "learning_rate": 0.00012352491111211126, "loss": 0.2829, "step": 21930 }, { "epoch": 1.776652624756967, "grad_norm": 0.047595467418432236, "learning_rate": 0.00012352041045951665, "loss": 0.2654, "step": 21931 }, { "epoch": 1.7767336357744652, "grad_norm": 0.0663878321647644, "learning_rate": 0.000123515909806922, "loss": 0.3388, "step": 21932 }, { "epoch": 1.7768146467919637, "grad_norm": 0.05642055347561836, "learning_rate": 0.00012351140915432737, "loss": 0.3008, "step": 21933 }, { "epoch": 1.7768956578094621, "grad_norm": 0.056756485253572464, "learning_rate": 0.00012350690850173275, "loss": 0.2959, "step": 21934 }, { "epoch": 1.7769766688269604, "grad_norm": 0.06318248808383942, "learning_rate": 0.00012350240784913814, "loss": 0.2685, "step": 21935 }, { "epoch": 1.7770576798444588, "grad_norm": 0.06705424189567566, "learning_rate": 0.0001234979071965435, "loss": 0.2758, "step": 21936 }, { "epoch": 1.7771386908619573, "grad_norm": 0.05629139393568039, "learning_rate": 0.0001234934065439489, "loss": 0.2615, "step": 21937 }, { "epoch": 1.7772197018794555, "grad_norm": 0.054760128259658813, "learning_rate": 0.00012348890589135425, "loss": 0.262, "step": 21938 }, { "epoch": 1.777300712896954, "grad_norm": 0.055271271616220474, "learning_rate": 0.0001234844052387596, "loss": 0.288, "step": 21939 }, { "epoch": 1.7773817239144525, "grad_norm": 0.05202984809875488, "learning_rate": 0.000123479904586165, "loss": 0.2842, "step": 21940 }, { "epoch": 1.7774627349319507, "grad_norm": 0.05875534936785698, "learning_rate": 0.00012347540393357038, "loss": 0.2525, "step": 21941 }, { "epoch": 1.777543745949449, "grad_norm": 0.05911627784371376, "learning_rate": 0.00012347090328097574, "loss": 0.2638, "step": 21942 }, { "epoch": 1.7776247569669477, "grad_norm": 0.07096484303474426, "learning_rate": 0.00012346640262838113, "loss": 0.3125, "step": 21943 }, { "epoch": 1.777705767984446, "grad_norm": 0.060509003698825836, "learning_rate": 0.0001234619019757865, "loss": 0.2735, "step": 21944 }, { "epoch": 1.7777867790019442, "grad_norm": 0.06088875234127045, "learning_rate": 0.00012345740132319188, "loss": 0.3082, "step": 21945 }, { "epoch": 1.7778677900194426, "grad_norm": 0.04923175275325775, "learning_rate": 0.00012345290067059724, "loss": 0.2385, "step": 21946 }, { "epoch": 1.777948801036941, "grad_norm": 0.04954548925161362, "learning_rate": 0.00012344840001800262, "loss": 0.2509, "step": 21947 }, { "epoch": 1.7780298120544393, "grad_norm": 0.05132404714822769, "learning_rate": 0.00012344389936540798, "loss": 0.2914, "step": 21948 }, { "epoch": 1.7781108230719378, "grad_norm": 0.05896401032805443, "learning_rate": 0.00012343939871281337, "loss": 0.3064, "step": 21949 }, { "epoch": 1.7781918340894363, "grad_norm": 0.0579485148191452, "learning_rate": 0.00012343489806021873, "loss": 0.2937, "step": 21950 }, { "epoch": 1.7782728451069345, "grad_norm": 0.052562858909368515, "learning_rate": 0.00012343039740762412, "loss": 0.2935, "step": 21951 }, { "epoch": 1.778353856124433, "grad_norm": 0.05139998719096184, "learning_rate": 0.00012342589675502948, "loss": 0.2585, "step": 21952 }, { "epoch": 1.7784348671419314, "grad_norm": 0.058107078075408936, "learning_rate": 0.00012342139610243487, "loss": 0.2711, "step": 21953 }, { "epoch": 1.7785158781594297, "grad_norm": 0.05365680903196335, "learning_rate": 0.00012341689544984023, "loss": 0.2968, "step": 21954 }, { "epoch": 1.778596889176928, "grad_norm": 0.06430280208587646, "learning_rate": 0.0001234123947972456, "loss": 0.2667, "step": 21955 }, { "epoch": 1.7786779001944264, "grad_norm": 0.06153976917266846, "learning_rate": 0.00012340789414465097, "loss": 0.2984, "step": 21956 }, { "epoch": 1.7787589112119249, "grad_norm": 0.06513801217079163, "learning_rate": 0.00012340339349205636, "loss": 0.2628, "step": 21957 }, { "epoch": 1.778839922229423, "grad_norm": 0.05367812514305115, "learning_rate": 0.00012339889283946172, "loss": 0.2542, "step": 21958 }, { "epoch": 1.7789209332469216, "grad_norm": 0.0550832636654377, "learning_rate": 0.0001233943921868671, "loss": 0.3059, "step": 21959 }, { "epoch": 1.77900194426442, "grad_norm": 0.05532095953822136, "learning_rate": 0.00012338989153427247, "loss": 0.2297, "step": 21960 }, { "epoch": 1.7790829552819183, "grad_norm": 0.061939723789691925, "learning_rate": 0.00012338539088167785, "loss": 0.3022, "step": 21961 }, { "epoch": 1.7791639662994168, "grad_norm": 0.06562153249979019, "learning_rate": 0.00012338089022908321, "loss": 0.268, "step": 21962 }, { "epoch": 1.7792449773169152, "grad_norm": 0.05717784911394119, "learning_rate": 0.0001233763895764886, "loss": 0.2602, "step": 21963 }, { "epoch": 1.7793259883344135, "grad_norm": 0.06313645839691162, "learning_rate": 0.00012337188892389396, "loss": 0.2716, "step": 21964 }, { "epoch": 1.7794069993519117, "grad_norm": 0.05977176874876022, "learning_rate": 0.00012336738827129935, "loss": 0.2899, "step": 21965 }, { "epoch": 1.7794880103694104, "grad_norm": 0.05052109807729721, "learning_rate": 0.0001233628876187047, "loss": 0.2595, "step": 21966 }, { "epoch": 1.7795690213869086, "grad_norm": 0.05418974906206131, "learning_rate": 0.0001233583869661101, "loss": 0.2872, "step": 21967 }, { "epoch": 1.779650032404407, "grad_norm": 0.04954787716269493, "learning_rate": 0.00012335388631351548, "loss": 0.2637, "step": 21968 }, { "epoch": 1.7797310434219054, "grad_norm": 0.05969540402293205, "learning_rate": 0.00012334938566092084, "loss": 0.302, "step": 21969 }, { "epoch": 1.7798120544394038, "grad_norm": 0.05559484288096428, "learning_rate": 0.00012334488500832623, "loss": 0.2331, "step": 21970 }, { "epoch": 1.779893065456902, "grad_norm": 0.05428197979927063, "learning_rate": 0.0001233403843557316, "loss": 0.2521, "step": 21971 }, { "epoch": 1.7799740764744005, "grad_norm": 0.05862327292561531, "learning_rate": 0.00012333588370313695, "loss": 0.2702, "step": 21972 }, { "epoch": 1.780055087491899, "grad_norm": 0.06107109412550926, "learning_rate": 0.00012333138305054234, "loss": 0.2893, "step": 21973 }, { "epoch": 1.7801360985093972, "grad_norm": 0.059656549245119095, "learning_rate": 0.00012332688239794772, "loss": 0.3108, "step": 21974 }, { "epoch": 1.7802171095268955, "grad_norm": 0.052469126880168915, "learning_rate": 0.00012332238174535308, "loss": 0.2605, "step": 21975 }, { "epoch": 1.7802981205443942, "grad_norm": 0.05808718129992485, "learning_rate": 0.00012331788109275847, "loss": 0.2265, "step": 21976 }, { "epoch": 1.7803791315618924, "grad_norm": 0.07221174240112305, "learning_rate": 0.00012331338044016383, "loss": 0.2782, "step": 21977 }, { "epoch": 1.7804601425793907, "grad_norm": 0.06101597473025322, "learning_rate": 0.0001233088797875692, "loss": 0.2695, "step": 21978 }, { "epoch": 1.7805411535968891, "grad_norm": 0.05770976096391678, "learning_rate": 0.00012330437913497458, "loss": 0.261, "step": 21979 }, { "epoch": 1.7806221646143876, "grad_norm": 0.05689411237835884, "learning_rate": 0.00012329987848237997, "loss": 0.2714, "step": 21980 }, { "epoch": 1.7807031756318858, "grad_norm": 0.060095496475696564, "learning_rate": 0.00012329537782978533, "loss": 0.3046, "step": 21981 }, { "epoch": 1.7807841866493843, "grad_norm": 0.06570778787136078, "learning_rate": 0.0001232908771771907, "loss": 0.2626, "step": 21982 }, { "epoch": 1.7808651976668828, "grad_norm": 0.0634298175573349, "learning_rate": 0.00012328637652459607, "loss": 0.3217, "step": 21983 }, { "epoch": 1.780946208684381, "grad_norm": 0.0634029358625412, "learning_rate": 0.00012328187587200143, "loss": 0.2632, "step": 21984 }, { "epoch": 1.7810272197018795, "grad_norm": 0.04596779868006706, "learning_rate": 0.00012327737521940682, "loss": 0.2562, "step": 21985 }, { "epoch": 1.781108230719378, "grad_norm": 0.0523015595972538, "learning_rate": 0.0001232728745668122, "loss": 0.2597, "step": 21986 }, { "epoch": 1.7811892417368762, "grad_norm": 0.06286849826574326, "learning_rate": 0.00012326837391421757, "loss": 0.315, "step": 21987 }, { "epoch": 1.7812702527543745, "grad_norm": 0.054060183465480804, "learning_rate": 0.00012326387326162295, "loss": 0.3178, "step": 21988 }, { "epoch": 1.7813512637718731, "grad_norm": 0.058555398136377335, "learning_rate": 0.00012325937260902831, "loss": 0.2405, "step": 21989 }, { "epoch": 1.7814322747893714, "grad_norm": 0.05825028941035271, "learning_rate": 0.00012325487195643367, "loss": 0.2865, "step": 21990 }, { "epoch": 1.7815132858068696, "grad_norm": 0.04288549721240997, "learning_rate": 0.00012325037130383906, "loss": 0.2355, "step": 21991 }, { "epoch": 1.781594296824368, "grad_norm": 0.051817573606967926, "learning_rate": 0.00012324587065124445, "loss": 0.2712, "step": 21992 }, { "epoch": 1.7816753078418666, "grad_norm": 0.058279577642679214, "learning_rate": 0.0001232413699986498, "loss": 0.2988, "step": 21993 }, { "epoch": 1.7817563188593648, "grad_norm": 0.06698846071958542, "learning_rate": 0.0001232368693460552, "loss": 0.351, "step": 21994 }, { "epoch": 1.7818373298768633, "grad_norm": 0.05766688287258148, "learning_rate": 0.00012323236869346056, "loss": 0.2839, "step": 21995 }, { "epoch": 1.7819183408943617, "grad_norm": 0.06342483311891556, "learning_rate": 0.00012322786804086592, "loss": 0.2469, "step": 21996 }, { "epoch": 1.78199935191186, "grad_norm": 0.05495030805468559, "learning_rate": 0.0001232233673882713, "loss": 0.2595, "step": 21997 }, { "epoch": 1.7820803629293582, "grad_norm": 0.05256880074739456, "learning_rate": 0.0001232188667356767, "loss": 0.2761, "step": 21998 }, { "epoch": 1.782161373946857, "grad_norm": 0.052520014345645905, "learning_rate": 0.00012321436608308205, "loss": 0.3011, "step": 21999 }, { "epoch": 1.7822423849643552, "grad_norm": 0.04679098725318909, "learning_rate": 0.00012320986543048744, "loss": 0.2715, "step": 22000 }, { "epoch": 1.7823233959818534, "grad_norm": 0.06614166498184204, "learning_rate": 0.0001232053647778928, "loss": 0.281, "step": 22001 }, { "epoch": 1.7824044069993519, "grad_norm": 0.04454330727458, "learning_rate": 0.00012320086412529816, "loss": 0.2277, "step": 22002 }, { "epoch": 1.7824854180168503, "grad_norm": 0.0626412034034729, "learning_rate": 0.00012319636347270355, "loss": 0.3442, "step": 22003 }, { "epoch": 1.7825664290343486, "grad_norm": 0.053170133382081985, "learning_rate": 0.00012319186282010893, "loss": 0.2938, "step": 22004 }, { "epoch": 1.782647440051847, "grad_norm": 0.05421067774295807, "learning_rate": 0.0001231873621675143, "loss": 0.2778, "step": 22005 }, { "epoch": 1.7827284510693455, "grad_norm": 0.05278918147087097, "learning_rate": 0.00012318286151491968, "loss": 0.3152, "step": 22006 }, { "epoch": 1.7828094620868438, "grad_norm": 0.050274867564439774, "learning_rate": 0.00012317836086232504, "loss": 0.2494, "step": 22007 }, { "epoch": 1.7828904731043422, "grad_norm": 0.04210267215967178, "learning_rate": 0.0001231738602097304, "loss": 0.2431, "step": 22008 }, { "epoch": 1.7829714841218407, "grad_norm": 0.048283085227012634, "learning_rate": 0.0001231693595571358, "loss": 0.2714, "step": 22009 }, { "epoch": 1.783052495139339, "grad_norm": 0.04342495650053024, "learning_rate": 0.00012316485890454117, "loss": 0.22, "step": 22010 }, { "epoch": 1.7831335061568372, "grad_norm": 0.05618229880928993, "learning_rate": 0.00012316035825194653, "loss": 0.2851, "step": 22011 }, { "epoch": 1.7832145171743357, "grad_norm": 0.057552892714738846, "learning_rate": 0.00012315585759935192, "loss": 0.3044, "step": 22012 }, { "epoch": 1.7832955281918341, "grad_norm": 0.0542028471827507, "learning_rate": 0.00012315135694675728, "loss": 0.249, "step": 22013 }, { "epoch": 1.7833765392093324, "grad_norm": 0.049775153398513794, "learning_rate": 0.00012314685629416264, "loss": 0.2795, "step": 22014 }, { "epoch": 1.7834575502268308, "grad_norm": 0.056546781212091446, "learning_rate": 0.00012314235564156803, "loss": 0.2498, "step": 22015 }, { "epoch": 1.7835385612443293, "grad_norm": 0.050292108207941055, "learning_rate": 0.00012313785498897342, "loss": 0.253, "step": 22016 }, { "epoch": 1.7836195722618275, "grad_norm": 0.05830508843064308, "learning_rate": 0.00012313335433637878, "loss": 0.276, "step": 22017 }, { "epoch": 1.783700583279326, "grad_norm": 0.061981040984392166, "learning_rate": 0.00012312885368378416, "loss": 0.2502, "step": 22018 }, { "epoch": 1.7837815942968245, "grad_norm": 0.06097181513905525, "learning_rate": 0.00012312435303118952, "loss": 0.2522, "step": 22019 }, { "epoch": 1.7838626053143227, "grad_norm": 0.05192512273788452, "learning_rate": 0.0001231198523785949, "loss": 0.2536, "step": 22020 }, { "epoch": 1.783943616331821, "grad_norm": 0.05766737088561058, "learning_rate": 0.00012311535172600027, "loss": 0.2954, "step": 22021 }, { "epoch": 1.7840246273493197, "grad_norm": 0.05775097757577896, "learning_rate": 0.00012311085107340566, "loss": 0.2491, "step": 22022 }, { "epoch": 1.784105638366818, "grad_norm": 0.05492521449923515, "learning_rate": 0.00012310635042081102, "loss": 0.2884, "step": 22023 }, { "epoch": 1.7841866493843161, "grad_norm": 0.047551821917295456, "learning_rate": 0.0001231018497682164, "loss": 0.2681, "step": 22024 }, { "epoch": 1.7842676604018146, "grad_norm": 0.06832152605056763, "learning_rate": 0.00012309734911562176, "loss": 0.282, "step": 22025 }, { "epoch": 1.784348671419313, "grad_norm": 0.059465307742357254, "learning_rate": 0.00012309284846302715, "loss": 0.3006, "step": 22026 }, { "epoch": 1.7844296824368113, "grad_norm": 0.05935780331492424, "learning_rate": 0.0001230883478104325, "loss": 0.2667, "step": 22027 }, { "epoch": 1.7845106934543098, "grad_norm": 0.05664028972387314, "learning_rate": 0.0001230838471578379, "loss": 0.2788, "step": 22028 }, { "epoch": 1.7845917044718083, "grad_norm": 0.05232711881399155, "learning_rate": 0.00012307934650524326, "loss": 0.2356, "step": 22029 }, { "epoch": 1.7846727154893065, "grad_norm": 0.0531197190284729, "learning_rate": 0.00012307484585264865, "loss": 0.2531, "step": 22030 }, { "epoch": 1.784753726506805, "grad_norm": 0.056127868592739105, "learning_rate": 0.000123070345200054, "loss": 0.28, "step": 22031 }, { "epoch": 1.7848347375243034, "grad_norm": 0.05154069513082504, "learning_rate": 0.0001230658445474594, "loss": 0.2483, "step": 22032 }, { "epoch": 1.7849157485418017, "grad_norm": 0.06221969425678253, "learning_rate": 0.00012306134389486475, "loss": 0.2853, "step": 22033 }, { "epoch": 1.7849967595593, "grad_norm": 0.0606851764023304, "learning_rate": 0.00012305684324227014, "loss": 0.2935, "step": 22034 }, { "epoch": 1.7850777705767984, "grad_norm": 0.06411600857973099, "learning_rate": 0.0001230523425896755, "loss": 0.2681, "step": 22035 }, { "epoch": 1.7851587815942969, "grad_norm": 0.05162372812628746, "learning_rate": 0.0001230478419370809, "loss": 0.2874, "step": 22036 }, { "epoch": 1.785239792611795, "grad_norm": 0.07114414870738983, "learning_rate": 0.00012304334128448625, "loss": 0.271, "step": 22037 }, { "epoch": 1.7853208036292936, "grad_norm": 0.06527965515851974, "learning_rate": 0.00012303884063189163, "loss": 0.284, "step": 22038 }, { "epoch": 1.785401814646792, "grad_norm": 0.05137494578957558, "learning_rate": 0.00012303433997929702, "loss": 0.2445, "step": 22039 }, { "epoch": 1.7854828256642903, "grad_norm": 0.05669848993420601, "learning_rate": 0.00012302983932670238, "loss": 0.2642, "step": 22040 }, { "epoch": 1.7855638366817888, "grad_norm": 0.054273076355457306, "learning_rate": 0.00012302533867410774, "loss": 0.2527, "step": 22041 }, { "epoch": 1.7856448476992872, "grad_norm": 0.051785897463560104, "learning_rate": 0.00012302083802151313, "loss": 0.2829, "step": 22042 }, { "epoch": 1.7857258587167855, "grad_norm": 0.06361639499664307, "learning_rate": 0.0001230163373689185, "loss": 0.3006, "step": 22043 }, { "epoch": 1.7858068697342837, "grad_norm": 0.05482267215847969, "learning_rate": 0.00012301183671632388, "loss": 0.2613, "step": 22044 }, { "epoch": 1.7858878807517824, "grad_norm": 0.06259823590517044, "learning_rate": 0.00012300733606372926, "loss": 0.3061, "step": 22045 }, { "epoch": 1.7859688917692806, "grad_norm": 0.047054145485162735, "learning_rate": 0.00012300283541113462, "loss": 0.2448, "step": 22046 }, { "epoch": 1.7860499027867789, "grad_norm": 0.06172563135623932, "learning_rate": 0.00012299833475853998, "loss": 0.3046, "step": 22047 }, { "epoch": 1.7861309138042774, "grad_norm": 0.04755101725459099, "learning_rate": 0.00012299383410594537, "loss": 0.2811, "step": 22048 }, { "epoch": 1.7862119248217758, "grad_norm": 0.057041235268116, "learning_rate": 0.00012298933345335076, "loss": 0.2506, "step": 22049 }, { "epoch": 1.786292935839274, "grad_norm": 0.05601425841450691, "learning_rate": 0.00012298483280075612, "loss": 0.3156, "step": 22050 }, { "epoch": 1.7863739468567725, "grad_norm": 0.04828924313187599, "learning_rate": 0.0001229803321481615, "loss": 0.2683, "step": 22051 }, { "epoch": 1.786454957874271, "grad_norm": 0.06615084409713745, "learning_rate": 0.00012297583149556687, "loss": 0.2838, "step": 22052 }, { "epoch": 1.7865359688917692, "grad_norm": 0.05322642624378204, "learning_rate": 0.00012297133084297223, "loss": 0.2804, "step": 22053 }, { "epoch": 1.7866169799092677, "grad_norm": 0.055921897292137146, "learning_rate": 0.0001229668301903776, "loss": 0.3104, "step": 22054 }, { "epoch": 1.7866979909267662, "grad_norm": 0.06767680495977402, "learning_rate": 0.000122962329537783, "loss": 0.3315, "step": 22055 }, { "epoch": 1.7867790019442644, "grad_norm": 0.06112572178244591, "learning_rate": 0.00012295782888518836, "loss": 0.244, "step": 22056 }, { "epoch": 1.7868600129617627, "grad_norm": 0.05754351243376732, "learning_rate": 0.00012295332823259375, "loss": 0.2735, "step": 22057 }, { "epoch": 1.7869410239792611, "grad_norm": 0.04415886104106903, "learning_rate": 0.0001229488275799991, "loss": 0.2288, "step": 22058 }, { "epoch": 1.7870220349967596, "grad_norm": 0.04353123903274536, "learning_rate": 0.00012294432692740447, "loss": 0.2522, "step": 22059 }, { "epoch": 1.7871030460142578, "grad_norm": 0.05958475172519684, "learning_rate": 0.00012293982627480985, "loss": 0.2736, "step": 22060 }, { "epoch": 1.7871840570317563, "grad_norm": 0.056693777441978455, "learning_rate": 0.00012293532562221524, "loss": 0.2444, "step": 22061 }, { "epoch": 1.7872650680492548, "grad_norm": 0.0533333495259285, "learning_rate": 0.0001229308249696206, "loss": 0.2346, "step": 22062 }, { "epoch": 1.787346079066753, "grad_norm": 0.05624905973672867, "learning_rate": 0.000122926324317026, "loss": 0.2425, "step": 22063 }, { "epoch": 1.7874270900842515, "grad_norm": 0.056994516402482986, "learning_rate": 0.00012292182366443135, "loss": 0.3258, "step": 22064 }, { "epoch": 1.78750810110175, "grad_norm": 0.058526236563920975, "learning_rate": 0.0001229173230118367, "loss": 0.2613, "step": 22065 }, { "epoch": 1.7875891121192482, "grad_norm": 0.05824572220444679, "learning_rate": 0.0001229128223592421, "loss": 0.2641, "step": 22066 }, { "epoch": 1.7876701231367464, "grad_norm": 0.06035739183425903, "learning_rate": 0.00012290832170664748, "loss": 0.272, "step": 22067 }, { "epoch": 1.7877511341542451, "grad_norm": 0.06125498563051224, "learning_rate": 0.00012290382105405284, "loss": 0.2784, "step": 22068 }, { "epoch": 1.7878321451717434, "grad_norm": 0.050116147845983505, "learning_rate": 0.00012289932040145823, "loss": 0.2784, "step": 22069 }, { "epoch": 1.7879131561892416, "grad_norm": 0.05024263635277748, "learning_rate": 0.0001228948197488636, "loss": 0.2598, "step": 22070 }, { "epoch": 1.78799416720674, "grad_norm": 0.052744414657354355, "learning_rate": 0.00012289031909626895, "loss": 0.2655, "step": 22071 }, { "epoch": 1.7880751782242386, "grad_norm": 0.062270790338516235, "learning_rate": 0.00012288581844367434, "loss": 0.2957, "step": 22072 }, { "epoch": 1.7881561892417368, "grad_norm": 0.052221402525901794, "learning_rate": 0.00012288131779107972, "loss": 0.2482, "step": 22073 }, { "epoch": 1.7882372002592353, "grad_norm": 0.04572257772088051, "learning_rate": 0.00012287681713848508, "loss": 0.2437, "step": 22074 }, { "epoch": 1.7883182112767337, "grad_norm": 0.055134136229753494, "learning_rate": 0.00012287231648589047, "loss": 0.3352, "step": 22075 }, { "epoch": 1.788399222294232, "grad_norm": 0.04828047752380371, "learning_rate": 0.00012286781583329583, "loss": 0.2628, "step": 22076 }, { "epoch": 1.7884802333117304, "grad_norm": 0.05978702753782272, "learning_rate": 0.0001228633151807012, "loss": 0.2827, "step": 22077 }, { "epoch": 1.788561244329229, "grad_norm": 0.04657856374979019, "learning_rate": 0.00012285881452810658, "loss": 0.2604, "step": 22078 }, { "epoch": 1.7886422553467272, "grad_norm": 0.05295327305793762, "learning_rate": 0.00012285431387551197, "loss": 0.2527, "step": 22079 }, { "epoch": 1.7887232663642254, "grad_norm": 0.061362508684396744, "learning_rate": 0.00012284981322291733, "loss": 0.2992, "step": 22080 }, { "epoch": 1.7888042773817239, "grad_norm": 0.060083989053964615, "learning_rate": 0.0001228453125703227, "loss": 0.2715, "step": 22081 }, { "epoch": 1.7888852883992223, "grad_norm": 0.051934026181697845, "learning_rate": 0.00012284081191772807, "loss": 0.2642, "step": 22082 }, { "epoch": 1.7889662994167206, "grad_norm": 0.042310990393161774, "learning_rate": 0.00012283631126513343, "loss": 0.2597, "step": 22083 }, { "epoch": 1.789047310434219, "grad_norm": 0.052813880145549774, "learning_rate": 0.00012283181061253882, "loss": 0.2509, "step": 22084 }, { "epoch": 1.7891283214517175, "grad_norm": 0.059147439897060394, "learning_rate": 0.0001228273099599442, "loss": 0.3004, "step": 22085 }, { "epoch": 1.7892093324692158, "grad_norm": 0.0584903247654438, "learning_rate": 0.00012282280930734957, "loss": 0.2706, "step": 22086 }, { "epoch": 1.7892903434867142, "grad_norm": 0.061702847480773926, "learning_rate": 0.00012281830865475496, "loss": 0.2919, "step": 22087 }, { "epoch": 1.7893713545042127, "grad_norm": 0.06372623145580292, "learning_rate": 0.00012281380800216032, "loss": 0.2651, "step": 22088 }, { "epoch": 1.789452365521711, "grad_norm": 0.06399668008089066, "learning_rate": 0.00012280930734956568, "loss": 0.2835, "step": 22089 }, { "epoch": 1.7895333765392092, "grad_norm": 0.052177559584379196, "learning_rate": 0.00012280480669697106, "loss": 0.2472, "step": 22090 }, { "epoch": 1.7896143875567079, "grad_norm": 0.05263921990990639, "learning_rate": 0.00012280030604437645, "loss": 0.2679, "step": 22091 }, { "epoch": 1.7896953985742061, "grad_norm": 0.05937456712126732, "learning_rate": 0.0001227958053917818, "loss": 0.3181, "step": 22092 }, { "epoch": 1.7897764095917044, "grad_norm": 0.057407818734645844, "learning_rate": 0.0001227913047391872, "loss": 0.278, "step": 22093 }, { "epoch": 1.7898574206092028, "grad_norm": 0.05732586607336998, "learning_rate": 0.00012278680408659256, "loss": 0.2701, "step": 22094 }, { "epoch": 1.7899384316267013, "grad_norm": 0.05332402139902115, "learning_rate": 0.00012278230343399792, "loss": 0.2583, "step": 22095 }, { "epoch": 1.7900194426441995, "grad_norm": 0.05595818907022476, "learning_rate": 0.0001227778027814033, "loss": 0.2632, "step": 22096 }, { "epoch": 1.790100453661698, "grad_norm": 0.07867451012134552, "learning_rate": 0.0001227733021288087, "loss": 0.312, "step": 22097 }, { "epoch": 1.7901814646791965, "grad_norm": 0.05248883366584778, "learning_rate": 0.00012276880147621405, "loss": 0.2347, "step": 22098 }, { "epoch": 1.7902624756966947, "grad_norm": 0.06724679470062256, "learning_rate": 0.00012276430082361944, "loss": 0.2738, "step": 22099 }, { "epoch": 1.790343486714193, "grad_norm": 0.0722748190164566, "learning_rate": 0.0001227598001710248, "loss": 0.283, "step": 22100 }, { "epoch": 1.7904244977316917, "grad_norm": 0.05698879063129425, "learning_rate": 0.00012275529951843019, "loss": 0.3087, "step": 22101 }, { "epoch": 1.79050550874919, "grad_norm": 0.05652936175465584, "learning_rate": 0.00012275079886583555, "loss": 0.3073, "step": 22102 }, { "epoch": 1.7905865197666881, "grad_norm": 0.04519682005047798, "learning_rate": 0.00012274629821324093, "loss": 0.2467, "step": 22103 }, { "epoch": 1.7906675307841866, "grad_norm": 0.05654732510447502, "learning_rate": 0.0001227417975606463, "loss": 0.2601, "step": 22104 }, { "epoch": 1.790748541801685, "grad_norm": 0.06253427267074585, "learning_rate": 0.00012273729690805168, "loss": 0.2781, "step": 22105 }, { "epoch": 1.7908295528191833, "grad_norm": 0.045378677546978, "learning_rate": 0.00012273279625545704, "loss": 0.2848, "step": 22106 }, { "epoch": 1.7909105638366818, "grad_norm": 0.056497104465961456, "learning_rate": 0.00012272829560286243, "loss": 0.2824, "step": 22107 }, { "epoch": 1.7909915748541803, "grad_norm": 0.05572156980633736, "learning_rate": 0.00012272379495026781, "loss": 0.3007, "step": 22108 }, { "epoch": 1.7910725858716785, "grad_norm": 0.05481686443090439, "learning_rate": 0.00012271929429767317, "loss": 0.2619, "step": 22109 }, { "epoch": 1.791153596889177, "grad_norm": 0.053460247814655304, "learning_rate": 0.00012271479364507853, "loss": 0.2852, "step": 22110 }, { "epoch": 1.7912346079066754, "grad_norm": 0.05808936804533005, "learning_rate": 0.00012271029299248392, "loss": 0.2542, "step": 22111 }, { "epoch": 1.7913156189241737, "grad_norm": 0.04818173125386238, "learning_rate": 0.00012270579233988928, "loss": 0.2825, "step": 22112 }, { "epoch": 1.791396629941672, "grad_norm": 0.05348360911011696, "learning_rate": 0.00012270129168729467, "loss": 0.2481, "step": 22113 }, { "epoch": 1.7914776409591704, "grad_norm": 0.04549575224518776, "learning_rate": 0.00012269679103470006, "loss": 0.238, "step": 22114 }, { "epoch": 1.7915586519766689, "grad_norm": 0.05501910299062729, "learning_rate": 0.00012269229038210542, "loss": 0.2706, "step": 22115 }, { "epoch": 1.791639662994167, "grad_norm": 0.05364497750997543, "learning_rate": 0.00012268778972951078, "loss": 0.275, "step": 22116 }, { "epoch": 1.7917206740116656, "grad_norm": 0.0561637319624424, "learning_rate": 0.00012268328907691616, "loss": 0.2714, "step": 22117 }, { "epoch": 1.791801685029164, "grad_norm": 0.06201014667749405, "learning_rate": 0.00012267878842432152, "loss": 0.2872, "step": 22118 }, { "epoch": 1.7918826960466623, "grad_norm": 0.064414381980896, "learning_rate": 0.0001226742877717269, "loss": 0.2662, "step": 22119 }, { "epoch": 1.7919637070641607, "grad_norm": 0.05276533216238022, "learning_rate": 0.0001226697871191323, "loss": 0.2668, "step": 22120 }, { "epoch": 1.7920447180816592, "grad_norm": 0.06760087609291077, "learning_rate": 0.00012266528646653766, "loss": 0.2928, "step": 22121 }, { "epoch": 1.7921257290991575, "grad_norm": 0.0584164634346962, "learning_rate": 0.00012266078581394302, "loss": 0.2329, "step": 22122 }, { "epoch": 1.7922067401166557, "grad_norm": 0.049879640340805054, "learning_rate": 0.0001226562851613484, "loss": 0.2632, "step": 22123 }, { "epoch": 1.7922877511341544, "grad_norm": 0.06702165305614471, "learning_rate": 0.00012265178450875376, "loss": 0.3171, "step": 22124 }, { "epoch": 1.7923687621516526, "grad_norm": 0.0711006447672844, "learning_rate": 0.00012264728385615915, "loss": 0.3067, "step": 22125 }, { "epoch": 1.7924497731691509, "grad_norm": 0.057895757257938385, "learning_rate": 0.00012264278320356454, "loss": 0.2707, "step": 22126 }, { "epoch": 1.7925307841866494, "grad_norm": 0.048520952463150024, "learning_rate": 0.0001226382825509699, "loss": 0.2701, "step": 22127 }, { "epoch": 1.7926117952041478, "grad_norm": 0.060298990458250046, "learning_rate": 0.00012263378189837526, "loss": 0.2497, "step": 22128 }, { "epoch": 1.792692806221646, "grad_norm": 0.052584659308195114, "learning_rate": 0.00012262928124578065, "loss": 0.3016, "step": 22129 }, { "epoch": 1.7927738172391445, "grad_norm": 0.04596573859453201, "learning_rate": 0.00012262478059318603, "loss": 0.2578, "step": 22130 }, { "epoch": 1.792854828256643, "grad_norm": 0.05442513898015022, "learning_rate": 0.0001226202799405914, "loss": 0.2382, "step": 22131 }, { "epoch": 1.7929358392741412, "grad_norm": 0.055967625230550766, "learning_rate": 0.00012261577928799678, "loss": 0.2909, "step": 22132 }, { "epoch": 1.7930168502916397, "grad_norm": 0.06419744342565536, "learning_rate": 0.00012261127863540214, "loss": 0.273, "step": 22133 }, { "epoch": 1.7930978613091382, "grad_norm": 0.050524089485406876, "learning_rate": 0.0001226067779828075, "loss": 0.2832, "step": 22134 }, { "epoch": 1.7931788723266364, "grad_norm": 0.05179031565785408, "learning_rate": 0.0001226022773302129, "loss": 0.3183, "step": 22135 }, { "epoch": 1.7932598833441347, "grad_norm": 0.049778230488300323, "learning_rate": 0.00012259777667761828, "loss": 0.2723, "step": 22136 }, { "epoch": 1.7933408943616331, "grad_norm": 0.05179633945226669, "learning_rate": 0.00012259327602502364, "loss": 0.2419, "step": 22137 }, { "epoch": 1.7934219053791316, "grad_norm": 0.06944955140352249, "learning_rate": 0.00012258877537242902, "loss": 0.2636, "step": 22138 }, { "epoch": 1.7935029163966298, "grad_norm": 0.05182751640677452, "learning_rate": 0.00012258427471983438, "loss": 0.2735, "step": 22139 }, { "epoch": 1.7935839274141283, "grad_norm": 0.06322464346885681, "learning_rate": 0.00012257977406723974, "loss": 0.3229, "step": 22140 }, { "epoch": 1.7936649384316268, "grad_norm": 0.04878616705536842, "learning_rate": 0.00012257527341464513, "loss": 0.2562, "step": 22141 }, { "epoch": 1.793745949449125, "grad_norm": 0.05315592512488365, "learning_rate": 0.00012257077276205052, "loss": 0.302, "step": 22142 }, { "epoch": 1.7938269604666235, "grad_norm": 0.056052401661872864, "learning_rate": 0.00012256627210945588, "loss": 0.2733, "step": 22143 }, { "epoch": 1.793907971484122, "grad_norm": 0.05101040005683899, "learning_rate": 0.00012256177145686126, "loss": 0.2602, "step": 22144 }, { "epoch": 1.7939889825016202, "grad_norm": 0.0631868839263916, "learning_rate": 0.00012255727080426662, "loss": 0.2495, "step": 22145 }, { "epoch": 1.7940699935191184, "grad_norm": 0.0552617684006691, "learning_rate": 0.00012255277015167198, "loss": 0.2988, "step": 22146 }, { "epoch": 1.7941510045366171, "grad_norm": 0.06353448331356049, "learning_rate": 0.00012254826949907737, "loss": 0.3198, "step": 22147 }, { "epoch": 1.7942320155541154, "grad_norm": 0.053773414343595505, "learning_rate": 0.00012254376884648276, "loss": 0.2663, "step": 22148 }, { "epoch": 1.7943130265716136, "grad_norm": 0.05944092199206352, "learning_rate": 0.00012253926819388812, "loss": 0.2718, "step": 22149 }, { "epoch": 1.794394037589112, "grad_norm": 0.06545424461364746, "learning_rate": 0.0001225347675412935, "loss": 0.2893, "step": 22150 }, { "epoch": 1.7944750486066106, "grad_norm": 0.06336667388677597, "learning_rate": 0.00012253026688869887, "loss": 0.2825, "step": 22151 }, { "epoch": 1.7945560596241088, "grad_norm": 0.0653478354215622, "learning_rate": 0.00012252576623610423, "loss": 0.3082, "step": 22152 }, { "epoch": 1.7946370706416073, "grad_norm": 0.04935803264379501, "learning_rate": 0.0001225212655835096, "loss": 0.2803, "step": 22153 }, { "epoch": 1.7947180816591057, "grad_norm": 0.06616376340389252, "learning_rate": 0.000122516764930915, "loss": 0.2731, "step": 22154 }, { "epoch": 1.794799092676604, "grad_norm": 0.04812661558389664, "learning_rate": 0.00012251226427832036, "loss": 0.2609, "step": 22155 }, { "epoch": 1.7948801036941024, "grad_norm": 0.048841774463653564, "learning_rate": 0.00012250776362572575, "loss": 0.2774, "step": 22156 }, { "epoch": 1.794961114711601, "grad_norm": 0.06260140985250473, "learning_rate": 0.0001225032629731311, "loss": 0.3173, "step": 22157 }, { "epoch": 1.7950421257290992, "grad_norm": 0.061257071793079376, "learning_rate": 0.00012249876232053647, "loss": 0.2533, "step": 22158 }, { "epoch": 1.7951231367465974, "grad_norm": 0.04774602875113487, "learning_rate": 0.00012249426166794185, "loss": 0.2251, "step": 22159 }, { "epoch": 1.7952041477640959, "grad_norm": 0.06033805012702942, "learning_rate": 0.00012248976101534724, "loss": 0.2844, "step": 22160 }, { "epoch": 1.7952851587815943, "grad_norm": 0.0505126528441906, "learning_rate": 0.0001224852603627526, "loss": 0.2389, "step": 22161 }, { "epoch": 1.7953661697990926, "grad_norm": 0.053559351712465286, "learning_rate": 0.000122480759710158, "loss": 0.2679, "step": 22162 }, { "epoch": 1.795447180816591, "grad_norm": 0.05731525272130966, "learning_rate": 0.00012247625905756335, "loss": 0.2822, "step": 22163 }, { "epoch": 1.7955281918340895, "grad_norm": 0.05027606710791588, "learning_rate": 0.0001224717584049687, "loss": 0.251, "step": 22164 }, { "epoch": 1.7956092028515878, "grad_norm": 0.05858727917075157, "learning_rate": 0.0001224672577523741, "loss": 0.2776, "step": 22165 }, { "epoch": 1.7956902138690862, "grad_norm": 0.0573890395462513, "learning_rate": 0.00012246275709977948, "loss": 0.3017, "step": 22166 }, { "epoch": 1.7957712248865847, "grad_norm": 0.0540848970413208, "learning_rate": 0.00012245825644718484, "loss": 0.2345, "step": 22167 }, { "epoch": 1.795852235904083, "grad_norm": 0.06505483388900757, "learning_rate": 0.00012245375579459023, "loss": 0.3001, "step": 22168 }, { "epoch": 1.7959332469215812, "grad_norm": 0.05077476054430008, "learning_rate": 0.0001224492551419956, "loss": 0.2605, "step": 22169 }, { "epoch": 1.7960142579390799, "grad_norm": 0.06261571496725082, "learning_rate": 0.00012244475448940095, "loss": 0.2615, "step": 22170 }, { "epoch": 1.7960952689565781, "grad_norm": 0.05674409121274948, "learning_rate": 0.00012244025383680634, "loss": 0.2662, "step": 22171 }, { "epoch": 1.7961762799740764, "grad_norm": 0.05636698380112648, "learning_rate": 0.00012243575318421172, "loss": 0.303, "step": 22172 }, { "epoch": 1.7962572909915748, "grad_norm": 0.05232566222548485, "learning_rate": 0.00012243125253161708, "loss": 0.2749, "step": 22173 }, { "epoch": 1.7963383020090733, "grad_norm": 0.05814569815993309, "learning_rate": 0.00012242675187902247, "loss": 0.2433, "step": 22174 }, { "epoch": 1.7964193130265715, "grad_norm": 0.05443737283349037, "learning_rate": 0.00012242225122642783, "loss": 0.2652, "step": 22175 }, { "epoch": 1.79650032404407, "grad_norm": 0.056092407554388046, "learning_rate": 0.0001224177505738332, "loss": 0.2928, "step": 22176 }, { "epoch": 1.7965813350615685, "grad_norm": 0.04900359734892845, "learning_rate": 0.0001224132499212386, "loss": 0.26, "step": 22177 }, { "epoch": 1.7966623460790667, "grad_norm": 0.05376352742314339, "learning_rate": 0.00012240874926864397, "loss": 0.2808, "step": 22178 }, { "epoch": 1.7967433570965652, "grad_norm": 0.054264407604932785, "learning_rate": 0.00012240424861604933, "loss": 0.2627, "step": 22179 }, { "epoch": 1.7968243681140637, "grad_norm": 0.06078781932592392, "learning_rate": 0.00012239974796345471, "loss": 0.316, "step": 22180 }, { "epoch": 1.796905379131562, "grad_norm": 0.0540931411087513, "learning_rate": 0.00012239524731086007, "loss": 0.2805, "step": 22181 }, { "epoch": 1.7969863901490601, "grad_norm": 0.0742974653840065, "learning_rate": 0.00012239074665826546, "loss": 0.2752, "step": 22182 }, { "epoch": 1.7970674011665586, "grad_norm": 0.05611839145421982, "learning_rate": 0.00012238624600567085, "loss": 0.2656, "step": 22183 }, { "epoch": 1.797148412184057, "grad_norm": 0.06015536189079285, "learning_rate": 0.0001223817453530762, "loss": 0.3339, "step": 22184 }, { "epoch": 1.7972294232015553, "grad_norm": 0.06692757457494736, "learning_rate": 0.00012237724470048157, "loss": 0.2854, "step": 22185 }, { "epoch": 1.7973104342190538, "grad_norm": 0.0594097375869751, "learning_rate": 0.00012237274404788696, "loss": 0.2938, "step": 22186 }, { "epoch": 1.7973914452365523, "grad_norm": 0.07080462574958801, "learning_rate": 0.00012236824339529232, "loss": 0.3276, "step": 22187 }, { "epoch": 1.7974724562540505, "grad_norm": 0.06359456479549408, "learning_rate": 0.0001223637427426977, "loss": 0.2843, "step": 22188 }, { "epoch": 1.797553467271549, "grad_norm": 0.056201670318841934, "learning_rate": 0.0001223592420901031, "loss": 0.3357, "step": 22189 }, { "epoch": 1.7976344782890474, "grad_norm": 0.05176905542612076, "learning_rate": 0.00012235474143750845, "loss": 0.2508, "step": 22190 }, { "epoch": 1.7977154893065457, "grad_norm": 0.04799371585249901, "learning_rate": 0.0001223502407849138, "loss": 0.2863, "step": 22191 }, { "epoch": 1.797796500324044, "grad_norm": 0.05569925531744957, "learning_rate": 0.0001223457401323192, "loss": 0.2732, "step": 22192 }, { "epoch": 1.7978775113415426, "grad_norm": 0.05573749542236328, "learning_rate": 0.00012234123947972456, "loss": 0.2429, "step": 22193 }, { "epoch": 1.7979585223590409, "grad_norm": 0.06206778809428215, "learning_rate": 0.00012233673882712994, "loss": 0.3125, "step": 22194 }, { "epoch": 1.798039533376539, "grad_norm": 0.05589371919631958, "learning_rate": 0.00012233223817453533, "loss": 0.3008, "step": 22195 }, { "epoch": 1.7981205443940376, "grad_norm": 0.05611521378159523, "learning_rate": 0.0001223277375219407, "loss": 0.2635, "step": 22196 }, { "epoch": 1.798201555411536, "grad_norm": 0.057951584458351135, "learning_rate": 0.00012232323686934605, "loss": 0.2921, "step": 22197 }, { "epoch": 1.7982825664290343, "grad_norm": 0.05601397156715393, "learning_rate": 0.00012231873621675144, "loss": 0.2778, "step": 22198 }, { "epoch": 1.7983635774465327, "grad_norm": 0.06642493605613708, "learning_rate": 0.0001223142355641568, "loss": 0.2739, "step": 22199 }, { "epoch": 1.7984445884640312, "grad_norm": 0.058593589812517166, "learning_rate": 0.00012230973491156219, "loss": 0.2916, "step": 22200 }, { "epoch": 1.7985255994815295, "grad_norm": 0.06586059182882309, "learning_rate": 0.00012230523425896757, "loss": 0.3034, "step": 22201 }, { "epoch": 1.7986066104990277, "grad_norm": 0.054388418793678284, "learning_rate": 0.00012230073360637293, "loss": 0.2716, "step": 22202 }, { "epoch": 1.7986876215165264, "grad_norm": 0.059615492820739746, "learning_rate": 0.0001222962329537783, "loss": 0.3011, "step": 22203 }, { "epoch": 1.7987686325340246, "grad_norm": 0.06519076973199844, "learning_rate": 0.00012229173230118368, "loss": 0.2707, "step": 22204 }, { "epoch": 1.7988496435515229, "grad_norm": 0.05637694150209427, "learning_rate": 0.00012228723164858907, "loss": 0.2512, "step": 22205 }, { "epoch": 1.7989306545690213, "grad_norm": 0.05849280208349228, "learning_rate": 0.00012228273099599443, "loss": 0.3303, "step": 22206 }, { "epoch": 1.7990116655865198, "grad_norm": 0.06378426402807236, "learning_rate": 0.00012227823034339981, "loss": 0.2797, "step": 22207 }, { "epoch": 1.799092676604018, "grad_norm": 0.05572065711021423, "learning_rate": 0.00012227372969080517, "loss": 0.2524, "step": 22208 }, { "epoch": 1.7991736876215165, "grad_norm": 0.0632311999797821, "learning_rate": 0.00012226922903821053, "loss": 0.2885, "step": 22209 }, { "epoch": 1.799254698639015, "grad_norm": 0.068609319627285, "learning_rate": 0.00012226472838561592, "loss": 0.2961, "step": 22210 }, { "epoch": 1.7993357096565132, "grad_norm": 0.05970097705721855, "learning_rate": 0.0001222602277330213, "loss": 0.2667, "step": 22211 }, { "epoch": 1.7994167206740117, "grad_norm": 0.05331006273627281, "learning_rate": 0.00012225572708042667, "loss": 0.2654, "step": 22212 }, { "epoch": 1.7994977316915102, "grad_norm": 0.057156920433044434, "learning_rate": 0.00012225122642783206, "loss": 0.2611, "step": 22213 }, { "epoch": 1.7995787427090084, "grad_norm": 0.04627132788300514, "learning_rate": 0.00012224672577523742, "loss": 0.2464, "step": 22214 }, { "epoch": 1.7996597537265067, "grad_norm": 0.07109607756137848, "learning_rate": 0.00012224222512264278, "loss": 0.3506, "step": 22215 }, { "epoch": 1.7997407647440054, "grad_norm": 0.057411983609199524, "learning_rate": 0.00012223772447004816, "loss": 0.2649, "step": 22216 }, { "epoch": 1.7998217757615036, "grad_norm": 0.06819972395896912, "learning_rate": 0.00012223322381745355, "loss": 0.3188, "step": 22217 }, { "epoch": 1.7999027867790018, "grad_norm": 0.05879756435751915, "learning_rate": 0.0001222287231648589, "loss": 0.2628, "step": 22218 }, { "epoch": 1.7999837977965003, "grad_norm": 0.049354761838912964, "learning_rate": 0.0001222242225122643, "loss": 0.2617, "step": 22219 }, { "epoch": 1.8000648088139988, "grad_norm": 0.04880581423640251, "learning_rate": 0.00012221972185966966, "loss": 0.2302, "step": 22220 }, { "epoch": 1.800145819831497, "grad_norm": 0.0527828149497509, "learning_rate": 0.00012221522120707502, "loss": 0.266, "step": 22221 }, { "epoch": 1.8002268308489955, "grad_norm": 0.05588414892554283, "learning_rate": 0.0001222107205544804, "loss": 0.2983, "step": 22222 }, { "epoch": 1.800307841866494, "grad_norm": 0.056125205010175705, "learning_rate": 0.0001222062199018858, "loss": 0.2798, "step": 22223 }, { "epoch": 1.8003888528839922, "grad_norm": 0.05799616128206253, "learning_rate": 0.00012220171924929115, "loss": 0.26, "step": 22224 }, { "epoch": 1.8004698639014904, "grad_norm": 0.05869988352060318, "learning_rate": 0.00012219721859669654, "loss": 0.3042, "step": 22225 }, { "epoch": 1.8005508749189891, "grad_norm": 0.0588398352265358, "learning_rate": 0.0001221927179441019, "loss": 0.2731, "step": 22226 }, { "epoch": 1.8006318859364874, "grad_norm": 0.05252890661358833, "learning_rate": 0.00012218821729150726, "loss": 0.2559, "step": 22227 }, { "epoch": 1.8007128969539856, "grad_norm": 0.04973560944199562, "learning_rate": 0.00012218371663891265, "loss": 0.2765, "step": 22228 }, { "epoch": 1.800793907971484, "grad_norm": 0.05808442831039429, "learning_rate": 0.00012217921598631803, "loss": 0.272, "step": 22229 }, { "epoch": 1.8008749189889826, "grad_norm": 0.052194830030202866, "learning_rate": 0.0001221747153337234, "loss": 0.2757, "step": 22230 }, { "epoch": 1.8009559300064808, "grad_norm": 0.06407959759235382, "learning_rate": 0.00012217021468112878, "loss": 0.3018, "step": 22231 }, { "epoch": 1.8010369410239793, "grad_norm": 0.048292580991983414, "learning_rate": 0.00012216571402853414, "loss": 0.2565, "step": 22232 }, { "epoch": 1.8011179520414777, "grad_norm": 0.06521427631378174, "learning_rate": 0.0001221612133759395, "loss": 0.2806, "step": 22233 }, { "epoch": 1.801198963058976, "grad_norm": 0.06741520762443542, "learning_rate": 0.0001221567127233449, "loss": 0.2611, "step": 22234 }, { "epoch": 1.8012799740764744, "grad_norm": 0.05765705928206444, "learning_rate": 0.00012215221207075028, "loss": 0.3098, "step": 22235 }, { "epoch": 1.801360985093973, "grad_norm": 0.05621275678277016, "learning_rate": 0.00012214771141815564, "loss": 0.2491, "step": 22236 }, { "epoch": 1.8014419961114712, "grad_norm": 0.05360456183552742, "learning_rate": 0.00012214321076556102, "loss": 0.2526, "step": 22237 }, { "epoch": 1.8015230071289694, "grad_norm": 0.06470076739788055, "learning_rate": 0.00012213871011296638, "loss": 0.291, "step": 22238 }, { "epoch": 1.8016040181464679, "grad_norm": 0.06331278383731842, "learning_rate": 0.00012213420946037174, "loss": 0.2857, "step": 22239 }, { "epoch": 1.8016850291639663, "grad_norm": 0.06826566159725189, "learning_rate": 0.00012212970880777713, "loss": 0.2561, "step": 22240 }, { "epoch": 1.8017660401814646, "grad_norm": 0.05896066501736641, "learning_rate": 0.00012212520815518252, "loss": 0.2735, "step": 22241 }, { "epoch": 1.801847051198963, "grad_norm": 0.0709279254078865, "learning_rate": 0.00012212070750258788, "loss": 0.2911, "step": 22242 }, { "epoch": 1.8019280622164615, "grad_norm": 0.05172071233391762, "learning_rate": 0.00012211620684999326, "loss": 0.261, "step": 22243 }, { "epoch": 1.8020090732339598, "grad_norm": 0.06091113016009331, "learning_rate": 0.00012211170619739862, "loss": 0.2672, "step": 22244 }, { "epoch": 1.8020900842514582, "grad_norm": 0.04631185904145241, "learning_rate": 0.00012210720554480398, "loss": 0.2359, "step": 22245 }, { "epoch": 1.8021710952689567, "grad_norm": 0.06016520410776138, "learning_rate": 0.0001221027048922094, "loss": 0.2634, "step": 22246 }, { "epoch": 1.802252106286455, "grad_norm": 0.05439654365181923, "learning_rate": 0.00012209820423961476, "loss": 0.3049, "step": 22247 }, { "epoch": 1.8023331173039532, "grad_norm": 0.05835814028978348, "learning_rate": 0.00012209370358702012, "loss": 0.2788, "step": 22248 }, { "epoch": 1.8024141283214519, "grad_norm": 0.04965536668896675, "learning_rate": 0.0001220892029344255, "loss": 0.2963, "step": 22249 }, { "epoch": 1.8024951393389501, "grad_norm": 0.048756249248981476, "learning_rate": 0.00012208470228183087, "loss": 0.2227, "step": 22250 }, { "epoch": 1.8025761503564484, "grad_norm": 0.059522707015275955, "learning_rate": 0.00012208020162923623, "loss": 0.2688, "step": 22251 }, { "epoch": 1.8026571613739468, "grad_norm": 0.07265980541706085, "learning_rate": 0.00012207570097664164, "loss": 0.2594, "step": 22252 }, { "epoch": 1.8027381723914453, "grad_norm": 0.05598118528723717, "learning_rate": 0.000122071200324047, "loss": 0.2712, "step": 22253 }, { "epoch": 1.8028191834089435, "grad_norm": 0.05440227687358856, "learning_rate": 0.00012206669967145236, "loss": 0.2689, "step": 22254 }, { "epoch": 1.802900194426442, "grad_norm": 0.062089111655950546, "learning_rate": 0.00012206219901885773, "loss": 0.2761, "step": 22255 }, { "epoch": 1.8029812054439405, "grad_norm": 0.07075740396976471, "learning_rate": 0.00012205769836626311, "loss": 0.2533, "step": 22256 }, { "epoch": 1.8030622164614387, "grad_norm": 0.07244139164686203, "learning_rate": 0.00012205319771366848, "loss": 0.266, "step": 22257 }, { "epoch": 1.8031432274789372, "grad_norm": 0.05973348021507263, "learning_rate": 0.00012204869706107387, "loss": 0.2593, "step": 22258 }, { "epoch": 1.8032242384964356, "grad_norm": 0.06082494184374809, "learning_rate": 0.00012204419640847924, "loss": 0.2516, "step": 22259 }, { "epoch": 1.803305249513934, "grad_norm": 0.06084740534424782, "learning_rate": 0.00012203969575588462, "loss": 0.286, "step": 22260 }, { "epoch": 1.8033862605314321, "grad_norm": 0.046079982072114944, "learning_rate": 0.00012203519510328998, "loss": 0.2514, "step": 22261 }, { "epoch": 1.8034672715489306, "grad_norm": 0.04780830070376396, "learning_rate": 0.00012203069445069535, "loss": 0.2542, "step": 22262 }, { "epoch": 1.803548282566429, "grad_norm": 0.05270274356007576, "learning_rate": 0.00012202619379810074, "loss": 0.2598, "step": 22263 }, { "epoch": 1.8036292935839273, "grad_norm": 0.05802077427506447, "learning_rate": 0.00012202169314550611, "loss": 0.2476, "step": 22264 }, { "epoch": 1.8037103046014258, "grad_norm": 0.043057069182395935, "learning_rate": 0.00012201719249291148, "loss": 0.2448, "step": 22265 }, { "epoch": 1.8037913156189243, "grad_norm": 0.06099724769592285, "learning_rate": 0.00012201269184031686, "loss": 0.2601, "step": 22266 }, { "epoch": 1.8038723266364225, "grad_norm": 0.07051218301057816, "learning_rate": 0.00012200819118772222, "loss": 0.2879, "step": 22267 }, { "epoch": 1.803953337653921, "grad_norm": 0.05437973514199257, "learning_rate": 0.00012200369053512759, "loss": 0.2669, "step": 22268 }, { "epoch": 1.8040343486714194, "grad_norm": 0.06001882627606392, "learning_rate": 0.00012199918988253298, "loss": 0.2822, "step": 22269 }, { "epoch": 1.8041153596889177, "grad_norm": 0.08208142966032028, "learning_rate": 0.00012199468922993835, "loss": 0.2962, "step": 22270 }, { "epoch": 1.804196370706416, "grad_norm": 0.055241961032152176, "learning_rate": 0.00012199018857734373, "loss": 0.2445, "step": 22271 }, { "epoch": 1.8042773817239146, "grad_norm": 0.05752910301089287, "learning_rate": 0.0001219856879247491, "loss": 0.2782, "step": 22272 }, { "epoch": 1.8043583927414129, "grad_norm": 0.06530386209487915, "learning_rate": 0.00012198118727215446, "loss": 0.304, "step": 22273 }, { "epoch": 1.804439403758911, "grad_norm": 0.05831904336810112, "learning_rate": 0.00012197668661955983, "loss": 0.2749, "step": 22274 }, { "epoch": 1.8045204147764096, "grad_norm": 0.05550654977560043, "learning_rate": 0.00012197218596696522, "loss": 0.2881, "step": 22275 }, { "epoch": 1.804601425793908, "grad_norm": 0.05844729021191597, "learning_rate": 0.00012196768531437059, "loss": 0.2886, "step": 22276 }, { "epoch": 1.8046824368114063, "grad_norm": 0.05544528737664223, "learning_rate": 0.00012196318466177597, "loss": 0.2513, "step": 22277 }, { "epoch": 1.8047634478289047, "grad_norm": 0.05155603587627411, "learning_rate": 0.00012195868400918134, "loss": 0.3023, "step": 22278 }, { "epoch": 1.8048444588464032, "grad_norm": 0.050557348877191544, "learning_rate": 0.0001219541833565867, "loss": 0.2795, "step": 22279 }, { "epoch": 1.8049254698639015, "grad_norm": 0.06218627095222473, "learning_rate": 0.00012194968270399207, "loss": 0.2854, "step": 22280 }, { "epoch": 1.8050064808814, "grad_norm": 0.05242801457643509, "learning_rate": 0.00012194518205139746, "loss": 0.237, "step": 22281 }, { "epoch": 1.8050874918988984, "grad_norm": 0.05768498033285141, "learning_rate": 0.00012194068139880283, "loss": 0.249, "step": 22282 }, { "epoch": 1.8051685029163966, "grad_norm": 0.052621133625507355, "learning_rate": 0.00012193618074620821, "loss": 0.2474, "step": 22283 }, { "epoch": 1.8052495139338949, "grad_norm": 0.053806059062480927, "learning_rate": 0.00012193168009361358, "loss": 0.2419, "step": 22284 }, { "epoch": 1.8053305249513933, "grad_norm": 0.055902380496263504, "learning_rate": 0.00012192717944101894, "loss": 0.252, "step": 22285 }, { "epoch": 1.8054115359688918, "grad_norm": 0.04919757321476936, "learning_rate": 0.00012192267878842434, "loss": 0.2879, "step": 22286 }, { "epoch": 1.80549254698639, "grad_norm": 0.05439651384949684, "learning_rate": 0.0001219181781358297, "loss": 0.2766, "step": 22287 }, { "epoch": 1.8055735580038885, "grad_norm": 0.07050889730453491, "learning_rate": 0.00012191367748323508, "loss": 0.3027, "step": 22288 }, { "epoch": 1.805654569021387, "grad_norm": 0.05229153111577034, "learning_rate": 0.00012190917683064045, "loss": 0.2309, "step": 22289 }, { "epoch": 1.8057355800388852, "grad_norm": 0.06367629021406174, "learning_rate": 0.00012190467617804582, "loss": 0.2515, "step": 22290 }, { "epoch": 1.8058165910563837, "grad_norm": 0.06692226976156235, "learning_rate": 0.00012190017552545118, "loss": 0.2764, "step": 22291 }, { "epoch": 1.8058976020738822, "grad_norm": 0.05514880269765854, "learning_rate": 0.00012189567487285658, "loss": 0.2416, "step": 22292 }, { "epoch": 1.8059786130913804, "grad_norm": 0.07039035856723785, "learning_rate": 0.00012189117422026194, "loss": 0.3171, "step": 22293 }, { "epoch": 1.8060596241088787, "grad_norm": 0.06593021005392075, "learning_rate": 0.00012188667356766732, "loss": 0.2675, "step": 22294 }, { "epoch": 1.8061406351263773, "grad_norm": 0.059571146965026855, "learning_rate": 0.00012188217291507269, "loss": 0.3039, "step": 22295 }, { "epoch": 1.8062216461438756, "grad_norm": 0.0600927360355854, "learning_rate": 0.00012187767226247807, "loss": 0.256, "step": 22296 }, { "epoch": 1.8063026571613738, "grad_norm": 0.04874737560749054, "learning_rate": 0.00012187317160988343, "loss": 0.2198, "step": 22297 }, { "epoch": 1.8063836681788723, "grad_norm": 0.05537532642483711, "learning_rate": 0.00012186867095728883, "loss": 0.2644, "step": 22298 }, { "epoch": 1.8064646791963708, "grad_norm": 0.049281734973192215, "learning_rate": 0.00012186417030469419, "loss": 0.2587, "step": 22299 }, { "epoch": 1.806545690213869, "grad_norm": 0.06185423582792282, "learning_rate": 0.00012185966965209956, "loss": 0.2584, "step": 22300 }, { "epoch": 1.8066267012313675, "grad_norm": 0.06315252184867859, "learning_rate": 0.00012185516899950493, "loss": 0.2874, "step": 22301 }, { "epoch": 1.806707712248866, "grad_norm": 0.049367886036634445, "learning_rate": 0.00012185066834691031, "loss": 0.281, "step": 22302 }, { "epoch": 1.8067887232663642, "grad_norm": 0.061743155121803284, "learning_rate": 0.00012184616769431567, "loss": 0.2474, "step": 22303 }, { "epoch": 1.8068697342838627, "grad_norm": 0.05364508181810379, "learning_rate": 0.00012184166704172107, "loss": 0.2641, "step": 22304 }, { "epoch": 1.8069507453013611, "grad_norm": 0.055584825575351715, "learning_rate": 0.00012183716638912643, "loss": 0.2548, "step": 22305 }, { "epoch": 1.8070317563188594, "grad_norm": 0.0605003759264946, "learning_rate": 0.0001218326657365318, "loss": 0.2925, "step": 22306 }, { "epoch": 1.8071127673363576, "grad_norm": 0.05850888788700104, "learning_rate": 0.00012182816508393717, "loss": 0.2722, "step": 22307 }, { "epoch": 1.807193778353856, "grad_norm": 0.05131843313574791, "learning_rate": 0.00012182366443134255, "loss": 0.3039, "step": 22308 }, { "epoch": 1.8072747893713546, "grad_norm": 0.06271613389253616, "learning_rate": 0.00012181916377874791, "loss": 0.2712, "step": 22309 }, { "epoch": 1.8073558003888528, "grad_norm": 0.055650744587183, "learning_rate": 0.00012181466312615331, "loss": 0.259, "step": 22310 }, { "epoch": 1.8074368114063513, "grad_norm": 0.053837329149246216, "learning_rate": 0.00012181016247355867, "loss": 0.3005, "step": 22311 }, { "epoch": 1.8075178224238497, "grad_norm": 0.05122172087430954, "learning_rate": 0.00012180566182096404, "loss": 0.2534, "step": 22312 }, { "epoch": 1.807598833441348, "grad_norm": 0.05293326824903488, "learning_rate": 0.00012180116116836942, "loss": 0.3092, "step": 22313 }, { "epoch": 1.8076798444588464, "grad_norm": 0.05141449719667435, "learning_rate": 0.00012179666051577479, "loss": 0.2599, "step": 22314 }, { "epoch": 1.807760855476345, "grad_norm": 0.04616117104887962, "learning_rate": 0.00012179215986318018, "loss": 0.2346, "step": 22315 }, { "epoch": 1.8078418664938432, "grad_norm": 0.060469768941402435, "learning_rate": 0.00012178765921058555, "loss": 0.2593, "step": 22316 }, { "epoch": 1.8079228775113414, "grad_norm": 0.05977886915206909, "learning_rate": 0.00012178315855799091, "loss": 0.22, "step": 22317 }, { "epoch": 1.80800388852884, "grad_norm": 0.05252216011285782, "learning_rate": 0.00012177865790539628, "loss": 0.2765, "step": 22318 }, { "epoch": 1.8080848995463383, "grad_norm": 0.05202179774641991, "learning_rate": 0.00012177415725280166, "loss": 0.2723, "step": 22319 }, { "epoch": 1.8081659105638366, "grad_norm": 0.0671328529715538, "learning_rate": 0.00012176965660020703, "loss": 0.2777, "step": 22320 }, { "epoch": 1.808246921581335, "grad_norm": 0.06827513873577118, "learning_rate": 0.00012176515594761242, "loss": 0.2782, "step": 22321 }, { "epoch": 1.8083279325988335, "grad_norm": 0.06724611669778824, "learning_rate": 0.00012176065529501779, "loss": 0.2963, "step": 22322 }, { "epoch": 1.8084089436163318, "grad_norm": 0.05954742431640625, "learning_rate": 0.00012175615464242315, "loss": 0.2595, "step": 22323 }, { "epoch": 1.8084899546338302, "grad_norm": 0.055643219500780106, "learning_rate": 0.00012175165398982853, "loss": 0.2942, "step": 22324 }, { "epoch": 1.8085709656513287, "grad_norm": 0.0529947504401207, "learning_rate": 0.0001217471533372339, "loss": 0.2943, "step": 22325 }, { "epoch": 1.808651976668827, "grad_norm": 0.0744687095284462, "learning_rate": 0.00012174265268463927, "loss": 0.2802, "step": 22326 }, { "epoch": 1.8087329876863252, "grad_norm": 0.05510347709059715, "learning_rate": 0.00012173815203204466, "loss": 0.2314, "step": 22327 }, { "epoch": 1.8088139987038239, "grad_norm": 0.05206920579075813, "learning_rate": 0.00012173365137945003, "loss": 0.2583, "step": 22328 }, { "epoch": 1.8088950097213221, "grad_norm": 0.0690377876162529, "learning_rate": 0.00012172915072685541, "loss": 0.323, "step": 22329 }, { "epoch": 1.8089760207388204, "grad_norm": 0.057178206741809845, "learning_rate": 0.00012172465007426077, "loss": 0.271, "step": 22330 }, { "epoch": 1.8090570317563188, "grad_norm": 0.057551007717847824, "learning_rate": 0.00012172014942166614, "loss": 0.2791, "step": 22331 }, { "epoch": 1.8091380427738173, "grad_norm": 0.07293134182691574, "learning_rate": 0.00012171564876907151, "loss": 0.3088, "step": 22332 }, { "epoch": 1.8092190537913155, "grad_norm": 0.06492959707975388, "learning_rate": 0.0001217111481164769, "loss": 0.2929, "step": 22333 }, { "epoch": 1.809300064808814, "grad_norm": 0.048726316541433334, "learning_rate": 0.00012170664746388228, "loss": 0.2456, "step": 22334 }, { "epoch": 1.8093810758263125, "grad_norm": 0.04845815151929855, "learning_rate": 0.00012170214681128765, "loss": 0.2668, "step": 22335 }, { "epoch": 1.8094620868438107, "grad_norm": 0.059659332036972046, "learning_rate": 0.00012169764615869301, "loss": 0.3305, "step": 22336 }, { "epoch": 1.8095430978613092, "grad_norm": 0.04906664416193962, "learning_rate": 0.00012169314550609838, "loss": 0.2406, "step": 22337 }, { "epoch": 1.8096241088788076, "grad_norm": 0.06083128973841667, "learning_rate": 0.00012168864485350377, "loss": 0.2755, "step": 22338 }, { "epoch": 1.809705119896306, "grad_norm": 0.05594184622168541, "learning_rate": 0.00012168414420090914, "loss": 0.2585, "step": 22339 }, { "epoch": 1.8097861309138041, "grad_norm": 0.059506163001060486, "learning_rate": 0.00012167964354831452, "loss": 0.2971, "step": 22340 }, { "epoch": 1.8098671419313026, "grad_norm": 0.05431336164474487, "learning_rate": 0.00012167514289571989, "loss": 0.2766, "step": 22341 }, { "epoch": 1.809948152948801, "grad_norm": 0.05741867795586586, "learning_rate": 0.00012167064224312525, "loss": 0.2573, "step": 22342 }, { "epoch": 1.8100291639662993, "grad_norm": 0.05569988489151001, "learning_rate": 0.00012166614159053062, "loss": 0.2713, "step": 22343 }, { "epoch": 1.8101101749837978, "grad_norm": 0.0600249357521534, "learning_rate": 0.00012166164093793601, "loss": 0.261, "step": 22344 }, { "epoch": 1.8101911860012962, "grad_norm": 0.04880553111433983, "learning_rate": 0.00012165714028534139, "loss": 0.2457, "step": 22345 }, { "epoch": 1.8102721970187945, "grad_norm": 0.05315748229622841, "learning_rate": 0.00012165263963274676, "loss": 0.2952, "step": 22346 }, { "epoch": 1.810353208036293, "grad_norm": 0.04898060858249664, "learning_rate": 0.00012164813898015213, "loss": 0.2647, "step": 22347 }, { "epoch": 1.8104342190537914, "grad_norm": 0.05683436244726181, "learning_rate": 0.00012164363832755749, "loss": 0.2885, "step": 22348 }, { "epoch": 1.8105152300712897, "grad_norm": 0.05105239525437355, "learning_rate": 0.00012163913767496287, "loss": 0.26, "step": 22349 }, { "epoch": 1.810596241088788, "grad_norm": 0.05811845883727074, "learning_rate": 0.00012163463702236825, "loss": 0.2592, "step": 22350 }, { "epoch": 1.8106772521062866, "grad_norm": 0.06090196967124939, "learning_rate": 0.00012163013636977363, "loss": 0.2933, "step": 22351 }, { "epoch": 1.8107582631237849, "grad_norm": 0.05475502833724022, "learning_rate": 0.000121625635717179, "loss": 0.2608, "step": 22352 }, { "epoch": 1.810839274141283, "grad_norm": 0.050672296434640884, "learning_rate": 0.00012162113506458437, "loss": 0.2541, "step": 22353 }, { "epoch": 1.8109202851587816, "grad_norm": 0.06462455540895462, "learning_rate": 0.00012161663441198973, "loss": 0.3082, "step": 22354 }, { "epoch": 1.81100129617628, "grad_norm": 0.05794956535100937, "learning_rate": 0.00012161213375939511, "loss": 0.3007, "step": 22355 }, { "epoch": 1.8110823071937783, "grad_norm": 0.06365778297185898, "learning_rate": 0.0001216076331068005, "loss": 0.2515, "step": 22356 }, { "epoch": 1.8111633182112767, "grad_norm": 0.06921739131212234, "learning_rate": 0.00012160313245420587, "loss": 0.2999, "step": 22357 }, { "epoch": 1.8112443292287752, "grad_norm": 0.05210886895656586, "learning_rate": 0.00012159863180161124, "loss": 0.2865, "step": 22358 }, { "epoch": 1.8113253402462735, "grad_norm": 0.053139787167310715, "learning_rate": 0.00012159413114901662, "loss": 0.2675, "step": 22359 }, { "epoch": 1.811406351263772, "grad_norm": 0.06554105877876282, "learning_rate": 0.00012158963049642198, "loss": 0.3042, "step": 22360 }, { "epoch": 1.8114873622812704, "grad_norm": 0.05704993009567261, "learning_rate": 0.00012158512984382735, "loss": 0.2726, "step": 22361 }, { "epoch": 1.8115683732987686, "grad_norm": 0.06123867630958557, "learning_rate": 0.00012158062919123274, "loss": 0.2919, "step": 22362 }, { "epoch": 1.8116493843162669, "grad_norm": 0.04844033345580101, "learning_rate": 0.00012157612853863811, "loss": 0.2447, "step": 22363 }, { "epoch": 1.8117303953337653, "grad_norm": 0.0578579381108284, "learning_rate": 0.00012157162788604348, "loss": 0.3067, "step": 22364 }, { "epoch": 1.8118114063512638, "grad_norm": 0.061319511383771896, "learning_rate": 0.00012156712723344886, "loss": 0.2953, "step": 22365 }, { "epoch": 1.811892417368762, "grad_norm": 0.0670505166053772, "learning_rate": 0.00012156262658085422, "loss": 0.2841, "step": 22366 }, { "epoch": 1.8119734283862605, "grad_norm": 0.05111420527100563, "learning_rate": 0.00012155812592825962, "loss": 0.2544, "step": 22367 }, { "epoch": 1.812054439403759, "grad_norm": 0.05738413333892822, "learning_rate": 0.00012155362527566498, "loss": 0.261, "step": 22368 }, { "epoch": 1.8121354504212572, "grad_norm": 0.06913864612579346, "learning_rate": 0.00012154912462307035, "loss": 0.319, "step": 22369 }, { "epoch": 1.8122164614387557, "grad_norm": 0.06058168411254883, "learning_rate": 0.00012154462397047573, "loss": 0.2474, "step": 22370 }, { "epoch": 1.8122974724562542, "grad_norm": 0.06141381338238716, "learning_rate": 0.0001215401233178811, "loss": 0.2828, "step": 22371 }, { "epoch": 1.8123784834737524, "grad_norm": 0.05288195610046387, "learning_rate": 0.00012153562266528646, "loss": 0.2572, "step": 22372 }, { "epoch": 1.8124594944912507, "grad_norm": 0.06417234987020493, "learning_rate": 0.00012153112201269186, "loss": 0.3048, "step": 22373 }, { "epoch": 1.8125405055087493, "grad_norm": 0.06014291197061539, "learning_rate": 0.00012152662136009722, "loss": 0.2615, "step": 22374 }, { "epoch": 1.8126215165262476, "grad_norm": 0.050298839807510376, "learning_rate": 0.0001215221207075026, "loss": 0.2565, "step": 22375 }, { "epoch": 1.8127025275437458, "grad_norm": 0.057888805866241455, "learning_rate": 0.00012151762005490797, "loss": 0.2453, "step": 22376 }, { "epoch": 1.8127835385612443, "grad_norm": 0.05726255103945732, "learning_rate": 0.00012151311940231334, "loss": 0.2976, "step": 22377 }, { "epoch": 1.8128645495787428, "grad_norm": 0.04736103117465973, "learning_rate": 0.0001215086187497187, "loss": 0.2447, "step": 22378 }, { "epoch": 1.812945560596241, "grad_norm": 0.05614374577999115, "learning_rate": 0.0001215041180971241, "loss": 0.3026, "step": 22379 }, { "epoch": 1.8130265716137395, "grad_norm": 0.059921517968177795, "learning_rate": 0.00012149961744452946, "loss": 0.3009, "step": 22380 }, { "epoch": 1.813107582631238, "grad_norm": 0.058286361396312714, "learning_rate": 0.00012149511679193484, "loss": 0.2828, "step": 22381 }, { "epoch": 1.8131885936487362, "grad_norm": 0.050801943987607956, "learning_rate": 0.00012149061613934021, "loss": 0.2702, "step": 22382 }, { "epoch": 1.8132696046662347, "grad_norm": 0.05966091528534889, "learning_rate": 0.00012148611548674558, "loss": 0.2639, "step": 22383 }, { "epoch": 1.8133506156837331, "grad_norm": 0.055925920605659485, "learning_rate": 0.00012148161483415094, "loss": 0.2673, "step": 22384 }, { "epoch": 1.8134316267012314, "grad_norm": 0.056826021522283554, "learning_rate": 0.00012147711418155634, "loss": 0.3034, "step": 22385 }, { "epoch": 1.8135126377187296, "grad_norm": 0.054868802428245544, "learning_rate": 0.0001214726135289617, "loss": 0.2957, "step": 22386 }, { "epoch": 1.813593648736228, "grad_norm": 0.06180058419704437, "learning_rate": 0.00012146811287636708, "loss": 0.2785, "step": 22387 }, { "epoch": 1.8136746597537265, "grad_norm": 0.04991845041513443, "learning_rate": 0.00012146361222377245, "loss": 0.2504, "step": 22388 }, { "epoch": 1.8137556707712248, "grad_norm": 0.059130534529685974, "learning_rate": 0.00012145911157117782, "loss": 0.2916, "step": 22389 }, { "epoch": 1.8138366817887233, "grad_norm": 0.06077086552977562, "learning_rate": 0.00012145461091858318, "loss": 0.2737, "step": 22390 }, { "epoch": 1.8139176928062217, "grad_norm": 0.061018723994493484, "learning_rate": 0.00012145011026598858, "loss": 0.2709, "step": 22391 }, { "epoch": 1.81399870382372, "grad_norm": 0.0630197674036026, "learning_rate": 0.00012144560961339394, "loss": 0.2471, "step": 22392 }, { "epoch": 1.8140797148412184, "grad_norm": 0.05346192419528961, "learning_rate": 0.00012144110896079932, "loss": 0.2635, "step": 22393 }, { "epoch": 1.814160725858717, "grad_norm": 0.05909271538257599, "learning_rate": 0.00012143660830820469, "loss": 0.2552, "step": 22394 }, { "epoch": 1.8142417368762151, "grad_norm": 0.061077363789081573, "learning_rate": 0.00012143210765561007, "loss": 0.2628, "step": 22395 }, { "epoch": 1.8143227478937134, "grad_norm": 0.0485931932926178, "learning_rate": 0.00012142760700301545, "loss": 0.2526, "step": 22396 }, { "epoch": 1.814403758911212, "grad_norm": 0.07086597383022308, "learning_rate": 0.00012142310635042083, "loss": 0.3017, "step": 22397 }, { "epoch": 1.8144847699287103, "grad_norm": 0.05354897305369377, "learning_rate": 0.0001214186056978262, "loss": 0.2934, "step": 22398 }, { "epoch": 1.8145657809462086, "grad_norm": 0.060347460210323334, "learning_rate": 0.00012141410504523156, "loss": 0.2427, "step": 22399 }, { "epoch": 1.814646791963707, "grad_norm": 0.07105302065610886, "learning_rate": 0.00012140960439263693, "loss": 0.3205, "step": 22400 }, { "epoch": 1.8147278029812055, "grad_norm": 0.07145987451076508, "learning_rate": 0.00012140510374004231, "loss": 0.3012, "step": 22401 }, { "epoch": 1.8148088139987038, "grad_norm": 0.05859648063778877, "learning_rate": 0.0001214006030874477, "loss": 0.2454, "step": 22402 }, { "epoch": 1.8148898250162022, "grad_norm": 0.06137736886739731, "learning_rate": 0.00012139610243485307, "loss": 0.2506, "step": 22403 }, { "epoch": 1.8149708360337007, "grad_norm": 0.0615663081407547, "learning_rate": 0.00012139160178225844, "loss": 0.2891, "step": 22404 }, { "epoch": 1.815051847051199, "grad_norm": 0.07232252508401871, "learning_rate": 0.0001213871011296638, "loss": 0.2762, "step": 22405 }, { "epoch": 1.8151328580686974, "grad_norm": 0.04825594276189804, "learning_rate": 0.00012138260047706918, "loss": 0.2539, "step": 22406 }, { "epoch": 1.8152138690861959, "grad_norm": 0.061036497354507446, "learning_rate": 0.00012137809982447455, "loss": 0.2656, "step": 22407 }, { "epoch": 1.815294880103694, "grad_norm": 0.04653603583574295, "learning_rate": 0.00012137359917187994, "loss": 0.2401, "step": 22408 }, { "epoch": 1.8153758911211924, "grad_norm": 0.05841079726815224, "learning_rate": 0.00012136909851928531, "loss": 0.2981, "step": 22409 }, { "epoch": 1.8154569021386908, "grad_norm": 0.05416569858789444, "learning_rate": 0.00012136459786669068, "loss": 0.2842, "step": 22410 }, { "epoch": 1.8155379131561893, "grad_norm": 0.05413185432553291, "learning_rate": 0.00012136009721409604, "loss": 0.2934, "step": 22411 }, { "epoch": 1.8156189241736875, "grad_norm": 0.05708204209804535, "learning_rate": 0.00012135559656150142, "loss": 0.2823, "step": 22412 }, { "epoch": 1.815699935191186, "grad_norm": 0.05461487919092178, "learning_rate": 0.00012135109590890679, "loss": 0.2544, "step": 22413 }, { "epoch": 1.8157809462086845, "grad_norm": 0.05890343710780144, "learning_rate": 0.00012134659525631218, "loss": 0.2831, "step": 22414 }, { "epoch": 1.8158619572261827, "grad_norm": 0.04829421639442444, "learning_rate": 0.00012134209460371755, "loss": 0.2755, "step": 22415 }, { "epoch": 1.8159429682436812, "grad_norm": 0.05846787244081497, "learning_rate": 0.00012133759395112292, "loss": 0.2883, "step": 22416 }, { "epoch": 1.8160239792611796, "grad_norm": 0.05573653057217598, "learning_rate": 0.00012133309329852828, "loss": 0.2377, "step": 22417 }, { "epoch": 1.8161049902786779, "grad_norm": 0.05826255679130554, "learning_rate": 0.00012132859264593366, "loss": 0.2521, "step": 22418 }, { "epoch": 1.8161860012961761, "grad_norm": 0.07741843163967133, "learning_rate": 0.00012132409199333905, "loss": 0.3201, "step": 22419 }, { "epoch": 1.8162670123136748, "grad_norm": 0.058812353760004044, "learning_rate": 0.00012131959134074442, "loss": 0.2444, "step": 22420 }, { "epoch": 1.816348023331173, "grad_norm": 0.05757759511470795, "learning_rate": 0.00012131509068814979, "loss": 0.2913, "step": 22421 }, { "epoch": 1.8164290343486713, "grad_norm": 0.06558262556791306, "learning_rate": 0.00012131059003555517, "loss": 0.2681, "step": 22422 }, { "epoch": 1.8165100453661698, "grad_norm": 0.058929912745952606, "learning_rate": 0.00012130608938296053, "loss": 0.3001, "step": 22423 }, { "epoch": 1.8165910563836682, "grad_norm": 0.055346451699733734, "learning_rate": 0.0001213015887303659, "loss": 0.2694, "step": 22424 }, { "epoch": 1.8166720674011665, "grad_norm": 0.05668604001402855, "learning_rate": 0.00012129708807777129, "loss": 0.3082, "step": 22425 }, { "epoch": 1.816753078418665, "grad_norm": 0.06419747322797775, "learning_rate": 0.00012129258742517666, "loss": 0.3171, "step": 22426 }, { "epoch": 1.8168340894361634, "grad_norm": 0.05885817110538483, "learning_rate": 0.00012128808677258203, "loss": 0.3019, "step": 22427 }, { "epoch": 1.8169151004536617, "grad_norm": 0.05111012980341911, "learning_rate": 0.00012128358611998741, "loss": 0.2704, "step": 22428 }, { "epoch": 1.81699611147116, "grad_norm": 0.07075147330760956, "learning_rate": 0.00012127908546739277, "loss": 0.307, "step": 22429 }, { "epoch": 1.8170771224886586, "grad_norm": 0.058915186673402786, "learning_rate": 0.00012127458481479814, "loss": 0.2834, "step": 22430 }, { "epoch": 1.8171581335061568, "grad_norm": 0.051177240908145905, "learning_rate": 0.00012127008416220353, "loss": 0.2605, "step": 22431 }, { "epoch": 1.817239144523655, "grad_norm": 0.06430277973413467, "learning_rate": 0.0001212655835096089, "loss": 0.2775, "step": 22432 }, { "epoch": 1.8173201555411536, "grad_norm": 0.05540946498513222, "learning_rate": 0.00012126108285701428, "loss": 0.286, "step": 22433 }, { "epoch": 1.817401166558652, "grad_norm": 0.05893925949931145, "learning_rate": 0.00012125658220441965, "loss": 0.311, "step": 22434 }, { "epoch": 1.8174821775761503, "grad_norm": 0.06361778825521469, "learning_rate": 0.00012125208155182501, "loss": 0.2978, "step": 22435 }, { "epoch": 1.8175631885936487, "grad_norm": 0.05619451403617859, "learning_rate": 0.00012124758089923038, "loss": 0.2462, "step": 22436 }, { "epoch": 1.8176441996111472, "grad_norm": 0.06951126456260681, "learning_rate": 0.00012124308024663577, "loss": 0.309, "step": 22437 }, { "epoch": 1.8177252106286454, "grad_norm": 0.06178022921085358, "learning_rate": 0.00012123857959404114, "loss": 0.2908, "step": 22438 }, { "epoch": 1.817806221646144, "grad_norm": 0.0636647567152977, "learning_rate": 0.00012123407894144652, "loss": 0.2445, "step": 22439 }, { "epoch": 1.8178872326636424, "grad_norm": 0.07269550859928131, "learning_rate": 0.00012122957828885189, "loss": 0.3106, "step": 22440 }, { "epoch": 1.8179682436811406, "grad_norm": 0.05728549882769585, "learning_rate": 0.00012122507763625725, "loss": 0.2696, "step": 22441 }, { "epoch": 1.8180492546986389, "grad_norm": 0.05145451799035072, "learning_rate": 0.00012122057698366262, "loss": 0.274, "step": 22442 }, { "epoch": 1.8181302657161373, "grad_norm": 0.04999382793903351, "learning_rate": 0.00012121607633106801, "loss": 0.2461, "step": 22443 }, { "epoch": 1.8182112767336358, "grad_norm": 0.04859451577067375, "learning_rate": 0.00012121157567847339, "loss": 0.2628, "step": 22444 }, { "epoch": 1.818292287751134, "grad_norm": 0.049572959542274475, "learning_rate": 0.00012120707502587876, "loss": 0.2608, "step": 22445 }, { "epoch": 1.8183732987686325, "grad_norm": 0.05134311690926552, "learning_rate": 0.00012120257437328413, "loss": 0.2482, "step": 22446 }, { "epoch": 1.818454309786131, "grad_norm": 0.0611451230943203, "learning_rate": 0.00012119807372068949, "loss": 0.2283, "step": 22447 }, { "epoch": 1.8185353208036292, "grad_norm": 0.052968040108680725, "learning_rate": 0.0001211935730680949, "loss": 0.2263, "step": 22448 }, { "epoch": 1.8186163318211277, "grad_norm": 0.05763326212763786, "learning_rate": 0.00012118907241550025, "loss": 0.2637, "step": 22449 }, { "epoch": 1.8186973428386262, "grad_norm": 0.05025547742843628, "learning_rate": 0.00012118457176290563, "loss": 0.3065, "step": 22450 }, { "epoch": 1.8187783538561244, "grad_norm": 0.07037966698408127, "learning_rate": 0.000121180071110311, "loss": 0.2658, "step": 22451 }, { "epoch": 1.8188593648736227, "grad_norm": 0.07079332321882248, "learning_rate": 0.00012117557045771637, "loss": 0.3103, "step": 22452 }, { "epoch": 1.8189403758911213, "grad_norm": 0.05972728133201599, "learning_rate": 0.00012117106980512173, "loss": 0.3073, "step": 22453 }, { "epoch": 1.8190213869086196, "grad_norm": 0.05895204469561577, "learning_rate": 0.00012116656915252714, "loss": 0.2826, "step": 22454 }, { "epoch": 1.8191023979261178, "grad_norm": 0.0534057579934597, "learning_rate": 0.0001211620684999325, "loss": 0.2624, "step": 22455 }, { "epoch": 1.8191834089436163, "grad_norm": 0.05191171541810036, "learning_rate": 0.00012115756784733787, "loss": 0.273, "step": 22456 }, { "epoch": 1.8192644199611148, "grad_norm": 0.06158572435379028, "learning_rate": 0.00012115306719474324, "loss": 0.3172, "step": 22457 }, { "epoch": 1.819345430978613, "grad_norm": 0.05240388587117195, "learning_rate": 0.00012114856654214862, "loss": 0.2498, "step": 22458 }, { "epoch": 1.8194264419961115, "grad_norm": 0.06356768310070038, "learning_rate": 0.00012114406588955398, "loss": 0.3298, "step": 22459 }, { "epoch": 1.81950745301361, "grad_norm": 0.06706894189119339, "learning_rate": 0.00012113956523695938, "loss": 0.3273, "step": 22460 }, { "epoch": 1.8195884640311082, "grad_norm": 0.05673682317137718, "learning_rate": 0.00012113506458436474, "loss": 0.2624, "step": 22461 }, { "epoch": 1.8196694750486067, "grad_norm": 0.05635488033294678, "learning_rate": 0.00012113056393177011, "loss": 0.2737, "step": 22462 }, { "epoch": 1.8197504860661051, "grad_norm": 0.060851920396089554, "learning_rate": 0.00012112606327917548, "loss": 0.2629, "step": 22463 }, { "epoch": 1.8198314970836034, "grad_norm": 0.06031657010316849, "learning_rate": 0.00012112156262658086, "loss": 0.2957, "step": 22464 }, { "epoch": 1.8199125081011016, "grad_norm": 0.06104261055588722, "learning_rate": 0.00012111706197398622, "loss": 0.308, "step": 22465 }, { "epoch": 1.8199935191186, "grad_norm": 0.05476023256778717, "learning_rate": 0.00012111256132139162, "loss": 0.2818, "step": 22466 }, { "epoch": 1.8200745301360985, "grad_norm": 0.046863965690135956, "learning_rate": 0.00012110806066879699, "loss": 0.2285, "step": 22467 }, { "epoch": 1.8201555411535968, "grad_norm": 0.06625159829854965, "learning_rate": 0.00012110356001620235, "loss": 0.2851, "step": 22468 }, { "epoch": 1.8202365521710953, "grad_norm": 0.05833403021097183, "learning_rate": 0.00012109905936360773, "loss": 0.2991, "step": 22469 }, { "epoch": 1.8203175631885937, "grad_norm": 0.06144651770591736, "learning_rate": 0.0001210945587110131, "loss": 0.2788, "step": 22470 }, { "epoch": 1.820398574206092, "grad_norm": 0.06062743067741394, "learning_rate": 0.00012109005805841849, "loss": 0.2588, "step": 22471 }, { "epoch": 1.8204795852235904, "grad_norm": 0.07043924182653427, "learning_rate": 0.00012108555740582386, "loss": 0.2513, "step": 22472 }, { "epoch": 1.820560596241089, "grad_norm": 0.06090352311730385, "learning_rate": 0.00012108105675322923, "loss": 0.2469, "step": 22473 }, { "epoch": 1.8206416072585871, "grad_norm": 0.0642428919672966, "learning_rate": 0.0001210765561006346, "loss": 0.2853, "step": 22474 }, { "epoch": 1.8207226182760854, "grad_norm": 0.07086139917373657, "learning_rate": 0.00012107205544803997, "loss": 0.2791, "step": 22475 }, { "epoch": 1.820803629293584, "grad_norm": 0.05878929793834686, "learning_rate": 0.00012106755479544534, "loss": 0.2854, "step": 22476 }, { "epoch": 1.8208846403110823, "grad_norm": 0.07627884298563004, "learning_rate": 0.00012106305414285073, "loss": 0.287, "step": 22477 }, { "epoch": 1.8209656513285806, "grad_norm": 0.06491941958665848, "learning_rate": 0.0001210585534902561, "loss": 0.2926, "step": 22478 }, { "epoch": 1.821046662346079, "grad_norm": 0.06161245331168175, "learning_rate": 0.00012105405283766148, "loss": 0.2606, "step": 22479 }, { "epoch": 1.8211276733635775, "grad_norm": 0.060672350227832794, "learning_rate": 0.00012104955218506684, "loss": 0.3011, "step": 22480 }, { "epoch": 1.8212086843810757, "grad_norm": 0.054367486387491226, "learning_rate": 0.00012104505153247221, "loss": 0.2692, "step": 22481 }, { "epoch": 1.8212896953985742, "grad_norm": 0.05251113697886467, "learning_rate": 0.00012104055087987758, "loss": 0.2704, "step": 22482 }, { "epoch": 1.8213707064160727, "grad_norm": 0.058134134858846664, "learning_rate": 0.00012103605022728297, "loss": 0.2472, "step": 22483 }, { "epoch": 1.821451717433571, "grad_norm": 0.05237071216106415, "learning_rate": 0.00012103154957468834, "loss": 0.2662, "step": 22484 }, { "epoch": 1.8215327284510694, "grad_norm": 0.05184619873762131, "learning_rate": 0.00012102704892209372, "loss": 0.2549, "step": 22485 }, { "epoch": 1.8216137394685679, "grad_norm": 0.060612794011831284, "learning_rate": 0.00012102254826949908, "loss": 0.2947, "step": 22486 }, { "epoch": 1.821694750486066, "grad_norm": 0.057740937918424606, "learning_rate": 0.00012101804761690445, "loss": 0.2396, "step": 22487 }, { "epoch": 1.8217757615035644, "grad_norm": 0.0475936233997345, "learning_rate": 0.00012101354696430982, "loss": 0.3043, "step": 22488 }, { "epoch": 1.8218567725210628, "grad_norm": 0.052612967789173126, "learning_rate": 0.00012100904631171521, "loss": 0.2571, "step": 22489 }, { "epoch": 1.8219377835385613, "grad_norm": 0.04497424140572548, "learning_rate": 0.00012100454565912058, "loss": 0.2296, "step": 22490 }, { "epoch": 1.8220187945560595, "grad_norm": 0.054526977241039276, "learning_rate": 0.00012100004500652596, "loss": 0.2669, "step": 22491 }, { "epoch": 1.822099805573558, "grad_norm": 0.06448981910943985, "learning_rate": 0.00012099554435393132, "loss": 0.2401, "step": 22492 }, { "epoch": 1.8221808165910565, "grad_norm": 0.05319571495056152, "learning_rate": 0.00012099104370133669, "loss": 0.2617, "step": 22493 }, { "epoch": 1.8222618276085547, "grad_norm": 0.056535281240940094, "learning_rate": 0.00012098654304874207, "loss": 0.2707, "step": 22494 }, { "epoch": 1.8223428386260532, "grad_norm": 0.05342821404337883, "learning_rate": 0.00012098204239614745, "loss": 0.2579, "step": 22495 }, { "epoch": 1.8224238496435516, "grad_norm": 0.041714731603860855, "learning_rate": 0.00012097754174355283, "loss": 0.2334, "step": 22496 }, { "epoch": 1.8225048606610499, "grad_norm": 0.06172311678528786, "learning_rate": 0.0001209730410909582, "loss": 0.2553, "step": 22497 }, { "epoch": 1.8225858716785481, "grad_norm": 0.06267467141151428, "learning_rate": 0.00012096854043836356, "loss": 0.2719, "step": 22498 }, { "epoch": 1.8226668826960468, "grad_norm": 0.0709090530872345, "learning_rate": 0.00012096403978576893, "loss": 0.2472, "step": 22499 }, { "epoch": 1.822747893713545, "grad_norm": 0.06478728353977203, "learning_rate": 0.00012095953913317432, "loss": 0.2908, "step": 22500 }, { "epoch": 1.8228289047310433, "grad_norm": 0.053202226758003235, "learning_rate": 0.0001209550384805797, "loss": 0.2836, "step": 22501 }, { "epoch": 1.8229099157485418, "grad_norm": 0.06402086466550827, "learning_rate": 0.00012095053782798507, "loss": 0.2871, "step": 22502 }, { "epoch": 1.8229909267660402, "grad_norm": 0.04878729209303856, "learning_rate": 0.00012094603717539044, "loss": 0.2367, "step": 22503 }, { "epoch": 1.8230719377835385, "grad_norm": 0.062460657209157944, "learning_rate": 0.0001209415365227958, "loss": 0.2806, "step": 22504 }, { "epoch": 1.823152948801037, "grad_norm": 0.0667148008942604, "learning_rate": 0.00012093703587020118, "loss": 0.2773, "step": 22505 }, { "epoch": 1.8232339598185354, "grad_norm": 0.0478152334690094, "learning_rate": 0.00012093253521760656, "loss": 0.2446, "step": 22506 }, { "epoch": 1.8233149708360337, "grad_norm": 0.0614163763821125, "learning_rate": 0.00012092803456501194, "loss": 0.2799, "step": 22507 }, { "epoch": 1.8233959818535321, "grad_norm": 0.05350629240274429, "learning_rate": 0.00012092353391241731, "loss": 0.2635, "step": 22508 }, { "epoch": 1.8234769928710306, "grad_norm": 0.053439583629369736, "learning_rate": 0.00012091903325982268, "loss": 0.2455, "step": 22509 }, { "epoch": 1.8235580038885288, "grad_norm": 0.059095874428749084, "learning_rate": 0.00012091453260722804, "loss": 0.3058, "step": 22510 }, { "epoch": 1.823639014906027, "grad_norm": 0.06233348697423935, "learning_rate": 0.00012091003195463342, "loss": 0.3011, "step": 22511 }, { "epoch": 1.8237200259235256, "grad_norm": 0.05984274297952652, "learning_rate": 0.0001209055313020388, "loss": 0.2479, "step": 22512 }, { "epoch": 1.823801036941024, "grad_norm": 0.06020957976579666, "learning_rate": 0.00012090103064944418, "loss": 0.2889, "step": 22513 }, { "epoch": 1.8238820479585223, "grad_norm": 0.06470814347267151, "learning_rate": 0.00012089652999684955, "loss": 0.3046, "step": 22514 }, { "epoch": 1.8239630589760207, "grad_norm": 0.06276507675647736, "learning_rate": 0.00012089202934425493, "loss": 0.282, "step": 22515 }, { "epoch": 1.8240440699935192, "grad_norm": 0.061302684247493744, "learning_rate": 0.00012088752869166029, "loss": 0.3058, "step": 22516 }, { "epoch": 1.8241250810110174, "grad_norm": 0.07292281836271286, "learning_rate": 0.00012088302803906566, "loss": 0.2499, "step": 22517 }, { "epoch": 1.824206092028516, "grad_norm": 0.05192513391375542, "learning_rate": 0.00012087852738647105, "loss": 0.2858, "step": 22518 }, { "epoch": 1.8242871030460144, "grad_norm": 0.06113511696457863, "learning_rate": 0.00012087402673387642, "loss": 0.2708, "step": 22519 }, { "epoch": 1.8243681140635126, "grad_norm": 0.06051720306277275, "learning_rate": 0.00012086952608128179, "loss": 0.2703, "step": 22520 }, { "epoch": 1.8244491250810109, "grad_norm": 0.05868927389383316, "learning_rate": 0.00012086502542868717, "loss": 0.2879, "step": 22521 }, { "epoch": 1.8245301360985096, "grad_norm": 0.05062629282474518, "learning_rate": 0.00012086052477609253, "loss": 0.2258, "step": 22522 }, { "epoch": 1.8246111471160078, "grad_norm": 0.061690833419561386, "learning_rate": 0.00012085602412349793, "loss": 0.2863, "step": 22523 }, { "epoch": 1.824692158133506, "grad_norm": 0.05033199116587639, "learning_rate": 0.00012085152347090329, "loss": 0.2558, "step": 22524 }, { "epoch": 1.8247731691510045, "grad_norm": 0.06216473504900932, "learning_rate": 0.00012084702281830866, "loss": 0.3475, "step": 22525 }, { "epoch": 1.824854180168503, "grad_norm": 0.05927857756614685, "learning_rate": 0.00012084252216571403, "loss": 0.2856, "step": 22526 }, { "epoch": 1.8249351911860012, "grad_norm": 0.05006115138530731, "learning_rate": 0.00012083802151311941, "loss": 0.2688, "step": 22527 }, { "epoch": 1.8250162022034997, "grad_norm": 0.05684928968548775, "learning_rate": 0.00012083352086052477, "loss": 0.2859, "step": 22528 }, { "epoch": 1.8250972132209982, "grad_norm": 0.06044033542275429, "learning_rate": 0.00012082902020793017, "loss": 0.2736, "step": 22529 }, { "epoch": 1.8251782242384964, "grad_norm": 0.0559692308306694, "learning_rate": 0.00012082451955533553, "loss": 0.229, "step": 22530 }, { "epoch": 1.8252592352559946, "grad_norm": 0.05422244593501091, "learning_rate": 0.0001208200189027409, "loss": 0.3124, "step": 22531 }, { "epoch": 1.8253402462734933, "grad_norm": 0.04698093235492706, "learning_rate": 0.00012081551825014628, "loss": 0.2666, "step": 22532 }, { "epoch": 1.8254212572909916, "grad_norm": 0.048311080783605576, "learning_rate": 0.00012081101759755165, "loss": 0.236, "step": 22533 }, { "epoch": 1.8255022683084898, "grad_norm": 0.05981531739234924, "learning_rate": 0.00012080651694495701, "loss": 0.2503, "step": 22534 }, { "epoch": 1.8255832793259883, "grad_norm": 0.0633094534277916, "learning_rate": 0.00012080201629236241, "loss": 0.2585, "step": 22535 }, { "epoch": 1.8256642903434868, "grad_norm": 0.05595608428120613, "learning_rate": 0.00012079751563976778, "loss": 0.2563, "step": 22536 }, { "epoch": 1.825745301360985, "grad_norm": 0.055708158761262894, "learning_rate": 0.00012079301498717314, "loss": 0.2438, "step": 22537 }, { "epoch": 1.8258263123784835, "grad_norm": 0.054020561277866364, "learning_rate": 0.00012078851433457852, "loss": 0.2861, "step": 22538 }, { "epoch": 1.825907323395982, "grad_norm": 0.0549539290368557, "learning_rate": 0.00012078401368198389, "loss": 0.2523, "step": 22539 }, { "epoch": 1.8259883344134802, "grad_norm": 0.04126188904047012, "learning_rate": 0.00012077951302938925, "loss": 0.2345, "step": 22540 }, { "epoch": 1.8260693454309787, "grad_norm": 0.05515645444393158, "learning_rate": 0.00012077501237679465, "loss": 0.2437, "step": 22541 }, { "epoch": 1.8261503564484771, "grad_norm": 0.05556858330965042, "learning_rate": 0.00012077051172420003, "loss": 0.2532, "step": 22542 }, { "epoch": 1.8262313674659754, "grad_norm": 0.05830145254731178, "learning_rate": 0.00012076601107160539, "loss": 0.2999, "step": 22543 }, { "epoch": 1.8263123784834736, "grad_norm": 0.05237169936299324, "learning_rate": 0.00012076151041901076, "loss": 0.2503, "step": 22544 }, { "epoch": 1.8263933895009723, "grad_norm": 0.056736260652542114, "learning_rate": 0.00012075700976641613, "loss": 0.254, "step": 22545 }, { "epoch": 1.8264744005184705, "grad_norm": 0.055458515882492065, "learning_rate": 0.00012075250911382149, "loss": 0.2374, "step": 22546 }, { "epoch": 1.8265554115359688, "grad_norm": 0.05906181037425995, "learning_rate": 0.0001207480084612269, "loss": 0.2214, "step": 22547 }, { "epoch": 1.8266364225534673, "grad_norm": 0.05620548501610756, "learning_rate": 0.00012074350780863227, "loss": 0.3144, "step": 22548 }, { "epoch": 1.8267174335709657, "grad_norm": 0.065769724547863, "learning_rate": 0.00012073900715603763, "loss": 0.2542, "step": 22549 }, { "epoch": 1.826798444588464, "grad_norm": 0.06099202483892441, "learning_rate": 0.000120734506503443, "loss": 0.2195, "step": 22550 }, { "epoch": 1.8268794556059624, "grad_norm": 0.053566206246614456, "learning_rate": 0.00012073000585084837, "loss": 0.2438, "step": 22551 }, { "epoch": 1.826960466623461, "grad_norm": 0.06798861920833588, "learning_rate": 0.00012072550519825376, "loss": 0.2738, "step": 22552 }, { "epoch": 1.8270414776409591, "grad_norm": 0.05625709891319275, "learning_rate": 0.00012072100454565914, "loss": 0.2551, "step": 22553 }, { "epoch": 1.8271224886584574, "grad_norm": 0.06199916824698448, "learning_rate": 0.00012071650389306451, "loss": 0.2348, "step": 22554 }, { "epoch": 1.827203499675956, "grad_norm": 0.05430760234594345, "learning_rate": 0.00012071200324046987, "loss": 0.2629, "step": 22555 }, { "epoch": 1.8272845106934543, "grad_norm": 0.07473081350326538, "learning_rate": 0.00012070750258787524, "loss": 0.3294, "step": 22556 }, { "epoch": 1.8273655217109526, "grad_norm": 0.06008787825703621, "learning_rate": 0.00012070300193528062, "loss": 0.2457, "step": 22557 }, { "epoch": 1.827446532728451, "grad_norm": 0.06692630052566528, "learning_rate": 0.000120698501282686, "loss": 0.2728, "step": 22558 }, { "epoch": 1.8275275437459495, "grad_norm": 0.06054568663239479, "learning_rate": 0.00012069400063009138, "loss": 0.2718, "step": 22559 }, { "epoch": 1.8276085547634477, "grad_norm": 0.06602507084608078, "learning_rate": 0.00012068949997749675, "loss": 0.2654, "step": 22560 }, { "epoch": 1.8276895657809462, "grad_norm": 0.04704922437667847, "learning_rate": 0.00012068499932490211, "loss": 0.24, "step": 22561 }, { "epoch": 1.8277705767984447, "grad_norm": 0.06324727833271027, "learning_rate": 0.00012068049867230748, "loss": 0.2936, "step": 22562 }, { "epoch": 1.827851587815943, "grad_norm": 0.060656383633613586, "learning_rate": 0.00012067599801971286, "loss": 0.293, "step": 22563 }, { "epoch": 1.8279325988334414, "grad_norm": 0.05807242542505264, "learning_rate": 0.00012067149736711825, "loss": 0.2623, "step": 22564 }, { "epoch": 1.8280136098509399, "grad_norm": 0.06374472379684448, "learning_rate": 0.00012066699671452362, "loss": 0.2685, "step": 22565 }, { "epoch": 1.828094620868438, "grad_norm": 0.05179629474878311, "learning_rate": 0.00012066249606192899, "loss": 0.2176, "step": 22566 }, { "epoch": 1.8281756318859363, "grad_norm": 0.05624118819832802, "learning_rate": 0.00012065799540933435, "loss": 0.2816, "step": 22567 }, { "epoch": 1.8282566429034348, "grad_norm": 0.06993228197097778, "learning_rate": 0.00012065349475673973, "loss": 0.2854, "step": 22568 }, { "epoch": 1.8283376539209333, "grad_norm": 0.06136844679713249, "learning_rate": 0.0001206489941041451, "loss": 0.2589, "step": 22569 }, { "epoch": 1.8284186649384315, "grad_norm": 0.05738857388496399, "learning_rate": 0.00012064449345155049, "loss": 0.2419, "step": 22570 }, { "epoch": 1.82849967595593, "grad_norm": 0.05129186809062958, "learning_rate": 0.00012063999279895586, "loss": 0.2566, "step": 22571 }, { "epoch": 1.8285806869734285, "grad_norm": 0.05626427382230759, "learning_rate": 0.00012063549214636123, "loss": 0.2962, "step": 22572 }, { "epoch": 1.8286616979909267, "grad_norm": 0.06274127215147018, "learning_rate": 0.0001206309914937666, "loss": 0.304, "step": 22573 }, { "epoch": 1.8287427090084252, "grad_norm": 0.06296470761299133, "learning_rate": 0.00012062649084117197, "loss": 0.2672, "step": 22574 }, { "epoch": 1.8288237200259236, "grad_norm": 0.04759762063622475, "learning_rate": 0.00012062199018857734, "loss": 0.2423, "step": 22575 }, { "epoch": 1.8289047310434219, "grad_norm": 0.04579182341694832, "learning_rate": 0.00012061748953598273, "loss": 0.2312, "step": 22576 }, { "epoch": 1.8289857420609201, "grad_norm": 0.05473625659942627, "learning_rate": 0.0001206129888833881, "loss": 0.3182, "step": 22577 }, { "epoch": 1.8290667530784188, "grad_norm": 0.06306344270706177, "learning_rate": 0.00012060848823079348, "loss": 0.2836, "step": 22578 }, { "epoch": 1.829147764095917, "grad_norm": 0.059901390224695206, "learning_rate": 0.00012060398757819884, "loss": 0.276, "step": 22579 }, { "epoch": 1.8292287751134153, "grad_norm": 0.06785433739423752, "learning_rate": 0.00012059948692560421, "loss": 0.2666, "step": 22580 }, { "epoch": 1.8293097861309138, "grad_norm": 0.059782687574625015, "learning_rate": 0.0001205949862730096, "loss": 0.2356, "step": 22581 }, { "epoch": 1.8293907971484122, "grad_norm": 0.05681954324245453, "learning_rate": 0.00012059048562041497, "loss": 0.2974, "step": 22582 }, { "epoch": 1.8294718081659105, "grad_norm": 0.05416959896683693, "learning_rate": 0.00012058598496782034, "loss": 0.2732, "step": 22583 }, { "epoch": 1.829552819183409, "grad_norm": 0.05503580719232559, "learning_rate": 0.00012058148431522572, "loss": 0.267, "step": 22584 }, { "epoch": 1.8296338302009074, "grad_norm": 0.04536353796720505, "learning_rate": 0.00012057698366263108, "loss": 0.2597, "step": 22585 }, { "epoch": 1.8297148412184057, "grad_norm": 0.05878216773271561, "learning_rate": 0.00012057248301003645, "loss": 0.2619, "step": 22586 }, { "epoch": 1.8297958522359041, "grad_norm": 0.06327325850725174, "learning_rate": 0.00012056798235744184, "loss": 0.2762, "step": 22587 }, { "epoch": 1.8298768632534026, "grad_norm": 0.04429469630122185, "learning_rate": 0.00012056348170484721, "loss": 0.2335, "step": 22588 }, { "epoch": 1.8299578742709008, "grad_norm": 0.058379221707582474, "learning_rate": 0.00012055898105225259, "loss": 0.2927, "step": 22589 }, { "epoch": 1.830038885288399, "grad_norm": 0.05577626824378967, "learning_rate": 0.00012055448039965796, "loss": 0.2665, "step": 22590 }, { "epoch": 1.8301198963058976, "grad_norm": 0.06345677375793457, "learning_rate": 0.00012054997974706332, "loss": 0.2933, "step": 22591 }, { "epoch": 1.830200907323396, "grad_norm": 0.0531621053814888, "learning_rate": 0.00012054547909446869, "loss": 0.27, "step": 22592 }, { "epoch": 1.8302819183408943, "grad_norm": 0.06574061512947083, "learning_rate": 0.00012054097844187408, "loss": 0.2579, "step": 22593 }, { "epoch": 1.8303629293583927, "grad_norm": 0.05340947210788727, "learning_rate": 0.00012053647778927945, "loss": 0.2503, "step": 22594 }, { "epoch": 1.8304439403758912, "grad_norm": 0.06001076102256775, "learning_rate": 0.00012053197713668483, "loss": 0.2756, "step": 22595 }, { "epoch": 1.8305249513933894, "grad_norm": 0.04578939825296402, "learning_rate": 0.0001205274764840902, "loss": 0.2726, "step": 22596 }, { "epoch": 1.830605962410888, "grad_norm": 0.056231167167425156, "learning_rate": 0.00012052297583149556, "loss": 0.2572, "step": 22597 }, { "epoch": 1.8306869734283864, "grad_norm": 0.050092145800590515, "learning_rate": 0.00012051847517890093, "loss": 0.2801, "step": 22598 }, { "epoch": 1.8307679844458846, "grad_norm": 0.046886369585990906, "learning_rate": 0.00012051397452630632, "loss": 0.2583, "step": 22599 }, { "epoch": 1.8308489954633829, "grad_norm": 0.06768523156642914, "learning_rate": 0.0001205094738737117, "loss": 0.3177, "step": 22600 }, { "epoch": 1.8309300064808816, "grad_norm": 0.06049235910177231, "learning_rate": 0.00012050497322111707, "loss": 0.2889, "step": 22601 }, { "epoch": 1.8310110174983798, "grad_norm": 0.04988478869199753, "learning_rate": 0.00012050047256852244, "loss": 0.3033, "step": 22602 }, { "epoch": 1.831092028515878, "grad_norm": 0.053116343915462494, "learning_rate": 0.0001204959719159278, "loss": 0.2863, "step": 22603 }, { "epoch": 1.8311730395333765, "grad_norm": 0.05728102847933769, "learning_rate": 0.0001204914712633332, "loss": 0.2863, "step": 22604 }, { "epoch": 1.831254050550875, "grad_norm": 0.05136289820075035, "learning_rate": 0.00012048697061073858, "loss": 0.2814, "step": 22605 }, { "epoch": 1.8313350615683732, "grad_norm": 0.05675578489899635, "learning_rate": 0.00012048246995814394, "loss": 0.2291, "step": 22606 }, { "epoch": 1.8314160725858717, "grad_norm": 0.05223593860864639, "learning_rate": 0.00012047796930554931, "loss": 0.2373, "step": 22607 }, { "epoch": 1.8314970836033702, "grad_norm": 0.05824046954512596, "learning_rate": 0.00012047346865295468, "loss": 0.264, "step": 22608 }, { "epoch": 1.8315780946208684, "grad_norm": 0.059614140540361404, "learning_rate": 0.00012046896800036004, "loss": 0.2839, "step": 22609 }, { "epoch": 1.8316591056383669, "grad_norm": 0.06695158779621124, "learning_rate": 0.00012046446734776544, "loss": 0.2891, "step": 22610 }, { "epoch": 1.8317401166558653, "grad_norm": 0.061037980020046234, "learning_rate": 0.00012045996669517082, "loss": 0.3029, "step": 22611 }, { "epoch": 1.8318211276733636, "grad_norm": 0.06011229753494263, "learning_rate": 0.00012045546604257618, "loss": 0.2629, "step": 22612 }, { "epoch": 1.8319021386908618, "grad_norm": 0.04871377348899841, "learning_rate": 0.00012045096538998155, "loss": 0.2463, "step": 22613 }, { "epoch": 1.8319831497083603, "grad_norm": 0.06099603325128555, "learning_rate": 0.00012044646473738693, "loss": 0.2795, "step": 22614 }, { "epoch": 1.8320641607258588, "grad_norm": 0.05151505768299103, "learning_rate": 0.00012044196408479229, "loss": 0.2828, "step": 22615 }, { "epoch": 1.832145171743357, "grad_norm": 0.06807029992341995, "learning_rate": 0.00012043746343219769, "loss": 0.2978, "step": 22616 }, { "epoch": 1.8322261827608555, "grad_norm": 0.0644865408539772, "learning_rate": 0.00012043296277960306, "loss": 0.2447, "step": 22617 }, { "epoch": 1.832307193778354, "grad_norm": 0.06668511033058167, "learning_rate": 0.00012042846212700842, "loss": 0.2266, "step": 22618 }, { "epoch": 1.8323882047958522, "grad_norm": 0.0608198456466198, "learning_rate": 0.0001204239614744138, "loss": 0.2927, "step": 22619 }, { "epoch": 1.8324692158133506, "grad_norm": 0.06732763350009918, "learning_rate": 0.00012041946082181917, "loss": 0.2836, "step": 22620 }, { "epoch": 1.8325502268308491, "grad_norm": 0.052625562995672226, "learning_rate": 0.00012041496016922453, "loss": 0.2439, "step": 22621 }, { "epoch": 1.8326312378483474, "grad_norm": 0.055617038160562515, "learning_rate": 0.00012041045951662993, "loss": 0.2672, "step": 22622 }, { "epoch": 1.8327122488658456, "grad_norm": 0.06397406756877899, "learning_rate": 0.0001204059588640353, "loss": 0.2955, "step": 22623 }, { "epoch": 1.8327932598833443, "grad_norm": 0.05224163830280304, "learning_rate": 0.00012040145821144066, "loss": 0.2504, "step": 22624 }, { "epoch": 1.8328742709008425, "grad_norm": 0.057418178766965866, "learning_rate": 0.00012039695755884603, "loss": 0.2887, "step": 22625 }, { "epoch": 1.8329552819183408, "grad_norm": 0.05472564697265625, "learning_rate": 0.00012039245690625141, "loss": 0.279, "step": 22626 }, { "epoch": 1.8330362929358393, "grad_norm": 0.05555223673582077, "learning_rate": 0.00012038795625365677, "loss": 0.2792, "step": 22627 }, { "epoch": 1.8331173039533377, "grad_norm": 0.05100194364786148, "learning_rate": 0.00012038345560106217, "loss": 0.2292, "step": 22628 }, { "epoch": 1.833198314970836, "grad_norm": 0.057672880589962006, "learning_rate": 0.00012037895494846754, "loss": 0.3163, "step": 22629 }, { "epoch": 1.8332793259883344, "grad_norm": 0.05746078118681908, "learning_rate": 0.0001203744542958729, "loss": 0.273, "step": 22630 }, { "epoch": 1.833360337005833, "grad_norm": 0.04709317535161972, "learning_rate": 0.00012036995364327828, "loss": 0.2454, "step": 22631 }, { "epoch": 1.8334413480233311, "grad_norm": 0.058300264179706573, "learning_rate": 0.00012036545299068365, "loss": 0.2748, "step": 22632 }, { "epoch": 1.8335223590408296, "grad_norm": 0.050654564052820206, "learning_rate": 0.00012036095233808904, "loss": 0.2443, "step": 22633 }, { "epoch": 1.833603370058328, "grad_norm": 0.05865953490138054, "learning_rate": 0.00012035645168549441, "loss": 0.2854, "step": 22634 }, { "epoch": 1.8336843810758263, "grad_norm": 0.05909755825996399, "learning_rate": 0.00012035195103289978, "loss": 0.2888, "step": 22635 }, { "epoch": 1.8337653920933246, "grad_norm": 0.06934584677219391, "learning_rate": 0.00012034745038030514, "loss": 0.2787, "step": 22636 }, { "epoch": 1.833846403110823, "grad_norm": 0.0527278296649456, "learning_rate": 0.00012034294972771052, "loss": 0.2694, "step": 22637 }, { "epoch": 1.8339274141283215, "grad_norm": 0.05175149068236351, "learning_rate": 0.00012033844907511589, "loss": 0.241, "step": 22638 }, { "epoch": 1.8340084251458197, "grad_norm": 0.05036207661032677, "learning_rate": 0.00012033394842252128, "loss": 0.2634, "step": 22639 }, { "epoch": 1.8340894361633182, "grad_norm": 0.05339133366942406, "learning_rate": 0.00012032944776992665, "loss": 0.2955, "step": 22640 }, { "epoch": 1.8341704471808167, "grad_norm": 0.055165499448776245, "learning_rate": 0.00012032494711733203, "loss": 0.2867, "step": 22641 }, { "epoch": 1.834251458198315, "grad_norm": 0.07881447672843933, "learning_rate": 0.00012032044646473739, "loss": 0.3049, "step": 22642 }, { "epoch": 1.8343324692158134, "grad_norm": 0.05756150186061859, "learning_rate": 0.00012031594581214276, "loss": 0.2966, "step": 22643 }, { "epoch": 1.8344134802333119, "grad_norm": 0.05701543390750885, "learning_rate": 0.00012031144515954813, "loss": 0.2866, "step": 22644 }, { "epoch": 1.83449449125081, "grad_norm": 0.04993041232228279, "learning_rate": 0.00012030694450695352, "loss": 0.2601, "step": 22645 }, { "epoch": 1.8345755022683083, "grad_norm": 0.05136844143271446, "learning_rate": 0.0001203024438543589, "loss": 0.3156, "step": 22646 }, { "epoch": 1.834656513285807, "grad_norm": 0.060809340327978134, "learning_rate": 0.00012029794320176427, "loss": 0.3136, "step": 22647 }, { "epoch": 1.8347375243033053, "grad_norm": 0.05689241737127304, "learning_rate": 0.00012029344254916963, "loss": 0.2737, "step": 22648 }, { "epoch": 1.8348185353208035, "grad_norm": 0.06734489649534225, "learning_rate": 0.000120288941896575, "loss": 0.3179, "step": 22649 }, { "epoch": 1.834899546338302, "grad_norm": 0.05590895935893059, "learning_rate": 0.00012028444124398038, "loss": 0.2887, "step": 22650 }, { "epoch": 1.8349805573558005, "grad_norm": 0.06098778545856476, "learning_rate": 0.00012027994059138576, "loss": 0.2639, "step": 22651 }, { "epoch": 1.8350615683732987, "grad_norm": 0.06430383026599884, "learning_rate": 0.00012027543993879114, "loss": 0.2732, "step": 22652 }, { "epoch": 1.8351425793907972, "grad_norm": 0.060666609555482864, "learning_rate": 0.00012027093928619651, "loss": 0.2682, "step": 22653 }, { "epoch": 1.8352235904082956, "grad_norm": 0.062334634363651276, "learning_rate": 0.00012026643863360187, "loss": 0.2374, "step": 22654 }, { "epoch": 1.8353046014257939, "grad_norm": 0.06325268745422363, "learning_rate": 0.00012026193798100724, "loss": 0.2965, "step": 22655 }, { "epoch": 1.8353856124432921, "grad_norm": 0.05274970829486847, "learning_rate": 0.00012025743732841263, "loss": 0.2384, "step": 22656 }, { "epoch": 1.8354666234607908, "grad_norm": 0.05541786924004555, "learning_rate": 0.000120252936675818, "loss": 0.2724, "step": 22657 }, { "epoch": 1.835547634478289, "grad_norm": 0.0628451406955719, "learning_rate": 0.00012024843602322338, "loss": 0.2188, "step": 22658 }, { "epoch": 1.8356286454957873, "grad_norm": 0.05656782537698746, "learning_rate": 0.00012024393537062875, "loss": 0.2519, "step": 22659 }, { "epoch": 1.8357096565132858, "grad_norm": 0.05050123482942581, "learning_rate": 0.00012023943471803411, "loss": 0.2645, "step": 22660 }, { "epoch": 1.8357906675307842, "grad_norm": 0.0647178515791893, "learning_rate": 0.00012023493406543948, "loss": 0.3063, "step": 22661 }, { "epoch": 1.8358716785482825, "grad_norm": 0.06083545461297035, "learning_rate": 0.00012023043341284487, "loss": 0.2363, "step": 22662 }, { "epoch": 1.835952689565781, "grad_norm": 0.06450481712818146, "learning_rate": 0.00012022593276025025, "loss": 0.2894, "step": 22663 }, { "epoch": 1.8360337005832794, "grad_norm": 0.059075742959976196, "learning_rate": 0.00012022143210765562, "loss": 0.2459, "step": 22664 }, { "epoch": 1.8361147116007777, "grad_norm": 0.05525489151477814, "learning_rate": 0.00012021693145506099, "loss": 0.2711, "step": 22665 }, { "epoch": 1.8361957226182761, "grad_norm": 0.05348137393593788, "learning_rate": 0.00012021243080246635, "loss": 0.3088, "step": 22666 }, { "epoch": 1.8362767336357746, "grad_norm": 0.05961565673351288, "learning_rate": 0.00012020793014987173, "loss": 0.2618, "step": 22667 }, { "epoch": 1.8363577446532728, "grad_norm": 0.056940242648124695, "learning_rate": 0.00012020342949727711, "loss": 0.283, "step": 22668 }, { "epoch": 1.836438755670771, "grad_norm": 0.053428273648023605, "learning_rate": 0.00012019892884468249, "loss": 0.2821, "step": 22669 }, { "epoch": 1.8365197666882696, "grad_norm": 0.05401530861854553, "learning_rate": 0.00012019442819208786, "loss": 0.2354, "step": 22670 }, { "epoch": 1.836600777705768, "grad_norm": 0.07518289238214493, "learning_rate": 0.00012018992753949323, "loss": 0.2414, "step": 22671 }, { "epoch": 1.8366817887232663, "grad_norm": 0.05973779782652855, "learning_rate": 0.0001201854268868986, "loss": 0.2419, "step": 22672 }, { "epoch": 1.8367627997407647, "grad_norm": 0.05145569518208504, "learning_rate": 0.00012018092623430397, "loss": 0.2599, "step": 22673 }, { "epoch": 1.8368438107582632, "grad_norm": 0.06088968366384506, "learning_rate": 0.00012017642558170937, "loss": 0.238, "step": 22674 }, { "epoch": 1.8369248217757614, "grad_norm": 0.055228862911462784, "learning_rate": 0.00012017192492911473, "loss": 0.278, "step": 22675 }, { "epoch": 1.83700583279326, "grad_norm": 0.04872765392065048, "learning_rate": 0.0001201674242765201, "loss": 0.2543, "step": 22676 }, { "epoch": 1.8370868438107584, "grad_norm": 0.05401741713285446, "learning_rate": 0.00012016292362392548, "loss": 0.2407, "step": 22677 }, { "epoch": 1.8371678548282566, "grad_norm": 0.059674959629774094, "learning_rate": 0.00012015842297133084, "loss": 0.2633, "step": 22678 }, { "epoch": 1.8372488658457549, "grad_norm": 0.07011188566684723, "learning_rate": 0.00012015392231873621, "loss": 0.2824, "step": 22679 }, { "epoch": 1.8373298768632536, "grad_norm": 0.05733582004904747, "learning_rate": 0.00012014942166614161, "loss": 0.2592, "step": 22680 }, { "epoch": 1.8374108878807518, "grad_norm": 0.06655251979827881, "learning_rate": 0.00012014492101354697, "loss": 0.2793, "step": 22681 }, { "epoch": 1.83749189889825, "grad_norm": 0.05587971210479736, "learning_rate": 0.00012014042036095234, "loss": 0.2447, "step": 22682 }, { "epoch": 1.8375729099157485, "grad_norm": 0.052102141082286835, "learning_rate": 0.00012013591970835772, "loss": 0.2994, "step": 22683 }, { "epoch": 1.837653920933247, "grad_norm": 0.05093378946185112, "learning_rate": 0.00012013141905576308, "loss": 0.2451, "step": 22684 }, { "epoch": 1.8377349319507452, "grad_norm": 0.05560063570737839, "learning_rate": 0.00012012691840316848, "loss": 0.2608, "step": 22685 }, { "epoch": 1.8378159429682437, "grad_norm": 0.07167977094650269, "learning_rate": 0.00012012241775057385, "loss": 0.2806, "step": 22686 }, { "epoch": 1.8378969539857422, "grad_norm": 0.0680895745754242, "learning_rate": 0.00012011791709797921, "loss": 0.2729, "step": 22687 }, { "epoch": 1.8379779650032404, "grad_norm": 0.06179485470056534, "learning_rate": 0.00012011341644538459, "loss": 0.2948, "step": 22688 }, { "epoch": 1.8380589760207389, "grad_norm": 0.06343559920787811, "learning_rate": 0.00012010891579278996, "loss": 0.2774, "step": 22689 }, { "epoch": 1.8381399870382373, "grad_norm": 0.07578915357589722, "learning_rate": 0.00012010441514019532, "loss": 0.3282, "step": 22690 }, { "epoch": 1.8382209980557356, "grad_norm": 0.060906898230314255, "learning_rate": 0.00012009991448760072, "loss": 0.269, "step": 22691 }, { "epoch": 1.8383020090732338, "grad_norm": 0.052891168743371964, "learning_rate": 0.0001200954138350061, "loss": 0.2587, "step": 22692 }, { "epoch": 1.8383830200907323, "grad_norm": 0.04828254505991936, "learning_rate": 0.00012009091318241145, "loss": 0.28, "step": 22693 }, { "epoch": 1.8384640311082308, "grad_norm": 0.06219147890806198, "learning_rate": 0.00012008641252981683, "loss": 0.2745, "step": 22694 }, { "epoch": 1.838545042125729, "grad_norm": 0.06701318919658661, "learning_rate": 0.0001200819118772222, "loss": 0.2384, "step": 22695 }, { "epoch": 1.8386260531432275, "grad_norm": 0.04830949380993843, "learning_rate": 0.00012007741122462756, "loss": 0.2375, "step": 22696 }, { "epoch": 1.838707064160726, "grad_norm": 0.05603285878896713, "learning_rate": 0.00012007291057203296, "loss": 0.2444, "step": 22697 }, { "epoch": 1.8387880751782242, "grad_norm": 0.06188029795885086, "learning_rate": 0.00012006840991943834, "loss": 0.2722, "step": 22698 }, { "epoch": 1.8388690861957226, "grad_norm": 0.07010263204574585, "learning_rate": 0.0001200639092668437, "loss": 0.28, "step": 22699 }, { "epoch": 1.8389500972132211, "grad_norm": 0.059973932802677155, "learning_rate": 0.00012005940861424907, "loss": 0.2692, "step": 22700 }, { "epoch": 1.8390311082307194, "grad_norm": 0.0619681179523468, "learning_rate": 0.00012005490796165444, "loss": 0.3075, "step": 22701 }, { "epoch": 1.8391121192482176, "grad_norm": 0.06769229471683502, "learning_rate": 0.0001200504073090598, "loss": 0.2876, "step": 22702 }, { "epoch": 1.8391931302657163, "grad_norm": 0.060972172766923904, "learning_rate": 0.0001200459066564652, "loss": 0.2646, "step": 22703 }, { "epoch": 1.8392741412832145, "grad_norm": 0.06796864420175552, "learning_rate": 0.00012004140600387058, "loss": 0.251, "step": 22704 }, { "epoch": 1.8393551523007128, "grad_norm": 0.060075342655181885, "learning_rate": 0.00012003690535127594, "loss": 0.2577, "step": 22705 }, { "epoch": 1.8394361633182112, "grad_norm": 0.06749142706394196, "learning_rate": 0.00012003240469868131, "loss": 0.2506, "step": 22706 }, { "epoch": 1.8395171743357097, "grad_norm": 0.07474429905414581, "learning_rate": 0.00012002790404608668, "loss": 0.2846, "step": 22707 }, { "epoch": 1.839598185353208, "grad_norm": 0.05930938944220543, "learning_rate": 0.00012002340339349207, "loss": 0.3026, "step": 22708 }, { "epoch": 1.8396791963707064, "grad_norm": 0.06845049560070038, "learning_rate": 0.00012001890274089744, "loss": 0.2588, "step": 22709 }, { "epoch": 1.839760207388205, "grad_norm": 0.059819675981998444, "learning_rate": 0.00012001440208830282, "loss": 0.2679, "step": 22710 }, { "epoch": 1.8398412184057031, "grad_norm": 0.07567431777715683, "learning_rate": 0.00012000990143570818, "loss": 0.3168, "step": 22711 }, { "epoch": 1.8399222294232016, "grad_norm": 0.06516402959823608, "learning_rate": 0.00012000540078311355, "loss": 0.2703, "step": 22712 }, { "epoch": 1.8400032404407, "grad_norm": 0.05498339235782623, "learning_rate": 0.00012000090013051893, "loss": 0.2821, "step": 22713 }, { "epoch": 1.8400842514581983, "grad_norm": 0.06723517179489136, "learning_rate": 0.00011999639947792431, "loss": 0.301, "step": 22714 }, { "epoch": 1.8401652624756966, "grad_norm": 0.061191376298666, "learning_rate": 0.00011999189882532969, "loss": 0.2895, "step": 22715 }, { "epoch": 1.840246273493195, "grad_norm": 0.0644366443157196, "learning_rate": 0.00011998739817273506, "loss": 0.2767, "step": 22716 }, { "epoch": 1.8403272845106935, "grad_norm": 0.04802727699279785, "learning_rate": 0.00011998289752014042, "loss": 0.2429, "step": 22717 }, { "epoch": 1.8404082955281917, "grad_norm": 0.061694443225860596, "learning_rate": 0.0001199783968675458, "loss": 0.3248, "step": 22718 }, { "epoch": 1.8404893065456902, "grad_norm": 0.055218540132045746, "learning_rate": 0.00011997389621495117, "loss": 0.2767, "step": 22719 }, { "epoch": 1.8405703175631887, "grad_norm": 0.06819997727870941, "learning_rate": 0.00011996939556235655, "loss": 0.2622, "step": 22720 }, { "epoch": 1.840651328580687, "grad_norm": 0.04921705648303032, "learning_rate": 0.00011996489490976193, "loss": 0.2548, "step": 22721 }, { "epoch": 1.8407323395981854, "grad_norm": 0.05520812049508095, "learning_rate": 0.0001199603942571673, "loss": 0.2925, "step": 22722 }, { "epoch": 1.8408133506156839, "grad_norm": 0.05166729539632797, "learning_rate": 0.00011995589360457266, "loss": 0.2553, "step": 22723 }, { "epoch": 1.840894361633182, "grad_norm": 0.045433759689331055, "learning_rate": 0.00011995139295197804, "loss": 0.246, "step": 22724 }, { "epoch": 1.8409753726506803, "grad_norm": 0.044920552521944046, "learning_rate": 0.00011994689229938341, "loss": 0.2227, "step": 22725 }, { "epoch": 1.841056383668179, "grad_norm": 0.05813392996788025, "learning_rate": 0.0001199423916467888, "loss": 0.2632, "step": 22726 }, { "epoch": 1.8411373946856773, "grad_norm": 0.06702584773302078, "learning_rate": 0.00011993789099419417, "loss": 0.2737, "step": 22727 }, { "epoch": 1.8412184057031755, "grad_norm": 0.05693779140710831, "learning_rate": 0.00011993339034159954, "loss": 0.2882, "step": 22728 }, { "epoch": 1.841299416720674, "grad_norm": 0.06254604458808899, "learning_rate": 0.0001199288896890049, "loss": 0.2929, "step": 22729 }, { "epoch": 1.8413804277381725, "grad_norm": 0.05122413486242294, "learning_rate": 0.00011992438903641028, "loss": 0.2891, "step": 22730 }, { "epoch": 1.8414614387556707, "grad_norm": 0.05988462269306183, "learning_rate": 0.00011991988838381565, "loss": 0.2963, "step": 22731 }, { "epoch": 1.8415424497731692, "grad_norm": 0.05026065558195114, "learning_rate": 0.00011991538773122104, "loss": 0.2593, "step": 22732 }, { "epoch": 1.8416234607906676, "grad_norm": 0.05141877382993698, "learning_rate": 0.00011991088707862641, "loss": 0.2585, "step": 22733 }, { "epoch": 1.8417044718081659, "grad_norm": 0.05695733428001404, "learning_rate": 0.00011990638642603178, "loss": 0.3031, "step": 22734 }, { "epoch": 1.8417854828256643, "grad_norm": 0.051710471510887146, "learning_rate": 0.00011990188577343714, "loss": 0.279, "step": 22735 }, { "epoch": 1.8418664938431628, "grad_norm": 0.05895623937249184, "learning_rate": 0.00011989738512084252, "loss": 0.3285, "step": 22736 }, { "epoch": 1.841947504860661, "grad_norm": 0.06714142858982086, "learning_rate": 0.00011989288446824792, "loss": 0.3021, "step": 22737 }, { "epoch": 1.8420285158781593, "grad_norm": 0.06259685754776001, "learning_rate": 0.00011988838381565328, "loss": 0.2606, "step": 22738 }, { "epoch": 1.8421095268956578, "grad_norm": 0.06058111786842346, "learning_rate": 0.00011988388316305865, "loss": 0.2916, "step": 22739 }, { "epoch": 1.8421905379131562, "grad_norm": 0.06061285734176636, "learning_rate": 0.00011987938251046403, "loss": 0.2886, "step": 22740 }, { "epoch": 1.8422715489306545, "grad_norm": 0.053787924349308014, "learning_rate": 0.00011987488185786939, "loss": 0.2605, "step": 22741 }, { "epoch": 1.842352559948153, "grad_norm": 0.06060533970594406, "learning_rate": 0.00011987038120527476, "loss": 0.2729, "step": 22742 }, { "epoch": 1.8424335709656514, "grad_norm": 0.052182767540216446, "learning_rate": 0.00011986588055268016, "loss": 0.2454, "step": 22743 }, { "epoch": 1.8425145819831497, "grad_norm": 0.05701451748609543, "learning_rate": 0.00011986137990008552, "loss": 0.2899, "step": 22744 }, { "epoch": 1.8425955930006481, "grad_norm": 0.07026814669370651, "learning_rate": 0.0001198568792474909, "loss": 0.2672, "step": 22745 }, { "epoch": 1.8426766040181466, "grad_norm": 0.056616608053445816, "learning_rate": 0.00011985237859489627, "loss": 0.2921, "step": 22746 }, { "epoch": 1.8427576150356448, "grad_norm": 0.05835625156760216, "learning_rate": 0.00011984787794230163, "loss": 0.2585, "step": 22747 }, { "epoch": 1.842838626053143, "grad_norm": 0.05612039193511009, "learning_rate": 0.000119843377289707, "loss": 0.2238, "step": 22748 }, { "epoch": 1.8429196370706418, "grad_norm": 0.062194813042879105, "learning_rate": 0.0001198388766371124, "loss": 0.2807, "step": 22749 }, { "epoch": 1.84300064808814, "grad_norm": 0.057809729129076004, "learning_rate": 0.00011983437598451776, "loss": 0.264, "step": 22750 }, { "epoch": 1.8430816591056383, "grad_norm": 0.07250712811946869, "learning_rate": 0.00011982987533192314, "loss": 0.2787, "step": 22751 }, { "epoch": 1.8431626701231367, "grad_norm": 0.061580806970596313, "learning_rate": 0.00011982537467932851, "loss": 0.2796, "step": 22752 }, { "epoch": 1.8432436811406352, "grad_norm": 0.0518670380115509, "learning_rate": 0.00011982087402673387, "loss": 0.263, "step": 22753 }, { "epoch": 1.8433246921581334, "grad_norm": 0.07268506288528442, "learning_rate": 0.00011981637337413924, "loss": 0.2785, "step": 22754 }, { "epoch": 1.843405703175632, "grad_norm": 0.05438043922185898, "learning_rate": 0.00011981187272154464, "loss": 0.2325, "step": 22755 }, { "epoch": 1.8434867141931304, "grad_norm": 0.05696878209710121, "learning_rate": 0.00011980737206895, "loss": 0.2576, "step": 22756 }, { "epoch": 1.8435677252106286, "grad_norm": 0.049444738775491714, "learning_rate": 0.00011980287141635538, "loss": 0.237, "step": 22757 }, { "epoch": 1.8436487362281269, "grad_norm": 0.0545518733561039, "learning_rate": 0.00011979837076376075, "loss": 0.2593, "step": 22758 }, { "epoch": 1.8437297472456255, "grad_norm": 0.050957489758729935, "learning_rate": 0.00011979387011116611, "loss": 0.2338, "step": 22759 }, { "epoch": 1.8438107582631238, "grad_norm": 0.06445847451686859, "learning_rate": 0.00011978936945857148, "loss": 0.2653, "step": 22760 }, { "epoch": 1.843891769280622, "grad_norm": 0.057379335165023804, "learning_rate": 0.00011978486880597689, "loss": 0.2853, "step": 22761 }, { "epoch": 1.8439727802981205, "grad_norm": 0.05096464604139328, "learning_rate": 0.00011978036815338225, "loss": 0.2465, "step": 22762 }, { "epoch": 1.844053791315619, "grad_norm": 0.06210287660360336, "learning_rate": 0.00011977586750078762, "loss": 0.2893, "step": 22763 }, { "epoch": 1.8441348023331172, "grad_norm": 0.05852840095758438, "learning_rate": 0.00011977136684819299, "loss": 0.2559, "step": 22764 }, { "epoch": 1.8442158133506157, "grad_norm": 0.06159196048974991, "learning_rate": 0.00011976686619559835, "loss": 0.2812, "step": 22765 }, { "epoch": 1.8442968243681142, "grad_norm": 0.06781520694494247, "learning_rate": 0.00011976236554300375, "loss": 0.2419, "step": 22766 }, { "epoch": 1.8443778353856124, "grad_norm": 0.06899692863225937, "learning_rate": 0.00011975786489040913, "loss": 0.2556, "step": 22767 }, { "epoch": 1.8444588464031109, "grad_norm": 0.057173509150743484, "learning_rate": 0.00011975336423781449, "loss": 0.2945, "step": 22768 }, { "epoch": 1.8445398574206093, "grad_norm": 0.05989151448011398, "learning_rate": 0.00011974886358521986, "loss": 0.2691, "step": 22769 }, { "epoch": 1.8446208684381076, "grad_norm": 0.05088137835264206, "learning_rate": 0.00011974436293262523, "loss": 0.244, "step": 22770 }, { "epoch": 1.8447018794556058, "grad_norm": 0.056217145174741745, "learning_rate": 0.0001197398622800306, "loss": 0.2451, "step": 22771 }, { "epoch": 1.8447828904731045, "grad_norm": 0.062338441610336304, "learning_rate": 0.000119735361627436, "loss": 0.2763, "step": 22772 }, { "epoch": 1.8448639014906028, "grad_norm": 0.05098249763250351, "learning_rate": 0.00011973086097484137, "loss": 0.2299, "step": 22773 }, { "epoch": 1.844944912508101, "grad_norm": 0.06756927073001862, "learning_rate": 0.00011972636032224673, "loss": 0.2627, "step": 22774 }, { "epoch": 1.8450259235255995, "grad_norm": 0.04866158589720726, "learning_rate": 0.0001197218596696521, "loss": 0.2783, "step": 22775 }, { "epoch": 1.845106934543098, "grad_norm": 0.051096439361572266, "learning_rate": 0.00011971735901705748, "loss": 0.2673, "step": 22776 }, { "epoch": 1.8451879455605962, "grad_norm": 0.057585157454013824, "learning_rate": 0.00011971285836446285, "loss": 0.2833, "step": 22777 }, { "epoch": 1.8452689565780946, "grad_norm": 0.057650353759527206, "learning_rate": 0.00011970835771186824, "loss": 0.2923, "step": 22778 }, { "epoch": 1.845349967595593, "grad_norm": 0.05406733974814415, "learning_rate": 0.00011970385705927361, "loss": 0.2456, "step": 22779 }, { "epoch": 1.8454309786130914, "grad_norm": 0.05556425824761391, "learning_rate": 0.00011969935640667897, "loss": 0.2827, "step": 22780 }, { "epoch": 1.8455119896305896, "grad_norm": 0.07153059542179108, "learning_rate": 0.00011969485575408434, "loss": 0.2633, "step": 22781 }, { "epoch": 1.8455930006480883, "grad_norm": 0.06352782249450684, "learning_rate": 0.00011969035510148972, "loss": 0.2709, "step": 22782 }, { "epoch": 1.8456740116655865, "grad_norm": 0.05468868836760521, "learning_rate": 0.00011968585444889509, "loss": 0.2873, "step": 22783 }, { "epoch": 1.8457550226830848, "grad_norm": 0.055515628308057785, "learning_rate": 0.00011968135379630048, "loss": 0.2938, "step": 22784 }, { "epoch": 1.8458360337005832, "grad_norm": 0.05868731066584587, "learning_rate": 0.00011967685314370585, "loss": 0.2835, "step": 22785 }, { "epoch": 1.8459170447180817, "grad_norm": 0.05362274497747421, "learning_rate": 0.00011967235249111121, "loss": 0.2513, "step": 22786 }, { "epoch": 1.84599805573558, "grad_norm": 0.05573923885822296, "learning_rate": 0.00011966785183851659, "loss": 0.2909, "step": 22787 }, { "epoch": 1.8460790667530784, "grad_norm": 0.052229754626750946, "learning_rate": 0.00011966335118592196, "loss": 0.2706, "step": 22788 }, { "epoch": 1.846160077770577, "grad_norm": 0.049126509577035904, "learning_rate": 0.00011965885053332735, "loss": 0.2298, "step": 22789 }, { "epoch": 1.8462410887880751, "grad_norm": 0.05795193836092949, "learning_rate": 0.00011965434988073272, "loss": 0.3102, "step": 22790 }, { "epoch": 1.8463220998055736, "grad_norm": 0.0662761703133583, "learning_rate": 0.0001196498492281381, "loss": 0.2588, "step": 22791 }, { "epoch": 1.846403110823072, "grad_norm": 0.060541246086359024, "learning_rate": 0.00011964534857554345, "loss": 0.2954, "step": 22792 }, { "epoch": 1.8464841218405703, "grad_norm": 0.055643677711486816, "learning_rate": 0.00011964084792294883, "loss": 0.2557, "step": 22793 }, { "epoch": 1.8465651328580686, "grad_norm": 0.04613005742430687, "learning_rate": 0.0001196363472703542, "loss": 0.2715, "step": 22794 }, { "epoch": 1.846646143875567, "grad_norm": 0.05167005583643913, "learning_rate": 0.00011963184661775959, "loss": 0.2513, "step": 22795 }, { "epoch": 1.8467271548930655, "grad_norm": 0.05315526947379112, "learning_rate": 0.00011962734596516496, "loss": 0.238, "step": 22796 }, { "epoch": 1.8468081659105637, "grad_norm": 0.05100571736693382, "learning_rate": 0.00011962284531257034, "loss": 0.2399, "step": 22797 }, { "epoch": 1.8468891769280622, "grad_norm": 0.05591544508934021, "learning_rate": 0.0001196183446599757, "loss": 0.2991, "step": 22798 }, { "epoch": 1.8469701879455607, "grad_norm": 0.055552974343299866, "learning_rate": 0.00011961384400738107, "loss": 0.249, "step": 22799 }, { "epoch": 1.847051198963059, "grad_norm": 0.059836044907569885, "learning_rate": 0.00011960934335478644, "loss": 0.2737, "step": 22800 }, { "epoch": 1.8471322099805574, "grad_norm": 0.058615636080503464, "learning_rate": 0.00011960484270219183, "loss": 0.2711, "step": 22801 }, { "epoch": 1.8472132209980558, "grad_norm": 0.05661217123270035, "learning_rate": 0.0001196003420495972, "loss": 0.2498, "step": 22802 }, { "epoch": 1.847294232015554, "grad_norm": 0.05448070913553238, "learning_rate": 0.00011959584139700258, "loss": 0.3288, "step": 22803 }, { "epoch": 1.8473752430330523, "grad_norm": 0.05980324000120163, "learning_rate": 0.00011959134074440794, "loss": 0.2745, "step": 22804 }, { "epoch": 1.847456254050551, "grad_norm": 0.060643620789051056, "learning_rate": 0.00011958684009181331, "loss": 0.2992, "step": 22805 }, { "epoch": 1.8475372650680493, "grad_norm": 0.05414064601063728, "learning_rate": 0.00011958233943921868, "loss": 0.237, "step": 22806 }, { "epoch": 1.8476182760855475, "grad_norm": 0.06307387351989746, "learning_rate": 0.00011957783878662407, "loss": 0.3098, "step": 22807 }, { "epoch": 1.847699287103046, "grad_norm": 0.054019246250391006, "learning_rate": 0.00011957333813402945, "loss": 0.283, "step": 22808 }, { "epoch": 1.8477802981205445, "grad_norm": 0.056204576045274734, "learning_rate": 0.00011956883748143482, "loss": 0.2118, "step": 22809 }, { "epoch": 1.8478613091380427, "grad_norm": 0.05422830581665039, "learning_rate": 0.00011956433682884018, "loss": 0.2927, "step": 22810 }, { "epoch": 1.8479423201555412, "grad_norm": 0.0619785338640213, "learning_rate": 0.00011955983617624555, "loss": 0.2982, "step": 22811 }, { "epoch": 1.8480233311730396, "grad_norm": 0.04841968044638634, "learning_rate": 0.00011955533552365093, "loss": 0.2854, "step": 22812 }, { "epoch": 1.8481043421905379, "grad_norm": 0.05482259392738342, "learning_rate": 0.00011955083487105631, "loss": 0.3004, "step": 22813 }, { "epoch": 1.8481853532080363, "grad_norm": 0.05952038988471031, "learning_rate": 0.00011954633421846169, "loss": 0.274, "step": 22814 }, { "epoch": 1.8482663642255348, "grad_norm": 0.06057178974151611, "learning_rate": 0.00011954183356586706, "loss": 0.3035, "step": 22815 }, { "epoch": 1.848347375243033, "grad_norm": 0.05620408430695534, "learning_rate": 0.00011953733291327242, "loss": 0.2811, "step": 22816 }, { "epoch": 1.8484283862605313, "grad_norm": 0.05309577286243439, "learning_rate": 0.0001195328322606778, "loss": 0.2644, "step": 22817 }, { "epoch": 1.8485093972780298, "grad_norm": 0.0493699349462986, "learning_rate": 0.0001195283316080832, "loss": 0.225, "step": 22818 }, { "epoch": 1.8485904082955282, "grad_norm": 0.04509785398840904, "learning_rate": 0.00011952383095548855, "loss": 0.2254, "step": 22819 }, { "epoch": 1.8486714193130265, "grad_norm": 0.059793125838041306, "learning_rate": 0.00011951933030289393, "loss": 0.2752, "step": 22820 }, { "epoch": 1.848752430330525, "grad_norm": 0.05882219597697258, "learning_rate": 0.0001195148296502993, "loss": 0.278, "step": 22821 }, { "epoch": 1.8488334413480234, "grad_norm": 0.05098733305931091, "learning_rate": 0.00011951032899770466, "loss": 0.2775, "step": 22822 }, { "epoch": 1.8489144523655217, "grad_norm": 0.06307312101125717, "learning_rate": 0.00011950582834511004, "loss": 0.2493, "step": 22823 }, { "epoch": 1.8489954633830201, "grad_norm": 0.056472472846508026, "learning_rate": 0.00011950132769251544, "loss": 0.2778, "step": 22824 }, { "epoch": 1.8490764744005186, "grad_norm": 0.045031849294900894, "learning_rate": 0.0001194968270399208, "loss": 0.2271, "step": 22825 }, { "epoch": 1.8491574854180168, "grad_norm": 0.07028354704380035, "learning_rate": 0.00011949232638732617, "loss": 0.2795, "step": 22826 }, { "epoch": 1.849238496435515, "grad_norm": 0.06089423596858978, "learning_rate": 0.00011948782573473154, "loss": 0.2195, "step": 22827 }, { "epoch": 1.8493195074530138, "grad_norm": 0.05401669070124626, "learning_rate": 0.0001194833250821369, "loss": 0.2605, "step": 22828 }, { "epoch": 1.849400518470512, "grad_norm": 0.05383060872554779, "learning_rate": 0.00011947882442954228, "loss": 0.2788, "step": 22829 }, { "epoch": 1.8494815294880103, "grad_norm": 0.053320858627557755, "learning_rate": 0.00011947432377694768, "loss": 0.2573, "step": 22830 }, { "epoch": 1.8495625405055087, "grad_norm": 0.048075009137392044, "learning_rate": 0.00011946982312435304, "loss": 0.2569, "step": 22831 }, { "epoch": 1.8496435515230072, "grad_norm": 0.05769873037934303, "learning_rate": 0.00011946532247175841, "loss": 0.2581, "step": 22832 }, { "epoch": 1.8497245625405054, "grad_norm": 0.053351398557424545, "learning_rate": 0.00011946082181916379, "loss": 0.2496, "step": 22833 }, { "epoch": 1.849805573558004, "grad_norm": 0.05518243834376335, "learning_rate": 0.00011945632116656915, "loss": 0.2513, "step": 22834 }, { "epoch": 1.8498865845755024, "grad_norm": 0.04904431477189064, "learning_rate": 0.00011945182051397452, "loss": 0.2684, "step": 22835 }, { "epoch": 1.8499675955930006, "grad_norm": 0.06256154924631119, "learning_rate": 0.00011944731986137992, "loss": 0.2536, "step": 22836 }, { "epoch": 1.850048606610499, "grad_norm": 0.05640607327222824, "learning_rate": 0.00011944281920878528, "loss": 0.235, "step": 22837 }, { "epoch": 1.8501296176279975, "grad_norm": 0.06628984957933426, "learning_rate": 0.00011943831855619065, "loss": 0.273, "step": 22838 }, { "epoch": 1.8502106286454958, "grad_norm": 0.07006203383207321, "learning_rate": 0.00011943381790359603, "loss": 0.2872, "step": 22839 }, { "epoch": 1.850291639662994, "grad_norm": 0.05886678397655487, "learning_rate": 0.0001194293172510014, "loss": 0.2358, "step": 22840 }, { "epoch": 1.8503726506804925, "grad_norm": 0.05628499016165733, "learning_rate": 0.00011942481659840679, "loss": 0.2739, "step": 22841 }, { "epoch": 1.850453661697991, "grad_norm": 0.052815746515989304, "learning_rate": 0.00011942031594581216, "loss": 0.2539, "step": 22842 }, { "epoch": 1.8505346727154892, "grad_norm": 0.07488470524549484, "learning_rate": 0.00011941581529321752, "loss": 0.3107, "step": 22843 }, { "epoch": 1.8506156837329877, "grad_norm": 0.0484716072678566, "learning_rate": 0.0001194113146406229, "loss": 0.2826, "step": 22844 }, { "epoch": 1.8506966947504861, "grad_norm": 0.05985249578952789, "learning_rate": 0.00011940681398802827, "loss": 0.2711, "step": 22845 }, { "epoch": 1.8507777057679844, "grad_norm": 0.05810718238353729, "learning_rate": 0.00011940231333543364, "loss": 0.274, "step": 22846 }, { "epoch": 1.8508587167854829, "grad_norm": 0.05745183303952217, "learning_rate": 0.00011939781268283903, "loss": 0.279, "step": 22847 }, { "epoch": 1.8509397278029813, "grad_norm": 0.050858549773693085, "learning_rate": 0.0001193933120302444, "loss": 0.2828, "step": 22848 }, { "epoch": 1.8510207388204796, "grad_norm": 0.04445550590753555, "learning_rate": 0.00011938881137764976, "loss": 0.2621, "step": 22849 }, { "epoch": 1.8511017498379778, "grad_norm": 0.05249026045203209, "learning_rate": 0.00011938431072505514, "loss": 0.2748, "step": 22850 }, { "epoch": 1.8511827608554765, "grad_norm": 0.05515358969569206, "learning_rate": 0.00011937981007246051, "loss": 0.2476, "step": 22851 }, { "epoch": 1.8512637718729748, "grad_norm": 0.05456148460507393, "learning_rate": 0.00011937530941986588, "loss": 0.3071, "step": 22852 }, { "epoch": 1.851344782890473, "grad_norm": 0.05495483800768852, "learning_rate": 0.00011937080876727127, "loss": 0.2823, "step": 22853 }, { "epoch": 1.8514257939079715, "grad_norm": 0.056710973381996155, "learning_rate": 0.00011936630811467664, "loss": 0.26, "step": 22854 }, { "epoch": 1.85150680492547, "grad_norm": 0.05013580992817879, "learning_rate": 0.000119361807462082, "loss": 0.2832, "step": 22855 }, { "epoch": 1.8515878159429682, "grad_norm": 0.0600869245827198, "learning_rate": 0.00011935730680948738, "loss": 0.2805, "step": 22856 }, { "epoch": 1.8516688269604666, "grad_norm": 0.059895481914281845, "learning_rate": 0.00011935280615689275, "loss": 0.3037, "step": 22857 }, { "epoch": 1.851749837977965, "grad_norm": 0.055746156722307205, "learning_rate": 0.00011934830550429813, "loss": 0.2847, "step": 22858 }, { "epoch": 1.8518308489954634, "grad_norm": 0.0649481788277626, "learning_rate": 0.00011934380485170351, "loss": 0.2879, "step": 22859 }, { "epoch": 1.8519118600129616, "grad_norm": 0.0539991594851017, "learning_rate": 0.00011933930419910889, "loss": 0.2815, "step": 22860 }, { "epoch": 1.8519928710304603, "grad_norm": 0.056314416229724884, "learning_rate": 0.00011933480354651425, "loss": 0.2749, "step": 22861 }, { "epoch": 1.8520738820479585, "grad_norm": 0.052914902567863464, "learning_rate": 0.00011933030289391962, "loss": 0.2394, "step": 22862 }, { "epoch": 1.8521548930654568, "grad_norm": 0.056165698915719986, "learning_rate": 0.00011932580224132499, "loss": 0.2677, "step": 22863 }, { "epoch": 1.8522359040829552, "grad_norm": 0.06530257314443588, "learning_rate": 0.00011932130158873037, "loss": 0.2919, "step": 22864 }, { "epoch": 1.8523169151004537, "grad_norm": 0.06353907287120819, "learning_rate": 0.00011931680093613575, "loss": 0.2929, "step": 22865 }, { "epoch": 1.852397926117952, "grad_norm": 0.06988402456045151, "learning_rate": 0.00011931230028354113, "loss": 0.2786, "step": 22866 }, { "epoch": 1.8524789371354504, "grad_norm": 0.055794063955545425, "learning_rate": 0.00011930779963094649, "loss": 0.2701, "step": 22867 }, { "epoch": 1.8525599481529489, "grad_norm": 0.06117599457502365, "learning_rate": 0.00011930329897835186, "loss": 0.2802, "step": 22868 }, { "epoch": 1.8526409591704471, "grad_norm": 0.04874401167035103, "learning_rate": 0.00011929879832575723, "loss": 0.2767, "step": 22869 }, { "epoch": 1.8527219701879456, "grad_norm": 0.04821452498435974, "learning_rate": 0.00011929429767316262, "loss": 0.253, "step": 22870 }, { "epoch": 1.852802981205444, "grad_norm": 0.0526653490960598, "learning_rate": 0.000119289797020568, "loss": 0.2845, "step": 22871 }, { "epoch": 1.8528839922229423, "grad_norm": 0.05268080160021782, "learning_rate": 0.00011928529636797337, "loss": 0.2513, "step": 22872 }, { "epoch": 1.8529650032404406, "grad_norm": 0.06358671933412552, "learning_rate": 0.00011928079571537873, "loss": 0.2685, "step": 22873 }, { "epoch": 1.8530460142579392, "grad_norm": 0.04742802679538727, "learning_rate": 0.0001192762950627841, "loss": 0.3032, "step": 22874 }, { "epoch": 1.8531270252754375, "grad_norm": 0.06022234261035919, "learning_rate": 0.00011927179441018948, "loss": 0.2919, "step": 22875 }, { "epoch": 1.8532080362929357, "grad_norm": 0.06292662769556046, "learning_rate": 0.00011926729375759486, "loss": 0.2676, "step": 22876 }, { "epoch": 1.8532890473104342, "grad_norm": 0.0531792975962162, "learning_rate": 0.00011926279310500024, "loss": 0.2299, "step": 22877 }, { "epoch": 1.8533700583279327, "grad_norm": 0.0589904747903347, "learning_rate": 0.00011925829245240561, "loss": 0.263, "step": 22878 }, { "epoch": 1.853451069345431, "grad_norm": 0.05586778372526169, "learning_rate": 0.00011925379179981097, "loss": 0.2393, "step": 22879 }, { "epoch": 1.8535320803629294, "grad_norm": 0.05040878430008888, "learning_rate": 0.00011924929114721634, "loss": 0.3131, "step": 22880 }, { "epoch": 1.8536130913804278, "grad_norm": 0.060290947556495667, "learning_rate": 0.00011924479049462172, "loss": 0.2753, "step": 22881 }, { "epoch": 1.853694102397926, "grad_norm": 0.0618286095559597, "learning_rate": 0.0001192402898420271, "loss": 0.3063, "step": 22882 }, { "epoch": 1.8537751134154243, "grad_norm": 0.06807377934455872, "learning_rate": 0.00011923578918943248, "loss": 0.2963, "step": 22883 }, { "epoch": 1.853856124432923, "grad_norm": 0.0539591945707798, "learning_rate": 0.00011923128853683785, "loss": 0.2659, "step": 22884 }, { "epoch": 1.8539371354504213, "grad_norm": 0.0648304671049118, "learning_rate": 0.00011922678788424321, "loss": 0.2938, "step": 22885 }, { "epoch": 1.8540181464679195, "grad_norm": 0.0472523532807827, "learning_rate": 0.00011922228723164859, "loss": 0.2033, "step": 22886 }, { "epoch": 1.854099157485418, "grad_norm": 0.07764612138271332, "learning_rate": 0.00011921778657905396, "loss": 0.3092, "step": 22887 }, { "epoch": 1.8541801685029164, "grad_norm": 0.05178683251142502, "learning_rate": 0.00011921328592645935, "loss": 0.2803, "step": 22888 }, { "epoch": 1.8542611795204147, "grad_norm": 0.06465435773134232, "learning_rate": 0.00011920878527386472, "loss": 0.2492, "step": 22889 }, { "epoch": 1.8543421905379132, "grad_norm": 0.06032474339008331, "learning_rate": 0.0001192042846212701, "loss": 0.2915, "step": 22890 }, { "epoch": 1.8544232015554116, "grad_norm": 0.06960516422986984, "learning_rate": 0.00011919978396867545, "loss": 0.2857, "step": 22891 }, { "epoch": 1.8545042125729099, "grad_norm": 0.05207527056336403, "learning_rate": 0.00011919528331608083, "loss": 0.2604, "step": 22892 }, { "epoch": 1.8545852235904083, "grad_norm": 0.049169376492500305, "learning_rate": 0.0001191907826634862, "loss": 0.2482, "step": 22893 }, { "epoch": 1.8546662346079068, "grad_norm": 0.047690924257040024, "learning_rate": 0.00011918628201089159, "loss": 0.2336, "step": 22894 }, { "epoch": 1.854747245625405, "grad_norm": 0.05421299859881401, "learning_rate": 0.00011918178135829696, "loss": 0.2772, "step": 22895 }, { "epoch": 1.8548282566429033, "grad_norm": 0.056196488440036774, "learning_rate": 0.00011917728070570234, "loss": 0.2582, "step": 22896 }, { "epoch": 1.8549092676604018, "grad_norm": 0.06981717050075531, "learning_rate": 0.0001191727800531077, "loss": 0.2615, "step": 22897 }, { "epoch": 1.8549902786779002, "grad_norm": 0.06893657892942429, "learning_rate": 0.00011916827940051307, "loss": 0.2834, "step": 22898 }, { "epoch": 1.8550712896953985, "grad_norm": 0.05434175953269005, "learning_rate": 0.00011916377874791847, "loss": 0.2508, "step": 22899 }, { "epoch": 1.855152300712897, "grad_norm": 0.055693451315164566, "learning_rate": 0.00011915927809532383, "loss": 0.2654, "step": 22900 }, { "epoch": 1.8552333117303954, "grad_norm": 0.05152764916419983, "learning_rate": 0.0001191547774427292, "loss": 0.2714, "step": 22901 }, { "epoch": 1.8553143227478937, "grad_norm": 0.05723453685641289, "learning_rate": 0.00011915027679013458, "loss": 0.3002, "step": 22902 }, { "epoch": 1.8553953337653921, "grad_norm": 0.06853441894054413, "learning_rate": 0.00011914577613753994, "loss": 0.281, "step": 22903 }, { "epoch": 1.8554763447828906, "grad_norm": 0.06737224757671356, "learning_rate": 0.00011914127548494531, "loss": 0.2943, "step": 22904 }, { "epoch": 1.8555573558003888, "grad_norm": 0.061686307191848755, "learning_rate": 0.00011913677483235071, "loss": 0.2613, "step": 22905 }, { "epoch": 1.855638366817887, "grad_norm": 0.05410462245345116, "learning_rate": 0.00011913227417975607, "loss": 0.2382, "step": 22906 }, { "epoch": 1.8557193778353858, "grad_norm": 0.05027051642537117, "learning_rate": 0.00011912777352716145, "loss": 0.2349, "step": 22907 }, { "epoch": 1.855800388852884, "grad_norm": 0.06542790681123734, "learning_rate": 0.00011912327287456682, "loss": 0.2648, "step": 22908 }, { "epoch": 1.8558813998703823, "grad_norm": 0.06300154328346252, "learning_rate": 0.00011911877222197219, "loss": 0.2792, "step": 22909 }, { "epoch": 1.8559624108878807, "grad_norm": 0.05228601396083832, "learning_rate": 0.00011911427156937755, "loss": 0.2633, "step": 22910 }, { "epoch": 1.8560434219053792, "grad_norm": 0.05761516094207764, "learning_rate": 0.00011910977091678295, "loss": 0.271, "step": 22911 }, { "epoch": 1.8561244329228774, "grad_norm": 0.05890168249607086, "learning_rate": 0.00011910527026418831, "loss": 0.297, "step": 22912 }, { "epoch": 1.856205443940376, "grad_norm": 0.056910499930381775, "learning_rate": 0.00011910076961159369, "loss": 0.2503, "step": 22913 }, { "epoch": 1.8562864549578744, "grad_norm": 0.06466339528560638, "learning_rate": 0.00011909626895899906, "loss": 0.2915, "step": 22914 }, { "epoch": 1.8563674659753726, "grad_norm": 0.060335755348205566, "learning_rate": 0.00011909176830640443, "loss": 0.28, "step": 22915 }, { "epoch": 1.856448476992871, "grad_norm": 0.05601664260029793, "learning_rate": 0.0001190872676538098, "loss": 0.2845, "step": 22916 }, { "epoch": 1.8565294880103695, "grad_norm": 0.04754171147942543, "learning_rate": 0.0001190827670012152, "loss": 0.2347, "step": 22917 }, { "epoch": 1.8566104990278678, "grad_norm": 0.06566859036684036, "learning_rate": 0.00011907826634862055, "loss": 0.295, "step": 22918 }, { "epoch": 1.856691510045366, "grad_norm": 0.043993715196847916, "learning_rate": 0.00011907376569602593, "loss": 0.2561, "step": 22919 }, { "epoch": 1.8567725210628645, "grad_norm": 0.05596703663468361, "learning_rate": 0.0001190692650434313, "loss": 0.276, "step": 22920 }, { "epoch": 1.856853532080363, "grad_norm": 0.04608578234910965, "learning_rate": 0.00011906476439083668, "loss": 0.2464, "step": 22921 }, { "epoch": 1.8569345430978612, "grad_norm": 0.06296337395906448, "learning_rate": 0.00011906026373824206, "loss": 0.3519, "step": 22922 }, { "epoch": 1.8570155541153597, "grad_norm": 0.05028759315609932, "learning_rate": 0.00011905576308564744, "loss": 0.2515, "step": 22923 }, { "epoch": 1.8570965651328581, "grad_norm": 0.05610145628452301, "learning_rate": 0.0001190512624330528, "loss": 0.2745, "step": 22924 }, { "epoch": 1.8571775761503564, "grad_norm": 0.05411072447896004, "learning_rate": 0.00011904676178045817, "loss": 0.2521, "step": 22925 }, { "epoch": 1.8572585871678549, "grad_norm": 0.047831691801548004, "learning_rate": 0.00011904226112786354, "loss": 0.2593, "step": 22926 }, { "epoch": 1.8573395981853533, "grad_norm": 0.045878808945417404, "learning_rate": 0.00011903776047526892, "loss": 0.2364, "step": 22927 }, { "epoch": 1.8574206092028516, "grad_norm": 0.05370170623064041, "learning_rate": 0.0001190332598226743, "loss": 0.2517, "step": 22928 }, { "epoch": 1.8575016202203498, "grad_norm": 0.06451074033975601, "learning_rate": 0.00011902875917007968, "loss": 0.3187, "step": 22929 }, { "epoch": 1.8575826312378485, "grad_norm": 0.0566953606903553, "learning_rate": 0.00011902425851748504, "loss": 0.2799, "step": 22930 }, { "epoch": 1.8576636422553467, "grad_norm": 0.06248341128230095, "learning_rate": 0.00011901975786489041, "loss": 0.295, "step": 22931 }, { "epoch": 1.857744653272845, "grad_norm": 0.05930943787097931, "learning_rate": 0.00011901525721229579, "loss": 0.2952, "step": 22932 }, { "epoch": 1.8578256642903435, "grad_norm": 0.05437745898962021, "learning_rate": 0.00011901075655970116, "loss": 0.2561, "step": 22933 }, { "epoch": 1.857906675307842, "grad_norm": 0.04583446681499481, "learning_rate": 0.00011900625590710655, "loss": 0.2377, "step": 22934 }, { "epoch": 1.8579876863253402, "grad_norm": 0.06268307566642761, "learning_rate": 0.00011900175525451192, "loss": 0.2747, "step": 22935 }, { "epoch": 1.8580686973428386, "grad_norm": 0.05475815385580063, "learning_rate": 0.00011899725460191728, "loss": 0.2221, "step": 22936 }, { "epoch": 1.858149708360337, "grad_norm": 0.06827379018068314, "learning_rate": 0.00011899275394932265, "loss": 0.2984, "step": 22937 }, { "epoch": 1.8582307193778353, "grad_norm": 0.046038705855607986, "learning_rate": 0.00011898825329672803, "loss": 0.2572, "step": 22938 }, { "epoch": 1.8583117303953338, "grad_norm": 0.0644463449716568, "learning_rate": 0.0001189837526441334, "loss": 0.278, "step": 22939 }, { "epoch": 1.8583927414128323, "grad_norm": 0.061292245984077454, "learning_rate": 0.00011897925199153879, "loss": 0.2861, "step": 22940 }, { "epoch": 1.8584737524303305, "grad_norm": 0.05138443410396576, "learning_rate": 0.00011897475133894416, "loss": 0.2923, "step": 22941 }, { "epoch": 1.8585547634478288, "grad_norm": 0.046949900686740875, "learning_rate": 0.00011897025068634952, "loss": 0.2688, "step": 22942 }, { "epoch": 1.8586357744653272, "grad_norm": 0.052126020193099976, "learning_rate": 0.0001189657500337549, "loss": 0.2404, "step": 22943 }, { "epoch": 1.8587167854828257, "grad_norm": 0.060159020125865936, "learning_rate": 0.00011896124938116027, "loss": 0.2904, "step": 22944 }, { "epoch": 1.858797796500324, "grad_norm": 0.05633169785141945, "learning_rate": 0.00011895674872856564, "loss": 0.2633, "step": 22945 }, { "epoch": 1.8588788075178224, "grad_norm": 0.05782944709062576, "learning_rate": 0.00011895224807597103, "loss": 0.2877, "step": 22946 }, { "epoch": 1.8589598185353209, "grad_norm": 0.04998238384723663, "learning_rate": 0.0001189477474233764, "loss": 0.2682, "step": 22947 }, { "epoch": 1.8590408295528191, "grad_norm": 0.06594345718622208, "learning_rate": 0.00011894324677078176, "loss": 0.3158, "step": 22948 }, { "epoch": 1.8591218405703176, "grad_norm": 0.06773028522729874, "learning_rate": 0.00011893874611818714, "loss": 0.2936, "step": 22949 }, { "epoch": 1.859202851587816, "grad_norm": 0.04972933232784271, "learning_rate": 0.00011893424546559251, "loss": 0.2655, "step": 22950 }, { "epoch": 1.8592838626053143, "grad_norm": 0.06286988407373428, "learning_rate": 0.0001189297448129979, "loss": 0.295, "step": 22951 }, { "epoch": 1.8593648736228126, "grad_norm": 0.05481676012277603, "learning_rate": 0.00011892524416040327, "loss": 0.2801, "step": 22952 }, { "epoch": 1.8594458846403112, "grad_norm": 0.060597535222768784, "learning_rate": 0.00011892074350780864, "loss": 0.2892, "step": 22953 }, { "epoch": 1.8595268956578095, "grad_norm": 0.06514524668455124, "learning_rate": 0.000118916242855214, "loss": 0.2844, "step": 22954 }, { "epoch": 1.8596079066753077, "grad_norm": 0.051577962934970856, "learning_rate": 0.00011891174220261938, "loss": 0.2692, "step": 22955 }, { "epoch": 1.8596889176928062, "grad_norm": 0.0652921050786972, "learning_rate": 0.00011890724155002475, "loss": 0.2766, "step": 22956 }, { "epoch": 1.8597699287103047, "grad_norm": 0.05151598900556564, "learning_rate": 0.00011890274089743014, "loss": 0.2708, "step": 22957 }, { "epoch": 1.859850939727803, "grad_norm": 0.05443265661597252, "learning_rate": 0.00011889824024483551, "loss": 0.2313, "step": 22958 }, { "epoch": 1.8599319507453014, "grad_norm": 0.06853888183832169, "learning_rate": 0.00011889373959224089, "loss": 0.3092, "step": 22959 }, { "epoch": 1.8600129617627998, "grad_norm": 0.06651543080806732, "learning_rate": 0.00011888923893964625, "loss": 0.324, "step": 22960 }, { "epoch": 1.860093972780298, "grad_norm": 0.06134466454386711, "learning_rate": 0.00011888473828705162, "loss": 0.2937, "step": 22961 }, { "epoch": 1.8601749837977966, "grad_norm": 0.10911976546049118, "learning_rate": 0.000118880237634457, "loss": 0.2954, "step": 22962 }, { "epoch": 1.860255994815295, "grad_norm": 0.07487057894468307, "learning_rate": 0.00011887573698186238, "loss": 0.3127, "step": 22963 }, { "epoch": 1.8603370058327933, "grad_norm": 0.04409882053732872, "learning_rate": 0.00011887123632926775, "loss": 0.247, "step": 22964 }, { "epoch": 1.8604180168502915, "grad_norm": 0.06299306452274323, "learning_rate": 0.00011886673567667313, "loss": 0.304, "step": 22965 }, { "epoch": 1.86049902786779, "grad_norm": 0.06553803384304047, "learning_rate": 0.00011886223502407849, "loss": 0.2936, "step": 22966 }, { "epoch": 1.8605800388852884, "grad_norm": 0.05951612442731857, "learning_rate": 0.00011885773437148386, "loss": 0.235, "step": 22967 }, { "epoch": 1.8606610499027867, "grad_norm": 0.05883181095123291, "learning_rate": 0.00011885323371888924, "loss": 0.2694, "step": 22968 }, { "epoch": 1.8607420609202852, "grad_norm": 0.0556342713534832, "learning_rate": 0.00011884873306629462, "loss": 0.2635, "step": 22969 }, { "epoch": 1.8608230719377836, "grad_norm": 0.06378420442342758, "learning_rate": 0.0001188442324137, "loss": 0.2705, "step": 22970 }, { "epoch": 1.8609040829552819, "grad_norm": 0.059256576001644135, "learning_rate": 0.00011883973176110537, "loss": 0.2802, "step": 22971 }, { "epoch": 1.8609850939727803, "grad_norm": 0.06668049097061157, "learning_rate": 0.00011883523110851073, "loss": 0.2518, "step": 22972 }, { "epoch": 1.8610661049902788, "grad_norm": 0.06027417257428169, "learning_rate": 0.0001188307304559161, "loss": 0.2843, "step": 22973 }, { "epoch": 1.861147116007777, "grad_norm": 0.06474443525075912, "learning_rate": 0.0001188262298033215, "loss": 0.3096, "step": 22974 }, { "epoch": 1.8612281270252753, "grad_norm": 0.05829734355211258, "learning_rate": 0.00011882172915072686, "loss": 0.2901, "step": 22975 }, { "epoch": 1.861309138042774, "grad_norm": 0.06739259511232376, "learning_rate": 0.00011881722849813224, "loss": 0.2961, "step": 22976 }, { "epoch": 1.8613901490602722, "grad_norm": 0.06400362402200699, "learning_rate": 0.00011881272784553761, "loss": 0.3034, "step": 22977 }, { "epoch": 1.8614711600777705, "grad_norm": 0.06838753819465637, "learning_rate": 0.00011880822719294298, "loss": 0.3016, "step": 22978 }, { "epoch": 1.861552171095269, "grad_norm": 0.06485739350318909, "learning_rate": 0.00011880372654034834, "loss": 0.2499, "step": 22979 }, { "epoch": 1.8616331821127674, "grad_norm": 0.06050259992480278, "learning_rate": 0.00011879922588775375, "loss": 0.269, "step": 22980 }, { "epoch": 1.8617141931302656, "grad_norm": 0.06689231097698212, "learning_rate": 0.0001187947252351591, "loss": 0.2854, "step": 22981 }, { "epoch": 1.8617952041477641, "grad_norm": 0.0670228898525238, "learning_rate": 0.00011879022458256448, "loss": 0.304, "step": 22982 }, { "epoch": 1.8618762151652626, "grad_norm": 0.05257980152964592, "learning_rate": 0.00011878572392996985, "loss": 0.2687, "step": 22983 }, { "epoch": 1.8619572261827608, "grad_norm": 0.05486253276467323, "learning_rate": 0.00011878122327737523, "loss": 0.3058, "step": 22984 }, { "epoch": 1.862038237200259, "grad_norm": 0.05932844430208206, "learning_rate": 0.00011877672262478059, "loss": 0.2972, "step": 22985 }, { "epoch": 1.8621192482177578, "grad_norm": 0.05444946512579918, "learning_rate": 0.00011877222197218599, "loss": 0.2695, "step": 22986 }, { "epoch": 1.862200259235256, "grad_norm": 0.062618188560009, "learning_rate": 0.00011876772131959135, "loss": 0.2801, "step": 22987 }, { "epoch": 1.8622812702527543, "grad_norm": 0.061284326016902924, "learning_rate": 0.00011876322066699672, "loss": 0.282, "step": 22988 }, { "epoch": 1.8623622812702527, "grad_norm": 0.049036409705877304, "learning_rate": 0.0001187587200144021, "loss": 0.2739, "step": 22989 }, { "epoch": 1.8624432922877512, "grad_norm": 0.053588204085826874, "learning_rate": 0.00011875421936180747, "loss": 0.2873, "step": 22990 }, { "epoch": 1.8625243033052494, "grad_norm": 0.06565236300230026, "learning_rate": 0.00011874971870921283, "loss": 0.2702, "step": 22991 }, { "epoch": 1.862605314322748, "grad_norm": 0.06319651007652283, "learning_rate": 0.00011874521805661823, "loss": 0.2731, "step": 22992 }, { "epoch": 1.8626863253402464, "grad_norm": 0.0632275864481926, "learning_rate": 0.00011874071740402359, "loss": 0.3067, "step": 22993 }, { "epoch": 1.8627673363577446, "grad_norm": 0.06545563787221909, "learning_rate": 0.00011873621675142896, "loss": 0.3187, "step": 22994 }, { "epoch": 1.862848347375243, "grad_norm": 0.0605737641453743, "learning_rate": 0.00011873171609883434, "loss": 0.2951, "step": 22995 }, { "epoch": 1.8629293583927415, "grad_norm": 0.051222190260887146, "learning_rate": 0.00011872721544623971, "loss": 0.246, "step": 22996 }, { "epoch": 1.8630103694102398, "grad_norm": 0.054123520851135254, "learning_rate": 0.00011872271479364507, "loss": 0.2858, "step": 22997 }, { "epoch": 1.863091380427738, "grad_norm": 0.0514240488409996, "learning_rate": 0.00011871821414105047, "loss": 0.225, "step": 22998 }, { "epoch": 1.8631723914452365, "grad_norm": 0.05652325227856636, "learning_rate": 0.00011871371348845583, "loss": 0.2703, "step": 22999 }, { "epoch": 1.863253402462735, "grad_norm": 0.0580107681453228, "learning_rate": 0.0001187092128358612, "loss": 0.2836, "step": 23000 }, { "epoch": 1.8633344134802332, "grad_norm": 0.0637790709733963, "learning_rate": 0.00011870471218326658, "loss": 0.2706, "step": 23001 }, { "epoch": 1.8634154244977317, "grad_norm": 0.053458504378795624, "learning_rate": 0.00011870021153067195, "loss": 0.2236, "step": 23002 }, { "epoch": 1.8634964355152301, "grad_norm": 0.0517844595015049, "learning_rate": 0.00011869571087807734, "loss": 0.2756, "step": 23003 }, { "epoch": 1.8635774465327284, "grad_norm": 0.06121333688497543, "learning_rate": 0.00011869121022548271, "loss": 0.2615, "step": 23004 }, { "epoch": 1.8636584575502269, "grad_norm": 0.07658272236585617, "learning_rate": 0.00011868670957288807, "loss": 0.2696, "step": 23005 }, { "epoch": 1.8637394685677253, "grad_norm": 0.06371904164552689, "learning_rate": 0.00011868220892029345, "loss": 0.2684, "step": 23006 }, { "epoch": 1.8638204795852236, "grad_norm": 0.05698368325829506, "learning_rate": 0.00011867770826769882, "loss": 0.2737, "step": 23007 }, { "epoch": 1.8639014906027218, "grad_norm": 0.065656878054142, "learning_rate": 0.00011867320761510419, "loss": 0.2917, "step": 23008 }, { "epoch": 1.8639825016202205, "grad_norm": 0.053923673927783966, "learning_rate": 0.00011866870696250958, "loss": 0.2278, "step": 23009 }, { "epoch": 1.8640635126377187, "grad_norm": 0.06553805619478226, "learning_rate": 0.00011866420630991495, "loss": 0.2594, "step": 23010 }, { "epoch": 1.864144523655217, "grad_norm": 0.05535467341542244, "learning_rate": 0.00011865970565732031, "loss": 0.2843, "step": 23011 }, { "epoch": 1.8642255346727155, "grad_norm": 0.05905730649828911, "learning_rate": 0.00011865520500472569, "loss": 0.2295, "step": 23012 }, { "epoch": 1.864306545690214, "grad_norm": 0.0544164814054966, "learning_rate": 0.00011865070435213106, "loss": 0.2827, "step": 23013 }, { "epoch": 1.8643875567077122, "grad_norm": 0.055385880172252655, "learning_rate": 0.00011864620369953643, "loss": 0.2745, "step": 23014 }, { "epoch": 1.8644685677252106, "grad_norm": 0.064468614757061, "learning_rate": 0.00011864170304694182, "loss": 0.3074, "step": 23015 }, { "epoch": 1.864549578742709, "grad_norm": 0.05835357680916786, "learning_rate": 0.0001186372023943472, "loss": 0.29, "step": 23016 }, { "epoch": 1.8646305897602073, "grad_norm": 0.05793554335832596, "learning_rate": 0.00011863270174175256, "loss": 0.2754, "step": 23017 }, { "epoch": 1.8647116007777058, "grad_norm": 0.0771193578839302, "learning_rate": 0.00011862820108915793, "loss": 0.2701, "step": 23018 }, { "epoch": 1.8647926117952043, "grad_norm": 0.05392744392156601, "learning_rate": 0.0001186237004365633, "loss": 0.3021, "step": 23019 }, { "epoch": 1.8648736228127025, "grad_norm": 0.05138259381055832, "learning_rate": 0.00011861919978396868, "loss": 0.2318, "step": 23020 }, { "epoch": 1.8649546338302008, "grad_norm": 0.05715327709913254, "learning_rate": 0.00011861469913137406, "loss": 0.2691, "step": 23021 }, { "epoch": 1.8650356448476992, "grad_norm": 0.07019861042499542, "learning_rate": 0.00011861019847877944, "loss": 0.2658, "step": 23022 }, { "epoch": 1.8651166558651977, "grad_norm": 0.05146124213933945, "learning_rate": 0.0001186056978261848, "loss": 0.2548, "step": 23023 }, { "epoch": 1.865197666882696, "grad_norm": 0.06404336541891098, "learning_rate": 0.00011860119717359017, "loss": 0.2954, "step": 23024 }, { "epoch": 1.8652786779001944, "grad_norm": 0.05009884759783745, "learning_rate": 0.00011859669652099554, "loss": 0.3042, "step": 23025 }, { "epoch": 1.8653596889176929, "grad_norm": 0.060626138001680374, "learning_rate": 0.00011859219586840093, "loss": 0.3105, "step": 23026 }, { "epoch": 1.8654406999351911, "grad_norm": 0.055893126875162125, "learning_rate": 0.0001185876952158063, "loss": 0.2749, "step": 23027 }, { "epoch": 1.8655217109526896, "grad_norm": 0.055800460278987885, "learning_rate": 0.00011858319456321168, "loss": 0.2762, "step": 23028 }, { "epoch": 1.865602721970188, "grad_norm": 0.05607381463050842, "learning_rate": 0.00011857869391061704, "loss": 0.2577, "step": 23029 }, { "epoch": 1.8656837329876863, "grad_norm": 0.054439570754766464, "learning_rate": 0.00011857419325802241, "loss": 0.2712, "step": 23030 }, { "epoch": 1.8657647440051845, "grad_norm": 0.04881187528371811, "learning_rate": 0.00011856969260542779, "loss": 0.2301, "step": 23031 }, { "epoch": 1.8658457550226832, "grad_norm": 0.05401236563920975, "learning_rate": 0.00011856519195283317, "loss": 0.2479, "step": 23032 }, { "epoch": 1.8659267660401815, "grad_norm": 0.05074911192059517, "learning_rate": 0.00011856069130023855, "loss": 0.2441, "step": 23033 }, { "epoch": 1.8660077770576797, "grad_norm": 0.04326274245977402, "learning_rate": 0.00011855619064764392, "loss": 0.2199, "step": 23034 }, { "epoch": 1.8660887880751782, "grad_norm": 0.07047049701213837, "learning_rate": 0.00011855168999504928, "loss": 0.3273, "step": 23035 }, { "epoch": 1.8661697990926767, "grad_norm": 0.05269942805171013, "learning_rate": 0.00011854718934245465, "loss": 0.2514, "step": 23036 }, { "epoch": 1.866250810110175, "grad_norm": 0.05708518996834755, "learning_rate": 0.00011854268868986003, "loss": 0.2811, "step": 23037 }, { "epoch": 1.8663318211276734, "grad_norm": 0.05749276280403137, "learning_rate": 0.00011853818803726541, "loss": 0.264, "step": 23038 }, { "epoch": 1.8664128321451718, "grad_norm": 0.064975805580616, "learning_rate": 0.00011853368738467079, "loss": 0.3185, "step": 23039 }, { "epoch": 1.86649384316267, "grad_norm": 0.05995302274823189, "learning_rate": 0.00011852918673207616, "loss": 0.2414, "step": 23040 }, { "epoch": 1.8665748541801686, "grad_norm": 0.05492790415883064, "learning_rate": 0.00011852468607948152, "loss": 0.2714, "step": 23041 }, { "epoch": 1.866655865197667, "grad_norm": 0.06112586334347725, "learning_rate": 0.0001185201854268869, "loss": 0.2785, "step": 23042 }, { "epoch": 1.8667368762151653, "grad_norm": 0.05609976127743721, "learning_rate": 0.00011851568477429227, "loss": 0.2709, "step": 23043 }, { "epoch": 1.8668178872326635, "grad_norm": 0.06333284080028534, "learning_rate": 0.00011851118412169766, "loss": 0.3069, "step": 23044 }, { "epoch": 1.866898898250162, "grad_norm": 0.053372643887996674, "learning_rate": 0.00011850668346910303, "loss": 0.2847, "step": 23045 }, { "epoch": 1.8669799092676604, "grad_norm": 0.06115709990262985, "learning_rate": 0.0001185021828165084, "loss": 0.2317, "step": 23046 }, { "epoch": 1.8670609202851587, "grad_norm": 0.04994625970721245, "learning_rate": 0.00011849768216391378, "loss": 0.2491, "step": 23047 }, { "epoch": 1.8671419313026572, "grad_norm": 0.049292415380477905, "learning_rate": 0.00011849318151131914, "loss": 0.2145, "step": 23048 }, { "epoch": 1.8672229423201556, "grad_norm": 0.05687323212623596, "learning_rate": 0.00011848868085872451, "loss": 0.2504, "step": 23049 }, { "epoch": 1.8673039533376539, "grad_norm": 0.05730355903506279, "learning_rate": 0.0001184841802061299, "loss": 0.238, "step": 23050 }, { "epoch": 1.8673849643551523, "grad_norm": 0.062119435518980026, "learning_rate": 0.00011847967955353527, "loss": 0.2294, "step": 23051 }, { "epoch": 1.8674659753726508, "grad_norm": 0.05226528272032738, "learning_rate": 0.00011847517890094064, "loss": 0.295, "step": 23052 }, { "epoch": 1.867546986390149, "grad_norm": 0.07073201984167099, "learning_rate": 0.00011847067824834602, "loss": 0.277, "step": 23053 }, { "epoch": 1.8676279974076473, "grad_norm": 0.0602298267185688, "learning_rate": 0.00011846617759575138, "loss": 0.2832, "step": 23054 }, { "epoch": 1.867709008425146, "grad_norm": 0.06587371230125427, "learning_rate": 0.00011846167694315678, "loss": 0.2758, "step": 23055 }, { "epoch": 1.8677900194426442, "grad_norm": 0.06909722834825516, "learning_rate": 0.00011845717629056214, "loss": 0.2748, "step": 23056 }, { "epoch": 1.8678710304601425, "grad_norm": 0.055311381816864014, "learning_rate": 0.00011845267563796751, "loss": 0.2827, "step": 23057 }, { "epoch": 1.867952041477641, "grad_norm": 0.04789602756500244, "learning_rate": 0.00011844817498537289, "loss": 0.2163, "step": 23058 }, { "epoch": 1.8680330524951394, "grad_norm": 0.06209869682788849, "learning_rate": 0.00011844367433277826, "loss": 0.2775, "step": 23059 }, { "epoch": 1.8681140635126376, "grad_norm": 0.05467294529080391, "learning_rate": 0.00011843917368018362, "loss": 0.2751, "step": 23060 }, { "epoch": 1.8681950745301361, "grad_norm": 0.061567649245262146, "learning_rate": 0.00011843467302758902, "loss": 0.2685, "step": 23061 }, { "epoch": 1.8682760855476346, "grad_norm": 0.06625506281852722, "learning_rate": 0.00011843017237499438, "loss": 0.2651, "step": 23062 }, { "epoch": 1.8683570965651328, "grad_norm": 0.05260910838842392, "learning_rate": 0.00011842567172239975, "loss": 0.2451, "step": 23063 }, { "epoch": 1.8684381075826313, "grad_norm": 0.04116278514266014, "learning_rate": 0.00011842117106980513, "loss": 0.2206, "step": 23064 }, { "epoch": 1.8685191186001298, "grad_norm": 0.05465328320860863, "learning_rate": 0.0001184166704172105, "loss": 0.2535, "step": 23065 }, { "epoch": 1.868600129617628, "grad_norm": 0.06118228659033775, "learning_rate": 0.00011841216976461586, "loss": 0.2564, "step": 23066 }, { "epoch": 1.8686811406351262, "grad_norm": 0.045729734003543854, "learning_rate": 0.00011840766911202126, "loss": 0.2892, "step": 23067 }, { "epoch": 1.8687621516526247, "grad_norm": 0.06286770105361938, "learning_rate": 0.00011840316845942662, "loss": 0.3109, "step": 23068 }, { "epoch": 1.8688431626701232, "grad_norm": 0.05661400035023689, "learning_rate": 0.000118398667806832, "loss": 0.2494, "step": 23069 }, { "epoch": 1.8689241736876214, "grad_norm": 0.059357624500989914, "learning_rate": 0.00011839416715423737, "loss": 0.2707, "step": 23070 }, { "epoch": 1.86900518470512, "grad_norm": 0.05716922879219055, "learning_rate": 0.00011838966650164274, "loss": 0.2725, "step": 23071 }, { "epoch": 1.8690861957226184, "grad_norm": 0.05681823939085007, "learning_rate": 0.0001183851658490481, "loss": 0.2519, "step": 23072 }, { "epoch": 1.8691672067401166, "grad_norm": 0.05614323541522026, "learning_rate": 0.0001183806651964535, "loss": 0.2836, "step": 23073 }, { "epoch": 1.869248217757615, "grad_norm": 0.059444133192300797, "learning_rate": 0.00011837616454385886, "loss": 0.278, "step": 23074 }, { "epoch": 1.8693292287751135, "grad_norm": 0.0667402595281601, "learning_rate": 0.00011837166389126424, "loss": 0.2654, "step": 23075 }, { "epoch": 1.8694102397926118, "grad_norm": 0.059592802077531815, "learning_rate": 0.00011836716323866961, "loss": 0.2652, "step": 23076 }, { "epoch": 1.86949125081011, "grad_norm": 0.05447734519839287, "learning_rate": 0.00011836266258607498, "loss": 0.2343, "step": 23077 }, { "epoch": 1.8695722618276087, "grad_norm": 0.06101779267191887, "learning_rate": 0.00011835816193348035, "loss": 0.3185, "step": 23078 }, { "epoch": 1.869653272845107, "grad_norm": 0.05614207684993744, "learning_rate": 0.00011835366128088575, "loss": 0.2759, "step": 23079 }, { "epoch": 1.8697342838626052, "grad_norm": 0.05248326435685158, "learning_rate": 0.0001183491606282911, "loss": 0.2659, "step": 23080 }, { "epoch": 1.8698152948801037, "grad_norm": 0.07007522135972977, "learning_rate": 0.00011834465997569648, "loss": 0.3193, "step": 23081 }, { "epoch": 1.8698963058976021, "grad_norm": 0.04993303865194321, "learning_rate": 0.00011834015932310185, "loss": 0.226, "step": 23082 }, { "epoch": 1.8699773169151004, "grad_norm": 0.058122795075178146, "learning_rate": 0.00011833565867050723, "loss": 0.263, "step": 23083 }, { "epoch": 1.8700583279325989, "grad_norm": 0.049717120826244354, "learning_rate": 0.00011833115801791261, "loss": 0.2541, "step": 23084 }, { "epoch": 1.8701393389500973, "grad_norm": 0.06780257821083069, "learning_rate": 0.00011832665736531799, "loss": 0.2463, "step": 23085 }, { "epoch": 1.8702203499675956, "grad_norm": 0.06661541759967804, "learning_rate": 0.00011832215671272335, "loss": 0.3024, "step": 23086 }, { "epoch": 1.8703013609850938, "grad_norm": 0.050316132605075836, "learning_rate": 0.00011831765606012872, "loss": 0.2392, "step": 23087 }, { "epoch": 1.8703823720025925, "grad_norm": 0.07037214189767838, "learning_rate": 0.0001183131554075341, "loss": 0.281, "step": 23088 }, { "epoch": 1.8704633830200907, "grad_norm": 0.08435565233230591, "learning_rate": 0.00011830865475493947, "loss": 0.3307, "step": 23089 }, { "epoch": 1.870544394037589, "grad_norm": 0.06200847774744034, "learning_rate": 0.00011830415410234486, "loss": 0.2522, "step": 23090 }, { "epoch": 1.8706254050550875, "grad_norm": 0.05395513400435448, "learning_rate": 0.00011829965344975023, "loss": 0.28, "step": 23091 }, { "epoch": 1.870706416072586, "grad_norm": 0.047511208802461624, "learning_rate": 0.00011829515279715559, "loss": 0.2718, "step": 23092 }, { "epoch": 1.8707874270900842, "grad_norm": 0.05486256256699562, "learning_rate": 0.00011829065214456096, "loss": 0.2614, "step": 23093 }, { "epoch": 1.8708684381075826, "grad_norm": 0.06447124481201172, "learning_rate": 0.00011828615149196634, "loss": 0.2584, "step": 23094 }, { "epoch": 1.870949449125081, "grad_norm": 0.05701237916946411, "learning_rate": 0.00011828165083937171, "loss": 0.3066, "step": 23095 }, { "epoch": 1.8710304601425793, "grad_norm": 0.06130368262529373, "learning_rate": 0.0001182771501867771, "loss": 0.281, "step": 23096 }, { "epoch": 1.8711114711600778, "grad_norm": 0.05731342360377312, "learning_rate": 0.00011827264953418247, "loss": 0.2568, "step": 23097 }, { "epoch": 1.8711924821775763, "grad_norm": 0.06326236575841904, "learning_rate": 0.00011826814888158783, "loss": 0.2683, "step": 23098 }, { "epoch": 1.8712734931950745, "grad_norm": 0.04846350848674774, "learning_rate": 0.0001182636482289932, "loss": 0.2413, "step": 23099 }, { "epoch": 1.8713545042125728, "grad_norm": 0.059385381639003754, "learning_rate": 0.00011825914757639858, "loss": 0.2771, "step": 23100 }, { "epoch": 1.8714355152300715, "grad_norm": 0.051562074571847916, "learning_rate": 0.00011825464692380395, "loss": 0.2283, "step": 23101 }, { "epoch": 1.8715165262475697, "grad_norm": 0.05749623849987984, "learning_rate": 0.00011825014627120934, "loss": 0.2467, "step": 23102 }, { "epoch": 1.871597537265068, "grad_norm": 0.05762708559632301, "learning_rate": 0.00011824564561861471, "loss": 0.3291, "step": 23103 }, { "epoch": 1.8716785482825664, "grad_norm": 0.07784654945135117, "learning_rate": 0.00011824114496602007, "loss": 0.3403, "step": 23104 }, { "epoch": 1.8717595593000649, "grad_norm": 0.07061202079057693, "learning_rate": 0.00011823664431342545, "loss": 0.2642, "step": 23105 }, { "epoch": 1.8718405703175631, "grad_norm": 0.07585369050502777, "learning_rate": 0.00011823214366083082, "loss": 0.331, "step": 23106 }, { "epoch": 1.8719215813350616, "grad_norm": 0.05564757063984871, "learning_rate": 0.0001182276430082362, "loss": 0.2967, "step": 23107 }, { "epoch": 1.87200259235256, "grad_norm": 0.07339231669902802, "learning_rate": 0.00011822314235564158, "loss": 0.2784, "step": 23108 }, { "epoch": 1.8720836033700583, "grad_norm": 0.06239980831742287, "learning_rate": 0.00011821864170304695, "loss": 0.2765, "step": 23109 }, { "epoch": 1.8721646143875565, "grad_norm": 0.06124987080693245, "learning_rate": 0.00011821414105045231, "loss": 0.2962, "step": 23110 }, { "epoch": 1.8722456254050552, "grad_norm": 0.05769716203212738, "learning_rate": 0.00011820964039785769, "loss": 0.3079, "step": 23111 }, { "epoch": 1.8723266364225535, "grad_norm": 0.07039839774370193, "learning_rate": 0.00011820513974526306, "loss": 0.2915, "step": 23112 }, { "epoch": 1.8724076474400517, "grad_norm": 0.05476633086800575, "learning_rate": 0.00011820063909266845, "loss": 0.2654, "step": 23113 }, { "epoch": 1.8724886584575502, "grad_norm": 0.04585075005888939, "learning_rate": 0.00011819613844007382, "loss": 0.2339, "step": 23114 }, { "epoch": 1.8725696694750487, "grad_norm": 0.058606356382369995, "learning_rate": 0.0001181916377874792, "loss": 0.2471, "step": 23115 }, { "epoch": 1.872650680492547, "grad_norm": 0.0552472360432148, "learning_rate": 0.00011818713713488457, "loss": 0.2488, "step": 23116 }, { "epoch": 1.8727316915100454, "grad_norm": 0.04606689140200615, "learning_rate": 0.00011818263648228993, "loss": 0.2438, "step": 23117 }, { "epoch": 1.8728127025275438, "grad_norm": 0.05656420439481735, "learning_rate": 0.0001181781358296953, "loss": 0.3072, "step": 23118 }, { "epoch": 1.872893713545042, "grad_norm": 0.06550390273332596, "learning_rate": 0.00011817363517710069, "loss": 0.2704, "step": 23119 }, { "epoch": 1.8729747245625405, "grad_norm": 0.05345557630062103, "learning_rate": 0.00011816913452450606, "loss": 0.2793, "step": 23120 }, { "epoch": 1.873055735580039, "grad_norm": 0.06245110183954239, "learning_rate": 0.00011816463387191144, "loss": 0.2841, "step": 23121 }, { "epoch": 1.8731367465975373, "grad_norm": 0.055512577295303345, "learning_rate": 0.00011816013321931681, "loss": 0.2806, "step": 23122 }, { "epoch": 1.8732177576150355, "grad_norm": 0.0590149387717247, "learning_rate": 0.00011815563256672217, "loss": 0.2875, "step": 23123 }, { "epoch": 1.873298768632534, "grad_norm": 0.05215387046337128, "learning_rate": 0.00011815113191412754, "loss": 0.2524, "step": 23124 }, { "epoch": 1.8733797796500324, "grad_norm": 0.05460435152053833, "learning_rate": 0.00011814663126153293, "loss": 0.2551, "step": 23125 }, { "epoch": 1.8734607906675307, "grad_norm": 0.06229287013411522, "learning_rate": 0.0001181421306089383, "loss": 0.2828, "step": 23126 }, { "epoch": 1.8735418016850292, "grad_norm": 0.06163933128118515, "learning_rate": 0.00011813762995634368, "loss": 0.2937, "step": 23127 }, { "epoch": 1.8736228127025276, "grad_norm": 0.0614483542740345, "learning_rate": 0.00011813312930374905, "loss": 0.2905, "step": 23128 }, { "epoch": 1.8737038237200259, "grad_norm": 0.06532017141580582, "learning_rate": 0.00011812862865115441, "loss": 0.2622, "step": 23129 }, { "epoch": 1.8737848347375243, "grad_norm": 0.0652865469455719, "learning_rate": 0.00011812412799855979, "loss": 0.3087, "step": 23130 }, { "epoch": 1.8738658457550228, "grad_norm": 0.05394424498081207, "learning_rate": 0.00011811962734596517, "loss": 0.2498, "step": 23131 }, { "epoch": 1.873946856772521, "grad_norm": 0.05773965269327164, "learning_rate": 0.00011811512669337055, "loss": 0.2645, "step": 23132 }, { "epoch": 1.8740278677900193, "grad_norm": 0.06872682273387909, "learning_rate": 0.00011811062604077592, "loss": 0.3171, "step": 23133 }, { "epoch": 1.874108878807518, "grad_norm": 0.05397693067789078, "learning_rate": 0.0001181061253881813, "loss": 0.2779, "step": 23134 }, { "epoch": 1.8741898898250162, "grad_norm": 0.062232982367277145, "learning_rate": 0.00011810162473558665, "loss": 0.2677, "step": 23135 }, { "epoch": 1.8742709008425145, "grad_norm": 0.06239263713359833, "learning_rate": 0.00011809712408299205, "loss": 0.3113, "step": 23136 }, { "epoch": 1.874351911860013, "grad_norm": 0.07032819837331772, "learning_rate": 0.00011809262343039741, "loss": 0.2985, "step": 23137 }, { "epoch": 1.8744329228775114, "grad_norm": 0.06495737284421921, "learning_rate": 0.00011808812277780279, "loss": 0.3248, "step": 23138 }, { "epoch": 1.8745139338950096, "grad_norm": 0.06768009811639786, "learning_rate": 0.00011808362212520816, "loss": 0.2638, "step": 23139 }, { "epoch": 1.874594944912508, "grad_norm": 0.060301005840301514, "learning_rate": 0.00011807912147261354, "loss": 0.3211, "step": 23140 }, { "epoch": 1.8746759559300066, "grad_norm": 0.05018927529454231, "learning_rate": 0.0001180746208200189, "loss": 0.2727, "step": 23141 }, { "epoch": 1.8747569669475048, "grad_norm": 0.05934358388185501, "learning_rate": 0.0001180701201674243, "loss": 0.2497, "step": 23142 }, { "epoch": 1.8748379779650033, "grad_norm": 0.06375506520271301, "learning_rate": 0.00011806561951482966, "loss": 0.244, "step": 23143 }, { "epoch": 1.8749189889825018, "grad_norm": 0.04853232577443123, "learning_rate": 0.00011806111886223503, "loss": 0.2322, "step": 23144 }, { "epoch": 1.875, "grad_norm": 0.07027478516101837, "learning_rate": 0.0001180566182096404, "loss": 0.2801, "step": 23145 }, { "epoch": 1.8750810110174982, "grad_norm": 0.057535093277692795, "learning_rate": 0.00011805211755704578, "loss": 0.2648, "step": 23146 }, { "epoch": 1.8751620220349967, "grad_norm": 0.059583697468042374, "learning_rate": 0.00011804761690445114, "loss": 0.2737, "step": 23147 }, { "epoch": 1.8752430330524952, "grad_norm": 0.05222093313932419, "learning_rate": 0.00011804311625185654, "loss": 0.2267, "step": 23148 }, { "epoch": 1.8753240440699934, "grad_norm": 0.0696893259882927, "learning_rate": 0.0001180386155992619, "loss": 0.2535, "step": 23149 }, { "epoch": 1.875405055087492, "grad_norm": 0.05335691198706627, "learning_rate": 0.00011803411494666727, "loss": 0.2489, "step": 23150 }, { "epoch": 1.8754860661049904, "grad_norm": 0.055933184921741486, "learning_rate": 0.00011802961429407265, "loss": 0.294, "step": 23151 }, { "epoch": 1.8755670771224886, "grad_norm": 0.05710383132100105, "learning_rate": 0.00011802511364147802, "loss": 0.2836, "step": 23152 }, { "epoch": 1.875648088139987, "grad_norm": 0.05327145382761955, "learning_rate": 0.00011802061298888338, "loss": 0.2964, "step": 23153 }, { "epoch": 1.8757290991574855, "grad_norm": 0.05328657105565071, "learning_rate": 0.00011801611233628878, "loss": 0.2742, "step": 23154 }, { "epoch": 1.8758101101749838, "grad_norm": 0.052094716578722, "learning_rate": 0.00011801161168369414, "loss": 0.2712, "step": 23155 }, { "epoch": 1.875891121192482, "grad_norm": 0.05217659845948219, "learning_rate": 0.00011800711103109951, "loss": 0.2587, "step": 23156 }, { "epoch": 1.8759721322099807, "grad_norm": 0.05159446597099304, "learning_rate": 0.00011800261037850489, "loss": 0.2881, "step": 23157 }, { "epoch": 1.876053143227479, "grad_norm": 0.07257717847824097, "learning_rate": 0.00011799810972591026, "loss": 0.2887, "step": 23158 }, { "epoch": 1.8761341542449772, "grad_norm": 0.07235502451658249, "learning_rate": 0.00011799360907331565, "loss": 0.2695, "step": 23159 }, { "epoch": 1.8762151652624757, "grad_norm": 0.0665421336889267, "learning_rate": 0.00011798910842072102, "loss": 0.2892, "step": 23160 }, { "epoch": 1.8762961762799741, "grad_norm": 0.0503656379878521, "learning_rate": 0.00011798460776812638, "loss": 0.2646, "step": 23161 }, { "epoch": 1.8763771872974724, "grad_norm": 0.060483045876026154, "learning_rate": 0.00011798010711553175, "loss": 0.2591, "step": 23162 }, { "epoch": 1.8764581983149708, "grad_norm": 0.060321781784296036, "learning_rate": 0.00011797560646293713, "loss": 0.2755, "step": 23163 }, { "epoch": 1.8765392093324693, "grad_norm": 0.06695659458637238, "learning_rate": 0.0001179711058103425, "loss": 0.2676, "step": 23164 }, { "epoch": 1.8766202203499676, "grad_norm": 0.052316345274448395, "learning_rate": 0.00011796660515774789, "loss": 0.2758, "step": 23165 }, { "epoch": 1.876701231367466, "grad_norm": 0.061475787311792374, "learning_rate": 0.00011796210450515326, "loss": 0.2605, "step": 23166 }, { "epoch": 1.8767822423849645, "grad_norm": 0.06417156010866165, "learning_rate": 0.00011795760385255862, "loss": 0.2547, "step": 23167 }, { "epoch": 1.8768632534024627, "grad_norm": 0.0625377893447876, "learning_rate": 0.000117953103199964, "loss": 0.2984, "step": 23168 }, { "epoch": 1.876944264419961, "grad_norm": 0.06442003697156906, "learning_rate": 0.00011794860254736937, "loss": 0.2942, "step": 23169 }, { "epoch": 1.8770252754374595, "grad_norm": 0.06341394782066345, "learning_rate": 0.00011794410189477474, "loss": 0.2621, "step": 23170 }, { "epoch": 1.877106286454958, "grad_norm": 0.0712093785405159, "learning_rate": 0.00011793960124218013, "loss": 0.3148, "step": 23171 }, { "epoch": 1.8771872974724562, "grad_norm": 0.06914816051721573, "learning_rate": 0.0001179351005895855, "loss": 0.2966, "step": 23172 }, { "epoch": 1.8772683084899546, "grad_norm": 0.06750152260065079, "learning_rate": 0.00011793059993699086, "loss": 0.3091, "step": 23173 }, { "epoch": 1.877349319507453, "grad_norm": 0.06350599229335785, "learning_rate": 0.00011792609928439624, "loss": 0.3225, "step": 23174 }, { "epoch": 1.8774303305249513, "grad_norm": 0.05254534259438515, "learning_rate": 0.00011792159863180161, "loss": 0.2607, "step": 23175 }, { "epoch": 1.8775113415424498, "grad_norm": 0.0773734524846077, "learning_rate": 0.00011791709797920699, "loss": 0.2863, "step": 23176 }, { "epoch": 1.8775923525599483, "grad_norm": 0.0571201853454113, "learning_rate": 0.00011791259732661237, "loss": 0.2958, "step": 23177 }, { "epoch": 1.8776733635774465, "grad_norm": 0.06771673262119293, "learning_rate": 0.00011790809667401775, "loss": 0.3361, "step": 23178 }, { "epoch": 1.8777543745949448, "grad_norm": 0.05999191105365753, "learning_rate": 0.0001179035960214231, "loss": 0.2586, "step": 23179 }, { "epoch": 1.8778353856124435, "grad_norm": 0.053210508078336716, "learning_rate": 0.00011789909536882848, "loss": 0.2406, "step": 23180 }, { "epoch": 1.8779163966299417, "grad_norm": 0.06333492696285248, "learning_rate": 0.00011789459471623385, "loss": 0.2541, "step": 23181 }, { "epoch": 1.87799740764744, "grad_norm": 0.06045440956950188, "learning_rate": 0.00011789009406363923, "loss": 0.2516, "step": 23182 }, { "epoch": 1.8780784186649384, "grad_norm": 0.06109251827001572, "learning_rate": 0.00011788559341104461, "loss": 0.2754, "step": 23183 }, { "epoch": 1.8781594296824369, "grad_norm": 0.054190631955862045, "learning_rate": 0.00011788109275844999, "loss": 0.2625, "step": 23184 }, { "epoch": 1.8782404406999351, "grad_norm": 0.05592363327741623, "learning_rate": 0.00011787659210585536, "loss": 0.2804, "step": 23185 }, { "epoch": 1.8783214517174336, "grad_norm": 0.05816115438938141, "learning_rate": 0.00011787209145326072, "loss": 0.2654, "step": 23186 }, { "epoch": 1.878402462734932, "grad_norm": 0.04598617181181908, "learning_rate": 0.0001178675908006661, "loss": 0.2338, "step": 23187 }, { "epoch": 1.8784834737524303, "grad_norm": 0.05558772012591362, "learning_rate": 0.00011786309014807148, "loss": 0.2792, "step": 23188 }, { "epoch": 1.8785644847699285, "grad_norm": 0.054704394191503525, "learning_rate": 0.00011785858949547686, "loss": 0.2761, "step": 23189 }, { "epoch": 1.8786454957874272, "grad_norm": 0.05882862210273743, "learning_rate": 0.00011785408884288223, "loss": 0.2817, "step": 23190 }, { "epoch": 1.8787265068049255, "grad_norm": 0.05930648744106293, "learning_rate": 0.0001178495881902876, "loss": 0.2687, "step": 23191 }, { "epoch": 1.8788075178224237, "grad_norm": 0.06882467865943909, "learning_rate": 0.00011784508753769296, "loss": 0.2748, "step": 23192 }, { "epoch": 1.8788885288399222, "grad_norm": 0.0692213848233223, "learning_rate": 0.00011784058688509834, "loss": 0.2884, "step": 23193 }, { "epoch": 1.8789695398574207, "grad_norm": 0.06592894345521927, "learning_rate": 0.00011783608623250372, "loss": 0.3105, "step": 23194 }, { "epoch": 1.879050550874919, "grad_norm": 0.053590722382068634, "learning_rate": 0.0001178315855799091, "loss": 0.28, "step": 23195 }, { "epoch": 1.8791315618924174, "grad_norm": 0.06279432028532028, "learning_rate": 0.00011782708492731447, "loss": 0.2687, "step": 23196 }, { "epoch": 1.8792125729099158, "grad_norm": 0.06744203716516495, "learning_rate": 0.00011782258427471984, "loss": 0.252, "step": 23197 }, { "epoch": 1.879293583927414, "grad_norm": 0.06443815678358078, "learning_rate": 0.0001178180836221252, "loss": 0.2666, "step": 23198 }, { "epoch": 1.8793745949449125, "grad_norm": 0.05662263557314873, "learning_rate": 0.00011781358296953058, "loss": 0.251, "step": 23199 }, { "epoch": 1.879455605962411, "grad_norm": 0.05632168427109718, "learning_rate": 0.00011780908231693597, "loss": 0.3433, "step": 23200 }, { "epoch": 1.8795366169799093, "grad_norm": 0.05025065690279007, "learning_rate": 0.00011780458166434134, "loss": 0.256, "step": 23201 }, { "epoch": 1.8796176279974075, "grad_norm": 0.05598134547472, "learning_rate": 0.00011780008101174671, "loss": 0.2565, "step": 23202 }, { "epoch": 1.8796986390149062, "grad_norm": 0.048147015273571014, "learning_rate": 0.00011779558035915209, "loss": 0.2558, "step": 23203 }, { "epoch": 1.8797796500324044, "grad_norm": 0.052048951387405396, "learning_rate": 0.00011779107970655745, "loss": 0.2569, "step": 23204 }, { "epoch": 1.8798606610499027, "grad_norm": 0.050691064447164536, "learning_rate": 0.00011778657905396282, "loss": 0.2632, "step": 23205 }, { "epoch": 1.8799416720674011, "grad_norm": 0.058509405702352524, "learning_rate": 0.00011778207840136821, "loss": 0.2606, "step": 23206 }, { "epoch": 1.8800226830848996, "grad_norm": 0.06845378130674362, "learning_rate": 0.00011777757774877358, "loss": 0.2833, "step": 23207 }, { "epoch": 1.8801036941023979, "grad_norm": 0.055425599217414856, "learning_rate": 0.00011777307709617895, "loss": 0.2456, "step": 23208 }, { "epoch": 1.8801847051198963, "grad_norm": 0.056326232850551605, "learning_rate": 0.00011776857644358433, "loss": 0.3153, "step": 23209 }, { "epoch": 1.8802657161373948, "grad_norm": 0.0537690632045269, "learning_rate": 0.00011776407579098969, "loss": 0.2587, "step": 23210 }, { "epoch": 1.880346727154893, "grad_norm": 0.055282801389694214, "learning_rate": 0.00011775957513839506, "loss": 0.2548, "step": 23211 }, { "epoch": 1.8804277381723913, "grad_norm": 0.056459132581949234, "learning_rate": 0.00011775507448580045, "loss": 0.2467, "step": 23212 }, { "epoch": 1.88050874918989, "grad_norm": 0.049248144030570984, "learning_rate": 0.00011775057383320582, "loss": 0.2457, "step": 23213 }, { "epoch": 1.8805897602073882, "grad_norm": 0.061972279101610184, "learning_rate": 0.0001177460731806112, "loss": 0.2712, "step": 23214 }, { "epoch": 1.8806707712248865, "grad_norm": 0.05165662243962288, "learning_rate": 0.00011774157252801657, "loss": 0.2853, "step": 23215 }, { "epoch": 1.880751782242385, "grad_norm": 0.05435626208782196, "learning_rate": 0.00011773707187542193, "loss": 0.2512, "step": 23216 }, { "epoch": 1.8808327932598834, "grad_norm": 0.05616835877299309, "learning_rate": 0.00011773257122282733, "loss": 0.2569, "step": 23217 }, { "epoch": 1.8809138042773816, "grad_norm": 0.05482323840260506, "learning_rate": 0.00011772807057023269, "loss": 0.2906, "step": 23218 }, { "epoch": 1.88099481529488, "grad_norm": 0.05005818232893944, "learning_rate": 0.00011772356991763806, "loss": 0.278, "step": 23219 }, { "epoch": 1.8810758263123786, "grad_norm": 0.07082431018352509, "learning_rate": 0.00011771906926504344, "loss": 0.2867, "step": 23220 }, { "epoch": 1.8811568373298768, "grad_norm": 0.056544482707977295, "learning_rate": 0.00011771456861244881, "loss": 0.298, "step": 23221 }, { "epoch": 1.8812378483473753, "grad_norm": 0.08427192270755768, "learning_rate": 0.00011771006795985417, "loss": 0.2707, "step": 23222 }, { "epoch": 1.8813188593648738, "grad_norm": 0.06930147111415863, "learning_rate": 0.00011770556730725957, "loss": 0.2753, "step": 23223 }, { "epoch": 1.881399870382372, "grad_norm": 0.06457947194576263, "learning_rate": 0.00011770106665466493, "loss": 0.2841, "step": 23224 }, { "epoch": 1.8814808813998702, "grad_norm": 0.06023853272199631, "learning_rate": 0.0001176965660020703, "loss": 0.2636, "step": 23225 }, { "epoch": 1.8815618924173687, "grad_norm": 0.05933365970849991, "learning_rate": 0.00011769206534947568, "loss": 0.2428, "step": 23226 }, { "epoch": 1.8816429034348672, "grad_norm": 0.05406753346323967, "learning_rate": 0.00011768756469688105, "loss": 0.2264, "step": 23227 }, { "epoch": 1.8817239144523654, "grad_norm": 0.05009690299630165, "learning_rate": 0.00011768306404428641, "loss": 0.2921, "step": 23228 }, { "epoch": 1.8818049254698639, "grad_norm": 0.062386028468608856, "learning_rate": 0.00011767856339169181, "loss": 0.274, "step": 23229 }, { "epoch": 1.8818859364873624, "grad_norm": 0.060382407158613205, "learning_rate": 0.00011767406273909717, "loss": 0.2765, "step": 23230 }, { "epoch": 1.8819669475048606, "grad_norm": 0.05769483372569084, "learning_rate": 0.00011766956208650255, "loss": 0.2419, "step": 23231 }, { "epoch": 1.882047958522359, "grad_norm": 0.04864031821489334, "learning_rate": 0.00011766506143390792, "loss": 0.2734, "step": 23232 }, { "epoch": 1.8821289695398575, "grad_norm": 0.05230722203850746, "learning_rate": 0.0001176605607813133, "loss": 0.2695, "step": 23233 }, { "epoch": 1.8822099805573558, "grad_norm": 0.050069410353899, "learning_rate": 0.00011765606012871865, "loss": 0.2594, "step": 23234 }, { "epoch": 1.882290991574854, "grad_norm": 0.05620177835226059, "learning_rate": 0.00011765155947612405, "loss": 0.2829, "step": 23235 }, { "epoch": 1.8823720025923527, "grad_norm": 0.059488844126462936, "learning_rate": 0.00011764705882352942, "loss": 0.2484, "step": 23236 }, { "epoch": 1.882453013609851, "grad_norm": 0.049712467938661575, "learning_rate": 0.00011764255817093479, "loss": 0.2615, "step": 23237 }, { "epoch": 1.8825340246273492, "grad_norm": 0.055734626948833466, "learning_rate": 0.00011763805751834016, "loss": 0.2662, "step": 23238 }, { "epoch": 1.8826150356448477, "grad_norm": 0.057213060557842255, "learning_rate": 0.00011763355686574554, "loss": 0.2276, "step": 23239 }, { "epoch": 1.8826960466623461, "grad_norm": 0.05141379311680794, "learning_rate": 0.00011762905621315092, "loss": 0.2531, "step": 23240 }, { "epoch": 1.8827770576798444, "grad_norm": 0.07517049461603165, "learning_rate": 0.0001176245555605563, "loss": 0.2728, "step": 23241 }, { "epoch": 1.8828580686973428, "grad_norm": 0.06808876246213913, "learning_rate": 0.00011762005490796166, "loss": 0.289, "step": 23242 }, { "epoch": 1.8829390797148413, "grad_norm": 0.057982414960861206, "learning_rate": 0.00011761555425536703, "loss": 0.275, "step": 23243 }, { "epoch": 1.8830200907323396, "grad_norm": 0.051993537694215775, "learning_rate": 0.0001176110536027724, "loss": 0.2556, "step": 23244 }, { "epoch": 1.883101101749838, "grad_norm": 0.04890101030468941, "learning_rate": 0.00011760655295017778, "loss": 0.2163, "step": 23245 }, { "epoch": 1.8831821127673365, "grad_norm": 0.06038685515522957, "learning_rate": 0.00011760205229758316, "loss": 0.3004, "step": 23246 }, { "epoch": 1.8832631237848347, "grad_norm": 0.05634760111570358, "learning_rate": 0.00011759755164498854, "loss": 0.2671, "step": 23247 }, { "epoch": 1.883344134802333, "grad_norm": 0.06269721686840057, "learning_rate": 0.0001175930509923939, "loss": 0.2915, "step": 23248 }, { "epoch": 1.8834251458198314, "grad_norm": 0.05876925587654114, "learning_rate": 0.00011758855033979927, "loss": 0.2553, "step": 23249 }, { "epoch": 1.88350615683733, "grad_norm": 0.05880272015929222, "learning_rate": 0.00011758404968720465, "loss": 0.2712, "step": 23250 }, { "epoch": 1.8835871678548282, "grad_norm": 0.05663580447435379, "learning_rate": 0.00011757954903461002, "loss": 0.2741, "step": 23251 }, { "epoch": 1.8836681788723266, "grad_norm": 0.06358767300844193, "learning_rate": 0.0001175750483820154, "loss": 0.2841, "step": 23252 }, { "epoch": 1.883749189889825, "grad_norm": 0.06005439907312393, "learning_rate": 0.00011757054772942078, "loss": 0.2268, "step": 23253 }, { "epoch": 1.8838302009073233, "grad_norm": 0.06091054901480675, "learning_rate": 0.00011756604707682615, "loss": 0.3052, "step": 23254 }, { "epoch": 1.8839112119248218, "grad_norm": 0.07007857412099838, "learning_rate": 0.00011756154642423151, "loss": 0.2703, "step": 23255 }, { "epoch": 1.8839922229423203, "grad_norm": 0.05151437595486641, "learning_rate": 0.00011755704577163689, "loss": 0.2233, "step": 23256 }, { "epoch": 1.8840732339598185, "grad_norm": 0.05768553167581558, "learning_rate": 0.00011755254511904226, "loss": 0.2686, "step": 23257 }, { "epoch": 1.8841542449773168, "grad_norm": 0.04945516213774681, "learning_rate": 0.00011754804446644765, "loss": 0.234, "step": 23258 }, { "epoch": 1.8842352559948155, "grad_norm": 0.053539324551820755, "learning_rate": 0.00011754354381385302, "loss": 0.2857, "step": 23259 }, { "epoch": 1.8843162670123137, "grad_norm": 0.065309077501297, "learning_rate": 0.0001175390431612584, "loss": 0.2945, "step": 23260 }, { "epoch": 1.884397278029812, "grad_norm": 0.05602163448929787, "learning_rate": 0.00011753454250866376, "loss": 0.2522, "step": 23261 }, { "epoch": 1.8844782890473104, "grad_norm": 0.06638114154338837, "learning_rate": 0.00011753004185606913, "loss": 0.2979, "step": 23262 }, { "epoch": 1.8845593000648089, "grad_norm": 0.04951305314898491, "learning_rate": 0.0001175255412034745, "loss": 0.2588, "step": 23263 }, { "epoch": 1.8846403110823071, "grad_norm": 0.053892266005277634, "learning_rate": 0.00011752104055087989, "loss": 0.2561, "step": 23264 }, { "epoch": 1.8847213220998056, "grad_norm": 0.05633193626999855, "learning_rate": 0.00011751653989828526, "loss": 0.3307, "step": 23265 }, { "epoch": 1.884802333117304, "grad_norm": 0.06259032338857651, "learning_rate": 0.00011751203924569064, "loss": 0.2476, "step": 23266 }, { "epoch": 1.8848833441348023, "grad_norm": 0.054782312363386154, "learning_rate": 0.000117507538593096, "loss": 0.2651, "step": 23267 }, { "epoch": 1.8849643551523008, "grad_norm": 0.07018223404884338, "learning_rate": 0.00011750303794050137, "loss": 0.2427, "step": 23268 }, { "epoch": 1.8850453661697992, "grad_norm": 0.05568501725792885, "learning_rate": 0.00011749853728790676, "loss": 0.2359, "step": 23269 }, { "epoch": 1.8851263771872975, "grad_norm": 0.06143895909190178, "learning_rate": 0.00011749403663531213, "loss": 0.3427, "step": 23270 }, { "epoch": 1.8852073882047957, "grad_norm": 0.0545257069170475, "learning_rate": 0.0001174895359827175, "loss": 0.2596, "step": 23271 }, { "epoch": 1.8852883992222942, "grad_norm": 0.055278100073337555, "learning_rate": 0.00011748503533012288, "loss": 0.268, "step": 23272 }, { "epoch": 1.8853694102397927, "grad_norm": 0.05664905160665512, "learning_rate": 0.00011748053467752824, "loss": 0.2745, "step": 23273 }, { "epoch": 1.885450421257291, "grad_norm": 0.06449475884437561, "learning_rate": 0.00011747603402493361, "loss": 0.287, "step": 23274 }, { "epoch": 1.8855314322747894, "grad_norm": 0.05762218311429024, "learning_rate": 0.000117471533372339, "loss": 0.2556, "step": 23275 }, { "epoch": 1.8856124432922878, "grad_norm": 0.057085197418928146, "learning_rate": 0.00011746703271974437, "loss": 0.2809, "step": 23276 }, { "epoch": 1.885693454309786, "grad_norm": 0.0630897730588913, "learning_rate": 0.00011746253206714975, "loss": 0.2833, "step": 23277 }, { "epoch": 1.8857744653272845, "grad_norm": 0.03931886702775955, "learning_rate": 0.00011745803141455512, "loss": 0.2068, "step": 23278 }, { "epoch": 1.885855476344783, "grad_norm": 0.06200547143816948, "learning_rate": 0.00011745353076196048, "loss": 0.2294, "step": 23279 }, { "epoch": 1.8859364873622813, "grad_norm": 0.04909854009747505, "learning_rate": 0.00011744903010936585, "loss": 0.2435, "step": 23280 }, { "epoch": 1.8860174983797795, "grad_norm": 0.05622778832912445, "learning_rate": 0.00011744452945677124, "loss": 0.2574, "step": 23281 }, { "epoch": 1.8860985093972782, "grad_norm": 0.05940768122673035, "learning_rate": 0.00011744002880417661, "loss": 0.2683, "step": 23282 }, { "epoch": 1.8861795204147764, "grad_norm": 0.05599218234419823, "learning_rate": 0.00011743552815158199, "loss": 0.2492, "step": 23283 }, { "epoch": 1.8862605314322747, "grad_norm": 0.061045896261930466, "learning_rate": 0.00011743102749898736, "loss": 0.2534, "step": 23284 }, { "epoch": 1.8863415424497731, "grad_norm": 0.06886105239391327, "learning_rate": 0.00011742652684639272, "loss": 0.3065, "step": 23285 }, { "epoch": 1.8864225534672716, "grad_norm": 0.04585834592580795, "learning_rate": 0.0001174220261937981, "loss": 0.2462, "step": 23286 }, { "epoch": 1.8865035644847699, "grad_norm": 0.059051238000392914, "learning_rate": 0.00011741752554120348, "loss": 0.2488, "step": 23287 }, { "epoch": 1.8865845755022683, "grad_norm": 0.07089319825172424, "learning_rate": 0.00011741302488860886, "loss": 0.3039, "step": 23288 }, { "epoch": 1.8866655865197668, "grad_norm": 0.05801811441779137, "learning_rate": 0.00011740852423601423, "loss": 0.2833, "step": 23289 }, { "epoch": 1.886746597537265, "grad_norm": 0.05861096456646919, "learning_rate": 0.0001174040235834196, "loss": 0.2818, "step": 23290 }, { "epoch": 1.8868276085547635, "grad_norm": 0.05898521840572357, "learning_rate": 0.00011739952293082496, "loss": 0.2713, "step": 23291 }, { "epoch": 1.886908619572262, "grad_norm": 0.05487682670354843, "learning_rate": 0.00011739502227823036, "loss": 0.2395, "step": 23292 }, { "epoch": 1.8869896305897602, "grad_norm": 0.061498358845710754, "learning_rate": 0.00011739052162563572, "loss": 0.2987, "step": 23293 }, { "epoch": 1.8870706416072585, "grad_norm": 0.06627925485372543, "learning_rate": 0.0001173860209730411, "loss": 0.2631, "step": 23294 }, { "epoch": 1.887151652624757, "grad_norm": 0.05386059358716011, "learning_rate": 0.00011738152032044647, "loss": 0.2437, "step": 23295 }, { "epoch": 1.8872326636422554, "grad_norm": 0.06662789732217789, "learning_rate": 0.00011737701966785184, "loss": 0.2781, "step": 23296 }, { "epoch": 1.8873136746597536, "grad_norm": 0.05340142175555229, "learning_rate": 0.0001173725190152572, "loss": 0.2402, "step": 23297 }, { "epoch": 1.887394685677252, "grad_norm": 0.06220469996333122, "learning_rate": 0.0001173680183626626, "loss": 0.2493, "step": 23298 }, { "epoch": 1.8874756966947506, "grad_norm": 0.06774033606052399, "learning_rate": 0.00011736351771006797, "loss": 0.2628, "step": 23299 }, { "epoch": 1.8875567077122488, "grad_norm": 0.056173328310251236, "learning_rate": 0.00011735901705747334, "loss": 0.2477, "step": 23300 }, { "epoch": 1.8876377187297473, "grad_norm": 0.04532039538025856, "learning_rate": 0.00011735451640487871, "loss": 0.237, "step": 23301 }, { "epoch": 1.8877187297472457, "grad_norm": 0.06891242414712906, "learning_rate": 0.00011735001575228409, "loss": 0.2505, "step": 23302 }, { "epoch": 1.887799740764744, "grad_norm": 0.05437375232577324, "learning_rate": 0.00011734551509968945, "loss": 0.2327, "step": 23303 }, { "epoch": 1.8878807517822422, "grad_norm": 0.0711214691400528, "learning_rate": 0.00011734101444709485, "loss": 0.3406, "step": 23304 }, { "epoch": 1.887961762799741, "grad_norm": 0.06552078574895859, "learning_rate": 0.00011733651379450021, "loss": 0.2973, "step": 23305 }, { "epoch": 1.8880427738172392, "grad_norm": 0.0602584071457386, "learning_rate": 0.00011733201314190558, "loss": 0.2509, "step": 23306 }, { "epoch": 1.8881237848347374, "grad_norm": 0.054096248000860214, "learning_rate": 0.00011732751248931095, "loss": 0.2961, "step": 23307 }, { "epoch": 1.8882047958522359, "grad_norm": 0.05824704095721245, "learning_rate": 0.00011732301183671633, "loss": 0.2853, "step": 23308 }, { "epoch": 1.8882858068697344, "grad_norm": 0.0697273537516594, "learning_rate": 0.00011731851118412169, "loss": 0.2621, "step": 23309 }, { "epoch": 1.8883668178872326, "grad_norm": 0.06556077301502228, "learning_rate": 0.00011731401053152709, "loss": 0.2689, "step": 23310 }, { "epoch": 1.888447828904731, "grad_norm": 0.05337255448102951, "learning_rate": 0.00011730950987893245, "loss": 0.2712, "step": 23311 }, { "epoch": 1.8885288399222295, "grad_norm": 0.06646385788917542, "learning_rate": 0.00011730500922633782, "loss": 0.2881, "step": 23312 }, { "epoch": 1.8886098509397278, "grad_norm": 0.054512329399585724, "learning_rate": 0.0001173005085737432, "loss": 0.2768, "step": 23313 }, { "epoch": 1.888690861957226, "grad_norm": 0.05661356449127197, "learning_rate": 0.00011729600792114857, "loss": 0.3054, "step": 23314 }, { "epoch": 1.8887718729747247, "grad_norm": 0.06277420371770859, "learning_rate": 0.00011729150726855393, "loss": 0.2674, "step": 23315 }, { "epoch": 1.888852883992223, "grad_norm": 0.06473672389984131, "learning_rate": 0.00011728700661595933, "loss": 0.2659, "step": 23316 }, { "epoch": 1.8889338950097212, "grad_norm": 0.05304969847202301, "learning_rate": 0.00011728250596336469, "loss": 0.2537, "step": 23317 }, { "epoch": 1.8890149060272197, "grad_norm": 0.06111942231655121, "learning_rate": 0.00011727800531077006, "loss": 0.2421, "step": 23318 }, { "epoch": 1.8890959170447181, "grad_norm": 0.06297849118709564, "learning_rate": 0.00011727350465817544, "loss": 0.2667, "step": 23319 }, { "epoch": 1.8891769280622164, "grad_norm": 0.06840529292821884, "learning_rate": 0.00011726900400558081, "loss": 0.2848, "step": 23320 }, { "epoch": 1.8892579390797148, "grad_norm": 0.05544573813676834, "learning_rate": 0.0001172645033529862, "loss": 0.2567, "step": 23321 }, { "epoch": 1.8893389500972133, "grad_norm": 0.06516019999980927, "learning_rate": 0.00011726000270039157, "loss": 0.2713, "step": 23322 }, { "epoch": 1.8894199611147116, "grad_norm": 0.07645253837108612, "learning_rate": 0.00011725550204779695, "loss": 0.2767, "step": 23323 }, { "epoch": 1.88950097213221, "grad_norm": 0.05969347059726715, "learning_rate": 0.0001172510013952023, "loss": 0.2833, "step": 23324 }, { "epoch": 1.8895819831497085, "grad_norm": 0.061069682240486145, "learning_rate": 0.00011724650074260768, "loss": 0.2896, "step": 23325 }, { "epoch": 1.8896629941672067, "grad_norm": 0.05962633714079857, "learning_rate": 0.00011724200009001305, "loss": 0.2753, "step": 23326 }, { "epoch": 1.889744005184705, "grad_norm": 0.06852693110704422, "learning_rate": 0.00011723749943741844, "loss": 0.2671, "step": 23327 }, { "epoch": 1.8898250162022034, "grad_norm": 0.06853719055652618, "learning_rate": 0.00011723299878482381, "loss": 0.3764, "step": 23328 }, { "epoch": 1.889906027219702, "grad_norm": 0.05661075562238693, "learning_rate": 0.00011722849813222919, "loss": 0.2504, "step": 23329 }, { "epoch": 1.8899870382372002, "grad_norm": 0.06189139559864998, "learning_rate": 0.00011722399747963455, "loss": 0.2636, "step": 23330 }, { "epoch": 1.8900680492546986, "grad_norm": 0.06755541265010834, "learning_rate": 0.00011721949682703992, "loss": 0.3142, "step": 23331 }, { "epoch": 1.890149060272197, "grad_norm": 0.06374403089284897, "learning_rate": 0.0001172149961744453, "loss": 0.2426, "step": 23332 }, { "epoch": 1.8902300712896953, "grad_norm": 0.05989615246653557, "learning_rate": 0.00011721049552185068, "loss": 0.2686, "step": 23333 }, { "epoch": 1.8903110823071938, "grad_norm": 0.06072322651743889, "learning_rate": 0.00011720599486925606, "loss": 0.2924, "step": 23334 }, { "epoch": 1.8903920933246923, "grad_norm": 0.06039504334330559, "learning_rate": 0.00011720149421666143, "loss": 0.3068, "step": 23335 }, { "epoch": 1.8904731043421905, "grad_norm": 0.058001559227705, "learning_rate": 0.00011719699356406679, "loss": 0.3194, "step": 23336 }, { "epoch": 1.8905541153596888, "grad_norm": 0.05137511342763901, "learning_rate": 0.00011719249291147216, "loss": 0.283, "step": 23337 }, { "epoch": 1.8906351263771874, "grad_norm": 0.06535633653402328, "learning_rate": 0.00011718799225887754, "loss": 0.282, "step": 23338 }, { "epoch": 1.8907161373946857, "grad_norm": 0.0476398840546608, "learning_rate": 0.00011718349160628292, "loss": 0.22, "step": 23339 }, { "epoch": 1.890797148412184, "grad_norm": 0.057031337171792984, "learning_rate": 0.0001171789909536883, "loss": 0.2837, "step": 23340 }, { "epoch": 1.8908781594296824, "grad_norm": 0.05744462460279465, "learning_rate": 0.00011717449030109367, "loss": 0.2553, "step": 23341 }, { "epoch": 1.8909591704471809, "grad_norm": 0.06022125110030174, "learning_rate": 0.00011716998964849903, "loss": 0.2846, "step": 23342 }, { "epoch": 1.8910401814646791, "grad_norm": 0.06611776351928711, "learning_rate": 0.0001171654889959044, "loss": 0.2775, "step": 23343 }, { "epoch": 1.8911211924821776, "grad_norm": 0.05349963158369064, "learning_rate": 0.00011716098834330979, "loss": 0.2582, "step": 23344 }, { "epoch": 1.891202203499676, "grad_norm": 0.05367043614387512, "learning_rate": 0.00011715648769071516, "loss": 0.2746, "step": 23345 }, { "epoch": 1.8912832145171743, "grad_norm": 0.04462164267897606, "learning_rate": 0.00011715198703812054, "loss": 0.2464, "step": 23346 }, { "epoch": 1.8913642255346728, "grad_norm": 0.04876317083835602, "learning_rate": 0.00011714748638552591, "loss": 0.2535, "step": 23347 }, { "epoch": 1.8914452365521712, "grad_norm": 0.060143206268548965, "learning_rate": 0.00011714298573293127, "loss": 0.2652, "step": 23348 }, { "epoch": 1.8915262475696695, "grad_norm": 0.05717812106013298, "learning_rate": 0.00011713848508033665, "loss": 0.2712, "step": 23349 }, { "epoch": 1.8916072585871677, "grad_norm": 0.07034508883953094, "learning_rate": 0.00011713398442774203, "loss": 0.2975, "step": 23350 }, { "epoch": 1.8916882696046662, "grad_norm": 0.06371060758829117, "learning_rate": 0.0001171294837751474, "loss": 0.2906, "step": 23351 }, { "epoch": 1.8917692806221647, "grad_norm": 0.06165608391165733, "learning_rate": 0.00011712498312255278, "loss": 0.2296, "step": 23352 }, { "epoch": 1.891850291639663, "grad_norm": 0.05758389085531235, "learning_rate": 0.00011712048246995815, "loss": 0.2453, "step": 23353 }, { "epoch": 1.8919313026571614, "grad_norm": 0.0606408528983593, "learning_rate": 0.00011711598181736351, "loss": 0.2498, "step": 23354 }, { "epoch": 1.8920123136746598, "grad_norm": 0.06578253954648972, "learning_rate": 0.00011711148116476889, "loss": 0.2658, "step": 23355 }, { "epoch": 1.892093324692158, "grad_norm": 0.06750598549842834, "learning_rate": 0.00011710698051217427, "loss": 0.2561, "step": 23356 }, { "epoch": 1.8921743357096565, "grad_norm": 0.06298558413982391, "learning_rate": 0.00011710247985957965, "loss": 0.2603, "step": 23357 }, { "epoch": 1.892255346727155, "grad_norm": 0.05482904985547066, "learning_rate": 0.00011709797920698502, "loss": 0.2428, "step": 23358 }, { "epoch": 1.8923363577446533, "grad_norm": 0.05431557446718216, "learning_rate": 0.0001170934785543904, "loss": 0.2608, "step": 23359 }, { "epoch": 1.8924173687621515, "grad_norm": 0.060233451426029205, "learning_rate": 0.00011708897790179576, "loss": 0.2297, "step": 23360 }, { "epoch": 1.8924983797796502, "grad_norm": 0.06667815148830414, "learning_rate": 0.00011708447724920113, "loss": 0.2879, "step": 23361 }, { "epoch": 1.8925793907971484, "grad_norm": 0.06704255938529968, "learning_rate": 0.00011707997659660652, "loss": 0.2846, "step": 23362 }, { "epoch": 1.8926604018146467, "grad_norm": 0.06251055747270584, "learning_rate": 0.00011707547594401189, "loss": 0.2894, "step": 23363 }, { "epoch": 1.8927414128321451, "grad_norm": 0.06847628951072693, "learning_rate": 0.00011707097529141726, "loss": 0.32, "step": 23364 }, { "epoch": 1.8928224238496436, "grad_norm": 0.06425163149833679, "learning_rate": 0.00011706647463882264, "loss": 0.2243, "step": 23365 }, { "epoch": 1.8929034348671419, "grad_norm": 0.0618412122130394, "learning_rate": 0.000117061973986228, "loss": 0.2973, "step": 23366 }, { "epoch": 1.8929844458846403, "grad_norm": 0.05935633182525635, "learning_rate": 0.00011705747333363337, "loss": 0.246, "step": 23367 }, { "epoch": 1.8930654569021388, "grad_norm": 0.04758918657898903, "learning_rate": 0.00011705297268103876, "loss": 0.2194, "step": 23368 }, { "epoch": 1.893146467919637, "grad_norm": 0.05723010003566742, "learning_rate": 0.00011704847202844413, "loss": 0.2448, "step": 23369 }, { "epoch": 1.8932274789371355, "grad_norm": 0.05666911602020264, "learning_rate": 0.0001170439713758495, "loss": 0.2836, "step": 23370 }, { "epoch": 1.893308489954634, "grad_norm": 0.059430863708257675, "learning_rate": 0.00011703947072325488, "loss": 0.2815, "step": 23371 }, { "epoch": 1.8933895009721322, "grad_norm": 0.05578245595097542, "learning_rate": 0.00011703497007066024, "loss": 0.2414, "step": 23372 }, { "epoch": 1.8934705119896305, "grad_norm": 0.05080593377351761, "learning_rate": 0.00011703046941806564, "loss": 0.2321, "step": 23373 }, { "epoch": 1.893551523007129, "grad_norm": 0.0652099996805191, "learning_rate": 0.000117025968765471, "loss": 0.2908, "step": 23374 }, { "epoch": 1.8936325340246274, "grad_norm": 0.05565506964921951, "learning_rate": 0.00011702146811287637, "loss": 0.2847, "step": 23375 }, { "epoch": 1.8937135450421256, "grad_norm": 0.05552058666944504, "learning_rate": 0.00011701696746028175, "loss": 0.3026, "step": 23376 }, { "epoch": 1.893794556059624, "grad_norm": 0.051754243671894073, "learning_rate": 0.00011701246680768712, "loss": 0.2425, "step": 23377 }, { "epoch": 1.8938755670771226, "grad_norm": 0.0491461418569088, "learning_rate": 0.00011700796615509248, "loss": 0.2677, "step": 23378 }, { "epoch": 1.8939565780946208, "grad_norm": 0.052235040813684464, "learning_rate": 0.00011700346550249788, "loss": 0.2612, "step": 23379 }, { "epoch": 1.8940375891121193, "grad_norm": 0.06482629477977753, "learning_rate": 0.00011699896484990324, "loss": 0.2645, "step": 23380 }, { "epoch": 1.8941186001296177, "grad_norm": 0.052297309041023254, "learning_rate": 0.00011699446419730861, "loss": 0.2516, "step": 23381 }, { "epoch": 1.894199611147116, "grad_norm": 0.05444755405187607, "learning_rate": 0.00011698996354471399, "loss": 0.2544, "step": 23382 }, { "epoch": 1.8942806221646142, "grad_norm": 0.061681777238845825, "learning_rate": 0.00011698546289211936, "loss": 0.2908, "step": 23383 }, { "epoch": 1.894361633182113, "grad_norm": 0.06857453286647797, "learning_rate": 0.00011698096223952472, "loss": 0.2886, "step": 23384 }, { "epoch": 1.8944426441996112, "grad_norm": 0.0629449114203453, "learning_rate": 0.00011697646158693012, "loss": 0.2858, "step": 23385 }, { "epoch": 1.8945236552171094, "grad_norm": 0.07253113389015198, "learning_rate": 0.0001169719609343355, "loss": 0.3053, "step": 23386 }, { "epoch": 1.8946046662346079, "grad_norm": 0.05951322987675667, "learning_rate": 0.00011696746028174086, "loss": 0.2928, "step": 23387 }, { "epoch": 1.8946856772521063, "grad_norm": 0.05505599081516266, "learning_rate": 0.00011696295962914623, "loss": 0.2499, "step": 23388 }, { "epoch": 1.8947666882696046, "grad_norm": 0.05070142447948456, "learning_rate": 0.0001169584589765516, "loss": 0.3039, "step": 23389 }, { "epoch": 1.894847699287103, "grad_norm": 0.05721290409564972, "learning_rate": 0.00011695395832395696, "loss": 0.2911, "step": 23390 }, { "epoch": 1.8949287103046015, "grad_norm": 0.05515219643712044, "learning_rate": 0.00011694945767136236, "loss": 0.2888, "step": 23391 }, { "epoch": 1.8950097213220998, "grad_norm": 0.060376644134521484, "learning_rate": 0.00011694495701876774, "loss": 0.2923, "step": 23392 }, { "epoch": 1.8950907323395982, "grad_norm": 0.060023244470357895, "learning_rate": 0.0001169404563661731, "loss": 0.2979, "step": 23393 }, { "epoch": 1.8951717433570967, "grad_norm": 0.0582108199596405, "learning_rate": 0.00011693595571357847, "loss": 0.2626, "step": 23394 }, { "epoch": 1.895252754374595, "grad_norm": 0.06270253658294678, "learning_rate": 0.00011693145506098385, "loss": 0.2814, "step": 23395 }, { "epoch": 1.8953337653920932, "grad_norm": 0.06579922139644623, "learning_rate": 0.0001169269544083892, "loss": 0.2559, "step": 23396 }, { "epoch": 1.8954147764095917, "grad_norm": 0.055605754256248474, "learning_rate": 0.0001169224537557946, "loss": 0.31, "step": 23397 }, { "epoch": 1.8954957874270901, "grad_norm": 0.047143884003162384, "learning_rate": 0.00011691795310319998, "loss": 0.2312, "step": 23398 }, { "epoch": 1.8955767984445884, "grad_norm": 0.061128631234169006, "learning_rate": 0.00011691345245060534, "loss": 0.2586, "step": 23399 }, { "epoch": 1.8956578094620868, "grad_norm": 0.05848472937941551, "learning_rate": 0.00011690895179801071, "loss": 0.3144, "step": 23400 }, { "epoch": 1.8957388204795853, "grad_norm": 0.05355636402964592, "learning_rate": 0.00011690445114541609, "loss": 0.268, "step": 23401 }, { "epoch": 1.8958198314970836, "grad_norm": 0.0633152574300766, "learning_rate": 0.00011689995049282147, "loss": 0.2618, "step": 23402 }, { "epoch": 1.895900842514582, "grad_norm": 0.05251619592308998, "learning_rate": 0.00011689544984022685, "loss": 0.2679, "step": 23403 }, { "epoch": 1.8959818535320805, "grad_norm": 0.05748763680458069, "learning_rate": 0.00011689094918763222, "loss": 0.2709, "step": 23404 }, { "epoch": 1.8960628645495787, "grad_norm": 0.05484431982040405, "learning_rate": 0.00011688644853503758, "loss": 0.2634, "step": 23405 }, { "epoch": 1.896143875567077, "grad_norm": 0.054440487176179886, "learning_rate": 0.00011688194788244295, "loss": 0.2566, "step": 23406 }, { "epoch": 1.8962248865845757, "grad_norm": 0.06458580493927002, "learning_rate": 0.00011687744722984833, "loss": 0.2492, "step": 23407 }, { "epoch": 1.896305897602074, "grad_norm": 0.054434191435575485, "learning_rate": 0.00011687294657725372, "loss": 0.2476, "step": 23408 }, { "epoch": 1.8963869086195722, "grad_norm": 0.05186142027378082, "learning_rate": 0.00011686844592465909, "loss": 0.2583, "step": 23409 }, { "epoch": 1.8964679196370706, "grad_norm": 0.05285156890749931, "learning_rate": 0.00011686394527206446, "loss": 0.2602, "step": 23410 }, { "epoch": 1.896548930654569, "grad_norm": 0.0519791916012764, "learning_rate": 0.00011685944461946982, "loss": 0.2512, "step": 23411 }, { "epoch": 1.8966299416720673, "grad_norm": 0.05681449547410011, "learning_rate": 0.0001168549439668752, "loss": 0.2388, "step": 23412 }, { "epoch": 1.8967109526895658, "grad_norm": 0.05287312716245651, "learning_rate": 0.00011685044331428057, "loss": 0.2576, "step": 23413 }, { "epoch": 1.8967919637070643, "grad_norm": 0.06390195339918137, "learning_rate": 0.00011684594266168596, "loss": 0.2953, "step": 23414 }, { "epoch": 1.8968729747245625, "grad_norm": 0.06533109396696091, "learning_rate": 0.00011684144200909133, "loss": 0.2578, "step": 23415 }, { "epoch": 1.8969539857420608, "grad_norm": 0.06771045178174973, "learning_rate": 0.0001168369413564967, "loss": 0.2826, "step": 23416 }, { "epoch": 1.8970349967595594, "grad_norm": 0.06620460003614426, "learning_rate": 0.00011683244070390206, "loss": 0.3149, "step": 23417 }, { "epoch": 1.8971160077770577, "grad_norm": 0.062103889882564545, "learning_rate": 0.00011682794005130744, "loss": 0.2722, "step": 23418 }, { "epoch": 1.897197018794556, "grad_norm": 0.058835145086050034, "learning_rate": 0.00011682343939871281, "loss": 0.2414, "step": 23419 }, { "epoch": 1.8972780298120544, "grad_norm": 0.05964101105928421, "learning_rate": 0.0001168189387461182, "loss": 0.2734, "step": 23420 }, { "epoch": 1.8973590408295529, "grad_norm": 0.052001770585775375, "learning_rate": 0.00011681443809352357, "loss": 0.233, "step": 23421 }, { "epoch": 1.8974400518470511, "grad_norm": 0.06389396637678146, "learning_rate": 0.00011680993744092895, "loss": 0.3054, "step": 23422 }, { "epoch": 1.8975210628645496, "grad_norm": 0.049117472022771835, "learning_rate": 0.0001168054367883343, "loss": 0.2342, "step": 23423 }, { "epoch": 1.897602073882048, "grad_norm": 0.05265875905752182, "learning_rate": 0.00011680093613573968, "loss": 0.2859, "step": 23424 }, { "epoch": 1.8976830848995463, "grad_norm": 0.05852856487035751, "learning_rate": 0.00011679643548314507, "loss": 0.2698, "step": 23425 }, { "epoch": 1.8977640959170448, "grad_norm": 0.05060741677880287, "learning_rate": 0.00011679193483055044, "loss": 0.2452, "step": 23426 }, { "epoch": 1.8978451069345432, "grad_norm": 0.06805577129125595, "learning_rate": 0.00011678743417795581, "loss": 0.283, "step": 23427 }, { "epoch": 1.8979261179520415, "grad_norm": 0.055861569941043854, "learning_rate": 0.00011678293352536119, "loss": 0.2498, "step": 23428 }, { "epoch": 1.8980071289695397, "grad_norm": 0.06261233240365982, "learning_rate": 0.00011677843287276655, "loss": 0.2589, "step": 23429 }, { "epoch": 1.8980881399870384, "grad_norm": 0.05829406529664993, "learning_rate": 0.00011677393222017192, "loss": 0.2646, "step": 23430 }, { "epoch": 1.8981691510045366, "grad_norm": 0.05647638067603111, "learning_rate": 0.00011676943156757731, "loss": 0.2909, "step": 23431 }, { "epoch": 1.898250162022035, "grad_norm": 0.0669042319059372, "learning_rate": 0.00011676493091498268, "loss": 0.2999, "step": 23432 }, { "epoch": 1.8983311730395334, "grad_norm": 0.0586322546005249, "learning_rate": 0.00011676043026238806, "loss": 0.273, "step": 23433 }, { "epoch": 1.8984121840570318, "grad_norm": 0.048080138862133026, "learning_rate": 0.00011675592960979343, "loss": 0.2836, "step": 23434 }, { "epoch": 1.89849319507453, "grad_norm": 0.05639676749706268, "learning_rate": 0.00011675142895719879, "loss": 0.2426, "step": 23435 }, { "epoch": 1.8985742060920285, "grad_norm": 0.05663561075925827, "learning_rate": 0.00011674692830460416, "loss": 0.3069, "step": 23436 }, { "epoch": 1.898655217109527, "grad_norm": 0.05259792134165764, "learning_rate": 0.00011674242765200955, "loss": 0.2756, "step": 23437 }, { "epoch": 1.8987362281270252, "grad_norm": 0.06394267082214355, "learning_rate": 0.00011673792699941492, "loss": 0.3269, "step": 23438 }, { "epoch": 1.8988172391445235, "grad_norm": 0.07205379009246826, "learning_rate": 0.0001167334263468203, "loss": 0.2702, "step": 23439 }, { "epoch": 1.8988982501620222, "grad_norm": 0.06035115197300911, "learning_rate": 0.00011672892569422567, "loss": 0.2676, "step": 23440 }, { "epoch": 1.8989792611795204, "grad_norm": 0.0664551630616188, "learning_rate": 0.00011672442504163103, "loss": 0.281, "step": 23441 }, { "epoch": 1.8990602721970187, "grad_norm": 0.06344987452030182, "learning_rate": 0.0001167199243890364, "loss": 0.2896, "step": 23442 }, { "epoch": 1.8991412832145171, "grad_norm": 0.061063800007104874, "learning_rate": 0.00011671542373644179, "loss": 0.2513, "step": 23443 }, { "epoch": 1.8992222942320156, "grad_norm": 0.05384545028209686, "learning_rate": 0.00011671092308384717, "loss": 0.265, "step": 23444 }, { "epoch": 1.8993033052495139, "grad_norm": 0.06857175379991531, "learning_rate": 0.00011670642243125254, "loss": 0.2713, "step": 23445 }, { "epoch": 1.8993843162670123, "grad_norm": 0.0652710348367691, "learning_rate": 0.00011670192177865791, "loss": 0.2431, "step": 23446 }, { "epoch": 1.8994653272845108, "grad_norm": 0.04748218506574631, "learning_rate": 0.00011669742112606327, "loss": 0.2993, "step": 23447 }, { "epoch": 1.899546338302009, "grad_norm": 0.052324965596199036, "learning_rate": 0.00011669292047346865, "loss": 0.2602, "step": 23448 }, { "epoch": 1.8996273493195075, "grad_norm": 0.059723157435655594, "learning_rate": 0.00011668841982087403, "loss": 0.2643, "step": 23449 }, { "epoch": 1.899708360337006, "grad_norm": 0.05854262039065361, "learning_rate": 0.00011668391916827941, "loss": 0.2597, "step": 23450 }, { "epoch": 1.8997893713545042, "grad_norm": 0.053819689899683, "learning_rate": 0.00011667941851568478, "loss": 0.2708, "step": 23451 }, { "epoch": 1.8998703823720025, "grad_norm": 0.060173001140356064, "learning_rate": 0.00011667491786309015, "loss": 0.2611, "step": 23452 }, { "epoch": 1.899951393389501, "grad_norm": 0.044513072818517685, "learning_rate": 0.00011667041721049551, "loss": 0.2144, "step": 23453 }, { "epoch": 1.9000324044069994, "grad_norm": 0.054686374962329865, "learning_rate": 0.00011666591655790091, "loss": 0.2517, "step": 23454 }, { "epoch": 1.9001134154244976, "grad_norm": 0.07276096940040588, "learning_rate": 0.00011666141590530629, "loss": 0.2826, "step": 23455 }, { "epoch": 1.900194426441996, "grad_norm": 0.06193507835268974, "learning_rate": 0.00011665691525271165, "loss": 0.2671, "step": 23456 }, { "epoch": 1.9002754374594946, "grad_norm": 0.05841159448027611, "learning_rate": 0.00011665241460011702, "loss": 0.2911, "step": 23457 }, { "epoch": 1.9003564484769928, "grad_norm": 0.05510144308209419, "learning_rate": 0.0001166479139475224, "loss": 0.2653, "step": 23458 }, { "epoch": 1.9004374594944913, "grad_norm": 0.06022772938013077, "learning_rate": 0.00011664341329492776, "loss": 0.3302, "step": 23459 }, { "epoch": 1.9005184705119897, "grad_norm": 0.05794879421591759, "learning_rate": 0.00011663891264233316, "loss": 0.3144, "step": 23460 }, { "epoch": 1.900599481529488, "grad_norm": 0.0524263009428978, "learning_rate": 0.00011663441198973853, "loss": 0.2423, "step": 23461 }, { "epoch": 1.9006804925469862, "grad_norm": 0.062246453016996384, "learning_rate": 0.00011662991133714389, "loss": 0.2428, "step": 23462 }, { "epoch": 1.900761503564485, "grad_norm": 0.05451060086488724, "learning_rate": 0.00011662541068454926, "loss": 0.2493, "step": 23463 }, { "epoch": 1.9008425145819832, "grad_norm": 0.06086164712905884, "learning_rate": 0.00011662091003195464, "loss": 0.2448, "step": 23464 }, { "epoch": 1.9009235255994814, "grad_norm": 0.06377673149108887, "learning_rate": 0.00011661640937936, "loss": 0.2726, "step": 23465 }, { "epoch": 1.9010045366169799, "grad_norm": 0.05496484041213989, "learning_rate": 0.0001166119087267654, "loss": 0.2791, "step": 23466 }, { "epoch": 1.9010855476344783, "grad_norm": 0.06893599033355713, "learning_rate": 0.00011660740807417077, "loss": 0.285, "step": 23467 }, { "epoch": 1.9011665586519766, "grad_norm": 0.0625278428196907, "learning_rate": 0.00011660290742157613, "loss": 0.2588, "step": 23468 }, { "epoch": 1.901247569669475, "grad_norm": 0.06080259010195732, "learning_rate": 0.0001165984067689815, "loss": 0.3061, "step": 23469 }, { "epoch": 1.9013285806869735, "grad_norm": 0.053016725927591324, "learning_rate": 0.00011659390611638688, "loss": 0.2483, "step": 23470 }, { "epoch": 1.9014095917044718, "grad_norm": 0.058317508548498154, "learning_rate": 0.00011658940546379224, "loss": 0.2967, "step": 23471 }, { "epoch": 1.9014906027219702, "grad_norm": 0.06212649866938591, "learning_rate": 0.00011658490481119764, "loss": 0.2737, "step": 23472 }, { "epoch": 1.9015716137394687, "grad_norm": 0.05819268524646759, "learning_rate": 0.00011658040415860301, "loss": 0.2462, "step": 23473 }, { "epoch": 1.901652624756967, "grad_norm": 0.048252273350954056, "learning_rate": 0.00011657590350600837, "loss": 0.2439, "step": 23474 }, { "epoch": 1.9017336357744652, "grad_norm": 0.05994917079806328, "learning_rate": 0.00011657140285341375, "loss": 0.2448, "step": 23475 }, { "epoch": 1.9018146467919637, "grad_norm": 0.04080428555607796, "learning_rate": 0.00011656690220081912, "loss": 0.2051, "step": 23476 }, { "epoch": 1.9018956578094621, "grad_norm": 0.06264632940292358, "learning_rate": 0.00011656240154822451, "loss": 0.2444, "step": 23477 }, { "epoch": 1.9019766688269604, "grad_norm": 0.06154443323612213, "learning_rate": 0.00011655790089562988, "loss": 0.3366, "step": 23478 }, { "epoch": 1.9020576798444588, "grad_norm": 0.05270412564277649, "learning_rate": 0.00011655340024303525, "loss": 0.2472, "step": 23479 }, { "epoch": 1.9021386908619573, "grad_norm": 0.05738883838057518, "learning_rate": 0.00011654889959044061, "loss": 0.2514, "step": 23480 }, { "epoch": 1.9022197018794555, "grad_norm": 0.0528935082256794, "learning_rate": 0.00011654439893784599, "loss": 0.265, "step": 23481 }, { "epoch": 1.902300712896954, "grad_norm": 0.06092529743909836, "learning_rate": 0.00011653989828525136, "loss": 0.2405, "step": 23482 }, { "epoch": 1.9023817239144525, "grad_norm": 0.06586316227912903, "learning_rate": 0.00011653539763265675, "loss": 0.2804, "step": 23483 }, { "epoch": 1.9024627349319507, "grad_norm": 0.06744181364774704, "learning_rate": 0.00011653089698006212, "loss": 0.226, "step": 23484 }, { "epoch": 1.902543745949449, "grad_norm": 0.057407937943935394, "learning_rate": 0.0001165263963274675, "loss": 0.2548, "step": 23485 }, { "epoch": 1.9026247569669477, "grad_norm": 0.06098683550953865, "learning_rate": 0.00011652189567487286, "loss": 0.2307, "step": 23486 }, { "epoch": 1.902705767984446, "grad_norm": 0.04744044691324234, "learning_rate": 0.00011651739502227823, "loss": 0.2395, "step": 23487 }, { "epoch": 1.9027867790019442, "grad_norm": 0.05935365706682205, "learning_rate": 0.0001165128943696836, "loss": 0.2953, "step": 23488 }, { "epoch": 1.9028677900194426, "grad_norm": 0.059553083032369614, "learning_rate": 0.00011650839371708899, "loss": 0.2726, "step": 23489 }, { "epoch": 1.902948801036941, "grad_norm": 0.06768973916769028, "learning_rate": 0.00011650389306449436, "loss": 0.3151, "step": 23490 }, { "epoch": 1.9030298120544393, "grad_norm": 0.06006639450788498, "learning_rate": 0.00011649939241189974, "loss": 0.2463, "step": 23491 }, { "epoch": 1.9031108230719378, "grad_norm": 0.06431041657924652, "learning_rate": 0.0001164948917593051, "loss": 0.2973, "step": 23492 }, { "epoch": 1.9031918340894363, "grad_norm": 0.05177253112196922, "learning_rate": 0.00011649039110671047, "loss": 0.249, "step": 23493 }, { "epoch": 1.9032728451069345, "grad_norm": 0.06623294949531555, "learning_rate": 0.00011648589045411585, "loss": 0.289, "step": 23494 }, { "epoch": 1.903353856124433, "grad_norm": 0.0612802617251873, "learning_rate": 0.00011648138980152123, "loss": 0.2727, "step": 23495 }, { "epoch": 1.9034348671419314, "grad_norm": 0.0497586615383625, "learning_rate": 0.0001164768891489266, "loss": 0.293, "step": 23496 }, { "epoch": 1.9035158781594297, "grad_norm": 0.05529085174202919, "learning_rate": 0.00011647238849633198, "loss": 0.2956, "step": 23497 }, { "epoch": 1.903596889176928, "grad_norm": 0.06677936017513275, "learning_rate": 0.00011646788784373734, "loss": 0.2632, "step": 23498 }, { "epoch": 1.9036779001944264, "grad_norm": 0.05843420699238777, "learning_rate": 0.00011646338719114271, "loss": 0.2722, "step": 23499 }, { "epoch": 1.9037589112119249, "grad_norm": 0.0610148124396801, "learning_rate": 0.00011645888653854809, "loss": 0.2534, "step": 23500 }, { "epoch": 1.903839922229423, "grad_norm": 0.050915516912937164, "learning_rate": 0.00011645438588595347, "loss": 0.2339, "step": 23501 }, { "epoch": 1.9039209332469216, "grad_norm": 0.07522425800561905, "learning_rate": 0.00011644988523335885, "loss": 0.3088, "step": 23502 }, { "epoch": 1.90400194426442, "grad_norm": 0.06760836392641068, "learning_rate": 0.00011644538458076422, "loss": 0.2598, "step": 23503 }, { "epoch": 1.9040829552819183, "grad_norm": 0.058208782225847244, "learning_rate": 0.00011644088392816958, "loss": 0.2923, "step": 23504 }, { "epoch": 1.9041639662994168, "grad_norm": 0.05135008320212364, "learning_rate": 0.00011643638327557495, "loss": 0.2455, "step": 23505 }, { "epoch": 1.9042449773169152, "grad_norm": 0.0543479323387146, "learning_rate": 0.00011643188262298034, "loss": 0.2697, "step": 23506 }, { "epoch": 1.9043259883344135, "grad_norm": 0.05527025833725929, "learning_rate": 0.00011642738197038572, "loss": 0.2645, "step": 23507 }, { "epoch": 1.9044069993519117, "grad_norm": 0.0615333653986454, "learning_rate": 0.00011642288131779109, "loss": 0.2426, "step": 23508 }, { "epoch": 1.9044880103694104, "grad_norm": 0.061779141426086426, "learning_rate": 0.00011641838066519646, "loss": 0.3022, "step": 23509 }, { "epoch": 1.9045690213869086, "grad_norm": 0.06118670105934143, "learning_rate": 0.00011641388001260182, "loss": 0.2446, "step": 23510 }, { "epoch": 1.904650032404407, "grad_norm": 0.07236479222774506, "learning_rate": 0.0001164093793600072, "loss": 0.2911, "step": 23511 }, { "epoch": 1.9047310434219054, "grad_norm": 0.07088925689458847, "learning_rate": 0.00011640487870741258, "loss": 0.2768, "step": 23512 }, { "epoch": 1.9048120544394038, "grad_norm": 0.06011257693171501, "learning_rate": 0.00011640037805481796, "loss": 0.314, "step": 23513 }, { "epoch": 1.904893065456902, "grad_norm": 0.054274432361125946, "learning_rate": 0.00011639587740222333, "loss": 0.2681, "step": 23514 }, { "epoch": 1.9049740764744005, "grad_norm": 0.04443292319774628, "learning_rate": 0.0001163913767496287, "loss": 0.2424, "step": 23515 }, { "epoch": 1.905055087491899, "grad_norm": 0.06765972822904587, "learning_rate": 0.00011638687609703406, "loss": 0.2851, "step": 23516 }, { "epoch": 1.9051360985093972, "grad_norm": 0.056600090116262436, "learning_rate": 0.00011638237544443944, "loss": 0.2824, "step": 23517 }, { "epoch": 1.9052171095268955, "grad_norm": 0.04816935211420059, "learning_rate": 0.00011637787479184483, "loss": 0.2828, "step": 23518 }, { "epoch": 1.9052981205443942, "grad_norm": 0.05863537639379501, "learning_rate": 0.0001163733741392502, "loss": 0.253, "step": 23519 }, { "epoch": 1.9053791315618924, "grad_norm": 0.05766700580716133, "learning_rate": 0.00011636887348665557, "loss": 0.2414, "step": 23520 }, { "epoch": 1.9054601425793907, "grad_norm": 0.07518936693668365, "learning_rate": 0.00011636437283406095, "loss": 0.2811, "step": 23521 }, { "epoch": 1.9055411535968891, "grad_norm": 0.05701868608593941, "learning_rate": 0.0001163598721814663, "loss": 0.2862, "step": 23522 }, { "epoch": 1.9056221646143876, "grad_norm": 0.04992794618010521, "learning_rate": 0.00011635537152887168, "loss": 0.2316, "step": 23523 }, { "epoch": 1.9057031756318858, "grad_norm": 0.06036796420812607, "learning_rate": 0.00011635087087627708, "loss": 0.2712, "step": 23524 }, { "epoch": 1.9057841866493843, "grad_norm": 0.06520384550094604, "learning_rate": 0.00011634637022368244, "loss": 0.2483, "step": 23525 }, { "epoch": 1.9058651976668828, "grad_norm": 0.06383537501096725, "learning_rate": 0.00011634186957108781, "loss": 0.2824, "step": 23526 }, { "epoch": 1.905946208684381, "grad_norm": 0.05158648267388344, "learning_rate": 0.00011633736891849319, "loss": 0.2697, "step": 23527 }, { "epoch": 1.9060272197018795, "grad_norm": 0.10046553611755371, "learning_rate": 0.00011633286826589855, "loss": 0.2859, "step": 23528 }, { "epoch": 1.906108230719378, "grad_norm": 0.04792208969593048, "learning_rate": 0.00011632836761330395, "loss": 0.2174, "step": 23529 }, { "epoch": 1.9061892417368762, "grad_norm": 0.05839664489030838, "learning_rate": 0.00011632386696070932, "loss": 0.2843, "step": 23530 }, { "epoch": 1.9062702527543745, "grad_norm": 0.07664167135953903, "learning_rate": 0.00011631936630811468, "loss": 0.3275, "step": 23531 }, { "epoch": 1.9063512637718731, "grad_norm": 0.057959817349910736, "learning_rate": 0.00011631486565552006, "loss": 0.2393, "step": 23532 }, { "epoch": 1.9064322747893714, "grad_norm": 0.06964612007141113, "learning_rate": 0.00011631036500292543, "loss": 0.2542, "step": 23533 }, { "epoch": 1.9065132858068696, "grad_norm": 0.058878980576992035, "learning_rate": 0.00011630586435033079, "loss": 0.268, "step": 23534 }, { "epoch": 1.906594296824368, "grad_norm": 0.06393729895353317, "learning_rate": 0.00011630136369773619, "loss": 0.2873, "step": 23535 }, { "epoch": 1.9066753078418666, "grad_norm": 0.052830591797828674, "learning_rate": 0.00011629686304514156, "loss": 0.273, "step": 23536 }, { "epoch": 1.9067563188593648, "grad_norm": 0.06132220849394798, "learning_rate": 0.00011629236239254692, "loss": 0.2793, "step": 23537 }, { "epoch": 1.9068373298768633, "grad_norm": 0.06467705965042114, "learning_rate": 0.0001162878617399523, "loss": 0.304, "step": 23538 }, { "epoch": 1.9069183408943617, "grad_norm": 0.05416596680879593, "learning_rate": 0.00011628336108735767, "loss": 0.2695, "step": 23539 }, { "epoch": 1.90699935191186, "grad_norm": 0.05750744789838791, "learning_rate": 0.00011627886043476303, "loss": 0.2528, "step": 23540 }, { "epoch": 1.9070803629293582, "grad_norm": 0.06359507888555527, "learning_rate": 0.00011627435978216843, "loss": 0.2945, "step": 23541 }, { "epoch": 1.907161373946857, "grad_norm": 0.06286890804767609, "learning_rate": 0.0001162698591295738, "loss": 0.2768, "step": 23542 }, { "epoch": 1.9072423849643552, "grad_norm": 0.06291428953409195, "learning_rate": 0.00011626535847697917, "loss": 0.284, "step": 23543 }, { "epoch": 1.9073233959818534, "grad_norm": 0.057130761444568634, "learning_rate": 0.00011626085782438454, "loss": 0.2401, "step": 23544 }, { "epoch": 1.9074044069993519, "grad_norm": 0.06668264418840408, "learning_rate": 0.00011625635717178991, "loss": 0.2621, "step": 23545 }, { "epoch": 1.9074854180168503, "grad_norm": 0.06476175040006638, "learning_rate": 0.00011625185651919527, "loss": 0.2796, "step": 23546 }, { "epoch": 1.9075664290343486, "grad_norm": 0.060899537056684494, "learning_rate": 0.00011624735586660067, "loss": 0.2448, "step": 23547 }, { "epoch": 1.907647440051847, "grad_norm": 0.0622684583067894, "learning_rate": 0.00011624285521400605, "loss": 0.2399, "step": 23548 }, { "epoch": 1.9077284510693455, "grad_norm": 0.06308908015489578, "learning_rate": 0.00011623835456141141, "loss": 0.2758, "step": 23549 }, { "epoch": 1.9078094620868438, "grad_norm": 0.061859551817178726, "learning_rate": 0.00011623385390881678, "loss": 0.2855, "step": 23550 }, { "epoch": 1.9078904731043422, "grad_norm": 0.06682777404785156, "learning_rate": 0.00011622935325622215, "loss": 0.3057, "step": 23551 }, { "epoch": 1.9079714841218407, "grad_norm": 0.06256937980651855, "learning_rate": 0.00011622485260362751, "loss": 0.2912, "step": 23552 }, { "epoch": 1.908052495139339, "grad_norm": 0.051607646048069, "learning_rate": 0.00011622035195103292, "loss": 0.2626, "step": 23553 }, { "epoch": 1.9081335061568372, "grad_norm": 0.054773516952991486, "learning_rate": 0.00011621585129843829, "loss": 0.2572, "step": 23554 }, { "epoch": 1.9082145171743357, "grad_norm": 0.07187357544898987, "learning_rate": 0.00011621135064584365, "loss": 0.3005, "step": 23555 }, { "epoch": 1.9082955281918341, "grad_norm": 0.05505804717540741, "learning_rate": 0.00011620684999324902, "loss": 0.2324, "step": 23556 }, { "epoch": 1.9083765392093324, "grad_norm": 0.057166606187820435, "learning_rate": 0.0001162023493406544, "loss": 0.2488, "step": 23557 }, { "epoch": 1.9084575502268308, "grad_norm": 0.0596490204334259, "learning_rate": 0.00011619784868805978, "loss": 0.2589, "step": 23558 }, { "epoch": 1.9085385612443293, "grad_norm": 0.0551486536860466, "learning_rate": 0.00011619334803546516, "loss": 0.2311, "step": 23559 }, { "epoch": 1.9086195722618275, "grad_norm": 0.05850379914045334, "learning_rate": 0.00011618884738287053, "loss": 0.2763, "step": 23560 }, { "epoch": 1.908700583279326, "grad_norm": 0.06534396857023239, "learning_rate": 0.00011618434673027589, "loss": 0.2837, "step": 23561 }, { "epoch": 1.9087815942968245, "grad_norm": 0.05430319532752037, "learning_rate": 0.00011617984607768126, "loss": 0.2355, "step": 23562 }, { "epoch": 1.9088626053143227, "grad_norm": 0.04827059432864189, "learning_rate": 0.00011617534542508664, "loss": 0.253, "step": 23563 }, { "epoch": 1.908943616331821, "grad_norm": 0.059288423508405685, "learning_rate": 0.00011617084477249202, "loss": 0.2538, "step": 23564 }, { "epoch": 1.9090246273493197, "grad_norm": 0.06148166209459305, "learning_rate": 0.0001161663441198974, "loss": 0.2547, "step": 23565 }, { "epoch": 1.909105638366818, "grad_norm": 0.07299099117517471, "learning_rate": 0.00011616184346730277, "loss": 0.2814, "step": 23566 }, { "epoch": 1.9091866493843161, "grad_norm": 0.05289170518517494, "learning_rate": 0.00011615734281470813, "loss": 0.2467, "step": 23567 }, { "epoch": 1.9092676604018146, "grad_norm": 0.07755734026432037, "learning_rate": 0.0001161528421621135, "loss": 0.3104, "step": 23568 }, { "epoch": 1.909348671419313, "grad_norm": 0.0510355643928051, "learning_rate": 0.00011614834150951888, "loss": 0.248, "step": 23569 }, { "epoch": 1.9094296824368113, "grad_norm": 0.06598133593797684, "learning_rate": 0.00011614384085692427, "loss": 0.2952, "step": 23570 }, { "epoch": 1.9095106934543098, "grad_norm": 0.0684957429766655, "learning_rate": 0.00011613934020432964, "loss": 0.2586, "step": 23571 }, { "epoch": 1.9095917044718083, "grad_norm": 0.058603137731552124, "learning_rate": 0.00011613483955173501, "loss": 0.2781, "step": 23572 }, { "epoch": 1.9096727154893065, "grad_norm": 0.052800316363573074, "learning_rate": 0.00011613033889914037, "loss": 0.3278, "step": 23573 }, { "epoch": 1.909753726506805, "grad_norm": 0.05046352744102478, "learning_rate": 0.00011612583824654575, "loss": 0.2318, "step": 23574 }, { "epoch": 1.9098347375243034, "grad_norm": 0.061404839158058167, "learning_rate": 0.00011612133759395112, "loss": 0.2591, "step": 23575 }, { "epoch": 1.9099157485418017, "grad_norm": 0.06698551028966904, "learning_rate": 0.00011611683694135651, "loss": 0.272, "step": 23576 }, { "epoch": 1.9099967595593, "grad_norm": 0.05472571775317192, "learning_rate": 0.00011611233628876188, "loss": 0.2662, "step": 23577 }, { "epoch": 1.9100777705767984, "grad_norm": 0.05898464098572731, "learning_rate": 0.00011610783563616726, "loss": 0.2572, "step": 23578 }, { "epoch": 1.9101587815942969, "grad_norm": 0.07992018014192581, "learning_rate": 0.00011610333498357262, "loss": 0.2764, "step": 23579 }, { "epoch": 1.910239792611795, "grad_norm": 0.04683117941021919, "learning_rate": 0.00011609883433097799, "loss": 0.2345, "step": 23580 }, { "epoch": 1.9103208036292936, "grad_norm": 0.07582516223192215, "learning_rate": 0.00011609433367838336, "loss": 0.3301, "step": 23581 }, { "epoch": 1.910401814646792, "grad_norm": 0.0542750246822834, "learning_rate": 0.00011608983302578875, "loss": 0.2332, "step": 23582 }, { "epoch": 1.9104828256642903, "grad_norm": 0.061518967151641846, "learning_rate": 0.00011608533237319412, "loss": 0.2615, "step": 23583 }, { "epoch": 1.9105638366817888, "grad_norm": 0.0602375864982605, "learning_rate": 0.0001160808317205995, "loss": 0.2639, "step": 23584 }, { "epoch": 1.9106448476992872, "grad_norm": 0.054494086652994156, "learning_rate": 0.00011607633106800486, "loss": 0.2504, "step": 23585 }, { "epoch": 1.9107258587167855, "grad_norm": 0.060139261186122894, "learning_rate": 0.00011607183041541023, "loss": 0.287, "step": 23586 }, { "epoch": 1.9108068697342837, "grad_norm": 0.07051198184490204, "learning_rate": 0.00011606732976281562, "loss": 0.3095, "step": 23587 }, { "epoch": 1.9108878807517824, "grad_norm": 0.05830878019332886, "learning_rate": 0.00011606282911022099, "loss": 0.2962, "step": 23588 }, { "epoch": 1.9109688917692806, "grad_norm": 0.06247595325112343, "learning_rate": 0.00011605832845762636, "loss": 0.2658, "step": 23589 }, { "epoch": 1.9110499027867789, "grad_norm": 0.05308188498020172, "learning_rate": 0.00011605382780503174, "loss": 0.2442, "step": 23590 }, { "epoch": 1.9111309138042774, "grad_norm": 0.05700519308447838, "learning_rate": 0.0001160493271524371, "loss": 0.2994, "step": 23591 }, { "epoch": 1.9112119248217758, "grad_norm": 0.05724016949534416, "learning_rate": 0.00011604482649984247, "loss": 0.2816, "step": 23592 }, { "epoch": 1.911292935839274, "grad_norm": 0.04718222841620445, "learning_rate": 0.00011604032584724787, "loss": 0.2446, "step": 23593 }, { "epoch": 1.9113739468567725, "grad_norm": 0.04728047549724579, "learning_rate": 0.00011603582519465323, "loss": 0.2632, "step": 23594 }, { "epoch": 1.911454957874271, "grad_norm": 0.06270313262939453, "learning_rate": 0.0001160313245420586, "loss": 0.3105, "step": 23595 }, { "epoch": 1.9115359688917692, "grad_norm": 0.05434510111808777, "learning_rate": 0.00011602682388946398, "loss": 0.2654, "step": 23596 }, { "epoch": 1.9116169799092677, "grad_norm": 0.0677875280380249, "learning_rate": 0.00011602232323686934, "loss": 0.3015, "step": 23597 }, { "epoch": 1.9116979909267662, "grad_norm": 0.07220727950334549, "learning_rate": 0.00011601782258427471, "loss": 0.277, "step": 23598 }, { "epoch": 1.9117790019442644, "grad_norm": 0.06096704676747322, "learning_rate": 0.00011601332193168011, "loss": 0.2713, "step": 23599 }, { "epoch": 1.9118600129617627, "grad_norm": 0.06041645258665085, "learning_rate": 0.00011600882127908547, "loss": 0.2569, "step": 23600 }, { "epoch": 1.9119410239792611, "grad_norm": 0.05308665335178375, "learning_rate": 0.00011600432062649085, "loss": 0.2709, "step": 23601 }, { "epoch": 1.9120220349967596, "grad_norm": 0.055644433945417404, "learning_rate": 0.00011599981997389622, "loss": 0.2551, "step": 23602 }, { "epoch": 1.9121030460142578, "grad_norm": 0.06518996506929398, "learning_rate": 0.00011599531932130158, "loss": 0.2585, "step": 23603 }, { "epoch": 1.9121840570317563, "grad_norm": 0.06019129976630211, "learning_rate": 0.00011599081866870696, "loss": 0.2829, "step": 23604 }, { "epoch": 1.9122650680492548, "grad_norm": 0.04989992454648018, "learning_rate": 0.00011598631801611236, "loss": 0.2676, "step": 23605 }, { "epoch": 1.912346079066753, "grad_norm": 0.057164739817380905, "learning_rate": 0.00011598181736351772, "loss": 0.2936, "step": 23606 }, { "epoch": 1.9124270900842515, "grad_norm": 0.06150704249739647, "learning_rate": 0.00011597731671092309, "loss": 0.2621, "step": 23607 }, { "epoch": 1.91250810110175, "grad_norm": 0.054209765046834946, "learning_rate": 0.00011597281605832846, "loss": 0.2534, "step": 23608 }, { "epoch": 1.9125891121192482, "grad_norm": 0.053575582802295685, "learning_rate": 0.00011596831540573382, "loss": 0.2486, "step": 23609 }, { "epoch": 1.9126701231367464, "grad_norm": 0.05911843851208687, "learning_rate": 0.00011596381475313922, "loss": 0.2303, "step": 23610 }, { "epoch": 1.9127511341542451, "grad_norm": 0.05687384307384491, "learning_rate": 0.0001159593141005446, "loss": 0.2235, "step": 23611 }, { "epoch": 1.9128321451717434, "grad_norm": 0.07763361930847168, "learning_rate": 0.00011595481344794996, "loss": 0.3259, "step": 23612 }, { "epoch": 1.9129131561892416, "grad_norm": 0.05966458469629288, "learning_rate": 0.00011595031279535533, "loss": 0.2486, "step": 23613 }, { "epoch": 1.91299416720674, "grad_norm": 0.058465708047151566, "learning_rate": 0.0001159458121427607, "loss": 0.2704, "step": 23614 }, { "epoch": 1.9130751782242386, "grad_norm": 0.05656788498163223, "learning_rate": 0.00011594131149016606, "loss": 0.2215, "step": 23615 }, { "epoch": 1.9131561892417368, "grad_norm": 0.055962447077035904, "learning_rate": 0.00011593681083757147, "loss": 0.2751, "step": 23616 }, { "epoch": 1.9132372002592353, "grad_norm": 0.04729706794023514, "learning_rate": 0.00011593231018497684, "loss": 0.2564, "step": 23617 }, { "epoch": 1.9133182112767337, "grad_norm": 0.06427638232707977, "learning_rate": 0.0001159278095323822, "loss": 0.2754, "step": 23618 }, { "epoch": 1.913399222294232, "grad_norm": 0.05865442752838135, "learning_rate": 0.00011592330887978757, "loss": 0.255, "step": 23619 }, { "epoch": 1.9134802333117304, "grad_norm": 0.06961038708686829, "learning_rate": 0.00011591880822719295, "loss": 0.3402, "step": 23620 }, { "epoch": 1.913561244329229, "grad_norm": 0.0628763884305954, "learning_rate": 0.0001159143075745983, "loss": 0.2438, "step": 23621 }, { "epoch": 1.9136422553467272, "grad_norm": 0.05884828045964241, "learning_rate": 0.00011590980692200371, "loss": 0.33, "step": 23622 }, { "epoch": 1.9137232663642254, "grad_norm": 0.05961216613650322, "learning_rate": 0.00011590530626940908, "loss": 0.273, "step": 23623 }, { "epoch": 1.9138042773817239, "grad_norm": 0.07995451241731644, "learning_rate": 0.00011590080561681444, "loss": 0.2605, "step": 23624 }, { "epoch": 1.9138852883992223, "grad_norm": 0.05430275946855545, "learning_rate": 0.00011589630496421981, "loss": 0.2286, "step": 23625 }, { "epoch": 1.9139662994167206, "grad_norm": 0.060730792582035065, "learning_rate": 0.00011589180431162519, "loss": 0.2459, "step": 23626 }, { "epoch": 1.914047310434219, "grad_norm": 0.07754159718751907, "learning_rate": 0.00011588730365903056, "loss": 0.2733, "step": 23627 }, { "epoch": 1.9141283214517175, "grad_norm": 0.05745822936296463, "learning_rate": 0.00011588280300643595, "loss": 0.2714, "step": 23628 }, { "epoch": 1.9142093324692158, "grad_norm": 0.05793623626232147, "learning_rate": 0.00011587830235384132, "loss": 0.2276, "step": 23629 }, { "epoch": 1.9142903434867142, "grad_norm": 0.05226112902164459, "learning_rate": 0.00011587380170124668, "loss": 0.2898, "step": 23630 }, { "epoch": 1.9143713545042127, "grad_norm": 0.05570800229907036, "learning_rate": 0.00011586930104865206, "loss": 0.2533, "step": 23631 }, { "epoch": 1.914452365521711, "grad_norm": 0.06118570268154144, "learning_rate": 0.00011586480039605743, "loss": 0.2697, "step": 23632 }, { "epoch": 1.9145333765392092, "grad_norm": 0.062098145484924316, "learning_rate": 0.0001158602997434628, "loss": 0.2491, "step": 23633 }, { "epoch": 1.9146143875567079, "grad_norm": 0.059037044644355774, "learning_rate": 0.00011585579909086819, "loss": 0.2991, "step": 23634 }, { "epoch": 1.9146953985742061, "grad_norm": 0.052922844886779785, "learning_rate": 0.00011585129843827356, "loss": 0.2744, "step": 23635 }, { "epoch": 1.9147764095917044, "grad_norm": 0.07369931787252426, "learning_rate": 0.00011584679778567892, "loss": 0.2997, "step": 23636 }, { "epoch": 1.9148574206092028, "grad_norm": 0.06724896281957626, "learning_rate": 0.0001158422971330843, "loss": 0.2649, "step": 23637 }, { "epoch": 1.9149384316267013, "grad_norm": 0.05919881537556648, "learning_rate": 0.00011583779648048967, "loss": 0.2736, "step": 23638 }, { "epoch": 1.9150194426441995, "grad_norm": 0.0630001425743103, "learning_rate": 0.00011583329582789506, "loss": 0.26, "step": 23639 }, { "epoch": 1.915100453661698, "grad_norm": 0.0480809323489666, "learning_rate": 0.00011582879517530043, "loss": 0.2376, "step": 23640 }, { "epoch": 1.9151814646791965, "grad_norm": 0.06232346221804619, "learning_rate": 0.0001158242945227058, "loss": 0.2549, "step": 23641 }, { "epoch": 1.9152624756966947, "grad_norm": 0.057114098221063614, "learning_rate": 0.00011581979387011117, "loss": 0.2361, "step": 23642 }, { "epoch": 1.915343486714193, "grad_norm": 0.06052986532449722, "learning_rate": 0.00011581529321751654, "loss": 0.2683, "step": 23643 }, { "epoch": 1.9154244977316917, "grad_norm": 0.06299544125795364, "learning_rate": 0.00011581079256492191, "loss": 0.2722, "step": 23644 }, { "epoch": 1.91550550874919, "grad_norm": 0.049486126750707626, "learning_rate": 0.0001158062919123273, "loss": 0.2546, "step": 23645 }, { "epoch": 1.9155865197666881, "grad_norm": 0.052669722586870193, "learning_rate": 0.00011580179125973267, "loss": 0.2571, "step": 23646 }, { "epoch": 1.9156675307841866, "grad_norm": 0.06197255104780197, "learning_rate": 0.00011579729060713805, "loss": 0.2522, "step": 23647 }, { "epoch": 1.915748541801685, "grad_norm": 0.05383479222655296, "learning_rate": 0.00011579278995454341, "loss": 0.2535, "step": 23648 }, { "epoch": 1.9158295528191833, "grad_norm": 0.052105970680713654, "learning_rate": 0.00011578828930194878, "loss": 0.2658, "step": 23649 }, { "epoch": 1.9159105638366818, "grad_norm": 0.06237861514091492, "learning_rate": 0.00011578378864935415, "loss": 0.2534, "step": 23650 }, { "epoch": 1.9159915748541803, "grad_norm": 0.050857868045568466, "learning_rate": 0.00011577928799675954, "loss": 0.245, "step": 23651 }, { "epoch": 1.9160725858716785, "grad_norm": 0.062176115810871124, "learning_rate": 0.00011577478734416492, "loss": 0.2344, "step": 23652 }, { "epoch": 1.916153596889177, "grad_norm": 0.059934522956609726, "learning_rate": 0.00011577028669157029, "loss": 0.2677, "step": 23653 }, { "epoch": 1.9162346079066754, "grad_norm": 0.05889790877699852, "learning_rate": 0.00011576578603897565, "loss": 0.2557, "step": 23654 }, { "epoch": 1.9163156189241737, "grad_norm": 0.05746990069746971, "learning_rate": 0.00011576128538638102, "loss": 0.2714, "step": 23655 }, { "epoch": 1.916396629941672, "grad_norm": 0.07142087072134018, "learning_rate": 0.0001157567847337864, "loss": 0.2866, "step": 23656 }, { "epoch": 1.9164776409591704, "grad_norm": 0.06459254026412964, "learning_rate": 0.00011575228408119178, "loss": 0.3052, "step": 23657 }, { "epoch": 1.9165586519766689, "grad_norm": 0.07262136787176132, "learning_rate": 0.00011574778342859716, "loss": 0.2893, "step": 23658 }, { "epoch": 1.916639662994167, "grad_norm": 0.06700246036052704, "learning_rate": 0.00011574328277600253, "loss": 0.3159, "step": 23659 }, { "epoch": 1.9167206740116656, "grad_norm": 0.05940024182200432, "learning_rate": 0.00011573878212340789, "loss": 0.269, "step": 23660 }, { "epoch": 1.916801685029164, "grad_norm": 0.0663234069943428, "learning_rate": 0.00011573428147081326, "loss": 0.2781, "step": 23661 }, { "epoch": 1.9168826960466623, "grad_norm": 0.051529187709093094, "learning_rate": 0.00011572978081821866, "loss": 0.257, "step": 23662 }, { "epoch": 1.9169637070641607, "grad_norm": 0.06385305523872375, "learning_rate": 0.00011572528016562402, "loss": 0.2746, "step": 23663 }, { "epoch": 1.9170447180816592, "grad_norm": 0.053908463567495346, "learning_rate": 0.0001157207795130294, "loss": 0.2617, "step": 23664 }, { "epoch": 1.9171257290991575, "grad_norm": 0.06621986627578735, "learning_rate": 0.00011571627886043477, "loss": 0.2714, "step": 23665 }, { "epoch": 1.9172067401166557, "grad_norm": 0.05236555263400078, "learning_rate": 0.00011571177820784013, "loss": 0.2463, "step": 23666 }, { "epoch": 1.9172877511341544, "grad_norm": 0.06412264704704285, "learning_rate": 0.0001157072775552455, "loss": 0.2906, "step": 23667 }, { "epoch": 1.9173687621516526, "grad_norm": 0.0691026896238327, "learning_rate": 0.0001157027769026509, "loss": 0.2795, "step": 23668 }, { "epoch": 1.9174497731691509, "grad_norm": 0.05732530727982521, "learning_rate": 0.00011569827625005627, "loss": 0.2636, "step": 23669 }, { "epoch": 1.9175307841866494, "grad_norm": 0.06166663393378258, "learning_rate": 0.00011569377559746164, "loss": 0.263, "step": 23670 }, { "epoch": 1.9176117952041478, "grad_norm": 0.05083499476313591, "learning_rate": 0.00011568927494486701, "loss": 0.2531, "step": 23671 }, { "epoch": 1.917692806221646, "grad_norm": 0.07012742757797241, "learning_rate": 0.00011568477429227237, "loss": 0.2805, "step": 23672 }, { "epoch": 1.9177738172391445, "grad_norm": 0.053490299731492996, "learning_rate": 0.00011568027363967775, "loss": 0.2436, "step": 23673 }, { "epoch": 1.917854828256643, "grad_norm": 0.045660991221666336, "learning_rate": 0.00011567577298708315, "loss": 0.261, "step": 23674 }, { "epoch": 1.9179358392741412, "grad_norm": 0.06296227127313614, "learning_rate": 0.00011567127233448851, "loss": 0.315, "step": 23675 }, { "epoch": 1.9180168502916397, "grad_norm": 0.05819511413574219, "learning_rate": 0.00011566677168189388, "loss": 0.2216, "step": 23676 }, { "epoch": 1.9180978613091382, "grad_norm": 0.07294635474681854, "learning_rate": 0.00011566227102929926, "loss": 0.2784, "step": 23677 }, { "epoch": 1.9181788723266364, "grad_norm": 0.05275367572903633, "learning_rate": 0.00011565777037670462, "loss": 0.2433, "step": 23678 }, { "epoch": 1.9182598833441347, "grad_norm": 0.058120809495449066, "learning_rate": 0.00011565326972410999, "loss": 0.242, "step": 23679 }, { "epoch": 1.9183408943616331, "grad_norm": 0.05177418887615204, "learning_rate": 0.00011564876907151539, "loss": 0.2525, "step": 23680 }, { "epoch": 1.9184219053791316, "grad_norm": 0.06865722686052322, "learning_rate": 0.00011564426841892075, "loss": 0.2977, "step": 23681 }, { "epoch": 1.9185029163966298, "grad_norm": 0.05706919729709625, "learning_rate": 0.00011563976776632612, "loss": 0.2953, "step": 23682 }, { "epoch": 1.9185839274141283, "grad_norm": 0.059390176087617874, "learning_rate": 0.0001156352671137315, "loss": 0.2857, "step": 23683 }, { "epoch": 1.9186649384316268, "grad_norm": 0.061236340552568436, "learning_rate": 0.00011563076646113686, "loss": 0.2785, "step": 23684 }, { "epoch": 1.918745949449125, "grad_norm": 0.059098824858665466, "learning_rate": 0.00011562626580854223, "loss": 0.2793, "step": 23685 }, { "epoch": 1.9188269604666235, "grad_norm": 0.060659877955913544, "learning_rate": 0.00011562176515594763, "loss": 0.2926, "step": 23686 }, { "epoch": 1.918907971484122, "grad_norm": 0.061666518449783325, "learning_rate": 0.00011561726450335299, "loss": 0.305, "step": 23687 }, { "epoch": 1.9189889825016202, "grad_norm": 0.0629463717341423, "learning_rate": 0.00011561276385075837, "loss": 0.2721, "step": 23688 }, { "epoch": 1.9190699935191184, "grad_norm": 0.06427877396345139, "learning_rate": 0.00011560826319816374, "loss": 0.2555, "step": 23689 }, { "epoch": 1.9191510045366171, "grad_norm": 0.06070510298013687, "learning_rate": 0.0001156037625455691, "loss": 0.2377, "step": 23690 }, { "epoch": 1.9192320155541154, "grad_norm": 0.06346534192562103, "learning_rate": 0.0001155992618929745, "loss": 0.2747, "step": 23691 }, { "epoch": 1.9193130265716136, "grad_norm": 0.055210500955581665, "learning_rate": 0.00011559476124037987, "loss": 0.2394, "step": 23692 }, { "epoch": 1.919394037589112, "grad_norm": 0.07612962275743484, "learning_rate": 0.00011559026058778523, "loss": 0.2888, "step": 23693 }, { "epoch": 1.9194750486066106, "grad_norm": 0.05394081771373749, "learning_rate": 0.0001155857599351906, "loss": 0.2794, "step": 23694 }, { "epoch": 1.9195560596241088, "grad_norm": 0.052274949848651886, "learning_rate": 0.00011558125928259598, "loss": 0.2271, "step": 23695 }, { "epoch": 1.9196370706416073, "grad_norm": 0.05973504111170769, "learning_rate": 0.00011557675863000135, "loss": 0.2481, "step": 23696 }, { "epoch": 1.9197180816591057, "grad_norm": 0.05261189118027687, "learning_rate": 0.00011557225797740674, "loss": 0.2702, "step": 23697 }, { "epoch": 1.919799092676604, "grad_norm": 0.08073947578668594, "learning_rate": 0.00011556775732481211, "loss": 0.2427, "step": 23698 }, { "epoch": 1.9198801036941024, "grad_norm": 0.06485044956207275, "learning_rate": 0.00011556325667221747, "loss": 0.2742, "step": 23699 }, { "epoch": 1.919961114711601, "grad_norm": 0.0512852780520916, "learning_rate": 0.00011555875601962285, "loss": 0.2425, "step": 23700 }, { "epoch": 1.9200421257290992, "grad_norm": 0.06893439590930939, "learning_rate": 0.00011555425536702822, "loss": 0.3144, "step": 23701 }, { "epoch": 1.9201231367465974, "grad_norm": 0.0645238384604454, "learning_rate": 0.0001155497547144336, "loss": 0.2899, "step": 23702 }, { "epoch": 1.9202041477640959, "grad_norm": 0.0568263940513134, "learning_rate": 0.00011554525406183898, "loss": 0.2815, "step": 23703 }, { "epoch": 1.9202851587815943, "grad_norm": 0.055893611162900925, "learning_rate": 0.00011554075340924436, "loss": 0.2716, "step": 23704 }, { "epoch": 1.9203661697990926, "grad_norm": 0.0569830983877182, "learning_rate": 0.00011553625275664972, "loss": 0.2727, "step": 23705 }, { "epoch": 1.920447180816591, "grad_norm": 0.052357595413923264, "learning_rate": 0.00011553175210405509, "loss": 0.3041, "step": 23706 }, { "epoch": 1.9205281918340895, "grad_norm": 0.06113898754119873, "learning_rate": 0.00011552725145146046, "loss": 0.2864, "step": 23707 }, { "epoch": 1.9206092028515878, "grad_norm": 0.051144078373909, "learning_rate": 0.00011552275079886584, "loss": 0.2892, "step": 23708 }, { "epoch": 1.9206902138690862, "grad_norm": 0.052653051912784576, "learning_rate": 0.00011551825014627122, "loss": 0.2612, "step": 23709 }, { "epoch": 1.9207712248865847, "grad_norm": 0.062111809849739075, "learning_rate": 0.0001155137494936766, "loss": 0.2928, "step": 23710 }, { "epoch": 1.920852235904083, "grad_norm": 0.04654501751065254, "learning_rate": 0.00011550924884108196, "loss": 0.2916, "step": 23711 }, { "epoch": 1.9209332469215812, "grad_norm": 0.05940484255552292, "learning_rate": 0.00011550474818848733, "loss": 0.32, "step": 23712 }, { "epoch": 1.9210142579390799, "grad_norm": 0.05449722334742546, "learning_rate": 0.0001155002475358927, "loss": 0.2713, "step": 23713 }, { "epoch": 1.9210952689565781, "grad_norm": 0.04990445077419281, "learning_rate": 0.00011549574688329808, "loss": 0.2451, "step": 23714 }, { "epoch": 1.9211762799740764, "grad_norm": 0.048222944140434265, "learning_rate": 0.00011549124623070347, "loss": 0.2643, "step": 23715 }, { "epoch": 1.9212572909915748, "grad_norm": 0.053241066634655, "learning_rate": 0.00011548674557810884, "loss": 0.3193, "step": 23716 }, { "epoch": 1.9213383020090733, "grad_norm": 0.05876283720135689, "learning_rate": 0.0001154822449255142, "loss": 0.2738, "step": 23717 }, { "epoch": 1.9214193130265715, "grad_norm": 0.07297758758068085, "learning_rate": 0.00011547774427291957, "loss": 0.2597, "step": 23718 }, { "epoch": 1.92150032404407, "grad_norm": 0.051740966737270355, "learning_rate": 0.00011547324362032495, "loss": 0.2848, "step": 23719 }, { "epoch": 1.9215813350615685, "grad_norm": 0.07227837294340134, "learning_rate": 0.00011546874296773033, "loss": 0.2866, "step": 23720 }, { "epoch": 1.9216623460790667, "grad_norm": 0.07501929998397827, "learning_rate": 0.00011546424231513571, "loss": 0.249, "step": 23721 }, { "epoch": 1.9217433570965652, "grad_norm": 0.06517595797777176, "learning_rate": 0.00011545974166254108, "loss": 0.2708, "step": 23722 }, { "epoch": 1.9218243681140637, "grad_norm": 0.06509903818368912, "learning_rate": 0.00011545524100994644, "loss": 0.2839, "step": 23723 }, { "epoch": 1.921905379131562, "grad_norm": 0.07787120342254639, "learning_rate": 0.00011545074035735181, "loss": 0.3032, "step": 23724 }, { "epoch": 1.9219863901490601, "grad_norm": 0.050759267061948776, "learning_rate": 0.00011544623970475719, "loss": 0.2508, "step": 23725 }, { "epoch": 1.9220674011665586, "grad_norm": 0.06383932381868362, "learning_rate": 0.00011544173905216258, "loss": 0.3205, "step": 23726 }, { "epoch": 1.922148412184057, "grad_norm": 0.06522666662931442, "learning_rate": 0.00011543723839956795, "loss": 0.2566, "step": 23727 }, { "epoch": 1.9222294232015553, "grad_norm": 0.04603106901049614, "learning_rate": 0.00011543273774697332, "loss": 0.2596, "step": 23728 }, { "epoch": 1.9223104342190538, "grad_norm": 0.06357534229755402, "learning_rate": 0.00011542823709437868, "loss": 0.2922, "step": 23729 }, { "epoch": 1.9223914452365523, "grad_norm": 0.06124119833111763, "learning_rate": 0.00011542373644178406, "loss": 0.2854, "step": 23730 }, { "epoch": 1.9224724562540505, "grad_norm": 0.05823444202542305, "learning_rate": 0.00011541923578918943, "loss": 0.2564, "step": 23731 }, { "epoch": 1.922553467271549, "grad_norm": 0.05559345334768295, "learning_rate": 0.00011541473513659482, "loss": 0.2842, "step": 23732 }, { "epoch": 1.9226344782890474, "grad_norm": 0.05219922587275505, "learning_rate": 0.00011541023448400019, "loss": 0.2688, "step": 23733 }, { "epoch": 1.9227154893065457, "grad_norm": 0.061427246779203415, "learning_rate": 0.00011540573383140556, "loss": 0.2675, "step": 23734 }, { "epoch": 1.922796500324044, "grad_norm": 0.061376236379146576, "learning_rate": 0.00011540123317881092, "loss": 0.2625, "step": 23735 }, { "epoch": 1.9228775113415426, "grad_norm": 0.06096119061112404, "learning_rate": 0.0001153967325262163, "loss": 0.2711, "step": 23736 }, { "epoch": 1.9229585223590409, "grad_norm": 0.05492965131998062, "learning_rate": 0.00011539223187362167, "loss": 0.2323, "step": 23737 }, { "epoch": 1.923039533376539, "grad_norm": 0.053612884134054184, "learning_rate": 0.00011538773122102706, "loss": 0.2624, "step": 23738 }, { "epoch": 1.9231205443940376, "grad_norm": 0.05468292906880379, "learning_rate": 0.00011538323056843243, "loss": 0.278, "step": 23739 }, { "epoch": 1.923201555411536, "grad_norm": 0.057623159140348434, "learning_rate": 0.0001153787299158378, "loss": 0.2608, "step": 23740 }, { "epoch": 1.9232825664290343, "grad_norm": 0.06276316940784454, "learning_rate": 0.00011537422926324317, "loss": 0.2601, "step": 23741 }, { "epoch": 1.9233635774465327, "grad_norm": 0.059519946575164795, "learning_rate": 0.00011536972861064854, "loss": 0.3006, "step": 23742 }, { "epoch": 1.9234445884640312, "grad_norm": 0.06149646267294884, "learning_rate": 0.00011536522795805394, "loss": 0.2556, "step": 23743 }, { "epoch": 1.9235255994815295, "grad_norm": 0.06678318977355957, "learning_rate": 0.0001153607273054593, "loss": 0.2883, "step": 23744 }, { "epoch": 1.9236066104990277, "grad_norm": 0.05350562557578087, "learning_rate": 0.00011535622665286467, "loss": 0.2249, "step": 23745 }, { "epoch": 1.9236876215165264, "grad_norm": 0.05622348561882973, "learning_rate": 0.00011535172600027005, "loss": 0.2765, "step": 23746 }, { "epoch": 1.9237686325340246, "grad_norm": 0.07468672096729279, "learning_rate": 0.00011534722534767541, "loss": 0.3001, "step": 23747 }, { "epoch": 1.9238496435515229, "grad_norm": 0.0751064196228981, "learning_rate": 0.00011534272469508078, "loss": 0.2626, "step": 23748 }, { "epoch": 1.9239306545690213, "grad_norm": 0.05530063062906265, "learning_rate": 0.00011533822404248618, "loss": 0.2431, "step": 23749 }, { "epoch": 1.9240116655865198, "grad_norm": 0.053154587745666504, "learning_rate": 0.00011533372338989154, "loss": 0.2823, "step": 23750 }, { "epoch": 1.924092676604018, "grad_norm": 0.06156206130981445, "learning_rate": 0.00011532922273729692, "loss": 0.2349, "step": 23751 }, { "epoch": 1.9241736876215165, "grad_norm": 0.06651521474123001, "learning_rate": 0.00011532472208470229, "loss": 0.2662, "step": 23752 }, { "epoch": 1.924254698639015, "grad_norm": 0.04997168108820915, "learning_rate": 0.00011532022143210765, "loss": 0.2556, "step": 23753 }, { "epoch": 1.9243357096565132, "grad_norm": 0.05996481329202652, "learning_rate": 0.00011531572077951302, "loss": 0.3115, "step": 23754 }, { "epoch": 1.9244167206740117, "grad_norm": 0.05364224314689636, "learning_rate": 0.00011531122012691842, "loss": 0.2766, "step": 23755 }, { "epoch": 1.9244977316915102, "grad_norm": 0.07042060792446136, "learning_rate": 0.00011530671947432378, "loss": 0.2657, "step": 23756 }, { "epoch": 1.9245787427090084, "grad_norm": 0.04777385666966438, "learning_rate": 0.00011530221882172916, "loss": 0.2287, "step": 23757 }, { "epoch": 1.9246597537265067, "grad_norm": 0.06700602918863297, "learning_rate": 0.00011529771816913453, "loss": 0.2736, "step": 23758 }, { "epoch": 1.9247407647440054, "grad_norm": 0.06722524762153625, "learning_rate": 0.00011529321751653989, "loss": 0.2707, "step": 23759 }, { "epoch": 1.9248217757615036, "grad_norm": 0.05077163130044937, "learning_rate": 0.00011528871686394526, "loss": 0.2763, "step": 23760 }, { "epoch": 1.9249027867790018, "grad_norm": 0.05121629685163498, "learning_rate": 0.00011528421621135067, "loss": 0.2735, "step": 23761 }, { "epoch": 1.9249837977965003, "grad_norm": 0.07214384526014328, "learning_rate": 0.00011527971555875603, "loss": 0.2928, "step": 23762 }, { "epoch": 1.9250648088139988, "grad_norm": 0.0521952249109745, "learning_rate": 0.0001152752149061614, "loss": 0.2367, "step": 23763 }, { "epoch": 1.925145819831497, "grad_norm": 0.06913160532712936, "learning_rate": 0.00011527071425356677, "loss": 0.3283, "step": 23764 }, { "epoch": 1.9252268308489955, "grad_norm": 0.05566168576478958, "learning_rate": 0.00011526621360097215, "loss": 0.2849, "step": 23765 }, { "epoch": 1.925307841866494, "grad_norm": 0.06802644580602646, "learning_rate": 0.0001152617129483775, "loss": 0.2753, "step": 23766 }, { "epoch": 1.9253888528839922, "grad_norm": 0.07014881074428558, "learning_rate": 0.00011525721229578291, "loss": 0.2517, "step": 23767 }, { "epoch": 1.9254698639014904, "grad_norm": 0.05886785313487053, "learning_rate": 0.00011525271164318827, "loss": 0.2763, "step": 23768 }, { "epoch": 1.9255508749189891, "grad_norm": 0.054348427802324295, "learning_rate": 0.00011524821099059364, "loss": 0.2648, "step": 23769 }, { "epoch": 1.9256318859364874, "grad_norm": 0.060900118201971054, "learning_rate": 0.00011524371033799901, "loss": 0.2349, "step": 23770 }, { "epoch": 1.9257128969539856, "grad_norm": 0.06138508766889572, "learning_rate": 0.00011523920968540439, "loss": 0.282, "step": 23771 }, { "epoch": 1.925793907971484, "grad_norm": 0.053859367966651917, "learning_rate": 0.00011523470903280977, "loss": 0.2704, "step": 23772 }, { "epoch": 1.9258749189889826, "grad_norm": 0.056411322206258774, "learning_rate": 0.00011523020838021515, "loss": 0.2781, "step": 23773 }, { "epoch": 1.9259559300064808, "grad_norm": 0.06078789383172989, "learning_rate": 0.00011522570772762051, "loss": 0.268, "step": 23774 }, { "epoch": 1.9260369410239793, "grad_norm": 0.052285823971033096, "learning_rate": 0.00011522120707502588, "loss": 0.2564, "step": 23775 }, { "epoch": 1.9261179520414777, "grad_norm": 0.06265722960233688, "learning_rate": 0.00011521670642243126, "loss": 0.2741, "step": 23776 }, { "epoch": 1.926198963058976, "grad_norm": 0.05709337443113327, "learning_rate": 0.00011521220576983663, "loss": 0.2382, "step": 23777 }, { "epoch": 1.9262799740764744, "grad_norm": 0.06749071180820465, "learning_rate": 0.00011520770511724202, "loss": 0.2617, "step": 23778 }, { "epoch": 1.926360985093973, "grad_norm": 0.05918231979012489, "learning_rate": 0.00011520320446464739, "loss": 0.2637, "step": 23779 }, { "epoch": 1.9264419961114712, "grad_norm": 0.07993515580892563, "learning_rate": 0.00011519870381205275, "loss": 0.2783, "step": 23780 }, { "epoch": 1.9265230071289694, "grad_norm": 0.07001268863677979, "learning_rate": 0.00011519420315945812, "loss": 0.3366, "step": 23781 }, { "epoch": 1.9266040181464679, "grad_norm": 0.05565613880753517, "learning_rate": 0.0001151897025068635, "loss": 0.2487, "step": 23782 }, { "epoch": 1.9266850291639663, "grad_norm": 0.0639483854174614, "learning_rate": 0.00011518520185426887, "loss": 0.2915, "step": 23783 }, { "epoch": 1.9267660401814646, "grad_norm": 0.06700640171766281, "learning_rate": 0.00011518070120167426, "loss": 0.2872, "step": 23784 }, { "epoch": 1.926847051198963, "grad_norm": 0.055324066430330276, "learning_rate": 0.00011517620054907963, "loss": 0.259, "step": 23785 }, { "epoch": 1.9269280622164615, "grad_norm": 0.06429389864206314, "learning_rate": 0.00011517169989648499, "loss": 0.2721, "step": 23786 }, { "epoch": 1.9270090732339598, "grad_norm": 0.05660184845328331, "learning_rate": 0.00011516719924389037, "loss": 0.2632, "step": 23787 }, { "epoch": 1.9270900842514582, "grad_norm": 0.051351770758628845, "learning_rate": 0.00011516269859129574, "loss": 0.2405, "step": 23788 }, { "epoch": 1.9271710952689567, "grad_norm": 0.055790483951568604, "learning_rate": 0.00011515819793870111, "loss": 0.2598, "step": 23789 }, { "epoch": 1.927252106286455, "grad_norm": 0.06330342590808868, "learning_rate": 0.0001151536972861065, "loss": 0.2544, "step": 23790 }, { "epoch": 1.9273331173039532, "grad_norm": 0.05117948725819588, "learning_rate": 0.00011514919663351187, "loss": 0.2295, "step": 23791 }, { "epoch": 1.9274141283214519, "grad_norm": 0.06005241349339485, "learning_rate": 0.00011514469598091723, "loss": 0.2551, "step": 23792 }, { "epoch": 1.9274951393389501, "grad_norm": 0.057109806686639786, "learning_rate": 0.00011514019532832261, "loss": 0.2375, "step": 23793 }, { "epoch": 1.9275761503564484, "grad_norm": 0.06928063929080963, "learning_rate": 0.00011513569467572798, "loss": 0.304, "step": 23794 }, { "epoch": 1.9276571613739468, "grad_norm": 0.06174888089299202, "learning_rate": 0.00011513119402313337, "loss": 0.2762, "step": 23795 }, { "epoch": 1.9277381723914453, "grad_norm": 0.07365316152572632, "learning_rate": 0.00011512669337053874, "loss": 0.2722, "step": 23796 }, { "epoch": 1.9278191834089435, "grad_norm": 0.06484461575746536, "learning_rate": 0.00011512219271794411, "loss": 0.2766, "step": 23797 }, { "epoch": 1.927900194426442, "grad_norm": 0.0619257427752018, "learning_rate": 0.00011511769206534947, "loss": 0.2866, "step": 23798 }, { "epoch": 1.9279812054439405, "grad_norm": 0.0514339804649353, "learning_rate": 0.00011511319141275485, "loss": 0.2801, "step": 23799 }, { "epoch": 1.9280622164614387, "grad_norm": 0.06011634320020676, "learning_rate": 0.00011510869076016022, "loss": 0.2558, "step": 23800 }, { "epoch": 1.9281432274789372, "grad_norm": 0.05926699936389923, "learning_rate": 0.00011510419010756561, "loss": 0.2728, "step": 23801 }, { "epoch": 1.9282242384964356, "grad_norm": 0.06783153116703033, "learning_rate": 0.00011509968945497098, "loss": 0.3053, "step": 23802 }, { "epoch": 1.928305249513934, "grad_norm": 0.06083691492676735, "learning_rate": 0.00011509518880237636, "loss": 0.2789, "step": 23803 }, { "epoch": 1.9283862605314321, "grad_norm": 0.051245927810668945, "learning_rate": 0.00011509068814978172, "loss": 0.263, "step": 23804 }, { "epoch": 1.9284672715489306, "grad_norm": 0.05796998366713524, "learning_rate": 0.00011508618749718709, "loss": 0.2657, "step": 23805 }, { "epoch": 1.928548282566429, "grad_norm": 0.06028568372130394, "learning_rate": 0.00011508168684459246, "loss": 0.2762, "step": 23806 }, { "epoch": 1.9286292935839273, "grad_norm": 0.062084048986434937, "learning_rate": 0.00011507718619199785, "loss": 0.2563, "step": 23807 }, { "epoch": 1.9287103046014258, "grad_norm": 0.050911158323287964, "learning_rate": 0.00011507268553940322, "loss": 0.2637, "step": 23808 }, { "epoch": 1.9287913156189243, "grad_norm": 0.0765126645565033, "learning_rate": 0.0001150681848868086, "loss": 0.288, "step": 23809 }, { "epoch": 1.9288723266364225, "grad_norm": 0.06888175010681152, "learning_rate": 0.00011506368423421396, "loss": 0.2965, "step": 23810 }, { "epoch": 1.928953337653921, "grad_norm": 0.06394674628973007, "learning_rate": 0.00011505918358161933, "loss": 0.2659, "step": 23811 }, { "epoch": 1.9290343486714194, "grad_norm": 0.061555393040180206, "learning_rate": 0.0001150546829290247, "loss": 0.263, "step": 23812 }, { "epoch": 1.9291153596889177, "grad_norm": 0.05561353638768196, "learning_rate": 0.00011505018227643009, "loss": 0.2886, "step": 23813 }, { "epoch": 1.929196370706416, "grad_norm": 0.05581795051693916, "learning_rate": 0.00011504568162383547, "loss": 0.2484, "step": 23814 }, { "epoch": 1.9292773817239146, "grad_norm": 0.06461822986602783, "learning_rate": 0.00011504118097124084, "loss": 0.2971, "step": 23815 }, { "epoch": 1.9293583927414129, "grad_norm": 0.05509224161505699, "learning_rate": 0.0001150366803186462, "loss": 0.2461, "step": 23816 }, { "epoch": 1.929439403758911, "grad_norm": 0.05018146336078644, "learning_rate": 0.00011503217966605157, "loss": 0.2369, "step": 23817 }, { "epoch": 1.9295204147764096, "grad_norm": 0.05055820196866989, "learning_rate": 0.00011502767901345695, "loss": 0.276, "step": 23818 }, { "epoch": 1.929601425793908, "grad_norm": 0.050321076065301895, "learning_rate": 0.00011502317836086233, "loss": 0.269, "step": 23819 }, { "epoch": 1.9296824368114063, "grad_norm": 0.06445109844207764, "learning_rate": 0.00011501867770826771, "loss": 0.2553, "step": 23820 }, { "epoch": 1.9297634478289047, "grad_norm": 0.054344166070222855, "learning_rate": 0.00011501417705567308, "loss": 0.2479, "step": 23821 }, { "epoch": 1.9298444588464032, "grad_norm": 0.05837761610746384, "learning_rate": 0.00011500967640307844, "loss": 0.2913, "step": 23822 }, { "epoch": 1.9299254698639015, "grad_norm": 0.06546605378389359, "learning_rate": 0.00011500517575048382, "loss": 0.2784, "step": 23823 }, { "epoch": 1.9300064808814, "grad_norm": 0.0638062059879303, "learning_rate": 0.00011500067509788922, "loss": 0.2531, "step": 23824 }, { "epoch": 1.9300874918988984, "grad_norm": 0.06651133298873901, "learning_rate": 0.00011499617444529458, "loss": 0.2692, "step": 23825 }, { "epoch": 1.9301685029163966, "grad_norm": 0.06613379716873169, "learning_rate": 0.00011499167379269995, "loss": 0.2671, "step": 23826 }, { "epoch": 1.9302495139338949, "grad_norm": 0.051686592400074005, "learning_rate": 0.00011498717314010532, "loss": 0.2468, "step": 23827 }, { "epoch": 1.9303305249513933, "grad_norm": 0.05653015151619911, "learning_rate": 0.00011498267248751068, "loss": 0.2876, "step": 23828 }, { "epoch": 1.9304115359688918, "grad_norm": 0.06232219934463501, "learning_rate": 0.00011497817183491606, "loss": 0.2862, "step": 23829 }, { "epoch": 1.93049254698639, "grad_norm": 0.052821218967437744, "learning_rate": 0.00011497367118232146, "loss": 0.2503, "step": 23830 }, { "epoch": 1.9305735580038885, "grad_norm": 0.06216568499803543, "learning_rate": 0.00011496917052972682, "loss": 0.2524, "step": 23831 }, { "epoch": 1.930654569021387, "grad_norm": 0.07735279202461243, "learning_rate": 0.00011496466987713219, "loss": 0.3003, "step": 23832 }, { "epoch": 1.9307355800388852, "grad_norm": 0.06235320121049881, "learning_rate": 0.00011496016922453756, "loss": 0.2338, "step": 23833 }, { "epoch": 1.9308165910563837, "grad_norm": 0.05040643364191055, "learning_rate": 0.00011495566857194294, "loss": 0.2185, "step": 23834 }, { "epoch": 1.9308976020738822, "grad_norm": 0.07056865096092224, "learning_rate": 0.0001149511679193483, "loss": 0.2625, "step": 23835 }, { "epoch": 1.9309786130913804, "grad_norm": 0.04554782807826996, "learning_rate": 0.0001149466672667537, "loss": 0.2648, "step": 23836 }, { "epoch": 1.9310596241088787, "grad_norm": 0.07224829494953156, "learning_rate": 0.00011494216661415906, "loss": 0.2497, "step": 23837 }, { "epoch": 1.9311406351263773, "grad_norm": 0.05645201355218887, "learning_rate": 0.00011493766596156443, "loss": 0.2642, "step": 23838 }, { "epoch": 1.9312216461438756, "grad_norm": 0.06240091100335121, "learning_rate": 0.0001149331653089698, "loss": 0.2762, "step": 23839 }, { "epoch": 1.9313026571613738, "grad_norm": 0.05922730639576912, "learning_rate": 0.00011492866465637518, "loss": 0.2615, "step": 23840 }, { "epoch": 1.9313836681788723, "grad_norm": 0.05924151837825775, "learning_rate": 0.00011492416400378054, "loss": 0.2664, "step": 23841 }, { "epoch": 1.9314646791963708, "grad_norm": 0.05077677592635155, "learning_rate": 0.00011491966335118594, "loss": 0.2478, "step": 23842 }, { "epoch": 1.931545690213869, "grad_norm": 0.06003196910023689, "learning_rate": 0.0001149151626985913, "loss": 0.3053, "step": 23843 }, { "epoch": 1.9316267012313675, "grad_norm": 0.05602165311574936, "learning_rate": 0.00011491066204599667, "loss": 0.2536, "step": 23844 }, { "epoch": 1.931707712248866, "grad_norm": 0.05465425178408623, "learning_rate": 0.00011490616139340205, "loss": 0.2706, "step": 23845 }, { "epoch": 1.9317887232663642, "grad_norm": 0.0503137931227684, "learning_rate": 0.00011490166074080742, "loss": 0.293, "step": 23846 }, { "epoch": 1.9318697342838627, "grad_norm": 0.042224958539009094, "learning_rate": 0.00011489716008821281, "loss": 0.2338, "step": 23847 }, { "epoch": 1.9319507453013611, "grad_norm": 0.06504608690738678, "learning_rate": 0.00011489265943561818, "loss": 0.326, "step": 23848 }, { "epoch": 1.9320317563188594, "grad_norm": 0.057590994983911514, "learning_rate": 0.00011488815878302354, "loss": 0.2648, "step": 23849 }, { "epoch": 1.9321127673363576, "grad_norm": 0.057391226291656494, "learning_rate": 0.00011488365813042892, "loss": 0.2405, "step": 23850 }, { "epoch": 1.932193778353856, "grad_norm": 0.06676268577575684, "learning_rate": 0.00011487915747783429, "loss": 0.2861, "step": 23851 }, { "epoch": 1.9322747893713546, "grad_norm": 0.05596553161740303, "learning_rate": 0.00011487465682523966, "loss": 0.2852, "step": 23852 }, { "epoch": 1.9323558003888528, "grad_norm": 0.05663205310702324, "learning_rate": 0.00011487015617264505, "loss": 0.2519, "step": 23853 }, { "epoch": 1.9324368114063513, "grad_norm": 0.06211337074637413, "learning_rate": 0.00011486565552005042, "loss": 0.2877, "step": 23854 }, { "epoch": 1.9325178224238497, "grad_norm": 0.0637897327542305, "learning_rate": 0.00011486115486745578, "loss": 0.278, "step": 23855 }, { "epoch": 1.932598833441348, "grad_norm": 0.05674593150615692, "learning_rate": 0.00011485665421486116, "loss": 0.2694, "step": 23856 }, { "epoch": 1.9326798444588464, "grad_norm": 0.07816286385059357, "learning_rate": 0.00011485215356226653, "loss": 0.3347, "step": 23857 }, { "epoch": 1.932760855476345, "grad_norm": 0.0642632469534874, "learning_rate": 0.0001148476529096719, "loss": 0.2657, "step": 23858 }, { "epoch": 1.9328418664938432, "grad_norm": 0.06931416690349579, "learning_rate": 0.00011484315225707729, "loss": 0.2807, "step": 23859 }, { "epoch": 1.9329228775113414, "grad_norm": 0.05714231729507446, "learning_rate": 0.00011483865160448267, "loss": 0.2979, "step": 23860 }, { "epoch": 1.93300388852884, "grad_norm": 0.06332039088010788, "learning_rate": 0.00011483415095188803, "loss": 0.2682, "step": 23861 }, { "epoch": 1.9330848995463383, "grad_norm": 0.06614676117897034, "learning_rate": 0.0001148296502992934, "loss": 0.2662, "step": 23862 }, { "epoch": 1.9331659105638366, "grad_norm": 0.05901767313480377, "learning_rate": 0.00011482514964669877, "loss": 0.3002, "step": 23863 }, { "epoch": 1.933246921581335, "grad_norm": 0.0599125437438488, "learning_rate": 0.00011482064899410415, "loss": 0.258, "step": 23864 }, { "epoch": 1.9333279325988335, "grad_norm": 0.05454184114933014, "learning_rate": 0.00011481614834150953, "loss": 0.2551, "step": 23865 }, { "epoch": 1.9334089436163318, "grad_norm": 0.0639750063419342, "learning_rate": 0.00011481164768891491, "loss": 0.261, "step": 23866 }, { "epoch": 1.9334899546338302, "grad_norm": 0.06324272602796555, "learning_rate": 0.00011480714703632027, "loss": 0.2616, "step": 23867 }, { "epoch": 1.9335709656513287, "grad_norm": 0.05212435871362686, "learning_rate": 0.00011480264638372564, "loss": 0.2473, "step": 23868 }, { "epoch": 1.933651976668827, "grad_norm": 0.058949582278728485, "learning_rate": 0.00011479814573113101, "loss": 0.3239, "step": 23869 }, { "epoch": 1.9337329876863252, "grad_norm": 0.057215847074985504, "learning_rate": 0.00011479364507853639, "loss": 0.2682, "step": 23870 }, { "epoch": 1.9338139987038239, "grad_norm": 0.053002744913101196, "learning_rate": 0.00011478914442594178, "loss": 0.2705, "step": 23871 }, { "epoch": 1.9338950097213221, "grad_norm": 0.07106682658195496, "learning_rate": 0.00011478464377334715, "loss": 0.2788, "step": 23872 }, { "epoch": 1.9339760207388204, "grad_norm": 0.06341718137264252, "learning_rate": 0.00011478014312075251, "loss": 0.2676, "step": 23873 }, { "epoch": 1.9340570317563188, "grad_norm": 0.05477464199066162, "learning_rate": 0.00011477564246815788, "loss": 0.223, "step": 23874 }, { "epoch": 1.9341380427738173, "grad_norm": 0.06264764815568924, "learning_rate": 0.00011477114181556326, "loss": 0.2839, "step": 23875 }, { "epoch": 1.9342190537913155, "grad_norm": 0.049226321280002594, "learning_rate": 0.00011476664116296864, "loss": 0.2423, "step": 23876 }, { "epoch": 1.934300064808814, "grad_norm": 0.062404386699199677, "learning_rate": 0.00011476214051037402, "loss": 0.297, "step": 23877 }, { "epoch": 1.9343810758263125, "grad_norm": 0.05783528834581375, "learning_rate": 0.00011475763985777939, "loss": 0.267, "step": 23878 }, { "epoch": 1.9344620868438107, "grad_norm": 0.06114105135202408, "learning_rate": 0.00011475313920518475, "loss": 0.2823, "step": 23879 }, { "epoch": 1.9345430978613092, "grad_norm": 0.06154955178499222, "learning_rate": 0.00011474863855259012, "loss": 0.2849, "step": 23880 }, { "epoch": 1.9346241088788076, "grad_norm": 0.06020995229482651, "learning_rate": 0.0001147441378999955, "loss": 0.3081, "step": 23881 }, { "epoch": 1.934705119896306, "grad_norm": 0.05826554074883461, "learning_rate": 0.00011473963724740088, "loss": 0.2419, "step": 23882 }, { "epoch": 1.9347861309138041, "grad_norm": 0.05434232950210571, "learning_rate": 0.00011473513659480626, "loss": 0.2449, "step": 23883 }, { "epoch": 1.9348671419313026, "grad_norm": 0.06052308902144432, "learning_rate": 0.00011473063594221163, "loss": 0.2925, "step": 23884 }, { "epoch": 1.934948152948801, "grad_norm": 0.05621659383177757, "learning_rate": 0.00011472613528961699, "loss": 0.2648, "step": 23885 }, { "epoch": 1.9350291639662993, "grad_norm": 0.057274747639894485, "learning_rate": 0.00011472163463702237, "loss": 0.2678, "step": 23886 }, { "epoch": 1.9351101749837978, "grad_norm": 0.05058053135871887, "learning_rate": 0.00011471713398442774, "loss": 0.2499, "step": 23887 }, { "epoch": 1.9351911860012962, "grad_norm": 0.04877978190779686, "learning_rate": 0.00011471263333183313, "loss": 0.2143, "step": 23888 }, { "epoch": 1.9352721970187945, "grad_norm": 0.05602328106760979, "learning_rate": 0.0001147081326792385, "loss": 0.2916, "step": 23889 }, { "epoch": 1.935353208036293, "grad_norm": 0.055223964154720306, "learning_rate": 0.00011470363202664387, "loss": 0.2723, "step": 23890 }, { "epoch": 1.9354342190537914, "grad_norm": 0.0630248561501503, "learning_rate": 0.00011469913137404923, "loss": 0.2176, "step": 23891 }, { "epoch": 1.9355152300712897, "grad_norm": 0.05983347073197365, "learning_rate": 0.00011469463072145461, "loss": 0.2666, "step": 23892 }, { "epoch": 1.935596241088788, "grad_norm": 0.05666651949286461, "learning_rate": 0.00011469013006885998, "loss": 0.2425, "step": 23893 }, { "epoch": 1.9356772521062866, "grad_norm": 0.05697185918688774, "learning_rate": 0.00011468562941626537, "loss": 0.256, "step": 23894 }, { "epoch": 1.9357582631237849, "grad_norm": 0.05312450975179672, "learning_rate": 0.00011468112876367074, "loss": 0.2724, "step": 23895 }, { "epoch": 1.935839274141283, "grad_norm": 0.050729118287563324, "learning_rate": 0.00011467662811107612, "loss": 0.2551, "step": 23896 }, { "epoch": 1.9359202851587816, "grad_norm": 0.056781407445669174, "learning_rate": 0.00011467212745848148, "loss": 0.2648, "step": 23897 }, { "epoch": 1.93600129617628, "grad_norm": 0.06569044291973114, "learning_rate": 0.00011466762680588685, "loss": 0.2704, "step": 23898 }, { "epoch": 1.9360823071937783, "grad_norm": 0.06088702008128166, "learning_rate": 0.00011466312615329222, "loss": 0.27, "step": 23899 }, { "epoch": 1.9361633182112767, "grad_norm": 0.055795930325984955, "learning_rate": 0.00011465862550069761, "loss": 0.2446, "step": 23900 }, { "epoch": 1.9362443292287752, "grad_norm": 0.06180226802825928, "learning_rate": 0.00011465412484810298, "loss": 0.2701, "step": 23901 }, { "epoch": 1.9363253402462735, "grad_norm": 0.06375572085380554, "learning_rate": 0.00011464962419550836, "loss": 0.2565, "step": 23902 }, { "epoch": 1.936406351263772, "grad_norm": 0.04969800263643265, "learning_rate": 0.00011464512354291373, "loss": 0.2377, "step": 23903 }, { "epoch": 1.9364873622812704, "grad_norm": 0.07344190031290054, "learning_rate": 0.00011464062289031909, "loss": 0.2966, "step": 23904 }, { "epoch": 1.9365683732987686, "grad_norm": 0.05501018092036247, "learning_rate": 0.00011463612223772449, "loss": 0.2251, "step": 23905 }, { "epoch": 1.9366493843162669, "grad_norm": 0.048425883054733276, "learning_rate": 0.00011463162158512985, "loss": 0.2583, "step": 23906 }, { "epoch": 1.9367303953337653, "grad_norm": 0.06696183234453201, "learning_rate": 0.00011462712093253522, "loss": 0.2684, "step": 23907 }, { "epoch": 1.9368114063512638, "grad_norm": 0.05589906871318817, "learning_rate": 0.0001146226202799406, "loss": 0.2343, "step": 23908 }, { "epoch": 1.936892417368762, "grad_norm": 0.07046809047460556, "learning_rate": 0.00011461811962734597, "loss": 0.3071, "step": 23909 }, { "epoch": 1.9369734283862605, "grad_norm": 0.07077688723802567, "learning_rate": 0.00011461361897475133, "loss": 0.3055, "step": 23910 }, { "epoch": 1.937054439403759, "grad_norm": 0.07673512399196625, "learning_rate": 0.00011460911832215673, "loss": 0.2839, "step": 23911 }, { "epoch": 1.9371354504212572, "grad_norm": 0.066664919257164, "learning_rate": 0.00011460461766956209, "loss": 0.2812, "step": 23912 }, { "epoch": 1.9372164614387557, "grad_norm": 0.06907733529806137, "learning_rate": 0.00011460011701696747, "loss": 0.2709, "step": 23913 }, { "epoch": 1.9372974724562542, "grad_norm": 0.0514480397105217, "learning_rate": 0.00011459561636437284, "loss": 0.2514, "step": 23914 }, { "epoch": 1.9373784834737524, "grad_norm": 0.06700679659843445, "learning_rate": 0.00011459111571177821, "loss": 0.2833, "step": 23915 }, { "epoch": 1.9374594944912507, "grad_norm": 0.06083435192704201, "learning_rate": 0.00011458661505918357, "loss": 0.2955, "step": 23916 }, { "epoch": 1.9375405055087493, "grad_norm": 0.051247239112854004, "learning_rate": 0.00011458211440658897, "loss": 0.2426, "step": 23917 }, { "epoch": 1.9376215165262476, "grad_norm": 0.048243001103401184, "learning_rate": 0.00011457761375399433, "loss": 0.2177, "step": 23918 }, { "epoch": 1.9377025275437458, "grad_norm": 0.05603655427694321, "learning_rate": 0.00011457311310139971, "loss": 0.3136, "step": 23919 }, { "epoch": 1.9377835385612443, "grad_norm": 0.06207847222685814, "learning_rate": 0.00011456861244880508, "loss": 0.2679, "step": 23920 }, { "epoch": 1.9378645495787428, "grad_norm": 0.05992251634597778, "learning_rate": 0.00011456411179621046, "loss": 0.2982, "step": 23921 }, { "epoch": 1.937945560596241, "grad_norm": 0.05770555138587952, "learning_rate": 0.00011455961114361582, "loss": 0.2643, "step": 23922 }, { "epoch": 1.9380265716137395, "grad_norm": 0.05730791762471199, "learning_rate": 0.00011455511049102122, "loss": 0.2514, "step": 23923 }, { "epoch": 1.938107582631238, "grad_norm": 0.0661020278930664, "learning_rate": 0.00011455060983842658, "loss": 0.2773, "step": 23924 }, { "epoch": 1.9381885936487362, "grad_norm": 0.05453161150217056, "learning_rate": 0.00011454610918583195, "loss": 0.2417, "step": 23925 }, { "epoch": 1.9382696046662347, "grad_norm": 0.06748737394809723, "learning_rate": 0.00011454160853323732, "loss": 0.2934, "step": 23926 }, { "epoch": 1.9383506156837331, "grad_norm": 0.05424464866518974, "learning_rate": 0.0001145371078806427, "loss": 0.2694, "step": 23927 }, { "epoch": 1.9384316267012314, "grad_norm": 0.06523843854665756, "learning_rate": 0.00011453260722804808, "loss": 0.2623, "step": 23928 }, { "epoch": 1.9385126377187296, "grad_norm": 0.06635954976081848, "learning_rate": 0.00011452810657545346, "loss": 0.2637, "step": 23929 }, { "epoch": 1.938593648736228, "grad_norm": 0.06299407035112381, "learning_rate": 0.00011452360592285882, "loss": 0.2927, "step": 23930 }, { "epoch": 1.9386746597537265, "grad_norm": 0.06170092523097992, "learning_rate": 0.00011451910527026419, "loss": 0.2784, "step": 23931 }, { "epoch": 1.9387556707712248, "grad_norm": 0.08529407531023026, "learning_rate": 0.00011451460461766956, "loss": 0.2693, "step": 23932 }, { "epoch": 1.9388366817887233, "grad_norm": 0.06642553955316544, "learning_rate": 0.00011451010396507494, "loss": 0.2497, "step": 23933 }, { "epoch": 1.9389176928062217, "grad_norm": 0.05886387825012207, "learning_rate": 0.00011450560331248033, "loss": 0.275, "step": 23934 }, { "epoch": 1.93899870382372, "grad_norm": 0.05601823702454567, "learning_rate": 0.0001145011026598857, "loss": 0.2583, "step": 23935 }, { "epoch": 1.9390797148412184, "grad_norm": 0.057005684822797775, "learning_rate": 0.00011449660200729106, "loss": 0.2406, "step": 23936 }, { "epoch": 1.939160725858717, "grad_norm": 0.05442437157034874, "learning_rate": 0.00011449210135469643, "loss": 0.2484, "step": 23937 }, { "epoch": 1.9392417368762151, "grad_norm": 0.059382639825344086, "learning_rate": 0.0001144876007021018, "loss": 0.2651, "step": 23938 }, { "epoch": 1.9393227478937134, "grad_norm": 0.06507833302021027, "learning_rate": 0.00011448310004950718, "loss": 0.2799, "step": 23939 }, { "epoch": 1.939403758911212, "grad_norm": 0.06146455928683281, "learning_rate": 0.00011447859939691257, "loss": 0.2335, "step": 23940 }, { "epoch": 1.9394847699287103, "grad_norm": 0.04766790568828583, "learning_rate": 0.00011447409874431794, "loss": 0.2231, "step": 23941 }, { "epoch": 1.9395657809462086, "grad_norm": 0.06149807572364807, "learning_rate": 0.0001144695980917233, "loss": 0.2733, "step": 23942 }, { "epoch": 1.939646791963707, "grad_norm": 0.07344269007444382, "learning_rate": 0.00011446509743912867, "loss": 0.3203, "step": 23943 }, { "epoch": 1.9397278029812055, "grad_norm": 0.06012146174907684, "learning_rate": 0.00011446059678653405, "loss": 0.2655, "step": 23944 }, { "epoch": 1.9398088139987038, "grad_norm": 0.06117866188287735, "learning_rate": 0.00011445609613393942, "loss": 0.2646, "step": 23945 }, { "epoch": 1.9398898250162022, "grad_norm": 0.06094307452440262, "learning_rate": 0.00011445159548134481, "loss": 0.3039, "step": 23946 }, { "epoch": 1.9399708360337007, "grad_norm": 0.060897525399923325, "learning_rate": 0.00011444709482875018, "loss": 0.2616, "step": 23947 }, { "epoch": 1.940051847051199, "grad_norm": 0.054656002670526505, "learning_rate": 0.00011444259417615554, "loss": 0.2567, "step": 23948 }, { "epoch": 1.9401328580686974, "grad_norm": 0.055229902267456055, "learning_rate": 0.00011443809352356092, "loss": 0.3029, "step": 23949 }, { "epoch": 1.9402138690861959, "grad_norm": 0.0635809451341629, "learning_rate": 0.00011443359287096629, "loss": 0.263, "step": 23950 }, { "epoch": 1.940294880103694, "grad_norm": 0.05155356600880623, "learning_rate": 0.00011442909221837166, "loss": 0.2497, "step": 23951 }, { "epoch": 1.9403758911211924, "grad_norm": 0.05434060096740723, "learning_rate": 0.00011442459156577705, "loss": 0.2649, "step": 23952 }, { "epoch": 1.9404569021386908, "grad_norm": 0.06424754858016968, "learning_rate": 0.00011442009091318242, "loss": 0.2904, "step": 23953 }, { "epoch": 1.9405379131561893, "grad_norm": 0.08113347738981247, "learning_rate": 0.00011441559026058778, "loss": 0.2875, "step": 23954 }, { "epoch": 1.9406189241736875, "grad_norm": 0.06629473716020584, "learning_rate": 0.00011441108960799316, "loss": 0.2856, "step": 23955 }, { "epoch": 1.940699935191186, "grad_norm": 0.05168033763766289, "learning_rate": 0.00011440658895539853, "loss": 0.2272, "step": 23956 }, { "epoch": 1.9407809462086845, "grad_norm": 0.0798877626657486, "learning_rate": 0.00011440208830280392, "loss": 0.2911, "step": 23957 }, { "epoch": 1.9408619572261827, "grad_norm": 0.06408312171697617, "learning_rate": 0.00011439758765020929, "loss": 0.2583, "step": 23958 }, { "epoch": 1.9409429682436812, "grad_norm": 0.04707522317767143, "learning_rate": 0.00011439308699761467, "loss": 0.2339, "step": 23959 }, { "epoch": 1.9410239792611796, "grad_norm": 0.05191976577043533, "learning_rate": 0.00011438858634502003, "loss": 0.2347, "step": 23960 }, { "epoch": 1.9411049902786779, "grad_norm": 0.052793506532907486, "learning_rate": 0.0001143840856924254, "loss": 0.2961, "step": 23961 }, { "epoch": 1.9411860012961761, "grad_norm": 0.057689033448696136, "learning_rate": 0.00011437958503983077, "loss": 0.2579, "step": 23962 }, { "epoch": 1.9412670123136748, "grad_norm": 0.056980159133672714, "learning_rate": 0.00011437508438723616, "loss": 0.2993, "step": 23963 }, { "epoch": 1.941348023331173, "grad_norm": 0.05391747131943703, "learning_rate": 0.00011437058373464153, "loss": 0.2379, "step": 23964 }, { "epoch": 1.9414290343486713, "grad_norm": 0.0595475398004055, "learning_rate": 0.00011436608308204691, "loss": 0.25, "step": 23965 }, { "epoch": 1.9415100453661698, "grad_norm": 0.04893352836370468, "learning_rate": 0.00011436158242945228, "loss": 0.2692, "step": 23966 }, { "epoch": 1.9415910563836682, "grad_norm": 0.06128464266657829, "learning_rate": 0.00011435708177685764, "loss": 0.2725, "step": 23967 }, { "epoch": 1.9416720674011665, "grad_norm": 0.05946241691708565, "learning_rate": 0.00011435258112426301, "loss": 0.2785, "step": 23968 }, { "epoch": 1.941753078418665, "grad_norm": 0.06088683009147644, "learning_rate": 0.0001143480804716684, "loss": 0.2832, "step": 23969 }, { "epoch": 1.9418340894361634, "grad_norm": 0.05622805282473564, "learning_rate": 0.00011434357981907378, "loss": 0.2521, "step": 23970 }, { "epoch": 1.9419151004536617, "grad_norm": 0.06270037591457367, "learning_rate": 0.00011433907916647915, "loss": 0.2689, "step": 23971 }, { "epoch": 1.94199611147116, "grad_norm": 0.06490737944841385, "learning_rate": 0.00011433457851388452, "loss": 0.2398, "step": 23972 }, { "epoch": 1.9420771224886586, "grad_norm": 0.0594317726790905, "learning_rate": 0.00011433007786128988, "loss": 0.243, "step": 23973 }, { "epoch": 1.9421581335061568, "grad_norm": 0.06923339515924454, "learning_rate": 0.00011432557720869526, "loss": 0.2784, "step": 23974 }, { "epoch": 1.942239144523655, "grad_norm": 0.062373723834753036, "learning_rate": 0.00011432107655610064, "loss": 0.2768, "step": 23975 }, { "epoch": 1.9423201555411536, "grad_norm": 0.0614754743874073, "learning_rate": 0.00011431657590350602, "loss": 0.2646, "step": 23976 }, { "epoch": 1.942401166558652, "grad_norm": 0.05733213946223259, "learning_rate": 0.00011431207525091139, "loss": 0.2638, "step": 23977 }, { "epoch": 1.9424821775761503, "grad_norm": 0.053427550941705704, "learning_rate": 0.00011430757459831676, "loss": 0.2055, "step": 23978 }, { "epoch": 1.9425631885936487, "grad_norm": 0.06927467882633209, "learning_rate": 0.00011430307394572212, "loss": 0.2574, "step": 23979 }, { "epoch": 1.9426441996111472, "grad_norm": 0.061050258576869965, "learning_rate": 0.00011429857329312752, "loss": 0.2588, "step": 23980 }, { "epoch": 1.9427252106286454, "grad_norm": 0.06572843343019485, "learning_rate": 0.00011429407264053289, "loss": 0.2726, "step": 23981 }, { "epoch": 1.942806221646144, "grad_norm": 0.0645068809390068, "learning_rate": 0.00011428957198793826, "loss": 0.2822, "step": 23982 }, { "epoch": 1.9428872326636424, "grad_norm": 0.07286891341209412, "learning_rate": 0.00011428507133534363, "loss": 0.2695, "step": 23983 }, { "epoch": 1.9429682436811406, "grad_norm": 0.06508053094148636, "learning_rate": 0.000114280570682749, "loss": 0.2857, "step": 23984 }, { "epoch": 1.9430492546986389, "grad_norm": 0.05973774939775467, "learning_rate": 0.00011427607003015437, "loss": 0.2527, "step": 23985 }, { "epoch": 1.9431302657161373, "grad_norm": 0.057456836104393005, "learning_rate": 0.00011427156937755977, "loss": 0.266, "step": 23986 }, { "epoch": 1.9432112767336358, "grad_norm": 0.06490325182676315, "learning_rate": 0.00011426706872496513, "loss": 0.2791, "step": 23987 }, { "epoch": 1.943292287751134, "grad_norm": 0.07369411736726761, "learning_rate": 0.0001142625680723705, "loss": 0.3054, "step": 23988 }, { "epoch": 1.9433732987686325, "grad_norm": 0.0538899265229702, "learning_rate": 0.00011425806741977587, "loss": 0.2425, "step": 23989 }, { "epoch": 1.943454309786131, "grad_norm": 0.05909901484847069, "learning_rate": 0.00011425356676718125, "loss": 0.275, "step": 23990 }, { "epoch": 1.9435353208036292, "grad_norm": 0.05489085987210274, "learning_rate": 0.00011424906611458661, "loss": 0.2841, "step": 23991 }, { "epoch": 1.9436163318211277, "grad_norm": 0.05336972326040268, "learning_rate": 0.00011424456546199201, "loss": 0.2588, "step": 23992 }, { "epoch": 1.9436973428386262, "grad_norm": 0.053867846727371216, "learning_rate": 0.00011424006480939737, "loss": 0.2954, "step": 23993 }, { "epoch": 1.9437783538561244, "grad_norm": 0.053872622549533844, "learning_rate": 0.00011423556415680274, "loss": 0.262, "step": 23994 }, { "epoch": 1.9438593648736227, "grad_norm": 0.06062573567032814, "learning_rate": 0.00011423106350420812, "loss": 0.3082, "step": 23995 }, { "epoch": 1.9439403758911213, "grad_norm": 0.05445629358291626, "learning_rate": 0.00011422656285161349, "loss": 0.2608, "step": 23996 }, { "epoch": 1.9440213869086196, "grad_norm": 0.060416966676712036, "learning_rate": 0.00011422206219901885, "loss": 0.27, "step": 23997 }, { "epoch": 1.9441023979261178, "grad_norm": 0.0546443797647953, "learning_rate": 0.00011421756154642425, "loss": 0.2463, "step": 23998 }, { "epoch": 1.9441834089436163, "grad_norm": 0.054451312869787216, "learning_rate": 0.00011421306089382961, "loss": 0.2277, "step": 23999 }, { "epoch": 1.9442644199611148, "grad_norm": 0.06382211297750473, "learning_rate": 0.00011420856024123498, "loss": 0.236, "step": 24000 }, { "epoch": 1.944345430978613, "grad_norm": 0.049225952476263046, "learning_rate": 0.00011420405958864036, "loss": 0.2737, "step": 24001 }, { "epoch": 1.9444264419961115, "grad_norm": 0.05174494534730911, "learning_rate": 0.00011419955893604573, "loss": 0.2588, "step": 24002 }, { "epoch": 1.94450745301361, "grad_norm": 0.05987345799803734, "learning_rate": 0.00011419505828345109, "loss": 0.2853, "step": 24003 }, { "epoch": 1.9445884640311082, "grad_norm": 0.05934160202741623, "learning_rate": 0.00011419055763085649, "loss": 0.2377, "step": 24004 }, { "epoch": 1.9446694750486067, "grad_norm": 0.058130960911512375, "learning_rate": 0.00011418605697826185, "loss": 0.2487, "step": 24005 }, { "epoch": 1.9447504860661051, "grad_norm": 0.052440762519836426, "learning_rate": 0.00011418155632566723, "loss": 0.2556, "step": 24006 }, { "epoch": 1.9448314970836034, "grad_norm": 0.06635487079620361, "learning_rate": 0.0001141770556730726, "loss": 0.3016, "step": 24007 }, { "epoch": 1.9449125081011016, "grad_norm": 0.04897871986031532, "learning_rate": 0.00011417255502047797, "loss": 0.2753, "step": 24008 }, { "epoch": 1.9449935191186, "grad_norm": 0.058619424700737, "learning_rate": 0.00011416805436788336, "loss": 0.2035, "step": 24009 }, { "epoch": 1.9450745301360985, "grad_norm": 0.04933502897620201, "learning_rate": 0.00011416355371528873, "loss": 0.2622, "step": 24010 }, { "epoch": 1.9451555411535968, "grad_norm": 0.044737957417964935, "learning_rate": 0.00011415905306269409, "loss": 0.239, "step": 24011 }, { "epoch": 1.9452365521710953, "grad_norm": 0.06307195872068405, "learning_rate": 0.00011415455241009947, "loss": 0.2677, "step": 24012 }, { "epoch": 1.9453175631885937, "grad_norm": 0.053666554391384125, "learning_rate": 0.00011415005175750484, "loss": 0.2509, "step": 24013 }, { "epoch": 1.945398574206092, "grad_norm": 0.060802605003118515, "learning_rate": 0.00011414555110491021, "loss": 0.2838, "step": 24014 }, { "epoch": 1.9454795852235904, "grad_norm": 0.05955837294459343, "learning_rate": 0.0001141410504523156, "loss": 0.2905, "step": 24015 }, { "epoch": 1.945560596241089, "grad_norm": 0.06257221102714539, "learning_rate": 0.00011413654979972097, "loss": 0.2567, "step": 24016 }, { "epoch": 1.9456416072585871, "grad_norm": 0.06320682168006897, "learning_rate": 0.00011413204914712633, "loss": 0.3398, "step": 24017 }, { "epoch": 1.9457226182760854, "grad_norm": 0.06431394815444946, "learning_rate": 0.00011412754849453171, "loss": 0.2492, "step": 24018 }, { "epoch": 1.945803629293584, "grad_norm": 0.0520576536655426, "learning_rate": 0.00011412304784193708, "loss": 0.2671, "step": 24019 }, { "epoch": 1.9458846403110823, "grad_norm": 0.06366564333438873, "learning_rate": 0.00011411854718934246, "loss": 0.2465, "step": 24020 }, { "epoch": 1.9459656513285806, "grad_norm": 0.05639234557747841, "learning_rate": 0.00011411404653674784, "loss": 0.2627, "step": 24021 }, { "epoch": 1.946046662346079, "grad_norm": 0.05050186067819595, "learning_rate": 0.00011410954588415322, "loss": 0.2566, "step": 24022 }, { "epoch": 1.9461276733635775, "grad_norm": 0.06824922561645508, "learning_rate": 0.00011410504523155858, "loss": 0.2542, "step": 24023 }, { "epoch": 1.9462086843810757, "grad_norm": 0.048303570598363876, "learning_rate": 0.00011410054457896395, "loss": 0.2629, "step": 24024 }, { "epoch": 1.9462896953985742, "grad_norm": 0.058540135622024536, "learning_rate": 0.00011409604392636932, "loss": 0.2789, "step": 24025 }, { "epoch": 1.9463707064160727, "grad_norm": 0.0661383792757988, "learning_rate": 0.0001140915432737747, "loss": 0.245, "step": 24026 }, { "epoch": 1.946451717433571, "grad_norm": 0.061160411685705185, "learning_rate": 0.00011408704262118008, "loss": 0.2163, "step": 24027 }, { "epoch": 1.9465327284510694, "grad_norm": 0.056919898837804794, "learning_rate": 0.00011408254196858546, "loss": 0.2187, "step": 24028 }, { "epoch": 1.9466137394685679, "grad_norm": 0.06528938561677933, "learning_rate": 0.00011407804131599082, "loss": 0.2884, "step": 24029 }, { "epoch": 1.946694750486066, "grad_norm": 0.05889112129807472, "learning_rate": 0.00011407354066339619, "loss": 0.3225, "step": 24030 }, { "epoch": 1.9467757615035644, "grad_norm": 0.061333924531936646, "learning_rate": 0.00011406904001080157, "loss": 0.2619, "step": 24031 }, { "epoch": 1.9468567725210628, "grad_norm": 0.07009389251470566, "learning_rate": 0.00011406453935820694, "loss": 0.2613, "step": 24032 }, { "epoch": 1.9469377835385613, "grad_norm": 0.06943484395742416, "learning_rate": 0.00011406003870561233, "loss": 0.256, "step": 24033 }, { "epoch": 1.9470187945560595, "grad_norm": 0.04996665567159653, "learning_rate": 0.0001140555380530177, "loss": 0.2415, "step": 24034 }, { "epoch": 1.947099805573558, "grad_norm": 0.05447015538811684, "learning_rate": 0.00011405103740042307, "loss": 0.2911, "step": 24035 }, { "epoch": 1.9471808165910565, "grad_norm": 0.05733644962310791, "learning_rate": 0.00011404653674782843, "loss": 0.2754, "step": 24036 }, { "epoch": 1.9472618276085547, "grad_norm": 0.061473965644836426, "learning_rate": 0.00011404203609523381, "loss": 0.2768, "step": 24037 }, { "epoch": 1.9473428386260532, "grad_norm": 0.047736383974552155, "learning_rate": 0.0001140375354426392, "loss": 0.2346, "step": 24038 }, { "epoch": 1.9474238496435516, "grad_norm": 0.06296089291572571, "learning_rate": 0.00011403303479004457, "loss": 0.2654, "step": 24039 }, { "epoch": 1.9475048606610499, "grad_norm": 0.061912525445222855, "learning_rate": 0.00011402853413744994, "loss": 0.2574, "step": 24040 }, { "epoch": 1.9475858716785481, "grad_norm": 0.07144110649824142, "learning_rate": 0.00011402403348485531, "loss": 0.2667, "step": 24041 }, { "epoch": 1.9476668826960468, "grad_norm": 0.06251692026853561, "learning_rate": 0.00011401953283226067, "loss": 0.2476, "step": 24042 }, { "epoch": 1.947747893713545, "grad_norm": 0.06474420428276062, "learning_rate": 0.00011401503217966605, "loss": 0.2716, "step": 24043 }, { "epoch": 1.9478289047310433, "grad_norm": 0.061901070177555084, "learning_rate": 0.00011401053152707144, "loss": 0.2543, "step": 24044 }, { "epoch": 1.9479099157485418, "grad_norm": 0.06367991119623184, "learning_rate": 0.00011400603087447681, "loss": 0.2544, "step": 24045 }, { "epoch": 1.9479909267660402, "grad_norm": 0.060065027326345444, "learning_rate": 0.00011400153022188218, "loss": 0.2548, "step": 24046 }, { "epoch": 1.9480719377835385, "grad_norm": 0.049185190349817276, "learning_rate": 0.00011399702956928756, "loss": 0.2562, "step": 24047 }, { "epoch": 1.948152948801037, "grad_norm": 0.05605795979499817, "learning_rate": 0.00011399252891669292, "loss": 0.2622, "step": 24048 }, { "epoch": 1.9482339598185354, "grad_norm": 0.06234635040163994, "learning_rate": 0.00011398802826409829, "loss": 0.277, "step": 24049 }, { "epoch": 1.9483149708360337, "grad_norm": 0.06847386807203293, "learning_rate": 0.00011398352761150368, "loss": 0.2943, "step": 24050 }, { "epoch": 1.9483959818535321, "grad_norm": 0.06446881592273712, "learning_rate": 0.00011397902695890905, "loss": 0.2821, "step": 24051 }, { "epoch": 1.9484769928710306, "grad_norm": 0.05356021970510483, "learning_rate": 0.00011397452630631442, "loss": 0.2891, "step": 24052 }, { "epoch": 1.9485580038885288, "grad_norm": 0.08012793958187103, "learning_rate": 0.0001139700256537198, "loss": 0.2671, "step": 24053 }, { "epoch": 1.948639014906027, "grad_norm": 0.04903095215559006, "learning_rate": 0.00011396552500112516, "loss": 0.2388, "step": 24054 }, { "epoch": 1.9487200259235256, "grad_norm": 0.056451983749866486, "learning_rate": 0.00011396102434853053, "loss": 0.2749, "step": 24055 }, { "epoch": 1.948801036941024, "grad_norm": 0.06300033628940582, "learning_rate": 0.00011395652369593592, "loss": 0.3084, "step": 24056 }, { "epoch": 1.9488820479585223, "grad_norm": 0.05597818270325661, "learning_rate": 0.00011395202304334129, "loss": 0.2499, "step": 24057 }, { "epoch": 1.9489630589760207, "grad_norm": 0.05621341988444328, "learning_rate": 0.00011394752239074667, "loss": 0.2607, "step": 24058 }, { "epoch": 1.9490440699935192, "grad_norm": 0.05270567163825035, "learning_rate": 0.00011394302173815204, "loss": 0.253, "step": 24059 }, { "epoch": 1.9491250810110174, "grad_norm": 0.07046864181756973, "learning_rate": 0.0001139385210855574, "loss": 0.3114, "step": 24060 }, { "epoch": 1.949206092028516, "grad_norm": 0.05155371129512787, "learning_rate": 0.0001139340204329628, "loss": 0.2617, "step": 24061 }, { "epoch": 1.9492871030460144, "grad_norm": 0.06104248762130737, "learning_rate": 0.00011392951978036816, "loss": 0.2578, "step": 24062 }, { "epoch": 1.9493681140635126, "grad_norm": 0.05884503200650215, "learning_rate": 0.00011392501912777353, "loss": 0.2704, "step": 24063 }, { "epoch": 1.9494491250810109, "grad_norm": 0.0626462921500206, "learning_rate": 0.00011392051847517891, "loss": 0.2998, "step": 24064 }, { "epoch": 1.9495301360985096, "grad_norm": 0.06282604485750198, "learning_rate": 0.00011391601782258428, "loss": 0.2836, "step": 24065 }, { "epoch": 1.9496111471160078, "grad_norm": 0.05556776747107506, "learning_rate": 0.00011391151716998964, "loss": 0.2542, "step": 24066 }, { "epoch": 1.949692158133506, "grad_norm": 0.07514247298240662, "learning_rate": 0.00011390701651739504, "loss": 0.2671, "step": 24067 }, { "epoch": 1.9497731691510045, "grad_norm": 0.07319103181362152, "learning_rate": 0.0001139025158648004, "loss": 0.253, "step": 24068 }, { "epoch": 1.949854180168503, "grad_norm": 0.07046500593423843, "learning_rate": 0.00011389801521220578, "loss": 0.3138, "step": 24069 }, { "epoch": 1.9499351911860012, "grad_norm": 0.060062143951654434, "learning_rate": 0.00011389351455961115, "loss": 0.2599, "step": 24070 }, { "epoch": 1.9500162022034997, "grad_norm": 0.06474334746599197, "learning_rate": 0.00011388901390701652, "loss": 0.2578, "step": 24071 }, { "epoch": 1.9500972132209982, "grad_norm": 0.06309830397367477, "learning_rate": 0.00011388451325442188, "loss": 0.2858, "step": 24072 }, { "epoch": 1.9501782242384964, "grad_norm": 0.061201080679893494, "learning_rate": 0.00011388001260182728, "loss": 0.2555, "step": 24073 }, { "epoch": 1.9502592352559946, "grad_norm": 0.05632198601961136, "learning_rate": 0.00011387551194923264, "loss": 0.2351, "step": 24074 }, { "epoch": 1.9503402462734933, "grad_norm": 0.05575885251164436, "learning_rate": 0.00011387101129663802, "loss": 0.3062, "step": 24075 }, { "epoch": 1.9504212572909916, "grad_norm": 0.07558295130729675, "learning_rate": 0.00011386651064404339, "loss": 0.2967, "step": 24076 }, { "epoch": 1.9505022683084898, "grad_norm": 0.07199109345674515, "learning_rate": 0.00011386200999144876, "loss": 0.2833, "step": 24077 }, { "epoch": 1.9505832793259883, "grad_norm": 0.06253136694431305, "learning_rate": 0.00011385750933885412, "loss": 0.331, "step": 24078 }, { "epoch": 1.9506642903434868, "grad_norm": 0.06450769305229187, "learning_rate": 0.00011385300868625953, "loss": 0.292, "step": 24079 }, { "epoch": 1.950745301360985, "grad_norm": 0.04632382467389107, "learning_rate": 0.00011384850803366489, "loss": 0.2592, "step": 24080 }, { "epoch": 1.9508263123784835, "grad_norm": 0.055005017668008804, "learning_rate": 0.00011384400738107026, "loss": 0.2463, "step": 24081 }, { "epoch": 1.950907323395982, "grad_norm": 0.06214490160346031, "learning_rate": 0.00011383950672847563, "loss": 0.2709, "step": 24082 }, { "epoch": 1.9509883344134802, "grad_norm": 0.04313374310731888, "learning_rate": 0.000113835006075881, "loss": 0.2247, "step": 24083 }, { "epoch": 1.9510693454309787, "grad_norm": 0.053844235837459564, "learning_rate": 0.00011383050542328637, "loss": 0.2478, "step": 24084 }, { "epoch": 1.9511503564484771, "grad_norm": 0.05828912928700447, "learning_rate": 0.00011382600477069177, "loss": 0.2478, "step": 24085 }, { "epoch": 1.9512313674659754, "grad_norm": 0.049257442355155945, "learning_rate": 0.00011382150411809713, "loss": 0.2616, "step": 24086 }, { "epoch": 1.9513123784834736, "grad_norm": 0.06783261895179749, "learning_rate": 0.0001138170034655025, "loss": 0.2901, "step": 24087 }, { "epoch": 1.9513933895009723, "grad_norm": 0.05653469264507294, "learning_rate": 0.00011381250281290787, "loss": 0.2835, "step": 24088 }, { "epoch": 1.9514744005184705, "grad_norm": 0.054393965750932693, "learning_rate": 0.00011380800216031325, "loss": 0.2645, "step": 24089 }, { "epoch": 1.9515554115359688, "grad_norm": 0.06505770981311798, "learning_rate": 0.00011380350150771863, "loss": 0.2834, "step": 24090 }, { "epoch": 1.9516364225534673, "grad_norm": 0.06360580027103424, "learning_rate": 0.00011379900085512401, "loss": 0.2592, "step": 24091 }, { "epoch": 1.9517174335709657, "grad_norm": 0.06400442868471146, "learning_rate": 0.00011379450020252937, "loss": 0.2857, "step": 24092 }, { "epoch": 1.951798444588464, "grad_norm": 0.05865645036101341, "learning_rate": 0.00011378999954993474, "loss": 0.2713, "step": 24093 }, { "epoch": 1.9518794556059624, "grad_norm": 0.06572428345680237, "learning_rate": 0.00011378549889734012, "loss": 0.264, "step": 24094 }, { "epoch": 1.951960466623461, "grad_norm": 0.05528400465846062, "learning_rate": 0.00011378099824474549, "loss": 0.2675, "step": 24095 }, { "epoch": 1.9520414776409591, "grad_norm": 0.06614361703395844, "learning_rate": 0.00011377649759215088, "loss": 0.294, "step": 24096 }, { "epoch": 1.9521224886584574, "grad_norm": 0.05867353081703186, "learning_rate": 0.00011377199693955625, "loss": 0.2546, "step": 24097 }, { "epoch": 1.952203499675956, "grad_norm": 0.06184687092900276, "learning_rate": 0.00011376749628696161, "loss": 0.2794, "step": 24098 }, { "epoch": 1.9522845106934543, "grad_norm": 0.05994020774960518, "learning_rate": 0.00011376299563436698, "loss": 0.2637, "step": 24099 }, { "epoch": 1.9523655217109526, "grad_norm": 0.05696763098239899, "learning_rate": 0.00011375849498177236, "loss": 0.31, "step": 24100 }, { "epoch": 1.952446532728451, "grad_norm": 0.058388851583004, "learning_rate": 0.00011375399432917773, "loss": 0.2826, "step": 24101 }, { "epoch": 1.9525275437459495, "grad_norm": 0.05529080703854561, "learning_rate": 0.00011374949367658312, "loss": 0.2612, "step": 24102 }, { "epoch": 1.9526085547634477, "grad_norm": 0.05322407931089401, "learning_rate": 0.00011374499302398849, "loss": 0.2918, "step": 24103 }, { "epoch": 1.9526895657809462, "grad_norm": 0.057907380163669586, "learning_rate": 0.00011374049237139387, "loss": 0.2858, "step": 24104 }, { "epoch": 1.9527705767984447, "grad_norm": 0.059196989983320236, "learning_rate": 0.00011373599171879923, "loss": 0.2977, "step": 24105 }, { "epoch": 1.952851587815943, "grad_norm": 0.06501670926809311, "learning_rate": 0.0001137314910662046, "loss": 0.2796, "step": 24106 }, { "epoch": 1.9529325988334414, "grad_norm": 0.060437873005867004, "learning_rate": 0.00011372699041360997, "loss": 0.2585, "step": 24107 }, { "epoch": 1.9530136098509399, "grad_norm": 0.057793036103248596, "learning_rate": 0.00011372248976101536, "loss": 0.2585, "step": 24108 }, { "epoch": 1.953094620868438, "grad_norm": 0.06259094923734665, "learning_rate": 0.00011371798910842073, "loss": 0.2546, "step": 24109 }, { "epoch": 1.9531756318859363, "grad_norm": 0.055709585547447205, "learning_rate": 0.00011371348845582611, "loss": 0.2675, "step": 24110 }, { "epoch": 1.9532566429034348, "grad_norm": 0.05864265188574791, "learning_rate": 0.00011370898780323147, "loss": 0.2846, "step": 24111 }, { "epoch": 1.9533376539209333, "grad_norm": 0.07006549835205078, "learning_rate": 0.00011370448715063684, "loss": 0.2695, "step": 24112 }, { "epoch": 1.9534186649384315, "grad_norm": 0.07488393038511276, "learning_rate": 0.00011369998649804223, "loss": 0.3068, "step": 24113 }, { "epoch": 1.95349967595593, "grad_norm": 0.067337766289711, "learning_rate": 0.0001136954858454476, "loss": 0.3021, "step": 24114 }, { "epoch": 1.9535806869734285, "grad_norm": 0.0490729995071888, "learning_rate": 0.00011369098519285297, "loss": 0.2364, "step": 24115 }, { "epoch": 1.9536616979909267, "grad_norm": 0.05571191385388374, "learning_rate": 0.00011368648454025835, "loss": 0.2307, "step": 24116 }, { "epoch": 1.9537427090084252, "grad_norm": 0.05138971656560898, "learning_rate": 0.00011368198388766371, "loss": 0.2702, "step": 24117 }, { "epoch": 1.9538237200259236, "grad_norm": 0.06012196093797684, "learning_rate": 0.00011367748323506908, "loss": 0.2532, "step": 24118 }, { "epoch": 1.9539047310434219, "grad_norm": 0.0667654424905777, "learning_rate": 0.00011367298258247447, "loss": 0.309, "step": 24119 }, { "epoch": 1.9539857420609201, "grad_norm": 0.061375852674245834, "learning_rate": 0.00011366848192987984, "loss": 0.2537, "step": 24120 }, { "epoch": 1.9540667530784188, "grad_norm": 0.06362592428922653, "learning_rate": 0.00011366398127728522, "loss": 0.2813, "step": 24121 }, { "epoch": 1.954147764095917, "grad_norm": 0.057681769132614136, "learning_rate": 0.00011365948062469059, "loss": 0.2756, "step": 24122 }, { "epoch": 1.9542287751134153, "grad_norm": 0.060051437467336655, "learning_rate": 0.00011365497997209595, "loss": 0.275, "step": 24123 }, { "epoch": 1.9543097861309138, "grad_norm": 0.07187920063734055, "learning_rate": 0.00011365047931950132, "loss": 0.3165, "step": 24124 }, { "epoch": 1.9543907971484122, "grad_norm": 0.062115393579006195, "learning_rate": 0.00011364597866690671, "loss": 0.309, "step": 24125 }, { "epoch": 1.9544718081659105, "grad_norm": 0.07206244021654129, "learning_rate": 0.00011364147801431208, "loss": 0.2352, "step": 24126 }, { "epoch": 1.954552819183409, "grad_norm": 0.061783768236637115, "learning_rate": 0.00011363697736171746, "loss": 0.2734, "step": 24127 }, { "epoch": 1.9546338302009074, "grad_norm": 0.05878418684005737, "learning_rate": 0.00011363247670912283, "loss": 0.2769, "step": 24128 }, { "epoch": 1.9547148412184057, "grad_norm": 0.05013301968574524, "learning_rate": 0.00011362797605652819, "loss": 0.2365, "step": 24129 }, { "epoch": 1.9547958522359041, "grad_norm": 0.06090492755174637, "learning_rate": 0.00011362347540393357, "loss": 0.2881, "step": 24130 }, { "epoch": 1.9548768632534026, "grad_norm": 0.066561758518219, "learning_rate": 0.00011361897475133895, "loss": 0.2439, "step": 24131 }, { "epoch": 1.9549578742709008, "grad_norm": 0.05893224850296974, "learning_rate": 0.00011361447409874433, "loss": 0.2768, "step": 24132 }, { "epoch": 1.955038885288399, "grad_norm": 0.056500572711229324, "learning_rate": 0.0001136099734461497, "loss": 0.3002, "step": 24133 }, { "epoch": 1.9551198963058976, "grad_norm": 0.055701520293951035, "learning_rate": 0.00011360547279355507, "loss": 0.266, "step": 24134 }, { "epoch": 1.955200907323396, "grad_norm": 0.07285144925117493, "learning_rate": 0.00011360097214096043, "loss": 0.2558, "step": 24135 }, { "epoch": 1.9552819183408943, "grad_norm": 0.05705364793539047, "learning_rate": 0.00011359647148836581, "loss": 0.2942, "step": 24136 }, { "epoch": 1.9553629293583927, "grad_norm": 0.057131167501211166, "learning_rate": 0.0001135919708357712, "loss": 0.2659, "step": 24137 }, { "epoch": 1.9554439403758912, "grad_norm": 0.058022141456604004, "learning_rate": 0.00011358747018317657, "loss": 0.2422, "step": 24138 }, { "epoch": 1.9555249513933894, "grad_norm": 0.05680813640356064, "learning_rate": 0.00011358296953058194, "loss": 0.2514, "step": 24139 }, { "epoch": 1.955605962410888, "grad_norm": 0.05137019604444504, "learning_rate": 0.00011357846887798732, "loss": 0.2994, "step": 24140 }, { "epoch": 1.9556869734283864, "grad_norm": 0.0479348823428154, "learning_rate": 0.00011357396822539268, "loss": 0.2255, "step": 24141 }, { "epoch": 1.9557679844458846, "grad_norm": 0.06023770943284035, "learning_rate": 0.00011356946757279808, "loss": 0.3027, "step": 24142 }, { "epoch": 1.9558489954633829, "grad_norm": 0.05584618076682091, "learning_rate": 0.00011356496692020344, "loss": 0.2731, "step": 24143 }, { "epoch": 1.9559300064808816, "grad_norm": 0.05602271482348442, "learning_rate": 0.00011356046626760881, "loss": 0.2554, "step": 24144 }, { "epoch": 1.9560110174983798, "grad_norm": 0.05980118364095688, "learning_rate": 0.00011355596561501418, "loss": 0.2706, "step": 24145 }, { "epoch": 1.956092028515878, "grad_norm": 0.04484741389751434, "learning_rate": 0.00011355146496241956, "loss": 0.2174, "step": 24146 }, { "epoch": 1.9561730395333765, "grad_norm": 0.08422911912202835, "learning_rate": 0.00011354696430982492, "loss": 0.3119, "step": 24147 }, { "epoch": 1.956254050550875, "grad_norm": 0.06032005324959755, "learning_rate": 0.00011354246365723032, "loss": 0.2296, "step": 24148 }, { "epoch": 1.9563350615683732, "grad_norm": 0.0618845634162426, "learning_rate": 0.00011353796300463568, "loss": 0.2492, "step": 24149 }, { "epoch": 1.9564160725858717, "grad_norm": 0.058529820293188095, "learning_rate": 0.00011353346235204105, "loss": 0.2588, "step": 24150 }, { "epoch": 1.9564970836033702, "grad_norm": 0.059740472584962845, "learning_rate": 0.00011352896169944642, "loss": 0.2709, "step": 24151 }, { "epoch": 1.9565780946208684, "grad_norm": 0.056812260299921036, "learning_rate": 0.0001135244610468518, "loss": 0.2424, "step": 24152 }, { "epoch": 1.9566591056383669, "grad_norm": 0.04421933740377426, "learning_rate": 0.00011351996039425716, "loss": 0.2248, "step": 24153 }, { "epoch": 1.9567401166558653, "grad_norm": 0.05292237550020218, "learning_rate": 0.00011351545974166256, "loss": 0.2809, "step": 24154 }, { "epoch": 1.9568211276733636, "grad_norm": 0.054508842527866364, "learning_rate": 0.00011351095908906792, "loss": 0.2455, "step": 24155 }, { "epoch": 1.9569021386908618, "grad_norm": 0.06526229530572891, "learning_rate": 0.00011350645843647329, "loss": 0.2686, "step": 24156 }, { "epoch": 1.9569831497083603, "grad_norm": 0.047157347202301025, "learning_rate": 0.00011350195778387867, "loss": 0.2593, "step": 24157 }, { "epoch": 1.9570641607258588, "grad_norm": 0.06563350558280945, "learning_rate": 0.00011349745713128404, "loss": 0.2474, "step": 24158 }, { "epoch": 1.957145171743357, "grad_norm": 0.058557942509651184, "learning_rate": 0.0001134929564786894, "loss": 0.2587, "step": 24159 }, { "epoch": 1.9572261827608555, "grad_norm": 0.05948895588517189, "learning_rate": 0.0001134884558260948, "loss": 0.259, "step": 24160 }, { "epoch": 1.957307193778354, "grad_norm": 0.06698385626077652, "learning_rate": 0.00011348395517350016, "loss": 0.2659, "step": 24161 }, { "epoch": 1.9573882047958522, "grad_norm": 0.06790705025196075, "learning_rate": 0.00011347945452090553, "loss": 0.265, "step": 24162 }, { "epoch": 1.9574692158133506, "grad_norm": 0.06589134782552719, "learning_rate": 0.00011347495386831091, "loss": 0.3139, "step": 24163 }, { "epoch": 1.9575502268308491, "grad_norm": 0.048939596861600876, "learning_rate": 0.00011347045321571628, "loss": 0.2322, "step": 24164 }, { "epoch": 1.9576312378483474, "grad_norm": 0.05785316601395607, "learning_rate": 0.00011346595256312167, "loss": 0.286, "step": 24165 }, { "epoch": 1.9577122488658456, "grad_norm": 0.05657365918159485, "learning_rate": 0.00011346145191052704, "loss": 0.2624, "step": 24166 }, { "epoch": 1.9577932598833443, "grad_norm": 0.052017692476511, "learning_rate": 0.0001134569512579324, "loss": 0.2513, "step": 24167 }, { "epoch": 1.9578742709008425, "grad_norm": 0.059289928525686264, "learning_rate": 0.00011345245060533778, "loss": 0.2915, "step": 24168 }, { "epoch": 1.9579552819183408, "grad_norm": 0.05871990695595741, "learning_rate": 0.00011344794995274315, "loss": 0.2953, "step": 24169 }, { "epoch": 1.9580362929358393, "grad_norm": 0.05929981544613838, "learning_rate": 0.00011344344930014852, "loss": 0.2598, "step": 24170 }, { "epoch": 1.9581173039533377, "grad_norm": 0.06190398707985878, "learning_rate": 0.00011343894864755391, "loss": 0.2833, "step": 24171 }, { "epoch": 1.958198314970836, "grad_norm": 0.05891365930438042, "learning_rate": 0.00011343444799495928, "loss": 0.2244, "step": 24172 }, { "epoch": 1.9582793259883344, "grad_norm": 0.04958629980683327, "learning_rate": 0.00011342994734236466, "loss": 0.2591, "step": 24173 }, { "epoch": 1.958360337005833, "grad_norm": 0.05412125959992409, "learning_rate": 0.00011342544668977002, "loss": 0.2543, "step": 24174 }, { "epoch": 1.9584413480233311, "grad_norm": 0.06268096715211868, "learning_rate": 0.00011342094603717539, "loss": 0.2434, "step": 24175 }, { "epoch": 1.9585223590408296, "grad_norm": 0.0730811133980751, "learning_rate": 0.00011341644538458076, "loss": 0.2657, "step": 24176 }, { "epoch": 1.958603370058328, "grad_norm": 0.062305256724357605, "learning_rate": 0.00011341194473198615, "loss": 0.2843, "step": 24177 }, { "epoch": 1.9586843810758263, "grad_norm": 0.060018282383680344, "learning_rate": 0.00011340744407939153, "loss": 0.2779, "step": 24178 }, { "epoch": 1.9587653920933246, "grad_norm": 0.06363444775342941, "learning_rate": 0.0001134029434267969, "loss": 0.2455, "step": 24179 }, { "epoch": 1.958846403110823, "grad_norm": 0.06346273422241211, "learning_rate": 0.00011339844277420226, "loss": 0.2725, "step": 24180 }, { "epoch": 1.9589274141283215, "grad_norm": 0.05639772117137909, "learning_rate": 0.00011339394212160763, "loss": 0.2455, "step": 24181 }, { "epoch": 1.9590084251458197, "grad_norm": 0.07999150454998016, "learning_rate": 0.000113389441469013, "loss": 0.2761, "step": 24182 }, { "epoch": 1.9590894361633182, "grad_norm": 0.054590802639722824, "learning_rate": 0.0001133849408164184, "loss": 0.2769, "step": 24183 }, { "epoch": 1.9591704471808167, "grad_norm": 0.055199988186359406, "learning_rate": 0.00011338044016382377, "loss": 0.2401, "step": 24184 }, { "epoch": 1.959251458198315, "grad_norm": 0.08141867816448212, "learning_rate": 0.00011337593951122914, "loss": 0.2864, "step": 24185 }, { "epoch": 1.9593324692158134, "grad_norm": 0.05465199425816536, "learning_rate": 0.0001133714388586345, "loss": 0.2519, "step": 24186 }, { "epoch": 1.9594134802333119, "grad_norm": 0.0712234228849411, "learning_rate": 0.00011336693820603987, "loss": 0.2858, "step": 24187 }, { "epoch": 1.95949449125081, "grad_norm": 0.058448389172554016, "learning_rate": 0.00011336243755344525, "loss": 0.2639, "step": 24188 }, { "epoch": 1.9595755022683083, "grad_norm": 0.06451912224292755, "learning_rate": 0.00011335793690085064, "loss": 0.2741, "step": 24189 }, { "epoch": 1.959656513285807, "grad_norm": 0.06475438922643661, "learning_rate": 0.00011335343624825601, "loss": 0.252, "step": 24190 }, { "epoch": 1.9597375243033053, "grad_norm": 0.0741402730345726, "learning_rate": 0.00011334893559566138, "loss": 0.2449, "step": 24191 }, { "epoch": 1.9598185353208035, "grad_norm": 0.0444721020758152, "learning_rate": 0.00011334443494306674, "loss": 0.263, "step": 24192 }, { "epoch": 1.959899546338302, "grad_norm": 0.07959980517625809, "learning_rate": 0.00011333993429047212, "loss": 0.2654, "step": 24193 }, { "epoch": 1.9599805573558005, "grad_norm": 0.05834224820137024, "learning_rate": 0.0001133354336378775, "loss": 0.2734, "step": 24194 }, { "epoch": 1.9600615683732987, "grad_norm": 0.053961653262376785, "learning_rate": 0.00011333093298528288, "loss": 0.2372, "step": 24195 }, { "epoch": 1.9601425793907972, "grad_norm": 0.04977266862988472, "learning_rate": 0.00011332643233268825, "loss": 0.2533, "step": 24196 }, { "epoch": 1.9602235904082956, "grad_norm": 0.0665312260389328, "learning_rate": 0.00011332193168009362, "loss": 0.2908, "step": 24197 }, { "epoch": 1.9603046014257939, "grad_norm": 0.062488801777362823, "learning_rate": 0.00011331743102749898, "loss": 0.2682, "step": 24198 }, { "epoch": 1.9603856124432921, "grad_norm": 0.05820208042860031, "learning_rate": 0.00011331293037490436, "loss": 0.2597, "step": 24199 }, { "epoch": 1.9604666234607908, "grad_norm": 0.06187162548303604, "learning_rate": 0.00011330842972230974, "loss": 0.246, "step": 24200 }, { "epoch": 1.960547634478289, "grad_norm": 0.050065185874700546, "learning_rate": 0.00011330392906971512, "loss": 0.2378, "step": 24201 }, { "epoch": 1.9606286454957873, "grad_norm": 0.04843199625611305, "learning_rate": 0.00011329942841712049, "loss": 0.241, "step": 24202 }, { "epoch": 1.9607096565132858, "grad_norm": 0.05734392628073692, "learning_rate": 0.00011329492776452587, "loss": 0.2769, "step": 24203 }, { "epoch": 1.9607906675307842, "grad_norm": 0.059642255306243896, "learning_rate": 0.00011329042711193123, "loss": 0.2737, "step": 24204 }, { "epoch": 1.9608716785482825, "grad_norm": 0.05199331417679787, "learning_rate": 0.0001132859264593366, "loss": 0.3023, "step": 24205 }, { "epoch": 1.960952689565781, "grad_norm": 0.058106642216444016, "learning_rate": 0.00011328142580674199, "loss": 0.2491, "step": 24206 }, { "epoch": 1.9610337005832794, "grad_norm": 0.06125922128558159, "learning_rate": 0.00011327692515414736, "loss": 0.256, "step": 24207 }, { "epoch": 1.9611147116007777, "grad_norm": 0.05933903902769089, "learning_rate": 0.00011327242450155273, "loss": 0.2476, "step": 24208 }, { "epoch": 1.9611957226182761, "grad_norm": 0.07027129083871841, "learning_rate": 0.00011326792384895811, "loss": 0.2797, "step": 24209 }, { "epoch": 1.9612767336357746, "grad_norm": 0.06699054688215256, "learning_rate": 0.00011326342319636347, "loss": 0.2781, "step": 24210 }, { "epoch": 1.9613577446532728, "grad_norm": 0.06571625918149948, "learning_rate": 0.00011325892254376884, "loss": 0.2325, "step": 24211 }, { "epoch": 1.961438755670771, "grad_norm": 0.054301898926496506, "learning_rate": 0.00011325442189117423, "loss": 0.2817, "step": 24212 }, { "epoch": 1.9615197666882696, "grad_norm": 0.05335809662938118, "learning_rate": 0.0001132499212385796, "loss": 0.2594, "step": 24213 }, { "epoch": 1.961600777705768, "grad_norm": 0.05309063196182251, "learning_rate": 0.00011324542058598498, "loss": 0.2543, "step": 24214 }, { "epoch": 1.9616817887232663, "grad_norm": 0.07116621732711792, "learning_rate": 0.00011324091993339035, "loss": 0.2815, "step": 24215 }, { "epoch": 1.9617627997407647, "grad_norm": 0.05735379084944725, "learning_rate": 0.00011323641928079571, "loss": 0.2533, "step": 24216 }, { "epoch": 1.9618438107582632, "grad_norm": 0.07357499748468399, "learning_rate": 0.00011323191862820108, "loss": 0.2909, "step": 24217 }, { "epoch": 1.9619248217757614, "grad_norm": 0.051178716123104095, "learning_rate": 0.00011322741797560647, "loss": 0.264, "step": 24218 }, { "epoch": 1.96200583279326, "grad_norm": 0.05480940639972687, "learning_rate": 0.00011322291732301184, "loss": 0.2515, "step": 24219 }, { "epoch": 1.9620868438107584, "grad_norm": 0.04697352275252342, "learning_rate": 0.00011321841667041722, "loss": 0.1938, "step": 24220 }, { "epoch": 1.9621678548282566, "grad_norm": 0.06244059279561043, "learning_rate": 0.00011321391601782259, "loss": 0.2577, "step": 24221 }, { "epoch": 1.9622488658457549, "grad_norm": 0.05462726950645447, "learning_rate": 0.00011320941536522795, "loss": 0.2353, "step": 24222 }, { "epoch": 1.9623298768632536, "grad_norm": 0.055527880787849426, "learning_rate": 0.00011320491471263335, "loss": 0.2525, "step": 24223 }, { "epoch": 1.9624108878807518, "grad_norm": 0.054965630173683167, "learning_rate": 0.00011320041406003871, "loss": 0.255, "step": 24224 }, { "epoch": 1.96249189889825, "grad_norm": 0.05207141861319542, "learning_rate": 0.00011319591340744408, "loss": 0.2601, "step": 24225 }, { "epoch": 1.9625729099157485, "grad_norm": 0.053380418568849564, "learning_rate": 0.00011319141275484946, "loss": 0.2242, "step": 24226 }, { "epoch": 1.962653920933247, "grad_norm": 0.06348201632499695, "learning_rate": 0.00011318691210225483, "loss": 0.2309, "step": 24227 }, { "epoch": 1.9627349319507452, "grad_norm": 0.06286928802728653, "learning_rate": 0.00011318241144966019, "loss": 0.3057, "step": 24228 }, { "epoch": 1.9628159429682437, "grad_norm": 0.06830119341611862, "learning_rate": 0.00011317791079706559, "loss": 0.29, "step": 24229 }, { "epoch": 1.9628969539857422, "grad_norm": 0.05434748902916908, "learning_rate": 0.00011317341014447095, "loss": 0.2584, "step": 24230 }, { "epoch": 1.9629779650032404, "grad_norm": 0.05682295933365822, "learning_rate": 0.00011316890949187633, "loss": 0.2603, "step": 24231 }, { "epoch": 1.9630589760207389, "grad_norm": 0.06411691009998322, "learning_rate": 0.0001131644088392817, "loss": 0.2867, "step": 24232 }, { "epoch": 1.9631399870382373, "grad_norm": 0.06302022933959961, "learning_rate": 0.00011315990818668707, "loss": 0.2794, "step": 24233 }, { "epoch": 1.9632209980557356, "grad_norm": 0.06645432859659195, "learning_rate": 0.00011315540753409243, "loss": 0.2635, "step": 24234 }, { "epoch": 1.9633020090732338, "grad_norm": 0.06499811261892319, "learning_rate": 0.00011315090688149783, "loss": 0.2713, "step": 24235 }, { "epoch": 1.9633830200907323, "grad_norm": 0.06514635682106018, "learning_rate": 0.0001131464062289032, "loss": 0.2812, "step": 24236 }, { "epoch": 1.9634640311082308, "grad_norm": 0.04923933371901512, "learning_rate": 0.00011314190557630857, "loss": 0.282, "step": 24237 }, { "epoch": 1.963545042125729, "grad_norm": 0.062177617102861404, "learning_rate": 0.00011313740492371394, "loss": 0.2679, "step": 24238 }, { "epoch": 1.9636260531432275, "grad_norm": 0.06502950191497803, "learning_rate": 0.00011313290427111932, "loss": 0.2964, "step": 24239 }, { "epoch": 1.963707064160726, "grad_norm": 0.05972938984632492, "learning_rate": 0.00011312840361852468, "loss": 0.2706, "step": 24240 }, { "epoch": 1.9637880751782242, "grad_norm": 0.050397828221321106, "learning_rate": 0.00011312390296593008, "loss": 0.2312, "step": 24241 }, { "epoch": 1.9638690861957226, "grad_norm": 0.054717596620321274, "learning_rate": 0.00011311940231333545, "loss": 0.2871, "step": 24242 }, { "epoch": 1.9639500972132211, "grad_norm": 0.061753761023283005, "learning_rate": 0.00011311490166074081, "loss": 0.2899, "step": 24243 }, { "epoch": 1.9640311082307194, "grad_norm": 0.053413957357406616, "learning_rate": 0.00011311040100814618, "loss": 0.2539, "step": 24244 }, { "epoch": 1.9641121192482176, "grad_norm": 0.05676018074154854, "learning_rate": 0.00011310590035555156, "loss": 0.2969, "step": 24245 }, { "epoch": 1.9641931302657163, "grad_norm": 0.06230397894978523, "learning_rate": 0.00011310139970295694, "loss": 0.2845, "step": 24246 }, { "epoch": 1.9642741412832145, "grad_norm": 0.06296941637992859, "learning_rate": 0.00011309689905036232, "loss": 0.2432, "step": 24247 }, { "epoch": 1.9643551523007128, "grad_norm": 0.05645883455872536, "learning_rate": 0.00011309239839776769, "loss": 0.2478, "step": 24248 }, { "epoch": 1.9644361633182112, "grad_norm": 0.06192861869931221, "learning_rate": 0.00011308789774517305, "loss": 0.2954, "step": 24249 }, { "epoch": 1.9645171743357097, "grad_norm": 0.06263851374387741, "learning_rate": 0.00011308339709257842, "loss": 0.247, "step": 24250 }, { "epoch": 1.964598185353208, "grad_norm": 0.06363161653280258, "learning_rate": 0.0001130788964399838, "loss": 0.2498, "step": 24251 }, { "epoch": 1.9646791963707064, "grad_norm": 0.05741799250245094, "learning_rate": 0.00011307439578738919, "loss": 0.3236, "step": 24252 }, { "epoch": 1.964760207388205, "grad_norm": 0.05794438719749451, "learning_rate": 0.00011306989513479456, "loss": 0.208, "step": 24253 }, { "epoch": 1.9648412184057031, "grad_norm": 0.0729365274310112, "learning_rate": 0.00011306539448219993, "loss": 0.2937, "step": 24254 }, { "epoch": 1.9649222294232016, "grad_norm": 0.07066737860441208, "learning_rate": 0.00011306089382960529, "loss": 0.284, "step": 24255 }, { "epoch": 1.9650032404407, "grad_norm": 0.05694340541958809, "learning_rate": 0.00011305639317701067, "loss": 0.2258, "step": 24256 }, { "epoch": 1.9650842514581983, "grad_norm": 0.07903728634119034, "learning_rate": 0.00011305189252441604, "loss": 0.2413, "step": 24257 }, { "epoch": 1.9651652624756966, "grad_norm": 0.07888397574424744, "learning_rate": 0.00011304739187182143, "loss": 0.2586, "step": 24258 }, { "epoch": 1.965246273493195, "grad_norm": 0.0666826143860817, "learning_rate": 0.0001130428912192268, "loss": 0.2713, "step": 24259 }, { "epoch": 1.9653272845106935, "grad_norm": 0.06192653626203537, "learning_rate": 0.00011303839056663217, "loss": 0.2717, "step": 24260 }, { "epoch": 1.9654082955281917, "grad_norm": 0.05060596019029617, "learning_rate": 0.00011303388991403753, "loss": 0.2343, "step": 24261 }, { "epoch": 1.9654893065456902, "grad_norm": 0.06154812127351761, "learning_rate": 0.00011302938926144291, "loss": 0.2823, "step": 24262 }, { "epoch": 1.9655703175631887, "grad_norm": 0.06363443285226822, "learning_rate": 0.00011302488860884828, "loss": 0.2412, "step": 24263 }, { "epoch": 1.965651328580687, "grad_norm": 0.056647319346666336, "learning_rate": 0.00011302038795625367, "loss": 0.2737, "step": 24264 }, { "epoch": 1.9657323395981854, "grad_norm": 0.06012161821126938, "learning_rate": 0.00011301588730365904, "loss": 0.292, "step": 24265 }, { "epoch": 1.9658133506156839, "grad_norm": 0.0688176155090332, "learning_rate": 0.00011301138665106442, "loss": 0.2574, "step": 24266 }, { "epoch": 1.965894361633182, "grad_norm": 0.05791863426566124, "learning_rate": 0.00011300688599846978, "loss": 0.2391, "step": 24267 }, { "epoch": 1.9659753726506803, "grad_norm": 0.06651633232831955, "learning_rate": 0.00011300238534587515, "loss": 0.2804, "step": 24268 }, { "epoch": 1.966056383668179, "grad_norm": 0.044299155473709106, "learning_rate": 0.00011299788469328052, "loss": 0.2518, "step": 24269 }, { "epoch": 1.9661373946856773, "grad_norm": 0.06218911334872246, "learning_rate": 0.00011299338404068591, "loss": 0.2613, "step": 24270 }, { "epoch": 1.9662184057031755, "grad_norm": 0.05687331780791283, "learning_rate": 0.00011298888338809128, "loss": 0.2567, "step": 24271 }, { "epoch": 1.966299416720674, "grad_norm": 0.056875865906476974, "learning_rate": 0.00011298438273549666, "loss": 0.2558, "step": 24272 }, { "epoch": 1.9663804277381725, "grad_norm": 0.06565535813570023, "learning_rate": 0.00011297988208290202, "loss": 0.299, "step": 24273 }, { "epoch": 1.9664614387556707, "grad_norm": 0.0581546276807785, "learning_rate": 0.00011297538143030739, "loss": 0.2253, "step": 24274 }, { "epoch": 1.9665424497731692, "grad_norm": 0.06149299070239067, "learning_rate": 0.00011297088077771278, "loss": 0.2401, "step": 24275 }, { "epoch": 1.9666234607906676, "grad_norm": 0.06168157607316971, "learning_rate": 0.00011296638012511815, "loss": 0.2851, "step": 24276 }, { "epoch": 1.9667044718081659, "grad_norm": 0.08303011953830719, "learning_rate": 0.00011296187947252353, "loss": 0.2569, "step": 24277 }, { "epoch": 1.9667854828256643, "grad_norm": 0.0567610077559948, "learning_rate": 0.0001129573788199289, "loss": 0.2474, "step": 24278 }, { "epoch": 1.9668664938431628, "grad_norm": 0.06008369103074074, "learning_rate": 0.00011295287816733426, "loss": 0.2717, "step": 24279 }, { "epoch": 1.966947504860661, "grad_norm": 0.06307718902826309, "learning_rate": 0.00011294837751473963, "loss": 0.2692, "step": 24280 }, { "epoch": 1.9670285158781593, "grad_norm": 0.05900444835424423, "learning_rate": 0.00011294387686214502, "loss": 0.2119, "step": 24281 }, { "epoch": 1.9671095268956578, "grad_norm": 0.06123722344636917, "learning_rate": 0.0001129393762095504, "loss": 0.2613, "step": 24282 }, { "epoch": 1.9671905379131562, "grad_norm": 0.05989507958292961, "learning_rate": 0.00011293487555695577, "loss": 0.2608, "step": 24283 }, { "epoch": 1.9672715489306545, "grad_norm": 0.048651110380887985, "learning_rate": 0.00011293037490436114, "loss": 0.2409, "step": 24284 }, { "epoch": 1.967352559948153, "grad_norm": 0.062375765293836594, "learning_rate": 0.0001129258742517665, "loss": 0.2719, "step": 24285 }, { "epoch": 1.9674335709656514, "grad_norm": 0.05648590624332428, "learning_rate": 0.00011292137359917187, "loss": 0.3004, "step": 24286 }, { "epoch": 1.9675145819831497, "grad_norm": 0.06214189529418945, "learning_rate": 0.00011291687294657726, "loss": 0.2473, "step": 24287 }, { "epoch": 1.9675955930006481, "grad_norm": 0.05842027813196182, "learning_rate": 0.00011291237229398264, "loss": 0.263, "step": 24288 }, { "epoch": 1.9676766040181466, "grad_norm": 0.06813821941614151, "learning_rate": 0.00011290787164138801, "loss": 0.2804, "step": 24289 }, { "epoch": 1.9677576150356448, "grad_norm": 0.06982593238353729, "learning_rate": 0.00011290337098879338, "loss": 0.3156, "step": 24290 }, { "epoch": 1.967838626053143, "grad_norm": 0.059116560965776443, "learning_rate": 0.00011289887033619874, "loss": 0.2764, "step": 24291 }, { "epoch": 1.9679196370706418, "grad_norm": 0.06490524858236313, "learning_rate": 0.00011289436968360412, "loss": 0.3004, "step": 24292 }, { "epoch": 1.96800064808814, "grad_norm": 0.05990765616297722, "learning_rate": 0.0001128898690310095, "loss": 0.261, "step": 24293 }, { "epoch": 1.9680816591056383, "grad_norm": 0.06079739332199097, "learning_rate": 0.00011288536837841488, "loss": 0.2864, "step": 24294 }, { "epoch": 1.9681626701231367, "grad_norm": 0.0682620033621788, "learning_rate": 0.00011288086772582025, "loss": 0.2691, "step": 24295 }, { "epoch": 1.9682436811406352, "grad_norm": 0.06189132481813431, "learning_rate": 0.00011287636707322562, "loss": 0.2378, "step": 24296 }, { "epoch": 1.9683246921581334, "grad_norm": 0.061714477837085724, "learning_rate": 0.00011287186642063098, "loss": 0.2491, "step": 24297 }, { "epoch": 1.968405703175632, "grad_norm": 0.056510306894779205, "learning_rate": 0.00011286736576803639, "loss": 0.2883, "step": 24298 }, { "epoch": 1.9684867141931304, "grad_norm": 0.06037548556923866, "learning_rate": 0.00011286286511544175, "loss": 0.2785, "step": 24299 }, { "epoch": 1.9685677252106286, "grad_norm": 0.057282455265522, "learning_rate": 0.00011285836446284712, "loss": 0.2417, "step": 24300 }, { "epoch": 1.9686487362281269, "grad_norm": 0.05612381175160408, "learning_rate": 0.00011285386381025249, "loss": 0.2616, "step": 24301 }, { "epoch": 1.9687297472456255, "grad_norm": 0.06894572824239731, "learning_rate": 0.00011284936315765787, "loss": 0.2798, "step": 24302 }, { "epoch": 1.9688107582631238, "grad_norm": 0.06875929981470108, "learning_rate": 0.00011284486250506323, "loss": 0.3331, "step": 24303 }, { "epoch": 1.968891769280622, "grad_norm": 0.07246767729520798, "learning_rate": 0.00011284036185246863, "loss": 0.2901, "step": 24304 }, { "epoch": 1.9689727802981205, "grad_norm": 0.0570947490632534, "learning_rate": 0.00011283586119987399, "loss": 0.2562, "step": 24305 }, { "epoch": 1.969053791315619, "grad_norm": 0.056611426174640656, "learning_rate": 0.00011283136054727936, "loss": 0.2627, "step": 24306 }, { "epoch": 1.9691348023331172, "grad_norm": 0.05819262936711311, "learning_rate": 0.00011282685989468473, "loss": 0.2955, "step": 24307 }, { "epoch": 1.9692158133506157, "grad_norm": 0.05799686163663864, "learning_rate": 0.00011282235924209011, "loss": 0.2635, "step": 24308 }, { "epoch": 1.9692968243681142, "grad_norm": 0.06244715303182602, "learning_rate": 0.00011281785858949547, "loss": 0.2691, "step": 24309 }, { "epoch": 1.9693778353856124, "grad_norm": 0.05977495759725571, "learning_rate": 0.00011281335793690087, "loss": 0.2219, "step": 24310 }, { "epoch": 1.9694588464031109, "grad_norm": 0.0680684894323349, "learning_rate": 0.00011280885728430624, "loss": 0.3019, "step": 24311 }, { "epoch": 1.9695398574206093, "grad_norm": 0.07078555226325989, "learning_rate": 0.0001128043566317116, "loss": 0.2791, "step": 24312 }, { "epoch": 1.9696208684381076, "grad_norm": 0.06547614187002182, "learning_rate": 0.00011279985597911698, "loss": 0.2702, "step": 24313 }, { "epoch": 1.9697018794556058, "grad_norm": 0.04946935921907425, "learning_rate": 0.00011279535532652235, "loss": 0.2616, "step": 24314 }, { "epoch": 1.9697828904731045, "grad_norm": 0.05585674196481705, "learning_rate": 0.00011279085467392771, "loss": 0.2472, "step": 24315 }, { "epoch": 1.9698639014906028, "grad_norm": 0.06626023352146149, "learning_rate": 0.00011278635402133311, "loss": 0.2707, "step": 24316 }, { "epoch": 1.969944912508101, "grad_norm": 0.062406085431575775, "learning_rate": 0.00011278185336873848, "loss": 0.2868, "step": 24317 }, { "epoch": 1.9700259235255995, "grad_norm": 0.05898802727460861, "learning_rate": 0.00011277735271614384, "loss": 0.288, "step": 24318 }, { "epoch": 1.970106934543098, "grad_norm": 0.08069593459367752, "learning_rate": 0.00011277285206354922, "loss": 0.3254, "step": 24319 }, { "epoch": 1.9701879455605962, "grad_norm": 0.04930425062775612, "learning_rate": 0.00011276835141095459, "loss": 0.2368, "step": 24320 }, { "epoch": 1.9702689565780946, "grad_norm": 0.05644959583878517, "learning_rate": 0.00011276385075835995, "loss": 0.2657, "step": 24321 }, { "epoch": 1.970349967595593, "grad_norm": 0.06363023817539215, "learning_rate": 0.00011275935010576535, "loss": 0.2921, "step": 24322 }, { "epoch": 1.9704309786130914, "grad_norm": 0.05638289451599121, "learning_rate": 0.00011275484945317073, "loss": 0.2637, "step": 24323 }, { "epoch": 1.9705119896305896, "grad_norm": 0.07388167828321457, "learning_rate": 0.00011275034880057609, "loss": 0.3125, "step": 24324 }, { "epoch": 1.9705930006480883, "grad_norm": 0.07029640674591064, "learning_rate": 0.00011274584814798146, "loss": 0.2886, "step": 24325 }, { "epoch": 1.9706740116655865, "grad_norm": 0.05631260573863983, "learning_rate": 0.00011274134749538683, "loss": 0.2543, "step": 24326 }, { "epoch": 1.9707550226830848, "grad_norm": 0.06461930274963379, "learning_rate": 0.00011273684684279222, "loss": 0.2516, "step": 24327 }, { "epoch": 1.9708360337005832, "grad_norm": 0.05598876625299454, "learning_rate": 0.00011273234619019759, "loss": 0.2604, "step": 24328 }, { "epoch": 1.9709170447180817, "grad_norm": 0.05257659777998924, "learning_rate": 0.00011272784553760297, "loss": 0.2461, "step": 24329 }, { "epoch": 1.97099805573558, "grad_norm": 0.06548883765935898, "learning_rate": 0.00011272334488500833, "loss": 0.2675, "step": 24330 }, { "epoch": 1.9710790667530784, "grad_norm": 0.06943689286708832, "learning_rate": 0.0001127188442324137, "loss": 0.2743, "step": 24331 }, { "epoch": 1.971160077770577, "grad_norm": 0.0543447881937027, "learning_rate": 0.00011271434357981907, "loss": 0.2649, "step": 24332 }, { "epoch": 1.9712410887880751, "grad_norm": 0.05939463898539543, "learning_rate": 0.00011270984292722446, "loss": 0.2728, "step": 24333 }, { "epoch": 1.9713220998055736, "grad_norm": 0.05872873589396477, "learning_rate": 0.00011270534227462983, "loss": 0.2517, "step": 24334 }, { "epoch": 1.971403110823072, "grad_norm": 0.058184314519166946, "learning_rate": 0.00011270084162203521, "loss": 0.2569, "step": 24335 }, { "epoch": 1.9714841218405703, "grad_norm": 0.05213569477200508, "learning_rate": 0.00011269634096944057, "loss": 0.2645, "step": 24336 }, { "epoch": 1.9715651328580686, "grad_norm": 0.059691090136766434, "learning_rate": 0.00011269184031684594, "loss": 0.2461, "step": 24337 }, { "epoch": 1.971646143875567, "grad_norm": 0.05191425606608391, "learning_rate": 0.00011268733966425132, "loss": 0.248, "step": 24338 }, { "epoch": 1.9717271548930655, "grad_norm": 0.059659551829099655, "learning_rate": 0.0001126828390116567, "loss": 0.203, "step": 24339 }, { "epoch": 1.9718081659105637, "grad_norm": 0.07564805448055267, "learning_rate": 0.00011267833835906208, "loss": 0.3105, "step": 24340 }, { "epoch": 1.9718891769280622, "grad_norm": 0.07206302136182785, "learning_rate": 0.00011267383770646745, "loss": 0.2776, "step": 24341 }, { "epoch": 1.9719701879455607, "grad_norm": 0.06573637574911118, "learning_rate": 0.00011266933705387281, "loss": 0.2624, "step": 24342 }, { "epoch": 1.972051198963059, "grad_norm": 0.06227828562259674, "learning_rate": 0.00011266483640127818, "loss": 0.3009, "step": 24343 }, { "epoch": 1.9721322099805574, "grad_norm": 0.07165750116109848, "learning_rate": 0.00011266033574868356, "loss": 0.2565, "step": 24344 }, { "epoch": 1.9722132209980558, "grad_norm": 0.06627726554870605, "learning_rate": 0.00011265583509608894, "loss": 0.2682, "step": 24345 }, { "epoch": 1.972294232015554, "grad_norm": 0.061688557267189026, "learning_rate": 0.00011265133444349432, "loss": 0.2246, "step": 24346 }, { "epoch": 1.9723752430330523, "grad_norm": 0.05644363909959793, "learning_rate": 0.00011264683379089969, "loss": 0.2426, "step": 24347 }, { "epoch": 1.972456254050551, "grad_norm": 0.060142453759908676, "learning_rate": 0.00011264233313830505, "loss": 0.281, "step": 24348 }, { "epoch": 1.9725372650680493, "grad_norm": 0.06855808943510056, "learning_rate": 0.00011263783248571043, "loss": 0.2884, "step": 24349 }, { "epoch": 1.9726182760855475, "grad_norm": 0.05051879212260246, "learning_rate": 0.00011263333183311581, "loss": 0.227, "step": 24350 }, { "epoch": 1.972699287103046, "grad_norm": 0.04746206849813461, "learning_rate": 0.00011262883118052119, "loss": 0.2836, "step": 24351 }, { "epoch": 1.9727802981205445, "grad_norm": 0.049681976437568665, "learning_rate": 0.00011262433052792656, "loss": 0.209, "step": 24352 }, { "epoch": 1.9728613091380427, "grad_norm": 0.04825180396437645, "learning_rate": 0.00011261982987533193, "loss": 0.2902, "step": 24353 }, { "epoch": 1.9729423201555412, "grad_norm": 0.06472016125917435, "learning_rate": 0.0001126153292227373, "loss": 0.2793, "step": 24354 }, { "epoch": 1.9730233311730396, "grad_norm": 0.06196343153715134, "learning_rate": 0.00011261082857014267, "loss": 0.2756, "step": 24355 }, { "epoch": 1.9731043421905379, "grad_norm": 0.059851787984371185, "learning_rate": 0.00011260632791754805, "loss": 0.2414, "step": 24356 }, { "epoch": 1.9731853532080363, "grad_norm": 0.054491378366947174, "learning_rate": 0.00011260182726495343, "loss": 0.2524, "step": 24357 }, { "epoch": 1.9732663642255348, "grad_norm": 0.060839369893074036, "learning_rate": 0.0001125973266123588, "loss": 0.3168, "step": 24358 }, { "epoch": 1.973347375243033, "grad_norm": 0.06078599765896797, "learning_rate": 0.00011259282595976417, "loss": 0.2883, "step": 24359 }, { "epoch": 1.9734283862605313, "grad_norm": 0.08065938204526901, "learning_rate": 0.00011258832530716953, "loss": 0.3195, "step": 24360 }, { "epoch": 1.9735093972780298, "grad_norm": 0.07087505608797073, "learning_rate": 0.00011258382465457491, "loss": 0.2759, "step": 24361 }, { "epoch": 1.9735904082955282, "grad_norm": 0.05086572840809822, "learning_rate": 0.0001125793240019803, "loss": 0.2296, "step": 24362 }, { "epoch": 1.9736714193130265, "grad_norm": 0.052488088607788086, "learning_rate": 0.00011257482334938567, "loss": 0.2581, "step": 24363 }, { "epoch": 1.973752430330525, "grad_norm": 0.06180967763066292, "learning_rate": 0.00011257032269679104, "loss": 0.2892, "step": 24364 }, { "epoch": 1.9738334413480234, "grad_norm": 0.053815726190805435, "learning_rate": 0.00011256582204419642, "loss": 0.2282, "step": 24365 }, { "epoch": 1.9739144523655217, "grad_norm": 0.05706044286489487, "learning_rate": 0.00011256132139160178, "loss": 0.2592, "step": 24366 }, { "epoch": 1.9739954633830201, "grad_norm": 0.05483723431825638, "learning_rate": 0.00011255682073900715, "loss": 0.258, "step": 24367 }, { "epoch": 1.9740764744005186, "grad_norm": 0.04908522218465805, "learning_rate": 0.00011255232008641254, "loss": 0.2459, "step": 24368 }, { "epoch": 1.9741574854180168, "grad_norm": 0.046063363552093506, "learning_rate": 0.00011254781943381791, "loss": 0.2091, "step": 24369 }, { "epoch": 1.974238496435515, "grad_norm": 0.07381352037191391, "learning_rate": 0.00011254331878122328, "loss": 0.3122, "step": 24370 }, { "epoch": 1.9743195074530138, "grad_norm": 0.04620813950896263, "learning_rate": 0.00011253881812862866, "loss": 0.2667, "step": 24371 }, { "epoch": 1.974400518470512, "grad_norm": 0.06148213520646095, "learning_rate": 0.00011253431747603402, "loss": 0.3344, "step": 24372 }, { "epoch": 1.9744815294880103, "grad_norm": 0.05575966835021973, "learning_rate": 0.00011252981682343939, "loss": 0.2599, "step": 24373 }, { "epoch": 1.9745625405055087, "grad_norm": 0.07938870787620544, "learning_rate": 0.00011252531617084478, "loss": 0.3063, "step": 24374 }, { "epoch": 1.9746435515230072, "grad_norm": 0.06841164082288742, "learning_rate": 0.00011252081551825015, "loss": 0.2779, "step": 24375 }, { "epoch": 1.9747245625405054, "grad_norm": 0.057837821543216705, "learning_rate": 0.00011251631486565553, "loss": 0.2773, "step": 24376 }, { "epoch": 1.974805573558004, "grad_norm": 0.06958066672086716, "learning_rate": 0.0001125118142130609, "loss": 0.235, "step": 24377 }, { "epoch": 1.9748865845755024, "grad_norm": 0.062445688992738724, "learning_rate": 0.00011250731356046626, "loss": 0.2771, "step": 24378 }, { "epoch": 1.9749675955930006, "grad_norm": 0.05331667140126228, "learning_rate": 0.00011250281290787166, "loss": 0.261, "step": 24379 }, { "epoch": 1.975048606610499, "grad_norm": 0.06378906965255737, "learning_rate": 0.00011249831225527703, "loss": 0.2597, "step": 24380 }, { "epoch": 1.9751296176279975, "grad_norm": 0.05976220220327377, "learning_rate": 0.0001124938116026824, "loss": 0.2747, "step": 24381 }, { "epoch": 1.9752106286454958, "grad_norm": 0.05172165110707283, "learning_rate": 0.00011248931095008777, "loss": 0.2417, "step": 24382 }, { "epoch": 1.975291639662994, "grad_norm": 0.06261162459850311, "learning_rate": 0.00011248481029749314, "loss": 0.2536, "step": 24383 }, { "epoch": 1.9753726506804925, "grad_norm": 0.06476294994354248, "learning_rate": 0.0001124803096448985, "loss": 0.2713, "step": 24384 }, { "epoch": 1.975453661697991, "grad_norm": 0.05832285061478615, "learning_rate": 0.0001124758089923039, "loss": 0.2588, "step": 24385 }, { "epoch": 1.9755346727154892, "grad_norm": 0.058134134858846664, "learning_rate": 0.00011247130833970928, "loss": 0.2624, "step": 24386 }, { "epoch": 1.9756156837329877, "grad_norm": 0.064618781208992, "learning_rate": 0.00011246680768711464, "loss": 0.2951, "step": 24387 }, { "epoch": 1.9756966947504861, "grad_norm": 0.05063759908080101, "learning_rate": 0.00011246230703452001, "loss": 0.262, "step": 24388 }, { "epoch": 1.9757777057679844, "grad_norm": 0.0644262284040451, "learning_rate": 0.00011245780638192538, "loss": 0.2836, "step": 24389 }, { "epoch": 1.9758587167854829, "grad_norm": 0.04861307144165039, "learning_rate": 0.00011245330572933074, "loss": 0.2593, "step": 24390 }, { "epoch": 1.9759397278029813, "grad_norm": 0.07954981923103333, "learning_rate": 0.00011244880507673614, "loss": 0.2886, "step": 24391 }, { "epoch": 1.9760207388204796, "grad_norm": 0.08754291385412216, "learning_rate": 0.00011244430442414152, "loss": 0.2864, "step": 24392 }, { "epoch": 1.9761017498379778, "grad_norm": 0.05969921499490738, "learning_rate": 0.00011243980377154688, "loss": 0.2877, "step": 24393 }, { "epoch": 1.9761827608554765, "grad_norm": 0.0576811358332634, "learning_rate": 0.00011243530311895225, "loss": 0.2593, "step": 24394 }, { "epoch": 1.9762637718729748, "grad_norm": 0.05961635336279869, "learning_rate": 0.00011243080246635762, "loss": 0.2802, "step": 24395 }, { "epoch": 1.976344782890473, "grad_norm": 0.05381064862012863, "learning_rate": 0.00011242630181376298, "loss": 0.2541, "step": 24396 }, { "epoch": 1.9764257939079715, "grad_norm": 0.05873046815395355, "learning_rate": 0.00011242180116116839, "loss": 0.219, "step": 24397 }, { "epoch": 1.97650680492547, "grad_norm": 0.06834851950407028, "learning_rate": 0.00011241730050857376, "loss": 0.2985, "step": 24398 }, { "epoch": 1.9765878159429682, "grad_norm": 0.07514762133359909, "learning_rate": 0.00011241279985597912, "loss": 0.3346, "step": 24399 }, { "epoch": 1.9766688269604666, "grad_norm": 0.061792224645614624, "learning_rate": 0.00011240829920338449, "loss": 0.2543, "step": 24400 }, { "epoch": 1.976749837977965, "grad_norm": 0.05621310696005821, "learning_rate": 0.00011240379855078987, "loss": 0.2323, "step": 24401 }, { "epoch": 1.9768308489954634, "grad_norm": 0.05764911696314812, "learning_rate": 0.00011239929789819523, "loss": 0.2796, "step": 24402 }, { "epoch": 1.9769118600129616, "grad_norm": 0.06711073219776154, "learning_rate": 0.00011239479724560063, "loss": 0.2955, "step": 24403 }, { "epoch": 1.9769928710304603, "grad_norm": 0.04883848503232002, "learning_rate": 0.000112390296593006, "loss": 0.2866, "step": 24404 }, { "epoch": 1.9770738820479585, "grad_norm": 0.06951811164617538, "learning_rate": 0.00011238579594041136, "loss": 0.2486, "step": 24405 }, { "epoch": 1.9771548930654568, "grad_norm": 0.051786020398139954, "learning_rate": 0.00011238129528781673, "loss": 0.2421, "step": 24406 }, { "epoch": 1.9772359040829552, "grad_norm": 0.07006373256444931, "learning_rate": 0.00011237679463522211, "loss": 0.2781, "step": 24407 }, { "epoch": 1.9773169151004537, "grad_norm": 0.054151784628629684, "learning_rate": 0.0001123722939826275, "loss": 0.2512, "step": 24408 }, { "epoch": 1.977397926117952, "grad_norm": 0.062393900007009506, "learning_rate": 0.00011236779333003287, "loss": 0.258, "step": 24409 }, { "epoch": 1.9774789371354504, "grad_norm": 0.06319965422153473, "learning_rate": 0.00011236329267743824, "loss": 0.2443, "step": 24410 }, { "epoch": 1.9775599481529489, "grad_norm": 0.06782061606645584, "learning_rate": 0.0001123587920248436, "loss": 0.244, "step": 24411 }, { "epoch": 1.9776409591704471, "grad_norm": 0.0541662760078907, "learning_rate": 0.00011235429137224898, "loss": 0.269, "step": 24412 }, { "epoch": 1.9777219701879456, "grad_norm": 0.05658426880836487, "learning_rate": 0.00011234979071965435, "loss": 0.2396, "step": 24413 }, { "epoch": 1.977802981205444, "grad_norm": 0.06435883790254593, "learning_rate": 0.00011234529006705974, "loss": 0.2469, "step": 24414 }, { "epoch": 1.9778839922229423, "grad_norm": 0.07199982553720474, "learning_rate": 0.00011234078941446511, "loss": 0.2922, "step": 24415 }, { "epoch": 1.9779650032404406, "grad_norm": 0.05086003988981247, "learning_rate": 0.00011233628876187048, "loss": 0.2537, "step": 24416 }, { "epoch": 1.9780460142579392, "grad_norm": 0.055621031671762466, "learning_rate": 0.00011233178810927584, "loss": 0.2739, "step": 24417 }, { "epoch": 1.9781270252754375, "grad_norm": 0.06266532093286514, "learning_rate": 0.00011232728745668122, "loss": 0.2704, "step": 24418 }, { "epoch": 1.9782080362929357, "grad_norm": 0.051992930471897125, "learning_rate": 0.00011232278680408659, "loss": 0.2495, "step": 24419 }, { "epoch": 1.9782890473104342, "grad_norm": 0.06642768532037735, "learning_rate": 0.00011231828615149198, "loss": 0.2768, "step": 24420 }, { "epoch": 1.9783700583279327, "grad_norm": 0.06322135776281357, "learning_rate": 0.00011231378549889735, "loss": 0.2809, "step": 24421 }, { "epoch": 1.978451069345431, "grad_norm": 0.05358263850212097, "learning_rate": 0.00011230928484630273, "loss": 0.2477, "step": 24422 }, { "epoch": 1.9785320803629294, "grad_norm": 0.06326816976070404, "learning_rate": 0.00011230478419370809, "loss": 0.2492, "step": 24423 }, { "epoch": 1.9786130913804278, "grad_norm": 0.06179438531398773, "learning_rate": 0.00011230028354111346, "loss": 0.2542, "step": 24424 }, { "epoch": 1.978694102397926, "grad_norm": 0.0613495409488678, "learning_rate": 0.00011229578288851883, "loss": 0.2609, "step": 24425 }, { "epoch": 1.9787751134154243, "grad_norm": 0.05658883601427078, "learning_rate": 0.00011229128223592422, "loss": 0.2791, "step": 24426 }, { "epoch": 1.978856124432923, "grad_norm": 0.07112139463424683, "learning_rate": 0.0001122867815833296, "loss": 0.2675, "step": 24427 }, { "epoch": 1.9789371354504213, "grad_norm": 0.062307700514793396, "learning_rate": 0.00011228228093073497, "loss": 0.328, "step": 24428 }, { "epoch": 1.9790181464679195, "grad_norm": 0.06263001263141632, "learning_rate": 0.00011227778027814033, "loss": 0.2461, "step": 24429 }, { "epoch": 1.979099157485418, "grad_norm": 0.05385729297995567, "learning_rate": 0.0001122732796255457, "loss": 0.275, "step": 24430 }, { "epoch": 1.9791801685029164, "grad_norm": 0.05409708619117737, "learning_rate": 0.00011226877897295109, "loss": 0.2181, "step": 24431 }, { "epoch": 1.9792611795204147, "grad_norm": 0.06311351805925369, "learning_rate": 0.00011226427832035646, "loss": 0.3082, "step": 24432 }, { "epoch": 1.9793421905379132, "grad_norm": 0.06120266765356064, "learning_rate": 0.00011225977766776184, "loss": 0.2868, "step": 24433 }, { "epoch": 1.9794232015554116, "grad_norm": 0.04996590316295624, "learning_rate": 0.00011225527701516721, "loss": 0.2469, "step": 24434 }, { "epoch": 1.9795042125729099, "grad_norm": 0.05709290876984596, "learning_rate": 0.00011225077636257257, "loss": 0.2701, "step": 24435 }, { "epoch": 1.9795852235904083, "grad_norm": 0.07180941104888916, "learning_rate": 0.00011224627570997794, "loss": 0.2976, "step": 24436 }, { "epoch": 1.9796662346079068, "grad_norm": 0.05287057161331177, "learning_rate": 0.00011224177505738333, "loss": 0.2368, "step": 24437 }, { "epoch": 1.979747245625405, "grad_norm": 0.05574708804488182, "learning_rate": 0.0001122372744047887, "loss": 0.2576, "step": 24438 }, { "epoch": 1.9798282566429033, "grad_norm": 0.05408519506454468, "learning_rate": 0.00011223277375219408, "loss": 0.2621, "step": 24439 }, { "epoch": 1.9799092676604018, "grad_norm": 0.06887460500001907, "learning_rate": 0.00011222827309959945, "loss": 0.2803, "step": 24440 }, { "epoch": 1.9799902786779002, "grad_norm": 0.06533532589673996, "learning_rate": 0.00011222377244700481, "loss": 0.2892, "step": 24441 }, { "epoch": 1.9800712896953985, "grad_norm": 0.08909396827220917, "learning_rate": 0.00011221927179441018, "loss": 0.3064, "step": 24442 }, { "epoch": 1.980152300712897, "grad_norm": 0.05991598218679428, "learning_rate": 0.00011221477114181557, "loss": 0.2813, "step": 24443 }, { "epoch": 1.9802333117303954, "grad_norm": 0.07463617622852325, "learning_rate": 0.00011221027048922094, "loss": 0.2546, "step": 24444 }, { "epoch": 1.9803143227478937, "grad_norm": 0.059813693165779114, "learning_rate": 0.00011220576983662632, "loss": 0.2864, "step": 24445 }, { "epoch": 1.9803953337653921, "grad_norm": 0.052126556634902954, "learning_rate": 0.00011220126918403169, "loss": 0.2353, "step": 24446 }, { "epoch": 1.9804763447828906, "grad_norm": 0.06391862779855728, "learning_rate": 0.00011219676853143705, "loss": 0.2966, "step": 24447 }, { "epoch": 1.9805573558003888, "grad_norm": 0.06347377598285675, "learning_rate": 0.00011219226787884243, "loss": 0.244, "step": 24448 }, { "epoch": 1.980638366817887, "grad_norm": 0.06185566633939743, "learning_rate": 0.00011218776722624783, "loss": 0.2716, "step": 24449 }, { "epoch": 1.9807193778353858, "grad_norm": 0.04907141998410225, "learning_rate": 0.00011218326657365319, "loss": 0.237, "step": 24450 }, { "epoch": 1.980800388852884, "grad_norm": 0.06573867797851562, "learning_rate": 0.00011217876592105856, "loss": 0.2799, "step": 24451 }, { "epoch": 1.9808813998703823, "grad_norm": 0.0709458440542221, "learning_rate": 0.00011217426526846393, "loss": 0.2982, "step": 24452 }, { "epoch": 1.9809624108878807, "grad_norm": 0.06352993100881577, "learning_rate": 0.0001121697646158693, "loss": 0.3239, "step": 24453 }, { "epoch": 1.9810434219053792, "grad_norm": 0.06232510134577751, "learning_rate": 0.00011216526396327467, "loss": 0.2297, "step": 24454 }, { "epoch": 1.9811244329228774, "grad_norm": 0.06582966446876526, "learning_rate": 0.00011216076331068007, "loss": 0.294, "step": 24455 }, { "epoch": 1.981205443940376, "grad_norm": 0.0512259304523468, "learning_rate": 0.00011215626265808543, "loss": 0.247, "step": 24456 }, { "epoch": 1.9812864549578744, "grad_norm": 0.049582984298467636, "learning_rate": 0.0001121517620054908, "loss": 0.2409, "step": 24457 }, { "epoch": 1.9813674659753726, "grad_norm": 0.06026807427406311, "learning_rate": 0.00011214726135289618, "loss": 0.2568, "step": 24458 }, { "epoch": 1.981448476992871, "grad_norm": 0.07120063900947571, "learning_rate": 0.00011214276070030154, "loss": 0.2823, "step": 24459 }, { "epoch": 1.9815294880103695, "grad_norm": 0.055205926299095154, "learning_rate": 0.00011213826004770694, "loss": 0.2424, "step": 24460 }, { "epoch": 1.9816104990278678, "grad_norm": 0.04511072859168053, "learning_rate": 0.00011213375939511231, "loss": 0.2494, "step": 24461 }, { "epoch": 1.981691510045366, "grad_norm": 0.05112173780798912, "learning_rate": 0.00011212925874251767, "loss": 0.2506, "step": 24462 }, { "epoch": 1.9817725210628645, "grad_norm": 0.0629863440990448, "learning_rate": 0.00011212475808992304, "loss": 0.2648, "step": 24463 }, { "epoch": 1.981853532080363, "grad_norm": 0.07610534876585007, "learning_rate": 0.00011212025743732842, "loss": 0.2665, "step": 24464 }, { "epoch": 1.9819345430978612, "grad_norm": 0.057084761559963226, "learning_rate": 0.00011211575678473378, "loss": 0.2609, "step": 24465 }, { "epoch": 1.9820155541153597, "grad_norm": 0.05006333068013191, "learning_rate": 0.00011211125613213918, "loss": 0.2939, "step": 24466 }, { "epoch": 1.9820965651328581, "grad_norm": 0.05147801712155342, "learning_rate": 0.00011210675547954455, "loss": 0.263, "step": 24467 }, { "epoch": 1.9821775761503564, "grad_norm": 0.056295499205589294, "learning_rate": 0.00011210225482694991, "loss": 0.2558, "step": 24468 }, { "epoch": 1.9822585871678549, "grad_norm": 0.0700087621808052, "learning_rate": 0.00011209775417435528, "loss": 0.2555, "step": 24469 }, { "epoch": 1.9823395981853533, "grad_norm": 0.06073461472988129, "learning_rate": 0.00011209325352176066, "loss": 0.2736, "step": 24470 }, { "epoch": 1.9824206092028516, "grad_norm": 0.06145452708005905, "learning_rate": 0.00011208875286916602, "loss": 0.2946, "step": 24471 }, { "epoch": 1.9825016202203498, "grad_norm": 0.06638166308403015, "learning_rate": 0.00011208425221657142, "loss": 0.2725, "step": 24472 }, { "epoch": 1.9825826312378485, "grad_norm": 0.06589549779891968, "learning_rate": 0.00011207975156397679, "loss": 0.2852, "step": 24473 }, { "epoch": 1.9826636422553467, "grad_norm": 0.05530265346169472, "learning_rate": 0.00011207525091138215, "loss": 0.2754, "step": 24474 }, { "epoch": 1.982744653272845, "grad_norm": 0.05445459857583046, "learning_rate": 0.00011207075025878753, "loss": 0.2451, "step": 24475 }, { "epoch": 1.9828256642903435, "grad_norm": 0.04934202507138252, "learning_rate": 0.0001120662496061929, "loss": 0.2473, "step": 24476 }, { "epoch": 1.982906675307842, "grad_norm": 0.06119426339864731, "learning_rate": 0.00011206174895359826, "loss": 0.3041, "step": 24477 }, { "epoch": 1.9829876863253402, "grad_norm": 0.05670265480875969, "learning_rate": 0.00011205724830100366, "loss": 0.2268, "step": 24478 }, { "epoch": 1.9830686973428386, "grad_norm": 0.06675172597169876, "learning_rate": 0.00011205274764840903, "loss": 0.2677, "step": 24479 }, { "epoch": 1.983149708360337, "grad_norm": 0.055620431900024414, "learning_rate": 0.0001120482469958144, "loss": 0.224, "step": 24480 }, { "epoch": 1.9832307193778353, "grad_norm": 0.06226406618952751, "learning_rate": 0.00011204374634321977, "loss": 0.2093, "step": 24481 }, { "epoch": 1.9833117303953338, "grad_norm": 0.051887817680835724, "learning_rate": 0.00011203924569062514, "loss": 0.2495, "step": 24482 }, { "epoch": 1.9833927414128323, "grad_norm": 0.059647489339113235, "learning_rate": 0.00011203474503803053, "loss": 0.2327, "step": 24483 }, { "epoch": 1.9834737524303305, "grad_norm": 0.06090124323964119, "learning_rate": 0.0001120302443854359, "loss": 0.2628, "step": 24484 }, { "epoch": 1.9835547634478288, "grad_norm": 0.05459466204047203, "learning_rate": 0.00011202574373284128, "loss": 0.2848, "step": 24485 }, { "epoch": 1.9836357744653272, "grad_norm": 0.056634724140167236, "learning_rate": 0.00011202124308024664, "loss": 0.3098, "step": 24486 }, { "epoch": 1.9837167854828257, "grad_norm": 0.07928376644849777, "learning_rate": 0.00011201674242765201, "loss": 0.3164, "step": 24487 }, { "epoch": 1.983797796500324, "grad_norm": 0.0586346834897995, "learning_rate": 0.00011201224177505738, "loss": 0.2314, "step": 24488 }, { "epoch": 1.9838788075178224, "grad_norm": 0.06746040284633636, "learning_rate": 0.00011200774112246277, "loss": 0.2514, "step": 24489 }, { "epoch": 1.9839598185353209, "grad_norm": 0.049767278134822845, "learning_rate": 0.00011200324046986814, "loss": 0.2643, "step": 24490 }, { "epoch": 1.9840408295528191, "grad_norm": 0.06641127169132233, "learning_rate": 0.00011199873981727352, "loss": 0.2634, "step": 24491 }, { "epoch": 1.9841218405703176, "grad_norm": 0.06195981800556183, "learning_rate": 0.00011199423916467888, "loss": 0.2632, "step": 24492 }, { "epoch": 1.984202851587816, "grad_norm": 0.05313951522111893, "learning_rate": 0.00011198973851208425, "loss": 0.2762, "step": 24493 }, { "epoch": 1.9842838626053143, "grad_norm": 0.05326806381344795, "learning_rate": 0.00011198523785948962, "loss": 0.2396, "step": 24494 }, { "epoch": 1.9843648736228126, "grad_norm": 0.07170695811510086, "learning_rate": 0.00011198073720689501, "loss": 0.2899, "step": 24495 }, { "epoch": 1.9844458846403112, "grad_norm": 0.06008395552635193, "learning_rate": 0.00011197623655430039, "loss": 0.2291, "step": 24496 }, { "epoch": 1.9845268956578095, "grad_norm": 0.070198193192482, "learning_rate": 0.00011197173590170576, "loss": 0.3339, "step": 24497 }, { "epoch": 1.9846079066753077, "grad_norm": 0.06424789130687714, "learning_rate": 0.00011196723524911112, "loss": 0.3084, "step": 24498 }, { "epoch": 1.9846889176928062, "grad_norm": 0.0610867515206337, "learning_rate": 0.00011196273459651649, "loss": 0.3067, "step": 24499 }, { "epoch": 1.9847699287103047, "grad_norm": 0.05551750212907791, "learning_rate": 0.00011195823394392187, "loss": 0.2685, "step": 24500 }, { "epoch": 1.984850939727803, "grad_norm": 0.06064987927675247, "learning_rate": 0.00011195373329132725, "loss": 0.3069, "step": 24501 }, { "epoch": 1.9849319507453014, "grad_norm": 0.06334603577852249, "learning_rate": 0.00011194923263873263, "loss": 0.2555, "step": 24502 }, { "epoch": 1.9850129617627998, "grad_norm": 0.06943856179714203, "learning_rate": 0.000111944731986138, "loss": 0.2758, "step": 24503 }, { "epoch": 1.985093972780298, "grad_norm": 0.055823858827352524, "learning_rate": 0.00011194023133354336, "loss": 0.258, "step": 24504 }, { "epoch": 1.9851749837977966, "grad_norm": 0.06590847671031952, "learning_rate": 0.00011193573068094873, "loss": 0.2694, "step": 24505 }, { "epoch": 1.985255994815295, "grad_norm": 0.08120261132717133, "learning_rate": 0.00011193123002835411, "loss": 0.3314, "step": 24506 }, { "epoch": 1.9853370058327933, "grad_norm": 0.058982912451028824, "learning_rate": 0.0001119267293757595, "loss": 0.2636, "step": 24507 }, { "epoch": 1.9854180168502915, "grad_norm": 0.09235643595457077, "learning_rate": 0.00011192222872316487, "loss": 0.2883, "step": 24508 }, { "epoch": 1.98549902786779, "grad_norm": 0.05961127579212189, "learning_rate": 0.00011191772807057024, "loss": 0.2508, "step": 24509 }, { "epoch": 1.9855800388852884, "grad_norm": 0.06918458640575409, "learning_rate": 0.0001119132274179756, "loss": 0.3028, "step": 24510 }, { "epoch": 1.9856610499027867, "grad_norm": 0.056944042444229126, "learning_rate": 0.00011190872676538098, "loss": 0.2494, "step": 24511 }, { "epoch": 1.9857420609202852, "grad_norm": 0.05929713323712349, "learning_rate": 0.00011190422611278638, "loss": 0.2547, "step": 24512 }, { "epoch": 1.9858230719377836, "grad_norm": 0.05718807131052017, "learning_rate": 0.00011189972546019174, "loss": 0.2701, "step": 24513 }, { "epoch": 1.9859040829552819, "grad_norm": 0.05005601793527603, "learning_rate": 0.00011189522480759711, "loss": 0.2057, "step": 24514 }, { "epoch": 1.9859850939727803, "grad_norm": 0.06474079936742783, "learning_rate": 0.00011189072415500248, "loss": 0.2971, "step": 24515 }, { "epoch": 1.9860661049902788, "grad_norm": 0.04730149731040001, "learning_rate": 0.00011188622350240784, "loss": 0.2715, "step": 24516 }, { "epoch": 1.986147116007777, "grad_norm": 0.05048287287354469, "learning_rate": 0.00011188172284981322, "loss": 0.2503, "step": 24517 }, { "epoch": 1.9862281270252753, "grad_norm": 0.0685349553823471, "learning_rate": 0.00011187722219721862, "loss": 0.2832, "step": 24518 }, { "epoch": 1.986309138042774, "grad_norm": 0.04649600014090538, "learning_rate": 0.00011187272154462398, "loss": 0.216, "step": 24519 }, { "epoch": 1.9863901490602722, "grad_norm": 0.06072686240077019, "learning_rate": 0.00011186822089202935, "loss": 0.2706, "step": 24520 }, { "epoch": 1.9864711600777705, "grad_norm": 0.06857617199420929, "learning_rate": 0.00011186372023943473, "loss": 0.2356, "step": 24521 }, { "epoch": 1.986552171095269, "grad_norm": 0.06952265650033951, "learning_rate": 0.00011185921958684009, "loss": 0.2902, "step": 24522 }, { "epoch": 1.9866331821127674, "grad_norm": 0.05612373724579811, "learning_rate": 0.00011185471893424546, "loss": 0.2502, "step": 24523 }, { "epoch": 1.9867141931302656, "grad_norm": 0.052051082253456116, "learning_rate": 0.00011185021828165086, "loss": 0.2394, "step": 24524 }, { "epoch": 1.9867952041477641, "grad_norm": 0.059324733912944794, "learning_rate": 0.00011184571762905622, "loss": 0.262, "step": 24525 }, { "epoch": 1.9868762151652626, "grad_norm": 0.06145957112312317, "learning_rate": 0.0001118412169764616, "loss": 0.2551, "step": 24526 }, { "epoch": 1.9869572261827608, "grad_norm": 0.06007521227002144, "learning_rate": 0.00011183671632386697, "loss": 0.2511, "step": 24527 }, { "epoch": 1.987038237200259, "grad_norm": 0.0481129065155983, "learning_rate": 0.00011183221567127233, "loss": 0.2424, "step": 24528 }, { "epoch": 1.9871192482177578, "grad_norm": 0.053366199135780334, "learning_rate": 0.0001118277150186777, "loss": 0.2566, "step": 24529 }, { "epoch": 1.987200259235256, "grad_norm": 0.057617463171482086, "learning_rate": 0.0001118232143660831, "loss": 0.2516, "step": 24530 }, { "epoch": 1.9872812702527543, "grad_norm": 0.06365573406219482, "learning_rate": 0.00011181871371348846, "loss": 0.3051, "step": 24531 }, { "epoch": 1.9873622812702527, "grad_norm": 0.050839368253946304, "learning_rate": 0.00011181421306089384, "loss": 0.2531, "step": 24532 }, { "epoch": 1.9874432922877512, "grad_norm": 0.0506996251642704, "learning_rate": 0.00011180971240829921, "loss": 0.2341, "step": 24533 }, { "epoch": 1.9875243033052494, "grad_norm": 0.07351569086313248, "learning_rate": 0.00011180521175570457, "loss": 0.2372, "step": 24534 }, { "epoch": 1.987605314322748, "grad_norm": 0.07951635867357254, "learning_rate": 0.00011180071110310994, "loss": 0.2614, "step": 24535 }, { "epoch": 1.9876863253402464, "grad_norm": 0.09023235738277435, "learning_rate": 0.00011179621045051534, "loss": 0.294, "step": 24536 }, { "epoch": 1.9877673363577446, "grad_norm": 0.06043418496847153, "learning_rate": 0.0001117917097979207, "loss": 0.2703, "step": 24537 }, { "epoch": 1.987848347375243, "grad_norm": 0.06518200039863586, "learning_rate": 0.00011178720914532608, "loss": 0.2742, "step": 24538 }, { "epoch": 1.9879293583927415, "grad_norm": 0.07014909386634827, "learning_rate": 0.00011178270849273145, "loss": 0.3306, "step": 24539 }, { "epoch": 1.9880103694102398, "grad_norm": 0.06112273782491684, "learning_rate": 0.00011177820784013681, "loss": 0.2638, "step": 24540 }, { "epoch": 1.988091380427738, "grad_norm": 0.05691821873188019, "learning_rate": 0.00011177370718754221, "loss": 0.2354, "step": 24541 }, { "epoch": 1.9881723914452365, "grad_norm": 0.057806871831417084, "learning_rate": 0.00011176920653494758, "loss": 0.2906, "step": 24542 }, { "epoch": 1.988253402462735, "grad_norm": 0.05050506070256233, "learning_rate": 0.00011176470588235294, "loss": 0.249, "step": 24543 }, { "epoch": 1.9883344134802332, "grad_norm": 0.056343384087085724, "learning_rate": 0.00011176020522975832, "loss": 0.2296, "step": 24544 }, { "epoch": 1.9884154244977317, "grad_norm": 0.049676354974508286, "learning_rate": 0.00011175570457716369, "loss": 0.2453, "step": 24545 }, { "epoch": 1.9884964355152301, "grad_norm": 0.062451086938381195, "learning_rate": 0.00011175120392456905, "loss": 0.302, "step": 24546 }, { "epoch": 1.9885774465327284, "grad_norm": 0.05744845047593117, "learning_rate": 0.00011174670327197445, "loss": 0.2519, "step": 24547 }, { "epoch": 1.9886584575502269, "grad_norm": 0.055483151227235794, "learning_rate": 0.00011174220261937983, "loss": 0.2436, "step": 24548 }, { "epoch": 1.9887394685677253, "grad_norm": 0.0638405978679657, "learning_rate": 0.00011173770196678519, "loss": 0.2653, "step": 24549 }, { "epoch": 1.9888204795852236, "grad_norm": 0.05625903606414795, "learning_rate": 0.00011173320131419056, "loss": 0.2677, "step": 24550 }, { "epoch": 1.9889014906027218, "grad_norm": 0.061679109930992126, "learning_rate": 0.00011172870066159593, "loss": 0.287, "step": 24551 }, { "epoch": 1.9889825016202205, "grad_norm": 0.05694204196333885, "learning_rate": 0.00011172420000900131, "loss": 0.2265, "step": 24552 }, { "epoch": 1.9890635126377187, "grad_norm": 0.07806746661663055, "learning_rate": 0.0001117196993564067, "loss": 0.2172, "step": 24553 }, { "epoch": 1.989144523655217, "grad_norm": 0.048661310225725174, "learning_rate": 0.00011171519870381207, "loss": 0.2383, "step": 24554 }, { "epoch": 1.9892255346727155, "grad_norm": 0.06622835993766785, "learning_rate": 0.00011171069805121743, "loss": 0.2882, "step": 24555 }, { "epoch": 1.989306545690214, "grad_norm": 0.05554422736167908, "learning_rate": 0.0001117061973986228, "loss": 0.2619, "step": 24556 }, { "epoch": 1.9893875567077122, "grad_norm": 0.052647799253463745, "learning_rate": 0.00011170169674602818, "loss": 0.2794, "step": 24557 }, { "epoch": 1.9894685677252106, "grad_norm": 0.060399044305086136, "learning_rate": 0.00011169719609343355, "loss": 0.2619, "step": 24558 }, { "epoch": 1.989549578742709, "grad_norm": 0.05348338559269905, "learning_rate": 0.00011169269544083894, "loss": 0.2545, "step": 24559 }, { "epoch": 1.9896305897602073, "grad_norm": 0.06820795685052872, "learning_rate": 0.00011168819478824431, "loss": 0.2736, "step": 24560 }, { "epoch": 1.9897116007777058, "grad_norm": 0.06192629784345627, "learning_rate": 0.00011168369413564967, "loss": 0.2742, "step": 24561 }, { "epoch": 1.9897926117952043, "grad_norm": 0.0603586845099926, "learning_rate": 0.00011167919348305504, "loss": 0.2561, "step": 24562 }, { "epoch": 1.9898736228127025, "grad_norm": 0.055726949125528336, "learning_rate": 0.00011167469283046042, "loss": 0.2532, "step": 24563 }, { "epoch": 1.9899546338302008, "grad_norm": 0.06763040274381638, "learning_rate": 0.0001116701921778658, "loss": 0.2934, "step": 24564 }, { "epoch": 1.9900356448476992, "grad_norm": 0.07034408301115036, "learning_rate": 0.00011166569152527118, "loss": 0.2619, "step": 24565 }, { "epoch": 1.9901166558651977, "grad_norm": 0.07477734982967377, "learning_rate": 0.00011166119087267655, "loss": 0.2686, "step": 24566 }, { "epoch": 1.990197666882696, "grad_norm": 0.05332627892494202, "learning_rate": 0.00011165669022008191, "loss": 0.2509, "step": 24567 }, { "epoch": 1.9902786779001944, "grad_norm": 0.05626533553004265, "learning_rate": 0.00011165218956748729, "loss": 0.3044, "step": 24568 }, { "epoch": 1.9903596889176929, "grad_norm": 0.05866987630724907, "learning_rate": 0.00011164768891489266, "loss": 0.2438, "step": 24569 }, { "epoch": 1.9904406999351911, "grad_norm": 0.05218745395541191, "learning_rate": 0.00011164318826229805, "loss": 0.2403, "step": 24570 }, { "epoch": 1.9905217109526896, "grad_norm": 0.06421063095331192, "learning_rate": 0.00011163868760970342, "loss": 0.2867, "step": 24571 }, { "epoch": 1.990602721970188, "grad_norm": 0.057032715529203415, "learning_rate": 0.00011163418695710879, "loss": 0.2647, "step": 24572 }, { "epoch": 1.9906837329876863, "grad_norm": 0.050951577723026276, "learning_rate": 0.00011162968630451415, "loss": 0.2686, "step": 24573 }, { "epoch": 1.9907647440051845, "grad_norm": 0.06326782703399658, "learning_rate": 0.00011162518565191953, "loss": 0.269, "step": 24574 }, { "epoch": 1.9908457550226832, "grad_norm": 0.062102437019348145, "learning_rate": 0.0001116206849993249, "loss": 0.2342, "step": 24575 }, { "epoch": 1.9909267660401815, "grad_norm": 0.07285258173942566, "learning_rate": 0.00011161618434673029, "loss": 0.2705, "step": 24576 }, { "epoch": 1.9910077770576797, "grad_norm": 0.06593865901231766, "learning_rate": 0.00011161168369413566, "loss": 0.269, "step": 24577 }, { "epoch": 1.9910887880751782, "grad_norm": 0.06808798760175705, "learning_rate": 0.00011160718304154103, "loss": 0.2833, "step": 24578 }, { "epoch": 1.9911697990926767, "grad_norm": 0.06346582621335983, "learning_rate": 0.0001116026823889464, "loss": 0.2162, "step": 24579 }, { "epoch": 1.991250810110175, "grad_norm": 0.05143898352980614, "learning_rate": 0.00011159818173635177, "loss": 0.2506, "step": 24580 }, { "epoch": 1.9913318211276734, "grad_norm": 0.05779607594013214, "learning_rate": 0.00011159368108375714, "loss": 0.3119, "step": 24581 }, { "epoch": 1.9914128321451718, "grad_norm": 0.06664140522480011, "learning_rate": 0.00011158918043116253, "loss": 0.269, "step": 24582 }, { "epoch": 1.99149384316267, "grad_norm": 0.0591379813849926, "learning_rate": 0.0001115846797785679, "loss": 0.2791, "step": 24583 }, { "epoch": 1.9915748541801686, "grad_norm": 0.058772388845682144, "learning_rate": 0.00011158017912597328, "loss": 0.2951, "step": 24584 }, { "epoch": 1.991655865197667, "grad_norm": 0.05919862911105156, "learning_rate": 0.00011157567847337864, "loss": 0.2522, "step": 24585 }, { "epoch": 1.9917368762151653, "grad_norm": 0.06193426623940468, "learning_rate": 0.00011157117782078401, "loss": 0.3214, "step": 24586 }, { "epoch": 1.9918178872326635, "grad_norm": 0.062030475586652756, "learning_rate": 0.00011156667716818938, "loss": 0.2391, "step": 24587 }, { "epoch": 1.991898898250162, "grad_norm": 0.06320808082818985, "learning_rate": 0.00011156217651559477, "loss": 0.2907, "step": 24588 }, { "epoch": 1.9919799092676604, "grad_norm": 0.057537805289030075, "learning_rate": 0.00011155767586300014, "loss": 0.2469, "step": 24589 }, { "epoch": 1.9920609202851587, "grad_norm": 0.057037729769945145, "learning_rate": 0.00011155317521040552, "loss": 0.2696, "step": 24590 }, { "epoch": 1.9921419313026572, "grad_norm": 0.05443946272134781, "learning_rate": 0.00011154867455781088, "loss": 0.2441, "step": 24591 }, { "epoch": 1.9922229423201556, "grad_norm": 0.060619425028562546, "learning_rate": 0.00011154417390521625, "loss": 0.2568, "step": 24592 }, { "epoch": 1.9923039533376539, "grad_norm": 0.06094972416758537, "learning_rate": 0.00011153967325262165, "loss": 0.2774, "step": 24593 }, { "epoch": 1.9923849643551523, "grad_norm": 0.06160997226834297, "learning_rate": 0.00011153517260002701, "loss": 0.2604, "step": 24594 }, { "epoch": 1.9924659753726508, "grad_norm": 0.06207242235541344, "learning_rate": 0.00011153067194743239, "loss": 0.2543, "step": 24595 }, { "epoch": 1.992546986390149, "grad_norm": 0.077674999833107, "learning_rate": 0.00011152617129483776, "loss": 0.2803, "step": 24596 }, { "epoch": 1.9926279974076473, "grad_norm": 0.0795058086514473, "learning_rate": 0.00011152167064224312, "loss": 0.3187, "step": 24597 }, { "epoch": 1.992709008425146, "grad_norm": 0.07428909093141556, "learning_rate": 0.00011151716998964849, "loss": 0.3045, "step": 24598 }, { "epoch": 1.9927900194426442, "grad_norm": 0.044681303203105927, "learning_rate": 0.0001115126693370539, "loss": 0.2241, "step": 24599 }, { "epoch": 1.9928710304601425, "grad_norm": 0.07117260992527008, "learning_rate": 0.00011150816868445925, "loss": 0.2866, "step": 24600 }, { "epoch": 1.992952041477641, "grad_norm": 0.06624329090118408, "learning_rate": 0.00011150366803186463, "loss": 0.3041, "step": 24601 }, { "epoch": 1.9930330524951394, "grad_norm": 0.06437989324331284, "learning_rate": 0.00011149916737927, "loss": 0.3017, "step": 24602 }, { "epoch": 1.9931140635126376, "grad_norm": 0.052165914326906204, "learning_rate": 0.00011149466672667536, "loss": 0.2566, "step": 24603 }, { "epoch": 1.9931950745301361, "grad_norm": 0.06342125684022903, "learning_rate": 0.00011149016607408073, "loss": 0.2759, "step": 24604 }, { "epoch": 1.9932760855476346, "grad_norm": 0.05953681841492653, "learning_rate": 0.00011148566542148614, "loss": 0.2445, "step": 24605 }, { "epoch": 1.9933570965651328, "grad_norm": 0.05627750977873802, "learning_rate": 0.0001114811647688915, "loss": 0.2747, "step": 24606 }, { "epoch": 1.9934381075826313, "grad_norm": 0.07000666856765747, "learning_rate": 0.00011147666411629687, "loss": 0.2832, "step": 24607 }, { "epoch": 1.9935191186001298, "grad_norm": 0.06306980550289154, "learning_rate": 0.00011147216346370224, "loss": 0.2462, "step": 24608 }, { "epoch": 1.993600129617628, "grad_norm": 0.06391213089227676, "learning_rate": 0.0001114676628111076, "loss": 0.261, "step": 24609 }, { "epoch": 1.9936811406351262, "grad_norm": 0.06361900269985199, "learning_rate": 0.00011146316215851298, "loss": 0.2847, "step": 24610 }, { "epoch": 1.9937621516526247, "grad_norm": 0.05595242604613304, "learning_rate": 0.00011145866150591838, "loss": 0.2612, "step": 24611 }, { "epoch": 1.9938431626701232, "grad_norm": 0.05720612406730652, "learning_rate": 0.00011145416085332374, "loss": 0.2928, "step": 24612 }, { "epoch": 1.9939241736876214, "grad_norm": 0.07067383825778961, "learning_rate": 0.00011144966020072911, "loss": 0.2921, "step": 24613 }, { "epoch": 1.99400518470512, "grad_norm": 0.06673353910446167, "learning_rate": 0.00011144515954813448, "loss": 0.2906, "step": 24614 }, { "epoch": 1.9940861957226184, "grad_norm": 0.06661546230316162, "learning_rate": 0.00011144065889553986, "loss": 0.284, "step": 24615 }, { "epoch": 1.9941672067401166, "grad_norm": 0.06102203577756882, "learning_rate": 0.00011143615824294525, "loss": 0.269, "step": 24616 }, { "epoch": 1.994248217757615, "grad_norm": 0.055220767855644226, "learning_rate": 0.00011143165759035062, "loss": 0.2439, "step": 24617 }, { "epoch": 1.9943292287751135, "grad_norm": 0.05387234315276146, "learning_rate": 0.00011142715693775598, "loss": 0.225, "step": 24618 }, { "epoch": 1.9944102397926118, "grad_norm": 0.05761054530739784, "learning_rate": 0.00011142265628516135, "loss": 0.3081, "step": 24619 }, { "epoch": 1.99449125081011, "grad_norm": 0.06527112424373627, "learning_rate": 0.00011141815563256673, "loss": 0.2999, "step": 24620 }, { "epoch": 1.9945722618276087, "grad_norm": 0.06231717765331268, "learning_rate": 0.0001114136549799721, "loss": 0.2853, "step": 24621 }, { "epoch": 1.994653272845107, "grad_norm": 0.05752355605363846, "learning_rate": 0.00011140915432737749, "loss": 0.2365, "step": 24622 }, { "epoch": 1.9947342838626052, "grad_norm": 0.05275258794426918, "learning_rate": 0.00011140465367478286, "loss": 0.2559, "step": 24623 }, { "epoch": 1.9948152948801037, "grad_norm": 0.048421286046504974, "learning_rate": 0.00011140015302218822, "loss": 0.2261, "step": 24624 }, { "epoch": 1.9948963058976021, "grad_norm": 0.05411406606435776, "learning_rate": 0.0001113956523695936, "loss": 0.2123, "step": 24625 }, { "epoch": 1.9949773169151004, "grad_norm": 0.06282738596200943, "learning_rate": 0.00011139115171699897, "loss": 0.2899, "step": 24626 }, { "epoch": 1.9950583279325989, "grad_norm": 0.058103177696466446, "learning_rate": 0.00011138665106440434, "loss": 0.3021, "step": 24627 }, { "epoch": 1.9951393389500973, "grad_norm": 0.059910792857408524, "learning_rate": 0.00011138215041180973, "loss": 0.2864, "step": 24628 }, { "epoch": 1.9952203499675956, "grad_norm": 0.061971426010131836, "learning_rate": 0.0001113776497592151, "loss": 0.2642, "step": 24629 }, { "epoch": 1.9953013609850938, "grad_norm": 0.05655450001358986, "learning_rate": 0.00011137314910662046, "loss": 0.3023, "step": 24630 }, { "epoch": 1.9953823720025925, "grad_norm": 0.06409027427434921, "learning_rate": 0.00011136864845402584, "loss": 0.2386, "step": 24631 }, { "epoch": 1.9954633830200907, "grad_norm": 0.06355836242437363, "learning_rate": 0.00011136414780143121, "loss": 0.2617, "step": 24632 }, { "epoch": 1.995544394037589, "grad_norm": 0.056723617017269135, "learning_rate": 0.00011135964714883658, "loss": 0.2586, "step": 24633 }, { "epoch": 1.9956254050550875, "grad_norm": 0.06595166772603989, "learning_rate": 0.00011135514649624197, "loss": 0.2874, "step": 24634 }, { "epoch": 1.995706416072586, "grad_norm": 0.06310024857521057, "learning_rate": 0.00011135064584364734, "loss": 0.2506, "step": 24635 }, { "epoch": 1.9957874270900842, "grad_norm": 0.0638793483376503, "learning_rate": 0.0001113461451910527, "loss": 0.266, "step": 24636 }, { "epoch": 1.9958684381075826, "grad_norm": 0.05258272960782051, "learning_rate": 0.00011134164453845808, "loss": 0.2736, "step": 24637 }, { "epoch": 1.995949449125081, "grad_norm": 0.0778241902589798, "learning_rate": 0.00011133714388586345, "loss": 0.3137, "step": 24638 }, { "epoch": 1.9960304601425793, "grad_norm": 0.06420960277318954, "learning_rate": 0.00011133264323326882, "loss": 0.2783, "step": 24639 }, { "epoch": 1.9961114711600778, "grad_norm": 0.06127495691180229, "learning_rate": 0.00011132814258067421, "loss": 0.2688, "step": 24640 }, { "epoch": 1.9961924821775763, "grad_norm": 0.06163187325000763, "learning_rate": 0.00011132364192807959, "loss": 0.2524, "step": 24641 }, { "epoch": 1.9962734931950745, "grad_norm": 0.06623512506484985, "learning_rate": 0.00011131914127548495, "loss": 0.2536, "step": 24642 }, { "epoch": 1.9963545042125728, "grad_norm": 0.05344029515981674, "learning_rate": 0.00011131464062289032, "loss": 0.2577, "step": 24643 }, { "epoch": 1.9964355152300715, "grad_norm": 0.05709357187151909, "learning_rate": 0.00011131013997029569, "loss": 0.2525, "step": 24644 }, { "epoch": 1.9965165262475697, "grad_norm": 0.05957780033349991, "learning_rate": 0.00011130563931770108, "loss": 0.2777, "step": 24645 }, { "epoch": 1.996597537265068, "grad_norm": 0.06133737415075302, "learning_rate": 0.00011130113866510645, "loss": 0.2333, "step": 24646 }, { "epoch": 1.9966785482825664, "grad_norm": 0.06680312752723694, "learning_rate": 0.00011129663801251183, "loss": 0.2544, "step": 24647 }, { "epoch": 1.9967595593000649, "grad_norm": 0.04961247742176056, "learning_rate": 0.00011129213735991719, "loss": 0.2551, "step": 24648 }, { "epoch": 1.9968405703175631, "grad_norm": 0.05387548357248306, "learning_rate": 0.00011128763670732256, "loss": 0.2646, "step": 24649 }, { "epoch": 1.9969215813350616, "grad_norm": 0.07167568057775497, "learning_rate": 0.00011128313605472793, "loss": 0.2806, "step": 24650 }, { "epoch": 1.99700259235256, "grad_norm": 0.05495288595557213, "learning_rate": 0.00011127863540213332, "loss": 0.2527, "step": 24651 }, { "epoch": 1.9970836033700583, "grad_norm": 0.07860712707042694, "learning_rate": 0.0001112741347495387, "loss": 0.3519, "step": 24652 }, { "epoch": 1.9971646143875565, "grad_norm": 0.05974472314119339, "learning_rate": 0.00011126963409694407, "loss": 0.2416, "step": 24653 }, { "epoch": 1.9972456254050552, "grad_norm": 0.0708412453532219, "learning_rate": 0.00011126513344434943, "loss": 0.3165, "step": 24654 }, { "epoch": 1.9973266364225535, "grad_norm": 0.04328044131398201, "learning_rate": 0.0001112606327917548, "loss": 0.2463, "step": 24655 }, { "epoch": 1.9974076474400517, "grad_norm": 0.054907847195863724, "learning_rate": 0.00011125613213916018, "loss": 0.2841, "step": 24656 }, { "epoch": 1.9974886584575502, "grad_norm": 0.05406655743718147, "learning_rate": 0.00011125163148656556, "loss": 0.2728, "step": 24657 }, { "epoch": 1.9975696694750487, "grad_norm": 0.05375692993402481, "learning_rate": 0.00011124713083397094, "loss": 0.3063, "step": 24658 }, { "epoch": 1.997650680492547, "grad_norm": 0.05375174060463905, "learning_rate": 0.00011124263018137631, "loss": 0.259, "step": 24659 }, { "epoch": 1.9977316915100454, "grad_norm": 0.05399360880255699, "learning_rate": 0.00011123812952878167, "loss": 0.2399, "step": 24660 }, { "epoch": 1.9978127025275438, "grad_norm": 0.0648183673620224, "learning_rate": 0.00011123362887618704, "loss": 0.2597, "step": 24661 }, { "epoch": 1.997893713545042, "grad_norm": 0.054348211735486984, "learning_rate": 0.00011122912822359242, "loss": 0.2603, "step": 24662 }, { "epoch": 1.9979747245625405, "grad_norm": 0.07106921076774597, "learning_rate": 0.0001112246275709978, "loss": 0.3093, "step": 24663 }, { "epoch": 1.998055735580039, "grad_norm": 0.05705713853240013, "learning_rate": 0.00011122012691840318, "loss": 0.2544, "step": 24664 }, { "epoch": 1.9981367465975373, "grad_norm": 0.05977337807416916, "learning_rate": 0.00011121562626580855, "loss": 0.2715, "step": 24665 }, { "epoch": 1.9982177576150355, "grad_norm": 0.05743882805109024, "learning_rate": 0.00011121112561321391, "loss": 0.2842, "step": 24666 }, { "epoch": 1.998298768632534, "grad_norm": 0.056074030697345734, "learning_rate": 0.00011120662496061929, "loss": 0.2544, "step": 24667 }, { "epoch": 1.9983797796500324, "grad_norm": 0.05134023725986481, "learning_rate": 0.00011120212430802469, "loss": 0.2757, "step": 24668 }, { "epoch": 1.9984607906675307, "grad_norm": 0.0737508237361908, "learning_rate": 0.00011119762365543005, "loss": 0.2564, "step": 24669 }, { "epoch": 1.9985418016850292, "grad_norm": 0.06467597186565399, "learning_rate": 0.00011119312300283542, "loss": 0.3089, "step": 24670 }, { "epoch": 1.9986228127025276, "grad_norm": 0.06818606704473495, "learning_rate": 0.0001111886223502408, "loss": 0.256, "step": 24671 }, { "epoch": 1.9987038237200259, "grad_norm": 0.06933887302875519, "learning_rate": 0.00011118412169764615, "loss": 0.2743, "step": 24672 }, { "epoch": 1.9987848347375243, "grad_norm": 0.05665956437587738, "learning_rate": 0.00011117962104505153, "loss": 0.2242, "step": 24673 }, { "epoch": 1.9988658457550228, "grad_norm": 0.05923176556825638, "learning_rate": 0.00011117512039245693, "loss": 0.301, "step": 24674 }, { "epoch": 1.998946856772521, "grad_norm": 0.054322510957717896, "learning_rate": 0.00011117061973986229, "loss": 0.2436, "step": 24675 }, { "epoch": 1.9990278677900193, "grad_norm": 0.05147448554635048, "learning_rate": 0.00011116611908726766, "loss": 0.229, "step": 24676 }, { "epoch": 1.999108878807518, "grad_norm": 0.06228252127766609, "learning_rate": 0.00011116161843467303, "loss": 0.2484, "step": 24677 }, { "epoch": 1.9991898898250162, "grad_norm": 0.04767598584294319, "learning_rate": 0.0001111571177820784, "loss": 0.2757, "step": 24678 }, { "epoch": 1.9992709008425145, "grad_norm": 0.04789305478334427, "learning_rate": 0.00011115261712948377, "loss": 0.2455, "step": 24679 }, { "epoch": 1.999351911860013, "grad_norm": 0.06482335180044174, "learning_rate": 0.00011114811647688917, "loss": 0.2898, "step": 24680 }, { "epoch": 1.9994329228775114, "grad_norm": 0.057793837040662766, "learning_rate": 0.00011114361582429453, "loss": 0.2517, "step": 24681 }, { "epoch": 1.9995139338950096, "grad_norm": 0.05205461010336876, "learning_rate": 0.0001111391151716999, "loss": 0.2323, "step": 24682 }, { "epoch": 1.999594944912508, "grad_norm": 0.05734451487660408, "learning_rate": 0.00011113461451910528, "loss": 0.2746, "step": 24683 }, { "epoch": 1.9996759559300066, "grad_norm": 0.062195006757974625, "learning_rate": 0.00011113011386651065, "loss": 0.2669, "step": 24684 }, { "epoch": 1.9997569669475048, "grad_norm": 0.05192793905735016, "learning_rate": 0.00011112561321391601, "loss": 0.2802, "step": 24685 }, { "epoch": 1.9998379779650033, "grad_norm": 0.06143191456794739, "learning_rate": 0.00011112111256132141, "loss": 0.311, "step": 24686 }, { "epoch": 1.9999189889825018, "grad_norm": 0.054254207760095596, "learning_rate": 0.00011111661190872677, "loss": 0.2842, "step": 24687 }, { "epoch": 2.0, "grad_norm": 0.04564838111400604, "learning_rate": 0.00011111211125613214, "loss": 0.1993, "step": 24688 }, { "epoch": 2.0000810110174982, "grad_norm": 0.05824628099799156, "learning_rate": 0.00011110761060353752, "loss": 0.2654, "step": 24689 }, { "epoch": 2.000162022034997, "grad_norm": 0.06120525300502777, "learning_rate": 0.00011110310995094289, "loss": 0.2628, "step": 24690 }, { "epoch": 2.000243033052495, "grad_norm": 0.05350266024470329, "learning_rate": 0.00011109860929834825, "loss": 0.2325, "step": 24691 }, { "epoch": 2.0003240440699934, "grad_norm": 0.06434735655784607, "learning_rate": 0.00011109410864575365, "loss": 0.2442, "step": 24692 }, { "epoch": 2.000405055087492, "grad_norm": 0.07097124308347702, "learning_rate": 0.00011108960799315901, "loss": 0.2748, "step": 24693 }, { "epoch": 2.0004860661049904, "grad_norm": 0.06032516807317734, "learning_rate": 0.00011108510734056439, "loss": 0.2436, "step": 24694 }, { "epoch": 2.0005670771224886, "grad_norm": 0.0629146546125412, "learning_rate": 0.00011108060668796976, "loss": 0.3101, "step": 24695 }, { "epoch": 2.000648088139987, "grad_norm": 0.052501216530799866, "learning_rate": 0.00011107610603537513, "loss": 0.2384, "step": 24696 }, { "epoch": 2.0007290991574855, "grad_norm": 0.06095361337065697, "learning_rate": 0.00011107160538278052, "loss": 0.2698, "step": 24697 }, { "epoch": 2.000810110174984, "grad_norm": 0.05067559704184532, "learning_rate": 0.0001110671047301859, "loss": 0.2674, "step": 24698 }, { "epoch": 2.000891121192482, "grad_norm": 0.06133396923542023, "learning_rate": 0.00011106260407759125, "loss": 0.2594, "step": 24699 }, { "epoch": 2.0009721322099807, "grad_norm": 0.07269486784934998, "learning_rate": 0.00011105810342499663, "loss": 0.2731, "step": 24700 }, { "epoch": 2.001053143227479, "grad_norm": 0.054098621010780334, "learning_rate": 0.000111053602772402, "loss": 0.2434, "step": 24701 }, { "epoch": 2.001134154244977, "grad_norm": 0.048134684562683105, "learning_rate": 0.00011104910211980737, "loss": 0.2278, "step": 24702 }, { "epoch": 2.001215165262476, "grad_norm": 0.05443552881479263, "learning_rate": 0.00011104460146721276, "loss": 0.2295, "step": 24703 }, { "epoch": 2.001296176279974, "grad_norm": 0.0662059560418129, "learning_rate": 0.00011104010081461814, "loss": 0.2592, "step": 24704 }, { "epoch": 2.0013771872974724, "grad_norm": 0.052790913730859756, "learning_rate": 0.0001110356001620235, "loss": 0.2463, "step": 24705 }, { "epoch": 2.0014581983149706, "grad_norm": 0.05461428686976433, "learning_rate": 0.00011103109950942887, "loss": 0.2431, "step": 24706 }, { "epoch": 2.0015392093324693, "grad_norm": 0.05621863156557083, "learning_rate": 0.00011102659885683424, "loss": 0.2484, "step": 24707 }, { "epoch": 2.0016202203499676, "grad_norm": 0.06086277589201927, "learning_rate": 0.00011102209820423962, "loss": 0.2532, "step": 24708 }, { "epoch": 2.001701231367466, "grad_norm": 0.06324207782745361, "learning_rate": 0.000111017597551645, "loss": 0.2349, "step": 24709 }, { "epoch": 2.0017822423849645, "grad_norm": 0.057401739060878754, "learning_rate": 0.00011101309689905038, "loss": 0.2737, "step": 24710 }, { "epoch": 2.0018632534024627, "grad_norm": 0.07243891805410385, "learning_rate": 0.00011100859624645574, "loss": 0.2514, "step": 24711 }, { "epoch": 2.001944264419961, "grad_norm": 0.06032774597406387, "learning_rate": 0.00011100409559386111, "loss": 0.2515, "step": 24712 }, { "epoch": 2.0020252754374597, "grad_norm": 0.06539171189069748, "learning_rate": 0.00011099959494126648, "loss": 0.2429, "step": 24713 }, { "epoch": 2.002106286454958, "grad_norm": 0.06766407936811447, "learning_rate": 0.00011099509428867186, "loss": 0.2121, "step": 24714 }, { "epoch": 2.002187297472456, "grad_norm": 0.05758608505129814, "learning_rate": 0.00011099059363607725, "loss": 0.267, "step": 24715 }, { "epoch": 2.002268308489955, "grad_norm": 0.061857450753450394, "learning_rate": 0.00011098609298348262, "loss": 0.2626, "step": 24716 }, { "epoch": 2.002349319507453, "grad_norm": 0.06130329146981239, "learning_rate": 0.00011098159233088798, "loss": 0.2481, "step": 24717 }, { "epoch": 2.0024303305249513, "grad_norm": 0.0638541579246521, "learning_rate": 0.00011097709167829335, "loss": 0.2778, "step": 24718 }, { "epoch": 2.0025113415424496, "grad_norm": 0.05980760231614113, "learning_rate": 0.00011097259102569873, "loss": 0.2796, "step": 24719 }, { "epoch": 2.0025923525599483, "grad_norm": 0.06339839845895767, "learning_rate": 0.0001109680903731041, "loss": 0.2772, "step": 24720 }, { "epoch": 2.0026733635774465, "grad_norm": 0.0550805926322937, "learning_rate": 0.00011096358972050949, "loss": 0.2452, "step": 24721 }, { "epoch": 2.0027543745949448, "grad_norm": 0.06922931969165802, "learning_rate": 0.00011095908906791486, "loss": 0.2714, "step": 24722 }, { "epoch": 2.0028353856124435, "grad_norm": 0.05844055488705635, "learning_rate": 0.00011095458841532022, "loss": 0.2589, "step": 24723 }, { "epoch": 2.0029163966299417, "grad_norm": 0.0715651586651802, "learning_rate": 0.0001109500877627256, "loss": 0.2519, "step": 24724 }, { "epoch": 2.00299740764744, "grad_norm": 0.056815255433321, "learning_rate": 0.00011094558711013097, "loss": 0.2375, "step": 24725 }, { "epoch": 2.0030784186649386, "grad_norm": 0.07449610531330109, "learning_rate": 0.00011094108645753636, "loss": 0.2598, "step": 24726 }, { "epoch": 2.003159429682437, "grad_norm": 0.059494923800230026, "learning_rate": 0.00011093658580494173, "loss": 0.2419, "step": 24727 }, { "epoch": 2.003240440699935, "grad_norm": 0.06169473007321358, "learning_rate": 0.0001109320851523471, "loss": 0.3016, "step": 24728 }, { "epoch": 2.0033214517174334, "grad_norm": 0.05887840688228607, "learning_rate": 0.00011092758449975246, "loss": 0.2653, "step": 24729 }, { "epoch": 2.003402462734932, "grad_norm": 0.05909749120473862, "learning_rate": 0.00011092308384715784, "loss": 0.2754, "step": 24730 }, { "epoch": 2.0034834737524303, "grad_norm": 0.05189096927642822, "learning_rate": 0.00011091858319456321, "loss": 0.2508, "step": 24731 }, { "epoch": 2.0035644847699285, "grad_norm": 0.0566328763961792, "learning_rate": 0.0001109140825419686, "loss": 0.2704, "step": 24732 }, { "epoch": 2.0036454957874272, "grad_norm": 0.060474324971437454, "learning_rate": 0.00011090958188937397, "loss": 0.2717, "step": 24733 }, { "epoch": 2.0037265068049255, "grad_norm": 0.07984836399555206, "learning_rate": 0.00011090508123677934, "loss": 0.3446, "step": 24734 }, { "epoch": 2.0038075178224237, "grad_norm": 0.056504566222429276, "learning_rate": 0.0001109005805841847, "loss": 0.2756, "step": 24735 }, { "epoch": 2.0038885288399224, "grad_norm": 0.05933867767453194, "learning_rate": 0.00011089607993159008, "loss": 0.2581, "step": 24736 }, { "epoch": 2.0039695398574207, "grad_norm": 0.06576526910066605, "learning_rate": 0.00011089157927899545, "loss": 0.2791, "step": 24737 }, { "epoch": 2.004050550874919, "grad_norm": 0.06752409785985947, "learning_rate": 0.00011088707862640084, "loss": 0.2283, "step": 24738 }, { "epoch": 2.0041315618924176, "grad_norm": 0.06522194296121597, "learning_rate": 0.00011088257797380621, "loss": 0.2802, "step": 24739 }, { "epoch": 2.004212572909916, "grad_norm": 0.05219874158501625, "learning_rate": 0.00011087807732121159, "loss": 0.2693, "step": 24740 }, { "epoch": 2.004293583927414, "grad_norm": 0.06683573126792908, "learning_rate": 0.00011087357666861695, "loss": 0.2921, "step": 24741 }, { "epoch": 2.0043745949449123, "grad_norm": 0.04907495528459549, "learning_rate": 0.00011086907601602232, "loss": 0.2185, "step": 24742 }, { "epoch": 2.004455605962411, "grad_norm": 0.06387689709663391, "learning_rate": 0.00011086457536342769, "loss": 0.2467, "step": 24743 }, { "epoch": 2.0045366169799093, "grad_norm": 0.0585172064602375, "learning_rate": 0.00011086007471083308, "loss": 0.2231, "step": 24744 }, { "epoch": 2.0046176279974075, "grad_norm": 0.059628698974847794, "learning_rate": 0.00011085557405823845, "loss": 0.2674, "step": 24745 }, { "epoch": 2.004698639014906, "grad_norm": 0.05630394071340561, "learning_rate": 0.00011085107340564383, "loss": 0.2594, "step": 24746 }, { "epoch": 2.0047796500324044, "grad_norm": 0.06501825153827667, "learning_rate": 0.00011084657275304919, "loss": 0.2527, "step": 24747 }, { "epoch": 2.0048606610499027, "grad_norm": 0.07488704472780228, "learning_rate": 0.00011084207210045456, "loss": 0.2559, "step": 24748 }, { "epoch": 2.0049416720674014, "grad_norm": 0.06376083940267563, "learning_rate": 0.00011083757144785996, "loss": 0.2598, "step": 24749 }, { "epoch": 2.0050226830848996, "grad_norm": 0.057747989892959595, "learning_rate": 0.00011083307079526532, "loss": 0.22, "step": 24750 }, { "epoch": 2.005103694102398, "grad_norm": 0.07557959854602814, "learning_rate": 0.0001108285701426707, "loss": 0.2561, "step": 24751 }, { "epoch": 2.005184705119896, "grad_norm": 0.07208617031574249, "learning_rate": 0.00011082406949007607, "loss": 0.2967, "step": 24752 }, { "epoch": 2.005265716137395, "grad_norm": 0.058110445737838745, "learning_rate": 0.00011081956883748144, "loss": 0.2892, "step": 24753 }, { "epoch": 2.005346727154893, "grad_norm": 0.05751382187008858, "learning_rate": 0.0001108150681848868, "loss": 0.2578, "step": 24754 }, { "epoch": 2.0054277381723913, "grad_norm": 0.051433827728033066, "learning_rate": 0.0001108105675322922, "loss": 0.2281, "step": 24755 }, { "epoch": 2.00550874918989, "grad_norm": 0.06940347701311111, "learning_rate": 0.00011080606687969756, "loss": 0.2757, "step": 24756 }, { "epoch": 2.005589760207388, "grad_norm": 0.05864989757537842, "learning_rate": 0.00011080156622710294, "loss": 0.244, "step": 24757 }, { "epoch": 2.0056707712248865, "grad_norm": 0.06956269592046738, "learning_rate": 0.00011079706557450831, "loss": 0.256, "step": 24758 }, { "epoch": 2.005751782242385, "grad_norm": 0.06806553900241852, "learning_rate": 0.00011079256492191368, "loss": 0.2792, "step": 24759 }, { "epoch": 2.0058327932598834, "grad_norm": 0.05740509182214737, "learning_rate": 0.00011078806426931904, "loss": 0.2826, "step": 24760 }, { "epoch": 2.0059138042773816, "grad_norm": 0.0648796334862709, "learning_rate": 0.00011078356361672444, "loss": 0.2743, "step": 24761 }, { "epoch": 2.00599481529488, "grad_norm": 0.06351775676012039, "learning_rate": 0.0001107790629641298, "loss": 0.2894, "step": 24762 }, { "epoch": 2.0060758263123786, "grad_norm": 0.07129117846488953, "learning_rate": 0.00011077456231153518, "loss": 0.2419, "step": 24763 }, { "epoch": 2.006156837329877, "grad_norm": 0.05522260442376137, "learning_rate": 0.00011077006165894055, "loss": 0.2666, "step": 24764 }, { "epoch": 2.006237848347375, "grad_norm": 0.07258357107639313, "learning_rate": 0.00011076556100634593, "loss": 0.2418, "step": 24765 }, { "epoch": 2.0063188593648738, "grad_norm": 0.0612257681787014, "learning_rate": 0.00011076106035375129, "loss": 0.2624, "step": 24766 }, { "epoch": 2.006399870382372, "grad_norm": 0.056269869208335876, "learning_rate": 0.00011075655970115669, "loss": 0.2558, "step": 24767 }, { "epoch": 2.0064808813998702, "grad_norm": 0.058652397245168686, "learning_rate": 0.00011075205904856205, "loss": 0.2731, "step": 24768 }, { "epoch": 2.006561892417369, "grad_norm": 0.05787859112024307, "learning_rate": 0.00011074755839596742, "loss": 0.259, "step": 24769 }, { "epoch": 2.006642903434867, "grad_norm": 0.07301168143749237, "learning_rate": 0.0001107430577433728, "loss": 0.2736, "step": 24770 }, { "epoch": 2.0067239144523654, "grad_norm": 0.05876472592353821, "learning_rate": 0.00011073855709077817, "loss": 0.2436, "step": 24771 }, { "epoch": 2.006804925469864, "grad_norm": 0.06376785039901733, "learning_rate": 0.00011073405643818353, "loss": 0.2371, "step": 24772 }, { "epoch": 2.0068859364873624, "grad_norm": 0.05607694014906883, "learning_rate": 0.00011072955578558893, "loss": 0.2826, "step": 24773 }, { "epoch": 2.0069669475048606, "grad_norm": 0.06324949115514755, "learning_rate": 0.00011072505513299429, "loss": 0.2269, "step": 24774 }, { "epoch": 2.007047958522359, "grad_norm": 0.06031335890293121, "learning_rate": 0.00011072055448039966, "loss": 0.2686, "step": 24775 }, { "epoch": 2.0071289695398575, "grad_norm": 0.04615411534905434, "learning_rate": 0.00011071605382780504, "loss": 0.2397, "step": 24776 }, { "epoch": 2.0072099805573558, "grad_norm": 0.06102414056658745, "learning_rate": 0.00011071155317521041, "loss": 0.305, "step": 24777 }, { "epoch": 2.007290991574854, "grad_norm": 0.05530553683638573, "learning_rate": 0.0001107070525226158, "loss": 0.2567, "step": 24778 }, { "epoch": 2.0073720025923527, "grad_norm": 0.05917354300618172, "learning_rate": 0.00011070255187002117, "loss": 0.2733, "step": 24779 }, { "epoch": 2.007453013609851, "grad_norm": 0.06167829409241676, "learning_rate": 0.00011069805121742653, "loss": 0.2323, "step": 24780 }, { "epoch": 2.007534024627349, "grad_norm": 0.06296923011541367, "learning_rate": 0.0001106935505648319, "loss": 0.2097, "step": 24781 }, { "epoch": 2.007615035644848, "grad_norm": 0.05603337287902832, "learning_rate": 0.00011068904991223728, "loss": 0.2307, "step": 24782 }, { "epoch": 2.007696046662346, "grad_norm": 0.0774034634232521, "learning_rate": 0.00011068454925964265, "loss": 0.2663, "step": 24783 }, { "epoch": 2.0077770576798444, "grad_norm": 0.06689009070396423, "learning_rate": 0.00011068004860704804, "loss": 0.2797, "step": 24784 }, { "epoch": 2.0078580686973426, "grad_norm": 0.05564473941922188, "learning_rate": 0.00011067554795445341, "loss": 0.2234, "step": 24785 }, { "epoch": 2.0079390797148413, "grad_norm": 0.06797027587890625, "learning_rate": 0.00011067104730185877, "loss": 0.3027, "step": 24786 }, { "epoch": 2.0080200907323396, "grad_norm": 0.061519429087638855, "learning_rate": 0.00011066654664926414, "loss": 0.2575, "step": 24787 }, { "epoch": 2.008101101749838, "grad_norm": 0.06355039775371552, "learning_rate": 0.00011066204599666952, "loss": 0.2481, "step": 24788 }, { "epoch": 2.0081821127673365, "grad_norm": 0.05140797793865204, "learning_rate": 0.00011065754534407489, "loss": 0.2388, "step": 24789 }, { "epoch": 2.0082631237848347, "grad_norm": 0.0660591721534729, "learning_rate": 0.00011065304469148028, "loss": 0.2364, "step": 24790 }, { "epoch": 2.008344134802333, "grad_norm": 0.062273040413856506, "learning_rate": 0.00011064854403888565, "loss": 0.2608, "step": 24791 }, { "epoch": 2.0084251458198317, "grad_norm": 0.059555307030677795, "learning_rate": 0.00011064404338629101, "loss": 0.2682, "step": 24792 }, { "epoch": 2.00850615683733, "grad_norm": 0.06148289144039154, "learning_rate": 0.00011063954273369639, "loss": 0.2812, "step": 24793 }, { "epoch": 2.008587167854828, "grad_norm": 0.057390399277210236, "learning_rate": 0.00011063504208110176, "loss": 0.2341, "step": 24794 }, { "epoch": 2.008668178872327, "grad_norm": 0.06243424862623215, "learning_rate": 0.00011063054142850713, "loss": 0.2291, "step": 24795 }, { "epoch": 2.008749189889825, "grad_norm": 0.055271074175834656, "learning_rate": 0.00011062604077591252, "loss": 0.2808, "step": 24796 }, { "epoch": 2.0088302009073233, "grad_norm": 0.05574141442775726, "learning_rate": 0.0001106215401233179, "loss": 0.2327, "step": 24797 }, { "epoch": 2.0089112119248216, "grad_norm": 0.055914100259542465, "learning_rate": 0.00011061703947072325, "loss": 0.2444, "step": 24798 }, { "epoch": 2.0089922229423203, "grad_norm": 0.0710621103644371, "learning_rate": 0.00011061253881812863, "loss": 0.2514, "step": 24799 }, { "epoch": 2.0090732339598185, "grad_norm": 0.06601139158010483, "learning_rate": 0.000110608038165534, "loss": 0.2553, "step": 24800 }, { "epoch": 2.0091542449773168, "grad_norm": 0.052085116505622864, "learning_rate": 0.00011060353751293939, "loss": 0.2459, "step": 24801 }, { "epoch": 2.0092352559948155, "grad_norm": 0.060282688587903976, "learning_rate": 0.00011059903686034476, "loss": 0.2479, "step": 24802 }, { "epoch": 2.0093162670123137, "grad_norm": 0.05750880390405655, "learning_rate": 0.00011059453620775014, "loss": 0.294, "step": 24803 }, { "epoch": 2.009397278029812, "grad_norm": 0.0854443684220314, "learning_rate": 0.0001105900355551555, "loss": 0.2245, "step": 24804 }, { "epoch": 2.0094782890473106, "grad_norm": 0.05768865346908569, "learning_rate": 0.00011058553490256087, "loss": 0.2389, "step": 24805 }, { "epoch": 2.009559300064809, "grad_norm": 0.06694091856479645, "learning_rate": 0.00011058103424996624, "loss": 0.2886, "step": 24806 }, { "epoch": 2.009640311082307, "grad_norm": 0.06841512769460678, "learning_rate": 0.00011057653359737163, "loss": 0.2861, "step": 24807 }, { "epoch": 2.0097213220998054, "grad_norm": 0.051087573170661926, "learning_rate": 0.000110572032944777, "loss": 0.2722, "step": 24808 }, { "epoch": 2.009802333117304, "grad_norm": 0.05654599517583847, "learning_rate": 0.00011056753229218238, "loss": 0.2624, "step": 24809 }, { "epoch": 2.0098833441348023, "grad_norm": 0.06250681728124619, "learning_rate": 0.00011056303163958774, "loss": 0.2876, "step": 24810 }, { "epoch": 2.0099643551523005, "grad_norm": 0.06365267187356949, "learning_rate": 0.00011055853098699311, "loss": 0.2541, "step": 24811 }, { "epoch": 2.0100453661697992, "grad_norm": 0.06928987056016922, "learning_rate": 0.00011055403033439848, "loss": 0.2938, "step": 24812 }, { "epoch": 2.0101263771872975, "grad_norm": 0.05956394597887993, "learning_rate": 0.00011054952968180387, "loss": 0.2443, "step": 24813 }, { "epoch": 2.0102073882047957, "grad_norm": 0.06563542038202286, "learning_rate": 0.00011054502902920925, "loss": 0.2706, "step": 24814 }, { "epoch": 2.0102883992222944, "grad_norm": 0.07303499430418015, "learning_rate": 0.00011054052837661462, "loss": 0.2691, "step": 24815 }, { "epoch": 2.0103694102397927, "grad_norm": 0.05468272790312767, "learning_rate": 0.00011053602772401998, "loss": 0.2536, "step": 24816 }, { "epoch": 2.010450421257291, "grad_norm": 0.059382714331150055, "learning_rate": 0.00011053152707142535, "loss": 0.2385, "step": 24817 }, { "epoch": 2.0105314322747896, "grad_norm": 0.06693161278963089, "learning_rate": 0.00011052702641883073, "loss": 0.2505, "step": 24818 }, { "epoch": 2.010612443292288, "grad_norm": 0.05936438590288162, "learning_rate": 0.00011052252576623611, "loss": 0.2067, "step": 24819 }, { "epoch": 2.010693454309786, "grad_norm": 0.07057078927755356, "learning_rate": 0.00011051802511364149, "loss": 0.2758, "step": 24820 }, { "epoch": 2.0107744653272843, "grad_norm": 0.06983327120542526, "learning_rate": 0.00011051352446104686, "loss": 0.2795, "step": 24821 }, { "epoch": 2.010855476344783, "grad_norm": 0.052694715559482574, "learning_rate": 0.00011050902380845223, "loss": 0.2565, "step": 24822 }, { "epoch": 2.0109364873622813, "grad_norm": 0.059741996228694916, "learning_rate": 0.0001105045231558576, "loss": 0.2474, "step": 24823 }, { "epoch": 2.0110174983797795, "grad_norm": 0.06866106390953064, "learning_rate": 0.00011050002250326297, "loss": 0.2669, "step": 24824 }, { "epoch": 2.011098509397278, "grad_norm": 0.06021895259618759, "learning_rate": 0.00011049552185066836, "loss": 0.2585, "step": 24825 }, { "epoch": 2.0111795204147764, "grad_norm": 0.05985042080283165, "learning_rate": 0.00011049102119807373, "loss": 0.2671, "step": 24826 }, { "epoch": 2.0112605314322747, "grad_norm": 0.05347612127661705, "learning_rate": 0.0001104865205454791, "loss": 0.2408, "step": 24827 }, { "epoch": 2.0113415424497734, "grad_norm": 0.07350436598062515, "learning_rate": 0.00011048201989288448, "loss": 0.2614, "step": 24828 }, { "epoch": 2.0114225534672716, "grad_norm": 0.05584220588207245, "learning_rate": 0.00011047751924028984, "loss": 0.2454, "step": 24829 }, { "epoch": 2.01150356448477, "grad_norm": 0.05780201032757759, "learning_rate": 0.00011047301858769524, "loss": 0.2406, "step": 24830 }, { "epoch": 2.011584575502268, "grad_norm": 0.06727975606918335, "learning_rate": 0.0001104685179351006, "loss": 0.2837, "step": 24831 }, { "epoch": 2.011665586519767, "grad_norm": 0.050393763929605484, "learning_rate": 0.00011046401728250597, "loss": 0.2185, "step": 24832 }, { "epoch": 2.011746597537265, "grad_norm": 0.05321752279996872, "learning_rate": 0.00011045951662991134, "loss": 0.2333, "step": 24833 }, { "epoch": 2.0118276085547633, "grad_norm": 0.058083198964595795, "learning_rate": 0.00011045501597731672, "loss": 0.2934, "step": 24834 }, { "epoch": 2.011908619572262, "grad_norm": 0.05851643532514572, "learning_rate": 0.00011045051532472208, "loss": 0.2604, "step": 24835 }, { "epoch": 2.01198963058976, "grad_norm": 0.07460993528366089, "learning_rate": 0.00011044601467212748, "loss": 0.2684, "step": 24836 }, { "epoch": 2.0120706416072585, "grad_norm": 0.05338436737656593, "learning_rate": 0.00011044151401953284, "loss": 0.2467, "step": 24837 }, { "epoch": 2.012151652624757, "grad_norm": 0.07232926785945892, "learning_rate": 0.00011043701336693821, "loss": 0.2798, "step": 24838 }, { "epoch": 2.0122326636422554, "grad_norm": 0.06704601645469666, "learning_rate": 0.00011043251271434359, "loss": 0.2967, "step": 24839 }, { "epoch": 2.0123136746597536, "grad_norm": 0.04931149259209633, "learning_rate": 0.00011042801206174896, "loss": 0.2283, "step": 24840 }, { "epoch": 2.0123946856772523, "grad_norm": 0.07212699204683304, "learning_rate": 0.00011042351140915432, "loss": 0.2677, "step": 24841 }, { "epoch": 2.0124756966947506, "grad_norm": 0.06866638362407684, "learning_rate": 0.00011041901075655972, "loss": 0.2856, "step": 24842 }, { "epoch": 2.012556707712249, "grad_norm": 0.060009896755218506, "learning_rate": 0.00011041451010396508, "loss": 0.2556, "step": 24843 }, { "epoch": 2.012637718729747, "grad_norm": 0.05222009867429733, "learning_rate": 0.00011041000945137045, "loss": 0.2415, "step": 24844 }, { "epoch": 2.0127187297472457, "grad_norm": 0.0565890334546566, "learning_rate": 0.00011040550879877583, "loss": 0.225, "step": 24845 }, { "epoch": 2.012799740764744, "grad_norm": 0.05756883695721626, "learning_rate": 0.0001104010081461812, "loss": 0.2198, "step": 24846 }, { "epoch": 2.0128807517822422, "grad_norm": 0.08347923308610916, "learning_rate": 0.00011039650749358656, "loss": 0.2808, "step": 24847 }, { "epoch": 2.012961762799741, "grad_norm": 0.06518597900867462, "learning_rate": 0.00011039200684099196, "loss": 0.2887, "step": 24848 }, { "epoch": 2.013042773817239, "grad_norm": 0.05346430093050003, "learning_rate": 0.00011038750618839732, "loss": 0.2629, "step": 24849 }, { "epoch": 2.0131237848347374, "grad_norm": 0.060417596250772476, "learning_rate": 0.0001103830055358027, "loss": 0.2502, "step": 24850 }, { "epoch": 2.013204795852236, "grad_norm": 0.05704808607697487, "learning_rate": 0.00011037850488320807, "loss": 0.2798, "step": 24851 }, { "epoch": 2.0132858068697344, "grad_norm": 0.07693468779325485, "learning_rate": 0.00011037400423061344, "loss": 0.2504, "step": 24852 }, { "epoch": 2.0133668178872326, "grad_norm": 0.06132509186863899, "learning_rate": 0.0001103695035780188, "loss": 0.2554, "step": 24853 }, { "epoch": 2.013447828904731, "grad_norm": 0.06517904996871948, "learning_rate": 0.0001103650029254242, "loss": 0.2751, "step": 24854 }, { "epoch": 2.0135288399222295, "grad_norm": 0.0628628209233284, "learning_rate": 0.00011036050227282956, "loss": 0.2956, "step": 24855 }, { "epoch": 2.0136098509397278, "grad_norm": 0.06579403579235077, "learning_rate": 0.00011035600162023494, "loss": 0.2664, "step": 24856 }, { "epoch": 2.013690861957226, "grad_norm": 0.052854299545288086, "learning_rate": 0.00011035150096764031, "loss": 0.2618, "step": 24857 }, { "epoch": 2.0137718729747247, "grad_norm": 0.055037494748830795, "learning_rate": 0.00011034700031504568, "loss": 0.2117, "step": 24858 }, { "epoch": 2.013852883992223, "grad_norm": 0.07010837644338608, "learning_rate": 0.00011034249966245107, "loss": 0.2582, "step": 24859 }, { "epoch": 2.013933895009721, "grad_norm": 0.06331303715705872, "learning_rate": 0.00011033799900985644, "loss": 0.2429, "step": 24860 }, { "epoch": 2.01401490602722, "grad_norm": 0.06630232185125351, "learning_rate": 0.0001103334983572618, "loss": 0.2881, "step": 24861 }, { "epoch": 2.014095917044718, "grad_norm": 0.07510954141616821, "learning_rate": 0.00011032899770466718, "loss": 0.2941, "step": 24862 }, { "epoch": 2.0141769280622164, "grad_norm": 0.06895710527896881, "learning_rate": 0.00011032449705207255, "loss": 0.2736, "step": 24863 }, { "epoch": 2.0142579390797146, "grad_norm": 0.054260727018117905, "learning_rate": 0.00011031999639947793, "loss": 0.233, "step": 24864 }, { "epoch": 2.0143389500972133, "grad_norm": 0.07401958107948303, "learning_rate": 0.00011031549574688331, "loss": 0.289, "step": 24865 }, { "epoch": 2.0144199611147116, "grad_norm": 0.06323473900556564, "learning_rate": 0.00011031099509428869, "loss": 0.2472, "step": 24866 }, { "epoch": 2.01450097213221, "grad_norm": 0.07643520087003708, "learning_rate": 0.00011030649444169405, "loss": 0.2777, "step": 24867 }, { "epoch": 2.0145819831497085, "grad_norm": 0.06905188411474228, "learning_rate": 0.00011030199378909942, "loss": 0.2359, "step": 24868 }, { "epoch": 2.0146629941672067, "grad_norm": 0.06276547908782959, "learning_rate": 0.0001102974931365048, "loss": 0.2813, "step": 24869 }, { "epoch": 2.014744005184705, "grad_norm": 0.06353870034217834, "learning_rate": 0.00011029299248391017, "loss": 0.2542, "step": 24870 }, { "epoch": 2.0148250162022037, "grad_norm": 0.0546317994594574, "learning_rate": 0.00011028849183131555, "loss": 0.2887, "step": 24871 }, { "epoch": 2.014906027219702, "grad_norm": 0.06614603102207184, "learning_rate": 0.00011028399117872093, "loss": 0.2699, "step": 24872 }, { "epoch": 2.0149870382372, "grad_norm": 0.048714540898799896, "learning_rate": 0.00011027949052612629, "loss": 0.2503, "step": 24873 }, { "epoch": 2.015068049254699, "grad_norm": 0.06008073687553406, "learning_rate": 0.00011027498987353166, "loss": 0.2622, "step": 24874 }, { "epoch": 2.015149060272197, "grad_norm": 0.07386930286884308, "learning_rate": 0.00011027048922093704, "loss": 0.2689, "step": 24875 }, { "epoch": 2.0152300712896953, "grad_norm": 0.060236118733882904, "learning_rate": 0.00011026598856834241, "loss": 0.2504, "step": 24876 }, { "epoch": 2.0153110823071936, "grad_norm": 0.0673976019024849, "learning_rate": 0.0001102614879157478, "loss": 0.2707, "step": 24877 }, { "epoch": 2.0153920933246923, "grad_norm": 0.060310475528240204, "learning_rate": 0.00011025698726315317, "loss": 0.2727, "step": 24878 }, { "epoch": 2.0154731043421905, "grad_norm": 0.06076750531792641, "learning_rate": 0.00011025248661055853, "loss": 0.2529, "step": 24879 }, { "epoch": 2.0155541153596888, "grad_norm": 0.06271596997976303, "learning_rate": 0.0001102479859579639, "loss": 0.2426, "step": 24880 }, { "epoch": 2.0156351263771874, "grad_norm": 0.062388744205236435, "learning_rate": 0.00011024348530536928, "loss": 0.2621, "step": 24881 }, { "epoch": 2.0157161373946857, "grad_norm": 0.051883265376091, "learning_rate": 0.00011023898465277466, "loss": 0.2425, "step": 24882 }, { "epoch": 2.015797148412184, "grad_norm": 0.0597045011818409, "learning_rate": 0.00011023448400018004, "loss": 0.2831, "step": 24883 }, { "epoch": 2.0158781594296826, "grad_norm": 0.06017323583364487, "learning_rate": 0.00011022998334758541, "loss": 0.2742, "step": 24884 }, { "epoch": 2.015959170447181, "grad_norm": 0.05535883083939552, "learning_rate": 0.00011022548269499077, "loss": 0.3075, "step": 24885 }, { "epoch": 2.016040181464679, "grad_norm": 0.06221333518624306, "learning_rate": 0.00011022098204239615, "loss": 0.2893, "step": 24886 }, { "epoch": 2.0161211924821774, "grad_norm": 0.06521249562501907, "learning_rate": 0.00011021648138980152, "loss": 0.2463, "step": 24887 }, { "epoch": 2.016202203499676, "grad_norm": 0.07503092288970947, "learning_rate": 0.0001102119807372069, "loss": 0.2656, "step": 24888 }, { "epoch": 2.0162832145171743, "grad_norm": 0.07238157838582993, "learning_rate": 0.00011020748008461228, "loss": 0.2772, "step": 24889 }, { "epoch": 2.0163642255346725, "grad_norm": 0.057459212839603424, "learning_rate": 0.00011020297943201765, "loss": 0.2264, "step": 24890 }, { "epoch": 2.0164452365521712, "grad_norm": 0.07599131017923355, "learning_rate": 0.00011019847877942303, "loss": 0.2923, "step": 24891 }, { "epoch": 2.0165262475696695, "grad_norm": 0.053455375134944916, "learning_rate": 0.00011019397812682839, "loss": 0.2342, "step": 24892 }, { "epoch": 2.0166072585871677, "grad_norm": 0.08646205067634583, "learning_rate": 0.00011018947747423376, "loss": 0.3353, "step": 24893 }, { "epoch": 2.0166882696046664, "grad_norm": 0.04716218635439873, "learning_rate": 0.00011018497682163915, "loss": 0.2437, "step": 24894 }, { "epoch": 2.0167692806221647, "grad_norm": 0.06640686839818954, "learning_rate": 0.00011018047616904452, "loss": 0.2363, "step": 24895 }, { "epoch": 2.016850291639663, "grad_norm": 0.055529430508613586, "learning_rate": 0.0001101759755164499, "loss": 0.2743, "step": 24896 }, { "epoch": 2.0169313026571616, "grad_norm": 0.06658606231212616, "learning_rate": 0.00011017147486385527, "loss": 0.2619, "step": 24897 }, { "epoch": 2.01701231367466, "grad_norm": 0.0485866405069828, "learning_rate": 0.00011016697421126063, "loss": 0.2371, "step": 24898 }, { "epoch": 2.017093324692158, "grad_norm": 0.062041107565164566, "learning_rate": 0.000110162473558666, "loss": 0.2701, "step": 24899 }, { "epoch": 2.0171743357096563, "grad_norm": 0.06547149270772934, "learning_rate": 0.00011015797290607139, "loss": 0.2669, "step": 24900 }, { "epoch": 2.017255346727155, "grad_norm": 0.05686812102794647, "learning_rate": 0.00011015347225347676, "loss": 0.2694, "step": 24901 }, { "epoch": 2.0173363577446533, "grad_norm": 0.06172237545251846, "learning_rate": 0.00011014897160088214, "loss": 0.241, "step": 24902 }, { "epoch": 2.0174173687621515, "grad_norm": 0.05941611900925636, "learning_rate": 0.00011014447094828751, "loss": 0.2788, "step": 24903 }, { "epoch": 2.01749837977965, "grad_norm": 0.08111029118299484, "learning_rate": 0.00011013997029569287, "loss": 0.3124, "step": 24904 }, { "epoch": 2.0175793907971484, "grad_norm": 0.04909438639879227, "learning_rate": 0.00011013546964309824, "loss": 0.2268, "step": 24905 }, { "epoch": 2.0176604018146467, "grad_norm": 0.06682118028402328, "learning_rate": 0.00011013096899050363, "loss": 0.2574, "step": 24906 }, { "epoch": 2.0177414128321454, "grad_norm": 0.06406114250421524, "learning_rate": 0.000110126468337909, "loss": 0.2469, "step": 24907 }, { "epoch": 2.0178224238496436, "grad_norm": 0.059632740914821625, "learning_rate": 0.00011012196768531438, "loss": 0.2702, "step": 24908 }, { "epoch": 2.017903434867142, "grad_norm": 0.06329367309808731, "learning_rate": 0.00011011746703271975, "loss": 0.2823, "step": 24909 }, { "epoch": 2.01798444588464, "grad_norm": 0.06427815556526184, "learning_rate": 0.00011011296638012511, "loss": 0.2771, "step": 24910 }, { "epoch": 2.018065456902139, "grad_norm": 0.06586452573537827, "learning_rate": 0.00011010846572753051, "loss": 0.25, "step": 24911 }, { "epoch": 2.018146467919637, "grad_norm": 0.05930725485086441, "learning_rate": 0.00011010396507493587, "loss": 0.2281, "step": 24912 }, { "epoch": 2.0182274789371353, "grad_norm": 0.062091536819934845, "learning_rate": 0.00011009946442234125, "loss": 0.2609, "step": 24913 }, { "epoch": 2.018308489954634, "grad_norm": 0.048801861703395844, "learning_rate": 0.00011009496376974662, "loss": 0.2532, "step": 24914 }, { "epoch": 2.018389500972132, "grad_norm": 0.0713617280125618, "learning_rate": 0.00011009046311715199, "loss": 0.281, "step": 24915 }, { "epoch": 2.0184705119896305, "grad_norm": 0.05809679627418518, "learning_rate": 0.00011008596246455735, "loss": 0.2809, "step": 24916 }, { "epoch": 2.018551523007129, "grad_norm": 0.0762714073061943, "learning_rate": 0.00011008146181196275, "loss": 0.2538, "step": 24917 }, { "epoch": 2.0186325340246274, "grad_norm": 0.06147345155477524, "learning_rate": 0.00011007696115936811, "loss": 0.2437, "step": 24918 }, { "epoch": 2.0187135450421256, "grad_norm": 0.0637492686510086, "learning_rate": 0.00011007246050677349, "loss": 0.2619, "step": 24919 }, { "epoch": 2.0187945560596243, "grad_norm": 0.06866779178380966, "learning_rate": 0.00011006795985417886, "loss": 0.2692, "step": 24920 }, { "epoch": 2.0188755670771226, "grad_norm": 0.05443578585982323, "learning_rate": 0.00011006345920158423, "loss": 0.2322, "step": 24921 }, { "epoch": 2.018956578094621, "grad_norm": 0.0455310195684433, "learning_rate": 0.0001100589585489896, "loss": 0.2692, "step": 24922 }, { "epoch": 2.019037589112119, "grad_norm": 0.0551089383661747, "learning_rate": 0.000110054457896395, "loss": 0.2893, "step": 24923 }, { "epoch": 2.0191186001296177, "grad_norm": 0.07012448459863663, "learning_rate": 0.00011004995724380036, "loss": 0.2355, "step": 24924 }, { "epoch": 2.019199611147116, "grad_norm": 0.062949538230896, "learning_rate": 0.00011004545659120573, "loss": 0.2785, "step": 24925 }, { "epoch": 2.0192806221646142, "grad_norm": 0.06228519603610039, "learning_rate": 0.0001100409559386111, "loss": 0.2644, "step": 24926 }, { "epoch": 2.019361633182113, "grad_norm": 0.05176496133208275, "learning_rate": 0.00011003645528601648, "loss": 0.2491, "step": 24927 }, { "epoch": 2.019442644199611, "grad_norm": 0.05500460043549538, "learning_rate": 0.00011003195463342184, "loss": 0.2675, "step": 24928 }, { "epoch": 2.0195236552171094, "grad_norm": 0.07030055671930313, "learning_rate": 0.00011002745398082724, "loss": 0.2412, "step": 24929 }, { "epoch": 2.019604666234608, "grad_norm": 0.056598152965307236, "learning_rate": 0.0001100229533282326, "loss": 0.2636, "step": 24930 }, { "epoch": 2.0196856772521063, "grad_norm": 0.05294608697295189, "learning_rate": 0.00011001845267563797, "loss": 0.2312, "step": 24931 }, { "epoch": 2.0197666882696046, "grad_norm": 0.0632098987698555, "learning_rate": 0.00011001395202304334, "loss": 0.2519, "step": 24932 }, { "epoch": 2.019847699287103, "grad_norm": 0.06364619731903076, "learning_rate": 0.00011000945137044872, "loss": 0.2579, "step": 24933 }, { "epoch": 2.0199287103046015, "grad_norm": 0.06215987354516983, "learning_rate": 0.0001100049507178541, "loss": 0.2517, "step": 24934 }, { "epoch": 2.0200097213220998, "grad_norm": 0.0633561834692955, "learning_rate": 0.00011000045006525948, "loss": 0.2062, "step": 24935 }, { "epoch": 2.020090732339598, "grad_norm": 0.050913676619529724, "learning_rate": 0.00010999594941266484, "loss": 0.2084, "step": 24936 }, { "epoch": 2.0201717433570967, "grad_norm": 0.06375525146722794, "learning_rate": 0.00010999144876007021, "loss": 0.3028, "step": 24937 }, { "epoch": 2.020252754374595, "grad_norm": 0.062853142619133, "learning_rate": 0.00010998694810747559, "loss": 0.2372, "step": 24938 }, { "epoch": 2.020333765392093, "grad_norm": 0.0629112720489502, "learning_rate": 0.00010998244745488096, "loss": 0.2601, "step": 24939 }, { "epoch": 2.020414776409592, "grad_norm": 0.06098567321896553, "learning_rate": 0.00010997794680228635, "loss": 0.2289, "step": 24940 }, { "epoch": 2.02049578742709, "grad_norm": 0.058595672249794006, "learning_rate": 0.00010997344614969172, "loss": 0.2181, "step": 24941 }, { "epoch": 2.0205767984445884, "grad_norm": 0.06882533431053162, "learning_rate": 0.00010996894549709708, "loss": 0.2263, "step": 24942 }, { "epoch": 2.020657809462087, "grad_norm": 0.06504590064287186, "learning_rate": 0.00010996444484450245, "loss": 0.2492, "step": 24943 }, { "epoch": 2.0207388204795853, "grad_norm": 0.07631085067987442, "learning_rate": 0.00010995994419190783, "loss": 0.2563, "step": 24944 }, { "epoch": 2.0208198314970836, "grad_norm": 0.06929519772529602, "learning_rate": 0.0001099554435393132, "loss": 0.2693, "step": 24945 }, { "epoch": 2.020900842514582, "grad_norm": 0.07654745876789093, "learning_rate": 0.00010995094288671859, "loss": 0.2764, "step": 24946 }, { "epoch": 2.0209818535320805, "grad_norm": 0.06594858318567276, "learning_rate": 0.00010994644223412396, "loss": 0.297, "step": 24947 }, { "epoch": 2.0210628645495787, "grad_norm": 0.06843897700309753, "learning_rate": 0.00010994194158152932, "loss": 0.2662, "step": 24948 }, { "epoch": 2.021143875567077, "grad_norm": 0.060284316539764404, "learning_rate": 0.0001099374409289347, "loss": 0.2381, "step": 24949 }, { "epoch": 2.0212248865845757, "grad_norm": 0.06309103965759277, "learning_rate": 0.00010993294027634007, "loss": 0.2796, "step": 24950 }, { "epoch": 2.021305897602074, "grad_norm": 0.051141221076250076, "learning_rate": 0.00010992843962374544, "loss": 0.2192, "step": 24951 }, { "epoch": 2.021386908619572, "grad_norm": 0.05918668955564499, "learning_rate": 0.00010992393897115083, "loss": 0.276, "step": 24952 }, { "epoch": 2.021467919637071, "grad_norm": 0.0494406521320343, "learning_rate": 0.0001099194383185562, "loss": 0.2209, "step": 24953 }, { "epoch": 2.021548930654569, "grad_norm": 0.06384733319282532, "learning_rate": 0.00010991493766596156, "loss": 0.2434, "step": 24954 }, { "epoch": 2.0216299416720673, "grad_norm": 0.06195070967078209, "learning_rate": 0.00010991043701336694, "loss": 0.2492, "step": 24955 }, { "epoch": 2.0217109526895656, "grad_norm": 0.05020172521471977, "learning_rate": 0.00010990593636077231, "loss": 0.2447, "step": 24956 }, { "epoch": 2.0217919637070643, "grad_norm": 0.06323754787445068, "learning_rate": 0.00010990143570817768, "loss": 0.274, "step": 24957 }, { "epoch": 2.0218729747245625, "grad_norm": 0.05106744542717934, "learning_rate": 0.00010989693505558307, "loss": 0.2456, "step": 24958 }, { "epoch": 2.0219539857420608, "grad_norm": 0.06775747984647751, "learning_rate": 0.00010989243440298845, "loss": 0.2219, "step": 24959 }, { "epoch": 2.0220349967595594, "grad_norm": 0.05914744734764099, "learning_rate": 0.00010988793375039382, "loss": 0.2463, "step": 24960 }, { "epoch": 2.0221160077770577, "grad_norm": 0.06987558305263519, "learning_rate": 0.00010988343309779918, "loss": 0.2643, "step": 24961 }, { "epoch": 2.022197018794556, "grad_norm": 0.06686465442180634, "learning_rate": 0.00010987893244520455, "loss": 0.2868, "step": 24962 }, { "epoch": 2.0222780298120546, "grad_norm": 0.05918348953127861, "learning_rate": 0.00010987443179260994, "loss": 0.2565, "step": 24963 }, { "epoch": 2.022359040829553, "grad_norm": 0.05445210263133049, "learning_rate": 0.00010986993114001531, "loss": 0.2242, "step": 24964 }, { "epoch": 2.022440051847051, "grad_norm": 0.062487054616212845, "learning_rate": 0.00010986543048742069, "loss": 0.2482, "step": 24965 }, { "epoch": 2.0225210628645494, "grad_norm": 0.053661637008190155, "learning_rate": 0.00010986092983482606, "loss": 0.2275, "step": 24966 }, { "epoch": 2.022602073882048, "grad_norm": 0.05276666209101677, "learning_rate": 0.00010985642918223142, "loss": 0.206, "step": 24967 }, { "epoch": 2.0226830848995463, "grad_norm": 0.053006209433078766, "learning_rate": 0.0001098519285296368, "loss": 0.2452, "step": 24968 }, { "epoch": 2.0227640959170445, "grad_norm": 0.06032721698284149, "learning_rate": 0.00010984742787704218, "loss": 0.2698, "step": 24969 }, { "epoch": 2.0228451069345432, "grad_norm": 0.06984737515449524, "learning_rate": 0.00010984292722444755, "loss": 0.3158, "step": 24970 }, { "epoch": 2.0229261179520415, "grad_norm": 0.07318402081727982, "learning_rate": 0.00010983842657185293, "loss": 0.2687, "step": 24971 }, { "epoch": 2.0230071289695397, "grad_norm": 0.05291476845741272, "learning_rate": 0.0001098339259192583, "loss": 0.2432, "step": 24972 }, { "epoch": 2.0230881399870384, "grad_norm": 0.051952771842479706, "learning_rate": 0.00010982942526666366, "loss": 0.223, "step": 24973 }, { "epoch": 2.0231691510045366, "grad_norm": 0.05664176493883133, "learning_rate": 0.00010982492461406904, "loss": 0.2493, "step": 24974 }, { "epoch": 2.023250162022035, "grad_norm": 0.05760600045323372, "learning_rate": 0.00010982042396147442, "loss": 0.256, "step": 24975 }, { "epoch": 2.0233311730395336, "grad_norm": 0.0646364763379097, "learning_rate": 0.0001098159233088798, "loss": 0.2838, "step": 24976 }, { "epoch": 2.023412184057032, "grad_norm": 0.05765828117728233, "learning_rate": 0.00010981142265628517, "loss": 0.2238, "step": 24977 }, { "epoch": 2.02349319507453, "grad_norm": 0.05723297595977783, "learning_rate": 0.00010980692200369054, "loss": 0.2353, "step": 24978 }, { "epoch": 2.0235742060920283, "grad_norm": 0.052987512201070786, "learning_rate": 0.0001098024213510959, "loss": 0.249, "step": 24979 }, { "epoch": 2.023655217109527, "grad_norm": 0.0625142753124237, "learning_rate": 0.00010979792069850128, "loss": 0.2637, "step": 24980 }, { "epoch": 2.0237362281270252, "grad_norm": 0.056187573820352554, "learning_rate": 0.00010979342004590666, "loss": 0.2558, "step": 24981 }, { "epoch": 2.0238172391445235, "grad_norm": 0.054129134863615036, "learning_rate": 0.00010978891939331204, "loss": 0.2429, "step": 24982 }, { "epoch": 2.023898250162022, "grad_norm": 0.062188971787691116, "learning_rate": 0.00010978441874071741, "loss": 0.2804, "step": 24983 }, { "epoch": 2.0239792611795204, "grad_norm": 0.0643451139330864, "learning_rate": 0.00010977991808812279, "loss": 0.2458, "step": 24984 }, { "epoch": 2.0240602721970187, "grad_norm": 0.06608925759792328, "learning_rate": 0.00010977541743552815, "loss": 0.2601, "step": 24985 }, { "epoch": 2.0241412832145174, "grad_norm": 0.0593901164829731, "learning_rate": 0.00010977091678293355, "loss": 0.2873, "step": 24986 }, { "epoch": 2.0242222942320156, "grad_norm": 0.059159524738788605, "learning_rate": 0.0001097664161303389, "loss": 0.2723, "step": 24987 }, { "epoch": 2.024303305249514, "grad_norm": 0.056645944714546204, "learning_rate": 0.00010976191547774428, "loss": 0.2177, "step": 24988 }, { "epoch": 2.024384316267012, "grad_norm": 0.0677858516573906, "learning_rate": 0.00010975741482514965, "loss": 0.2809, "step": 24989 }, { "epoch": 2.024465327284511, "grad_norm": 0.06798025220632553, "learning_rate": 0.00010975291417255503, "loss": 0.2814, "step": 24990 }, { "epoch": 2.024546338302009, "grad_norm": 0.04821959137916565, "learning_rate": 0.00010974841351996039, "loss": 0.2121, "step": 24991 }, { "epoch": 2.0246273493195073, "grad_norm": 0.07496128231287003, "learning_rate": 0.00010974391286736579, "loss": 0.2722, "step": 24992 }, { "epoch": 2.024708360337006, "grad_norm": 0.05031698942184448, "learning_rate": 0.00010973941221477115, "loss": 0.2518, "step": 24993 }, { "epoch": 2.024789371354504, "grad_norm": 0.05619891732931137, "learning_rate": 0.00010973491156217652, "loss": 0.2405, "step": 24994 }, { "epoch": 2.0248703823720025, "grad_norm": 0.06708116084337234, "learning_rate": 0.0001097304109095819, "loss": 0.2418, "step": 24995 }, { "epoch": 2.024951393389501, "grad_norm": 0.0597895085811615, "learning_rate": 0.00010972591025698727, "loss": 0.2578, "step": 24996 }, { "epoch": 2.0250324044069994, "grad_norm": 0.06456056237220764, "learning_rate": 0.00010972140960439263, "loss": 0.2667, "step": 24997 }, { "epoch": 2.0251134154244976, "grad_norm": 0.06623676419258118, "learning_rate": 0.00010971690895179803, "loss": 0.2774, "step": 24998 }, { "epoch": 2.0251944264419963, "grad_norm": 0.06833084672689438, "learning_rate": 0.00010971240829920339, "loss": 0.2838, "step": 24999 }, { "epoch": 2.0252754374594946, "grad_norm": 0.05819803103804588, "learning_rate": 0.00010970790764660876, "loss": 0.2698, "step": 25000 }, { "epoch": 2.025356448476993, "grad_norm": 0.07516606152057648, "learning_rate": 0.00010970340699401414, "loss": 0.2881, "step": 25001 }, { "epoch": 2.025437459494491, "grad_norm": 0.0686899870634079, "learning_rate": 0.00010969890634141951, "loss": 0.2546, "step": 25002 }, { "epoch": 2.0255184705119897, "grad_norm": 0.05174567550420761, "learning_rate": 0.00010969440568882487, "loss": 0.262, "step": 25003 }, { "epoch": 2.025599481529488, "grad_norm": 0.06987392902374268, "learning_rate": 0.00010968990503623027, "loss": 0.282, "step": 25004 }, { "epoch": 2.0256804925469862, "grad_norm": 0.058612413704395294, "learning_rate": 0.00010968540438363563, "loss": 0.2509, "step": 25005 }, { "epoch": 2.025761503564485, "grad_norm": 0.06494861841201782, "learning_rate": 0.000109680903731041, "loss": 0.2522, "step": 25006 }, { "epoch": 2.025842514581983, "grad_norm": 0.058371901512145996, "learning_rate": 0.00010967640307844638, "loss": 0.2689, "step": 25007 }, { "epoch": 2.0259235255994814, "grad_norm": 0.0662706196308136, "learning_rate": 0.00010967190242585175, "loss": 0.297, "step": 25008 }, { "epoch": 2.02600453661698, "grad_norm": 0.06714751571416855, "learning_rate": 0.00010966740177325711, "loss": 0.2632, "step": 25009 }, { "epoch": 2.0260855476344783, "grad_norm": 0.06655032187700272, "learning_rate": 0.00010966290112066251, "loss": 0.2735, "step": 25010 }, { "epoch": 2.0261665586519766, "grad_norm": 0.06318897753953934, "learning_rate": 0.00010965840046806787, "loss": 0.2675, "step": 25011 }, { "epoch": 2.026247569669475, "grad_norm": 0.056749485433101654, "learning_rate": 0.00010965389981547325, "loss": 0.2227, "step": 25012 }, { "epoch": 2.0263285806869735, "grad_norm": 0.07162638008594513, "learning_rate": 0.00010964939916287862, "loss": 0.2556, "step": 25013 }, { "epoch": 2.0264095917044718, "grad_norm": 0.06857409328222275, "learning_rate": 0.000109644898510284, "loss": 0.273, "step": 25014 }, { "epoch": 2.02649060272197, "grad_norm": 0.056812405586242676, "learning_rate": 0.00010964039785768938, "loss": 0.2441, "step": 25015 }, { "epoch": 2.0265716137394687, "grad_norm": 0.0918152704834938, "learning_rate": 0.00010963589720509475, "loss": 0.271, "step": 25016 }, { "epoch": 2.026652624756967, "grad_norm": 0.06377450376749039, "learning_rate": 0.00010963139655250011, "loss": 0.2599, "step": 25017 }, { "epoch": 2.026733635774465, "grad_norm": 0.05704493075609207, "learning_rate": 0.00010962689589990549, "loss": 0.2729, "step": 25018 }, { "epoch": 2.026814646791964, "grad_norm": 0.05449501425027847, "learning_rate": 0.00010962239524731086, "loss": 0.2323, "step": 25019 }, { "epoch": 2.026895657809462, "grad_norm": 0.06370042264461517, "learning_rate": 0.00010961789459471624, "loss": 0.2465, "step": 25020 }, { "epoch": 2.0269766688269604, "grad_norm": 0.06381337344646454, "learning_rate": 0.00010961339394212162, "loss": 0.2108, "step": 25021 }, { "epoch": 2.027057679844459, "grad_norm": 0.055754173547029495, "learning_rate": 0.000109608893289527, "loss": 0.228, "step": 25022 }, { "epoch": 2.0271386908619573, "grad_norm": 0.0694638341665268, "learning_rate": 0.00010960439263693236, "loss": 0.2338, "step": 25023 }, { "epoch": 2.0272197018794555, "grad_norm": 0.06296098977327347, "learning_rate": 0.00010959989198433773, "loss": 0.2104, "step": 25024 }, { "epoch": 2.027300712896954, "grad_norm": 0.04989820346236229, "learning_rate": 0.0001095953913317431, "loss": 0.2395, "step": 25025 }, { "epoch": 2.0273817239144525, "grad_norm": 0.05545510724186897, "learning_rate": 0.00010959089067914848, "loss": 0.243, "step": 25026 }, { "epoch": 2.0274627349319507, "grad_norm": 0.06581075489521027, "learning_rate": 0.00010958639002655386, "loss": 0.273, "step": 25027 }, { "epoch": 2.027543745949449, "grad_norm": 0.06940918415784836, "learning_rate": 0.00010958188937395924, "loss": 0.2525, "step": 25028 }, { "epoch": 2.0276247569669477, "grad_norm": 0.05674520507454872, "learning_rate": 0.00010957738872136461, "loss": 0.2343, "step": 25029 }, { "epoch": 2.027705767984446, "grad_norm": 0.04720667749643326, "learning_rate": 0.00010957288806876997, "loss": 0.2484, "step": 25030 }, { "epoch": 2.027786779001944, "grad_norm": 0.06545969098806381, "learning_rate": 0.00010956838741617534, "loss": 0.239, "step": 25031 }, { "epoch": 2.027867790019443, "grad_norm": 0.0600646436214447, "learning_rate": 0.00010956388676358072, "loss": 0.2443, "step": 25032 }, { "epoch": 2.027948801036941, "grad_norm": 0.051953334361314774, "learning_rate": 0.0001095593861109861, "loss": 0.3033, "step": 25033 }, { "epoch": 2.0280298120544393, "grad_norm": 0.056979961693286896, "learning_rate": 0.00010955488545839148, "loss": 0.2704, "step": 25034 }, { "epoch": 2.0281108230719376, "grad_norm": 0.0636250451207161, "learning_rate": 0.00010955038480579685, "loss": 0.2829, "step": 25035 }, { "epoch": 2.0281918340894363, "grad_norm": 0.06014517694711685, "learning_rate": 0.00010954588415320221, "loss": 0.2934, "step": 25036 }, { "epoch": 2.0282728451069345, "grad_norm": 0.05575292930006981, "learning_rate": 0.00010954138350060759, "loss": 0.2503, "step": 25037 }, { "epoch": 2.0283538561244328, "grad_norm": 0.06069019436836243, "learning_rate": 0.00010953688284801296, "loss": 0.2644, "step": 25038 }, { "epoch": 2.0284348671419314, "grad_norm": 0.055340562015771866, "learning_rate": 0.00010953238219541835, "loss": 0.2574, "step": 25039 }, { "epoch": 2.0285158781594297, "grad_norm": 0.05364044010639191, "learning_rate": 0.00010952788154282372, "loss": 0.2622, "step": 25040 }, { "epoch": 2.028596889176928, "grad_norm": 0.057188618928194046, "learning_rate": 0.0001095233808902291, "loss": 0.2789, "step": 25041 }, { "epoch": 2.0286779001944266, "grad_norm": 0.06309512257575989, "learning_rate": 0.00010951888023763445, "loss": 0.2699, "step": 25042 }, { "epoch": 2.028758911211925, "grad_norm": 0.05525052919983864, "learning_rate": 0.00010951437958503983, "loss": 0.2631, "step": 25043 }, { "epoch": 2.028839922229423, "grad_norm": 0.0631512701511383, "learning_rate": 0.00010950987893244522, "loss": 0.2757, "step": 25044 }, { "epoch": 2.028920933246922, "grad_norm": 0.05379737541079521, "learning_rate": 0.00010950537827985059, "loss": 0.2569, "step": 25045 }, { "epoch": 2.02900194426442, "grad_norm": 0.05144206061959267, "learning_rate": 0.00010950087762725596, "loss": 0.2527, "step": 25046 }, { "epoch": 2.0290829552819183, "grad_norm": 0.06055891141295433, "learning_rate": 0.00010949637697466134, "loss": 0.2479, "step": 25047 }, { "epoch": 2.0291639662994165, "grad_norm": 0.06275729089975357, "learning_rate": 0.0001094918763220667, "loss": 0.2867, "step": 25048 }, { "epoch": 2.029244977316915, "grad_norm": 0.05858127027750015, "learning_rate": 0.00010948737566947207, "loss": 0.2511, "step": 25049 }, { "epoch": 2.0293259883344135, "grad_norm": 0.07683297991752625, "learning_rate": 0.00010948287501687746, "loss": 0.2954, "step": 25050 }, { "epoch": 2.0294069993519117, "grad_norm": 0.05623006075620651, "learning_rate": 0.00010947837436428283, "loss": 0.3148, "step": 25051 }, { "epoch": 2.0294880103694104, "grad_norm": 0.06020371988415718, "learning_rate": 0.0001094738737116882, "loss": 0.2865, "step": 25052 }, { "epoch": 2.0295690213869086, "grad_norm": 0.057044848799705505, "learning_rate": 0.00010946937305909358, "loss": 0.2943, "step": 25053 }, { "epoch": 2.029650032404407, "grad_norm": 0.05336422100663185, "learning_rate": 0.00010946487240649894, "loss": 0.2309, "step": 25054 }, { "epoch": 2.0297310434219056, "grad_norm": 0.05944908782839775, "learning_rate": 0.00010946037175390431, "loss": 0.2163, "step": 25055 }, { "epoch": 2.029812054439404, "grad_norm": 0.06958787888288498, "learning_rate": 0.0001094558711013097, "loss": 0.2916, "step": 25056 }, { "epoch": 2.029893065456902, "grad_norm": 0.07049243152141571, "learning_rate": 0.00010945137044871507, "loss": 0.2844, "step": 25057 }, { "epoch": 2.0299740764744003, "grad_norm": 0.048220280557870865, "learning_rate": 0.00010944686979612045, "loss": 0.2358, "step": 25058 }, { "epoch": 2.030055087491899, "grad_norm": 0.06429468840360641, "learning_rate": 0.00010944236914352582, "loss": 0.2563, "step": 25059 }, { "epoch": 2.0301360985093972, "grad_norm": 0.06154334545135498, "learning_rate": 0.00010943786849093118, "loss": 0.2583, "step": 25060 }, { "epoch": 2.0302171095268955, "grad_norm": 0.06173671409487724, "learning_rate": 0.00010943336783833655, "loss": 0.271, "step": 25061 }, { "epoch": 2.030298120544394, "grad_norm": 0.06672929227352142, "learning_rate": 0.00010942886718574194, "loss": 0.2636, "step": 25062 }, { "epoch": 2.0303791315618924, "grad_norm": 0.06729565560817719, "learning_rate": 0.00010942436653314731, "loss": 0.2563, "step": 25063 }, { "epoch": 2.0304601425793907, "grad_norm": 0.06372225284576416, "learning_rate": 0.00010941986588055269, "loss": 0.2503, "step": 25064 }, { "epoch": 2.0305411535968894, "grad_norm": 0.05387897044420242, "learning_rate": 0.00010941536522795806, "loss": 0.2554, "step": 25065 }, { "epoch": 2.0306221646143876, "grad_norm": 0.050809603184461594, "learning_rate": 0.00010941086457536342, "loss": 0.2344, "step": 25066 }, { "epoch": 2.030703175631886, "grad_norm": 0.06151817366480827, "learning_rate": 0.00010940636392276882, "loss": 0.2516, "step": 25067 }, { "epoch": 2.0307841866493845, "grad_norm": 0.06497801095247269, "learning_rate": 0.00010940186327017418, "loss": 0.2661, "step": 25068 }, { "epoch": 2.030865197666883, "grad_norm": 0.057573284953832626, "learning_rate": 0.00010939736261757956, "loss": 0.2439, "step": 25069 }, { "epoch": 2.030946208684381, "grad_norm": 0.06750375032424927, "learning_rate": 0.00010939286196498493, "loss": 0.2727, "step": 25070 }, { "epoch": 2.0310272197018793, "grad_norm": 0.06054481491446495, "learning_rate": 0.0001093883613123903, "loss": 0.2383, "step": 25071 }, { "epoch": 2.031108230719378, "grad_norm": 0.06974566727876663, "learning_rate": 0.00010938386065979566, "loss": 0.298, "step": 25072 }, { "epoch": 2.031189241736876, "grad_norm": 0.06394176185131073, "learning_rate": 0.00010937936000720106, "loss": 0.2388, "step": 25073 }, { "epoch": 2.0312702527543745, "grad_norm": 0.050677455961704254, "learning_rate": 0.00010937485935460642, "loss": 0.2417, "step": 25074 }, { "epoch": 2.031351263771873, "grad_norm": 0.06450121849775314, "learning_rate": 0.0001093703587020118, "loss": 0.2397, "step": 25075 }, { "epoch": 2.0314322747893714, "grad_norm": 0.06229517608880997, "learning_rate": 0.00010936585804941717, "loss": 0.2726, "step": 25076 }, { "epoch": 2.0315132858068696, "grad_norm": 0.06502380222082138, "learning_rate": 0.00010936135739682254, "loss": 0.2809, "step": 25077 }, { "epoch": 2.0315942968243683, "grad_norm": 0.05273763835430145, "learning_rate": 0.0001093568567442279, "loss": 0.2218, "step": 25078 }, { "epoch": 2.0316753078418666, "grad_norm": 0.059138379991054535, "learning_rate": 0.0001093523560916333, "loss": 0.259, "step": 25079 }, { "epoch": 2.031756318859365, "grad_norm": 0.054885562509298325, "learning_rate": 0.00010934785543903866, "loss": 0.2615, "step": 25080 }, { "epoch": 2.031837329876863, "grad_norm": 0.04939741641283035, "learning_rate": 0.00010934335478644404, "loss": 0.2444, "step": 25081 }, { "epoch": 2.0319183408943617, "grad_norm": 0.05478831008076668, "learning_rate": 0.00010933885413384941, "loss": 0.2389, "step": 25082 }, { "epoch": 2.03199935191186, "grad_norm": 0.06696939468383789, "learning_rate": 0.00010933435348125479, "loss": 0.305, "step": 25083 }, { "epoch": 2.0320803629293582, "grad_norm": 0.051382653415203094, "learning_rate": 0.00010932985282866015, "loss": 0.2306, "step": 25084 }, { "epoch": 2.032161373946857, "grad_norm": 0.07961443811655045, "learning_rate": 0.00010932535217606555, "loss": 0.2348, "step": 25085 }, { "epoch": 2.032242384964355, "grad_norm": 0.0601271316409111, "learning_rate": 0.0001093208515234709, "loss": 0.2833, "step": 25086 }, { "epoch": 2.0323233959818534, "grad_norm": 0.062180083245038986, "learning_rate": 0.00010931635087087628, "loss": 0.2535, "step": 25087 }, { "epoch": 2.032404406999352, "grad_norm": 0.07181721180677414, "learning_rate": 0.00010931185021828165, "loss": 0.2996, "step": 25088 }, { "epoch": 2.0324854180168503, "grad_norm": 0.04999459907412529, "learning_rate": 0.00010930734956568703, "loss": 0.2413, "step": 25089 }, { "epoch": 2.0325664290343486, "grad_norm": 0.05797059088945389, "learning_rate": 0.00010930284891309239, "loss": 0.2754, "step": 25090 }, { "epoch": 2.0326474400518473, "grad_norm": 0.0662585198879242, "learning_rate": 0.00010929834826049779, "loss": 0.2687, "step": 25091 }, { "epoch": 2.0327284510693455, "grad_norm": 0.06757596880197525, "learning_rate": 0.00010929384760790316, "loss": 0.2941, "step": 25092 }, { "epoch": 2.0328094620868438, "grad_norm": 0.05894165858626366, "learning_rate": 0.00010928934695530852, "loss": 0.3183, "step": 25093 }, { "epoch": 2.032890473104342, "grad_norm": 0.0708579570055008, "learning_rate": 0.0001092848463027139, "loss": 0.2966, "step": 25094 }, { "epoch": 2.0329714841218407, "grad_norm": 0.06730002164840698, "learning_rate": 0.00010928034565011927, "loss": 0.239, "step": 25095 }, { "epoch": 2.033052495139339, "grad_norm": 0.04710034653544426, "learning_rate": 0.00010927584499752466, "loss": 0.2072, "step": 25096 }, { "epoch": 2.033133506156837, "grad_norm": 0.08717244863510132, "learning_rate": 0.00010927134434493003, "loss": 0.3157, "step": 25097 }, { "epoch": 2.033214517174336, "grad_norm": 0.060938701033592224, "learning_rate": 0.0001092668436923354, "loss": 0.3116, "step": 25098 }, { "epoch": 2.033295528191834, "grad_norm": 0.062359604984521866, "learning_rate": 0.00010926234303974076, "loss": 0.2838, "step": 25099 }, { "epoch": 2.0333765392093324, "grad_norm": 0.06159916892647743, "learning_rate": 0.00010925784238714614, "loss": 0.2623, "step": 25100 }, { "epoch": 2.033457550226831, "grad_norm": 0.06426838785409927, "learning_rate": 0.00010925334173455151, "loss": 0.2926, "step": 25101 }, { "epoch": 2.0335385612443293, "grad_norm": 0.06911225616931915, "learning_rate": 0.0001092488410819569, "loss": 0.2972, "step": 25102 }, { "epoch": 2.0336195722618275, "grad_norm": 0.057620491832494736, "learning_rate": 0.00010924434042936227, "loss": 0.2462, "step": 25103 }, { "epoch": 2.033700583279326, "grad_norm": 0.058398373425006866, "learning_rate": 0.00010923983977676764, "loss": 0.2825, "step": 25104 }, { "epoch": 2.0337815942968245, "grad_norm": 0.06570584326982498, "learning_rate": 0.000109235339124173, "loss": 0.2713, "step": 25105 }, { "epoch": 2.0338626053143227, "grad_norm": 0.0867527574300766, "learning_rate": 0.00010923083847157838, "loss": 0.2968, "step": 25106 }, { "epoch": 2.033943616331821, "grad_norm": 0.05803163722157478, "learning_rate": 0.00010922633781898375, "loss": 0.2257, "step": 25107 }, { "epoch": 2.0340246273493197, "grad_norm": 0.06863657385110855, "learning_rate": 0.00010922183716638914, "loss": 0.2413, "step": 25108 }, { "epoch": 2.034105638366818, "grad_norm": 0.05453773960471153, "learning_rate": 0.00010921733651379451, "loss": 0.2746, "step": 25109 }, { "epoch": 2.034186649384316, "grad_norm": 0.05881081521511078, "learning_rate": 0.00010921283586119989, "loss": 0.2647, "step": 25110 }, { "epoch": 2.034267660401815, "grad_norm": 0.054627347737550735, "learning_rate": 0.00010920833520860525, "loss": 0.2671, "step": 25111 }, { "epoch": 2.034348671419313, "grad_norm": 0.0621161125600338, "learning_rate": 0.00010920383455601062, "loss": 0.2506, "step": 25112 }, { "epoch": 2.0344296824368113, "grad_norm": 0.0610932894051075, "learning_rate": 0.000109199333903416, "loss": 0.2802, "step": 25113 }, { "epoch": 2.0345106934543096, "grad_norm": 0.062395934015512466, "learning_rate": 0.00010919483325082138, "loss": 0.2289, "step": 25114 }, { "epoch": 2.0345917044718083, "grad_norm": 0.06225151568651199, "learning_rate": 0.00010919033259822675, "loss": 0.3058, "step": 25115 }, { "epoch": 2.0346727154893065, "grad_norm": 0.05295949801802635, "learning_rate": 0.00010918583194563213, "loss": 0.2118, "step": 25116 }, { "epoch": 2.0347537265068047, "grad_norm": 0.06223113089799881, "learning_rate": 0.00010918133129303749, "loss": 0.2409, "step": 25117 }, { "epoch": 2.0348347375243034, "grad_norm": 0.0673019140958786, "learning_rate": 0.00010917683064044286, "loss": 0.2742, "step": 25118 }, { "epoch": 2.0349157485418017, "grad_norm": 0.06154569610953331, "learning_rate": 0.00010917232998784825, "loss": 0.278, "step": 25119 }, { "epoch": 2.0349967595593, "grad_norm": 0.06451249122619629, "learning_rate": 0.00010916782933525362, "loss": 0.2448, "step": 25120 }, { "epoch": 2.0350777705767986, "grad_norm": 0.05225265398621559, "learning_rate": 0.000109163328682659, "loss": 0.2114, "step": 25121 }, { "epoch": 2.035158781594297, "grad_norm": 0.06646935641765594, "learning_rate": 0.00010915882803006437, "loss": 0.251, "step": 25122 }, { "epoch": 2.035239792611795, "grad_norm": 0.05266328155994415, "learning_rate": 0.00010915432737746973, "loss": 0.2568, "step": 25123 }, { "epoch": 2.035320803629294, "grad_norm": 0.06037535145878792, "learning_rate": 0.0001091498267248751, "loss": 0.2778, "step": 25124 }, { "epoch": 2.035401814646792, "grad_norm": 0.05661199986934662, "learning_rate": 0.00010914532607228049, "loss": 0.265, "step": 25125 }, { "epoch": 2.0354828256642903, "grad_norm": 0.06349622458219528, "learning_rate": 0.00010914082541968586, "loss": 0.2316, "step": 25126 }, { "epoch": 2.0355638366817885, "grad_norm": 0.06441009044647217, "learning_rate": 0.00010913632476709124, "loss": 0.2619, "step": 25127 }, { "epoch": 2.035644847699287, "grad_norm": 0.057916779071092606, "learning_rate": 0.00010913182411449661, "loss": 0.2373, "step": 25128 }, { "epoch": 2.0357258587167855, "grad_norm": 0.07067728787660599, "learning_rate": 0.00010912732346190197, "loss": 0.269, "step": 25129 }, { "epoch": 2.0358068697342837, "grad_norm": 0.05133752524852753, "learning_rate": 0.00010912282280930734, "loss": 0.2378, "step": 25130 }, { "epoch": 2.0358878807517824, "grad_norm": 0.053712695837020874, "learning_rate": 0.00010911832215671273, "loss": 0.2077, "step": 25131 }, { "epoch": 2.0359688917692806, "grad_norm": 0.058011818677186966, "learning_rate": 0.0001091138215041181, "loss": 0.2455, "step": 25132 }, { "epoch": 2.036049902786779, "grad_norm": 0.07174625247716904, "learning_rate": 0.00010910932085152348, "loss": 0.3063, "step": 25133 }, { "epoch": 2.0361309138042776, "grad_norm": 0.05659336969256401, "learning_rate": 0.00010910482019892885, "loss": 0.236, "step": 25134 }, { "epoch": 2.036211924821776, "grad_norm": 0.06388983875513077, "learning_rate": 0.00010910031954633421, "loss": 0.2843, "step": 25135 }, { "epoch": 2.036292935839274, "grad_norm": 0.07428158819675446, "learning_rate": 0.00010909581889373959, "loss": 0.2687, "step": 25136 }, { "epoch": 2.0363739468567723, "grad_norm": 0.06257401406764984, "learning_rate": 0.00010909131824114497, "loss": 0.2659, "step": 25137 }, { "epoch": 2.036454957874271, "grad_norm": 0.06060203164815903, "learning_rate": 0.00010908681758855035, "loss": 0.2578, "step": 25138 }, { "epoch": 2.0365359688917692, "grad_norm": 0.08558185398578644, "learning_rate": 0.00010908231693595572, "loss": 0.2567, "step": 25139 }, { "epoch": 2.0366169799092675, "grad_norm": 0.08406521379947662, "learning_rate": 0.0001090778162833611, "loss": 0.2774, "step": 25140 }, { "epoch": 2.036697990926766, "grad_norm": 0.054151203483343124, "learning_rate": 0.00010907331563076645, "loss": 0.2609, "step": 25141 }, { "epoch": 2.0367790019442644, "grad_norm": 0.06468921899795532, "learning_rate": 0.00010906881497817183, "loss": 0.2595, "step": 25142 }, { "epoch": 2.0368600129617627, "grad_norm": 0.055418115109205246, "learning_rate": 0.00010906431432557722, "loss": 0.2319, "step": 25143 }, { "epoch": 2.0369410239792614, "grad_norm": 0.05805583298206329, "learning_rate": 0.00010905981367298259, "loss": 0.2811, "step": 25144 }, { "epoch": 2.0370220349967596, "grad_norm": 0.05657727271318436, "learning_rate": 0.00010905531302038796, "loss": 0.2416, "step": 25145 }, { "epoch": 2.037103046014258, "grad_norm": 0.05590350925922394, "learning_rate": 0.00010905081236779334, "loss": 0.2429, "step": 25146 }, { "epoch": 2.0371840570317565, "grad_norm": 0.06622657924890518, "learning_rate": 0.0001090463117151987, "loss": 0.2972, "step": 25147 }, { "epoch": 2.037265068049255, "grad_norm": 0.06123768165707588, "learning_rate": 0.0001090418110626041, "loss": 0.2762, "step": 25148 }, { "epoch": 2.037346079066753, "grad_norm": 0.062109388411045074, "learning_rate": 0.00010903731041000946, "loss": 0.2418, "step": 25149 }, { "epoch": 2.0374270900842513, "grad_norm": 0.06358703225851059, "learning_rate": 0.00010903280975741483, "loss": 0.2675, "step": 25150 }, { "epoch": 2.03750810110175, "grad_norm": 0.05100923404097557, "learning_rate": 0.0001090283091048202, "loss": 0.2346, "step": 25151 }, { "epoch": 2.037589112119248, "grad_norm": 0.06406904757022858, "learning_rate": 0.00010902380845222558, "loss": 0.2638, "step": 25152 }, { "epoch": 2.0376701231367464, "grad_norm": 0.06896286457777023, "learning_rate": 0.00010901930779963094, "loss": 0.2405, "step": 25153 }, { "epoch": 2.037751134154245, "grad_norm": 0.05823047086596489, "learning_rate": 0.00010901480714703634, "loss": 0.2461, "step": 25154 }, { "epoch": 2.0378321451717434, "grad_norm": 0.07670606672763824, "learning_rate": 0.0001090103064944417, "loss": 0.2884, "step": 25155 }, { "epoch": 2.0379131561892416, "grad_norm": 0.1217048391699791, "learning_rate": 0.00010900580584184707, "loss": 0.3086, "step": 25156 }, { "epoch": 2.0379941672067403, "grad_norm": 0.05431569740176201, "learning_rate": 0.00010900130518925245, "loss": 0.2592, "step": 25157 }, { "epoch": 2.0380751782242386, "grad_norm": 0.0681622326374054, "learning_rate": 0.00010899680453665782, "loss": 0.2489, "step": 25158 }, { "epoch": 2.038156189241737, "grad_norm": 0.05610613897442818, "learning_rate": 0.00010899230388406318, "loss": 0.2317, "step": 25159 }, { "epoch": 2.038237200259235, "grad_norm": 0.06325603276491165, "learning_rate": 0.00010898780323146858, "loss": 0.2827, "step": 25160 }, { "epoch": 2.0383182112767337, "grad_norm": 0.069940485060215, "learning_rate": 0.00010898330257887395, "loss": 0.249, "step": 25161 }, { "epoch": 2.038399222294232, "grad_norm": 0.059809450060129166, "learning_rate": 0.00010897880192627931, "loss": 0.2967, "step": 25162 }, { "epoch": 2.0384802333117302, "grad_norm": 0.06071152910590172, "learning_rate": 0.00010897430127368469, "loss": 0.254, "step": 25163 }, { "epoch": 2.038561244329229, "grad_norm": 0.06979705393314362, "learning_rate": 0.00010896980062109006, "loss": 0.2616, "step": 25164 }, { "epoch": 2.038642255346727, "grad_norm": 0.06330650299787521, "learning_rate": 0.00010896529996849542, "loss": 0.2712, "step": 25165 }, { "epoch": 2.0387232663642254, "grad_norm": 0.05924196541309357, "learning_rate": 0.00010896079931590082, "loss": 0.2434, "step": 25166 }, { "epoch": 2.038804277381724, "grad_norm": 0.05195455253124237, "learning_rate": 0.0001089562986633062, "loss": 0.2472, "step": 25167 }, { "epoch": 2.0388852883992223, "grad_norm": 0.0717695876955986, "learning_rate": 0.00010895179801071156, "loss": 0.2771, "step": 25168 }, { "epoch": 2.0389662994167206, "grad_norm": 0.05342751368880272, "learning_rate": 0.00010894729735811693, "loss": 0.2439, "step": 25169 }, { "epoch": 2.039047310434219, "grad_norm": 0.05817456170916557, "learning_rate": 0.0001089427967055223, "loss": 0.2545, "step": 25170 }, { "epoch": 2.0391283214517175, "grad_norm": 0.06112273782491684, "learning_rate": 0.00010893829605292769, "loss": 0.2924, "step": 25171 }, { "epoch": 2.0392093324692158, "grad_norm": 0.05255928635597229, "learning_rate": 0.00010893379540033306, "loss": 0.2109, "step": 25172 }, { "epoch": 2.039290343486714, "grad_norm": 0.051350079476833344, "learning_rate": 0.00010892929474773844, "loss": 0.2313, "step": 25173 }, { "epoch": 2.0393713545042127, "grad_norm": 0.06925446540117264, "learning_rate": 0.0001089247940951438, "loss": 0.2773, "step": 25174 }, { "epoch": 2.039452365521711, "grad_norm": 0.061324674636125565, "learning_rate": 0.00010892029344254917, "loss": 0.2867, "step": 25175 }, { "epoch": 2.039533376539209, "grad_norm": 0.06071063503623009, "learning_rate": 0.00010891579278995454, "loss": 0.2643, "step": 25176 }, { "epoch": 2.039614387556708, "grad_norm": 0.06241742521524429, "learning_rate": 0.00010891129213735993, "loss": 0.2664, "step": 25177 }, { "epoch": 2.039695398574206, "grad_norm": 0.06371390074491501, "learning_rate": 0.0001089067914847653, "loss": 0.258, "step": 25178 }, { "epoch": 2.0397764095917044, "grad_norm": 0.047560915350914, "learning_rate": 0.00010890229083217068, "loss": 0.2607, "step": 25179 }, { "epoch": 2.039857420609203, "grad_norm": 0.0704430490732193, "learning_rate": 0.00010889779017957604, "loss": 0.2488, "step": 25180 }, { "epoch": 2.0399384316267013, "grad_norm": 0.06133200600743294, "learning_rate": 0.00010889328952698141, "loss": 0.299, "step": 25181 }, { "epoch": 2.0400194426441995, "grad_norm": 0.06216515600681305, "learning_rate": 0.00010888878887438679, "loss": 0.2495, "step": 25182 }, { "epoch": 2.040100453661698, "grad_norm": 0.08171521127223969, "learning_rate": 0.00010888428822179217, "loss": 0.2662, "step": 25183 }, { "epoch": 2.0401814646791965, "grad_norm": 0.06202945485711098, "learning_rate": 0.00010887978756919755, "loss": 0.2791, "step": 25184 }, { "epoch": 2.0402624756966947, "grad_norm": 0.05931111425161362, "learning_rate": 0.00010887528691660292, "loss": 0.2811, "step": 25185 }, { "epoch": 2.040343486714193, "grad_norm": 0.06533900648355484, "learning_rate": 0.00010887078626400828, "loss": 0.2494, "step": 25186 }, { "epoch": 2.0404244977316917, "grad_norm": 0.04683497175574303, "learning_rate": 0.00010886628561141365, "loss": 0.2489, "step": 25187 }, { "epoch": 2.04050550874919, "grad_norm": 0.06653185933828354, "learning_rate": 0.00010886178495881903, "loss": 0.2598, "step": 25188 }, { "epoch": 2.040586519766688, "grad_norm": 0.06457366049289703, "learning_rate": 0.00010885728430622441, "loss": 0.2452, "step": 25189 }, { "epoch": 2.040667530784187, "grad_norm": 0.046025507152080536, "learning_rate": 0.00010885278365362979, "loss": 0.2694, "step": 25190 }, { "epoch": 2.040748541801685, "grad_norm": 0.05456798896193504, "learning_rate": 0.00010884828300103516, "loss": 0.2243, "step": 25191 }, { "epoch": 2.0408295528191833, "grad_norm": 0.07598184794187546, "learning_rate": 0.00010884378234844052, "loss": 0.26, "step": 25192 }, { "epoch": 2.0409105638366816, "grad_norm": 0.05668919160962105, "learning_rate": 0.0001088392816958459, "loss": 0.2743, "step": 25193 }, { "epoch": 2.0409915748541803, "grad_norm": 0.06732729822397232, "learning_rate": 0.00010883478104325127, "loss": 0.2668, "step": 25194 }, { "epoch": 2.0410725858716785, "grad_norm": 0.06389015913009644, "learning_rate": 0.00010883028039065666, "loss": 0.2835, "step": 25195 }, { "epoch": 2.0411535968891767, "grad_norm": 0.06920386850833893, "learning_rate": 0.00010882577973806203, "loss": 0.2426, "step": 25196 }, { "epoch": 2.0412346079066754, "grad_norm": 0.06971029192209244, "learning_rate": 0.0001088212790854674, "loss": 0.2717, "step": 25197 }, { "epoch": 2.0413156189241737, "grad_norm": 0.06330526620149612, "learning_rate": 0.00010881677843287276, "loss": 0.2365, "step": 25198 }, { "epoch": 2.041396629941672, "grad_norm": 0.0685361996293068, "learning_rate": 0.00010881227778027814, "loss": 0.279, "step": 25199 }, { "epoch": 2.0414776409591706, "grad_norm": 0.05792514234781265, "learning_rate": 0.00010880777712768352, "loss": 0.2366, "step": 25200 }, { "epoch": 2.041558651976669, "grad_norm": 0.06563431024551392, "learning_rate": 0.0001088032764750889, "loss": 0.266, "step": 25201 }, { "epoch": 2.041639662994167, "grad_norm": 0.05215870961546898, "learning_rate": 0.00010879877582249427, "loss": 0.2613, "step": 25202 }, { "epoch": 2.041720674011666, "grad_norm": 0.06881966441869736, "learning_rate": 0.00010879427516989965, "loss": 0.2992, "step": 25203 }, { "epoch": 2.041801685029164, "grad_norm": 0.054352544248104095, "learning_rate": 0.000108789774517305, "loss": 0.2296, "step": 25204 }, { "epoch": 2.0418826960466623, "grad_norm": 0.08665680140256882, "learning_rate": 0.00010878527386471038, "loss": 0.2642, "step": 25205 }, { "epoch": 2.0419637070641605, "grad_norm": 0.06520480662584305, "learning_rate": 0.00010878077321211577, "loss": 0.2686, "step": 25206 }, { "epoch": 2.042044718081659, "grad_norm": 0.057679660618305206, "learning_rate": 0.00010877627255952114, "loss": 0.218, "step": 25207 }, { "epoch": 2.0421257290991575, "grad_norm": 0.06190623342990875, "learning_rate": 0.00010877177190692651, "loss": 0.2329, "step": 25208 }, { "epoch": 2.0422067401166557, "grad_norm": 0.07380854338407516, "learning_rate": 0.00010876727125433189, "loss": 0.2816, "step": 25209 }, { "epoch": 2.0422877511341544, "grad_norm": 0.059125471860170364, "learning_rate": 0.00010876277060173725, "loss": 0.2596, "step": 25210 }, { "epoch": 2.0423687621516526, "grad_norm": 0.05417705699801445, "learning_rate": 0.00010875826994914262, "loss": 0.2192, "step": 25211 }, { "epoch": 2.042449773169151, "grad_norm": 0.054412972182035446, "learning_rate": 0.00010875376929654801, "loss": 0.2798, "step": 25212 }, { "epoch": 2.0425307841866496, "grad_norm": 0.05146175995469093, "learning_rate": 0.00010874926864395338, "loss": 0.234, "step": 25213 }, { "epoch": 2.042611795204148, "grad_norm": 0.0554482527077198, "learning_rate": 0.00010874476799135875, "loss": 0.2545, "step": 25214 }, { "epoch": 2.042692806221646, "grad_norm": 0.06421638280153275, "learning_rate": 0.00010874026733876413, "loss": 0.2538, "step": 25215 }, { "epoch": 2.0427738172391443, "grad_norm": 0.06408846378326416, "learning_rate": 0.00010873576668616949, "loss": 0.2672, "step": 25216 }, { "epoch": 2.042854828256643, "grad_norm": 0.06903623789548874, "learning_rate": 0.00010873126603357486, "loss": 0.2385, "step": 25217 }, { "epoch": 2.0429358392741412, "grad_norm": 0.06604668498039246, "learning_rate": 0.00010872676538098025, "loss": 0.247, "step": 25218 }, { "epoch": 2.0430168502916395, "grad_norm": 0.06986042857170105, "learning_rate": 0.00010872226472838562, "loss": 0.2529, "step": 25219 }, { "epoch": 2.043097861309138, "grad_norm": 0.05973782390356064, "learning_rate": 0.000108717764075791, "loss": 0.2432, "step": 25220 }, { "epoch": 2.0431788723266364, "grad_norm": 0.06510750204324722, "learning_rate": 0.00010871326342319637, "loss": 0.2568, "step": 25221 }, { "epoch": 2.0432598833441347, "grad_norm": 0.057255763560533524, "learning_rate": 0.00010870876277060173, "loss": 0.2666, "step": 25222 }, { "epoch": 2.0433408943616334, "grad_norm": 0.061788126826286316, "learning_rate": 0.0001087042621180071, "loss": 0.2459, "step": 25223 }, { "epoch": 2.0434219053791316, "grad_norm": 0.058773137629032135, "learning_rate": 0.00010869976146541249, "loss": 0.2736, "step": 25224 }, { "epoch": 2.04350291639663, "grad_norm": 0.0588141568005085, "learning_rate": 0.00010869526081281786, "loss": 0.2935, "step": 25225 }, { "epoch": 2.0435839274141285, "grad_norm": 0.06576879322528839, "learning_rate": 0.00010869076016022324, "loss": 0.2801, "step": 25226 }, { "epoch": 2.0436649384316268, "grad_norm": 0.07062683999538422, "learning_rate": 0.00010868625950762861, "loss": 0.26, "step": 25227 }, { "epoch": 2.043745949449125, "grad_norm": 0.07221104204654694, "learning_rate": 0.00010868175885503397, "loss": 0.2598, "step": 25228 }, { "epoch": 2.0438269604666233, "grad_norm": 0.05775179713964462, "learning_rate": 0.00010867725820243937, "loss": 0.2565, "step": 25229 }, { "epoch": 2.043907971484122, "grad_norm": 0.06143064796924591, "learning_rate": 0.00010867275754984475, "loss": 0.2412, "step": 25230 }, { "epoch": 2.04398898250162, "grad_norm": 0.0679447203874588, "learning_rate": 0.0001086682568972501, "loss": 0.2682, "step": 25231 }, { "epoch": 2.0440699935191184, "grad_norm": 0.05769611522555351, "learning_rate": 0.00010866375624465548, "loss": 0.2428, "step": 25232 }, { "epoch": 2.044151004536617, "grad_norm": 0.05339081957936287, "learning_rate": 0.00010865925559206085, "loss": 0.2342, "step": 25233 }, { "epoch": 2.0442320155541154, "grad_norm": 0.05419541150331497, "learning_rate": 0.00010865475493946621, "loss": 0.2811, "step": 25234 }, { "epoch": 2.0443130265716136, "grad_norm": 0.06672076135873795, "learning_rate": 0.00010865025428687161, "loss": 0.2646, "step": 25235 }, { "epoch": 2.0443940375891123, "grad_norm": 0.06640081852674484, "learning_rate": 0.00010864575363427699, "loss": 0.2206, "step": 25236 }, { "epoch": 2.0444750486066106, "grad_norm": 0.06232764199376106, "learning_rate": 0.00010864125298168235, "loss": 0.2563, "step": 25237 }, { "epoch": 2.044556059624109, "grad_norm": 0.08199611306190491, "learning_rate": 0.00010863675232908772, "loss": 0.2992, "step": 25238 }, { "epoch": 2.044637070641607, "grad_norm": 0.06076132878661156, "learning_rate": 0.0001086322516764931, "loss": 0.2602, "step": 25239 }, { "epoch": 2.0447180816591057, "grad_norm": 0.06518393754959106, "learning_rate": 0.00010862775102389845, "loss": 0.2383, "step": 25240 }, { "epoch": 2.044799092676604, "grad_norm": 0.05123493820428848, "learning_rate": 0.00010862325037130386, "loss": 0.2327, "step": 25241 }, { "epoch": 2.0448801036941022, "grad_norm": 0.05127443000674248, "learning_rate": 0.00010861874971870923, "loss": 0.2443, "step": 25242 }, { "epoch": 2.044961114711601, "grad_norm": 0.05903277546167374, "learning_rate": 0.00010861424906611459, "loss": 0.2386, "step": 25243 }, { "epoch": 2.045042125729099, "grad_norm": 0.06020485237240791, "learning_rate": 0.00010860974841351996, "loss": 0.2262, "step": 25244 }, { "epoch": 2.0451231367465974, "grad_norm": 0.05409131199121475, "learning_rate": 0.00010860524776092534, "loss": 0.2589, "step": 25245 }, { "epoch": 2.045204147764096, "grad_norm": 0.060740187764167786, "learning_rate": 0.0001086007471083307, "loss": 0.2329, "step": 25246 }, { "epoch": 2.0452851587815943, "grad_norm": 0.06752003729343414, "learning_rate": 0.0001085962464557361, "loss": 0.2297, "step": 25247 }, { "epoch": 2.0453661697990926, "grad_norm": 0.06496375054121017, "learning_rate": 0.00010859174580314147, "loss": 0.2775, "step": 25248 }, { "epoch": 2.0454471808165913, "grad_norm": 0.059181131422519684, "learning_rate": 0.00010858724515054683, "loss": 0.2275, "step": 25249 }, { "epoch": 2.0455281918340895, "grad_norm": 0.05792650207877159, "learning_rate": 0.0001085827444979522, "loss": 0.2853, "step": 25250 }, { "epoch": 2.0456092028515878, "grad_norm": 0.059535253793001175, "learning_rate": 0.00010857824384535758, "loss": 0.2278, "step": 25251 }, { "epoch": 2.045690213869086, "grad_norm": 0.0699746236205101, "learning_rate": 0.00010857374319276297, "loss": 0.2716, "step": 25252 }, { "epoch": 2.0457712248865847, "grad_norm": 0.060232095420360565, "learning_rate": 0.00010856924254016834, "loss": 0.2602, "step": 25253 }, { "epoch": 2.045852235904083, "grad_norm": 0.08279857039451599, "learning_rate": 0.00010856474188757371, "loss": 0.28, "step": 25254 }, { "epoch": 2.045933246921581, "grad_norm": 0.07259318977594376, "learning_rate": 0.00010856024123497907, "loss": 0.2621, "step": 25255 }, { "epoch": 2.04601425793908, "grad_norm": 0.05575643107295036, "learning_rate": 0.00010855574058238445, "loss": 0.2717, "step": 25256 }, { "epoch": 2.046095268956578, "grad_norm": 0.06759722530841827, "learning_rate": 0.00010855123992978982, "loss": 0.2664, "step": 25257 }, { "epoch": 2.0461762799740764, "grad_norm": 0.06458771973848343, "learning_rate": 0.00010854673927719521, "loss": 0.2659, "step": 25258 }, { "epoch": 2.046257290991575, "grad_norm": 0.06146334111690521, "learning_rate": 0.00010854223862460058, "loss": 0.2536, "step": 25259 }, { "epoch": 2.0463383020090733, "grad_norm": 0.054808396846055984, "learning_rate": 0.00010853773797200595, "loss": 0.2369, "step": 25260 }, { "epoch": 2.0464193130265715, "grad_norm": 0.05751333385705948, "learning_rate": 0.00010853323731941131, "loss": 0.2482, "step": 25261 }, { "epoch": 2.04650032404407, "grad_norm": 0.0674423947930336, "learning_rate": 0.00010852873666681669, "loss": 0.2647, "step": 25262 }, { "epoch": 2.0465813350615685, "grad_norm": 0.0590125173330307, "learning_rate": 0.00010852423601422206, "loss": 0.2987, "step": 25263 }, { "epoch": 2.0466623460790667, "grad_norm": 0.07027654349803925, "learning_rate": 0.00010851973536162745, "loss": 0.3088, "step": 25264 }, { "epoch": 2.046743357096565, "grad_norm": 0.05695608630776405, "learning_rate": 0.00010851523470903282, "loss": 0.2663, "step": 25265 }, { "epoch": 2.0468243681140637, "grad_norm": 0.06685222685337067, "learning_rate": 0.0001085107340564382, "loss": 0.3287, "step": 25266 }, { "epoch": 2.046905379131562, "grad_norm": 0.055971141904592514, "learning_rate": 0.00010850623340384356, "loss": 0.2208, "step": 25267 }, { "epoch": 2.04698639014906, "grad_norm": 0.05768276005983353, "learning_rate": 0.00010850173275124893, "loss": 0.2425, "step": 25268 }, { "epoch": 2.047067401166559, "grad_norm": 0.06055925786495209, "learning_rate": 0.0001084972320986543, "loss": 0.2441, "step": 25269 }, { "epoch": 2.047148412184057, "grad_norm": 0.06709778308868408, "learning_rate": 0.00010849273144605969, "loss": 0.2709, "step": 25270 }, { "epoch": 2.0472294232015553, "grad_norm": 0.07295244932174683, "learning_rate": 0.00010848823079346506, "loss": 0.2556, "step": 25271 }, { "epoch": 2.047310434219054, "grad_norm": 0.0473698154091835, "learning_rate": 0.00010848373014087044, "loss": 0.2631, "step": 25272 }, { "epoch": 2.0473914452365523, "grad_norm": 0.06414885818958282, "learning_rate": 0.0001084792294882758, "loss": 0.2333, "step": 25273 }, { "epoch": 2.0474724562540505, "grad_norm": 0.06567434221506119, "learning_rate": 0.00010847472883568117, "loss": 0.2742, "step": 25274 }, { "epoch": 2.0475534672715487, "grad_norm": 0.0706758126616478, "learning_rate": 0.00010847022818308654, "loss": 0.2413, "step": 25275 }, { "epoch": 2.0476344782890474, "grad_norm": 0.05288252234458923, "learning_rate": 0.00010846572753049193, "loss": 0.217, "step": 25276 }, { "epoch": 2.0477154893065457, "grad_norm": 0.06759516894817352, "learning_rate": 0.0001084612268778973, "loss": 0.2626, "step": 25277 }, { "epoch": 2.047796500324044, "grad_norm": 0.05509433150291443, "learning_rate": 0.00010845672622530268, "loss": 0.2362, "step": 25278 }, { "epoch": 2.0478775113415426, "grad_norm": 0.06873355805873871, "learning_rate": 0.00010845222557270804, "loss": 0.2722, "step": 25279 }, { "epoch": 2.047958522359041, "grad_norm": 0.07308941334486008, "learning_rate": 0.00010844772492011341, "loss": 0.2349, "step": 25280 }, { "epoch": 2.048039533376539, "grad_norm": 0.06773709505796432, "learning_rate": 0.0001084432242675188, "loss": 0.2699, "step": 25281 }, { "epoch": 2.048120544394038, "grad_norm": 0.052150875329971313, "learning_rate": 0.00010843872361492417, "loss": 0.2775, "step": 25282 }, { "epoch": 2.048201555411536, "grad_norm": 0.06018095463514328, "learning_rate": 0.00010843422296232955, "loss": 0.2788, "step": 25283 }, { "epoch": 2.0482825664290343, "grad_norm": 0.06471269577741623, "learning_rate": 0.00010842972230973492, "loss": 0.2316, "step": 25284 }, { "epoch": 2.0483635774465325, "grad_norm": 0.06936302036046982, "learning_rate": 0.00010842522165714028, "loss": 0.3311, "step": 25285 }, { "epoch": 2.048444588464031, "grad_norm": 0.06158831715583801, "learning_rate": 0.00010842072100454565, "loss": 0.2261, "step": 25286 }, { "epoch": 2.0485255994815295, "grad_norm": 0.061135925352573395, "learning_rate": 0.00010841622035195104, "loss": 0.2825, "step": 25287 }, { "epoch": 2.0486066104990277, "grad_norm": 0.06801503151655197, "learning_rate": 0.00010841171969935641, "loss": 0.2833, "step": 25288 }, { "epoch": 2.0486876215165264, "grad_norm": 0.052315711975097656, "learning_rate": 0.00010840721904676179, "loss": 0.235, "step": 25289 }, { "epoch": 2.0487686325340246, "grad_norm": 0.0969030037522316, "learning_rate": 0.00010840271839416716, "loss": 0.259, "step": 25290 }, { "epoch": 2.048849643551523, "grad_norm": 0.07182536274194717, "learning_rate": 0.00010839821774157252, "loss": 0.2531, "step": 25291 }, { "epoch": 2.0489306545690216, "grad_norm": 0.0598326101899147, "learning_rate": 0.0001083937170889779, "loss": 0.251, "step": 25292 }, { "epoch": 2.04901166558652, "grad_norm": 0.060341332107782364, "learning_rate": 0.00010838921643638328, "loss": 0.2806, "step": 25293 }, { "epoch": 2.049092676604018, "grad_norm": 0.049504201859235764, "learning_rate": 0.00010838471578378866, "loss": 0.297, "step": 25294 }, { "epoch": 2.0491736876215167, "grad_norm": 0.06888049840927124, "learning_rate": 0.00010838021513119403, "loss": 0.2524, "step": 25295 }, { "epoch": 2.049254698639015, "grad_norm": 0.0596277117729187, "learning_rate": 0.0001083757144785994, "loss": 0.2837, "step": 25296 }, { "epoch": 2.0493357096565132, "grad_norm": 0.05721274018287659, "learning_rate": 0.00010837121382600476, "loss": 0.2543, "step": 25297 }, { "epoch": 2.0494167206740115, "grad_norm": 0.08060150593519211, "learning_rate": 0.00010836671317341014, "loss": 0.2573, "step": 25298 }, { "epoch": 2.04949773169151, "grad_norm": 0.05760250240564346, "learning_rate": 0.00010836221252081554, "loss": 0.2385, "step": 25299 }, { "epoch": 2.0495787427090084, "grad_norm": 0.07224278897047043, "learning_rate": 0.0001083577118682209, "loss": 0.2988, "step": 25300 }, { "epoch": 2.0496597537265067, "grad_norm": 0.06838857382535934, "learning_rate": 0.00010835321121562627, "loss": 0.2279, "step": 25301 }, { "epoch": 2.0497407647440054, "grad_norm": 0.07297328114509583, "learning_rate": 0.00010834871056303165, "loss": 0.3082, "step": 25302 }, { "epoch": 2.0498217757615036, "grad_norm": 0.07161819189786911, "learning_rate": 0.000108344209910437, "loss": 0.2788, "step": 25303 }, { "epoch": 2.049902786779002, "grad_norm": 0.057904768735170364, "learning_rate": 0.0001083397092578424, "loss": 0.2154, "step": 25304 }, { "epoch": 2.0499837977965005, "grad_norm": 0.06379827857017517, "learning_rate": 0.00010833520860524778, "loss": 0.2477, "step": 25305 }, { "epoch": 2.0500648088139988, "grad_norm": 0.05735756456851959, "learning_rate": 0.00010833070795265314, "loss": 0.2626, "step": 25306 }, { "epoch": 2.050145819831497, "grad_norm": 0.06780066341161728, "learning_rate": 0.00010832620730005851, "loss": 0.2818, "step": 25307 }, { "epoch": 2.0502268308489953, "grad_norm": 0.06374137103557587, "learning_rate": 0.00010832170664746389, "loss": 0.2399, "step": 25308 }, { "epoch": 2.050307841866494, "grad_norm": 0.06231587007641792, "learning_rate": 0.00010831720599486925, "loss": 0.2412, "step": 25309 }, { "epoch": 2.050388852883992, "grad_norm": 0.06339266896247864, "learning_rate": 0.00010831270534227465, "loss": 0.2633, "step": 25310 }, { "epoch": 2.0504698639014904, "grad_norm": 0.059382013976573944, "learning_rate": 0.00010830820468968002, "loss": 0.249, "step": 25311 }, { "epoch": 2.050550874918989, "grad_norm": 0.05026280879974365, "learning_rate": 0.00010830370403708538, "loss": 0.261, "step": 25312 }, { "epoch": 2.0506318859364874, "grad_norm": 0.05445680394768715, "learning_rate": 0.00010829920338449076, "loss": 0.2356, "step": 25313 }, { "epoch": 2.0507128969539856, "grad_norm": 0.047062940895557404, "learning_rate": 0.00010829470273189613, "loss": 0.2241, "step": 25314 }, { "epoch": 2.0507939079714843, "grad_norm": 0.06622743606567383, "learning_rate": 0.00010829020207930149, "loss": 0.2578, "step": 25315 }, { "epoch": 2.0508749189889826, "grad_norm": 0.055635739117860794, "learning_rate": 0.00010828570142670689, "loss": 0.2606, "step": 25316 }, { "epoch": 2.050955930006481, "grad_norm": 0.054714541882276535, "learning_rate": 0.00010828120077411226, "loss": 0.2839, "step": 25317 }, { "epoch": 2.051036941023979, "grad_norm": 0.056158315390348434, "learning_rate": 0.00010827670012151762, "loss": 0.2433, "step": 25318 }, { "epoch": 2.0511179520414777, "grad_norm": 0.04764556884765625, "learning_rate": 0.000108272199468923, "loss": 0.2449, "step": 25319 }, { "epoch": 2.051198963058976, "grad_norm": 0.061281781643629074, "learning_rate": 0.00010826769881632837, "loss": 0.3081, "step": 25320 }, { "epoch": 2.051279974076474, "grad_norm": 0.05917587876319885, "learning_rate": 0.00010826319816373373, "loss": 0.2426, "step": 25321 }, { "epoch": 2.051360985093973, "grad_norm": 0.05930584669113159, "learning_rate": 0.00010825869751113913, "loss": 0.2238, "step": 25322 }, { "epoch": 2.051441996111471, "grad_norm": 0.06175884231925011, "learning_rate": 0.0001082541968585445, "loss": 0.2367, "step": 25323 }, { "epoch": 2.0515230071289694, "grad_norm": 0.06388114392757416, "learning_rate": 0.00010824969620594986, "loss": 0.2432, "step": 25324 }, { "epoch": 2.051604018146468, "grad_norm": 0.05988280847668648, "learning_rate": 0.00010824519555335524, "loss": 0.2363, "step": 25325 }, { "epoch": 2.0516850291639663, "grad_norm": 0.07179353386163712, "learning_rate": 0.00010824069490076061, "loss": 0.2732, "step": 25326 }, { "epoch": 2.0517660401814646, "grad_norm": 0.07074914127588272, "learning_rate": 0.00010823619424816597, "loss": 0.212, "step": 25327 }, { "epoch": 2.0518470511989633, "grad_norm": 0.07107607275247574, "learning_rate": 0.00010823169359557137, "loss": 0.2568, "step": 25328 }, { "epoch": 2.0519280622164615, "grad_norm": 0.05105834826827049, "learning_rate": 0.00010822719294297675, "loss": 0.2546, "step": 25329 }, { "epoch": 2.0520090732339598, "grad_norm": 0.06509406864643097, "learning_rate": 0.0001082226922903821, "loss": 0.288, "step": 25330 }, { "epoch": 2.052090084251458, "grad_norm": 0.0698138028383255, "learning_rate": 0.00010821819163778748, "loss": 0.2601, "step": 25331 }, { "epoch": 2.0521710952689567, "grad_norm": 0.06088358536362648, "learning_rate": 0.00010821369098519285, "loss": 0.2543, "step": 25332 }, { "epoch": 2.052252106286455, "grad_norm": 0.06391238421201706, "learning_rate": 0.00010820919033259824, "loss": 0.2437, "step": 25333 }, { "epoch": 2.052333117303953, "grad_norm": 0.05070818215608597, "learning_rate": 0.00010820468968000361, "loss": 0.2322, "step": 25334 }, { "epoch": 2.052414128321452, "grad_norm": 0.06086651608347893, "learning_rate": 0.00010820018902740899, "loss": 0.2405, "step": 25335 }, { "epoch": 2.05249513933895, "grad_norm": 0.06370527297258377, "learning_rate": 0.00010819568837481435, "loss": 0.2719, "step": 25336 }, { "epoch": 2.0525761503564484, "grad_norm": 0.08593538403511047, "learning_rate": 0.00010819118772221972, "loss": 0.2623, "step": 25337 }, { "epoch": 2.052657161373947, "grad_norm": 0.06916448473930359, "learning_rate": 0.0001081866870696251, "loss": 0.3053, "step": 25338 }, { "epoch": 2.0527381723914453, "grad_norm": 0.06832996010780334, "learning_rate": 0.00010818218641703048, "loss": 0.2716, "step": 25339 }, { "epoch": 2.0528191834089435, "grad_norm": 0.052606839686632156, "learning_rate": 0.00010817768576443586, "loss": 0.2287, "step": 25340 }, { "epoch": 2.052900194426442, "grad_norm": 0.07673200219869614, "learning_rate": 0.00010817318511184123, "loss": 0.2445, "step": 25341 }, { "epoch": 2.0529812054439405, "grad_norm": 0.0654953345656395, "learning_rate": 0.00010816868445924659, "loss": 0.2664, "step": 25342 }, { "epoch": 2.0530622164614387, "grad_norm": 0.06384981423616409, "learning_rate": 0.00010816418380665196, "loss": 0.2609, "step": 25343 }, { "epoch": 2.053143227478937, "grad_norm": 0.0635632798075676, "learning_rate": 0.00010815968315405734, "loss": 0.2574, "step": 25344 }, { "epoch": 2.0532242384964356, "grad_norm": 0.06317467987537384, "learning_rate": 0.00010815518250146272, "loss": 0.2562, "step": 25345 }, { "epoch": 2.053305249513934, "grad_norm": 0.061597634106874466, "learning_rate": 0.0001081506818488681, "loss": 0.2435, "step": 25346 }, { "epoch": 2.053386260531432, "grad_norm": 0.06339189410209656, "learning_rate": 0.00010814618119627347, "loss": 0.2619, "step": 25347 }, { "epoch": 2.053467271548931, "grad_norm": 0.06991313397884369, "learning_rate": 0.00010814168054367883, "loss": 0.2435, "step": 25348 }, { "epoch": 2.053548282566429, "grad_norm": 0.06686621904373169, "learning_rate": 0.0001081371798910842, "loss": 0.274, "step": 25349 }, { "epoch": 2.0536292935839273, "grad_norm": 0.07232963293790817, "learning_rate": 0.00010813267923848958, "loss": 0.2884, "step": 25350 }, { "epoch": 2.053710304601426, "grad_norm": 0.05980679392814636, "learning_rate": 0.00010812817858589497, "loss": 0.2728, "step": 25351 }, { "epoch": 2.0537913156189243, "grad_norm": 0.0768938660621643, "learning_rate": 0.00010812367793330034, "loss": 0.3206, "step": 25352 }, { "epoch": 2.0538723266364225, "grad_norm": 0.051484767347574234, "learning_rate": 0.00010811917728070571, "loss": 0.2502, "step": 25353 }, { "epoch": 2.0539533376539207, "grad_norm": 0.04832427576184273, "learning_rate": 0.00010811467662811107, "loss": 0.2379, "step": 25354 }, { "epoch": 2.0540343486714194, "grad_norm": 0.0567401684820652, "learning_rate": 0.00010811017597551645, "loss": 0.282, "step": 25355 }, { "epoch": 2.0541153596889177, "grad_norm": 0.07354261726140976, "learning_rate": 0.00010810567532292182, "loss": 0.2407, "step": 25356 }, { "epoch": 2.054196370706416, "grad_norm": 0.06890858709812164, "learning_rate": 0.00010810117467032721, "loss": 0.2768, "step": 25357 }, { "epoch": 2.0542773817239146, "grad_norm": 0.06346815824508667, "learning_rate": 0.00010809667401773258, "loss": 0.2608, "step": 25358 }, { "epoch": 2.054358392741413, "grad_norm": 0.05928133428096771, "learning_rate": 0.00010809217336513795, "loss": 0.2616, "step": 25359 }, { "epoch": 2.054439403758911, "grad_norm": 0.05636308342218399, "learning_rate": 0.00010808767271254331, "loss": 0.2779, "step": 25360 }, { "epoch": 2.05452041477641, "grad_norm": 0.0737721249461174, "learning_rate": 0.00010808317205994869, "loss": 0.317, "step": 25361 }, { "epoch": 2.054601425793908, "grad_norm": 0.06959801912307739, "learning_rate": 0.00010807867140735408, "loss": 0.2139, "step": 25362 }, { "epoch": 2.0546824368114063, "grad_norm": 0.06847614049911499, "learning_rate": 0.00010807417075475945, "loss": 0.2532, "step": 25363 }, { "epoch": 2.0547634478289045, "grad_norm": 0.055373258888721466, "learning_rate": 0.00010806967010216482, "loss": 0.2381, "step": 25364 }, { "epoch": 2.054844458846403, "grad_norm": 0.05136518180370331, "learning_rate": 0.0001080651694495702, "loss": 0.2581, "step": 25365 }, { "epoch": 2.0549254698639015, "grad_norm": 0.0626882016658783, "learning_rate": 0.00010806066879697556, "loss": 0.2773, "step": 25366 }, { "epoch": 2.0550064808813997, "grad_norm": 0.051707156002521515, "learning_rate": 0.00010805616814438093, "loss": 0.2434, "step": 25367 }, { "epoch": 2.0550874918988984, "grad_norm": 0.06586476415395737, "learning_rate": 0.00010805166749178633, "loss": 0.2737, "step": 25368 }, { "epoch": 2.0551685029163966, "grad_norm": 0.06009049341082573, "learning_rate": 0.00010804716683919169, "loss": 0.2667, "step": 25369 }, { "epoch": 2.055249513933895, "grad_norm": 0.07964500039815903, "learning_rate": 0.00010804266618659706, "loss": 0.2736, "step": 25370 }, { "epoch": 2.0553305249513936, "grad_norm": 0.06829434633255005, "learning_rate": 0.00010803816553400244, "loss": 0.2558, "step": 25371 }, { "epoch": 2.055411535968892, "grad_norm": 0.0760689228773117, "learning_rate": 0.0001080336648814078, "loss": 0.2657, "step": 25372 }, { "epoch": 2.05549254698639, "grad_norm": 0.07334722578525543, "learning_rate": 0.00010802916422881317, "loss": 0.28, "step": 25373 }, { "epoch": 2.0555735580038887, "grad_norm": 0.05663280189037323, "learning_rate": 0.00010802466357621857, "loss": 0.2564, "step": 25374 }, { "epoch": 2.055654569021387, "grad_norm": 0.07035217434167862, "learning_rate": 0.00010802016292362393, "loss": 0.2701, "step": 25375 }, { "epoch": 2.0557355800388852, "grad_norm": 0.07113281637430191, "learning_rate": 0.0001080156622710293, "loss": 0.2341, "step": 25376 }, { "epoch": 2.0558165910563835, "grad_norm": 0.06113379821181297, "learning_rate": 0.00010801116161843468, "loss": 0.2442, "step": 25377 }, { "epoch": 2.055897602073882, "grad_norm": 0.06307367980480194, "learning_rate": 0.00010800666096584004, "loss": 0.293, "step": 25378 }, { "epoch": 2.0559786130913804, "grad_norm": 0.05947719141840935, "learning_rate": 0.00010800216031324541, "loss": 0.2455, "step": 25379 }, { "epoch": 2.0560596241088787, "grad_norm": 0.06589431315660477, "learning_rate": 0.00010799765966065081, "loss": 0.275, "step": 25380 }, { "epoch": 2.0561406351263773, "grad_norm": 0.061434660106897354, "learning_rate": 0.00010799315900805617, "loss": 0.2388, "step": 25381 }, { "epoch": 2.0562216461438756, "grad_norm": 0.06392492353916168, "learning_rate": 0.00010798865835546155, "loss": 0.2626, "step": 25382 }, { "epoch": 2.056302657161374, "grad_norm": 0.062192004173994064, "learning_rate": 0.00010798415770286692, "loss": 0.2075, "step": 25383 }, { "epoch": 2.0563836681788725, "grad_norm": 0.0811699777841568, "learning_rate": 0.00010797965705027228, "loss": 0.3011, "step": 25384 }, { "epoch": 2.0564646791963708, "grad_norm": 0.05931561812758446, "learning_rate": 0.00010797515639767768, "loss": 0.2761, "step": 25385 }, { "epoch": 2.056545690213869, "grad_norm": 0.06587688624858856, "learning_rate": 0.00010797065574508306, "loss": 0.2879, "step": 25386 }, { "epoch": 2.0566267012313673, "grad_norm": 0.05588224530220032, "learning_rate": 0.00010796615509248842, "loss": 0.2513, "step": 25387 }, { "epoch": 2.056707712248866, "grad_norm": 0.06199725717306137, "learning_rate": 0.00010796165443989379, "loss": 0.2752, "step": 25388 }, { "epoch": 2.056788723266364, "grad_norm": 0.04725608974695206, "learning_rate": 0.00010795715378729916, "loss": 0.2246, "step": 25389 }, { "epoch": 2.0568697342838624, "grad_norm": 0.06411755084991455, "learning_rate": 0.00010795265313470452, "loss": 0.2443, "step": 25390 }, { "epoch": 2.056950745301361, "grad_norm": 0.06652890145778656, "learning_rate": 0.00010794815248210992, "loss": 0.2282, "step": 25391 }, { "epoch": 2.0570317563188594, "grad_norm": 0.05945013090968132, "learning_rate": 0.0001079436518295153, "loss": 0.3013, "step": 25392 }, { "epoch": 2.0571127673363576, "grad_norm": 0.07318027317523956, "learning_rate": 0.00010793915117692066, "loss": 0.2679, "step": 25393 }, { "epoch": 2.0571937783538563, "grad_norm": 0.05756182223558426, "learning_rate": 0.00010793465052432603, "loss": 0.2471, "step": 25394 }, { "epoch": 2.0572747893713546, "grad_norm": 0.0655335783958435, "learning_rate": 0.0001079301498717314, "loss": 0.2832, "step": 25395 }, { "epoch": 2.057355800388853, "grad_norm": 0.07269290834665298, "learning_rate": 0.00010792564921913676, "loss": 0.2433, "step": 25396 }, { "epoch": 2.057436811406351, "grad_norm": 0.0656035766005516, "learning_rate": 0.00010792114856654216, "loss": 0.2351, "step": 25397 }, { "epoch": 2.0575178224238497, "grad_norm": 0.06749686598777771, "learning_rate": 0.00010791664791394754, "loss": 0.2705, "step": 25398 }, { "epoch": 2.057598833441348, "grad_norm": 0.06477170437574387, "learning_rate": 0.0001079121472613529, "loss": 0.2732, "step": 25399 }, { "epoch": 2.057679844458846, "grad_norm": 0.05468861386179924, "learning_rate": 0.00010790764660875827, "loss": 0.2596, "step": 25400 }, { "epoch": 2.057760855476345, "grad_norm": 0.059450436383485794, "learning_rate": 0.00010790314595616365, "loss": 0.2385, "step": 25401 }, { "epoch": 2.057841866493843, "grad_norm": 0.05819116160273552, "learning_rate": 0.00010789864530356902, "loss": 0.2732, "step": 25402 }, { "epoch": 2.0579228775113414, "grad_norm": 0.06588525325059891, "learning_rate": 0.0001078941446509744, "loss": 0.2763, "step": 25403 }, { "epoch": 2.05800388852884, "grad_norm": 0.059603944420814514, "learning_rate": 0.00010788964399837978, "loss": 0.2468, "step": 25404 }, { "epoch": 2.0580848995463383, "grad_norm": 0.0704275518655777, "learning_rate": 0.00010788514334578514, "loss": 0.2963, "step": 25405 }, { "epoch": 2.0581659105638366, "grad_norm": 0.053840700536966324, "learning_rate": 0.00010788064269319051, "loss": 0.2356, "step": 25406 }, { "epoch": 2.0582469215813353, "grad_norm": 0.05670013278722763, "learning_rate": 0.00010787614204059589, "loss": 0.2554, "step": 25407 }, { "epoch": 2.0583279325988335, "grad_norm": 0.05271613597869873, "learning_rate": 0.00010787164138800126, "loss": 0.2384, "step": 25408 }, { "epoch": 2.0584089436163318, "grad_norm": 0.05470237880945206, "learning_rate": 0.00010786714073540665, "loss": 0.2539, "step": 25409 }, { "epoch": 2.05848995463383, "grad_norm": 0.06058771535754204, "learning_rate": 0.00010786264008281202, "loss": 0.2643, "step": 25410 }, { "epoch": 2.0585709656513287, "grad_norm": 0.061354998499155045, "learning_rate": 0.00010785813943021738, "loss": 0.267, "step": 25411 }, { "epoch": 2.058651976668827, "grad_norm": 0.07212673872709274, "learning_rate": 0.00010785363877762276, "loss": 0.2658, "step": 25412 }, { "epoch": 2.058732987686325, "grad_norm": 0.05706482008099556, "learning_rate": 0.00010784913812502813, "loss": 0.2526, "step": 25413 }, { "epoch": 2.058813998703824, "grad_norm": 0.06004820019006729, "learning_rate": 0.00010784463747243352, "loss": 0.2527, "step": 25414 }, { "epoch": 2.058895009721322, "grad_norm": 0.06409589946269989, "learning_rate": 0.00010784013681983889, "loss": 0.2244, "step": 25415 }, { "epoch": 2.0589760207388204, "grad_norm": 0.05893712118268013, "learning_rate": 0.00010783563616724426, "loss": 0.2805, "step": 25416 }, { "epoch": 2.059057031756319, "grad_norm": 0.06911461800336838, "learning_rate": 0.00010783113551464962, "loss": 0.2239, "step": 25417 }, { "epoch": 2.0591380427738173, "grad_norm": 0.07301256060600281, "learning_rate": 0.000107826634862055, "loss": 0.2678, "step": 25418 }, { "epoch": 2.0592190537913155, "grad_norm": 0.06696818768978119, "learning_rate": 0.00010782213420946037, "loss": 0.2502, "step": 25419 }, { "epoch": 2.059300064808814, "grad_norm": 0.07134930789470673, "learning_rate": 0.00010781763355686576, "loss": 0.2564, "step": 25420 }, { "epoch": 2.0593810758263125, "grad_norm": 0.061927393078804016, "learning_rate": 0.00010781313290427113, "loss": 0.234, "step": 25421 }, { "epoch": 2.0594620868438107, "grad_norm": 0.05794256180524826, "learning_rate": 0.0001078086322516765, "loss": 0.2276, "step": 25422 }, { "epoch": 2.059543097861309, "grad_norm": 0.061672911047935486, "learning_rate": 0.00010780413159908186, "loss": 0.2486, "step": 25423 }, { "epoch": 2.0596241088788076, "grad_norm": 0.06663712114095688, "learning_rate": 0.00010779963094648724, "loss": 0.2562, "step": 25424 }, { "epoch": 2.059705119896306, "grad_norm": 0.06700772792100906, "learning_rate": 0.00010779513029389261, "loss": 0.2657, "step": 25425 }, { "epoch": 2.059786130913804, "grad_norm": 0.051759883761405945, "learning_rate": 0.000107790629641298, "loss": 0.2297, "step": 25426 }, { "epoch": 2.059867141931303, "grad_norm": 0.06241053715348244, "learning_rate": 0.00010778612898870337, "loss": 0.2642, "step": 25427 }, { "epoch": 2.059948152948801, "grad_norm": 0.05102437734603882, "learning_rate": 0.00010778162833610875, "loss": 0.2665, "step": 25428 }, { "epoch": 2.0600291639662993, "grad_norm": 0.08531544357538223, "learning_rate": 0.0001077771276835141, "loss": 0.2539, "step": 25429 }, { "epoch": 2.060110174983798, "grad_norm": 0.06146989390254021, "learning_rate": 0.00010777262703091948, "loss": 0.2673, "step": 25430 }, { "epoch": 2.0601911860012962, "grad_norm": 0.07017390429973602, "learning_rate": 0.00010776812637832485, "loss": 0.271, "step": 25431 }, { "epoch": 2.0602721970187945, "grad_norm": 0.056769225746393204, "learning_rate": 0.00010776362572573024, "loss": 0.2696, "step": 25432 }, { "epoch": 2.0603532080362927, "grad_norm": 0.061329200863838196, "learning_rate": 0.00010775912507313561, "loss": 0.3056, "step": 25433 }, { "epoch": 2.0604342190537914, "grad_norm": 0.05566471815109253, "learning_rate": 0.00010775462442054099, "loss": 0.236, "step": 25434 }, { "epoch": 2.0605152300712897, "grad_norm": 0.04820633307099342, "learning_rate": 0.00010775012376794635, "loss": 0.2328, "step": 25435 }, { "epoch": 2.060596241088788, "grad_norm": 0.06556723266839981, "learning_rate": 0.00010774562311535172, "loss": 0.26, "step": 25436 }, { "epoch": 2.0606772521062866, "grad_norm": 0.05982845649123192, "learning_rate": 0.00010774112246275712, "loss": 0.2371, "step": 25437 }, { "epoch": 2.060758263123785, "grad_norm": 0.0625552088022232, "learning_rate": 0.00010773662181016248, "loss": 0.2744, "step": 25438 }, { "epoch": 2.060839274141283, "grad_norm": 0.07399687170982361, "learning_rate": 0.00010773212115756786, "loss": 0.251, "step": 25439 }, { "epoch": 2.060920285158782, "grad_norm": 0.057092901319265366, "learning_rate": 0.00010772762050497323, "loss": 0.2365, "step": 25440 }, { "epoch": 2.06100129617628, "grad_norm": 0.05995013564825058, "learning_rate": 0.00010772311985237859, "loss": 0.2596, "step": 25441 }, { "epoch": 2.0610823071937783, "grad_norm": 0.08690828084945679, "learning_rate": 0.00010771861919978396, "loss": 0.286, "step": 25442 }, { "epoch": 2.0611633182112765, "grad_norm": 0.07108601182699203, "learning_rate": 0.00010771411854718936, "loss": 0.2676, "step": 25443 }, { "epoch": 2.061244329228775, "grad_norm": 0.07897786796092987, "learning_rate": 0.00010770961789459472, "loss": 0.2668, "step": 25444 }, { "epoch": 2.0613253402462735, "grad_norm": 0.05581003800034523, "learning_rate": 0.0001077051172420001, "loss": 0.2658, "step": 25445 }, { "epoch": 2.0614063512637717, "grad_norm": 0.05923276022076607, "learning_rate": 0.00010770061658940547, "loss": 0.2794, "step": 25446 }, { "epoch": 2.0614873622812704, "grad_norm": 0.057664401829242706, "learning_rate": 0.00010769611593681083, "loss": 0.2442, "step": 25447 }, { "epoch": 2.0615683732987686, "grad_norm": 0.060754094272851944, "learning_rate": 0.0001076916152842162, "loss": 0.2401, "step": 25448 }, { "epoch": 2.061649384316267, "grad_norm": 0.05699896439909935, "learning_rate": 0.0001076871146316216, "loss": 0.2309, "step": 25449 }, { "epoch": 2.0617303953337656, "grad_norm": 0.062410902231931686, "learning_rate": 0.00010768261397902697, "loss": 0.2446, "step": 25450 }, { "epoch": 2.061811406351264, "grad_norm": 0.08376600593328476, "learning_rate": 0.00010767811332643234, "loss": 0.2463, "step": 25451 }, { "epoch": 2.061892417368762, "grad_norm": 0.05989118665456772, "learning_rate": 0.00010767361267383771, "loss": 0.2559, "step": 25452 }, { "epoch": 2.0619734283862607, "grad_norm": 0.05913263559341431, "learning_rate": 0.00010766911202124307, "loss": 0.2597, "step": 25453 }, { "epoch": 2.062054439403759, "grad_norm": 0.0431220605969429, "learning_rate": 0.00010766461136864845, "loss": 0.1989, "step": 25454 }, { "epoch": 2.0621354504212572, "grad_norm": 0.06682056933641434, "learning_rate": 0.00010766011071605385, "loss": 0.2754, "step": 25455 }, { "epoch": 2.0622164614387555, "grad_norm": 0.07611300051212311, "learning_rate": 0.00010765561006345921, "loss": 0.2821, "step": 25456 }, { "epoch": 2.062297472456254, "grad_norm": 0.06988724321126938, "learning_rate": 0.00010765110941086458, "loss": 0.2711, "step": 25457 }, { "epoch": 2.0623784834737524, "grad_norm": 0.061311472207307816, "learning_rate": 0.00010764660875826995, "loss": 0.238, "step": 25458 }, { "epoch": 2.0624594944912507, "grad_norm": 0.05116477981209755, "learning_rate": 0.00010764210810567531, "loss": 0.247, "step": 25459 }, { "epoch": 2.0625405055087493, "grad_norm": 0.06642013043165207, "learning_rate": 0.00010763760745308069, "loss": 0.2614, "step": 25460 }, { "epoch": 2.0626215165262476, "grad_norm": 0.05731287598609924, "learning_rate": 0.00010763310680048609, "loss": 0.2508, "step": 25461 }, { "epoch": 2.062702527543746, "grad_norm": 0.06660475581884384, "learning_rate": 0.00010762860614789145, "loss": 0.249, "step": 25462 }, { "epoch": 2.0627835385612445, "grad_norm": 0.07254832983016968, "learning_rate": 0.00010762410549529682, "loss": 0.3014, "step": 25463 }, { "epoch": 2.0628645495787428, "grad_norm": 0.06310251355171204, "learning_rate": 0.0001076196048427022, "loss": 0.2488, "step": 25464 }, { "epoch": 2.062945560596241, "grad_norm": 0.06924467533826828, "learning_rate": 0.00010761510419010756, "loss": 0.2594, "step": 25465 }, { "epoch": 2.0630265716137393, "grad_norm": 0.06101961433887482, "learning_rate": 0.00010761060353751296, "loss": 0.2605, "step": 25466 }, { "epoch": 2.063107582631238, "grad_norm": 0.06226739287376404, "learning_rate": 0.00010760610288491833, "loss": 0.2286, "step": 25467 }, { "epoch": 2.063188593648736, "grad_norm": 0.05867013707756996, "learning_rate": 0.00010760160223232369, "loss": 0.2512, "step": 25468 }, { "epoch": 2.0632696046662344, "grad_norm": 0.05932064726948738, "learning_rate": 0.00010759710157972906, "loss": 0.2728, "step": 25469 }, { "epoch": 2.063350615683733, "grad_norm": 0.07166090607643127, "learning_rate": 0.00010759260092713444, "loss": 0.2621, "step": 25470 }, { "epoch": 2.0634316267012314, "grad_norm": 0.05555148422718048, "learning_rate": 0.00010758810027453981, "loss": 0.2402, "step": 25471 }, { "epoch": 2.0635126377187296, "grad_norm": 0.05288715660572052, "learning_rate": 0.0001075835996219452, "loss": 0.2352, "step": 25472 }, { "epoch": 2.0635936487362283, "grad_norm": 0.062381304800510406, "learning_rate": 0.00010757909896935057, "loss": 0.2925, "step": 25473 }, { "epoch": 2.0636746597537265, "grad_norm": 0.06400690227746964, "learning_rate": 0.00010757459831675593, "loss": 0.2709, "step": 25474 }, { "epoch": 2.063755670771225, "grad_norm": 0.06255996227264404, "learning_rate": 0.0001075700976641613, "loss": 0.2678, "step": 25475 }, { "epoch": 2.0638366817887235, "grad_norm": 0.059242524206638336, "learning_rate": 0.00010756559701156668, "loss": 0.2982, "step": 25476 }, { "epoch": 2.0639176928062217, "grad_norm": 0.06539078801870346, "learning_rate": 0.00010756109635897205, "loss": 0.2829, "step": 25477 }, { "epoch": 2.06399870382372, "grad_norm": 0.05289076268672943, "learning_rate": 0.00010755659570637744, "loss": 0.2096, "step": 25478 }, { "epoch": 2.064079714841218, "grad_norm": 0.07497277110815048, "learning_rate": 0.00010755209505378281, "loss": 0.263, "step": 25479 }, { "epoch": 2.064160725858717, "grad_norm": 0.0586920790374279, "learning_rate": 0.00010754759440118817, "loss": 0.2514, "step": 25480 }, { "epoch": 2.064241736876215, "grad_norm": 0.07065203785896301, "learning_rate": 0.00010754309374859355, "loss": 0.2484, "step": 25481 }, { "epoch": 2.0643227478937134, "grad_norm": 0.0774066299200058, "learning_rate": 0.00010753859309599892, "loss": 0.2964, "step": 25482 }, { "epoch": 2.064403758911212, "grad_norm": 0.06121392548084259, "learning_rate": 0.0001075340924434043, "loss": 0.2298, "step": 25483 }, { "epoch": 2.0644847699287103, "grad_norm": 0.08433547616004944, "learning_rate": 0.00010752959179080968, "loss": 0.3256, "step": 25484 }, { "epoch": 2.0645657809462086, "grad_norm": 0.06554676592350006, "learning_rate": 0.00010752509113821506, "loss": 0.2843, "step": 25485 }, { "epoch": 2.0646467919637073, "grad_norm": 0.0544719398021698, "learning_rate": 0.00010752059048562042, "loss": 0.2767, "step": 25486 }, { "epoch": 2.0647278029812055, "grad_norm": 0.06069955229759216, "learning_rate": 0.00010751608983302579, "loss": 0.2681, "step": 25487 }, { "epoch": 2.0648088139987038, "grad_norm": 0.0706348568201065, "learning_rate": 0.00010751158918043116, "loss": 0.2734, "step": 25488 }, { "epoch": 2.064889825016202, "grad_norm": 0.0692746564745903, "learning_rate": 0.00010750708852783655, "loss": 0.2547, "step": 25489 }, { "epoch": 2.0649708360337007, "grad_norm": 0.06689253449440002, "learning_rate": 0.00010750258787524192, "loss": 0.2882, "step": 25490 }, { "epoch": 2.065051847051199, "grad_norm": 0.0553397610783577, "learning_rate": 0.0001074980872226473, "loss": 0.2507, "step": 25491 }, { "epoch": 2.065132858068697, "grad_norm": 0.07342147827148438, "learning_rate": 0.00010749358657005266, "loss": 0.2894, "step": 25492 }, { "epoch": 2.065213869086196, "grad_norm": 0.06011483445763588, "learning_rate": 0.00010748908591745803, "loss": 0.2383, "step": 25493 }, { "epoch": 2.065294880103694, "grad_norm": 0.06471290439367294, "learning_rate": 0.0001074845852648634, "loss": 0.2363, "step": 25494 }, { "epoch": 2.0653758911211924, "grad_norm": 0.058766886591911316, "learning_rate": 0.00010748008461226879, "loss": 0.2708, "step": 25495 }, { "epoch": 2.065456902138691, "grad_norm": 0.04799140617251396, "learning_rate": 0.00010747558395967417, "loss": 0.2145, "step": 25496 }, { "epoch": 2.0655379131561893, "grad_norm": 0.05512750893831253, "learning_rate": 0.00010747108330707954, "loss": 0.2369, "step": 25497 }, { "epoch": 2.0656189241736875, "grad_norm": 0.0533377043902874, "learning_rate": 0.0001074665826544849, "loss": 0.2161, "step": 25498 }, { "epoch": 2.065699935191186, "grad_norm": 0.06390156596899033, "learning_rate": 0.00010746208200189027, "loss": 0.2746, "step": 25499 }, { "epoch": 2.0657809462086845, "grad_norm": 0.052978046238422394, "learning_rate": 0.00010745758134929565, "loss": 0.2455, "step": 25500 }, { "epoch": 2.0658619572261827, "grad_norm": 0.059611186385154724, "learning_rate": 0.00010745308069670103, "loss": 0.2146, "step": 25501 }, { "epoch": 2.065942968243681, "grad_norm": 0.07775352895259857, "learning_rate": 0.0001074485800441064, "loss": 0.2481, "step": 25502 }, { "epoch": 2.0660239792611796, "grad_norm": 0.05068066716194153, "learning_rate": 0.00010744407939151178, "loss": 0.2753, "step": 25503 }, { "epoch": 2.066104990278678, "grad_norm": 0.0600021667778492, "learning_rate": 0.00010743957873891714, "loss": 0.2884, "step": 25504 }, { "epoch": 2.066186001296176, "grad_norm": 0.05596313253045082, "learning_rate": 0.00010743507808632251, "loss": 0.2536, "step": 25505 }, { "epoch": 2.066267012313675, "grad_norm": 0.05262834578752518, "learning_rate": 0.00010743057743372789, "loss": 0.2505, "step": 25506 }, { "epoch": 2.066348023331173, "grad_norm": 0.059846874326467514, "learning_rate": 0.00010742607678113327, "loss": 0.2275, "step": 25507 }, { "epoch": 2.0664290343486713, "grad_norm": 0.05899396911263466, "learning_rate": 0.00010742157612853865, "loss": 0.2734, "step": 25508 }, { "epoch": 2.06651004536617, "grad_norm": 0.05113719776272774, "learning_rate": 0.00010741707547594402, "loss": 0.2496, "step": 25509 }, { "epoch": 2.0665910563836682, "grad_norm": 0.05043940991163254, "learning_rate": 0.00010741257482334938, "loss": 0.2103, "step": 25510 }, { "epoch": 2.0666720674011665, "grad_norm": 0.06341768801212311, "learning_rate": 0.00010740807417075476, "loss": 0.2821, "step": 25511 }, { "epoch": 2.0667530784186647, "grad_norm": 0.056847795844078064, "learning_rate": 0.00010740357351816013, "loss": 0.229, "step": 25512 }, { "epoch": 2.0668340894361634, "grad_norm": 0.07382318377494812, "learning_rate": 0.00010739907286556552, "loss": 0.2489, "step": 25513 }, { "epoch": 2.0669151004536617, "grad_norm": 0.07611710578203201, "learning_rate": 0.00010739457221297089, "loss": 0.2719, "step": 25514 }, { "epoch": 2.06699611147116, "grad_norm": 0.060138873755931854, "learning_rate": 0.00010739007156037626, "loss": 0.2736, "step": 25515 }, { "epoch": 2.0670771224886586, "grad_norm": 0.06036220118403435, "learning_rate": 0.00010738557090778162, "loss": 0.2438, "step": 25516 }, { "epoch": 2.067158133506157, "grad_norm": 0.06158443167805672, "learning_rate": 0.000107381070255187, "loss": 0.2693, "step": 25517 }, { "epoch": 2.067239144523655, "grad_norm": 0.07605554163455963, "learning_rate": 0.0001073765696025924, "loss": 0.2527, "step": 25518 }, { "epoch": 2.067320155541154, "grad_norm": 0.07047504931688309, "learning_rate": 0.00010737206894999776, "loss": 0.2989, "step": 25519 }, { "epoch": 2.067401166558652, "grad_norm": 0.06754288822412491, "learning_rate": 0.00010736756829740313, "loss": 0.2966, "step": 25520 }, { "epoch": 2.0674821775761503, "grad_norm": 0.05999528989195824, "learning_rate": 0.0001073630676448085, "loss": 0.248, "step": 25521 }, { "epoch": 2.067563188593649, "grad_norm": 0.059076789766550064, "learning_rate": 0.00010735856699221387, "loss": 0.2683, "step": 25522 }, { "epoch": 2.067644199611147, "grad_norm": 0.058501314371824265, "learning_rate": 0.00010735406633961924, "loss": 0.2538, "step": 25523 }, { "epoch": 2.0677252106286454, "grad_norm": 0.06582152098417282, "learning_rate": 0.00010734956568702464, "loss": 0.2636, "step": 25524 }, { "epoch": 2.0678062216461437, "grad_norm": 0.05614101141691208, "learning_rate": 0.00010734506503443, "loss": 0.2706, "step": 25525 }, { "epoch": 2.0678872326636424, "grad_norm": 0.05233749374747276, "learning_rate": 0.00010734056438183537, "loss": 0.2506, "step": 25526 }, { "epoch": 2.0679682436811406, "grad_norm": 0.058415673673152924, "learning_rate": 0.00010733606372924075, "loss": 0.2374, "step": 25527 }, { "epoch": 2.068049254698639, "grad_norm": 0.05963871255517006, "learning_rate": 0.00010733156307664611, "loss": 0.2552, "step": 25528 }, { "epoch": 2.0681302657161376, "grad_norm": 0.05692227929830551, "learning_rate": 0.00010732706242405148, "loss": 0.2423, "step": 25529 }, { "epoch": 2.068211276733636, "grad_norm": 0.06456737965345383, "learning_rate": 0.00010732256177145688, "loss": 0.273, "step": 25530 }, { "epoch": 2.068292287751134, "grad_norm": 0.0603487491607666, "learning_rate": 0.00010731806111886224, "loss": 0.2559, "step": 25531 }, { "epoch": 2.0683732987686327, "grad_norm": 0.07074466347694397, "learning_rate": 0.00010731356046626761, "loss": 0.2903, "step": 25532 }, { "epoch": 2.068454309786131, "grad_norm": 0.07579421997070312, "learning_rate": 0.00010730905981367299, "loss": 0.2499, "step": 25533 }, { "epoch": 2.0685353208036292, "grad_norm": 0.06210765987634659, "learning_rate": 0.00010730455916107835, "loss": 0.2876, "step": 25534 }, { "epoch": 2.0686163318211275, "grad_norm": 0.050779640674591064, "learning_rate": 0.00010730005850848372, "loss": 0.2516, "step": 25535 }, { "epoch": 2.068697342838626, "grad_norm": 0.05742275342345238, "learning_rate": 0.00010729555785588912, "loss": 0.2579, "step": 25536 }, { "epoch": 2.0687783538561244, "grad_norm": 0.06202203035354614, "learning_rate": 0.00010729105720329448, "loss": 0.256, "step": 25537 }, { "epoch": 2.0688593648736227, "grad_norm": 0.07720314711332321, "learning_rate": 0.00010728655655069986, "loss": 0.2485, "step": 25538 }, { "epoch": 2.0689403758911213, "grad_norm": 0.06743809580802917, "learning_rate": 0.00010728205589810523, "loss": 0.2238, "step": 25539 }, { "epoch": 2.0690213869086196, "grad_norm": 0.06037648394703865, "learning_rate": 0.0001072775552455106, "loss": 0.2561, "step": 25540 }, { "epoch": 2.069102397926118, "grad_norm": 0.0584062859416008, "learning_rate": 0.00010727305459291596, "loss": 0.2945, "step": 25541 }, { "epoch": 2.0691834089436165, "grad_norm": 0.06143295392394066, "learning_rate": 0.00010726855394032136, "loss": 0.2384, "step": 25542 }, { "epoch": 2.0692644199611148, "grad_norm": 0.06712021678686142, "learning_rate": 0.00010726405328772672, "loss": 0.2639, "step": 25543 }, { "epoch": 2.069345430978613, "grad_norm": 0.05841357633471489, "learning_rate": 0.0001072595526351321, "loss": 0.2739, "step": 25544 }, { "epoch": 2.0694264419961117, "grad_norm": 0.06780307739973068, "learning_rate": 0.00010725505198253747, "loss": 0.2708, "step": 25545 }, { "epoch": 2.06950745301361, "grad_norm": 0.07603610306978226, "learning_rate": 0.00010725055132994285, "loss": 0.3202, "step": 25546 }, { "epoch": 2.069588464031108, "grad_norm": 0.07834131270647049, "learning_rate": 0.00010724605067734823, "loss": 0.302, "step": 25547 }, { "epoch": 2.0696694750486064, "grad_norm": 0.06293410062789917, "learning_rate": 0.0001072415500247536, "loss": 0.2331, "step": 25548 }, { "epoch": 2.069750486066105, "grad_norm": 0.05901750549674034, "learning_rate": 0.00010723704937215897, "loss": 0.2451, "step": 25549 }, { "epoch": 2.0698314970836034, "grad_norm": 0.061935652047395706, "learning_rate": 0.00010723254871956434, "loss": 0.2806, "step": 25550 }, { "epoch": 2.0699125081011016, "grad_norm": 0.05839787423610687, "learning_rate": 0.00010722804806696971, "loss": 0.308, "step": 25551 }, { "epoch": 2.0699935191186003, "grad_norm": 0.061520643532276154, "learning_rate": 0.00010722354741437509, "loss": 0.2504, "step": 25552 }, { "epoch": 2.0700745301360985, "grad_norm": 0.06616219133138657, "learning_rate": 0.00010721904676178047, "loss": 0.3015, "step": 25553 }, { "epoch": 2.070155541153597, "grad_norm": 0.0596001073718071, "learning_rate": 0.00010721454610918585, "loss": 0.2737, "step": 25554 }, { "epoch": 2.0702365521710955, "grad_norm": 0.052469369024038315, "learning_rate": 0.00010721004545659121, "loss": 0.2847, "step": 25555 }, { "epoch": 2.0703175631885937, "grad_norm": 0.05862971395254135, "learning_rate": 0.00010720554480399658, "loss": 0.2468, "step": 25556 }, { "epoch": 2.070398574206092, "grad_norm": 0.05308549106121063, "learning_rate": 0.00010720104415140195, "loss": 0.2428, "step": 25557 }, { "epoch": 2.07047958522359, "grad_norm": 0.04823100194334984, "learning_rate": 0.00010719654349880733, "loss": 0.2398, "step": 25558 }, { "epoch": 2.070560596241089, "grad_norm": 0.04787187650799751, "learning_rate": 0.00010719204284621272, "loss": 0.2616, "step": 25559 }, { "epoch": 2.070641607258587, "grad_norm": 0.06318645179271698, "learning_rate": 0.00010718754219361809, "loss": 0.2735, "step": 25560 }, { "epoch": 2.0707226182760854, "grad_norm": 0.060162078589200974, "learning_rate": 0.00010718304154102345, "loss": 0.2522, "step": 25561 }, { "epoch": 2.070803629293584, "grad_norm": 0.06665724515914917, "learning_rate": 0.00010717854088842882, "loss": 0.2239, "step": 25562 }, { "epoch": 2.0708846403110823, "grad_norm": 0.06121491640806198, "learning_rate": 0.0001071740402358342, "loss": 0.2561, "step": 25563 }, { "epoch": 2.0709656513285806, "grad_norm": 0.06794225424528122, "learning_rate": 0.00010716953958323957, "loss": 0.2715, "step": 25564 }, { "epoch": 2.0710466623460793, "grad_norm": 0.05323499068617821, "learning_rate": 0.00010716503893064496, "loss": 0.269, "step": 25565 }, { "epoch": 2.0711276733635775, "grad_norm": 0.06246719881892204, "learning_rate": 0.00010716053827805033, "loss": 0.2422, "step": 25566 }, { "epoch": 2.0712086843810757, "grad_norm": 0.05139967426657677, "learning_rate": 0.00010715603762545569, "loss": 0.2417, "step": 25567 }, { "epoch": 2.071289695398574, "grad_norm": 0.06880932301282883, "learning_rate": 0.00010715153697286106, "loss": 0.2313, "step": 25568 }, { "epoch": 2.0713707064160727, "grad_norm": 0.06802390515804291, "learning_rate": 0.00010714703632026644, "loss": 0.2592, "step": 25569 }, { "epoch": 2.071451717433571, "grad_norm": 0.060402750968933105, "learning_rate": 0.00010714253566767183, "loss": 0.2621, "step": 25570 }, { "epoch": 2.071532728451069, "grad_norm": 0.05299397557973862, "learning_rate": 0.0001071380350150772, "loss": 0.2308, "step": 25571 }, { "epoch": 2.071613739468568, "grad_norm": 0.05250426381826401, "learning_rate": 0.00010713353436248257, "loss": 0.245, "step": 25572 }, { "epoch": 2.071694750486066, "grad_norm": 0.06468871980905533, "learning_rate": 0.00010712903370988793, "loss": 0.2577, "step": 25573 }, { "epoch": 2.0717757615035644, "grad_norm": 0.05726084113121033, "learning_rate": 0.0001071245330572933, "loss": 0.2585, "step": 25574 }, { "epoch": 2.071856772521063, "grad_norm": 0.06499053537845612, "learning_rate": 0.00010712003240469868, "loss": 0.2271, "step": 25575 }, { "epoch": 2.0719377835385613, "grad_norm": 0.05974118039011955, "learning_rate": 0.00010711553175210407, "loss": 0.2672, "step": 25576 }, { "epoch": 2.0720187945560595, "grad_norm": 0.07319014519453049, "learning_rate": 0.00010711103109950944, "loss": 0.2568, "step": 25577 }, { "epoch": 2.072099805573558, "grad_norm": 0.056026265025138855, "learning_rate": 0.00010710653044691481, "loss": 0.2507, "step": 25578 }, { "epoch": 2.0721808165910565, "grad_norm": 0.06923699378967285, "learning_rate": 0.00010710202979432017, "loss": 0.2544, "step": 25579 }, { "epoch": 2.0722618276085547, "grad_norm": 0.0588361993432045, "learning_rate": 0.00010709752914172555, "loss": 0.2436, "step": 25580 }, { "epoch": 2.072342838626053, "grad_norm": 0.06266608089208603, "learning_rate": 0.00010709302848913092, "loss": 0.2777, "step": 25581 }, { "epoch": 2.0724238496435516, "grad_norm": 0.050658125430345535, "learning_rate": 0.00010708852783653631, "loss": 0.2363, "step": 25582 }, { "epoch": 2.07250486066105, "grad_norm": 0.05195537209510803, "learning_rate": 0.00010708402718394168, "loss": 0.2793, "step": 25583 }, { "epoch": 2.072585871678548, "grad_norm": 0.07279521226882935, "learning_rate": 0.00010707952653134706, "loss": 0.2929, "step": 25584 }, { "epoch": 2.072666882696047, "grad_norm": 0.06828270107507706, "learning_rate": 0.00010707502587875242, "loss": 0.2234, "step": 25585 }, { "epoch": 2.072747893713545, "grad_norm": 0.06727327406406403, "learning_rate": 0.00010707052522615779, "loss": 0.2902, "step": 25586 }, { "epoch": 2.0728289047310433, "grad_norm": 0.05688133463263512, "learning_rate": 0.00010706602457356316, "loss": 0.2401, "step": 25587 }, { "epoch": 2.072909915748542, "grad_norm": 0.0660371407866478, "learning_rate": 0.00010706152392096855, "loss": 0.2907, "step": 25588 }, { "epoch": 2.0729909267660402, "grad_norm": 0.06644482910633087, "learning_rate": 0.00010705702326837392, "loss": 0.2801, "step": 25589 }, { "epoch": 2.0730719377835385, "grad_norm": 0.06793033331632614, "learning_rate": 0.0001070525226157793, "loss": 0.2611, "step": 25590 }, { "epoch": 2.0731529488010367, "grad_norm": 0.05119938403367996, "learning_rate": 0.00010704802196318466, "loss": 0.2204, "step": 25591 }, { "epoch": 2.0732339598185354, "grad_norm": 0.06441966444253922, "learning_rate": 0.00010704352131059003, "loss": 0.2236, "step": 25592 }, { "epoch": 2.0733149708360337, "grad_norm": 0.0572500079870224, "learning_rate": 0.0001070390206579954, "loss": 0.2268, "step": 25593 }, { "epoch": 2.073395981853532, "grad_norm": 0.08293695002794266, "learning_rate": 0.00010703452000540079, "loss": 0.2815, "step": 25594 }, { "epoch": 2.0734769928710306, "grad_norm": 0.0702480673789978, "learning_rate": 0.00010703001935280617, "loss": 0.302, "step": 25595 }, { "epoch": 2.073558003888529, "grad_norm": 0.058082662522792816, "learning_rate": 0.00010702551870021154, "loss": 0.2337, "step": 25596 }, { "epoch": 2.073639014906027, "grad_norm": 0.06347770988941193, "learning_rate": 0.0001070210180476169, "loss": 0.2441, "step": 25597 }, { "epoch": 2.073720025923526, "grad_norm": 0.059085577726364136, "learning_rate": 0.00010701651739502227, "loss": 0.2552, "step": 25598 }, { "epoch": 2.073801036941024, "grad_norm": 0.0577191598713398, "learning_rate": 0.00010701201674242767, "loss": 0.2575, "step": 25599 }, { "epoch": 2.0738820479585223, "grad_norm": 0.054545432329177856, "learning_rate": 0.00010700751608983303, "loss": 0.2327, "step": 25600 }, { "epoch": 2.0739630589760205, "grad_norm": 0.05656864121556282, "learning_rate": 0.00010700301543723841, "loss": 0.2511, "step": 25601 }, { "epoch": 2.074044069993519, "grad_norm": 0.05607932060956955, "learning_rate": 0.00010699851478464378, "loss": 0.2355, "step": 25602 }, { "epoch": 2.0741250810110174, "grad_norm": 0.07496161758899689, "learning_rate": 0.00010699401413204914, "loss": 0.3078, "step": 25603 }, { "epoch": 2.0742060920285157, "grad_norm": 0.06039506942033768, "learning_rate": 0.00010698951347945451, "loss": 0.2421, "step": 25604 }, { "epoch": 2.0742871030460144, "grad_norm": 0.06327684223651886, "learning_rate": 0.00010698501282685991, "loss": 0.2545, "step": 25605 }, { "epoch": 2.0743681140635126, "grad_norm": 0.06102275848388672, "learning_rate": 0.00010698051217426527, "loss": 0.2782, "step": 25606 }, { "epoch": 2.074449125081011, "grad_norm": 0.06765256077051163, "learning_rate": 0.00010697601152167065, "loss": 0.2567, "step": 25607 }, { "epoch": 2.0745301360985096, "grad_norm": 0.0560624822974205, "learning_rate": 0.00010697151086907602, "loss": 0.2401, "step": 25608 }, { "epoch": 2.074611147116008, "grad_norm": 0.05952421948313713, "learning_rate": 0.0001069670102164814, "loss": 0.2765, "step": 25609 }, { "epoch": 2.074692158133506, "grad_norm": 0.07524368911981583, "learning_rate": 0.00010696250956388676, "loss": 0.286, "step": 25610 }, { "epoch": 2.0747731691510047, "grad_norm": 0.0632515475153923, "learning_rate": 0.00010695800891129216, "loss": 0.319, "step": 25611 }, { "epoch": 2.074854180168503, "grad_norm": 0.053480733186006546, "learning_rate": 0.00010695350825869752, "loss": 0.245, "step": 25612 }, { "epoch": 2.0749351911860012, "grad_norm": 0.07301179319620132, "learning_rate": 0.00010694900760610289, "loss": 0.2887, "step": 25613 }, { "epoch": 2.0750162022034995, "grad_norm": 0.05944663658738136, "learning_rate": 0.00010694450695350826, "loss": 0.3042, "step": 25614 }, { "epoch": 2.075097213220998, "grad_norm": 0.06371688097715378, "learning_rate": 0.00010694000630091364, "loss": 0.2952, "step": 25615 }, { "epoch": 2.0751782242384964, "grad_norm": 0.05884646624326706, "learning_rate": 0.000106935505648319, "loss": 0.2649, "step": 25616 }, { "epoch": 2.0752592352559946, "grad_norm": 0.057729847729206085, "learning_rate": 0.0001069310049957244, "loss": 0.2633, "step": 25617 }, { "epoch": 2.0753402462734933, "grad_norm": 0.06070605665445328, "learning_rate": 0.00010692650434312976, "loss": 0.2183, "step": 25618 }, { "epoch": 2.0754212572909916, "grad_norm": 0.04957975447177887, "learning_rate": 0.00010692200369053513, "loss": 0.2647, "step": 25619 }, { "epoch": 2.07550226830849, "grad_norm": 0.05769617483019829, "learning_rate": 0.0001069175030379405, "loss": 0.2364, "step": 25620 }, { "epoch": 2.0755832793259885, "grad_norm": 0.06463346630334854, "learning_rate": 0.00010691300238534588, "loss": 0.2877, "step": 25621 }, { "epoch": 2.0756642903434868, "grad_norm": 0.05660693347454071, "learning_rate": 0.00010690850173275127, "loss": 0.2918, "step": 25622 }, { "epoch": 2.075745301360985, "grad_norm": 0.06576941907405853, "learning_rate": 0.00010690400108015664, "loss": 0.2448, "step": 25623 }, { "epoch": 2.0758263123784833, "grad_norm": 0.06314044445753098, "learning_rate": 0.000106899500427562, "loss": 0.2815, "step": 25624 }, { "epoch": 2.075907323395982, "grad_norm": 0.06122468411922455, "learning_rate": 0.00010689499977496737, "loss": 0.2712, "step": 25625 }, { "epoch": 2.07598833441348, "grad_norm": 0.06045297533273697, "learning_rate": 0.00010689049912237275, "loss": 0.2386, "step": 25626 }, { "epoch": 2.0760693454309784, "grad_norm": 0.06016145274043083, "learning_rate": 0.00010688599846977812, "loss": 0.2585, "step": 25627 }, { "epoch": 2.076150356448477, "grad_norm": 0.07210994511842728, "learning_rate": 0.00010688149781718351, "loss": 0.2581, "step": 25628 }, { "epoch": 2.0762313674659754, "grad_norm": 0.06515554338693619, "learning_rate": 0.00010687699716458888, "loss": 0.2634, "step": 25629 }, { "epoch": 2.0763123784834736, "grad_norm": 0.08918245881795883, "learning_rate": 0.00010687249651199424, "loss": 0.3076, "step": 25630 }, { "epoch": 2.0763933895009723, "grad_norm": 0.06386683881282806, "learning_rate": 0.00010686799585939962, "loss": 0.2638, "step": 25631 }, { "epoch": 2.0764744005184705, "grad_norm": 0.050748348236083984, "learning_rate": 0.00010686349520680499, "loss": 0.2224, "step": 25632 }, { "epoch": 2.076555411535969, "grad_norm": 0.07010557502508163, "learning_rate": 0.00010685899455421036, "loss": 0.2524, "step": 25633 }, { "epoch": 2.0766364225534675, "grad_norm": 0.05773705244064331, "learning_rate": 0.00010685449390161575, "loss": 0.2583, "step": 25634 }, { "epoch": 2.0767174335709657, "grad_norm": 0.06482109427452087, "learning_rate": 0.00010684999324902112, "loss": 0.2794, "step": 25635 }, { "epoch": 2.076798444588464, "grad_norm": 0.07043759524822235, "learning_rate": 0.00010684549259642648, "loss": 0.2956, "step": 25636 }, { "epoch": 2.076879455605962, "grad_norm": 0.06465146690607071, "learning_rate": 0.00010684099194383186, "loss": 0.2775, "step": 25637 }, { "epoch": 2.076960466623461, "grad_norm": 0.06037595868110657, "learning_rate": 0.00010683649129123723, "loss": 0.2441, "step": 25638 }, { "epoch": 2.077041477640959, "grad_norm": 0.08674988150596619, "learning_rate": 0.0001068319906386426, "loss": 0.3322, "step": 25639 }, { "epoch": 2.0771224886584574, "grad_norm": 0.05551932752132416, "learning_rate": 0.00010682748998604799, "loss": 0.261, "step": 25640 }, { "epoch": 2.077203499675956, "grad_norm": 0.07131566852331161, "learning_rate": 0.00010682298933345336, "loss": 0.232, "step": 25641 }, { "epoch": 2.0772845106934543, "grad_norm": 0.05972439795732498, "learning_rate": 0.00010681848868085872, "loss": 0.2835, "step": 25642 }, { "epoch": 2.0773655217109526, "grad_norm": 0.05565524473786354, "learning_rate": 0.0001068139880282641, "loss": 0.23, "step": 25643 }, { "epoch": 2.0774465327284513, "grad_norm": 0.05611024051904678, "learning_rate": 0.00010680948737566947, "loss": 0.3103, "step": 25644 }, { "epoch": 2.0775275437459495, "grad_norm": 0.07952877879142761, "learning_rate": 0.00010680498672307485, "loss": 0.2595, "step": 25645 }, { "epoch": 2.0776085547634477, "grad_norm": 0.06529795378446579, "learning_rate": 0.00010680048607048023, "loss": 0.2697, "step": 25646 }, { "epoch": 2.077689565780946, "grad_norm": 0.06786473840475082, "learning_rate": 0.0001067959854178856, "loss": 0.2843, "step": 25647 }, { "epoch": 2.0777705767984447, "grad_norm": 0.06259346753358841, "learning_rate": 0.00010679148476529097, "loss": 0.2382, "step": 25648 }, { "epoch": 2.077851587815943, "grad_norm": 0.057985417544841766, "learning_rate": 0.00010678698411269634, "loss": 0.2807, "step": 25649 }, { "epoch": 2.077932598833441, "grad_norm": 0.048762246966362, "learning_rate": 0.00010678248346010171, "loss": 0.2399, "step": 25650 }, { "epoch": 2.07801360985094, "grad_norm": 0.05488719791173935, "learning_rate": 0.0001067779828075071, "loss": 0.2443, "step": 25651 }, { "epoch": 2.078094620868438, "grad_norm": 0.05270203948020935, "learning_rate": 0.00010677348215491247, "loss": 0.226, "step": 25652 }, { "epoch": 2.0781756318859363, "grad_norm": 0.07405373454093933, "learning_rate": 0.00010676898150231785, "loss": 0.2567, "step": 25653 }, { "epoch": 2.078256642903435, "grad_norm": 0.07551823556423187, "learning_rate": 0.00010676448084972321, "loss": 0.2704, "step": 25654 }, { "epoch": 2.0783376539209333, "grad_norm": 0.05943058803677559, "learning_rate": 0.00010675998019712858, "loss": 0.2553, "step": 25655 }, { "epoch": 2.0784186649384315, "grad_norm": 0.05561874434351921, "learning_rate": 0.00010675547954453396, "loss": 0.2388, "step": 25656 }, { "epoch": 2.07849967595593, "grad_norm": 0.04733414202928543, "learning_rate": 0.00010675097889193934, "loss": 0.2326, "step": 25657 }, { "epoch": 2.0785806869734285, "grad_norm": 0.07790904492139816, "learning_rate": 0.00010674647823934472, "loss": 0.279, "step": 25658 }, { "epoch": 2.0786616979909267, "grad_norm": 0.06400874257087708, "learning_rate": 0.00010674197758675009, "loss": 0.2887, "step": 25659 }, { "epoch": 2.078742709008425, "grad_norm": 0.06504980474710464, "learning_rate": 0.00010673747693415545, "loss": 0.264, "step": 25660 }, { "epoch": 2.0788237200259236, "grad_norm": 0.06921865046024323, "learning_rate": 0.00010673297628156082, "loss": 0.2837, "step": 25661 }, { "epoch": 2.078904731043422, "grad_norm": 0.057739365845918655, "learning_rate": 0.0001067284756289662, "loss": 0.2548, "step": 25662 }, { "epoch": 2.07898574206092, "grad_norm": 0.058383893221616745, "learning_rate": 0.00010672397497637158, "loss": 0.2579, "step": 25663 }, { "epoch": 2.079066753078419, "grad_norm": 0.06038293242454529, "learning_rate": 0.00010671947432377696, "loss": 0.2434, "step": 25664 }, { "epoch": 2.079147764095917, "grad_norm": 0.0538647286593914, "learning_rate": 0.00010671497367118233, "loss": 0.2383, "step": 25665 }, { "epoch": 2.0792287751134153, "grad_norm": 0.05161396414041519, "learning_rate": 0.00010671047301858769, "loss": 0.2746, "step": 25666 }, { "epoch": 2.079309786130914, "grad_norm": 0.06023260951042175, "learning_rate": 0.00010670597236599306, "loss": 0.2567, "step": 25667 }, { "epoch": 2.0793907971484122, "grad_norm": 0.05432547256350517, "learning_rate": 0.00010670147171339844, "loss": 0.227, "step": 25668 }, { "epoch": 2.0794718081659105, "grad_norm": 0.05337492749094963, "learning_rate": 0.00010669697106080383, "loss": 0.2281, "step": 25669 }, { "epoch": 2.0795528191834087, "grad_norm": 0.06227840110659599, "learning_rate": 0.0001066924704082092, "loss": 0.225, "step": 25670 }, { "epoch": 2.0796338302009074, "grad_norm": 0.0642867460846901, "learning_rate": 0.00010668796975561457, "loss": 0.2542, "step": 25671 }, { "epoch": 2.0797148412184057, "grad_norm": 0.07576468586921692, "learning_rate": 0.00010668346910301993, "loss": 0.3283, "step": 25672 }, { "epoch": 2.079795852235904, "grad_norm": 0.05812416598200798, "learning_rate": 0.0001066789684504253, "loss": 0.2565, "step": 25673 }, { "epoch": 2.0798768632534026, "grad_norm": 0.05353740602731705, "learning_rate": 0.00010667446779783071, "loss": 0.2655, "step": 25674 }, { "epoch": 2.079957874270901, "grad_norm": 0.052923765033483505, "learning_rate": 0.00010666996714523607, "loss": 0.2334, "step": 25675 }, { "epoch": 2.080038885288399, "grad_norm": 0.07438129186630249, "learning_rate": 0.00010666546649264144, "loss": 0.2633, "step": 25676 }, { "epoch": 2.0801198963058978, "grad_norm": 0.06333016604185104, "learning_rate": 0.00010666096584004681, "loss": 0.2701, "step": 25677 }, { "epoch": 2.080200907323396, "grad_norm": 0.061056092381477356, "learning_rate": 0.00010665646518745219, "loss": 0.2527, "step": 25678 }, { "epoch": 2.0802819183408943, "grad_norm": 0.0625620037317276, "learning_rate": 0.00010665196453485755, "loss": 0.2912, "step": 25679 }, { "epoch": 2.080362929358393, "grad_norm": 0.05852917581796646, "learning_rate": 0.00010664746388226295, "loss": 0.2371, "step": 25680 }, { "epoch": 2.080443940375891, "grad_norm": 0.05300210788846016, "learning_rate": 0.00010664296322966831, "loss": 0.238, "step": 25681 }, { "epoch": 2.0805249513933894, "grad_norm": 0.051059871912002563, "learning_rate": 0.00010663846257707368, "loss": 0.2545, "step": 25682 }, { "epoch": 2.0806059624108877, "grad_norm": 0.05365686118602753, "learning_rate": 0.00010663396192447906, "loss": 0.2292, "step": 25683 }, { "epoch": 2.0806869734283864, "grad_norm": 0.056775450706481934, "learning_rate": 0.00010662946127188443, "loss": 0.2782, "step": 25684 }, { "epoch": 2.0807679844458846, "grad_norm": 0.06060144305229187, "learning_rate": 0.00010662496061928979, "loss": 0.2625, "step": 25685 }, { "epoch": 2.080848995463383, "grad_norm": 0.06818179786205292, "learning_rate": 0.00010662045996669519, "loss": 0.2896, "step": 25686 }, { "epoch": 2.0809300064808816, "grad_norm": 0.05880607292056084, "learning_rate": 0.00010661595931410055, "loss": 0.2755, "step": 25687 }, { "epoch": 2.08101101749838, "grad_norm": 0.06834668666124344, "learning_rate": 0.00010661145866150592, "loss": 0.2539, "step": 25688 }, { "epoch": 2.081092028515878, "grad_norm": 0.09048371016979218, "learning_rate": 0.0001066069580089113, "loss": 0.2587, "step": 25689 }, { "epoch": 2.0811730395333767, "grad_norm": 0.07518291473388672, "learning_rate": 0.00010660245735631667, "loss": 0.2522, "step": 25690 }, { "epoch": 2.081254050550875, "grad_norm": 0.0637117400765419, "learning_rate": 0.00010659795670372203, "loss": 0.2945, "step": 25691 }, { "epoch": 2.0813350615683732, "grad_norm": 0.06945264339447021, "learning_rate": 0.00010659345605112743, "loss": 0.2858, "step": 25692 }, { "epoch": 2.0814160725858715, "grad_norm": 0.06889203935861588, "learning_rate": 0.00010658895539853279, "loss": 0.2662, "step": 25693 }, { "epoch": 2.08149708360337, "grad_norm": 0.06387284398078918, "learning_rate": 0.00010658445474593817, "loss": 0.2433, "step": 25694 }, { "epoch": 2.0815780946208684, "grad_norm": 0.05560588836669922, "learning_rate": 0.00010657995409334354, "loss": 0.2042, "step": 25695 }, { "epoch": 2.0816591056383666, "grad_norm": 0.052960895001888275, "learning_rate": 0.00010657545344074891, "loss": 0.2754, "step": 25696 }, { "epoch": 2.0817401166558653, "grad_norm": 0.07859915494918823, "learning_rate": 0.00010657095278815427, "loss": 0.3211, "step": 25697 }, { "epoch": 2.0818211276733636, "grad_norm": 0.05939163640141487, "learning_rate": 0.00010656645213555967, "loss": 0.2674, "step": 25698 }, { "epoch": 2.081902138690862, "grad_norm": 0.06102471798658371, "learning_rate": 0.00010656195148296503, "loss": 0.2112, "step": 25699 }, { "epoch": 2.0819831497083605, "grad_norm": 0.07584518939256668, "learning_rate": 0.00010655745083037041, "loss": 0.2417, "step": 25700 }, { "epoch": 2.0820641607258588, "grad_norm": 0.06054631620645523, "learning_rate": 0.00010655295017777578, "loss": 0.2998, "step": 25701 }, { "epoch": 2.082145171743357, "grad_norm": 0.06277702748775482, "learning_rate": 0.00010654844952518115, "loss": 0.2694, "step": 25702 }, { "epoch": 2.0822261827608557, "grad_norm": 0.06722848862409592, "learning_rate": 0.00010654394887258654, "loss": 0.281, "step": 25703 }, { "epoch": 2.082307193778354, "grad_norm": 0.0671129897236824, "learning_rate": 0.00010653944821999192, "loss": 0.2553, "step": 25704 }, { "epoch": 2.082388204795852, "grad_norm": 0.06236971542239189, "learning_rate": 0.00010653494756739728, "loss": 0.2847, "step": 25705 }, { "epoch": 2.0824692158133504, "grad_norm": 0.06291084736585617, "learning_rate": 0.00010653044691480265, "loss": 0.215, "step": 25706 }, { "epoch": 2.082550226830849, "grad_norm": 0.061905037611722946, "learning_rate": 0.00010652594626220802, "loss": 0.2296, "step": 25707 }, { "epoch": 2.0826312378483474, "grad_norm": 0.0679488480091095, "learning_rate": 0.0001065214456096134, "loss": 0.24, "step": 25708 }, { "epoch": 2.0827122488658456, "grad_norm": 0.05547121539711952, "learning_rate": 0.00010651694495701878, "loss": 0.2332, "step": 25709 }, { "epoch": 2.0827932598833443, "grad_norm": 0.06207665428519249, "learning_rate": 0.00010651244430442416, "loss": 0.2399, "step": 25710 }, { "epoch": 2.0828742709008425, "grad_norm": 0.060923006385564804, "learning_rate": 0.00010650794365182952, "loss": 0.2793, "step": 25711 }, { "epoch": 2.082955281918341, "grad_norm": 0.05969534441828728, "learning_rate": 0.00010650344299923489, "loss": 0.2456, "step": 25712 }, { "epoch": 2.0830362929358395, "grad_norm": 0.060140207409858704, "learning_rate": 0.00010649894234664026, "loss": 0.2417, "step": 25713 }, { "epoch": 2.0831173039533377, "grad_norm": 0.06350401043891907, "learning_rate": 0.00010649444169404564, "loss": 0.2725, "step": 25714 }, { "epoch": 2.083198314970836, "grad_norm": 0.05730599910020828, "learning_rate": 0.00010648994104145102, "loss": 0.2471, "step": 25715 }, { "epoch": 2.083279325988334, "grad_norm": 0.06925246119499207, "learning_rate": 0.0001064854403888564, "loss": 0.2924, "step": 25716 }, { "epoch": 2.083360337005833, "grad_norm": 0.05671600624918938, "learning_rate": 0.00010648093973626176, "loss": 0.2313, "step": 25717 }, { "epoch": 2.083441348023331, "grad_norm": 0.05397858843207359, "learning_rate": 0.00010647643908366713, "loss": 0.2426, "step": 25718 }, { "epoch": 2.0835223590408294, "grad_norm": 0.06132481247186661, "learning_rate": 0.0001064719384310725, "loss": 0.2614, "step": 25719 }, { "epoch": 2.083603370058328, "grad_norm": 0.07008907943964005, "learning_rate": 0.00010646743777847788, "loss": 0.2507, "step": 25720 }, { "epoch": 2.0836843810758263, "grad_norm": 0.05325290188193321, "learning_rate": 0.00010646293712588327, "loss": 0.2513, "step": 25721 }, { "epoch": 2.0837653920933246, "grad_norm": 0.06077274680137634, "learning_rate": 0.00010645843647328864, "loss": 0.2353, "step": 25722 }, { "epoch": 2.0838464031108233, "grad_norm": 0.05821505934000015, "learning_rate": 0.000106453935820694, "loss": 0.252, "step": 25723 }, { "epoch": 2.0839274141283215, "grad_norm": 0.05630730465054512, "learning_rate": 0.00010644943516809937, "loss": 0.2255, "step": 25724 }, { "epoch": 2.0840084251458197, "grad_norm": 0.0677599087357521, "learning_rate": 0.00010644493451550475, "loss": 0.3002, "step": 25725 }, { "epoch": 2.0840894361633184, "grad_norm": 0.06731487065553665, "learning_rate": 0.00010644043386291012, "loss": 0.2607, "step": 25726 }, { "epoch": 2.0841704471808167, "grad_norm": 0.07227706909179688, "learning_rate": 0.00010643593321031551, "loss": 0.2606, "step": 25727 }, { "epoch": 2.084251458198315, "grad_norm": 0.07242441922426224, "learning_rate": 0.00010643143255772088, "loss": 0.2694, "step": 25728 }, { "epoch": 2.084332469215813, "grad_norm": 0.06362912803888321, "learning_rate": 0.00010642693190512624, "loss": 0.2793, "step": 25729 }, { "epoch": 2.084413480233312, "grad_norm": 0.0482899434864521, "learning_rate": 0.00010642243125253162, "loss": 0.2201, "step": 25730 }, { "epoch": 2.08449449125081, "grad_norm": 0.059406179934740067, "learning_rate": 0.00010641793059993699, "loss": 0.2579, "step": 25731 }, { "epoch": 2.0845755022683083, "grad_norm": 0.059811271727085114, "learning_rate": 0.00010641342994734238, "loss": 0.2488, "step": 25732 }, { "epoch": 2.084656513285807, "grad_norm": 0.06633289903402328, "learning_rate": 0.00010640892929474775, "loss": 0.2932, "step": 25733 }, { "epoch": 2.0847375243033053, "grad_norm": 0.05347701534628868, "learning_rate": 0.00010640442864215312, "loss": 0.2601, "step": 25734 }, { "epoch": 2.0848185353208035, "grad_norm": 0.060605239123106, "learning_rate": 0.00010639992798955848, "loss": 0.2603, "step": 25735 }, { "epoch": 2.084899546338302, "grad_norm": 0.05953764170408249, "learning_rate": 0.00010639542733696386, "loss": 0.2675, "step": 25736 }, { "epoch": 2.0849805573558005, "grad_norm": 0.052090004086494446, "learning_rate": 0.00010639092668436923, "loss": 0.2545, "step": 25737 }, { "epoch": 2.0850615683732987, "grad_norm": 0.054392725229263306, "learning_rate": 0.00010638642603177462, "loss": 0.2421, "step": 25738 }, { "epoch": 2.085142579390797, "grad_norm": 0.05666566640138626, "learning_rate": 0.00010638192537917999, "loss": 0.211, "step": 25739 }, { "epoch": 2.0852235904082956, "grad_norm": 0.057736024260520935, "learning_rate": 0.00010637742472658536, "loss": 0.2907, "step": 25740 }, { "epoch": 2.085304601425794, "grad_norm": 0.055861927568912506, "learning_rate": 0.00010637292407399074, "loss": 0.2927, "step": 25741 }, { "epoch": 2.085385612443292, "grad_norm": 0.060406699776649475, "learning_rate": 0.0001063684234213961, "loss": 0.2586, "step": 25742 }, { "epoch": 2.085466623460791, "grad_norm": 0.0583062581717968, "learning_rate": 0.00010636392276880147, "loss": 0.2507, "step": 25743 }, { "epoch": 2.085547634478289, "grad_norm": 0.06983792036771774, "learning_rate": 0.00010635942211620686, "loss": 0.2803, "step": 25744 }, { "epoch": 2.0856286454957873, "grad_norm": 0.06031275913119316, "learning_rate": 0.00010635492146361223, "loss": 0.274, "step": 25745 }, { "epoch": 2.085709656513286, "grad_norm": 0.06896448880434036, "learning_rate": 0.0001063504208110176, "loss": 0.2534, "step": 25746 }, { "epoch": 2.0857906675307842, "grad_norm": 0.08314532041549683, "learning_rate": 0.00010634592015842298, "loss": 0.2933, "step": 25747 }, { "epoch": 2.0858716785482825, "grad_norm": 0.06117968261241913, "learning_rate": 0.00010634141950582834, "loss": 0.244, "step": 25748 }, { "epoch": 2.085952689565781, "grad_norm": 0.05076151341199875, "learning_rate": 0.00010633691885323371, "loss": 0.2418, "step": 25749 }, { "epoch": 2.0860337005832794, "grad_norm": 0.05884158983826637, "learning_rate": 0.0001063324182006391, "loss": 0.2552, "step": 25750 }, { "epoch": 2.0861147116007777, "grad_norm": 0.06510408967733383, "learning_rate": 0.00010632791754804447, "loss": 0.2432, "step": 25751 }, { "epoch": 2.086195722618276, "grad_norm": 0.05114440992474556, "learning_rate": 0.00010632341689544985, "loss": 0.263, "step": 25752 }, { "epoch": 2.0862767336357746, "grad_norm": 0.05631138011813164, "learning_rate": 0.00010631891624285522, "loss": 0.2466, "step": 25753 }, { "epoch": 2.086357744653273, "grad_norm": 0.06776197999715805, "learning_rate": 0.00010631441559026058, "loss": 0.287, "step": 25754 }, { "epoch": 2.086438755670771, "grad_norm": 0.05723719298839569, "learning_rate": 0.00010630991493766598, "loss": 0.2534, "step": 25755 }, { "epoch": 2.0865197666882698, "grad_norm": 0.07237262278795242, "learning_rate": 0.00010630541428507134, "loss": 0.2933, "step": 25756 }, { "epoch": 2.086600777705768, "grad_norm": 0.061718061566352844, "learning_rate": 0.00010630091363247672, "loss": 0.2711, "step": 25757 }, { "epoch": 2.0866817887232663, "grad_norm": 0.06035630404949188, "learning_rate": 0.00010629641297988209, "loss": 0.235, "step": 25758 }, { "epoch": 2.086762799740765, "grad_norm": 0.06308078020811081, "learning_rate": 0.00010629191232728746, "loss": 0.2505, "step": 25759 }, { "epoch": 2.086843810758263, "grad_norm": 0.05552982538938522, "learning_rate": 0.00010628741167469282, "loss": 0.2482, "step": 25760 }, { "epoch": 2.0869248217757614, "grad_norm": 0.06342519819736481, "learning_rate": 0.00010628291102209822, "loss": 0.2535, "step": 25761 }, { "epoch": 2.0870058327932597, "grad_norm": 0.04495114088058472, "learning_rate": 0.00010627841036950358, "loss": 0.2632, "step": 25762 }, { "epoch": 2.0870868438107584, "grad_norm": 0.06390406936407089, "learning_rate": 0.00010627390971690896, "loss": 0.2626, "step": 25763 }, { "epoch": 2.0871678548282566, "grad_norm": 0.06474427878856659, "learning_rate": 0.00010626940906431433, "loss": 0.2821, "step": 25764 }, { "epoch": 2.087248865845755, "grad_norm": 0.05594582483172417, "learning_rate": 0.0001062649084117197, "loss": 0.285, "step": 25765 }, { "epoch": 2.0873298768632536, "grad_norm": 0.061284903436899185, "learning_rate": 0.00010626040775912507, "loss": 0.2327, "step": 25766 }, { "epoch": 2.087410887880752, "grad_norm": 0.058083221316337585, "learning_rate": 0.00010625590710653047, "loss": 0.2555, "step": 25767 }, { "epoch": 2.08749189889825, "grad_norm": 0.0643569752573967, "learning_rate": 0.00010625140645393583, "loss": 0.2676, "step": 25768 }, { "epoch": 2.0875729099157487, "grad_norm": 0.05668030306696892, "learning_rate": 0.0001062469058013412, "loss": 0.263, "step": 25769 }, { "epoch": 2.087653920933247, "grad_norm": 0.05691717192530632, "learning_rate": 0.00010624240514874657, "loss": 0.285, "step": 25770 }, { "epoch": 2.087734931950745, "grad_norm": 0.05709190294146538, "learning_rate": 0.00010623790449615195, "loss": 0.2534, "step": 25771 }, { "epoch": 2.087815942968244, "grad_norm": 0.05560784786939621, "learning_rate": 0.0001062334038435573, "loss": 0.2386, "step": 25772 }, { "epoch": 2.087896953985742, "grad_norm": 0.062379688024520874, "learning_rate": 0.00010622890319096271, "loss": 0.2553, "step": 25773 }, { "epoch": 2.0879779650032404, "grad_norm": 0.060460880398750305, "learning_rate": 0.00010622440253836807, "loss": 0.239, "step": 25774 }, { "epoch": 2.0880589760207386, "grad_norm": 0.06349531561136246, "learning_rate": 0.00010621990188577344, "loss": 0.2695, "step": 25775 }, { "epoch": 2.0881399870382373, "grad_norm": 0.07084312289953232, "learning_rate": 0.00010621540123317881, "loss": 0.2661, "step": 25776 }, { "epoch": 2.0882209980557356, "grad_norm": 0.06351789832115173, "learning_rate": 0.00010621090058058419, "loss": 0.2794, "step": 25777 }, { "epoch": 2.088302009073234, "grad_norm": 0.06865677982568741, "learning_rate": 0.00010620639992798955, "loss": 0.2506, "step": 25778 }, { "epoch": 2.0883830200907325, "grad_norm": 0.05225326120853424, "learning_rate": 0.00010620189927539495, "loss": 0.2465, "step": 25779 }, { "epoch": 2.0884640311082308, "grad_norm": 0.06418361514806747, "learning_rate": 0.00010619739862280031, "loss": 0.2706, "step": 25780 }, { "epoch": 2.088545042125729, "grad_norm": 0.057067323476076126, "learning_rate": 0.00010619289797020568, "loss": 0.2642, "step": 25781 }, { "epoch": 2.0886260531432277, "grad_norm": 0.06191162019968033, "learning_rate": 0.00010618839731761106, "loss": 0.2264, "step": 25782 }, { "epoch": 2.088707064160726, "grad_norm": 0.10706423968076706, "learning_rate": 0.00010618389666501643, "loss": 0.259, "step": 25783 }, { "epoch": 2.088788075178224, "grad_norm": 0.07185043394565582, "learning_rate": 0.00010617939601242182, "loss": 0.249, "step": 25784 }, { "epoch": 2.0888690861957224, "grad_norm": 0.056017689406871796, "learning_rate": 0.00010617489535982719, "loss": 0.282, "step": 25785 }, { "epoch": 2.088950097213221, "grad_norm": 0.0657619833946228, "learning_rate": 0.00010617039470723255, "loss": 0.2481, "step": 25786 }, { "epoch": 2.0890311082307194, "grad_norm": 0.05671881511807442, "learning_rate": 0.00010616589405463792, "loss": 0.2431, "step": 25787 }, { "epoch": 2.0891121192482176, "grad_norm": 0.061993759125471115, "learning_rate": 0.0001061613934020433, "loss": 0.2715, "step": 25788 }, { "epoch": 2.0891931302657163, "grad_norm": 0.05531548708677292, "learning_rate": 0.00010615689274944867, "loss": 0.212, "step": 25789 }, { "epoch": 2.0892741412832145, "grad_norm": 0.05547591298818588, "learning_rate": 0.00010615239209685406, "loss": 0.2111, "step": 25790 }, { "epoch": 2.089355152300713, "grad_norm": 0.07032620906829834, "learning_rate": 0.00010614789144425943, "loss": 0.2928, "step": 25791 }, { "epoch": 2.0894361633182115, "grad_norm": 0.06776494532823563, "learning_rate": 0.00010614339079166479, "loss": 0.227, "step": 25792 }, { "epoch": 2.0895171743357097, "grad_norm": 0.06672436743974686, "learning_rate": 0.00010613889013907017, "loss": 0.3022, "step": 25793 }, { "epoch": 2.089598185353208, "grad_norm": 0.07442519813776016, "learning_rate": 0.00010613438948647554, "loss": 0.2315, "step": 25794 }, { "epoch": 2.089679196370706, "grad_norm": 0.07650196552276611, "learning_rate": 0.00010612988883388091, "loss": 0.2704, "step": 25795 }, { "epoch": 2.089760207388205, "grad_norm": 0.07087470591068268, "learning_rate": 0.0001061253881812863, "loss": 0.2499, "step": 25796 }, { "epoch": 2.089841218405703, "grad_norm": 0.06081029400229454, "learning_rate": 0.00010612088752869167, "loss": 0.2102, "step": 25797 }, { "epoch": 2.0899222294232014, "grad_norm": 0.0629265084862709, "learning_rate": 0.00010611638687609703, "loss": 0.2666, "step": 25798 }, { "epoch": 2.0900032404407, "grad_norm": 0.06669928878545761, "learning_rate": 0.00010611188622350241, "loss": 0.2466, "step": 25799 }, { "epoch": 2.0900842514581983, "grad_norm": 0.053711552172899246, "learning_rate": 0.00010610738557090778, "loss": 0.2246, "step": 25800 }, { "epoch": 2.0901652624756966, "grad_norm": 0.05141836032271385, "learning_rate": 0.00010610288491831315, "loss": 0.2227, "step": 25801 }, { "epoch": 2.0902462734931953, "grad_norm": 0.05613867565989494, "learning_rate": 0.00010609838426571854, "loss": 0.223, "step": 25802 }, { "epoch": 2.0903272845106935, "grad_norm": 0.0661654844880104, "learning_rate": 0.00010609388361312392, "loss": 0.2581, "step": 25803 }, { "epoch": 2.0904082955281917, "grad_norm": 0.04869081825017929, "learning_rate": 0.00010608938296052928, "loss": 0.2325, "step": 25804 }, { "epoch": 2.0904893065456904, "grad_norm": 0.07372672110795975, "learning_rate": 0.00010608488230793465, "loss": 0.3026, "step": 25805 }, { "epoch": 2.0905703175631887, "grad_norm": 0.05311240255832672, "learning_rate": 0.00010608038165534002, "loss": 0.2785, "step": 25806 }, { "epoch": 2.090651328580687, "grad_norm": 0.06160484999418259, "learning_rate": 0.00010607588100274541, "loss": 0.2661, "step": 25807 }, { "epoch": 2.090732339598185, "grad_norm": 0.053490664809942245, "learning_rate": 0.00010607138035015078, "loss": 0.2464, "step": 25808 }, { "epoch": 2.090813350615684, "grad_norm": 0.06441934406757355, "learning_rate": 0.00010606687969755616, "loss": 0.2287, "step": 25809 }, { "epoch": 2.090894361633182, "grad_norm": 0.06719013303518295, "learning_rate": 0.00010606237904496153, "loss": 0.2507, "step": 25810 }, { "epoch": 2.0909753726506803, "grad_norm": 0.06515239924192429, "learning_rate": 0.00010605787839236689, "loss": 0.2787, "step": 25811 }, { "epoch": 2.091056383668179, "grad_norm": 0.058618079870939255, "learning_rate": 0.00010605337773977226, "loss": 0.2328, "step": 25812 }, { "epoch": 2.0911373946856773, "grad_norm": 0.06700004637241364, "learning_rate": 0.00010604887708717765, "loss": 0.2504, "step": 25813 }, { "epoch": 2.0912184057031755, "grad_norm": 0.06595467776060104, "learning_rate": 0.00010604437643458303, "loss": 0.2316, "step": 25814 }, { "epoch": 2.091299416720674, "grad_norm": 0.07155590504407883, "learning_rate": 0.0001060398757819884, "loss": 0.2837, "step": 25815 }, { "epoch": 2.0913804277381725, "grad_norm": 0.06447424739599228, "learning_rate": 0.00010603537512939377, "loss": 0.2741, "step": 25816 }, { "epoch": 2.0914614387556707, "grad_norm": 0.06656340509653091, "learning_rate": 0.00010603087447679913, "loss": 0.2896, "step": 25817 }, { "epoch": 2.091542449773169, "grad_norm": 0.06265214085578918, "learning_rate": 0.0001060263738242045, "loss": 0.2384, "step": 25818 }, { "epoch": 2.0916234607906676, "grad_norm": 0.06865495443344116, "learning_rate": 0.00010602187317160989, "loss": 0.3143, "step": 25819 }, { "epoch": 2.091704471808166, "grad_norm": 0.06335429847240448, "learning_rate": 0.00010601737251901527, "loss": 0.2424, "step": 25820 }, { "epoch": 2.091785482825664, "grad_norm": 0.06055642291903496, "learning_rate": 0.00010601287186642064, "loss": 0.2754, "step": 25821 }, { "epoch": 2.091866493843163, "grad_norm": 0.06168762221932411, "learning_rate": 0.00010600837121382601, "loss": 0.2886, "step": 25822 }, { "epoch": 2.091947504860661, "grad_norm": 0.0600765235722065, "learning_rate": 0.00010600387056123137, "loss": 0.2488, "step": 25823 }, { "epoch": 2.0920285158781593, "grad_norm": 0.057513538748025894, "learning_rate": 0.00010599936990863675, "loss": 0.2352, "step": 25824 }, { "epoch": 2.092109526895658, "grad_norm": 0.061202578246593475, "learning_rate": 0.00010599486925604213, "loss": 0.2575, "step": 25825 }, { "epoch": 2.0921905379131562, "grad_norm": 0.06461769342422485, "learning_rate": 0.00010599036860344751, "loss": 0.2584, "step": 25826 }, { "epoch": 2.0922715489306545, "grad_norm": 0.08148052543401718, "learning_rate": 0.00010598586795085288, "loss": 0.2752, "step": 25827 }, { "epoch": 2.0923525599481527, "grad_norm": 0.05636552721261978, "learning_rate": 0.00010598136729825826, "loss": 0.2453, "step": 25828 }, { "epoch": 2.0924335709656514, "grad_norm": 0.055334556847810745, "learning_rate": 0.00010597686664566362, "loss": 0.2603, "step": 25829 }, { "epoch": 2.0925145819831497, "grad_norm": 0.05396450310945511, "learning_rate": 0.00010597236599306899, "loss": 0.2434, "step": 25830 }, { "epoch": 2.092595593000648, "grad_norm": 0.06471575051546097, "learning_rate": 0.00010596786534047438, "loss": 0.2984, "step": 25831 }, { "epoch": 2.0926766040181466, "grad_norm": 0.05728616937994957, "learning_rate": 0.00010596336468787975, "loss": 0.272, "step": 25832 }, { "epoch": 2.092757615035645, "grad_norm": 0.0556185282766819, "learning_rate": 0.00010595886403528512, "loss": 0.2365, "step": 25833 }, { "epoch": 2.092838626053143, "grad_norm": 0.04983407258987427, "learning_rate": 0.0001059543633826905, "loss": 0.2521, "step": 25834 }, { "epoch": 2.0929196370706418, "grad_norm": 0.07000332325696945, "learning_rate": 0.00010594986273009586, "loss": 0.2536, "step": 25835 }, { "epoch": 2.09300064808814, "grad_norm": 0.05987836793065071, "learning_rate": 0.00010594536207750126, "loss": 0.2305, "step": 25836 }, { "epoch": 2.0930816591056383, "grad_norm": 0.0611238107085228, "learning_rate": 0.00010594086142490662, "loss": 0.2421, "step": 25837 }, { "epoch": 2.093162670123137, "grad_norm": 0.0788748636841774, "learning_rate": 0.00010593636077231199, "loss": 0.2708, "step": 25838 }, { "epoch": 2.093243681140635, "grad_norm": 0.05313669890165329, "learning_rate": 0.00010593186011971737, "loss": 0.246, "step": 25839 }, { "epoch": 2.0933246921581334, "grad_norm": 0.06658217310905457, "learning_rate": 0.00010592735946712274, "loss": 0.27, "step": 25840 }, { "epoch": 2.0934057031756317, "grad_norm": 0.06660610437393188, "learning_rate": 0.0001059228588145281, "loss": 0.3048, "step": 25841 }, { "epoch": 2.0934867141931304, "grad_norm": 0.06018728017807007, "learning_rate": 0.0001059183581619335, "loss": 0.2395, "step": 25842 }, { "epoch": 2.0935677252106286, "grad_norm": 0.056028787046670914, "learning_rate": 0.00010591385750933886, "loss": 0.263, "step": 25843 }, { "epoch": 2.093648736228127, "grad_norm": 0.06321057677268982, "learning_rate": 0.00010590935685674423, "loss": 0.2256, "step": 25844 }, { "epoch": 2.0937297472456255, "grad_norm": 0.08071082830429077, "learning_rate": 0.00010590485620414961, "loss": 0.3121, "step": 25845 }, { "epoch": 2.093810758263124, "grad_norm": 0.06411097198724747, "learning_rate": 0.00010590035555155498, "loss": 0.2786, "step": 25846 }, { "epoch": 2.093891769280622, "grad_norm": 0.059210870414972305, "learning_rate": 0.00010589585489896034, "loss": 0.2746, "step": 25847 }, { "epoch": 2.0939727802981207, "grad_norm": 0.07945404201745987, "learning_rate": 0.00010589135424636574, "loss": 0.2947, "step": 25848 }, { "epoch": 2.094053791315619, "grad_norm": 0.05297846719622612, "learning_rate": 0.0001058868535937711, "loss": 0.2591, "step": 25849 }, { "epoch": 2.094134802333117, "grad_norm": 0.07699709385633469, "learning_rate": 0.00010588235294117647, "loss": 0.2897, "step": 25850 }, { "epoch": 2.0942158133506155, "grad_norm": 0.07000952959060669, "learning_rate": 0.00010587785228858185, "loss": 0.2837, "step": 25851 }, { "epoch": 2.094296824368114, "grad_norm": 0.05717483162879944, "learning_rate": 0.00010587335163598722, "loss": 0.2496, "step": 25852 }, { "epoch": 2.0943778353856124, "grad_norm": 0.058601513504981995, "learning_rate": 0.00010586885098339258, "loss": 0.26, "step": 25853 }, { "epoch": 2.0944588464031106, "grad_norm": 0.07447142153978348, "learning_rate": 0.00010586435033079798, "loss": 0.2429, "step": 25854 }, { "epoch": 2.0945398574206093, "grad_norm": 0.05477110669016838, "learning_rate": 0.00010585984967820334, "loss": 0.2115, "step": 25855 }, { "epoch": 2.0946208684381076, "grad_norm": 0.061628323048353195, "learning_rate": 0.00010585534902560872, "loss": 0.3228, "step": 25856 }, { "epoch": 2.094701879455606, "grad_norm": 0.05916190892457962, "learning_rate": 0.00010585084837301409, "loss": 0.2674, "step": 25857 }, { "epoch": 2.0947828904731045, "grad_norm": 0.058797094970941544, "learning_rate": 0.00010584634772041946, "loss": 0.2531, "step": 25858 }, { "epoch": 2.0948639014906028, "grad_norm": 0.050141118466854095, "learning_rate": 0.00010584184706782482, "loss": 0.2569, "step": 25859 }, { "epoch": 2.094944912508101, "grad_norm": 0.07009284943342209, "learning_rate": 0.00010583734641523022, "loss": 0.2686, "step": 25860 }, { "epoch": 2.0950259235255997, "grad_norm": 0.06598873436450958, "learning_rate": 0.00010583284576263558, "loss": 0.2529, "step": 25861 }, { "epoch": 2.095106934543098, "grad_norm": 0.05313203111290932, "learning_rate": 0.00010582834511004096, "loss": 0.2521, "step": 25862 }, { "epoch": 2.095187945560596, "grad_norm": 0.053829628974199295, "learning_rate": 0.00010582384445744633, "loss": 0.2272, "step": 25863 }, { "epoch": 2.0952689565780944, "grad_norm": 0.0683264285326004, "learning_rate": 0.0001058193438048517, "loss": 0.2467, "step": 25864 }, { "epoch": 2.095349967595593, "grad_norm": 0.07350464165210724, "learning_rate": 0.00010581484315225709, "loss": 0.3027, "step": 25865 }, { "epoch": 2.0954309786130914, "grad_norm": 0.07851356267929077, "learning_rate": 0.00010581034249966247, "loss": 0.3086, "step": 25866 }, { "epoch": 2.0955119896305896, "grad_norm": 0.06328796595335007, "learning_rate": 0.00010580584184706783, "loss": 0.2429, "step": 25867 }, { "epoch": 2.0955930006480883, "grad_norm": 0.06569570302963257, "learning_rate": 0.0001058013411944732, "loss": 0.2965, "step": 25868 }, { "epoch": 2.0956740116655865, "grad_norm": 0.05470426380634308, "learning_rate": 0.00010579684054187857, "loss": 0.2537, "step": 25869 }, { "epoch": 2.095755022683085, "grad_norm": 0.06650625169277191, "learning_rate": 0.00010579233988928395, "loss": 0.2746, "step": 25870 }, { "epoch": 2.0958360337005835, "grad_norm": 0.05982498824596405, "learning_rate": 0.00010578783923668933, "loss": 0.2607, "step": 25871 }, { "epoch": 2.0959170447180817, "grad_norm": 0.05677129328250885, "learning_rate": 0.00010578333858409471, "loss": 0.2816, "step": 25872 }, { "epoch": 2.09599805573558, "grad_norm": 0.060105811804533005, "learning_rate": 0.00010577883793150007, "loss": 0.2755, "step": 25873 }, { "epoch": 2.096079066753078, "grad_norm": 0.06248025223612785, "learning_rate": 0.00010577433727890544, "loss": 0.2758, "step": 25874 }, { "epoch": 2.096160077770577, "grad_norm": 0.0530896931886673, "learning_rate": 0.00010576983662631081, "loss": 0.2461, "step": 25875 }, { "epoch": 2.096241088788075, "grad_norm": 0.05905615910887718, "learning_rate": 0.00010576533597371619, "loss": 0.2714, "step": 25876 }, { "epoch": 2.0963220998055734, "grad_norm": 0.06350836902856827, "learning_rate": 0.00010576083532112158, "loss": 0.2623, "step": 25877 }, { "epoch": 2.096403110823072, "grad_norm": 0.07517584413290024, "learning_rate": 0.00010575633466852695, "loss": 0.2645, "step": 25878 }, { "epoch": 2.0964841218405703, "grad_norm": 0.0731901228427887, "learning_rate": 0.00010575183401593232, "loss": 0.2511, "step": 25879 }, { "epoch": 2.0965651328580686, "grad_norm": 0.06542081385850906, "learning_rate": 0.00010574733336333768, "loss": 0.2262, "step": 25880 }, { "epoch": 2.0966461438755672, "grad_norm": 0.0583370216190815, "learning_rate": 0.00010574283271074306, "loss": 0.2004, "step": 25881 }, { "epoch": 2.0967271548930655, "grad_norm": 0.0566578283905983, "learning_rate": 0.00010573833205814843, "loss": 0.2464, "step": 25882 }, { "epoch": 2.0968081659105637, "grad_norm": 0.06367211788892746, "learning_rate": 0.00010573383140555382, "loss": 0.2435, "step": 25883 }, { "epoch": 2.0968891769280624, "grad_norm": 0.07052043080329895, "learning_rate": 0.00010572933075295919, "loss": 0.2647, "step": 25884 }, { "epoch": 2.0969701879455607, "grad_norm": 0.06257165223360062, "learning_rate": 0.00010572483010036456, "loss": 0.2382, "step": 25885 }, { "epoch": 2.097051198963059, "grad_norm": 0.07620043307542801, "learning_rate": 0.00010572032944776992, "loss": 0.2354, "step": 25886 }, { "epoch": 2.097132209980557, "grad_norm": 0.06310175359249115, "learning_rate": 0.0001057158287951753, "loss": 0.2355, "step": 25887 }, { "epoch": 2.097213220998056, "grad_norm": 0.06670668721199036, "learning_rate": 0.00010571132814258069, "loss": 0.2478, "step": 25888 }, { "epoch": 2.097294232015554, "grad_norm": 0.061407558619976044, "learning_rate": 0.00010570682748998606, "loss": 0.2442, "step": 25889 }, { "epoch": 2.0973752430330523, "grad_norm": 0.07137828320264816, "learning_rate": 0.00010570232683739143, "loss": 0.2681, "step": 25890 }, { "epoch": 2.097456254050551, "grad_norm": 0.06494369357824326, "learning_rate": 0.0001056978261847968, "loss": 0.2408, "step": 25891 }, { "epoch": 2.0975372650680493, "grad_norm": 0.06404042989015579, "learning_rate": 0.00010569332553220217, "loss": 0.2647, "step": 25892 }, { "epoch": 2.0976182760855475, "grad_norm": 0.0751018077135086, "learning_rate": 0.00010568882487960754, "loss": 0.2569, "step": 25893 }, { "epoch": 2.097699287103046, "grad_norm": 0.05569750815629959, "learning_rate": 0.00010568432422701293, "loss": 0.2303, "step": 25894 }, { "epoch": 2.0977802981205445, "grad_norm": 0.06106451153755188, "learning_rate": 0.0001056798235744183, "loss": 0.2732, "step": 25895 }, { "epoch": 2.0978613091380427, "grad_norm": 0.06857486069202423, "learning_rate": 0.00010567532292182367, "loss": 0.2592, "step": 25896 }, { "epoch": 2.097942320155541, "grad_norm": 0.0606396459043026, "learning_rate": 0.00010567082226922905, "loss": 0.2216, "step": 25897 }, { "epoch": 2.0980233311730396, "grad_norm": 0.05696889013051987, "learning_rate": 0.00010566632161663441, "loss": 0.2482, "step": 25898 }, { "epoch": 2.098104342190538, "grad_norm": 0.05583791434764862, "learning_rate": 0.00010566182096403978, "loss": 0.2342, "step": 25899 }, { "epoch": 2.098185353208036, "grad_norm": 0.061783235520124435, "learning_rate": 0.00010565732031144517, "loss": 0.2278, "step": 25900 }, { "epoch": 2.098266364225535, "grad_norm": 0.06409650295972824, "learning_rate": 0.00010565281965885054, "loss": 0.2559, "step": 25901 }, { "epoch": 2.098347375243033, "grad_norm": 0.05956130847334862, "learning_rate": 0.00010564831900625592, "loss": 0.2202, "step": 25902 }, { "epoch": 2.0984283862605313, "grad_norm": 0.04956075921654701, "learning_rate": 0.00010564381835366129, "loss": 0.2025, "step": 25903 }, { "epoch": 2.09850939727803, "grad_norm": 0.06383761018514633, "learning_rate": 0.00010563931770106665, "loss": 0.2671, "step": 25904 }, { "epoch": 2.0985904082955282, "grad_norm": 0.07448650151491165, "learning_rate": 0.00010563481704847202, "loss": 0.2972, "step": 25905 }, { "epoch": 2.0986714193130265, "grad_norm": 0.06987703591585159, "learning_rate": 0.00010563031639587741, "loss": 0.2953, "step": 25906 }, { "epoch": 2.098752430330525, "grad_norm": 0.06196024641394615, "learning_rate": 0.00010562581574328278, "loss": 0.2541, "step": 25907 }, { "epoch": 2.0988334413480234, "grad_norm": 0.06016739085316658, "learning_rate": 0.00010562131509068816, "loss": 0.2748, "step": 25908 }, { "epoch": 2.0989144523655217, "grad_norm": 0.05822562053799629, "learning_rate": 0.00010561681443809353, "loss": 0.2236, "step": 25909 }, { "epoch": 2.09899546338302, "grad_norm": 0.05755983665585518, "learning_rate": 0.00010561231378549889, "loss": 0.2493, "step": 25910 }, { "epoch": 2.0990764744005186, "grad_norm": 0.056454241275787354, "learning_rate": 0.00010560781313290426, "loss": 0.2218, "step": 25911 }, { "epoch": 2.099157485418017, "grad_norm": 0.07454843074083328, "learning_rate": 0.00010560331248030965, "loss": 0.2426, "step": 25912 }, { "epoch": 2.099238496435515, "grad_norm": 0.06113918125629425, "learning_rate": 0.00010559881182771503, "loss": 0.2593, "step": 25913 }, { "epoch": 2.0993195074530138, "grad_norm": 0.06242956966161728, "learning_rate": 0.0001055943111751204, "loss": 0.2403, "step": 25914 }, { "epoch": 2.099400518470512, "grad_norm": 0.06737563759088516, "learning_rate": 0.00010558981052252577, "loss": 0.2492, "step": 25915 }, { "epoch": 2.0994815294880103, "grad_norm": 0.05659928917884827, "learning_rate": 0.00010558530986993113, "loss": 0.2394, "step": 25916 }, { "epoch": 2.099562540505509, "grad_norm": 0.06783798336982727, "learning_rate": 0.00010558080921733653, "loss": 0.2747, "step": 25917 }, { "epoch": 2.099643551523007, "grad_norm": 0.06404232233762741, "learning_rate": 0.0001055763085647419, "loss": 0.2721, "step": 25918 }, { "epoch": 2.0997245625405054, "grad_norm": 0.06794023513793945, "learning_rate": 0.00010557180791214727, "loss": 0.2454, "step": 25919 }, { "epoch": 2.0998055735580037, "grad_norm": 0.06422477215528488, "learning_rate": 0.00010556730725955264, "loss": 0.292, "step": 25920 }, { "epoch": 2.0998865845755024, "grad_norm": 0.05315267667174339, "learning_rate": 0.00010556280660695801, "loss": 0.2484, "step": 25921 }, { "epoch": 2.0999675955930006, "grad_norm": 0.053334176540374756, "learning_rate": 0.00010555830595436337, "loss": 0.3048, "step": 25922 }, { "epoch": 2.100048606610499, "grad_norm": 0.06307677179574966, "learning_rate": 0.00010555380530176878, "loss": 0.2724, "step": 25923 }, { "epoch": 2.1001296176279975, "grad_norm": 0.06282954663038254, "learning_rate": 0.00010554930464917414, "loss": 0.2642, "step": 25924 }, { "epoch": 2.100210628645496, "grad_norm": 0.0542440190911293, "learning_rate": 0.00010554480399657951, "loss": 0.2537, "step": 25925 }, { "epoch": 2.100291639662994, "grad_norm": 0.0643736720085144, "learning_rate": 0.00010554030334398488, "loss": 0.2486, "step": 25926 }, { "epoch": 2.1003726506804927, "grad_norm": 0.06453826278448105, "learning_rate": 0.00010553580269139026, "loss": 0.2951, "step": 25927 }, { "epoch": 2.100453661697991, "grad_norm": 0.06805769354104996, "learning_rate": 0.00010553130203879562, "loss": 0.2796, "step": 25928 }, { "epoch": 2.100534672715489, "grad_norm": 0.05347653478384018, "learning_rate": 0.00010552680138620102, "loss": 0.2501, "step": 25929 }, { "epoch": 2.100615683732988, "grad_norm": 0.06046787276864052, "learning_rate": 0.00010552230073360638, "loss": 0.2579, "step": 25930 }, { "epoch": 2.100696694750486, "grad_norm": 0.057562969624996185, "learning_rate": 0.00010551780008101175, "loss": 0.2422, "step": 25931 }, { "epoch": 2.1007777057679844, "grad_norm": 0.0631248950958252, "learning_rate": 0.00010551329942841712, "loss": 0.274, "step": 25932 }, { "epoch": 2.1008587167854826, "grad_norm": 0.06080873683094978, "learning_rate": 0.0001055087987758225, "loss": 0.2253, "step": 25933 }, { "epoch": 2.1009397278029813, "grad_norm": 0.06965567171573639, "learning_rate": 0.00010550429812322786, "loss": 0.2816, "step": 25934 }, { "epoch": 2.1010207388204796, "grad_norm": 0.07264664769172668, "learning_rate": 0.00010549979747063326, "loss": 0.26, "step": 25935 }, { "epoch": 2.101101749837978, "grad_norm": 0.0596703365445137, "learning_rate": 0.00010549529681803862, "loss": 0.2154, "step": 25936 }, { "epoch": 2.1011827608554765, "grad_norm": 0.06607351452112198, "learning_rate": 0.00010549079616544399, "loss": 0.2913, "step": 25937 }, { "epoch": 2.1012637718729748, "grad_norm": 0.054247647523880005, "learning_rate": 0.00010548629551284937, "loss": 0.2655, "step": 25938 }, { "epoch": 2.101344782890473, "grad_norm": 0.07472404092550278, "learning_rate": 0.00010548179486025474, "loss": 0.2869, "step": 25939 }, { "epoch": 2.1014257939079717, "grad_norm": 0.05823759734630585, "learning_rate": 0.00010547729420766013, "loss": 0.227, "step": 25940 }, { "epoch": 2.10150680492547, "grad_norm": 0.06206611543893814, "learning_rate": 0.0001054727935550655, "loss": 0.2557, "step": 25941 }, { "epoch": 2.101587815942968, "grad_norm": 0.06391151249408722, "learning_rate": 0.00010546829290247086, "loss": 0.2689, "step": 25942 }, { "epoch": 2.1016688269604664, "grad_norm": 0.04861528426408768, "learning_rate": 0.00010546379224987623, "loss": 0.2413, "step": 25943 }, { "epoch": 2.101749837977965, "grad_norm": 0.06416679918766022, "learning_rate": 0.00010545929159728161, "loss": 0.2517, "step": 25944 }, { "epoch": 2.1018308489954634, "grad_norm": 0.05032241344451904, "learning_rate": 0.00010545479094468698, "loss": 0.2621, "step": 25945 }, { "epoch": 2.1019118600129616, "grad_norm": 0.06672210991382599, "learning_rate": 0.00010545029029209237, "loss": 0.2304, "step": 25946 }, { "epoch": 2.1019928710304603, "grad_norm": 0.05387113615870476, "learning_rate": 0.00010544578963949774, "loss": 0.2592, "step": 25947 }, { "epoch": 2.1020738820479585, "grad_norm": 0.05849631503224373, "learning_rate": 0.00010544128898690312, "loss": 0.2468, "step": 25948 }, { "epoch": 2.1021548930654568, "grad_norm": 0.07469534128904343, "learning_rate": 0.00010543678833430848, "loss": 0.2787, "step": 25949 }, { "epoch": 2.1022359040829555, "grad_norm": 0.06344032287597656, "learning_rate": 0.00010543228768171385, "loss": 0.2459, "step": 25950 }, { "epoch": 2.1023169151004537, "grad_norm": 0.06518208235502243, "learning_rate": 0.00010542778702911922, "loss": 0.2265, "step": 25951 }, { "epoch": 2.102397926117952, "grad_norm": 0.054074712097644806, "learning_rate": 0.00010542328637652461, "loss": 0.2395, "step": 25952 }, { "epoch": 2.1024789371354506, "grad_norm": 0.0657474473118782, "learning_rate": 0.00010541878572392998, "loss": 0.3097, "step": 25953 }, { "epoch": 2.102559948152949, "grad_norm": 0.06637993454933167, "learning_rate": 0.00010541428507133536, "loss": 0.286, "step": 25954 }, { "epoch": 2.102640959170447, "grad_norm": 0.06281334161758423, "learning_rate": 0.00010540978441874072, "loss": 0.2775, "step": 25955 }, { "epoch": 2.1027219701879454, "grad_norm": 0.060356464236974716, "learning_rate": 0.00010540528376614609, "loss": 0.2912, "step": 25956 }, { "epoch": 2.102802981205444, "grad_norm": 0.07013566046953201, "learning_rate": 0.00010540078311355146, "loss": 0.3275, "step": 25957 }, { "epoch": 2.1028839922229423, "grad_norm": 0.05440182611346245, "learning_rate": 0.00010539628246095685, "loss": 0.2983, "step": 25958 }, { "epoch": 2.1029650032404406, "grad_norm": 0.05539843067526817, "learning_rate": 0.00010539178180836222, "loss": 0.2443, "step": 25959 }, { "epoch": 2.1030460142579392, "grad_norm": 0.05578560009598732, "learning_rate": 0.0001053872811557676, "loss": 0.2418, "step": 25960 }, { "epoch": 2.1031270252754375, "grad_norm": 0.0695006400346756, "learning_rate": 0.00010538278050317296, "loss": 0.2742, "step": 25961 }, { "epoch": 2.1032080362929357, "grad_norm": 0.0572974868118763, "learning_rate": 0.00010537827985057833, "loss": 0.2737, "step": 25962 }, { "epoch": 2.1032890473104344, "grad_norm": 0.07014113664627075, "learning_rate": 0.0001053737791979837, "loss": 0.2304, "step": 25963 }, { "epoch": 2.1033700583279327, "grad_norm": 0.07061361521482468, "learning_rate": 0.00010536927854538909, "loss": 0.2436, "step": 25964 }, { "epoch": 2.103451069345431, "grad_norm": 0.079985611140728, "learning_rate": 0.00010536477789279447, "loss": 0.3183, "step": 25965 }, { "epoch": 2.103532080362929, "grad_norm": 0.07024233043193817, "learning_rate": 0.00010536027724019984, "loss": 0.257, "step": 25966 }, { "epoch": 2.103613091380428, "grad_norm": 0.055939681828022, "learning_rate": 0.0001053557765876052, "loss": 0.2686, "step": 25967 }, { "epoch": 2.103694102397926, "grad_norm": 0.06374896317720413, "learning_rate": 0.00010535127593501057, "loss": 0.2437, "step": 25968 }, { "epoch": 2.1037751134154243, "grad_norm": 0.060883134603500366, "learning_rate": 0.00010534677528241596, "loss": 0.2739, "step": 25969 }, { "epoch": 2.103856124432923, "grad_norm": 0.06968491524457932, "learning_rate": 0.00010534227462982133, "loss": 0.243, "step": 25970 }, { "epoch": 2.1039371354504213, "grad_norm": 0.06380768120288849, "learning_rate": 0.00010533777397722671, "loss": 0.2462, "step": 25971 }, { "epoch": 2.1040181464679195, "grad_norm": 0.0704575926065445, "learning_rate": 0.00010533327332463208, "loss": 0.259, "step": 25972 }, { "epoch": 2.104099157485418, "grad_norm": 0.05365743860602379, "learning_rate": 0.00010532877267203744, "loss": 0.2534, "step": 25973 }, { "epoch": 2.1041801685029164, "grad_norm": 0.06499110907316208, "learning_rate": 0.00010532427201944282, "loss": 0.2802, "step": 25974 }, { "epoch": 2.1042611795204147, "grad_norm": 0.06151876598596573, "learning_rate": 0.0001053197713668482, "loss": 0.2415, "step": 25975 }, { "epoch": 2.1043421905379134, "grad_norm": 0.06648625433444977, "learning_rate": 0.00010531527071425358, "loss": 0.2563, "step": 25976 }, { "epoch": 2.1044232015554116, "grad_norm": 0.057899028062820435, "learning_rate": 0.00010531077006165895, "loss": 0.2355, "step": 25977 }, { "epoch": 2.10450421257291, "grad_norm": 0.07268321514129639, "learning_rate": 0.00010530626940906432, "loss": 0.2558, "step": 25978 }, { "epoch": 2.104585223590408, "grad_norm": 0.05207452178001404, "learning_rate": 0.00010530176875646968, "loss": 0.2317, "step": 25979 }, { "epoch": 2.104666234607907, "grad_norm": 0.052503883838653564, "learning_rate": 0.00010529726810387506, "loss": 0.2387, "step": 25980 }, { "epoch": 2.104747245625405, "grad_norm": 0.05151834338903427, "learning_rate": 0.00010529276745128044, "loss": 0.2316, "step": 25981 }, { "epoch": 2.1048282566429033, "grad_norm": 0.07107538729906082, "learning_rate": 0.00010528826679868582, "loss": 0.3077, "step": 25982 }, { "epoch": 2.104909267660402, "grad_norm": 0.05292705073952675, "learning_rate": 0.00010528376614609119, "loss": 0.2599, "step": 25983 }, { "epoch": 2.1049902786779002, "grad_norm": 0.07342039048671722, "learning_rate": 0.00010527926549349656, "loss": 0.2586, "step": 25984 }, { "epoch": 2.1050712896953985, "grad_norm": 0.05398553982377052, "learning_rate": 0.00010527476484090192, "loss": 0.2687, "step": 25985 }, { "epoch": 2.105152300712897, "grad_norm": 0.0721733421087265, "learning_rate": 0.0001052702641883073, "loss": 0.2903, "step": 25986 }, { "epoch": 2.1052333117303954, "grad_norm": 0.07198905199766159, "learning_rate": 0.00010526576353571269, "loss": 0.2692, "step": 25987 }, { "epoch": 2.1053143227478937, "grad_norm": 0.06857843697071075, "learning_rate": 0.00010526126288311806, "loss": 0.2703, "step": 25988 }, { "epoch": 2.105395333765392, "grad_norm": 0.0674414411187172, "learning_rate": 0.00010525676223052343, "loss": 0.2731, "step": 25989 }, { "epoch": 2.1054763447828906, "grad_norm": 0.05459675192832947, "learning_rate": 0.0001052522615779288, "loss": 0.2306, "step": 25990 }, { "epoch": 2.105557355800389, "grad_norm": 0.05658693239092827, "learning_rate": 0.00010524776092533417, "loss": 0.2499, "step": 25991 }, { "epoch": 2.105638366817887, "grad_norm": 0.05841512978076935, "learning_rate": 0.00010524326027273957, "loss": 0.2647, "step": 25992 }, { "epoch": 2.1057193778353858, "grad_norm": 0.06220696493983269, "learning_rate": 0.00010523875962014493, "loss": 0.2774, "step": 25993 }, { "epoch": 2.105800388852884, "grad_norm": 0.06404992938041687, "learning_rate": 0.0001052342589675503, "loss": 0.227, "step": 25994 }, { "epoch": 2.1058813998703823, "grad_norm": 0.08344966918230057, "learning_rate": 0.00010522975831495567, "loss": 0.3065, "step": 25995 }, { "epoch": 2.105962410887881, "grad_norm": 0.04662399739027023, "learning_rate": 0.00010522525766236105, "loss": 0.2443, "step": 25996 }, { "epoch": 2.106043421905379, "grad_norm": 0.07650208473205566, "learning_rate": 0.00010522075700976641, "loss": 0.2978, "step": 25997 }, { "epoch": 2.1061244329228774, "grad_norm": 0.06667305529117584, "learning_rate": 0.00010521625635717181, "loss": 0.2549, "step": 25998 }, { "epoch": 2.1062054439403757, "grad_norm": 0.05364314094185829, "learning_rate": 0.00010521175570457717, "loss": 0.2474, "step": 25999 }, { "epoch": 2.1062864549578744, "grad_norm": 0.07898110151290894, "learning_rate": 0.00010520725505198254, "loss": 0.2559, "step": 26000 }, { "epoch": 2.1063674659753726, "grad_norm": 0.0522674061357975, "learning_rate": 0.00010520275439938792, "loss": 0.2427, "step": 26001 }, { "epoch": 2.106448476992871, "grad_norm": 0.06294900923967361, "learning_rate": 0.00010519825374679329, "loss": 0.2572, "step": 26002 }, { "epoch": 2.1065294880103695, "grad_norm": 0.06288789212703705, "learning_rate": 0.00010519375309419865, "loss": 0.2602, "step": 26003 }, { "epoch": 2.106610499027868, "grad_norm": 0.05611900985240936, "learning_rate": 0.00010518925244160405, "loss": 0.2704, "step": 26004 }, { "epoch": 2.106691510045366, "grad_norm": 0.06647303700447083, "learning_rate": 0.00010518475178900941, "loss": 0.2875, "step": 26005 }, { "epoch": 2.1067725210628647, "grad_norm": 0.05431298166513443, "learning_rate": 0.00010518025113641478, "loss": 0.2415, "step": 26006 }, { "epoch": 2.106853532080363, "grad_norm": 0.07187354564666748, "learning_rate": 0.00010517575048382016, "loss": 0.3086, "step": 26007 }, { "epoch": 2.106934543097861, "grad_norm": 0.06193733587861061, "learning_rate": 0.00010517124983122553, "loss": 0.2618, "step": 26008 }, { "epoch": 2.10701555411536, "grad_norm": 0.07977601140737534, "learning_rate": 0.00010516674917863089, "loss": 0.2584, "step": 26009 }, { "epoch": 2.107096565132858, "grad_norm": 0.07653692364692688, "learning_rate": 0.00010516224852603629, "loss": 0.2764, "step": 26010 }, { "epoch": 2.1071775761503564, "grad_norm": 0.05683164671063423, "learning_rate": 0.00010515774787344165, "loss": 0.2385, "step": 26011 }, { "epoch": 2.1072585871678546, "grad_norm": 0.05809834226965904, "learning_rate": 0.00010515324722084703, "loss": 0.2453, "step": 26012 }, { "epoch": 2.1073395981853533, "grad_norm": 0.06329745799303055, "learning_rate": 0.0001051487465682524, "loss": 0.2541, "step": 26013 }, { "epoch": 2.1074206092028516, "grad_norm": 0.06667368859052658, "learning_rate": 0.00010514424591565777, "loss": 0.2378, "step": 26014 }, { "epoch": 2.10750162022035, "grad_norm": 0.07000331580638885, "learning_rate": 0.00010513974526306313, "loss": 0.3256, "step": 26015 }, { "epoch": 2.1075826312378485, "grad_norm": 0.056671325117349625, "learning_rate": 0.00010513524461046853, "loss": 0.2131, "step": 26016 }, { "epoch": 2.1076636422553467, "grad_norm": 0.05919632315635681, "learning_rate": 0.00010513074395787391, "loss": 0.2216, "step": 26017 }, { "epoch": 2.107744653272845, "grad_norm": 0.06502486020326614, "learning_rate": 0.00010512624330527927, "loss": 0.2587, "step": 26018 }, { "epoch": 2.1078256642903437, "grad_norm": 0.06771452724933624, "learning_rate": 0.00010512174265268464, "loss": 0.2489, "step": 26019 }, { "epoch": 2.107906675307842, "grad_norm": 0.07498066127300262, "learning_rate": 0.00010511724200009001, "loss": 0.2853, "step": 26020 }, { "epoch": 2.10798768632534, "grad_norm": 0.05339035019278526, "learning_rate": 0.0001051127413474954, "loss": 0.2318, "step": 26021 }, { "epoch": 2.1080686973428384, "grad_norm": 0.06364471465349197, "learning_rate": 0.00010510824069490078, "loss": 0.3141, "step": 26022 }, { "epoch": 2.108149708360337, "grad_norm": 0.06906301528215408, "learning_rate": 0.00010510374004230615, "loss": 0.2675, "step": 26023 }, { "epoch": 2.1082307193778353, "grad_norm": 0.05944638326764107, "learning_rate": 0.00010509923938971151, "loss": 0.2374, "step": 26024 }, { "epoch": 2.1083117303953336, "grad_norm": 0.06184754893183708, "learning_rate": 0.00010509473873711688, "loss": 0.2586, "step": 26025 }, { "epoch": 2.1083927414128323, "grad_norm": 0.06255876272916794, "learning_rate": 0.00010509023808452226, "loss": 0.2445, "step": 26026 }, { "epoch": 2.1084737524303305, "grad_norm": 0.06641737371683121, "learning_rate": 0.00010508573743192764, "loss": 0.2396, "step": 26027 }, { "epoch": 2.1085547634478288, "grad_norm": 0.05015082284808159, "learning_rate": 0.00010508123677933302, "loss": 0.2394, "step": 26028 }, { "epoch": 2.1086357744653275, "grad_norm": 0.09021834284067154, "learning_rate": 0.00010507673612673839, "loss": 0.2919, "step": 26029 }, { "epoch": 2.1087167854828257, "grad_norm": 0.06203924119472504, "learning_rate": 0.00010507223547414375, "loss": 0.2747, "step": 26030 }, { "epoch": 2.108797796500324, "grad_norm": 0.048285387456417084, "learning_rate": 0.00010506773482154912, "loss": 0.2308, "step": 26031 }, { "epoch": 2.108878807517822, "grad_norm": 0.05346948280930519, "learning_rate": 0.0001050632341689545, "loss": 0.2085, "step": 26032 }, { "epoch": 2.108959818535321, "grad_norm": 0.054928600788116455, "learning_rate": 0.00010505873351635988, "loss": 0.2359, "step": 26033 }, { "epoch": 2.109040829552819, "grad_norm": 0.05738217011094093, "learning_rate": 0.00010505423286376526, "loss": 0.205, "step": 26034 }, { "epoch": 2.1091218405703174, "grad_norm": 0.06716038286685944, "learning_rate": 0.00010504973221117063, "loss": 0.2708, "step": 26035 }, { "epoch": 2.109202851587816, "grad_norm": 0.0686250627040863, "learning_rate": 0.00010504523155857599, "loss": 0.258, "step": 26036 }, { "epoch": 2.1092838626053143, "grad_norm": 0.058074306696653366, "learning_rate": 0.00010504073090598137, "loss": 0.2406, "step": 26037 }, { "epoch": 2.1093648736228126, "grad_norm": 0.06594202667474747, "learning_rate": 0.00010503623025338674, "loss": 0.2912, "step": 26038 }, { "epoch": 2.1094458846403112, "grad_norm": 0.0583692230284214, "learning_rate": 0.00010503172960079213, "loss": 0.2266, "step": 26039 }, { "epoch": 2.1095268956578095, "grad_norm": 0.05876157432794571, "learning_rate": 0.0001050272289481975, "loss": 0.2621, "step": 26040 }, { "epoch": 2.1096079066753077, "grad_norm": 0.057510919868946075, "learning_rate": 0.00010502272829560287, "loss": 0.2414, "step": 26041 }, { "epoch": 2.1096889176928064, "grad_norm": 0.06362812966108322, "learning_rate": 0.00010501822764300823, "loss": 0.2567, "step": 26042 }, { "epoch": 2.1097699287103047, "grad_norm": 0.06669417023658752, "learning_rate": 0.00010501372699041361, "loss": 0.2412, "step": 26043 }, { "epoch": 2.109850939727803, "grad_norm": 0.0634775161743164, "learning_rate": 0.00010500922633781898, "loss": 0.2767, "step": 26044 }, { "epoch": 2.109931950745301, "grad_norm": 0.07396277785301208, "learning_rate": 0.00010500472568522437, "loss": 0.2799, "step": 26045 }, { "epoch": 2.1100129617628, "grad_norm": 0.056821681559085846, "learning_rate": 0.00010500022503262974, "loss": 0.2229, "step": 26046 }, { "epoch": 2.110093972780298, "grad_norm": 0.05848122015595436, "learning_rate": 0.00010499572438003512, "loss": 0.2295, "step": 26047 }, { "epoch": 2.1101749837977963, "grad_norm": 0.08277922123670578, "learning_rate": 0.00010499122372744048, "loss": 0.2667, "step": 26048 }, { "epoch": 2.110255994815295, "grad_norm": 0.06810778379440308, "learning_rate": 0.00010498672307484585, "loss": 0.274, "step": 26049 }, { "epoch": 2.1103370058327933, "grad_norm": 0.07550406455993652, "learning_rate": 0.00010498222242225124, "loss": 0.2594, "step": 26050 }, { "epoch": 2.1104180168502915, "grad_norm": 0.06279616802930832, "learning_rate": 0.00010497772176965661, "loss": 0.28, "step": 26051 }, { "epoch": 2.11049902786779, "grad_norm": 0.060095228254795074, "learning_rate": 0.00010497322111706198, "loss": 0.238, "step": 26052 }, { "epoch": 2.1105800388852884, "grad_norm": 0.061981070786714554, "learning_rate": 0.00010496872046446736, "loss": 0.2696, "step": 26053 }, { "epoch": 2.1106610499027867, "grad_norm": 0.06327757984399796, "learning_rate": 0.00010496421981187272, "loss": 0.2771, "step": 26054 }, { "epoch": 2.110742060920285, "grad_norm": 0.07830698788166046, "learning_rate": 0.00010495971915927809, "loss": 0.292, "step": 26055 }, { "epoch": 2.1108230719377836, "grad_norm": 0.06010693684220314, "learning_rate": 0.00010495521850668348, "loss": 0.2472, "step": 26056 }, { "epoch": 2.110904082955282, "grad_norm": 0.0585097037255764, "learning_rate": 0.00010495071785408885, "loss": 0.2756, "step": 26057 }, { "epoch": 2.11098509397278, "grad_norm": 0.059424206614494324, "learning_rate": 0.00010494621720149423, "loss": 0.2788, "step": 26058 }, { "epoch": 2.111066104990279, "grad_norm": 0.05133242905139923, "learning_rate": 0.0001049417165488996, "loss": 0.2625, "step": 26059 }, { "epoch": 2.111147116007777, "grad_norm": 0.055822599679231644, "learning_rate": 0.00010493721589630496, "loss": 0.2834, "step": 26060 }, { "epoch": 2.1112281270252753, "grad_norm": 0.053742486983537674, "learning_rate": 0.00010493271524371033, "loss": 0.2375, "step": 26061 }, { "epoch": 2.111309138042774, "grad_norm": 0.06400348991155624, "learning_rate": 0.00010492821459111572, "loss": 0.2367, "step": 26062 }, { "epoch": 2.1113901490602722, "grad_norm": 0.06669709831476212, "learning_rate": 0.00010492371393852109, "loss": 0.2442, "step": 26063 }, { "epoch": 2.1114711600777705, "grad_norm": 0.06966737657785416, "learning_rate": 0.00010491921328592647, "loss": 0.2724, "step": 26064 }, { "epoch": 2.111552171095269, "grad_norm": 0.06068720296025276, "learning_rate": 0.00010491471263333184, "loss": 0.2577, "step": 26065 }, { "epoch": 2.1116331821127674, "grad_norm": 0.05518393963575363, "learning_rate": 0.0001049102119807372, "loss": 0.2351, "step": 26066 }, { "epoch": 2.1117141931302656, "grad_norm": 0.0546581968665123, "learning_rate": 0.00010490571132814257, "loss": 0.2113, "step": 26067 }, { "epoch": 2.111795204147764, "grad_norm": 0.07191743701696396, "learning_rate": 0.00010490121067554796, "loss": 0.2787, "step": 26068 }, { "epoch": 2.1118762151652626, "grad_norm": 0.0725640207529068, "learning_rate": 0.00010489671002295333, "loss": 0.2762, "step": 26069 }, { "epoch": 2.111957226182761, "grad_norm": 0.06637110561132431, "learning_rate": 0.00010489220937035871, "loss": 0.2152, "step": 26070 }, { "epoch": 2.112038237200259, "grad_norm": 0.06367357075214386, "learning_rate": 0.00010488770871776408, "loss": 0.2789, "step": 26071 }, { "epoch": 2.1121192482177578, "grad_norm": 0.052471041679382324, "learning_rate": 0.00010488320806516944, "loss": 0.2458, "step": 26072 }, { "epoch": 2.112200259235256, "grad_norm": 0.06254670768976212, "learning_rate": 0.00010487870741257484, "loss": 0.2575, "step": 26073 }, { "epoch": 2.1122812702527543, "grad_norm": 0.06931617856025696, "learning_rate": 0.0001048742067599802, "loss": 0.2263, "step": 26074 }, { "epoch": 2.112362281270253, "grad_norm": 0.06655744463205338, "learning_rate": 0.00010486970610738558, "loss": 0.2686, "step": 26075 }, { "epoch": 2.112443292287751, "grad_norm": 0.054537370800971985, "learning_rate": 0.00010486520545479095, "loss": 0.2822, "step": 26076 }, { "epoch": 2.1125243033052494, "grad_norm": 0.06522393971681595, "learning_rate": 0.00010486070480219632, "loss": 0.2968, "step": 26077 }, { "epoch": 2.1126053143227477, "grad_norm": 0.06392154097557068, "learning_rate": 0.00010485620414960168, "loss": 0.2307, "step": 26078 }, { "epoch": 2.1126863253402464, "grad_norm": 0.057777177542448044, "learning_rate": 0.00010485170349700708, "loss": 0.2331, "step": 26079 }, { "epoch": 2.1127673363577446, "grad_norm": 0.059251341968774796, "learning_rate": 0.00010484720284441244, "loss": 0.2868, "step": 26080 }, { "epoch": 2.112848347375243, "grad_norm": 0.063652902841568, "learning_rate": 0.00010484270219181782, "loss": 0.2458, "step": 26081 }, { "epoch": 2.1129293583927415, "grad_norm": 0.05617694556713104, "learning_rate": 0.00010483820153922319, "loss": 0.2302, "step": 26082 }, { "epoch": 2.11301036941024, "grad_norm": 0.05732857063412666, "learning_rate": 0.00010483370088662857, "loss": 0.2406, "step": 26083 }, { "epoch": 2.113091380427738, "grad_norm": 0.05721890181303024, "learning_rate": 0.00010482920023403393, "loss": 0.2408, "step": 26084 }, { "epoch": 2.1131723914452367, "grad_norm": 0.053211111575365067, "learning_rate": 0.00010482469958143933, "loss": 0.2469, "step": 26085 }, { "epoch": 2.113253402462735, "grad_norm": 0.06854691356420517, "learning_rate": 0.0001048201989288447, "loss": 0.2964, "step": 26086 }, { "epoch": 2.113334413480233, "grad_norm": 0.059306904673576355, "learning_rate": 0.00010481569827625006, "loss": 0.2599, "step": 26087 }, { "epoch": 2.113415424497732, "grad_norm": 0.057720642536878586, "learning_rate": 0.00010481119762365543, "loss": 0.2641, "step": 26088 }, { "epoch": 2.11349643551523, "grad_norm": 0.06803397089242935, "learning_rate": 0.0001048066969710608, "loss": 0.2846, "step": 26089 }, { "epoch": 2.1135774465327284, "grad_norm": 0.054278306663036346, "learning_rate": 0.00010480219631846617, "loss": 0.2366, "step": 26090 }, { "epoch": 2.1136584575502266, "grad_norm": 0.07058935612440109, "learning_rate": 0.00010479769566587157, "loss": 0.2422, "step": 26091 }, { "epoch": 2.1137394685677253, "grad_norm": 0.049645014107227325, "learning_rate": 0.00010479319501327694, "loss": 0.2387, "step": 26092 }, { "epoch": 2.1138204795852236, "grad_norm": 0.06945136189460754, "learning_rate": 0.0001047886943606823, "loss": 0.2778, "step": 26093 }, { "epoch": 2.113901490602722, "grad_norm": 0.058754272758960724, "learning_rate": 0.00010478419370808767, "loss": 0.2448, "step": 26094 }, { "epoch": 2.1139825016202205, "grad_norm": 0.06239371746778488, "learning_rate": 0.00010477969305549305, "loss": 0.2497, "step": 26095 }, { "epoch": 2.1140635126377187, "grad_norm": 0.07894030213356018, "learning_rate": 0.00010477519240289841, "loss": 0.2891, "step": 26096 }, { "epoch": 2.114144523655217, "grad_norm": 0.0756504163146019, "learning_rate": 0.00010477069175030381, "loss": 0.2929, "step": 26097 }, { "epoch": 2.1142255346727157, "grad_norm": 0.054733000695705414, "learning_rate": 0.00010476619109770918, "loss": 0.2162, "step": 26098 }, { "epoch": 2.114306545690214, "grad_norm": 0.05940738692879677, "learning_rate": 0.00010476169044511454, "loss": 0.2872, "step": 26099 }, { "epoch": 2.114387556707712, "grad_norm": 0.05741294100880623, "learning_rate": 0.00010475718979251992, "loss": 0.2484, "step": 26100 }, { "epoch": 2.1144685677252104, "grad_norm": 0.06838394701480865, "learning_rate": 0.00010475268913992529, "loss": 0.2606, "step": 26101 }, { "epoch": 2.114549578742709, "grad_norm": 0.07081933319568634, "learning_rate": 0.00010474818848733068, "loss": 0.2782, "step": 26102 }, { "epoch": 2.1146305897602073, "grad_norm": 0.058358822017908096, "learning_rate": 0.00010474368783473605, "loss": 0.2384, "step": 26103 }, { "epoch": 2.1147116007777056, "grad_norm": 0.05638371407985687, "learning_rate": 0.00010473918718214142, "loss": 0.2525, "step": 26104 }, { "epoch": 2.1147926117952043, "grad_norm": 0.06408551335334778, "learning_rate": 0.00010473468652954678, "loss": 0.2752, "step": 26105 }, { "epoch": 2.1148736228127025, "grad_norm": 0.06037361919879913, "learning_rate": 0.00010473018587695216, "loss": 0.2626, "step": 26106 }, { "epoch": 2.1149546338302008, "grad_norm": 0.05822877958416939, "learning_rate": 0.00010472568522435753, "loss": 0.2408, "step": 26107 }, { "epoch": 2.1150356448476995, "grad_norm": 0.052096933126449585, "learning_rate": 0.00010472118457176292, "loss": 0.2619, "step": 26108 }, { "epoch": 2.1151166558651977, "grad_norm": 0.06133052706718445, "learning_rate": 0.00010471668391916829, "loss": 0.2439, "step": 26109 }, { "epoch": 2.115197666882696, "grad_norm": 0.06262310594320297, "learning_rate": 0.00010471218326657367, "loss": 0.2709, "step": 26110 }, { "epoch": 2.1152786779001946, "grad_norm": 0.05901063233613968, "learning_rate": 0.00010470768261397903, "loss": 0.2492, "step": 26111 }, { "epoch": 2.115359688917693, "grad_norm": 0.06124577298760414, "learning_rate": 0.0001047031819613844, "loss": 0.2355, "step": 26112 }, { "epoch": 2.115440699935191, "grad_norm": 0.057176556438207626, "learning_rate": 0.00010469868130878977, "loss": 0.2454, "step": 26113 }, { "epoch": 2.1155217109526894, "grad_norm": 0.0686374306678772, "learning_rate": 0.00010469418065619516, "loss": 0.2547, "step": 26114 }, { "epoch": 2.115602721970188, "grad_norm": 0.05528261885046959, "learning_rate": 0.00010468968000360053, "loss": 0.2413, "step": 26115 }, { "epoch": 2.1156837329876863, "grad_norm": 0.07165534049272537, "learning_rate": 0.00010468517935100591, "loss": 0.2451, "step": 26116 }, { "epoch": 2.1157647440051845, "grad_norm": 0.07650242745876312, "learning_rate": 0.00010468067869841127, "loss": 0.2724, "step": 26117 }, { "epoch": 2.1158457550226832, "grad_norm": 0.06085258349776268, "learning_rate": 0.00010467617804581664, "loss": 0.2926, "step": 26118 }, { "epoch": 2.1159267660401815, "grad_norm": 0.07131532579660416, "learning_rate": 0.00010467167739322201, "loss": 0.2394, "step": 26119 }, { "epoch": 2.1160077770576797, "grad_norm": 0.07328178733587265, "learning_rate": 0.0001046671767406274, "loss": 0.2732, "step": 26120 }, { "epoch": 2.1160887880751784, "grad_norm": 0.06203152611851692, "learning_rate": 0.00010466267608803278, "loss": 0.2863, "step": 26121 }, { "epoch": 2.1161697990926767, "grad_norm": 0.07757362723350525, "learning_rate": 0.00010465817543543815, "loss": 0.2655, "step": 26122 }, { "epoch": 2.116250810110175, "grad_norm": 0.07243662327528, "learning_rate": 0.00010465367478284351, "loss": 0.2313, "step": 26123 }, { "epoch": 2.116331821127673, "grad_norm": 0.07049787789583206, "learning_rate": 0.00010464917413024888, "loss": 0.2567, "step": 26124 }, { "epoch": 2.116412832145172, "grad_norm": 0.06684257090091705, "learning_rate": 0.00010464467347765427, "loss": 0.2406, "step": 26125 }, { "epoch": 2.11649384316267, "grad_norm": 0.052143748849630356, "learning_rate": 0.00010464017282505964, "loss": 0.23, "step": 26126 }, { "epoch": 2.1165748541801683, "grad_norm": 0.05962395295500755, "learning_rate": 0.00010463567217246502, "loss": 0.256, "step": 26127 }, { "epoch": 2.116655865197667, "grad_norm": 0.061811357736587524, "learning_rate": 0.00010463117151987039, "loss": 0.2421, "step": 26128 }, { "epoch": 2.1167368762151653, "grad_norm": 0.0682341679930687, "learning_rate": 0.00010462667086727575, "loss": 0.2453, "step": 26129 }, { "epoch": 2.1168178872326635, "grad_norm": 0.07151232659816742, "learning_rate": 0.00010462217021468112, "loss": 0.2643, "step": 26130 }, { "epoch": 2.116898898250162, "grad_norm": 0.053989555686712265, "learning_rate": 0.00010461766956208651, "loss": 0.2658, "step": 26131 }, { "epoch": 2.1169799092676604, "grad_norm": 0.06623726338148117, "learning_rate": 0.00010461316890949189, "loss": 0.2811, "step": 26132 }, { "epoch": 2.1170609202851587, "grad_norm": 0.05381413549184799, "learning_rate": 0.00010460866825689726, "loss": 0.2685, "step": 26133 }, { "epoch": 2.1171419313026574, "grad_norm": 0.056920427829027176, "learning_rate": 0.00010460416760430263, "loss": 0.2591, "step": 26134 }, { "epoch": 2.1172229423201556, "grad_norm": 0.06302447617053986, "learning_rate": 0.00010459966695170799, "loss": 0.2217, "step": 26135 }, { "epoch": 2.117303953337654, "grad_norm": 0.06110900640487671, "learning_rate": 0.00010459516629911337, "loss": 0.2377, "step": 26136 }, { "epoch": 2.117384964355152, "grad_norm": 0.059887710958719254, "learning_rate": 0.00010459066564651875, "loss": 0.2437, "step": 26137 }, { "epoch": 2.117465975372651, "grad_norm": 0.04854189604520798, "learning_rate": 0.00010458616499392413, "loss": 0.2316, "step": 26138 }, { "epoch": 2.117546986390149, "grad_norm": 0.06568967550992966, "learning_rate": 0.0001045816643413295, "loss": 0.2684, "step": 26139 }, { "epoch": 2.1176279974076473, "grad_norm": 0.04694654792547226, "learning_rate": 0.00010457716368873487, "loss": 0.2543, "step": 26140 }, { "epoch": 2.117709008425146, "grad_norm": 0.07281840592622757, "learning_rate": 0.00010457266303614023, "loss": 0.2672, "step": 26141 }, { "epoch": 2.1177900194426442, "grad_norm": 0.0669359415769577, "learning_rate": 0.00010456816238354561, "loss": 0.2436, "step": 26142 }, { "epoch": 2.1178710304601425, "grad_norm": 0.07672449201345444, "learning_rate": 0.000104563661730951, "loss": 0.2949, "step": 26143 }, { "epoch": 2.117952041477641, "grad_norm": 0.06504993140697479, "learning_rate": 0.00010455916107835637, "loss": 0.2751, "step": 26144 }, { "epoch": 2.1180330524951394, "grad_norm": 0.07261329889297485, "learning_rate": 0.00010455466042576174, "loss": 0.2494, "step": 26145 }, { "epoch": 2.1181140635126376, "grad_norm": 0.05860583484172821, "learning_rate": 0.00010455015977316712, "loss": 0.2514, "step": 26146 }, { "epoch": 2.118195074530136, "grad_norm": 0.09110474586486816, "learning_rate": 0.00010454565912057248, "loss": 0.293, "step": 26147 }, { "epoch": 2.1182760855476346, "grad_norm": 0.05644802376627922, "learning_rate": 0.00010454115846797785, "loss": 0.263, "step": 26148 }, { "epoch": 2.118357096565133, "grad_norm": 0.05598912760615349, "learning_rate": 0.00010453665781538324, "loss": 0.2473, "step": 26149 }, { "epoch": 2.118438107582631, "grad_norm": 0.05859972536563873, "learning_rate": 0.00010453215716278861, "loss": 0.232, "step": 26150 }, { "epoch": 2.1185191186001298, "grad_norm": 0.057381488382816315, "learning_rate": 0.00010452765651019398, "loss": 0.2124, "step": 26151 }, { "epoch": 2.118600129617628, "grad_norm": 0.05538659542798996, "learning_rate": 0.00010452315585759936, "loss": 0.2685, "step": 26152 }, { "epoch": 2.1186811406351262, "grad_norm": 0.0663476511836052, "learning_rate": 0.00010451865520500472, "loss": 0.2414, "step": 26153 }, { "epoch": 2.118762151652625, "grad_norm": 0.05742928013205528, "learning_rate": 0.00010451415455241012, "loss": 0.2559, "step": 26154 }, { "epoch": 2.118843162670123, "grad_norm": 0.08387189358472824, "learning_rate": 0.00010450965389981549, "loss": 0.2854, "step": 26155 }, { "epoch": 2.1189241736876214, "grad_norm": 0.059729333966970444, "learning_rate": 0.00010450515324722085, "loss": 0.2309, "step": 26156 }, { "epoch": 2.11900518470512, "grad_norm": 0.06769514083862305, "learning_rate": 0.00010450065259462623, "loss": 0.2841, "step": 26157 }, { "epoch": 2.1190861957226184, "grad_norm": 0.07828256487846375, "learning_rate": 0.0001044961519420316, "loss": 0.2738, "step": 26158 }, { "epoch": 2.1191672067401166, "grad_norm": 0.06256436556577682, "learning_rate": 0.00010449165128943696, "loss": 0.2503, "step": 26159 }, { "epoch": 2.119248217757615, "grad_norm": 0.07057522982358932, "learning_rate": 0.00010448715063684236, "loss": 0.2277, "step": 26160 }, { "epoch": 2.1193292287751135, "grad_norm": 0.062220748513936996, "learning_rate": 0.00010448264998424773, "loss": 0.268, "step": 26161 }, { "epoch": 2.119410239792612, "grad_norm": 0.055413730442523956, "learning_rate": 0.0001044781493316531, "loss": 0.2235, "step": 26162 }, { "epoch": 2.11949125081011, "grad_norm": 0.05254589021205902, "learning_rate": 0.00010447364867905847, "loss": 0.2702, "step": 26163 }, { "epoch": 2.1195722618276087, "grad_norm": 0.061898522078990936, "learning_rate": 0.00010446914802646384, "loss": 0.2119, "step": 26164 }, { "epoch": 2.119653272845107, "grad_norm": 0.0660005584359169, "learning_rate": 0.0001044646473738692, "loss": 0.27, "step": 26165 }, { "epoch": 2.119734283862605, "grad_norm": 0.058570023626089096, "learning_rate": 0.0001044601467212746, "loss": 0.2225, "step": 26166 }, { "epoch": 2.119815294880104, "grad_norm": 0.06641636788845062, "learning_rate": 0.00010445564606867997, "loss": 0.2676, "step": 26167 }, { "epoch": 2.119896305897602, "grad_norm": 0.06420730799436569, "learning_rate": 0.00010445114541608533, "loss": 0.2481, "step": 26168 }, { "epoch": 2.1199773169151004, "grad_norm": 0.06847498565912247, "learning_rate": 0.00010444664476349071, "loss": 0.2607, "step": 26169 }, { "epoch": 2.1200583279325986, "grad_norm": 0.055883947759866714, "learning_rate": 0.00010444214411089608, "loss": 0.2322, "step": 26170 }, { "epoch": 2.1201393389500973, "grad_norm": 0.06870889663696289, "learning_rate": 0.00010443764345830144, "loss": 0.2445, "step": 26171 }, { "epoch": 2.1202203499675956, "grad_norm": 0.06205878406763077, "learning_rate": 0.00010443314280570684, "loss": 0.2635, "step": 26172 }, { "epoch": 2.120301360985094, "grad_norm": 0.06938192248344421, "learning_rate": 0.00010442864215311222, "loss": 0.2607, "step": 26173 }, { "epoch": 2.1203823720025925, "grad_norm": 0.07056978344917297, "learning_rate": 0.00010442414150051758, "loss": 0.2683, "step": 26174 }, { "epoch": 2.1204633830200907, "grad_norm": 0.055816810578107834, "learning_rate": 0.00010441964084792295, "loss": 0.2517, "step": 26175 }, { "epoch": 2.120544394037589, "grad_norm": 0.06050346419215202, "learning_rate": 0.00010441514019532832, "loss": 0.2518, "step": 26176 }, { "epoch": 2.1206254050550877, "grad_norm": 0.06322181224822998, "learning_rate": 0.00010441063954273368, "loss": 0.288, "step": 26177 }, { "epoch": 2.120706416072586, "grad_norm": 0.06231006234884262, "learning_rate": 0.00010440613889013908, "loss": 0.2527, "step": 26178 }, { "epoch": 2.120787427090084, "grad_norm": 0.06672997772693634, "learning_rate": 0.00010440163823754446, "loss": 0.2725, "step": 26179 }, { "epoch": 2.120868438107583, "grad_norm": 0.05536544322967529, "learning_rate": 0.00010439713758494982, "loss": 0.2328, "step": 26180 }, { "epoch": 2.120949449125081, "grad_norm": 0.06709985435009003, "learning_rate": 0.00010439263693235519, "loss": 0.2527, "step": 26181 }, { "epoch": 2.1210304601425793, "grad_norm": 0.05647808685898781, "learning_rate": 0.00010438813627976057, "loss": 0.2834, "step": 26182 }, { "epoch": 2.1211114711600776, "grad_norm": 0.050501611083745956, "learning_rate": 0.00010438363562716595, "loss": 0.2427, "step": 26183 }, { "epoch": 2.1211924821775763, "grad_norm": 0.06943561136722565, "learning_rate": 0.00010437913497457133, "loss": 0.243, "step": 26184 }, { "epoch": 2.1212734931950745, "grad_norm": 0.0644908994436264, "learning_rate": 0.0001043746343219767, "loss": 0.2669, "step": 26185 }, { "epoch": 2.1213545042125728, "grad_norm": 0.07270579040050507, "learning_rate": 0.00010437013366938206, "loss": 0.2585, "step": 26186 }, { "epoch": 2.1214355152300715, "grad_norm": 0.07066863030195236, "learning_rate": 0.00010436563301678743, "loss": 0.2864, "step": 26187 }, { "epoch": 2.1215165262475697, "grad_norm": 0.06685689091682434, "learning_rate": 0.00010436113236419281, "loss": 0.2619, "step": 26188 }, { "epoch": 2.121597537265068, "grad_norm": 0.07248367369174957, "learning_rate": 0.0001043566317115982, "loss": 0.2502, "step": 26189 }, { "epoch": 2.1216785482825666, "grad_norm": 0.05484854802489281, "learning_rate": 0.00010435213105900357, "loss": 0.2461, "step": 26190 }, { "epoch": 2.121759559300065, "grad_norm": 0.05028130114078522, "learning_rate": 0.00010434763040640894, "loss": 0.2659, "step": 26191 }, { "epoch": 2.121840570317563, "grad_norm": 0.06200327351689339, "learning_rate": 0.0001043431297538143, "loss": 0.232, "step": 26192 }, { "epoch": 2.1219215813350614, "grad_norm": 0.048312537372112274, "learning_rate": 0.00010433862910121968, "loss": 0.242, "step": 26193 }, { "epoch": 2.12200259235256, "grad_norm": 0.058204833418130875, "learning_rate": 0.00010433412844862505, "loss": 0.241, "step": 26194 }, { "epoch": 2.1220836033700583, "grad_norm": 0.06451279670000076, "learning_rate": 0.00010432962779603044, "loss": 0.2668, "step": 26195 }, { "epoch": 2.1221646143875565, "grad_norm": 0.0593167208135128, "learning_rate": 0.00010432512714343581, "loss": 0.2549, "step": 26196 }, { "epoch": 2.1222456254050552, "grad_norm": 0.060698408633470535, "learning_rate": 0.00010432062649084118, "loss": 0.2381, "step": 26197 }, { "epoch": 2.1223266364225535, "grad_norm": 0.07551072537899017, "learning_rate": 0.00010431612583824654, "loss": 0.2797, "step": 26198 }, { "epoch": 2.1224076474400517, "grad_norm": 0.06287830322980881, "learning_rate": 0.00010431162518565192, "loss": 0.2611, "step": 26199 }, { "epoch": 2.1224886584575504, "grad_norm": 0.0578264556825161, "learning_rate": 0.00010430712453305729, "loss": 0.2523, "step": 26200 }, { "epoch": 2.1225696694750487, "grad_norm": 0.053504422307014465, "learning_rate": 0.00010430262388046268, "loss": 0.2402, "step": 26201 }, { "epoch": 2.122650680492547, "grad_norm": 0.06288152933120728, "learning_rate": 0.00010429812322786805, "loss": 0.2552, "step": 26202 }, { "epoch": 2.1227316915100456, "grad_norm": 0.07255303859710693, "learning_rate": 0.00010429362257527342, "loss": 0.2479, "step": 26203 }, { "epoch": 2.122812702527544, "grad_norm": 0.06279697269201279, "learning_rate": 0.00010428912192267878, "loss": 0.2606, "step": 26204 }, { "epoch": 2.122893713545042, "grad_norm": 0.07724709808826447, "learning_rate": 0.00010428462127008416, "loss": 0.3127, "step": 26205 }, { "epoch": 2.1229747245625403, "grad_norm": 0.06904693692922592, "learning_rate": 0.00010428012061748955, "loss": 0.2998, "step": 26206 }, { "epoch": 2.123055735580039, "grad_norm": 0.07110625505447388, "learning_rate": 0.00010427561996489492, "loss": 0.2679, "step": 26207 }, { "epoch": 2.1231367465975373, "grad_norm": 0.055152129381895065, "learning_rate": 0.00010427111931230029, "loss": 0.2452, "step": 26208 }, { "epoch": 2.1232177576150355, "grad_norm": 0.050709083676338196, "learning_rate": 0.00010426661865970567, "loss": 0.2758, "step": 26209 }, { "epoch": 2.123298768632534, "grad_norm": 0.057936035096645355, "learning_rate": 0.00010426211800711103, "loss": 0.2816, "step": 26210 }, { "epoch": 2.1233797796500324, "grad_norm": 0.05752767622470856, "learning_rate": 0.0001042576173545164, "loss": 0.2379, "step": 26211 }, { "epoch": 2.1234607906675307, "grad_norm": 0.05384643375873566, "learning_rate": 0.00010425311670192179, "loss": 0.2312, "step": 26212 }, { "epoch": 2.1235418016850294, "grad_norm": 0.07233273983001709, "learning_rate": 0.00010424861604932716, "loss": 0.2908, "step": 26213 }, { "epoch": 2.1236228127025276, "grad_norm": 0.06371590495109558, "learning_rate": 0.00010424411539673253, "loss": 0.2986, "step": 26214 }, { "epoch": 2.123703823720026, "grad_norm": 0.054069891571998596, "learning_rate": 0.00010423961474413791, "loss": 0.2706, "step": 26215 }, { "epoch": 2.123784834737524, "grad_norm": 0.06553568691015244, "learning_rate": 0.00010423511409154327, "loss": 0.213, "step": 26216 }, { "epoch": 2.123865845755023, "grad_norm": 0.05562140792608261, "learning_rate": 0.00010423061343894864, "loss": 0.2606, "step": 26217 }, { "epoch": 2.123946856772521, "grad_norm": 0.06978686153888702, "learning_rate": 0.00010422611278635403, "loss": 0.2908, "step": 26218 }, { "epoch": 2.1240278677900193, "grad_norm": 0.06078553944826126, "learning_rate": 0.0001042216121337594, "loss": 0.2235, "step": 26219 }, { "epoch": 2.124108878807518, "grad_norm": 0.05368654802441597, "learning_rate": 0.00010421711148116478, "loss": 0.218, "step": 26220 }, { "epoch": 2.124189889825016, "grad_norm": 0.07140675187110901, "learning_rate": 0.00010421261082857015, "loss": 0.2807, "step": 26221 }, { "epoch": 2.1242709008425145, "grad_norm": 0.06928906589746475, "learning_rate": 0.00010420811017597551, "loss": 0.2496, "step": 26222 }, { "epoch": 2.124351911860013, "grad_norm": 0.07112964987754822, "learning_rate": 0.00010420360952338088, "loss": 0.258, "step": 26223 }, { "epoch": 2.1244329228775114, "grad_norm": 0.06662682443857193, "learning_rate": 0.00010419910887078628, "loss": 0.2908, "step": 26224 }, { "epoch": 2.1245139338950096, "grad_norm": 0.05349923297762871, "learning_rate": 0.00010419460821819164, "loss": 0.2349, "step": 26225 }, { "epoch": 2.124594944912508, "grad_norm": 0.06059380993247032, "learning_rate": 0.00010419010756559702, "loss": 0.2223, "step": 26226 }, { "epoch": 2.1246759559300066, "grad_norm": 0.06186490133404732, "learning_rate": 0.00010418560691300239, "loss": 0.2835, "step": 26227 }, { "epoch": 2.124756966947505, "grad_norm": 0.059035640209913254, "learning_rate": 0.00010418110626040775, "loss": 0.2732, "step": 26228 }, { "epoch": 2.124837977965003, "grad_norm": 0.06877464801073074, "learning_rate": 0.00010417660560781312, "loss": 0.2323, "step": 26229 }, { "epoch": 2.1249189889825018, "grad_norm": 0.06301309168338776, "learning_rate": 0.00010417210495521853, "loss": 0.2623, "step": 26230 }, { "epoch": 2.125, "grad_norm": 0.07027006894350052, "learning_rate": 0.00010416760430262389, "loss": 0.2478, "step": 26231 }, { "epoch": 2.1250810110174982, "grad_norm": 0.05357455834746361, "learning_rate": 0.00010416310365002926, "loss": 0.2156, "step": 26232 }, { "epoch": 2.125162022034997, "grad_norm": 0.06986691057682037, "learning_rate": 0.00010415860299743463, "loss": 0.2421, "step": 26233 }, { "epoch": 2.125243033052495, "grad_norm": 0.06922683119773865, "learning_rate": 0.00010415410234483999, "loss": 0.2455, "step": 26234 }, { "epoch": 2.1253240440699934, "grad_norm": 0.054529983550310135, "learning_rate": 0.0001041496016922454, "loss": 0.2331, "step": 26235 }, { "epoch": 2.1254050550874917, "grad_norm": 0.07196197658777237, "learning_rate": 0.00010414510103965077, "loss": 0.2474, "step": 26236 }, { "epoch": 2.1254860661049904, "grad_norm": 0.07445516437292099, "learning_rate": 0.00010414060038705613, "loss": 0.2361, "step": 26237 }, { "epoch": 2.1255670771224886, "grad_norm": 0.07319855690002441, "learning_rate": 0.0001041360997344615, "loss": 0.256, "step": 26238 }, { "epoch": 2.125648088139987, "grad_norm": 0.05994647741317749, "learning_rate": 0.00010413159908186687, "loss": 0.2262, "step": 26239 }, { "epoch": 2.1257290991574855, "grad_norm": 0.07112723588943481, "learning_rate": 0.00010412709842927223, "loss": 0.2444, "step": 26240 }, { "epoch": 2.125810110174984, "grad_norm": 0.06451784819364548, "learning_rate": 0.00010412259777667764, "loss": 0.2477, "step": 26241 }, { "epoch": 2.125891121192482, "grad_norm": 0.05500245466828346, "learning_rate": 0.00010411809712408301, "loss": 0.2422, "step": 26242 }, { "epoch": 2.1259721322099807, "grad_norm": 0.06472550332546234, "learning_rate": 0.00010411359647148837, "loss": 0.2837, "step": 26243 }, { "epoch": 2.126053143227479, "grad_norm": 0.07072708755731583, "learning_rate": 0.00010410909581889374, "loss": 0.2748, "step": 26244 }, { "epoch": 2.126134154244977, "grad_norm": 0.07283692806959152, "learning_rate": 0.00010410459516629912, "loss": 0.2817, "step": 26245 }, { "epoch": 2.126215165262476, "grad_norm": 0.07446455210447311, "learning_rate": 0.00010410009451370448, "loss": 0.2939, "step": 26246 }, { "epoch": 2.126296176279974, "grad_norm": 0.066004179418087, "learning_rate": 0.00010409559386110988, "loss": 0.2706, "step": 26247 }, { "epoch": 2.1263771872974724, "grad_norm": 0.05694718658924103, "learning_rate": 0.00010409109320851525, "loss": 0.2355, "step": 26248 }, { "epoch": 2.126458198314971, "grad_norm": 0.05692675709724426, "learning_rate": 0.00010408659255592061, "loss": 0.242, "step": 26249 }, { "epoch": 2.1265392093324693, "grad_norm": 0.0675569698214531, "learning_rate": 0.00010408209190332598, "loss": 0.2595, "step": 26250 }, { "epoch": 2.1266202203499676, "grad_norm": 0.06404047459363937, "learning_rate": 0.00010407759125073136, "loss": 0.2409, "step": 26251 }, { "epoch": 2.126701231367466, "grad_norm": 0.0745696946978569, "learning_rate": 0.00010407309059813672, "loss": 0.2876, "step": 26252 }, { "epoch": 2.1267822423849645, "grad_norm": 0.06286709755659103, "learning_rate": 0.00010406858994554212, "loss": 0.2785, "step": 26253 }, { "epoch": 2.1268632534024627, "grad_norm": 0.05989371985197067, "learning_rate": 0.00010406408929294749, "loss": 0.2816, "step": 26254 }, { "epoch": 2.126944264419961, "grad_norm": 0.0647851824760437, "learning_rate": 0.00010405958864035285, "loss": 0.2196, "step": 26255 }, { "epoch": 2.1270252754374597, "grad_norm": 0.0552176833152771, "learning_rate": 0.00010405508798775823, "loss": 0.2191, "step": 26256 }, { "epoch": 2.127106286454958, "grad_norm": 0.06426537781953812, "learning_rate": 0.0001040505873351636, "loss": 0.2801, "step": 26257 }, { "epoch": 2.127187297472456, "grad_norm": 0.06369642913341522, "learning_rate": 0.00010404608668256899, "loss": 0.2588, "step": 26258 }, { "epoch": 2.1272683084899544, "grad_norm": 0.059055306017398834, "learning_rate": 0.00010404158602997436, "loss": 0.2524, "step": 26259 }, { "epoch": 2.127349319507453, "grad_norm": 0.06316355615854263, "learning_rate": 0.00010403708537737973, "loss": 0.2154, "step": 26260 }, { "epoch": 2.1274303305249513, "grad_norm": 0.08105630427598953, "learning_rate": 0.0001040325847247851, "loss": 0.2739, "step": 26261 }, { "epoch": 2.1275113415424496, "grad_norm": 0.07276780158281326, "learning_rate": 0.00010402808407219047, "loss": 0.267, "step": 26262 }, { "epoch": 2.1275923525599483, "grad_norm": 0.05796151980757713, "learning_rate": 0.00010402358341959584, "loss": 0.2863, "step": 26263 }, { "epoch": 2.1276733635774465, "grad_norm": 0.07009650766849518, "learning_rate": 0.00010401908276700123, "loss": 0.2627, "step": 26264 }, { "epoch": 2.1277543745949448, "grad_norm": 0.06027712672948837, "learning_rate": 0.0001040145821144066, "loss": 0.2333, "step": 26265 }, { "epoch": 2.1278353856124435, "grad_norm": 0.06609676033258438, "learning_rate": 0.00010401008146181198, "loss": 0.2538, "step": 26266 }, { "epoch": 2.1279163966299417, "grad_norm": 0.07370631396770477, "learning_rate": 0.00010400558080921734, "loss": 0.2589, "step": 26267 }, { "epoch": 2.12799740764744, "grad_norm": 0.05804192274808884, "learning_rate": 0.00010400108015662271, "loss": 0.2441, "step": 26268 }, { "epoch": 2.1280784186649386, "grad_norm": 0.05638861283659935, "learning_rate": 0.00010399657950402808, "loss": 0.2698, "step": 26269 }, { "epoch": 2.128159429682437, "grad_norm": 0.06556017696857452, "learning_rate": 0.00010399207885143347, "loss": 0.2474, "step": 26270 }, { "epoch": 2.128240440699935, "grad_norm": 0.05903168395161629, "learning_rate": 0.00010398757819883884, "loss": 0.2387, "step": 26271 }, { "epoch": 2.1283214517174334, "grad_norm": 0.056488338857889175, "learning_rate": 0.00010398307754624422, "loss": 0.2302, "step": 26272 }, { "epoch": 2.128402462734932, "grad_norm": 0.05794563144445419, "learning_rate": 0.00010397857689364958, "loss": 0.2536, "step": 26273 }, { "epoch": 2.1284834737524303, "grad_norm": 0.06688067317008972, "learning_rate": 0.00010397407624105495, "loss": 0.2834, "step": 26274 }, { "epoch": 2.1285644847699285, "grad_norm": 0.049689728766679764, "learning_rate": 0.00010396957558846032, "loss": 0.219, "step": 26275 }, { "epoch": 2.1286454957874272, "grad_norm": 0.06290605664253235, "learning_rate": 0.00010396507493586571, "loss": 0.2924, "step": 26276 }, { "epoch": 2.1287265068049255, "grad_norm": 0.0553937628865242, "learning_rate": 0.00010396057428327108, "loss": 0.2459, "step": 26277 }, { "epoch": 2.1288075178224237, "grad_norm": 0.06402228772640228, "learning_rate": 0.00010395607363067646, "loss": 0.2613, "step": 26278 }, { "epoch": 2.1288885288399224, "grad_norm": 0.07002311199903488, "learning_rate": 0.00010395157297808182, "loss": 0.2424, "step": 26279 }, { "epoch": 2.1289695398574207, "grad_norm": 0.0628318265080452, "learning_rate": 0.00010394707232548719, "loss": 0.2463, "step": 26280 }, { "epoch": 2.129050550874919, "grad_norm": 0.062282539904117584, "learning_rate": 0.00010394257167289257, "loss": 0.2638, "step": 26281 }, { "epoch": 2.129131561892417, "grad_norm": 0.05883893743157387, "learning_rate": 0.00010393807102029795, "loss": 0.2542, "step": 26282 }, { "epoch": 2.129212572909916, "grad_norm": 0.07414300739765167, "learning_rate": 0.00010393357036770333, "loss": 0.2334, "step": 26283 }, { "epoch": 2.129293583927414, "grad_norm": 0.06983643025159836, "learning_rate": 0.0001039290697151087, "loss": 0.267, "step": 26284 }, { "epoch": 2.1293745949449123, "grad_norm": 0.0707906112074852, "learning_rate": 0.00010392456906251406, "loss": 0.2389, "step": 26285 }, { "epoch": 2.129455605962411, "grad_norm": 0.06597426533699036, "learning_rate": 0.00010392006840991943, "loss": 0.252, "step": 26286 }, { "epoch": 2.1295366169799093, "grad_norm": 0.0692160502076149, "learning_rate": 0.00010391556775732483, "loss": 0.2618, "step": 26287 }, { "epoch": 2.1296176279974075, "grad_norm": 0.05935715511441231, "learning_rate": 0.0001039110671047302, "loss": 0.2613, "step": 26288 }, { "epoch": 2.129698639014906, "grad_norm": 0.0638524740934372, "learning_rate": 0.00010390656645213557, "loss": 0.2415, "step": 26289 }, { "epoch": 2.1297796500324044, "grad_norm": 0.06546127796173096, "learning_rate": 0.00010390206579954094, "loss": 0.225, "step": 26290 }, { "epoch": 2.1298606610499027, "grad_norm": 0.05805578827857971, "learning_rate": 0.0001038975651469463, "loss": 0.2147, "step": 26291 }, { "epoch": 2.1299416720674014, "grad_norm": 0.050522420555353165, "learning_rate": 0.00010389306449435168, "loss": 0.2434, "step": 26292 }, { "epoch": 2.1300226830848996, "grad_norm": 0.06944924592971802, "learning_rate": 0.00010388856384175708, "loss": 0.284, "step": 26293 }, { "epoch": 2.130103694102398, "grad_norm": 0.05426494777202606, "learning_rate": 0.00010388406318916244, "loss": 0.2662, "step": 26294 }, { "epoch": 2.130184705119896, "grad_norm": 0.06014052778482437, "learning_rate": 0.00010387956253656781, "loss": 0.2193, "step": 26295 }, { "epoch": 2.130265716137395, "grad_norm": 0.06177065894007683, "learning_rate": 0.00010387506188397318, "loss": 0.2814, "step": 26296 }, { "epoch": 2.130346727154893, "grad_norm": 0.054523248225450516, "learning_rate": 0.00010387056123137854, "loss": 0.2851, "step": 26297 }, { "epoch": 2.1304277381723913, "grad_norm": 0.062403611838817596, "learning_rate": 0.00010386606057878392, "loss": 0.2655, "step": 26298 }, { "epoch": 2.13050874918989, "grad_norm": 0.06154339760541916, "learning_rate": 0.00010386155992618932, "loss": 0.2696, "step": 26299 }, { "epoch": 2.130589760207388, "grad_norm": 0.05249398201704025, "learning_rate": 0.00010385705927359468, "loss": 0.243, "step": 26300 }, { "epoch": 2.1306707712248865, "grad_norm": 0.06125812232494354, "learning_rate": 0.00010385255862100005, "loss": 0.2581, "step": 26301 }, { "epoch": 2.130751782242385, "grad_norm": 0.06812266260385513, "learning_rate": 0.00010384805796840542, "loss": 0.2485, "step": 26302 }, { "epoch": 2.1308327932598834, "grad_norm": 0.0666508674621582, "learning_rate": 0.00010384355731581078, "loss": 0.2815, "step": 26303 }, { "epoch": 2.1309138042773816, "grad_norm": 0.07783432304859161, "learning_rate": 0.00010383905666321616, "loss": 0.277, "step": 26304 }, { "epoch": 2.13099481529488, "grad_norm": 0.06238381192088127, "learning_rate": 0.00010383455601062156, "loss": 0.2191, "step": 26305 }, { "epoch": 2.1310758263123786, "grad_norm": 0.08316400647163391, "learning_rate": 0.00010383005535802692, "loss": 0.281, "step": 26306 }, { "epoch": 2.131156837329877, "grad_norm": 0.07105521857738495, "learning_rate": 0.00010382555470543229, "loss": 0.2566, "step": 26307 }, { "epoch": 2.131237848347375, "grad_norm": 0.06777141988277435, "learning_rate": 0.00010382105405283767, "loss": 0.2886, "step": 26308 }, { "epoch": 2.1313188593648738, "grad_norm": 0.0814325287938118, "learning_rate": 0.00010381655340024303, "loss": 0.2652, "step": 26309 }, { "epoch": 2.131399870382372, "grad_norm": 0.05332833155989647, "learning_rate": 0.00010381205274764843, "loss": 0.2244, "step": 26310 }, { "epoch": 2.1314808813998702, "grad_norm": 0.051932573318481445, "learning_rate": 0.0001038075520950538, "loss": 0.2449, "step": 26311 }, { "epoch": 2.131561892417369, "grad_norm": 0.06615007668733597, "learning_rate": 0.00010380305144245916, "loss": 0.2693, "step": 26312 }, { "epoch": 2.131642903434867, "grad_norm": 0.05709698051214218, "learning_rate": 0.00010379855078986453, "loss": 0.261, "step": 26313 }, { "epoch": 2.1317239144523654, "grad_norm": 0.06457649916410446, "learning_rate": 0.00010379405013726991, "loss": 0.2664, "step": 26314 }, { "epoch": 2.131804925469864, "grad_norm": 0.061860498040914536, "learning_rate": 0.00010378954948467527, "loss": 0.2273, "step": 26315 }, { "epoch": 2.1318859364873624, "grad_norm": 0.05811541900038719, "learning_rate": 0.00010378504883208067, "loss": 0.2317, "step": 26316 }, { "epoch": 2.1319669475048606, "grad_norm": 0.08073913305997849, "learning_rate": 0.00010378054817948604, "loss": 0.2872, "step": 26317 }, { "epoch": 2.132047958522359, "grad_norm": 0.059323426336050034, "learning_rate": 0.0001037760475268914, "loss": 0.2742, "step": 26318 }, { "epoch": 2.1321289695398575, "grad_norm": 0.06322403997182846, "learning_rate": 0.00010377154687429678, "loss": 0.2432, "step": 26319 }, { "epoch": 2.1322099805573558, "grad_norm": 0.08462004363536835, "learning_rate": 0.00010376704622170215, "loss": 0.265, "step": 26320 }, { "epoch": 2.132290991574854, "grad_norm": 0.05855248123407364, "learning_rate": 0.00010376254556910751, "loss": 0.2519, "step": 26321 }, { "epoch": 2.1323720025923527, "grad_norm": 0.06013753265142441, "learning_rate": 0.00010375804491651291, "loss": 0.2431, "step": 26322 }, { "epoch": 2.132453013609851, "grad_norm": 0.06801541149616241, "learning_rate": 0.00010375354426391828, "loss": 0.2304, "step": 26323 }, { "epoch": 2.132534024627349, "grad_norm": 0.0554489828646183, "learning_rate": 0.00010374904361132364, "loss": 0.2435, "step": 26324 }, { "epoch": 2.132615035644848, "grad_norm": 0.059247396886348724, "learning_rate": 0.00010374454295872902, "loss": 0.25, "step": 26325 }, { "epoch": 2.132696046662346, "grad_norm": 0.05381538346409798, "learning_rate": 0.00010374004230613439, "loss": 0.2666, "step": 26326 }, { "epoch": 2.1327770576798444, "grad_norm": 0.053969722241163254, "learning_rate": 0.00010373554165353976, "loss": 0.2994, "step": 26327 }, { "epoch": 2.1328580686973426, "grad_norm": 0.062413427978754044, "learning_rate": 0.00010373104100094515, "loss": 0.2737, "step": 26328 }, { "epoch": 2.1329390797148413, "grad_norm": 0.06641319394111633, "learning_rate": 0.00010372654034835053, "loss": 0.2615, "step": 26329 }, { "epoch": 2.1330200907323396, "grad_norm": 0.05698978528380394, "learning_rate": 0.00010372203969575589, "loss": 0.2741, "step": 26330 }, { "epoch": 2.133101101749838, "grad_norm": 0.05135650932788849, "learning_rate": 0.00010371753904316126, "loss": 0.2124, "step": 26331 }, { "epoch": 2.1331821127673365, "grad_norm": 0.0743556097149849, "learning_rate": 0.00010371303839056663, "loss": 0.2611, "step": 26332 }, { "epoch": 2.1332631237848347, "grad_norm": 0.06397853791713715, "learning_rate": 0.000103708537737972, "loss": 0.239, "step": 26333 }, { "epoch": 2.133344134802333, "grad_norm": 0.08030462265014648, "learning_rate": 0.0001037040370853774, "loss": 0.2777, "step": 26334 }, { "epoch": 2.1334251458198317, "grad_norm": 0.06808465719223022, "learning_rate": 0.00010369953643278277, "loss": 0.2438, "step": 26335 }, { "epoch": 2.13350615683733, "grad_norm": 0.05307475104928017, "learning_rate": 0.00010369503578018813, "loss": 0.2798, "step": 26336 }, { "epoch": 2.133587167854828, "grad_norm": 0.05824761092662811, "learning_rate": 0.0001036905351275935, "loss": 0.2436, "step": 26337 }, { "epoch": 2.133668178872327, "grad_norm": 0.07136199623346329, "learning_rate": 0.00010368603447499887, "loss": 0.2344, "step": 26338 }, { "epoch": 2.133749189889825, "grad_norm": 0.06164703890681267, "learning_rate": 0.00010368153382240426, "loss": 0.2324, "step": 26339 }, { "epoch": 2.1338302009073233, "grad_norm": 0.0646100714802742, "learning_rate": 0.00010367703316980964, "loss": 0.2902, "step": 26340 }, { "epoch": 2.1339112119248216, "grad_norm": 0.08409897983074188, "learning_rate": 0.00010367253251721501, "loss": 0.2522, "step": 26341 }, { "epoch": 2.1339922229423203, "grad_norm": 0.06278625130653381, "learning_rate": 0.00010366803186462037, "loss": 0.208, "step": 26342 }, { "epoch": 2.1340732339598185, "grad_norm": 0.07819729298353195, "learning_rate": 0.00010366353121202574, "loss": 0.2776, "step": 26343 }, { "epoch": 2.1341542449773168, "grad_norm": 0.05907128378748894, "learning_rate": 0.00010365903055943112, "loss": 0.2685, "step": 26344 }, { "epoch": 2.1342352559948155, "grad_norm": 0.06722066551446915, "learning_rate": 0.0001036545299068365, "loss": 0.2745, "step": 26345 }, { "epoch": 2.1343162670123137, "grad_norm": 0.06968337297439575, "learning_rate": 0.00010365002925424188, "loss": 0.2304, "step": 26346 }, { "epoch": 2.134397278029812, "grad_norm": 0.05220748856663704, "learning_rate": 0.00010364552860164725, "loss": 0.2533, "step": 26347 }, { "epoch": 2.1344782890473106, "grad_norm": 0.06192225590348244, "learning_rate": 0.00010364102794905261, "loss": 0.2705, "step": 26348 }, { "epoch": 2.134559300064809, "grad_norm": 0.06906786561012268, "learning_rate": 0.00010363652729645798, "loss": 0.2649, "step": 26349 }, { "epoch": 2.134640311082307, "grad_norm": 0.06037280336022377, "learning_rate": 0.00010363202664386336, "loss": 0.2351, "step": 26350 }, { "epoch": 2.1347213220998054, "grad_norm": 0.06934493780136108, "learning_rate": 0.00010362752599126874, "loss": 0.2877, "step": 26351 }, { "epoch": 2.134802333117304, "grad_norm": 0.07149045169353485, "learning_rate": 0.00010362302533867412, "loss": 0.2611, "step": 26352 }, { "epoch": 2.1348833441348023, "grad_norm": 0.0638730451464653, "learning_rate": 0.00010361852468607949, "loss": 0.2668, "step": 26353 }, { "epoch": 2.1349643551523005, "grad_norm": 0.05846642702817917, "learning_rate": 0.00010361402403348485, "loss": 0.2729, "step": 26354 }, { "epoch": 2.1350453661697992, "grad_norm": 0.061692554503679276, "learning_rate": 0.00010360952338089023, "loss": 0.2465, "step": 26355 }, { "epoch": 2.1351263771872975, "grad_norm": 0.06015016511082649, "learning_rate": 0.0001036050227282956, "loss": 0.2682, "step": 26356 }, { "epoch": 2.1352073882047957, "grad_norm": 0.07217731326818466, "learning_rate": 0.00010360052207570099, "loss": 0.3123, "step": 26357 }, { "epoch": 2.1352883992222944, "grad_norm": 0.05961654335260391, "learning_rate": 0.00010359602142310636, "loss": 0.2348, "step": 26358 }, { "epoch": 2.1353694102397927, "grad_norm": 0.05991566553711891, "learning_rate": 0.00010359152077051173, "loss": 0.2568, "step": 26359 }, { "epoch": 2.135450421257291, "grad_norm": 0.05187077075242996, "learning_rate": 0.0001035870201179171, "loss": 0.234, "step": 26360 }, { "epoch": 2.1355314322747896, "grad_norm": 0.07228851318359375, "learning_rate": 0.00010358251946532247, "loss": 0.2917, "step": 26361 }, { "epoch": 2.135612443292288, "grad_norm": 0.05363132804632187, "learning_rate": 0.00010357801881272784, "loss": 0.2507, "step": 26362 }, { "epoch": 2.135693454309786, "grad_norm": 0.06615161150693893, "learning_rate": 0.00010357351816013323, "loss": 0.2376, "step": 26363 }, { "epoch": 2.1357744653272843, "grad_norm": 0.059696223586797714, "learning_rate": 0.0001035690175075386, "loss": 0.2358, "step": 26364 }, { "epoch": 2.135855476344783, "grad_norm": 0.05982410907745361, "learning_rate": 0.00010356451685494398, "loss": 0.2444, "step": 26365 }, { "epoch": 2.1359364873622813, "grad_norm": 0.06730233877897263, "learning_rate": 0.00010356001620234934, "loss": 0.2819, "step": 26366 }, { "epoch": 2.1360174983797795, "grad_norm": 0.061217255890369415, "learning_rate": 0.00010355551554975471, "loss": 0.2586, "step": 26367 }, { "epoch": 2.136098509397278, "grad_norm": 0.05739770084619522, "learning_rate": 0.00010355101489716011, "loss": 0.2267, "step": 26368 }, { "epoch": 2.1361795204147764, "grad_norm": 0.07194606959819794, "learning_rate": 0.00010354651424456547, "loss": 0.2953, "step": 26369 }, { "epoch": 2.1362605314322747, "grad_norm": 0.06404095888137817, "learning_rate": 0.00010354201359197084, "loss": 0.2578, "step": 26370 }, { "epoch": 2.1363415424497734, "grad_norm": 0.09129726886749268, "learning_rate": 0.00010353751293937622, "loss": 0.2832, "step": 26371 }, { "epoch": 2.1364225534672716, "grad_norm": 0.061405278742313385, "learning_rate": 0.00010353301228678158, "loss": 0.229, "step": 26372 }, { "epoch": 2.13650356448477, "grad_norm": 0.05377738177776337, "learning_rate": 0.00010352851163418695, "loss": 0.2135, "step": 26373 }, { "epoch": 2.136584575502268, "grad_norm": 0.05743473395705223, "learning_rate": 0.00010352401098159235, "loss": 0.2366, "step": 26374 }, { "epoch": 2.136665586519767, "grad_norm": 0.05833156779408455, "learning_rate": 0.00010351951032899771, "loss": 0.2676, "step": 26375 }, { "epoch": 2.136746597537265, "grad_norm": 0.06809478253126144, "learning_rate": 0.00010351500967640309, "loss": 0.2674, "step": 26376 }, { "epoch": 2.1368276085547633, "grad_norm": 0.05865863710641861, "learning_rate": 0.00010351050902380846, "loss": 0.2034, "step": 26377 }, { "epoch": 2.136908619572262, "grad_norm": 0.05914255976676941, "learning_rate": 0.00010350600837121382, "loss": 0.2658, "step": 26378 }, { "epoch": 2.13698963058976, "grad_norm": 0.055052950978279114, "learning_rate": 0.00010350150771861919, "loss": 0.2838, "step": 26379 }, { "epoch": 2.1370706416072585, "grad_norm": 0.0790739506483078, "learning_rate": 0.00010349700706602459, "loss": 0.3052, "step": 26380 }, { "epoch": 2.137151652624757, "grad_norm": 0.06399786472320557, "learning_rate": 0.00010349250641342995, "loss": 0.253, "step": 26381 }, { "epoch": 2.1372326636422554, "grad_norm": 0.06380538642406464, "learning_rate": 0.00010348800576083533, "loss": 0.2286, "step": 26382 }, { "epoch": 2.1373136746597536, "grad_norm": 0.06525228917598724, "learning_rate": 0.0001034835051082407, "loss": 0.2567, "step": 26383 }, { "epoch": 2.1373946856772523, "grad_norm": 0.05566810816526413, "learning_rate": 0.00010347900445564606, "loss": 0.2635, "step": 26384 }, { "epoch": 2.1374756966947506, "grad_norm": 0.06300102174282074, "learning_rate": 0.00010347450380305143, "loss": 0.2824, "step": 26385 }, { "epoch": 2.137556707712249, "grad_norm": 0.05361280217766762, "learning_rate": 0.00010347000315045683, "loss": 0.2763, "step": 26386 }, { "epoch": 2.137637718729747, "grad_norm": 0.04924359917640686, "learning_rate": 0.0001034655024978622, "loss": 0.224, "step": 26387 }, { "epoch": 2.1377187297472457, "grad_norm": 0.05773067846894264, "learning_rate": 0.00010346100184526757, "loss": 0.2382, "step": 26388 }, { "epoch": 2.137799740764744, "grad_norm": 0.07036109268665314, "learning_rate": 0.00010345650119267294, "loss": 0.2489, "step": 26389 }, { "epoch": 2.1378807517822422, "grad_norm": 0.06863632053136826, "learning_rate": 0.00010345200054007832, "loss": 0.2514, "step": 26390 }, { "epoch": 2.137961762799741, "grad_norm": 0.07008032500743866, "learning_rate": 0.0001034474998874837, "loss": 0.2615, "step": 26391 }, { "epoch": 2.138042773817239, "grad_norm": 0.05886336788535118, "learning_rate": 0.00010344299923488908, "loss": 0.2667, "step": 26392 }, { "epoch": 2.1381237848347374, "grad_norm": 0.06166012957692146, "learning_rate": 0.00010343849858229444, "loss": 0.2622, "step": 26393 }, { "epoch": 2.138204795852236, "grad_norm": 0.06474199146032333, "learning_rate": 0.00010343399792969981, "loss": 0.2691, "step": 26394 }, { "epoch": 2.1382858068697344, "grad_norm": 0.06455071270465851, "learning_rate": 0.00010342949727710518, "loss": 0.2353, "step": 26395 }, { "epoch": 2.1383668178872326, "grad_norm": 0.06218364089727402, "learning_rate": 0.00010342499662451056, "loss": 0.2311, "step": 26396 }, { "epoch": 2.138447828904731, "grad_norm": 0.06683358550071716, "learning_rate": 0.00010342049597191594, "loss": 0.2628, "step": 26397 }, { "epoch": 2.1385288399222295, "grad_norm": 0.061113521456718445, "learning_rate": 0.00010341599531932132, "loss": 0.2618, "step": 26398 }, { "epoch": 2.1386098509397278, "grad_norm": 0.047484900802373886, "learning_rate": 0.00010341149466672668, "loss": 0.2075, "step": 26399 }, { "epoch": 2.138690861957226, "grad_norm": 0.07419335097074509, "learning_rate": 0.00010340699401413205, "loss": 0.2428, "step": 26400 }, { "epoch": 2.1387718729747247, "grad_norm": 0.0658346489071846, "learning_rate": 0.00010340249336153743, "loss": 0.256, "step": 26401 }, { "epoch": 2.138852883992223, "grad_norm": 0.061048831790685654, "learning_rate": 0.0001033979927089428, "loss": 0.2456, "step": 26402 }, { "epoch": 2.138933895009721, "grad_norm": 0.06434381008148193, "learning_rate": 0.00010339349205634819, "loss": 0.2481, "step": 26403 }, { "epoch": 2.13901490602722, "grad_norm": 0.07088811695575714, "learning_rate": 0.00010338899140375356, "loss": 0.2486, "step": 26404 }, { "epoch": 2.139095917044718, "grad_norm": 0.06179327517747879, "learning_rate": 0.00010338449075115892, "loss": 0.2761, "step": 26405 }, { "epoch": 2.1391769280622164, "grad_norm": 0.07120385766029358, "learning_rate": 0.00010337999009856429, "loss": 0.2839, "step": 26406 }, { "epoch": 2.139257939079715, "grad_norm": 0.05768730491399765, "learning_rate": 0.00010337548944596967, "loss": 0.248, "step": 26407 }, { "epoch": 2.1393389500972133, "grad_norm": 0.06576817482709885, "learning_rate": 0.00010337098879337504, "loss": 0.2627, "step": 26408 }, { "epoch": 2.1394199611147116, "grad_norm": 0.056955333799123764, "learning_rate": 0.00010336648814078043, "loss": 0.2858, "step": 26409 }, { "epoch": 2.13950097213221, "grad_norm": 0.0679904893040657, "learning_rate": 0.0001033619874881858, "loss": 0.2467, "step": 26410 }, { "epoch": 2.1395819831497085, "grad_norm": 0.0797104611992836, "learning_rate": 0.00010335748683559116, "loss": 0.2559, "step": 26411 }, { "epoch": 2.1396629941672067, "grad_norm": 0.05416597053408623, "learning_rate": 0.00010335298618299653, "loss": 0.232, "step": 26412 }, { "epoch": 2.139744005184705, "grad_norm": 0.05993930995464325, "learning_rate": 0.00010334848553040191, "loss": 0.2279, "step": 26413 }, { "epoch": 2.1398250162022037, "grad_norm": 0.05330348759889603, "learning_rate": 0.00010334398487780728, "loss": 0.2933, "step": 26414 }, { "epoch": 2.139906027219702, "grad_norm": 0.06839060038328171, "learning_rate": 0.00010333948422521267, "loss": 0.2748, "step": 26415 }, { "epoch": 2.1399870382372, "grad_norm": 0.0675363838672638, "learning_rate": 0.00010333498357261804, "loss": 0.2523, "step": 26416 }, { "epoch": 2.140068049254699, "grad_norm": 0.07417375594377518, "learning_rate": 0.0001033304829200234, "loss": 0.2739, "step": 26417 }, { "epoch": 2.140149060272197, "grad_norm": 0.0644654631614685, "learning_rate": 0.00010332598226742878, "loss": 0.2708, "step": 26418 }, { "epoch": 2.1402300712896953, "grad_norm": 0.061722222715616226, "learning_rate": 0.00010332148161483415, "loss": 0.2747, "step": 26419 }, { "epoch": 2.1403110823071936, "grad_norm": 0.05808354541659355, "learning_rate": 0.00010331698096223954, "loss": 0.2263, "step": 26420 }, { "epoch": 2.1403920933246923, "grad_norm": 0.04819166660308838, "learning_rate": 0.00010331248030964491, "loss": 0.2457, "step": 26421 }, { "epoch": 2.1404731043421905, "grad_norm": 0.059980083256959915, "learning_rate": 0.00010330797965705028, "loss": 0.2476, "step": 26422 }, { "epoch": 2.1405541153596888, "grad_norm": 0.05877969041466713, "learning_rate": 0.00010330347900445564, "loss": 0.2365, "step": 26423 }, { "epoch": 2.1406351263771874, "grad_norm": 0.055677689611911774, "learning_rate": 0.00010329897835186102, "loss": 0.1986, "step": 26424 }, { "epoch": 2.1407161373946857, "grad_norm": 0.08751609921455383, "learning_rate": 0.00010329447769926639, "loss": 0.2867, "step": 26425 }, { "epoch": 2.140797148412184, "grad_norm": 0.06934158504009247, "learning_rate": 0.00010328997704667178, "loss": 0.2596, "step": 26426 }, { "epoch": 2.1408781594296826, "grad_norm": 0.06514386087656021, "learning_rate": 0.00010328547639407715, "loss": 0.2662, "step": 26427 }, { "epoch": 2.140959170447181, "grad_norm": 0.06309270858764648, "learning_rate": 0.00010328097574148253, "loss": 0.2795, "step": 26428 }, { "epoch": 2.141040181464679, "grad_norm": 0.0683743879199028, "learning_rate": 0.00010327647508888789, "loss": 0.3018, "step": 26429 }, { "epoch": 2.141121192482178, "grad_norm": 0.04728134348988533, "learning_rate": 0.00010327197443629326, "loss": 0.2146, "step": 26430 }, { "epoch": 2.141202203499676, "grad_norm": 0.05591541528701782, "learning_rate": 0.00010326747378369863, "loss": 0.2513, "step": 26431 }, { "epoch": 2.1412832145171743, "grad_norm": 0.04973398149013519, "learning_rate": 0.00010326297313110402, "loss": 0.2232, "step": 26432 }, { "epoch": 2.1413642255346725, "grad_norm": 0.05986110493540764, "learning_rate": 0.0001032584724785094, "loss": 0.254, "step": 26433 }, { "epoch": 2.1414452365521712, "grad_norm": 0.06611759215593338, "learning_rate": 0.00010325397182591477, "loss": 0.2275, "step": 26434 }, { "epoch": 2.1415262475696695, "grad_norm": 0.06158333644270897, "learning_rate": 0.00010324947117332013, "loss": 0.2348, "step": 26435 }, { "epoch": 2.1416072585871677, "grad_norm": 0.06677713990211487, "learning_rate": 0.0001032449705207255, "loss": 0.253, "step": 26436 }, { "epoch": 2.1416882696046664, "grad_norm": 0.05785349756479263, "learning_rate": 0.00010324046986813087, "loss": 0.252, "step": 26437 }, { "epoch": 2.1417692806221647, "grad_norm": 0.0526701845228672, "learning_rate": 0.00010323596921553626, "loss": 0.2633, "step": 26438 }, { "epoch": 2.141850291639663, "grad_norm": 0.06457501649856567, "learning_rate": 0.00010323146856294164, "loss": 0.2287, "step": 26439 }, { "epoch": 2.141931302657161, "grad_norm": 0.06768237799406052, "learning_rate": 0.00010322696791034701, "loss": 0.2505, "step": 26440 }, { "epoch": 2.14201231367466, "grad_norm": 0.05725770443677902, "learning_rate": 0.00010322246725775237, "loss": 0.275, "step": 26441 }, { "epoch": 2.142093324692158, "grad_norm": 0.06118274852633476, "learning_rate": 0.00010321796660515774, "loss": 0.2306, "step": 26442 }, { "epoch": 2.1421743357096563, "grad_norm": 0.07188738137483597, "learning_rate": 0.00010321346595256314, "loss": 0.3041, "step": 26443 }, { "epoch": 2.142255346727155, "grad_norm": 0.07297833263874054, "learning_rate": 0.0001032089652999685, "loss": 0.2598, "step": 26444 }, { "epoch": 2.1423363577446533, "grad_norm": 0.0646425187587738, "learning_rate": 0.00010320446464737388, "loss": 0.2422, "step": 26445 }, { "epoch": 2.1424173687621515, "grad_norm": 0.05819808691740036, "learning_rate": 0.00010319996399477925, "loss": 0.2433, "step": 26446 }, { "epoch": 2.14249837977965, "grad_norm": 0.060123011469841, "learning_rate": 0.00010319546334218461, "loss": 0.22, "step": 26447 }, { "epoch": 2.1425793907971484, "grad_norm": 0.054031722247600555, "learning_rate": 0.00010319096268958998, "loss": 0.2528, "step": 26448 }, { "epoch": 2.1426604018146467, "grad_norm": 0.05365290492773056, "learning_rate": 0.00010318646203699539, "loss": 0.2528, "step": 26449 }, { "epoch": 2.1427414128321454, "grad_norm": 0.06101183220744133, "learning_rate": 0.00010318196138440075, "loss": 0.2365, "step": 26450 }, { "epoch": 2.1428224238496436, "grad_norm": 0.06819365918636322, "learning_rate": 0.00010317746073180612, "loss": 0.2122, "step": 26451 }, { "epoch": 2.142903434867142, "grad_norm": 0.05841772258281708, "learning_rate": 0.00010317296007921149, "loss": 0.2207, "step": 26452 }, { "epoch": 2.1429844458846405, "grad_norm": 0.05575397238135338, "learning_rate": 0.00010316845942661685, "loss": 0.2278, "step": 26453 }, { "epoch": 2.143065456902139, "grad_norm": 0.0675593689084053, "learning_rate": 0.00010316395877402223, "loss": 0.241, "step": 26454 }, { "epoch": 2.143146467919637, "grad_norm": 0.06064525246620178, "learning_rate": 0.00010315945812142763, "loss": 0.2345, "step": 26455 }, { "epoch": 2.1432274789371353, "grad_norm": 0.06661902368068695, "learning_rate": 0.00010315495746883299, "loss": 0.2721, "step": 26456 }, { "epoch": 2.143308489954634, "grad_norm": 0.05384773388504982, "learning_rate": 0.00010315045681623836, "loss": 0.2424, "step": 26457 }, { "epoch": 2.143389500972132, "grad_norm": 0.0716564953327179, "learning_rate": 0.00010314595616364373, "loss": 0.2887, "step": 26458 }, { "epoch": 2.1434705119896305, "grad_norm": 0.06959453225135803, "learning_rate": 0.00010314145551104911, "loss": 0.2666, "step": 26459 }, { "epoch": 2.143551523007129, "grad_norm": 0.06140555441379547, "learning_rate": 0.00010313695485845447, "loss": 0.2526, "step": 26460 }, { "epoch": 2.1436325340246274, "grad_norm": 0.06028158217668533, "learning_rate": 0.00010313245420585987, "loss": 0.2576, "step": 26461 }, { "epoch": 2.1437135450421256, "grad_norm": 0.11063975095748901, "learning_rate": 0.00010312795355326523, "loss": 0.2897, "step": 26462 }, { "epoch": 2.143794556059624, "grad_norm": 0.06059414893388748, "learning_rate": 0.0001031234529006706, "loss": 0.2898, "step": 26463 }, { "epoch": 2.1438755670771226, "grad_norm": 0.06326915323734283, "learning_rate": 0.00010311895224807598, "loss": 0.2594, "step": 26464 }, { "epoch": 2.143956578094621, "grad_norm": 0.053890589624643326, "learning_rate": 0.00010311445159548135, "loss": 0.2303, "step": 26465 }, { "epoch": 2.144037589112119, "grad_norm": 0.0587117001414299, "learning_rate": 0.00010310995094288671, "loss": 0.3032, "step": 26466 }, { "epoch": 2.1441186001296177, "grad_norm": 0.06277912855148315, "learning_rate": 0.00010310545029029211, "loss": 0.2732, "step": 26467 }, { "epoch": 2.144199611147116, "grad_norm": 0.06795115768909454, "learning_rate": 0.00010310094963769747, "loss": 0.2623, "step": 26468 }, { "epoch": 2.1442806221646142, "grad_norm": 0.05775567889213562, "learning_rate": 0.00010309644898510284, "loss": 0.2579, "step": 26469 }, { "epoch": 2.144361633182113, "grad_norm": 0.07728058099746704, "learning_rate": 0.00010309194833250822, "loss": 0.2639, "step": 26470 }, { "epoch": 2.144442644199611, "grad_norm": 0.06779685616493225, "learning_rate": 0.00010308744767991359, "loss": 0.2606, "step": 26471 }, { "epoch": 2.1445236552171094, "grad_norm": 0.058312151581048965, "learning_rate": 0.00010308294702731898, "loss": 0.2612, "step": 26472 }, { "epoch": 2.144604666234608, "grad_norm": 0.07041696459054947, "learning_rate": 0.00010307844637472435, "loss": 0.2735, "step": 26473 }, { "epoch": 2.1446856772521063, "grad_norm": 0.06928315758705139, "learning_rate": 0.00010307394572212971, "loss": 0.2664, "step": 26474 }, { "epoch": 2.1447666882696046, "grad_norm": 0.061312656849622726, "learning_rate": 0.00010306944506953509, "loss": 0.3004, "step": 26475 }, { "epoch": 2.144847699287103, "grad_norm": 0.0498967207968235, "learning_rate": 0.00010306494441694046, "loss": 0.2628, "step": 26476 }, { "epoch": 2.1449287103046015, "grad_norm": 0.05547580495476723, "learning_rate": 0.00010306044376434583, "loss": 0.2206, "step": 26477 }, { "epoch": 2.1450097213220998, "grad_norm": 0.06494994461536407, "learning_rate": 0.00010305594311175122, "loss": 0.2589, "step": 26478 }, { "epoch": 2.145090732339598, "grad_norm": 0.09350940585136414, "learning_rate": 0.0001030514424591566, "loss": 0.2807, "step": 26479 }, { "epoch": 2.1451717433570967, "grad_norm": 0.07217366248369217, "learning_rate": 0.00010304694180656195, "loss": 0.2563, "step": 26480 }, { "epoch": 2.145252754374595, "grad_norm": 0.06400761753320694, "learning_rate": 0.00010304244115396733, "loss": 0.2625, "step": 26481 }, { "epoch": 2.145333765392093, "grad_norm": 0.07066665589809418, "learning_rate": 0.0001030379405013727, "loss": 0.2737, "step": 26482 }, { "epoch": 2.145414776409592, "grad_norm": 0.07438008487224579, "learning_rate": 0.00010303343984877807, "loss": 0.2685, "step": 26483 }, { "epoch": 2.14549578742709, "grad_norm": 0.07420582324266434, "learning_rate": 0.00010302893919618346, "loss": 0.2386, "step": 26484 }, { "epoch": 2.1455767984445884, "grad_norm": 0.06442619115114212, "learning_rate": 0.00010302443854358883, "loss": 0.2382, "step": 26485 }, { "epoch": 2.1456578094620866, "grad_norm": 0.05589266121387482, "learning_rate": 0.0001030199378909942, "loss": 0.2886, "step": 26486 }, { "epoch": 2.1457388204795853, "grad_norm": 0.05380919948220253, "learning_rate": 0.00010301543723839957, "loss": 0.2513, "step": 26487 }, { "epoch": 2.1458198314970836, "grad_norm": 0.07141850143671036, "learning_rate": 0.00010301093658580494, "loss": 0.2867, "step": 26488 }, { "epoch": 2.145900842514582, "grad_norm": 0.074470154941082, "learning_rate": 0.00010300643593321032, "loss": 0.2819, "step": 26489 }, { "epoch": 2.1459818535320805, "grad_norm": 0.05928831547498703, "learning_rate": 0.0001030019352806157, "loss": 0.2699, "step": 26490 }, { "epoch": 2.1460628645495787, "grad_norm": 0.06617609411478043, "learning_rate": 0.00010299743462802108, "loss": 0.229, "step": 26491 }, { "epoch": 2.146143875567077, "grad_norm": 0.0654638484120369, "learning_rate": 0.00010299293397542644, "loss": 0.258, "step": 26492 }, { "epoch": 2.1462248865845757, "grad_norm": 0.06313467770814896, "learning_rate": 0.00010298843332283181, "loss": 0.2635, "step": 26493 }, { "epoch": 2.146305897602074, "grad_norm": 0.061352573335170746, "learning_rate": 0.00010298393267023718, "loss": 0.2785, "step": 26494 }, { "epoch": 2.146386908619572, "grad_norm": 0.05656629800796509, "learning_rate": 0.00010297943201764257, "loss": 0.2786, "step": 26495 }, { "epoch": 2.146467919637071, "grad_norm": 0.0668196976184845, "learning_rate": 0.00010297493136504794, "loss": 0.2822, "step": 26496 }, { "epoch": 2.146548930654569, "grad_norm": 0.051473237574100494, "learning_rate": 0.00010297043071245332, "loss": 0.2299, "step": 26497 }, { "epoch": 2.1466299416720673, "grad_norm": 0.0558832623064518, "learning_rate": 0.00010296593005985868, "loss": 0.2648, "step": 26498 }, { "epoch": 2.1467109526895656, "grad_norm": 0.05708617717027664, "learning_rate": 0.00010296142940726405, "loss": 0.2563, "step": 26499 }, { "epoch": 2.1467919637070643, "grad_norm": 0.06447821855545044, "learning_rate": 0.00010295692875466943, "loss": 0.2614, "step": 26500 }, { "epoch": 2.1468729747245625, "grad_norm": 0.07675790041685104, "learning_rate": 0.00010295242810207481, "loss": 0.2717, "step": 26501 }, { "epoch": 2.1469539857420608, "grad_norm": 0.05448411777615547, "learning_rate": 0.00010294792744948019, "loss": 0.2388, "step": 26502 }, { "epoch": 2.1470349967595594, "grad_norm": 0.07981395721435547, "learning_rate": 0.00010294342679688556, "loss": 0.2609, "step": 26503 }, { "epoch": 2.1471160077770577, "grad_norm": 0.06348752975463867, "learning_rate": 0.00010293892614429092, "loss": 0.263, "step": 26504 }, { "epoch": 2.147197018794556, "grad_norm": 0.055345065891742706, "learning_rate": 0.0001029344254916963, "loss": 0.2106, "step": 26505 }, { "epoch": 2.1472780298120546, "grad_norm": 0.05992121621966362, "learning_rate": 0.00010292992483910167, "loss": 0.2421, "step": 26506 }, { "epoch": 2.147359040829553, "grad_norm": 0.05819815397262573, "learning_rate": 0.00010292542418650705, "loss": 0.2634, "step": 26507 }, { "epoch": 2.147440051847051, "grad_norm": 0.06262335926294327, "learning_rate": 0.00010292092353391243, "loss": 0.2093, "step": 26508 }, { "epoch": 2.1475210628645494, "grad_norm": 0.053963132202625275, "learning_rate": 0.0001029164228813178, "loss": 0.261, "step": 26509 }, { "epoch": 2.147602073882048, "grad_norm": 0.058370329439640045, "learning_rate": 0.00010291192222872316, "loss": 0.2638, "step": 26510 }, { "epoch": 2.1476830848995463, "grad_norm": 0.04938644543290138, "learning_rate": 0.00010290742157612854, "loss": 0.2664, "step": 26511 }, { "epoch": 2.1477640959170445, "grad_norm": 0.05646153539419174, "learning_rate": 0.00010290292092353391, "loss": 0.223, "step": 26512 }, { "epoch": 2.1478451069345432, "grad_norm": 0.06261970847845078, "learning_rate": 0.0001028984202709393, "loss": 0.2702, "step": 26513 }, { "epoch": 2.1479261179520415, "grad_norm": 0.05951191857457161, "learning_rate": 0.00010289391961834467, "loss": 0.2573, "step": 26514 }, { "epoch": 2.1480071289695397, "grad_norm": 0.06979864835739136, "learning_rate": 0.00010288941896575004, "loss": 0.3194, "step": 26515 }, { "epoch": 2.1480881399870384, "grad_norm": 0.05955822020769119, "learning_rate": 0.0001028849183131554, "loss": 0.2649, "step": 26516 }, { "epoch": 2.1481691510045366, "grad_norm": 0.08112508803606033, "learning_rate": 0.00010288041766056078, "loss": 0.309, "step": 26517 }, { "epoch": 2.148250162022035, "grad_norm": 0.06836628168821335, "learning_rate": 0.00010287591700796615, "loss": 0.2426, "step": 26518 }, { "epoch": 2.1483311730395336, "grad_norm": 0.07359662652015686, "learning_rate": 0.00010287141635537154, "loss": 0.2346, "step": 26519 }, { "epoch": 2.148412184057032, "grad_norm": 0.07387182116508484, "learning_rate": 0.00010286691570277691, "loss": 0.2542, "step": 26520 }, { "epoch": 2.14849319507453, "grad_norm": 0.07881849259138107, "learning_rate": 0.00010286241505018228, "loss": 0.3046, "step": 26521 }, { "epoch": 2.1485742060920283, "grad_norm": 0.07013516873121262, "learning_rate": 0.00010285791439758764, "loss": 0.2478, "step": 26522 }, { "epoch": 2.148655217109527, "grad_norm": 0.07745005935430527, "learning_rate": 0.00010285341374499302, "loss": 0.2765, "step": 26523 }, { "epoch": 2.1487362281270252, "grad_norm": 0.0691734030842781, "learning_rate": 0.00010284891309239842, "loss": 0.2633, "step": 26524 }, { "epoch": 2.1488172391445235, "grad_norm": 0.05485551059246063, "learning_rate": 0.00010284441243980378, "loss": 0.225, "step": 26525 }, { "epoch": 2.148898250162022, "grad_norm": 0.065140500664711, "learning_rate": 0.00010283991178720915, "loss": 0.2564, "step": 26526 }, { "epoch": 2.1489792611795204, "grad_norm": 0.06324749439954758, "learning_rate": 0.00010283541113461453, "loss": 0.2274, "step": 26527 }, { "epoch": 2.1490602721970187, "grad_norm": 0.05943019315600395, "learning_rate": 0.0001028309104820199, "loss": 0.247, "step": 26528 }, { "epoch": 2.1491412832145174, "grad_norm": 0.05910013988614082, "learning_rate": 0.00010282640982942526, "loss": 0.2751, "step": 26529 }, { "epoch": 2.1492222942320156, "grad_norm": 0.06628038734197617, "learning_rate": 0.00010282190917683066, "loss": 0.2657, "step": 26530 }, { "epoch": 2.149303305249514, "grad_norm": 0.0645565614104271, "learning_rate": 0.00010281740852423602, "loss": 0.2534, "step": 26531 }, { "epoch": 2.149384316267012, "grad_norm": 0.059102196246385574, "learning_rate": 0.0001028129078716414, "loss": 0.2709, "step": 26532 }, { "epoch": 2.149465327284511, "grad_norm": 0.08080757409334183, "learning_rate": 0.00010280840721904677, "loss": 0.2303, "step": 26533 }, { "epoch": 2.149546338302009, "grad_norm": 0.07256803661584854, "learning_rate": 0.00010280390656645214, "loss": 0.2381, "step": 26534 }, { "epoch": 2.1496273493195073, "grad_norm": 0.06296215951442719, "learning_rate": 0.0001027994059138575, "loss": 0.2396, "step": 26535 }, { "epoch": 2.149708360337006, "grad_norm": 0.06908629089593887, "learning_rate": 0.0001027949052612629, "loss": 0.2381, "step": 26536 }, { "epoch": 2.149789371354504, "grad_norm": 0.0535476990044117, "learning_rate": 0.00010279040460866826, "loss": 0.2161, "step": 26537 }, { "epoch": 2.1498703823720025, "grad_norm": 0.07507485896348953, "learning_rate": 0.00010278590395607364, "loss": 0.3161, "step": 26538 }, { "epoch": 2.149951393389501, "grad_norm": 0.08611593395471573, "learning_rate": 0.00010278140330347901, "loss": 0.252, "step": 26539 }, { "epoch": 2.1500324044069994, "grad_norm": 0.06031523272395134, "learning_rate": 0.00010277690265088438, "loss": 0.2271, "step": 26540 }, { "epoch": 2.1501134154244976, "grad_norm": 0.08460874110460281, "learning_rate": 0.00010277240199828974, "loss": 0.2427, "step": 26541 }, { "epoch": 2.1501944264419963, "grad_norm": 0.06741653382778168, "learning_rate": 0.00010276790134569514, "loss": 0.2594, "step": 26542 }, { "epoch": 2.1502754374594946, "grad_norm": 0.05998913198709488, "learning_rate": 0.0001027634006931005, "loss": 0.2479, "step": 26543 }, { "epoch": 2.150356448476993, "grad_norm": 0.05123647302389145, "learning_rate": 0.00010275890004050588, "loss": 0.2636, "step": 26544 }, { "epoch": 2.150437459494491, "grad_norm": 0.062051501125097275, "learning_rate": 0.00010275439938791125, "loss": 0.2531, "step": 26545 }, { "epoch": 2.1505184705119897, "grad_norm": 0.06481350213289261, "learning_rate": 0.00010274989873531662, "loss": 0.2322, "step": 26546 }, { "epoch": 2.150599481529488, "grad_norm": 0.06562846153974533, "learning_rate": 0.00010274539808272198, "loss": 0.2733, "step": 26547 }, { "epoch": 2.1506804925469862, "grad_norm": 0.0631428137421608, "learning_rate": 0.00010274089743012739, "loss": 0.2787, "step": 26548 }, { "epoch": 2.150761503564485, "grad_norm": 0.07828080654144287, "learning_rate": 0.00010273639677753275, "loss": 0.2352, "step": 26549 }, { "epoch": 2.150842514581983, "grad_norm": 0.06805264949798584, "learning_rate": 0.00010273189612493812, "loss": 0.2428, "step": 26550 }, { "epoch": 2.1509235255994814, "grad_norm": 0.06137997657060623, "learning_rate": 0.00010272739547234349, "loss": 0.255, "step": 26551 }, { "epoch": 2.15100453661698, "grad_norm": 0.07119157165288925, "learning_rate": 0.00010272289481974887, "loss": 0.3016, "step": 26552 }, { "epoch": 2.1510855476344783, "grad_norm": 0.06857890635728836, "learning_rate": 0.00010271839416715425, "loss": 0.2899, "step": 26553 }, { "epoch": 2.1511665586519766, "grad_norm": 0.048014085739851, "learning_rate": 0.00010271389351455963, "loss": 0.2641, "step": 26554 }, { "epoch": 2.151247569669475, "grad_norm": 0.07205279171466827, "learning_rate": 0.00010270939286196499, "loss": 0.2434, "step": 26555 }, { "epoch": 2.1513285806869735, "grad_norm": 0.07112793624401093, "learning_rate": 0.00010270489220937036, "loss": 0.3006, "step": 26556 }, { "epoch": 2.1514095917044718, "grad_norm": 0.06290469318628311, "learning_rate": 0.00010270039155677573, "loss": 0.2626, "step": 26557 }, { "epoch": 2.15149060272197, "grad_norm": 0.055432796478271484, "learning_rate": 0.00010269589090418111, "loss": 0.255, "step": 26558 }, { "epoch": 2.1515716137394687, "grad_norm": 0.05841575935482979, "learning_rate": 0.0001026913902515865, "loss": 0.285, "step": 26559 }, { "epoch": 2.151652624756967, "grad_norm": 0.06193108111619949, "learning_rate": 0.00010268688959899187, "loss": 0.2864, "step": 26560 }, { "epoch": 2.151733635774465, "grad_norm": 0.0693509429693222, "learning_rate": 0.00010268238894639723, "loss": 0.2604, "step": 26561 }, { "epoch": 2.151814646791964, "grad_norm": 0.061149440705776215, "learning_rate": 0.0001026778882938026, "loss": 0.2691, "step": 26562 }, { "epoch": 2.151895657809462, "grad_norm": 0.06805266439914703, "learning_rate": 0.00010267338764120798, "loss": 0.3228, "step": 26563 }, { "epoch": 2.1519766688269604, "grad_norm": 0.05401428043842316, "learning_rate": 0.00010266888698861335, "loss": 0.2867, "step": 26564 }, { "epoch": 2.152057679844459, "grad_norm": 0.06017407029867172, "learning_rate": 0.00010266438633601874, "loss": 0.2303, "step": 26565 }, { "epoch": 2.1521386908619573, "grad_norm": 0.06361527740955353, "learning_rate": 0.00010265988568342411, "loss": 0.2976, "step": 26566 }, { "epoch": 2.1522197018794555, "grad_norm": 0.06338279694318771, "learning_rate": 0.00010265538503082947, "loss": 0.2583, "step": 26567 }, { "epoch": 2.152300712896954, "grad_norm": 0.0681900903582573, "learning_rate": 0.00010265088437823484, "loss": 0.2604, "step": 26568 }, { "epoch": 2.1523817239144525, "grad_norm": 0.06779904663562775, "learning_rate": 0.00010264638372564022, "loss": 0.2554, "step": 26569 }, { "epoch": 2.1524627349319507, "grad_norm": 0.0530104897916317, "learning_rate": 0.00010264188307304559, "loss": 0.2325, "step": 26570 }, { "epoch": 2.152543745949449, "grad_norm": 0.07330182939767838, "learning_rate": 0.00010263738242045098, "loss": 0.2599, "step": 26571 }, { "epoch": 2.1526247569669477, "grad_norm": 0.06863976269960403, "learning_rate": 0.00010263288176785635, "loss": 0.2425, "step": 26572 }, { "epoch": 2.152705767984446, "grad_norm": 0.0714733898639679, "learning_rate": 0.00010262838111526171, "loss": 0.2421, "step": 26573 }, { "epoch": 2.152786779001944, "grad_norm": 0.06109316647052765, "learning_rate": 0.00010262388046266709, "loss": 0.2359, "step": 26574 }, { "epoch": 2.152867790019443, "grad_norm": 0.06053869053721428, "learning_rate": 0.00010261937981007246, "loss": 0.2656, "step": 26575 }, { "epoch": 2.152948801036941, "grad_norm": 0.06158290058374405, "learning_rate": 0.00010261487915747785, "loss": 0.2442, "step": 26576 }, { "epoch": 2.1530298120544393, "grad_norm": 0.05543677508831024, "learning_rate": 0.00010261037850488322, "loss": 0.215, "step": 26577 }, { "epoch": 2.1531108230719376, "grad_norm": 0.05953080579638481, "learning_rate": 0.0001026058778522886, "loss": 0.2935, "step": 26578 }, { "epoch": 2.1531918340894363, "grad_norm": 0.06894619017839432, "learning_rate": 0.00010260137719969395, "loss": 0.2454, "step": 26579 }, { "epoch": 2.1532728451069345, "grad_norm": 0.06742026656866074, "learning_rate": 0.00010259687654709933, "loss": 0.231, "step": 26580 }, { "epoch": 2.1533538561244328, "grad_norm": 0.056998420506715775, "learning_rate": 0.0001025923758945047, "loss": 0.2384, "step": 26581 }, { "epoch": 2.1534348671419314, "grad_norm": 0.06900626420974731, "learning_rate": 0.00010258787524191009, "loss": 0.2387, "step": 26582 }, { "epoch": 2.1535158781594297, "grad_norm": 0.05155860260128975, "learning_rate": 0.00010258337458931546, "loss": 0.2394, "step": 26583 }, { "epoch": 2.153596889176928, "grad_norm": 0.07202799618244171, "learning_rate": 0.00010257887393672084, "loss": 0.2856, "step": 26584 }, { "epoch": 2.1536779001944266, "grad_norm": 0.05389600247144699, "learning_rate": 0.0001025743732841262, "loss": 0.2319, "step": 26585 }, { "epoch": 2.153758911211925, "grad_norm": 0.06073429808020592, "learning_rate": 0.00010256987263153157, "loss": 0.2352, "step": 26586 }, { "epoch": 2.153839922229423, "grad_norm": 0.07475744187831879, "learning_rate": 0.00010256537197893694, "loss": 0.263, "step": 26587 }, { "epoch": 2.153920933246922, "grad_norm": 0.056003883481025696, "learning_rate": 0.00010256087132634233, "loss": 0.2163, "step": 26588 }, { "epoch": 2.15400194426442, "grad_norm": 0.07948049902915955, "learning_rate": 0.0001025563706737477, "loss": 0.2713, "step": 26589 }, { "epoch": 2.1540829552819183, "grad_norm": 0.05557125434279442, "learning_rate": 0.00010255187002115308, "loss": 0.2532, "step": 26590 }, { "epoch": 2.1541639662994165, "grad_norm": 0.07005944103002548, "learning_rate": 0.00010254736936855844, "loss": 0.2673, "step": 26591 }, { "epoch": 2.154244977316915, "grad_norm": 0.06999240815639496, "learning_rate": 0.00010254286871596381, "loss": 0.2479, "step": 26592 }, { "epoch": 2.1543259883344135, "grad_norm": 0.061392951756715775, "learning_rate": 0.00010253836806336918, "loss": 0.2824, "step": 26593 }, { "epoch": 2.1544069993519117, "grad_norm": 0.05862107500433922, "learning_rate": 0.00010253386741077457, "loss": 0.254, "step": 26594 }, { "epoch": 2.1544880103694104, "grad_norm": 0.055120065808296204, "learning_rate": 0.00010252936675817994, "loss": 0.2826, "step": 26595 }, { "epoch": 2.1545690213869086, "grad_norm": 0.05726516246795654, "learning_rate": 0.00010252486610558532, "loss": 0.258, "step": 26596 }, { "epoch": 2.154650032404407, "grad_norm": 0.06028033047914505, "learning_rate": 0.00010252036545299069, "loss": 0.2468, "step": 26597 }, { "epoch": 2.1547310434219056, "grad_norm": 0.06699507683515549, "learning_rate": 0.00010251586480039605, "loss": 0.2881, "step": 26598 }, { "epoch": 2.154812054439404, "grad_norm": 0.056645482778549194, "learning_rate": 0.00010251136414780143, "loss": 0.245, "step": 26599 }, { "epoch": 2.154893065456902, "grad_norm": 0.06294909864664078, "learning_rate": 0.00010250686349520681, "loss": 0.2676, "step": 26600 }, { "epoch": 2.1549740764744003, "grad_norm": 0.05808530002832413, "learning_rate": 0.00010250236284261219, "loss": 0.2408, "step": 26601 }, { "epoch": 2.155055087491899, "grad_norm": 0.056637249886989594, "learning_rate": 0.00010249786219001756, "loss": 0.2518, "step": 26602 }, { "epoch": 2.1551360985093972, "grad_norm": 0.056968703866004944, "learning_rate": 0.00010249336153742293, "loss": 0.2357, "step": 26603 }, { "epoch": 2.1552171095268955, "grad_norm": 0.06403225660324097, "learning_rate": 0.0001024888608848283, "loss": 0.2996, "step": 26604 }, { "epoch": 2.155298120544394, "grad_norm": 0.05986631661653519, "learning_rate": 0.0001024843602322337, "loss": 0.247, "step": 26605 }, { "epoch": 2.1553791315618924, "grad_norm": 0.06378137320280075, "learning_rate": 0.00010247985957963905, "loss": 0.2541, "step": 26606 }, { "epoch": 2.1554601425793907, "grad_norm": 0.053147122263908386, "learning_rate": 0.00010247535892704443, "loss": 0.2254, "step": 26607 }, { "epoch": 2.1555411535968894, "grad_norm": 0.06405290216207504, "learning_rate": 0.0001024708582744498, "loss": 0.2534, "step": 26608 }, { "epoch": 2.1556221646143876, "grad_norm": 0.06678581237792969, "learning_rate": 0.00010246635762185518, "loss": 0.2812, "step": 26609 }, { "epoch": 2.155703175631886, "grad_norm": 0.08605819195508957, "learning_rate": 0.00010246185696926054, "loss": 0.2717, "step": 26610 }, { "epoch": 2.1557841866493845, "grad_norm": 0.07971778512001038, "learning_rate": 0.00010245735631666594, "loss": 0.2643, "step": 26611 }, { "epoch": 2.155865197666883, "grad_norm": 0.06603200733661652, "learning_rate": 0.0001024528556640713, "loss": 0.2387, "step": 26612 }, { "epoch": 2.155946208684381, "grad_norm": 0.060673121362924576, "learning_rate": 0.00010244835501147667, "loss": 0.2346, "step": 26613 }, { "epoch": 2.1560272197018793, "grad_norm": 0.060722626745700836, "learning_rate": 0.00010244385435888204, "loss": 0.2713, "step": 26614 }, { "epoch": 2.156108230719378, "grad_norm": 0.06248776987195015, "learning_rate": 0.00010243935370628742, "loss": 0.2609, "step": 26615 }, { "epoch": 2.156189241736876, "grad_norm": 0.05779466778039932, "learning_rate": 0.00010243485305369278, "loss": 0.2544, "step": 26616 }, { "epoch": 2.1562702527543745, "grad_norm": 0.07016555219888687, "learning_rate": 0.00010243035240109818, "loss": 0.2636, "step": 26617 }, { "epoch": 2.156351263771873, "grad_norm": 0.08269957453012466, "learning_rate": 0.00010242585174850354, "loss": 0.2469, "step": 26618 }, { "epoch": 2.1564322747893714, "grad_norm": 0.05582574009895325, "learning_rate": 0.00010242135109590891, "loss": 0.2708, "step": 26619 }, { "epoch": 2.1565132858068696, "grad_norm": 0.0636686459183693, "learning_rate": 0.00010241685044331428, "loss": 0.2627, "step": 26620 }, { "epoch": 2.1565942968243683, "grad_norm": 0.06832555681467056, "learning_rate": 0.00010241234979071966, "loss": 0.2596, "step": 26621 }, { "epoch": 2.1566753078418666, "grad_norm": 0.06222445145249367, "learning_rate": 0.00010240784913812502, "loss": 0.2496, "step": 26622 }, { "epoch": 2.156756318859365, "grad_norm": 0.07057000696659088, "learning_rate": 0.00010240334848553042, "loss": 0.2579, "step": 26623 }, { "epoch": 2.156837329876863, "grad_norm": 0.057806190103292465, "learning_rate": 0.00010239884783293578, "loss": 0.2485, "step": 26624 }, { "epoch": 2.1569183408943617, "grad_norm": 0.05468752607703209, "learning_rate": 0.00010239434718034115, "loss": 0.2532, "step": 26625 }, { "epoch": 2.15699935191186, "grad_norm": 0.05896284431219101, "learning_rate": 0.00010238984652774653, "loss": 0.2606, "step": 26626 }, { "epoch": 2.1570803629293582, "grad_norm": 0.062181517481803894, "learning_rate": 0.0001023853458751519, "loss": 0.2601, "step": 26627 }, { "epoch": 2.157161373946857, "grad_norm": 0.0685560554265976, "learning_rate": 0.00010238084522255729, "loss": 0.274, "step": 26628 }, { "epoch": 2.157242384964355, "grad_norm": 0.0639854297041893, "learning_rate": 0.00010237634456996266, "loss": 0.229, "step": 26629 }, { "epoch": 2.1573233959818534, "grad_norm": 0.06651315093040466, "learning_rate": 0.00010237184391736802, "loss": 0.2389, "step": 26630 }, { "epoch": 2.157404406999352, "grad_norm": 0.06652846187353134, "learning_rate": 0.0001023673432647734, "loss": 0.2525, "step": 26631 }, { "epoch": 2.1574854180168503, "grad_norm": 0.06786283105611801, "learning_rate": 0.00010236284261217877, "loss": 0.2838, "step": 26632 }, { "epoch": 2.1575664290343486, "grad_norm": 0.06288962066173553, "learning_rate": 0.00010235834195958414, "loss": 0.2379, "step": 26633 }, { "epoch": 2.1576474400518473, "grad_norm": 0.07239990681409836, "learning_rate": 0.00010235384130698953, "loss": 0.2779, "step": 26634 }, { "epoch": 2.1577284510693455, "grad_norm": 0.07054921239614487, "learning_rate": 0.0001023493406543949, "loss": 0.2498, "step": 26635 }, { "epoch": 2.1578094620868438, "grad_norm": 0.07556222379207611, "learning_rate": 0.00010234484000180026, "loss": 0.2712, "step": 26636 }, { "epoch": 2.157890473104342, "grad_norm": 0.055823683738708496, "learning_rate": 0.00010234033934920564, "loss": 0.2631, "step": 26637 }, { "epoch": 2.1579714841218407, "grad_norm": 0.061262547969818115, "learning_rate": 0.00010233583869661101, "loss": 0.27, "step": 26638 }, { "epoch": 2.158052495139339, "grad_norm": 0.07095042616128922, "learning_rate": 0.00010233133804401638, "loss": 0.2901, "step": 26639 }, { "epoch": 2.158133506156837, "grad_norm": 0.07894306629896164, "learning_rate": 0.00010232683739142177, "loss": 0.256, "step": 26640 }, { "epoch": 2.158214517174336, "grad_norm": 0.05407718941569328, "learning_rate": 0.00010232233673882714, "loss": 0.2349, "step": 26641 }, { "epoch": 2.158295528191834, "grad_norm": 0.07037380337715149, "learning_rate": 0.0001023178360862325, "loss": 0.2731, "step": 26642 }, { "epoch": 2.1583765392093324, "grad_norm": 0.04452992230653763, "learning_rate": 0.00010231333543363788, "loss": 0.2351, "step": 26643 }, { "epoch": 2.158457550226831, "grad_norm": 0.06478511542081833, "learning_rate": 0.00010230883478104325, "loss": 0.2517, "step": 26644 }, { "epoch": 2.1585385612443293, "grad_norm": 0.061454348266124725, "learning_rate": 0.00010230433412844863, "loss": 0.2674, "step": 26645 }, { "epoch": 2.1586195722618275, "grad_norm": 0.056620292365550995, "learning_rate": 0.00010229983347585401, "loss": 0.239, "step": 26646 }, { "epoch": 2.158700583279326, "grad_norm": 0.05957835167646408, "learning_rate": 0.00010229533282325939, "loss": 0.2663, "step": 26647 }, { "epoch": 2.1587815942968245, "grad_norm": 0.06666881591081619, "learning_rate": 0.00010229083217066475, "loss": 0.2544, "step": 26648 }, { "epoch": 2.1588626053143227, "grad_norm": 0.06860926747322083, "learning_rate": 0.00010228633151807012, "loss": 0.2814, "step": 26649 }, { "epoch": 2.158943616331821, "grad_norm": 0.05659700185060501, "learning_rate": 0.00010228183086547549, "loss": 0.2478, "step": 26650 }, { "epoch": 2.1590246273493197, "grad_norm": 0.04990334063768387, "learning_rate": 0.00010227733021288087, "loss": 0.2532, "step": 26651 }, { "epoch": 2.159105638366818, "grad_norm": 0.07146997004747391, "learning_rate": 0.00010227282956028625, "loss": 0.2496, "step": 26652 }, { "epoch": 2.159186649384316, "grad_norm": 0.06514699757099152, "learning_rate": 0.00010226832890769163, "loss": 0.255, "step": 26653 }, { "epoch": 2.159267660401815, "grad_norm": 0.06386931985616684, "learning_rate": 0.00010226382825509699, "loss": 0.2656, "step": 26654 }, { "epoch": 2.159348671419313, "grad_norm": 0.05146828293800354, "learning_rate": 0.00010225932760250236, "loss": 0.2349, "step": 26655 }, { "epoch": 2.1594296824368113, "grad_norm": 0.059927668422460556, "learning_rate": 0.00010225482694990773, "loss": 0.2559, "step": 26656 }, { "epoch": 2.15951069345431, "grad_norm": 0.05467910319566727, "learning_rate": 0.00010225032629731312, "loss": 0.2622, "step": 26657 }, { "epoch": 2.1595917044718083, "grad_norm": 0.062487825751304626, "learning_rate": 0.0001022458256447185, "loss": 0.2629, "step": 26658 }, { "epoch": 2.1596727154893065, "grad_norm": 0.0732332095503807, "learning_rate": 0.00010224132499212387, "loss": 0.2707, "step": 26659 }, { "epoch": 2.1597537265068047, "grad_norm": 0.06669419258832932, "learning_rate": 0.00010223682433952923, "loss": 0.2927, "step": 26660 }, { "epoch": 2.1598347375243034, "grad_norm": 0.06436692923307419, "learning_rate": 0.0001022323236869346, "loss": 0.2877, "step": 26661 }, { "epoch": 2.1599157485418017, "grad_norm": 0.05647365748882294, "learning_rate": 0.00010222782303433998, "loss": 0.2812, "step": 26662 }, { "epoch": 2.1599967595593, "grad_norm": 0.07081833481788635, "learning_rate": 0.00010222332238174536, "loss": 0.2701, "step": 26663 }, { "epoch": 2.1600777705767986, "grad_norm": 0.0558331124484539, "learning_rate": 0.00010221882172915074, "loss": 0.2585, "step": 26664 }, { "epoch": 2.160158781594297, "grad_norm": 0.07021911442279816, "learning_rate": 0.00010221432107655611, "loss": 0.2821, "step": 26665 }, { "epoch": 2.160239792611795, "grad_norm": 0.062387652695178986, "learning_rate": 0.00010220982042396148, "loss": 0.2167, "step": 26666 }, { "epoch": 2.1603208036292934, "grad_norm": 0.0702107697725296, "learning_rate": 0.00010220531977136684, "loss": 0.239, "step": 26667 }, { "epoch": 2.160401814646792, "grad_norm": 0.0710628479719162, "learning_rate": 0.00010220081911877222, "loss": 0.2947, "step": 26668 }, { "epoch": 2.1604828256642903, "grad_norm": 0.05518188700079918, "learning_rate": 0.0001021963184661776, "loss": 0.2594, "step": 26669 }, { "epoch": 2.1605638366817885, "grad_norm": 0.07188651710748672, "learning_rate": 0.00010219181781358298, "loss": 0.2536, "step": 26670 }, { "epoch": 2.160644847699287, "grad_norm": 0.06636640429496765, "learning_rate": 0.00010218731716098835, "loss": 0.2418, "step": 26671 }, { "epoch": 2.1607258587167855, "grad_norm": 0.07105717062950134, "learning_rate": 0.00010218281650839373, "loss": 0.2735, "step": 26672 }, { "epoch": 2.1608068697342837, "grad_norm": 0.061248186975717545, "learning_rate": 0.00010217831585579909, "loss": 0.267, "step": 26673 }, { "epoch": 2.1608878807517824, "grad_norm": 0.06057823449373245, "learning_rate": 0.00010217381520320446, "loss": 0.2574, "step": 26674 }, { "epoch": 2.1609688917692806, "grad_norm": 0.056544676423072815, "learning_rate": 0.00010216931455060985, "loss": 0.2459, "step": 26675 }, { "epoch": 2.161049902786779, "grad_norm": 0.06037278473377228, "learning_rate": 0.00010216481389801522, "loss": 0.259, "step": 26676 }, { "epoch": 2.1611309138042776, "grad_norm": 0.07936231791973114, "learning_rate": 0.0001021603132454206, "loss": 0.2663, "step": 26677 }, { "epoch": 2.161211924821776, "grad_norm": 0.060614656656980515, "learning_rate": 0.00010215581259282597, "loss": 0.2603, "step": 26678 }, { "epoch": 2.161292935839274, "grad_norm": 0.0534573532640934, "learning_rate": 0.00010215131194023133, "loss": 0.2638, "step": 26679 }, { "epoch": 2.1613739468567728, "grad_norm": 0.060797303915023804, "learning_rate": 0.0001021468112876367, "loss": 0.2402, "step": 26680 }, { "epoch": 2.161454957874271, "grad_norm": 0.06404197216033936, "learning_rate": 0.00010214231063504209, "loss": 0.2485, "step": 26681 }, { "epoch": 2.1615359688917692, "grad_norm": 0.05765843018889427, "learning_rate": 0.00010213780998244746, "loss": 0.2736, "step": 26682 }, { "epoch": 2.1616169799092675, "grad_norm": 0.05763285979628563, "learning_rate": 0.00010213330932985284, "loss": 0.2434, "step": 26683 }, { "epoch": 2.161697990926766, "grad_norm": 0.05568533390760422, "learning_rate": 0.00010212880867725821, "loss": 0.2232, "step": 26684 }, { "epoch": 2.1617790019442644, "grad_norm": 0.06619726121425629, "learning_rate": 0.00010212430802466357, "loss": 0.2955, "step": 26685 }, { "epoch": 2.1618600129617627, "grad_norm": 0.07268057763576508, "learning_rate": 0.00010211980737206897, "loss": 0.2706, "step": 26686 }, { "epoch": 2.1619410239792614, "grad_norm": 0.06154467910528183, "learning_rate": 0.00010211530671947433, "loss": 0.24, "step": 26687 }, { "epoch": 2.1620220349967596, "grad_norm": 0.0544455386698246, "learning_rate": 0.0001021108060668797, "loss": 0.2601, "step": 26688 }, { "epoch": 2.162103046014258, "grad_norm": 0.06915867328643799, "learning_rate": 0.00010210630541428508, "loss": 0.2544, "step": 26689 }, { "epoch": 2.162184057031756, "grad_norm": 0.0737709030508995, "learning_rate": 0.00010210180476169045, "loss": 0.2471, "step": 26690 }, { "epoch": 2.162265068049255, "grad_norm": 0.05942685529589653, "learning_rate": 0.00010209730410909581, "loss": 0.2324, "step": 26691 }, { "epoch": 2.162346079066753, "grad_norm": 0.05648018419742584, "learning_rate": 0.00010209280345650121, "loss": 0.2247, "step": 26692 }, { "epoch": 2.1624270900842513, "grad_norm": 0.0682801902294159, "learning_rate": 0.00010208830280390657, "loss": 0.2884, "step": 26693 }, { "epoch": 2.16250810110175, "grad_norm": 0.07194321602582932, "learning_rate": 0.00010208380215131195, "loss": 0.2652, "step": 26694 }, { "epoch": 2.162589112119248, "grad_norm": 0.08287494629621506, "learning_rate": 0.00010207930149871732, "loss": 0.2525, "step": 26695 }, { "epoch": 2.1626701231367464, "grad_norm": 0.07747865468263626, "learning_rate": 0.00010207480084612269, "loss": 0.271, "step": 26696 }, { "epoch": 2.162751134154245, "grad_norm": 0.07304595410823822, "learning_rate": 0.00010207030019352805, "loss": 0.3076, "step": 26697 }, { "epoch": 2.1628321451717434, "grad_norm": 0.07419362664222717, "learning_rate": 0.00010206579954093345, "loss": 0.2364, "step": 26698 }, { "epoch": 2.1629131561892416, "grad_norm": 0.07199463248252869, "learning_rate": 0.00010206129888833881, "loss": 0.2931, "step": 26699 }, { "epoch": 2.1629941672067403, "grad_norm": 0.05875088647007942, "learning_rate": 0.00010205679823574419, "loss": 0.2558, "step": 26700 }, { "epoch": 2.1630751782242386, "grad_norm": 0.057817574590444565, "learning_rate": 0.00010205229758314956, "loss": 0.2728, "step": 26701 }, { "epoch": 2.163156189241737, "grad_norm": 0.060741499066352844, "learning_rate": 0.00010204779693055493, "loss": 0.2873, "step": 26702 }, { "epoch": 2.163237200259235, "grad_norm": 0.0565064400434494, "learning_rate": 0.0001020432962779603, "loss": 0.2225, "step": 26703 }, { "epoch": 2.1633182112767337, "grad_norm": 0.06156710162758827, "learning_rate": 0.0001020387956253657, "loss": 0.2445, "step": 26704 }, { "epoch": 2.163399222294232, "grad_norm": 0.05642209202051163, "learning_rate": 0.00010203429497277105, "loss": 0.2315, "step": 26705 }, { "epoch": 2.1634802333117302, "grad_norm": 0.06932426989078522, "learning_rate": 0.00010202979432017643, "loss": 0.2718, "step": 26706 }, { "epoch": 2.163561244329229, "grad_norm": 0.061338771134614944, "learning_rate": 0.0001020252936675818, "loss": 0.2471, "step": 26707 }, { "epoch": 2.163642255346727, "grad_norm": 0.053529005497694016, "learning_rate": 0.00010202079301498718, "loss": 0.2258, "step": 26708 }, { "epoch": 2.1637232663642254, "grad_norm": 0.062487855553627014, "learning_rate": 0.00010201629236239256, "loss": 0.2294, "step": 26709 }, { "epoch": 2.163804277381724, "grad_norm": 0.0688663199543953, "learning_rate": 0.00010201179170979794, "loss": 0.2623, "step": 26710 }, { "epoch": 2.1638852883992223, "grad_norm": 0.05936722457408905, "learning_rate": 0.0001020072910572033, "loss": 0.2663, "step": 26711 }, { "epoch": 2.1639662994167206, "grad_norm": 0.056306082755327225, "learning_rate": 0.00010200279040460867, "loss": 0.2768, "step": 26712 }, { "epoch": 2.164047310434219, "grad_norm": 0.07249502837657928, "learning_rate": 0.00010199828975201404, "loss": 0.2538, "step": 26713 }, { "epoch": 2.1641283214517175, "grad_norm": 0.06649959087371826, "learning_rate": 0.00010199378909941942, "loss": 0.2547, "step": 26714 }, { "epoch": 2.1642093324692158, "grad_norm": 0.06089214235544205, "learning_rate": 0.0001019892884468248, "loss": 0.2736, "step": 26715 }, { "epoch": 2.164290343486714, "grad_norm": 0.06362010538578033, "learning_rate": 0.00010198478779423018, "loss": 0.2643, "step": 26716 }, { "epoch": 2.1643713545042127, "grad_norm": 0.061047982424497604, "learning_rate": 0.00010198028714163554, "loss": 0.2762, "step": 26717 }, { "epoch": 2.164452365521711, "grad_norm": 0.06476712971925735, "learning_rate": 0.00010197578648904091, "loss": 0.2731, "step": 26718 }, { "epoch": 2.164533376539209, "grad_norm": 0.051583848893642426, "learning_rate": 0.00010197128583644629, "loss": 0.2737, "step": 26719 }, { "epoch": 2.164614387556708, "grad_norm": 0.062449660152196884, "learning_rate": 0.00010196678518385166, "loss": 0.2785, "step": 26720 }, { "epoch": 2.164695398574206, "grad_norm": 0.06525503098964691, "learning_rate": 0.00010196228453125705, "loss": 0.2442, "step": 26721 }, { "epoch": 2.1647764095917044, "grad_norm": 0.0679815486073494, "learning_rate": 0.00010195778387866242, "loss": 0.2464, "step": 26722 }, { "epoch": 2.164857420609203, "grad_norm": 0.06120334565639496, "learning_rate": 0.00010195328322606778, "loss": 0.2351, "step": 26723 }, { "epoch": 2.1649384316267013, "grad_norm": 0.057069793343544006, "learning_rate": 0.00010194878257347315, "loss": 0.2769, "step": 26724 }, { "epoch": 2.1650194426441995, "grad_norm": 0.05186355486512184, "learning_rate": 0.00010194428192087853, "loss": 0.255, "step": 26725 }, { "epoch": 2.165100453661698, "grad_norm": 0.05202582851052284, "learning_rate": 0.0001019397812682839, "loss": 0.2577, "step": 26726 }, { "epoch": 2.1651814646791965, "grad_norm": 0.05064481124281883, "learning_rate": 0.00010193528061568929, "loss": 0.2548, "step": 26727 }, { "epoch": 2.1652624756966947, "grad_norm": 0.06526156514883041, "learning_rate": 0.00010193077996309466, "loss": 0.276, "step": 26728 }, { "epoch": 2.165343486714193, "grad_norm": 0.049982719123363495, "learning_rate": 0.00010192627931050002, "loss": 0.2681, "step": 26729 }, { "epoch": 2.1654244977316917, "grad_norm": 0.050517067313194275, "learning_rate": 0.0001019217786579054, "loss": 0.2587, "step": 26730 }, { "epoch": 2.16550550874919, "grad_norm": 0.0650363489985466, "learning_rate": 0.00010191727800531077, "loss": 0.2565, "step": 26731 }, { "epoch": 2.165586519766688, "grad_norm": 0.060763679444789886, "learning_rate": 0.00010191277735271614, "loss": 0.2328, "step": 26732 }, { "epoch": 2.165667530784187, "grad_norm": 0.0616152361035347, "learning_rate": 0.00010190827670012153, "loss": 0.2042, "step": 26733 }, { "epoch": 2.165748541801685, "grad_norm": 0.07217466086149216, "learning_rate": 0.0001019037760475269, "loss": 0.2598, "step": 26734 }, { "epoch": 2.1658295528191833, "grad_norm": 0.07178352773189545, "learning_rate": 0.00010189927539493228, "loss": 0.2348, "step": 26735 }, { "epoch": 2.1659105638366816, "grad_norm": 0.07062013447284698, "learning_rate": 0.00010189477474233764, "loss": 0.2333, "step": 26736 }, { "epoch": 2.1659915748541803, "grad_norm": 0.07211899012327194, "learning_rate": 0.00010189027408974301, "loss": 0.2592, "step": 26737 }, { "epoch": 2.1660725858716785, "grad_norm": 0.07860163599252701, "learning_rate": 0.0001018857734371484, "loss": 0.2504, "step": 26738 }, { "epoch": 2.1661535968891767, "grad_norm": 0.0584687739610672, "learning_rate": 0.00010188127278455377, "loss": 0.2546, "step": 26739 }, { "epoch": 2.1662346079066754, "grad_norm": 0.058649592101573944, "learning_rate": 0.00010187677213195914, "loss": 0.2068, "step": 26740 }, { "epoch": 2.1663156189241737, "grad_norm": 0.07066672295331955, "learning_rate": 0.00010187227147936452, "loss": 0.2642, "step": 26741 }, { "epoch": 2.166396629941672, "grad_norm": 0.06761866807937622, "learning_rate": 0.00010186777082676988, "loss": 0.2859, "step": 26742 }, { "epoch": 2.1664776409591706, "grad_norm": 0.06311914324760437, "learning_rate": 0.00010186327017417525, "loss": 0.2429, "step": 26743 }, { "epoch": 2.166558651976669, "grad_norm": 0.04951076582074165, "learning_rate": 0.00010185876952158064, "loss": 0.2263, "step": 26744 }, { "epoch": 2.166639662994167, "grad_norm": 0.0637192651629448, "learning_rate": 0.00010185426886898601, "loss": 0.2625, "step": 26745 }, { "epoch": 2.166720674011666, "grad_norm": 0.06900296360254288, "learning_rate": 0.00010184976821639139, "loss": 0.2377, "step": 26746 }, { "epoch": 2.166801685029164, "grad_norm": 0.07371290773153305, "learning_rate": 0.00010184526756379676, "loss": 0.246, "step": 26747 }, { "epoch": 2.1668826960466623, "grad_norm": 0.062216177582740784, "learning_rate": 0.00010184076691120212, "loss": 0.2369, "step": 26748 }, { "epoch": 2.1669637070641605, "grad_norm": 0.06387155503034592, "learning_rate": 0.0001018362662586075, "loss": 0.2689, "step": 26749 }, { "epoch": 2.167044718081659, "grad_norm": 0.058942124247550964, "learning_rate": 0.00010183176560601288, "loss": 0.2371, "step": 26750 }, { "epoch": 2.1671257290991575, "grad_norm": 0.06074969097971916, "learning_rate": 0.00010182726495341825, "loss": 0.23, "step": 26751 }, { "epoch": 2.1672067401166557, "grad_norm": 0.07480621337890625, "learning_rate": 0.00010182276430082363, "loss": 0.287, "step": 26752 }, { "epoch": 2.1672877511341544, "grad_norm": 0.08909303694963455, "learning_rate": 0.000101818263648229, "loss": 0.2427, "step": 26753 }, { "epoch": 2.1673687621516526, "grad_norm": 0.05299947410821915, "learning_rate": 0.00010181376299563436, "loss": 0.2399, "step": 26754 }, { "epoch": 2.167449773169151, "grad_norm": 0.06072693318128586, "learning_rate": 0.00010180926234303973, "loss": 0.2634, "step": 26755 }, { "epoch": 2.1675307841866496, "grad_norm": 0.0829189270734787, "learning_rate": 0.00010180476169044512, "loss": 0.3127, "step": 26756 }, { "epoch": 2.167611795204148, "grad_norm": 0.06256073713302612, "learning_rate": 0.0001018002610378505, "loss": 0.2423, "step": 26757 }, { "epoch": 2.167692806221646, "grad_norm": 0.06656386703252792, "learning_rate": 0.00010179576038525587, "loss": 0.2767, "step": 26758 }, { "epoch": 2.1677738172391443, "grad_norm": 0.0756622776389122, "learning_rate": 0.00010179125973266124, "loss": 0.2534, "step": 26759 }, { "epoch": 2.167854828256643, "grad_norm": 0.06560155004262924, "learning_rate": 0.0001017867590800666, "loss": 0.2627, "step": 26760 }, { "epoch": 2.1679358392741412, "grad_norm": 0.07325062900781631, "learning_rate": 0.000101782258427472, "loss": 0.2672, "step": 26761 }, { "epoch": 2.1680168502916395, "grad_norm": 0.08070877194404602, "learning_rate": 0.00010177775777487736, "loss": 0.2794, "step": 26762 }, { "epoch": 2.168097861309138, "grad_norm": 0.06608166545629501, "learning_rate": 0.00010177325712228274, "loss": 0.2571, "step": 26763 }, { "epoch": 2.1681788723266364, "grad_norm": 0.05721350386738777, "learning_rate": 0.00010176875646968811, "loss": 0.2518, "step": 26764 }, { "epoch": 2.1682598833441347, "grad_norm": 0.08121379464864731, "learning_rate": 0.00010176425581709348, "loss": 0.3085, "step": 26765 }, { "epoch": 2.1683408943616334, "grad_norm": 0.06703774631023407, "learning_rate": 0.00010175975516449884, "loss": 0.2756, "step": 26766 }, { "epoch": 2.1684219053791316, "grad_norm": 0.07273641973733902, "learning_rate": 0.00010175525451190425, "loss": 0.2832, "step": 26767 }, { "epoch": 2.16850291639663, "grad_norm": 0.05412622541189194, "learning_rate": 0.0001017507538593096, "loss": 0.2947, "step": 26768 }, { "epoch": 2.1685839274141285, "grad_norm": 0.06769771873950958, "learning_rate": 0.00010174625320671498, "loss": 0.247, "step": 26769 }, { "epoch": 2.1686649384316268, "grad_norm": 0.06173847243189812, "learning_rate": 0.00010174175255412035, "loss": 0.2371, "step": 26770 }, { "epoch": 2.168745949449125, "grad_norm": 0.048327669501304626, "learning_rate": 0.00010173725190152573, "loss": 0.1902, "step": 26771 }, { "epoch": 2.1688269604666233, "grad_norm": 0.071600541472435, "learning_rate": 0.00010173275124893109, "loss": 0.2731, "step": 26772 }, { "epoch": 2.168907971484122, "grad_norm": 0.05988006293773651, "learning_rate": 0.00010172825059633649, "loss": 0.2513, "step": 26773 }, { "epoch": 2.16898898250162, "grad_norm": 0.07233826816082001, "learning_rate": 0.00010172374994374185, "loss": 0.2387, "step": 26774 }, { "epoch": 2.1690699935191184, "grad_norm": 0.05992598831653595, "learning_rate": 0.00010171924929114722, "loss": 0.2615, "step": 26775 }, { "epoch": 2.169151004536617, "grad_norm": 0.0667218416929245, "learning_rate": 0.0001017147486385526, "loss": 0.2468, "step": 26776 }, { "epoch": 2.1692320155541154, "grad_norm": 0.06006920710206032, "learning_rate": 0.00010171024798595797, "loss": 0.2693, "step": 26777 }, { "epoch": 2.1693130265716136, "grad_norm": 0.06924033164978027, "learning_rate": 0.00010170574733336333, "loss": 0.2469, "step": 26778 }, { "epoch": 2.1693940375891123, "grad_norm": 0.05584564805030823, "learning_rate": 0.00010170124668076873, "loss": 0.2532, "step": 26779 }, { "epoch": 2.1694750486066106, "grad_norm": 0.05546014755964279, "learning_rate": 0.00010169674602817409, "loss": 0.2259, "step": 26780 }, { "epoch": 2.169556059624109, "grad_norm": 0.058247484266757965, "learning_rate": 0.00010169224537557946, "loss": 0.2787, "step": 26781 }, { "epoch": 2.169637070641607, "grad_norm": 0.055568281561136246, "learning_rate": 0.00010168774472298484, "loss": 0.2211, "step": 26782 }, { "epoch": 2.1697180816591057, "grad_norm": 0.06250690668821335, "learning_rate": 0.00010168324407039021, "loss": 0.2913, "step": 26783 }, { "epoch": 2.169799092676604, "grad_norm": 0.05972811579704285, "learning_rate": 0.00010167874341779557, "loss": 0.263, "step": 26784 }, { "epoch": 2.1698801036941022, "grad_norm": 0.06468886882066727, "learning_rate": 0.00010167424276520097, "loss": 0.2459, "step": 26785 }, { "epoch": 2.169961114711601, "grad_norm": 0.06843791902065277, "learning_rate": 0.00010166974211260633, "loss": 0.2314, "step": 26786 }, { "epoch": 2.170042125729099, "grad_norm": 0.06479120254516602, "learning_rate": 0.0001016652414600117, "loss": 0.2212, "step": 26787 }, { "epoch": 2.1701231367465974, "grad_norm": 0.057134464383125305, "learning_rate": 0.00010166074080741708, "loss": 0.2177, "step": 26788 }, { "epoch": 2.170204147764096, "grad_norm": 0.06482430547475815, "learning_rate": 0.00010165624015482245, "loss": 0.2535, "step": 26789 }, { "epoch": 2.1702851587815943, "grad_norm": 0.06957831978797913, "learning_rate": 0.00010165173950222784, "loss": 0.2535, "step": 26790 }, { "epoch": 2.1703661697990926, "grad_norm": 0.09038234502077103, "learning_rate": 0.00010164723884963321, "loss": 0.2401, "step": 26791 }, { "epoch": 2.1704471808165913, "grad_norm": 0.07424237579107285, "learning_rate": 0.00010164273819703857, "loss": 0.2865, "step": 26792 }, { "epoch": 2.1705281918340895, "grad_norm": 0.06953853368759155, "learning_rate": 0.00010163823754444395, "loss": 0.271, "step": 26793 }, { "epoch": 2.1706092028515878, "grad_norm": 0.06888557970523834, "learning_rate": 0.00010163373689184932, "loss": 0.2437, "step": 26794 }, { "epoch": 2.170690213869086, "grad_norm": 0.05482298508286476, "learning_rate": 0.00010162923623925469, "loss": 0.2175, "step": 26795 }, { "epoch": 2.1707712248865847, "grad_norm": 0.06336444616317749, "learning_rate": 0.00010162473558666008, "loss": 0.23, "step": 26796 }, { "epoch": 2.170852235904083, "grad_norm": 0.07812932878732681, "learning_rate": 0.00010162023493406545, "loss": 0.2428, "step": 26797 }, { "epoch": 2.170933246921581, "grad_norm": 0.07318337261676788, "learning_rate": 0.00010161573428147081, "loss": 0.2747, "step": 26798 }, { "epoch": 2.17101425793908, "grad_norm": 0.07459474354982376, "learning_rate": 0.00010161123362887619, "loss": 0.2651, "step": 26799 }, { "epoch": 2.171095268956578, "grad_norm": 0.06458953768014908, "learning_rate": 0.00010160673297628156, "loss": 0.3091, "step": 26800 }, { "epoch": 2.1711762799740764, "grad_norm": 0.06517677754163742, "learning_rate": 0.00010160223232368693, "loss": 0.273, "step": 26801 }, { "epoch": 2.171257290991575, "grad_norm": 0.06921321898698807, "learning_rate": 0.00010159773167109232, "loss": 0.2543, "step": 26802 }, { "epoch": 2.1713383020090733, "grad_norm": 0.06701141595840454, "learning_rate": 0.0001015932310184977, "loss": 0.2652, "step": 26803 }, { "epoch": 2.1714193130265715, "grad_norm": 0.058195166289806366, "learning_rate": 0.00010158873036590307, "loss": 0.2553, "step": 26804 }, { "epoch": 2.17150032404407, "grad_norm": 0.0681111291050911, "learning_rate": 0.00010158422971330843, "loss": 0.238, "step": 26805 }, { "epoch": 2.1715813350615685, "grad_norm": 0.07244133204221725, "learning_rate": 0.0001015797290607138, "loss": 0.243, "step": 26806 }, { "epoch": 2.1716623460790667, "grad_norm": 0.05482422932982445, "learning_rate": 0.00010157522840811918, "loss": 0.2243, "step": 26807 }, { "epoch": 2.171743357096565, "grad_norm": 0.057814229279756546, "learning_rate": 0.00010157072775552456, "loss": 0.2618, "step": 26808 }, { "epoch": 2.1718243681140637, "grad_norm": 0.05317820981144905, "learning_rate": 0.00010156622710292994, "loss": 0.2305, "step": 26809 }, { "epoch": 2.171905379131562, "grad_norm": 0.07410687953233719, "learning_rate": 0.00010156172645033531, "loss": 0.2517, "step": 26810 }, { "epoch": 2.17198639014906, "grad_norm": 0.07304941117763519, "learning_rate": 0.00010155722579774067, "loss": 0.3064, "step": 26811 }, { "epoch": 2.172067401166559, "grad_norm": 0.05160785838961601, "learning_rate": 0.00010155272514514604, "loss": 0.258, "step": 26812 }, { "epoch": 2.172148412184057, "grad_norm": 0.06912660598754883, "learning_rate": 0.00010154822449255143, "loss": 0.3077, "step": 26813 }, { "epoch": 2.1722294232015553, "grad_norm": 0.057028722018003464, "learning_rate": 0.0001015437238399568, "loss": 0.2669, "step": 26814 }, { "epoch": 2.172310434219054, "grad_norm": 0.06860620528459549, "learning_rate": 0.00010153922318736218, "loss": 0.2796, "step": 26815 }, { "epoch": 2.1723914452365523, "grad_norm": 0.06248742341995239, "learning_rate": 0.00010153472253476755, "loss": 0.2643, "step": 26816 }, { "epoch": 2.1724724562540505, "grad_norm": 0.06286834180355072, "learning_rate": 0.00010153022188217291, "loss": 0.2312, "step": 26817 }, { "epoch": 2.1725534672715487, "grad_norm": 0.052289243787527084, "learning_rate": 0.00010152572122957829, "loss": 0.2168, "step": 26818 }, { "epoch": 2.1726344782890474, "grad_norm": 0.06501159071922302, "learning_rate": 0.00010152122057698367, "loss": 0.2233, "step": 26819 }, { "epoch": 2.1727154893065457, "grad_norm": 0.06584373116493225, "learning_rate": 0.00010151671992438905, "loss": 0.302, "step": 26820 }, { "epoch": 2.172796500324044, "grad_norm": 0.06395610421895981, "learning_rate": 0.00010151221927179442, "loss": 0.2661, "step": 26821 }, { "epoch": 2.1728775113415426, "grad_norm": 0.06655091792345047, "learning_rate": 0.0001015077186191998, "loss": 0.2748, "step": 26822 }, { "epoch": 2.172958522359041, "grad_norm": 0.05467076599597931, "learning_rate": 0.00010150321796660515, "loss": 0.2563, "step": 26823 }, { "epoch": 2.173039533376539, "grad_norm": 0.053810540586709976, "learning_rate": 0.00010149871731401053, "loss": 0.2178, "step": 26824 }, { "epoch": 2.173120544394038, "grad_norm": 0.06725815683603287, "learning_rate": 0.00010149421666141591, "loss": 0.2688, "step": 26825 }, { "epoch": 2.173201555411536, "grad_norm": 0.0681646317243576, "learning_rate": 0.00010148971600882129, "loss": 0.2879, "step": 26826 }, { "epoch": 2.1732825664290343, "grad_norm": 0.0565604604780674, "learning_rate": 0.00010148521535622666, "loss": 0.2636, "step": 26827 }, { "epoch": 2.1733635774465325, "grad_norm": 0.062234435230493546, "learning_rate": 0.00010148071470363204, "loss": 0.2619, "step": 26828 }, { "epoch": 2.173444588464031, "grad_norm": 0.055433180183172226, "learning_rate": 0.0001014762140510374, "loss": 0.2548, "step": 26829 }, { "epoch": 2.1735255994815295, "grad_norm": 0.06828644126653671, "learning_rate": 0.00010147171339844277, "loss": 0.2876, "step": 26830 }, { "epoch": 2.1736066104990277, "grad_norm": 0.05612456053495407, "learning_rate": 0.00010146721274584816, "loss": 0.2097, "step": 26831 }, { "epoch": 2.1736876215165264, "grad_norm": 0.07091177254915237, "learning_rate": 0.00010146271209325353, "loss": 0.2632, "step": 26832 }, { "epoch": 2.1737686325340246, "grad_norm": 0.06432201713323593, "learning_rate": 0.0001014582114406589, "loss": 0.2493, "step": 26833 }, { "epoch": 2.173849643551523, "grad_norm": 0.08751298487186432, "learning_rate": 0.00010145371078806428, "loss": 0.2594, "step": 26834 }, { "epoch": 2.1739306545690216, "grad_norm": 0.06486582010984421, "learning_rate": 0.00010144921013546964, "loss": 0.2603, "step": 26835 }, { "epoch": 2.17401166558652, "grad_norm": 0.07561185956001282, "learning_rate": 0.00010144470948287501, "loss": 0.2503, "step": 26836 }, { "epoch": 2.174092676604018, "grad_norm": 0.05659014731645584, "learning_rate": 0.0001014402088302804, "loss": 0.2091, "step": 26837 }, { "epoch": 2.1741736876215167, "grad_norm": 0.049686819314956665, "learning_rate": 0.00010143570817768577, "loss": 0.2458, "step": 26838 }, { "epoch": 2.174254698639015, "grad_norm": 0.06719785183668137, "learning_rate": 0.00010143120752509114, "loss": 0.2321, "step": 26839 }, { "epoch": 2.1743357096565132, "grad_norm": 0.06648898869752884, "learning_rate": 0.00010142670687249652, "loss": 0.2551, "step": 26840 }, { "epoch": 2.1744167206740115, "grad_norm": 0.07859358936548233, "learning_rate": 0.00010142220621990188, "loss": 0.2387, "step": 26841 }, { "epoch": 2.17449773169151, "grad_norm": 0.06366816908121109, "learning_rate": 0.00010141770556730728, "loss": 0.2409, "step": 26842 }, { "epoch": 2.1745787427090084, "grad_norm": 0.06784055382013321, "learning_rate": 0.00010141320491471264, "loss": 0.247, "step": 26843 }, { "epoch": 2.1746597537265067, "grad_norm": 0.08164727687835693, "learning_rate": 0.00010140870426211801, "loss": 0.2682, "step": 26844 }, { "epoch": 2.1747407647440054, "grad_norm": 0.06338001042604446, "learning_rate": 0.00010140420360952339, "loss": 0.276, "step": 26845 }, { "epoch": 2.1748217757615036, "grad_norm": 0.061157673597335815, "learning_rate": 0.00010139970295692876, "loss": 0.2441, "step": 26846 }, { "epoch": 2.174902786779002, "grad_norm": 0.06020011752843857, "learning_rate": 0.00010139520230433412, "loss": 0.2292, "step": 26847 }, { "epoch": 2.1749837977965005, "grad_norm": 0.060947537422180176, "learning_rate": 0.00010139070165173952, "loss": 0.2535, "step": 26848 }, { "epoch": 2.1750648088139988, "grad_norm": 0.05085349455475807, "learning_rate": 0.00010138620099914488, "loss": 0.2459, "step": 26849 }, { "epoch": 2.175145819831497, "grad_norm": 0.07931619882583618, "learning_rate": 0.00010138170034655025, "loss": 0.2339, "step": 26850 }, { "epoch": 2.1752268308489953, "grad_norm": 0.05213142931461334, "learning_rate": 0.00010137719969395563, "loss": 0.2326, "step": 26851 }, { "epoch": 2.175307841866494, "grad_norm": 0.07295794039964676, "learning_rate": 0.000101372699041361, "loss": 0.2556, "step": 26852 }, { "epoch": 2.175388852883992, "grad_norm": 0.06054900586605072, "learning_rate": 0.00010136819838876636, "loss": 0.2651, "step": 26853 }, { "epoch": 2.1754698639014904, "grad_norm": 0.08561225980520248, "learning_rate": 0.00010136369773617176, "loss": 0.2647, "step": 26854 }, { "epoch": 2.175550874918989, "grad_norm": 0.05409996584057808, "learning_rate": 0.00010135919708357712, "loss": 0.2526, "step": 26855 }, { "epoch": 2.1756318859364874, "grad_norm": 0.058983881026506424, "learning_rate": 0.0001013546964309825, "loss": 0.2587, "step": 26856 }, { "epoch": 2.1757128969539856, "grad_norm": 0.0726015567779541, "learning_rate": 0.00010135019577838787, "loss": 0.2552, "step": 26857 }, { "epoch": 2.1757939079714843, "grad_norm": 0.06838826835155487, "learning_rate": 0.00010134569512579324, "loss": 0.2544, "step": 26858 }, { "epoch": 2.1758749189889826, "grad_norm": 0.07187411189079285, "learning_rate": 0.0001013411944731986, "loss": 0.2318, "step": 26859 }, { "epoch": 2.175955930006481, "grad_norm": 0.06443783640861511, "learning_rate": 0.000101336693820604, "loss": 0.2641, "step": 26860 }, { "epoch": 2.1760369410239795, "grad_norm": 0.07598260790109634, "learning_rate": 0.00010133219316800936, "loss": 0.2823, "step": 26861 }, { "epoch": 2.1761179520414777, "grad_norm": 0.05119853839278221, "learning_rate": 0.00010132769251541474, "loss": 0.2304, "step": 26862 }, { "epoch": 2.176198963058976, "grad_norm": 0.07492173463106155, "learning_rate": 0.00010132319186282011, "loss": 0.2622, "step": 26863 }, { "epoch": 2.176279974076474, "grad_norm": 0.05547771230340004, "learning_rate": 0.00010131869121022548, "loss": 0.2158, "step": 26864 }, { "epoch": 2.176360985093973, "grad_norm": 0.06932628899812698, "learning_rate": 0.00010131419055763084, "loss": 0.2798, "step": 26865 }, { "epoch": 2.176441996111471, "grad_norm": 0.06388963013887405, "learning_rate": 0.00010130968990503625, "loss": 0.2632, "step": 26866 }, { "epoch": 2.1765230071289694, "grad_norm": 0.05636812746524811, "learning_rate": 0.00010130518925244162, "loss": 0.2714, "step": 26867 }, { "epoch": 2.176604018146468, "grad_norm": 0.06506742537021637, "learning_rate": 0.00010130068859984698, "loss": 0.2738, "step": 26868 }, { "epoch": 2.1766850291639663, "grad_norm": 0.060381386429071426, "learning_rate": 0.00010129618794725235, "loss": 0.2743, "step": 26869 }, { "epoch": 2.1767660401814646, "grad_norm": 0.05970916897058487, "learning_rate": 0.00010129168729465773, "loss": 0.2559, "step": 26870 }, { "epoch": 2.176847051198963, "grad_norm": 0.06593108922243118, "learning_rate": 0.00010128718664206311, "loss": 0.2611, "step": 26871 }, { "epoch": 2.1769280622164615, "grad_norm": 0.05910542234778404, "learning_rate": 0.00010128268598946849, "loss": 0.2718, "step": 26872 }, { "epoch": 2.1770090732339598, "grad_norm": 0.06308293342590332, "learning_rate": 0.00010127818533687386, "loss": 0.2707, "step": 26873 }, { "epoch": 2.177090084251458, "grad_norm": 0.06285148113965988, "learning_rate": 0.00010127368468427922, "loss": 0.2661, "step": 26874 }, { "epoch": 2.1771710952689567, "grad_norm": 0.059369564056396484, "learning_rate": 0.0001012691840316846, "loss": 0.222, "step": 26875 }, { "epoch": 2.177252106286455, "grad_norm": 0.05995746701955795, "learning_rate": 0.00010126468337908997, "loss": 0.3101, "step": 26876 }, { "epoch": 2.177333117303953, "grad_norm": 0.08611472696065903, "learning_rate": 0.00010126018272649536, "loss": 0.276, "step": 26877 }, { "epoch": 2.177414128321452, "grad_norm": 0.06998145580291748, "learning_rate": 0.00010125568207390073, "loss": 0.2923, "step": 26878 }, { "epoch": 2.17749513933895, "grad_norm": 0.05503055825829506, "learning_rate": 0.0001012511814213061, "loss": 0.2521, "step": 26879 }, { "epoch": 2.1775761503564484, "grad_norm": 0.06742962449789047, "learning_rate": 0.00010124668076871146, "loss": 0.292, "step": 26880 }, { "epoch": 2.177657161373947, "grad_norm": 0.061561357229948044, "learning_rate": 0.00010124218011611684, "loss": 0.2354, "step": 26881 }, { "epoch": 2.1777381723914453, "grad_norm": 0.05672166496515274, "learning_rate": 0.00010123767946352221, "loss": 0.242, "step": 26882 }, { "epoch": 2.1778191834089435, "grad_norm": 0.07537026703357697, "learning_rate": 0.0001012331788109276, "loss": 0.2796, "step": 26883 }, { "epoch": 2.1779001944264422, "grad_norm": 0.058555323630571365, "learning_rate": 0.00010122867815833297, "loss": 0.2534, "step": 26884 }, { "epoch": 2.1779812054439405, "grad_norm": 0.06248362362384796, "learning_rate": 0.00010122417750573834, "loss": 0.2499, "step": 26885 }, { "epoch": 2.1780622164614387, "grad_norm": 0.06366802006959915, "learning_rate": 0.0001012196768531437, "loss": 0.2357, "step": 26886 }, { "epoch": 2.178143227478937, "grad_norm": 0.07255370169878006, "learning_rate": 0.00010121517620054908, "loss": 0.2567, "step": 26887 }, { "epoch": 2.1782242384964356, "grad_norm": 0.07287876307964325, "learning_rate": 0.00010121067554795445, "loss": 0.3164, "step": 26888 }, { "epoch": 2.178305249513934, "grad_norm": 0.0844060480594635, "learning_rate": 0.00010120617489535984, "loss": 0.2519, "step": 26889 }, { "epoch": 2.178386260531432, "grad_norm": 0.08379963785409927, "learning_rate": 0.00010120167424276521, "loss": 0.2488, "step": 26890 }, { "epoch": 2.178467271548931, "grad_norm": 0.058833617717027664, "learning_rate": 0.00010119717359017059, "loss": 0.2436, "step": 26891 }, { "epoch": 2.178548282566429, "grad_norm": 0.07550203055143356, "learning_rate": 0.00010119267293757595, "loss": 0.2592, "step": 26892 }, { "epoch": 2.1786292935839273, "grad_norm": 0.0695260539650917, "learning_rate": 0.00010118817228498132, "loss": 0.2653, "step": 26893 }, { "epoch": 2.1787103046014256, "grad_norm": 0.054173555225133896, "learning_rate": 0.0001011836716323867, "loss": 0.2421, "step": 26894 }, { "epoch": 2.1787913156189243, "grad_norm": 0.05898955464363098, "learning_rate": 0.00010117917097979208, "loss": 0.2291, "step": 26895 }, { "epoch": 2.1788723266364225, "grad_norm": 0.05412711948156357, "learning_rate": 0.00010117467032719745, "loss": 0.2196, "step": 26896 }, { "epoch": 2.1789533376539207, "grad_norm": 0.053595926612615585, "learning_rate": 0.00010117016967460283, "loss": 0.2341, "step": 26897 }, { "epoch": 2.1790343486714194, "grad_norm": 0.06263613700866699, "learning_rate": 0.00010116566902200819, "loss": 0.2385, "step": 26898 }, { "epoch": 2.1791153596889177, "grad_norm": 0.07698112726211548, "learning_rate": 0.00010116116836941356, "loss": 0.3146, "step": 26899 }, { "epoch": 2.179196370706416, "grad_norm": 0.06529154628515244, "learning_rate": 0.00010115666771681895, "loss": 0.2266, "step": 26900 }, { "epoch": 2.1792773817239146, "grad_norm": 0.0627814456820488, "learning_rate": 0.00010115216706422432, "loss": 0.2646, "step": 26901 }, { "epoch": 2.179358392741413, "grad_norm": 0.06005362793803215, "learning_rate": 0.0001011476664116297, "loss": 0.2475, "step": 26902 }, { "epoch": 2.179439403758911, "grad_norm": 0.062046971172094345, "learning_rate": 0.00010114316575903507, "loss": 0.2527, "step": 26903 }, { "epoch": 2.17952041477641, "grad_norm": 0.06619498878717422, "learning_rate": 0.00010113866510644043, "loss": 0.2766, "step": 26904 }, { "epoch": 2.179601425793908, "grad_norm": 0.06332775950431824, "learning_rate": 0.0001011341644538458, "loss": 0.2353, "step": 26905 }, { "epoch": 2.1796824368114063, "grad_norm": 0.06274640560150146, "learning_rate": 0.00010112966380125119, "loss": 0.2526, "step": 26906 }, { "epoch": 2.179763447828905, "grad_norm": 0.060782160609960556, "learning_rate": 0.00010112516314865656, "loss": 0.2578, "step": 26907 }, { "epoch": 2.179844458846403, "grad_norm": 0.062455467879772186, "learning_rate": 0.00010112066249606194, "loss": 0.2314, "step": 26908 }, { "epoch": 2.1799254698639015, "grad_norm": 0.06808603554964066, "learning_rate": 0.00010111616184346731, "loss": 0.2572, "step": 26909 }, { "epoch": 2.1800064808813997, "grad_norm": 0.07538673281669617, "learning_rate": 0.00010111166119087267, "loss": 0.2653, "step": 26910 }, { "epoch": 2.1800874918988984, "grad_norm": 0.05819778889417648, "learning_rate": 0.00010110716053827804, "loss": 0.2352, "step": 26911 }, { "epoch": 2.1801685029163966, "grad_norm": 0.07164320349693298, "learning_rate": 0.00010110265988568343, "loss": 0.2437, "step": 26912 }, { "epoch": 2.180249513933895, "grad_norm": 0.0552498884499073, "learning_rate": 0.0001010981592330888, "loss": 0.2135, "step": 26913 }, { "epoch": 2.1803305249513936, "grad_norm": 0.0703219398856163, "learning_rate": 0.00010109365858049418, "loss": 0.2735, "step": 26914 }, { "epoch": 2.180411535968892, "grad_norm": 0.0603179857134819, "learning_rate": 0.00010108915792789955, "loss": 0.2237, "step": 26915 }, { "epoch": 2.18049254698639, "grad_norm": 0.07395555824041367, "learning_rate": 0.00010108465727530491, "loss": 0.2294, "step": 26916 }, { "epoch": 2.1805735580038883, "grad_norm": 0.06649938225746155, "learning_rate": 0.00010108015662271029, "loss": 0.2314, "step": 26917 }, { "epoch": 2.180654569021387, "grad_norm": 0.05904200300574303, "learning_rate": 0.00010107565597011567, "loss": 0.2757, "step": 26918 }, { "epoch": 2.1807355800388852, "grad_norm": 0.0554424412548542, "learning_rate": 0.00010107115531752105, "loss": 0.2022, "step": 26919 }, { "epoch": 2.1808165910563835, "grad_norm": 0.07284878194332123, "learning_rate": 0.00010106665466492642, "loss": 0.2521, "step": 26920 }, { "epoch": 2.180897602073882, "grad_norm": 0.06328210979700089, "learning_rate": 0.0001010621540123318, "loss": 0.2335, "step": 26921 }, { "epoch": 2.1809786130913804, "grad_norm": 0.07258543372154236, "learning_rate": 0.00010105765335973715, "loss": 0.2862, "step": 26922 }, { "epoch": 2.1810596241088787, "grad_norm": 0.07839885354042053, "learning_rate": 0.00010105315270714255, "loss": 0.3048, "step": 26923 }, { "epoch": 2.1811406351263773, "grad_norm": 0.06599098443984985, "learning_rate": 0.00010104865205454791, "loss": 0.3021, "step": 26924 }, { "epoch": 2.1812216461438756, "grad_norm": 0.059078000485897064, "learning_rate": 0.00010104415140195329, "loss": 0.2275, "step": 26925 }, { "epoch": 2.181302657161374, "grad_norm": 0.0659744143486023, "learning_rate": 0.00010103965074935866, "loss": 0.2426, "step": 26926 }, { "epoch": 2.1813836681788725, "grad_norm": 0.05550341308116913, "learning_rate": 0.00010103515009676404, "loss": 0.237, "step": 26927 }, { "epoch": 2.1814646791963708, "grad_norm": 0.061645619571208954, "learning_rate": 0.0001010306494441694, "loss": 0.2622, "step": 26928 }, { "epoch": 2.181545690213869, "grad_norm": 0.06350360810756683, "learning_rate": 0.0001010261487915748, "loss": 0.2376, "step": 26929 }, { "epoch": 2.1816267012313673, "grad_norm": 0.05759890004992485, "learning_rate": 0.00010102164813898016, "loss": 0.272, "step": 26930 }, { "epoch": 2.181707712248866, "grad_norm": 0.06146299093961716, "learning_rate": 0.00010101714748638553, "loss": 0.2552, "step": 26931 }, { "epoch": 2.181788723266364, "grad_norm": 0.06417529284954071, "learning_rate": 0.0001010126468337909, "loss": 0.2554, "step": 26932 }, { "epoch": 2.1818697342838624, "grad_norm": 0.06104740872979164, "learning_rate": 0.00010100814618119628, "loss": 0.2642, "step": 26933 }, { "epoch": 2.181950745301361, "grad_norm": 0.05828656256198883, "learning_rate": 0.00010100364552860164, "loss": 0.2439, "step": 26934 }, { "epoch": 2.1820317563188594, "grad_norm": 0.05927203223109245, "learning_rate": 0.00010099914487600704, "loss": 0.2732, "step": 26935 }, { "epoch": 2.1821127673363576, "grad_norm": 0.07756493240594864, "learning_rate": 0.00010099464422341241, "loss": 0.2433, "step": 26936 }, { "epoch": 2.1821937783538563, "grad_norm": 0.06863519549369812, "learning_rate": 0.00010099014357081777, "loss": 0.2594, "step": 26937 }, { "epoch": 2.1822747893713546, "grad_norm": 0.06425057351589203, "learning_rate": 0.00010098564291822315, "loss": 0.2223, "step": 26938 }, { "epoch": 2.182355800388853, "grad_norm": 0.057839956134557724, "learning_rate": 0.00010098114226562852, "loss": 0.2562, "step": 26939 }, { "epoch": 2.182436811406351, "grad_norm": 0.06706058979034424, "learning_rate": 0.00010097664161303388, "loss": 0.2575, "step": 26940 }, { "epoch": 2.1825178224238497, "grad_norm": 0.07520833611488342, "learning_rate": 0.00010097214096043928, "loss": 0.2859, "step": 26941 }, { "epoch": 2.182598833441348, "grad_norm": 0.06069038808345795, "learning_rate": 0.00010096764030784465, "loss": 0.2359, "step": 26942 }, { "epoch": 2.182679844458846, "grad_norm": 0.06446312367916107, "learning_rate": 0.00010096313965525001, "loss": 0.2374, "step": 26943 }, { "epoch": 2.182760855476345, "grad_norm": 0.07859501242637634, "learning_rate": 0.00010095863900265539, "loss": 0.2502, "step": 26944 }, { "epoch": 2.182841866493843, "grad_norm": 0.06446570158004761, "learning_rate": 0.00010095413835006076, "loss": 0.2924, "step": 26945 }, { "epoch": 2.1829228775113414, "grad_norm": 0.06464308500289917, "learning_rate": 0.00010094963769746615, "loss": 0.262, "step": 26946 }, { "epoch": 2.18300388852884, "grad_norm": 0.04921901226043701, "learning_rate": 0.00010094513704487152, "loss": 0.2536, "step": 26947 }, { "epoch": 2.1830848995463383, "grad_norm": 0.06221909075975418, "learning_rate": 0.0001009406363922769, "loss": 0.3152, "step": 26948 }, { "epoch": 2.1831659105638366, "grad_norm": 0.06305009126663208, "learning_rate": 0.00010093613573968225, "loss": 0.2528, "step": 26949 }, { "epoch": 2.1832469215813353, "grad_norm": 0.08612064272165298, "learning_rate": 0.00010093163508708763, "loss": 0.2447, "step": 26950 }, { "epoch": 2.1833279325988335, "grad_norm": 0.07131386548280716, "learning_rate": 0.000100927134434493, "loss": 0.2517, "step": 26951 }, { "epoch": 2.1834089436163318, "grad_norm": 0.08630380779504776, "learning_rate": 0.00010092263378189839, "loss": 0.3009, "step": 26952 }, { "epoch": 2.18348995463383, "grad_norm": 0.05582493916153908, "learning_rate": 0.00010091813312930376, "loss": 0.2636, "step": 26953 }, { "epoch": 2.1835709656513287, "grad_norm": 0.07412777841091156, "learning_rate": 0.00010091363247670914, "loss": 0.2333, "step": 26954 }, { "epoch": 2.183651976668827, "grad_norm": 0.06575404107570648, "learning_rate": 0.0001009091318241145, "loss": 0.2573, "step": 26955 }, { "epoch": 2.183732987686325, "grad_norm": 0.0642695501446724, "learning_rate": 0.00010090463117151987, "loss": 0.2624, "step": 26956 }, { "epoch": 2.183813998703824, "grad_norm": 0.0616472102701664, "learning_rate": 0.00010090013051892524, "loss": 0.2231, "step": 26957 }, { "epoch": 2.183895009721322, "grad_norm": 0.06302770972251892, "learning_rate": 0.00010089562986633063, "loss": 0.2798, "step": 26958 }, { "epoch": 2.1839760207388204, "grad_norm": 0.05955367907881737, "learning_rate": 0.000100891129213736, "loss": 0.2604, "step": 26959 }, { "epoch": 2.184057031756319, "grad_norm": 0.06482961773872375, "learning_rate": 0.00010088662856114138, "loss": 0.2375, "step": 26960 }, { "epoch": 2.1841380427738173, "grad_norm": 0.0561881847679615, "learning_rate": 0.00010088212790854674, "loss": 0.226, "step": 26961 }, { "epoch": 2.1842190537913155, "grad_norm": 0.07170061022043228, "learning_rate": 0.00010087762725595211, "loss": 0.2623, "step": 26962 }, { "epoch": 2.184300064808814, "grad_norm": 0.06463984400033951, "learning_rate": 0.00010087312660335749, "loss": 0.2756, "step": 26963 }, { "epoch": 2.1843810758263125, "grad_norm": 0.061898235231637955, "learning_rate": 0.00010086862595076287, "loss": 0.2383, "step": 26964 }, { "epoch": 2.1844620868438107, "grad_norm": 0.06135169789195061, "learning_rate": 0.00010086412529816825, "loss": 0.2642, "step": 26965 }, { "epoch": 2.184543097861309, "grad_norm": 0.08380525559186935, "learning_rate": 0.00010085962464557362, "loss": 0.2849, "step": 26966 }, { "epoch": 2.1846241088788076, "grad_norm": 0.07640878856182098, "learning_rate": 0.00010085512399297898, "loss": 0.3056, "step": 26967 }, { "epoch": 2.184705119896306, "grad_norm": 0.0755113810300827, "learning_rate": 0.00010085062334038435, "loss": 0.2957, "step": 26968 }, { "epoch": 2.184786130913804, "grad_norm": 0.07308080792427063, "learning_rate": 0.00010084612268778973, "loss": 0.2689, "step": 26969 }, { "epoch": 2.184867141931303, "grad_norm": 0.07572430372238159, "learning_rate": 0.00010084162203519511, "loss": 0.2492, "step": 26970 }, { "epoch": 2.184948152948801, "grad_norm": 0.07837171852588654, "learning_rate": 0.00010083712138260049, "loss": 0.3113, "step": 26971 }, { "epoch": 2.1850291639662993, "grad_norm": 0.06892222166061401, "learning_rate": 0.00010083262073000586, "loss": 0.2123, "step": 26972 }, { "epoch": 2.185110174983798, "grad_norm": 0.0713641420006752, "learning_rate": 0.00010082812007741122, "loss": 0.2821, "step": 26973 }, { "epoch": 2.1851911860012962, "grad_norm": 0.05223554000258446, "learning_rate": 0.0001008236194248166, "loss": 0.2282, "step": 26974 }, { "epoch": 2.1852721970187945, "grad_norm": 0.060114096850156784, "learning_rate": 0.00010081911877222198, "loss": 0.2458, "step": 26975 }, { "epoch": 2.1853532080362927, "grad_norm": 0.06315486133098602, "learning_rate": 0.00010081461811962736, "loss": 0.2526, "step": 26976 }, { "epoch": 2.1854342190537914, "grad_norm": 0.05718789994716644, "learning_rate": 0.00010081011746703273, "loss": 0.2548, "step": 26977 }, { "epoch": 2.1855152300712897, "grad_norm": 0.0645727887749672, "learning_rate": 0.0001008056168144381, "loss": 0.2194, "step": 26978 }, { "epoch": 2.185596241088788, "grad_norm": 0.05673626437783241, "learning_rate": 0.00010080111616184346, "loss": 0.2776, "step": 26979 }, { "epoch": 2.1856772521062866, "grad_norm": 0.0616806261241436, "learning_rate": 0.00010079661550924884, "loss": 0.2394, "step": 26980 }, { "epoch": 2.185758263123785, "grad_norm": 0.08814667910337448, "learning_rate": 0.00010079211485665422, "loss": 0.2503, "step": 26981 }, { "epoch": 2.185839274141283, "grad_norm": 0.06610722839832306, "learning_rate": 0.0001007876142040596, "loss": 0.2726, "step": 26982 }, { "epoch": 2.185920285158782, "grad_norm": 0.0646798387169838, "learning_rate": 0.00010078311355146497, "loss": 0.2755, "step": 26983 }, { "epoch": 2.18600129617628, "grad_norm": 0.06937199085950851, "learning_rate": 0.00010077861289887034, "loss": 0.2723, "step": 26984 }, { "epoch": 2.1860823071937783, "grad_norm": 0.0682591125369072, "learning_rate": 0.0001007741122462757, "loss": 0.2559, "step": 26985 }, { "epoch": 2.1861633182112765, "grad_norm": 0.0681900903582573, "learning_rate": 0.00010076961159368108, "loss": 0.2232, "step": 26986 }, { "epoch": 2.186244329228775, "grad_norm": 0.06384847313165665, "learning_rate": 0.00010076511094108647, "loss": 0.2501, "step": 26987 }, { "epoch": 2.1863253402462735, "grad_norm": 0.06840259581804276, "learning_rate": 0.00010076061028849184, "loss": 0.2717, "step": 26988 }, { "epoch": 2.1864063512637717, "grad_norm": 0.06057314947247505, "learning_rate": 0.00010075610963589721, "loss": 0.2268, "step": 26989 }, { "epoch": 2.1864873622812704, "grad_norm": 0.058428164571523666, "learning_rate": 0.00010075160898330259, "loss": 0.2459, "step": 26990 }, { "epoch": 2.1865683732987686, "grad_norm": 0.07158659398555756, "learning_rate": 0.00010074710833070795, "loss": 0.2702, "step": 26991 }, { "epoch": 2.186649384316267, "grad_norm": 0.06685875356197357, "learning_rate": 0.00010074260767811332, "loss": 0.2488, "step": 26992 }, { "epoch": 2.1867303953337656, "grad_norm": 0.049234382808208466, "learning_rate": 0.0001007381070255187, "loss": 0.2249, "step": 26993 }, { "epoch": 2.186811406351264, "grad_norm": 0.057531118392944336, "learning_rate": 0.00010073360637292408, "loss": 0.2399, "step": 26994 }, { "epoch": 2.186892417368762, "grad_norm": 0.06531118601560593, "learning_rate": 0.00010072910572032945, "loss": 0.2352, "step": 26995 }, { "epoch": 2.1869734283862607, "grad_norm": 0.05908123403787613, "learning_rate": 0.00010072460506773483, "loss": 0.2365, "step": 26996 }, { "epoch": 2.187054439403759, "grad_norm": 0.07252193987369537, "learning_rate": 0.00010072010441514019, "loss": 0.2443, "step": 26997 }, { "epoch": 2.1871354504212572, "grad_norm": 0.06428375095129013, "learning_rate": 0.00010071560376254556, "loss": 0.2711, "step": 26998 }, { "epoch": 2.1872164614387555, "grad_norm": 0.06661681085824966, "learning_rate": 0.00010071110310995095, "loss": 0.2677, "step": 26999 }, { "epoch": 2.187297472456254, "grad_norm": 0.06688234955072403, "learning_rate": 0.00010070660245735632, "loss": 0.2124, "step": 27000 }, { "epoch": 2.1873784834737524, "grad_norm": 0.062010444700717926, "learning_rate": 0.0001007021018047617, "loss": 0.2186, "step": 27001 }, { "epoch": 2.1874594944912507, "grad_norm": 0.06785605847835541, "learning_rate": 0.00010069760115216707, "loss": 0.2512, "step": 27002 }, { "epoch": 2.1875405055087493, "grad_norm": 0.06811809539794922, "learning_rate": 0.00010069310049957243, "loss": 0.2352, "step": 27003 }, { "epoch": 2.1876215165262476, "grad_norm": 0.06498055905103683, "learning_rate": 0.00010068859984697783, "loss": 0.2579, "step": 27004 }, { "epoch": 2.187702527543746, "grad_norm": 0.07715179026126862, "learning_rate": 0.0001006840991943832, "loss": 0.2328, "step": 27005 }, { "epoch": 2.1877835385612445, "grad_norm": 0.07081539183855057, "learning_rate": 0.00010067959854178856, "loss": 0.2599, "step": 27006 }, { "epoch": 2.1878645495787428, "grad_norm": 0.082499660551548, "learning_rate": 0.00010067509788919394, "loss": 0.288, "step": 27007 }, { "epoch": 2.187945560596241, "grad_norm": 0.06907915323972702, "learning_rate": 0.00010067059723659931, "loss": 0.2821, "step": 27008 }, { "epoch": 2.1880265716137393, "grad_norm": 0.0711050033569336, "learning_rate": 0.00010066609658400467, "loss": 0.2782, "step": 27009 }, { "epoch": 2.188107582631238, "grad_norm": 0.06753864884376526, "learning_rate": 0.00010066159593141007, "loss": 0.2313, "step": 27010 }, { "epoch": 2.188188593648736, "grad_norm": 0.059835441410541534, "learning_rate": 0.00010065709527881545, "loss": 0.2399, "step": 27011 }, { "epoch": 2.1882696046662344, "grad_norm": 0.07964125275611877, "learning_rate": 0.0001006525946262208, "loss": 0.2661, "step": 27012 }, { "epoch": 2.188350615683733, "grad_norm": 0.06409208476543427, "learning_rate": 0.00010064809397362618, "loss": 0.2652, "step": 27013 }, { "epoch": 2.1884316267012314, "grad_norm": 0.05935799330472946, "learning_rate": 0.00010064359332103155, "loss": 0.244, "step": 27014 }, { "epoch": 2.1885126377187296, "grad_norm": 0.08206460624933243, "learning_rate": 0.00010063909266843691, "loss": 0.2023, "step": 27015 }, { "epoch": 2.1885936487362283, "grad_norm": 0.07063757628202438, "learning_rate": 0.00010063459201584231, "loss": 0.2893, "step": 27016 }, { "epoch": 2.1886746597537265, "grad_norm": 0.06272878497838974, "learning_rate": 0.00010063009136324769, "loss": 0.2369, "step": 27017 }, { "epoch": 2.188755670771225, "grad_norm": 0.060590989887714386, "learning_rate": 0.00010062559071065305, "loss": 0.2661, "step": 27018 }, { "epoch": 2.1888366817887235, "grad_norm": 0.06756321340799332, "learning_rate": 0.00010062109005805842, "loss": 0.2799, "step": 27019 }, { "epoch": 2.1889176928062217, "grad_norm": 0.06753882765769958, "learning_rate": 0.0001006165894054638, "loss": 0.2786, "step": 27020 }, { "epoch": 2.18899870382372, "grad_norm": 0.0644640401005745, "learning_rate": 0.00010061208875286915, "loss": 0.2291, "step": 27021 }, { "epoch": 2.189079714841218, "grad_norm": 0.06838696449995041, "learning_rate": 0.00010060758810027455, "loss": 0.2785, "step": 27022 }, { "epoch": 2.189160725858717, "grad_norm": 0.06403999775648117, "learning_rate": 0.00010060308744767993, "loss": 0.2289, "step": 27023 }, { "epoch": 2.189241736876215, "grad_norm": 0.06139326095581055, "learning_rate": 0.00010059858679508529, "loss": 0.2291, "step": 27024 }, { "epoch": 2.1893227478937134, "grad_norm": 0.0574423111975193, "learning_rate": 0.00010059408614249066, "loss": 0.259, "step": 27025 }, { "epoch": 2.189403758911212, "grad_norm": 0.07539302110671997, "learning_rate": 0.00010058958548989604, "loss": 0.2837, "step": 27026 }, { "epoch": 2.1894847699287103, "grad_norm": 0.06395363807678223, "learning_rate": 0.00010058508483730142, "loss": 0.2634, "step": 27027 }, { "epoch": 2.1895657809462086, "grad_norm": 0.05907757952809334, "learning_rate": 0.0001005805841847068, "loss": 0.2663, "step": 27028 }, { "epoch": 2.1896467919637073, "grad_norm": 0.06327487528324127, "learning_rate": 0.00010057608353211217, "loss": 0.2362, "step": 27029 }, { "epoch": 2.1897278029812055, "grad_norm": 0.06699670106172562, "learning_rate": 0.00010057158287951753, "loss": 0.2709, "step": 27030 }, { "epoch": 2.1898088139987038, "grad_norm": 0.051833443343639374, "learning_rate": 0.0001005670822269229, "loss": 0.2482, "step": 27031 }, { "epoch": 2.189889825016202, "grad_norm": 0.07217784970998764, "learning_rate": 0.00010056258157432828, "loss": 0.2804, "step": 27032 }, { "epoch": 2.1899708360337007, "grad_norm": 0.06261934340000153, "learning_rate": 0.00010055808092173366, "loss": 0.2548, "step": 27033 }, { "epoch": 2.190051847051199, "grad_norm": 0.06124917045235634, "learning_rate": 0.00010055358026913904, "loss": 0.218, "step": 27034 }, { "epoch": 2.190132858068697, "grad_norm": 0.06161245331168175, "learning_rate": 0.00010054907961654441, "loss": 0.229, "step": 27035 }, { "epoch": 2.190213869086196, "grad_norm": 0.07126471400260925, "learning_rate": 0.00010054457896394977, "loss": 0.2559, "step": 27036 }, { "epoch": 2.190294880103694, "grad_norm": 0.05679122358560562, "learning_rate": 0.00010054007831135515, "loss": 0.2001, "step": 27037 }, { "epoch": 2.1903758911211924, "grad_norm": 0.07383640855550766, "learning_rate": 0.00010053557765876052, "loss": 0.2519, "step": 27038 }, { "epoch": 2.190456902138691, "grad_norm": 0.06887379288673401, "learning_rate": 0.0001005310770061659, "loss": 0.2363, "step": 27039 }, { "epoch": 2.1905379131561893, "grad_norm": 0.06768579035997391, "learning_rate": 0.00010052657635357128, "loss": 0.241, "step": 27040 }, { "epoch": 2.1906189241736875, "grad_norm": 0.061511702835559845, "learning_rate": 0.00010052207570097665, "loss": 0.2445, "step": 27041 }, { "epoch": 2.190699935191186, "grad_norm": 0.06590621918439865, "learning_rate": 0.00010051757504838201, "loss": 0.2734, "step": 27042 }, { "epoch": 2.1907809462086845, "grad_norm": 0.060971327126026154, "learning_rate": 0.00010051307439578739, "loss": 0.2402, "step": 27043 }, { "epoch": 2.1908619572261827, "grad_norm": 0.05624224618077278, "learning_rate": 0.00010050857374319276, "loss": 0.2245, "step": 27044 }, { "epoch": 2.190942968243681, "grad_norm": 0.06078394129872322, "learning_rate": 0.00010050407309059815, "loss": 0.3057, "step": 27045 }, { "epoch": 2.1910239792611796, "grad_norm": 0.06620259582996368, "learning_rate": 0.00010049957243800352, "loss": 0.2642, "step": 27046 }, { "epoch": 2.191104990278678, "grad_norm": 0.05764997750520706, "learning_rate": 0.0001004950717854089, "loss": 0.2416, "step": 27047 }, { "epoch": 2.191186001296176, "grad_norm": 0.05957689881324768, "learning_rate": 0.00010049057113281425, "loss": 0.2355, "step": 27048 }, { "epoch": 2.191267012313675, "grad_norm": 0.06990189105272293, "learning_rate": 0.00010048607048021963, "loss": 0.2298, "step": 27049 }, { "epoch": 2.191348023331173, "grad_norm": 0.061472970992326736, "learning_rate": 0.000100481569827625, "loss": 0.2542, "step": 27050 }, { "epoch": 2.1914290343486713, "grad_norm": 0.05452711880207062, "learning_rate": 0.00010047706917503039, "loss": 0.2594, "step": 27051 }, { "epoch": 2.19151004536617, "grad_norm": 0.05978472903370857, "learning_rate": 0.00010047256852243576, "loss": 0.2424, "step": 27052 }, { "epoch": 2.1915910563836682, "grad_norm": 0.06296546757221222, "learning_rate": 0.00010046806786984114, "loss": 0.2645, "step": 27053 }, { "epoch": 2.1916720674011665, "grad_norm": 0.06470096856355667, "learning_rate": 0.0001004635672172465, "loss": 0.2586, "step": 27054 }, { "epoch": 2.1917530784186647, "grad_norm": 0.06551109999418259, "learning_rate": 0.00010045906656465187, "loss": 0.2369, "step": 27055 }, { "epoch": 2.1918340894361634, "grad_norm": 0.06295715272426605, "learning_rate": 0.00010045456591205726, "loss": 0.2507, "step": 27056 }, { "epoch": 2.1919151004536617, "grad_norm": 0.06285136938095093, "learning_rate": 0.00010045006525946263, "loss": 0.2484, "step": 27057 }, { "epoch": 2.19199611147116, "grad_norm": 0.04650372266769409, "learning_rate": 0.000100445564606868, "loss": 0.2413, "step": 27058 }, { "epoch": 2.1920771224886586, "grad_norm": 0.06401270627975464, "learning_rate": 0.00010044106395427338, "loss": 0.2889, "step": 27059 }, { "epoch": 2.192158133506157, "grad_norm": 0.0678238719701767, "learning_rate": 0.00010043656330167874, "loss": 0.2224, "step": 27060 }, { "epoch": 2.192239144523655, "grad_norm": 0.06454212963581085, "learning_rate": 0.00010043206264908411, "loss": 0.2326, "step": 27061 }, { "epoch": 2.192320155541154, "grad_norm": 0.0558546744287014, "learning_rate": 0.0001004275619964895, "loss": 0.2824, "step": 27062 }, { "epoch": 2.192401166558652, "grad_norm": 0.06207578256726265, "learning_rate": 0.00010042306134389487, "loss": 0.272, "step": 27063 }, { "epoch": 2.1924821775761503, "grad_norm": 0.05711657181382179, "learning_rate": 0.00010041856069130025, "loss": 0.2588, "step": 27064 }, { "epoch": 2.192563188593649, "grad_norm": 0.07172786444425583, "learning_rate": 0.00010041406003870562, "loss": 0.2402, "step": 27065 }, { "epoch": 2.192644199611147, "grad_norm": 0.07652819156646729, "learning_rate": 0.00010040955938611098, "loss": 0.233, "step": 27066 }, { "epoch": 2.1927252106286454, "grad_norm": 0.062125325202941895, "learning_rate": 0.00010040505873351635, "loss": 0.2643, "step": 27067 }, { "epoch": 2.1928062216461437, "grad_norm": 0.05422678962349892, "learning_rate": 0.00010040055808092174, "loss": 0.2316, "step": 27068 }, { "epoch": 2.1928872326636424, "grad_norm": 0.05607717111706734, "learning_rate": 0.00010039605742832711, "loss": 0.2473, "step": 27069 }, { "epoch": 2.1929682436811406, "grad_norm": 0.06470224261283875, "learning_rate": 0.00010039155677573249, "loss": 0.2499, "step": 27070 }, { "epoch": 2.193049254698639, "grad_norm": 0.08244304358959198, "learning_rate": 0.00010038705612313786, "loss": 0.2361, "step": 27071 }, { "epoch": 2.1931302657161376, "grad_norm": 0.058195363730192184, "learning_rate": 0.00010038255547054322, "loss": 0.2303, "step": 27072 }, { "epoch": 2.193211276733636, "grad_norm": 0.06642767786979675, "learning_rate": 0.0001003780548179486, "loss": 0.2225, "step": 27073 }, { "epoch": 2.193292287751134, "grad_norm": 0.06660183519124985, "learning_rate": 0.000100373554165354, "loss": 0.267, "step": 27074 }, { "epoch": 2.1933732987686327, "grad_norm": 0.06869763880968094, "learning_rate": 0.00010036905351275936, "loss": 0.2598, "step": 27075 }, { "epoch": 2.193454309786131, "grad_norm": 0.06465927511453629, "learning_rate": 0.00010036455286016473, "loss": 0.269, "step": 27076 }, { "epoch": 2.1935353208036292, "grad_norm": 0.06094600260257721, "learning_rate": 0.0001003600522075701, "loss": 0.2514, "step": 27077 }, { "epoch": 2.1936163318211275, "grad_norm": 0.06704872101545334, "learning_rate": 0.00010035555155497546, "loss": 0.2857, "step": 27078 }, { "epoch": 2.193697342838626, "grad_norm": 0.0678897276520729, "learning_rate": 0.00010035105090238086, "loss": 0.2392, "step": 27079 }, { "epoch": 2.1937783538561244, "grad_norm": 0.06632528454065323, "learning_rate": 0.00010034655024978624, "loss": 0.2516, "step": 27080 }, { "epoch": 2.1938593648736227, "grad_norm": 0.05018361285328865, "learning_rate": 0.0001003420495971916, "loss": 0.2556, "step": 27081 }, { "epoch": 2.1939403758911213, "grad_norm": 0.06408238410949707, "learning_rate": 0.00010033754894459697, "loss": 0.2101, "step": 27082 }, { "epoch": 2.1940213869086196, "grad_norm": 0.054987452924251556, "learning_rate": 0.00010033304829200234, "loss": 0.2328, "step": 27083 }, { "epoch": 2.194102397926118, "grad_norm": 0.08715245872735977, "learning_rate": 0.0001003285476394077, "loss": 0.2756, "step": 27084 }, { "epoch": 2.1941834089436165, "grad_norm": 0.0645839124917984, "learning_rate": 0.0001003240469868131, "loss": 0.2502, "step": 27085 }, { "epoch": 2.1942644199611148, "grad_norm": 0.07756000757217407, "learning_rate": 0.00010031954633421848, "loss": 0.2455, "step": 27086 }, { "epoch": 2.194345430978613, "grad_norm": 0.07709761708974838, "learning_rate": 0.00010031504568162384, "loss": 0.2763, "step": 27087 }, { "epoch": 2.1944264419961117, "grad_norm": 0.06762171536684036, "learning_rate": 0.00010031054502902921, "loss": 0.2682, "step": 27088 }, { "epoch": 2.19450745301361, "grad_norm": 0.07915575802326202, "learning_rate": 0.00010030604437643459, "loss": 0.2576, "step": 27089 }, { "epoch": 2.194588464031108, "grad_norm": 0.0666109248995781, "learning_rate": 0.00010030154372383995, "loss": 0.2592, "step": 27090 }, { "epoch": 2.1946694750486064, "grad_norm": 0.065487340092659, "learning_rate": 0.00010029704307124535, "loss": 0.2819, "step": 27091 }, { "epoch": 2.194750486066105, "grad_norm": 0.04944925010204315, "learning_rate": 0.00010029254241865072, "loss": 0.2711, "step": 27092 }, { "epoch": 2.1948314970836034, "grad_norm": 0.05818904563784599, "learning_rate": 0.00010028804176605608, "loss": 0.2692, "step": 27093 }, { "epoch": 2.1949125081011016, "grad_norm": 0.07648874819278717, "learning_rate": 0.00010028354111346145, "loss": 0.2549, "step": 27094 }, { "epoch": 2.1949935191186003, "grad_norm": 0.06092251464724541, "learning_rate": 0.00010027904046086683, "loss": 0.2752, "step": 27095 }, { "epoch": 2.1950745301360985, "grad_norm": 0.05122043564915657, "learning_rate": 0.00010027453980827219, "loss": 0.2425, "step": 27096 }, { "epoch": 2.195155541153597, "grad_norm": 0.06031843647360802, "learning_rate": 0.00010027003915567759, "loss": 0.2439, "step": 27097 }, { "epoch": 2.195236552171095, "grad_norm": 0.07118486613035202, "learning_rate": 0.00010026553850308296, "loss": 0.2973, "step": 27098 }, { "epoch": 2.1953175631885937, "grad_norm": 0.0504891462624073, "learning_rate": 0.00010026103785048832, "loss": 0.2472, "step": 27099 }, { "epoch": 2.195398574206092, "grad_norm": 0.060217734426259995, "learning_rate": 0.0001002565371978937, "loss": 0.2556, "step": 27100 }, { "epoch": 2.19547958522359, "grad_norm": 0.057529326528310776, "learning_rate": 0.00010025203654529907, "loss": 0.2159, "step": 27101 }, { "epoch": 2.195560596241089, "grad_norm": 0.06307288259267807, "learning_rate": 0.00010024753589270443, "loss": 0.2522, "step": 27102 }, { "epoch": 2.195641607258587, "grad_norm": 0.0709809735417366, "learning_rate": 0.00010024303524010983, "loss": 0.2566, "step": 27103 }, { "epoch": 2.1957226182760854, "grad_norm": 0.058112580329179764, "learning_rate": 0.0001002385345875152, "loss": 0.2735, "step": 27104 }, { "epoch": 2.195803629293584, "grad_norm": 0.051618389785289764, "learning_rate": 0.00010023403393492056, "loss": 0.2369, "step": 27105 }, { "epoch": 2.1958846403110823, "grad_norm": 0.051942378282547, "learning_rate": 0.00010022953328232594, "loss": 0.2598, "step": 27106 }, { "epoch": 2.1959656513285806, "grad_norm": 0.06882523000240326, "learning_rate": 0.00010022503262973131, "loss": 0.283, "step": 27107 }, { "epoch": 2.1960466623460793, "grad_norm": 0.07351597398519516, "learning_rate": 0.0001002205319771367, "loss": 0.2697, "step": 27108 }, { "epoch": 2.1961276733635775, "grad_norm": 0.06842011213302612, "learning_rate": 0.00010021603132454207, "loss": 0.2583, "step": 27109 }, { "epoch": 2.1962086843810757, "grad_norm": 0.06987638026475906, "learning_rate": 0.00010021153067194745, "loss": 0.2067, "step": 27110 }, { "epoch": 2.1962896953985744, "grad_norm": 0.0682644248008728, "learning_rate": 0.0001002070300193528, "loss": 0.2765, "step": 27111 }, { "epoch": 2.1963707064160727, "grad_norm": 0.0675097405910492, "learning_rate": 0.00010020252936675818, "loss": 0.2952, "step": 27112 }, { "epoch": 2.196451717433571, "grad_norm": 0.06232113763689995, "learning_rate": 0.00010019802871416355, "loss": 0.2379, "step": 27113 }, { "epoch": 2.196532728451069, "grad_norm": 0.06394306570291519, "learning_rate": 0.00010019352806156894, "loss": 0.2296, "step": 27114 }, { "epoch": 2.196613739468568, "grad_norm": 0.06672637909650803, "learning_rate": 0.00010018902740897431, "loss": 0.2604, "step": 27115 }, { "epoch": 2.196694750486066, "grad_norm": 0.07373014092445374, "learning_rate": 0.00010018452675637969, "loss": 0.2931, "step": 27116 }, { "epoch": 2.1967757615035644, "grad_norm": 0.06646038591861725, "learning_rate": 0.00010018002610378505, "loss": 0.245, "step": 27117 }, { "epoch": 2.196856772521063, "grad_norm": 0.050996072590351105, "learning_rate": 0.00010017552545119042, "loss": 0.2759, "step": 27118 }, { "epoch": 2.1969377835385613, "grad_norm": 0.04928220435976982, "learning_rate": 0.0001001710247985958, "loss": 0.2405, "step": 27119 }, { "epoch": 2.1970187945560595, "grad_norm": 0.07236079126596451, "learning_rate": 0.00010016652414600118, "loss": 0.3173, "step": 27120 }, { "epoch": 2.1970998055735578, "grad_norm": 0.06007302552461624, "learning_rate": 0.00010016202349340656, "loss": 0.2147, "step": 27121 }, { "epoch": 2.1971808165910565, "grad_norm": 0.058224182575941086, "learning_rate": 0.00010015752284081193, "loss": 0.282, "step": 27122 }, { "epoch": 2.1972618276085547, "grad_norm": 0.07079780101776123, "learning_rate": 0.00010015302218821729, "loss": 0.2344, "step": 27123 }, { "epoch": 2.197342838626053, "grad_norm": 0.06322164833545685, "learning_rate": 0.00010014852153562266, "loss": 0.2089, "step": 27124 }, { "epoch": 2.1974238496435516, "grad_norm": 0.05888306349515915, "learning_rate": 0.00010014402088302804, "loss": 0.2324, "step": 27125 }, { "epoch": 2.19750486066105, "grad_norm": 0.055346712470054626, "learning_rate": 0.00010013952023043342, "loss": 0.2585, "step": 27126 }, { "epoch": 2.197585871678548, "grad_norm": 0.06587319076061249, "learning_rate": 0.0001001350195778388, "loss": 0.253, "step": 27127 }, { "epoch": 2.197666882696047, "grad_norm": 0.056305259466171265, "learning_rate": 0.00010013051892524417, "loss": 0.2195, "step": 27128 }, { "epoch": 2.197747893713545, "grad_norm": 0.04610638692975044, "learning_rate": 0.00010012601827264953, "loss": 0.1896, "step": 27129 }, { "epoch": 2.1978289047310433, "grad_norm": 0.06477444618940353, "learning_rate": 0.0001001215176200549, "loss": 0.2673, "step": 27130 }, { "epoch": 2.197909915748542, "grad_norm": 0.0813470110297203, "learning_rate": 0.00010011701696746029, "loss": 0.279, "step": 27131 }, { "epoch": 2.1979909267660402, "grad_norm": 0.07328961789608002, "learning_rate": 0.00010011251631486566, "loss": 0.2411, "step": 27132 }, { "epoch": 2.1980719377835385, "grad_norm": 0.06270834803581238, "learning_rate": 0.00010010801566227104, "loss": 0.2682, "step": 27133 }, { "epoch": 2.198152948801037, "grad_norm": 0.05361814424395561, "learning_rate": 0.00010010351500967641, "loss": 0.2963, "step": 27134 }, { "epoch": 2.1982339598185354, "grad_norm": 0.08425524830818176, "learning_rate": 0.00010009901435708177, "loss": 0.2577, "step": 27135 }, { "epoch": 2.1983149708360337, "grad_norm": 0.07660902291536331, "learning_rate": 0.00010009451370448715, "loss": 0.234, "step": 27136 }, { "epoch": 2.198395981853532, "grad_norm": 0.06730036437511444, "learning_rate": 0.00010009001305189253, "loss": 0.2721, "step": 27137 }, { "epoch": 2.1984769928710306, "grad_norm": 0.08619198948144913, "learning_rate": 0.0001000855123992979, "loss": 0.3167, "step": 27138 }, { "epoch": 2.198558003888529, "grad_norm": 0.05942687392234802, "learning_rate": 0.00010008101174670328, "loss": 0.2137, "step": 27139 }, { "epoch": 2.198639014906027, "grad_norm": 0.05431392416357994, "learning_rate": 0.00010007651109410865, "loss": 0.2292, "step": 27140 }, { "epoch": 2.198720025923526, "grad_norm": 0.0699324905872345, "learning_rate": 0.00010007201044151401, "loss": 0.2661, "step": 27141 }, { "epoch": 2.198801036941024, "grad_norm": 0.05979772284626961, "learning_rate": 0.00010006750978891939, "loss": 0.2514, "step": 27142 }, { "epoch": 2.1988820479585223, "grad_norm": 0.059384338557720184, "learning_rate": 0.00010006300913632479, "loss": 0.2559, "step": 27143 }, { "epoch": 2.1989630589760205, "grad_norm": 0.06850460916757584, "learning_rate": 0.00010005850848373015, "loss": 0.266, "step": 27144 }, { "epoch": 2.199044069993519, "grad_norm": 0.06401151418685913, "learning_rate": 0.00010005400783113552, "loss": 0.2512, "step": 27145 }, { "epoch": 2.1991250810110174, "grad_norm": 0.07683590054512024, "learning_rate": 0.0001000495071785409, "loss": 0.292, "step": 27146 }, { "epoch": 2.1992060920285157, "grad_norm": 0.06709449738264084, "learning_rate": 0.00010004500652594626, "loss": 0.2352, "step": 27147 }, { "epoch": 2.1992871030460144, "grad_norm": 0.06725570559501648, "learning_rate": 0.00010004050587335163, "loss": 0.2849, "step": 27148 }, { "epoch": 2.1993681140635126, "grad_norm": 0.07019000500440598, "learning_rate": 0.00010003600522075703, "loss": 0.2795, "step": 27149 }, { "epoch": 2.199449125081011, "grad_norm": 0.09164316207170486, "learning_rate": 0.00010003150456816239, "loss": 0.2621, "step": 27150 }, { "epoch": 2.1995301360985096, "grad_norm": 0.06879639625549316, "learning_rate": 0.00010002700391556776, "loss": 0.2867, "step": 27151 }, { "epoch": 2.199611147116008, "grad_norm": 0.07062327116727829, "learning_rate": 0.00010002250326297314, "loss": 0.243, "step": 27152 }, { "epoch": 2.199692158133506, "grad_norm": 0.07149609923362732, "learning_rate": 0.0001000180026103785, "loss": 0.2377, "step": 27153 }, { "epoch": 2.1997731691510047, "grad_norm": 0.06835110485553741, "learning_rate": 0.00010001350195778387, "loss": 0.2424, "step": 27154 }, { "epoch": 2.199854180168503, "grad_norm": 0.05907417833805084, "learning_rate": 0.00010000900130518927, "loss": 0.2585, "step": 27155 }, { "epoch": 2.1999351911860012, "grad_norm": 0.08102352917194366, "learning_rate": 0.00010000450065259463, "loss": 0.2646, "step": 27156 }, { "epoch": 2.2000162022034995, "grad_norm": 0.06644205749034882, "learning_rate": 0.0001, "loss": 0.2181, "step": 27157 }, { "epoch": 2.200097213220998, "grad_norm": 0.05316384136676788, "learning_rate": 9.999549934740538e-05, "loss": 0.2329, "step": 27158 }, { "epoch": 2.2001782242384964, "grad_norm": 0.06541083753108978, "learning_rate": 9.999099869481075e-05, "loss": 0.269, "step": 27159 }, { "epoch": 2.2002592352559946, "grad_norm": 0.07427337765693665, "learning_rate": 9.998649804221613e-05, "loss": 0.2808, "step": 27160 }, { "epoch": 2.2003402462734933, "grad_norm": 0.06966466456651688, "learning_rate": 9.99819973896215e-05, "loss": 0.2675, "step": 27161 }, { "epoch": 2.2004212572909916, "grad_norm": 0.061215851455926895, "learning_rate": 9.997749673702687e-05, "loss": 0.2489, "step": 27162 }, { "epoch": 2.20050226830849, "grad_norm": 0.051639918237924576, "learning_rate": 9.997299608443225e-05, "loss": 0.2527, "step": 27163 }, { "epoch": 2.2005832793259885, "grad_norm": 0.07078103721141815, "learning_rate": 9.996849543183762e-05, "loss": 0.2507, "step": 27164 }, { "epoch": 2.2006642903434868, "grad_norm": 0.06260479241609573, "learning_rate": 9.9963994779243e-05, "loss": 0.2756, "step": 27165 }, { "epoch": 2.200745301360985, "grad_norm": 0.07474082708358765, "learning_rate": 9.995949412664837e-05, "loss": 0.267, "step": 27166 }, { "epoch": 2.2008263123784833, "grad_norm": 0.07497971504926682, "learning_rate": 9.995499347405374e-05, "loss": 0.3132, "step": 27167 }, { "epoch": 2.200907323395982, "grad_norm": 0.06425240635871887, "learning_rate": 9.995049282145911e-05, "loss": 0.2429, "step": 27168 }, { "epoch": 2.20098833441348, "grad_norm": 0.056325603276491165, "learning_rate": 9.994599216886449e-05, "loss": 0.2481, "step": 27169 }, { "epoch": 2.2010693454309784, "grad_norm": 0.05998290330171585, "learning_rate": 9.994149151626986e-05, "loss": 0.2565, "step": 27170 }, { "epoch": 2.201150356448477, "grad_norm": 0.05483241379261017, "learning_rate": 9.993699086367524e-05, "loss": 0.2498, "step": 27171 }, { "epoch": 2.2012313674659754, "grad_norm": 0.06303826719522476, "learning_rate": 9.993249021108061e-05, "loss": 0.2494, "step": 27172 }, { "epoch": 2.2013123784834736, "grad_norm": 0.06558068096637726, "learning_rate": 9.9927989558486e-05, "loss": 0.247, "step": 27173 }, { "epoch": 2.2013933895009723, "grad_norm": 0.06100596487522125, "learning_rate": 9.992348890589136e-05, "loss": 0.228, "step": 27174 }, { "epoch": 2.2014744005184705, "grad_norm": 0.06812256574630737, "learning_rate": 9.991898825329673e-05, "loss": 0.2565, "step": 27175 }, { "epoch": 2.201555411535969, "grad_norm": 0.05909280478954315, "learning_rate": 9.991448760070212e-05, "loss": 0.2197, "step": 27176 }, { "epoch": 2.2016364225534675, "grad_norm": 0.06603255122900009, "learning_rate": 9.990998694810748e-05, "loss": 0.2717, "step": 27177 }, { "epoch": 2.2017174335709657, "grad_norm": 0.06001312658190727, "learning_rate": 9.990548629551285e-05, "loss": 0.2446, "step": 27178 }, { "epoch": 2.201798444588464, "grad_norm": 0.06539706885814667, "learning_rate": 9.990098564291824e-05, "loss": 0.2538, "step": 27179 }, { "epoch": 2.201879455605962, "grad_norm": 0.06196155771613121, "learning_rate": 9.98964849903236e-05, "loss": 0.2476, "step": 27180 }, { "epoch": 2.201960466623461, "grad_norm": 0.06107961758971214, "learning_rate": 9.989198433772897e-05, "loss": 0.2705, "step": 27181 }, { "epoch": 2.202041477640959, "grad_norm": 0.05701644718647003, "learning_rate": 9.988748368513436e-05, "loss": 0.2582, "step": 27182 }, { "epoch": 2.2021224886584574, "grad_norm": 0.05959983542561531, "learning_rate": 9.988298303253972e-05, "loss": 0.2449, "step": 27183 }, { "epoch": 2.202203499675956, "grad_norm": 0.06808173656463623, "learning_rate": 9.987848237994509e-05, "loss": 0.2961, "step": 27184 }, { "epoch": 2.2022845106934543, "grad_norm": 0.06098674610257149, "learning_rate": 9.987398172735048e-05, "loss": 0.2377, "step": 27185 }, { "epoch": 2.2023655217109526, "grad_norm": 0.0937223955988884, "learning_rate": 9.986948107475584e-05, "loss": 0.2819, "step": 27186 }, { "epoch": 2.2024465327284513, "grad_norm": 0.07166877388954163, "learning_rate": 9.986498042216121e-05, "loss": 0.2414, "step": 27187 }, { "epoch": 2.2025275437459495, "grad_norm": 0.057484161108732224, "learning_rate": 9.98604797695666e-05, "loss": 0.2384, "step": 27188 }, { "epoch": 2.2026085547634477, "grad_norm": 0.08165212720632553, "learning_rate": 9.985597911697196e-05, "loss": 0.2655, "step": 27189 }, { "epoch": 2.202689565780946, "grad_norm": 0.07205326855182648, "learning_rate": 9.985147846437733e-05, "loss": 0.2405, "step": 27190 }, { "epoch": 2.2027705767984447, "grad_norm": 0.06088333949446678, "learning_rate": 9.984697781178272e-05, "loss": 0.2745, "step": 27191 }, { "epoch": 2.202851587815943, "grad_norm": 0.07322680205106735, "learning_rate": 9.984247715918808e-05, "loss": 0.2722, "step": 27192 }, { "epoch": 2.202932598833441, "grad_norm": 0.06500561535358429, "learning_rate": 9.983797650659345e-05, "loss": 0.278, "step": 27193 }, { "epoch": 2.20301360985094, "grad_norm": 0.05702653154730797, "learning_rate": 9.983347585399884e-05, "loss": 0.2436, "step": 27194 }, { "epoch": 2.203094620868438, "grad_norm": 0.06561413407325745, "learning_rate": 9.98289752014042e-05, "loss": 0.2873, "step": 27195 }, { "epoch": 2.2031756318859363, "grad_norm": 0.07284931093454361, "learning_rate": 9.982447454880958e-05, "loss": 0.2479, "step": 27196 }, { "epoch": 2.203256642903435, "grad_norm": 0.06644266098737717, "learning_rate": 9.981997389621496e-05, "loss": 0.2779, "step": 27197 }, { "epoch": 2.2033376539209333, "grad_norm": 0.06733689457178116, "learning_rate": 9.981547324362032e-05, "loss": 0.2625, "step": 27198 }, { "epoch": 2.2034186649384315, "grad_norm": 0.06203522905707359, "learning_rate": 9.981097259102571e-05, "loss": 0.2689, "step": 27199 }, { "epoch": 2.20349967595593, "grad_norm": 0.05635721981525421, "learning_rate": 9.980647193843108e-05, "loss": 0.2755, "step": 27200 }, { "epoch": 2.2035806869734285, "grad_norm": 0.05323106423020363, "learning_rate": 9.980197128583644e-05, "loss": 0.2448, "step": 27201 }, { "epoch": 2.2036616979909267, "grad_norm": 0.0679623931646347, "learning_rate": 9.979747063324183e-05, "loss": 0.2517, "step": 27202 }, { "epoch": 2.203742709008425, "grad_norm": 0.06785213947296143, "learning_rate": 9.97929699806472e-05, "loss": 0.251, "step": 27203 }, { "epoch": 2.2038237200259236, "grad_norm": 0.06036762520670891, "learning_rate": 9.978846932805256e-05, "loss": 0.2078, "step": 27204 }, { "epoch": 2.203904731043422, "grad_norm": 0.05910918489098549, "learning_rate": 9.978396867545795e-05, "loss": 0.237, "step": 27205 }, { "epoch": 2.20398574206092, "grad_norm": 0.0643678605556488, "learning_rate": 9.977946802286332e-05, "loss": 0.2414, "step": 27206 }, { "epoch": 2.204066753078419, "grad_norm": 0.07331311702728271, "learning_rate": 9.977496737026868e-05, "loss": 0.2394, "step": 27207 }, { "epoch": 2.204147764095917, "grad_norm": 0.05993986874818802, "learning_rate": 9.977046671767407e-05, "loss": 0.2783, "step": 27208 }, { "epoch": 2.2042287751134153, "grad_norm": 0.0725242868065834, "learning_rate": 9.976596606507945e-05, "loss": 0.2711, "step": 27209 }, { "epoch": 2.204309786130914, "grad_norm": 0.07055281102657318, "learning_rate": 9.97614654124848e-05, "loss": 0.2692, "step": 27210 }, { "epoch": 2.2043907971484122, "grad_norm": 0.06300518661737442, "learning_rate": 9.975696475989019e-05, "loss": 0.2196, "step": 27211 }, { "epoch": 2.2044718081659105, "grad_norm": 0.10356733202934265, "learning_rate": 9.975246410729557e-05, "loss": 0.2921, "step": 27212 }, { "epoch": 2.2045528191834087, "grad_norm": 0.06462245434522629, "learning_rate": 9.974796345470093e-05, "loss": 0.1995, "step": 27213 }, { "epoch": 2.2046338302009074, "grad_norm": 0.0734039694070816, "learning_rate": 9.974346280210631e-05, "loss": 0.2784, "step": 27214 }, { "epoch": 2.2047148412184057, "grad_norm": 0.053666383028030396, "learning_rate": 9.973896214951169e-05, "loss": 0.2901, "step": 27215 }, { "epoch": 2.204795852235904, "grad_norm": 0.06547022610902786, "learning_rate": 9.973446149691705e-05, "loss": 0.2601, "step": 27216 }, { "epoch": 2.2048768632534026, "grad_norm": 0.07122816890478134, "learning_rate": 9.972996084432243e-05, "loss": 0.2685, "step": 27217 }, { "epoch": 2.204957874270901, "grad_norm": 0.056516796350479126, "learning_rate": 9.972546019172781e-05, "loss": 0.3253, "step": 27218 }, { "epoch": 2.205038885288399, "grad_norm": 0.06432358175516129, "learning_rate": 9.972095953913317e-05, "loss": 0.2563, "step": 27219 }, { "epoch": 2.2051198963058978, "grad_norm": 0.045835137367248535, "learning_rate": 9.971645888653856e-05, "loss": 0.2529, "step": 27220 }, { "epoch": 2.205200907323396, "grad_norm": 0.0580122210085392, "learning_rate": 9.971195823394393e-05, "loss": 0.2571, "step": 27221 }, { "epoch": 2.2052819183408943, "grad_norm": 0.05672190338373184, "learning_rate": 9.970745758134929e-05, "loss": 0.2699, "step": 27222 }, { "epoch": 2.205362929358393, "grad_norm": 0.04954679682850838, "learning_rate": 9.970295692875468e-05, "loss": 0.2287, "step": 27223 }, { "epoch": 2.205443940375891, "grad_norm": 0.06200401112437248, "learning_rate": 9.969845627616005e-05, "loss": 0.2672, "step": 27224 }, { "epoch": 2.2055249513933894, "grad_norm": 0.05223114416003227, "learning_rate": 9.969395562356542e-05, "loss": 0.2534, "step": 27225 }, { "epoch": 2.2056059624108877, "grad_norm": 0.061142902821302414, "learning_rate": 9.96894549709708e-05, "loss": 0.2874, "step": 27226 }, { "epoch": 2.2056869734283864, "grad_norm": 0.05851171910762787, "learning_rate": 9.968495431837617e-05, "loss": 0.2247, "step": 27227 }, { "epoch": 2.2057679844458846, "grad_norm": 0.07332703471183777, "learning_rate": 9.968045366578154e-05, "loss": 0.2797, "step": 27228 }, { "epoch": 2.205848995463383, "grad_norm": 0.0516182966530323, "learning_rate": 9.967595301318692e-05, "loss": 0.2202, "step": 27229 }, { "epoch": 2.2059300064808816, "grad_norm": 0.0503121018409729, "learning_rate": 9.967145236059229e-05, "loss": 0.1759, "step": 27230 }, { "epoch": 2.20601101749838, "grad_norm": 0.05455978214740753, "learning_rate": 9.966695170799766e-05, "loss": 0.2481, "step": 27231 }, { "epoch": 2.206092028515878, "grad_norm": 0.06131187826395035, "learning_rate": 9.966245105540304e-05, "loss": 0.248, "step": 27232 }, { "epoch": 2.2061730395333767, "grad_norm": 0.07432816177606583, "learning_rate": 9.965795040280841e-05, "loss": 0.2314, "step": 27233 }, { "epoch": 2.206254050550875, "grad_norm": 0.060853440314531326, "learning_rate": 9.965344975021379e-05, "loss": 0.2422, "step": 27234 }, { "epoch": 2.2063350615683732, "grad_norm": 0.0634138435125351, "learning_rate": 9.964894909761916e-05, "loss": 0.2392, "step": 27235 }, { "epoch": 2.2064160725858715, "grad_norm": 0.08180546015501022, "learning_rate": 9.964444844502453e-05, "loss": 0.2537, "step": 27236 }, { "epoch": 2.20649708360337, "grad_norm": 0.059457991272211075, "learning_rate": 9.96399477924299e-05, "loss": 0.2641, "step": 27237 }, { "epoch": 2.2065780946208684, "grad_norm": 0.06653142720460892, "learning_rate": 9.963544713983528e-05, "loss": 0.2427, "step": 27238 }, { "epoch": 2.2066591056383666, "grad_norm": 0.07568779587745667, "learning_rate": 9.963094648724065e-05, "loss": 0.2923, "step": 27239 }, { "epoch": 2.2067401166558653, "grad_norm": 0.06351905316114426, "learning_rate": 9.962644583464603e-05, "loss": 0.2854, "step": 27240 }, { "epoch": 2.2068211276733636, "grad_norm": 0.0615183487534523, "learning_rate": 9.96219451820514e-05, "loss": 0.234, "step": 27241 }, { "epoch": 2.206902138690862, "grad_norm": 0.05222557857632637, "learning_rate": 9.961744452945677e-05, "loss": 0.2578, "step": 27242 }, { "epoch": 2.2069831497083605, "grad_norm": 0.05467645451426506, "learning_rate": 9.961294387686215e-05, "loss": 0.2315, "step": 27243 }, { "epoch": 2.2070641607258588, "grad_norm": 0.06070820987224579, "learning_rate": 9.960844322426752e-05, "loss": 0.2568, "step": 27244 }, { "epoch": 2.207145171743357, "grad_norm": 0.05687892436981201, "learning_rate": 9.96039425716729e-05, "loss": 0.241, "step": 27245 }, { "epoch": 2.2072261827608557, "grad_norm": 0.07000940293073654, "learning_rate": 9.959944191907827e-05, "loss": 0.2835, "step": 27246 }, { "epoch": 2.207307193778354, "grad_norm": 0.05552801117300987, "learning_rate": 9.959494126648364e-05, "loss": 0.2798, "step": 27247 }, { "epoch": 2.207388204795852, "grad_norm": 0.06713631749153137, "learning_rate": 9.959044061388902e-05, "loss": 0.2782, "step": 27248 }, { "epoch": 2.2074692158133504, "grad_norm": 0.05187486484646797, "learning_rate": 9.958593996129439e-05, "loss": 0.2324, "step": 27249 }, { "epoch": 2.207550226830849, "grad_norm": 0.07198222726583481, "learning_rate": 9.958143930869976e-05, "loss": 0.2797, "step": 27250 }, { "epoch": 2.2076312378483474, "grad_norm": 0.06188987195491791, "learning_rate": 9.957693865610515e-05, "loss": 0.2489, "step": 27251 }, { "epoch": 2.2077122488658456, "grad_norm": 0.07027272880077362, "learning_rate": 9.957243800351051e-05, "loss": 0.2898, "step": 27252 }, { "epoch": 2.2077932598833443, "grad_norm": 0.05598944053053856, "learning_rate": 9.956793735091588e-05, "loss": 0.2326, "step": 27253 }, { "epoch": 2.2078742709008425, "grad_norm": 0.06060578301548958, "learning_rate": 9.956343669832127e-05, "loss": 0.2571, "step": 27254 }, { "epoch": 2.207955281918341, "grad_norm": 0.05788060650229454, "learning_rate": 9.955893604572663e-05, "loss": 0.213, "step": 27255 }, { "epoch": 2.2080362929358395, "grad_norm": 0.07043232768774033, "learning_rate": 9.9554435393132e-05, "loss": 0.263, "step": 27256 }, { "epoch": 2.2081173039533377, "grad_norm": 0.06341779977083206, "learning_rate": 9.954993474053739e-05, "loss": 0.2358, "step": 27257 }, { "epoch": 2.208198314970836, "grad_norm": 0.054107457399368286, "learning_rate": 9.954543408794275e-05, "loss": 0.2264, "step": 27258 }, { "epoch": 2.208279325988334, "grad_norm": 0.06598135083913803, "learning_rate": 9.954093343534813e-05, "loss": 0.2559, "step": 27259 }, { "epoch": 2.208360337005833, "grad_norm": 0.0615556575357914, "learning_rate": 9.953643278275351e-05, "loss": 0.264, "step": 27260 }, { "epoch": 2.208441348023331, "grad_norm": 0.07525072991847992, "learning_rate": 9.953193213015887e-05, "loss": 0.2897, "step": 27261 }, { "epoch": 2.2085223590408294, "grad_norm": 0.0570087768137455, "learning_rate": 9.952743147756425e-05, "loss": 0.2455, "step": 27262 }, { "epoch": 2.208603370058328, "grad_norm": 0.08910997956991196, "learning_rate": 9.952293082496963e-05, "loss": 0.2998, "step": 27263 }, { "epoch": 2.2086843810758263, "grad_norm": 0.07079474627971649, "learning_rate": 9.9518430172375e-05, "loss": 0.2236, "step": 27264 }, { "epoch": 2.2087653920933246, "grad_norm": 0.06641038507223129, "learning_rate": 9.951392951978037e-05, "loss": 0.2413, "step": 27265 }, { "epoch": 2.2088464031108233, "grad_norm": 0.07263396680355072, "learning_rate": 9.950942886718575e-05, "loss": 0.2327, "step": 27266 }, { "epoch": 2.2089274141283215, "grad_norm": 0.0786370038986206, "learning_rate": 9.950492821459111e-05, "loss": 0.2585, "step": 27267 }, { "epoch": 2.2090084251458197, "grad_norm": 0.06409087777137756, "learning_rate": 9.950042756199649e-05, "loss": 0.2572, "step": 27268 }, { "epoch": 2.2090894361633184, "grad_norm": 0.07662307471036911, "learning_rate": 9.949592690940188e-05, "loss": 0.3218, "step": 27269 }, { "epoch": 2.2091704471808167, "grad_norm": 0.0556035116314888, "learning_rate": 9.949142625680724e-05, "loss": 0.2315, "step": 27270 }, { "epoch": 2.209251458198315, "grad_norm": 0.07545039057731628, "learning_rate": 9.948692560421261e-05, "loss": 0.2455, "step": 27271 }, { "epoch": 2.209332469215813, "grad_norm": 0.07402218133211136, "learning_rate": 9.9482424951618e-05, "loss": 0.2466, "step": 27272 }, { "epoch": 2.209413480233312, "grad_norm": 0.07804799824953079, "learning_rate": 9.947792429902336e-05, "loss": 0.2561, "step": 27273 }, { "epoch": 2.20949449125081, "grad_norm": 0.07206982374191284, "learning_rate": 9.947342364642873e-05, "loss": 0.268, "step": 27274 }, { "epoch": 2.2095755022683083, "grad_norm": 0.06558424234390259, "learning_rate": 9.946892299383412e-05, "loss": 0.2505, "step": 27275 }, { "epoch": 2.209656513285807, "grad_norm": 0.05041855201125145, "learning_rate": 9.946442234123948e-05, "loss": 0.2461, "step": 27276 }, { "epoch": 2.2097375243033053, "grad_norm": 0.06105949729681015, "learning_rate": 9.945992168864486e-05, "loss": 0.2645, "step": 27277 }, { "epoch": 2.2098185353208035, "grad_norm": 0.05644926801323891, "learning_rate": 9.945542103605024e-05, "loss": 0.2377, "step": 27278 }, { "epoch": 2.209899546338302, "grad_norm": 0.06584417074918747, "learning_rate": 9.94509203834556e-05, "loss": 0.2987, "step": 27279 }, { "epoch": 2.2099805573558005, "grad_norm": 0.06372004747390747, "learning_rate": 9.944641973086099e-05, "loss": 0.2823, "step": 27280 }, { "epoch": 2.2100615683732987, "grad_norm": 0.05927109718322754, "learning_rate": 9.944191907826636e-05, "loss": 0.2528, "step": 27281 }, { "epoch": 2.210142579390797, "grad_norm": 0.06508370488882065, "learning_rate": 9.943741842567172e-05, "loss": 0.2439, "step": 27282 }, { "epoch": 2.2102235904082956, "grad_norm": 0.0654415488243103, "learning_rate": 9.94329177730771e-05, "loss": 0.278, "step": 27283 }, { "epoch": 2.210304601425794, "grad_norm": 0.06012353301048279, "learning_rate": 9.942841712048248e-05, "loss": 0.2598, "step": 27284 }, { "epoch": 2.210385612443292, "grad_norm": 0.06264343857765198, "learning_rate": 9.942391646788784e-05, "loss": 0.2586, "step": 27285 }, { "epoch": 2.210466623460791, "grad_norm": 0.05635780468583107, "learning_rate": 9.941941581529323e-05, "loss": 0.2515, "step": 27286 }, { "epoch": 2.210547634478289, "grad_norm": 0.07047024369239807, "learning_rate": 9.94149151626986e-05, "loss": 0.2785, "step": 27287 }, { "epoch": 2.2106286454957873, "grad_norm": 0.07421385496854782, "learning_rate": 9.941041451010396e-05, "loss": 0.2461, "step": 27288 }, { "epoch": 2.210709656513286, "grad_norm": 0.07711216062307358, "learning_rate": 9.940591385750935e-05, "loss": 0.2494, "step": 27289 }, { "epoch": 2.2107906675307842, "grad_norm": 0.06072551757097244, "learning_rate": 9.940141320491472e-05, "loss": 0.3183, "step": 27290 }, { "epoch": 2.2108716785482825, "grad_norm": 0.08102339506149292, "learning_rate": 9.939691255232008e-05, "loss": 0.2514, "step": 27291 }, { "epoch": 2.210952689565781, "grad_norm": 0.06397506594657898, "learning_rate": 9.939241189972547e-05, "loss": 0.2289, "step": 27292 }, { "epoch": 2.2110337005832794, "grad_norm": 0.05820344015955925, "learning_rate": 9.938791124713084e-05, "loss": 0.2186, "step": 27293 }, { "epoch": 2.2111147116007777, "grad_norm": 0.06543104350566864, "learning_rate": 9.93834105945362e-05, "loss": 0.2407, "step": 27294 }, { "epoch": 2.211195722618276, "grad_norm": 0.07835323363542557, "learning_rate": 9.937890994194159e-05, "loss": 0.2444, "step": 27295 }, { "epoch": 2.2112767336357746, "grad_norm": 0.06635398417711258, "learning_rate": 9.937440928934696e-05, "loss": 0.2334, "step": 27296 }, { "epoch": 2.211357744653273, "grad_norm": 0.05356477573513985, "learning_rate": 9.936990863675232e-05, "loss": 0.2232, "step": 27297 }, { "epoch": 2.211438755670771, "grad_norm": 0.05896617844700813, "learning_rate": 9.936540798415771e-05, "loss": 0.2636, "step": 27298 }, { "epoch": 2.2115197666882698, "grad_norm": 0.05496169626712799, "learning_rate": 9.936090733156308e-05, "loss": 0.2313, "step": 27299 }, { "epoch": 2.211600777705768, "grad_norm": 0.06341741979122162, "learning_rate": 9.935640667896844e-05, "loss": 0.2541, "step": 27300 }, { "epoch": 2.2116817887232663, "grad_norm": 0.05799972638487816, "learning_rate": 9.935190602637383e-05, "loss": 0.2355, "step": 27301 }, { "epoch": 2.211762799740765, "grad_norm": 0.06351993978023529, "learning_rate": 9.93474053737792e-05, "loss": 0.2545, "step": 27302 }, { "epoch": 2.211843810758263, "grad_norm": 0.05903366208076477, "learning_rate": 9.934290472118458e-05, "loss": 0.2037, "step": 27303 }, { "epoch": 2.2119248217757614, "grad_norm": 0.07522643357515335, "learning_rate": 9.933840406858995e-05, "loss": 0.2483, "step": 27304 }, { "epoch": 2.2120058327932597, "grad_norm": 0.06297983974218369, "learning_rate": 9.933390341599533e-05, "loss": 0.2967, "step": 27305 }, { "epoch": 2.2120868438107584, "grad_norm": 0.08709455281496048, "learning_rate": 9.93294027634007e-05, "loss": 0.2715, "step": 27306 }, { "epoch": 2.2121678548282566, "grad_norm": 0.07213051617145538, "learning_rate": 9.932490211080607e-05, "loss": 0.2528, "step": 27307 }, { "epoch": 2.212248865845755, "grad_norm": 0.06085379421710968, "learning_rate": 9.932040145821145e-05, "loss": 0.2293, "step": 27308 }, { "epoch": 2.2123298768632536, "grad_norm": 0.05907389521598816, "learning_rate": 9.931590080561682e-05, "loss": 0.2358, "step": 27309 }, { "epoch": 2.212410887880752, "grad_norm": 0.0674804225564003, "learning_rate": 9.931140015302219e-05, "loss": 0.2447, "step": 27310 }, { "epoch": 2.21249189889825, "grad_norm": 0.059147339314222336, "learning_rate": 9.930689950042757e-05, "loss": 0.2597, "step": 27311 }, { "epoch": 2.2125729099157487, "grad_norm": 0.05762263014912605, "learning_rate": 9.930239884783294e-05, "loss": 0.2347, "step": 27312 }, { "epoch": 2.212653920933247, "grad_norm": 0.0638045072555542, "learning_rate": 9.929789819523831e-05, "loss": 0.2646, "step": 27313 }, { "epoch": 2.212734931950745, "grad_norm": 0.07611501216888428, "learning_rate": 9.929339754264369e-05, "loss": 0.2533, "step": 27314 }, { "epoch": 2.212815942968244, "grad_norm": 0.06777425855398178, "learning_rate": 9.928889689004906e-05, "loss": 0.2812, "step": 27315 }, { "epoch": 2.212896953985742, "grad_norm": 0.05217338353395462, "learning_rate": 9.928439623745443e-05, "loss": 0.2586, "step": 27316 }, { "epoch": 2.2129779650032404, "grad_norm": 0.06507042050361633, "learning_rate": 9.927989558485981e-05, "loss": 0.2517, "step": 27317 }, { "epoch": 2.2130589760207386, "grad_norm": 0.06591640412807465, "learning_rate": 9.927539493226518e-05, "loss": 0.2339, "step": 27318 }, { "epoch": 2.2131399870382373, "grad_norm": 0.06380046159029007, "learning_rate": 9.927089427967056e-05, "loss": 0.2303, "step": 27319 }, { "epoch": 2.2132209980557356, "grad_norm": 0.0698883980512619, "learning_rate": 9.926639362707593e-05, "loss": 0.2441, "step": 27320 }, { "epoch": 2.213302009073234, "grad_norm": 0.04417555406689644, "learning_rate": 9.92618929744813e-05, "loss": 0.2252, "step": 27321 }, { "epoch": 2.2133830200907325, "grad_norm": 0.07910863310098648, "learning_rate": 9.925739232188668e-05, "loss": 0.2467, "step": 27322 }, { "epoch": 2.2134640311082308, "grad_norm": 0.06234428659081459, "learning_rate": 9.925289166929205e-05, "loss": 0.2396, "step": 27323 }, { "epoch": 2.213545042125729, "grad_norm": 0.0615205354988575, "learning_rate": 9.924839101669742e-05, "loss": 0.2425, "step": 27324 }, { "epoch": 2.2136260531432272, "grad_norm": 0.07571331411600113, "learning_rate": 9.92438903641028e-05, "loss": 0.257, "step": 27325 }, { "epoch": 2.213707064160726, "grad_norm": 0.07765693962574005, "learning_rate": 9.923938971150817e-05, "loss": 0.2795, "step": 27326 }, { "epoch": 2.213788075178224, "grad_norm": 0.07145484536886215, "learning_rate": 9.923488905891354e-05, "loss": 0.2542, "step": 27327 }, { "epoch": 2.2138690861957224, "grad_norm": 0.07751671224832535, "learning_rate": 9.923038840631892e-05, "loss": 0.259, "step": 27328 }, { "epoch": 2.213950097213221, "grad_norm": 0.05293998867273331, "learning_rate": 9.922588775372429e-05, "loss": 0.2228, "step": 27329 }, { "epoch": 2.2140311082307194, "grad_norm": 0.06756523996591568, "learning_rate": 9.922138710112967e-05, "loss": 0.2897, "step": 27330 }, { "epoch": 2.2141121192482176, "grad_norm": 0.05320659652352333, "learning_rate": 9.921688644853504e-05, "loss": 0.2577, "step": 27331 }, { "epoch": 2.2141931302657163, "grad_norm": 0.0681634396314621, "learning_rate": 9.921238579594043e-05, "loss": 0.257, "step": 27332 }, { "epoch": 2.2142741412832145, "grad_norm": 0.06318969279527664, "learning_rate": 9.920788514334579e-05, "loss": 0.2767, "step": 27333 }, { "epoch": 2.214355152300713, "grad_norm": 0.06955531239509583, "learning_rate": 9.920338449075116e-05, "loss": 0.299, "step": 27334 }, { "epoch": 2.2144361633182115, "grad_norm": 0.0692557767033577, "learning_rate": 9.919888383815655e-05, "loss": 0.2761, "step": 27335 }, { "epoch": 2.2145171743357097, "grad_norm": 0.06259065866470337, "learning_rate": 9.919438318556191e-05, "loss": 0.2358, "step": 27336 }, { "epoch": 2.214598185353208, "grad_norm": 0.06407694518566132, "learning_rate": 9.918988253296728e-05, "loss": 0.2416, "step": 27337 }, { "epoch": 2.2146791963707066, "grad_norm": 0.06017732620239258, "learning_rate": 9.918538188037267e-05, "loss": 0.2327, "step": 27338 }, { "epoch": 2.214760207388205, "grad_norm": 0.05848823860287666, "learning_rate": 9.918088122777803e-05, "loss": 0.2397, "step": 27339 }, { "epoch": 2.214841218405703, "grad_norm": 0.0606422983109951, "learning_rate": 9.91763805751834e-05, "loss": 0.2453, "step": 27340 }, { "epoch": 2.2149222294232014, "grad_norm": 0.07600191980600357, "learning_rate": 9.917187992258879e-05, "loss": 0.2595, "step": 27341 }, { "epoch": 2.2150032404407, "grad_norm": 0.05821105092763901, "learning_rate": 9.916737926999415e-05, "loss": 0.2641, "step": 27342 }, { "epoch": 2.2150842514581983, "grad_norm": 0.04956561699509621, "learning_rate": 9.916287861739952e-05, "loss": 0.185, "step": 27343 }, { "epoch": 2.2151652624756966, "grad_norm": 0.06335799396038055, "learning_rate": 9.915837796480491e-05, "loss": 0.2326, "step": 27344 }, { "epoch": 2.2152462734931953, "grad_norm": 0.06417136639356613, "learning_rate": 9.915387731221027e-05, "loss": 0.2495, "step": 27345 }, { "epoch": 2.2153272845106935, "grad_norm": 0.08220578730106354, "learning_rate": 9.914937665961564e-05, "loss": 0.2699, "step": 27346 }, { "epoch": 2.2154082955281917, "grad_norm": 0.06456286460161209, "learning_rate": 9.914487600702103e-05, "loss": 0.2291, "step": 27347 }, { "epoch": 2.21548930654569, "grad_norm": 0.06221301853656769, "learning_rate": 9.914037535442639e-05, "loss": 0.2272, "step": 27348 }, { "epoch": 2.2155703175631887, "grad_norm": 0.05606995150446892, "learning_rate": 9.913587470183176e-05, "loss": 0.2354, "step": 27349 }, { "epoch": 2.215651328580687, "grad_norm": 0.07109422236680984, "learning_rate": 9.913137404923715e-05, "loss": 0.3113, "step": 27350 }, { "epoch": 2.215732339598185, "grad_norm": 0.06941894441843033, "learning_rate": 9.912687339664251e-05, "loss": 0.2866, "step": 27351 }, { "epoch": 2.215813350615684, "grad_norm": 0.06637705862522125, "learning_rate": 9.912237274404788e-05, "loss": 0.304, "step": 27352 }, { "epoch": 2.215894361633182, "grad_norm": 0.06498729437589645, "learning_rate": 9.911787209145327e-05, "loss": 0.3121, "step": 27353 }, { "epoch": 2.2159753726506803, "grad_norm": 0.08340992778539658, "learning_rate": 9.911337143885863e-05, "loss": 0.302, "step": 27354 }, { "epoch": 2.216056383668179, "grad_norm": 0.06651787459850311, "learning_rate": 9.9108870786264e-05, "loss": 0.2322, "step": 27355 }, { "epoch": 2.2161373946856773, "grad_norm": 0.08446913957595825, "learning_rate": 9.910437013366939e-05, "loss": 0.2637, "step": 27356 }, { "epoch": 2.2162184057031755, "grad_norm": 0.05738092213869095, "learning_rate": 9.909986948107475e-05, "loss": 0.2603, "step": 27357 }, { "epoch": 2.216299416720674, "grad_norm": 0.08143822848796844, "learning_rate": 9.909536882848014e-05, "loss": 0.2694, "step": 27358 }, { "epoch": 2.2163804277381725, "grad_norm": 0.04862968251109123, "learning_rate": 9.909086817588551e-05, "loss": 0.226, "step": 27359 }, { "epoch": 2.2164614387556707, "grad_norm": 0.05859667807817459, "learning_rate": 9.908636752329087e-05, "loss": 0.2551, "step": 27360 }, { "epoch": 2.216542449773169, "grad_norm": 0.07120684534311295, "learning_rate": 9.908186687069626e-05, "loss": 0.2648, "step": 27361 }, { "epoch": 2.2166234607906676, "grad_norm": 0.06827737390995026, "learning_rate": 9.907736621810163e-05, "loss": 0.264, "step": 27362 }, { "epoch": 2.216704471808166, "grad_norm": 0.05470007658004761, "learning_rate": 9.9072865565507e-05, "loss": 0.25, "step": 27363 }, { "epoch": 2.216785482825664, "grad_norm": 0.06258236616849899, "learning_rate": 9.906836491291238e-05, "loss": 0.274, "step": 27364 }, { "epoch": 2.216866493843163, "grad_norm": 0.06155962124466896, "learning_rate": 9.906386426031775e-05, "loss": 0.2329, "step": 27365 }, { "epoch": 2.216947504860661, "grad_norm": 0.06398579478263855, "learning_rate": 9.905936360772311e-05, "loss": 0.2401, "step": 27366 }, { "epoch": 2.2170285158781593, "grad_norm": 0.0638839453458786, "learning_rate": 9.90548629551285e-05, "loss": 0.2635, "step": 27367 }, { "epoch": 2.217109526895658, "grad_norm": 0.06554199010133743, "learning_rate": 9.905036230253388e-05, "loss": 0.2445, "step": 27368 }, { "epoch": 2.2171905379131562, "grad_norm": 0.06670667231082916, "learning_rate": 9.904586164993924e-05, "loss": 0.336, "step": 27369 }, { "epoch": 2.2172715489306545, "grad_norm": 0.06505695730447769, "learning_rate": 9.904136099734462e-05, "loss": 0.277, "step": 27370 }, { "epoch": 2.2173525599481527, "grad_norm": 0.05942712724208832, "learning_rate": 9.903686034475e-05, "loss": 0.2439, "step": 27371 }, { "epoch": 2.2174335709656514, "grad_norm": 0.05172451585531235, "learning_rate": 9.903235969215536e-05, "loss": 0.2348, "step": 27372 }, { "epoch": 2.2175145819831497, "grad_norm": 0.0634874477982521, "learning_rate": 9.902785903956074e-05, "loss": 0.2198, "step": 27373 }, { "epoch": 2.217595593000648, "grad_norm": 0.06077420711517334, "learning_rate": 9.902335838696612e-05, "loss": 0.2598, "step": 27374 }, { "epoch": 2.2176766040181466, "grad_norm": 0.06746877729892731, "learning_rate": 9.901885773437148e-05, "loss": 0.2783, "step": 27375 }, { "epoch": 2.217757615035645, "grad_norm": 0.06087268888950348, "learning_rate": 9.901435708177686e-05, "loss": 0.2618, "step": 27376 }, { "epoch": 2.217838626053143, "grad_norm": 0.07225014269351959, "learning_rate": 9.900985642918224e-05, "loss": 0.2837, "step": 27377 }, { "epoch": 2.2179196370706418, "grad_norm": 0.0711088478565216, "learning_rate": 9.90053557765876e-05, "loss": 0.2293, "step": 27378 }, { "epoch": 2.21800064808814, "grad_norm": 0.05558910593390465, "learning_rate": 9.900085512399299e-05, "loss": 0.2422, "step": 27379 }, { "epoch": 2.2180816591056383, "grad_norm": 0.09979903697967529, "learning_rate": 9.899635447139836e-05, "loss": 0.2926, "step": 27380 }, { "epoch": 2.218162670123137, "grad_norm": 0.060716476291418076, "learning_rate": 9.899185381880373e-05, "loss": 0.2442, "step": 27381 }, { "epoch": 2.218243681140635, "grad_norm": 0.06644859910011292, "learning_rate": 9.89873531662091e-05, "loss": 0.2583, "step": 27382 }, { "epoch": 2.2183246921581334, "grad_norm": 0.05265781655907631, "learning_rate": 9.898285251361448e-05, "loss": 0.2632, "step": 27383 }, { "epoch": 2.2184057031756317, "grad_norm": 0.07375629991292953, "learning_rate": 9.897835186101985e-05, "loss": 0.2509, "step": 27384 }, { "epoch": 2.2184867141931304, "grad_norm": 0.06024035066366196, "learning_rate": 9.897385120842523e-05, "loss": 0.2678, "step": 27385 }, { "epoch": 2.2185677252106286, "grad_norm": 0.06680899113416672, "learning_rate": 9.89693505558306e-05, "loss": 0.2442, "step": 27386 }, { "epoch": 2.218648736228127, "grad_norm": 0.05207567289471626, "learning_rate": 9.896484990323597e-05, "loss": 0.2211, "step": 27387 }, { "epoch": 2.2187297472456255, "grad_norm": 0.059763479977846146, "learning_rate": 9.896034925064135e-05, "loss": 0.2822, "step": 27388 }, { "epoch": 2.218810758263124, "grad_norm": 0.07277268916368484, "learning_rate": 9.895584859804672e-05, "loss": 0.2677, "step": 27389 }, { "epoch": 2.218891769280622, "grad_norm": 0.06010603532195091, "learning_rate": 9.89513479454521e-05, "loss": 0.2264, "step": 27390 }, { "epoch": 2.2189727802981207, "grad_norm": 0.06812576949596405, "learning_rate": 9.894684729285747e-05, "loss": 0.2438, "step": 27391 }, { "epoch": 2.219053791315619, "grad_norm": 0.05678996816277504, "learning_rate": 9.894234664026284e-05, "loss": 0.2705, "step": 27392 }, { "epoch": 2.219134802333117, "grad_norm": 0.06939080357551575, "learning_rate": 9.893784598766822e-05, "loss": 0.256, "step": 27393 }, { "epoch": 2.2192158133506155, "grad_norm": 0.0556022971868515, "learning_rate": 9.893334533507359e-05, "loss": 0.2282, "step": 27394 }, { "epoch": 2.219296824368114, "grad_norm": 0.07095302641391754, "learning_rate": 9.892884468247896e-05, "loss": 0.2738, "step": 27395 }, { "epoch": 2.2193778353856124, "grad_norm": 0.07031233608722687, "learning_rate": 9.892434402988434e-05, "loss": 0.279, "step": 27396 }, { "epoch": 2.2194588464031106, "grad_norm": 0.05634433776140213, "learning_rate": 9.891984337728971e-05, "loss": 0.2655, "step": 27397 }, { "epoch": 2.2195398574206093, "grad_norm": 0.056388407945632935, "learning_rate": 9.891534272469508e-05, "loss": 0.2669, "step": 27398 }, { "epoch": 2.2196208684381076, "grad_norm": 0.06369081884622574, "learning_rate": 9.891084207210046e-05, "loss": 0.2805, "step": 27399 }, { "epoch": 2.219701879455606, "grad_norm": 0.0784493088722229, "learning_rate": 9.890634141950583e-05, "loss": 0.2655, "step": 27400 }, { "epoch": 2.2197828904731045, "grad_norm": 0.052695855498313904, "learning_rate": 9.89018407669112e-05, "loss": 0.2547, "step": 27401 }, { "epoch": 2.2198639014906028, "grad_norm": 0.06510698050260544, "learning_rate": 9.889734011431658e-05, "loss": 0.2539, "step": 27402 }, { "epoch": 2.219944912508101, "grad_norm": 0.06631234288215637, "learning_rate": 9.889283946172195e-05, "loss": 0.2554, "step": 27403 }, { "epoch": 2.2200259235255997, "grad_norm": 0.06391224265098572, "learning_rate": 9.888833880912733e-05, "loss": 0.2828, "step": 27404 }, { "epoch": 2.220106934543098, "grad_norm": 0.0637073889374733, "learning_rate": 9.88838381565327e-05, "loss": 0.2219, "step": 27405 }, { "epoch": 2.220187945560596, "grad_norm": 0.05338175967335701, "learning_rate": 9.887933750393807e-05, "loss": 0.229, "step": 27406 }, { "epoch": 2.2202689565780944, "grad_norm": 0.06317490339279175, "learning_rate": 9.887483685134345e-05, "loss": 0.2624, "step": 27407 }, { "epoch": 2.220349967595593, "grad_norm": 0.06853865832090378, "learning_rate": 9.887033619874882e-05, "loss": 0.2574, "step": 27408 }, { "epoch": 2.2204309786130914, "grad_norm": 0.06886468827724457, "learning_rate": 9.88658355461542e-05, "loss": 0.2514, "step": 27409 }, { "epoch": 2.2205119896305896, "grad_norm": 0.07354399561882019, "learning_rate": 9.886133489355958e-05, "loss": 0.2396, "step": 27410 }, { "epoch": 2.2205930006480883, "grad_norm": 0.06268005073070526, "learning_rate": 9.885683424096494e-05, "loss": 0.254, "step": 27411 }, { "epoch": 2.2206740116655865, "grad_norm": 0.09697388857603073, "learning_rate": 9.885233358837031e-05, "loss": 0.2285, "step": 27412 }, { "epoch": 2.220755022683085, "grad_norm": 0.06654613465070724, "learning_rate": 9.88478329357757e-05, "loss": 0.2374, "step": 27413 }, { "epoch": 2.2208360337005835, "grad_norm": 0.07476381957530975, "learning_rate": 9.884333228318106e-05, "loss": 0.2601, "step": 27414 }, { "epoch": 2.2209170447180817, "grad_norm": 0.05062644183635712, "learning_rate": 9.883883163058644e-05, "loss": 0.2397, "step": 27415 }, { "epoch": 2.22099805573558, "grad_norm": 0.06820233911275864, "learning_rate": 9.883433097799182e-05, "loss": 0.2549, "step": 27416 }, { "epoch": 2.221079066753078, "grad_norm": 0.05485299229621887, "learning_rate": 9.882983032539718e-05, "loss": 0.2242, "step": 27417 }, { "epoch": 2.221160077770577, "grad_norm": 0.06426907330751419, "learning_rate": 9.882532967280256e-05, "loss": 0.2561, "step": 27418 }, { "epoch": 2.221241088788075, "grad_norm": 0.06557059288024902, "learning_rate": 9.882082902020794e-05, "loss": 0.2679, "step": 27419 }, { "epoch": 2.2213220998055734, "grad_norm": 0.06241839751601219, "learning_rate": 9.88163283676133e-05, "loss": 0.2643, "step": 27420 }, { "epoch": 2.221403110823072, "grad_norm": 0.06659674644470215, "learning_rate": 9.881182771501868e-05, "loss": 0.2709, "step": 27421 }, { "epoch": 2.2214841218405703, "grad_norm": 0.06463085860013962, "learning_rate": 9.880732706242406e-05, "loss": 0.2544, "step": 27422 }, { "epoch": 2.2215651328580686, "grad_norm": 0.06982456147670746, "learning_rate": 9.880282640982942e-05, "loss": 0.2625, "step": 27423 }, { "epoch": 2.2216461438755672, "grad_norm": 0.05242660269141197, "learning_rate": 9.87983257572348e-05, "loss": 0.2588, "step": 27424 }, { "epoch": 2.2217271548930655, "grad_norm": 0.06410206854343414, "learning_rate": 9.879382510464018e-05, "loss": 0.2542, "step": 27425 }, { "epoch": 2.2218081659105637, "grad_norm": 0.0681510865688324, "learning_rate": 9.878932445204554e-05, "loss": 0.2598, "step": 27426 }, { "epoch": 2.2218891769280624, "grad_norm": 0.061042800545692444, "learning_rate": 9.878482379945092e-05, "loss": 0.2472, "step": 27427 }, { "epoch": 2.2219701879455607, "grad_norm": 0.06885142624378204, "learning_rate": 9.87803231468563e-05, "loss": 0.2414, "step": 27428 }, { "epoch": 2.222051198963059, "grad_norm": 0.06637189537286758, "learning_rate": 9.877582249426167e-05, "loss": 0.2563, "step": 27429 }, { "epoch": 2.222132209980557, "grad_norm": 0.07090841978788376, "learning_rate": 9.877132184166704e-05, "loss": 0.2827, "step": 27430 }, { "epoch": 2.222213220998056, "grad_norm": 0.08006473630666733, "learning_rate": 9.876682118907243e-05, "loss": 0.2639, "step": 27431 }, { "epoch": 2.222294232015554, "grad_norm": 0.05476365610957146, "learning_rate": 9.876232053647779e-05, "loss": 0.2275, "step": 27432 }, { "epoch": 2.2223752430330523, "grad_norm": 0.07430217415094376, "learning_rate": 9.875781988388316e-05, "loss": 0.2414, "step": 27433 }, { "epoch": 2.222456254050551, "grad_norm": 0.09188614785671234, "learning_rate": 9.875331923128855e-05, "loss": 0.2447, "step": 27434 }, { "epoch": 2.2225372650680493, "grad_norm": 0.052723485976457596, "learning_rate": 9.874881857869391e-05, "loss": 0.2271, "step": 27435 }, { "epoch": 2.2226182760855475, "grad_norm": 0.07605164498090744, "learning_rate": 9.87443179260993e-05, "loss": 0.2764, "step": 27436 }, { "epoch": 2.222699287103046, "grad_norm": 0.06630595028400421, "learning_rate": 9.873981727350467e-05, "loss": 0.2323, "step": 27437 }, { "epoch": 2.2227802981205445, "grad_norm": 0.049888212233781815, "learning_rate": 9.873531662091003e-05, "loss": 0.1975, "step": 27438 }, { "epoch": 2.2228613091380427, "grad_norm": 0.049289241433143616, "learning_rate": 9.873081596831542e-05, "loss": 0.2091, "step": 27439 }, { "epoch": 2.222942320155541, "grad_norm": 0.0582793727517128, "learning_rate": 9.872631531572079e-05, "loss": 0.222, "step": 27440 }, { "epoch": 2.2230233311730396, "grad_norm": 0.06828349083662033, "learning_rate": 9.872181466312615e-05, "loss": 0.2356, "step": 27441 }, { "epoch": 2.223104342190538, "grad_norm": 0.0737132653594017, "learning_rate": 9.871731401053154e-05, "loss": 0.2409, "step": 27442 }, { "epoch": 2.223185353208036, "grad_norm": 0.07881966233253479, "learning_rate": 9.871281335793691e-05, "loss": 0.2731, "step": 27443 }, { "epoch": 2.223266364225535, "grad_norm": 0.07030628621578217, "learning_rate": 9.870831270534227e-05, "loss": 0.2873, "step": 27444 }, { "epoch": 2.223347375243033, "grad_norm": 0.06227487698197365, "learning_rate": 9.870381205274766e-05, "loss": 0.254, "step": 27445 }, { "epoch": 2.2234283862605313, "grad_norm": 0.07559426873922348, "learning_rate": 9.869931140015303e-05, "loss": 0.2863, "step": 27446 }, { "epoch": 2.22350939727803, "grad_norm": 0.058403171598911285, "learning_rate": 9.869481074755839e-05, "loss": 0.2524, "step": 27447 }, { "epoch": 2.2235904082955282, "grad_norm": 0.059054918587207794, "learning_rate": 9.869031009496378e-05, "loss": 0.2425, "step": 27448 }, { "epoch": 2.2236714193130265, "grad_norm": 0.05502532050013542, "learning_rate": 9.868580944236915e-05, "loss": 0.2413, "step": 27449 }, { "epoch": 2.223752430330525, "grad_norm": 0.058676645159721375, "learning_rate": 9.868130878977452e-05, "loss": 0.2485, "step": 27450 }, { "epoch": 2.2238334413480234, "grad_norm": 0.07493289560079575, "learning_rate": 9.86768081371799e-05, "loss": 0.2898, "step": 27451 }, { "epoch": 2.2239144523655217, "grad_norm": 0.060452915728092194, "learning_rate": 9.867230748458527e-05, "loss": 0.2362, "step": 27452 }, { "epoch": 2.22399546338302, "grad_norm": 0.058643221855163574, "learning_rate": 9.866780683199065e-05, "loss": 0.2515, "step": 27453 }, { "epoch": 2.2240764744005186, "grad_norm": 0.06971300393342972, "learning_rate": 9.866330617939602e-05, "loss": 0.2339, "step": 27454 }, { "epoch": 2.224157485418017, "grad_norm": 0.0665970891714096, "learning_rate": 9.865880552680139e-05, "loss": 0.2689, "step": 27455 }, { "epoch": 2.224238496435515, "grad_norm": 0.06566937267780304, "learning_rate": 9.865430487420677e-05, "loss": 0.2652, "step": 27456 }, { "epoch": 2.2243195074530138, "grad_norm": 0.07684820145368576, "learning_rate": 9.864980422161214e-05, "loss": 0.2953, "step": 27457 }, { "epoch": 2.224400518470512, "grad_norm": 0.06417267769575119, "learning_rate": 9.864530356901751e-05, "loss": 0.2415, "step": 27458 }, { "epoch": 2.2244815294880103, "grad_norm": 0.0706862136721611, "learning_rate": 9.864080291642289e-05, "loss": 0.3013, "step": 27459 }, { "epoch": 2.224562540505509, "grad_norm": 0.07747501134872437, "learning_rate": 9.863630226382826e-05, "loss": 0.285, "step": 27460 }, { "epoch": 2.224643551523007, "grad_norm": 0.0710633248090744, "learning_rate": 9.863180161123363e-05, "loss": 0.2647, "step": 27461 }, { "epoch": 2.2247245625405054, "grad_norm": 0.07837072759866714, "learning_rate": 9.862730095863901e-05, "loss": 0.3105, "step": 27462 }, { "epoch": 2.2248055735580037, "grad_norm": 0.07310599088668823, "learning_rate": 9.862280030604438e-05, "loss": 0.2734, "step": 27463 }, { "epoch": 2.2248865845755024, "grad_norm": 0.06395061314105988, "learning_rate": 9.861829965344976e-05, "loss": 0.242, "step": 27464 }, { "epoch": 2.2249675955930006, "grad_norm": 0.06322309374809265, "learning_rate": 9.861379900085513e-05, "loss": 0.2324, "step": 27465 }, { "epoch": 2.225048606610499, "grad_norm": 0.05373113974928856, "learning_rate": 9.86092983482605e-05, "loss": 0.2504, "step": 27466 }, { "epoch": 2.2251296176279975, "grad_norm": 0.06073353439569473, "learning_rate": 9.860479769566588e-05, "loss": 0.24, "step": 27467 }, { "epoch": 2.225210628645496, "grad_norm": 0.06355416029691696, "learning_rate": 9.860029704307125e-05, "loss": 0.2263, "step": 27468 }, { "epoch": 2.225291639662994, "grad_norm": 0.07005083560943604, "learning_rate": 9.859579639047662e-05, "loss": 0.2319, "step": 27469 }, { "epoch": 2.2253726506804927, "grad_norm": 0.06226866692304611, "learning_rate": 9.8591295737882e-05, "loss": 0.2657, "step": 27470 }, { "epoch": 2.225453661697991, "grad_norm": 0.061929695308208466, "learning_rate": 9.858679508528737e-05, "loss": 0.2895, "step": 27471 }, { "epoch": 2.225534672715489, "grad_norm": 0.047602199018001556, "learning_rate": 9.858229443269274e-05, "loss": 0.2445, "step": 27472 }, { "epoch": 2.225615683732988, "grad_norm": 0.05019146203994751, "learning_rate": 9.857779378009812e-05, "loss": 0.2185, "step": 27473 }, { "epoch": 2.225696694750486, "grad_norm": 0.07254036515951157, "learning_rate": 9.857329312750349e-05, "loss": 0.2716, "step": 27474 }, { "epoch": 2.2257777057679844, "grad_norm": 0.062929667532444, "learning_rate": 9.856879247490886e-05, "loss": 0.2515, "step": 27475 }, { "epoch": 2.2258587167854826, "grad_norm": 0.05786556378006935, "learning_rate": 9.856429182231424e-05, "loss": 0.2367, "step": 27476 }, { "epoch": 2.2259397278029813, "grad_norm": 0.05418018996715546, "learning_rate": 9.855979116971961e-05, "loss": 0.2743, "step": 27477 }, { "epoch": 2.2260207388204796, "grad_norm": 0.05989348143339157, "learning_rate": 9.855529051712499e-05, "loss": 0.2373, "step": 27478 }, { "epoch": 2.226101749837978, "grad_norm": 0.05833243206143379, "learning_rate": 9.855078986453036e-05, "loss": 0.214, "step": 27479 }, { "epoch": 2.2261827608554765, "grad_norm": 0.06846433132886887, "learning_rate": 9.854628921193573e-05, "loss": 0.2518, "step": 27480 }, { "epoch": 2.2262637718729748, "grad_norm": 0.07357719540596008, "learning_rate": 9.85417885593411e-05, "loss": 0.236, "step": 27481 }, { "epoch": 2.226344782890473, "grad_norm": 0.06799498945474625, "learning_rate": 9.853728790674648e-05, "loss": 0.2835, "step": 27482 }, { "epoch": 2.2264257939079717, "grad_norm": 0.07681022584438324, "learning_rate": 9.853278725415185e-05, "loss": 0.2576, "step": 27483 }, { "epoch": 2.22650680492547, "grad_norm": 0.09169048070907593, "learning_rate": 9.852828660155723e-05, "loss": 0.2802, "step": 27484 }, { "epoch": 2.226587815942968, "grad_norm": 0.07720378786325455, "learning_rate": 9.85237859489626e-05, "loss": 0.2744, "step": 27485 }, { "epoch": 2.2266688269604664, "grad_norm": 0.06049828231334686, "learning_rate": 9.851928529636797e-05, "loss": 0.2339, "step": 27486 }, { "epoch": 2.226749837977965, "grad_norm": 0.06331600993871689, "learning_rate": 9.851478464377335e-05, "loss": 0.2062, "step": 27487 }, { "epoch": 2.2268308489954634, "grad_norm": 0.0648995041847229, "learning_rate": 9.851028399117872e-05, "loss": 0.272, "step": 27488 }, { "epoch": 2.2269118600129616, "grad_norm": 0.05869756639003754, "learning_rate": 9.85057833385841e-05, "loss": 0.2155, "step": 27489 }, { "epoch": 2.2269928710304603, "grad_norm": 0.07214734703302383, "learning_rate": 9.850128268598947e-05, "loss": 0.259, "step": 27490 }, { "epoch": 2.2270738820479585, "grad_norm": 0.07368501275777817, "learning_rate": 9.849678203339486e-05, "loss": 0.2668, "step": 27491 }, { "epoch": 2.2271548930654568, "grad_norm": 0.057600319385528564, "learning_rate": 9.849228138080022e-05, "loss": 0.2817, "step": 27492 }, { "epoch": 2.2272359040829555, "grad_norm": 0.07728826254606247, "learning_rate": 9.848778072820559e-05, "loss": 0.2332, "step": 27493 }, { "epoch": 2.2273169151004537, "grad_norm": 0.08698121458292007, "learning_rate": 9.848328007561098e-05, "loss": 0.2613, "step": 27494 }, { "epoch": 2.227397926117952, "grad_norm": 0.06995168328285217, "learning_rate": 9.847877942301634e-05, "loss": 0.2448, "step": 27495 }, { "epoch": 2.2274789371354506, "grad_norm": 0.07552751153707504, "learning_rate": 9.847427877042171e-05, "loss": 0.2467, "step": 27496 }, { "epoch": 2.227559948152949, "grad_norm": 0.05158974602818489, "learning_rate": 9.84697781178271e-05, "loss": 0.2392, "step": 27497 }, { "epoch": 2.227640959170447, "grad_norm": 0.06887788325548172, "learning_rate": 9.846527746523246e-05, "loss": 0.2429, "step": 27498 }, { "epoch": 2.2277219701879454, "grad_norm": 0.05913383141160011, "learning_rate": 9.846077681263783e-05, "loss": 0.251, "step": 27499 }, { "epoch": 2.227802981205444, "grad_norm": 0.06455465406179428, "learning_rate": 9.845627616004322e-05, "loss": 0.2602, "step": 27500 }, { "epoch": 2.2278839922229423, "grad_norm": 0.06063240021467209, "learning_rate": 9.845177550744858e-05, "loss": 0.2513, "step": 27501 }, { "epoch": 2.2279650032404406, "grad_norm": 0.0720057561993599, "learning_rate": 9.844727485485395e-05, "loss": 0.2618, "step": 27502 }, { "epoch": 2.2280460142579392, "grad_norm": 0.06722862273454666, "learning_rate": 9.844277420225934e-05, "loss": 0.2426, "step": 27503 }, { "epoch": 2.2281270252754375, "grad_norm": 0.06625615060329437, "learning_rate": 9.84382735496647e-05, "loss": 0.2413, "step": 27504 }, { "epoch": 2.2282080362929357, "grad_norm": 0.06222971901297569, "learning_rate": 9.843377289707007e-05, "loss": 0.2936, "step": 27505 }, { "epoch": 2.2282890473104344, "grad_norm": 0.07540034502744675, "learning_rate": 9.842927224447546e-05, "loss": 0.2774, "step": 27506 }, { "epoch": 2.2283700583279327, "grad_norm": 0.05772213637828827, "learning_rate": 9.842477159188082e-05, "loss": 0.2926, "step": 27507 }, { "epoch": 2.228451069345431, "grad_norm": 0.05750125274062157, "learning_rate": 9.84202709392862e-05, "loss": 0.2172, "step": 27508 }, { "epoch": 2.228532080362929, "grad_norm": 0.06508267670869827, "learning_rate": 9.841577028669158e-05, "loss": 0.2431, "step": 27509 }, { "epoch": 2.228613091380428, "grad_norm": 0.05775724723935127, "learning_rate": 9.841126963409694e-05, "loss": 0.2197, "step": 27510 }, { "epoch": 2.228694102397926, "grad_norm": 0.06315132230520248, "learning_rate": 9.840676898150231e-05, "loss": 0.2528, "step": 27511 }, { "epoch": 2.2287751134154243, "grad_norm": 0.07274419069290161, "learning_rate": 9.84022683289077e-05, "loss": 0.2502, "step": 27512 }, { "epoch": 2.228856124432923, "grad_norm": 0.059047311544418335, "learning_rate": 9.839776767631306e-05, "loss": 0.2592, "step": 27513 }, { "epoch": 2.2289371354504213, "grad_norm": 0.07797567546367645, "learning_rate": 9.839326702371844e-05, "loss": 0.2427, "step": 27514 }, { "epoch": 2.2290181464679195, "grad_norm": 0.06294585019350052, "learning_rate": 9.838876637112382e-05, "loss": 0.3364, "step": 27515 }, { "epoch": 2.229099157485418, "grad_norm": 0.07711624354124069, "learning_rate": 9.83842657185292e-05, "loss": 0.282, "step": 27516 }, { "epoch": 2.2291801685029164, "grad_norm": 0.06658951193094254, "learning_rate": 9.837976506593457e-05, "loss": 0.2528, "step": 27517 }, { "epoch": 2.2292611795204147, "grad_norm": 0.06800119578838348, "learning_rate": 9.837526441333994e-05, "loss": 0.3019, "step": 27518 }, { "epoch": 2.2293421905379134, "grad_norm": 0.06257172673940659, "learning_rate": 9.837076376074532e-05, "loss": 0.2488, "step": 27519 }, { "epoch": 2.2294232015554116, "grad_norm": 0.06106441840529442, "learning_rate": 9.836626310815069e-05, "loss": 0.2568, "step": 27520 }, { "epoch": 2.22950421257291, "grad_norm": 0.061102159321308136, "learning_rate": 9.836176245555606e-05, "loss": 0.2469, "step": 27521 }, { "epoch": 2.229585223590408, "grad_norm": 0.07021883875131607, "learning_rate": 9.835726180296144e-05, "loss": 0.2892, "step": 27522 }, { "epoch": 2.229666234607907, "grad_norm": 0.07805996388196945, "learning_rate": 9.835276115036681e-05, "loss": 0.2457, "step": 27523 }, { "epoch": 2.229747245625405, "grad_norm": 0.05466349795460701, "learning_rate": 9.834826049777218e-05, "loss": 0.2625, "step": 27524 }, { "epoch": 2.2298282566429033, "grad_norm": 0.06738977134227753, "learning_rate": 9.834375984517756e-05, "loss": 0.2104, "step": 27525 }, { "epoch": 2.229909267660402, "grad_norm": 0.06767590343952179, "learning_rate": 9.833925919258293e-05, "loss": 0.2371, "step": 27526 }, { "epoch": 2.2299902786779002, "grad_norm": 0.06312504410743713, "learning_rate": 9.83347585399883e-05, "loss": 0.2782, "step": 27527 }, { "epoch": 2.2300712896953985, "grad_norm": 0.07312658429145813, "learning_rate": 9.833025788739368e-05, "loss": 0.2412, "step": 27528 }, { "epoch": 2.230152300712897, "grad_norm": 0.07023181021213531, "learning_rate": 9.832575723479905e-05, "loss": 0.2241, "step": 27529 }, { "epoch": 2.2302333117303954, "grad_norm": 0.06490954011678696, "learning_rate": 9.832125658220443e-05, "loss": 0.2657, "step": 27530 }, { "epoch": 2.2303143227478937, "grad_norm": 0.07052972167730331, "learning_rate": 9.83167559296098e-05, "loss": 0.2798, "step": 27531 }, { "epoch": 2.230395333765392, "grad_norm": 0.07023556530475616, "learning_rate": 9.831225527701517e-05, "loss": 0.239, "step": 27532 }, { "epoch": 2.2304763447828906, "grad_norm": 0.06125883013010025, "learning_rate": 9.830775462442055e-05, "loss": 0.2651, "step": 27533 }, { "epoch": 2.230557355800389, "grad_norm": 0.060255266726017, "learning_rate": 9.830325397182592e-05, "loss": 0.2869, "step": 27534 }, { "epoch": 2.230638366817887, "grad_norm": 0.07880748063325882, "learning_rate": 9.82987533192313e-05, "loss": 0.2452, "step": 27535 }, { "epoch": 2.2307193778353858, "grad_norm": 0.07296689599752426, "learning_rate": 9.829425266663667e-05, "loss": 0.277, "step": 27536 }, { "epoch": 2.230800388852884, "grad_norm": 0.05730430781841278, "learning_rate": 9.828975201404204e-05, "loss": 0.2232, "step": 27537 }, { "epoch": 2.2308813998703823, "grad_norm": 0.06154127046465874, "learning_rate": 9.828525136144742e-05, "loss": 0.2694, "step": 27538 }, { "epoch": 2.230962410887881, "grad_norm": 0.058270592242479324, "learning_rate": 9.828075070885279e-05, "loss": 0.2411, "step": 27539 }, { "epoch": 2.231043421905379, "grad_norm": 0.061224162578582764, "learning_rate": 9.827625005625816e-05, "loss": 0.2387, "step": 27540 }, { "epoch": 2.2311244329228774, "grad_norm": 0.05655451491475105, "learning_rate": 9.827174940366354e-05, "loss": 0.2532, "step": 27541 }, { "epoch": 2.231205443940376, "grad_norm": 0.06237894669175148, "learning_rate": 9.826724875106891e-05, "loss": 0.2996, "step": 27542 }, { "epoch": 2.2312864549578744, "grad_norm": 0.0705811008810997, "learning_rate": 9.826274809847428e-05, "loss": 0.2582, "step": 27543 }, { "epoch": 2.2313674659753726, "grad_norm": 0.06242735683917999, "learning_rate": 9.825824744587966e-05, "loss": 0.2207, "step": 27544 }, { "epoch": 2.231448476992871, "grad_norm": 0.07497147470712662, "learning_rate": 9.825374679328503e-05, "loss": 0.2633, "step": 27545 }, { "epoch": 2.2315294880103695, "grad_norm": 0.05885780230164528, "learning_rate": 9.82492461406904e-05, "loss": 0.2292, "step": 27546 }, { "epoch": 2.231610499027868, "grad_norm": 0.07651273906230927, "learning_rate": 9.824474548809578e-05, "loss": 0.2238, "step": 27547 }, { "epoch": 2.231691510045366, "grad_norm": 0.07533611357212067, "learning_rate": 9.824024483550115e-05, "loss": 0.2695, "step": 27548 }, { "epoch": 2.2317725210628647, "grad_norm": 0.06582427769899368, "learning_rate": 9.823574418290653e-05, "loss": 0.2439, "step": 27549 }, { "epoch": 2.231853532080363, "grad_norm": 0.07081213593482971, "learning_rate": 9.82312435303119e-05, "loss": 0.2457, "step": 27550 }, { "epoch": 2.231934543097861, "grad_norm": 0.054331351071596146, "learning_rate": 9.822674287771727e-05, "loss": 0.2193, "step": 27551 }, { "epoch": 2.2320155541153595, "grad_norm": 0.06326715648174286, "learning_rate": 9.822224222512265e-05, "loss": 0.276, "step": 27552 }, { "epoch": 2.232096565132858, "grad_norm": 0.0661514475941658, "learning_rate": 9.821774157252802e-05, "loss": 0.2201, "step": 27553 }, { "epoch": 2.2321775761503564, "grad_norm": 0.07344749569892883, "learning_rate": 9.821324091993339e-05, "loss": 0.2613, "step": 27554 }, { "epoch": 2.2322585871678546, "grad_norm": 0.06575877219438553, "learning_rate": 9.820874026733877e-05, "loss": 0.2533, "step": 27555 }, { "epoch": 2.2323395981853533, "grad_norm": 0.06327194720506668, "learning_rate": 9.820423961474414e-05, "loss": 0.2836, "step": 27556 }, { "epoch": 2.2324206092028516, "grad_norm": 0.07980243861675262, "learning_rate": 9.819973896214951e-05, "loss": 0.3185, "step": 27557 }, { "epoch": 2.23250162022035, "grad_norm": 0.06968654692173004, "learning_rate": 9.819523830955489e-05, "loss": 0.2597, "step": 27558 }, { "epoch": 2.2325826312378485, "grad_norm": 0.06582485139369965, "learning_rate": 9.819073765696026e-05, "loss": 0.2474, "step": 27559 }, { "epoch": 2.2326636422553467, "grad_norm": 0.059361279010772705, "learning_rate": 9.818623700436563e-05, "loss": 0.2138, "step": 27560 }, { "epoch": 2.232744653272845, "grad_norm": 0.05606989562511444, "learning_rate": 9.818173635177101e-05, "loss": 0.192, "step": 27561 }, { "epoch": 2.2328256642903437, "grad_norm": 0.06947729736566544, "learning_rate": 9.817723569917638e-05, "loss": 0.3042, "step": 27562 }, { "epoch": 2.232906675307842, "grad_norm": 0.05743652954697609, "learning_rate": 9.817273504658176e-05, "loss": 0.2408, "step": 27563 }, { "epoch": 2.23298768632534, "grad_norm": 0.06133139878511429, "learning_rate": 9.816823439398713e-05, "loss": 0.2494, "step": 27564 }, { "epoch": 2.233068697342839, "grad_norm": 0.06576412916183472, "learning_rate": 9.81637337413925e-05, "loss": 0.293, "step": 27565 }, { "epoch": 2.233149708360337, "grad_norm": 0.07262366265058517, "learning_rate": 9.815923308879788e-05, "loss": 0.2593, "step": 27566 }, { "epoch": 2.2332307193778353, "grad_norm": 0.06177317723631859, "learning_rate": 9.815473243620325e-05, "loss": 0.2956, "step": 27567 }, { "epoch": 2.2333117303953336, "grad_norm": 0.06126299500465393, "learning_rate": 9.815023178360862e-05, "loss": 0.2228, "step": 27568 }, { "epoch": 2.2333927414128323, "grad_norm": 0.05007245019078255, "learning_rate": 9.814573113101401e-05, "loss": 0.2662, "step": 27569 }, { "epoch": 2.2334737524303305, "grad_norm": 0.07485800236463547, "learning_rate": 9.814123047841937e-05, "loss": 0.2344, "step": 27570 }, { "epoch": 2.2335547634478288, "grad_norm": 0.057362813502550125, "learning_rate": 9.813672982582474e-05, "loss": 0.2246, "step": 27571 }, { "epoch": 2.2336357744653275, "grad_norm": 0.05725299194455147, "learning_rate": 9.813222917323013e-05, "loss": 0.2566, "step": 27572 }, { "epoch": 2.2337167854828257, "grad_norm": 0.06267392635345459, "learning_rate": 9.812772852063549e-05, "loss": 0.2466, "step": 27573 }, { "epoch": 2.233797796500324, "grad_norm": 0.07378648221492767, "learning_rate": 9.812322786804087e-05, "loss": 0.2414, "step": 27574 }, { "epoch": 2.233878807517822, "grad_norm": 0.07484953105449677, "learning_rate": 9.811872721544625e-05, "loss": 0.2581, "step": 27575 }, { "epoch": 2.233959818535321, "grad_norm": 0.07462868839502335, "learning_rate": 9.811422656285161e-05, "loss": 0.2483, "step": 27576 }, { "epoch": 2.234040829552819, "grad_norm": 0.056544914841651917, "learning_rate": 9.810972591025699e-05, "loss": 0.298, "step": 27577 }, { "epoch": 2.2341218405703174, "grad_norm": 0.05818667635321617, "learning_rate": 9.810522525766237e-05, "loss": 0.2323, "step": 27578 }, { "epoch": 2.234202851587816, "grad_norm": 0.06373794376850128, "learning_rate": 9.810072460506773e-05, "loss": 0.2654, "step": 27579 }, { "epoch": 2.2342838626053143, "grad_norm": 0.09751275926828384, "learning_rate": 9.80962239524731e-05, "loss": 0.2999, "step": 27580 }, { "epoch": 2.2343648736228126, "grad_norm": 0.059716999530792236, "learning_rate": 9.80917232998785e-05, "loss": 0.2333, "step": 27581 }, { "epoch": 2.2344458846403112, "grad_norm": 0.049951471388339996, "learning_rate": 9.808722264728385e-05, "loss": 0.221, "step": 27582 }, { "epoch": 2.2345268956578095, "grad_norm": 0.07606692612171173, "learning_rate": 9.808272199468923e-05, "loss": 0.2508, "step": 27583 }, { "epoch": 2.2346079066753077, "grad_norm": 0.053282156586647034, "learning_rate": 9.807822134209461e-05, "loss": 0.2406, "step": 27584 }, { "epoch": 2.2346889176928064, "grad_norm": 0.05666787177324295, "learning_rate": 9.807372068949999e-05, "loss": 0.235, "step": 27585 }, { "epoch": 2.2347699287103047, "grad_norm": 0.06908533722162247, "learning_rate": 9.806922003690535e-05, "loss": 0.3004, "step": 27586 }, { "epoch": 2.234850939727803, "grad_norm": 0.05455467104911804, "learning_rate": 9.806471938431074e-05, "loss": 0.2432, "step": 27587 }, { "epoch": 2.234931950745301, "grad_norm": 0.061505746096372604, "learning_rate": 9.806021873171611e-05, "loss": 0.2074, "step": 27588 }, { "epoch": 2.2350129617628, "grad_norm": 0.07434146851301193, "learning_rate": 9.805571807912147e-05, "loss": 0.2753, "step": 27589 }, { "epoch": 2.235093972780298, "grad_norm": 0.06101938709616661, "learning_rate": 9.805121742652686e-05, "loss": 0.2577, "step": 27590 }, { "epoch": 2.2351749837977963, "grad_norm": 0.05936296284198761, "learning_rate": 9.804671677393223e-05, "loss": 0.2461, "step": 27591 }, { "epoch": 2.235255994815295, "grad_norm": 0.060692984610795975, "learning_rate": 9.804221612133759e-05, "loss": 0.2119, "step": 27592 }, { "epoch": 2.2353370058327933, "grad_norm": 0.0680263414978981, "learning_rate": 9.803771546874298e-05, "loss": 0.2368, "step": 27593 }, { "epoch": 2.2354180168502915, "grad_norm": 0.06867021322250366, "learning_rate": 9.803321481614835e-05, "loss": 0.2842, "step": 27594 }, { "epoch": 2.23549902786779, "grad_norm": 0.06476103514432907, "learning_rate": 9.802871416355372e-05, "loss": 0.2524, "step": 27595 }, { "epoch": 2.2355800388852884, "grad_norm": 0.0553499199450016, "learning_rate": 9.80242135109591e-05, "loss": 0.2183, "step": 27596 }, { "epoch": 2.2356610499027867, "grad_norm": 0.08324495702981949, "learning_rate": 9.801971285836447e-05, "loss": 0.2827, "step": 27597 }, { "epoch": 2.235742060920285, "grad_norm": 0.06461543589830399, "learning_rate": 9.801521220576985e-05, "loss": 0.2366, "step": 27598 }, { "epoch": 2.2358230719377836, "grad_norm": 0.06602394580841064, "learning_rate": 9.801071155317522e-05, "loss": 0.2393, "step": 27599 }, { "epoch": 2.235904082955282, "grad_norm": 0.06189282611012459, "learning_rate": 9.800621090058059e-05, "loss": 0.2199, "step": 27600 }, { "epoch": 2.23598509397278, "grad_norm": 0.06203988194465637, "learning_rate": 9.800171024798597e-05, "loss": 0.2333, "step": 27601 }, { "epoch": 2.236066104990279, "grad_norm": 0.05741807445883751, "learning_rate": 9.799720959539134e-05, "loss": 0.2591, "step": 27602 }, { "epoch": 2.236147116007777, "grad_norm": 0.06251756101846695, "learning_rate": 9.799270894279671e-05, "loss": 0.2568, "step": 27603 }, { "epoch": 2.2362281270252753, "grad_norm": 0.06924989074468613, "learning_rate": 9.798820829020209e-05, "loss": 0.2319, "step": 27604 }, { "epoch": 2.236309138042774, "grad_norm": 0.05958033725619316, "learning_rate": 9.798370763760746e-05, "loss": 0.285, "step": 27605 }, { "epoch": 2.2363901490602722, "grad_norm": 0.07292255014181137, "learning_rate": 9.797920698501283e-05, "loss": 0.2757, "step": 27606 }, { "epoch": 2.2364711600777705, "grad_norm": 0.06331977993249893, "learning_rate": 9.797470633241821e-05, "loss": 0.2391, "step": 27607 }, { "epoch": 2.236552171095269, "grad_norm": 0.06359085440635681, "learning_rate": 9.797020567982358e-05, "loss": 0.2788, "step": 27608 }, { "epoch": 2.2366331821127674, "grad_norm": 0.04755968600511551, "learning_rate": 9.796570502722895e-05, "loss": 0.2137, "step": 27609 }, { "epoch": 2.2367141931302656, "grad_norm": 0.068677619099617, "learning_rate": 9.796120437463433e-05, "loss": 0.2455, "step": 27610 }, { "epoch": 2.236795204147764, "grad_norm": 0.05319945141673088, "learning_rate": 9.79567037220397e-05, "loss": 0.23, "step": 27611 }, { "epoch": 2.2368762151652626, "grad_norm": 0.07198475301265717, "learning_rate": 9.795220306944508e-05, "loss": 0.323, "step": 27612 }, { "epoch": 2.236957226182761, "grad_norm": 0.07129145413637161, "learning_rate": 9.794770241685045e-05, "loss": 0.2547, "step": 27613 }, { "epoch": 2.237038237200259, "grad_norm": 0.08250509947538376, "learning_rate": 9.794320176425582e-05, "loss": 0.2533, "step": 27614 }, { "epoch": 2.2371192482177578, "grad_norm": 0.06255879998207092, "learning_rate": 9.79387011116612e-05, "loss": 0.2776, "step": 27615 }, { "epoch": 2.237200259235256, "grad_norm": 0.056715380400419235, "learning_rate": 9.793420045906657e-05, "loss": 0.2633, "step": 27616 }, { "epoch": 2.2372812702527543, "grad_norm": 0.08910910040140152, "learning_rate": 9.792969980647194e-05, "loss": 0.2812, "step": 27617 }, { "epoch": 2.237362281270253, "grad_norm": 0.05875033885240555, "learning_rate": 9.792519915387732e-05, "loss": 0.2527, "step": 27618 }, { "epoch": 2.237443292287751, "grad_norm": 0.054330844432115555, "learning_rate": 9.792069850128269e-05, "loss": 0.234, "step": 27619 }, { "epoch": 2.2375243033052494, "grad_norm": 0.06929908692836761, "learning_rate": 9.791619784868806e-05, "loss": 0.2287, "step": 27620 }, { "epoch": 2.2376053143227477, "grad_norm": 0.05904196947813034, "learning_rate": 9.791169719609344e-05, "loss": 0.2536, "step": 27621 }, { "epoch": 2.2376863253402464, "grad_norm": 0.06485608965158463, "learning_rate": 9.790719654349881e-05, "loss": 0.2793, "step": 27622 }, { "epoch": 2.2377673363577446, "grad_norm": 0.05853230133652687, "learning_rate": 9.790269589090419e-05, "loss": 0.2733, "step": 27623 }, { "epoch": 2.237848347375243, "grad_norm": 0.05344027280807495, "learning_rate": 9.789819523830956e-05, "loss": 0.2305, "step": 27624 }, { "epoch": 2.2379293583927415, "grad_norm": 0.057308122515678406, "learning_rate": 9.789369458571493e-05, "loss": 0.216, "step": 27625 }, { "epoch": 2.23801036941024, "grad_norm": 0.08146602660417557, "learning_rate": 9.78891939331203e-05, "loss": 0.2624, "step": 27626 }, { "epoch": 2.238091380427738, "grad_norm": 0.07540880888700485, "learning_rate": 9.788469328052568e-05, "loss": 0.2634, "step": 27627 }, { "epoch": 2.2381723914452367, "grad_norm": 0.07709179073572159, "learning_rate": 9.788019262793105e-05, "loss": 0.2665, "step": 27628 }, { "epoch": 2.238253402462735, "grad_norm": 0.07061771303415298, "learning_rate": 9.787569197533643e-05, "loss": 0.2396, "step": 27629 }, { "epoch": 2.238334413480233, "grad_norm": 0.06282792240381241, "learning_rate": 9.78711913227418e-05, "loss": 0.2519, "step": 27630 }, { "epoch": 2.238415424497732, "grad_norm": 0.06897687166929245, "learning_rate": 9.786669067014717e-05, "loss": 0.3148, "step": 27631 }, { "epoch": 2.23849643551523, "grad_norm": 0.06308693438768387, "learning_rate": 9.786219001755255e-05, "loss": 0.2562, "step": 27632 }, { "epoch": 2.2385774465327284, "grad_norm": 0.053178559988737106, "learning_rate": 9.785768936495792e-05, "loss": 0.2282, "step": 27633 }, { "epoch": 2.2386584575502266, "grad_norm": 0.06457486748695374, "learning_rate": 9.78531887123633e-05, "loss": 0.2539, "step": 27634 }, { "epoch": 2.2387394685677253, "grad_norm": 0.06926770508289337, "learning_rate": 9.784868805976867e-05, "loss": 0.3016, "step": 27635 }, { "epoch": 2.2388204795852236, "grad_norm": 0.07494411617517471, "learning_rate": 9.784418740717404e-05, "loss": 0.2625, "step": 27636 }, { "epoch": 2.238901490602722, "grad_norm": 0.05255121737718582, "learning_rate": 9.783968675457942e-05, "loss": 0.2445, "step": 27637 }, { "epoch": 2.2389825016202205, "grad_norm": 0.058950025588274, "learning_rate": 9.783518610198479e-05, "loss": 0.2513, "step": 27638 }, { "epoch": 2.2390635126377187, "grad_norm": 0.06239871680736542, "learning_rate": 9.783068544939016e-05, "loss": 0.2302, "step": 27639 }, { "epoch": 2.239144523655217, "grad_norm": 0.059939995408058167, "learning_rate": 9.782618479679554e-05, "loss": 0.2669, "step": 27640 }, { "epoch": 2.2392255346727157, "grad_norm": 0.06433939933776855, "learning_rate": 9.782168414420091e-05, "loss": 0.2489, "step": 27641 }, { "epoch": 2.239306545690214, "grad_norm": 0.060069452971220016, "learning_rate": 9.781718349160628e-05, "loss": 0.2463, "step": 27642 }, { "epoch": 2.239387556707712, "grad_norm": 0.07081073522567749, "learning_rate": 9.781268283901166e-05, "loss": 0.2563, "step": 27643 }, { "epoch": 2.2394685677252104, "grad_norm": 0.07024306058883667, "learning_rate": 9.780818218641703e-05, "loss": 0.2692, "step": 27644 }, { "epoch": 2.239549578742709, "grad_norm": 0.06197810545563698, "learning_rate": 9.78036815338224e-05, "loss": 0.2678, "step": 27645 }, { "epoch": 2.2396305897602073, "grad_norm": 0.05187183618545532, "learning_rate": 9.779918088122778e-05, "loss": 0.2451, "step": 27646 }, { "epoch": 2.2397116007777056, "grad_norm": 0.06668131798505783, "learning_rate": 9.779468022863317e-05, "loss": 0.2693, "step": 27647 }, { "epoch": 2.2397926117952043, "grad_norm": 0.07856709510087967, "learning_rate": 9.779017957603853e-05, "loss": 0.2779, "step": 27648 }, { "epoch": 2.2398736228127025, "grad_norm": 0.07700806856155396, "learning_rate": 9.77856789234439e-05, "loss": 0.2508, "step": 27649 }, { "epoch": 2.2399546338302008, "grad_norm": 0.06017092615365982, "learning_rate": 9.778117827084929e-05, "loss": 0.2599, "step": 27650 }, { "epoch": 2.2400356448476995, "grad_norm": 0.06719563156366348, "learning_rate": 9.777667761825465e-05, "loss": 0.2658, "step": 27651 }, { "epoch": 2.2401166558651977, "grad_norm": 0.061362117528915405, "learning_rate": 9.777217696566002e-05, "loss": 0.2284, "step": 27652 }, { "epoch": 2.240197666882696, "grad_norm": 0.06896430253982544, "learning_rate": 9.776767631306541e-05, "loss": 0.2376, "step": 27653 }, { "epoch": 2.2402786779001946, "grad_norm": 0.06479042023420334, "learning_rate": 9.776317566047078e-05, "loss": 0.2307, "step": 27654 }, { "epoch": 2.240359688917693, "grad_norm": 0.06881794333457947, "learning_rate": 9.775867500787614e-05, "loss": 0.2324, "step": 27655 }, { "epoch": 2.240440699935191, "grad_norm": 0.06108381599187851, "learning_rate": 9.775417435528153e-05, "loss": 0.2415, "step": 27656 }, { "epoch": 2.2405217109526894, "grad_norm": 0.06953638792037964, "learning_rate": 9.77496737026869e-05, "loss": 0.2839, "step": 27657 }, { "epoch": 2.240602721970188, "grad_norm": 0.06614869832992554, "learning_rate": 9.774517305009226e-05, "loss": 0.2442, "step": 27658 }, { "epoch": 2.2406837329876863, "grad_norm": 0.1045251414179802, "learning_rate": 9.774067239749765e-05, "loss": 0.2695, "step": 27659 }, { "epoch": 2.2407647440051845, "grad_norm": 0.0695669949054718, "learning_rate": 9.773617174490302e-05, "loss": 0.2693, "step": 27660 }, { "epoch": 2.2408457550226832, "grad_norm": 0.06217452138662338, "learning_rate": 9.773167109230838e-05, "loss": 0.2216, "step": 27661 }, { "epoch": 2.2409267660401815, "grad_norm": 0.05973656475543976, "learning_rate": 9.772717043971377e-05, "loss": 0.2175, "step": 27662 }, { "epoch": 2.2410077770576797, "grad_norm": 0.06636808067560196, "learning_rate": 9.772266978711914e-05, "loss": 0.267, "step": 27663 }, { "epoch": 2.2410887880751784, "grad_norm": 0.07036703824996948, "learning_rate": 9.77181691345245e-05, "loss": 0.2256, "step": 27664 }, { "epoch": 2.2411697990926767, "grad_norm": 0.06427907943725586, "learning_rate": 9.771366848192989e-05, "loss": 0.2263, "step": 27665 }, { "epoch": 2.241250810110175, "grad_norm": 0.06415683031082153, "learning_rate": 9.770916782933526e-05, "loss": 0.2586, "step": 27666 }, { "epoch": 2.241331821127673, "grad_norm": 0.05050278455018997, "learning_rate": 9.770466717674062e-05, "loss": 0.208, "step": 27667 }, { "epoch": 2.241412832145172, "grad_norm": 0.07526952028274536, "learning_rate": 9.770016652414601e-05, "loss": 0.2852, "step": 27668 }, { "epoch": 2.24149384316267, "grad_norm": 0.051428694278001785, "learning_rate": 9.769566587155138e-05, "loss": 0.2508, "step": 27669 }, { "epoch": 2.2415748541801683, "grad_norm": 0.06677006930112839, "learning_rate": 9.769116521895674e-05, "loss": 0.2466, "step": 27670 }, { "epoch": 2.241655865197667, "grad_norm": 0.06048748269677162, "learning_rate": 9.768666456636213e-05, "loss": 0.2282, "step": 27671 }, { "epoch": 2.2417368762151653, "grad_norm": 0.06114950403571129, "learning_rate": 9.76821639137675e-05, "loss": 0.2255, "step": 27672 }, { "epoch": 2.2418178872326635, "grad_norm": 0.07318947464227676, "learning_rate": 9.767766326117287e-05, "loss": 0.2637, "step": 27673 }, { "epoch": 2.241898898250162, "grad_norm": 0.06158239394426346, "learning_rate": 9.767316260857825e-05, "loss": 0.2701, "step": 27674 }, { "epoch": 2.2419799092676604, "grad_norm": 0.058391667902469635, "learning_rate": 9.766866195598363e-05, "loss": 0.2541, "step": 27675 }, { "epoch": 2.2420609202851587, "grad_norm": 0.06646616011857986, "learning_rate": 9.7664161303389e-05, "loss": 0.2838, "step": 27676 }, { "epoch": 2.2421419313026574, "grad_norm": 0.06191658973693848, "learning_rate": 9.765966065079437e-05, "loss": 0.2369, "step": 27677 }, { "epoch": 2.2422229423201556, "grad_norm": 0.07929068803787231, "learning_rate": 9.765515999819975e-05, "loss": 0.2425, "step": 27678 }, { "epoch": 2.242303953337654, "grad_norm": 0.06208163872361183, "learning_rate": 9.765065934560512e-05, "loss": 0.2378, "step": 27679 }, { "epoch": 2.242384964355152, "grad_norm": 0.09012886881828308, "learning_rate": 9.76461586930105e-05, "loss": 0.2697, "step": 27680 }, { "epoch": 2.242465975372651, "grad_norm": 0.07423432171344757, "learning_rate": 9.764165804041587e-05, "loss": 0.2782, "step": 27681 }, { "epoch": 2.242546986390149, "grad_norm": 0.06949017196893692, "learning_rate": 9.763715738782124e-05, "loss": 0.2715, "step": 27682 }, { "epoch": 2.2426279974076473, "grad_norm": 0.0629178062081337, "learning_rate": 9.763265673522662e-05, "loss": 0.2575, "step": 27683 }, { "epoch": 2.242709008425146, "grad_norm": 0.0800800770521164, "learning_rate": 9.762815608263199e-05, "loss": 0.2946, "step": 27684 }, { "epoch": 2.2427900194426442, "grad_norm": 0.0748923197388649, "learning_rate": 9.762365543003736e-05, "loss": 0.2647, "step": 27685 }, { "epoch": 2.2428710304601425, "grad_norm": 0.06776215136051178, "learning_rate": 9.761915477744274e-05, "loss": 0.2396, "step": 27686 }, { "epoch": 2.242952041477641, "grad_norm": 0.06588950008153915, "learning_rate": 9.761465412484811e-05, "loss": 0.2286, "step": 27687 }, { "epoch": 2.2430330524951394, "grad_norm": 0.06949547678232193, "learning_rate": 9.761015347225348e-05, "loss": 0.2577, "step": 27688 }, { "epoch": 2.2431140635126376, "grad_norm": 0.058526843786239624, "learning_rate": 9.760565281965886e-05, "loss": 0.2679, "step": 27689 }, { "epoch": 2.243195074530136, "grad_norm": 0.06525886803865433, "learning_rate": 9.760115216706423e-05, "loss": 0.2511, "step": 27690 }, { "epoch": 2.2432760855476346, "grad_norm": 0.06647324562072754, "learning_rate": 9.75966515144696e-05, "loss": 0.2755, "step": 27691 }, { "epoch": 2.243357096565133, "grad_norm": 0.06325287371873856, "learning_rate": 9.759215086187498e-05, "loss": 0.2338, "step": 27692 }, { "epoch": 2.243438107582631, "grad_norm": 0.09324182569980621, "learning_rate": 9.758765020928035e-05, "loss": 0.2508, "step": 27693 }, { "epoch": 2.2435191186001298, "grad_norm": 0.06293612718582153, "learning_rate": 9.758314955668572e-05, "loss": 0.2608, "step": 27694 }, { "epoch": 2.243600129617628, "grad_norm": 0.06111268326640129, "learning_rate": 9.75786489040911e-05, "loss": 0.244, "step": 27695 }, { "epoch": 2.2436811406351262, "grad_norm": 0.05974910780787468, "learning_rate": 9.757414825149647e-05, "loss": 0.2458, "step": 27696 }, { "epoch": 2.243762151652625, "grad_norm": 0.07850635796785355, "learning_rate": 9.756964759890185e-05, "loss": 0.2657, "step": 27697 }, { "epoch": 2.243843162670123, "grad_norm": 0.06100136414170265, "learning_rate": 9.756514694630722e-05, "loss": 0.2265, "step": 27698 }, { "epoch": 2.2439241736876214, "grad_norm": 0.07412116229534149, "learning_rate": 9.756064629371259e-05, "loss": 0.3002, "step": 27699 }, { "epoch": 2.24400518470512, "grad_norm": 0.06798642128705978, "learning_rate": 9.755614564111797e-05, "loss": 0.2751, "step": 27700 }, { "epoch": 2.2440861957226184, "grad_norm": 0.07323481142520905, "learning_rate": 9.755164498852334e-05, "loss": 0.2393, "step": 27701 }, { "epoch": 2.2441672067401166, "grad_norm": 0.07551179081201553, "learning_rate": 9.754714433592871e-05, "loss": 0.2398, "step": 27702 }, { "epoch": 2.244248217757615, "grad_norm": 0.06301606446504593, "learning_rate": 9.754264368333409e-05, "loss": 0.2451, "step": 27703 }, { "epoch": 2.2443292287751135, "grad_norm": 0.060894306749105453, "learning_rate": 9.753814303073946e-05, "loss": 0.2531, "step": 27704 }, { "epoch": 2.244410239792612, "grad_norm": 0.06674228608608246, "learning_rate": 9.753364237814483e-05, "loss": 0.2386, "step": 27705 }, { "epoch": 2.24449125081011, "grad_norm": 0.06235579773783684, "learning_rate": 9.752914172555021e-05, "loss": 0.243, "step": 27706 }, { "epoch": 2.2445722618276087, "grad_norm": 0.07511220872402191, "learning_rate": 9.752464107295558e-05, "loss": 0.2352, "step": 27707 }, { "epoch": 2.244653272845107, "grad_norm": 0.07888006418943405, "learning_rate": 9.752014042036096e-05, "loss": 0.2753, "step": 27708 }, { "epoch": 2.244734283862605, "grad_norm": 0.0688016340136528, "learning_rate": 9.751563976776633e-05, "loss": 0.2296, "step": 27709 }, { "epoch": 2.244815294880104, "grad_norm": 0.07113447785377502, "learning_rate": 9.75111391151717e-05, "loss": 0.2865, "step": 27710 }, { "epoch": 2.244896305897602, "grad_norm": 0.06750037521123886, "learning_rate": 9.750663846257708e-05, "loss": 0.2499, "step": 27711 }, { "epoch": 2.2449773169151004, "grad_norm": 0.050355907529592514, "learning_rate": 9.750213780998245e-05, "loss": 0.2083, "step": 27712 }, { "epoch": 2.2450583279325986, "grad_norm": 0.06752597540616989, "learning_rate": 9.749763715738782e-05, "loss": 0.286, "step": 27713 }, { "epoch": 2.2451393389500973, "grad_norm": 0.05738743022084236, "learning_rate": 9.74931365047932e-05, "loss": 0.267, "step": 27714 }, { "epoch": 2.2452203499675956, "grad_norm": 0.08378364890813828, "learning_rate": 9.748863585219857e-05, "loss": 0.263, "step": 27715 }, { "epoch": 2.245301360985094, "grad_norm": 0.06476253271102905, "learning_rate": 9.748413519960394e-05, "loss": 0.2629, "step": 27716 }, { "epoch": 2.2453823720025925, "grad_norm": 0.06573726236820221, "learning_rate": 9.747963454700932e-05, "loss": 0.2835, "step": 27717 }, { "epoch": 2.2454633830200907, "grad_norm": 0.05313044786453247, "learning_rate": 9.747513389441469e-05, "loss": 0.2232, "step": 27718 }, { "epoch": 2.245544394037589, "grad_norm": 0.06361450999975204, "learning_rate": 9.747063324182006e-05, "loss": 0.2716, "step": 27719 }, { "epoch": 2.2456254050550877, "grad_norm": 0.06137660890817642, "learning_rate": 9.746613258922544e-05, "loss": 0.2451, "step": 27720 }, { "epoch": 2.245706416072586, "grad_norm": 0.060190510004758835, "learning_rate": 9.746163193663081e-05, "loss": 0.2378, "step": 27721 }, { "epoch": 2.245787427090084, "grad_norm": 0.05988527089357376, "learning_rate": 9.745713128403619e-05, "loss": 0.2593, "step": 27722 }, { "epoch": 2.245868438107583, "grad_norm": 0.06177292391657829, "learning_rate": 9.745263063144157e-05, "loss": 0.2269, "step": 27723 }, { "epoch": 2.245949449125081, "grad_norm": 0.05540267378091812, "learning_rate": 9.744812997884693e-05, "loss": 0.2415, "step": 27724 }, { "epoch": 2.2460304601425793, "grad_norm": 0.07458721846342087, "learning_rate": 9.74436293262523e-05, "loss": 0.228, "step": 27725 }, { "epoch": 2.2461114711600776, "grad_norm": 0.0722312182188034, "learning_rate": 9.74391286736577e-05, "loss": 0.2973, "step": 27726 }, { "epoch": 2.2461924821775763, "grad_norm": 0.06115707382559776, "learning_rate": 9.743462802106305e-05, "loss": 0.2307, "step": 27727 }, { "epoch": 2.2462734931950745, "grad_norm": 0.06007055193185806, "learning_rate": 9.743012736846844e-05, "loss": 0.2421, "step": 27728 }, { "epoch": 2.2463545042125728, "grad_norm": 0.0731889083981514, "learning_rate": 9.742562671587381e-05, "loss": 0.2972, "step": 27729 }, { "epoch": 2.2464355152300715, "grad_norm": 0.0577794648706913, "learning_rate": 9.742112606327917e-05, "loss": 0.2105, "step": 27730 }, { "epoch": 2.2465165262475697, "grad_norm": 0.06230419501662254, "learning_rate": 9.741662541068456e-05, "loss": 0.2752, "step": 27731 }, { "epoch": 2.246597537265068, "grad_norm": 0.07523570954799652, "learning_rate": 9.741212475808994e-05, "loss": 0.2258, "step": 27732 }, { "epoch": 2.2466785482825666, "grad_norm": 0.07175968587398529, "learning_rate": 9.74076241054953e-05, "loss": 0.275, "step": 27733 }, { "epoch": 2.246759559300065, "grad_norm": 0.07299905270338058, "learning_rate": 9.740312345290068e-05, "loss": 0.2665, "step": 27734 }, { "epoch": 2.246840570317563, "grad_norm": 0.0867857038974762, "learning_rate": 9.739862280030606e-05, "loss": 0.2812, "step": 27735 }, { "epoch": 2.2469215813350614, "grad_norm": 0.06806836277246475, "learning_rate": 9.739412214771142e-05, "loss": 0.2524, "step": 27736 }, { "epoch": 2.24700259235256, "grad_norm": 0.07593537122011185, "learning_rate": 9.73896214951168e-05, "loss": 0.2734, "step": 27737 }, { "epoch": 2.2470836033700583, "grad_norm": 0.07448563724756241, "learning_rate": 9.738512084252218e-05, "loss": 0.2473, "step": 27738 }, { "epoch": 2.2471646143875565, "grad_norm": 0.06869513541460037, "learning_rate": 9.738062018992754e-05, "loss": 0.2335, "step": 27739 }, { "epoch": 2.2472456254050552, "grad_norm": 0.05957813560962677, "learning_rate": 9.737611953733292e-05, "loss": 0.2562, "step": 27740 }, { "epoch": 2.2473266364225535, "grad_norm": 0.06571602821350098, "learning_rate": 9.73716188847383e-05, "loss": 0.2691, "step": 27741 }, { "epoch": 2.2474076474400517, "grad_norm": 0.06896911561489105, "learning_rate": 9.736711823214366e-05, "loss": 0.2493, "step": 27742 }, { "epoch": 2.2474886584575504, "grad_norm": 0.0788666233420372, "learning_rate": 9.736261757954904e-05, "loss": 0.2619, "step": 27743 }, { "epoch": 2.2475696694750487, "grad_norm": 0.08098050951957703, "learning_rate": 9.735811692695442e-05, "loss": 0.2563, "step": 27744 }, { "epoch": 2.247650680492547, "grad_norm": 0.061166420578956604, "learning_rate": 9.735361627435978e-05, "loss": 0.2636, "step": 27745 }, { "epoch": 2.2477316915100456, "grad_norm": 0.07056838274002075, "learning_rate": 9.734911562176517e-05, "loss": 0.2773, "step": 27746 }, { "epoch": 2.247812702527544, "grad_norm": 0.06341052800416946, "learning_rate": 9.734461496917054e-05, "loss": 0.2722, "step": 27747 }, { "epoch": 2.247893713545042, "grad_norm": 0.08864749222993851, "learning_rate": 9.73401143165759e-05, "loss": 0.2749, "step": 27748 }, { "epoch": 2.2479747245625403, "grad_norm": 0.06531281769275665, "learning_rate": 9.733561366398129e-05, "loss": 0.2412, "step": 27749 }, { "epoch": 2.248055735580039, "grad_norm": 0.05753309279680252, "learning_rate": 9.733111301138666e-05, "loss": 0.2593, "step": 27750 }, { "epoch": 2.2481367465975373, "grad_norm": 0.06826810538768768, "learning_rate": 9.732661235879202e-05, "loss": 0.2313, "step": 27751 }, { "epoch": 2.2482177576150355, "grad_norm": 0.06804418563842773, "learning_rate": 9.732211170619741e-05, "loss": 0.2549, "step": 27752 }, { "epoch": 2.248298768632534, "grad_norm": 0.07234552502632141, "learning_rate": 9.731761105360278e-05, "loss": 0.2672, "step": 27753 }, { "epoch": 2.2483797796500324, "grad_norm": 0.0556609183549881, "learning_rate": 9.731311040100815e-05, "loss": 0.2281, "step": 27754 }, { "epoch": 2.2484607906675307, "grad_norm": 0.07722504436969757, "learning_rate": 9.730860974841353e-05, "loss": 0.2918, "step": 27755 }, { "epoch": 2.248541801685029, "grad_norm": 0.060491111129522324, "learning_rate": 9.73041090958189e-05, "loss": 0.2375, "step": 27756 }, { "epoch": 2.2486228127025276, "grad_norm": 0.05507529154419899, "learning_rate": 9.729960844322428e-05, "loss": 0.2338, "step": 27757 }, { "epoch": 2.248703823720026, "grad_norm": 0.07224796712398529, "learning_rate": 9.729510779062965e-05, "loss": 0.2423, "step": 27758 }, { "epoch": 2.248784834737524, "grad_norm": 0.06265775114297867, "learning_rate": 9.729060713803502e-05, "loss": 0.2582, "step": 27759 }, { "epoch": 2.248865845755023, "grad_norm": 0.08026780188083649, "learning_rate": 9.72861064854404e-05, "loss": 0.2753, "step": 27760 }, { "epoch": 2.248946856772521, "grad_norm": 0.051603544503450394, "learning_rate": 9.728160583284577e-05, "loss": 0.2458, "step": 27761 }, { "epoch": 2.2490278677900193, "grad_norm": 0.07035865634679794, "learning_rate": 9.727710518025114e-05, "loss": 0.2556, "step": 27762 }, { "epoch": 2.249108878807518, "grad_norm": 0.05800047889351845, "learning_rate": 9.727260452765652e-05, "loss": 0.2377, "step": 27763 }, { "epoch": 2.249189889825016, "grad_norm": 0.06574030965566635, "learning_rate": 9.726810387506189e-05, "loss": 0.2641, "step": 27764 }, { "epoch": 2.2492709008425145, "grad_norm": 0.06430261582136154, "learning_rate": 9.726360322246726e-05, "loss": 0.2426, "step": 27765 }, { "epoch": 2.249351911860013, "grad_norm": 0.06490317732095718, "learning_rate": 9.725910256987264e-05, "loss": 0.2361, "step": 27766 }, { "epoch": 2.2494329228775114, "grad_norm": 0.0709247961640358, "learning_rate": 9.725460191727801e-05, "loss": 0.2505, "step": 27767 }, { "epoch": 2.2495139338950096, "grad_norm": 0.05828109383583069, "learning_rate": 9.725010126468338e-05, "loss": 0.2168, "step": 27768 }, { "epoch": 2.2495949449125083, "grad_norm": 0.0659540668129921, "learning_rate": 9.724560061208876e-05, "loss": 0.2212, "step": 27769 }, { "epoch": 2.2496759559300066, "grad_norm": 0.07301966100931168, "learning_rate": 9.724109995949413e-05, "loss": 0.2457, "step": 27770 }, { "epoch": 2.249756966947505, "grad_norm": 0.06525655835866928, "learning_rate": 9.72365993068995e-05, "loss": 0.2453, "step": 27771 }, { "epoch": 2.249837977965003, "grad_norm": 0.0701277032494545, "learning_rate": 9.723209865430488e-05, "loss": 0.2612, "step": 27772 }, { "epoch": 2.2499189889825018, "grad_norm": 0.05551356077194214, "learning_rate": 9.722759800171025e-05, "loss": 0.2751, "step": 27773 }, { "epoch": 2.25, "grad_norm": 0.07594873756170273, "learning_rate": 9.722309734911563e-05, "loss": 0.2741, "step": 27774 }, { "epoch": 2.2500810110174982, "grad_norm": 0.07346559315919876, "learning_rate": 9.7218596696521e-05, "loss": 0.2472, "step": 27775 }, { "epoch": 2.250162022034997, "grad_norm": 0.06441164016723633, "learning_rate": 9.721409604392637e-05, "loss": 0.2858, "step": 27776 }, { "epoch": 2.250243033052495, "grad_norm": 0.07367736846208572, "learning_rate": 9.720959539133175e-05, "loss": 0.2576, "step": 27777 }, { "epoch": 2.2503240440699934, "grad_norm": 0.07037994265556335, "learning_rate": 9.720509473873712e-05, "loss": 0.2483, "step": 27778 }, { "epoch": 2.2504050550874917, "grad_norm": 0.062301624566316605, "learning_rate": 9.72005940861425e-05, "loss": 0.2298, "step": 27779 }, { "epoch": 2.2504860661049904, "grad_norm": 0.06465239077806473, "learning_rate": 9.719609343354787e-05, "loss": 0.2167, "step": 27780 }, { "epoch": 2.2505670771224886, "grad_norm": 0.06765540689229965, "learning_rate": 9.719159278095324e-05, "loss": 0.2624, "step": 27781 }, { "epoch": 2.250648088139987, "grad_norm": 0.07314081490039825, "learning_rate": 9.718709212835862e-05, "loss": 0.2735, "step": 27782 }, { "epoch": 2.2507290991574855, "grad_norm": 0.056773893535137177, "learning_rate": 9.718259147576399e-05, "loss": 0.2679, "step": 27783 }, { "epoch": 2.250810110174984, "grad_norm": 0.07499083131551743, "learning_rate": 9.717809082316936e-05, "loss": 0.2742, "step": 27784 }, { "epoch": 2.250891121192482, "grad_norm": 0.055659789592027664, "learning_rate": 9.717359017057474e-05, "loss": 0.2459, "step": 27785 }, { "epoch": 2.2509721322099807, "grad_norm": 0.057748425751924515, "learning_rate": 9.716908951798011e-05, "loss": 0.2191, "step": 27786 }, { "epoch": 2.251053143227479, "grad_norm": 0.06498988717794418, "learning_rate": 9.716458886538548e-05, "loss": 0.2336, "step": 27787 }, { "epoch": 2.251134154244977, "grad_norm": 0.05748360604047775, "learning_rate": 9.716008821279086e-05, "loss": 0.2329, "step": 27788 }, { "epoch": 2.251215165262476, "grad_norm": 0.05783957615494728, "learning_rate": 9.715558756019624e-05, "loss": 0.2266, "step": 27789 }, { "epoch": 2.251296176279974, "grad_norm": 0.05689896643161774, "learning_rate": 9.71510869076016e-05, "loss": 0.2332, "step": 27790 }, { "epoch": 2.2513771872974724, "grad_norm": 0.05369093269109726, "learning_rate": 9.714658625500698e-05, "loss": 0.2255, "step": 27791 }, { "epoch": 2.251458198314971, "grad_norm": 0.061454132199287415, "learning_rate": 9.714208560241236e-05, "loss": 0.2584, "step": 27792 }, { "epoch": 2.2515392093324693, "grad_norm": 0.05191675201058388, "learning_rate": 9.713758494981772e-05, "loss": 0.2653, "step": 27793 }, { "epoch": 2.2516202203499676, "grad_norm": 0.05647498741745949, "learning_rate": 9.71330842972231e-05, "loss": 0.2598, "step": 27794 }, { "epoch": 2.251701231367466, "grad_norm": 0.05284734070301056, "learning_rate": 9.712858364462849e-05, "loss": 0.244, "step": 27795 }, { "epoch": 2.2517822423849645, "grad_norm": 0.05807924270629883, "learning_rate": 9.712408299203385e-05, "loss": 0.2768, "step": 27796 }, { "epoch": 2.2518632534024627, "grad_norm": 0.06968452781438828, "learning_rate": 9.711958233943922e-05, "loss": 0.2916, "step": 27797 }, { "epoch": 2.251944264419961, "grad_norm": 0.05544663220643997, "learning_rate": 9.71150816868446e-05, "loss": 0.2459, "step": 27798 }, { "epoch": 2.2520252754374597, "grad_norm": 0.05861636623740196, "learning_rate": 9.711058103424997e-05, "loss": 0.2588, "step": 27799 }, { "epoch": 2.252106286454958, "grad_norm": 0.06427206844091415, "learning_rate": 9.710608038165534e-05, "loss": 0.229, "step": 27800 }, { "epoch": 2.252187297472456, "grad_norm": 0.06308040022850037, "learning_rate": 9.710157972906073e-05, "loss": 0.2487, "step": 27801 }, { "epoch": 2.2522683084899544, "grad_norm": 0.060957130044698715, "learning_rate": 9.709707907646609e-05, "loss": 0.2518, "step": 27802 }, { "epoch": 2.252349319507453, "grad_norm": 0.048829443752765656, "learning_rate": 9.709257842387146e-05, "loss": 0.2253, "step": 27803 }, { "epoch": 2.2524303305249513, "grad_norm": 0.05918252468109131, "learning_rate": 9.708807777127685e-05, "loss": 0.255, "step": 27804 }, { "epoch": 2.2525113415424496, "grad_norm": 0.0695018544793129, "learning_rate": 9.708357711868221e-05, "loss": 0.256, "step": 27805 }, { "epoch": 2.2525923525599483, "grad_norm": 0.07991789281368256, "learning_rate": 9.70790764660876e-05, "loss": 0.2932, "step": 27806 }, { "epoch": 2.2526733635774465, "grad_norm": 0.06261714547872543, "learning_rate": 9.707457581349297e-05, "loss": 0.2423, "step": 27807 }, { "epoch": 2.2527543745949448, "grad_norm": 0.0648646205663681, "learning_rate": 9.707007516089833e-05, "loss": 0.2338, "step": 27808 }, { "epoch": 2.2528353856124435, "grad_norm": 0.07085174322128296, "learning_rate": 9.706557450830372e-05, "loss": 0.2525, "step": 27809 }, { "epoch": 2.2529163966299417, "grad_norm": 0.0629393607378006, "learning_rate": 9.706107385570909e-05, "loss": 0.2756, "step": 27810 }, { "epoch": 2.25299740764744, "grad_norm": 0.06812289357185364, "learning_rate": 9.705657320311445e-05, "loss": 0.2666, "step": 27811 }, { "epoch": 2.2530784186649386, "grad_norm": 0.06094437837600708, "learning_rate": 9.705207255051984e-05, "loss": 0.2545, "step": 27812 }, { "epoch": 2.253159429682437, "grad_norm": 0.06829962879419327, "learning_rate": 9.704757189792521e-05, "loss": 0.2416, "step": 27813 }, { "epoch": 2.253240440699935, "grad_norm": 0.058218564838171005, "learning_rate": 9.704307124533057e-05, "loss": 0.2694, "step": 27814 }, { "epoch": 2.253321451717434, "grad_norm": 0.05303672328591347, "learning_rate": 9.703857059273596e-05, "loss": 0.2362, "step": 27815 }, { "epoch": 2.253402462734932, "grad_norm": 0.06274978071451187, "learning_rate": 9.703406994014133e-05, "loss": 0.2829, "step": 27816 }, { "epoch": 2.2534834737524303, "grad_norm": 0.06936366856098175, "learning_rate": 9.702956928754669e-05, "loss": 0.2609, "step": 27817 }, { "epoch": 2.2535644847699285, "grad_norm": 0.06225275993347168, "learning_rate": 9.702506863495208e-05, "loss": 0.2297, "step": 27818 }, { "epoch": 2.2536454957874272, "grad_norm": 0.07567765563726425, "learning_rate": 9.702056798235745e-05, "loss": 0.3011, "step": 27819 }, { "epoch": 2.2537265068049255, "grad_norm": 0.07268765568733215, "learning_rate": 9.701606732976281e-05, "loss": 0.2752, "step": 27820 }, { "epoch": 2.2538075178224237, "grad_norm": 0.06192336603999138, "learning_rate": 9.70115666771682e-05, "loss": 0.255, "step": 27821 }, { "epoch": 2.2538885288399224, "grad_norm": 0.06162412464618683, "learning_rate": 9.700706602457357e-05, "loss": 0.2628, "step": 27822 }, { "epoch": 2.2539695398574207, "grad_norm": 0.07518964260816574, "learning_rate": 9.700256537197893e-05, "loss": 0.2581, "step": 27823 }, { "epoch": 2.254050550874919, "grad_norm": 0.07366792857646942, "learning_rate": 9.699806471938432e-05, "loss": 0.281, "step": 27824 }, { "epoch": 2.254131561892417, "grad_norm": 0.06474350392818451, "learning_rate": 9.69935640667897e-05, "loss": 0.2567, "step": 27825 }, { "epoch": 2.254212572909916, "grad_norm": 0.07112695276737213, "learning_rate": 9.698906341419505e-05, "loss": 0.279, "step": 27826 }, { "epoch": 2.254293583927414, "grad_norm": 0.06454798579216003, "learning_rate": 9.698456276160044e-05, "loss": 0.2568, "step": 27827 }, { "epoch": 2.2543745949449123, "grad_norm": 0.07216446101665497, "learning_rate": 9.698006210900581e-05, "loss": 0.2637, "step": 27828 }, { "epoch": 2.254455605962411, "grad_norm": 0.07267016917467117, "learning_rate": 9.697556145641117e-05, "loss": 0.2624, "step": 27829 }, { "epoch": 2.2545366169799093, "grad_norm": 0.0549045130610466, "learning_rate": 9.697106080381656e-05, "loss": 0.2394, "step": 27830 }, { "epoch": 2.2546176279974075, "grad_norm": 0.0676216334104538, "learning_rate": 9.696656015122194e-05, "loss": 0.3046, "step": 27831 }, { "epoch": 2.254698639014906, "grad_norm": 0.05932784825563431, "learning_rate": 9.69620594986273e-05, "loss": 0.2855, "step": 27832 }, { "epoch": 2.2547796500324044, "grad_norm": 0.06297644227743149, "learning_rate": 9.695755884603268e-05, "loss": 0.2341, "step": 27833 }, { "epoch": 2.2548606610499027, "grad_norm": 0.06666389107704163, "learning_rate": 9.695305819343806e-05, "loss": 0.247, "step": 27834 }, { "epoch": 2.2549416720674014, "grad_norm": 0.06561585515737534, "learning_rate": 9.694855754084343e-05, "loss": 0.2727, "step": 27835 }, { "epoch": 2.2550226830848996, "grad_norm": 0.06304679065942764, "learning_rate": 9.69440568882488e-05, "loss": 0.261, "step": 27836 }, { "epoch": 2.255103694102398, "grad_norm": 0.057998958975076675, "learning_rate": 9.693955623565418e-05, "loss": 0.2634, "step": 27837 }, { "epoch": 2.2551847051198965, "grad_norm": 0.061838455498218536, "learning_rate": 9.693505558305955e-05, "loss": 0.2558, "step": 27838 }, { "epoch": 2.255265716137395, "grad_norm": 0.06982436776161194, "learning_rate": 9.693055493046492e-05, "loss": 0.2729, "step": 27839 }, { "epoch": 2.255346727154893, "grad_norm": 0.06714750081300735, "learning_rate": 9.69260542778703e-05, "loss": 0.2463, "step": 27840 }, { "epoch": 2.2554277381723913, "grad_norm": 0.05839761346578598, "learning_rate": 9.692155362527567e-05, "loss": 0.224, "step": 27841 }, { "epoch": 2.25550874918989, "grad_norm": 0.062319573014974594, "learning_rate": 9.691705297268105e-05, "loss": 0.2349, "step": 27842 }, { "epoch": 2.255589760207388, "grad_norm": 0.0658549889922142, "learning_rate": 9.691255232008642e-05, "loss": 0.233, "step": 27843 }, { "epoch": 2.2556707712248865, "grad_norm": 0.06968716531991959, "learning_rate": 9.690805166749179e-05, "loss": 0.2594, "step": 27844 }, { "epoch": 2.255751782242385, "grad_norm": 0.062022265046834946, "learning_rate": 9.690355101489717e-05, "loss": 0.2612, "step": 27845 }, { "epoch": 2.2558327932598834, "grad_norm": 0.06448287516832352, "learning_rate": 9.689905036230254e-05, "loss": 0.2406, "step": 27846 }, { "epoch": 2.2559138042773816, "grad_norm": 0.04885780066251755, "learning_rate": 9.689454970970791e-05, "loss": 0.2206, "step": 27847 }, { "epoch": 2.25599481529488, "grad_norm": 0.06826422363519669, "learning_rate": 9.689004905711329e-05, "loss": 0.24, "step": 27848 }, { "epoch": 2.2560758263123786, "grad_norm": 0.054555803537368774, "learning_rate": 9.688554840451866e-05, "loss": 0.2357, "step": 27849 }, { "epoch": 2.256156837329877, "grad_norm": 0.05679121986031532, "learning_rate": 9.688104775192403e-05, "loss": 0.2509, "step": 27850 }, { "epoch": 2.256237848347375, "grad_norm": 0.07215198874473572, "learning_rate": 9.687654709932941e-05, "loss": 0.2869, "step": 27851 }, { "epoch": 2.2563188593648738, "grad_norm": 0.06503754109144211, "learning_rate": 9.687204644673478e-05, "loss": 0.2314, "step": 27852 }, { "epoch": 2.256399870382372, "grad_norm": 0.08150571584701538, "learning_rate": 9.686754579414015e-05, "loss": 0.2287, "step": 27853 }, { "epoch": 2.2564808813998702, "grad_norm": 0.06819403171539307, "learning_rate": 9.686304514154553e-05, "loss": 0.272, "step": 27854 }, { "epoch": 2.256561892417369, "grad_norm": 0.06893903017044067, "learning_rate": 9.68585444889509e-05, "loss": 0.2516, "step": 27855 }, { "epoch": 2.256642903434867, "grad_norm": 0.07099400460720062, "learning_rate": 9.685404383635628e-05, "loss": 0.2557, "step": 27856 }, { "epoch": 2.2567239144523654, "grad_norm": 0.05509239435195923, "learning_rate": 9.684954318376165e-05, "loss": 0.2763, "step": 27857 }, { "epoch": 2.256804925469864, "grad_norm": 0.059497859328985214, "learning_rate": 9.684504253116702e-05, "loss": 0.2944, "step": 27858 }, { "epoch": 2.2568859364873624, "grad_norm": 0.06433116644620895, "learning_rate": 9.68405418785724e-05, "loss": 0.227, "step": 27859 }, { "epoch": 2.2569669475048606, "grad_norm": 0.062018293887376785, "learning_rate": 9.683604122597777e-05, "loss": 0.1944, "step": 27860 }, { "epoch": 2.2570479585223593, "grad_norm": 0.07556930929422379, "learning_rate": 9.683154057338316e-05, "loss": 0.2528, "step": 27861 }, { "epoch": 2.2571289695398575, "grad_norm": 0.06311879307031631, "learning_rate": 9.682703992078852e-05, "loss": 0.2752, "step": 27862 }, { "epoch": 2.2572099805573558, "grad_norm": 0.06782593578100204, "learning_rate": 9.682253926819389e-05, "loss": 0.2638, "step": 27863 }, { "epoch": 2.257290991574854, "grad_norm": 0.064161017537117, "learning_rate": 9.681803861559928e-05, "loss": 0.2636, "step": 27864 }, { "epoch": 2.2573720025923527, "grad_norm": 0.08791672438383102, "learning_rate": 9.681353796300464e-05, "loss": 0.277, "step": 27865 }, { "epoch": 2.257453013609851, "grad_norm": 0.07432711869478226, "learning_rate": 9.680903731041001e-05, "loss": 0.2761, "step": 27866 }, { "epoch": 2.257534024627349, "grad_norm": 0.061404597014188766, "learning_rate": 9.68045366578154e-05, "loss": 0.2678, "step": 27867 }, { "epoch": 2.257615035644848, "grad_norm": 0.0511782243847847, "learning_rate": 9.680003600522076e-05, "loss": 0.2169, "step": 27868 }, { "epoch": 2.257696046662346, "grad_norm": 0.05862230435013771, "learning_rate": 9.679553535262613e-05, "loss": 0.2222, "step": 27869 }, { "epoch": 2.2577770576798444, "grad_norm": 0.058833081275224686, "learning_rate": 9.679103470003152e-05, "loss": 0.2323, "step": 27870 }, { "epoch": 2.2578580686973426, "grad_norm": 0.05861777067184448, "learning_rate": 9.678653404743688e-05, "loss": 0.2489, "step": 27871 }, { "epoch": 2.2579390797148413, "grad_norm": 0.06305963546037674, "learning_rate": 9.678203339484225e-05, "loss": 0.2298, "step": 27872 }, { "epoch": 2.2580200907323396, "grad_norm": 0.07361917197704315, "learning_rate": 9.677753274224764e-05, "loss": 0.2926, "step": 27873 }, { "epoch": 2.258101101749838, "grad_norm": 0.07810232043266296, "learning_rate": 9.6773032089653e-05, "loss": 0.2722, "step": 27874 }, { "epoch": 2.2581821127673365, "grad_norm": 0.07439389079809189, "learning_rate": 9.676853143705837e-05, "loss": 0.278, "step": 27875 }, { "epoch": 2.2582631237848347, "grad_norm": 0.060669757425785065, "learning_rate": 9.676403078446376e-05, "loss": 0.2413, "step": 27876 }, { "epoch": 2.258344134802333, "grad_norm": 0.051836349070072174, "learning_rate": 9.675953013186912e-05, "loss": 0.2208, "step": 27877 }, { "epoch": 2.2584251458198317, "grad_norm": 0.06817534565925598, "learning_rate": 9.67550294792745e-05, "loss": 0.2213, "step": 27878 }, { "epoch": 2.25850615683733, "grad_norm": 0.0720968171954155, "learning_rate": 9.675052882667988e-05, "loss": 0.2481, "step": 27879 }, { "epoch": 2.258587167854828, "grad_norm": 0.05986291170120239, "learning_rate": 9.674602817408524e-05, "loss": 0.2508, "step": 27880 }, { "epoch": 2.258668178872327, "grad_norm": 0.0641389712691307, "learning_rate": 9.674152752149062e-05, "loss": 0.2973, "step": 27881 }, { "epoch": 2.258749189889825, "grad_norm": 0.06277509033679962, "learning_rate": 9.6737026868896e-05, "loss": 0.2439, "step": 27882 }, { "epoch": 2.2588302009073233, "grad_norm": 0.060060031712055206, "learning_rate": 9.673252621630136e-05, "loss": 0.243, "step": 27883 }, { "epoch": 2.2589112119248216, "grad_norm": 0.0587458573281765, "learning_rate": 9.672802556370674e-05, "loss": 0.2252, "step": 27884 }, { "epoch": 2.2589922229423203, "grad_norm": 0.056831326335668564, "learning_rate": 9.672352491111212e-05, "loss": 0.2175, "step": 27885 }, { "epoch": 2.2590732339598185, "grad_norm": 0.06255652010440826, "learning_rate": 9.671902425851748e-05, "loss": 0.219, "step": 27886 }, { "epoch": 2.2591542449773168, "grad_norm": 0.06168430671095848, "learning_rate": 9.671452360592287e-05, "loss": 0.2411, "step": 27887 }, { "epoch": 2.2592352559948155, "grad_norm": 0.06198149546980858, "learning_rate": 9.671002295332824e-05, "loss": 0.2581, "step": 27888 }, { "epoch": 2.2593162670123137, "grad_norm": 0.06613697111606598, "learning_rate": 9.67055223007336e-05, "loss": 0.258, "step": 27889 }, { "epoch": 2.259397278029812, "grad_norm": 0.059314556419849396, "learning_rate": 9.670102164813899e-05, "loss": 0.2882, "step": 27890 }, { "epoch": 2.2594782890473106, "grad_norm": 0.04973381385207176, "learning_rate": 9.669652099554437e-05, "loss": 0.2104, "step": 27891 }, { "epoch": 2.259559300064809, "grad_norm": 0.07927535474300385, "learning_rate": 9.669202034294973e-05, "loss": 0.2664, "step": 27892 }, { "epoch": 2.259640311082307, "grad_norm": 0.06386194378137589, "learning_rate": 9.668751969035511e-05, "loss": 0.2277, "step": 27893 }, { "epoch": 2.2597213220998054, "grad_norm": 0.07194879651069641, "learning_rate": 9.668301903776049e-05, "loss": 0.2376, "step": 27894 }, { "epoch": 2.259802333117304, "grad_norm": 0.057715609669685364, "learning_rate": 9.667851838516585e-05, "loss": 0.212, "step": 27895 }, { "epoch": 2.2598833441348023, "grad_norm": 0.0678049772977829, "learning_rate": 9.667401773257123e-05, "loss": 0.2616, "step": 27896 }, { "epoch": 2.2599643551523005, "grad_norm": 0.06508383899927139, "learning_rate": 9.66695170799766e-05, "loss": 0.2513, "step": 27897 }, { "epoch": 2.2600453661697992, "grad_norm": 0.06817775964736938, "learning_rate": 9.666501642738197e-05, "loss": 0.2594, "step": 27898 }, { "epoch": 2.2601263771872975, "grad_norm": 0.04622168466448784, "learning_rate": 9.666051577478735e-05, "loss": 0.1943, "step": 27899 }, { "epoch": 2.2602073882047957, "grad_norm": 0.06753421574831009, "learning_rate": 9.665601512219273e-05, "loss": 0.2419, "step": 27900 }, { "epoch": 2.2602883992222944, "grad_norm": 0.05535978451371193, "learning_rate": 9.665151446959809e-05, "loss": 0.2371, "step": 27901 }, { "epoch": 2.2603694102397927, "grad_norm": 0.05608188360929489, "learning_rate": 9.664701381700347e-05, "loss": 0.2404, "step": 27902 }, { "epoch": 2.260450421257291, "grad_norm": 0.07442081719636917, "learning_rate": 9.664251316440885e-05, "loss": 0.2929, "step": 27903 }, { "epoch": 2.2605314322747896, "grad_norm": 0.06551259756088257, "learning_rate": 9.663801251181421e-05, "loss": 0.2504, "step": 27904 }, { "epoch": 2.260612443292288, "grad_norm": 0.06035178527235985, "learning_rate": 9.66335118592196e-05, "loss": 0.2239, "step": 27905 }, { "epoch": 2.260693454309786, "grad_norm": 0.05210309103131294, "learning_rate": 9.662901120662497e-05, "loss": 0.2309, "step": 27906 }, { "epoch": 2.2607744653272843, "grad_norm": 0.05578669160604477, "learning_rate": 9.662451055403033e-05, "loss": 0.2555, "step": 27907 }, { "epoch": 2.260855476344783, "grad_norm": 0.059826821088790894, "learning_rate": 9.662000990143572e-05, "loss": 0.2674, "step": 27908 }, { "epoch": 2.2609364873622813, "grad_norm": 0.055484119802713394, "learning_rate": 9.661550924884109e-05, "loss": 0.2559, "step": 27909 }, { "epoch": 2.2610174983797795, "grad_norm": 0.06662512570619583, "learning_rate": 9.661100859624645e-05, "loss": 0.2428, "step": 27910 }, { "epoch": 2.261098509397278, "grad_norm": 0.06647738069295883, "learning_rate": 9.660650794365184e-05, "loss": 0.2705, "step": 27911 }, { "epoch": 2.2611795204147764, "grad_norm": 0.07568782567977905, "learning_rate": 9.660200729105721e-05, "loss": 0.2397, "step": 27912 }, { "epoch": 2.2612605314322747, "grad_norm": 0.05346573889255524, "learning_rate": 9.659750663846258e-05, "loss": 0.2155, "step": 27913 }, { "epoch": 2.261341542449773, "grad_norm": 0.05152289569377899, "learning_rate": 9.659300598586796e-05, "loss": 0.2347, "step": 27914 }, { "epoch": 2.2614225534672716, "grad_norm": 0.06577225774526596, "learning_rate": 9.658850533327333e-05, "loss": 0.2841, "step": 27915 }, { "epoch": 2.26150356448477, "grad_norm": 0.061457157135009766, "learning_rate": 9.65840046806787e-05, "loss": 0.2433, "step": 27916 }, { "epoch": 2.261584575502268, "grad_norm": 0.053514160215854645, "learning_rate": 9.657950402808408e-05, "loss": 0.2233, "step": 27917 }, { "epoch": 2.261665586519767, "grad_norm": 0.06306682527065277, "learning_rate": 9.657500337548945e-05, "loss": 0.286, "step": 27918 }, { "epoch": 2.261746597537265, "grad_norm": 0.06268003582954407, "learning_rate": 9.657050272289483e-05, "loss": 0.2605, "step": 27919 }, { "epoch": 2.2618276085547633, "grad_norm": 0.0637531504034996, "learning_rate": 9.65660020703002e-05, "loss": 0.2675, "step": 27920 }, { "epoch": 2.261908619572262, "grad_norm": 0.06470780819654465, "learning_rate": 9.656150141770557e-05, "loss": 0.2344, "step": 27921 }, { "epoch": 2.26198963058976, "grad_norm": 0.06864067912101746, "learning_rate": 9.655700076511095e-05, "loss": 0.2639, "step": 27922 }, { "epoch": 2.2620706416072585, "grad_norm": 0.06445959955453873, "learning_rate": 9.655250011251632e-05, "loss": 0.2367, "step": 27923 }, { "epoch": 2.262151652624757, "grad_norm": 0.0648827999830246, "learning_rate": 9.65479994599217e-05, "loss": 0.2133, "step": 27924 }, { "epoch": 2.2622326636422554, "grad_norm": 0.06934642791748047, "learning_rate": 9.654349880732707e-05, "loss": 0.2511, "step": 27925 }, { "epoch": 2.2623136746597536, "grad_norm": 0.06981291621923447, "learning_rate": 9.653899815473244e-05, "loss": 0.2763, "step": 27926 }, { "epoch": 2.2623946856772523, "grad_norm": 0.07649930566549301, "learning_rate": 9.653449750213781e-05, "loss": 0.245, "step": 27927 }, { "epoch": 2.2624756966947506, "grad_norm": 0.06518231332302094, "learning_rate": 9.652999684954319e-05, "loss": 0.2537, "step": 27928 }, { "epoch": 2.262556707712249, "grad_norm": 0.051750268787145615, "learning_rate": 9.652549619694856e-05, "loss": 0.225, "step": 27929 }, { "epoch": 2.262637718729747, "grad_norm": 0.07412702590227127, "learning_rate": 9.652099554435394e-05, "loss": 0.2511, "step": 27930 }, { "epoch": 2.2627187297472457, "grad_norm": 0.057747166603803635, "learning_rate": 9.651649489175931e-05, "loss": 0.2292, "step": 27931 }, { "epoch": 2.262799740764744, "grad_norm": 0.07066566497087479, "learning_rate": 9.651199423916468e-05, "loss": 0.2743, "step": 27932 }, { "epoch": 2.2628807517822422, "grad_norm": 0.06702201068401337, "learning_rate": 9.650749358657006e-05, "loss": 0.2583, "step": 27933 }, { "epoch": 2.262961762799741, "grad_norm": 0.0659802109003067, "learning_rate": 9.650299293397543e-05, "loss": 0.2497, "step": 27934 }, { "epoch": 2.263042773817239, "grad_norm": 0.06113113835453987, "learning_rate": 9.64984922813808e-05, "loss": 0.2161, "step": 27935 }, { "epoch": 2.2631237848347374, "grad_norm": 0.07334668189287186, "learning_rate": 9.649399162878618e-05, "loss": 0.2582, "step": 27936 }, { "epoch": 2.2632047958522357, "grad_norm": 0.057873696088790894, "learning_rate": 9.648949097619155e-05, "loss": 0.249, "step": 27937 }, { "epoch": 2.2632858068697344, "grad_norm": 0.05299917235970497, "learning_rate": 9.648499032359692e-05, "loss": 0.2305, "step": 27938 }, { "epoch": 2.2633668178872326, "grad_norm": 0.06792672723531723, "learning_rate": 9.648048967100231e-05, "loss": 0.2545, "step": 27939 }, { "epoch": 2.263447828904731, "grad_norm": 0.06435631215572357, "learning_rate": 9.647598901840767e-05, "loss": 0.2659, "step": 27940 }, { "epoch": 2.2635288399222295, "grad_norm": 0.06368466466665268, "learning_rate": 9.647148836581305e-05, "loss": 0.2745, "step": 27941 }, { "epoch": 2.2636098509397278, "grad_norm": 0.061373911798000336, "learning_rate": 9.646698771321843e-05, "loss": 0.2489, "step": 27942 }, { "epoch": 2.263690861957226, "grad_norm": 0.06631971150636673, "learning_rate": 9.646248706062379e-05, "loss": 0.2395, "step": 27943 }, { "epoch": 2.2637718729747247, "grad_norm": 0.058606911450624466, "learning_rate": 9.645798640802917e-05, "loss": 0.2348, "step": 27944 }, { "epoch": 2.263852883992223, "grad_norm": 0.0627407506108284, "learning_rate": 9.645348575543455e-05, "loss": 0.2781, "step": 27945 }, { "epoch": 2.263933895009721, "grad_norm": 0.05077546089887619, "learning_rate": 9.644898510283991e-05, "loss": 0.2521, "step": 27946 }, { "epoch": 2.26401490602722, "grad_norm": 0.07406048476696014, "learning_rate": 9.644448445024529e-05, "loss": 0.2675, "step": 27947 }, { "epoch": 2.264095917044718, "grad_norm": 0.057797353714704514, "learning_rate": 9.643998379765067e-05, "loss": 0.1935, "step": 27948 }, { "epoch": 2.2641769280622164, "grad_norm": 0.05993032827973366, "learning_rate": 9.643548314505603e-05, "loss": 0.2157, "step": 27949 }, { "epoch": 2.264257939079715, "grad_norm": 0.059989865869283676, "learning_rate": 9.643098249246141e-05, "loss": 0.2519, "step": 27950 }, { "epoch": 2.2643389500972133, "grad_norm": 0.07011564821004868, "learning_rate": 9.64264818398668e-05, "loss": 0.2762, "step": 27951 }, { "epoch": 2.2644199611147116, "grad_norm": 0.0700843557715416, "learning_rate": 9.642198118727215e-05, "loss": 0.2355, "step": 27952 }, { "epoch": 2.26450097213221, "grad_norm": 0.06506705284118652, "learning_rate": 9.641748053467753e-05, "loss": 0.2306, "step": 27953 }, { "epoch": 2.2645819831497085, "grad_norm": 0.06078203022480011, "learning_rate": 9.641297988208292e-05, "loss": 0.2461, "step": 27954 }, { "epoch": 2.2646629941672067, "grad_norm": 0.04927067831158638, "learning_rate": 9.640847922948828e-05, "loss": 0.2106, "step": 27955 }, { "epoch": 2.264744005184705, "grad_norm": 0.05114800110459328, "learning_rate": 9.640397857689365e-05, "loss": 0.2107, "step": 27956 }, { "epoch": 2.2648250162022037, "grad_norm": 0.06491793692111969, "learning_rate": 9.639947792429904e-05, "loss": 0.2448, "step": 27957 }, { "epoch": 2.264906027219702, "grad_norm": 0.07158529758453369, "learning_rate": 9.63949772717044e-05, "loss": 0.2538, "step": 27958 }, { "epoch": 2.2649870382372, "grad_norm": 0.07513157278299332, "learning_rate": 9.639047661910977e-05, "loss": 0.2702, "step": 27959 }, { "epoch": 2.2650680492546984, "grad_norm": 0.06532614678144455, "learning_rate": 9.638597596651516e-05, "loss": 0.2587, "step": 27960 }, { "epoch": 2.265149060272197, "grad_norm": 0.06393644958734512, "learning_rate": 9.638147531392052e-05, "loss": 0.2593, "step": 27961 }, { "epoch": 2.2652300712896953, "grad_norm": 0.058122579008340836, "learning_rate": 9.637697466132589e-05, "loss": 0.2569, "step": 27962 }, { "epoch": 2.2653110823071936, "grad_norm": 0.05314813181757927, "learning_rate": 9.637247400873128e-05, "loss": 0.2269, "step": 27963 }, { "epoch": 2.2653920933246923, "grad_norm": 0.06928487122058868, "learning_rate": 9.636797335613664e-05, "loss": 0.2635, "step": 27964 }, { "epoch": 2.2654731043421905, "grad_norm": 0.05637259781360626, "learning_rate": 9.636347270354203e-05, "loss": 0.2504, "step": 27965 }, { "epoch": 2.2655541153596888, "grad_norm": 0.07404623925685883, "learning_rate": 9.63589720509474e-05, "loss": 0.2725, "step": 27966 }, { "epoch": 2.2656351263771874, "grad_norm": 0.059635065495967865, "learning_rate": 9.635447139835276e-05, "loss": 0.2214, "step": 27967 }, { "epoch": 2.2657161373946857, "grad_norm": 0.07493023574352264, "learning_rate": 9.634997074575815e-05, "loss": 0.2995, "step": 27968 }, { "epoch": 2.265797148412184, "grad_norm": 0.07327574491500854, "learning_rate": 9.634547009316352e-05, "loss": 0.2793, "step": 27969 }, { "epoch": 2.2658781594296826, "grad_norm": 0.07068467885255814, "learning_rate": 9.634096944056888e-05, "loss": 0.2383, "step": 27970 }, { "epoch": 2.265959170447181, "grad_norm": 0.07366050779819489, "learning_rate": 9.633646878797427e-05, "loss": 0.2369, "step": 27971 }, { "epoch": 2.266040181464679, "grad_norm": 0.05504010245203972, "learning_rate": 9.633196813537964e-05, "loss": 0.2442, "step": 27972 }, { "epoch": 2.266121192482178, "grad_norm": 0.05307780206203461, "learning_rate": 9.6327467482785e-05, "loss": 0.2648, "step": 27973 }, { "epoch": 2.266202203499676, "grad_norm": 0.058790065348148346, "learning_rate": 9.632296683019039e-05, "loss": 0.2295, "step": 27974 }, { "epoch": 2.2662832145171743, "grad_norm": 0.07024440169334412, "learning_rate": 9.631846617759576e-05, "loss": 0.2316, "step": 27975 }, { "epoch": 2.2663642255346725, "grad_norm": 0.07419854402542114, "learning_rate": 9.631396552500112e-05, "loss": 0.2755, "step": 27976 }, { "epoch": 2.2664452365521712, "grad_norm": 0.0637488067150116, "learning_rate": 9.630946487240651e-05, "loss": 0.264, "step": 27977 }, { "epoch": 2.2665262475696695, "grad_norm": 0.08120803534984589, "learning_rate": 9.630496421981188e-05, "loss": 0.2495, "step": 27978 }, { "epoch": 2.2666072585871677, "grad_norm": 0.06741491705179214, "learning_rate": 9.630046356721724e-05, "loss": 0.2777, "step": 27979 }, { "epoch": 2.2666882696046664, "grad_norm": 0.06208684667944908, "learning_rate": 9.629596291462263e-05, "loss": 0.2689, "step": 27980 }, { "epoch": 2.2667692806221647, "grad_norm": 0.07637951523065567, "learning_rate": 9.6291462262028e-05, "loss": 0.3162, "step": 27981 }, { "epoch": 2.266850291639663, "grad_norm": 0.0612499862909317, "learning_rate": 9.628696160943336e-05, "loss": 0.26, "step": 27982 }, { "epoch": 2.266931302657161, "grad_norm": 0.07130424678325653, "learning_rate": 9.628246095683875e-05, "loss": 0.2524, "step": 27983 }, { "epoch": 2.26701231367466, "grad_norm": 0.07146918028593063, "learning_rate": 9.627796030424412e-05, "loss": 0.2477, "step": 27984 }, { "epoch": 2.267093324692158, "grad_norm": 0.06923989951610565, "learning_rate": 9.627345965164948e-05, "loss": 0.2463, "step": 27985 }, { "epoch": 2.2671743357096563, "grad_norm": 0.05341016501188278, "learning_rate": 9.626895899905487e-05, "loss": 0.2214, "step": 27986 }, { "epoch": 2.267255346727155, "grad_norm": 0.07137572765350342, "learning_rate": 9.626445834646024e-05, "loss": 0.2466, "step": 27987 }, { "epoch": 2.2673363577446533, "grad_norm": 0.060765862464904785, "learning_rate": 9.62599576938656e-05, "loss": 0.2313, "step": 27988 }, { "epoch": 2.2674173687621515, "grad_norm": 0.06152436509728432, "learning_rate": 9.625545704127099e-05, "loss": 0.2466, "step": 27989 }, { "epoch": 2.26749837977965, "grad_norm": 0.05408678948879242, "learning_rate": 9.625095638867637e-05, "loss": 0.2439, "step": 27990 }, { "epoch": 2.2675793907971484, "grad_norm": 0.053834687918424606, "learning_rate": 9.624645573608173e-05, "loss": 0.246, "step": 27991 }, { "epoch": 2.2676604018146467, "grad_norm": 0.06949444115161896, "learning_rate": 9.624195508348711e-05, "loss": 0.2376, "step": 27992 }, { "epoch": 2.2677414128321454, "grad_norm": 0.060450479388237, "learning_rate": 9.623745443089249e-05, "loss": 0.2117, "step": 27993 }, { "epoch": 2.2678224238496436, "grad_norm": 0.05990859866142273, "learning_rate": 9.623295377829786e-05, "loss": 0.274, "step": 27994 }, { "epoch": 2.267903434867142, "grad_norm": 0.0631098821759224, "learning_rate": 9.622845312570323e-05, "loss": 0.2882, "step": 27995 }, { "epoch": 2.2679844458846405, "grad_norm": 0.06892747431993484, "learning_rate": 9.622395247310861e-05, "loss": 0.227, "step": 27996 }, { "epoch": 2.268065456902139, "grad_norm": 0.05815494433045387, "learning_rate": 9.621945182051398e-05, "loss": 0.2431, "step": 27997 }, { "epoch": 2.268146467919637, "grad_norm": 0.051642052829265594, "learning_rate": 9.621495116791935e-05, "loss": 0.2257, "step": 27998 }, { "epoch": 2.2682274789371353, "grad_norm": 0.05603569746017456, "learning_rate": 9.621045051532473e-05, "loss": 0.26, "step": 27999 }, { "epoch": 2.268308489954634, "grad_norm": 0.0718226209282875, "learning_rate": 9.62059498627301e-05, "loss": 0.2461, "step": 28000 }, { "epoch": 2.268389500972132, "grad_norm": 0.0677584558725357, "learning_rate": 9.620144921013548e-05, "loss": 0.2434, "step": 28001 }, { "epoch": 2.2684705119896305, "grad_norm": 0.05520008131861687, "learning_rate": 9.619694855754085e-05, "loss": 0.2681, "step": 28002 }, { "epoch": 2.268551523007129, "grad_norm": 0.06791157275438309, "learning_rate": 9.619244790494622e-05, "loss": 0.2695, "step": 28003 }, { "epoch": 2.2686325340246274, "grad_norm": 0.05662940442562103, "learning_rate": 9.61879472523516e-05, "loss": 0.2663, "step": 28004 }, { "epoch": 2.2687135450421256, "grad_norm": 0.06527923047542572, "learning_rate": 9.618344659975697e-05, "loss": 0.2916, "step": 28005 }, { "epoch": 2.268794556059624, "grad_norm": 0.05786988511681557, "learning_rate": 9.617894594716234e-05, "loss": 0.231, "step": 28006 }, { "epoch": 2.2688755670771226, "grad_norm": 0.06730439513921738, "learning_rate": 9.617444529456772e-05, "loss": 0.2724, "step": 28007 }, { "epoch": 2.268956578094621, "grad_norm": 0.05449024215340614, "learning_rate": 9.616994464197309e-05, "loss": 0.2368, "step": 28008 }, { "epoch": 2.269037589112119, "grad_norm": 0.0632796511054039, "learning_rate": 9.616544398937846e-05, "loss": 0.2399, "step": 28009 }, { "epoch": 2.2691186001296177, "grad_norm": 0.06255123019218445, "learning_rate": 9.616094333678384e-05, "loss": 0.2599, "step": 28010 }, { "epoch": 2.269199611147116, "grad_norm": 0.060231465846300125, "learning_rate": 9.615644268418921e-05, "loss": 0.2241, "step": 28011 }, { "epoch": 2.2692806221646142, "grad_norm": 0.060310233384370804, "learning_rate": 9.615194203159458e-05, "loss": 0.2308, "step": 28012 }, { "epoch": 2.269361633182113, "grad_norm": 0.08699499070644379, "learning_rate": 9.614744137899996e-05, "loss": 0.2961, "step": 28013 }, { "epoch": 2.269442644199611, "grad_norm": 0.06907127052545547, "learning_rate": 9.614294072640533e-05, "loss": 0.239, "step": 28014 }, { "epoch": 2.2695236552171094, "grad_norm": 0.06631629168987274, "learning_rate": 9.61384400738107e-05, "loss": 0.2425, "step": 28015 }, { "epoch": 2.269604666234608, "grad_norm": 0.07456733286380768, "learning_rate": 9.613393942121608e-05, "loss": 0.2669, "step": 28016 }, { "epoch": 2.2696856772521063, "grad_norm": 0.07179507613182068, "learning_rate": 9.612943876862145e-05, "loss": 0.2641, "step": 28017 }, { "epoch": 2.2697666882696046, "grad_norm": 0.06662650406360626, "learning_rate": 9.612493811602683e-05, "loss": 0.2811, "step": 28018 }, { "epoch": 2.2698476992871033, "grad_norm": 0.05604487657546997, "learning_rate": 9.61204374634322e-05, "loss": 0.2491, "step": 28019 }, { "epoch": 2.2699287103046015, "grad_norm": 0.0543643943965435, "learning_rate": 9.611593681083759e-05, "loss": 0.2541, "step": 28020 }, { "epoch": 2.2700097213220998, "grad_norm": 0.06231965124607086, "learning_rate": 9.611143615824295e-05, "loss": 0.196, "step": 28021 }, { "epoch": 2.270090732339598, "grad_norm": 0.06550896167755127, "learning_rate": 9.610693550564832e-05, "loss": 0.2541, "step": 28022 }, { "epoch": 2.2701717433570967, "grad_norm": 0.06826600432395935, "learning_rate": 9.610243485305371e-05, "loss": 0.2487, "step": 28023 }, { "epoch": 2.270252754374595, "grad_norm": 0.06269895285367966, "learning_rate": 9.609793420045907e-05, "loss": 0.2342, "step": 28024 }, { "epoch": 2.270333765392093, "grad_norm": 0.05146685615181923, "learning_rate": 9.609343354786444e-05, "loss": 0.2753, "step": 28025 }, { "epoch": 2.270414776409592, "grad_norm": 0.059559378772974014, "learning_rate": 9.608893289526983e-05, "loss": 0.2508, "step": 28026 }, { "epoch": 2.27049578742709, "grad_norm": 0.06171615794301033, "learning_rate": 9.608443224267519e-05, "loss": 0.2971, "step": 28027 }, { "epoch": 2.2705767984445884, "grad_norm": 0.05737600475549698, "learning_rate": 9.607993159008056e-05, "loss": 0.2523, "step": 28028 }, { "epoch": 2.2706578094620866, "grad_norm": 0.061808522790670395, "learning_rate": 9.607543093748595e-05, "loss": 0.231, "step": 28029 }, { "epoch": 2.2707388204795853, "grad_norm": 0.061562590301036835, "learning_rate": 9.607093028489131e-05, "loss": 0.2664, "step": 28030 }, { "epoch": 2.2708198314970836, "grad_norm": 0.06974106281995773, "learning_rate": 9.606642963229668e-05, "loss": 0.2272, "step": 28031 }, { "epoch": 2.270900842514582, "grad_norm": 0.05686337500810623, "learning_rate": 9.606192897970207e-05, "loss": 0.2348, "step": 28032 }, { "epoch": 2.2709818535320805, "grad_norm": 0.05605282634496689, "learning_rate": 9.605742832710743e-05, "loss": 0.2539, "step": 28033 }, { "epoch": 2.2710628645495787, "grad_norm": 0.07936391979455948, "learning_rate": 9.60529276745128e-05, "loss": 0.2369, "step": 28034 }, { "epoch": 2.271143875567077, "grad_norm": 0.06194789707660675, "learning_rate": 9.604842702191819e-05, "loss": 0.2124, "step": 28035 }, { "epoch": 2.2712248865845757, "grad_norm": 0.07164730876684189, "learning_rate": 9.604392636932355e-05, "loss": 0.2329, "step": 28036 }, { "epoch": 2.271305897602074, "grad_norm": 0.0761098638176918, "learning_rate": 9.603942571672892e-05, "loss": 0.2965, "step": 28037 }, { "epoch": 2.271386908619572, "grad_norm": 0.07668202370405197, "learning_rate": 9.603492506413431e-05, "loss": 0.2525, "step": 28038 }, { "epoch": 2.271467919637071, "grad_norm": 0.07283965498209, "learning_rate": 9.603042441153967e-05, "loss": 0.2561, "step": 28039 }, { "epoch": 2.271548930654569, "grad_norm": 0.07585301995277405, "learning_rate": 9.602592375894505e-05, "loss": 0.2746, "step": 28040 }, { "epoch": 2.2716299416720673, "grad_norm": 0.07666393369436264, "learning_rate": 9.602142310635043e-05, "loss": 0.2471, "step": 28041 }, { "epoch": 2.271710952689566, "grad_norm": 0.06626211851835251, "learning_rate": 9.601692245375579e-05, "loss": 0.274, "step": 28042 }, { "epoch": 2.2717919637070643, "grad_norm": 0.06695680320262909, "learning_rate": 9.601242180116117e-05, "loss": 0.2673, "step": 28043 }, { "epoch": 2.2718729747245625, "grad_norm": 0.07495789974927902, "learning_rate": 9.600792114856655e-05, "loss": 0.2563, "step": 28044 }, { "epoch": 2.2719539857420608, "grad_norm": 0.06226911023259163, "learning_rate": 9.600342049597191e-05, "loss": 0.2282, "step": 28045 }, { "epoch": 2.2720349967595594, "grad_norm": 0.0586124062538147, "learning_rate": 9.59989198433773e-05, "loss": 0.2222, "step": 28046 }, { "epoch": 2.2721160077770577, "grad_norm": 0.0961061641573906, "learning_rate": 9.599441919078267e-05, "loss": 0.2865, "step": 28047 }, { "epoch": 2.272197018794556, "grad_norm": 0.06482996046543121, "learning_rate": 9.598991853818803e-05, "loss": 0.2704, "step": 28048 }, { "epoch": 2.2722780298120546, "grad_norm": 0.07463010400533676, "learning_rate": 9.598541788559342e-05, "loss": 0.2539, "step": 28049 }, { "epoch": 2.272359040829553, "grad_norm": 0.05233949422836304, "learning_rate": 9.59809172329988e-05, "loss": 0.2534, "step": 28050 }, { "epoch": 2.272440051847051, "grad_norm": 0.07969961315393448, "learning_rate": 9.597641658040416e-05, "loss": 0.3144, "step": 28051 }, { "epoch": 2.2725210628645494, "grad_norm": 0.07647403329610825, "learning_rate": 9.597191592780954e-05, "loss": 0.2552, "step": 28052 }, { "epoch": 2.272602073882048, "grad_norm": 0.09753167629241943, "learning_rate": 9.596741527521492e-05, "loss": 0.2545, "step": 28053 }, { "epoch": 2.2726830848995463, "grad_norm": 0.06515941023826599, "learning_rate": 9.596291462262028e-05, "loss": 0.2693, "step": 28054 }, { "epoch": 2.2727640959170445, "grad_norm": 0.07915733009576797, "learning_rate": 9.595841397002566e-05, "loss": 0.2925, "step": 28055 }, { "epoch": 2.2728451069345432, "grad_norm": 0.0683445930480957, "learning_rate": 9.595391331743104e-05, "loss": 0.2546, "step": 28056 }, { "epoch": 2.2729261179520415, "grad_norm": 0.07178857177495956, "learning_rate": 9.59494126648364e-05, "loss": 0.2179, "step": 28057 }, { "epoch": 2.2730071289695397, "grad_norm": 0.05506483465433121, "learning_rate": 9.594491201224178e-05, "loss": 0.2233, "step": 28058 }, { "epoch": 2.2730881399870384, "grad_norm": 0.06703373789787292, "learning_rate": 9.594041135964716e-05, "loss": 0.2706, "step": 28059 }, { "epoch": 2.2731691510045366, "grad_norm": 0.06285927444696426, "learning_rate": 9.593591070705252e-05, "loss": 0.2609, "step": 28060 }, { "epoch": 2.273250162022035, "grad_norm": 0.056138258427381516, "learning_rate": 9.59314100544579e-05, "loss": 0.2039, "step": 28061 }, { "epoch": 2.2733311730395336, "grad_norm": 0.06609399616718292, "learning_rate": 9.592690940186328e-05, "loss": 0.2393, "step": 28062 }, { "epoch": 2.273412184057032, "grad_norm": 0.06169790029525757, "learning_rate": 9.592240874926864e-05, "loss": 0.2802, "step": 28063 }, { "epoch": 2.27349319507453, "grad_norm": 0.06889687478542328, "learning_rate": 9.591790809667403e-05, "loss": 0.2713, "step": 28064 }, { "epoch": 2.2735742060920288, "grad_norm": 0.0658617615699768, "learning_rate": 9.59134074440794e-05, "loss": 0.2538, "step": 28065 }, { "epoch": 2.273655217109527, "grad_norm": 0.0711151510477066, "learning_rate": 9.590890679148476e-05, "loss": 0.2761, "step": 28066 }, { "epoch": 2.2737362281270252, "grad_norm": 0.07032036036252975, "learning_rate": 9.590440613889015e-05, "loss": 0.285, "step": 28067 }, { "epoch": 2.2738172391445235, "grad_norm": 0.06317279487848282, "learning_rate": 9.589990548629552e-05, "loss": 0.231, "step": 28068 }, { "epoch": 2.273898250162022, "grad_norm": 0.06784936785697937, "learning_rate": 9.589540483370088e-05, "loss": 0.259, "step": 28069 }, { "epoch": 2.2739792611795204, "grad_norm": 0.06801003217697144, "learning_rate": 9.589090418110627e-05, "loss": 0.2417, "step": 28070 }, { "epoch": 2.2740602721970187, "grad_norm": 0.08054570853710175, "learning_rate": 9.588640352851164e-05, "loss": 0.248, "step": 28071 }, { "epoch": 2.2741412832145174, "grad_norm": 0.05918966606259346, "learning_rate": 9.588190287591701e-05, "loss": 0.2452, "step": 28072 }, { "epoch": 2.2742222942320156, "grad_norm": 0.06380657851696014, "learning_rate": 9.587740222332239e-05, "loss": 0.2177, "step": 28073 }, { "epoch": 2.274303305249514, "grad_norm": 0.07852265983819962, "learning_rate": 9.587290157072776e-05, "loss": 0.2489, "step": 28074 }, { "epoch": 2.274384316267012, "grad_norm": 0.07023780047893524, "learning_rate": 9.586840091813314e-05, "loss": 0.2442, "step": 28075 }, { "epoch": 2.274465327284511, "grad_norm": 0.08056506514549255, "learning_rate": 9.586390026553851e-05, "loss": 0.269, "step": 28076 }, { "epoch": 2.274546338302009, "grad_norm": 0.07773345708847046, "learning_rate": 9.585939961294388e-05, "loss": 0.2805, "step": 28077 }, { "epoch": 2.2746273493195073, "grad_norm": 0.06304016709327698, "learning_rate": 9.585489896034926e-05, "loss": 0.2316, "step": 28078 }, { "epoch": 2.274708360337006, "grad_norm": 0.07088416814804077, "learning_rate": 9.585039830775463e-05, "loss": 0.2349, "step": 28079 }, { "epoch": 2.274789371354504, "grad_norm": 0.061816342175006866, "learning_rate": 9.584589765516e-05, "loss": 0.2104, "step": 28080 }, { "epoch": 2.2748703823720025, "grad_norm": 0.062373705208301544, "learning_rate": 9.584139700256538e-05, "loss": 0.235, "step": 28081 }, { "epoch": 2.274951393389501, "grad_norm": 0.06598381698131561, "learning_rate": 9.583689634997075e-05, "loss": 0.2672, "step": 28082 }, { "epoch": 2.2750324044069994, "grad_norm": 0.06689947098493576, "learning_rate": 9.583239569737612e-05, "loss": 0.2961, "step": 28083 }, { "epoch": 2.2751134154244976, "grad_norm": 0.07751460373401642, "learning_rate": 9.58278950447815e-05, "loss": 0.2693, "step": 28084 }, { "epoch": 2.2751944264419963, "grad_norm": 0.0560307651758194, "learning_rate": 9.582339439218687e-05, "loss": 0.2455, "step": 28085 }, { "epoch": 2.2752754374594946, "grad_norm": 0.0636034682393074, "learning_rate": 9.581889373959224e-05, "loss": 0.2222, "step": 28086 }, { "epoch": 2.275356448476993, "grad_norm": 0.07131937891244888, "learning_rate": 9.581439308699762e-05, "loss": 0.2621, "step": 28087 }, { "epoch": 2.2754374594944915, "grad_norm": 0.0636332780122757, "learning_rate": 9.580989243440299e-05, "loss": 0.2331, "step": 28088 }, { "epoch": 2.2755184705119897, "grad_norm": 0.04460746794939041, "learning_rate": 9.580539178180837e-05, "loss": 0.2375, "step": 28089 }, { "epoch": 2.275599481529488, "grad_norm": 0.06167895346879959, "learning_rate": 9.580089112921374e-05, "loss": 0.2246, "step": 28090 }, { "epoch": 2.2756804925469862, "grad_norm": 0.06896967440843582, "learning_rate": 9.579639047661911e-05, "loss": 0.2652, "step": 28091 }, { "epoch": 2.275761503564485, "grad_norm": 0.06739029288291931, "learning_rate": 9.579188982402449e-05, "loss": 0.2471, "step": 28092 }, { "epoch": 2.275842514581983, "grad_norm": 0.05659691244363785, "learning_rate": 9.578738917142986e-05, "loss": 0.2575, "step": 28093 }, { "epoch": 2.2759235255994814, "grad_norm": 0.07226850092411041, "learning_rate": 9.578288851883523e-05, "loss": 0.2619, "step": 28094 }, { "epoch": 2.27600453661698, "grad_norm": 0.056813836097717285, "learning_rate": 9.577838786624061e-05, "loss": 0.2418, "step": 28095 }, { "epoch": 2.2760855476344783, "grad_norm": 0.05825258418917656, "learning_rate": 9.577388721364598e-05, "loss": 0.2707, "step": 28096 }, { "epoch": 2.2761665586519766, "grad_norm": 0.07265456020832062, "learning_rate": 9.576938656105135e-05, "loss": 0.2919, "step": 28097 }, { "epoch": 2.276247569669475, "grad_norm": 0.07091055065393448, "learning_rate": 9.576488590845674e-05, "loss": 0.2699, "step": 28098 }, { "epoch": 2.2763285806869735, "grad_norm": 0.05324326083064079, "learning_rate": 9.57603852558621e-05, "loss": 0.224, "step": 28099 }, { "epoch": 2.2764095917044718, "grad_norm": 0.05966367945075035, "learning_rate": 9.575588460326748e-05, "loss": 0.2656, "step": 28100 }, { "epoch": 2.27649060272197, "grad_norm": 0.06723299622535706, "learning_rate": 9.575138395067286e-05, "loss": 0.2725, "step": 28101 }, { "epoch": 2.2765716137394687, "grad_norm": 0.07250206917524338, "learning_rate": 9.574688329807822e-05, "loss": 0.253, "step": 28102 }, { "epoch": 2.276652624756967, "grad_norm": 0.05841446295380592, "learning_rate": 9.57423826454836e-05, "loss": 0.2519, "step": 28103 }, { "epoch": 2.276733635774465, "grad_norm": 0.06128285080194473, "learning_rate": 9.573788199288898e-05, "loss": 0.2297, "step": 28104 }, { "epoch": 2.276814646791964, "grad_norm": 0.05345291271805763, "learning_rate": 9.573338134029434e-05, "loss": 0.2033, "step": 28105 }, { "epoch": 2.276895657809462, "grad_norm": 0.0680374950170517, "learning_rate": 9.572888068769972e-05, "loss": 0.2673, "step": 28106 }, { "epoch": 2.2769766688269604, "grad_norm": 0.06612028181552887, "learning_rate": 9.57243800351051e-05, "loss": 0.2735, "step": 28107 }, { "epoch": 2.277057679844459, "grad_norm": 0.06396949291229248, "learning_rate": 9.571987938251046e-05, "loss": 0.2196, "step": 28108 }, { "epoch": 2.2771386908619573, "grad_norm": 0.0603230744600296, "learning_rate": 9.571537872991584e-05, "loss": 0.2457, "step": 28109 }, { "epoch": 2.2772197018794555, "grad_norm": 0.05682147294282913, "learning_rate": 9.571087807732122e-05, "loss": 0.2556, "step": 28110 }, { "epoch": 2.277300712896954, "grad_norm": 0.06449034810066223, "learning_rate": 9.570637742472658e-05, "loss": 0.2305, "step": 28111 }, { "epoch": 2.2773817239144525, "grad_norm": 0.06559954583644867, "learning_rate": 9.570187677213196e-05, "loss": 0.238, "step": 28112 }, { "epoch": 2.2774627349319507, "grad_norm": 0.07063081860542297, "learning_rate": 9.569737611953735e-05, "loss": 0.2515, "step": 28113 }, { "epoch": 2.277543745949449, "grad_norm": 0.06556065380573273, "learning_rate": 9.56928754669427e-05, "loss": 0.2922, "step": 28114 }, { "epoch": 2.2776247569669477, "grad_norm": 0.06445152312517166, "learning_rate": 9.568837481434808e-05, "loss": 0.2591, "step": 28115 }, { "epoch": 2.277705767984446, "grad_norm": 0.07554656267166138, "learning_rate": 9.568387416175347e-05, "loss": 0.2867, "step": 28116 }, { "epoch": 2.277786779001944, "grad_norm": 0.06792528927326202, "learning_rate": 9.567937350915883e-05, "loss": 0.2787, "step": 28117 }, { "epoch": 2.2778677900194424, "grad_norm": 0.06126142665743828, "learning_rate": 9.56748728565642e-05, "loss": 0.2588, "step": 28118 }, { "epoch": 2.277948801036941, "grad_norm": 0.06154099851846695, "learning_rate": 9.567037220396959e-05, "loss": 0.2529, "step": 28119 }, { "epoch": 2.2780298120544393, "grad_norm": 0.06487050652503967, "learning_rate": 9.566587155137495e-05, "loss": 0.2491, "step": 28120 }, { "epoch": 2.2781108230719376, "grad_norm": 0.06991656869649887, "learning_rate": 9.566137089878032e-05, "loss": 0.2596, "step": 28121 }, { "epoch": 2.2781918340894363, "grad_norm": 0.07080622762441635, "learning_rate": 9.565687024618571e-05, "loss": 0.2844, "step": 28122 }, { "epoch": 2.2782728451069345, "grad_norm": 0.07612073421478271, "learning_rate": 9.565236959359107e-05, "loss": 0.2624, "step": 28123 }, { "epoch": 2.2783538561244328, "grad_norm": 0.06403160840272903, "learning_rate": 9.564786894099646e-05, "loss": 0.2432, "step": 28124 }, { "epoch": 2.2784348671419314, "grad_norm": 0.06373529881238937, "learning_rate": 9.564336828840183e-05, "loss": 0.2473, "step": 28125 }, { "epoch": 2.2785158781594297, "grad_norm": 0.05918858200311661, "learning_rate": 9.563886763580719e-05, "loss": 0.2741, "step": 28126 }, { "epoch": 2.278596889176928, "grad_norm": 0.07889010012149811, "learning_rate": 9.563436698321258e-05, "loss": 0.2177, "step": 28127 }, { "epoch": 2.2786779001944266, "grad_norm": 0.06980336457490921, "learning_rate": 9.562986633061795e-05, "loss": 0.2733, "step": 28128 }, { "epoch": 2.278758911211925, "grad_norm": 0.08205079287290573, "learning_rate": 9.562536567802331e-05, "loss": 0.2339, "step": 28129 }, { "epoch": 2.278839922229423, "grad_norm": 0.07007057219743729, "learning_rate": 9.56208650254287e-05, "loss": 0.2609, "step": 28130 }, { "epoch": 2.278920933246922, "grad_norm": 0.061237528920173645, "learning_rate": 9.561636437283407e-05, "loss": 0.2355, "step": 28131 }, { "epoch": 2.27900194426442, "grad_norm": 0.05059129372239113, "learning_rate": 9.561186372023943e-05, "loss": 0.2554, "step": 28132 }, { "epoch": 2.2790829552819183, "grad_norm": 0.06367185711860657, "learning_rate": 9.560736306764482e-05, "loss": 0.258, "step": 28133 }, { "epoch": 2.2791639662994165, "grad_norm": 0.061538007110357285, "learning_rate": 9.560286241505019e-05, "loss": 0.2436, "step": 28134 }, { "epoch": 2.279244977316915, "grad_norm": 0.06117791682481766, "learning_rate": 9.559836176245555e-05, "loss": 0.2555, "step": 28135 }, { "epoch": 2.2793259883344135, "grad_norm": 0.06512202322483063, "learning_rate": 9.559386110986094e-05, "loss": 0.2767, "step": 28136 }, { "epoch": 2.2794069993519117, "grad_norm": 0.054416246712207794, "learning_rate": 9.558936045726631e-05, "loss": 0.246, "step": 28137 }, { "epoch": 2.2794880103694104, "grad_norm": 0.07490819692611694, "learning_rate": 9.558485980467167e-05, "loss": 0.2663, "step": 28138 }, { "epoch": 2.2795690213869086, "grad_norm": 0.055716339498758316, "learning_rate": 9.558035915207706e-05, "loss": 0.2518, "step": 28139 }, { "epoch": 2.279650032404407, "grad_norm": 0.07080546766519547, "learning_rate": 9.557585849948243e-05, "loss": 0.2555, "step": 28140 }, { "epoch": 2.279731043421905, "grad_norm": 0.07642080634832382, "learning_rate": 9.557135784688779e-05, "loss": 0.3134, "step": 28141 }, { "epoch": 2.279812054439404, "grad_norm": 0.0733831450343132, "learning_rate": 9.556685719429318e-05, "loss": 0.2263, "step": 28142 }, { "epoch": 2.279893065456902, "grad_norm": 0.08651655912399292, "learning_rate": 9.556235654169855e-05, "loss": 0.2357, "step": 28143 }, { "epoch": 2.2799740764744003, "grad_norm": 0.06302259117364883, "learning_rate": 9.555785588910391e-05, "loss": 0.2157, "step": 28144 }, { "epoch": 2.280055087491899, "grad_norm": 0.06950745731592178, "learning_rate": 9.55533552365093e-05, "loss": 0.2786, "step": 28145 }, { "epoch": 2.2801360985093972, "grad_norm": 0.06577300280332565, "learning_rate": 9.554885458391467e-05, "loss": 0.2228, "step": 28146 }, { "epoch": 2.2802171095268955, "grad_norm": 0.07339700311422348, "learning_rate": 9.554435393132003e-05, "loss": 0.2488, "step": 28147 }, { "epoch": 2.280298120544394, "grad_norm": 0.06289852410554886, "learning_rate": 9.553985327872542e-05, "loss": 0.2438, "step": 28148 }, { "epoch": 2.2803791315618924, "grad_norm": 0.05764570087194443, "learning_rate": 9.55353526261308e-05, "loss": 0.2099, "step": 28149 }, { "epoch": 2.2804601425793907, "grad_norm": 0.04218946769833565, "learning_rate": 9.553085197353616e-05, "loss": 0.2068, "step": 28150 }, { "epoch": 2.2805411535968894, "grad_norm": 0.07371071726083755, "learning_rate": 9.552635132094154e-05, "loss": 0.2473, "step": 28151 }, { "epoch": 2.2806221646143876, "grad_norm": 0.06671245396137238, "learning_rate": 9.552185066834692e-05, "loss": 0.2579, "step": 28152 }, { "epoch": 2.280703175631886, "grad_norm": 0.06467222422361374, "learning_rate": 9.551735001575229e-05, "loss": 0.2633, "step": 28153 }, { "epoch": 2.2807841866493845, "grad_norm": 0.06984186172485352, "learning_rate": 9.551284936315766e-05, "loss": 0.2834, "step": 28154 }, { "epoch": 2.280865197666883, "grad_norm": 0.057516057044267654, "learning_rate": 9.550834871056304e-05, "loss": 0.2485, "step": 28155 }, { "epoch": 2.280946208684381, "grad_norm": 0.0576837882399559, "learning_rate": 9.550384805796841e-05, "loss": 0.2331, "step": 28156 }, { "epoch": 2.2810272197018793, "grad_norm": 0.06400992721319199, "learning_rate": 9.549934740537378e-05, "loss": 0.2409, "step": 28157 }, { "epoch": 2.281108230719378, "grad_norm": 0.07133117318153381, "learning_rate": 9.549484675277916e-05, "loss": 0.2394, "step": 28158 }, { "epoch": 2.281189241736876, "grad_norm": 0.05950386822223663, "learning_rate": 9.549034610018453e-05, "loss": 0.2629, "step": 28159 }, { "epoch": 2.2812702527543745, "grad_norm": 0.06598322093486786, "learning_rate": 9.54858454475899e-05, "loss": 0.2732, "step": 28160 }, { "epoch": 2.281351263771873, "grad_norm": 0.052976034581661224, "learning_rate": 9.548134479499528e-05, "loss": 0.246, "step": 28161 }, { "epoch": 2.2814322747893714, "grad_norm": 0.05692631006240845, "learning_rate": 9.547684414240065e-05, "loss": 0.2429, "step": 28162 }, { "epoch": 2.2815132858068696, "grad_norm": 0.06387508660554886, "learning_rate": 9.547234348980603e-05, "loss": 0.2653, "step": 28163 }, { "epoch": 2.281594296824368, "grad_norm": 0.06577061116695404, "learning_rate": 9.54678428372114e-05, "loss": 0.2142, "step": 28164 }, { "epoch": 2.2816753078418666, "grad_norm": 0.06224307790398598, "learning_rate": 9.546334218461677e-05, "loss": 0.2492, "step": 28165 }, { "epoch": 2.281756318859365, "grad_norm": 0.06197723001241684, "learning_rate": 9.545884153202215e-05, "loss": 0.2428, "step": 28166 }, { "epoch": 2.281837329876863, "grad_norm": 0.061254385858774185, "learning_rate": 9.545434087942752e-05, "loss": 0.2553, "step": 28167 }, { "epoch": 2.2819183408943617, "grad_norm": 0.06933100521564484, "learning_rate": 9.54498402268329e-05, "loss": 0.2643, "step": 28168 }, { "epoch": 2.28199935191186, "grad_norm": 0.0723806843161583, "learning_rate": 9.544533957423827e-05, "loss": 0.2492, "step": 28169 }, { "epoch": 2.2820803629293582, "grad_norm": 0.05900033935904503, "learning_rate": 9.544083892164364e-05, "loss": 0.2738, "step": 28170 }, { "epoch": 2.282161373946857, "grad_norm": 0.07795938849449158, "learning_rate": 9.543633826904901e-05, "loss": 0.2242, "step": 28171 }, { "epoch": 2.282242384964355, "grad_norm": 0.060051240026950836, "learning_rate": 9.543183761645439e-05, "loss": 0.2521, "step": 28172 }, { "epoch": 2.2823233959818534, "grad_norm": 0.061081644147634506, "learning_rate": 9.542733696385976e-05, "loss": 0.2866, "step": 28173 }, { "epoch": 2.282404406999352, "grad_norm": 0.07270535826683044, "learning_rate": 9.542283631126514e-05, "loss": 0.2235, "step": 28174 }, { "epoch": 2.2824854180168503, "grad_norm": 0.07448476552963257, "learning_rate": 9.541833565867051e-05, "loss": 0.2199, "step": 28175 }, { "epoch": 2.2825664290343486, "grad_norm": 0.06277775019407272, "learning_rate": 9.541383500607588e-05, "loss": 0.2548, "step": 28176 }, { "epoch": 2.2826474400518473, "grad_norm": 0.06676080077886581, "learning_rate": 9.540933435348126e-05, "loss": 0.2542, "step": 28177 }, { "epoch": 2.2827284510693455, "grad_norm": 0.05792508274316788, "learning_rate": 9.540483370088663e-05, "loss": 0.2852, "step": 28178 }, { "epoch": 2.2828094620868438, "grad_norm": 0.05737094581127167, "learning_rate": 9.540033304829202e-05, "loss": 0.2581, "step": 28179 }, { "epoch": 2.282890473104342, "grad_norm": 0.06211812049150467, "learning_rate": 9.539583239569738e-05, "loss": 0.2292, "step": 28180 }, { "epoch": 2.2829714841218407, "grad_norm": 0.07637868076562881, "learning_rate": 9.539133174310275e-05, "loss": 0.2691, "step": 28181 }, { "epoch": 2.283052495139339, "grad_norm": 0.06960336118936539, "learning_rate": 9.538683109050814e-05, "loss": 0.2326, "step": 28182 }, { "epoch": 2.283133506156837, "grad_norm": 0.06159220635890961, "learning_rate": 9.53823304379135e-05, "loss": 0.2531, "step": 28183 }, { "epoch": 2.283214517174336, "grad_norm": 0.062390245497226715, "learning_rate": 9.537782978531887e-05, "loss": 0.3095, "step": 28184 }, { "epoch": 2.283295528191834, "grad_norm": 0.06799671053886414, "learning_rate": 9.537332913272426e-05, "loss": 0.2446, "step": 28185 }, { "epoch": 2.2833765392093324, "grad_norm": 0.050637729465961456, "learning_rate": 9.536882848012962e-05, "loss": 0.2581, "step": 28186 }, { "epoch": 2.2834575502268306, "grad_norm": 0.08036299794912338, "learning_rate": 9.536432782753499e-05, "loss": 0.2413, "step": 28187 }, { "epoch": 2.2835385612443293, "grad_norm": 0.06844053417444229, "learning_rate": 9.535982717494038e-05, "loss": 0.2028, "step": 28188 }, { "epoch": 2.2836195722618275, "grad_norm": 0.07212093472480774, "learning_rate": 9.535532652234574e-05, "loss": 0.2397, "step": 28189 }, { "epoch": 2.283700583279326, "grad_norm": 0.0825839415192604, "learning_rate": 9.535082586975111e-05, "loss": 0.2579, "step": 28190 }, { "epoch": 2.2837815942968245, "grad_norm": 0.07898513227701187, "learning_rate": 9.53463252171565e-05, "loss": 0.2711, "step": 28191 }, { "epoch": 2.2838626053143227, "grad_norm": 0.08032089471817017, "learning_rate": 9.534182456456186e-05, "loss": 0.2724, "step": 28192 }, { "epoch": 2.283943616331821, "grad_norm": 0.07727602124214172, "learning_rate": 9.533732391196723e-05, "loss": 0.2241, "step": 28193 }, { "epoch": 2.2840246273493197, "grad_norm": 0.07366763800382614, "learning_rate": 9.533282325937262e-05, "loss": 0.2354, "step": 28194 }, { "epoch": 2.284105638366818, "grad_norm": 0.07446452230215073, "learning_rate": 9.532832260677798e-05, "loss": 0.2805, "step": 28195 }, { "epoch": 2.284186649384316, "grad_norm": 0.06326040625572205, "learning_rate": 9.532382195418335e-05, "loss": 0.2287, "step": 28196 }, { "epoch": 2.284267660401815, "grad_norm": 0.05394669622182846, "learning_rate": 9.531932130158874e-05, "loss": 0.2708, "step": 28197 }, { "epoch": 2.284348671419313, "grad_norm": 0.054610323160886765, "learning_rate": 9.53148206489941e-05, "loss": 0.2596, "step": 28198 }, { "epoch": 2.2844296824368113, "grad_norm": 0.047331418842077255, "learning_rate": 9.531031999639948e-05, "loss": 0.2438, "step": 28199 }, { "epoch": 2.28451069345431, "grad_norm": 0.07602977007627487, "learning_rate": 9.530581934380486e-05, "loss": 0.2742, "step": 28200 }, { "epoch": 2.2845917044718083, "grad_norm": 0.06329728662967682, "learning_rate": 9.530131869121022e-05, "loss": 0.2407, "step": 28201 }, { "epoch": 2.2846727154893065, "grad_norm": 0.05749182775616646, "learning_rate": 9.52968180386156e-05, "loss": 0.2423, "step": 28202 }, { "epoch": 2.2847537265068047, "grad_norm": 0.062329839915037155, "learning_rate": 9.529231738602098e-05, "loss": 0.2623, "step": 28203 }, { "epoch": 2.2848347375243034, "grad_norm": 0.056943412870168686, "learning_rate": 9.528781673342634e-05, "loss": 0.255, "step": 28204 }, { "epoch": 2.2849157485418017, "grad_norm": 0.06783603876829147, "learning_rate": 9.528331608083173e-05, "loss": 0.2757, "step": 28205 }, { "epoch": 2.2849967595593, "grad_norm": 0.0673983246088028, "learning_rate": 9.52788154282371e-05, "loss": 0.2527, "step": 28206 }, { "epoch": 2.2850777705767986, "grad_norm": 0.054460301995277405, "learning_rate": 9.527431477564246e-05, "loss": 0.2595, "step": 28207 }, { "epoch": 2.285158781594297, "grad_norm": 0.05514821410179138, "learning_rate": 9.526981412304785e-05, "loss": 0.2557, "step": 28208 }, { "epoch": 2.285239792611795, "grad_norm": 0.07168824970722198, "learning_rate": 9.526531347045323e-05, "loss": 0.2503, "step": 28209 }, { "epoch": 2.2853208036292934, "grad_norm": 0.07106754928827286, "learning_rate": 9.526081281785859e-05, "loss": 0.266, "step": 28210 }, { "epoch": 2.285401814646792, "grad_norm": 0.06122040003538132, "learning_rate": 9.525631216526397e-05, "loss": 0.285, "step": 28211 }, { "epoch": 2.2854828256642903, "grad_norm": 0.055722106248140335, "learning_rate": 9.525181151266935e-05, "loss": 0.2126, "step": 28212 }, { "epoch": 2.2855638366817885, "grad_norm": 0.0730934888124466, "learning_rate": 9.52473108600747e-05, "loss": 0.2513, "step": 28213 }, { "epoch": 2.285644847699287, "grad_norm": 0.06926199793815613, "learning_rate": 9.52428102074801e-05, "loss": 0.2303, "step": 28214 }, { "epoch": 2.2857258587167855, "grad_norm": 0.057966891676187515, "learning_rate": 9.523830955488547e-05, "loss": 0.2631, "step": 28215 }, { "epoch": 2.2858068697342837, "grad_norm": 0.07610571384429932, "learning_rate": 9.523380890229083e-05, "loss": 0.2419, "step": 28216 }, { "epoch": 2.2858878807517824, "grad_norm": 0.04987300932407379, "learning_rate": 9.522930824969621e-05, "loss": 0.2507, "step": 28217 }, { "epoch": 2.2859688917692806, "grad_norm": 0.06637983024120331, "learning_rate": 9.522480759710159e-05, "loss": 0.2303, "step": 28218 }, { "epoch": 2.286049902786779, "grad_norm": 0.07258900254964828, "learning_rate": 9.522030694450695e-05, "loss": 0.2512, "step": 28219 }, { "epoch": 2.2861309138042776, "grad_norm": 0.06396087259054184, "learning_rate": 9.521580629191233e-05, "loss": 0.2433, "step": 28220 }, { "epoch": 2.286211924821776, "grad_norm": 0.05232375115156174, "learning_rate": 9.521130563931771e-05, "loss": 0.2265, "step": 28221 }, { "epoch": 2.286292935839274, "grad_norm": 0.07666315883398056, "learning_rate": 9.520680498672307e-05, "loss": 0.229, "step": 28222 }, { "epoch": 2.2863739468567728, "grad_norm": 0.06945835053920746, "learning_rate": 9.520230433412846e-05, "loss": 0.2227, "step": 28223 }, { "epoch": 2.286454957874271, "grad_norm": 0.0774323046207428, "learning_rate": 9.519780368153383e-05, "loss": 0.2956, "step": 28224 }, { "epoch": 2.2865359688917692, "grad_norm": 0.06579787284135818, "learning_rate": 9.519330302893919e-05, "loss": 0.2426, "step": 28225 }, { "epoch": 2.2866169799092675, "grad_norm": 0.07013032585382462, "learning_rate": 9.518880237634458e-05, "loss": 0.2767, "step": 28226 }, { "epoch": 2.286697990926766, "grad_norm": 0.07726210355758667, "learning_rate": 9.518430172374995e-05, "loss": 0.2645, "step": 28227 }, { "epoch": 2.2867790019442644, "grad_norm": 0.07686841487884521, "learning_rate": 9.517980107115531e-05, "loss": 0.2483, "step": 28228 }, { "epoch": 2.2868600129617627, "grad_norm": 0.06310471892356873, "learning_rate": 9.51753004185607e-05, "loss": 0.2663, "step": 28229 }, { "epoch": 2.2869410239792614, "grad_norm": 0.05296747758984566, "learning_rate": 9.517079976596607e-05, "loss": 0.2524, "step": 28230 }, { "epoch": 2.2870220349967596, "grad_norm": 0.06394476443529129, "learning_rate": 9.516629911337144e-05, "loss": 0.2327, "step": 28231 }, { "epoch": 2.287103046014258, "grad_norm": 0.08596345782279968, "learning_rate": 9.516179846077682e-05, "loss": 0.229, "step": 28232 }, { "epoch": 2.287184057031756, "grad_norm": 0.06403294205665588, "learning_rate": 9.515729780818219e-05, "loss": 0.2249, "step": 28233 }, { "epoch": 2.287265068049255, "grad_norm": 0.073976069688797, "learning_rate": 9.515279715558757e-05, "loss": 0.2389, "step": 28234 }, { "epoch": 2.287346079066753, "grad_norm": 0.06861808896064758, "learning_rate": 9.514829650299294e-05, "loss": 0.2483, "step": 28235 }, { "epoch": 2.2874270900842513, "grad_norm": 0.06976199895143509, "learning_rate": 9.514379585039831e-05, "loss": 0.2794, "step": 28236 }, { "epoch": 2.28750810110175, "grad_norm": 0.0783432349562645, "learning_rate": 9.513929519780369e-05, "loss": 0.2481, "step": 28237 }, { "epoch": 2.287589112119248, "grad_norm": 0.05985863134264946, "learning_rate": 9.513479454520906e-05, "loss": 0.2146, "step": 28238 }, { "epoch": 2.2876701231367464, "grad_norm": 0.07286644726991653, "learning_rate": 9.513029389261443e-05, "loss": 0.2254, "step": 28239 }, { "epoch": 2.287751134154245, "grad_norm": 0.06654609739780426, "learning_rate": 9.512579324001981e-05, "loss": 0.2668, "step": 28240 }, { "epoch": 2.2878321451717434, "grad_norm": 0.056361664086580276, "learning_rate": 9.512129258742518e-05, "loss": 0.2595, "step": 28241 }, { "epoch": 2.2879131561892416, "grad_norm": 0.07212407886981964, "learning_rate": 9.511679193483055e-05, "loss": 0.2908, "step": 28242 }, { "epoch": 2.2879941672067403, "grad_norm": 0.05984533578157425, "learning_rate": 9.511229128223593e-05, "loss": 0.2865, "step": 28243 }, { "epoch": 2.2880751782242386, "grad_norm": 0.06211211159825325, "learning_rate": 9.51077906296413e-05, "loss": 0.2501, "step": 28244 }, { "epoch": 2.288156189241737, "grad_norm": 0.0646166056394577, "learning_rate": 9.510328997704667e-05, "loss": 0.2579, "step": 28245 }, { "epoch": 2.2882372002592355, "grad_norm": 0.0673956647515297, "learning_rate": 9.509878932445205e-05, "loss": 0.2683, "step": 28246 }, { "epoch": 2.2883182112767337, "grad_norm": 0.05403958633542061, "learning_rate": 9.509428867185742e-05, "loss": 0.2417, "step": 28247 }, { "epoch": 2.288399222294232, "grad_norm": 0.05626343563199043, "learning_rate": 9.50897880192628e-05, "loss": 0.2173, "step": 28248 }, { "epoch": 2.2884802333117302, "grad_norm": 0.06387647986412048, "learning_rate": 9.508528736666817e-05, "loss": 0.245, "step": 28249 }, { "epoch": 2.288561244329229, "grad_norm": 0.06680309772491455, "learning_rate": 9.508078671407354e-05, "loss": 0.2748, "step": 28250 }, { "epoch": 2.288642255346727, "grad_norm": 0.05708785727620125, "learning_rate": 9.507628606147892e-05, "loss": 0.2285, "step": 28251 }, { "epoch": 2.2887232663642254, "grad_norm": 0.06534324586391449, "learning_rate": 9.507178540888429e-05, "loss": 0.2364, "step": 28252 }, { "epoch": 2.288804277381724, "grad_norm": 0.07644810527563095, "learning_rate": 9.506728475628966e-05, "loss": 0.2334, "step": 28253 }, { "epoch": 2.2888852883992223, "grad_norm": 0.07262365520000458, "learning_rate": 9.506278410369504e-05, "loss": 0.2624, "step": 28254 }, { "epoch": 2.2889662994167206, "grad_norm": 0.061442889273166656, "learning_rate": 9.505828345110041e-05, "loss": 0.2362, "step": 28255 }, { "epoch": 2.289047310434219, "grad_norm": 0.07143153995275497, "learning_rate": 9.505378279850578e-05, "loss": 0.2874, "step": 28256 }, { "epoch": 2.2891283214517175, "grad_norm": 0.0650113970041275, "learning_rate": 9.504928214591117e-05, "loss": 0.1863, "step": 28257 }, { "epoch": 2.2892093324692158, "grad_norm": 0.06786942481994629, "learning_rate": 9.504478149331653e-05, "loss": 0.2347, "step": 28258 }, { "epoch": 2.289290343486714, "grad_norm": 0.06598436832427979, "learning_rate": 9.50402808407219e-05, "loss": 0.2598, "step": 28259 }, { "epoch": 2.2893713545042127, "grad_norm": 0.06490588188171387, "learning_rate": 9.503578018812729e-05, "loss": 0.2493, "step": 28260 }, { "epoch": 2.289452365521711, "grad_norm": 0.05049898847937584, "learning_rate": 9.503127953553265e-05, "loss": 0.2423, "step": 28261 }, { "epoch": 2.289533376539209, "grad_norm": 0.05463642627000809, "learning_rate": 9.502677888293803e-05, "loss": 0.2613, "step": 28262 }, { "epoch": 2.289614387556708, "grad_norm": 0.08326191455125809, "learning_rate": 9.502227823034341e-05, "loss": 0.2508, "step": 28263 }, { "epoch": 2.289695398574206, "grad_norm": 0.07564669102430344, "learning_rate": 9.501777757774877e-05, "loss": 0.27, "step": 28264 }, { "epoch": 2.2897764095917044, "grad_norm": 0.06074196472764015, "learning_rate": 9.501327692515415e-05, "loss": 0.2657, "step": 28265 }, { "epoch": 2.289857420609203, "grad_norm": 0.06326223164796829, "learning_rate": 9.500877627255953e-05, "loss": 0.2301, "step": 28266 }, { "epoch": 2.2899384316267013, "grad_norm": 0.05562509596347809, "learning_rate": 9.50042756199649e-05, "loss": 0.2538, "step": 28267 }, { "epoch": 2.2900194426441995, "grad_norm": 0.059947285801172256, "learning_rate": 9.499977496737027e-05, "loss": 0.274, "step": 28268 }, { "epoch": 2.2901004536616982, "grad_norm": 0.051273610442876816, "learning_rate": 9.499527431477565e-05, "loss": 0.2301, "step": 28269 }, { "epoch": 2.2901814646791965, "grad_norm": 0.0706385150551796, "learning_rate": 9.499077366218102e-05, "loss": 0.2673, "step": 28270 }, { "epoch": 2.2902624756966947, "grad_norm": 0.06693287938833237, "learning_rate": 9.498627300958639e-05, "loss": 0.2535, "step": 28271 }, { "epoch": 2.290343486714193, "grad_norm": 0.07116030901670456, "learning_rate": 9.498177235699178e-05, "loss": 0.2951, "step": 28272 }, { "epoch": 2.2904244977316917, "grad_norm": 0.08194348961114883, "learning_rate": 9.497727170439714e-05, "loss": 0.2604, "step": 28273 }, { "epoch": 2.29050550874919, "grad_norm": 0.05736947059631348, "learning_rate": 9.497277105180251e-05, "loss": 0.2223, "step": 28274 }, { "epoch": 2.290586519766688, "grad_norm": 0.0609368234872818, "learning_rate": 9.49682703992079e-05, "loss": 0.2197, "step": 28275 }, { "epoch": 2.290667530784187, "grad_norm": 0.061983298510313034, "learning_rate": 9.496376974661326e-05, "loss": 0.2552, "step": 28276 }, { "epoch": 2.290748541801685, "grad_norm": 0.06865274906158447, "learning_rate": 9.495926909401863e-05, "loss": 0.2611, "step": 28277 }, { "epoch": 2.2908295528191833, "grad_norm": 0.07397310435771942, "learning_rate": 9.495476844142402e-05, "loss": 0.2649, "step": 28278 }, { "epoch": 2.2909105638366816, "grad_norm": 0.06135142594575882, "learning_rate": 9.495026778882938e-05, "loss": 0.2785, "step": 28279 }, { "epoch": 2.2909915748541803, "grad_norm": 0.05850803852081299, "learning_rate": 9.494576713623475e-05, "loss": 0.2482, "step": 28280 }, { "epoch": 2.2910725858716785, "grad_norm": 0.06148507446050644, "learning_rate": 9.494126648364014e-05, "loss": 0.2317, "step": 28281 }, { "epoch": 2.2911535968891767, "grad_norm": 0.06344686448574066, "learning_rate": 9.49367658310455e-05, "loss": 0.239, "step": 28282 }, { "epoch": 2.2912346079066754, "grad_norm": 0.0795753002166748, "learning_rate": 9.493226517845089e-05, "loss": 0.2462, "step": 28283 }, { "epoch": 2.2913156189241737, "grad_norm": 0.061491359025239944, "learning_rate": 9.492776452585626e-05, "loss": 0.228, "step": 28284 }, { "epoch": 2.291396629941672, "grad_norm": 0.07499266415834427, "learning_rate": 9.492326387326162e-05, "loss": 0.2701, "step": 28285 }, { "epoch": 2.2914776409591706, "grad_norm": 0.06818591058254242, "learning_rate": 9.4918763220667e-05, "loss": 0.2532, "step": 28286 }, { "epoch": 2.291558651976669, "grad_norm": 0.08009780943393707, "learning_rate": 9.491426256807238e-05, "loss": 0.3119, "step": 28287 }, { "epoch": 2.291639662994167, "grad_norm": 0.056352753192186356, "learning_rate": 9.490976191547774e-05, "loss": 0.2219, "step": 28288 }, { "epoch": 2.291720674011666, "grad_norm": 0.080259308218956, "learning_rate": 9.490526126288313e-05, "loss": 0.2622, "step": 28289 }, { "epoch": 2.291801685029164, "grad_norm": 0.06937295198440552, "learning_rate": 9.49007606102885e-05, "loss": 0.2491, "step": 28290 }, { "epoch": 2.2918826960466623, "grad_norm": 0.06505081802606583, "learning_rate": 9.489625995769386e-05, "loss": 0.2546, "step": 28291 }, { "epoch": 2.291963707064161, "grad_norm": 0.06334318220615387, "learning_rate": 9.489175930509925e-05, "loss": 0.2564, "step": 28292 }, { "epoch": 2.292044718081659, "grad_norm": 0.0651654452085495, "learning_rate": 9.488725865250462e-05, "loss": 0.2688, "step": 28293 }, { "epoch": 2.2921257290991575, "grad_norm": 0.06208321824669838, "learning_rate": 9.488275799990998e-05, "loss": 0.2233, "step": 28294 }, { "epoch": 2.2922067401166557, "grad_norm": 0.056360118091106415, "learning_rate": 9.487825734731537e-05, "loss": 0.2241, "step": 28295 }, { "epoch": 2.2922877511341544, "grad_norm": 0.07632552832365036, "learning_rate": 9.487375669472074e-05, "loss": 0.2594, "step": 28296 }, { "epoch": 2.2923687621516526, "grad_norm": 0.07288607954978943, "learning_rate": 9.48692560421261e-05, "loss": 0.2737, "step": 28297 }, { "epoch": 2.292449773169151, "grad_norm": 0.0723167434334755, "learning_rate": 9.486475538953149e-05, "loss": 0.2366, "step": 28298 }, { "epoch": 2.2925307841866496, "grad_norm": 0.05326563119888306, "learning_rate": 9.486025473693686e-05, "loss": 0.2355, "step": 28299 }, { "epoch": 2.292611795204148, "grad_norm": 0.06141812726855278, "learning_rate": 9.485575408434222e-05, "loss": 0.2516, "step": 28300 }, { "epoch": 2.292692806221646, "grad_norm": 0.06692104041576385, "learning_rate": 9.485125343174761e-05, "loss": 0.2937, "step": 28301 }, { "epoch": 2.2927738172391443, "grad_norm": 0.06172855570912361, "learning_rate": 9.484675277915298e-05, "loss": 0.2417, "step": 28302 }, { "epoch": 2.292854828256643, "grad_norm": 0.06784968078136444, "learning_rate": 9.484225212655836e-05, "loss": 0.2704, "step": 28303 }, { "epoch": 2.2929358392741412, "grad_norm": 0.07524605095386505, "learning_rate": 9.483775147396373e-05, "loss": 0.2703, "step": 28304 }, { "epoch": 2.2930168502916395, "grad_norm": 0.061262935400009155, "learning_rate": 9.48332508213691e-05, "loss": 0.2608, "step": 28305 }, { "epoch": 2.293097861309138, "grad_norm": 0.07269617915153503, "learning_rate": 9.482875016877448e-05, "loss": 0.24, "step": 28306 }, { "epoch": 2.2931788723266364, "grad_norm": 0.06610900163650513, "learning_rate": 9.482424951617985e-05, "loss": 0.2466, "step": 28307 }, { "epoch": 2.2932598833441347, "grad_norm": 0.08361209183931351, "learning_rate": 9.481974886358523e-05, "loss": 0.2459, "step": 28308 }, { "epoch": 2.2933408943616334, "grad_norm": 0.06436758488416672, "learning_rate": 9.48152482109906e-05, "loss": 0.2156, "step": 28309 }, { "epoch": 2.2934219053791316, "grad_norm": 0.07810865342617035, "learning_rate": 9.481074755839597e-05, "loss": 0.2417, "step": 28310 }, { "epoch": 2.29350291639663, "grad_norm": 0.07908753305673599, "learning_rate": 9.480624690580135e-05, "loss": 0.2408, "step": 28311 }, { "epoch": 2.2935839274141285, "grad_norm": 0.05391676723957062, "learning_rate": 9.480174625320672e-05, "loss": 0.2365, "step": 28312 }, { "epoch": 2.2936649384316268, "grad_norm": 0.0659322664141655, "learning_rate": 9.47972456006121e-05, "loss": 0.2425, "step": 28313 }, { "epoch": 2.293745949449125, "grad_norm": 0.07384074479341507, "learning_rate": 9.479274494801747e-05, "loss": 0.2852, "step": 28314 }, { "epoch": 2.2938269604666237, "grad_norm": 0.07077052444219589, "learning_rate": 9.478824429542284e-05, "loss": 0.2672, "step": 28315 }, { "epoch": 2.293907971484122, "grad_norm": 0.06094752997159958, "learning_rate": 9.478374364282821e-05, "loss": 0.225, "step": 28316 }, { "epoch": 2.29398898250162, "grad_norm": 0.056600797921419144, "learning_rate": 9.477924299023359e-05, "loss": 0.2478, "step": 28317 }, { "epoch": 2.2940699935191184, "grad_norm": 0.0702267587184906, "learning_rate": 9.477474233763896e-05, "loss": 0.2666, "step": 28318 }, { "epoch": 2.294151004536617, "grad_norm": 0.06771890819072723, "learning_rate": 9.477024168504434e-05, "loss": 0.273, "step": 28319 }, { "epoch": 2.2942320155541154, "grad_norm": 0.067947618663311, "learning_rate": 9.476574103244971e-05, "loss": 0.2272, "step": 28320 }, { "epoch": 2.2943130265716136, "grad_norm": 0.0776883214712143, "learning_rate": 9.476124037985508e-05, "loss": 0.3166, "step": 28321 }, { "epoch": 2.2943940375891123, "grad_norm": 0.05880747735500336, "learning_rate": 9.475673972726046e-05, "loss": 0.2402, "step": 28322 }, { "epoch": 2.2944750486066106, "grad_norm": 0.05730128660798073, "learning_rate": 9.475223907466583e-05, "loss": 0.2259, "step": 28323 }, { "epoch": 2.294556059624109, "grad_norm": 0.06512884050607681, "learning_rate": 9.47477384220712e-05, "loss": 0.2672, "step": 28324 }, { "epoch": 2.294637070641607, "grad_norm": 0.06250195950269699, "learning_rate": 9.474323776947658e-05, "loss": 0.2561, "step": 28325 }, { "epoch": 2.2947180816591057, "grad_norm": 0.061930492520332336, "learning_rate": 9.473873711688195e-05, "loss": 0.2613, "step": 28326 }, { "epoch": 2.294799092676604, "grad_norm": 0.06593555212020874, "learning_rate": 9.473423646428732e-05, "loss": 0.2862, "step": 28327 }, { "epoch": 2.2948801036941022, "grad_norm": 0.056815922260284424, "learning_rate": 9.47297358116927e-05, "loss": 0.2174, "step": 28328 }, { "epoch": 2.294961114711601, "grad_norm": 0.07273375242948532, "learning_rate": 9.472523515909807e-05, "loss": 0.2806, "step": 28329 }, { "epoch": 2.295042125729099, "grad_norm": 0.0795697495341301, "learning_rate": 9.472073450650344e-05, "loss": 0.2545, "step": 28330 }, { "epoch": 2.2951231367465974, "grad_norm": 0.05804990231990814, "learning_rate": 9.471623385390882e-05, "loss": 0.2681, "step": 28331 }, { "epoch": 2.295204147764096, "grad_norm": 0.05056268721818924, "learning_rate": 9.471173320131419e-05, "loss": 0.2425, "step": 28332 }, { "epoch": 2.2952851587815943, "grad_norm": 0.06456629186868668, "learning_rate": 9.470723254871957e-05, "loss": 0.2263, "step": 28333 }, { "epoch": 2.2953661697990926, "grad_norm": 0.06838972866535187, "learning_rate": 9.470273189612494e-05, "loss": 0.2526, "step": 28334 }, { "epoch": 2.2954471808165913, "grad_norm": 0.06177784502506256, "learning_rate": 9.469823124353031e-05, "loss": 0.2227, "step": 28335 }, { "epoch": 2.2955281918340895, "grad_norm": 0.08078238368034363, "learning_rate": 9.469373059093569e-05, "loss": 0.2449, "step": 28336 }, { "epoch": 2.2956092028515878, "grad_norm": 0.06638062745332718, "learning_rate": 9.468922993834106e-05, "loss": 0.2641, "step": 28337 }, { "epoch": 2.295690213869086, "grad_norm": 0.08003167808055878, "learning_rate": 9.468472928574645e-05, "loss": 0.2807, "step": 28338 }, { "epoch": 2.2957712248865847, "grad_norm": 0.08642017841339111, "learning_rate": 9.468022863315181e-05, "loss": 0.2712, "step": 28339 }, { "epoch": 2.295852235904083, "grad_norm": 0.059965621680021286, "learning_rate": 9.467572798055718e-05, "loss": 0.2274, "step": 28340 }, { "epoch": 2.295933246921581, "grad_norm": 0.05939400941133499, "learning_rate": 9.467122732796257e-05, "loss": 0.2365, "step": 28341 }, { "epoch": 2.29601425793908, "grad_norm": 0.05748443305492401, "learning_rate": 9.466672667536793e-05, "loss": 0.2375, "step": 28342 }, { "epoch": 2.296095268956578, "grad_norm": 0.07314951717853546, "learning_rate": 9.46622260227733e-05, "loss": 0.2715, "step": 28343 }, { "epoch": 2.2961762799740764, "grad_norm": 0.06428799778223038, "learning_rate": 9.465772537017869e-05, "loss": 0.2464, "step": 28344 }, { "epoch": 2.2962572909915746, "grad_norm": 0.06721749901771545, "learning_rate": 9.465322471758405e-05, "loss": 0.243, "step": 28345 }, { "epoch": 2.2963383020090733, "grad_norm": 0.05531509593129158, "learning_rate": 9.464872406498942e-05, "loss": 0.2744, "step": 28346 }, { "epoch": 2.2964193130265715, "grad_norm": 0.07265246659517288, "learning_rate": 9.464422341239481e-05, "loss": 0.2686, "step": 28347 }, { "epoch": 2.29650032404407, "grad_norm": 0.06863284111022949, "learning_rate": 9.463972275980017e-05, "loss": 0.2606, "step": 28348 }, { "epoch": 2.2965813350615685, "grad_norm": 0.05782085284590721, "learning_rate": 9.463522210720554e-05, "loss": 0.2256, "step": 28349 }, { "epoch": 2.2966623460790667, "grad_norm": 0.05759461969137192, "learning_rate": 9.463072145461093e-05, "loss": 0.2229, "step": 28350 }, { "epoch": 2.296743357096565, "grad_norm": 0.07360438257455826, "learning_rate": 9.462622080201629e-05, "loss": 0.2503, "step": 28351 }, { "epoch": 2.2968243681140637, "grad_norm": 0.06497979909181595, "learning_rate": 9.462172014942166e-05, "loss": 0.2661, "step": 28352 }, { "epoch": 2.296905379131562, "grad_norm": 0.05980612710118294, "learning_rate": 9.461721949682705e-05, "loss": 0.2292, "step": 28353 }, { "epoch": 2.29698639014906, "grad_norm": 0.04446947202086449, "learning_rate": 9.461271884423241e-05, "loss": 0.2002, "step": 28354 }, { "epoch": 2.297067401166559, "grad_norm": 0.06264941394329071, "learning_rate": 9.460821819163778e-05, "loss": 0.2511, "step": 28355 }, { "epoch": 2.297148412184057, "grad_norm": 0.07036852091550827, "learning_rate": 9.460371753904317e-05, "loss": 0.2535, "step": 28356 }, { "epoch": 2.2972294232015553, "grad_norm": 0.060618314892053604, "learning_rate": 9.459921688644853e-05, "loss": 0.2722, "step": 28357 }, { "epoch": 2.297310434219054, "grad_norm": 0.06120682880282402, "learning_rate": 9.45947162338539e-05, "loss": 0.2608, "step": 28358 }, { "epoch": 2.2973914452365523, "grad_norm": 0.05045586824417114, "learning_rate": 9.459021558125929e-05, "loss": 0.2056, "step": 28359 }, { "epoch": 2.2974724562540505, "grad_norm": 0.0593399740755558, "learning_rate": 9.458571492866465e-05, "loss": 0.2436, "step": 28360 }, { "epoch": 2.2975534672715487, "grad_norm": 0.05917546525597572, "learning_rate": 9.458121427607003e-05, "loss": 0.2518, "step": 28361 }, { "epoch": 2.2976344782890474, "grad_norm": 0.07110293209552765, "learning_rate": 9.457671362347541e-05, "loss": 0.2729, "step": 28362 }, { "epoch": 2.2977154893065457, "grad_norm": 0.07229780405759811, "learning_rate": 9.457221297088077e-05, "loss": 0.269, "step": 28363 }, { "epoch": 2.297796500324044, "grad_norm": 0.05696237087249756, "learning_rate": 9.456771231828616e-05, "loss": 0.2289, "step": 28364 }, { "epoch": 2.2978775113415426, "grad_norm": 0.06969776749610901, "learning_rate": 9.456321166569153e-05, "loss": 0.247, "step": 28365 }, { "epoch": 2.297958522359041, "grad_norm": 0.06379079818725586, "learning_rate": 9.45587110130969e-05, "loss": 0.2436, "step": 28366 }, { "epoch": 2.298039533376539, "grad_norm": 0.06603731960058212, "learning_rate": 9.455421036050228e-05, "loss": 0.2661, "step": 28367 }, { "epoch": 2.2981205443940373, "grad_norm": 0.0677296444773674, "learning_rate": 9.454970970790766e-05, "loss": 0.2285, "step": 28368 }, { "epoch": 2.298201555411536, "grad_norm": 0.0610167570412159, "learning_rate": 9.454520905531303e-05, "loss": 0.2561, "step": 28369 }, { "epoch": 2.2982825664290343, "grad_norm": 0.060097601264715195, "learning_rate": 9.45407084027184e-05, "loss": 0.2386, "step": 28370 }, { "epoch": 2.2983635774465325, "grad_norm": 0.09854019433259964, "learning_rate": 9.453620775012378e-05, "loss": 0.259, "step": 28371 }, { "epoch": 2.298444588464031, "grad_norm": 0.052267104387283325, "learning_rate": 9.453170709752915e-05, "loss": 0.2207, "step": 28372 }, { "epoch": 2.2985255994815295, "grad_norm": 0.06957710534334183, "learning_rate": 9.452720644493452e-05, "loss": 0.2621, "step": 28373 }, { "epoch": 2.2986066104990277, "grad_norm": 0.05953522399067879, "learning_rate": 9.45227057923399e-05, "loss": 0.2669, "step": 28374 }, { "epoch": 2.2986876215165264, "grad_norm": 0.06640417128801346, "learning_rate": 9.451820513974527e-05, "loss": 0.2461, "step": 28375 }, { "epoch": 2.2987686325340246, "grad_norm": 0.07142982631921768, "learning_rate": 9.451370448715064e-05, "loss": 0.2503, "step": 28376 }, { "epoch": 2.298849643551523, "grad_norm": 0.07983957976102829, "learning_rate": 9.450920383455602e-05, "loss": 0.2676, "step": 28377 }, { "epoch": 2.2989306545690216, "grad_norm": 0.059071604162454605, "learning_rate": 9.450470318196139e-05, "loss": 0.2624, "step": 28378 }, { "epoch": 2.29901166558652, "grad_norm": 0.06270880997180939, "learning_rate": 9.450020252936676e-05, "loss": 0.2306, "step": 28379 }, { "epoch": 2.299092676604018, "grad_norm": 0.06676393002271652, "learning_rate": 9.449570187677214e-05, "loss": 0.2477, "step": 28380 }, { "epoch": 2.2991736876215167, "grad_norm": 0.07599867135286331, "learning_rate": 9.449120122417751e-05, "loss": 0.2911, "step": 28381 }, { "epoch": 2.299254698639015, "grad_norm": 0.061391886323690414, "learning_rate": 9.448670057158289e-05, "loss": 0.2364, "step": 28382 }, { "epoch": 2.2993357096565132, "grad_norm": 0.05860184505581856, "learning_rate": 9.448219991898826e-05, "loss": 0.2293, "step": 28383 }, { "epoch": 2.2994167206740115, "grad_norm": 0.0665908008813858, "learning_rate": 9.447769926639363e-05, "loss": 0.2416, "step": 28384 }, { "epoch": 2.29949773169151, "grad_norm": 0.06436271965503693, "learning_rate": 9.4473198613799e-05, "loss": 0.2713, "step": 28385 }, { "epoch": 2.2995787427090084, "grad_norm": 0.060904085636138916, "learning_rate": 9.446869796120438e-05, "loss": 0.2974, "step": 28386 }, { "epoch": 2.2996597537265067, "grad_norm": 0.05490853264927864, "learning_rate": 9.446419730860975e-05, "loss": 0.2115, "step": 28387 }, { "epoch": 2.2997407647440054, "grad_norm": 0.0666164755821228, "learning_rate": 9.445969665601513e-05, "loss": 0.2551, "step": 28388 }, { "epoch": 2.2998217757615036, "grad_norm": 0.04948906973004341, "learning_rate": 9.44551960034205e-05, "loss": 0.2575, "step": 28389 }, { "epoch": 2.299902786779002, "grad_norm": 0.06865771114826202, "learning_rate": 9.445069535082587e-05, "loss": 0.2477, "step": 28390 }, { "epoch": 2.2999837977965, "grad_norm": 0.0601445734500885, "learning_rate": 9.444619469823125e-05, "loss": 0.2679, "step": 28391 }, { "epoch": 2.3000648088139988, "grad_norm": 0.05781553313136101, "learning_rate": 9.444169404563662e-05, "loss": 0.2276, "step": 28392 }, { "epoch": 2.300145819831497, "grad_norm": 0.0677238255739212, "learning_rate": 9.4437193393042e-05, "loss": 0.2261, "step": 28393 }, { "epoch": 2.3002268308489953, "grad_norm": 0.0595778189599514, "learning_rate": 9.443269274044737e-05, "loss": 0.2752, "step": 28394 }, { "epoch": 2.300307841866494, "grad_norm": 0.0635710060596466, "learning_rate": 9.442819208785274e-05, "loss": 0.2567, "step": 28395 }, { "epoch": 2.300388852883992, "grad_norm": 0.06570877879858017, "learning_rate": 9.442369143525812e-05, "loss": 0.2933, "step": 28396 }, { "epoch": 2.3004698639014904, "grad_norm": 0.06711771339178085, "learning_rate": 9.441919078266349e-05, "loss": 0.2205, "step": 28397 }, { "epoch": 2.300550874918989, "grad_norm": 0.07368925213813782, "learning_rate": 9.441469013006886e-05, "loss": 0.2361, "step": 28398 }, { "epoch": 2.3006318859364874, "grad_norm": 0.06781308352947235, "learning_rate": 9.441018947747424e-05, "loss": 0.261, "step": 28399 }, { "epoch": 2.3007128969539856, "grad_norm": 0.06204356253147125, "learning_rate": 9.440568882487961e-05, "loss": 0.2319, "step": 28400 }, { "epoch": 2.3007939079714843, "grad_norm": 0.061793096363544464, "learning_rate": 9.440118817228498e-05, "loss": 0.2038, "step": 28401 }, { "epoch": 2.3008749189889826, "grad_norm": 0.05806412175297737, "learning_rate": 9.439668751969036e-05, "loss": 0.2435, "step": 28402 }, { "epoch": 2.300955930006481, "grad_norm": 0.06467214971780777, "learning_rate": 9.439218686709573e-05, "loss": 0.2193, "step": 28403 }, { "epoch": 2.3010369410239795, "grad_norm": 0.05494023486971855, "learning_rate": 9.43876862145011e-05, "loss": 0.2562, "step": 28404 }, { "epoch": 2.3011179520414777, "grad_norm": 0.06859727948904037, "learning_rate": 9.438318556190648e-05, "loss": 0.2541, "step": 28405 }, { "epoch": 2.301198963058976, "grad_norm": 0.07257232815027237, "learning_rate": 9.437868490931185e-05, "loss": 0.2783, "step": 28406 }, { "epoch": 2.301279974076474, "grad_norm": 0.07005695253610611, "learning_rate": 9.437418425671723e-05, "loss": 0.2998, "step": 28407 }, { "epoch": 2.301360985093973, "grad_norm": 0.06895466893911362, "learning_rate": 9.43696836041226e-05, "loss": 0.2509, "step": 28408 }, { "epoch": 2.301441996111471, "grad_norm": 0.07160642743110657, "learning_rate": 9.436518295152797e-05, "loss": 0.2389, "step": 28409 }, { "epoch": 2.3015230071289694, "grad_norm": 0.06630747020244598, "learning_rate": 9.436068229893335e-05, "loss": 0.2737, "step": 28410 }, { "epoch": 2.301604018146468, "grad_norm": 0.06067004054784775, "learning_rate": 9.435618164633872e-05, "loss": 0.2827, "step": 28411 }, { "epoch": 2.3016850291639663, "grad_norm": 0.06799201667308807, "learning_rate": 9.43516809937441e-05, "loss": 0.21, "step": 28412 }, { "epoch": 2.3017660401814646, "grad_norm": 0.06540130823850632, "learning_rate": 9.434718034114947e-05, "loss": 0.2194, "step": 28413 }, { "epoch": 2.301847051198963, "grad_norm": 0.06734874099493027, "learning_rate": 9.434267968855484e-05, "loss": 0.2604, "step": 28414 }, { "epoch": 2.3019280622164615, "grad_norm": 0.05423181131482124, "learning_rate": 9.433817903596021e-05, "loss": 0.2558, "step": 28415 }, { "epoch": 2.3020090732339598, "grad_norm": 0.06706136465072632, "learning_rate": 9.43336783833656e-05, "loss": 0.2677, "step": 28416 }, { "epoch": 2.302090084251458, "grad_norm": 0.07376489043235779, "learning_rate": 9.432917773077096e-05, "loss": 0.2563, "step": 28417 }, { "epoch": 2.3021710952689567, "grad_norm": 0.0608864426612854, "learning_rate": 9.432467707817634e-05, "loss": 0.2718, "step": 28418 }, { "epoch": 2.302252106286455, "grad_norm": 0.07492481172084808, "learning_rate": 9.432017642558172e-05, "loss": 0.2959, "step": 28419 }, { "epoch": 2.302333117303953, "grad_norm": 0.05481376126408577, "learning_rate": 9.431567577298708e-05, "loss": 0.2881, "step": 28420 }, { "epoch": 2.302414128321452, "grad_norm": 0.05414591357111931, "learning_rate": 9.431117512039246e-05, "loss": 0.2368, "step": 28421 }, { "epoch": 2.30249513933895, "grad_norm": 0.06244802847504616, "learning_rate": 9.430667446779784e-05, "loss": 0.2707, "step": 28422 }, { "epoch": 2.3025761503564484, "grad_norm": 0.07095088809728622, "learning_rate": 9.43021738152032e-05, "loss": 0.262, "step": 28423 }, { "epoch": 2.302657161373947, "grad_norm": 0.07430743426084518, "learning_rate": 9.429767316260858e-05, "loss": 0.273, "step": 28424 }, { "epoch": 2.3027381723914453, "grad_norm": 0.07228193432092667, "learning_rate": 9.429317251001396e-05, "loss": 0.2152, "step": 28425 }, { "epoch": 2.3028191834089435, "grad_norm": 0.06242399662733078, "learning_rate": 9.428867185741932e-05, "loss": 0.2448, "step": 28426 }, { "epoch": 2.3029001944264422, "grad_norm": 0.0664132609963417, "learning_rate": 9.42841712048247e-05, "loss": 0.2344, "step": 28427 }, { "epoch": 2.3029812054439405, "grad_norm": 0.07687770575284958, "learning_rate": 9.427967055223008e-05, "loss": 0.2753, "step": 28428 }, { "epoch": 2.3030622164614387, "grad_norm": 0.08215631544589996, "learning_rate": 9.427516989963545e-05, "loss": 0.2519, "step": 28429 }, { "epoch": 2.303143227478937, "grad_norm": 0.07950203120708466, "learning_rate": 9.427066924704082e-05, "loss": 0.2495, "step": 28430 }, { "epoch": 2.3032242384964356, "grad_norm": 0.06067657843232155, "learning_rate": 9.42661685944462e-05, "loss": 0.2453, "step": 28431 }, { "epoch": 2.303305249513934, "grad_norm": 0.06874459236860275, "learning_rate": 9.426166794185157e-05, "loss": 0.2695, "step": 28432 }, { "epoch": 2.303386260531432, "grad_norm": 0.06850822269916534, "learning_rate": 9.425716728925694e-05, "loss": 0.2321, "step": 28433 }, { "epoch": 2.303467271548931, "grad_norm": 0.08743181824684143, "learning_rate": 9.425266663666233e-05, "loss": 0.265, "step": 28434 }, { "epoch": 2.303548282566429, "grad_norm": 0.055900510400533676, "learning_rate": 9.424816598406769e-05, "loss": 0.2272, "step": 28435 }, { "epoch": 2.3036292935839273, "grad_norm": 0.07269109040498734, "learning_rate": 9.424366533147306e-05, "loss": 0.284, "step": 28436 }, { "epoch": 2.3037103046014256, "grad_norm": 0.06864180415868759, "learning_rate": 9.423916467887845e-05, "loss": 0.2528, "step": 28437 }, { "epoch": 2.3037913156189243, "grad_norm": 0.06924668699502945, "learning_rate": 9.423466402628382e-05, "loss": 0.2543, "step": 28438 }, { "epoch": 2.3038723266364225, "grad_norm": 0.07466299086809158, "learning_rate": 9.423016337368918e-05, "loss": 0.2687, "step": 28439 }, { "epoch": 2.3039533376539207, "grad_norm": 0.07248183339834213, "learning_rate": 9.422566272109457e-05, "loss": 0.267, "step": 28440 }, { "epoch": 2.3040343486714194, "grad_norm": 0.06602407246828079, "learning_rate": 9.422116206849994e-05, "loss": 0.2441, "step": 28441 }, { "epoch": 2.3041153596889177, "grad_norm": 0.07264547795057297, "learning_rate": 9.421666141590532e-05, "loss": 0.2265, "step": 28442 }, { "epoch": 2.304196370706416, "grad_norm": 0.06024101749062538, "learning_rate": 9.421216076331069e-05, "loss": 0.2226, "step": 28443 }, { "epoch": 2.3042773817239146, "grad_norm": 0.06888773292303085, "learning_rate": 9.420766011071606e-05, "loss": 0.2748, "step": 28444 }, { "epoch": 2.304358392741413, "grad_norm": 0.06901773065328598, "learning_rate": 9.420315945812144e-05, "loss": 0.2888, "step": 28445 }, { "epoch": 2.304439403758911, "grad_norm": 0.07405581325292587, "learning_rate": 9.419865880552681e-05, "loss": 0.2954, "step": 28446 }, { "epoch": 2.30452041477641, "grad_norm": 0.053265344351530075, "learning_rate": 9.419415815293218e-05, "loss": 0.2623, "step": 28447 }, { "epoch": 2.304601425793908, "grad_norm": 0.05269251763820648, "learning_rate": 9.418965750033756e-05, "loss": 0.2577, "step": 28448 }, { "epoch": 2.3046824368114063, "grad_norm": 0.06693107634782791, "learning_rate": 9.418515684774293e-05, "loss": 0.2662, "step": 28449 }, { "epoch": 2.304763447828905, "grad_norm": 0.06313629448413849, "learning_rate": 9.41806561951483e-05, "loss": 0.226, "step": 28450 }, { "epoch": 2.304844458846403, "grad_norm": 0.0705796480178833, "learning_rate": 9.417615554255368e-05, "loss": 0.2718, "step": 28451 }, { "epoch": 2.3049254698639015, "grad_norm": 0.05864161252975464, "learning_rate": 9.417165488995905e-05, "loss": 0.2517, "step": 28452 }, { "epoch": 2.3050064808813997, "grad_norm": 0.060567956417798996, "learning_rate": 9.416715423736443e-05, "loss": 0.2667, "step": 28453 }, { "epoch": 2.3050874918988984, "grad_norm": 0.0734984502196312, "learning_rate": 9.41626535847698e-05, "loss": 0.2583, "step": 28454 }, { "epoch": 2.3051685029163966, "grad_norm": 0.06271208077669144, "learning_rate": 9.415815293217517e-05, "loss": 0.241, "step": 28455 }, { "epoch": 2.305249513933895, "grad_norm": 0.07750144600868225, "learning_rate": 9.415365227958055e-05, "loss": 0.273, "step": 28456 }, { "epoch": 2.3053305249513936, "grad_norm": 0.057265881448984146, "learning_rate": 9.414915162698592e-05, "loss": 0.233, "step": 28457 }, { "epoch": 2.305411535968892, "grad_norm": 0.0688074454665184, "learning_rate": 9.414465097439129e-05, "loss": 0.2528, "step": 28458 }, { "epoch": 2.30549254698639, "grad_norm": 0.08131054043769836, "learning_rate": 9.414015032179667e-05, "loss": 0.2691, "step": 28459 }, { "epoch": 2.3055735580038883, "grad_norm": 0.056380532681941986, "learning_rate": 9.413564966920204e-05, "loss": 0.2488, "step": 28460 }, { "epoch": 2.305654569021387, "grad_norm": 0.06920839846134186, "learning_rate": 9.413114901660741e-05, "loss": 0.2732, "step": 28461 }, { "epoch": 2.3057355800388852, "grad_norm": 0.06537441164255142, "learning_rate": 9.412664836401279e-05, "loss": 0.2125, "step": 28462 }, { "epoch": 2.3058165910563835, "grad_norm": 0.0647144690155983, "learning_rate": 9.412214771141816e-05, "loss": 0.2727, "step": 28463 }, { "epoch": 2.305897602073882, "grad_norm": 0.05751211941242218, "learning_rate": 9.411764705882353e-05, "loss": 0.2527, "step": 28464 }, { "epoch": 2.3059786130913804, "grad_norm": 0.058670178055763245, "learning_rate": 9.411314640622891e-05, "loss": 0.2362, "step": 28465 }, { "epoch": 2.3060596241088787, "grad_norm": 0.0812852680683136, "learning_rate": 9.410864575363428e-05, "loss": 0.2728, "step": 28466 }, { "epoch": 2.3061406351263773, "grad_norm": 0.06273907423019409, "learning_rate": 9.410414510103966e-05, "loss": 0.276, "step": 28467 }, { "epoch": 2.3062216461438756, "grad_norm": 0.06311316788196564, "learning_rate": 9.409964444844503e-05, "loss": 0.2381, "step": 28468 }, { "epoch": 2.306302657161374, "grad_norm": 0.08670295029878616, "learning_rate": 9.40951437958504e-05, "loss": 0.2965, "step": 28469 }, { "epoch": 2.3063836681788725, "grad_norm": 0.06879959255456924, "learning_rate": 9.409064314325578e-05, "loss": 0.2687, "step": 28470 }, { "epoch": 2.3064646791963708, "grad_norm": 0.0669282004237175, "learning_rate": 9.408614249066115e-05, "loss": 0.2588, "step": 28471 }, { "epoch": 2.306545690213869, "grad_norm": 0.06189112365245819, "learning_rate": 9.408164183806652e-05, "loss": 0.2502, "step": 28472 }, { "epoch": 2.3066267012313677, "grad_norm": 0.07936570793390274, "learning_rate": 9.40771411854719e-05, "loss": 0.2687, "step": 28473 }, { "epoch": 2.306707712248866, "grad_norm": 0.08174500614404678, "learning_rate": 9.407264053287727e-05, "loss": 0.2332, "step": 28474 }, { "epoch": 2.306788723266364, "grad_norm": 0.06000255048274994, "learning_rate": 9.406813988028264e-05, "loss": 0.2489, "step": 28475 }, { "epoch": 2.3068697342838624, "grad_norm": 0.08085722476243973, "learning_rate": 9.406363922768802e-05, "loss": 0.2647, "step": 28476 }, { "epoch": 2.306950745301361, "grad_norm": 0.06643027812242508, "learning_rate": 9.405913857509339e-05, "loss": 0.227, "step": 28477 }, { "epoch": 2.3070317563188594, "grad_norm": 0.06923480331897736, "learning_rate": 9.405463792249877e-05, "loss": 0.2638, "step": 28478 }, { "epoch": 2.3071127673363576, "grad_norm": 0.058105580508708954, "learning_rate": 9.405013726990414e-05, "loss": 0.2462, "step": 28479 }, { "epoch": 2.3071937783538563, "grad_norm": 0.06581859290599823, "learning_rate": 9.404563661730951e-05, "loss": 0.2355, "step": 28480 }, { "epoch": 2.3072747893713546, "grad_norm": 0.0653432309627533, "learning_rate": 9.404113596471489e-05, "loss": 0.2852, "step": 28481 }, { "epoch": 2.307355800388853, "grad_norm": 0.06128329411149025, "learning_rate": 9.403663531212026e-05, "loss": 0.2594, "step": 28482 }, { "epoch": 2.307436811406351, "grad_norm": 0.06882143020629883, "learning_rate": 9.403213465952563e-05, "loss": 0.2442, "step": 28483 }, { "epoch": 2.3075178224238497, "grad_norm": 0.060445886105298996, "learning_rate": 9.402763400693101e-05, "loss": 0.2295, "step": 28484 }, { "epoch": 2.307598833441348, "grad_norm": 0.10045522451400757, "learning_rate": 9.402313335433638e-05, "loss": 0.2667, "step": 28485 }, { "epoch": 2.307679844458846, "grad_norm": 0.06006335839629173, "learning_rate": 9.401863270174175e-05, "loss": 0.2093, "step": 28486 }, { "epoch": 2.307760855476345, "grad_norm": 0.06454713642597198, "learning_rate": 9.401413204914713e-05, "loss": 0.2232, "step": 28487 }, { "epoch": 2.307841866493843, "grad_norm": 0.061691079288721085, "learning_rate": 9.40096313965525e-05, "loss": 0.288, "step": 28488 }, { "epoch": 2.3079228775113414, "grad_norm": 0.05934334546327591, "learning_rate": 9.400513074395787e-05, "loss": 0.2301, "step": 28489 }, { "epoch": 2.30800388852884, "grad_norm": 0.07355938851833344, "learning_rate": 9.400063009136325e-05, "loss": 0.2563, "step": 28490 }, { "epoch": 2.3080848995463383, "grad_norm": 0.051536675542593, "learning_rate": 9.399612943876862e-05, "loss": 0.2287, "step": 28491 }, { "epoch": 2.3081659105638366, "grad_norm": 0.06216194108128548, "learning_rate": 9.3991628786174e-05, "loss": 0.2532, "step": 28492 }, { "epoch": 2.3082469215813353, "grad_norm": 0.07674593478441238, "learning_rate": 9.398712813357937e-05, "loss": 0.2364, "step": 28493 }, { "epoch": 2.3083279325988335, "grad_norm": 0.0691610649228096, "learning_rate": 9.398262748098474e-05, "loss": 0.2353, "step": 28494 }, { "epoch": 2.3084089436163318, "grad_norm": 0.05119806528091431, "learning_rate": 9.397812682839012e-05, "loss": 0.2663, "step": 28495 }, { "epoch": 2.3084899546338304, "grad_norm": 0.061490509659051895, "learning_rate": 9.397362617579549e-05, "loss": 0.2408, "step": 28496 }, { "epoch": 2.3085709656513287, "grad_norm": 0.06719590723514557, "learning_rate": 9.396912552320088e-05, "loss": 0.2429, "step": 28497 }, { "epoch": 2.308651976668827, "grad_norm": 0.0660691112279892, "learning_rate": 9.396462487060624e-05, "loss": 0.2449, "step": 28498 }, { "epoch": 2.308732987686325, "grad_norm": 0.08293015509843826, "learning_rate": 9.396012421801161e-05, "loss": 0.2525, "step": 28499 }, { "epoch": 2.308813998703824, "grad_norm": 0.07510707527399063, "learning_rate": 9.3955623565417e-05, "loss": 0.2675, "step": 28500 }, { "epoch": 2.308895009721322, "grad_norm": 0.06140323728322983, "learning_rate": 9.395112291282236e-05, "loss": 0.2454, "step": 28501 }, { "epoch": 2.3089760207388204, "grad_norm": 0.07450032234191895, "learning_rate": 9.394662226022773e-05, "loss": 0.2273, "step": 28502 }, { "epoch": 2.309057031756319, "grad_norm": 0.08496350795030594, "learning_rate": 9.394212160763312e-05, "loss": 0.238, "step": 28503 }, { "epoch": 2.3091380427738173, "grad_norm": 0.052981045097112656, "learning_rate": 9.393762095503848e-05, "loss": 0.1997, "step": 28504 }, { "epoch": 2.3092190537913155, "grad_norm": 0.05658174306154251, "learning_rate": 9.393312030244385e-05, "loss": 0.2151, "step": 28505 }, { "epoch": 2.309300064808814, "grad_norm": 0.0606054812669754, "learning_rate": 9.392861964984924e-05, "loss": 0.2466, "step": 28506 }, { "epoch": 2.3093810758263125, "grad_norm": 0.054300349205732346, "learning_rate": 9.392411899725461e-05, "loss": 0.2442, "step": 28507 }, { "epoch": 2.3094620868438107, "grad_norm": 0.06346888840198517, "learning_rate": 9.391961834465997e-05, "loss": 0.2301, "step": 28508 }, { "epoch": 2.309543097861309, "grad_norm": 0.060989703983068466, "learning_rate": 9.391511769206536e-05, "loss": 0.257, "step": 28509 }, { "epoch": 2.3096241088788076, "grad_norm": 0.05664192885160446, "learning_rate": 9.391061703947073e-05, "loss": 0.25, "step": 28510 }, { "epoch": 2.309705119896306, "grad_norm": 0.06328573077917099, "learning_rate": 9.39061163868761e-05, "loss": 0.2381, "step": 28511 }, { "epoch": 2.309786130913804, "grad_norm": 0.052768316119909286, "learning_rate": 9.390161573428148e-05, "loss": 0.2359, "step": 28512 }, { "epoch": 2.309867141931303, "grad_norm": 0.06358731538057327, "learning_rate": 9.389711508168685e-05, "loss": 0.2454, "step": 28513 }, { "epoch": 2.309948152948801, "grad_norm": 0.056090958416461945, "learning_rate": 9.389261442909221e-05, "loss": 0.244, "step": 28514 }, { "epoch": 2.3100291639662993, "grad_norm": 0.07474549859762192, "learning_rate": 9.38881137764976e-05, "loss": 0.2323, "step": 28515 }, { "epoch": 2.310110174983798, "grad_norm": 0.058619070798158646, "learning_rate": 9.388361312390298e-05, "loss": 0.2427, "step": 28516 }, { "epoch": 2.3101911860012962, "grad_norm": 0.0642256885766983, "learning_rate": 9.387911247130834e-05, "loss": 0.2488, "step": 28517 }, { "epoch": 2.3102721970187945, "grad_norm": 0.07105040550231934, "learning_rate": 9.387461181871372e-05, "loss": 0.2636, "step": 28518 }, { "epoch": 2.310353208036293, "grad_norm": 0.07639192044734955, "learning_rate": 9.38701111661191e-05, "loss": 0.2961, "step": 28519 }, { "epoch": 2.3104342190537914, "grad_norm": 0.06323409825563431, "learning_rate": 9.386561051352446e-05, "loss": 0.2579, "step": 28520 }, { "epoch": 2.3105152300712897, "grad_norm": 0.07321860641241074, "learning_rate": 9.386110986092984e-05, "loss": 0.2771, "step": 28521 }, { "epoch": 2.310596241088788, "grad_norm": 0.0523768812417984, "learning_rate": 9.385660920833522e-05, "loss": 0.2457, "step": 28522 }, { "epoch": 2.3106772521062866, "grad_norm": 0.05895464867353439, "learning_rate": 9.385210855574059e-05, "loss": 0.2525, "step": 28523 }, { "epoch": 2.310758263123785, "grad_norm": 0.06829524785280228, "learning_rate": 9.384760790314596e-05, "loss": 0.2227, "step": 28524 }, { "epoch": 2.310839274141283, "grad_norm": 0.06126768887042999, "learning_rate": 9.384310725055134e-05, "loss": 0.2504, "step": 28525 }, { "epoch": 2.310920285158782, "grad_norm": 0.06391113251447678, "learning_rate": 9.383860659795671e-05, "loss": 0.2241, "step": 28526 }, { "epoch": 2.31100129617628, "grad_norm": 0.07911943644285202, "learning_rate": 9.383410594536209e-05, "loss": 0.2581, "step": 28527 }, { "epoch": 2.3110823071937783, "grad_norm": 0.05488322302699089, "learning_rate": 9.382960529276746e-05, "loss": 0.2314, "step": 28528 }, { "epoch": 2.3111633182112765, "grad_norm": 0.050786539912223816, "learning_rate": 9.382510464017283e-05, "loss": 0.2464, "step": 28529 }, { "epoch": 2.311244329228775, "grad_norm": 0.07747553288936615, "learning_rate": 9.38206039875782e-05, "loss": 0.255, "step": 28530 }, { "epoch": 2.3113253402462735, "grad_norm": 0.06674326956272125, "learning_rate": 9.381610333498358e-05, "loss": 0.2351, "step": 28531 }, { "epoch": 2.3114063512637717, "grad_norm": 0.07949559390544891, "learning_rate": 9.381160268238895e-05, "loss": 0.2757, "step": 28532 }, { "epoch": 2.3114873622812704, "grad_norm": 0.06439467519521713, "learning_rate": 9.380710202979433e-05, "loss": 0.2233, "step": 28533 }, { "epoch": 2.3115683732987686, "grad_norm": 0.06354320049285889, "learning_rate": 9.38026013771997e-05, "loss": 0.2357, "step": 28534 }, { "epoch": 2.311649384316267, "grad_norm": 0.06903377175331116, "learning_rate": 9.379810072460507e-05, "loss": 0.2546, "step": 28535 }, { "epoch": 2.3117303953337656, "grad_norm": 0.05698674917221069, "learning_rate": 9.379360007201045e-05, "loss": 0.2392, "step": 28536 }, { "epoch": 2.311811406351264, "grad_norm": 0.07843542098999023, "learning_rate": 9.378909941941582e-05, "loss": 0.3068, "step": 28537 }, { "epoch": 2.311892417368762, "grad_norm": 0.06648211926221848, "learning_rate": 9.37845987668212e-05, "loss": 0.2375, "step": 28538 }, { "epoch": 2.3119734283862607, "grad_norm": 0.05423009395599365, "learning_rate": 9.378009811422657e-05, "loss": 0.2255, "step": 28539 }, { "epoch": 2.312054439403759, "grad_norm": 0.054902784526348114, "learning_rate": 9.377559746163194e-05, "loss": 0.2453, "step": 28540 }, { "epoch": 2.3121354504212572, "grad_norm": 0.060616981238126755, "learning_rate": 9.377109680903732e-05, "loss": 0.2776, "step": 28541 }, { "epoch": 2.3122164614387555, "grad_norm": 0.06343099474906921, "learning_rate": 9.376659615644269e-05, "loss": 0.2778, "step": 28542 }, { "epoch": 2.312297472456254, "grad_norm": 0.06430594623088837, "learning_rate": 9.376209550384806e-05, "loss": 0.2446, "step": 28543 }, { "epoch": 2.3123784834737524, "grad_norm": 0.07691385596990585, "learning_rate": 9.375759485125344e-05, "loss": 0.254, "step": 28544 }, { "epoch": 2.3124594944912507, "grad_norm": 0.06930742412805557, "learning_rate": 9.375309419865881e-05, "loss": 0.2614, "step": 28545 }, { "epoch": 2.3125405055087493, "grad_norm": 0.07686679810285568, "learning_rate": 9.374859354606418e-05, "loss": 0.2806, "step": 28546 }, { "epoch": 2.3126215165262476, "grad_norm": 0.05961774289608002, "learning_rate": 9.374409289346956e-05, "loss": 0.2176, "step": 28547 }, { "epoch": 2.312702527543746, "grad_norm": 0.05253405496478081, "learning_rate": 9.373959224087493e-05, "loss": 0.2275, "step": 28548 }, { "epoch": 2.3127835385612445, "grad_norm": 0.055466219782829285, "learning_rate": 9.37350915882803e-05, "loss": 0.3152, "step": 28549 }, { "epoch": 2.3128645495787428, "grad_norm": 0.06095070019364357, "learning_rate": 9.373059093568568e-05, "loss": 0.3214, "step": 28550 }, { "epoch": 2.312945560596241, "grad_norm": 0.06326496601104736, "learning_rate": 9.372609028309105e-05, "loss": 0.2667, "step": 28551 }, { "epoch": 2.3130265716137393, "grad_norm": 0.06290464848279953, "learning_rate": 9.372158963049643e-05, "loss": 0.2538, "step": 28552 }, { "epoch": 2.313107582631238, "grad_norm": 0.061679188162088394, "learning_rate": 9.37170889779018e-05, "loss": 0.2405, "step": 28553 }, { "epoch": 2.313188593648736, "grad_norm": 0.07740036398172379, "learning_rate": 9.371258832530717e-05, "loss": 0.2171, "step": 28554 }, { "epoch": 2.3132696046662344, "grad_norm": 0.06478272378444672, "learning_rate": 9.370808767271255e-05, "loss": 0.2422, "step": 28555 }, { "epoch": 2.313350615683733, "grad_norm": 0.0558452382683754, "learning_rate": 9.370358702011792e-05, "loss": 0.2632, "step": 28556 }, { "epoch": 2.3134316267012314, "grad_norm": 0.05222080647945404, "learning_rate": 9.36990863675233e-05, "loss": 0.2371, "step": 28557 }, { "epoch": 2.3135126377187296, "grad_norm": 0.06738220900297165, "learning_rate": 9.369458571492867e-05, "loss": 0.2556, "step": 28558 }, { "epoch": 2.3135936487362283, "grad_norm": 0.0675593838095665, "learning_rate": 9.369008506233404e-05, "loss": 0.2543, "step": 28559 }, { "epoch": 2.3136746597537265, "grad_norm": 0.06700984388589859, "learning_rate": 9.368558440973941e-05, "loss": 0.217, "step": 28560 }, { "epoch": 2.313755670771225, "grad_norm": 0.07644146680831909, "learning_rate": 9.368108375714479e-05, "loss": 0.2359, "step": 28561 }, { "epoch": 2.3138366817887235, "grad_norm": 0.055144716054201126, "learning_rate": 9.367658310455016e-05, "loss": 0.2421, "step": 28562 }, { "epoch": 2.3139176928062217, "grad_norm": 0.06547948718070984, "learning_rate": 9.367208245195554e-05, "loss": 0.218, "step": 28563 }, { "epoch": 2.31399870382372, "grad_norm": 0.06000036746263504, "learning_rate": 9.366758179936091e-05, "loss": 0.2309, "step": 28564 }, { "epoch": 2.314079714841218, "grad_norm": 0.04735422134399414, "learning_rate": 9.366308114676628e-05, "loss": 0.2643, "step": 28565 }, { "epoch": 2.314160725858717, "grad_norm": 0.07013414055109024, "learning_rate": 9.365858049417166e-05, "loss": 0.2308, "step": 28566 }, { "epoch": 2.314241736876215, "grad_norm": 0.05629046633839607, "learning_rate": 9.365407984157703e-05, "loss": 0.2114, "step": 28567 }, { "epoch": 2.3143227478937134, "grad_norm": 0.06310821324586868, "learning_rate": 9.36495791889824e-05, "loss": 0.2241, "step": 28568 }, { "epoch": 2.314403758911212, "grad_norm": 0.06148114427924156, "learning_rate": 9.364507853638778e-05, "loss": 0.2802, "step": 28569 }, { "epoch": 2.3144847699287103, "grad_norm": 0.07152937352657318, "learning_rate": 9.364057788379315e-05, "loss": 0.2365, "step": 28570 }, { "epoch": 2.3145657809462086, "grad_norm": 0.07520348578691483, "learning_rate": 9.363607723119852e-05, "loss": 0.2384, "step": 28571 }, { "epoch": 2.314646791963707, "grad_norm": 0.07106326520442963, "learning_rate": 9.36315765786039e-05, "loss": 0.2678, "step": 28572 }, { "epoch": 2.3147278029812055, "grad_norm": 0.08119279891252518, "learning_rate": 9.362707592600927e-05, "loss": 0.2913, "step": 28573 }, { "epoch": 2.3148088139987038, "grad_norm": 0.07053650170564651, "learning_rate": 9.362257527341464e-05, "loss": 0.2389, "step": 28574 }, { "epoch": 2.314889825016202, "grad_norm": 0.050931304693222046, "learning_rate": 9.361807462082003e-05, "loss": 0.2577, "step": 28575 }, { "epoch": 2.3149708360337007, "grad_norm": 0.07329791784286499, "learning_rate": 9.36135739682254e-05, "loss": 0.207, "step": 28576 }, { "epoch": 2.315051847051199, "grad_norm": 0.07313059270381927, "learning_rate": 9.360907331563077e-05, "loss": 0.2609, "step": 28577 }, { "epoch": 2.315132858068697, "grad_norm": 0.04392017424106598, "learning_rate": 9.360457266303615e-05, "loss": 0.2144, "step": 28578 }, { "epoch": 2.315213869086196, "grad_norm": 0.07469665259122849, "learning_rate": 9.360007201044153e-05, "loss": 0.2913, "step": 28579 }, { "epoch": 2.315294880103694, "grad_norm": 0.08206086605787277, "learning_rate": 9.359557135784689e-05, "loss": 0.2466, "step": 28580 }, { "epoch": 2.3153758911211924, "grad_norm": 0.06451928615570068, "learning_rate": 9.359107070525227e-05, "loss": 0.2784, "step": 28581 }, { "epoch": 2.315456902138691, "grad_norm": 0.06024431437253952, "learning_rate": 9.358657005265765e-05, "loss": 0.2635, "step": 28582 }, { "epoch": 2.3155379131561893, "grad_norm": 0.053838253021240234, "learning_rate": 9.358206940006301e-05, "loss": 0.2289, "step": 28583 }, { "epoch": 2.3156189241736875, "grad_norm": 0.08351702243089676, "learning_rate": 9.35775687474684e-05, "loss": 0.272, "step": 28584 }, { "epoch": 2.315699935191186, "grad_norm": 0.06420436501502991, "learning_rate": 9.357306809487377e-05, "loss": 0.2401, "step": 28585 }, { "epoch": 2.3157809462086845, "grad_norm": 0.07795728743076324, "learning_rate": 9.356856744227913e-05, "loss": 0.2637, "step": 28586 }, { "epoch": 2.3158619572261827, "grad_norm": 0.057210199534893036, "learning_rate": 9.356406678968452e-05, "loss": 0.2675, "step": 28587 }, { "epoch": 2.315942968243681, "grad_norm": 0.05689553543925285, "learning_rate": 9.355956613708989e-05, "loss": 0.2559, "step": 28588 }, { "epoch": 2.3160239792611796, "grad_norm": 0.08148650825023651, "learning_rate": 9.355506548449525e-05, "loss": 0.2419, "step": 28589 }, { "epoch": 2.316104990278678, "grad_norm": 0.08246307820081711, "learning_rate": 9.355056483190064e-05, "loss": 0.2181, "step": 28590 }, { "epoch": 2.316186001296176, "grad_norm": 0.07857725024223328, "learning_rate": 9.354606417930601e-05, "loss": 0.2313, "step": 28591 }, { "epoch": 2.316267012313675, "grad_norm": 0.05609336495399475, "learning_rate": 9.354156352671137e-05, "loss": 0.2738, "step": 28592 }, { "epoch": 2.316348023331173, "grad_norm": 0.06364389508962631, "learning_rate": 9.353706287411676e-05, "loss": 0.278, "step": 28593 }, { "epoch": 2.3164290343486713, "grad_norm": 0.07805577665567398, "learning_rate": 9.353256222152213e-05, "loss": 0.2652, "step": 28594 }, { "epoch": 2.3165100453661696, "grad_norm": 0.07056976109743118, "learning_rate": 9.352806156892749e-05, "loss": 0.2265, "step": 28595 }, { "epoch": 2.3165910563836682, "grad_norm": 0.06576458364725113, "learning_rate": 9.352356091633288e-05, "loss": 0.2315, "step": 28596 }, { "epoch": 2.3166720674011665, "grad_norm": 0.06922286748886108, "learning_rate": 9.351906026373825e-05, "loss": 0.275, "step": 28597 }, { "epoch": 2.3167530784186647, "grad_norm": 0.0591658353805542, "learning_rate": 9.351455961114361e-05, "loss": 0.2421, "step": 28598 }, { "epoch": 2.3168340894361634, "grad_norm": 0.0667690709233284, "learning_rate": 9.3510058958549e-05, "loss": 0.2788, "step": 28599 }, { "epoch": 2.3169151004536617, "grad_norm": 0.0834415853023529, "learning_rate": 9.350555830595437e-05, "loss": 0.2597, "step": 28600 }, { "epoch": 2.31699611147116, "grad_norm": 0.06573021411895752, "learning_rate": 9.350105765335975e-05, "loss": 0.2709, "step": 28601 }, { "epoch": 2.3170771224886586, "grad_norm": 0.06178218126296997, "learning_rate": 9.349655700076512e-05, "loss": 0.2495, "step": 28602 }, { "epoch": 2.317158133506157, "grad_norm": 0.061735160648822784, "learning_rate": 9.349205634817049e-05, "loss": 0.2539, "step": 28603 }, { "epoch": 2.317239144523655, "grad_norm": 0.09074624627828598, "learning_rate": 9.348755569557587e-05, "loss": 0.3201, "step": 28604 }, { "epoch": 2.317320155541154, "grad_norm": 0.06366878747940063, "learning_rate": 9.348305504298124e-05, "loss": 0.2549, "step": 28605 }, { "epoch": 2.317401166558652, "grad_norm": 0.06706615537405014, "learning_rate": 9.347855439038661e-05, "loss": 0.2593, "step": 28606 }, { "epoch": 2.3174821775761503, "grad_norm": 0.058599237352609634, "learning_rate": 9.347405373779199e-05, "loss": 0.2496, "step": 28607 }, { "epoch": 2.317563188593649, "grad_norm": 0.07354261726140976, "learning_rate": 9.346955308519736e-05, "loss": 0.2655, "step": 28608 }, { "epoch": 2.317644199611147, "grad_norm": 0.05688546970486641, "learning_rate": 9.346505243260273e-05, "loss": 0.2277, "step": 28609 }, { "epoch": 2.3177252106286454, "grad_norm": 0.06739480793476105, "learning_rate": 9.346055178000811e-05, "loss": 0.2541, "step": 28610 }, { "epoch": 2.3178062216461437, "grad_norm": 0.06607834249734879, "learning_rate": 9.345605112741348e-05, "loss": 0.2723, "step": 28611 }, { "epoch": 2.3178872326636424, "grad_norm": 0.06151716411113739, "learning_rate": 9.345155047481886e-05, "loss": 0.2445, "step": 28612 }, { "epoch": 2.3179682436811406, "grad_norm": 0.058197587728500366, "learning_rate": 9.344704982222423e-05, "loss": 0.2327, "step": 28613 }, { "epoch": 2.318049254698639, "grad_norm": 0.06791552901268005, "learning_rate": 9.34425491696296e-05, "loss": 0.2655, "step": 28614 }, { "epoch": 2.3181302657161376, "grad_norm": 0.0670228824019432, "learning_rate": 9.343804851703498e-05, "loss": 0.2915, "step": 28615 }, { "epoch": 2.318211276733636, "grad_norm": 0.07326885312795639, "learning_rate": 9.343354786444035e-05, "loss": 0.3023, "step": 28616 }, { "epoch": 2.318292287751134, "grad_norm": 0.05599506199359894, "learning_rate": 9.342904721184572e-05, "loss": 0.2733, "step": 28617 }, { "epoch": 2.3183732987686323, "grad_norm": 0.06617167592048645, "learning_rate": 9.34245465592511e-05, "loss": 0.2475, "step": 28618 }, { "epoch": 2.318454309786131, "grad_norm": 0.05749017000198364, "learning_rate": 9.342004590665647e-05, "loss": 0.2628, "step": 28619 }, { "epoch": 2.3185353208036292, "grad_norm": 0.07162825018167496, "learning_rate": 9.341554525406184e-05, "loss": 0.2922, "step": 28620 }, { "epoch": 2.3186163318211275, "grad_norm": 0.06103399768471718, "learning_rate": 9.341104460146722e-05, "loss": 0.2815, "step": 28621 }, { "epoch": 2.318697342838626, "grad_norm": 0.05452466011047363, "learning_rate": 9.340654394887259e-05, "loss": 0.2352, "step": 28622 }, { "epoch": 2.3187783538561244, "grad_norm": 0.06113947182893753, "learning_rate": 9.340204329627796e-05, "loss": 0.254, "step": 28623 }, { "epoch": 2.3188593648736227, "grad_norm": 0.0528695210814476, "learning_rate": 9.339754264368334e-05, "loss": 0.2403, "step": 28624 }, { "epoch": 2.3189403758911213, "grad_norm": 0.07002796232700348, "learning_rate": 9.339304199108871e-05, "loss": 0.233, "step": 28625 }, { "epoch": 2.3190213869086196, "grad_norm": 0.059731412678956985, "learning_rate": 9.338854133849409e-05, "loss": 0.259, "step": 28626 }, { "epoch": 2.319102397926118, "grad_norm": 0.06515996903181076, "learning_rate": 9.338404068589946e-05, "loss": 0.2598, "step": 28627 }, { "epoch": 2.3191834089436165, "grad_norm": 0.05637449026107788, "learning_rate": 9.337954003330483e-05, "loss": 0.2327, "step": 28628 }, { "epoch": 2.3192644199611148, "grad_norm": 0.05539031699299812, "learning_rate": 9.33750393807102e-05, "loss": 0.235, "step": 28629 }, { "epoch": 2.319345430978613, "grad_norm": 0.0662870705127716, "learning_rate": 9.337053872811558e-05, "loss": 0.2741, "step": 28630 }, { "epoch": 2.3194264419961117, "grad_norm": 0.06665057688951492, "learning_rate": 9.336603807552095e-05, "loss": 0.2643, "step": 28631 }, { "epoch": 2.31950745301361, "grad_norm": 0.05518549680709839, "learning_rate": 9.336153742292633e-05, "loss": 0.2225, "step": 28632 }, { "epoch": 2.319588464031108, "grad_norm": 0.0644635409116745, "learning_rate": 9.33570367703317e-05, "loss": 0.2178, "step": 28633 }, { "epoch": 2.3196694750486064, "grad_norm": 0.060210924595594406, "learning_rate": 9.335253611773707e-05, "loss": 0.3026, "step": 28634 }, { "epoch": 2.319750486066105, "grad_norm": 0.06755194813013077, "learning_rate": 9.334803546514245e-05, "loss": 0.2671, "step": 28635 }, { "epoch": 2.3198314970836034, "grad_norm": 0.06527794897556305, "learning_rate": 9.334353481254782e-05, "loss": 0.2682, "step": 28636 }, { "epoch": 2.3199125081011016, "grad_norm": 0.059565238654613495, "learning_rate": 9.33390341599532e-05, "loss": 0.2281, "step": 28637 }, { "epoch": 2.3199935191186003, "grad_norm": 0.07178928703069687, "learning_rate": 9.333453350735857e-05, "loss": 0.2453, "step": 28638 }, { "epoch": 2.3200745301360985, "grad_norm": 0.05469311401247978, "learning_rate": 9.333003285476394e-05, "loss": 0.2421, "step": 28639 }, { "epoch": 2.320155541153597, "grad_norm": 0.06045643240213394, "learning_rate": 9.332553220216932e-05, "loss": 0.2452, "step": 28640 }, { "epoch": 2.320236552171095, "grad_norm": 0.056732453405857086, "learning_rate": 9.332103154957469e-05, "loss": 0.2053, "step": 28641 }, { "epoch": 2.3203175631885937, "grad_norm": 0.06491853296756744, "learning_rate": 9.331653089698008e-05, "loss": 0.2578, "step": 28642 }, { "epoch": 2.320398574206092, "grad_norm": 0.06337980180978775, "learning_rate": 9.331203024438544e-05, "loss": 0.2679, "step": 28643 }, { "epoch": 2.32047958522359, "grad_norm": 0.06434284150600433, "learning_rate": 9.330752959179081e-05, "loss": 0.249, "step": 28644 }, { "epoch": 2.320560596241089, "grad_norm": 0.06914211064577103, "learning_rate": 9.33030289391962e-05, "loss": 0.2393, "step": 28645 }, { "epoch": 2.320641607258587, "grad_norm": 0.08020402491092682, "learning_rate": 9.329852828660156e-05, "loss": 0.2562, "step": 28646 }, { "epoch": 2.3207226182760854, "grad_norm": 0.061425480991601944, "learning_rate": 9.329402763400693e-05, "loss": 0.2476, "step": 28647 }, { "epoch": 2.320803629293584, "grad_norm": 0.06337305903434753, "learning_rate": 9.328952698141232e-05, "loss": 0.2547, "step": 28648 }, { "epoch": 2.3208846403110823, "grad_norm": 0.06028088927268982, "learning_rate": 9.328502632881768e-05, "loss": 0.2206, "step": 28649 }, { "epoch": 2.3209656513285806, "grad_norm": 0.06460175663232803, "learning_rate": 9.328052567622305e-05, "loss": 0.2312, "step": 28650 }, { "epoch": 2.3210466623460793, "grad_norm": 0.08044122904539108, "learning_rate": 9.327602502362844e-05, "loss": 0.2623, "step": 28651 }, { "epoch": 2.3211276733635775, "grad_norm": 0.08063766360282898, "learning_rate": 9.32715243710338e-05, "loss": 0.2854, "step": 28652 }, { "epoch": 2.3212086843810757, "grad_norm": 0.059449952095746994, "learning_rate": 9.326702371843917e-05, "loss": 0.2184, "step": 28653 }, { "epoch": 2.3212896953985744, "grad_norm": 0.061719294637441635, "learning_rate": 9.326252306584456e-05, "loss": 0.2218, "step": 28654 }, { "epoch": 2.3213707064160727, "grad_norm": 0.06940759718418121, "learning_rate": 9.325802241324992e-05, "loss": 0.2229, "step": 28655 }, { "epoch": 2.321451717433571, "grad_norm": 0.0604344978928566, "learning_rate": 9.325352176065531e-05, "loss": 0.2039, "step": 28656 }, { "epoch": 2.321532728451069, "grad_norm": 0.06056111678481102, "learning_rate": 9.324902110806068e-05, "loss": 0.2522, "step": 28657 }, { "epoch": 2.321613739468568, "grad_norm": 0.07482937723398209, "learning_rate": 9.324452045546604e-05, "loss": 0.2472, "step": 28658 }, { "epoch": 2.321694750486066, "grad_norm": 0.055157896131277084, "learning_rate": 9.324001980287143e-05, "loss": 0.2505, "step": 28659 }, { "epoch": 2.3217757615035644, "grad_norm": 0.062443807721138, "learning_rate": 9.32355191502768e-05, "loss": 0.2007, "step": 28660 }, { "epoch": 2.321856772521063, "grad_norm": 0.06030425429344177, "learning_rate": 9.323101849768216e-05, "loss": 0.2203, "step": 28661 }, { "epoch": 2.3219377835385613, "grad_norm": 0.07104106992483139, "learning_rate": 9.322651784508755e-05, "loss": 0.2457, "step": 28662 }, { "epoch": 2.3220187945560595, "grad_norm": 0.06790819764137268, "learning_rate": 9.322201719249292e-05, "loss": 0.28, "step": 28663 }, { "epoch": 2.3220998055735578, "grad_norm": 0.06834875047206879, "learning_rate": 9.321751653989828e-05, "loss": 0.2454, "step": 28664 }, { "epoch": 2.3221808165910565, "grad_norm": 0.05928588658571243, "learning_rate": 9.321301588730367e-05, "loss": 0.2552, "step": 28665 }, { "epoch": 2.3222618276085547, "grad_norm": 0.07721876353025436, "learning_rate": 9.320851523470904e-05, "loss": 0.2618, "step": 28666 }, { "epoch": 2.322342838626053, "grad_norm": 0.0791599452495575, "learning_rate": 9.32040145821144e-05, "loss": 0.2464, "step": 28667 }, { "epoch": 2.3224238496435516, "grad_norm": 0.06511277705430984, "learning_rate": 9.319951392951979e-05, "loss": 0.2918, "step": 28668 }, { "epoch": 2.32250486066105, "grad_norm": 0.06426123529672623, "learning_rate": 9.319501327692516e-05, "loss": 0.223, "step": 28669 }, { "epoch": 2.322585871678548, "grad_norm": 0.05680019408464432, "learning_rate": 9.319051262433052e-05, "loss": 0.2295, "step": 28670 }, { "epoch": 2.322666882696047, "grad_norm": 0.07702506333589554, "learning_rate": 9.318601197173591e-05, "loss": 0.2666, "step": 28671 }, { "epoch": 2.322747893713545, "grad_norm": 0.08701331168413162, "learning_rate": 9.318151131914128e-05, "loss": 0.2492, "step": 28672 }, { "epoch": 2.3228289047310433, "grad_norm": 0.08785002678632736, "learning_rate": 9.317701066654664e-05, "loss": 0.3024, "step": 28673 }, { "epoch": 2.322909915748542, "grad_norm": 0.06806372106075287, "learning_rate": 9.317251001395203e-05, "loss": 0.2763, "step": 28674 }, { "epoch": 2.3229909267660402, "grad_norm": 0.08264884352684021, "learning_rate": 9.31680093613574e-05, "loss": 0.2541, "step": 28675 }, { "epoch": 2.3230719377835385, "grad_norm": 0.05051686242222786, "learning_rate": 9.316350870876277e-05, "loss": 0.2237, "step": 28676 }, { "epoch": 2.323152948801037, "grad_norm": 0.05707293376326561, "learning_rate": 9.315900805616815e-05, "loss": 0.2311, "step": 28677 }, { "epoch": 2.3232339598185354, "grad_norm": 0.07153897732496262, "learning_rate": 9.315450740357353e-05, "loss": 0.2794, "step": 28678 }, { "epoch": 2.3233149708360337, "grad_norm": 0.08252691477537155, "learning_rate": 9.315000675097889e-05, "loss": 0.293, "step": 28679 }, { "epoch": 2.323395981853532, "grad_norm": 0.078824482858181, "learning_rate": 9.314550609838427e-05, "loss": 0.227, "step": 28680 }, { "epoch": 2.3234769928710306, "grad_norm": 0.057428814470767975, "learning_rate": 9.314100544578965e-05, "loss": 0.2732, "step": 28681 }, { "epoch": 2.323558003888529, "grad_norm": 0.061816342175006866, "learning_rate": 9.313650479319502e-05, "loss": 0.2177, "step": 28682 }, { "epoch": 2.323639014906027, "grad_norm": 0.07356321811676025, "learning_rate": 9.31320041406004e-05, "loss": 0.3128, "step": 28683 }, { "epoch": 2.323720025923526, "grad_norm": 0.07679431885480881, "learning_rate": 9.312750348800577e-05, "loss": 0.2415, "step": 28684 }, { "epoch": 2.323801036941024, "grad_norm": 0.060885075479745865, "learning_rate": 9.312300283541114e-05, "loss": 0.2542, "step": 28685 }, { "epoch": 2.3238820479585223, "grad_norm": 0.06792385131120682, "learning_rate": 9.311850218281652e-05, "loss": 0.2264, "step": 28686 }, { "epoch": 2.3239630589760205, "grad_norm": 0.05504778027534485, "learning_rate": 9.311400153022189e-05, "loss": 0.2534, "step": 28687 }, { "epoch": 2.324044069993519, "grad_norm": 0.07655138522386551, "learning_rate": 9.310950087762726e-05, "loss": 0.2711, "step": 28688 }, { "epoch": 2.3241250810110174, "grad_norm": 0.06925234943628311, "learning_rate": 9.310500022503264e-05, "loss": 0.2786, "step": 28689 }, { "epoch": 2.3242060920285157, "grad_norm": 0.0667187049984932, "learning_rate": 9.310049957243801e-05, "loss": 0.2541, "step": 28690 }, { "epoch": 2.3242871030460144, "grad_norm": 0.06552990525960922, "learning_rate": 9.309599891984338e-05, "loss": 0.3223, "step": 28691 }, { "epoch": 2.3243681140635126, "grad_norm": 0.060230664908885956, "learning_rate": 9.309149826724876e-05, "loss": 0.2444, "step": 28692 }, { "epoch": 2.324449125081011, "grad_norm": 0.07626669108867645, "learning_rate": 9.308699761465413e-05, "loss": 0.2414, "step": 28693 }, { "epoch": 2.3245301360985096, "grad_norm": 0.05662994831800461, "learning_rate": 9.30824969620595e-05, "loss": 0.215, "step": 28694 }, { "epoch": 2.324611147116008, "grad_norm": 0.059049200266599655, "learning_rate": 9.307799630946488e-05, "loss": 0.2491, "step": 28695 }, { "epoch": 2.324692158133506, "grad_norm": 0.05884696543216705, "learning_rate": 9.307349565687025e-05, "loss": 0.233, "step": 28696 }, { "epoch": 2.3247731691510047, "grad_norm": 0.06978729367256165, "learning_rate": 9.306899500427562e-05, "loss": 0.2214, "step": 28697 }, { "epoch": 2.324854180168503, "grad_norm": 0.06161429360508919, "learning_rate": 9.3064494351681e-05, "loss": 0.2235, "step": 28698 }, { "epoch": 2.3249351911860012, "grad_norm": 0.06919985264539719, "learning_rate": 9.305999369908637e-05, "loss": 0.2727, "step": 28699 }, { "epoch": 2.3250162022035, "grad_norm": 0.06399379670619965, "learning_rate": 9.305549304649175e-05, "loss": 0.248, "step": 28700 }, { "epoch": 2.325097213220998, "grad_norm": 0.05922725051641464, "learning_rate": 9.305099239389712e-05, "loss": 0.2056, "step": 28701 }, { "epoch": 2.3251782242384964, "grad_norm": 0.057275738567113876, "learning_rate": 9.304649174130249e-05, "loss": 0.2456, "step": 28702 }, { "epoch": 2.3252592352559946, "grad_norm": 0.06780439615249634, "learning_rate": 9.304199108870787e-05, "loss": 0.2467, "step": 28703 }, { "epoch": 2.3253402462734933, "grad_norm": 0.061287131160497665, "learning_rate": 9.303749043611324e-05, "loss": 0.3068, "step": 28704 }, { "epoch": 2.3254212572909916, "grad_norm": 0.06363238394260406, "learning_rate": 9.303298978351861e-05, "loss": 0.2691, "step": 28705 }, { "epoch": 2.32550226830849, "grad_norm": 0.07219129055738449, "learning_rate": 9.302848913092399e-05, "loss": 0.2758, "step": 28706 }, { "epoch": 2.3255832793259885, "grad_norm": 0.07437929511070251, "learning_rate": 9.302398847832936e-05, "loss": 0.2233, "step": 28707 }, { "epoch": 2.3256642903434868, "grad_norm": 0.055756907910108566, "learning_rate": 9.301948782573473e-05, "loss": 0.2347, "step": 28708 }, { "epoch": 2.325745301360985, "grad_norm": 0.07122205197811127, "learning_rate": 9.301498717314011e-05, "loss": 0.2544, "step": 28709 }, { "epoch": 2.3258263123784833, "grad_norm": 0.065572090446949, "learning_rate": 9.301048652054548e-05, "loss": 0.22, "step": 28710 }, { "epoch": 2.325907323395982, "grad_norm": 0.06668426096439362, "learning_rate": 9.300598586795087e-05, "loss": 0.2722, "step": 28711 }, { "epoch": 2.32598833441348, "grad_norm": 0.08833864331245422, "learning_rate": 9.300148521535623e-05, "loss": 0.2419, "step": 28712 }, { "epoch": 2.3260693454309784, "grad_norm": 0.06704901158809662, "learning_rate": 9.29969845627616e-05, "loss": 0.2561, "step": 28713 }, { "epoch": 2.326150356448477, "grad_norm": 0.06984715163707733, "learning_rate": 9.299248391016699e-05, "loss": 0.2398, "step": 28714 }, { "epoch": 2.3262313674659754, "grad_norm": 0.06736070662736893, "learning_rate": 9.298798325757235e-05, "loss": 0.266, "step": 28715 }, { "epoch": 2.3263123784834736, "grad_norm": 0.06093626841902733, "learning_rate": 9.298348260497772e-05, "loss": 0.2351, "step": 28716 }, { "epoch": 2.3263933895009723, "grad_norm": 0.06312654167413712, "learning_rate": 9.297898195238311e-05, "loss": 0.2475, "step": 28717 }, { "epoch": 2.3264744005184705, "grad_norm": 0.061158306896686554, "learning_rate": 9.297448129978847e-05, "loss": 0.2674, "step": 28718 }, { "epoch": 2.326555411535969, "grad_norm": 0.07095766812562943, "learning_rate": 9.296998064719384e-05, "loss": 0.2411, "step": 28719 }, { "epoch": 2.3266364225534675, "grad_norm": 0.06944864988327026, "learning_rate": 9.296547999459923e-05, "loss": 0.2305, "step": 28720 }, { "epoch": 2.3267174335709657, "grad_norm": 0.06484808772802353, "learning_rate": 9.296097934200459e-05, "loss": 0.246, "step": 28721 }, { "epoch": 2.326798444588464, "grad_norm": 0.06880709528923035, "learning_rate": 9.295647868940997e-05, "loss": 0.2772, "step": 28722 }, { "epoch": 2.3268794556059627, "grad_norm": 0.05179981887340546, "learning_rate": 9.295197803681535e-05, "loss": 0.2215, "step": 28723 }, { "epoch": 2.326960466623461, "grad_norm": 0.06509382277727127, "learning_rate": 9.294747738422071e-05, "loss": 0.2636, "step": 28724 }, { "epoch": 2.327041477640959, "grad_norm": 0.06014722213149071, "learning_rate": 9.294297673162609e-05, "loss": 0.251, "step": 28725 }, { "epoch": 2.3271224886584574, "grad_norm": 0.07093410938978195, "learning_rate": 9.293847607903147e-05, "loss": 0.2665, "step": 28726 }, { "epoch": 2.327203499675956, "grad_norm": 0.054343074560165405, "learning_rate": 9.293397542643683e-05, "loss": 0.264, "step": 28727 }, { "epoch": 2.3272845106934543, "grad_norm": 0.07552266865968704, "learning_rate": 9.29294747738422e-05, "loss": 0.2363, "step": 28728 }, { "epoch": 2.3273655217109526, "grad_norm": 0.06929947435855865, "learning_rate": 9.29249741212476e-05, "loss": 0.2754, "step": 28729 }, { "epoch": 2.3274465327284513, "grad_norm": 0.0776057094335556, "learning_rate": 9.292047346865295e-05, "loss": 0.2586, "step": 28730 }, { "epoch": 2.3275275437459495, "grad_norm": 0.06017788499593735, "learning_rate": 9.291597281605833e-05, "loss": 0.243, "step": 28731 }, { "epoch": 2.3276085547634477, "grad_norm": 0.06438402831554413, "learning_rate": 9.291147216346371e-05, "loss": 0.266, "step": 28732 }, { "epoch": 2.327689565780946, "grad_norm": 0.06785589456558228, "learning_rate": 9.290697151086907e-05, "loss": 0.2935, "step": 28733 }, { "epoch": 2.3277705767984447, "grad_norm": 0.06721623986959457, "learning_rate": 9.290247085827446e-05, "loss": 0.2814, "step": 28734 }, { "epoch": 2.327851587815943, "grad_norm": 0.053944192826747894, "learning_rate": 9.289797020567984e-05, "loss": 0.2258, "step": 28735 }, { "epoch": 2.327932598833441, "grad_norm": 0.059720806777477264, "learning_rate": 9.28934695530852e-05, "loss": 0.2323, "step": 28736 }, { "epoch": 2.32801360985094, "grad_norm": 0.05844448506832123, "learning_rate": 9.288896890049058e-05, "loss": 0.2292, "step": 28737 }, { "epoch": 2.328094620868438, "grad_norm": 0.06499190628528595, "learning_rate": 9.288446824789596e-05, "loss": 0.2655, "step": 28738 }, { "epoch": 2.3281756318859363, "grad_norm": 0.0609806589782238, "learning_rate": 9.287996759530132e-05, "loss": 0.2413, "step": 28739 }, { "epoch": 2.328256642903435, "grad_norm": 0.06576729565858841, "learning_rate": 9.28754669427067e-05, "loss": 0.2281, "step": 28740 }, { "epoch": 2.3283376539209333, "grad_norm": 0.06825067102909088, "learning_rate": 9.287096629011208e-05, "loss": 0.2359, "step": 28741 }, { "epoch": 2.3284186649384315, "grad_norm": 0.05548768490552902, "learning_rate": 9.286646563751744e-05, "loss": 0.2472, "step": 28742 }, { "epoch": 2.32849967595593, "grad_norm": 0.057399872690439224, "learning_rate": 9.286196498492282e-05, "loss": 0.2318, "step": 28743 }, { "epoch": 2.3285806869734285, "grad_norm": 0.06823092699050903, "learning_rate": 9.28574643323282e-05, "loss": 0.2614, "step": 28744 }, { "epoch": 2.3286616979909267, "grad_norm": 0.06344280391931534, "learning_rate": 9.285296367973356e-05, "loss": 0.2802, "step": 28745 }, { "epoch": 2.3287427090084254, "grad_norm": 0.06706608086824417, "learning_rate": 9.284846302713895e-05, "loss": 0.2509, "step": 28746 }, { "epoch": 2.3288237200259236, "grad_norm": 0.07321401685476303, "learning_rate": 9.284396237454432e-05, "loss": 0.2632, "step": 28747 }, { "epoch": 2.328904731043422, "grad_norm": 0.07312627881765366, "learning_rate": 9.283946172194968e-05, "loss": 0.284, "step": 28748 }, { "epoch": 2.32898574206092, "grad_norm": 0.07281111925840378, "learning_rate": 9.283496106935507e-05, "loss": 0.2649, "step": 28749 }, { "epoch": 2.329066753078419, "grad_norm": 0.07355530560016632, "learning_rate": 9.283046041676044e-05, "loss": 0.2446, "step": 28750 }, { "epoch": 2.329147764095917, "grad_norm": 0.07079508900642395, "learning_rate": 9.28259597641658e-05, "loss": 0.2546, "step": 28751 }, { "epoch": 2.3292287751134153, "grad_norm": 0.07261772453784943, "learning_rate": 9.282145911157119e-05, "loss": 0.2631, "step": 28752 }, { "epoch": 2.329309786130914, "grad_norm": 0.06484165787696838, "learning_rate": 9.281695845897656e-05, "loss": 0.2186, "step": 28753 }, { "epoch": 2.3293907971484122, "grad_norm": 0.05773812532424927, "learning_rate": 9.281245780638192e-05, "loss": 0.206, "step": 28754 }, { "epoch": 2.3294718081659105, "grad_norm": 0.062474749982357025, "learning_rate": 9.280795715378731e-05, "loss": 0.2643, "step": 28755 }, { "epoch": 2.3295528191834087, "grad_norm": 0.06529214978218079, "learning_rate": 9.280345650119268e-05, "loss": 0.2855, "step": 28756 }, { "epoch": 2.3296338302009074, "grad_norm": 0.06362579017877579, "learning_rate": 9.279895584859804e-05, "loss": 0.2055, "step": 28757 }, { "epoch": 2.3297148412184057, "grad_norm": 0.07225839793682098, "learning_rate": 9.279445519600343e-05, "loss": 0.2555, "step": 28758 }, { "epoch": 2.329795852235904, "grad_norm": 0.059502191841602325, "learning_rate": 9.27899545434088e-05, "loss": 0.2313, "step": 28759 }, { "epoch": 2.3298768632534026, "grad_norm": 0.07234331220388412, "learning_rate": 9.278545389081418e-05, "loss": 0.2666, "step": 28760 }, { "epoch": 2.329957874270901, "grad_norm": 0.06535720825195312, "learning_rate": 9.278095323821955e-05, "loss": 0.2418, "step": 28761 }, { "epoch": 2.330038885288399, "grad_norm": 0.07945813983678818, "learning_rate": 9.277645258562492e-05, "loss": 0.2588, "step": 28762 }, { "epoch": 2.3301198963058978, "grad_norm": 0.06043941527605057, "learning_rate": 9.27719519330303e-05, "loss": 0.2362, "step": 28763 }, { "epoch": 2.330200907323396, "grad_norm": 0.06322724372148514, "learning_rate": 9.276745128043567e-05, "loss": 0.2405, "step": 28764 }, { "epoch": 2.3302819183408943, "grad_norm": 0.0783979520201683, "learning_rate": 9.276295062784104e-05, "loss": 0.295, "step": 28765 }, { "epoch": 2.330362929358393, "grad_norm": 0.06260982155799866, "learning_rate": 9.275844997524642e-05, "loss": 0.2632, "step": 28766 }, { "epoch": 2.330443940375891, "grad_norm": 0.0643438994884491, "learning_rate": 9.275394932265179e-05, "loss": 0.2328, "step": 28767 }, { "epoch": 2.3305249513933894, "grad_norm": 0.07150939851999283, "learning_rate": 9.274944867005716e-05, "loss": 0.2515, "step": 28768 }, { "epoch": 2.3306059624108877, "grad_norm": 0.07017625123262405, "learning_rate": 9.274494801746254e-05, "loss": 0.2381, "step": 28769 }, { "epoch": 2.3306869734283864, "grad_norm": 0.07019396871328354, "learning_rate": 9.274044736486791e-05, "loss": 0.2661, "step": 28770 }, { "epoch": 2.3307679844458846, "grad_norm": 0.06480173021554947, "learning_rate": 9.273594671227329e-05, "loss": 0.261, "step": 28771 }, { "epoch": 2.330848995463383, "grad_norm": 0.07203859835863113, "learning_rate": 9.273144605967866e-05, "loss": 0.2598, "step": 28772 }, { "epoch": 2.3309300064808816, "grad_norm": 0.055770982056856155, "learning_rate": 9.272694540708403e-05, "loss": 0.2475, "step": 28773 }, { "epoch": 2.33101101749838, "grad_norm": 0.07176729291677475, "learning_rate": 9.27224447544894e-05, "loss": 0.3045, "step": 28774 }, { "epoch": 2.331092028515878, "grad_norm": 0.07138893753290176, "learning_rate": 9.271794410189478e-05, "loss": 0.2374, "step": 28775 }, { "epoch": 2.3311730395333763, "grad_norm": 0.07597170025110245, "learning_rate": 9.271344344930015e-05, "loss": 0.244, "step": 28776 }, { "epoch": 2.331254050550875, "grad_norm": 0.06030385568737984, "learning_rate": 9.270894279670553e-05, "loss": 0.309, "step": 28777 }, { "epoch": 2.3313350615683732, "grad_norm": 0.054813820868730545, "learning_rate": 9.27044421441109e-05, "loss": 0.2555, "step": 28778 }, { "epoch": 2.3314160725858715, "grad_norm": 0.06930273026227951, "learning_rate": 9.269994149151627e-05, "loss": 0.2483, "step": 28779 }, { "epoch": 2.33149708360337, "grad_norm": 0.07116469740867615, "learning_rate": 9.269544083892165e-05, "loss": 0.2432, "step": 28780 }, { "epoch": 2.3315780946208684, "grad_norm": 0.06757555902004242, "learning_rate": 9.269094018632702e-05, "loss": 0.3001, "step": 28781 }, { "epoch": 2.3316591056383666, "grad_norm": 0.06617242097854614, "learning_rate": 9.26864395337324e-05, "loss": 0.242, "step": 28782 }, { "epoch": 2.3317401166558653, "grad_norm": 0.06303367763757706, "learning_rate": 9.268193888113777e-05, "loss": 0.2311, "step": 28783 }, { "epoch": 2.3318211276733636, "grad_norm": 0.06040335074067116, "learning_rate": 9.267743822854314e-05, "loss": 0.2501, "step": 28784 }, { "epoch": 2.331902138690862, "grad_norm": 0.07035958021879196, "learning_rate": 9.267293757594852e-05, "loss": 0.272, "step": 28785 }, { "epoch": 2.3319831497083605, "grad_norm": 0.07982959598302841, "learning_rate": 9.26684369233539e-05, "loss": 0.2289, "step": 28786 }, { "epoch": 2.3320641607258588, "grad_norm": 0.060289278626441956, "learning_rate": 9.266393627075926e-05, "loss": 0.2461, "step": 28787 }, { "epoch": 2.332145171743357, "grad_norm": 0.06692136079072952, "learning_rate": 9.265943561816464e-05, "loss": 0.2686, "step": 28788 }, { "epoch": 2.3322261827608557, "grad_norm": 0.07542910426855087, "learning_rate": 9.265493496557002e-05, "loss": 0.2549, "step": 28789 }, { "epoch": 2.332307193778354, "grad_norm": 0.06903022527694702, "learning_rate": 9.265043431297538e-05, "loss": 0.2434, "step": 28790 }, { "epoch": 2.332388204795852, "grad_norm": 0.07558293640613556, "learning_rate": 9.264593366038076e-05, "loss": 0.2725, "step": 28791 }, { "epoch": 2.3324692158133504, "grad_norm": 0.06524661928415298, "learning_rate": 9.264143300778614e-05, "loss": 0.3046, "step": 28792 }, { "epoch": 2.332550226830849, "grad_norm": 0.06829063594341278, "learning_rate": 9.26369323551915e-05, "loss": 0.2546, "step": 28793 }, { "epoch": 2.3326312378483474, "grad_norm": 0.060153085738420486, "learning_rate": 9.263243170259688e-05, "loss": 0.2406, "step": 28794 }, { "epoch": 2.3327122488658456, "grad_norm": 0.06505537033081055, "learning_rate": 9.262793105000227e-05, "loss": 0.2498, "step": 28795 }, { "epoch": 2.3327932598833443, "grad_norm": 0.06308627128601074, "learning_rate": 9.262343039740763e-05, "loss": 0.2413, "step": 28796 }, { "epoch": 2.3328742709008425, "grad_norm": 0.07692531496286392, "learning_rate": 9.2618929744813e-05, "loss": 0.223, "step": 28797 }, { "epoch": 2.332955281918341, "grad_norm": 0.07463797926902771, "learning_rate": 9.261442909221839e-05, "loss": 0.2878, "step": 28798 }, { "epoch": 2.333036292935839, "grad_norm": 0.06551359593868256, "learning_rate": 9.260992843962375e-05, "loss": 0.2374, "step": 28799 }, { "epoch": 2.3331173039533377, "grad_norm": 0.0611150823533535, "learning_rate": 9.260542778702912e-05, "loss": 0.2136, "step": 28800 }, { "epoch": 2.333198314970836, "grad_norm": 0.04823988303542137, "learning_rate": 9.260092713443451e-05, "loss": 0.2037, "step": 28801 }, { "epoch": 2.333279325988334, "grad_norm": 0.05834527313709259, "learning_rate": 9.259642648183987e-05, "loss": 0.2723, "step": 28802 }, { "epoch": 2.333360337005833, "grad_norm": 0.0750277191400528, "learning_rate": 9.259192582924524e-05, "loss": 0.2637, "step": 28803 }, { "epoch": 2.333441348023331, "grad_norm": 0.06466560810804367, "learning_rate": 9.258742517665063e-05, "loss": 0.2512, "step": 28804 }, { "epoch": 2.3335223590408294, "grad_norm": 0.06680560111999512, "learning_rate": 9.258292452405599e-05, "loss": 0.2453, "step": 28805 }, { "epoch": 2.333603370058328, "grad_norm": 0.07900147140026093, "learning_rate": 9.257842387146136e-05, "loss": 0.2408, "step": 28806 }, { "epoch": 2.3336843810758263, "grad_norm": 0.08736459165811539, "learning_rate": 9.257392321886675e-05, "loss": 0.2717, "step": 28807 }, { "epoch": 2.3337653920933246, "grad_norm": 0.06984545290470123, "learning_rate": 9.256942256627211e-05, "loss": 0.2761, "step": 28808 }, { "epoch": 2.3338464031108233, "grad_norm": 0.05604099482297897, "learning_rate": 9.256492191367748e-05, "loss": 0.224, "step": 28809 }, { "epoch": 2.3339274141283215, "grad_norm": 0.07051841169595718, "learning_rate": 9.256042126108287e-05, "loss": 0.2637, "step": 28810 }, { "epoch": 2.3340084251458197, "grad_norm": 0.07440926134586334, "learning_rate": 9.255592060848823e-05, "loss": 0.2557, "step": 28811 }, { "epoch": 2.3340894361633184, "grad_norm": 0.07619215548038483, "learning_rate": 9.25514199558936e-05, "loss": 0.2939, "step": 28812 }, { "epoch": 2.3341704471808167, "grad_norm": 0.06554830074310303, "learning_rate": 9.254691930329899e-05, "loss": 0.2635, "step": 28813 }, { "epoch": 2.334251458198315, "grad_norm": 0.05629369616508484, "learning_rate": 9.254241865070435e-05, "loss": 0.2119, "step": 28814 }, { "epoch": 2.334332469215813, "grad_norm": 0.05875800922513008, "learning_rate": 9.253791799810974e-05, "loss": 0.2599, "step": 28815 }, { "epoch": 2.334413480233312, "grad_norm": 0.06345994770526886, "learning_rate": 9.253341734551511e-05, "loss": 0.2424, "step": 28816 }, { "epoch": 2.33449449125081, "grad_norm": 0.079732745885849, "learning_rate": 9.252891669292047e-05, "loss": 0.2379, "step": 28817 }, { "epoch": 2.3345755022683083, "grad_norm": 0.07231254875659943, "learning_rate": 9.252441604032586e-05, "loss": 0.2322, "step": 28818 }, { "epoch": 2.334656513285807, "grad_norm": 0.061505191028118134, "learning_rate": 9.251991538773123e-05, "loss": 0.2371, "step": 28819 }, { "epoch": 2.3347375243033053, "grad_norm": 0.07146601378917694, "learning_rate": 9.251541473513659e-05, "loss": 0.2746, "step": 28820 }, { "epoch": 2.3348185353208035, "grad_norm": 0.05789024010300636, "learning_rate": 9.251091408254198e-05, "loss": 0.2869, "step": 28821 }, { "epoch": 2.3348995463383018, "grad_norm": 0.07208845764398575, "learning_rate": 9.250641342994735e-05, "loss": 0.228, "step": 28822 }, { "epoch": 2.3349805573558005, "grad_norm": 0.05617007613182068, "learning_rate": 9.250191277735271e-05, "loss": 0.2676, "step": 28823 }, { "epoch": 2.3350615683732987, "grad_norm": 0.07315897941589355, "learning_rate": 9.24974121247581e-05, "loss": 0.2697, "step": 28824 }, { "epoch": 2.335142579390797, "grad_norm": 0.07798895984888077, "learning_rate": 9.249291147216347e-05, "loss": 0.2535, "step": 28825 }, { "epoch": 2.3352235904082956, "grad_norm": 0.07685133814811707, "learning_rate": 9.248841081956883e-05, "loss": 0.3038, "step": 28826 }, { "epoch": 2.335304601425794, "grad_norm": 0.06907043606042862, "learning_rate": 9.248391016697422e-05, "loss": 0.2836, "step": 28827 }, { "epoch": 2.335385612443292, "grad_norm": 0.06489613652229309, "learning_rate": 9.24794095143796e-05, "loss": 0.2511, "step": 28828 }, { "epoch": 2.335466623460791, "grad_norm": 0.07132221758365631, "learning_rate": 9.247490886178495e-05, "loss": 0.2657, "step": 28829 }, { "epoch": 2.335547634478289, "grad_norm": 0.05750245600938797, "learning_rate": 9.247040820919034e-05, "loss": 0.2246, "step": 28830 }, { "epoch": 2.3356286454957873, "grad_norm": 0.0631583034992218, "learning_rate": 9.246590755659571e-05, "loss": 0.2372, "step": 28831 }, { "epoch": 2.335709656513286, "grad_norm": 0.06106153503060341, "learning_rate": 9.246140690400107e-05, "loss": 0.2485, "step": 28832 }, { "epoch": 2.3357906675307842, "grad_norm": 0.0786064937710762, "learning_rate": 9.245690625140646e-05, "loss": 0.2482, "step": 28833 }, { "epoch": 2.3358716785482825, "grad_norm": 0.0773613229393959, "learning_rate": 9.245240559881184e-05, "loss": 0.281, "step": 28834 }, { "epoch": 2.335952689565781, "grad_norm": 0.08510950952768326, "learning_rate": 9.24479049462172e-05, "loss": 0.2649, "step": 28835 }, { "epoch": 2.3360337005832794, "grad_norm": 0.0630519837141037, "learning_rate": 9.244340429362258e-05, "loss": 0.2635, "step": 28836 }, { "epoch": 2.3361147116007777, "grad_norm": 0.06939224153757095, "learning_rate": 9.243890364102796e-05, "loss": 0.282, "step": 28837 }, { "epoch": 2.336195722618276, "grad_norm": 0.06594755500555038, "learning_rate": 9.243440298843332e-05, "loss": 0.2541, "step": 28838 }, { "epoch": 2.3362767336357746, "grad_norm": 0.0586162731051445, "learning_rate": 9.24299023358387e-05, "loss": 0.2118, "step": 28839 }, { "epoch": 2.336357744653273, "grad_norm": 0.06778659671545029, "learning_rate": 9.242540168324408e-05, "loss": 0.2217, "step": 28840 }, { "epoch": 2.336438755670771, "grad_norm": 0.07562971115112305, "learning_rate": 9.242090103064945e-05, "loss": 0.2907, "step": 28841 }, { "epoch": 2.3365197666882698, "grad_norm": 0.06536746025085449, "learning_rate": 9.241640037805482e-05, "loss": 0.2519, "step": 28842 }, { "epoch": 2.336600777705768, "grad_norm": 0.07464933395385742, "learning_rate": 9.24118997254602e-05, "loss": 0.2423, "step": 28843 }, { "epoch": 2.3366817887232663, "grad_norm": 0.07020731270313263, "learning_rate": 9.240739907286557e-05, "loss": 0.2366, "step": 28844 }, { "epoch": 2.3367627997407645, "grad_norm": 0.06497664004564285, "learning_rate": 9.240289842027095e-05, "loss": 0.2522, "step": 28845 }, { "epoch": 2.336843810758263, "grad_norm": 0.05622991919517517, "learning_rate": 9.239839776767632e-05, "loss": 0.2252, "step": 28846 }, { "epoch": 2.3369248217757614, "grad_norm": 0.06055394187569618, "learning_rate": 9.239389711508169e-05, "loss": 0.2235, "step": 28847 }, { "epoch": 2.3370058327932597, "grad_norm": 0.06988414376974106, "learning_rate": 9.238939646248707e-05, "loss": 0.2199, "step": 28848 }, { "epoch": 2.3370868438107584, "grad_norm": 0.05594763904809952, "learning_rate": 9.238489580989244e-05, "loss": 0.262, "step": 28849 }, { "epoch": 2.3371678548282566, "grad_norm": 0.07687854021787643, "learning_rate": 9.238039515729781e-05, "loss": 0.2651, "step": 28850 }, { "epoch": 2.337248865845755, "grad_norm": 0.0715331956744194, "learning_rate": 9.237589450470319e-05, "loss": 0.2874, "step": 28851 }, { "epoch": 2.3373298768632536, "grad_norm": 0.05961179360747337, "learning_rate": 9.237139385210856e-05, "loss": 0.2296, "step": 28852 }, { "epoch": 2.337410887880752, "grad_norm": 0.05749481916427612, "learning_rate": 9.236689319951393e-05, "loss": 0.2327, "step": 28853 }, { "epoch": 2.33749189889825, "grad_norm": 0.0786358043551445, "learning_rate": 9.236239254691931e-05, "loss": 0.2288, "step": 28854 }, { "epoch": 2.3375729099157487, "grad_norm": 0.0550704151391983, "learning_rate": 9.235789189432468e-05, "loss": 0.2517, "step": 28855 }, { "epoch": 2.337653920933247, "grad_norm": 0.05813801661133766, "learning_rate": 9.235339124173005e-05, "loss": 0.2494, "step": 28856 }, { "epoch": 2.337734931950745, "grad_norm": 0.05280022695660591, "learning_rate": 9.234889058913543e-05, "loss": 0.2242, "step": 28857 }, { "epoch": 2.337815942968244, "grad_norm": 0.07368249446153641, "learning_rate": 9.23443899365408e-05, "loss": 0.2554, "step": 28858 }, { "epoch": 2.337896953985742, "grad_norm": 0.07378704845905304, "learning_rate": 9.233988928394618e-05, "loss": 0.2355, "step": 28859 }, { "epoch": 2.3379779650032404, "grad_norm": 0.06204526498913765, "learning_rate": 9.233538863135155e-05, "loss": 0.2243, "step": 28860 }, { "epoch": 2.3380589760207386, "grad_norm": 0.06535429507493973, "learning_rate": 9.233088797875692e-05, "loss": 0.2564, "step": 28861 }, { "epoch": 2.3381399870382373, "grad_norm": 0.0819898247718811, "learning_rate": 9.23263873261623e-05, "loss": 0.2373, "step": 28862 }, { "epoch": 2.3382209980557356, "grad_norm": 0.0712568461894989, "learning_rate": 9.232188667356767e-05, "loss": 0.285, "step": 28863 }, { "epoch": 2.338302009073234, "grad_norm": 0.060597095638513565, "learning_rate": 9.231738602097304e-05, "loss": 0.2217, "step": 28864 }, { "epoch": 2.3383830200907325, "grad_norm": 0.08113036304712296, "learning_rate": 9.231288536837842e-05, "loss": 0.2632, "step": 28865 }, { "epoch": 2.3384640311082308, "grad_norm": 0.06157940253615379, "learning_rate": 9.230838471578379e-05, "loss": 0.2237, "step": 28866 }, { "epoch": 2.338545042125729, "grad_norm": 0.06427460163831711, "learning_rate": 9.230388406318918e-05, "loss": 0.2304, "step": 28867 }, { "epoch": 2.3386260531432272, "grad_norm": 0.07019753754138947, "learning_rate": 9.229938341059454e-05, "loss": 0.2394, "step": 28868 }, { "epoch": 2.338707064160726, "grad_norm": 0.062393609434366226, "learning_rate": 9.229488275799991e-05, "loss": 0.2255, "step": 28869 }, { "epoch": 2.338788075178224, "grad_norm": 0.07597274333238602, "learning_rate": 9.22903821054053e-05, "loss": 0.262, "step": 28870 }, { "epoch": 2.3388690861957224, "grad_norm": 0.06904326379299164, "learning_rate": 9.228588145281066e-05, "loss": 0.2396, "step": 28871 }, { "epoch": 2.338950097213221, "grad_norm": 0.06872831284999847, "learning_rate": 9.228138080021603e-05, "loss": 0.2541, "step": 28872 }, { "epoch": 2.3390311082307194, "grad_norm": 0.08009593188762665, "learning_rate": 9.227688014762142e-05, "loss": 0.2615, "step": 28873 }, { "epoch": 2.3391121192482176, "grad_norm": 0.07586513459682465, "learning_rate": 9.227237949502678e-05, "loss": 0.2718, "step": 28874 }, { "epoch": 2.3391931302657163, "grad_norm": 0.058345962315797806, "learning_rate": 9.226787884243215e-05, "loss": 0.2233, "step": 28875 }, { "epoch": 2.3392741412832145, "grad_norm": 0.06970062851905823, "learning_rate": 9.226337818983754e-05, "loss": 0.2233, "step": 28876 }, { "epoch": 2.339355152300713, "grad_norm": 0.08210455626249313, "learning_rate": 9.22588775372429e-05, "loss": 0.266, "step": 28877 }, { "epoch": 2.3394361633182115, "grad_norm": 0.08800669759511948, "learning_rate": 9.225437688464827e-05, "loss": 0.2611, "step": 28878 }, { "epoch": 2.3395171743357097, "grad_norm": 0.06274203956127167, "learning_rate": 9.224987623205366e-05, "loss": 0.2301, "step": 28879 }, { "epoch": 2.339598185353208, "grad_norm": 0.05375487357378006, "learning_rate": 9.224537557945902e-05, "loss": 0.2326, "step": 28880 }, { "epoch": 2.3396791963707066, "grad_norm": 0.06286244839429855, "learning_rate": 9.22408749268644e-05, "loss": 0.2318, "step": 28881 }, { "epoch": 2.339760207388205, "grad_norm": 0.07534752041101456, "learning_rate": 9.223637427426978e-05, "loss": 0.2476, "step": 28882 }, { "epoch": 2.339841218405703, "grad_norm": 0.0626584067940712, "learning_rate": 9.223187362167514e-05, "loss": 0.2728, "step": 28883 }, { "epoch": 2.3399222294232014, "grad_norm": 0.05673712119460106, "learning_rate": 9.222737296908052e-05, "loss": 0.2632, "step": 28884 }, { "epoch": 2.3400032404407, "grad_norm": 0.055177003145217896, "learning_rate": 9.22228723164859e-05, "loss": 0.2225, "step": 28885 }, { "epoch": 2.3400842514581983, "grad_norm": 0.06031114235520363, "learning_rate": 9.221837166389126e-05, "loss": 0.2577, "step": 28886 }, { "epoch": 2.3401652624756966, "grad_norm": 0.062008291482925415, "learning_rate": 9.221387101129664e-05, "loss": 0.2615, "step": 28887 }, { "epoch": 2.3402462734931953, "grad_norm": 0.05988309159874916, "learning_rate": 9.220937035870202e-05, "loss": 0.2788, "step": 28888 }, { "epoch": 2.3403272845106935, "grad_norm": 0.08174771070480347, "learning_rate": 9.220486970610738e-05, "loss": 0.2649, "step": 28889 }, { "epoch": 2.3404082955281917, "grad_norm": 0.07654719054698944, "learning_rate": 9.220036905351276e-05, "loss": 0.2373, "step": 28890 }, { "epoch": 2.34048930654569, "grad_norm": 0.05224039405584335, "learning_rate": 9.219586840091814e-05, "loss": 0.1982, "step": 28891 }, { "epoch": 2.3405703175631887, "grad_norm": 0.06260054558515549, "learning_rate": 9.21913677483235e-05, "loss": 0.2508, "step": 28892 }, { "epoch": 2.340651328580687, "grad_norm": 0.05155821144580841, "learning_rate": 9.218686709572889e-05, "loss": 0.2476, "step": 28893 }, { "epoch": 2.340732339598185, "grad_norm": 0.06139184907078743, "learning_rate": 9.218236644313427e-05, "loss": 0.2475, "step": 28894 }, { "epoch": 2.340813350615684, "grad_norm": 0.0666014552116394, "learning_rate": 9.217786579053963e-05, "loss": 0.2513, "step": 28895 }, { "epoch": 2.340894361633182, "grad_norm": 0.06419499218463898, "learning_rate": 9.217336513794501e-05, "loss": 0.2276, "step": 28896 }, { "epoch": 2.3409753726506803, "grad_norm": 0.08260070532560349, "learning_rate": 9.216886448535039e-05, "loss": 0.2799, "step": 28897 }, { "epoch": 2.341056383668179, "grad_norm": 0.061513788998126984, "learning_rate": 9.216436383275575e-05, "loss": 0.2532, "step": 28898 }, { "epoch": 2.3411373946856773, "grad_norm": 0.06428606063127518, "learning_rate": 9.215986318016113e-05, "loss": 0.2677, "step": 28899 }, { "epoch": 2.3412184057031755, "grad_norm": 0.07828311622142792, "learning_rate": 9.215536252756651e-05, "loss": 0.247, "step": 28900 }, { "epoch": 2.341299416720674, "grad_norm": 0.08125907927751541, "learning_rate": 9.215086187497187e-05, "loss": 0.2534, "step": 28901 }, { "epoch": 2.3413804277381725, "grad_norm": 0.0653858631849289, "learning_rate": 9.214636122237725e-05, "loss": 0.251, "step": 28902 }, { "epoch": 2.3414614387556707, "grad_norm": 0.06650615483522415, "learning_rate": 9.214186056978263e-05, "loss": 0.2561, "step": 28903 }, { "epoch": 2.3415424497731694, "grad_norm": 0.0694642886519432, "learning_rate": 9.213735991718799e-05, "loss": 0.2732, "step": 28904 }, { "epoch": 2.3416234607906676, "grad_norm": 0.07338881492614746, "learning_rate": 9.213285926459338e-05, "loss": 0.263, "step": 28905 }, { "epoch": 2.341704471808166, "grad_norm": 0.05784222483634949, "learning_rate": 9.212835861199875e-05, "loss": 0.2523, "step": 28906 }, { "epoch": 2.341785482825664, "grad_norm": 0.06689013540744781, "learning_rate": 9.212385795940411e-05, "loss": 0.242, "step": 28907 }, { "epoch": 2.341866493843163, "grad_norm": 0.0690750703215599, "learning_rate": 9.21193573068095e-05, "loss": 0.2491, "step": 28908 }, { "epoch": 2.341947504860661, "grad_norm": 0.0644659698009491, "learning_rate": 9.211485665421487e-05, "loss": 0.2304, "step": 28909 }, { "epoch": 2.3420285158781593, "grad_norm": 0.06285371631383896, "learning_rate": 9.211035600162023e-05, "loss": 0.2224, "step": 28910 }, { "epoch": 2.342109526895658, "grad_norm": 0.06495443731546402, "learning_rate": 9.210585534902562e-05, "loss": 0.2385, "step": 28911 }, { "epoch": 2.3421905379131562, "grad_norm": 0.07020930200815201, "learning_rate": 9.210135469643099e-05, "loss": 0.2388, "step": 28912 }, { "epoch": 2.3422715489306545, "grad_norm": 0.07302417606115341, "learning_rate": 9.209685404383635e-05, "loss": 0.219, "step": 28913 }, { "epoch": 2.3423525599481527, "grad_norm": 0.05842958390712738, "learning_rate": 9.209235339124174e-05, "loss": 0.2071, "step": 28914 }, { "epoch": 2.3424335709656514, "grad_norm": 0.05464782565832138, "learning_rate": 9.208785273864711e-05, "loss": 0.2315, "step": 28915 }, { "epoch": 2.3425145819831497, "grad_norm": 0.0690939649939537, "learning_rate": 9.208335208605247e-05, "loss": 0.2757, "step": 28916 }, { "epoch": 2.342595593000648, "grad_norm": 0.08018796145915985, "learning_rate": 9.207885143345786e-05, "loss": 0.2232, "step": 28917 }, { "epoch": 2.3426766040181466, "grad_norm": 0.06097112223505974, "learning_rate": 9.207435078086323e-05, "loss": 0.2281, "step": 28918 }, { "epoch": 2.342757615035645, "grad_norm": 0.053888920694589615, "learning_rate": 9.20698501282686e-05, "loss": 0.2471, "step": 28919 }, { "epoch": 2.342838626053143, "grad_norm": 0.07474564015865326, "learning_rate": 9.206534947567398e-05, "loss": 0.2867, "step": 28920 }, { "epoch": 2.3429196370706418, "grad_norm": 0.07385596632957458, "learning_rate": 9.206084882307935e-05, "loss": 0.2633, "step": 28921 }, { "epoch": 2.34300064808814, "grad_norm": 0.0691351518034935, "learning_rate": 9.205634817048473e-05, "loss": 0.2792, "step": 28922 }, { "epoch": 2.3430816591056383, "grad_norm": 0.07424209266901016, "learning_rate": 9.20518475178901e-05, "loss": 0.2547, "step": 28923 }, { "epoch": 2.343162670123137, "grad_norm": 0.06224573031067848, "learning_rate": 9.204734686529547e-05, "loss": 0.2473, "step": 28924 }, { "epoch": 2.343243681140635, "grad_norm": 0.0734906867146492, "learning_rate": 9.204284621270085e-05, "loss": 0.2414, "step": 28925 }, { "epoch": 2.3433246921581334, "grad_norm": 0.06674046814441681, "learning_rate": 9.203834556010622e-05, "loss": 0.2388, "step": 28926 }, { "epoch": 2.343405703175632, "grad_norm": 0.08335613459348679, "learning_rate": 9.20338449075116e-05, "loss": 0.2423, "step": 28927 }, { "epoch": 2.3434867141931304, "grad_norm": 0.06520678848028183, "learning_rate": 9.202934425491697e-05, "loss": 0.238, "step": 28928 }, { "epoch": 2.3435677252106286, "grad_norm": 0.0749930590391159, "learning_rate": 9.202484360232234e-05, "loss": 0.2372, "step": 28929 }, { "epoch": 2.343648736228127, "grad_norm": 0.06236148253083229, "learning_rate": 9.202034294972772e-05, "loss": 0.2388, "step": 28930 }, { "epoch": 2.3437297472456255, "grad_norm": 0.0710134208202362, "learning_rate": 9.201584229713309e-05, "loss": 0.2383, "step": 28931 }, { "epoch": 2.343810758263124, "grad_norm": 0.07559984922409058, "learning_rate": 9.201134164453846e-05, "loss": 0.2338, "step": 28932 }, { "epoch": 2.343891769280622, "grad_norm": 0.06743871420621872, "learning_rate": 9.200684099194384e-05, "loss": 0.2642, "step": 28933 }, { "epoch": 2.3439727802981207, "grad_norm": 0.06335540860891342, "learning_rate": 9.200234033934921e-05, "loss": 0.2315, "step": 28934 }, { "epoch": 2.344053791315619, "grad_norm": 0.07250569760799408, "learning_rate": 9.199783968675458e-05, "loss": 0.2444, "step": 28935 }, { "epoch": 2.344134802333117, "grad_norm": 0.07095897942781448, "learning_rate": 9.199333903415996e-05, "loss": 0.2593, "step": 28936 }, { "epoch": 2.3442158133506155, "grad_norm": 0.057127442210912704, "learning_rate": 9.198883838156533e-05, "loss": 0.2046, "step": 28937 }, { "epoch": 2.344296824368114, "grad_norm": 0.07967636734247208, "learning_rate": 9.19843377289707e-05, "loss": 0.2502, "step": 28938 }, { "epoch": 2.3443778353856124, "grad_norm": 0.05165790766477585, "learning_rate": 9.197983707637608e-05, "loss": 0.2311, "step": 28939 }, { "epoch": 2.3444588464031106, "grad_norm": 0.05036972835659981, "learning_rate": 9.197533642378145e-05, "loss": 0.2129, "step": 28940 }, { "epoch": 2.3445398574206093, "grad_norm": 0.07671160250902176, "learning_rate": 9.197083577118682e-05, "loss": 0.2968, "step": 28941 }, { "epoch": 2.3446208684381076, "grad_norm": 0.0694350153207779, "learning_rate": 9.19663351185922e-05, "loss": 0.2546, "step": 28942 }, { "epoch": 2.344701879455606, "grad_norm": 0.0753200575709343, "learning_rate": 9.196183446599757e-05, "loss": 0.2516, "step": 28943 }, { "epoch": 2.3447828904731045, "grad_norm": 0.06781428307294846, "learning_rate": 9.195733381340295e-05, "loss": 0.2385, "step": 28944 }, { "epoch": 2.3448639014906028, "grad_norm": 0.06559989601373672, "learning_rate": 9.195283316080833e-05, "loss": 0.2582, "step": 28945 }, { "epoch": 2.344944912508101, "grad_norm": 0.08580916374921799, "learning_rate": 9.194833250821369e-05, "loss": 0.2617, "step": 28946 }, { "epoch": 2.3450259235255997, "grad_norm": 0.065764881670475, "learning_rate": 9.194383185561907e-05, "loss": 0.2238, "step": 28947 }, { "epoch": 2.345106934543098, "grad_norm": 0.06031308323144913, "learning_rate": 9.193933120302445e-05, "loss": 0.2493, "step": 28948 }, { "epoch": 2.345187945560596, "grad_norm": 0.0599418543279171, "learning_rate": 9.193483055042981e-05, "loss": 0.2477, "step": 28949 }, { "epoch": 2.345268956578095, "grad_norm": 0.07239186018705368, "learning_rate": 9.193032989783519e-05, "loss": 0.2567, "step": 28950 }, { "epoch": 2.345349967595593, "grad_norm": 0.08161863684654236, "learning_rate": 9.192582924524057e-05, "loss": 0.2713, "step": 28951 }, { "epoch": 2.3454309786130914, "grad_norm": 0.0612737275660038, "learning_rate": 9.192132859264593e-05, "loss": 0.216, "step": 28952 }, { "epoch": 2.3455119896305896, "grad_norm": 0.08176063001155853, "learning_rate": 9.191682794005131e-05, "loss": 0.2514, "step": 28953 }, { "epoch": 2.3455930006480883, "grad_norm": 0.04953533038496971, "learning_rate": 9.19123272874567e-05, "loss": 0.2425, "step": 28954 }, { "epoch": 2.3456740116655865, "grad_norm": 0.06790510565042496, "learning_rate": 9.190782663486206e-05, "loss": 0.2317, "step": 28955 }, { "epoch": 2.345755022683085, "grad_norm": 0.07242914289236069, "learning_rate": 9.190332598226743e-05, "loss": 0.227, "step": 28956 }, { "epoch": 2.3458360337005835, "grad_norm": 0.07935182005167007, "learning_rate": 9.189882532967282e-05, "loss": 0.266, "step": 28957 }, { "epoch": 2.3459170447180817, "grad_norm": 0.06886610388755798, "learning_rate": 9.189432467707818e-05, "loss": 0.2192, "step": 28958 }, { "epoch": 2.34599805573558, "grad_norm": 0.0698259025812149, "learning_rate": 9.188982402448355e-05, "loss": 0.2757, "step": 28959 }, { "epoch": 2.346079066753078, "grad_norm": 0.06897472590208054, "learning_rate": 9.188532337188894e-05, "loss": 0.2608, "step": 28960 }, { "epoch": 2.346160077770577, "grad_norm": 0.0625743418931961, "learning_rate": 9.18808227192943e-05, "loss": 0.2446, "step": 28961 }, { "epoch": 2.346241088788075, "grad_norm": 0.04949932545423508, "learning_rate": 9.187632206669967e-05, "loss": 0.2161, "step": 28962 }, { "epoch": 2.3463220998055734, "grad_norm": 0.06938916444778442, "learning_rate": 9.187182141410506e-05, "loss": 0.2491, "step": 28963 }, { "epoch": 2.346403110823072, "grad_norm": 0.07660891115665436, "learning_rate": 9.186732076151042e-05, "loss": 0.2647, "step": 28964 }, { "epoch": 2.3464841218405703, "grad_norm": 0.06504109501838684, "learning_rate": 9.186282010891579e-05, "loss": 0.2786, "step": 28965 }, { "epoch": 2.3465651328580686, "grad_norm": 0.07999644428491592, "learning_rate": 9.185831945632118e-05, "loss": 0.2319, "step": 28966 }, { "epoch": 2.3466461438755672, "grad_norm": 0.07580562680959702, "learning_rate": 9.185381880372654e-05, "loss": 0.2959, "step": 28967 }, { "epoch": 2.3467271548930655, "grad_norm": 0.05977579206228256, "learning_rate": 9.184931815113191e-05, "loss": 0.2362, "step": 28968 }, { "epoch": 2.3468081659105637, "grad_norm": 0.053713224828243256, "learning_rate": 9.18448174985373e-05, "loss": 0.2205, "step": 28969 }, { "epoch": 2.3468891769280624, "grad_norm": 0.07156253606081009, "learning_rate": 9.184031684594266e-05, "loss": 0.2572, "step": 28970 }, { "epoch": 2.3469701879455607, "grad_norm": 0.06587357074022293, "learning_rate": 9.183581619334803e-05, "loss": 0.2546, "step": 28971 }, { "epoch": 2.347051198963059, "grad_norm": 0.052538491785526276, "learning_rate": 9.183131554075342e-05, "loss": 0.21, "step": 28972 }, { "epoch": 2.3471322099805576, "grad_norm": 0.05909012258052826, "learning_rate": 9.182681488815878e-05, "loss": 0.2277, "step": 28973 }, { "epoch": 2.347213220998056, "grad_norm": 0.054832495748996735, "learning_rate": 9.182231423556417e-05, "loss": 0.2304, "step": 28974 }, { "epoch": 2.347294232015554, "grad_norm": 0.07047142833471298, "learning_rate": 9.181781358296954e-05, "loss": 0.262, "step": 28975 }, { "epoch": 2.3473752430330523, "grad_norm": 0.06125551462173462, "learning_rate": 9.18133129303749e-05, "loss": 0.2335, "step": 28976 }, { "epoch": 2.347456254050551, "grad_norm": 0.08382407575845718, "learning_rate": 9.180881227778029e-05, "loss": 0.3174, "step": 28977 }, { "epoch": 2.3475372650680493, "grad_norm": 0.06475745886564255, "learning_rate": 9.180431162518566e-05, "loss": 0.2553, "step": 28978 }, { "epoch": 2.3476182760855475, "grad_norm": 0.05636309087276459, "learning_rate": 9.179981097259102e-05, "loss": 0.2548, "step": 28979 }, { "epoch": 2.347699287103046, "grad_norm": 0.06750059127807617, "learning_rate": 9.179531031999641e-05, "loss": 0.2676, "step": 28980 }, { "epoch": 2.3477802981205445, "grad_norm": 0.05322468280792236, "learning_rate": 9.179080966740178e-05, "loss": 0.2366, "step": 28981 }, { "epoch": 2.3478613091380427, "grad_norm": 0.06459199637174606, "learning_rate": 9.178630901480714e-05, "loss": 0.2448, "step": 28982 }, { "epoch": 2.347942320155541, "grad_norm": 0.08525732904672623, "learning_rate": 9.178180836221253e-05, "loss": 0.255, "step": 28983 }, { "epoch": 2.3480233311730396, "grad_norm": 0.06443566828966141, "learning_rate": 9.17773077096179e-05, "loss": 0.2518, "step": 28984 }, { "epoch": 2.348104342190538, "grad_norm": 0.06144661083817482, "learning_rate": 9.177280705702326e-05, "loss": 0.2246, "step": 28985 }, { "epoch": 2.348185353208036, "grad_norm": 0.055623866617679596, "learning_rate": 9.176830640442865e-05, "loss": 0.2165, "step": 28986 }, { "epoch": 2.348266364225535, "grad_norm": 0.054140083491802216, "learning_rate": 9.176380575183402e-05, "loss": 0.2077, "step": 28987 }, { "epoch": 2.348347375243033, "grad_norm": 0.05536891147494316, "learning_rate": 9.175930509923938e-05, "loss": 0.1951, "step": 28988 }, { "epoch": 2.3484283862605313, "grad_norm": 0.08356544375419617, "learning_rate": 9.175480444664477e-05, "loss": 0.2173, "step": 28989 }, { "epoch": 2.34850939727803, "grad_norm": 0.07575877755880356, "learning_rate": 9.175030379405014e-05, "loss": 0.2238, "step": 28990 }, { "epoch": 2.3485904082955282, "grad_norm": 0.07282985001802444, "learning_rate": 9.17458031414555e-05, "loss": 0.2359, "step": 28991 }, { "epoch": 2.3486714193130265, "grad_norm": 0.06027929112315178, "learning_rate": 9.174130248886089e-05, "loss": 0.2358, "step": 28992 }, { "epoch": 2.348752430330525, "grad_norm": 0.061999425292015076, "learning_rate": 9.173680183626627e-05, "loss": 0.1877, "step": 28993 }, { "epoch": 2.3488334413480234, "grad_norm": 0.0746329203248024, "learning_rate": 9.173230118367163e-05, "loss": 0.2525, "step": 28994 }, { "epoch": 2.3489144523655217, "grad_norm": 0.05842380225658417, "learning_rate": 9.172780053107701e-05, "loss": 0.2533, "step": 28995 }, { "epoch": 2.34899546338302, "grad_norm": 0.05924517661333084, "learning_rate": 9.172329987848239e-05, "loss": 0.2229, "step": 28996 }, { "epoch": 2.3490764744005186, "grad_norm": 0.06103135272860527, "learning_rate": 9.171879922588775e-05, "loss": 0.2527, "step": 28997 }, { "epoch": 2.349157485418017, "grad_norm": 0.0787557065486908, "learning_rate": 9.171429857329313e-05, "loss": 0.2657, "step": 28998 }, { "epoch": 2.349238496435515, "grad_norm": 0.07225020974874496, "learning_rate": 9.170979792069851e-05, "loss": 0.2664, "step": 28999 }, { "epoch": 2.3493195074530138, "grad_norm": 0.06952983886003494, "learning_rate": 9.170529726810388e-05, "loss": 0.2139, "step": 29000 }, { "epoch": 2.349400518470512, "grad_norm": 0.06747347116470337, "learning_rate": 9.170079661550925e-05, "loss": 0.2172, "step": 29001 }, { "epoch": 2.3494815294880103, "grad_norm": 0.06495499610900879, "learning_rate": 9.169629596291463e-05, "loss": 0.2392, "step": 29002 }, { "epoch": 2.3495625405055085, "grad_norm": 0.07105159759521484, "learning_rate": 9.169179531032e-05, "loss": 0.2398, "step": 29003 }, { "epoch": 2.349643551523007, "grad_norm": 0.06550126522779465, "learning_rate": 9.168729465772538e-05, "loss": 0.2994, "step": 29004 }, { "epoch": 2.3497245625405054, "grad_norm": 0.05634589120745659, "learning_rate": 9.168279400513075e-05, "loss": 0.2074, "step": 29005 }, { "epoch": 2.3498055735580037, "grad_norm": 0.06591413170099258, "learning_rate": 9.167829335253612e-05, "loss": 0.2813, "step": 29006 }, { "epoch": 2.3498865845755024, "grad_norm": 0.06812646985054016, "learning_rate": 9.16737926999415e-05, "loss": 0.2792, "step": 29007 }, { "epoch": 2.3499675955930006, "grad_norm": 0.06828898936510086, "learning_rate": 9.166929204734687e-05, "loss": 0.2605, "step": 29008 }, { "epoch": 2.350048606610499, "grad_norm": 0.06219886243343353, "learning_rate": 9.166479139475224e-05, "loss": 0.2731, "step": 29009 }, { "epoch": 2.3501296176279975, "grad_norm": 0.0656910315155983, "learning_rate": 9.166029074215762e-05, "loss": 0.258, "step": 29010 }, { "epoch": 2.350210628645496, "grad_norm": 0.06982869654893875, "learning_rate": 9.165579008956299e-05, "loss": 0.2512, "step": 29011 }, { "epoch": 2.350291639662994, "grad_norm": 0.047029316425323486, "learning_rate": 9.165128943696836e-05, "loss": 0.2259, "step": 29012 }, { "epoch": 2.3503726506804927, "grad_norm": 0.07702232897281647, "learning_rate": 9.164678878437374e-05, "loss": 0.2386, "step": 29013 }, { "epoch": 2.350453661697991, "grad_norm": 0.052338164299726486, "learning_rate": 9.164228813177911e-05, "loss": 0.2337, "step": 29014 }, { "epoch": 2.350534672715489, "grad_norm": 0.07687724381685257, "learning_rate": 9.163778747918449e-05, "loss": 0.2648, "step": 29015 }, { "epoch": 2.350615683732988, "grad_norm": 0.06665743142366409, "learning_rate": 9.163328682658986e-05, "loss": 0.2785, "step": 29016 }, { "epoch": 2.350696694750486, "grad_norm": 0.06634911149740219, "learning_rate": 9.162878617399523e-05, "loss": 0.2277, "step": 29017 }, { "epoch": 2.3507777057679844, "grad_norm": 0.07159882038831711, "learning_rate": 9.16242855214006e-05, "loss": 0.2722, "step": 29018 }, { "epoch": 2.3508587167854826, "grad_norm": 0.05206114426255226, "learning_rate": 9.161978486880598e-05, "loss": 0.2074, "step": 29019 }, { "epoch": 2.3509397278029813, "grad_norm": 0.06335695832967758, "learning_rate": 9.161528421621135e-05, "loss": 0.2361, "step": 29020 }, { "epoch": 2.3510207388204796, "grad_norm": 0.07118600606918335, "learning_rate": 9.161078356361673e-05, "loss": 0.2682, "step": 29021 }, { "epoch": 2.351101749837978, "grad_norm": 0.06725834310054779, "learning_rate": 9.16062829110221e-05, "loss": 0.2615, "step": 29022 }, { "epoch": 2.3511827608554765, "grad_norm": 0.0714595764875412, "learning_rate": 9.160178225842747e-05, "loss": 0.2619, "step": 29023 }, { "epoch": 2.3512637718729748, "grad_norm": 0.0766262337565422, "learning_rate": 9.159728160583285e-05, "loss": 0.2714, "step": 29024 }, { "epoch": 2.351344782890473, "grad_norm": 0.07190114259719849, "learning_rate": 9.159278095323822e-05, "loss": 0.2599, "step": 29025 }, { "epoch": 2.3514257939079712, "grad_norm": 0.06509433686733246, "learning_rate": 9.158828030064361e-05, "loss": 0.255, "step": 29026 }, { "epoch": 2.35150680492547, "grad_norm": 0.06340566277503967, "learning_rate": 9.158377964804897e-05, "loss": 0.2432, "step": 29027 }, { "epoch": 2.351587815942968, "grad_norm": 0.07813110947608948, "learning_rate": 9.157927899545434e-05, "loss": 0.2729, "step": 29028 }, { "epoch": 2.3516688269604664, "grad_norm": 0.07088219374418259, "learning_rate": 9.157477834285973e-05, "loss": 0.2391, "step": 29029 }, { "epoch": 2.351749837977965, "grad_norm": 0.0655277669429779, "learning_rate": 9.157027769026509e-05, "loss": 0.256, "step": 29030 }, { "epoch": 2.3518308489954634, "grad_norm": 0.07638170570135117, "learning_rate": 9.156577703767046e-05, "loss": 0.3067, "step": 29031 }, { "epoch": 2.3519118600129616, "grad_norm": 0.05520417168736458, "learning_rate": 9.156127638507585e-05, "loss": 0.2231, "step": 29032 }, { "epoch": 2.3519928710304603, "grad_norm": 0.061939746141433716, "learning_rate": 9.155677573248121e-05, "loss": 0.1929, "step": 29033 }, { "epoch": 2.3520738820479585, "grad_norm": 0.06050903722643852, "learning_rate": 9.155227507988658e-05, "loss": 0.2748, "step": 29034 }, { "epoch": 2.3521548930654568, "grad_norm": 0.06427094340324402, "learning_rate": 9.154777442729197e-05, "loss": 0.2544, "step": 29035 }, { "epoch": 2.3522359040829555, "grad_norm": 0.06105329468846321, "learning_rate": 9.154327377469733e-05, "loss": 0.219, "step": 29036 }, { "epoch": 2.3523169151004537, "grad_norm": 0.07421961426734924, "learning_rate": 9.15387731221027e-05, "loss": 0.2403, "step": 29037 }, { "epoch": 2.352397926117952, "grad_norm": 0.0582902729511261, "learning_rate": 9.153427246950809e-05, "loss": 0.26, "step": 29038 }, { "epoch": 2.3524789371354506, "grad_norm": 0.06710220873355865, "learning_rate": 9.152977181691345e-05, "loss": 0.26, "step": 29039 }, { "epoch": 2.352559948152949, "grad_norm": 0.054716769605875015, "learning_rate": 9.152527116431883e-05, "loss": 0.2301, "step": 29040 }, { "epoch": 2.352640959170447, "grad_norm": 0.06258168071508408, "learning_rate": 9.152077051172421e-05, "loss": 0.2367, "step": 29041 }, { "epoch": 2.3527219701879454, "grad_norm": 0.0666784793138504, "learning_rate": 9.151626985912957e-05, "loss": 0.2377, "step": 29042 }, { "epoch": 2.352802981205444, "grad_norm": 0.062231969088315964, "learning_rate": 9.151176920653495e-05, "loss": 0.2311, "step": 29043 }, { "epoch": 2.3528839922229423, "grad_norm": 0.05620197951793671, "learning_rate": 9.150726855394033e-05, "loss": 0.2493, "step": 29044 }, { "epoch": 2.3529650032404406, "grad_norm": 0.06929001957178116, "learning_rate": 9.150276790134569e-05, "loss": 0.2569, "step": 29045 }, { "epoch": 2.3530460142579392, "grad_norm": 0.07018900662660599, "learning_rate": 9.149826724875107e-05, "loss": 0.2287, "step": 29046 }, { "epoch": 2.3531270252754375, "grad_norm": 0.07081607729196548, "learning_rate": 9.149376659615645e-05, "loss": 0.2215, "step": 29047 }, { "epoch": 2.3532080362929357, "grad_norm": 0.062088314443826675, "learning_rate": 9.148926594356181e-05, "loss": 0.2282, "step": 29048 }, { "epoch": 2.353289047310434, "grad_norm": 0.06053098291158676, "learning_rate": 9.148476529096719e-05, "loss": 0.2198, "step": 29049 }, { "epoch": 2.3533700583279327, "grad_norm": 0.07323571294546127, "learning_rate": 9.148026463837257e-05, "loss": 0.2468, "step": 29050 }, { "epoch": 2.353451069345431, "grad_norm": 0.06491614133119583, "learning_rate": 9.147576398577793e-05, "loss": 0.2259, "step": 29051 }, { "epoch": 2.353532080362929, "grad_norm": 0.06910113245248795, "learning_rate": 9.147126333318332e-05, "loss": 0.2277, "step": 29052 }, { "epoch": 2.353613091380428, "grad_norm": 0.06695449352264404, "learning_rate": 9.14667626805887e-05, "loss": 0.2676, "step": 29053 }, { "epoch": 2.353694102397926, "grad_norm": 0.07094959169626236, "learning_rate": 9.146226202799406e-05, "loss": 0.2747, "step": 29054 }, { "epoch": 2.3537751134154243, "grad_norm": 0.0783795416355133, "learning_rate": 9.145776137539944e-05, "loss": 0.2644, "step": 29055 }, { "epoch": 2.353856124432923, "grad_norm": 0.09245872497558594, "learning_rate": 9.145326072280482e-05, "loss": 0.3017, "step": 29056 }, { "epoch": 2.3539371354504213, "grad_norm": 0.06651142239570618, "learning_rate": 9.144876007021018e-05, "loss": 0.2586, "step": 29057 }, { "epoch": 2.3540181464679195, "grad_norm": 0.0733952522277832, "learning_rate": 9.144425941761556e-05, "loss": 0.2572, "step": 29058 }, { "epoch": 2.354099157485418, "grad_norm": 0.0771905779838562, "learning_rate": 9.143975876502094e-05, "loss": 0.2467, "step": 29059 }, { "epoch": 2.3541801685029164, "grad_norm": 0.06089000403881073, "learning_rate": 9.14352581124263e-05, "loss": 0.2183, "step": 29060 }, { "epoch": 2.3542611795204147, "grad_norm": 0.06993361562490463, "learning_rate": 9.143075745983168e-05, "loss": 0.2635, "step": 29061 }, { "epoch": 2.3543421905379134, "grad_norm": 0.06053172051906586, "learning_rate": 9.142625680723706e-05, "loss": 0.2675, "step": 29062 }, { "epoch": 2.3544232015554116, "grad_norm": 0.06449250131845474, "learning_rate": 9.142175615464242e-05, "loss": 0.2575, "step": 29063 }, { "epoch": 2.35450421257291, "grad_norm": 0.07320661842823029, "learning_rate": 9.14172555020478e-05, "loss": 0.2607, "step": 29064 }, { "epoch": 2.354585223590408, "grad_norm": 0.0640730932354927, "learning_rate": 9.141275484945318e-05, "loss": 0.2475, "step": 29065 }, { "epoch": 2.354666234607907, "grad_norm": 0.055678799748420715, "learning_rate": 9.140825419685854e-05, "loss": 0.1937, "step": 29066 }, { "epoch": 2.354747245625405, "grad_norm": 0.060588981956243515, "learning_rate": 9.140375354426393e-05, "loss": 0.2503, "step": 29067 }, { "epoch": 2.3548282566429033, "grad_norm": 0.06185297295451164, "learning_rate": 9.13992528916693e-05, "loss": 0.2874, "step": 29068 }, { "epoch": 2.354909267660402, "grad_norm": 0.0669855922460556, "learning_rate": 9.139475223907466e-05, "loss": 0.2355, "step": 29069 }, { "epoch": 2.3549902786779002, "grad_norm": 0.0630691796541214, "learning_rate": 9.139025158648005e-05, "loss": 0.2245, "step": 29070 }, { "epoch": 2.3550712896953985, "grad_norm": 0.05715397745370865, "learning_rate": 9.138575093388542e-05, "loss": 0.2487, "step": 29071 }, { "epoch": 2.3551523007128967, "grad_norm": 0.0638887882232666, "learning_rate": 9.138125028129078e-05, "loss": 0.287, "step": 29072 }, { "epoch": 2.3552333117303954, "grad_norm": 0.05640844628214836, "learning_rate": 9.137674962869617e-05, "loss": 0.228, "step": 29073 }, { "epoch": 2.3553143227478937, "grad_norm": 0.06533593684434891, "learning_rate": 9.137224897610154e-05, "loss": 0.2561, "step": 29074 }, { "epoch": 2.355395333765392, "grad_norm": 0.056167490780353546, "learning_rate": 9.13677483235069e-05, "loss": 0.2184, "step": 29075 }, { "epoch": 2.3554763447828906, "grad_norm": 0.0543614998459816, "learning_rate": 9.136324767091229e-05, "loss": 0.2405, "step": 29076 }, { "epoch": 2.355557355800389, "grad_norm": 0.05727843940258026, "learning_rate": 9.135874701831766e-05, "loss": 0.2459, "step": 29077 }, { "epoch": 2.355638366817887, "grad_norm": 0.06158679351210594, "learning_rate": 9.135424636572304e-05, "loss": 0.2419, "step": 29078 }, { "epoch": 2.3557193778353858, "grad_norm": 0.06343264132738113, "learning_rate": 9.134974571312841e-05, "loss": 0.2282, "step": 29079 }, { "epoch": 2.355800388852884, "grad_norm": 0.07503470033407211, "learning_rate": 9.134524506053378e-05, "loss": 0.2314, "step": 29080 }, { "epoch": 2.3558813998703823, "grad_norm": 0.0750756710767746, "learning_rate": 9.134074440793916e-05, "loss": 0.2241, "step": 29081 }, { "epoch": 2.355962410887881, "grad_norm": 0.06549500674009323, "learning_rate": 9.133624375534453e-05, "loss": 0.2455, "step": 29082 }, { "epoch": 2.356043421905379, "grad_norm": 0.06845001876354218, "learning_rate": 9.13317431027499e-05, "loss": 0.2723, "step": 29083 }, { "epoch": 2.3561244329228774, "grad_norm": 0.10937629640102386, "learning_rate": 9.132724245015528e-05, "loss": 0.2576, "step": 29084 }, { "epoch": 2.356205443940376, "grad_norm": 0.07276338338851929, "learning_rate": 9.132274179756065e-05, "loss": 0.2463, "step": 29085 }, { "epoch": 2.3562864549578744, "grad_norm": 0.08041878789663315, "learning_rate": 9.131824114496602e-05, "loss": 0.2571, "step": 29086 }, { "epoch": 2.3563674659753726, "grad_norm": 0.0719827190041542, "learning_rate": 9.13137404923714e-05, "loss": 0.2842, "step": 29087 }, { "epoch": 2.356448476992871, "grad_norm": 0.07114730775356293, "learning_rate": 9.130923983977677e-05, "loss": 0.2465, "step": 29088 }, { "epoch": 2.3565294880103695, "grad_norm": 0.11425285041332245, "learning_rate": 9.130473918718215e-05, "loss": 0.2278, "step": 29089 }, { "epoch": 2.356610499027868, "grad_norm": 0.07294203341007233, "learning_rate": 9.130023853458752e-05, "loss": 0.2497, "step": 29090 }, { "epoch": 2.356691510045366, "grad_norm": 0.0667933002114296, "learning_rate": 9.129573788199289e-05, "loss": 0.2678, "step": 29091 }, { "epoch": 2.3567725210628647, "grad_norm": 0.07403317093849182, "learning_rate": 9.129123722939827e-05, "loss": 0.2561, "step": 29092 }, { "epoch": 2.356853532080363, "grad_norm": 0.06869540363550186, "learning_rate": 9.128673657680364e-05, "loss": 0.3012, "step": 29093 }, { "epoch": 2.356934543097861, "grad_norm": 0.06836505979299545, "learning_rate": 9.128223592420901e-05, "loss": 0.2439, "step": 29094 }, { "epoch": 2.3570155541153595, "grad_norm": 0.05557122081518173, "learning_rate": 9.127773527161439e-05, "loss": 0.2789, "step": 29095 }, { "epoch": 2.357096565132858, "grad_norm": 0.07525942474603653, "learning_rate": 9.127323461901976e-05, "loss": 0.2753, "step": 29096 }, { "epoch": 2.3571775761503564, "grad_norm": 0.049367401748895645, "learning_rate": 9.126873396642513e-05, "loss": 0.228, "step": 29097 }, { "epoch": 2.3572585871678546, "grad_norm": 0.06840921193361282, "learning_rate": 9.126423331383051e-05, "loss": 0.2433, "step": 29098 }, { "epoch": 2.3573395981853533, "grad_norm": 0.06897839158773422, "learning_rate": 9.125973266123588e-05, "loss": 0.2622, "step": 29099 }, { "epoch": 2.3574206092028516, "grad_norm": 0.06702248752117157, "learning_rate": 9.125523200864125e-05, "loss": 0.2476, "step": 29100 }, { "epoch": 2.35750162022035, "grad_norm": 0.06632895767688751, "learning_rate": 9.125073135604663e-05, "loss": 0.2635, "step": 29101 }, { "epoch": 2.3575826312378485, "grad_norm": 0.06883935630321503, "learning_rate": 9.1246230703452e-05, "loss": 0.3266, "step": 29102 }, { "epoch": 2.3576636422553467, "grad_norm": 0.058567121624946594, "learning_rate": 9.124173005085738e-05, "loss": 0.2546, "step": 29103 }, { "epoch": 2.357744653272845, "grad_norm": 0.06617163121700287, "learning_rate": 9.123722939826276e-05, "loss": 0.3136, "step": 29104 }, { "epoch": 2.3578256642903437, "grad_norm": 0.05383819714188576, "learning_rate": 9.123272874566812e-05, "loss": 0.2334, "step": 29105 }, { "epoch": 2.357906675307842, "grad_norm": 0.06604919582605362, "learning_rate": 9.12282280930735e-05, "loss": 0.2537, "step": 29106 }, { "epoch": 2.35798768632534, "grad_norm": 0.06112247705459595, "learning_rate": 9.122372744047888e-05, "loss": 0.2292, "step": 29107 }, { "epoch": 2.358068697342839, "grad_norm": 0.07379915565252304, "learning_rate": 9.121922678788424e-05, "loss": 0.2603, "step": 29108 }, { "epoch": 2.358149708360337, "grad_norm": 0.07685612142086029, "learning_rate": 9.121472613528962e-05, "loss": 0.2514, "step": 29109 }, { "epoch": 2.3582307193778353, "grad_norm": 0.08886110037565231, "learning_rate": 9.1210225482695e-05, "loss": 0.2416, "step": 29110 }, { "epoch": 2.3583117303953336, "grad_norm": 0.0855654776096344, "learning_rate": 9.120572483010036e-05, "loss": 0.2798, "step": 29111 }, { "epoch": 2.3583927414128323, "grad_norm": 0.058703597635030746, "learning_rate": 9.120122417750574e-05, "loss": 0.263, "step": 29112 }, { "epoch": 2.3584737524303305, "grad_norm": 0.08543696999549866, "learning_rate": 9.119672352491113e-05, "loss": 0.2868, "step": 29113 }, { "epoch": 2.3585547634478288, "grad_norm": 0.08278053253889084, "learning_rate": 9.119222287231649e-05, "loss": 0.2598, "step": 29114 }, { "epoch": 2.3586357744653275, "grad_norm": 0.06764932721853256, "learning_rate": 9.118772221972186e-05, "loss": 0.2557, "step": 29115 }, { "epoch": 2.3587167854828257, "grad_norm": 0.07048583030700684, "learning_rate": 9.118322156712725e-05, "loss": 0.2823, "step": 29116 }, { "epoch": 2.358797796500324, "grad_norm": 0.06134413182735443, "learning_rate": 9.11787209145326e-05, "loss": 0.2635, "step": 29117 }, { "epoch": 2.358878807517822, "grad_norm": 0.07762916386127472, "learning_rate": 9.117422026193798e-05, "loss": 0.2568, "step": 29118 }, { "epoch": 2.358959818535321, "grad_norm": 0.04822731763124466, "learning_rate": 9.116971960934337e-05, "loss": 0.2413, "step": 29119 }, { "epoch": 2.359040829552819, "grad_norm": 0.06413828581571579, "learning_rate": 9.116521895674873e-05, "loss": 0.2573, "step": 29120 }, { "epoch": 2.3591218405703174, "grad_norm": 0.060196202248334885, "learning_rate": 9.11607183041541e-05, "loss": 0.2772, "step": 29121 }, { "epoch": 2.359202851587816, "grad_norm": 0.07289192825555801, "learning_rate": 9.115621765155949e-05, "loss": 0.2606, "step": 29122 }, { "epoch": 2.3592838626053143, "grad_norm": 0.07562807947397232, "learning_rate": 9.115171699896485e-05, "loss": 0.2779, "step": 29123 }, { "epoch": 2.3593648736228126, "grad_norm": 0.06476518511772156, "learning_rate": 9.114721634637022e-05, "loss": 0.2334, "step": 29124 }, { "epoch": 2.3594458846403112, "grad_norm": 0.06918711960315704, "learning_rate": 9.114271569377561e-05, "loss": 0.2648, "step": 29125 }, { "epoch": 2.3595268956578095, "grad_norm": 0.07326087355613708, "learning_rate": 9.113821504118097e-05, "loss": 0.2548, "step": 29126 }, { "epoch": 2.3596079066753077, "grad_norm": 0.054968882352113724, "learning_rate": 9.113371438858634e-05, "loss": 0.2403, "step": 29127 }, { "epoch": 2.3596889176928064, "grad_norm": 0.08446795493364334, "learning_rate": 9.112921373599173e-05, "loss": 0.2453, "step": 29128 }, { "epoch": 2.3597699287103047, "grad_norm": 0.06379998475313187, "learning_rate": 9.112471308339709e-05, "loss": 0.2709, "step": 29129 }, { "epoch": 2.359850939727803, "grad_norm": 0.054361291229724884, "learning_rate": 9.112021243080248e-05, "loss": 0.2494, "step": 29130 }, { "epoch": 2.3599319507453016, "grad_norm": 0.06530174612998962, "learning_rate": 9.111571177820785e-05, "loss": 0.234, "step": 29131 }, { "epoch": 2.3600129617628, "grad_norm": 0.07407192140817642, "learning_rate": 9.111121112561321e-05, "loss": 0.2432, "step": 29132 }, { "epoch": 2.360093972780298, "grad_norm": 0.06312862038612366, "learning_rate": 9.11067104730186e-05, "loss": 0.2423, "step": 29133 }, { "epoch": 2.3601749837977963, "grad_norm": 0.06313052028417587, "learning_rate": 9.110220982042397e-05, "loss": 0.2631, "step": 29134 }, { "epoch": 2.360255994815295, "grad_norm": 0.0686829537153244, "learning_rate": 9.109770916782933e-05, "loss": 0.2375, "step": 29135 }, { "epoch": 2.3603370058327933, "grad_norm": 0.05823444202542305, "learning_rate": 9.109320851523472e-05, "loss": 0.2578, "step": 29136 }, { "epoch": 2.3604180168502915, "grad_norm": 0.06658496707677841, "learning_rate": 9.108870786264009e-05, "loss": 0.256, "step": 29137 }, { "epoch": 2.36049902786779, "grad_norm": 0.06323231011629105, "learning_rate": 9.108420721004545e-05, "loss": 0.2386, "step": 29138 }, { "epoch": 2.3605800388852884, "grad_norm": 0.06542006134986877, "learning_rate": 9.107970655745084e-05, "loss": 0.2323, "step": 29139 }, { "epoch": 2.3606610499027867, "grad_norm": 0.06671994924545288, "learning_rate": 9.107520590485621e-05, "loss": 0.2718, "step": 29140 }, { "epoch": 2.360742060920285, "grad_norm": 0.06911414861679077, "learning_rate": 9.107070525226157e-05, "loss": 0.2941, "step": 29141 }, { "epoch": 2.3608230719377836, "grad_norm": 0.0645342469215393, "learning_rate": 9.106620459966696e-05, "loss": 0.243, "step": 29142 }, { "epoch": 2.360904082955282, "grad_norm": 0.06701602786779404, "learning_rate": 9.106170394707233e-05, "loss": 0.2663, "step": 29143 }, { "epoch": 2.36098509397278, "grad_norm": 0.06413000077009201, "learning_rate": 9.10572032944777e-05, "loss": 0.2423, "step": 29144 }, { "epoch": 2.361066104990279, "grad_norm": 0.06362126022577286, "learning_rate": 9.105270264188308e-05, "loss": 0.2118, "step": 29145 }, { "epoch": 2.361147116007777, "grad_norm": 0.05643783137202263, "learning_rate": 9.104820198928845e-05, "loss": 0.2042, "step": 29146 }, { "epoch": 2.3612281270252753, "grad_norm": 0.07649555057287216, "learning_rate": 9.104370133669381e-05, "loss": 0.2315, "step": 29147 }, { "epoch": 2.361309138042774, "grad_norm": 0.06502323597669601, "learning_rate": 9.10392006840992e-05, "loss": 0.2491, "step": 29148 }, { "epoch": 2.3613901490602722, "grad_norm": 0.09323930740356445, "learning_rate": 9.103470003150457e-05, "loss": 0.249, "step": 29149 }, { "epoch": 2.3614711600777705, "grad_norm": 0.06336724758148193, "learning_rate": 9.103019937890994e-05, "loss": 0.2039, "step": 29150 }, { "epoch": 2.361552171095269, "grad_norm": 0.07176464796066284, "learning_rate": 9.102569872631532e-05, "loss": 0.2359, "step": 29151 }, { "epoch": 2.3616331821127674, "grad_norm": 0.07099702209234238, "learning_rate": 9.10211980737207e-05, "loss": 0.2616, "step": 29152 }, { "epoch": 2.3617141931302656, "grad_norm": 0.06924501806497574, "learning_rate": 9.101669742112606e-05, "loss": 0.2431, "step": 29153 }, { "epoch": 2.3617952041477643, "grad_norm": 0.05669789761304855, "learning_rate": 9.101219676853144e-05, "loss": 0.2405, "step": 29154 }, { "epoch": 2.3618762151652626, "grad_norm": 0.06776037812232971, "learning_rate": 9.100769611593682e-05, "loss": 0.248, "step": 29155 }, { "epoch": 2.361957226182761, "grad_norm": 0.061502985656261444, "learning_rate": 9.100319546334219e-05, "loss": 0.2553, "step": 29156 }, { "epoch": 2.362038237200259, "grad_norm": 0.061214275658130646, "learning_rate": 9.099869481074756e-05, "loss": 0.2245, "step": 29157 }, { "epoch": 2.3621192482177578, "grad_norm": 0.07771072536706924, "learning_rate": 9.099419415815294e-05, "loss": 0.2295, "step": 29158 }, { "epoch": 2.362200259235256, "grad_norm": 0.06995867937803268, "learning_rate": 9.098969350555831e-05, "loss": 0.2534, "step": 29159 }, { "epoch": 2.3622812702527543, "grad_norm": 0.06866510212421417, "learning_rate": 9.098519285296368e-05, "loss": 0.2699, "step": 29160 }, { "epoch": 2.362362281270253, "grad_norm": 0.06447815895080566, "learning_rate": 9.098069220036906e-05, "loss": 0.2618, "step": 29161 }, { "epoch": 2.362443292287751, "grad_norm": 0.08535295724868774, "learning_rate": 9.097619154777443e-05, "loss": 0.296, "step": 29162 }, { "epoch": 2.3625243033052494, "grad_norm": 0.06989932805299759, "learning_rate": 9.09716908951798e-05, "loss": 0.2501, "step": 29163 }, { "epoch": 2.3626053143227477, "grad_norm": 0.06145579367876053, "learning_rate": 9.096719024258518e-05, "loss": 0.239, "step": 29164 }, { "epoch": 2.3626863253402464, "grad_norm": 0.07149982452392578, "learning_rate": 9.096268958999055e-05, "loss": 0.2636, "step": 29165 }, { "epoch": 2.3627673363577446, "grad_norm": 0.06792671233415604, "learning_rate": 9.095818893739593e-05, "loss": 0.27, "step": 29166 }, { "epoch": 2.362848347375243, "grad_norm": 0.06381487846374512, "learning_rate": 9.09536882848013e-05, "loss": 0.2677, "step": 29167 }, { "epoch": 2.3629293583927415, "grad_norm": 0.055963076651096344, "learning_rate": 9.094918763220667e-05, "loss": 0.2372, "step": 29168 }, { "epoch": 2.36301036941024, "grad_norm": 0.07309937477111816, "learning_rate": 9.094468697961205e-05, "loss": 0.2895, "step": 29169 }, { "epoch": 2.363091380427738, "grad_norm": 0.06073899194598198, "learning_rate": 9.094018632701742e-05, "loss": 0.2794, "step": 29170 }, { "epoch": 2.3631723914452367, "grad_norm": 0.06594845652580261, "learning_rate": 9.09356856744228e-05, "loss": 0.2368, "step": 29171 }, { "epoch": 2.363253402462735, "grad_norm": 0.06607092171907425, "learning_rate": 9.093118502182817e-05, "loss": 0.2608, "step": 29172 }, { "epoch": 2.363334413480233, "grad_norm": 0.0516083724796772, "learning_rate": 9.092668436923354e-05, "loss": 0.2241, "step": 29173 }, { "epoch": 2.363415424497732, "grad_norm": 0.05938537046313286, "learning_rate": 9.092218371663892e-05, "loss": 0.2413, "step": 29174 }, { "epoch": 2.36349643551523, "grad_norm": 0.057529933750629425, "learning_rate": 9.091768306404429e-05, "loss": 0.2434, "step": 29175 }, { "epoch": 2.3635774465327284, "grad_norm": 0.06252843886613846, "learning_rate": 9.091318241144966e-05, "loss": 0.2634, "step": 29176 }, { "epoch": 2.363658457550227, "grad_norm": 0.08230286836624146, "learning_rate": 9.090868175885504e-05, "loss": 0.2736, "step": 29177 }, { "epoch": 2.3637394685677253, "grad_norm": 0.05570271983742714, "learning_rate": 9.090418110626041e-05, "loss": 0.1984, "step": 29178 }, { "epoch": 2.3638204795852236, "grad_norm": 0.05833020433783531, "learning_rate": 9.089968045366578e-05, "loss": 0.232, "step": 29179 }, { "epoch": 2.363901490602722, "grad_norm": 0.06215290352702141, "learning_rate": 9.089517980107116e-05, "loss": 0.2629, "step": 29180 }, { "epoch": 2.3639825016202205, "grad_norm": 0.07116110622882843, "learning_rate": 9.089067914847653e-05, "loss": 0.2618, "step": 29181 }, { "epoch": 2.3640635126377187, "grad_norm": 0.07417536526918411, "learning_rate": 9.08861784958819e-05, "loss": 0.2508, "step": 29182 }, { "epoch": 2.364144523655217, "grad_norm": 0.0574500672519207, "learning_rate": 9.088167784328728e-05, "loss": 0.2698, "step": 29183 }, { "epoch": 2.3642255346727157, "grad_norm": 0.06579186022281647, "learning_rate": 9.087717719069265e-05, "loss": 0.2372, "step": 29184 }, { "epoch": 2.364306545690214, "grad_norm": 0.06995394080877304, "learning_rate": 9.087267653809804e-05, "loss": 0.2239, "step": 29185 }, { "epoch": 2.364387556707712, "grad_norm": 0.06749846041202545, "learning_rate": 9.08681758855034e-05, "loss": 0.2503, "step": 29186 }, { "epoch": 2.3644685677252104, "grad_norm": 0.0807734802365303, "learning_rate": 9.086367523290877e-05, "loss": 0.2808, "step": 29187 }, { "epoch": 2.364549578742709, "grad_norm": 0.060010310262441635, "learning_rate": 9.085917458031416e-05, "loss": 0.2503, "step": 29188 }, { "epoch": 2.3646305897602073, "grad_norm": 0.0891261026263237, "learning_rate": 9.085467392771952e-05, "loss": 0.2796, "step": 29189 }, { "epoch": 2.3647116007777056, "grad_norm": 0.06593820452690125, "learning_rate": 9.085017327512489e-05, "loss": 0.2598, "step": 29190 }, { "epoch": 2.3647926117952043, "grad_norm": 0.06441017240285873, "learning_rate": 9.084567262253028e-05, "loss": 0.2758, "step": 29191 }, { "epoch": 2.3648736228127025, "grad_norm": 0.05712772160768509, "learning_rate": 9.084117196993564e-05, "loss": 0.2509, "step": 29192 }, { "epoch": 2.3649546338302008, "grad_norm": 0.053244173526763916, "learning_rate": 9.083667131734101e-05, "loss": 0.2186, "step": 29193 }, { "epoch": 2.3650356448476995, "grad_norm": 0.06399260461330414, "learning_rate": 9.08321706647464e-05, "loss": 0.2382, "step": 29194 }, { "epoch": 2.3651166558651977, "grad_norm": 0.10175412148237228, "learning_rate": 9.082767001215176e-05, "loss": 0.2739, "step": 29195 }, { "epoch": 2.365197666882696, "grad_norm": 0.09233570098876953, "learning_rate": 9.082316935955713e-05, "loss": 0.2991, "step": 29196 }, { "epoch": 2.3652786779001946, "grad_norm": 0.08154605329036713, "learning_rate": 9.081866870696252e-05, "loss": 0.2443, "step": 29197 }, { "epoch": 2.365359688917693, "grad_norm": 0.07981499284505844, "learning_rate": 9.081416805436788e-05, "loss": 0.2662, "step": 29198 }, { "epoch": 2.365440699935191, "grad_norm": 0.060174666345119476, "learning_rate": 9.080966740177326e-05, "loss": 0.2389, "step": 29199 }, { "epoch": 2.3655217109526894, "grad_norm": 0.06928128004074097, "learning_rate": 9.080516674917864e-05, "loss": 0.259, "step": 29200 }, { "epoch": 2.365602721970188, "grad_norm": 0.0812605544924736, "learning_rate": 9.0800666096584e-05, "loss": 0.2458, "step": 29201 }, { "epoch": 2.3656837329876863, "grad_norm": 0.08370108902454376, "learning_rate": 9.079616544398938e-05, "loss": 0.2776, "step": 29202 }, { "epoch": 2.3657647440051845, "grad_norm": 0.058878593146800995, "learning_rate": 9.079166479139476e-05, "loss": 0.2219, "step": 29203 }, { "epoch": 2.3658457550226832, "grad_norm": 0.08872153609991074, "learning_rate": 9.078716413880012e-05, "loss": 0.2776, "step": 29204 }, { "epoch": 2.3659267660401815, "grad_norm": 0.06256221234798431, "learning_rate": 9.07826634862055e-05, "loss": 0.2237, "step": 29205 }, { "epoch": 2.3660077770576797, "grad_norm": 0.06201305240392685, "learning_rate": 9.077816283361088e-05, "loss": 0.2855, "step": 29206 }, { "epoch": 2.3660887880751784, "grad_norm": 0.06656473875045776, "learning_rate": 9.077366218101624e-05, "loss": 0.2318, "step": 29207 }, { "epoch": 2.3661697990926767, "grad_norm": 0.06485893577337265, "learning_rate": 9.076916152842162e-05, "loss": 0.2734, "step": 29208 }, { "epoch": 2.366250810110175, "grad_norm": 0.06904721260070801, "learning_rate": 9.0764660875827e-05, "loss": 0.2874, "step": 29209 }, { "epoch": 2.366331821127673, "grad_norm": 0.0712614580988884, "learning_rate": 9.076016022323236e-05, "loss": 0.2972, "step": 29210 }, { "epoch": 2.366412832145172, "grad_norm": 0.06572310626506805, "learning_rate": 9.075565957063775e-05, "loss": 0.2869, "step": 29211 }, { "epoch": 2.36649384316267, "grad_norm": 0.055767737329006195, "learning_rate": 9.075115891804313e-05, "loss": 0.2331, "step": 29212 }, { "epoch": 2.3665748541801683, "grad_norm": 0.06114116311073303, "learning_rate": 9.074665826544849e-05, "loss": 0.2281, "step": 29213 }, { "epoch": 2.366655865197667, "grad_norm": 0.06554603576660156, "learning_rate": 9.074215761285387e-05, "loss": 0.2319, "step": 29214 }, { "epoch": 2.3667368762151653, "grad_norm": 0.06667567044496536, "learning_rate": 9.073765696025925e-05, "loss": 0.2464, "step": 29215 }, { "epoch": 2.3668178872326635, "grad_norm": 0.0630006194114685, "learning_rate": 9.07331563076646e-05, "loss": 0.2647, "step": 29216 }, { "epoch": 2.366898898250162, "grad_norm": 0.0625307708978653, "learning_rate": 9.072865565507e-05, "loss": 0.2146, "step": 29217 }, { "epoch": 2.3669799092676604, "grad_norm": 0.06067518889904022, "learning_rate": 9.072415500247537e-05, "loss": 0.2151, "step": 29218 }, { "epoch": 2.3670609202851587, "grad_norm": 0.06341242045164108, "learning_rate": 9.071965434988073e-05, "loss": 0.2586, "step": 29219 }, { "epoch": 2.3671419313026574, "grad_norm": 0.06968440860509872, "learning_rate": 9.071515369728611e-05, "loss": 0.234, "step": 29220 }, { "epoch": 2.3672229423201556, "grad_norm": 0.07393652945756912, "learning_rate": 9.071065304469149e-05, "loss": 0.223, "step": 29221 }, { "epoch": 2.367303953337654, "grad_norm": 0.06258944422006607, "learning_rate": 9.070615239209685e-05, "loss": 0.2252, "step": 29222 }, { "epoch": 2.367384964355152, "grad_norm": 0.0730939507484436, "learning_rate": 9.070165173950224e-05, "loss": 0.2548, "step": 29223 }, { "epoch": 2.367465975372651, "grad_norm": 0.05863727629184723, "learning_rate": 9.069715108690761e-05, "loss": 0.2166, "step": 29224 }, { "epoch": 2.367546986390149, "grad_norm": 0.08163779973983765, "learning_rate": 9.069265043431298e-05, "loss": 0.267, "step": 29225 }, { "epoch": 2.3676279974076473, "grad_norm": 0.07284118235111237, "learning_rate": 9.068814978171836e-05, "loss": 0.2513, "step": 29226 }, { "epoch": 2.367709008425146, "grad_norm": 0.061009738594293594, "learning_rate": 9.068364912912373e-05, "loss": 0.2427, "step": 29227 }, { "epoch": 2.3677900194426442, "grad_norm": 0.06732402741909027, "learning_rate": 9.06791484765291e-05, "loss": 0.2178, "step": 29228 }, { "epoch": 2.3678710304601425, "grad_norm": 0.07382349669933319, "learning_rate": 9.067464782393448e-05, "loss": 0.2322, "step": 29229 }, { "epoch": 2.3679520414776407, "grad_norm": 0.06112572178244591, "learning_rate": 9.067014717133985e-05, "loss": 0.213, "step": 29230 }, { "epoch": 2.3680330524951394, "grad_norm": 0.07919428497552872, "learning_rate": 9.066564651874522e-05, "loss": 0.2283, "step": 29231 }, { "epoch": 2.3681140635126376, "grad_norm": 0.07739866524934769, "learning_rate": 9.06611458661506e-05, "loss": 0.2545, "step": 29232 }, { "epoch": 2.368195074530136, "grad_norm": 0.07174039632081985, "learning_rate": 9.065664521355597e-05, "loss": 0.2392, "step": 29233 }, { "epoch": 2.3682760855476346, "grad_norm": 0.08616352826356888, "learning_rate": 9.065214456096134e-05, "loss": 0.2912, "step": 29234 }, { "epoch": 2.368357096565133, "grad_norm": 0.07922189682722092, "learning_rate": 9.064764390836672e-05, "loss": 0.2886, "step": 29235 }, { "epoch": 2.368438107582631, "grad_norm": 0.07131379842758179, "learning_rate": 9.064314325577209e-05, "loss": 0.2466, "step": 29236 }, { "epoch": 2.3685191186001298, "grad_norm": 0.053437042981386185, "learning_rate": 9.063864260317747e-05, "loss": 0.2572, "step": 29237 }, { "epoch": 2.368600129617628, "grad_norm": 0.08782981336116791, "learning_rate": 9.063414195058284e-05, "loss": 0.2602, "step": 29238 }, { "epoch": 2.3686811406351262, "grad_norm": 0.061420753598213196, "learning_rate": 9.062964129798821e-05, "loss": 0.2428, "step": 29239 }, { "epoch": 2.368762151652625, "grad_norm": 0.053099263459444046, "learning_rate": 9.062514064539359e-05, "loss": 0.2624, "step": 29240 }, { "epoch": 2.368843162670123, "grad_norm": 0.055860213935375214, "learning_rate": 9.062063999279896e-05, "loss": 0.2477, "step": 29241 }, { "epoch": 2.3689241736876214, "grad_norm": 0.057424698024988174, "learning_rate": 9.061613934020433e-05, "loss": 0.2316, "step": 29242 }, { "epoch": 2.36900518470512, "grad_norm": 0.059896305203437805, "learning_rate": 9.061163868760971e-05, "loss": 0.2346, "step": 29243 }, { "epoch": 2.3690861957226184, "grad_norm": 0.060097161680459976, "learning_rate": 9.060713803501508e-05, "loss": 0.2623, "step": 29244 }, { "epoch": 2.3691672067401166, "grad_norm": 0.06034714728593826, "learning_rate": 9.060263738242045e-05, "loss": 0.2934, "step": 29245 }, { "epoch": 2.369248217757615, "grad_norm": 0.07298918068408966, "learning_rate": 9.059813672982583e-05, "loss": 0.2696, "step": 29246 }, { "epoch": 2.3693292287751135, "grad_norm": 0.060322027653455734, "learning_rate": 9.05936360772312e-05, "loss": 0.2534, "step": 29247 }, { "epoch": 2.369410239792612, "grad_norm": 0.08148866891860962, "learning_rate": 9.058913542463658e-05, "loss": 0.2583, "step": 29248 }, { "epoch": 2.36949125081011, "grad_norm": 0.08276345580816269, "learning_rate": 9.058463477204195e-05, "loss": 0.2633, "step": 29249 }, { "epoch": 2.3695722618276087, "grad_norm": 0.059174928814172745, "learning_rate": 9.058013411944732e-05, "loss": 0.2334, "step": 29250 }, { "epoch": 2.369653272845107, "grad_norm": 0.06756450980901718, "learning_rate": 9.05756334668527e-05, "loss": 0.2424, "step": 29251 }, { "epoch": 2.369734283862605, "grad_norm": 0.06810939311981201, "learning_rate": 9.057113281425807e-05, "loss": 0.2365, "step": 29252 }, { "epoch": 2.3698152948801035, "grad_norm": 0.06994233280420303, "learning_rate": 9.056663216166344e-05, "loss": 0.2347, "step": 29253 }, { "epoch": 2.369896305897602, "grad_norm": 0.07477480173110962, "learning_rate": 9.056213150906882e-05, "loss": 0.2462, "step": 29254 }, { "epoch": 2.3699773169151004, "grad_norm": 0.056406669318675995, "learning_rate": 9.055763085647419e-05, "loss": 0.2442, "step": 29255 }, { "epoch": 2.3700583279325986, "grad_norm": 0.06577787548303604, "learning_rate": 9.055313020387956e-05, "loss": 0.2065, "step": 29256 }, { "epoch": 2.3701393389500973, "grad_norm": 0.08167983591556549, "learning_rate": 9.054862955128494e-05, "loss": 0.2722, "step": 29257 }, { "epoch": 2.3702203499675956, "grad_norm": 0.06138281151652336, "learning_rate": 9.054412889869031e-05, "loss": 0.2421, "step": 29258 }, { "epoch": 2.370301360985094, "grad_norm": 0.08680165559053421, "learning_rate": 9.053962824609568e-05, "loss": 0.2529, "step": 29259 }, { "epoch": 2.3703823720025925, "grad_norm": 0.05767164006829262, "learning_rate": 9.053512759350106e-05, "loss": 0.2414, "step": 29260 }, { "epoch": 2.3704633830200907, "grad_norm": 0.06463748961687088, "learning_rate": 9.053062694090643e-05, "loss": 0.2336, "step": 29261 }, { "epoch": 2.370544394037589, "grad_norm": 0.060328882187604904, "learning_rate": 9.05261262883118e-05, "loss": 0.2762, "step": 29262 }, { "epoch": 2.3706254050550877, "grad_norm": 0.06523432582616806, "learning_rate": 9.052162563571719e-05, "loss": 0.2519, "step": 29263 }, { "epoch": 2.370706416072586, "grad_norm": 0.06499111652374268, "learning_rate": 9.051712498312255e-05, "loss": 0.2491, "step": 29264 }, { "epoch": 2.370787427090084, "grad_norm": 0.06619423627853394, "learning_rate": 9.051262433052793e-05, "loss": 0.2648, "step": 29265 }, { "epoch": 2.370868438107583, "grad_norm": 0.06158056482672691, "learning_rate": 9.050812367793331e-05, "loss": 0.2727, "step": 29266 }, { "epoch": 2.370949449125081, "grad_norm": 0.08038074523210526, "learning_rate": 9.050362302533867e-05, "loss": 0.2144, "step": 29267 }, { "epoch": 2.3710304601425793, "grad_norm": 0.06463596224784851, "learning_rate": 9.049912237274405e-05, "loss": 0.2685, "step": 29268 }, { "epoch": 2.3711114711600776, "grad_norm": 0.0551106333732605, "learning_rate": 9.049462172014943e-05, "loss": 0.2355, "step": 29269 }, { "epoch": 2.3711924821775763, "grad_norm": 0.053852569311857224, "learning_rate": 9.04901210675548e-05, "loss": 0.2639, "step": 29270 }, { "epoch": 2.3712734931950745, "grad_norm": 0.06700147688388824, "learning_rate": 9.048562041496017e-05, "loss": 0.2603, "step": 29271 }, { "epoch": 2.3713545042125728, "grad_norm": 0.054714132100343704, "learning_rate": 9.048111976236556e-05, "loss": 0.1949, "step": 29272 }, { "epoch": 2.3714355152300715, "grad_norm": 0.059138987213373184, "learning_rate": 9.047661910977092e-05, "loss": 0.2275, "step": 29273 }, { "epoch": 2.3715165262475697, "grad_norm": 0.08097271621227264, "learning_rate": 9.047211845717629e-05, "loss": 0.2626, "step": 29274 }, { "epoch": 2.371597537265068, "grad_norm": 0.08481959998607635, "learning_rate": 9.046761780458168e-05, "loss": 0.2558, "step": 29275 }, { "epoch": 2.371678548282566, "grad_norm": 0.06269409507513046, "learning_rate": 9.046311715198704e-05, "loss": 0.2561, "step": 29276 }, { "epoch": 2.371759559300065, "grad_norm": 0.06228793412446976, "learning_rate": 9.045861649939241e-05, "loss": 0.2448, "step": 29277 }, { "epoch": 2.371840570317563, "grad_norm": 0.06148279458284378, "learning_rate": 9.04541158467978e-05, "loss": 0.299, "step": 29278 }, { "epoch": 2.3719215813350614, "grad_norm": 0.06152509152889252, "learning_rate": 9.044961519420316e-05, "loss": 0.2714, "step": 29279 }, { "epoch": 2.37200259235256, "grad_norm": 0.07238215953111649, "learning_rate": 9.044511454160853e-05, "loss": 0.237, "step": 29280 }, { "epoch": 2.3720836033700583, "grad_norm": 0.0703226625919342, "learning_rate": 9.044061388901392e-05, "loss": 0.2399, "step": 29281 }, { "epoch": 2.3721646143875565, "grad_norm": 0.06950215995311737, "learning_rate": 9.043611323641928e-05, "loss": 0.2455, "step": 29282 }, { "epoch": 2.3722456254050552, "grad_norm": 0.06561096012592316, "learning_rate": 9.043161258382465e-05, "loss": 0.2642, "step": 29283 }, { "epoch": 2.3723266364225535, "grad_norm": 0.05323941260576248, "learning_rate": 9.042711193123004e-05, "loss": 0.2313, "step": 29284 }, { "epoch": 2.3724076474400517, "grad_norm": 0.057839252054691315, "learning_rate": 9.04226112786354e-05, "loss": 0.2092, "step": 29285 }, { "epoch": 2.3724886584575504, "grad_norm": 0.06279532611370087, "learning_rate": 9.041811062604077e-05, "loss": 0.2515, "step": 29286 }, { "epoch": 2.3725696694750487, "grad_norm": 0.059042852371931076, "learning_rate": 9.041360997344616e-05, "loss": 0.2575, "step": 29287 }, { "epoch": 2.372650680492547, "grad_norm": 0.0693732351064682, "learning_rate": 9.040910932085152e-05, "loss": 0.2682, "step": 29288 }, { "epoch": 2.3727316915100456, "grad_norm": 0.06384388357400894, "learning_rate": 9.04046086682569e-05, "loss": 0.247, "step": 29289 }, { "epoch": 2.372812702527544, "grad_norm": 0.07595692574977875, "learning_rate": 9.040010801566228e-05, "loss": 0.2578, "step": 29290 }, { "epoch": 2.372893713545042, "grad_norm": 0.07165265083312988, "learning_rate": 9.039560736306765e-05, "loss": 0.2329, "step": 29291 }, { "epoch": 2.3729747245625403, "grad_norm": 0.06206320971250534, "learning_rate": 9.039110671047303e-05, "loss": 0.2453, "step": 29292 }, { "epoch": 2.373055735580039, "grad_norm": 0.05709023401141167, "learning_rate": 9.03866060578784e-05, "loss": 0.2483, "step": 29293 }, { "epoch": 2.3731367465975373, "grad_norm": 0.06202362850308418, "learning_rate": 9.038210540528377e-05, "loss": 0.2261, "step": 29294 }, { "epoch": 2.3732177576150355, "grad_norm": 0.0633508712053299, "learning_rate": 9.037760475268915e-05, "loss": 0.2277, "step": 29295 }, { "epoch": 2.373298768632534, "grad_norm": 0.08329858630895615, "learning_rate": 9.037310410009452e-05, "loss": 0.3046, "step": 29296 }, { "epoch": 2.3733797796500324, "grad_norm": 0.07049525529146194, "learning_rate": 9.03686034474999e-05, "loss": 0.2704, "step": 29297 }, { "epoch": 2.3734607906675307, "grad_norm": 0.06773684173822403, "learning_rate": 9.036410279490527e-05, "loss": 0.2407, "step": 29298 }, { "epoch": 2.373541801685029, "grad_norm": 0.0773211419582367, "learning_rate": 9.035960214231064e-05, "loss": 0.236, "step": 29299 }, { "epoch": 2.3736228127025276, "grad_norm": 0.07259319722652435, "learning_rate": 9.035510148971602e-05, "loss": 0.2451, "step": 29300 }, { "epoch": 2.373703823720026, "grad_norm": 0.07149739563465118, "learning_rate": 9.035060083712139e-05, "loss": 0.2715, "step": 29301 }, { "epoch": 2.373784834737524, "grad_norm": 0.07947015017271042, "learning_rate": 9.034610018452676e-05, "loss": 0.2534, "step": 29302 }, { "epoch": 2.373865845755023, "grad_norm": 0.05928175151348114, "learning_rate": 9.034159953193214e-05, "loss": 0.2417, "step": 29303 }, { "epoch": 2.373946856772521, "grad_norm": 0.07301197201013565, "learning_rate": 9.033709887933751e-05, "loss": 0.2505, "step": 29304 }, { "epoch": 2.3740278677900193, "grad_norm": 0.06998045742511749, "learning_rate": 9.033259822674288e-05, "loss": 0.2652, "step": 29305 }, { "epoch": 2.374108878807518, "grad_norm": 0.055942460894584656, "learning_rate": 9.032809757414826e-05, "loss": 0.2293, "step": 29306 }, { "epoch": 2.374189889825016, "grad_norm": 0.05982231721282005, "learning_rate": 9.032359692155363e-05, "loss": 0.2461, "step": 29307 }, { "epoch": 2.3742709008425145, "grad_norm": 0.05536472797393799, "learning_rate": 9.0319096268959e-05, "loss": 0.2613, "step": 29308 }, { "epoch": 2.374351911860013, "grad_norm": 0.06701016426086426, "learning_rate": 9.031459561636438e-05, "loss": 0.2521, "step": 29309 }, { "epoch": 2.3744329228775114, "grad_norm": 0.059674832969903946, "learning_rate": 9.031009496376975e-05, "loss": 0.2505, "step": 29310 }, { "epoch": 2.3745139338950096, "grad_norm": 0.06509581953287125, "learning_rate": 9.030559431117513e-05, "loss": 0.2343, "step": 29311 }, { "epoch": 2.3745949449125083, "grad_norm": 0.06403180956840515, "learning_rate": 9.03010936585805e-05, "loss": 0.2505, "step": 29312 }, { "epoch": 2.3746759559300066, "grad_norm": 0.06330661475658417, "learning_rate": 9.029659300598587e-05, "loss": 0.2172, "step": 29313 }, { "epoch": 2.374756966947505, "grad_norm": 0.060208771377801895, "learning_rate": 9.029209235339125e-05, "loss": 0.2502, "step": 29314 }, { "epoch": 2.374837977965003, "grad_norm": 0.07508249580860138, "learning_rate": 9.028759170079662e-05, "loss": 0.2881, "step": 29315 }, { "epoch": 2.3749189889825018, "grad_norm": 0.0637403130531311, "learning_rate": 9.0283091048202e-05, "loss": 0.2305, "step": 29316 }, { "epoch": 2.375, "grad_norm": 0.06241225078701973, "learning_rate": 9.027859039560737e-05, "loss": 0.2242, "step": 29317 }, { "epoch": 2.3750810110174982, "grad_norm": 0.05353011190891266, "learning_rate": 9.027408974301274e-05, "loss": 0.2316, "step": 29318 }, { "epoch": 2.375162022034997, "grad_norm": 0.0804276168346405, "learning_rate": 9.026958909041811e-05, "loss": 0.2389, "step": 29319 }, { "epoch": 2.375243033052495, "grad_norm": 0.07396351546049118, "learning_rate": 9.026508843782349e-05, "loss": 0.2833, "step": 29320 }, { "epoch": 2.3753240440699934, "grad_norm": 0.07646553963422775, "learning_rate": 9.026058778522886e-05, "loss": 0.2643, "step": 29321 }, { "epoch": 2.3754050550874917, "grad_norm": 0.06344413757324219, "learning_rate": 9.025608713263424e-05, "loss": 0.2716, "step": 29322 }, { "epoch": 2.3754860661049904, "grad_norm": 0.07764876633882523, "learning_rate": 9.025158648003961e-05, "loss": 0.2762, "step": 29323 }, { "epoch": 2.3755670771224886, "grad_norm": 0.07426711171865463, "learning_rate": 9.024708582744498e-05, "loss": 0.2448, "step": 29324 }, { "epoch": 2.375648088139987, "grad_norm": 0.06773090362548828, "learning_rate": 9.024258517485036e-05, "loss": 0.2829, "step": 29325 }, { "epoch": 2.3757290991574855, "grad_norm": 0.06279227882623672, "learning_rate": 9.023808452225573e-05, "loss": 0.2658, "step": 29326 }, { "epoch": 2.375810110174984, "grad_norm": 0.06827504187822342, "learning_rate": 9.02335838696611e-05, "loss": 0.2172, "step": 29327 }, { "epoch": 2.375891121192482, "grad_norm": 0.06267635524272919, "learning_rate": 9.022908321706648e-05, "loss": 0.2675, "step": 29328 }, { "epoch": 2.3759721322099807, "grad_norm": 0.05714039504528046, "learning_rate": 9.022458256447185e-05, "loss": 0.2372, "step": 29329 }, { "epoch": 2.376053143227479, "grad_norm": 0.07521743327379227, "learning_rate": 9.022008191187722e-05, "loss": 0.2625, "step": 29330 }, { "epoch": 2.376134154244977, "grad_norm": 0.07218578457832336, "learning_rate": 9.02155812592826e-05, "loss": 0.2601, "step": 29331 }, { "epoch": 2.376215165262476, "grad_norm": 0.061242103576660156, "learning_rate": 9.021108060668797e-05, "loss": 0.2439, "step": 29332 }, { "epoch": 2.376296176279974, "grad_norm": 0.05862688645720482, "learning_rate": 9.020657995409335e-05, "loss": 0.2327, "step": 29333 }, { "epoch": 2.3763771872974724, "grad_norm": 0.065977081656456, "learning_rate": 9.020207930149872e-05, "loss": 0.2434, "step": 29334 }, { "epoch": 2.376458198314971, "grad_norm": 0.0634816437959671, "learning_rate": 9.019757864890409e-05, "loss": 0.2493, "step": 29335 }, { "epoch": 2.3765392093324693, "grad_norm": 0.06377308815717697, "learning_rate": 9.019307799630947e-05, "loss": 0.2441, "step": 29336 }, { "epoch": 2.3766202203499676, "grad_norm": 0.06866897642612457, "learning_rate": 9.018857734371484e-05, "loss": 0.2655, "step": 29337 }, { "epoch": 2.376701231367466, "grad_norm": 0.06453913450241089, "learning_rate": 9.018407669112021e-05, "loss": 0.2355, "step": 29338 }, { "epoch": 2.3767822423849645, "grad_norm": 0.07311535626649857, "learning_rate": 9.017957603852559e-05, "loss": 0.2829, "step": 29339 }, { "epoch": 2.3768632534024627, "grad_norm": 0.08807942271232605, "learning_rate": 9.017507538593096e-05, "loss": 0.2346, "step": 29340 }, { "epoch": 2.376944264419961, "grad_norm": 0.056518666446208954, "learning_rate": 9.017057473333633e-05, "loss": 0.2741, "step": 29341 }, { "epoch": 2.3770252754374597, "grad_norm": 0.07224880158901215, "learning_rate": 9.016607408074171e-05, "loss": 0.2188, "step": 29342 }, { "epoch": 2.377106286454958, "grad_norm": 0.0775294154882431, "learning_rate": 9.016157342814708e-05, "loss": 0.2412, "step": 29343 }, { "epoch": 2.377187297472456, "grad_norm": 0.06410830467939377, "learning_rate": 9.015707277555247e-05, "loss": 0.2718, "step": 29344 }, { "epoch": 2.3772683084899544, "grad_norm": 0.0727565661072731, "learning_rate": 9.015257212295783e-05, "loss": 0.2492, "step": 29345 }, { "epoch": 2.377349319507453, "grad_norm": 0.05172773078083992, "learning_rate": 9.01480714703632e-05, "loss": 0.1947, "step": 29346 }, { "epoch": 2.3774303305249513, "grad_norm": 0.05713996663689613, "learning_rate": 9.014357081776859e-05, "loss": 0.2295, "step": 29347 }, { "epoch": 2.3775113415424496, "grad_norm": 0.07344935834407806, "learning_rate": 9.013907016517395e-05, "loss": 0.2317, "step": 29348 }, { "epoch": 2.3775923525599483, "grad_norm": 0.050090860575437546, "learning_rate": 9.013456951257932e-05, "loss": 0.2414, "step": 29349 }, { "epoch": 2.3776733635774465, "grad_norm": 0.05050138384103775, "learning_rate": 9.013006885998471e-05, "loss": 0.2231, "step": 29350 }, { "epoch": 2.3777543745949448, "grad_norm": 0.06192393973469734, "learning_rate": 9.012556820739007e-05, "loss": 0.2396, "step": 29351 }, { "epoch": 2.3778353856124435, "grad_norm": 0.07744492590427399, "learning_rate": 9.012106755479544e-05, "loss": 0.2311, "step": 29352 }, { "epoch": 2.3779163966299417, "grad_norm": 0.06321647018194199, "learning_rate": 9.011656690220083e-05, "loss": 0.2532, "step": 29353 }, { "epoch": 2.37799740764744, "grad_norm": 0.06907698512077332, "learning_rate": 9.011206624960619e-05, "loss": 0.2545, "step": 29354 }, { "epoch": 2.3780784186649386, "grad_norm": 0.06946361809968948, "learning_rate": 9.010756559701156e-05, "loss": 0.2538, "step": 29355 }, { "epoch": 2.378159429682437, "grad_norm": 0.0634828582406044, "learning_rate": 9.010306494441695e-05, "loss": 0.2617, "step": 29356 }, { "epoch": 2.378240440699935, "grad_norm": 0.06888262182474136, "learning_rate": 9.009856429182231e-05, "loss": 0.2616, "step": 29357 }, { "epoch": 2.378321451717434, "grad_norm": 0.07091489434242249, "learning_rate": 9.009406363922769e-05, "loss": 0.2833, "step": 29358 }, { "epoch": 2.378402462734932, "grad_norm": 0.06463092565536499, "learning_rate": 9.008956298663307e-05, "loss": 0.262, "step": 29359 }, { "epoch": 2.3784834737524303, "grad_norm": 0.06307054311037064, "learning_rate": 9.008506233403845e-05, "loss": 0.2975, "step": 29360 }, { "epoch": 2.3785644847699285, "grad_norm": 0.07395637035369873, "learning_rate": 9.00805616814438e-05, "loss": 0.2408, "step": 29361 }, { "epoch": 2.3786454957874272, "grad_norm": 0.0689404159784317, "learning_rate": 9.007606102884919e-05, "loss": 0.265, "step": 29362 }, { "epoch": 2.3787265068049255, "grad_norm": 0.060062237083911896, "learning_rate": 9.007156037625457e-05, "loss": 0.2371, "step": 29363 }, { "epoch": 2.3788075178224237, "grad_norm": 0.06880932301282883, "learning_rate": 9.006705972365993e-05, "loss": 0.2392, "step": 29364 }, { "epoch": 2.3788885288399224, "grad_norm": 0.05119600147008896, "learning_rate": 9.006255907106531e-05, "loss": 0.2123, "step": 29365 }, { "epoch": 2.3789695398574207, "grad_norm": 0.053568657487630844, "learning_rate": 9.005805841847069e-05, "loss": 0.258, "step": 29366 }, { "epoch": 2.379050550874919, "grad_norm": 0.05414640158414841, "learning_rate": 9.005355776587605e-05, "loss": 0.1898, "step": 29367 }, { "epoch": 2.379131561892417, "grad_norm": 0.057076968252658844, "learning_rate": 9.004905711328143e-05, "loss": 0.2524, "step": 29368 }, { "epoch": 2.379212572909916, "grad_norm": 0.07572975754737854, "learning_rate": 9.004455646068681e-05, "loss": 0.2415, "step": 29369 }, { "epoch": 2.379293583927414, "grad_norm": 0.05044267699122429, "learning_rate": 9.004005580809218e-05, "loss": 0.2259, "step": 29370 }, { "epoch": 2.3793745949449123, "grad_norm": 0.0783015713095665, "learning_rate": 9.003555515549756e-05, "loss": 0.2868, "step": 29371 }, { "epoch": 2.379455605962411, "grad_norm": 0.06224671006202698, "learning_rate": 9.003105450290293e-05, "loss": 0.2468, "step": 29372 }, { "epoch": 2.3795366169799093, "grad_norm": 0.06955622136592865, "learning_rate": 9.00265538503083e-05, "loss": 0.2552, "step": 29373 }, { "epoch": 2.3796176279974075, "grad_norm": 0.05678120627999306, "learning_rate": 9.002205319771368e-05, "loss": 0.1996, "step": 29374 }, { "epoch": 2.379698639014906, "grad_norm": 0.07291990518569946, "learning_rate": 9.001755254511905e-05, "loss": 0.241, "step": 29375 }, { "epoch": 2.3797796500324044, "grad_norm": 0.06487166881561279, "learning_rate": 9.001305189252442e-05, "loss": 0.184, "step": 29376 }, { "epoch": 2.3798606610499027, "grad_norm": 0.06429188698530197, "learning_rate": 9.00085512399298e-05, "loss": 0.2441, "step": 29377 }, { "epoch": 2.3799416720674014, "grad_norm": 0.06392151862382889, "learning_rate": 9.000405058733517e-05, "loss": 0.2663, "step": 29378 }, { "epoch": 2.3800226830848996, "grad_norm": 0.07104162126779556, "learning_rate": 8.999954993474054e-05, "loss": 0.2556, "step": 29379 }, { "epoch": 2.380103694102398, "grad_norm": 0.07497918605804443, "learning_rate": 8.999504928214592e-05, "loss": 0.2527, "step": 29380 }, { "epoch": 2.3801847051198965, "grad_norm": 0.06797675788402557, "learning_rate": 8.999054862955129e-05, "loss": 0.2693, "step": 29381 }, { "epoch": 2.380265716137395, "grad_norm": 0.06973263621330261, "learning_rate": 8.998604797695667e-05, "loss": 0.235, "step": 29382 }, { "epoch": 2.380346727154893, "grad_norm": 0.06908416748046875, "learning_rate": 8.998154732436204e-05, "loss": 0.2702, "step": 29383 }, { "epoch": 2.3804277381723913, "grad_norm": 0.06926834583282471, "learning_rate": 8.997704667176741e-05, "loss": 0.2228, "step": 29384 }, { "epoch": 2.38050874918989, "grad_norm": 0.06293617933988571, "learning_rate": 8.997254601917279e-05, "loss": 0.2318, "step": 29385 }, { "epoch": 2.380589760207388, "grad_norm": 0.07725179195404053, "learning_rate": 8.996804536657816e-05, "loss": 0.2875, "step": 29386 }, { "epoch": 2.3806707712248865, "grad_norm": 0.08843903988599777, "learning_rate": 8.996354471398353e-05, "loss": 0.2792, "step": 29387 }, { "epoch": 2.380751782242385, "grad_norm": 0.05114304646849632, "learning_rate": 8.995904406138891e-05, "loss": 0.2401, "step": 29388 }, { "epoch": 2.3808327932598834, "grad_norm": 0.06192674860358238, "learning_rate": 8.995454340879428e-05, "loss": 0.2321, "step": 29389 }, { "epoch": 2.3809138042773816, "grad_norm": 0.07055914402008057, "learning_rate": 8.995004275619965e-05, "loss": 0.244, "step": 29390 }, { "epoch": 2.38099481529488, "grad_norm": 0.06864523887634277, "learning_rate": 8.994554210360503e-05, "loss": 0.2536, "step": 29391 }, { "epoch": 2.3810758263123786, "grad_norm": 0.06931130588054657, "learning_rate": 8.99410414510104e-05, "loss": 0.2185, "step": 29392 }, { "epoch": 2.381156837329877, "grad_norm": 0.06413775682449341, "learning_rate": 8.993654079841577e-05, "loss": 0.26, "step": 29393 }, { "epoch": 2.381237848347375, "grad_norm": 0.06281787902116776, "learning_rate": 8.993204014582115e-05, "loss": 0.2374, "step": 29394 }, { "epoch": 2.3813188593648738, "grad_norm": 0.06532049179077148, "learning_rate": 8.992753949322652e-05, "loss": 0.2814, "step": 29395 }, { "epoch": 2.381399870382372, "grad_norm": 0.04853471741080284, "learning_rate": 8.99230388406319e-05, "loss": 0.2335, "step": 29396 }, { "epoch": 2.3814808813998702, "grad_norm": 0.04921332374215126, "learning_rate": 8.991853818803727e-05, "loss": 0.2296, "step": 29397 }, { "epoch": 2.381561892417369, "grad_norm": 0.07588033378124237, "learning_rate": 8.991403753544264e-05, "loss": 0.2316, "step": 29398 }, { "epoch": 2.381642903434867, "grad_norm": 0.06211639940738678, "learning_rate": 8.990953688284802e-05, "loss": 0.2186, "step": 29399 }, { "epoch": 2.3817239144523654, "grad_norm": 0.05988191440701485, "learning_rate": 8.990503623025339e-05, "loss": 0.2049, "step": 29400 }, { "epoch": 2.381804925469864, "grad_norm": 0.06821976602077484, "learning_rate": 8.990053557765876e-05, "loss": 0.2521, "step": 29401 }, { "epoch": 2.3818859364873624, "grad_norm": 0.06516029685735703, "learning_rate": 8.989603492506414e-05, "loss": 0.2673, "step": 29402 }, { "epoch": 2.3819669475048606, "grad_norm": 0.09090472757816315, "learning_rate": 8.989153427246951e-05, "loss": 0.2308, "step": 29403 }, { "epoch": 2.3820479585223593, "grad_norm": 0.0731615200638771, "learning_rate": 8.988703361987488e-05, "loss": 0.2317, "step": 29404 }, { "epoch": 2.3821289695398575, "grad_norm": 0.06726046651601791, "learning_rate": 8.988253296728026e-05, "loss": 0.2476, "step": 29405 }, { "epoch": 2.3822099805573558, "grad_norm": 0.07715233415365219, "learning_rate": 8.987803231468563e-05, "loss": 0.2815, "step": 29406 }, { "epoch": 2.382290991574854, "grad_norm": 0.07051915675401688, "learning_rate": 8.9873531662091e-05, "loss": 0.247, "step": 29407 }, { "epoch": 2.3823720025923527, "grad_norm": 0.06349935382604599, "learning_rate": 8.986903100949638e-05, "loss": 0.2381, "step": 29408 }, { "epoch": 2.382453013609851, "grad_norm": 0.07029050588607788, "learning_rate": 8.986453035690175e-05, "loss": 0.2344, "step": 29409 }, { "epoch": 2.382534024627349, "grad_norm": 0.06303397566080093, "learning_rate": 8.986002970430713e-05, "loss": 0.2225, "step": 29410 }, { "epoch": 2.382615035644848, "grad_norm": 0.07148943841457367, "learning_rate": 8.98555290517125e-05, "loss": 0.2517, "step": 29411 }, { "epoch": 2.382696046662346, "grad_norm": 0.0819324180483818, "learning_rate": 8.985102839911787e-05, "loss": 0.2755, "step": 29412 }, { "epoch": 2.3827770576798444, "grad_norm": 0.06606269627809525, "learning_rate": 8.984652774652325e-05, "loss": 0.2423, "step": 29413 }, { "epoch": 2.3828580686973426, "grad_norm": 0.06293286383152008, "learning_rate": 8.984202709392862e-05, "loss": 0.2624, "step": 29414 }, { "epoch": 2.3829390797148413, "grad_norm": 0.06545112282037735, "learning_rate": 8.9837526441334e-05, "loss": 0.2283, "step": 29415 }, { "epoch": 2.3830200907323396, "grad_norm": 0.06773924827575684, "learning_rate": 8.983302578873937e-05, "loss": 0.2265, "step": 29416 }, { "epoch": 2.383101101749838, "grad_norm": 0.07627654820680618, "learning_rate": 8.982852513614474e-05, "loss": 0.2721, "step": 29417 }, { "epoch": 2.3831821127673365, "grad_norm": 0.0715307965874672, "learning_rate": 8.982402448355011e-05, "loss": 0.2365, "step": 29418 }, { "epoch": 2.3832631237848347, "grad_norm": 0.06103930249810219, "learning_rate": 8.981952383095549e-05, "loss": 0.231, "step": 29419 }, { "epoch": 2.383344134802333, "grad_norm": 0.06282036751508713, "learning_rate": 8.981502317836086e-05, "loss": 0.2191, "step": 29420 }, { "epoch": 2.3834251458198317, "grad_norm": 0.07668137550354004, "learning_rate": 8.981052252576624e-05, "loss": 0.2885, "step": 29421 }, { "epoch": 2.38350615683733, "grad_norm": 0.06577254086732864, "learning_rate": 8.980602187317162e-05, "loss": 0.2641, "step": 29422 }, { "epoch": 2.383587167854828, "grad_norm": 0.06056416779756546, "learning_rate": 8.980152122057698e-05, "loss": 0.2767, "step": 29423 }, { "epoch": 2.383668178872327, "grad_norm": 0.06798427551984787, "learning_rate": 8.979702056798236e-05, "loss": 0.2606, "step": 29424 }, { "epoch": 2.383749189889825, "grad_norm": 0.06081007421016693, "learning_rate": 8.979251991538774e-05, "loss": 0.2633, "step": 29425 }, { "epoch": 2.3838302009073233, "grad_norm": 0.06475000828504562, "learning_rate": 8.97880192627931e-05, "loss": 0.2315, "step": 29426 }, { "epoch": 2.3839112119248216, "grad_norm": 0.06365825980901718, "learning_rate": 8.978351861019848e-05, "loss": 0.2564, "step": 29427 }, { "epoch": 2.3839922229423203, "grad_norm": 0.06745237857103348, "learning_rate": 8.977901795760386e-05, "loss": 0.2596, "step": 29428 }, { "epoch": 2.3840732339598185, "grad_norm": 0.06658170372247696, "learning_rate": 8.977451730500924e-05, "loss": 0.2437, "step": 29429 }, { "epoch": 2.3841542449773168, "grad_norm": 0.06342948228120804, "learning_rate": 8.97700166524146e-05, "loss": 0.2361, "step": 29430 }, { "epoch": 2.3842352559948155, "grad_norm": 0.07605592161417007, "learning_rate": 8.976551599981999e-05, "loss": 0.2685, "step": 29431 }, { "epoch": 2.3843162670123137, "grad_norm": 0.06304483115673065, "learning_rate": 8.976101534722536e-05, "loss": 0.2481, "step": 29432 }, { "epoch": 2.384397278029812, "grad_norm": 0.06299123167991638, "learning_rate": 8.975651469463072e-05, "loss": 0.2569, "step": 29433 }, { "epoch": 2.3844782890473106, "grad_norm": 0.07267063856124878, "learning_rate": 8.97520140420361e-05, "loss": 0.2488, "step": 29434 }, { "epoch": 2.384559300064809, "grad_norm": 0.0659085214138031, "learning_rate": 8.974751338944148e-05, "loss": 0.2288, "step": 29435 }, { "epoch": 2.384640311082307, "grad_norm": 0.07585114985704422, "learning_rate": 8.974301273684684e-05, "loss": 0.2565, "step": 29436 }, { "epoch": 2.3847213220998054, "grad_norm": 0.08091212064027786, "learning_rate": 8.973851208425223e-05, "loss": 0.2633, "step": 29437 }, { "epoch": 2.384802333117304, "grad_norm": 0.06598428636789322, "learning_rate": 8.97340114316576e-05, "loss": 0.2664, "step": 29438 }, { "epoch": 2.3848833441348023, "grad_norm": 0.06010466068983078, "learning_rate": 8.972951077906296e-05, "loss": 0.2495, "step": 29439 }, { "epoch": 2.3849643551523005, "grad_norm": 0.05948279798030853, "learning_rate": 8.972501012646835e-05, "loss": 0.215, "step": 29440 }, { "epoch": 2.3850453661697992, "grad_norm": 0.07091670483350754, "learning_rate": 8.972050947387372e-05, "loss": 0.2684, "step": 29441 }, { "epoch": 2.3851263771872975, "grad_norm": 0.07026311755180359, "learning_rate": 8.971600882127908e-05, "loss": 0.2313, "step": 29442 }, { "epoch": 2.3852073882047957, "grad_norm": 0.06763871759176254, "learning_rate": 8.971150816868447e-05, "loss": 0.2378, "step": 29443 }, { "epoch": 2.3852883992222944, "grad_norm": 0.06841225922107697, "learning_rate": 8.970700751608984e-05, "loss": 0.2332, "step": 29444 }, { "epoch": 2.3853694102397927, "grad_norm": 0.06431113183498383, "learning_rate": 8.97025068634952e-05, "loss": 0.2752, "step": 29445 }, { "epoch": 2.385450421257291, "grad_norm": 0.06591040641069412, "learning_rate": 8.969800621090059e-05, "loss": 0.2531, "step": 29446 }, { "epoch": 2.3855314322747896, "grad_norm": 0.062982939183712, "learning_rate": 8.969350555830596e-05, "loss": 0.1959, "step": 29447 }, { "epoch": 2.385612443292288, "grad_norm": 0.05950835719704628, "learning_rate": 8.968900490571134e-05, "loss": 0.2678, "step": 29448 }, { "epoch": 2.385693454309786, "grad_norm": 0.05962624028325081, "learning_rate": 8.968450425311671e-05, "loss": 0.2357, "step": 29449 }, { "epoch": 2.3857744653272843, "grad_norm": 0.05592918023467064, "learning_rate": 8.968000360052208e-05, "loss": 0.2017, "step": 29450 }, { "epoch": 2.385855476344783, "grad_norm": 0.06242687255144119, "learning_rate": 8.967550294792746e-05, "loss": 0.2649, "step": 29451 }, { "epoch": 2.3859364873622813, "grad_norm": 0.05544115602970123, "learning_rate": 8.967100229533283e-05, "loss": 0.2417, "step": 29452 }, { "epoch": 2.3860174983797795, "grad_norm": 0.08726091682910919, "learning_rate": 8.96665016427382e-05, "loss": 0.2654, "step": 29453 }, { "epoch": 2.386098509397278, "grad_norm": 0.06033741310238838, "learning_rate": 8.966200099014358e-05, "loss": 0.2248, "step": 29454 }, { "epoch": 2.3861795204147764, "grad_norm": 0.0769038274884224, "learning_rate": 8.965750033754895e-05, "loss": 0.2655, "step": 29455 }, { "epoch": 2.3862605314322747, "grad_norm": 0.06940066814422607, "learning_rate": 8.965299968495433e-05, "loss": 0.2582, "step": 29456 }, { "epoch": 2.386341542449773, "grad_norm": 0.07113378494977951, "learning_rate": 8.96484990323597e-05, "loss": 0.2424, "step": 29457 }, { "epoch": 2.3864225534672716, "grad_norm": 0.07034889608621597, "learning_rate": 8.964399837976507e-05, "loss": 0.2595, "step": 29458 }, { "epoch": 2.38650356448477, "grad_norm": 0.0719676986336708, "learning_rate": 8.963949772717045e-05, "loss": 0.2594, "step": 29459 }, { "epoch": 2.386584575502268, "grad_norm": 0.06455852091312408, "learning_rate": 8.963499707457582e-05, "loss": 0.22, "step": 29460 }, { "epoch": 2.386665586519767, "grad_norm": 0.06308633089065552, "learning_rate": 8.96304964219812e-05, "loss": 0.2601, "step": 29461 }, { "epoch": 2.386746597537265, "grad_norm": 0.05884365737438202, "learning_rate": 8.962599576938657e-05, "loss": 0.2338, "step": 29462 }, { "epoch": 2.3868276085547633, "grad_norm": 0.07012617588043213, "learning_rate": 8.962149511679194e-05, "loss": 0.2558, "step": 29463 }, { "epoch": 2.386908619572262, "grad_norm": 0.0692358985543251, "learning_rate": 8.961699446419731e-05, "loss": 0.239, "step": 29464 }, { "epoch": 2.38698963058976, "grad_norm": 0.06754320114850998, "learning_rate": 8.961249381160269e-05, "loss": 0.2061, "step": 29465 }, { "epoch": 2.3870706416072585, "grad_norm": 0.060342174023389816, "learning_rate": 8.960799315900806e-05, "loss": 0.2169, "step": 29466 }, { "epoch": 2.387151652624757, "grad_norm": 0.06610507518053055, "learning_rate": 8.960349250641344e-05, "loss": 0.2707, "step": 29467 }, { "epoch": 2.3872326636422554, "grad_norm": 0.07763776183128357, "learning_rate": 8.959899185381881e-05, "loss": 0.2592, "step": 29468 }, { "epoch": 2.3873136746597536, "grad_norm": 0.05693863704800606, "learning_rate": 8.959449120122418e-05, "loss": 0.2363, "step": 29469 }, { "epoch": 2.3873946856772523, "grad_norm": 0.06809014081954956, "learning_rate": 8.958999054862956e-05, "loss": 0.2543, "step": 29470 }, { "epoch": 2.3874756966947506, "grad_norm": 0.06256530433893204, "learning_rate": 8.958548989603493e-05, "loss": 0.2253, "step": 29471 }, { "epoch": 2.387556707712249, "grad_norm": 0.07401026040315628, "learning_rate": 8.95809892434403e-05, "loss": 0.2619, "step": 29472 }, { "epoch": 2.387637718729747, "grad_norm": 0.06931627541780472, "learning_rate": 8.957648859084568e-05, "loss": 0.2977, "step": 29473 }, { "epoch": 2.3877187297472457, "grad_norm": 0.0846065878868103, "learning_rate": 8.957198793825105e-05, "loss": 0.3067, "step": 29474 }, { "epoch": 2.387799740764744, "grad_norm": 0.0711844339966774, "learning_rate": 8.956748728565642e-05, "loss": 0.2294, "step": 29475 }, { "epoch": 2.3878807517822422, "grad_norm": 0.06755898892879486, "learning_rate": 8.95629866330618e-05, "loss": 0.2515, "step": 29476 }, { "epoch": 2.387961762799741, "grad_norm": 0.07480302453041077, "learning_rate": 8.955848598046717e-05, "loss": 0.2045, "step": 29477 }, { "epoch": 2.388042773817239, "grad_norm": 0.0641959086060524, "learning_rate": 8.955398532787254e-05, "loss": 0.236, "step": 29478 }, { "epoch": 2.3881237848347374, "grad_norm": 0.06681734323501587, "learning_rate": 8.954948467527792e-05, "loss": 0.2513, "step": 29479 }, { "epoch": 2.3882047958522357, "grad_norm": 0.0504288449883461, "learning_rate": 8.954498402268329e-05, "loss": 0.2491, "step": 29480 }, { "epoch": 2.3882858068697344, "grad_norm": 0.05784103274345398, "learning_rate": 8.954048337008867e-05, "loss": 0.2398, "step": 29481 }, { "epoch": 2.3883668178872326, "grad_norm": 0.07770927250385284, "learning_rate": 8.953598271749404e-05, "loss": 0.2731, "step": 29482 }, { "epoch": 2.388447828904731, "grad_norm": 0.06906847655773163, "learning_rate": 8.953148206489941e-05, "loss": 0.2525, "step": 29483 }, { "epoch": 2.3885288399222295, "grad_norm": 0.07368722558021545, "learning_rate": 8.952698141230479e-05, "loss": 0.2416, "step": 29484 }, { "epoch": 2.3886098509397278, "grad_norm": 0.04947913438081741, "learning_rate": 8.952248075971016e-05, "loss": 0.2461, "step": 29485 }, { "epoch": 2.388690861957226, "grad_norm": 0.06703958660364151, "learning_rate": 8.951798010711553e-05, "loss": 0.2476, "step": 29486 }, { "epoch": 2.3887718729747247, "grad_norm": 0.07003334909677505, "learning_rate": 8.951347945452091e-05, "loss": 0.2733, "step": 29487 }, { "epoch": 2.388852883992223, "grad_norm": 0.07241364568471909, "learning_rate": 8.950897880192628e-05, "loss": 0.2788, "step": 29488 }, { "epoch": 2.388933895009721, "grad_norm": 0.07211482524871826, "learning_rate": 8.950447814933165e-05, "loss": 0.2698, "step": 29489 }, { "epoch": 2.38901490602722, "grad_norm": 0.0676577165722847, "learning_rate": 8.949997749673703e-05, "loss": 0.278, "step": 29490 }, { "epoch": 2.389095917044718, "grad_norm": 0.07245694100856781, "learning_rate": 8.94954768441424e-05, "loss": 0.263, "step": 29491 }, { "epoch": 2.3891769280622164, "grad_norm": 0.06439948081970215, "learning_rate": 8.949097619154778e-05, "loss": 0.2363, "step": 29492 }, { "epoch": 2.389257939079715, "grad_norm": 0.06424013525247574, "learning_rate": 8.948647553895315e-05, "loss": 0.2454, "step": 29493 }, { "epoch": 2.3893389500972133, "grad_norm": 0.08479436486959457, "learning_rate": 8.948197488635852e-05, "loss": 0.257, "step": 29494 }, { "epoch": 2.3894199611147116, "grad_norm": 0.06991950422525406, "learning_rate": 8.947747423376391e-05, "loss": 0.2861, "step": 29495 }, { "epoch": 2.38950097213221, "grad_norm": 0.05634448677301407, "learning_rate": 8.947297358116927e-05, "loss": 0.2191, "step": 29496 }, { "epoch": 2.3895819831497085, "grad_norm": 0.06699156761169434, "learning_rate": 8.946847292857464e-05, "loss": 0.2619, "step": 29497 }, { "epoch": 2.3896629941672067, "grad_norm": 0.05900770053267479, "learning_rate": 8.946397227598003e-05, "loss": 0.2327, "step": 29498 }, { "epoch": 2.389744005184705, "grad_norm": 0.05870789662003517, "learning_rate": 8.945947162338539e-05, "loss": 0.2415, "step": 29499 }, { "epoch": 2.3898250162022037, "grad_norm": 0.07382352650165558, "learning_rate": 8.945497097079076e-05, "loss": 0.2541, "step": 29500 }, { "epoch": 2.389906027219702, "grad_norm": 0.06369208544492722, "learning_rate": 8.945047031819615e-05, "loss": 0.2456, "step": 29501 }, { "epoch": 2.3899870382372, "grad_norm": 0.07548237591981888, "learning_rate": 8.944596966560151e-05, "loss": 0.2469, "step": 29502 }, { "epoch": 2.3900680492546984, "grad_norm": 0.06972216069698334, "learning_rate": 8.94414690130069e-05, "loss": 0.2522, "step": 29503 }, { "epoch": 2.390149060272197, "grad_norm": 0.05822772905230522, "learning_rate": 8.943696836041227e-05, "loss": 0.2461, "step": 29504 }, { "epoch": 2.3902300712896953, "grad_norm": 0.07330431044101715, "learning_rate": 8.943246770781763e-05, "loss": 0.2647, "step": 29505 }, { "epoch": 2.3903110823071936, "grad_norm": 0.07856698334217072, "learning_rate": 8.942796705522302e-05, "loss": 0.235, "step": 29506 }, { "epoch": 2.3903920933246923, "grad_norm": 0.07406508922576904, "learning_rate": 8.942346640262839e-05, "loss": 0.2767, "step": 29507 }, { "epoch": 2.3904731043421905, "grad_norm": 0.06292467564344406, "learning_rate": 8.941896575003375e-05, "loss": 0.2303, "step": 29508 }, { "epoch": 2.3905541153596888, "grad_norm": 0.07412441074848175, "learning_rate": 8.941446509743914e-05, "loss": 0.263, "step": 29509 }, { "epoch": 2.3906351263771874, "grad_norm": 0.06904534995555878, "learning_rate": 8.940996444484451e-05, "loss": 0.267, "step": 29510 }, { "epoch": 2.3907161373946857, "grad_norm": 0.07869131863117218, "learning_rate": 8.940546379224987e-05, "loss": 0.2491, "step": 29511 }, { "epoch": 2.390797148412184, "grad_norm": 0.06752260774374008, "learning_rate": 8.940096313965526e-05, "loss": 0.2257, "step": 29512 }, { "epoch": 2.3908781594296826, "grad_norm": 0.0686509758234024, "learning_rate": 8.939646248706063e-05, "loss": 0.2307, "step": 29513 }, { "epoch": 2.390959170447181, "grad_norm": 0.06670700013637543, "learning_rate": 8.9391961834466e-05, "loss": 0.227, "step": 29514 }, { "epoch": 2.391040181464679, "grad_norm": 0.07350020855665207, "learning_rate": 8.938746118187138e-05, "loss": 0.2595, "step": 29515 }, { "epoch": 2.391121192482178, "grad_norm": 0.062225863337516785, "learning_rate": 8.938296052927676e-05, "loss": 0.2215, "step": 29516 }, { "epoch": 2.391202203499676, "grad_norm": 0.05617249011993408, "learning_rate": 8.937845987668212e-05, "loss": 0.2478, "step": 29517 }, { "epoch": 2.3912832145171743, "grad_norm": 0.06018560007214546, "learning_rate": 8.93739592240875e-05, "loss": 0.2521, "step": 29518 }, { "epoch": 2.3913642255346725, "grad_norm": 0.06831783801317215, "learning_rate": 8.936945857149288e-05, "loss": 0.2441, "step": 29519 }, { "epoch": 2.3914452365521712, "grad_norm": 0.05382615327835083, "learning_rate": 8.936495791889824e-05, "loss": 0.2117, "step": 29520 }, { "epoch": 2.3915262475696695, "grad_norm": 0.05994303151965141, "learning_rate": 8.936045726630362e-05, "loss": 0.2316, "step": 29521 }, { "epoch": 2.3916072585871677, "grad_norm": 0.06629019975662231, "learning_rate": 8.9355956613709e-05, "loss": 0.2224, "step": 29522 }, { "epoch": 2.3916882696046664, "grad_norm": 0.06589581072330475, "learning_rate": 8.935145596111436e-05, "loss": 0.2921, "step": 29523 }, { "epoch": 2.3917692806221647, "grad_norm": 0.06992766261100769, "learning_rate": 8.934695530851974e-05, "loss": 0.2422, "step": 29524 }, { "epoch": 2.391850291639663, "grad_norm": 0.06090115010738373, "learning_rate": 8.934245465592512e-05, "loss": 0.2305, "step": 29525 }, { "epoch": 2.391931302657161, "grad_norm": 0.06497127562761307, "learning_rate": 8.933795400333048e-05, "loss": 0.2307, "step": 29526 }, { "epoch": 2.39201231367466, "grad_norm": 0.06542658805847168, "learning_rate": 8.933345335073586e-05, "loss": 0.2518, "step": 29527 }, { "epoch": 2.392093324692158, "grad_norm": 0.07996457070112228, "learning_rate": 8.932895269814124e-05, "loss": 0.2767, "step": 29528 }, { "epoch": 2.3921743357096563, "grad_norm": 0.0691191554069519, "learning_rate": 8.932445204554661e-05, "loss": 0.2719, "step": 29529 }, { "epoch": 2.392255346727155, "grad_norm": 0.06836870312690735, "learning_rate": 8.931995139295199e-05, "loss": 0.2424, "step": 29530 }, { "epoch": 2.3923363577446533, "grad_norm": 0.06277644634246826, "learning_rate": 8.931545074035736e-05, "loss": 0.3112, "step": 29531 }, { "epoch": 2.3924173687621515, "grad_norm": 0.083866186439991, "learning_rate": 8.931095008776273e-05, "loss": 0.245, "step": 29532 }, { "epoch": 2.39249837977965, "grad_norm": 0.08739671856164932, "learning_rate": 8.93064494351681e-05, "loss": 0.2828, "step": 29533 }, { "epoch": 2.3925793907971484, "grad_norm": 0.07633476704359055, "learning_rate": 8.930194878257348e-05, "loss": 0.2661, "step": 29534 }, { "epoch": 2.3926604018146467, "grad_norm": 0.06199926510453224, "learning_rate": 8.929744812997885e-05, "loss": 0.2305, "step": 29535 }, { "epoch": 2.3927414128321454, "grad_norm": 0.05972784385085106, "learning_rate": 8.929294747738423e-05, "loss": 0.2733, "step": 29536 }, { "epoch": 2.3928224238496436, "grad_norm": 0.060678012669086456, "learning_rate": 8.92884468247896e-05, "loss": 0.2294, "step": 29537 }, { "epoch": 2.392903434867142, "grad_norm": 0.0721907690167427, "learning_rate": 8.928394617219497e-05, "loss": 0.2564, "step": 29538 }, { "epoch": 2.3929844458846405, "grad_norm": 0.06307584047317505, "learning_rate": 8.927944551960035e-05, "loss": 0.2649, "step": 29539 }, { "epoch": 2.393065456902139, "grad_norm": 0.0750347301363945, "learning_rate": 8.927494486700572e-05, "loss": 0.2259, "step": 29540 }, { "epoch": 2.393146467919637, "grad_norm": 0.06097326800227165, "learning_rate": 8.92704442144111e-05, "loss": 0.2481, "step": 29541 }, { "epoch": 2.3932274789371353, "grad_norm": 0.060833755880594254, "learning_rate": 8.926594356181647e-05, "loss": 0.206, "step": 29542 }, { "epoch": 2.393308489954634, "grad_norm": 0.06569301337003708, "learning_rate": 8.926144290922184e-05, "loss": 0.2317, "step": 29543 }, { "epoch": 2.393389500972132, "grad_norm": 0.05607564002275467, "learning_rate": 8.925694225662722e-05, "loss": 0.2252, "step": 29544 }, { "epoch": 2.3934705119896305, "grad_norm": 0.06047516316175461, "learning_rate": 8.925244160403259e-05, "loss": 0.2683, "step": 29545 }, { "epoch": 2.393551523007129, "grad_norm": 0.054401516914367676, "learning_rate": 8.924794095143796e-05, "loss": 0.2488, "step": 29546 }, { "epoch": 2.3936325340246274, "grad_norm": 0.04817594587802887, "learning_rate": 8.924344029884334e-05, "loss": 0.2257, "step": 29547 }, { "epoch": 2.3937135450421256, "grad_norm": 0.05992850661277771, "learning_rate": 8.923893964624871e-05, "loss": 0.2092, "step": 29548 }, { "epoch": 2.393794556059624, "grad_norm": 0.07102970033884048, "learning_rate": 8.923443899365408e-05, "loss": 0.2858, "step": 29549 }, { "epoch": 2.3938755670771226, "grad_norm": 0.05211573839187622, "learning_rate": 8.922993834105946e-05, "loss": 0.2312, "step": 29550 }, { "epoch": 2.393956578094621, "grad_norm": 0.07465586066246033, "learning_rate": 8.922543768846483e-05, "loss": 0.2826, "step": 29551 }, { "epoch": 2.394037589112119, "grad_norm": 0.07565885782241821, "learning_rate": 8.92209370358702e-05, "loss": 0.2216, "step": 29552 }, { "epoch": 2.3941186001296177, "grad_norm": 0.07079378515481949, "learning_rate": 8.921643638327558e-05, "loss": 0.2301, "step": 29553 }, { "epoch": 2.394199611147116, "grad_norm": 0.06497667729854584, "learning_rate": 8.921193573068095e-05, "loss": 0.2282, "step": 29554 }, { "epoch": 2.3942806221646142, "grad_norm": 0.06706354767084122, "learning_rate": 8.920743507808633e-05, "loss": 0.2796, "step": 29555 }, { "epoch": 2.394361633182113, "grad_norm": 0.06051041558384895, "learning_rate": 8.92029344254917e-05, "loss": 0.2497, "step": 29556 }, { "epoch": 2.394442644199611, "grad_norm": 0.061869047582149506, "learning_rate": 8.919843377289707e-05, "loss": 0.2302, "step": 29557 }, { "epoch": 2.3945236552171094, "grad_norm": 0.06803011149168015, "learning_rate": 8.919393312030245e-05, "loss": 0.248, "step": 29558 }, { "epoch": 2.394604666234608, "grad_norm": 0.06455831974744797, "learning_rate": 8.918943246770782e-05, "loss": 0.2371, "step": 29559 }, { "epoch": 2.3946856772521063, "grad_norm": 0.07764575630426407, "learning_rate": 8.91849318151132e-05, "loss": 0.2781, "step": 29560 }, { "epoch": 2.3947666882696046, "grad_norm": 0.06719639152288437, "learning_rate": 8.918043116251857e-05, "loss": 0.2158, "step": 29561 }, { "epoch": 2.3948476992871033, "grad_norm": 0.07174685597419739, "learning_rate": 8.917593050992394e-05, "loss": 0.2641, "step": 29562 }, { "epoch": 2.3949287103046015, "grad_norm": 0.07454746961593628, "learning_rate": 8.917142985732931e-05, "loss": 0.2645, "step": 29563 }, { "epoch": 2.3950097213220998, "grad_norm": 0.07032407075166702, "learning_rate": 8.91669292047347e-05, "loss": 0.2503, "step": 29564 }, { "epoch": 2.395090732339598, "grad_norm": 0.07723000645637512, "learning_rate": 8.916242855214006e-05, "loss": 0.2716, "step": 29565 }, { "epoch": 2.3951717433570967, "grad_norm": 0.06460478901863098, "learning_rate": 8.915792789954544e-05, "loss": 0.2652, "step": 29566 }, { "epoch": 2.395252754374595, "grad_norm": 0.05906296521425247, "learning_rate": 8.915342724695082e-05, "loss": 0.2557, "step": 29567 }, { "epoch": 2.395333765392093, "grad_norm": 0.05999612808227539, "learning_rate": 8.914892659435618e-05, "loss": 0.2319, "step": 29568 }, { "epoch": 2.395414776409592, "grad_norm": 0.07182128727436066, "learning_rate": 8.914442594176156e-05, "loss": 0.2204, "step": 29569 }, { "epoch": 2.39549578742709, "grad_norm": 0.0702458918094635, "learning_rate": 8.913992528916694e-05, "loss": 0.2589, "step": 29570 }, { "epoch": 2.3955767984445884, "grad_norm": 0.05754755809903145, "learning_rate": 8.91354246365723e-05, "loss": 0.2285, "step": 29571 }, { "epoch": 2.3956578094620866, "grad_norm": 0.07615985721349716, "learning_rate": 8.913092398397768e-05, "loss": 0.2494, "step": 29572 }, { "epoch": 2.3957388204795853, "grad_norm": 0.06916381418704987, "learning_rate": 8.912642333138306e-05, "loss": 0.2825, "step": 29573 }, { "epoch": 2.3958198314970836, "grad_norm": 0.08532532304525375, "learning_rate": 8.912192267878842e-05, "loss": 0.2174, "step": 29574 }, { "epoch": 2.395900842514582, "grad_norm": 0.06773477047681808, "learning_rate": 8.91174220261938e-05, "loss": 0.2418, "step": 29575 }, { "epoch": 2.3959818535320805, "grad_norm": 0.06349464505910873, "learning_rate": 8.911292137359918e-05, "loss": 0.2202, "step": 29576 }, { "epoch": 2.3960628645495787, "grad_norm": 0.06422010809183121, "learning_rate": 8.910842072100454e-05, "loss": 0.2895, "step": 29577 }, { "epoch": 2.396143875567077, "grad_norm": 0.060994140803813934, "learning_rate": 8.910392006840992e-05, "loss": 0.2572, "step": 29578 }, { "epoch": 2.3962248865845757, "grad_norm": 0.07673156261444092, "learning_rate": 8.90994194158153e-05, "loss": 0.2491, "step": 29579 }, { "epoch": 2.396305897602074, "grad_norm": 0.06521592289209366, "learning_rate": 8.909491876322067e-05, "loss": 0.2379, "step": 29580 }, { "epoch": 2.396386908619572, "grad_norm": 0.05804380401968956, "learning_rate": 8.909041811062605e-05, "loss": 0.2108, "step": 29581 }, { "epoch": 2.396467919637071, "grad_norm": 0.05636740103363991, "learning_rate": 8.908591745803143e-05, "loss": 0.2424, "step": 29582 }, { "epoch": 2.396548930654569, "grad_norm": 0.08155636489391327, "learning_rate": 8.908141680543679e-05, "loss": 0.2704, "step": 29583 }, { "epoch": 2.3966299416720673, "grad_norm": 0.05432563275098801, "learning_rate": 8.907691615284217e-05, "loss": 0.2141, "step": 29584 }, { "epoch": 2.396710952689566, "grad_norm": 0.06512283533811569, "learning_rate": 8.907241550024755e-05, "loss": 0.246, "step": 29585 }, { "epoch": 2.3967919637070643, "grad_norm": 0.06527567654848099, "learning_rate": 8.906791484765291e-05, "loss": 0.2362, "step": 29586 }, { "epoch": 2.3968729747245625, "grad_norm": 0.06097620725631714, "learning_rate": 8.90634141950583e-05, "loss": 0.2412, "step": 29587 }, { "epoch": 2.3969539857420608, "grad_norm": 0.06241489201784134, "learning_rate": 8.905891354246367e-05, "loss": 0.2231, "step": 29588 }, { "epoch": 2.3970349967595594, "grad_norm": 0.06722144782543182, "learning_rate": 8.905441288986903e-05, "loss": 0.2468, "step": 29589 }, { "epoch": 2.3971160077770577, "grad_norm": 0.08474922925233841, "learning_rate": 8.904991223727442e-05, "loss": 0.2148, "step": 29590 }, { "epoch": 2.397197018794556, "grad_norm": 0.05398935079574585, "learning_rate": 8.904541158467979e-05, "loss": 0.2572, "step": 29591 }, { "epoch": 2.3972780298120546, "grad_norm": 0.05842433497309685, "learning_rate": 8.904091093208515e-05, "loss": 0.2377, "step": 29592 }, { "epoch": 2.397359040829553, "grad_norm": 0.06882943958044052, "learning_rate": 8.903641027949054e-05, "loss": 0.2476, "step": 29593 }, { "epoch": 2.397440051847051, "grad_norm": 0.049048058688640594, "learning_rate": 8.903190962689591e-05, "loss": 0.1895, "step": 29594 }, { "epoch": 2.3975210628645494, "grad_norm": 0.06414130330085754, "learning_rate": 8.902740897430127e-05, "loss": 0.2652, "step": 29595 }, { "epoch": 2.397602073882048, "grad_norm": 0.08015193790197372, "learning_rate": 8.902290832170666e-05, "loss": 0.2534, "step": 29596 }, { "epoch": 2.3976830848995463, "grad_norm": 0.05423025041818619, "learning_rate": 8.901840766911203e-05, "loss": 0.2412, "step": 29597 }, { "epoch": 2.3977640959170445, "grad_norm": 0.06724569946527481, "learning_rate": 8.901390701651739e-05, "loss": 0.2432, "step": 29598 }, { "epoch": 2.3978451069345432, "grad_norm": 0.07872132956981659, "learning_rate": 8.900940636392278e-05, "loss": 0.2623, "step": 29599 }, { "epoch": 2.3979261179520415, "grad_norm": 0.06910988688468933, "learning_rate": 8.900490571132815e-05, "loss": 0.2595, "step": 29600 }, { "epoch": 2.3980071289695397, "grad_norm": 0.06886013597249985, "learning_rate": 8.900040505873351e-05, "loss": 0.2688, "step": 29601 }, { "epoch": 2.3980881399870384, "grad_norm": 0.07101655006408691, "learning_rate": 8.89959044061389e-05, "loss": 0.2469, "step": 29602 }, { "epoch": 2.3981691510045366, "grad_norm": 0.07381045818328857, "learning_rate": 8.899140375354427e-05, "loss": 0.2643, "step": 29603 }, { "epoch": 2.398250162022035, "grad_norm": 0.053292810916900635, "learning_rate": 8.898690310094963e-05, "loss": 0.1849, "step": 29604 }, { "epoch": 2.3983311730395336, "grad_norm": 0.05561814457178116, "learning_rate": 8.898240244835502e-05, "loss": 0.2459, "step": 29605 }, { "epoch": 2.398412184057032, "grad_norm": 0.061810776591300964, "learning_rate": 8.897790179576039e-05, "loss": 0.2249, "step": 29606 }, { "epoch": 2.39849319507453, "grad_norm": 0.07361455261707306, "learning_rate": 8.897340114316577e-05, "loss": 0.2647, "step": 29607 }, { "epoch": 2.3985742060920288, "grad_norm": 0.060726381838321686, "learning_rate": 8.896890049057114e-05, "loss": 0.2486, "step": 29608 }, { "epoch": 2.398655217109527, "grad_norm": 0.07952304184436798, "learning_rate": 8.896439983797651e-05, "loss": 0.2455, "step": 29609 }, { "epoch": 2.3987362281270252, "grad_norm": 0.07039670646190643, "learning_rate": 8.895989918538189e-05, "loss": 0.2579, "step": 29610 }, { "epoch": 2.3988172391445235, "grad_norm": 0.06562759727239609, "learning_rate": 8.895539853278726e-05, "loss": 0.2359, "step": 29611 }, { "epoch": 2.398898250162022, "grad_norm": 0.07553057372570038, "learning_rate": 8.895089788019263e-05, "loss": 0.251, "step": 29612 }, { "epoch": 2.3989792611795204, "grad_norm": 0.05753480643033981, "learning_rate": 8.894639722759801e-05, "loss": 0.2121, "step": 29613 }, { "epoch": 2.3990602721970187, "grad_norm": 0.06267315149307251, "learning_rate": 8.894189657500338e-05, "loss": 0.2448, "step": 29614 }, { "epoch": 2.3991412832145174, "grad_norm": 0.07307960838079453, "learning_rate": 8.893739592240876e-05, "loss": 0.2624, "step": 29615 }, { "epoch": 2.3992222942320156, "grad_norm": 0.06411339342594147, "learning_rate": 8.893289526981413e-05, "loss": 0.2217, "step": 29616 }, { "epoch": 2.399303305249514, "grad_norm": 0.06430069357156754, "learning_rate": 8.89283946172195e-05, "loss": 0.2259, "step": 29617 }, { "epoch": 2.399384316267012, "grad_norm": 0.06761736422777176, "learning_rate": 8.892389396462488e-05, "loss": 0.2493, "step": 29618 }, { "epoch": 2.399465327284511, "grad_norm": 0.06652569770812988, "learning_rate": 8.891939331203025e-05, "loss": 0.2445, "step": 29619 }, { "epoch": 2.399546338302009, "grad_norm": 0.04514552652835846, "learning_rate": 8.891489265943562e-05, "loss": 0.2013, "step": 29620 }, { "epoch": 2.3996273493195073, "grad_norm": 0.08357077091932297, "learning_rate": 8.8910392006841e-05, "loss": 0.2911, "step": 29621 }, { "epoch": 2.399708360337006, "grad_norm": 0.0534617118537426, "learning_rate": 8.890589135424637e-05, "loss": 0.1895, "step": 29622 }, { "epoch": 2.399789371354504, "grad_norm": 0.06501682847738266, "learning_rate": 8.890139070165174e-05, "loss": 0.2394, "step": 29623 }, { "epoch": 2.3998703823720025, "grad_norm": 0.059162236750125885, "learning_rate": 8.889689004905712e-05, "loss": 0.2821, "step": 29624 }, { "epoch": 2.399951393389501, "grad_norm": 0.06915201246738434, "learning_rate": 8.889238939646249e-05, "loss": 0.2513, "step": 29625 }, { "epoch": 2.4000324044069994, "grad_norm": 0.07056642323732376, "learning_rate": 8.888788874386787e-05, "loss": 0.2652, "step": 29626 }, { "epoch": 2.4001134154244976, "grad_norm": 0.053473327308893204, "learning_rate": 8.888338809127324e-05, "loss": 0.2427, "step": 29627 }, { "epoch": 2.4001944264419963, "grad_norm": 0.09125243127346039, "learning_rate": 8.887888743867861e-05, "loss": 0.2423, "step": 29628 }, { "epoch": 2.4002754374594946, "grad_norm": 0.08848083019256592, "learning_rate": 8.887438678608399e-05, "loss": 0.2736, "step": 29629 }, { "epoch": 2.400356448476993, "grad_norm": 0.0683230310678482, "learning_rate": 8.886988613348936e-05, "loss": 0.2493, "step": 29630 }, { "epoch": 2.4004374594944915, "grad_norm": 0.07020305097103119, "learning_rate": 8.886538548089473e-05, "loss": 0.2545, "step": 29631 }, { "epoch": 2.4005184705119897, "grad_norm": 0.07480403780937195, "learning_rate": 8.88608848283001e-05, "loss": 0.2506, "step": 29632 }, { "epoch": 2.400599481529488, "grad_norm": 0.0668230876326561, "learning_rate": 8.885638417570548e-05, "loss": 0.2722, "step": 29633 }, { "epoch": 2.4006804925469862, "grad_norm": 0.06247445195913315, "learning_rate": 8.885188352311085e-05, "loss": 0.2566, "step": 29634 }, { "epoch": 2.400761503564485, "grad_norm": 0.06823485344648361, "learning_rate": 8.884738287051623e-05, "loss": 0.2498, "step": 29635 }, { "epoch": 2.400842514581983, "grad_norm": 0.07839343696832657, "learning_rate": 8.884288221792161e-05, "loss": 0.2733, "step": 29636 }, { "epoch": 2.4009235255994814, "grad_norm": 0.08385621011257172, "learning_rate": 8.883838156532697e-05, "loss": 0.2964, "step": 29637 }, { "epoch": 2.40100453661698, "grad_norm": 0.06754658371210098, "learning_rate": 8.883388091273235e-05, "loss": 0.2416, "step": 29638 }, { "epoch": 2.4010855476344783, "grad_norm": 0.060111649334430695, "learning_rate": 8.882938026013774e-05, "loss": 0.2204, "step": 29639 }, { "epoch": 2.4011665586519766, "grad_norm": 0.07113023102283478, "learning_rate": 8.88248796075431e-05, "loss": 0.2438, "step": 29640 }, { "epoch": 2.401247569669475, "grad_norm": 0.07107888907194138, "learning_rate": 8.882037895494847e-05, "loss": 0.2741, "step": 29641 }, { "epoch": 2.4013285806869735, "grad_norm": 0.06887800246477127, "learning_rate": 8.881587830235386e-05, "loss": 0.2886, "step": 29642 }, { "epoch": 2.4014095917044718, "grad_norm": 0.055432695895433426, "learning_rate": 8.881137764975922e-05, "loss": 0.2222, "step": 29643 }, { "epoch": 2.40149060272197, "grad_norm": 0.06920438259840012, "learning_rate": 8.880687699716459e-05, "loss": 0.2484, "step": 29644 }, { "epoch": 2.4015716137394687, "grad_norm": 0.06635645776987076, "learning_rate": 8.880237634456998e-05, "loss": 0.2444, "step": 29645 }, { "epoch": 2.401652624756967, "grad_norm": 0.06889855861663818, "learning_rate": 8.879787569197534e-05, "loss": 0.2806, "step": 29646 }, { "epoch": 2.401733635774465, "grad_norm": 0.06344221532344818, "learning_rate": 8.879337503938071e-05, "loss": 0.2375, "step": 29647 }, { "epoch": 2.401814646791964, "grad_norm": 0.06579340249300003, "learning_rate": 8.87888743867861e-05, "loss": 0.2372, "step": 29648 }, { "epoch": 2.401895657809462, "grad_norm": 0.05817051976919174, "learning_rate": 8.878437373419146e-05, "loss": 0.2487, "step": 29649 }, { "epoch": 2.4019766688269604, "grad_norm": 0.08828700333833694, "learning_rate": 8.877987308159683e-05, "loss": 0.3105, "step": 29650 }, { "epoch": 2.402057679844459, "grad_norm": 0.05760222673416138, "learning_rate": 8.877537242900222e-05, "loss": 0.2585, "step": 29651 }, { "epoch": 2.4021386908619573, "grad_norm": 0.05954901874065399, "learning_rate": 8.877087177640758e-05, "loss": 0.2504, "step": 29652 }, { "epoch": 2.4022197018794555, "grad_norm": 0.05687323212623596, "learning_rate": 8.876637112381295e-05, "loss": 0.2478, "step": 29653 }, { "epoch": 2.402300712896954, "grad_norm": 0.07466956973075867, "learning_rate": 8.876187047121834e-05, "loss": 0.2437, "step": 29654 }, { "epoch": 2.4023817239144525, "grad_norm": 0.05892636254429817, "learning_rate": 8.87573698186237e-05, "loss": 0.252, "step": 29655 }, { "epoch": 2.4024627349319507, "grad_norm": 0.06808330118656158, "learning_rate": 8.875286916602907e-05, "loss": 0.2319, "step": 29656 }, { "epoch": 2.402543745949449, "grad_norm": 0.061993733048439026, "learning_rate": 8.874836851343446e-05, "loss": 0.2637, "step": 29657 }, { "epoch": 2.4026247569669477, "grad_norm": 0.056053124368190765, "learning_rate": 8.874386786083982e-05, "loss": 0.272, "step": 29658 }, { "epoch": 2.402705767984446, "grad_norm": 0.06183644384145737, "learning_rate": 8.87393672082452e-05, "loss": 0.2559, "step": 29659 }, { "epoch": 2.402786779001944, "grad_norm": 0.06335576623678207, "learning_rate": 8.873486655565058e-05, "loss": 0.2429, "step": 29660 }, { "epoch": 2.4028677900194424, "grad_norm": 0.06689286977052689, "learning_rate": 8.873036590305594e-05, "loss": 0.2428, "step": 29661 }, { "epoch": 2.402948801036941, "grad_norm": 0.06543515622615814, "learning_rate": 8.872586525046133e-05, "loss": 0.243, "step": 29662 }, { "epoch": 2.4030298120544393, "grad_norm": 0.07345867902040482, "learning_rate": 8.87213645978667e-05, "loss": 0.2533, "step": 29663 }, { "epoch": 2.4031108230719376, "grad_norm": 0.07059627771377563, "learning_rate": 8.871686394527206e-05, "loss": 0.2348, "step": 29664 }, { "epoch": 2.4031918340894363, "grad_norm": 0.07763314247131348, "learning_rate": 8.871236329267745e-05, "loss": 0.2539, "step": 29665 }, { "epoch": 2.4032728451069345, "grad_norm": 0.06865006685256958, "learning_rate": 8.870786264008282e-05, "loss": 0.2579, "step": 29666 }, { "epoch": 2.4033538561244328, "grad_norm": 0.061918459832668304, "learning_rate": 8.870336198748818e-05, "loss": 0.2472, "step": 29667 }, { "epoch": 2.4034348671419314, "grad_norm": 0.061768610030412674, "learning_rate": 8.869886133489357e-05, "loss": 0.1955, "step": 29668 }, { "epoch": 2.4035158781594297, "grad_norm": 0.06935855001211166, "learning_rate": 8.869436068229894e-05, "loss": 0.2317, "step": 29669 }, { "epoch": 2.403596889176928, "grad_norm": 0.0667807012796402, "learning_rate": 8.86898600297043e-05, "loss": 0.251, "step": 29670 }, { "epoch": 2.4036779001944266, "grad_norm": 0.06723842024803162, "learning_rate": 8.868535937710969e-05, "loss": 0.226, "step": 29671 }, { "epoch": 2.403758911211925, "grad_norm": 0.06217675283551216, "learning_rate": 8.868085872451506e-05, "loss": 0.2232, "step": 29672 }, { "epoch": 2.403839922229423, "grad_norm": 0.06494613736867905, "learning_rate": 8.867635807192042e-05, "loss": 0.2547, "step": 29673 }, { "epoch": 2.403920933246922, "grad_norm": 0.06875234097242355, "learning_rate": 8.867185741932581e-05, "loss": 0.2292, "step": 29674 }, { "epoch": 2.40400194426442, "grad_norm": 0.07220776379108429, "learning_rate": 8.866735676673119e-05, "loss": 0.3206, "step": 29675 }, { "epoch": 2.4040829552819183, "grad_norm": 0.07017524540424347, "learning_rate": 8.866285611413655e-05, "loss": 0.2571, "step": 29676 }, { "epoch": 2.4041639662994165, "grad_norm": 0.0610513836145401, "learning_rate": 8.865835546154193e-05, "loss": 0.229, "step": 29677 }, { "epoch": 2.404244977316915, "grad_norm": 0.06113404035568237, "learning_rate": 8.86538548089473e-05, "loss": 0.287, "step": 29678 }, { "epoch": 2.4043259883344135, "grad_norm": 0.06622372567653656, "learning_rate": 8.864935415635267e-05, "loss": 0.2213, "step": 29679 }, { "epoch": 2.4044069993519117, "grad_norm": 0.06459666043519974, "learning_rate": 8.864485350375805e-05, "loss": 0.2076, "step": 29680 }, { "epoch": 2.4044880103694104, "grad_norm": 0.06556376069784164, "learning_rate": 8.864035285116343e-05, "loss": 0.2137, "step": 29681 }, { "epoch": 2.4045690213869086, "grad_norm": 0.0706576257944107, "learning_rate": 8.863585219856879e-05, "loss": 0.2516, "step": 29682 }, { "epoch": 2.404650032404407, "grad_norm": 0.06727118790149689, "learning_rate": 8.863135154597417e-05, "loss": 0.2497, "step": 29683 }, { "epoch": 2.404731043421905, "grad_norm": 0.06485434621572495, "learning_rate": 8.862685089337955e-05, "loss": 0.2405, "step": 29684 }, { "epoch": 2.404812054439404, "grad_norm": 0.06530654430389404, "learning_rate": 8.862235024078491e-05, "loss": 0.2682, "step": 29685 }, { "epoch": 2.404893065456902, "grad_norm": 0.07048763334751129, "learning_rate": 8.86178495881903e-05, "loss": 0.2775, "step": 29686 }, { "epoch": 2.4049740764744003, "grad_norm": 0.06269804388284683, "learning_rate": 8.861334893559567e-05, "loss": 0.2426, "step": 29687 }, { "epoch": 2.405055087491899, "grad_norm": 0.06605499982833862, "learning_rate": 8.860884828300104e-05, "loss": 0.2528, "step": 29688 }, { "epoch": 2.4051360985093972, "grad_norm": 0.06122152879834175, "learning_rate": 8.860434763040642e-05, "loss": 0.2521, "step": 29689 }, { "epoch": 2.4052171095268955, "grad_norm": 0.06740374118089676, "learning_rate": 8.859984697781179e-05, "loss": 0.2256, "step": 29690 }, { "epoch": 2.405298120544394, "grad_norm": 0.06832475960254669, "learning_rate": 8.859534632521716e-05, "loss": 0.2571, "step": 29691 }, { "epoch": 2.4053791315618924, "grad_norm": 0.05728984251618385, "learning_rate": 8.859084567262254e-05, "loss": 0.2393, "step": 29692 }, { "epoch": 2.4054601425793907, "grad_norm": 0.05768602341413498, "learning_rate": 8.858634502002791e-05, "loss": 0.182, "step": 29693 }, { "epoch": 2.4055411535968894, "grad_norm": 0.06498179584741592, "learning_rate": 8.858184436743328e-05, "loss": 0.2197, "step": 29694 }, { "epoch": 2.4056221646143876, "grad_norm": 0.06262174248695374, "learning_rate": 8.857734371483866e-05, "loss": 0.2531, "step": 29695 }, { "epoch": 2.405703175631886, "grad_norm": 0.08773565292358398, "learning_rate": 8.857284306224403e-05, "loss": 0.2652, "step": 29696 }, { "epoch": 2.4057841866493845, "grad_norm": 0.09450256079435349, "learning_rate": 8.85683424096494e-05, "loss": 0.2816, "step": 29697 }, { "epoch": 2.405865197666883, "grad_norm": 0.07148297131061554, "learning_rate": 8.856384175705478e-05, "loss": 0.2585, "step": 29698 }, { "epoch": 2.405946208684381, "grad_norm": 0.06335129588842392, "learning_rate": 8.855934110446015e-05, "loss": 0.2381, "step": 29699 }, { "epoch": 2.4060272197018793, "grad_norm": 0.07238379865884781, "learning_rate": 8.855484045186553e-05, "loss": 0.2496, "step": 29700 }, { "epoch": 2.406108230719378, "grad_norm": 0.06429867446422577, "learning_rate": 8.85503397992709e-05, "loss": 0.2236, "step": 29701 }, { "epoch": 2.406189241736876, "grad_norm": 0.07388942688703537, "learning_rate": 8.854583914667627e-05, "loss": 0.2766, "step": 29702 }, { "epoch": 2.4062702527543745, "grad_norm": 0.06903047859668732, "learning_rate": 8.854133849408165e-05, "loss": 0.2415, "step": 29703 }, { "epoch": 2.406351263771873, "grad_norm": 0.05265869200229645, "learning_rate": 8.853683784148702e-05, "loss": 0.2548, "step": 29704 }, { "epoch": 2.4064322747893714, "grad_norm": 0.0546029731631279, "learning_rate": 8.85323371888924e-05, "loss": 0.2263, "step": 29705 }, { "epoch": 2.4065132858068696, "grad_norm": 0.07559093087911606, "learning_rate": 8.852783653629777e-05, "loss": 0.2457, "step": 29706 }, { "epoch": 2.406594296824368, "grad_norm": 0.07392976433038712, "learning_rate": 8.852333588370314e-05, "loss": 0.2449, "step": 29707 }, { "epoch": 2.4066753078418666, "grad_norm": 0.06068427115678787, "learning_rate": 8.851883523110851e-05, "loss": 0.2252, "step": 29708 }, { "epoch": 2.406756318859365, "grad_norm": 0.06823750585317612, "learning_rate": 8.851433457851389e-05, "loss": 0.3009, "step": 29709 }, { "epoch": 2.406837329876863, "grad_norm": 0.06476844102144241, "learning_rate": 8.850983392591926e-05, "loss": 0.2708, "step": 29710 }, { "epoch": 2.4069183408943617, "grad_norm": 0.07834062725305557, "learning_rate": 8.850533327332463e-05, "loss": 0.2802, "step": 29711 }, { "epoch": 2.40699935191186, "grad_norm": 0.3444421589374542, "learning_rate": 8.850083262073001e-05, "loss": 0.2757, "step": 29712 }, { "epoch": 2.4070803629293582, "grad_norm": 0.060476649552583694, "learning_rate": 8.849633196813538e-05, "loss": 0.3, "step": 29713 }, { "epoch": 2.407161373946857, "grad_norm": 0.05499740317463875, "learning_rate": 8.849183131554077e-05, "loss": 0.2457, "step": 29714 }, { "epoch": 2.407242384964355, "grad_norm": 0.06884583830833435, "learning_rate": 8.848733066294613e-05, "loss": 0.2529, "step": 29715 }, { "epoch": 2.4073233959818534, "grad_norm": 0.0695691630244255, "learning_rate": 8.84828300103515e-05, "loss": 0.2408, "step": 29716 }, { "epoch": 2.407404406999352, "grad_norm": 0.06407498568296432, "learning_rate": 8.847832935775689e-05, "loss": 0.2632, "step": 29717 }, { "epoch": 2.4074854180168503, "grad_norm": 0.060239940881729126, "learning_rate": 8.847382870516225e-05, "loss": 0.2313, "step": 29718 }, { "epoch": 2.4075664290343486, "grad_norm": 0.054388102144002914, "learning_rate": 8.846932805256762e-05, "loss": 0.2618, "step": 29719 }, { "epoch": 2.4076474400518473, "grad_norm": 0.06771720200777054, "learning_rate": 8.846482739997301e-05, "loss": 0.2367, "step": 29720 }, { "epoch": 2.4077284510693455, "grad_norm": 0.07251696288585663, "learning_rate": 8.846032674737837e-05, "loss": 0.234, "step": 29721 }, { "epoch": 2.4078094620868438, "grad_norm": 0.06434344500303268, "learning_rate": 8.845582609478374e-05, "loss": 0.2908, "step": 29722 }, { "epoch": 2.407890473104342, "grad_norm": 0.08624786138534546, "learning_rate": 8.845132544218913e-05, "loss": 0.2447, "step": 29723 }, { "epoch": 2.4079714841218407, "grad_norm": 0.06329482048749924, "learning_rate": 8.844682478959449e-05, "loss": 0.2652, "step": 29724 }, { "epoch": 2.408052495139339, "grad_norm": 0.07422767579555511, "learning_rate": 8.844232413699987e-05, "loss": 0.2649, "step": 29725 }, { "epoch": 2.408133506156837, "grad_norm": 0.06247050687670708, "learning_rate": 8.843782348440525e-05, "loss": 0.2408, "step": 29726 }, { "epoch": 2.408214517174336, "grad_norm": 0.06806286424398422, "learning_rate": 8.843332283181061e-05, "loss": 0.2658, "step": 29727 }, { "epoch": 2.408295528191834, "grad_norm": 0.06933243572711945, "learning_rate": 8.842882217921599e-05, "loss": 0.2318, "step": 29728 }, { "epoch": 2.4083765392093324, "grad_norm": 0.09632189571857452, "learning_rate": 8.842432152662137e-05, "loss": 0.2846, "step": 29729 }, { "epoch": 2.4084575502268306, "grad_norm": 0.05814709514379501, "learning_rate": 8.841982087402673e-05, "loss": 0.2546, "step": 29730 }, { "epoch": 2.4085385612443293, "grad_norm": 0.07406198978424072, "learning_rate": 8.841532022143211e-05, "loss": 0.2681, "step": 29731 }, { "epoch": 2.4086195722618275, "grad_norm": 0.07493565231561661, "learning_rate": 8.84108195688375e-05, "loss": 0.2497, "step": 29732 }, { "epoch": 2.408700583279326, "grad_norm": 0.06633651256561279, "learning_rate": 8.840631891624285e-05, "loss": 0.2923, "step": 29733 }, { "epoch": 2.4087815942968245, "grad_norm": 0.0569579154253006, "learning_rate": 8.840181826364823e-05, "loss": 0.2019, "step": 29734 }, { "epoch": 2.4088626053143227, "grad_norm": 0.05882423743605614, "learning_rate": 8.839731761105361e-05, "loss": 0.2373, "step": 29735 }, { "epoch": 2.408943616331821, "grad_norm": 0.0590720996260643, "learning_rate": 8.839281695845897e-05, "loss": 0.2308, "step": 29736 }, { "epoch": 2.4090246273493197, "grad_norm": 0.0773407518863678, "learning_rate": 8.838831630586435e-05, "loss": 0.2613, "step": 29737 }, { "epoch": 2.409105638366818, "grad_norm": 0.06300017982721329, "learning_rate": 8.838381565326974e-05, "loss": 0.1976, "step": 29738 }, { "epoch": 2.409186649384316, "grad_norm": 0.07593004405498505, "learning_rate": 8.83793150006751e-05, "loss": 0.2488, "step": 29739 }, { "epoch": 2.409267660401815, "grad_norm": 0.06119083985686302, "learning_rate": 8.837481434808048e-05, "loss": 0.2288, "step": 29740 }, { "epoch": 2.409348671419313, "grad_norm": 0.07041435688734055, "learning_rate": 8.837031369548586e-05, "loss": 0.2664, "step": 29741 }, { "epoch": 2.4094296824368113, "grad_norm": 0.07043371349573135, "learning_rate": 8.836581304289122e-05, "loss": 0.2784, "step": 29742 }, { "epoch": 2.40951069345431, "grad_norm": 0.07403253018856049, "learning_rate": 8.83613123902966e-05, "loss": 0.249, "step": 29743 }, { "epoch": 2.4095917044718083, "grad_norm": 0.07712215930223465, "learning_rate": 8.835681173770198e-05, "loss": 0.2421, "step": 29744 }, { "epoch": 2.4096727154893065, "grad_norm": 0.07213019579648972, "learning_rate": 8.835231108510734e-05, "loss": 0.2471, "step": 29745 }, { "epoch": 2.4097537265068047, "grad_norm": 0.06117299944162369, "learning_rate": 8.834781043251272e-05, "loss": 0.2534, "step": 29746 }, { "epoch": 2.4098347375243034, "grad_norm": 0.06271757185459137, "learning_rate": 8.83433097799181e-05, "loss": 0.2347, "step": 29747 }, { "epoch": 2.4099157485418017, "grad_norm": 0.060313113033771515, "learning_rate": 8.833880912732346e-05, "loss": 0.2103, "step": 29748 }, { "epoch": 2.4099967595593, "grad_norm": 0.0679052546620369, "learning_rate": 8.833430847472885e-05, "loss": 0.2536, "step": 29749 }, { "epoch": 2.4100777705767986, "grad_norm": 0.06643978506326675, "learning_rate": 8.832980782213422e-05, "loss": 0.2498, "step": 29750 }, { "epoch": 2.410158781594297, "grad_norm": 0.08351900428533554, "learning_rate": 8.832530716953958e-05, "loss": 0.3062, "step": 29751 }, { "epoch": 2.410239792611795, "grad_norm": 0.07076345384120941, "learning_rate": 8.832080651694497e-05, "loss": 0.2302, "step": 29752 }, { "epoch": 2.4103208036292934, "grad_norm": 0.06369180232286453, "learning_rate": 8.831630586435034e-05, "loss": 0.2396, "step": 29753 }, { "epoch": 2.410401814646792, "grad_norm": 0.07251781970262527, "learning_rate": 8.83118052117557e-05, "loss": 0.2806, "step": 29754 }, { "epoch": 2.4104828256642903, "grad_norm": 0.0614294707775116, "learning_rate": 8.830730455916109e-05, "loss": 0.2489, "step": 29755 }, { "epoch": 2.4105638366817885, "grad_norm": 0.060916539281606674, "learning_rate": 8.830280390656646e-05, "loss": 0.2722, "step": 29756 }, { "epoch": 2.410644847699287, "grad_norm": 0.05728588253259659, "learning_rate": 8.829830325397182e-05, "loss": 0.2451, "step": 29757 }, { "epoch": 2.4107258587167855, "grad_norm": 0.05679110437631607, "learning_rate": 8.829380260137721e-05, "loss": 0.2239, "step": 29758 }, { "epoch": 2.4108068697342837, "grad_norm": 0.0824250653386116, "learning_rate": 8.828930194878258e-05, "loss": 0.2978, "step": 29759 }, { "epoch": 2.4108878807517824, "grad_norm": 0.06265558302402496, "learning_rate": 8.828480129618794e-05, "loss": 0.2662, "step": 29760 }, { "epoch": 2.4109688917692806, "grad_norm": 0.058735158294439316, "learning_rate": 8.828030064359333e-05, "loss": 0.2146, "step": 29761 }, { "epoch": 2.411049902786779, "grad_norm": 0.06004401296377182, "learning_rate": 8.82757999909987e-05, "loss": 0.2509, "step": 29762 }, { "epoch": 2.4111309138042776, "grad_norm": 0.07412339746952057, "learning_rate": 8.827129933840406e-05, "loss": 0.327, "step": 29763 }, { "epoch": 2.411211924821776, "grad_norm": 0.0679435208439827, "learning_rate": 8.826679868580945e-05, "loss": 0.2932, "step": 29764 }, { "epoch": 2.411292935839274, "grad_norm": 0.07169094681739807, "learning_rate": 8.826229803321482e-05, "loss": 0.2314, "step": 29765 }, { "epoch": 2.4113739468567728, "grad_norm": 0.06179240345954895, "learning_rate": 8.82577973806202e-05, "loss": 0.2281, "step": 29766 }, { "epoch": 2.411454957874271, "grad_norm": 0.07233671098947525, "learning_rate": 8.825329672802557e-05, "loss": 0.2076, "step": 29767 }, { "epoch": 2.4115359688917692, "grad_norm": 0.0739409476518631, "learning_rate": 8.824879607543094e-05, "loss": 0.246, "step": 29768 }, { "epoch": 2.4116169799092675, "grad_norm": 0.06868897378444672, "learning_rate": 8.824429542283632e-05, "loss": 0.2497, "step": 29769 }, { "epoch": 2.411697990926766, "grad_norm": 0.06777679175138474, "learning_rate": 8.823979477024169e-05, "loss": 0.2261, "step": 29770 }, { "epoch": 2.4117790019442644, "grad_norm": 0.0738830640912056, "learning_rate": 8.823529411764706e-05, "loss": 0.2361, "step": 29771 }, { "epoch": 2.4118600129617627, "grad_norm": 0.06694016605615616, "learning_rate": 8.823079346505244e-05, "loss": 0.2583, "step": 29772 }, { "epoch": 2.4119410239792614, "grad_norm": 0.06757953763008118, "learning_rate": 8.822629281245781e-05, "loss": 0.2597, "step": 29773 }, { "epoch": 2.4120220349967596, "grad_norm": 0.07212909311056137, "learning_rate": 8.822179215986319e-05, "loss": 0.2619, "step": 29774 }, { "epoch": 2.412103046014258, "grad_norm": 0.06142732873558998, "learning_rate": 8.821729150726856e-05, "loss": 0.2064, "step": 29775 }, { "epoch": 2.412184057031756, "grad_norm": 0.06122400611639023, "learning_rate": 8.821279085467393e-05, "loss": 0.2589, "step": 29776 }, { "epoch": 2.412265068049255, "grad_norm": 0.06232326477766037, "learning_rate": 8.82082902020793e-05, "loss": 0.2328, "step": 29777 }, { "epoch": 2.412346079066753, "grad_norm": 0.07228163629770279, "learning_rate": 8.820378954948468e-05, "loss": 0.3051, "step": 29778 }, { "epoch": 2.4124270900842513, "grad_norm": 0.06454326212406158, "learning_rate": 8.819928889689005e-05, "loss": 0.2439, "step": 29779 }, { "epoch": 2.41250810110175, "grad_norm": 0.07410193979740143, "learning_rate": 8.819478824429543e-05, "loss": 0.3022, "step": 29780 }, { "epoch": 2.412589112119248, "grad_norm": 0.07694942504167557, "learning_rate": 8.81902875917008e-05, "loss": 0.2377, "step": 29781 }, { "epoch": 2.4126701231367464, "grad_norm": 0.07964512705802917, "learning_rate": 8.818578693910617e-05, "loss": 0.2537, "step": 29782 }, { "epoch": 2.412751134154245, "grad_norm": 0.054239723831415176, "learning_rate": 8.818128628651155e-05, "loss": 0.2466, "step": 29783 }, { "epoch": 2.4128321451717434, "grad_norm": 0.06153694540262222, "learning_rate": 8.817678563391692e-05, "loss": 0.2287, "step": 29784 }, { "epoch": 2.4129131561892416, "grad_norm": 0.07154665142297745, "learning_rate": 8.81722849813223e-05, "loss": 0.2932, "step": 29785 }, { "epoch": 2.4129941672067403, "grad_norm": 0.06164253503084183, "learning_rate": 8.816778432872767e-05, "loss": 0.2298, "step": 29786 }, { "epoch": 2.4130751782242386, "grad_norm": 0.05929383262991905, "learning_rate": 8.816328367613304e-05, "loss": 0.2465, "step": 29787 }, { "epoch": 2.413156189241737, "grad_norm": 0.05722380056977272, "learning_rate": 8.815878302353842e-05, "loss": 0.2338, "step": 29788 }, { "epoch": 2.4132372002592355, "grad_norm": 0.07813999801874161, "learning_rate": 8.815428237094379e-05, "loss": 0.2222, "step": 29789 }, { "epoch": 2.4133182112767337, "grad_norm": 0.061567552387714386, "learning_rate": 8.814978171834916e-05, "loss": 0.2316, "step": 29790 }, { "epoch": 2.413399222294232, "grad_norm": 0.06083657220005989, "learning_rate": 8.814528106575454e-05, "loss": 0.2198, "step": 29791 }, { "epoch": 2.4134802333117302, "grad_norm": 0.06548894196748734, "learning_rate": 8.814078041315991e-05, "loss": 0.2775, "step": 29792 }, { "epoch": 2.413561244329229, "grad_norm": 0.08378948271274567, "learning_rate": 8.813627976056528e-05, "loss": 0.3195, "step": 29793 }, { "epoch": 2.413642255346727, "grad_norm": 0.06670024245977402, "learning_rate": 8.813177910797066e-05, "loss": 0.2197, "step": 29794 }, { "epoch": 2.4137232663642254, "grad_norm": 0.05539132282137871, "learning_rate": 8.812727845537604e-05, "loss": 0.2427, "step": 29795 }, { "epoch": 2.413804277381724, "grad_norm": 0.07522717118263245, "learning_rate": 8.81227778027814e-05, "loss": 0.2701, "step": 29796 }, { "epoch": 2.4138852883992223, "grad_norm": 0.06489524245262146, "learning_rate": 8.811827715018678e-05, "loss": 0.2401, "step": 29797 }, { "epoch": 2.4139662994167206, "grad_norm": 0.06208113953471184, "learning_rate": 8.811377649759217e-05, "loss": 0.2158, "step": 29798 }, { "epoch": 2.414047310434219, "grad_norm": 0.06468240171670914, "learning_rate": 8.810927584499753e-05, "loss": 0.2597, "step": 29799 }, { "epoch": 2.4141283214517175, "grad_norm": 0.06483402848243713, "learning_rate": 8.81047751924029e-05, "loss": 0.2367, "step": 29800 }, { "epoch": 2.4142093324692158, "grad_norm": 0.07432875037193298, "learning_rate": 8.810027453980829e-05, "loss": 0.23, "step": 29801 }, { "epoch": 2.414290343486714, "grad_norm": 0.0824606642127037, "learning_rate": 8.809577388721365e-05, "loss": 0.2402, "step": 29802 }, { "epoch": 2.4143713545042127, "grad_norm": 0.061389729380607605, "learning_rate": 8.809127323461902e-05, "loss": 0.2312, "step": 29803 }, { "epoch": 2.414452365521711, "grad_norm": 0.06574756652116776, "learning_rate": 8.808677258202441e-05, "loss": 0.2729, "step": 29804 }, { "epoch": 2.414533376539209, "grad_norm": 0.06415622681379318, "learning_rate": 8.808227192942977e-05, "loss": 0.2467, "step": 29805 }, { "epoch": 2.414614387556708, "grad_norm": 0.06910507380962372, "learning_rate": 8.807777127683514e-05, "loss": 0.2529, "step": 29806 }, { "epoch": 2.414695398574206, "grad_norm": 0.07220354676246643, "learning_rate": 8.807327062424053e-05, "loss": 0.2353, "step": 29807 }, { "epoch": 2.4147764095917044, "grad_norm": 0.0628422200679779, "learning_rate": 8.806876997164589e-05, "loss": 0.2537, "step": 29808 }, { "epoch": 2.414857420609203, "grad_norm": 0.06969404965639114, "learning_rate": 8.806426931905126e-05, "loss": 0.2692, "step": 29809 }, { "epoch": 2.4149384316267013, "grad_norm": 0.061693429946899414, "learning_rate": 8.805976866645665e-05, "loss": 0.2453, "step": 29810 }, { "epoch": 2.4150194426441995, "grad_norm": 0.0579097718000412, "learning_rate": 8.805526801386201e-05, "loss": 0.2477, "step": 29811 }, { "epoch": 2.4151004536616982, "grad_norm": 0.07249568402767181, "learning_rate": 8.805076736126738e-05, "loss": 0.2195, "step": 29812 }, { "epoch": 2.4151814646791965, "grad_norm": 0.0828808918595314, "learning_rate": 8.804626670867277e-05, "loss": 0.2826, "step": 29813 }, { "epoch": 2.4152624756966947, "grad_norm": 0.07269842177629471, "learning_rate": 8.804176605607813e-05, "loss": 0.2963, "step": 29814 }, { "epoch": 2.415343486714193, "grad_norm": 0.06632715463638306, "learning_rate": 8.80372654034835e-05, "loss": 0.2514, "step": 29815 }, { "epoch": 2.4154244977316917, "grad_norm": 0.07053852081298828, "learning_rate": 8.803276475088889e-05, "loss": 0.2155, "step": 29816 }, { "epoch": 2.41550550874919, "grad_norm": 0.06813622266054153, "learning_rate": 8.802826409829425e-05, "loss": 0.2546, "step": 29817 }, { "epoch": 2.415586519766688, "grad_norm": 0.05975549668073654, "learning_rate": 8.802376344569962e-05, "loss": 0.2262, "step": 29818 }, { "epoch": 2.415667530784187, "grad_norm": 0.05883145332336426, "learning_rate": 8.801926279310501e-05, "loss": 0.2362, "step": 29819 }, { "epoch": 2.415748541801685, "grad_norm": 0.07348956912755966, "learning_rate": 8.801476214051037e-05, "loss": 0.2625, "step": 29820 }, { "epoch": 2.4158295528191833, "grad_norm": 0.06862062960863113, "learning_rate": 8.801026148791576e-05, "loss": 0.2435, "step": 29821 }, { "epoch": 2.4159105638366816, "grad_norm": 0.060579124838113785, "learning_rate": 8.800576083532113e-05, "loss": 0.2218, "step": 29822 }, { "epoch": 2.4159915748541803, "grad_norm": 0.06969986855983734, "learning_rate": 8.800126018272649e-05, "loss": 0.2312, "step": 29823 }, { "epoch": 2.4160725858716785, "grad_norm": 0.06263470649719238, "learning_rate": 8.799675953013188e-05, "loss": 0.2526, "step": 29824 }, { "epoch": 2.4161535968891767, "grad_norm": 0.0679418295621872, "learning_rate": 8.799225887753725e-05, "loss": 0.2384, "step": 29825 }, { "epoch": 2.4162346079066754, "grad_norm": 0.07143159955739975, "learning_rate": 8.798775822494261e-05, "loss": 0.236, "step": 29826 }, { "epoch": 2.4163156189241737, "grad_norm": 0.07788621634244919, "learning_rate": 8.7983257572348e-05, "loss": 0.2508, "step": 29827 }, { "epoch": 2.416396629941672, "grad_norm": 0.055590007454156876, "learning_rate": 8.797875691975337e-05, "loss": 0.2412, "step": 29828 }, { "epoch": 2.4164776409591706, "grad_norm": 0.07236363738775253, "learning_rate": 8.797425626715873e-05, "loss": 0.2574, "step": 29829 }, { "epoch": 2.416558651976669, "grad_norm": 0.05652404949069023, "learning_rate": 8.796975561456412e-05, "loss": 0.2436, "step": 29830 }, { "epoch": 2.416639662994167, "grad_norm": 0.06469497084617615, "learning_rate": 8.79652549619695e-05, "loss": 0.2439, "step": 29831 }, { "epoch": 2.416720674011666, "grad_norm": 0.0865524411201477, "learning_rate": 8.796075430937485e-05, "loss": 0.2599, "step": 29832 }, { "epoch": 2.416801685029164, "grad_norm": 0.06282461434602737, "learning_rate": 8.795625365678024e-05, "loss": 0.2414, "step": 29833 }, { "epoch": 2.4168826960466623, "grad_norm": 0.07091455161571503, "learning_rate": 8.795175300418562e-05, "loss": 0.2388, "step": 29834 }, { "epoch": 2.416963707064161, "grad_norm": 0.06994987279176712, "learning_rate": 8.794725235159098e-05, "loss": 0.2325, "step": 29835 }, { "epoch": 2.417044718081659, "grad_norm": 0.0654347687959671, "learning_rate": 8.794275169899636e-05, "loss": 0.2666, "step": 29836 }, { "epoch": 2.4171257290991575, "grad_norm": 0.06742554903030396, "learning_rate": 8.793825104640174e-05, "loss": 0.2468, "step": 29837 }, { "epoch": 2.4172067401166557, "grad_norm": 0.06697697937488556, "learning_rate": 8.79337503938071e-05, "loss": 0.2861, "step": 29838 }, { "epoch": 2.4172877511341544, "grad_norm": 0.06529314070940018, "learning_rate": 8.792924974121248e-05, "loss": 0.2546, "step": 29839 }, { "epoch": 2.4173687621516526, "grad_norm": 0.05987263470888138, "learning_rate": 8.792474908861786e-05, "loss": 0.2414, "step": 29840 }, { "epoch": 2.417449773169151, "grad_norm": 0.06584987789392471, "learning_rate": 8.792024843602322e-05, "loss": 0.2776, "step": 29841 }, { "epoch": 2.4175307841866496, "grad_norm": 0.048123326152563095, "learning_rate": 8.79157477834286e-05, "loss": 0.2081, "step": 29842 }, { "epoch": 2.417611795204148, "grad_norm": 0.06386002898216248, "learning_rate": 8.791124713083398e-05, "loss": 0.2184, "step": 29843 }, { "epoch": 2.417692806221646, "grad_norm": 0.08389604091644287, "learning_rate": 8.790674647823934e-05, "loss": 0.264, "step": 29844 }, { "epoch": 2.4177738172391443, "grad_norm": 0.07809042185544968, "learning_rate": 8.790224582564472e-05, "loss": 0.2656, "step": 29845 }, { "epoch": 2.417854828256643, "grad_norm": 0.05657443404197693, "learning_rate": 8.78977451730501e-05, "loss": 0.1997, "step": 29846 }, { "epoch": 2.4179358392741412, "grad_norm": 0.05614442378282547, "learning_rate": 8.789324452045547e-05, "loss": 0.2572, "step": 29847 }, { "epoch": 2.4180168502916395, "grad_norm": 0.07657313346862793, "learning_rate": 8.788874386786085e-05, "loss": 0.256, "step": 29848 }, { "epoch": 2.418097861309138, "grad_norm": 0.07188082486391068, "learning_rate": 8.788424321526622e-05, "loss": 0.2519, "step": 29849 }, { "epoch": 2.4181788723266364, "grad_norm": 0.06149272248148918, "learning_rate": 8.787974256267159e-05, "loss": 0.2172, "step": 29850 }, { "epoch": 2.4182598833441347, "grad_norm": 0.08759336918592453, "learning_rate": 8.787524191007697e-05, "loss": 0.2777, "step": 29851 }, { "epoch": 2.4183408943616334, "grad_norm": 0.08040926605463028, "learning_rate": 8.787074125748234e-05, "loss": 0.2624, "step": 29852 }, { "epoch": 2.4184219053791316, "grad_norm": 0.06956743448972702, "learning_rate": 8.786624060488771e-05, "loss": 0.2582, "step": 29853 }, { "epoch": 2.41850291639663, "grad_norm": 0.07439576834440231, "learning_rate": 8.786173995229309e-05, "loss": 0.2626, "step": 29854 }, { "epoch": 2.4185839274141285, "grad_norm": 0.06943316757678986, "learning_rate": 8.785723929969846e-05, "loss": 0.2056, "step": 29855 }, { "epoch": 2.4186649384316268, "grad_norm": 0.06287401914596558, "learning_rate": 8.785273864710383e-05, "loss": 0.2195, "step": 29856 }, { "epoch": 2.418745949449125, "grad_norm": 0.062373608350753784, "learning_rate": 8.784823799450921e-05, "loss": 0.216, "step": 29857 }, { "epoch": 2.4188269604666237, "grad_norm": 0.05923473834991455, "learning_rate": 8.784373734191458e-05, "loss": 0.2179, "step": 29858 }, { "epoch": 2.418907971484122, "grad_norm": 0.06392458826303482, "learning_rate": 8.783923668931996e-05, "loss": 0.2229, "step": 29859 }, { "epoch": 2.41898898250162, "grad_norm": 0.06252685189247131, "learning_rate": 8.783473603672533e-05, "loss": 0.2176, "step": 29860 }, { "epoch": 2.4190699935191184, "grad_norm": 0.06143161281943321, "learning_rate": 8.78302353841307e-05, "loss": 0.2343, "step": 29861 }, { "epoch": 2.419151004536617, "grad_norm": 0.06481669098138809, "learning_rate": 8.782573473153608e-05, "loss": 0.2392, "step": 29862 }, { "epoch": 2.4192320155541154, "grad_norm": 0.07333081215620041, "learning_rate": 8.782123407894145e-05, "loss": 0.2721, "step": 29863 }, { "epoch": 2.4193130265716136, "grad_norm": 0.06324837356805801, "learning_rate": 8.781673342634682e-05, "loss": 0.2464, "step": 29864 }, { "epoch": 2.4193940375891123, "grad_norm": 0.07487185299396515, "learning_rate": 8.78122327737522e-05, "loss": 0.2536, "step": 29865 }, { "epoch": 2.4194750486066106, "grad_norm": 0.06082228943705559, "learning_rate": 8.780773212115757e-05, "loss": 0.2173, "step": 29866 }, { "epoch": 2.419556059624109, "grad_norm": 0.07401525974273682, "learning_rate": 8.780323146856294e-05, "loss": 0.2594, "step": 29867 }, { "epoch": 2.419637070641607, "grad_norm": 0.07237707078456879, "learning_rate": 8.779873081596832e-05, "loss": 0.2409, "step": 29868 }, { "epoch": 2.4197180816591057, "grad_norm": 0.06320221722126007, "learning_rate": 8.779423016337369e-05, "loss": 0.2408, "step": 29869 }, { "epoch": 2.419799092676604, "grad_norm": 0.07496561110019684, "learning_rate": 8.778972951077906e-05, "loss": 0.2456, "step": 29870 }, { "epoch": 2.4198801036941022, "grad_norm": 0.07235972583293915, "learning_rate": 8.778522885818444e-05, "loss": 0.245, "step": 29871 }, { "epoch": 2.419961114711601, "grad_norm": 0.0759521946310997, "learning_rate": 8.778072820558981e-05, "loss": 0.2589, "step": 29872 }, { "epoch": 2.420042125729099, "grad_norm": 0.07334791123867035, "learning_rate": 8.77762275529952e-05, "loss": 0.3019, "step": 29873 }, { "epoch": 2.4201231367465974, "grad_norm": 0.06675311177968979, "learning_rate": 8.777172690040056e-05, "loss": 0.2646, "step": 29874 }, { "epoch": 2.420204147764096, "grad_norm": 0.0692124143242836, "learning_rate": 8.776722624780593e-05, "loss": 0.2403, "step": 29875 }, { "epoch": 2.4202851587815943, "grad_norm": 0.06959296762943268, "learning_rate": 8.776272559521132e-05, "loss": 0.2203, "step": 29876 }, { "epoch": 2.4203661697990926, "grad_norm": 0.07304505258798599, "learning_rate": 8.775822494261668e-05, "loss": 0.2476, "step": 29877 }, { "epoch": 2.4204471808165913, "grad_norm": 0.07309460639953613, "learning_rate": 8.775372429002205e-05, "loss": 0.2791, "step": 29878 }, { "epoch": 2.4205281918340895, "grad_norm": 0.06610136479139328, "learning_rate": 8.774922363742744e-05, "loss": 0.282, "step": 29879 }, { "epoch": 2.4206092028515878, "grad_norm": 0.07905296981334686, "learning_rate": 8.77447229848328e-05, "loss": 0.2449, "step": 29880 }, { "epoch": 2.420690213869086, "grad_norm": 0.06679189205169678, "learning_rate": 8.774022233223817e-05, "loss": 0.2395, "step": 29881 }, { "epoch": 2.4207712248865847, "grad_norm": 0.07395980507135391, "learning_rate": 8.773572167964356e-05, "loss": 0.3036, "step": 29882 }, { "epoch": 2.420852235904083, "grad_norm": 0.07605738937854767, "learning_rate": 8.773122102704892e-05, "loss": 0.2484, "step": 29883 }, { "epoch": 2.420933246921581, "grad_norm": 0.05466524511575699, "learning_rate": 8.77267203744543e-05, "loss": 0.2249, "step": 29884 }, { "epoch": 2.42101425793908, "grad_norm": 0.058822643011808395, "learning_rate": 8.772221972185968e-05, "loss": 0.2559, "step": 29885 }, { "epoch": 2.421095268956578, "grad_norm": 0.07392793893814087, "learning_rate": 8.771771906926504e-05, "loss": 0.2545, "step": 29886 }, { "epoch": 2.4211762799740764, "grad_norm": 0.05999179184436798, "learning_rate": 8.771321841667042e-05, "loss": 0.2357, "step": 29887 }, { "epoch": 2.4212572909915746, "grad_norm": 0.06972895562648773, "learning_rate": 8.77087177640758e-05, "loss": 0.2264, "step": 29888 }, { "epoch": 2.4213383020090733, "grad_norm": 0.06538040190935135, "learning_rate": 8.770421711148116e-05, "loss": 0.1909, "step": 29889 }, { "epoch": 2.4214193130265715, "grad_norm": 0.05858279764652252, "learning_rate": 8.769971645888654e-05, "loss": 0.2409, "step": 29890 }, { "epoch": 2.42150032404407, "grad_norm": 0.06701955199241638, "learning_rate": 8.769521580629192e-05, "loss": 0.268, "step": 29891 }, { "epoch": 2.4215813350615685, "grad_norm": 0.06543616205453873, "learning_rate": 8.769071515369728e-05, "loss": 0.2356, "step": 29892 }, { "epoch": 2.4216623460790667, "grad_norm": 0.08259650319814682, "learning_rate": 8.768621450110266e-05, "loss": 0.2182, "step": 29893 }, { "epoch": 2.421743357096565, "grad_norm": 0.067936010658741, "learning_rate": 8.768171384850804e-05, "loss": 0.2435, "step": 29894 }, { "epoch": 2.4218243681140637, "grad_norm": 0.0737324133515358, "learning_rate": 8.76772131959134e-05, "loss": 0.2154, "step": 29895 }, { "epoch": 2.421905379131562, "grad_norm": 0.05419429764151573, "learning_rate": 8.767271254331878e-05, "loss": 0.2825, "step": 29896 }, { "epoch": 2.42198639014906, "grad_norm": 0.07466893643140793, "learning_rate": 8.766821189072417e-05, "loss": 0.2347, "step": 29897 }, { "epoch": 2.422067401166559, "grad_norm": 0.06078220158815384, "learning_rate": 8.766371123812953e-05, "loss": 0.2399, "step": 29898 }, { "epoch": 2.422148412184057, "grad_norm": 0.06347761303186417, "learning_rate": 8.765921058553491e-05, "loss": 0.2375, "step": 29899 }, { "epoch": 2.4222294232015553, "grad_norm": 0.07001287490129471, "learning_rate": 8.765470993294029e-05, "loss": 0.2417, "step": 29900 }, { "epoch": 2.422310434219054, "grad_norm": 0.05894186720252037, "learning_rate": 8.765020928034565e-05, "loss": 0.2496, "step": 29901 }, { "epoch": 2.4223914452365523, "grad_norm": 0.05822170153260231, "learning_rate": 8.764570862775103e-05, "loss": 0.2584, "step": 29902 }, { "epoch": 2.4224724562540505, "grad_norm": 0.060905635356903076, "learning_rate": 8.764120797515641e-05, "loss": 0.2282, "step": 29903 }, { "epoch": 2.4225534672715487, "grad_norm": 0.06173749268054962, "learning_rate": 8.763670732256177e-05, "loss": 0.2173, "step": 29904 }, { "epoch": 2.4226344782890474, "grad_norm": 0.07445482164621353, "learning_rate": 8.763220666996715e-05, "loss": 0.2503, "step": 29905 }, { "epoch": 2.4227154893065457, "grad_norm": 0.07381244748830795, "learning_rate": 8.762770601737253e-05, "loss": 0.2464, "step": 29906 }, { "epoch": 2.422796500324044, "grad_norm": 0.06338288635015488, "learning_rate": 8.762320536477789e-05, "loss": 0.2549, "step": 29907 }, { "epoch": 2.4228775113415426, "grad_norm": 0.0717942863702774, "learning_rate": 8.761870471218328e-05, "loss": 0.2664, "step": 29908 }, { "epoch": 2.422958522359041, "grad_norm": 0.0670522078871727, "learning_rate": 8.761420405958865e-05, "loss": 0.2832, "step": 29909 }, { "epoch": 2.423039533376539, "grad_norm": 0.07442935556173325, "learning_rate": 8.760970340699401e-05, "loss": 0.2276, "step": 29910 }, { "epoch": 2.4231205443940373, "grad_norm": 0.07393119484186172, "learning_rate": 8.76052027543994e-05, "loss": 0.244, "step": 29911 }, { "epoch": 2.423201555411536, "grad_norm": 0.06407365202903748, "learning_rate": 8.760070210180477e-05, "loss": 0.229, "step": 29912 }, { "epoch": 2.4232825664290343, "grad_norm": 0.07595523446798325, "learning_rate": 8.759620144921013e-05, "loss": 0.2782, "step": 29913 }, { "epoch": 2.4233635774465325, "grad_norm": 0.06496904790401459, "learning_rate": 8.759170079661552e-05, "loss": 0.2289, "step": 29914 }, { "epoch": 2.423444588464031, "grad_norm": 0.07434480637311935, "learning_rate": 8.758720014402089e-05, "loss": 0.2359, "step": 29915 }, { "epoch": 2.4235255994815295, "grad_norm": 0.07025157660245895, "learning_rate": 8.758269949142625e-05, "loss": 0.2308, "step": 29916 }, { "epoch": 2.4236066104990277, "grad_norm": 0.07805941253900528, "learning_rate": 8.757819883883164e-05, "loss": 0.2323, "step": 29917 }, { "epoch": 2.4236876215165264, "grad_norm": 0.06584879755973816, "learning_rate": 8.757369818623701e-05, "loss": 0.2444, "step": 29918 }, { "epoch": 2.4237686325340246, "grad_norm": 0.0628671944141388, "learning_rate": 8.756919753364237e-05, "loss": 0.2402, "step": 29919 }, { "epoch": 2.423849643551523, "grad_norm": 0.07205840945243835, "learning_rate": 8.756469688104776e-05, "loss": 0.2556, "step": 29920 }, { "epoch": 2.4239306545690216, "grad_norm": 0.06968038529157639, "learning_rate": 8.756019622845313e-05, "loss": 0.2782, "step": 29921 }, { "epoch": 2.42401166558652, "grad_norm": 0.077177993953228, "learning_rate": 8.755569557585849e-05, "loss": 0.2601, "step": 29922 }, { "epoch": 2.424092676604018, "grad_norm": 0.06498174369335175, "learning_rate": 8.755119492326388e-05, "loss": 0.2192, "step": 29923 }, { "epoch": 2.4241736876215167, "grad_norm": 0.06896430999040604, "learning_rate": 8.754669427066925e-05, "loss": 0.2517, "step": 29924 }, { "epoch": 2.424254698639015, "grad_norm": 0.07223816961050034, "learning_rate": 8.754219361807463e-05, "loss": 0.2318, "step": 29925 }, { "epoch": 2.4243357096565132, "grad_norm": 0.06818022578954697, "learning_rate": 8.753769296548e-05, "loss": 0.2928, "step": 29926 }, { "epoch": 2.4244167206740115, "grad_norm": 0.06415341049432755, "learning_rate": 8.753319231288537e-05, "loss": 0.2132, "step": 29927 }, { "epoch": 2.42449773169151, "grad_norm": 0.07759521901607513, "learning_rate": 8.752869166029075e-05, "loss": 0.2623, "step": 29928 }, { "epoch": 2.4245787427090084, "grad_norm": 0.06272358447313309, "learning_rate": 8.752419100769612e-05, "loss": 0.25, "step": 29929 }, { "epoch": 2.4246597537265067, "grad_norm": 0.060498110949993134, "learning_rate": 8.75196903551015e-05, "loss": 0.2399, "step": 29930 }, { "epoch": 2.4247407647440054, "grad_norm": 0.07703352719545364, "learning_rate": 8.751518970250687e-05, "loss": 0.2812, "step": 29931 }, { "epoch": 2.4248217757615036, "grad_norm": 0.06761782616376877, "learning_rate": 8.751068904991224e-05, "loss": 0.283, "step": 29932 }, { "epoch": 2.424902786779002, "grad_norm": 0.06330965459346771, "learning_rate": 8.750618839731762e-05, "loss": 0.2472, "step": 29933 }, { "epoch": 2.4249837977965, "grad_norm": 0.056882016360759735, "learning_rate": 8.750168774472299e-05, "loss": 0.2416, "step": 29934 }, { "epoch": 2.4250648088139988, "grad_norm": 0.06899707019329071, "learning_rate": 8.749718709212836e-05, "loss": 0.2348, "step": 29935 }, { "epoch": 2.425145819831497, "grad_norm": 0.07171101123094559, "learning_rate": 8.749268643953374e-05, "loss": 0.2216, "step": 29936 }, { "epoch": 2.4252268308489953, "grad_norm": 0.06139335408806801, "learning_rate": 8.748818578693911e-05, "loss": 0.2235, "step": 29937 }, { "epoch": 2.425307841866494, "grad_norm": 0.07158593833446503, "learning_rate": 8.748368513434448e-05, "loss": 0.2672, "step": 29938 }, { "epoch": 2.425388852883992, "grad_norm": 0.055078309029340744, "learning_rate": 8.747918448174986e-05, "loss": 0.2161, "step": 29939 }, { "epoch": 2.4254698639014904, "grad_norm": 0.05247804895043373, "learning_rate": 8.747468382915523e-05, "loss": 0.2266, "step": 29940 }, { "epoch": 2.425550874918989, "grad_norm": 0.06628144532442093, "learning_rate": 8.74701831765606e-05, "loss": 0.2305, "step": 29941 }, { "epoch": 2.4256318859364874, "grad_norm": 0.07619509100914001, "learning_rate": 8.746568252396598e-05, "loss": 0.2846, "step": 29942 }, { "epoch": 2.4257128969539856, "grad_norm": 0.06378401070833206, "learning_rate": 8.746118187137135e-05, "loss": 0.2399, "step": 29943 }, { "epoch": 2.4257939079714843, "grad_norm": 0.0780458077788353, "learning_rate": 8.745668121877673e-05, "loss": 0.266, "step": 29944 }, { "epoch": 2.4258749189889826, "grad_norm": 0.06926210969686508, "learning_rate": 8.74521805661821e-05, "loss": 0.2581, "step": 29945 }, { "epoch": 2.425955930006481, "grad_norm": 0.06663671135902405, "learning_rate": 8.744767991358747e-05, "loss": 0.2038, "step": 29946 }, { "epoch": 2.4260369410239795, "grad_norm": 0.06778295338153839, "learning_rate": 8.744317926099285e-05, "loss": 0.2474, "step": 29947 }, { "epoch": 2.4261179520414777, "grad_norm": 0.08452882617712021, "learning_rate": 8.743867860839822e-05, "loss": 0.2713, "step": 29948 }, { "epoch": 2.426198963058976, "grad_norm": 0.07399019598960876, "learning_rate": 8.743417795580359e-05, "loss": 0.2588, "step": 29949 }, { "epoch": 2.426279974076474, "grad_norm": 0.06832026690244675, "learning_rate": 8.742967730320897e-05, "loss": 0.245, "step": 29950 }, { "epoch": 2.426360985093973, "grad_norm": 0.06566408276557922, "learning_rate": 8.742517665061435e-05, "loss": 0.2204, "step": 29951 }, { "epoch": 2.426441996111471, "grad_norm": 0.06541307270526886, "learning_rate": 8.742067599801971e-05, "loss": 0.308, "step": 29952 }, { "epoch": 2.4265230071289694, "grad_norm": 0.07913939654827118, "learning_rate": 8.741617534542509e-05, "loss": 0.2746, "step": 29953 }, { "epoch": 2.426604018146468, "grad_norm": 0.07623070478439331, "learning_rate": 8.741167469283047e-05, "loss": 0.2766, "step": 29954 }, { "epoch": 2.4266850291639663, "grad_norm": 0.06193844974040985, "learning_rate": 8.740717404023583e-05, "loss": 0.2231, "step": 29955 }, { "epoch": 2.4267660401814646, "grad_norm": 0.07710251212120056, "learning_rate": 8.740267338764121e-05, "loss": 0.2649, "step": 29956 }, { "epoch": 2.426847051198963, "grad_norm": 0.06773968040943146, "learning_rate": 8.73981727350466e-05, "loss": 0.2572, "step": 29957 }, { "epoch": 2.4269280622164615, "grad_norm": 0.05227114260196686, "learning_rate": 8.739367208245196e-05, "loss": 0.2524, "step": 29958 }, { "epoch": 2.4270090732339598, "grad_norm": 0.06641446053981781, "learning_rate": 8.738917142985733e-05, "loss": 0.2518, "step": 29959 }, { "epoch": 2.427090084251458, "grad_norm": 0.07579101622104645, "learning_rate": 8.738467077726272e-05, "loss": 0.2628, "step": 29960 }, { "epoch": 2.4271710952689567, "grad_norm": 0.057052165269851685, "learning_rate": 8.738017012466808e-05, "loss": 0.1979, "step": 29961 }, { "epoch": 2.427252106286455, "grad_norm": 0.063960500061512, "learning_rate": 8.737566947207345e-05, "loss": 0.2349, "step": 29962 }, { "epoch": 2.427333117303953, "grad_norm": 0.056935809552669525, "learning_rate": 8.737116881947884e-05, "loss": 0.2307, "step": 29963 }, { "epoch": 2.427414128321452, "grad_norm": 0.08629359304904938, "learning_rate": 8.73666681668842e-05, "loss": 0.2795, "step": 29964 }, { "epoch": 2.42749513933895, "grad_norm": 0.06306321918964386, "learning_rate": 8.736216751428957e-05, "loss": 0.2237, "step": 29965 }, { "epoch": 2.4275761503564484, "grad_norm": 0.06393168121576309, "learning_rate": 8.735766686169496e-05, "loss": 0.2537, "step": 29966 }, { "epoch": 2.427657161373947, "grad_norm": 0.06578171998262405, "learning_rate": 8.735316620910032e-05, "loss": 0.2718, "step": 29967 }, { "epoch": 2.4277381723914453, "grad_norm": 0.07443420588970184, "learning_rate": 8.734866555650569e-05, "loss": 0.2344, "step": 29968 }, { "epoch": 2.4278191834089435, "grad_norm": 0.07351133972406387, "learning_rate": 8.734416490391108e-05, "loss": 0.2356, "step": 29969 }, { "epoch": 2.4279001944264422, "grad_norm": 0.07358096539974213, "learning_rate": 8.733966425131644e-05, "loss": 0.2264, "step": 29970 }, { "epoch": 2.4279812054439405, "grad_norm": 0.07105661183595657, "learning_rate": 8.733516359872181e-05, "loss": 0.2398, "step": 29971 }, { "epoch": 2.4280622164614387, "grad_norm": 0.0635269358754158, "learning_rate": 8.73306629461272e-05, "loss": 0.2536, "step": 29972 }, { "epoch": 2.428143227478937, "grad_norm": 0.07085049897432327, "learning_rate": 8.732616229353256e-05, "loss": 0.2625, "step": 29973 }, { "epoch": 2.4282242384964356, "grad_norm": 0.06909903883934021, "learning_rate": 8.732166164093793e-05, "loss": 0.2326, "step": 29974 }, { "epoch": 2.428305249513934, "grad_norm": 0.06062382087111473, "learning_rate": 8.731716098834332e-05, "loss": 0.2277, "step": 29975 }, { "epoch": 2.428386260531432, "grad_norm": 0.07726830244064331, "learning_rate": 8.731266033574868e-05, "loss": 0.248, "step": 29976 }, { "epoch": 2.428467271548931, "grad_norm": 0.07095228880643845, "learning_rate": 8.730815968315405e-05, "loss": 0.2711, "step": 29977 }, { "epoch": 2.428548282566429, "grad_norm": 0.07358419895172119, "learning_rate": 8.730365903055944e-05, "loss": 0.2577, "step": 29978 }, { "epoch": 2.4286292935839273, "grad_norm": 0.0734160989522934, "learning_rate": 8.72991583779648e-05, "loss": 0.2313, "step": 29979 }, { "epoch": 2.4287103046014256, "grad_norm": 0.07221377640962601, "learning_rate": 8.729465772537019e-05, "loss": 0.2388, "step": 29980 }, { "epoch": 2.4287913156189243, "grad_norm": 0.07227525860071182, "learning_rate": 8.729015707277556e-05, "loss": 0.2619, "step": 29981 }, { "epoch": 2.4288723266364225, "grad_norm": 0.06419933587312698, "learning_rate": 8.728565642018092e-05, "loss": 0.2373, "step": 29982 }, { "epoch": 2.4289533376539207, "grad_norm": 0.07368076592683792, "learning_rate": 8.728115576758631e-05, "loss": 0.2541, "step": 29983 }, { "epoch": 2.4290343486714194, "grad_norm": 0.06805281341075897, "learning_rate": 8.727665511499168e-05, "loss": 0.2301, "step": 29984 }, { "epoch": 2.4291153596889177, "grad_norm": 0.0686119869351387, "learning_rate": 8.727215446239704e-05, "loss": 0.2327, "step": 29985 }, { "epoch": 2.429196370706416, "grad_norm": 0.073693186044693, "learning_rate": 8.726765380980243e-05, "loss": 0.2879, "step": 29986 }, { "epoch": 2.4292773817239146, "grad_norm": 0.07163406908512115, "learning_rate": 8.72631531572078e-05, "loss": 0.2568, "step": 29987 }, { "epoch": 2.429358392741413, "grad_norm": 0.08275744318962097, "learning_rate": 8.725865250461316e-05, "loss": 0.2245, "step": 29988 }, { "epoch": 2.429439403758911, "grad_norm": 0.06841978430747986, "learning_rate": 8.725415185201855e-05, "loss": 0.2633, "step": 29989 }, { "epoch": 2.42952041477641, "grad_norm": 0.05534449964761734, "learning_rate": 8.724965119942392e-05, "loss": 0.2196, "step": 29990 }, { "epoch": 2.429601425793908, "grad_norm": 0.05714098736643791, "learning_rate": 8.724515054682928e-05, "loss": 0.2595, "step": 29991 }, { "epoch": 2.4296824368114063, "grad_norm": 0.07040819525718689, "learning_rate": 8.724064989423467e-05, "loss": 0.2937, "step": 29992 }, { "epoch": 2.429763447828905, "grad_norm": 0.05960967391729355, "learning_rate": 8.723614924164005e-05, "loss": 0.2393, "step": 29993 }, { "epoch": 2.429844458846403, "grad_norm": 0.05933735892176628, "learning_rate": 8.72316485890454e-05, "loss": 0.2062, "step": 29994 }, { "epoch": 2.4299254698639015, "grad_norm": 0.06407492607831955, "learning_rate": 8.722714793645079e-05, "loss": 0.2333, "step": 29995 }, { "epoch": 2.4300064808813997, "grad_norm": 0.0534355528652668, "learning_rate": 8.722264728385617e-05, "loss": 0.2148, "step": 29996 }, { "epoch": 2.4300874918988984, "grad_norm": 0.06425287574529648, "learning_rate": 8.721814663126153e-05, "loss": 0.2585, "step": 29997 }, { "epoch": 2.4301685029163966, "grad_norm": 0.0652540922164917, "learning_rate": 8.721364597866691e-05, "loss": 0.2127, "step": 29998 }, { "epoch": 2.430249513933895, "grad_norm": 0.06726215779781342, "learning_rate": 8.720914532607229e-05, "loss": 0.232, "step": 29999 }, { "epoch": 2.4303305249513936, "grad_norm": 0.06891316175460815, "learning_rate": 8.720464467347765e-05, "loss": 0.2624, "step": 30000 }, { "epoch": 2.430411535968892, "grad_norm": 0.0784035250544548, "learning_rate": 8.720014402088303e-05, "loss": 0.2833, "step": 30001 }, { "epoch": 2.43049254698639, "grad_norm": 0.07090424746274948, "learning_rate": 8.719564336828841e-05, "loss": 0.2373, "step": 30002 }, { "epoch": 2.4305735580038883, "grad_norm": 0.07066990435123444, "learning_rate": 8.719114271569377e-05, "loss": 0.2364, "step": 30003 }, { "epoch": 2.430654569021387, "grad_norm": 0.07360489666461945, "learning_rate": 8.718664206309915e-05, "loss": 0.2622, "step": 30004 }, { "epoch": 2.4307355800388852, "grad_norm": 0.07157068699598312, "learning_rate": 8.718214141050453e-05, "loss": 0.251, "step": 30005 }, { "epoch": 2.4308165910563835, "grad_norm": 0.06780479848384857, "learning_rate": 8.71776407579099e-05, "loss": 0.2558, "step": 30006 }, { "epoch": 2.430897602073882, "grad_norm": 0.06623739749193192, "learning_rate": 8.717314010531528e-05, "loss": 0.2593, "step": 30007 }, { "epoch": 2.4309786130913804, "grad_norm": 0.06277523189783096, "learning_rate": 8.716863945272065e-05, "loss": 0.229, "step": 30008 }, { "epoch": 2.4310596241088787, "grad_norm": 0.0608900710940361, "learning_rate": 8.716413880012602e-05, "loss": 0.2153, "step": 30009 }, { "epoch": 2.4311406351263773, "grad_norm": 0.07124532759189606, "learning_rate": 8.71596381475314e-05, "loss": 0.2302, "step": 30010 }, { "epoch": 2.4312216461438756, "grad_norm": 0.07467015087604523, "learning_rate": 8.715513749493677e-05, "loss": 0.2531, "step": 30011 }, { "epoch": 2.431302657161374, "grad_norm": 0.06628242880105972, "learning_rate": 8.715063684234214e-05, "loss": 0.2381, "step": 30012 }, { "epoch": 2.4313836681788725, "grad_norm": 0.07032094150781631, "learning_rate": 8.714613618974752e-05, "loss": 0.279, "step": 30013 }, { "epoch": 2.4314646791963708, "grad_norm": 0.07590150833129883, "learning_rate": 8.714163553715289e-05, "loss": 0.2673, "step": 30014 }, { "epoch": 2.431545690213869, "grad_norm": 0.08777482807636261, "learning_rate": 8.713713488455826e-05, "loss": 0.2624, "step": 30015 }, { "epoch": 2.4316267012313677, "grad_norm": 0.07249364256858826, "learning_rate": 8.713263423196364e-05, "loss": 0.3, "step": 30016 }, { "epoch": 2.431707712248866, "grad_norm": 0.07197359204292297, "learning_rate": 8.712813357936901e-05, "loss": 0.2389, "step": 30017 }, { "epoch": 2.431788723266364, "grad_norm": 0.07103599607944489, "learning_rate": 8.712363292677439e-05, "loss": 0.2419, "step": 30018 }, { "epoch": 2.4318697342838624, "grad_norm": 0.057174891233444214, "learning_rate": 8.711913227417976e-05, "loss": 0.2458, "step": 30019 }, { "epoch": 2.431950745301361, "grad_norm": 0.08173391968011856, "learning_rate": 8.711463162158513e-05, "loss": 0.246, "step": 30020 }, { "epoch": 2.4320317563188594, "grad_norm": 0.08192049711942673, "learning_rate": 8.71101309689905e-05, "loss": 0.263, "step": 30021 }, { "epoch": 2.4321127673363576, "grad_norm": 0.07564476132392883, "learning_rate": 8.710563031639588e-05, "loss": 0.2261, "step": 30022 }, { "epoch": 2.4321937783538563, "grad_norm": 0.07592171430587769, "learning_rate": 8.710112966380125e-05, "loss": 0.287, "step": 30023 }, { "epoch": 2.4322747893713546, "grad_norm": 0.05942939221858978, "learning_rate": 8.709662901120663e-05, "loss": 0.2677, "step": 30024 }, { "epoch": 2.432355800388853, "grad_norm": 0.07345996797084808, "learning_rate": 8.7092128358612e-05, "loss": 0.259, "step": 30025 }, { "epoch": 2.432436811406351, "grad_norm": 0.0635913833975792, "learning_rate": 8.708762770601737e-05, "loss": 0.2466, "step": 30026 }, { "epoch": 2.4325178224238497, "grad_norm": 0.06319551914930344, "learning_rate": 8.708312705342275e-05, "loss": 0.263, "step": 30027 }, { "epoch": 2.432598833441348, "grad_norm": 0.06326335668563843, "learning_rate": 8.707862640082812e-05, "loss": 0.2751, "step": 30028 }, { "epoch": 2.432679844458846, "grad_norm": 0.061082255095243454, "learning_rate": 8.70741257482335e-05, "loss": 0.2286, "step": 30029 }, { "epoch": 2.432760855476345, "grad_norm": 0.0597209595143795, "learning_rate": 8.706962509563887e-05, "loss": 0.2292, "step": 30030 }, { "epoch": 2.432841866493843, "grad_norm": 0.0566350482404232, "learning_rate": 8.706512444304424e-05, "loss": 0.262, "step": 30031 }, { "epoch": 2.4329228775113414, "grad_norm": 0.06501947343349457, "learning_rate": 8.706062379044963e-05, "loss": 0.2317, "step": 30032 }, { "epoch": 2.43300388852884, "grad_norm": 0.07468343526124954, "learning_rate": 8.705612313785499e-05, "loss": 0.2716, "step": 30033 }, { "epoch": 2.4330848995463383, "grad_norm": 0.06104396656155586, "learning_rate": 8.705162248526036e-05, "loss": 0.3045, "step": 30034 }, { "epoch": 2.4331659105638366, "grad_norm": 0.06956176459789276, "learning_rate": 8.704712183266575e-05, "loss": 0.257, "step": 30035 }, { "epoch": 2.4332469215813353, "grad_norm": 0.05189737305045128, "learning_rate": 8.704262118007111e-05, "loss": 0.2166, "step": 30036 }, { "epoch": 2.4333279325988335, "grad_norm": 0.0675140991806984, "learning_rate": 8.703812052747648e-05, "loss": 0.2814, "step": 30037 }, { "epoch": 2.4334089436163318, "grad_norm": 0.04953811690211296, "learning_rate": 8.703361987488187e-05, "loss": 0.2595, "step": 30038 }, { "epoch": 2.4334899546338304, "grad_norm": 0.07347162067890167, "learning_rate": 8.702911922228723e-05, "loss": 0.2455, "step": 30039 }, { "epoch": 2.4335709656513287, "grad_norm": 0.053286418318748474, "learning_rate": 8.70246185696926e-05, "loss": 0.2425, "step": 30040 }, { "epoch": 2.433651976668827, "grad_norm": 0.06523050367832184, "learning_rate": 8.702011791709799e-05, "loss": 0.2209, "step": 30041 }, { "epoch": 2.433732987686325, "grad_norm": 0.05891433730721474, "learning_rate": 8.701561726450335e-05, "loss": 0.2575, "step": 30042 }, { "epoch": 2.433813998703824, "grad_norm": 0.08087577670812607, "learning_rate": 8.701111661190873e-05, "loss": 0.2569, "step": 30043 }, { "epoch": 2.433895009721322, "grad_norm": 0.06858525425195694, "learning_rate": 8.700661595931411e-05, "loss": 0.2464, "step": 30044 }, { "epoch": 2.4339760207388204, "grad_norm": 0.06655458360910416, "learning_rate": 8.700211530671947e-05, "loss": 0.2438, "step": 30045 }, { "epoch": 2.434057031756319, "grad_norm": 0.0642879530787468, "learning_rate": 8.699761465412485e-05, "loss": 0.2355, "step": 30046 }, { "epoch": 2.4341380427738173, "grad_norm": 0.05205295979976654, "learning_rate": 8.699311400153023e-05, "loss": 0.225, "step": 30047 }, { "epoch": 2.4342190537913155, "grad_norm": 0.07050012052059174, "learning_rate": 8.69886133489356e-05, "loss": 0.2291, "step": 30048 }, { "epoch": 2.434300064808814, "grad_norm": 0.0668644905090332, "learning_rate": 8.698411269634097e-05, "loss": 0.2689, "step": 30049 }, { "epoch": 2.4343810758263125, "grad_norm": 0.0587209090590477, "learning_rate": 8.697961204374635e-05, "loss": 0.244, "step": 30050 }, { "epoch": 2.4344620868438107, "grad_norm": 0.07448132336139679, "learning_rate": 8.697511139115171e-05, "loss": 0.2395, "step": 30051 }, { "epoch": 2.434543097861309, "grad_norm": 0.0627812072634697, "learning_rate": 8.697061073855709e-05, "loss": 0.2505, "step": 30052 }, { "epoch": 2.4346241088788076, "grad_norm": 0.07397947460412979, "learning_rate": 8.696611008596247e-05, "loss": 0.2638, "step": 30053 }, { "epoch": 2.434705119896306, "grad_norm": 0.08410845696926117, "learning_rate": 8.696160943336784e-05, "loss": 0.3002, "step": 30054 }, { "epoch": 2.434786130913804, "grad_norm": 0.05857682228088379, "learning_rate": 8.695710878077321e-05, "loss": 0.2265, "step": 30055 }, { "epoch": 2.434867141931303, "grad_norm": 0.08480028063058853, "learning_rate": 8.69526081281786e-05, "loss": 0.2682, "step": 30056 }, { "epoch": 2.434948152948801, "grad_norm": 0.0604703351855278, "learning_rate": 8.694810747558396e-05, "loss": 0.2503, "step": 30057 }, { "epoch": 2.4350291639662993, "grad_norm": 0.07384306192398071, "learning_rate": 8.694360682298934e-05, "loss": 0.2213, "step": 30058 }, { "epoch": 2.435110174983798, "grad_norm": 0.06075995787978172, "learning_rate": 8.693910617039472e-05, "loss": 0.2487, "step": 30059 }, { "epoch": 2.4351911860012962, "grad_norm": 0.06557806581258774, "learning_rate": 8.693460551780008e-05, "loss": 0.2346, "step": 30060 }, { "epoch": 2.4352721970187945, "grad_norm": 0.0574188195168972, "learning_rate": 8.693010486520546e-05, "loss": 0.2619, "step": 30061 }, { "epoch": 2.435353208036293, "grad_norm": 0.06319770961999893, "learning_rate": 8.692560421261084e-05, "loss": 0.2364, "step": 30062 }, { "epoch": 2.4354342190537914, "grad_norm": 0.06702440977096558, "learning_rate": 8.69211035600162e-05, "loss": 0.2398, "step": 30063 }, { "epoch": 2.4355152300712897, "grad_norm": 0.06472543627023697, "learning_rate": 8.691660290742158e-05, "loss": 0.2607, "step": 30064 }, { "epoch": 2.435596241088788, "grad_norm": 0.07147222012281418, "learning_rate": 8.691210225482696e-05, "loss": 0.2307, "step": 30065 }, { "epoch": 2.4356772521062866, "grad_norm": 0.0654640719294548, "learning_rate": 8.690760160223232e-05, "loss": 0.2552, "step": 30066 }, { "epoch": 2.435758263123785, "grad_norm": 0.06752464920282364, "learning_rate": 8.69031009496377e-05, "loss": 0.2487, "step": 30067 }, { "epoch": 2.435839274141283, "grad_norm": 0.07302305847406387, "learning_rate": 8.689860029704308e-05, "loss": 0.233, "step": 30068 }, { "epoch": 2.435920285158782, "grad_norm": 0.06815741211175919, "learning_rate": 8.689409964444844e-05, "loss": 0.2269, "step": 30069 }, { "epoch": 2.43600129617628, "grad_norm": 0.06292562186717987, "learning_rate": 8.688959899185383e-05, "loss": 0.225, "step": 30070 }, { "epoch": 2.4360823071937783, "grad_norm": 0.07034356892108917, "learning_rate": 8.68850983392592e-05, "loss": 0.2582, "step": 30071 }, { "epoch": 2.4361633182112765, "grad_norm": 0.06996940821409225, "learning_rate": 8.688059768666456e-05, "loss": 0.2468, "step": 30072 }, { "epoch": 2.436244329228775, "grad_norm": 0.08299688994884491, "learning_rate": 8.687609703406995e-05, "loss": 0.3314, "step": 30073 }, { "epoch": 2.4363253402462735, "grad_norm": 0.06599246710538864, "learning_rate": 8.687159638147532e-05, "loss": 0.2411, "step": 30074 }, { "epoch": 2.4364063512637717, "grad_norm": 0.06469972431659698, "learning_rate": 8.686709572888068e-05, "loss": 0.2836, "step": 30075 }, { "epoch": 2.4364873622812704, "grad_norm": 0.069060780107975, "learning_rate": 8.686259507628607e-05, "loss": 0.2874, "step": 30076 }, { "epoch": 2.4365683732987686, "grad_norm": 0.06383948773145676, "learning_rate": 8.685809442369144e-05, "loss": 0.2221, "step": 30077 }, { "epoch": 2.436649384316267, "grad_norm": 0.08201758563518524, "learning_rate": 8.685359377109682e-05, "loss": 0.2834, "step": 30078 }, { "epoch": 2.4367303953337656, "grad_norm": 0.06945295631885529, "learning_rate": 8.684909311850219e-05, "loss": 0.2383, "step": 30079 }, { "epoch": 2.436811406351264, "grad_norm": 0.06499353796243668, "learning_rate": 8.684459246590756e-05, "loss": 0.2813, "step": 30080 }, { "epoch": 2.436892417368762, "grad_norm": 0.0747925415635109, "learning_rate": 8.684009181331294e-05, "loss": 0.2787, "step": 30081 }, { "epoch": 2.4369734283862607, "grad_norm": 0.07593086361885071, "learning_rate": 8.683559116071831e-05, "loss": 0.2714, "step": 30082 }, { "epoch": 2.437054439403759, "grad_norm": 0.06988240033388138, "learning_rate": 8.683109050812368e-05, "loss": 0.2704, "step": 30083 }, { "epoch": 2.4371354504212572, "grad_norm": 0.06397940218448639, "learning_rate": 8.682658985552906e-05, "loss": 0.2307, "step": 30084 }, { "epoch": 2.4372164614387555, "grad_norm": 0.0741710364818573, "learning_rate": 8.682208920293443e-05, "loss": 0.2703, "step": 30085 }, { "epoch": 2.437297472456254, "grad_norm": 0.05661537125706673, "learning_rate": 8.68175885503398e-05, "loss": 0.2515, "step": 30086 }, { "epoch": 2.4373784834737524, "grad_norm": 0.070607990026474, "learning_rate": 8.681308789774518e-05, "loss": 0.2371, "step": 30087 }, { "epoch": 2.4374594944912507, "grad_norm": 0.07335694134235382, "learning_rate": 8.680858724515055e-05, "loss": 0.2893, "step": 30088 }, { "epoch": 2.4375405055087493, "grad_norm": 0.05634382739663124, "learning_rate": 8.680408659255592e-05, "loss": 0.2443, "step": 30089 }, { "epoch": 2.4376215165262476, "grad_norm": 0.06172090023756027, "learning_rate": 8.67995859399613e-05, "loss": 0.2265, "step": 30090 }, { "epoch": 2.437702527543746, "grad_norm": 0.07646841555833817, "learning_rate": 8.679508528736667e-05, "loss": 0.2593, "step": 30091 }, { "epoch": 2.4377835385612445, "grad_norm": 0.07664379477500916, "learning_rate": 8.679058463477205e-05, "loss": 0.2795, "step": 30092 }, { "epoch": 2.4378645495787428, "grad_norm": 0.10637167096138, "learning_rate": 8.678608398217742e-05, "loss": 0.3214, "step": 30093 }, { "epoch": 2.437945560596241, "grad_norm": 0.0653686448931694, "learning_rate": 8.678158332958279e-05, "loss": 0.2566, "step": 30094 }, { "epoch": 2.4380265716137393, "grad_norm": 0.06110738590359688, "learning_rate": 8.677708267698817e-05, "loss": 0.2642, "step": 30095 }, { "epoch": 2.438107582631238, "grad_norm": 0.06815928965806961, "learning_rate": 8.677258202439354e-05, "loss": 0.2397, "step": 30096 }, { "epoch": 2.438188593648736, "grad_norm": 0.07178793847560883, "learning_rate": 8.676808137179891e-05, "loss": 0.2464, "step": 30097 }, { "epoch": 2.4382696046662344, "grad_norm": 0.06039717420935631, "learning_rate": 8.676358071920429e-05, "loss": 0.2454, "step": 30098 }, { "epoch": 2.438350615683733, "grad_norm": 0.06946100294589996, "learning_rate": 8.675908006660966e-05, "loss": 0.2279, "step": 30099 }, { "epoch": 2.4384316267012314, "grad_norm": 0.04718714952468872, "learning_rate": 8.675457941401503e-05, "loss": 0.2418, "step": 30100 }, { "epoch": 2.4385126377187296, "grad_norm": 0.06509923934936523, "learning_rate": 8.675007876142041e-05, "loss": 0.2436, "step": 30101 }, { "epoch": 2.4385936487362283, "grad_norm": 0.06489907205104828, "learning_rate": 8.674557810882578e-05, "loss": 0.236, "step": 30102 }, { "epoch": 2.4386746597537265, "grad_norm": 0.06927445530891418, "learning_rate": 8.674107745623116e-05, "loss": 0.2661, "step": 30103 }, { "epoch": 2.438755670771225, "grad_norm": 0.07982465624809265, "learning_rate": 8.673657680363653e-05, "loss": 0.2622, "step": 30104 }, { "epoch": 2.4388366817887235, "grad_norm": 0.06970755010843277, "learning_rate": 8.67320761510419e-05, "loss": 0.2519, "step": 30105 }, { "epoch": 2.4389176928062217, "grad_norm": 0.0699467882514, "learning_rate": 8.672757549844728e-05, "loss": 0.2591, "step": 30106 }, { "epoch": 2.43899870382372, "grad_norm": 0.07506277412176132, "learning_rate": 8.672307484585265e-05, "loss": 0.2524, "step": 30107 }, { "epoch": 2.439079714841218, "grad_norm": 0.06829731166362762, "learning_rate": 8.671857419325802e-05, "loss": 0.2967, "step": 30108 }, { "epoch": 2.439160725858717, "grad_norm": 0.05720601603388786, "learning_rate": 8.67140735406634e-05, "loss": 0.1938, "step": 30109 }, { "epoch": 2.439241736876215, "grad_norm": 0.07455245405435562, "learning_rate": 8.670957288806878e-05, "loss": 0.2327, "step": 30110 }, { "epoch": 2.4393227478937134, "grad_norm": 0.07823212444782257, "learning_rate": 8.670507223547414e-05, "loss": 0.256, "step": 30111 }, { "epoch": 2.439403758911212, "grad_norm": 0.06638111919164658, "learning_rate": 8.670057158287952e-05, "loss": 0.1916, "step": 30112 }, { "epoch": 2.4394847699287103, "grad_norm": 0.07046499103307724, "learning_rate": 8.66960709302849e-05, "loss": 0.2316, "step": 30113 }, { "epoch": 2.4395657809462086, "grad_norm": 0.06909290701150894, "learning_rate": 8.669157027769026e-05, "loss": 0.234, "step": 30114 }, { "epoch": 2.439646791963707, "grad_norm": 0.0655369684100151, "learning_rate": 8.668706962509564e-05, "loss": 0.2252, "step": 30115 }, { "epoch": 2.4397278029812055, "grad_norm": 0.06978704035282135, "learning_rate": 8.668256897250103e-05, "loss": 0.2338, "step": 30116 }, { "epoch": 2.4398088139987038, "grad_norm": 0.07861284911632538, "learning_rate": 8.667806831990639e-05, "loss": 0.2589, "step": 30117 }, { "epoch": 2.439889825016202, "grad_norm": 0.07819448411464691, "learning_rate": 8.667356766731176e-05, "loss": 0.2219, "step": 30118 }, { "epoch": 2.4399708360337007, "grad_norm": 0.06307553499937057, "learning_rate": 8.666906701471715e-05, "loss": 0.2715, "step": 30119 }, { "epoch": 2.440051847051199, "grad_norm": 0.07494818419218063, "learning_rate": 8.66645663621225e-05, "loss": 0.2309, "step": 30120 }, { "epoch": 2.440132858068697, "grad_norm": 0.06351149082183838, "learning_rate": 8.666006570952788e-05, "loss": 0.251, "step": 30121 }, { "epoch": 2.440213869086196, "grad_norm": 0.04857697710394859, "learning_rate": 8.665556505693327e-05, "loss": 0.2666, "step": 30122 }, { "epoch": 2.440294880103694, "grad_norm": 0.06089930236339569, "learning_rate": 8.665106440433863e-05, "loss": 0.2118, "step": 30123 }, { "epoch": 2.4403758911211924, "grad_norm": 0.061717960983514786, "learning_rate": 8.6646563751744e-05, "loss": 0.246, "step": 30124 }, { "epoch": 2.440456902138691, "grad_norm": 0.05882474035024643, "learning_rate": 8.664206309914939e-05, "loss": 0.2653, "step": 30125 }, { "epoch": 2.4405379131561893, "grad_norm": 0.06826498359441757, "learning_rate": 8.663756244655475e-05, "loss": 0.2209, "step": 30126 }, { "epoch": 2.4406189241736875, "grad_norm": 0.066226065158844, "learning_rate": 8.663306179396012e-05, "loss": 0.2289, "step": 30127 }, { "epoch": 2.440699935191186, "grad_norm": 0.06775106489658356, "learning_rate": 8.662856114136551e-05, "loss": 0.2569, "step": 30128 }, { "epoch": 2.4407809462086845, "grad_norm": 0.058902762830257416, "learning_rate": 8.662406048877087e-05, "loss": 0.2281, "step": 30129 }, { "epoch": 2.4408619572261827, "grad_norm": 0.0739506185054779, "learning_rate": 8.661955983617624e-05, "loss": 0.2965, "step": 30130 }, { "epoch": 2.440942968243681, "grad_norm": 0.06172848120331764, "learning_rate": 8.661505918358163e-05, "loss": 0.2381, "step": 30131 }, { "epoch": 2.4410239792611796, "grad_norm": 0.07081998139619827, "learning_rate": 8.661055853098699e-05, "loss": 0.2702, "step": 30132 }, { "epoch": 2.441104990278678, "grad_norm": 0.07338794320821762, "learning_rate": 8.660605787839236e-05, "loss": 0.2521, "step": 30133 }, { "epoch": 2.441186001296176, "grad_norm": 0.06638986617326736, "learning_rate": 8.660155722579775e-05, "loss": 0.2489, "step": 30134 }, { "epoch": 2.441267012313675, "grad_norm": 0.07379947602748871, "learning_rate": 8.659705657320311e-05, "loss": 0.2803, "step": 30135 }, { "epoch": 2.441348023331173, "grad_norm": 0.0717678815126419, "learning_rate": 8.659255592060848e-05, "loss": 0.2377, "step": 30136 }, { "epoch": 2.4414290343486713, "grad_norm": 0.07228371500968933, "learning_rate": 8.658805526801387e-05, "loss": 0.2236, "step": 30137 }, { "epoch": 2.4415100453661696, "grad_norm": 0.06211762875318527, "learning_rate": 8.658355461541923e-05, "loss": 0.1953, "step": 30138 }, { "epoch": 2.4415910563836682, "grad_norm": 0.06627662479877472, "learning_rate": 8.657905396282462e-05, "loss": 0.2584, "step": 30139 }, { "epoch": 2.4416720674011665, "grad_norm": 0.06256860494613647, "learning_rate": 8.657455331022999e-05, "loss": 0.2335, "step": 30140 }, { "epoch": 2.4417530784186647, "grad_norm": 0.06448493897914886, "learning_rate": 8.657005265763535e-05, "loss": 0.2528, "step": 30141 }, { "epoch": 2.4418340894361634, "grad_norm": 0.06773277372121811, "learning_rate": 8.656555200504074e-05, "loss": 0.2519, "step": 30142 }, { "epoch": 2.4419151004536617, "grad_norm": 0.06808852404356003, "learning_rate": 8.656105135244611e-05, "loss": 0.2425, "step": 30143 }, { "epoch": 2.44199611147116, "grad_norm": 0.07154618948698044, "learning_rate": 8.655655069985149e-05, "loss": 0.2675, "step": 30144 }, { "epoch": 2.4420771224886586, "grad_norm": 0.05622822791337967, "learning_rate": 8.655205004725686e-05, "loss": 0.2169, "step": 30145 }, { "epoch": 2.442158133506157, "grad_norm": 0.06128344312310219, "learning_rate": 8.654754939466223e-05, "loss": 0.2408, "step": 30146 }, { "epoch": 2.442239144523655, "grad_norm": 0.061993323266506195, "learning_rate": 8.654304874206761e-05, "loss": 0.2565, "step": 30147 }, { "epoch": 2.442320155541154, "grad_norm": 0.06531644612550735, "learning_rate": 8.653854808947298e-05, "loss": 0.2509, "step": 30148 }, { "epoch": 2.442401166558652, "grad_norm": 0.048911985009908676, "learning_rate": 8.653404743687835e-05, "loss": 0.2278, "step": 30149 }, { "epoch": 2.4424821775761503, "grad_norm": 0.07343631982803345, "learning_rate": 8.652954678428373e-05, "loss": 0.2444, "step": 30150 }, { "epoch": 2.442563188593649, "grad_norm": 0.0558130107820034, "learning_rate": 8.65250461316891e-05, "loss": 0.2607, "step": 30151 }, { "epoch": 2.442644199611147, "grad_norm": 0.07380655407905579, "learning_rate": 8.652054547909448e-05, "loss": 0.223, "step": 30152 }, { "epoch": 2.4427252106286454, "grad_norm": 0.08603765815496445, "learning_rate": 8.651604482649985e-05, "loss": 0.2589, "step": 30153 }, { "epoch": 2.4428062216461437, "grad_norm": 0.08148400485515594, "learning_rate": 8.651154417390522e-05, "loss": 0.2296, "step": 30154 }, { "epoch": 2.4428872326636424, "grad_norm": 0.07049152255058289, "learning_rate": 8.65070435213106e-05, "loss": 0.2623, "step": 30155 }, { "epoch": 2.4429682436811406, "grad_norm": 0.06398821622133255, "learning_rate": 8.650254286871597e-05, "loss": 0.2785, "step": 30156 }, { "epoch": 2.443049254698639, "grad_norm": 0.05324990302324295, "learning_rate": 8.649804221612134e-05, "loss": 0.2106, "step": 30157 }, { "epoch": 2.4431302657161376, "grad_norm": 0.06434676051139832, "learning_rate": 8.649354156352672e-05, "loss": 0.235, "step": 30158 }, { "epoch": 2.443211276733636, "grad_norm": 0.06905324012041092, "learning_rate": 8.648904091093209e-05, "loss": 0.3151, "step": 30159 }, { "epoch": 2.443292287751134, "grad_norm": 0.06296160072088242, "learning_rate": 8.648454025833746e-05, "loss": 0.2356, "step": 30160 }, { "epoch": 2.4433732987686323, "grad_norm": 0.07445219904184341, "learning_rate": 8.648003960574284e-05, "loss": 0.2383, "step": 30161 }, { "epoch": 2.443454309786131, "grad_norm": 0.06814183294773102, "learning_rate": 8.647553895314821e-05, "loss": 0.249, "step": 30162 }, { "epoch": 2.4435353208036292, "grad_norm": 0.07182428240776062, "learning_rate": 8.647103830055358e-05, "loss": 0.2982, "step": 30163 }, { "epoch": 2.4436163318211275, "grad_norm": 0.05431900545954704, "learning_rate": 8.646653764795896e-05, "loss": 0.2317, "step": 30164 }, { "epoch": 2.443697342838626, "grad_norm": 0.06252321600914001, "learning_rate": 8.646203699536433e-05, "loss": 0.2462, "step": 30165 }, { "epoch": 2.4437783538561244, "grad_norm": 0.05793005973100662, "learning_rate": 8.64575363427697e-05, "loss": 0.2459, "step": 30166 }, { "epoch": 2.4438593648736227, "grad_norm": 0.06284428387880325, "learning_rate": 8.645303569017508e-05, "loss": 0.2341, "step": 30167 }, { "epoch": 2.4439403758911213, "grad_norm": 0.06669968366622925, "learning_rate": 8.644853503758045e-05, "loss": 0.2574, "step": 30168 }, { "epoch": 2.4440213869086196, "grad_norm": 0.06979219615459442, "learning_rate": 8.644403438498583e-05, "loss": 0.2617, "step": 30169 }, { "epoch": 2.444102397926118, "grad_norm": 0.07064683735370636, "learning_rate": 8.64395337323912e-05, "loss": 0.2584, "step": 30170 }, { "epoch": 2.4441834089436165, "grad_norm": 0.05474438518285751, "learning_rate": 8.643503307979657e-05, "loss": 0.2299, "step": 30171 }, { "epoch": 2.4442644199611148, "grad_norm": 0.08040313422679901, "learning_rate": 8.643053242720195e-05, "loss": 0.2606, "step": 30172 }, { "epoch": 2.444345430978613, "grad_norm": 0.062317855656147, "learning_rate": 8.642603177460732e-05, "loss": 0.2458, "step": 30173 }, { "epoch": 2.4444264419961117, "grad_norm": 0.060882315039634705, "learning_rate": 8.64215311220127e-05, "loss": 0.2343, "step": 30174 }, { "epoch": 2.44450745301361, "grad_norm": 0.05670086294412613, "learning_rate": 8.641703046941807e-05, "loss": 0.2096, "step": 30175 }, { "epoch": 2.444588464031108, "grad_norm": 0.06691998243331909, "learning_rate": 8.641252981682344e-05, "loss": 0.2181, "step": 30176 }, { "epoch": 2.4446694750486064, "grad_norm": 0.07000657916069031, "learning_rate": 8.640802916422882e-05, "loss": 0.2579, "step": 30177 }, { "epoch": 2.444750486066105, "grad_norm": 0.06200079247355461, "learning_rate": 8.640352851163419e-05, "loss": 0.2235, "step": 30178 }, { "epoch": 2.4448314970836034, "grad_norm": 0.08357629179954529, "learning_rate": 8.639902785903956e-05, "loss": 0.2811, "step": 30179 }, { "epoch": 2.4449125081011016, "grad_norm": 0.07436035573482513, "learning_rate": 8.639452720644494e-05, "loss": 0.262, "step": 30180 }, { "epoch": 2.4449935191186003, "grad_norm": 0.05238322168588638, "learning_rate": 8.639002655385031e-05, "loss": 0.2377, "step": 30181 }, { "epoch": 2.4450745301360985, "grad_norm": 0.06278589367866516, "learning_rate": 8.638552590125568e-05, "loss": 0.2956, "step": 30182 }, { "epoch": 2.445155541153597, "grad_norm": 0.0722695142030716, "learning_rate": 8.638102524866106e-05, "loss": 0.2302, "step": 30183 }, { "epoch": 2.445236552171095, "grad_norm": 0.05430447682738304, "learning_rate": 8.637652459606643e-05, "loss": 0.2463, "step": 30184 }, { "epoch": 2.4453175631885937, "grad_norm": 0.062322165817022324, "learning_rate": 8.63720239434718e-05, "loss": 0.2584, "step": 30185 }, { "epoch": 2.445398574206092, "grad_norm": 0.0586535707116127, "learning_rate": 8.636752329087718e-05, "loss": 0.2408, "step": 30186 }, { "epoch": 2.44547958522359, "grad_norm": 0.05980996787548065, "learning_rate": 8.636302263828255e-05, "loss": 0.244, "step": 30187 }, { "epoch": 2.445560596241089, "grad_norm": 0.06888397783041, "learning_rate": 8.635852198568792e-05, "loss": 0.2519, "step": 30188 }, { "epoch": 2.445641607258587, "grad_norm": 0.0636942908167839, "learning_rate": 8.63540213330933e-05, "loss": 0.2485, "step": 30189 }, { "epoch": 2.4457226182760854, "grad_norm": 0.07013783603906631, "learning_rate": 8.634952068049867e-05, "loss": 0.2381, "step": 30190 }, { "epoch": 2.445803629293584, "grad_norm": 0.06727758795022964, "learning_rate": 8.634502002790406e-05, "loss": 0.2452, "step": 30191 }, { "epoch": 2.4458846403110823, "grad_norm": 0.07199087738990784, "learning_rate": 8.634051937530942e-05, "loss": 0.2598, "step": 30192 }, { "epoch": 2.4459656513285806, "grad_norm": 0.06623843312263489, "learning_rate": 8.633601872271479e-05, "loss": 0.237, "step": 30193 }, { "epoch": 2.4460466623460793, "grad_norm": 0.05432756245136261, "learning_rate": 8.633151807012018e-05, "loss": 0.2641, "step": 30194 }, { "epoch": 2.4461276733635775, "grad_norm": 0.083702951669693, "learning_rate": 8.632701741752554e-05, "loss": 0.219, "step": 30195 }, { "epoch": 2.4462086843810757, "grad_norm": 0.06516814231872559, "learning_rate": 8.632251676493091e-05, "loss": 0.219, "step": 30196 }, { "epoch": 2.4462896953985744, "grad_norm": 0.0688856914639473, "learning_rate": 8.63180161123363e-05, "loss": 0.2319, "step": 30197 }, { "epoch": 2.4463707064160727, "grad_norm": 0.05996042490005493, "learning_rate": 8.631351545974166e-05, "loss": 0.2344, "step": 30198 }, { "epoch": 2.446451717433571, "grad_norm": 0.05216207727789879, "learning_rate": 8.630901480714703e-05, "loss": 0.211, "step": 30199 }, { "epoch": 2.446532728451069, "grad_norm": 0.0762057974934578, "learning_rate": 8.630451415455242e-05, "loss": 0.2654, "step": 30200 }, { "epoch": 2.446613739468568, "grad_norm": 0.07425080984830856, "learning_rate": 8.630001350195778e-05, "loss": 0.2564, "step": 30201 }, { "epoch": 2.446694750486066, "grad_norm": 0.0731613039970398, "learning_rate": 8.629551284936316e-05, "loss": 0.263, "step": 30202 }, { "epoch": 2.4467757615035644, "grad_norm": 0.06292743235826492, "learning_rate": 8.629101219676854e-05, "loss": 0.2536, "step": 30203 }, { "epoch": 2.446856772521063, "grad_norm": 0.05863060802221298, "learning_rate": 8.62865115441739e-05, "loss": 0.2441, "step": 30204 }, { "epoch": 2.4469377835385613, "grad_norm": 0.07905661314725876, "learning_rate": 8.628201089157928e-05, "loss": 0.2976, "step": 30205 }, { "epoch": 2.4470187945560595, "grad_norm": 0.07654749602079391, "learning_rate": 8.627751023898466e-05, "loss": 0.2482, "step": 30206 }, { "epoch": 2.4470998055735578, "grad_norm": 0.06803172081708908, "learning_rate": 8.627300958639002e-05, "loss": 0.259, "step": 30207 }, { "epoch": 2.4471808165910565, "grad_norm": 0.0701146125793457, "learning_rate": 8.62685089337954e-05, "loss": 0.281, "step": 30208 }, { "epoch": 2.4472618276085547, "grad_norm": 0.06699921190738678, "learning_rate": 8.626400828120078e-05, "loss": 0.2412, "step": 30209 }, { "epoch": 2.447342838626053, "grad_norm": 0.05488741770386696, "learning_rate": 8.625950762860614e-05, "loss": 0.2227, "step": 30210 }, { "epoch": 2.4474238496435516, "grad_norm": 0.06483423709869385, "learning_rate": 8.625500697601152e-05, "loss": 0.2543, "step": 30211 }, { "epoch": 2.44750486066105, "grad_norm": 0.07855366915464401, "learning_rate": 8.62505063234169e-05, "loss": 0.2256, "step": 30212 }, { "epoch": 2.447585871678548, "grad_norm": 0.06719925999641418, "learning_rate": 8.624600567082228e-05, "loss": 0.2345, "step": 30213 }, { "epoch": 2.447666882696047, "grad_norm": 0.07370990514755249, "learning_rate": 8.624150501822764e-05, "loss": 0.2294, "step": 30214 }, { "epoch": 2.447747893713545, "grad_norm": 0.06388029456138611, "learning_rate": 8.623700436563303e-05, "loss": 0.2191, "step": 30215 }, { "epoch": 2.4478289047310433, "grad_norm": 0.05369741469621658, "learning_rate": 8.62325037130384e-05, "loss": 0.1817, "step": 30216 }, { "epoch": 2.447909915748542, "grad_norm": 0.07407825440168381, "learning_rate": 8.622800306044377e-05, "loss": 0.2759, "step": 30217 }, { "epoch": 2.4479909267660402, "grad_norm": 0.07580500096082687, "learning_rate": 8.622350240784915e-05, "loss": 0.2373, "step": 30218 }, { "epoch": 2.4480719377835385, "grad_norm": 0.05918845906853676, "learning_rate": 8.621900175525452e-05, "loss": 0.253, "step": 30219 }, { "epoch": 2.448152948801037, "grad_norm": 0.05765198916196823, "learning_rate": 8.62145011026599e-05, "loss": 0.2461, "step": 30220 }, { "epoch": 2.4482339598185354, "grad_norm": 0.058827124536037445, "learning_rate": 8.621000045006527e-05, "loss": 0.2439, "step": 30221 }, { "epoch": 2.4483149708360337, "grad_norm": 0.06166047975420952, "learning_rate": 8.620549979747064e-05, "loss": 0.2563, "step": 30222 }, { "epoch": 2.448395981853532, "grad_norm": 0.05963588505983353, "learning_rate": 8.620099914487601e-05, "loss": 0.2635, "step": 30223 }, { "epoch": 2.4484769928710306, "grad_norm": 0.0705462247133255, "learning_rate": 8.619649849228139e-05, "loss": 0.2458, "step": 30224 }, { "epoch": 2.448558003888529, "grad_norm": 0.06184877082705498, "learning_rate": 8.619199783968676e-05, "loss": 0.27, "step": 30225 }, { "epoch": 2.448639014906027, "grad_norm": 0.07148279994726181, "learning_rate": 8.618749718709214e-05, "loss": 0.2715, "step": 30226 }, { "epoch": 2.448720025923526, "grad_norm": 0.08536798506975174, "learning_rate": 8.618299653449751e-05, "loss": 0.2776, "step": 30227 }, { "epoch": 2.448801036941024, "grad_norm": 0.06759648770093918, "learning_rate": 8.617849588190288e-05, "loss": 0.2646, "step": 30228 }, { "epoch": 2.4488820479585223, "grad_norm": 0.06979471445083618, "learning_rate": 8.617399522930826e-05, "loss": 0.2466, "step": 30229 }, { "epoch": 2.4489630589760205, "grad_norm": 0.06026214361190796, "learning_rate": 8.616949457671363e-05, "loss": 0.2331, "step": 30230 }, { "epoch": 2.449044069993519, "grad_norm": 0.05924506485462189, "learning_rate": 8.6164993924119e-05, "loss": 0.2982, "step": 30231 }, { "epoch": 2.4491250810110174, "grad_norm": 0.06539326161146164, "learning_rate": 8.616049327152438e-05, "loss": 0.2333, "step": 30232 }, { "epoch": 2.4492060920285157, "grad_norm": 0.07669103890657425, "learning_rate": 8.615599261892975e-05, "loss": 0.2756, "step": 30233 }, { "epoch": 2.4492871030460144, "grad_norm": 0.06633754819631577, "learning_rate": 8.615149196633512e-05, "loss": 0.2472, "step": 30234 }, { "epoch": 2.4493681140635126, "grad_norm": 0.06898613274097443, "learning_rate": 8.61469913137405e-05, "loss": 0.2201, "step": 30235 }, { "epoch": 2.449449125081011, "grad_norm": 0.06048338860273361, "learning_rate": 8.614249066114587e-05, "loss": 0.2409, "step": 30236 }, { "epoch": 2.4495301360985096, "grad_norm": 0.05309195816516876, "learning_rate": 8.613799000855125e-05, "loss": 0.235, "step": 30237 }, { "epoch": 2.449611147116008, "grad_norm": 0.0583578422665596, "learning_rate": 8.613348935595662e-05, "loss": 0.2428, "step": 30238 }, { "epoch": 2.449692158133506, "grad_norm": 0.056156620383262634, "learning_rate": 8.612898870336199e-05, "loss": 0.253, "step": 30239 }, { "epoch": 2.4497731691510047, "grad_norm": 0.06680503487586975, "learning_rate": 8.612448805076737e-05, "loss": 0.2071, "step": 30240 }, { "epoch": 2.449854180168503, "grad_norm": 0.07724934816360474, "learning_rate": 8.611998739817274e-05, "loss": 0.2515, "step": 30241 }, { "epoch": 2.4499351911860012, "grad_norm": 0.0604155957698822, "learning_rate": 8.611548674557811e-05, "loss": 0.218, "step": 30242 }, { "epoch": 2.4500162022035, "grad_norm": 0.06490961462259293, "learning_rate": 8.611098609298349e-05, "loss": 0.2586, "step": 30243 }, { "epoch": 2.450097213220998, "grad_norm": 0.05839584767818451, "learning_rate": 8.610648544038886e-05, "loss": 0.2598, "step": 30244 }, { "epoch": 2.4501782242384964, "grad_norm": 0.08250536024570465, "learning_rate": 8.610198478779423e-05, "loss": 0.2916, "step": 30245 }, { "epoch": 2.4502592352559946, "grad_norm": 0.06101342290639877, "learning_rate": 8.609748413519961e-05, "loss": 0.2361, "step": 30246 }, { "epoch": 2.4503402462734933, "grad_norm": 0.06912539154291153, "learning_rate": 8.609298348260498e-05, "loss": 0.251, "step": 30247 }, { "epoch": 2.4504212572909916, "grad_norm": 0.0605338029563427, "learning_rate": 8.608848283001035e-05, "loss": 0.2406, "step": 30248 }, { "epoch": 2.45050226830849, "grad_norm": 0.06752576678991318, "learning_rate": 8.608398217741573e-05, "loss": 0.2248, "step": 30249 }, { "epoch": 2.4505832793259885, "grad_norm": 0.07446485757827759, "learning_rate": 8.60794815248211e-05, "loss": 0.2459, "step": 30250 }, { "epoch": 2.4506642903434868, "grad_norm": 0.07084500789642334, "learning_rate": 8.607498087222648e-05, "loss": 0.2724, "step": 30251 }, { "epoch": 2.450745301360985, "grad_norm": 0.05843502655625343, "learning_rate": 8.607048021963185e-05, "loss": 0.231, "step": 30252 }, { "epoch": 2.4508263123784833, "grad_norm": 0.07239796966314316, "learning_rate": 8.606597956703722e-05, "loss": 0.2943, "step": 30253 }, { "epoch": 2.450907323395982, "grad_norm": 0.05716480687260628, "learning_rate": 8.60614789144426e-05, "loss": 0.2787, "step": 30254 }, { "epoch": 2.45098833441348, "grad_norm": 0.07082024216651917, "learning_rate": 8.605697826184797e-05, "loss": 0.2477, "step": 30255 }, { "epoch": 2.4510693454309784, "grad_norm": 0.06596090644598007, "learning_rate": 8.605247760925334e-05, "loss": 0.2573, "step": 30256 }, { "epoch": 2.451150356448477, "grad_norm": 0.06249532848596573, "learning_rate": 8.604797695665872e-05, "loss": 0.2237, "step": 30257 }, { "epoch": 2.4512313674659754, "grad_norm": 0.05652106925845146, "learning_rate": 8.604347630406409e-05, "loss": 0.2822, "step": 30258 }, { "epoch": 2.4513123784834736, "grad_norm": 0.07296479493379593, "learning_rate": 8.603897565146946e-05, "loss": 0.2955, "step": 30259 }, { "epoch": 2.4513933895009723, "grad_norm": 0.07375498861074448, "learning_rate": 8.603447499887484e-05, "loss": 0.1986, "step": 30260 }, { "epoch": 2.4514744005184705, "grad_norm": 0.07448673248291016, "learning_rate": 8.602997434628021e-05, "loss": 0.2583, "step": 30261 }, { "epoch": 2.451555411535969, "grad_norm": 0.061081256717443466, "learning_rate": 8.602547369368559e-05, "loss": 0.2175, "step": 30262 }, { "epoch": 2.4516364225534675, "grad_norm": 0.08052729070186615, "learning_rate": 8.602097304109096e-05, "loss": 0.2595, "step": 30263 }, { "epoch": 2.4517174335709657, "grad_norm": 0.058177463710308075, "learning_rate": 8.601647238849633e-05, "loss": 0.2324, "step": 30264 }, { "epoch": 2.451798444588464, "grad_norm": 0.06863915175199509, "learning_rate": 8.60119717359017e-05, "loss": 0.2182, "step": 30265 }, { "epoch": 2.4518794556059627, "grad_norm": 0.0763634443283081, "learning_rate": 8.600747108330708e-05, "loss": 0.2431, "step": 30266 }, { "epoch": 2.451960466623461, "grad_norm": 0.06735827773809433, "learning_rate": 8.600297043071245e-05, "loss": 0.2313, "step": 30267 }, { "epoch": 2.452041477640959, "grad_norm": 0.06448716670274734, "learning_rate": 8.599846977811783e-05, "loss": 0.2683, "step": 30268 }, { "epoch": 2.4521224886584574, "grad_norm": 0.07303851842880249, "learning_rate": 8.599396912552321e-05, "loss": 0.2663, "step": 30269 }, { "epoch": 2.452203499675956, "grad_norm": 0.07002940028905869, "learning_rate": 8.598946847292857e-05, "loss": 0.2348, "step": 30270 }, { "epoch": 2.4522845106934543, "grad_norm": 0.05738309770822525, "learning_rate": 8.598496782033395e-05, "loss": 0.2549, "step": 30271 }, { "epoch": 2.4523655217109526, "grad_norm": 0.06919324398040771, "learning_rate": 8.598046716773933e-05, "loss": 0.2567, "step": 30272 }, { "epoch": 2.4524465327284513, "grad_norm": 0.06908387690782547, "learning_rate": 8.59759665151447e-05, "loss": 0.3042, "step": 30273 }, { "epoch": 2.4525275437459495, "grad_norm": 0.08584818243980408, "learning_rate": 8.597146586255007e-05, "loss": 0.26, "step": 30274 }, { "epoch": 2.4526085547634477, "grad_norm": 0.05924270302057266, "learning_rate": 8.596696520995546e-05, "loss": 0.2441, "step": 30275 }, { "epoch": 2.452689565780946, "grad_norm": 0.05678810179233551, "learning_rate": 8.596246455736082e-05, "loss": 0.2191, "step": 30276 }, { "epoch": 2.4527705767984447, "grad_norm": 0.07264811545610428, "learning_rate": 8.595796390476619e-05, "loss": 0.2639, "step": 30277 }, { "epoch": 2.452851587815943, "grad_norm": 0.07443877309560776, "learning_rate": 8.595346325217158e-05, "loss": 0.2369, "step": 30278 }, { "epoch": 2.452932598833441, "grad_norm": 0.06062542647123337, "learning_rate": 8.594896259957694e-05, "loss": 0.2448, "step": 30279 }, { "epoch": 2.45301360985094, "grad_norm": 0.06636027246713638, "learning_rate": 8.594446194698231e-05, "loss": 0.2481, "step": 30280 }, { "epoch": 2.453094620868438, "grad_norm": 0.06837310642004013, "learning_rate": 8.59399612943877e-05, "loss": 0.2091, "step": 30281 }, { "epoch": 2.4531756318859363, "grad_norm": 0.061930716037750244, "learning_rate": 8.593546064179307e-05, "loss": 0.233, "step": 30282 }, { "epoch": 2.453256642903435, "grad_norm": 0.06425345689058304, "learning_rate": 8.593095998919843e-05, "loss": 0.2712, "step": 30283 }, { "epoch": 2.4533376539209333, "grad_norm": 0.05221385881304741, "learning_rate": 8.592645933660382e-05, "loss": 0.1921, "step": 30284 }, { "epoch": 2.4534186649384315, "grad_norm": 0.06649923324584961, "learning_rate": 8.592195868400919e-05, "loss": 0.2396, "step": 30285 }, { "epoch": 2.45349967595593, "grad_norm": 0.05886203050613403, "learning_rate": 8.591745803141455e-05, "loss": 0.2474, "step": 30286 }, { "epoch": 2.4535806869734285, "grad_norm": 0.08170748502016068, "learning_rate": 8.591295737881994e-05, "loss": 0.1982, "step": 30287 }, { "epoch": 2.4536616979909267, "grad_norm": 0.06259898841381073, "learning_rate": 8.590845672622531e-05, "loss": 0.2768, "step": 30288 }, { "epoch": 2.4537427090084254, "grad_norm": 0.05977996066212654, "learning_rate": 8.590395607363067e-05, "loss": 0.263, "step": 30289 }, { "epoch": 2.4538237200259236, "grad_norm": 0.07319679111242294, "learning_rate": 8.589945542103606e-05, "loss": 0.2685, "step": 30290 }, { "epoch": 2.453904731043422, "grad_norm": 0.07402021437883377, "learning_rate": 8.589495476844143e-05, "loss": 0.2638, "step": 30291 }, { "epoch": 2.45398574206092, "grad_norm": 0.0682920292019844, "learning_rate": 8.58904541158468e-05, "loss": 0.2361, "step": 30292 }, { "epoch": 2.454066753078419, "grad_norm": 0.08681609481573105, "learning_rate": 8.588595346325218e-05, "loss": 0.2554, "step": 30293 }, { "epoch": 2.454147764095917, "grad_norm": 0.07418433576822281, "learning_rate": 8.588145281065755e-05, "loss": 0.2437, "step": 30294 }, { "epoch": 2.4542287751134153, "grad_norm": 0.08345085382461548, "learning_rate": 8.587695215806291e-05, "loss": 0.2521, "step": 30295 }, { "epoch": 2.454309786130914, "grad_norm": 0.06327337771654129, "learning_rate": 8.58724515054683e-05, "loss": 0.222, "step": 30296 }, { "epoch": 2.4543907971484122, "grad_norm": 0.060608334839344025, "learning_rate": 8.586795085287367e-05, "loss": 0.222, "step": 30297 }, { "epoch": 2.4544718081659105, "grad_norm": 0.0566270612180233, "learning_rate": 8.586345020027905e-05, "loss": 0.2147, "step": 30298 }, { "epoch": 2.4545528191834087, "grad_norm": 0.06820667535066605, "learning_rate": 8.585894954768442e-05, "loss": 0.2288, "step": 30299 }, { "epoch": 2.4546338302009074, "grad_norm": 0.06401017308235168, "learning_rate": 8.58544488950898e-05, "loss": 0.2591, "step": 30300 }, { "epoch": 2.4547148412184057, "grad_norm": 0.06722749769687653, "learning_rate": 8.584994824249517e-05, "loss": 0.2487, "step": 30301 }, { "epoch": 2.454795852235904, "grad_norm": 0.07767395675182343, "learning_rate": 8.584544758990054e-05, "loss": 0.2766, "step": 30302 }, { "epoch": 2.4548768632534026, "grad_norm": 0.06633684784173965, "learning_rate": 8.584094693730592e-05, "loss": 0.2748, "step": 30303 }, { "epoch": 2.454957874270901, "grad_norm": 0.05974660813808441, "learning_rate": 8.583644628471129e-05, "loss": 0.2275, "step": 30304 }, { "epoch": 2.455038885288399, "grad_norm": 0.07270497828722, "learning_rate": 8.583194563211666e-05, "loss": 0.2186, "step": 30305 }, { "epoch": 2.4551198963058978, "grad_norm": 0.09797348827123642, "learning_rate": 8.582744497952204e-05, "loss": 0.2735, "step": 30306 }, { "epoch": 2.455200907323396, "grad_norm": 0.07703609019517899, "learning_rate": 8.582294432692741e-05, "loss": 0.2192, "step": 30307 }, { "epoch": 2.4552819183408943, "grad_norm": 0.06129564717411995, "learning_rate": 8.581844367433278e-05, "loss": 0.2369, "step": 30308 }, { "epoch": 2.455362929358393, "grad_norm": 0.06443587690591812, "learning_rate": 8.581394302173816e-05, "loss": 0.2501, "step": 30309 }, { "epoch": 2.455443940375891, "grad_norm": 0.07890526205301285, "learning_rate": 8.580944236914353e-05, "loss": 0.2453, "step": 30310 }, { "epoch": 2.4555249513933894, "grad_norm": 0.06493376940488815, "learning_rate": 8.58049417165489e-05, "loss": 0.2391, "step": 30311 }, { "epoch": 2.4556059624108877, "grad_norm": 0.06649234890937805, "learning_rate": 8.580044106395428e-05, "loss": 0.2606, "step": 30312 }, { "epoch": 2.4556869734283864, "grad_norm": 0.05301273614168167, "learning_rate": 8.579594041135965e-05, "loss": 0.2416, "step": 30313 }, { "epoch": 2.4557679844458846, "grad_norm": 0.07830186933279037, "learning_rate": 8.579143975876503e-05, "loss": 0.2389, "step": 30314 }, { "epoch": 2.455848995463383, "grad_norm": 0.07225922495126724, "learning_rate": 8.57869391061704e-05, "loss": 0.2436, "step": 30315 }, { "epoch": 2.4559300064808816, "grad_norm": 0.062381599098443985, "learning_rate": 8.578243845357577e-05, "loss": 0.2313, "step": 30316 }, { "epoch": 2.45601101749838, "grad_norm": 0.06470402330160141, "learning_rate": 8.577793780098115e-05, "loss": 0.2578, "step": 30317 }, { "epoch": 2.456092028515878, "grad_norm": 0.06753360480070114, "learning_rate": 8.577343714838652e-05, "loss": 0.2984, "step": 30318 }, { "epoch": 2.4561730395333763, "grad_norm": 0.051338646560907364, "learning_rate": 8.57689364957919e-05, "loss": 0.2128, "step": 30319 }, { "epoch": 2.456254050550875, "grad_norm": 0.0592113696038723, "learning_rate": 8.576443584319727e-05, "loss": 0.236, "step": 30320 }, { "epoch": 2.4563350615683732, "grad_norm": 0.05667173117399216, "learning_rate": 8.575993519060264e-05, "loss": 0.2514, "step": 30321 }, { "epoch": 2.4564160725858715, "grad_norm": 0.07165750861167908, "learning_rate": 8.575543453800801e-05, "loss": 0.2425, "step": 30322 }, { "epoch": 2.45649708360337, "grad_norm": 0.06985411047935486, "learning_rate": 8.575093388541339e-05, "loss": 0.29, "step": 30323 }, { "epoch": 2.4565780946208684, "grad_norm": 0.06525350362062454, "learning_rate": 8.574643323281876e-05, "loss": 0.2371, "step": 30324 }, { "epoch": 2.4566591056383666, "grad_norm": 0.0659409761428833, "learning_rate": 8.574193258022414e-05, "loss": 0.2527, "step": 30325 }, { "epoch": 2.4567401166558653, "grad_norm": 0.055850714445114136, "learning_rate": 8.573743192762951e-05, "loss": 0.2211, "step": 30326 }, { "epoch": 2.4568211276733636, "grad_norm": 0.05591071397066116, "learning_rate": 8.573293127503488e-05, "loss": 0.2212, "step": 30327 }, { "epoch": 2.456902138690862, "grad_norm": 0.06260688602924347, "learning_rate": 8.572843062244026e-05, "loss": 0.2319, "step": 30328 }, { "epoch": 2.4569831497083605, "grad_norm": 0.07620882242918015, "learning_rate": 8.572392996984563e-05, "loss": 0.3032, "step": 30329 }, { "epoch": 2.4570641607258588, "grad_norm": 0.0697203055024147, "learning_rate": 8.5719429317251e-05, "loss": 0.2622, "step": 30330 }, { "epoch": 2.457145171743357, "grad_norm": 0.05793391913175583, "learning_rate": 8.571492866465638e-05, "loss": 0.2152, "step": 30331 }, { "epoch": 2.4572261827608557, "grad_norm": 0.07787619531154633, "learning_rate": 8.571042801206175e-05, "loss": 0.2662, "step": 30332 }, { "epoch": 2.457307193778354, "grad_norm": 0.0645591989159584, "learning_rate": 8.570592735946712e-05, "loss": 0.232, "step": 30333 }, { "epoch": 2.457388204795852, "grad_norm": 0.07003186643123627, "learning_rate": 8.57014267068725e-05, "loss": 0.2611, "step": 30334 }, { "epoch": 2.4574692158133504, "grad_norm": 0.0585886612534523, "learning_rate": 8.569692605427787e-05, "loss": 0.2741, "step": 30335 }, { "epoch": 2.457550226830849, "grad_norm": 0.059686798602342606, "learning_rate": 8.569242540168325e-05, "loss": 0.1979, "step": 30336 }, { "epoch": 2.4576312378483474, "grad_norm": 0.07068637758493423, "learning_rate": 8.568792474908862e-05, "loss": 0.2559, "step": 30337 }, { "epoch": 2.4577122488658456, "grad_norm": 0.07276032119989395, "learning_rate": 8.568342409649399e-05, "loss": 0.2432, "step": 30338 }, { "epoch": 2.4577932598833443, "grad_norm": 0.06920722872018814, "learning_rate": 8.567892344389937e-05, "loss": 0.2193, "step": 30339 }, { "epoch": 2.4578742709008425, "grad_norm": 0.08199632167816162, "learning_rate": 8.567442279130474e-05, "loss": 0.275, "step": 30340 }, { "epoch": 2.457955281918341, "grad_norm": 0.06501300632953644, "learning_rate": 8.566992213871011e-05, "loss": 0.2413, "step": 30341 }, { "epoch": 2.458036292935839, "grad_norm": 0.07494282722473145, "learning_rate": 8.566542148611549e-05, "loss": 0.277, "step": 30342 }, { "epoch": 2.4581173039533377, "grad_norm": 0.07412391155958176, "learning_rate": 8.566092083352086e-05, "loss": 0.253, "step": 30343 }, { "epoch": 2.458198314970836, "grad_norm": 0.07492408156394958, "learning_rate": 8.565642018092623e-05, "loss": 0.2307, "step": 30344 }, { "epoch": 2.458279325988334, "grad_norm": 0.05356239527463913, "learning_rate": 8.565191952833161e-05, "loss": 0.2513, "step": 30345 }, { "epoch": 2.458360337005833, "grad_norm": 0.06325045973062515, "learning_rate": 8.564741887573698e-05, "loss": 0.2453, "step": 30346 }, { "epoch": 2.458441348023331, "grad_norm": 0.06481393426656723, "learning_rate": 8.564291822314236e-05, "loss": 0.2395, "step": 30347 }, { "epoch": 2.4585223590408294, "grad_norm": 0.06986995786428452, "learning_rate": 8.563841757054773e-05, "loss": 0.268, "step": 30348 }, { "epoch": 2.458603370058328, "grad_norm": 0.07024464011192322, "learning_rate": 8.56339169179531e-05, "loss": 0.2651, "step": 30349 }, { "epoch": 2.4586843810758263, "grad_norm": 0.0805504098534584, "learning_rate": 8.562941626535849e-05, "loss": 0.2235, "step": 30350 }, { "epoch": 2.4587653920933246, "grad_norm": 0.07116348296403885, "learning_rate": 8.562491561276386e-05, "loss": 0.2771, "step": 30351 }, { "epoch": 2.4588464031108233, "grad_norm": 0.06739845871925354, "learning_rate": 8.562041496016922e-05, "loss": 0.2338, "step": 30352 }, { "epoch": 2.4589274141283215, "grad_norm": 0.08376084268093109, "learning_rate": 8.561591430757461e-05, "loss": 0.2715, "step": 30353 }, { "epoch": 2.4590084251458197, "grad_norm": 0.06276308000087738, "learning_rate": 8.561141365497998e-05, "loss": 0.2489, "step": 30354 }, { "epoch": 2.4590894361633184, "grad_norm": 0.07075909525156021, "learning_rate": 8.560691300238534e-05, "loss": 0.2909, "step": 30355 }, { "epoch": 2.4591704471808167, "grad_norm": 0.061290279030799866, "learning_rate": 8.560241234979073e-05, "loss": 0.2094, "step": 30356 }, { "epoch": 2.459251458198315, "grad_norm": 0.05792885273694992, "learning_rate": 8.55979116971961e-05, "loss": 0.2233, "step": 30357 }, { "epoch": 2.459332469215813, "grad_norm": 0.07260933518409729, "learning_rate": 8.559341104460146e-05, "loss": 0.2944, "step": 30358 }, { "epoch": 2.459413480233312, "grad_norm": 0.06975215673446655, "learning_rate": 8.558891039200685e-05, "loss": 0.2188, "step": 30359 }, { "epoch": 2.45949449125081, "grad_norm": 0.07033288478851318, "learning_rate": 8.558440973941223e-05, "loss": 0.2467, "step": 30360 }, { "epoch": 2.4595755022683083, "grad_norm": 0.07207024097442627, "learning_rate": 8.557990908681759e-05, "loss": 0.2752, "step": 30361 }, { "epoch": 2.459656513285807, "grad_norm": 0.0666518434882164, "learning_rate": 8.557540843422297e-05, "loss": 0.2163, "step": 30362 }, { "epoch": 2.4597375243033053, "grad_norm": 0.0729021430015564, "learning_rate": 8.557090778162835e-05, "loss": 0.2594, "step": 30363 }, { "epoch": 2.4598185353208035, "grad_norm": 0.0761818215250969, "learning_rate": 8.55664071290337e-05, "loss": 0.2738, "step": 30364 }, { "epoch": 2.4598995463383018, "grad_norm": 0.07403729110956192, "learning_rate": 8.55619064764391e-05, "loss": 0.2168, "step": 30365 }, { "epoch": 2.4599805573558005, "grad_norm": 0.06657052785158157, "learning_rate": 8.555740582384447e-05, "loss": 0.2402, "step": 30366 }, { "epoch": 2.4600615683732987, "grad_norm": 0.07630985975265503, "learning_rate": 8.555290517124983e-05, "loss": 0.2508, "step": 30367 }, { "epoch": 2.460142579390797, "grad_norm": 0.06758099794387817, "learning_rate": 8.554840451865521e-05, "loss": 0.2263, "step": 30368 }, { "epoch": 2.4602235904082956, "grad_norm": 0.06532974541187286, "learning_rate": 8.554390386606059e-05, "loss": 0.2811, "step": 30369 }, { "epoch": 2.460304601425794, "grad_norm": 0.06683867424726486, "learning_rate": 8.553940321346595e-05, "loss": 0.2445, "step": 30370 }, { "epoch": 2.460385612443292, "grad_norm": 0.06784668564796448, "learning_rate": 8.553490256087134e-05, "loss": 0.2371, "step": 30371 }, { "epoch": 2.460466623460791, "grad_norm": 0.07643197476863861, "learning_rate": 8.553040190827671e-05, "loss": 0.2522, "step": 30372 }, { "epoch": 2.460547634478289, "grad_norm": 0.08601364493370056, "learning_rate": 8.552590125568207e-05, "loss": 0.2702, "step": 30373 }, { "epoch": 2.4606286454957873, "grad_norm": 0.06803518533706665, "learning_rate": 8.552140060308746e-05, "loss": 0.2846, "step": 30374 }, { "epoch": 2.460709656513286, "grad_norm": 0.07338423281908035, "learning_rate": 8.551689995049283e-05, "loss": 0.2245, "step": 30375 }, { "epoch": 2.4607906675307842, "grad_norm": 0.05104734003543854, "learning_rate": 8.55123992978982e-05, "loss": 0.2611, "step": 30376 }, { "epoch": 2.4608716785482825, "grad_norm": 0.07619819790124893, "learning_rate": 8.550789864530358e-05, "loss": 0.2321, "step": 30377 }, { "epoch": 2.460952689565781, "grad_norm": 0.06153459474444389, "learning_rate": 8.550339799270895e-05, "loss": 0.2389, "step": 30378 }, { "epoch": 2.4610337005832794, "grad_norm": 0.0647321343421936, "learning_rate": 8.549889734011432e-05, "loss": 0.2187, "step": 30379 }, { "epoch": 2.4611147116007777, "grad_norm": 0.06784353405237198, "learning_rate": 8.54943966875197e-05, "loss": 0.2167, "step": 30380 }, { "epoch": 2.461195722618276, "grad_norm": 0.06753828376531601, "learning_rate": 8.548989603492507e-05, "loss": 0.2649, "step": 30381 }, { "epoch": 2.4612767336357746, "grad_norm": 0.07720842957496643, "learning_rate": 8.548539538233044e-05, "loss": 0.2522, "step": 30382 }, { "epoch": 2.461357744653273, "grad_norm": 0.07862772792577744, "learning_rate": 8.548089472973582e-05, "loss": 0.2818, "step": 30383 }, { "epoch": 2.461438755670771, "grad_norm": 0.08115944266319275, "learning_rate": 8.547639407714119e-05, "loss": 0.2641, "step": 30384 }, { "epoch": 2.4615197666882698, "grad_norm": 0.07155396789312363, "learning_rate": 8.547189342454657e-05, "loss": 0.2993, "step": 30385 }, { "epoch": 2.461600777705768, "grad_norm": 0.07185786217451096, "learning_rate": 8.546739277195194e-05, "loss": 0.2796, "step": 30386 }, { "epoch": 2.4616817887232663, "grad_norm": 0.06836527585983276, "learning_rate": 8.546289211935731e-05, "loss": 0.2409, "step": 30387 }, { "epoch": 2.4617627997407645, "grad_norm": 0.07136380672454834, "learning_rate": 8.545839146676269e-05, "loss": 0.25, "step": 30388 }, { "epoch": 2.461843810758263, "grad_norm": 0.07146602123975754, "learning_rate": 8.545389081416806e-05, "loss": 0.2616, "step": 30389 }, { "epoch": 2.4619248217757614, "grad_norm": 0.06611467897891998, "learning_rate": 8.544939016157343e-05, "loss": 0.2365, "step": 30390 }, { "epoch": 2.4620058327932597, "grad_norm": 0.06571775674819946, "learning_rate": 8.544488950897881e-05, "loss": 0.2619, "step": 30391 }, { "epoch": 2.4620868438107584, "grad_norm": 0.06344595551490784, "learning_rate": 8.544038885638418e-05, "loss": 0.2376, "step": 30392 }, { "epoch": 2.4621678548282566, "grad_norm": 0.06370608508586884, "learning_rate": 8.543588820378955e-05, "loss": 0.215, "step": 30393 }, { "epoch": 2.462248865845755, "grad_norm": 0.07206301391124725, "learning_rate": 8.543138755119493e-05, "loss": 0.2498, "step": 30394 }, { "epoch": 2.4623298768632536, "grad_norm": 0.06672500818967819, "learning_rate": 8.54268868986003e-05, "loss": 0.1975, "step": 30395 }, { "epoch": 2.462410887880752, "grad_norm": 0.05213450640439987, "learning_rate": 8.542238624600568e-05, "loss": 0.2409, "step": 30396 }, { "epoch": 2.46249189889825, "grad_norm": 0.07119280099868774, "learning_rate": 8.541788559341105e-05, "loss": 0.2497, "step": 30397 }, { "epoch": 2.4625729099157487, "grad_norm": 0.06520720571279526, "learning_rate": 8.541338494081642e-05, "loss": 0.2301, "step": 30398 }, { "epoch": 2.462653920933247, "grad_norm": 0.08692757040262222, "learning_rate": 8.54088842882218e-05, "loss": 0.2663, "step": 30399 }, { "epoch": 2.462734931950745, "grad_norm": 0.07218527793884277, "learning_rate": 8.540438363562717e-05, "loss": 0.241, "step": 30400 }, { "epoch": 2.462815942968244, "grad_norm": 0.07891545444726944, "learning_rate": 8.539988298303254e-05, "loss": 0.242, "step": 30401 }, { "epoch": 2.462896953985742, "grad_norm": 0.07669158279895782, "learning_rate": 8.539538233043792e-05, "loss": 0.2612, "step": 30402 }, { "epoch": 2.4629779650032404, "grad_norm": 0.06553692370653152, "learning_rate": 8.539088167784329e-05, "loss": 0.2627, "step": 30403 }, { "epoch": 2.4630589760207386, "grad_norm": 0.07745185494422913, "learning_rate": 8.538638102524866e-05, "loss": 0.2248, "step": 30404 }, { "epoch": 2.4631399870382373, "grad_norm": 0.07231428474187851, "learning_rate": 8.538188037265404e-05, "loss": 0.2218, "step": 30405 }, { "epoch": 2.4632209980557356, "grad_norm": 0.06869354099035263, "learning_rate": 8.537737972005941e-05, "loss": 0.2896, "step": 30406 }, { "epoch": 2.463302009073234, "grad_norm": 0.07032286375761032, "learning_rate": 8.537287906746478e-05, "loss": 0.2081, "step": 30407 }, { "epoch": 2.4633830200907325, "grad_norm": 0.06324692070484161, "learning_rate": 8.536837841487016e-05, "loss": 0.2233, "step": 30408 }, { "epoch": 2.4634640311082308, "grad_norm": 0.07106756418943405, "learning_rate": 8.536387776227553e-05, "loss": 0.2105, "step": 30409 }, { "epoch": 2.463545042125729, "grad_norm": 0.05563431605696678, "learning_rate": 8.53593771096809e-05, "loss": 0.1939, "step": 30410 }, { "epoch": 2.4636260531432272, "grad_norm": 0.06630561500787735, "learning_rate": 8.535487645708628e-05, "loss": 0.2427, "step": 30411 }, { "epoch": 2.463707064160726, "grad_norm": 0.06674076616764069, "learning_rate": 8.535037580449165e-05, "loss": 0.2506, "step": 30412 }, { "epoch": 2.463788075178224, "grad_norm": 0.07224856317043304, "learning_rate": 8.534587515189703e-05, "loss": 0.282, "step": 30413 }, { "epoch": 2.4638690861957224, "grad_norm": 0.05965695530176163, "learning_rate": 8.53413744993024e-05, "loss": 0.2581, "step": 30414 }, { "epoch": 2.463950097213221, "grad_norm": 0.057730019092559814, "learning_rate": 8.533687384670777e-05, "loss": 0.2325, "step": 30415 }, { "epoch": 2.4640311082307194, "grad_norm": 0.0637093186378479, "learning_rate": 8.533237319411315e-05, "loss": 0.2286, "step": 30416 }, { "epoch": 2.4641121192482176, "grad_norm": 0.06882121413946152, "learning_rate": 8.532787254151853e-05, "loss": 0.2783, "step": 30417 }, { "epoch": 2.4641931302657163, "grad_norm": 0.07381433993577957, "learning_rate": 8.53233718889239e-05, "loss": 0.2349, "step": 30418 }, { "epoch": 2.4642741412832145, "grad_norm": 0.058938298374414444, "learning_rate": 8.531887123632927e-05, "loss": 0.2514, "step": 30419 }, { "epoch": 2.464355152300713, "grad_norm": 0.07231079787015915, "learning_rate": 8.531437058373466e-05, "loss": 0.2605, "step": 30420 }, { "epoch": 2.4644361633182115, "grad_norm": 0.06277959793806076, "learning_rate": 8.530986993114002e-05, "loss": 0.2499, "step": 30421 }, { "epoch": 2.4645171743357097, "grad_norm": 0.07568095624446869, "learning_rate": 8.530536927854539e-05, "loss": 0.2548, "step": 30422 }, { "epoch": 2.464598185353208, "grad_norm": 0.06656137108802795, "learning_rate": 8.530086862595078e-05, "loss": 0.2307, "step": 30423 }, { "epoch": 2.4646791963707066, "grad_norm": 0.05642401799559593, "learning_rate": 8.529636797335614e-05, "loss": 0.228, "step": 30424 }, { "epoch": 2.464760207388205, "grad_norm": 0.07163457572460175, "learning_rate": 8.529186732076151e-05, "loss": 0.2378, "step": 30425 }, { "epoch": 2.464841218405703, "grad_norm": 0.06835125386714935, "learning_rate": 8.52873666681669e-05, "loss": 0.2362, "step": 30426 }, { "epoch": 2.4649222294232014, "grad_norm": 0.07255390286445618, "learning_rate": 8.528286601557226e-05, "loss": 0.2377, "step": 30427 }, { "epoch": 2.4650032404407, "grad_norm": 0.07115132361650467, "learning_rate": 8.527836536297764e-05, "loss": 0.2631, "step": 30428 }, { "epoch": 2.4650842514581983, "grad_norm": 0.05883652716875076, "learning_rate": 8.527386471038302e-05, "loss": 0.2244, "step": 30429 }, { "epoch": 2.4651652624756966, "grad_norm": 0.08018367737531662, "learning_rate": 8.526936405778838e-05, "loss": 0.2834, "step": 30430 }, { "epoch": 2.4652462734931953, "grad_norm": 0.07445010542869568, "learning_rate": 8.526486340519376e-05, "loss": 0.2497, "step": 30431 }, { "epoch": 2.4653272845106935, "grad_norm": 0.07131835073232651, "learning_rate": 8.526036275259914e-05, "loss": 0.2285, "step": 30432 }, { "epoch": 2.4654082955281917, "grad_norm": 0.07377203553915024, "learning_rate": 8.52558621000045e-05, "loss": 0.2694, "step": 30433 }, { "epoch": 2.46548930654569, "grad_norm": 0.059419889003038406, "learning_rate": 8.525136144740989e-05, "loss": 0.2168, "step": 30434 }, { "epoch": 2.4655703175631887, "grad_norm": 0.052533701062202454, "learning_rate": 8.524686079481526e-05, "loss": 0.2151, "step": 30435 }, { "epoch": 2.465651328580687, "grad_norm": 0.0678594559431076, "learning_rate": 8.524236014222062e-05, "loss": 0.2407, "step": 30436 }, { "epoch": 2.465732339598185, "grad_norm": 0.07487330585718155, "learning_rate": 8.5237859489626e-05, "loss": 0.2665, "step": 30437 }, { "epoch": 2.465813350615684, "grad_norm": 0.06742486357688904, "learning_rate": 8.523335883703138e-05, "loss": 0.2655, "step": 30438 }, { "epoch": 2.465894361633182, "grad_norm": 0.05089341476559639, "learning_rate": 8.522885818443674e-05, "loss": 0.2299, "step": 30439 }, { "epoch": 2.4659753726506803, "grad_norm": 0.05498534068465233, "learning_rate": 8.522435753184213e-05, "loss": 0.2017, "step": 30440 }, { "epoch": 2.466056383668179, "grad_norm": 0.06885070353746414, "learning_rate": 8.52198568792475e-05, "loss": 0.2298, "step": 30441 }, { "epoch": 2.4661373946856773, "grad_norm": 0.06844142079353333, "learning_rate": 8.521535622665286e-05, "loss": 0.2448, "step": 30442 }, { "epoch": 2.4662184057031755, "grad_norm": 0.08285916596651077, "learning_rate": 8.521085557405825e-05, "loss": 0.2514, "step": 30443 }, { "epoch": 2.466299416720674, "grad_norm": 0.06653977930545807, "learning_rate": 8.520635492146362e-05, "loss": 0.236, "step": 30444 }, { "epoch": 2.4663804277381725, "grad_norm": 0.06271621584892273, "learning_rate": 8.520185426886898e-05, "loss": 0.2182, "step": 30445 }, { "epoch": 2.4664614387556707, "grad_norm": 0.08903393894433975, "learning_rate": 8.519735361627437e-05, "loss": 0.2353, "step": 30446 }, { "epoch": 2.4665424497731694, "grad_norm": 0.06892696768045425, "learning_rate": 8.519285296367974e-05, "loss": 0.242, "step": 30447 }, { "epoch": 2.4666234607906676, "grad_norm": 0.06523766368627548, "learning_rate": 8.51883523110851e-05, "loss": 0.2128, "step": 30448 }, { "epoch": 2.466704471808166, "grad_norm": 0.06940148770809174, "learning_rate": 8.518385165849049e-05, "loss": 0.2851, "step": 30449 }, { "epoch": 2.466785482825664, "grad_norm": 0.07958599925041199, "learning_rate": 8.517935100589586e-05, "loss": 0.2569, "step": 30450 }, { "epoch": 2.466866493843163, "grad_norm": 0.080347441136837, "learning_rate": 8.517485035330122e-05, "loss": 0.2485, "step": 30451 }, { "epoch": 2.466947504860661, "grad_norm": 0.0917719230055809, "learning_rate": 8.517034970070661e-05, "loss": 0.2446, "step": 30452 }, { "epoch": 2.4670285158781593, "grad_norm": 0.07323457300662994, "learning_rate": 8.516584904811198e-05, "loss": 0.2448, "step": 30453 }, { "epoch": 2.467109526895658, "grad_norm": 0.08020184934139252, "learning_rate": 8.516134839551734e-05, "loss": 0.2403, "step": 30454 }, { "epoch": 2.4671905379131562, "grad_norm": 0.06991557776927948, "learning_rate": 8.515684774292273e-05, "loss": 0.2151, "step": 30455 }, { "epoch": 2.4672715489306545, "grad_norm": 0.0720125064253807, "learning_rate": 8.51523470903281e-05, "loss": 0.2741, "step": 30456 }, { "epoch": 2.4673525599481527, "grad_norm": 0.06054284796118736, "learning_rate": 8.514784643773348e-05, "loss": 0.2589, "step": 30457 }, { "epoch": 2.4674335709656514, "grad_norm": 0.061874501407146454, "learning_rate": 8.514334578513885e-05, "loss": 0.2394, "step": 30458 }, { "epoch": 2.4675145819831497, "grad_norm": 0.07197708636522293, "learning_rate": 8.513884513254423e-05, "loss": 0.2428, "step": 30459 }, { "epoch": 2.467595593000648, "grad_norm": 0.08322641998529434, "learning_rate": 8.51343444799496e-05, "loss": 0.2442, "step": 30460 }, { "epoch": 2.4676766040181466, "grad_norm": 0.06238508224487305, "learning_rate": 8.512984382735497e-05, "loss": 0.27, "step": 30461 }, { "epoch": 2.467757615035645, "grad_norm": 0.055403050035238266, "learning_rate": 8.512534317476035e-05, "loss": 0.2421, "step": 30462 }, { "epoch": 2.467838626053143, "grad_norm": 0.07716569304466248, "learning_rate": 8.512084252216572e-05, "loss": 0.2726, "step": 30463 }, { "epoch": 2.4679196370706418, "grad_norm": 0.0678950846195221, "learning_rate": 8.51163418695711e-05, "loss": 0.2568, "step": 30464 }, { "epoch": 2.46800064808814, "grad_norm": 0.07370582222938538, "learning_rate": 8.511184121697647e-05, "loss": 0.2735, "step": 30465 }, { "epoch": 2.4680816591056383, "grad_norm": 0.06295979768037796, "learning_rate": 8.510734056438184e-05, "loss": 0.239, "step": 30466 }, { "epoch": 2.468162670123137, "grad_norm": 0.07194853574037552, "learning_rate": 8.510283991178721e-05, "loss": 0.24, "step": 30467 }, { "epoch": 2.468243681140635, "grad_norm": 0.0788399875164032, "learning_rate": 8.509833925919259e-05, "loss": 0.2444, "step": 30468 }, { "epoch": 2.4683246921581334, "grad_norm": 0.06429265439510345, "learning_rate": 8.509383860659796e-05, "loss": 0.241, "step": 30469 }, { "epoch": 2.468405703175632, "grad_norm": 0.05816531553864479, "learning_rate": 8.508933795400334e-05, "loss": 0.2041, "step": 30470 }, { "epoch": 2.4684867141931304, "grad_norm": 0.058793339878320694, "learning_rate": 8.508483730140871e-05, "loss": 0.2095, "step": 30471 }, { "epoch": 2.4685677252106286, "grad_norm": 0.07979535311460495, "learning_rate": 8.508033664881408e-05, "loss": 0.2869, "step": 30472 }, { "epoch": 2.468648736228127, "grad_norm": 0.060868628323078156, "learning_rate": 8.507583599621946e-05, "loss": 0.2525, "step": 30473 }, { "epoch": 2.4687297472456255, "grad_norm": 0.0653449222445488, "learning_rate": 8.507133534362483e-05, "loss": 0.2202, "step": 30474 }, { "epoch": 2.468810758263124, "grad_norm": 0.07391634583473206, "learning_rate": 8.50668346910302e-05, "loss": 0.2511, "step": 30475 }, { "epoch": 2.468891769280622, "grad_norm": 0.06315775960683823, "learning_rate": 8.506233403843558e-05, "loss": 0.2647, "step": 30476 }, { "epoch": 2.4689727802981207, "grad_norm": 0.08127032220363617, "learning_rate": 8.505783338584095e-05, "loss": 0.2974, "step": 30477 }, { "epoch": 2.469053791315619, "grad_norm": 0.07029463350772858, "learning_rate": 8.505333273324632e-05, "loss": 0.234, "step": 30478 }, { "epoch": 2.469134802333117, "grad_norm": 0.08047143369913101, "learning_rate": 8.50488320806517e-05, "loss": 0.2738, "step": 30479 }, { "epoch": 2.4692158133506155, "grad_norm": 0.07719200104475021, "learning_rate": 8.504433142805707e-05, "loss": 0.2413, "step": 30480 }, { "epoch": 2.469296824368114, "grad_norm": 0.08738135546445847, "learning_rate": 8.503983077546244e-05, "loss": 0.2801, "step": 30481 }, { "epoch": 2.4693778353856124, "grad_norm": 0.06579606235027313, "learning_rate": 8.503533012286782e-05, "loss": 0.2395, "step": 30482 }, { "epoch": 2.4694588464031106, "grad_norm": 0.06997605413198471, "learning_rate": 8.503082947027319e-05, "loss": 0.2696, "step": 30483 }, { "epoch": 2.4695398574206093, "grad_norm": 0.07098864018917084, "learning_rate": 8.502632881767857e-05, "loss": 0.2248, "step": 30484 }, { "epoch": 2.4696208684381076, "grad_norm": 0.06740034371614456, "learning_rate": 8.502182816508394e-05, "loss": 0.2787, "step": 30485 }, { "epoch": 2.469701879455606, "grad_norm": 0.0740785151720047, "learning_rate": 8.501732751248933e-05, "loss": 0.2315, "step": 30486 }, { "epoch": 2.4697828904731045, "grad_norm": 0.09051207453012466, "learning_rate": 8.501282685989469e-05, "loss": 0.277, "step": 30487 }, { "epoch": 2.4698639014906028, "grad_norm": 0.06422457098960876, "learning_rate": 8.500832620730006e-05, "loss": 0.232, "step": 30488 }, { "epoch": 2.469944912508101, "grad_norm": 0.0801544114947319, "learning_rate": 8.500382555470545e-05, "loss": 0.3011, "step": 30489 }, { "epoch": 2.4700259235255997, "grad_norm": 0.055484507232904434, "learning_rate": 8.499932490211081e-05, "loss": 0.2941, "step": 30490 }, { "epoch": 2.470106934543098, "grad_norm": 0.06433779746294022, "learning_rate": 8.499482424951618e-05, "loss": 0.1931, "step": 30491 }, { "epoch": 2.470187945560596, "grad_norm": 0.06803855299949646, "learning_rate": 8.499032359692157e-05, "loss": 0.217, "step": 30492 }, { "epoch": 2.470268956578095, "grad_norm": 0.0670522153377533, "learning_rate": 8.498582294432693e-05, "loss": 0.2807, "step": 30493 }, { "epoch": 2.470349967595593, "grad_norm": 0.07295812666416168, "learning_rate": 8.49813222917323e-05, "loss": 0.2891, "step": 30494 }, { "epoch": 2.4704309786130914, "grad_norm": 0.06234823167324066, "learning_rate": 8.497682163913769e-05, "loss": 0.252, "step": 30495 }, { "epoch": 2.4705119896305896, "grad_norm": 0.06830089539289474, "learning_rate": 8.497232098654305e-05, "loss": 0.2569, "step": 30496 }, { "epoch": 2.4705930006480883, "grad_norm": 0.06779050827026367, "learning_rate": 8.496782033394842e-05, "loss": 0.2216, "step": 30497 }, { "epoch": 2.4706740116655865, "grad_norm": 0.0621131956577301, "learning_rate": 8.496331968135381e-05, "loss": 0.2374, "step": 30498 }, { "epoch": 2.470755022683085, "grad_norm": 0.06830302625894547, "learning_rate": 8.495881902875917e-05, "loss": 0.2565, "step": 30499 }, { "epoch": 2.4708360337005835, "grad_norm": 0.05653122067451477, "learning_rate": 8.495431837616454e-05, "loss": 0.2369, "step": 30500 }, { "epoch": 2.4709170447180817, "grad_norm": 0.05332133173942566, "learning_rate": 8.494981772356993e-05, "loss": 0.2493, "step": 30501 }, { "epoch": 2.47099805573558, "grad_norm": 0.06842204183340073, "learning_rate": 8.494531707097529e-05, "loss": 0.2659, "step": 30502 }, { "epoch": 2.471079066753078, "grad_norm": 0.061891455203294754, "learning_rate": 8.494081641838066e-05, "loss": 0.2596, "step": 30503 }, { "epoch": 2.471160077770577, "grad_norm": 0.048567235469818115, "learning_rate": 8.493631576578605e-05, "loss": 0.203, "step": 30504 }, { "epoch": 2.471241088788075, "grad_norm": 0.061536628752946854, "learning_rate": 8.493181511319141e-05, "loss": 0.2194, "step": 30505 }, { "epoch": 2.4713220998055734, "grad_norm": 0.056444212794303894, "learning_rate": 8.492731446059679e-05, "loss": 0.2044, "step": 30506 }, { "epoch": 2.471403110823072, "grad_norm": 0.06512252241373062, "learning_rate": 8.492281380800217e-05, "loss": 0.2332, "step": 30507 }, { "epoch": 2.4714841218405703, "grad_norm": 0.05856655165553093, "learning_rate": 8.491831315540753e-05, "loss": 0.2205, "step": 30508 }, { "epoch": 2.4715651328580686, "grad_norm": 0.07192254066467285, "learning_rate": 8.491381250281292e-05, "loss": 0.2276, "step": 30509 }, { "epoch": 2.4716461438755672, "grad_norm": 0.07527060806751251, "learning_rate": 8.490931185021829e-05, "loss": 0.2525, "step": 30510 }, { "epoch": 2.4717271548930655, "grad_norm": 0.06652418524026871, "learning_rate": 8.490481119762365e-05, "loss": 0.2377, "step": 30511 }, { "epoch": 2.4718081659105637, "grad_norm": 0.0621594674885273, "learning_rate": 8.490031054502904e-05, "loss": 0.2076, "step": 30512 }, { "epoch": 2.4718891769280624, "grad_norm": 0.07410325109958649, "learning_rate": 8.489580989243441e-05, "loss": 0.2559, "step": 30513 }, { "epoch": 2.4719701879455607, "grad_norm": 0.06433682143688202, "learning_rate": 8.489130923983977e-05, "loss": 0.2071, "step": 30514 }, { "epoch": 2.472051198963059, "grad_norm": 0.06185179203748703, "learning_rate": 8.488680858724516e-05, "loss": 0.2434, "step": 30515 }, { "epoch": 2.4721322099805576, "grad_norm": 0.06650276482105255, "learning_rate": 8.488230793465053e-05, "loss": 0.253, "step": 30516 }, { "epoch": 2.472213220998056, "grad_norm": 0.05839819461107254, "learning_rate": 8.48778072820559e-05, "loss": 0.2209, "step": 30517 }, { "epoch": 2.472294232015554, "grad_norm": 0.05731014162302017, "learning_rate": 8.487330662946128e-05, "loss": 0.2107, "step": 30518 }, { "epoch": 2.4723752430330523, "grad_norm": 0.08695816248655319, "learning_rate": 8.486880597686666e-05, "loss": 0.2402, "step": 30519 }, { "epoch": 2.472456254050551, "grad_norm": 0.060433726757764816, "learning_rate": 8.486430532427202e-05, "loss": 0.236, "step": 30520 }, { "epoch": 2.4725372650680493, "grad_norm": 0.060748256742954254, "learning_rate": 8.48598046716774e-05, "loss": 0.2119, "step": 30521 }, { "epoch": 2.4726182760855475, "grad_norm": 0.07171925157308578, "learning_rate": 8.485530401908278e-05, "loss": 0.286, "step": 30522 }, { "epoch": 2.472699287103046, "grad_norm": 0.053237102925777435, "learning_rate": 8.485080336648814e-05, "loss": 0.1828, "step": 30523 }, { "epoch": 2.4727802981205445, "grad_norm": 0.06715618073940277, "learning_rate": 8.484630271389352e-05, "loss": 0.2562, "step": 30524 }, { "epoch": 2.4728613091380427, "grad_norm": 0.06880155205726624, "learning_rate": 8.48418020612989e-05, "loss": 0.2556, "step": 30525 }, { "epoch": 2.472942320155541, "grad_norm": 0.06980689615011215, "learning_rate": 8.483730140870426e-05, "loss": 0.2443, "step": 30526 }, { "epoch": 2.4730233311730396, "grad_norm": 0.06468836963176727, "learning_rate": 8.483280075610964e-05, "loss": 0.2377, "step": 30527 }, { "epoch": 2.473104342190538, "grad_norm": 0.07451368868350983, "learning_rate": 8.482830010351502e-05, "loss": 0.2622, "step": 30528 }, { "epoch": 2.473185353208036, "grad_norm": 0.07759378850460052, "learning_rate": 8.482379945092038e-05, "loss": 0.2815, "step": 30529 }, { "epoch": 2.473266364225535, "grad_norm": 0.0744905173778534, "learning_rate": 8.481929879832577e-05, "loss": 0.2696, "step": 30530 }, { "epoch": 2.473347375243033, "grad_norm": 0.06338359415531158, "learning_rate": 8.481479814573114e-05, "loss": 0.2335, "step": 30531 }, { "epoch": 2.4734283862605313, "grad_norm": 0.061391785740852356, "learning_rate": 8.48102974931365e-05, "loss": 0.2507, "step": 30532 }, { "epoch": 2.47350939727803, "grad_norm": 0.05529424548149109, "learning_rate": 8.480579684054189e-05, "loss": 0.2195, "step": 30533 }, { "epoch": 2.4735904082955282, "grad_norm": 0.06360072642564774, "learning_rate": 8.480129618794726e-05, "loss": 0.2163, "step": 30534 }, { "epoch": 2.4736714193130265, "grad_norm": 0.08186694234609604, "learning_rate": 8.479679553535263e-05, "loss": 0.2304, "step": 30535 }, { "epoch": 2.473752430330525, "grad_norm": 0.06562604010105133, "learning_rate": 8.4792294882758e-05, "loss": 0.2797, "step": 30536 }, { "epoch": 2.4738334413480234, "grad_norm": 0.07563050836324692, "learning_rate": 8.478779423016338e-05, "loss": 0.2963, "step": 30537 }, { "epoch": 2.4739144523655217, "grad_norm": 0.0658164918422699, "learning_rate": 8.478329357756875e-05, "loss": 0.2575, "step": 30538 }, { "epoch": 2.47399546338302, "grad_norm": 0.07443150132894516, "learning_rate": 8.477879292497413e-05, "loss": 0.2333, "step": 30539 }, { "epoch": 2.4740764744005186, "grad_norm": 0.06861604005098343, "learning_rate": 8.47742922723795e-05, "loss": 0.2373, "step": 30540 }, { "epoch": 2.474157485418017, "grad_norm": 0.07418636232614517, "learning_rate": 8.476979161978487e-05, "loss": 0.2265, "step": 30541 }, { "epoch": 2.474238496435515, "grad_norm": 0.06815627962350845, "learning_rate": 8.476529096719025e-05, "loss": 0.2377, "step": 30542 }, { "epoch": 2.4743195074530138, "grad_norm": 0.06877239048480988, "learning_rate": 8.476079031459562e-05, "loss": 0.2037, "step": 30543 }, { "epoch": 2.474400518470512, "grad_norm": 0.0606442354619503, "learning_rate": 8.4756289662001e-05, "loss": 0.2412, "step": 30544 }, { "epoch": 2.4744815294880103, "grad_norm": 0.0652085393667221, "learning_rate": 8.475178900940637e-05, "loss": 0.2045, "step": 30545 }, { "epoch": 2.4745625405055085, "grad_norm": 0.07037603855133057, "learning_rate": 8.474728835681174e-05, "loss": 0.2213, "step": 30546 }, { "epoch": 2.474643551523007, "grad_norm": 0.06558571755886078, "learning_rate": 8.474278770421712e-05, "loss": 0.2443, "step": 30547 }, { "epoch": 2.4747245625405054, "grad_norm": 0.07204144448041916, "learning_rate": 8.473828705162249e-05, "loss": 0.2457, "step": 30548 }, { "epoch": 2.4748055735580037, "grad_norm": 0.08189809322357178, "learning_rate": 8.473378639902786e-05, "loss": 0.2156, "step": 30549 }, { "epoch": 2.4748865845755024, "grad_norm": 0.0684322714805603, "learning_rate": 8.472928574643324e-05, "loss": 0.2378, "step": 30550 }, { "epoch": 2.4749675955930006, "grad_norm": 0.07303576916456223, "learning_rate": 8.472478509383861e-05, "loss": 0.2454, "step": 30551 }, { "epoch": 2.475048606610499, "grad_norm": 0.07292795181274414, "learning_rate": 8.472028444124398e-05, "loss": 0.2673, "step": 30552 }, { "epoch": 2.4751296176279975, "grad_norm": 0.06463218480348587, "learning_rate": 8.471578378864936e-05, "loss": 0.2719, "step": 30553 }, { "epoch": 2.475210628645496, "grad_norm": 0.08307449519634247, "learning_rate": 8.471128313605473e-05, "loss": 0.256, "step": 30554 }, { "epoch": 2.475291639662994, "grad_norm": 0.05798949673771858, "learning_rate": 8.47067824834601e-05, "loss": 0.2702, "step": 30555 }, { "epoch": 2.4753726506804927, "grad_norm": 0.07065913081169128, "learning_rate": 8.470228183086548e-05, "loss": 0.2602, "step": 30556 }, { "epoch": 2.475453661697991, "grad_norm": 0.08354199677705765, "learning_rate": 8.469778117827085e-05, "loss": 0.2517, "step": 30557 }, { "epoch": 2.475534672715489, "grad_norm": 0.05542146787047386, "learning_rate": 8.469328052567623e-05, "loss": 0.2171, "step": 30558 }, { "epoch": 2.475615683732988, "grad_norm": 0.07074569165706635, "learning_rate": 8.46887798730816e-05, "loss": 0.2834, "step": 30559 }, { "epoch": 2.475696694750486, "grad_norm": 0.07820092886686325, "learning_rate": 8.468427922048697e-05, "loss": 0.2285, "step": 30560 }, { "epoch": 2.4757777057679844, "grad_norm": 0.059474945068359375, "learning_rate": 8.467977856789236e-05, "loss": 0.2126, "step": 30561 }, { "epoch": 2.4758587167854826, "grad_norm": 0.06978403031826019, "learning_rate": 8.467527791529772e-05, "loss": 0.2501, "step": 30562 }, { "epoch": 2.4759397278029813, "grad_norm": 0.07842637598514557, "learning_rate": 8.46707772627031e-05, "loss": 0.2466, "step": 30563 }, { "epoch": 2.4760207388204796, "grad_norm": 0.0689956545829773, "learning_rate": 8.466627661010848e-05, "loss": 0.2547, "step": 30564 }, { "epoch": 2.476101749837978, "grad_norm": 0.07924441993236542, "learning_rate": 8.466177595751384e-05, "loss": 0.2389, "step": 30565 }, { "epoch": 2.4761827608554765, "grad_norm": 0.06799956411123276, "learning_rate": 8.465727530491921e-05, "loss": 0.236, "step": 30566 }, { "epoch": 2.4762637718729748, "grad_norm": 0.07037723064422607, "learning_rate": 8.46527746523246e-05, "loss": 0.2611, "step": 30567 }, { "epoch": 2.476344782890473, "grad_norm": 0.05896992236375809, "learning_rate": 8.464827399972996e-05, "loss": 0.2478, "step": 30568 }, { "epoch": 2.4764257939079712, "grad_norm": 0.06081313639879227, "learning_rate": 8.464377334713534e-05, "loss": 0.2164, "step": 30569 }, { "epoch": 2.47650680492547, "grad_norm": 0.06605610251426697, "learning_rate": 8.463927269454072e-05, "loss": 0.2315, "step": 30570 }, { "epoch": 2.476587815942968, "grad_norm": 0.06241413950920105, "learning_rate": 8.463477204194608e-05, "loss": 0.2517, "step": 30571 }, { "epoch": 2.4766688269604664, "grad_norm": 0.0740523412823677, "learning_rate": 8.463027138935146e-05, "loss": 0.2226, "step": 30572 }, { "epoch": 2.476749837977965, "grad_norm": 0.06345130503177643, "learning_rate": 8.462577073675684e-05, "loss": 0.2541, "step": 30573 }, { "epoch": 2.4768308489954634, "grad_norm": 0.07574643939733505, "learning_rate": 8.46212700841622e-05, "loss": 0.2631, "step": 30574 }, { "epoch": 2.4769118600129616, "grad_norm": 0.06299690157175064, "learning_rate": 8.461676943156758e-05, "loss": 0.2358, "step": 30575 }, { "epoch": 2.4769928710304603, "grad_norm": 0.07480761408805847, "learning_rate": 8.461226877897296e-05, "loss": 0.2674, "step": 30576 }, { "epoch": 2.4770738820479585, "grad_norm": 0.0540340431034565, "learning_rate": 8.460776812637832e-05, "loss": 0.227, "step": 30577 }, { "epoch": 2.4771548930654568, "grad_norm": 0.07527433335781097, "learning_rate": 8.46032674737837e-05, "loss": 0.2709, "step": 30578 }, { "epoch": 2.4772359040829555, "grad_norm": 0.06662973016500473, "learning_rate": 8.459876682118909e-05, "loss": 0.2107, "step": 30579 }, { "epoch": 2.4773169151004537, "grad_norm": 0.052600983530282974, "learning_rate": 8.459426616859445e-05, "loss": 0.2453, "step": 30580 }, { "epoch": 2.477397926117952, "grad_norm": 0.06754674017429352, "learning_rate": 8.458976551599982e-05, "loss": 0.2646, "step": 30581 }, { "epoch": 2.4774789371354506, "grad_norm": 0.07289335131645203, "learning_rate": 8.45852648634052e-05, "loss": 0.2732, "step": 30582 }, { "epoch": 2.477559948152949, "grad_norm": 0.061072248965501785, "learning_rate": 8.458076421081057e-05, "loss": 0.1947, "step": 30583 }, { "epoch": 2.477640959170447, "grad_norm": 0.07370416820049286, "learning_rate": 8.457626355821594e-05, "loss": 0.2897, "step": 30584 }, { "epoch": 2.4777219701879454, "grad_norm": 0.06480906158685684, "learning_rate": 8.457176290562133e-05, "loss": 0.2679, "step": 30585 }, { "epoch": 2.477802981205444, "grad_norm": 0.05878300592303276, "learning_rate": 8.456726225302669e-05, "loss": 0.2373, "step": 30586 }, { "epoch": 2.4778839922229423, "grad_norm": 0.07398363202810287, "learning_rate": 8.456276160043207e-05, "loss": 0.2447, "step": 30587 }, { "epoch": 2.4779650032404406, "grad_norm": 0.07146541774272919, "learning_rate": 8.455826094783745e-05, "loss": 0.2513, "step": 30588 }, { "epoch": 2.4780460142579392, "grad_norm": 0.07220860570669174, "learning_rate": 8.455376029524281e-05, "loss": 0.2173, "step": 30589 }, { "epoch": 2.4781270252754375, "grad_norm": 0.08894750475883484, "learning_rate": 8.45492596426482e-05, "loss": 0.2895, "step": 30590 }, { "epoch": 2.4782080362929357, "grad_norm": 0.06148972734808922, "learning_rate": 8.454475899005357e-05, "loss": 0.2457, "step": 30591 }, { "epoch": 2.478289047310434, "grad_norm": 0.07154235243797302, "learning_rate": 8.454025833745893e-05, "loss": 0.2492, "step": 30592 }, { "epoch": 2.4783700583279327, "grad_norm": 0.06511364877223969, "learning_rate": 8.453575768486432e-05, "loss": 0.2275, "step": 30593 }, { "epoch": 2.478451069345431, "grad_norm": 0.051312949508428574, "learning_rate": 8.453125703226969e-05, "loss": 0.2409, "step": 30594 }, { "epoch": 2.478532080362929, "grad_norm": 0.06297353655099869, "learning_rate": 8.452675637967505e-05, "loss": 0.214, "step": 30595 }, { "epoch": 2.478613091380428, "grad_norm": 0.059804175049066544, "learning_rate": 8.452225572708044e-05, "loss": 0.212, "step": 30596 }, { "epoch": 2.478694102397926, "grad_norm": 0.06705871969461441, "learning_rate": 8.451775507448581e-05, "loss": 0.244, "step": 30597 }, { "epoch": 2.4787751134154243, "grad_norm": 0.06258346140384674, "learning_rate": 8.451325442189117e-05, "loss": 0.2943, "step": 30598 }, { "epoch": 2.478856124432923, "grad_norm": 0.0627899020910263, "learning_rate": 8.450875376929656e-05, "loss": 0.232, "step": 30599 }, { "epoch": 2.4789371354504213, "grad_norm": 0.0678204819560051, "learning_rate": 8.450425311670193e-05, "loss": 0.229, "step": 30600 }, { "epoch": 2.4790181464679195, "grad_norm": 0.05785902217030525, "learning_rate": 8.449975246410729e-05, "loss": 0.2413, "step": 30601 }, { "epoch": 2.479099157485418, "grad_norm": 0.058222945779561996, "learning_rate": 8.449525181151268e-05, "loss": 0.2707, "step": 30602 }, { "epoch": 2.4791801685029164, "grad_norm": 0.06798720359802246, "learning_rate": 8.449075115891805e-05, "loss": 0.2509, "step": 30603 }, { "epoch": 2.4792611795204147, "grad_norm": 0.061136022210121155, "learning_rate": 8.448625050632341e-05, "loss": 0.2303, "step": 30604 }, { "epoch": 2.4793421905379134, "grad_norm": 0.07287042587995529, "learning_rate": 8.44817498537288e-05, "loss": 0.2456, "step": 30605 }, { "epoch": 2.4794232015554116, "grad_norm": 0.07112374156713486, "learning_rate": 8.447724920113417e-05, "loss": 0.2289, "step": 30606 }, { "epoch": 2.47950421257291, "grad_norm": 0.06856828182935715, "learning_rate": 8.447274854853953e-05, "loss": 0.3085, "step": 30607 }, { "epoch": 2.479585223590408, "grad_norm": 0.05687296390533447, "learning_rate": 8.446824789594492e-05, "loss": 0.2255, "step": 30608 }, { "epoch": 2.479666234607907, "grad_norm": 0.06885094195604324, "learning_rate": 8.44637472433503e-05, "loss": 0.2761, "step": 30609 }, { "epoch": 2.479747245625405, "grad_norm": 0.05651099607348442, "learning_rate": 8.445924659075565e-05, "loss": 0.233, "step": 30610 }, { "epoch": 2.4798282566429033, "grad_norm": 0.08474230021238327, "learning_rate": 8.445474593816104e-05, "loss": 0.2556, "step": 30611 }, { "epoch": 2.479909267660402, "grad_norm": 0.06992295384407043, "learning_rate": 8.445024528556641e-05, "loss": 0.2496, "step": 30612 }, { "epoch": 2.4799902786779002, "grad_norm": 0.05965316668152809, "learning_rate": 8.444574463297179e-05, "loss": 0.2151, "step": 30613 }, { "epoch": 2.4800712896953985, "grad_norm": 0.07484708726406097, "learning_rate": 8.444124398037716e-05, "loss": 0.2451, "step": 30614 }, { "epoch": 2.4801523007128967, "grad_norm": 0.07535053789615631, "learning_rate": 8.443674332778253e-05, "loss": 0.2582, "step": 30615 }, { "epoch": 2.4802333117303954, "grad_norm": 0.0784064307808876, "learning_rate": 8.443224267518791e-05, "loss": 0.2701, "step": 30616 }, { "epoch": 2.4803143227478937, "grad_norm": 0.06548595428466797, "learning_rate": 8.442774202259328e-05, "loss": 0.2358, "step": 30617 }, { "epoch": 2.480395333765392, "grad_norm": 0.08438055962324142, "learning_rate": 8.442324136999866e-05, "loss": 0.2528, "step": 30618 }, { "epoch": 2.4804763447828906, "grad_norm": 0.06295493245124817, "learning_rate": 8.441874071740403e-05, "loss": 0.2414, "step": 30619 }, { "epoch": 2.480557355800389, "grad_norm": 0.06215086951851845, "learning_rate": 8.44142400648094e-05, "loss": 0.2201, "step": 30620 }, { "epoch": 2.480638366817887, "grad_norm": 0.04356473311781883, "learning_rate": 8.440973941221478e-05, "loss": 0.2121, "step": 30621 }, { "epoch": 2.4807193778353858, "grad_norm": 0.06062763184309006, "learning_rate": 8.440523875962015e-05, "loss": 0.2442, "step": 30622 }, { "epoch": 2.480800388852884, "grad_norm": 0.07249461859464645, "learning_rate": 8.440073810702552e-05, "loss": 0.2531, "step": 30623 }, { "epoch": 2.4808813998703823, "grad_norm": 0.0566757470369339, "learning_rate": 8.43962374544309e-05, "loss": 0.2855, "step": 30624 }, { "epoch": 2.480962410887881, "grad_norm": 0.05951603129506111, "learning_rate": 8.439173680183627e-05, "loss": 0.2121, "step": 30625 }, { "epoch": 2.481043421905379, "grad_norm": 0.06192256510257721, "learning_rate": 8.438723614924164e-05, "loss": 0.1961, "step": 30626 }, { "epoch": 2.4811244329228774, "grad_norm": 0.06632743030786514, "learning_rate": 8.438273549664702e-05, "loss": 0.2504, "step": 30627 }, { "epoch": 2.481205443940376, "grad_norm": 0.05739384517073631, "learning_rate": 8.437823484405239e-05, "loss": 0.2301, "step": 30628 }, { "epoch": 2.4812864549578744, "grad_norm": 0.052155740559101105, "learning_rate": 8.437373419145777e-05, "loss": 0.2313, "step": 30629 }, { "epoch": 2.4813674659753726, "grad_norm": 0.07894574850797653, "learning_rate": 8.436923353886314e-05, "loss": 0.2343, "step": 30630 }, { "epoch": 2.481448476992871, "grad_norm": 0.07149939984083176, "learning_rate": 8.436473288626851e-05, "loss": 0.2414, "step": 30631 }, { "epoch": 2.4815294880103695, "grad_norm": 0.0677579790353775, "learning_rate": 8.436023223367389e-05, "loss": 0.2401, "step": 30632 }, { "epoch": 2.481610499027868, "grad_norm": 0.07865419238805771, "learning_rate": 8.435573158107926e-05, "loss": 0.265, "step": 30633 }, { "epoch": 2.481691510045366, "grad_norm": 0.07887757569551468, "learning_rate": 8.435123092848463e-05, "loss": 0.2649, "step": 30634 }, { "epoch": 2.4817725210628647, "grad_norm": 0.06781040132045746, "learning_rate": 8.434673027589001e-05, "loss": 0.245, "step": 30635 }, { "epoch": 2.481853532080363, "grad_norm": 0.07102234661579132, "learning_rate": 8.434222962329538e-05, "loss": 0.2769, "step": 30636 }, { "epoch": 2.481934543097861, "grad_norm": 0.05997258052229881, "learning_rate": 8.433772897070075e-05, "loss": 0.2127, "step": 30637 }, { "epoch": 2.4820155541153595, "grad_norm": 0.06905551999807358, "learning_rate": 8.433322831810613e-05, "loss": 0.2399, "step": 30638 }, { "epoch": 2.482096565132858, "grad_norm": 0.0653391182422638, "learning_rate": 8.43287276655115e-05, "loss": 0.2442, "step": 30639 }, { "epoch": 2.4821775761503564, "grad_norm": 0.0679607167840004, "learning_rate": 8.432422701291688e-05, "loss": 0.2498, "step": 30640 }, { "epoch": 2.4822585871678546, "grad_norm": 0.07271024584770203, "learning_rate": 8.431972636032225e-05, "loss": 0.2263, "step": 30641 }, { "epoch": 2.4823395981853533, "grad_norm": 0.0782843753695488, "learning_rate": 8.431522570772764e-05, "loss": 0.2853, "step": 30642 }, { "epoch": 2.4824206092028516, "grad_norm": 0.06645394116640091, "learning_rate": 8.4310725055133e-05, "loss": 0.2078, "step": 30643 }, { "epoch": 2.48250162022035, "grad_norm": 0.06782057881355286, "learning_rate": 8.430622440253837e-05, "loss": 0.2438, "step": 30644 }, { "epoch": 2.4825826312378485, "grad_norm": 0.06983155757188797, "learning_rate": 8.430172374994376e-05, "loss": 0.2171, "step": 30645 }, { "epoch": 2.4826636422553467, "grad_norm": 0.06148098036646843, "learning_rate": 8.429722309734912e-05, "loss": 0.2539, "step": 30646 }, { "epoch": 2.482744653272845, "grad_norm": 0.062350235879421234, "learning_rate": 8.429272244475449e-05, "loss": 0.253, "step": 30647 }, { "epoch": 2.4828256642903437, "grad_norm": 0.08074339479207993, "learning_rate": 8.428822179215988e-05, "loss": 0.2768, "step": 30648 }, { "epoch": 2.482906675307842, "grad_norm": 0.061865705996751785, "learning_rate": 8.428372113956524e-05, "loss": 0.2295, "step": 30649 }, { "epoch": 2.48298768632534, "grad_norm": 0.06636541336774826, "learning_rate": 8.427922048697061e-05, "loss": 0.2492, "step": 30650 }, { "epoch": 2.483068697342839, "grad_norm": 0.05307980999350548, "learning_rate": 8.4274719834376e-05, "loss": 0.1934, "step": 30651 }, { "epoch": 2.483149708360337, "grad_norm": 0.06762948632240295, "learning_rate": 8.427021918178136e-05, "loss": 0.2117, "step": 30652 }, { "epoch": 2.4832307193778353, "grad_norm": 0.05977749824523926, "learning_rate": 8.426571852918673e-05, "loss": 0.242, "step": 30653 }, { "epoch": 2.4833117303953336, "grad_norm": 0.07046031951904297, "learning_rate": 8.426121787659212e-05, "loss": 0.2485, "step": 30654 }, { "epoch": 2.4833927414128323, "grad_norm": 0.06229454278945923, "learning_rate": 8.425671722399748e-05, "loss": 0.2353, "step": 30655 }, { "epoch": 2.4834737524303305, "grad_norm": 0.07501602172851562, "learning_rate": 8.425221657140285e-05, "loss": 0.2711, "step": 30656 }, { "epoch": 2.4835547634478288, "grad_norm": 0.0901978611946106, "learning_rate": 8.424771591880824e-05, "loss": 0.2729, "step": 30657 }, { "epoch": 2.4836357744653275, "grad_norm": 0.07065119594335556, "learning_rate": 8.42432152662136e-05, "loss": 0.2416, "step": 30658 }, { "epoch": 2.4837167854828257, "grad_norm": 0.07169196754693985, "learning_rate": 8.423871461361897e-05, "loss": 0.2981, "step": 30659 }, { "epoch": 2.483797796500324, "grad_norm": 0.06765072047710419, "learning_rate": 8.423421396102436e-05, "loss": 0.2378, "step": 30660 }, { "epoch": 2.483878807517822, "grad_norm": 0.07858853787183762, "learning_rate": 8.422971330842972e-05, "loss": 0.2106, "step": 30661 }, { "epoch": 2.483959818535321, "grad_norm": 0.06337659806013107, "learning_rate": 8.42252126558351e-05, "loss": 0.271, "step": 30662 }, { "epoch": 2.484040829552819, "grad_norm": 0.059507403522729874, "learning_rate": 8.422071200324048e-05, "loss": 0.2453, "step": 30663 }, { "epoch": 2.4841218405703174, "grad_norm": 0.06476711481809616, "learning_rate": 8.421621135064584e-05, "loss": 0.2628, "step": 30664 }, { "epoch": 2.484202851587816, "grad_norm": 0.060637783259153366, "learning_rate": 8.421171069805122e-05, "loss": 0.2652, "step": 30665 }, { "epoch": 2.4842838626053143, "grad_norm": 0.07929336279630661, "learning_rate": 8.42072100454566e-05, "loss": 0.2567, "step": 30666 }, { "epoch": 2.4843648736228126, "grad_norm": 0.07002262771129608, "learning_rate": 8.420270939286196e-05, "loss": 0.2426, "step": 30667 }, { "epoch": 2.4844458846403112, "grad_norm": 0.0657939612865448, "learning_rate": 8.419820874026735e-05, "loss": 0.2328, "step": 30668 }, { "epoch": 2.4845268956578095, "grad_norm": 0.06577887386083603, "learning_rate": 8.419370808767272e-05, "loss": 0.2103, "step": 30669 }, { "epoch": 2.4846079066753077, "grad_norm": 0.057862211018800735, "learning_rate": 8.418920743507808e-05, "loss": 0.2646, "step": 30670 }, { "epoch": 2.4846889176928064, "grad_norm": 0.06161754950881004, "learning_rate": 8.418470678248347e-05, "loss": 0.2235, "step": 30671 }, { "epoch": 2.4847699287103047, "grad_norm": 0.0700254738330841, "learning_rate": 8.418020612988884e-05, "loss": 0.2535, "step": 30672 }, { "epoch": 2.484850939727803, "grad_norm": 0.07041961699724197, "learning_rate": 8.41757054772942e-05, "loss": 0.2669, "step": 30673 }, { "epoch": 2.4849319507453016, "grad_norm": 0.0570821575820446, "learning_rate": 8.417120482469959e-05, "loss": 0.1884, "step": 30674 }, { "epoch": 2.4850129617628, "grad_norm": 0.0660528689622879, "learning_rate": 8.416670417210496e-05, "loss": 0.2748, "step": 30675 }, { "epoch": 2.485093972780298, "grad_norm": 0.077247254550457, "learning_rate": 8.416220351951032e-05, "loss": 0.267, "step": 30676 }, { "epoch": 2.4851749837977963, "grad_norm": 0.06788933277130127, "learning_rate": 8.415770286691571e-05, "loss": 0.2301, "step": 30677 }, { "epoch": 2.485255994815295, "grad_norm": 0.07161764055490494, "learning_rate": 8.415320221432109e-05, "loss": 0.2332, "step": 30678 }, { "epoch": 2.4853370058327933, "grad_norm": 0.08423119783401489, "learning_rate": 8.414870156172645e-05, "loss": 0.2964, "step": 30679 }, { "epoch": 2.4854180168502915, "grad_norm": 0.06369981914758682, "learning_rate": 8.414420090913183e-05, "loss": 0.2404, "step": 30680 }, { "epoch": 2.48549902786779, "grad_norm": 0.06630921363830566, "learning_rate": 8.41397002565372e-05, "loss": 0.2425, "step": 30681 }, { "epoch": 2.4855800388852884, "grad_norm": 0.06150691956281662, "learning_rate": 8.413519960394257e-05, "loss": 0.1941, "step": 30682 }, { "epoch": 2.4856610499027867, "grad_norm": 0.058932892978191376, "learning_rate": 8.413069895134795e-05, "loss": 0.238, "step": 30683 }, { "epoch": 2.485742060920285, "grad_norm": 0.1102815568447113, "learning_rate": 8.412619829875333e-05, "loss": 0.2678, "step": 30684 }, { "epoch": 2.4858230719377836, "grad_norm": 0.06793038547039032, "learning_rate": 8.412169764615869e-05, "loss": 0.262, "step": 30685 }, { "epoch": 2.485904082955282, "grad_norm": 0.05985938012599945, "learning_rate": 8.411719699356407e-05, "loss": 0.2546, "step": 30686 }, { "epoch": 2.48598509397278, "grad_norm": 0.060996122658252716, "learning_rate": 8.411269634096945e-05, "loss": 0.2451, "step": 30687 }, { "epoch": 2.486066104990279, "grad_norm": 0.06225128471851349, "learning_rate": 8.410819568837481e-05, "loss": 0.2376, "step": 30688 }, { "epoch": 2.486147116007777, "grad_norm": 0.07075046002864838, "learning_rate": 8.41036950357802e-05, "loss": 0.2201, "step": 30689 }, { "epoch": 2.4862281270252753, "grad_norm": 0.07719450443983078, "learning_rate": 8.409919438318557e-05, "loss": 0.2591, "step": 30690 }, { "epoch": 2.486309138042774, "grad_norm": 0.07757656276226044, "learning_rate": 8.409469373059093e-05, "loss": 0.3049, "step": 30691 }, { "epoch": 2.4863901490602722, "grad_norm": 0.06839792430400848, "learning_rate": 8.409019307799632e-05, "loss": 0.2417, "step": 30692 }, { "epoch": 2.4864711600777705, "grad_norm": 0.07726556062698364, "learning_rate": 8.408569242540169e-05, "loss": 0.2267, "step": 30693 }, { "epoch": 2.486552171095269, "grad_norm": 0.0668526440858841, "learning_rate": 8.408119177280706e-05, "loss": 0.232, "step": 30694 }, { "epoch": 2.4866331821127674, "grad_norm": 0.05855938419699669, "learning_rate": 8.407669112021244e-05, "loss": 0.2572, "step": 30695 }, { "epoch": 2.4867141931302656, "grad_norm": 0.07574159651994705, "learning_rate": 8.407219046761781e-05, "loss": 0.2189, "step": 30696 }, { "epoch": 2.4867952041477643, "grad_norm": 0.07984759658575058, "learning_rate": 8.406768981502318e-05, "loss": 0.2623, "step": 30697 }, { "epoch": 2.4868762151652626, "grad_norm": 0.06058523803949356, "learning_rate": 8.406318916242856e-05, "loss": 0.2509, "step": 30698 }, { "epoch": 2.486957226182761, "grad_norm": 0.07417916506528854, "learning_rate": 8.405868850983393e-05, "loss": 0.2528, "step": 30699 }, { "epoch": 2.487038237200259, "grad_norm": 0.07533387839794159, "learning_rate": 8.40541878572393e-05, "loss": 0.2253, "step": 30700 }, { "epoch": 2.4871192482177578, "grad_norm": 0.06526943296194077, "learning_rate": 8.404968720464468e-05, "loss": 0.2292, "step": 30701 }, { "epoch": 2.487200259235256, "grad_norm": 0.08730722218751907, "learning_rate": 8.404518655205005e-05, "loss": 0.2835, "step": 30702 }, { "epoch": 2.4872812702527543, "grad_norm": 0.061956096440553665, "learning_rate": 8.404068589945543e-05, "loss": 0.2447, "step": 30703 }, { "epoch": 2.487362281270253, "grad_norm": 0.05448000505566597, "learning_rate": 8.40361852468608e-05, "loss": 0.2064, "step": 30704 }, { "epoch": 2.487443292287751, "grad_norm": 0.061836034059524536, "learning_rate": 8.403168459426617e-05, "loss": 0.2299, "step": 30705 }, { "epoch": 2.4875243033052494, "grad_norm": 0.04638923332095146, "learning_rate": 8.402718394167155e-05, "loss": 0.2057, "step": 30706 }, { "epoch": 2.4876053143227477, "grad_norm": 0.0720670148730278, "learning_rate": 8.402268328907692e-05, "loss": 0.2222, "step": 30707 }, { "epoch": 2.4876863253402464, "grad_norm": 0.06310757249593735, "learning_rate": 8.40181826364823e-05, "loss": 0.2407, "step": 30708 }, { "epoch": 2.4877673363577446, "grad_norm": 0.07061705738306046, "learning_rate": 8.401368198388767e-05, "loss": 0.2506, "step": 30709 }, { "epoch": 2.487848347375243, "grad_norm": 0.07268223911523819, "learning_rate": 8.400918133129304e-05, "loss": 0.2372, "step": 30710 }, { "epoch": 2.4879293583927415, "grad_norm": 0.06775526702404022, "learning_rate": 8.400468067869841e-05, "loss": 0.2453, "step": 30711 }, { "epoch": 2.48801036941024, "grad_norm": 0.06613241136074066, "learning_rate": 8.400018002610379e-05, "loss": 0.2715, "step": 30712 }, { "epoch": 2.488091380427738, "grad_norm": 0.05558057501912117, "learning_rate": 8.399567937350916e-05, "loss": 0.2134, "step": 30713 }, { "epoch": 2.4881723914452367, "grad_norm": 0.06692616641521454, "learning_rate": 8.399117872091454e-05, "loss": 0.2437, "step": 30714 }, { "epoch": 2.488253402462735, "grad_norm": 0.05806124210357666, "learning_rate": 8.398667806831991e-05, "loss": 0.2347, "step": 30715 }, { "epoch": 2.488334413480233, "grad_norm": 0.0691765546798706, "learning_rate": 8.398217741572528e-05, "loss": 0.2374, "step": 30716 }, { "epoch": 2.488415424497732, "grad_norm": 0.08031977713108063, "learning_rate": 8.397767676313066e-05, "loss": 0.2315, "step": 30717 }, { "epoch": 2.48849643551523, "grad_norm": 0.06622499972581863, "learning_rate": 8.397317611053603e-05, "loss": 0.2299, "step": 30718 }, { "epoch": 2.4885774465327284, "grad_norm": 0.07046611607074738, "learning_rate": 8.39686754579414e-05, "loss": 0.2726, "step": 30719 }, { "epoch": 2.488658457550227, "grad_norm": 0.0632859617471695, "learning_rate": 8.396417480534679e-05, "loss": 0.2554, "step": 30720 }, { "epoch": 2.4887394685677253, "grad_norm": 0.06783681362867355, "learning_rate": 8.395967415275215e-05, "loss": 0.253, "step": 30721 }, { "epoch": 2.4888204795852236, "grad_norm": 0.06444837898015976, "learning_rate": 8.395517350015752e-05, "loss": 0.2713, "step": 30722 }, { "epoch": 2.488901490602722, "grad_norm": 0.06186733394861221, "learning_rate": 8.395067284756291e-05, "loss": 0.246, "step": 30723 }, { "epoch": 2.4889825016202205, "grad_norm": 0.06374796479940414, "learning_rate": 8.394617219496827e-05, "loss": 0.2104, "step": 30724 }, { "epoch": 2.4890635126377187, "grad_norm": 0.05994171276688576, "learning_rate": 8.394167154237364e-05, "loss": 0.2266, "step": 30725 }, { "epoch": 2.489144523655217, "grad_norm": 0.06336595863103867, "learning_rate": 8.393717088977903e-05, "loss": 0.2484, "step": 30726 }, { "epoch": 2.4892255346727157, "grad_norm": 0.07264426350593567, "learning_rate": 8.393267023718439e-05, "loss": 0.2569, "step": 30727 }, { "epoch": 2.489306545690214, "grad_norm": 0.05867601931095123, "learning_rate": 8.392816958458977e-05, "loss": 0.2649, "step": 30728 }, { "epoch": 2.489387556707712, "grad_norm": 0.06587252020835876, "learning_rate": 8.392366893199515e-05, "loss": 0.24, "step": 30729 }, { "epoch": 2.4894685677252104, "grad_norm": 0.0639888271689415, "learning_rate": 8.391916827940051e-05, "loss": 0.2455, "step": 30730 }, { "epoch": 2.489549578742709, "grad_norm": 0.05863109230995178, "learning_rate": 8.391466762680589e-05, "loss": 0.2364, "step": 30731 }, { "epoch": 2.4896305897602073, "grad_norm": 0.05232294648885727, "learning_rate": 8.391016697421127e-05, "loss": 0.2337, "step": 30732 }, { "epoch": 2.4897116007777056, "grad_norm": 0.07216206192970276, "learning_rate": 8.390566632161663e-05, "loss": 0.2358, "step": 30733 }, { "epoch": 2.4897926117952043, "grad_norm": 0.06885205954313278, "learning_rate": 8.390116566902201e-05, "loss": 0.2556, "step": 30734 }, { "epoch": 2.4898736228127025, "grad_norm": 0.08514875918626785, "learning_rate": 8.38966650164274e-05, "loss": 0.2399, "step": 30735 }, { "epoch": 2.4899546338302008, "grad_norm": 0.0638003721833229, "learning_rate": 8.389216436383275e-05, "loss": 0.2225, "step": 30736 }, { "epoch": 2.4900356448476995, "grad_norm": 0.05131521448493004, "learning_rate": 8.388766371123813e-05, "loss": 0.2403, "step": 30737 }, { "epoch": 2.4901166558651977, "grad_norm": 0.06556036323308945, "learning_rate": 8.388316305864352e-05, "loss": 0.2427, "step": 30738 }, { "epoch": 2.490197666882696, "grad_norm": 0.06367666274309158, "learning_rate": 8.387866240604888e-05, "loss": 0.2795, "step": 30739 }, { "epoch": 2.4902786779001946, "grad_norm": 0.07163514196872711, "learning_rate": 8.387416175345425e-05, "loss": 0.285, "step": 30740 }, { "epoch": 2.490359688917693, "grad_norm": 0.07182058691978455, "learning_rate": 8.386966110085964e-05, "loss": 0.2071, "step": 30741 }, { "epoch": 2.490440699935191, "grad_norm": 0.07572603970766068, "learning_rate": 8.3865160448265e-05, "loss": 0.2247, "step": 30742 }, { "epoch": 2.4905217109526894, "grad_norm": 0.07965695858001709, "learning_rate": 8.386065979567037e-05, "loss": 0.2605, "step": 30743 }, { "epoch": 2.490602721970188, "grad_norm": 0.06812844425439835, "learning_rate": 8.385615914307576e-05, "loss": 0.2173, "step": 30744 }, { "epoch": 2.4906837329876863, "grad_norm": 0.059363484382629395, "learning_rate": 8.385165849048112e-05, "loss": 0.213, "step": 30745 }, { "epoch": 2.4907647440051845, "grad_norm": 0.05940215289592743, "learning_rate": 8.38471578378865e-05, "loss": 0.2231, "step": 30746 }, { "epoch": 2.4908457550226832, "grad_norm": 0.0675797238945961, "learning_rate": 8.384265718529188e-05, "loss": 0.2375, "step": 30747 }, { "epoch": 2.4909267660401815, "grad_norm": 0.07645593583583832, "learning_rate": 8.383815653269724e-05, "loss": 0.2662, "step": 30748 }, { "epoch": 2.4910077770576797, "grad_norm": 0.06828924268484116, "learning_rate": 8.383365588010262e-05, "loss": 0.2422, "step": 30749 }, { "epoch": 2.4910887880751784, "grad_norm": 0.05796194076538086, "learning_rate": 8.3829155227508e-05, "loss": 0.2714, "step": 30750 }, { "epoch": 2.4911697990926767, "grad_norm": 0.05657875910401344, "learning_rate": 8.382465457491336e-05, "loss": 0.2778, "step": 30751 }, { "epoch": 2.491250810110175, "grad_norm": 0.06976671516895294, "learning_rate": 8.382015392231875e-05, "loss": 0.2602, "step": 30752 }, { "epoch": 2.491331821127673, "grad_norm": 0.0654015988111496, "learning_rate": 8.381565326972412e-05, "loss": 0.2255, "step": 30753 }, { "epoch": 2.491412832145172, "grad_norm": 0.07128993421792984, "learning_rate": 8.381115261712948e-05, "loss": 0.2695, "step": 30754 }, { "epoch": 2.49149384316267, "grad_norm": 0.06543438136577606, "learning_rate": 8.380665196453487e-05, "loss": 0.2221, "step": 30755 }, { "epoch": 2.4915748541801683, "grad_norm": 0.06640952825546265, "learning_rate": 8.380215131194024e-05, "loss": 0.2306, "step": 30756 }, { "epoch": 2.491655865197667, "grad_norm": 0.07250026613473892, "learning_rate": 8.37976506593456e-05, "loss": 0.2375, "step": 30757 }, { "epoch": 2.4917368762151653, "grad_norm": 0.07018820941448212, "learning_rate": 8.379315000675099e-05, "loss": 0.2903, "step": 30758 }, { "epoch": 2.4918178872326635, "grad_norm": 0.08030076324939728, "learning_rate": 8.378864935415636e-05, "loss": 0.2615, "step": 30759 }, { "epoch": 2.491898898250162, "grad_norm": 0.08757913112640381, "learning_rate": 8.378414870156172e-05, "loss": 0.2731, "step": 30760 }, { "epoch": 2.4919799092676604, "grad_norm": 0.08045205473899841, "learning_rate": 8.377964804896711e-05, "loss": 0.2832, "step": 30761 }, { "epoch": 2.4920609202851587, "grad_norm": 0.06057002395391464, "learning_rate": 8.377514739637248e-05, "loss": 0.2432, "step": 30762 }, { "epoch": 2.4921419313026574, "grad_norm": 0.05765476077795029, "learning_rate": 8.377064674377784e-05, "loss": 0.247, "step": 30763 }, { "epoch": 2.4922229423201556, "grad_norm": 0.056977640837430954, "learning_rate": 8.376614609118323e-05, "loss": 0.2014, "step": 30764 }, { "epoch": 2.492303953337654, "grad_norm": 0.07265925407409668, "learning_rate": 8.37616454385886e-05, "loss": 0.2569, "step": 30765 }, { "epoch": 2.492384964355152, "grad_norm": 0.06928717344999313, "learning_rate": 8.375714478599396e-05, "loss": 0.2234, "step": 30766 }, { "epoch": 2.492465975372651, "grad_norm": 0.06462343037128448, "learning_rate": 8.375264413339935e-05, "loss": 0.2669, "step": 30767 }, { "epoch": 2.492546986390149, "grad_norm": 0.08052459359169006, "learning_rate": 8.374814348080472e-05, "loss": 0.236, "step": 30768 }, { "epoch": 2.4926279974076473, "grad_norm": 0.06301391124725342, "learning_rate": 8.374364282821008e-05, "loss": 0.2311, "step": 30769 }, { "epoch": 2.492709008425146, "grad_norm": 0.077354796230793, "learning_rate": 8.373914217561547e-05, "loss": 0.2357, "step": 30770 }, { "epoch": 2.4927900194426442, "grad_norm": 0.06379301846027374, "learning_rate": 8.373464152302084e-05, "loss": 0.2072, "step": 30771 }, { "epoch": 2.4928710304601425, "grad_norm": 0.06881890445947647, "learning_rate": 8.373014087042622e-05, "loss": 0.248, "step": 30772 }, { "epoch": 2.4929520414776407, "grad_norm": 0.0689517930150032, "learning_rate": 8.372564021783159e-05, "loss": 0.2441, "step": 30773 }, { "epoch": 2.4930330524951394, "grad_norm": 0.05295158922672272, "learning_rate": 8.372113956523696e-05, "loss": 0.2357, "step": 30774 }, { "epoch": 2.4931140635126376, "grad_norm": 0.06985612958669662, "learning_rate": 8.371663891264234e-05, "loss": 0.2318, "step": 30775 }, { "epoch": 2.493195074530136, "grad_norm": 0.0848032534122467, "learning_rate": 8.371213826004771e-05, "loss": 0.2192, "step": 30776 }, { "epoch": 2.4932760855476346, "grad_norm": 0.06986959278583527, "learning_rate": 8.370763760745309e-05, "loss": 0.2328, "step": 30777 }, { "epoch": 2.493357096565133, "grad_norm": 0.07506789267063141, "learning_rate": 8.370313695485846e-05, "loss": 0.2663, "step": 30778 }, { "epoch": 2.493438107582631, "grad_norm": 0.07147037237882614, "learning_rate": 8.369863630226383e-05, "loss": 0.2346, "step": 30779 }, { "epoch": 2.4935191186001298, "grad_norm": 0.06762287020683289, "learning_rate": 8.36941356496692e-05, "loss": 0.2223, "step": 30780 }, { "epoch": 2.493600129617628, "grad_norm": 0.0584288164973259, "learning_rate": 8.368963499707458e-05, "loss": 0.2497, "step": 30781 }, { "epoch": 2.4936811406351262, "grad_norm": 0.07436443120241165, "learning_rate": 8.368513434447995e-05, "loss": 0.2495, "step": 30782 }, { "epoch": 2.493762151652625, "grad_norm": 0.06659824401140213, "learning_rate": 8.368063369188533e-05, "loss": 0.2361, "step": 30783 }, { "epoch": 2.493843162670123, "grad_norm": 0.07718595117330551, "learning_rate": 8.36761330392907e-05, "loss": 0.2793, "step": 30784 }, { "epoch": 2.4939241736876214, "grad_norm": 0.06469152122735977, "learning_rate": 8.367163238669607e-05, "loss": 0.2164, "step": 30785 }, { "epoch": 2.49400518470512, "grad_norm": 0.07271499186754227, "learning_rate": 8.366713173410145e-05, "loss": 0.2249, "step": 30786 }, { "epoch": 2.4940861957226184, "grad_norm": 0.06968480348587036, "learning_rate": 8.366263108150682e-05, "loss": 0.1998, "step": 30787 }, { "epoch": 2.4941672067401166, "grad_norm": 0.05411296710371971, "learning_rate": 8.36581304289122e-05, "loss": 0.2202, "step": 30788 }, { "epoch": 2.494248217757615, "grad_norm": 0.08066213130950928, "learning_rate": 8.365362977631757e-05, "loss": 0.2692, "step": 30789 }, { "epoch": 2.4943292287751135, "grad_norm": 0.07861744612455368, "learning_rate": 8.364912912372294e-05, "loss": 0.2448, "step": 30790 }, { "epoch": 2.494410239792612, "grad_norm": 0.06976334750652313, "learning_rate": 8.364462847112832e-05, "loss": 0.2366, "step": 30791 }, { "epoch": 2.49449125081011, "grad_norm": 0.06461894512176514, "learning_rate": 8.364012781853369e-05, "loss": 0.2481, "step": 30792 }, { "epoch": 2.4945722618276087, "grad_norm": 0.08025755733251572, "learning_rate": 8.363562716593906e-05, "loss": 0.2452, "step": 30793 }, { "epoch": 2.494653272845107, "grad_norm": 0.061761967837810516, "learning_rate": 8.363112651334444e-05, "loss": 0.2833, "step": 30794 }, { "epoch": 2.494734283862605, "grad_norm": 0.0678744688630104, "learning_rate": 8.362662586074981e-05, "loss": 0.2691, "step": 30795 }, { "epoch": 2.4948152948801035, "grad_norm": 0.05866160988807678, "learning_rate": 8.362212520815518e-05, "loss": 0.2171, "step": 30796 }, { "epoch": 2.494896305897602, "grad_norm": 0.04981419816613197, "learning_rate": 8.361762455556056e-05, "loss": 0.2361, "step": 30797 }, { "epoch": 2.4949773169151004, "grad_norm": 0.059023331850767136, "learning_rate": 8.361312390296593e-05, "loss": 0.2385, "step": 30798 }, { "epoch": 2.4950583279325986, "grad_norm": 0.0639590322971344, "learning_rate": 8.36086232503713e-05, "loss": 0.236, "step": 30799 }, { "epoch": 2.4951393389500973, "grad_norm": 0.06808973848819733, "learning_rate": 8.360412259777668e-05, "loss": 0.2893, "step": 30800 }, { "epoch": 2.4952203499675956, "grad_norm": 0.07416072487831116, "learning_rate": 8.359962194518207e-05, "loss": 0.2832, "step": 30801 }, { "epoch": 2.495301360985094, "grad_norm": 0.0650864839553833, "learning_rate": 8.359512129258743e-05, "loss": 0.2546, "step": 30802 }, { "epoch": 2.4953823720025925, "grad_norm": 0.08762753009796143, "learning_rate": 8.35906206399928e-05, "loss": 0.2987, "step": 30803 }, { "epoch": 2.4954633830200907, "grad_norm": 0.061661556363105774, "learning_rate": 8.358611998739819e-05, "loss": 0.2546, "step": 30804 }, { "epoch": 2.495544394037589, "grad_norm": 0.06270502507686615, "learning_rate": 8.358161933480355e-05, "loss": 0.2302, "step": 30805 }, { "epoch": 2.4956254050550877, "grad_norm": 0.07116597890853882, "learning_rate": 8.357711868220892e-05, "loss": 0.2538, "step": 30806 }, { "epoch": 2.495706416072586, "grad_norm": 0.06109967827796936, "learning_rate": 8.357261802961431e-05, "loss": 0.2781, "step": 30807 }, { "epoch": 2.495787427090084, "grad_norm": 0.0619681179523468, "learning_rate": 8.356811737701967e-05, "loss": 0.2463, "step": 30808 }, { "epoch": 2.495868438107583, "grad_norm": 0.051027651876211166, "learning_rate": 8.356361672442504e-05, "loss": 0.2279, "step": 30809 }, { "epoch": 2.495949449125081, "grad_norm": 0.07194460928440094, "learning_rate": 8.355911607183043e-05, "loss": 0.2074, "step": 30810 }, { "epoch": 2.4960304601425793, "grad_norm": 0.051879867911338806, "learning_rate": 8.355461541923579e-05, "loss": 0.2447, "step": 30811 }, { "epoch": 2.4961114711600776, "grad_norm": 0.06579706072807312, "learning_rate": 8.355011476664116e-05, "loss": 0.293, "step": 30812 }, { "epoch": 2.4961924821775763, "grad_norm": 0.06795860826969147, "learning_rate": 8.354561411404655e-05, "loss": 0.2648, "step": 30813 }, { "epoch": 2.4962734931950745, "grad_norm": 0.06433531641960144, "learning_rate": 8.354111346145191e-05, "loss": 0.2001, "step": 30814 }, { "epoch": 2.4963545042125728, "grad_norm": 0.07233475148677826, "learning_rate": 8.353661280885728e-05, "loss": 0.284, "step": 30815 }, { "epoch": 2.4964355152300715, "grad_norm": 0.06656422466039658, "learning_rate": 8.353211215626267e-05, "loss": 0.2203, "step": 30816 }, { "epoch": 2.4965165262475697, "grad_norm": 0.07233376801013947, "learning_rate": 8.352761150366803e-05, "loss": 0.2488, "step": 30817 }, { "epoch": 2.496597537265068, "grad_norm": 0.05535171553492546, "learning_rate": 8.35231108510734e-05, "loss": 0.2426, "step": 30818 }, { "epoch": 2.496678548282566, "grad_norm": 0.06830519437789917, "learning_rate": 8.351861019847879e-05, "loss": 0.2119, "step": 30819 }, { "epoch": 2.496759559300065, "grad_norm": 0.05747649446129799, "learning_rate": 8.351410954588415e-05, "loss": 0.1943, "step": 30820 }, { "epoch": 2.496840570317563, "grad_norm": 0.06299077719449997, "learning_rate": 8.350960889328952e-05, "loss": 0.2415, "step": 30821 }, { "epoch": 2.4969215813350614, "grad_norm": 0.06807470321655273, "learning_rate": 8.350510824069491e-05, "loss": 0.2317, "step": 30822 }, { "epoch": 2.49700259235256, "grad_norm": 0.07347835600376129, "learning_rate": 8.350060758810027e-05, "loss": 0.2706, "step": 30823 }, { "epoch": 2.4970836033700583, "grad_norm": 0.06623311340808868, "learning_rate": 8.349610693550565e-05, "loss": 0.2308, "step": 30824 }, { "epoch": 2.4971646143875565, "grad_norm": 0.065008744597435, "learning_rate": 8.349160628291103e-05, "loss": 0.243, "step": 30825 }, { "epoch": 2.4972456254050552, "grad_norm": 0.0647854432463646, "learning_rate": 8.348710563031639e-05, "loss": 0.2314, "step": 30826 }, { "epoch": 2.4973266364225535, "grad_norm": 0.07481033354997635, "learning_rate": 8.348260497772178e-05, "loss": 0.2432, "step": 30827 }, { "epoch": 2.4974076474400517, "grad_norm": 0.06997375190258026, "learning_rate": 8.347810432512715e-05, "loss": 0.2522, "step": 30828 }, { "epoch": 2.4974886584575504, "grad_norm": 0.06547009944915771, "learning_rate": 8.347360367253251e-05, "loss": 0.2632, "step": 30829 }, { "epoch": 2.4975696694750487, "grad_norm": 0.06076023727655411, "learning_rate": 8.34691030199379e-05, "loss": 0.2494, "step": 30830 }, { "epoch": 2.497650680492547, "grad_norm": 0.07652776688337326, "learning_rate": 8.346460236734327e-05, "loss": 0.261, "step": 30831 }, { "epoch": 2.4977316915100456, "grad_norm": 0.05655470862984657, "learning_rate": 8.346010171474863e-05, "loss": 0.2281, "step": 30832 }, { "epoch": 2.497812702527544, "grad_norm": 0.06015842780470848, "learning_rate": 8.345560106215402e-05, "loss": 0.2345, "step": 30833 }, { "epoch": 2.497893713545042, "grad_norm": 0.06424501538276672, "learning_rate": 8.34511004095594e-05, "loss": 0.2638, "step": 30834 }, { "epoch": 2.4979747245625403, "grad_norm": 0.07210192084312439, "learning_rate": 8.344659975696475e-05, "loss": 0.252, "step": 30835 }, { "epoch": 2.498055735580039, "grad_norm": 0.09136870503425598, "learning_rate": 8.344209910437014e-05, "loss": 0.2684, "step": 30836 }, { "epoch": 2.4981367465975373, "grad_norm": 0.05969814956188202, "learning_rate": 8.343759845177552e-05, "loss": 0.2264, "step": 30837 }, { "epoch": 2.4982177576150355, "grad_norm": 0.07652722299098969, "learning_rate": 8.343309779918088e-05, "loss": 0.2453, "step": 30838 }, { "epoch": 2.498298768632534, "grad_norm": 0.05736469849944115, "learning_rate": 8.342859714658626e-05, "loss": 0.243, "step": 30839 }, { "epoch": 2.4983797796500324, "grad_norm": 0.061904434114694595, "learning_rate": 8.342409649399164e-05, "loss": 0.2392, "step": 30840 }, { "epoch": 2.4984607906675307, "grad_norm": 0.07190416753292084, "learning_rate": 8.3419595841397e-05, "loss": 0.2707, "step": 30841 }, { "epoch": 2.498541801685029, "grad_norm": 0.08621451258659363, "learning_rate": 8.341509518880238e-05, "loss": 0.248, "step": 30842 }, { "epoch": 2.4986228127025276, "grad_norm": 0.0821828618645668, "learning_rate": 8.341059453620776e-05, "loss": 0.2222, "step": 30843 }, { "epoch": 2.498703823720026, "grad_norm": 0.05787357687950134, "learning_rate": 8.340609388361312e-05, "loss": 0.2421, "step": 30844 }, { "epoch": 2.498784834737524, "grad_norm": 0.05896326154470444, "learning_rate": 8.34015932310185e-05, "loss": 0.2403, "step": 30845 }, { "epoch": 2.498865845755023, "grad_norm": 0.059107761830091476, "learning_rate": 8.339709257842388e-05, "loss": 0.2261, "step": 30846 }, { "epoch": 2.498946856772521, "grad_norm": 0.06375333666801453, "learning_rate": 8.339259192582924e-05, "loss": 0.2294, "step": 30847 }, { "epoch": 2.4990278677900193, "grad_norm": 0.07359378039836884, "learning_rate": 8.338809127323463e-05, "loss": 0.2555, "step": 30848 }, { "epoch": 2.499108878807518, "grad_norm": 0.06330002844333649, "learning_rate": 8.338359062064e-05, "loss": 0.2664, "step": 30849 }, { "epoch": 2.499189889825016, "grad_norm": 0.06739085167646408, "learning_rate": 8.337908996804536e-05, "loss": 0.2217, "step": 30850 }, { "epoch": 2.4992709008425145, "grad_norm": 0.07190203666687012, "learning_rate": 8.337458931545075e-05, "loss": 0.2355, "step": 30851 }, { "epoch": 2.499351911860013, "grad_norm": 0.06133474037051201, "learning_rate": 8.337008866285612e-05, "loss": 0.1959, "step": 30852 }, { "epoch": 2.4994329228775114, "grad_norm": 0.0660034790635109, "learning_rate": 8.336558801026149e-05, "loss": 0.2614, "step": 30853 }, { "epoch": 2.4995139338950096, "grad_norm": 0.06458307802677155, "learning_rate": 8.336108735766687e-05, "loss": 0.262, "step": 30854 }, { "epoch": 2.4995949449125083, "grad_norm": 0.06402486562728882, "learning_rate": 8.335658670507224e-05, "loss": 0.2258, "step": 30855 }, { "epoch": 2.4996759559300066, "grad_norm": 0.07437600940465927, "learning_rate": 8.335208605247761e-05, "loss": 0.2691, "step": 30856 }, { "epoch": 2.499756966947505, "grad_norm": 0.08359698206186295, "learning_rate": 8.334758539988299e-05, "loss": 0.2653, "step": 30857 }, { "epoch": 2.499837977965003, "grad_norm": 0.07374858856201172, "learning_rate": 8.334308474728836e-05, "loss": 0.2155, "step": 30858 }, { "epoch": 2.4999189889825018, "grad_norm": 0.06222660467028618, "learning_rate": 8.333858409469373e-05, "loss": 0.207, "step": 30859 }, { "epoch": 2.5, "grad_norm": 0.05293847993016243, "learning_rate": 8.333408344209911e-05, "loss": 0.2201, "step": 30860 }, { "epoch": 2.5000810110174982, "grad_norm": 0.07100159674882889, "learning_rate": 8.332958278950448e-05, "loss": 0.2418, "step": 30861 }, { "epoch": 2.5001620220349965, "grad_norm": 0.06915083527565002, "learning_rate": 8.332508213690986e-05, "loss": 0.2505, "step": 30862 }, { "epoch": 2.500243033052495, "grad_norm": 0.07485393434762955, "learning_rate": 8.332058148431523e-05, "loss": 0.2725, "step": 30863 }, { "epoch": 2.5003240440699934, "grad_norm": 0.06454658508300781, "learning_rate": 8.33160808317206e-05, "loss": 0.241, "step": 30864 }, { "epoch": 2.5004050550874917, "grad_norm": 0.07296113669872284, "learning_rate": 8.331158017912598e-05, "loss": 0.2803, "step": 30865 }, { "epoch": 2.5004860661049904, "grad_norm": 0.0719156339764595, "learning_rate": 8.330707952653135e-05, "loss": 0.2439, "step": 30866 }, { "epoch": 2.5005670771224886, "grad_norm": 0.06684891879558563, "learning_rate": 8.330257887393672e-05, "loss": 0.2381, "step": 30867 }, { "epoch": 2.500648088139987, "grad_norm": 0.06705359369516373, "learning_rate": 8.32980782213421e-05, "loss": 0.2667, "step": 30868 }, { "epoch": 2.5007290991574855, "grad_norm": 0.06898030638694763, "learning_rate": 8.329357756874747e-05, "loss": 0.238, "step": 30869 }, { "epoch": 2.500810110174984, "grad_norm": 0.07460542023181915, "learning_rate": 8.328907691615284e-05, "loss": 0.2264, "step": 30870 }, { "epoch": 2.500891121192482, "grad_norm": 0.06566330790519714, "learning_rate": 8.328457626355822e-05, "loss": 0.2231, "step": 30871 }, { "epoch": 2.5009721322099807, "grad_norm": 0.06869935244321823, "learning_rate": 8.328007561096359e-05, "loss": 0.2458, "step": 30872 }, { "epoch": 2.501053143227479, "grad_norm": 0.08810488879680634, "learning_rate": 8.327557495836897e-05, "loss": 0.2617, "step": 30873 }, { "epoch": 2.501134154244977, "grad_norm": 0.06041054055094719, "learning_rate": 8.327107430577434e-05, "loss": 0.2252, "step": 30874 }, { "epoch": 2.501215165262476, "grad_norm": 0.0637449398636818, "learning_rate": 8.326657365317971e-05, "loss": 0.2395, "step": 30875 }, { "epoch": 2.501296176279974, "grad_norm": 0.0678592324256897, "learning_rate": 8.326207300058509e-05, "loss": 0.2409, "step": 30876 }, { "epoch": 2.5013771872974724, "grad_norm": 0.06725373864173889, "learning_rate": 8.325757234799046e-05, "loss": 0.2309, "step": 30877 }, { "epoch": 2.501458198314971, "grad_norm": 0.050262413918972015, "learning_rate": 8.325307169539583e-05, "loss": 0.2047, "step": 30878 }, { "epoch": 2.5015392093324693, "grad_norm": 0.07753513753414154, "learning_rate": 8.324857104280122e-05, "loss": 0.2693, "step": 30879 }, { "epoch": 2.5016202203499676, "grad_norm": 0.05689910426735878, "learning_rate": 8.324407039020658e-05, "loss": 0.2371, "step": 30880 }, { "epoch": 2.5017012313674662, "grad_norm": 0.06461703777313232, "learning_rate": 8.323956973761195e-05, "loss": 0.1844, "step": 30881 }, { "epoch": 2.5017822423849645, "grad_norm": 0.07810285687446594, "learning_rate": 8.323506908501734e-05, "loss": 0.2965, "step": 30882 }, { "epoch": 2.5018632534024627, "grad_norm": 0.06737931817770004, "learning_rate": 8.32305684324227e-05, "loss": 0.2273, "step": 30883 }, { "epoch": 2.501944264419961, "grad_norm": 0.06584341824054718, "learning_rate": 8.322606777982807e-05, "loss": 0.243, "step": 30884 }, { "epoch": 2.5020252754374592, "grad_norm": 0.061913296580314636, "learning_rate": 8.322156712723346e-05, "loss": 0.215, "step": 30885 }, { "epoch": 2.502106286454958, "grad_norm": 0.08903798460960388, "learning_rate": 8.321706647463882e-05, "loss": 0.2401, "step": 30886 }, { "epoch": 2.502187297472456, "grad_norm": 0.05823826417326927, "learning_rate": 8.32125658220442e-05, "loss": 0.2282, "step": 30887 }, { "epoch": 2.5022683084899544, "grad_norm": 0.06540390849113464, "learning_rate": 8.320806516944958e-05, "loss": 0.2232, "step": 30888 }, { "epoch": 2.502349319507453, "grad_norm": 0.07079609483480453, "learning_rate": 8.320356451685494e-05, "loss": 0.273, "step": 30889 }, { "epoch": 2.5024303305249513, "grad_norm": 0.06377361714839935, "learning_rate": 8.319906386426032e-05, "loss": 0.2508, "step": 30890 }, { "epoch": 2.5025113415424496, "grad_norm": 0.05838244780898094, "learning_rate": 8.31945632116657e-05, "loss": 0.243, "step": 30891 }, { "epoch": 2.5025923525599483, "grad_norm": 0.0708763375878334, "learning_rate": 8.319006255907106e-05, "loss": 0.2257, "step": 30892 }, { "epoch": 2.5026733635774465, "grad_norm": 0.05726628750562668, "learning_rate": 8.318556190647644e-05, "loss": 0.233, "step": 30893 }, { "epoch": 2.5027543745949448, "grad_norm": 0.08269777148962021, "learning_rate": 8.318106125388182e-05, "loss": 0.2664, "step": 30894 }, { "epoch": 2.5028353856124435, "grad_norm": 0.0771472156047821, "learning_rate": 8.317656060128718e-05, "loss": 0.2415, "step": 30895 }, { "epoch": 2.5029163966299417, "grad_norm": 0.0826282948255539, "learning_rate": 8.317205994869256e-05, "loss": 0.2691, "step": 30896 }, { "epoch": 2.50299740764744, "grad_norm": 0.05811542645096779, "learning_rate": 8.316755929609795e-05, "loss": 0.2405, "step": 30897 }, { "epoch": 2.5030784186649386, "grad_norm": 0.06644046306610107, "learning_rate": 8.31630586435033e-05, "loss": 0.2587, "step": 30898 }, { "epoch": 2.503159429682437, "grad_norm": 0.06532420963048935, "learning_rate": 8.315855799090868e-05, "loss": 0.2284, "step": 30899 }, { "epoch": 2.503240440699935, "grad_norm": 0.08071818947792053, "learning_rate": 8.315405733831407e-05, "loss": 0.2254, "step": 30900 }, { "epoch": 2.503321451717434, "grad_norm": 0.06762257218360901, "learning_rate": 8.314955668571943e-05, "loss": 0.2662, "step": 30901 }, { "epoch": 2.503402462734932, "grad_norm": 0.07281740754842758, "learning_rate": 8.31450560331248e-05, "loss": 0.2385, "step": 30902 }, { "epoch": 2.5034834737524303, "grad_norm": 0.05427378788590431, "learning_rate": 8.314055538053019e-05, "loss": 0.2264, "step": 30903 }, { "epoch": 2.5035644847699285, "grad_norm": 0.07130347937345505, "learning_rate": 8.313605472793555e-05, "loss": 0.2604, "step": 30904 }, { "epoch": 2.5036454957874272, "grad_norm": 0.062126901000738144, "learning_rate": 8.313155407534093e-05, "loss": 0.2492, "step": 30905 }, { "epoch": 2.5037265068049255, "grad_norm": 0.061227038502693176, "learning_rate": 8.312705342274631e-05, "loss": 0.2367, "step": 30906 }, { "epoch": 2.5038075178224237, "grad_norm": 0.06333401799201965, "learning_rate": 8.312255277015167e-05, "loss": 0.2348, "step": 30907 }, { "epoch": 2.503888528839922, "grad_norm": 0.05559328943490982, "learning_rate": 8.311805211755705e-05, "loss": 0.2244, "step": 30908 }, { "epoch": 2.5039695398574207, "grad_norm": 0.055634625256061554, "learning_rate": 8.311355146496243e-05, "loss": 0.2136, "step": 30909 }, { "epoch": 2.504050550874919, "grad_norm": 0.09396779537200928, "learning_rate": 8.310905081236779e-05, "loss": 0.2394, "step": 30910 }, { "epoch": 2.504131561892417, "grad_norm": 0.06352783739566803, "learning_rate": 8.310455015977318e-05, "loss": 0.233, "step": 30911 }, { "epoch": 2.504212572909916, "grad_norm": 0.06977412849664688, "learning_rate": 8.310004950717855e-05, "loss": 0.2355, "step": 30912 }, { "epoch": 2.504293583927414, "grad_norm": 0.05852169543504715, "learning_rate": 8.309554885458391e-05, "loss": 0.242, "step": 30913 }, { "epoch": 2.5043745949449123, "grad_norm": 0.0841292068362236, "learning_rate": 8.30910482019893e-05, "loss": 0.272, "step": 30914 }, { "epoch": 2.504455605962411, "grad_norm": 0.07529615610837936, "learning_rate": 8.308654754939467e-05, "loss": 0.2463, "step": 30915 }, { "epoch": 2.5045366169799093, "grad_norm": 0.06277451664209366, "learning_rate": 8.308204689680003e-05, "loss": 0.2405, "step": 30916 }, { "epoch": 2.5046176279974075, "grad_norm": 0.06636457145214081, "learning_rate": 8.307754624420542e-05, "loss": 0.2437, "step": 30917 }, { "epoch": 2.504698639014906, "grad_norm": 0.07447199523448944, "learning_rate": 8.307304559161079e-05, "loss": 0.2237, "step": 30918 }, { "epoch": 2.5047796500324044, "grad_norm": 0.07748948782682419, "learning_rate": 8.306854493901615e-05, "loss": 0.2311, "step": 30919 }, { "epoch": 2.5048606610499027, "grad_norm": 0.07205738872289658, "learning_rate": 8.306404428642154e-05, "loss": 0.2384, "step": 30920 }, { "epoch": 2.5049416720674014, "grad_norm": 0.06956085562705994, "learning_rate": 8.305954363382691e-05, "loss": 0.2322, "step": 30921 }, { "epoch": 2.5050226830848996, "grad_norm": 0.07650784403085709, "learning_rate": 8.305504298123227e-05, "loss": 0.2319, "step": 30922 }, { "epoch": 2.505103694102398, "grad_norm": 0.06893320381641388, "learning_rate": 8.305054232863766e-05, "loss": 0.2731, "step": 30923 }, { "epoch": 2.5051847051198965, "grad_norm": 0.1009536162018776, "learning_rate": 8.304604167604303e-05, "loss": 0.252, "step": 30924 }, { "epoch": 2.505265716137395, "grad_norm": 0.07383900880813599, "learning_rate": 8.304154102344839e-05, "loss": 0.2708, "step": 30925 }, { "epoch": 2.505346727154893, "grad_norm": 0.06586895883083344, "learning_rate": 8.303704037085378e-05, "loss": 0.2802, "step": 30926 }, { "epoch": 2.5054277381723913, "grad_norm": 0.06558408588171005, "learning_rate": 8.303253971825915e-05, "loss": 0.2248, "step": 30927 }, { "epoch": 2.50550874918989, "grad_norm": 0.059231966733932495, "learning_rate": 8.302803906566451e-05, "loss": 0.2249, "step": 30928 }, { "epoch": 2.505589760207388, "grad_norm": 0.07415720075368881, "learning_rate": 8.30235384130699e-05, "loss": 0.2538, "step": 30929 }, { "epoch": 2.5056707712248865, "grad_norm": 0.06834818422794342, "learning_rate": 8.301903776047527e-05, "loss": 0.2339, "step": 30930 }, { "epoch": 2.5057517822423847, "grad_norm": 0.06618189066648483, "learning_rate": 8.301453710788065e-05, "loss": 0.2409, "step": 30931 }, { "epoch": 2.5058327932598834, "grad_norm": 0.06760060787200928, "learning_rate": 8.301003645528602e-05, "loss": 0.2271, "step": 30932 }, { "epoch": 2.5059138042773816, "grad_norm": 0.08346579223871231, "learning_rate": 8.30055358026914e-05, "loss": 0.2185, "step": 30933 }, { "epoch": 2.50599481529488, "grad_norm": 0.059812482446432114, "learning_rate": 8.300103515009677e-05, "loss": 0.2391, "step": 30934 }, { "epoch": 2.5060758263123786, "grad_norm": 0.1017281636595726, "learning_rate": 8.299653449750214e-05, "loss": 0.2935, "step": 30935 }, { "epoch": 2.506156837329877, "grad_norm": 0.059621043503284454, "learning_rate": 8.299203384490752e-05, "loss": 0.2088, "step": 30936 }, { "epoch": 2.506237848347375, "grad_norm": 0.0738162249326706, "learning_rate": 8.298753319231289e-05, "loss": 0.2646, "step": 30937 }, { "epoch": 2.5063188593648738, "grad_norm": 0.07516893744468689, "learning_rate": 8.298303253971826e-05, "loss": 0.2495, "step": 30938 }, { "epoch": 2.506399870382372, "grad_norm": 0.0615583173930645, "learning_rate": 8.297853188712364e-05, "loss": 0.2633, "step": 30939 }, { "epoch": 2.5064808813998702, "grad_norm": 0.07958000898361206, "learning_rate": 8.297403123452901e-05, "loss": 0.261, "step": 30940 }, { "epoch": 2.506561892417369, "grad_norm": 0.0821593627333641, "learning_rate": 8.296953058193438e-05, "loss": 0.2462, "step": 30941 }, { "epoch": 2.506642903434867, "grad_norm": 0.0779246836900711, "learning_rate": 8.296502992933976e-05, "loss": 0.268, "step": 30942 }, { "epoch": 2.5067239144523654, "grad_norm": 0.07051964849233627, "learning_rate": 8.296052927674513e-05, "loss": 0.2419, "step": 30943 }, { "epoch": 2.506804925469864, "grad_norm": 0.06869851052761078, "learning_rate": 8.29560286241505e-05, "loss": 0.2333, "step": 30944 }, { "epoch": 2.5068859364873624, "grad_norm": 0.06015220284461975, "learning_rate": 8.295152797155588e-05, "loss": 0.2614, "step": 30945 }, { "epoch": 2.5069669475048606, "grad_norm": 0.07013320922851562, "learning_rate": 8.294702731896125e-05, "loss": 0.2796, "step": 30946 }, { "epoch": 2.5070479585223593, "grad_norm": 0.07819762080907822, "learning_rate": 8.294252666636663e-05, "loss": 0.2289, "step": 30947 }, { "epoch": 2.5071289695398575, "grad_norm": 0.0813426524400711, "learning_rate": 8.2938026013772e-05, "loss": 0.2402, "step": 30948 }, { "epoch": 2.5072099805573558, "grad_norm": 0.055428456515073776, "learning_rate": 8.293352536117737e-05, "loss": 0.223, "step": 30949 }, { "epoch": 2.507290991574854, "grad_norm": 0.06621409952640533, "learning_rate": 8.292902470858275e-05, "loss": 0.2153, "step": 30950 }, { "epoch": 2.5073720025923527, "grad_norm": 0.06681319326162338, "learning_rate": 8.292452405598812e-05, "loss": 0.2816, "step": 30951 }, { "epoch": 2.507453013609851, "grad_norm": 0.08249227702617645, "learning_rate": 8.29200234033935e-05, "loss": 0.2652, "step": 30952 }, { "epoch": 2.507534024627349, "grad_norm": 0.06201348453760147, "learning_rate": 8.291552275079887e-05, "loss": 0.2267, "step": 30953 }, { "epoch": 2.5076150356448474, "grad_norm": 0.07167745381593704, "learning_rate": 8.291102209820424e-05, "loss": 0.2452, "step": 30954 }, { "epoch": 2.507696046662346, "grad_norm": 0.050998084247112274, "learning_rate": 8.290652144560961e-05, "loss": 0.2648, "step": 30955 }, { "epoch": 2.5077770576798444, "grad_norm": 0.05879823490977287, "learning_rate": 8.290202079301499e-05, "loss": 0.1901, "step": 30956 }, { "epoch": 2.5078580686973426, "grad_norm": 0.0664614737033844, "learning_rate": 8.289752014042036e-05, "loss": 0.2271, "step": 30957 }, { "epoch": 2.5079390797148413, "grad_norm": 0.05864118039608002, "learning_rate": 8.289301948782574e-05, "loss": 0.2495, "step": 30958 }, { "epoch": 2.5080200907323396, "grad_norm": 0.06172305345535278, "learning_rate": 8.288851883523111e-05, "loss": 0.2209, "step": 30959 }, { "epoch": 2.508101101749838, "grad_norm": 0.07727573812007904, "learning_rate": 8.28840181826365e-05, "loss": 0.2551, "step": 30960 }, { "epoch": 2.5081821127673365, "grad_norm": 0.05355662852525711, "learning_rate": 8.287951753004186e-05, "loss": 0.2044, "step": 30961 }, { "epoch": 2.5082631237848347, "grad_norm": 0.06967973709106445, "learning_rate": 8.287501687744723e-05, "loss": 0.3012, "step": 30962 }, { "epoch": 2.508344134802333, "grad_norm": 0.07389171421527863, "learning_rate": 8.287051622485262e-05, "loss": 0.2043, "step": 30963 }, { "epoch": 2.5084251458198317, "grad_norm": 0.05707792565226555, "learning_rate": 8.286601557225798e-05, "loss": 0.2422, "step": 30964 }, { "epoch": 2.50850615683733, "grad_norm": 0.06958410888910294, "learning_rate": 8.286151491966335e-05, "loss": 0.2354, "step": 30965 }, { "epoch": 2.508587167854828, "grad_norm": 0.06012604758143425, "learning_rate": 8.285701426706874e-05, "loss": 0.198, "step": 30966 }, { "epoch": 2.508668178872327, "grad_norm": 0.06455910205841064, "learning_rate": 8.28525136144741e-05, "loss": 0.242, "step": 30967 }, { "epoch": 2.508749189889825, "grad_norm": 0.07406137138605118, "learning_rate": 8.284801296187947e-05, "loss": 0.2476, "step": 30968 }, { "epoch": 2.5088302009073233, "grad_norm": 0.06192121282219887, "learning_rate": 8.284351230928486e-05, "loss": 0.2304, "step": 30969 }, { "epoch": 2.508911211924822, "grad_norm": 0.0722472220659256, "learning_rate": 8.283901165669022e-05, "loss": 0.277, "step": 30970 }, { "epoch": 2.5089922229423203, "grad_norm": 0.06029026210308075, "learning_rate": 8.283451100409559e-05, "loss": 0.2065, "step": 30971 }, { "epoch": 2.5090732339598185, "grad_norm": 0.06601886451244354, "learning_rate": 8.283001035150098e-05, "loss": 0.2576, "step": 30972 }, { "epoch": 2.5091542449773168, "grad_norm": 0.06260927021503448, "learning_rate": 8.282550969890634e-05, "loss": 0.2336, "step": 30973 }, { "epoch": 2.5092352559948155, "grad_norm": 0.0685477927327156, "learning_rate": 8.282100904631171e-05, "loss": 0.2755, "step": 30974 }, { "epoch": 2.5093162670123137, "grad_norm": 0.06632889062166214, "learning_rate": 8.28165083937171e-05, "loss": 0.2266, "step": 30975 }, { "epoch": 2.509397278029812, "grad_norm": 0.06554222851991653, "learning_rate": 8.281200774112246e-05, "loss": 0.259, "step": 30976 }, { "epoch": 2.50947828904731, "grad_norm": 0.08140553534030914, "learning_rate": 8.280750708852783e-05, "loss": 0.2513, "step": 30977 }, { "epoch": 2.509559300064809, "grad_norm": 0.08227310329675674, "learning_rate": 8.280300643593322e-05, "loss": 0.2705, "step": 30978 }, { "epoch": 2.509640311082307, "grad_norm": 0.06471066921949387, "learning_rate": 8.279850578333858e-05, "loss": 0.2253, "step": 30979 }, { "epoch": 2.5097213220998054, "grad_norm": 0.06800235062837601, "learning_rate": 8.279400513074395e-05, "loss": 0.2571, "step": 30980 }, { "epoch": 2.509802333117304, "grad_norm": 0.06845162063837051, "learning_rate": 8.278950447814934e-05, "loss": 0.26, "step": 30981 }, { "epoch": 2.5098833441348023, "grad_norm": 0.06966293603181839, "learning_rate": 8.27850038255547e-05, "loss": 0.2661, "step": 30982 }, { "epoch": 2.5099643551523005, "grad_norm": 0.06582781672477722, "learning_rate": 8.278050317296008e-05, "loss": 0.2724, "step": 30983 }, { "epoch": 2.5100453661697992, "grad_norm": 0.049061696976423264, "learning_rate": 8.277600252036546e-05, "loss": 0.1902, "step": 30984 }, { "epoch": 2.5101263771872975, "grad_norm": 0.06378032267093658, "learning_rate": 8.277150186777082e-05, "loss": 0.2258, "step": 30985 }, { "epoch": 2.5102073882047957, "grad_norm": 0.07274179905653, "learning_rate": 8.276700121517621e-05, "loss": 0.2643, "step": 30986 }, { "epoch": 2.5102883992222944, "grad_norm": 0.08791711926460266, "learning_rate": 8.276250056258158e-05, "loss": 0.2667, "step": 30987 }, { "epoch": 2.5103694102397927, "grad_norm": 0.06939581036567688, "learning_rate": 8.275799990998694e-05, "loss": 0.2409, "step": 30988 }, { "epoch": 2.510450421257291, "grad_norm": 0.06034819036722183, "learning_rate": 8.275349925739233e-05, "loss": 0.2484, "step": 30989 }, { "epoch": 2.5105314322747896, "grad_norm": 0.06719058752059937, "learning_rate": 8.27489986047977e-05, "loss": 0.2362, "step": 30990 }, { "epoch": 2.510612443292288, "grad_norm": 0.07439959049224854, "learning_rate": 8.274449795220306e-05, "loss": 0.2581, "step": 30991 }, { "epoch": 2.510693454309786, "grad_norm": 0.09280843287706375, "learning_rate": 8.273999729960845e-05, "loss": 0.2783, "step": 30992 }, { "epoch": 2.5107744653272848, "grad_norm": 0.07778385281562805, "learning_rate": 8.273549664701382e-05, "loss": 0.2226, "step": 30993 }, { "epoch": 2.510855476344783, "grad_norm": 0.04843428358435631, "learning_rate": 8.273099599441918e-05, "loss": 0.256, "step": 30994 }, { "epoch": 2.5109364873622813, "grad_norm": 0.07087177038192749, "learning_rate": 8.272649534182457e-05, "loss": 0.2884, "step": 30995 }, { "epoch": 2.5110174983797795, "grad_norm": 0.07463837414979935, "learning_rate": 8.272199468922995e-05, "loss": 0.2664, "step": 30996 }, { "epoch": 2.511098509397278, "grad_norm": 0.07141347974538803, "learning_rate": 8.271749403663532e-05, "loss": 0.26, "step": 30997 }, { "epoch": 2.5111795204147764, "grad_norm": 0.06270520389080048, "learning_rate": 8.271299338404069e-05, "loss": 0.2217, "step": 30998 }, { "epoch": 2.5112605314322747, "grad_norm": 0.06914729624986649, "learning_rate": 8.270849273144607e-05, "loss": 0.2532, "step": 30999 }, { "epoch": 2.511341542449773, "grad_norm": 0.07662202417850494, "learning_rate": 8.270399207885144e-05, "loss": 0.259, "step": 31000 }, { "epoch": 2.5114225534672716, "grad_norm": 0.05497424304485321, "learning_rate": 8.269949142625681e-05, "loss": 0.2289, "step": 31001 }, { "epoch": 2.51150356448477, "grad_norm": 0.09671833366155624, "learning_rate": 8.269499077366219e-05, "loss": 0.3282, "step": 31002 }, { "epoch": 2.511584575502268, "grad_norm": 0.06896227598190308, "learning_rate": 8.269049012106756e-05, "loss": 0.2577, "step": 31003 }, { "epoch": 2.511665586519767, "grad_norm": 0.07261406630277634, "learning_rate": 8.268598946847293e-05, "loss": 0.2413, "step": 31004 }, { "epoch": 2.511746597537265, "grad_norm": 0.06723308563232422, "learning_rate": 8.268148881587831e-05, "loss": 0.2717, "step": 31005 }, { "epoch": 2.5118276085547633, "grad_norm": 0.07040799409151077, "learning_rate": 8.267698816328368e-05, "loss": 0.2719, "step": 31006 }, { "epoch": 2.511908619572262, "grad_norm": 0.057618461549282074, "learning_rate": 8.267248751068906e-05, "loss": 0.2569, "step": 31007 }, { "epoch": 2.51198963058976, "grad_norm": 0.061061207205057144, "learning_rate": 8.266798685809443e-05, "loss": 0.2659, "step": 31008 }, { "epoch": 2.5120706416072585, "grad_norm": 0.06231067329645157, "learning_rate": 8.26634862054998e-05, "loss": 0.2425, "step": 31009 }, { "epoch": 2.512151652624757, "grad_norm": 0.07808933407068253, "learning_rate": 8.265898555290518e-05, "loss": 0.2279, "step": 31010 }, { "epoch": 2.5122326636422554, "grad_norm": 0.06471867859363556, "learning_rate": 8.265448490031055e-05, "loss": 0.238, "step": 31011 }, { "epoch": 2.5123136746597536, "grad_norm": 0.0662679597735405, "learning_rate": 8.264998424771592e-05, "loss": 0.2606, "step": 31012 }, { "epoch": 2.5123946856772523, "grad_norm": 0.07310327142477036, "learning_rate": 8.26454835951213e-05, "loss": 0.2359, "step": 31013 }, { "epoch": 2.5124756966947506, "grad_norm": 0.06892668455839157, "learning_rate": 8.264098294252667e-05, "loss": 0.2511, "step": 31014 }, { "epoch": 2.512556707712249, "grad_norm": 0.05978599190711975, "learning_rate": 8.263648228993204e-05, "loss": 0.2551, "step": 31015 }, { "epoch": 2.5126377187297475, "grad_norm": 0.08446621149778366, "learning_rate": 8.263198163733742e-05, "loss": 0.2699, "step": 31016 }, { "epoch": 2.5127187297472457, "grad_norm": 0.06874669343233109, "learning_rate": 8.262748098474279e-05, "loss": 0.2363, "step": 31017 }, { "epoch": 2.512799740764744, "grad_norm": 0.08762843906879425, "learning_rate": 8.262298033214816e-05, "loss": 0.2743, "step": 31018 }, { "epoch": 2.5128807517822422, "grad_norm": 0.07058103382587433, "learning_rate": 8.261847967955354e-05, "loss": 0.2776, "step": 31019 }, { "epoch": 2.512961762799741, "grad_norm": 0.06566563248634338, "learning_rate": 8.261397902695891e-05, "loss": 0.2729, "step": 31020 }, { "epoch": 2.513042773817239, "grad_norm": 0.06321921199560165, "learning_rate": 8.260947837436429e-05, "loss": 0.2211, "step": 31021 }, { "epoch": 2.5131237848347374, "grad_norm": 0.08189821988344193, "learning_rate": 8.260497772176966e-05, "loss": 0.262, "step": 31022 }, { "epoch": 2.5132047958522357, "grad_norm": 0.07887553423643112, "learning_rate": 8.260047706917503e-05, "loss": 0.2556, "step": 31023 }, { "epoch": 2.5132858068697344, "grad_norm": 0.06533343344926834, "learning_rate": 8.25959764165804e-05, "loss": 0.2488, "step": 31024 }, { "epoch": 2.5133668178872326, "grad_norm": 0.0702027752995491, "learning_rate": 8.259147576398578e-05, "loss": 0.2433, "step": 31025 }, { "epoch": 2.513447828904731, "grad_norm": 0.05941007658839226, "learning_rate": 8.258697511139115e-05, "loss": 0.2066, "step": 31026 }, { "epoch": 2.5135288399222295, "grad_norm": 0.06379848718643188, "learning_rate": 8.258247445879653e-05, "loss": 0.2569, "step": 31027 }, { "epoch": 2.5136098509397278, "grad_norm": 0.05726584047079086, "learning_rate": 8.25779738062019e-05, "loss": 0.2191, "step": 31028 }, { "epoch": 2.513690861957226, "grad_norm": 0.05791817605495453, "learning_rate": 8.257347315360727e-05, "loss": 0.2129, "step": 31029 }, { "epoch": 2.5137718729747247, "grad_norm": 0.0738334208726883, "learning_rate": 8.256897250101265e-05, "loss": 0.2542, "step": 31030 }, { "epoch": 2.513852883992223, "grad_norm": 0.05998535081744194, "learning_rate": 8.256447184841802e-05, "loss": 0.2376, "step": 31031 }, { "epoch": 2.513933895009721, "grad_norm": 0.07362499833106995, "learning_rate": 8.25599711958234e-05, "loss": 0.2627, "step": 31032 }, { "epoch": 2.51401490602722, "grad_norm": 0.07652860134840012, "learning_rate": 8.255547054322877e-05, "loss": 0.2573, "step": 31033 }, { "epoch": 2.514095917044718, "grad_norm": 0.07800864428281784, "learning_rate": 8.255096989063414e-05, "loss": 0.2786, "step": 31034 }, { "epoch": 2.5141769280622164, "grad_norm": 0.05959049239754677, "learning_rate": 8.254646923803952e-05, "loss": 0.2756, "step": 31035 }, { "epoch": 2.514257939079715, "grad_norm": 0.0774015411734581, "learning_rate": 8.254196858544489e-05, "loss": 0.2543, "step": 31036 }, { "epoch": 2.5143389500972133, "grad_norm": 0.05201781168580055, "learning_rate": 8.253746793285026e-05, "loss": 0.2401, "step": 31037 }, { "epoch": 2.5144199611147116, "grad_norm": 0.069788359105587, "learning_rate": 8.253296728025565e-05, "loss": 0.235, "step": 31038 }, { "epoch": 2.5145009721322102, "grad_norm": 0.05659021437168121, "learning_rate": 8.252846662766101e-05, "loss": 0.2487, "step": 31039 }, { "epoch": 2.5145819831497085, "grad_norm": 0.06626788526773453, "learning_rate": 8.252396597506638e-05, "loss": 0.2715, "step": 31040 }, { "epoch": 2.5146629941672067, "grad_norm": 0.0640718936920166, "learning_rate": 8.251946532247177e-05, "loss": 0.2338, "step": 31041 }, { "epoch": 2.514744005184705, "grad_norm": 0.06315992772579193, "learning_rate": 8.251496466987713e-05, "loss": 0.2602, "step": 31042 }, { "epoch": 2.5148250162022032, "grad_norm": 0.07527586072683334, "learning_rate": 8.25104640172825e-05, "loss": 0.2437, "step": 31043 }, { "epoch": 2.514906027219702, "grad_norm": 0.04863426089286804, "learning_rate": 8.250596336468789e-05, "loss": 0.2198, "step": 31044 }, { "epoch": 2.5149870382372, "grad_norm": 0.06549611687660217, "learning_rate": 8.250146271209325e-05, "loss": 0.2217, "step": 31045 }, { "epoch": 2.5150680492546984, "grad_norm": 0.07761254161596298, "learning_rate": 8.249696205949863e-05, "loss": 0.2536, "step": 31046 }, { "epoch": 2.515149060272197, "grad_norm": 0.06986057013273239, "learning_rate": 8.249246140690401e-05, "loss": 0.2393, "step": 31047 }, { "epoch": 2.5152300712896953, "grad_norm": 0.07064176350831985, "learning_rate": 8.248796075430937e-05, "loss": 0.2258, "step": 31048 }, { "epoch": 2.5153110823071936, "grad_norm": 0.07709095627069473, "learning_rate": 8.248346010171475e-05, "loss": 0.28, "step": 31049 }, { "epoch": 2.5153920933246923, "grad_norm": 0.07585789263248444, "learning_rate": 8.247895944912013e-05, "loss": 0.2327, "step": 31050 }, { "epoch": 2.5154731043421905, "grad_norm": 0.061996519565582275, "learning_rate": 8.24744587965255e-05, "loss": 0.2263, "step": 31051 }, { "epoch": 2.5155541153596888, "grad_norm": 0.05735709145665169, "learning_rate": 8.246995814393087e-05, "loss": 0.2294, "step": 31052 }, { "epoch": 2.5156351263771874, "grad_norm": 0.05866505205631256, "learning_rate": 8.246545749133625e-05, "loss": 0.2437, "step": 31053 }, { "epoch": 2.5157161373946857, "grad_norm": 0.07577653229236603, "learning_rate": 8.246095683874161e-05, "loss": 0.2429, "step": 31054 }, { "epoch": 2.515797148412184, "grad_norm": 0.06760087609291077, "learning_rate": 8.245645618614699e-05, "loss": 0.237, "step": 31055 }, { "epoch": 2.5158781594296826, "grad_norm": 0.08195766806602478, "learning_rate": 8.245195553355238e-05, "loss": 0.2464, "step": 31056 }, { "epoch": 2.515959170447181, "grad_norm": 0.07483825087547302, "learning_rate": 8.244745488095774e-05, "loss": 0.2503, "step": 31057 }, { "epoch": 2.516040181464679, "grad_norm": 0.06859326362609863, "learning_rate": 8.244295422836311e-05, "loss": 0.2488, "step": 31058 }, { "epoch": 2.516121192482178, "grad_norm": 0.056272827088832855, "learning_rate": 8.24384535757685e-05, "loss": 0.1995, "step": 31059 }, { "epoch": 2.516202203499676, "grad_norm": 0.059765033423900604, "learning_rate": 8.243395292317386e-05, "loss": 0.2459, "step": 31060 }, { "epoch": 2.5162832145171743, "grad_norm": 0.075907401740551, "learning_rate": 8.242945227057923e-05, "loss": 0.263, "step": 31061 }, { "epoch": 2.516364225534673, "grad_norm": 0.06140841171145439, "learning_rate": 8.242495161798462e-05, "loss": 0.2322, "step": 31062 }, { "epoch": 2.5164452365521712, "grad_norm": 0.07933636009693146, "learning_rate": 8.242045096538998e-05, "loss": 0.2132, "step": 31063 }, { "epoch": 2.5165262475696695, "grad_norm": 0.08671210706233978, "learning_rate": 8.241595031279536e-05, "loss": 0.2591, "step": 31064 }, { "epoch": 2.5166072585871677, "grad_norm": 0.06944069266319275, "learning_rate": 8.241144966020074e-05, "loss": 0.2335, "step": 31065 }, { "epoch": 2.516688269604666, "grad_norm": 0.0640743300318718, "learning_rate": 8.240694900760611e-05, "loss": 0.272, "step": 31066 }, { "epoch": 2.5167692806221647, "grad_norm": 0.05724559724330902, "learning_rate": 8.240244835501148e-05, "loss": 0.2613, "step": 31067 }, { "epoch": 2.516850291639663, "grad_norm": 0.06746908277273178, "learning_rate": 8.239794770241686e-05, "loss": 0.2403, "step": 31068 }, { "epoch": 2.516931302657161, "grad_norm": 0.0781325176358223, "learning_rate": 8.239344704982223e-05, "loss": 0.2326, "step": 31069 }, { "epoch": 2.51701231367466, "grad_norm": 0.05279175937175751, "learning_rate": 8.23889463972276e-05, "loss": 0.2217, "step": 31070 }, { "epoch": 2.517093324692158, "grad_norm": 0.06322243064641953, "learning_rate": 8.238444574463298e-05, "loss": 0.2494, "step": 31071 }, { "epoch": 2.5171743357096563, "grad_norm": 0.0703403502702713, "learning_rate": 8.237994509203835e-05, "loss": 0.2432, "step": 31072 }, { "epoch": 2.517255346727155, "grad_norm": 0.07747770845890045, "learning_rate": 8.237544443944373e-05, "loss": 0.2315, "step": 31073 }, { "epoch": 2.5173363577446533, "grad_norm": 0.06204115226864815, "learning_rate": 8.23709437868491e-05, "loss": 0.2381, "step": 31074 }, { "epoch": 2.5174173687621515, "grad_norm": 0.06431187689304352, "learning_rate": 8.236644313425447e-05, "loss": 0.2485, "step": 31075 }, { "epoch": 2.51749837977965, "grad_norm": 0.07739128917455673, "learning_rate": 8.236194248165985e-05, "loss": 0.1975, "step": 31076 }, { "epoch": 2.5175793907971484, "grad_norm": 0.06386204063892365, "learning_rate": 8.235744182906522e-05, "loss": 0.2433, "step": 31077 }, { "epoch": 2.5176604018146467, "grad_norm": 0.056900352239608765, "learning_rate": 8.23529411764706e-05, "loss": 0.2113, "step": 31078 }, { "epoch": 2.5177414128321454, "grad_norm": 0.06782882660627365, "learning_rate": 8.234844052387597e-05, "loss": 0.2786, "step": 31079 }, { "epoch": 2.5178224238496436, "grad_norm": 0.06768842786550522, "learning_rate": 8.234393987128134e-05, "loss": 0.2609, "step": 31080 }, { "epoch": 2.517903434867142, "grad_norm": 0.05288982763886452, "learning_rate": 8.233943921868672e-05, "loss": 0.2281, "step": 31081 }, { "epoch": 2.5179844458846405, "grad_norm": 0.07983200997114182, "learning_rate": 8.233493856609209e-05, "loss": 0.254, "step": 31082 }, { "epoch": 2.518065456902139, "grad_norm": 0.054932545870542526, "learning_rate": 8.233043791349746e-05, "loss": 0.2325, "step": 31083 }, { "epoch": 2.518146467919637, "grad_norm": 0.06276437640190125, "learning_rate": 8.232593726090284e-05, "loss": 0.2322, "step": 31084 }, { "epoch": 2.5182274789371357, "grad_norm": 0.070334292948246, "learning_rate": 8.232143660830821e-05, "loss": 0.2695, "step": 31085 }, { "epoch": 2.518308489954634, "grad_norm": 0.07306241244077682, "learning_rate": 8.231693595571358e-05, "loss": 0.2392, "step": 31086 }, { "epoch": 2.518389500972132, "grad_norm": 0.06493571400642395, "learning_rate": 8.231243530311896e-05, "loss": 0.2222, "step": 31087 }, { "epoch": 2.5184705119896305, "grad_norm": 0.07168751955032349, "learning_rate": 8.230793465052433e-05, "loss": 0.2696, "step": 31088 }, { "epoch": 2.5185515230071287, "grad_norm": 0.06785114109516144, "learning_rate": 8.23034339979297e-05, "loss": 0.2299, "step": 31089 }, { "epoch": 2.5186325340246274, "grad_norm": 0.07063904404640198, "learning_rate": 8.229893334533508e-05, "loss": 0.2868, "step": 31090 }, { "epoch": 2.5187135450421256, "grad_norm": 0.06915218383073807, "learning_rate": 8.229443269274045e-05, "loss": 0.2387, "step": 31091 }, { "epoch": 2.518794556059624, "grad_norm": 0.07183767110109329, "learning_rate": 8.228993204014583e-05, "loss": 0.2427, "step": 31092 }, { "epoch": 2.5188755670771226, "grad_norm": 0.05309898406267166, "learning_rate": 8.22854313875512e-05, "loss": 0.2147, "step": 31093 }, { "epoch": 2.518956578094621, "grad_norm": 0.06278495490550995, "learning_rate": 8.228093073495657e-05, "loss": 0.2275, "step": 31094 }, { "epoch": 2.519037589112119, "grad_norm": 0.06439706683158875, "learning_rate": 8.227643008236195e-05, "loss": 0.2362, "step": 31095 }, { "epoch": 2.5191186001296177, "grad_norm": 0.05649157240986824, "learning_rate": 8.227192942976732e-05, "loss": 0.1942, "step": 31096 }, { "epoch": 2.519199611147116, "grad_norm": 0.07158772647380829, "learning_rate": 8.226742877717269e-05, "loss": 0.2734, "step": 31097 }, { "epoch": 2.5192806221646142, "grad_norm": 0.06923554837703705, "learning_rate": 8.226292812457807e-05, "loss": 0.2339, "step": 31098 }, { "epoch": 2.519361633182113, "grad_norm": 0.06337632983922958, "learning_rate": 8.225842747198344e-05, "loss": 0.2546, "step": 31099 }, { "epoch": 2.519442644199611, "grad_norm": 0.06667693704366684, "learning_rate": 8.225392681938881e-05, "loss": 0.2225, "step": 31100 }, { "epoch": 2.5195236552171094, "grad_norm": 0.06592801213264465, "learning_rate": 8.224942616679419e-05, "loss": 0.2211, "step": 31101 }, { "epoch": 2.519604666234608, "grad_norm": 0.07447879761457443, "learning_rate": 8.224492551419956e-05, "loss": 0.294, "step": 31102 }, { "epoch": 2.5196856772521063, "grad_norm": 0.06477392464876175, "learning_rate": 8.224042486160493e-05, "loss": 0.2443, "step": 31103 }, { "epoch": 2.5197666882696046, "grad_norm": 0.06004251912236214, "learning_rate": 8.223592420901031e-05, "loss": 0.1946, "step": 31104 }, { "epoch": 2.5198476992871033, "grad_norm": 0.06646063923835754, "learning_rate": 8.223142355641568e-05, "loss": 0.2496, "step": 31105 }, { "epoch": 2.5199287103046015, "grad_norm": 0.06413585692644119, "learning_rate": 8.222692290382106e-05, "loss": 0.2262, "step": 31106 }, { "epoch": 2.5200097213220998, "grad_norm": 0.0555780753493309, "learning_rate": 8.222242225122643e-05, "loss": 0.2569, "step": 31107 }, { "epoch": 2.5200907323395985, "grad_norm": 0.07252832502126694, "learning_rate": 8.22179215986318e-05, "loss": 0.2627, "step": 31108 }, { "epoch": 2.5201717433570967, "grad_norm": 0.061369750648736954, "learning_rate": 8.221342094603718e-05, "loss": 0.219, "step": 31109 }, { "epoch": 2.520252754374595, "grad_norm": 0.07031913101673126, "learning_rate": 8.220892029344255e-05, "loss": 0.2617, "step": 31110 }, { "epoch": 2.520333765392093, "grad_norm": 0.06914175301790237, "learning_rate": 8.220441964084792e-05, "loss": 0.2448, "step": 31111 }, { "epoch": 2.5204147764095914, "grad_norm": 0.07155036926269531, "learning_rate": 8.21999189882533e-05, "loss": 0.2427, "step": 31112 }, { "epoch": 2.52049578742709, "grad_norm": 0.06575379520654678, "learning_rate": 8.219541833565867e-05, "loss": 0.2573, "step": 31113 }, { "epoch": 2.5205767984445884, "grad_norm": 0.06591765582561493, "learning_rate": 8.219091768306404e-05, "loss": 0.2327, "step": 31114 }, { "epoch": 2.5206578094620866, "grad_norm": 0.08451759070158005, "learning_rate": 8.218641703046942e-05, "loss": 0.2686, "step": 31115 }, { "epoch": 2.5207388204795853, "grad_norm": 0.058037735521793365, "learning_rate": 8.218191637787479e-05, "loss": 0.2307, "step": 31116 }, { "epoch": 2.5208198314970836, "grad_norm": 0.07824068516492844, "learning_rate": 8.217741572528017e-05, "loss": 0.2556, "step": 31117 }, { "epoch": 2.520900842514582, "grad_norm": 0.06366552412509918, "learning_rate": 8.217291507268554e-05, "loss": 0.2442, "step": 31118 }, { "epoch": 2.5209818535320805, "grad_norm": 0.068834587931633, "learning_rate": 8.216841442009093e-05, "loss": 0.2743, "step": 31119 }, { "epoch": 2.5210628645495787, "grad_norm": 0.07074061036109924, "learning_rate": 8.216391376749629e-05, "loss": 0.216, "step": 31120 }, { "epoch": 2.521143875567077, "grad_norm": 0.048492182046175, "learning_rate": 8.215941311490166e-05, "loss": 0.2349, "step": 31121 }, { "epoch": 2.5212248865845757, "grad_norm": 0.0563458614051342, "learning_rate": 8.215491246230705e-05, "loss": 0.226, "step": 31122 }, { "epoch": 2.521305897602074, "grad_norm": 0.054789893329143524, "learning_rate": 8.21504118097124e-05, "loss": 0.2251, "step": 31123 }, { "epoch": 2.521386908619572, "grad_norm": 0.0652405321598053, "learning_rate": 8.214591115711778e-05, "loss": 0.2357, "step": 31124 }, { "epoch": 2.521467919637071, "grad_norm": 0.061516888439655304, "learning_rate": 8.214141050452317e-05, "loss": 0.2426, "step": 31125 }, { "epoch": 2.521548930654569, "grad_norm": 0.07577957212924957, "learning_rate": 8.213690985192853e-05, "loss": 0.2577, "step": 31126 }, { "epoch": 2.5216299416720673, "grad_norm": 0.07500039786100388, "learning_rate": 8.21324091993339e-05, "loss": 0.2392, "step": 31127 }, { "epoch": 2.521710952689566, "grad_norm": 0.07097041606903076, "learning_rate": 8.212790854673929e-05, "loss": 0.244, "step": 31128 }, { "epoch": 2.5217919637070643, "grad_norm": 0.06939812004566193, "learning_rate": 8.212340789414465e-05, "loss": 0.2284, "step": 31129 }, { "epoch": 2.5218729747245625, "grad_norm": 0.05452917516231537, "learning_rate": 8.211890724155002e-05, "loss": 0.2235, "step": 31130 }, { "epoch": 2.5219539857420608, "grad_norm": 0.062049634754657745, "learning_rate": 8.211440658895541e-05, "loss": 0.258, "step": 31131 }, { "epoch": 2.5220349967595594, "grad_norm": 0.0639457032084465, "learning_rate": 8.210990593636077e-05, "loss": 0.2656, "step": 31132 }, { "epoch": 2.5221160077770577, "grad_norm": 0.05421054735779762, "learning_rate": 8.210540528376614e-05, "loss": 0.2243, "step": 31133 }, { "epoch": 2.522197018794556, "grad_norm": 0.07816123962402344, "learning_rate": 8.210090463117153e-05, "loss": 0.251, "step": 31134 }, { "epoch": 2.522278029812054, "grad_norm": 0.06340120732784271, "learning_rate": 8.20964039785769e-05, "loss": 0.2402, "step": 31135 }, { "epoch": 2.522359040829553, "grad_norm": 0.0676545798778534, "learning_rate": 8.209190332598226e-05, "loss": 0.241, "step": 31136 }, { "epoch": 2.522440051847051, "grad_norm": 0.05805578827857971, "learning_rate": 8.208740267338765e-05, "loss": 0.2214, "step": 31137 }, { "epoch": 2.5225210628645494, "grad_norm": 0.06669300049543381, "learning_rate": 8.208290202079302e-05, "loss": 0.2335, "step": 31138 }, { "epoch": 2.522602073882048, "grad_norm": 0.06161652132868767, "learning_rate": 8.207840136819838e-05, "loss": 0.2354, "step": 31139 }, { "epoch": 2.5226830848995463, "grad_norm": 0.07172819972038269, "learning_rate": 8.207390071560377e-05, "loss": 0.2715, "step": 31140 }, { "epoch": 2.5227640959170445, "grad_norm": 0.051006119698286057, "learning_rate": 8.206940006300915e-05, "loss": 0.195, "step": 31141 }, { "epoch": 2.5228451069345432, "grad_norm": 0.06547657400369644, "learning_rate": 8.20648994104145e-05, "loss": 0.2316, "step": 31142 }, { "epoch": 2.5229261179520415, "grad_norm": 0.06320697069168091, "learning_rate": 8.206039875781989e-05, "loss": 0.2534, "step": 31143 }, { "epoch": 2.5230071289695397, "grad_norm": 0.06975486874580383, "learning_rate": 8.205589810522527e-05, "loss": 0.2401, "step": 31144 }, { "epoch": 2.5230881399870384, "grad_norm": 0.06577564030885696, "learning_rate": 8.205139745263064e-05, "loss": 0.2054, "step": 31145 }, { "epoch": 2.5231691510045366, "grad_norm": 0.08921880275011063, "learning_rate": 8.204689680003601e-05, "loss": 0.2514, "step": 31146 }, { "epoch": 2.523250162022035, "grad_norm": 0.07652483880519867, "learning_rate": 8.204239614744139e-05, "loss": 0.2526, "step": 31147 }, { "epoch": 2.5233311730395336, "grad_norm": 0.05995349958539009, "learning_rate": 8.203789549484676e-05, "loss": 0.244, "step": 31148 }, { "epoch": 2.523412184057032, "grad_norm": 0.06425217539072037, "learning_rate": 8.203339484225213e-05, "loss": 0.2284, "step": 31149 }, { "epoch": 2.52349319507453, "grad_norm": 0.07397456467151642, "learning_rate": 8.202889418965751e-05, "loss": 0.268, "step": 31150 }, { "epoch": 2.5235742060920288, "grad_norm": 0.06542269885540009, "learning_rate": 8.202439353706288e-05, "loss": 0.2364, "step": 31151 }, { "epoch": 2.523655217109527, "grad_norm": 0.05775555968284607, "learning_rate": 8.201989288446825e-05, "loss": 0.2386, "step": 31152 }, { "epoch": 2.5237362281270252, "grad_norm": 0.06939782202243805, "learning_rate": 8.201539223187363e-05, "loss": 0.2728, "step": 31153 }, { "epoch": 2.5238172391445235, "grad_norm": 0.07716054469347, "learning_rate": 8.2010891579279e-05, "loss": 0.2684, "step": 31154 }, { "epoch": 2.523898250162022, "grad_norm": 0.049509044736623764, "learning_rate": 8.200639092668438e-05, "loss": 0.2559, "step": 31155 }, { "epoch": 2.5239792611795204, "grad_norm": 0.08856166899204254, "learning_rate": 8.200189027408975e-05, "loss": 0.2449, "step": 31156 }, { "epoch": 2.5240602721970187, "grad_norm": 0.06791945546865463, "learning_rate": 8.199738962149512e-05, "loss": 0.2783, "step": 31157 }, { "epoch": 2.524141283214517, "grad_norm": 0.06625067442655563, "learning_rate": 8.19928889689005e-05, "loss": 0.2368, "step": 31158 }, { "epoch": 2.5242222942320156, "grad_norm": 0.051140010356903076, "learning_rate": 8.198838831630587e-05, "loss": 0.2421, "step": 31159 }, { "epoch": 2.524303305249514, "grad_norm": 0.06882493942975998, "learning_rate": 8.198388766371124e-05, "loss": 0.2504, "step": 31160 }, { "epoch": 2.524384316267012, "grad_norm": 0.08425910770893097, "learning_rate": 8.197938701111662e-05, "loss": 0.242, "step": 31161 }, { "epoch": 2.524465327284511, "grad_norm": 0.07307098060846329, "learning_rate": 8.197488635852199e-05, "loss": 0.2875, "step": 31162 }, { "epoch": 2.524546338302009, "grad_norm": 0.06732004880905151, "learning_rate": 8.197038570592736e-05, "loss": 0.2407, "step": 31163 }, { "epoch": 2.5246273493195073, "grad_norm": 0.05499308556318283, "learning_rate": 8.196588505333274e-05, "loss": 0.214, "step": 31164 }, { "epoch": 2.524708360337006, "grad_norm": 0.07563404738903046, "learning_rate": 8.196138440073811e-05, "loss": 0.2255, "step": 31165 }, { "epoch": 2.524789371354504, "grad_norm": 0.07380463182926178, "learning_rate": 8.195688374814349e-05, "loss": 0.2911, "step": 31166 }, { "epoch": 2.5248703823720025, "grad_norm": 0.07666265964508057, "learning_rate": 8.195238309554886e-05, "loss": 0.218, "step": 31167 }, { "epoch": 2.524951393389501, "grad_norm": 0.0652792826294899, "learning_rate": 8.194788244295423e-05, "loss": 0.2203, "step": 31168 }, { "epoch": 2.5250324044069994, "grad_norm": 0.07028517872095108, "learning_rate": 8.19433817903596e-05, "loss": 0.2337, "step": 31169 }, { "epoch": 2.5251134154244976, "grad_norm": 0.07915160804986954, "learning_rate": 8.193888113776498e-05, "loss": 0.2431, "step": 31170 }, { "epoch": 2.5251944264419963, "grad_norm": 0.0773158147931099, "learning_rate": 8.193438048517035e-05, "loss": 0.2556, "step": 31171 }, { "epoch": 2.5252754374594946, "grad_norm": 0.0707051157951355, "learning_rate": 8.192987983257573e-05, "loss": 0.2387, "step": 31172 }, { "epoch": 2.525356448476993, "grad_norm": 0.09297461807727814, "learning_rate": 8.19253791799811e-05, "loss": 0.2542, "step": 31173 }, { "epoch": 2.5254374594944915, "grad_norm": 0.07315745204687119, "learning_rate": 8.192087852738647e-05, "loss": 0.2251, "step": 31174 }, { "epoch": 2.5255184705119897, "grad_norm": 0.0702429711818695, "learning_rate": 8.191637787479185e-05, "loss": 0.2634, "step": 31175 }, { "epoch": 2.525599481529488, "grad_norm": 0.06600876897573471, "learning_rate": 8.191187722219722e-05, "loss": 0.2157, "step": 31176 }, { "epoch": 2.5256804925469862, "grad_norm": 0.06941699236631393, "learning_rate": 8.19073765696026e-05, "loss": 0.2759, "step": 31177 }, { "epoch": 2.525761503564485, "grad_norm": 0.08072786033153534, "learning_rate": 8.190287591700797e-05, "loss": 0.2418, "step": 31178 }, { "epoch": 2.525842514581983, "grad_norm": 0.0683659017086029, "learning_rate": 8.189837526441334e-05, "loss": 0.2526, "step": 31179 }, { "epoch": 2.5259235255994814, "grad_norm": 0.06554919481277466, "learning_rate": 8.189387461181872e-05, "loss": 0.2554, "step": 31180 }, { "epoch": 2.5260045366169797, "grad_norm": 0.07267916202545166, "learning_rate": 8.188937395922409e-05, "loss": 0.2287, "step": 31181 }, { "epoch": 2.5260855476344783, "grad_norm": 0.06501460075378418, "learning_rate": 8.188487330662946e-05, "loss": 0.2373, "step": 31182 }, { "epoch": 2.5261665586519766, "grad_norm": 0.06084512174129486, "learning_rate": 8.188037265403484e-05, "loss": 0.2371, "step": 31183 }, { "epoch": 2.526247569669475, "grad_norm": 0.06529213488101959, "learning_rate": 8.187587200144021e-05, "loss": 0.2729, "step": 31184 }, { "epoch": 2.5263285806869735, "grad_norm": 0.07154843956232071, "learning_rate": 8.187137134884558e-05, "loss": 0.2429, "step": 31185 }, { "epoch": 2.5264095917044718, "grad_norm": 0.07982178032398224, "learning_rate": 8.186687069625096e-05, "loss": 0.2964, "step": 31186 }, { "epoch": 2.52649060272197, "grad_norm": 0.058192428201436996, "learning_rate": 8.186237004365633e-05, "loss": 0.2279, "step": 31187 }, { "epoch": 2.5265716137394687, "grad_norm": 0.07291363179683685, "learning_rate": 8.18578693910617e-05, "loss": 0.2713, "step": 31188 }, { "epoch": 2.526652624756967, "grad_norm": 0.062472179532051086, "learning_rate": 8.185336873846708e-05, "loss": 0.2577, "step": 31189 }, { "epoch": 2.526733635774465, "grad_norm": 0.0711505115032196, "learning_rate": 8.184886808587245e-05, "loss": 0.2482, "step": 31190 }, { "epoch": 2.526814646791964, "grad_norm": 0.07676628232002258, "learning_rate": 8.184436743327783e-05, "loss": 0.2605, "step": 31191 }, { "epoch": 2.526895657809462, "grad_norm": 0.08312243968248367, "learning_rate": 8.18398667806832e-05, "loss": 0.2551, "step": 31192 }, { "epoch": 2.5269766688269604, "grad_norm": 0.08073175698518753, "learning_rate": 8.183536612808857e-05, "loss": 0.2822, "step": 31193 }, { "epoch": 2.527057679844459, "grad_norm": 0.06505302339792252, "learning_rate": 8.183086547549395e-05, "loss": 0.2396, "step": 31194 }, { "epoch": 2.5271386908619573, "grad_norm": 0.06782861053943634, "learning_rate": 8.182636482289932e-05, "loss": 0.2482, "step": 31195 }, { "epoch": 2.5272197018794555, "grad_norm": 0.06377576291561127, "learning_rate": 8.18218641703047e-05, "loss": 0.2243, "step": 31196 }, { "epoch": 2.5273007128969542, "grad_norm": 0.07454057037830353, "learning_rate": 8.181736351771008e-05, "loss": 0.2649, "step": 31197 }, { "epoch": 2.5273817239144525, "grad_norm": 0.0724162757396698, "learning_rate": 8.181286286511544e-05, "loss": 0.2363, "step": 31198 }, { "epoch": 2.5274627349319507, "grad_norm": 0.07536423951387405, "learning_rate": 8.180836221252081e-05, "loss": 0.2535, "step": 31199 }, { "epoch": 2.527543745949449, "grad_norm": 0.071848563849926, "learning_rate": 8.18038615599262e-05, "loss": 0.2259, "step": 31200 }, { "epoch": 2.5276247569669477, "grad_norm": 0.05115946754813194, "learning_rate": 8.179936090733156e-05, "loss": 0.2048, "step": 31201 }, { "epoch": 2.527705767984446, "grad_norm": 0.07633697241544724, "learning_rate": 8.179486025473693e-05, "loss": 0.2453, "step": 31202 }, { "epoch": 2.527786779001944, "grad_norm": 0.07274796813726425, "learning_rate": 8.179035960214232e-05, "loss": 0.2779, "step": 31203 }, { "epoch": 2.5278677900194424, "grad_norm": 0.06171036511659622, "learning_rate": 8.17858589495477e-05, "loss": 0.2316, "step": 31204 }, { "epoch": 2.527948801036941, "grad_norm": 0.06370175629854202, "learning_rate": 8.178135829695306e-05, "loss": 0.229, "step": 31205 }, { "epoch": 2.5280298120544393, "grad_norm": 0.06523820757865906, "learning_rate": 8.177685764435844e-05, "loss": 0.2706, "step": 31206 }, { "epoch": 2.5281108230719376, "grad_norm": 0.06415421515703201, "learning_rate": 8.177235699176382e-05, "loss": 0.2251, "step": 31207 }, { "epoch": 2.5281918340894363, "grad_norm": 0.06288018077611923, "learning_rate": 8.176785633916918e-05, "loss": 0.2352, "step": 31208 }, { "epoch": 2.5282728451069345, "grad_norm": 0.0697561576962471, "learning_rate": 8.176335568657456e-05, "loss": 0.2235, "step": 31209 }, { "epoch": 2.5283538561244328, "grad_norm": 0.05984310805797577, "learning_rate": 8.175885503397994e-05, "loss": 0.3137, "step": 31210 }, { "epoch": 2.5284348671419314, "grad_norm": 0.06374064087867737, "learning_rate": 8.17543543813853e-05, "loss": 0.2261, "step": 31211 }, { "epoch": 2.5285158781594297, "grad_norm": 0.07622829079627991, "learning_rate": 8.174985372879068e-05, "loss": 0.2648, "step": 31212 }, { "epoch": 2.528596889176928, "grad_norm": 0.061640314757823944, "learning_rate": 8.174535307619606e-05, "loss": 0.231, "step": 31213 }, { "epoch": 2.5286779001944266, "grad_norm": 0.048052046447992325, "learning_rate": 8.174085242360142e-05, "loss": 0.2346, "step": 31214 }, { "epoch": 2.528758911211925, "grad_norm": 0.06793522834777832, "learning_rate": 8.17363517710068e-05, "loss": 0.2208, "step": 31215 }, { "epoch": 2.528839922229423, "grad_norm": 0.07524669170379639, "learning_rate": 8.173185111841218e-05, "loss": 0.2506, "step": 31216 }, { "epoch": 2.528920933246922, "grad_norm": 0.06395810842514038, "learning_rate": 8.172735046581754e-05, "loss": 0.2392, "step": 31217 }, { "epoch": 2.52900194426442, "grad_norm": 0.066915363073349, "learning_rate": 8.172284981322293e-05, "loss": 0.2288, "step": 31218 }, { "epoch": 2.5290829552819183, "grad_norm": 0.07390469312667847, "learning_rate": 8.17183491606283e-05, "loss": 0.2657, "step": 31219 }, { "epoch": 2.529163966299417, "grad_norm": 0.06230680271983147, "learning_rate": 8.171384850803366e-05, "loss": 0.2053, "step": 31220 }, { "epoch": 2.529244977316915, "grad_norm": 0.07825953513383865, "learning_rate": 8.170934785543905e-05, "loss": 0.2406, "step": 31221 }, { "epoch": 2.5293259883344135, "grad_norm": 0.07075408846139908, "learning_rate": 8.170484720284442e-05, "loss": 0.2395, "step": 31222 }, { "epoch": 2.5294069993519117, "grad_norm": 0.08722478896379471, "learning_rate": 8.17003465502498e-05, "loss": 0.2744, "step": 31223 }, { "epoch": 2.5294880103694104, "grad_norm": 0.07645217329263687, "learning_rate": 8.169584589765517e-05, "loss": 0.2373, "step": 31224 }, { "epoch": 2.5295690213869086, "grad_norm": 0.057986825704574585, "learning_rate": 8.169134524506054e-05, "loss": 0.2142, "step": 31225 }, { "epoch": 2.529650032404407, "grad_norm": 0.07386277616024017, "learning_rate": 8.168684459246591e-05, "loss": 0.2463, "step": 31226 }, { "epoch": 2.529731043421905, "grad_norm": 0.073249451816082, "learning_rate": 8.168234393987129e-05, "loss": 0.2204, "step": 31227 }, { "epoch": 2.529812054439404, "grad_norm": 0.06574179977178574, "learning_rate": 8.167784328727666e-05, "loss": 0.2398, "step": 31228 }, { "epoch": 2.529893065456902, "grad_norm": 0.06217941641807556, "learning_rate": 8.167334263468204e-05, "loss": 0.2265, "step": 31229 }, { "epoch": 2.5299740764744003, "grad_norm": 0.07396439462900162, "learning_rate": 8.166884198208741e-05, "loss": 0.2724, "step": 31230 }, { "epoch": 2.530055087491899, "grad_norm": 0.06049538031220436, "learning_rate": 8.166434132949278e-05, "loss": 0.2595, "step": 31231 }, { "epoch": 2.5301360985093972, "grad_norm": 0.06809945404529572, "learning_rate": 8.165984067689816e-05, "loss": 0.2765, "step": 31232 }, { "epoch": 2.5302171095268955, "grad_norm": 0.06943079829216003, "learning_rate": 8.165534002430353e-05, "loss": 0.2106, "step": 31233 }, { "epoch": 2.530298120544394, "grad_norm": 0.07760677486658096, "learning_rate": 8.16508393717089e-05, "loss": 0.2546, "step": 31234 }, { "epoch": 2.5303791315618924, "grad_norm": 0.0711454451084137, "learning_rate": 8.164633871911428e-05, "loss": 0.2497, "step": 31235 }, { "epoch": 2.5304601425793907, "grad_norm": 0.06712127476930618, "learning_rate": 8.164183806651965e-05, "loss": 0.2484, "step": 31236 }, { "epoch": 2.5305411535968894, "grad_norm": 0.07480525225400925, "learning_rate": 8.163733741392502e-05, "loss": 0.2369, "step": 31237 }, { "epoch": 2.5306221646143876, "grad_norm": 0.06365400552749634, "learning_rate": 8.16328367613304e-05, "loss": 0.2914, "step": 31238 }, { "epoch": 2.530703175631886, "grad_norm": 0.05150923877954483, "learning_rate": 8.162833610873577e-05, "loss": 0.2473, "step": 31239 }, { "epoch": 2.5307841866493845, "grad_norm": 0.06193334981799126, "learning_rate": 8.162383545614115e-05, "loss": 0.225, "step": 31240 }, { "epoch": 2.530865197666883, "grad_norm": 0.07247815281152725, "learning_rate": 8.161933480354652e-05, "loss": 0.2574, "step": 31241 }, { "epoch": 2.530946208684381, "grad_norm": 0.06654870510101318, "learning_rate": 8.161483415095189e-05, "loss": 0.2669, "step": 31242 }, { "epoch": 2.5310272197018797, "grad_norm": 0.05943829193711281, "learning_rate": 8.161033349835727e-05, "loss": 0.2294, "step": 31243 }, { "epoch": 2.531108230719378, "grad_norm": 0.07509196549654007, "learning_rate": 8.160583284576264e-05, "loss": 0.253, "step": 31244 }, { "epoch": 2.531189241736876, "grad_norm": 0.0631127879023552, "learning_rate": 8.160133219316801e-05, "loss": 0.223, "step": 31245 }, { "epoch": 2.5312702527543745, "grad_norm": 0.06481597572565079, "learning_rate": 8.159683154057339e-05, "loss": 0.2604, "step": 31246 }, { "epoch": 2.531351263771873, "grad_norm": 0.058724142611026764, "learning_rate": 8.159233088797876e-05, "loss": 0.2142, "step": 31247 }, { "epoch": 2.5314322747893714, "grad_norm": 0.05988123267889023, "learning_rate": 8.158783023538413e-05, "loss": 0.232, "step": 31248 }, { "epoch": 2.5315132858068696, "grad_norm": 0.066890649497509, "learning_rate": 8.158332958278951e-05, "loss": 0.2558, "step": 31249 }, { "epoch": 2.531594296824368, "grad_norm": 0.06956793367862701, "learning_rate": 8.157882893019488e-05, "loss": 0.2574, "step": 31250 }, { "epoch": 2.5316753078418666, "grad_norm": 0.06504230201244354, "learning_rate": 8.157432827760026e-05, "loss": 0.2539, "step": 31251 }, { "epoch": 2.531756318859365, "grad_norm": 0.07182765752077103, "learning_rate": 8.156982762500563e-05, "loss": 0.2228, "step": 31252 }, { "epoch": 2.531837329876863, "grad_norm": 0.06693996489048004, "learning_rate": 8.1565326972411e-05, "loss": 0.2399, "step": 31253 }, { "epoch": 2.5319183408943617, "grad_norm": 0.05704502761363983, "learning_rate": 8.156082631981638e-05, "loss": 0.2004, "step": 31254 }, { "epoch": 2.53199935191186, "grad_norm": 0.08396997302770615, "learning_rate": 8.155632566722175e-05, "loss": 0.2521, "step": 31255 }, { "epoch": 2.5320803629293582, "grad_norm": 0.0741053894162178, "learning_rate": 8.155182501462712e-05, "loss": 0.2688, "step": 31256 }, { "epoch": 2.532161373946857, "grad_norm": 0.06670941412448883, "learning_rate": 8.15473243620325e-05, "loss": 0.237, "step": 31257 }, { "epoch": 2.532242384964355, "grad_norm": 0.06339386105537415, "learning_rate": 8.154282370943787e-05, "loss": 0.2096, "step": 31258 }, { "epoch": 2.5323233959818534, "grad_norm": 0.06393574923276901, "learning_rate": 8.153832305684324e-05, "loss": 0.2722, "step": 31259 }, { "epoch": 2.532404406999352, "grad_norm": 0.05412076786160469, "learning_rate": 8.153382240424862e-05, "loss": 0.2805, "step": 31260 }, { "epoch": 2.5324854180168503, "grad_norm": 0.06732047349214554, "learning_rate": 8.152932175165399e-05, "loss": 0.2738, "step": 31261 }, { "epoch": 2.5325664290343486, "grad_norm": 0.05094984546303749, "learning_rate": 8.152482109905936e-05, "loss": 0.2153, "step": 31262 }, { "epoch": 2.5326474400518473, "grad_norm": 0.07786114513874054, "learning_rate": 8.152032044646474e-05, "loss": 0.2511, "step": 31263 }, { "epoch": 2.5327284510693455, "grad_norm": 0.07169172912836075, "learning_rate": 8.151581979387011e-05, "loss": 0.2491, "step": 31264 }, { "epoch": 2.5328094620868438, "grad_norm": 0.0770837813615799, "learning_rate": 8.151131914127549e-05, "loss": 0.25, "step": 31265 }, { "epoch": 2.5328904731043425, "grad_norm": 0.06381841748952866, "learning_rate": 8.150681848868086e-05, "loss": 0.271, "step": 31266 }, { "epoch": 2.5329714841218407, "grad_norm": 0.06991208344697952, "learning_rate": 8.150231783608623e-05, "loss": 0.2584, "step": 31267 }, { "epoch": 2.533052495139339, "grad_norm": 0.06889082491397858, "learning_rate": 8.14978171834916e-05, "loss": 0.2506, "step": 31268 }, { "epoch": 2.533133506156837, "grad_norm": 0.05752260610461235, "learning_rate": 8.149331653089698e-05, "loss": 0.2336, "step": 31269 }, { "epoch": 2.5332145171743354, "grad_norm": 0.05750564485788345, "learning_rate": 8.148881587830237e-05, "loss": 0.2201, "step": 31270 }, { "epoch": 2.533295528191834, "grad_norm": 0.05817430838942528, "learning_rate": 8.148431522570773e-05, "loss": 0.2258, "step": 31271 }, { "epoch": 2.5333765392093324, "grad_norm": 0.07425309717655182, "learning_rate": 8.14798145731131e-05, "loss": 0.2193, "step": 31272 }, { "epoch": 2.5334575502268306, "grad_norm": 0.07449954748153687, "learning_rate": 8.147531392051849e-05, "loss": 0.2453, "step": 31273 }, { "epoch": 2.5335385612443293, "grad_norm": 0.07094830274581909, "learning_rate": 8.147081326792385e-05, "loss": 0.2577, "step": 31274 }, { "epoch": 2.5336195722618275, "grad_norm": 0.057979848235845566, "learning_rate": 8.146631261532922e-05, "loss": 0.2077, "step": 31275 }, { "epoch": 2.533700583279326, "grad_norm": 0.08039218187332153, "learning_rate": 8.146181196273461e-05, "loss": 0.2589, "step": 31276 }, { "epoch": 2.5337815942968245, "grad_norm": 0.06383368372917175, "learning_rate": 8.145731131013997e-05, "loss": 0.229, "step": 31277 }, { "epoch": 2.5338626053143227, "grad_norm": 0.05893819406628609, "learning_rate": 8.145281065754536e-05, "loss": 0.2638, "step": 31278 }, { "epoch": 2.533943616331821, "grad_norm": 0.07219822704792023, "learning_rate": 8.144831000495073e-05, "loss": 0.2814, "step": 31279 }, { "epoch": 2.5340246273493197, "grad_norm": 0.06139072775840759, "learning_rate": 8.144380935235609e-05, "loss": 0.2013, "step": 31280 }, { "epoch": 2.534105638366818, "grad_norm": 0.06618008762598038, "learning_rate": 8.143930869976148e-05, "loss": 0.2239, "step": 31281 }, { "epoch": 2.534186649384316, "grad_norm": 0.07001540809869766, "learning_rate": 8.143480804716685e-05, "loss": 0.2598, "step": 31282 }, { "epoch": 2.534267660401815, "grad_norm": 0.06306258589029312, "learning_rate": 8.143030739457221e-05, "loss": 0.2509, "step": 31283 }, { "epoch": 2.534348671419313, "grad_norm": 0.08775768429040909, "learning_rate": 8.14258067419776e-05, "loss": 0.2411, "step": 31284 }, { "epoch": 2.5344296824368113, "grad_norm": 0.07452242076396942, "learning_rate": 8.142130608938297e-05, "loss": 0.2495, "step": 31285 }, { "epoch": 2.53451069345431, "grad_norm": 0.0733480229973793, "learning_rate": 8.141680543678833e-05, "loss": 0.2598, "step": 31286 }, { "epoch": 2.5345917044718083, "grad_norm": 0.08361136168241501, "learning_rate": 8.141230478419372e-05, "loss": 0.2812, "step": 31287 }, { "epoch": 2.5346727154893065, "grad_norm": 0.06638027727603912, "learning_rate": 8.140780413159909e-05, "loss": 0.232, "step": 31288 }, { "epoch": 2.534753726506805, "grad_norm": 0.0772709921002388, "learning_rate": 8.140330347900445e-05, "loss": 0.2764, "step": 31289 }, { "epoch": 2.5348347375243034, "grad_norm": 0.06880713999271393, "learning_rate": 8.139880282640984e-05, "loss": 0.2691, "step": 31290 }, { "epoch": 2.5349157485418017, "grad_norm": 0.07806142419576645, "learning_rate": 8.139430217381521e-05, "loss": 0.2485, "step": 31291 }, { "epoch": 2.5349967595593, "grad_norm": 0.07091590017080307, "learning_rate": 8.138980152122057e-05, "loss": 0.2637, "step": 31292 }, { "epoch": 2.535077770576798, "grad_norm": 0.07089286297559738, "learning_rate": 8.138530086862596e-05, "loss": 0.2482, "step": 31293 }, { "epoch": 2.535158781594297, "grad_norm": 0.06219044700264931, "learning_rate": 8.138080021603133e-05, "loss": 0.2416, "step": 31294 }, { "epoch": 2.535239792611795, "grad_norm": 0.07427731901407242, "learning_rate": 8.13762995634367e-05, "loss": 0.2847, "step": 31295 }, { "epoch": 2.5353208036292934, "grad_norm": 0.06974802166223526, "learning_rate": 8.137179891084208e-05, "loss": 0.2975, "step": 31296 }, { "epoch": 2.535401814646792, "grad_norm": 0.0793389305472374, "learning_rate": 8.136729825824745e-05, "loss": 0.2627, "step": 31297 }, { "epoch": 2.5354828256642903, "grad_norm": 0.07065420597791672, "learning_rate": 8.136279760565281e-05, "loss": 0.2782, "step": 31298 }, { "epoch": 2.5355638366817885, "grad_norm": 0.07344506680965424, "learning_rate": 8.13582969530582e-05, "loss": 0.2681, "step": 31299 }, { "epoch": 2.535644847699287, "grad_norm": 0.06105487421154976, "learning_rate": 8.135379630046358e-05, "loss": 0.2714, "step": 31300 }, { "epoch": 2.5357258587167855, "grad_norm": 0.06935641914606094, "learning_rate": 8.134929564786894e-05, "loss": 0.221, "step": 31301 }, { "epoch": 2.5358068697342837, "grad_norm": 0.07248562574386597, "learning_rate": 8.134479499527432e-05, "loss": 0.2825, "step": 31302 }, { "epoch": 2.5358878807517824, "grad_norm": 0.0756208673119545, "learning_rate": 8.13402943426797e-05, "loss": 0.2494, "step": 31303 }, { "epoch": 2.5359688917692806, "grad_norm": 0.0709727481007576, "learning_rate": 8.133579369008507e-05, "loss": 0.2962, "step": 31304 }, { "epoch": 2.536049902786779, "grad_norm": 0.05900624394416809, "learning_rate": 8.133129303749044e-05, "loss": 0.2414, "step": 31305 }, { "epoch": 2.5361309138042776, "grad_norm": 0.08494237810373306, "learning_rate": 8.132679238489582e-05, "loss": 0.2627, "step": 31306 }, { "epoch": 2.536211924821776, "grad_norm": 0.07157905399799347, "learning_rate": 8.132229173230119e-05, "loss": 0.2756, "step": 31307 }, { "epoch": 2.536292935839274, "grad_norm": 0.05654566362500191, "learning_rate": 8.131779107970656e-05, "loss": 0.2383, "step": 31308 }, { "epoch": 2.5363739468567728, "grad_norm": 0.06779604405164719, "learning_rate": 8.131329042711194e-05, "loss": 0.2504, "step": 31309 }, { "epoch": 2.536454957874271, "grad_norm": 0.059628915041685104, "learning_rate": 8.130878977451731e-05, "loss": 0.216, "step": 31310 }, { "epoch": 2.5365359688917692, "grad_norm": 0.07072723656892776, "learning_rate": 8.130428912192268e-05, "loss": 0.2538, "step": 31311 }, { "epoch": 2.536616979909268, "grad_norm": 0.0625431165099144, "learning_rate": 8.129978846932806e-05, "loss": 0.2461, "step": 31312 }, { "epoch": 2.536697990926766, "grad_norm": 0.07024965435266495, "learning_rate": 8.129528781673343e-05, "loss": 0.2557, "step": 31313 }, { "epoch": 2.5367790019442644, "grad_norm": 0.07092742621898651, "learning_rate": 8.12907871641388e-05, "loss": 0.2451, "step": 31314 }, { "epoch": 2.5368600129617627, "grad_norm": 0.07966975122690201, "learning_rate": 8.128628651154418e-05, "loss": 0.2333, "step": 31315 }, { "epoch": 2.536941023979261, "grad_norm": 0.06543375551700592, "learning_rate": 8.128178585894955e-05, "loss": 0.2375, "step": 31316 }, { "epoch": 2.5370220349967596, "grad_norm": 0.0650889202952385, "learning_rate": 8.127728520635493e-05, "loss": 0.2669, "step": 31317 }, { "epoch": 2.537103046014258, "grad_norm": 0.06884335726499557, "learning_rate": 8.12727845537603e-05, "loss": 0.2407, "step": 31318 }, { "epoch": 2.537184057031756, "grad_norm": 0.07633572816848755, "learning_rate": 8.126828390116567e-05, "loss": 0.2261, "step": 31319 }, { "epoch": 2.537265068049255, "grad_norm": 0.08338648080825806, "learning_rate": 8.126378324857105e-05, "loss": 0.2446, "step": 31320 }, { "epoch": 2.537346079066753, "grad_norm": 0.06513289362192154, "learning_rate": 8.125928259597642e-05, "loss": 0.2519, "step": 31321 }, { "epoch": 2.5374270900842513, "grad_norm": 0.07970602810382843, "learning_rate": 8.12547819433818e-05, "loss": 0.2615, "step": 31322 }, { "epoch": 2.53750810110175, "grad_norm": 0.06919629871845245, "learning_rate": 8.125028129078717e-05, "loss": 0.2653, "step": 31323 }, { "epoch": 2.537589112119248, "grad_norm": 0.06731939315795898, "learning_rate": 8.124578063819254e-05, "loss": 0.244, "step": 31324 }, { "epoch": 2.5376701231367464, "grad_norm": 0.08713632822036743, "learning_rate": 8.124127998559792e-05, "loss": 0.2377, "step": 31325 }, { "epoch": 2.537751134154245, "grad_norm": 0.07864696532487869, "learning_rate": 8.123677933300329e-05, "loss": 0.2489, "step": 31326 }, { "epoch": 2.5378321451717434, "grad_norm": 0.07397564500570297, "learning_rate": 8.123227868040866e-05, "loss": 0.2627, "step": 31327 }, { "epoch": 2.5379131561892416, "grad_norm": 0.05374839901924133, "learning_rate": 8.122777802781404e-05, "loss": 0.2231, "step": 31328 }, { "epoch": 2.5379941672067403, "grad_norm": 0.06318262964487076, "learning_rate": 8.122327737521941e-05, "loss": 0.2434, "step": 31329 }, { "epoch": 2.5380751782242386, "grad_norm": 0.07979314029216766, "learning_rate": 8.121877672262478e-05, "loss": 0.2595, "step": 31330 }, { "epoch": 2.538156189241737, "grad_norm": 0.06891234219074249, "learning_rate": 8.121427607003016e-05, "loss": 0.2358, "step": 31331 }, { "epoch": 2.5382372002592355, "grad_norm": 0.054671745747327805, "learning_rate": 8.120977541743553e-05, "loss": 0.234, "step": 31332 }, { "epoch": 2.5383182112767337, "grad_norm": 0.06143256276845932, "learning_rate": 8.12052747648409e-05, "loss": 0.2299, "step": 31333 }, { "epoch": 2.538399222294232, "grad_norm": 0.0636318251490593, "learning_rate": 8.120077411224628e-05, "loss": 0.2702, "step": 31334 }, { "epoch": 2.5384802333117307, "grad_norm": 0.07338964939117432, "learning_rate": 8.119627345965165e-05, "loss": 0.2552, "step": 31335 }, { "epoch": 2.538561244329229, "grad_norm": 0.07319425791501999, "learning_rate": 8.119177280705702e-05, "loss": 0.217, "step": 31336 }, { "epoch": 2.538642255346727, "grad_norm": 0.06431514769792557, "learning_rate": 8.11872721544624e-05, "loss": 0.2385, "step": 31337 }, { "epoch": 2.5387232663642254, "grad_norm": 0.06492746621370316, "learning_rate": 8.118277150186777e-05, "loss": 0.218, "step": 31338 }, { "epoch": 2.5388042773817237, "grad_norm": 0.07999663054943085, "learning_rate": 8.117827084927316e-05, "loss": 0.2526, "step": 31339 }, { "epoch": 2.5388852883992223, "grad_norm": 0.08836555480957031, "learning_rate": 8.117377019667852e-05, "loss": 0.2302, "step": 31340 }, { "epoch": 2.5389662994167206, "grad_norm": 0.0734882652759552, "learning_rate": 8.116926954408389e-05, "loss": 0.2532, "step": 31341 }, { "epoch": 2.539047310434219, "grad_norm": 0.07000671327114105, "learning_rate": 8.116476889148928e-05, "loss": 0.2284, "step": 31342 }, { "epoch": 2.5391283214517175, "grad_norm": 0.07682245224714279, "learning_rate": 8.116026823889464e-05, "loss": 0.2607, "step": 31343 }, { "epoch": 2.5392093324692158, "grad_norm": 0.07145370543003082, "learning_rate": 8.115576758630001e-05, "loss": 0.2871, "step": 31344 }, { "epoch": 2.539290343486714, "grad_norm": 0.07194703817367554, "learning_rate": 8.11512669337054e-05, "loss": 0.2931, "step": 31345 }, { "epoch": 2.5393713545042127, "grad_norm": 0.09231244027614594, "learning_rate": 8.114676628111076e-05, "loss": 0.2872, "step": 31346 }, { "epoch": 2.539452365521711, "grad_norm": 0.06625858694314957, "learning_rate": 8.114226562851613e-05, "loss": 0.2093, "step": 31347 }, { "epoch": 2.539533376539209, "grad_norm": 0.06716077029705048, "learning_rate": 8.113776497592152e-05, "loss": 0.2434, "step": 31348 }, { "epoch": 2.539614387556708, "grad_norm": 0.05468598008155823, "learning_rate": 8.113326432332688e-05, "loss": 0.2153, "step": 31349 }, { "epoch": 2.539695398574206, "grad_norm": 0.05044583976268768, "learning_rate": 8.112876367073226e-05, "loss": 0.2816, "step": 31350 }, { "epoch": 2.5397764095917044, "grad_norm": 0.08922028541564941, "learning_rate": 8.112426301813764e-05, "loss": 0.3512, "step": 31351 }, { "epoch": 2.539857420609203, "grad_norm": 0.07008825987577438, "learning_rate": 8.1119762365543e-05, "loss": 0.2424, "step": 31352 }, { "epoch": 2.5399384316267013, "grad_norm": 0.07038585841655731, "learning_rate": 8.111526171294838e-05, "loss": 0.2347, "step": 31353 }, { "epoch": 2.5400194426441995, "grad_norm": 0.06255844235420227, "learning_rate": 8.111076106035376e-05, "loss": 0.25, "step": 31354 }, { "epoch": 2.5401004536616982, "grad_norm": 0.06451769173145294, "learning_rate": 8.110626040775912e-05, "loss": 0.2458, "step": 31355 }, { "epoch": 2.5401814646791965, "grad_norm": 0.0686870664358139, "learning_rate": 8.110175975516451e-05, "loss": 0.2339, "step": 31356 }, { "epoch": 2.5402624756966947, "grad_norm": 0.07512461394071579, "learning_rate": 8.109725910256988e-05, "loss": 0.2332, "step": 31357 }, { "epoch": 2.540343486714193, "grad_norm": 0.07387032359838486, "learning_rate": 8.109275844997524e-05, "loss": 0.2731, "step": 31358 }, { "epoch": 2.5404244977316917, "grad_norm": 0.07460983842611313, "learning_rate": 8.108825779738063e-05, "loss": 0.2667, "step": 31359 }, { "epoch": 2.54050550874919, "grad_norm": 0.09056951850652695, "learning_rate": 8.1083757144786e-05, "loss": 0.2487, "step": 31360 }, { "epoch": 2.540586519766688, "grad_norm": 0.0651530846953392, "learning_rate": 8.107925649219136e-05, "loss": 0.267, "step": 31361 }, { "epoch": 2.5406675307841864, "grad_norm": 0.07405731827020645, "learning_rate": 8.107475583959675e-05, "loss": 0.2498, "step": 31362 }, { "epoch": 2.540748541801685, "grad_norm": 0.08580704033374786, "learning_rate": 8.107025518700213e-05, "loss": 0.239, "step": 31363 }, { "epoch": 2.5408295528191833, "grad_norm": 0.08362039923667908, "learning_rate": 8.106575453440749e-05, "loss": 0.2814, "step": 31364 }, { "epoch": 2.5409105638366816, "grad_norm": 0.06828504055738449, "learning_rate": 8.106125388181287e-05, "loss": 0.2383, "step": 31365 }, { "epoch": 2.5409915748541803, "grad_norm": 0.060805875808000565, "learning_rate": 8.105675322921825e-05, "loss": 0.2859, "step": 31366 }, { "epoch": 2.5410725858716785, "grad_norm": 0.08148464560508728, "learning_rate": 8.10522525766236e-05, "loss": 0.2517, "step": 31367 }, { "epoch": 2.5411535968891767, "grad_norm": 0.0642819032073021, "learning_rate": 8.1047751924029e-05, "loss": 0.2102, "step": 31368 }, { "epoch": 2.5412346079066754, "grad_norm": 0.07097551971673965, "learning_rate": 8.104325127143437e-05, "loss": 0.2484, "step": 31369 }, { "epoch": 2.5413156189241737, "grad_norm": 0.07399990409612656, "learning_rate": 8.103875061883973e-05, "loss": 0.2383, "step": 31370 }, { "epoch": 2.541396629941672, "grad_norm": 0.0638495683670044, "learning_rate": 8.103424996624511e-05, "loss": 0.2475, "step": 31371 }, { "epoch": 2.5414776409591706, "grad_norm": 0.06386625021696091, "learning_rate": 8.102974931365049e-05, "loss": 0.2494, "step": 31372 }, { "epoch": 2.541558651976669, "grad_norm": 0.08529839664697647, "learning_rate": 8.102524866105585e-05, "loss": 0.2434, "step": 31373 }, { "epoch": 2.541639662994167, "grad_norm": 0.07638844847679138, "learning_rate": 8.102074800846124e-05, "loss": 0.2781, "step": 31374 }, { "epoch": 2.541720674011666, "grad_norm": 0.06507378816604614, "learning_rate": 8.101624735586661e-05, "loss": 0.2088, "step": 31375 }, { "epoch": 2.541801685029164, "grad_norm": 0.062385451048612595, "learning_rate": 8.101174670327197e-05, "loss": 0.321, "step": 31376 }, { "epoch": 2.5418826960466623, "grad_norm": 0.051668040454387665, "learning_rate": 8.100724605067736e-05, "loss": 0.2125, "step": 31377 }, { "epoch": 2.541963707064161, "grad_norm": 0.06572634726762772, "learning_rate": 8.100274539808273e-05, "loss": 0.2803, "step": 31378 }, { "epoch": 2.542044718081659, "grad_norm": 0.08419101685285568, "learning_rate": 8.099824474548809e-05, "loss": 0.2258, "step": 31379 }, { "epoch": 2.5421257290991575, "grad_norm": 0.05883840471506119, "learning_rate": 8.099374409289348e-05, "loss": 0.2255, "step": 31380 }, { "epoch": 2.5422067401166557, "grad_norm": 0.0676005482673645, "learning_rate": 8.098924344029885e-05, "loss": 0.2646, "step": 31381 }, { "epoch": 2.5422877511341544, "grad_norm": 0.09608455747365952, "learning_rate": 8.098474278770422e-05, "loss": 0.2543, "step": 31382 }, { "epoch": 2.5423687621516526, "grad_norm": 0.07695041596889496, "learning_rate": 8.09802421351096e-05, "loss": 0.2484, "step": 31383 }, { "epoch": 2.542449773169151, "grad_norm": 0.05805501341819763, "learning_rate": 8.097574148251497e-05, "loss": 0.2323, "step": 31384 }, { "epoch": 2.542530784186649, "grad_norm": 0.06507986783981323, "learning_rate": 8.097124082992035e-05, "loss": 0.2528, "step": 31385 }, { "epoch": 2.542611795204148, "grad_norm": 0.06332040578126907, "learning_rate": 8.096674017732572e-05, "loss": 0.2356, "step": 31386 }, { "epoch": 2.542692806221646, "grad_norm": 0.06507432460784912, "learning_rate": 8.096223952473109e-05, "loss": 0.2551, "step": 31387 }, { "epoch": 2.5427738172391443, "grad_norm": 0.08199954777956009, "learning_rate": 8.095773887213647e-05, "loss": 0.258, "step": 31388 }, { "epoch": 2.542854828256643, "grad_norm": 0.0717313215136528, "learning_rate": 8.095323821954184e-05, "loss": 0.2619, "step": 31389 }, { "epoch": 2.5429358392741412, "grad_norm": 0.056809939444065094, "learning_rate": 8.094873756694721e-05, "loss": 0.2235, "step": 31390 }, { "epoch": 2.5430168502916395, "grad_norm": 0.06941332668066025, "learning_rate": 8.094423691435259e-05, "loss": 0.246, "step": 31391 }, { "epoch": 2.543097861309138, "grad_norm": 0.07208379358053207, "learning_rate": 8.093973626175796e-05, "loss": 0.2508, "step": 31392 }, { "epoch": 2.5431788723266364, "grad_norm": 0.06503705680370331, "learning_rate": 8.093523560916333e-05, "loss": 0.2538, "step": 31393 }, { "epoch": 2.5432598833441347, "grad_norm": 0.07446480542421341, "learning_rate": 8.093073495656871e-05, "loss": 0.3158, "step": 31394 }, { "epoch": 2.5433408943616334, "grad_norm": 0.06678412109613419, "learning_rate": 8.092623430397408e-05, "loss": 0.223, "step": 31395 }, { "epoch": 2.5434219053791316, "grad_norm": 0.05912339687347412, "learning_rate": 8.092173365137945e-05, "loss": 0.2329, "step": 31396 }, { "epoch": 2.54350291639663, "grad_norm": 0.06796559691429138, "learning_rate": 8.091723299878483e-05, "loss": 0.225, "step": 31397 }, { "epoch": 2.5435839274141285, "grad_norm": 0.07572735846042633, "learning_rate": 8.09127323461902e-05, "loss": 0.2327, "step": 31398 }, { "epoch": 2.5436649384316268, "grad_norm": 0.07548676431179047, "learning_rate": 8.090823169359558e-05, "loss": 0.2619, "step": 31399 }, { "epoch": 2.543745949449125, "grad_norm": 0.05666543170809746, "learning_rate": 8.090373104100095e-05, "loss": 0.2124, "step": 31400 }, { "epoch": 2.5438269604666237, "grad_norm": 0.0540505051612854, "learning_rate": 8.089923038840632e-05, "loss": 0.2521, "step": 31401 }, { "epoch": 2.543907971484122, "grad_norm": 0.07738293707370758, "learning_rate": 8.08947297358117e-05, "loss": 0.239, "step": 31402 }, { "epoch": 2.54398898250162, "grad_norm": 0.07248470187187195, "learning_rate": 8.089022908321707e-05, "loss": 0.2341, "step": 31403 }, { "epoch": 2.5440699935191184, "grad_norm": 0.06621932983398438, "learning_rate": 8.088572843062244e-05, "loss": 0.2474, "step": 31404 }, { "epoch": 2.544151004536617, "grad_norm": 0.06753819435834885, "learning_rate": 8.088122777802782e-05, "loss": 0.253, "step": 31405 }, { "epoch": 2.5442320155541154, "grad_norm": 0.06610064208507538, "learning_rate": 8.087672712543319e-05, "loss": 0.2116, "step": 31406 }, { "epoch": 2.5443130265716136, "grad_norm": 0.06519056856632233, "learning_rate": 8.087222647283856e-05, "loss": 0.298, "step": 31407 }, { "epoch": 2.544394037589112, "grad_norm": 0.080140121281147, "learning_rate": 8.086772582024395e-05, "loss": 0.2725, "step": 31408 }, { "epoch": 2.5444750486066106, "grad_norm": 0.06608147919178009, "learning_rate": 8.086322516764931e-05, "loss": 0.2641, "step": 31409 }, { "epoch": 2.544556059624109, "grad_norm": 0.060456544160842896, "learning_rate": 8.085872451505469e-05, "loss": 0.2372, "step": 31410 }, { "epoch": 2.544637070641607, "grad_norm": 0.06948632746934891, "learning_rate": 8.085422386246007e-05, "loss": 0.235, "step": 31411 }, { "epoch": 2.5447180816591057, "grad_norm": 0.062045566737651825, "learning_rate": 8.084972320986543e-05, "loss": 0.2112, "step": 31412 }, { "epoch": 2.544799092676604, "grad_norm": 0.05757555738091469, "learning_rate": 8.08452225572708e-05, "loss": 0.2134, "step": 31413 }, { "epoch": 2.5448801036941022, "grad_norm": 0.06793248653411865, "learning_rate": 8.084072190467619e-05, "loss": 0.2696, "step": 31414 }, { "epoch": 2.544961114711601, "grad_norm": 0.05913359671831131, "learning_rate": 8.083622125208155e-05, "loss": 0.2545, "step": 31415 }, { "epoch": 2.545042125729099, "grad_norm": 0.061100032180547714, "learning_rate": 8.083172059948693e-05, "loss": 0.2445, "step": 31416 }, { "epoch": 2.5451231367465974, "grad_norm": 0.07344558835029602, "learning_rate": 8.082721994689231e-05, "loss": 0.2253, "step": 31417 }, { "epoch": 2.545204147764096, "grad_norm": 0.06750749051570892, "learning_rate": 8.082271929429767e-05, "loss": 0.288, "step": 31418 }, { "epoch": 2.5452851587815943, "grad_norm": 0.059958647936582565, "learning_rate": 8.081821864170305e-05, "loss": 0.2649, "step": 31419 }, { "epoch": 2.5453661697990926, "grad_norm": 0.0737132877111435, "learning_rate": 8.081371798910843e-05, "loss": 0.218, "step": 31420 }, { "epoch": 2.5454471808165913, "grad_norm": 0.06931359320878983, "learning_rate": 8.08092173365138e-05, "loss": 0.229, "step": 31421 }, { "epoch": 2.5455281918340895, "grad_norm": 0.07487521320581436, "learning_rate": 8.080471668391917e-05, "loss": 0.2505, "step": 31422 }, { "epoch": 2.5456092028515878, "grad_norm": 0.05484943836927414, "learning_rate": 8.080021603132456e-05, "loss": 0.2027, "step": 31423 }, { "epoch": 2.5456902138690864, "grad_norm": 0.07246629893779755, "learning_rate": 8.079571537872992e-05, "loss": 0.2567, "step": 31424 }, { "epoch": 2.5457712248865847, "grad_norm": 0.07829077541828156, "learning_rate": 8.079121472613529e-05, "loss": 0.273, "step": 31425 }, { "epoch": 2.545852235904083, "grad_norm": 0.05823566019535065, "learning_rate": 8.078671407354068e-05, "loss": 0.2361, "step": 31426 }, { "epoch": 2.545933246921581, "grad_norm": 0.05854753777384758, "learning_rate": 8.078221342094604e-05, "loss": 0.2787, "step": 31427 }, { "epoch": 2.54601425793908, "grad_norm": 0.0628264844417572, "learning_rate": 8.077771276835141e-05, "loss": 0.2208, "step": 31428 }, { "epoch": 2.546095268956578, "grad_norm": 0.0791764110326767, "learning_rate": 8.07732121157568e-05, "loss": 0.2485, "step": 31429 }, { "epoch": 2.5461762799740764, "grad_norm": 0.06644466519355774, "learning_rate": 8.076871146316216e-05, "loss": 0.2638, "step": 31430 }, { "epoch": 2.5462572909915746, "grad_norm": 0.07715664803981781, "learning_rate": 8.076421081056753e-05, "loss": 0.2882, "step": 31431 }, { "epoch": 2.5463383020090733, "grad_norm": 0.07745123654603958, "learning_rate": 8.075971015797292e-05, "loss": 0.2652, "step": 31432 }, { "epoch": 2.5464193130265715, "grad_norm": 0.07673593610525131, "learning_rate": 8.075520950537828e-05, "loss": 0.2977, "step": 31433 }, { "epoch": 2.54650032404407, "grad_norm": 0.06416471302509308, "learning_rate": 8.075070885278367e-05, "loss": 0.2543, "step": 31434 }, { "epoch": 2.5465813350615685, "grad_norm": 0.0717669129371643, "learning_rate": 8.074620820018904e-05, "loss": 0.2554, "step": 31435 }, { "epoch": 2.5466623460790667, "grad_norm": 0.06670583039522171, "learning_rate": 8.07417075475944e-05, "loss": 0.2304, "step": 31436 }, { "epoch": 2.546743357096565, "grad_norm": 0.06272175908088684, "learning_rate": 8.073720689499979e-05, "loss": 0.2561, "step": 31437 }, { "epoch": 2.5468243681140637, "grad_norm": 0.05772021785378456, "learning_rate": 8.073270624240516e-05, "loss": 0.2488, "step": 31438 }, { "epoch": 2.546905379131562, "grad_norm": 0.07507701963186264, "learning_rate": 8.072820558981052e-05, "loss": 0.2547, "step": 31439 }, { "epoch": 2.54698639014906, "grad_norm": 0.07226193696260452, "learning_rate": 8.07237049372159e-05, "loss": 0.2555, "step": 31440 }, { "epoch": 2.547067401166559, "grad_norm": 0.04555051773786545, "learning_rate": 8.071920428462128e-05, "loss": 0.2192, "step": 31441 }, { "epoch": 2.547148412184057, "grad_norm": 0.07527697086334229, "learning_rate": 8.071470363202664e-05, "loss": 0.2175, "step": 31442 }, { "epoch": 2.5472294232015553, "grad_norm": 0.057383034378290176, "learning_rate": 8.071020297943203e-05, "loss": 0.2166, "step": 31443 }, { "epoch": 2.547310434219054, "grad_norm": 0.07875962555408478, "learning_rate": 8.07057023268374e-05, "loss": 0.2251, "step": 31444 }, { "epoch": 2.5473914452365523, "grad_norm": 0.05845116823911667, "learning_rate": 8.070120167424276e-05, "loss": 0.2163, "step": 31445 }, { "epoch": 2.5474724562540505, "grad_norm": 0.062219440937042236, "learning_rate": 8.069670102164815e-05, "loss": 0.2429, "step": 31446 }, { "epoch": 2.547553467271549, "grad_norm": 0.06835257261991501, "learning_rate": 8.069220036905352e-05, "loss": 0.2873, "step": 31447 }, { "epoch": 2.5476344782890474, "grad_norm": 0.07006371021270752, "learning_rate": 8.068769971645888e-05, "loss": 0.2312, "step": 31448 }, { "epoch": 2.5477154893065457, "grad_norm": 0.06568927317857742, "learning_rate": 8.068319906386427e-05, "loss": 0.2677, "step": 31449 }, { "epoch": 2.547796500324044, "grad_norm": 0.07233678549528122, "learning_rate": 8.067869841126964e-05, "loss": 0.2782, "step": 31450 }, { "epoch": 2.5478775113415426, "grad_norm": 0.05997056886553764, "learning_rate": 8.0674197758675e-05, "loss": 0.2002, "step": 31451 }, { "epoch": 2.547958522359041, "grad_norm": 0.07329046726226807, "learning_rate": 8.066969710608039e-05, "loss": 0.2554, "step": 31452 }, { "epoch": 2.548039533376539, "grad_norm": 0.06880810111761093, "learning_rate": 8.066519645348576e-05, "loss": 0.2187, "step": 31453 }, { "epoch": 2.5481205443940373, "grad_norm": 0.07072120159864426, "learning_rate": 8.066069580089112e-05, "loss": 0.2397, "step": 31454 }, { "epoch": 2.548201555411536, "grad_norm": 0.0745464563369751, "learning_rate": 8.065619514829651e-05, "loss": 0.2726, "step": 31455 }, { "epoch": 2.5482825664290343, "grad_norm": 0.07428991794586182, "learning_rate": 8.065169449570188e-05, "loss": 0.2519, "step": 31456 }, { "epoch": 2.5483635774465325, "grad_norm": 0.06004711240530014, "learning_rate": 8.064719384310724e-05, "loss": 0.2289, "step": 31457 }, { "epoch": 2.548444588464031, "grad_norm": 0.07008999586105347, "learning_rate": 8.064269319051263e-05, "loss": 0.2509, "step": 31458 }, { "epoch": 2.5485255994815295, "grad_norm": 0.06014377623796463, "learning_rate": 8.0638192537918e-05, "loss": 0.2232, "step": 31459 }, { "epoch": 2.5486066104990277, "grad_norm": 0.07197581231594086, "learning_rate": 8.063369188532337e-05, "loss": 0.2214, "step": 31460 }, { "epoch": 2.5486876215165264, "grad_norm": 0.07166114449501038, "learning_rate": 8.062919123272875e-05, "loss": 0.2319, "step": 31461 }, { "epoch": 2.5487686325340246, "grad_norm": 0.06933080404996872, "learning_rate": 8.062469058013413e-05, "loss": 0.2551, "step": 31462 }, { "epoch": 2.548849643551523, "grad_norm": 0.05811614543199539, "learning_rate": 8.06201899275395e-05, "loss": 0.2361, "step": 31463 }, { "epoch": 2.5489306545690216, "grad_norm": 0.06434644013643265, "learning_rate": 8.061568927494487e-05, "loss": 0.2391, "step": 31464 }, { "epoch": 2.54901166558652, "grad_norm": 0.07208694517612457, "learning_rate": 8.061118862235025e-05, "loss": 0.2571, "step": 31465 }, { "epoch": 2.549092676604018, "grad_norm": 0.05893586575984955, "learning_rate": 8.060668796975562e-05, "loss": 0.2181, "step": 31466 }, { "epoch": 2.5491736876215167, "grad_norm": 0.07099951803684235, "learning_rate": 8.0602187317161e-05, "loss": 0.2707, "step": 31467 }, { "epoch": 2.549254698639015, "grad_norm": 0.0784766674041748, "learning_rate": 8.059768666456637e-05, "loss": 0.2883, "step": 31468 }, { "epoch": 2.5493357096565132, "grad_norm": 0.07410445064306259, "learning_rate": 8.059318601197174e-05, "loss": 0.2503, "step": 31469 }, { "epoch": 2.549416720674012, "grad_norm": 0.07164378464221954, "learning_rate": 8.058868535937711e-05, "loss": 0.246, "step": 31470 }, { "epoch": 2.54949773169151, "grad_norm": 0.06276273727416992, "learning_rate": 8.058418470678249e-05, "loss": 0.2383, "step": 31471 }, { "epoch": 2.5495787427090084, "grad_norm": 0.07285445928573608, "learning_rate": 8.057968405418786e-05, "loss": 0.2663, "step": 31472 }, { "epoch": 2.5496597537265067, "grad_norm": 0.059940405189991, "learning_rate": 8.057518340159324e-05, "loss": 0.2534, "step": 31473 }, { "epoch": 2.5497407647440054, "grad_norm": 0.060176264494657516, "learning_rate": 8.057068274899861e-05, "loss": 0.2396, "step": 31474 }, { "epoch": 2.5498217757615036, "grad_norm": 0.08137813955545425, "learning_rate": 8.056618209640398e-05, "loss": 0.2975, "step": 31475 }, { "epoch": 2.549902786779002, "grad_norm": 0.0577470101416111, "learning_rate": 8.056168144380936e-05, "loss": 0.2355, "step": 31476 }, { "epoch": 2.5499837977965, "grad_norm": 0.059146296232938766, "learning_rate": 8.055718079121473e-05, "loss": 0.1929, "step": 31477 }, { "epoch": 2.5500648088139988, "grad_norm": 0.06168728321790695, "learning_rate": 8.05526801386201e-05, "loss": 0.2511, "step": 31478 }, { "epoch": 2.550145819831497, "grad_norm": 0.06982030719518661, "learning_rate": 8.054817948602548e-05, "loss": 0.2568, "step": 31479 }, { "epoch": 2.5502268308489953, "grad_norm": 0.07995269447565079, "learning_rate": 8.054367883343085e-05, "loss": 0.2949, "step": 31480 }, { "epoch": 2.550307841866494, "grad_norm": 0.06170627847313881, "learning_rate": 8.053917818083622e-05, "loss": 0.2176, "step": 31481 }, { "epoch": 2.550388852883992, "grad_norm": 0.05501281097531319, "learning_rate": 8.05346775282416e-05, "loss": 0.2013, "step": 31482 }, { "epoch": 2.5504698639014904, "grad_norm": 0.0645870566368103, "learning_rate": 8.053017687564697e-05, "loss": 0.2536, "step": 31483 }, { "epoch": 2.550550874918989, "grad_norm": 0.08232105523347855, "learning_rate": 8.052567622305235e-05, "loss": 0.2796, "step": 31484 }, { "epoch": 2.5506318859364874, "grad_norm": 0.08395248651504517, "learning_rate": 8.052117557045772e-05, "loss": 0.2554, "step": 31485 }, { "epoch": 2.5507128969539856, "grad_norm": 0.06629413366317749, "learning_rate": 8.051667491786309e-05, "loss": 0.2121, "step": 31486 }, { "epoch": 2.5507939079714843, "grad_norm": 0.08047202974557877, "learning_rate": 8.051217426526847e-05, "loss": 0.2666, "step": 31487 }, { "epoch": 2.5508749189889826, "grad_norm": 0.07313171774148941, "learning_rate": 8.050767361267384e-05, "loss": 0.2548, "step": 31488 }, { "epoch": 2.550955930006481, "grad_norm": 0.07625266164541245, "learning_rate": 8.050317296007923e-05, "loss": 0.2876, "step": 31489 }, { "epoch": 2.5510369410239795, "grad_norm": 0.060665592551231384, "learning_rate": 8.049867230748459e-05, "loss": 0.1688, "step": 31490 }, { "epoch": 2.5511179520414777, "grad_norm": 0.052935559302568436, "learning_rate": 8.049417165488996e-05, "loss": 0.22, "step": 31491 }, { "epoch": 2.551198963058976, "grad_norm": 0.07563591003417969, "learning_rate": 8.048967100229535e-05, "loss": 0.2976, "step": 31492 }, { "epoch": 2.5512799740764747, "grad_norm": 0.0724901482462883, "learning_rate": 8.048517034970071e-05, "loss": 0.2973, "step": 31493 }, { "epoch": 2.551360985093973, "grad_norm": 0.06520567089319229, "learning_rate": 8.048066969710608e-05, "loss": 0.2555, "step": 31494 }, { "epoch": 2.551441996111471, "grad_norm": 0.05636593699455261, "learning_rate": 8.047616904451147e-05, "loss": 0.2236, "step": 31495 }, { "epoch": 2.5515230071289694, "grad_norm": 0.06789355725049973, "learning_rate": 8.047166839191683e-05, "loss": 0.2481, "step": 31496 }, { "epoch": 2.5516040181464676, "grad_norm": 0.06700514256954193, "learning_rate": 8.04671677393222e-05, "loss": 0.2253, "step": 31497 }, { "epoch": 2.5516850291639663, "grad_norm": 0.06449780613183975, "learning_rate": 8.046266708672759e-05, "loss": 0.2459, "step": 31498 }, { "epoch": 2.5517660401814646, "grad_norm": 0.07504129409790039, "learning_rate": 8.045816643413295e-05, "loss": 0.2684, "step": 31499 }, { "epoch": 2.551847051198963, "grad_norm": 0.09547720849514008, "learning_rate": 8.045366578153832e-05, "loss": 0.2897, "step": 31500 }, { "epoch": 2.5519280622164615, "grad_norm": 0.07837854325771332, "learning_rate": 8.044916512894371e-05, "loss": 0.2486, "step": 31501 }, { "epoch": 2.5520090732339598, "grad_norm": 0.07050782442092896, "learning_rate": 8.044466447634907e-05, "loss": 0.2473, "step": 31502 }, { "epoch": 2.552090084251458, "grad_norm": 0.0633266493678093, "learning_rate": 8.044016382375444e-05, "loss": 0.2212, "step": 31503 }, { "epoch": 2.5521710952689567, "grad_norm": 0.06830959767103195, "learning_rate": 8.043566317115983e-05, "loss": 0.2966, "step": 31504 }, { "epoch": 2.552252106286455, "grad_norm": 0.06610594689846039, "learning_rate": 8.043116251856519e-05, "loss": 0.2113, "step": 31505 }, { "epoch": 2.552333117303953, "grad_norm": 0.06861544400453568, "learning_rate": 8.042666186597056e-05, "loss": 0.2738, "step": 31506 }, { "epoch": 2.552414128321452, "grad_norm": 0.06505028903484344, "learning_rate": 8.042216121337595e-05, "loss": 0.2386, "step": 31507 }, { "epoch": 2.55249513933895, "grad_norm": 0.05172327533364296, "learning_rate": 8.041766056078131e-05, "loss": 0.2403, "step": 31508 }, { "epoch": 2.5525761503564484, "grad_norm": 0.07511154562234879, "learning_rate": 8.041315990818669e-05, "loss": 0.2425, "step": 31509 }, { "epoch": 2.552657161373947, "grad_norm": 0.08115319162607193, "learning_rate": 8.040865925559207e-05, "loss": 0.2258, "step": 31510 }, { "epoch": 2.5527381723914453, "grad_norm": 0.062161825597286224, "learning_rate": 8.040415860299743e-05, "loss": 0.283, "step": 31511 }, { "epoch": 2.5528191834089435, "grad_norm": 0.07682914286851883, "learning_rate": 8.03996579504028e-05, "loss": 0.2659, "step": 31512 }, { "epoch": 2.5529001944264422, "grad_norm": 0.07859036326408386, "learning_rate": 8.03951572978082e-05, "loss": 0.2941, "step": 31513 }, { "epoch": 2.5529812054439405, "grad_norm": 0.06644420325756073, "learning_rate": 8.039065664521355e-05, "loss": 0.2299, "step": 31514 }, { "epoch": 2.5530622164614387, "grad_norm": 0.07618606835603714, "learning_rate": 8.038615599261894e-05, "loss": 0.2805, "step": 31515 }, { "epoch": 2.5531432274789374, "grad_norm": 0.05901041626930237, "learning_rate": 8.038165534002431e-05, "loss": 0.2437, "step": 31516 }, { "epoch": 2.5532242384964356, "grad_norm": 0.05900729075074196, "learning_rate": 8.037715468742967e-05, "loss": 0.2205, "step": 31517 }, { "epoch": 2.553305249513934, "grad_norm": 0.05804867297410965, "learning_rate": 8.037265403483506e-05, "loss": 0.2451, "step": 31518 }, { "epoch": 2.553386260531432, "grad_norm": 0.08153724670410156, "learning_rate": 8.036815338224043e-05, "loss": 0.2562, "step": 31519 }, { "epoch": 2.5534672715489304, "grad_norm": 0.08371604979038239, "learning_rate": 8.03636527296458e-05, "loss": 0.2581, "step": 31520 }, { "epoch": 2.553548282566429, "grad_norm": 0.07773632556200027, "learning_rate": 8.035915207705118e-05, "loss": 0.2601, "step": 31521 }, { "epoch": 2.5536292935839273, "grad_norm": 0.06940475851297379, "learning_rate": 8.035465142445656e-05, "loss": 0.2382, "step": 31522 }, { "epoch": 2.5537103046014256, "grad_norm": 0.06401082873344421, "learning_rate": 8.035015077186192e-05, "loss": 0.2451, "step": 31523 }, { "epoch": 2.5537913156189243, "grad_norm": 0.07718578726053238, "learning_rate": 8.03456501192673e-05, "loss": 0.3, "step": 31524 }, { "epoch": 2.5538723266364225, "grad_norm": 0.06541422009468079, "learning_rate": 8.034114946667268e-05, "loss": 0.2555, "step": 31525 }, { "epoch": 2.5539533376539207, "grad_norm": 0.06255664676427841, "learning_rate": 8.033664881407804e-05, "loss": 0.2131, "step": 31526 }, { "epoch": 2.5540343486714194, "grad_norm": 0.07217202335596085, "learning_rate": 8.033214816148342e-05, "loss": 0.2385, "step": 31527 }, { "epoch": 2.5541153596889177, "grad_norm": 0.08286501467227936, "learning_rate": 8.03276475088888e-05, "loss": 0.2232, "step": 31528 }, { "epoch": 2.554196370706416, "grad_norm": 0.0661536231637001, "learning_rate": 8.032314685629416e-05, "loss": 0.2298, "step": 31529 }, { "epoch": 2.5542773817239146, "grad_norm": 0.06117008253931999, "learning_rate": 8.031864620369954e-05, "loss": 0.234, "step": 31530 }, { "epoch": 2.554358392741413, "grad_norm": 0.06352005898952484, "learning_rate": 8.031414555110492e-05, "loss": 0.2151, "step": 31531 }, { "epoch": 2.554439403758911, "grad_norm": 0.0644054189324379, "learning_rate": 8.030964489851028e-05, "loss": 0.2567, "step": 31532 }, { "epoch": 2.55452041477641, "grad_norm": 0.06056513637304306, "learning_rate": 8.030514424591567e-05, "loss": 0.2268, "step": 31533 }, { "epoch": 2.554601425793908, "grad_norm": 0.058263856917619705, "learning_rate": 8.030064359332104e-05, "loss": 0.2508, "step": 31534 }, { "epoch": 2.5546824368114063, "grad_norm": 0.08078998327255249, "learning_rate": 8.02961429407264e-05, "loss": 0.2539, "step": 31535 }, { "epoch": 2.554763447828905, "grad_norm": 0.057538606226444244, "learning_rate": 8.029164228813179e-05, "loss": 0.2433, "step": 31536 }, { "epoch": 2.554844458846403, "grad_norm": 0.05998895317316055, "learning_rate": 8.028714163553716e-05, "loss": 0.2392, "step": 31537 }, { "epoch": 2.5549254698639015, "grad_norm": 0.062351860105991364, "learning_rate": 8.028264098294252e-05, "loss": 0.2477, "step": 31538 }, { "epoch": 2.5550064808814, "grad_norm": 0.0619591660797596, "learning_rate": 8.027814033034791e-05, "loss": 0.2093, "step": 31539 }, { "epoch": 2.5550874918988984, "grad_norm": 0.06385014206171036, "learning_rate": 8.027363967775328e-05, "loss": 0.2536, "step": 31540 }, { "epoch": 2.5551685029163966, "grad_norm": 0.06226972118020058, "learning_rate": 8.026913902515865e-05, "loss": 0.244, "step": 31541 }, { "epoch": 2.555249513933895, "grad_norm": 0.059874311089515686, "learning_rate": 8.026463837256403e-05, "loss": 0.2393, "step": 31542 }, { "epoch": 2.555330524951393, "grad_norm": 0.06457412987947464, "learning_rate": 8.02601377199694e-05, "loss": 0.2551, "step": 31543 }, { "epoch": 2.555411535968892, "grad_norm": 0.05957409739494324, "learning_rate": 8.025563706737478e-05, "loss": 0.2265, "step": 31544 }, { "epoch": 2.55549254698639, "grad_norm": 0.06354091316461563, "learning_rate": 8.025113641478015e-05, "loss": 0.2137, "step": 31545 }, { "epoch": 2.5555735580038883, "grad_norm": 0.06427013874053955, "learning_rate": 8.024663576218552e-05, "loss": 0.2358, "step": 31546 }, { "epoch": 2.555654569021387, "grad_norm": 0.06286320835351944, "learning_rate": 8.02421351095909e-05, "loss": 0.2234, "step": 31547 }, { "epoch": 2.5557355800388852, "grad_norm": 0.07858006656169891, "learning_rate": 8.023763445699627e-05, "loss": 0.24, "step": 31548 }, { "epoch": 2.5558165910563835, "grad_norm": 0.07946208864450455, "learning_rate": 8.023313380440164e-05, "loss": 0.2449, "step": 31549 }, { "epoch": 2.555897602073882, "grad_norm": 0.06396917253732681, "learning_rate": 8.022863315180702e-05, "loss": 0.2593, "step": 31550 }, { "epoch": 2.5559786130913804, "grad_norm": 0.07191366702318192, "learning_rate": 8.022413249921239e-05, "loss": 0.2111, "step": 31551 }, { "epoch": 2.5560596241088787, "grad_norm": 0.07205166667699814, "learning_rate": 8.021963184661776e-05, "loss": 0.2119, "step": 31552 }, { "epoch": 2.5561406351263773, "grad_norm": 0.09519444406032562, "learning_rate": 8.021513119402314e-05, "loss": 0.2577, "step": 31553 }, { "epoch": 2.5562216461438756, "grad_norm": 0.07464559376239777, "learning_rate": 8.021063054142851e-05, "loss": 0.2512, "step": 31554 }, { "epoch": 2.556302657161374, "grad_norm": 0.07090155780315399, "learning_rate": 8.020612988883388e-05, "loss": 0.233, "step": 31555 }, { "epoch": 2.5563836681788725, "grad_norm": 0.06708156317472458, "learning_rate": 8.020162923623926e-05, "loss": 0.2269, "step": 31556 }, { "epoch": 2.5564646791963708, "grad_norm": 0.07535571604967117, "learning_rate": 8.019712858364463e-05, "loss": 0.2014, "step": 31557 }, { "epoch": 2.556545690213869, "grad_norm": 0.07511401176452637, "learning_rate": 8.019262793105e-05, "loss": 0.2518, "step": 31558 }, { "epoch": 2.5566267012313677, "grad_norm": 0.055385805666446686, "learning_rate": 8.018812727845538e-05, "loss": 0.213, "step": 31559 }, { "epoch": 2.556707712248866, "grad_norm": 0.08293783664703369, "learning_rate": 8.018362662586075e-05, "loss": 0.2147, "step": 31560 }, { "epoch": 2.556788723266364, "grad_norm": 0.08957002311944962, "learning_rate": 8.017912597326613e-05, "loss": 0.239, "step": 31561 }, { "epoch": 2.556869734283863, "grad_norm": 0.07297007739543915, "learning_rate": 8.01746253206715e-05, "loss": 0.2445, "step": 31562 }, { "epoch": 2.556950745301361, "grad_norm": 0.06064193323254585, "learning_rate": 8.017012466807687e-05, "loss": 0.2358, "step": 31563 }, { "epoch": 2.5570317563188594, "grad_norm": 0.06830794364213943, "learning_rate": 8.016562401548225e-05, "loss": 0.258, "step": 31564 }, { "epoch": 2.5571127673363576, "grad_norm": 0.06914442032575607, "learning_rate": 8.016112336288762e-05, "loss": 0.2373, "step": 31565 }, { "epoch": 2.557193778353856, "grad_norm": 0.07195620238780975, "learning_rate": 8.0156622710293e-05, "loss": 0.247, "step": 31566 }, { "epoch": 2.5572747893713546, "grad_norm": 0.06008646637201309, "learning_rate": 8.015212205769838e-05, "loss": 0.2111, "step": 31567 }, { "epoch": 2.557355800388853, "grad_norm": 0.0733952522277832, "learning_rate": 8.014762140510374e-05, "loss": 0.2395, "step": 31568 }, { "epoch": 2.557436811406351, "grad_norm": 0.06406228989362717, "learning_rate": 8.014312075250912e-05, "loss": 0.2522, "step": 31569 }, { "epoch": 2.5575178224238497, "grad_norm": 0.0706440731883049, "learning_rate": 8.01386200999145e-05, "loss": 0.251, "step": 31570 }, { "epoch": 2.557598833441348, "grad_norm": 0.06202545017004013, "learning_rate": 8.013411944731986e-05, "loss": 0.2392, "step": 31571 }, { "epoch": 2.557679844458846, "grad_norm": 0.06435450911521912, "learning_rate": 8.012961879472524e-05, "loss": 0.2225, "step": 31572 }, { "epoch": 2.557760855476345, "grad_norm": 0.06245691329240799, "learning_rate": 8.012511814213062e-05, "loss": 0.2139, "step": 31573 }, { "epoch": 2.557841866493843, "grad_norm": 0.07368949055671692, "learning_rate": 8.012061748953598e-05, "loss": 0.2669, "step": 31574 }, { "epoch": 2.5579228775113414, "grad_norm": 0.09155303239822388, "learning_rate": 8.011611683694136e-05, "loss": 0.2254, "step": 31575 }, { "epoch": 2.55800388852884, "grad_norm": 0.0641794353723526, "learning_rate": 8.011161618434674e-05, "loss": 0.2448, "step": 31576 }, { "epoch": 2.5580848995463383, "grad_norm": 0.04910936951637268, "learning_rate": 8.01071155317521e-05, "loss": 0.2323, "step": 31577 }, { "epoch": 2.5581659105638366, "grad_norm": 0.08007602393627167, "learning_rate": 8.010261487915748e-05, "loss": 0.2633, "step": 31578 }, { "epoch": 2.5582469215813353, "grad_norm": 0.07051931321620941, "learning_rate": 8.009811422656286e-05, "loss": 0.2535, "step": 31579 }, { "epoch": 2.5583279325988335, "grad_norm": 0.05478541553020477, "learning_rate": 8.009361357396822e-05, "loss": 0.2535, "step": 31580 }, { "epoch": 2.5584089436163318, "grad_norm": 0.06377498805522919, "learning_rate": 8.00891129213736e-05, "loss": 0.2529, "step": 31581 }, { "epoch": 2.5584899546338304, "grad_norm": 0.07668465375900269, "learning_rate": 8.008461226877899e-05, "loss": 0.2516, "step": 31582 }, { "epoch": 2.5585709656513287, "grad_norm": 0.06877156347036362, "learning_rate": 8.008011161618435e-05, "loss": 0.2237, "step": 31583 }, { "epoch": 2.558651976668827, "grad_norm": 0.07158958911895752, "learning_rate": 8.007561096358972e-05, "loss": 0.2451, "step": 31584 }, { "epoch": 2.558732987686325, "grad_norm": 0.06303799152374268, "learning_rate": 8.00711103109951e-05, "loss": 0.238, "step": 31585 }, { "epoch": 2.558813998703824, "grad_norm": 0.06521542370319366, "learning_rate": 8.006660965840047e-05, "loss": 0.2328, "step": 31586 }, { "epoch": 2.558895009721322, "grad_norm": 0.08857200294733047, "learning_rate": 8.006210900580584e-05, "loss": 0.2863, "step": 31587 }, { "epoch": 2.5589760207388204, "grad_norm": 0.06617053598165512, "learning_rate": 8.005760835321123e-05, "loss": 0.2446, "step": 31588 }, { "epoch": 2.5590570317563186, "grad_norm": 0.07978159934282303, "learning_rate": 8.005310770061659e-05, "loss": 0.2124, "step": 31589 }, { "epoch": 2.5591380427738173, "grad_norm": 0.060769423842430115, "learning_rate": 8.004860704802196e-05, "loss": 0.249, "step": 31590 }, { "epoch": 2.5592190537913155, "grad_norm": 0.07907140254974365, "learning_rate": 8.004410639542735e-05, "loss": 0.2616, "step": 31591 }, { "epoch": 2.559300064808814, "grad_norm": 0.0631987601518631, "learning_rate": 8.003960574283271e-05, "loss": 0.2509, "step": 31592 }, { "epoch": 2.5593810758263125, "grad_norm": 0.0674910768866539, "learning_rate": 8.00351050902381e-05, "loss": 0.2322, "step": 31593 }, { "epoch": 2.5594620868438107, "grad_norm": 0.06475397199392319, "learning_rate": 8.003060443764347e-05, "loss": 0.248, "step": 31594 }, { "epoch": 2.559543097861309, "grad_norm": 0.06565103679895401, "learning_rate": 8.002610378504883e-05, "loss": 0.269, "step": 31595 }, { "epoch": 2.5596241088788076, "grad_norm": 0.06982734799385071, "learning_rate": 8.002160313245422e-05, "loss": 0.2485, "step": 31596 }, { "epoch": 2.559705119896306, "grad_norm": 0.0696418285369873, "learning_rate": 8.001710247985959e-05, "loss": 0.2317, "step": 31597 }, { "epoch": 2.559786130913804, "grad_norm": 0.06628835201263428, "learning_rate": 8.001260182726495e-05, "loss": 0.2639, "step": 31598 }, { "epoch": 2.559867141931303, "grad_norm": 0.08352109789848328, "learning_rate": 8.000810117467034e-05, "loss": 0.2375, "step": 31599 }, { "epoch": 2.559948152948801, "grad_norm": 0.057371314615011215, "learning_rate": 8.000360052207571e-05, "loss": 0.1994, "step": 31600 }, { "epoch": 2.5600291639662993, "grad_norm": 0.06860015541315079, "learning_rate": 7.999909986948107e-05, "loss": 0.2653, "step": 31601 }, { "epoch": 2.560110174983798, "grad_norm": 0.07338287681341171, "learning_rate": 7.999459921688646e-05, "loss": 0.2302, "step": 31602 }, { "epoch": 2.5601911860012962, "grad_norm": 0.08055268228054047, "learning_rate": 7.999009856429183e-05, "loss": 0.2321, "step": 31603 }, { "epoch": 2.5602721970187945, "grad_norm": 0.06045558676123619, "learning_rate": 7.998559791169719e-05, "loss": 0.2268, "step": 31604 }, { "epoch": 2.560353208036293, "grad_norm": 0.0702483206987381, "learning_rate": 7.998109725910258e-05, "loss": 0.2423, "step": 31605 }, { "epoch": 2.5604342190537914, "grad_norm": 0.05823696404695511, "learning_rate": 7.997659660650795e-05, "loss": 0.2305, "step": 31606 }, { "epoch": 2.5605152300712897, "grad_norm": 0.04120326414704323, "learning_rate": 7.997209595391331e-05, "loss": 0.1926, "step": 31607 }, { "epoch": 2.560596241088788, "grad_norm": 0.07255415618419647, "learning_rate": 7.99675953013187e-05, "loss": 0.2321, "step": 31608 }, { "epoch": 2.5606772521062866, "grad_norm": 0.06354595720767975, "learning_rate": 7.996309464872407e-05, "loss": 0.1984, "step": 31609 }, { "epoch": 2.560758263123785, "grad_norm": 0.05864912271499634, "learning_rate": 7.995859399612943e-05, "loss": 0.2216, "step": 31610 }, { "epoch": 2.560839274141283, "grad_norm": 0.07821990549564362, "learning_rate": 7.995409334353482e-05, "loss": 0.2606, "step": 31611 }, { "epoch": 2.5609202851587813, "grad_norm": 0.08326715975999832, "learning_rate": 7.99495926909402e-05, "loss": 0.2755, "step": 31612 }, { "epoch": 2.56100129617628, "grad_norm": 0.06333693861961365, "learning_rate": 7.994509203834555e-05, "loss": 0.2467, "step": 31613 }, { "epoch": 2.5610823071937783, "grad_norm": 0.07545526325702667, "learning_rate": 7.994059138575094e-05, "loss": 0.2635, "step": 31614 }, { "epoch": 2.5611633182112765, "grad_norm": 0.07584195584058762, "learning_rate": 7.993609073315631e-05, "loss": 0.2271, "step": 31615 }, { "epoch": 2.561244329228775, "grad_norm": 0.06313547492027283, "learning_rate": 7.993159008056167e-05, "loss": 0.2386, "step": 31616 }, { "epoch": 2.5613253402462735, "grad_norm": 0.07154353708028793, "learning_rate": 7.992708942796706e-05, "loss": 0.2546, "step": 31617 }, { "epoch": 2.5614063512637717, "grad_norm": 0.05730261653661728, "learning_rate": 7.992258877537244e-05, "loss": 0.2422, "step": 31618 }, { "epoch": 2.5614873622812704, "grad_norm": 0.07545353472232819, "learning_rate": 7.99180881227778e-05, "loss": 0.2251, "step": 31619 }, { "epoch": 2.5615683732987686, "grad_norm": 0.06900720298290253, "learning_rate": 7.991358747018318e-05, "loss": 0.2536, "step": 31620 }, { "epoch": 2.561649384316267, "grad_norm": 0.08917799592018127, "learning_rate": 7.990908681758856e-05, "loss": 0.2861, "step": 31621 }, { "epoch": 2.5617303953337656, "grad_norm": 0.07041896134614944, "learning_rate": 7.990458616499393e-05, "loss": 0.2607, "step": 31622 }, { "epoch": 2.561811406351264, "grad_norm": 0.06320405751466751, "learning_rate": 7.99000855123993e-05, "loss": 0.2377, "step": 31623 }, { "epoch": 2.561892417368762, "grad_norm": 0.07257578521966934, "learning_rate": 7.989558485980468e-05, "loss": 0.2455, "step": 31624 }, { "epoch": 2.5619734283862607, "grad_norm": 0.05553307384252548, "learning_rate": 7.989108420721005e-05, "loss": 0.2239, "step": 31625 }, { "epoch": 2.562054439403759, "grad_norm": 0.06904014199972153, "learning_rate": 7.988658355461542e-05, "loss": 0.2554, "step": 31626 }, { "epoch": 2.5621354504212572, "grad_norm": 0.06648155301809311, "learning_rate": 7.98820829020208e-05, "loss": 0.2718, "step": 31627 }, { "epoch": 2.562216461438756, "grad_norm": 0.06676594913005829, "learning_rate": 7.987758224942617e-05, "loss": 0.2238, "step": 31628 }, { "epoch": 2.562297472456254, "grad_norm": 0.08011876046657562, "learning_rate": 7.987308159683154e-05, "loss": 0.2354, "step": 31629 }, { "epoch": 2.5623784834737524, "grad_norm": 0.07950930297374725, "learning_rate": 7.986858094423692e-05, "loss": 0.2578, "step": 31630 }, { "epoch": 2.5624594944912507, "grad_norm": 0.06657250970602036, "learning_rate": 7.986408029164229e-05, "loss": 0.2425, "step": 31631 }, { "epoch": 2.5625405055087493, "grad_norm": 0.06820258498191833, "learning_rate": 7.985957963904767e-05, "loss": 0.2869, "step": 31632 }, { "epoch": 2.5626215165262476, "grad_norm": 0.07058540731668472, "learning_rate": 7.985507898645304e-05, "loss": 0.2421, "step": 31633 }, { "epoch": 2.562702527543746, "grad_norm": 0.082171231508255, "learning_rate": 7.985057833385841e-05, "loss": 0.2493, "step": 31634 }, { "epoch": 2.562783538561244, "grad_norm": 0.07239805907011032, "learning_rate": 7.984607768126379e-05, "loss": 0.2511, "step": 31635 }, { "epoch": 2.5628645495787428, "grad_norm": 0.08331374824047089, "learning_rate": 7.984157702866916e-05, "loss": 0.2364, "step": 31636 }, { "epoch": 2.562945560596241, "grad_norm": 0.06185334920883179, "learning_rate": 7.983707637607453e-05, "loss": 0.2163, "step": 31637 }, { "epoch": 2.5630265716137393, "grad_norm": 0.06657331436872482, "learning_rate": 7.983257572347991e-05, "loss": 0.2514, "step": 31638 }, { "epoch": 2.563107582631238, "grad_norm": 0.07455500960350037, "learning_rate": 7.982807507088528e-05, "loss": 0.2303, "step": 31639 }, { "epoch": 2.563188593648736, "grad_norm": 0.07059744745492935, "learning_rate": 7.982357441829065e-05, "loss": 0.2506, "step": 31640 }, { "epoch": 2.5632696046662344, "grad_norm": 0.06850961595773697, "learning_rate": 7.981907376569603e-05, "loss": 0.2667, "step": 31641 }, { "epoch": 2.563350615683733, "grad_norm": 0.061915162950754166, "learning_rate": 7.98145731131014e-05, "loss": 0.2525, "step": 31642 }, { "epoch": 2.5634316267012314, "grad_norm": 0.056903161108493805, "learning_rate": 7.981007246050678e-05, "loss": 0.2664, "step": 31643 }, { "epoch": 2.5635126377187296, "grad_norm": 0.06871285289525986, "learning_rate": 7.980557180791215e-05, "loss": 0.2468, "step": 31644 }, { "epoch": 2.5635936487362283, "grad_norm": 0.06563066691160202, "learning_rate": 7.980107115531752e-05, "loss": 0.2289, "step": 31645 }, { "epoch": 2.5636746597537265, "grad_norm": 0.07172146439552307, "learning_rate": 7.97965705027229e-05, "loss": 0.2563, "step": 31646 }, { "epoch": 2.563755670771225, "grad_norm": 0.0756186842918396, "learning_rate": 7.979206985012827e-05, "loss": 0.2602, "step": 31647 }, { "epoch": 2.5638366817887235, "grad_norm": 0.0699758529663086, "learning_rate": 7.978756919753366e-05, "loss": 0.2454, "step": 31648 }, { "epoch": 2.5639176928062217, "grad_norm": 0.06742367148399353, "learning_rate": 7.978306854493902e-05, "loss": 0.2735, "step": 31649 }, { "epoch": 2.56399870382372, "grad_norm": 0.07160858064889908, "learning_rate": 7.977856789234439e-05, "loss": 0.2511, "step": 31650 }, { "epoch": 2.5640797148412187, "grad_norm": 0.06456317007541656, "learning_rate": 7.977406723974978e-05, "loss": 0.2287, "step": 31651 }, { "epoch": 2.564160725858717, "grad_norm": 0.06363669037818909, "learning_rate": 7.976956658715514e-05, "loss": 0.2989, "step": 31652 }, { "epoch": 2.564241736876215, "grad_norm": 0.07487460225820541, "learning_rate": 7.976506593456051e-05, "loss": 0.2616, "step": 31653 }, { "epoch": 2.5643227478937134, "grad_norm": 0.056149642914533615, "learning_rate": 7.97605652819659e-05, "loss": 0.2086, "step": 31654 }, { "epoch": 2.564403758911212, "grad_norm": 0.06197667494416237, "learning_rate": 7.975606462937126e-05, "loss": 0.2958, "step": 31655 }, { "epoch": 2.5644847699287103, "grad_norm": 0.07219796627759933, "learning_rate": 7.975156397677663e-05, "loss": 0.2387, "step": 31656 }, { "epoch": 2.5645657809462086, "grad_norm": 0.06966370344161987, "learning_rate": 7.974706332418202e-05, "loss": 0.2681, "step": 31657 }, { "epoch": 2.564646791963707, "grad_norm": 0.07845804840326309, "learning_rate": 7.974256267158738e-05, "loss": 0.2618, "step": 31658 }, { "epoch": 2.5647278029812055, "grad_norm": 0.07504191249608994, "learning_rate": 7.973806201899275e-05, "loss": 0.2152, "step": 31659 }, { "epoch": 2.5648088139987038, "grad_norm": 0.0714363306760788, "learning_rate": 7.973356136639814e-05, "loss": 0.2462, "step": 31660 }, { "epoch": 2.564889825016202, "grad_norm": 0.05532535910606384, "learning_rate": 7.97290607138035e-05, "loss": 0.2049, "step": 31661 }, { "epoch": 2.5649708360337007, "grad_norm": 0.07604444026947021, "learning_rate": 7.972456006120887e-05, "loss": 0.2465, "step": 31662 }, { "epoch": 2.565051847051199, "grad_norm": 0.06291288137435913, "learning_rate": 7.972005940861426e-05, "loss": 0.2371, "step": 31663 }, { "epoch": 2.565132858068697, "grad_norm": 0.055569905787706375, "learning_rate": 7.971555875601962e-05, "loss": 0.2203, "step": 31664 }, { "epoch": 2.565213869086196, "grad_norm": 0.07797441631555557, "learning_rate": 7.9711058103425e-05, "loss": 0.2219, "step": 31665 }, { "epoch": 2.565294880103694, "grad_norm": 0.058151982724666595, "learning_rate": 7.970655745083038e-05, "loss": 0.2429, "step": 31666 }, { "epoch": 2.5653758911211924, "grad_norm": 0.06993840634822845, "learning_rate": 7.970205679823574e-05, "loss": 0.2447, "step": 31667 }, { "epoch": 2.565456902138691, "grad_norm": 0.08092262595891953, "learning_rate": 7.969755614564112e-05, "loss": 0.2304, "step": 31668 }, { "epoch": 2.5655379131561893, "grad_norm": 0.060657061636447906, "learning_rate": 7.96930554930465e-05, "loss": 0.2387, "step": 31669 }, { "epoch": 2.5656189241736875, "grad_norm": 0.0777408555150032, "learning_rate": 7.968855484045186e-05, "loss": 0.2553, "step": 31670 }, { "epoch": 2.565699935191186, "grad_norm": 0.09145821630954742, "learning_rate": 7.968405418785724e-05, "loss": 0.2321, "step": 31671 }, { "epoch": 2.5657809462086845, "grad_norm": 0.07050789892673492, "learning_rate": 7.967955353526262e-05, "loss": 0.2307, "step": 31672 }, { "epoch": 2.5658619572261827, "grad_norm": 0.07583153247833252, "learning_rate": 7.967505288266798e-05, "loss": 0.2552, "step": 31673 }, { "epoch": 2.5659429682436814, "grad_norm": 0.06492862850427628, "learning_rate": 7.967055223007337e-05, "loss": 0.2218, "step": 31674 }, { "epoch": 2.5660239792611796, "grad_norm": 0.06800020486116409, "learning_rate": 7.966605157747874e-05, "loss": 0.2584, "step": 31675 }, { "epoch": 2.566104990278678, "grad_norm": 0.06461351364850998, "learning_rate": 7.96615509248841e-05, "loss": 0.2393, "step": 31676 }, { "epoch": 2.566186001296176, "grad_norm": 0.0789063423871994, "learning_rate": 7.965705027228949e-05, "loss": 0.329, "step": 31677 }, { "epoch": 2.566267012313675, "grad_norm": 0.0644729882478714, "learning_rate": 7.965254961969486e-05, "loss": 0.2665, "step": 31678 }, { "epoch": 2.566348023331173, "grad_norm": 0.07038956135511398, "learning_rate": 7.964804896710023e-05, "loss": 0.2604, "step": 31679 }, { "epoch": 2.5664290343486713, "grad_norm": 0.06449288129806519, "learning_rate": 7.964354831450561e-05, "loss": 0.2496, "step": 31680 }, { "epoch": 2.5665100453661696, "grad_norm": 0.0661557987332344, "learning_rate": 7.963904766191099e-05, "loss": 0.2896, "step": 31681 }, { "epoch": 2.5665910563836682, "grad_norm": 0.06338050216436386, "learning_rate": 7.963454700931635e-05, "loss": 0.2271, "step": 31682 }, { "epoch": 2.5666720674011665, "grad_norm": 0.07911043614149094, "learning_rate": 7.963004635672173e-05, "loss": 0.2658, "step": 31683 }, { "epoch": 2.5667530784186647, "grad_norm": 0.06274065375328064, "learning_rate": 7.96255457041271e-05, "loss": 0.2752, "step": 31684 }, { "epoch": 2.5668340894361634, "grad_norm": 0.06047653779387474, "learning_rate": 7.962104505153247e-05, "loss": 0.2436, "step": 31685 }, { "epoch": 2.5669151004536617, "grad_norm": 0.05865699425339699, "learning_rate": 7.961654439893785e-05, "loss": 0.2425, "step": 31686 }, { "epoch": 2.56699611147116, "grad_norm": 0.05763997510075569, "learning_rate": 7.961204374634323e-05, "loss": 0.2008, "step": 31687 }, { "epoch": 2.5670771224886586, "grad_norm": 0.06605460494756699, "learning_rate": 7.960754309374859e-05, "loss": 0.2653, "step": 31688 }, { "epoch": 2.567158133506157, "grad_norm": 0.07231853157281876, "learning_rate": 7.960304244115397e-05, "loss": 0.2368, "step": 31689 }, { "epoch": 2.567239144523655, "grad_norm": 0.059788014739751816, "learning_rate": 7.959854178855935e-05, "loss": 0.2558, "step": 31690 }, { "epoch": 2.567320155541154, "grad_norm": 0.06757822632789612, "learning_rate": 7.959404113596471e-05, "loss": 0.2506, "step": 31691 }, { "epoch": 2.567401166558652, "grad_norm": 0.06352180987596512, "learning_rate": 7.95895404833701e-05, "loss": 0.2464, "step": 31692 }, { "epoch": 2.5674821775761503, "grad_norm": 0.0770125687122345, "learning_rate": 7.958503983077547e-05, "loss": 0.2519, "step": 31693 }, { "epoch": 2.567563188593649, "grad_norm": 0.06670795381069183, "learning_rate": 7.958053917818083e-05, "loss": 0.2404, "step": 31694 }, { "epoch": 2.567644199611147, "grad_norm": 0.07569408416748047, "learning_rate": 7.957603852558622e-05, "loss": 0.2846, "step": 31695 }, { "epoch": 2.5677252106286454, "grad_norm": 0.08044213801622391, "learning_rate": 7.957153787299159e-05, "loss": 0.2473, "step": 31696 }, { "epoch": 2.567806221646144, "grad_norm": 0.07201042026281357, "learning_rate": 7.956703722039695e-05, "loss": 0.2284, "step": 31697 }, { "epoch": 2.5678872326636424, "grad_norm": 0.07905683666467667, "learning_rate": 7.956253656780234e-05, "loss": 0.2666, "step": 31698 }, { "epoch": 2.5679682436811406, "grad_norm": 0.06858737021684647, "learning_rate": 7.955803591520771e-05, "loss": 0.2826, "step": 31699 }, { "epoch": 2.568049254698639, "grad_norm": 0.06841877847909927, "learning_rate": 7.955353526261308e-05, "loss": 0.2319, "step": 31700 }, { "epoch": 2.568130265716137, "grad_norm": 0.07550330460071564, "learning_rate": 7.954903461001846e-05, "loss": 0.2848, "step": 31701 }, { "epoch": 2.568211276733636, "grad_norm": 0.07050354033708572, "learning_rate": 7.954453395742383e-05, "loss": 0.2248, "step": 31702 }, { "epoch": 2.568292287751134, "grad_norm": 0.06879187375307083, "learning_rate": 7.95400333048292e-05, "loss": 0.2598, "step": 31703 }, { "epoch": 2.5683732987686323, "grad_norm": 0.07464316487312317, "learning_rate": 7.953553265223458e-05, "loss": 0.2493, "step": 31704 }, { "epoch": 2.568454309786131, "grad_norm": 0.08402939140796661, "learning_rate": 7.953103199963995e-05, "loss": 0.3024, "step": 31705 }, { "epoch": 2.5685353208036292, "grad_norm": 0.08375585079193115, "learning_rate": 7.952653134704533e-05, "loss": 0.2586, "step": 31706 }, { "epoch": 2.5686163318211275, "grad_norm": 0.07422322034835815, "learning_rate": 7.95220306944507e-05, "loss": 0.2421, "step": 31707 }, { "epoch": 2.568697342838626, "grad_norm": 0.057322461158037186, "learning_rate": 7.951753004185607e-05, "loss": 0.2143, "step": 31708 }, { "epoch": 2.5687783538561244, "grad_norm": 0.056815192103385925, "learning_rate": 7.951302938926145e-05, "loss": 0.2386, "step": 31709 }, { "epoch": 2.5688593648736227, "grad_norm": 0.07299406826496124, "learning_rate": 7.950852873666682e-05, "loss": 0.2548, "step": 31710 }, { "epoch": 2.5689403758911213, "grad_norm": 0.07193489372730255, "learning_rate": 7.95040280840722e-05, "loss": 0.2603, "step": 31711 }, { "epoch": 2.5690213869086196, "grad_norm": 0.06784748286008835, "learning_rate": 7.949952743147757e-05, "loss": 0.2642, "step": 31712 }, { "epoch": 2.569102397926118, "grad_norm": 0.07315580546855927, "learning_rate": 7.949502677888294e-05, "loss": 0.2688, "step": 31713 }, { "epoch": 2.5691834089436165, "grad_norm": 0.07491733878850937, "learning_rate": 7.949052612628831e-05, "loss": 0.2509, "step": 31714 }, { "epoch": 2.5692644199611148, "grad_norm": 0.061660926789045334, "learning_rate": 7.948602547369369e-05, "loss": 0.2521, "step": 31715 }, { "epoch": 2.569345430978613, "grad_norm": 0.06637068837881088, "learning_rate": 7.948152482109906e-05, "loss": 0.2659, "step": 31716 }, { "epoch": 2.5694264419961117, "grad_norm": 0.05649961158633232, "learning_rate": 7.947702416850444e-05, "loss": 0.2615, "step": 31717 }, { "epoch": 2.56950745301361, "grad_norm": 0.07894111424684525, "learning_rate": 7.947252351590981e-05, "loss": 0.2268, "step": 31718 }, { "epoch": 2.569588464031108, "grad_norm": 0.07335829734802246, "learning_rate": 7.946802286331518e-05, "loss": 0.2655, "step": 31719 }, { "epoch": 2.569669475048607, "grad_norm": 0.06373408436775208, "learning_rate": 7.946352221072056e-05, "loss": 0.2919, "step": 31720 }, { "epoch": 2.569750486066105, "grad_norm": 0.05747825652360916, "learning_rate": 7.945902155812593e-05, "loss": 0.2486, "step": 31721 }, { "epoch": 2.5698314970836034, "grad_norm": 0.054663050919771194, "learning_rate": 7.94545209055313e-05, "loss": 0.244, "step": 31722 }, { "epoch": 2.5699125081011016, "grad_norm": 0.049483031034469604, "learning_rate": 7.945002025293668e-05, "loss": 0.1919, "step": 31723 }, { "epoch": 2.5699935191186, "grad_norm": 0.06937556713819504, "learning_rate": 7.944551960034205e-05, "loss": 0.2199, "step": 31724 }, { "epoch": 2.5700745301360985, "grad_norm": 0.0625581368803978, "learning_rate": 7.944101894774742e-05, "loss": 0.212, "step": 31725 }, { "epoch": 2.570155541153597, "grad_norm": 0.061997413635253906, "learning_rate": 7.943651829515281e-05, "loss": 0.1864, "step": 31726 }, { "epoch": 2.570236552171095, "grad_norm": 0.05428750067949295, "learning_rate": 7.943201764255817e-05, "loss": 0.2125, "step": 31727 }, { "epoch": 2.5703175631885937, "grad_norm": 0.0579020231962204, "learning_rate": 7.942751698996355e-05, "loss": 0.2402, "step": 31728 }, { "epoch": 2.570398574206092, "grad_norm": 0.0654272735118866, "learning_rate": 7.942301633736893e-05, "loss": 0.2564, "step": 31729 }, { "epoch": 2.57047958522359, "grad_norm": 0.06533244997262955, "learning_rate": 7.941851568477429e-05, "loss": 0.2245, "step": 31730 }, { "epoch": 2.570560596241089, "grad_norm": 0.05718918517231941, "learning_rate": 7.941401503217967e-05, "loss": 0.2114, "step": 31731 }, { "epoch": 2.570641607258587, "grad_norm": 0.07008680701255798, "learning_rate": 7.940951437958505e-05, "loss": 0.2426, "step": 31732 }, { "epoch": 2.5707226182760854, "grad_norm": 0.09347264468669891, "learning_rate": 7.940501372699041e-05, "loss": 0.297, "step": 31733 }, { "epoch": 2.570803629293584, "grad_norm": 0.0643296018242836, "learning_rate": 7.940051307439579e-05, "loss": 0.2487, "step": 31734 }, { "epoch": 2.5708846403110823, "grad_norm": 0.06303554028272629, "learning_rate": 7.939601242180117e-05, "loss": 0.23, "step": 31735 }, { "epoch": 2.5709656513285806, "grad_norm": 0.07969815284013748, "learning_rate": 7.939151176920653e-05, "loss": 0.279, "step": 31736 }, { "epoch": 2.5710466623460793, "grad_norm": 0.0638255774974823, "learning_rate": 7.938701111661191e-05, "loss": 0.2303, "step": 31737 }, { "epoch": 2.5711276733635775, "grad_norm": 0.07085902988910675, "learning_rate": 7.93825104640173e-05, "loss": 0.2291, "step": 31738 }, { "epoch": 2.5712086843810757, "grad_norm": 0.07110630720853806, "learning_rate": 7.937800981142265e-05, "loss": 0.2587, "step": 31739 }, { "epoch": 2.5712896953985744, "grad_norm": 0.06317129731178284, "learning_rate": 7.937350915882803e-05, "loss": 0.2378, "step": 31740 }, { "epoch": 2.5713707064160727, "grad_norm": 0.06473881006240845, "learning_rate": 7.936900850623342e-05, "loss": 0.2282, "step": 31741 }, { "epoch": 2.571451717433571, "grad_norm": 0.05658971518278122, "learning_rate": 7.936450785363878e-05, "loss": 0.2565, "step": 31742 }, { "epoch": 2.5715327284510696, "grad_norm": 0.08154220134019852, "learning_rate": 7.936000720104415e-05, "loss": 0.2951, "step": 31743 }, { "epoch": 2.571613739468568, "grad_norm": 0.06489664316177368, "learning_rate": 7.935550654844954e-05, "loss": 0.2579, "step": 31744 }, { "epoch": 2.571694750486066, "grad_norm": 0.06681975722312927, "learning_rate": 7.93510058958549e-05, "loss": 0.2536, "step": 31745 }, { "epoch": 2.5717757615035644, "grad_norm": 0.07080577313899994, "learning_rate": 7.934650524326027e-05, "loss": 0.2278, "step": 31746 }, { "epoch": 2.5718567725210626, "grad_norm": 0.05441749840974808, "learning_rate": 7.934200459066566e-05, "loss": 0.2455, "step": 31747 }, { "epoch": 2.5719377835385613, "grad_norm": 0.061398494988679886, "learning_rate": 7.933750393807102e-05, "loss": 0.2176, "step": 31748 }, { "epoch": 2.5720187945560595, "grad_norm": 0.06527212262153625, "learning_rate": 7.933300328547639e-05, "loss": 0.2472, "step": 31749 }, { "epoch": 2.5720998055735578, "grad_norm": 0.06973239779472351, "learning_rate": 7.932850263288178e-05, "loss": 0.2494, "step": 31750 }, { "epoch": 2.5721808165910565, "grad_norm": 0.07312745600938797, "learning_rate": 7.932400198028714e-05, "loss": 0.2688, "step": 31751 }, { "epoch": 2.5722618276085547, "grad_norm": 0.0772642195224762, "learning_rate": 7.931950132769253e-05, "loss": 0.2538, "step": 31752 }, { "epoch": 2.572342838626053, "grad_norm": 0.08061595261096954, "learning_rate": 7.93150006750979e-05, "loss": 0.2651, "step": 31753 }, { "epoch": 2.5724238496435516, "grad_norm": 0.06649156659841537, "learning_rate": 7.931050002250326e-05, "loss": 0.2767, "step": 31754 }, { "epoch": 2.57250486066105, "grad_norm": 0.07221254706382751, "learning_rate": 7.930599936990865e-05, "loss": 0.2387, "step": 31755 }, { "epoch": 2.572585871678548, "grad_norm": 0.06058627739548683, "learning_rate": 7.930149871731402e-05, "loss": 0.2548, "step": 31756 }, { "epoch": 2.572666882696047, "grad_norm": 0.07540536671876907, "learning_rate": 7.929699806471938e-05, "loss": 0.232, "step": 31757 }, { "epoch": 2.572747893713545, "grad_norm": 0.07115985453128815, "learning_rate": 7.929249741212477e-05, "loss": 0.2661, "step": 31758 }, { "epoch": 2.5728289047310433, "grad_norm": 0.06167732924222946, "learning_rate": 7.928799675953014e-05, "loss": 0.2584, "step": 31759 }, { "epoch": 2.572909915748542, "grad_norm": 0.07705485820770264, "learning_rate": 7.92834961069355e-05, "loss": 0.2734, "step": 31760 }, { "epoch": 2.5729909267660402, "grad_norm": 0.08480952680110931, "learning_rate": 7.927899545434089e-05, "loss": 0.2601, "step": 31761 }, { "epoch": 2.5730719377835385, "grad_norm": 0.06356513500213623, "learning_rate": 7.927449480174626e-05, "loss": 0.2332, "step": 31762 }, { "epoch": 2.573152948801037, "grad_norm": 0.07468358427286148, "learning_rate": 7.926999414915162e-05, "loss": 0.266, "step": 31763 }, { "epoch": 2.5732339598185354, "grad_norm": 0.07145358622074127, "learning_rate": 7.926549349655701e-05, "loss": 0.2509, "step": 31764 }, { "epoch": 2.5733149708360337, "grad_norm": 0.059972260147333145, "learning_rate": 7.926099284396238e-05, "loss": 0.2798, "step": 31765 }, { "epoch": 2.5733959818535324, "grad_norm": 0.0660482719540596, "learning_rate": 7.925649219136774e-05, "loss": 0.2027, "step": 31766 }, { "epoch": 2.5734769928710306, "grad_norm": 0.06355316191911697, "learning_rate": 7.925199153877313e-05, "loss": 0.2612, "step": 31767 }, { "epoch": 2.573558003888529, "grad_norm": 0.06815008074045181, "learning_rate": 7.92474908861785e-05, "loss": 0.2356, "step": 31768 }, { "epoch": 2.573639014906027, "grad_norm": 0.06347496062517166, "learning_rate": 7.924299023358386e-05, "loss": 0.2563, "step": 31769 }, { "epoch": 2.5737200259235253, "grad_norm": 0.06929799169301987, "learning_rate": 7.923848958098925e-05, "loss": 0.2598, "step": 31770 }, { "epoch": 2.573801036941024, "grad_norm": 0.05718935653567314, "learning_rate": 7.923398892839462e-05, "loss": 0.2301, "step": 31771 }, { "epoch": 2.5738820479585223, "grad_norm": 0.07201238721609116, "learning_rate": 7.922948827579998e-05, "loss": 0.2603, "step": 31772 }, { "epoch": 2.5739630589760205, "grad_norm": 0.06836757063865662, "learning_rate": 7.922498762320537e-05, "loss": 0.2469, "step": 31773 }, { "epoch": 2.574044069993519, "grad_norm": 0.06981032341718674, "learning_rate": 7.922048697061074e-05, "loss": 0.2563, "step": 31774 }, { "epoch": 2.5741250810110174, "grad_norm": 0.0501057431101799, "learning_rate": 7.92159863180161e-05, "loss": 0.2203, "step": 31775 }, { "epoch": 2.5742060920285157, "grad_norm": 0.06120969355106354, "learning_rate": 7.921148566542149e-05, "loss": 0.2058, "step": 31776 }, { "epoch": 2.5742871030460144, "grad_norm": 0.058200716972351074, "learning_rate": 7.920698501282687e-05, "loss": 0.294, "step": 31777 }, { "epoch": 2.5743681140635126, "grad_norm": 0.06421802192926407, "learning_rate": 7.920248436023223e-05, "loss": 0.2257, "step": 31778 }, { "epoch": 2.574449125081011, "grad_norm": 0.062095798552036285, "learning_rate": 7.919798370763761e-05, "loss": 0.2375, "step": 31779 }, { "epoch": 2.5745301360985096, "grad_norm": 0.09116526693105698, "learning_rate": 7.919348305504299e-05, "loss": 0.2646, "step": 31780 }, { "epoch": 2.574611147116008, "grad_norm": 0.06377584487199783, "learning_rate": 7.918898240244836e-05, "loss": 0.2194, "step": 31781 }, { "epoch": 2.574692158133506, "grad_norm": 0.06614330410957336, "learning_rate": 7.918448174985373e-05, "loss": 0.2386, "step": 31782 }, { "epoch": 2.5747731691510047, "grad_norm": 0.06941062211990356, "learning_rate": 7.917998109725911e-05, "loss": 0.2201, "step": 31783 }, { "epoch": 2.574854180168503, "grad_norm": 0.07736022025346756, "learning_rate": 7.917548044466448e-05, "loss": 0.2627, "step": 31784 }, { "epoch": 2.5749351911860012, "grad_norm": 0.07738102227449417, "learning_rate": 7.917097979206985e-05, "loss": 0.2485, "step": 31785 }, { "epoch": 2.5750162022035, "grad_norm": 0.0658000260591507, "learning_rate": 7.916647913947523e-05, "loss": 0.2313, "step": 31786 }, { "epoch": 2.575097213220998, "grad_norm": 0.05562092363834381, "learning_rate": 7.91619784868806e-05, "loss": 0.1899, "step": 31787 }, { "epoch": 2.5751782242384964, "grad_norm": 0.07304248958826065, "learning_rate": 7.915747783428597e-05, "loss": 0.2122, "step": 31788 }, { "epoch": 2.5752592352559946, "grad_norm": 0.06350831687450409, "learning_rate": 7.915297718169135e-05, "loss": 0.2167, "step": 31789 }, { "epoch": 2.5753402462734933, "grad_norm": 0.07086034119129181, "learning_rate": 7.914847652909672e-05, "loss": 0.277, "step": 31790 }, { "epoch": 2.5754212572909916, "grad_norm": 0.0860385000705719, "learning_rate": 7.91439758765021e-05, "loss": 0.2773, "step": 31791 }, { "epoch": 2.57550226830849, "grad_norm": 0.08237428963184357, "learning_rate": 7.913947522390747e-05, "loss": 0.2728, "step": 31792 }, { "epoch": 2.575583279325988, "grad_norm": 0.06451267749071121, "learning_rate": 7.913497457131284e-05, "loss": 0.23, "step": 31793 }, { "epoch": 2.5756642903434868, "grad_norm": 0.07084333151578903, "learning_rate": 7.913047391871822e-05, "loss": 0.2147, "step": 31794 }, { "epoch": 2.575745301360985, "grad_norm": 0.07405141741037369, "learning_rate": 7.912597326612359e-05, "loss": 0.2789, "step": 31795 }, { "epoch": 2.5758263123784833, "grad_norm": 0.06787556409835815, "learning_rate": 7.912147261352896e-05, "loss": 0.2233, "step": 31796 }, { "epoch": 2.575907323395982, "grad_norm": 0.0673149824142456, "learning_rate": 7.911697196093434e-05, "loss": 0.2395, "step": 31797 }, { "epoch": 2.57598833441348, "grad_norm": 0.062474120408296585, "learning_rate": 7.911247130833971e-05, "loss": 0.2255, "step": 31798 }, { "epoch": 2.5760693454309784, "grad_norm": 0.08614230901002884, "learning_rate": 7.910797065574508e-05, "loss": 0.2667, "step": 31799 }, { "epoch": 2.576150356448477, "grad_norm": 0.0551866814494133, "learning_rate": 7.910347000315046e-05, "loss": 0.2323, "step": 31800 }, { "epoch": 2.5762313674659754, "grad_norm": 0.06509862095117569, "learning_rate": 7.909896935055583e-05, "loss": 0.2328, "step": 31801 }, { "epoch": 2.5763123784834736, "grad_norm": 0.06465253233909607, "learning_rate": 7.90944686979612e-05, "loss": 0.2326, "step": 31802 }, { "epoch": 2.5763933895009723, "grad_norm": 0.07347733527421951, "learning_rate": 7.908996804536658e-05, "loss": 0.2337, "step": 31803 }, { "epoch": 2.5764744005184705, "grad_norm": 0.0698373094201088, "learning_rate": 7.908546739277195e-05, "loss": 0.2174, "step": 31804 }, { "epoch": 2.576555411535969, "grad_norm": 0.06428837776184082, "learning_rate": 7.908096674017733e-05, "loss": 0.2271, "step": 31805 }, { "epoch": 2.5766364225534675, "grad_norm": 0.08448978513479233, "learning_rate": 7.90764660875827e-05, "loss": 0.2484, "step": 31806 }, { "epoch": 2.5767174335709657, "grad_norm": 0.07157791405916214, "learning_rate": 7.907196543498809e-05, "loss": 0.274, "step": 31807 }, { "epoch": 2.576798444588464, "grad_norm": 0.06949556618928909, "learning_rate": 7.906746478239345e-05, "loss": 0.2483, "step": 31808 }, { "epoch": 2.5768794556059627, "grad_norm": 0.0694814994931221, "learning_rate": 7.906296412979882e-05, "loss": 0.2675, "step": 31809 }, { "epoch": 2.576960466623461, "grad_norm": 0.0681164488196373, "learning_rate": 7.905846347720421e-05, "loss": 0.2498, "step": 31810 }, { "epoch": 2.577041477640959, "grad_norm": 0.06991052627563477, "learning_rate": 7.905396282460957e-05, "loss": 0.2577, "step": 31811 }, { "epoch": 2.5771224886584574, "grad_norm": 0.07411139458417892, "learning_rate": 7.904946217201494e-05, "loss": 0.2511, "step": 31812 }, { "epoch": 2.577203499675956, "grad_norm": 0.07247655838727951, "learning_rate": 7.904496151942033e-05, "loss": 0.2281, "step": 31813 }, { "epoch": 2.5772845106934543, "grad_norm": 0.06791792064905167, "learning_rate": 7.904046086682569e-05, "loss": 0.1999, "step": 31814 }, { "epoch": 2.5773655217109526, "grad_norm": 0.06338424980640411, "learning_rate": 7.903596021423106e-05, "loss": 0.2779, "step": 31815 }, { "epoch": 2.577446532728451, "grad_norm": 0.06921472400426865, "learning_rate": 7.903145956163645e-05, "loss": 0.2648, "step": 31816 }, { "epoch": 2.5775275437459495, "grad_norm": 0.06940491497516632, "learning_rate": 7.902695890904181e-05, "loss": 0.2565, "step": 31817 }, { "epoch": 2.5776085547634477, "grad_norm": 0.09353849291801453, "learning_rate": 7.902245825644718e-05, "loss": 0.2863, "step": 31818 }, { "epoch": 2.577689565780946, "grad_norm": 0.06500814110040665, "learning_rate": 7.901795760385257e-05, "loss": 0.225, "step": 31819 }, { "epoch": 2.5777705767984447, "grad_norm": 0.07395117729902267, "learning_rate": 7.901345695125793e-05, "loss": 0.2565, "step": 31820 }, { "epoch": 2.577851587815943, "grad_norm": 0.06356361508369446, "learning_rate": 7.90089562986633e-05, "loss": 0.2422, "step": 31821 }, { "epoch": 2.577932598833441, "grad_norm": 0.0858735591173172, "learning_rate": 7.900445564606869e-05, "loss": 0.2295, "step": 31822 }, { "epoch": 2.57801360985094, "grad_norm": 0.08137764781713486, "learning_rate": 7.899995499347405e-05, "loss": 0.2482, "step": 31823 }, { "epoch": 2.578094620868438, "grad_norm": 0.06723980605602264, "learning_rate": 7.899545434087942e-05, "loss": 0.2327, "step": 31824 }, { "epoch": 2.5781756318859363, "grad_norm": 0.07822005450725555, "learning_rate": 7.899095368828481e-05, "loss": 0.2167, "step": 31825 }, { "epoch": 2.578256642903435, "grad_norm": 0.06803352385759354, "learning_rate": 7.898645303569017e-05, "loss": 0.2758, "step": 31826 }, { "epoch": 2.5783376539209333, "grad_norm": 0.06352008134126663, "learning_rate": 7.898195238309555e-05, "loss": 0.2449, "step": 31827 }, { "epoch": 2.5784186649384315, "grad_norm": 0.05779225379228592, "learning_rate": 7.897745173050093e-05, "loss": 0.2469, "step": 31828 }, { "epoch": 2.57849967595593, "grad_norm": 0.08133967220783234, "learning_rate": 7.897295107790629e-05, "loss": 0.264, "step": 31829 }, { "epoch": 2.5785806869734285, "grad_norm": 0.06355690956115723, "learning_rate": 7.896845042531167e-05, "loss": 0.2246, "step": 31830 }, { "epoch": 2.5786616979909267, "grad_norm": 0.0669471025466919, "learning_rate": 7.896394977271705e-05, "loss": 0.273, "step": 31831 }, { "epoch": 2.5787427090084254, "grad_norm": 0.08634018898010254, "learning_rate": 7.895944912012241e-05, "loss": 0.2405, "step": 31832 }, { "epoch": 2.5788237200259236, "grad_norm": 0.06180933117866516, "learning_rate": 7.89549484675278e-05, "loss": 0.2301, "step": 31833 }, { "epoch": 2.578904731043422, "grad_norm": 0.0742286667227745, "learning_rate": 7.895044781493317e-05, "loss": 0.2425, "step": 31834 }, { "epoch": 2.57898574206092, "grad_norm": 0.07804601639509201, "learning_rate": 7.894594716233853e-05, "loss": 0.259, "step": 31835 }, { "epoch": 2.579066753078419, "grad_norm": 0.07568758726119995, "learning_rate": 7.894144650974392e-05, "loss": 0.2752, "step": 31836 }, { "epoch": 2.579147764095917, "grad_norm": 0.08096685260534286, "learning_rate": 7.89369458571493e-05, "loss": 0.2559, "step": 31837 }, { "epoch": 2.5792287751134153, "grad_norm": 0.06668811291456223, "learning_rate": 7.893244520455466e-05, "loss": 0.2306, "step": 31838 }, { "epoch": 2.5793097861309136, "grad_norm": 0.06312041729688644, "learning_rate": 7.892794455196004e-05, "loss": 0.2368, "step": 31839 }, { "epoch": 2.5793907971484122, "grad_norm": 0.06583622843027115, "learning_rate": 7.892344389936542e-05, "loss": 0.2461, "step": 31840 }, { "epoch": 2.5794718081659105, "grad_norm": 0.07359354197978973, "learning_rate": 7.891894324677078e-05, "loss": 0.2411, "step": 31841 }, { "epoch": 2.5795528191834087, "grad_norm": 0.07067452371120453, "learning_rate": 7.891444259417616e-05, "loss": 0.2187, "step": 31842 }, { "epoch": 2.5796338302009074, "grad_norm": 0.07543587684631348, "learning_rate": 7.890994194158154e-05, "loss": 0.2515, "step": 31843 }, { "epoch": 2.5797148412184057, "grad_norm": 0.07966103404760361, "learning_rate": 7.89054412889869e-05, "loss": 0.2217, "step": 31844 }, { "epoch": 2.579795852235904, "grad_norm": 0.063313789665699, "learning_rate": 7.890094063639228e-05, "loss": 0.2173, "step": 31845 }, { "epoch": 2.5798768632534026, "grad_norm": 0.06680615246295929, "learning_rate": 7.889643998379766e-05, "loss": 0.2391, "step": 31846 }, { "epoch": 2.579957874270901, "grad_norm": 0.06952749937772751, "learning_rate": 7.889193933120302e-05, "loss": 0.2215, "step": 31847 }, { "epoch": 2.580038885288399, "grad_norm": 0.06919938325881958, "learning_rate": 7.88874386786084e-05, "loss": 0.2328, "step": 31848 }, { "epoch": 2.5801198963058978, "grad_norm": 0.057678766548633575, "learning_rate": 7.888293802601378e-05, "loss": 0.2199, "step": 31849 }, { "epoch": 2.580200907323396, "grad_norm": 0.06296581029891968, "learning_rate": 7.887843737341914e-05, "loss": 0.243, "step": 31850 }, { "epoch": 2.5802819183408943, "grad_norm": 0.06534301489591599, "learning_rate": 7.887393672082453e-05, "loss": 0.2571, "step": 31851 }, { "epoch": 2.580362929358393, "grad_norm": 0.06356888264417648, "learning_rate": 7.88694360682299e-05, "loss": 0.2826, "step": 31852 }, { "epoch": 2.580443940375891, "grad_norm": 0.067490354180336, "learning_rate": 7.886493541563527e-05, "loss": 0.1816, "step": 31853 }, { "epoch": 2.5805249513933894, "grad_norm": 0.07598926872015, "learning_rate": 7.886043476304065e-05, "loss": 0.2491, "step": 31854 }, { "epoch": 2.580605962410888, "grad_norm": 0.08062969893217087, "learning_rate": 7.885593411044602e-05, "loss": 0.2401, "step": 31855 }, { "epoch": 2.5806869734283864, "grad_norm": 0.06391061097383499, "learning_rate": 7.88514334578514e-05, "loss": 0.2205, "step": 31856 }, { "epoch": 2.5807679844458846, "grad_norm": 0.06544674932956696, "learning_rate": 7.884693280525677e-05, "loss": 0.2358, "step": 31857 }, { "epoch": 2.580848995463383, "grad_norm": 0.07806091010570526, "learning_rate": 7.884243215266214e-05, "loss": 0.2748, "step": 31858 }, { "epoch": 2.5809300064808816, "grad_norm": 0.07057543098926544, "learning_rate": 7.883793150006751e-05, "loss": 0.2613, "step": 31859 }, { "epoch": 2.58101101749838, "grad_norm": 0.07381962239742279, "learning_rate": 7.883343084747289e-05, "loss": 0.2217, "step": 31860 }, { "epoch": 2.581092028515878, "grad_norm": 0.07270567864179611, "learning_rate": 7.882893019487826e-05, "loss": 0.2973, "step": 31861 }, { "epoch": 2.5811730395333763, "grad_norm": 0.0710500106215477, "learning_rate": 7.882442954228364e-05, "loss": 0.2109, "step": 31862 }, { "epoch": 2.581254050550875, "grad_norm": 0.06224672496318817, "learning_rate": 7.881992888968901e-05, "loss": 0.2362, "step": 31863 }, { "epoch": 2.5813350615683732, "grad_norm": 0.08102069050073624, "learning_rate": 7.881542823709438e-05, "loss": 0.2663, "step": 31864 }, { "epoch": 2.5814160725858715, "grad_norm": 0.07523306459188461, "learning_rate": 7.881092758449976e-05, "loss": 0.236, "step": 31865 }, { "epoch": 2.58149708360337, "grad_norm": 0.059707846492528915, "learning_rate": 7.880642693190513e-05, "loss": 0.226, "step": 31866 }, { "epoch": 2.5815780946208684, "grad_norm": 0.08537987619638443, "learning_rate": 7.88019262793105e-05, "loss": 0.2803, "step": 31867 }, { "epoch": 2.5816591056383666, "grad_norm": 0.08499877899885178, "learning_rate": 7.879742562671588e-05, "loss": 0.2439, "step": 31868 }, { "epoch": 2.5817401166558653, "grad_norm": 0.0631185919046402, "learning_rate": 7.879292497412125e-05, "loss": 0.2641, "step": 31869 }, { "epoch": 2.5818211276733636, "grad_norm": 0.0609898567199707, "learning_rate": 7.878842432152662e-05, "loss": 0.2563, "step": 31870 }, { "epoch": 2.581902138690862, "grad_norm": 0.06977617740631104, "learning_rate": 7.8783923668932e-05, "loss": 0.2555, "step": 31871 }, { "epoch": 2.5819831497083605, "grad_norm": 0.08325894176959991, "learning_rate": 7.877942301633737e-05, "loss": 0.2246, "step": 31872 }, { "epoch": 2.5820641607258588, "grad_norm": 0.06733439862728119, "learning_rate": 7.877492236374274e-05, "loss": 0.2504, "step": 31873 }, { "epoch": 2.582145171743357, "grad_norm": 0.06447796523571014, "learning_rate": 7.877042171114812e-05, "loss": 0.2509, "step": 31874 }, { "epoch": 2.5822261827608557, "grad_norm": 0.06359328329563141, "learning_rate": 7.876592105855349e-05, "loss": 0.2577, "step": 31875 }, { "epoch": 2.582307193778354, "grad_norm": 0.06793835759162903, "learning_rate": 7.876142040595887e-05, "loss": 0.2806, "step": 31876 }, { "epoch": 2.582388204795852, "grad_norm": 0.06319888681173325, "learning_rate": 7.875691975336424e-05, "loss": 0.2502, "step": 31877 }, { "epoch": 2.582469215813351, "grad_norm": 0.06786399334669113, "learning_rate": 7.875241910076961e-05, "loss": 0.2169, "step": 31878 }, { "epoch": 2.582550226830849, "grad_norm": 0.06800121068954468, "learning_rate": 7.874791844817499e-05, "loss": 0.2228, "step": 31879 }, { "epoch": 2.5826312378483474, "grad_norm": 0.06821072101593018, "learning_rate": 7.874341779558036e-05, "loss": 0.2414, "step": 31880 }, { "epoch": 2.5827122488658456, "grad_norm": 0.072810597717762, "learning_rate": 7.873891714298573e-05, "loss": 0.2831, "step": 31881 }, { "epoch": 2.5827932598833443, "grad_norm": 0.0673997551202774, "learning_rate": 7.873441649039111e-05, "loss": 0.2378, "step": 31882 }, { "epoch": 2.5828742709008425, "grad_norm": 0.07142699509859085, "learning_rate": 7.872991583779648e-05, "loss": 0.2479, "step": 31883 }, { "epoch": 2.582955281918341, "grad_norm": 0.07638320326805115, "learning_rate": 7.872541518520185e-05, "loss": 0.2707, "step": 31884 }, { "epoch": 2.583036292935839, "grad_norm": 0.06295417994260788, "learning_rate": 7.872091453260724e-05, "loss": 0.2215, "step": 31885 }, { "epoch": 2.5831173039533377, "grad_norm": 0.07948005944490433, "learning_rate": 7.87164138800126e-05, "loss": 0.2867, "step": 31886 }, { "epoch": 2.583198314970836, "grad_norm": 0.07531192898750305, "learning_rate": 7.871191322741798e-05, "loss": 0.2373, "step": 31887 }, { "epoch": 2.583279325988334, "grad_norm": 0.07652915269136429, "learning_rate": 7.870741257482336e-05, "loss": 0.2251, "step": 31888 }, { "epoch": 2.583360337005833, "grad_norm": 0.06174972653388977, "learning_rate": 7.870291192222872e-05, "loss": 0.274, "step": 31889 }, { "epoch": 2.583441348023331, "grad_norm": 0.06382008641958237, "learning_rate": 7.86984112696341e-05, "loss": 0.2568, "step": 31890 }, { "epoch": 2.5835223590408294, "grad_norm": 0.07435295730829239, "learning_rate": 7.869391061703948e-05, "loss": 0.253, "step": 31891 }, { "epoch": 2.583603370058328, "grad_norm": 0.05679628998041153, "learning_rate": 7.868940996444484e-05, "loss": 0.2062, "step": 31892 }, { "epoch": 2.5836843810758263, "grad_norm": 0.07225190848112106, "learning_rate": 7.868490931185022e-05, "loss": 0.2477, "step": 31893 }, { "epoch": 2.5837653920933246, "grad_norm": 0.07137248665094376, "learning_rate": 7.86804086592556e-05, "loss": 0.269, "step": 31894 }, { "epoch": 2.5838464031108233, "grad_norm": 0.06835871934890747, "learning_rate": 7.867590800666096e-05, "loss": 0.2538, "step": 31895 }, { "epoch": 2.5839274141283215, "grad_norm": 0.06776162981987, "learning_rate": 7.867140735406634e-05, "loss": 0.2528, "step": 31896 }, { "epoch": 2.5840084251458197, "grad_norm": 0.08782105892896652, "learning_rate": 7.866690670147172e-05, "loss": 0.2711, "step": 31897 }, { "epoch": 2.5840894361633184, "grad_norm": 0.07954996824264526, "learning_rate": 7.866240604887708e-05, "loss": 0.2217, "step": 31898 }, { "epoch": 2.5841704471808167, "grad_norm": 0.07364766299724579, "learning_rate": 7.865790539628246e-05, "loss": 0.2444, "step": 31899 }, { "epoch": 2.584251458198315, "grad_norm": 0.06456109136343002, "learning_rate": 7.865340474368785e-05, "loss": 0.2373, "step": 31900 }, { "epoch": 2.5843324692158136, "grad_norm": 0.060708265751600266, "learning_rate": 7.86489040910932e-05, "loss": 0.2437, "step": 31901 }, { "epoch": 2.584413480233312, "grad_norm": 0.06029626354575157, "learning_rate": 7.864440343849858e-05, "loss": 0.2127, "step": 31902 }, { "epoch": 2.58449449125081, "grad_norm": 0.05488735809922218, "learning_rate": 7.863990278590397e-05, "loss": 0.1978, "step": 31903 }, { "epoch": 2.5845755022683083, "grad_norm": 0.06499477475881577, "learning_rate": 7.863540213330933e-05, "loss": 0.2439, "step": 31904 }, { "epoch": 2.584656513285807, "grad_norm": 0.06294959038496017, "learning_rate": 7.86309014807147e-05, "loss": 0.2378, "step": 31905 }, { "epoch": 2.5847375243033053, "grad_norm": 0.06903290003538132, "learning_rate": 7.862640082812009e-05, "loss": 0.2506, "step": 31906 }, { "epoch": 2.5848185353208035, "grad_norm": 0.07474994659423828, "learning_rate": 7.862190017552545e-05, "loss": 0.2359, "step": 31907 }, { "epoch": 2.5848995463383018, "grad_norm": 0.05141305550932884, "learning_rate": 7.861739952293082e-05, "loss": 0.2331, "step": 31908 }, { "epoch": 2.5849805573558005, "grad_norm": 0.07798498123884201, "learning_rate": 7.861289887033621e-05, "loss": 0.2547, "step": 31909 }, { "epoch": 2.5850615683732987, "grad_norm": 0.06230713427066803, "learning_rate": 7.860839821774157e-05, "loss": 0.2093, "step": 31910 }, { "epoch": 2.585142579390797, "grad_norm": 0.06651043146848679, "learning_rate": 7.860389756514696e-05, "loss": 0.2335, "step": 31911 }, { "epoch": 2.5852235904082956, "grad_norm": 0.06720586866140366, "learning_rate": 7.859939691255233e-05, "loss": 0.2343, "step": 31912 }, { "epoch": 2.585304601425794, "grad_norm": 0.06423775106668472, "learning_rate": 7.859489625995769e-05, "loss": 0.2247, "step": 31913 }, { "epoch": 2.585385612443292, "grad_norm": 0.0718165785074234, "learning_rate": 7.859039560736308e-05, "loss": 0.2158, "step": 31914 }, { "epoch": 2.585466623460791, "grad_norm": 0.07057687640190125, "learning_rate": 7.858589495476845e-05, "loss": 0.2108, "step": 31915 }, { "epoch": 2.585547634478289, "grad_norm": 0.06897904723882675, "learning_rate": 7.858139430217381e-05, "loss": 0.3009, "step": 31916 }, { "epoch": 2.5856286454957873, "grad_norm": 0.05689922347664833, "learning_rate": 7.85768936495792e-05, "loss": 0.2781, "step": 31917 }, { "epoch": 2.585709656513286, "grad_norm": 0.061782244592905045, "learning_rate": 7.857239299698457e-05, "loss": 0.2151, "step": 31918 }, { "epoch": 2.5857906675307842, "grad_norm": 0.06645800918340683, "learning_rate": 7.856789234438994e-05, "loss": 0.2151, "step": 31919 }, { "epoch": 2.5858716785482825, "grad_norm": 0.07219221442937851, "learning_rate": 7.856339169179532e-05, "loss": 0.2389, "step": 31920 }, { "epoch": 2.585952689565781, "grad_norm": 0.07668904960155487, "learning_rate": 7.855889103920069e-05, "loss": 0.2608, "step": 31921 }, { "epoch": 2.5860337005832794, "grad_norm": 0.0610949881374836, "learning_rate": 7.855439038660606e-05, "loss": 0.228, "step": 31922 }, { "epoch": 2.5861147116007777, "grad_norm": 0.07142166048288345, "learning_rate": 7.854988973401144e-05, "loss": 0.2539, "step": 31923 }, { "epoch": 2.5861957226182763, "grad_norm": 0.06144547089934349, "learning_rate": 7.854538908141681e-05, "loss": 0.225, "step": 31924 }, { "epoch": 2.5862767336357746, "grad_norm": 0.06903857737779617, "learning_rate": 7.854088842882219e-05, "loss": 0.2198, "step": 31925 }, { "epoch": 2.586357744653273, "grad_norm": 0.062392707914114, "learning_rate": 7.853638777622756e-05, "loss": 0.2668, "step": 31926 }, { "epoch": 2.586438755670771, "grad_norm": 0.06246126815676689, "learning_rate": 7.853188712363293e-05, "loss": 0.2567, "step": 31927 }, { "epoch": 2.5865197666882693, "grad_norm": 0.07953917980194092, "learning_rate": 7.85273864710383e-05, "loss": 0.2558, "step": 31928 }, { "epoch": 2.586600777705768, "grad_norm": 0.07347556203603745, "learning_rate": 7.852288581844368e-05, "loss": 0.2663, "step": 31929 }, { "epoch": 2.5866817887232663, "grad_norm": 0.06010118126869202, "learning_rate": 7.851838516584905e-05, "loss": 0.2645, "step": 31930 }, { "epoch": 2.5867627997407645, "grad_norm": 0.06075914949178696, "learning_rate": 7.851388451325443e-05, "loss": 0.2351, "step": 31931 }, { "epoch": 2.586843810758263, "grad_norm": 0.0672052800655365, "learning_rate": 7.85093838606598e-05, "loss": 0.2519, "step": 31932 }, { "epoch": 2.5869248217757614, "grad_norm": 0.05604797974228859, "learning_rate": 7.850488320806517e-05, "loss": 0.225, "step": 31933 }, { "epoch": 2.5870058327932597, "grad_norm": 0.055545855313539505, "learning_rate": 7.850038255547055e-05, "loss": 0.2383, "step": 31934 }, { "epoch": 2.5870868438107584, "grad_norm": 0.058497052639722824, "learning_rate": 7.849588190287592e-05, "loss": 0.2571, "step": 31935 }, { "epoch": 2.5871678548282566, "grad_norm": 0.07107438147068024, "learning_rate": 7.84913812502813e-05, "loss": 0.2375, "step": 31936 }, { "epoch": 2.587248865845755, "grad_norm": 0.08413809537887573, "learning_rate": 7.848688059768667e-05, "loss": 0.3145, "step": 31937 }, { "epoch": 2.5873298768632536, "grad_norm": 0.06129063665866852, "learning_rate": 7.848237994509204e-05, "loss": 0.2484, "step": 31938 }, { "epoch": 2.587410887880752, "grad_norm": 0.06950299441814423, "learning_rate": 7.847787929249742e-05, "loss": 0.2527, "step": 31939 }, { "epoch": 2.58749189889825, "grad_norm": 0.062319688498973846, "learning_rate": 7.847337863990279e-05, "loss": 0.2404, "step": 31940 }, { "epoch": 2.5875729099157487, "grad_norm": 0.06427384912967682, "learning_rate": 7.846887798730816e-05, "loss": 0.2597, "step": 31941 }, { "epoch": 2.587653920933247, "grad_norm": 0.055912766605615616, "learning_rate": 7.846437733471354e-05, "loss": 0.2475, "step": 31942 }, { "epoch": 2.587734931950745, "grad_norm": 0.06777750700712204, "learning_rate": 7.845987668211891e-05, "loss": 0.2068, "step": 31943 }, { "epoch": 2.587815942968244, "grad_norm": 0.06045191362500191, "learning_rate": 7.845537602952428e-05, "loss": 0.1975, "step": 31944 }, { "epoch": 2.587896953985742, "grad_norm": 0.05264495685696602, "learning_rate": 7.845087537692966e-05, "loss": 0.222, "step": 31945 }, { "epoch": 2.5879779650032404, "grad_norm": 0.0727301612496376, "learning_rate": 7.844637472433503e-05, "loss": 0.229, "step": 31946 }, { "epoch": 2.588058976020739, "grad_norm": 0.0756848007440567, "learning_rate": 7.84418740717404e-05, "loss": 0.2335, "step": 31947 }, { "epoch": 2.5881399870382373, "grad_norm": 0.07050405442714691, "learning_rate": 7.843737341914578e-05, "loss": 0.2176, "step": 31948 }, { "epoch": 2.5882209980557356, "grad_norm": 0.08286644518375397, "learning_rate": 7.843287276655115e-05, "loss": 0.2437, "step": 31949 }, { "epoch": 2.588302009073234, "grad_norm": 0.06457363814115524, "learning_rate": 7.842837211395653e-05, "loss": 0.2737, "step": 31950 }, { "epoch": 2.588383020090732, "grad_norm": 0.05261223763227463, "learning_rate": 7.84238714613619e-05, "loss": 0.2254, "step": 31951 }, { "epoch": 2.5884640311082308, "grad_norm": 0.07144974172115326, "learning_rate": 7.841937080876727e-05, "loss": 0.2101, "step": 31952 }, { "epoch": 2.588545042125729, "grad_norm": 0.07414579391479492, "learning_rate": 7.841487015617265e-05, "loss": 0.2218, "step": 31953 }, { "epoch": 2.5886260531432272, "grad_norm": 0.06547979265451431, "learning_rate": 7.841036950357802e-05, "loss": 0.2299, "step": 31954 }, { "epoch": 2.588707064160726, "grad_norm": 0.06448670476675034, "learning_rate": 7.84058688509834e-05, "loss": 0.2585, "step": 31955 }, { "epoch": 2.588788075178224, "grad_norm": 0.08230631053447723, "learning_rate": 7.840136819838877e-05, "loss": 0.2804, "step": 31956 }, { "epoch": 2.5888690861957224, "grad_norm": 0.057719308882951736, "learning_rate": 7.839686754579414e-05, "loss": 0.2039, "step": 31957 }, { "epoch": 2.588950097213221, "grad_norm": 0.07325759530067444, "learning_rate": 7.839236689319951e-05, "loss": 0.2741, "step": 31958 }, { "epoch": 2.5890311082307194, "grad_norm": 0.06162470951676369, "learning_rate": 7.838786624060489e-05, "loss": 0.243, "step": 31959 }, { "epoch": 2.5891121192482176, "grad_norm": 0.07601647078990936, "learning_rate": 7.838336558801026e-05, "loss": 0.2172, "step": 31960 }, { "epoch": 2.5891931302657163, "grad_norm": 0.06250499188899994, "learning_rate": 7.837886493541564e-05, "loss": 0.238, "step": 31961 }, { "epoch": 2.5892741412832145, "grad_norm": 0.0644306018948555, "learning_rate": 7.837436428282101e-05, "loss": 0.2483, "step": 31962 }, { "epoch": 2.589355152300713, "grad_norm": 0.07267862558364868, "learning_rate": 7.836986363022638e-05, "loss": 0.2606, "step": 31963 }, { "epoch": 2.5894361633182115, "grad_norm": 0.08261629939079285, "learning_rate": 7.836536297763176e-05, "loss": 0.255, "step": 31964 }, { "epoch": 2.5895171743357097, "grad_norm": 0.06600159406661987, "learning_rate": 7.836086232503713e-05, "loss": 0.2572, "step": 31965 }, { "epoch": 2.589598185353208, "grad_norm": 0.06426016986370087, "learning_rate": 7.835636167244252e-05, "loss": 0.226, "step": 31966 }, { "epoch": 2.5896791963707066, "grad_norm": 0.0756409540772438, "learning_rate": 7.835186101984788e-05, "loss": 0.2416, "step": 31967 }, { "epoch": 2.589760207388205, "grad_norm": 0.07189775258302689, "learning_rate": 7.834736036725325e-05, "loss": 0.2586, "step": 31968 }, { "epoch": 2.589841218405703, "grad_norm": 0.06634508073329926, "learning_rate": 7.834285971465864e-05, "loss": 0.232, "step": 31969 }, { "epoch": 2.589922229423202, "grad_norm": 0.07708480209112167, "learning_rate": 7.8338359062064e-05, "loss": 0.2601, "step": 31970 }, { "epoch": 2.5900032404407, "grad_norm": 0.07109393179416656, "learning_rate": 7.833385840946937e-05, "loss": 0.2367, "step": 31971 }, { "epoch": 2.5900842514581983, "grad_norm": 0.06452390551567078, "learning_rate": 7.832935775687476e-05, "loss": 0.2532, "step": 31972 }, { "epoch": 2.5901652624756966, "grad_norm": 0.06601597368717194, "learning_rate": 7.832485710428012e-05, "loss": 0.2728, "step": 31973 }, { "epoch": 2.590246273493195, "grad_norm": 0.06474656611680984, "learning_rate": 7.832035645168549e-05, "loss": 0.2459, "step": 31974 }, { "epoch": 2.5903272845106935, "grad_norm": 0.07416459918022156, "learning_rate": 7.831585579909088e-05, "loss": 0.2702, "step": 31975 }, { "epoch": 2.5904082955281917, "grad_norm": 0.07041539996862411, "learning_rate": 7.831135514649624e-05, "loss": 0.2545, "step": 31976 }, { "epoch": 2.59048930654569, "grad_norm": 0.07663211226463318, "learning_rate": 7.830685449390161e-05, "loss": 0.2413, "step": 31977 }, { "epoch": 2.5905703175631887, "grad_norm": 0.0726345106959343, "learning_rate": 7.8302353841307e-05, "loss": 0.2414, "step": 31978 }, { "epoch": 2.590651328580687, "grad_norm": 0.06782495975494385, "learning_rate": 7.829785318871236e-05, "loss": 0.2362, "step": 31979 }, { "epoch": 2.590732339598185, "grad_norm": 0.06722512096166611, "learning_rate": 7.829335253611773e-05, "loss": 0.2548, "step": 31980 }, { "epoch": 2.590813350615684, "grad_norm": 0.07960183173418045, "learning_rate": 7.828885188352312e-05, "loss": 0.2321, "step": 31981 }, { "epoch": 2.590894361633182, "grad_norm": 0.057806596159935, "learning_rate": 7.828435123092848e-05, "loss": 0.2318, "step": 31982 }, { "epoch": 2.5909753726506803, "grad_norm": 0.07928095757961273, "learning_rate": 7.827985057833385e-05, "loss": 0.2425, "step": 31983 }, { "epoch": 2.591056383668179, "grad_norm": 0.07918012142181396, "learning_rate": 7.827534992573924e-05, "loss": 0.2469, "step": 31984 }, { "epoch": 2.5911373946856773, "grad_norm": 0.06785554438829422, "learning_rate": 7.82708492731446e-05, "loss": 0.2426, "step": 31985 }, { "epoch": 2.5912184057031755, "grad_norm": 0.0647081658244133, "learning_rate": 7.826634862054998e-05, "loss": 0.2429, "step": 31986 }, { "epoch": 2.591299416720674, "grad_norm": 0.07442610710859299, "learning_rate": 7.826184796795536e-05, "loss": 0.2838, "step": 31987 }, { "epoch": 2.5913804277381725, "grad_norm": 0.08988859504461288, "learning_rate": 7.825734731536074e-05, "loss": 0.2178, "step": 31988 }, { "epoch": 2.5914614387556707, "grad_norm": 0.07456997781991959, "learning_rate": 7.82528466627661e-05, "loss": 0.2431, "step": 31989 }, { "epoch": 2.5915424497731694, "grad_norm": 0.054924655705690384, "learning_rate": 7.824834601017148e-05, "loss": 0.2429, "step": 31990 }, { "epoch": 2.5916234607906676, "grad_norm": 0.06245535612106323, "learning_rate": 7.824384535757686e-05, "loss": 0.2261, "step": 31991 }, { "epoch": 2.591704471808166, "grad_norm": 0.07505930215120316, "learning_rate": 7.823934470498223e-05, "loss": 0.2483, "step": 31992 }, { "epoch": 2.5917854828256646, "grad_norm": 0.06292567402124405, "learning_rate": 7.82348440523876e-05, "loss": 0.2525, "step": 31993 }, { "epoch": 2.591866493843163, "grad_norm": 0.06478261947631836, "learning_rate": 7.823034339979298e-05, "loss": 0.2212, "step": 31994 }, { "epoch": 2.591947504860661, "grad_norm": 0.08286509662866592, "learning_rate": 7.822584274719835e-05, "loss": 0.2374, "step": 31995 }, { "epoch": 2.5920285158781593, "grad_norm": 0.06472121179103851, "learning_rate": 7.822134209460373e-05, "loss": 0.2454, "step": 31996 }, { "epoch": 2.5921095268956575, "grad_norm": 0.06049100682139397, "learning_rate": 7.82168414420091e-05, "loss": 0.2333, "step": 31997 }, { "epoch": 2.5921905379131562, "grad_norm": 0.06915795803070068, "learning_rate": 7.821234078941447e-05, "loss": 0.2544, "step": 31998 }, { "epoch": 2.5922715489306545, "grad_norm": 0.07713301479816437, "learning_rate": 7.820784013681985e-05, "loss": 0.2656, "step": 31999 }, { "epoch": 2.5923525599481527, "grad_norm": 0.06822709739208221, "learning_rate": 7.820333948422522e-05, "loss": 0.2675, "step": 32000 }, { "epoch": 2.5924335709656514, "grad_norm": 0.07031536102294922, "learning_rate": 7.819883883163059e-05, "loss": 0.2574, "step": 32001 }, { "epoch": 2.5925145819831497, "grad_norm": 0.06541726738214493, "learning_rate": 7.819433817903597e-05, "loss": 0.2327, "step": 32002 }, { "epoch": 2.592595593000648, "grad_norm": 0.07353468984365463, "learning_rate": 7.818983752644134e-05, "loss": 0.2186, "step": 32003 }, { "epoch": 2.5926766040181466, "grad_norm": 0.057273805141448975, "learning_rate": 7.818533687384671e-05, "loss": 0.2287, "step": 32004 }, { "epoch": 2.592757615035645, "grad_norm": 0.06168617680668831, "learning_rate": 7.818083622125209e-05, "loss": 0.2699, "step": 32005 }, { "epoch": 2.592838626053143, "grad_norm": 0.07566682994365692, "learning_rate": 7.817633556865746e-05, "loss": 0.2402, "step": 32006 }, { "epoch": 2.5929196370706418, "grad_norm": 0.06090695783495903, "learning_rate": 7.817183491606283e-05, "loss": 0.2508, "step": 32007 }, { "epoch": 2.59300064808814, "grad_norm": 0.06652563810348511, "learning_rate": 7.816733426346821e-05, "loss": 0.2417, "step": 32008 }, { "epoch": 2.5930816591056383, "grad_norm": 0.08565051853656769, "learning_rate": 7.816283361087358e-05, "loss": 0.2456, "step": 32009 }, { "epoch": 2.593162670123137, "grad_norm": 0.06650824099779129, "learning_rate": 7.815833295827896e-05, "loss": 0.2313, "step": 32010 }, { "epoch": 2.593243681140635, "grad_norm": 0.05159616097807884, "learning_rate": 7.815383230568433e-05, "loss": 0.221, "step": 32011 }, { "epoch": 2.5933246921581334, "grad_norm": 0.07195013761520386, "learning_rate": 7.81493316530897e-05, "loss": 0.2957, "step": 32012 }, { "epoch": 2.593405703175632, "grad_norm": 0.08130887150764465, "learning_rate": 7.814483100049508e-05, "loss": 0.2132, "step": 32013 }, { "epoch": 2.5934867141931304, "grad_norm": 0.0904105082154274, "learning_rate": 7.814033034790045e-05, "loss": 0.2671, "step": 32014 }, { "epoch": 2.5935677252106286, "grad_norm": 0.06435638666152954, "learning_rate": 7.813582969530582e-05, "loss": 0.2366, "step": 32015 }, { "epoch": 2.593648736228127, "grad_norm": 0.07310062646865845, "learning_rate": 7.81313290427112e-05, "loss": 0.2915, "step": 32016 }, { "epoch": 2.5937297472456255, "grad_norm": 0.06183222308754921, "learning_rate": 7.812682839011657e-05, "loss": 0.2516, "step": 32017 }, { "epoch": 2.593810758263124, "grad_norm": 0.06138157099485397, "learning_rate": 7.812232773752194e-05, "loss": 0.2402, "step": 32018 }, { "epoch": 2.593891769280622, "grad_norm": 0.06564778089523315, "learning_rate": 7.811782708492732e-05, "loss": 0.2193, "step": 32019 }, { "epoch": 2.5939727802981203, "grad_norm": 0.07527987658977509, "learning_rate": 7.811332643233269e-05, "loss": 0.2521, "step": 32020 }, { "epoch": 2.594053791315619, "grad_norm": 0.07320604473352432, "learning_rate": 7.810882577973807e-05, "loss": 0.2271, "step": 32021 }, { "epoch": 2.594134802333117, "grad_norm": 0.0609990619122982, "learning_rate": 7.810432512714344e-05, "loss": 0.2274, "step": 32022 }, { "epoch": 2.5942158133506155, "grad_norm": 0.06894183903932571, "learning_rate": 7.809982447454881e-05, "loss": 0.2463, "step": 32023 }, { "epoch": 2.594296824368114, "grad_norm": 0.09135408699512482, "learning_rate": 7.809532382195419e-05, "loss": 0.271, "step": 32024 }, { "epoch": 2.5943778353856124, "grad_norm": 0.06804493069648743, "learning_rate": 7.809082316935956e-05, "loss": 0.2331, "step": 32025 }, { "epoch": 2.5944588464031106, "grad_norm": 0.06523216515779495, "learning_rate": 7.808632251676493e-05, "loss": 0.2107, "step": 32026 }, { "epoch": 2.5945398574206093, "grad_norm": 0.05876978114247322, "learning_rate": 7.80818218641703e-05, "loss": 0.2293, "step": 32027 }, { "epoch": 2.5946208684381076, "grad_norm": 0.09167477488517761, "learning_rate": 7.807732121157568e-05, "loss": 0.2273, "step": 32028 }, { "epoch": 2.594701879455606, "grad_norm": 0.06189775466918945, "learning_rate": 7.807282055898105e-05, "loss": 0.2415, "step": 32029 }, { "epoch": 2.5947828904731045, "grad_norm": 0.06428928673267365, "learning_rate": 7.806831990638643e-05, "loss": 0.2318, "step": 32030 }, { "epoch": 2.5948639014906028, "grad_norm": 0.05991830304265022, "learning_rate": 7.80638192537918e-05, "loss": 0.2147, "step": 32031 }, { "epoch": 2.594944912508101, "grad_norm": 0.07664906978607178, "learning_rate": 7.805931860119717e-05, "loss": 0.2763, "step": 32032 }, { "epoch": 2.5950259235255997, "grad_norm": 0.06697775423526764, "learning_rate": 7.805481794860255e-05, "loss": 0.233, "step": 32033 }, { "epoch": 2.595106934543098, "grad_norm": 0.07400806248188019, "learning_rate": 7.805031729600792e-05, "loss": 0.2343, "step": 32034 }, { "epoch": 2.595187945560596, "grad_norm": 0.06435059010982513, "learning_rate": 7.80458166434133e-05, "loss": 0.2256, "step": 32035 }, { "epoch": 2.595268956578095, "grad_norm": 0.05965763330459595, "learning_rate": 7.804131599081867e-05, "loss": 0.2557, "step": 32036 }, { "epoch": 2.595349967595593, "grad_norm": 0.05650469288229942, "learning_rate": 7.803681533822404e-05, "loss": 0.2102, "step": 32037 }, { "epoch": 2.5954309786130914, "grad_norm": 0.062130898237228394, "learning_rate": 7.803231468562942e-05, "loss": 0.2337, "step": 32038 }, { "epoch": 2.5955119896305896, "grad_norm": 0.08088408410549164, "learning_rate": 7.802781403303479e-05, "loss": 0.2722, "step": 32039 }, { "epoch": 2.5955930006480883, "grad_norm": 0.06567167490720749, "learning_rate": 7.802331338044016e-05, "loss": 0.2643, "step": 32040 }, { "epoch": 2.5956740116655865, "grad_norm": 0.06135808676481247, "learning_rate": 7.801881272784554e-05, "loss": 0.1826, "step": 32041 }, { "epoch": 2.595755022683085, "grad_norm": 0.07413794845342636, "learning_rate": 7.801431207525091e-05, "loss": 0.2329, "step": 32042 }, { "epoch": 2.595836033700583, "grad_norm": 0.08833226561546326, "learning_rate": 7.800981142265628e-05, "loss": 0.2747, "step": 32043 }, { "epoch": 2.5959170447180817, "grad_norm": 0.07479183375835419, "learning_rate": 7.800531077006167e-05, "loss": 0.2454, "step": 32044 }, { "epoch": 2.59599805573558, "grad_norm": 0.07777579128742218, "learning_rate": 7.800081011746703e-05, "loss": 0.2246, "step": 32045 }, { "epoch": 2.596079066753078, "grad_norm": 0.06699565798044205, "learning_rate": 7.79963094648724e-05, "loss": 0.2376, "step": 32046 }, { "epoch": 2.596160077770577, "grad_norm": 0.07032589614391327, "learning_rate": 7.799180881227779e-05, "loss": 0.2038, "step": 32047 }, { "epoch": 2.596241088788075, "grad_norm": 0.07530355453491211, "learning_rate": 7.798730815968315e-05, "loss": 0.2412, "step": 32048 }, { "epoch": 2.5963220998055734, "grad_norm": 0.07182317227125168, "learning_rate": 7.798280750708853e-05, "loss": 0.2384, "step": 32049 }, { "epoch": 2.596403110823072, "grad_norm": 0.07123768329620361, "learning_rate": 7.797830685449391e-05, "loss": 0.2564, "step": 32050 }, { "epoch": 2.5964841218405703, "grad_norm": 0.06900478154420853, "learning_rate": 7.797380620189927e-05, "loss": 0.2412, "step": 32051 }, { "epoch": 2.5965651328580686, "grad_norm": 0.07733968645334244, "learning_rate": 7.796930554930465e-05, "loss": 0.2846, "step": 32052 }, { "epoch": 2.5966461438755672, "grad_norm": 0.06465481966733932, "learning_rate": 7.796480489671003e-05, "loss": 0.2019, "step": 32053 }, { "epoch": 2.5967271548930655, "grad_norm": 0.06342798471450806, "learning_rate": 7.79603042441154e-05, "loss": 0.2099, "step": 32054 }, { "epoch": 2.5968081659105637, "grad_norm": 0.07235822081565857, "learning_rate": 7.795580359152077e-05, "loss": 0.2274, "step": 32055 }, { "epoch": 2.5968891769280624, "grad_norm": 0.06785944849252701, "learning_rate": 7.795130293892615e-05, "loss": 0.2421, "step": 32056 }, { "epoch": 2.5969701879455607, "grad_norm": 0.08089176565408707, "learning_rate": 7.794680228633153e-05, "loss": 0.2329, "step": 32057 }, { "epoch": 2.597051198963059, "grad_norm": 0.061501555144786835, "learning_rate": 7.794230163373689e-05, "loss": 0.2261, "step": 32058 }, { "epoch": 2.5971322099805576, "grad_norm": 0.06012822687625885, "learning_rate": 7.793780098114228e-05, "loss": 0.229, "step": 32059 }, { "epoch": 2.597213220998056, "grad_norm": 0.07157018780708313, "learning_rate": 7.793330032854765e-05, "loss": 0.2696, "step": 32060 }, { "epoch": 2.597294232015554, "grad_norm": 0.07280758768320084, "learning_rate": 7.792879967595301e-05, "loss": 0.2474, "step": 32061 }, { "epoch": 2.5973752430330523, "grad_norm": 0.06970489770174026, "learning_rate": 7.79242990233584e-05, "loss": 0.209, "step": 32062 }, { "epoch": 2.597456254050551, "grad_norm": 0.060698650777339935, "learning_rate": 7.791979837076377e-05, "loss": 0.2343, "step": 32063 }, { "epoch": 2.5975372650680493, "grad_norm": 0.07536067068576813, "learning_rate": 7.791529771816913e-05, "loss": 0.2347, "step": 32064 }, { "epoch": 2.5976182760855475, "grad_norm": 0.062164146453142166, "learning_rate": 7.791079706557452e-05, "loss": 0.2291, "step": 32065 }, { "epoch": 2.5976992871030458, "grad_norm": 0.06609980016946793, "learning_rate": 7.790629641297989e-05, "loss": 0.2181, "step": 32066 }, { "epoch": 2.5977802981205445, "grad_norm": 0.07265621423721313, "learning_rate": 7.790179576038525e-05, "loss": 0.2465, "step": 32067 }, { "epoch": 2.5978613091380427, "grad_norm": 0.08752243220806122, "learning_rate": 7.789729510779064e-05, "loss": 0.2734, "step": 32068 }, { "epoch": 2.597942320155541, "grad_norm": 0.06559853255748749, "learning_rate": 7.789279445519601e-05, "loss": 0.2437, "step": 32069 }, { "epoch": 2.5980233311730396, "grad_norm": 0.06379378587007523, "learning_rate": 7.788829380260139e-05, "loss": 0.2586, "step": 32070 }, { "epoch": 2.598104342190538, "grad_norm": 0.06516022235155106, "learning_rate": 7.788379315000676e-05, "loss": 0.2178, "step": 32071 }, { "epoch": 2.598185353208036, "grad_norm": 0.0715324804186821, "learning_rate": 7.787929249741213e-05, "loss": 0.2428, "step": 32072 }, { "epoch": 2.598266364225535, "grad_norm": 0.06728719919919968, "learning_rate": 7.78747918448175e-05, "loss": 0.2606, "step": 32073 }, { "epoch": 2.598347375243033, "grad_norm": 0.057596538215875626, "learning_rate": 7.787029119222288e-05, "loss": 0.2282, "step": 32074 }, { "epoch": 2.5984283862605313, "grad_norm": 0.07895933091640472, "learning_rate": 7.786579053962825e-05, "loss": 0.2665, "step": 32075 }, { "epoch": 2.59850939727803, "grad_norm": 0.062324851751327515, "learning_rate": 7.786128988703363e-05, "loss": 0.2022, "step": 32076 }, { "epoch": 2.5985904082955282, "grad_norm": 0.06992025673389435, "learning_rate": 7.7856789234439e-05, "loss": 0.2236, "step": 32077 }, { "epoch": 2.5986714193130265, "grad_norm": 0.0895005464553833, "learning_rate": 7.785228858184437e-05, "loss": 0.233, "step": 32078 }, { "epoch": 2.598752430330525, "grad_norm": 0.06543274968862534, "learning_rate": 7.784778792924975e-05, "loss": 0.2491, "step": 32079 }, { "epoch": 2.5988334413480234, "grad_norm": 0.07061692327260971, "learning_rate": 7.784328727665512e-05, "loss": 0.2302, "step": 32080 }, { "epoch": 2.5989144523655217, "grad_norm": 0.08071654289960861, "learning_rate": 7.78387866240605e-05, "loss": 0.2655, "step": 32081 }, { "epoch": 2.5989954633830203, "grad_norm": 0.06737606227397919, "learning_rate": 7.783428597146587e-05, "loss": 0.234, "step": 32082 }, { "epoch": 2.5990764744005186, "grad_norm": 0.06019715592265129, "learning_rate": 7.782978531887124e-05, "loss": 0.245, "step": 32083 }, { "epoch": 2.599157485418017, "grad_norm": 0.06333910673856735, "learning_rate": 7.782528466627662e-05, "loss": 0.2508, "step": 32084 }, { "epoch": 2.599238496435515, "grad_norm": 0.07445622980594635, "learning_rate": 7.782078401368199e-05, "loss": 0.2456, "step": 32085 }, { "epoch": 2.5993195074530138, "grad_norm": 0.06121855229139328, "learning_rate": 7.781628336108736e-05, "loss": 0.2211, "step": 32086 }, { "epoch": 2.599400518470512, "grad_norm": 0.09158430993556976, "learning_rate": 7.781178270849274e-05, "loss": 0.2805, "step": 32087 }, { "epoch": 2.5994815294880103, "grad_norm": 0.0586605928838253, "learning_rate": 7.780728205589811e-05, "loss": 0.2152, "step": 32088 }, { "epoch": 2.5995625405055085, "grad_norm": 0.08469560742378235, "learning_rate": 7.780278140330348e-05, "loss": 0.2512, "step": 32089 }, { "epoch": 2.599643551523007, "grad_norm": 0.08891402184963226, "learning_rate": 7.779828075070886e-05, "loss": 0.2435, "step": 32090 }, { "epoch": 2.5997245625405054, "grad_norm": 0.05682078003883362, "learning_rate": 7.779378009811423e-05, "loss": 0.1997, "step": 32091 }, { "epoch": 2.5998055735580037, "grad_norm": 0.07719125598669052, "learning_rate": 7.77892794455196e-05, "loss": 0.2507, "step": 32092 }, { "epoch": 2.5998865845755024, "grad_norm": 0.09698209166526794, "learning_rate": 7.778477879292498e-05, "loss": 0.2179, "step": 32093 }, { "epoch": 2.5999675955930006, "grad_norm": 0.05910210683941841, "learning_rate": 7.778027814033035e-05, "loss": 0.2175, "step": 32094 }, { "epoch": 2.600048606610499, "grad_norm": 0.06909825652837753, "learning_rate": 7.777577748773573e-05, "loss": 0.2486, "step": 32095 }, { "epoch": 2.6001296176279975, "grad_norm": 0.0670301616191864, "learning_rate": 7.77712768351411e-05, "loss": 0.2375, "step": 32096 }, { "epoch": 2.600210628645496, "grad_norm": 0.0631147101521492, "learning_rate": 7.776677618254647e-05, "loss": 0.2515, "step": 32097 }, { "epoch": 2.600291639662994, "grad_norm": 0.08409339934587479, "learning_rate": 7.776227552995185e-05, "loss": 0.2635, "step": 32098 }, { "epoch": 2.6003726506804927, "grad_norm": 0.06275641918182373, "learning_rate": 7.775777487735722e-05, "loss": 0.222, "step": 32099 }, { "epoch": 2.600453661697991, "grad_norm": 0.06495209783315659, "learning_rate": 7.77532742247626e-05, "loss": 0.2427, "step": 32100 }, { "epoch": 2.600534672715489, "grad_norm": 0.058924321085214615, "learning_rate": 7.774877357216797e-05, "loss": 0.2369, "step": 32101 }, { "epoch": 2.600615683732988, "grad_norm": 0.06495524197816849, "learning_rate": 7.774427291957334e-05, "loss": 0.2248, "step": 32102 }, { "epoch": 2.600696694750486, "grad_norm": 0.06625855714082718, "learning_rate": 7.773977226697871e-05, "loss": 0.2416, "step": 32103 }, { "epoch": 2.6007777057679844, "grad_norm": 0.06197715550661087, "learning_rate": 7.773527161438409e-05, "loss": 0.2356, "step": 32104 }, { "epoch": 2.600858716785483, "grad_norm": 0.06675733625888824, "learning_rate": 7.773077096178946e-05, "loss": 0.2749, "step": 32105 }, { "epoch": 2.6009397278029813, "grad_norm": 0.06515955924987793, "learning_rate": 7.772627030919483e-05, "loss": 0.2303, "step": 32106 }, { "epoch": 2.6010207388204796, "grad_norm": 0.05678664520382881, "learning_rate": 7.772176965660021e-05, "loss": 0.204, "step": 32107 }, { "epoch": 2.601101749837978, "grad_norm": 0.08410949259996414, "learning_rate": 7.771726900400558e-05, "loss": 0.2382, "step": 32108 }, { "epoch": 2.6011827608554765, "grad_norm": 0.06365490704774857, "learning_rate": 7.771276835141096e-05, "loss": 0.2017, "step": 32109 }, { "epoch": 2.6012637718729748, "grad_norm": 0.07431008666753769, "learning_rate": 7.770826769881633e-05, "loss": 0.2291, "step": 32110 }, { "epoch": 2.601344782890473, "grad_norm": 0.07208096235990524, "learning_rate": 7.77037670462217e-05, "loss": 0.234, "step": 32111 }, { "epoch": 2.6014257939079712, "grad_norm": 0.09047136455774307, "learning_rate": 7.769926639362708e-05, "loss": 0.2459, "step": 32112 }, { "epoch": 2.60150680492547, "grad_norm": 0.06856090575456619, "learning_rate": 7.769476574103245e-05, "loss": 0.2726, "step": 32113 }, { "epoch": 2.601587815942968, "grad_norm": 0.056730642914772034, "learning_rate": 7.769026508843782e-05, "loss": 0.2499, "step": 32114 }, { "epoch": 2.6016688269604664, "grad_norm": 0.0567195862531662, "learning_rate": 7.76857644358432e-05, "loss": 0.2298, "step": 32115 }, { "epoch": 2.601749837977965, "grad_norm": 0.0721188485622406, "learning_rate": 7.768126378324857e-05, "loss": 0.2161, "step": 32116 }, { "epoch": 2.6018308489954634, "grad_norm": 0.08085426688194275, "learning_rate": 7.767676313065394e-05, "loss": 0.2409, "step": 32117 }, { "epoch": 2.6019118600129616, "grad_norm": 0.07815434783697128, "learning_rate": 7.767226247805932e-05, "loss": 0.2103, "step": 32118 }, { "epoch": 2.6019928710304603, "grad_norm": 0.0653507336974144, "learning_rate": 7.766776182546469e-05, "loss": 0.2375, "step": 32119 }, { "epoch": 2.6020738820479585, "grad_norm": 0.05822880193591118, "learning_rate": 7.766326117287007e-05, "loss": 0.2571, "step": 32120 }, { "epoch": 2.6021548930654568, "grad_norm": 0.06808577477931976, "learning_rate": 7.765876052027544e-05, "loss": 0.2179, "step": 32121 }, { "epoch": 2.6022359040829555, "grad_norm": 0.0650370866060257, "learning_rate": 7.765425986768081e-05, "loss": 0.2428, "step": 32122 }, { "epoch": 2.6023169151004537, "grad_norm": 0.07062049210071564, "learning_rate": 7.76497592150862e-05, "loss": 0.2361, "step": 32123 }, { "epoch": 2.602397926117952, "grad_norm": 0.06092534959316254, "learning_rate": 7.764525856249156e-05, "loss": 0.2163, "step": 32124 }, { "epoch": 2.6024789371354506, "grad_norm": 0.06739575415849686, "learning_rate": 7.764075790989695e-05, "loss": 0.2449, "step": 32125 }, { "epoch": 2.602559948152949, "grad_norm": 0.06647944450378418, "learning_rate": 7.763625725730232e-05, "loss": 0.231, "step": 32126 }, { "epoch": 2.602640959170447, "grad_norm": 0.06996641308069229, "learning_rate": 7.763175660470768e-05, "loss": 0.2452, "step": 32127 }, { "epoch": 2.602721970187946, "grad_norm": 0.061999525874853134, "learning_rate": 7.762725595211307e-05, "loss": 0.2371, "step": 32128 }, { "epoch": 2.602802981205444, "grad_norm": 0.06345861405134201, "learning_rate": 7.762275529951844e-05, "loss": 0.2354, "step": 32129 }, { "epoch": 2.6028839922229423, "grad_norm": 0.07594001293182373, "learning_rate": 7.76182546469238e-05, "loss": 0.2262, "step": 32130 }, { "epoch": 2.6029650032404406, "grad_norm": 0.05956905707716942, "learning_rate": 7.761375399432919e-05, "loss": 0.2466, "step": 32131 }, { "epoch": 2.6030460142579392, "grad_norm": 0.07624305039644241, "learning_rate": 7.760925334173456e-05, "loss": 0.2476, "step": 32132 }, { "epoch": 2.6031270252754375, "grad_norm": 0.06887023895978928, "learning_rate": 7.760475268913992e-05, "loss": 0.2213, "step": 32133 }, { "epoch": 2.6032080362929357, "grad_norm": 0.0756741613149643, "learning_rate": 7.760025203654531e-05, "loss": 0.2794, "step": 32134 }, { "epoch": 2.603289047310434, "grad_norm": 0.05459382012486458, "learning_rate": 7.759575138395068e-05, "loss": 0.2318, "step": 32135 }, { "epoch": 2.6033700583279327, "grad_norm": 0.06112325191497803, "learning_rate": 7.759125073135604e-05, "loss": 0.2449, "step": 32136 }, { "epoch": 2.603451069345431, "grad_norm": 0.056865058839321136, "learning_rate": 7.758675007876143e-05, "loss": 0.2248, "step": 32137 }, { "epoch": 2.603532080362929, "grad_norm": 0.07650525867938995, "learning_rate": 7.75822494261668e-05, "loss": 0.2255, "step": 32138 }, { "epoch": 2.603613091380428, "grad_norm": 0.06502238661050797, "learning_rate": 7.757774877357216e-05, "loss": 0.2462, "step": 32139 }, { "epoch": 2.603694102397926, "grad_norm": 0.06768109649419785, "learning_rate": 7.757324812097755e-05, "loss": 0.2303, "step": 32140 }, { "epoch": 2.6037751134154243, "grad_norm": 0.0667257085442543, "learning_rate": 7.756874746838292e-05, "loss": 0.2302, "step": 32141 }, { "epoch": 2.603856124432923, "grad_norm": 0.06509841233491898, "learning_rate": 7.756424681578828e-05, "loss": 0.262, "step": 32142 }, { "epoch": 2.6039371354504213, "grad_norm": 0.06468883901834488, "learning_rate": 7.755974616319367e-05, "loss": 0.2787, "step": 32143 }, { "epoch": 2.6040181464679195, "grad_norm": 0.08520834892988205, "learning_rate": 7.755524551059905e-05, "loss": 0.2821, "step": 32144 }, { "epoch": 2.604099157485418, "grad_norm": 0.06440196186304092, "learning_rate": 7.75507448580044e-05, "loss": 0.225, "step": 32145 }, { "epoch": 2.6041801685029164, "grad_norm": 0.06669966131448746, "learning_rate": 7.754624420540979e-05, "loss": 0.2595, "step": 32146 }, { "epoch": 2.6042611795204147, "grad_norm": 0.08062119036912918, "learning_rate": 7.754174355281517e-05, "loss": 0.2767, "step": 32147 }, { "epoch": 2.6043421905379134, "grad_norm": 0.069743312895298, "learning_rate": 7.753724290022053e-05, "loss": 0.2634, "step": 32148 }, { "epoch": 2.6044232015554116, "grad_norm": 0.06465429812669754, "learning_rate": 7.753274224762591e-05, "loss": 0.2333, "step": 32149 }, { "epoch": 2.60450421257291, "grad_norm": 0.06599178165197372, "learning_rate": 7.752824159503129e-05, "loss": 0.2581, "step": 32150 }, { "epoch": 2.6045852235904086, "grad_norm": 0.06734525412321091, "learning_rate": 7.752374094243666e-05, "loss": 0.2836, "step": 32151 }, { "epoch": 2.604666234607907, "grad_norm": 0.08109408617019653, "learning_rate": 7.751924028984203e-05, "loss": 0.2455, "step": 32152 }, { "epoch": 2.604747245625405, "grad_norm": 0.06604114174842834, "learning_rate": 7.751473963724741e-05, "loss": 0.2375, "step": 32153 }, { "epoch": 2.6048282566429033, "grad_norm": 0.06498434394598007, "learning_rate": 7.751023898465278e-05, "loss": 0.2373, "step": 32154 }, { "epoch": 2.6049092676604015, "grad_norm": 0.06334884464740753, "learning_rate": 7.750573833205816e-05, "loss": 0.2258, "step": 32155 }, { "epoch": 2.6049902786779002, "grad_norm": 0.06946169584989548, "learning_rate": 7.750123767946353e-05, "loss": 0.192, "step": 32156 }, { "epoch": 2.6050712896953985, "grad_norm": 0.06957487761974335, "learning_rate": 7.74967370268689e-05, "loss": 0.257, "step": 32157 }, { "epoch": 2.6051523007128967, "grad_norm": 0.08043936640024185, "learning_rate": 7.749223637427428e-05, "loss": 0.2327, "step": 32158 }, { "epoch": 2.6052333117303954, "grad_norm": 0.06651458889245987, "learning_rate": 7.748773572167965e-05, "loss": 0.2216, "step": 32159 }, { "epoch": 2.6053143227478937, "grad_norm": 0.07017157226800919, "learning_rate": 7.748323506908502e-05, "loss": 0.2383, "step": 32160 }, { "epoch": 2.605395333765392, "grad_norm": 0.07861942052841187, "learning_rate": 7.74787344164904e-05, "loss": 0.2728, "step": 32161 }, { "epoch": 2.6054763447828906, "grad_norm": 0.07645098865032196, "learning_rate": 7.747423376389577e-05, "loss": 0.2531, "step": 32162 }, { "epoch": 2.605557355800389, "grad_norm": 0.06685902923345566, "learning_rate": 7.746973311130114e-05, "loss": 0.2501, "step": 32163 }, { "epoch": 2.605638366817887, "grad_norm": 0.057141318917274475, "learning_rate": 7.746523245870652e-05, "loss": 0.209, "step": 32164 }, { "epoch": 2.6057193778353858, "grad_norm": 0.07345332205295563, "learning_rate": 7.746073180611189e-05, "loss": 0.2316, "step": 32165 }, { "epoch": 2.605800388852884, "grad_norm": 0.07095864415168762, "learning_rate": 7.745623115351726e-05, "loss": 0.2351, "step": 32166 }, { "epoch": 2.6058813998703823, "grad_norm": 0.0770629495382309, "learning_rate": 7.745173050092264e-05, "loss": 0.2171, "step": 32167 }, { "epoch": 2.605962410887881, "grad_norm": 0.0654478594660759, "learning_rate": 7.744722984832801e-05, "loss": 0.2113, "step": 32168 }, { "epoch": 2.606043421905379, "grad_norm": 0.07980560511350632, "learning_rate": 7.744272919573339e-05, "loss": 0.2671, "step": 32169 }, { "epoch": 2.6061244329228774, "grad_norm": 0.06946124136447906, "learning_rate": 7.743822854313876e-05, "loss": 0.2077, "step": 32170 }, { "epoch": 2.606205443940376, "grad_norm": 0.07111818343400955, "learning_rate": 7.743372789054413e-05, "loss": 0.2573, "step": 32171 }, { "epoch": 2.6062864549578744, "grad_norm": 0.06737150251865387, "learning_rate": 7.74292272379495e-05, "loss": 0.2075, "step": 32172 }, { "epoch": 2.6063674659753726, "grad_norm": 0.08232983946800232, "learning_rate": 7.742472658535488e-05, "loss": 0.2885, "step": 32173 }, { "epoch": 2.6064484769928713, "grad_norm": 0.0650903508067131, "learning_rate": 7.742022593276025e-05, "loss": 0.2218, "step": 32174 }, { "epoch": 2.6065294880103695, "grad_norm": 0.05715804547071457, "learning_rate": 7.741572528016563e-05, "loss": 0.2562, "step": 32175 }, { "epoch": 2.606610499027868, "grad_norm": 0.08915777504444122, "learning_rate": 7.7411224627571e-05, "loss": 0.2583, "step": 32176 }, { "epoch": 2.606691510045366, "grad_norm": 0.060810547322034836, "learning_rate": 7.740672397497637e-05, "loss": 0.2595, "step": 32177 }, { "epoch": 2.6067725210628643, "grad_norm": 0.06770209223031998, "learning_rate": 7.740222332238175e-05, "loss": 0.245, "step": 32178 }, { "epoch": 2.606853532080363, "grad_norm": 0.07107766717672348, "learning_rate": 7.739772266978712e-05, "loss": 0.2365, "step": 32179 }, { "epoch": 2.606934543097861, "grad_norm": 0.064228855073452, "learning_rate": 7.73932220171925e-05, "loss": 0.1962, "step": 32180 }, { "epoch": 2.6070155541153595, "grad_norm": 0.0662747398018837, "learning_rate": 7.738872136459787e-05, "loss": 0.2428, "step": 32181 }, { "epoch": 2.607096565132858, "grad_norm": 0.06631524860858917, "learning_rate": 7.738422071200324e-05, "loss": 0.2313, "step": 32182 }, { "epoch": 2.6071775761503564, "grad_norm": 0.06557276844978333, "learning_rate": 7.737972005940862e-05, "loss": 0.2079, "step": 32183 }, { "epoch": 2.6072585871678546, "grad_norm": 0.06969014555215836, "learning_rate": 7.737521940681399e-05, "loss": 0.2213, "step": 32184 }, { "epoch": 2.6073395981853533, "grad_norm": 0.05485299229621887, "learning_rate": 7.737071875421936e-05, "loss": 0.2153, "step": 32185 }, { "epoch": 2.6074206092028516, "grad_norm": 0.06255738437175751, "learning_rate": 7.736621810162474e-05, "loss": 0.2748, "step": 32186 }, { "epoch": 2.60750162022035, "grad_norm": 0.07666832953691483, "learning_rate": 7.736171744903011e-05, "loss": 0.2586, "step": 32187 }, { "epoch": 2.6075826312378485, "grad_norm": 0.07308944314718246, "learning_rate": 7.735721679643548e-05, "loss": 0.2681, "step": 32188 }, { "epoch": 2.6076636422553467, "grad_norm": 0.0682573989033699, "learning_rate": 7.735271614384086e-05, "loss": 0.2337, "step": 32189 }, { "epoch": 2.607744653272845, "grad_norm": 0.08183484524488449, "learning_rate": 7.734821549124623e-05, "loss": 0.2215, "step": 32190 }, { "epoch": 2.6078256642903437, "grad_norm": 0.08453409373760223, "learning_rate": 7.73437148386516e-05, "loss": 0.2535, "step": 32191 }, { "epoch": 2.607906675307842, "grad_norm": 0.07045777142047882, "learning_rate": 7.733921418605699e-05, "loss": 0.2249, "step": 32192 }, { "epoch": 2.60798768632534, "grad_norm": 0.060405831784009933, "learning_rate": 7.733471353346235e-05, "loss": 0.2345, "step": 32193 }, { "epoch": 2.608068697342839, "grad_norm": 0.08729273080825806, "learning_rate": 7.733021288086773e-05, "loss": 0.2365, "step": 32194 }, { "epoch": 2.608149708360337, "grad_norm": 0.06557916104793549, "learning_rate": 7.732571222827311e-05, "loss": 0.2168, "step": 32195 }, { "epoch": 2.6082307193778353, "grad_norm": 0.07198052853345871, "learning_rate": 7.732121157567847e-05, "loss": 0.2444, "step": 32196 }, { "epoch": 2.608311730395334, "grad_norm": 0.07454212754964828, "learning_rate": 7.731671092308385e-05, "loss": 0.2326, "step": 32197 }, { "epoch": 2.6083927414128323, "grad_norm": 0.07987958192825317, "learning_rate": 7.731221027048923e-05, "loss": 0.2359, "step": 32198 }, { "epoch": 2.6084737524303305, "grad_norm": 0.07974809408187866, "learning_rate": 7.73077096178946e-05, "loss": 0.257, "step": 32199 }, { "epoch": 2.6085547634478288, "grad_norm": 0.0739462673664093, "learning_rate": 7.730320896529997e-05, "loss": 0.2693, "step": 32200 }, { "epoch": 2.608635774465327, "grad_norm": 0.06772410869598389, "learning_rate": 7.729870831270535e-05, "loss": 0.214, "step": 32201 }, { "epoch": 2.6087167854828257, "grad_norm": 0.06608753651380539, "learning_rate": 7.729420766011071e-05, "loss": 0.2142, "step": 32202 }, { "epoch": 2.608797796500324, "grad_norm": 0.06899905949831009, "learning_rate": 7.72897070075161e-05, "loss": 0.2072, "step": 32203 }, { "epoch": 2.608878807517822, "grad_norm": 0.07096538692712784, "learning_rate": 7.728520635492148e-05, "loss": 0.261, "step": 32204 }, { "epoch": 2.608959818535321, "grad_norm": 0.08852484077215195, "learning_rate": 7.728070570232684e-05, "loss": 0.2959, "step": 32205 }, { "epoch": 2.609040829552819, "grad_norm": 0.0703040286898613, "learning_rate": 7.727620504973222e-05, "loss": 0.3129, "step": 32206 }, { "epoch": 2.6091218405703174, "grad_norm": 0.06888163834810257, "learning_rate": 7.72717043971376e-05, "loss": 0.2657, "step": 32207 }, { "epoch": 2.609202851587816, "grad_norm": 0.06287510693073273, "learning_rate": 7.726720374454296e-05, "loss": 0.232, "step": 32208 }, { "epoch": 2.6092838626053143, "grad_norm": 0.06748857349157333, "learning_rate": 7.726270309194834e-05, "loss": 0.2416, "step": 32209 }, { "epoch": 2.6093648736228126, "grad_norm": 0.07733192294836044, "learning_rate": 7.725820243935372e-05, "loss": 0.2529, "step": 32210 }, { "epoch": 2.6094458846403112, "grad_norm": 0.06905055791139603, "learning_rate": 7.725370178675908e-05, "loss": 0.2513, "step": 32211 }, { "epoch": 2.6095268956578095, "grad_norm": 0.06713546812534332, "learning_rate": 7.724920113416446e-05, "loss": 0.2609, "step": 32212 }, { "epoch": 2.6096079066753077, "grad_norm": 0.04904366284608841, "learning_rate": 7.724470048156984e-05, "loss": 0.2258, "step": 32213 }, { "epoch": 2.6096889176928064, "grad_norm": 0.0761357843875885, "learning_rate": 7.72401998289752e-05, "loss": 0.25, "step": 32214 }, { "epoch": 2.6097699287103047, "grad_norm": 0.06284836679697037, "learning_rate": 7.723569917638058e-05, "loss": 0.2252, "step": 32215 }, { "epoch": 2.609850939727803, "grad_norm": 0.06382585316896439, "learning_rate": 7.723119852378596e-05, "loss": 0.2554, "step": 32216 }, { "epoch": 2.6099319507453016, "grad_norm": 0.061265233904123306, "learning_rate": 7.722669787119132e-05, "loss": 0.2382, "step": 32217 }, { "epoch": 2.6100129617628, "grad_norm": 0.06161702796816826, "learning_rate": 7.72221972185967e-05, "loss": 0.2122, "step": 32218 }, { "epoch": 2.610093972780298, "grad_norm": 0.06948524713516235, "learning_rate": 7.721769656600208e-05, "loss": 0.2784, "step": 32219 }, { "epoch": 2.6101749837977968, "grad_norm": 0.06408175826072693, "learning_rate": 7.721319591340744e-05, "loss": 0.2285, "step": 32220 }, { "epoch": 2.610255994815295, "grad_norm": 0.08492530882358551, "learning_rate": 7.720869526081283e-05, "loss": 0.2356, "step": 32221 }, { "epoch": 2.6103370058327933, "grad_norm": 0.06570692360401154, "learning_rate": 7.72041946082182e-05, "loss": 0.2748, "step": 32222 }, { "epoch": 2.6104180168502915, "grad_norm": 0.0714087188243866, "learning_rate": 7.719969395562356e-05, "loss": 0.2488, "step": 32223 }, { "epoch": 2.6104990278677898, "grad_norm": 0.07423436641693115, "learning_rate": 7.719519330302895e-05, "loss": 0.2216, "step": 32224 }, { "epoch": 2.6105800388852884, "grad_norm": 0.05890345573425293, "learning_rate": 7.719069265043432e-05, "loss": 0.2195, "step": 32225 }, { "epoch": 2.6106610499027867, "grad_norm": 0.0739654079079628, "learning_rate": 7.718619199783968e-05, "loss": 0.2489, "step": 32226 }, { "epoch": 2.610742060920285, "grad_norm": 0.07757818698883057, "learning_rate": 7.718169134524507e-05, "loss": 0.259, "step": 32227 }, { "epoch": 2.6108230719377836, "grad_norm": 0.0802304819226265, "learning_rate": 7.717719069265044e-05, "loss": 0.2567, "step": 32228 }, { "epoch": 2.610904082955282, "grad_norm": 0.06230137497186661, "learning_rate": 7.717269004005582e-05, "loss": 0.2389, "step": 32229 }, { "epoch": 2.61098509397278, "grad_norm": 0.07052315026521683, "learning_rate": 7.716818938746119e-05, "loss": 0.2591, "step": 32230 }, { "epoch": 2.611066104990279, "grad_norm": 0.07671262323856354, "learning_rate": 7.716368873486656e-05, "loss": 0.243, "step": 32231 }, { "epoch": 2.611147116007777, "grad_norm": 0.06278721243143082, "learning_rate": 7.715918808227194e-05, "loss": 0.2128, "step": 32232 }, { "epoch": 2.6112281270252753, "grad_norm": 0.098354272544384, "learning_rate": 7.715468742967731e-05, "loss": 0.2761, "step": 32233 }, { "epoch": 2.611309138042774, "grad_norm": 0.07634606957435608, "learning_rate": 7.715018677708268e-05, "loss": 0.262, "step": 32234 }, { "epoch": 2.6113901490602722, "grad_norm": 0.07335880398750305, "learning_rate": 7.714568612448806e-05, "loss": 0.233, "step": 32235 }, { "epoch": 2.6114711600777705, "grad_norm": 0.05759400501847267, "learning_rate": 7.714118547189343e-05, "loss": 0.208, "step": 32236 }, { "epoch": 2.611552171095269, "grad_norm": 0.07161126285791397, "learning_rate": 7.71366848192988e-05, "loss": 0.2126, "step": 32237 }, { "epoch": 2.6116331821127674, "grad_norm": 0.07215110957622528, "learning_rate": 7.713218416670418e-05, "loss": 0.2435, "step": 32238 }, { "epoch": 2.6117141931302656, "grad_norm": 0.05700969696044922, "learning_rate": 7.712768351410955e-05, "loss": 0.2107, "step": 32239 }, { "epoch": 2.6117952041477643, "grad_norm": 0.0639571100473404, "learning_rate": 7.712318286151492e-05, "loss": 0.2427, "step": 32240 }, { "epoch": 2.6118762151652626, "grad_norm": 0.06004498153924942, "learning_rate": 7.71186822089203e-05, "loss": 0.246, "step": 32241 }, { "epoch": 2.611957226182761, "grad_norm": 0.059504538774490356, "learning_rate": 7.711418155632567e-05, "loss": 0.2312, "step": 32242 }, { "epoch": 2.612038237200259, "grad_norm": 0.08116213977336884, "learning_rate": 7.710968090373105e-05, "loss": 0.305, "step": 32243 }, { "epoch": 2.6121192482177578, "grad_norm": 0.06468251347541809, "learning_rate": 7.710518025113642e-05, "loss": 0.2377, "step": 32244 }, { "epoch": 2.612200259235256, "grad_norm": 0.06595216691493988, "learning_rate": 7.710067959854179e-05, "loss": 0.2472, "step": 32245 }, { "epoch": 2.6122812702527543, "grad_norm": 0.07535982877016068, "learning_rate": 7.709617894594717e-05, "loss": 0.2429, "step": 32246 }, { "epoch": 2.6123622812702525, "grad_norm": 0.07220283150672913, "learning_rate": 7.709167829335254e-05, "loss": 0.2217, "step": 32247 }, { "epoch": 2.612443292287751, "grad_norm": 0.07300985604524612, "learning_rate": 7.708717764075791e-05, "loss": 0.2433, "step": 32248 }, { "epoch": 2.6125243033052494, "grad_norm": 0.06394525617361069, "learning_rate": 7.708267698816329e-05, "loss": 0.2997, "step": 32249 }, { "epoch": 2.6126053143227477, "grad_norm": 0.060523077845573425, "learning_rate": 7.707817633556866e-05, "loss": 0.2195, "step": 32250 }, { "epoch": 2.6126863253402464, "grad_norm": 0.06264317780733109, "learning_rate": 7.707367568297403e-05, "loss": 0.2492, "step": 32251 }, { "epoch": 2.6127673363577446, "grad_norm": 0.06706266105175018, "learning_rate": 7.706917503037941e-05, "loss": 0.2164, "step": 32252 }, { "epoch": 2.612848347375243, "grad_norm": 0.06442420929670334, "learning_rate": 7.706467437778478e-05, "loss": 0.2514, "step": 32253 }, { "epoch": 2.6129293583927415, "grad_norm": 0.08338776230812073, "learning_rate": 7.706017372519016e-05, "loss": 0.2873, "step": 32254 }, { "epoch": 2.61301036941024, "grad_norm": 0.08646774291992188, "learning_rate": 7.705567307259553e-05, "loss": 0.2131, "step": 32255 }, { "epoch": 2.613091380427738, "grad_norm": 0.060562461614608765, "learning_rate": 7.70511724200009e-05, "loss": 0.2104, "step": 32256 }, { "epoch": 2.6131723914452367, "grad_norm": 0.06581650674343109, "learning_rate": 7.704667176740628e-05, "loss": 0.2556, "step": 32257 }, { "epoch": 2.613253402462735, "grad_norm": 0.0644855946302414, "learning_rate": 7.704217111481165e-05, "loss": 0.244, "step": 32258 }, { "epoch": 2.613334413480233, "grad_norm": 0.06705068796873093, "learning_rate": 7.703767046221702e-05, "loss": 0.2729, "step": 32259 }, { "epoch": 2.613415424497732, "grad_norm": 0.06724876910448074, "learning_rate": 7.70331698096224e-05, "loss": 0.2644, "step": 32260 }, { "epoch": 2.61349643551523, "grad_norm": 0.0717342346906662, "learning_rate": 7.702866915702778e-05, "loss": 0.2399, "step": 32261 }, { "epoch": 2.6135774465327284, "grad_norm": 0.06799786537885666, "learning_rate": 7.702416850443314e-05, "loss": 0.2561, "step": 32262 }, { "epoch": 2.613658457550227, "grad_norm": 0.06930407881736755, "learning_rate": 7.701966785183852e-05, "loss": 0.2153, "step": 32263 }, { "epoch": 2.6137394685677253, "grad_norm": 0.07747004181146622, "learning_rate": 7.70151671992439e-05, "loss": 0.2524, "step": 32264 }, { "epoch": 2.6138204795852236, "grad_norm": 0.06788234412670135, "learning_rate": 7.701066654664926e-05, "loss": 0.2446, "step": 32265 }, { "epoch": 2.613901490602722, "grad_norm": 0.06565559655427933, "learning_rate": 7.700616589405464e-05, "loss": 0.2925, "step": 32266 }, { "epoch": 2.6139825016202205, "grad_norm": 0.06663317233324051, "learning_rate": 7.700166524146003e-05, "loss": 0.2257, "step": 32267 }, { "epoch": 2.6140635126377187, "grad_norm": 0.07271160930395126, "learning_rate": 7.699716458886539e-05, "loss": 0.2135, "step": 32268 }, { "epoch": 2.614144523655217, "grad_norm": 0.08005572110414505, "learning_rate": 7.699266393627076e-05, "loss": 0.2594, "step": 32269 }, { "epoch": 2.6142255346727152, "grad_norm": 0.08294783532619476, "learning_rate": 7.698816328367615e-05, "loss": 0.2594, "step": 32270 }, { "epoch": 2.614306545690214, "grad_norm": 0.07596366107463837, "learning_rate": 7.69836626310815e-05, "loss": 0.2699, "step": 32271 }, { "epoch": 2.614387556707712, "grad_norm": 0.08953236043453217, "learning_rate": 7.697916197848688e-05, "loss": 0.2384, "step": 32272 }, { "epoch": 2.6144685677252104, "grad_norm": 0.07046157121658325, "learning_rate": 7.697466132589227e-05, "loss": 0.2189, "step": 32273 }, { "epoch": 2.614549578742709, "grad_norm": 0.05413123220205307, "learning_rate": 7.697016067329763e-05, "loss": 0.2373, "step": 32274 }, { "epoch": 2.6146305897602073, "grad_norm": 0.07471950352191925, "learning_rate": 7.6965660020703e-05, "loss": 0.2546, "step": 32275 }, { "epoch": 2.6147116007777056, "grad_norm": 0.06346073001623154, "learning_rate": 7.696115936810839e-05, "loss": 0.2304, "step": 32276 }, { "epoch": 2.6147926117952043, "grad_norm": 0.06895754486322403, "learning_rate": 7.695665871551375e-05, "loss": 0.2521, "step": 32277 }, { "epoch": 2.6148736228127025, "grad_norm": 0.0746992975473404, "learning_rate": 7.695215806291912e-05, "loss": 0.2486, "step": 32278 }, { "epoch": 2.6149546338302008, "grad_norm": 0.063937708735466, "learning_rate": 7.694765741032451e-05, "loss": 0.2435, "step": 32279 }, { "epoch": 2.6150356448476995, "grad_norm": 0.07269015163183212, "learning_rate": 7.694315675772987e-05, "loss": 0.2738, "step": 32280 }, { "epoch": 2.6151166558651977, "grad_norm": 0.09423906356096268, "learning_rate": 7.693865610513524e-05, "loss": 0.2198, "step": 32281 }, { "epoch": 2.615197666882696, "grad_norm": 0.06471596658229828, "learning_rate": 7.693415545254063e-05, "loss": 0.2556, "step": 32282 }, { "epoch": 2.6152786779001946, "grad_norm": 0.0668293684720993, "learning_rate": 7.692965479994599e-05, "loss": 0.2652, "step": 32283 }, { "epoch": 2.615359688917693, "grad_norm": 0.06607384234666824, "learning_rate": 7.692515414735138e-05, "loss": 0.2113, "step": 32284 }, { "epoch": 2.615440699935191, "grad_norm": 0.07677590101957321, "learning_rate": 7.692065349475675e-05, "loss": 0.2479, "step": 32285 }, { "epoch": 2.61552171095269, "grad_norm": 0.06859259307384491, "learning_rate": 7.691615284216211e-05, "loss": 0.2425, "step": 32286 }, { "epoch": 2.615602721970188, "grad_norm": 0.0541251078248024, "learning_rate": 7.69116521895675e-05, "loss": 0.2511, "step": 32287 }, { "epoch": 2.6156837329876863, "grad_norm": 0.0717424750328064, "learning_rate": 7.690715153697287e-05, "loss": 0.2662, "step": 32288 }, { "epoch": 2.6157647440051845, "grad_norm": 0.07615697383880615, "learning_rate": 7.690265088437823e-05, "loss": 0.2544, "step": 32289 }, { "epoch": 2.6158457550226832, "grad_norm": 0.061555415391922, "learning_rate": 7.689815023178362e-05, "loss": 0.22, "step": 32290 }, { "epoch": 2.6159267660401815, "grad_norm": 0.07001828402280807, "learning_rate": 7.689364957918899e-05, "loss": 0.2428, "step": 32291 }, { "epoch": 2.6160077770576797, "grad_norm": 0.0808320865035057, "learning_rate": 7.688914892659435e-05, "loss": 0.2334, "step": 32292 }, { "epoch": 2.616088788075178, "grad_norm": 0.06133532524108887, "learning_rate": 7.688464827399974e-05, "loss": 0.2588, "step": 32293 }, { "epoch": 2.6161697990926767, "grad_norm": 0.07468949258327484, "learning_rate": 7.688014762140511e-05, "loss": 0.244, "step": 32294 }, { "epoch": 2.616250810110175, "grad_norm": 0.061842817813158035, "learning_rate": 7.687564696881047e-05, "loss": 0.2684, "step": 32295 }, { "epoch": 2.616331821127673, "grad_norm": 0.0641714558005333, "learning_rate": 7.687114631621586e-05, "loss": 0.2385, "step": 32296 }, { "epoch": 2.616412832145172, "grad_norm": 0.08465207368135452, "learning_rate": 7.686664566362123e-05, "loss": 0.2598, "step": 32297 }, { "epoch": 2.61649384316267, "grad_norm": 0.09201711416244507, "learning_rate": 7.68621450110266e-05, "loss": 0.2785, "step": 32298 }, { "epoch": 2.6165748541801683, "grad_norm": 0.06847358494997025, "learning_rate": 7.685764435843198e-05, "loss": 0.2386, "step": 32299 }, { "epoch": 2.616655865197667, "grad_norm": 0.07034248858690262, "learning_rate": 7.685314370583735e-05, "loss": 0.2418, "step": 32300 }, { "epoch": 2.6167368762151653, "grad_norm": 0.06319961696863174, "learning_rate": 7.684864305324271e-05, "loss": 0.2412, "step": 32301 }, { "epoch": 2.6168178872326635, "grad_norm": 0.10452283173799515, "learning_rate": 7.68441424006481e-05, "loss": 0.199, "step": 32302 }, { "epoch": 2.616898898250162, "grad_norm": 0.06628091633319855, "learning_rate": 7.683964174805348e-05, "loss": 0.2441, "step": 32303 }, { "epoch": 2.6169799092676604, "grad_norm": 0.08092938363552094, "learning_rate": 7.683514109545884e-05, "loss": 0.2497, "step": 32304 }, { "epoch": 2.6170609202851587, "grad_norm": 0.054823897778987885, "learning_rate": 7.683064044286422e-05, "loss": 0.2224, "step": 32305 }, { "epoch": 2.6171419313026574, "grad_norm": 0.08330629765987396, "learning_rate": 7.68261397902696e-05, "loss": 0.2496, "step": 32306 }, { "epoch": 2.6172229423201556, "grad_norm": 0.06323496997356415, "learning_rate": 7.682163913767496e-05, "loss": 0.2206, "step": 32307 }, { "epoch": 2.617303953337654, "grad_norm": 0.07369324564933777, "learning_rate": 7.681713848508034e-05, "loss": 0.2567, "step": 32308 }, { "epoch": 2.6173849643551526, "grad_norm": 0.07172773033380508, "learning_rate": 7.681263783248572e-05, "loss": 0.2587, "step": 32309 }, { "epoch": 2.617465975372651, "grad_norm": 0.07234320789575577, "learning_rate": 7.680813717989109e-05, "loss": 0.2255, "step": 32310 }, { "epoch": 2.617546986390149, "grad_norm": 0.07661251723766327, "learning_rate": 7.680363652729646e-05, "loss": 0.2329, "step": 32311 }, { "epoch": 2.6176279974076473, "grad_norm": 0.0664915069937706, "learning_rate": 7.679913587470184e-05, "loss": 0.2622, "step": 32312 }, { "epoch": 2.617709008425146, "grad_norm": 0.07211392372846603, "learning_rate": 7.679463522210721e-05, "loss": 0.2687, "step": 32313 }, { "epoch": 2.6177900194426442, "grad_norm": 0.06264899671077728, "learning_rate": 7.679013456951259e-05, "loss": 0.21, "step": 32314 }, { "epoch": 2.6178710304601425, "grad_norm": 0.06314536929130554, "learning_rate": 7.678563391691796e-05, "loss": 0.2079, "step": 32315 }, { "epoch": 2.6179520414776407, "grad_norm": 0.07251273095607758, "learning_rate": 7.678113326432333e-05, "loss": 0.2262, "step": 32316 }, { "epoch": 2.6180330524951394, "grad_norm": 0.06271009147167206, "learning_rate": 7.67766326117287e-05, "loss": 0.2423, "step": 32317 }, { "epoch": 2.6181140635126376, "grad_norm": 0.06633373349905014, "learning_rate": 7.677213195913408e-05, "loss": 0.2674, "step": 32318 }, { "epoch": 2.618195074530136, "grad_norm": 0.06320098787546158, "learning_rate": 7.676763130653945e-05, "loss": 0.2256, "step": 32319 }, { "epoch": 2.6182760855476346, "grad_norm": 0.06923279911279678, "learning_rate": 7.676313065394483e-05, "loss": 0.2426, "step": 32320 }, { "epoch": 2.618357096565133, "grad_norm": 0.07858932018280029, "learning_rate": 7.67586300013502e-05, "loss": 0.2355, "step": 32321 }, { "epoch": 2.618438107582631, "grad_norm": 0.06619489938020706, "learning_rate": 7.675412934875557e-05, "loss": 0.1963, "step": 32322 }, { "epoch": 2.6185191186001298, "grad_norm": 0.06854096055030823, "learning_rate": 7.674962869616095e-05, "loss": 0.2577, "step": 32323 }, { "epoch": 2.618600129617628, "grad_norm": 0.06553176045417786, "learning_rate": 7.674512804356632e-05, "loss": 0.2607, "step": 32324 }, { "epoch": 2.6186811406351262, "grad_norm": 0.06606678664684296, "learning_rate": 7.67406273909717e-05, "loss": 0.2687, "step": 32325 }, { "epoch": 2.618762151652625, "grad_norm": 0.08767323195934296, "learning_rate": 7.673612673837707e-05, "loss": 0.2404, "step": 32326 }, { "epoch": 2.618843162670123, "grad_norm": 0.07807376980781555, "learning_rate": 7.673162608578244e-05, "loss": 0.246, "step": 32327 }, { "epoch": 2.6189241736876214, "grad_norm": 0.11678686738014221, "learning_rate": 7.672712543318782e-05, "loss": 0.2686, "step": 32328 }, { "epoch": 2.61900518470512, "grad_norm": 0.07521022111177444, "learning_rate": 7.672262478059319e-05, "loss": 0.2316, "step": 32329 }, { "epoch": 2.6190861957226184, "grad_norm": 0.06827811151742935, "learning_rate": 7.671812412799856e-05, "loss": 0.2324, "step": 32330 }, { "epoch": 2.6191672067401166, "grad_norm": 0.06705344468355179, "learning_rate": 7.671362347540394e-05, "loss": 0.224, "step": 32331 }, { "epoch": 2.6192482177576153, "grad_norm": 0.06198614090681076, "learning_rate": 7.670912282280931e-05, "loss": 0.2539, "step": 32332 }, { "epoch": 2.6193292287751135, "grad_norm": 0.09031055867671967, "learning_rate": 7.670462217021468e-05, "loss": 0.2687, "step": 32333 }, { "epoch": 2.619410239792612, "grad_norm": 0.06712203472852707, "learning_rate": 7.670012151762006e-05, "loss": 0.2179, "step": 32334 }, { "epoch": 2.61949125081011, "grad_norm": 0.05460682138800621, "learning_rate": 7.669562086502543e-05, "loss": 0.2411, "step": 32335 }, { "epoch": 2.6195722618276087, "grad_norm": 0.08100723475217819, "learning_rate": 7.669112021243082e-05, "loss": 0.2147, "step": 32336 }, { "epoch": 2.619653272845107, "grad_norm": 0.06605152785778046, "learning_rate": 7.668661955983618e-05, "loss": 0.264, "step": 32337 }, { "epoch": 2.619734283862605, "grad_norm": 0.06912678480148315, "learning_rate": 7.668211890724155e-05, "loss": 0.2526, "step": 32338 }, { "epoch": 2.6198152948801035, "grad_norm": 0.07025929540395737, "learning_rate": 7.667761825464694e-05, "loss": 0.2403, "step": 32339 }, { "epoch": 2.619896305897602, "grad_norm": 0.06496301293373108, "learning_rate": 7.66731176020523e-05, "loss": 0.2339, "step": 32340 }, { "epoch": 2.6199773169151004, "grad_norm": 0.06952445209026337, "learning_rate": 7.666861694945767e-05, "loss": 0.2371, "step": 32341 }, { "epoch": 2.6200583279325986, "grad_norm": 0.06544504314661026, "learning_rate": 7.666411629686306e-05, "loss": 0.2753, "step": 32342 }, { "epoch": 2.6201393389500973, "grad_norm": 0.07840461283922195, "learning_rate": 7.665961564426842e-05, "loss": 0.2742, "step": 32343 }, { "epoch": 2.6202203499675956, "grad_norm": 0.07411501556634903, "learning_rate": 7.665511499167379e-05, "loss": 0.2433, "step": 32344 }, { "epoch": 2.620301360985094, "grad_norm": 0.06554578244686127, "learning_rate": 7.665061433907918e-05, "loss": 0.216, "step": 32345 }, { "epoch": 2.6203823720025925, "grad_norm": 0.06754684448242188, "learning_rate": 7.664611368648454e-05, "loss": 0.2724, "step": 32346 }, { "epoch": 2.6204633830200907, "grad_norm": 0.061257973313331604, "learning_rate": 7.664161303388991e-05, "loss": 0.2091, "step": 32347 }, { "epoch": 2.620544394037589, "grad_norm": 0.07244864851236343, "learning_rate": 7.66371123812953e-05, "loss": 0.2419, "step": 32348 }, { "epoch": 2.6206254050550877, "grad_norm": 0.07496330887079239, "learning_rate": 7.663261172870066e-05, "loss": 0.2521, "step": 32349 }, { "epoch": 2.620706416072586, "grad_norm": 0.07110787183046341, "learning_rate": 7.662811107610603e-05, "loss": 0.2727, "step": 32350 }, { "epoch": 2.620787427090084, "grad_norm": 0.05446374788880348, "learning_rate": 7.662361042351142e-05, "loss": 0.2528, "step": 32351 }, { "epoch": 2.620868438107583, "grad_norm": 0.06342311948537827, "learning_rate": 7.661910977091678e-05, "loss": 0.2287, "step": 32352 }, { "epoch": 2.620949449125081, "grad_norm": 0.06803973764181137, "learning_rate": 7.661460911832216e-05, "loss": 0.2491, "step": 32353 }, { "epoch": 2.6210304601425793, "grad_norm": 0.0856875330209732, "learning_rate": 7.661010846572754e-05, "loss": 0.2084, "step": 32354 }, { "epoch": 2.621111471160078, "grad_norm": 0.06051705405116081, "learning_rate": 7.66056078131329e-05, "loss": 0.2578, "step": 32355 }, { "epoch": 2.6211924821775763, "grad_norm": 0.08695938438177109, "learning_rate": 7.660110716053828e-05, "loss": 0.3125, "step": 32356 }, { "epoch": 2.6212734931950745, "grad_norm": 0.0813361182808876, "learning_rate": 7.659660650794366e-05, "loss": 0.2567, "step": 32357 }, { "epoch": 2.6213545042125728, "grad_norm": 0.08723953366279602, "learning_rate": 7.659210585534902e-05, "loss": 0.2701, "step": 32358 }, { "epoch": 2.6214355152300715, "grad_norm": 0.06307353079319, "learning_rate": 7.65876052027544e-05, "loss": 0.1899, "step": 32359 }, { "epoch": 2.6215165262475697, "grad_norm": 0.07490169256925583, "learning_rate": 7.658310455015978e-05, "loss": 0.232, "step": 32360 }, { "epoch": 2.621597537265068, "grad_norm": 0.07667241990566254, "learning_rate": 7.657860389756514e-05, "loss": 0.25, "step": 32361 }, { "epoch": 2.621678548282566, "grad_norm": 0.06483347713947296, "learning_rate": 7.657410324497053e-05, "loss": 0.2403, "step": 32362 }, { "epoch": 2.621759559300065, "grad_norm": 0.06192995235323906, "learning_rate": 7.65696025923759e-05, "loss": 0.2547, "step": 32363 }, { "epoch": 2.621840570317563, "grad_norm": 0.06280378252267838, "learning_rate": 7.656510193978127e-05, "loss": 0.2088, "step": 32364 }, { "epoch": 2.6219215813350614, "grad_norm": 0.07592108845710754, "learning_rate": 7.656060128718665e-05, "loss": 0.239, "step": 32365 }, { "epoch": 2.62200259235256, "grad_norm": 0.06386705487966537, "learning_rate": 7.655610063459203e-05, "loss": 0.2509, "step": 32366 }, { "epoch": 2.6220836033700583, "grad_norm": 0.06451716274023056, "learning_rate": 7.655159998199739e-05, "loss": 0.2352, "step": 32367 }, { "epoch": 2.6221646143875565, "grad_norm": 0.07501220703125, "learning_rate": 7.654709932940277e-05, "loss": 0.2633, "step": 32368 }, { "epoch": 2.6222456254050552, "grad_norm": 0.07946756482124329, "learning_rate": 7.654259867680815e-05, "loss": 0.2367, "step": 32369 }, { "epoch": 2.6223266364225535, "grad_norm": 0.05955003201961517, "learning_rate": 7.653809802421351e-05, "loss": 0.2201, "step": 32370 }, { "epoch": 2.6224076474400517, "grad_norm": 0.07076594978570938, "learning_rate": 7.65335973716189e-05, "loss": 0.2268, "step": 32371 }, { "epoch": 2.6224886584575504, "grad_norm": 0.06496407836675644, "learning_rate": 7.652909671902427e-05, "loss": 0.2207, "step": 32372 }, { "epoch": 2.6225696694750487, "grad_norm": 0.058163098990917206, "learning_rate": 7.652459606642963e-05, "loss": 0.2196, "step": 32373 }, { "epoch": 2.622650680492547, "grad_norm": 0.07921572774648666, "learning_rate": 7.652009541383501e-05, "loss": 0.2733, "step": 32374 }, { "epoch": 2.6227316915100456, "grad_norm": 0.07185711711645126, "learning_rate": 7.651559476124039e-05, "loss": 0.2641, "step": 32375 }, { "epoch": 2.622812702527544, "grad_norm": 0.07281036674976349, "learning_rate": 7.651109410864575e-05, "loss": 0.2401, "step": 32376 }, { "epoch": 2.622893713545042, "grad_norm": 0.06288488209247589, "learning_rate": 7.650659345605114e-05, "loss": 0.2117, "step": 32377 }, { "epoch": 2.6229747245625408, "grad_norm": 0.07066594809293747, "learning_rate": 7.650209280345651e-05, "loss": 0.2199, "step": 32378 }, { "epoch": 2.623055735580039, "grad_norm": 0.05946981906890869, "learning_rate": 7.649759215086187e-05, "loss": 0.2398, "step": 32379 }, { "epoch": 2.6231367465975373, "grad_norm": 0.07342832535505295, "learning_rate": 7.649309149826726e-05, "loss": 0.2429, "step": 32380 }, { "epoch": 2.6232177576150355, "grad_norm": 0.06416358798742294, "learning_rate": 7.648859084567263e-05, "loss": 0.2544, "step": 32381 }, { "epoch": 2.6232987686325338, "grad_norm": 0.056006185710430145, "learning_rate": 7.648409019307799e-05, "loss": 0.1901, "step": 32382 }, { "epoch": 2.6233797796500324, "grad_norm": 0.06662266701459885, "learning_rate": 7.647958954048338e-05, "loss": 0.2087, "step": 32383 }, { "epoch": 2.6234607906675307, "grad_norm": 0.07266471534967422, "learning_rate": 7.647508888788875e-05, "loss": 0.2493, "step": 32384 }, { "epoch": 2.623541801685029, "grad_norm": 0.05771796777844429, "learning_rate": 7.647058823529411e-05, "loss": 0.2701, "step": 32385 }, { "epoch": 2.6236228127025276, "grad_norm": 0.07551492005586624, "learning_rate": 7.64660875826995e-05, "loss": 0.2664, "step": 32386 }, { "epoch": 2.623703823720026, "grad_norm": 0.06250123679637909, "learning_rate": 7.646158693010487e-05, "loss": 0.1939, "step": 32387 }, { "epoch": 2.623784834737524, "grad_norm": 0.08225307613611221, "learning_rate": 7.645708627751025e-05, "loss": 0.2735, "step": 32388 }, { "epoch": 2.623865845755023, "grad_norm": 0.06248268112540245, "learning_rate": 7.645258562491562e-05, "loss": 0.263, "step": 32389 }, { "epoch": 2.623946856772521, "grad_norm": 0.07815909385681152, "learning_rate": 7.644808497232099e-05, "loss": 0.2409, "step": 32390 }, { "epoch": 2.6240278677900193, "grad_norm": 0.05346912518143654, "learning_rate": 7.644358431972637e-05, "loss": 0.2733, "step": 32391 }, { "epoch": 2.624108878807518, "grad_norm": 0.07662081718444824, "learning_rate": 7.643908366713174e-05, "loss": 0.213, "step": 32392 }, { "epoch": 2.624189889825016, "grad_norm": 0.07478302717208862, "learning_rate": 7.643458301453711e-05, "loss": 0.2294, "step": 32393 }, { "epoch": 2.6242709008425145, "grad_norm": 0.04852926358580589, "learning_rate": 7.643008236194249e-05, "loss": 0.2121, "step": 32394 }, { "epoch": 2.624351911860013, "grad_norm": 0.06255146116018295, "learning_rate": 7.642558170934786e-05, "loss": 0.2581, "step": 32395 }, { "epoch": 2.6244329228775114, "grad_norm": 0.06238606944680214, "learning_rate": 7.642108105675323e-05, "loss": 0.2354, "step": 32396 }, { "epoch": 2.6245139338950096, "grad_norm": 0.06730019301176071, "learning_rate": 7.641658040415861e-05, "loss": 0.262, "step": 32397 }, { "epoch": 2.6245949449125083, "grad_norm": 0.07908900827169418, "learning_rate": 7.641207975156398e-05, "loss": 0.2703, "step": 32398 }, { "epoch": 2.6246759559300066, "grad_norm": 0.06469932943582535, "learning_rate": 7.640757909896935e-05, "loss": 0.2283, "step": 32399 }, { "epoch": 2.624756966947505, "grad_norm": 0.06892475485801697, "learning_rate": 7.640307844637473e-05, "loss": 0.2434, "step": 32400 }, { "epoch": 2.6248379779650035, "grad_norm": 0.05960692837834358, "learning_rate": 7.63985777937801e-05, "loss": 0.2154, "step": 32401 }, { "epoch": 2.6249189889825018, "grad_norm": 0.06794025003910065, "learning_rate": 7.639407714118548e-05, "loss": 0.2474, "step": 32402 }, { "epoch": 2.625, "grad_norm": 0.08723536133766174, "learning_rate": 7.638957648859085e-05, "loss": 0.2854, "step": 32403 }, { "epoch": 2.6250810110174982, "grad_norm": 0.07772015035152435, "learning_rate": 7.638507583599622e-05, "loss": 0.2649, "step": 32404 }, { "epoch": 2.6251620220349965, "grad_norm": 0.06779235601425171, "learning_rate": 7.63805751834016e-05, "loss": 0.2492, "step": 32405 }, { "epoch": 2.625243033052495, "grad_norm": 0.07044626772403717, "learning_rate": 7.637607453080697e-05, "loss": 0.2549, "step": 32406 }, { "epoch": 2.6253240440699934, "grad_norm": 0.06848505139350891, "learning_rate": 7.637157387821234e-05, "loss": 0.2499, "step": 32407 }, { "epoch": 2.6254050550874917, "grad_norm": 0.05949360504746437, "learning_rate": 7.636707322561772e-05, "loss": 0.2437, "step": 32408 }, { "epoch": 2.6254860661049904, "grad_norm": 0.06853028386831284, "learning_rate": 7.636257257302309e-05, "loss": 0.252, "step": 32409 }, { "epoch": 2.6255670771224886, "grad_norm": 0.06630270928144455, "learning_rate": 7.635807192042846e-05, "loss": 0.2911, "step": 32410 }, { "epoch": 2.625648088139987, "grad_norm": 0.05694075673818588, "learning_rate": 7.635357126783384e-05, "loss": 0.237, "step": 32411 }, { "epoch": 2.6257290991574855, "grad_norm": 0.060872483998537064, "learning_rate": 7.634907061523921e-05, "loss": 0.2001, "step": 32412 }, { "epoch": 2.625810110174984, "grad_norm": 0.07585026323795319, "learning_rate": 7.634456996264459e-05, "loss": 0.2366, "step": 32413 }, { "epoch": 2.625891121192482, "grad_norm": 0.07402799278497696, "learning_rate": 7.634006931004997e-05, "loss": 0.224, "step": 32414 }, { "epoch": 2.6259721322099807, "grad_norm": 0.06976964324712753, "learning_rate": 7.633556865745533e-05, "loss": 0.2131, "step": 32415 }, { "epoch": 2.626053143227479, "grad_norm": 0.06143612414598465, "learning_rate": 7.63310680048607e-05, "loss": 0.2317, "step": 32416 }, { "epoch": 2.626134154244977, "grad_norm": 0.06992180645465851, "learning_rate": 7.63265673522661e-05, "loss": 0.2192, "step": 32417 }, { "epoch": 2.626215165262476, "grad_norm": 0.08070293813943863, "learning_rate": 7.632206669967145e-05, "loss": 0.2618, "step": 32418 }, { "epoch": 2.626296176279974, "grad_norm": 0.052165403962135315, "learning_rate": 7.631756604707683e-05, "loss": 0.1829, "step": 32419 }, { "epoch": 2.6263771872974724, "grad_norm": 0.048813480883836746, "learning_rate": 7.631306539448221e-05, "loss": 0.2209, "step": 32420 }, { "epoch": 2.626458198314971, "grad_norm": 0.07322482764720917, "learning_rate": 7.630856474188757e-05, "loss": 0.2471, "step": 32421 }, { "epoch": 2.6265392093324693, "grad_norm": 0.08113683760166168, "learning_rate": 7.630406408929295e-05, "loss": 0.241, "step": 32422 }, { "epoch": 2.6266202203499676, "grad_norm": 0.06385481357574463, "learning_rate": 7.629956343669833e-05, "loss": 0.2138, "step": 32423 }, { "epoch": 2.6267012313674662, "grad_norm": 0.07413046061992645, "learning_rate": 7.62950627841037e-05, "loss": 0.2732, "step": 32424 }, { "epoch": 2.6267822423849645, "grad_norm": 0.06510314345359802, "learning_rate": 7.629056213150907e-05, "loss": 0.2449, "step": 32425 }, { "epoch": 2.6268632534024627, "grad_norm": 0.06330209225416183, "learning_rate": 7.628606147891446e-05, "loss": 0.2809, "step": 32426 }, { "epoch": 2.626944264419961, "grad_norm": 0.07114989310503006, "learning_rate": 7.628156082631982e-05, "loss": 0.2421, "step": 32427 }, { "epoch": 2.6270252754374592, "grad_norm": 0.05140691623091698, "learning_rate": 7.627706017372519e-05, "loss": 0.2027, "step": 32428 }, { "epoch": 2.627106286454958, "grad_norm": 0.06812848895788193, "learning_rate": 7.627255952113058e-05, "loss": 0.2745, "step": 32429 }, { "epoch": 2.627187297472456, "grad_norm": 0.06321202218532562, "learning_rate": 7.626805886853594e-05, "loss": 0.2336, "step": 32430 }, { "epoch": 2.6272683084899544, "grad_norm": 0.07882923632860184, "learning_rate": 7.626355821594131e-05, "loss": 0.2177, "step": 32431 }, { "epoch": 2.627349319507453, "grad_norm": 0.06591887772083282, "learning_rate": 7.62590575633467e-05, "loss": 0.239, "step": 32432 }, { "epoch": 2.6274303305249513, "grad_norm": 0.06610725075006485, "learning_rate": 7.625455691075206e-05, "loss": 0.221, "step": 32433 }, { "epoch": 2.6275113415424496, "grad_norm": 0.0637730062007904, "learning_rate": 7.625005625815743e-05, "loss": 0.2773, "step": 32434 }, { "epoch": 2.6275923525599483, "grad_norm": 0.06635932624340057, "learning_rate": 7.624555560556282e-05, "loss": 0.2655, "step": 32435 }, { "epoch": 2.6276733635774465, "grad_norm": 0.07389713823795319, "learning_rate": 7.624105495296818e-05, "loss": 0.2285, "step": 32436 }, { "epoch": 2.6277543745949448, "grad_norm": 0.05655698478221893, "learning_rate": 7.623655430037355e-05, "loss": 0.1999, "step": 32437 }, { "epoch": 2.6278353856124435, "grad_norm": 0.062165699899196625, "learning_rate": 7.623205364777894e-05, "loss": 0.2385, "step": 32438 }, { "epoch": 2.6279163966299417, "grad_norm": 0.06502586603164673, "learning_rate": 7.62275529951843e-05, "loss": 0.2192, "step": 32439 }, { "epoch": 2.62799740764744, "grad_norm": 0.0674702525138855, "learning_rate": 7.622305234258967e-05, "loss": 0.2316, "step": 32440 }, { "epoch": 2.6280784186649386, "grad_norm": 0.07493073493242264, "learning_rate": 7.621855168999506e-05, "loss": 0.2313, "step": 32441 }, { "epoch": 2.628159429682437, "grad_norm": 0.0628257542848587, "learning_rate": 7.621405103740042e-05, "loss": 0.2539, "step": 32442 }, { "epoch": 2.628240440699935, "grad_norm": 0.055204570293426514, "learning_rate": 7.620955038480581e-05, "loss": 0.207, "step": 32443 }, { "epoch": 2.628321451717434, "grad_norm": 0.06424355506896973, "learning_rate": 7.620504973221118e-05, "loss": 0.2354, "step": 32444 }, { "epoch": 2.628402462734932, "grad_norm": 0.06352122128009796, "learning_rate": 7.620054907961654e-05, "loss": 0.2193, "step": 32445 }, { "epoch": 2.6284834737524303, "grad_norm": 0.05706331506371498, "learning_rate": 7.619604842702193e-05, "loss": 0.2269, "step": 32446 }, { "epoch": 2.6285644847699285, "grad_norm": 0.07694992423057556, "learning_rate": 7.61915477744273e-05, "loss": 0.2404, "step": 32447 }, { "epoch": 2.6286454957874272, "grad_norm": 0.05537230148911476, "learning_rate": 7.618704712183266e-05, "loss": 0.2304, "step": 32448 }, { "epoch": 2.6287265068049255, "grad_norm": 0.07225795835256577, "learning_rate": 7.618254646923805e-05, "loss": 0.2303, "step": 32449 }, { "epoch": 2.6288075178224237, "grad_norm": 0.07407918572425842, "learning_rate": 7.617804581664342e-05, "loss": 0.2663, "step": 32450 }, { "epoch": 2.628888528839922, "grad_norm": 0.07087674736976624, "learning_rate": 7.617354516404878e-05, "loss": 0.2669, "step": 32451 }, { "epoch": 2.6289695398574207, "grad_norm": 0.08752910792827606, "learning_rate": 7.616904451145417e-05, "loss": 0.2677, "step": 32452 }, { "epoch": 2.629050550874919, "grad_norm": 0.06621308624744415, "learning_rate": 7.616454385885954e-05, "loss": 0.2215, "step": 32453 }, { "epoch": 2.629131561892417, "grad_norm": 0.07405148446559906, "learning_rate": 7.61600432062649e-05, "loss": 0.2325, "step": 32454 }, { "epoch": 2.629212572909916, "grad_norm": 0.08505482971668243, "learning_rate": 7.615554255367029e-05, "loss": 0.3245, "step": 32455 }, { "epoch": 2.629293583927414, "grad_norm": 0.0664757639169693, "learning_rate": 7.615104190107566e-05, "loss": 0.223, "step": 32456 }, { "epoch": 2.6293745949449123, "grad_norm": 0.06615759432315826, "learning_rate": 7.614654124848102e-05, "loss": 0.2758, "step": 32457 }, { "epoch": 2.629455605962411, "grad_norm": 0.054171085357666016, "learning_rate": 7.614204059588641e-05, "loss": 0.1806, "step": 32458 }, { "epoch": 2.6295366169799093, "grad_norm": 0.06245662271976471, "learning_rate": 7.613753994329178e-05, "loss": 0.2236, "step": 32459 }, { "epoch": 2.6296176279974075, "grad_norm": 0.0623309463262558, "learning_rate": 7.613303929069714e-05, "loss": 0.2411, "step": 32460 }, { "epoch": 2.629698639014906, "grad_norm": 0.06058632954955101, "learning_rate": 7.612853863810253e-05, "loss": 0.256, "step": 32461 }, { "epoch": 2.6297796500324044, "grad_norm": 0.06449336558580399, "learning_rate": 7.61240379855079e-05, "loss": 0.2124, "step": 32462 }, { "epoch": 2.6298606610499027, "grad_norm": 0.06935933232307434, "learning_rate": 7.611953733291327e-05, "loss": 0.2831, "step": 32463 }, { "epoch": 2.6299416720674014, "grad_norm": 0.060872264206409454, "learning_rate": 7.611503668031865e-05, "loss": 0.2145, "step": 32464 }, { "epoch": 2.6300226830848996, "grad_norm": 0.07569092512130737, "learning_rate": 7.611053602772403e-05, "loss": 0.2138, "step": 32465 }, { "epoch": 2.630103694102398, "grad_norm": 0.06525085866451263, "learning_rate": 7.610603537512939e-05, "loss": 0.2359, "step": 32466 }, { "epoch": 2.6301847051198965, "grad_norm": 0.06427115201950073, "learning_rate": 7.610153472253477e-05, "loss": 0.2974, "step": 32467 }, { "epoch": 2.630265716137395, "grad_norm": 0.05559571832418442, "learning_rate": 7.609703406994015e-05, "loss": 0.2354, "step": 32468 }, { "epoch": 2.630346727154893, "grad_norm": 0.08632521331310272, "learning_rate": 7.609253341734552e-05, "loss": 0.2556, "step": 32469 }, { "epoch": 2.6304277381723913, "grad_norm": 0.08800216764211655, "learning_rate": 7.60880327647509e-05, "loss": 0.2764, "step": 32470 }, { "epoch": 2.63050874918989, "grad_norm": 0.04657886177301407, "learning_rate": 7.608353211215627e-05, "loss": 0.2174, "step": 32471 }, { "epoch": 2.630589760207388, "grad_norm": 0.06629232317209244, "learning_rate": 7.607903145956164e-05, "loss": 0.2327, "step": 32472 }, { "epoch": 2.6306707712248865, "grad_norm": 0.07411345094442368, "learning_rate": 7.607453080696702e-05, "loss": 0.2397, "step": 32473 }, { "epoch": 2.6307517822423847, "grad_norm": 0.05961480364203453, "learning_rate": 7.607003015437239e-05, "loss": 0.205, "step": 32474 }, { "epoch": 2.6308327932598834, "grad_norm": 0.07046563178300858, "learning_rate": 7.606552950177776e-05, "loss": 0.236, "step": 32475 }, { "epoch": 2.6309138042773816, "grad_norm": 0.06287655979394913, "learning_rate": 7.606102884918314e-05, "loss": 0.2673, "step": 32476 }, { "epoch": 2.63099481529488, "grad_norm": 0.05665929988026619, "learning_rate": 7.605652819658851e-05, "loss": 0.2287, "step": 32477 }, { "epoch": 2.6310758263123786, "grad_norm": 0.07031681388616562, "learning_rate": 7.605202754399388e-05, "loss": 0.2574, "step": 32478 }, { "epoch": 2.631156837329877, "grad_norm": 0.06546088308095932, "learning_rate": 7.604752689139926e-05, "loss": 0.2581, "step": 32479 }, { "epoch": 2.631237848347375, "grad_norm": 0.0613400936126709, "learning_rate": 7.604302623880463e-05, "loss": 0.2315, "step": 32480 }, { "epoch": 2.6313188593648738, "grad_norm": 0.06321492046117783, "learning_rate": 7.603852558621e-05, "loss": 0.2587, "step": 32481 }, { "epoch": 2.631399870382372, "grad_norm": 0.06262946128845215, "learning_rate": 7.603402493361538e-05, "loss": 0.2344, "step": 32482 }, { "epoch": 2.6314808813998702, "grad_norm": 0.08262698352336884, "learning_rate": 7.602952428102075e-05, "loss": 0.2682, "step": 32483 }, { "epoch": 2.631561892417369, "grad_norm": 0.0711028203368187, "learning_rate": 7.602502362842612e-05, "loss": 0.2854, "step": 32484 }, { "epoch": 2.631642903434867, "grad_norm": 0.07169201970100403, "learning_rate": 7.60205229758315e-05, "loss": 0.252, "step": 32485 }, { "epoch": 2.6317239144523654, "grad_norm": 0.10206906497478485, "learning_rate": 7.601602232323687e-05, "loss": 0.3226, "step": 32486 }, { "epoch": 2.631804925469864, "grad_norm": 0.05083823204040527, "learning_rate": 7.601152167064225e-05, "loss": 0.2138, "step": 32487 }, { "epoch": 2.6318859364873624, "grad_norm": 0.07286400347948074, "learning_rate": 7.600702101804762e-05, "loss": 0.2506, "step": 32488 }, { "epoch": 2.6319669475048606, "grad_norm": 0.06732335686683655, "learning_rate": 7.600252036545299e-05, "loss": 0.2105, "step": 32489 }, { "epoch": 2.6320479585223593, "grad_norm": 0.06304507702589035, "learning_rate": 7.599801971285837e-05, "loss": 0.2874, "step": 32490 }, { "epoch": 2.6321289695398575, "grad_norm": 0.06779453158378601, "learning_rate": 7.599351906026374e-05, "loss": 0.2219, "step": 32491 }, { "epoch": 2.6322099805573558, "grad_norm": 0.07301854342222214, "learning_rate": 7.598901840766911e-05, "loss": 0.2083, "step": 32492 }, { "epoch": 2.632290991574854, "grad_norm": 0.07681296765804291, "learning_rate": 7.598451775507449e-05, "loss": 0.2742, "step": 32493 }, { "epoch": 2.6323720025923527, "grad_norm": 0.08773787319660187, "learning_rate": 7.598001710247986e-05, "loss": 0.2812, "step": 32494 }, { "epoch": 2.632453013609851, "grad_norm": 0.07804632931947708, "learning_rate": 7.597551644988525e-05, "loss": 0.2324, "step": 32495 }, { "epoch": 2.632534024627349, "grad_norm": 0.0674925297498703, "learning_rate": 7.597101579729061e-05, "loss": 0.2301, "step": 32496 }, { "epoch": 2.6326150356448474, "grad_norm": 0.05840333551168442, "learning_rate": 7.596651514469598e-05, "loss": 0.2329, "step": 32497 }, { "epoch": 2.632696046662346, "grad_norm": 0.06401608139276505, "learning_rate": 7.596201449210137e-05, "loss": 0.2633, "step": 32498 }, { "epoch": 2.6327770576798444, "grad_norm": 0.0558314323425293, "learning_rate": 7.595751383950673e-05, "loss": 0.2179, "step": 32499 }, { "epoch": 2.6328580686973426, "grad_norm": 0.06202941760420799, "learning_rate": 7.59530131869121e-05, "loss": 0.2683, "step": 32500 }, { "epoch": 2.6329390797148413, "grad_norm": 0.06091583892703056, "learning_rate": 7.594851253431749e-05, "loss": 0.2597, "step": 32501 }, { "epoch": 2.6330200907323396, "grad_norm": 0.08405400812625885, "learning_rate": 7.594401188172285e-05, "loss": 0.2353, "step": 32502 }, { "epoch": 2.633101101749838, "grad_norm": 0.05751390382647514, "learning_rate": 7.593951122912822e-05, "loss": 0.2386, "step": 32503 }, { "epoch": 2.6331821127673365, "grad_norm": 0.06506470590829849, "learning_rate": 7.593501057653361e-05, "loss": 0.2794, "step": 32504 }, { "epoch": 2.6332631237848347, "grad_norm": 0.05713038891553879, "learning_rate": 7.593050992393897e-05, "loss": 0.2323, "step": 32505 }, { "epoch": 2.633344134802333, "grad_norm": 0.058620527386665344, "learning_rate": 7.592600927134434e-05, "loss": 0.2063, "step": 32506 }, { "epoch": 2.6334251458198317, "grad_norm": 0.06940052658319473, "learning_rate": 7.592150861874973e-05, "loss": 0.2366, "step": 32507 }, { "epoch": 2.63350615683733, "grad_norm": 0.07536128908395767, "learning_rate": 7.591700796615509e-05, "loss": 0.2548, "step": 32508 }, { "epoch": 2.633587167854828, "grad_norm": 0.07242191582918167, "learning_rate": 7.591250731356046e-05, "loss": 0.2497, "step": 32509 }, { "epoch": 2.633668178872327, "grad_norm": 0.08319424837827682, "learning_rate": 7.590800666096585e-05, "loss": 0.2839, "step": 32510 }, { "epoch": 2.633749189889825, "grad_norm": 0.06494160741567612, "learning_rate": 7.590350600837121e-05, "loss": 0.2149, "step": 32511 }, { "epoch": 2.6338302009073233, "grad_norm": 0.07588519901037216, "learning_rate": 7.589900535577659e-05, "loss": 0.2237, "step": 32512 }, { "epoch": 2.633911211924822, "grad_norm": 0.07043095678091049, "learning_rate": 7.589450470318197e-05, "loss": 0.2427, "step": 32513 }, { "epoch": 2.6339922229423203, "grad_norm": 0.05470437556505203, "learning_rate": 7.589000405058733e-05, "loss": 0.2254, "step": 32514 }, { "epoch": 2.6340732339598185, "grad_norm": 0.06383230537176132, "learning_rate": 7.58855033979927e-05, "loss": 0.2294, "step": 32515 }, { "epoch": 2.6341542449773168, "grad_norm": 0.06890767812728882, "learning_rate": 7.58810027453981e-05, "loss": 0.2546, "step": 32516 }, { "epoch": 2.6342352559948155, "grad_norm": 0.06413356214761734, "learning_rate": 7.587650209280345e-05, "loss": 0.2194, "step": 32517 }, { "epoch": 2.6343162670123137, "grad_norm": 0.07503025233745575, "learning_rate": 7.587200144020883e-05, "loss": 0.2763, "step": 32518 }, { "epoch": 2.634397278029812, "grad_norm": 0.09024403989315033, "learning_rate": 7.586750078761421e-05, "loss": 0.2639, "step": 32519 }, { "epoch": 2.63447828904731, "grad_norm": 0.08484242111444473, "learning_rate": 7.586300013501957e-05, "loss": 0.2422, "step": 32520 }, { "epoch": 2.634559300064809, "grad_norm": 0.0671757161617279, "learning_rate": 7.585849948242496e-05, "loss": 0.2385, "step": 32521 }, { "epoch": 2.634640311082307, "grad_norm": 0.0698087215423584, "learning_rate": 7.585399882983034e-05, "loss": 0.2244, "step": 32522 }, { "epoch": 2.6347213220998054, "grad_norm": 0.058275751769542694, "learning_rate": 7.58494981772357e-05, "loss": 0.2251, "step": 32523 }, { "epoch": 2.634802333117304, "grad_norm": 0.08362040668725967, "learning_rate": 7.584499752464108e-05, "loss": 0.2318, "step": 32524 }, { "epoch": 2.6348833441348023, "grad_norm": 0.06042616814374924, "learning_rate": 7.584049687204646e-05, "loss": 0.2167, "step": 32525 }, { "epoch": 2.6349643551523005, "grad_norm": 0.06875180453062057, "learning_rate": 7.583599621945182e-05, "loss": 0.2818, "step": 32526 }, { "epoch": 2.6350453661697992, "grad_norm": 0.0656152069568634, "learning_rate": 7.58314955668572e-05, "loss": 0.2206, "step": 32527 }, { "epoch": 2.6351263771872975, "grad_norm": 0.0642925277352333, "learning_rate": 7.582699491426258e-05, "loss": 0.2473, "step": 32528 }, { "epoch": 2.6352073882047957, "grad_norm": 0.07825696468353271, "learning_rate": 7.582249426166794e-05, "loss": 0.2188, "step": 32529 }, { "epoch": 2.6352883992222944, "grad_norm": 0.06980311870574951, "learning_rate": 7.581799360907332e-05, "loss": 0.2673, "step": 32530 }, { "epoch": 2.6353694102397927, "grad_norm": 0.06447041034698486, "learning_rate": 7.58134929564787e-05, "loss": 0.1968, "step": 32531 }, { "epoch": 2.635450421257291, "grad_norm": 0.08126753568649292, "learning_rate": 7.580899230388406e-05, "loss": 0.2308, "step": 32532 }, { "epoch": 2.6355314322747896, "grad_norm": 0.07112924009561539, "learning_rate": 7.580449165128944e-05, "loss": 0.2664, "step": 32533 }, { "epoch": 2.635612443292288, "grad_norm": 0.08120989054441452, "learning_rate": 7.579999099869482e-05, "loss": 0.27, "step": 32534 }, { "epoch": 2.635693454309786, "grad_norm": 0.06857681274414062, "learning_rate": 7.579549034610018e-05, "loss": 0.2216, "step": 32535 }, { "epoch": 2.6357744653272848, "grad_norm": 0.06669405102729797, "learning_rate": 7.579098969350557e-05, "loss": 0.2048, "step": 32536 }, { "epoch": 2.635855476344783, "grad_norm": 0.06707888841629028, "learning_rate": 7.578648904091094e-05, "loss": 0.2508, "step": 32537 }, { "epoch": 2.6359364873622813, "grad_norm": 0.07921718806028366, "learning_rate": 7.57819883883163e-05, "loss": 0.2566, "step": 32538 }, { "epoch": 2.6360174983797795, "grad_norm": 0.06119322031736374, "learning_rate": 7.577748773572169e-05, "loss": 0.245, "step": 32539 }, { "epoch": 2.636098509397278, "grad_norm": 0.05891178548336029, "learning_rate": 7.577298708312706e-05, "loss": 0.2474, "step": 32540 }, { "epoch": 2.6361795204147764, "grad_norm": 0.08063157647848129, "learning_rate": 7.576848643053242e-05, "loss": 0.2611, "step": 32541 }, { "epoch": 2.6362605314322747, "grad_norm": 0.08956170082092285, "learning_rate": 7.576398577793781e-05, "loss": 0.2495, "step": 32542 }, { "epoch": 2.636341542449773, "grad_norm": 0.06798410415649414, "learning_rate": 7.575948512534318e-05, "loss": 0.2467, "step": 32543 }, { "epoch": 2.6364225534672716, "grad_norm": 0.05866193026304245, "learning_rate": 7.575498447274854e-05, "loss": 0.2177, "step": 32544 }, { "epoch": 2.63650356448477, "grad_norm": 0.05509014427661896, "learning_rate": 7.575048382015393e-05, "loss": 0.2732, "step": 32545 }, { "epoch": 2.636584575502268, "grad_norm": 0.07084442675113678, "learning_rate": 7.57459831675593e-05, "loss": 0.2132, "step": 32546 }, { "epoch": 2.636665586519767, "grad_norm": 0.0648268610239029, "learning_rate": 7.574148251496468e-05, "loss": 0.2429, "step": 32547 }, { "epoch": 2.636746597537265, "grad_norm": 0.061500899493694305, "learning_rate": 7.573698186237005e-05, "loss": 0.2264, "step": 32548 }, { "epoch": 2.6368276085547633, "grad_norm": 0.05797162652015686, "learning_rate": 7.573248120977542e-05, "loss": 0.2345, "step": 32549 }, { "epoch": 2.636908619572262, "grad_norm": 0.07138822972774506, "learning_rate": 7.57279805571808e-05, "loss": 0.2401, "step": 32550 }, { "epoch": 2.63698963058976, "grad_norm": 0.06884890049695969, "learning_rate": 7.572347990458617e-05, "loss": 0.218, "step": 32551 }, { "epoch": 2.6370706416072585, "grad_norm": 0.06728807836771011, "learning_rate": 7.571897925199154e-05, "loss": 0.2532, "step": 32552 }, { "epoch": 2.637151652624757, "grad_norm": 0.06018717586994171, "learning_rate": 7.571447859939692e-05, "loss": 0.2437, "step": 32553 }, { "epoch": 2.6372326636422554, "grad_norm": 0.05906055495142937, "learning_rate": 7.570997794680229e-05, "loss": 0.2397, "step": 32554 }, { "epoch": 2.6373136746597536, "grad_norm": 0.07531093806028366, "learning_rate": 7.570547729420766e-05, "loss": 0.2393, "step": 32555 }, { "epoch": 2.6373946856772523, "grad_norm": 0.05766147002577782, "learning_rate": 7.570097664161304e-05, "loss": 0.2038, "step": 32556 }, { "epoch": 2.6374756966947506, "grad_norm": 0.06611020117998123, "learning_rate": 7.569647598901841e-05, "loss": 0.2788, "step": 32557 }, { "epoch": 2.637556707712249, "grad_norm": 0.06789632886648178, "learning_rate": 7.569197533642378e-05, "loss": 0.2282, "step": 32558 }, { "epoch": 2.6376377187297475, "grad_norm": 0.06541197001934052, "learning_rate": 7.568747468382916e-05, "loss": 0.2074, "step": 32559 }, { "epoch": 2.6377187297472457, "grad_norm": 0.05770771950483322, "learning_rate": 7.568297403123453e-05, "loss": 0.2686, "step": 32560 }, { "epoch": 2.637799740764744, "grad_norm": 0.06224161759018898, "learning_rate": 7.56784733786399e-05, "loss": 0.2401, "step": 32561 }, { "epoch": 2.6378807517822422, "grad_norm": 0.07410628348588943, "learning_rate": 7.567397272604528e-05, "loss": 0.2267, "step": 32562 }, { "epoch": 2.637961762799741, "grad_norm": 0.07967900484800339, "learning_rate": 7.566947207345065e-05, "loss": 0.2519, "step": 32563 }, { "epoch": 2.638042773817239, "grad_norm": 0.05484290421009064, "learning_rate": 7.566497142085603e-05, "loss": 0.2387, "step": 32564 }, { "epoch": 2.6381237848347374, "grad_norm": 0.07121595740318298, "learning_rate": 7.56604707682614e-05, "loss": 0.2445, "step": 32565 }, { "epoch": 2.6382047958522357, "grad_norm": 0.07594309002161026, "learning_rate": 7.565597011566677e-05, "loss": 0.2466, "step": 32566 }, { "epoch": 2.6382858068697344, "grad_norm": 0.07039465010166168, "learning_rate": 7.565146946307215e-05, "loss": 0.2453, "step": 32567 }, { "epoch": 2.6383668178872326, "grad_norm": 0.07094226777553558, "learning_rate": 7.564696881047752e-05, "loss": 0.2492, "step": 32568 }, { "epoch": 2.638447828904731, "grad_norm": 0.06495281308889389, "learning_rate": 7.56424681578829e-05, "loss": 0.2191, "step": 32569 }, { "epoch": 2.6385288399222295, "grad_norm": 0.0773189589381218, "learning_rate": 7.563796750528827e-05, "loss": 0.2458, "step": 32570 }, { "epoch": 2.6386098509397278, "grad_norm": 0.06711572408676147, "learning_rate": 7.563346685269364e-05, "loss": 0.222, "step": 32571 }, { "epoch": 2.638690861957226, "grad_norm": 0.07043211907148361, "learning_rate": 7.562896620009902e-05, "loss": 0.2116, "step": 32572 }, { "epoch": 2.6387718729747247, "grad_norm": 0.07517684251070023, "learning_rate": 7.56244655475044e-05, "loss": 0.2516, "step": 32573 }, { "epoch": 2.638852883992223, "grad_norm": 0.05948114022612572, "learning_rate": 7.561996489490976e-05, "loss": 0.2167, "step": 32574 }, { "epoch": 2.638933895009721, "grad_norm": 0.07915988564491272, "learning_rate": 7.561546424231514e-05, "loss": 0.2525, "step": 32575 }, { "epoch": 2.63901490602722, "grad_norm": 0.07200496643781662, "learning_rate": 7.561096358972052e-05, "loss": 0.2496, "step": 32576 }, { "epoch": 2.639095917044718, "grad_norm": 0.06896743178367615, "learning_rate": 7.560646293712588e-05, "loss": 0.2254, "step": 32577 }, { "epoch": 2.6391769280622164, "grad_norm": 0.08511435985565186, "learning_rate": 7.560196228453126e-05, "loss": 0.2259, "step": 32578 }, { "epoch": 2.639257939079715, "grad_norm": 0.06475701183080673, "learning_rate": 7.559746163193664e-05, "loss": 0.2316, "step": 32579 }, { "epoch": 2.6393389500972133, "grad_norm": 0.06055932492017746, "learning_rate": 7.5592960979342e-05, "loss": 0.2858, "step": 32580 }, { "epoch": 2.6394199611147116, "grad_norm": 0.060425933450460434, "learning_rate": 7.558846032674738e-05, "loss": 0.2444, "step": 32581 }, { "epoch": 2.6395009721322102, "grad_norm": 0.058254752308130264, "learning_rate": 7.558395967415277e-05, "loss": 0.2394, "step": 32582 }, { "epoch": 2.6395819831497085, "grad_norm": 0.06561224162578583, "learning_rate": 7.557945902155813e-05, "loss": 0.2432, "step": 32583 }, { "epoch": 2.6396629941672067, "grad_norm": 0.0659739077091217, "learning_rate": 7.55749583689635e-05, "loss": 0.2335, "step": 32584 }, { "epoch": 2.639744005184705, "grad_norm": 0.06240279600024223, "learning_rate": 7.557045771636889e-05, "loss": 0.2339, "step": 32585 }, { "epoch": 2.6398250162022032, "grad_norm": 0.07514799386262894, "learning_rate": 7.556595706377425e-05, "loss": 0.2695, "step": 32586 }, { "epoch": 2.639906027219702, "grad_norm": 0.0739828571677208, "learning_rate": 7.556145641117962e-05, "loss": 0.2369, "step": 32587 }, { "epoch": 2.6399870382372, "grad_norm": 0.0633767768740654, "learning_rate": 7.5556955758585e-05, "loss": 0.23, "step": 32588 }, { "epoch": 2.6400680492546984, "grad_norm": 0.07802503556013107, "learning_rate": 7.555245510599037e-05, "loss": 0.3001, "step": 32589 }, { "epoch": 2.640149060272197, "grad_norm": 0.06382672488689423, "learning_rate": 7.554795445339574e-05, "loss": 0.2588, "step": 32590 }, { "epoch": 2.6402300712896953, "grad_norm": 0.06049477681517601, "learning_rate": 7.554345380080113e-05, "loss": 0.2287, "step": 32591 }, { "epoch": 2.6403110823071936, "grad_norm": 0.06609281152486801, "learning_rate": 7.553895314820649e-05, "loss": 0.2418, "step": 32592 }, { "epoch": 2.6403920933246923, "grad_norm": 0.06597542017698288, "learning_rate": 7.553445249561186e-05, "loss": 0.2306, "step": 32593 }, { "epoch": 2.6404731043421905, "grad_norm": 0.07869094610214233, "learning_rate": 7.552995184301725e-05, "loss": 0.2537, "step": 32594 }, { "epoch": 2.6405541153596888, "grad_norm": 0.06835228204727173, "learning_rate": 7.552545119042261e-05, "loss": 0.24, "step": 32595 }, { "epoch": 2.6406351263771874, "grad_norm": 0.07356154918670654, "learning_rate": 7.552095053782798e-05, "loss": 0.2272, "step": 32596 }, { "epoch": 2.6407161373946857, "grad_norm": 0.09468083083629608, "learning_rate": 7.551644988523337e-05, "loss": 0.2401, "step": 32597 }, { "epoch": 2.640797148412184, "grad_norm": 0.05996778979897499, "learning_rate": 7.551194923263873e-05, "loss": 0.2278, "step": 32598 }, { "epoch": 2.6408781594296826, "grad_norm": 0.07565630972385406, "learning_rate": 7.55074485800441e-05, "loss": 0.2634, "step": 32599 }, { "epoch": 2.640959170447181, "grad_norm": 0.07954513281583786, "learning_rate": 7.550294792744949e-05, "loss": 0.2922, "step": 32600 }, { "epoch": 2.641040181464679, "grad_norm": 0.07488039135932922, "learning_rate": 7.549844727485485e-05, "loss": 0.2688, "step": 32601 }, { "epoch": 2.641121192482178, "grad_norm": 0.06334620714187622, "learning_rate": 7.549394662226024e-05, "loss": 0.2393, "step": 32602 }, { "epoch": 2.641202203499676, "grad_norm": 0.0585486926138401, "learning_rate": 7.548944596966561e-05, "loss": 0.2279, "step": 32603 }, { "epoch": 2.6412832145171743, "grad_norm": 0.08411995321512222, "learning_rate": 7.548494531707097e-05, "loss": 0.2395, "step": 32604 }, { "epoch": 2.641364225534673, "grad_norm": 0.06781220436096191, "learning_rate": 7.548044466447636e-05, "loss": 0.238, "step": 32605 }, { "epoch": 2.6414452365521712, "grad_norm": 0.07711216807365417, "learning_rate": 7.547594401188173e-05, "loss": 0.2428, "step": 32606 }, { "epoch": 2.6415262475696695, "grad_norm": 0.060875892639160156, "learning_rate": 7.547144335928709e-05, "loss": 0.2344, "step": 32607 }, { "epoch": 2.6416072585871677, "grad_norm": 0.05834164097905159, "learning_rate": 7.546694270669248e-05, "loss": 0.2612, "step": 32608 }, { "epoch": 2.641688269604666, "grad_norm": 0.054436735808849335, "learning_rate": 7.546244205409785e-05, "loss": 0.2359, "step": 32609 }, { "epoch": 2.6417692806221647, "grad_norm": 0.05471387878060341, "learning_rate": 7.545794140150321e-05, "loss": 0.2391, "step": 32610 }, { "epoch": 2.641850291639663, "grad_norm": 0.07137314230203629, "learning_rate": 7.54534407489086e-05, "loss": 0.2723, "step": 32611 }, { "epoch": 2.641931302657161, "grad_norm": 0.056198038160800934, "learning_rate": 7.544894009631397e-05, "loss": 0.2534, "step": 32612 }, { "epoch": 2.64201231367466, "grad_norm": 0.06524398922920227, "learning_rate": 7.544443944371933e-05, "loss": 0.2166, "step": 32613 }, { "epoch": 2.642093324692158, "grad_norm": 0.07286935299634933, "learning_rate": 7.543993879112472e-05, "loss": 0.2192, "step": 32614 }, { "epoch": 2.6421743357096563, "grad_norm": 0.07484481483697891, "learning_rate": 7.54354381385301e-05, "loss": 0.2274, "step": 32615 }, { "epoch": 2.642255346727155, "grad_norm": 0.06895321607589722, "learning_rate": 7.543093748593545e-05, "loss": 0.2625, "step": 32616 }, { "epoch": 2.6423363577446533, "grad_norm": 0.05788419023156166, "learning_rate": 7.542643683334084e-05, "loss": 0.2183, "step": 32617 }, { "epoch": 2.6424173687621515, "grad_norm": 0.06298904120922089, "learning_rate": 7.542193618074621e-05, "loss": 0.243, "step": 32618 }, { "epoch": 2.64249837977965, "grad_norm": 0.09361536800861359, "learning_rate": 7.541743552815157e-05, "loss": 0.2904, "step": 32619 }, { "epoch": 2.6425793907971484, "grad_norm": 0.06291305273771286, "learning_rate": 7.541293487555696e-05, "loss": 0.2298, "step": 32620 }, { "epoch": 2.6426604018146467, "grad_norm": 0.07938309013843536, "learning_rate": 7.540843422296234e-05, "loss": 0.2714, "step": 32621 }, { "epoch": 2.6427414128321454, "grad_norm": 0.06279677897691727, "learning_rate": 7.54039335703677e-05, "loss": 0.2719, "step": 32622 }, { "epoch": 2.6428224238496436, "grad_norm": 0.05711670219898224, "learning_rate": 7.539943291777308e-05, "loss": 0.2207, "step": 32623 }, { "epoch": 2.642903434867142, "grad_norm": 0.0746513158082962, "learning_rate": 7.539493226517846e-05, "loss": 0.2689, "step": 32624 }, { "epoch": 2.6429844458846405, "grad_norm": 0.06141054257750511, "learning_rate": 7.539043161258382e-05, "loss": 0.2542, "step": 32625 }, { "epoch": 2.643065456902139, "grad_norm": 0.04914986714720726, "learning_rate": 7.53859309599892e-05, "loss": 0.1971, "step": 32626 }, { "epoch": 2.643146467919637, "grad_norm": 0.06171542406082153, "learning_rate": 7.538143030739458e-05, "loss": 0.2379, "step": 32627 }, { "epoch": 2.6432274789371357, "grad_norm": 0.058278243988752365, "learning_rate": 7.537692965479995e-05, "loss": 0.2454, "step": 32628 }, { "epoch": 2.643308489954634, "grad_norm": 0.06110624596476555, "learning_rate": 7.537242900220532e-05, "loss": 0.2411, "step": 32629 }, { "epoch": 2.643389500972132, "grad_norm": 0.0745861604809761, "learning_rate": 7.53679283496107e-05, "loss": 0.2381, "step": 32630 }, { "epoch": 2.6434705119896305, "grad_norm": 0.06847658008337021, "learning_rate": 7.536342769701607e-05, "loss": 0.2489, "step": 32631 }, { "epoch": 2.6435515230071287, "grad_norm": 0.07420511543750763, "learning_rate": 7.535892704442145e-05, "loss": 0.2661, "step": 32632 }, { "epoch": 2.6436325340246274, "grad_norm": 0.0696389377117157, "learning_rate": 7.535442639182682e-05, "loss": 0.2802, "step": 32633 }, { "epoch": 2.6437135450421256, "grad_norm": 0.05882401019334793, "learning_rate": 7.534992573923219e-05, "loss": 0.2239, "step": 32634 }, { "epoch": 2.643794556059624, "grad_norm": 0.08121724426746368, "learning_rate": 7.534542508663757e-05, "loss": 0.2467, "step": 32635 }, { "epoch": 2.6438755670771226, "grad_norm": 0.07373958826065063, "learning_rate": 7.534092443404294e-05, "loss": 0.2181, "step": 32636 }, { "epoch": 2.643956578094621, "grad_norm": 0.06903345882892609, "learning_rate": 7.533642378144831e-05, "loss": 0.2287, "step": 32637 }, { "epoch": 2.644037589112119, "grad_norm": 0.05462869256734848, "learning_rate": 7.533192312885369e-05, "loss": 0.2376, "step": 32638 }, { "epoch": 2.6441186001296177, "grad_norm": 0.08030558377504349, "learning_rate": 7.532742247625906e-05, "loss": 0.2477, "step": 32639 }, { "epoch": 2.644199611147116, "grad_norm": 0.06077692657709122, "learning_rate": 7.532292182366443e-05, "loss": 0.2466, "step": 32640 }, { "epoch": 2.6442806221646142, "grad_norm": 0.07113752514123917, "learning_rate": 7.531842117106981e-05, "loss": 0.1929, "step": 32641 }, { "epoch": 2.644361633182113, "grad_norm": 0.07391038537025452, "learning_rate": 7.531392051847518e-05, "loss": 0.2271, "step": 32642 }, { "epoch": 2.644442644199611, "grad_norm": 0.06465182453393936, "learning_rate": 7.530941986588055e-05, "loss": 0.2248, "step": 32643 }, { "epoch": 2.6445236552171094, "grad_norm": 0.0905710756778717, "learning_rate": 7.530491921328593e-05, "loss": 0.2302, "step": 32644 }, { "epoch": 2.644604666234608, "grad_norm": 0.06979474425315857, "learning_rate": 7.53004185606913e-05, "loss": 0.257, "step": 32645 }, { "epoch": 2.6446856772521063, "grad_norm": 0.09235589951276779, "learning_rate": 7.529591790809668e-05, "loss": 0.2435, "step": 32646 }, { "epoch": 2.6447666882696046, "grad_norm": 0.0746222734451294, "learning_rate": 7.529141725550205e-05, "loss": 0.22, "step": 32647 }, { "epoch": 2.6448476992871033, "grad_norm": 0.06662612408399582, "learning_rate": 7.528691660290742e-05, "loss": 0.282, "step": 32648 }, { "epoch": 2.6449287103046015, "grad_norm": 0.07575134187936783, "learning_rate": 7.52824159503128e-05, "loss": 0.2775, "step": 32649 }, { "epoch": 2.6450097213220998, "grad_norm": 0.06182150915265083, "learning_rate": 7.527791529771817e-05, "loss": 0.2392, "step": 32650 }, { "epoch": 2.6450907323395985, "grad_norm": 0.05878112465143204, "learning_rate": 7.527341464512354e-05, "loss": 0.2584, "step": 32651 }, { "epoch": 2.6451717433570967, "grad_norm": 0.06054788455367088, "learning_rate": 7.526891399252892e-05, "loss": 0.2283, "step": 32652 }, { "epoch": 2.645252754374595, "grad_norm": 0.06147018447518349, "learning_rate": 7.526441333993429e-05, "loss": 0.2315, "step": 32653 }, { "epoch": 2.645333765392093, "grad_norm": 0.07113911956548691, "learning_rate": 7.525991268733968e-05, "loss": 0.2514, "step": 32654 }, { "epoch": 2.6454147764095914, "grad_norm": 0.07609254866838455, "learning_rate": 7.525541203474504e-05, "loss": 0.2986, "step": 32655 }, { "epoch": 2.64549578742709, "grad_norm": 0.0673244446516037, "learning_rate": 7.525091138215041e-05, "loss": 0.2681, "step": 32656 }, { "epoch": 2.6455767984445884, "grad_norm": 0.07759573310613632, "learning_rate": 7.52464107295558e-05, "loss": 0.2694, "step": 32657 }, { "epoch": 2.6456578094620866, "grad_norm": 0.05810312554240227, "learning_rate": 7.524191007696116e-05, "loss": 0.2054, "step": 32658 }, { "epoch": 2.6457388204795853, "grad_norm": 0.06164870783686638, "learning_rate": 7.523740942436653e-05, "loss": 0.2628, "step": 32659 }, { "epoch": 2.6458198314970836, "grad_norm": 0.06307579576969147, "learning_rate": 7.523290877177192e-05, "loss": 0.2479, "step": 32660 }, { "epoch": 2.645900842514582, "grad_norm": 0.06636112183332443, "learning_rate": 7.522840811917728e-05, "loss": 0.2731, "step": 32661 }, { "epoch": 2.6459818535320805, "grad_norm": 0.08152161538600922, "learning_rate": 7.522390746658265e-05, "loss": 0.2277, "step": 32662 }, { "epoch": 2.6460628645495787, "grad_norm": 0.06108549237251282, "learning_rate": 7.521940681398804e-05, "loss": 0.2522, "step": 32663 }, { "epoch": 2.646143875567077, "grad_norm": 0.06697442382574081, "learning_rate": 7.52149061613934e-05, "loss": 0.2561, "step": 32664 }, { "epoch": 2.6462248865845757, "grad_norm": 0.0735754519701004, "learning_rate": 7.521040550879877e-05, "loss": 0.197, "step": 32665 }, { "epoch": 2.646305897602074, "grad_norm": 0.07068278640508652, "learning_rate": 7.520590485620416e-05, "loss": 0.2477, "step": 32666 }, { "epoch": 2.646386908619572, "grad_norm": 0.07574401050806046, "learning_rate": 7.520140420360952e-05, "loss": 0.2381, "step": 32667 }, { "epoch": 2.646467919637071, "grad_norm": 0.05677516758441925, "learning_rate": 7.51969035510149e-05, "loss": 0.2326, "step": 32668 }, { "epoch": 2.646548930654569, "grad_norm": 0.0748014971613884, "learning_rate": 7.519240289842028e-05, "loss": 0.2259, "step": 32669 }, { "epoch": 2.6466299416720673, "grad_norm": 0.07873434573411942, "learning_rate": 7.518790224582564e-05, "loss": 0.245, "step": 32670 }, { "epoch": 2.646710952689566, "grad_norm": 0.06972617655992508, "learning_rate": 7.518340159323102e-05, "loss": 0.2302, "step": 32671 }, { "epoch": 2.6467919637070643, "grad_norm": 0.07274175435304642, "learning_rate": 7.51789009406364e-05, "loss": 0.2413, "step": 32672 }, { "epoch": 2.6468729747245625, "grad_norm": 0.07968560606241226, "learning_rate": 7.517440028804176e-05, "loss": 0.2227, "step": 32673 }, { "epoch": 2.6469539857420608, "grad_norm": 0.07307036966085434, "learning_rate": 7.516989963544714e-05, "loss": 0.2334, "step": 32674 }, { "epoch": 2.6470349967595594, "grad_norm": 0.07144438475370407, "learning_rate": 7.516539898285252e-05, "loss": 0.2635, "step": 32675 }, { "epoch": 2.6471160077770577, "grad_norm": 0.0709671601653099, "learning_rate": 7.516089833025788e-05, "loss": 0.2459, "step": 32676 }, { "epoch": 2.647197018794556, "grad_norm": 0.07104463130235672, "learning_rate": 7.515639767766326e-05, "loss": 0.26, "step": 32677 }, { "epoch": 2.647278029812054, "grad_norm": 0.0711294636130333, "learning_rate": 7.515189702506864e-05, "loss": 0.2542, "step": 32678 }, { "epoch": 2.647359040829553, "grad_norm": 0.07094048708677292, "learning_rate": 7.5147396372474e-05, "loss": 0.2864, "step": 32679 }, { "epoch": 2.647440051847051, "grad_norm": 0.07097669690847397, "learning_rate": 7.514289571987939e-05, "loss": 0.2434, "step": 32680 }, { "epoch": 2.6475210628645494, "grad_norm": 0.0773238092660904, "learning_rate": 7.513839506728477e-05, "loss": 0.2633, "step": 32681 }, { "epoch": 2.647602073882048, "grad_norm": 0.061735138297080994, "learning_rate": 7.513389441469013e-05, "loss": 0.2205, "step": 32682 }, { "epoch": 2.6476830848995463, "grad_norm": 0.08001622557640076, "learning_rate": 7.512939376209551e-05, "loss": 0.2697, "step": 32683 }, { "epoch": 2.6477640959170445, "grad_norm": 0.05978796258568764, "learning_rate": 7.512489310950089e-05, "loss": 0.2402, "step": 32684 }, { "epoch": 2.6478451069345432, "grad_norm": 0.06869897246360779, "learning_rate": 7.512039245690625e-05, "loss": 0.2341, "step": 32685 }, { "epoch": 2.6479261179520415, "grad_norm": 0.07840482890605927, "learning_rate": 7.511589180431163e-05, "loss": 0.2416, "step": 32686 }, { "epoch": 2.6480071289695397, "grad_norm": 0.08068672567605972, "learning_rate": 7.511139115171701e-05, "loss": 0.2327, "step": 32687 }, { "epoch": 2.6480881399870384, "grad_norm": 0.0856936126947403, "learning_rate": 7.510689049912237e-05, "loss": 0.2434, "step": 32688 }, { "epoch": 2.6481691510045366, "grad_norm": 0.06403835862874985, "learning_rate": 7.510238984652775e-05, "loss": 0.2453, "step": 32689 }, { "epoch": 2.648250162022035, "grad_norm": 0.05858549848198891, "learning_rate": 7.509788919393313e-05, "loss": 0.2318, "step": 32690 }, { "epoch": 2.6483311730395336, "grad_norm": 0.0913025438785553, "learning_rate": 7.509338854133849e-05, "loss": 0.2923, "step": 32691 }, { "epoch": 2.648412184057032, "grad_norm": 0.05180613324046135, "learning_rate": 7.508888788874387e-05, "loss": 0.211, "step": 32692 }, { "epoch": 2.64849319507453, "grad_norm": 0.06905458867549896, "learning_rate": 7.508438723614925e-05, "loss": 0.2236, "step": 32693 }, { "epoch": 2.6485742060920288, "grad_norm": 0.07221891731023788, "learning_rate": 7.507988658355461e-05, "loss": 0.2228, "step": 32694 }, { "epoch": 2.648655217109527, "grad_norm": 0.07029981166124344, "learning_rate": 7.507538593096e-05, "loss": 0.288, "step": 32695 }, { "epoch": 2.6487362281270252, "grad_norm": 0.07628034800291061, "learning_rate": 7.507088527836537e-05, "loss": 0.2504, "step": 32696 }, { "epoch": 2.6488172391445235, "grad_norm": 0.0622580461204052, "learning_rate": 7.506638462577073e-05, "loss": 0.2462, "step": 32697 }, { "epoch": 2.648898250162022, "grad_norm": 0.06469859927892685, "learning_rate": 7.506188397317612e-05, "loss": 0.238, "step": 32698 }, { "epoch": 2.6489792611795204, "grad_norm": 0.09067538380622864, "learning_rate": 7.505738332058149e-05, "loss": 0.2341, "step": 32699 }, { "epoch": 2.6490602721970187, "grad_norm": 0.062475938349962234, "learning_rate": 7.505288266798685e-05, "loss": 0.2527, "step": 32700 }, { "epoch": 2.649141283214517, "grad_norm": 0.08265208452939987, "learning_rate": 7.504838201539224e-05, "loss": 0.2583, "step": 32701 }, { "epoch": 2.6492222942320156, "grad_norm": 0.0584041066467762, "learning_rate": 7.504388136279761e-05, "loss": 0.2062, "step": 32702 }, { "epoch": 2.649303305249514, "grad_norm": 0.06874685734510422, "learning_rate": 7.503938071020297e-05, "loss": 0.3045, "step": 32703 }, { "epoch": 2.649384316267012, "grad_norm": 0.08429435640573502, "learning_rate": 7.503488005760836e-05, "loss": 0.246, "step": 32704 }, { "epoch": 2.649465327284511, "grad_norm": 0.0826609656214714, "learning_rate": 7.503037940501373e-05, "loss": 0.2725, "step": 32705 }, { "epoch": 2.649546338302009, "grad_norm": 0.06336696445941925, "learning_rate": 7.50258787524191e-05, "loss": 0.2564, "step": 32706 }, { "epoch": 2.6496273493195073, "grad_norm": 0.06022218242287636, "learning_rate": 7.502137809982448e-05, "loss": 0.2375, "step": 32707 }, { "epoch": 2.649708360337006, "grad_norm": 0.07887089252471924, "learning_rate": 7.501687744722985e-05, "loss": 0.2879, "step": 32708 }, { "epoch": 2.649789371354504, "grad_norm": 0.06073421984910965, "learning_rate": 7.501237679463523e-05, "loss": 0.254, "step": 32709 }, { "epoch": 2.6498703823720025, "grad_norm": 0.06496462225914001, "learning_rate": 7.50078761420406e-05, "loss": 0.2568, "step": 32710 }, { "epoch": 2.649951393389501, "grad_norm": 0.060677289962768555, "learning_rate": 7.500337548944597e-05, "loss": 0.2228, "step": 32711 }, { "epoch": 2.6500324044069994, "grad_norm": 0.06383208930492401, "learning_rate": 7.499887483685135e-05, "loss": 0.2338, "step": 32712 }, { "epoch": 2.6501134154244976, "grad_norm": 0.059448208659887314, "learning_rate": 7.499437418425672e-05, "loss": 0.2227, "step": 32713 }, { "epoch": 2.6501944264419963, "grad_norm": 0.06204259768128395, "learning_rate": 7.49898735316621e-05, "loss": 0.2526, "step": 32714 }, { "epoch": 2.6502754374594946, "grad_norm": 0.051161028444767, "learning_rate": 7.498537287906747e-05, "loss": 0.2497, "step": 32715 }, { "epoch": 2.650356448476993, "grad_norm": 0.07101891189813614, "learning_rate": 7.498087222647284e-05, "loss": 0.2082, "step": 32716 }, { "epoch": 2.6504374594944915, "grad_norm": 0.05326693132519722, "learning_rate": 7.497637157387822e-05, "loss": 0.219, "step": 32717 }, { "epoch": 2.6505184705119897, "grad_norm": 0.06772123277187347, "learning_rate": 7.497187092128359e-05, "loss": 0.263, "step": 32718 }, { "epoch": 2.650599481529488, "grad_norm": 0.06042374670505524, "learning_rate": 7.496737026868896e-05, "loss": 0.2462, "step": 32719 }, { "epoch": 2.6506804925469862, "grad_norm": 0.06658099591732025, "learning_rate": 7.496286961609434e-05, "loss": 0.2341, "step": 32720 }, { "epoch": 2.650761503564485, "grad_norm": 0.0610867403447628, "learning_rate": 7.495836896349971e-05, "loss": 0.2434, "step": 32721 }, { "epoch": 2.650842514581983, "grad_norm": 0.06181452050805092, "learning_rate": 7.495386831090508e-05, "loss": 0.2349, "step": 32722 }, { "epoch": 2.6509235255994814, "grad_norm": 0.06832627952098846, "learning_rate": 7.494936765831046e-05, "loss": 0.2463, "step": 32723 }, { "epoch": 2.6510045366169797, "grad_norm": 0.06687076389789581, "learning_rate": 7.494486700571583e-05, "loss": 0.245, "step": 32724 }, { "epoch": 2.6510855476344783, "grad_norm": 0.073924221098423, "learning_rate": 7.49403663531212e-05, "loss": 0.2394, "step": 32725 }, { "epoch": 2.6511665586519766, "grad_norm": 0.06764573603868484, "learning_rate": 7.493586570052658e-05, "loss": 0.268, "step": 32726 }, { "epoch": 2.651247569669475, "grad_norm": 0.056262798607349396, "learning_rate": 7.493136504793195e-05, "loss": 0.1928, "step": 32727 }, { "epoch": 2.6513285806869735, "grad_norm": 0.08186808228492737, "learning_rate": 7.492686439533732e-05, "loss": 0.2306, "step": 32728 }, { "epoch": 2.6514095917044718, "grad_norm": 0.06322402507066727, "learning_rate": 7.49223637427427e-05, "loss": 0.1993, "step": 32729 }, { "epoch": 2.65149060272197, "grad_norm": 0.06521429866552353, "learning_rate": 7.491786309014807e-05, "loss": 0.2408, "step": 32730 }, { "epoch": 2.6515716137394687, "grad_norm": 0.06778412312269211, "learning_rate": 7.491336243755345e-05, "loss": 0.2515, "step": 32731 }, { "epoch": 2.651652624756967, "grad_norm": 0.05836867913603783, "learning_rate": 7.490886178495883e-05, "loss": 0.2386, "step": 32732 }, { "epoch": 2.651733635774465, "grad_norm": 0.07563242316246033, "learning_rate": 7.490436113236419e-05, "loss": 0.2285, "step": 32733 }, { "epoch": 2.651814646791964, "grad_norm": 0.07448039948940277, "learning_rate": 7.489986047976957e-05, "loss": 0.2491, "step": 32734 }, { "epoch": 2.651895657809462, "grad_norm": 0.07622935622930527, "learning_rate": 7.489535982717495e-05, "loss": 0.2075, "step": 32735 }, { "epoch": 2.6519766688269604, "grad_norm": 0.07185497879981995, "learning_rate": 7.489085917458031e-05, "loss": 0.2174, "step": 32736 }, { "epoch": 2.652057679844459, "grad_norm": 0.06740206480026245, "learning_rate": 7.488635852198569e-05, "loss": 0.2716, "step": 32737 }, { "epoch": 2.6521386908619573, "grad_norm": 0.07669515907764435, "learning_rate": 7.488185786939107e-05, "loss": 0.2235, "step": 32738 }, { "epoch": 2.6522197018794555, "grad_norm": 0.0658188983798027, "learning_rate": 7.487735721679643e-05, "loss": 0.2752, "step": 32739 }, { "epoch": 2.6523007128969542, "grad_norm": 0.07216110080480576, "learning_rate": 7.487285656420181e-05, "loss": 0.2269, "step": 32740 }, { "epoch": 2.6523817239144525, "grad_norm": 0.059288520365953445, "learning_rate": 7.48683559116072e-05, "loss": 0.2627, "step": 32741 }, { "epoch": 2.6524627349319507, "grad_norm": 0.0678819864988327, "learning_rate": 7.486385525901256e-05, "loss": 0.2596, "step": 32742 }, { "epoch": 2.652543745949449, "grad_norm": 0.062606081366539, "learning_rate": 7.485935460641793e-05, "loss": 0.2459, "step": 32743 }, { "epoch": 2.6526247569669477, "grad_norm": 0.07655350863933563, "learning_rate": 7.485485395382332e-05, "loss": 0.2138, "step": 32744 }, { "epoch": 2.652705767984446, "grad_norm": 0.06734804064035416, "learning_rate": 7.485035330122868e-05, "loss": 0.2406, "step": 32745 }, { "epoch": 2.652786779001944, "grad_norm": 0.07340921461582184, "learning_rate": 7.484585264863405e-05, "loss": 0.2493, "step": 32746 }, { "epoch": 2.6528677900194424, "grad_norm": 0.06226073205471039, "learning_rate": 7.484135199603944e-05, "loss": 0.22, "step": 32747 }, { "epoch": 2.652948801036941, "grad_norm": 0.07011928409337997, "learning_rate": 7.48368513434448e-05, "loss": 0.2151, "step": 32748 }, { "epoch": 2.6530298120544393, "grad_norm": 0.06465835869312286, "learning_rate": 7.483235069085017e-05, "loss": 0.2576, "step": 32749 }, { "epoch": 2.6531108230719376, "grad_norm": 0.05908047780394554, "learning_rate": 7.482785003825556e-05, "loss": 0.2277, "step": 32750 }, { "epoch": 2.6531918340894363, "grad_norm": 0.07969486713409424, "learning_rate": 7.482334938566092e-05, "loss": 0.2495, "step": 32751 }, { "epoch": 2.6532728451069345, "grad_norm": 0.08361916989088058, "learning_rate": 7.481884873306629e-05, "loss": 0.2756, "step": 32752 }, { "epoch": 2.6533538561244328, "grad_norm": 0.06488799303770065, "learning_rate": 7.481434808047168e-05, "loss": 0.2179, "step": 32753 }, { "epoch": 2.6534348671419314, "grad_norm": 0.06097380816936493, "learning_rate": 7.480984742787704e-05, "loss": 0.2414, "step": 32754 }, { "epoch": 2.6535158781594297, "grad_norm": 0.06575482338666916, "learning_rate": 7.480534677528241e-05, "loss": 0.2405, "step": 32755 }, { "epoch": 2.653596889176928, "grad_norm": 0.07554206252098083, "learning_rate": 7.48008461226878e-05, "loss": 0.2275, "step": 32756 }, { "epoch": 2.6536779001944266, "grad_norm": 0.06285907328128815, "learning_rate": 7.479634547009316e-05, "loss": 0.2471, "step": 32757 }, { "epoch": 2.653758911211925, "grad_norm": 0.07046081870794296, "learning_rate": 7.479184481749853e-05, "loss": 0.2722, "step": 32758 }, { "epoch": 2.653839922229423, "grad_norm": 0.060215841978788376, "learning_rate": 7.478734416490392e-05, "loss": 0.255, "step": 32759 }, { "epoch": 2.653920933246922, "grad_norm": 0.07125762850046158, "learning_rate": 7.478284351230928e-05, "loss": 0.2296, "step": 32760 }, { "epoch": 2.65400194426442, "grad_norm": 0.062798410654068, "learning_rate": 7.477834285971467e-05, "loss": 0.2356, "step": 32761 }, { "epoch": 2.6540829552819183, "grad_norm": 0.06494462490081787, "learning_rate": 7.477384220712004e-05, "loss": 0.2589, "step": 32762 }, { "epoch": 2.654163966299417, "grad_norm": 0.05592583492398262, "learning_rate": 7.47693415545254e-05, "loss": 0.2291, "step": 32763 }, { "epoch": 2.654244977316915, "grad_norm": 0.060103777796030045, "learning_rate": 7.476484090193079e-05, "loss": 0.2522, "step": 32764 }, { "epoch": 2.6543259883344135, "grad_norm": 0.05874945595860481, "learning_rate": 7.476034024933616e-05, "loss": 0.2365, "step": 32765 }, { "epoch": 2.6544069993519117, "grad_norm": 0.07520363479852676, "learning_rate": 7.475583959674152e-05, "loss": 0.2695, "step": 32766 }, { "epoch": 2.6544880103694104, "grad_norm": 0.06662902981042862, "learning_rate": 7.475133894414691e-05, "loss": 0.285, "step": 32767 }, { "epoch": 2.6545690213869086, "grad_norm": 0.06352225691080093, "learning_rate": 7.474683829155228e-05, "loss": 0.2239, "step": 32768 }, { "epoch": 2.654650032404407, "grad_norm": 0.061932697892189026, "learning_rate": 7.474233763895764e-05, "loss": 0.2295, "step": 32769 }, { "epoch": 2.654731043421905, "grad_norm": 0.08426105231046677, "learning_rate": 7.473783698636303e-05, "loss": 0.2352, "step": 32770 }, { "epoch": 2.654812054439404, "grad_norm": 0.06787164509296417, "learning_rate": 7.47333363337684e-05, "loss": 0.2387, "step": 32771 }, { "epoch": 2.654893065456902, "grad_norm": 0.07294077426195145, "learning_rate": 7.472883568117378e-05, "loss": 0.2472, "step": 32772 }, { "epoch": 2.6549740764744003, "grad_norm": 0.06752139329910278, "learning_rate": 7.472433502857915e-05, "loss": 0.2441, "step": 32773 }, { "epoch": 2.655055087491899, "grad_norm": 0.07813578844070435, "learning_rate": 7.471983437598452e-05, "loss": 0.2892, "step": 32774 }, { "epoch": 2.6551360985093972, "grad_norm": 0.06288386881351471, "learning_rate": 7.47153337233899e-05, "loss": 0.2494, "step": 32775 }, { "epoch": 2.6552171095268955, "grad_norm": 0.06610371172428131, "learning_rate": 7.471083307079527e-05, "loss": 0.2302, "step": 32776 }, { "epoch": 2.655298120544394, "grad_norm": 0.0636439397931099, "learning_rate": 7.470633241820064e-05, "loss": 0.2547, "step": 32777 }, { "epoch": 2.6553791315618924, "grad_norm": 0.05692015960812569, "learning_rate": 7.470183176560602e-05, "loss": 0.2121, "step": 32778 }, { "epoch": 2.6554601425793907, "grad_norm": 0.06695279479026794, "learning_rate": 7.469733111301139e-05, "loss": 0.2443, "step": 32779 }, { "epoch": 2.6555411535968894, "grad_norm": 0.05734477564692497, "learning_rate": 7.469283046041677e-05, "loss": 0.2494, "step": 32780 }, { "epoch": 2.6556221646143876, "grad_norm": 0.07733457535505295, "learning_rate": 7.468832980782214e-05, "loss": 0.2526, "step": 32781 }, { "epoch": 2.655703175631886, "grad_norm": 0.07286423444747925, "learning_rate": 7.468382915522751e-05, "loss": 0.2524, "step": 32782 }, { "epoch": 2.6557841866493845, "grad_norm": 0.05612710863351822, "learning_rate": 7.467932850263289e-05, "loss": 0.2019, "step": 32783 }, { "epoch": 2.655865197666883, "grad_norm": 0.06247418001294136, "learning_rate": 7.467482785003826e-05, "loss": 0.2558, "step": 32784 }, { "epoch": 2.655946208684381, "grad_norm": 0.06592721492052078, "learning_rate": 7.467032719744363e-05, "loss": 0.2253, "step": 32785 }, { "epoch": 2.6560272197018797, "grad_norm": 0.0667741596698761, "learning_rate": 7.466582654484901e-05, "loss": 0.2555, "step": 32786 }, { "epoch": 2.656108230719378, "grad_norm": 0.09115524590015411, "learning_rate": 7.466132589225438e-05, "loss": 0.3118, "step": 32787 }, { "epoch": 2.656189241736876, "grad_norm": 0.06214887276291847, "learning_rate": 7.465682523965975e-05, "loss": 0.2552, "step": 32788 }, { "epoch": 2.6562702527543745, "grad_norm": 0.07011143118143082, "learning_rate": 7.465232458706513e-05, "loss": 0.2423, "step": 32789 }, { "epoch": 2.656351263771873, "grad_norm": 0.0744331106543541, "learning_rate": 7.46478239344705e-05, "loss": 0.2576, "step": 32790 }, { "epoch": 2.6564322747893714, "grad_norm": 0.07393400371074677, "learning_rate": 7.464332328187588e-05, "loss": 0.2289, "step": 32791 }, { "epoch": 2.6565132858068696, "grad_norm": 0.06812934577465057, "learning_rate": 7.463882262928125e-05, "loss": 0.2269, "step": 32792 }, { "epoch": 2.656594296824368, "grad_norm": 0.06811689585447311, "learning_rate": 7.463432197668662e-05, "loss": 0.2322, "step": 32793 }, { "epoch": 2.6566753078418666, "grad_norm": 0.06463243812322617, "learning_rate": 7.4629821324092e-05, "loss": 0.244, "step": 32794 }, { "epoch": 2.656756318859365, "grad_norm": 0.06714651733636856, "learning_rate": 7.462532067149737e-05, "loss": 0.2522, "step": 32795 }, { "epoch": 2.656837329876863, "grad_norm": 0.07441917061805725, "learning_rate": 7.462082001890274e-05, "loss": 0.2573, "step": 32796 }, { "epoch": 2.6569183408943617, "grad_norm": 0.04962689429521561, "learning_rate": 7.461631936630812e-05, "loss": 0.2251, "step": 32797 }, { "epoch": 2.65699935191186, "grad_norm": 0.0671190395951271, "learning_rate": 7.461181871371349e-05, "loss": 0.2388, "step": 32798 }, { "epoch": 2.6570803629293582, "grad_norm": 0.05777635797858238, "learning_rate": 7.460731806111886e-05, "loss": 0.2143, "step": 32799 }, { "epoch": 2.657161373946857, "grad_norm": 0.07067793607711792, "learning_rate": 7.460281740852424e-05, "loss": 0.26, "step": 32800 }, { "epoch": 2.657242384964355, "grad_norm": 0.080874502658844, "learning_rate": 7.459831675592961e-05, "loss": 0.2637, "step": 32801 }, { "epoch": 2.6573233959818534, "grad_norm": 0.05750936269760132, "learning_rate": 7.459381610333498e-05, "loss": 0.2358, "step": 32802 }, { "epoch": 2.657404406999352, "grad_norm": 0.07016448676586151, "learning_rate": 7.458931545074036e-05, "loss": 0.2365, "step": 32803 }, { "epoch": 2.6574854180168503, "grad_norm": 0.05285506322979927, "learning_rate": 7.458481479814573e-05, "loss": 0.2466, "step": 32804 }, { "epoch": 2.6575664290343486, "grad_norm": 0.06905540823936462, "learning_rate": 7.45803141455511e-05, "loss": 0.2385, "step": 32805 }, { "epoch": 2.6576474400518473, "grad_norm": 0.07303239405155182, "learning_rate": 7.457581349295648e-05, "loss": 0.254, "step": 32806 }, { "epoch": 2.6577284510693455, "grad_norm": 0.07117251306772232, "learning_rate": 7.457131284036185e-05, "loss": 0.2309, "step": 32807 }, { "epoch": 2.6578094620868438, "grad_norm": 0.07382563501596451, "learning_rate": 7.456681218776723e-05, "loss": 0.2534, "step": 32808 }, { "epoch": 2.6578904731043425, "grad_norm": 0.0767350047826767, "learning_rate": 7.45623115351726e-05, "loss": 0.2698, "step": 32809 }, { "epoch": 2.6579714841218407, "grad_norm": 0.06391940265893936, "learning_rate": 7.455781088257797e-05, "loss": 0.2923, "step": 32810 }, { "epoch": 2.658052495139339, "grad_norm": 0.07749442756175995, "learning_rate": 7.455331022998335e-05, "loss": 0.2898, "step": 32811 }, { "epoch": 2.658133506156837, "grad_norm": 0.06880564242601395, "learning_rate": 7.454880957738872e-05, "loss": 0.2209, "step": 32812 }, { "epoch": 2.6582145171743354, "grad_norm": 0.06603529304265976, "learning_rate": 7.454430892479411e-05, "loss": 0.2457, "step": 32813 }, { "epoch": 2.658295528191834, "grad_norm": 0.07407406717538834, "learning_rate": 7.453980827219947e-05, "loss": 0.2332, "step": 32814 }, { "epoch": 2.6583765392093324, "grad_norm": 0.059012070298194885, "learning_rate": 7.453530761960484e-05, "loss": 0.2146, "step": 32815 }, { "epoch": 2.6584575502268306, "grad_norm": 0.06564911454916, "learning_rate": 7.453080696701023e-05, "loss": 0.2453, "step": 32816 }, { "epoch": 2.6585385612443293, "grad_norm": 0.06099837273359299, "learning_rate": 7.452630631441559e-05, "loss": 0.2525, "step": 32817 }, { "epoch": 2.6586195722618275, "grad_norm": 0.06015372276306152, "learning_rate": 7.452180566182096e-05, "loss": 0.2526, "step": 32818 }, { "epoch": 2.658700583279326, "grad_norm": 0.07565578818321228, "learning_rate": 7.451730500922635e-05, "loss": 0.2416, "step": 32819 }, { "epoch": 2.6587815942968245, "grad_norm": 0.07236052304506302, "learning_rate": 7.451280435663171e-05, "loss": 0.2683, "step": 32820 }, { "epoch": 2.6588626053143227, "grad_norm": 0.08145192265510559, "learning_rate": 7.450830370403708e-05, "loss": 0.2372, "step": 32821 }, { "epoch": 2.658943616331821, "grad_norm": 0.0653989315032959, "learning_rate": 7.450380305144247e-05, "loss": 0.2019, "step": 32822 }, { "epoch": 2.6590246273493197, "grad_norm": 0.08094444870948792, "learning_rate": 7.449930239884783e-05, "loss": 0.2539, "step": 32823 }, { "epoch": 2.659105638366818, "grad_norm": 0.06678465008735657, "learning_rate": 7.44948017462532e-05, "loss": 0.2444, "step": 32824 }, { "epoch": 2.659186649384316, "grad_norm": 0.0730195865035057, "learning_rate": 7.449030109365859e-05, "loss": 0.2558, "step": 32825 }, { "epoch": 2.659267660401815, "grad_norm": 0.06824833154678345, "learning_rate": 7.448580044106395e-05, "loss": 0.2344, "step": 32826 }, { "epoch": 2.659348671419313, "grad_norm": 0.054951079189777374, "learning_rate": 7.448129978846932e-05, "loss": 0.2197, "step": 32827 }, { "epoch": 2.6594296824368113, "grad_norm": 0.06483834981918335, "learning_rate": 7.447679913587471e-05, "loss": 0.2326, "step": 32828 }, { "epoch": 2.65951069345431, "grad_norm": 0.08794785290956497, "learning_rate": 7.447229848328007e-05, "loss": 0.2344, "step": 32829 }, { "epoch": 2.6595917044718083, "grad_norm": 0.07904116064310074, "learning_rate": 7.446779783068545e-05, "loss": 0.293, "step": 32830 }, { "epoch": 2.6596727154893065, "grad_norm": 0.07375107705593109, "learning_rate": 7.446329717809083e-05, "loss": 0.2413, "step": 32831 }, { "epoch": 2.659753726506805, "grad_norm": 0.07543234527111053, "learning_rate": 7.445879652549619e-05, "loss": 0.2486, "step": 32832 }, { "epoch": 2.6598347375243034, "grad_norm": 0.0675840899348259, "learning_rate": 7.445429587290157e-05, "loss": 0.2402, "step": 32833 }, { "epoch": 2.6599157485418017, "grad_norm": 0.07507772743701935, "learning_rate": 7.444979522030695e-05, "loss": 0.2293, "step": 32834 }, { "epoch": 2.6599967595593, "grad_norm": 0.07033761590719223, "learning_rate": 7.444529456771231e-05, "loss": 0.2751, "step": 32835 }, { "epoch": 2.660077770576798, "grad_norm": 0.07205361127853394, "learning_rate": 7.444079391511769e-05, "loss": 0.268, "step": 32836 }, { "epoch": 2.660158781594297, "grad_norm": 0.06997992098331451, "learning_rate": 7.443629326252307e-05, "loss": 0.2482, "step": 32837 }, { "epoch": 2.660239792611795, "grad_norm": 0.07074286788702011, "learning_rate": 7.443179260992843e-05, "loss": 0.2292, "step": 32838 }, { "epoch": 2.6603208036292934, "grad_norm": 0.06832756102085114, "learning_rate": 7.442729195733382e-05, "loss": 0.2708, "step": 32839 }, { "epoch": 2.660401814646792, "grad_norm": 0.06949548423290253, "learning_rate": 7.44227913047392e-05, "loss": 0.2357, "step": 32840 }, { "epoch": 2.6604828256642903, "grad_norm": 0.06634163856506348, "learning_rate": 7.441829065214457e-05, "loss": 0.2334, "step": 32841 }, { "epoch": 2.6605638366817885, "grad_norm": 0.07854374498128891, "learning_rate": 7.441378999954994e-05, "loss": 0.2625, "step": 32842 }, { "epoch": 2.660644847699287, "grad_norm": 0.073130302131176, "learning_rate": 7.440928934695532e-05, "loss": 0.2658, "step": 32843 }, { "epoch": 2.6607258587167855, "grad_norm": 0.06455226242542267, "learning_rate": 7.440478869436069e-05, "loss": 0.2409, "step": 32844 }, { "epoch": 2.6608068697342837, "grad_norm": 0.06555270403623581, "learning_rate": 7.440028804176606e-05, "loss": 0.2591, "step": 32845 }, { "epoch": 2.6608878807517824, "grad_norm": 0.06920315325260162, "learning_rate": 7.439578738917144e-05, "loss": 0.2523, "step": 32846 }, { "epoch": 2.6609688917692806, "grad_norm": 0.07340586930513382, "learning_rate": 7.439128673657681e-05, "loss": 0.2112, "step": 32847 }, { "epoch": 2.661049902786779, "grad_norm": 0.06806132942438126, "learning_rate": 7.438678608398218e-05, "loss": 0.2399, "step": 32848 }, { "epoch": 2.6611309138042776, "grad_norm": 0.0959523618221283, "learning_rate": 7.438228543138756e-05, "loss": 0.21, "step": 32849 }, { "epoch": 2.661211924821776, "grad_norm": 0.07259013503789902, "learning_rate": 7.437778477879293e-05, "loss": 0.2479, "step": 32850 }, { "epoch": 2.661292935839274, "grad_norm": 0.07072082161903381, "learning_rate": 7.43732841261983e-05, "loss": 0.2315, "step": 32851 }, { "epoch": 2.6613739468567728, "grad_norm": 0.06280186027288437, "learning_rate": 7.436878347360368e-05, "loss": 0.2325, "step": 32852 }, { "epoch": 2.661454957874271, "grad_norm": 0.08135109394788742, "learning_rate": 7.436428282100905e-05, "loss": 0.2748, "step": 32853 }, { "epoch": 2.6615359688917692, "grad_norm": 0.06902328133583069, "learning_rate": 7.435978216841443e-05, "loss": 0.2285, "step": 32854 }, { "epoch": 2.661616979909268, "grad_norm": 0.058770183473825455, "learning_rate": 7.43552815158198e-05, "loss": 0.2423, "step": 32855 }, { "epoch": 2.661697990926766, "grad_norm": 0.0691264271736145, "learning_rate": 7.435078086322517e-05, "loss": 0.2207, "step": 32856 }, { "epoch": 2.6617790019442644, "grad_norm": 0.0778164342045784, "learning_rate": 7.434628021063055e-05, "loss": 0.2412, "step": 32857 }, { "epoch": 2.6618600129617627, "grad_norm": 0.08259502053260803, "learning_rate": 7.434177955803592e-05, "loss": 0.2691, "step": 32858 }, { "epoch": 2.661941023979261, "grad_norm": 0.06187589466571808, "learning_rate": 7.43372789054413e-05, "loss": 0.2248, "step": 32859 }, { "epoch": 2.6620220349967596, "grad_norm": 0.0695401206612587, "learning_rate": 7.433277825284667e-05, "loss": 0.2364, "step": 32860 }, { "epoch": 2.662103046014258, "grad_norm": 0.08041361719369888, "learning_rate": 7.432827760025204e-05, "loss": 0.2444, "step": 32861 }, { "epoch": 2.662184057031756, "grad_norm": 0.058977339416742325, "learning_rate": 7.432377694765741e-05, "loss": 0.2423, "step": 32862 }, { "epoch": 2.662265068049255, "grad_norm": 0.05708383023738861, "learning_rate": 7.431927629506279e-05, "loss": 0.2171, "step": 32863 }, { "epoch": 2.662346079066753, "grad_norm": 0.06705954670906067, "learning_rate": 7.431477564246816e-05, "loss": 0.2145, "step": 32864 }, { "epoch": 2.6624270900842513, "grad_norm": 0.06451255828142166, "learning_rate": 7.431027498987354e-05, "loss": 0.2152, "step": 32865 }, { "epoch": 2.66250810110175, "grad_norm": 0.06673882901668549, "learning_rate": 7.430577433727891e-05, "loss": 0.254, "step": 32866 }, { "epoch": 2.662589112119248, "grad_norm": 0.06643757969141006, "learning_rate": 7.430127368468428e-05, "loss": 0.2403, "step": 32867 }, { "epoch": 2.6626701231367464, "grad_norm": 0.06720244139432907, "learning_rate": 7.429677303208966e-05, "loss": 0.2334, "step": 32868 }, { "epoch": 2.662751134154245, "grad_norm": 0.0601067841053009, "learning_rate": 7.429227237949503e-05, "loss": 0.2168, "step": 32869 }, { "epoch": 2.6628321451717434, "grad_norm": 0.0732504352927208, "learning_rate": 7.42877717269004e-05, "loss": 0.2565, "step": 32870 }, { "epoch": 2.6629131561892416, "grad_norm": 0.06915004551410675, "learning_rate": 7.428327107430578e-05, "loss": 0.2443, "step": 32871 }, { "epoch": 2.6629941672067403, "grad_norm": 0.06645822525024414, "learning_rate": 7.427877042171115e-05, "loss": 0.2207, "step": 32872 }, { "epoch": 2.6630751782242386, "grad_norm": 0.06649801880121231, "learning_rate": 7.427426976911652e-05, "loss": 0.2651, "step": 32873 }, { "epoch": 2.663156189241737, "grad_norm": 0.06191537529230118, "learning_rate": 7.42697691165219e-05, "loss": 0.2389, "step": 32874 }, { "epoch": 2.6632372002592355, "grad_norm": 0.07433738559484482, "learning_rate": 7.426526846392727e-05, "loss": 0.2857, "step": 32875 }, { "epoch": 2.6633182112767337, "grad_norm": 0.06460598856210709, "learning_rate": 7.426076781133265e-05, "loss": 0.2842, "step": 32876 }, { "epoch": 2.663399222294232, "grad_norm": 0.06493370234966278, "learning_rate": 7.425626715873802e-05, "loss": 0.2334, "step": 32877 }, { "epoch": 2.6634802333117307, "grad_norm": 0.0691828578710556, "learning_rate": 7.425176650614339e-05, "loss": 0.2099, "step": 32878 }, { "epoch": 2.663561244329229, "grad_norm": 0.06899280846118927, "learning_rate": 7.424726585354877e-05, "loss": 0.2307, "step": 32879 }, { "epoch": 2.663642255346727, "grad_norm": 0.06932519376277924, "learning_rate": 7.424276520095414e-05, "loss": 0.2354, "step": 32880 }, { "epoch": 2.6637232663642254, "grad_norm": 0.06170056387782097, "learning_rate": 7.423826454835951e-05, "loss": 0.2123, "step": 32881 }, { "epoch": 2.6638042773817237, "grad_norm": 0.06595876812934875, "learning_rate": 7.423376389576489e-05, "loss": 0.2404, "step": 32882 }, { "epoch": 2.6638852883992223, "grad_norm": 0.07378228008747101, "learning_rate": 7.422926324317026e-05, "loss": 0.2741, "step": 32883 }, { "epoch": 2.6639662994167206, "grad_norm": 0.06303569674491882, "learning_rate": 7.422476259057563e-05, "loss": 0.2634, "step": 32884 }, { "epoch": 2.664047310434219, "grad_norm": 0.07362163811922073, "learning_rate": 7.422026193798101e-05, "loss": 0.2168, "step": 32885 }, { "epoch": 2.6641283214517175, "grad_norm": 0.05958087369799614, "learning_rate": 7.421576128538638e-05, "loss": 0.2555, "step": 32886 }, { "epoch": 2.6642093324692158, "grad_norm": 0.09635303169488907, "learning_rate": 7.421126063279175e-05, "loss": 0.221, "step": 32887 }, { "epoch": 2.664290343486714, "grad_norm": 0.07042831927537918, "learning_rate": 7.420675998019713e-05, "loss": 0.2282, "step": 32888 }, { "epoch": 2.6643713545042127, "grad_norm": 0.061355553567409515, "learning_rate": 7.42022593276025e-05, "loss": 0.2289, "step": 32889 }, { "epoch": 2.664452365521711, "grad_norm": 0.08902187645435333, "learning_rate": 7.419775867500788e-05, "loss": 0.2903, "step": 32890 }, { "epoch": 2.664533376539209, "grad_norm": 0.07006392627954483, "learning_rate": 7.419325802241326e-05, "loss": 0.2058, "step": 32891 }, { "epoch": 2.664614387556708, "grad_norm": 0.07656487077474594, "learning_rate": 7.418875736981862e-05, "loss": 0.2665, "step": 32892 }, { "epoch": 2.664695398574206, "grad_norm": 0.07237304747104645, "learning_rate": 7.4184256717224e-05, "loss": 0.2195, "step": 32893 }, { "epoch": 2.6647764095917044, "grad_norm": 0.07717087119817734, "learning_rate": 7.417975606462938e-05, "loss": 0.2536, "step": 32894 }, { "epoch": 2.664857420609203, "grad_norm": 0.07785272598266602, "learning_rate": 7.417525541203474e-05, "loss": 0.2629, "step": 32895 }, { "epoch": 2.6649384316267013, "grad_norm": 0.0654202550649643, "learning_rate": 7.417075475944012e-05, "loss": 0.2482, "step": 32896 }, { "epoch": 2.6650194426441995, "grad_norm": 0.08050129562616348, "learning_rate": 7.41662541068455e-05, "loss": 0.2913, "step": 32897 }, { "epoch": 2.6651004536616982, "grad_norm": 0.06768804043531418, "learning_rate": 7.416175345425086e-05, "loss": 0.2443, "step": 32898 }, { "epoch": 2.6651814646791965, "grad_norm": 0.0724261924624443, "learning_rate": 7.415725280165624e-05, "loss": 0.2556, "step": 32899 }, { "epoch": 2.6652624756966947, "grad_norm": 0.054597921669483185, "learning_rate": 7.415275214906163e-05, "loss": 0.2127, "step": 32900 }, { "epoch": 2.665343486714193, "grad_norm": 0.08325103670358658, "learning_rate": 7.414825149646699e-05, "loss": 0.2412, "step": 32901 }, { "epoch": 2.6654244977316917, "grad_norm": 0.06817788630723953, "learning_rate": 7.414375084387236e-05, "loss": 0.2504, "step": 32902 }, { "epoch": 2.66550550874919, "grad_norm": 0.07943432778120041, "learning_rate": 7.413925019127775e-05, "loss": 0.2358, "step": 32903 }, { "epoch": 2.665586519766688, "grad_norm": 0.0712936744093895, "learning_rate": 7.41347495386831e-05, "loss": 0.2562, "step": 32904 }, { "epoch": 2.6656675307841864, "grad_norm": 0.06890852749347687, "learning_rate": 7.413024888608848e-05, "loss": 0.2333, "step": 32905 }, { "epoch": 2.665748541801685, "grad_norm": 0.08567355573177338, "learning_rate": 7.412574823349387e-05, "loss": 0.2737, "step": 32906 }, { "epoch": 2.6658295528191833, "grad_norm": 0.06952392309904099, "learning_rate": 7.412124758089923e-05, "loss": 0.2425, "step": 32907 }, { "epoch": 2.6659105638366816, "grad_norm": 0.0863032266497612, "learning_rate": 7.41167469283046e-05, "loss": 0.2555, "step": 32908 }, { "epoch": 2.6659915748541803, "grad_norm": 0.0648011565208435, "learning_rate": 7.411224627570999e-05, "loss": 0.2101, "step": 32909 }, { "epoch": 2.6660725858716785, "grad_norm": 0.06821193546056747, "learning_rate": 7.410774562311536e-05, "loss": 0.2466, "step": 32910 }, { "epoch": 2.6661535968891767, "grad_norm": 0.05463450029492378, "learning_rate": 7.410324497052072e-05, "loss": 0.225, "step": 32911 }, { "epoch": 2.6662346079066754, "grad_norm": 0.06467321515083313, "learning_rate": 7.409874431792611e-05, "loss": 0.2331, "step": 32912 }, { "epoch": 2.6663156189241737, "grad_norm": 0.06272049993276596, "learning_rate": 7.409424366533148e-05, "loss": 0.239, "step": 32913 }, { "epoch": 2.666396629941672, "grad_norm": 0.07468179613351822, "learning_rate": 7.408974301273684e-05, "loss": 0.2639, "step": 32914 }, { "epoch": 2.6664776409591706, "grad_norm": 0.07041250169277191, "learning_rate": 7.408524236014223e-05, "loss": 0.2703, "step": 32915 }, { "epoch": 2.666558651976669, "grad_norm": 0.07356708496809006, "learning_rate": 7.40807417075476e-05, "loss": 0.2616, "step": 32916 }, { "epoch": 2.666639662994167, "grad_norm": 0.06438080221414566, "learning_rate": 7.407624105495298e-05, "loss": 0.2226, "step": 32917 }, { "epoch": 2.666720674011666, "grad_norm": 0.07412604242563248, "learning_rate": 7.407174040235835e-05, "loss": 0.2895, "step": 32918 }, { "epoch": 2.666801685029164, "grad_norm": 0.07654453068971634, "learning_rate": 7.406723974976372e-05, "loss": 0.2243, "step": 32919 }, { "epoch": 2.6668826960466623, "grad_norm": 0.059283047914505005, "learning_rate": 7.40627390971691e-05, "loss": 0.2298, "step": 32920 }, { "epoch": 2.666963707064161, "grad_norm": 0.06338262557983398, "learning_rate": 7.405823844457447e-05, "loss": 0.2419, "step": 32921 }, { "epoch": 2.667044718081659, "grad_norm": 0.0706416442990303, "learning_rate": 7.405373779197984e-05, "loss": 0.2299, "step": 32922 }, { "epoch": 2.6671257290991575, "grad_norm": 0.06983651220798492, "learning_rate": 7.404923713938522e-05, "loss": 0.246, "step": 32923 }, { "epoch": 2.6672067401166557, "grad_norm": 0.06590235978364944, "learning_rate": 7.404473648679059e-05, "loss": 0.2675, "step": 32924 }, { "epoch": 2.6672877511341544, "grad_norm": 0.06504794955253601, "learning_rate": 7.404023583419597e-05, "loss": 0.247, "step": 32925 }, { "epoch": 2.6673687621516526, "grad_norm": 0.07013759016990662, "learning_rate": 7.403573518160134e-05, "loss": 0.2233, "step": 32926 }, { "epoch": 2.667449773169151, "grad_norm": 0.07694179564714432, "learning_rate": 7.403123452900671e-05, "loss": 0.2745, "step": 32927 }, { "epoch": 2.667530784186649, "grad_norm": 0.0731961578130722, "learning_rate": 7.402673387641209e-05, "loss": 0.2511, "step": 32928 }, { "epoch": 2.667611795204148, "grad_norm": 0.06496523320674896, "learning_rate": 7.402223322381746e-05, "loss": 0.256, "step": 32929 }, { "epoch": 2.667692806221646, "grad_norm": 0.05998383089900017, "learning_rate": 7.401773257122283e-05, "loss": 0.2482, "step": 32930 }, { "epoch": 2.6677738172391443, "grad_norm": 0.0756494551897049, "learning_rate": 7.401323191862821e-05, "loss": 0.2843, "step": 32931 }, { "epoch": 2.667854828256643, "grad_norm": 0.06741814315319061, "learning_rate": 7.400873126603358e-05, "loss": 0.265, "step": 32932 }, { "epoch": 2.6679358392741412, "grad_norm": 0.07142461091279984, "learning_rate": 7.400423061343895e-05, "loss": 0.242, "step": 32933 }, { "epoch": 2.6680168502916395, "grad_norm": 0.05582594871520996, "learning_rate": 7.399972996084433e-05, "loss": 0.2296, "step": 32934 }, { "epoch": 2.668097861309138, "grad_norm": 0.06327968835830688, "learning_rate": 7.39952293082497e-05, "loss": 0.272, "step": 32935 }, { "epoch": 2.6681788723266364, "grad_norm": 0.07664207369089127, "learning_rate": 7.399072865565507e-05, "loss": 0.2518, "step": 32936 }, { "epoch": 2.6682598833441347, "grad_norm": 0.06662250310182571, "learning_rate": 7.398622800306045e-05, "loss": 0.2276, "step": 32937 }, { "epoch": 2.6683408943616334, "grad_norm": 0.06861812621355057, "learning_rate": 7.398172735046582e-05, "loss": 0.2379, "step": 32938 }, { "epoch": 2.6684219053791316, "grad_norm": 0.06940872967243195, "learning_rate": 7.39772266978712e-05, "loss": 0.2181, "step": 32939 }, { "epoch": 2.66850291639663, "grad_norm": 0.05719943717122078, "learning_rate": 7.397272604527657e-05, "loss": 0.2575, "step": 32940 }, { "epoch": 2.6685839274141285, "grad_norm": 0.0687544196844101, "learning_rate": 7.396822539268194e-05, "loss": 0.2167, "step": 32941 }, { "epoch": 2.6686649384316268, "grad_norm": 0.05461694300174713, "learning_rate": 7.396372474008732e-05, "loss": 0.2191, "step": 32942 }, { "epoch": 2.668745949449125, "grad_norm": 0.09195633977651596, "learning_rate": 7.395922408749269e-05, "loss": 0.2238, "step": 32943 }, { "epoch": 2.6688269604666237, "grad_norm": 0.07397960871458054, "learning_rate": 7.395472343489806e-05, "loss": 0.2789, "step": 32944 }, { "epoch": 2.668907971484122, "grad_norm": 0.06718258559703827, "learning_rate": 7.395022278230344e-05, "loss": 0.2049, "step": 32945 }, { "epoch": 2.66898898250162, "grad_norm": 0.06812811642885208, "learning_rate": 7.394572212970881e-05, "loss": 0.2347, "step": 32946 }, { "epoch": 2.6690699935191184, "grad_norm": 0.06567735970020294, "learning_rate": 7.394122147711418e-05, "loss": 0.2389, "step": 32947 }, { "epoch": 2.669151004536617, "grad_norm": 0.07259346544742584, "learning_rate": 7.393672082451956e-05, "loss": 0.2341, "step": 32948 }, { "epoch": 2.6692320155541154, "grad_norm": 0.057004157453775406, "learning_rate": 7.393222017192493e-05, "loss": 0.2235, "step": 32949 }, { "epoch": 2.6693130265716136, "grad_norm": 0.07738929241895676, "learning_rate": 7.39277195193303e-05, "loss": 0.2077, "step": 32950 }, { "epoch": 2.669394037589112, "grad_norm": 0.08664657175540924, "learning_rate": 7.392321886673568e-05, "loss": 0.2846, "step": 32951 }, { "epoch": 2.6694750486066106, "grad_norm": 0.08418330550193787, "learning_rate": 7.391871821414105e-05, "loss": 0.2987, "step": 32952 }, { "epoch": 2.669556059624109, "grad_norm": 0.08851824700832367, "learning_rate": 7.391421756154643e-05, "loss": 0.2564, "step": 32953 }, { "epoch": 2.669637070641607, "grad_norm": 0.07711591571569443, "learning_rate": 7.39097169089518e-05, "loss": 0.2528, "step": 32954 }, { "epoch": 2.6697180816591057, "grad_norm": 0.061196353286504745, "learning_rate": 7.390521625635717e-05, "loss": 0.2271, "step": 32955 }, { "epoch": 2.669799092676604, "grad_norm": 0.07016127556562424, "learning_rate": 7.390071560376255e-05, "loss": 0.238, "step": 32956 }, { "epoch": 2.6698801036941022, "grad_norm": 0.075688436627388, "learning_rate": 7.389621495116792e-05, "loss": 0.2279, "step": 32957 }, { "epoch": 2.669961114711601, "grad_norm": 0.06808993220329285, "learning_rate": 7.38917142985733e-05, "loss": 0.2429, "step": 32958 }, { "epoch": 2.670042125729099, "grad_norm": 0.07332292199134827, "learning_rate": 7.388721364597867e-05, "loss": 0.2337, "step": 32959 }, { "epoch": 2.6701231367465974, "grad_norm": 0.058614619076251984, "learning_rate": 7.388271299338404e-05, "loss": 0.2578, "step": 32960 }, { "epoch": 2.670204147764096, "grad_norm": 0.05495653301477432, "learning_rate": 7.387821234078941e-05, "loss": 0.2715, "step": 32961 }, { "epoch": 2.6702851587815943, "grad_norm": 0.08176012337207794, "learning_rate": 7.387371168819479e-05, "loss": 0.2445, "step": 32962 }, { "epoch": 2.6703661697990926, "grad_norm": 0.060160085558891296, "learning_rate": 7.386921103560016e-05, "loss": 0.2281, "step": 32963 }, { "epoch": 2.6704471808165913, "grad_norm": 0.09189620614051819, "learning_rate": 7.386471038300554e-05, "loss": 0.2297, "step": 32964 }, { "epoch": 2.6705281918340895, "grad_norm": 0.060057610273361206, "learning_rate": 7.386020973041091e-05, "loss": 0.2108, "step": 32965 }, { "epoch": 2.6706092028515878, "grad_norm": 0.0785459652543068, "learning_rate": 7.385570907781628e-05, "loss": 0.2736, "step": 32966 }, { "epoch": 2.6706902138690864, "grad_norm": 0.07110899686813354, "learning_rate": 7.385120842522166e-05, "loss": 0.272, "step": 32967 }, { "epoch": 2.6707712248865847, "grad_norm": 0.08549158275127411, "learning_rate": 7.384670777262703e-05, "loss": 0.2238, "step": 32968 }, { "epoch": 2.670852235904083, "grad_norm": 0.06585050374269485, "learning_rate": 7.38422071200324e-05, "loss": 0.2332, "step": 32969 }, { "epoch": 2.670933246921581, "grad_norm": 0.07162567228078842, "learning_rate": 7.383770646743778e-05, "loss": 0.26, "step": 32970 }, { "epoch": 2.67101425793908, "grad_norm": 0.0801546648144722, "learning_rate": 7.383320581484315e-05, "loss": 0.2837, "step": 32971 }, { "epoch": 2.671095268956578, "grad_norm": 0.05951867997646332, "learning_rate": 7.382870516224854e-05, "loss": 0.214, "step": 32972 }, { "epoch": 2.6711762799740764, "grad_norm": 0.06935639679431915, "learning_rate": 7.38242045096539e-05, "loss": 0.2163, "step": 32973 }, { "epoch": 2.6712572909915746, "grad_norm": 0.08030533790588379, "learning_rate": 7.381970385705927e-05, "loss": 0.2229, "step": 32974 }, { "epoch": 2.6713383020090733, "grad_norm": 0.06654808670282364, "learning_rate": 7.381520320446466e-05, "loss": 0.1838, "step": 32975 }, { "epoch": 2.6714193130265715, "grad_norm": 0.0794433057308197, "learning_rate": 7.381070255187002e-05, "loss": 0.2433, "step": 32976 }, { "epoch": 2.67150032404407, "grad_norm": 0.06465379148721695, "learning_rate": 7.380620189927539e-05, "loss": 0.2271, "step": 32977 }, { "epoch": 2.6715813350615685, "grad_norm": 0.06718406081199646, "learning_rate": 7.380170124668078e-05, "loss": 0.2625, "step": 32978 }, { "epoch": 2.6716623460790667, "grad_norm": 0.08068958669900894, "learning_rate": 7.379720059408615e-05, "loss": 0.2868, "step": 32979 }, { "epoch": 2.671743357096565, "grad_norm": 0.08404342085123062, "learning_rate": 7.379269994149151e-05, "loss": 0.2575, "step": 32980 }, { "epoch": 2.6718243681140637, "grad_norm": 0.0789237916469574, "learning_rate": 7.37881992888969e-05, "loss": 0.239, "step": 32981 }, { "epoch": 2.671905379131562, "grad_norm": 0.07895661890506744, "learning_rate": 7.378369863630227e-05, "loss": 0.2527, "step": 32982 }, { "epoch": 2.67198639014906, "grad_norm": 0.05732421949505806, "learning_rate": 7.377919798370763e-05, "loss": 0.2235, "step": 32983 }, { "epoch": 2.672067401166559, "grad_norm": 0.06396181881427765, "learning_rate": 7.377469733111302e-05, "loss": 0.2192, "step": 32984 }, { "epoch": 2.672148412184057, "grad_norm": 0.0701260045170784, "learning_rate": 7.37701966785184e-05, "loss": 0.2878, "step": 32985 }, { "epoch": 2.6722294232015553, "grad_norm": 0.06090003252029419, "learning_rate": 7.376569602592375e-05, "loss": 0.2271, "step": 32986 }, { "epoch": 2.672310434219054, "grad_norm": 0.0759798064827919, "learning_rate": 7.376119537332914e-05, "loss": 0.2326, "step": 32987 }, { "epoch": 2.6723914452365523, "grad_norm": 0.07578757405281067, "learning_rate": 7.375669472073452e-05, "loss": 0.246, "step": 32988 }, { "epoch": 2.6724724562540505, "grad_norm": 0.081985242664814, "learning_rate": 7.375219406813988e-05, "loss": 0.2728, "step": 32989 }, { "epoch": 2.672553467271549, "grad_norm": 0.07229936122894287, "learning_rate": 7.374769341554526e-05, "loss": 0.2434, "step": 32990 }, { "epoch": 2.6726344782890474, "grad_norm": 0.08067141473293304, "learning_rate": 7.374319276295064e-05, "loss": 0.2439, "step": 32991 }, { "epoch": 2.6727154893065457, "grad_norm": 0.06598978489637375, "learning_rate": 7.3738692110356e-05, "loss": 0.2388, "step": 32992 }, { "epoch": 2.672796500324044, "grad_norm": 0.06533140689134598, "learning_rate": 7.373419145776138e-05, "loss": 0.2161, "step": 32993 }, { "epoch": 2.6728775113415426, "grad_norm": 0.06714296340942383, "learning_rate": 7.372969080516676e-05, "loss": 0.2742, "step": 32994 }, { "epoch": 2.672958522359041, "grad_norm": 0.05954829603433609, "learning_rate": 7.372519015257212e-05, "loss": 0.2243, "step": 32995 }, { "epoch": 2.673039533376539, "grad_norm": 0.05642719566822052, "learning_rate": 7.37206894999775e-05, "loss": 0.2207, "step": 32996 }, { "epoch": 2.6731205443940373, "grad_norm": 0.06855549663305283, "learning_rate": 7.371618884738288e-05, "loss": 0.2558, "step": 32997 }, { "epoch": 2.673201555411536, "grad_norm": 0.07786260545253754, "learning_rate": 7.371168819478825e-05, "loss": 0.2037, "step": 32998 }, { "epoch": 2.6732825664290343, "grad_norm": 0.06984948366880417, "learning_rate": 7.370718754219363e-05, "loss": 0.2563, "step": 32999 }, { "epoch": 2.6733635774465325, "grad_norm": 0.07111197710037231, "learning_rate": 7.3702686889599e-05, "loss": 0.2558, "step": 33000 }, { "epoch": 2.673444588464031, "grad_norm": 0.05868116021156311, "learning_rate": 7.369818623700437e-05, "loss": 0.2407, "step": 33001 }, { "epoch": 2.6735255994815295, "grad_norm": 0.07903192192316055, "learning_rate": 7.369368558440975e-05, "loss": 0.2704, "step": 33002 }, { "epoch": 2.6736066104990277, "grad_norm": 0.06334315985441208, "learning_rate": 7.368918493181512e-05, "loss": 0.2486, "step": 33003 }, { "epoch": 2.6736876215165264, "grad_norm": 0.06944619119167328, "learning_rate": 7.36846842792205e-05, "loss": 0.2121, "step": 33004 }, { "epoch": 2.6737686325340246, "grad_norm": 0.06809698045253754, "learning_rate": 7.368018362662587e-05, "loss": 0.2242, "step": 33005 }, { "epoch": 2.673849643551523, "grad_norm": 0.0604662150144577, "learning_rate": 7.367568297403124e-05, "loss": 0.2319, "step": 33006 }, { "epoch": 2.6739306545690216, "grad_norm": 0.08217473328113556, "learning_rate": 7.367118232143661e-05, "loss": 0.2467, "step": 33007 }, { "epoch": 2.67401166558652, "grad_norm": 0.05181199684739113, "learning_rate": 7.366668166884199e-05, "loss": 0.2315, "step": 33008 }, { "epoch": 2.674092676604018, "grad_norm": 0.06757138669490814, "learning_rate": 7.366218101624736e-05, "loss": 0.2506, "step": 33009 }, { "epoch": 2.6741736876215167, "grad_norm": 0.06875761598348618, "learning_rate": 7.365768036365273e-05, "loss": 0.2638, "step": 33010 }, { "epoch": 2.674254698639015, "grad_norm": 0.07288714498281479, "learning_rate": 7.365317971105811e-05, "loss": 0.2307, "step": 33011 }, { "epoch": 2.6743357096565132, "grad_norm": 0.07257254421710968, "learning_rate": 7.364867905846348e-05, "loss": 0.251, "step": 33012 }, { "epoch": 2.674416720674012, "grad_norm": 0.06154019013047218, "learning_rate": 7.364417840586886e-05, "loss": 0.2334, "step": 33013 }, { "epoch": 2.67449773169151, "grad_norm": 0.06860852241516113, "learning_rate": 7.363967775327423e-05, "loss": 0.2807, "step": 33014 }, { "epoch": 2.6745787427090084, "grad_norm": 0.06585631519556046, "learning_rate": 7.36351771006796e-05, "loss": 0.2459, "step": 33015 }, { "epoch": 2.6746597537265067, "grad_norm": 0.06791950762271881, "learning_rate": 7.363067644808498e-05, "loss": 0.2817, "step": 33016 }, { "epoch": 2.6747407647440054, "grad_norm": 0.061369288712739944, "learning_rate": 7.362617579549035e-05, "loss": 0.2283, "step": 33017 }, { "epoch": 2.6748217757615036, "grad_norm": 0.060109131038188934, "learning_rate": 7.362167514289572e-05, "loss": 0.2465, "step": 33018 }, { "epoch": 2.674902786779002, "grad_norm": 0.0682285726070404, "learning_rate": 7.36171744903011e-05, "loss": 0.3074, "step": 33019 }, { "epoch": 2.6749837977965, "grad_norm": 0.07335293292999268, "learning_rate": 7.361267383770647e-05, "loss": 0.2427, "step": 33020 }, { "epoch": 2.6750648088139988, "grad_norm": 0.08329258114099503, "learning_rate": 7.360817318511184e-05, "loss": 0.2594, "step": 33021 }, { "epoch": 2.675145819831497, "grad_norm": 0.06502322852611542, "learning_rate": 7.360367253251722e-05, "loss": 0.2127, "step": 33022 }, { "epoch": 2.6752268308489953, "grad_norm": 0.06749629229307175, "learning_rate": 7.359917187992259e-05, "loss": 0.248, "step": 33023 }, { "epoch": 2.675307841866494, "grad_norm": 0.0741758793592453, "learning_rate": 7.359467122732797e-05, "loss": 0.2177, "step": 33024 }, { "epoch": 2.675388852883992, "grad_norm": 0.07246986776590347, "learning_rate": 7.359017057473334e-05, "loss": 0.2696, "step": 33025 }, { "epoch": 2.6754698639014904, "grad_norm": 0.07233214378356934, "learning_rate": 7.358566992213871e-05, "loss": 0.2256, "step": 33026 }, { "epoch": 2.675550874918989, "grad_norm": 0.061211708933115005, "learning_rate": 7.358116926954409e-05, "loss": 0.2186, "step": 33027 }, { "epoch": 2.6756318859364874, "grad_norm": 0.07487697154283524, "learning_rate": 7.357666861694946e-05, "loss": 0.2549, "step": 33028 }, { "epoch": 2.6757128969539856, "grad_norm": 0.07913942635059357, "learning_rate": 7.357216796435483e-05, "loss": 0.2858, "step": 33029 }, { "epoch": 2.6757939079714843, "grad_norm": 0.06393986195325851, "learning_rate": 7.356766731176021e-05, "loss": 0.1946, "step": 33030 }, { "epoch": 2.6758749189889826, "grad_norm": 0.06356556713581085, "learning_rate": 7.356316665916558e-05, "loss": 0.2099, "step": 33031 }, { "epoch": 2.675955930006481, "grad_norm": 0.06801209598779678, "learning_rate": 7.355866600657095e-05, "loss": 0.2403, "step": 33032 }, { "epoch": 2.6760369410239795, "grad_norm": 0.05762135609984398, "learning_rate": 7.355416535397633e-05, "loss": 0.2487, "step": 33033 }, { "epoch": 2.6761179520414777, "grad_norm": 0.06864021718502045, "learning_rate": 7.35496647013817e-05, "loss": 0.2513, "step": 33034 }, { "epoch": 2.676198963058976, "grad_norm": 0.053114648908376694, "learning_rate": 7.354516404878708e-05, "loss": 0.1946, "step": 33035 }, { "epoch": 2.6762799740764747, "grad_norm": 0.07913174480199814, "learning_rate": 7.354066339619245e-05, "loss": 0.2559, "step": 33036 }, { "epoch": 2.676360985093973, "grad_norm": 0.061761241406202316, "learning_rate": 7.353616274359782e-05, "loss": 0.2353, "step": 33037 }, { "epoch": 2.676441996111471, "grad_norm": 0.08432194590568542, "learning_rate": 7.35316620910032e-05, "loss": 0.2903, "step": 33038 }, { "epoch": 2.6765230071289694, "grad_norm": 0.09266561269760132, "learning_rate": 7.352716143840857e-05, "loss": 0.2652, "step": 33039 }, { "epoch": 2.6766040181464676, "grad_norm": 0.06161290407180786, "learning_rate": 7.352266078581394e-05, "loss": 0.2394, "step": 33040 }, { "epoch": 2.6766850291639663, "grad_norm": 0.05790049955248833, "learning_rate": 7.351816013321932e-05, "loss": 0.2208, "step": 33041 }, { "epoch": 2.6767660401814646, "grad_norm": 0.08553800731897354, "learning_rate": 7.351365948062469e-05, "loss": 0.2521, "step": 33042 }, { "epoch": 2.676847051198963, "grad_norm": 0.07292623817920685, "learning_rate": 7.350915882803006e-05, "loss": 0.2595, "step": 33043 }, { "epoch": 2.6769280622164615, "grad_norm": 0.07673818618059158, "learning_rate": 7.350465817543544e-05, "loss": 0.2461, "step": 33044 }, { "epoch": 2.6770090732339598, "grad_norm": 0.06594213098287582, "learning_rate": 7.350015752284082e-05, "loss": 0.2667, "step": 33045 }, { "epoch": 2.677090084251458, "grad_norm": 0.0723397359251976, "learning_rate": 7.349565687024618e-05, "loss": 0.2517, "step": 33046 }, { "epoch": 2.6771710952689567, "grad_norm": 0.06494677066802979, "learning_rate": 7.349115621765156e-05, "loss": 0.268, "step": 33047 }, { "epoch": 2.677252106286455, "grad_norm": 0.07920119911432266, "learning_rate": 7.348665556505695e-05, "loss": 0.2019, "step": 33048 }, { "epoch": 2.677333117303953, "grad_norm": 0.06195864453911781, "learning_rate": 7.34821549124623e-05, "loss": 0.2074, "step": 33049 }, { "epoch": 2.677414128321452, "grad_norm": 0.06277751177549362, "learning_rate": 7.347765425986769e-05, "loss": 0.2351, "step": 33050 }, { "epoch": 2.67749513933895, "grad_norm": 0.07916513085365295, "learning_rate": 7.347315360727307e-05, "loss": 0.2296, "step": 33051 }, { "epoch": 2.6775761503564484, "grad_norm": 0.06696799397468567, "learning_rate": 7.346865295467843e-05, "loss": 0.2141, "step": 33052 }, { "epoch": 2.677657161373947, "grad_norm": 0.07103472948074341, "learning_rate": 7.346415230208381e-05, "loss": 0.2666, "step": 33053 }, { "epoch": 2.6777381723914453, "grad_norm": 0.071993388235569, "learning_rate": 7.345965164948919e-05, "loss": 0.2598, "step": 33054 }, { "epoch": 2.6778191834089435, "grad_norm": 0.06992863863706589, "learning_rate": 7.345515099689455e-05, "loss": 0.2631, "step": 33055 }, { "epoch": 2.6779001944264422, "grad_norm": 0.04920663684606552, "learning_rate": 7.345065034429993e-05, "loss": 0.2122, "step": 33056 }, { "epoch": 2.6779812054439405, "grad_norm": 0.05919310450553894, "learning_rate": 7.344614969170531e-05, "loss": 0.2311, "step": 33057 }, { "epoch": 2.6780622164614387, "grad_norm": 0.07455393671989441, "learning_rate": 7.344164903911067e-05, "loss": 0.2463, "step": 33058 }, { "epoch": 2.6781432274789374, "grad_norm": 0.0503612719476223, "learning_rate": 7.343714838651606e-05, "loss": 0.231, "step": 33059 }, { "epoch": 2.6782242384964356, "grad_norm": 0.08073712140321732, "learning_rate": 7.343264773392143e-05, "loss": 0.2548, "step": 33060 }, { "epoch": 2.678305249513934, "grad_norm": 0.06331314146518707, "learning_rate": 7.342814708132679e-05, "loss": 0.2254, "step": 33061 }, { "epoch": 2.678386260531432, "grad_norm": 0.07347361743450165, "learning_rate": 7.342364642873218e-05, "loss": 0.2901, "step": 33062 }, { "epoch": 2.6784672715489304, "grad_norm": 0.07064739614725113, "learning_rate": 7.341914577613755e-05, "loss": 0.2493, "step": 33063 }, { "epoch": 2.678548282566429, "grad_norm": 0.05976058542728424, "learning_rate": 7.341464512354291e-05, "loss": 0.2609, "step": 33064 }, { "epoch": 2.6786292935839273, "grad_norm": 0.08085988461971283, "learning_rate": 7.34101444709483e-05, "loss": 0.2543, "step": 33065 }, { "epoch": 2.6787103046014256, "grad_norm": 0.07125470787286758, "learning_rate": 7.340564381835367e-05, "loss": 0.2506, "step": 33066 }, { "epoch": 2.6787913156189243, "grad_norm": 0.05963381752371788, "learning_rate": 7.340114316575903e-05, "loss": 0.2614, "step": 33067 }, { "epoch": 2.6788723266364225, "grad_norm": 0.08461900055408478, "learning_rate": 7.339664251316442e-05, "loss": 0.2944, "step": 33068 }, { "epoch": 2.6789533376539207, "grad_norm": 0.07790318131446838, "learning_rate": 7.339214186056979e-05, "loss": 0.2164, "step": 33069 }, { "epoch": 2.6790343486714194, "grad_norm": 0.06046464294195175, "learning_rate": 7.338764120797515e-05, "loss": 0.2328, "step": 33070 }, { "epoch": 2.6791153596889177, "grad_norm": 0.07902556657791138, "learning_rate": 7.338314055538054e-05, "loss": 0.2431, "step": 33071 }, { "epoch": 2.679196370706416, "grad_norm": 0.07424572855234146, "learning_rate": 7.337863990278591e-05, "loss": 0.2215, "step": 33072 }, { "epoch": 2.6792773817239146, "grad_norm": 0.08468609303236008, "learning_rate": 7.337413925019127e-05, "loss": 0.2426, "step": 33073 }, { "epoch": 2.679358392741413, "grad_norm": 0.0756547823548317, "learning_rate": 7.336963859759666e-05, "loss": 0.2741, "step": 33074 }, { "epoch": 2.679439403758911, "grad_norm": 0.06156749650835991, "learning_rate": 7.336513794500203e-05, "loss": 0.2129, "step": 33075 }, { "epoch": 2.67952041477641, "grad_norm": 0.07414403557777405, "learning_rate": 7.33606372924074e-05, "loss": 0.2438, "step": 33076 }, { "epoch": 2.679601425793908, "grad_norm": 0.08308634161949158, "learning_rate": 7.335613663981278e-05, "loss": 0.2182, "step": 33077 }, { "epoch": 2.6796824368114063, "grad_norm": 0.07818423956632614, "learning_rate": 7.335163598721815e-05, "loss": 0.2259, "step": 33078 }, { "epoch": 2.679763447828905, "grad_norm": 0.07427828758955002, "learning_rate": 7.334713533462353e-05, "loss": 0.2669, "step": 33079 }, { "epoch": 2.679844458846403, "grad_norm": 0.07437131553888321, "learning_rate": 7.33426346820289e-05, "loss": 0.2438, "step": 33080 }, { "epoch": 2.6799254698639015, "grad_norm": 0.07310924679040909, "learning_rate": 7.333813402943427e-05, "loss": 0.2495, "step": 33081 }, { "epoch": 2.6800064808814, "grad_norm": 0.08593792468309402, "learning_rate": 7.333363337683965e-05, "loss": 0.2339, "step": 33082 }, { "epoch": 2.6800874918988984, "grad_norm": 0.06198699772357941, "learning_rate": 7.332913272424502e-05, "loss": 0.274, "step": 33083 }, { "epoch": 2.6801685029163966, "grad_norm": 0.07218437641859055, "learning_rate": 7.33246320716504e-05, "loss": 0.2084, "step": 33084 }, { "epoch": 2.680249513933895, "grad_norm": 0.07658874988555908, "learning_rate": 7.332013141905577e-05, "loss": 0.2106, "step": 33085 }, { "epoch": 2.680330524951393, "grad_norm": 0.07715161889791489, "learning_rate": 7.331563076646114e-05, "loss": 0.2361, "step": 33086 }, { "epoch": 2.680411535968892, "grad_norm": 0.07156594097614288, "learning_rate": 7.331113011386652e-05, "loss": 0.2526, "step": 33087 }, { "epoch": 2.68049254698639, "grad_norm": 0.06449709832668304, "learning_rate": 7.330662946127189e-05, "loss": 0.2542, "step": 33088 }, { "epoch": 2.6805735580038883, "grad_norm": 0.06789814680814743, "learning_rate": 7.330212880867726e-05, "loss": 0.2546, "step": 33089 }, { "epoch": 2.680654569021387, "grad_norm": 0.060112036764621735, "learning_rate": 7.329762815608264e-05, "loss": 0.2154, "step": 33090 }, { "epoch": 2.6807355800388852, "grad_norm": 0.06398256868124008, "learning_rate": 7.329312750348801e-05, "loss": 0.2722, "step": 33091 }, { "epoch": 2.6808165910563835, "grad_norm": 0.05962206423282623, "learning_rate": 7.328862685089338e-05, "loss": 0.2408, "step": 33092 }, { "epoch": 2.680897602073882, "grad_norm": 0.05923084542155266, "learning_rate": 7.328412619829876e-05, "loss": 0.2777, "step": 33093 }, { "epoch": 2.6809786130913804, "grad_norm": 0.07328286021947861, "learning_rate": 7.327962554570413e-05, "loss": 0.2516, "step": 33094 }, { "epoch": 2.6810596241088787, "grad_norm": 0.07436803728342056, "learning_rate": 7.32751248931095e-05, "loss": 0.215, "step": 33095 }, { "epoch": 2.6811406351263773, "grad_norm": 0.07537177950143814, "learning_rate": 7.327062424051488e-05, "loss": 0.2732, "step": 33096 }, { "epoch": 2.6812216461438756, "grad_norm": 0.0655427947640419, "learning_rate": 7.326612358792025e-05, "loss": 0.2564, "step": 33097 }, { "epoch": 2.681302657161374, "grad_norm": 0.06698904931545258, "learning_rate": 7.326162293532563e-05, "loss": 0.2322, "step": 33098 }, { "epoch": 2.6813836681788725, "grad_norm": 0.06953173130750656, "learning_rate": 7.3257122282731e-05, "loss": 0.2397, "step": 33099 }, { "epoch": 2.6814646791963708, "grad_norm": 0.061687178909778595, "learning_rate": 7.325262163013637e-05, "loss": 0.2374, "step": 33100 }, { "epoch": 2.681545690213869, "grad_norm": 0.0845770463347435, "learning_rate": 7.324812097754175e-05, "loss": 0.2528, "step": 33101 }, { "epoch": 2.6816267012313677, "grad_norm": 0.0615716390311718, "learning_rate": 7.324362032494712e-05, "loss": 0.2315, "step": 33102 }, { "epoch": 2.681707712248866, "grad_norm": 0.07067247480154037, "learning_rate": 7.32391196723525e-05, "loss": 0.2569, "step": 33103 }, { "epoch": 2.681788723266364, "grad_norm": 0.07423364371061325, "learning_rate": 7.323461901975787e-05, "loss": 0.2387, "step": 33104 }, { "epoch": 2.681869734283863, "grad_norm": 0.07920686155557632, "learning_rate": 7.323011836716324e-05, "loss": 0.2634, "step": 33105 }, { "epoch": 2.681950745301361, "grad_norm": 0.0680554062128067, "learning_rate": 7.322561771456861e-05, "loss": 0.2232, "step": 33106 }, { "epoch": 2.6820317563188594, "grad_norm": 0.07262624800205231, "learning_rate": 7.322111706197399e-05, "loss": 0.2345, "step": 33107 }, { "epoch": 2.6821127673363576, "grad_norm": 0.05929746478796005, "learning_rate": 7.321661640937936e-05, "loss": 0.2062, "step": 33108 }, { "epoch": 2.682193778353856, "grad_norm": 0.06272649019956589, "learning_rate": 7.321211575678474e-05, "loss": 0.2401, "step": 33109 }, { "epoch": 2.6822747893713546, "grad_norm": 0.07481082528829575, "learning_rate": 7.320761510419011e-05, "loss": 0.242, "step": 33110 }, { "epoch": 2.682355800388853, "grad_norm": 0.07007595151662827, "learning_rate": 7.320311445159548e-05, "loss": 0.2444, "step": 33111 }, { "epoch": 2.682436811406351, "grad_norm": 0.08007033914327621, "learning_rate": 7.319861379900086e-05, "loss": 0.2856, "step": 33112 }, { "epoch": 2.6825178224238497, "grad_norm": 0.06305108219385147, "learning_rate": 7.319411314640623e-05, "loss": 0.1845, "step": 33113 }, { "epoch": 2.682598833441348, "grad_norm": 0.06217300146818161, "learning_rate": 7.318961249381162e-05, "loss": 0.2333, "step": 33114 }, { "epoch": 2.682679844458846, "grad_norm": 0.0762304812669754, "learning_rate": 7.318511184121698e-05, "loss": 0.2566, "step": 33115 }, { "epoch": 2.682760855476345, "grad_norm": 0.064681276679039, "learning_rate": 7.318061118862235e-05, "loss": 0.2285, "step": 33116 }, { "epoch": 2.682841866493843, "grad_norm": 0.07641452550888062, "learning_rate": 7.317611053602774e-05, "loss": 0.2715, "step": 33117 }, { "epoch": 2.6829228775113414, "grad_norm": 0.07061993330717087, "learning_rate": 7.31716098834331e-05, "loss": 0.2617, "step": 33118 }, { "epoch": 2.68300388852884, "grad_norm": 0.07542899996042252, "learning_rate": 7.316710923083847e-05, "loss": 0.2281, "step": 33119 }, { "epoch": 2.6830848995463383, "grad_norm": 0.07717788964509964, "learning_rate": 7.316260857824386e-05, "loss": 0.2113, "step": 33120 }, { "epoch": 2.6831659105638366, "grad_norm": 0.08544004708528519, "learning_rate": 7.315810792564922e-05, "loss": 0.3001, "step": 33121 }, { "epoch": 2.6832469215813353, "grad_norm": 0.06345837563276291, "learning_rate": 7.315360727305459e-05, "loss": 0.2396, "step": 33122 }, { "epoch": 2.6833279325988335, "grad_norm": 0.07493044435977936, "learning_rate": 7.314910662045998e-05, "loss": 0.2772, "step": 33123 }, { "epoch": 2.6834089436163318, "grad_norm": 0.06237129122018814, "learning_rate": 7.314460596786534e-05, "loss": 0.2675, "step": 33124 }, { "epoch": 2.6834899546338304, "grad_norm": 0.08715277910232544, "learning_rate": 7.314010531527071e-05, "loss": 0.2656, "step": 33125 }, { "epoch": 2.6835709656513287, "grad_norm": 0.06376224756240845, "learning_rate": 7.31356046626761e-05, "loss": 0.264, "step": 33126 }, { "epoch": 2.683651976668827, "grad_norm": 0.05935446545481682, "learning_rate": 7.313110401008146e-05, "loss": 0.223, "step": 33127 }, { "epoch": 2.683732987686325, "grad_norm": 0.07266701012849808, "learning_rate": 7.312660335748683e-05, "loss": 0.2985, "step": 33128 }, { "epoch": 2.683813998703824, "grad_norm": 0.059860944747924805, "learning_rate": 7.312210270489222e-05, "loss": 0.2024, "step": 33129 }, { "epoch": 2.683895009721322, "grad_norm": 0.06587036699056625, "learning_rate": 7.311760205229758e-05, "loss": 0.245, "step": 33130 }, { "epoch": 2.6839760207388204, "grad_norm": 0.06060159206390381, "learning_rate": 7.311310139970297e-05, "loss": 0.2355, "step": 33131 }, { "epoch": 2.6840570317563186, "grad_norm": 0.06470111012458801, "learning_rate": 7.310860074710834e-05, "loss": 0.2146, "step": 33132 }, { "epoch": 2.6841380427738173, "grad_norm": 0.060420744121074677, "learning_rate": 7.31041000945137e-05, "loss": 0.2589, "step": 33133 }, { "epoch": 2.6842190537913155, "grad_norm": 0.07256756722927094, "learning_rate": 7.309959944191909e-05, "loss": 0.2549, "step": 33134 }, { "epoch": 2.684300064808814, "grad_norm": 0.07449455559253693, "learning_rate": 7.309509878932446e-05, "loss": 0.2264, "step": 33135 }, { "epoch": 2.6843810758263125, "grad_norm": 0.06787081807851791, "learning_rate": 7.309059813672982e-05, "loss": 0.2685, "step": 33136 }, { "epoch": 2.6844620868438107, "grad_norm": 0.08377574384212494, "learning_rate": 7.308609748413521e-05, "loss": 0.2421, "step": 33137 }, { "epoch": 2.684543097861309, "grad_norm": 0.06868404895067215, "learning_rate": 7.308159683154058e-05, "loss": 0.2213, "step": 33138 }, { "epoch": 2.6846241088788076, "grad_norm": 0.05533949285745621, "learning_rate": 7.307709617894594e-05, "loss": 0.2353, "step": 33139 }, { "epoch": 2.684705119896306, "grad_norm": 0.05881068855524063, "learning_rate": 7.307259552635133e-05, "loss": 0.2078, "step": 33140 }, { "epoch": 2.684786130913804, "grad_norm": 0.0701180249452591, "learning_rate": 7.30680948737567e-05, "loss": 0.214, "step": 33141 }, { "epoch": 2.684867141931303, "grad_norm": 0.07627440243959427, "learning_rate": 7.306359422116206e-05, "loss": 0.2459, "step": 33142 }, { "epoch": 2.684948152948801, "grad_norm": 0.06293842941522598, "learning_rate": 7.305909356856745e-05, "loss": 0.2094, "step": 33143 }, { "epoch": 2.6850291639662993, "grad_norm": 0.07441238313913345, "learning_rate": 7.305459291597282e-05, "loss": 0.2658, "step": 33144 }, { "epoch": 2.685110174983798, "grad_norm": 0.07221051305532455, "learning_rate": 7.305009226337818e-05, "loss": 0.2472, "step": 33145 }, { "epoch": 2.6851911860012962, "grad_norm": 0.06838735938072205, "learning_rate": 7.304559161078357e-05, "loss": 0.2299, "step": 33146 }, { "epoch": 2.6852721970187945, "grad_norm": 0.06920010596513748, "learning_rate": 7.304109095818895e-05, "loss": 0.2072, "step": 33147 }, { "epoch": 2.685353208036293, "grad_norm": 0.05869818478822708, "learning_rate": 7.30365903055943e-05, "loss": 0.1972, "step": 33148 }, { "epoch": 2.6854342190537914, "grad_norm": 0.0703733041882515, "learning_rate": 7.303208965299969e-05, "loss": 0.2579, "step": 33149 }, { "epoch": 2.6855152300712897, "grad_norm": 0.0641406923532486, "learning_rate": 7.302758900040507e-05, "loss": 0.2525, "step": 33150 }, { "epoch": 2.685596241088788, "grad_norm": 0.07498500496149063, "learning_rate": 7.302308834781043e-05, "loss": 0.2679, "step": 33151 }, { "epoch": 2.6856772521062866, "grad_norm": 0.07610327005386353, "learning_rate": 7.301858769521581e-05, "loss": 0.2749, "step": 33152 }, { "epoch": 2.685758263123785, "grad_norm": 0.06884650886058807, "learning_rate": 7.301408704262119e-05, "loss": 0.2293, "step": 33153 }, { "epoch": 2.685839274141283, "grad_norm": 0.07191795855760574, "learning_rate": 7.300958639002655e-05, "loss": 0.2463, "step": 33154 }, { "epoch": 2.6859202851587813, "grad_norm": 0.08212050050497055, "learning_rate": 7.300508573743193e-05, "loss": 0.2759, "step": 33155 }, { "epoch": 2.68600129617628, "grad_norm": 0.06252212077379227, "learning_rate": 7.300058508483731e-05, "loss": 0.2329, "step": 33156 }, { "epoch": 2.6860823071937783, "grad_norm": 0.07689528912305832, "learning_rate": 7.299608443224268e-05, "loss": 0.248, "step": 33157 }, { "epoch": 2.6861633182112765, "grad_norm": 0.05639709159731865, "learning_rate": 7.299158377964806e-05, "loss": 0.2008, "step": 33158 }, { "epoch": 2.686244329228775, "grad_norm": 0.06725417077541351, "learning_rate": 7.298708312705343e-05, "loss": 0.2233, "step": 33159 }, { "epoch": 2.6863253402462735, "grad_norm": 0.060404933989048004, "learning_rate": 7.29825824744588e-05, "loss": 0.2402, "step": 33160 }, { "epoch": 2.6864063512637717, "grad_norm": 0.07094196230173111, "learning_rate": 7.297808182186418e-05, "loss": 0.2344, "step": 33161 }, { "epoch": 2.6864873622812704, "grad_norm": 0.06894193589687347, "learning_rate": 7.297358116926955e-05, "loss": 0.2251, "step": 33162 }, { "epoch": 2.6865683732987686, "grad_norm": 0.05957801640033722, "learning_rate": 7.296908051667492e-05, "loss": 0.2377, "step": 33163 }, { "epoch": 2.686649384316267, "grad_norm": 0.061558082699775696, "learning_rate": 7.29645798640803e-05, "loss": 0.2337, "step": 33164 }, { "epoch": 2.6867303953337656, "grad_norm": 0.06628931313753128, "learning_rate": 7.296007921148567e-05, "loss": 0.239, "step": 33165 }, { "epoch": 2.686811406351264, "grad_norm": 0.07567416876554489, "learning_rate": 7.295557855889104e-05, "loss": 0.2472, "step": 33166 }, { "epoch": 2.686892417368762, "grad_norm": 0.055566683411598206, "learning_rate": 7.295107790629642e-05, "loss": 0.1925, "step": 33167 }, { "epoch": 2.6869734283862607, "grad_norm": 0.071644127368927, "learning_rate": 7.294657725370179e-05, "loss": 0.229, "step": 33168 }, { "epoch": 2.687054439403759, "grad_norm": 0.057638879865407944, "learning_rate": 7.294207660110717e-05, "loss": 0.2478, "step": 33169 }, { "epoch": 2.6871354504212572, "grad_norm": 0.06617935746908188, "learning_rate": 7.293757594851254e-05, "loss": 0.2413, "step": 33170 }, { "epoch": 2.687216461438756, "grad_norm": 0.055808231234550476, "learning_rate": 7.293307529591791e-05, "loss": 0.228, "step": 33171 }, { "epoch": 2.687297472456254, "grad_norm": 0.06902889162302017, "learning_rate": 7.292857464332329e-05, "loss": 0.2437, "step": 33172 }, { "epoch": 2.6873784834737524, "grad_norm": 0.061212435364723206, "learning_rate": 7.292407399072866e-05, "loss": 0.243, "step": 33173 }, { "epoch": 2.6874594944912507, "grad_norm": 0.06083257123827934, "learning_rate": 7.291957333813403e-05, "loss": 0.2048, "step": 33174 }, { "epoch": 2.6875405055087493, "grad_norm": 0.06691806018352509, "learning_rate": 7.29150726855394e-05, "loss": 0.2118, "step": 33175 }, { "epoch": 2.6876215165262476, "grad_norm": 0.06726226955652237, "learning_rate": 7.291057203294478e-05, "loss": 0.2679, "step": 33176 }, { "epoch": 2.687702527543746, "grad_norm": 0.06752733141183853, "learning_rate": 7.290607138035015e-05, "loss": 0.2157, "step": 33177 }, { "epoch": 2.687783538561244, "grad_norm": 0.06699246168136597, "learning_rate": 7.290157072775553e-05, "loss": 0.2987, "step": 33178 }, { "epoch": 2.6878645495787428, "grad_norm": 0.07415398210287094, "learning_rate": 7.28970700751609e-05, "loss": 0.227, "step": 33179 }, { "epoch": 2.687945560596241, "grad_norm": 0.07832382619380951, "learning_rate": 7.289256942256627e-05, "loss": 0.2113, "step": 33180 }, { "epoch": 2.6880265716137393, "grad_norm": 0.08317804336547852, "learning_rate": 7.288806876997165e-05, "loss": 0.2368, "step": 33181 }, { "epoch": 2.688107582631238, "grad_norm": 0.08504065871238708, "learning_rate": 7.288356811737702e-05, "loss": 0.2501, "step": 33182 }, { "epoch": 2.688188593648736, "grad_norm": 0.06972184777259827, "learning_rate": 7.287906746478241e-05, "loss": 0.2503, "step": 33183 }, { "epoch": 2.6882696046662344, "grad_norm": 0.06454414874315262, "learning_rate": 7.287456681218777e-05, "loss": 0.2296, "step": 33184 }, { "epoch": 2.688350615683733, "grad_norm": 0.07722259312868118, "learning_rate": 7.287006615959314e-05, "loss": 0.2707, "step": 33185 }, { "epoch": 2.6884316267012314, "grad_norm": 0.07629488408565521, "learning_rate": 7.286556550699853e-05, "loss": 0.2498, "step": 33186 }, { "epoch": 2.6885126377187296, "grad_norm": 0.06469930708408356, "learning_rate": 7.286106485440389e-05, "loss": 0.2165, "step": 33187 }, { "epoch": 2.6885936487362283, "grad_norm": 0.07222463935613632, "learning_rate": 7.285656420180926e-05, "loss": 0.2577, "step": 33188 }, { "epoch": 2.6886746597537265, "grad_norm": 0.06537472456693649, "learning_rate": 7.285206354921465e-05, "loss": 0.2383, "step": 33189 }, { "epoch": 2.688755670771225, "grad_norm": 0.06282258778810501, "learning_rate": 7.284756289662001e-05, "loss": 0.2593, "step": 33190 }, { "epoch": 2.6888366817887235, "grad_norm": 0.07500491291284561, "learning_rate": 7.284306224402538e-05, "loss": 0.2235, "step": 33191 }, { "epoch": 2.6889176928062217, "grad_norm": 0.0683460459113121, "learning_rate": 7.283856159143077e-05, "loss": 0.2503, "step": 33192 }, { "epoch": 2.68899870382372, "grad_norm": 0.06435289978981018, "learning_rate": 7.283406093883613e-05, "loss": 0.264, "step": 33193 }, { "epoch": 2.6890797148412187, "grad_norm": 0.06583774089813232, "learning_rate": 7.28295602862415e-05, "loss": 0.2308, "step": 33194 }, { "epoch": 2.689160725858717, "grad_norm": 0.079376220703125, "learning_rate": 7.282505963364689e-05, "loss": 0.266, "step": 33195 }, { "epoch": 2.689241736876215, "grad_norm": 0.06783498823642731, "learning_rate": 7.282055898105225e-05, "loss": 0.2771, "step": 33196 }, { "epoch": 2.6893227478937134, "grad_norm": 0.085577592253685, "learning_rate": 7.281605832845763e-05, "loss": 0.2668, "step": 33197 }, { "epoch": 2.689403758911212, "grad_norm": 0.08192739635705948, "learning_rate": 7.281155767586301e-05, "loss": 0.2549, "step": 33198 }, { "epoch": 2.6894847699287103, "grad_norm": 0.05756738781929016, "learning_rate": 7.280705702326837e-05, "loss": 0.2618, "step": 33199 }, { "epoch": 2.6895657809462086, "grad_norm": 0.06547581404447556, "learning_rate": 7.280255637067375e-05, "loss": 0.2215, "step": 33200 }, { "epoch": 2.689646791963707, "grad_norm": 0.08526495844125748, "learning_rate": 7.279805571807913e-05, "loss": 0.2376, "step": 33201 }, { "epoch": 2.6897278029812055, "grad_norm": 0.07014923542737961, "learning_rate": 7.27935550654845e-05, "loss": 0.2528, "step": 33202 }, { "epoch": 2.6898088139987038, "grad_norm": 0.06240760535001755, "learning_rate": 7.278905441288987e-05, "loss": 0.2287, "step": 33203 }, { "epoch": 2.689889825016202, "grad_norm": 0.06542903184890747, "learning_rate": 7.278455376029525e-05, "loss": 0.2727, "step": 33204 }, { "epoch": 2.6899708360337007, "grad_norm": 0.05773237347602844, "learning_rate": 7.278005310770061e-05, "loss": 0.2155, "step": 33205 }, { "epoch": 2.690051847051199, "grad_norm": 0.0647493302822113, "learning_rate": 7.277555245510599e-05, "loss": 0.2355, "step": 33206 }, { "epoch": 2.690132858068697, "grad_norm": 0.08845240622758865, "learning_rate": 7.277105180251138e-05, "loss": 0.2547, "step": 33207 }, { "epoch": 2.690213869086196, "grad_norm": 0.07543900609016418, "learning_rate": 7.276655114991674e-05, "loss": 0.2389, "step": 33208 }, { "epoch": 2.690294880103694, "grad_norm": 0.07690194994211197, "learning_rate": 7.276205049732212e-05, "loss": 0.234, "step": 33209 }, { "epoch": 2.6903758911211924, "grad_norm": 0.08355151861906052, "learning_rate": 7.27575498447275e-05, "loss": 0.2186, "step": 33210 }, { "epoch": 2.690456902138691, "grad_norm": 0.06553471833467484, "learning_rate": 7.275304919213286e-05, "loss": 0.2411, "step": 33211 }, { "epoch": 2.6905379131561893, "grad_norm": 0.05886956304311752, "learning_rate": 7.274854853953824e-05, "loss": 0.2046, "step": 33212 }, { "epoch": 2.6906189241736875, "grad_norm": 0.06500393897294998, "learning_rate": 7.274404788694362e-05, "loss": 0.2312, "step": 33213 }, { "epoch": 2.690699935191186, "grad_norm": 0.060121990740299225, "learning_rate": 7.273954723434898e-05, "loss": 0.2289, "step": 33214 }, { "epoch": 2.6907809462086845, "grad_norm": 0.07940906286239624, "learning_rate": 7.273504658175436e-05, "loss": 0.2924, "step": 33215 }, { "epoch": 2.6908619572261827, "grad_norm": 0.07092858105897903, "learning_rate": 7.273054592915974e-05, "loss": 0.2849, "step": 33216 }, { "epoch": 2.6909429682436814, "grad_norm": 0.07701389491558075, "learning_rate": 7.27260452765651e-05, "loss": 0.231, "step": 33217 }, { "epoch": 2.6910239792611796, "grad_norm": 0.07645139843225479, "learning_rate": 7.272154462397049e-05, "loss": 0.2716, "step": 33218 }, { "epoch": 2.691104990278678, "grad_norm": 0.07623913884162903, "learning_rate": 7.271704397137586e-05, "loss": 0.2404, "step": 33219 }, { "epoch": 2.691186001296176, "grad_norm": 0.07207286357879639, "learning_rate": 7.271254331878122e-05, "loss": 0.2685, "step": 33220 }, { "epoch": 2.691267012313675, "grad_norm": 0.06755689531564713, "learning_rate": 7.27080426661866e-05, "loss": 0.2, "step": 33221 }, { "epoch": 2.691348023331173, "grad_norm": 0.048205725848674774, "learning_rate": 7.270354201359198e-05, "loss": 0.2153, "step": 33222 }, { "epoch": 2.6914290343486713, "grad_norm": 0.056371089071035385, "learning_rate": 7.269904136099734e-05, "loss": 0.229, "step": 33223 }, { "epoch": 2.6915100453661696, "grad_norm": 0.07858487218618393, "learning_rate": 7.269454070840273e-05, "loss": 0.2349, "step": 33224 }, { "epoch": 2.6915910563836682, "grad_norm": 0.06717490404844284, "learning_rate": 7.26900400558081e-05, "loss": 0.2728, "step": 33225 }, { "epoch": 2.6916720674011665, "grad_norm": 0.08073551952838898, "learning_rate": 7.268553940321346e-05, "loss": 0.2517, "step": 33226 }, { "epoch": 2.6917530784186647, "grad_norm": 0.06915895640850067, "learning_rate": 7.268103875061885e-05, "loss": 0.2544, "step": 33227 }, { "epoch": 2.6918340894361634, "grad_norm": 0.09112341701984406, "learning_rate": 7.267653809802422e-05, "loss": 0.2584, "step": 33228 }, { "epoch": 2.6919151004536617, "grad_norm": 0.056663673371076584, "learning_rate": 7.267203744542958e-05, "loss": 0.2328, "step": 33229 }, { "epoch": 2.69199611147116, "grad_norm": 0.06987922638654709, "learning_rate": 7.266753679283497e-05, "loss": 0.2238, "step": 33230 }, { "epoch": 2.6920771224886586, "grad_norm": 0.06600745767354965, "learning_rate": 7.266303614024034e-05, "loss": 0.2534, "step": 33231 }, { "epoch": 2.692158133506157, "grad_norm": 0.07932254672050476, "learning_rate": 7.26585354876457e-05, "loss": 0.2912, "step": 33232 }, { "epoch": 2.692239144523655, "grad_norm": 0.0636344626545906, "learning_rate": 7.265403483505109e-05, "loss": 0.2158, "step": 33233 }, { "epoch": 2.692320155541154, "grad_norm": 0.06788277626037598, "learning_rate": 7.264953418245646e-05, "loss": 0.243, "step": 33234 }, { "epoch": 2.692401166558652, "grad_norm": 0.07238825410604477, "learning_rate": 7.264503352986184e-05, "loss": 0.2375, "step": 33235 }, { "epoch": 2.6924821775761503, "grad_norm": 0.07912512123584747, "learning_rate": 7.264053287726721e-05, "loss": 0.2399, "step": 33236 }, { "epoch": 2.692563188593649, "grad_norm": 0.06999436765909195, "learning_rate": 7.263603222467258e-05, "loss": 0.2532, "step": 33237 }, { "epoch": 2.692644199611147, "grad_norm": 0.0635719895362854, "learning_rate": 7.263153157207796e-05, "loss": 0.2021, "step": 33238 }, { "epoch": 2.6927252106286454, "grad_norm": 0.0745772048830986, "learning_rate": 7.262703091948333e-05, "loss": 0.2395, "step": 33239 }, { "epoch": 2.692806221646144, "grad_norm": 0.06520460546016693, "learning_rate": 7.26225302668887e-05, "loss": 0.2094, "step": 33240 }, { "epoch": 2.6928872326636424, "grad_norm": 0.08982928842306137, "learning_rate": 7.261802961429408e-05, "loss": 0.255, "step": 33241 }, { "epoch": 2.6929682436811406, "grad_norm": 0.07295730710029602, "learning_rate": 7.261352896169945e-05, "loss": 0.2429, "step": 33242 }, { "epoch": 2.693049254698639, "grad_norm": 0.08273440599441528, "learning_rate": 7.260902830910483e-05, "loss": 0.2343, "step": 33243 }, { "epoch": 2.693130265716137, "grad_norm": 0.0663905143737793, "learning_rate": 7.26045276565102e-05, "loss": 0.216, "step": 33244 }, { "epoch": 2.693211276733636, "grad_norm": 0.0648883581161499, "learning_rate": 7.260002700391557e-05, "loss": 0.2583, "step": 33245 }, { "epoch": 2.693292287751134, "grad_norm": 0.0766635537147522, "learning_rate": 7.259552635132095e-05, "loss": 0.2446, "step": 33246 }, { "epoch": 2.6933732987686323, "grad_norm": 0.059019945561885834, "learning_rate": 7.259102569872632e-05, "loss": 0.2218, "step": 33247 }, { "epoch": 2.693454309786131, "grad_norm": 0.07485313713550568, "learning_rate": 7.25865250461317e-05, "loss": 0.2732, "step": 33248 }, { "epoch": 2.6935353208036292, "grad_norm": 0.05649654194712639, "learning_rate": 7.258202439353707e-05, "loss": 0.2228, "step": 33249 }, { "epoch": 2.6936163318211275, "grad_norm": 0.06117332726716995, "learning_rate": 7.257752374094244e-05, "loss": 0.2393, "step": 33250 }, { "epoch": 2.693697342838626, "grad_norm": 0.07414082437753677, "learning_rate": 7.257302308834781e-05, "loss": 0.2713, "step": 33251 }, { "epoch": 2.6937783538561244, "grad_norm": 0.06634288281202316, "learning_rate": 7.256852243575319e-05, "loss": 0.2481, "step": 33252 }, { "epoch": 2.6938593648736227, "grad_norm": 0.08081548660993576, "learning_rate": 7.256402178315856e-05, "loss": 0.2457, "step": 33253 }, { "epoch": 2.6939403758911213, "grad_norm": 0.07430370151996613, "learning_rate": 7.255952113056393e-05, "loss": 0.2422, "step": 33254 }, { "epoch": 2.6940213869086196, "grad_norm": 0.05825252830982208, "learning_rate": 7.255502047796931e-05, "loss": 0.2506, "step": 33255 }, { "epoch": 2.694102397926118, "grad_norm": 0.06999190151691437, "learning_rate": 7.255051982537468e-05, "loss": 0.2349, "step": 33256 }, { "epoch": 2.6941834089436165, "grad_norm": 0.0780666321516037, "learning_rate": 7.254601917278006e-05, "loss": 0.2406, "step": 33257 }, { "epoch": 2.6942644199611148, "grad_norm": 0.06424911320209503, "learning_rate": 7.254151852018543e-05, "loss": 0.2341, "step": 33258 }, { "epoch": 2.694345430978613, "grad_norm": 0.06545555591583252, "learning_rate": 7.25370178675908e-05, "loss": 0.2404, "step": 33259 }, { "epoch": 2.6944264419961117, "grad_norm": 0.07272839546203613, "learning_rate": 7.253251721499618e-05, "loss": 0.2459, "step": 33260 }, { "epoch": 2.69450745301361, "grad_norm": 0.05883920565247536, "learning_rate": 7.252801656240155e-05, "loss": 0.2115, "step": 33261 }, { "epoch": 2.694588464031108, "grad_norm": 0.08189603686332703, "learning_rate": 7.252351590980692e-05, "loss": 0.2414, "step": 33262 }, { "epoch": 2.694669475048607, "grad_norm": 0.061765994876623154, "learning_rate": 7.25190152572123e-05, "loss": 0.2222, "step": 33263 }, { "epoch": 2.694750486066105, "grad_norm": 0.06287230551242828, "learning_rate": 7.251451460461768e-05, "loss": 0.2543, "step": 33264 }, { "epoch": 2.6948314970836034, "grad_norm": 0.07255005091428757, "learning_rate": 7.251001395202304e-05, "loss": 0.2561, "step": 33265 }, { "epoch": 2.6949125081011016, "grad_norm": 0.06739959120750427, "learning_rate": 7.250551329942842e-05, "loss": 0.2254, "step": 33266 }, { "epoch": 2.6949935191186, "grad_norm": 0.08224621415138245, "learning_rate": 7.25010126468338e-05, "loss": 0.2517, "step": 33267 }, { "epoch": 2.6950745301360985, "grad_norm": 0.06153412163257599, "learning_rate": 7.249651199423917e-05, "loss": 0.2531, "step": 33268 }, { "epoch": 2.695155541153597, "grad_norm": 0.06801117956638336, "learning_rate": 7.249201134164454e-05, "loss": 0.2405, "step": 33269 }, { "epoch": 2.695236552171095, "grad_norm": 0.06874486804008484, "learning_rate": 7.248751068904993e-05, "loss": 0.2378, "step": 33270 }, { "epoch": 2.6953175631885937, "grad_norm": 0.06314772367477417, "learning_rate": 7.248301003645529e-05, "loss": 0.242, "step": 33271 }, { "epoch": 2.695398574206092, "grad_norm": 0.08381687849760056, "learning_rate": 7.247850938386066e-05, "loss": 0.2253, "step": 33272 }, { "epoch": 2.69547958522359, "grad_norm": 0.06956927478313446, "learning_rate": 7.247400873126605e-05, "loss": 0.2453, "step": 33273 }, { "epoch": 2.695560596241089, "grad_norm": 0.09207741171121597, "learning_rate": 7.246950807867141e-05, "loss": 0.2703, "step": 33274 }, { "epoch": 2.695641607258587, "grad_norm": 0.07127492129802704, "learning_rate": 7.246500742607678e-05, "loss": 0.2314, "step": 33275 }, { "epoch": 2.6957226182760854, "grad_norm": 0.07321468740701675, "learning_rate": 7.246050677348217e-05, "loss": 0.2373, "step": 33276 }, { "epoch": 2.695803629293584, "grad_norm": 0.08432543277740479, "learning_rate": 7.245600612088753e-05, "loss": 0.2574, "step": 33277 }, { "epoch": 2.6958846403110823, "grad_norm": 0.06618187576532364, "learning_rate": 7.24515054682929e-05, "loss": 0.2416, "step": 33278 }, { "epoch": 2.6959656513285806, "grad_norm": 0.05841851234436035, "learning_rate": 7.244700481569829e-05, "loss": 0.2438, "step": 33279 }, { "epoch": 2.6960466623460793, "grad_norm": 0.0806334912776947, "learning_rate": 7.244250416310365e-05, "loss": 0.2827, "step": 33280 }, { "epoch": 2.6961276733635775, "grad_norm": 0.06472641229629517, "learning_rate": 7.243800351050902e-05, "loss": 0.2229, "step": 33281 }, { "epoch": 2.6962086843810757, "grad_norm": 0.07949283719062805, "learning_rate": 7.243350285791441e-05, "loss": 0.2679, "step": 33282 }, { "epoch": 2.6962896953985744, "grad_norm": 0.06775813549757004, "learning_rate": 7.242900220531977e-05, "loss": 0.2363, "step": 33283 }, { "epoch": 2.6963707064160727, "grad_norm": 0.06058688834309578, "learning_rate": 7.242450155272514e-05, "loss": 0.2377, "step": 33284 }, { "epoch": 2.696451717433571, "grad_norm": 0.05882725119590759, "learning_rate": 7.242000090013053e-05, "loss": 0.2168, "step": 33285 }, { "epoch": 2.6965327284510696, "grad_norm": 0.06747718900442123, "learning_rate": 7.241550024753589e-05, "loss": 0.2318, "step": 33286 }, { "epoch": 2.696613739468568, "grad_norm": 0.07593017816543579, "learning_rate": 7.241099959494126e-05, "loss": 0.232, "step": 33287 }, { "epoch": 2.696694750486066, "grad_norm": 0.055422279983758926, "learning_rate": 7.240649894234665e-05, "loss": 0.2036, "step": 33288 }, { "epoch": 2.6967757615035644, "grad_norm": 0.07576458901166916, "learning_rate": 7.240199828975201e-05, "loss": 0.2645, "step": 33289 }, { "epoch": 2.6968567725210626, "grad_norm": 0.06819913536310196, "learning_rate": 7.23974976371574e-05, "loss": 0.2327, "step": 33290 }, { "epoch": 2.6969377835385613, "grad_norm": 0.0817083939909935, "learning_rate": 7.239299698456277e-05, "loss": 0.2123, "step": 33291 }, { "epoch": 2.6970187945560595, "grad_norm": 0.05920260399580002, "learning_rate": 7.238849633196813e-05, "loss": 0.202, "step": 33292 }, { "epoch": 2.6970998055735578, "grad_norm": 0.08613543212413788, "learning_rate": 7.238399567937352e-05, "loss": 0.2552, "step": 33293 }, { "epoch": 2.6971808165910565, "grad_norm": 0.07524523138999939, "learning_rate": 7.237949502677889e-05, "loss": 0.2334, "step": 33294 }, { "epoch": 2.6972618276085547, "grad_norm": 0.07589299231767654, "learning_rate": 7.237499437418425e-05, "loss": 0.2353, "step": 33295 }, { "epoch": 2.697342838626053, "grad_norm": 0.06346601247787476, "learning_rate": 7.237049372158964e-05, "loss": 0.2238, "step": 33296 }, { "epoch": 2.6974238496435516, "grad_norm": 0.09287595748901367, "learning_rate": 7.236599306899501e-05, "loss": 0.2437, "step": 33297 }, { "epoch": 2.69750486066105, "grad_norm": 0.07111447304487228, "learning_rate": 7.236149241640037e-05, "loss": 0.2139, "step": 33298 }, { "epoch": 2.697585871678548, "grad_norm": 0.0682421624660492, "learning_rate": 7.235699176380576e-05, "loss": 0.2269, "step": 33299 }, { "epoch": 2.697666882696047, "grad_norm": 0.07002420723438263, "learning_rate": 7.235249111121113e-05, "loss": 0.2146, "step": 33300 }, { "epoch": 2.697747893713545, "grad_norm": 0.07240556925535202, "learning_rate": 7.23479904586165e-05, "loss": 0.2509, "step": 33301 }, { "epoch": 2.6978289047310433, "grad_norm": 0.07018124312162399, "learning_rate": 7.234348980602188e-05, "loss": 0.2489, "step": 33302 }, { "epoch": 2.697909915748542, "grad_norm": 0.08013798296451569, "learning_rate": 7.233898915342725e-05, "loss": 0.2603, "step": 33303 }, { "epoch": 2.6979909267660402, "grad_norm": 0.07782293856143951, "learning_rate": 7.233448850083262e-05, "loss": 0.2498, "step": 33304 }, { "epoch": 2.6980719377835385, "grad_norm": 0.07612436264753342, "learning_rate": 7.2329987848238e-05, "loss": 0.2671, "step": 33305 }, { "epoch": 2.698152948801037, "grad_norm": 0.0673048198223114, "learning_rate": 7.232548719564338e-05, "loss": 0.2325, "step": 33306 }, { "epoch": 2.6982339598185354, "grad_norm": 0.06546095013618469, "learning_rate": 7.232098654304874e-05, "loss": 0.209, "step": 33307 }, { "epoch": 2.6983149708360337, "grad_norm": 0.06932816654443741, "learning_rate": 7.231648589045412e-05, "loss": 0.2107, "step": 33308 }, { "epoch": 2.6983959818535324, "grad_norm": 0.04809468239545822, "learning_rate": 7.23119852378595e-05, "loss": 0.2242, "step": 33309 }, { "epoch": 2.6984769928710306, "grad_norm": 0.07442724704742432, "learning_rate": 7.230748458526486e-05, "loss": 0.2577, "step": 33310 }, { "epoch": 2.698558003888529, "grad_norm": 0.06932736188173294, "learning_rate": 7.230298393267024e-05, "loss": 0.2478, "step": 33311 }, { "epoch": 2.698639014906027, "grad_norm": 0.06765572726726532, "learning_rate": 7.229848328007562e-05, "loss": 0.1954, "step": 33312 }, { "epoch": 2.6987200259235253, "grad_norm": 0.07260085642337799, "learning_rate": 7.229398262748098e-05, "loss": 0.2151, "step": 33313 }, { "epoch": 2.698801036941024, "grad_norm": 0.07135795801877975, "learning_rate": 7.228948197488636e-05, "loss": 0.2522, "step": 33314 }, { "epoch": 2.6988820479585223, "grad_norm": 0.06212751194834709, "learning_rate": 7.228498132229174e-05, "loss": 0.1974, "step": 33315 }, { "epoch": 2.6989630589760205, "grad_norm": 0.07515834271907806, "learning_rate": 7.228048066969711e-05, "loss": 0.2718, "step": 33316 }, { "epoch": 2.699044069993519, "grad_norm": 0.08626675605773926, "learning_rate": 7.227598001710249e-05, "loss": 0.2927, "step": 33317 }, { "epoch": 2.6991250810110174, "grad_norm": 0.062219806015491486, "learning_rate": 7.227147936450786e-05, "loss": 0.226, "step": 33318 }, { "epoch": 2.6992060920285157, "grad_norm": 0.05566618964076042, "learning_rate": 7.226697871191323e-05, "loss": 0.2235, "step": 33319 }, { "epoch": 2.6992871030460144, "grad_norm": 0.07704995572566986, "learning_rate": 7.22624780593186e-05, "loss": 0.2252, "step": 33320 }, { "epoch": 2.6993681140635126, "grad_norm": 0.06611926108598709, "learning_rate": 7.225797740672398e-05, "loss": 0.2323, "step": 33321 }, { "epoch": 2.699449125081011, "grad_norm": 0.08879465609788895, "learning_rate": 7.225347675412935e-05, "loss": 0.2155, "step": 33322 }, { "epoch": 2.6995301360985096, "grad_norm": 0.06557431817054749, "learning_rate": 7.224897610153473e-05, "loss": 0.2402, "step": 33323 }, { "epoch": 2.699611147116008, "grad_norm": 0.06380093842744827, "learning_rate": 7.22444754489401e-05, "loss": 0.2188, "step": 33324 }, { "epoch": 2.699692158133506, "grad_norm": 0.075450100004673, "learning_rate": 7.223997479634547e-05, "loss": 0.2284, "step": 33325 }, { "epoch": 2.6997731691510047, "grad_norm": 0.0675964429974556, "learning_rate": 7.223547414375085e-05, "loss": 0.2116, "step": 33326 }, { "epoch": 2.699854180168503, "grad_norm": 0.0674784854054451, "learning_rate": 7.223097349115622e-05, "loss": 0.256, "step": 33327 }, { "epoch": 2.6999351911860012, "grad_norm": 0.08821087330579758, "learning_rate": 7.22264728385616e-05, "loss": 0.2311, "step": 33328 }, { "epoch": 2.7000162022035, "grad_norm": 0.0646054744720459, "learning_rate": 7.222197218596697e-05, "loss": 0.2302, "step": 33329 }, { "epoch": 2.700097213220998, "grad_norm": 0.07179594039916992, "learning_rate": 7.221747153337234e-05, "loss": 0.277, "step": 33330 }, { "epoch": 2.7001782242384964, "grad_norm": 0.07757652550935745, "learning_rate": 7.221297088077772e-05, "loss": 0.2516, "step": 33331 }, { "epoch": 2.7002592352559946, "grad_norm": 0.06800487637519836, "learning_rate": 7.220847022818309e-05, "loss": 0.256, "step": 33332 }, { "epoch": 2.7003402462734933, "grad_norm": 0.06418580561876297, "learning_rate": 7.220396957558846e-05, "loss": 0.2208, "step": 33333 }, { "epoch": 2.7004212572909916, "grad_norm": 0.10417748242616653, "learning_rate": 7.219946892299384e-05, "loss": 0.301, "step": 33334 }, { "epoch": 2.70050226830849, "grad_norm": 0.06674910336732864, "learning_rate": 7.219496827039921e-05, "loss": 0.2428, "step": 33335 }, { "epoch": 2.700583279325988, "grad_norm": 0.052473656833171844, "learning_rate": 7.219046761780458e-05, "loss": 0.1834, "step": 33336 }, { "epoch": 2.7006642903434868, "grad_norm": 0.07685059309005737, "learning_rate": 7.218596696520996e-05, "loss": 0.2553, "step": 33337 }, { "epoch": 2.700745301360985, "grad_norm": 0.062406525015830994, "learning_rate": 7.218146631261533e-05, "loss": 0.2381, "step": 33338 }, { "epoch": 2.7008263123784833, "grad_norm": 0.0681590810418129, "learning_rate": 7.21769656600207e-05, "loss": 0.2208, "step": 33339 }, { "epoch": 2.700907323395982, "grad_norm": 0.06955277919769287, "learning_rate": 7.217246500742608e-05, "loss": 0.2628, "step": 33340 }, { "epoch": 2.70098833441348, "grad_norm": 0.07531130313873291, "learning_rate": 7.216796435483145e-05, "loss": 0.2291, "step": 33341 }, { "epoch": 2.7010693454309784, "grad_norm": 0.0607423335313797, "learning_rate": 7.216346370223684e-05, "loss": 0.208, "step": 33342 }, { "epoch": 2.701150356448477, "grad_norm": 0.0653001219034195, "learning_rate": 7.21589630496422e-05, "loss": 0.232, "step": 33343 }, { "epoch": 2.7012313674659754, "grad_norm": 0.07373838126659393, "learning_rate": 7.215446239704757e-05, "loss": 0.2258, "step": 33344 }, { "epoch": 2.7013123784834736, "grad_norm": 0.07639024406671524, "learning_rate": 7.214996174445296e-05, "loss": 0.2367, "step": 33345 }, { "epoch": 2.7013933895009723, "grad_norm": 0.06796936690807343, "learning_rate": 7.214546109185832e-05, "loss": 0.2381, "step": 33346 }, { "epoch": 2.7014744005184705, "grad_norm": 0.07707379758358002, "learning_rate": 7.21409604392637e-05, "loss": 0.2342, "step": 33347 }, { "epoch": 2.701555411535969, "grad_norm": 0.06277401000261307, "learning_rate": 7.213645978666908e-05, "loss": 0.2317, "step": 33348 }, { "epoch": 2.7016364225534675, "grad_norm": 0.05920235440135002, "learning_rate": 7.213195913407444e-05, "loss": 0.2417, "step": 33349 }, { "epoch": 2.7017174335709657, "grad_norm": 0.072438545525074, "learning_rate": 7.212745848147981e-05, "loss": 0.2415, "step": 33350 }, { "epoch": 2.701798444588464, "grad_norm": 0.07473402470350266, "learning_rate": 7.21229578288852e-05, "loss": 0.2696, "step": 33351 }, { "epoch": 2.7018794556059627, "grad_norm": 0.07099024951457977, "learning_rate": 7.211845717629056e-05, "loss": 0.2352, "step": 33352 }, { "epoch": 2.701960466623461, "grad_norm": 0.06595303118228912, "learning_rate": 7.211395652369594e-05, "loss": 0.2281, "step": 33353 }, { "epoch": 2.702041477640959, "grad_norm": 0.067349873483181, "learning_rate": 7.210945587110132e-05, "loss": 0.2216, "step": 33354 }, { "epoch": 2.7021224886584574, "grad_norm": 0.0640546903014183, "learning_rate": 7.210495521850668e-05, "loss": 0.2381, "step": 33355 }, { "epoch": 2.702203499675956, "grad_norm": 0.07349754124879837, "learning_rate": 7.210045456591206e-05, "loss": 0.3067, "step": 33356 }, { "epoch": 2.7022845106934543, "grad_norm": 0.0878182053565979, "learning_rate": 7.209595391331744e-05, "loss": 0.2244, "step": 33357 }, { "epoch": 2.7023655217109526, "grad_norm": 0.06644090265035629, "learning_rate": 7.20914532607228e-05, "loss": 0.2537, "step": 33358 }, { "epoch": 2.702446532728451, "grad_norm": 0.067959725856781, "learning_rate": 7.208695260812818e-05, "loss": 0.2672, "step": 33359 }, { "epoch": 2.7025275437459495, "grad_norm": 0.06765919923782349, "learning_rate": 7.208245195553356e-05, "loss": 0.2533, "step": 33360 }, { "epoch": 2.7026085547634477, "grad_norm": 0.07062802463769913, "learning_rate": 7.207795130293892e-05, "loss": 0.2377, "step": 33361 }, { "epoch": 2.702689565780946, "grad_norm": 0.07054631412029266, "learning_rate": 7.20734506503443e-05, "loss": 0.2494, "step": 33362 }, { "epoch": 2.7027705767984447, "grad_norm": 0.07606198638677597, "learning_rate": 7.206894999774968e-05, "loss": 0.246, "step": 33363 }, { "epoch": 2.702851587815943, "grad_norm": 0.08337026089429855, "learning_rate": 7.206444934515504e-05, "loss": 0.2349, "step": 33364 }, { "epoch": 2.702932598833441, "grad_norm": 0.0638199970126152, "learning_rate": 7.205994869256042e-05, "loss": 0.2243, "step": 33365 }, { "epoch": 2.70301360985094, "grad_norm": 0.08534477651119232, "learning_rate": 7.20554480399658e-05, "loss": 0.2551, "step": 33366 }, { "epoch": 2.703094620868438, "grad_norm": 0.06455480307340622, "learning_rate": 7.205094738737117e-05, "loss": 0.2196, "step": 33367 }, { "epoch": 2.7031756318859363, "grad_norm": 0.07369302958250046, "learning_rate": 7.204644673477655e-05, "loss": 0.223, "step": 33368 }, { "epoch": 2.703256642903435, "grad_norm": 0.06185693293809891, "learning_rate": 7.204194608218193e-05, "loss": 0.2207, "step": 33369 }, { "epoch": 2.7033376539209333, "grad_norm": 0.0870005264878273, "learning_rate": 7.203744542958729e-05, "loss": 0.2543, "step": 33370 }, { "epoch": 2.7034186649384315, "grad_norm": 0.0734618604183197, "learning_rate": 7.203294477699267e-05, "loss": 0.2882, "step": 33371 }, { "epoch": 2.70349967595593, "grad_norm": 0.08058614283800125, "learning_rate": 7.202844412439805e-05, "loss": 0.2344, "step": 33372 }, { "epoch": 2.7035806869734285, "grad_norm": 0.06244783475995064, "learning_rate": 7.202394347180341e-05, "loss": 0.2252, "step": 33373 }, { "epoch": 2.7036616979909267, "grad_norm": 0.06978266686201096, "learning_rate": 7.20194428192088e-05, "loss": 0.2308, "step": 33374 }, { "epoch": 2.7037427090084254, "grad_norm": 0.0777072086930275, "learning_rate": 7.201494216661417e-05, "loss": 0.2629, "step": 33375 }, { "epoch": 2.7038237200259236, "grad_norm": 0.07009978592395782, "learning_rate": 7.201044151401953e-05, "loss": 0.2857, "step": 33376 }, { "epoch": 2.703904731043422, "grad_norm": 0.07170268893241882, "learning_rate": 7.200594086142492e-05, "loss": 0.2273, "step": 33377 }, { "epoch": 2.70398574206092, "grad_norm": 0.06669243425130844, "learning_rate": 7.200144020883029e-05, "loss": 0.2396, "step": 33378 }, { "epoch": 2.704066753078419, "grad_norm": 0.0675826445221901, "learning_rate": 7.199693955623565e-05, "loss": 0.2302, "step": 33379 }, { "epoch": 2.704147764095917, "grad_norm": 0.07260756194591522, "learning_rate": 7.199243890364104e-05, "loss": 0.2338, "step": 33380 }, { "epoch": 2.7042287751134153, "grad_norm": 0.06512048840522766, "learning_rate": 7.198793825104641e-05, "loss": 0.2291, "step": 33381 }, { "epoch": 2.7043097861309136, "grad_norm": 0.06933195143938065, "learning_rate": 7.198343759845177e-05, "loss": 0.2877, "step": 33382 }, { "epoch": 2.7043907971484122, "grad_norm": 0.06995735317468643, "learning_rate": 7.197893694585716e-05, "loss": 0.2208, "step": 33383 }, { "epoch": 2.7044718081659105, "grad_norm": 0.07151626795530319, "learning_rate": 7.197443629326253e-05, "loss": 0.236, "step": 33384 }, { "epoch": 2.7045528191834087, "grad_norm": 0.08576501160860062, "learning_rate": 7.196993564066789e-05, "loss": 0.2119, "step": 33385 }, { "epoch": 2.7046338302009074, "grad_norm": 0.06742434948682785, "learning_rate": 7.196543498807328e-05, "loss": 0.2228, "step": 33386 }, { "epoch": 2.7047148412184057, "grad_norm": 0.07232806086540222, "learning_rate": 7.196093433547865e-05, "loss": 0.2335, "step": 33387 }, { "epoch": 2.704795852235904, "grad_norm": 0.06070084869861603, "learning_rate": 7.195643368288401e-05, "loss": 0.2159, "step": 33388 }, { "epoch": 2.7048768632534026, "grad_norm": 0.0777098536491394, "learning_rate": 7.19519330302894e-05, "loss": 0.2654, "step": 33389 }, { "epoch": 2.704957874270901, "grad_norm": 0.07623117417097092, "learning_rate": 7.194743237769477e-05, "loss": 0.2627, "step": 33390 }, { "epoch": 2.705038885288399, "grad_norm": 0.07137143611907959, "learning_rate": 7.194293172510013e-05, "loss": 0.1909, "step": 33391 }, { "epoch": 2.7051198963058978, "grad_norm": 0.06881123781204224, "learning_rate": 7.193843107250552e-05, "loss": 0.2623, "step": 33392 }, { "epoch": 2.705200907323396, "grad_norm": 0.07806967943906784, "learning_rate": 7.193393041991089e-05, "loss": 0.2565, "step": 33393 }, { "epoch": 2.7052819183408943, "grad_norm": 0.05773887410759926, "learning_rate": 7.192942976731627e-05, "loss": 0.2081, "step": 33394 }, { "epoch": 2.705362929358393, "grad_norm": 0.06873015314340591, "learning_rate": 7.192492911472164e-05, "loss": 0.2472, "step": 33395 }, { "epoch": 2.705443940375891, "grad_norm": 0.07720614224672318, "learning_rate": 7.192042846212701e-05, "loss": 0.2222, "step": 33396 }, { "epoch": 2.7055249513933894, "grad_norm": 0.0625235065817833, "learning_rate": 7.191592780953239e-05, "loss": 0.2268, "step": 33397 }, { "epoch": 2.705605962410888, "grad_norm": 0.06290551275014877, "learning_rate": 7.191142715693776e-05, "loss": 0.2327, "step": 33398 }, { "epoch": 2.7056869734283864, "grad_norm": 0.05978340655565262, "learning_rate": 7.190692650434313e-05, "loss": 0.2379, "step": 33399 }, { "epoch": 2.7057679844458846, "grad_norm": 0.06658025830984116, "learning_rate": 7.190242585174851e-05, "loss": 0.2415, "step": 33400 }, { "epoch": 2.705848995463383, "grad_norm": 0.04890163242816925, "learning_rate": 7.189792519915388e-05, "loss": 0.2447, "step": 33401 }, { "epoch": 2.7059300064808816, "grad_norm": 0.06032396852970123, "learning_rate": 7.189342454655926e-05, "loss": 0.2411, "step": 33402 }, { "epoch": 2.70601101749838, "grad_norm": 0.07358459383249283, "learning_rate": 7.188892389396463e-05, "loss": 0.2707, "step": 33403 }, { "epoch": 2.706092028515878, "grad_norm": 0.07115450501441956, "learning_rate": 7.188442324137e-05, "loss": 0.2074, "step": 33404 }, { "epoch": 2.7061730395333763, "grad_norm": 0.08676160871982574, "learning_rate": 7.187992258877538e-05, "loss": 0.267, "step": 33405 }, { "epoch": 2.706254050550875, "grad_norm": 0.07879418134689331, "learning_rate": 7.187542193618075e-05, "loss": 0.2451, "step": 33406 }, { "epoch": 2.7063350615683732, "grad_norm": 0.06741847097873688, "learning_rate": 7.187092128358612e-05, "loss": 0.232, "step": 33407 }, { "epoch": 2.7064160725858715, "grad_norm": 0.06861771643161774, "learning_rate": 7.18664206309915e-05, "loss": 0.2397, "step": 33408 }, { "epoch": 2.70649708360337, "grad_norm": 0.06544741988182068, "learning_rate": 7.186191997839687e-05, "loss": 0.206, "step": 33409 }, { "epoch": 2.7065780946208684, "grad_norm": 0.07405445724725723, "learning_rate": 7.185741932580224e-05, "loss": 0.2349, "step": 33410 }, { "epoch": 2.7066591056383666, "grad_norm": 0.06363479048013687, "learning_rate": 7.185291867320762e-05, "loss": 0.2493, "step": 33411 }, { "epoch": 2.7067401166558653, "grad_norm": 0.06907227635383606, "learning_rate": 7.184841802061299e-05, "loss": 0.2485, "step": 33412 }, { "epoch": 2.7068211276733636, "grad_norm": 0.07298295199871063, "learning_rate": 7.184391736801836e-05, "loss": 0.2263, "step": 33413 }, { "epoch": 2.706902138690862, "grad_norm": 0.07148992270231247, "learning_rate": 7.183941671542374e-05, "loss": 0.237, "step": 33414 }, { "epoch": 2.7069831497083605, "grad_norm": 0.057731013745069504, "learning_rate": 7.183491606282911e-05, "loss": 0.2101, "step": 33415 }, { "epoch": 2.7070641607258588, "grad_norm": 0.07263604551553726, "learning_rate": 7.183041541023449e-05, "loss": 0.2743, "step": 33416 }, { "epoch": 2.707145171743357, "grad_norm": 0.06796883046627045, "learning_rate": 7.182591475763986e-05, "loss": 0.2423, "step": 33417 }, { "epoch": 2.7072261827608557, "grad_norm": 0.06539809703826904, "learning_rate": 7.182141410504523e-05, "loss": 0.2331, "step": 33418 }, { "epoch": 2.707307193778354, "grad_norm": 0.061306267976760864, "learning_rate": 7.18169134524506e-05, "loss": 0.2238, "step": 33419 }, { "epoch": 2.707388204795852, "grad_norm": 0.07076717913150787, "learning_rate": 7.181241279985598e-05, "loss": 0.2162, "step": 33420 }, { "epoch": 2.707469215813351, "grad_norm": 0.07293318212032318, "learning_rate": 7.180791214726135e-05, "loss": 0.2403, "step": 33421 }, { "epoch": 2.707550226830849, "grad_norm": 0.06484469026327133, "learning_rate": 7.180341149466673e-05, "loss": 0.2588, "step": 33422 }, { "epoch": 2.7076312378483474, "grad_norm": 0.06189373508095741, "learning_rate": 7.179891084207211e-05, "loss": 0.2457, "step": 33423 }, { "epoch": 2.7077122488658456, "grad_norm": 0.06705143302679062, "learning_rate": 7.179441018947747e-05, "loss": 0.2319, "step": 33424 }, { "epoch": 2.7077932598833443, "grad_norm": 0.06246310472488403, "learning_rate": 7.178990953688285e-05, "loss": 0.2564, "step": 33425 }, { "epoch": 2.7078742709008425, "grad_norm": 0.05720631778240204, "learning_rate": 7.178540888428824e-05, "loss": 0.2121, "step": 33426 }, { "epoch": 2.707955281918341, "grad_norm": 0.06298079341650009, "learning_rate": 7.17809082316936e-05, "loss": 0.2262, "step": 33427 }, { "epoch": 2.708036292935839, "grad_norm": 0.07074960321187973, "learning_rate": 7.177640757909897e-05, "loss": 0.263, "step": 33428 }, { "epoch": 2.7081173039533377, "grad_norm": 0.052444297820329666, "learning_rate": 7.177190692650436e-05, "loss": 0.2398, "step": 33429 }, { "epoch": 2.708198314970836, "grad_norm": 0.09744896739721298, "learning_rate": 7.176740627390972e-05, "loss": 0.288, "step": 33430 }, { "epoch": 2.708279325988334, "grad_norm": 0.06278154999017715, "learning_rate": 7.176290562131509e-05, "loss": 0.2493, "step": 33431 }, { "epoch": 2.708360337005833, "grad_norm": 0.06635581701993942, "learning_rate": 7.175840496872048e-05, "loss": 0.2874, "step": 33432 }, { "epoch": 2.708441348023331, "grad_norm": 0.06772460788488388, "learning_rate": 7.175390431612584e-05, "loss": 0.2096, "step": 33433 }, { "epoch": 2.7085223590408294, "grad_norm": 0.07880239188671112, "learning_rate": 7.174940366353121e-05, "loss": 0.2606, "step": 33434 }, { "epoch": 2.708603370058328, "grad_norm": 0.06576960533857346, "learning_rate": 7.17449030109366e-05, "loss": 0.2168, "step": 33435 }, { "epoch": 2.7086843810758263, "grad_norm": 0.06567549705505371, "learning_rate": 7.174040235834196e-05, "loss": 0.229, "step": 33436 }, { "epoch": 2.7087653920933246, "grad_norm": 0.08709168434143066, "learning_rate": 7.173590170574733e-05, "loss": 0.281, "step": 33437 }, { "epoch": 2.7088464031108233, "grad_norm": 0.06958389282226562, "learning_rate": 7.173140105315272e-05, "loss": 0.2553, "step": 33438 }, { "epoch": 2.7089274141283215, "grad_norm": 0.07254193723201752, "learning_rate": 7.172690040055808e-05, "loss": 0.2231, "step": 33439 }, { "epoch": 2.7090084251458197, "grad_norm": 0.06497646123170853, "learning_rate": 7.172239974796345e-05, "loss": 0.2209, "step": 33440 }, { "epoch": 2.7090894361633184, "grad_norm": 0.0700288861989975, "learning_rate": 7.171789909536884e-05, "loss": 0.2325, "step": 33441 }, { "epoch": 2.7091704471808167, "grad_norm": 0.0581977441906929, "learning_rate": 7.17133984427742e-05, "loss": 0.2221, "step": 33442 }, { "epoch": 2.709251458198315, "grad_norm": 0.07684613764286041, "learning_rate": 7.170889779017957e-05, "loss": 0.2357, "step": 33443 }, { "epoch": 2.7093324692158136, "grad_norm": 0.07410931587219238, "learning_rate": 7.170439713758496e-05, "loss": 0.247, "step": 33444 }, { "epoch": 2.709413480233312, "grad_norm": 0.07118923217058182, "learning_rate": 7.169989648499032e-05, "loss": 0.2413, "step": 33445 }, { "epoch": 2.70949449125081, "grad_norm": 0.0676896721124649, "learning_rate": 7.16953958323957e-05, "loss": 0.2431, "step": 33446 }, { "epoch": 2.7095755022683083, "grad_norm": 0.06472186744213104, "learning_rate": 7.169089517980108e-05, "loss": 0.2399, "step": 33447 }, { "epoch": 2.709656513285807, "grad_norm": 0.060834795236587524, "learning_rate": 7.168639452720644e-05, "loss": 0.2208, "step": 33448 }, { "epoch": 2.7097375243033053, "grad_norm": 0.06608045101165771, "learning_rate": 7.168189387461183e-05, "loss": 0.2341, "step": 33449 }, { "epoch": 2.7098185353208035, "grad_norm": 0.07285463064908981, "learning_rate": 7.16773932220172e-05, "loss": 0.2401, "step": 33450 }, { "epoch": 2.7098995463383018, "grad_norm": 0.06280604004859924, "learning_rate": 7.167289256942256e-05, "loss": 0.2285, "step": 33451 }, { "epoch": 2.7099805573558005, "grad_norm": 0.07557007670402527, "learning_rate": 7.166839191682795e-05, "loss": 0.2702, "step": 33452 }, { "epoch": 2.7100615683732987, "grad_norm": 0.06775477528572083, "learning_rate": 7.166389126423332e-05, "loss": 0.2416, "step": 33453 }, { "epoch": 2.710142579390797, "grad_norm": 0.07118985056877136, "learning_rate": 7.165939061163868e-05, "loss": 0.2498, "step": 33454 }, { "epoch": 2.7102235904082956, "grad_norm": 0.06145792827010155, "learning_rate": 7.165488995904407e-05, "loss": 0.2484, "step": 33455 }, { "epoch": 2.710304601425794, "grad_norm": 0.08589167147874832, "learning_rate": 7.165038930644944e-05, "loss": 0.2659, "step": 33456 }, { "epoch": 2.710385612443292, "grad_norm": 0.07768699526786804, "learning_rate": 7.16458886538548e-05, "loss": 0.2092, "step": 33457 }, { "epoch": 2.710466623460791, "grad_norm": 0.08188536763191223, "learning_rate": 7.164138800126019e-05, "loss": 0.2414, "step": 33458 }, { "epoch": 2.710547634478289, "grad_norm": 0.05485611408948898, "learning_rate": 7.163688734866556e-05, "loss": 0.2101, "step": 33459 }, { "epoch": 2.7106286454957873, "grad_norm": 0.0856386050581932, "learning_rate": 7.163238669607092e-05, "loss": 0.2819, "step": 33460 }, { "epoch": 2.710709656513286, "grad_norm": 0.05618041381239891, "learning_rate": 7.162788604347631e-05, "loss": 0.2136, "step": 33461 }, { "epoch": 2.7107906675307842, "grad_norm": 0.0732741504907608, "learning_rate": 7.162338539088169e-05, "loss": 0.2406, "step": 33462 }, { "epoch": 2.7108716785482825, "grad_norm": 0.0713251456618309, "learning_rate": 7.161888473828705e-05, "loss": 0.2269, "step": 33463 }, { "epoch": 2.710952689565781, "grad_norm": 0.06645556539297104, "learning_rate": 7.161438408569243e-05, "loss": 0.2355, "step": 33464 }, { "epoch": 2.7110337005832794, "grad_norm": 0.06869667023420334, "learning_rate": 7.16098834330978e-05, "loss": 0.2426, "step": 33465 }, { "epoch": 2.7111147116007777, "grad_norm": 0.06282439827919006, "learning_rate": 7.160538278050317e-05, "loss": 0.2451, "step": 33466 }, { "epoch": 2.7111957226182763, "grad_norm": 0.06767906248569489, "learning_rate": 7.160088212790855e-05, "loss": 0.204, "step": 33467 }, { "epoch": 2.7112767336357746, "grad_norm": 0.07737169414758682, "learning_rate": 7.159638147531393e-05, "loss": 0.277, "step": 33468 }, { "epoch": 2.711357744653273, "grad_norm": 0.06870124489068985, "learning_rate": 7.159188082271929e-05, "loss": 0.2128, "step": 33469 }, { "epoch": 2.711438755670771, "grad_norm": 0.06312039494514465, "learning_rate": 7.158738017012467e-05, "loss": 0.2247, "step": 33470 }, { "epoch": 2.7115197666882693, "grad_norm": 0.08233053237199783, "learning_rate": 7.158287951753005e-05, "loss": 0.2808, "step": 33471 }, { "epoch": 2.711600777705768, "grad_norm": 0.09459495544433594, "learning_rate": 7.157837886493541e-05, "loss": 0.2381, "step": 33472 }, { "epoch": 2.7116817887232663, "grad_norm": 0.06900796294212341, "learning_rate": 7.15738782123408e-05, "loss": 0.2062, "step": 33473 }, { "epoch": 2.7117627997407645, "grad_norm": 0.061789944767951965, "learning_rate": 7.156937755974617e-05, "loss": 0.2018, "step": 33474 }, { "epoch": 2.711843810758263, "grad_norm": 0.07610882818698883, "learning_rate": 7.156487690715154e-05, "loss": 0.2645, "step": 33475 }, { "epoch": 2.7119248217757614, "grad_norm": 0.06013292446732521, "learning_rate": 7.156037625455692e-05, "loss": 0.2092, "step": 33476 }, { "epoch": 2.7120058327932597, "grad_norm": 0.08542242646217346, "learning_rate": 7.155587560196229e-05, "loss": 0.2674, "step": 33477 }, { "epoch": 2.7120868438107584, "grad_norm": 0.06455022841691971, "learning_rate": 7.155137494936766e-05, "loss": 0.2131, "step": 33478 }, { "epoch": 2.7121678548282566, "grad_norm": 0.07186616957187653, "learning_rate": 7.154687429677304e-05, "loss": 0.23, "step": 33479 }, { "epoch": 2.712248865845755, "grad_norm": 0.07408653199672699, "learning_rate": 7.154237364417841e-05, "loss": 0.2427, "step": 33480 }, { "epoch": 2.7123298768632536, "grad_norm": 0.07832899689674377, "learning_rate": 7.153787299158378e-05, "loss": 0.3104, "step": 33481 }, { "epoch": 2.712410887880752, "grad_norm": 0.05991185083985329, "learning_rate": 7.153337233898916e-05, "loss": 0.2335, "step": 33482 }, { "epoch": 2.71249189889825, "grad_norm": 0.06915418058633804, "learning_rate": 7.152887168639453e-05, "loss": 0.2403, "step": 33483 }, { "epoch": 2.7125729099157487, "grad_norm": 0.06852851808071136, "learning_rate": 7.15243710337999e-05, "loss": 0.2568, "step": 33484 }, { "epoch": 2.712653920933247, "grad_norm": 0.06072534993290901, "learning_rate": 7.151987038120528e-05, "loss": 0.2297, "step": 33485 }, { "epoch": 2.712734931950745, "grad_norm": 0.07479368150234222, "learning_rate": 7.151536972861065e-05, "loss": 0.2208, "step": 33486 }, { "epoch": 2.712815942968244, "grad_norm": 0.0576009564101696, "learning_rate": 7.151086907601603e-05, "loss": 0.2207, "step": 33487 }, { "epoch": 2.712896953985742, "grad_norm": 0.06572145223617554, "learning_rate": 7.15063684234214e-05, "loss": 0.2381, "step": 33488 }, { "epoch": 2.7129779650032404, "grad_norm": 0.06663456559181213, "learning_rate": 7.150186777082677e-05, "loss": 0.2246, "step": 33489 }, { "epoch": 2.713058976020739, "grad_norm": 0.06880438327789307, "learning_rate": 7.149736711823215e-05, "loss": 0.2555, "step": 33490 }, { "epoch": 2.7131399870382373, "grad_norm": 0.08984614163637161, "learning_rate": 7.149286646563752e-05, "loss": 0.2848, "step": 33491 }, { "epoch": 2.7132209980557356, "grad_norm": 0.06485553085803986, "learning_rate": 7.148836581304289e-05, "loss": 0.2543, "step": 33492 }, { "epoch": 2.713302009073234, "grad_norm": 0.0725436732172966, "learning_rate": 7.148386516044827e-05, "loss": 0.2744, "step": 33493 }, { "epoch": 2.713383020090732, "grad_norm": 0.05967855826020241, "learning_rate": 7.147936450785364e-05, "loss": 0.1979, "step": 33494 }, { "epoch": 2.7134640311082308, "grad_norm": 0.06330663710832596, "learning_rate": 7.147486385525901e-05, "loss": 0.222, "step": 33495 }, { "epoch": 2.713545042125729, "grad_norm": 0.07709506154060364, "learning_rate": 7.147036320266439e-05, "loss": 0.2257, "step": 33496 }, { "epoch": 2.7136260531432272, "grad_norm": 0.06852762401103973, "learning_rate": 7.146586255006976e-05, "loss": 0.2556, "step": 33497 }, { "epoch": 2.713707064160726, "grad_norm": 0.0920860543847084, "learning_rate": 7.146136189747513e-05, "loss": 0.2751, "step": 33498 }, { "epoch": 2.713788075178224, "grad_norm": 0.06808226555585861, "learning_rate": 7.145686124488051e-05, "loss": 0.2922, "step": 33499 }, { "epoch": 2.7138690861957224, "grad_norm": 0.09032059460878372, "learning_rate": 7.145236059228588e-05, "loss": 0.262, "step": 33500 }, { "epoch": 2.713950097213221, "grad_norm": 0.07511615008115768, "learning_rate": 7.144785993969127e-05, "loss": 0.2589, "step": 33501 }, { "epoch": 2.7140311082307194, "grad_norm": 0.08429624885320663, "learning_rate": 7.144335928709663e-05, "loss": 0.2865, "step": 33502 }, { "epoch": 2.7141121192482176, "grad_norm": 0.06352245062589645, "learning_rate": 7.1438858634502e-05, "loss": 0.224, "step": 33503 }, { "epoch": 2.7141931302657163, "grad_norm": 0.07323718816041946, "learning_rate": 7.143435798190739e-05, "loss": 0.2201, "step": 33504 }, { "epoch": 2.7142741412832145, "grad_norm": 0.060021862387657166, "learning_rate": 7.142985732931275e-05, "loss": 0.2206, "step": 33505 }, { "epoch": 2.714355152300713, "grad_norm": 0.07336699217557907, "learning_rate": 7.142535667671812e-05, "loss": 0.2374, "step": 33506 }, { "epoch": 2.7144361633182115, "grad_norm": 0.05722619220614433, "learning_rate": 7.142085602412351e-05, "loss": 0.232, "step": 33507 }, { "epoch": 2.7145171743357097, "grad_norm": 0.07713234424591064, "learning_rate": 7.141635537152887e-05, "loss": 0.274, "step": 33508 }, { "epoch": 2.714598185353208, "grad_norm": 0.07163354009389877, "learning_rate": 7.141185471893424e-05, "loss": 0.2428, "step": 33509 }, { "epoch": 2.7146791963707066, "grad_norm": 0.057901978492736816, "learning_rate": 7.140735406633963e-05, "loss": 0.2215, "step": 33510 }, { "epoch": 2.714760207388205, "grad_norm": 0.07321817427873611, "learning_rate": 7.140285341374499e-05, "loss": 0.2323, "step": 33511 }, { "epoch": 2.714841218405703, "grad_norm": 0.057540737092494965, "learning_rate": 7.139835276115037e-05, "loss": 0.2594, "step": 33512 }, { "epoch": 2.714922229423202, "grad_norm": 0.07701243460178375, "learning_rate": 7.139385210855575e-05, "loss": 0.2269, "step": 33513 }, { "epoch": 2.7150032404407, "grad_norm": 0.06628618389368057, "learning_rate": 7.138935145596111e-05, "loss": 0.2503, "step": 33514 }, { "epoch": 2.7150842514581983, "grad_norm": 0.0763016939163208, "learning_rate": 7.138485080336649e-05, "loss": 0.2567, "step": 33515 }, { "epoch": 2.7151652624756966, "grad_norm": 0.06274400651454926, "learning_rate": 7.138035015077187e-05, "loss": 0.2472, "step": 33516 }, { "epoch": 2.715246273493195, "grad_norm": 0.06951197981834412, "learning_rate": 7.137584949817723e-05, "loss": 0.2483, "step": 33517 }, { "epoch": 2.7153272845106935, "grad_norm": 0.07218840718269348, "learning_rate": 7.137134884558261e-05, "loss": 0.2551, "step": 33518 }, { "epoch": 2.7154082955281917, "grad_norm": 0.06729360669851303, "learning_rate": 7.1366848192988e-05, "loss": 0.231, "step": 33519 }, { "epoch": 2.71548930654569, "grad_norm": 0.07275759428739548, "learning_rate": 7.136234754039335e-05, "loss": 0.2293, "step": 33520 }, { "epoch": 2.7155703175631887, "grad_norm": 0.07135862857103348, "learning_rate": 7.135784688779873e-05, "loss": 0.229, "step": 33521 }, { "epoch": 2.715651328580687, "grad_norm": 0.07239732146263123, "learning_rate": 7.135334623520411e-05, "loss": 0.2509, "step": 33522 }, { "epoch": 2.715732339598185, "grad_norm": 0.061063461005687714, "learning_rate": 7.134884558260947e-05, "loss": 0.1925, "step": 33523 }, { "epoch": 2.715813350615684, "grad_norm": 0.07632570713758469, "learning_rate": 7.134434493001485e-05, "loss": 0.2524, "step": 33524 }, { "epoch": 2.715894361633182, "grad_norm": 0.0663372054696083, "learning_rate": 7.133984427742024e-05, "loss": 0.2305, "step": 33525 }, { "epoch": 2.7159753726506803, "grad_norm": 0.07694404572248459, "learning_rate": 7.13353436248256e-05, "loss": 0.2213, "step": 33526 }, { "epoch": 2.716056383668179, "grad_norm": 0.052713215351104736, "learning_rate": 7.133084297223098e-05, "loss": 0.2292, "step": 33527 }, { "epoch": 2.7161373946856773, "grad_norm": 0.055823810398578644, "learning_rate": 7.132634231963636e-05, "loss": 0.2124, "step": 33528 }, { "epoch": 2.7162184057031755, "grad_norm": 0.06696312129497528, "learning_rate": 7.132184166704172e-05, "loss": 0.2505, "step": 33529 }, { "epoch": 2.716299416720674, "grad_norm": 0.08826608210802078, "learning_rate": 7.13173410144471e-05, "loss": 0.2581, "step": 33530 }, { "epoch": 2.7163804277381725, "grad_norm": 0.06801729649305344, "learning_rate": 7.131284036185248e-05, "loss": 0.2392, "step": 33531 }, { "epoch": 2.7164614387556707, "grad_norm": 0.06413350254297256, "learning_rate": 7.130833970925784e-05, "loss": 0.2219, "step": 33532 }, { "epoch": 2.7165424497731694, "grad_norm": 0.0645436942577362, "learning_rate": 7.130383905666322e-05, "loss": 0.231, "step": 33533 }, { "epoch": 2.7166234607906676, "grad_norm": 0.0745738223195076, "learning_rate": 7.12993384040686e-05, "loss": 0.2507, "step": 33534 }, { "epoch": 2.716704471808166, "grad_norm": 0.0662076473236084, "learning_rate": 7.129483775147396e-05, "loss": 0.2205, "step": 33535 }, { "epoch": 2.7167854828256646, "grad_norm": 0.05969638377428055, "learning_rate": 7.129033709887935e-05, "loss": 0.2391, "step": 33536 }, { "epoch": 2.716866493843163, "grad_norm": 0.0601656436920166, "learning_rate": 7.128583644628472e-05, "loss": 0.2518, "step": 33537 }, { "epoch": 2.716947504860661, "grad_norm": 0.07162291556596756, "learning_rate": 7.128133579369008e-05, "loss": 0.1978, "step": 33538 }, { "epoch": 2.7170285158781593, "grad_norm": 0.07743734866380692, "learning_rate": 7.127683514109547e-05, "loss": 0.2858, "step": 33539 }, { "epoch": 2.7171095268956575, "grad_norm": 0.06878258287906647, "learning_rate": 7.127233448850084e-05, "loss": 0.2163, "step": 33540 }, { "epoch": 2.7171905379131562, "grad_norm": 0.07549357414245605, "learning_rate": 7.12678338359062e-05, "loss": 0.248, "step": 33541 }, { "epoch": 2.7172715489306545, "grad_norm": 0.08683260530233383, "learning_rate": 7.126333318331159e-05, "loss": 0.273, "step": 33542 }, { "epoch": 2.7173525599481527, "grad_norm": 0.07400300353765488, "learning_rate": 7.125883253071696e-05, "loss": 0.2123, "step": 33543 }, { "epoch": 2.7174335709656514, "grad_norm": 0.07202634960412979, "learning_rate": 7.125433187812232e-05, "loss": 0.2748, "step": 33544 }, { "epoch": 2.7175145819831497, "grad_norm": 0.09767428040504456, "learning_rate": 7.124983122552771e-05, "loss": 0.2557, "step": 33545 }, { "epoch": 2.717595593000648, "grad_norm": 0.06339447945356369, "learning_rate": 7.124533057293308e-05, "loss": 0.2201, "step": 33546 }, { "epoch": 2.7176766040181466, "grad_norm": 0.062197357416152954, "learning_rate": 7.124082992033844e-05, "loss": 0.2771, "step": 33547 }, { "epoch": 2.717757615035645, "grad_norm": 0.09015563130378723, "learning_rate": 7.123632926774383e-05, "loss": 0.2631, "step": 33548 }, { "epoch": 2.717838626053143, "grad_norm": 0.07836005091667175, "learning_rate": 7.12318286151492e-05, "loss": 0.2618, "step": 33549 }, { "epoch": 2.7179196370706418, "grad_norm": 0.07033463567495346, "learning_rate": 7.122732796255456e-05, "loss": 0.237, "step": 33550 }, { "epoch": 2.71800064808814, "grad_norm": 0.07338385283946991, "learning_rate": 7.122282730995995e-05, "loss": 0.2414, "step": 33551 }, { "epoch": 2.7180816591056383, "grad_norm": 0.07711820304393768, "learning_rate": 7.121832665736532e-05, "loss": 0.2412, "step": 33552 }, { "epoch": 2.718162670123137, "grad_norm": 0.07389004528522491, "learning_rate": 7.12138260047707e-05, "loss": 0.2343, "step": 33553 }, { "epoch": 2.718243681140635, "grad_norm": 0.05881441757082939, "learning_rate": 7.120932535217607e-05, "loss": 0.2261, "step": 33554 }, { "epoch": 2.7183246921581334, "grad_norm": 0.05964907258749008, "learning_rate": 7.120482469958144e-05, "loss": 0.2293, "step": 33555 }, { "epoch": 2.718405703175632, "grad_norm": 0.07038917392492294, "learning_rate": 7.120032404698682e-05, "loss": 0.2141, "step": 33556 }, { "epoch": 2.7184867141931304, "grad_norm": 0.0781693086028099, "learning_rate": 7.119582339439219e-05, "loss": 0.2586, "step": 33557 }, { "epoch": 2.7185677252106286, "grad_norm": 0.07387826591730118, "learning_rate": 7.119132274179756e-05, "loss": 0.2357, "step": 33558 }, { "epoch": 2.718648736228127, "grad_norm": 0.06107885017991066, "learning_rate": 7.118682208920294e-05, "loss": 0.2209, "step": 33559 }, { "epoch": 2.7187297472456255, "grad_norm": 0.06332498043775558, "learning_rate": 7.118232143660831e-05, "loss": 0.2584, "step": 33560 }, { "epoch": 2.718810758263124, "grad_norm": 0.07451072335243225, "learning_rate": 7.117782078401369e-05, "loss": 0.2507, "step": 33561 }, { "epoch": 2.718891769280622, "grad_norm": 0.06843792647123337, "learning_rate": 7.117332013141906e-05, "loss": 0.2654, "step": 33562 }, { "epoch": 2.7189727802981203, "grad_norm": 0.07237779349088669, "learning_rate": 7.116881947882443e-05, "loss": 0.2313, "step": 33563 }, { "epoch": 2.719053791315619, "grad_norm": 0.1040261909365654, "learning_rate": 7.11643188262298e-05, "loss": 0.2367, "step": 33564 }, { "epoch": 2.719134802333117, "grad_norm": 0.059434372931718826, "learning_rate": 7.115981817363518e-05, "loss": 0.2504, "step": 33565 }, { "epoch": 2.7192158133506155, "grad_norm": 0.07661649584770203, "learning_rate": 7.115531752104055e-05, "loss": 0.2233, "step": 33566 }, { "epoch": 2.719296824368114, "grad_norm": 0.08492406457662582, "learning_rate": 7.115081686844593e-05, "loss": 0.2525, "step": 33567 }, { "epoch": 2.7193778353856124, "grad_norm": 0.07533139735460281, "learning_rate": 7.11463162158513e-05, "loss": 0.268, "step": 33568 }, { "epoch": 2.7194588464031106, "grad_norm": 0.07281734049320221, "learning_rate": 7.114181556325667e-05, "loss": 0.2023, "step": 33569 }, { "epoch": 2.7195398574206093, "grad_norm": 0.08975783735513687, "learning_rate": 7.113731491066205e-05, "loss": 0.2275, "step": 33570 }, { "epoch": 2.7196208684381076, "grad_norm": 0.0756605714559555, "learning_rate": 7.113281425806742e-05, "loss": 0.2513, "step": 33571 }, { "epoch": 2.719701879455606, "grad_norm": 0.07475683838129044, "learning_rate": 7.11283136054728e-05, "loss": 0.2341, "step": 33572 }, { "epoch": 2.7197828904731045, "grad_norm": 0.06809760630130768, "learning_rate": 7.112381295287817e-05, "loss": 0.2198, "step": 33573 }, { "epoch": 2.7198639014906028, "grad_norm": 0.05999773368239403, "learning_rate": 7.111931230028354e-05, "loss": 0.2005, "step": 33574 }, { "epoch": 2.719944912508101, "grad_norm": 0.06841011345386505, "learning_rate": 7.111481164768892e-05, "loss": 0.2354, "step": 33575 }, { "epoch": 2.7200259235255997, "grad_norm": 0.07008085399866104, "learning_rate": 7.111031099509429e-05, "loss": 0.2323, "step": 33576 }, { "epoch": 2.720106934543098, "grad_norm": 0.06371785700321198, "learning_rate": 7.110581034249966e-05, "loss": 0.264, "step": 33577 }, { "epoch": 2.720187945560596, "grad_norm": 0.06585246324539185, "learning_rate": 7.110130968990504e-05, "loss": 0.2564, "step": 33578 }, { "epoch": 2.720268956578095, "grad_norm": 0.06621664017438889, "learning_rate": 7.109680903731041e-05, "loss": 0.2347, "step": 33579 }, { "epoch": 2.720349967595593, "grad_norm": 0.07635606080293655, "learning_rate": 7.109230838471578e-05, "loss": 0.2729, "step": 33580 }, { "epoch": 2.7204309786130914, "grad_norm": 0.06877021491527557, "learning_rate": 7.108780773212116e-05, "loss": 0.2482, "step": 33581 }, { "epoch": 2.7205119896305896, "grad_norm": 0.058608278632164, "learning_rate": 7.108330707952654e-05, "loss": 0.2102, "step": 33582 }, { "epoch": 2.7205930006480883, "grad_norm": 0.05785350129008293, "learning_rate": 7.10788064269319e-05, "loss": 0.219, "step": 33583 }, { "epoch": 2.7206740116655865, "grad_norm": 0.0791582241654396, "learning_rate": 7.107430577433728e-05, "loss": 0.2492, "step": 33584 }, { "epoch": 2.720755022683085, "grad_norm": 0.09484634548425674, "learning_rate": 7.106980512174267e-05, "loss": 0.2916, "step": 33585 }, { "epoch": 2.720836033700583, "grad_norm": 0.0706673339009285, "learning_rate": 7.106530446914803e-05, "loss": 0.2352, "step": 33586 }, { "epoch": 2.7209170447180817, "grad_norm": 0.07668599486351013, "learning_rate": 7.10608038165534e-05, "loss": 0.2717, "step": 33587 }, { "epoch": 2.72099805573558, "grad_norm": 0.06315944343805313, "learning_rate": 7.105630316395879e-05, "loss": 0.2646, "step": 33588 }, { "epoch": 2.721079066753078, "grad_norm": 0.06296543776988983, "learning_rate": 7.105180251136415e-05, "loss": 0.248, "step": 33589 }, { "epoch": 2.721160077770577, "grad_norm": 0.05871746689081192, "learning_rate": 7.104730185876952e-05, "loss": 0.2101, "step": 33590 }, { "epoch": 2.721241088788075, "grad_norm": 0.0563269667327404, "learning_rate": 7.104280120617491e-05, "loss": 0.2151, "step": 33591 }, { "epoch": 2.7213220998055734, "grad_norm": 0.07399358600378036, "learning_rate": 7.103830055358027e-05, "loss": 0.2352, "step": 33592 }, { "epoch": 2.721403110823072, "grad_norm": 0.06997597217559814, "learning_rate": 7.103379990098564e-05, "loss": 0.2531, "step": 33593 }, { "epoch": 2.7214841218405703, "grad_norm": 0.06861952692270279, "learning_rate": 7.102929924839103e-05, "loss": 0.2447, "step": 33594 }, { "epoch": 2.7215651328580686, "grad_norm": 0.07394934445619583, "learning_rate": 7.102479859579639e-05, "loss": 0.2736, "step": 33595 }, { "epoch": 2.7216461438755672, "grad_norm": 0.05605033412575722, "learning_rate": 7.102029794320176e-05, "loss": 0.1993, "step": 33596 }, { "epoch": 2.7217271548930655, "grad_norm": 0.05519258230924606, "learning_rate": 7.101579729060715e-05, "loss": 0.2333, "step": 33597 }, { "epoch": 2.7218081659105637, "grad_norm": 0.06714540719985962, "learning_rate": 7.101129663801251e-05, "loss": 0.2494, "step": 33598 }, { "epoch": 2.7218891769280624, "grad_norm": 0.07090084999799728, "learning_rate": 7.100679598541788e-05, "loss": 0.2563, "step": 33599 }, { "epoch": 2.7219701879455607, "grad_norm": 0.07883425056934357, "learning_rate": 7.100229533282327e-05, "loss": 0.2712, "step": 33600 }, { "epoch": 2.722051198963059, "grad_norm": 0.07029104977846146, "learning_rate": 7.099779468022863e-05, "loss": 0.255, "step": 33601 }, { "epoch": 2.7221322099805576, "grad_norm": 0.07110211253166199, "learning_rate": 7.0993294027634e-05, "loss": 0.2401, "step": 33602 }, { "epoch": 2.722213220998056, "grad_norm": 0.1000295951962471, "learning_rate": 7.098879337503939e-05, "loss": 0.2613, "step": 33603 }, { "epoch": 2.722294232015554, "grad_norm": 0.06811027973890305, "learning_rate": 7.098429272244475e-05, "loss": 0.2265, "step": 33604 }, { "epoch": 2.7223752430330523, "grad_norm": 0.07708834111690521, "learning_rate": 7.097979206985012e-05, "loss": 0.2799, "step": 33605 }, { "epoch": 2.722456254050551, "grad_norm": 0.0472310408949852, "learning_rate": 7.097529141725551e-05, "loss": 0.2226, "step": 33606 }, { "epoch": 2.7225372650680493, "grad_norm": 0.05726686492562294, "learning_rate": 7.097079076466087e-05, "loss": 0.2461, "step": 33607 }, { "epoch": 2.7226182760855475, "grad_norm": 0.06914225220680237, "learning_rate": 7.096629011206626e-05, "loss": 0.245, "step": 33608 }, { "epoch": 2.7226992871030458, "grad_norm": 0.09501109272241592, "learning_rate": 7.096178945947163e-05, "loss": 0.3241, "step": 33609 }, { "epoch": 2.7227802981205445, "grad_norm": 0.07300115376710892, "learning_rate": 7.095728880687699e-05, "loss": 0.2492, "step": 33610 }, { "epoch": 2.7228613091380427, "grad_norm": 0.0799044743180275, "learning_rate": 7.095278815428238e-05, "loss": 0.2455, "step": 33611 }, { "epoch": 2.722942320155541, "grad_norm": 0.07171258330345154, "learning_rate": 7.094828750168775e-05, "loss": 0.2421, "step": 33612 }, { "epoch": 2.7230233311730396, "grad_norm": 0.05257529020309448, "learning_rate": 7.094378684909311e-05, "loss": 0.2129, "step": 33613 }, { "epoch": 2.723104342190538, "grad_norm": 0.07569834589958191, "learning_rate": 7.09392861964985e-05, "loss": 0.2429, "step": 33614 }, { "epoch": 2.723185353208036, "grad_norm": 0.06856614351272583, "learning_rate": 7.093478554390387e-05, "loss": 0.2554, "step": 33615 }, { "epoch": 2.723266364225535, "grad_norm": 0.0748237892985344, "learning_rate": 7.093028489130923e-05, "loss": 0.2562, "step": 33616 }, { "epoch": 2.723347375243033, "grad_norm": 0.06843352317810059, "learning_rate": 7.092578423871462e-05, "loss": 0.2739, "step": 33617 }, { "epoch": 2.7234283862605313, "grad_norm": 0.07195959240198135, "learning_rate": 7.092128358612e-05, "loss": 0.2439, "step": 33618 }, { "epoch": 2.72350939727803, "grad_norm": 0.05495288968086243, "learning_rate": 7.091678293352535e-05, "loss": 0.211, "step": 33619 }, { "epoch": 2.7235904082955282, "grad_norm": 0.06835697591304779, "learning_rate": 7.091228228093074e-05, "loss": 0.283, "step": 33620 }, { "epoch": 2.7236714193130265, "grad_norm": 0.07124406844377518, "learning_rate": 7.090778162833612e-05, "loss": 0.2401, "step": 33621 }, { "epoch": 2.723752430330525, "grad_norm": 0.07246687263250351, "learning_rate": 7.090328097574148e-05, "loss": 0.2355, "step": 33622 }, { "epoch": 2.7238334413480234, "grad_norm": 0.06816147267818451, "learning_rate": 7.089878032314686e-05, "loss": 0.2505, "step": 33623 }, { "epoch": 2.7239144523655217, "grad_norm": 0.07142950594425201, "learning_rate": 7.089427967055224e-05, "loss": 0.2656, "step": 33624 }, { "epoch": 2.7239954633830203, "grad_norm": 0.06787704676389694, "learning_rate": 7.088977901795761e-05, "loss": 0.2318, "step": 33625 }, { "epoch": 2.7240764744005186, "grad_norm": 0.05560300126671791, "learning_rate": 7.088527836536298e-05, "loss": 0.2253, "step": 33626 }, { "epoch": 2.724157485418017, "grad_norm": 0.054587751626968384, "learning_rate": 7.088077771276836e-05, "loss": 0.2343, "step": 33627 }, { "epoch": 2.724238496435515, "grad_norm": 0.06579109281301498, "learning_rate": 7.087627706017373e-05, "loss": 0.2483, "step": 33628 }, { "epoch": 2.7243195074530138, "grad_norm": 0.06791524589061737, "learning_rate": 7.08717764075791e-05, "loss": 0.2794, "step": 33629 }, { "epoch": 2.724400518470512, "grad_norm": 0.07304447144269943, "learning_rate": 7.086727575498448e-05, "loss": 0.2804, "step": 33630 }, { "epoch": 2.7244815294880103, "grad_norm": 0.06753737479448318, "learning_rate": 7.086277510238985e-05, "loss": 0.2541, "step": 33631 }, { "epoch": 2.7245625405055085, "grad_norm": 0.07851094752550125, "learning_rate": 7.085827444979522e-05, "loss": 0.2719, "step": 33632 }, { "epoch": 2.724643551523007, "grad_norm": 0.07380569726228714, "learning_rate": 7.08537737972006e-05, "loss": 0.2608, "step": 33633 }, { "epoch": 2.7247245625405054, "grad_norm": 0.05877283215522766, "learning_rate": 7.084927314460597e-05, "loss": 0.2309, "step": 33634 }, { "epoch": 2.7248055735580037, "grad_norm": 0.07889322936534882, "learning_rate": 7.084477249201135e-05, "loss": 0.2519, "step": 33635 }, { "epoch": 2.7248865845755024, "grad_norm": 0.08005855977535248, "learning_rate": 7.084027183941672e-05, "loss": 0.258, "step": 33636 }, { "epoch": 2.7249675955930006, "grad_norm": 0.07733011245727539, "learning_rate": 7.083577118682209e-05, "loss": 0.2289, "step": 33637 }, { "epoch": 2.725048606610499, "grad_norm": 0.06844766438007355, "learning_rate": 7.083127053422747e-05, "loss": 0.2214, "step": 33638 }, { "epoch": 2.7251296176279975, "grad_norm": 0.07209837436676025, "learning_rate": 7.082676988163284e-05, "loss": 0.2471, "step": 33639 }, { "epoch": 2.725210628645496, "grad_norm": 0.06612085551023483, "learning_rate": 7.082226922903821e-05, "loss": 0.258, "step": 33640 }, { "epoch": 2.725291639662994, "grad_norm": 0.06333424896001816, "learning_rate": 7.081776857644359e-05, "loss": 0.2165, "step": 33641 }, { "epoch": 2.7253726506804927, "grad_norm": 0.07169868797063828, "learning_rate": 7.081326792384896e-05, "loss": 0.2308, "step": 33642 }, { "epoch": 2.725453661697991, "grad_norm": 0.07447036355733871, "learning_rate": 7.080876727125433e-05, "loss": 0.2666, "step": 33643 }, { "epoch": 2.725534672715489, "grad_norm": 0.076211117208004, "learning_rate": 7.080426661865971e-05, "loss": 0.2153, "step": 33644 }, { "epoch": 2.725615683732988, "grad_norm": 0.06768426299095154, "learning_rate": 7.079976596606508e-05, "loss": 0.2191, "step": 33645 }, { "epoch": 2.725696694750486, "grad_norm": 0.0650651678442955, "learning_rate": 7.079526531347046e-05, "loss": 0.2467, "step": 33646 }, { "epoch": 2.7257777057679844, "grad_norm": 0.07114232331514359, "learning_rate": 7.079076466087583e-05, "loss": 0.2419, "step": 33647 }, { "epoch": 2.725858716785483, "grad_norm": 0.06247364357113838, "learning_rate": 7.07862640082812e-05, "loss": 0.2613, "step": 33648 }, { "epoch": 2.7259397278029813, "grad_norm": 0.0777125209569931, "learning_rate": 7.078176335568658e-05, "loss": 0.2511, "step": 33649 }, { "epoch": 2.7260207388204796, "grad_norm": 0.07471860200166702, "learning_rate": 7.077726270309195e-05, "loss": 0.2519, "step": 33650 }, { "epoch": 2.726101749837978, "grad_norm": 0.07368387281894684, "learning_rate": 7.077276205049732e-05, "loss": 0.2666, "step": 33651 }, { "epoch": 2.7261827608554765, "grad_norm": 0.08657611906528473, "learning_rate": 7.07682613979027e-05, "loss": 0.2486, "step": 33652 }, { "epoch": 2.7262637718729748, "grad_norm": 0.06747881323099136, "learning_rate": 7.076376074530807e-05, "loss": 0.2253, "step": 33653 }, { "epoch": 2.726344782890473, "grad_norm": 0.06942425668239594, "learning_rate": 7.075926009271344e-05, "loss": 0.2256, "step": 33654 }, { "epoch": 2.7264257939079712, "grad_norm": 0.08220626413822174, "learning_rate": 7.075475944011882e-05, "loss": 0.2429, "step": 33655 }, { "epoch": 2.72650680492547, "grad_norm": 0.06210099533200264, "learning_rate": 7.075025878752419e-05, "loss": 0.2634, "step": 33656 }, { "epoch": 2.726587815942968, "grad_norm": 0.06558974087238312, "learning_rate": 7.074575813492956e-05, "loss": 0.2416, "step": 33657 }, { "epoch": 2.7266688269604664, "grad_norm": 0.06641119718551636, "learning_rate": 7.074125748233494e-05, "loss": 0.2247, "step": 33658 }, { "epoch": 2.726749837977965, "grad_norm": 0.06373778730630875, "learning_rate": 7.073675682974031e-05, "loss": 0.2278, "step": 33659 }, { "epoch": 2.7268308489954634, "grad_norm": 0.06465722620487213, "learning_rate": 7.07322561771457e-05, "loss": 0.237, "step": 33660 }, { "epoch": 2.7269118600129616, "grad_norm": 0.05753665044903755, "learning_rate": 7.072775552455106e-05, "loss": 0.2318, "step": 33661 }, { "epoch": 2.7269928710304603, "grad_norm": 0.07702966779470444, "learning_rate": 7.072325487195643e-05, "loss": 0.2348, "step": 33662 }, { "epoch": 2.7270738820479585, "grad_norm": 0.06840366870164871, "learning_rate": 7.071875421936182e-05, "loss": 0.2309, "step": 33663 }, { "epoch": 2.7271548930654568, "grad_norm": 0.058910470455884933, "learning_rate": 7.071425356676718e-05, "loss": 0.2279, "step": 33664 }, { "epoch": 2.7272359040829555, "grad_norm": 0.07476451992988586, "learning_rate": 7.070975291417255e-05, "loss": 0.2169, "step": 33665 }, { "epoch": 2.7273169151004537, "grad_norm": 0.0877615436911583, "learning_rate": 7.070525226157794e-05, "loss": 0.322, "step": 33666 }, { "epoch": 2.727397926117952, "grad_norm": 0.06256525218486786, "learning_rate": 7.07007516089833e-05, "loss": 0.1851, "step": 33667 }, { "epoch": 2.7274789371354506, "grad_norm": 0.057740043848752975, "learning_rate": 7.069625095638867e-05, "loss": 0.249, "step": 33668 }, { "epoch": 2.727559948152949, "grad_norm": 0.062011539936065674, "learning_rate": 7.069175030379406e-05, "loss": 0.2031, "step": 33669 }, { "epoch": 2.727640959170447, "grad_norm": 0.07256323844194412, "learning_rate": 7.068724965119942e-05, "loss": 0.2277, "step": 33670 }, { "epoch": 2.727721970187946, "grad_norm": 0.07076326757669449, "learning_rate": 7.06827489986048e-05, "loss": 0.2631, "step": 33671 }, { "epoch": 2.727802981205444, "grad_norm": 0.058779485523700714, "learning_rate": 7.067824834601018e-05, "loss": 0.2013, "step": 33672 }, { "epoch": 2.7278839922229423, "grad_norm": 0.0768062174320221, "learning_rate": 7.067374769341554e-05, "loss": 0.2639, "step": 33673 }, { "epoch": 2.7279650032404406, "grad_norm": 0.07481861114501953, "learning_rate": 7.066924704082092e-05, "loss": 0.2232, "step": 33674 }, { "epoch": 2.7280460142579392, "grad_norm": 0.07629449665546417, "learning_rate": 7.06647463882263e-05, "loss": 0.2398, "step": 33675 }, { "epoch": 2.7281270252754375, "grad_norm": 0.07321591675281525, "learning_rate": 7.066024573563166e-05, "loss": 0.2728, "step": 33676 }, { "epoch": 2.7282080362929357, "grad_norm": 0.06354720145463943, "learning_rate": 7.065574508303704e-05, "loss": 0.1912, "step": 33677 }, { "epoch": 2.728289047310434, "grad_norm": 0.08110125362873077, "learning_rate": 7.065124443044242e-05, "loss": 0.2261, "step": 33678 }, { "epoch": 2.7283700583279327, "grad_norm": 0.08282686024904251, "learning_rate": 7.064674377784778e-05, "loss": 0.256, "step": 33679 }, { "epoch": 2.728451069345431, "grad_norm": 0.07880692183971405, "learning_rate": 7.064224312525316e-05, "loss": 0.2178, "step": 33680 }, { "epoch": 2.728532080362929, "grad_norm": 0.08492696285247803, "learning_rate": 7.063774247265854e-05, "loss": 0.226, "step": 33681 }, { "epoch": 2.728613091380428, "grad_norm": 0.06499497592449188, "learning_rate": 7.06332418200639e-05, "loss": 0.2546, "step": 33682 }, { "epoch": 2.728694102397926, "grad_norm": 0.07533867657184601, "learning_rate": 7.062874116746928e-05, "loss": 0.28, "step": 33683 }, { "epoch": 2.7287751134154243, "grad_norm": 0.07215952128171921, "learning_rate": 7.062424051487467e-05, "loss": 0.2216, "step": 33684 }, { "epoch": 2.728856124432923, "grad_norm": 0.05357545614242554, "learning_rate": 7.061973986228003e-05, "loss": 0.2004, "step": 33685 }, { "epoch": 2.7289371354504213, "grad_norm": 0.08010009676218033, "learning_rate": 7.061523920968541e-05, "loss": 0.2291, "step": 33686 }, { "epoch": 2.7290181464679195, "grad_norm": 0.07454568147659302, "learning_rate": 7.061073855709079e-05, "loss": 0.286, "step": 33687 }, { "epoch": 2.729099157485418, "grad_norm": 0.08589702099561691, "learning_rate": 7.060623790449615e-05, "loss": 0.266, "step": 33688 }, { "epoch": 2.7291801685029164, "grad_norm": 0.0718001127243042, "learning_rate": 7.060173725190153e-05, "loss": 0.2354, "step": 33689 }, { "epoch": 2.7292611795204147, "grad_norm": 0.05786353349685669, "learning_rate": 7.059723659930691e-05, "loss": 0.2384, "step": 33690 }, { "epoch": 2.7293421905379134, "grad_norm": 0.06753803044557571, "learning_rate": 7.059273594671227e-05, "loss": 0.2229, "step": 33691 }, { "epoch": 2.7294232015554116, "grad_norm": 0.05679044499993324, "learning_rate": 7.058823529411765e-05, "loss": 0.2221, "step": 33692 }, { "epoch": 2.72950421257291, "grad_norm": 0.058725755661726, "learning_rate": 7.058373464152303e-05, "loss": 0.2534, "step": 33693 }, { "epoch": 2.7295852235904086, "grad_norm": 0.06571077555418015, "learning_rate": 7.05792339889284e-05, "loss": 0.2338, "step": 33694 }, { "epoch": 2.729666234607907, "grad_norm": 0.07829579710960388, "learning_rate": 7.057473333633378e-05, "loss": 0.2649, "step": 33695 }, { "epoch": 2.729747245625405, "grad_norm": 0.05849388986825943, "learning_rate": 7.057023268373915e-05, "loss": 0.2353, "step": 33696 }, { "epoch": 2.7298282566429033, "grad_norm": 0.053417835384607315, "learning_rate": 7.056573203114452e-05, "loss": 0.23, "step": 33697 }, { "epoch": 2.7299092676604015, "grad_norm": 0.06413785368204117, "learning_rate": 7.05612313785499e-05, "loss": 0.2688, "step": 33698 }, { "epoch": 2.7299902786779002, "grad_norm": 0.06464651226997375, "learning_rate": 7.055673072595527e-05, "loss": 0.2158, "step": 33699 }, { "epoch": 2.7300712896953985, "grad_norm": 0.058058079332113266, "learning_rate": 7.055223007336064e-05, "loss": 0.23, "step": 33700 }, { "epoch": 2.7301523007128967, "grad_norm": 0.0811547338962555, "learning_rate": 7.054772942076602e-05, "loss": 0.2421, "step": 33701 }, { "epoch": 2.7302333117303954, "grad_norm": 0.06070886552333832, "learning_rate": 7.054322876817139e-05, "loss": 0.2083, "step": 33702 }, { "epoch": 2.7303143227478937, "grad_norm": 0.05657278373837471, "learning_rate": 7.053872811557676e-05, "loss": 0.1969, "step": 33703 }, { "epoch": 2.730395333765392, "grad_norm": 0.06588597595691681, "learning_rate": 7.053422746298214e-05, "loss": 0.2475, "step": 33704 }, { "epoch": 2.7304763447828906, "grad_norm": 0.07281491905450821, "learning_rate": 7.052972681038751e-05, "loss": 0.2373, "step": 33705 }, { "epoch": 2.730557355800389, "grad_norm": 0.0641547366976738, "learning_rate": 7.052522615779288e-05, "loss": 0.2116, "step": 33706 }, { "epoch": 2.730638366817887, "grad_norm": 0.0717482790350914, "learning_rate": 7.052072550519826e-05, "loss": 0.2386, "step": 33707 }, { "epoch": 2.7307193778353858, "grad_norm": 0.07203912734985352, "learning_rate": 7.051622485260363e-05, "loss": 0.2436, "step": 33708 }, { "epoch": 2.730800388852884, "grad_norm": 0.06969515234231949, "learning_rate": 7.0511724200009e-05, "loss": 0.2409, "step": 33709 }, { "epoch": 2.7308813998703823, "grad_norm": 0.08552543818950653, "learning_rate": 7.050722354741438e-05, "loss": 0.2173, "step": 33710 }, { "epoch": 2.730962410887881, "grad_norm": 0.0826902985572815, "learning_rate": 7.050272289481975e-05, "loss": 0.237, "step": 33711 }, { "epoch": 2.731043421905379, "grad_norm": 0.05936390534043312, "learning_rate": 7.049822224222513e-05, "loss": 0.2083, "step": 33712 }, { "epoch": 2.7311244329228774, "grad_norm": 0.08406198024749756, "learning_rate": 7.04937215896305e-05, "loss": 0.2386, "step": 33713 }, { "epoch": 2.731205443940376, "grad_norm": 0.07294872403144836, "learning_rate": 7.048922093703587e-05, "loss": 0.2189, "step": 33714 }, { "epoch": 2.7312864549578744, "grad_norm": 0.0857522040605545, "learning_rate": 7.048472028444125e-05, "loss": 0.2611, "step": 33715 }, { "epoch": 2.7313674659753726, "grad_norm": 0.07238192856311798, "learning_rate": 7.048021963184662e-05, "loss": 0.2455, "step": 33716 }, { "epoch": 2.7314484769928713, "grad_norm": 0.08115571737289429, "learning_rate": 7.0475718979252e-05, "loss": 0.2292, "step": 33717 }, { "epoch": 2.7315294880103695, "grad_norm": 0.06046930328011513, "learning_rate": 7.047121832665737e-05, "loss": 0.2508, "step": 33718 }, { "epoch": 2.731610499027868, "grad_norm": 0.0676305890083313, "learning_rate": 7.046671767406274e-05, "loss": 0.2265, "step": 33719 }, { "epoch": 2.731691510045366, "grad_norm": 0.07789310067892075, "learning_rate": 7.046221702146812e-05, "loss": 0.2027, "step": 33720 }, { "epoch": 2.7317725210628643, "grad_norm": 0.057190366089344025, "learning_rate": 7.045771636887349e-05, "loss": 0.2165, "step": 33721 }, { "epoch": 2.731853532080363, "grad_norm": 0.07512050122022629, "learning_rate": 7.045321571627886e-05, "loss": 0.2153, "step": 33722 }, { "epoch": 2.731934543097861, "grad_norm": 0.05146675184369087, "learning_rate": 7.044871506368424e-05, "loss": 0.2318, "step": 33723 }, { "epoch": 2.7320155541153595, "grad_norm": 0.07653915882110596, "learning_rate": 7.044421441108961e-05, "loss": 0.2963, "step": 33724 }, { "epoch": 2.732096565132858, "grad_norm": 0.06947991251945496, "learning_rate": 7.043971375849498e-05, "loss": 0.2098, "step": 33725 }, { "epoch": 2.7321775761503564, "grad_norm": 0.05789974704384804, "learning_rate": 7.043521310590036e-05, "loss": 0.2251, "step": 33726 }, { "epoch": 2.7322585871678546, "grad_norm": 0.06780644506216049, "learning_rate": 7.043071245330573e-05, "loss": 0.2158, "step": 33727 }, { "epoch": 2.7323395981853533, "grad_norm": 0.06673760712146759, "learning_rate": 7.04262118007111e-05, "loss": 0.2658, "step": 33728 }, { "epoch": 2.7324206092028516, "grad_norm": 0.06278131902217865, "learning_rate": 7.042171114811648e-05, "loss": 0.2576, "step": 33729 }, { "epoch": 2.73250162022035, "grad_norm": 0.07872526347637177, "learning_rate": 7.041721049552185e-05, "loss": 0.2636, "step": 33730 }, { "epoch": 2.7325826312378485, "grad_norm": 0.05997196584939957, "learning_rate": 7.041270984292722e-05, "loss": 0.2184, "step": 33731 }, { "epoch": 2.7326636422553467, "grad_norm": 0.07817976921796799, "learning_rate": 7.04082091903326e-05, "loss": 0.2661, "step": 33732 }, { "epoch": 2.732744653272845, "grad_norm": 0.060990262776613235, "learning_rate": 7.040370853773797e-05, "loss": 0.229, "step": 33733 }, { "epoch": 2.7328256642903437, "grad_norm": 0.07189729809761047, "learning_rate": 7.039920788514335e-05, "loss": 0.2156, "step": 33734 }, { "epoch": 2.732906675307842, "grad_norm": 0.06340024620294571, "learning_rate": 7.039470723254872e-05, "loss": 0.2244, "step": 33735 }, { "epoch": 2.73298768632534, "grad_norm": 0.059773221611976624, "learning_rate": 7.039020657995409e-05, "loss": 0.2135, "step": 33736 }, { "epoch": 2.733068697342839, "grad_norm": 0.05692936107516289, "learning_rate": 7.038570592735947e-05, "loss": 0.2086, "step": 33737 }, { "epoch": 2.733149708360337, "grad_norm": 0.05831446498632431, "learning_rate": 7.038120527476485e-05, "loss": 0.1777, "step": 33738 }, { "epoch": 2.7332307193778353, "grad_norm": 0.0769772082567215, "learning_rate": 7.037670462217021e-05, "loss": 0.2574, "step": 33739 }, { "epoch": 2.733311730395334, "grad_norm": 0.0605337955057621, "learning_rate": 7.037220396957559e-05, "loss": 0.2145, "step": 33740 }, { "epoch": 2.7333927414128323, "grad_norm": 0.08001101762056351, "learning_rate": 7.036770331698097e-05, "loss": 0.2414, "step": 33741 }, { "epoch": 2.7334737524303305, "grad_norm": 0.05913480371236801, "learning_rate": 7.036320266438633e-05, "loss": 0.2118, "step": 33742 }, { "epoch": 2.7335547634478288, "grad_norm": 0.0753609761595726, "learning_rate": 7.035870201179171e-05, "loss": 0.2286, "step": 33743 }, { "epoch": 2.733635774465327, "grad_norm": 0.07040075212717056, "learning_rate": 7.03542013591971e-05, "loss": 0.2193, "step": 33744 }, { "epoch": 2.7337167854828257, "grad_norm": 0.06425496935844421, "learning_rate": 7.034970070660246e-05, "loss": 0.2583, "step": 33745 }, { "epoch": 2.733797796500324, "grad_norm": 0.06807851046323776, "learning_rate": 7.034520005400783e-05, "loss": 0.2412, "step": 33746 }, { "epoch": 2.733878807517822, "grad_norm": 0.06525375694036484, "learning_rate": 7.034069940141322e-05, "loss": 0.2068, "step": 33747 }, { "epoch": 2.733959818535321, "grad_norm": 0.06132105737924576, "learning_rate": 7.033619874881858e-05, "loss": 0.1967, "step": 33748 }, { "epoch": 2.734040829552819, "grad_norm": 0.07580532133579254, "learning_rate": 7.033169809622395e-05, "loss": 0.2364, "step": 33749 }, { "epoch": 2.7341218405703174, "grad_norm": 0.0784335732460022, "learning_rate": 7.032719744362934e-05, "loss": 0.2257, "step": 33750 }, { "epoch": 2.734202851587816, "grad_norm": 0.07119733840227127, "learning_rate": 7.03226967910347e-05, "loss": 0.2395, "step": 33751 }, { "epoch": 2.7342838626053143, "grad_norm": 0.07127971947193146, "learning_rate": 7.031819613844007e-05, "loss": 0.2173, "step": 33752 }, { "epoch": 2.7343648736228126, "grad_norm": 0.07059836387634277, "learning_rate": 7.031369548584546e-05, "loss": 0.2458, "step": 33753 }, { "epoch": 2.7344458846403112, "grad_norm": 0.07279672473669052, "learning_rate": 7.030919483325082e-05, "loss": 0.247, "step": 33754 }, { "epoch": 2.7345268956578095, "grad_norm": 0.0836254432797432, "learning_rate": 7.030469418065619e-05, "loss": 0.2716, "step": 33755 }, { "epoch": 2.7346079066753077, "grad_norm": 0.06887470930814743, "learning_rate": 7.030019352806158e-05, "loss": 0.2572, "step": 33756 }, { "epoch": 2.7346889176928064, "grad_norm": 0.06887579709291458, "learning_rate": 7.029569287546694e-05, "loss": 0.2227, "step": 33757 }, { "epoch": 2.7347699287103047, "grad_norm": 0.0751393735408783, "learning_rate": 7.029119222287231e-05, "loss": 0.2437, "step": 33758 }, { "epoch": 2.734850939727803, "grad_norm": 0.062123481184244156, "learning_rate": 7.02866915702777e-05, "loss": 0.2359, "step": 33759 }, { "epoch": 2.7349319507453016, "grad_norm": 0.09555557370185852, "learning_rate": 7.028219091768306e-05, "loss": 0.2334, "step": 33760 }, { "epoch": 2.7350129617628, "grad_norm": 0.0862041488289833, "learning_rate": 7.027769026508843e-05, "loss": 0.2499, "step": 33761 }, { "epoch": 2.735093972780298, "grad_norm": 0.05869421362876892, "learning_rate": 7.027318961249382e-05, "loss": 0.1972, "step": 33762 }, { "epoch": 2.7351749837977968, "grad_norm": 0.071591317653656, "learning_rate": 7.02686889598992e-05, "loss": 0.2497, "step": 33763 }, { "epoch": 2.735255994815295, "grad_norm": 0.07253791391849518, "learning_rate": 7.026418830730455e-05, "loss": 0.2708, "step": 33764 }, { "epoch": 2.7353370058327933, "grad_norm": 0.07822144776582718, "learning_rate": 7.025968765470994e-05, "loss": 0.2319, "step": 33765 }, { "epoch": 2.7354180168502915, "grad_norm": 0.08056861162185669, "learning_rate": 7.025518700211531e-05, "loss": 0.2354, "step": 33766 }, { "epoch": 2.7354990278677898, "grad_norm": 0.06811316311359406, "learning_rate": 7.025068634952069e-05, "loss": 0.2526, "step": 33767 }, { "epoch": 2.7355800388852884, "grad_norm": 0.07110168039798737, "learning_rate": 7.024618569692606e-05, "loss": 0.2254, "step": 33768 }, { "epoch": 2.7356610499027867, "grad_norm": 0.06414638459682465, "learning_rate": 7.024168504433144e-05, "loss": 0.2435, "step": 33769 }, { "epoch": 2.735742060920285, "grad_norm": 0.06620276719331741, "learning_rate": 7.023718439173681e-05, "loss": 0.2487, "step": 33770 }, { "epoch": 2.7358230719377836, "grad_norm": 0.07465215027332306, "learning_rate": 7.023268373914218e-05, "loss": 0.262, "step": 33771 }, { "epoch": 2.735904082955282, "grad_norm": 0.07484614104032516, "learning_rate": 7.022818308654756e-05, "loss": 0.2374, "step": 33772 }, { "epoch": 2.73598509397278, "grad_norm": 0.0882822796702385, "learning_rate": 7.022368243395293e-05, "loss": 0.2527, "step": 33773 }, { "epoch": 2.736066104990279, "grad_norm": 0.06477689743041992, "learning_rate": 7.02191817813583e-05, "loss": 0.2397, "step": 33774 }, { "epoch": 2.736147116007777, "grad_norm": 0.055372174829244614, "learning_rate": 7.021468112876368e-05, "loss": 0.2228, "step": 33775 }, { "epoch": 2.7362281270252753, "grad_norm": 0.07421818375587463, "learning_rate": 7.021018047616905e-05, "loss": 0.2492, "step": 33776 }, { "epoch": 2.736309138042774, "grad_norm": 0.06517310440540314, "learning_rate": 7.020567982357442e-05, "loss": 0.2673, "step": 33777 }, { "epoch": 2.7363901490602722, "grad_norm": 0.08472959697246552, "learning_rate": 7.02011791709798e-05, "loss": 0.2383, "step": 33778 }, { "epoch": 2.7364711600777705, "grad_norm": 0.08939102292060852, "learning_rate": 7.019667851838517e-05, "loss": 0.2173, "step": 33779 }, { "epoch": 2.736552171095269, "grad_norm": 0.06531999260187149, "learning_rate": 7.019217786579055e-05, "loss": 0.2868, "step": 33780 }, { "epoch": 2.7366331821127674, "grad_norm": 0.07156679034233093, "learning_rate": 7.018767721319592e-05, "loss": 0.2444, "step": 33781 }, { "epoch": 2.7367141931302656, "grad_norm": 0.08077112585306168, "learning_rate": 7.018317656060129e-05, "loss": 0.2384, "step": 33782 }, { "epoch": 2.7367952041477643, "grad_norm": 0.06216087564826012, "learning_rate": 7.017867590800667e-05, "loss": 0.2296, "step": 33783 }, { "epoch": 2.7368762151652626, "grad_norm": 0.08354579657316208, "learning_rate": 7.017417525541204e-05, "loss": 0.2386, "step": 33784 }, { "epoch": 2.736957226182761, "grad_norm": 0.07849965244531631, "learning_rate": 7.016967460281741e-05, "loss": 0.2343, "step": 33785 }, { "epoch": 2.737038237200259, "grad_norm": 0.06662687659263611, "learning_rate": 7.016517395022279e-05, "loss": 0.2158, "step": 33786 }, { "epoch": 2.7371192482177578, "grad_norm": 0.08657371252775192, "learning_rate": 7.016067329762816e-05, "loss": 0.2542, "step": 33787 }, { "epoch": 2.737200259235256, "grad_norm": 0.06502904742956161, "learning_rate": 7.015617264503353e-05, "loss": 0.2463, "step": 33788 }, { "epoch": 2.7372812702527543, "grad_norm": 0.06432264298200607, "learning_rate": 7.015167199243891e-05, "loss": 0.2188, "step": 33789 }, { "epoch": 2.7373622812702525, "grad_norm": 0.06669308245182037, "learning_rate": 7.014717133984428e-05, "loss": 0.2682, "step": 33790 }, { "epoch": 2.737443292287751, "grad_norm": 0.07942566275596619, "learning_rate": 7.014267068724965e-05, "loss": 0.2607, "step": 33791 }, { "epoch": 2.7375243033052494, "grad_norm": 0.07019062340259552, "learning_rate": 7.013817003465503e-05, "loss": 0.2515, "step": 33792 }, { "epoch": 2.7376053143227477, "grad_norm": 0.0668862909078598, "learning_rate": 7.01336693820604e-05, "loss": 0.2336, "step": 33793 }, { "epoch": 2.7376863253402464, "grad_norm": 0.06477285176515579, "learning_rate": 7.012916872946578e-05, "loss": 0.21, "step": 33794 }, { "epoch": 2.7377673363577446, "grad_norm": 0.08168063312768936, "learning_rate": 7.012466807687115e-05, "loss": 0.2426, "step": 33795 }, { "epoch": 2.737848347375243, "grad_norm": 0.07425316423177719, "learning_rate": 7.012016742427652e-05, "loss": 0.2753, "step": 33796 }, { "epoch": 2.7379293583927415, "grad_norm": 0.0631299614906311, "learning_rate": 7.01156667716819e-05, "loss": 0.249, "step": 33797 }, { "epoch": 2.73801036941024, "grad_norm": 0.05468069016933441, "learning_rate": 7.011116611908727e-05, "loss": 0.2033, "step": 33798 }, { "epoch": 2.738091380427738, "grad_norm": 0.07220423221588135, "learning_rate": 7.010666546649264e-05, "loss": 0.2031, "step": 33799 }, { "epoch": 2.7381723914452367, "grad_norm": 0.06589049100875854, "learning_rate": 7.010216481389802e-05, "loss": 0.2005, "step": 33800 }, { "epoch": 2.738253402462735, "grad_norm": 0.07369597256183624, "learning_rate": 7.009766416130339e-05, "loss": 0.2631, "step": 33801 }, { "epoch": 2.738334413480233, "grad_norm": 0.060120873153209686, "learning_rate": 7.009316350870876e-05, "loss": 0.2399, "step": 33802 }, { "epoch": 2.738415424497732, "grad_norm": 0.06925645470619202, "learning_rate": 7.008866285611414e-05, "loss": 0.2172, "step": 33803 }, { "epoch": 2.73849643551523, "grad_norm": 0.08340088278055191, "learning_rate": 7.008416220351951e-05, "loss": 0.2284, "step": 33804 }, { "epoch": 2.7385774465327284, "grad_norm": 0.06543581932783127, "learning_rate": 7.007966155092489e-05, "loss": 0.2927, "step": 33805 }, { "epoch": 2.738658457550227, "grad_norm": 0.07321444153785706, "learning_rate": 7.007516089833026e-05, "loss": 0.2035, "step": 33806 }, { "epoch": 2.7387394685677253, "grad_norm": 0.08377903699874878, "learning_rate": 7.007066024573563e-05, "loss": 0.2437, "step": 33807 }, { "epoch": 2.7388204795852236, "grad_norm": 0.07676710933446884, "learning_rate": 7.0066159593141e-05, "loss": 0.2379, "step": 33808 }, { "epoch": 2.738901490602722, "grad_norm": 0.07532139122486115, "learning_rate": 7.006165894054638e-05, "loss": 0.2104, "step": 33809 }, { "epoch": 2.7389825016202205, "grad_norm": 0.07883398979902267, "learning_rate": 7.005715828795175e-05, "loss": 0.2516, "step": 33810 }, { "epoch": 2.7390635126377187, "grad_norm": 0.08295116573572159, "learning_rate": 7.005265763535713e-05, "loss": 0.2142, "step": 33811 }, { "epoch": 2.739144523655217, "grad_norm": 0.07011543959379196, "learning_rate": 7.00481569827625e-05, "loss": 0.2167, "step": 33812 }, { "epoch": 2.7392255346727152, "grad_norm": 0.07436306029558182, "learning_rate": 7.004365633016787e-05, "loss": 0.258, "step": 33813 }, { "epoch": 2.739306545690214, "grad_norm": 0.07716170698404312, "learning_rate": 7.003915567757325e-05, "loss": 0.241, "step": 33814 }, { "epoch": 2.739387556707712, "grad_norm": 0.06765492260456085, "learning_rate": 7.003465502497862e-05, "loss": 0.2618, "step": 33815 }, { "epoch": 2.7394685677252104, "grad_norm": 0.07305973023176193, "learning_rate": 7.0030154372384e-05, "loss": 0.2412, "step": 33816 }, { "epoch": 2.739549578742709, "grad_norm": 0.06445764005184174, "learning_rate": 7.002565371978937e-05, "loss": 0.224, "step": 33817 }, { "epoch": 2.7396305897602073, "grad_norm": 0.06938768178224564, "learning_rate": 7.002115306719474e-05, "loss": 0.2641, "step": 33818 }, { "epoch": 2.7397116007777056, "grad_norm": 0.06424959003925323, "learning_rate": 7.001665241460013e-05, "loss": 0.2338, "step": 33819 }, { "epoch": 2.7397926117952043, "grad_norm": 0.07303512841463089, "learning_rate": 7.001215176200549e-05, "loss": 0.2504, "step": 33820 }, { "epoch": 2.7398736228127025, "grad_norm": 0.0583353228867054, "learning_rate": 7.000765110941086e-05, "loss": 0.2449, "step": 33821 }, { "epoch": 2.7399546338302008, "grad_norm": 0.06295175850391388, "learning_rate": 7.000315045681625e-05, "loss": 0.223, "step": 33822 }, { "epoch": 2.7400356448476995, "grad_norm": 0.07108136266469955, "learning_rate": 6.999864980422161e-05, "loss": 0.2506, "step": 33823 }, { "epoch": 2.7401166558651977, "grad_norm": 0.07039394229650497, "learning_rate": 6.999414915162698e-05, "loss": 0.2432, "step": 33824 }, { "epoch": 2.740197666882696, "grad_norm": 0.07306911051273346, "learning_rate": 6.998964849903237e-05, "loss": 0.2378, "step": 33825 }, { "epoch": 2.7402786779001946, "grad_norm": 0.07997465133666992, "learning_rate": 6.998514784643773e-05, "loss": 0.2805, "step": 33826 }, { "epoch": 2.740359688917693, "grad_norm": 0.08780818432569504, "learning_rate": 6.99806471938431e-05, "loss": 0.2553, "step": 33827 }, { "epoch": 2.740440699935191, "grad_norm": 0.07117179781198502, "learning_rate": 6.997614654124849e-05, "loss": 0.2315, "step": 33828 }, { "epoch": 2.74052171095269, "grad_norm": 0.08158638328313828, "learning_rate": 6.997164588865385e-05, "loss": 0.2547, "step": 33829 }, { "epoch": 2.740602721970188, "grad_norm": 0.06616087257862091, "learning_rate": 6.996714523605923e-05, "loss": 0.2258, "step": 33830 }, { "epoch": 2.7406837329876863, "grad_norm": 0.0600474514067173, "learning_rate": 6.996264458346461e-05, "loss": 0.2402, "step": 33831 }, { "epoch": 2.7407647440051845, "grad_norm": 0.06791268289089203, "learning_rate": 6.995814393086999e-05, "loss": 0.261, "step": 33832 }, { "epoch": 2.7408457550226832, "grad_norm": 0.06313014775514603, "learning_rate": 6.995364327827535e-05, "loss": 0.2329, "step": 33833 }, { "epoch": 2.7409267660401815, "grad_norm": 0.07031618058681488, "learning_rate": 6.994914262568073e-05, "loss": 0.2339, "step": 33834 }, { "epoch": 2.7410077770576797, "grad_norm": 0.07318814843893051, "learning_rate": 6.994464197308611e-05, "loss": 0.2446, "step": 33835 }, { "epoch": 2.741088788075178, "grad_norm": 0.0746544748544693, "learning_rate": 6.994014132049147e-05, "loss": 0.2573, "step": 33836 }, { "epoch": 2.7411697990926767, "grad_norm": 0.08126812428236008, "learning_rate": 6.993564066789685e-05, "loss": 0.2768, "step": 33837 }, { "epoch": 2.741250810110175, "grad_norm": 0.0760657787322998, "learning_rate": 6.993114001530223e-05, "loss": 0.2319, "step": 33838 }, { "epoch": 2.741331821127673, "grad_norm": 0.07601358741521835, "learning_rate": 6.992663936270759e-05, "loss": 0.262, "step": 33839 }, { "epoch": 2.741412832145172, "grad_norm": 0.07119158655405045, "learning_rate": 6.992213871011297e-05, "loss": 0.2495, "step": 33840 }, { "epoch": 2.74149384316267, "grad_norm": 0.05685252323746681, "learning_rate": 6.991763805751835e-05, "loss": 0.2144, "step": 33841 }, { "epoch": 2.7415748541801683, "grad_norm": 0.05508258566260338, "learning_rate": 6.991313740492371e-05, "loss": 0.2268, "step": 33842 }, { "epoch": 2.741655865197667, "grad_norm": 0.07089263945817947, "learning_rate": 6.99086367523291e-05, "loss": 0.2272, "step": 33843 }, { "epoch": 2.7417368762151653, "grad_norm": 0.07166006416082382, "learning_rate": 6.990413609973447e-05, "loss": 0.2718, "step": 33844 }, { "epoch": 2.7418178872326635, "grad_norm": 0.0669952780008316, "learning_rate": 6.989963544713984e-05, "loss": 0.2445, "step": 33845 }, { "epoch": 2.741898898250162, "grad_norm": 0.06594375520944595, "learning_rate": 6.989513479454522e-05, "loss": 0.2806, "step": 33846 }, { "epoch": 2.7419799092676604, "grad_norm": 0.04766225442290306, "learning_rate": 6.989063414195059e-05, "loss": 0.2036, "step": 33847 }, { "epoch": 2.7420609202851587, "grad_norm": 0.0686596930027008, "learning_rate": 6.988613348935596e-05, "loss": 0.2248, "step": 33848 }, { "epoch": 2.7421419313026574, "grad_norm": 0.05600970238447189, "learning_rate": 6.988163283676134e-05, "loss": 0.206, "step": 33849 }, { "epoch": 2.7422229423201556, "grad_norm": 0.08106046915054321, "learning_rate": 6.987713218416671e-05, "loss": 0.2254, "step": 33850 }, { "epoch": 2.742303953337654, "grad_norm": 0.06680909544229507, "learning_rate": 6.987263153157208e-05, "loss": 0.251, "step": 33851 }, { "epoch": 2.7423849643551526, "grad_norm": 0.07697292417287827, "learning_rate": 6.986813087897746e-05, "loss": 0.2281, "step": 33852 }, { "epoch": 2.742465975372651, "grad_norm": 0.07131654024124146, "learning_rate": 6.986363022638283e-05, "loss": 0.2315, "step": 33853 }, { "epoch": 2.742546986390149, "grad_norm": 0.06702481955289841, "learning_rate": 6.98591295737882e-05, "loss": 0.215, "step": 33854 }, { "epoch": 2.7426279974076473, "grad_norm": 0.06961885839700699, "learning_rate": 6.985462892119358e-05, "loss": 0.2585, "step": 33855 }, { "epoch": 2.742709008425146, "grad_norm": 0.06202276796102524, "learning_rate": 6.985012826859895e-05, "loss": 0.2271, "step": 33856 }, { "epoch": 2.7427900194426442, "grad_norm": 0.07506030052900314, "learning_rate": 6.984562761600433e-05, "loss": 0.2559, "step": 33857 }, { "epoch": 2.7428710304601425, "grad_norm": 0.07953765243291855, "learning_rate": 6.98411269634097e-05, "loss": 0.2489, "step": 33858 }, { "epoch": 2.7429520414776407, "grad_norm": 0.07561314105987549, "learning_rate": 6.983662631081507e-05, "loss": 0.2335, "step": 33859 }, { "epoch": 2.7430330524951394, "grad_norm": 0.07074534147977829, "learning_rate": 6.983212565822045e-05, "loss": 0.2195, "step": 33860 }, { "epoch": 2.7431140635126376, "grad_norm": 0.06227237731218338, "learning_rate": 6.982762500562582e-05, "loss": 0.2746, "step": 33861 }, { "epoch": 2.743195074530136, "grad_norm": 0.06314686685800552, "learning_rate": 6.98231243530312e-05, "loss": 0.2408, "step": 33862 }, { "epoch": 2.7432760855476346, "grad_norm": 0.06816864013671875, "learning_rate": 6.981862370043657e-05, "loss": 0.2566, "step": 33863 }, { "epoch": 2.743357096565133, "grad_norm": 0.057434193789958954, "learning_rate": 6.981412304784194e-05, "loss": 0.1896, "step": 33864 }, { "epoch": 2.743438107582631, "grad_norm": 0.08918724209070206, "learning_rate": 6.980962239524731e-05, "loss": 0.2798, "step": 33865 }, { "epoch": 2.7435191186001298, "grad_norm": 0.0637928768992424, "learning_rate": 6.980512174265269e-05, "loss": 0.2373, "step": 33866 }, { "epoch": 2.743600129617628, "grad_norm": 0.06168600916862488, "learning_rate": 6.980062109005806e-05, "loss": 0.2044, "step": 33867 }, { "epoch": 2.7436811406351262, "grad_norm": 0.06205640733242035, "learning_rate": 6.979612043746344e-05, "loss": 0.2485, "step": 33868 }, { "epoch": 2.743762151652625, "grad_norm": 0.0713237076997757, "learning_rate": 6.979161978486881e-05, "loss": 0.2555, "step": 33869 }, { "epoch": 2.743843162670123, "grad_norm": 0.0663994699716568, "learning_rate": 6.978711913227418e-05, "loss": 0.2017, "step": 33870 }, { "epoch": 2.7439241736876214, "grad_norm": 0.07154440879821777, "learning_rate": 6.978261847967956e-05, "loss": 0.259, "step": 33871 }, { "epoch": 2.74400518470512, "grad_norm": 0.08224856108427048, "learning_rate": 6.977811782708493e-05, "loss": 0.2549, "step": 33872 }, { "epoch": 2.7440861957226184, "grad_norm": 0.07634004205465317, "learning_rate": 6.97736171744903e-05, "loss": 0.2671, "step": 33873 }, { "epoch": 2.7441672067401166, "grad_norm": 0.06648749858140945, "learning_rate": 6.976911652189568e-05, "loss": 0.2517, "step": 33874 }, { "epoch": 2.7442482177576153, "grad_norm": 0.07519011199474335, "learning_rate": 6.976461586930105e-05, "loss": 0.2343, "step": 33875 }, { "epoch": 2.7443292287751135, "grad_norm": 0.0715513825416565, "learning_rate": 6.976011521670642e-05, "loss": 0.2435, "step": 33876 }, { "epoch": 2.744410239792612, "grad_norm": 0.05868488922715187, "learning_rate": 6.97556145641118e-05, "loss": 0.191, "step": 33877 }, { "epoch": 2.74449125081011, "grad_norm": 0.06658539921045303, "learning_rate": 6.975111391151717e-05, "loss": 0.2674, "step": 33878 }, { "epoch": 2.7445722618276087, "grad_norm": 0.07914631813764572, "learning_rate": 6.974661325892255e-05, "loss": 0.2455, "step": 33879 }, { "epoch": 2.744653272845107, "grad_norm": 0.06325214356184006, "learning_rate": 6.974211260632792e-05, "loss": 0.2192, "step": 33880 }, { "epoch": 2.744734283862605, "grad_norm": 0.07583370804786682, "learning_rate": 6.973761195373329e-05, "loss": 0.246, "step": 33881 }, { "epoch": 2.7448152948801035, "grad_norm": 0.08487239480018616, "learning_rate": 6.973311130113867e-05, "loss": 0.264, "step": 33882 }, { "epoch": 2.744896305897602, "grad_norm": 0.06740166246891022, "learning_rate": 6.972861064854404e-05, "loss": 0.2526, "step": 33883 }, { "epoch": 2.7449773169151004, "grad_norm": 0.065463587641716, "learning_rate": 6.972410999594941e-05, "loss": 0.2367, "step": 33884 }, { "epoch": 2.7450583279325986, "grad_norm": 0.07324957102537155, "learning_rate": 6.971960934335479e-05, "loss": 0.2468, "step": 33885 }, { "epoch": 2.7451393389500973, "grad_norm": 0.06521370261907578, "learning_rate": 6.971510869076016e-05, "loss": 0.2448, "step": 33886 }, { "epoch": 2.7452203499675956, "grad_norm": 0.0762423500418663, "learning_rate": 6.971060803816553e-05, "loss": 0.2584, "step": 33887 }, { "epoch": 2.745301360985094, "grad_norm": 0.06677243113517761, "learning_rate": 6.970610738557091e-05, "loss": 0.2226, "step": 33888 }, { "epoch": 2.7453823720025925, "grad_norm": 0.06267214566469193, "learning_rate": 6.970160673297628e-05, "loss": 0.2011, "step": 33889 }, { "epoch": 2.7454633830200907, "grad_norm": 0.054746679961681366, "learning_rate": 6.969710608038165e-05, "loss": 0.2164, "step": 33890 }, { "epoch": 2.745544394037589, "grad_norm": 0.06546787172555923, "learning_rate": 6.969260542778703e-05, "loss": 0.2138, "step": 33891 }, { "epoch": 2.7456254050550877, "grad_norm": 0.06459563225507736, "learning_rate": 6.96881047751924e-05, "loss": 0.2623, "step": 33892 }, { "epoch": 2.745706416072586, "grad_norm": 0.06675142049789429, "learning_rate": 6.968360412259778e-05, "loss": 0.2613, "step": 33893 }, { "epoch": 2.745787427090084, "grad_norm": 0.06525643169879913, "learning_rate": 6.967910347000315e-05, "loss": 0.2273, "step": 33894 }, { "epoch": 2.745868438107583, "grad_norm": 0.0785670205950737, "learning_rate": 6.967460281740852e-05, "loss": 0.2384, "step": 33895 }, { "epoch": 2.745949449125081, "grad_norm": 0.061771150678396225, "learning_rate": 6.96701021648139e-05, "loss": 0.2368, "step": 33896 }, { "epoch": 2.7460304601425793, "grad_norm": 0.07329079508781433, "learning_rate": 6.966560151221928e-05, "loss": 0.2479, "step": 33897 }, { "epoch": 2.746111471160078, "grad_norm": 0.058052219450473785, "learning_rate": 6.966110085962466e-05, "loss": 0.2358, "step": 33898 }, { "epoch": 2.7461924821775763, "grad_norm": 0.08250406384468079, "learning_rate": 6.965660020703002e-05, "loss": 0.2169, "step": 33899 }, { "epoch": 2.7462734931950745, "grad_norm": 0.06558365374803543, "learning_rate": 6.96520995544354e-05, "loss": 0.2499, "step": 33900 }, { "epoch": 2.7463545042125728, "grad_norm": 0.06303752958774567, "learning_rate": 6.964759890184078e-05, "loss": 0.1933, "step": 33901 }, { "epoch": 2.7464355152300715, "grad_norm": 0.07631899416446686, "learning_rate": 6.964309824924614e-05, "loss": 0.231, "step": 33902 }, { "epoch": 2.7465165262475697, "grad_norm": 0.07728277891874313, "learning_rate": 6.963859759665153e-05, "loss": 0.254, "step": 33903 }, { "epoch": 2.746597537265068, "grad_norm": 0.06139729544520378, "learning_rate": 6.96340969440569e-05, "loss": 0.2061, "step": 33904 }, { "epoch": 2.746678548282566, "grad_norm": 0.06394772976636887, "learning_rate": 6.962959629146226e-05, "loss": 0.2302, "step": 33905 }, { "epoch": 2.746759559300065, "grad_norm": 0.07677542418241501, "learning_rate": 6.962509563886765e-05, "loss": 0.2518, "step": 33906 }, { "epoch": 2.746840570317563, "grad_norm": 0.06649527698755264, "learning_rate": 6.962059498627302e-05, "loss": 0.2174, "step": 33907 }, { "epoch": 2.7469215813350614, "grad_norm": 0.07912680506706238, "learning_rate": 6.961609433367838e-05, "loss": 0.2361, "step": 33908 }, { "epoch": 2.74700259235256, "grad_norm": 0.05456256493926048, "learning_rate": 6.961159368108377e-05, "loss": 0.2232, "step": 33909 }, { "epoch": 2.7470836033700583, "grad_norm": 0.06172642111778259, "learning_rate": 6.960709302848914e-05, "loss": 0.2531, "step": 33910 }, { "epoch": 2.7471646143875565, "grad_norm": 0.07474996894598007, "learning_rate": 6.96025923758945e-05, "loss": 0.2614, "step": 33911 }, { "epoch": 2.7472456254050552, "grad_norm": 0.06314655393362045, "learning_rate": 6.959809172329989e-05, "loss": 0.2316, "step": 33912 }, { "epoch": 2.7473266364225535, "grad_norm": 0.0807376354932785, "learning_rate": 6.959359107070526e-05, "loss": 0.2238, "step": 33913 }, { "epoch": 2.7474076474400517, "grad_norm": 0.07609011977910995, "learning_rate": 6.958909041811062e-05, "loss": 0.2425, "step": 33914 }, { "epoch": 2.7474886584575504, "grad_norm": 0.08066786080598831, "learning_rate": 6.958458976551601e-05, "loss": 0.2354, "step": 33915 }, { "epoch": 2.7475696694750487, "grad_norm": 0.06597253680229187, "learning_rate": 6.958008911292138e-05, "loss": 0.2118, "step": 33916 }, { "epoch": 2.747650680492547, "grad_norm": 0.06814610958099365, "learning_rate": 6.957558846032674e-05, "loss": 0.2365, "step": 33917 }, { "epoch": 2.7477316915100456, "grad_norm": 0.07999754697084427, "learning_rate": 6.957108780773213e-05, "loss": 0.277, "step": 33918 }, { "epoch": 2.747812702527544, "grad_norm": 0.07574919611215591, "learning_rate": 6.95665871551375e-05, "loss": 0.281, "step": 33919 }, { "epoch": 2.747893713545042, "grad_norm": 0.062194306403398514, "learning_rate": 6.956208650254286e-05, "loss": 0.2249, "step": 33920 }, { "epoch": 2.7479747245625408, "grad_norm": 0.06311631202697754, "learning_rate": 6.955758584994825e-05, "loss": 0.2159, "step": 33921 }, { "epoch": 2.748055735580039, "grad_norm": 0.06273925304412842, "learning_rate": 6.955308519735362e-05, "loss": 0.196, "step": 33922 }, { "epoch": 2.7481367465975373, "grad_norm": 0.05916561558842659, "learning_rate": 6.954858454475898e-05, "loss": 0.2142, "step": 33923 }, { "epoch": 2.7482177576150355, "grad_norm": 0.07138768583536148, "learning_rate": 6.954408389216437e-05, "loss": 0.2128, "step": 33924 }, { "epoch": 2.7482987686325338, "grad_norm": 0.07354070246219635, "learning_rate": 6.953958323956974e-05, "loss": 0.2503, "step": 33925 }, { "epoch": 2.7483797796500324, "grad_norm": 0.07693708688020706, "learning_rate": 6.953508258697512e-05, "loss": 0.2403, "step": 33926 }, { "epoch": 2.7484607906675307, "grad_norm": 0.07450443506240845, "learning_rate": 6.953058193438049e-05, "loss": 0.2474, "step": 33927 }, { "epoch": 2.748541801685029, "grad_norm": 0.07271936535835266, "learning_rate": 6.952608128178587e-05, "loss": 0.2424, "step": 33928 }, { "epoch": 2.7486228127025276, "grad_norm": 0.05559582635760307, "learning_rate": 6.952158062919124e-05, "loss": 0.2425, "step": 33929 }, { "epoch": 2.748703823720026, "grad_norm": 0.07008780539035797, "learning_rate": 6.951707997659661e-05, "loss": 0.248, "step": 33930 }, { "epoch": 2.748784834737524, "grad_norm": 0.06590422242879868, "learning_rate": 6.951257932400199e-05, "loss": 0.2359, "step": 33931 }, { "epoch": 2.748865845755023, "grad_norm": 0.0587596632540226, "learning_rate": 6.950807867140736e-05, "loss": 0.257, "step": 33932 }, { "epoch": 2.748946856772521, "grad_norm": 0.06739132106304169, "learning_rate": 6.950357801881273e-05, "loss": 0.2381, "step": 33933 }, { "epoch": 2.7490278677900193, "grad_norm": 0.06548912823200226, "learning_rate": 6.949907736621811e-05, "loss": 0.2451, "step": 33934 }, { "epoch": 2.749108878807518, "grad_norm": 0.05901022627949715, "learning_rate": 6.949457671362348e-05, "loss": 0.2312, "step": 33935 }, { "epoch": 2.749189889825016, "grad_norm": 0.0753978043794632, "learning_rate": 6.949007606102885e-05, "loss": 0.2602, "step": 33936 }, { "epoch": 2.7492709008425145, "grad_norm": 0.05910757556557655, "learning_rate": 6.948557540843423e-05, "loss": 0.2512, "step": 33937 }, { "epoch": 2.749351911860013, "grad_norm": 0.06722880154848099, "learning_rate": 6.94810747558396e-05, "loss": 0.2267, "step": 33938 }, { "epoch": 2.7494329228775114, "grad_norm": 0.07442230731248856, "learning_rate": 6.947657410324498e-05, "loss": 0.2271, "step": 33939 }, { "epoch": 2.7495139338950096, "grad_norm": 0.06559806317090988, "learning_rate": 6.947207345065035e-05, "loss": 0.2305, "step": 33940 }, { "epoch": 2.7495949449125083, "grad_norm": 0.05963011085987091, "learning_rate": 6.946757279805572e-05, "loss": 0.242, "step": 33941 }, { "epoch": 2.7496759559300066, "grad_norm": 0.0676308423280716, "learning_rate": 6.94630721454611e-05, "loss": 0.2261, "step": 33942 }, { "epoch": 2.749756966947505, "grad_norm": 0.07964812219142914, "learning_rate": 6.945857149286647e-05, "loss": 0.3088, "step": 33943 }, { "epoch": 2.7498379779650035, "grad_norm": 0.05791834741830826, "learning_rate": 6.945407084027184e-05, "loss": 0.218, "step": 33944 }, { "epoch": 2.7499189889825018, "grad_norm": 0.07928522676229477, "learning_rate": 6.944957018767722e-05, "loss": 0.225, "step": 33945 }, { "epoch": 2.75, "grad_norm": 0.08295446634292603, "learning_rate": 6.944506953508259e-05, "loss": 0.2764, "step": 33946 }, { "epoch": 2.7500810110174982, "grad_norm": 0.06554339081048965, "learning_rate": 6.944056888248796e-05, "loss": 0.2681, "step": 33947 }, { "epoch": 2.7501620220349965, "grad_norm": 0.07376903295516968, "learning_rate": 6.943606822989334e-05, "loss": 0.2583, "step": 33948 }, { "epoch": 2.750243033052495, "grad_norm": 0.07312076538801193, "learning_rate": 6.943156757729871e-05, "loss": 0.2299, "step": 33949 }, { "epoch": 2.7503240440699934, "grad_norm": 0.05707407742738724, "learning_rate": 6.942706692470408e-05, "loss": 0.2532, "step": 33950 }, { "epoch": 2.7504050550874917, "grad_norm": 0.06634999811649323, "learning_rate": 6.942256627210946e-05, "loss": 0.2768, "step": 33951 }, { "epoch": 2.7504860661049904, "grad_norm": 0.06431234627962112, "learning_rate": 6.941806561951483e-05, "loss": 0.2164, "step": 33952 }, { "epoch": 2.7505670771224886, "grad_norm": 0.07189373672008514, "learning_rate": 6.94135649669202e-05, "loss": 0.2043, "step": 33953 }, { "epoch": 2.750648088139987, "grad_norm": 0.06200005114078522, "learning_rate": 6.940906431432558e-05, "loss": 0.2356, "step": 33954 }, { "epoch": 2.7507290991574855, "grad_norm": 0.08007141947746277, "learning_rate": 6.940456366173095e-05, "loss": 0.2381, "step": 33955 }, { "epoch": 2.750810110174984, "grad_norm": 0.06408270448446274, "learning_rate": 6.940006300913633e-05, "loss": 0.2401, "step": 33956 }, { "epoch": 2.750891121192482, "grad_norm": 0.06597329676151276, "learning_rate": 6.93955623565417e-05, "loss": 0.2812, "step": 33957 }, { "epoch": 2.7509721322099807, "grad_norm": 0.0680282860994339, "learning_rate": 6.939106170394707e-05, "loss": 0.2148, "step": 33958 }, { "epoch": 2.751053143227479, "grad_norm": 0.07116768509149551, "learning_rate": 6.938656105135245e-05, "loss": 0.2254, "step": 33959 }, { "epoch": 2.751134154244977, "grad_norm": 0.09429546445608139, "learning_rate": 6.938206039875782e-05, "loss": 0.2869, "step": 33960 }, { "epoch": 2.751215165262476, "grad_norm": 0.061728041619062424, "learning_rate": 6.93775597461632e-05, "loss": 0.2476, "step": 33961 }, { "epoch": 2.751296176279974, "grad_norm": 0.0597955696284771, "learning_rate": 6.937305909356857e-05, "loss": 0.2159, "step": 33962 }, { "epoch": 2.7513771872974724, "grad_norm": 0.0768377035856247, "learning_rate": 6.936855844097394e-05, "loss": 0.2677, "step": 33963 }, { "epoch": 2.751458198314971, "grad_norm": 0.09343895316123962, "learning_rate": 6.936405778837932e-05, "loss": 0.2634, "step": 33964 }, { "epoch": 2.7515392093324693, "grad_norm": 0.08717542886734009, "learning_rate": 6.935955713578469e-05, "loss": 0.2586, "step": 33965 }, { "epoch": 2.7516202203499676, "grad_norm": 0.09890085458755493, "learning_rate": 6.935505648319006e-05, "loss": 0.2414, "step": 33966 }, { "epoch": 2.7517012313674662, "grad_norm": 0.07061535120010376, "learning_rate": 6.935055583059545e-05, "loss": 0.2343, "step": 33967 }, { "epoch": 2.7517822423849645, "grad_norm": 0.0656886100769043, "learning_rate": 6.934605517800081e-05, "loss": 0.2115, "step": 33968 }, { "epoch": 2.7518632534024627, "grad_norm": 0.056593045592308044, "learning_rate": 6.934155452540618e-05, "loss": 0.2224, "step": 33969 }, { "epoch": 2.751944264419961, "grad_norm": 0.08037639409303665, "learning_rate": 6.933705387281157e-05, "loss": 0.2474, "step": 33970 }, { "epoch": 2.7520252754374592, "grad_norm": 0.06938332319259644, "learning_rate": 6.933255322021693e-05, "loss": 0.2223, "step": 33971 }, { "epoch": 2.752106286454958, "grad_norm": 0.081057608127594, "learning_rate": 6.93280525676223e-05, "loss": 0.2436, "step": 33972 }, { "epoch": 2.752187297472456, "grad_norm": 0.07251177728176117, "learning_rate": 6.932355191502769e-05, "loss": 0.2473, "step": 33973 }, { "epoch": 2.7522683084899544, "grad_norm": 0.07197289913892746, "learning_rate": 6.931905126243305e-05, "loss": 0.2609, "step": 33974 }, { "epoch": 2.752349319507453, "grad_norm": 0.071237713098526, "learning_rate": 6.931455060983842e-05, "loss": 0.2004, "step": 33975 }, { "epoch": 2.7524303305249513, "grad_norm": 0.069503553211689, "learning_rate": 6.931004995724381e-05, "loss": 0.2672, "step": 33976 }, { "epoch": 2.7525113415424496, "grad_norm": 0.07165911048650742, "learning_rate": 6.930554930464917e-05, "loss": 0.1889, "step": 33977 }, { "epoch": 2.7525923525599483, "grad_norm": 0.06798816472291946, "learning_rate": 6.930104865205456e-05, "loss": 0.2506, "step": 33978 }, { "epoch": 2.7526733635774465, "grad_norm": 0.07564514875411987, "learning_rate": 6.929654799945993e-05, "loss": 0.2637, "step": 33979 }, { "epoch": 2.7527543745949448, "grad_norm": 0.060637280344963074, "learning_rate": 6.929204734686529e-05, "loss": 0.2081, "step": 33980 }, { "epoch": 2.7528353856124435, "grad_norm": 0.06939760595560074, "learning_rate": 6.928754669427068e-05, "loss": 0.2313, "step": 33981 }, { "epoch": 2.7529163966299417, "grad_norm": 0.07175181061029434, "learning_rate": 6.928304604167605e-05, "loss": 0.2277, "step": 33982 }, { "epoch": 2.75299740764744, "grad_norm": 0.06570654362440109, "learning_rate": 6.927854538908141e-05, "loss": 0.229, "step": 33983 }, { "epoch": 2.7530784186649386, "grad_norm": 0.08200690150260925, "learning_rate": 6.92740447364868e-05, "loss": 0.2282, "step": 33984 }, { "epoch": 2.753159429682437, "grad_norm": 0.07265735417604446, "learning_rate": 6.926954408389217e-05, "loss": 0.2854, "step": 33985 }, { "epoch": 2.753240440699935, "grad_norm": 0.0830908939242363, "learning_rate": 6.926504343129753e-05, "loss": 0.2356, "step": 33986 }, { "epoch": 2.753321451717434, "grad_norm": 0.05797835439443588, "learning_rate": 6.926054277870292e-05, "loss": 0.2184, "step": 33987 }, { "epoch": 2.753402462734932, "grad_norm": 0.07792074233293533, "learning_rate": 6.92560421261083e-05, "loss": 0.2397, "step": 33988 }, { "epoch": 2.7534834737524303, "grad_norm": 0.07205940783023834, "learning_rate": 6.925154147351366e-05, "loss": 0.2362, "step": 33989 }, { "epoch": 2.7535644847699285, "grad_norm": 0.06467721611261368, "learning_rate": 6.924704082091904e-05, "loss": 0.2393, "step": 33990 }, { "epoch": 2.7536454957874272, "grad_norm": 0.06070276349782944, "learning_rate": 6.924254016832442e-05, "loss": 0.2191, "step": 33991 }, { "epoch": 2.7537265068049255, "grad_norm": 0.06308267265558243, "learning_rate": 6.923803951572978e-05, "loss": 0.2415, "step": 33992 }, { "epoch": 2.7538075178224237, "grad_norm": 0.0762130543589592, "learning_rate": 6.923353886313516e-05, "loss": 0.2365, "step": 33993 }, { "epoch": 2.753888528839922, "grad_norm": 0.07591088861227036, "learning_rate": 6.922903821054054e-05, "loss": 0.2486, "step": 33994 }, { "epoch": 2.7539695398574207, "grad_norm": 0.07481890916824341, "learning_rate": 6.92245375579459e-05, "loss": 0.2647, "step": 33995 }, { "epoch": 2.754050550874919, "grad_norm": 0.07969322055578232, "learning_rate": 6.922003690535128e-05, "loss": 0.2999, "step": 33996 }, { "epoch": 2.754131561892417, "grad_norm": 0.08638513833284378, "learning_rate": 6.921553625275666e-05, "loss": 0.2851, "step": 33997 }, { "epoch": 2.754212572909916, "grad_norm": 0.06518282741308212, "learning_rate": 6.921103560016202e-05, "loss": 0.2319, "step": 33998 }, { "epoch": 2.754293583927414, "grad_norm": 0.06742814183235168, "learning_rate": 6.92065349475674e-05, "loss": 0.2463, "step": 33999 }, { "epoch": 2.7543745949449123, "grad_norm": 0.07727876305580139, "learning_rate": 6.920203429497278e-05, "loss": 0.2808, "step": 34000 }, { "epoch": 2.754455605962411, "grad_norm": 0.08557460457086563, "learning_rate": 6.919753364237814e-05, "loss": 0.2823, "step": 34001 }, { "epoch": 2.7545366169799093, "grad_norm": 0.0750543400645256, "learning_rate": 6.919303298978353e-05, "loss": 0.2273, "step": 34002 }, { "epoch": 2.7546176279974075, "grad_norm": 0.08017384260892868, "learning_rate": 6.91885323371889e-05, "loss": 0.2672, "step": 34003 }, { "epoch": 2.754698639014906, "grad_norm": 0.06331358104944229, "learning_rate": 6.918403168459427e-05, "loss": 0.2213, "step": 34004 }, { "epoch": 2.7547796500324044, "grad_norm": 0.09369877725839615, "learning_rate": 6.917953103199965e-05, "loss": 0.2576, "step": 34005 }, { "epoch": 2.7548606610499027, "grad_norm": 0.06448009610176086, "learning_rate": 6.917503037940502e-05, "loss": 0.245, "step": 34006 }, { "epoch": 2.7549416720674014, "grad_norm": 0.07438341528177261, "learning_rate": 6.91705297268104e-05, "loss": 0.2535, "step": 34007 }, { "epoch": 2.7550226830848996, "grad_norm": 0.06136835366487503, "learning_rate": 6.916602907421577e-05, "loss": 0.1772, "step": 34008 }, { "epoch": 2.755103694102398, "grad_norm": 0.0914490669965744, "learning_rate": 6.916152842162114e-05, "loss": 0.2733, "step": 34009 }, { "epoch": 2.7551847051198965, "grad_norm": 0.06491867452859879, "learning_rate": 6.915702776902651e-05, "loss": 0.237, "step": 34010 }, { "epoch": 2.755265716137395, "grad_norm": 0.05932406708598137, "learning_rate": 6.915252711643189e-05, "loss": 0.2666, "step": 34011 }, { "epoch": 2.755346727154893, "grad_norm": 0.07096493989229202, "learning_rate": 6.914802646383726e-05, "loss": 0.2507, "step": 34012 }, { "epoch": 2.7554277381723913, "grad_norm": 0.06969096511602402, "learning_rate": 6.914352581124264e-05, "loss": 0.2219, "step": 34013 }, { "epoch": 2.75550874918989, "grad_norm": 0.0693049356341362, "learning_rate": 6.913902515864801e-05, "loss": 0.2498, "step": 34014 }, { "epoch": 2.755589760207388, "grad_norm": 0.06646228581666946, "learning_rate": 6.913452450605338e-05, "loss": 0.2494, "step": 34015 }, { "epoch": 2.7556707712248865, "grad_norm": 0.058434128761291504, "learning_rate": 6.913002385345876e-05, "loss": 0.1993, "step": 34016 }, { "epoch": 2.7557517822423847, "grad_norm": 0.06461863964796066, "learning_rate": 6.912552320086413e-05, "loss": 0.2394, "step": 34017 }, { "epoch": 2.7558327932598834, "grad_norm": 0.06257472187280655, "learning_rate": 6.91210225482695e-05, "loss": 0.2442, "step": 34018 }, { "epoch": 2.7559138042773816, "grad_norm": 0.07050945609807968, "learning_rate": 6.911652189567488e-05, "loss": 0.2254, "step": 34019 }, { "epoch": 2.75599481529488, "grad_norm": 0.07353843748569489, "learning_rate": 6.911202124308025e-05, "loss": 0.2551, "step": 34020 }, { "epoch": 2.7560758263123786, "grad_norm": 0.07434510439634323, "learning_rate": 6.910752059048562e-05, "loss": 0.2247, "step": 34021 }, { "epoch": 2.756156837329877, "grad_norm": 0.06477247178554535, "learning_rate": 6.9103019937891e-05, "loss": 0.216, "step": 34022 }, { "epoch": 2.756237848347375, "grad_norm": 0.06273271143436432, "learning_rate": 6.909851928529637e-05, "loss": 0.2295, "step": 34023 }, { "epoch": 2.7563188593648738, "grad_norm": 0.06990016996860504, "learning_rate": 6.909401863270174e-05, "loss": 0.2672, "step": 34024 }, { "epoch": 2.756399870382372, "grad_norm": 0.070733442902565, "learning_rate": 6.908951798010712e-05, "loss": 0.2324, "step": 34025 }, { "epoch": 2.7564808813998702, "grad_norm": 0.07302505522966385, "learning_rate": 6.908501732751249e-05, "loss": 0.2399, "step": 34026 }, { "epoch": 2.756561892417369, "grad_norm": 0.0650155171751976, "learning_rate": 6.908051667491787e-05, "loss": 0.2295, "step": 34027 }, { "epoch": 2.756642903434867, "grad_norm": 0.07546880841255188, "learning_rate": 6.907601602232324e-05, "loss": 0.2041, "step": 34028 }, { "epoch": 2.7567239144523654, "grad_norm": 0.06319276988506317, "learning_rate": 6.907151536972861e-05, "loss": 0.2252, "step": 34029 }, { "epoch": 2.756804925469864, "grad_norm": 0.08173449337482452, "learning_rate": 6.906701471713399e-05, "loss": 0.2507, "step": 34030 }, { "epoch": 2.7568859364873624, "grad_norm": 0.06582282483577728, "learning_rate": 6.906251406453936e-05, "loss": 0.23, "step": 34031 }, { "epoch": 2.7569669475048606, "grad_norm": 0.0720318853855133, "learning_rate": 6.905801341194473e-05, "loss": 0.2675, "step": 34032 }, { "epoch": 2.7570479585223593, "grad_norm": 0.06968838721513748, "learning_rate": 6.905351275935011e-05, "loss": 0.2095, "step": 34033 }, { "epoch": 2.7571289695398575, "grad_norm": 0.07105859369039536, "learning_rate": 6.904901210675548e-05, "loss": 0.2526, "step": 34034 }, { "epoch": 2.7572099805573558, "grad_norm": 0.07147864252328873, "learning_rate": 6.904451145416085e-05, "loss": 0.2264, "step": 34035 }, { "epoch": 2.757290991574854, "grad_norm": 0.08469801396131516, "learning_rate": 6.904001080156624e-05, "loss": 0.2404, "step": 34036 }, { "epoch": 2.7573720025923527, "grad_norm": 0.0625695213675499, "learning_rate": 6.90355101489716e-05, "loss": 0.2344, "step": 34037 }, { "epoch": 2.757453013609851, "grad_norm": 0.06275301426649094, "learning_rate": 6.903100949637698e-05, "loss": 0.2615, "step": 34038 }, { "epoch": 2.757534024627349, "grad_norm": 0.07460293918848038, "learning_rate": 6.902650884378236e-05, "loss": 0.2533, "step": 34039 }, { "epoch": 2.7576150356448474, "grad_norm": 0.0641096755862236, "learning_rate": 6.902200819118772e-05, "loss": 0.2409, "step": 34040 }, { "epoch": 2.757696046662346, "grad_norm": 0.06952983140945435, "learning_rate": 6.90175075385931e-05, "loss": 0.2488, "step": 34041 }, { "epoch": 2.7577770576798444, "grad_norm": 0.06201440840959549, "learning_rate": 6.901300688599848e-05, "loss": 0.2475, "step": 34042 }, { "epoch": 2.7578580686973426, "grad_norm": 0.06937065720558167, "learning_rate": 6.900850623340384e-05, "loss": 0.2258, "step": 34043 }, { "epoch": 2.7579390797148413, "grad_norm": 0.07255363464355469, "learning_rate": 6.900400558080922e-05, "loss": 0.2046, "step": 34044 }, { "epoch": 2.7580200907323396, "grad_norm": 0.06431081891059875, "learning_rate": 6.89995049282146e-05, "loss": 0.2412, "step": 34045 }, { "epoch": 2.758101101749838, "grad_norm": 0.060214076191186905, "learning_rate": 6.899500427561996e-05, "loss": 0.2368, "step": 34046 }, { "epoch": 2.7581821127673365, "grad_norm": 0.0770927146077156, "learning_rate": 6.899050362302534e-05, "loss": 0.261, "step": 34047 }, { "epoch": 2.7582631237848347, "grad_norm": 0.055978596210479736, "learning_rate": 6.898600297043072e-05, "loss": 0.2484, "step": 34048 }, { "epoch": 2.758344134802333, "grad_norm": 0.07794700562953949, "learning_rate": 6.898150231783609e-05, "loss": 0.2395, "step": 34049 }, { "epoch": 2.7584251458198317, "grad_norm": 0.08637645840644836, "learning_rate": 6.897700166524146e-05, "loss": 0.2474, "step": 34050 }, { "epoch": 2.75850615683733, "grad_norm": 0.06628405302762985, "learning_rate": 6.897250101264685e-05, "loss": 0.2618, "step": 34051 }, { "epoch": 2.758587167854828, "grad_norm": 0.07030244916677475, "learning_rate": 6.89680003600522e-05, "loss": 0.2091, "step": 34052 }, { "epoch": 2.758668178872327, "grad_norm": 0.05243457108736038, "learning_rate": 6.896349970745758e-05, "loss": 0.2023, "step": 34053 }, { "epoch": 2.758749189889825, "grad_norm": 0.06554213911294937, "learning_rate": 6.895899905486297e-05, "loss": 0.2613, "step": 34054 }, { "epoch": 2.7588302009073233, "grad_norm": 0.08007621020078659, "learning_rate": 6.895449840226833e-05, "loss": 0.2792, "step": 34055 }, { "epoch": 2.758911211924822, "grad_norm": 0.07678791880607605, "learning_rate": 6.894999774967371e-05, "loss": 0.2163, "step": 34056 }, { "epoch": 2.7589922229423203, "grad_norm": 0.06600095331668854, "learning_rate": 6.894549709707909e-05, "loss": 0.2529, "step": 34057 }, { "epoch": 2.7590732339598185, "grad_norm": 0.0781325250864029, "learning_rate": 6.894099644448445e-05, "loss": 0.2323, "step": 34058 }, { "epoch": 2.7591542449773168, "grad_norm": 0.07266079634428024, "learning_rate": 6.893649579188983e-05, "loss": 0.2763, "step": 34059 }, { "epoch": 2.7592352559948155, "grad_norm": 0.06358586996793747, "learning_rate": 6.893199513929521e-05, "loss": 0.2214, "step": 34060 }, { "epoch": 2.7593162670123137, "grad_norm": 0.08538417518138885, "learning_rate": 6.892749448670057e-05, "loss": 0.2352, "step": 34061 }, { "epoch": 2.759397278029812, "grad_norm": 0.07268763333559036, "learning_rate": 6.892299383410596e-05, "loss": 0.2172, "step": 34062 }, { "epoch": 2.75947828904731, "grad_norm": 0.07276776432991028, "learning_rate": 6.891849318151133e-05, "loss": 0.255, "step": 34063 }, { "epoch": 2.759559300064809, "grad_norm": 0.07200052589178085, "learning_rate": 6.891399252891669e-05, "loss": 0.2703, "step": 34064 }, { "epoch": 2.759640311082307, "grad_norm": 0.09576958417892456, "learning_rate": 6.890949187632208e-05, "loss": 0.2816, "step": 34065 }, { "epoch": 2.7597213220998054, "grad_norm": 0.08117634057998657, "learning_rate": 6.890499122372745e-05, "loss": 0.249, "step": 34066 }, { "epoch": 2.759802333117304, "grad_norm": 0.059016503393650055, "learning_rate": 6.890049057113281e-05, "loss": 0.2394, "step": 34067 }, { "epoch": 2.7598833441348023, "grad_norm": 0.06975926458835602, "learning_rate": 6.88959899185382e-05, "loss": 0.2693, "step": 34068 }, { "epoch": 2.7599643551523005, "grad_norm": 0.06149639934301376, "learning_rate": 6.889148926594357e-05, "loss": 0.2803, "step": 34069 }, { "epoch": 2.7600453661697992, "grad_norm": 0.08504508435726166, "learning_rate": 6.888698861334893e-05, "loss": 0.277, "step": 34070 }, { "epoch": 2.7601263771872975, "grad_norm": 0.0662258192896843, "learning_rate": 6.888248796075432e-05, "loss": 0.2374, "step": 34071 }, { "epoch": 2.7602073882047957, "grad_norm": 0.0804053395986557, "learning_rate": 6.887798730815969e-05, "loss": 0.2527, "step": 34072 }, { "epoch": 2.7602883992222944, "grad_norm": 0.07075381278991699, "learning_rate": 6.887348665556505e-05, "loss": 0.2536, "step": 34073 }, { "epoch": 2.7603694102397927, "grad_norm": 0.06893350183963776, "learning_rate": 6.886898600297044e-05, "loss": 0.2206, "step": 34074 }, { "epoch": 2.760450421257291, "grad_norm": 0.060836561024188995, "learning_rate": 6.886448535037581e-05, "loss": 0.2044, "step": 34075 }, { "epoch": 2.7605314322747896, "grad_norm": 0.0595211423933506, "learning_rate": 6.885998469778117e-05, "loss": 0.2402, "step": 34076 }, { "epoch": 2.760612443292288, "grad_norm": 0.07096649706363678, "learning_rate": 6.885548404518656e-05, "loss": 0.2102, "step": 34077 }, { "epoch": 2.760693454309786, "grad_norm": 0.0808127298951149, "learning_rate": 6.885098339259193e-05, "loss": 0.2048, "step": 34078 }, { "epoch": 2.7607744653272848, "grad_norm": 0.06761705130338669, "learning_rate": 6.884648273999729e-05, "loss": 0.2122, "step": 34079 }, { "epoch": 2.760855476344783, "grad_norm": 0.05668129399418831, "learning_rate": 6.884198208740268e-05, "loss": 0.2432, "step": 34080 }, { "epoch": 2.7609364873622813, "grad_norm": 0.060756102204322815, "learning_rate": 6.883748143480805e-05, "loss": 0.257, "step": 34081 }, { "epoch": 2.7610174983797795, "grad_norm": 0.07630965113639832, "learning_rate": 6.883298078221341e-05, "loss": 0.2406, "step": 34082 }, { "epoch": 2.761098509397278, "grad_norm": 0.07598250359296799, "learning_rate": 6.88284801296188e-05, "loss": 0.2886, "step": 34083 }, { "epoch": 2.7611795204147764, "grad_norm": 0.07698392868041992, "learning_rate": 6.882397947702417e-05, "loss": 0.2702, "step": 34084 }, { "epoch": 2.7612605314322747, "grad_norm": 0.06041031703352928, "learning_rate": 6.881947882442955e-05, "loss": 0.2089, "step": 34085 }, { "epoch": 2.761341542449773, "grad_norm": 0.0772460475564003, "learning_rate": 6.881497817183492e-05, "loss": 0.2368, "step": 34086 }, { "epoch": 2.7614225534672716, "grad_norm": 0.062571682035923, "learning_rate": 6.88104775192403e-05, "loss": 0.2754, "step": 34087 }, { "epoch": 2.76150356448477, "grad_norm": 0.07503519207239151, "learning_rate": 6.880597686664567e-05, "loss": 0.224, "step": 34088 }, { "epoch": 2.761584575502268, "grad_norm": 0.07290045917034149, "learning_rate": 6.880147621405104e-05, "loss": 0.2328, "step": 34089 }, { "epoch": 2.761665586519767, "grad_norm": 0.08204520493745804, "learning_rate": 6.879697556145642e-05, "loss": 0.2032, "step": 34090 }, { "epoch": 2.761746597537265, "grad_norm": 0.05929996818304062, "learning_rate": 6.879247490886179e-05, "loss": 0.2157, "step": 34091 }, { "epoch": 2.7618276085547633, "grad_norm": 0.07957153767347336, "learning_rate": 6.878797425626716e-05, "loss": 0.2244, "step": 34092 }, { "epoch": 2.761908619572262, "grad_norm": 0.07009933888912201, "learning_rate": 6.878347360367254e-05, "loss": 0.2706, "step": 34093 }, { "epoch": 2.76198963058976, "grad_norm": 0.058375827968120575, "learning_rate": 6.877897295107791e-05, "loss": 0.225, "step": 34094 }, { "epoch": 2.7620706416072585, "grad_norm": 0.06847576797008514, "learning_rate": 6.877447229848328e-05, "loss": 0.2501, "step": 34095 }, { "epoch": 2.762151652624757, "grad_norm": 0.07601086795330048, "learning_rate": 6.876997164588866e-05, "loss": 0.2545, "step": 34096 }, { "epoch": 2.7622326636422554, "grad_norm": 0.05859259143471718, "learning_rate": 6.876547099329403e-05, "loss": 0.245, "step": 34097 }, { "epoch": 2.7623136746597536, "grad_norm": 0.09432229399681091, "learning_rate": 6.87609703406994e-05, "loss": 0.2341, "step": 34098 }, { "epoch": 2.7623946856772523, "grad_norm": 0.0631144791841507, "learning_rate": 6.875646968810478e-05, "loss": 0.2361, "step": 34099 }, { "epoch": 2.7624756966947506, "grad_norm": 0.07539720833301544, "learning_rate": 6.875196903551015e-05, "loss": 0.311, "step": 34100 }, { "epoch": 2.762556707712249, "grad_norm": 0.07470940053462982, "learning_rate": 6.874746838291553e-05, "loss": 0.2444, "step": 34101 }, { "epoch": 2.7626377187297475, "grad_norm": 0.07325862348079681, "learning_rate": 6.87429677303209e-05, "loss": 0.2461, "step": 34102 }, { "epoch": 2.7627187297472457, "grad_norm": 0.053800273686647415, "learning_rate": 6.873846707772627e-05, "loss": 0.2503, "step": 34103 }, { "epoch": 2.762799740764744, "grad_norm": 0.057765450328588486, "learning_rate": 6.873396642513165e-05, "loss": 0.2727, "step": 34104 }, { "epoch": 2.7628807517822422, "grad_norm": 0.07699766010046005, "learning_rate": 6.872946577253702e-05, "loss": 0.2603, "step": 34105 }, { "epoch": 2.762961762799741, "grad_norm": 0.06159216910600662, "learning_rate": 6.87249651199424e-05, "loss": 0.2394, "step": 34106 }, { "epoch": 2.763042773817239, "grad_norm": 0.06081032380461693, "learning_rate": 6.872046446734777e-05, "loss": 0.2559, "step": 34107 }, { "epoch": 2.7631237848347374, "grad_norm": 0.06624030321836472, "learning_rate": 6.871596381475314e-05, "loss": 0.2043, "step": 34108 }, { "epoch": 2.7632047958522357, "grad_norm": 0.07395646721124649, "learning_rate": 6.871146316215851e-05, "loss": 0.2352, "step": 34109 }, { "epoch": 2.7632858068697344, "grad_norm": 0.07033342123031616, "learning_rate": 6.870696250956389e-05, "loss": 0.2737, "step": 34110 }, { "epoch": 2.7633668178872326, "grad_norm": 0.06181248649954796, "learning_rate": 6.870246185696928e-05, "loss": 0.2192, "step": 34111 }, { "epoch": 2.763447828904731, "grad_norm": 0.08963780850172043, "learning_rate": 6.869796120437464e-05, "loss": 0.23, "step": 34112 }, { "epoch": 2.7635288399222295, "grad_norm": 0.06250149011611938, "learning_rate": 6.869346055178001e-05, "loss": 0.2011, "step": 34113 }, { "epoch": 2.7636098509397278, "grad_norm": 0.0642528384923935, "learning_rate": 6.86889598991854e-05, "loss": 0.2589, "step": 34114 }, { "epoch": 2.763690861957226, "grad_norm": 0.061350125819444656, "learning_rate": 6.868445924659076e-05, "loss": 0.2141, "step": 34115 }, { "epoch": 2.7637718729747247, "grad_norm": 0.11386937648057938, "learning_rate": 6.867995859399613e-05, "loss": 0.2322, "step": 34116 }, { "epoch": 2.763852883992223, "grad_norm": 0.06786888092756271, "learning_rate": 6.867545794140152e-05, "loss": 0.24, "step": 34117 }, { "epoch": 2.763933895009721, "grad_norm": 0.06581398844718933, "learning_rate": 6.867095728880688e-05, "loss": 0.266, "step": 34118 }, { "epoch": 2.76401490602722, "grad_norm": 0.07778912037611008, "learning_rate": 6.866645663621225e-05, "loss": 0.2575, "step": 34119 }, { "epoch": 2.764095917044718, "grad_norm": 0.05699251592159271, "learning_rate": 6.866195598361764e-05, "loss": 0.2367, "step": 34120 }, { "epoch": 2.7641769280622164, "grad_norm": 0.06215566396713257, "learning_rate": 6.8657455331023e-05, "loss": 0.2401, "step": 34121 }, { "epoch": 2.764257939079715, "grad_norm": 0.07722298800945282, "learning_rate": 6.865295467842837e-05, "loss": 0.2614, "step": 34122 }, { "epoch": 2.7643389500972133, "grad_norm": 0.08287777006626129, "learning_rate": 6.864845402583376e-05, "loss": 0.2182, "step": 34123 }, { "epoch": 2.7644199611147116, "grad_norm": 0.06992882490158081, "learning_rate": 6.864395337323912e-05, "loss": 0.2434, "step": 34124 }, { "epoch": 2.7645009721322102, "grad_norm": 0.06839783489704132, "learning_rate": 6.863945272064449e-05, "loss": 0.2385, "step": 34125 }, { "epoch": 2.7645819831497085, "grad_norm": 0.08003885298967361, "learning_rate": 6.863495206804988e-05, "loss": 0.2472, "step": 34126 }, { "epoch": 2.7646629941672067, "grad_norm": 0.07297755777835846, "learning_rate": 6.863045141545524e-05, "loss": 0.2534, "step": 34127 }, { "epoch": 2.764744005184705, "grad_norm": 0.06011795997619629, "learning_rate": 6.862595076286061e-05, "loss": 0.2433, "step": 34128 }, { "epoch": 2.7648250162022032, "grad_norm": 0.05884721502661705, "learning_rate": 6.8621450110266e-05, "loss": 0.2437, "step": 34129 }, { "epoch": 2.764906027219702, "grad_norm": 0.060120269656181335, "learning_rate": 6.861694945767136e-05, "loss": 0.2557, "step": 34130 }, { "epoch": 2.7649870382372, "grad_norm": 0.06731750816106796, "learning_rate": 6.861244880507673e-05, "loss": 0.2703, "step": 34131 }, { "epoch": 2.7650680492546984, "grad_norm": 0.06370685994625092, "learning_rate": 6.860794815248212e-05, "loss": 0.1969, "step": 34132 }, { "epoch": 2.765149060272197, "grad_norm": 0.07013097405433655, "learning_rate": 6.860344749988748e-05, "loss": 0.2258, "step": 34133 }, { "epoch": 2.7652300712896953, "grad_norm": 0.06985171884298325, "learning_rate": 6.859894684729285e-05, "loss": 0.2342, "step": 34134 }, { "epoch": 2.7653110823071936, "grad_norm": 0.06555838882923126, "learning_rate": 6.859444619469824e-05, "loss": 0.2189, "step": 34135 }, { "epoch": 2.7653920933246923, "grad_norm": 0.06166043132543564, "learning_rate": 6.85899455421036e-05, "loss": 0.1987, "step": 34136 }, { "epoch": 2.7654731043421905, "grad_norm": 0.050333742052316666, "learning_rate": 6.858544488950899e-05, "loss": 0.2041, "step": 34137 }, { "epoch": 2.7655541153596888, "grad_norm": 0.08235972374677658, "learning_rate": 6.858094423691436e-05, "loss": 0.2338, "step": 34138 }, { "epoch": 2.7656351263771874, "grad_norm": 0.07296254485845566, "learning_rate": 6.857644358431972e-05, "loss": 0.2222, "step": 34139 }, { "epoch": 2.7657161373946857, "grad_norm": 0.06814103573560715, "learning_rate": 6.857194293172511e-05, "loss": 0.2563, "step": 34140 }, { "epoch": 2.765797148412184, "grad_norm": 0.06390096992254257, "learning_rate": 6.856744227913048e-05, "loss": 0.2665, "step": 34141 }, { "epoch": 2.7658781594296826, "grad_norm": 0.07354594767093658, "learning_rate": 6.856294162653584e-05, "loss": 0.2291, "step": 34142 }, { "epoch": 2.765959170447181, "grad_norm": 0.07856588810682297, "learning_rate": 6.855844097394123e-05, "loss": 0.2688, "step": 34143 }, { "epoch": 2.766040181464679, "grad_norm": 0.08960554748773575, "learning_rate": 6.85539403213466e-05, "loss": 0.3206, "step": 34144 }, { "epoch": 2.766121192482178, "grad_norm": 0.06466906517744064, "learning_rate": 6.854943966875196e-05, "loss": 0.2163, "step": 34145 }, { "epoch": 2.766202203499676, "grad_norm": 0.07751652598381042, "learning_rate": 6.854493901615735e-05, "loss": 0.2569, "step": 34146 }, { "epoch": 2.7662832145171743, "grad_norm": 0.07169736176729202, "learning_rate": 6.854043836356273e-05, "loss": 0.2173, "step": 34147 }, { "epoch": 2.766364225534673, "grad_norm": 0.10449399054050446, "learning_rate": 6.853593771096809e-05, "loss": 0.2221, "step": 34148 }, { "epoch": 2.7664452365521712, "grad_norm": 0.05646883696317673, "learning_rate": 6.853143705837347e-05, "loss": 0.2063, "step": 34149 }, { "epoch": 2.7665262475696695, "grad_norm": 0.07571178674697876, "learning_rate": 6.852693640577885e-05, "loss": 0.2241, "step": 34150 }, { "epoch": 2.7666072585871677, "grad_norm": 0.06778353452682495, "learning_rate": 6.85224357531842e-05, "loss": 0.2323, "step": 34151 }, { "epoch": 2.766688269604666, "grad_norm": 0.07670864462852478, "learning_rate": 6.85179351005896e-05, "loss": 0.2914, "step": 34152 }, { "epoch": 2.7667692806221647, "grad_norm": 0.06275644153356552, "learning_rate": 6.851343444799497e-05, "loss": 0.2204, "step": 34153 }, { "epoch": 2.766850291639663, "grad_norm": 0.057569120079278946, "learning_rate": 6.850893379540033e-05, "loss": 0.2422, "step": 34154 }, { "epoch": 2.766931302657161, "grad_norm": 0.0675300657749176, "learning_rate": 6.850443314280571e-05, "loss": 0.2607, "step": 34155 }, { "epoch": 2.76701231367466, "grad_norm": 0.05765746906399727, "learning_rate": 6.849993249021109e-05, "loss": 0.2258, "step": 34156 }, { "epoch": 2.767093324692158, "grad_norm": 0.08889303356409073, "learning_rate": 6.849543183761645e-05, "loss": 0.2619, "step": 34157 }, { "epoch": 2.7671743357096563, "grad_norm": 0.0686429962515831, "learning_rate": 6.849093118502183e-05, "loss": 0.2244, "step": 34158 }, { "epoch": 2.767255346727155, "grad_norm": 0.06495986878871918, "learning_rate": 6.848643053242721e-05, "loss": 0.2121, "step": 34159 }, { "epoch": 2.7673363577446533, "grad_norm": 0.0850294977426529, "learning_rate": 6.848192987983257e-05, "loss": 0.2696, "step": 34160 }, { "epoch": 2.7674173687621515, "grad_norm": 0.07529442012310028, "learning_rate": 6.847742922723796e-05, "loss": 0.2447, "step": 34161 }, { "epoch": 2.76749837977965, "grad_norm": 0.08042323589324951, "learning_rate": 6.847292857464333e-05, "loss": 0.2635, "step": 34162 }, { "epoch": 2.7675793907971484, "grad_norm": 0.08062606304883957, "learning_rate": 6.84684279220487e-05, "loss": 0.2282, "step": 34163 }, { "epoch": 2.7676604018146467, "grad_norm": 0.062072739005088806, "learning_rate": 6.846392726945408e-05, "loss": 0.2497, "step": 34164 }, { "epoch": 2.7677414128321454, "grad_norm": 0.08234535902738571, "learning_rate": 6.845942661685945e-05, "loss": 0.2873, "step": 34165 }, { "epoch": 2.7678224238496436, "grad_norm": 0.07265506684780121, "learning_rate": 6.845492596426482e-05, "loss": 0.2969, "step": 34166 }, { "epoch": 2.767903434867142, "grad_norm": 0.06202319264411926, "learning_rate": 6.84504253116702e-05, "loss": 0.2533, "step": 34167 }, { "epoch": 2.7679844458846405, "grad_norm": 0.0771106705069542, "learning_rate": 6.844592465907557e-05, "loss": 0.2164, "step": 34168 }, { "epoch": 2.768065456902139, "grad_norm": 0.06931550800800323, "learning_rate": 6.844142400648094e-05, "loss": 0.2737, "step": 34169 }, { "epoch": 2.768146467919637, "grad_norm": 0.06963351368904114, "learning_rate": 6.843692335388632e-05, "loss": 0.2342, "step": 34170 }, { "epoch": 2.7682274789371357, "grad_norm": 0.0673379972577095, "learning_rate": 6.843242270129169e-05, "loss": 0.2214, "step": 34171 }, { "epoch": 2.768308489954634, "grad_norm": 0.05613080412149429, "learning_rate": 6.842792204869707e-05, "loss": 0.2457, "step": 34172 }, { "epoch": 2.768389500972132, "grad_norm": 0.06227600574493408, "learning_rate": 6.842342139610244e-05, "loss": 0.2736, "step": 34173 }, { "epoch": 2.7684705119896305, "grad_norm": 0.07635784149169922, "learning_rate": 6.841892074350781e-05, "loss": 0.2526, "step": 34174 }, { "epoch": 2.7685515230071287, "grad_norm": 0.0714573860168457, "learning_rate": 6.841442009091319e-05, "loss": 0.2522, "step": 34175 }, { "epoch": 2.7686325340246274, "grad_norm": 0.05897994339466095, "learning_rate": 6.840991943831856e-05, "loss": 0.2216, "step": 34176 }, { "epoch": 2.7687135450421256, "grad_norm": 0.06542729586362839, "learning_rate": 6.840541878572393e-05, "loss": 0.2409, "step": 34177 }, { "epoch": 2.768794556059624, "grad_norm": 0.0854971632361412, "learning_rate": 6.840091813312931e-05, "loss": 0.2025, "step": 34178 }, { "epoch": 2.7688755670771226, "grad_norm": 0.07088334113359451, "learning_rate": 6.839641748053468e-05, "loss": 0.2625, "step": 34179 }, { "epoch": 2.768956578094621, "grad_norm": 0.07033325731754303, "learning_rate": 6.839191682794005e-05, "loss": 0.2396, "step": 34180 }, { "epoch": 2.769037589112119, "grad_norm": 0.0697719007730484, "learning_rate": 6.838741617534543e-05, "loss": 0.2381, "step": 34181 }, { "epoch": 2.7691186001296177, "grad_norm": 0.06432507932186127, "learning_rate": 6.83829155227508e-05, "loss": 0.2111, "step": 34182 }, { "epoch": 2.769199611147116, "grad_norm": 0.06042730435729027, "learning_rate": 6.837841487015617e-05, "loss": 0.2316, "step": 34183 }, { "epoch": 2.7692806221646142, "grad_norm": 0.06913060694932938, "learning_rate": 6.837391421756155e-05, "loss": 0.2385, "step": 34184 }, { "epoch": 2.769361633182113, "grad_norm": 0.09149839729070663, "learning_rate": 6.836941356496692e-05, "loss": 0.2421, "step": 34185 }, { "epoch": 2.769442644199611, "grad_norm": 0.06392455101013184, "learning_rate": 6.83649129123723e-05, "loss": 0.2093, "step": 34186 }, { "epoch": 2.7695236552171094, "grad_norm": 0.07868770509958267, "learning_rate": 6.836041225977767e-05, "loss": 0.2547, "step": 34187 }, { "epoch": 2.769604666234608, "grad_norm": 0.07805663347244263, "learning_rate": 6.835591160718304e-05, "loss": 0.232, "step": 34188 }, { "epoch": 2.7696856772521063, "grad_norm": 0.0701029971241951, "learning_rate": 6.835141095458843e-05, "loss": 0.2309, "step": 34189 }, { "epoch": 2.7697666882696046, "grad_norm": 0.07640746235847473, "learning_rate": 6.834691030199379e-05, "loss": 0.2492, "step": 34190 }, { "epoch": 2.7698476992871033, "grad_norm": 0.09099043160676956, "learning_rate": 6.834240964939916e-05, "loss": 0.2548, "step": 34191 }, { "epoch": 2.7699287103046015, "grad_norm": 0.0710119977593422, "learning_rate": 6.833790899680455e-05, "loss": 0.2006, "step": 34192 }, { "epoch": 2.7700097213220998, "grad_norm": 0.07448773831129074, "learning_rate": 6.833340834420991e-05, "loss": 0.2295, "step": 34193 }, { "epoch": 2.7700907323395985, "grad_norm": 0.07332435995340347, "learning_rate": 6.832890769161528e-05, "loss": 0.2575, "step": 34194 }, { "epoch": 2.7701717433570967, "grad_norm": 0.06369539350271225, "learning_rate": 6.832440703902067e-05, "loss": 0.229, "step": 34195 }, { "epoch": 2.770252754374595, "grad_norm": 0.06551164388656616, "learning_rate": 6.831990638642603e-05, "loss": 0.2353, "step": 34196 }, { "epoch": 2.770333765392093, "grad_norm": 0.06759309023618698, "learning_rate": 6.83154057338314e-05, "loss": 0.2376, "step": 34197 }, { "epoch": 2.7704147764095914, "grad_norm": 0.06851544231176376, "learning_rate": 6.831090508123679e-05, "loss": 0.2226, "step": 34198 }, { "epoch": 2.77049578742709, "grad_norm": 0.08439340442419052, "learning_rate": 6.830640442864215e-05, "loss": 0.2609, "step": 34199 }, { "epoch": 2.7705767984445884, "grad_norm": 0.06333932280540466, "learning_rate": 6.830190377604753e-05, "loss": 0.1892, "step": 34200 }, { "epoch": 2.7706578094620866, "grad_norm": 0.0682709664106369, "learning_rate": 6.829740312345291e-05, "loss": 0.2032, "step": 34201 }, { "epoch": 2.7707388204795853, "grad_norm": 0.06252402067184448, "learning_rate": 6.829290247085827e-05, "loss": 0.2441, "step": 34202 }, { "epoch": 2.7708198314970836, "grad_norm": 0.06736239790916443, "learning_rate": 6.828840181826365e-05, "loss": 0.2608, "step": 34203 }, { "epoch": 2.770900842514582, "grad_norm": 0.06103686988353729, "learning_rate": 6.828390116566903e-05, "loss": 0.2006, "step": 34204 }, { "epoch": 2.7709818535320805, "grad_norm": 0.06788726150989532, "learning_rate": 6.82794005130744e-05, "loss": 0.2633, "step": 34205 }, { "epoch": 2.7710628645495787, "grad_norm": 0.0695648342370987, "learning_rate": 6.827489986047977e-05, "loss": 0.2048, "step": 34206 }, { "epoch": 2.771143875567077, "grad_norm": 0.07660722732543945, "learning_rate": 6.827039920788516e-05, "loss": 0.244, "step": 34207 }, { "epoch": 2.7712248865845757, "grad_norm": 0.06939336657524109, "learning_rate": 6.826589855529052e-05, "loss": 0.2472, "step": 34208 }, { "epoch": 2.771305897602074, "grad_norm": 0.06204705685377121, "learning_rate": 6.826139790269589e-05, "loss": 0.1828, "step": 34209 }, { "epoch": 2.771386908619572, "grad_norm": 0.09053542464971542, "learning_rate": 6.825689725010128e-05, "loss": 0.2619, "step": 34210 }, { "epoch": 2.771467919637071, "grad_norm": 0.06675177067518234, "learning_rate": 6.825239659750664e-05, "loss": 0.2146, "step": 34211 }, { "epoch": 2.771548930654569, "grad_norm": 0.07906267791986465, "learning_rate": 6.824789594491201e-05, "loss": 0.2495, "step": 34212 }, { "epoch": 2.7716299416720673, "grad_norm": 0.0674787312746048, "learning_rate": 6.82433952923174e-05, "loss": 0.2408, "step": 34213 }, { "epoch": 2.771710952689566, "grad_norm": 0.06417389959096909, "learning_rate": 6.823889463972276e-05, "loss": 0.2353, "step": 34214 }, { "epoch": 2.7717919637070643, "grad_norm": 0.0717383325099945, "learning_rate": 6.823439398712814e-05, "loss": 0.2797, "step": 34215 }, { "epoch": 2.7718729747245625, "grad_norm": 0.07927710562944412, "learning_rate": 6.822989333453352e-05, "loss": 0.2345, "step": 34216 }, { "epoch": 2.7719539857420608, "grad_norm": 0.07535596191883087, "learning_rate": 6.822539268193888e-05, "loss": 0.2829, "step": 34217 }, { "epoch": 2.7720349967595594, "grad_norm": 0.06705041974782944, "learning_rate": 6.822089202934426e-05, "loss": 0.2121, "step": 34218 }, { "epoch": 2.7721160077770577, "grad_norm": 0.0631314367055893, "learning_rate": 6.821639137674964e-05, "loss": 0.2218, "step": 34219 }, { "epoch": 2.772197018794556, "grad_norm": 0.08411700278520584, "learning_rate": 6.8211890724155e-05, "loss": 0.2544, "step": 34220 }, { "epoch": 2.772278029812054, "grad_norm": 0.05945594608783722, "learning_rate": 6.820739007156039e-05, "loss": 0.2006, "step": 34221 }, { "epoch": 2.772359040829553, "grad_norm": 0.07686436176300049, "learning_rate": 6.820288941896576e-05, "loss": 0.2498, "step": 34222 }, { "epoch": 2.772440051847051, "grad_norm": 0.0920679047703743, "learning_rate": 6.819838876637112e-05, "loss": 0.2636, "step": 34223 }, { "epoch": 2.7725210628645494, "grad_norm": 0.06771649420261383, "learning_rate": 6.81938881137765e-05, "loss": 0.2202, "step": 34224 }, { "epoch": 2.772602073882048, "grad_norm": 0.06906941533088684, "learning_rate": 6.818938746118188e-05, "loss": 0.2569, "step": 34225 }, { "epoch": 2.7726830848995463, "grad_norm": 0.05707994103431702, "learning_rate": 6.818488680858724e-05, "loss": 0.219, "step": 34226 }, { "epoch": 2.7727640959170445, "grad_norm": 0.065834179520607, "learning_rate": 6.818038615599263e-05, "loss": 0.2546, "step": 34227 }, { "epoch": 2.7728451069345432, "grad_norm": 0.06828774511814117, "learning_rate": 6.8175885503398e-05, "loss": 0.2346, "step": 34228 }, { "epoch": 2.7729261179520415, "grad_norm": 0.057811468839645386, "learning_rate": 6.817138485080336e-05, "loss": 0.2097, "step": 34229 }, { "epoch": 2.7730071289695397, "grad_norm": 0.07118486613035202, "learning_rate": 6.816688419820875e-05, "loss": 0.2427, "step": 34230 }, { "epoch": 2.7730881399870384, "grad_norm": 0.062281396239995956, "learning_rate": 6.816238354561412e-05, "loss": 0.226, "step": 34231 }, { "epoch": 2.7731691510045366, "grad_norm": 0.07268352806568146, "learning_rate": 6.815788289301948e-05, "loss": 0.2392, "step": 34232 }, { "epoch": 2.773250162022035, "grad_norm": 0.07287313044071198, "learning_rate": 6.815338224042487e-05, "loss": 0.2569, "step": 34233 }, { "epoch": 2.7733311730395336, "grad_norm": 0.07274441421031952, "learning_rate": 6.814888158783024e-05, "loss": 0.26, "step": 34234 }, { "epoch": 2.773412184057032, "grad_norm": 0.07283134013414383, "learning_rate": 6.81443809352356e-05, "loss": 0.2761, "step": 34235 }, { "epoch": 2.77349319507453, "grad_norm": 0.0661434605717659, "learning_rate": 6.813988028264099e-05, "loss": 0.2348, "step": 34236 }, { "epoch": 2.7735742060920288, "grad_norm": 0.06506034731864929, "learning_rate": 6.813537963004636e-05, "loss": 0.238, "step": 34237 }, { "epoch": 2.773655217109527, "grad_norm": 0.07169941067695618, "learning_rate": 6.813087897745172e-05, "loss": 0.2654, "step": 34238 }, { "epoch": 2.7737362281270252, "grad_norm": 0.06020570173859596, "learning_rate": 6.812637832485711e-05, "loss": 0.2147, "step": 34239 }, { "epoch": 2.7738172391445235, "grad_norm": 0.0668194517493248, "learning_rate": 6.812187767226248e-05, "loss": 0.2402, "step": 34240 }, { "epoch": 2.773898250162022, "grad_norm": 0.07484626770019531, "learning_rate": 6.811737701966784e-05, "loss": 0.2321, "step": 34241 }, { "epoch": 2.7739792611795204, "grad_norm": 0.06657987087965012, "learning_rate": 6.811287636707323e-05, "loss": 0.2443, "step": 34242 }, { "epoch": 2.7740602721970187, "grad_norm": 0.07141261547803879, "learning_rate": 6.81083757144786e-05, "loss": 0.2208, "step": 34243 }, { "epoch": 2.774141283214517, "grad_norm": 0.08069676160812378, "learning_rate": 6.810387506188398e-05, "loss": 0.2144, "step": 34244 }, { "epoch": 2.7742222942320156, "grad_norm": 0.07490212470293045, "learning_rate": 6.809937440928935e-05, "loss": 0.2379, "step": 34245 }, { "epoch": 2.774303305249514, "grad_norm": 0.07865491509437561, "learning_rate": 6.809487375669473e-05, "loss": 0.2451, "step": 34246 }, { "epoch": 2.774384316267012, "grad_norm": 0.07843254506587982, "learning_rate": 6.80903731041001e-05, "loss": 0.2623, "step": 34247 }, { "epoch": 2.774465327284511, "grad_norm": 0.0783471018075943, "learning_rate": 6.808587245150547e-05, "loss": 0.2292, "step": 34248 }, { "epoch": 2.774546338302009, "grad_norm": 0.058314185589551926, "learning_rate": 6.808137179891085e-05, "loss": 0.2225, "step": 34249 }, { "epoch": 2.7746273493195073, "grad_norm": 0.09157566726207733, "learning_rate": 6.807687114631622e-05, "loss": 0.2533, "step": 34250 }, { "epoch": 2.774708360337006, "grad_norm": 0.08975666761398315, "learning_rate": 6.80723704937216e-05, "loss": 0.2492, "step": 34251 }, { "epoch": 2.774789371354504, "grad_norm": 0.08823460340499878, "learning_rate": 6.806786984112697e-05, "loss": 0.2509, "step": 34252 }, { "epoch": 2.7748703823720025, "grad_norm": 0.08141889423131943, "learning_rate": 6.806336918853234e-05, "loss": 0.2395, "step": 34253 }, { "epoch": 2.774951393389501, "grad_norm": 0.07389726489782333, "learning_rate": 6.805886853593771e-05, "loss": 0.2248, "step": 34254 }, { "epoch": 2.7750324044069994, "grad_norm": 0.08528506010770798, "learning_rate": 6.805436788334309e-05, "loss": 0.279, "step": 34255 }, { "epoch": 2.7751134154244976, "grad_norm": 0.08055625855922699, "learning_rate": 6.804986723074846e-05, "loss": 0.2374, "step": 34256 }, { "epoch": 2.7751944264419963, "grad_norm": 0.08427558839321136, "learning_rate": 6.804536657815384e-05, "loss": 0.2612, "step": 34257 }, { "epoch": 2.7752754374594946, "grad_norm": 0.07798214256763458, "learning_rate": 6.804086592555921e-05, "loss": 0.2819, "step": 34258 }, { "epoch": 2.775356448476993, "grad_norm": 0.07762635499238968, "learning_rate": 6.803636527296458e-05, "loss": 0.215, "step": 34259 }, { "epoch": 2.7754374594944915, "grad_norm": 0.09789147228002548, "learning_rate": 6.803186462036996e-05, "loss": 0.2489, "step": 34260 }, { "epoch": 2.7755184705119897, "grad_norm": 0.07010649889707565, "learning_rate": 6.802736396777533e-05, "loss": 0.2476, "step": 34261 }, { "epoch": 2.775599481529488, "grad_norm": 0.06729248911142349, "learning_rate": 6.80228633151807e-05, "loss": 0.2405, "step": 34262 }, { "epoch": 2.7756804925469862, "grad_norm": 0.08678527176380157, "learning_rate": 6.801836266258608e-05, "loss": 0.2487, "step": 34263 }, { "epoch": 2.775761503564485, "grad_norm": 0.06508525460958481, "learning_rate": 6.801386200999145e-05, "loss": 0.2134, "step": 34264 }, { "epoch": 2.775842514581983, "grad_norm": 0.06213225796818733, "learning_rate": 6.800936135739682e-05, "loss": 0.2169, "step": 34265 }, { "epoch": 2.7759235255994814, "grad_norm": 0.07055771350860596, "learning_rate": 6.80048607048022e-05, "loss": 0.289, "step": 34266 }, { "epoch": 2.7760045366169797, "grad_norm": 0.07395903021097183, "learning_rate": 6.800036005220757e-05, "loss": 0.2553, "step": 34267 }, { "epoch": 2.7760855476344783, "grad_norm": 0.05806746706366539, "learning_rate": 6.799585939961294e-05, "loss": 0.1816, "step": 34268 }, { "epoch": 2.7761665586519766, "grad_norm": 0.072418212890625, "learning_rate": 6.799135874701832e-05, "loss": 0.2678, "step": 34269 }, { "epoch": 2.776247569669475, "grad_norm": 0.06981861591339111, "learning_rate": 6.79868580944237e-05, "loss": 0.3012, "step": 34270 }, { "epoch": 2.7763285806869735, "grad_norm": 0.06536334753036499, "learning_rate": 6.798235744182907e-05, "loss": 0.2188, "step": 34271 }, { "epoch": 2.7764095917044718, "grad_norm": 0.06149706616997719, "learning_rate": 6.797785678923444e-05, "loss": 0.2251, "step": 34272 }, { "epoch": 2.77649060272197, "grad_norm": 0.05460198596119881, "learning_rate": 6.797335613663983e-05, "loss": 0.2401, "step": 34273 }, { "epoch": 2.7765716137394687, "grad_norm": 0.07276234775781631, "learning_rate": 6.796885548404519e-05, "loss": 0.2793, "step": 34274 }, { "epoch": 2.776652624756967, "grad_norm": 0.06272850185632706, "learning_rate": 6.796435483145056e-05, "loss": 0.2495, "step": 34275 }, { "epoch": 2.776733635774465, "grad_norm": 0.07314179837703705, "learning_rate": 6.795985417885595e-05, "loss": 0.231, "step": 34276 }, { "epoch": 2.776814646791964, "grad_norm": 0.06718869507312775, "learning_rate": 6.795535352626131e-05, "loss": 0.2868, "step": 34277 }, { "epoch": 2.776895657809462, "grad_norm": 0.06727830320596695, "learning_rate": 6.795085287366668e-05, "loss": 0.2443, "step": 34278 }, { "epoch": 2.7769766688269604, "grad_norm": 0.062349457293748856, "learning_rate": 6.794635222107207e-05, "loss": 0.2412, "step": 34279 }, { "epoch": 2.777057679844459, "grad_norm": 0.08564005047082901, "learning_rate": 6.794185156847743e-05, "loss": 0.258, "step": 34280 }, { "epoch": 2.7771386908619573, "grad_norm": 0.05737084522843361, "learning_rate": 6.79373509158828e-05, "loss": 0.2522, "step": 34281 }, { "epoch": 2.7772197018794555, "grad_norm": 0.07271725684404373, "learning_rate": 6.793285026328819e-05, "loss": 0.2331, "step": 34282 }, { "epoch": 2.7773007128969542, "grad_norm": 0.05929986387491226, "learning_rate": 6.792834961069355e-05, "loss": 0.2134, "step": 34283 }, { "epoch": 2.7773817239144525, "grad_norm": 0.07402640581130981, "learning_rate": 6.792384895809892e-05, "loss": 0.2497, "step": 34284 }, { "epoch": 2.7774627349319507, "grad_norm": 0.0746961385011673, "learning_rate": 6.791934830550431e-05, "loss": 0.2521, "step": 34285 }, { "epoch": 2.777543745949449, "grad_norm": 0.0649658590555191, "learning_rate": 6.791484765290967e-05, "loss": 0.2287, "step": 34286 }, { "epoch": 2.7776247569669477, "grad_norm": 0.09045769274234772, "learning_rate": 6.791034700031504e-05, "loss": 0.2135, "step": 34287 }, { "epoch": 2.777705767984446, "grad_norm": 0.059244897216558456, "learning_rate": 6.790584634772043e-05, "loss": 0.2282, "step": 34288 }, { "epoch": 2.777786779001944, "grad_norm": 0.07081972062587738, "learning_rate": 6.790134569512579e-05, "loss": 0.2521, "step": 34289 }, { "epoch": 2.7778677900194424, "grad_norm": 0.07101619988679886, "learning_rate": 6.789684504253116e-05, "loss": 0.2492, "step": 34290 }, { "epoch": 2.777948801036941, "grad_norm": 0.07111038267612457, "learning_rate": 6.789234438993655e-05, "loss": 0.2415, "step": 34291 }, { "epoch": 2.7780298120544393, "grad_norm": 0.05958026275038719, "learning_rate": 6.788784373734191e-05, "loss": 0.1995, "step": 34292 }, { "epoch": 2.7781108230719376, "grad_norm": 0.06855040788650513, "learning_rate": 6.788334308474728e-05, "loss": 0.2341, "step": 34293 }, { "epoch": 2.7781918340894363, "grad_norm": 0.06002607196569443, "learning_rate": 6.787884243215267e-05, "loss": 0.2287, "step": 34294 }, { "epoch": 2.7782728451069345, "grad_norm": 0.0822204127907753, "learning_rate": 6.787434177955803e-05, "loss": 0.2672, "step": 34295 }, { "epoch": 2.7783538561244328, "grad_norm": 0.0641741082072258, "learning_rate": 6.786984112696342e-05, "loss": 0.2066, "step": 34296 }, { "epoch": 2.7784348671419314, "grad_norm": 0.07143940776586533, "learning_rate": 6.786534047436879e-05, "loss": 0.2429, "step": 34297 }, { "epoch": 2.7785158781594297, "grad_norm": 0.08297896385192871, "learning_rate": 6.786083982177415e-05, "loss": 0.2247, "step": 34298 }, { "epoch": 2.778596889176928, "grad_norm": 0.08254558593034744, "learning_rate": 6.785633916917954e-05, "loss": 0.2453, "step": 34299 }, { "epoch": 2.7786779001944266, "grad_norm": 0.06594591587781906, "learning_rate": 6.785183851658491e-05, "loss": 0.2548, "step": 34300 }, { "epoch": 2.778758911211925, "grad_norm": 0.08076639473438263, "learning_rate": 6.784733786399027e-05, "loss": 0.2329, "step": 34301 }, { "epoch": 2.778839922229423, "grad_norm": 0.07141506671905518, "learning_rate": 6.784283721139566e-05, "loss": 0.239, "step": 34302 }, { "epoch": 2.778920933246922, "grad_norm": 0.05972827598452568, "learning_rate": 6.783833655880103e-05, "loss": 0.2027, "step": 34303 }, { "epoch": 2.77900194426442, "grad_norm": 0.058857645839452744, "learning_rate": 6.78338359062064e-05, "loss": 0.2091, "step": 34304 }, { "epoch": 2.7790829552819183, "grad_norm": 0.06419733166694641, "learning_rate": 6.782933525361178e-05, "loss": 0.2232, "step": 34305 }, { "epoch": 2.779163966299417, "grad_norm": 0.07907504588365555, "learning_rate": 6.782483460101716e-05, "loss": 0.2345, "step": 34306 }, { "epoch": 2.779244977316915, "grad_norm": 0.0683266744017601, "learning_rate": 6.782033394842252e-05, "loss": 0.2764, "step": 34307 }, { "epoch": 2.7793259883344135, "grad_norm": 0.06538711488246918, "learning_rate": 6.78158332958279e-05, "loss": 0.2376, "step": 34308 }, { "epoch": 2.7794069993519117, "grad_norm": 0.06912099570035934, "learning_rate": 6.781133264323328e-05, "loss": 0.242, "step": 34309 }, { "epoch": 2.7794880103694104, "grad_norm": 0.07980895042419434, "learning_rate": 6.780683199063864e-05, "loss": 0.2766, "step": 34310 }, { "epoch": 2.7795690213869086, "grad_norm": 0.07531173527240753, "learning_rate": 6.780233133804402e-05, "loss": 0.2434, "step": 34311 }, { "epoch": 2.779650032404407, "grad_norm": 0.06003532186150551, "learning_rate": 6.77978306854494e-05, "loss": 0.2389, "step": 34312 }, { "epoch": 2.779731043421905, "grad_norm": 0.07522325217723846, "learning_rate": 6.779333003285476e-05, "loss": 0.2211, "step": 34313 }, { "epoch": 2.779812054439404, "grad_norm": 0.06744924187660217, "learning_rate": 6.778882938026014e-05, "loss": 0.2506, "step": 34314 }, { "epoch": 2.779893065456902, "grad_norm": 0.05675087496638298, "learning_rate": 6.778432872766552e-05, "loss": 0.202, "step": 34315 }, { "epoch": 2.7799740764744003, "grad_norm": 0.06242429465055466, "learning_rate": 6.777982807507088e-05, "loss": 0.2161, "step": 34316 }, { "epoch": 2.780055087491899, "grad_norm": 0.08354310691356659, "learning_rate": 6.777532742247626e-05, "loss": 0.2787, "step": 34317 }, { "epoch": 2.7801360985093972, "grad_norm": 0.05664534866809845, "learning_rate": 6.777082676988164e-05, "loss": 0.1897, "step": 34318 }, { "epoch": 2.7802171095268955, "grad_norm": 0.09233099967241287, "learning_rate": 6.7766326117287e-05, "loss": 0.2648, "step": 34319 }, { "epoch": 2.780298120544394, "grad_norm": 0.07990967482328415, "learning_rate": 6.776182546469239e-05, "loss": 0.2448, "step": 34320 }, { "epoch": 2.7803791315618924, "grad_norm": 0.06859664618968964, "learning_rate": 6.775732481209776e-05, "loss": 0.2354, "step": 34321 }, { "epoch": 2.7804601425793907, "grad_norm": 0.05772284418344498, "learning_rate": 6.775282415950313e-05, "loss": 0.2309, "step": 34322 }, { "epoch": 2.7805411535968894, "grad_norm": 0.06003151461482048, "learning_rate": 6.77483235069085e-05, "loss": 0.2292, "step": 34323 }, { "epoch": 2.7806221646143876, "grad_norm": 0.07578642666339874, "learning_rate": 6.774382285431388e-05, "loss": 0.2366, "step": 34324 }, { "epoch": 2.780703175631886, "grad_norm": 0.06596392393112183, "learning_rate": 6.773932220171925e-05, "loss": 0.2677, "step": 34325 }, { "epoch": 2.7807841866493845, "grad_norm": 0.08551062643527985, "learning_rate": 6.773482154912463e-05, "loss": 0.2299, "step": 34326 }, { "epoch": 2.780865197666883, "grad_norm": 0.09138695150613785, "learning_rate": 6.773032089653e-05, "loss": 0.2513, "step": 34327 }, { "epoch": 2.780946208684381, "grad_norm": 0.11304971575737, "learning_rate": 6.772582024393537e-05, "loss": 0.2293, "step": 34328 }, { "epoch": 2.7810272197018797, "grad_norm": 0.0825752317905426, "learning_rate": 6.772131959134075e-05, "loss": 0.2433, "step": 34329 }, { "epoch": 2.781108230719378, "grad_norm": 0.07327274978160858, "learning_rate": 6.771681893874612e-05, "loss": 0.2101, "step": 34330 }, { "epoch": 2.781189241736876, "grad_norm": 0.059017740190029144, "learning_rate": 6.77123182861515e-05, "loss": 0.2263, "step": 34331 }, { "epoch": 2.7812702527543745, "grad_norm": 0.060888487845659256, "learning_rate": 6.770781763355687e-05, "loss": 0.2376, "step": 34332 }, { "epoch": 2.781351263771873, "grad_norm": 0.050954628735780716, "learning_rate": 6.770331698096224e-05, "loss": 0.2443, "step": 34333 }, { "epoch": 2.7814322747893714, "grad_norm": 0.07145940512418747, "learning_rate": 6.769881632836762e-05, "loss": 0.2808, "step": 34334 }, { "epoch": 2.7815132858068696, "grad_norm": 0.06935775279998779, "learning_rate": 6.769431567577299e-05, "loss": 0.2596, "step": 34335 }, { "epoch": 2.781594296824368, "grad_norm": 0.07173454761505127, "learning_rate": 6.768981502317836e-05, "loss": 0.2566, "step": 34336 }, { "epoch": 2.7816753078418666, "grad_norm": 0.08079207688570023, "learning_rate": 6.768531437058374e-05, "loss": 0.2637, "step": 34337 }, { "epoch": 2.781756318859365, "grad_norm": 0.05620386078953743, "learning_rate": 6.768081371798911e-05, "loss": 0.2232, "step": 34338 }, { "epoch": 2.781837329876863, "grad_norm": 0.0672222226858139, "learning_rate": 6.767631306539448e-05, "loss": 0.2388, "step": 34339 }, { "epoch": 2.7819183408943617, "grad_norm": 0.06389716267585754, "learning_rate": 6.767181241279986e-05, "loss": 0.2297, "step": 34340 }, { "epoch": 2.78199935191186, "grad_norm": 0.06956552714109421, "learning_rate": 6.766731176020523e-05, "loss": 0.2206, "step": 34341 }, { "epoch": 2.7820803629293582, "grad_norm": 0.06504596769809723, "learning_rate": 6.76628111076106e-05, "loss": 0.2438, "step": 34342 }, { "epoch": 2.782161373946857, "grad_norm": 0.09995260089635849, "learning_rate": 6.765831045501598e-05, "loss": 0.2781, "step": 34343 }, { "epoch": 2.782242384964355, "grad_norm": 0.074515700340271, "learning_rate": 6.765380980242135e-05, "loss": 0.2707, "step": 34344 }, { "epoch": 2.7823233959818534, "grad_norm": 0.05361089110374451, "learning_rate": 6.764930914982673e-05, "loss": 0.1947, "step": 34345 }, { "epoch": 2.782404406999352, "grad_norm": 0.07568366080522537, "learning_rate": 6.76448084972321e-05, "loss": 0.2898, "step": 34346 }, { "epoch": 2.7824854180168503, "grad_norm": 0.07337959110736847, "learning_rate": 6.764030784463747e-05, "loss": 0.2613, "step": 34347 }, { "epoch": 2.7825664290343486, "grad_norm": 0.06756607443094254, "learning_rate": 6.763580719204286e-05, "loss": 0.2078, "step": 34348 }, { "epoch": 2.7826474400518473, "grad_norm": 0.07057231664657593, "learning_rate": 6.763130653944822e-05, "loss": 0.2187, "step": 34349 }, { "epoch": 2.7827284510693455, "grad_norm": 0.0597674734890461, "learning_rate": 6.76268058868536e-05, "loss": 0.2315, "step": 34350 }, { "epoch": 2.7828094620868438, "grad_norm": 0.06805480271577835, "learning_rate": 6.762230523425898e-05, "loss": 0.276, "step": 34351 }, { "epoch": 2.7828904731043425, "grad_norm": 0.047967858612537384, "learning_rate": 6.761780458166434e-05, "loss": 0.2266, "step": 34352 }, { "epoch": 2.7829714841218407, "grad_norm": 0.07217514514923096, "learning_rate": 6.761330392906971e-05, "loss": 0.1975, "step": 34353 }, { "epoch": 2.783052495139339, "grad_norm": 0.06429262459278107, "learning_rate": 6.76088032764751e-05, "loss": 0.2329, "step": 34354 }, { "epoch": 2.783133506156837, "grad_norm": 0.05952700972557068, "learning_rate": 6.760430262388046e-05, "loss": 0.2487, "step": 34355 }, { "epoch": 2.7832145171743354, "grad_norm": 0.0745530053973198, "learning_rate": 6.759980197128584e-05, "loss": 0.2477, "step": 34356 }, { "epoch": 2.783295528191834, "grad_norm": 0.08647239953279495, "learning_rate": 6.759530131869122e-05, "loss": 0.209, "step": 34357 }, { "epoch": 2.7833765392093324, "grad_norm": 0.08480441570281982, "learning_rate": 6.759080066609658e-05, "loss": 0.2785, "step": 34358 }, { "epoch": 2.7834575502268306, "grad_norm": 0.07079491764307022, "learning_rate": 6.758630001350196e-05, "loss": 0.1753, "step": 34359 }, { "epoch": 2.7835385612443293, "grad_norm": 0.0632624626159668, "learning_rate": 6.758179936090734e-05, "loss": 0.2346, "step": 34360 }, { "epoch": 2.7836195722618275, "grad_norm": 0.08435448259115219, "learning_rate": 6.75772987083127e-05, "loss": 0.2539, "step": 34361 }, { "epoch": 2.783700583279326, "grad_norm": 0.07184403389692307, "learning_rate": 6.757279805571808e-05, "loss": 0.2656, "step": 34362 }, { "epoch": 2.7837815942968245, "grad_norm": 0.07023405283689499, "learning_rate": 6.756829740312346e-05, "loss": 0.2108, "step": 34363 }, { "epoch": 2.7838626053143227, "grad_norm": 0.07092241197824478, "learning_rate": 6.756379675052882e-05, "loss": 0.2064, "step": 34364 }, { "epoch": 2.783943616331821, "grad_norm": 0.07640679180622101, "learning_rate": 6.75592960979342e-05, "loss": 0.2837, "step": 34365 }, { "epoch": 2.7840246273493197, "grad_norm": 0.06458078324794769, "learning_rate": 6.755479544533959e-05, "loss": 0.2426, "step": 34366 }, { "epoch": 2.784105638366818, "grad_norm": 0.08889305591583252, "learning_rate": 6.755029479274495e-05, "loss": 0.2502, "step": 34367 }, { "epoch": 2.784186649384316, "grad_norm": 0.0728810727596283, "learning_rate": 6.754579414015032e-05, "loss": 0.2762, "step": 34368 }, { "epoch": 2.784267660401815, "grad_norm": 0.06907269358634949, "learning_rate": 6.75412934875557e-05, "loss": 0.2933, "step": 34369 }, { "epoch": 2.784348671419313, "grad_norm": 0.06508993357419968, "learning_rate": 6.753679283496107e-05, "loss": 0.2284, "step": 34370 }, { "epoch": 2.7844296824368113, "grad_norm": 0.07364595681428909, "learning_rate": 6.753229218236644e-05, "loss": 0.2047, "step": 34371 }, { "epoch": 2.78451069345431, "grad_norm": 0.07595406472682953, "learning_rate": 6.752779152977183e-05, "loss": 0.2373, "step": 34372 }, { "epoch": 2.7845917044718083, "grad_norm": 0.08654715865850449, "learning_rate": 6.752329087717719e-05, "loss": 0.2802, "step": 34373 }, { "epoch": 2.7846727154893065, "grad_norm": 0.07352244853973389, "learning_rate": 6.751879022458257e-05, "loss": 0.2392, "step": 34374 }, { "epoch": 2.784753726506805, "grad_norm": 0.06762534379959106, "learning_rate": 6.751428957198795e-05, "loss": 0.2184, "step": 34375 }, { "epoch": 2.7848347375243034, "grad_norm": 0.07174224406480789, "learning_rate": 6.750978891939331e-05, "loss": 0.2153, "step": 34376 }, { "epoch": 2.7849157485418017, "grad_norm": 0.06852773576974869, "learning_rate": 6.75052882667987e-05, "loss": 0.2234, "step": 34377 }, { "epoch": 2.7849967595593, "grad_norm": 0.06424195319414139, "learning_rate": 6.750078761420407e-05, "loss": 0.2547, "step": 34378 }, { "epoch": 2.785077770576798, "grad_norm": 0.06172497943043709, "learning_rate": 6.749628696160943e-05, "loss": 0.2291, "step": 34379 }, { "epoch": 2.785158781594297, "grad_norm": 0.07526271790266037, "learning_rate": 6.749178630901482e-05, "loss": 0.2528, "step": 34380 }, { "epoch": 2.785239792611795, "grad_norm": 0.06385906040668488, "learning_rate": 6.748728565642019e-05, "loss": 0.2146, "step": 34381 }, { "epoch": 2.7853208036292934, "grad_norm": 0.08248017728328705, "learning_rate": 6.748278500382555e-05, "loss": 0.2931, "step": 34382 }, { "epoch": 2.785401814646792, "grad_norm": 0.06632523238658905, "learning_rate": 6.747828435123094e-05, "loss": 0.2118, "step": 34383 }, { "epoch": 2.7854828256642903, "grad_norm": 0.07956916838884354, "learning_rate": 6.747378369863631e-05, "loss": 0.2701, "step": 34384 }, { "epoch": 2.7855638366817885, "grad_norm": 0.06996366381645203, "learning_rate": 6.746928304604167e-05, "loss": 0.2281, "step": 34385 }, { "epoch": 2.785644847699287, "grad_norm": 0.07411151379346848, "learning_rate": 6.746478239344706e-05, "loss": 0.26, "step": 34386 }, { "epoch": 2.7857258587167855, "grad_norm": 0.053881555795669556, "learning_rate": 6.746028174085243e-05, "loss": 0.2054, "step": 34387 }, { "epoch": 2.7858068697342837, "grad_norm": 0.07679598033428192, "learning_rate": 6.745578108825779e-05, "loss": 0.2424, "step": 34388 }, { "epoch": 2.7858878807517824, "grad_norm": 0.07939158380031586, "learning_rate": 6.745128043566318e-05, "loss": 0.2767, "step": 34389 }, { "epoch": 2.7859688917692806, "grad_norm": 0.07225979119539261, "learning_rate": 6.744677978306855e-05, "loss": 0.2158, "step": 34390 }, { "epoch": 2.786049902786779, "grad_norm": 0.06612420827150345, "learning_rate": 6.744227913047391e-05, "loss": 0.2233, "step": 34391 }, { "epoch": 2.7861309138042776, "grad_norm": 0.09697147458791733, "learning_rate": 6.74377784778793e-05, "loss": 0.255, "step": 34392 }, { "epoch": 2.786211924821776, "grad_norm": 0.07526032626628876, "learning_rate": 6.743327782528467e-05, "loss": 0.2381, "step": 34393 }, { "epoch": 2.786292935839274, "grad_norm": 0.08052770048379898, "learning_rate": 6.742877717269003e-05, "loss": 0.2626, "step": 34394 }, { "epoch": 2.7863739468567728, "grad_norm": 0.06478998064994812, "learning_rate": 6.742427652009542e-05, "loss": 0.2183, "step": 34395 }, { "epoch": 2.786454957874271, "grad_norm": 0.06821257621049881, "learning_rate": 6.741977586750079e-05, "loss": 0.2422, "step": 34396 }, { "epoch": 2.7865359688917692, "grad_norm": 0.07175886631011963, "learning_rate": 6.741527521490615e-05, "loss": 0.2066, "step": 34397 }, { "epoch": 2.786616979909268, "grad_norm": 0.06310652196407318, "learning_rate": 6.741077456231154e-05, "loss": 0.2241, "step": 34398 }, { "epoch": 2.786697990926766, "grad_norm": 0.0647004023194313, "learning_rate": 6.740627390971691e-05, "loss": 0.2276, "step": 34399 }, { "epoch": 2.7867790019442644, "grad_norm": 0.08007898926734924, "learning_rate": 6.740177325712227e-05, "loss": 0.2739, "step": 34400 }, { "epoch": 2.7868600129617627, "grad_norm": 0.06193099170923233, "learning_rate": 6.739727260452766e-05, "loss": 0.2301, "step": 34401 }, { "epoch": 2.786941023979261, "grad_norm": 0.06945597380399704, "learning_rate": 6.739277195193303e-05, "loss": 0.2555, "step": 34402 }, { "epoch": 2.7870220349967596, "grad_norm": 0.07463754713535309, "learning_rate": 6.738827129933841e-05, "loss": 0.2691, "step": 34403 }, { "epoch": 2.787103046014258, "grad_norm": 0.06011288985610008, "learning_rate": 6.738377064674378e-05, "loss": 0.2347, "step": 34404 }, { "epoch": 2.787184057031756, "grad_norm": 0.06475888192653656, "learning_rate": 6.737926999414916e-05, "loss": 0.2483, "step": 34405 }, { "epoch": 2.787265068049255, "grad_norm": 0.05519997701048851, "learning_rate": 6.737476934155453e-05, "loss": 0.2429, "step": 34406 }, { "epoch": 2.787346079066753, "grad_norm": 0.08073216676712036, "learning_rate": 6.73702686889599e-05, "loss": 0.2635, "step": 34407 }, { "epoch": 2.7874270900842513, "grad_norm": 0.06941085308790207, "learning_rate": 6.736576803636528e-05, "loss": 0.2153, "step": 34408 }, { "epoch": 2.78750810110175, "grad_norm": 0.08742102235555649, "learning_rate": 6.736126738377065e-05, "loss": 0.2539, "step": 34409 }, { "epoch": 2.787589112119248, "grad_norm": 0.06479343771934509, "learning_rate": 6.735676673117602e-05, "loss": 0.2463, "step": 34410 }, { "epoch": 2.7876701231367464, "grad_norm": 0.06715091317892075, "learning_rate": 6.73522660785814e-05, "loss": 0.2261, "step": 34411 }, { "epoch": 2.787751134154245, "grad_norm": 0.06692752987146378, "learning_rate": 6.734776542598677e-05, "loss": 0.2037, "step": 34412 }, { "epoch": 2.7878321451717434, "grad_norm": 0.07739755511283875, "learning_rate": 6.734326477339214e-05, "loss": 0.2517, "step": 34413 }, { "epoch": 2.7879131561892416, "grad_norm": 0.08224021643400192, "learning_rate": 6.733876412079752e-05, "loss": 0.2464, "step": 34414 }, { "epoch": 2.7879941672067403, "grad_norm": 0.06463365256786346, "learning_rate": 6.733426346820289e-05, "loss": 0.2182, "step": 34415 }, { "epoch": 2.7880751782242386, "grad_norm": 0.08047308027744293, "learning_rate": 6.732976281560827e-05, "loss": 0.252, "step": 34416 }, { "epoch": 2.788156189241737, "grad_norm": 0.06497808545827866, "learning_rate": 6.732526216301364e-05, "loss": 0.2045, "step": 34417 }, { "epoch": 2.7882372002592355, "grad_norm": 0.057299938052892685, "learning_rate": 6.732076151041901e-05, "loss": 0.2309, "step": 34418 }, { "epoch": 2.7883182112767337, "grad_norm": 0.0716933012008667, "learning_rate": 6.731626085782439e-05, "loss": 0.2534, "step": 34419 }, { "epoch": 2.788399222294232, "grad_norm": 0.0884045958518982, "learning_rate": 6.731176020522976e-05, "loss": 0.2376, "step": 34420 }, { "epoch": 2.7884802333117307, "grad_norm": 0.07025162875652313, "learning_rate": 6.730725955263513e-05, "loss": 0.2471, "step": 34421 }, { "epoch": 2.788561244329229, "grad_norm": 0.11640918254852295, "learning_rate": 6.730275890004051e-05, "loss": 0.2244, "step": 34422 }, { "epoch": 2.788642255346727, "grad_norm": 0.06777489930391312, "learning_rate": 6.729825824744588e-05, "loss": 0.2375, "step": 34423 }, { "epoch": 2.7887232663642254, "grad_norm": 0.06596884876489639, "learning_rate": 6.729375759485125e-05, "loss": 0.2492, "step": 34424 }, { "epoch": 2.7888042773817237, "grad_norm": 0.06826752424240112, "learning_rate": 6.728925694225663e-05, "loss": 0.2131, "step": 34425 }, { "epoch": 2.7888852883992223, "grad_norm": 0.07430961728096008, "learning_rate": 6.7284756289662e-05, "loss": 0.2756, "step": 34426 }, { "epoch": 2.7889662994167206, "grad_norm": 0.0647616907954216, "learning_rate": 6.728025563706737e-05, "loss": 0.2101, "step": 34427 }, { "epoch": 2.789047310434219, "grad_norm": 0.06215166300535202, "learning_rate": 6.727575498447275e-05, "loss": 0.2188, "step": 34428 }, { "epoch": 2.7891283214517175, "grad_norm": 0.06014298275113106, "learning_rate": 6.727125433187814e-05, "loss": 0.1843, "step": 34429 }, { "epoch": 2.7892093324692158, "grad_norm": 0.07539289444684982, "learning_rate": 6.72667536792835e-05, "loss": 0.2651, "step": 34430 }, { "epoch": 2.789290343486714, "grad_norm": 0.07783479988574982, "learning_rate": 6.726225302668887e-05, "loss": 0.2589, "step": 34431 }, { "epoch": 2.7893713545042127, "grad_norm": 0.07596423476934433, "learning_rate": 6.725775237409426e-05, "loss": 0.2412, "step": 34432 }, { "epoch": 2.789452365521711, "grad_norm": 0.06946752220392227, "learning_rate": 6.725325172149962e-05, "loss": 0.2539, "step": 34433 }, { "epoch": 2.789533376539209, "grad_norm": 0.07110365480184555, "learning_rate": 6.724875106890499e-05, "loss": 0.2496, "step": 34434 }, { "epoch": 2.789614387556708, "grad_norm": 0.06963635236024857, "learning_rate": 6.724425041631038e-05, "loss": 0.2613, "step": 34435 }, { "epoch": 2.789695398574206, "grad_norm": 0.07469368726015091, "learning_rate": 6.723974976371574e-05, "loss": 0.2121, "step": 34436 }, { "epoch": 2.7897764095917044, "grad_norm": 0.057353585958480835, "learning_rate": 6.723524911112111e-05, "loss": 0.2013, "step": 34437 }, { "epoch": 2.789857420609203, "grad_norm": 0.0705106109380722, "learning_rate": 6.72307484585265e-05, "loss": 0.2649, "step": 34438 }, { "epoch": 2.7899384316267013, "grad_norm": 0.07225330919027328, "learning_rate": 6.722624780593186e-05, "loss": 0.2561, "step": 34439 }, { "epoch": 2.7900194426441995, "grad_norm": 0.06563139706850052, "learning_rate": 6.722174715333723e-05, "loss": 0.2022, "step": 34440 }, { "epoch": 2.7901004536616982, "grad_norm": 0.06589397042989731, "learning_rate": 6.721724650074262e-05, "loss": 0.2353, "step": 34441 }, { "epoch": 2.7901814646791965, "grad_norm": 0.061312295496463776, "learning_rate": 6.721274584814798e-05, "loss": 0.1942, "step": 34442 }, { "epoch": 2.7902624756966947, "grad_norm": 0.0669531598687172, "learning_rate": 6.720824519555335e-05, "loss": 0.2218, "step": 34443 }, { "epoch": 2.790343486714193, "grad_norm": 0.06205807998776436, "learning_rate": 6.720374454295874e-05, "loss": 0.228, "step": 34444 }, { "epoch": 2.7904244977316917, "grad_norm": 0.08307838439941406, "learning_rate": 6.71992438903641e-05, "loss": 0.2183, "step": 34445 }, { "epoch": 2.79050550874919, "grad_norm": 0.07728546112775803, "learning_rate": 6.719474323776947e-05, "loss": 0.2474, "step": 34446 }, { "epoch": 2.790586519766688, "grad_norm": 0.07177817076444626, "learning_rate": 6.719024258517486e-05, "loss": 0.254, "step": 34447 }, { "epoch": 2.7906675307841864, "grad_norm": 0.06399939209222794, "learning_rate": 6.718574193258022e-05, "loss": 0.2457, "step": 34448 }, { "epoch": 2.790748541801685, "grad_norm": 0.05192526802420616, "learning_rate": 6.71812412799856e-05, "loss": 0.2214, "step": 34449 }, { "epoch": 2.7908295528191833, "grad_norm": 0.06940247118473053, "learning_rate": 6.717674062739098e-05, "loss": 0.2491, "step": 34450 }, { "epoch": 2.7909105638366816, "grad_norm": 0.07798627763986588, "learning_rate": 6.717223997479634e-05, "loss": 0.236, "step": 34451 }, { "epoch": 2.7909915748541803, "grad_norm": 0.06335878372192383, "learning_rate": 6.716773932220171e-05, "loss": 0.2106, "step": 34452 }, { "epoch": 2.7910725858716785, "grad_norm": 0.06506040692329407, "learning_rate": 6.71632386696071e-05, "loss": 0.2444, "step": 34453 }, { "epoch": 2.7911535968891767, "grad_norm": 0.06760771572589874, "learning_rate": 6.715873801701246e-05, "loss": 0.2147, "step": 34454 }, { "epoch": 2.7912346079066754, "grad_norm": 0.06756895035505295, "learning_rate": 6.715423736441785e-05, "loss": 0.273, "step": 34455 }, { "epoch": 2.7913156189241737, "grad_norm": 0.0767185315489769, "learning_rate": 6.714973671182322e-05, "loss": 0.2733, "step": 34456 }, { "epoch": 2.791396629941672, "grad_norm": 0.07195433229207993, "learning_rate": 6.714523605922858e-05, "loss": 0.2248, "step": 34457 }, { "epoch": 2.7914776409591706, "grad_norm": 0.05929771810770035, "learning_rate": 6.714073540663397e-05, "loss": 0.2299, "step": 34458 }, { "epoch": 2.791558651976669, "grad_norm": 0.06814391165971756, "learning_rate": 6.713623475403934e-05, "loss": 0.2515, "step": 34459 }, { "epoch": 2.791639662994167, "grad_norm": 0.07823874801397324, "learning_rate": 6.71317341014447e-05, "loss": 0.2584, "step": 34460 }, { "epoch": 2.791720674011666, "grad_norm": 0.0727526992559433, "learning_rate": 6.712723344885009e-05, "loss": 0.2264, "step": 34461 }, { "epoch": 2.791801685029164, "grad_norm": 0.06739646196365356, "learning_rate": 6.712273279625546e-05, "loss": 0.2707, "step": 34462 }, { "epoch": 2.7918826960466623, "grad_norm": 0.07312295585870743, "learning_rate": 6.711823214366082e-05, "loss": 0.2549, "step": 34463 }, { "epoch": 2.791963707064161, "grad_norm": 0.0585709773004055, "learning_rate": 6.711373149106621e-05, "loss": 0.2358, "step": 34464 }, { "epoch": 2.792044718081659, "grad_norm": 0.06079203262925148, "learning_rate": 6.710923083847159e-05, "loss": 0.2251, "step": 34465 }, { "epoch": 2.7921257290991575, "grad_norm": 0.07005365937948227, "learning_rate": 6.710473018587695e-05, "loss": 0.2099, "step": 34466 }, { "epoch": 2.7922067401166557, "grad_norm": 0.08667057752609253, "learning_rate": 6.710022953328233e-05, "loss": 0.2528, "step": 34467 }, { "epoch": 2.7922877511341544, "grad_norm": 0.07198949158191681, "learning_rate": 6.70957288806877e-05, "loss": 0.2678, "step": 34468 }, { "epoch": 2.7923687621516526, "grad_norm": 0.06842099130153656, "learning_rate": 6.709122822809307e-05, "loss": 0.2277, "step": 34469 }, { "epoch": 2.792449773169151, "grad_norm": 0.06861353665590286, "learning_rate": 6.708672757549845e-05, "loss": 0.2078, "step": 34470 }, { "epoch": 2.792530784186649, "grad_norm": 0.06622407585382462, "learning_rate": 6.708222692290383e-05, "loss": 0.219, "step": 34471 }, { "epoch": 2.792611795204148, "grad_norm": 0.05334659665822983, "learning_rate": 6.707772627030919e-05, "loss": 0.2134, "step": 34472 }, { "epoch": 2.792692806221646, "grad_norm": 0.07334712147712708, "learning_rate": 6.707322561771457e-05, "loss": 0.235, "step": 34473 }, { "epoch": 2.7927738172391443, "grad_norm": 0.06408771872520447, "learning_rate": 6.706872496511995e-05, "loss": 0.2147, "step": 34474 }, { "epoch": 2.792854828256643, "grad_norm": 0.07221581041812897, "learning_rate": 6.706422431252531e-05, "loss": 0.2565, "step": 34475 }, { "epoch": 2.7929358392741412, "grad_norm": 0.06033742427825928, "learning_rate": 6.70597236599307e-05, "loss": 0.2485, "step": 34476 }, { "epoch": 2.7930168502916395, "grad_norm": 0.07943415641784668, "learning_rate": 6.705522300733607e-05, "loss": 0.2359, "step": 34477 }, { "epoch": 2.793097861309138, "grad_norm": 0.0714966431260109, "learning_rate": 6.705072235474143e-05, "loss": 0.2412, "step": 34478 }, { "epoch": 2.7931788723266364, "grad_norm": 0.059110529720783234, "learning_rate": 6.704622170214682e-05, "loss": 0.2053, "step": 34479 }, { "epoch": 2.7932598833441347, "grad_norm": 0.08263637870550156, "learning_rate": 6.704172104955219e-05, "loss": 0.2521, "step": 34480 }, { "epoch": 2.7933408943616334, "grad_norm": 0.07647662609815598, "learning_rate": 6.703722039695756e-05, "loss": 0.2456, "step": 34481 }, { "epoch": 2.7934219053791316, "grad_norm": 0.06695931404829025, "learning_rate": 6.703271974436294e-05, "loss": 0.2433, "step": 34482 }, { "epoch": 2.79350291639663, "grad_norm": 0.06521735340356827, "learning_rate": 6.702821909176831e-05, "loss": 0.2418, "step": 34483 }, { "epoch": 2.7935839274141285, "grad_norm": 0.05986318364739418, "learning_rate": 6.702371843917368e-05, "loss": 0.2465, "step": 34484 }, { "epoch": 2.7936649384316268, "grad_norm": 0.07254021614789963, "learning_rate": 6.701921778657906e-05, "loss": 0.2622, "step": 34485 }, { "epoch": 2.793745949449125, "grad_norm": 0.07294338941574097, "learning_rate": 6.701471713398443e-05, "loss": 0.2323, "step": 34486 }, { "epoch": 2.7938269604666237, "grad_norm": 0.06299685686826706, "learning_rate": 6.70102164813898e-05, "loss": 0.2194, "step": 34487 }, { "epoch": 2.793907971484122, "grad_norm": 0.06214449554681778, "learning_rate": 6.700571582879518e-05, "loss": 0.2218, "step": 34488 }, { "epoch": 2.79398898250162, "grad_norm": 0.07161349803209305, "learning_rate": 6.700121517620055e-05, "loss": 0.2257, "step": 34489 }, { "epoch": 2.7940699935191184, "grad_norm": 0.06868092715740204, "learning_rate": 6.699671452360593e-05, "loss": 0.2494, "step": 34490 }, { "epoch": 2.794151004536617, "grad_norm": 0.07574149966239929, "learning_rate": 6.69922138710113e-05, "loss": 0.2281, "step": 34491 }, { "epoch": 2.7942320155541154, "grad_norm": 0.07077787071466446, "learning_rate": 6.698771321841667e-05, "loss": 0.2547, "step": 34492 }, { "epoch": 2.7943130265716136, "grad_norm": 0.061070095747709274, "learning_rate": 6.698321256582205e-05, "loss": 0.2976, "step": 34493 }, { "epoch": 2.794394037589112, "grad_norm": 0.06678925454616547, "learning_rate": 6.697871191322742e-05, "loss": 0.2325, "step": 34494 }, { "epoch": 2.7944750486066106, "grad_norm": 0.06714421510696411, "learning_rate": 6.69742112606328e-05, "loss": 0.2559, "step": 34495 }, { "epoch": 2.794556059624109, "grad_norm": 0.07086465507745743, "learning_rate": 6.696971060803817e-05, "loss": 0.2762, "step": 34496 }, { "epoch": 2.794637070641607, "grad_norm": 0.08439730852842331, "learning_rate": 6.696520995544354e-05, "loss": 0.2505, "step": 34497 }, { "epoch": 2.7947180816591057, "grad_norm": 0.06269598752260208, "learning_rate": 6.696070930284891e-05, "loss": 0.2165, "step": 34498 }, { "epoch": 2.794799092676604, "grad_norm": 0.05698813870549202, "learning_rate": 6.695620865025429e-05, "loss": 0.2581, "step": 34499 }, { "epoch": 2.7948801036941022, "grad_norm": 0.08211810886859894, "learning_rate": 6.695170799765966e-05, "loss": 0.2274, "step": 34500 }, { "epoch": 2.794961114711601, "grad_norm": 0.0799921527504921, "learning_rate": 6.694720734506504e-05, "loss": 0.2721, "step": 34501 }, { "epoch": 2.795042125729099, "grad_norm": 0.06881997734308243, "learning_rate": 6.694270669247041e-05, "loss": 0.2394, "step": 34502 }, { "epoch": 2.7951231367465974, "grad_norm": 0.06042630970478058, "learning_rate": 6.693820603987578e-05, "loss": 0.2451, "step": 34503 }, { "epoch": 2.795204147764096, "grad_norm": 0.06238381192088127, "learning_rate": 6.693370538728116e-05, "loss": 0.2208, "step": 34504 }, { "epoch": 2.7952851587815943, "grad_norm": 0.058415718376636505, "learning_rate": 6.692920473468653e-05, "loss": 0.25, "step": 34505 }, { "epoch": 2.7953661697990926, "grad_norm": 0.06427952647209167, "learning_rate": 6.69247040820919e-05, "loss": 0.2714, "step": 34506 }, { "epoch": 2.7954471808165913, "grad_norm": 0.06831841170787811, "learning_rate": 6.692020342949729e-05, "loss": 0.2201, "step": 34507 }, { "epoch": 2.7955281918340895, "grad_norm": 0.07188865542411804, "learning_rate": 6.691570277690265e-05, "loss": 0.2024, "step": 34508 }, { "epoch": 2.7956092028515878, "grad_norm": 0.07600495219230652, "learning_rate": 6.691120212430802e-05, "loss": 0.2454, "step": 34509 }, { "epoch": 2.7956902138690864, "grad_norm": 0.07487045228481293, "learning_rate": 6.690670147171341e-05, "loss": 0.2543, "step": 34510 }, { "epoch": 2.7957712248865847, "grad_norm": 0.07164686918258667, "learning_rate": 6.690220081911877e-05, "loss": 0.2616, "step": 34511 }, { "epoch": 2.795852235904083, "grad_norm": 0.07789607346057892, "learning_rate": 6.689770016652414e-05, "loss": 0.2309, "step": 34512 }, { "epoch": 2.795933246921581, "grad_norm": 0.06713174283504486, "learning_rate": 6.689319951392953e-05, "loss": 0.2241, "step": 34513 }, { "epoch": 2.79601425793908, "grad_norm": 0.07741440087556839, "learning_rate": 6.688869886133489e-05, "loss": 0.2458, "step": 34514 }, { "epoch": 2.796095268956578, "grad_norm": 0.0612313486635685, "learning_rate": 6.688419820874027e-05, "loss": 0.2272, "step": 34515 }, { "epoch": 2.7961762799740764, "grad_norm": 0.06217198818922043, "learning_rate": 6.687969755614565e-05, "loss": 0.2304, "step": 34516 }, { "epoch": 2.7962572909915746, "grad_norm": 0.0742369219660759, "learning_rate": 6.687519690355101e-05, "loss": 0.2653, "step": 34517 }, { "epoch": 2.7963383020090733, "grad_norm": 0.06669142842292786, "learning_rate": 6.687069625095639e-05, "loss": 0.2621, "step": 34518 }, { "epoch": 2.7964193130265715, "grad_norm": 0.09008178114891052, "learning_rate": 6.686619559836177e-05, "loss": 0.2696, "step": 34519 }, { "epoch": 2.79650032404407, "grad_norm": 0.06483547389507294, "learning_rate": 6.686169494576713e-05, "loss": 0.2261, "step": 34520 }, { "epoch": 2.7965813350615685, "grad_norm": 0.06703682988882065, "learning_rate": 6.685719429317251e-05, "loss": 0.253, "step": 34521 }, { "epoch": 2.7966623460790667, "grad_norm": 0.06760460138320923, "learning_rate": 6.68526936405779e-05, "loss": 0.2837, "step": 34522 }, { "epoch": 2.796743357096565, "grad_norm": 0.08956301212310791, "learning_rate": 6.684819298798325e-05, "loss": 0.2619, "step": 34523 }, { "epoch": 2.7968243681140637, "grad_norm": 0.07180722057819366, "learning_rate": 6.684369233538863e-05, "loss": 0.2567, "step": 34524 }, { "epoch": 2.796905379131562, "grad_norm": 0.06838277727365494, "learning_rate": 6.683919168279402e-05, "loss": 0.2282, "step": 34525 }, { "epoch": 2.79698639014906, "grad_norm": 0.06887766718864441, "learning_rate": 6.683469103019938e-05, "loss": 0.2045, "step": 34526 }, { "epoch": 2.797067401166559, "grad_norm": 0.07803060859441757, "learning_rate": 6.683019037760475e-05, "loss": 0.2324, "step": 34527 }, { "epoch": 2.797148412184057, "grad_norm": 0.08068870007991791, "learning_rate": 6.682568972501014e-05, "loss": 0.2478, "step": 34528 }, { "epoch": 2.7972294232015553, "grad_norm": 0.060097936540842056, "learning_rate": 6.68211890724155e-05, "loss": 0.2032, "step": 34529 }, { "epoch": 2.797310434219054, "grad_norm": 0.06428631395101547, "learning_rate": 6.681668841982087e-05, "loss": 0.2645, "step": 34530 }, { "epoch": 2.7973914452365523, "grad_norm": 0.0661778450012207, "learning_rate": 6.681218776722626e-05, "loss": 0.274, "step": 34531 }, { "epoch": 2.7974724562540505, "grad_norm": 0.07209504395723343, "learning_rate": 6.680768711463162e-05, "loss": 0.1972, "step": 34532 }, { "epoch": 2.797553467271549, "grad_norm": 0.07826335728168488, "learning_rate": 6.6803186462037e-05, "loss": 0.1785, "step": 34533 }, { "epoch": 2.7976344782890474, "grad_norm": 0.0779106393456459, "learning_rate": 6.679868580944238e-05, "loss": 0.2395, "step": 34534 }, { "epoch": 2.7977154893065457, "grad_norm": 0.07281313091516495, "learning_rate": 6.679418515684774e-05, "loss": 0.229, "step": 34535 }, { "epoch": 2.797796500324044, "grad_norm": 0.06805375218391418, "learning_rate": 6.678968450425312e-05, "loss": 0.2508, "step": 34536 }, { "epoch": 2.7978775113415426, "grad_norm": 0.07204117625951767, "learning_rate": 6.67851838516585e-05, "loss": 0.2444, "step": 34537 }, { "epoch": 2.797958522359041, "grad_norm": 0.06302698701620102, "learning_rate": 6.678068319906386e-05, "loss": 0.2394, "step": 34538 }, { "epoch": 2.798039533376539, "grad_norm": 0.06678730249404907, "learning_rate": 6.677618254646925e-05, "loss": 0.2558, "step": 34539 }, { "epoch": 2.7981205443940373, "grad_norm": 0.07123063504695892, "learning_rate": 6.677168189387462e-05, "loss": 0.2087, "step": 34540 }, { "epoch": 2.798201555411536, "grad_norm": 0.060089875012636185, "learning_rate": 6.676718124127998e-05, "loss": 0.2292, "step": 34541 }, { "epoch": 2.7982825664290343, "grad_norm": 0.06729140877723694, "learning_rate": 6.676268058868537e-05, "loss": 0.2311, "step": 34542 }, { "epoch": 2.7983635774465325, "grad_norm": 0.07628912478685379, "learning_rate": 6.675817993609074e-05, "loss": 0.2566, "step": 34543 }, { "epoch": 2.798444588464031, "grad_norm": 0.07927137613296509, "learning_rate": 6.67536792834961e-05, "loss": 0.2523, "step": 34544 }, { "epoch": 2.7985255994815295, "grad_norm": 0.06815194338560104, "learning_rate": 6.674917863090149e-05, "loss": 0.2686, "step": 34545 }, { "epoch": 2.7986066104990277, "grad_norm": 0.06721120327711105, "learning_rate": 6.674467797830686e-05, "loss": 0.2471, "step": 34546 }, { "epoch": 2.7986876215165264, "grad_norm": 0.08339305222034454, "learning_rate": 6.674017732571223e-05, "loss": 0.2074, "step": 34547 }, { "epoch": 2.7987686325340246, "grad_norm": 0.06246606260538101, "learning_rate": 6.673567667311761e-05, "loss": 0.2553, "step": 34548 }, { "epoch": 2.798849643551523, "grad_norm": 0.06168720871210098, "learning_rate": 6.673117602052298e-05, "loss": 0.2603, "step": 34549 }, { "epoch": 2.7989306545690216, "grad_norm": 0.0719793364405632, "learning_rate": 6.672667536792836e-05, "loss": 0.2338, "step": 34550 }, { "epoch": 2.79901166558652, "grad_norm": 0.07064522057771683, "learning_rate": 6.672217471533373e-05, "loss": 0.2486, "step": 34551 }, { "epoch": 2.799092676604018, "grad_norm": 0.07162480801343918, "learning_rate": 6.67176740627391e-05, "loss": 0.2603, "step": 34552 }, { "epoch": 2.7991736876215167, "grad_norm": 0.0879589319229126, "learning_rate": 6.671317341014448e-05, "loss": 0.2434, "step": 34553 }, { "epoch": 2.799254698639015, "grad_norm": 0.05829513818025589, "learning_rate": 6.670867275754985e-05, "loss": 0.2313, "step": 34554 }, { "epoch": 2.7993357096565132, "grad_norm": 0.07135823369026184, "learning_rate": 6.670417210495522e-05, "loss": 0.2476, "step": 34555 }, { "epoch": 2.799416720674012, "grad_norm": 0.06813136488199234, "learning_rate": 6.66996714523606e-05, "loss": 0.2315, "step": 34556 }, { "epoch": 2.79949773169151, "grad_norm": 0.08152391761541367, "learning_rate": 6.669517079976597e-05, "loss": 0.2475, "step": 34557 }, { "epoch": 2.7995787427090084, "grad_norm": 0.0726170465350151, "learning_rate": 6.669067014717134e-05, "loss": 0.2706, "step": 34558 }, { "epoch": 2.7996597537265067, "grad_norm": 0.08020608127117157, "learning_rate": 6.668616949457672e-05, "loss": 0.2555, "step": 34559 }, { "epoch": 2.7997407647440054, "grad_norm": 0.07207894325256348, "learning_rate": 6.668166884198209e-05, "loss": 0.2487, "step": 34560 }, { "epoch": 2.7998217757615036, "grad_norm": 0.07342177629470825, "learning_rate": 6.667716818938746e-05, "loss": 0.274, "step": 34561 }, { "epoch": 2.799902786779002, "grad_norm": 0.07004830241203308, "learning_rate": 6.667266753679284e-05, "loss": 0.22, "step": 34562 }, { "epoch": 2.7999837977965, "grad_norm": 0.06473211944103241, "learning_rate": 6.666816688419821e-05, "loss": 0.1806, "step": 34563 }, { "epoch": 2.8000648088139988, "grad_norm": 0.072107695043087, "learning_rate": 6.666366623160359e-05, "loss": 0.2916, "step": 34564 }, { "epoch": 2.800145819831497, "grad_norm": 0.07329043000936508, "learning_rate": 6.665916557900896e-05, "loss": 0.2855, "step": 34565 }, { "epoch": 2.8002268308489953, "grad_norm": 0.06471650302410126, "learning_rate": 6.665466492641433e-05, "loss": 0.2268, "step": 34566 }, { "epoch": 2.800307841866494, "grad_norm": 0.07123664766550064, "learning_rate": 6.66501642738197e-05, "loss": 0.2165, "step": 34567 }, { "epoch": 2.800388852883992, "grad_norm": 0.07238410413265228, "learning_rate": 6.664566362122508e-05, "loss": 0.233, "step": 34568 }, { "epoch": 2.8004698639014904, "grad_norm": 0.07660076022148132, "learning_rate": 6.664116296863045e-05, "loss": 0.2519, "step": 34569 }, { "epoch": 2.800550874918989, "grad_norm": 0.07034006714820862, "learning_rate": 6.663666231603583e-05, "loss": 0.2442, "step": 34570 }, { "epoch": 2.8006318859364874, "grad_norm": 0.0729314535856247, "learning_rate": 6.66321616634412e-05, "loss": 0.2307, "step": 34571 }, { "epoch": 2.8007128969539856, "grad_norm": 0.06865367293357849, "learning_rate": 6.662766101084657e-05, "loss": 0.2385, "step": 34572 }, { "epoch": 2.8007939079714843, "grad_norm": 0.0807727724313736, "learning_rate": 6.662316035825195e-05, "loss": 0.2429, "step": 34573 }, { "epoch": 2.8008749189889826, "grad_norm": 0.06009256839752197, "learning_rate": 6.661865970565732e-05, "loss": 0.2426, "step": 34574 }, { "epoch": 2.800955930006481, "grad_norm": 0.07074223458766937, "learning_rate": 6.66141590530627e-05, "loss": 0.2391, "step": 34575 }, { "epoch": 2.8010369410239795, "grad_norm": 0.06917184591293335, "learning_rate": 6.660965840046807e-05, "loss": 0.2406, "step": 34576 }, { "epoch": 2.8011179520414777, "grad_norm": 0.07436686754226685, "learning_rate": 6.660515774787344e-05, "loss": 0.282, "step": 34577 }, { "epoch": 2.801198963058976, "grad_norm": 0.0587419793009758, "learning_rate": 6.660065709527882e-05, "loss": 0.2045, "step": 34578 }, { "epoch": 2.8012799740764747, "grad_norm": 0.06786326318979263, "learning_rate": 6.659615644268419e-05, "loss": 0.2613, "step": 34579 }, { "epoch": 2.801360985093973, "grad_norm": 0.06299388408660889, "learning_rate": 6.659165579008956e-05, "loss": 0.2184, "step": 34580 }, { "epoch": 2.801441996111471, "grad_norm": 0.0794903039932251, "learning_rate": 6.658715513749494e-05, "loss": 0.2686, "step": 34581 }, { "epoch": 2.8015230071289694, "grad_norm": 0.06590180099010468, "learning_rate": 6.658265448490031e-05, "loss": 0.2272, "step": 34582 }, { "epoch": 2.8016040181464676, "grad_norm": 0.0644986629486084, "learning_rate": 6.657815383230568e-05, "loss": 0.2402, "step": 34583 }, { "epoch": 2.8016850291639663, "grad_norm": 0.06860768795013428, "learning_rate": 6.657365317971106e-05, "loss": 0.2406, "step": 34584 }, { "epoch": 2.8017660401814646, "grad_norm": 0.059577204287052155, "learning_rate": 6.656915252711643e-05, "loss": 0.2433, "step": 34585 }, { "epoch": 2.801847051198963, "grad_norm": 0.06488867849111557, "learning_rate": 6.65646518745218e-05, "loss": 0.2505, "step": 34586 }, { "epoch": 2.8019280622164615, "grad_norm": 0.0636620968580246, "learning_rate": 6.656015122192718e-05, "loss": 0.2132, "step": 34587 }, { "epoch": 2.8020090732339598, "grad_norm": 0.07555467635393143, "learning_rate": 6.655565056933257e-05, "loss": 0.2072, "step": 34588 }, { "epoch": 2.802090084251458, "grad_norm": 0.06727524101734161, "learning_rate": 6.655114991673793e-05, "loss": 0.2106, "step": 34589 }, { "epoch": 2.8021710952689567, "grad_norm": 0.09039086848497391, "learning_rate": 6.65466492641433e-05, "loss": 0.224, "step": 34590 }, { "epoch": 2.802252106286455, "grad_norm": 0.06671174615621567, "learning_rate": 6.654214861154869e-05, "loss": 0.2353, "step": 34591 }, { "epoch": 2.802333117303953, "grad_norm": 0.068624347448349, "learning_rate": 6.653764795895405e-05, "loss": 0.25, "step": 34592 }, { "epoch": 2.802414128321452, "grad_norm": 0.06567755341529846, "learning_rate": 6.653314730635942e-05, "loss": 0.2615, "step": 34593 }, { "epoch": 2.80249513933895, "grad_norm": 0.06661804765462875, "learning_rate": 6.652864665376481e-05, "loss": 0.2165, "step": 34594 }, { "epoch": 2.8025761503564484, "grad_norm": 0.07081003487110138, "learning_rate": 6.652414600117017e-05, "loss": 0.2194, "step": 34595 }, { "epoch": 2.802657161373947, "grad_norm": 0.07812508940696716, "learning_rate": 6.651964534857554e-05, "loss": 0.2238, "step": 34596 }, { "epoch": 2.8027381723914453, "grad_norm": 0.06472626328468323, "learning_rate": 6.651514469598093e-05, "loss": 0.2292, "step": 34597 }, { "epoch": 2.8028191834089435, "grad_norm": 0.06261005252599716, "learning_rate": 6.651064404338629e-05, "loss": 0.2384, "step": 34598 }, { "epoch": 2.8029001944264422, "grad_norm": 0.07326589524745941, "learning_rate": 6.650614339079166e-05, "loss": 0.2766, "step": 34599 }, { "epoch": 2.8029812054439405, "grad_norm": 0.07288602739572525, "learning_rate": 6.650164273819705e-05, "loss": 0.2294, "step": 34600 }, { "epoch": 2.8030622164614387, "grad_norm": 0.07847611606121063, "learning_rate": 6.649714208560241e-05, "loss": 0.2404, "step": 34601 }, { "epoch": 2.8031432274789374, "grad_norm": 0.06981007754802704, "learning_rate": 6.649264143300778e-05, "loss": 0.1985, "step": 34602 }, { "epoch": 2.8032242384964356, "grad_norm": 0.08531308174133301, "learning_rate": 6.648814078041317e-05, "loss": 0.2403, "step": 34603 }, { "epoch": 2.803305249513934, "grad_norm": 0.08525741845369339, "learning_rate": 6.648364012781853e-05, "loss": 0.2574, "step": 34604 }, { "epoch": 2.803386260531432, "grad_norm": 0.08389617502689362, "learning_rate": 6.64791394752239e-05, "loss": 0.2306, "step": 34605 }, { "epoch": 2.8034672715489304, "grad_norm": 0.072044737637043, "learning_rate": 6.647463882262929e-05, "loss": 0.2215, "step": 34606 }, { "epoch": 2.803548282566429, "grad_norm": 0.06642574071884155, "learning_rate": 6.647013817003465e-05, "loss": 0.2351, "step": 34607 }, { "epoch": 2.8036292935839273, "grad_norm": 0.061156339943408966, "learning_rate": 6.646563751744002e-05, "loss": 0.1957, "step": 34608 }, { "epoch": 2.8037103046014256, "grad_norm": 0.06726231426000595, "learning_rate": 6.646113686484541e-05, "loss": 0.2144, "step": 34609 }, { "epoch": 2.8037913156189243, "grad_norm": 0.06568557769060135, "learning_rate": 6.645663621225077e-05, "loss": 0.2513, "step": 34610 }, { "epoch": 2.8038723266364225, "grad_norm": 0.06943460553884506, "learning_rate": 6.645213555965614e-05, "loss": 0.2174, "step": 34611 }, { "epoch": 2.8039533376539207, "grad_norm": 0.06923588365316391, "learning_rate": 6.644763490706153e-05, "loss": 0.2228, "step": 34612 }, { "epoch": 2.8040343486714194, "grad_norm": 0.07375077158212662, "learning_rate": 6.644313425446689e-05, "loss": 0.229, "step": 34613 }, { "epoch": 2.8041153596889177, "grad_norm": 0.0803399607539177, "learning_rate": 6.643863360187228e-05, "loss": 0.2498, "step": 34614 }, { "epoch": 2.804196370706416, "grad_norm": 0.0667591392993927, "learning_rate": 6.643413294927765e-05, "loss": 0.2154, "step": 34615 }, { "epoch": 2.8042773817239146, "grad_norm": 0.07770081609487534, "learning_rate": 6.642963229668303e-05, "loss": 0.2353, "step": 34616 }, { "epoch": 2.804358392741413, "grad_norm": 0.07570865750312805, "learning_rate": 6.64251316440884e-05, "loss": 0.2543, "step": 34617 }, { "epoch": 2.804439403758911, "grad_norm": 0.08184947073459625, "learning_rate": 6.642063099149377e-05, "loss": 0.212, "step": 34618 }, { "epoch": 2.80452041477641, "grad_norm": 0.07105211913585663, "learning_rate": 6.641613033889915e-05, "loss": 0.2307, "step": 34619 }, { "epoch": 2.804601425793908, "grad_norm": 0.08429677039384842, "learning_rate": 6.641162968630452e-05, "loss": 0.2478, "step": 34620 }, { "epoch": 2.8046824368114063, "grad_norm": 0.06321967393159866, "learning_rate": 6.64071290337099e-05, "loss": 0.2593, "step": 34621 }, { "epoch": 2.804763447828905, "grad_norm": 0.07362980395555496, "learning_rate": 6.640262838111527e-05, "loss": 0.2551, "step": 34622 }, { "epoch": 2.804844458846403, "grad_norm": 0.053744908422231674, "learning_rate": 6.639812772852064e-05, "loss": 0.2146, "step": 34623 }, { "epoch": 2.8049254698639015, "grad_norm": 0.05200938135385513, "learning_rate": 6.639362707592602e-05, "loss": 0.2074, "step": 34624 }, { "epoch": 2.8050064808814, "grad_norm": 0.06889305263757706, "learning_rate": 6.638912642333139e-05, "loss": 0.2125, "step": 34625 }, { "epoch": 2.8050874918988984, "grad_norm": 0.06506326794624329, "learning_rate": 6.638462577073676e-05, "loss": 0.2397, "step": 34626 }, { "epoch": 2.8051685029163966, "grad_norm": 0.06494927406311035, "learning_rate": 6.638012511814214e-05, "loss": 0.2151, "step": 34627 }, { "epoch": 2.805249513933895, "grad_norm": 0.061213236302137375, "learning_rate": 6.637562446554751e-05, "loss": 0.2753, "step": 34628 }, { "epoch": 2.805330524951393, "grad_norm": 0.06733286380767822, "learning_rate": 6.637112381295288e-05, "loss": 0.2663, "step": 34629 }, { "epoch": 2.805411535968892, "grad_norm": 0.07056358456611633, "learning_rate": 6.636662316035826e-05, "loss": 0.2348, "step": 34630 }, { "epoch": 2.80549254698639, "grad_norm": 0.06755927205085754, "learning_rate": 6.636212250776363e-05, "loss": 0.2353, "step": 34631 }, { "epoch": 2.8055735580038883, "grad_norm": 0.06227118894457817, "learning_rate": 6.6357621855169e-05, "loss": 0.2439, "step": 34632 }, { "epoch": 2.805654569021387, "grad_norm": 0.07647748291492462, "learning_rate": 6.635312120257438e-05, "loss": 0.2632, "step": 34633 }, { "epoch": 2.8057355800388852, "grad_norm": 0.05834818631410599, "learning_rate": 6.634862054997975e-05, "loss": 0.1897, "step": 34634 }, { "epoch": 2.8058165910563835, "grad_norm": 0.06183220446109772, "learning_rate": 6.634411989738512e-05, "loss": 0.2031, "step": 34635 }, { "epoch": 2.805897602073882, "grad_norm": 0.0634341686964035, "learning_rate": 6.63396192447905e-05, "loss": 0.2026, "step": 34636 }, { "epoch": 2.8059786130913804, "grad_norm": 0.06907891482114792, "learning_rate": 6.633511859219587e-05, "loss": 0.2259, "step": 34637 }, { "epoch": 2.8060596241088787, "grad_norm": 0.06217336654663086, "learning_rate": 6.633061793960125e-05, "loss": 0.246, "step": 34638 }, { "epoch": 2.8061406351263773, "grad_norm": 0.0637386217713356, "learning_rate": 6.632611728700662e-05, "loss": 0.2425, "step": 34639 }, { "epoch": 2.8062216461438756, "grad_norm": 0.05497181415557861, "learning_rate": 6.632161663441199e-05, "loss": 0.2363, "step": 34640 }, { "epoch": 2.806302657161374, "grad_norm": 0.06874691694974899, "learning_rate": 6.631711598181737e-05, "loss": 0.2528, "step": 34641 }, { "epoch": 2.8063836681788725, "grad_norm": 0.060625869780778885, "learning_rate": 6.631261532922274e-05, "loss": 0.2129, "step": 34642 }, { "epoch": 2.8064646791963708, "grad_norm": 0.06054311618208885, "learning_rate": 6.630811467662811e-05, "loss": 0.205, "step": 34643 }, { "epoch": 2.806545690213869, "grad_norm": 0.0658852681517601, "learning_rate": 6.630361402403349e-05, "loss": 0.2381, "step": 34644 }, { "epoch": 2.8066267012313677, "grad_norm": 0.07085654884576797, "learning_rate": 6.629911337143886e-05, "loss": 0.2305, "step": 34645 }, { "epoch": 2.806707712248866, "grad_norm": 0.08350074291229248, "learning_rate": 6.629461271884423e-05, "loss": 0.2542, "step": 34646 }, { "epoch": 2.806788723266364, "grad_norm": 0.08466752618551254, "learning_rate": 6.629011206624961e-05, "loss": 0.2522, "step": 34647 }, { "epoch": 2.806869734283863, "grad_norm": 0.0648236945271492, "learning_rate": 6.628561141365498e-05, "loss": 0.2413, "step": 34648 }, { "epoch": 2.806950745301361, "grad_norm": 0.06670759618282318, "learning_rate": 6.628111076106036e-05, "loss": 0.2769, "step": 34649 }, { "epoch": 2.8070317563188594, "grad_norm": 0.07005154341459274, "learning_rate": 6.627661010846573e-05, "loss": 0.2387, "step": 34650 }, { "epoch": 2.8071127673363576, "grad_norm": 0.07206209748983383, "learning_rate": 6.62721094558711e-05, "loss": 0.2357, "step": 34651 }, { "epoch": 2.807193778353856, "grad_norm": 0.07660338282585144, "learning_rate": 6.626760880327648e-05, "loss": 0.2409, "step": 34652 }, { "epoch": 2.8072747893713546, "grad_norm": 0.06903325766324997, "learning_rate": 6.626310815068185e-05, "loss": 0.249, "step": 34653 }, { "epoch": 2.807355800388853, "grad_norm": 0.06659288704395294, "learning_rate": 6.625860749808722e-05, "loss": 0.2152, "step": 34654 }, { "epoch": 2.807436811406351, "grad_norm": 0.05893072485923767, "learning_rate": 6.62541068454926e-05, "loss": 0.2092, "step": 34655 }, { "epoch": 2.8075178224238497, "grad_norm": 0.07203570753335953, "learning_rate": 6.624960619289797e-05, "loss": 0.2303, "step": 34656 }, { "epoch": 2.807598833441348, "grad_norm": 0.06032579019665718, "learning_rate": 6.624510554030334e-05, "loss": 0.2224, "step": 34657 }, { "epoch": 2.807679844458846, "grad_norm": 0.08079580217599869, "learning_rate": 6.624060488770872e-05, "loss": 0.26, "step": 34658 }, { "epoch": 2.807760855476345, "grad_norm": 0.07629918307065964, "learning_rate": 6.623610423511409e-05, "loss": 0.22, "step": 34659 }, { "epoch": 2.807841866493843, "grad_norm": 0.0819818377494812, "learning_rate": 6.623160358251947e-05, "loss": 0.2332, "step": 34660 }, { "epoch": 2.8079228775113414, "grad_norm": 0.07031166553497314, "learning_rate": 6.622710292992484e-05, "loss": 0.2707, "step": 34661 }, { "epoch": 2.80800388852884, "grad_norm": 0.07032985240221024, "learning_rate": 6.622260227733021e-05, "loss": 0.2258, "step": 34662 }, { "epoch": 2.8080848995463383, "grad_norm": 0.07309204339981079, "learning_rate": 6.621810162473559e-05, "loss": 0.2405, "step": 34663 }, { "epoch": 2.8081659105638366, "grad_norm": 0.06003347039222717, "learning_rate": 6.621360097214096e-05, "loss": 0.1924, "step": 34664 }, { "epoch": 2.8082469215813353, "grad_norm": 0.08050237596035004, "learning_rate": 6.620910031954633e-05, "loss": 0.2514, "step": 34665 }, { "epoch": 2.8083279325988335, "grad_norm": 0.06988146901130676, "learning_rate": 6.620459966695172e-05, "loss": 0.2691, "step": 34666 }, { "epoch": 2.8084089436163318, "grad_norm": 0.06781960278749466, "learning_rate": 6.620009901435708e-05, "loss": 0.2264, "step": 34667 }, { "epoch": 2.8084899546338304, "grad_norm": 0.07960690557956696, "learning_rate": 6.619559836176245e-05, "loss": 0.2934, "step": 34668 }, { "epoch": 2.8085709656513287, "grad_norm": 0.08437389880418777, "learning_rate": 6.619109770916784e-05, "loss": 0.3173, "step": 34669 }, { "epoch": 2.808651976668827, "grad_norm": 0.07166004180908203, "learning_rate": 6.61865970565732e-05, "loss": 0.2455, "step": 34670 }, { "epoch": 2.808732987686325, "grad_norm": 0.0753786712884903, "learning_rate": 6.618209640397857e-05, "loss": 0.2823, "step": 34671 }, { "epoch": 2.808813998703824, "grad_norm": 0.0854075700044632, "learning_rate": 6.617759575138396e-05, "loss": 0.2383, "step": 34672 }, { "epoch": 2.808895009721322, "grad_norm": 0.06980396062135696, "learning_rate": 6.617309509878932e-05, "loss": 0.2351, "step": 34673 }, { "epoch": 2.8089760207388204, "grad_norm": 0.06357509642839432, "learning_rate": 6.61685944461947e-05, "loss": 0.2088, "step": 34674 }, { "epoch": 2.8090570317563186, "grad_norm": 0.05988110974431038, "learning_rate": 6.616409379360008e-05, "loss": 0.2062, "step": 34675 }, { "epoch": 2.8091380427738173, "grad_norm": 0.07213082164525986, "learning_rate": 6.615959314100544e-05, "loss": 0.2386, "step": 34676 }, { "epoch": 2.8092190537913155, "grad_norm": 0.08268000185489655, "learning_rate": 6.615509248841082e-05, "loss": 0.2764, "step": 34677 }, { "epoch": 2.809300064808814, "grad_norm": 0.05751238018274307, "learning_rate": 6.61505918358162e-05, "loss": 0.2063, "step": 34678 }, { "epoch": 2.8093810758263125, "grad_norm": 0.0581294521689415, "learning_rate": 6.614609118322156e-05, "loss": 0.1949, "step": 34679 }, { "epoch": 2.8094620868438107, "grad_norm": 0.07043235749006271, "learning_rate": 6.614159053062694e-05, "loss": 0.2478, "step": 34680 }, { "epoch": 2.809543097861309, "grad_norm": 0.07290154695510864, "learning_rate": 6.613708987803232e-05, "loss": 0.2856, "step": 34681 }, { "epoch": 2.8096241088788076, "grad_norm": 0.07745785266160965, "learning_rate": 6.613258922543768e-05, "loss": 0.2073, "step": 34682 }, { "epoch": 2.809705119896306, "grad_norm": 0.07462266832590103, "learning_rate": 6.612808857284306e-05, "loss": 0.2184, "step": 34683 }, { "epoch": 2.809786130913804, "grad_norm": 0.0726090669631958, "learning_rate": 6.612358792024845e-05, "loss": 0.2314, "step": 34684 }, { "epoch": 2.809867141931303, "grad_norm": 0.07967076450586319, "learning_rate": 6.611908726765382e-05, "loss": 0.2429, "step": 34685 }, { "epoch": 2.809948152948801, "grad_norm": 0.06095170974731445, "learning_rate": 6.611458661505918e-05, "loss": 0.2106, "step": 34686 }, { "epoch": 2.8100291639662993, "grad_norm": 0.06986818462610245, "learning_rate": 6.611008596246457e-05, "loss": 0.2225, "step": 34687 }, { "epoch": 2.810110174983798, "grad_norm": 0.06797707825899124, "learning_rate": 6.610558530986994e-05, "loss": 0.2099, "step": 34688 }, { "epoch": 2.8101911860012962, "grad_norm": 0.06992008537054062, "learning_rate": 6.61010846572753e-05, "loss": 0.2615, "step": 34689 }, { "epoch": 2.8102721970187945, "grad_norm": 0.0866551622748375, "learning_rate": 6.609658400468069e-05, "loss": 0.2792, "step": 34690 }, { "epoch": 2.810353208036293, "grad_norm": 0.06725787371397018, "learning_rate": 6.609208335208606e-05, "loss": 0.2464, "step": 34691 }, { "epoch": 2.8104342190537914, "grad_norm": 0.06845443695783615, "learning_rate": 6.608758269949143e-05, "loss": 0.239, "step": 34692 }, { "epoch": 2.8105152300712897, "grad_norm": 0.07654809206724167, "learning_rate": 6.608308204689681e-05, "loss": 0.2886, "step": 34693 }, { "epoch": 2.810596241088788, "grad_norm": 0.0735306441783905, "learning_rate": 6.607858139430218e-05, "loss": 0.2403, "step": 34694 }, { "epoch": 2.8106772521062866, "grad_norm": 0.06867165863513947, "learning_rate": 6.607408074170755e-05, "loss": 0.2408, "step": 34695 }, { "epoch": 2.810758263123785, "grad_norm": 0.06796415150165558, "learning_rate": 6.606958008911293e-05, "loss": 0.2632, "step": 34696 }, { "epoch": 2.810839274141283, "grad_norm": 0.07219690829515457, "learning_rate": 6.60650794365183e-05, "loss": 0.22, "step": 34697 }, { "epoch": 2.8109202851587813, "grad_norm": 0.0721060186624527, "learning_rate": 6.606057878392368e-05, "loss": 0.2405, "step": 34698 }, { "epoch": 2.81100129617628, "grad_norm": 0.06989874690771103, "learning_rate": 6.605607813132905e-05, "loss": 0.2185, "step": 34699 }, { "epoch": 2.8110823071937783, "grad_norm": 0.07357072085142136, "learning_rate": 6.605157747873442e-05, "loss": 0.2446, "step": 34700 }, { "epoch": 2.8111633182112765, "grad_norm": 0.06997592002153397, "learning_rate": 6.60470768261398e-05, "loss": 0.2355, "step": 34701 }, { "epoch": 2.811244329228775, "grad_norm": 0.06015832722187042, "learning_rate": 6.604257617354517e-05, "loss": 0.2095, "step": 34702 }, { "epoch": 2.8113253402462735, "grad_norm": 0.06459351629018784, "learning_rate": 6.603807552095054e-05, "loss": 0.2295, "step": 34703 }, { "epoch": 2.8114063512637717, "grad_norm": 0.06236038729548454, "learning_rate": 6.603357486835592e-05, "loss": 0.2424, "step": 34704 }, { "epoch": 2.8114873622812704, "grad_norm": 0.0749562606215477, "learning_rate": 6.602907421576129e-05, "loss": 0.2212, "step": 34705 }, { "epoch": 2.8115683732987686, "grad_norm": 0.06482082605361938, "learning_rate": 6.602457356316666e-05, "loss": 0.2116, "step": 34706 }, { "epoch": 2.811649384316267, "grad_norm": 0.06248362362384796, "learning_rate": 6.602007291057204e-05, "loss": 0.196, "step": 34707 }, { "epoch": 2.8117303953337656, "grad_norm": 0.0831056535243988, "learning_rate": 6.601557225797741e-05, "loss": 0.2368, "step": 34708 }, { "epoch": 2.811811406351264, "grad_norm": 0.07669353485107422, "learning_rate": 6.601107160538279e-05, "loss": 0.2473, "step": 34709 }, { "epoch": 2.811892417368762, "grad_norm": 0.06788359582424164, "learning_rate": 6.600657095278816e-05, "loss": 0.2364, "step": 34710 }, { "epoch": 2.8119734283862607, "grad_norm": 0.07720784842967987, "learning_rate": 6.600207030019353e-05, "loss": 0.255, "step": 34711 }, { "epoch": 2.812054439403759, "grad_norm": 0.07028775662183762, "learning_rate": 6.59975696475989e-05, "loss": 0.2154, "step": 34712 }, { "epoch": 2.8121354504212572, "grad_norm": 0.07846012711524963, "learning_rate": 6.599306899500428e-05, "loss": 0.2551, "step": 34713 }, { "epoch": 2.812216461438756, "grad_norm": 0.05860767886042595, "learning_rate": 6.598856834240965e-05, "loss": 0.2109, "step": 34714 }, { "epoch": 2.812297472456254, "grad_norm": 0.06866607069969177, "learning_rate": 6.598406768981503e-05, "loss": 0.2356, "step": 34715 }, { "epoch": 2.8123784834737524, "grad_norm": 0.0720444768667221, "learning_rate": 6.59795670372204e-05, "loss": 0.2537, "step": 34716 }, { "epoch": 2.8124594944912507, "grad_norm": 0.07664826512336731, "learning_rate": 6.597506638462577e-05, "loss": 0.2701, "step": 34717 }, { "epoch": 2.8125405055087493, "grad_norm": 0.06196041405200958, "learning_rate": 6.597056573203115e-05, "loss": 0.2354, "step": 34718 }, { "epoch": 2.8126215165262476, "grad_norm": 0.07691022008657455, "learning_rate": 6.596606507943652e-05, "loss": 0.2415, "step": 34719 }, { "epoch": 2.812702527543746, "grad_norm": 0.05945818871259689, "learning_rate": 6.59615644268419e-05, "loss": 0.2418, "step": 34720 }, { "epoch": 2.812783538561244, "grad_norm": 0.06513824313879013, "learning_rate": 6.595706377424727e-05, "loss": 0.2605, "step": 34721 }, { "epoch": 2.8128645495787428, "grad_norm": 0.08122964203357697, "learning_rate": 6.595256312165264e-05, "loss": 0.2643, "step": 34722 }, { "epoch": 2.812945560596241, "grad_norm": 0.06063034385442734, "learning_rate": 6.594806246905802e-05, "loss": 0.2454, "step": 34723 }, { "epoch": 2.8130265716137393, "grad_norm": 0.059998977929353714, "learning_rate": 6.594356181646339e-05, "loss": 0.2002, "step": 34724 }, { "epoch": 2.813107582631238, "grad_norm": 0.07546351104974747, "learning_rate": 6.593906116386876e-05, "loss": 0.2441, "step": 34725 }, { "epoch": 2.813188593648736, "grad_norm": 0.07578836381435394, "learning_rate": 6.593456051127414e-05, "loss": 0.2553, "step": 34726 }, { "epoch": 2.8132696046662344, "grad_norm": 0.07513416558504105, "learning_rate": 6.593005985867951e-05, "loss": 0.2314, "step": 34727 }, { "epoch": 2.813350615683733, "grad_norm": 0.07094703614711761, "learning_rate": 6.592555920608488e-05, "loss": 0.2104, "step": 34728 }, { "epoch": 2.8134316267012314, "grad_norm": 0.06131584197282791, "learning_rate": 6.592105855349026e-05, "loss": 0.2389, "step": 34729 }, { "epoch": 2.8135126377187296, "grad_norm": 0.06059715896844864, "learning_rate": 6.591655790089563e-05, "loss": 0.2167, "step": 34730 }, { "epoch": 2.8135936487362283, "grad_norm": 0.08900104463100433, "learning_rate": 6.5912057248301e-05, "loss": 0.2172, "step": 34731 }, { "epoch": 2.8136746597537265, "grad_norm": 0.06046757847070694, "learning_rate": 6.590755659570638e-05, "loss": 0.2062, "step": 34732 }, { "epoch": 2.813755670771225, "grad_norm": 0.06872811168432236, "learning_rate": 6.590305594311175e-05, "loss": 0.2142, "step": 34733 }, { "epoch": 2.8138366817887235, "grad_norm": 0.07165510952472687, "learning_rate": 6.589855529051713e-05, "loss": 0.2444, "step": 34734 }, { "epoch": 2.8139176928062217, "grad_norm": 0.06559931486845016, "learning_rate": 6.58940546379225e-05, "loss": 0.2244, "step": 34735 }, { "epoch": 2.81399870382372, "grad_norm": 0.061313237994909286, "learning_rate": 6.588955398532787e-05, "loss": 0.2403, "step": 34736 }, { "epoch": 2.8140797148412187, "grad_norm": 0.07175567001104355, "learning_rate": 6.588505333273325e-05, "loss": 0.2025, "step": 34737 }, { "epoch": 2.814160725858717, "grad_norm": 0.07134431600570679, "learning_rate": 6.588055268013862e-05, "loss": 0.227, "step": 34738 }, { "epoch": 2.814241736876215, "grad_norm": 0.07444427162408829, "learning_rate": 6.5876052027544e-05, "loss": 0.2227, "step": 34739 }, { "epoch": 2.8143227478937134, "grad_norm": 0.0755746141076088, "learning_rate": 6.587155137494937e-05, "loss": 0.2617, "step": 34740 }, { "epoch": 2.814403758911212, "grad_norm": 0.07173725217580795, "learning_rate": 6.586705072235474e-05, "loss": 0.2281, "step": 34741 }, { "epoch": 2.8144847699287103, "grad_norm": 0.08544918894767761, "learning_rate": 6.586255006976011e-05, "loss": 0.2604, "step": 34742 }, { "epoch": 2.8145657809462086, "grad_norm": 0.08578377962112427, "learning_rate": 6.585804941716549e-05, "loss": 0.2231, "step": 34743 }, { "epoch": 2.814646791963707, "grad_norm": 0.04919799044728279, "learning_rate": 6.585354876457086e-05, "loss": 0.2155, "step": 34744 }, { "epoch": 2.8147278029812055, "grad_norm": 0.07585250586271286, "learning_rate": 6.584904811197623e-05, "loss": 0.2356, "step": 34745 }, { "epoch": 2.8148088139987038, "grad_norm": 0.08482617139816284, "learning_rate": 6.584454745938161e-05, "loss": 0.2677, "step": 34746 }, { "epoch": 2.814889825016202, "grad_norm": 0.08311016857624054, "learning_rate": 6.5840046806787e-05, "loss": 0.2143, "step": 34747 }, { "epoch": 2.8149708360337007, "grad_norm": 0.07984510809183121, "learning_rate": 6.583554615419236e-05, "loss": 0.287, "step": 34748 }, { "epoch": 2.815051847051199, "grad_norm": 0.058721333742141724, "learning_rate": 6.583104550159773e-05, "loss": 0.2335, "step": 34749 }, { "epoch": 2.815132858068697, "grad_norm": 0.07456637173891068, "learning_rate": 6.582654484900312e-05, "loss": 0.2607, "step": 34750 }, { "epoch": 2.815213869086196, "grad_norm": 0.06956981122493744, "learning_rate": 6.582204419640848e-05, "loss": 0.2164, "step": 34751 }, { "epoch": 2.815294880103694, "grad_norm": 0.05485118553042412, "learning_rate": 6.581754354381385e-05, "loss": 0.2154, "step": 34752 }, { "epoch": 2.8153758911211924, "grad_norm": 0.0642561987042427, "learning_rate": 6.581304289121924e-05, "loss": 0.2323, "step": 34753 }, { "epoch": 2.815456902138691, "grad_norm": 0.08512634038925171, "learning_rate": 6.580854223862461e-05, "loss": 0.2392, "step": 34754 }, { "epoch": 2.8155379131561893, "grad_norm": 0.07992500811815262, "learning_rate": 6.580404158602997e-05, "loss": 0.2507, "step": 34755 }, { "epoch": 2.8156189241736875, "grad_norm": 0.06383884698152542, "learning_rate": 6.579954093343536e-05, "loss": 0.2258, "step": 34756 }, { "epoch": 2.815699935191186, "grad_norm": 0.07852292060852051, "learning_rate": 6.579504028084073e-05, "loss": 0.2868, "step": 34757 }, { "epoch": 2.8157809462086845, "grad_norm": 0.06256461888551712, "learning_rate": 6.579053962824609e-05, "loss": 0.2594, "step": 34758 }, { "epoch": 2.8158619572261827, "grad_norm": 0.06567879021167755, "learning_rate": 6.578603897565148e-05, "loss": 0.241, "step": 34759 }, { "epoch": 2.8159429682436814, "grad_norm": 0.08263526856899261, "learning_rate": 6.578153832305685e-05, "loss": 0.2563, "step": 34760 }, { "epoch": 2.8160239792611796, "grad_norm": 0.05803782120347023, "learning_rate": 6.577703767046221e-05, "loss": 0.2702, "step": 34761 }, { "epoch": 2.816104990278678, "grad_norm": 0.06257728487253189, "learning_rate": 6.57725370178676e-05, "loss": 0.2379, "step": 34762 }, { "epoch": 2.816186001296176, "grad_norm": 0.0719677284359932, "learning_rate": 6.576803636527297e-05, "loss": 0.2398, "step": 34763 }, { "epoch": 2.816267012313675, "grad_norm": 0.06265591830015182, "learning_rate": 6.576353571267833e-05, "loss": 0.2097, "step": 34764 }, { "epoch": 2.816348023331173, "grad_norm": 0.06853341311216354, "learning_rate": 6.575903506008372e-05, "loss": 0.2298, "step": 34765 }, { "epoch": 2.8164290343486713, "grad_norm": 0.07417232543230057, "learning_rate": 6.57545344074891e-05, "loss": 0.2361, "step": 34766 }, { "epoch": 2.8165100453661696, "grad_norm": 0.06885260343551636, "learning_rate": 6.575003375489445e-05, "loss": 0.2339, "step": 34767 }, { "epoch": 2.8165910563836682, "grad_norm": 0.07457751035690308, "learning_rate": 6.574553310229984e-05, "loss": 0.2434, "step": 34768 }, { "epoch": 2.8166720674011665, "grad_norm": 0.0838153213262558, "learning_rate": 6.574103244970521e-05, "loss": 0.2571, "step": 34769 }, { "epoch": 2.8167530784186647, "grad_norm": 0.0747385174036026, "learning_rate": 6.573653179711057e-05, "loss": 0.2562, "step": 34770 }, { "epoch": 2.8168340894361634, "grad_norm": 0.06462652236223221, "learning_rate": 6.573203114451596e-05, "loss": 0.2311, "step": 34771 }, { "epoch": 2.8169151004536617, "grad_norm": 0.06499453634023666, "learning_rate": 6.572753049192134e-05, "loss": 0.2292, "step": 34772 }, { "epoch": 2.81699611147116, "grad_norm": 0.07763504236936569, "learning_rate": 6.572302983932671e-05, "loss": 0.275, "step": 34773 }, { "epoch": 2.8170771224886586, "grad_norm": 0.07104837894439697, "learning_rate": 6.571852918673208e-05, "loss": 0.2451, "step": 34774 }, { "epoch": 2.817158133506157, "grad_norm": 0.06204180419445038, "learning_rate": 6.571402853413746e-05, "loss": 0.2286, "step": 34775 }, { "epoch": 2.817239144523655, "grad_norm": 0.060844436287879944, "learning_rate": 6.570952788154283e-05, "loss": 0.2389, "step": 34776 }, { "epoch": 2.817320155541154, "grad_norm": 0.07024809718132019, "learning_rate": 6.57050272289482e-05, "loss": 0.2467, "step": 34777 }, { "epoch": 2.817401166558652, "grad_norm": 0.06422761082649231, "learning_rate": 6.570052657635358e-05, "loss": 0.231, "step": 34778 }, { "epoch": 2.8174821775761503, "grad_norm": 0.066131092607975, "learning_rate": 6.569602592375895e-05, "loss": 0.2645, "step": 34779 }, { "epoch": 2.817563188593649, "grad_norm": 0.08315324038267136, "learning_rate": 6.569152527116432e-05, "loss": 0.2272, "step": 34780 }, { "epoch": 2.817644199611147, "grad_norm": 0.07713814824819565, "learning_rate": 6.56870246185697e-05, "loss": 0.2293, "step": 34781 }, { "epoch": 2.8177252106286454, "grad_norm": 0.08167970180511475, "learning_rate": 6.568252396597507e-05, "loss": 0.2669, "step": 34782 }, { "epoch": 2.817806221646144, "grad_norm": 0.060405585914850235, "learning_rate": 6.567802331338045e-05, "loss": 0.2059, "step": 34783 }, { "epoch": 2.8178872326636424, "grad_norm": 0.05984266847372055, "learning_rate": 6.567352266078582e-05, "loss": 0.2416, "step": 34784 }, { "epoch": 2.8179682436811406, "grad_norm": 0.0785268023610115, "learning_rate": 6.566902200819119e-05, "loss": 0.2622, "step": 34785 }, { "epoch": 2.818049254698639, "grad_norm": 0.06491284817457199, "learning_rate": 6.566452135559657e-05, "loss": 0.2012, "step": 34786 }, { "epoch": 2.818130265716137, "grad_norm": 0.07182303071022034, "learning_rate": 6.566002070300194e-05, "loss": 0.1987, "step": 34787 }, { "epoch": 2.818211276733636, "grad_norm": 0.06826022267341614, "learning_rate": 6.565552005040731e-05, "loss": 0.2458, "step": 34788 }, { "epoch": 2.818292287751134, "grad_norm": 0.07355841994285583, "learning_rate": 6.565101939781269e-05, "loss": 0.2204, "step": 34789 }, { "epoch": 2.8183732987686323, "grad_norm": 0.07428918778896332, "learning_rate": 6.564651874521806e-05, "loss": 0.2811, "step": 34790 }, { "epoch": 2.818454309786131, "grad_norm": 0.06705375015735626, "learning_rate": 6.564201809262343e-05, "loss": 0.2766, "step": 34791 }, { "epoch": 2.8185353208036292, "grad_norm": 0.07119186222553253, "learning_rate": 6.563751744002881e-05, "loss": 0.2485, "step": 34792 }, { "epoch": 2.8186163318211275, "grad_norm": 0.06527364999055862, "learning_rate": 6.563301678743418e-05, "loss": 0.1968, "step": 34793 }, { "epoch": 2.818697342838626, "grad_norm": 0.06532592326402664, "learning_rate": 6.562851613483956e-05, "loss": 0.2672, "step": 34794 }, { "epoch": 2.8187783538561244, "grad_norm": 0.06652968376874924, "learning_rate": 6.562401548224493e-05, "loss": 0.221, "step": 34795 }, { "epoch": 2.8188593648736227, "grad_norm": 0.07571815699338913, "learning_rate": 6.56195148296503e-05, "loss": 0.2233, "step": 34796 }, { "epoch": 2.8189403758911213, "grad_norm": 0.07855816185474396, "learning_rate": 6.561501417705568e-05, "loss": 0.253, "step": 34797 }, { "epoch": 2.8190213869086196, "grad_norm": 0.07121338695287704, "learning_rate": 6.561051352446105e-05, "loss": 0.2578, "step": 34798 }, { "epoch": 2.819102397926118, "grad_norm": 0.06593113392591476, "learning_rate": 6.560601287186642e-05, "loss": 0.212, "step": 34799 }, { "epoch": 2.8191834089436165, "grad_norm": 0.07159001380205154, "learning_rate": 6.56015122192718e-05, "loss": 0.2365, "step": 34800 }, { "epoch": 2.8192644199611148, "grad_norm": 0.06383642554283142, "learning_rate": 6.559701156667717e-05, "loss": 0.1856, "step": 34801 }, { "epoch": 2.819345430978613, "grad_norm": 0.06708572059869766, "learning_rate": 6.559251091408254e-05, "loss": 0.2248, "step": 34802 }, { "epoch": 2.8194264419961117, "grad_norm": 0.06649935245513916, "learning_rate": 6.558801026148792e-05, "loss": 0.2095, "step": 34803 }, { "epoch": 2.81950745301361, "grad_norm": 0.06844470649957657, "learning_rate": 6.558350960889329e-05, "loss": 0.2459, "step": 34804 }, { "epoch": 2.819588464031108, "grad_norm": 0.06876395642757416, "learning_rate": 6.557900895629866e-05, "loss": 0.2228, "step": 34805 }, { "epoch": 2.819669475048607, "grad_norm": 0.07152493298053741, "learning_rate": 6.557450830370404e-05, "loss": 0.2422, "step": 34806 }, { "epoch": 2.819750486066105, "grad_norm": 0.075772725045681, "learning_rate": 6.557000765110941e-05, "loss": 0.2311, "step": 34807 }, { "epoch": 2.8198314970836034, "grad_norm": 0.05591581016778946, "learning_rate": 6.556550699851479e-05, "loss": 0.2257, "step": 34808 }, { "epoch": 2.8199125081011016, "grad_norm": 0.0721067413687706, "learning_rate": 6.556100634592016e-05, "loss": 0.2546, "step": 34809 }, { "epoch": 2.8199935191186, "grad_norm": 0.04921577125787735, "learning_rate": 6.555650569332553e-05, "loss": 0.1953, "step": 34810 }, { "epoch": 2.8200745301360985, "grad_norm": 0.07357095181941986, "learning_rate": 6.55520050407309e-05, "loss": 0.2555, "step": 34811 }, { "epoch": 2.820155541153597, "grad_norm": 0.08259088546037674, "learning_rate": 6.554750438813628e-05, "loss": 0.2466, "step": 34812 }, { "epoch": 2.820236552171095, "grad_norm": 0.07193511724472046, "learning_rate": 6.554300373554165e-05, "loss": 0.2403, "step": 34813 }, { "epoch": 2.8203175631885937, "grad_norm": 0.0642080307006836, "learning_rate": 6.553850308294703e-05, "loss": 0.2419, "step": 34814 }, { "epoch": 2.820398574206092, "grad_norm": 0.10085230320692062, "learning_rate": 6.55340024303524e-05, "loss": 0.2357, "step": 34815 }, { "epoch": 2.82047958522359, "grad_norm": 0.07006397098302841, "learning_rate": 6.552950177775777e-05, "loss": 0.2743, "step": 34816 }, { "epoch": 2.820560596241089, "grad_norm": 0.057857707142829895, "learning_rate": 6.552500112516315e-05, "loss": 0.2454, "step": 34817 }, { "epoch": 2.820641607258587, "grad_norm": 0.06146040931344032, "learning_rate": 6.552050047256852e-05, "loss": 0.1922, "step": 34818 }, { "epoch": 2.8207226182760854, "grad_norm": 0.09001678228378296, "learning_rate": 6.55159998199739e-05, "loss": 0.2536, "step": 34819 }, { "epoch": 2.820803629293584, "grad_norm": 0.07994525879621506, "learning_rate": 6.551149916737928e-05, "loss": 0.2718, "step": 34820 }, { "epoch": 2.8208846403110823, "grad_norm": 0.059684574604034424, "learning_rate": 6.550699851478464e-05, "loss": 0.2079, "step": 34821 }, { "epoch": 2.8209656513285806, "grad_norm": 0.06576598435640335, "learning_rate": 6.550249786219002e-05, "loss": 0.2614, "step": 34822 }, { "epoch": 2.8210466623460793, "grad_norm": 0.055970799177885056, "learning_rate": 6.54979972095954e-05, "loss": 0.186, "step": 34823 }, { "epoch": 2.8211276733635775, "grad_norm": 0.06768764555454254, "learning_rate": 6.549349655700076e-05, "loss": 0.2597, "step": 34824 }, { "epoch": 2.8212086843810757, "grad_norm": 0.07927141338586807, "learning_rate": 6.548899590440615e-05, "loss": 0.2573, "step": 34825 }, { "epoch": 2.8212896953985744, "grad_norm": 0.07906976342201233, "learning_rate": 6.548449525181152e-05, "loss": 0.2302, "step": 34826 }, { "epoch": 2.8213707064160727, "grad_norm": 0.09384344518184662, "learning_rate": 6.547999459921688e-05, "loss": 0.2324, "step": 34827 }, { "epoch": 2.821451717433571, "grad_norm": 0.06932132691144943, "learning_rate": 6.547549394662227e-05, "loss": 0.23, "step": 34828 }, { "epoch": 2.8215327284510696, "grad_norm": 0.06174250319600105, "learning_rate": 6.547099329402764e-05, "loss": 0.227, "step": 34829 }, { "epoch": 2.821613739468568, "grad_norm": 0.06949333846569061, "learning_rate": 6.5466492641433e-05, "loss": 0.2362, "step": 34830 }, { "epoch": 2.821694750486066, "grad_norm": 0.0753270760178566, "learning_rate": 6.546199198883839e-05, "loss": 0.2455, "step": 34831 }, { "epoch": 2.8217757615035644, "grad_norm": 0.07068932056427002, "learning_rate": 6.545749133624377e-05, "loss": 0.2138, "step": 34832 }, { "epoch": 2.8218567725210626, "grad_norm": 0.059605516493320465, "learning_rate": 6.545299068364913e-05, "loss": 0.2259, "step": 34833 }, { "epoch": 2.8219377835385613, "grad_norm": 0.07835594564676285, "learning_rate": 6.544849003105451e-05, "loss": 0.2409, "step": 34834 }, { "epoch": 2.8220187945560595, "grad_norm": 0.058763157576322556, "learning_rate": 6.544398937845989e-05, "loss": 0.1978, "step": 34835 }, { "epoch": 2.8220998055735578, "grad_norm": 0.06683146208524704, "learning_rate": 6.543948872586525e-05, "loss": 0.2535, "step": 34836 }, { "epoch": 2.8221808165910565, "grad_norm": 0.07187090069055557, "learning_rate": 6.543498807327063e-05, "loss": 0.2472, "step": 34837 }, { "epoch": 2.8222618276085547, "grad_norm": 0.06906905770301819, "learning_rate": 6.543048742067601e-05, "loss": 0.2251, "step": 34838 }, { "epoch": 2.822342838626053, "grad_norm": 0.07675648480653763, "learning_rate": 6.542598676808137e-05, "loss": 0.2314, "step": 34839 }, { "epoch": 2.8224238496435516, "grad_norm": 0.06668443977832794, "learning_rate": 6.542148611548675e-05, "loss": 0.2838, "step": 34840 }, { "epoch": 2.82250486066105, "grad_norm": 0.06730206310749054, "learning_rate": 6.541698546289213e-05, "loss": 0.2056, "step": 34841 }, { "epoch": 2.822585871678548, "grad_norm": 0.061029449105262756, "learning_rate": 6.541248481029749e-05, "loss": 0.2379, "step": 34842 }, { "epoch": 2.822666882696047, "grad_norm": 0.07811133563518524, "learning_rate": 6.540798415770288e-05, "loss": 0.2162, "step": 34843 }, { "epoch": 2.822747893713545, "grad_norm": 0.06834142655134201, "learning_rate": 6.540348350510825e-05, "loss": 0.2365, "step": 34844 }, { "epoch": 2.8228289047310433, "grad_norm": 0.07567695528268814, "learning_rate": 6.539898285251361e-05, "loss": 0.2561, "step": 34845 }, { "epoch": 2.822909915748542, "grad_norm": 0.07392167299985886, "learning_rate": 6.5394482199919e-05, "loss": 0.2387, "step": 34846 }, { "epoch": 2.8229909267660402, "grad_norm": 0.07837437838315964, "learning_rate": 6.538998154732437e-05, "loss": 0.2296, "step": 34847 }, { "epoch": 2.8230719377835385, "grad_norm": 0.06737375259399414, "learning_rate": 6.538548089472973e-05, "loss": 0.2398, "step": 34848 }, { "epoch": 2.823152948801037, "grad_norm": 0.05513811856508255, "learning_rate": 6.538098024213512e-05, "loss": 0.2324, "step": 34849 }, { "epoch": 2.8232339598185354, "grad_norm": 0.07730121910572052, "learning_rate": 6.537647958954049e-05, "loss": 0.2797, "step": 34850 }, { "epoch": 2.8233149708360337, "grad_norm": 0.06672193855047226, "learning_rate": 6.537197893694586e-05, "loss": 0.2119, "step": 34851 }, { "epoch": 2.8233959818535324, "grad_norm": 0.054995715618133545, "learning_rate": 6.536747828435124e-05, "loss": 0.2336, "step": 34852 }, { "epoch": 2.8234769928710306, "grad_norm": 0.07880965620279312, "learning_rate": 6.536297763175661e-05, "loss": 0.2678, "step": 34853 }, { "epoch": 2.823558003888529, "grad_norm": 0.0686798021197319, "learning_rate": 6.535847697916198e-05, "loss": 0.2092, "step": 34854 }, { "epoch": 2.823639014906027, "grad_norm": 0.06438298523426056, "learning_rate": 6.535397632656736e-05, "loss": 0.223, "step": 34855 }, { "epoch": 2.8237200259235253, "grad_norm": 0.07243643701076508, "learning_rate": 6.534947567397273e-05, "loss": 0.2362, "step": 34856 }, { "epoch": 2.823801036941024, "grad_norm": 0.06763161718845367, "learning_rate": 6.53449750213781e-05, "loss": 0.2336, "step": 34857 }, { "epoch": 2.8238820479585223, "grad_norm": 0.07330334186553955, "learning_rate": 6.534047436878348e-05, "loss": 0.2797, "step": 34858 }, { "epoch": 2.8239630589760205, "grad_norm": 0.06176656112074852, "learning_rate": 6.533597371618885e-05, "loss": 0.233, "step": 34859 }, { "epoch": 2.824044069993519, "grad_norm": 0.05908965319395065, "learning_rate": 6.533147306359423e-05, "loss": 0.2061, "step": 34860 }, { "epoch": 2.8241250810110174, "grad_norm": 0.08021603524684906, "learning_rate": 6.53269724109996e-05, "loss": 0.2717, "step": 34861 }, { "epoch": 2.8242060920285157, "grad_norm": 0.0696469247341156, "learning_rate": 6.532247175840497e-05, "loss": 0.2386, "step": 34862 }, { "epoch": 2.8242871030460144, "grad_norm": 0.06569056212902069, "learning_rate": 6.531797110581035e-05, "loss": 0.2837, "step": 34863 }, { "epoch": 2.8243681140635126, "grad_norm": 0.06969066709280014, "learning_rate": 6.531347045321572e-05, "loss": 0.2276, "step": 34864 }, { "epoch": 2.824449125081011, "grad_norm": 0.0799136832356453, "learning_rate": 6.53089698006211e-05, "loss": 0.2332, "step": 34865 }, { "epoch": 2.8245301360985096, "grad_norm": 0.06439350545406342, "learning_rate": 6.530446914802647e-05, "loss": 0.2407, "step": 34866 }, { "epoch": 2.824611147116008, "grad_norm": 0.07675740867853165, "learning_rate": 6.529996849543184e-05, "loss": 0.1951, "step": 34867 }, { "epoch": 2.824692158133506, "grad_norm": 0.061667781323194504, "learning_rate": 6.529546784283722e-05, "loss": 0.254, "step": 34868 }, { "epoch": 2.8247731691510047, "grad_norm": 0.06682218611240387, "learning_rate": 6.529096719024259e-05, "loss": 0.2163, "step": 34869 }, { "epoch": 2.824854180168503, "grad_norm": 0.05522351711988449, "learning_rate": 6.528646653764796e-05, "loss": 0.219, "step": 34870 }, { "epoch": 2.8249351911860012, "grad_norm": 0.09306511282920837, "learning_rate": 6.528196588505334e-05, "loss": 0.2794, "step": 34871 }, { "epoch": 2.8250162022035, "grad_norm": 0.06492568552494049, "learning_rate": 6.527746523245871e-05, "loss": 0.2203, "step": 34872 }, { "epoch": 2.825097213220998, "grad_norm": 0.07955403625965118, "learning_rate": 6.527296457986408e-05, "loss": 0.2162, "step": 34873 }, { "epoch": 2.8251782242384964, "grad_norm": 0.0795619934797287, "learning_rate": 6.526846392726946e-05, "loss": 0.2759, "step": 34874 }, { "epoch": 2.8252592352559946, "grad_norm": 0.06763854622840881, "learning_rate": 6.526396327467483e-05, "loss": 0.2137, "step": 34875 }, { "epoch": 2.8253402462734933, "grad_norm": 0.06950404495000839, "learning_rate": 6.52594626220802e-05, "loss": 0.2366, "step": 34876 }, { "epoch": 2.8254212572909916, "grad_norm": 0.06068060174584389, "learning_rate": 6.525496196948558e-05, "loss": 0.2257, "step": 34877 }, { "epoch": 2.82550226830849, "grad_norm": 0.07817313075065613, "learning_rate": 6.525046131689095e-05, "loss": 0.2418, "step": 34878 }, { "epoch": 2.825583279325988, "grad_norm": 0.07102708518505096, "learning_rate": 6.524596066429632e-05, "loss": 0.2426, "step": 34879 }, { "epoch": 2.8256642903434868, "grad_norm": 0.07444391399621964, "learning_rate": 6.52414600117017e-05, "loss": 0.2446, "step": 34880 }, { "epoch": 2.825745301360985, "grad_norm": 0.06354103982448578, "learning_rate": 6.523695935910707e-05, "loss": 0.2356, "step": 34881 }, { "epoch": 2.8258263123784833, "grad_norm": 0.06879103928804398, "learning_rate": 6.523245870651245e-05, "loss": 0.2282, "step": 34882 }, { "epoch": 2.825907323395982, "grad_norm": 0.06873899698257446, "learning_rate": 6.522795805391782e-05, "loss": 0.2419, "step": 34883 }, { "epoch": 2.82598833441348, "grad_norm": 0.0864935889840126, "learning_rate": 6.522345740132319e-05, "loss": 0.2754, "step": 34884 }, { "epoch": 2.8260693454309784, "grad_norm": 0.05859457328915596, "learning_rate": 6.521895674872857e-05, "loss": 0.2396, "step": 34885 }, { "epoch": 2.826150356448477, "grad_norm": 0.08526481688022614, "learning_rate": 6.521445609613394e-05, "loss": 0.2512, "step": 34886 }, { "epoch": 2.8262313674659754, "grad_norm": 0.07040493935346603, "learning_rate": 6.520995544353931e-05, "loss": 0.2025, "step": 34887 }, { "epoch": 2.8263123784834736, "grad_norm": 0.07555270940065384, "learning_rate": 6.520545479094469e-05, "loss": 0.2448, "step": 34888 }, { "epoch": 2.8263933895009723, "grad_norm": 0.06401999294757843, "learning_rate": 6.520095413835007e-05, "loss": 0.218, "step": 34889 }, { "epoch": 2.8264744005184705, "grad_norm": 0.07106225192546844, "learning_rate": 6.519645348575543e-05, "loss": 0.2403, "step": 34890 }, { "epoch": 2.826555411535969, "grad_norm": 0.06419890373945236, "learning_rate": 6.519195283316081e-05, "loss": 0.2198, "step": 34891 }, { "epoch": 2.8266364225534675, "grad_norm": 0.08662693202495575, "learning_rate": 6.51874521805662e-05, "loss": 0.2662, "step": 34892 }, { "epoch": 2.8267174335709657, "grad_norm": 0.06219939887523651, "learning_rate": 6.518295152797156e-05, "loss": 0.2195, "step": 34893 }, { "epoch": 2.826798444588464, "grad_norm": 0.06532366573810577, "learning_rate": 6.517845087537693e-05, "loss": 0.2113, "step": 34894 }, { "epoch": 2.8268794556059627, "grad_norm": 0.06734823435544968, "learning_rate": 6.517395022278232e-05, "loss": 0.2382, "step": 34895 }, { "epoch": 2.826960466623461, "grad_norm": 0.0640251487493515, "learning_rate": 6.516944957018768e-05, "loss": 0.2352, "step": 34896 }, { "epoch": 2.827041477640959, "grad_norm": 0.08535750955343246, "learning_rate": 6.516494891759305e-05, "loss": 0.2394, "step": 34897 }, { "epoch": 2.8271224886584574, "grad_norm": 0.06400448828935623, "learning_rate": 6.516044826499844e-05, "loss": 0.2099, "step": 34898 }, { "epoch": 2.827203499675956, "grad_norm": 0.06465429067611694, "learning_rate": 6.51559476124038e-05, "loss": 0.2472, "step": 34899 }, { "epoch": 2.8272845106934543, "grad_norm": 0.06246986612677574, "learning_rate": 6.515144695980917e-05, "loss": 0.2248, "step": 34900 }, { "epoch": 2.8273655217109526, "grad_norm": 0.060044556856155396, "learning_rate": 6.514694630721456e-05, "loss": 0.2189, "step": 34901 }, { "epoch": 2.827446532728451, "grad_norm": 0.07407305389642715, "learning_rate": 6.514244565461992e-05, "loss": 0.2497, "step": 34902 }, { "epoch": 2.8275275437459495, "grad_norm": 0.08100856095552444, "learning_rate": 6.513794500202529e-05, "loss": 0.202, "step": 34903 }, { "epoch": 2.8276085547634477, "grad_norm": 0.05908948928117752, "learning_rate": 6.513344434943068e-05, "loss": 0.2329, "step": 34904 }, { "epoch": 2.827689565780946, "grad_norm": 0.06425736099481583, "learning_rate": 6.512894369683604e-05, "loss": 0.2385, "step": 34905 }, { "epoch": 2.8277705767984447, "grad_norm": 0.07773395627737045, "learning_rate": 6.512444304424143e-05, "loss": 0.2401, "step": 34906 }, { "epoch": 2.827851587815943, "grad_norm": 0.09697293490171432, "learning_rate": 6.51199423916468e-05, "loss": 0.2682, "step": 34907 }, { "epoch": 2.827932598833441, "grad_norm": 0.07075246423482895, "learning_rate": 6.511544173905216e-05, "loss": 0.2332, "step": 34908 }, { "epoch": 2.82801360985094, "grad_norm": 0.07517353445291519, "learning_rate": 6.511094108645755e-05, "loss": 0.232, "step": 34909 }, { "epoch": 2.828094620868438, "grad_norm": 0.06551103293895721, "learning_rate": 6.510644043386292e-05, "loss": 0.208, "step": 34910 }, { "epoch": 2.8281756318859363, "grad_norm": 0.060613300651311874, "learning_rate": 6.510193978126828e-05, "loss": 0.2181, "step": 34911 }, { "epoch": 2.828256642903435, "grad_norm": 0.06362450867891312, "learning_rate": 6.509743912867367e-05, "loss": 0.2349, "step": 34912 }, { "epoch": 2.8283376539209333, "grad_norm": 0.11658961325883865, "learning_rate": 6.509293847607904e-05, "loss": 0.2772, "step": 34913 }, { "epoch": 2.8284186649384315, "grad_norm": 0.06812942773103714, "learning_rate": 6.50884378234844e-05, "loss": 0.2187, "step": 34914 }, { "epoch": 2.82849967595593, "grad_norm": 0.07889614254236221, "learning_rate": 6.508393717088979e-05, "loss": 0.2722, "step": 34915 }, { "epoch": 2.8285806869734285, "grad_norm": 0.06316548585891724, "learning_rate": 6.507943651829516e-05, "loss": 0.2252, "step": 34916 }, { "epoch": 2.8286616979909267, "grad_norm": 0.08983833342790604, "learning_rate": 6.507493586570052e-05, "loss": 0.2591, "step": 34917 }, { "epoch": 2.8287427090084254, "grad_norm": 0.07219039648771286, "learning_rate": 6.507043521310591e-05, "loss": 0.2134, "step": 34918 }, { "epoch": 2.8288237200259236, "grad_norm": 0.07436413317918777, "learning_rate": 6.506593456051128e-05, "loss": 0.2704, "step": 34919 }, { "epoch": 2.828904731043422, "grad_norm": 0.0707738921046257, "learning_rate": 6.506143390791664e-05, "loss": 0.23, "step": 34920 }, { "epoch": 2.82898574206092, "grad_norm": 0.06954982876777649, "learning_rate": 6.505693325532203e-05, "loss": 0.2565, "step": 34921 }, { "epoch": 2.829066753078419, "grad_norm": 0.057384226471185684, "learning_rate": 6.50524326027274e-05, "loss": 0.2442, "step": 34922 }, { "epoch": 2.829147764095917, "grad_norm": 0.060179632157087326, "learning_rate": 6.504793195013276e-05, "loss": 0.2302, "step": 34923 }, { "epoch": 2.8292287751134153, "grad_norm": 0.06120677664875984, "learning_rate": 6.504343129753815e-05, "loss": 0.2157, "step": 34924 }, { "epoch": 2.8293097861309136, "grad_norm": 0.06761647015810013, "learning_rate": 6.503893064494352e-05, "loss": 0.2727, "step": 34925 }, { "epoch": 2.8293907971484122, "grad_norm": 0.06313874572515488, "learning_rate": 6.503442999234888e-05, "loss": 0.236, "step": 34926 }, { "epoch": 2.8294718081659105, "grad_norm": 0.05890517681837082, "learning_rate": 6.502992933975427e-05, "loss": 0.2384, "step": 34927 }, { "epoch": 2.8295528191834087, "grad_norm": 0.07191134244203568, "learning_rate": 6.502542868715964e-05, "loss": 0.2269, "step": 34928 }, { "epoch": 2.8296338302009074, "grad_norm": 0.07116875052452087, "learning_rate": 6.5020928034565e-05, "loss": 0.2258, "step": 34929 }, { "epoch": 2.8297148412184057, "grad_norm": 0.07394856214523315, "learning_rate": 6.501642738197039e-05, "loss": 0.2275, "step": 34930 }, { "epoch": 2.829795852235904, "grad_norm": 0.06653162837028503, "learning_rate": 6.501192672937577e-05, "loss": 0.2285, "step": 34931 }, { "epoch": 2.8298768632534026, "grad_norm": 0.0657869353890419, "learning_rate": 6.500742607678114e-05, "loss": 0.2579, "step": 34932 }, { "epoch": 2.829957874270901, "grad_norm": 0.0657775029540062, "learning_rate": 6.500292542418651e-05, "loss": 0.2337, "step": 34933 }, { "epoch": 2.830038885288399, "grad_norm": 0.07672388851642609, "learning_rate": 6.499842477159189e-05, "loss": 0.2695, "step": 34934 }, { "epoch": 2.8301198963058978, "grad_norm": 0.09917972981929779, "learning_rate": 6.499392411899726e-05, "loss": 0.2231, "step": 34935 }, { "epoch": 2.830200907323396, "grad_norm": 0.07146567106246948, "learning_rate": 6.498942346640263e-05, "loss": 0.2256, "step": 34936 }, { "epoch": 2.8302819183408943, "grad_norm": 0.06359067559242249, "learning_rate": 6.498492281380801e-05, "loss": 0.2181, "step": 34937 }, { "epoch": 2.830362929358393, "grad_norm": 0.0743074044585228, "learning_rate": 6.498042216121338e-05, "loss": 0.243, "step": 34938 }, { "epoch": 2.830443940375891, "grad_norm": 0.07573742419481277, "learning_rate": 6.497592150861875e-05, "loss": 0.2234, "step": 34939 }, { "epoch": 2.8305249513933894, "grad_norm": 0.0722004771232605, "learning_rate": 6.497142085602413e-05, "loss": 0.2239, "step": 34940 }, { "epoch": 2.830605962410888, "grad_norm": 0.05707455798983574, "learning_rate": 6.49669202034295e-05, "loss": 0.2017, "step": 34941 }, { "epoch": 2.8306869734283864, "grad_norm": 0.0702546238899231, "learning_rate": 6.496241955083488e-05, "loss": 0.259, "step": 34942 }, { "epoch": 2.8307679844458846, "grad_norm": 0.06682027876377106, "learning_rate": 6.495791889824025e-05, "loss": 0.257, "step": 34943 }, { "epoch": 2.830848995463383, "grad_norm": 0.07950440049171448, "learning_rate": 6.495341824564562e-05, "loss": 0.2959, "step": 34944 }, { "epoch": 2.8309300064808816, "grad_norm": 0.08968168497085571, "learning_rate": 6.4948917593051e-05, "loss": 0.2162, "step": 34945 }, { "epoch": 2.83101101749838, "grad_norm": 0.06955923140048981, "learning_rate": 6.494441694045637e-05, "loss": 0.2408, "step": 34946 }, { "epoch": 2.831092028515878, "grad_norm": 0.04745893552899361, "learning_rate": 6.493991628786174e-05, "loss": 0.2034, "step": 34947 }, { "epoch": 2.8311730395333763, "grad_norm": 0.06792275607585907, "learning_rate": 6.493541563526712e-05, "loss": 0.2606, "step": 34948 }, { "epoch": 2.831254050550875, "grad_norm": 0.0904744416475296, "learning_rate": 6.493091498267249e-05, "loss": 0.2697, "step": 34949 }, { "epoch": 2.8313350615683732, "grad_norm": 0.07122079282999039, "learning_rate": 6.492641433007786e-05, "loss": 0.2314, "step": 34950 }, { "epoch": 2.8314160725858715, "grad_norm": 0.07180433720350266, "learning_rate": 6.492191367748324e-05, "loss": 0.2862, "step": 34951 }, { "epoch": 2.83149708360337, "grad_norm": 0.05907098948955536, "learning_rate": 6.491741302488861e-05, "loss": 0.2454, "step": 34952 }, { "epoch": 2.8315780946208684, "grad_norm": 0.052595555782318115, "learning_rate": 6.491291237229399e-05, "loss": 0.2333, "step": 34953 }, { "epoch": 2.8316591056383666, "grad_norm": 0.06805722415447235, "learning_rate": 6.490841171969936e-05, "loss": 0.2222, "step": 34954 }, { "epoch": 2.8317401166558653, "grad_norm": 0.054611627012491226, "learning_rate": 6.490391106710473e-05, "loss": 0.2171, "step": 34955 }, { "epoch": 2.8318211276733636, "grad_norm": 0.06894121319055557, "learning_rate": 6.48994104145101e-05, "loss": 0.2061, "step": 34956 }, { "epoch": 2.831902138690862, "grad_norm": 0.07178279757499695, "learning_rate": 6.489490976191548e-05, "loss": 0.2446, "step": 34957 }, { "epoch": 2.8319831497083605, "grad_norm": 0.06755075603723526, "learning_rate": 6.489040910932087e-05, "loss": 0.2233, "step": 34958 }, { "epoch": 2.8320641607258588, "grad_norm": 0.06892146170139313, "learning_rate": 6.488590845672623e-05, "loss": 0.2749, "step": 34959 }, { "epoch": 2.832145171743357, "grad_norm": 0.06232399493455887, "learning_rate": 6.48814078041316e-05, "loss": 0.2401, "step": 34960 }, { "epoch": 2.8322261827608557, "grad_norm": 0.07726490497589111, "learning_rate": 6.487690715153699e-05, "loss": 0.2388, "step": 34961 }, { "epoch": 2.832307193778354, "grad_norm": 0.07525191456079483, "learning_rate": 6.487240649894235e-05, "loss": 0.2742, "step": 34962 }, { "epoch": 2.832388204795852, "grad_norm": 0.0881979689002037, "learning_rate": 6.486790584634772e-05, "loss": 0.2627, "step": 34963 }, { "epoch": 2.832469215813351, "grad_norm": 0.0649413987994194, "learning_rate": 6.486340519375311e-05, "loss": 0.2475, "step": 34964 }, { "epoch": 2.832550226830849, "grad_norm": 0.06952086836099625, "learning_rate": 6.485890454115847e-05, "loss": 0.2643, "step": 34965 }, { "epoch": 2.8326312378483474, "grad_norm": 0.0617440864443779, "learning_rate": 6.485440388856384e-05, "loss": 0.229, "step": 34966 }, { "epoch": 2.8327122488658456, "grad_norm": 0.059850435703992844, "learning_rate": 6.484990323596923e-05, "loss": 0.265, "step": 34967 }, { "epoch": 2.8327932598833443, "grad_norm": 0.07749108970165253, "learning_rate": 6.484540258337459e-05, "loss": 0.2749, "step": 34968 }, { "epoch": 2.8328742709008425, "grad_norm": 0.06757545471191406, "learning_rate": 6.484090193077996e-05, "loss": 0.2506, "step": 34969 }, { "epoch": 2.832955281918341, "grad_norm": 0.06373197585344315, "learning_rate": 6.483640127818535e-05, "loss": 0.2415, "step": 34970 }, { "epoch": 2.833036292935839, "grad_norm": 0.06949126720428467, "learning_rate": 6.483190062559071e-05, "loss": 0.2503, "step": 34971 }, { "epoch": 2.8331173039533377, "grad_norm": 0.05850743502378464, "learning_rate": 6.482739997299608e-05, "loss": 0.1972, "step": 34972 }, { "epoch": 2.833198314970836, "grad_norm": 0.07521665096282959, "learning_rate": 6.482289932040147e-05, "loss": 0.2519, "step": 34973 }, { "epoch": 2.833279325988334, "grad_norm": 0.058105140924453735, "learning_rate": 6.481839866780683e-05, "loss": 0.2276, "step": 34974 }, { "epoch": 2.833360337005833, "grad_norm": 0.05883853882551193, "learning_rate": 6.48138980152122e-05, "loss": 0.2589, "step": 34975 }, { "epoch": 2.833441348023331, "grad_norm": 0.06599044799804688, "learning_rate": 6.480939736261759e-05, "loss": 0.2381, "step": 34976 }, { "epoch": 2.8335223590408294, "grad_norm": 0.0584799088537693, "learning_rate": 6.480489671002295e-05, "loss": 0.2495, "step": 34977 }, { "epoch": 2.833603370058328, "grad_norm": 0.07386167347431183, "learning_rate": 6.480039605742833e-05, "loss": 0.2524, "step": 34978 }, { "epoch": 2.8336843810758263, "grad_norm": 0.07024627923965454, "learning_rate": 6.479589540483371e-05, "loss": 0.2099, "step": 34979 }, { "epoch": 2.8337653920933246, "grad_norm": 0.057228393852710724, "learning_rate": 6.479139475223907e-05, "loss": 0.2309, "step": 34980 }, { "epoch": 2.8338464031108233, "grad_norm": 0.07119888067245483, "learning_rate": 6.478689409964445e-05, "loss": 0.2353, "step": 34981 }, { "epoch": 2.8339274141283215, "grad_norm": 0.06771930307149887, "learning_rate": 6.478239344704983e-05, "loss": 0.2281, "step": 34982 }, { "epoch": 2.8340084251458197, "grad_norm": 0.06906388700008392, "learning_rate": 6.477789279445519e-05, "loss": 0.2931, "step": 34983 }, { "epoch": 2.8340894361633184, "grad_norm": 0.07088629901409149, "learning_rate": 6.477339214186058e-05, "loss": 0.2441, "step": 34984 }, { "epoch": 2.8341704471808167, "grad_norm": 0.06921649724245071, "learning_rate": 6.476889148926595e-05, "loss": 0.2769, "step": 34985 }, { "epoch": 2.834251458198315, "grad_norm": 0.07710966467857361, "learning_rate": 6.476439083667131e-05, "loss": 0.2329, "step": 34986 }, { "epoch": 2.8343324692158136, "grad_norm": 0.056059908121824265, "learning_rate": 6.47598901840767e-05, "loss": 0.2521, "step": 34987 }, { "epoch": 2.834413480233312, "grad_norm": 0.061635423451662064, "learning_rate": 6.475538953148207e-05, "loss": 0.1952, "step": 34988 }, { "epoch": 2.83449449125081, "grad_norm": 0.08030088990926743, "learning_rate": 6.475088887888743e-05, "loss": 0.2684, "step": 34989 }, { "epoch": 2.8345755022683083, "grad_norm": 0.07802361994981766, "learning_rate": 6.474638822629282e-05, "loss": 0.265, "step": 34990 }, { "epoch": 2.834656513285807, "grad_norm": 0.06831242889165878, "learning_rate": 6.47418875736982e-05, "loss": 0.2168, "step": 34991 }, { "epoch": 2.8347375243033053, "grad_norm": 0.0766051784157753, "learning_rate": 6.473738692110356e-05, "loss": 0.2396, "step": 34992 }, { "epoch": 2.8348185353208035, "grad_norm": 0.09226632863283157, "learning_rate": 6.473288626850894e-05, "loss": 0.2222, "step": 34993 }, { "epoch": 2.8348995463383018, "grad_norm": 0.06202827766537666, "learning_rate": 6.472838561591432e-05, "loss": 0.2082, "step": 34994 }, { "epoch": 2.8349805573558005, "grad_norm": 0.06007321923971176, "learning_rate": 6.472388496331968e-05, "loss": 0.2341, "step": 34995 }, { "epoch": 2.8350615683732987, "grad_norm": 0.07148656994104385, "learning_rate": 6.471938431072506e-05, "loss": 0.2519, "step": 34996 }, { "epoch": 2.835142579390797, "grad_norm": 0.06458251923322678, "learning_rate": 6.471488365813044e-05, "loss": 0.2426, "step": 34997 }, { "epoch": 2.8352235904082956, "grad_norm": 0.07826030254364014, "learning_rate": 6.47103830055358e-05, "loss": 0.2701, "step": 34998 }, { "epoch": 2.835304601425794, "grad_norm": 0.08532463014125824, "learning_rate": 6.470588235294118e-05, "loss": 0.2626, "step": 34999 }, { "epoch": 2.835385612443292, "grad_norm": 0.08283942192792892, "learning_rate": 6.470138170034656e-05, "loss": 0.2379, "step": 35000 }, { "epoch": 2.835466623460791, "grad_norm": 0.07847673445940018, "learning_rate": 6.469688104775192e-05, "loss": 0.2509, "step": 35001 }, { "epoch": 2.835547634478289, "grad_norm": 0.0680897906422615, "learning_rate": 6.46923803951573e-05, "loss": 0.1928, "step": 35002 }, { "epoch": 2.8356286454957873, "grad_norm": 0.05337081104516983, "learning_rate": 6.468787974256268e-05, "loss": 0.1969, "step": 35003 }, { "epoch": 2.835709656513286, "grad_norm": 0.07576156407594681, "learning_rate": 6.468337908996804e-05, "loss": 0.2327, "step": 35004 }, { "epoch": 2.8357906675307842, "grad_norm": 0.08666659891605377, "learning_rate": 6.467887843737343e-05, "loss": 0.3167, "step": 35005 }, { "epoch": 2.8358716785482825, "grad_norm": 0.09243784844875336, "learning_rate": 6.46743777847788e-05, "loss": 0.2857, "step": 35006 }, { "epoch": 2.835952689565781, "grad_norm": 0.0682855099439621, "learning_rate": 6.466987713218416e-05, "loss": 0.1955, "step": 35007 }, { "epoch": 2.8360337005832794, "grad_norm": 0.062375444918870926, "learning_rate": 6.466537647958955e-05, "loss": 0.2608, "step": 35008 }, { "epoch": 2.8361147116007777, "grad_norm": 0.059896890074014664, "learning_rate": 6.466087582699492e-05, "loss": 0.2254, "step": 35009 }, { "epoch": 2.8361957226182763, "grad_norm": 0.07151475548744202, "learning_rate": 6.46563751744003e-05, "loss": 0.2278, "step": 35010 }, { "epoch": 2.8362767336357746, "grad_norm": 0.05724192038178444, "learning_rate": 6.465187452180567e-05, "loss": 0.2194, "step": 35011 }, { "epoch": 2.836357744653273, "grad_norm": 0.05848658084869385, "learning_rate": 6.464737386921104e-05, "loss": 0.2469, "step": 35012 }, { "epoch": 2.836438755670771, "grad_norm": 0.07299201935529709, "learning_rate": 6.464287321661641e-05, "loss": 0.2336, "step": 35013 }, { "epoch": 2.8365197666882693, "grad_norm": 0.055811021476984024, "learning_rate": 6.463837256402179e-05, "loss": 0.2315, "step": 35014 }, { "epoch": 2.836600777705768, "grad_norm": 0.07474242150783539, "learning_rate": 6.463387191142716e-05, "loss": 0.2569, "step": 35015 }, { "epoch": 2.8366817887232663, "grad_norm": 0.07334800809621811, "learning_rate": 6.462937125883254e-05, "loss": 0.2535, "step": 35016 }, { "epoch": 2.8367627997407645, "grad_norm": 0.06788776814937592, "learning_rate": 6.462487060623791e-05, "loss": 0.2351, "step": 35017 }, { "epoch": 2.836843810758263, "grad_norm": 0.06585695594549179, "learning_rate": 6.462036995364328e-05, "loss": 0.2525, "step": 35018 }, { "epoch": 2.8369248217757614, "grad_norm": 0.07982311397790909, "learning_rate": 6.461586930104866e-05, "loss": 0.259, "step": 35019 }, { "epoch": 2.8370058327932597, "grad_norm": 0.07220277935266495, "learning_rate": 6.461136864845403e-05, "loss": 0.2495, "step": 35020 }, { "epoch": 2.8370868438107584, "grad_norm": 0.07429298013448715, "learning_rate": 6.46068679958594e-05, "loss": 0.2376, "step": 35021 }, { "epoch": 2.8371678548282566, "grad_norm": 0.08143361657857895, "learning_rate": 6.460236734326478e-05, "loss": 0.2465, "step": 35022 }, { "epoch": 2.837248865845755, "grad_norm": 0.06289290636777878, "learning_rate": 6.459786669067015e-05, "loss": 0.2462, "step": 35023 }, { "epoch": 2.8373298768632536, "grad_norm": 0.06706014275550842, "learning_rate": 6.459336603807552e-05, "loss": 0.2297, "step": 35024 }, { "epoch": 2.837410887880752, "grad_norm": 0.05648915097117424, "learning_rate": 6.45888653854809e-05, "loss": 0.2318, "step": 35025 }, { "epoch": 2.83749189889825, "grad_norm": 0.07064875960350037, "learning_rate": 6.458436473288627e-05, "loss": 0.2537, "step": 35026 }, { "epoch": 2.8375729099157487, "grad_norm": 0.06095169112086296, "learning_rate": 6.457986408029165e-05, "loss": 0.2096, "step": 35027 }, { "epoch": 2.837653920933247, "grad_norm": 0.05833174288272858, "learning_rate": 6.457536342769702e-05, "loss": 0.2017, "step": 35028 }, { "epoch": 2.837734931950745, "grad_norm": 0.0616929791867733, "learning_rate": 6.457086277510239e-05, "loss": 0.2404, "step": 35029 }, { "epoch": 2.837815942968244, "grad_norm": 0.07648038119077682, "learning_rate": 6.456636212250777e-05, "loss": 0.292, "step": 35030 }, { "epoch": 2.837896953985742, "grad_norm": 0.07276062667369843, "learning_rate": 6.456186146991314e-05, "loss": 0.2465, "step": 35031 }, { "epoch": 2.8379779650032404, "grad_norm": 0.06426334381103516, "learning_rate": 6.455736081731851e-05, "loss": 0.2026, "step": 35032 }, { "epoch": 2.838058976020739, "grad_norm": 0.09658359736204147, "learning_rate": 6.455286016472389e-05, "loss": 0.2774, "step": 35033 }, { "epoch": 2.8381399870382373, "grad_norm": 0.06532305479049683, "learning_rate": 6.454835951212926e-05, "loss": 0.2223, "step": 35034 }, { "epoch": 2.8382209980557356, "grad_norm": 0.05708573758602142, "learning_rate": 6.454385885953463e-05, "loss": 0.1995, "step": 35035 }, { "epoch": 2.838302009073234, "grad_norm": 0.07263416051864624, "learning_rate": 6.453935820694002e-05, "loss": 0.2672, "step": 35036 }, { "epoch": 2.838383020090732, "grad_norm": 0.0830162912607193, "learning_rate": 6.453485755434538e-05, "loss": 0.2136, "step": 35037 }, { "epoch": 2.8384640311082308, "grad_norm": 0.06936755776405334, "learning_rate": 6.453035690175075e-05, "loss": 0.2827, "step": 35038 }, { "epoch": 2.838545042125729, "grad_norm": 0.0790124237537384, "learning_rate": 6.452585624915614e-05, "loss": 0.2345, "step": 35039 }, { "epoch": 2.8386260531432272, "grad_norm": 0.06774169951677322, "learning_rate": 6.45213555965615e-05, "loss": 0.2457, "step": 35040 }, { "epoch": 2.838707064160726, "grad_norm": 0.0654446929693222, "learning_rate": 6.451685494396688e-05, "loss": 0.2259, "step": 35041 }, { "epoch": 2.838788075178224, "grad_norm": 0.06522495299577713, "learning_rate": 6.451235429137226e-05, "loss": 0.2096, "step": 35042 }, { "epoch": 2.8388690861957224, "grad_norm": 0.06641356647014618, "learning_rate": 6.450785363877762e-05, "loss": 0.2841, "step": 35043 }, { "epoch": 2.838950097213221, "grad_norm": 0.06917969137430191, "learning_rate": 6.4503352986183e-05, "loss": 0.1908, "step": 35044 }, { "epoch": 2.8390311082307194, "grad_norm": 0.07256962358951569, "learning_rate": 6.449885233358838e-05, "loss": 0.2137, "step": 35045 }, { "epoch": 2.8391121192482176, "grad_norm": 0.06664843112230301, "learning_rate": 6.449435168099374e-05, "loss": 0.2453, "step": 35046 }, { "epoch": 2.8391931302657163, "grad_norm": 0.05385294556617737, "learning_rate": 6.448985102839912e-05, "loss": 0.2228, "step": 35047 }, { "epoch": 2.8392741412832145, "grad_norm": 0.07032804191112518, "learning_rate": 6.44853503758045e-05, "loss": 0.24, "step": 35048 }, { "epoch": 2.839355152300713, "grad_norm": 0.0735827162861824, "learning_rate": 6.448084972320986e-05, "loss": 0.2415, "step": 35049 }, { "epoch": 2.8394361633182115, "grad_norm": 0.06067224219441414, "learning_rate": 6.447634907061524e-05, "loss": 0.2344, "step": 35050 }, { "epoch": 2.8395171743357097, "grad_norm": 0.06811947375535965, "learning_rate": 6.447184841802063e-05, "loss": 0.203, "step": 35051 }, { "epoch": 2.839598185353208, "grad_norm": 0.07413507252931595, "learning_rate": 6.446734776542599e-05, "loss": 0.2707, "step": 35052 }, { "epoch": 2.8396791963707066, "grad_norm": 0.06308168172836304, "learning_rate": 6.446284711283136e-05, "loss": 0.2208, "step": 35053 }, { "epoch": 2.839760207388205, "grad_norm": 0.07065436244010925, "learning_rate": 6.445834646023675e-05, "loss": 0.2497, "step": 35054 }, { "epoch": 2.839841218405703, "grad_norm": 0.08040545135736465, "learning_rate": 6.44538458076421e-05, "loss": 0.2678, "step": 35055 }, { "epoch": 2.839922229423202, "grad_norm": 0.08743759989738464, "learning_rate": 6.444934515504748e-05, "loss": 0.2581, "step": 35056 }, { "epoch": 2.8400032404407, "grad_norm": 0.06685937941074371, "learning_rate": 6.444484450245287e-05, "loss": 0.2281, "step": 35057 }, { "epoch": 2.8400842514581983, "grad_norm": 0.06777413934469223, "learning_rate": 6.444034384985823e-05, "loss": 0.2623, "step": 35058 }, { "epoch": 2.8401652624756966, "grad_norm": 0.06635896861553192, "learning_rate": 6.44358431972636e-05, "loss": 0.2158, "step": 35059 }, { "epoch": 2.840246273493195, "grad_norm": 0.06646884232759476, "learning_rate": 6.443134254466899e-05, "loss": 0.2056, "step": 35060 }, { "epoch": 2.8403272845106935, "grad_norm": 0.06737709790468216, "learning_rate": 6.442684189207435e-05, "loss": 0.2506, "step": 35061 }, { "epoch": 2.8404082955281917, "grad_norm": 0.07704462856054306, "learning_rate": 6.442234123947972e-05, "loss": 0.2152, "step": 35062 }, { "epoch": 2.84048930654569, "grad_norm": 0.06483523547649384, "learning_rate": 6.441784058688511e-05, "loss": 0.2347, "step": 35063 }, { "epoch": 2.8405703175631887, "grad_norm": 0.07217948883771896, "learning_rate": 6.441333993429047e-05, "loss": 0.24, "step": 35064 }, { "epoch": 2.840651328580687, "grad_norm": 0.07810252904891968, "learning_rate": 6.440883928169586e-05, "loss": 0.2455, "step": 35065 }, { "epoch": 2.840732339598185, "grad_norm": 0.08098962903022766, "learning_rate": 6.440433862910123e-05, "loss": 0.2083, "step": 35066 }, { "epoch": 2.840813350615684, "grad_norm": 0.07664956152439117, "learning_rate": 6.439983797650659e-05, "loss": 0.2215, "step": 35067 }, { "epoch": 2.840894361633182, "grad_norm": 0.06895925849676132, "learning_rate": 6.439533732391198e-05, "loss": 0.24, "step": 35068 }, { "epoch": 2.8409753726506803, "grad_norm": 0.07341915369033813, "learning_rate": 6.439083667131735e-05, "loss": 0.2441, "step": 35069 }, { "epoch": 2.841056383668179, "grad_norm": 0.0792069062590599, "learning_rate": 6.438633601872271e-05, "loss": 0.2352, "step": 35070 }, { "epoch": 2.8411373946856773, "grad_norm": 0.0779370442032814, "learning_rate": 6.43818353661281e-05, "loss": 0.249, "step": 35071 }, { "epoch": 2.8412184057031755, "grad_norm": 0.08689171075820923, "learning_rate": 6.437733471353347e-05, "loss": 0.2668, "step": 35072 }, { "epoch": 2.841299416720674, "grad_norm": 0.08527795970439911, "learning_rate": 6.437283406093883e-05, "loss": 0.2353, "step": 35073 }, { "epoch": 2.8413804277381725, "grad_norm": 0.05817866697907448, "learning_rate": 6.436833340834422e-05, "loss": 0.2205, "step": 35074 }, { "epoch": 2.8414614387556707, "grad_norm": 0.07152362167835236, "learning_rate": 6.436383275574959e-05, "loss": 0.2293, "step": 35075 }, { "epoch": 2.8415424497731694, "grad_norm": 0.055119458585977554, "learning_rate": 6.435933210315495e-05, "loss": 0.1939, "step": 35076 }, { "epoch": 2.8416234607906676, "grad_norm": 0.0744624063372612, "learning_rate": 6.435483145056034e-05, "loss": 0.2108, "step": 35077 }, { "epoch": 2.841704471808166, "grad_norm": 0.07901319861412048, "learning_rate": 6.435033079796571e-05, "loss": 0.2493, "step": 35078 }, { "epoch": 2.8417854828256646, "grad_norm": 0.07424967736005783, "learning_rate": 6.434583014537107e-05, "loss": 0.2564, "step": 35079 }, { "epoch": 2.841866493843163, "grad_norm": 0.07194703817367554, "learning_rate": 6.434132949277646e-05, "loss": 0.2647, "step": 35080 }, { "epoch": 2.841947504860661, "grad_norm": 0.07558548450469971, "learning_rate": 6.433682884018183e-05, "loss": 0.2529, "step": 35081 }, { "epoch": 2.8420285158781593, "grad_norm": 0.08328049629926682, "learning_rate": 6.43323281875872e-05, "loss": 0.2749, "step": 35082 }, { "epoch": 2.8421095268956575, "grad_norm": 0.07167727500200272, "learning_rate": 6.432782753499258e-05, "loss": 0.2678, "step": 35083 }, { "epoch": 2.8421905379131562, "grad_norm": 0.08185160160064697, "learning_rate": 6.432332688239795e-05, "loss": 0.2322, "step": 35084 }, { "epoch": 2.8422715489306545, "grad_norm": 0.08160267770290375, "learning_rate": 6.431882622980331e-05, "loss": 0.2491, "step": 35085 }, { "epoch": 2.8423525599481527, "grad_norm": 0.0694357231259346, "learning_rate": 6.43143255772087e-05, "loss": 0.2365, "step": 35086 }, { "epoch": 2.8424335709656514, "grad_norm": 0.06928124278783798, "learning_rate": 6.430982492461407e-05, "loss": 0.2033, "step": 35087 }, { "epoch": 2.8425145819831497, "grad_norm": 0.07067787647247314, "learning_rate": 6.430532427201944e-05, "loss": 0.2214, "step": 35088 }, { "epoch": 2.842595593000648, "grad_norm": 0.06498835980892181, "learning_rate": 6.430082361942482e-05, "loss": 0.2462, "step": 35089 }, { "epoch": 2.8426766040181466, "grad_norm": 0.06612785905599594, "learning_rate": 6.42963229668302e-05, "loss": 0.2241, "step": 35090 }, { "epoch": 2.842757615035645, "grad_norm": 0.06681181490421295, "learning_rate": 6.429182231423557e-05, "loss": 0.2506, "step": 35091 }, { "epoch": 2.842838626053143, "grad_norm": 0.0670313686132431, "learning_rate": 6.428732166164094e-05, "loss": 0.2273, "step": 35092 }, { "epoch": 2.8429196370706418, "grad_norm": 0.06744195520877838, "learning_rate": 6.428282100904632e-05, "loss": 0.2009, "step": 35093 }, { "epoch": 2.84300064808814, "grad_norm": 0.06611412763595581, "learning_rate": 6.427832035645169e-05, "loss": 0.2131, "step": 35094 }, { "epoch": 2.8430816591056383, "grad_norm": 0.05895596370100975, "learning_rate": 6.427381970385706e-05, "loss": 0.2321, "step": 35095 }, { "epoch": 2.843162670123137, "grad_norm": 0.06782980263233185, "learning_rate": 6.426931905126244e-05, "loss": 0.228, "step": 35096 }, { "epoch": 2.843243681140635, "grad_norm": 0.06184051185846329, "learning_rate": 6.426481839866781e-05, "loss": 0.2317, "step": 35097 }, { "epoch": 2.8433246921581334, "grad_norm": 0.07850348949432373, "learning_rate": 6.426031774607318e-05, "loss": 0.2829, "step": 35098 }, { "epoch": 2.843405703175632, "grad_norm": 0.07288786768913269, "learning_rate": 6.425581709347856e-05, "loss": 0.2351, "step": 35099 }, { "epoch": 2.8434867141931304, "grad_norm": 0.06398995220661163, "learning_rate": 6.425131644088393e-05, "loss": 0.2206, "step": 35100 }, { "epoch": 2.8435677252106286, "grad_norm": 0.06157770752906799, "learning_rate": 6.42468157882893e-05, "loss": 0.2379, "step": 35101 }, { "epoch": 2.843648736228127, "grad_norm": 0.06304378807544708, "learning_rate": 6.424231513569468e-05, "loss": 0.2784, "step": 35102 }, { "epoch": 2.8437297472456255, "grad_norm": 0.06795405596494675, "learning_rate": 6.423781448310005e-05, "loss": 0.2381, "step": 35103 }, { "epoch": 2.843810758263124, "grad_norm": 0.06040545925498009, "learning_rate": 6.423331383050543e-05, "loss": 0.2162, "step": 35104 }, { "epoch": 2.843891769280622, "grad_norm": 0.059341639280319214, "learning_rate": 6.42288131779108e-05, "loss": 0.2117, "step": 35105 }, { "epoch": 2.8439727802981203, "grad_norm": 0.07643841952085495, "learning_rate": 6.422431252531617e-05, "loss": 0.235, "step": 35106 }, { "epoch": 2.844053791315619, "grad_norm": 0.07376381009817123, "learning_rate": 6.421981187272155e-05, "loss": 0.2177, "step": 35107 }, { "epoch": 2.844134802333117, "grad_norm": 0.05964221805334091, "learning_rate": 6.421531122012692e-05, "loss": 0.2086, "step": 35108 }, { "epoch": 2.8442158133506155, "grad_norm": 0.06813271343708038, "learning_rate": 6.42108105675323e-05, "loss": 0.2221, "step": 35109 }, { "epoch": 2.844296824368114, "grad_norm": 0.07453761994838715, "learning_rate": 6.420630991493767e-05, "loss": 0.2207, "step": 35110 }, { "epoch": 2.8443778353856124, "grad_norm": 0.06619906425476074, "learning_rate": 6.420180926234304e-05, "loss": 0.2148, "step": 35111 }, { "epoch": 2.8444588464031106, "grad_norm": 0.06775642186403275, "learning_rate": 6.419730860974842e-05, "loss": 0.2325, "step": 35112 }, { "epoch": 2.8445398574206093, "grad_norm": 0.07589752972126007, "learning_rate": 6.419280795715379e-05, "loss": 0.2606, "step": 35113 }, { "epoch": 2.8446208684381076, "grad_norm": 0.0746060386300087, "learning_rate": 6.418830730455916e-05, "loss": 0.2374, "step": 35114 }, { "epoch": 2.844701879455606, "grad_norm": 0.08039572834968567, "learning_rate": 6.418380665196454e-05, "loss": 0.27, "step": 35115 }, { "epoch": 2.8447828904731045, "grad_norm": 0.0715525895357132, "learning_rate": 6.417930599936991e-05, "loss": 0.2392, "step": 35116 }, { "epoch": 2.8448639014906028, "grad_norm": 0.06464890390634537, "learning_rate": 6.41748053467753e-05, "loss": 0.2321, "step": 35117 }, { "epoch": 2.844944912508101, "grad_norm": 0.07697032392024994, "learning_rate": 6.417030469418066e-05, "loss": 0.2271, "step": 35118 }, { "epoch": 2.8450259235255997, "grad_norm": 0.07292238622903824, "learning_rate": 6.416580404158603e-05, "loss": 0.2573, "step": 35119 }, { "epoch": 2.845106934543098, "grad_norm": 0.07538817822933197, "learning_rate": 6.416130338899142e-05, "loss": 0.2529, "step": 35120 }, { "epoch": 2.845187945560596, "grad_norm": 0.08894102275371552, "learning_rate": 6.415680273639678e-05, "loss": 0.2783, "step": 35121 }, { "epoch": 2.845268956578095, "grad_norm": 0.06148630753159523, "learning_rate": 6.415230208380215e-05, "loss": 0.2373, "step": 35122 }, { "epoch": 2.845349967595593, "grad_norm": 0.06874866038560867, "learning_rate": 6.414780143120754e-05, "loss": 0.2085, "step": 35123 }, { "epoch": 2.8454309786130914, "grad_norm": 0.06734011322259903, "learning_rate": 6.41433007786129e-05, "loss": 0.2627, "step": 35124 }, { "epoch": 2.8455119896305896, "grad_norm": 0.07099562883377075, "learning_rate": 6.413880012601827e-05, "loss": 0.2366, "step": 35125 }, { "epoch": 2.8455930006480883, "grad_norm": 0.06782719492912292, "learning_rate": 6.413429947342366e-05, "loss": 0.2591, "step": 35126 }, { "epoch": 2.8456740116655865, "grad_norm": 0.08088771253824234, "learning_rate": 6.412979882082902e-05, "loss": 0.2627, "step": 35127 }, { "epoch": 2.845755022683085, "grad_norm": 0.06747904419898987, "learning_rate": 6.412529816823439e-05, "loss": 0.2215, "step": 35128 }, { "epoch": 2.845836033700583, "grad_norm": 0.09774980694055557, "learning_rate": 6.412079751563978e-05, "loss": 0.2136, "step": 35129 }, { "epoch": 2.8459170447180817, "grad_norm": 0.06703547388315201, "learning_rate": 6.411629686304514e-05, "loss": 0.2568, "step": 35130 }, { "epoch": 2.84599805573558, "grad_norm": 0.07806454598903656, "learning_rate": 6.411179621045051e-05, "loss": 0.2178, "step": 35131 }, { "epoch": 2.846079066753078, "grad_norm": 0.0774211660027504, "learning_rate": 6.41072955578559e-05, "loss": 0.2296, "step": 35132 }, { "epoch": 2.846160077770577, "grad_norm": 0.06930553913116455, "learning_rate": 6.410279490526126e-05, "loss": 0.2351, "step": 35133 }, { "epoch": 2.846241088788075, "grad_norm": 0.0710282027721405, "learning_rate": 6.409829425266663e-05, "loss": 0.2244, "step": 35134 }, { "epoch": 2.8463220998055734, "grad_norm": 0.05973135307431221, "learning_rate": 6.409379360007202e-05, "loss": 0.1993, "step": 35135 }, { "epoch": 2.846403110823072, "grad_norm": 0.07437211275100708, "learning_rate": 6.408929294747738e-05, "loss": 0.253, "step": 35136 }, { "epoch": 2.8464841218405703, "grad_norm": 0.06692270934581757, "learning_rate": 6.408479229488276e-05, "loss": 0.2403, "step": 35137 }, { "epoch": 2.8465651328580686, "grad_norm": 0.06204549968242645, "learning_rate": 6.408029164228814e-05, "loss": 0.2558, "step": 35138 }, { "epoch": 2.8466461438755672, "grad_norm": 0.05657990649342537, "learning_rate": 6.40757909896935e-05, "loss": 0.2377, "step": 35139 }, { "epoch": 2.8467271548930655, "grad_norm": 0.08282967656850815, "learning_rate": 6.407129033709888e-05, "loss": 0.2422, "step": 35140 }, { "epoch": 2.8468081659105637, "grad_norm": 0.06874528527259827, "learning_rate": 6.406678968450426e-05, "loss": 0.2538, "step": 35141 }, { "epoch": 2.8468891769280624, "grad_norm": 0.08185140788555145, "learning_rate": 6.406228903190962e-05, "loss": 0.2704, "step": 35142 }, { "epoch": 2.8469701879455607, "grad_norm": 0.07284029573202133, "learning_rate": 6.405778837931501e-05, "loss": 0.2421, "step": 35143 }, { "epoch": 2.847051198963059, "grad_norm": 0.06483624130487442, "learning_rate": 6.405328772672038e-05, "loss": 0.1819, "step": 35144 }, { "epoch": 2.8471322099805576, "grad_norm": 0.06488542258739471, "learning_rate": 6.404878707412574e-05, "loss": 0.2052, "step": 35145 }, { "epoch": 2.847213220998056, "grad_norm": 0.06830263137817383, "learning_rate": 6.404428642153113e-05, "loss": 0.2134, "step": 35146 }, { "epoch": 2.847294232015554, "grad_norm": 0.08114606142044067, "learning_rate": 6.40397857689365e-05, "loss": 0.2294, "step": 35147 }, { "epoch": 2.8473752430330523, "grad_norm": 0.05794978141784668, "learning_rate": 6.403528511634186e-05, "loss": 0.2045, "step": 35148 }, { "epoch": 2.847456254050551, "grad_norm": 0.06319063156843185, "learning_rate": 6.403078446374725e-05, "loss": 0.231, "step": 35149 }, { "epoch": 2.8475372650680493, "grad_norm": 0.0754387155175209, "learning_rate": 6.402628381115263e-05, "loss": 0.2598, "step": 35150 }, { "epoch": 2.8476182760855475, "grad_norm": 0.06007128581404686, "learning_rate": 6.402178315855799e-05, "loss": 0.2397, "step": 35151 }, { "epoch": 2.8476992871030458, "grad_norm": 0.07417289167642593, "learning_rate": 6.401728250596337e-05, "loss": 0.2182, "step": 35152 }, { "epoch": 2.8477802981205445, "grad_norm": 0.06433563679456711, "learning_rate": 6.401278185336875e-05, "loss": 0.2193, "step": 35153 }, { "epoch": 2.8478613091380427, "grad_norm": 0.06929808855056763, "learning_rate": 6.40082812007741e-05, "loss": 0.2558, "step": 35154 }, { "epoch": 2.847942320155541, "grad_norm": 0.07436833530664444, "learning_rate": 6.40037805481795e-05, "loss": 0.2258, "step": 35155 }, { "epoch": 2.8480233311730396, "grad_norm": 0.05718601122498512, "learning_rate": 6.399927989558487e-05, "loss": 0.2283, "step": 35156 }, { "epoch": 2.848104342190538, "grad_norm": 0.079722099006176, "learning_rate": 6.399477924299023e-05, "loss": 0.2145, "step": 35157 }, { "epoch": 2.848185353208036, "grad_norm": 0.06823069602251053, "learning_rate": 6.399027859039561e-05, "loss": 0.1983, "step": 35158 }, { "epoch": 2.848266364225535, "grad_norm": 0.07056091725826263, "learning_rate": 6.398577793780099e-05, "loss": 0.2287, "step": 35159 }, { "epoch": 2.848347375243033, "grad_norm": 0.054329004138708115, "learning_rate": 6.398127728520635e-05, "loss": 0.2391, "step": 35160 }, { "epoch": 2.8484283862605313, "grad_norm": 0.07334672659635544, "learning_rate": 6.397677663261174e-05, "loss": 0.2789, "step": 35161 }, { "epoch": 2.84850939727803, "grad_norm": 0.08274146169424057, "learning_rate": 6.397227598001711e-05, "loss": 0.2236, "step": 35162 }, { "epoch": 2.8485904082955282, "grad_norm": 0.07405499368906021, "learning_rate": 6.396777532742247e-05, "loss": 0.2411, "step": 35163 }, { "epoch": 2.8486714193130265, "grad_norm": 0.07061372697353363, "learning_rate": 6.396327467482786e-05, "loss": 0.241, "step": 35164 }, { "epoch": 2.848752430330525, "grad_norm": 0.08040562272071838, "learning_rate": 6.395877402223323e-05, "loss": 0.2407, "step": 35165 }, { "epoch": 2.8488334413480234, "grad_norm": 0.07685498148202896, "learning_rate": 6.395427336963859e-05, "loss": 0.2458, "step": 35166 }, { "epoch": 2.8489144523655217, "grad_norm": 0.08878312259912491, "learning_rate": 6.394977271704398e-05, "loss": 0.2675, "step": 35167 }, { "epoch": 2.8489954633830203, "grad_norm": 0.06860477477312088, "learning_rate": 6.394527206444935e-05, "loss": 0.2683, "step": 35168 }, { "epoch": 2.8490764744005186, "grad_norm": 0.06936057657003403, "learning_rate": 6.394077141185472e-05, "loss": 0.2105, "step": 35169 }, { "epoch": 2.849157485418017, "grad_norm": 0.06778555363416672, "learning_rate": 6.39362707592601e-05, "loss": 0.2152, "step": 35170 }, { "epoch": 2.849238496435515, "grad_norm": 0.07298450171947479, "learning_rate": 6.393177010666547e-05, "loss": 0.2174, "step": 35171 }, { "epoch": 2.8493195074530138, "grad_norm": 0.07470021396875381, "learning_rate": 6.392726945407084e-05, "loss": 0.2394, "step": 35172 }, { "epoch": 2.849400518470512, "grad_norm": 0.07039808481931686, "learning_rate": 6.392276880147622e-05, "loss": 0.2132, "step": 35173 }, { "epoch": 2.8494815294880103, "grad_norm": 0.061103638261556625, "learning_rate": 6.391826814888159e-05, "loss": 0.2225, "step": 35174 }, { "epoch": 2.8495625405055085, "grad_norm": 0.0838061049580574, "learning_rate": 6.391376749628697e-05, "loss": 0.2628, "step": 35175 }, { "epoch": 2.849643551523007, "grad_norm": 0.0686267763376236, "learning_rate": 6.390926684369234e-05, "loss": 0.2291, "step": 35176 }, { "epoch": 2.8497245625405054, "grad_norm": 0.058250222355127335, "learning_rate": 6.390476619109771e-05, "loss": 0.1935, "step": 35177 }, { "epoch": 2.8498055735580037, "grad_norm": 0.0706809014081955, "learning_rate": 6.390026553850309e-05, "loss": 0.2664, "step": 35178 }, { "epoch": 2.8498865845755024, "grad_norm": 0.06753183901309967, "learning_rate": 6.389576488590846e-05, "loss": 0.2202, "step": 35179 }, { "epoch": 2.8499675955930006, "grad_norm": 0.07120547443628311, "learning_rate": 6.389126423331383e-05, "loss": 0.2449, "step": 35180 }, { "epoch": 2.850048606610499, "grad_norm": 0.06694607436656952, "learning_rate": 6.388676358071921e-05, "loss": 0.265, "step": 35181 }, { "epoch": 2.8501296176279975, "grad_norm": 0.0656026229262352, "learning_rate": 6.388226292812458e-05, "loss": 0.272, "step": 35182 }, { "epoch": 2.850210628645496, "grad_norm": 0.06331050395965576, "learning_rate": 6.387776227552995e-05, "loss": 0.2309, "step": 35183 }, { "epoch": 2.850291639662994, "grad_norm": 0.06337806582450867, "learning_rate": 6.387326162293533e-05, "loss": 0.2291, "step": 35184 }, { "epoch": 2.8503726506804927, "grad_norm": 0.06179683655500412, "learning_rate": 6.38687609703407e-05, "loss": 0.2544, "step": 35185 }, { "epoch": 2.850453661697991, "grad_norm": 0.08526507019996643, "learning_rate": 6.386426031774608e-05, "loss": 0.2118, "step": 35186 }, { "epoch": 2.850534672715489, "grad_norm": 0.0754719078540802, "learning_rate": 6.385975966515145e-05, "loss": 0.2297, "step": 35187 }, { "epoch": 2.850615683732988, "grad_norm": 0.06728820502758026, "learning_rate": 6.385525901255682e-05, "loss": 0.2813, "step": 35188 }, { "epoch": 2.850696694750486, "grad_norm": 0.06928353011608124, "learning_rate": 6.38507583599622e-05, "loss": 0.2412, "step": 35189 }, { "epoch": 2.8507777057679844, "grad_norm": 0.07666745781898499, "learning_rate": 6.384625770736757e-05, "loss": 0.246, "step": 35190 }, { "epoch": 2.850858716785483, "grad_norm": 0.058885589241981506, "learning_rate": 6.384175705477294e-05, "loss": 0.249, "step": 35191 }, { "epoch": 2.8509397278029813, "grad_norm": 0.075236476957798, "learning_rate": 6.383725640217832e-05, "loss": 0.2784, "step": 35192 }, { "epoch": 2.8510207388204796, "grad_norm": 0.06604667007923126, "learning_rate": 6.383275574958369e-05, "loss": 0.2177, "step": 35193 }, { "epoch": 2.851101749837978, "grad_norm": 0.06537952274084091, "learning_rate": 6.382825509698906e-05, "loss": 0.222, "step": 35194 }, { "epoch": 2.8511827608554765, "grad_norm": 0.07576059550046921, "learning_rate": 6.382375444439445e-05, "loss": 0.221, "step": 35195 }, { "epoch": 2.8512637718729748, "grad_norm": 0.06259869784116745, "learning_rate": 6.381925379179981e-05, "loss": 0.228, "step": 35196 }, { "epoch": 2.851344782890473, "grad_norm": 0.07308464497327805, "learning_rate": 6.381475313920518e-05, "loss": 0.2048, "step": 35197 }, { "epoch": 2.8514257939079712, "grad_norm": 0.06591349095106125, "learning_rate": 6.381025248661057e-05, "loss": 0.2246, "step": 35198 }, { "epoch": 2.85150680492547, "grad_norm": 0.0643618181347847, "learning_rate": 6.380575183401593e-05, "loss": 0.2277, "step": 35199 }, { "epoch": 2.851587815942968, "grad_norm": 0.06636466085910797, "learning_rate": 6.38012511814213e-05, "loss": 0.2164, "step": 35200 }, { "epoch": 2.8516688269604664, "grad_norm": 0.07473160326480865, "learning_rate": 6.379675052882669e-05, "loss": 0.2129, "step": 35201 }, { "epoch": 2.851749837977965, "grad_norm": 0.06256754696369171, "learning_rate": 6.379224987623205e-05, "loss": 0.2112, "step": 35202 }, { "epoch": 2.8518308489954634, "grad_norm": 0.06157509610056877, "learning_rate": 6.378774922363743e-05, "loss": 0.1868, "step": 35203 }, { "epoch": 2.8519118600129616, "grad_norm": 0.0895833671092987, "learning_rate": 6.378324857104281e-05, "loss": 0.2341, "step": 35204 }, { "epoch": 2.8519928710304603, "grad_norm": 0.061636537313461304, "learning_rate": 6.377874791844817e-05, "loss": 0.2464, "step": 35205 }, { "epoch": 2.8520738820479585, "grad_norm": 0.07151246815919876, "learning_rate": 6.377424726585355e-05, "loss": 0.2118, "step": 35206 }, { "epoch": 2.8521548930654568, "grad_norm": 0.08077570796012878, "learning_rate": 6.376974661325893e-05, "loss": 0.2486, "step": 35207 }, { "epoch": 2.8522359040829555, "grad_norm": 0.07708319276571274, "learning_rate": 6.37652459606643e-05, "loss": 0.2665, "step": 35208 }, { "epoch": 2.8523169151004537, "grad_norm": 0.06794562935829163, "learning_rate": 6.376074530806967e-05, "loss": 0.228, "step": 35209 }, { "epoch": 2.852397926117952, "grad_norm": 0.06809671968221664, "learning_rate": 6.375624465547506e-05, "loss": 0.2367, "step": 35210 }, { "epoch": 2.8524789371354506, "grad_norm": 0.07280357927083969, "learning_rate": 6.375174400288042e-05, "loss": 0.2497, "step": 35211 }, { "epoch": 2.852559948152949, "grad_norm": 0.07265602797269821, "learning_rate": 6.374724335028579e-05, "loss": 0.2702, "step": 35212 }, { "epoch": 2.852640959170447, "grad_norm": 0.07723171263933182, "learning_rate": 6.374274269769118e-05, "loss": 0.2374, "step": 35213 }, { "epoch": 2.852721970187946, "grad_norm": 0.07471165806055069, "learning_rate": 6.373824204509654e-05, "loss": 0.2167, "step": 35214 }, { "epoch": 2.852802981205444, "grad_norm": 0.06255011260509491, "learning_rate": 6.373374139250191e-05, "loss": 0.2229, "step": 35215 }, { "epoch": 2.8528839922229423, "grad_norm": 0.07400676608085632, "learning_rate": 6.37292407399073e-05, "loss": 0.2626, "step": 35216 }, { "epoch": 2.8529650032404406, "grad_norm": 0.07762713730335236, "learning_rate": 6.372474008731266e-05, "loss": 0.2496, "step": 35217 }, { "epoch": 2.8530460142579392, "grad_norm": 0.07321801781654358, "learning_rate": 6.372023943471803e-05, "loss": 0.249, "step": 35218 }, { "epoch": 2.8531270252754375, "grad_norm": 0.0633714348077774, "learning_rate": 6.371573878212342e-05, "loss": 0.2319, "step": 35219 }, { "epoch": 2.8532080362929357, "grad_norm": 0.06907516717910767, "learning_rate": 6.371123812952878e-05, "loss": 0.2051, "step": 35220 }, { "epoch": 2.853289047310434, "grad_norm": 0.0761343389749527, "learning_rate": 6.370673747693416e-05, "loss": 0.2348, "step": 35221 }, { "epoch": 2.8533700583279327, "grad_norm": 0.05884816497564316, "learning_rate": 6.370223682433954e-05, "loss": 0.2294, "step": 35222 }, { "epoch": 2.853451069345431, "grad_norm": 0.08518347144126892, "learning_rate": 6.36977361717449e-05, "loss": 0.268, "step": 35223 }, { "epoch": 2.853532080362929, "grad_norm": 0.06178712844848633, "learning_rate": 6.369323551915029e-05, "loss": 0.2653, "step": 35224 }, { "epoch": 2.853613091380428, "grad_norm": 0.0769021064043045, "learning_rate": 6.368873486655566e-05, "loss": 0.2757, "step": 35225 }, { "epoch": 2.853694102397926, "grad_norm": 0.08336912095546722, "learning_rate": 6.368423421396102e-05, "loss": 0.2508, "step": 35226 }, { "epoch": 2.8537751134154243, "grad_norm": 0.08090720325708389, "learning_rate": 6.36797335613664e-05, "loss": 0.2616, "step": 35227 }, { "epoch": 2.853856124432923, "grad_norm": 0.07356841117143631, "learning_rate": 6.367523290877178e-05, "loss": 0.2864, "step": 35228 }, { "epoch": 2.8539371354504213, "grad_norm": 0.08213097602128983, "learning_rate": 6.367073225617714e-05, "loss": 0.2625, "step": 35229 }, { "epoch": 2.8540181464679195, "grad_norm": 0.06912447512149811, "learning_rate": 6.366623160358253e-05, "loss": 0.2115, "step": 35230 }, { "epoch": 2.854099157485418, "grad_norm": 0.07546108961105347, "learning_rate": 6.36617309509879e-05, "loss": 0.2738, "step": 35231 }, { "epoch": 2.8541801685029164, "grad_norm": 0.06090640276670456, "learning_rate": 6.365723029839326e-05, "loss": 0.2212, "step": 35232 }, { "epoch": 2.8542611795204147, "grad_norm": 0.06261391192674637, "learning_rate": 6.365272964579865e-05, "loss": 0.2001, "step": 35233 }, { "epoch": 2.8543421905379134, "grad_norm": 0.0737534910440445, "learning_rate": 6.364822899320402e-05, "loss": 0.2346, "step": 35234 }, { "epoch": 2.8544232015554116, "grad_norm": 0.07249848544597626, "learning_rate": 6.364372834060938e-05, "loss": 0.2115, "step": 35235 }, { "epoch": 2.85450421257291, "grad_norm": 0.06756263226270676, "learning_rate": 6.363922768801477e-05, "loss": 0.2579, "step": 35236 }, { "epoch": 2.8545852235904086, "grad_norm": 0.0735563412308693, "learning_rate": 6.363472703542014e-05, "loss": 0.2398, "step": 35237 }, { "epoch": 2.854666234607907, "grad_norm": 0.06471346318721771, "learning_rate": 6.36302263828255e-05, "loss": 0.2311, "step": 35238 }, { "epoch": 2.854747245625405, "grad_norm": 0.06796788424253464, "learning_rate": 6.362572573023089e-05, "loss": 0.23, "step": 35239 }, { "epoch": 2.8548282566429033, "grad_norm": 0.06186298653483391, "learning_rate": 6.362122507763626e-05, "loss": 0.2499, "step": 35240 }, { "epoch": 2.8549092676604015, "grad_norm": 0.08557670563459396, "learning_rate": 6.361672442504162e-05, "loss": 0.2748, "step": 35241 }, { "epoch": 2.8549902786779002, "grad_norm": 0.09569763392210007, "learning_rate": 6.361222377244701e-05, "loss": 0.2927, "step": 35242 }, { "epoch": 2.8550712896953985, "grad_norm": 0.06551126390695572, "learning_rate": 6.360772311985238e-05, "loss": 0.2238, "step": 35243 }, { "epoch": 2.8551523007128967, "grad_norm": 0.07863879203796387, "learning_rate": 6.360322246725774e-05, "loss": 0.2576, "step": 35244 }, { "epoch": 2.8552333117303954, "grad_norm": 0.08634800463914871, "learning_rate": 6.359872181466313e-05, "loss": 0.2323, "step": 35245 }, { "epoch": 2.8553143227478937, "grad_norm": 0.06437304615974426, "learning_rate": 6.35942211620685e-05, "loss": 0.2183, "step": 35246 }, { "epoch": 2.855395333765392, "grad_norm": 0.06253159791231155, "learning_rate": 6.358972050947387e-05, "loss": 0.2394, "step": 35247 }, { "epoch": 2.8554763447828906, "grad_norm": 0.07356922328472137, "learning_rate": 6.358521985687925e-05, "loss": 0.2593, "step": 35248 }, { "epoch": 2.855557355800389, "grad_norm": 0.08020610362291336, "learning_rate": 6.358071920428463e-05, "loss": 0.2317, "step": 35249 }, { "epoch": 2.855638366817887, "grad_norm": 0.07049740850925446, "learning_rate": 6.357621855169e-05, "loss": 0.2635, "step": 35250 }, { "epoch": 2.8557193778353858, "grad_norm": 0.0626765564084053, "learning_rate": 6.357171789909537e-05, "loss": 0.2446, "step": 35251 }, { "epoch": 2.855800388852884, "grad_norm": 0.06806015223264694, "learning_rate": 6.356721724650075e-05, "loss": 0.2239, "step": 35252 }, { "epoch": 2.8558813998703823, "grad_norm": 0.0689484253525734, "learning_rate": 6.356271659390612e-05, "loss": 0.2429, "step": 35253 }, { "epoch": 2.855962410887881, "grad_norm": 0.05486216023564339, "learning_rate": 6.35582159413115e-05, "loss": 0.2164, "step": 35254 }, { "epoch": 2.856043421905379, "grad_norm": 0.07790280878543854, "learning_rate": 6.355371528871687e-05, "loss": 0.2526, "step": 35255 }, { "epoch": 2.8561244329228774, "grad_norm": 0.07450840622186661, "learning_rate": 6.354921463612224e-05, "loss": 0.27, "step": 35256 }, { "epoch": 2.856205443940376, "grad_norm": 0.06325256079435349, "learning_rate": 6.354471398352761e-05, "loss": 0.2214, "step": 35257 }, { "epoch": 2.8562864549578744, "grad_norm": 0.05930173397064209, "learning_rate": 6.354021333093299e-05, "loss": 0.1991, "step": 35258 }, { "epoch": 2.8563674659753726, "grad_norm": 0.08211062848567963, "learning_rate": 6.353571267833836e-05, "loss": 0.2146, "step": 35259 }, { "epoch": 2.8564484769928713, "grad_norm": 0.06989636272192001, "learning_rate": 6.353121202574374e-05, "loss": 0.2205, "step": 35260 }, { "epoch": 2.8565294880103695, "grad_norm": 0.07901213318109512, "learning_rate": 6.352671137314911e-05, "loss": 0.2526, "step": 35261 }, { "epoch": 2.856610499027868, "grad_norm": 0.07214916497468948, "learning_rate": 6.352221072055448e-05, "loss": 0.2327, "step": 35262 }, { "epoch": 2.856691510045366, "grad_norm": 0.06947073340415955, "learning_rate": 6.351771006795986e-05, "loss": 0.2287, "step": 35263 }, { "epoch": 2.8567725210628643, "grad_norm": 0.06751016527414322, "learning_rate": 6.351320941536523e-05, "loss": 0.2027, "step": 35264 }, { "epoch": 2.856853532080363, "grad_norm": 0.07422146946191788, "learning_rate": 6.35087087627706e-05, "loss": 0.279, "step": 35265 }, { "epoch": 2.856934543097861, "grad_norm": 0.057897377759218216, "learning_rate": 6.350420811017598e-05, "loss": 0.2344, "step": 35266 }, { "epoch": 2.8570155541153595, "grad_norm": 0.0692281723022461, "learning_rate": 6.349970745758135e-05, "loss": 0.1927, "step": 35267 }, { "epoch": 2.857096565132858, "grad_norm": 0.06540977954864502, "learning_rate": 6.349520680498672e-05, "loss": 0.2654, "step": 35268 }, { "epoch": 2.8571775761503564, "grad_norm": 0.06885252892971039, "learning_rate": 6.34907061523921e-05, "loss": 0.2198, "step": 35269 }, { "epoch": 2.8572585871678546, "grad_norm": 0.0721738189458847, "learning_rate": 6.348620549979747e-05, "loss": 0.2105, "step": 35270 }, { "epoch": 2.8573395981853533, "grad_norm": 0.07012255489826202, "learning_rate": 6.348170484720285e-05, "loss": 0.2645, "step": 35271 }, { "epoch": 2.8574206092028516, "grad_norm": 0.07275258749723434, "learning_rate": 6.347720419460822e-05, "loss": 0.2459, "step": 35272 }, { "epoch": 2.85750162022035, "grad_norm": 0.06511726975440979, "learning_rate": 6.347270354201359e-05, "loss": 0.2477, "step": 35273 }, { "epoch": 2.8575826312378485, "grad_norm": 0.07181781530380249, "learning_rate": 6.346820288941897e-05, "loss": 0.257, "step": 35274 }, { "epoch": 2.8576636422553467, "grad_norm": 0.082255057990551, "learning_rate": 6.346370223682434e-05, "loss": 0.234, "step": 35275 }, { "epoch": 2.857744653272845, "grad_norm": 0.07187642902135849, "learning_rate": 6.345920158422973e-05, "loss": 0.218, "step": 35276 }, { "epoch": 2.8578256642903437, "grad_norm": 0.08506189286708832, "learning_rate": 6.345470093163509e-05, "loss": 0.2611, "step": 35277 }, { "epoch": 2.857906675307842, "grad_norm": 0.062427401542663574, "learning_rate": 6.345020027904046e-05, "loss": 0.2199, "step": 35278 }, { "epoch": 2.85798768632534, "grad_norm": 0.07684777677059174, "learning_rate": 6.344569962644585e-05, "loss": 0.2513, "step": 35279 }, { "epoch": 2.858068697342839, "grad_norm": 0.06772081553936005, "learning_rate": 6.344119897385121e-05, "loss": 0.2081, "step": 35280 }, { "epoch": 2.858149708360337, "grad_norm": 0.07348523288965225, "learning_rate": 6.343669832125658e-05, "loss": 0.2299, "step": 35281 }, { "epoch": 2.8582307193778353, "grad_norm": 0.06706465035676956, "learning_rate": 6.343219766866197e-05, "loss": 0.2021, "step": 35282 }, { "epoch": 2.858311730395334, "grad_norm": 0.07620218396186829, "learning_rate": 6.342769701606733e-05, "loss": 0.2334, "step": 35283 }, { "epoch": 2.8583927414128323, "grad_norm": 0.06165030971169472, "learning_rate": 6.34231963634727e-05, "loss": 0.2203, "step": 35284 }, { "epoch": 2.8584737524303305, "grad_norm": 0.07112786173820496, "learning_rate": 6.341869571087809e-05, "loss": 0.2484, "step": 35285 }, { "epoch": 2.8585547634478288, "grad_norm": 0.08486390858888626, "learning_rate": 6.341419505828345e-05, "loss": 0.2339, "step": 35286 }, { "epoch": 2.858635774465327, "grad_norm": 0.06826337426900864, "learning_rate": 6.340969440568882e-05, "loss": 0.2212, "step": 35287 }, { "epoch": 2.8587167854828257, "grad_norm": 0.06725852191448212, "learning_rate": 6.340519375309421e-05, "loss": 0.2556, "step": 35288 }, { "epoch": 2.858797796500324, "grad_norm": 0.06825444102287292, "learning_rate": 6.340069310049957e-05, "loss": 0.2074, "step": 35289 }, { "epoch": 2.858878807517822, "grad_norm": 0.08633331209421158, "learning_rate": 6.339619244790494e-05, "loss": 0.251, "step": 35290 }, { "epoch": 2.858959818535321, "grad_norm": 0.05289662256836891, "learning_rate": 6.339169179531033e-05, "loss": 0.2458, "step": 35291 }, { "epoch": 2.859040829552819, "grad_norm": 0.06766065210103989, "learning_rate": 6.338719114271569e-05, "loss": 0.2007, "step": 35292 }, { "epoch": 2.8591218405703174, "grad_norm": 0.07813969999551773, "learning_rate": 6.338269049012106e-05, "loss": 0.2458, "step": 35293 }, { "epoch": 2.859202851587816, "grad_norm": 0.07089391350746155, "learning_rate": 6.337818983752645e-05, "loss": 0.2372, "step": 35294 }, { "epoch": 2.8592838626053143, "grad_norm": 0.0717868059873581, "learning_rate": 6.337368918493181e-05, "loss": 0.2528, "step": 35295 }, { "epoch": 2.8593648736228126, "grad_norm": 0.07486657053232193, "learning_rate": 6.336918853233719e-05, "loss": 0.2474, "step": 35296 }, { "epoch": 2.8594458846403112, "grad_norm": 0.06870671361684799, "learning_rate": 6.336468787974257e-05, "loss": 0.2606, "step": 35297 }, { "epoch": 2.8595268956578095, "grad_norm": 0.057232122868299484, "learning_rate": 6.336018722714793e-05, "loss": 0.233, "step": 35298 }, { "epoch": 2.8596079066753077, "grad_norm": 0.07663465291261673, "learning_rate": 6.33556865745533e-05, "loss": 0.2177, "step": 35299 }, { "epoch": 2.8596889176928064, "grad_norm": 0.050945814698934555, "learning_rate": 6.335118592195869e-05, "loss": 0.2268, "step": 35300 }, { "epoch": 2.8597699287103047, "grad_norm": 0.06260829418897629, "learning_rate": 6.334668526936405e-05, "loss": 0.2582, "step": 35301 }, { "epoch": 2.859850939727803, "grad_norm": 0.07052972912788391, "learning_rate": 6.334218461676944e-05, "loss": 0.2534, "step": 35302 }, { "epoch": 2.8599319507453016, "grad_norm": 0.07194507867097855, "learning_rate": 6.333768396417481e-05, "loss": 0.2344, "step": 35303 }, { "epoch": 2.8600129617628, "grad_norm": 0.07999186217784882, "learning_rate": 6.333318331158017e-05, "loss": 0.2746, "step": 35304 }, { "epoch": 2.860093972780298, "grad_norm": 0.05699498951435089, "learning_rate": 6.332868265898556e-05, "loss": 0.2543, "step": 35305 }, { "epoch": 2.8601749837977968, "grad_norm": 0.06230924651026726, "learning_rate": 6.332418200639093e-05, "loss": 0.22, "step": 35306 }, { "epoch": 2.860255994815295, "grad_norm": 0.0675150528550148, "learning_rate": 6.33196813537963e-05, "loss": 0.2309, "step": 35307 }, { "epoch": 2.8603370058327933, "grad_norm": 0.06433086842298508, "learning_rate": 6.331518070120168e-05, "loss": 0.2268, "step": 35308 }, { "epoch": 2.8604180168502915, "grad_norm": 0.06448919326066971, "learning_rate": 6.331068004860706e-05, "loss": 0.2167, "step": 35309 }, { "epoch": 2.8604990278677898, "grad_norm": 0.058945026248693466, "learning_rate": 6.330617939601242e-05, "loss": 0.2175, "step": 35310 }, { "epoch": 2.8605800388852884, "grad_norm": 0.062296051532030106, "learning_rate": 6.33016787434178e-05, "loss": 0.2121, "step": 35311 }, { "epoch": 2.8606610499027867, "grad_norm": 0.06867983937263489, "learning_rate": 6.329717809082318e-05, "loss": 0.2024, "step": 35312 }, { "epoch": 2.860742060920285, "grad_norm": 0.07533132284879684, "learning_rate": 6.329267743822854e-05, "loss": 0.2656, "step": 35313 }, { "epoch": 2.8608230719377836, "grad_norm": 0.07362021505832672, "learning_rate": 6.328817678563392e-05, "loss": 0.235, "step": 35314 }, { "epoch": 2.860904082955282, "grad_norm": 0.08826854079961777, "learning_rate": 6.32836761330393e-05, "loss": 0.2769, "step": 35315 }, { "epoch": 2.86098509397278, "grad_norm": 0.07047184556722641, "learning_rate": 6.327917548044466e-05, "loss": 0.2569, "step": 35316 }, { "epoch": 2.861066104990279, "grad_norm": 0.06254454702138901, "learning_rate": 6.327467482785004e-05, "loss": 0.2417, "step": 35317 }, { "epoch": 2.861147116007777, "grad_norm": 0.08397172391414642, "learning_rate": 6.327017417525542e-05, "loss": 0.2571, "step": 35318 }, { "epoch": 2.8612281270252753, "grad_norm": 0.06830441951751709, "learning_rate": 6.326567352266078e-05, "loss": 0.2049, "step": 35319 }, { "epoch": 2.861309138042774, "grad_norm": 0.06679099053144455, "learning_rate": 6.326117287006617e-05, "loss": 0.2213, "step": 35320 }, { "epoch": 2.8613901490602722, "grad_norm": 0.07950352877378464, "learning_rate": 6.325667221747154e-05, "loss": 0.2821, "step": 35321 }, { "epoch": 2.8614711600777705, "grad_norm": 0.06094673275947571, "learning_rate": 6.32521715648769e-05, "loss": 0.2486, "step": 35322 }, { "epoch": 2.861552171095269, "grad_norm": 0.07335047423839569, "learning_rate": 6.324767091228229e-05, "loss": 0.2606, "step": 35323 }, { "epoch": 2.8616331821127674, "grad_norm": 0.06174264848232269, "learning_rate": 6.324317025968766e-05, "loss": 0.2105, "step": 35324 }, { "epoch": 2.8617141931302656, "grad_norm": 0.07181406766176224, "learning_rate": 6.323866960709302e-05, "loss": 0.2196, "step": 35325 }, { "epoch": 2.8617952041477643, "grad_norm": 0.060329336673021317, "learning_rate": 6.323416895449841e-05, "loss": 0.2211, "step": 35326 }, { "epoch": 2.8618762151652626, "grad_norm": 0.06322979927062988, "learning_rate": 6.322966830190378e-05, "loss": 0.2337, "step": 35327 }, { "epoch": 2.861957226182761, "grad_norm": 0.0658821389079094, "learning_rate": 6.322516764930915e-05, "loss": 0.2367, "step": 35328 }, { "epoch": 2.862038237200259, "grad_norm": 0.07008875906467438, "learning_rate": 6.322066699671453e-05, "loss": 0.2247, "step": 35329 }, { "epoch": 2.8621192482177578, "grad_norm": 0.07550831139087677, "learning_rate": 6.32161663441199e-05, "loss": 0.2494, "step": 35330 }, { "epoch": 2.862200259235256, "grad_norm": 0.06202748790383339, "learning_rate": 6.321166569152527e-05, "loss": 0.262, "step": 35331 }, { "epoch": 2.8622812702527543, "grad_norm": 0.06371939927339554, "learning_rate": 6.320716503893065e-05, "loss": 0.2553, "step": 35332 }, { "epoch": 2.8623622812702525, "grad_norm": 0.06563441455364227, "learning_rate": 6.320266438633602e-05, "loss": 0.2378, "step": 35333 }, { "epoch": 2.862443292287751, "grad_norm": 0.061470042914152145, "learning_rate": 6.31981637337414e-05, "loss": 0.2063, "step": 35334 }, { "epoch": 2.8625243033052494, "grad_norm": 0.07362918555736542, "learning_rate": 6.319366308114677e-05, "loss": 0.2389, "step": 35335 }, { "epoch": 2.8626053143227477, "grad_norm": 0.06169411167502403, "learning_rate": 6.318916242855214e-05, "loss": 0.2074, "step": 35336 }, { "epoch": 2.8626863253402464, "grad_norm": 0.07283011823892593, "learning_rate": 6.318466177595752e-05, "loss": 0.2238, "step": 35337 }, { "epoch": 2.8627673363577446, "grad_norm": 0.06744706630706787, "learning_rate": 6.318016112336289e-05, "loss": 0.272, "step": 35338 }, { "epoch": 2.862848347375243, "grad_norm": 0.0716882050037384, "learning_rate": 6.317566047076826e-05, "loss": 0.2215, "step": 35339 }, { "epoch": 2.8629293583927415, "grad_norm": 0.05441503971815109, "learning_rate": 6.317115981817364e-05, "loss": 0.1988, "step": 35340 }, { "epoch": 2.86301036941024, "grad_norm": 0.0689413994550705, "learning_rate": 6.316665916557901e-05, "loss": 0.2252, "step": 35341 }, { "epoch": 2.863091380427738, "grad_norm": 0.061735041439533234, "learning_rate": 6.316215851298438e-05, "loss": 0.232, "step": 35342 }, { "epoch": 2.8631723914452367, "grad_norm": 0.09404154121875763, "learning_rate": 6.315765786038976e-05, "loss": 0.2105, "step": 35343 }, { "epoch": 2.863253402462735, "grad_norm": 0.0623144656419754, "learning_rate": 6.315315720779513e-05, "loss": 0.2107, "step": 35344 }, { "epoch": 2.863334413480233, "grad_norm": 0.062484726309776306, "learning_rate": 6.31486565552005e-05, "loss": 0.2606, "step": 35345 }, { "epoch": 2.863415424497732, "grad_norm": 0.07301829010248184, "learning_rate": 6.314415590260588e-05, "loss": 0.2335, "step": 35346 }, { "epoch": 2.86349643551523, "grad_norm": 0.06890372931957245, "learning_rate": 6.313965525001125e-05, "loss": 0.2306, "step": 35347 }, { "epoch": 2.8635774465327284, "grad_norm": 0.07791730016469955, "learning_rate": 6.313515459741663e-05, "loss": 0.2477, "step": 35348 }, { "epoch": 2.863658457550227, "grad_norm": 0.07661699503660202, "learning_rate": 6.3130653944822e-05, "loss": 0.2427, "step": 35349 }, { "epoch": 2.8637394685677253, "grad_norm": 0.07754947990179062, "learning_rate": 6.312615329222737e-05, "loss": 0.2412, "step": 35350 }, { "epoch": 2.8638204795852236, "grad_norm": 0.07087945193052292, "learning_rate": 6.312165263963275e-05, "loss": 0.269, "step": 35351 }, { "epoch": 2.863901490602722, "grad_norm": 0.0750063881278038, "learning_rate": 6.311715198703812e-05, "loss": 0.2525, "step": 35352 }, { "epoch": 2.8639825016202205, "grad_norm": 0.06159652769565582, "learning_rate": 6.31126513344435e-05, "loss": 0.2505, "step": 35353 }, { "epoch": 2.8640635126377187, "grad_norm": 0.06571204215288162, "learning_rate": 6.310815068184888e-05, "loss": 0.2329, "step": 35354 }, { "epoch": 2.864144523655217, "grad_norm": 0.06939777731895447, "learning_rate": 6.310365002925424e-05, "loss": 0.2651, "step": 35355 }, { "epoch": 2.8642255346727152, "grad_norm": 0.06422492116689682, "learning_rate": 6.309914937665961e-05, "loss": 0.2443, "step": 35356 }, { "epoch": 2.864306545690214, "grad_norm": 0.07563510537147522, "learning_rate": 6.3094648724065e-05, "loss": 0.2143, "step": 35357 }, { "epoch": 2.864387556707712, "grad_norm": 0.07055597752332687, "learning_rate": 6.309014807147036e-05, "loss": 0.2762, "step": 35358 }, { "epoch": 2.8644685677252104, "grad_norm": 0.07962583005428314, "learning_rate": 6.308564741887574e-05, "loss": 0.2304, "step": 35359 }, { "epoch": 2.864549578742709, "grad_norm": 0.07300310581922531, "learning_rate": 6.308114676628112e-05, "loss": 0.2459, "step": 35360 }, { "epoch": 2.8646305897602073, "grad_norm": 0.06624911725521088, "learning_rate": 6.307664611368648e-05, "loss": 0.2411, "step": 35361 }, { "epoch": 2.8647116007777056, "grad_norm": 0.07704348862171173, "learning_rate": 6.307214546109186e-05, "loss": 0.2419, "step": 35362 }, { "epoch": 2.8647926117952043, "grad_norm": 0.07485693693161011, "learning_rate": 6.306764480849724e-05, "loss": 0.2371, "step": 35363 }, { "epoch": 2.8648736228127025, "grad_norm": 0.06935972720384598, "learning_rate": 6.30631441559026e-05, "loss": 0.2247, "step": 35364 }, { "epoch": 2.8649546338302008, "grad_norm": 0.07739797234535217, "learning_rate": 6.305864350330798e-05, "loss": 0.2283, "step": 35365 }, { "epoch": 2.8650356448476995, "grad_norm": 0.06980100274085999, "learning_rate": 6.305414285071336e-05, "loss": 0.2481, "step": 35366 }, { "epoch": 2.8651166558651977, "grad_norm": 0.0687079057097435, "learning_rate": 6.304964219811872e-05, "loss": 0.2261, "step": 35367 }, { "epoch": 2.865197666882696, "grad_norm": 0.060128916054964066, "learning_rate": 6.30451415455241e-05, "loss": 0.2552, "step": 35368 }, { "epoch": 2.8652786779001946, "grad_norm": 0.08529718220233917, "learning_rate": 6.304064089292949e-05, "loss": 0.2835, "step": 35369 }, { "epoch": 2.865359688917693, "grad_norm": 0.0598611943423748, "learning_rate": 6.303614024033485e-05, "loss": 0.202, "step": 35370 }, { "epoch": 2.865440699935191, "grad_norm": 0.07495393604040146, "learning_rate": 6.303163958774022e-05, "loss": 0.2317, "step": 35371 }, { "epoch": 2.86552171095269, "grad_norm": 0.057052772492170334, "learning_rate": 6.30271389351456e-05, "loss": 0.2322, "step": 35372 }, { "epoch": 2.865602721970188, "grad_norm": 0.06384529173374176, "learning_rate": 6.302263828255097e-05, "loss": 0.2231, "step": 35373 }, { "epoch": 2.8656837329876863, "grad_norm": 0.06650028377771378, "learning_rate": 6.301813762995634e-05, "loss": 0.2609, "step": 35374 }, { "epoch": 2.8657647440051845, "grad_norm": 0.06612016260623932, "learning_rate": 6.301363697736173e-05, "loss": 0.231, "step": 35375 }, { "epoch": 2.8658457550226832, "grad_norm": 0.059364426881074905, "learning_rate": 6.300913632476709e-05, "loss": 0.2322, "step": 35376 }, { "epoch": 2.8659267660401815, "grad_norm": 0.06600163877010345, "learning_rate": 6.300463567217246e-05, "loss": 0.2155, "step": 35377 }, { "epoch": 2.8660077770576797, "grad_norm": 0.0754174292087555, "learning_rate": 6.300013501957785e-05, "loss": 0.246, "step": 35378 }, { "epoch": 2.866088788075178, "grad_norm": 0.08635005354881287, "learning_rate": 6.299563436698321e-05, "loss": 0.247, "step": 35379 }, { "epoch": 2.8661697990926767, "grad_norm": 0.06976178288459778, "learning_rate": 6.29911337143886e-05, "loss": 0.2224, "step": 35380 }, { "epoch": 2.866250810110175, "grad_norm": 0.06297709047794342, "learning_rate": 6.298663306179397e-05, "loss": 0.2141, "step": 35381 }, { "epoch": 2.866331821127673, "grad_norm": 0.06248379126191139, "learning_rate": 6.298213240919933e-05, "loss": 0.2168, "step": 35382 }, { "epoch": 2.866412832145172, "grad_norm": 0.060140691697597504, "learning_rate": 6.297763175660472e-05, "loss": 0.2225, "step": 35383 }, { "epoch": 2.86649384316267, "grad_norm": 0.07628795504570007, "learning_rate": 6.297313110401009e-05, "loss": 0.2349, "step": 35384 }, { "epoch": 2.8665748541801683, "grad_norm": 0.06263097375631332, "learning_rate": 6.296863045141545e-05, "loss": 0.2161, "step": 35385 }, { "epoch": 2.866655865197667, "grad_norm": 0.09367737919092178, "learning_rate": 6.296412979882084e-05, "loss": 0.2256, "step": 35386 }, { "epoch": 2.8667368762151653, "grad_norm": 0.07277649641036987, "learning_rate": 6.295962914622621e-05, "loss": 0.2514, "step": 35387 }, { "epoch": 2.8668178872326635, "grad_norm": 0.07186709344387054, "learning_rate": 6.295512849363157e-05, "loss": 0.2418, "step": 35388 }, { "epoch": 2.866898898250162, "grad_norm": 0.06744267791509628, "learning_rate": 6.295062784103696e-05, "loss": 0.2127, "step": 35389 }, { "epoch": 2.8669799092676604, "grad_norm": 0.06757162511348724, "learning_rate": 6.294612718844233e-05, "loss": 0.2476, "step": 35390 }, { "epoch": 2.8670609202851587, "grad_norm": 0.061270441859960556, "learning_rate": 6.294162653584769e-05, "loss": 0.2302, "step": 35391 }, { "epoch": 2.8671419313026574, "grad_norm": 0.07216763496398926, "learning_rate": 6.293712588325308e-05, "loss": 0.274, "step": 35392 }, { "epoch": 2.8672229423201556, "grad_norm": 0.0688694566488266, "learning_rate": 6.293262523065845e-05, "loss": 0.2617, "step": 35393 }, { "epoch": 2.867303953337654, "grad_norm": 0.07353059202432632, "learning_rate": 6.292812457806381e-05, "loss": 0.2692, "step": 35394 }, { "epoch": 2.8673849643551526, "grad_norm": 0.07066787034273148, "learning_rate": 6.29236239254692e-05, "loss": 0.2335, "step": 35395 }, { "epoch": 2.867465975372651, "grad_norm": 0.07494823634624481, "learning_rate": 6.291912327287457e-05, "loss": 0.2416, "step": 35396 }, { "epoch": 2.867546986390149, "grad_norm": 0.07171319425106049, "learning_rate": 6.291462262027993e-05, "loss": 0.2809, "step": 35397 }, { "epoch": 2.8676279974076473, "grad_norm": 0.07325095683336258, "learning_rate": 6.291012196768532e-05, "loss": 0.2258, "step": 35398 }, { "epoch": 2.867709008425146, "grad_norm": 0.0509103499352932, "learning_rate": 6.29056213150907e-05, "loss": 0.2138, "step": 35399 }, { "epoch": 2.8677900194426442, "grad_norm": 0.055378302931785583, "learning_rate": 6.290112066249607e-05, "loss": 0.2314, "step": 35400 }, { "epoch": 2.8678710304601425, "grad_norm": 0.07109376788139343, "learning_rate": 6.289662000990144e-05, "loss": 0.223, "step": 35401 }, { "epoch": 2.8679520414776407, "grad_norm": 0.07241575419902802, "learning_rate": 6.289211935730681e-05, "loss": 0.2772, "step": 35402 }, { "epoch": 2.8680330524951394, "grad_norm": 0.08092466741800308, "learning_rate": 6.288761870471219e-05, "loss": 0.2879, "step": 35403 }, { "epoch": 2.8681140635126376, "grad_norm": 0.06131209805607796, "learning_rate": 6.288311805211756e-05, "loss": 0.2668, "step": 35404 }, { "epoch": 2.868195074530136, "grad_norm": 0.06692001223564148, "learning_rate": 6.287861739952294e-05, "loss": 0.2437, "step": 35405 }, { "epoch": 2.8682760855476346, "grad_norm": 0.09271931648254395, "learning_rate": 6.287411674692831e-05, "loss": 0.2373, "step": 35406 }, { "epoch": 2.868357096565133, "grad_norm": 0.06557455658912659, "learning_rate": 6.286961609433368e-05, "loss": 0.2273, "step": 35407 }, { "epoch": 2.868438107582631, "grad_norm": 0.08470204472541809, "learning_rate": 6.286511544173906e-05, "loss": 0.2361, "step": 35408 }, { "epoch": 2.8685191186001298, "grad_norm": 0.060380466282367706, "learning_rate": 6.286061478914443e-05, "loss": 0.2297, "step": 35409 }, { "epoch": 2.868600129617628, "grad_norm": 0.07112187147140503, "learning_rate": 6.28561141365498e-05, "loss": 0.234, "step": 35410 }, { "epoch": 2.8686811406351262, "grad_norm": 0.07456043362617493, "learning_rate": 6.285161348395518e-05, "loss": 0.2998, "step": 35411 }, { "epoch": 2.868762151652625, "grad_norm": 0.05900955572724342, "learning_rate": 6.284711283136055e-05, "loss": 0.2041, "step": 35412 }, { "epoch": 2.868843162670123, "grad_norm": 0.07007622718811035, "learning_rate": 6.284261217876592e-05, "loss": 0.2142, "step": 35413 }, { "epoch": 2.8689241736876214, "grad_norm": 0.08670007437467575, "learning_rate": 6.28381115261713e-05, "loss": 0.2603, "step": 35414 }, { "epoch": 2.86900518470512, "grad_norm": 0.07252345979213715, "learning_rate": 6.283361087357667e-05, "loss": 0.2462, "step": 35415 }, { "epoch": 2.8690861957226184, "grad_norm": 0.0890662744641304, "learning_rate": 6.282911022098204e-05, "loss": 0.2453, "step": 35416 }, { "epoch": 2.8691672067401166, "grad_norm": 0.07275164127349854, "learning_rate": 6.282460956838742e-05, "loss": 0.2335, "step": 35417 }, { "epoch": 2.8692482177576153, "grad_norm": 0.07086493074893951, "learning_rate": 6.282010891579279e-05, "loss": 0.2012, "step": 35418 }, { "epoch": 2.8693292287751135, "grad_norm": 0.07489845901727676, "learning_rate": 6.281560826319817e-05, "loss": 0.2585, "step": 35419 }, { "epoch": 2.869410239792612, "grad_norm": 0.06938111037015915, "learning_rate": 6.281110761060354e-05, "loss": 0.2936, "step": 35420 }, { "epoch": 2.86949125081011, "grad_norm": 0.10458862781524658, "learning_rate": 6.280660695800891e-05, "loss": 0.215, "step": 35421 }, { "epoch": 2.8695722618276087, "grad_norm": 0.07261291146278381, "learning_rate": 6.280210630541429e-05, "loss": 0.2707, "step": 35422 }, { "epoch": 2.869653272845107, "grad_norm": 0.06971706449985504, "learning_rate": 6.279760565281966e-05, "loss": 0.2418, "step": 35423 }, { "epoch": 2.869734283862605, "grad_norm": 0.06998847424983978, "learning_rate": 6.279310500022503e-05, "loss": 0.2288, "step": 35424 }, { "epoch": 2.8698152948801035, "grad_norm": 0.07669820636510849, "learning_rate": 6.278860434763041e-05, "loss": 0.2475, "step": 35425 }, { "epoch": 2.869896305897602, "grad_norm": 0.07179666310548782, "learning_rate": 6.278410369503578e-05, "loss": 0.2127, "step": 35426 }, { "epoch": 2.8699773169151004, "grad_norm": 0.07503090798854828, "learning_rate": 6.277960304244115e-05, "loss": 0.239, "step": 35427 }, { "epoch": 2.8700583279325986, "grad_norm": 0.0681663379073143, "learning_rate": 6.277510238984653e-05, "loss": 0.2254, "step": 35428 }, { "epoch": 2.8701393389500973, "grad_norm": 0.074290432035923, "learning_rate": 6.27706017372519e-05, "loss": 0.2204, "step": 35429 }, { "epoch": 2.8702203499675956, "grad_norm": 0.08182138949632645, "learning_rate": 6.276610108465728e-05, "loss": 0.2269, "step": 35430 }, { "epoch": 2.870301360985094, "grad_norm": 0.07460813224315643, "learning_rate": 6.276160043206265e-05, "loss": 0.2537, "step": 35431 }, { "epoch": 2.8703823720025925, "grad_norm": 0.0704612210392952, "learning_rate": 6.275709977946802e-05, "loss": 0.2059, "step": 35432 }, { "epoch": 2.8704633830200907, "grad_norm": 0.07259119302034378, "learning_rate": 6.27525991268734e-05, "loss": 0.2568, "step": 35433 }, { "epoch": 2.870544394037589, "grad_norm": 0.06900456547737122, "learning_rate": 6.274809847427877e-05, "loss": 0.2531, "step": 35434 }, { "epoch": 2.8706254050550877, "grad_norm": 0.07990577816963196, "learning_rate": 6.274359782168416e-05, "loss": 0.2337, "step": 35435 }, { "epoch": 2.870706416072586, "grad_norm": 0.05933460593223572, "learning_rate": 6.273909716908952e-05, "loss": 0.2447, "step": 35436 }, { "epoch": 2.870787427090084, "grad_norm": 0.0612567774951458, "learning_rate": 6.273459651649489e-05, "loss": 0.2196, "step": 35437 }, { "epoch": 2.870868438107583, "grad_norm": 0.0733252465724945, "learning_rate": 6.273009586390028e-05, "loss": 0.2116, "step": 35438 }, { "epoch": 2.870949449125081, "grad_norm": 0.07396426796913147, "learning_rate": 6.272559521130564e-05, "loss": 0.2392, "step": 35439 }, { "epoch": 2.8710304601425793, "grad_norm": 0.07068262249231339, "learning_rate": 6.272109455871101e-05, "loss": 0.2437, "step": 35440 }, { "epoch": 2.871111471160078, "grad_norm": 0.06269538402557373, "learning_rate": 6.27165939061164e-05, "loss": 0.2486, "step": 35441 }, { "epoch": 2.8711924821775763, "grad_norm": 0.07349444180727005, "learning_rate": 6.271209325352176e-05, "loss": 0.2264, "step": 35442 }, { "epoch": 2.8712734931950745, "grad_norm": 0.06318012624979019, "learning_rate": 6.270759260092713e-05, "loss": 0.2091, "step": 35443 }, { "epoch": 2.8713545042125728, "grad_norm": 0.07905392348766327, "learning_rate": 6.270309194833252e-05, "loss": 0.2388, "step": 35444 }, { "epoch": 2.8714355152300715, "grad_norm": 0.08803623914718628, "learning_rate": 6.269859129573788e-05, "loss": 0.239, "step": 35445 }, { "epoch": 2.8715165262475697, "grad_norm": 0.07360757142305374, "learning_rate": 6.269409064314325e-05, "loss": 0.2107, "step": 35446 }, { "epoch": 2.871597537265068, "grad_norm": 0.08824556320905685, "learning_rate": 6.268958999054864e-05, "loss": 0.2401, "step": 35447 }, { "epoch": 2.871678548282566, "grad_norm": 0.06086265295743942, "learning_rate": 6.2685089337954e-05, "loss": 0.2577, "step": 35448 }, { "epoch": 2.871759559300065, "grad_norm": 0.06580790132284164, "learning_rate": 6.268058868535937e-05, "loss": 0.2476, "step": 35449 }, { "epoch": 2.871840570317563, "grad_norm": 0.061794914305210114, "learning_rate": 6.267608803276476e-05, "loss": 0.2647, "step": 35450 }, { "epoch": 2.8719215813350614, "grad_norm": 0.06876268237829208, "learning_rate": 6.267158738017012e-05, "loss": 0.2087, "step": 35451 }, { "epoch": 2.87200259235256, "grad_norm": 0.06276143342256546, "learning_rate": 6.26670867275755e-05, "loss": 0.2074, "step": 35452 }, { "epoch": 2.8720836033700583, "grad_norm": 0.07023021578788757, "learning_rate": 6.266258607498088e-05, "loss": 0.2612, "step": 35453 }, { "epoch": 2.8721646143875565, "grad_norm": 0.06984831392765045, "learning_rate": 6.265808542238624e-05, "loss": 0.2163, "step": 35454 }, { "epoch": 2.8722456254050552, "grad_norm": 0.08129183948040009, "learning_rate": 6.265358476979162e-05, "loss": 0.2289, "step": 35455 }, { "epoch": 2.8723266364225535, "grad_norm": 0.07722354680299759, "learning_rate": 6.2649084117197e-05, "loss": 0.2451, "step": 35456 }, { "epoch": 2.8724076474400517, "grad_norm": 0.07596605271100998, "learning_rate": 6.264458346460236e-05, "loss": 0.2431, "step": 35457 }, { "epoch": 2.8724886584575504, "grad_norm": 0.07646816223859787, "learning_rate": 6.264008281200774e-05, "loss": 0.2241, "step": 35458 }, { "epoch": 2.8725696694750487, "grad_norm": 0.05903325974941254, "learning_rate": 6.263558215941312e-05, "loss": 0.2295, "step": 35459 }, { "epoch": 2.872650680492547, "grad_norm": 0.07608656585216522, "learning_rate": 6.263108150681848e-05, "loss": 0.2624, "step": 35460 }, { "epoch": 2.8727316915100456, "grad_norm": 0.062335386872291565, "learning_rate": 6.262658085422387e-05, "loss": 0.2214, "step": 35461 }, { "epoch": 2.872812702527544, "grad_norm": 0.07655826956033707, "learning_rate": 6.262208020162924e-05, "loss": 0.2578, "step": 35462 }, { "epoch": 2.872893713545042, "grad_norm": 0.07051601260900497, "learning_rate": 6.26175795490346e-05, "loss": 0.2404, "step": 35463 }, { "epoch": 2.8729747245625408, "grad_norm": 0.08787756413221359, "learning_rate": 6.261307889643999e-05, "loss": 0.2726, "step": 35464 }, { "epoch": 2.873055735580039, "grad_norm": 0.07177945226430893, "learning_rate": 6.260857824384536e-05, "loss": 0.2675, "step": 35465 }, { "epoch": 2.8731367465975373, "grad_norm": 0.09521305561065674, "learning_rate": 6.260407759125072e-05, "loss": 0.3276, "step": 35466 }, { "epoch": 2.8732177576150355, "grad_norm": 0.07370465993881226, "learning_rate": 6.259957693865611e-05, "loss": 0.2483, "step": 35467 }, { "epoch": 2.8732987686325338, "grad_norm": 0.06736384332180023, "learning_rate": 6.259507628606149e-05, "loss": 0.2223, "step": 35468 }, { "epoch": 2.8733797796500324, "grad_norm": 0.05994664132595062, "learning_rate": 6.259057563346686e-05, "loss": 0.2281, "step": 35469 }, { "epoch": 2.8734607906675307, "grad_norm": 0.07355383038520813, "learning_rate": 6.258607498087223e-05, "loss": 0.2431, "step": 35470 }, { "epoch": 2.873541801685029, "grad_norm": 0.06896742433309555, "learning_rate": 6.25815743282776e-05, "loss": 0.2634, "step": 35471 }, { "epoch": 2.8736228127025276, "grad_norm": 0.07273846864700317, "learning_rate": 6.257707367568298e-05, "loss": 0.2292, "step": 35472 }, { "epoch": 2.873703823720026, "grad_norm": 0.07593858242034912, "learning_rate": 6.257257302308835e-05, "loss": 0.229, "step": 35473 }, { "epoch": 2.873784834737524, "grad_norm": 0.07159194350242615, "learning_rate": 6.256807237049373e-05, "loss": 0.2512, "step": 35474 }, { "epoch": 2.873865845755023, "grad_norm": 0.05694905295968056, "learning_rate": 6.25635717178991e-05, "loss": 0.2053, "step": 35475 }, { "epoch": 2.873946856772521, "grad_norm": 0.06768777221441269, "learning_rate": 6.255907106530447e-05, "loss": 0.2114, "step": 35476 }, { "epoch": 2.8740278677900193, "grad_norm": 0.0636713057756424, "learning_rate": 6.255457041270985e-05, "loss": 0.2611, "step": 35477 }, { "epoch": 2.874108878807518, "grad_norm": 0.07856924831867218, "learning_rate": 6.255006976011522e-05, "loss": 0.2495, "step": 35478 }, { "epoch": 2.874189889825016, "grad_norm": 0.05206296592950821, "learning_rate": 6.25455691075206e-05, "loss": 0.2194, "step": 35479 }, { "epoch": 2.8742709008425145, "grad_norm": 0.06615325063467026, "learning_rate": 6.254106845492597e-05, "loss": 0.2006, "step": 35480 }, { "epoch": 2.874351911860013, "grad_norm": 0.07415047287940979, "learning_rate": 6.253656780233134e-05, "loss": 0.2854, "step": 35481 }, { "epoch": 2.8744329228775114, "grad_norm": 0.07157108187675476, "learning_rate": 6.253206714973672e-05, "loss": 0.2438, "step": 35482 }, { "epoch": 2.8745139338950096, "grad_norm": 0.06774777173995972, "learning_rate": 6.252756649714209e-05, "loss": 0.2013, "step": 35483 }, { "epoch": 2.8745949449125083, "grad_norm": 0.0959698036313057, "learning_rate": 6.252306584454746e-05, "loss": 0.218, "step": 35484 }, { "epoch": 2.8746759559300066, "grad_norm": 0.08200061321258545, "learning_rate": 6.251856519195284e-05, "loss": 0.2541, "step": 35485 }, { "epoch": 2.874756966947505, "grad_norm": 0.07556328922510147, "learning_rate": 6.251406453935821e-05, "loss": 0.2114, "step": 35486 }, { "epoch": 2.8748379779650035, "grad_norm": 0.06020106002688408, "learning_rate": 6.250956388676358e-05, "loss": 0.2459, "step": 35487 }, { "epoch": 2.8749189889825018, "grad_norm": 0.09242961555719376, "learning_rate": 6.250506323416896e-05, "loss": 0.2142, "step": 35488 }, { "epoch": 2.875, "grad_norm": 0.07264678180217743, "learning_rate": 6.250056258157433e-05, "loss": 0.2267, "step": 35489 }, { "epoch": 2.8750810110174982, "grad_norm": 0.07662799954414368, "learning_rate": 6.24960619289797e-05, "loss": 0.2436, "step": 35490 }, { "epoch": 2.8751620220349965, "grad_norm": 0.06048709526658058, "learning_rate": 6.249156127638508e-05, "loss": 0.2083, "step": 35491 }, { "epoch": 2.875243033052495, "grad_norm": 0.10970834642648697, "learning_rate": 6.248706062379045e-05, "loss": 0.2632, "step": 35492 }, { "epoch": 2.8753240440699934, "grad_norm": 0.07959237694740295, "learning_rate": 6.248255997119583e-05, "loss": 0.2716, "step": 35493 }, { "epoch": 2.8754050550874917, "grad_norm": 0.0831311047077179, "learning_rate": 6.24780593186012e-05, "loss": 0.2483, "step": 35494 }, { "epoch": 2.8754860661049904, "grad_norm": 0.08556374907493591, "learning_rate": 6.247355866600657e-05, "loss": 0.2334, "step": 35495 }, { "epoch": 2.8755670771224886, "grad_norm": 0.0768716111779213, "learning_rate": 6.246905801341195e-05, "loss": 0.1901, "step": 35496 }, { "epoch": 2.875648088139987, "grad_norm": 0.06417785584926605, "learning_rate": 6.246455736081732e-05, "loss": 0.2079, "step": 35497 }, { "epoch": 2.8757290991574855, "grad_norm": 0.06606259942054749, "learning_rate": 6.24600567082227e-05, "loss": 0.1936, "step": 35498 }, { "epoch": 2.875810110174984, "grad_norm": 0.06523019820451736, "learning_rate": 6.245555605562807e-05, "loss": 0.2154, "step": 35499 }, { "epoch": 2.875891121192482, "grad_norm": 0.07109911739826202, "learning_rate": 6.245105540303344e-05, "loss": 0.2438, "step": 35500 }, { "epoch": 2.8759721322099807, "grad_norm": 0.05882502347230911, "learning_rate": 6.244655475043881e-05, "loss": 0.2372, "step": 35501 }, { "epoch": 2.876053143227479, "grad_norm": 0.06620030850172043, "learning_rate": 6.244205409784419e-05, "loss": 0.2526, "step": 35502 }, { "epoch": 2.876134154244977, "grad_norm": 0.061462339013814926, "learning_rate": 6.243755344524956e-05, "loss": 0.2338, "step": 35503 }, { "epoch": 2.876215165262476, "grad_norm": 0.07159540057182312, "learning_rate": 6.243305279265494e-05, "loss": 0.2107, "step": 35504 }, { "epoch": 2.876296176279974, "grad_norm": 0.07247888296842575, "learning_rate": 6.242855214006031e-05, "loss": 0.2427, "step": 35505 }, { "epoch": 2.8763771872974724, "grad_norm": 0.067585788667202, "learning_rate": 6.242405148746568e-05, "loss": 0.2259, "step": 35506 }, { "epoch": 2.876458198314971, "grad_norm": 0.07686949521303177, "learning_rate": 6.241955083487106e-05, "loss": 0.3078, "step": 35507 }, { "epoch": 2.8765392093324693, "grad_norm": 0.07205145061016083, "learning_rate": 6.241505018227643e-05, "loss": 0.2163, "step": 35508 }, { "epoch": 2.8766202203499676, "grad_norm": 0.08684998005628586, "learning_rate": 6.24105495296818e-05, "loss": 0.2437, "step": 35509 }, { "epoch": 2.8767012313674662, "grad_norm": 0.07802329957485199, "learning_rate": 6.240604887708718e-05, "loss": 0.2524, "step": 35510 }, { "epoch": 2.8767822423849645, "grad_norm": 0.06080799549818039, "learning_rate": 6.240154822449255e-05, "loss": 0.2426, "step": 35511 }, { "epoch": 2.8768632534024627, "grad_norm": 0.07781938463449478, "learning_rate": 6.239704757189792e-05, "loss": 0.2979, "step": 35512 }, { "epoch": 2.876944264419961, "grad_norm": 0.06262904405593872, "learning_rate": 6.239254691930331e-05, "loss": 0.2756, "step": 35513 }, { "epoch": 2.8770252754374592, "grad_norm": 0.07558223605155945, "learning_rate": 6.238804626670867e-05, "loss": 0.2413, "step": 35514 }, { "epoch": 2.877106286454958, "grad_norm": 0.07031270861625671, "learning_rate": 6.238354561411404e-05, "loss": 0.2594, "step": 35515 }, { "epoch": 2.877187297472456, "grad_norm": 0.075470469892025, "learning_rate": 6.237904496151943e-05, "loss": 0.2414, "step": 35516 }, { "epoch": 2.8772683084899544, "grad_norm": 0.0652003362774849, "learning_rate": 6.237454430892479e-05, "loss": 0.2424, "step": 35517 }, { "epoch": 2.877349319507453, "grad_norm": 0.08088836073875427, "learning_rate": 6.237004365633017e-05, "loss": 0.2411, "step": 35518 }, { "epoch": 2.8774303305249513, "grad_norm": 0.06947515904903412, "learning_rate": 6.236554300373555e-05, "loss": 0.2463, "step": 35519 }, { "epoch": 2.8775113415424496, "grad_norm": 0.07593680173158646, "learning_rate": 6.236104235114091e-05, "loss": 0.2444, "step": 35520 }, { "epoch": 2.8775923525599483, "grad_norm": 0.08024193346500397, "learning_rate": 6.235654169854629e-05, "loss": 0.214, "step": 35521 }, { "epoch": 2.8776733635774465, "grad_norm": 0.0644368976354599, "learning_rate": 6.235204104595167e-05, "loss": 0.2066, "step": 35522 }, { "epoch": 2.8777543745949448, "grad_norm": 0.06782899051904678, "learning_rate": 6.234754039335703e-05, "loss": 0.2366, "step": 35523 }, { "epoch": 2.8778353856124435, "grad_norm": 0.08508621901273727, "learning_rate": 6.234303974076241e-05, "loss": 0.2548, "step": 35524 }, { "epoch": 2.8779163966299417, "grad_norm": 0.0708160474896431, "learning_rate": 6.23385390881678e-05, "loss": 0.2563, "step": 35525 }, { "epoch": 2.87799740764744, "grad_norm": 0.08127661794424057, "learning_rate": 6.233403843557315e-05, "loss": 0.2414, "step": 35526 }, { "epoch": 2.8780784186649386, "grad_norm": 0.06155683100223541, "learning_rate": 6.232953778297853e-05, "loss": 0.2284, "step": 35527 }, { "epoch": 2.878159429682437, "grad_norm": 0.06013333424925804, "learning_rate": 6.232503713038392e-05, "loss": 0.2303, "step": 35528 }, { "epoch": 2.878240440699935, "grad_norm": 0.07108181715011597, "learning_rate": 6.232053647778928e-05, "loss": 0.2475, "step": 35529 }, { "epoch": 2.878321451717434, "grad_norm": 0.06939464062452316, "learning_rate": 6.231603582519465e-05, "loss": 0.2586, "step": 35530 }, { "epoch": 2.878402462734932, "grad_norm": 0.09219469130039215, "learning_rate": 6.231153517260004e-05, "loss": 0.2114, "step": 35531 }, { "epoch": 2.8784834737524303, "grad_norm": 0.0657503604888916, "learning_rate": 6.23070345200054e-05, "loss": 0.238, "step": 35532 }, { "epoch": 2.8785644847699285, "grad_norm": 0.08449986577033997, "learning_rate": 6.230253386741077e-05, "loss": 0.2588, "step": 35533 }, { "epoch": 2.8786454957874272, "grad_norm": 0.06352499127388, "learning_rate": 6.229803321481616e-05, "loss": 0.1936, "step": 35534 }, { "epoch": 2.8787265068049255, "grad_norm": 0.05862050503492355, "learning_rate": 6.229353256222152e-05, "loss": 0.2048, "step": 35535 }, { "epoch": 2.8788075178224237, "grad_norm": 0.08377757668495178, "learning_rate": 6.228903190962689e-05, "loss": 0.2343, "step": 35536 }, { "epoch": 2.878888528839922, "grad_norm": 0.07096932083368301, "learning_rate": 6.228453125703228e-05, "loss": 0.2631, "step": 35537 }, { "epoch": 2.8789695398574207, "grad_norm": 0.05568651854991913, "learning_rate": 6.228003060443765e-05, "loss": 0.2669, "step": 35538 }, { "epoch": 2.879050550874919, "grad_norm": 0.07287915050983429, "learning_rate": 6.227552995184303e-05, "loss": 0.2246, "step": 35539 }, { "epoch": 2.879131561892417, "grad_norm": 0.06835003197193146, "learning_rate": 6.22710292992484e-05, "loss": 0.2161, "step": 35540 }, { "epoch": 2.879212572909916, "grad_norm": 0.06841269880533218, "learning_rate": 6.226652864665377e-05, "loss": 0.221, "step": 35541 }, { "epoch": 2.879293583927414, "grad_norm": 0.07463429123163223, "learning_rate": 6.226202799405915e-05, "loss": 0.2398, "step": 35542 }, { "epoch": 2.8793745949449123, "grad_norm": 0.07194212824106216, "learning_rate": 6.225752734146452e-05, "loss": 0.2364, "step": 35543 }, { "epoch": 2.879455605962411, "grad_norm": 0.08712617307901382, "learning_rate": 6.225302668886989e-05, "loss": 0.21, "step": 35544 }, { "epoch": 2.8795366169799093, "grad_norm": 0.06648823618888855, "learning_rate": 6.224852603627527e-05, "loss": 0.1939, "step": 35545 }, { "epoch": 2.8796176279974075, "grad_norm": 0.08884168416261673, "learning_rate": 6.224402538368064e-05, "loss": 0.2639, "step": 35546 }, { "epoch": 2.879698639014906, "grad_norm": 0.06660214066505432, "learning_rate": 6.223952473108601e-05, "loss": 0.2142, "step": 35547 }, { "epoch": 2.8797796500324044, "grad_norm": 0.0755704939365387, "learning_rate": 6.223502407849139e-05, "loss": 0.2261, "step": 35548 }, { "epoch": 2.8798606610499027, "grad_norm": 0.07990830391645432, "learning_rate": 6.223052342589676e-05, "loss": 0.2378, "step": 35549 }, { "epoch": 2.8799416720674014, "grad_norm": 0.05775754526257515, "learning_rate": 6.222602277330213e-05, "loss": 0.2239, "step": 35550 }, { "epoch": 2.8800226830848996, "grad_norm": 0.06801251322031021, "learning_rate": 6.222152212070751e-05, "loss": 0.2262, "step": 35551 }, { "epoch": 2.880103694102398, "grad_norm": 0.0567367859184742, "learning_rate": 6.221702146811288e-05, "loss": 0.2115, "step": 35552 }, { "epoch": 2.8801847051198965, "grad_norm": 0.06986761093139648, "learning_rate": 6.221252081551826e-05, "loss": 0.2124, "step": 35553 }, { "epoch": 2.880265716137395, "grad_norm": 0.07065030932426453, "learning_rate": 6.220802016292363e-05, "loss": 0.2382, "step": 35554 }, { "epoch": 2.880346727154893, "grad_norm": 0.06928656995296478, "learning_rate": 6.2203519510329e-05, "loss": 0.2904, "step": 35555 }, { "epoch": 2.8804277381723913, "grad_norm": 0.0750303864479065, "learning_rate": 6.219901885773438e-05, "loss": 0.2133, "step": 35556 }, { "epoch": 2.88050874918989, "grad_norm": 0.0664948970079422, "learning_rate": 6.219451820513975e-05, "loss": 0.2041, "step": 35557 }, { "epoch": 2.880589760207388, "grad_norm": 0.07583000510931015, "learning_rate": 6.219001755254512e-05, "loss": 0.2363, "step": 35558 }, { "epoch": 2.8806707712248865, "grad_norm": 0.07268665730953217, "learning_rate": 6.21855168999505e-05, "loss": 0.2409, "step": 35559 }, { "epoch": 2.8807517822423847, "grad_norm": 0.06231937184929848, "learning_rate": 6.218101624735587e-05, "loss": 0.1869, "step": 35560 }, { "epoch": 2.8808327932598834, "grad_norm": 0.06016063317656517, "learning_rate": 6.217651559476124e-05, "loss": 0.2062, "step": 35561 }, { "epoch": 2.8809138042773816, "grad_norm": 0.0735088363289833, "learning_rate": 6.217201494216662e-05, "loss": 0.2389, "step": 35562 }, { "epoch": 2.88099481529488, "grad_norm": 0.07358022779226303, "learning_rate": 6.216751428957199e-05, "loss": 0.2366, "step": 35563 }, { "epoch": 2.8810758263123786, "grad_norm": 0.07338882982730865, "learning_rate": 6.216301363697737e-05, "loss": 0.2587, "step": 35564 }, { "epoch": 2.881156837329877, "grad_norm": 0.06316777318716049, "learning_rate": 6.215851298438274e-05, "loss": 0.2384, "step": 35565 }, { "epoch": 2.881237848347375, "grad_norm": 0.06496182829141617, "learning_rate": 6.215401233178811e-05, "loss": 0.2138, "step": 35566 }, { "epoch": 2.8813188593648738, "grad_norm": 0.07022936642169952, "learning_rate": 6.214951167919349e-05, "loss": 0.3083, "step": 35567 }, { "epoch": 2.881399870382372, "grad_norm": 0.062291331589221954, "learning_rate": 6.214501102659886e-05, "loss": 0.2285, "step": 35568 }, { "epoch": 2.8814808813998702, "grad_norm": 0.07278969138860703, "learning_rate": 6.214051037400423e-05, "loss": 0.2596, "step": 35569 }, { "epoch": 2.881561892417369, "grad_norm": 0.07649640738964081, "learning_rate": 6.21360097214096e-05, "loss": 0.228, "step": 35570 }, { "epoch": 2.881642903434867, "grad_norm": 0.061720188707113266, "learning_rate": 6.213150906881498e-05, "loss": 0.2165, "step": 35571 }, { "epoch": 2.8817239144523654, "grad_norm": 0.0904294103384018, "learning_rate": 6.212700841622035e-05, "loss": 0.298, "step": 35572 }, { "epoch": 2.881804925469864, "grad_norm": 0.07655181735754013, "learning_rate": 6.212250776362573e-05, "loss": 0.2519, "step": 35573 }, { "epoch": 2.8818859364873624, "grad_norm": 0.06292436271905899, "learning_rate": 6.21180071110311e-05, "loss": 0.2502, "step": 35574 }, { "epoch": 2.8819669475048606, "grad_norm": 0.06908023357391357, "learning_rate": 6.211350645843647e-05, "loss": 0.234, "step": 35575 }, { "epoch": 2.8820479585223593, "grad_norm": 0.06845370680093765, "learning_rate": 6.210900580584185e-05, "loss": 0.2498, "step": 35576 }, { "epoch": 2.8821289695398575, "grad_norm": 0.059849537909030914, "learning_rate": 6.210450515324722e-05, "loss": 0.2268, "step": 35577 }, { "epoch": 2.8822099805573558, "grad_norm": 0.061932940036058426, "learning_rate": 6.21000045006526e-05, "loss": 0.2153, "step": 35578 }, { "epoch": 2.882290991574854, "grad_norm": 0.06094523146748543, "learning_rate": 6.209550384805797e-05, "loss": 0.2275, "step": 35579 }, { "epoch": 2.8823720025923527, "grad_norm": 0.07336302846670151, "learning_rate": 6.209100319546334e-05, "loss": 0.2353, "step": 35580 }, { "epoch": 2.882453013609851, "grad_norm": 0.07363414019346237, "learning_rate": 6.208650254286872e-05, "loss": 0.2224, "step": 35581 }, { "epoch": 2.882534024627349, "grad_norm": 0.07803016155958176, "learning_rate": 6.208200189027409e-05, "loss": 0.2543, "step": 35582 }, { "epoch": 2.8826150356448474, "grad_norm": 0.06834989786148071, "learning_rate": 6.207750123767946e-05, "loss": 0.2238, "step": 35583 }, { "epoch": 2.882696046662346, "grad_norm": 0.0689224898815155, "learning_rate": 6.207300058508484e-05, "loss": 0.2557, "step": 35584 }, { "epoch": 2.8827770576798444, "grad_norm": 0.07135829329490662, "learning_rate": 6.206849993249021e-05, "loss": 0.2292, "step": 35585 }, { "epoch": 2.8828580686973426, "grad_norm": 0.05913795530796051, "learning_rate": 6.206399927989558e-05, "loss": 0.2147, "step": 35586 }, { "epoch": 2.8829390797148413, "grad_norm": 0.0672590509057045, "learning_rate": 6.205949862730096e-05, "loss": 0.2223, "step": 35587 }, { "epoch": 2.8830200907323396, "grad_norm": 0.10194092988967896, "learning_rate": 6.205499797470633e-05, "loss": 0.2594, "step": 35588 }, { "epoch": 2.883101101749838, "grad_norm": 0.08454529941082001, "learning_rate": 6.20504973221117e-05, "loss": 0.1887, "step": 35589 }, { "epoch": 2.8831821127673365, "grad_norm": 0.0754120796918869, "learning_rate": 6.204599666951708e-05, "loss": 0.2313, "step": 35590 }, { "epoch": 2.8832631237848347, "grad_norm": 0.0692853033542633, "learning_rate": 6.204149601692245e-05, "loss": 0.234, "step": 35591 }, { "epoch": 2.883344134802333, "grad_norm": 0.06850889325141907, "learning_rate": 6.203699536432783e-05, "loss": 0.2424, "step": 35592 }, { "epoch": 2.8834251458198317, "grad_norm": 0.06814275681972504, "learning_rate": 6.20324947117332e-05, "loss": 0.2283, "step": 35593 }, { "epoch": 2.88350615683733, "grad_norm": 0.07559818029403687, "learning_rate": 6.202799405913859e-05, "loss": 0.2431, "step": 35594 }, { "epoch": 2.883587167854828, "grad_norm": 0.06723404675722122, "learning_rate": 6.202349340654395e-05, "loss": 0.222, "step": 35595 }, { "epoch": 2.883668178872327, "grad_norm": 0.08386305719614029, "learning_rate": 6.201899275394932e-05, "loss": 0.2784, "step": 35596 }, { "epoch": 2.883749189889825, "grad_norm": 0.0725565105676651, "learning_rate": 6.201449210135471e-05, "loss": 0.2076, "step": 35597 }, { "epoch": 2.8838302009073233, "grad_norm": 0.05963285639882088, "learning_rate": 6.200999144876007e-05, "loss": 0.2076, "step": 35598 }, { "epoch": 2.883911211924822, "grad_norm": 0.08718138188123703, "learning_rate": 6.200549079616544e-05, "loss": 0.245, "step": 35599 }, { "epoch": 2.8839922229423203, "grad_norm": 0.058612942695617676, "learning_rate": 6.200099014357083e-05, "loss": 0.2009, "step": 35600 }, { "epoch": 2.8840732339598185, "grad_norm": 0.08298259228467941, "learning_rate": 6.199648949097619e-05, "loss": 0.2368, "step": 35601 }, { "epoch": 2.8841542449773168, "grad_norm": 0.07767398655414581, "learning_rate": 6.199198883838156e-05, "loss": 0.2454, "step": 35602 }, { "epoch": 2.8842352559948155, "grad_norm": 0.07122716307640076, "learning_rate": 6.198748818578695e-05, "loss": 0.2794, "step": 35603 }, { "epoch": 2.8843162670123137, "grad_norm": 0.06702442467212677, "learning_rate": 6.198298753319231e-05, "loss": 0.2153, "step": 35604 }, { "epoch": 2.884397278029812, "grad_norm": 0.07701624929904938, "learning_rate": 6.197848688059768e-05, "loss": 0.2566, "step": 35605 }, { "epoch": 2.88447828904731, "grad_norm": 0.0829615518450737, "learning_rate": 6.197398622800307e-05, "loss": 0.2451, "step": 35606 }, { "epoch": 2.884559300064809, "grad_norm": 0.0616791732609272, "learning_rate": 6.196948557540844e-05, "loss": 0.2841, "step": 35607 }, { "epoch": 2.884640311082307, "grad_norm": 0.06792052835226059, "learning_rate": 6.19649849228138e-05, "loss": 0.2464, "step": 35608 }, { "epoch": 2.8847213220998054, "grad_norm": 0.07558625936508179, "learning_rate": 6.196048427021919e-05, "loss": 0.2394, "step": 35609 }, { "epoch": 2.884802333117304, "grad_norm": 0.06835988163948059, "learning_rate": 6.195598361762456e-05, "loss": 0.2254, "step": 35610 }, { "epoch": 2.8848833441348023, "grad_norm": 0.07234005630016327, "learning_rate": 6.195148296502992e-05, "loss": 0.2331, "step": 35611 }, { "epoch": 2.8849643551523005, "grad_norm": 0.10257917642593384, "learning_rate": 6.194698231243531e-05, "loss": 0.2124, "step": 35612 }, { "epoch": 2.8850453661697992, "grad_norm": 0.05626985803246498, "learning_rate": 6.194248165984069e-05, "loss": 0.2305, "step": 35613 }, { "epoch": 2.8851263771872975, "grad_norm": 0.08011636137962341, "learning_rate": 6.193798100724605e-05, "loss": 0.3001, "step": 35614 }, { "epoch": 2.8852073882047957, "grad_norm": 0.07734625786542892, "learning_rate": 6.193348035465143e-05, "loss": 0.2325, "step": 35615 }, { "epoch": 2.8852883992222944, "grad_norm": 0.07830455154180527, "learning_rate": 6.19289797020568e-05, "loss": 0.2291, "step": 35616 }, { "epoch": 2.8853694102397927, "grad_norm": 0.0644298568367958, "learning_rate": 6.192447904946217e-05, "loss": 0.2332, "step": 35617 }, { "epoch": 2.885450421257291, "grad_norm": 0.06490510702133179, "learning_rate": 6.191997839686755e-05, "loss": 0.2059, "step": 35618 }, { "epoch": 2.8855314322747896, "grad_norm": 0.0631365105509758, "learning_rate": 6.191547774427293e-05, "loss": 0.2456, "step": 35619 }, { "epoch": 2.885612443292288, "grad_norm": 0.07085856050252914, "learning_rate": 6.19109770916783e-05, "loss": 0.2167, "step": 35620 }, { "epoch": 2.885693454309786, "grad_norm": 0.07526741921901703, "learning_rate": 6.190647643908367e-05, "loss": 0.2384, "step": 35621 }, { "epoch": 2.8857744653272848, "grad_norm": 0.0649634301662445, "learning_rate": 6.190197578648905e-05, "loss": 0.2464, "step": 35622 }, { "epoch": 2.885855476344783, "grad_norm": 0.06935419142246246, "learning_rate": 6.189747513389442e-05, "loss": 0.2064, "step": 35623 }, { "epoch": 2.8859364873622813, "grad_norm": 0.07325664162635803, "learning_rate": 6.18929744812998e-05, "loss": 0.2449, "step": 35624 }, { "epoch": 2.8860174983797795, "grad_norm": 0.06494774669408798, "learning_rate": 6.188847382870517e-05, "loss": 0.2535, "step": 35625 }, { "epoch": 2.886098509397278, "grad_norm": 0.08057263493537903, "learning_rate": 6.188397317611054e-05, "loss": 0.2219, "step": 35626 }, { "epoch": 2.8861795204147764, "grad_norm": 0.07165004312992096, "learning_rate": 6.187947252351592e-05, "loss": 0.2221, "step": 35627 }, { "epoch": 2.8862605314322747, "grad_norm": 0.07129422575235367, "learning_rate": 6.187497187092129e-05, "loss": 0.2189, "step": 35628 }, { "epoch": 2.886341542449773, "grad_norm": 0.08567812293767929, "learning_rate": 6.187047121832666e-05, "loss": 0.2296, "step": 35629 }, { "epoch": 2.8864225534672716, "grad_norm": 0.0857577994465828, "learning_rate": 6.186597056573204e-05, "loss": 0.2672, "step": 35630 }, { "epoch": 2.88650356448477, "grad_norm": 0.09535958617925644, "learning_rate": 6.186146991313741e-05, "loss": 0.2712, "step": 35631 }, { "epoch": 2.886584575502268, "grad_norm": 0.07217075675725937, "learning_rate": 6.185696926054278e-05, "loss": 0.2839, "step": 35632 }, { "epoch": 2.886665586519767, "grad_norm": 0.07447575777769089, "learning_rate": 6.185246860794816e-05, "loss": 0.259, "step": 35633 }, { "epoch": 2.886746597537265, "grad_norm": 0.06412901729345322, "learning_rate": 6.184796795535353e-05, "loss": 0.2066, "step": 35634 }, { "epoch": 2.8868276085547633, "grad_norm": 0.06986956298351288, "learning_rate": 6.18434673027589e-05, "loss": 0.2538, "step": 35635 }, { "epoch": 2.886908619572262, "grad_norm": 0.08456194400787354, "learning_rate": 6.183896665016428e-05, "loss": 0.2918, "step": 35636 }, { "epoch": 2.88698963058976, "grad_norm": 0.07183456420898438, "learning_rate": 6.183446599756965e-05, "loss": 0.2643, "step": 35637 }, { "epoch": 2.8870706416072585, "grad_norm": 0.07418840378522873, "learning_rate": 6.182996534497503e-05, "loss": 0.2165, "step": 35638 }, { "epoch": 2.887151652624757, "grad_norm": 0.06666819006204605, "learning_rate": 6.18254646923804e-05, "loss": 0.2132, "step": 35639 }, { "epoch": 2.8872326636422554, "grad_norm": 0.06616838276386261, "learning_rate": 6.182096403978577e-05, "loss": 0.2479, "step": 35640 }, { "epoch": 2.8873136746597536, "grad_norm": 0.07370367646217346, "learning_rate": 6.181646338719115e-05, "loss": 0.2395, "step": 35641 }, { "epoch": 2.8873946856772523, "grad_norm": 0.07266636192798615, "learning_rate": 6.181196273459652e-05, "loss": 0.212, "step": 35642 }, { "epoch": 2.8874756966947506, "grad_norm": 0.06336379051208496, "learning_rate": 6.18074620820019e-05, "loss": 0.2285, "step": 35643 }, { "epoch": 2.887556707712249, "grad_norm": 0.07010842114686966, "learning_rate": 6.180296142940727e-05, "loss": 0.2644, "step": 35644 }, { "epoch": 2.8876377187297475, "grad_norm": 0.07312007248401642, "learning_rate": 6.179846077681264e-05, "loss": 0.2542, "step": 35645 }, { "epoch": 2.8877187297472457, "grad_norm": 0.05939409136772156, "learning_rate": 6.179396012421801e-05, "loss": 0.2296, "step": 35646 }, { "epoch": 2.887799740764744, "grad_norm": 0.07634563744068146, "learning_rate": 6.178945947162339e-05, "loss": 0.2464, "step": 35647 }, { "epoch": 2.8878807517822422, "grad_norm": 0.08463514596223831, "learning_rate": 6.178495881902876e-05, "loss": 0.2719, "step": 35648 }, { "epoch": 2.887961762799741, "grad_norm": 0.06642767786979675, "learning_rate": 6.178045816643413e-05, "loss": 0.2499, "step": 35649 }, { "epoch": 2.888042773817239, "grad_norm": 0.07932719588279724, "learning_rate": 6.177595751383951e-05, "loss": 0.2366, "step": 35650 }, { "epoch": 2.8881237848347374, "grad_norm": 0.06755657494068146, "learning_rate": 6.177145686124488e-05, "loss": 0.2319, "step": 35651 }, { "epoch": 2.8882047958522357, "grad_norm": 0.07422051578760147, "learning_rate": 6.176695620865026e-05, "loss": 0.2295, "step": 35652 }, { "epoch": 2.8882858068697344, "grad_norm": 0.060580454766750336, "learning_rate": 6.176245555605563e-05, "loss": 0.2221, "step": 35653 }, { "epoch": 2.8883668178872326, "grad_norm": 0.06504825502634048, "learning_rate": 6.1757954903461e-05, "loss": 0.2375, "step": 35654 }, { "epoch": 2.888447828904731, "grad_norm": 0.060391828417778015, "learning_rate": 6.175345425086638e-05, "loss": 0.1846, "step": 35655 }, { "epoch": 2.8885288399222295, "grad_norm": 0.07530611753463745, "learning_rate": 6.174895359827175e-05, "loss": 0.236, "step": 35656 }, { "epoch": 2.8886098509397278, "grad_norm": 0.060225117951631546, "learning_rate": 6.174445294567712e-05, "loss": 0.2358, "step": 35657 }, { "epoch": 2.888690861957226, "grad_norm": 0.05687911808490753, "learning_rate": 6.17399522930825e-05, "loss": 0.2323, "step": 35658 }, { "epoch": 2.8887718729747247, "grad_norm": 0.06379065662622452, "learning_rate": 6.173545164048787e-05, "loss": 0.2152, "step": 35659 }, { "epoch": 2.888852883992223, "grad_norm": 0.06311369687318802, "learning_rate": 6.173095098789324e-05, "loss": 0.2269, "step": 35660 }, { "epoch": 2.888933895009721, "grad_norm": 0.08408977836370468, "learning_rate": 6.172645033529862e-05, "loss": 0.237, "step": 35661 }, { "epoch": 2.88901490602722, "grad_norm": 0.07219025492668152, "learning_rate": 6.172194968270399e-05, "loss": 0.2533, "step": 35662 }, { "epoch": 2.889095917044718, "grad_norm": 0.08540571480989456, "learning_rate": 6.171744903010937e-05, "loss": 0.2353, "step": 35663 }, { "epoch": 2.8891769280622164, "grad_norm": 0.07214327901601791, "learning_rate": 6.171294837751474e-05, "loss": 0.263, "step": 35664 }, { "epoch": 2.889257939079715, "grad_norm": 0.07148697227239609, "learning_rate": 6.170844772492011e-05, "loss": 0.2214, "step": 35665 }, { "epoch": 2.8893389500972133, "grad_norm": 0.06975147128105164, "learning_rate": 6.170394707232549e-05, "loss": 0.2098, "step": 35666 }, { "epoch": 2.8894199611147116, "grad_norm": 0.07287190109491348, "learning_rate": 6.169944641973086e-05, "loss": 0.2221, "step": 35667 }, { "epoch": 2.8895009721322102, "grad_norm": 0.06442992389202118, "learning_rate": 6.169494576713623e-05, "loss": 0.2223, "step": 35668 }, { "epoch": 2.8895819831497085, "grad_norm": 0.0819707065820694, "learning_rate": 6.169044511454161e-05, "loss": 0.1962, "step": 35669 }, { "epoch": 2.8896629941672067, "grad_norm": 0.08242817223072052, "learning_rate": 6.168594446194698e-05, "loss": 0.2302, "step": 35670 }, { "epoch": 2.889744005184705, "grad_norm": 0.08381433039903641, "learning_rate": 6.168144380935235e-05, "loss": 0.2477, "step": 35671 }, { "epoch": 2.8898250162022032, "grad_norm": 0.06956245005130768, "learning_rate": 6.167694315675774e-05, "loss": 0.2422, "step": 35672 }, { "epoch": 2.889906027219702, "grad_norm": 0.06494015455245972, "learning_rate": 6.167244250416311e-05, "loss": 0.2564, "step": 35673 }, { "epoch": 2.8899870382372, "grad_norm": 0.06729976087808609, "learning_rate": 6.166794185156848e-05, "loss": 0.2336, "step": 35674 }, { "epoch": 2.8900680492546984, "grad_norm": 0.06940561532974243, "learning_rate": 6.166344119897386e-05, "loss": 0.2058, "step": 35675 }, { "epoch": 2.890149060272197, "grad_norm": 0.06687884032726288, "learning_rate": 6.165894054637924e-05, "loss": 0.2139, "step": 35676 }, { "epoch": 2.8902300712896953, "grad_norm": 0.07727450132369995, "learning_rate": 6.16544398937846e-05, "loss": 0.2432, "step": 35677 }, { "epoch": 2.8903110823071936, "grad_norm": 0.06999623775482178, "learning_rate": 6.164993924118998e-05, "loss": 0.2718, "step": 35678 }, { "epoch": 2.8903920933246923, "grad_norm": 0.07031282782554626, "learning_rate": 6.164543858859536e-05, "loss": 0.2262, "step": 35679 }, { "epoch": 2.8904731043421905, "grad_norm": 0.07029929012060165, "learning_rate": 6.164093793600072e-05, "loss": 0.2375, "step": 35680 }, { "epoch": 2.8905541153596888, "grad_norm": 0.08660591393709183, "learning_rate": 6.16364372834061e-05, "loss": 0.2649, "step": 35681 }, { "epoch": 2.8906351263771874, "grad_norm": 0.08125916123390198, "learning_rate": 6.163193663081148e-05, "loss": 0.2546, "step": 35682 }, { "epoch": 2.8907161373946857, "grad_norm": 0.07145148515701294, "learning_rate": 6.162743597821684e-05, "loss": 0.2218, "step": 35683 }, { "epoch": 2.890797148412184, "grad_norm": 0.0741056427359581, "learning_rate": 6.162293532562222e-05, "loss": 0.2769, "step": 35684 }, { "epoch": 2.8908781594296826, "grad_norm": 0.08147289603948593, "learning_rate": 6.16184346730276e-05, "loss": 0.2197, "step": 35685 }, { "epoch": 2.890959170447181, "grad_norm": 0.06483854353427887, "learning_rate": 6.161393402043296e-05, "loss": 0.2311, "step": 35686 }, { "epoch": 2.891040181464679, "grad_norm": 0.06676837056875229, "learning_rate": 6.160943336783835e-05, "loss": 0.2453, "step": 35687 }, { "epoch": 2.891121192482178, "grad_norm": 0.07340171933174133, "learning_rate": 6.160493271524372e-05, "loss": 0.2072, "step": 35688 }, { "epoch": 2.891202203499676, "grad_norm": 0.06559917330741882, "learning_rate": 6.160043206264908e-05, "loss": 0.2415, "step": 35689 }, { "epoch": 2.8912832145171743, "grad_norm": 0.058529727160930634, "learning_rate": 6.159593141005447e-05, "loss": 0.2363, "step": 35690 }, { "epoch": 2.891364225534673, "grad_norm": 0.06556835025548935, "learning_rate": 6.159143075745984e-05, "loss": 0.235, "step": 35691 }, { "epoch": 2.8914452365521712, "grad_norm": 0.061084944754838943, "learning_rate": 6.15869301048652e-05, "loss": 0.2404, "step": 35692 }, { "epoch": 2.8915262475696695, "grad_norm": 0.06845731288194656, "learning_rate": 6.158242945227059e-05, "loss": 0.2403, "step": 35693 }, { "epoch": 2.8916072585871677, "grad_norm": 0.057648856192827225, "learning_rate": 6.157792879967596e-05, "loss": 0.213, "step": 35694 }, { "epoch": 2.891688269604666, "grad_norm": 0.07292310148477554, "learning_rate": 6.157342814708132e-05, "loss": 0.224, "step": 35695 }, { "epoch": 2.8917692806221647, "grad_norm": 0.06565499305725098, "learning_rate": 6.156892749448671e-05, "loss": 0.2563, "step": 35696 }, { "epoch": 2.891850291639663, "grad_norm": 0.0779080018401146, "learning_rate": 6.156442684189208e-05, "loss": 0.232, "step": 35697 }, { "epoch": 2.891931302657161, "grad_norm": 0.07010319828987122, "learning_rate": 6.155992618929746e-05, "loss": 0.2262, "step": 35698 }, { "epoch": 2.89201231367466, "grad_norm": 0.06465441733598709, "learning_rate": 6.155542553670283e-05, "loss": 0.2268, "step": 35699 }, { "epoch": 2.892093324692158, "grad_norm": 0.08106248080730438, "learning_rate": 6.15509248841082e-05, "loss": 0.241, "step": 35700 }, { "epoch": 2.8921743357096563, "grad_norm": 0.06921873986721039, "learning_rate": 6.154642423151358e-05, "loss": 0.2296, "step": 35701 }, { "epoch": 2.892255346727155, "grad_norm": 0.07705320417881012, "learning_rate": 6.154192357891895e-05, "loss": 0.253, "step": 35702 }, { "epoch": 2.8923363577446533, "grad_norm": 0.07825233042240143, "learning_rate": 6.153742292632432e-05, "loss": 0.2568, "step": 35703 }, { "epoch": 2.8924173687621515, "grad_norm": 0.07463907450437546, "learning_rate": 6.15329222737297e-05, "loss": 0.2506, "step": 35704 }, { "epoch": 2.89249837977965, "grad_norm": 0.07520240545272827, "learning_rate": 6.152842162113507e-05, "loss": 0.2678, "step": 35705 }, { "epoch": 2.8925793907971484, "grad_norm": 0.07687666267156601, "learning_rate": 6.152392096854044e-05, "loss": 0.264, "step": 35706 }, { "epoch": 2.8926604018146467, "grad_norm": 0.07741294801235199, "learning_rate": 6.151942031594582e-05, "loss": 0.2225, "step": 35707 }, { "epoch": 2.8927414128321454, "grad_norm": 0.07104411721229553, "learning_rate": 6.151491966335119e-05, "loss": 0.2285, "step": 35708 }, { "epoch": 2.8928224238496436, "grad_norm": 0.07104265689849854, "learning_rate": 6.151041901075656e-05, "loss": 0.2319, "step": 35709 }, { "epoch": 2.892903434867142, "grad_norm": 0.0759245902299881, "learning_rate": 6.150591835816194e-05, "loss": 0.2577, "step": 35710 }, { "epoch": 2.8929844458846405, "grad_norm": 0.06897891312837601, "learning_rate": 6.150141770556731e-05, "loss": 0.2196, "step": 35711 }, { "epoch": 2.893065456902139, "grad_norm": 0.07628782093524933, "learning_rate": 6.149691705297269e-05, "loss": 0.2444, "step": 35712 }, { "epoch": 2.893146467919637, "grad_norm": 0.06364277005195618, "learning_rate": 6.149241640037806e-05, "loss": 0.2097, "step": 35713 }, { "epoch": 2.8932274789371357, "grad_norm": 0.07740595191717148, "learning_rate": 6.148791574778343e-05, "loss": 0.2185, "step": 35714 }, { "epoch": 2.893308489954634, "grad_norm": 0.07443327456712723, "learning_rate": 6.14834150951888e-05, "loss": 0.2131, "step": 35715 }, { "epoch": 2.893389500972132, "grad_norm": 0.0595758818089962, "learning_rate": 6.147891444259418e-05, "loss": 0.2057, "step": 35716 }, { "epoch": 2.8934705119896305, "grad_norm": 0.07437099516391754, "learning_rate": 6.147441378999955e-05, "loss": 0.2699, "step": 35717 }, { "epoch": 2.8935515230071287, "grad_norm": 0.08497471362352371, "learning_rate": 6.146991313740493e-05, "loss": 0.2037, "step": 35718 }, { "epoch": 2.8936325340246274, "grad_norm": 0.07126790285110474, "learning_rate": 6.14654124848103e-05, "loss": 0.2333, "step": 35719 }, { "epoch": 2.8937135450421256, "grad_norm": 0.07110083848237991, "learning_rate": 6.146091183221567e-05, "loss": 0.2206, "step": 35720 }, { "epoch": 2.893794556059624, "grad_norm": 0.07824023067951202, "learning_rate": 6.145641117962105e-05, "loss": 0.239, "step": 35721 }, { "epoch": 2.8938755670771226, "grad_norm": 0.0700746402144432, "learning_rate": 6.145191052702642e-05, "loss": 0.2125, "step": 35722 }, { "epoch": 2.893956578094621, "grad_norm": 0.06137807294726372, "learning_rate": 6.14474098744318e-05, "loss": 0.2615, "step": 35723 }, { "epoch": 2.894037589112119, "grad_norm": 0.0737156793475151, "learning_rate": 6.144290922183717e-05, "loss": 0.2481, "step": 35724 }, { "epoch": 2.8941186001296177, "grad_norm": 0.06886066496372223, "learning_rate": 6.143840856924254e-05, "loss": 0.2657, "step": 35725 }, { "epoch": 2.894199611147116, "grad_norm": 0.08199062943458557, "learning_rate": 6.143390791664792e-05, "loss": 0.2282, "step": 35726 }, { "epoch": 2.8942806221646142, "grad_norm": 0.053236592561006546, "learning_rate": 6.142940726405329e-05, "loss": 0.2214, "step": 35727 }, { "epoch": 2.894361633182113, "grad_norm": 0.08542463928461075, "learning_rate": 6.142490661145866e-05, "loss": 0.2405, "step": 35728 }, { "epoch": 2.894442644199611, "grad_norm": 0.07335537672042847, "learning_rate": 6.142040595886404e-05, "loss": 0.2456, "step": 35729 }, { "epoch": 2.8945236552171094, "grad_norm": 0.06895040720701218, "learning_rate": 6.141590530626941e-05, "loss": 0.2405, "step": 35730 }, { "epoch": 2.894604666234608, "grad_norm": 0.0709012895822525, "learning_rate": 6.141140465367478e-05, "loss": 0.223, "step": 35731 }, { "epoch": 2.8946856772521063, "grad_norm": 0.058473143726587296, "learning_rate": 6.140690400108016e-05, "loss": 0.2292, "step": 35732 }, { "epoch": 2.8947666882696046, "grad_norm": 0.07192769646644592, "learning_rate": 6.140240334848553e-05, "loss": 0.2443, "step": 35733 }, { "epoch": 2.8948476992871033, "grad_norm": 0.0589325875043869, "learning_rate": 6.13979026958909e-05, "loss": 0.2225, "step": 35734 }, { "epoch": 2.8949287103046015, "grad_norm": 0.06955495476722717, "learning_rate": 6.139340204329628e-05, "loss": 0.2651, "step": 35735 }, { "epoch": 2.8950097213220998, "grad_norm": 0.07267536222934723, "learning_rate": 6.138890139070165e-05, "loss": 0.2517, "step": 35736 }, { "epoch": 2.8950907323395985, "grad_norm": 0.06132880225777626, "learning_rate": 6.138440073810703e-05, "loss": 0.2315, "step": 35737 }, { "epoch": 2.8951717433570967, "grad_norm": 0.08604191243648529, "learning_rate": 6.13799000855124e-05, "loss": 0.2307, "step": 35738 }, { "epoch": 2.895252754374595, "grad_norm": 0.0703839436173439, "learning_rate": 6.137539943291777e-05, "loss": 0.2391, "step": 35739 }, { "epoch": 2.895333765392093, "grad_norm": 0.0668756365776062, "learning_rate": 6.137089878032315e-05, "loss": 0.2407, "step": 35740 }, { "epoch": 2.8954147764095914, "grad_norm": 0.08132848143577576, "learning_rate": 6.136639812772852e-05, "loss": 0.226, "step": 35741 }, { "epoch": 2.89549578742709, "grad_norm": 0.06000250205397606, "learning_rate": 6.136189747513391e-05, "loss": 0.227, "step": 35742 }, { "epoch": 2.8955767984445884, "grad_norm": 0.07280347496271133, "learning_rate": 6.135739682253927e-05, "loss": 0.243, "step": 35743 }, { "epoch": 2.8956578094620866, "grad_norm": 0.05585459619760513, "learning_rate": 6.135289616994464e-05, "loss": 0.1944, "step": 35744 }, { "epoch": 2.8957388204795853, "grad_norm": 0.07624555379152298, "learning_rate": 6.134839551735003e-05, "loss": 0.2585, "step": 35745 }, { "epoch": 2.8958198314970836, "grad_norm": 0.08827781677246094, "learning_rate": 6.134389486475539e-05, "loss": 0.2441, "step": 35746 }, { "epoch": 2.895900842514582, "grad_norm": 0.08926903456449509, "learning_rate": 6.133939421216076e-05, "loss": 0.25, "step": 35747 }, { "epoch": 2.8959818535320805, "grad_norm": 0.06952491402626038, "learning_rate": 6.133489355956615e-05, "loss": 0.2142, "step": 35748 }, { "epoch": 2.8960628645495787, "grad_norm": 0.07767337560653687, "learning_rate": 6.133039290697151e-05, "loss": 0.2297, "step": 35749 }, { "epoch": 2.896143875567077, "grad_norm": 0.0816320851445198, "learning_rate": 6.132589225437688e-05, "loss": 0.2429, "step": 35750 }, { "epoch": 2.8962248865845757, "grad_norm": 0.06641452759504318, "learning_rate": 6.132139160178227e-05, "loss": 0.235, "step": 35751 }, { "epoch": 2.896305897602074, "grad_norm": 0.0660315752029419, "learning_rate": 6.131689094918763e-05, "loss": 0.2504, "step": 35752 }, { "epoch": 2.896386908619572, "grad_norm": 0.09795043617486954, "learning_rate": 6.131239029659302e-05, "loss": 0.2725, "step": 35753 }, { "epoch": 2.896467919637071, "grad_norm": 0.07718279212713242, "learning_rate": 6.130788964399839e-05, "loss": 0.2823, "step": 35754 }, { "epoch": 2.896548930654569, "grad_norm": 0.07075318694114685, "learning_rate": 6.130338899140375e-05, "loss": 0.2465, "step": 35755 }, { "epoch": 2.8966299416720673, "grad_norm": 0.06218565255403519, "learning_rate": 6.129888833880914e-05, "loss": 0.2148, "step": 35756 }, { "epoch": 2.896710952689566, "grad_norm": 0.07949451357126236, "learning_rate": 6.129438768621451e-05, "loss": 0.2384, "step": 35757 }, { "epoch": 2.8967919637070643, "grad_norm": 0.053814273327589035, "learning_rate": 6.128988703361987e-05, "loss": 0.2206, "step": 35758 }, { "epoch": 2.8968729747245625, "grad_norm": 0.07167844474315643, "learning_rate": 6.128538638102526e-05, "loss": 0.2381, "step": 35759 }, { "epoch": 2.8969539857420608, "grad_norm": 0.07708174735307693, "learning_rate": 6.128088572843063e-05, "loss": 0.232, "step": 35760 }, { "epoch": 2.8970349967595594, "grad_norm": 0.06454035639762878, "learning_rate": 6.127638507583599e-05, "loss": 0.2296, "step": 35761 }, { "epoch": 2.8971160077770577, "grad_norm": 0.08612510561943054, "learning_rate": 6.127188442324138e-05, "loss": 0.2265, "step": 35762 }, { "epoch": 2.897197018794556, "grad_norm": 0.06060539931058884, "learning_rate": 6.126738377064675e-05, "loss": 0.2063, "step": 35763 }, { "epoch": 2.897278029812054, "grad_norm": 0.07030733674764633, "learning_rate": 6.126288311805211e-05, "loss": 0.2411, "step": 35764 }, { "epoch": 2.897359040829553, "grad_norm": 0.06264536082744598, "learning_rate": 6.12583824654575e-05, "loss": 0.2182, "step": 35765 }, { "epoch": 2.897440051847051, "grad_norm": 0.08044012635946274, "learning_rate": 6.125388181286287e-05, "loss": 0.2497, "step": 35766 }, { "epoch": 2.8975210628645494, "grad_norm": 0.06459780782461166, "learning_rate": 6.124938116026823e-05, "loss": 0.2542, "step": 35767 }, { "epoch": 2.897602073882048, "grad_norm": 0.0798090472817421, "learning_rate": 6.124488050767362e-05, "loss": 0.2075, "step": 35768 }, { "epoch": 2.8976830848995463, "grad_norm": 0.0677122250199318, "learning_rate": 6.1240379855079e-05, "loss": 0.1975, "step": 35769 }, { "epoch": 2.8977640959170445, "grad_norm": 0.06713425368070602, "learning_rate": 6.123587920248435e-05, "loss": 0.2271, "step": 35770 }, { "epoch": 2.8978451069345432, "grad_norm": 0.09057461470365524, "learning_rate": 6.123137854988974e-05, "loss": 0.2581, "step": 35771 }, { "epoch": 2.8979261179520415, "grad_norm": 0.06432762742042542, "learning_rate": 6.122687789729512e-05, "loss": 0.2248, "step": 35772 }, { "epoch": 2.8980071289695397, "grad_norm": 0.07236693799495697, "learning_rate": 6.122237724470048e-05, "loss": 0.2248, "step": 35773 }, { "epoch": 2.8980881399870384, "grad_norm": 0.06687065213918686, "learning_rate": 6.121787659210586e-05, "loss": 0.1849, "step": 35774 }, { "epoch": 2.8981691510045366, "grad_norm": 0.05940384045243263, "learning_rate": 6.121337593951124e-05, "loss": 0.2392, "step": 35775 }, { "epoch": 2.898250162022035, "grad_norm": 0.07831598073244095, "learning_rate": 6.12088752869166e-05, "loss": 0.2467, "step": 35776 }, { "epoch": 2.8983311730395336, "grad_norm": 0.08034084737300873, "learning_rate": 6.120437463432198e-05, "loss": 0.2475, "step": 35777 }, { "epoch": 2.898412184057032, "grad_norm": 0.06301796436309814, "learning_rate": 6.119987398172736e-05, "loss": 0.2021, "step": 35778 }, { "epoch": 2.89849319507453, "grad_norm": 0.0662277564406395, "learning_rate": 6.119537332913273e-05, "loss": 0.2191, "step": 35779 }, { "epoch": 2.8985742060920288, "grad_norm": 0.06957314908504486, "learning_rate": 6.11908726765381e-05, "loss": 0.2556, "step": 35780 }, { "epoch": 2.898655217109527, "grad_norm": 0.05701505020260811, "learning_rate": 6.118637202394348e-05, "loss": 0.244, "step": 35781 }, { "epoch": 2.8987362281270252, "grad_norm": 0.06337479501962662, "learning_rate": 6.118187137134885e-05, "loss": 0.222, "step": 35782 }, { "epoch": 2.8988172391445235, "grad_norm": 0.07498802244663239, "learning_rate": 6.117737071875422e-05, "loss": 0.2608, "step": 35783 }, { "epoch": 2.898898250162022, "grad_norm": 0.06363961100578308, "learning_rate": 6.11728700661596e-05, "loss": 0.2693, "step": 35784 }, { "epoch": 2.8989792611795204, "grad_norm": 0.058687712997198105, "learning_rate": 6.116836941356497e-05, "loss": 0.2206, "step": 35785 }, { "epoch": 2.8990602721970187, "grad_norm": 0.07359886914491653, "learning_rate": 6.116386876097035e-05, "loss": 0.2365, "step": 35786 }, { "epoch": 2.899141283214517, "grad_norm": 0.0818164125084877, "learning_rate": 6.115936810837572e-05, "loss": 0.2434, "step": 35787 }, { "epoch": 2.8992222942320156, "grad_norm": 0.06891828030347824, "learning_rate": 6.115486745578109e-05, "loss": 0.2451, "step": 35788 }, { "epoch": 2.899303305249514, "grad_norm": 0.0845608338713646, "learning_rate": 6.115036680318647e-05, "loss": 0.2512, "step": 35789 }, { "epoch": 2.899384316267012, "grad_norm": 0.07846824079751968, "learning_rate": 6.114586615059184e-05, "loss": 0.206, "step": 35790 }, { "epoch": 2.899465327284511, "grad_norm": 0.07462865859270096, "learning_rate": 6.114136549799721e-05, "loss": 0.2471, "step": 35791 }, { "epoch": 2.899546338302009, "grad_norm": 0.06899303197860718, "learning_rate": 6.113686484540259e-05, "loss": 0.223, "step": 35792 }, { "epoch": 2.8996273493195073, "grad_norm": 0.07402803003787994, "learning_rate": 6.113236419280796e-05, "loss": 0.2511, "step": 35793 }, { "epoch": 2.899708360337006, "grad_norm": 0.06916865706443787, "learning_rate": 6.112786354021333e-05, "loss": 0.226, "step": 35794 }, { "epoch": 2.899789371354504, "grad_norm": 0.07550256699323654, "learning_rate": 6.112336288761871e-05, "loss": 0.2197, "step": 35795 }, { "epoch": 2.8998703823720025, "grad_norm": 0.06550214439630508, "learning_rate": 6.111886223502408e-05, "loss": 0.2392, "step": 35796 }, { "epoch": 2.899951393389501, "grad_norm": 0.09479155391454697, "learning_rate": 6.111436158242946e-05, "loss": 0.2623, "step": 35797 }, { "epoch": 2.9000324044069994, "grad_norm": 0.06591324508190155, "learning_rate": 6.110986092983483e-05, "loss": 0.2439, "step": 35798 }, { "epoch": 2.9001134154244976, "grad_norm": 0.07462452352046967, "learning_rate": 6.11053602772402e-05, "loss": 0.2639, "step": 35799 }, { "epoch": 2.9001944264419963, "grad_norm": 0.0622115395963192, "learning_rate": 6.110085962464558e-05, "loss": 0.2001, "step": 35800 }, { "epoch": 2.9002754374594946, "grad_norm": 0.07298856973648071, "learning_rate": 6.109635897205095e-05, "loss": 0.241, "step": 35801 }, { "epoch": 2.900356448476993, "grad_norm": 0.07199979573488235, "learning_rate": 6.109185831945632e-05, "loss": 0.2375, "step": 35802 }, { "epoch": 2.9004374594944915, "grad_norm": 0.08813222497701645, "learning_rate": 6.10873576668617e-05, "loss": 0.2208, "step": 35803 }, { "epoch": 2.9005184705119897, "grad_norm": 0.06779766082763672, "learning_rate": 6.108285701426707e-05, "loss": 0.2225, "step": 35804 }, { "epoch": 2.900599481529488, "grad_norm": 0.07290019094944, "learning_rate": 6.107835636167244e-05, "loss": 0.2221, "step": 35805 }, { "epoch": 2.9006804925469862, "grad_norm": 0.07287006825208664, "learning_rate": 6.107385570907782e-05, "loss": 0.2547, "step": 35806 }, { "epoch": 2.900761503564485, "grad_norm": 0.07200240343809128, "learning_rate": 6.106935505648319e-05, "loss": 0.2173, "step": 35807 }, { "epoch": 2.900842514581983, "grad_norm": 0.06275129318237305, "learning_rate": 6.106485440388856e-05, "loss": 0.2357, "step": 35808 }, { "epoch": 2.9009235255994814, "grad_norm": 0.07564835995435715, "learning_rate": 6.106035375129394e-05, "loss": 0.2277, "step": 35809 }, { "epoch": 2.9010045366169797, "grad_norm": 0.05272950977087021, "learning_rate": 6.105585309869931e-05, "loss": 0.191, "step": 35810 }, { "epoch": 2.9010855476344783, "grad_norm": 0.0691477358341217, "learning_rate": 6.10513524461047e-05, "loss": 0.2281, "step": 35811 }, { "epoch": 2.9011665586519766, "grad_norm": 0.07196378707885742, "learning_rate": 6.104685179351006e-05, "loss": 0.2087, "step": 35812 }, { "epoch": 2.901247569669475, "grad_norm": 0.059849925339221954, "learning_rate": 6.104235114091543e-05, "loss": 0.2459, "step": 35813 }, { "epoch": 2.9013285806869735, "grad_norm": 0.0893348902463913, "learning_rate": 6.103785048832082e-05, "loss": 0.2147, "step": 35814 }, { "epoch": 2.9014095917044718, "grad_norm": 0.06526979058980942, "learning_rate": 6.103334983572618e-05, "loss": 0.2392, "step": 35815 }, { "epoch": 2.90149060272197, "grad_norm": 0.06764549016952515, "learning_rate": 6.1028849183131554e-05, "loss": 0.2748, "step": 35816 }, { "epoch": 2.9015716137394687, "grad_norm": 0.07543917000293732, "learning_rate": 6.1024348530536934e-05, "loss": 0.2473, "step": 35817 }, { "epoch": 2.901652624756967, "grad_norm": 0.0789756327867508, "learning_rate": 6.101984787794231e-05, "loss": 0.2554, "step": 35818 }, { "epoch": 2.901733635774465, "grad_norm": 0.05799851939082146, "learning_rate": 6.1015347225347675e-05, "loss": 0.215, "step": 35819 }, { "epoch": 2.901814646791964, "grad_norm": 0.06580415368080139, "learning_rate": 6.1010846572753055e-05, "loss": 0.2228, "step": 35820 }, { "epoch": 2.901895657809462, "grad_norm": 0.05901229754090309, "learning_rate": 6.100634592015843e-05, "loss": 0.1921, "step": 35821 }, { "epoch": 2.9019766688269604, "grad_norm": 0.07883156836032867, "learning_rate": 6.1001845267563795e-05, "loss": 0.2291, "step": 35822 }, { "epoch": 2.902057679844459, "grad_norm": 0.07001320272684097, "learning_rate": 6.0997344614969176e-05, "loss": 0.2415, "step": 35823 }, { "epoch": 2.9021386908619573, "grad_norm": 0.0769026055932045, "learning_rate": 6.099284396237455e-05, "loss": 0.2237, "step": 35824 }, { "epoch": 2.9022197018794555, "grad_norm": 0.07997845858335495, "learning_rate": 6.0988343309779916e-05, "loss": 0.2736, "step": 35825 }, { "epoch": 2.9023007128969542, "grad_norm": 0.06704940646886826, "learning_rate": 6.0983842657185297e-05, "loss": 0.2322, "step": 35826 }, { "epoch": 2.9023817239144525, "grad_norm": 0.07610899209976196, "learning_rate": 6.097934200459067e-05, "loss": 0.214, "step": 35827 }, { "epoch": 2.9024627349319507, "grad_norm": 0.07217517495155334, "learning_rate": 6.097484135199604e-05, "loss": 0.2088, "step": 35828 }, { "epoch": 2.902543745949449, "grad_norm": 0.07086925953626633, "learning_rate": 6.097034069940142e-05, "loss": 0.2388, "step": 35829 }, { "epoch": 2.9026247569669477, "grad_norm": 0.07095544040203094, "learning_rate": 6.096584004680679e-05, "loss": 0.2729, "step": 35830 }, { "epoch": 2.902705767984446, "grad_norm": 0.0699014961719513, "learning_rate": 6.096133939421217e-05, "loss": 0.2417, "step": 35831 }, { "epoch": 2.902786779001944, "grad_norm": 0.0674590915441513, "learning_rate": 6.095683874161754e-05, "loss": 0.2022, "step": 35832 }, { "epoch": 2.9028677900194424, "grad_norm": 0.06928414106369019, "learning_rate": 6.095233808902291e-05, "loss": 0.2075, "step": 35833 }, { "epoch": 2.902948801036941, "grad_norm": 0.06169206649065018, "learning_rate": 6.094783743642829e-05, "loss": 0.1976, "step": 35834 }, { "epoch": 2.9030298120544393, "grad_norm": 0.07483284175395966, "learning_rate": 6.094333678383366e-05, "loss": 0.2431, "step": 35835 }, { "epoch": 2.9031108230719376, "grad_norm": 0.07868918031454086, "learning_rate": 6.093883613123903e-05, "loss": 0.2426, "step": 35836 }, { "epoch": 2.9031918340894363, "grad_norm": 0.06046166270971298, "learning_rate": 6.093433547864441e-05, "loss": 0.2048, "step": 35837 }, { "epoch": 2.9032728451069345, "grad_norm": 0.05576327070593834, "learning_rate": 6.092983482604978e-05, "loss": 0.2047, "step": 35838 }, { "epoch": 2.9033538561244328, "grad_norm": 0.07098156213760376, "learning_rate": 6.0925334173455153e-05, "loss": 0.212, "step": 35839 }, { "epoch": 2.9034348671419314, "grad_norm": 0.06611877679824829, "learning_rate": 6.0920833520860534e-05, "loss": 0.201, "step": 35840 }, { "epoch": 2.9035158781594297, "grad_norm": 0.05478915944695473, "learning_rate": 6.09163328682659e-05, "loss": 0.2121, "step": 35841 }, { "epoch": 2.903596889176928, "grad_norm": 0.07683388888835907, "learning_rate": 6.0911832215671274e-05, "loss": 0.2307, "step": 35842 }, { "epoch": 2.9036779001944266, "grad_norm": 0.06985338777303696, "learning_rate": 6.0907331563076655e-05, "loss": 0.2677, "step": 35843 }, { "epoch": 2.903758911211925, "grad_norm": 0.05494997650384903, "learning_rate": 6.090283091048202e-05, "loss": 0.207, "step": 35844 }, { "epoch": 2.903839922229423, "grad_norm": 0.06654516607522964, "learning_rate": 6.0898330257887395e-05, "loss": 0.2374, "step": 35845 }, { "epoch": 2.903920933246922, "grad_norm": 0.08357511460781097, "learning_rate": 6.0893829605292775e-05, "loss": 0.3211, "step": 35846 }, { "epoch": 2.90400194426442, "grad_norm": 0.06784695386886597, "learning_rate": 6.088932895269814e-05, "loss": 0.2089, "step": 35847 }, { "epoch": 2.9040829552819183, "grad_norm": 0.056639935821294785, "learning_rate": 6.0884828300103516e-05, "loss": 0.2092, "step": 35848 }, { "epoch": 2.904163966299417, "grad_norm": 0.06386925280094147, "learning_rate": 6.0880327647508896e-05, "loss": 0.2534, "step": 35849 }, { "epoch": 2.904244977316915, "grad_norm": 0.06474310904741287, "learning_rate": 6.087582699491426e-05, "loss": 0.2513, "step": 35850 }, { "epoch": 2.9043259883344135, "grad_norm": 0.06959279626607895, "learning_rate": 6.087132634231964e-05, "loss": 0.2557, "step": 35851 }, { "epoch": 2.9044069993519117, "grad_norm": 0.06982912123203278, "learning_rate": 6.086682568972502e-05, "loss": 0.2188, "step": 35852 }, { "epoch": 2.9044880103694104, "grad_norm": 0.06339522451162338, "learning_rate": 6.0862325037130384e-05, "loss": 0.2278, "step": 35853 }, { "epoch": 2.9045690213869086, "grad_norm": 0.06980250775814056, "learning_rate": 6.085782438453576e-05, "loss": 0.2094, "step": 35854 }, { "epoch": 2.904650032404407, "grad_norm": 0.06515302509069443, "learning_rate": 6.085332373194114e-05, "loss": 0.237, "step": 35855 }, { "epoch": 2.904731043421905, "grad_norm": 0.05380278453230858, "learning_rate": 6.0848823079346505e-05, "loss": 0.2298, "step": 35856 }, { "epoch": 2.904812054439404, "grad_norm": 0.06366229057312012, "learning_rate": 6.0844322426751885e-05, "loss": 0.2188, "step": 35857 }, { "epoch": 2.904893065456902, "grad_norm": 0.08803581446409225, "learning_rate": 6.083982177415726e-05, "loss": 0.2557, "step": 35858 }, { "epoch": 2.9049740764744003, "grad_norm": 0.08504535257816315, "learning_rate": 6.0835321121562626e-05, "loss": 0.2789, "step": 35859 }, { "epoch": 2.905055087491899, "grad_norm": 0.07685036212205887, "learning_rate": 6.0830820468968006e-05, "loss": 0.2296, "step": 35860 }, { "epoch": 2.9051360985093972, "grad_norm": 0.08343004435300827, "learning_rate": 6.082631981637338e-05, "loss": 0.261, "step": 35861 }, { "epoch": 2.9052171095268955, "grad_norm": 0.07279090583324432, "learning_rate": 6.0821819163778746e-05, "loss": 0.2527, "step": 35862 }, { "epoch": 2.905298120544394, "grad_norm": 0.07041387259960175, "learning_rate": 6.081731851118413e-05, "loss": 0.2108, "step": 35863 }, { "epoch": 2.9053791315618924, "grad_norm": 0.07155166566371918, "learning_rate": 6.08128178585895e-05, "loss": 0.2382, "step": 35864 }, { "epoch": 2.9054601425793907, "grad_norm": 0.07028033584356308, "learning_rate": 6.080831720599487e-05, "loss": 0.2522, "step": 35865 }, { "epoch": 2.9055411535968894, "grad_norm": 0.05422496423125267, "learning_rate": 6.080381655340025e-05, "loss": 0.2167, "step": 35866 }, { "epoch": 2.9056221646143876, "grad_norm": 0.06637411564588547, "learning_rate": 6.079931590080562e-05, "loss": 0.2423, "step": 35867 }, { "epoch": 2.905703175631886, "grad_norm": 0.08302521705627441, "learning_rate": 6.079481524821099e-05, "loss": 0.2241, "step": 35868 }, { "epoch": 2.9057841866493845, "grad_norm": 0.06816847622394562, "learning_rate": 6.079031459561637e-05, "loss": 0.262, "step": 35869 }, { "epoch": 2.905865197666883, "grad_norm": 0.07962358742952347, "learning_rate": 6.078581394302174e-05, "loss": 0.2411, "step": 35870 }, { "epoch": 2.905946208684381, "grad_norm": 0.06026584282517433, "learning_rate": 6.078131329042711e-05, "loss": 0.2123, "step": 35871 }, { "epoch": 2.9060272197018797, "grad_norm": 0.07744492590427399, "learning_rate": 6.077681263783249e-05, "loss": 0.2571, "step": 35872 }, { "epoch": 2.906108230719378, "grad_norm": 0.07641256600618362, "learning_rate": 6.077231198523786e-05, "loss": 0.2433, "step": 35873 }, { "epoch": 2.906189241736876, "grad_norm": 0.06493622064590454, "learning_rate": 6.076781133264323e-05, "loss": 0.2226, "step": 35874 }, { "epoch": 2.9062702527543745, "grad_norm": 0.06400008499622345, "learning_rate": 6.076331068004861e-05, "loss": 0.2073, "step": 35875 }, { "epoch": 2.906351263771873, "grad_norm": 0.08324109762907028, "learning_rate": 6.0758810027453984e-05, "loss": 0.2243, "step": 35876 }, { "epoch": 2.9064322747893714, "grad_norm": 0.0696927011013031, "learning_rate": 6.075430937485935e-05, "loss": 0.2262, "step": 35877 }, { "epoch": 2.9065132858068696, "grad_norm": 0.07888327538967133, "learning_rate": 6.074980872226473e-05, "loss": 0.2412, "step": 35878 }, { "epoch": 2.906594296824368, "grad_norm": 0.07660671323537827, "learning_rate": 6.0745308069670104e-05, "loss": 0.2162, "step": 35879 }, { "epoch": 2.9066753078418666, "grad_norm": 0.07841958850622177, "learning_rate": 6.074080741707547e-05, "loss": 0.2149, "step": 35880 }, { "epoch": 2.906756318859365, "grad_norm": 0.0810098648071289, "learning_rate": 6.073630676448085e-05, "loss": 0.2379, "step": 35881 }, { "epoch": 2.906837329876863, "grad_norm": 0.06500328332185745, "learning_rate": 6.0731806111886225e-05, "loss": 0.2691, "step": 35882 }, { "epoch": 2.9069183408943617, "grad_norm": 0.0727931559085846, "learning_rate": 6.072730545929159e-05, "loss": 0.2249, "step": 35883 }, { "epoch": 2.90699935191186, "grad_norm": 0.05468270182609558, "learning_rate": 6.072280480669697e-05, "loss": 0.225, "step": 35884 }, { "epoch": 2.9070803629293582, "grad_norm": 0.06025093048810959, "learning_rate": 6.0718304154102346e-05, "loss": 0.1984, "step": 35885 }, { "epoch": 2.907161373946857, "grad_norm": 0.06294640898704529, "learning_rate": 6.0713803501507726e-05, "loss": 0.2257, "step": 35886 }, { "epoch": 2.907242384964355, "grad_norm": 0.06526791304349899, "learning_rate": 6.07093028489131e-05, "loss": 0.2354, "step": 35887 }, { "epoch": 2.9073233959818534, "grad_norm": 0.07205391675233841, "learning_rate": 6.070480219631847e-05, "loss": 0.2435, "step": 35888 }, { "epoch": 2.907404406999352, "grad_norm": 0.061126165091991425, "learning_rate": 6.070030154372385e-05, "loss": 0.2328, "step": 35889 }, { "epoch": 2.9074854180168503, "grad_norm": 0.08679535239934921, "learning_rate": 6.069580089112922e-05, "loss": 0.2527, "step": 35890 }, { "epoch": 2.9075664290343486, "grad_norm": 0.0681762620806694, "learning_rate": 6.069130023853459e-05, "loss": 0.2374, "step": 35891 }, { "epoch": 2.9076474400518473, "grad_norm": 0.08707486838102341, "learning_rate": 6.068679958593997e-05, "loss": 0.2545, "step": 35892 }, { "epoch": 2.9077284510693455, "grad_norm": 0.07081759721040726, "learning_rate": 6.068229893334534e-05, "loss": 0.233, "step": 35893 }, { "epoch": 2.9078094620868438, "grad_norm": 0.06580916047096252, "learning_rate": 6.067779828075071e-05, "loss": 0.2258, "step": 35894 }, { "epoch": 2.9078904731043425, "grad_norm": 0.08303841948509216, "learning_rate": 6.067329762815609e-05, "loss": 0.223, "step": 35895 }, { "epoch": 2.9079714841218407, "grad_norm": 0.0737767368555069, "learning_rate": 6.066879697556146e-05, "loss": 0.2329, "step": 35896 }, { "epoch": 2.908052495139339, "grad_norm": 0.07478015124797821, "learning_rate": 6.066429632296683e-05, "loss": 0.2463, "step": 35897 }, { "epoch": 2.908133506156837, "grad_norm": 0.0626714825630188, "learning_rate": 6.065979567037221e-05, "loss": 0.2489, "step": 35898 }, { "epoch": 2.9082145171743354, "grad_norm": 0.07597566395998001, "learning_rate": 6.065529501777758e-05, "loss": 0.2727, "step": 35899 }, { "epoch": 2.908295528191834, "grad_norm": 0.07273010909557343, "learning_rate": 6.065079436518295e-05, "loss": 0.2128, "step": 35900 }, { "epoch": 2.9083765392093324, "grad_norm": 0.07037299126386642, "learning_rate": 6.064629371258833e-05, "loss": 0.2208, "step": 35901 }, { "epoch": 2.9084575502268306, "grad_norm": 0.07470954954624176, "learning_rate": 6.0641793059993704e-05, "loss": 0.2775, "step": 35902 }, { "epoch": 2.9085385612443293, "grad_norm": 0.06422090530395508, "learning_rate": 6.063729240739907e-05, "loss": 0.2161, "step": 35903 }, { "epoch": 2.9086195722618275, "grad_norm": 0.0677059069275856, "learning_rate": 6.063279175480445e-05, "loss": 0.2187, "step": 35904 }, { "epoch": 2.908700583279326, "grad_norm": 0.05977741628885269, "learning_rate": 6.0628291102209825e-05, "loss": 0.2246, "step": 35905 }, { "epoch": 2.9087815942968245, "grad_norm": 0.06989169120788574, "learning_rate": 6.062379044961519e-05, "loss": 0.2392, "step": 35906 }, { "epoch": 2.9088626053143227, "grad_norm": 0.07747205346822739, "learning_rate": 6.061928979702057e-05, "loss": 0.2463, "step": 35907 }, { "epoch": 2.908943616331821, "grad_norm": 0.059107888489961624, "learning_rate": 6.0614789144425946e-05, "loss": 0.2219, "step": 35908 }, { "epoch": 2.9090246273493197, "grad_norm": 0.06700557470321655, "learning_rate": 6.061028849183131e-05, "loss": 0.2547, "step": 35909 }, { "epoch": 2.909105638366818, "grad_norm": 0.057208240032196045, "learning_rate": 6.060578783923669e-05, "loss": 0.2164, "step": 35910 }, { "epoch": 2.909186649384316, "grad_norm": 0.08306428790092468, "learning_rate": 6.0601287186642066e-05, "loss": 0.235, "step": 35911 }, { "epoch": 2.909267660401815, "grad_norm": 0.06049541011452675, "learning_rate": 6.059678653404745e-05, "loss": 0.233, "step": 35912 }, { "epoch": 2.909348671419313, "grad_norm": 0.07041322439908981, "learning_rate": 6.0592285881452814e-05, "loss": 0.2491, "step": 35913 }, { "epoch": 2.9094296824368113, "grad_norm": 0.053164467215538025, "learning_rate": 6.058778522885819e-05, "loss": 0.2484, "step": 35914 }, { "epoch": 2.90951069345431, "grad_norm": 0.06176530569791794, "learning_rate": 6.058328457626357e-05, "loss": 0.2616, "step": 35915 }, { "epoch": 2.9095917044718083, "grad_norm": 0.07826686650514603, "learning_rate": 6.0578783923668934e-05, "loss": 0.2445, "step": 35916 }, { "epoch": 2.9096727154893065, "grad_norm": 0.05870719254016876, "learning_rate": 6.057428327107431e-05, "loss": 0.2202, "step": 35917 }, { "epoch": 2.909753726506805, "grad_norm": 0.08019772171974182, "learning_rate": 6.056978261847969e-05, "loss": 0.2925, "step": 35918 }, { "epoch": 2.9098347375243034, "grad_norm": 0.08110763132572174, "learning_rate": 6.0565281965885055e-05, "loss": 0.243, "step": 35919 }, { "epoch": 2.9099157485418017, "grad_norm": 0.06867033243179321, "learning_rate": 6.056078131329043e-05, "loss": 0.2711, "step": 35920 }, { "epoch": 2.9099967595593, "grad_norm": 0.06370605528354645, "learning_rate": 6.055628066069581e-05, "loss": 0.2008, "step": 35921 }, { "epoch": 2.910077770576798, "grad_norm": 0.06781043112277985, "learning_rate": 6.0551780008101176e-05, "loss": 0.2233, "step": 35922 }, { "epoch": 2.910158781594297, "grad_norm": 0.061090607196092606, "learning_rate": 6.054727935550655e-05, "loss": 0.2301, "step": 35923 }, { "epoch": 2.910239792611795, "grad_norm": 0.06507328897714615, "learning_rate": 6.054277870291193e-05, "loss": 0.2153, "step": 35924 }, { "epoch": 2.9103208036292934, "grad_norm": 0.06221972405910492, "learning_rate": 6.05382780503173e-05, "loss": 0.2341, "step": 35925 }, { "epoch": 2.910401814646792, "grad_norm": 0.08535179495811462, "learning_rate": 6.053377739772267e-05, "loss": 0.2062, "step": 35926 }, { "epoch": 2.9104828256642903, "grad_norm": 0.06976145505905151, "learning_rate": 6.052927674512805e-05, "loss": 0.2191, "step": 35927 }, { "epoch": 2.9105638366817885, "grad_norm": 0.0640319213271141, "learning_rate": 6.052477609253342e-05, "loss": 0.2136, "step": 35928 }, { "epoch": 2.910644847699287, "grad_norm": 0.0687737911939621, "learning_rate": 6.052027543993879e-05, "loss": 0.2224, "step": 35929 }, { "epoch": 2.9107258587167855, "grad_norm": 0.06703128665685654, "learning_rate": 6.051577478734417e-05, "loss": 0.2207, "step": 35930 }, { "epoch": 2.9108068697342837, "grad_norm": 0.08989090472459793, "learning_rate": 6.051127413474954e-05, "loss": 0.229, "step": 35931 }, { "epoch": 2.9108878807517824, "grad_norm": 0.08146070688962936, "learning_rate": 6.050677348215491e-05, "loss": 0.2524, "step": 35932 }, { "epoch": 2.9109688917692806, "grad_norm": 0.06896800547838211, "learning_rate": 6.050227282956029e-05, "loss": 0.2529, "step": 35933 }, { "epoch": 2.911049902786779, "grad_norm": 0.06814651191234589, "learning_rate": 6.049777217696566e-05, "loss": 0.2504, "step": 35934 }, { "epoch": 2.9111309138042776, "grad_norm": 0.06067023426294327, "learning_rate": 6.049327152437103e-05, "loss": 0.2049, "step": 35935 }, { "epoch": 2.911211924821776, "grad_norm": 0.08196239918470383, "learning_rate": 6.048877087177641e-05, "loss": 0.2512, "step": 35936 }, { "epoch": 2.911292935839274, "grad_norm": 0.059716738760471344, "learning_rate": 6.048427021918178e-05, "loss": 0.2547, "step": 35937 }, { "epoch": 2.9113739468567728, "grad_norm": 0.12704335153102875, "learning_rate": 6.047976956658716e-05, "loss": 0.2401, "step": 35938 }, { "epoch": 2.911454957874271, "grad_norm": 0.08267374336719513, "learning_rate": 6.0475268913992534e-05, "loss": 0.2669, "step": 35939 }, { "epoch": 2.9115359688917692, "grad_norm": 0.05156685784459114, "learning_rate": 6.04707682613979e-05, "loss": 0.2432, "step": 35940 }, { "epoch": 2.911616979909268, "grad_norm": 0.06564828753471375, "learning_rate": 6.046626760880328e-05, "loss": 0.2191, "step": 35941 }, { "epoch": 2.911697990926766, "grad_norm": 0.0737994983792305, "learning_rate": 6.0461766956208655e-05, "loss": 0.254, "step": 35942 }, { "epoch": 2.9117790019442644, "grad_norm": 0.06724006682634354, "learning_rate": 6.045726630361402e-05, "loss": 0.2057, "step": 35943 }, { "epoch": 2.9118600129617627, "grad_norm": 0.07741007208824158, "learning_rate": 6.04527656510194e-05, "loss": 0.2592, "step": 35944 }, { "epoch": 2.911941023979261, "grad_norm": 0.06976274400949478, "learning_rate": 6.0448264998424776e-05, "loss": 0.2221, "step": 35945 }, { "epoch": 2.9120220349967596, "grad_norm": 0.06324218213558197, "learning_rate": 6.044376434583014e-05, "loss": 0.2021, "step": 35946 }, { "epoch": 2.912103046014258, "grad_norm": 0.06732454150915146, "learning_rate": 6.043926369323552e-05, "loss": 0.2843, "step": 35947 }, { "epoch": 2.912184057031756, "grad_norm": 0.057616446167230606, "learning_rate": 6.0434763040640897e-05, "loss": 0.232, "step": 35948 }, { "epoch": 2.912265068049255, "grad_norm": 0.06809848546981812, "learning_rate": 6.043026238804626e-05, "loss": 0.2738, "step": 35949 }, { "epoch": 2.912346079066753, "grad_norm": 0.08779431879520416, "learning_rate": 6.0425761735451644e-05, "loss": 0.2318, "step": 35950 }, { "epoch": 2.9124270900842513, "grad_norm": 0.07004832476377487, "learning_rate": 6.042126108285702e-05, "loss": 0.2597, "step": 35951 }, { "epoch": 2.91250810110175, "grad_norm": 0.07469019293785095, "learning_rate": 6.0416760430262384e-05, "loss": 0.2601, "step": 35952 }, { "epoch": 2.912589112119248, "grad_norm": 0.08924978971481323, "learning_rate": 6.0412259777667765e-05, "loss": 0.2753, "step": 35953 }, { "epoch": 2.9126701231367464, "grad_norm": 0.06867161393165588, "learning_rate": 6.040775912507314e-05, "loss": 0.2344, "step": 35954 }, { "epoch": 2.912751134154245, "grad_norm": 0.053387414664030075, "learning_rate": 6.0403258472478505e-05, "loss": 0.2333, "step": 35955 }, { "epoch": 2.9128321451717434, "grad_norm": 0.06053132563829422, "learning_rate": 6.039875781988389e-05, "loss": 0.2225, "step": 35956 }, { "epoch": 2.9129131561892416, "grad_norm": 0.07425640523433685, "learning_rate": 6.039425716728926e-05, "loss": 0.2235, "step": 35957 }, { "epoch": 2.9129941672067403, "grad_norm": 0.07319247722625732, "learning_rate": 6.0389756514694626e-05, "loss": 0.2746, "step": 35958 }, { "epoch": 2.9130751782242386, "grad_norm": 0.07840321213006973, "learning_rate": 6.038525586210001e-05, "loss": 0.2477, "step": 35959 }, { "epoch": 2.913156189241737, "grad_norm": 0.078122079372406, "learning_rate": 6.038075520950538e-05, "loss": 0.2466, "step": 35960 }, { "epoch": 2.9132372002592355, "grad_norm": 0.07562543451786041, "learning_rate": 6.0376254556910747e-05, "loss": 0.2156, "step": 35961 }, { "epoch": 2.9133182112767337, "grad_norm": 0.08191193640232086, "learning_rate": 6.0371753904316134e-05, "loss": 0.2012, "step": 35962 }, { "epoch": 2.913399222294232, "grad_norm": 0.08304063230752945, "learning_rate": 6.03672532517215e-05, "loss": 0.2545, "step": 35963 }, { "epoch": 2.9134802333117307, "grad_norm": 0.06543884426355362, "learning_rate": 6.036275259912688e-05, "loss": 0.2176, "step": 35964 }, { "epoch": 2.913561244329229, "grad_norm": 0.06955443322658539, "learning_rate": 6.0358251946532255e-05, "loss": 0.3001, "step": 35965 }, { "epoch": 2.913642255346727, "grad_norm": 0.06895852088928223, "learning_rate": 6.035375129393762e-05, "loss": 0.2049, "step": 35966 }, { "epoch": 2.9137232663642254, "grad_norm": 0.07708650082349777, "learning_rate": 6.0349250641343e-05, "loss": 0.1875, "step": 35967 }, { "epoch": 2.9138042773817237, "grad_norm": 0.06768258661031723, "learning_rate": 6.0344749988748375e-05, "loss": 0.2432, "step": 35968 }, { "epoch": 2.9138852883992223, "grad_norm": 0.06098243594169617, "learning_rate": 6.034024933615374e-05, "loss": 0.2507, "step": 35969 }, { "epoch": 2.9139662994167206, "grad_norm": 0.06618139147758484, "learning_rate": 6.033574868355912e-05, "loss": 0.243, "step": 35970 }, { "epoch": 2.914047310434219, "grad_norm": 0.05809492990374565, "learning_rate": 6.0331248030964496e-05, "loss": 0.18, "step": 35971 }, { "epoch": 2.9141283214517175, "grad_norm": 0.06135577708482742, "learning_rate": 6.032674737836986e-05, "loss": 0.2337, "step": 35972 }, { "epoch": 2.9142093324692158, "grad_norm": 0.0696924701333046, "learning_rate": 6.0322246725775243e-05, "loss": 0.2357, "step": 35973 }, { "epoch": 2.914290343486714, "grad_norm": 0.08586207032203674, "learning_rate": 6.031774607318062e-05, "loss": 0.2312, "step": 35974 }, { "epoch": 2.9143713545042127, "grad_norm": 0.07971008121967316, "learning_rate": 6.0313245420585984e-05, "loss": 0.2802, "step": 35975 }, { "epoch": 2.914452365521711, "grad_norm": 0.07447706162929535, "learning_rate": 6.0308744767991364e-05, "loss": 0.1998, "step": 35976 }, { "epoch": 2.914533376539209, "grad_norm": 0.07790254801511765, "learning_rate": 6.030424411539674e-05, "loss": 0.2352, "step": 35977 }, { "epoch": 2.914614387556708, "grad_norm": 0.08902592211961746, "learning_rate": 6.0299743462802105e-05, "loss": 0.2782, "step": 35978 }, { "epoch": 2.914695398574206, "grad_norm": 0.0902736634016037, "learning_rate": 6.0295242810207485e-05, "loss": 0.2615, "step": 35979 }, { "epoch": 2.9147764095917044, "grad_norm": 0.08209915459156036, "learning_rate": 6.029074215761286e-05, "loss": 0.2546, "step": 35980 }, { "epoch": 2.914857420609203, "grad_norm": 0.06725655496120453, "learning_rate": 6.0286241505018225e-05, "loss": 0.2279, "step": 35981 }, { "epoch": 2.9149384316267013, "grad_norm": 0.07377345114946365, "learning_rate": 6.0281740852423606e-05, "loss": 0.2458, "step": 35982 }, { "epoch": 2.9150194426441995, "grad_norm": 0.06759821623563766, "learning_rate": 6.027724019982898e-05, "loss": 0.2527, "step": 35983 }, { "epoch": 2.9151004536616982, "grad_norm": 0.0730883926153183, "learning_rate": 6.0272739547234346e-05, "loss": 0.2729, "step": 35984 }, { "epoch": 2.9151814646791965, "grad_norm": 0.06299924850463867, "learning_rate": 6.026823889463973e-05, "loss": 0.2348, "step": 35985 }, { "epoch": 2.9152624756966947, "grad_norm": 0.06301521509885788, "learning_rate": 6.02637382420451e-05, "loss": 0.2553, "step": 35986 }, { "epoch": 2.915343486714193, "grad_norm": 0.07721760869026184, "learning_rate": 6.025923758945047e-05, "loss": 0.2446, "step": 35987 }, { "epoch": 2.9154244977316917, "grad_norm": 0.07018601894378662, "learning_rate": 6.025473693685585e-05, "loss": 0.2279, "step": 35988 }, { "epoch": 2.91550550874919, "grad_norm": 0.0949833020567894, "learning_rate": 6.025023628426122e-05, "loss": 0.2389, "step": 35989 }, { "epoch": 2.915586519766688, "grad_norm": 0.06459692865610123, "learning_rate": 6.02457356316666e-05, "loss": 0.224, "step": 35990 }, { "epoch": 2.9156675307841864, "grad_norm": 0.08588383346796036, "learning_rate": 6.024123497907197e-05, "loss": 0.2213, "step": 35991 }, { "epoch": 2.915748541801685, "grad_norm": 0.056231267750263214, "learning_rate": 6.023673432647734e-05, "loss": 0.2273, "step": 35992 }, { "epoch": 2.9158295528191833, "grad_norm": 0.06222568452358246, "learning_rate": 6.023223367388272e-05, "loss": 0.222, "step": 35993 }, { "epoch": 2.9159105638366816, "grad_norm": 0.06202542781829834, "learning_rate": 6.022773302128809e-05, "loss": 0.245, "step": 35994 }, { "epoch": 2.9159915748541803, "grad_norm": 0.06934040039777756, "learning_rate": 6.022323236869346e-05, "loss": 0.2778, "step": 35995 }, { "epoch": 2.9160725858716785, "grad_norm": 0.05983179062604904, "learning_rate": 6.021873171609884e-05, "loss": 0.2096, "step": 35996 }, { "epoch": 2.9161535968891767, "grad_norm": 0.0680842399597168, "learning_rate": 6.021423106350421e-05, "loss": 0.1782, "step": 35997 }, { "epoch": 2.9162346079066754, "grad_norm": 0.07440262287855148, "learning_rate": 6.0209730410909583e-05, "loss": 0.2313, "step": 35998 }, { "epoch": 2.9163156189241737, "grad_norm": 0.07230222970247269, "learning_rate": 6.0205229758314964e-05, "loss": 0.2244, "step": 35999 }, { "epoch": 2.916396629941672, "grad_norm": 0.06707389652729034, "learning_rate": 6.020072910572033e-05, "loss": 0.2261, "step": 36000 }, { "epoch": 2.9164776409591706, "grad_norm": 0.08801737427711487, "learning_rate": 6.0196228453125704e-05, "loss": 0.2211, "step": 36001 }, { "epoch": 2.916558651976669, "grad_norm": 0.060239892452955246, "learning_rate": 6.0191727800531085e-05, "loss": 0.2263, "step": 36002 }, { "epoch": 2.916639662994167, "grad_norm": 0.06880299001932144, "learning_rate": 6.018722714793645e-05, "loss": 0.2139, "step": 36003 }, { "epoch": 2.916720674011666, "grad_norm": 0.0746193379163742, "learning_rate": 6.0182726495341825e-05, "loss": 0.2507, "step": 36004 }, { "epoch": 2.916801685029164, "grad_norm": 0.0893036276102066, "learning_rate": 6.0178225842747205e-05, "loss": 0.2142, "step": 36005 }, { "epoch": 2.9168826960466623, "grad_norm": 0.07693555951118469, "learning_rate": 6.017372519015257e-05, "loss": 0.2635, "step": 36006 }, { "epoch": 2.916963707064161, "grad_norm": 0.0776832103729248, "learning_rate": 6.0169224537557946e-05, "loss": 0.2693, "step": 36007 }, { "epoch": 2.917044718081659, "grad_norm": 0.06802288442850113, "learning_rate": 6.0164723884963326e-05, "loss": 0.2196, "step": 36008 }, { "epoch": 2.9171257290991575, "grad_norm": 0.06248356029391289, "learning_rate": 6.016022323236869e-05, "loss": 0.2019, "step": 36009 }, { "epoch": 2.9172067401166557, "grad_norm": 0.08267667889595032, "learning_rate": 6.015572257977407e-05, "loss": 0.2381, "step": 36010 }, { "epoch": 2.9172877511341544, "grad_norm": 0.09951439499855042, "learning_rate": 6.015122192717945e-05, "loss": 0.2291, "step": 36011 }, { "epoch": 2.9173687621516526, "grad_norm": 0.0678766742348671, "learning_rate": 6.0146721274584814e-05, "loss": 0.2568, "step": 36012 }, { "epoch": 2.917449773169151, "grad_norm": 0.0689510852098465, "learning_rate": 6.014222062199019e-05, "loss": 0.2227, "step": 36013 }, { "epoch": 2.917530784186649, "grad_norm": 0.07802077382802963, "learning_rate": 6.013771996939557e-05, "loss": 0.2553, "step": 36014 }, { "epoch": 2.917611795204148, "grad_norm": 0.08323326706886292, "learning_rate": 6.0133219316800935e-05, "loss": 0.2574, "step": 36015 }, { "epoch": 2.917692806221646, "grad_norm": 0.0724555179476738, "learning_rate": 6.0128718664206315e-05, "loss": 0.2756, "step": 36016 }, { "epoch": 2.9177738172391443, "grad_norm": 0.06053786724805832, "learning_rate": 6.012421801161169e-05, "loss": 0.2384, "step": 36017 }, { "epoch": 2.917854828256643, "grad_norm": 0.06939715892076492, "learning_rate": 6.0119717359017056e-05, "loss": 0.2602, "step": 36018 }, { "epoch": 2.9179358392741412, "grad_norm": 0.058913156390190125, "learning_rate": 6.0115216706422436e-05, "loss": 0.2173, "step": 36019 }, { "epoch": 2.9180168502916395, "grad_norm": 0.0667242705821991, "learning_rate": 6.011071605382781e-05, "loss": 0.2754, "step": 36020 }, { "epoch": 2.918097861309138, "grad_norm": 0.07719171792268753, "learning_rate": 6.0106215401233176e-05, "loss": 0.2325, "step": 36021 }, { "epoch": 2.9181788723266364, "grad_norm": 0.06560521572828293, "learning_rate": 6.010171474863856e-05, "loss": 0.237, "step": 36022 }, { "epoch": 2.9182598833441347, "grad_norm": 0.07015185058116913, "learning_rate": 6.009721409604393e-05, "loss": 0.2556, "step": 36023 }, { "epoch": 2.9183408943616334, "grad_norm": 0.08773837238550186, "learning_rate": 6.00927134434493e-05, "loss": 0.2307, "step": 36024 }, { "epoch": 2.9184219053791316, "grad_norm": 0.07875818014144897, "learning_rate": 6.0088212790854684e-05, "loss": 0.2694, "step": 36025 }, { "epoch": 2.91850291639663, "grad_norm": 0.09875552356243134, "learning_rate": 6.008371213826005e-05, "loss": 0.228, "step": 36026 }, { "epoch": 2.9185839274141285, "grad_norm": 0.06942545622587204, "learning_rate": 6.007921148566542e-05, "loss": 0.2305, "step": 36027 }, { "epoch": 2.9186649384316268, "grad_norm": 0.06027941405773163, "learning_rate": 6.0074710833070805e-05, "loss": 0.2302, "step": 36028 }, { "epoch": 2.918745949449125, "grad_norm": 0.06728939712047577, "learning_rate": 6.007021018047617e-05, "loss": 0.267, "step": 36029 }, { "epoch": 2.9188269604666237, "grad_norm": 0.07952848076820374, "learning_rate": 6.006570952788154e-05, "loss": 0.2559, "step": 36030 }, { "epoch": 2.918907971484122, "grad_norm": 0.08097676187753677, "learning_rate": 6.0061208875286926e-05, "loss": 0.2443, "step": 36031 }, { "epoch": 2.91898898250162, "grad_norm": 0.08644181489944458, "learning_rate": 6.005670822269229e-05, "loss": 0.2557, "step": 36032 }, { "epoch": 2.9190699935191184, "grad_norm": 0.08713238686323166, "learning_rate": 6.005220757009766e-05, "loss": 0.2512, "step": 36033 }, { "epoch": 2.919151004536617, "grad_norm": 0.06697341799736023, "learning_rate": 6.004770691750305e-05, "loss": 0.2599, "step": 36034 }, { "epoch": 2.9192320155541154, "grad_norm": 0.07292734831571579, "learning_rate": 6.0043206264908414e-05, "loss": 0.23, "step": 36035 }, { "epoch": 2.9193130265716136, "grad_norm": 0.08001824468374252, "learning_rate": 6.003870561231378e-05, "loss": 0.2166, "step": 36036 }, { "epoch": 2.919394037589112, "grad_norm": 0.0886823758482933, "learning_rate": 6.003420495971917e-05, "loss": 0.2745, "step": 36037 }, { "epoch": 2.9194750486066106, "grad_norm": 0.06975951045751572, "learning_rate": 6.0029704307124534e-05, "loss": 0.235, "step": 36038 }, { "epoch": 2.919556059624109, "grad_norm": 0.0714506283402443, "learning_rate": 6.00252036545299e-05, "loss": 0.238, "step": 36039 }, { "epoch": 2.919637070641607, "grad_norm": 0.07188653200864792, "learning_rate": 6.002070300193529e-05, "loss": 0.2556, "step": 36040 }, { "epoch": 2.9197180816591057, "grad_norm": 0.06636472791433334, "learning_rate": 6.0016202349340655e-05, "loss": 0.2175, "step": 36041 }, { "epoch": 2.919799092676604, "grad_norm": 0.08170676976442337, "learning_rate": 6.0011701696746036e-05, "loss": 0.2331, "step": 36042 }, { "epoch": 2.9198801036941022, "grad_norm": 0.07941601425409317, "learning_rate": 6.000720104415141e-05, "loss": 0.2594, "step": 36043 }, { "epoch": 2.919961114711601, "grad_norm": 0.07099863886833191, "learning_rate": 6.0002700391556776e-05, "loss": 0.2031, "step": 36044 }, { "epoch": 2.920042125729099, "grad_norm": 0.07502160221338272, "learning_rate": 5.9998199738962156e-05, "loss": 0.271, "step": 36045 }, { "epoch": 2.9201231367465974, "grad_norm": 0.06421137601137161, "learning_rate": 5.999369908636753e-05, "loss": 0.2468, "step": 36046 }, { "epoch": 2.920204147764096, "grad_norm": 0.07903636991977692, "learning_rate": 5.99891984337729e-05, "loss": 0.2231, "step": 36047 }, { "epoch": 2.9202851587815943, "grad_norm": 0.06335616111755371, "learning_rate": 5.998469778117828e-05, "loss": 0.2637, "step": 36048 }, { "epoch": 2.9203661697990926, "grad_norm": 0.0635843276977539, "learning_rate": 5.998019712858365e-05, "loss": 0.2393, "step": 36049 }, { "epoch": 2.9204471808165913, "grad_norm": 0.07643640786409378, "learning_rate": 5.997569647598902e-05, "loss": 0.2274, "step": 36050 }, { "epoch": 2.9205281918340895, "grad_norm": 0.06594489514827728, "learning_rate": 5.99711958233944e-05, "loss": 0.2577, "step": 36051 }, { "epoch": 2.9206092028515878, "grad_norm": 0.06565924733877182, "learning_rate": 5.996669517079977e-05, "loss": 0.2338, "step": 36052 }, { "epoch": 2.9206902138690864, "grad_norm": 0.062482208013534546, "learning_rate": 5.996219451820514e-05, "loss": 0.2011, "step": 36053 }, { "epoch": 2.9207712248865847, "grad_norm": 0.08185567706823349, "learning_rate": 5.995769386561052e-05, "loss": 0.2261, "step": 36054 }, { "epoch": 2.920852235904083, "grad_norm": 0.081700898706913, "learning_rate": 5.995319321301589e-05, "loss": 0.2386, "step": 36055 }, { "epoch": 2.920933246921581, "grad_norm": 0.06645942479372025, "learning_rate": 5.994869256042126e-05, "loss": 0.2569, "step": 36056 }, { "epoch": 2.92101425793908, "grad_norm": 0.06409208476543427, "learning_rate": 5.994419190782664e-05, "loss": 0.2149, "step": 36057 }, { "epoch": 2.921095268956578, "grad_norm": 0.07192101329565048, "learning_rate": 5.993969125523201e-05, "loss": 0.2396, "step": 36058 }, { "epoch": 2.9211762799740764, "grad_norm": 0.06122337281703949, "learning_rate": 5.993519060263738e-05, "loss": 0.2373, "step": 36059 }, { "epoch": 2.9212572909915746, "grad_norm": 0.061685703694820404, "learning_rate": 5.993068995004276e-05, "loss": 0.2008, "step": 36060 }, { "epoch": 2.9213383020090733, "grad_norm": 0.07286947965621948, "learning_rate": 5.9926189297448134e-05, "loss": 0.236, "step": 36061 }, { "epoch": 2.9214193130265715, "grad_norm": 0.06022263318300247, "learning_rate": 5.99216886448535e-05, "loss": 0.2183, "step": 36062 }, { "epoch": 2.92150032404407, "grad_norm": 0.06972159445285797, "learning_rate": 5.991718799225888e-05, "loss": 0.2201, "step": 36063 }, { "epoch": 2.9215813350615685, "grad_norm": 0.06769529730081558, "learning_rate": 5.9912687339664255e-05, "loss": 0.2061, "step": 36064 }, { "epoch": 2.9216623460790667, "grad_norm": 0.08727024495601654, "learning_rate": 5.990818668706962e-05, "loss": 0.2219, "step": 36065 }, { "epoch": 2.921743357096565, "grad_norm": 0.07862614095211029, "learning_rate": 5.9903686034475e-05, "loss": 0.2637, "step": 36066 }, { "epoch": 2.9218243681140637, "grad_norm": 0.07064937800168991, "learning_rate": 5.9899185381880376e-05, "loss": 0.2427, "step": 36067 }, { "epoch": 2.921905379131562, "grad_norm": 0.06485024839639664, "learning_rate": 5.989468472928574e-05, "loss": 0.2264, "step": 36068 }, { "epoch": 2.92198639014906, "grad_norm": 0.0720147117972374, "learning_rate": 5.989018407669112e-05, "loss": 0.2398, "step": 36069 }, { "epoch": 2.922067401166559, "grad_norm": 0.05576111748814583, "learning_rate": 5.9885683424096496e-05, "loss": 0.2087, "step": 36070 }, { "epoch": 2.922148412184057, "grad_norm": 0.07834657281637192, "learning_rate": 5.988118277150188e-05, "loss": 0.2386, "step": 36071 }, { "epoch": 2.9222294232015553, "grad_norm": 0.07598018646240234, "learning_rate": 5.9876682118907244e-05, "loss": 0.2439, "step": 36072 }, { "epoch": 2.922310434219054, "grad_norm": 0.06340734660625458, "learning_rate": 5.987218146631262e-05, "loss": 0.2215, "step": 36073 }, { "epoch": 2.9223914452365523, "grad_norm": 0.0714435800909996, "learning_rate": 5.9867680813718e-05, "loss": 0.3095, "step": 36074 }, { "epoch": 2.9224724562540505, "grad_norm": 0.06071990728378296, "learning_rate": 5.9863180161123364e-05, "loss": 0.2146, "step": 36075 }, { "epoch": 2.922553467271549, "grad_norm": 0.07448688894510269, "learning_rate": 5.985867950852874e-05, "loss": 0.2154, "step": 36076 }, { "epoch": 2.9226344782890474, "grad_norm": 0.06451181322336197, "learning_rate": 5.985417885593412e-05, "loss": 0.2266, "step": 36077 }, { "epoch": 2.9227154893065457, "grad_norm": 0.15605607628822327, "learning_rate": 5.9849678203339485e-05, "loss": 0.2543, "step": 36078 }, { "epoch": 2.922796500324044, "grad_norm": 0.06267999857664108, "learning_rate": 5.984517755074486e-05, "loss": 0.2258, "step": 36079 }, { "epoch": 2.9228775113415426, "grad_norm": 0.07223370671272278, "learning_rate": 5.984067689815024e-05, "loss": 0.2142, "step": 36080 }, { "epoch": 2.922958522359041, "grad_norm": 0.06177590414881706, "learning_rate": 5.9836176245555606e-05, "loss": 0.2014, "step": 36081 }, { "epoch": 2.923039533376539, "grad_norm": 0.07765360176563263, "learning_rate": 5.983167559296098e-05, "loss": 0.2466, "step": 36082 }, { "epoch": 2.9231205443940373, "grad_norm": 0.07498011738061905, "learning_rate": 5.982717494036636e-05, "loss": 0.2452, "step": 36083 }, { "epoch": 2.923201555411536, "grad_norm": 0.06292008608579636, "learning_rate": 5.982267428777173e-05, "loss": 0.2839, "step": 36084 }, { "epoch": 2.9232825664290343, "grad_norm": 0.0622960664331913, "learning_rate": 5.98181736351771e-05, "loss": 0.2315, "step": 36085 }, { "epoch": 2.9233635774465325, "grad_norm": 0.07538387179374695, "learning_rate": 5.981367298258248e-05, "loss": 0.2152, "step": 36086 }, { "epoch": 2.923444588464031, "grad_norm": 0.060837045311927795, "learning_rate": 5.980917232998785e-05, "loss": 0.2283, "step": 36087 }, { "epoch": 2.9235255994815295, "grad_norm": 0.059952542185783386, "learning_rate": 5.980467167739322e-05, "loss": 0.2267, "step": 36088 }, { "epoch": 2.9236066104990277, "grad_norm": 0.07587370276451111, "learning_rate": 5.98001710247986e-05, "loss": 0.261, "step": 36089 }, { "epoch": 2.9236876215165264, "grad_norm": 0.08088859170675278, "learning_rate": 5.979567037220397e-05, "loss": 0.2302, "step": 36090 }, { "epoch": 2.9237686325340246, "grad_norm": 0.07576531916856766, "learning_rate": 5.979116971960934e-05, "loss": 0.2257, "step": 36091 }, { "epoch": 2.923849643551523, "grad_norm": 0.08089031279087067, "learning_rate": 5.978666906701472e-05, "loss": 0.2481, "step": 36092 }, { "epoch": 2.9239306545690216, "grad_norm": 0.07415632158517838, "learning_rate": 5.978216841442009e-05, "loss": 0.2173, "step": 36093 }, { "epoch": 2.92401166558652, "grad_norm": 0.06114750728011131, "learning_rate": 5.977766776182546e-05, "loss": 0.2361, "step": 36094 }, { "epoch": 2.924092676604018, "grad_norm": 0.066036157310009, "learning_rate": 5.977316710923084e-05, "loss": 0.2411, "step": 36095 }, { "epoch": 2.9241736876215167, "grad_norm": 0.07516303658485413, "learning_rate": 5.976866645663621e-05, "loss": 0.2481, "step": 36096 }, { "epoch": 2.924254698639015, "grad_norm": 0.08236120641231537, "learning_rate": 5.97641658040416e-05, "loss": 0.2399, "step": 36097 }, { "epoch": 2.9243357096565132, "grad_norm": 0.08051339536905289, "learning_rate": 5.9759665151446964e-05, "loss": 0.2312, "step": 36098 }, { "epoch": 2.924416720674012, "grad_norm": 0.06887134909629822, "learning_rate": 5.975516449885233e-05, "loss": 0.2171, "step": 36099 }, { "epoch": 2.92449773169151, "grad_norm": 0.06876733899116516, "learning_rate": 5.975066384625772e-05, "loss": 0.2453, "step": 36100 }, { "epoch": 2.9245787427090084, "grad_norm": 0.08076386898756027, "learning_rate": 5.9746163193663085e-05, "loss": 0.2468, "step": 36101 }, { "epoch": 2.9246597537265067, "grad_norm": 0.057946834713220596, "learning_rate": 5.974166254106845e-05, "loss": 0.2112, "step": 36102 }, { "epoch": 2.9247407647440054, "grad_norm": 0.05689328908920288, "learning_rate": 5.973716188847384e-05, "loss": 0.189, "step": 36103 }, { "epoch": 2.9248217757615036, "grad_norm": 0.07286691665649414, "learning_rate": 5.9732661235879206e-05, "loss": 0.2746, "step": 36104 }, { "epoch": 2.924902786779002, "grad_norm": 0.07182064652442932, "learning_rate": 5.972816058328457e-05, "loss": 0.2343, "step": 36105 }, { "epoch": 2.9249837977965, "grad_norm": 0.0803033784031868, "learning_rate": 5.972365993068996e-05, "loss": 0.2761, "step": 36106 }, { "epoch": 2.9250648088139988, "grad_norm": 0.059216950088739395, "learning_rate": 5.9719159278095327e-05, "loss": 0.2362, "step": 36107 }, { "epoch": 2.925145819831497, "grad_norm": 0.07625512778759003, "learning_rate": 5.97146586255007e-05, "loss": 0.2577, "step": 36108 }, { "epoch": 2.9252268308489953, "grad_norm": 0.0680420845746994, "learning_rate": 5.971015797290608e-05, "loss": 0.2478, "step": 36109 }, { "epoch": 2.925307841866494, "grad_norm": 0.07407931983470917, "learning_rate": 5.970565732031145e-05, "loss": 0.2175, "step": 36110 }, { "epoch": 2.925388852883992, "grad_norm": 0.07050876319408417, "learning_rate": 5.970115666771682e-05, "loss": 0.2551, "step": 36111 }, { "epoch": 2.9254698639014904, "grad_norm": 0.057711564004421234, "learning_rate": 5.96966560151222e-05, "loss": 0.2034, "step": 36112 }, { "epoch": 2.925550874918989, "grad_norm": 0.06754898279905319, "learning_rate": 5.969215536252757e-05, "loss": 0.2493, "step": 36113 }, { "epoch": 2.9256318859364874, "grad_norm": 0.07142467051744461, "learning_rate": 5.968765470993294e-05, "loss": 0.2153, "step": 36114 }, { "epoch": 2.9257128969539856, "grad_norm": 0.08044569939374924, "learning_rate": 5.968315405733832e-05, "loss": 0.2436, "step": 36115 }, { "epoch": 2.9257939079714843, "grad_norm": 0.06678476184606552, "learning_rate": 5.967865340474369e-05, "loss": 0.2147, "step": 36116 }, { "epoch": 2.9258749189889826, "grad_norm": 0.06317151337862015, "learning_rate": 5.967415275214906e-05, "loss": 0.2122, "step": 36117 }, { "epoch": 2.925955930006481, "grad_norm": 0.06971244513988495, "learning_rate": 5.966965209955444e-05, "loss": 0.2348, "step": 36118 }, { "epoch": 2.9260369410239795, "grad_norm": 0.06993476301431656, "learning_rate": 5.966515144695981e-05, "loss": 0.2464, "step": 36119 }, { "epoch": 2.9261179520414777, "grad_norm": 0.06371525675058365, "learning_rate": 5.9660650794365183e-05, "loss": 0.25, "step": 36120 }, { "epoch": 2.926198963058976, "grad_norm": 0.07864125818014145, "learning_rate": 5.9656150141770564e-05, "loss": 0.2573, "step": 36121 }, { "epoch": 2.9262799740764747, "grad_norm": 0.07708080857992172, "learning_rate": 5.965164948917593e-05, "loss": 0.2499, "step": 36122 }, { "epoch": 2.926360985093973, "grad_norm": 0.0688590481877327, "learning_rate": 5.964714883658131e-05, "loss": 0.2152, "step": 36123 }, { "epoch": 2.926441996111471, "grad_norm": 0.10641010850667953, "learning_rate": 5.9642648183986685e-05, "loss": 0.2338, "step": 36124 }, { "epoch": 2.9265230071289694, "grad_norm": 0.06765805929899216, "learning_rate": 5.963814753139205e-05, "loss": 0.2249, "step": 36125 }, { "epoch": 2.9266040181464676, "grad_norm": 0.0745808556675911, "learning_rate": 5.963364687879743e-05, "loss": 0.243, "step": 36126 }, { "epoch": 2.9266850291639663, "grad_norm": 0.08130037784576416, "learning_rate": 5.9629146226202805e-05, "loss": 0.2407, "step": 36127 }, { "epoch": 2.9267660401814646, "grad_norm": 0.07136457413434982, "learning_rate": 5.962464557360817e-05, "loss": 0.2321, "step": 36128 }, { "epoch": 2.926847051198963, "grad_norm": 0.06018296629190445, "learning_rate": 5.962014492101355e-05, "loss": 0.2418, "step": 36129 }, { "epoch": 2.9269280622164615, "grad_norm": 0.0679769515991211, "learning_rate": 5.9615644268418926e-05, "loss": 0.2187, "step": 36130 }, { "epoch": 2.9270090732339598, "grad_norm": 0.07336881011724472, "learning_rate": 5.961114361582429e-05, "loss": 0.2377, "step": 36131 }, { "epoch": 2.927090084251458, "grad_norm": 0.06220309063792229, "learning_rate": 5.9606642963229673e-05, "loss": 0.2127, "step": 36132 }, { "epoch": 2.9271710952689567, "grad_norm": 0.06562183052301407, "learning_rate": 5.960214231063505e-05, "loss": 0.2139, "step": 36133 }, { "epoch": 2.927252106286455, "grad_norm": 0.07450972497463226, "learning_rate": 5.9597641658040414e-05, "loss": 0.2087, "step": 36134 }, { "epoch": 2.927333117303953, "grad_norm": 0.08635947853326797, "learning_rate": 5.9593141005445794e-05, "loss": 0.2949, "step": 36135 }, { "epoch": 2.927414128321452, "grad_norm": 0.07206778973340988, "learning_rate": 5.958864035285117e-05, "loss": 0.2091, "step": 36136 }, { "epoch": 2.92749513933895, "grad_norm": 0.08502889424562454, "learning_rate": 5.9584139700256535e-05, "loss": 0.2183, "step": 36137 }, { "epoch": 2.9275761503564484, "grad_norm": 0.08003343641757965, "learning_rate": 5.9579639047661915e-05, "loss": 0.2619, "step": 36138 }, { "epoch": 2.927657161373947, "grad_norm": 0.06411267817020416, "learning_rate": 5.957513839506729e-05, "loss": 0.1962, "step": 36139 }, { "epoch": 2.9277381723914453, "grad_norm": 0.07732285559177399, "learning_rate": 5.9570637742472655e-05, "loss": 0.2055, "step": 36140 }, { "epoch": 2.9278191834089435, "grad_norm": 0.07913392037153244, "learning_rate": 5.9566137089878036e-05, "loss": 0.2521, "step": 36141 }, { "epoch": 2.9279001944264422, "grad_norm": 0.06923690438270569, "learning_rate": 5.956163643728341e-05, "loss": 0.235, "step": 36142 }, { "epoch": 2.9279812054439405, "grad_norm": 0.06813286244869232, "learning_rate": 5.9557135784688776e-05, "loss": 0.2352, "step": 36143 }, { "epoch": 2.9280622164614387, "grad_norm": 0.06538520008325577, "learning_rate": 5.955263513209416e-05, "loss": 0.2298, "step": 36144 }, { "epoch": 2.9281432274789374, "grad_norm": 0.08470512181520462, "learning_rate": 5.954813447949953e-05, "loss": 0.2459, "step": 36145 }, { "epoch": 2.9282242384964356, "grad_norm": 0.06865940988063812, "learning_rate": 5.95436338269049e-05, "loss": 0.2014, "step": 36146 }, { "epoch": 2.928305249513934, "grad_norm": 0.07658974826335907, "learning_rate": 5.953913317431028e-05, "loss": 0.2166, "step": 36147 }, { "epoch": 2.928386260531432, "grad_norm": 0.08708245307207108, "learning_rate": 5.953463252171565e-05, "loss": 0.2725, "step": 36148 }, { "epoch": 2.9284672715489304, "grad_norm": 0.08692276477813721, "learning_rate": 5.953013186912103e-05, "loss": 0.2537, "step": 36149 }, { "epoch": 2.928548282566429, "grad_norm": 0.06990312784910202, "learning_rate": 5.95256312165264e-05, "loss": 0.2333, "step": 36150 }, { "epoch": 2.9286292935839273, "grad_norm": 0.06813271343708038, "learning_rate": 5.952113056393177e-05, "loss": 0.2336, "step": 36151 }, { "epoch": 2.9287103046014256, "grad_norm": 0.07490336149930954, "learning_rate": 5.951662991133715e-05, "loss": 0.2331, "step": 36152 }, { "epoch": 2.9287913156189243, "grad_norm": 0.07326825708150864, "learning_rate": 5.951212925874252e-05, "loss": 0.243, "step": 36153 }, { "epoch": 2.9288723266364225, "grad_norm": 0.07395300269126892, "learning_rate": 5.950762860614789e-05, "loss": 0.251, "step": 36154 }, { "epoch": 2.9289533376539207, "grad_norm": 0.06281843781471252, "learning_rate": 5.950312795355327e-05, "loss": 0.2416, "step": 36155 }, { "epoch": 2.9290343486714194, "grad_norm": 0.06327148526906967, "learning_rate": 5.949862730095864e-05, "loss": 0.2023, "step": 36156 }, { "epoch": 2.9291153596889177, "grad_norm": 0.0622863844037056, "learning_rate": 5.9494126648364014e-05, "loss": 0.2162, "step": 36157 }, { "epoch": 2.929196370706416, "grad_norm": 0.08750246465206146, "learning_rate": 5.9489625995769394e-05, "loss": 0.2569, "step": 36158 }, { "epoch": 2.9292773817239146, "grad_norm": 0.08218368887901306, "learning_rate": 5.948512534317476e-05, "loss": 0.238, "step": 36159 }, { "epoch": 2.929358392741413, "grad_norm": 0.06105087697505951, "learning_rate": 5.9480624690580134e-05, "loss": 0.24, "step": 36160 }, { "epoch": 2.929439403758911, "grad_norm": 0.05267670005559921, "learning_rate": 5.9476124037985515e-05, "loss": 0.2191, "step": 36161 }, { "epoch": 2.92952041477641, "grad_norm": 0.06280451267957687, "learning_rate": 5.947162338539088e-05, "loss": 0.2278, "step": 36162 }, { "epoch": 2.929601425793908, "grad_norm": 0.09271591156721115, "learning_rate": 5.9467122732796255e-05, "loss": 0.2223, "step": 36163 }, { "epoch": 2.9296824368114063, "grad_norm": 0.06803149729967117, "learning_rate": 5.9462622080201636e-05, "loss": 0.2334, "step": 36164 }, { "epoch": 2.929763447828905, "grad_norm": 0.08212228119373322, "learning_rate": 5.9458121427607e-05, "loss": 0.2246, "step": 36165 }, { "epoch": 2.929844458846403, "grad_norm": 0.07780248671770096, "learning_rate": 5.9453620775012376e-05, "loss": 0.2379, "step": 36166 }, { "epoch": 2.9299254698639015, "grad_norm": 0.06778130680322647, "learning_rate": 5.9449120122417756e-05, "loss": 0.2397, "step": 36167 }, { "epoch": 2.9300064808814, "grad_norm": 0.07062183320522308, "learning_rate": 5.944461946982312e-05, "loss": 0.2277, "step": 36168 }, { "epoch": 2.9300874918988984, "grad_norm": 0.0797196626663208, "learning_rate": 5.94401188172285e-05, "loss": 0.253, "step": 36169 }, { "epoch": 2.9301685029163966, "grad_norm": 0.06827553361654282, "learning_rate": 5.943561816463388e-05, "loss": 0.2286, "step": 36170 }, { "epoch": 2.930249513933895, "grad_norm": 0.09623746573925018, "learning_rate": 5.9431117512039244e-05, "loss": 0.2639, "step": 36171 }, { "epoch": 2.930330524951393, "grad_norm": 0.08124282211065292, "learning_rate": 5.942661685944462e-05, "loss": 0.2256, "step": 36172 }, { "epoch": 2.930411535968892, "grad_norm": 0.05782557278871536, "learning_rate": 5.942211620685e-05, "loss": 0.2109, "step": 36173 }, { "epoch": 2.93049254698639, "grad_norm": 0.08583999425172806, "learning_rate": 5.9417615554255365e-05, "loss": 0.3011, "step": 36174 }, { "epoch": 2.9305735580038883, "grad_norm": 0.0772082656621933, "learning_rate": 5.941311490166075e-05, "loss": 0.2378, "step": 36175 }, { "epoch": 2.930654569021387, "grad_norm": 0.06742317229509354, "learning_rate": 5.940861424906612e-05, "loss": 0.24, "step": 36176 }, { "epoch": 2.9307355800388852, "grad_norm": 0.06944488734006882, "learning_rate": 5.940411359647149e-05, "loss": 0.2228, "step": 36177 }, { "epoch": 2.9308165910563835, "grad_norm": 0.07197489589452744, "learning_rate": 5.939961294387687e-05, "loss": 0.27, "step": 36178 }, { "epoch": 2.930897602073882, "grad_norm": 0.08774056285619736, "learning_rate": 5.939511229128224e-05, "loss": 0.2765, "step": 36179 }, { "epoch": 2.9309786130913804, "grad_norm": 0.06142619997262955, "learning_rate": 5.939061163868761e-05, "loss": 0.2269, "step": 36180 }, { "epoch": 2.9310596241088787, "grad_norm": 0.07134956121444702, "learning_rate": 5.9386110986092994e-05, "loss": 0.2299, "step": 36181 }, { "epoch": 2.9311406351263773, "grad_norm": 0.06611652672290802, "learning_rate": 5.938161033349836e-05, "loss": 0.2032, "step": 36182 }, { "epoch": 2.9312216461438756, "grad_norm": 0.07123982906341553, "learning_rate": 5.9377109680903734e-05, "loss": 0.2169, "step": 36183 }, { "epoch": 2.931302657161374, "grad_norm": 0.07195700705051422, "learning_rate": 5.9372609028309114e-05, "loss": 0.2308, "step": 36184 }, { "epoch": 2.9313836681788725, "grad_norm": 0.06632047146558762, "learning_rate": 5.936810837571448e-05, "loss": 0.2245, "step": 36185 }, { "epoch": 2.9314646791963708, "grad_norm": 0.08340226113796234, "learning_rate": 5.9363607723119855e-05, "loss": 0.2389, "step": 36186 }, { "epoch": 2.931545690213869, "grad_norm": 0.08525307476520538, "learning_rate": 5.9359107070525235e-05, "loss": 0.2569, "step": 36187 }, { "epoch": 2.9316267012313677, "grad_norm": 0.06627818942070007, "learning_rate": 5.93546064179306e-05, "loss": 0.2362, "step": 36188 }, { "epoch": 2.931707712248866, "grad_norm": 0.07321153581142426, "learning_rate": 5.9350105765335976e-05, "loss": 0.2238, "step": 36189 }, { "epoch": 2.931788723266364, "grad_norm": 0.06970200687646866, "learning_rate": 5.9345605112741356e-05, "loss": 0.247, "step": 36190 }, { "epoch": 2.931869734283863, "grad_norm": 0.06322509050369263, "learning_rate": 5.934110446014672e-05, "loss": 0.2081, "step": 36191 }, { "epoch": 2.931950745301361, "grad_norm": 0.07383931428194046, "learning_rate": 5.9336603807552096e-05, "loss": 0.2285, "step": 36192 }, { "epoch": 2.9320317563188594, "grad_norm": 0.07150974124670029, "learning_rate": 5.933210315495748e-05, "loss": 0.2052, "step": 36193 }, { "epoch": 2.9321127673363576, "grad_norm": 0.08250176161527634, "learning_rate": 5.9327602502362844e-05, "loss": 0.2557, "step": 36194 }, { "epoch": 2.932193778353856, "grad_norm": 0.08581391721963882, "learning_rate": 5.932310184976822e-05, "loss": 0.2755, "step": 36195 }, { "epoch": 2.9322747893713546, "grad_norm": 0.07952970266342163, "learning_rate": 5.93186011971736e-05, "loss": 0.2323, "step": 36196 }, { "epoch": 2.932355800388853, "grad_norm": 0.08379239588975906, "learning_rate": 5.9314100544578964e-05, "loss": 0.2358, "step": 36197 }, { "epoch": 2.932436811406351, "grad_norm": 0.07248571515083313, "learning_rate": 5.930959989198434e-05, "loss": 0.2258, "step": 36198 }, { "epoch": 2.9325178224238497, "grad_norm": 0.07288960367441177, "learning_rate": 5.930509923938972e-05, "loss": 0.2334, "step": 36199 }, { "epoch": 2.932598833441348, "grad_norm": 0.0691913366317749, "learning_rate": 5.9300598586795085e-05, "loss": 0.209, "step": 36200 }, { "epoch": 2.932679844458846, "grad_norm": 0.06181837618350983, "learning_rate": 5.9296097934200466e-05, "loss": 0.2213, "step": 36201 }, { "epoch": 2.932760855476345, "grad_norm": 0.06725524365901947, "learning_rate": 5.929159728160584e-05, "loss": 0.2012, "step": 36202 }, { "epoch": 2.932841866493843, "grad_norm": 0.06281258165836334, "learning_rate": 5.9287096629011206e-05, "loss": 0.2418, "step": 36203 }, { "epoch": 2.9329228775113414, "grad_norm": 0.06464042514562607, "learning_rate": 5.9282595976416586e-05, "loss": 0.235, "step": 36204 }, { "epoch": 2.93300388852884, "grad_norm": 0.06401500850915909, "learning_rate": 5.927809532382196e-05, "loss": 0.2067, "step": 36205 }, { "epoch": 2.9330848995463383, "grad_norm": 0.0744711384177208, "learning_rate": 5.927359467122733e-05, "loss": 0.2644, "step": 36206 }, { "epoch": 2.9331659105638366, "grad_norm": 0.06625427305698395, "learning_rate": 5.926909401863271e-05, "loss": 0.2379, "step": 36207 }, { "epoch": 2.9332469215813353, "grad_norm": 0.05669691041111946, "learning_rate": 5.926459336603808e-05, "loss": 0.2245, "step": 36208 }, { "epoch": 2.9333279325988335, "grad_norm": 0.07033190876245499, "learning_rate": 5.926009271344345e-05, "loss": 0.2269, "step": 36209 }, { "epoch": 2.9334089436163318, "grad_norm": 0.06328772753477097, "learning_rate": 5.925559206084883e-05, "loss": 0.1984, "step": 36210 }, { "epoch": 2.9334899546338304, "grad_norm": 0.10779903829097748, "learning_rate": 5.92510914082542e-05, "loss": 0.2562, "step": 36211 }, { "epoch": 2.9335709656513287, "grad_norm": 0.0638335794210434, "learning_rate": 5.924659075565957e-05, "loss": 0.2151, "step": 36212 }, { "epoch": 2.933651976668827, "grad_norm": 0.07483941316604614, "learning_rate": 5.924209010306495e-05, "loss": 0.2682, "step": 36213 }, { "epoch": 2.933732987686325, "grad_norm": 0.04787442088127136, "learning_rate": 5.923758945047032e-05, "loss": 0.1916, "step": 36214 }, { "epoch": 2.933813998703824, "grad_norm": 0.0677068680524826, "learning_rate": 5.923308879787569e-05, "loss": 0.2389, "step": 36215 }, { "epoch": 2.933895009721322, "grad_norm": 0.05607495829463005, "learning_rate": 5.922858814528107e-05, "loss": 0.205, "step": 36216 }, { "epoch": 2.9339760207388204, "grad_norm": 0.0977732464671135, "learning_rate": 5.922408749268644e-05, "loss": 0.2886, "step": 36217 }, { "epoch": 2.9340570317563186, "grad_norm": 0.07866278290748596, "learning_rate": 5.921958684009181e-05, "loss": 0.2498, "step": 36218 }, { "epoch": 2.9341380427738173, "grad_norm": 0.06456935405731201, "learning_rate": 5.921508618749719e-05, "loss": 0.2421, "step": 36219 }, { "epoch": 2.9342190537913155, "grad_norm": 0.06755385547876358, "learning_rate": 5.9210585534902564e-05, "loss": 0.2349, "step": 36220 }, { "epoch": 2.934300064808814, "grad_norm": 0.06859587877988815, "learning_rate": 5.920608488230793e-05, "loss": 0.2239, "step": 36221 }, { "epoch": 2.9343810758263125, "grad_norm": 0.06106014922261238, "learning_rate": 5.920158422971331e-05, "loss": 0.2118, "step": 36222 }, { "epoch": 2.9344620868438107, "grad_norm": 0.0807553380727768, "learning_rate": 5.9197083577118685e-05, "loss": 0.2256, "step": 36223 }, { "epoch": 2.934543097861309, "grad_norm": 0.06360894441604614, "learning_rate": 5.919258292452405e-05, "loss": 0.2257, "step": 36224 }, { "epoch": 2.9346241088788076, "grad_norm": 0.07121789455413818, "learning_rate": 5.918808227192943e-05, "loss": 0.2161, "step": 36225 }, { "epoch": 2.934705119896306, "grad_norm": 0.06633513420820236, "learning_rate": 5.9183581619334806e-05, "loss": 0.2218, "step": 36226 }, { "epoch": 2.934786130913804, "grad_norm": 0.08529671281576157, "learning_rate": 5.917908096674017e-05, "loss": 0.2529, "step": 36227 }, { "epoch": 2.934867141931303, "grad_norm": 0.06434918195009232, "learning_rate": 5.917458031414555e-05, "loss": 0.2959, "step": 36228 }, { "epoch": 2.934948152948801, "grad_norm": 0.0785248875617981, "learning_rate": 5.9170079661550926e-05, "loss": 0.2368, "step": 36229 }, { "epoch": 2.9350291639662993, "grad_norm": 0.07209806889295578, "learning_rate": 5.916557900895631e-05, "loss": 0.2218, "step": 36230 }, { "epoch": 2.935110174983798, "grad_norm": 0.06406816840171814, "learning_rate": 5.9161078356361674e-05, "loss": 0.2592, "step": 36231 }, { "epoch": 2.9351911860012962, "grad_norm": 0.06611085683107376, "learning_rate": 5.915657770376705e-05, "loss": 0.2361, "step": 36232 }, { "epoch": 2.9352721970187945, "grad_norm": 0.08267108350992203, "learning_rate": 5.915207705117243e-05, "loss": 0.2227, "step": 36233 }, { "epoch": 2.935353208036293, "grad_norm": 0.04846043139696121, "learning_rate": 5.9147576398577795e-05, "loss": 0.2004, "step": 36234 }, { "epoch": 2.9354342190537914, "grad_norm": 0.07758070528507233, "learning_rate": 5.914307574598317e-05, "loss": 0.2329, "step": 36235 }, { "epoch": 2.9355152300712897, "grad_norm": 0.07985047250986099, "learning_rate": 5.913857509338855e-05, "loss": 0.2617, "step": 36236 }, { "epoch": 2.935596241088788, "grad_norm": 0.0672224760055542, "learning_rate": 5.9134074440793915e-05, "loss": 0.2242, "step": 36237 }, { "epoch": 2.9356772521062866, "grad_norm": 0.05481787770986557, "learning_rate": 5.912957378819929e-05, "loss": 0.2285, "step": 36238 }, { "epoch": 2.935758263123785, "grad_norm": 0.06856312602758408, "learning_rate": 5.912507313560467e-05, "loss": 0.2128, "step": 36239 }, { "epoch": 2.935839274141283, "grad_norm": 0.08229131996631622, "learning_rate": 5.9120572483010036e-05, "loss": 0.232, "step": 36240 }, { "epoch": 2.9359202851587813, "grad_norm": 0.08203065395355225, "learning_rate": 5.911607183041541e-05, "loss": 0.2496, "step": 36241 }, { "epoch": 2.93600129617628, "grad_norm": 0.06302852183580399, "learning_rate": 5.911157117782079e-05, "loss": 0.22, "step": 36242 }, { "epoch": 2.9360823071937783, "grad_norm": 0.07781028002500534, "learning_rate": 5.910707052522616e-05, "loss": 0.2149, "step": 36243 }, { "epoch": 2.9361633182112765, "grad_norm": 0.057055290788412094, "learning_rate": 5.910256987263153e-05, "loss": 0.2192, "step": 36244 }, { "epoch": 2.936244329228775, "grad_norm": 0.07755919545888901, "learning_rate": 5.909806922003691e-05, "loss": 0.2027, "step": 36245 }, { "epoch": 2.9363253402462735, "grad_norm": 0.07984499633312225, "learning_rate": 5.9093568567442285e-05, "loss": 0.3008, "step": 36246 }, { "epoch": 2.9364063512637717, "grad_norm": 0.07670563459396362, "learning_rate": 5.908906791484765e-05, "loss": 0.2304, "step": 36247 }, { "epoch": 2.9364873622812704, "grad_norm": 0.07776518911123276, "learning_rate": 5.908456726225303e-05, "loss": 0.2522, "step": 36248 }, { "epoch": 2.9365683732987686, "grad_norm": 0.07556840032339096, "learning_rate": 5.9080066609658405e-05, "loss": 0.2786, "step": 36249 }, { "epoch": 2.936649384316267, "grad_norm": 0.07746266573667526, "learning_rate": 5.907556595706377e-05, "loss": 0.2553, "step": 36250 }, { "epoch": 2.9367303953337656, "grad_norm": 0.07655060291290283, "learning_rate": 5.907106530446915e-05, "loss": 0.2623, "step": 36251 }, { "epoch": 2.936811406351264, "grad_norm": 0.0660661831498146, "learning_rate": 5.9066564651874526e-05, "loss": 0.2599, "step": 36252 }, { "epoch": 2.936892417368762, "grad_norm": 0.0715685710310936, "learning_rate": 5.906206399927989e-05, "loss": 0.2416, "step": 36253 }, { "epoch": 2.9369734283862607, "grad_norm": 0.0755523145198822, "learning_rate": 5.905756334668527e-05, "loss": 0.1945, "step": 36254 }, { "epoch": 2.937054439403759, "grad_norm": 0.06897228211164474, "learning_rate": 5.905306269409065e-05, "loss": 0.2484, "step": 36255 }, { "epoch": 2.9371354504212572, "grad_norm": 0.058608174324035645, "learning_rate": 5.904856204149603e-05, "loss": 0.2372, "step": 36256 }, { "epoch": 2.937216461438756, "grad_norm": 0.0753135159611702, "learning_rate": 5.9044061388901394e-05, "loss": 0.24, "step": 36257 }, { "epoch": 2.937297472456254, "grad_norm": 0.060669947415590286, "learning_rate": 5.903956073630677e-05, "loss": 0.2222, "step": 36258 }, { "epoch": 2.9373784834737524, "grad_norm": 0.06780382245779037, "learning_rate": 5.903506008371215e-05, "loss": 0.2175, "step": 36259 }, { "epoch": 2.9374594944912507, "grad_norm": 0.07297215610742569, "learning_rate": 5.9030559431117515e-05, "loss": 0.2391, "step": 36260 }, { "epoch": 2.9375405055087493, "grad_norm": 0.05533376708626747, "learning_rate": 5.902605877852289e-05, "loss": 0.2383, "step": 36261 }, { "epoch": 2.9376215165262476, "grad_norm": 0.07857169955968857, "learning_rate": 5.902155812592827e-05, "loss": 0.2224, "step": 36262 }, { "epoch": 2.937702527543746, "grad_norm": 0.06497713923454285, "learning_rate": 5.9017057473333636e-05, "loss": 0.2276, "step": 36263 }, { "epoch": 2.937783538561244, "grad_norm": 0.06889013200998306, "learning_rate": 5.901255682073901e-05, "loss": 0.2339, "step": 36264 }, { "epoch": 2.9378645495787428, "grad_norm": 0.08349175751209259, "learning_rate": 5.900805616814439e-05, "loss": 0.2421, "step": 36265 }, { "epoch": 2.937945560596241, "grad_norm": 0.06726394593715668, "learning_rate": 5.9003555515549757e-05, "loss": 0.1948, "step": 36266 }, { "epoch": 2.9380265716137393, "grad_norm": 0.06818697601556778, "learning_rate": 5.899905486295513e-05, "loss": 0.2306, "step": 36267 }, { "epoch": 2.938107582631238, "grad_norm": 0.07113194465637207, "learning_rate": 5.899455421036051e-05, "loss": 0.2849, "step": 36268 }, { "epoch": 2.938188593648736, "grad_norm": 0.06667479872703552, "learning_rate": 5.899005355776588e-05, "loss": 0.2268, "step": 36269 }, { "epoch": 2.9382696046662344, "grad_norm": 0.06525031477212906, "learning_rate": 5.898555290517125e-05, "loss": 0.2418, "step": 36270 }, { "epoch": 2.938350615683733, "grad_norm": 0.07072465866804123, "learning_rate": 5.898105225257663e-05, "loss": 0.2467, "step": 36271 }, { "epoch": 2.9384316267012314, "grad_norm": 0.07480061054229736, "learning_rate": 5.8976551599982e-05, "loss": 0.2547, "step": 36272 }, { "epoch": 2.9385126377187296, "grad_norm": 0.06577193737030029, "learning_rate": 5.897205094738737e-05, "loss": 0.224, "step": 36273 }, { "epoch": 2.9385936487362283, "grad_norm": 0.06845003366470337, "learning_rate": 5.896755029479275e-05, "loss": 0.252, "step": 36274 }, { "epoch": 2.9386746597537265, "grad_norm": 0.07179780304431915, "learning_rate": 5.896304964219812e-05, "loss": 0.2625, "step": 36275 }, { "epoch": 2.938755670771225, "grad_norm": 0.08876433223485947, "learning_rate": 5.895854898960349e-05, "loss": 0.2339, "step": 36276 }, { "epoch": 2.9388366817887235, "grad_norm": 0.08522546291351318, "learning_rate": 5.895404833700887e-05, "loss": 0.227, "step": 36277 }, { "epoch": 2.9389176928062217, "grad_norm": 0.07845503836870193, "learning_rate": 5.894954768441424e-05, "loss": 0.2634, "step": 36278 }, { "epoch": 2.93899870382372, "grad_norm": 0.06297402828931808, "learning_rate": 5.8945047031819613e-05, "loss": 0.2386, "step": 36279 }, { "epoch": 2.9390797148412187, "grad_norm": 0.08465047180652618, "learning_rate": 5.8940546379224994e-05, "loss": 0.2565, "step": 36280 }, { "epoch": 2.939160725858717, "grad_norm": 0.07127416133880615, "learning_rate": 5.893604572663036e-05, "loss": 0.2476, "step": 36281 }, { "epoch": 2.939241736876215, "grad_norm": 0.06154097244143486, "learning_rate": 5.893154507403574e-05, "loss": 0.2539, "step": 36282 }, { "epoch": 2.9393227478937134, "grad_norm": 0.06503734737634659, "learning_rate": 5.8927044421441115e-05, "loss": 0.2353, "step": 36283 }, { "epoch": 2.939403758911212, "grad_norm": 0.06861399859189987, "learning_rate": 5.892254376884648e-05, "loss": 0.2582, "step": 36284 }, { "epoch": 2.9394847699287103, "grad_norm": 0.06149303540587425, "learning_rate": 5.891804311625186e-05, "loss": 0.2141, "step": 36285 }, { "epoch": 2.9395657809462086, "grad_norm": 0.07276730984449387, "learning_rate": 5.8913542463657235e-05, "loss": 0.2389, "step": 36286 }, { "epoch": 2.939646791963707, "grad_norm": 0.0715443417429924, "learning_rate": 5.89090418110626e-05, "loss": 0.2372, "step": 36287 }, { "epoch": 2.9397278029812055, "grad_norm": 0.062204938381910324, "learning_rate": 5.890454115846798e-05, "loss": 0.2568, "step": 36288 }, { "epoch": 2.9398088139987038, "grad_norm": 0.0683540403842926, "learning_rate": 5.8900040505873356e-05, "loss": 0.2389, "step": 36289 }, { "epoch": 2.939889825016202, "grad_norm": 0.07297884672880173, "learning_rate": 5.889553985327872e-05, "loss": 0.2405, "step": 36290 }, { "epoch": 2.9399708360337007, "grad_norm": 0.06527545303106308, "learning_rate": 5.8891039200684103e-05, "loss": 0.2614, "step": 36291 }, { "epoch": 2.940051847051199, "grad_norm": 0.0808287039399147, "learning_rate": 5.888653854808948e-05, "loss": 0.2379, "step": 36292 }, { "epoch": 2.940132858068697, "grad_norm": 0.07050243765115738, "learning_rate": 5.8882037895494844e-05, "loss": 0.2261, "step": 36293 }, { "epoch": 2.940213869086196, "grad_norm": 0.06260987371206284, "learning_rate": 5.8877537242900224e-05, "loss": 0.2136, "step": 36294 }, { "epoch": 2.940294880103694, "grad_norm": 0.07322978973388672, "learning_rate": 5.88730365903056e-05, "loss": 0.2196, "step": 36295 }, { "epoch": 2.9403758911211924, "grad_norm": 0.07632744312286377, "learning_rate": 5.8868535937710965e-05, "loss": 0.2172, "step": 36296 }, { "epoch": 2.940456902138691, "grad_norm": 0.06406212598085403, "learning_rate": 5.8864035285116345e-05, "loss": 0.2239, "step": 36297 }, { "epoch": 2.9405379131561893, "grad_norm": 0.07500002533197403, "learning_rate": 5.885953463252172e-05, "loss": 0.2279, "step": 36298 }, { "epoch": 2.9406189241736875, "grad_norm": 0.06563522666692734, "learning_rate": 5.8855033979927086e-05, "loss": 0.2317, "step": 36299 }, { "epoch": 2.940699935191186, "grad_norm": 0.0740443617105484, "learning_rate": 5.8850533327332466e-05, "loss": 0.2419, "step": 36300 }, { "epoch": 2.9407809462086845, "grad_norm": 0.06501168757677078, "learning_rate": 5.884603267473784e-05, "loss": 0.253, "step": 36301 }, { "epoch": 2.9408619572261827, "grad_norm": 0.07930354028940201, "learning_rate": 5.8841532022143206e-05, "loss": 0.23, "step": 36302 }, { "epoch": 2.9409429682436814, "grad_norm": 0.06326466798782349, "learning_rate": 5.883703136954859e-05, "loss": 0.22, "step": 36303 }, { "epoch": 2.9410239792611796, "grad_norm": 0.08392850309610367, "learning_rate": 5.883253071695396e-05, "loss": 0.2652, "step": 36304 }, { "epoch": 2.941104990278678, "grad_norm": 0.08548044413328171, "learning_rate": 5.882803006435933e-05, "loss": 0.2386, "step": 36305 }, { "epoch": 2.941186001296176, "grad_norm": 0.07695137709379196, "learning_rate": 5.882352941176471e-05, "loss": 0.2681, "step": 36306 }, { "epoch": 2.941267012313675, "grad_norm": 0.07780933380126953, "learning_rate": 5.881902875917008e-05, "loss": 0.25, "step": 36307 }, { "epoch": 2.941348023331173, "grad_norm": 0.06766297668218613, "learning_rate": 5.881452810657546e-05, "loss": 0.2378, "step": 36308 }, { "epoch": 2.9414290343486713, "grad_norm": 0.06672467291355133, "learning_rate": 5.881002745398083e-05, "loss": 0.2312, "step": 36309 }, { "epoch": 2.9415100453661696, "grad_norm": 0.06629875302314758, "learning_rate": 5.88055268013862e-05, "loss": 0.1752, "step": 36310 }, { "epoch": 2.9415910563836682, "grad_norm": 0.0737876445055008, "learning_rate": 5.880102614879158e-05, "loss": 0.2579, "step": 36311 }, { "epoch": 2.9416720674011665, "grad_norm": 0.07448048889636993, "learning_rate": 5.879652549619695e-05, "loss": 0.2324, "step": 36312 }, { "epoch": 2.9417530784186647, "grad_norm": 0.07642365992069244, "learning_rate": 5.879202484360232e-05, "loss": 0.2153, "step": 36313 }, { "epoch": 2.9418340894361634, "grad_norm": 0.07787315547466278, "learning_rate": 5.87875241910077e-05, "loss": 0.2748, "step": 36314 }, { "epoch": 2.9419151004536617, "grad_norm": 0.06532657891511917, "learning_rate": 5.878302353841308e-05, "loss": 0.2083, "step": 36315 }, { "epoch": 2.94199611147116, "grad_norm": 0.05467003956437111, "learning_rate": 5.8778522885818444e-05, "loss": 0.2153, "step": 36316 }, { "epoch": 2.9420771224886586, "grad_norm": 0.0709647536277771, "learning_rate": 5.8774022233223824e-05, "loss": 0.2478, "step": 36317 }, { "epoch": 2.942158133506157, "grad_norm": 0.06659302860498428, "learning_rate": 5.87695215806292e-05, "loss": 0.2199, "step": 36318 }, { "epoch": 2.942239144523655, "grad_norm": 0.06867357343435287, "learning_rate": 5.8765020928034564e-05, "loss": 0.2352, "step": 36319 }, { "epoch": 2.942320155541154, "grad_norm": 0.07150635123252869, "learning_rate": 5.8760520275439945e-05, "loss": 0.2181, "step": 36320 }, { "epoch": 2.942401166558652, "grad_norm": 0.06097935512661934, "learning_rate": 5.875601962284532e-05, "loss": 0.2181, "step": 36321 }, { "epoch": 2.9424821775761503, "grad_norm": 0.059090375900268555, "learning_rate": 5.8751518970250685e-05, "loss": 0.234, "step": 36322 }, { "epoch": 2.942563188593649, "grad_norm": 0.06826435029506683, "learning_rate": 5.8747018317656066e-05, "loss": 0.2217, "step": 36323 }, { "epoch": 2.942644199611147, "grad_norm": 0.06106416881084442, "learning_rate": 5.874251766506144e-05, "loss": 0.2194, "step": 36324 }, { "epoch": 2.9427252106286454, "grad_norm": 0.08476357161998749, "learning_rate": 5.8738017012466806e-05, "loss": 0.258, "step": 36325 }, { "epoch": 2.942806221646144, "grad_norm": 0.08583477139472961, "learning_rate": 5.8733516359872186e-05, "loss": 0.2373, "step": 36326 }, { "epoch": 2.9428872326636424, "grad_norm": 0.06490720808506012, "learning_rate": 5.872901570727756e-05, "loss": 0.2244, "step": 36327 }, { "epoch": 2.9429682436811406, "grad_norm": 0.06273435056209564, "learning_rate": 5.872451505468293e-05, "loss": 0.2383, "step": 36328 }, { "epoch": 2.943049254698639, "grad_norm": 0.0690455287694931, "learning_rate": 5.872001440208831e-05, "loss": 0.2175, "step": 36329 }, { "epoch": 2.943130265716137, "grad_norm": 0.06984954327344894, "learning_rate": 5.871551374949368e-05, "loss": 0.2369, "step": 36330 }, { "epoch": 2.943211276733636, "grad_norm": 0.06107647344470024, "learning_rate": 5.871101309689905e-05, "loss": 0.2586, "step": 36331 }, { "epoch": 2.943292287751134, "grad_norm": 0.05110946297645569, "learning_rate": 5.870651244430443e-05, "loss": 0.247, "step": 36332 }, { "epoch": 2.9433732987686323, "grad_norm": 0.05262623727321625, "learning_rate": 5.87020117917098e-05, "loss": 0.2363, "step": 36333 }, { "epoch": 2.943454309786131, "grad_norm": 0.06491180509328842, "learning_rate": 5.869751113911518e-05, "loss": 0.2098, "step": 36334 }, { "epoch": 2.9435353208036292, "grad_norm": 0.06550070643424988, "learning_rate": 5.869301048652055e-05, "loss": 0.2218, "step": 36335 }, { "epoch": 2.9436163318211275, "grad_norm": 0.06160715967416763, "learning_rate": 5.868850983392592e-05, "loss": 0.2426, "step": 36336 }, { "epoch": 2.943697342838626, "grad_norm": 0.07329082489013672, "learning_rate": 5.86840091813313e-05, "loss": 0.2247, "step": 36337 }, { "epoch": 2.9437783538561244, "grad_norm": 0.06759696453809738, "learning_rate": 5.867950852873667e-05, "loss": 0.2356, "step": 36338 }, { "epoch": 2.9438593648736227, "grad_norm": 0.07199111580848694, "learning_rate": 5.867500787614204e-05, "loss": 0.2055, "step": 36339 }, { "epoch": 2.9439403758911213, "grad_norm": 0.06256087124347687, "learning_rate": 5.8670507223547424e-05, "loss": 0.231, "step": 36340 }, { "epoch": 2.9440213869086196, "grad_norm": 0.08340414613485336, "learning_rate": 5.866600657095279e-05, "loss": 0.2429, "step": 36341 }, { "epoch": 2.944102397926118, "grad_norm": 0.06608752906322479, "learning_rate": 5.8661505918358164e-05, "loss": 0.253, "step": 36342 }, { "epoch": 2.9441834089436165, "grad_norm": 0.06594279408454895, "learning_rate": 5.8657005265763544e-05, "loss": 0.2269, "step": 36343 }, { "epoch": 2.9442644199611148, "grad_norm": 0.07080650329589844, "learning_rate": 5.865250461316891e-05, "loss": 0.2652, "step": 36344 }, { "epoch": 2.944345430978613, "grad_norm": 0.06498748809099197, "learning_rate": 5.8648003960574285e-05, "loss": 0.2223, "step": 36345 }, { "epoch": 2.9444264419961117, "grad_norm": 0.07089196145534515, "learning_rate": 5.8643503307979665e-05, "loss": 0.225, "step": 36346 }, { "epoch": 2.94450745301361, "grad_norm": 0.06853543967008591, "learning_rate": 5.863900265538503e-05, "loss": 0.2135, "step": 36347 }, { "epoch": 2.944588464031108, "grad_norm": 0.0707242339849472, "learning_rate": 5.8634502002790406e-05, "loss": 0.2413, "step": 36348 }, { "epoch": 2.944669475048607, "grad_norm": 0.07439923286437988, "learning_rate": 5.8630001350195786e-05, "loss": 0.2203, "step": 36349 }, { "epoch": 2.944750486066105, "grad_norm": 0.08233077079057693, "learning_rate": 5.862550069760115e-05, "loss": 0.2218, "step": 36350 }, { "epoch": 2.9448314970836034, "grad_norm": 0.07423988729715347, "learning_rate": 5.8621000045006526e-05, "loss": 0.2328, "step": 36351 }, { "epoch": 2.9449125081011016, "grad_norm": 0.06345546245574951, "learning_rate": 5.861649939241191e-05, "loss": 0.2776, "step": 36352 }, { "epoch": 2.9449935191186, "grad_norm": 0.07971476763486862, "learning_rate": 5.8611998739817274e-05, "loss": 0.2116, "step": 36353 }, { "epoch": 2.9450745301360985, "grad_norm": 0.06921039521694183, "learning_rate": 5.860749808722265e-05, "loss": 0.2595, "step": 36354 }, { "epoch": 2.945155541153597, "grad_norm": 0.06553160399198532, "learning_rate": 5.860299743462803e-05, "loss": 0.2056, "step": 36355 }, { "epoch": 2.945236552171095, "grad_norm": 0.069551981985569, "learning_rate": 5.8598496782033394e-05, "loss": 0.2239, "step": 36356 }, { "epoch": 2.9453175631885937, "grad_norm": 0.06705120205879211, "learning_rate": 5.859399612943877e-05, "loss": 0.1983, "step": 36357 }, { "epoch": 2.945398574206092, "grad_norm": 0.06197086349129677, "learning_rate": 5.858949547684415e-05, "loss": 0.2264, "step": 36358 }, { "epoch": 2.94547958522359, "grad_norm": 0.06881693005561829, "learning_rate": 5.8584994824249515e-05, "loss": 0.2393, "step": 36359 }, { "epoch": 2.945560596241089, "grad_norm": 0.07800547033548355, "learning_rate": 5.8580494171654896e-05, "loss": 0.2728, "step": 36360 }, { "epoch": 2.945641607258587, "grad_norm": 0.06328790634870529, "learning_rate": 5.857599351906027e-05, "loss": 0.2343, "step": 36361 }, { "epoch": 2.9457226182760854, "grad_norm": 0.07082858681678772, "learning_rate": 5.8571492866465636e-05, "loss": 0.2285, "step": 36362 }, { "epoch": 2.945803629293584, "grad_norm": 0.07132884114980698, "learning_rate": 5.8566992213871016e-05, "loss": 0.2679, "step": 36363 }, { "epoch": 2.9458846403110823, "grad_norm": 0.06614235043525696, "learning_rate": 5.856249156127639e-05, "loss": 0.2511, "step": 36364 }, { "epoch": 2.9459656513285806, "grad_norm": 0.07640809565782547, "learning_rate": 5.855799090868176e-05, "loss": 0.239, "step": 36365 }, { "epoch": 2.9460466623460793, "grad_norm": 0.07471118867397308, "learning_rate": 5.855349025608714e-05, "loss": 0.2087, "step": 36366 }, { "epoch": 2.9461276733635775, "grad_norm": 0.08368020504713058, "learning_rate": 5.854898960349251e-05, "loss": 0.2227, "step": 36367 }, { "epoch": 2.9462086843810757, "grad_norm": 0.07055380195379257, "learning_rate": 5.854448895089788e-05, "loss": 0.2538, "step": 36368 }, { "epoch": 2.9462896953985744, "grad_norm": 0.07868610322475433, "learning_rate": 5.853998829830326e-05, "loss": 0.2145, "step": 36369 }, { "epoch": 2.9463707064160727, "grad_norm": 0.06251738965511322, "learning_rate": 5.853548764570863e-05, "loss": 0.211, "step": 36370 }, { "epoch": 2.946451717433571, "grad_norm": 0.07561231404542923, "learning_rate": 5.8530986993114e-05, "loss": 0.2344, "step": 36371 }, { "epoch": 2.9465327284510696, "grad_norm": 0.0834537148475647, "learning_rate": 5.852648634051938e-05, "loss": 0.2006, "step": 36372 }, { "epoch": 2.946613739468568, "grad_norm": 0.0762653574347496, "learning_rate": 5.852198568792475e-05, "loss": 0.243, "step": 36373 }, { "epoch": 2.946694750486066, "grad_norm": 0.07931900769472122, "learning_rate": 5.851748503533012e-05, "loss": 0.2371, "step": 36374 }, { "epoch": 2.9467757615035644, "grad_norm": 0.07118021696805954, "learning_rate": 5.85129843827355e-05, "loss": 0.2396, "step": 36375 }, { "epoch": 2.9468567725210626, "grad_norm": 0.09576117992401123, "learning_rate": 5.850848373014087e-05, "loss": 0.2699, "step": 36376 }, { "epoch": 2.9469377835385613, "grad_norm": 0.07844670116901398, "learning_rate": 5.850398307754624e-05, "loss": 0.2611, "step": 36377 }, { "epoch": 2.9470187945560595, "grad_norm": 0.07611510902643204, "learning_rate": 5.849948242495162e-05, "loss": 0.233, "step": 36378 }, { "epoch": 2.9470998055735578, "grad_norm": 0.06875669956207275, "learning_rate": 5.8494981772356994e-05, "loss": 0.2342, "step": 36379 }, { "epoch": 2.9471808165910565, "grad_norm": 0.05916833132505417, "learning_rate": 5.849048111976236e-05, "loss": 0.2067, "step": 36380 }, { "epoch": 2.9472618276085547, "grad_norm": 0.06750084459781647, "learning_rate": 5.848598046716775e-05, "loss": 0.2626, "step": 36381 }, { "epoch": 2.947342838626053, "grad_norm": 0.07500852644443512, "learning_rate": 5.8481479814573115e-05, "loss": 0.2259, "step": 36382 }, { "epoch": 2.9474238496435516, "grad_norm": 0.081149160861969, "learning_rate": 5.847697916197848e-05, "loss": 0.2295, "step": 36383 }, { "epoch": 2.94750486066105, "grad_norm": 0.07233553379774094, "learning_rate": 5.847247850938387e-05, "loss": 0.3038, "step": 36384 }, { "epoch": 2.947585871678548, "grad_norm": 0.05843168869614601, "learning_rate": 5.8467977856789236e-05, "loss": 0.2013, "step": 36385 }, { "epoch": 2.947666882696047, "grad_norm": 0.06838095933198929, "learning_rate": 5.84634772041946e-05, "loss": 0.1948, "step": 36386 }, { "epoch": 2.947747893713545, "grad_norm": 0.07753279060125351, "learning_rate": 5.845897655159999e-05, "loss": 0.2314, "step": 36387 }, { "epoch": 2.9478289047310433, "grad_norm": 0.060668349266052246, "learning_rate": 5.8454475899005357e-05, "loss": 0.2207, "step": 36388 }, { "epoch": 2.947909915748542, "grad_norm": 0.06636876612901688, "learning_rate": 5.844997524641074e-05, "loss": 0.2157, "step": 36389 }, { "epoch": 2.9479909267660402, "grad_norm": 0.06835020333528519, "learning_rate": 5.844547459381611e-05, "loss": 0.2651, "step": 36390 }, { "epoch": 2.9480719377835385, "grad_norm": 0.06267762184143066, "learning_rate": 5.844097394122148e-05, "loss": 0.2177, "step": 36391 }, { "epoch": 2.948152948801037, "grad_norm": 0.06823470443487167, "learning_rate": 5.843647328862686e-05, "loss": 0.209, "step": 36392 }, { "epoch": 2.9482339598185354, "grad_norm": 0.07210874557495117, "learning_rate": 5.843197263603223e-05, "loss": 0.2383, "step": 36393 }, { "epoch": 2.9483149708360337, "grad_norm": 0.06944157183170319, "learning_rate": 5.84274719834376e-05, "loss": 0.2474, "step": 36394 }, { "epoch": 2.9483959818535324, "grad_norm": 0.06869715452194214, "learning_rate": 5.842297133084298e-05, "loss": 0.1967, "step": 36395 }, { "epoch": 2.9484769928710306, "grad_norm": 0.07306776195764542, "learning_rate": 5.841847067824835e-05, "loss": 0.2055, "step": 36396 }, { "epoch": 2.948558003888529, "grad_norm": 0.06623499095439911, "learning_rate": 5.841397002565372e-05, "loss": 0.2519, "step": 36397 }, { "epoch": 2.948639014906027, "grad_norm": 0.07583684474229813, "learning_rate": 5.84094693730591e-05, "loss": 0.2676, "step": 36398 }, { "epoch": 2.9487200259235253, "grad_norm": 0.08304212242364883, "learning_rate": 5.840496872046447e-05, "loss": 0.2836, "step": 36399 }, { "epoch": 2.948801036941024, "grad_norm": 0.06242493912577629, "learning_rate": 5.840046806786984e-05, "loss": 0.2512, "step": 36400 }, { "epoch": 2.9488820479585223, "grad_norm": 0.06770298629999161, "learning_rate": 5.839596741527522e-05, "loss": 0.2472, "step": 36401 }, { "epoch": 2.9489630589760205, "grad_norm": 0.07269997894763947, "learning_rate": 5.8391466762680594e-05, "loss": 0.2654, "step": 36402 }, { "epoch": 2.949044069993519, "grad_norm": 0.060589950531721115, "learning_rate": 5.838696611008596e-05, "loss": 0.2046, "step": 36403 }, { "epoch": 2.9491250810110174, "grad_norm": 0.06518398225307465, "learning_rate": 5.838246545749134e-05, "loss": 0.2226, "step": 36404 }, { "epoch": 2.9492060920285157, "grad_norm": 0.07953999936580658, "learning_rate": 5.8377964804896715e-05, "loss": 0.2554, "step": 36405 }, { "epoch": 2.9492871030460144, "grad_norm": 0.05752985551953316, "learning_rate": 5.837346415230208e-05, "loss": 0.2228, "step": 36406 }, { "epoch": 2.9493681140635126, "grad_norm": 0.07126346975564957, "learning_rate": 5.836896349970746e-05, "loss": 0.2552, "step": 36407 }, { "epoch": 2.949449125081011, "grad_norm": 0.059985458850860596, "learning_rate": 5.8364462847112835e-05, "loss": 0.238, "step": 36408 }, { "epoch": 2.9495301360985096, "grad_norm": 0.07471557706594467, "learning_rate": 5.83599621945182e-05, "loss": 0.2084, "step": 36409 }, { "epoch": 2.949611147116008, "grad_norm": 0.0673462375998497, "learning_rate": 5.835546154192358e-05, "loss": 0.2717, "step": 36410 }, { "epoch": 2.949692158133506, "grad_norm": 0.06365612894296646, "learning_rate": 5.8350960889328956e-05, "loss": 0.2385, "step": 36411 }, { "epoch": 2.9497731691510047, "grad_norm": 0.06991245597600937, "learning_rate": 5.834646023673432e-05, "loss": 0.2248, "step": 36412 }, { "epoch": 2.949854180168503, "grad_norm": 0.07591899484395981, "learning_rate": 5.8341959584139703e-05, "loss": 0.22, "step": 36413 }, { "epoch": 2.9499351911860012, "grad_norm": 0.07599229365587234, "learning_rate": 5.833745893154508e-05, "loss": 0.2484, "step": 36414 }, { "epoch": 2.9500162022035, "grad_norm": 0.06545108556747437, "learning_rate": 5.833295827895046e-05, "loss": 0.2073, "step": 36415 }, { "epoch": 2.950097213220998, "grad_norm": 0.06527014076709747, "learning_rate": 5.8328457626355824e-05, "loss": 0.2219, "step": 36416 }, { "epoch": 2.9501782242384964, "grad_norm": 0.07544932514429092, "learning_rate": 5.83239569737612e-05, "loss": 0.2462, "step": 36417 }, { "epoch": 2.9502592352559946, "grad_norm": 0.08888234198093414, "learning_rate": 5.831945632116658e-05, "loss": 0.2363, "step": 36418 }, { "epoch": 2.9503402462734933, "grad_norm": 0.07459647208452225, "learning_rate": 5.8314955668571945e-05, "loss": 0.2426, "step": 36419 }, { "epoch": 2.9504212572909916, "grad_norm": 0.0885954275727272, "learning_rate": 5.831045501597732e-05, "loss": 0.2533, "step": 36420 }, { "epoch": 2.95050226830849, "grad_norm": 0.0728658065199852, "learning_rate": 5.83059543633827e-05, "loss": 0.2295, "step": 36421 }, { "epoch": 2.950583279325988, "grad_norm": 0.07531264424324036, "learning_rate": 5.8301453710788066e-05, "loss": 0.2412, "step": 36422 }, { "epoch": 2.9506642903434868, "grad_norm": 0.06192412227392197, "learning_rate": 5.829695305819344e-05, "loss": 0.2121, "step": 36423 }, { "epoch": 2.950745301360985, "grad_norm": 0.06983577460050583, "learning_rate": 5.829245240559882e-05, "loss": 0.2304, "step": 36424 }, { "epoch": 2.9508263123784833, "grad_norm": 0.08154419809579849, "learning_rate": 5.828795175300419e-05, "loss": 0.2647, "step": 36425 }, { "epoch": 2.950907323395982, "grad_norm": 0.07178998738527298, "learning_rate": 5.828345110040956e-05, "loss": 0.2357, "step": 36426 }, { "epoch": 2.95098833441348, "grad_norm": 0.07730478048324585, "learning_rate": 5.827895044781494e-05, "loss": 0.2271, "step": 36427 }, { "epoch": 2.9510693454309784, "grad_norm": 0.07810342311859131, "learning_rate": 5.827444979522031e-05, "loss": 0.2295, "step": 36428 }, { "epoch": 2.951150356448477, "grad_norm": 0.06364695727825165, "learning_rate": 5.826994914262568e-05, "loss": 0.2314, "step": 36429 }, { "epoch": 2.9512313674659754, "grad_norm": 0.06301039457321167, "learning_rate": 5.826544849003106e-05, "loss": 0.2351, "step": 36430 }, { "epoch": 2.9513123784834736, "grad_norm": 0.08015149086713791, "learning_rate": 5.826094783743643e-05, "loss": 0.2205, "step": 36431 }, { "epoch": 2.9513933895009723, "grad_norm": 0.05650978162884712, "learning_rate": 5.82564471848418e-05, "loss": 0.2316, "step": 36432 }, { "epoch": 2.9514744005184705, "grad_norm": 0.06430716067552567, "learning_rate": 5.825194653224718e-05, "loss": 0.2136, "step": 36433 }, { "epoch": 2.951555411535969, "grad_norm": 0.07166837155818939, "learning_rate": 5.824744587965255e-05, "loss": 0.2194, "step": 36434 }, { "epoch": 2.9516364225534675, "grad_norm": 0.07965375483036041, "learning_rate": 5.824294522705792e-05, "loss": 0.2609, "step": 36435 }, { "epoch": 2.9517174335709657, "grad_norm": 0.06448642909526825, "learning_rate": 5.82384445744633e-05, "loss": 0.2271, "step": 36436 }, { "epoch": 2.951798444588464, "grad_norm": 0.073979951441288, "learning_rate": 5.823394392186867e-05, "loss": 0.2448, "step": 36437 }, { "epoch": 2.9518794556059627, "grad_norm": 0.07171009480953217, "learning_rate": 5.8229443269274043e-05, "loss": 0.2052, "step": 36438 }, { "epoch": 2.951960466623461, "grad_norm": 0.07442062348127365, "learning_rate": 5.8224942616679424e-05, "loss": 0.2574, "step": 36439 }, { "epoch": 2.952041477640959, "grad_norm": 0.07632353901863098, "learning_rate": 5.822044196408479e-05, "loss": 0.2441, "step": 36440 }, { "epoch": 2.9521224886584574, "grad_norm": 0.09367818385362625, "learning_rate": 5.821594131149017e-05, "loss": 0.2539, "step": 36441 }, { "epoch": 2.952203499675956, "grad_norm": 0.06162737309932709, "learning_rate": 5.8211440658895545e-05, "loss": 0.2441, "step": 36442 }, { "epoch": 2.9522845106934543, "grad_norm": 0.06815103441476822, "learning_rate": 5.820694000630091e-05, "loss": 0.2262, "step": 36443 }, { "epoch": 2.9523655217109526, "grad_norm": 0.0818028524518013, "learning_rate": 5.820243935370629e-05, "loss": 0.213, "step": 36444 }, { "epoch": 2.952446532728451, "grad_norm": 0.057324279099702835, "learning_rate": 5.8197938701111665e-05, "loss": 0.2319, "step": 36445 }, { "epoch": 2.9525275437459495, "grad_norm": 0.07575049996376038, "learning_rate": 5.819343804851703e-05, "loss": 0.2409, "step": 36446 }, { "epoch": 2.9526085547634477, "grad_norm": 0.08090692013502121, "learning_rate": 5.818893739592241e-05, "loss": 0.2608, "step": 36447 }, { "epoch": 2.952689565780946, "grad_norm": 0.08545611798763275, "learning_rate": 5.8184436743327786e-05, "loss": 0.2354, "step": 36448 }, { "epoch": 2.9527705767984447, "grad_norm": 0.06185333430767059, "learning_rate": 5.817993609073315e-05, "loss": 0.2466, "step": 36449 }, { "epoch": 2.952851587815943, "grad_norm": 0.06604185700416565, "learning_rate": 5.817543543813854e-05, "loss": 0.2307, "step": 36450 }, { "epoch": 2.952932598833441, "grad_norm": 0.06959135085344315, "learning_rate": 5.817093478554391e-05, "loss": 0.2378, "step": 36451 }, { "epoch": 2.95301360985094, "grad_norm": 0.07814179360866547, "learning_rate": 5.8166434132949274e-05, "loss": 0.254, "step": 36452 }, { "epoch": 2.953094620868438, "grad_norm": 0.06907559186220169, "learning_rate": 5.816193348035466e-05, "loss": 0.2342, "step": 36453 }, { "epoch": 2.9531756318859363, "grad_norm": 0.08826621621847153, "learning_rate": 5.815743282776003e-05, "loss": 0.2375, "step": 36454 }, { "epoch": 2.953256642903435, "grad_norm": 0.08610070496797562, "learning_rate": 5.8152932175165395e-05, "loss": 0.2515, "step": 36455 }, { "epoch": 2.9533376539209333, "grad_norm": 0.0768219530582428, "learning_rate": 5.814843152257078e-05, "loss": 0.2643, "step": 36456 }, { "epoch": 2.9534186649384315, "grad_norm": 0.08998133987188339, "learning_rate": 5.814393086997615e-05, "loss": 0.2352, "step": 36457 }, { "epoch": 2.95349967595593, "grad_norm": 0.06378687173128128, "learning_rate": 5.8139430217381516e-05, "loss": 0.2024, "step": 36458 }, { "epoch": 2.9535806869734285, "grad_norm": 0.07165522873401642, "learning_rate": 5.81349295647869e-05, "loss": 0.2318, "step": 36459 }, { "epoch": 2.9536616979909267, "grad_norm": 0.0691077783703804, "learning_rate": 5.813042891219227e-05, "loss": 0.2299, "step": 36460 }, { "epoch": 2.9537427090084254, "grad_norm": 0.08201265335083008, "learning_rate": 5.8125928259597636e-05, "loss": 0.2369, "step": 36461 }, { "epoch": 2.9538237200259236, "grad_norm": 0.06257831305265427, "learning_rate": 5.8121427607003024e-05, "loss": 0.2074, "step": 36462 }, { "epoch": 2.953904731043422, "grad_norm": 0.0590004026889801, "learning_rate": 5.811692695440839e-05, "loss": 0.2563, "step": 36463 }, { "epoch": 2.95398574206092, "grad_norm": 0.05743236094713211, "learning_rate": 5.811242630181376e-05, "loss": 0.2244, "step": 36464 }, { "epoch": 2.954066753078419, "grad_norm": 0.07363023608922958, "learning_rate": 5.8107925649219144e-05, "loss": 0.2144, "step": 36465 }, { "epoch": 2.954147764095917, "grad_norm": 0.07051236927509308, "learning_rate": 5.810342499662451e-05, "loss": 0.2412, "step": 36466 }, { "epoch": 2.9542287751134153, "grad_norm": 0.06581655144691467, "learning_rate": 5.809892434402989e-05, "loss": 0.2234, "step": 36467 }, { "epoch": 2.9543097861309136, "grad_norm": 0.07746722549200058, "learning_rate": 5.8094423691435265e-05, "loss": 0.2497, "step": 36468 }, { "epoch": 2.9543907971484122, "grad_norm": 0.07712780684232712, "learning_rate": 5.808992303884063e-05, "loss": 0.2754, "step": 36469 }, { "epoch": 2.9544718081659105, "grad_norm": 0.06322678178548813, "learning_rate": 5.808542238624601e-05, "loss": 0.2088, "step": 36470 }, { "epoch": 2.9545528191834087, "grad_norm": 0.06469064950942993, "learning_rate": 5.8080921733651386e-05, "loss": 0.2484, "step": 36471 }, { "epoch": 2.9546338302009074, "grad_norm": 0.055840183049440384, "learning_rate": 5.807642108105675e-05, "loss": 0.2361, "step": 36472 }, { "epoch": 2.9547148412184057, "grad_norm": 0.0745265781879425, "learning_rate": 5.807192042846213e-05, "loss": 0.245, "step": 36473 }, { "epoch": 2.954795852235904, "grad_norm": 0.07949806749820709, "learning_rate": 5.806741977586751e-05, "loss": 0.2293, "step": 36474 }, { "epoch": 2.9548768632534026, "grad_norm": 0.07750680297613144, "learning_rate": 5.8062919123272874e-05, "loss": 0.2629, "step": 36475 }, { "epoch": 2.954957874270901, "grad_norm": 0.06992745399475098, "learning_rate": 5.8058418470678254e-05, "loss": 0.2457, "step": 36476 }, { "epoch": 2.955038885288399, "grad_norm": 0.06394144892692566, "learning_rate": 5.805391781808363e-05, "loss": 0.256, "step": 36477 }, { "epoch": 2.9551198963058978, "grad_norm": 0.07632666826248169, "learning_rate": 5.8049417165488994e-05, "loss": 0.2397, "step": 36478 }, { "epoch": 2.955200907323396, "grad_norm": 0.06426636129617691, "learning_rate": 5.8044916512894375e-05, "loss": 0.2383, "step": 36479 }, { "epoch": 2.9552819183408943, "grad_norm": 0.06205812096595764, "learning_rate": 5.804041586029975e-05, "loss": 0.2265, "step": 36480 }, { "epoch": 2.955362929358393, "grad_norm": 0.10336317867040634, "learning_rate": 5.8035915207705115e-05, "loss": 0.2269, "step": 36481 }, { "epoch": 2.955443940375891, "grad_norm": 0.08274838328361511, "learning_rate": 5.8031414555110496e-05, "loss": 0.2612, "step": 36482 }, { "epoch": 2.9555249513933894, "grad_norm": 0.0678529441356659, "learning_rate": 5.802691390251587e-05, "loss": 0.2079, "step": 36483 }, { "epoch": 2.955605962410888, "grad_norm": 0.0703086107969284, "learning_rate": 5.8022413249921236e-05, "loss": 0.2161, "step": 36484 }, { "epoch": 2.9556869734283864, "grad_norm": 0.07301075011491776, "learning_rate": 5.8017912597326616e-05, "loss": 0.2301, "step": 36485 }, { "epoch": 2.9557679844458846, "grad_norm": 0.07188529521226883, "learning_rate": 5.801341194473199e-05, "loss": 0.268, "step": 36486 }, { "epoch": 2.955848995463383, "grad_norm": 0.05946533754467964, "learning_rate": 5.800891129213736e-05, "loss": 0.2307, "step": 36487 }, { "epoch": 2.9559300064808816, "grad_norm": 0.07464953511953354, "learning_rate": 5.800441063954274e-05, "loss": 0.2562, "step": 36488 }, { "epoch": 2.95601101749838, "grad_norm": 0.08129985630512238, "learning_rate": 5.799990998694811e-05, "loss": 0.2446, "step": 36489 }, { "epoch": 2.956092028515878, "grad_norm": 0.07472100853919983, "learning_rate": 5.799540933435348e-05, "loss": 0.2391, "step": 36490 }, { "epoch": 2.9561730395333763, "grad_norm": 0.07534375786781311, "learning_rate": 5.799090868175886e-05, "loss": 0.2406, "step": 36491 }, { "epoch": 2.956254050550875, "grad_norm": 0.07338795065879822, "learning_rate": 5.798640802916423e-05, "loss": 0.2649, "step": 36492 }, { "epoch": 2.9563350615683732, "grad_norm": 0.06019020080566406, "learning_rate": 5.798190737656961e-05, "loss": 0.2068, "step": 36493 }, { "epoch": 2.9564160725858715, "grad_norm": 0.07050585746765137, "learning_rate": 5.797740672397498e-05, "loss": 0.205, "step": 36494 }, { "epoch": 2.95649708360337, "grad_norm": 0.07372116297483444, "learning_rate": 5.797290607138035e-05, "loss": 0.2451, "step": 36495 }, { "epoch": 2.9565780946208684, "grad_norm": 0.056005675345659256, "learning_rate": 5.796840541878573e-05, "loss": 0.2651, "step": 36496 }, { "epoch": 2.9566591056383666, "grad_norm": 0.06699275970458984, "learning_rate": 5.79639047661911e-05, "loss": 0.2495, "step": 36497 }, { "epoch": 2.9567401166558653, "grad_norm": 0.0599977932870388, "learning_rate": 5.795940411359647e-05, "loss": 0.2201, "step": 36498 }, { "epoch": 2.9568211276733636, "grad_norm": 0.06906404346227646, "learning_rate": 5.7954903461001854e-05, "loss": 0.2357, "step": 36499 }, { "epoch": 2.956902138690862, "grad_norm": 0.08245977014303207, "learning_rate": 5.795040280840722e-05, "loss": 0.2137, "step": 36500 }, { "epoch": 2.9569831497083605, "grad_norm": 0.06937648355960846, "learning_rate": 5.7945902155812594e-05, "loss": 0.2331, "step": 36501 }, { "epoch": 2.9570641607258588, "grad_norm": 0.08504277467727661, "learning_rate": 5.7941401503217974e-05, "loss": 0.2126, "step": 36502 }, { "epoch": 2.957145171743357, "grad_norm": 0.06685921549797058, "learning_rate": 5.793690085062334e-05, "loss": 0.2254, "step": 36503 }, { "epoch": 2.9572261827608557, "grad_norm": 0.07819310575723648, "learning_rate": 5.7932400198028715e-05, "loss": 0.2516, "step": 36504 }, { "epoch": 2.957307193778354, "grad_norm": 0.0625201091170311, "learning_rate": 5.7927899545434095e-05, "loss": 0.2099, "step": 36505 }, { "epoch": 2.957388204795852, "grad_norm": 0.06668776273727417, "learning_rate": 5.792339889283946e-05, "loss": 0.1961, "step": 36506 }, { "epoch": 2.957469215813351, "grad_norm": 0.05308452248573303, "learning_rate": 5.7918898240244836e-05, "loss": 0.2351, "step": 36507 }, { "epoch": 2.957550226830849, "grad_norm": 0.07450252026319504, "learning_rate": 5.7914397587650216e-05, "loss": 0.2425, "step": 36508 }, { "epoch": 2.9576312378483474, "grad_norm": 0.05780648812651634, "learning_rate": 5.790989693505558e-05, "loss": 0.2224, "step": 36509 }, { "epoch": 2.9577122488658456, "grad_norm": 0.07254470139741898, "learning_rate": 5.7905396282460956e-05, "loss": 0.2557, "step": 36510 }, { "epoch": 2.9577932598833443, "grad_norm": 0.06770768016576767, "learning_rate": 5.790089562986634e-05, "loss": 0.244, "step": 36511 }, { "epoch": 2.9578742709008425, "grad_norm": 0.08380091935396194, "learning_rate": 5.7896394977271704e-05, "loss": 0.2591, "step": 36512 }, { "epoch": 2.957955281918341, "grad_norm": 0.06993330270051956, "learning_rate": 5.789189432467708e-05, "loss": 0.2276, "step": 36513 }, { "epoch": 2.958036292935839, "grad_norm": 0.07040643692016602, "learning_rate": 5.788739367208246e-05, "loss": 0.2245, "step": 36514 }, { "epoch": 2.9581173039533377, "grad_norm": 0.07032278925180435, "learning_rate": 5.7882893019487824e-05, "loss": 0.2378, "step": 36515 }, { "epoch": 2.958198314970836, "grad_norm": 0.047874245792627335, "learning_rate": 5.78783923668932e-05, "loss": 0.1835, "step": 36516 }, { "epoch": 2.958279325988334, "grad_norm": 0.06393905729055405, "learning_rate": 5.787389171429858e-05, "loss": 0.2281, "step": 36517 }, { "epoch": 2.958360337005833, "grad_norm": 0.06521731615066528, "learning_rate": 5.7869391061703945e-05, "loss": 0.2326, "step": 36518 }, { "epoch": 2.958441348023331, "grad_norm": 0.09332821518182755, "learning_rate": 5.786489040910933e-05, "loss": 0.2457, "step": 36519 }, { "epoch": 2.9585223590408294, "grad_norm": 0.052814967930316925, "learning_rate": 5.78603897565147e-05, "loss": 0.2083, "step": 36520 }, { "epoch": 2.958603370058328, "grad_norm": 0.06364589184522629, "learning_rate": 5.7855889103920066e-05, "loss": 0.2288, "step": 36521 }, { "epoch": 2.9586843810758263, "grad_norm": 0.07168759405612946, "learning_rate": 5.785138845132545e-05, "loss": 0.2214, "step": 36522 }, { "epoch": 2.9587653920933246, "grad_norm": 0.0853877067565918, "learning_rate": 5.784688779873082e-05, "loss": 0.2841, "step": 36523 }, { "epoch": 2.9588464031108233, "grad_norm": 0.07488827407360077, "learning_rate": 5.784238714613619e-05, "loss": 0.2702, "step": 36524 }, { "epoch": 2.9589274141283215, "grad_norm": 0.060363445430994034, "learning_rate": 5.7837886493541574e-05, "loss": 0.2635, "step": 36525 }, { "epoch": 2.9590084251458197, "grad_norm": 0.07399044930934906, "learning_rate": 5.783338584094694e-05, "loss": 0.226, "step": 36526 }, { "epoch": 2.9590894361633184, "grad_norm": 0.06035542115569115, "learning_rate": 5.782888518835231e-05, "loss": 0.1998, "step": 36527 }, { "epoch": 2.9591704471808167, "grad_norm": 0.059440962970256805, "learning_rate": 5.7824384535757695e-05, "loss": 0.2374, "step": 36528 }, { "epoch": 2.959251458198315, "grad_norm": 0.06876916438341141, "learning_rate": 5.781988388316306e-05, "loss": 0.2204, "step": 36529 }, { "epoch": 2.9593324692158136, "grad_norm": 0.07742578536272049, "learning_rate": 5.781538323056843e-05, "loss": 0.1969, "step": 36530 }, { "epoch": 2.959413480233312, "grad_norm": 0.07292307168245316, "learning_rate": 5.7810882577973816e-05, "loss": 0.2276, "step": 36531 }, { "epoch": 2.95949449125081, "grad_norm": 0.07512343674898148, "learning_rate": 5.780638192537918e-05, "loss": 0.2269, "step": 36532 }, { "epoch": 2.9595755022683083, "grad_norm": 0.05893891677260399, "learning_rate": 5.780188127278455e-05, "loss": 0.2385, "step": 36533 }, { "epoch": 2.959656513285807, "grad_norm": 0.06704125553369522, "learning_rate": 5.7797380620189936e-05, "loss": 0.2305, "step": 36534 }, { "epoch": 2.9597375243033053, "grad_norm": 0.07865846157073975, "learning_rate": 5.77928799675953e-05, "loss": 0.2482, "step": 36535 }, { "epoch": 2.9598185353208035, "grad_norm": 0.06334725022315979, "learning_rate": 5.778837931500068e-05, "loss": 0.2497, "step": 36536 }, { "epoch": 2.9598995463383018, "grad_norm": 0.06638215482234955, "learning_rate": 5.778387866240606e-05, "loss": 0.2113, "step": 36537 }, { "epoch": 2.9599805573558005, "grad_norm": 0.06827043741941452, "learning_rate": 5.7779378009811424e-05, "loss": 0.2311, "step": 36538 }, { "epoch": 2.9600615683732987, "grad_norm": 0.05676588788628578, "learning_rate": 5.77748773572168e-05, "loss": 0.21, "step": 36539 }, { "epoch": 2.960142579390797, "grad_norm": 0.08814438432455063, "learning_rate": 5.777037670462218e-05, "loss": 0.3044, "step": 36540 }, { "epoch": 2.9602235904082956, "grad_norm": 0.07136793434619904, "learning_rate": 5.7765876052027545e-05, "loss": 0.2657, "step": 36541 }, { "epoch": 2.960304601425794, "grad_norm": 0.06732363253831863, "learning_rate": 5.776137539943292e-05, "loss": 0.246, "step": 36542 }, { "epoch": 2.960385612443292, "grad_norm": 0.07745783030986786, "learning_rate": 5.77568747468383e-05, "loss": 0.2371, "step": 36543 }, { "epoch": 2.960466623460791, "grad_norm": 0.06036265939474106, "learning_rate": 5.7752374094243666e-05, "loss": 0.2035, "step": 36544 }, { "epoch": 2.960547634478289, "grad_norm": 0.06795885413885117, "learning_rate": 5.774787344164904e-05, "loss": 0.2229, "step": 36545 }, { "epoch": 2.9606286454957873, "grad_norm": 0.10156449675559998, "learning_rate": 5.774337278905442e-05, "loss": 0.2794, "step": 36546 }, { "epoch": 2.960709656513286, "grad_norm": 0.07111938297748566, "learning_rate": 5.7738872136459787e-05, "loss": 0.2011, "step": 36547 }, { "epoch": 2.9607906675307842, "grad_norm": 0.05817258358001709, "learning_rate": 5.773437148386517e-05, "loss": 0.2034, "step": 36548 }, { "epoch": 2.9608716785482825, "grad_norm": 0.06076313182711601, "learning_rate": 5.772987083127054e-05, "loss": 0.2228, "step": 36549 }, { "epoch": 2.960952689565781, "grad_norm": 0.08142624795436859, "learning_rate": 5.772537017867591e-05, "loss": 0.2786, "step": 36550 }, { "epoch": 2.9610337005832794, "grad_norm": 0.07742969691753387, "learning_rate": 5.772086952608129e-05, "loss": 0.2945, "step": 36551 }, { "epoch": 2.9611147116007777, "grad_norm": 0.06102697178721428, "learning_rate": 5.771636887348666e-05, "loss": 0.2289, "step": 36552 }, { "epoch": 2.9611957226182763, "grad_norm": 0.06912451982498169, "learning_rate": 5.771186822089203e-05, "loss": 0.21, "step": 36553 }, { "epoch": 2.9612767336357746, "grad_norm": 0.05915422737598419, "learning_rate": 5.770736756829741e-05, "loss": 0.1814, "step": 36554 }, { "epoch": 2.961357744653273, "grad_norm": 0.06098875403404236, "learning_rate": 5.770286691570278e-05, "loss": 0.2633, "step": 36555 }, { "epoch": 2.961438755670771, "grad_norm": 0.07346302270889282, "learning_rate": 5.769836626310815e-05, "loss": 0.2828, "step": 36556 }, { "epoch": 2.9615197666882693, "grad_norm": 0.08193167299032211, "learning_rate": 5.769386561051353e-05, "loss": 0.2435, "step": 36557 }, { "epoch": 2.961600777705768, "grad_norm": 0.07248496264219284, "learning_rate": 5.76893649579189e-05, "loss": 0.2135, "step": 36558 }, { "epoch": 2.9616817887232663, "grad_norm": 0.07301896810531616, "learning_rate": 5.768486430532427e-05, "loss": 0.2176, "step": 36559 }, { "epoch": 2.9617627997407645, "grad_norm": 0.07056121528148651, "learning_rate": 5.768036365272965e-05, "loss": 0.2808, "step": 36560 }, { "epoch": 2.961843810758263, "grad_norm": 0.057158637791872025, "learning_rate": 5.7675863000135024e-05, "loss": 0.2535, "step": 36561 }, { "epoch": 2.9619248217757614, "grad_norm": 0.07093117386102676, "learning_rate": 5.767136234754039e-05, "loss": 0.223, "step": 36562 }, { "epoch": 2.9620058327932597, "grad_norm": 0.07848324626684189, "learning_rate": 5.766686169494577e-05, "loss": 0.2333, "step": 36563 }, { "epoch": 2.9620868438107584, "grad_norm": 0.07335309684276581, "learning_rate": 5.7662361042351145e-05, "loss": 0.2507, "step": 36564 }, { "epoch": 2.9621678548282566, "grad_norm": 0.06225890666246414, "learning_rate": 5.765786038975651e-05, "loss": 0.2424, "step": 36565 }, { "epoch": 2.962248865845755, "grad_norm": 0.061031196266412735, "learning_rate": 5.765335973716189e-05, "loss": 0.2232, "step": 36566 }, { "epoch": 2.9623298768632536, "grad_norm": 0.06554216146469116, "learning_rate": 5.7648859084567265e-05, "loss": 0.2382, "step": 36567 }, { "epoch": 2.962410887880752, "grad_norm": 0.08055777102708817, "learning_rate": 5.764435843197263e-05, "loss": 0.2449, "step": 36568 }, { "epoch": 2.96249189889825, "grad_norm": 0.07099359482526779, "learning_rate": 5.763985777937801e-05, "loss": 0.2464, "step": 36569 }, { "epoch": 2.9625729099157487, "grad_norm": 0.07479843497276306, "learning_rate": 5.7635357126783386e-05, "loss": 0.2208, "step": 36570 }, { "epoch": 2.962653920933247, "grad_norm": 0.06658231467008591, "learning_rate": 5.763085647418875e-05, "loss": 0.2021, "step": 36571 }, { "epoch": 2.962734931950745, "grad_norm": 0.0665665715932846, "learning_rate": 5.7626355821594133e-05, "loss": 0.2087, "step": 36572 }, { "epoch": 2.962815942968244, "grad_norm": 0.06857075542211533, "learning_rate": 5.762185516899951e-05, "loss": 0.2755, "step": 36573 }, { "epoch": 2.962896953985742, "grad_norm": 0.07066106796264648, "learning_rate": 5.761735451640489e-05, "loss": 0.2654, "step": 36574 }, { "epoch": 2.9629779650032404, "grad_norm": 0.06204492971301079, "learning_rate": 5.7612853863810254e-05, "loss": 0.2247, "step": 36575 }, { "epoch": 2.963058976020739, "grad_norm": 0.06969764828681946, "learning_rate": 5.760835321121563e-05, "loss": 0.2691, "step": 36576 }, { "epoch": 2.9631399870382373, "grad_norm": 0.05596327409148216, "learning_rate": 5.760385255862101e-05, "loss": 0.2097, "step": 36577 }, { "epoch": 2.9632209980557356, "grad_norm": 0.06442490965127945, "learning_rate": 5.7599351906026375e-05, "loss": 0.2236, "step": 36578 }, { "epoch": 2.963302009073234, "grad_norm": 0.07338786125183105, "learning_rate": 5.759485125343175e-05, "loss": 0.2406, "step": 36579 }, { "epoch": 2.963383020090732, "grad_norm": 0.07324231415987015, "learning_rate": 5.759035060083713e-05, "loss": 0.2234, "step": 36580 }, { "epoch": 2.9634640311082308, "grad_norm": 0.061953090131282806, "learning_rate": 5.7585849948242496e-05, "loss": 0.201, "step": 36581 }, { "epoch": 2.963545042125729, "grad_norm": 0.06300554424524307, "learning_rate": 5.758134929564787e-05, "loss": 0.2068, "step": 36582 }, { "epoch": 2.9636260531432272, "grad_norm": 0.0651894062757492, "learning_rate": 5.757684864305325e-05, "loss": 0.2519, "step": 36583 }, { "epoch": 2.963707064160726, "grad_norm": 0.0788087248802185, "learning_rate": 5.757234799045862e-05, "loss": 0.2195, "step": 36584 }, { "epoch": 2.963788075178224, "grad_norm": 0.07214096933603287, "learning_rate": 5.756784733786399e-05, "loss": 0.2267, "step": 36585 }, { "epoch": 2.9638690861957224, "grad_norm": 0.05137092247605324, "learning_rate": 5.756334668526937e-05, "loss": 0.2019, "step": 36586 }, { "epoch": 2.963950097213221, "grad_norm": 0.08676977455615997, "learning_rate": 5.755884603267474e-05, "loss": 0.248, "step": 36587 }, { "epoch": 2.9640311082307194, "grad_norm": 0.07802512496709824, "learning_rate": 5.755434538008011e-05, "loss": 0.2724, "step": 36588 }, { "epoch": 2.9641121192482176, "grad_norm": 0.0753769502043724, "learning_rate": 5.754984472748549e-05, "loss": 0.2109, "step": 36589 }, { "epoch": 2.9641931302657163, "grad_norm": 0.07928132265806198, "learning_rate": 5.754534407489086e-05, "loss": 0.2555, "step": 36590 }, { "epoch": 2.9642741412832145, "grad_norm": 0.08000775426626205, "learning_rate": 5.754084342229623e-05, "loss": 0.2267, "step": 36591 }, { "epoch": 2.964355152300713, "grad_norm": 0.0798909068107605, "learning_rate": 5.753634276970161e-05, "loss": 0.2471, "step": 36592 }, { "epoch": 2.9644361633182115, "grad_norm": 0.08604051917791367, "learning_rate": 5.753184211710698e-05, "loss": 0.2207, "step": 36593 }, { "epoch": 2.9645171743357097, "grad_norm": 0.06759706139564514, "learning_rate": 5.752734146451235e-05, "loss": 0.2117, "step": 36594 }, { "epoch": 2.964598185353208, "grad_norm": 0.06823612749576569, "learning_rate": 5.752284081191773e-05, "loss": 0.2091, "step": 36595 }, { "epoch": 2.9646791963707066, "grad_norm": 0.05693601071834564, "learning_rate": 5.75183401593231e-05, "loss": 0.223, "step": 36596 }, { "epoch": 2.964760207388205, "grad_norm": 0.07944602519273758, "learning_rate": 5.7513839506728474e-05, "loss": 0.2785, "step": 36597 }, { "epoch": 2.964841218405703, "grad_norm": 0.08167745918035507, "learning_rate": 5.7509338854133854e-05, "loss": 0.2245, "step": 36598 }, { "epoch": 2.964922229423202, "grad_norm": 0.08227448165416718, "learning_rate": 5.750483820153922e-05, "loss": 0.2542, "step": 36599 }, { "epoch": 2.9650032404407, "grad_norm": 0.05877317115664482, "learning_rate": 5.750033754894461e-05, "loss": 0.2076, "step": 36600 }, { "epoch": 2.9650842514581983, "grad_norm": 0.0726679190993309, "learning_rate": 5.7495836896349975e-05, "loss": 0.2218, "step": 36601 }, { "epoch": 2.9651652624756966, "grad_norm": 0.0621146522462368, "learning_rate": 5.749133624375534e-05, "loss": 0.2346, "step": 36602 }, { "epoch": 2.965246273493195, "grad_norm": 0.08478382229804993, "learning_rate": 5.748683559116073e-05, "loss": 0.2481, "step": 36603 }, { "epoch": 2.9653272845106935, "grad_norm": 0.06537126004695892, "learning_rate": 5.7482334938566096e-05, "loss": 0.2317, "step": 36604 }, { "epoch": 2.9654082955281917, "grad_norm": 0.06782863289117813, "learning_rate": 5.747783428597147e-05, "loss": 0.1949, "step": 36605 }, { "epoch": 2.96548930654569, "grad_norm": 0.0709555372595787, "learning_rate": 5.747333363337685e-05, "loss": 0.2361, "step": 36606 }, { "epoch": 2.9655703175631887, "grad_norm": 0.06789838522672653, "learning_rate": 5.7468832980782216e-05, "loss": 0.2231, "step": 36607 }, { "epoch": 2.965651328580687, "grad_norm": 0.07288671284914017, "learning_rate": 5.746433232818759e-05, "loss": 0.2363, "step": 36608 }, { "epoch": 2.965732339598185, "grad_norm": 0.060141559690237045, "learning_rate": 5.745983167559297e-05, "loss": 0.215, "step": 36609 }, { "epoch": 2.965813350615684, "grad_norm": 0.05655485391616821, "learning_rate": 5.745533102299834e-05, "loss": 0.2373, "step": 36610 }, { "epoch": 2.965894361633182, "grad_norm": 0.07940292358398438, "learning_rate": 5.745083037040371e-05, "loss": 0.2613, "step": 36611 }, { "epoch": 2.9659753726506803, "grad_norm": 0.06751307100057602, "learning_rate": 5.744632971780909e-05, "loss": 0.2073, "step": 36612 }, { "epoch": 2.966056383668179, "grad_norm": 0.06888186186552048, "learning_rate": 5.744182906521446e-05, "loss": 0.249, "step": 36613 }, { "epoch": 2.9661373946856773, "grad_norm": 0.06600555032491684, "learning_rate": 5.743732841261983e-05, "loss": 0.2299, "step": 36614 }, { "epoch": 2.9662184057031755, "grad_norm": 0.07247529923915863, "learning_rate": 5.743282776002521e-05, "loss": 0.2191, "step": 36615 }, { "epoch": 2.966299416720674, "grad_norm": 0.06783980131149292, "learning_rate": 5.742832710743058e-05, "loss": 0.2469, "step": 36616 }, { "epoch": 2.9663804277381725, "grad_norm": 0.05341443419456482, "learning_rate": 5.742382645483595e-05, "loss": 0.2058, "step": 36617 }, { "epoch": 2.9664614387556707, "grad_norm": 0.06809686124324799, "learning_rate": 5.741932580224133e-05, "loss": 0.2443, "step": 36618 }, { "epoch": 2.9665424497731694, "grad_norm": 0.08757691085338593, "learning_rate": 5.74148251496467e-05, "loss": 0.2596, "step": 36619 }, { "epoch": 2.9666234607906676, "grad_norm": 0.07357107102870941, "learning_rate": 5.741032449705207e-05, "loss": 0.2223, "step": 36620 }, { "epoch": 2.966704471808166, "grad_norm": 0.07046817988157272, "learning_rate": 5.7405823844457454e-05, "loss": 0.238, "step": 36621 }, { "epoch": 2.9667854828256646, "grad_norm": 0.07757671922445297, "learning_rate": 5.740132319186282e-05, "loss": 0.2575, "step": 36622 }, { "epoch": 2.966866493843163, "grad_norm": 0.09322373569011688, "learning_rate": 5.7396822539268194e-05, "loss": 0.2306, "step": 36623 }, { "epoch": 2.966947504860661, "grad_norm": 0.07821787148714066, "learning_rate": 5.7392321886673574e-05, "loss": 0.2679, "step": 36624 }, { "epoch": 2.9670285158781593, "grad_norm": 0.06471216678619385, "learning_rate": 5.738782123407894e-05, "loss": 0.2387, "step": 36625 }, { "epoch": 2.9671095268956575, "grad_norm": 0.0634206086397171, "learning_rate": 5.738332058148432e-05, "loss": 0.2094, "step": 36626 }, { "epoch": 2.9671905379131562, "grad_norm": 0.054617397487163544, "learning_rate": 5.7378819928889695e-05, "loss": 0.2139, "step": 36627 }, { "epoch": 2.9672715489306545, "grad_norm": 0.0798972025513649, "learning_rate": 5.737431927629506e-05, "loss": 0.2662, "step": 36628 }, { "epoch": 2.9673525599481527, "grad_norm": 0.0739915519952774, "learning_rate": 5.736981862370044e-05, "loss": 0.2616, "step": 36629 }, { "epoch": 2.9674335709656514, "grad_norm": 0.07516808062791824, "learning_rate": 5.7365317971105816e-05, "loss": 0.2412, "step": 36630 }, { "epoch": 2.9675145819831497, "grad_norm": 0.07437235862016678, "learning_rate": 5.736081731851118e-05, "loss": 0.254, "step": 36631 }, { "epoch": 2.967595593000648, "grad_norm": 0.0826960951089859, "learning_rate": 5.735631666591656e-05, "loss": 0.2511, "step": 36632 }, { "epoch": 2.9676766040181466, "grad_norm": 0.08840584754943848, "learning_rate": 5.735181601332194e-05, "loss": 0.2449, "step": 36633 }, { "epoch": 2.967757615035645, "grad_norm": 0.07088325917720795, "learning_rate": 5.7347315360727304e-05, "loss": 0.2315, "step": 36634 }, { "epoch": 2.967838626053143, "grad_norm": 0.07013159245252609, "learning_rate": 5.7342814708132684e-05, "loss": 0.2712, "step": 36635 }, { "epoch": 2.9679196370706418, "grad_norm": 0.06038451939821243, "learning_rate": 5.733831405553806e-05, "loss": 0.1965, "step": 36636 }, { "epoch": 2.96800064808814, "grad_norm": 0.0711541622877121, "learning_rate": 5.7333813402943424e-05, "loss": 0.2286, "step": 36637 }, { "epoch": 2.9680816591056383, "grad_norm": 0.0641595646739006, "learning_rate": 5.7329312750348805e-05, "loss": 0.1963, "step": 36638 }, { "epoch": 2.968162670123137, "grad_norm": 0.05820443481206894, "learning_rate": 5.732481209775418e-05, "loss": 0.2042, "step": 36639 }, { "epoch": 2.968243681140635, "grad_norm": 0.0708758533000946, "learning_rate": 5.7320311445159545e-05, "loss": 0.2332, "step": 36640 }, { "epoch": 2.9683246921581334, "grad_norm": 0.06513377279043198, "learning_rate": 5.7315810792564926e-05, "loss": 0.1954, "step": 36641 }, { "epoch": 2.968405703175632, "grad_norm": 0.0671873390674591, "learning_rate": 5.73113101399703e-05, "loss": 0.2437, "step": 36642 }, { "epoch": 2.9684867141931304, "grad_norm": 0.07724633067846298, "learning_rate": 5.7306809487375666e-05, "loss": 0.2359, "step": 36643 }, { "epoch": 2.9685677252106286, "grad_norm": 0.07575727999210358, "learning_rate": 5.7302308834781046e-05, "loss": 0.2281, "step": 36644 }, { "epoch": 2.968648736228127, "grad_norm": 0.06524080783128738, "learning_rate": 5.729780818218642e-05, "loss": 0.2341, "step": 36645 }, { "epoch": 2.9687297472456255, "grad_norm": 0.06586866080760956, "learning_rate": 5.729330752959179e-05, "loss": 0.2305, "step": 36646 }, { "epoch": 2.968810758263124, "grad_norm": 0.057113513350486755, "learning_rate": 5.728880687699717e-05, "loss": 0.205, "step": 36647 }, { "epoch": 2.968891769280622, "grad_norm": 0.07440678775310516, "learning_rate": 5.728430622440254e-05, "loss": 0.2353, "step": 36648 }, { "epoch": 2.9689727802981203, "grad_norm": 0.07313182204961777, "learning_rate": 5.727980557180791e-05, "loss": 0.2262, "step": 36649 }, { "epoch": 2.969053791315619, "grad_norm": 0.06670592725276947, "learning_rate": 5.727530491921329e-05, "loss": 0.2014, "step": 36650 }, { "epoch": 2.969134802333117, "grad_norm": 0.06971270591020584, "learning_rate": 5.727080426661866e-05, "loss": 0.2418, "step": 36651 }, { "epoch": 2.9692158133506155, "grad_norm": 0.07573027163743973, "learning_rate": 5.726630361402404e-05, "loss": 0.2379, "step": 36652 }, { "epoch": 2.969296824368114, "grad_norm": 0.07461774349212646, "learning_rate": 5.726180296142941e-05, "loss": 0.2143, "step": 36653 }, { "epoch": 2.9693778353856124, "grad_norm": 0.0802450180053711, "learning_rate": 5.725730230883478e-05, "loss": 0.2163, "step": 36654 }, { "epoch": 2.9694588464031106, "grad_norm": 0.0737207680940628, "learning_rate": 5.725280165624016e-05, "loss": 0.303, "step": 36655 }, { "epoch": 2.9695398574206093, "grad_norm": 0.07518462836742401, "learning_rate": 5.724830100364553e-05, "loss": 0.2254, "step": 36656 }, { "epoch": 2.9696208684381076, "grad_norm": 0.06999009102582932, "learning_rate": 5.72438003510509e-05, "loss": 0.2647, "step": 36657 }, { "epoch": 2.969701879455606, "grad_norm": 0.0705016627907753, "learning_rate": 5.7239299698456284e-05, "loss": 0.2446, "step": 36658 }, { "epoch": 2.9697828904731045, "grad_norm": 0.07972300052642822, "learning_rate": 5.723479904586165e-05, "loss": 0.2184, "step": 36659 }, { "epoch": 2.9698639014906028, "grad_norm": 0.07280859351158142, "learning_rate": 5.7230298393267024e-05, "loss": 0.25, "step": 36660 }, { "epoch": 2.969944912508101, "grad_norm": 0.07672141492366791, "learning_rate": 5.7225797740672404e-05, "loss": 0.2497, "step": 36661 }, { "epoch": 2.9700259235255997, "grad_norm": 0.06514837592840195, "learning_rate": 5.722129708807777e-05, "loss": 0.1916, "step": 36662 }, { "epoch": 2.970106934543098, "grad_norm": 0.08025524020195007, "learning_rate": 5.7216796435483145e-05, "loss": 0.229, "step": 36663 }, { "epoch": 2.970187945560596, "grad_norm": 0.06459321826696396, "learning_rate": 5.7212295782888525e-05, "loss": 0.2116, "step": 36664 }, { "epoch": 2.970268956578095, "grad_norm": 0.06459393352270126, "learning_rate": 5.720779513029389e-05, "loss": 0.2322, "step": 36665 }, { "epoch": 2.970349967595593, "grad_norm": 0.0697956532239914, "learning_rate": 5.7203294477699266e-05, "loss": 0.2363, "step": 36666 }, { "epoch": 2.9704309786130914, "grad_norm": 0.07393402606248856, "learning_rate": 5.7198793825104646e-05, "loss": 0.2411, "step": 36667 }, { "epoch": 2.9705119896305896, "grad_norm": 0.05831243842840195, "learning_rate": 5.719429317251001e-05, "loss": 0.219, "step": 36668 }, { "epoch": 2.9705930006480883, "grad_norm": 0.06987475603818893, "learning_rate": 5.7189792519915386e-05, "loss": 0.256, "step": 36669 }, { "epoch": 2.9706740116655865, "grad_norm": 0.06129377707839012, "learning_rate": 5.718529186732077e-05, "loss": 0.2088, "step": 36670 }, { "epoch": 2.970755022683085, "grad_norm": 0.06800404191017151, "learning_rate": 5.718079121472614e-05, "loss": 0.2267, "step": 36671 }, { "epoch": 2.970836033700583, "grad_norm": 0.05925722420215607, "learning_rate": 5.717629056213151e-05, "loss": 0.1863, "step": 36672 }, { "epoch": 2.9709170447180817, "grad_norm": 0.06941638886928558, "learning_rate": 5.717178990953689e-05, "loss": 0.2214, "step": 36673 }, { "epoch": 2.97099805573558, "grad_norm": 0.07604862004518509, "learning_rate": 5.716728925694226e-05, "loss": 0.2401, "step": 36674 }, { "epoch": 2.971079066753078, "grad_norm": 0.066129170358181, "learning_rate": 5.716278860434763e-05, "loss": 0.2286, "step": 36675 }, { "epoch": 2.971160077770577, "grad_norm": 0.07583235949277878, "learning_rate": 5.715828795175301e-05, "loss": 0.2284, "step": 36676 }, { "epoch": 2.971241088788075, "grad_norm": 0.06899145990610123, "learning_rate": 5.715378729915838e-05, "loss": 0.2332, "step": 36677 }, { "epoch": 2.9713220998055734, "grad_norm": 0.058244358748197556, "learning_rate": 5.714928664656376e-05, "loss": 0.2607, "step": 36678 }, { "epoch": 2.971403110823072, "grad_norm": 0.0775933712720871, "learning_rate": 5.714478599396913e-05, "loss": 0.227, "step": 36679 }, { "epoch": 2.9714841218405703, "grad_norm": 0.08306246995925903, "learning_rate": 5.71402853413745e-05, "loss": 0.2446, "step": 36680 }, { "epoch": 2.9715651328580686, "grad_norm": 0.0686577782034874, "learning_rate": 5.713578468877988e-05, "loss": 0.2276, "step": 36681 }, { "epoch": 2.9716461438755672, "grad_norm": 0.05848119035363197, "learning_rate": 5.713128403618525e-05, "loss": 0.2267, "step": 36682 }, { "epoch": 2.9717271548930655, "grad_norm": 0.06222635135054588, "learning_rate": 5.7126783383590624e-05, "loss": 0.2205, "step": 36683 }, { "epoch": 2.9718081659105637, "grad_norm": 0.08012259751558304, "learning_rate": 5.7122282730996004e-05, "loss": 0.2364, "step": 36684 }, { "epoch": 2.9718891769280624, "grad_norm": 0.07313792407512665, "learning_rate": 5.711778207840137e-05, "loss": 0.2682, "step": 36685 }, { "epoch": 2.9719701879455607, "grad_norm": 0.08533124625682831, "learning_rate": 5.7113281425806745e-05, "loss": 0.2391, "step": 36686 }, { "epoch": 2.972051198963059, "grad_norm": 0.07785817980766296, "learning_rate": 5.7108780773212125e-05, "loss": 0.265, "step": 36687 }, { "epoch": 2.9721322099805576, "grad_norm": 0.05892932415008545, "learning_rate": 5.710428012061749e-05, "loss": 0.2122, "step": 36688 }, { "epoch": 2.972213220998056, "grad_norm": 0.06475520133972168, "learning_rate": 5.7099779468022865e-05, "loss": 0.2353, "step": 36689 }, { "epoch": 2.972294232015554, "grad_norm": 0.06920190155506134, "learning_rate": 5.7095278815428246e-05, "loss": 0.238, "step": 36690 }, { "epoch": 2.9723752430330523, "grad_norm": 0.05841432139277458, "learning_rate": 5.709077816283361e-05, "loss": 0.2313, "step": 36691 }, { "epoch": 2.972456254050551, "grad_norm": 0.06791935116052628, "learning_rate": 5.7086277510238986e-05, "loss": 0.2221, "step": 36692 }, { "epoch": 2.9725372650680493, "grad_norm": 0.0724581778049469, "learning_rate": 5.7081776857644367e-05, "loss": 0.237, "step": 36693 }, { "epoch": 2.9726182760855475, "grad_norm": 0.08021465688943863, "learning_rate": 5.707727620504973e-05, "loss": 0.2457, "step": 36694 }, { "epoch": 2.9726992871030458, "grad_norm": 0.06262336671352386, "learning_rate": 5.707277555245511e-05, "loss": 0.2763, "step": 36695 }, { "epoch": 2.9727802981205445, "grad_norm": 0.0640406459569931, "learning_rate": 5.706827489986049e-05, "loss": 0.2138, "step": 36696 }, { "epoch": 2.9728613091380427, "grad_norm": 0.06408407539129257, "learning_rate": 5.7063774247265854e-05, "loss": 0.2061, "step": 36697 }, { "epoch": 2.972942320155541, "grad_norm": 0.06960425525903702, "learning_rate": 5.705927359467123e-05, "loss": 0.2182, "step": 36698 }, { "epoch": 2.9730233311730396, "grad_norm": 0.05899034067988396, "learning_rate": 5.705477294207661e-05, "loss": 0.2301, "step": 36699 }, { "epoch": 2.973104342190538, "grad_norm": 0.07200396806001663, "learning_rate": 5.7050272289481975e-05, "loss": 0.2169, "step": 36700 }, { "epoch": 2.973185353208036, "grad_norm": 0.06714487820863724, "learning_rate": 5.704577163688735e-05, "loss": 0.2566, "step": 36701 }, { "epoch": 2.973266364225535, "grad_norm": 0.07260193675756454, "learning_rate": 5.704127098429273e-05, "loss": 0.235, "step": 36702 }, { "epoch": 2.973347375243033, "grad_norm": 0.0751718133687973, "learning_rate": 5.7036770331698096e-05, "loss": 0.2409, "step": 36703 }, { "epoch": 2.9734283862605313, "grad_norm": 0.06625243276357651, "learning_rate": 5.703226967910347e-05, "loss": 0.2252, "step": 36704 }, { "epoch": 2.97350939727803, "grad_norm": 0.07275108993053436, "learning_rate": 5.702776902650885e-05, "loss": 0.2133, "step": 36705 }, { "epoch": 2.9735904082955282, "grad_norm": 0.08456332236528397, "learning_rate": 5.7023268373914217e-05, "loss": 0.2169, "step": 36706 }, { "epoch": 2.9736714193130265, "grad_norm": 0.052250731736421585, "learning_rate": 5.70187677213196e-05, "loss": 0.2133, "step": 36707 }, { "epoch": 2.973752430330525, "grad_norm": 0.0629974752664566, "learning_rate": 5.701426706872497e-05, "loss": 0.2495, "step": 36708 }, { "epoch": 2.9738334413480234, "grad_norm": 0.07221046835184097, "learning_rate": 5.700976641613034e-05, "loss": 0.235, "step": 36709 }, { "epoch": 2.9739144523655217, "grad_norm": 0.08091980218887329, "learning_rate": 5.700526576353572e-05, "loss": 0.2234, "step": 36710 }, { "epoch": 2.9739954633830203, "grad_norm": 0.07329240441322327, "learning_rate": 5.700076511094109e-05, "loss": 0.2328, "step": 36711 }, { "epoch": 2.9740764744005186, "grad_norm": 0.06695925444364548, "learning_rate": 5.699626445834646e-05, "loss": 0.2266, "step": 36712 }, { "epoch": 2.974157485418017, "grad_norm": 0.07289384305477142, "learning_rate": 5.699176380575184e-05, "loss": 0.2424, "step": 36713 }, { "epoch": 2.974238496435515, "grad_norm": 0.0600985512137413, "learning_rate": 5.698726315315721e-05, "loss": 0.2145, "step": 36714 }, { "epoch": 2.9743195074530138, "grad_norm": 0.06750945746898651, "learning_rate": 5.698276250056258e-05, "loss": 0.2451, "step": 36715 }, { "epoch": 2.974400518470512, "grad_norm": 0.09045244753360748, "learning_rate": 5.697826184796796e-05, "loss": 0.2417, "step": 36716 }, { "epoch": 2.9744815294880103, "grad_norm": 0.07236456125974655, "learning_rate": 5.697376119537333e-05, "loss": 0.2006, "step": 36717 }, { "epoch": 2.9745625405055085, "grad_norm": 0.06211559846997261, "learning_rate": 5.69692605427787e-05, "loss": 0.223, "step": 36718 }, { "epoch": 2.974643551523007, "grad_norm": 0.07024736702442169, "learning_rate": 5.696475989018408e-05, "loss": 0.2447, "step": 36719 }, { "epoch": 2.9747245625405054, "grad_norm": 0.0769864022731781, "learning_rate": 5.6960259237589454e-05, "loss": 0.2559, "step": 36720 }, { "epoch": 2.9748055735580037, "grad_norm": 0.06864888221025467, "learning_rate": 5.695575858499482e-05, "loss": 0.2039, "step": 36721 }, { "epoch": 2.9748865845755024, "grad_norm": 0.06834088265895844, "learning_rate": 5.69512579324002e-05, "loss": 0.2201, "step": 36722 }, { "epoch": 2.9749675955930006, "grad_norm": 0.0637509822845459, "learning_rate": 5.6946757279805575e-05, "loss": 0.2219, "step": 36723 }, { "epoch": 2.975048606610499, "grad_norm": 0.075530506670475, "learning_rate": 5.694225662721094e-05, "loss": 0.223, "step": 36724 }, { "epoch": 2.9751296176279975, "grad_norm": 0.09860153496265411, "learning_rate": 5.693775597461632e-05, "loss": 0.2255, "step": 36725 }, { "epoch": 2.975210628645496, "grad_norm": 0.07422681152820587, "learning_rate": 5.6933255322021695e-05, "loss": 0.2502, "step": 36726 }, { "epoch": 2.975291639662994, "grad_norm": 0.06554101407527924, "learning_rate": 5.692875466942706e-05, "loss": 0.2168, "step": 36727 }, { "epoch": 2.9753726506804927, "grad_norm": 0.05854536220431328, "learning_rate": 5.692425401683244e-05, "loss": 0.2171, "step": 36728 }, { "epoch": 2.975453661697991, "grad_norm": 0.07138527929782867, "learning_rate": 5.6919753364237816e-05, "loss": 0.2234, "step": 36729 }, { "epoch": 2.975534672715489, "grad_norm": 0.06646374613046646, "learning_rate": 5.691525271164318e-05, "loss": 0.2106, "step": 36730 }, { "epoch": 2.975615683732988, "grad_norm": 0.061085499823093414, "learning_rate": 5.6910752059048563e-05, "loss": 0.2047, "step": 36731 }, { "epoch": 2.975696694750486, "grad_norm": 0.07485700398683548, "learning_rate": 5.690625140645394e-05, "loss": 0.2248, "step": 36732 }, { "epoch": 2.9757777057679844, "grad_norm": 0.05422642081975937, "learning_rate": 5.690175075385932e-05, "loss": 0.2411, "step": 36733 }, { "epoch": 2.975858716785483, "grad_norm": 0.06741956621408463, "learning_rate": 5.6897250101264684e-05, "loss": 0.2408, "step": 36734 }, { "epoch": 2.9759397278029813, "grad_norm": 0.05959073826670647, "learning_rate": 5.689274944867006e-05, "loss": 0.232, "step": 36735 }, { "epoch": 2.9760207388204796, "grad_norm": 0.08108634501695633, "learning_rate": 5.688824879607544e-05, "loss": 0.2572, "step": 36736 }, { "epoch": 2.976101749837978, "grad_norm": 0.07710261642932892, "learning_rate": 5.6883748143480805e-05, "loss": 0.2616, "step": 36737 }, { "epoch": 2.9761827608554765, "grad_norm": 0.05549994856119156, "learning_rate": 5.687924749088618e-05, "loss": 0.1959, "step": 36738 }, { "epoch": 2.9762637718729748, "grad_norm": 0.07519969344139099, "learning_rate": 5.687474683829156e-05, "loss": 0.2243, "step": 36739 }, { "epoch": 2.976344782890473, "grad_norm": 0.06337859481573105, "learning_rate": 5.687024618569693e-05, "loss": 0.2568, "step": 36740 }, { "epoch": 2.9764257939079712, "grad_norm": 0.07228632271289825, "learning_rate": 5.68657455331023e-05, "loss": 0.2505, "step": 36741 }, { "epoch": 2.97650680492547, "grad_norm": 0.07821758836507797, "learning_rate": 5.686124488050768e-05, "loss": 0.2524, "step": 36742 }, { "epoch": 2.976587815942968, "grad_norm": 0.0661669373512268, "learning_rate": 5.6856744227913053e-05, "loss": 0.2145, "step": 36743 }, { "epoch": 2.9766688269604664, "grad_norm": 0.04832182824611664, "learning_rate": 5.685224357531842e-05, "loss": 0.215, "step": 36744 }, { "epoch": 2.976749837977965, "grad_norm": 0.07201585918664932, "learning_rate": 5.68477429227238e-05, "loss": 0.245, "step": 36745 }, { "epoch": 2.9768308489954634, "grad_norm": 0.06247623264789581, "learning_rate": 5.6843242270129174e-05, "loss": 0.2146, "step": 36746 }, { "epoch": 2.9769118600129616, "grad_norm": 0.06249498575925827, "learning_rate": 5.683874161753454e-05, "loss": 0.2188, "step": 36747 }, { "epoch": 2.9769928710304603, "grad_norm": 0.0698925331234932, "learning_rate": 5.683424096493992e-05, "loss": 0.2434, "step": 36748 }, { "epoch": 2.9770738820479585, "grad_norm": 0.06198454648256302, "learning_rate": 5.6829740312345295e-05, "loss": 0.2281, "step": 36749 }, { "epoch": 2.9771548930654568, "grad_norm": 0.07053163647651672, "learning_rate": 5.682523965975066e-05, "loss": 0.248, "step": 36750 }, { "epoch": 2.9772359040829555, "grad_norm": 0.07296600192785263, "learning_rate": 5.682073900715604e-05, "loss": 0.2231, "step": 36751 }, { "epoch": 2.9773169151004537, "grad_norm": 0.0734759047627449, "learning_rate": 5.6816238354561416e-05, "loss": 0.222, "step": 36752 }, { "epoch": 2.977397926117952, "grad_norm": 0.06778737157583237, "learning_rate": 5.681173770196678e-05, "loss": 0.2413, "step": 36753 }, { "epoch": 2.9774789371354506, "grad_norm": 0.06905605643987656, "learning_rate": 5.680723704937216e-05, "loss": 0.2188, "step": 36754 }, { "epoch": 2.977559948152949, "grad_norm": 0.07669002562761307, "learning_rate": 5.680273639677754e-05, "loss": 0.2266, "step": 36755 }, { "epoch": 2.977640959170447, "grad_norm": 0.0734478309750557, "learning_rate": 5.6798235744182904e-05, "loss": 0.2303, "step": 36756 }, { "epoch": 2.977721970187946, "grad_norm": 0.0745403915643692, "learning_rate": 5.6793735091588284e-05, "loss": 0.2795, "step": 36757 }, { "epoch": 2.977802981205444, "grad_norm": 0.0757230594754219, "learning_rate": 5.678923443899366e-05, "loss": 0.2217, "step": 36758 }, { "epoch": 2.9778839922229423, "grad_norm": 0.09342848509550095, "learning_rate": 5.678473378639904e-05, "loss": 0.2421, "step": 36759 }, { "epoch": 2.9779650032404406, "grad_norm": 0.07937044650316238, "learning_rate": 5.6780233133804405e-05, "loss": 0.224, "step": 36760 }, { "epoch": 2.9780460142579392, "grad_norm": 0.06836716085672379, "learning_rate": 5.677573248120978e-05, "loss": 0.2351, "step": 36761 }, { "epoch": 2.9781270252754375, "grad_norm": 0.05467992275953293, "learning_rate": 5.677123182861516e-05, "loss": 0.2282, "step": 36762 }, { "epoch": 2.9782080362929357, "grad_norm": 0.08199840039014816, "learning_rate": 5.6766731176020526e-05, "loss": 0.2023, "step": 36763 }, { "epoch": 2.978289047310434, "grad_norm": 0.06809769570827484, "learning_rate": 5.67622305234259e-05, "loss": 0.2468, "step": 36764 }, { "epoch": 2.9783700583279327, "grad_norm": 0.06043122336268425, "learning_rate": 5.675772987083128e-05, "loss": 0.2184, "step": 36765 }, { "epoch": 2.978451069345431, "grad_norm": 0.06579332053661346, "learning_rate": 5.6753229218236646e-05, "loss": 0.2366, "step": 36766 }, { "epoch": 2.978532080362929, "grad_norm": 0.06955967098474503, "learning_rate": 5.674872856564202e-05, "loss": 0.2295, "step": 36767 }, { "epoch": 2.978613091380428, "grad_norm": 0.0776662528514862, "learning_rate": 5.67442279130474e-05, "loss": 0.1878, "step": 36768 }, { "epoch": 2.978694102397926, "grad_norm": 0.06722002476453781, "learning_rate": 5.673972726045277e-05, "loss": 0.209, "step": 36769 }, { "epoch": 2.9787751134154243, "grad_norm": 0.07205243408679962, "learning_rate": 5.673522660785814e-05, "loss": 0.1897, "step": 36770 }, { "epoch": 2.978856124432923, "grad_norm": 0.06477699428796768, "learning_rate": 5.673072595526352e-05, "loss": 0.2377, "step": 36771 }, { "epoch": 2.9789371354504213, "grad_norm": 0.06230941042304039, "learning_rate": 5.672622530266889e-05, "loss": 0.2186, "step": 36772 }, { "epoch": 2.9790181464679195, "grad_norm": 0.07898541539907455, "learning_rate": 5.672172465007426e-05, "loss": 0.2367, "step": 36773 }, { "epoch": 2.979099157485418, "grad_norm": 0.11465831845998764, "learning_rate": 5.671722399747964e-05, "loss": 0.2297, "step": 36774 }, { "epoch": 2.9791801685029164, "grad_norm": 0.07188776135444641, "learning_rate": 5.671272334488501e-05, "loss": 0.2447, "step": 36775 }, { "epoch": 2.9792611795204147, "grad_norm": 0.07775386422872543, "learning_rate": 5.670822269229038e-05, "loss": 0.2155, "step": 36776 }, { "epoch": 2.9793421905379134, "grad_norm": 0.08862534910440445, "learning_rate": 5.670372203969576e-05, "loss": 0.2713, "step": 36777 }, { "epoch": 2.9794232015554116, "grad_norm": 0.06923260539770126, "learning_rate": 5.669922138710113e-05, "loss": 0.2396, "step": 36778 }, { "epoch": 2.97950421257291, "grad_norm": 0.07492440193891525, "learning_rate": 5.66947207345065e-05, "loss": 0.2408, "step": 36779 }, { "epoch": 2.9795852235904086, "grad_norm": 0.0657166913151741, "learning_rate": 5.6690220081911884e-05, "loss": 0.2263, "step": 36780 }, { "epoch": 2.979666234607907, "grad_norm": 0.06350436806678772, "learning_rate": 5.668571942931725e-05, "loss": 0.2342, "step": 36781 }, { "epoch": 2.979747245625405, "grad_norm": 0.07210227102041245, "learning_rate": 5.6681218776722624e-05, "loss": 0.2376, "step": 36782 }, { "epoch": 2.9798282566429033, "grad_norm": 0.08657340705394745, "learning_rate": 5.6676718124128004e-05, "loss": 0.2338, "step": 36783 }, { "epoch": 2.9799092676604015, "grad_norm": 0.06363702565431595, "learning_rate": 5.667221747153337e-05, "loss": 0.2116, "step": 36784 }, { "epoch": 2.9799902786779002, "grad_norm": 0.07759935408830643, "learning_rate": 5.666771681893875e-05, "loss": 0.2377, "step": 36785 }, { "epoch": 2.9800712896953985, "grad_norm": 0.07095187157392502, "learning_rate": 5.6663216166344125e-05, "loss": 0.2431, "step": 36786 }, { "epoch": 2.9801523007128967, "grad_norm": 0.07811301946640015, "learning_rate": 5.665871551374949e-05, "loss": 0.2804, "step": 36787 }, { "epoch": 2.9802333117303954, "grad_norm": 0.0775509923696518, "learning_rate": 5.665421486115487e-05, "loss": 0.2445, "step": 36788 }, { "epoch": 2.9803143227478937, "grad_norm": 0.05990509316325188, "learning_rate": 5.6649714208560246e-05, "loss": 0.2156, "step": 36789 }, { "epoch": 2.980395333765392, "grad_norm": 0.0601307637989521, "learning_rate": 5.664521355596561e-05, "loss": 0.1895, "step": 36790 }, { "epoch": 2.9804763447828906, "grad_norm": 0.06881251186132431, "learning_rate": 5.664071290337099e-05, "loss": 0.2413, "step": 36791 }, { "epoch": 2.980557355800389, "grad_norm": 0.06957107782363892, "learning_rate": 5.663621225077637e-05, "loss": 0.2475, "step": 36792 }, { "epoch": 2.980638366817887, "grad_norm": 0.07004716247320175, "learning_rate": 5.6631711598181734e-05, "loss": 0.2469, "step": 36793 }, { "epoch": 2.9807193778353858, "grad_norm": 0.06502577662467957, "learning_rate": 5.6627210945587114e-05, "loss": 0.2572, "step": 36794 }, { "epoch": 2.980800388852884, "grad_norm": 0.07077474892139435, "learning_rate": 5.662271029299249e-05, "loss": 0.2167, "step": 36795 }, { "epoch": 2.9808813998703823, "grad_norm": 0.07815693318843842, "learning_rate": 5.6618209640397854e-05, "loss": 0.2498, "step": 36796 }, { "epoch": 2.980962410887881, "grad_norm": 0.06587857753038406, "learning_rate": 5.6613708987803235e-05, "loss": 0.2151, "step": 36797 }, { "epoch": 2.981043421905379, "grad_norm": 0.07732000946998596, "learning_rate": 5.660920833520861e-05, "loss": 0.2745, "step": 36798 }, { "epoch": 2.9811244329228774, "grad_norm": 0.06029903516173363, "learning_rate": 5.6604707682613975e-05, "loss": 0.2348, "step": 36799 }, { "epoch": 2.981205443940376, "grad_norm": 0.0725325271487236, "learning_rate": 5.6600207030019356e-05, "loss": 0.2739, "step": 36800 }, { "epoch": 2.9812864549578744, "grad_norm": 0.07531344890594482, "learning_rate": 5.659570637742473e-05, "loss": 0.2513, "step": 36801 }, { "epoch": 2.9813674659753726, "grad_norm": 0.08409595489501953, "learning_rate": 5.6591205724830096e-05, "loss": 0.2855, "step": 36802 }, { "epoch": 2.9814484769928713, "grad_norm": 0.0734337642788887, "learning_rate": 5.6586705072235476e-05, "loss": 0.2331, "step": 36803 }, { "epoch": 2.9815294880103695, "grad_norm": 0.05683013051748276, "learning_rate": 5.658220441964085e-05, "loss": 0.2541, "step": 36804 }, { "epoch": 2.981610499027868, "grad_norm": 0.08660989999771118, "learning_rate": 5.657770376704622e-05, "loss": 0.2709, "step": 36805 }, { "epoch": 2.981691510045366, "grad_norm": 0.0753931775689125, "learning_rate": 5.65732031144516e-05, "loss": 0.239, "step": 36806 }, { "epoch": 2.9817725210628643, "grad_norm": 0.06659752875566483, "learning_rate": 5.656870246185697e-05, "loss": 0.2585, "step": 36807 }, { "epoch": 2.981853532080363, "grad_norm": 0.07593297213315964, "learning_rate": 5.656420180926234e-05, "loss": 0.2474, "step": 36808 }, { "epoch": 2.981934543097861, "grad_norm": 0.06416141241788864, "learning_rate": 5.6559701156667725e-05, "loss": 0.2309, "step": 36809 }, { "epoch": 2.9820155541153595, "grad_norm": 0.06690444052219391, "learning_rate": 5.655520050407309e-05, "loss": 0.254, "step": 36810 }, { "epoch": 2.982096565132858, "grad_norm": 0.06408987939357758, "learning_rate": 5.655069985147847e-05, "loss": 0.2347, "step": 36811 }, { "epoch": 2.9821775761503564, "grad_norm": 0.06503544747829437, "learning_rate": 5.6546199198883846e-05, "loss": 0.2096, "step": 36812 }, { "epoch": 2.9822585871678546, "grad_norm": 0.06841304898262024, "learning_rate": 5.654169854628921e-05, "loss": 0.2502, "step": 36813 }, { "epoch": 2.9823395981853533, "grad_norm": 0.0618809312582016, "learning_rate": 5.653719789369459e-05, "loss": 0.2488, "step": 36814 }, { "epoch": 2.9824206092028516, "grad_norm": 0.09338065981864929, "learning_rate": 5.6532697241099966e-05, "loss": 0.2322, "step": 36815 }, { "epoch": 2.98250162022035, "grad_norm": 0.07193892449140549, "learning_rate": 5.652819658850533e-05, "loss": 0.2355, "step": 36816 }, { "epoch": 2.9825826312378485, "grad_norm": 0.07920563220977783, "learning_rate": 5.6523695935910714e-05, "loss": 0.2611, "step": 36817 }, { "epoch": 2.9826636422553467, "grad_norm": 0.06333600729703903, "learning_rate": 5.651919528331609e-05, "loss": 0.1964, "step": 36818 }, { "epoch": 2.982744653272845, "grad_norm": 0.06896653026342392, "learning_rate": 5.6514694630721454e-05, "loss": 0.2281, "step": 36819 }, { "epoch": 2.9828256642903437, "grad_norm": 0.07341278344392776, "learning_rate": 5.6510193978126834e-05, "loss": 0.2536, "step": 36820 }, { "epoch": 2.982906675307842, "grad_norm": 0.05979539453983307, "learning_rate": 5.650569332553221e-05, "loss": 0.2331, "step": 36821 }, { "epoch": 2.98298768632534, "grad_norm": 0.07593628764152527, "learning_rate": 5.6501192672937575e-05, "loss": 0.2168, "step": 36822 }, { "epoch": 2.983068697342839, "grad_norm": 0.06415986269712448, "learning_rate": 5.6496692020342955e-05, "loss": 0.2266, "step": 36823 }, { "epoch": 2.983149708360337, "grad_norm": 0.0640503317117691, "learning_rate": 5.649219136774833e-05, "loss": 0.2336, "step": 36824 }, { "epoch": 2.9832307193778353, "grad_norm": 0.06948990374803543, "learning_rate": 5.6487690715153696e-05, "loss": 0.2501, "step": 36825 }, { "epoch": 2.983311730395334, "grad_norm": 0.05950549244880676, "learning_rate": 5.6483190062559076e-05, "loss": 0.2023, "step": 36826 }, { "epoch": 2.9833927414128323, "grad_norm": 0.06723881512880325, "learning_rate": 5.647868940996445e-05, "loss": 0.2492, "step": 36827 }, { "epoch": 2.9834737524303305, "grad_norm": 0.07525148242712021, "learning_rate": 5.6474188757369817e-05, "loss": 0.2129, "step": 36828 }, { "epoch": 2.9835547634478288, "grad_norm": 0.06187017261981964, "learning_rate": 5.64696881047752e-05, "loss": 0.2312, "step": 36829 }, { "epoch": 2.983635774465327, "grad_norm": 0.06554147601127625, "learning_rate": 5.646518745218057e-05, "loss": 0.2335, "step": 36830 }, { "epoch": 2.9837167854828257, "grad_norm": 0.08165033906698227, "learning_rate": 5.646068679958594e-05, "loss": 0.2255, "step": 36831 }, { "epoch": 2.983797796500324, "grad_norm": 0.07512885332107544, "learning_rate": 5.645618614699132e-05, "loss": 0.2286, "step": 36832 }, { "epoch": 2.983878807517822, "grad_norm": 0.07381787896156311, "learning_rate": 5.645168549439669e-05, "loss": 0.2278, "step": 36833 }, { "epoch": 2.983959818535321, "grad_norm": 0.05407465249300003, "learning_rate": 5.644718484180206e-05, "loss": 0.2133, "step": 36834 }, { "epoch": 2.984040829552819, "grad_norm": 0.07097189873456955, "learning_rate": 5.644268418920744e-05, "loss": 0.2419, "step": 36835 }, { "epoch": 2.9841218405703174, "grad_norm": 0.06315812468528748, "learning_rate": 5.643818353661281e-05, "loss": 0.2211, "step": 36836 }, { "epoch": 2.984202851587816, "grad_norm": 0.08845272660255432, "learning_rate": 5.643368288401819e-05, "loss": 0.2207, "step": 36837 }, { "epoch": 2.9842838626053143, "grad_norm": 0.06286734342575073, "learning_rate": 5.642918223142356e-05, "loss": 0.2274, "step": 36838 }, { "epoch": 2.9843648736228126, "grad_norm": 0.06748202443122864, "learning_rate": 5.642468157882893e-05, "loss": 0.2246, "step": 36839 }, { "epoch": 2.9844458846403112, "grad_norm": 0.07265258580446243, "learning_rate": 5.642018092623431e-05, "loss": 0.2506, "step": 36840 }, { "epoch": 2.9845268956578095, "grad_norm": 0.07095389068126678, "learning_rate": 5.641568027363968e-05, "loss": 0.2023, "step": 36841 }, { "epoch": 2.9846079066753077, "grad_norm": 0.06580625474452972, "learning_rate": 5.6411179621045054e-05, "loss": 0.2145, "step": 36842 }, { "epoch": 2.9846889176928064, "grad_norm": 0.07952791452407837, "learning_rate": 5.6406678968450434e-05, "loss": 0.2439, "step": 36843 }, { "epoch": 2.9847699287103047, "grad_norm": 0.06329455226659775, "learning_rate": 5.64021783158558e-05, "loss": 0.2118, "step": 36844 }, { "epoch": 2.984850939727803, "grad_norm": 0.06945721805095673, "learning_rate": 5.6397677663261175e-05, "loss": 0.2085, "step": 36845 }, { "epoch": 2.9849319507453016, "grad_norm": 0.06174931675195694, "learning_rate": 5.6393177010666555e-05, "loss": 0.2488, "step": 36846 }, { "epoch": 2.9850129617628, "grad_norm": 0.0708073079586029, "learning_rate": 5.638867635807192e-05, "loss": 0.2367, "step": 36847 }, { "epoch": 2.985093972780298, "grad_norm": 0.06488966196775436, "learning_rate": 5.6384175705477295e-05, "loss": 0.2631, "step": 36848 }, { "epoch": 2.9851749837977968, "grad_norm": 0.06657645851373672, "learning_rate": 5.6379675052882676e-05, "loss": 0.2297, "step": 36849 }, { "epoch": 2.985255994815295, "grad_norm": 0.07657184451818466, "learning_rate": 5.637517440028804e-05, "loss": 0.2499, "step": 36850 }, { "epoch": 2.9853370058327933, "grad_norm": 0.07391027361154556, "learning_rate": 5.6370673747693416e-05, "loss": 0.2909, "step": 36851 }, { "epoch": 2.9854180168502915, "grad_norm": 0.08322843164205551, "learning_rate": 5.6366173095098797e-05, "loss": 0.2532, "step": 36852 }, { "epoch": 2.9854990278677898, "grad_norm": 0.0625251829624176, "learning_rate": 5.636167244250416e-05, "loss": 0.2415, "step": 36853 }, { "epoch": 2.9855800388852884, "grad_norm": 0.07309753447771072, "learning_rate": 5.635717178990954e-05, "loss": 0.2235, "step": 36854 }, { "epoch": 2.9856610499027867, "grad_norm": 0.10035616904497147, "learning_rate": 5.635267113731492e-05, "loss": 0.2233, "step": 36855 }, { "epoch": 2.985742060920285, "grad_norm": 0.06504696607589722, "learning_rate": 5.6348170484720284e-05, "loss": 0.2555, "step": 36856 }, { "epoch": 2.9858230719377836, "grad_norm": 0.05904727056622505, "learning_rate": 5.634366983212566e-05, "loss": 0.209, "step": 36857 }, { "epoch": 2.985904082955282, "grad_norm": 0.057460296899080276, "learning_rate": 5.633916917953104e-05, "loss": 0.2188, "step": 36858 }, { "epoch": 2.98598509397278, "grad_norm": 0.08421430736780167, "learning_rate": 5.6334668526936405e-05, "loss": 0.2448, "step": 36859 }, { "epoch": 2.986066104990279, "grad_norm": 0.07628507912158966, "learning_rate": 5.633016787434178e-05, "loss": 0.2689, "step": 36860 }, { "epoch": 2.986147116007777, "grad_norm": 0.06346532702445984, "learning_rate": 5.632566722174716e-05, "loss": 0.2263, "step": 36861 }, { "epoch": 2.9862281270252753, "grad_norm": 0.06182417646050453, "learning_rate": 5.6321166569152526e-05, "loss": 0.2284, "step": 36862 }, { "epoch": 2.986309138042774, "grad_norm": 0.07870961725711823, "learning_rate": 5.6316665916557906e-05, "loss": 0.2217, "step": 36863 }, { "epoch": 2.9863901490602722, "grad_norm": 0.06632569432258606, "learning_rate": 5.631216526396328e-05, "loss": 0.234, "step": 36864 }, { "epoch": 2.9864711600777705, "grad_norm": 0.06690378487110138, "learning_rate": 5.630766461136865e-05, "loss": 0.2192, "step": 36865 }, { "epoch": 2.986552171095269, "grad_norm": 0.0669408068060875, "learning_rate": 5.630316395877403e-05, "loss": 0.2296, "step": 36866 }, { "epoch": 2.9866331821127674, "grad_norm": 0.07498961687088013, "learning_rate": 5.62986633061794e-05, "loss": 0.2248, "step": 36867 }, { "epoch": 2.9867141931302656, "grad_norm": 0.07754261046648026, "learning_rate": 5.629416265358477e-05, "loss": 0.2422, "step": 36868 }, { "epoch": 2.9867952041477643, "grad_norm": 0.06618178635835648, "learning_rate": 5.628966200099015e-05, "loss": 0.2741, "step": 36869 }, { "epoch": 2.9868762151652626, "grad_norm": 0.06932277977466583, "learning_rate": 5.628516134839552e-05, "loss": 0.2565, "step": 36870 }, { "epoch": 2.986957226182761, "grad_norm": 0.08020354807376862, "learning_rate": 5.628066069580089e-05, "loss": 0.2391, "step": 36871 }, { "epoch": 2.987038237200259, "grad_norm": 0.07193691283464432, "learning_rate": 5.627616004320627e-05, "loss": 0.2755, "step": 36872 }, { "epoch": 2.9871192482177578, "grad_norm": 0.0832822322845459, "learning_rate": 5.627165939061164e-05, "loss": 0.2179, "step": 36873 }, { "epoch": 2.987200259235256, "grad_norm": 0.06554457545280457, "learning_rate": 5.626715873801701e-05, "loss": 0.2217, "step": 36874 }, { "epoch": 2.9872812702527543, "grad_norm": 0.05843869596719742, "learning_rate": 5.626265808542239e-05, "loss": 0.2312, "step": 36875 }, { "epoch": 2.9873622812702525, "grad_norm": 0.06552619487047195, "learning_rate": 5.625815743282776e-05, "loss": 0.2341, "step": 36876 }, { "epoch": 2.987443292287751, "grad_norm": 0.07473114132881165, "learning_rate": 5.625365678023313e-05, "loss": 0.2564, "step": 36877 }, { "epoch": 2.9875243033052494, "grad_norm": 0.08000738173723221, "learning_rate": 5.624915612763852e-05, "loss": 0.2256, "step": 36878 }, { "epoch": 2.9876053143227477, "grad_norm": 0.06652722507715225, "learning_rate": 5.6244655475043884e-05, "loss": 0.22, "step": 36879 }, { "epoch": 2.9876863253402464, "grad_norm": 0.06857480853796005, "learning_rate": 5.624015482244925e-05, "loss": 0.1864, "step": 36880 }, { "epoch": 2.9877673363577446, "grad_norm": 0.06475800275802612, "learning_rate": 5.623565416985464e-05, "loss": 0.2225, "step": 36881 }, { "epoch": 2.987848347375243, "grad_norm": 0.06667593121528625, "learning_rate": 5.6231153517260005e-05, "loss": 0.2186, "step": 36882 }, { "epoch": 2.9879293583927415, "grad_norm": 0.06999807804822922, "learning_rate": 5.622665286466537e-05, "loss": 0.2023, "step": 36883 }, { "epoch": 2.98801036941024, "grad_norm": 0.0566612184047699, "learning_rate": 5.622215221207076e-05, "loss": 0.2193, "step": 36884 }, { "epoch": 2.988091380427738, "grad_norm": 0.06729897856712341, "learning_rate": 5.6217651559476125e-05, "loss": 0.2308, "step": 36885 }, { "epoch": 2.9881723914452367, "grad_norm": 0.06882520765066147, "learning_rate": 5.621315090688149e-05, "loss": 0.2252, "step": 36886 }, { "epoch": 2.988253402462735, "grad_norm": 0.07283573597669601, "learning_rate": 5.620865025428688e-05, "loss": 0.2624, "step": 36887 }, { "epoch": 2.988334413480233, "grad_norm": 0.06333566457033157, "learning_rate": 5.6204149601692246e-05, "loss": 0.2408, "step": 36888 }, { "epoch": 2.988415424497732, "grad_norm": 0.08885639905929565, "learning_rate": 5.619964894909761e-05, "loss": 0.1944, "step": 36889 }, { "epoch": 2.98849643551523, "grad_norm": 0.08246281743049622, "learning_rate": 5.6195148296503e-05, "loss": 0.212, "step": 36890 }, { "epoch": 2.9885774465327284, "grad_norm": 0.09889711439609528, "learning_rate": 5.619064764390837e-05, "loss": 0.257, "step": 36891 }, { "epoch": 2.988658457550227, "grad_norm": 0.07491931319236755, "learning_rate": 5.618614699131375e-05, "loss": 0.2262, "step": 36892 }, { "epoch": 2.9887394685677253, "grad_norm": 0.06972592324018478, "learning_rate": 5.618164633871912e-05, "loss": 0.2335, "step": 36893 }, { "epoch": 2.9888204795852236, "grad_norm": 0.06400167942047119, "learning_rate": 5.617714568612449e-05, "loss": 0.1905, "step": 36894 }, { "epoch": 2.988901490602722, "grad_norm": 0.06958544999361038, "learning_rate": 5.617264503352987e-05, "loss": 0.2395, "step": 36895 }, { "epoch": 2.9889825016202205, "grad_norm": 0.09128827601671219, "learning_rate": 5.616814438093524e-05, "loss": 0.2448, "step": 36896 }, { "epoch": 2.9890635126377187, "grad_norm": 0.07656344026327133, "learning_rate": 5.616364372834061e-05, "loss": 0.2378, "step": 36897 }, { "epoch": 2.989144523655217, "grad_norm": 0.07432923465967178, "learning_rate": 5.615914307574599e-05, "loss": 0.2323, "step": 36898 }, { "epoch": 2.9892255346727152, "grad_norm": 0.07078418880701065, "learning_rate": 5.615464242315136e-05, "loss": 0.2494, "step": 36899 }, { "epoch": 2.989306545690214, "grad_norm": 0.0705699622631073, "learning_rate": 5.615014177055673e-05, "loss": 0.2592, "step": 36900 }, { "epoch": 2.989387556707712, "grad_norm": 0.07478456199169159, "learning_rate": 5.614564111796211e-05, "loss": 0.2549, "step": 36901 }, { "epoch": 2.9894685677252104, "grad_norm": 0.0605703704059124, "learning_rate": 5.6141140465367484e-05, "loss": 0.2604, "step": 36902 }, { "epoch": 2.989549578742709, "grad_norm": 0.06664056330919266, "learning_rate": 5.613663981277285e-05, "loss": 0.2315, "step": 36903 }, { "epoch": 2.9896305897602073, "grad_norm": 0.06250179558992386, "learning_rate": 5.613213916017823e-05, "loss": 0.2196, "step": 36904 }, { "epoch": 2.9897116007777056, "grad_norm": 0.07414854317903519, "learning_rate": 5.6127638507583604e-05, "loss": 0.2265, "step": 36905 }, { "epoch": 2.9897926117952043, "grad_norm": 0.05994947999715805, "learning_rate": 5.612313785498897e-05, "loss": 0.2559, "step": 36906 }, { "epoch": 2.9898736228127025, "grad_norm": 0.06740280985832214, "learning_rate": 5.611863720239435e-05, "loss": 0.2213, "step": 36907 }, { "epoch": 2.9899546338302008, "grad_norm": 0.057032033801078796, "learning_rate": 5.6114136549799725e-05, "loss": 0.2229, "step": 36908 }, { "epoch": 2.9900356448476995, "grad_norm": 0.06304501742124557, "learning_rate": 5.610963589720509e-05, "loss": 0.2175, "step": 36909 }, { "epoch": 2.9901166558651977, "grad_norm": 0.0731239914894104, "learning_rate": 5.610513524461047e-05, "loss": 0.2228, "step": 36910 }, { "epoch": 2.990197666882696, "grad_norm": 0.07033661007881165, "learning_rate": 5.6100634592015846e-05, "loss": 0.2895, "step": 36911 }, { "epoch": 2.9902786779001946, "grad_norm": 0.06642790138721466, "learning_rate": 5.609613393942121e-05, "loss": 0.256, "step": 36912 }, { "epoch": 2.990359688917693, "grad_norm": 0.06933210045099258, "learning_rate": 5.609163328682659e-05, "loss": 0.2643, "step": 36913 }, { "epoch": 2.990440699935191, "grad_norm": 0.08031534403562546, "learning_rate": 5.608713263423197e-05, "loss": 0.2476, "step": 36914 }, { "epoch": 2.99052171095269, "grad_norm": 0.06195875257253647, "learning_rate": 5.6082631981637334e-05, "loss": 0.2046, "step": 36915 }, { "epoch": 2.990602721970188, "grad_norm": 0.07216474413871765, "learning_rate": 5.6078131329042714e-05, "loss": 0.2256, "step": 36916 }, { "epoch": 2.9906837329876863, "grad_norm": 0.06766729801893234, "learning_rate": 5.607363067644809e-05, "loss": 0.2458, "step": 36917 }, { "epoch": 2.9907647440051845, "grad_norm": 0.07051542401313782, "learning_rate": 5.606913002385347e-05, "loss": 0.2508, "step": 36918 }, { "epoch": 2.9908457550226832, "grad_norm": 0.08813019841909409, "learning_rate": 5.6064629371258835e-05, "loss": 0.2302, "step": 36919 }, { "epoch": 2.9909267660401815, "grad_norm": 0.08156602829694748, "learning_rate": 5.606012871866421e-05, "loss": 0.229, "step": 36920 }, { "epoch": 2.9910077770576797, "grad_norm": 0.0654522255063057, "learning_rate": 5.605562806606959e-05, "loss": 0.2237, "step": 36921 }, { "epoch": 2.991088788075178, "grad_norm": 0.07376670837402344, "learning_rate": 5.6051127413474956e-05, "loss": 0.2159, "step": 36922 }, { "epoch": 2.9911697990926767, "grad_norm": 0.07919562608003616, "learning_rate": 5.604662676088033e-05, "loss": 0.2493, "step": 36923 }, { "epoch": 2.991250810110175, "grad_norm": 0.0770791545510292, "learning_rate": 5.604212610828571e-05, "loss": 0.2226, "step": 36924 }, { "epoch": 2.991331821127673, "grad_norm": 0.06926511228084564, "learning_rate": 5.6037625455691076e-05, "loss": 0.2281, "step": 36925 }, { "epoch": 2.991412832145172, "grad_norm": 0.08092325925827026, "learning_rate": 5.603312480309645e-05, "loss": 0.2247, "step": 36926 }, { "epoch": 2.99149384316267, "grad_norm": 0.0863441750407219, "learning_rate": 5.602862415050183e-05, "loss": 0.2467, "step": 36927 }, { "epoch": 2.9915748541801683, "grad_norm": 0.06858447194099426, "learning_rate": 5.60241234979072e-05, "loss": 0.2399, "step": 36928 }, { "epoch": 2.991655865197667, "grad_norm": 0.07151912152767181, "learning_rate": 5.601962284531257e-05, "loss": 0.2374, "step": 36929 }, { "epoch": 2.9917368762151653, "grad_norm": 0.07646768540143967, "learning_rate": 5.601512219271795e-05, "loss": 0.2306, "step": 36930 }, { "epoch": 2.9918178872326635, "grad_norm": 0.057797741144895554, "learning_rate": 5.601062154012332e-05, "loss": 0.2212, "step": 36931 }, { "epoch": 2.991898898250162, "grad_norm": 0.0864371731877327, "learning_rate": 5.600612088752869e-05, "loss": 0.2567, "step": 36932 }, { "epoch": 2.9919799092676604, "grad_norm": 0.07477493584156036, "learning_rate": 5.600162023493407e-05, "loss": 0.2252, "step": 36933 }, { "epoch": 2.9920609202851587, "grad_norm": 0.06860797107219696, "learning_rate": 5.599711958233944e-05, "loss": 0.2285, "step": 36934 }, { "epoch": 2.9921419313026574, "grad_norm": 0.06708861887454987, "learning_rate": 5.599261892974481e-05, "loss": 0.2718, "step": 36935 }, { "epoch": 2.9922229423201556, "grad_norm": 0.07017388194799423, "learning_rate": 5.598811827715019e-05, "loss": 0.2182, "step": 36936 }, { "epoch": 2.992303953337654, "grad_norm": 0.07404615730047226, "learning_rate": 5.598361762455556e-05, "loss": 0.2446, "step": 36937 }, { "epoch": 2.9923849643551526, "grad_norm": 0.0662136971950531, "learning_rate": 5.597911697196093e-05, "loss": 0.232, "step": 36938 }, { "epoch": 2.992465975372651, "grad_norm": 0.06485337018966675, "learning_rate": 5.5974616319366314e-05, "loss": 0.2074, "step": 36939 }, { "epoch": 2.992546986390149, "grad_norm": 0.0583493709564209, "learning_rate": 5.597011566677168e-05, "loss": 0.226, "step": 36940 }, { "epoch": 2.9926279974076473, "grad_norm": 0.0701061561703682, "learning_rate": 5.5965615014177054e-05, "loss": 0.218, "step": 36941 }, { "epoch": 2.992709008425146, "grad_norm": 0.06900807470083237, "learning_rate": 5.5961114361582434e-05, "loss": 0.232, "step": 36942 }, { "epoch": 2.9927900194426442, "grad_norm": 0.05951214209198952, "learning_rate": 5.59566137089878e-05, "loss": 0.2214, "step": 36943 }, { "epoch": 2.9928710304601425, "grad_norm": 0.07958030700683594, "learning_rate": 5.595211305639319e-05, "loss": 0.2196, "step": 36944 }, { "epoch": 2.9929520414776407, "grad_norm": 0.05673525854945183, "learning_rate": 5.5947612403798555e-05, "loss": 0.2276, "step": 36945 }, { "epoch": 2.9930330524951394, "grad_norm": 0.06486007571220398, "learning_rate": 5.594311175120392e-05, "loss": 0.2064, "step": 36946 }, { "epoch": 2.9931140635126376, "grad_norm": 0.07947579026222229, "learning_rate": 5.593861109860931e-05, "loss": 0.2512, "step": 36947 }, { "epoch": 2.993195074530136, "grad_norm": 0.059358954429626465, "learning_rate": 5.5934110446014676e-05, "loss": 0.2127, "step": 36948 }, { "epoch": 2.9932760855476346, "grad_norm": 0.09291760623455048, "learning_rate": 5.592960979342004e-05, "loss": 0.2713, "step": 36949 }, { "epoch": 2.993357096565133, "grad_norm": 0.0725402757525444, "learning_rate": 5.592510914082543e-05, "loss": 0.2276, "step": 36950 }, { "epoch": 2.993438107582631, "grad_norm": 0.0793076902627945, "learning_rate": 5.59206084882308e-05, "loss": 0.2456, "step": 36951 }, { "epoch": 2.9935191186001298, "grad_norm": 0.06947968155145645, "learning_rate": 5.5916107835636164e-05, "loss": 0.2153, "step": 36952 }, { "epoch": 2.993600129617628, "grad_norm": 0.07872812449932098, "learning_rate": 5.591160718304155e-05, "loss": 0.2489, "step": 36953 }, { "epoch": 2.9936811406351262, "grad_norm": 0.06870760023593903, "learning_rate": 5.590710653044692e-05, "loss": 0.2383, "step": 36954 }, { "epoch": 2.993762151652625, "grad_norm": 0.08993922919034958, "learning_rate": 5.5902605877852284e-05, "loss": 0.2144, "step": 36955 }, { "epoch": 2.993843162670123, "grad_norm": 0.08114828914403915, "learning_rate": 5.589810522525767e-05, "loss": 0.247, "step": 36956 }, { "epoch": 2.9939241736876214, "grad_norm": 0.05917239189147949, "learning_rate": 5.589360457266304e-05, "loss": 0.2236, "step": 36957 }, { "epoch": 2.99400518470512, "grad_norm": 0.06396820396184921, "learning_rate": 5.5889103920068405e-05, "loss": 0.2262, "step": 36958 }, { "epoch": 2.9940861957226184, "grad_norm": 0.10564498603343964, "learning_rate": 5.588460326747379e-05, "loss": 0.297, "step": 36959 }, { "epoch": 2.9941672067401166, "grad_norm": 0.07465486973524094, "learning_rate": 5.588010261487916e-05, "loss": 0.246, "step": 36960 }, { "epoch": 2.9942482177576153, "grad_norm": 0.06373563408851624, "learning_rate": 5.5875601962284526e-05, "loss": 0.2094, "step": 36961 }, { "epoch": 2.9943292287751135, "grad_norm": 0.06839507073163986, "learning_rate": 5.587110130968991e-05, "loss": 0.2364, "step": 36962 }, { "epoch": 2.994410239792612, "grad_norm": 0.06401334702968597, "learning_rate": 5.586660065709528e-05, "loss": 0.2258, "step": 36963 }, { "epoch": 2.99449125081011, "grad_norm": 0.06055555120110512, "learning_rate": 5.5862100004500654e-05, "loss": 0.244, "step": 36964 }, { "epoch": 2.9945722618276087, "grad_norm": 0.0696861669421196, "learning_rate": 5.5857599351906034e-05, "loss": 0.2492, "step": 36965 }, { "epoch": 2.994653272845107, "grad_norm": 0.08219528198242188, "learning_rate": 5.58530986993114e-05, "loss": 0.2569, "step": 36966 }, { "epoch": 2.994734283862605, "grad_norm": 0.0690721794962883, "learning_rate": 5.5848598046716774e-05, "loss": 0.2122, "step": 36967 }, { "epoch": 2.9948152948801035, "grad_norm": 0.05418196693062782, "learning_rate": 5.5844097394122155e-05, "loss": 0.2198, "step": 36968 }, { "epoch": 2.994896305897602, "grad_norm": 0.06869655102491379, "learning_rate": 5.583959674152752e-05, "loss": 0.2916, "step": 36969 }, { "epoch": 2.9949773169151004, "grad_norm": 0.07241586595773697, "learning_rate": 5.58350960889329e-05, "loss": 0.2491, "step": 36970 }, { "epoch": 2.9950583279325986, "grad_norm": 0.06834710389375687, "learning_rate": 5.5830595436338276e-05, "loss": 0.218, "step": 36971 }, { "epoch": 2.9951393389500973, "grad_norm": 0.06092188134789467, "learning_rate": 5.582609478374364e-05, "loss": 0.2526, "step": 36972 }, { "epoch": 2.9952203499675956, "grad_norm": 0.07484481483697891, "learning_rate": 5.582159413114902e-05, "loss": 0.2255, "step": 36973 }, { "epoch": 2.995301360985094, "grad_norm": 0.06314831227064133, "learning_rate": 5.5817093478554396e-05, "loss": 0.2404, "step": 36974 }, { "epoch": 2.9953823720025925, "grad_norm": 0.06152360513806343, "learning_rate": 5.581259282595976e-05, "loss": 0.2398, "step": 36975 }, { "epoch": 2.9954633830200907, "grad_norm": 0.05533519759774208, "learning_rate": 5.5808092173365144e-05, "loss": 0.2503, "step": 36976 }, { "epoch": 2.995544394037589, "grad_norm": 0.06785975396633148, "learning_rate": 5.580359152077052e-05, "loss": 0.2262, "step": 36977 }, { "epoch": 2.9956254050550877, "grad_norm": 0.08108928799629211, "learning_rate": 5.5799090868175884e-05, "loss": 0.2441, "step": 36978 }, { "epoch": 2.995706416072586, "grad_norm": 0.06509430706501007, "learning_rate": 5.5794590215581265e-05, "loss": 0.2306, "step": 36979 }, { "epoch": 2.995787427090084, "grad_norm": 0.07513480633497238, "learning_rate": 5.579008956298664e-05, "loss": 0.1986, "step": 36980 }, { "epoch": 2.995868438107583, "grad_norm": 0.05923466384410858, "learning_rate": 5.5785588910392005e-05, "loss": 0.2388, "step": 36981 }, { "epoch": 2.995949449125081, "grad_norm": 0.06244102865457535, "learning_rate": 5.5781088257797385e-05, "loss": 0.2288, "step": 36982 }, { "epoch": 2.9960304601425793, "grad_norm": 0.08009325712919235, "learning_rate": 5.577658760520276e-05, "loss": 0.3072, "step": 36983 }, { "epoch": 2.996111471160078, "grad_norm": 0.09328343719244003, "learning_rate": 5.5772086952608126e-05, "loss": 0.2637, "step": 36984 }, { "epoch": 2.9961924821775763, "grad_norm": 0.05990605056285858, "learning_rate": 5.5767586300013506e-05, "loss": 0.2468, "step": 36985 }, { "epoch": 2.9962734931950745, "grad_norm": 0.07123095542192459, "learning_rate": 5.576308564741888e-05, "loss": 0.2032, "step": 36986 }, { "epoch": 2.9963545042125728, "grad_norm": 0.06369782239198685, "learning_rate": 5.5758584994824247e-05, "loss": 0.1956, "step": 36987 }, { "epoch": 2.9964355152300715, "grad_norm": 0.0753772184252739, "learning_rate": 5.575408434222963e-05, "loss": 0.2192, "step": 36988 }, { "epoch": 2.9965165262475697, "grad_norm": 0.05591370910406113, "learning_rate": 5.5749583689635e-05, "loss": 0.2266, "step": 36989 }, { "epoch": 2.996597537265068, "grad_norm": 0.07075093686580658, "learning_rate": 5.574508303704037e-05, "loss": 0.254, "step": 36990 }, { "epoch": 2.996678548282566, "grad_norm": 0.06642140448093414, "learning_rate": 5.574058238444575e-05, "loss": 0.2854, "step": 36991 }, { "epoch": 2.996759559300065, "grad_norm": 0.07355593144893646, "learning_rate": 5.573608173185112e-05, "loss": 0.242, "step": 36992 }, { "epoch": 2.996840570317563, "grad_norm": 0.07146865874528885, "learning_rate": 5.573158107925649e-05, "loss": 0.2138, "step": 36993 }, { "epoch": 2.9969215813350614, "grad_norm": 0.06968890130519867, "learning_rate": 5.572708042666187e-05, "loss": 0.2465, "step": 36994 }, { "epoch": 2.99700259235256, "grad_norm": 0.09615656733512878, "learning_rate": 5.572257977406724e-05, "loss": 0.2272, "step": 36995 }, { "epoch": 2.9970836033700583, "grad_norm": 0.06006643921136856, "learning_rate": 5.571807912147262e-05, "loss": 0.182, "step": 36996 }, { "epoch": 2.9971646143875565, "grad_norm": 0.07367891818284988, "learning_rate": 5.571357846887799e-05, "loss": 0.2238, "step": 36997 }, { "epoch": 2.9972456254050552, "grad_norm": 0.08367826789617538, "learning_rate": 5.570907781628336e-05, "loss": 0.2268, "step": 36998 }, { "epoch": 2.9973266364225535, "grad_norm": 0.07553227990865707, "learning_rate": 5.570457716368874e-05, "loss": 0.2119, "step": 36999 }, { "epoch": 2.9974076474400517, "grad_norm": 0.08225884288549423, "learning_rate": 5.570007651109411e-05, "loss": 0.2727, "step": 37000 }, { "epoch": 2.9974886584575504, "grad_norm": 0.05904098227620125, "learning_rate": 5.5695575858499484e-05, "loss": 0.2552, "step": 37001 }, { "epoch": 2.9975696694750487, "grad_norm": 0.055857256054878235, "learning_rate": 5.5691075205904864e-05, "loss": 0.2265, "step": 37002 }, { "epoch": 2.997650680492547, "grad_norm": 0.060024164617061615, "learning_rate": 5.568657455331023e-05, "loss": 0.2122, "step": 37003 }, { "epoch": 2.9977316915100456, "grad_norm": 0.05858771875500679, "learning_rate": 5.5682073900715605e-05, "loss": 0.2138, "step": 37004 }, { "epoch": 2.997812702527544, "grad_norm": 0.0658746287226677, "learning_rate": 5.5677573248120985e-05, "loss": 0.2397, "step": 37005 }, { "epoch": 2.997893713545042, "grad_norm": 0.07731475681066513, "learning_rate": 5.567307259552635e-05, "loss": 0.2376, "step": 37006 }, { "epoch": 2.9979747245625408, "grad_norm": 0.06727912276983261, "learning_rate": 5.5668571942931725e-05, "loss": 0.2381, "step": 37007 }, { "epoch": 2.998055735580039, "grad_norm": 0.06332457065582275, "learning_rate": 5.5664071290337106e-05, "loss": 0.2109, "step": 37008 }, { "epoch": 2.9981367465975373, "grad_norm": 0.06446274369955063, "learning_rate": 5.565957063774247e-05, "loss": 0.2262, "step": 37009 }, { "epoch": 2.9982177576150355, "grad_norm": 0.06915741413831711, "learning_rate": 5.5655069985147846e-05, "loss": 0.234, "step": 37010 }, { "epoch": 2.9982987686325338, "grad_norm": 0.0659353956580162, "learning_rate": 5.5650569332553227e-05, "loss": 0.1997, "step": 37011 }, { "epoch": 2.9983797796500324, "grad_norm": 0.06665370613336563, "learning_rate": 5.5646068679958593e-05, "loss": 0.2327, "step": 37012 }, { "epoch": 2.9984607906675307, "grad_norm": 0.07406622171401978, "learning_rate": 5.564156802736397e-05, "loss": 0.2297, "step": 37013 }, { "epoch": 2.998541801685029, "grad_norm": 0.07928238064050674, "learning_rate": 5.563706737476935e-05, "loss": 0.2363, "step": 37014 }, { "epoch": 2.9986228127025276, "grad_norm": 0.07332681864500046, "learning_rate": 5.5632566722174714e-05, "loss": 0.2725, "step": 37015 }, { "epoch": 2.998703823720026, "grad_norm": 0.0755414143204689, "learning_rate": 5.562806606958009e-05, "loss": 0.233, "step": 37016 }, { "epoch": 2.998784834737524, "grad_norm": 0.07047983258962631, "learning_rate": 5.562356541698547e-05, "loss": 0.2551, "step": 37017 }, { "epoch": 2.998865845755023, "grad_norm": 0.07175135612487793, "learning_rate": 5.5619064764390835e-05, "loss": 0.2268, "step": 37018 }, { "epoch": 2.998946856772521, "grad_norm": 0.08001399040222168, "learning_rate": 5.561456411179621e-05, "loss": 0.241, "step": 37019 }, { "epoch": 2.9990278677900193, "grad_norm": 0.06803309172391891, "learning_rate": 5.561006345920159e-05, "loss": 0.2063, "step": 37020 }, { "epoch": 2.999108878807518, "grad_norm": 0.06360645592212677, "learning_rate": 5.5605562806606956e-05, "loss": 0.2126, "step": 37021 }, { "epoch": 2.999189889825016, "grad_norm": 0.06314657628536224, "learning_rate": 5.560106215401234e-05, "loss": 0.2278, "step": 37022 }, { "epoch": 2.9992709008425145, "grad_norm": 0.07989577949047089, "learning_rate": 5.559656150141771e-05, "loss": 0.2294, "step": 37023 }, { "epoch": 2.999351911860013, "grad_norm": 0.0917118638753891, "learning_rate": 5.559206084882308e-05, "loss": 0.2288, "step": 37024 }, { "epoch": 2.9994329228775114, "grad_norm": 0.06495703011751175, "learning_rate": 5.5587560196228464e-05, "loss": 0.2604, "step": 37025 }, { "epoch": 2.9995139338950096, "grad_norm": 0.06716945022344589, "learning_rate": 5.558305954363383e-05, "loss": 0.2511, "step": 37026 }, { "epoch": 2.9995949449125083, "grad_norm": 0.075443796813488, "learning_rate": 5.55785588910392e-05, "loss": 0.2178, "step": 37027 }, { "epoch": 2.9996759559300066, "grad_norm": 0.06924057006835938, "learning_rate": 5.5574058238444585e-05, "loss": 0.2539, "step": 37028 }, { "epoch": 2.999756966947505, "grad_norm": 0.06341889500617981, "learning_rate": 5.556955758584995e-05, "loss": 0.2463, "step": 37029 }, { "epoch": 2.9998379779650035, "grad_norm": 0.06503824144601822, "learning_rate": 5.5565056933255325e-05, "loss": 0.1933, "step": 37030 }, { "epoch": 2.9999189889825018, "grad_norm": 0.07555829733610153, "learning_rate": 5.5560556280660705e-05, "loss": 0.2049, "step": 37031 }, { "epoch": 3.0, "grad_norm": 0.0738195925951004, "learning_rate": 5.555605562806607e-05, "loss": 0.214, "step": 37032 }, { "epoch": 3.0000810110174982, "grad_norm": 0.0725194588303566, "learning_rate": 5.5551554975471446e-05, "loss": 0.2275, "step": 37033 }, { "epoch": 3.000162022034997, "grad_norm": 0.07739761471748352, "learning_rate": 5.5547054322876826e-05, "loss": 0.2549, "step": 37034 }, { "epoch": 3.000243033052495, "grad_norm": 0.08765597641468048, "learning_rate": 5.554255367028219e-05, "loss": 0.2413, "step": 37035 }, { "epoch": 3.0003240440699934, "grad_norm": 0.07010837644338608, "learning_rate": 5.553805301768757e-05, "loss": 0.2515, "step": 37036 }, { "epoch": 3.000405055087492, "grad_norm": 0.07616742700338364, "learning_rate": 5.553355236509295e-05, "loss": 0.2184, "step": 37037 }, { "epoch": 3.0004860661049904, "grad_norm": 0.07259862869977951, "learning_rate": 5.5529051712498314e-05, "loss": 0.225, "step": 37038 }, { "epoch": 3.0005670771224886, "grad_norm": 0.06548836827278137, "learning_rate": 5.552455105990369e-05, "loss": 0.2056, "step": 37039 }, { "epoch": 3.000648088139987, "grad_norm": 0.06693669408559799, "learning_rate": 5.552005040730907e-05, "loss": 0.2328, "step": 37040 }, { "epoch": 3.0007290991574855, "grad_norm": 0.09123533219099045, "learning_rate": 5.5515549754714435e-05, "loss": 0.2435, "step": 37041 }, { "epoch": 3.000810110174984, "grad_norm": 0.06889007240533829, "learning_rate": 5.551104910211981e-05, "loss": 0.2274, "step": 37042 }, { "epoch": 3.000891121192482, "grad_norm": 0.07103199511766434, "learning_rate": 5.550654844952519e-05, "loss": 0.2235, "step": 37043 }, { "epoch": 3.0009721322099807, "grad_norm": 0.08572027832269669, "learning_rate": 5.5502047796930556e-05, "loss": 0.29, "step": 37044 }, { "epoch": 3.001053143227479, "grad_norm": 0.0924052745103836, "learning_rate": 5.549754714433593e-05, "loss": 0.223, "step": 37045 }, { "epoch": 3.001134154244977, "grad_norm": 0.07948629558086395, "learning_rate": 5.549304649174131e-05, "loss": 0.2384, "step": 37046 }, { "epoch": 3.001215165262476, "grad_norm": 0.07083747535943985, "learning_rate": 5.5488545839146676e-05, "loss": 0.2236, "step": 37047 }, { "epoch": 3.001296176279974, "grad_norm": 0.06366025656461716, "learning_rate": 5.548404518655205e-05, "loss": 0.2383, "step": 37048 }, { "epoch": 3.0013771872974724, "grad_norm": 0.06225239485502243, "learning_rate": 5.547954453395743e-05, "loss": 0.1976, "step": 37049 }, { "epoch": 3.0014581983149706, "grad_norm": 0.06707856804132462, "learning_rate": 5.54750438813628e-05, "loss": 0.2156, "step": 37050 }, { "epoch": 3.0015392093324693, "grad_norm": 0.07274459302425385, "learning_rate": 5.547054322876818e-05, "loss": 0.2846, "step": 37051 }, { "epoch": 3.0016202203499676, "grad_norm": 0.060465868562459946, "learning_rate": 5.546604257617355e-05, "loss": 0.2079, "step": 37052 }, { "epoch": 3.001701231367466, "grad_norm": 0.061956025660037994, "learning_rate": 5.546154192357892e-05, "loss": 0.238, "step": 37053 }, { "epoch": 3.0017822423849645, "grad_norm": 0.07218556851148605, "learning_rate": 5.54570412709843e-05, "loss": 0.217, "step": 37054 }, { "epoch": 3.0018632534024627, "grad_norm": 0.06965664774179459, "learning_rate": 5.545254061838967e-05, "loss": 0.2006, "step": 37055 }, { "epoch": 3.001944264419961, "grad_norm": 0.07059884816408157, "learning_rate": 5.544803996579504e-05, "loss": 0.2552, "step": 37056 }, { "epoch": 3.0020252754374597, "grad_norm": 0.060033269226551056, "learning_rate": 5.544353931320042e-05, "loss": 0.2328, "step": 37057 }, { "epoch": 3.002106286454958, "grad_norm": 0.07267241179943085, "learning_rate": 5.543903866060579e-05, "loss": 0.221, "step": 37058 }, { "epoch": 3.002187297472456, "grad_norm": 0.059818148612976074, "learning_rate": 5.543453800801116e-05, "loss": 0.221, "step": 37059 }, { "epoch": 3.002268308489955, "grad_norm": 0.10267902165651321, "learning_rate": 5.543003735541654e-05, "loss": 0.2289, "step": 37060 }, { "epoch": 3.002349319507453, "grad_norm": 0.0698448047041893, "learning_rate": 5.5425536702821914e-05, "loss": 0.2109, "step": 37061 }, { "epoch": 3.0024303305249513, "grad_norm": 0.06195312365889549, "learning_rate": 5.542103605022728e-05, "loss": 0.2531, "step": 37062 }, { "epoch": 3.0025113415424496, "grad_norm": 0.06242690607905388, "learning_rate": 5.541653539763266e-05, "loss": 0.2113, "step": 37063 }, { "epoch": 3.0025923525599483, "grad_norm": 0.06666281819343567, "learning_rate": 5.5412034745038034e-05, "loss": 0.2209, "step": 37064 }, { "epoch": 3.0026733635774465, "grad_norm": 0.06624188274145126, "learning_rate": 5.54075340924434e-05, "loss": 0.2024, "step": 37065 }, { "epoch": 3.0027543745949448, "grad_norm": 0.07061390578746796, "learning_rate": 5.540303343984878e-05, "loss": 0.2311, "step": 37066 }, { "epoch": 3.0028353856124435, "grad_norm": 0.07270953059196472, "learning_rate": 5.5398532787254155e-05, "loss": 0.2471, "step": 37067 }, { "epoch": 3.0029163966299417, "grad_norm": 0.06502418220043182, "learning_rate": 5.539403213465952e-05, "loss": 0.2313, "step": 37068 }, { "epoch": 3.00299740764744, "grad_norm": 0.07650100439786911, "learning_rate": 5.53895314820649e-05, "loss": 0.2661, "step": 37069 }, { "epoch": 3.0030784186649386, "grad_norm": 0.07324282824993134, "learning_rate": 5.5385030829470276e-05, "loss": 0.2472, "step": 37070 }, { "epoch": 3.003159429682437, "grad_norm": 0.05792533606290817, "learning_rate": 5.538053017687564e-05, "loss": 0.209, "step": 37071 }, { "epoch": 3.003240440699935, "grad_norm": 0.07956384122371674, "learning_rate": 5.537602952428102e-05, "loss": 0.2226, "step": 37072 }, { "epoch": 3.0033214517174334, "grad_norm": 0.061989475041627884, "learning_rate": 5.53715288716864e-05, "loss": 0.2615, "step": 37073 }, { "epoch": 3.003402462734932, "grad_norm": 0.07994687557220459, "learning_rate": 5.5367028219091764e-05, "loss": 0.2498, "step": 37074 }, { "epoch": 3.0034834737524303, "grad_norm": 0.06498431414365768, "learning_rate": 5.5362527566497144e-05, "loss": 0.2343, "step": 37075 }, { "epoch": 3.0035644847699285, "grad_norm": 0.07870197296142578, "learning_rate": 5.535802691390252e-05, "loss": 0.2269, "step": 37076 }, { "epoch": 3.0036454957874272, "grad_norm": 0.07183849811553955, "learning_rate": 5.53535262613079e-05, "loss": 0.2438, "step": 37077 }, { "epoch": 3.0037265068049255, "grad_norm": 0.07815229892730713, "learning_rate": 5.5349025608713265e-05, "loss": 0.2122, "step": 37078 }, { "epoch": 3.0038075178224237, "grad_norm": 0.0587080642580986, "learning_rate": 5.534452495611864e-05, "loss": 0.211, "step": 37079 }, { "epoch": 3.0038885288399224, "grad_norm": 0.08774900436401367, "learning_rate": 5.534002430352402e-05, "loss": 0.2188, "step": 37080 }, { "epoch": 3.0039695398574207, "grad_norm": 0.08080202341079712, "learning_rate": 5.5335523650929386e-05, "loss": 0.2553, "step": 37081 }, { "epoch": 3.004050550874919, "grad_norm": 0.07951570302248001, "learning_rate": 5.533102299833476e-05, "loss": 0.2161, "step": 37082 }, { "epoch": 3.0041315618924176, "grad_norm": 0.07218746840953827, "learning_rate": 5.532652234574014e-05, "loss": 0.2065, "step": 37083 }, { "epoch": 3.004212572909916, "grad_norm": 0.07605035603046417, "learning_rate": 5.5322021693145506e-05, "loss": 0.2587, "step": 37084 }, { "epoch": 3.004293583927414, "grad_norm": 0.07501280307769775, "learning_rate": 5.531752104055088e-05, "loss": 0.2375, "step": 37085 }, { "epoch": 3.0043745949449123, "grad_norm": 0.07414011657238007, "learning_rate": 5.531302038795626e-05, "loss": 0.2246, "step": 37086 }, { "epoch": 3.004455605962411, "grad_norm": 0.06920185685157776, "learning_rate": 5.530851973536163e-05, "loss": 0.2588, "step": 37087 }, { "epoch": 3.0045366169799093, "grad_norm": 0.06351779401302338, "learning_rate": 5.5304019082767e-05, "loss": 0.2167, "step": 37088 }, { "epoch": 3.0046176279974075, "grad_norm": 0.058531858026981354, "learning_rate": 5.529951843017238e-05, "loss": 0.2324, "step": 37089 }, { "epoch": 3.004698639014906, "grad_norm": 0.06440838426351547, "learning_rate": 5.529501777757775e-05, "loss": 0.2204, "step": 37090 }, { "epoch": 3.0047796500324044, "grad_norm": 0.06930806487798691, "learning_rate": 5.529051712498312e-05, "loss": 0.1756, "step": 37091 }, { "epoch": 3.0048606610499027, "grad_norm": 0.08574075251817703, "learning_rate": 5.52860164723885e-05, "loss": 0.2421, "step": 37092 }, { "epoch": 3.0049416720674014, "grad_norm": 0.08673498034477234, "learning_rate": 5.528151581979387e-05, "loss": 0.2415, "step": 37093 }, { "epoch": 3.0050226830848996, "grad_norm": 0.06406184285879135, "learning_rate": 5.527701516719924e-05, "loss": 0.1944, "step": 37094 }, { "epoch": 3.005103694102398, "grad_norm": 0.0748908668756485, "learning_rate": 5.527251451460462e-05, "loss": 0.224, "step": 37095 }, { "epoch": 3.005184705119896, "grad_norm": 0.0906112864613533, "learning_rate": 5.526801386200999e-05, "loss": 0.2435, "step": 37096 }, { "epoch": 3.005265716137395, "grad_norm": 0.08011359721422195, "learning_rate": 5.526351320941536e-05, "loss": 0.2725, "step": 37097 }, { "epoch": 3.005346727154893, "grad_norm": 0.06861620396375656, "learning_rate": 5.5259012556820744e-05, "loss": 0.2229, "step": 37098 }, { "epoch": 3.0054277381723913, "grad_norm": 0.08145179599523544, "learning_rate": 5.525451190422612e-05, "loss": 0.2125, "step": 37099 }, { "epoch": 3.00550874918989, "grad_norm": 0.06678757071495056, "learning_rate": 5.5250011251631484e-05, "loss": 0.2285, "step": 37100 }, { "epoch": 3.005589760207388, "grad_norm": 0.08154644817113876, "learning_rate": 5.5245510599036864e-05, "loss": 0.2317, "step": 37101 }, { "epoch": 3.0056707712248865, "grad_norm": 0.06820086389780045, "learning_rate": 5.524100994644224e-05, "loss": 0.2131, "step": 37102 }, { "epoch": 3.005751782242385, "grad_norm": 0.07761559635400772, "learning_rate": 5.523650929384762e-05, "loss": 0.239, "step": 37103 }, { "epoch": 3.0058327932598834, "grad_norm": 0.0691225454211235, "learning_rate": 5.5232008641252985e-05, "loss": 0.2477, "step": 37104 }, { "epoch": 3.0059138042773816, "grad_norm": 0.09152821451425552, "learning_rate": 5.522750798865836e-05, "loss": 0.2406, "step": 37105 }, { "epoch": 3.00599481529488, "grad_norm": 0.08434759080410004, "learning_rate": 5.522300733606374e-05, "loss": 0.2814, "step": 37106 }, { "epoch": 3.0060758263123786, "grad_norm": 0.07852553576231003, "learning_rate": 5.5218506683469106e-05, "loss": 0.2628, "step": 37107 }, { "epoch": 3.006156837329877, "grad_norm": 0.06626973301172256, "learning_rate": 5.521400603087448e-05, "loss": 0.2117, "step": 37108 }, { "epoch": 3.006237848347375, "grad_norm": 0.06209743767976761, "learning_rate": 5.520950537827986e-05, "loss": 0.2654, "step": 37109 }, { "epoch": 3.0063188593648738, "grad_norm": 0.06478123366832733, "learning_rate": 5.520500472568523e-05, "loss": 0.2731, "step": 37110 }, { "epoch": 3.006399870382372, "grad_norm": 0.07236045598983765, "learning_rate": 5.52005040730906e-05, "loss": 0.2063, "step": 37111 }, { "epoch": 3.0064808813998702, "grad_norm": 0.06700768321752548, "learning_rate": 5.519600342049598e-05, "loss": 0.2363, "step": 37112 }, { "epoch": 3.006561892417369, "grad_norm": 0.06586731225252151, "learning_rate": 5.519150276790135e-05, "loss": 0.2576, "step": 37113 }, { "epoch": 3.006642903434867, "grad_norm": 0.062321074306964874, "learning_rate": 5.518700211530672e-05, "loss": 0.2245, "step": 37114 }, { "epoch": 3.0067239144523654, "grad_norm": 0.07160164415836334, "learning_rate": 5.51825014627121e-05, "loss": 0.2452, "step": 37115 }, { "epoch": 3.006804925469864, "grad_norm": 0.07295297086238861, "learning_rate": 5.517800081011747e-05, "loss": 0.2185, "step": 37116 }, { "epoch": 3.0068859364873624, "grad_norm": 0.06453075259923935, "learning_rate": 5.517350015752284e-05, "loss": 0.2221, "step": 37117 }, { "epoch": 3.0069669475048606, "grad_norm": 0.08894529938697815, "learning_rate": 5.516899950492822e-05, "loss": 0.2735, "step": 37118 }, { "epoch": 3.007047958522359, "grad_norm": 0.06164504587650299, "learning_rate": 5.516449885233359e-05, "loss": 0.2135, "step": 37119 }, { "epoch": 3.0071289695398575, "grad_norm": 0.08532165735960007, "learning_rate": 5.515999819973896e-05, "loss": 0.2352, "step": 37120 }, { "epoch": 3.0072099805573558, "grad_norm": 0.077989362180233, "learning_rate": 5.515549754714434e-05, "loss": 0.2249, "step": 37121 }, { "epoch": 3.007290991574854, "grad_norm": 0.06919614225625992, "learning_rate": 5.515099689454971e-05, "loss": 0.2618, "step": 37122 }, { "epoch": 3.0073720025923527, "grad_norm": 0.07316429167985916, "learning_rate": 5.5146496241955084e-05, "loss": 0.2389, "step": 37123 }, { "epoch": 3.007453013609851, "grad_norm": 0.0694352462887764, "learning_rate": 5.5141995589360464e-05, "loss": 0.2361, "step": 37124 }, { "epoch": 3.007534024627349, "grad_norm": 0.08120887726545334, "learning_rate": 5.513749493676583e-05, "loss": 0.2597, "step": 37125 }, { "epoch": 3.007615035644848, "grad_norm": 0.08180362731218338, "learning_rate": 5.5132994284171205e-05, "loss": 0.2737, "step": 37126 }, { "epoch": 3.007696046662346, "grad_norm": 0.07655584067106247, "learning_rate": 5.5128493631576585e-05, "loss": 0.247, "step": 37127 }, { "epoch": 3.0077770576798444, "grad_norm": 0.057747483253479004, "learning_rate": 5.512399297898195e-05, "loss": 0.2309, "step": 37128 }, { "epoch": 3.0078580686973426, "grad_norm": 0.05902184545993805, "learning_rate": 5.511949232638733e-05, "loss": 0.1943, "step": 37129 }, { "epoch": 3.0079390797148413, "grad_norm": 0.06686657667160034, "learning_rate": 5.5114991673792706e-05, "loss": 0.2331, "step": 37130 }, { "epoch": 3.0080200907323396, "grad_norm": 0.06984364986419678, "learning_rate": 5.511049102119807e-05, "loss": 0.2597, "step": 37131 }, { "epoch": 3.008101101749838, "grad_norm": 0.05688484013080597, "learning_rate": 5.510599036860345e-05, "loss": 0.2591, "step": 37132 }, { "epoch": 3.0081821127673365, "grad_norm": 0.07238908112049103, "learning_rate": 5.5101489716008827e-05, "loss": 0.2553, "step": 37133 }, { "epoch": 3.0082631237848347, "grad_norm": 0.06852646172046661, "learning_rate": 5.509698906341419e-05, "loss": 0.2341, "step": 37134 }, { "epoch": 3.008344134802333, "grad_norm": 0.07024137675762177, "learning_rate": 5.5092488410819574e-05, "loss": 0.224, "step": 37135 }, { "epoch": 3.0084251458198317, "grad_norm": 0.07092615962028503, "learning_rate": 5.508798775822495e-05, "loss": 0.2485, "step": 37136 }, { "epoch": 3.00850615683733, "grad_norm": 0.05837797746062279, "learning_rate": 5.5083487105630314e-05, "loss": 0.184, "step": 37137 }, { "epoch": 3.008587167854828, "grad_norm": 0.06182774528861046, "learning_rate": 5.5078986453035695e-05, "loss": 0.2218, "step": 37138 }, { "epoch": 3.008668178872327, "grad_norm": 0.0963364765048027, "learning_rate": 5.507448580044107e-05, "loss": 0.2624, "step": 37139 }, { "epoch": 3.008749189889825, "grad_norm": 0.05469222739338875, "learning_rate": 5.5069985147846435e-05, "loss": 0.2098, "step": 37140 }, { "epoch": 3.0088302009073233, "grad_norm": 0.08195900917053223, "learning_rate": 5.5065484495251815e-05, "loss": 0.2398, "step": 37141 }, { "epoch": 3.0089112119248216, "grad_norm": 0.06950972229242325, "learning_rate": 5.506098384265719e-05, "loss": 0.2796, "step": 37142 }, { "epoch": 3.0089922229423203, "grad_norm": 0.07501380145549774, "learning_rate": 5.5056483190062556e-05, "loss": 0.2564, "step": 37143 }, { "epoch": 3.0090732339598185, "grad_norm": 0.06771993637084961, "learning_rate": 5.5051982537467936e-05, "loss": 0.2088, "step": 37144 }, { "epoch": 3.0091542449773168, "grad_norm": 0.06669183075428009, "learning_rate": 5.504748188487331e-05, "loss": 0.1927, "step": 37145 }, { "epoch": 3.0092352559948155, "grad_norm": 0.08785217255353928, "learning_rate": 5.5042981232278677e-05, "loss": 0.2287, "step": 37146 }, { "epoch": 3.0093162670123137, "grad_norm": 0.0853303074836731, "learning_rate": 5.503848057968406e-05, "loss": 0.1907, "step": 37147 }, { "epoch": 3.009397278029812, "grad_norm": 0.0789603441953659, "learning_rate": 5.503397992708943e-05, "loss": 0.1991, "step": 37148 }, { "epoch": 3.0094782890473106, "grad_norm": 0.06373845040798187, "learning_rate": 5.50294792744948e-05, "loss": 0.2359, "step": 37149 }, { "epoch": 3.009559300064809, "grad_norm": 0.0680304542183876, "learning_rate": 5.502497862190018e-05, "loss": 0.1963, "step": 37150 }, { "epoch": 3.009640311082307, "grad_norm": 0.05651449039578438, "learning_rate": 5.502047796930555e-05, "loss": 0.1977, "step": 37151 }, { "epoch": 3.0097213220998054, "grad_norm": 0.10431863367557526, "learning_rate": 5.501597731671092e-05, "loss": 0.2325, "step": 37152 }, { "epoch": 3.009802333117304, "grad_norm": 0.07406028360128403, "learning_rate": 5.50114766641163e-05, "loss": 0.2395, "step": 37153 }, { "epoch": 3.0098833441348023, "grad_norm": 0.06386356800794601, "learning_rate": 5.500697601152167e-05, "loss": 0.2155, "step": 37154 }, { "epoch": 3.0099643551523005, "grad_norm": 0.10542084276676178, "learning_rate": 5.500247535892705e-05, "loss": 0.2085, "step": 37155 }, { "epoch": 3.0100453661697992, "grad_norm": 0.0667385384440422, "learning_rate": 5.499797470633242e-05, "loss": 0.2158, "step": 37156 }, { "epoch": 3.0101263771872975, "grad_norm": 0.07736918330192566, "learning_rate": 5.499347405373779e-05, "loss": 0.2182, "step": 37157 }, { "epoch": 3.0102073882047957, "grad_norm": 0.06546894460916519, "learning_rate": 5.498897340114317e-05, "loss": 0.2375, "step": 37158 }, { "epoch": 3.0102883992222944, "grad_norm": 0.08651191741228104, "learning_rate": 5.498447274854854e-05, "loss": 0.2414, "step": 37159 }, { "epoch": 3.0103694102397927, "grad_norm": 0.08241085708141327, "learning_rate": 5.4979972095953914e-05, "loss": 0.2608, "step": 37160 }, { "epoch": 3.010450421257291, "grad_norm": 0.07510359585285187, "learning_rate": 5.4975471443359294e-05, "loss": 0.222, "step": 37161 }, { "epoch": 3.0105314322747896, "grad_norm": 0.07132948935031891, "learning_rate": 5.497097079076466e-05, "loss": 0.2024, "step": 37162 }, { "epoch": 3.010612443292288, "grad_norm": 0.061131563037633896, "learning_rate": 5.4966470138170035e-05, "loss": 0.2456, "step": 37163 }, { "epoch": 3.010693454309786, "grad_norm": 0.057700663805007935, "learning_rate": 5.4961969485575415e-05, "loss": 0.2281, "step": 37164 }, { "epoch": 3.0107744653272843, "grad_norm": 0.06116776168346405, "learning_rate": 5.495746883298078e-05, "loss": 0.2004, "step": 37165 }, { "epoch": 3.010855476344783, "grad_norm": 0.07130347192287445, "learning_rate": 5.4952968180386155e-05, "loss": 0.2461, "step": 37166 }, { "epoch": 3.0109364873622813, "grad_norm": 0.05239642783999443, "learning_rate": 5.4948467527791536e-05, "loss": 0.1924, "step": 37167 }, { "epoch": 3.0110174983797795, "grad_norm": 0.07538292557001114, "learning_rate": 5.494396687519691e-05, "loss": 0.2068, "step": 37168 }, { "epoch": 3.011098509397278, "grad_norm": 0.07418717443943024, "learning_rate": 5.4939466222602276e-05, "loss": 0.2516, "step": 37169 }, { "epoch": 3.0111795204147764, "grad_norm": 0.07645675539970398, "learning_rate": 5.493496557000766e-05, "loss": 0.2525, "step": 37170 }, { "epoch": 3.0112605314322747, "grad_norm": 0.08645131438970566, "learning_rate": 5.493046491741303e-05, "loss": 0.2413, "step": 37171 }, { "epoch": 3.0113415424497734, "grad_norm": 0.06277317553758621, "learning_rate": 5.49259642648184e-05, "loss": 0.208, "step": 37172 }, { "epoch": 3.0114225534672716, "grad_norm": 0.07197176665067673, "learning_rate": 5.492146361222378e-05, "loss": 0.2284, "step": 37173 }, { "epoch": 3.01150356448477, "grad_norm": 0.07234460115432739, "learning_rate": 5.491696295962915e-05, "loss": 0.2438, "step": 37174 }, { "epoch": 3.011584575502268, "grad_norm": 0.07466896623373032, "learning_rate": 5.491246230703452e-05, "loss": 0.2424, "step": 37175 }, { "epoch": 3.011665586519767, "grad_norm": 0.07664573192596436, "learning_rate": 5.49079616544399e-05, "loss": 0.1929, "step": 37176 }, { "epoch": 3.011746597537265, "grad_norm": 0.09230896830558777, "learning_rate": 5.490346100184527e-05, "loss": 0.2293, "step": 37177 }, { "epoch": 3.0118276085547633, "grad_norm": 0.06680130958557129, "learning_rate": 5.489896034925064e-05, "loss": 0.2365, "step": 37178 }, { "epoch": 3.011908619572262, "grad_norm": 0.06987342238426208, "learning_rate": 5.489445969665602e-05, "loss": 0.2413, "step": 37179 }, { "epoch": 3.01198963058976, "grad_norm": 0.06937224417924881, "learning_rate": 5.488995904406139e-05, "loss": 0.2374, "step": 37180 }, { "epoch": 3.0120706416072585, "grad_norm": 0.059260692447423935, "learning_rate": 5.488545839146677e-05, "loss": 0.2195, "step": 37181 }, { "epoch": 3.012151652624757, "grad_norm": 0.11258818954229355, "learning_rate": 5.488095773887214e-05, "loss": 0.241, "step": 37182 }, { "epoch": 3.0122326636422554, "grad_norm": 0.0633421391248703, "learning_rate": 5.4876457086277513e-05, "loss": 0.2432, "step": 37183 }, { "epoch": 3.0123136746597536, "grad_norm": 0.07214123755693436, "learning_rate": 5.4871956433682894e-05, "loss": 0.2305, "step": 37184 }, { "epoch": 3.0123946856772523, "grad_norm": 0.07456515729427338, "learning_rate": 5.486745578108826e-05, "loss": 0.238, "step": 37185 }, { "epoch": 3.0124756966947506, "grad_norm": 0.05460673198103905, "learning_rate": 5.4862955128493634e-05, "loss": 0.1989, "step": 37186 }, { "epoch": 3.012556707712249, "grad_norm": 0.062048815190792084, "learning_rate": 5.4858454475899015e-05, "loss": 0.2333, "step": 37187 }, { "epoch": 3.012637718729747, "grad_norm": 0.07398334890604019, "learning_rate": 5.485395382330438e-05, "loss": 0.2463, "step": 37188 }, { "epoch": 3.0127187297472457, "grad_norm": 0.07296764850616455, "learning_rate": 5.4849453170709755e-05, "loss": 0.2321, "step": 37189 }, { "epoch": 3.012799740764744, "grad_norm": 0.07078998535871506, "learning_rate": 5.4844952518115135e-05, "loss": 0.244, "step": 37190 }, { "epoch": 3.0128807517822422, "grad_norm": 0.07269686460494995, "learning_rate": 5.48404518655205e-05, "loss": 0.1921, "step": 37191 }, { "epoch": 3.012961762799741, "grad_norm": 0.07033134996891022, "learning_rate": 5.4835951212925876e-05, "loss": 0.2418, "step": 37192 }, { "epoch": 3.013042773817239, "grad_norm": 0.08645029366016388, "learning_rate": 5.4831450560331256e-05, "loss": 0.2314, "step": 37193 }, { "epoch": 3.0131237848347374, "grad_norm": 0.06531119346618652, "learning_rate": 5.482694990773662e-05, "loss": 0.2065, "step": 37194 }, { "epoch": 3.013204795852236, "grad_norm": 0.07155883312225342, "learning_rate": 5.4822449255142e-05, "loss": 0.235, "step": 37195 }, { "epoch": 3.0132858068697344, "grad_norm": 0.06503793597221375, "learning_rate": 5.481794860254738e-05, "loss": 0.2261, "step": 37196 }, { "epoch": 3.0133668178872326, "grad_norm": 0.07013113796710968, "learning_rate": 5.4813447949952744e-05, "loss": 0.2167, "step": 37197 }, { "epoch": 3.013447828904731, "grad_norm": 0.08574307709932327, "learning_rate": 5.480894729735812e-05, "loss": 0.2266, "step": 37198 }, { "epoch": 3.0135288399222295, "grad_norm": 0.07470546662807465, "learning_rate": 5.48044466447635e-05, "loss": 0.2973, "step": 37199 }, { "epoch": 3.0136098509397278, "grad_norm": 0.08057143539190292, "learning_rate": 5.4799945992168865e-05, "loss": 0.2368, "step": 37200 }, { "epoch": 3.013690861957226, "grad_norm": 0.07528182119131088, "learning_rate": 5.479544533957424e-05, "loss": 0.2354, "step": 37201 }, { "epoch": 3.0137718729747247, "grad_norm": 0.07321956008672714, "learning_rate": 5.479094468697962e-05, "loss": 0.2205, "step": 37202 }, { "epoch": 3.013852883992223, "grad_norm": 0.07638127356767654, "learning_rate": 5.4786444034384986e-05, "loss": 0.2284, "step": 37203 }, { "epoch": 3.013933895009721, "grad_norm": 0.06527112424373627, "learning_rate": 5.478194338179036e-05, "loss": 0.2554, "step": 37204 }, { "epoch": 3.01401490602722, "grad_norm": 0.07681337743997574, "learning_rate": 5.477744272919574e-05, "loss": 0.2288, "step": 37205 }, { "epoch": 3.014095917044718, "grad_norm": 0.06282360106706619, "learning_rate": 5.4772942076601106e-05, "loss": 0.217, "step": 37206 }, { "epoch": 3.0141769280622164, "grad_norm": 0.06141400337219238, "learning_rate": 5.476844142400648e-05, "loss": 0.2678, "step": 37207 }, { "epoch": 3.0142579390797146, "grad_norm": 0.06095695495605469, "learning_rate": 5.476394077141186e-05, "loss": 0.2428, "step": 37208 }, { "epoch": 3.0143389500972133, "grad_norm": 0.062181975692510605, "learning_rate": 5.475944011881723e-05, "loss": 0.2381, "step": 37209 }, { "epoch": 3.0144199611147116, "grad_norm": 0.06874234229326248, "learning_rate": 5.475493946622261e-05, "loss": 0.2473, "step": 37210 }, { "epoch": 3.01450097213221, "grad_norm": 0.0715310275554657, "learning_rate": 5.475043881362798e-05, "loss": 0.229, "step": 37211 }, { "epoch": 3.0145819831497085, "grad_norm": 0.06931551545858383, "learning_rate": 5.474593816103335e-05, "loss": 0.2037, "step": 37212 }, { "epoch": 3.0146629941672067, "grad_norm": 0.06569510698318481, "learning_rate": 5.474143750843873e-05, "loss": 0.2061, "step": 37213 }, { "epoch": 3.014744005184705, "grad_norm": 0.060928985476493835, "learning_rate": 5.47369368558441e-05, "loss": 0.2004, "step": 37214 }, { "epoch": 3.0148250162022037, "grad_norm": 0.06559912115335464, "learning_rate": 5.473243620324947e-05, "loss": 0.2478, "step": 37215 }, { "epoch": 3.014906027219702, "grad_norm": 0.09447550028562546, "learning_rate": 5.472793555065485e-05, "loss": 0.2177, "step": 37216 }, { "epoch": 3.0149870382372, "grad_norm": 0.07177255302667618, "learning_rate": 5.472343489806022e-05, "loss": 0.2568, "step": 37217 }, { "epoch": 3.015068049254699, "grad_norm": 0.07061301916837692, "learning_rate": 5.471893424546559e-05, "loss": 0.25, "step": 37218 }, { "epoch": 3.015149060272197, "grad_norm": 0.055768292397260666, "learning_rate": 5.471443359287097e-05, "loss": 0.2318, "step": 37219 }, { "epoch": 3.0152300712896953, "grad_norm": 0.06401805579662323, "learning_rate": 5.4709932940276344e-05, "loss": 0.1784, "step": 37220 }, { "epoch": 3.0153110823071936, "grad_norm": 0.07539704442024231, "learning_rate": 5.470543228768171e-05, "loss": 0.2364, "step": 37221 }, { "epoch": 3.0153920933246923, "grad_norm": 0.06201757490634918, "learning_rate": 5.470093163508709e-05, "loss": 0.2009, "step": 37222 }, { "epoch": 3.0154731043421905, "grad_norm": 0.06623942404985428, "learning_rate": 5.4696430982492464e-05, "loss": 0.2436, "step": 37223 }, { "epoch": 3.0155541153596888, "grad_norm": 0.07048413157463074, "learning_rate": 5.469193032989783e-05, "loss": 0.2292, "step": 37224 }, { "epoch": 3.0156351263771874, "grad_norm": 0.06607518345117569, "learning_rate": 5.468742967730321e-05, "loss": 0.2366, "step": 37225 }, { "epoch": 3.0157161373946857, "grad_norm": 0.07342692464590073, "learning_rate": 5.4682929024708585e-05, "loss": 0.1976, "step": 37226 }, { "epoch": 3.015797148412184, "grad_norm": 0.0792352706193924, "learning_rate": 5.467842837211395e-05, "loss": 0.2558, "step": 37227 }, { "epoch": 3.0158781594296826, "grad_norm": 0.06499131768941879, "learning_rate": 5.467392771951933e-05, "loss": 0.1834, "step": 37228 }, { "epoch": 3.015959170447181, "grad_norm": 0.06605541706085205, "learning_rate": 5.4669427066924706e-05, "loss": 0.2081, "step": 37229 }, { "epoch": 3.016040181464679, "grad_norm": 0.076319120824337, "learning_rate": 5.466492641433007e-05, "loss": 0.2378, "step": 37230 }, { "epoch": 3.0161211924821774, "grad_norm": 0.0853307768702507, "learning_rate": 5.466042576173545e-05, "loss": 0.2144, "step": 37231 }, { "epoch": 3.016202203499676, "grad_norm": 0.05636778101325035, "learning_rate": 5.465592510914083e-05, "loss": 0.2152, "step": 37232 }, { "epoch": 3.0162832145171743, "grad_norm": 0.07191046327352524, "learning_rate": 5.4651424456546194e-05, "loss": 0.2203, "step": 37233 }, { "epoch": 3.0163642255346725, "grad_norm": 0.06822015345096588, "learning_rate": 5.464692380395158e-05, "loss": 0.2697, "step": 37234 }, { "epoch": 3.0164452365521712, "grad_norm": 0.06572640687227249, "learning_rate": 5.464242315135695e-05, "loss": 0.2328, "step": 37235 }, { "epoch": 3.0165262475696695, "grad_norm": 0.06790418177843094, "learning_rate": 5.463792249876233e-05, "loss": 0.2499, "step": 37236 }, { "epoch": 3.0166072585871677, "grad_norm": 0.07083853334188461, "learning_rate": 5.46334218461677e-05, "loss": 0.1971, "step": 37237 }, { "epoch": 3.0166882696046664, "grad_norm": 0.0681314542889595, "learning_rate": 5.462892119357307e-05, "loss": 0.2352, "step": 37238 }, { "epoch": 3.0167692806221647, "grad_norm": 0.059616200625896454, "learning_rate": 5.462442054097845e-05, "loss": 0.2286, "step": 37239 }, { "epoch": 3.016850291639663, "grad_norm": 0.06560337543487549, "learning_rate": 5.461991988838382e-05, "loss": 0.2481, "step": 37240 }, { "epoch": 3.0169313026571616, "grad_norm": 0.06413570791482925, "learning_rate": 5.461541923578919e-05, "loss": 0.2271, "step": 37241 }, { "epoch": 3.01701231367466, "grad_norm": 0.07081443071365356, "learning_rate": 5.461091858319457e-05, "loss": 0.2156, "step": 37242 }, { "epoch": 3.017093324692158, "grad_norm": 0.07580958306789398, "learning_rate": 5.460641793059994e-05, "loss": 0.2512, "step": 37243 }, { "epoch": 3.0171743357096563, "grad_norm": 0.07665203511714935, "learning_rate": 5.460191727800531e-05, "loss": 0.2064, "step": 37244 }, { "epoch": 3.017255346727155, "grad_norm": 0.0836213082075119, "learning_rate": 5.459741662541069e-05, "loss": 0.2442, "step": 37245 }, { "epoch": 3.0173363577446533, "grad_norm": 0.07863006740808487, "learning_rate": 5.4592915972816064e-05, "loss": 0.2178, "step": 37246 }, { "epoch": 3.0174173687621515, "grad_norm": 0.06348031014204025, "learning_rate": 5.458841532022143e-05, "loss": 0.1979, "step": 37247 }, { "epoch": 3.01749837977965, "grad_norm": 0.0701642706990242, "learning_rate": 5.458391466762681e-05, "loss": 0.2428, "step": 37248 }, { "epoch": 3.0175793907971484, "grad_norm": 0.0907692015171051, "learning_rate": 5.4579414015032185e-05, "loss": 0.2313, "step": 37249 }, { "epoch": 3.0176604018146467, "grad_norm": 0.07722572237253189, "learning_rate": 5.457491336243755e-05, "loss": 0.2181, "step": 37250 }, { "epoch": 3.0177414128321454, "grad_norm": 0.07011134922504425, "learning_rate": 5.457041270984293e-05, "loss": 0.2379, "step": 37251 }, { "epoch": 3.0178224238496436, "grad_norm": 0.07664912939071655, "learning_rate": 5.4565912057248306e-05, "loss": 0.2393, "step": 37252 }, { "epoch": 3.017903434867142, "grad_norm": 0.07362043857574463, "learning_rate": 5.456141140465367e-05, "loss": 0.2173, "step": 37253 }, { "epoch": 3.01798444588464, "grad_norm": 0.07834141701459885, "learning_rate": 5.455691075205905e-05, "loss": 0.2493, "step": 37254 }, { "epoch": 3.018065456902139, "grad_norm": 0.06812326610088348, "learning_rate": 5.4552410099464426e-05, "loss": 0.2226, "step": 37255 }, { "epoch": 3.018146467919637, "grad_norm": 0.06823229044675827, "learning_rate": 5.454790944686979e-05, "loss": 0.206, "step": 37256 }, { "epoch": 3.0182274789371353, "grad_norm": 0.09068725258111954, "learning_rate": 5.4543408794275174e-05, "loss": 0.2219, "step": 37257 }, { "epoch": 3.018308489954634, "grad_norm": 0.0642944872379303, "learning_rate": 5.453890814168055e-05, "loss": 0.2413, "step": 37258 }, { "epoch": 3.018389500972132, "grad_norm": 0.07336447387933731, "learning_rate": 5.4534407489085914e-05, "loss": 0.2409, "step": 37259 }, { "epoch": 3.0184705119896305, "grad_norm": 0.07347241044044495, "learning_rate": 5.4529906836491294e-05, "loss": 0.2229, "step": 37260 }, { "epoch": 3.018551523007129, "grad_norm": 0.07632424682378769, "learning_rate": 5.452540618389667e-05, "loss": 0.2265, "step": 37261 }, { "epoch": 3.0186325340246274, "grad_norm": 0.0791018158197403, "learning_rate": 5.452090553130205e-05, "loss": 0.211, "step": 37262 }, { "epoch": 3.0187135450421256, "grad_norm": 0.0772065594792366, "learning_rate": 5.4516404878707415e-05, "loss": 0.2076, "step": 37263 }, { "epoch": 3.0187945560596243, "grad_norm": 0.06899842619895935, "learning_rate": 5.451190422611279e-05, "loss": 0.2531, "step": 37264 }, { "epoch": 3.0188755670771226, "grad_norm": 0.08461082726716995, "learning_rate": 5.450740357351817e-05, "loss": 0.3056, "step": 37265 }, { "epoch": 3.018956578094621, "grad_norm": 0.07619468867778778, "learning_rate": 5.4502902920923536e-05, "loss": 0.2356, "step": 37266 }, { "epoch": 3.019037589112119, "grad_norm": 0.08498235791921616, "learning_rate": 5.449840226832891e-05, "loss": 0.2215, "step": 37267 }, { "epoch": 3.0191186001296177, "grad_norm": 0.06508364528417587, "learning_rate": 5.449390161573429e-05, "loss": 0.2219, "step": 37268 }, { "epoch": 3.019199611147116, "grad_norm": 0.060360874980688095, "learning_rate": 5.448940096313966e-05, "loss": 0.2633, "step": 37269 }, { "epoch": 3.0192806221646142, "grad_norm": 0.07176513224840164, "learning_rate": 5.448490031054503e-05, "loss": 0.218, "step": 37270 }, { "epoch": 3.019361633182113, "grad_norm": 0.07150030881166458, "learning_rate": 5.448039965795041e-05, "loss": 0.2151, "step": 37271 }, { "epoch": 3.019442644199611, "grad_norm": 0.07552962005138397, "learning_rate": 5.447589900535578e-05, "loss": 0.2398, "step": 37272 }, { "epoch": 3.0195236552171094, "grad_norm": 0.09413708001375198, "learning_rate": 5.447139835276115e-05, "loss": 0.2635, "step": 37273 }, { "epoch": 3.019604666234608, "grad_norm": 0.07900536805391312, "learning_rate": 5.446689770016653e-05, "loss": 0.2267, "step": 37274 }, { "epoch": 3.0196856772521063, "grad_norm": 0.07706227153539658, "learning_rate": 5.44623970475719e-05, "loss": 0.2637, "step": 37275 }, { "epoch": 3.0197666882696046, "grad_norm": 0.06675276160240173, "learning_rate": 5.445789639497727e-05, "loss": 0.2128, "step": 37276 }, { "epoch": 3.019847699287103, "grad_norm": 0.0579092912375927, "learning_rate": 5.445339574238265e-05, "loss": 0.2021, "step": 37277 }, { "epoch": 3.0199287103046015, "grad_norm": 0.0754135251045227, "learning_rate": 5.444889508978802e-05, "loss": 0.2551, "step": 37278 }, { "epoch": 3.0200097213220998, "grad_norm": 0.07271932810544968, "learning_rate": 5.444439443719339e-05, "loss": 0.2093, "step": 37279 }, { "epoch": 3.020090732339598, "grad_norm": 0.07659343630075455, "learning_rate": 5.443989378459877e-05, "loss": 0.2459, "step": 37280 }, { "epoch": 3.0201717433570967, "grad_norm": 0.06897158175706863, "learning_rate": 5.443539313200414e-05, "loss": 0.2466, "step": 37281 }, { "epoch": 3.020252754374595, "grad_norm": 0.06454170495271683, "learning_rate": 5.4430892479409514e-05, "loss": 0.2076, "step": 37282 }, { "epoch": 3.020333765392093, "grad_norm": 0.07755886763334274, "learning_rate": 5.4426391826814894e-05, "loss": 0.2343, "step": 37283 }, { "epoch": 3.020414776409592, "grad_norm": 0.07092835009098053, "learning_rate": 5.442189117422026e-05, "loss": 0.2077, "step": 37284 }, { "epoch": 3.02049578742709, "grad_norm": 0.0711079090833664, "learning_rate": 5.4417390521625635e-05, "loss": 0.2171, "step": 37285 }, { "epoch": 3.0205767984445884, "grad_norm": 0.06403342634439468, "learning_rate": 5.4412889869031015e-05, "loss": 0.2404, "step": 37286 }, { "epoch": 3.020657809462087, "grad_norm": 0.07245420664548874, "learning_rate": 5.440838921643638e-05, "loss": 0.222, "step": 37287 }, { "epoch": 3.0207388204795853, "grad_norm": 0.09491761773824692, "learning_rate": 5.440388856384176e-05, "loss": 0.2192, "step": 37288 }, { "epoch": 3.0208198314970836, "grad_norm": 0.06958624720573425, "learning_rate": 5.4399387911247136e-05, "loss": 0.2372, "step": 37289 }, { "epoch": 3.020900842514582, "grad_norm": 0.07254919409751892, "learning_rate": 5.43948872586525e-05, "loss": 0.2008, "step": 37290 }, { "epoch": 3.0209818535320805, "grad_norm": 0.06609213352203369, "learning_rate": 5.439038660605788e-05, "loss": 0.2431, "step": 37291 }, { "epoch": 3.0210628645495787, "grad_norm": 0.07412755489349365, "learning_rate": 5.4385885953463257e-05, "loss": 0.2478, "step": 37292 }, { "epoch": 3.021143875567077, "grad_norm": 0.04594634473323822, "learning_rate": 5.438138530086862e-05, "loss": 0.2057, "step": 37293 }, { "epoch": 3.0212248865845757, "grad_norm": 0.09090680629014969, "learning_rate": 5.4376884648274004e-05, "loss": 0.252, "step": 37294 }, { "epoch": 3.021305897602074, "grad_norm": 0.08909545093774796, "learning_rate": 5.437238399567938e-05, "loss": 0.2878, "step": 37295 }, { "epoch": 3.021386908619572, "grad_norm": 0.07122080773115158, "learning_rate": 5.4367883343084744e-05, "loss": 0.2181, "step": 37296 }, { "epoch": 3.021467919637071, "grad_norm": 0.07232771068811417, "learning_rate": 5.4363382690490125e-05, "loss": 0.2455, "step": 37297 }, { "epoch": 3.021548930654569, "grad_norm": 0.07290542870759964, "learning_rate": 5.43588820378955e-05, "loss": 0.2413, "step": 37298 }, { "epoch": 3.0216299416720673, "grad_norm": 0.08508367091417313, "learning_rate": 5.4354381385300865e-05, "loss": 0.1985, "step": 37299 }, { "epoch": 3.0217109526895656, "grad_norm": 0.06248488649725914, "learning_rate": 5.4349880732706245e-05, "loss": 0.2598, "step": 37300 }, { "epoch": 3.0217919637070643, "grad_norm": 0.09384345263242722, "learning_rate": 5.434538008011162e-05, "loss": 0.2532, "step": 37301 }, { "epoch": 3.0218729747245625, "grad_norm": 0.07828628271818161, "learning_rate": 5.4340879427516986e-05, "loss": 0.253, "step": 37302 }, { "epoch": 3.0219539857420608, "grad_norm": 0.06680522859096527, "learning_rate": 5.433637877492237e-05, "loss": 0.2128, "step": 37303 }, { "epoch": 3.0220349967595594, "grad_norm": 0.07760295271873474, "learning_rate": 5.433187812232774e-05, "loss": 0.2144, "step": 37304 }, { "epoch": 3.0221160077770577, "grad_norm": 0.06053243577480316, "learning_rate": 5.432737746973311e-05, "loss": 0.2238, "step": 37305 }, { "epoch": 3.022197018794556, "grad_norm": 0.07531262189149857, "learning_rate": 5.4322876817138494e-05, "loss": 0.2447, "step": 37306 }, { "epoch": 3.0222780298120546, "grad_norm": 0.06485378742218018, "learning_rate": 5.431837616454386e-05, "loss": 0.2276, "step": 37307 }, { "epoch": 3.022359040829553, "grad_norm": 0.060371000319719315, "learning_rate": 5.431387551194923e-05, "loss": 0.1987, "step": 37308 }, { "epoch": 3.022440051847051, "grad_norm": 0.07953300327062607, "learning_rate": 5.4309374859354615e-05, "loss": 0.2299, "step": 37309 }, { "epoch": 3.0225210628645494, "grad_norm": 0.06593051552772522, "learning_rate": 5.430487420675998e-05, "loss": 0.2461, "step": 37310 }, { "epoch": 3.022602073882048, "grad_norm": 0.065412238240242, "learning_rate": 5.430037355416535e-05, "loss": 0.2018, "step": 37311 }, { "epoch": 3.0226830848995463, "grad_norm": 0.07266979664564133, "learning_rate": 5.4295872901570735e-05, "loss": 0.2308, "step": 37312 }, { "epoch": 3.0227640959170445, "grad_norm": 0.08019926398992538, "learning_rate": 5.42913722489761e-05, "loss": 0.244, "step": 37313 }, { "epoch": 3.0228451069345432, "grad_norm": 0.06879211217164993, "learning_rate": 5.428687159638148e-05, "loss": 0.2152, "step": 37314 }, { "epoch": 3.0229261179520415, "grad_norm": 0.07490628957748413, "learning_rate": 5.4282370943786856e-05, "loss": 0.2829, "step": 37315 }, { "epoch": 3.0230071289695397, "grad_norm": 0.06716594845056534, "learning_rate": 5.427787029119222e-05, "loss": 0.2269, "step": 37316 }, { "epoch": 3.0230881399870384, "grad_norm": 0.0727708488702774, "learning_rate": 5.4273369638597603e-05, "loss": 0.2197, "step": 37317 }, { "epoch": 3.0231691510045366, "grad_norm": 0.07033897936344147, "learning_rate": 5.426886898600298e-05, "loss": 0.227, "step": 37318 }, { "epoch": 3.023250162022035, "grad_norm": 0.07997550070285797, "learning_rate": 5.4264368333408344e-05, "loss": 0.1858, "step": 37319 }, { "epoch": 3.0233311730395336, "grad_norm": 0.05678095296025276, "learning_rate": 5.4259867680813724e-05, "loss": 0.226, "step": 37320 }, { "epoch": 3.023412184057032, "grad_norm": 0.05888417363166809, "learning_rate": 5.42553670282191e-05, "loss": 0.2114, "step": 37321 }, { "epoch": 3.02349319507453, "grad_norm": 0.08064017444849014, "learning_rate": 5.4250866375624465e-05, "loss": 0.264, "step": 37322 }, { "epoch": 3.0235742060920283, "grad_norm": 0.0671364963054657, "learning_rate": 5.4246365723029845e-05, "loss": 0.2271, "step": 37323 }, { "epoch": 3.023655217109527, "grad_norm": 0.07308002561330795, "learning_rate": 5.424186507043522e-05, "loss": 0.2027, "step": 37324 }, { "epoch": 3.0237362281270252, "grad_norm": 0.07530664652585983, "learning_rate": 5.4237364417840585e-05, "loss": 0.2335, "step": 37325 }, { "epoch": 3.0238172391445235, "grad_norm": 0.07628055661916733, "learning_rate": 5.4232863765245966e-05, "loss": 0.2619, "step": 37326 }, { "epoch": 3.023898250162022, "grad_norm": 0.07323234528303146, "learning_rate": 5.422836311265134e-05, "loss": 0.2737, "step": 37327 }, { "epoch": 3.0239792611795204, "grad_norm": 0.0741615816950798, "learning_rate": 5.4223862460056706e-05, "loss": 0.2271, "step": 37328 }, { "epoch": 3.0240602721970187, "grad_norm": 0.07981351017951965, "learning_rate": 5.421936180746209e-05, "loss": 0.242, "step": 37329 }, { "epoch": 3.0241412832145174, "grad_norm": 0.07190749794244766, "learning_rate": 5.421486115486746e-05, "loss": 0.2174, "step": 37330 }, { "epoch": 3.0242222942320156, "grad_norm": 0.07480721175670624, "learning_rate": 5.421036050227283e-05, "loss": 0.2183, "step": 37331 }, { "epoch": 3.024303305249514, "grad_norm": 0.0733325406908989, "learning_rate": 5.420585984967821e-05, "loss": 0.2294, "step": 37332 }, { "epoch": 3.024384316267012, "grad_norm": 0.07993824034929276, "learning_rate": 5.420135919708358e-05, "loss": 0.2483, "step": 37333 }, { "epoch": 3.024465327284511, "grad_norm": 0.07397164404392242, "learning_rate": 5.419685854448895e-05, "loss": 0.2497, "step": 37334 }, { "epoch": 3.024546338302009, "grad_norm": 0.07546770572662354, "learning_rate": 5.419235789189433e-05, "loss": 0.2276, "step": 37335 }, { "epoch": 3.0246273493195073, "grad_norm": 0.07344533503055573, "learning_rate": 5.41878572392997e-05, "loss": 0.2389, "step": 37336 }, { "epoch": 3.024708360337006, "grad_norm": 0.0710587278008461, "learning_rate": 5.418335658670507e-05, "loss": 0.2324, "step": 37337 }, { "epoch": 3.024789371354504, "grad_norm": 0.0577889122068882, "learning_rate": 5.417885593411045e-05, "loss": 0.2531, "step": 37338 }, { "epoch": 3.0248703823720025, "grad_norm": 0.07441332191228867, "learning_rate": 5.417435528151582e-05, "loss": 0.2532, "step": 37339 }, { "epoch": 3.024951393389501, "grad_norm": 0.07562925666570663, "learning_rate": 5.41698546289212e-05, "loss": 0.2319, "step": 37340 }, { "epoch": 3.0250324044069994, "grad_norm": 0.061570510268211365, "learning_rate": 5.416535397632657e-05, "loss": 0.2177, "step": 37341 }, { "epoch": 3.0251134154244976, "grad_norm": 0.06682217121124268, "learning_rate": 5.4160853323731944e-05, "loss": 0.2536, "step": 37342 }, { "epoch": 3.0251944264419963, "grad_norm": 0.07247215509414673, "learning_rate": 5.4156352671137324e-05, "loss": 0.2109, "step": 37343 }, { "epoch": 3.0252754374594946, "grad_norm": 0.06940843164920807, "learning_rate": 5.415185201854269e-05, "loss": 0.215, "step": 37344 }, { "epoch": 3.025356448476993, "grad_norm": 0.09310358017683029, "learning_rate": 5.4147351365948064e-05, "loss": 0.1984, "step": 37345 }, { "epoch": 3.025437459494491, "grad_norm": 0.06538576632738113, "learning_rate": 5.4142850713353445e-05, "loss": 0.2462, "step": 37346 }, { "epoch": 3.0255184705119897, "grad_norm": 0.06469295918941498, "learning_rate": 5.413835006075881e-05, "loss": 0.2298, "step": 37347 }, { "epoch": 3.025599481529488, "grad_norm": 0.08019588887691498, "learning_rate": 5.4133849408164185e-05, "loss": 0.2458, "step": 37348 }, { "epoch": 3.0256804925469862, "grad_norm": 0.06767239421606064, "learning_rate": 5.4129348755569565e-05, "loss": 0.2186, "step": 37349 }, { "epoch": 3.025761503564485, "grad_norm": 0.06455112993717194, "learning_rate": 5.412484810297493e-05, "loss": 0.2254, "step": 37350 }, { "epoch": 3.025842514581983, "grad_norm": 0.06013812869787216, "learning_rate": 5.4120347450380306e-05, "loss": 0.2261, "step": 37351 }, { "epoch": 3.0259235255994814, "grad_norm": 0.06590741127729416, "learning_rate": 5.4115846797785686e-05, "loss": 0.2639, "step": 37352 }, { "epoch": 3.02600453661698, "grad_norm": 0.06299849599599838, "learning_rate": 5.411134614519105e-05, "loss": 0.2252, "step": 37353 }, { "epoch": 3.0260855476344783, "grad_norm": 0.0614766888320446, "learning_rate": 5.410684549259643e-05, "loss": 0.2291, "step": 37354 }, { "epoch": 3.0261665586519766, "grad_norm": 0.0771852657198906, "learning_rate": 5.410234484000181e-05, "loss": 0.219, "step": 37355 }, { "epoch": 3.026247569669475, "grad_norm": 0.060940295457839966, "learning_rate": 5.4097844187407174e-05, "loss": 0.2142, "step": 37356 }, { "epoch": 3.0263285806869735, "grad_norm": 0.0864911824464798, "learning_rate": 5.409334353481255e-05, "loss": 0.2632, "step": 37357 }, { "epoch": 3.0264095917044718, "grad_norm": 0.0863729789853096, "learning_rate": 5.408884288221793e-05, "loss": 0.2591, "step": 37358 }, { "epoch": 3.02649060272197, "grad_norm": 0.06721893697977066, "learning_rate": 5.4084342229623295e-05, "loss": 0.2522, "step": 37359 }, { "epoch": 3.0265716137394687, "grad_norm": 0.0649418756365776, "learning_rate": 5.407984157702867e-05, "loss": 0.2273, "step": 37360 }, { "epoch": 3.026652624756967, "grad_norm": 0.06797848641872406, "learning_rate": 5.407534092443405e-05, "loss": 0.2519, "step": 37361 }, { "epoch": 3.026733635774465, "grad_norm": 0.08032295852899551, "learning_rate": 5.4070840271839416e-05, "loss": 0.2394, "step": 37362 }, { "epoch": 3.026814646791964, "grad_norm": 0.05988214164972305, "learning_rate": 5.406633961924479e-05, "loss": 0.2148, "step": 37363 }, { "epoch": 3.026895657809462, "grad_norm": 0.0621197335422039, "learning_rate": 5.406183896665017e-05, "loss": 0.2123, "step": 37364 }, { "epoch": 3.0269766688269604, "grad_norm": 0.06993670016527176, "learning_rate": 5.4057338314055536e-05, "loss": 0.2174, "step": 37365 }, { "epoch": 3.027057679844459, "grad_norm": 0.05763579532504082, "learning_rate": 5.405283766146091e-05, "loss": 0.187, "step": 37366 }, { "epoch": 3.0271386908619573, "grad_norm": 0.06508929282426834, "learning_rate": 5.404833700886629e-05, "loss": 0.2239, "step": 37367 }, { "epoch": 3.0272197018794555, "grad_norm": 0.06755175441503525, "learning_rate": 5.404383635627166e-05, "loss": 0.2611, "step": 37368 }, { "epoch": 3.027300712896954, "grad_norm": 0.07040070742368698, "learning_rate": 5.403933570367704e-05, "loss": 0.2613, "step": 37369 }, { "epoch": 3.0273817239144525, "grad_norm": 0.06353382021188736, "learning_rate": 5.403483505108241e-05, "loss": 0.2224, "step": 37370 }, { "epoch": 3.0274627349319507, "grad_norm": 0.06055076792836189, "learning_rate": 5.403033439848778e-05, "loss": 0.2054, "step": 37371 }, { "epoch": 3.027543745949449, "grad_norm": 0.06850864738225937, "learning_rate": 5.4025833745893165e-05, "loss": 0.2224, "step": 37372 }, { "epoch": 3.0276247569669477, "grad_norm": 0.058965496718883514, "learning_rate": 5.402133309329853e-05, "loss": 0.199, "step": 37373 }, { "epoch": 3.027705767984446, "grad_norm": 0.06645647436380386, "learning_rate": 5.40168324407039e-05, "loss": 0.2263, "step": 37374 }, { "epoch": 3.027786779001944, "grad_norm": 0.07181558758020401, "learning_rate": 5.4012331788109286e-05, "loss": 0.2058, "step": 37375 }, { "epoch": 3.027867790019443, "grad_norm": 0.06589259952306747, "learning_rate": 5.400783113551465e-05, "loss": 0.2199, "step": 37376 }, { "epoch": 3.027948801036941, "grad_norm": 0.07510503381490707, "learning_rate": 5.400333048292002e-05, "loss": 0.2155, "step": 37377 }, { "epoch": 3.0280298120544393, "grad_norm": 0.0718095675110817, "learning_rate": 5.399882983032541e-05, "loss": 0.2156, "step": 37378 }, { "epoch": 3.0281108230719376, "grad_norm": 0.07855464518070221, "learning_rate": 5.3994329177730774e-05, "loss": 0.2282, "step": 37379 }, { "epoch": 3.0281918340894363, "grad_norm": 0.0817565992474556, "learning_rate": 5.398982852513614e-05, "loss": 0.2462, "step": 37380 }, { "epoch": 3.0282728451069345, "grad_norm": 0.06936581432819366, "learning_rate": 5.398532787254153e-05, "loss": 0.2508, "step": 37381 }, { "epoch": 3.0283538561244328, "grad_norm": 0.07433851808309555, "learning_rate": 5.3980827219946894e-05, "loss": 0.25, "step": 37382 }, { "epoch": 3.0284348671419314, "grad_norm": 0.06942424178123474, "learning_rate": 5.397632656735226e-05, "loss": 0.2481, "step": 37383 }, { "epoch": 3.0285158781594297, "grad_norm": 0.07389133423566818, "learning_rate": 5.397182591475765e-05, "loss": 0.2411, "step": 37384 }, { "epoch": 3.028596889176928, "grad_norm": 0.06642919778823853, "learning_rate": 5.3967325262163015e-05, "loss": 0.2291, "step": 37385 }, { "epoch": 3.0286779001944266, "grad_norm": 0.09277532249689102, "learning_rate": 5.396282460956838e-05, "loss": 0.269, "step": 37386 }, { "epoch": 3.028758911211925, "grad_norm": 0.07429872453212738, "learning_rate": 5.395832395697377e-05, "loss": 0.2613, "step": 37387 }, { "epoch": 3.028839922229423, "grad_norm": 0.07359006255865097, "learning_rate": 5.3953823304379136e-05, "loss": 0.2138, "step": 37388 }, { "epoch": 3.028920933246922, "grad_norm": 0.07269123941659927, "learning_rate": 5.394932265178451e-05, "loss": 0.251, "step": 37389 }, { "epoch": 3.02900194426442, "grad_norm": 0.06176629289984703, "learning_rate": 5.394482199918989e-05, "loss": 0.2489, "step": 37390 }, { "epoch": 3.0290829552819183, "grad_norm": 0.07285406440496445, "learning_rate": 5.394032134659526e-05, "loss": 0.2211, "step": 37391 }, { "epoch": 3.0291639662994165, "grad_norm": 0.052691586315631866, "learning_rate": 5.393582069400063e-05, "loss": 0.2022, "step": 37392 }, { "epoch": 3.029244977316915, "grad_norm": 0.058721382170915604, "learning_rate": 5.393132004140601e-05, "loss": 0.2314, "step": 37393 }, { "epoch": 3.0293259883344135, "grad_norm": 0.06069071590900421, "learning_rate": 5.392681938881138e-05, "loss": 0.1991, "step": 37394 }, { "epoch": 3.0294069993519117, "grad_norm": 0.058969639241695404, "learning_rate": 5.392231873621676e-05, "loss": 0.2395, "step": 37395 }, { "epoch": 3.0294880103694104, "grad_norm": 0.07969574630260468, "learning_rate": 5.391781808362213e-05, "loss": 0.2136, "step": 37396 }, { "epoch": 3.0295690213869086, "grad_norm": 0.07085856050252914, "learning_rate": 5.39133174310275e-05, "loss": 0.2667, "step": 37397 }, { "epoch": 3.029650032404407, "grad_norm": 0.07769621908664703, "learning_rate": 5.390881677843288e-05, "loss": 0.2324, "step": 37398 }, { "epoch": 3.0297310434219056, "grad_norm": 0.07305508852005005, "learning_rate": 5.390431612583825e-05, "loss": 0.2091, "step": 37399 }, { "epoch": 3.029812054439404, "grad_norm": 0.07000664621591568, "learning_rate": 5.389981547324362e-05, "loss": 0.204, "step": 37400 }, { "epoch": 3.029893065456902, "grad_norm": 0.06918566673994064, "learning_rate": 5.3895314820649e-05, "loss": 0.2249, "step": 37401 }, { "epoch": 3.0299740764744003, "grad_norm": 0.06122610345482826, "learning_rate": 5.389081416805437e-05, "loss": 0.2038, "step": 37402 }, { "epoch": 3.030055087491899, "grad_norm": 0.056917302310466766, "learning_rate": 5.388631351545974e-05, "loss": 0.2111, "step": 37403 }, { "epoch": 3.0301360985093972, "grad_norm": 0.08675568550825119, "learning_rate": 5.388181286286512e-05, "loss": 0.2416, "step": 37404 }, { "epoch": 3.0302171095268955, "grad_norm": 0.08074220269918442, "learning_rate": 5.3877312210270494e-05, "loss": 0.2445, "step": 37405 }, { "epoch": 3.030298120544394, "grad_norm": 0.06495244055986404, "learning_rate": 5.387281155767586e-05, "loss": 0.2734, "step": 37406 }, { "epoch": 3.0303791315618924, "grad_norm": 0.07835765928030014, "learning_rate": 5.386831090508124e-05, "loss": 0.239, "step": 37407 }, { "epoch": 3.0304601425793907, "grad_norm": 0.07907546311616898, "learning_rate": 5.3863810252486615e-05, "loss": 0.2104, "step": 37408 }, { "epoch": 3.0305411535968894, "grad_norm": 0.06553105264902115, "learning_rate": 5.385930959989198e-05, "loss": 0.2213, "step": 37409 }, { "epoch": 3.0306221646143876, "grad_norm": 0.08010930567979813, "learning_rate": 5.385480894729736e-05, "loss": 0.2438, "step": 37410 }, { "epoch": 3.030703175631886, "grad_norm": 0.06734942644834518, "learning_rate": 5.3850308294702736e-05, "loss": 0.2651, "step": 37411 }, { "epoch": 3.0307841866493845, "grad_norm": 0.07775172591209412, "learning_rate": 5.38458076421081e-05, "loss": 0.2688, "step": 37412 }, { "epoch": 3.030865197666883, "grad_norm": 0.08631449192762375, "learning_rate": 5.384130698951348e-05, "loss": 0.2444, "step": 37413 }, { "epoch": 3.030946208684381, "grad_norm": 0.0761285126209259, "learning_rate": 5.3836806336918856e-05, "loss": 0.2356, "step": 37414 }, { "epoch": 3.0310272197018793, "grad_norm": 0.06520287692546844, "learning_rate": 5.383230568432422e-05, "loss": 0.2142, "step": 37415 }, { "epoch": 3.031108230719378, "grad_norm": 0.08522919565439224, "learning_rate": 5.3827805031729604e-05, "loss": 0.2504, "step": 37416 }, { "epoch": 3.031189241736876, "grad_norm": 0.07640458643436432, "learning_rate": 5.382330437913498e-05, "loss": 0.2308, "step": 37417 }, { "epoch": 3.0312702527543745, "grad_norm": 0.0752745121717453, "learning_rate": 5.3818803726540344e-05, "loss": 0.2541, "step": 37418 }, { "epoch": 3.031351263771873, "grad_norm": 0.06180490925908089, "learning_rate": 5.3814303073945725e-05, "loss": 0.2418, "step": 37419 }, { "epoch": 3.0314322747893714, "grad_norm": 0.07486403733491898, "learning_rate": 5.38098024213511e-05, "loss": 0.1804, "step": 37420 }, { "epoch": 3.0315132858068696, "grad_norm": 0.06396470218896866, "learning_rate": 5.380530176875648e-05, "loss": 0.2823, "step": 37421 }, { "epoch": 3.0315942968243683, "grad_norm": 0.08682464808225632, "learning_rate": 5.3800801116161845e-05, "loss": 0.3189, "step": 37422 }, { "epoch": 3.0316753078418666, "grad_norm": 0.06351017951965332, "learning_rate": 5.379630046356722e-05, "loss": 0.2396, "step": 37423 }, { "epoch": 3.031756318859365, "grad_norm": 0.07645206898450851, "learning_rate": 5.37917998109726e-05, "loss": 0.1973, "step": 37424 }, { "epoch": 3.031837329876863, "grad_norm": 0.07359036803245544, "learning_rate": 5.3787299158377966e-05, "loss": 0.2536, "step": 37425 }, { "epoch": 3.0319183408943617, "grad_norm": 0.0697692409157753, "learning_rate": 5.378279850578334e-05, "loss": 0.2437, "step": 37426 }, { "epoch": 3.03199935191186, "grad_norm": 0.056899294257164, "learning_rate": 5.377829785318872e-05, "loss": 0.203, "step": 37427 }, { "epoch": 3.0320803629293582, "grad_norm": 0.06358642131090164, "learning_rate": 5.377379720059409e-05, "loss": 0.2269, "step": 37428 }, { "epoch": 3.032161373946857, "grad_norm": 0.08034027367830276, "learning_rate": 5.376929654799946e-05, "loss": 0.2828, "step": 37429 }, { "epoch": 3.032242384964355, "grad_norm": 0.08328770101070404, "learning_rate": 5.376479589540484e-05, "loss": 0.2514, "step": 37430 }, { "epoch": 3.0323233959818534, "grad_norm": 0.07368003576993942, "learning_rate": 5.376029524281021e-05, "loss": 0.2088, "step": 37431 }, { "epoch": 3.032404406999352, "grad_norm": 0.054242219775915146, "learning_rate": 5.375579459021558e-05, "loss": 0.2459, "step": 37432 }, { "epoch": 3.0324854180168503, "grad_norm": 0.061121974140405655, "learning_rate": 5.375129393762096e-05, "loss": 0.2175, "step": 37433 }, { "epoch": 3.0325664290343486, "grad_norm": 0.07788266241550446, "learning_rate": 5.374679328502633e-05, "loss": 0.245, "step": 37434 }, { "epoch": 3.0326474400518473, "grad_norm": 0.07702527195215225, "learning_rate": 5.37422926324317e-05, "loss": 0.2215, "step": 37435 }, { "epoch": 3.0327284510693455, "grad_norm": 0.06540277600288391, "learning_rate": 5.373779197983708e-05, "loss": 0.2075, "step": 37436 }, { "epoch": 3.0328094620868438, "grad_norm": 0.09210977703332901, "learning_rate": 5.373329132724245e-05, "loss": 0.2786, "step": 37437 }, { "epoch": 3.032890473104342, "grad_norm": 0.06716837733983994, "learning_rate": 5.372879067464782e-05, "loss": 0.2514, "step": 37438 }, { "epoch": 3.0329714841218407, "grad_norm": 0.0809812918305397, "learning_rate": 5.37242900220532e-05, "loss": 0.2673, "step": 37439 }, { "epoch": 3.033052495139339, "grad_norm": 0.08728468418121338, "learning_rate": 5.371978936945857e-05, "loss": 0.2203, "step": 37440 }, { "epoch": 3.033133506156837, "grad_norm": 0.08095324039459229, "learning_rate": 5.3715288716863944e-05, "loss": 0.2107, "step": 37441 }, { "epoch": 3.033214517174336, "grad_norm": 0.0658116266131401, "learning_rate": 5.3710788064269324e-05, "loss": 0.2465, "step": 37442 }, { "epoch": 3.033295528191834, "grad_norm": 0.07490494102239609, "learning_rate": 5.370628741167469e-05, "loss": 0.215, "step": 37443 }, { "epoch": 3.0333765392093324, "grad_norm": 0.06612545251846313, "learning_rate": 5.3701786759080065e-05, "loss": 0.2258, "step": 37444 }, { "epoch": 3.033457550226831, "grad_norm": 0.06559719145298004, "learning_rate": 5.3697286106485445e-05, "loss": 0.2329, "step": 37445 }, { "epoch": 3.0335385612443293, "grad_norm": 0.08513180166482925, "learning_rate": 5.369278545389081e-05, "loss": 0.2581, "step": 37446 }, { "epoch": 3.0336195722618275, "grad_norm": 0.0750497505068779, "learning_rate": 5.36882848012962e-05, "loss": 0.1976, "step": 37447 }, { "epoch": 3.033700583279326, "grad_norm": 0.06607310473918915, "learning_rate": 5.3683784148701566e-05, "loss": 0.2383, "step": 37448 }, { "epoch": 3.0337815942968245, "grad_norm": 0.06415095180273056, "learning_rate": 5.367928349610693e-05, "loss": 0.1751, "step": 37449 }, { "epoch": 3.0338626053143227, "grad_norm": 0.07550282776355743, "learning_rate": 5.367478284351232e-05, "loss": 0.2309, "step": 37450 }, { "epoch": 3.033943616331821, "grad_norm": 0.06409140676259995, "learning_rate": 5.3670282190917687e-05, "loss": 0.2014, "step": 37451 }, { "epoch": 3.0340246273493197, "grad_norm": 0.06622499972581863, "learning_rate": 5.3665781538323053e-05, "loss": 0.2573, "step": 37452 }, { "epoch": 3.034105638366818, "grad_norm": 0.06955836713314056, "learning_rate": 5.366128088572844e-05, "loss": 0.2423, "step": 37453 }, { "epoch": 3.034186649384316, "grad_norm": 0.057708337903022766, "learning_rate": 5.365678023313381e-05, "loss": 0.1841, "step": 37454 }, { "epoch": 3.034267660401815, "grad_norm": 0.06644859164953232, "learning_rate": 5.3652279580539174e-05, "loss": 0.2402, "step": 37455 }, { "epoch": 3.034348671419313, "grad_norm": 0.06704627722501755, "learning_rate": 5.364777892794456e-05, "loss": 0.2369, "step": 37456 }, { "epoch": 3.0344296824368113, "grad_norm": 0.07295235246419907, "learning_rate": 5.364327827534993e-05, "loss": 0.2788, "step": 37457 }, { "epoch": 3.0345106934543096, "grad_norm": 0.08905566483736038, "learning_rate": 5.36387776227553e-05, "loss": 0.2241, "step": 37458 }, { "epoch": 3.0345917044718083, "grad_norm": 0.07346481084823608, "learning_rate": 5.363427697016068e-05, "loss": 0.2377, "step": 37459 }, { "epoch": 3.0346727154893065, "grad_norm": 0.07479727268218994, "learning_rate": 5.362977631756605e-05, "loss": 0.2355, "step": 37460 }, { "epoch": 3.0347537265068047, "grad_norm": 0.06769086420536041, "learning_rate": 5.362527566497142e-05, "loss": 0.2046, "step": 37461 }, { "epoch": 3.0348347375243034, "grad_norm": 0.060751911252737045, "learning_rate": 5.36207750123768e-05, "loss": 0.2178, "step": 37462 }, { "epoch": 3.0349157485418017, "grad_norm": 0.07391112297773361, "learning_rate": 5.361627435978217e-05, "loss": 0.2547, "step": 37463 }, { "epoch": 3.0349967595593, "grad_norm": 0.0735718160867691, "learning_rate": 5.3611773707187543e-05, "loss": 0.2025, "step": 37464 }, { "epoch": 3.0350777705767986, "grad_norm": 0.07991587370634079, "learning_rate": 5.3607273054592924e-05, "loss": 0.2145, "step": 37465 }, { "epoch": 3.035158781594297, "grad_norm": 0.07211611419916153, "learning_rate": 5.360277240199829e-05, "loss": 0.2086, "step": 37466 }, { "epoch": 3.035239792611795, "grad_norm": 0.0736667737364769, "learning_rate": 5.3598271749403664e-05, "loss": 0.2063, "step": 37467 }, { "epoch": 3.035320803629294, "grad_norm": 0.07628808170557022, "learning_rate": 5.3593771096809045e-05, "loss": 0.226, "step": 37468 }, { "epoch": 3.035401814646792, "grad_norm": 0.06351827830076218, "learning_rate": 5.358927044421441e-05, "loss": 0.2271, "step": 37469 }, { "epoch": 3.0354828256642903, "grad_norm": 0.06043693795800209, "learning_rate": 5.3584769791619785e-05, "loss": 0.2059, "step": 37470 }, { "epoch": 3.0355638366817885, "grad_norm": 0.06572989374399185, "learning_rate": 5.3580269139025165e-05, "loss": 0.2441, "step": 37471 }, { "epoch": 3.035644847699287, "grad_norm": 0.07039684802293777, "learning_rate": 5.357576848643053e-05, "loss": 0.2481, "step": 37472 }, { "epoch": 3.0357258587167855, "grad_norm": 0.06588947027921677, "learning_rate": 5.357126783383591e-05, "loss": 0.2329, "step": 37473 }, { "epoch": 3.0358068697342837, "grad_norm": 0.057079948484897614, "learning_rate": 5.3566767181241286e-05, "loss": 0.181, "step": 37474 }, { "epoch": 3.0358878807517824, "grad_norm": 0.05848081409931183, "learning_rate": 5.356226652864665e-05, "loss": 0.2016, "step": 37475 }, { "epoch": 3.0359688917692806, "grad_norm": 0.06334573030471802, "learning_rate": 5.3557765876052033e-05, "loss": 0.2294, "step": 37476 }, { "epoch": 3.036049902786779, "grad_norm": 0.07205773144960403, "learning_rate": 5.355326522345741e-05, "loss": 0.2109, "step": 37477 }, { "epoch": 3.0361309138042776, "grad_norm": 0.07480573654174805, "learning_rate": 5.3548764570862774e-05, "loss": 0.2309, "step": 37478 }, { "epoch": 3.036211924821776, "grad_norm": 0.050419967621564865, "learning_rate": 5.3544263918268154e-05, "loss": 0.1819, "step": 37479 }, { "epoch": 3.036292935839274, "grad_norm": 0.05797084793448448, "learning_rate": 5.353976326567353e-05, "loss": 0.199, "step": 37480 }, { "epoch": 3.0363739468567723, "grad_norm": 0.0698668509721756, "learning_rate": 5.3535262613078895e-05, "loss": 0.2142, "step": 37481 }, { "epoch": 3.036454957874271, "grad_norm": 0.07098384946584702, "learning_rate": 5.3530761960484275e-05, "loss": 0.2571, "step": 37482 }, { "epoch": 3.0365359688917692, "grad_norm": 0.06307261437177658, "learning_rate": 5.352626130788965e-05, "loss": 0.2419, "step": 37483 }, { "epoch": 3.0366169799092675, "grad_norm": 0.07396720349788666, "learning_rate": 5.3521760655295015e-05, "loss": 0.2252, "step": 37484 }, { "epoch": 3.036697990926766, "grad_norm": 0.07606613636016846, "learning_rate": 5.3517260002700396e-05, "loss": 0.212, "step": 37485 }, { "epoch": 3.0367790019442644, "grad_norm": 0.07393737137317657, "learning_rate": 5.351275935010577e-05, "loss": 0.2213, "step": 37486 }, { "epoch": 3.0368600129617627, "grad_norm": 0.07436393201351166, "learning_rate": 5.3508258697511136e-05, "loss": 0.257, "step": 37487 }, { "epoch": 3.0369410239792614, "grad_norm": 0.0729961097240448, "learning_rate": 5.350375804491652e-05, "loss": 0.2238, "step": 37488 }, { "epoch": 3.0370220349967596, "grad_norm": 0.08112277835607529, "learning_rate": 5.349925739232189e-05, "loss": 0.2837, "step": 37489 }, { "epoch": 3.037103046014258, "grad_norm": 0.05937008932232857, "learning_rate": 5.349475673972726e-05, "loss": 0.2072, "step": 37490 }, { "epoch": 3.0371840570317565, "grad_norm": 0.07106920331716537, "learning_rate": 5.349025608713264e-05, "loss": 0.2089, "step": 37491 }, { "epoch": 3.037265068049255, "grad_norm": 0.05964120104908943, "learning_rate": 5.348575543453801e-05, "loss": 0.2145, "step": 37492 }, { "epoch": 3.037346079066753, "grad_norm": 0.0652584359049797, "learning_rate": 5.348125478194338e-05, "loss": 0.2002, "step": 37493 }, { "epoch": 3.0374270900842513, "grad_norm": 0.08113518357276917, "learning_rate": 5.347675412934876e-05, "loss": 0.2559, "step": 37494 }, { "epoch": 3.03750810110175, "grad_norm": 0.06260479241609573, "learning_rate": 5.347225347675413e-05, "loss": 0.2166, "step": 37495 }, { "epoch": 3.037589112119248, "grad_norm": 0.0738939419388771, "learning_rate": 5.34677528241595e-05, "loss": 0.2316, "step": 37496 }, { "epoch": 3.0376701231367464, "grad_norm": 0.08688575029373169, "learning_rate": 5.346325217156488e-05, "loss": 0.1926, "step": 37497 }, { "epoch": 3.037751134154245, "grad_norm": 0.09045497328042984, "learning_rate": 5.345875151897025e-05, "loss": 0.2259, "step": 37498 }, { "epoch": 3.0378321451717434, "grad_norm": 0.07186606526374817, "learning_rate": 5.345425086637563e-05, "loss": 0.2487, "step": 37499 }, { "epoch": 3.0379131561892416, "grad_norm": 0.09282194077968597, "learning_rate": 5.3449750213781e-05, "loss": 0.2408, "step": 37500 }, { "epoch": 3.0379941672067403, "grad_norm": 0.06380818784236908, "learning_rate": 5.3445249561186374e-05, "loss": 0.1914, "step": 37501 }, { "epoch": 3.0380751782242386, "grad_norm": 0.0604427196085453, "learning_rate": 5.3440748908591754e-05, "loss": 0.1788, "step": 37502 }, { "epoch": 3.038156189241737, "grad_norm": 0.07687865197658539, "learning_rate": 5.343624825599712e-05, "loss": 0.1917, "step": 37503 }, { "epoch": 3.038237200259235, "grad_norm": 0.07873617112636566, "learning_rate": 5.3431747603402494e-05, "loss": 0.2192, "step": 37504 }, { "epoch": 3.0383182112767337, "grad_norm": 0.07436539977788925, "learning_rate": 5.3427246950807875e-05, "loss": 0.2413, "step": 37505 }, { "epoch": 3.038399222294232, "grad_norm": 0.07715284079313278, "learning_rate": 5.342274629821324e-05, "loss": 0.2259, "step": 37506 }, { "epoch": 3.0384802333117302, "grad_norm": 0.06521465629339218, "learning_rate": 5.3418245645618615e-05, "loss": 0.2354, "step": 37507 }, { "epoch": 3.038561244329229, "grad_norm": 0.0683523640036583, "learning_rate": 5.3413744993023996e-05, "loss": 0.1877, "step": 37508 }, { "epoch": 3.038642255346727, "grad_norm": 0.09436856210231781, "learning_rate": 5.340924434042936e-05, "loss": 0.2488, "step": 37509 }, { "epoch": 3.0387232663642254, "grad_norm": 0.07124968618154526, "learning_rate": 5.3404743687834736e-05, "loss": 0.209, "step": 37510 }, { "epoch": 3.038804277381724, "grad_norm": 0.07376158237457275, "learning_rate": 5.3400243035240116e-05, "loss": 0.2305, "step": 37511 }, { "epoch": 3.0388852883992223, "grad_norm": 0.07440029084682465, "learning_rate": 5.339574238264548e-05, "loss": 0.2344, "step": 37512 }, { "epoch": 3.0389662994167206, "grad_norm": 0.07803120464086533, "learning_rate": 5.339124173005086e-05, "loss": 0.2568, "step": 37513 }, { "epoch": 3.039047310434219, "grad_norm": 0.08405184745788574, "learning_rate": 5.338674107745624e-05, "loss": 0.2262, "step": 37514 }, { "epoch": 3.0391283214517175, "grad_norm": 0.08579245954751968, "learning_rate": 5.3382240424861604e-05, "loss": 0.2317, "step": 37515 }, { "epoch": 3.0392093324692158, "grad_norm": 0.0725676491856575, "learning_rate": 5.337773977226698e-05, "loss": 0.2415, "step": 37516 }, { "epoch": 3.039290343486714, "grad_norm": 0.0620294027030468, "learning_rate": 5.337323911967236e-05, "loss": 0.233, "step": 37517 }, { "epoch": 3.0393713545042127, "grad_norm": 0.06782814115285873, "learning_rate": 5.3368738467077725e-05, "loss": 0.2392, "step": 37518 }, { "epoch": 3.039452365521711, "grad_norm": 0.06763561815023422, "learning_rate": 5.33642378144831e-05, "loss": 0.2074, "step": 37519 }, { "epoch": 3.039533376539209, "grad_norm": 0.06067238375544548, "learning_rate": 5.335973716188848e-05, "loss": 0.2242, "step": 37520 }, { "epoch": 3.039614387556708, "grad_norm": 0.06123970076441765, "learning_rate": 5.3355236509293846e-05, "loss": 0.2208, "step": 37521 }, { "epoch": 3.039695398574206, "grad_norm": 0.07573569566011429, "learning_rate": 5.335073585669922e-05, "loss": 0.2613, "step": 37522 }, { "epoch": 3.0397764095917044, "grad_norm": 0.09033697843551636, "learning_rate": 5.33462352041046e-05, "loss": 0.1972, "step": 37523 }, { "epoch": 3.039857420609203, "grad_norm": 0.07207215577363968, "learning_rate": 5.3341734551509966e-05, "loss": 0.2466, "step": 37524 }, { "epoch": 3.0399384316267013, "grad_norm": 0.08466987311840057, "learning_rate": 5.3337233898915354e-05, "loss": 0.2555, "step": 37525 }, { "epoch": 3.0400194426441995, "grad_norm": 0.06799205392599106, "learning_rate": 5.333273324632072e-05, "loss": 0.2397, "step": 37526 }, { "epoch": 3.040100453661698, "grad_norm": 0.08047273755073547, "learning_rate": 5.3328232593726094e-05, "loss": 0.2083, "step": 37527 }, { "epoch": 3.0401814646791965, "grad_norm": 0.07457821071147919, "learning_rate": 5.3323731941131474e-05, "loss": 0.2328, "step": 37528 }, { "epoch": 3.0402624756966947, "grad_norm": 0.06416770815849304, "learning_rate": 5.331923128853684e-05, "loss": 0.2524, "step": 37529 }, { "epoch": 3.040343486714193, "grad_norm": 0.08857259899377823, "learning_rate": 5.3314730635942215e-05, "loss": 0.1908, "step": 37530 }, { "epoch": 3.0404244977316917, "grad_norm": 0.06999360769987106, "learning_rate": 5.3310229983347595e-05, "loss": 0.2341, "step": 37531 }, { "epoch": 3.04050550874919, "grad_norm": 0.08685637265443802, "learning_rate": 5.330572933075296e-05, "loss": 0.2426, "step": 37532 }, { "epoch": 3.040586519766688, "grad_norm": 0.061902157962322235, "learning_rate": 5.3301228678158336e-05, "loss": 0.1998, "step": 37533 }, { "epoch": 3.040667530784187, "grad_norm": 0.0677398070693016, "learning_rate": 5.3296728025563716e-05, "loss": 0.2147, "step": 37534 }, { "epoch": 3.040748541801685, "grad_norm": 0.07476639747619629, "learning_rate": 5.329222737296908e-05, "loss": 0.2109, "step": 37535 }, { "epoch": 3.0408295528191833, "grad_norm": 0.06877736747264862, "learning_rate": 5.3287726720374456e-05, "loss": 0.1989, "step": 37536 }, { "epoch": 3.0409105638366816, "grad_norm": 0.0667649358510971, "learning_rate": 5.328322606777984e-05, "loss": 0.1968, "step": 37537 }, { "epoch": 3.0409915748541803, "grad_norm": 0.06484334915876389, "learning_rate": 5.3278725415185204e-05, "loss": 0.1927, "step": 37538 }, { "epoch": 3.0410725858716785, "grad_norm": 0.0637441873550415, "learning_rate": 5.327422476259058e-05, "loss": 0.2431, "step": 37539 }, { "epoch": 3.0411535968891767, "grad_norm": 0.06637068092823029, "learning_rate": 5.326972410999596e-05, "loss": 0.2477, "step": 37540 }, { "epoch": 3.0412346079066754, "grad_norm": 0.07331857830286026, "learning_rate": 5.3265223457401324e-05, "loss": 0.2287, "step": 37541 }, { "epoch": 3.0413156189241737, "grad_norm": 0.06472773104906082, "learning_rate": 5.32607228048067e-05, "loss": 0.2434, "step": 37542 }, { "epoch": 3.041396629941672, "grad_norm": 0.061671916395425797, "learning_rate": 5.325622215221208e-05, "loss": 0.2499, "step": 37543 }, { "epoch": 3.0414776409591706, "grad_norm": 0.07631280273199081, "learning_rate": 5.3251721499617445e-05, "loss": 0.2362, "step": 37544 }, { "epoch": 3.041558651976669, "grad_norm": 0.07066506147384644, "learning_rate": 5.324722084702282e-05, "loss": 0.1933, "step": 37545 }, { "epoch": 3.041639662994167, "grad_norm": 0.07721670717000961, "learning_rate": 5.32427201944282e-05, "loss": 0.2023, "step": 37546 }, { "epoch": 3.041720674011666, "grad_norm": 0.0790068730711937, "learning_rate": 5.3238219541833566e-05, "loss": 0.2633, "step": 37547 }, { "epoch": 3.041801685029164, "grad_norm": 0.06365988403558731, "learning_rate": 5.323371888923894e-05, "loss": 0.2293, "step": 37548 }, { "epoch": 3.0418826960466623, "grad_norm": 0.07861355692148209, "learning_rate": 5.322921823664432e-05, "loss": 0.2473, "step": 37549 }, { "epoch": 3.0419637070641605, "grad_norm": 0.07868161052465439, "learning_rate": 5.322471758404969e-05, "loss": 0.2582, "step": 37550 }, { "epoch": 3.042044718081659, "grad_norm": 0.05878037214279175, "learning_rate": 5.322021693145506e-05, "loss": 0.2121, "step": 37551 }, { "epoch": 3.0421257290991575, "grad_norm": 0.06938491761684418, "learning_rate": 5.321571627886044e-05, "loss": 0.2811, "step": 37552 }, { "epoch": 3.0422067401166557, "grad_norm": 0.06721276044845581, "learning_rate": 5.321121562626581e-05, "loss": 0.2572, "step": 37553 }, { "epoch": 3.0422877511341544, "grad_norm": 0.08425439894199371, "learning_rate": 5.320671497367119e-05, "loss": 0.2336, "step": 37554 }, { "epoch": 3.0423687621516526, "grad_norm": 0.07287422567605972, "learning_rate": 5.320221432107656e-05, "loss": 0.2144, "step": 37555 }, { "epoch": 3.042449773169151, "grad_norm": 0.07077626138925552, "learning_rate": 5.319771366848193e-05, "loss": 0.2239, "step": 37556 }, { "epoch": 3.0425307841866496, "grad_norm": 0.07828255742788315, "learning_rate": 5.319321301588731e-05, "loss": 0.2138, "step": 37557 }, { "epoch": 3.042611795204148, "grad_norm": 0.062211714684963226, "learning_rate": 5.318871236329268e-05, "loss": 0.2208, "step": 37558 }, { "epoch": 3.042692806221646, "grad_norm": 0.057104844599962234, "learning_rate": 5.318421171069805e-05, "loss": 0.2069, "step": 37559 }, { "epoch": 3.0427738172391443, "grad_norm": 0.07099134474992752, "learning_rate": 5.317971105810343e-05, "loss": 0.2105, "step": 37560 }, { "epoch": 3.042854828256643, "grad_norm": 0.06640342622995377, "learning_rate": 5.31752104055088e-05, "loss": 0.2627, "step": 37561 }, { "epoch": 3.0429358392741412, "grad_norm": 0.07271246612071991, "learning_rate": 5.317070975291417e-05, "loss": 0.2299, "step": 37562 }, { "epoch": 3.0430168502916395, "grad_norm": 0.07936045527458191, "learning_rate": 5.316620910031955e-05, "loss": 0.2154, "step": 37563 }, { "epoch": 3.043097861309138, "grad_norm": 0.07012918591499329, "learning_rate": 5.3161708447724924e-05, "loss": 0.2266, "step": 37564 }, { "epoch": 3.0431788723266364, "grad_norm": 0.07440099865198135, "learning_rate": 5.315720779513029e-05, "loss": 0.2014, "step": 37565 }, { "epoch": 3.0432598833441347, "grad_norm": 0.07617049664258957, "learning_rate": 5.315270714253567e-05, "loss": 0.2135, "step": 37566 }, { "epoch": 3.0433408943616334, "grad_norm": 0.0808364674448967, "learning_rate": 5.3148206489941045e-05, "loss": 0.252, "step": 37567 }, { "epoch": 3.0434219053791316, "grad_norm": 0.07821191102266312, "learning_rate": 5.314370583734641e-05, "loss": 0.2414, "step": 37568 }, { "epoch": 3.04350291639663, "grad_norm": 0.07060352712869644, "learning_rate": 5.313920518475179e-05, "loss": 0.2288, "step": 37569 }, { "epoch": 3.0435839274141285, "grad_norm": 0.09167982637882233, "learning_rate": 5.3134704532157166e-05, "loss": 0.2358, "step": 37570 }, { "epoch": 3.0436649384316268, "grad_norm": 0.06757145375013351, "learning_rate": 5.313020387956253e-05, "loss": 0.223, "step": 37571 }, { "epoch": 3.043745949449125, "grad_norm": 0.06062881648540497, "learning_rate": 5.312570322696791e-05, "loss": 0.2076, "step": 37572 }, { "epoch": 3.0438269604666233, "grad_norm": 0.06633229553699493, "learning_rate": 5.3121202574373287e-05, "loss": 0.2267, "step": 37573 }, { "epoch": 3.043907971484122, "grad_norm": 0.0841381773352623, "learning_rate": 5.311670192177865e-05, "loss": 0.2821, "step": 37574 }, { "epoch": 3.04398898250162, "grad_norm": 0.07296059280633926, "learning_rate": 5.3112201269184034e-05, "loss": 0.2326, "step": 37575 }, { "epoch": 3.0440699935191184, "grad_norm": 0.05896758660674095, "learning_rate": 5.310770061658941e-05, "loss": 0.2521, "step": 37576 }, { "epoch": 3.044151004536617, "grad_norm": 0.07654000073671341, "learning_rate": 5.3103199963994774e-05, "loss": 0.2348, "step": 37577 }, { "epoch": 3.0442320155541154, "grad_norm": 0.06925518065690994, "learning_rate": 5.3098699311400155e-05, "loss": 0.2029, "step": 37578 }, { "epoch": 3.0443130265716136, "grad_norm": 0.06512514501810074, "learning_rate": 5.309419865880553e-05, "loss": 0.244, "step": 37579 }, { "epoch": 3.0443940375891123, "grad_norm": 0.06901316344738007, "learning_rate": 5.308969800621091e-05, "loss": 0.2376, "step": 37580 }, { "epoch": 3.0444750486066106, "grad_norm": 0.08202358335256577, "learning_rate": 5.3085197353616275e-05, "loss": 0.2596, "step": 37581 }, { "epoch": 3.044556059624109, "grad_norm": 0.07049586623907089, "learning_rate": 5.308069670102165e-05, "loss": 0.2407, "step": 37582 }, { "epoch": 3.044637070641607, "grad_norm": 0.06545942276716232, "learning_rate": 5.307619604842703e-05, "loss": 0.2347, "step": 37583 }, { "epoch": 3.0447180816591057, "grad_norm": 0.06013672426342964, "learning_rate": 5.3071695395832396e-05, "loss": 0.209, "step": 37584 }, { "epoch": 3.044799092676604, "grad_norm": 0.06841271370649338, "learning_rate": 5.306719474323777e-05, "loss": 0.1978, "step": 37585 }, { "epoch": 3.0448801036941022, "grad_norm": 0.07165801525115967, "learning_rate": 5.306269409064315e-05, "loss": 0.241, "step": 37586 }, { "epoch": 3.044961114711601, "grad_norm": 0.0669749528169632, "learning_rate": 5.305819343804852e-05, "loss": 0.2372, "step": 37587 }, { "epoch": 3.045042125729099, "grad_norm": 0.07862003892660141, "learning_rate": 5.305369278545389e-05, "loss": 0.2322, "step": 37588 }, { "epoch": 3.0451231367465974, "grad_norm": 0.06369896978139877, "learning_rate": 5.304919213285927e-05, "loss": 0.2009, "step": 37589 }, { "epoch": 3.045204147764096, "grad_norm": 0.05961852893233299, "learning_rate": 5.304469148026464e-05, "loss": 0.2006, "step": 37590 }, { "epoch": 3.0452851587815943, "grad_norm": 0.0706590786576271, "learning_rate": 5.304019082767001e-05, "loss": 0.2272, "step": 37591 }, { "epoch": 3.0453661697990926, "grad_norm": 0.06891553848981857, "learning_rate": 5.303569017507539e-05, "loss": 0.2257, "step": 37592 }, { "epoch": 3.0454471808165913, "grad_norm": 0.07027577608823776, "learning_rate": 5.3031189522480765e-05, "loss": 0.2522, "step": 37593 }, { "epoch": 3.0455281918340895, "grad_norm": 0.06691335886716843, "learning_rate": 5.302668886988613e-05, "loss": 0.2302, "step": 37594 }, { "epoch": 3.0456092028515878, "grad_norm": 0.07276862114667892, "learning_rate": 5.302218821729151e-05, "loss": 0.2399, "step": 37595 }, { "epoch": 3.045690213869086, "grad_norm": 0.06181017681956291, "learning_rate": 5.3017687564696886e-05, "loss": 0.2431, "step": 37596 }, { "epoch": 3.0457712248865847, "grad_norm": 0.0632273405790329, "learning_rate": 5.301318691210225e-05, "loss": 0.1874, "step": 37597 }, { "epoch": 3.045852235904083, "grad_norm": 0.09335504472255707, "learning_rate": 5.300868625950763e-05, "loss": 0.2396, "step": 37598 }, { "epoch": 3.045933246921581, "grad_norm": 0.06783024221658707, "learning_rate": 5.300418560691301e-05, "loss": 0.232, "step": 37599 }, { "epoch": 3.04601425793908, "grad_norm": 0.060017917305231094, "learning_rate": 5.2999684954318374e-05, "loss": 0.1993, "step": 37600 }, { "epoch": 3.046095268956578, "grad_norm": 0.08943045884370804, "learning_rate": 5.2995184301723754e-05, "loss": 0.2238, "step": 37601 }, { "epoch": 3.0461762799740764, "grad_norm": 0.07300959527492523, "learning_rate": 5.299068364912913e-05, "loss": 0.2396, "step": 37602 }, { "epoch": 3.046257290991575, "grad_norm": 0.0776297077536583, "learning_rate": 5.2986182996534495e-05, "loss": 0.2241, "step": 37603 }, { "epoch": 3.0463383020090733, "grad_norm": 0.057680848985910416, "learning_rate": 5.2981682343939875e-05, "loss": 0.2218, "step": 37604 }, { "epoch": 3.0464193130265715, "grad_norm": 0.06870376318693161, "learning_rate": 5.297718169134525e-05, "loss": 0.2368, "step": 37605 }, { "epoch": 3.04650032404407, "grad_norm": 0.07908564060926437, "learning_rate": 5.297268103875063e-05, "loss": 0.2257, "step": 37606 }, { "epoch": 3.0465813350615685, "grad_norm": 0.0836765468120575, "learning_rate": 5.2968180386155996e-05, "loss": 0.1985, "step": 37607 }, { "epoch": 3.0466623460790667, "grad_norm": 0.07887356728315353, "learning_rate": 5.296367973356137e-05, "loss": 0.2279, "step": 37608 }, { "epoch": 3.046743357096565, "grad_norm": 0.07012686133384705, "learning_rate": 5.295917908096675e-05, "loss": 0.2389, "step": 37609 }, { "epoch": 3.0468243681140637, "grad_norm": 0.07311613857746124, "learning_rate": 5.295467842837212e-05, "loss": 0.2198, "step": 37610 }, { "epoch": 3.046905379131562, "grad_norm": 0.08131827414035797, "learning_rate": 5.295017777577749e-05, "loss": 0.2551, "step": 37611 }, { "epoch": 3.04698639014906, "grad_norm": 0.056082744151353836, "learning_rate": 5.294567712318287e-05, "loss": 0.1839, "step": 37612 }, { "epoch": 3.047067401166559, "grad_norm": 0.06888628751039505, "learning_rate": 5.294117647058824e-05, "loss": 0.1866, "step": 37613 }, { "epoch": 3.047148412184057, "grad_norm": 0.07252807915210724, "learning_rate": 5.293667581799361e-05, "loss": 0.1902, "step": 37614 }, { "epoch": 3.0472294232015553, "grad_norm": 0.07436560094356537, "learning_rate": 5.293217516539899e-05, "loss": 0.2341, "step": 37615 }, { "epoch": 3.047310434219054, "grad_norm": 0.05969003215432167, "learning_rate": 5.292767451280436e-05, "loss": 0.2154, "step": 37616 }, { "epoch": 3.0473914452365523, "grad_norm": 0.05967477336525917, "learning_rate": 5.292317386020973e-05, "loss": 0.2088, "step": 37617 }, { "epoch": 3.0474724562540505, "grad_norm": 0.06089045852422714, "learning_rate": 5.291867320761511e-05, "loss": 0.2153, "step": 37618 }, { "epoch": 3.0475534672715487, "grad_norm": 0.0729355439543724, "learning_rate": 5.291417255502048e-05, "loss": 0.1967, "step": 37619 }, { "epoch": 3.0476344782890474, "grad_norm": 0.06276184320449829, "learning_rate": 5.290967190242585e-05, "loss": 0.2377, "step": 37620 }, { "epoch": 3.0477154893065457, "grad_norm": 0.06968085467815399, "learning_rate": 5.290517124983123e-05, "loss": 0.2665, "step": 37621 }, { "epoch": 3.047796500324044, "grad_norm": 0.06097818538546562, "learning_rate": 5.29006705972366e-05, "loss": 0.2161, "step": 37622 }, { "epoch": 3.0478775113415426, "grad_norm": 0.07302144169807434, "learning_rate": 5.2896169944641973e-05, "loss": 0.2855, "step": 37623 }, { "epoch": 3.047958522359041, "grad_norm": 0.07345126569271088, "learning_rate": 5.2891669292047354e-05, "loss": 0.2534, "step": 37624 }, { "epoch": 3.048039533376539, "grad_norm": 0.052877724170684814, "learning_rate": 5.288716863945272e-05, "loss": 0.2006, "step": 37625 }, { "epoch": 3.048120544394038, "grad_norm": 0.0753074586391449, "learning_rate": 5.2882667986858094e-05, "loss": 0.2258, "step": 37626 }, { "epoch": 3.048201555411536, "grad_norm": 0.07556282728910446, "learning_rate": 5.2878167334263475e-05, "loss": 0.2416, "step": 37627 }, { "epoch": 3.0482825664290343, "grad_norm": 0.06304620951414108, "learning_rate": 5.287366668166884e-05, "loss": 0.2146, "step": 37628 }, { "epoch": 3.0483635774465325, "grad_norm": 0.08251237124204636, "learning_rate": 5.2869166029074215e-05, "loss": 0.2211, "step": 37629 }, { "epoch": 3.048444588464031, "grad_norm": 0.08227784186601639, "learning_rate": 5.2864665376479595e-05, "loss": 0.2459, "step": 37630 }, { "epoch": 3.0485255994815295, "grad_norm": 0.07222383469343185, "learning_rate": 5.286016472388496e-05, "loss": 0.2076, "step": 37631 }, { "epoch": 3.0486066104990277, "grad_norm": 0.08086186647415161, "learning_rate": 5.285566407129034e-05, "loss": 0.3006, "step": 37632 }, { "epoch": 3.0486876215165264, "grad_norm": 0.07935461401939392, "learning_rate": 5.2851163418695716e-05, "loss": 0.2355, "step": 37633 }, { "epoch": 3.0487686325340246, "grad_norm": 0.07919391989707947, "learning_rate": 5.284666276610108e-05, "loss": 0.197, "step": 37634 }, { "epoch": 3.048849643551523, "grad_norm": 0.08551984280347824, "learning_rate": 5.2842162113506463e-05, "loss": 0.2705, "step": 37635 }, { "epoch": 3.0489306545690216, "grad_norm": 0.06743234395980835, "learning_rate": 5.283766146091184e-05, "loss": 0.2522, "step": 37636 }, { "epoch": 3.04901166558652, "grad_norm": 0.06722062081098557, "learning_rate": 5.2833160808317204e-05, "loss": 0.2982, "step": 37637 }, { "epoch": 3.049092676604018, "grad_norm": 0.0709279254078865, "learning_rate": 5.2828660155722584e-05, "loss": 0.2492, "step": 37638 }, { "epoch": 3.0491736876215167, "grad_norm": 0.06910178810358047, "learning_rate": 5.282415950312796e-05, "loss": 0.2369, "step": 37639 }, { "epoch": 3.049254698639015, "grad_norm": 0.06689421832561493, "learning_rate": 5.2819658850533325e-05, "loss": 0.2122, "step": 37640 }, { "epoch": 3.0493357096565132, "grad_norm": 0.07600487023591995, "learning_rate": 5.2815158197938705e-05, "loss": 0.2316, "step": 37641 }, { "epoch": 3.0494167206740115, "grad_norm": 0.07255223393440247, "learning_rate": 5.281065754534408e-05, "loss": 0.2154, "step": 37642 }, { "epoch": 3.04949773169151, "grad_norm": 0.06639236211776733, "learning_rate": 5.2806156892749446e-05, "loss": 0.1957, "step": 37643 }, { "epoch": 3.0495787427090084, "grad_norm": 0.07258108258247375, "learning_rate": 5.2801656240154826e-05, "loss": 0.2397, "step": 37644 }, { "epoch": 3.0496597537265067, "grad_norm": 0.0692557692527771, "learning_rate": 5.27971555875602e-05, "loss": 0.2283, "step": 37645 }, { "epoch": 3.0497407647440054, "grad_norm": 0.0779244527220726, "learning_rate": 5.2792654934965566e-05, "loss": 0.2222, "step": 37646 }, { "epoch": 3.0498217757615036, "grad_norm": 0.07220424711704254, "learning_rate": 5.278815428237095e-05, "loss": 0.2353, "step": 37647 }, { "epoch": 3.049902786779002, "grad_norm": 0.06793083995580673, "learning_rate": 5.278365362977632e-05, "loss": 0.2437, "step": 37648 }, { "epoch": 3.0499837977965005, "grad_norm": 0.08438190072774887, "learning_rate": 5.277915297718169e-05, "loss": 0.2031, "step": 37649 }, { "epoch": 3.0500648088139988, "grad_norm": 0.07985883951187134, "learning_rate": 5.277465232458707e-05, "loss": 0.2314, "step": 37650 }, { "epoch": 3.050145819831497, "grad_norm": 0.06336715817451477, "learning_rate": 5.277015167199244e-05, "loss": 0.2329, "step": 37651 }, { "epoch": 3.0502268308489953, "grad_norm": 0.08432844281196594, "learning_rate": 5.276565101939781e-05, "loss": 0.239, "step": 37652 }, { "epoch": 3.050307841866494, "grad_norm": 0.07463747262954712, "learning_rate": 5.276115036680319e-05, "loss": 0.2422, "step": 37653 }, { "epoch": 3.050388852883992, "grad_norm": 0.06991366297006607, "learning_rate": 5.275664971420856e-05, "loss": 0.1971, "step": 37654 }, { "epoch": 3.0504698639014904, "grad_norm": 0.07708337903022766, "learning_rate": 5.275214906161393e-05, "loss": 0.2578, "step": 37655 }, { "epoch": 3.050550874918989, "grad_norm": 0.0805683434009552, "learning_rate": 5.274764840901931e-05, "loss": 0.2059, "step": 37656 }, { "epoch": 3.0506318859364874, "grad_norm": 0.07840461283922195, "learning_rate": 5.274314775642468e-05, "loss": 0.2351, "step": 37657 }, { "epoch": 3.0507128969539856, "grad_norm": 0.08233583718538284, "learning_rate": 5.273864710383006e-05, "loss": 0.2095, "step": 37658 }, { "epoch": 3.0507939079714843, "grad_norm": 0.06016426905989647, "learning_rate": 5.273414645123543e-05, "loss": 0.2084, "step": 37659 }, { "epoch": 3.0508749189889826, "grad_norm": 0.07174959033727646, "learning_rate": 5.2729645798640804e-05, "loss": 0.2509, "step": 37660 }, { "epoch": 3.050955930006481, "grad_norm": 0.07257946580648422, "learning_rate": 5.2725145146046184e-05, "loss": 0.2225, "step": 37661 }, { "epoch": 3.051036941023979, "grad_norm": 0.06079892814159393, "learning_rate": 5.272064449345156e-05, "loss": 0.212, "step": 37662 }, { "epoch": 3.0511179520414777, "grad_norm": 0.07712015509605408, "learning_rate": 5.2716143840856924e-05, "loss": 0.2023, "step": 37663 }, { "epoch": 3.051198963058976, "grad_norm": 0.06289387494325638, "learning_rate": 5.2711643188262305e-05, "loss": 0.1957, "step": 37664 }, { "epoch": 3.051279974076474, "grad_norm": 0.07708357274532318, "learning_rate": 5.270714253566768e-05, "loss": 0.2109, "step": 37665 }, { "epoch": 3.051360985093973, "grad_norm": 0.07875526696443558, "learning_rate": 5.2702641883073045e-05, "loss": 0.2204, "step": 37666 }, { "epoch": 3.051441996111471, "grad_norm": 0.06272130459547043, "learning_rate": 5.2698141230478426e-05, "loss": 0.2099, "step": 37667 }, { "epoch": 3.0515230071289694, "grad_norm": 0.07556160539388657, "learning_rate": 5.26936405778838e-05, "loss": 0.1985, "step": 37668 }, { "epoch": 3.051604018146468, "grad_norm": 0.0975421816110611, "learning_rate": 5.2689139925289166e-05, "loss": 0.2284, "step": 37669 }, { "epoch": 3.0516850291639663, "grad_norm": 0.0759001150727272, "learning_rate": 5.2684639272694546e-05, "loss": 0.2443, "step": 37670 }, { "epoch": 3.0517660401814646, "grad_norm": 0.06809696555137634, "learning_rate": 5.268013862009992e-05, "loss": 0.2292, "step": 37671 }, { "epoch": 3.0518470511989633, "grad_norm": 0.08246435970067978, "learning_rate": 5.267563796750529e-05, "loss": 0.2386, "step": 37672 }, { "epoch": 3.0519280622164615, "grad_norm": 0.07349836081266403, "learning_rate": 5.267113731491067e-05, "loss": 0.2198, "step": 37673 }, { "epoch": 3.0520090732339598, "grad_norm": 0.0644262284040451, "learning_rate": 5.266663666231604e-05, "loss": 0.2131, "step": 37674 }, { "epoch": 3.052090084251458, "grad_norm": 0.08209548890590668, "learning_rate": 5.266213600972141e-05, "loss": 0.2471, "step": 37675 }, { "epoch": 3.0521710952689567, "grad_norm": 0.07829367369413376, "learning_rate": 5.265763535712679e-05, "loss": 0.29, "step": 37676 }, { "epoch": 3.052252106286455, "grad_norm": 0.07859180122613907, "learning_rate": 5.265313470453216e-05, "loss": 0.2662, "step": 37677 }, { "epoch": 3.052333117303953, "grad_norm": 0.08331391215324402, "learning_rate": 5.264863405193753e-05, "loss": 0.2649, "step": 37678 }, { "epoch": 3.052414128321452, "grad_norm": 0.06476303935050964, "learning_rate": 5.264413339934291e-05, "loss": 0.2167, "step": 37679 }, { "epoch": 3.05249513933895, "grad_norm": 0.08074692636728287, "learning_rate": 5.263963274674828e-05, "loss": 0.2791, "step": 37680 }, { "epoch": 3.0525761503564484, "grad_norm": 0.06423210352659225, "learning_rate": 5.263513209415365e-05, "loss": 0.2381, "step": 37681 }, { "epoch": 3.052657161373947, "grad_norm": 0.07402287423610687, "learning_rate": 5.263063144155903e-05, "loss": 0.2468, "step": 37682 }, { "epoch": 3.0527381723914453, "grad_norm": 0.06496645510196686, "learning_rate": 5.26261307889644e-05, "loss": 0.2047, "step": 37683 }, { "epoch": 3.0528191834089435, "grad_norm": 0.06942404806613922, "learning_rate": 5.2621630136369784e-05, "loss": 0.2579, "step": 37684 }, { "epoch": 3.052900194426442, "grad_norm": 0.07186678797006607, "learning_rate": 5.261712948377515e-05, "loss": 0.2149, "step": 37685 }, { "epoch": 3.0529812054439405, "grad_norm": 0.06583134829998016, "learning_rate": 5.2612628831180524e-05, "loss": 0.2711, "step": 37686 }, { "epoch": 3.0530622164614387, "grad_norm": 0.0710592195391655, "learning_rate": 5.2608128178585904e-05, "loss": 0.2618, "step": 37687 }, { "epoch": 3.053143227478937, "grad_norm": 0.0674964115023613, "learning_rate": 5.260362752599127e-05, "loss": 0.2102, "step": 37688 }, { "epoch": 3.0532242384964356, "grad_norm": 0.06882596760988235, "learning_rate": 5.2599126873396645e-05, "loss": 0.2408, "step": 37689 }, { "epoch": 3.053305249513934, "grad_norm": 0.06895139813423157, "learning_rate": 5.2594626220802025e-05, "loss": 0.2427, "step": 37690 }, { "epoch": 3.053386260531432, "grad_norm": 0.07730741798877716, "learning_rate": 5.259012556820739e-05, "loss": 0.2335, "step": 37691 }, { "epoch": 3.053467271548931, "grad_norm": 0.07082430273294449, "learning_rate": 5.2585624915612766e-05, "loss": 0.2354, "step": 37692 }, { "epoch": 3.053548282566429, "grad_norm": 0.08576175570487976, "learning_rate": 5.2581124263018146e-05, "loss": 0.2319, "step": 37693 }, { "epoch": 3.0536292935839273, "grad_norm": 0.0777260810136795, "learning_rate": 5.257662361042351e-05, "loss": 0.2478, "step": 37694 }, { "epoch": 3.053710304601426, "grad_norm": 0.08391319215297699, "learning_rate": 5.2572122957828886e-05, "loss": 0.2181, "step": 37695 }, { "epoch": 3.0537913156189243, "grad_norm": 0.08710380643606186, "learning_rate": 5.256762230523427e-05, "loss": 0.2079, "step": 37696 }, { "epoch": 3.0538723266364225, "grad_norm": 0.08606188744306564, "learning_rate": 5.2563121652639634e-05, "loss": 0.2813, "step": 37697 }, { "epoch": 3.0539533376539207, "grad_norm": 0.06323255598545074, "learning_rate": 5.255862100004501e-05, "loss": 0.2385, "step": 37698 }, { "epoch": 3.0540343486714194, "grad_norm": 0.0727868601679802, "learning_rate": 5.255412034745039e-05, "loss": 0.2218, "step": 37699 }, { "epoch": 3.0541153596889177, "grad_norm": 0.1189265325665474, "learning_rate": 5.2549619694855754e-05, "loss": 0.2413, "step": 37700 }, { "epoch": 3.054196370706416, "grad_norm": 0.06921849399805069, "learning_rate": 5.254511904226113e-05, "loss": 0.2167, "step": 37701 }, { "epoch": 3.0542773817239146, "grad_norm": 0.07509531080722809, "learning_rate": 5.254061838966651e-05, "loss": 0.2447, "step": 37702 }, { "epoch": 3.054358392741413, "grad_norm": 0.08192278444766998, "learning_rate": 5.2536117737071875e-05, "loss": 0.244, "step": 37703 }, { "epoch": 3.054439403758911, "grad_norm": 0.07004018872976303, "learning_rate": 5.253161708447725e-05, "loss": 0.2455, "step": 37704 }, { "epoch": 3.05452041477641, "grad_norm": 0.07350824028253555, "learning_rate": 5.252711643188263e-05, "loss": 0.206, "step": 37705 }, { "epoch": 3.054601425793908, "grad_norm": 0.07443010807037354, "learning_rate": 5.2522615779287996e-05, "loss": 0.2564, "step": 37706 }, { "epoch": 3.0546824368114063, "grad_norm": 0.0812426209449768, "learning_rate": 5.251811512669337e-05, "loss": 0.2325, "step": 37707 }, { "epoch": 3.0547634478289045, "grad_norm": 0.06436657160520554, "learning_rate": 5.251361447409875e-05, "loss": 0.2336, "step": 37708 }, { "epoch": 3.054844458846403, "grad_norm": 0.08351697772741318, "learning_rate": 5.250911382150412e-05, "loss": 0.2314, "step": 37709 }, { "epoch": 3.0549254698639015, "grad_norm": 0.06691282987594604, "learning_rate": 5.250461316890949e-05, "loss": 0.2598, "step": 37710 }, { "epoch": 3.0550064808813997, "grad_norm": 0.07169311493635178, "learning_rate": 5.250011251631487e-05, "loss": 0.2592, "step": 37711 }, { "epoch": 3.0550874918988984, "grad_norm": 0.06714262813329697, "learning_rate": 5.249561186372024e-05, "loss": 0.2298, "step": 37712 }, { "epoch": 3.0551685029163966, "grad_norm": 0.07606201618909836, "learning_rate": 5.249111121112562e-05, "loss": 0.2161, "step": 37713 }, { "epoch": 3.055249513933895, "grad_norm": 0.07866179943084717, "learning_rate": 5.248661055853099e-05, "loss": 0.2689, "step": 37714 }, { "epoch": 3.0553305249513936, "grad_norm": 0.07774538546800613, "learning_rate": 5.248210990593636e-05, "loss": 0.2358, "step": 37715 }, { "epoch": 3.055411535968892, "grad_norm": 0.07213099300861359, "learning_rate": 5.247760925334174e-05, "loss": 0.2341, "step": 37716 }, { "epoch": 3.05549254698639, "grad_norm": 0.08474995940923691, "learning_rate": 5.247310860074711e-05, "loss": 0.2034, "step": 37717 }, { "epoch": 3.0555735580038887, "grad_norm": 0.07596404105424881, "learning_rate": 5.246860794815248e-05, "loss": 0.1956, "step": 37718 }, { "epoch": 3.055654569021387, "grad_norm": 0.06409193575382233, "learning_rate": 5.246410729555786e-05, "loss": 0.2006, "step": 37719 }, { "epoch": 3.0557355800388852, "grad_norm": 0.06833213567733765, "learning_rate": 5.245960664296323e-05, "loss": 0.2181, "step": 37720 }, { "epoch": 3.0558165910563835, "grad_norm": 0.06813128292560577, "learning_rate": 5.24551059903686e-05, "loss": 0.2598, "step": 37721 }, { "epoch": 3.055897602073882, "grad_norm": 0.06665215641260147, "learning_rate": 5.245060533777398e-05, "loss": 0.221, "step": 37722 }, { "epoch": 3.0559786130913804, "grad_norm": 0.08951111137866974, "learning_rate": 5.2446104685179354e-05, "loss": 0.2169, "step": 37723 }, { "epoch": 3.0560596241088787, "grad_norm": 0.0698397308588028, "learning_rate": 5.244160403258472e-05, "loss": 0.2467, "step": 37724 }, { "epoch": 3.0561406351263773, "grad_norm": 0.07880235463380814, "learning_rate": 5.24371033799901e-05, "loss": 0.2124, "step": 37725 }, { "epoch": 3.0562216461438756, "grad_norm": 0.0704299658536911, "learning_rate": 5.2432602727395475e-05, "loss": 0.2013, "step": 37726 }, { "epoch": 3.056302657161374, "grad_norm": 0.06827425956726074, "learning_rate": 5.242810207480084e-05, "loss": 0.2079, "step": 37727 }, { "epoch": 3.0563836681788725, "grad_norm": 0.05798809602856636, "learning_rate": 5.242360142220622e-05, "loss": 0.1942, "step": 37728 }, { "epoch": 3.0564646791963708, "grad_norm": 0.07702101767063141, "learning_rate": 5.2419100769611596e-05, "loss": 0.24, "step": 37729 }, { "epoch": 3.056545690213869, "grad_norm": 0.06647029519081116, "learning_rate": 5.241460011701696e-05, "loss": 0.2744, "step": 37730 }, { "epoch": 3.0566267012313673, "grad_norm": 0.06671483814716339, "learning_rate": 5.241009946442235e-05, "loss": 0.1994, "step": 37731 }, { "epoch": 3.056707712248866, "grad_norm": 0.08495612442493439, "learning_rate": 5.2405598811827717e-05, "loss": 0.2208, "step": 37732 }, { "epoch": 3.056788723266364, "grad_norm": 0.06984516978263855, "learning_rate": 5.240109815923308e-05, "loss": 0.2391, "step": 37733 }, { "epoch": 3.0568697342838624, "grad_norm": 0.07440478354692459, "learning_rate": 5.239659750663847e-05, "loss": 0.2529, "step": 37734 }, { "epoch": 3.056950745301361, "grad_norm": 0.06642437726259232, "learning_rate": 5.239209685404384e-05, "loss": 0.2556, "step": 37735 }, { "epoch": 3.0570317563188594, "grad_norm": 0.07040982693433762, "learning_rate": 5.2387596201449204e-05, "loss": 0.2275, "step": 37736 }, { "epoch": 3.0571127673363576, "grad_norm": 0.08027375489473343, "learning_rate": 5.238309554885459e-05, "loss": 0.2447, "step": 37737 }, { "epoch": 3.0571937783538563, "grad_norm": 0.06435451656579971, "learning_rate": 5.237859489625996e-05, "loss": 0.2394, "step": 37738 }, { "epoch": 3.0572747893713546, "grad_norm": 0.06844855844974518, "learning_rate": 5.237409424366534e-05, "loss": 0.2332, "step": 37739 }, { "epoch": 3.057355800388853, "grad_norm": 0.07512935996055603, "learning_rate": 5.236959359107071e-05, "loss": 0.2268, "step": 37740 }, { "epoch": 3.057436811406351, "grad_norm": 0.06621728837490082, "learning_rate": 5.236509293847608e-05, "loss": 0.2143, "step": 37741 }, { "epoch": 3.0575178224238497, "grad_norm": 0.06908552348613739, "learning_rate": 5.236059228588146e-05, "loss": 0.2153, "step": 37742 }, { "epoch": 3.057598833441348, "grad_norm": 0.06851686537265778, "learning_rate": 5.235609163328683e-05, "loss": 0.2243, "step": 37743 }, { "epoch": 3.057679844458846, "grad_norm": 0.0707482248544693, "learning_rate": 5.23515909806922e-05, "loss": 0.225, "step": 37744 }, { "epoch": 3.057760855476345, "grad_norm": 0.06813740730285645, "learning_rate": 5.234709032809758e-05, "loss": 0.2526, "step": 37745 }, { "epoch": 3.057841866493843, "grad_norm": 0.07044027745723724, "learning_rate": 5.2342589675502954e-05, "loss": 0.245, "step": 37746 }, { "epoch": 3.0579228775113414, "grad_norm": 0.07610490918159485, "learning_rate": 5.233808902290832e-05, "loss": 0.2738, "step": 37747 }, { "epoch": 3.05800388852884, "grad_norm": 0.07336747646331787, "learning_rate": 5.23335883703137e-05, "loss": 0.2617, "step": 37748 }, { "epoch": 3.0580848995463383, "grad_norm": 0.06932365894317627, "learning_rate": 5.2329087717719075e-05, "loss": 0.213, "step": 37749 }, { "epoch": 3.0581659105638366, "grad_norm": 0.06575492769479752, "learning_rate": 5.232458706512444e-05, "loss": 0.2413, "step": 37750 }, { "epoch": 3.0582469215813353, "grad_norm": 0.07268187403678894, "learning_rate": 5.232008641252982e-05, "loss": 0.2193, "step": 37751 }, { "epoch": 3.0583279325988335, "grad_norm": 0.08104255795478821, "learning_rate": 5.2315585759935195e-05, "loss": 0.2199, "step": 37752 }, { "epoch": 3.0584089436163318, "grad_norm": 0.06563001126050949, "learning_rate": 5.231108510734056e-05, "loss": 0.2283, "step": 37753 }, { "epoch": 3.05848995463383, "grad_norm": 0.076548732817173, "learning_rate": 5.230658445474594e-05, "loss": 0.2182, "step": 37754 }, { "epoch": 3.0585709656513287, "grad_norm": 0.0830010399222374, "learning_rate": 5.2302083802151316e-05, "loss": 0.2069, "step": 37755 }, { "epoch": 3.058651976668827, "grad_norm": 0.07072506099939346, "learning_rate": 5.229758314955668e-05, "loss": 0.2466, "step": 37756 }, { "epoch": 3.058732987686325, "grad_norm": 0.06200630962848663, "learning_rate": 5.2293082496962063e-05, "loss": 0.1948, "step": 37757 }, { "epoch": 3.058813998703824, "grad_norm": 0.06687376648187637, "learning_rate": 5.228858184436744e-05, "loss": 0.2422, "step": 37758 }, { "epoch": 3.058895009721322, "grad_norm": 0.05735126882791519, "learning_rate": 5.2284081191772804e-05, "loss": 0.1972, "step": 37759 }, { "epoch": 3.0589760207388204, "grad_norm": 0.08486035466194153, "learning_rate": 5.2279580539178184e-05, "loss": 0.2567, "step": 37760 }, { "epoch": 3.059057031756319, "grad_norm": 0.07064859569072723, "learning_rate": 5.227507988658356e-05, "loss": 0.2122, "step": 37761 }, { "epoch": 3.0591380427738173, "grad_norm": 0.0696304515004158, "learning_rate": 5.2270579233988925e-05, "loss": 0.2335, "step": 37762 }, { "epoch": 3.0592190537913155, "grad_norm": 0.06331910192966461, "learning_rate": 5.2266078581394305e-05, "loss": 0.2119, "step": 37763 }, { "epoch": 3.059300064808814, "grad_norm": 0.06532147526741028, "learning_rate": 5.226157792879968e-05, "loss": 0.1845, "step": 37764 }, { "epoch": 3.0593810758263125, "grad_norm": 0.0645311176776886, "learning_rate": 5.225707727620506e-05, "loss": 0.2216, "step": 37765 }, { "epoch": 3.0594620868438107, "grad_norm": 0.08908110111951828, "learning_rate": 5.2252576623610426e-05, "loss": 0.231, "step": 37766 }, { "epoch": 3.059543097861309, "grad_norm": 0.07068745046854019, "learning_rate": 5.22480759710158e-05, "loss": 0.1985, "step": 37767 }, { "epoch": 3.0596241088788076, "grad_norm": 0.07705529779195786, "learning_rate": 5.224357531842118e-05, "loss": 0.2223, "step": 37768 }, { "epoch": 3.059705119896306, "grad_norm": 0.11516771465539932, "learning_rate": 5.223907466582655e-05, "loss": 0.2384, "step": 37769 }, { "epoch": 3.059786130913804, "grad_norm": 0.08792196959257126, "learning_rate": 5.223457401323192e-05, "loss": 0.2319, "step": 37770 }, { "epoch": 3.059867141931303, "grad_norm": 0.07994337379932404, "learning_rate": 5.22300733606373e-05, "loss": 0.2648, "step": 37771 }, { "epoch": 3.059948152948801, "grad_norm": 0.07762891799211502, "learning_rate": 5.222557270804267e-05, "loss": 0.2191, "step": 37772 }, { "epoch": 3.0600291639662993, "grad_norm": 0.07052240520715714, "learning_rate": 5.222107205544804e-05, "loss": 0.2416, "step": 37773 }, { "epoch": 3.060110174983798, "grad_norm": 0.06587530672550201, "learning_rate": 5.221657140285342e-05, "loss": 0.2163, "step": 37774 }, { "epoch": 3.0601911860012962, "grad_norm": 0.06059986352920532, "learning_rate": 5.221207075025879e-05, "loss": 0.1863, "step": 37775 }, { "epoch": 3.0602721970187945, "grad_norm": 0.07946839928627014, "learning_rate": 5.220757009766416e-05, "loss": 0.223, "step": 37776 }, { "epoch": 3.0603532080362927, "grad_norm": 0.06629893183708191, "learning_rate": 5.220306944506954e-05, "loss": 0.2106, "step": 37777 }, { "epoch": 3.0604342190537914, "grad_norm": 0.07502514123916626, "learning_rate": 5.219856879247491e-05, "loss": 0.2046, "step": 37778 }, { "epoch": 3.0605152300712897, "grad_norm": 0.06490511447191238, "learning_rate": 5.219406813988028e-05, "loss": 0.2268, "step": 37779 }, { "epoch": 3.060596241088788, "grad_norm": 0.06450946629047394, "learning_rate": 5.218956748728566e-05, "loss": 0.2163, "step": 37780 }, { "epoch": 3.0606772521062866, "grad_norm": 0.0701812207698822, "learning_rate": 5.218506683469103e-05, "loss": 0.2466, "step": 37781 }, { "epoch": 3.060758263123785, "grad_norm": 0.07255059480667114, "learning_rate": 5.2180566182096403e-05, "loss": 0.2308, "step": 37782 }, { "epoch": 3.060839274141283, "grad_norm": 0.05262809619307518, "learning_rate": 5.2176065529501784e-05, "loss": 0.198, "step": 37783 }, { "epoch": 3.060920285158782, "grad_norm": 0.06464151293039322, "learning_rate": 5.217156487690715e-05, "loss": 0.2412, "step": 37784 }, { "epoch": 3.06100129617628, "grad_norm": 0.06939256191253662, "learning_rate": 5.2167064224312524e-05, "loss": 0.2188, "step": 37785 }, { "epoch": 3.0610823071937783, "grad_norm": 0.06600762903690338, "learning_rate": 5.2162563571717905e-05, "loss": 0.2393, "step": 37786 }, { "epoch": 3.0611633182112765, "grad_norm": 0.08542144298553467, "learning_rate": 5.215806291912327e-05, "loss": 0.2108, "step": 37787 }, { "epoch": 3.061244329228775, "grad_norm": 0.0636884793639183, "learning_rate": 5.2153562266528645e-05, "loss": 0.2294, "step": 37788 }, { "epoch": 3.0613253402462735, "grad_norm": 0.07961232215166092, "learning_rate": 5.2149061613934025e-05, "loss": 0.247, "step": 37789 }, { "epoch": 3.0614063512637717, "grad_norm": 0.06676743924617767, "learning_rate": 5.214456096133939e-05, "loss": 0.2302, "step": 37790 }, { "epoch": 3.0614873622812704, "grad_norm": 0.06724625080823898, "learning_rate": 5.214006030874477e-05, "loss": 0.2113, "step": 37791 }, { "epoch": 3.0615683732987686, "grad_norm": 0.062377527356147766, "learning_rate": 5.2135559656150146e-05, "loss": 0.2459, "step": 37792 }, { "epoch": 3.061649384316267, "grad_norm": 0.08417245745658875, "learning_rate": 5.213105900355551e-05, "loss": 0.2, "step": 37793 }, { "epoch": 3.0617303953337656, "grad_norm": 0.0867442935705185, "learning_rate": 5.2126558350960894e-05, "loss": 0.2331, "step": 37794 }, { "epoch": 3.061811406351264, "grad_norm": 0.07381570339202881, "learning_rate": 5.212205769836627e-05, "loss": 0.2123, "step": 37795 }, { "epoch": 3.061892417368762, "grad_norm": 0.08096037805080414, "learning_rate": 5.2117557045771634e-05, "loss": 0.2016, "step": 37796 }, { "epoch": 3.0619734283862607, "grad_norm": 0.07170046865940094, "learning_rate": 5.2113056393177014e-05, "loss": 0.218, "step": 37797 }, { "epoch": 3.062054439403759, "grad_norm": 0.06569628417491913, "learning_rate": 5.210855574058239e-05, "loss": 0.1975, "step": 37798 }, { "epoch": 3.0621354504212572, "grad_norm": 0.05782657116651535, "learning_rate": 5.2104055087987755e-05, "loss": 0.1901, "step": 37799 }, { "epoch": 3.0622164614387555, "grad_norm": 0.07560217380523682, "learning_rate": 5.209955443539314e-05, "loss": 0.2263, "step": 37800 }, { "epoch": 3.062297472456254, "grad_norm": 0.07204508781433105, "learning_rate": 5.209505378279851e-05, "loss": 0.2422, "step": 37801 }, { "epoch": 3.0623784834737524, "grad_norm": 0.0661541149020195, "learning_rate": 5.2090553130203876e-05, "loss": 0.2322, "step": 37802 }, { "epoch": 3.0624594944912507, "grad_norm": 0.07273122668266296, "learning_rate": 5.208605247760926e-05, "loss": 0.2193, "step": 37803 }, { "epoch": 3.0625405055087493, "grad_norm": 0.0696941390633583, "learning_rate": 5.208155182501463e-05, "loss": 0.2121, "step": 37804 }, { "epoch": 3.0626215165262476, "grad_norm": 0.08383599668741226, "learning_rate": 5.2077051172419996e-05, "loss": 0.2512, "step": 37805 }, { "epoch": 3.062702527543746, "grad_norm": 0.06656774878501892, "learning_rate": 5.2072550519825384e-05, "loss": 0.209, "step": 37806 }, { "epoch": 3.0627835385612445, "grad_norm": 0.08085005730390549, "learning_rate": 5.206804986723075e-05, "loss": 0.2692, "step": 37807 }, { "epoch": 3.0628645495787428, "grad_norm": 0.07488963752985, "learning_rate": 5.206354921463612e-05, "loss": 0.2244, "step": 37808 }, { "epoch": 3.062945560596241, "grad_norm": 0.06890054047107697, "learning_rate": 5.2059048562041504e-05, "loss": 0.1915, "step": 37809 }, { "epoch": 3.0630265716137393, "grad_norm": 0.06550314277410507, "learning_rate": 5.205454790944687e-05, "loss": 0.2211, "step": 37810 }, { "epoch": 3.063107582631238, "grad_norm": 0.054814115166664124, "learning_rate": 5.205004725685224e-05, "loss": 0.1907, "step": 37811 }, { "epoch": 3.063188593648736, "grad_norm": 0.061660099774599075, "learning_rate": 5.2045546604257625e-05, "loss": 0.2298, "step": 37812 }, { "epoch": 3.0632696046662344, "grad_norm": 0.07961427420377731, "learning_rate": 5.204104595166299e-05, "loss": 0.2413, "step": 37813 }, { "epoch": 3.063350615683733, "grad_norm": 0.08189357072114944, "learning_rate": 5.203654529906836e-05, "loss": 0.2183, "step": 37814 }, { "epoch": 3.0634316267012314, "grad_norm": 0.08027397841215134, "learning_rate": 5.2032044646473746e-05, "loss": 0.2233, "step": 37815 }, { "epoch": 3.0635126377187296, "grad_norm": 0.06822025775909424, "learning_rate": 5.202754399387911e-05, "loss": 0.2241, "step": 37816 }, { "epoch": 3.0635936487362283, "grad_norm": 0.06859682500362396, "learning_rate": 5.202304334128449e-05, "loss": 0.2298, "step": 37817 }, { "epoch": 3.0636746597537265, "grad_norm": 0.061326105147600174, "learning_rate": 5.201854268868987e-05, "loss": 0.28, "step": 37818 }, { "epoch": 3.063755670771225, "grad_norm": 0.08826600760221481, "learning_rate": 5.2014042036095234e-05, "loss": 0.2569, "step": 37819 }, { "epoch": 3.0638366817887235, "grad_norm": 0.07701018452644348, "learning_rate": 5.2009541383500614e-05, "loss": 0.2506, "step": 37820 }, { "epoch": 3.0639176928062217, "grad_norm": 0.07785700261592865, "learning_rate": 5.200504073090599e-05, "loss": 0.2383, "step": 37821 }, { "epoch": 3.06399870382372, "grad_norm": 0.08271851390600204, "learning_rate": 5.2000540078311354e-05, "loss": 0.2141, "step": 37822 }, { "epoch": 3.064079714841218, "grad_norm": 0.0652521550655365, "learning_rate": 5.1996039425716735e-05, "loss": 0.2539, "step": 37823 }, { "epoch": 3.064160725858717, "grad_norm": 0.07621287554502487, "learning_rate": 5.199153877312211e-05, "loss": 0.1846, "step": 37824 }, { "epoch": 3.064241736876215, "grad_norm": 0.07543626427650452, "learning_rate": 5.1987038120527475e-05, "loss": 0.2251, "step": 37825 }, { "epoch": 3.0643227478937134, "grad_norm": 0.07078852504491806, "learning_rate": 5.1982537467932856e-05, "loss": 0.2359, "step": 37826 }, { "epoch": 3.064403758911212, "grad_norm": 0.08174537867307663, "learning_rate": 5.197803681533823e-05, "loss": 0.2348, "step": 37827 }, { "epoch": 3.0644847699287103, "grad_norm": 0.06269580870866776, "learning_rate": 5.1973536162743596e-05, "loss": 0.2082, "step": 37828 }, { "epoch": 3.0645657809462086, "grad_norm": 0.07843272387981415, "learning_rate": 5.1969035510148976e-05, "loss": 0.2294, "step": 37829 }, { "epoch": 3.0646467919637073, "grad_norm": 0.08541741967201233, "learning_rate": 5.196453485755435e-05, "loss": 0.2572, "step": 37830 }, { "epoch": 3.0647278029812055, "grad_norm": 0.09261652082204819, "learning_rate": 5.196003420495972e-05, "loss": 0.2414, "step": 37831 }, { "epoch": 3.0648088139987038, "grad_norm": 0.06951161473989487, "learning_rate": 5.19555335523651e-05, "loss": 0.2247, "step": 37832 }, { "epoch": 3.064889825016202, "grad_norm": 0.0775570273399353, "learning_rate": 5.195103289977047e-05, "loss": 0.213, "step": 37833 }, { "epoch": 3.0649708360337007, "grad_norm": 0.05936042591929436, "learning_rate": 5.194653224717584e-05, "loss": 0.2352, "step": 37834 }, { "epoch": 3.065051847051199, "grad_norm": 0.07738978415727615, "learning_rate": 5.194203159458122e-05, "loss": 0.2336, "step": 37835 }, { "epoch": 3.065132858068697, "grad_norm": 0.07329297810792923, "learning_rate": 5.193753094198659e-05, "loss": 0.2307, "step": 37836 }, { "epoch": 3.065213869086196, "grad_norm": 0.06835595518350601, "learning_rate": 5.193303028939196e-05, "loss": 0.2438, "step": 37837 }, { "epoch": 3.065294880103694, "grad_norm": 0.07460511475801468, "learning_rate": 5.192852963679734e-05, "loss": 0.218, "step": 37838 }, { "epoch": 3.0653758911211924, "grad_norm": 0.06165440008044243, "learning_rate": 5.192402898420271e-05, "loss": 0.2262, "step": 37839 }, { "epoch": 3.065456902138691, "grad_norm": 0.07027935981750488, "learning_rate": 5.191952833160808e-05, "loss": 0.239, "step": 37840 }, { "epoch": 3.0655379131561893, "grad_norm": 0.0670817494392395, "learning_rate": 5.191502767901346e-05, "loss": 0.2223, "step": 37841 }, { "epoch": 3.0656189241736875, "grad_norm": 0.06680265814065933, "learning_rate": 5.191052702641883e-05, "loss": 0.2366, "step": 37842 }, { "epoch": 3.065699935191186, "grad_norm": 0.06441053748130798, "learning_rate": 5.1906026373824214e-05, "loss": 0.2357, "step": 37843 }, { "epoch": 3.0657809462086845, "grad_norm": 0.06061835587024689, "learning_rate": 5.190152572122958e-05, "loss": 0.2327, "step": 37844 }, { "epoch": 3.0658619572261827, "grad_norm": 0.06712067872285843, "learning_rate": 5.1897025068634954e-05, "loss": 0.2321, "step": 37845 }, { "epoch": 3.065942968243681, "grad_norm": 0.07448230683803558, "learning_rate": 5.1892524416040334e-05, "loss": 0.2567, "step": 37846 }, { "epoch": 3.0660239792611796, "grad_norm": 0.058865804225206375, "learning_rate": 5.18880237634457e-05, "loss": 0.214, "step": 37847 }, { "epoch": 3.066104990278678, "grad_norm": 0.07380334287881851, "learning_rate": 5.1883523110851075e-05, "loss": 0.2223, "step": 37848 }, { "epoch": 3.066186001296176, "grad_norm": 0.07105739414691925, "learning_rate": 5.1879022458256455e-05, "loss": 0.2171, "step": 37849 }, { "epoch": 3.066267012313675, "grad_norm": 0.0776592344045639, "learning_rate": 5.187452180566182e-05, "loss": 0.216, "step": 37850 }, { "epoch": 3.066348023331173, "grad_norm": 0.06981410086154938, "learning_rate": 5.1870021153067196e-05, "loss": 0.2408, "step": 37851 }, { "epoch": 3.0664290343486713, "grad_norm": 0.0790480226278305, "learning_rate": 5.1865520500472576e-05, "loss": 0.2486, "step": 37852 }, { "epoch": 3.06651004536617, "grad_norm": 0.06860355287790298, "learning_rate": 5.186101984787794e-05, "loss": 0.2365, "step": 37853 }, { "epoch": 3.0665910563836682, "grad_norm": 0.0746893510222435, "learning_rate": 5.1856519195283316e-05, "loss": 0.2285, "step": 37854 }, { "epoch": 3.0666720674011665, "grad_norm": 0.06371176242828369, "learning_rate": 5.18520185426887e-05, "loss": 0.2371, "step": 37855 }, { "epoch": 3.0667530784186647, "grad_norm": 0.0675220713019371, "learning_rate": 5.1847517890094064e-05, "loss": 0.2049, "step": 37856 }, { "epoch": 3.0668340894361634, "grad_norm": 0.0685059204697609, "learning_rate": 5.184301723749944e-05, "loss": 0.2191, "step": 37857 }, { "epoch": 3.0669151004536617, "grad_norm": 0.0811622142791748, "learning_rate": 5.183851658490482e-05, "loss": 0.2291, "step": 37858 }, { "epoch": 3.06699611147116, "grad_norm": 0.05773041024804115, "learning_rate": 5.1834015932310185e-05, "loss": 0.2369, "step": 37859 }, { "epoch": 3.0670771224886586, "grad_norm": 0.06531134992837906, "learning_rate": 5.182951527971556e-05, "loss": 0.2021, "step": 37860 }, { "epoch": 3.067158133506157, "grad_norm": 0.08389285206794739, "learning_rate": 5.182501462712094e-05, "loss": 0.2364, "step": 37861 }, { "epoch": 3.067239144523655, "grad_norm": 0.09492291510105133, "learning_rate": 5.1820513974526305e-05, "loss": 0.2315, "step": 37862 }, { "epoch": 3.067320155541154, "grad_norm": 0.08566474169492722, "learning_rate": 5.181601332193168e-05, "loss": 0.2212, "step": 37863 }, { "epoch": 3.067401166558652, "grad_norm": 0.0661972314119339, "learning_rate": 5.181151266933706e-05, "loss": 0.182, "step": 37864 }, { "epoch": 3.0674821775761503, "grad_norm": 0.08925847709178925, "learning_rate": 5.1807012016742426e-05, "loss": 0.221, "step": 37865 }, { "epoch": 3.067563188593649, "grad_norm": 0.06424117088317871, "learning_rate": 5.18025113641478e-05, "loss": 0.2518, "step": 37866 }, { "epoch": 3.067644199611147, "grad_norm": 0.0826725959777832, "learning_rate": 5.179801071155318e-05, "loss": 0.2195, "step": 37867 }, { "epoch": 3.0677252106286454, "grad_norm": 0.06489408016204834, "learning_rate": 5.179351005895855e-05, "loss": 0.2034, "step": 37868 }, { "epoch": 3.0678062216461437, "grad_norm": 0.06176425889134407, "learning_rate": 5.178900940636392e-05, "loss": 0.2564, "step": 37869 }, { "epoch": 3.0678872326636424, "grad_norm": 0.07508070766925812, "learning_rate": 5.17845087537693e-05, "loss": 0.2382, "step": 37870 }, { "epoch": 3.0679682436811406, "grad_norm": 0.06734276562929153, "learning_rate": 5.178000810117467e-05, "loss": 0.2312, "step": 37871 }, { "epoch": 3.068049254698639, "grad_norm": 0.06585001945495605, "learning_rate": 5.1775507448580055e-05, "loss": 0.2313, "step": 37872 }, { "epoch": 3.0681302657161376, "grad_norm": 0.0641934871673584, "learning_rate": 5.177100679598542e-05, "loss": 0.2136, "step": 37873 }, { "epoch": 3.068211276733636, "grad_norm": 0.07912000268697739, "learning_rate": 5.176650614339079e-05, "loss": 0.2236, "step": 37874 }, { "epoch": 3.068292287751134, "grad_norm": 0.07646586745977402, "learning_rate": 5.1762005490796176e-05, "loss": 0.2342, "step": 37875 }, { "epoch": 3.0683732987686327, "grad_norm": 0.06955553591251373, "learning_rate": 5.175750483820154e-05, "loss": 0.2141, "step": 37876 }, { "epoch": 3.068454309786131, "grad_norm": 0.06082908436655998, "learning_rate": 5.175300418560691e-05, "loss": 0.2045, "step": 37877 }, { "epoch": 3.0685353208036292, "grad_norm": 0.07860400527715683, "learning_rate": 5.1748503533012297e-05, "loss": 0.221, "step": 37878 }, { "epoch": 3.0686163318211275, "grad_norm": 0.06857936084270477, "learning_rate": 5.174400288041766e-05, "loss": 0.2142, "step": 37879 }, { "epoch": 3.068697342838626, "grad_norm": 0.06467515230178833, "learning_rate": 5.173950222782303e-05, "loss": 0.1947, "step": 37880 }, { "epoch": 3.0687783538561244, "grad_norm": 0.06888511031866074, "learning_rate": 5.173500157522842e-05, "loss": 0.2594, "step": 37881 }, { "epoch": 3.0688593648736227, "grad_norm": 0.06320245563983917, "learning_rate": 5.1730500922633784e-05, "loss": 0.207, "step": 37882 }, { "epoch": 3.0689403758911213, "grad_norm": 0.07228653877973557, "learning_rate": 5.172600027003916e-05, "loss": 0.2266, "step": 37883 }, { "epoch": 3.0690213869086196, "grad_norm": 0.0634072795510292, "learning_rate": 5.172149961744454e-05, "loss": 0.2282, "step": 37884 }, { "epoch": 3.069102397926118, "grad_norm": 0.09231677651405334, "learning_rate": 5.1716998964849905e-05, "loss": 0.2451, "step": 37885 }, { "epoch": 3.0691834089436165, "grad_norm": 0.062211230397224426, "learning_rate": 5.171249831225528e-05, "loss": 0.2275, "step": 37886 }, { "epoch": 3.0692644199611148, "grad_norm": 0.06925942003726959, "learning_rate": 5.170799765966066e-05, "loss": 0.2077, "step": 37887 }, { "epoch": 3.069345430978613, "grad_norm": 0.1082034781575203, "learning_rate": 5.1703497007066026e-05, "loss": 0.2093, "step": 37888 }, { "epoch": 3.0694264419961117, "grad_norm": 0.059054188430309296, "learning_rate": 5.16989963544714e-05, "loss": 0.2651, "step": 37889 }, { "epoch": 3.06950745301361, "grad_norm": 0.07227233797311783, "learning_rate": 5.169449570187678e-05, "loss": 0.2525, "step": 37890 }, { "epoch": 3.069588464031108, "grad_norm": 0.07376521080732346, "learning_rate": 5.1689995049282147e-05, "loss": 0.2542, "step": 37891 }, { "epoch": 3.0696694750486064, "grad_norm": 0.0684414729475975, "learning_rate": 5.168549439668752e-05, "loss": 0.229, "step": 37892 }, { "epoch": 3.069750486066105, "grad_norm": 0.06626005470752716, "learning_rate": 5.16809937440929e-05, "loss": 0.2198, "step": 37893 }, { "epoch": 3.0698314970836034, "grad_norm": 0.0789312869310379, "learning_rate": 5.167649309149827e-05, "loss": 0.2563, "step": 37894 }, { "epoch": 3.0699125081011016, "grad_norm": 0.06644661724567413, "learning_rate": 5.167199243890364e-05, "loss": 0.2453, "step": 37895 }, { "epoch": 3.0699935191186003, "grad_norm": 0.07774606347084045, "learning_rate": 5.166749178630902e-05, "loss": 0.2669, "step": 37896 }, { "epoch": 3.0700745301360985, "grad_norm": 0.09429129958152771, "learning_rate": 5.166299113371439e-05, "loss": 0.2276, "step": 37897 }, { "epoch": 3.070155541153597, "grad_norm": 0.07213341444730759, "learning_rate": 5.165849048111977e-05, "loss": 0.2385, "step": 37898 }, { "epoch": 3.0702365521710955, "grad_norm": 0.06322767585515976, "learning_rate": 5.165398982852514e-05, "loss": 0.2112, "step": 37899 }, { "epoch": 3.0703175631885937, "grad_norm": 0.0699484571814537, "learning_rate": 5.164948917593051e-05, "loss": 0.2288, "step": 37900 }, { "epoch": 3.070398574206092, "grad_norm": 0.06551605463027954, "learning_rate": 5.164498852333589e-05, "loss": 0.2175, "step": 37901 }, { "epoch": 3.07047958522359, "grad_norm": 0.0687117800116539, "learning_rate": 5.164048787074126e-05, "loss": 0.2439, "step": 37902 }, { "epoch": 3.070560596241089, "grad_norm": 0.07314037531614304, "learning_rate": 5.163598721814663e-05, "loss": 0.1995, "step": 37903 }, { "epoch": 3.070641607258587, "grad_norm": 0.08327914029359818, "learning_rate": 5.163148656555201e-05, "loss": 0.2242, "step": 37904 }, { "epoch": 3.0707226182760854, "grad_norm": 0.08092246204614639, "learning_rate": 5.1626985912957384e-05, "loss": 0.2712, "step": 37905 }, { "epoch": 3.070803629293584, "grad_norm": 0.062449127435684204, "learning_rate": 5.162248526036275e-05, "loss": 0.2368, "step": 37906 }, { "epoch": 3.0708846403110823, "grad_norm": 0.05750298872590065, "learning_rate": 5.161798460776813e-05, "loss": 0.2053, "step": 37907 }, { "epoch": 3.0709656513285806, "grad_norm": 0.05637628957629204, "learning_rate": 5.1613483955173505e-05, "loss": 0.2378, "step": 37908 }, { "epoch": 3.0710466623460793, "grad_norm": 0.07444218546152115, "learning_rate": 5.160898330257887e-05, "loss": 0.2127, "step": 37909 }, { "epoch": 3.0711276733635775, "grad_norm": 0.0685492604970932, "learning_rate": 5.160448264998425e-05, "loss": 0.2548, "step": 37910 }, { "epoch": 3.0712086843810757, "grad_norm": 0.0651240348815918, "learning_rate": 5.1599981997389625e-05, "loss": 0.2301, "step": 37911 }, { "epoch": 3.071289695398574, "grad_norm": 0.06461571156978607, "learning_rate": 5.159548134479499e-05, "loss": 0.2373, "step": 37912 }, { "epoch": 3.0713707064160727, "grad_norm": 0.0866527408361435, "learning_rate": 5.159098069220037e-05, "loss": 0.2355, "step": 37913 }, { "epoch": 3.071451717433571, "grad_norm": 0.06332384794950485, "learning_rate": 5.1586480039605746e-05, "loss": 0.2346, "step": 37914 }, { "epoch": 3.071532728451069, "grad_norm": 0.07270190864801407, "learning_rate": 5.158197938701111e-05, "loss": 0.2162, "step": 37915 }, { "epoch": 3.071613739468568, "grad_norm": 0.07732129096984863, "learning_rate": 5.1577478734416493e-05, "loss": 0.2001, "step": 37916 }, { "epoch": 3.071694750486066, "grad_norm": 0.0813266858458519, "learning_rate": 5.157297808182187e-05, "loss": 0.259, "step": 37917 }, { "epoch": 3.0717757615035644, "grad_norm": 0.07190661877393723, "learning_rate": 5.1568477429227234e-05, "loss": 0.2087, "step": 37918 }, { "epoch": 3.071856772521063, "grad_norm": 0.07515159994363785, "learning_rate": 5.1563976776632614e-05, "loss": 0.2316, "step": 37919 }, { "epoch": 3.0719377835385613, "grad_norm": 0.07636462152004242, "learning_rate": 5.155947612403799e-05, "loss": 0.2296, "step": 37920 }, { "epoch": 3.0720187945560595, "grad_norm": 0.07358011603355408, "learning_rate": 5.1554975471443355e-05, "loss": 0.206, "step": 37921 }, { "epoch": 3.072099805573558, "grad_norm": 0.05487060546875, "learning_rate": 5.1550474818848735e-05, "loss": 0.2, "step": 37922 }, { "epoch": 3.0721808165910565, "grad_norm": 0.08249057084321976, "learning_rate": 5.154597416625411e-05, "loss": 0.2099, "step": 37923 }, { "epoch": 3.0722618276085547, "grad_norm": 0.06941979378461838, "learning_rate": 5.154147351365949e-05, "loss": 0.2349, "step": 37924 }, { "epoch": 3.072342838626053, "grad_norm": 0.07720015197992325, "learning_rate": 5.1536972861064856e-05, "loss": 0.2585, "step": 37925 }, { "epoch": 3.0724238496435516, "grad_norm": 0.07195110619068146, "learning_rate": 5.153247220847023e-05, "loss": 0.2437, "step": 37926 }, { "epoch": 3.07250486066105, "grad_norm": 0.08540907502174377, "learning_rate": 5.152797155587561e-05, "loss": 0.2424, "step": 37927 }, { "epoch": 3.072585871678548, "grad_norm": 0.10462567955255508, "learning_rate": 5.152347090328098e-05, "loss": 0.2277, "step": 37928 }, { "epoch": 3.072666882696047, "grad_norm": 0.0824456512928009, "learning_rate": 5.151897025068635e-05, "loss": 0.2582, "step": 37929 }, { "epoch": 3.072747893713545, "grad_norm": 0.06701191514730453, "learning_rate": 5.151446959809173e-05, "loss": 0.2477, "step": 37930 }, { "epoch": 3.0728289047310433, "grad_norm": 0.06684470176696777, "learning_rate": 5.15099689454971e-05, "loss": 0.2442, "step": 37931 }, { "epoch": 3.072909915748542, "grad_norm": 0.09025054425001144, "learning_rate": 5.150546829290247e-05, "loss": 0.2667, "step": 37932 }, { "epoch": 3.0729909267660402, "grad_norm": 0.06221548467874527, "learning_rate": 5.150096764030785e-05, "loss": 0.2143, "step": 37933 }, { "epoch": 3.0730719377835385, "grad_norm": 0.060628268867731094, "learning_rate": 5.149646698771322e-05, "loss": 0.2303, "step": 37934 }, { "epoch": 3.0731529488010367, "grad_norm": 0.08122788369655609, "learning_rate": 5.149196633511859e-05, "loss": 0.2504, "step": 37935 }, { "epoch": 3.0732339598185354, "grad_norm": 0.07332400232553482, "learning_rate": 5.148746568252397e-05, "loss": 0.2223, "step": 37936 }, { "epoch": 3.0733149708360337, "grad_norm": 0.0817917063832283, "learning_rate": 5.148296502992934e-05, "loss": 0.2812, "step": 37937 }, { "epoch": 3.073395981853532, "grad_norm": 0.06565006822347641, "learning_rate": 5.147846437733471e-05, "loss": 0.351, "step": 37938 }, { "epoch": 3.0734769928710306, "grad_norm": 0.06252454221248627, "learning_rate": 5.147396372474009e-05, "loss": 0.2064, "step": 37939 }, { "epoch": 3.073558003888529, "grad_norm": 0.0847528949379921, "learning_rate": 5.146946307214546e-05, "loss": 0.2214, "step": 37940 }, { "epoch": 3.073639014906027, "grad_norm": 0.06985647976398468, "learning_rate": 5.1464962419550834e-05, "loss": 0.2443, "step": 37941 }, { "epoch": 3.073720025923526, "grad_norm": 0.07312333583831787, "learning_rate": 5.1460461766956214e-05, "loss": 0.2522, "step": 37942 }, { "epoch": 3.073801036941024, "grad_norm": 0.07783205062150955, "learning_rate": 5.145596111436158e-05, "loss": 0.2536, "step": 37943 }, { "epoch": 3.0738820479585223, "grad_norm": 0.06937301903963089, "learning_rate": 5.1451460461766954e-05, "loss": 0.2139, "step": 37944 }, { "epoch": 3.0739630589760205, "grad_norm": 0.07388978451490402, "learning_rate": 5.1446959809172335e-05, "loss": 0.2015, "step": 37945 }, { "epoch": 3.074044069993519, "grad_norm": 0.0640605017542839, "learning_rate": 5.14424591565777e-05, "loss": 0.25, "step": 37946 }, { "epoch": 3.0741250810110174, "grad_norm": 0.06384878605604172, "learning_rate": 5.1437958503983075e-05, "loss": 0.2137, "step": 37947 }, { "epoch": 3.0742060920285157, "grad_norm": 0.06250331550836563, "learning_rate": 5.1433457851388456e-05, "loss": 0.2125, "step": 37948 }, { "epoch": 3.0742871030460144, "grad_norm": 0.06558407098054886, "learning_rate": 5.142895719879382e-05, "loss": 0.2383, "step": 37949 }, { "epoch": 3.0743681140635126, "grad_norm": 0.05187705159187317, "learning_rate": 5.142445654619921e-05, "loss": 0.2134, "step": 37950 }, { "epoch": 3.074449125081011, "grad_norm": 0.0846744105219841, "learning_rate": 5.1419955893604576e-05, "loss": 0.2796, "step": 37951 }, { "epoch": 3.0745301360985096, "grad_norm": 0.0652763769030571, "learning_rate": 5.141545524100995e-05, "loss": 0.2173, "step": 37952 }, { "epoch": 3.074611147116008, "grad_norm": 0.07559669762849808, "learning_rate": 5.141095458841533e-05, "loss": 0.2458, "step": 37953 }, { "epoch": 3.074692158133506, "grad_norm": 0.07435882091522217, "learning_rate": 5.14064539358207e-05, "loss": 0.2564, "step": 37954 }, { "epoch": 3.0747731691510047, "grad_norm": 0.08500465750694275, "learning_rate": 5.140195328322607e-05, "loss": 0.262, "step": 37955 }, { "epoch": 3.074854180168503, "grad_norm": 0.05521190166473389, "learning_rate": 5.139745263063145e-05, "loss": 0.2248, "step": 37956 }, { "epoch": 3.0749351911860012, "grad_norm": 0.07541017979383469, "learning_rate": 5.139295197803682e-05, "loss": 0.244, "step": 37957 }, { "epoch": 3.0750162022034995, "grad_norm": 0.06455586850643158, "learning_rate": 5.138845132544219e-05, "loss": 0.2069, "step": 37958 }, { "epoch": 3.075097213220998, "grad_norm": 0.08610700070858002, "learning_rate": 5.138395067284757e-05, "loss": 0.2151, "step": 37959 }, { "epoch": 3.0751782242384964, "grad_norm": 0.07560251653194427, "learning_rate": 5.137945002025294e-05, "loss": 0.1906, "step": 37960 }, { "epoch": 3.0752592352559946, "grad_norm": 0.07362257689237595, "learning_rate": 5.137494936765831e-05, "loss": 0.2349, "step": 37961 }, { "epoch": 3.0753402462734933, "grad_norm": 0.06207628548145294, "learning_rate": 5.137044871506369e-05, "loss": 0.2123, "step": 37962 }, { "epoch": 3.0754212572909916, "grad_norm": 0.07409238815307617, "learning_rate": 5.136594806246906e-05, "loss": 0.2077, "step": 37963 }, { "epoch": 3.07550226830849, "grad_norm": 0.06870327144861221, "learning_rate": 5.136144740987443e-05, "loss": 0.2268, "step": 37964 }, { "epoch": 3.0755832793259885, "grad_norm": 0.07700678706169128, "learning_rate": 5.1356946757279814e-05, "loss": 0.2208, "step": 37965 }, { "epoch": 3.0756642903434868, "grad_norm": 0.06135139986872673, "learning_rate": 5.135244610468518e-05, "loss": 0.2369, "step": 37966 }, { "epoch": 3.075745301360985, "grad_norm": 0.08341161906719208, "learning_rate": 5.1347945452090554e-05, "loss": 0.235, "step": 37967 }, { "epoch": 3.0758263123784833, "grad_norm": 0.09900689870119095, "learning_rate": 5.1343444799495934e-05, "loss": 0.2142, "step": 37968 }, { "epoch": 3.075907323395982, "grad_norm": 0.0842118039727211, "learning_rate": 5.13389441469013e-05, "loss": 0.2431, "step": 37969 }, { "epoch": 3.07598833441348, "grad_norm": 0.06679286807775497, "learning_rate": 5.1334443494306675e-05, "loss": 0.2316, "step": 37970 }, { "epoch": 3.0760693454309784, "grad_norm": 0.0700433999300003, "learning_rate": 5.1329942841712055e-05, "loss": 0.259, "step": 37971 }, { "epoch": 3.076150356448477, "grad_norm": 0.07671942561864853, "learning_rate": 5.132544218911742e-05, "loss": 0.2664, "step": 37972 }, { "epoch": 3.0762313674659754, "grad_norm": 0.07188671827316284, "learning_rate": 5.1320941536522796e-05, "loss": 0.2175, "step": 37973 }, { "epoch": 3.0763123784834736, "grad_norm": 0.06508833169937134, "learning_rate": 5.1316440883928176e-05, "loss": 0.214, "step": 37974 }, { "epoch": 3.0763933895009723, "grad_norm": 0.07478687167167664, "learning_rate": 5.131194023133354e-05, "loss": 0.2472, "step": 37975 }, { "epoch": 3.0764744005184705, "grad_norm": 0.06961677968502045, "learning_rate": 5.130743957873892e-05, "loss": 0.2443, "step": 37976 }, { "epoch": 3.076555411535969, "grad_norm": 0.06969679892063141, "learning_rate": 5.13029389261443e-05, "loss": 0.2226, "step": 37977 }, { "epoch": 3.0766364225534675, "grad_norm": 0.060342058539390564, "learning_rate": 5.1298438273549664e-05, "loss": 0.2395, "step": 37978 }, { "epoch": 3.0767174335709657, "grad_norm": 0.06313896924257278, "learning_rate": 5.1293937620955044e-05, "loss": 0.2749, "step": 37979 }, { "epoch": 3.076798444588464, "grad_norm": 0.05931505560874939, "learning_rate": 5.128943696836042e-05, "loss": 0.221, "step": 37980 }, { "epoch": 3.076879455605962, "grad_norm": 0.06451479345560074, "learning_rate": 5.1284936315765784e-05, "loss": 0.2288, "step": 37981 }, { "epoch": 3.076960466623461, "grad_norm": 0.07562223076820374, "learning_rate": 5.1280435663171165e-05, "loss": 0.2471, "step": 37982 }, { "epoch": 3.077041477640959, "grad_norm": 0.05078315734863281, "learning_rate": 5.127593501057654e-05, "loss": 0.2181, "step": 37983 }, { "epoch": 3.0771224886584574, "grad_norm": 0.07727597653865814, "learning_rate": 5.1271434357981905e-05, "loss": 0.211, "step": 37984 }, { "epoch": 3.077203499675956, "grad_norm": 0.06155778467655182, "learning_rate": 5.1266933705387286e-05, "loss": 0.2155, "step": 37985 }, { "epoch": 3.0772845106934543, "grad_norm": 0.06652826070785522, "learning_rate": 5.126243305279266e-05, "loss": 0.2406, "step": 37986 }, { "epoch": 3.0773655217109526, "grad_norm": 0.06903241574764252, "learning_rate": 5.1257932400198026e-05, "loss": 0.2405, "step": 37987 }, { "epoch": 3.0774465327284513, "grad_norm": 0.07772725820541382, "learning_rate": 5.1253431747603406e-05, "loss": 0.2317, "step": 37988 }, { "epoch": 3.0775275437459495, "grad_norm": 0.07417483627796173, "learning_rate": 5.124893109500878e-05, "loss": 0.2337, "step": 37989 }, { "epoch": 3.0776085547634477, "grad_norm": 0.07937649637460709, "learning_rate": 5.124443044241415e-05, "loss": 0.2705, "step": 37990 }, { "epoch": 3.077689565780946, "grad_norm": 0.07012887299060822, "learning_rate": 5.123992978981953e-05, "loss": 0.1856, "step": 37991 }, { "epoch": 3.0777705767984447, "grad_norm": 0.06151702627539635, "learning_rate": 5.12354291372249e-05, "loss": 0.1996, "step": 37992 }, { "epoch": 3.077851587815943, "grad_norm": 0.06742820888757706, "learning_rate": 5.123092848463027e-05, "loss": 0.2367, "step": 37993 }, { "epoch": 3.077932598833441, "grad_norm": 0.08394744992256165, "learning_rate": 5.122642783203565e-05, "loss": 0.2513, "step": 37994 }, { "epoch": 3.07801360985094, "grad_norm": 0.08035869151353836, "learning_rate": 5.122192717944102e-05, "loss": 0.1969, "step": 37995 }, { "epoch": 3.078094620868438, "grad_norm": 0.06402164697647095, "learning_rate": 5.121742652684639e-05, "loss": 0.2315, "step": 37996 }, { "epoch": 3.0781756318859363, "grad_norm": 0.06812452524900436, "learning_rate": 5.121292587425177e-05, "loss": 0.2563, "step": 37997 }, { "epoch": 3.078256642903435, "grad_norm": 0.06456022709608078, "learning_rate": 5.120842522165714e-05, "loss": 0.2365, "step": 37998 }, { "epoch": 3.0783376539209333, "grad_norm": 0.06034749373793602, "learning_rate": 5.120392456906251e-05, "loss": 0.2411, "step": 37999 }, { "epoch": 3.0784186649384315, "grad_norm": 0.06097142770886421, "learning_rate": 5.119942391646789e-05, "loss": 0.1908, "step": 38000 }, { "epoch": 3.07849967595593, "grad_norm": 0.07852084934711456, "learning_rate": 5.119492326387326e-05, "loss": 0.2292, "step": 38001 }, { "epoch": 3.0785806869734285, "grad_norm": 0.08489027619361877, "learning_rate": 5.1190422611278644e-05, "loss": 0.2771, "step": 38002 }, { "epoch": 3.0786616979909267, "grad_norm": 0.0690150335431099, "learning_rate": 5.118592195868401e-05, "loss": 0.2177, "step": 38003 }, { "epoch": 3.078742709008425, "grad_norm": 0.08611797541379929, "learning_rate": 5.1181421306089384e-05, "loss": 0.2413, "step": 38004 }, { "epoch": 3.0788237200259236, "grad_norm": 0.055115729570388794, "learning_rate": 5.1176920653494764e-05, "loss": 0.1832, "step": 38005 }, { "epoch": 3.078904731043422, "grad_norm": 0.06743993610143661, "learning_rate": 5.117242000090013e-05, "loss": 0.2196, "step": 38006 }, { "epoch": 3.07898574206092, "grad_norm": 0.06356610357761383, "learning_rate": 5.1167919348305505e-05, "loss": 0.2042, "step": 38007 }, { "epoch": 3.079066753078419, "grad_norm": 0.06697381287813187, "learning_rate": 5.1163418695710885e-05, "loss": 0.2388, "step": 38008 }, { "epoch": 3.079147764095917, "grad_norm": 0.08403827995061874, "learning_rate": 5.115891804311625e-05, "loss": 0.2314, "step": 38009 }, { "epoch": 3.0792287751134153, "grad_norm": 0.06729169189929962, "learning_rate": 5.1154417390521626e-05, "loss": 0.2261, "step": 38010 }, { "epoch": 3.079309786130914, "grad_norm": 0.07959628105163574, "learning_rate": 5.1149916737927006e-05, "loss": 0.2357, "step": 38011 }, { "epoch": 3.0793907971484122, "grad_norm": 0.07899930328130722, "learning_rate": 5.114541608533237e-05, "loss": 0.2665, "step": 38012 }, { "epoch": 3.0794718081659105, "grad_norm": 0.06835725158452988, "learning_rate": 5.1140915432737747e-05, "loss": 0.2404, "step": 38013 }, { "epoch": 3.0795528191834087, "grad_norm": 0.06105947867035866, "learning_rate": 5.113641478014313e-05, "loss": 0.2399, "step": 38014 }, { "epoch": 3.0796338302009074, "grad_norm": 0.06209400296211243, "learning_rate": 5.1131914127548494e-05, "loss": 0.1897, "step": 38015 }, { "epoch": 3.0797148412184057, "grad_norm": 0.07727130502462387, "learning_rate": 5.112741347495387e-05, "loss": 0.2556, "step": 38016 }, { "epoch": 3.079795852235904, "grad_norm": 0.07233865559101105, "learning_rate": 5.112291282235925e-05, "loss": 0.2576, "step": 38017 }, { "epoch": 3.0798768632534026, "grad_norm": 0.07869274914264679, "learning_rate": 5.1118412169764615e-05, "loss": 0.2686, "step": 38018 }, { "epoch": 3.079957874270901, "grad_norm": 0.06438925117254257, "learning_rate": 5.111391151716999e-05, "loss": 0.2303, "step": 38019 }, { "epoch": 3.080038885288399, "grad_norm": 0.06574901938438416, "learning_rate": 5.110941086457537e-05, "loss": 0.21, "step": 38020 }, { "epoch": 3.0801198963058978, "grad_norm": 0.09768787771463394, "learning_rate": 5.110491021198074e-05, "loss": 0.2309, "step": 38021 }, { "epoch": 3.080200907323396, "grad_norm": 0.07332216203212738, "learning_rate": 5.110040955938611e-05, "loss": 0.249, "step": 38022 }, { "epoch": 3.0802819183408943, "grad_norm": 0.0715082660317421, "learning_rate": 5.109590890679149e-05, "loss": 0.2539, "step": 38023 }, { "epoch": 3.080362929358393, "grad_norm": 0.06920846551656723, "learning_rate": 5.109140825419686e-05, "loss": 0.2114, "step": 38024 }, { "epoch": 3.080443940375891, "grad_norm": 0.07967160642147064, "learning_rate": 5.108690760160223e-05, "loss": 0.2177, "step": 38025 }, { "epoch": 3.0805249513933894, "grad_norm": 0.08817549794912338, "learning_rate": 5.108240694900761e-05, "loss": 0.2499, "step": 38026 }, { "epoch": 3.0806059624108877, "grad_norm": 0.05930979549884796, "learning_rate": 5.1077906296412984e-05, "loss": 0.2294, "step": 38027 }, { "epoch": 3.0806869734283864, "grad_norm": 0.062281325459480286, "learning_rate": 5.107340564381835e-05, "loss": 0.1834, "step": 38028 }, { "epoch": 3.0807679844458846, "grad_norm": 0.06266391277313232, "learning_rate": 5.106890499122373e-05, "loss": 0.2284, "step": 38029 }, { "epoch": 3.080848995463383, "grad_norm": 0.0831487774848938, "learning_rate": 5.1064404338629105e-05, "loss": 0.2499, "step": 38030 }, { "epoch": 3.0809300064808816, "grad_norm": 0.07766416668891907, "learning_rate": 5.1059903686034485e-05, "loss": 0.2153, "step": 38031 }, { "epoch": 3.08101101749838, "grad_norm": 0.07681175321340561, "learning_rate": 5.105540303343985e-05, "loss": 0.2177, "step": 38032 }, { "epoch": 3.081092028515878, "grad_norm": 0.06907657533884048, "learning_rate": 5.1050902380845225e-05, "loss": 0.2689, "step": 38033 }, { "epoch": 3.0811730395333767, "grad_norm": 0.06728033721446991, "learning_rate": 5.1046401728250606e-05, "loss": 0.2024, "step": 38034 }, { "epoch": 3.081254050550875, "grad_norm": 0.07561451196670532, "learning_rate": 5.104190107565597e-05, "loss": 0.2108, "step": 38035 }, { "epoch": 3.0813350615683732, "grad_norm": 0.06470509618520737, "learning_rate": 5.1037400423061346e-05, "loss": 0.2404, "step": 38036 }, { "epoch": 3.0814160725858715, "grad_norm": 0.05925104022026062, "learning_rate": 5.1032899770466727e-05, "loss": 0.2147, "step": 38037 }, { "epoch": 3.08149708360337, "grad_norm": 0.07685708999633789, "learning_rate": 5.102839911787209e-05, "loss": 0.2463, "step": 38038 }, { "epoch": 3.0815780946208684, "grad_norm": 0.07041988521814346, "learning_rate": 5.102389846527747e-05, "loss": 0.2095, "step": 38039 }, { "epoch": 3.0816591056383666, "grad_norm": 0.0685499832034111, "learning_rate": 5.101939781268285e-05, "loss": 0.2095, "step": 38040 }, { "epoch": 3.0817401166558653, "grad_norm": 0.06696108728647232, "learning_rate": 5.1014897160088214e-05, "loss": 0.2174, "step": 38041 }, { "epoch": 3.0818211276733636, "grad_norm": 0.10919345170259476, "learning_rate": 5.101039650749359e-05, "loss": 0.2313, "step": 38042 }, { "epoch": 3.081902138690862, "grad_norm": 0.07602211087942123, "learning_rate": 5.100589585489897e-05, "loss": 0.2269, "step": 38043 }, { "epoch": 3.0819831497083605, "grad_norm": 0.0673341378569603, "learning_rate": 5.1001395202304335e-05, "loss": 0.2401, "step": 38044 }, { "epoch": 3.0820641607258588, "grad_norm": 0.06349032372236252, "learning_rate": 5.099689454970971e-05, "loss": 0.2084, "step": 38045 }, { "epoch": 3.082145171743357, "grad_norm": 0.05518511310219765, "learning_rate": 5.099239389711509e-05, "loss": 0.1958, "step": 38046 }, { "epoch": 3.0822261827608557, "grad_norm": 0.06599873304367065, "learning_rate": 5.0987893244520456e-05, "loss": 0.2335, "step": 38047 }, { "epoch": 3.082307193778354, "grad_norm": 0.07924923300743103, "learning_rate": 5.098339259192583e-05, "loss": 0.2228, "step": 38048 }, { "epoch": 3.082388204795852, "grad_norm": 0.06403350830078125, "learning_rate": 5.097889193933121e-05, "loss": 0.1988, "step": 38049 }, { "epoch": 3.0824692158133504, "grad_norm": 0.0660524070262909, "learning_rate": 5.097439128673658e-05, "loss": 0.2082, "step": 38050 }, { "epoch": 3.082550226830849, "grad_norm": 0.07383035868406296, "learning_rate": 5.096989063414195e-05, "loss": 0.2288, "step": 38051 }, { "epoch": 3.0826312378483474, "grad_norm": 0.05731251463294029, "learning_rate": 5.096538998154733e-05, "loss": 0.2167, "step": 38052 }, { "epoch": 3.0827122488658456, "grad_norm": 0.08323297649621964, "learning_rate": 5.09608893289527e-05, "loss": 0.207, "step": 38053 }, { "epoch": 3.0827932598833443, "grad_norm": 0.07124746590852737, "learning_rate": 5.095638867635807e-05, "loss": 0.2148, "step": 38054 }, { "epoch": 3.0828742709008425, "grad_norm": 0.09191430360078812, "learning_rate": 5.095188802376345e-05, "loss": 0.2445, "step": 38055 }, { "epoch": 3.082955281918341, "grad_norm": 0.06406380236148834, "learning_rate": 5.094738737116882e-05, "loss": 0.2147, "step": 38056 }, { "epoch": 3.0830362929358395, "grad_norm": 0.06194882094860077, "learning_rate": 5.09428867185742e-05, "loss": 0.2221, "step": 38057 }, { "epoch": 3.0831173039533377, "grad_norm": 0.07221710681915283, "learning_rate": 5.093838606597957e-05, "loss": 0.2295, "step": 38058 }, { "epoch": 3.083198314970836, "grad_norm": 0.07320144772529602, "learning_rate": 5.093388541338494e-05, "loss": 0.2356, "step": 38059 }, { "epoch": 3.083279325988334, "grad_norm": 0.060835100710392, "learning_rate": 5.092938476079032e-05, "loss": 0.2301, "step": 38060 }, { "epoch": 3.083360337005833, "grad_norm": 0.07213728874921799, "learning_rate": 5.092488410819569e-05, "loss": 0.2328, "step": 38061 }, { "epoch": 3.083441348023331, "grad_norm": 0.06496760994195938, "learning_rate": 5.092038345560106e-05, "loss": 0.1952, "step": 38062 }, { "epoch": 3.0835223590408294, "grad_norm": 0.06325732171535492, "learning_rate": 5.091588280300644e-05, "loss": 0.2247, "step": 38063 }, { "epoch": 3.083603370058328, "grad_norm": 0.06883818656206131, "learning_rate": 5.0911382150411814e-05, "loss": 0.2103, "step": 38064 }, { "epoch": 3.0836843810758263, "grad_norm": 0.08192932605743408, "learning_rate": 5.090688149781718e-05, "loss": 0.2638, "step": 38065 }, { "epoch": 3.0837653920933246, "grad_norm": 0.08824772387742996, "learning_rate": 5.090238084522256e-05, "loss": 0.2119, "step": 38066 }, { "epoch": 3.0838464031108233, "grad_norm": 0.08697578310966492, "learning_rate": 5.0897880192627935e-05, "loss": 0.2806, "step": 38067 }, { "epoch": 3.0839274141283215, "grad_norm": 0.06647021323442459, "learning_rate": 5.08933795400333e-05, "loss": 0.1708, "step": 38068 }, { "epoch": 3.0840084251458197, "grad_norm": 0.06968235969543457, "learning_rate": 5.088887888743868e-05, "loss": 0.2041, "step": 38069 }, { "epoch": 3.0840894361633184, "grad_norm": 0.06884042918682098, "learning_rate": 5.0884378234844055e-05, "loss": 0.2403, "step": 38070 }, { "epoch": 3.0841704471808167, "grad_norm": 0.06541278958320618, "learning_rate": 5.087987758224942e-05, "loss": 0.2834, "step": 38071 }, { "epoch": 3.084251458198315, "grad_norm": 0.07046263664960861, "learning_rate": 5.08753769296548e-05, "loss": 0.2242, "step": 38072 }, { "epoch": 3.084332469215813, "grad_norm": 0.0605572834610939, "learning_rate": 5.0870876277060176e-05, "loss": 0.2102, "step": 38073 }, { "epoch": 3.084413480233312, "grad_norm": 0.07622010260820389, "learning_rate": 5.086637562446554e-05, "loss": 0.2486, "step": 38074 }, { "epoch": 3.08449449125081, "grad_norm": 0.06665437668561935, "learning_rate": 5.0861874971870923e-05, "loss": 0.214, "step": 38075 }, { "epoch": 3.0845755022683083, "grad_norm": 0.07465348392724991, "learning_rate": 5.08573743192763e-05, "loss": 0.2371, "step": 38076 }, { "epoch": 3.084656513285807, "grad_norm": 0.08477599918842316, "learning_rate": 5.0852873666681664e-05, "loss": 0.2299, "step": 38077 }, { "epoch": 3.0847375243033053, "grad_norm": 0.08032052218914032, "learning_rate": 5.0848373014087044e-05, "loss": 0.2496, "step": 38078 }, { "epoch": 3.0848185353208035, "grad_norm": 0.0856558084487915, "learning_rate": 5.084387236149242e-05, "loss": 0.2353, "step": 38079 }, { "epoch": 3.084899546338302, "grad_norm": 0.0713203176856041, "learning_rate": 5.0839371708897785e-05, "loss": 0.2028, "step": 38080 }, { "epoch": 3.0849805573558005, "grad_norm": 0.06258882582187653, "learning_rate": 5.0834871056303165e-05, "loss": 0.2636, "step": 38081 }, { "epoch": 3.0850615683732987, "grad_norm": 0.07384096086025238, "learning_rate": 5.083037040370854e-05, "loss": 0.2494, "step": 38082 }, { "epoch": 3.085142579390797, "grad_norm": 0.07079246640205383, "learning_rate": 5.082586975111392e-05, "loss": 0.2386, "step": 38083 }, { "epoch": 3.0852235904082956, "grad_norm": 0.06467340141534805, "learning_rate": 5.0821369098519286e-05, "loss": 0.2367, "step": 38084 }, { "epoch": 3.085304601425794, "grad_norm": 0.07608397305011749, "learning_rate": 5.081686844592466e-05, "loss": 0.2207, "step": 38085 }, { "epoch": 3.085385612443292, "grad_norm": 0.06386285275220871, "learning_rate": 5.081236779333004e-05, "loss": 0.238, "step": 38086 }, { "epoch": 3.085466623460791, "grad_norm": 0.08161813020706177, "learning_rate": 5.080786714073541e-05, "loss": 0.2505, "step": 38087 }, { "epoch": 3.085547634478289, "grad_norm": 0.06811436265707016, "learning_rate": 5.080336648814078e-05, "loss": 0.2641, "step": 38088 }, { "epoch": 3.0856286454957873, "grad_norm": 0.09591707587242126, "learning_rate": 5.079886583554616e-05, "loss": 0.2343, "step": 38089 }, { "epoch": 3.085709656513286, "grad_norm": 0.0632985383272171, "learning_rate": 5.0794365182951534e-05, "loss": 0.2361, "step": 38090 }, { "epoch": 3.0857906675307842, "grad_norm": 0.07549940794706345, "learning_rate": 5.07898645303569e-05, "loss": 0.2339, "step": 38091 }, { "epoch": 3.0858716785482825, "grad_norm": 0.06874658912420273, "learning_rate": 5.078536387776228e-05, "loss": 0.2236, "step": 38092 }, { "epoch": 3.085952689565781, "grad_norm": 0.059727951884269714, "learning_rate": 5.0780863225167655e-05, "loss": 0.2061, "step": 38093 }, { "epoch": 3.0860337005832794, "grad_norm": 0.0709579661488533, "learning_rate": 5.077636257257302e-05, "loss": 0.2269, "step": 38094 }, { "epoch": 3.0861147116007777, "grad_norm": 0.07000521570444107, "learning_rate": 5.07718619199784e-05, "loss": 0.2163, "step": 38095 }, { "epoch": 3.086195722618276, "grad_norm": 0.07339578866958618, "learning_rate": 5.0767361267383776e-05, "loss": 0.2372, "step": 38096 }, { "epoch": 3.0862767336357746, "grad_norm": 0.06663995236158371, "learning_rate": 5.076286061478914e-05, "loss": 0.2488, "step": 38097 }, { "epoch": 3.086357744653273, "grad_norm": 0.06998470425605774, "learning_rate": 5.075835996219452e-05, "loss": 0.2315, "step": 38098 }, { "epoch": 3.086438755670771, "grad_norm": 0.0681297555565834, "learning_rate": 5.07538593095999e-05, "loss": 0.2689, "step": 38099 }, { "epoch": 3.0865197666882698, "grad_norm": 0.05492580682039261, "learning_rate": 5.0749358657005264e-05, "loss": 0.1912, "step": 38100 }, { "epoch": 3.086600777705768, "grad_norm": 0.06821811944246292, "learning_rate": 5.0744858004410644e-05, "loss": 0.2167, "step": 38101 }, { "epoch": 3.0866817887232663, "grad_norm": 0.06851505488157272, "learning_rate": 5.074035735181602e-05, "loss": 0.2513, "step": 38102 }, { "epoch": 3.086762799740765, "grad_norm": 0.094993956387043, "learning_rate": 5.0735856699221384e-05, "loss": 0.2536, "step": 38103 }, { "epoch": 3.086843810758263, "grad_norm": 0.06849288195371628, "learning_rate": 5.0731356046626765e-05, "loss": 0.2221, "step": 38104 }, { "epoch": 3.0869248217757614, "grad_norm": 0.06943635642528534, "learning_rate": 5.072685539403214e-05, "loss": 0.2225, "step": 38105 }, { "epoch": 3.0870058327932597, "grad_norm": 0.07597918063402176, "learning_rate": 5.0722354741437505e-05, "loss": 0.2551, "step": 38106 }, { "epoch": 3.0870868438107584, "grad_norm": 0.07840082049369812, "learning_rate": 5.0717854088842886e-05, "loss": 0.2292, "step": 38107 }, { "epoch": 3.0871678548282566, "grad_norm": 0.07592572271823883, "learning_rate": 5.071335343624826e-05, "loss": 0.2066, "step": 38108 }, { "epoch": 3.087248865845755, "grad_norm": 0.05478053539991379, "learning_rate": 5.070885278365364e-05, "loss": 0.2228, "step": 38109 }, { "epoch": 3.0873298768632536, "grad_norm": 0.07612153142690659, "learning_rate": 5.0704352131059006e-05, "loss": 0.1985, "step": 38110 }, { "epoch": 3.087410887880752, "grad_norm": 0.0656314343214035, "learning_rate": 5.069985147846438e-05, "loss": 0.238, "step": 38111 }, { "epoch": 3.08749189889825, "grad_norm": 0.06686277687549591, "learning_rate": 5.069535082586976e-05, "loss": 0.2263, "step": 38112 }, { "epoch": 3.0875729099157487, "grad_norm": 0.08731774240732193, "learning_rate": 5.069085017327513e-05, "loss": 0.2618, "step": 38113 }, { "epoch": 3.087653920933247, "grad_norm": 0.07624731957912445, "learning_rate": 5.06863495206805e-05, "loss": 0.2091, "step": 38114 }, { "epoch": 3.087734931950745, "grad_norm": 0.06672275066375732, "learning_rate": 5.068184886808588e-05, "loss": 0.2189, "step": 38115 }, { "epoch": 3.087815942968244, "grad_norm": 0.07012347877025604, "learning_rate": 5.067734821549125e-05, "loss": 0.2316, "step": 38116 }, { "epoch": 3.087896953985742, "grad_norm": 0.06306098401546478, "learning_rate": 5.067284756289662e-05, "loss": 0.2217, "step": 38117 }, { "epoch": 3.0879779650032404, "grad_norm": 0.060554634779691696, "learning_rate": 5.0668346910302e-05, "loss": 0.2176, "step": 38118 }, { "epoch": 3.0880589760207386, "grad_norm": 0.0674269050359726, "learning_rate": 5.066384625770737e-05, "loss": 0.2263, "step": 38119 }, { "epoch": 3.0881399870382373, "grad_norm": 0.07302812486886978, "learning_rate": 5.065934560511274e-05, "loss": 0.2039, "step": 38120 }, { "epoch": 3.0882209980557356, "grad_norm": 0.0731666088104248, "learning_rate": 5.065484495251812e-05, "loss": 0.2625, "step": 38121 }, { "epoch": 3.088302009073234, "grad_norm": 0.07279719412326813, "learning_rate": 5.065034429992349e-05, "loss": 0.2763, "step": 38122 }, { "epoch": 3.0883830200907325, "grad_norm": 0.07053932547569275, "learning_rate": 5.064584364732886e-05, "loss": 0.2394, "step": 38123 }, { "epoch": 3.0884640311082308, "grad_norm": 0.06680049002170563, "learning_rate": 5.0641342994734244e-05, "loss": 0.2238, "step": 38124 }, { "epoch": 3.088545042125729, "grad_norm": 0.08036140352487564, "learning_rate": 5.063684234213961e-05, "loss": 0.27, "step": 38125 }, { "epoch": 3.0886260531432277, "grad_norm": 0.05588580295443535, "learning_rate": 5.0632341689544984e-05, "loss": 0.2297, "step": 38126 }, { "epoch": 3.088707064160726, "grad_norm": 0.07165321707725525, "learning_rate": 5.0627841036950364e-05, "loss": 0.2177, "step": 38127 }, { "epoch": 3.088788075178224, "grad_norm": 0.09657379984855652, "learning_rate": 5.062334038435573e-05, "loss": 0.2366, "step": 38128 }, { "epoch": 3.0888690861957224, "grad_norm": 0.08756855130195618, "learning_rate": 5.0618839731761105e-05, "loss": 0.2314, "step": 38129 }, { "epoch": 3.088950097213221, "grad_norm": 0.059539131820201874, "learning_rate": 5.0614339079166485e-05, "loss": 0.1886, "step": 38130 }, { "epoch": 3.0890311082307194, "grad_norm": 0.06523775309324265, "learning_rate": 5.060983842657185e-05, "loss": 0.2018, "step": 38131 }, { "epoch": 3.0891121192482176, "grad_norm": 0.08024541288614273, "learning_rate": 5.0605337773977226e-05, "loss": 0.2334, "step": 38132 }, { "epoch": 3.0891931302657163, "grad_norm": 0.06841821223497391, "learning_rate": 5.0600837121382606e-05, "loss": 0.2721, "step": 38133 }, { "epoch": 3.0892741412832145, "grad_norm": 0.07262247055768967, "learning_rate": 5.059633646878797e-05, "loss": 0.2294, "step": 38134 }, { "epoch": 3.089355152300713, "grad_norm": 0.05673075094819069, "learning_rate": 5.059183581619335e-05, "loss": 0.2388, "step": 38135 }, { "epoch": 3.0894361633182115, "grad_norm": 0.06358672678470612, "learning_rate": 5.058733516359873e-05, "loss": 0.1944, "step": 38136 }, { "epoch": 3.0895171743357097, "grad_norm": 0.08018980920314789, "learning_rate": 5.0582834511004094e-05, "loss": 0.2318, "step": 38137 }, { "epoch": 3.089598185353208, "grad_norm": 0.0760384276509285, "learning_rate": 5.0578333858409474e-05, "loss": 0.218, "step": 38138 }, { "epoch": 3.089679196370706, "grad_norm": 0.06213190406560898, "learning_rate": 5.057383320581485e-05, "loss": 0.2148, "step": 38139 }, { "epoch": 3.089760207388205, "grad_norm": 0.07853163033723831, "learning_rate": 5.0569332553220214e-05, "loss": 0.248, "step": 38140 }, { "epoch": 3.089841218405703, "grad_norm": 0.06849128752946854, "learning_rate": 5.0564831900625595e-05, "loss": 0.2004, "step": 38141 }, { "epoch": 3.0899222294232014, "grad_norm": 0.0643618106842041, "learning_rate": 5.056033124803097e-05, "loss": 0.2423, "step": 38142 }, { "epoch": 3.0900032404407, "grad_norm": 0.0633440613746643, "learning_rate": 5.0555830595436335e-05, "loss": 0.2309, "step": 38143 }, { "epoch": 3.0900842514581983, "grad_norm": 0.07177025824785233, "learning_rate": 5.0551329942841716e-05, "loss": 0.2087, "step": 38144 }, { "epoch": 3.0901652624756966, "grad_norm": 0.05981674790382385, "learning_rate": 5.054682929024709e-05, "loss": 0.2144, "step": 38145 }, { "epoch": 3.0902462734931953, "grad_norm": 0.07178358733654022, "learning_rate": 5.0542328637652456e-05, "loss": 0.2663, "step": 38146 }, { "epoch": 3.0903272845106935, "grad_norm": 0.07453365623950958, "learning_rate": 5.0537827985057836e-05, "loss": 0.2465, "step": 38147 }, { "epoch": 3.0904082955281917, "grad_norm": 0.0767117440700531, "learning_rate": 5.053332733246321e-05, "loss": 0.2382, "step": 38148 }, { "epoch": 3.0904893065456904, "grad_norm": 0.07359892129898071, "learning_rate": 5.052882667986858e-05, "loss": 0.2409, "step": 38149 }, { "epoch": 3.0905703175631887, "grad_norm": 0.06934542953968048, "learning_rate": 5.052432602727396e-05, "loss": 0.1998, "step": 38150 }, { "epoch": 3.090651328580687, "grad_norm": 0.05748188495635986, "learning_rate": 5.051982537467933e-05, "loss": 0.1886, "step": 38151 }, { "epoch": 3.090732339598185, "grad_norm": 0.07303234189748764, "learning_rate": 5.05153247220847e-05, "loss": 0.2035, "step": 38152 }, { "epoch": 3.090813350615684, "grad_norm": 0.06159835681319237, "learning_rate": 5.051082406949008e-05, "loss": 0.222, "step": 38153 }, { "epoch": 3.090894361633182, "grad_norm": 0.07107991725206375, "learning_rate": 5.050632341689545e-05, "loss": 0.2005, "step": 38154 }, { "epoch": 3.0909753726506803, "grad_norm": 0.07847020030021667, "learning_rate": 5.050182276430082e-05, "loss": 0.2172, "step": 38155 }, { "epoch": 3.091056383668179, "grad_norm": 0.07008294761180878, "learning_rate": 5.0497322111706206e-05, "loss": 0.2543, "step": 38156 }, { "epoch": 3.0911373946856773, "grad_norm": 0.0713619813323021, "learning_rate": 5.049282145911157e-05, "loss": 0.2371, "step": 38157 }, { "epoch": 3.0912184057031755, "grad_norm": 0.07178063690662384, "learning_rate": 5.048832080651694e-05, "loss": 0.2314, "step": 38158 }, { "epoch": 3.091299416720674, "grad_norm": 0.06920547783374786, "learning_rate": 5.0483820153922326e-05, "loss": 0.2273, "step": 38159 }, { "epoch": 3.0913804277381725, "grad_norm": 0.07792910188436508, "learning_rate": 5.047931950132769e-05, "loss": 0.2061, "step": 38160 }, { "epoch": 3.0914614387556707, "grad_norm": 0.07506173849105835, "learning_rate": 5.0474818848733074e-05, "loss": 0.289, "step": 38161 }, { "epoch": 3.091542449773169, "grad_norm": 0.07946907728910446, "learning_rate": 5.047031819613845e-05, "loss": 0.2352, "step": 38162 }, { "epoch": 3.0916234607906676, "grad_norm": 0.07613854110240936, "learning_rate": 5.0465817543543814e-05, "loss": 0.2601, "step": 38163 }, { "epoch": 3.091704471808166, "grad_norm": 0.09603036195039749, "learning_rate": 5.0461316890949195e-05, "loss": 0.2455, "step": 38164 }, { "epoch": 3.091785482825664, "grad_norm": 0.06707438081502914, "learning_rate": 5.045681623835457e-05, "loss": 0.2148, "step": 38165 }, { "epoch": 3.091866493843163, "grad_norm": 0.07092217355966568, "learning_rate": 5.0452315585759935e-05, "loss": 0.2007, "step": 38166 }, { "epoch": 3.091947504860661, "grad_norm": 0.07237200438976288, "learning_rate": 5.0447814933165315e-05, "loss": 0.2737, "step": 38167 }, { "epoch": 3.0920285158781593, "grad_norm": 0.06157402694225311, "learning_rate": 5.044331428057069e-05, "loss": 0.2245, "step": 38168 }, { "epoch": 3.092109526895658, "grad_norm": 0.059591781347990036, "learning_rate": 5.0438813627976056e-05, "loss": 0.2296, "step": 38169 }, { "epoch": 3.0921905379131562, "grad_norm": 0.08246058970689774, "learning_rate": 5.0434312975381436e-05, "loss": 0.2673, "step": 38170 }, { "epoch": 3.0922715489306545, "grad_norm": 0.06747202575206757, "learning_rate": 5.042981232278681e-05, "loss": 0.216, "step": 38171 }, { "epoch": 3.0923525599481527, "grad_norm": 0.0788835808634758, "learning_rate": 5.0425311670192177e-05, "loss": 0.2228, "step": 38172 }, { "epoch": 3.0924335709656514, "grad_norm": 0.07319427281618118, "learning_rate": 5.042081101759756e-05, "loss": 0.1856, "step": 38173 }, { "epoch": 3.0925145819831497, "grad_norm": 0.07308870553970337, "learning_rate": 5.041631036500293e-05, "loss": 0.2299, "step": 38174 }, { "epoch": 3.092595593000648, "grad_norm": 0.06862013787031174, "learning_rate": 5.04118097124083e-05, "loss": 0.2207, "step": 38175 }, { "epoch": 3.0926766040181466, "grad_norm": 0.05893588811159134, "learning_rate": 5.040730905981368e-05, "loss": 0.2232, "step": 38176 }, { "epoch": 3.092757615035645, "grad_norm": 0.06568938493728638, "learning_rate": 5.040280840721905e-05, "loss": 0.2481, "step": 38177 }, { "epoch": 3.092838626053143, "grad_norm": 0.0582958459854126, "learning_rate": 5.039830775462442e-05, "loss": 0.2398, "step": 38178 }, { "epoch": 3.0929196370706418, "grad_norm": 0.0737474337220192, "learning_rate": 5.03938071020298e-05, "loss": 0.2147, "step": 38179 }, { "epoch": 3.09300064808814, "grad_norm": 0.06943865865468979, "learning_rate": 5.038930644943517e-05, "loss": 0.2435, "step": 38180 }, { "epoch": 3.0930816591056383, "grad_norm": 0.07017700374126434, "learning_rate": 5.038480579684054e-05, "loss": 0.2478, "step": 38181 }, { "epoch": 3.093162670123137, "grad_norm": 0.059834618121385574, "learning_rate": 5.038030514424592e-05, "loss": 0.2023, "step": 38182 }, { "epoch": 3.093243681140635, "grad_norm": 0.07454963773488998, "learning_rate": 5.037580449165129e-05, "loss": 0.2126, "step": 38183 }, { "epoch": 3.0933246921581334, "grad_norm": 0.08466517925262451, "learning_rate": 5.037130383905666e-05, "loss": 0.279, "step": 38184 }, { "epoch": 3.0934057031756317, "grad_norm": 0.07189661264419556, "learning_rate": 5.036680318646204e-05, "loss": 0.2707, "step": 38185 }, { "epoch": 3.0934867141931304, "grad_norm": 0.08357493579387665, "learning_rate": 5.0362302533867414e-05, "loss": 0.2114, "step": 38186 }, { "epoch": 3.0935677252106286, "grad_norm": 0.08453121036291122, "learning_rate": 5.035780188127278e-05, "loss": 0.2577, "step": 38187 }, { "epoch": 3.093648736228127, "grad_norm": 0.06295271962881088, "learning_rate": 5.035330122867816e-05, "loss": 0.1923, "step": 38188 }, { "epoch": 3.0937297472456255, "grad_norm": 0.06339378654956818, "learning_rate": 5.0348800576083535e-05, "loss": 0.2428, "step": 38189 }, { "epoch": 3.093810758263124, "grad_norm": 0.08615691214799881, "learning_rate": 5.0344299923488915e-05, "loss": 0.2451, "step": 38190 }, { "epoch": 3.093891769280622, "grad_norm": 0.059974655508995056, "learning_rate": 5.033979927089428e-05, "loss": 0.2735, "step": 38191 }, { "epoch": 3.0939727802981207, "grad_norm": 0.08040430396795273, "learning_rate": 5.0335298618299655e-05, "loss": 0.2539, "step": 38192 }, { "epoch": 3.094053791315619, "grad_norm": 0.0721743181347847, "learning_rate": 5.0330797965705036e-05, "loss": 0.2348, "step": 38193 }, { "epoch": 3.094134802333117, "grad_norm": 0.07730038464069366, "learning_rate": 5.03262973131104e-05, "loss": 0.2421, "step": 38194 }, { "epoch": 3.0942158133506155, "grad_norm": 0.07701695710420609, "learning_rate": 5.0321796660515776e-05, "loss": 0.2411, "step": 38195 }, { "epoch": 3.094296824368114, "grad_norm": 0.06585506349802017, "learning_rate": 5.0317296007921157e-05, "loss": 0.2279, "step": 38196 }, { "epoch": 3.0943778353856124, "grad_norm": 0.07853267341852188, "learning_rate": 5.0312795355326523e-05, "loss": 0.222, "step": 38197 }, { "epoch": 3.0944588464031106, "grad_norm": 0.08355189859867096, "learning_rate": 5.03082947027319e-05, "loss": 0.2045, "step": 38198 }, { "epoch": 3.0945398574206093, "grad_norm": 0.07326925545930862, "learning_rate": 5.030379405013728e-05, "loss": 0.2256, "step": 38199 }, { "epoch": 3.0946208684381076, "grad_norm": 0.06939083337783813, "learning_rate": 5.0299293397542644e-05, "loss": 0.2365, "step": 38200 }, { "epoch": 3.094701879455606, "grad_norm": 0.0588260293006897, "learning_rate": 5.029479274494802e-05, "loss": 0.2442, "step": 38201 }, { "epoch": 3.0947828904731045, "grad_norm": 0.05715958774089813, "learning_rate": 5.02902920923534e-05, "loss": 0.2428, "step": 38202 }, { "epoch": 3.0948639014906028, "grad_norm": 0.08294867724180222, "learning_rate": 5.0285791439758765e-05, "loss": 0.238, "step": 38203 }, { "epoch": 3.094944912508101, "grad_norm": 0.07795784622430801, "learning_rate": 5.028129078716414e-05, "loss": 0.2578, "step": 38204 }, { "epoch": 3.0950259235255997, "grad_norm": 0.06538005918264389, "learning_rate": 5.027679013456952e-05, "loss": 0.2279, "step": 38205 }, { "epoch": 3.095106934543098, "grad_norm": 0.08856139332056046, "learning_rate": 5.0272289481974886e-05, "loss": 0.2476, "step": 38206 }, { "epoch": 3.095187945560596, "grad_norm": 0.05094992741942406, "learning_rate": 5.026778882938026e-05, "loss": 0.1996, "step": 38207 }, { "epoch": 3.0952689565780944, "grad_norm": 0.09304434806108475, "learning_rate": 5.026328817678564e-05, "loss": 0.2361, "step": 38208 }, { "epoch": 3.095349967595593, "grad_norm": 0.07475238293409348, "learning_rate": 5.025878752419101e-05, "loss": 0.2201, "step": 38209 }, { "epoch": 3.0954309786130914, "grad_norm": 0.07106000185012817, "learning_rate": 5.025428687159638e-05, "loss": 0.2355, "step": 38210 }, { "epoch": 3.0955119896305896, "grad_norm": 0.06232582405209541, "learning_rate": 5.024978621900176e-05, "loss": 0.1958, "step": 38211 }, { "epoch": 3.0955930006480883, "grad_norm": 0.07143504917621613, "learning_rate": 5.024528556640713e-05, "loss": 0.236, "step": 38212 }, { "epoch": 3.0956740116655865, "grad_norm": 0.07695898413658142, "learning_rate": 5.02407849138125e-05, "loss": 0.1984, "step": 38213 }, { "epoch": 3.095755022683085, "grad_norm": 0.09057194739580154, "learning_rate": 5.023628426121788e-05, "loss": 0.2275, "step": 38214 }, { "epoch": 3.0958360337005835, "grad_norm": 0.06527171283960342, "learning_rate": 5.023178360862325e-05, "loss": 0.2526, "step": 38215 }, { "epoch": 3.0959170447180817, "grad_norm": 0.0839429423213005, "learning_rate": 5.022728295602863e-05, "loss": 0.2383, "step": 38216 }, { "epoch": 3.09599805573558, "grad_norm": 0.07217514514923096, "learning_rate": 5.0222782303434e-05, "loss": 0.2383, "step": 38217 }, { "epoch": 3.096079066753078, "grad_norm": 0.07640687376260757, "learning_rate": 5.021828165083937e-05, "loss": 0.2398, "step": 38218 }, { "epoch": 3.096160077770577, "grad_norm": 0.06141895428299904, "learning_rate": 5.021378099824475e-05, "loss": 0.2374, "step": 38219 }, { "epoch": 3.096241088788075, "grad_norm": 0.08339501917362213, "learning_rate": 5.020928034565012e-05, "loss": 0.2329, "step": 38220 }, { "epoch": 3.0963220998055734, "grad_norm": 0.09006574004888535, "learning_rate": 5.020477969305549e-05, "loss": 0.212, "step": 38221 }, { "epoch": 3.096403110823072, "grad_norm": 0.06869301199913025, "learning_rate": 5.020027904046087e-05, "loss": 0.2354, "step": 38222 }, { "epoch": 3.0964841218405703, "grad_norm": 0.07723093777894974, "learning_rate": 5.0195778387866244e-05, "loss": 0.2233, "step": 38223 }, { "epoch": 3.0965651328580686, "grad_norm": 0.07363084703683853, "learning_rate": 5.019127773527161e-05, "loss": 0.2316, "step": 38224 }, { "epoch": 3.0966461438755672, "grad_norm": 0.08026370406150818, "learning_rate": 5.0186777082677e-05, "loss": 0.2359, "step": 38225 }, { "epoch": 3.0967271548930655, "grad_norm": 0.06522775441408157, "learning_rate": 5.0182276430082365e-05, "loss": 0.2196, "step": 38226 }, { "epoch": 3.0968081659105637, "grad_norm": 0.07051295787096024, "learning_rate": 5.017777577748773e-05, "loss": 0.2017, "step": 38227 }, { "epoch": 3.0968891769280624, "grad_norm": 0.09231901168823242, "learning_rate": 5.017327512489312e-05, "loss": 0.2242, "step": 38228 }, { "epoch": 3.0969701879455607, "grad_norm": 0.07775097340345383, "learning_rate": 5.0168774472298485e-05, "loss": 0.2495, "step": 38229 }, { "epoch": 3.097051198963059, "grad_norm": 0.07223207503557205, "learning_rate": 5.016427381970385e-05, "loss": 0.2057, "step": 38230 }, { "epoch": 3.097132209980557, "grad_norm": 0.06762401759624481, "learning_rate": 5.015977316710924e-05, "loss": 0.2143, "step": 38231 }, { "epoch": 3.097213220998056, "grad_norm": 0.07598146796226501, "learning_rate": 5.0155272514514606e-05, "loss": 0.2392, "step": 38232 }, { "epoch": 3.097294232015554, "grad_norm": 0.05937891826033592, "learning_rate": 5.015077186191997e-05, "loss": 0.1926, "step": 38233 }, { "epoch": 3.0973752430330523, "grad_norm": 0.059172067791223526, "learning_rate": 5.014627120932536e-05, "loss": 0.234, "step": 38234 }, { "epoch": 3.097456254050551, "grad_norm": 0.06860775500535965, "learning_rate": 5.014177055673073e-05, "loss": 0.2155, "step": 38235 }, { "epoch": 3.0975372650680493, "grad_norm": 0.0848398208618164, "learning_rate": 5.0137269904136094e-05, "loss": 0.2572, "step": 38236 }, { "epoch": 3.0976182760855475, "grad_norm": 0.05897241085767746, "learning_rate": 5.013276925154148e-05, "loss": 0.2143, "step": 38237 }, { "epoch": 3.097699287103046, "grad_norm": 0.06091038137674332, "learning_rate": 5.012826859894685e-05, "loss": 0.2176, "step": 38238 }, { "epoch": 3.0977802981205445, "grad_norm": 0.07070525735616684, "learning_rate": 5.0123767946352215e-05, "loss": 0.2277, "step": 38239 }, { "epoch": 3.0978613091380427, "grad_norm": 0.08308535069227219, "learning_rate": 5.01192672937576e-05, "loss": 0.2162, "step": 38240 }, { "epoch": 3.097942320155541, "grad_norm": 0.07822440564632416, "learning_rate": 5.011476664116297e-05, "loss": 0.2496, "step": 38241 }, { "epoch": 3.0980233311730396, "grad_norm": 0.07828620076179504, "learning_rate": 5.011026598856835e-05, "loss": 0.232, "step": 38242 }, { "epoch": 3.098104342190538, "grad_norm": 0.07901854068040848, "learning_rate": 5.010576533597372e-05, "loss": 0.2474, "step": 38243 }, { "epoch": 3.098185353208036, "grad_norm": 0.0792955681681633, "learning_rate": 5.010126468337909e-05, "loss": 0.2495, "step": 38244 }, { "epoch": 3.098266364225535, "grad_norm": 0.07473356276750565, "learning_rate": 5.009676403078447e-05, "loss": 0.2083, "step": 38245 }, { "epoch": 3.098347375243033, "grad_norm": 0.07413233071565628, "learning_rate": 5.0092263378189844e-05, "loss": 0.2267, "step": 38246 }, { "epoch": 3.0984283862605313, "grad_norm": 0.056385502219200134, "learning_rate": 5.008776272559521e-05, "loss": 0.2191, "step": 38247 }, { "epoch": 3.09850939727803, "grad_norm": 0.06543677300214767, "learning_rate": 5.008326207300059e-05, "loss": 0.2259, "step": 38248 }, { "epoch": 3.0985904082955282, "grad_norm": 0.07110854238271713, "learning_rate": 5.0078761420405964e-05, "loss": 0.271, "step": 38249 }, { "epoch": 3.0986714193130265, "grad_norm": 0.06640034914016724, "learning_rate": 5.007426076781133e-05, "loss": 0.2557, "step": 38250 }, { "epoch": 3.098752430330525, "grad_norm": 0.0660850927233696, "learning_rate": 5.006976011521671e-05, "loss": 0.2097, "step": 38251 }, { "epoch": 3.0988334413480234, "grad_norm": 0.07259193062782288, "learning_rate": 5.0065259462622085e-05, "loss": 0.2261, "step": 38252 }, { "epoch": 3.0989144523655217, "grad_norm": 0.07566555589437485, "learning_rate": 5.006075881002745e-05, "loss": 0.2641, "step": 38253 }, { "epoch": 3.09899546338302, "grad_norm": 0.06218456104397774, "learning_rate": 5.005625815743283e-05, "loss": 0.1978, "step": 38254 }, { "epoch": 3.0990764744005186, "grad_norm": 0.07735332101583481, "learning_rate": 5.0051757504838206e-05, "loss": 0.223, "step": 38255 }, { "epoch": 3.099157485418017, "grad_norm": 0.07643113285303116, "learning_rate": 5.004725685224357e-05, "loss": 0.2045, "step": 38256 }, { "epoch": 3.099238496435515, "grad_norm": 0.07494723796844482, "learning_rate": 5.004275619964895e-05, "loss": 0.2389, "step": 38257 }, { "epoch": 3.0993195074530138, "grad_norm": 0.08894181251525879, "learning_rate": 5.003825554705433e-05, "loss": 0.1937, "step": 38258 }, { "epoch": 3.099400518470512, "grad_norm": 0.07444025576114655, "learning_rate": 5.0033754894459694e-05, "loss": 0.2252, "step": 38259 }, { "epoch": 3.0994815294880103, "grad_norm": 0.07227712869644165, "learning_rate": 5.0029254241865074e-05, "loss": 0.2289, "step": 38260 }, { "epoch": 3.099562540505509, "grad_norm": 0.06759762763977051, "learning_rate": 5.002475358927045e-05, "loss": 0.2206, "step": 38261 }, { "epoch": 3.099643551523007, "grad_norm": 0.07775463908910751, "learning_rate": 5.0020252936675814e-05, "loss": 0.2283, "step": 38262 }, { "epoch": 3.0997245625405054, "grad_norm": 0.06791025400161743, "learning_rate": 5.0015752284081195e-05, "loss": 0.2887, "step": 38263 }, { "epoch": 3.0998055735580037, "grad_norm": 0.06941854953765869, "learning_rate": 5.001125163148657e-05, "loss": 0.2424, "step": 38264 }, { "epoch": 3.0998865845755024, "grad_norm": 0.08999748528003693, "learning_rate": 5.0006750978891935e-05, "loss": 0.2171, "step": 38265 }, { "epoch": 3.0999675955930006, "grad_norm": 0.05982363224029541, "learning_rate": 5.0002250326297316e-05, "loss": 0.2452, "step": 38266 }, { "epoch": 3.100048606610499, "grad_norm": 0.07498472929000854, "learning_rate": 4.999774967370269e-05, "loss": 0.1982, "step": 38267 }, { "epoch": 3.1001296176279975, "grad_norm": 0.06741871684789658, "learning_rate": 4.999324902110806e-05, "loss": 0.2182, "step": 38268 }, { "epoch": 3.100210628645496, "grad_norm": 0.07078975439071655, "learning_rate": 4.9988748368513436e-05, "loss": 0.2497, "step": 38269 }, { "epoch": 3.100291639662994, "grad_norm": 0.06628455966711044, "learning_rate": 4.998424771591881e-05, "loss": 0.2502, "step": 38270 }, { "epoch": 3.1003726506804927, "grad_norm": 0.059061840176582336, "learning_rate": 4.9979747063324184e-05, "loss": 0.2168, "step": 38271 }, { "epoch": 3.100453661697991, "grad_norm": 0.07902442663908005, "learning_rate": 4.997524641072956e-05, "loss": 0.2416, "step": 38272 }, { "epoch": 3.100534672715489, "grad_norm": 0.0617385134100914, "learning_rate": 4.997074575813493e-05, "loss": 0.2211, "step": 38273 }, { "epoch": 3.100615683732988, "grad_norm": 0.07803118228912354, "learning_rate": 4.9966245105540304e-05, "loss": 0.2529, "step": 38274 }, { "epoch": 3.100696694750486, "grad_norm": 0.07169701159000397, "learning_rate": 4.996174445294568e-05, "loss": 0.2464, "step": 38275 }, { "epoch": 3.1007777057679844, "grad_norm": 0.06901038438081741, "learning_rate": 4.995724380035106e-05, "loss": 0.2396, "step": 38276 }, { "epoch": 3.1008587167854826, "grad_norm": 0.08568874001502991, "learning_rate": 4.9952743147756425e-05, "loss": 0.2351, "step": 38277 }, { "epoch": 3.1009397278029813, "grad_norm": 0.07365360110998154, "learning_rate": 4.99482424951618e-05, "loss": 0.2488, "step": 38278 }, { "epoch": 3.1010207388204796, "grad_norm": 0.06841385364532471, "learning_rate": 4.994374184256718e-05, "loss": 0.2076, "step": 38279 }, { "epoch": 3.101101749837978, "grad_norm": 0.051799263805150986, "learning_rate": 4.9939241189972546e-05, "loss": 0.2096, "step": 38280 }, { "epoch": 3.1011827608554765, "grad_norm": 0.08439652621746063, "learning_rate": 4.993474053737792e-05, "loss": 0.189, "step": 38281 }, { "epoch": 3.1012637718729748, "grad_norm": 0.061678286641836166, "learning_rate": 4.99302398847833e-05, "loss": 0.2429, "step": 38282 }, { "epoch": 3.101344782890473, "grad_norm": 0.0763859674334526, "learning_rate": 4.992573923218867e-05, "loss": 0.2272, "step": 38283 }, { "epoch": 3.1014257939079717, "grad_norm": 0.08517804741859436, "learning_rate": 4.992123857959404e-05, "loss": 0.289, "step": 38284 }, { "epoch": 3.10150680492547, "grad_norm": 0.08954822272062302, "learning_rate": 4.991673792699942e-05, "loss": 0.2642, "step": 38285 }, { "epoch": 3.101587815942968, "grad_norm": 0.08051921427249908, "learning_rate": 4.991223727440479e-05, "loss": 0.2428, "step": 38286 }, { "epoch": 3.1016688269604664, "grad_norm": 0.06787646561861038, "learning_rate": 4.990773662181016e-05, "loss": 0.242, "step": 38287 }, { "epoch": 3.101749837977965, "grad_norm": 0.07331487536430359, "learning_rate": 4.990323596921554e-05, "loss": 0.2409, "step": 38288 }, { "epoch": 3.1018308489954634, "grad_norm": 0.07462482154369354, "learning_rate": 4.9898735316620915e-05, "loss": 0.2479, "step": 38289 }, { "epoch": 3.1019118600129616, "grad_norm": 0.05779562518000603, "learning_rate": 4.989423466402628e-05, "loss": 0.2191, "step": 38290 }, { "epoch": 3.1019928710304603, "grad_norm": 0.07643572986125946, "learning_rate": 4.988973401143166e-05, "loss": 0.2262, "step": 38291 }, { "epoch": 3.1020738820479585, "grad_norm": 0.0754118412733078, "learning_rate": 4.9885233358837036e-05, "loss": 0.2095, "step": 38292 }, { "epoch": 3.1021548930654568, "grad_norm": 0.06725703179836273, "learning_rate": 4.98807327062424e-05, "loss": 0.2338, "step": 38293 }, { "epoch": 3.1022359040829555, "grad_norm": 0.06517348438501358, "learning_rate": 4.987623205364778e-05, "loss": 0.2653, "step": 38294 }, { "epoch": 3.1023169151004537, "grad_norm": 0.07611946016550064, "learning_rate": 4.987173140105316e-05, "loss": 0.2233, "step": 38295 }, { "epoch": 3.102397926117952, "grad_norm": 0.06755281239748001, "learning_rate": 4.9867230748458524e-05, "loss": 0.2169, "step": 38296 }, { "epoch": 3.1024789371354506, "grad_norm": 0.07308715581893921, "learning_rate": 4.9862730095863904e-05, "loss": 0.2227, "step": 38297 }, { "epoch": 3.102559948152949, "grad_norm": 0.0735616609454155, "learning_rate": 4.985822944326928e-05, "loss": 0.2366, "step": 38298 }, { "epoch": 3.102640959170447, "grad_norm": 0.07177618891000748, "learning_rate": 4.9853728790674645e-05, "loss": 0.2568, "step": 38299 }, { "epoch": 3.1027219701879454, "grad_norm": 0.06585688143968582, "learning_rate": 4.9849228138080025e-05, "loss": 0.2167, "step": 38300 }, { "epoch": 3.102802981205444, "grad_norm": 0.09176765382289886, "learning_rate": 4.98447274854854e-05, "loss": 0.2808, "step": 38301 }, { "epoch": 3.1028839922229423, "grad_norm": 0.07390912622213364, "learning_rate": 4.984022683289077e-05, "loss": 0.2624, "step": 38302 }, { "epoch": 3.1029650032404406, "grad_norm": 0.06442528963088989, "learning_rate": 4.9835726180296146e-05, "loss": 0.1996, "step": 38303 }, { "epoch": 3.1030460142579392, "grad_norm": 0.06658261269330978, "learning_rate": 4.983122552770152e-05, "loss": 0.1991, "step": 38304 }, { "epoch": 3.1031270252754375, "grad_norm": 0.07667221873998642, "learning_rate": 4.982672487510689e-05, "loss": 0.2985, "step": 38305 }, { "epoch": 3.1032080362929357, "grad_norm": 0.07117309421300888, "learning_rate": 4.9822224222512267e-05, "loss": 0.1969, "step": 38306 }, { "epoch": 3.1032890473104344, "grad_norm": 0.06802020967006683, "learning_rate": 4.981772356991764e-05, "loss": 0.2193, "step": 38307 }, { "epoch": 3.1033700583279327, "grad_norm": 0.08191298693418503, "learning_rate": 4.9813222917323014e-05, "loss": 0.2377, "step": 38308 }, { "epoch": 3.103451069345431, "grad_norm": 0.09030667692422867, "learning_rate": 4.980872226472839e-05, "loss": 0.2476, "step": 38309 }, { "epoch": 3.103532080362929, "grad_norm": 0.06818903982639313, "learning_rate": 4.980422161213376e-05, "loss": 0.2317, "step": 38310 }, { "epoch": 3.103613091380428, "grad_norm": 0.0722852423787117, "learning_rate": 4.9799720959539135e-05, "loss": 0.284, "step": 38311 }, { "epoch": 3.103694102397926, "grad_norm": 0.07246318459510803, "learning_rate": 4.979522030694451e-05, "loss": 0.2357, "step": 38312 }, { "epoch": 3.1037751134154243, "grad_norm": 0.07880403101444244, "learning_rate": 4.979071965434988e-05, "loss": 0.2539, "step": 38313 }, { "epoch": 3.103856124432923, "grad_norm": 0.06277298182249069, "learning_rate": 4.9786219001755255e-05, "loss": 0.2124, "step": 38314 }, { "epoch": 3.1039371354504213, "grad_norm": 0.08322549611330032, "learning_rate": 4.9781718349160636e-05, "loss": 0.2272, "step": 38315 }, { "epoch": 3.1040181464679195, "grad_norm": 0.0580422505736351, "learning_rate": 4.9777217696566e-05, "loss": 0.2291, "step": 38316 }, { "epoch": 3.104099157485418, "grad_norm": 0.07431962341070175, "learning_rate": 4.9772717043971376e-05, "loss": 0.2578, "step": 38317 }, { "epoch": 3.1041801685029164, "grad_norm": 0.06599278748035431, "learning_rate": 4.9768216391376757e-05, "loss": 0.2145, "step": 38318 }, { "epoch": 3.1042611795204147, "grad_norm": 0.06994808465242386, "learning_rate": 4.976371573878212e-05, "loss": 0.2237, "step": 38319 }, { "epoch": 3.1043421905379134, "grad_norm": 0.07410278916358948, "learning_rate": 4.97592150861875e-05, "loss": 0.1965, "step": 38320 }, { "epoch": 3.1044232015554116, "grad_norm": 0.06452371925115585, "learning_rate": 4.975471443359288e-05, "loss": 0.2146, "step": 38321 }, { "epoch": 3.10450421257291, "grad_norm": 0.06319420039653778, "learning_rate": 4.9750213780998244e-05, "loss": 0.2332, "step": 38322 }, { "epoch": 3.104585223590408, "grad_norm": 0.06600657105445862, "learning_rate": 4.974571312840362e-05, "loss": 0.2192, "step": 38323 }, { "epoch": 3.104666234607907, "grad_norm": 0.07321722060441971, "learning_rate": 4.9741212475809e-05, "loss": 0.2367, "step": 38324 }, { "epoch": 3.104747245625405, "grad_norm": 0.06495875865221024, "learning_rate": 4.9736711823214365e-05, "loss": 0.2072, "step": 38325 }, { "epoch": 3.1048282566429033, "grad_norm": 0.06502759456634521, "learning_rate": 4.973221117061974e-05, "loss": 0.2244, "step": 38326 }, { "epoch": 3.104909267660402, "grad_norm": 0.06920306384563446, "learning_rate": 4.972771051802512e-05, "loss": 0.2158, "step": 38327 }, { "epoch": 3.1049902786779002, "grad_norm": 0.07908426225185394, "learning_rate": 4.972320986543049e-05, "loss": 0.2175, "step": 38328 }, { "epoch": 3.1050712896953985, "grad_norm": 0.07021722197532654, "learning_rate": 4.971870921283586e-05, "loss": 0.2196, "step": 38329 }, { "epoch": 3.105152300712897, "grad_norm": 0.07090037316083908, "learning_rate": 4.971420856024124e-05, "loss": 0.202, "step": 38330 }, { "epoch": 3.1052333117303954, "grad_norm": 0.0728762075304985, "learning_rate": 4.970970790764661e-05, "loss": 0.2301, "step": 38331 }, { "epoch": 3.1053143227478937, "grad_norm": 0.07722747325897217, "learning_rate": 4.970520725505198e-05, "loss": 0.2302, "step": 38332 }, { "epoch": 3.105395333765392, "grad_norm": 0.0742846429347992, "learning_rate": 4.970070660245736e-05, "loss": 0.2579, "step": 38333 }, { "epoch": 3.1054763447828906, "grad_norm": 0.07764902710914612, "learning_rate": 4.9696205949862734e-05, "loss": 0.236, "step": 38334 }, { "epoch": 3.105557355800389, "grad_norm": 0.07743822038173676, "learning_rate": 4.96917052972681e-05, "loss": 0.2418, "step": 38335 }, { "epoch": 3.105638366817887, "grad_norm": 0.08601834625005722, "learning_rate": 4.968720464467348e-05, "loss": 0.2575, "step": 38336 }, { "epoch": 3.1057193778353858, "grad_norm": 0.07223144918680191, "learning_rate": 4.9682703992078855e-05, "loss": 0.2316, "step": 38337 }, { "epoch": 3.105800388852884, "grad_norm": 0.06830278784036636, "learning_rate": 4.967820333948422e-05, "loss": 0.2304, "step": 38338 }, { "epoch": 3.1058813998703823, "grad_norm": 0.05036047473549843, "learning_rate": 4.96737026868896e-05, "loss": 0.2097, "step": 38339 }, { "epoch": 3.105962410887881, "grad_norm": 0.08505997061729431, "learning_rate": 4.9669202034294976e-05, "loss": 0.2637, "step": 38340 }, { "epoch": 3.106043421905379, "grad_norm": 0.07370181381702423, "learning_rate": 4.966470138170035e-05, "loss": 0.2318, "step": 38341 }, { "epoch": 3.1061244329228774, "grad_norm": 0.08025460690259933, "learning_rate": 4.966020072910572e-05, "loss": 0.2421, "step": 38342 }, { "epoch": 3.1062054439403757, "grad_norm": 0.08092350512742996, "learning_rate": 4.9655700076511097e-05, "loss": 0.2134, "step": 38343 }, { "epoch": 3.1062864549578744, "grad_norm": 0.07050243020057678, "learning_rate": 4.965119942391647e-05, "loss": 0.2454, "step": 38344 }, { "epoch": 3.1063674659753726, "grad_norm": 0.06997469067573547, "learning_rate": 4.9646698771321844e-05, "loss": 0.2398, "step": 38345 }, { "epoch": 3.106448476992871, "grad_norm": 0.08480319380760193, "learning_rate": 4.964219811872722e-05, "loss": 0.2149, "step": 38346 }, { "epoch": 3.1065294880103695, "grad_norm": 0.06980687379837036, "learning_rate": 4.963769746613259e-05, "loss": 0.2136, "step": 38347 }, { "epoch": 3.106610499027868, "grad_norm": 0.06757093966007233, "learning_rate": 4.9633196813537965e-05, "loss": 0.2652, "step": 38348 }, { "epoch": 3.106691510045366, "grad_norm": 0.060810089111328125, "learning_rate": 4.962869616094334e-05, "loss": 0.2343, "step": 38349 }, { "epoch": 3.1067725210628647, "grad_norm": 0.08900392055511475, "learning_rate": 4.962419550834871e-05, "loss": 0.262, "step": 38350 }, { "epoch": 3.106853532080363, "grad_norm": 0.06885910034179688, "learning_rate": 4.9619694855754085e-05, "loss": 0.2192, "step": 38351 }, { "epoch": 3.106934543097861, "grad_norm": 0.06629056483507156, "learning_rate": 4.961519420315946e-05, "loss": 0.235, "step": 38352 }, { "epoch": 3.10701555411536, "grad_norm": 0.09018058329820633, "learning_rate": 4.961069355056483e-05, "loss": 0.2683, "step": 38353 }, { "epoch": 3.107096565132858, "grad_norm": 0.06973500549793243, "learning_rate": 4.960619289797021e-05, "loss": 0.2546, "step": 38354 }, { "epoch": 3.1071775761503564, "grad_norm": 0.0802709087729454, "learning_rate": 4.960169224537558e-05, "loss": 0.2259, "step": 38355 }, { "epoch": 3.1072585871678546, "grad_norm": 0.0760037899017334, "learning_rate": 4.9597191592780953e-05, "loss": 0.2167, "step": 38356 }, { "epoch": 3.1073395981853533, "grad_norm": 0.06522723287343979, "learning_rate": 4.9592690940186334e-05, "loss": 0.2353, "step": 38357 }, { "epoch": 3.1074206092028516, "grad_norm": 0.09381596744060516, "learning_rate": 4.95881902875917e-05, "loss": 0.2924, "step": 38358 }, { "epoch": 3.10750162022035, "grad_norm": 0.07883308082818985, "learning_rate": 4.9583689634997074e-05, "loss": 0.2197, "step": 38359 }, { "epoch": 3.1075826312378485, "grad_norm": 0.07242937386035919, "learning_rate": 4.9579188982402455e-05, "loss": 0.2389, "step": 38360 }, { "epoch": 3.1076636422553467, "grad_norm": 0.07693379372358322, "learning_rate": 4.957468832980782e-05, "loss": 0.2318, "step": 38361 }, { "epoch": 3.107744653272845, "grad_norm": 0.0737452581524849, "learning_rate": 4.9570187677213195e-05, "loss": 0.1915, "step": 38362 }, { "epoch": 3.1078256642903437, "grad_norm": 0.0680558905005455, "learning_rate": 4.9565687024618575e-05, "loss": 0.2563, "step": 38363 }, { "epoch": 3.107906675307842, "grad_norm": 0.060349684208631516, "learning_rate": 4.956118637202394e-05, "loss": 0.2066, "step": 38364 }, { "epoch": 3.10798768632534, "grad_norm": 0.057918623089790344, "learning_rate": 4.9556685719429316e-05, "loss": 0.232, "step": 38365 }, { "epoch": 3.1080686973428384, "grad_norm": 0.0609627366065979, "learning_rate": 4.9552185066834696e-05, "loss": 0.2323, "step": 38366 }, { "epoch": 3.108149708360337, "grad_norm": 0.0612613782286644, "learning_rate": 4.954768441424007e-05, "loss": 0.2169, "step": 38367 }, { "epoch": 3.1082307193778353, "grad_norm": 0.06923031061887741, "learning_rate": 4.954318376164544e-05, "loss": 0.2326, "step": 38368 }, { "epoch": 3.1083117303953336, "grad_norm": 0.06448613852262497, "learning_rate": 4.953868310905082e-05, "loss": 0.2081, "step": 38369 }, { "epoch": 3.1083927414128323, "grad_norm": 0.06773959845304489, "learning_rate": 4.953418245645619e-05, "loss": 0.233, "step": 38370 }, { "epoch": 3.1084737524303305, "grad_norm": 0.06707341223955154, "learning_rate": 4.952968180386156e-05, "loss": 0.1969, "step": 38371 }, { "epoch": 3.1085547634478288, "grad_norm": 0.06954282522201538, "learning_rate": 4.952518115126694e-05, "loss": 0.198, "step": 38372 }, { "epoch": 3.1086357744653275, "grad_norm": 0.06780767440795898, "learning_rate": 4.952068049867231e-05, "loss": 0.2418, "step": 38373 }, { "epoch": 3.1087167854828257, "grad_norm": 0.05618634074926376, "learning_rate": 4.951617984607768e-05, "loss": 0.2001, "step": 38374 }, { "epoch": 3.108797796500324, "grad_norm": 0.05853656679391861, "learning_rate": 4.951167919348306e-05, "loss": 0.2318, "step": 38375 }, { "epoch": 3.108878807517822, "grad_norm": 0.07284107059240341, "learning_rate": 4.950717854088843e-05, "loss": 0.2351, "step": 38376 }, { "epoch": 3.108959818535321, "grad_norm": 0.07166719436645508, "learning_rate": 4.95026778882938e-05, "loss": 0.2497, "step": 38377 }, { "epoch": 3.109040829552819, "grad_norm": 0.09151309728622437, "learning_rate": 4.949817723569918e-05, "loss": 0.1985, "step": 38378 }, { "epoch": 3.1091218405703174, "grad_norm": 0.08460740745067596, "learning_rate": 4.949367658310455e-05, "loss": 0.2366, "step": 38379 }, { "epoch": 3.109202851587816, "grad_norm": 0.08176060765981674, "learning_rate": 4.948917593050993e-05, "loss": 0.2114, "step": 38380 }, { "epoch": 3.1092838626053143, "grad_norm": 0.06409350782632828, "learning_rate": 4.94846752779153e-05, "loss": 0.218, "step": 38381 }, { "epoch": 3.1093648736228126, "grad_norm": 0.0686076357960701, "learning_rate": 4.9480174625320674e-05, "loss": 0.1993, "step": 38382 }, { "epoch": 3.1094458846403112, "grad_norm": 0.07834609597921371, "learning_rate": 4.947567397272605e-05, "loss": 0.2239, "step": 38383 }, { "epoch": 3.1095268956578095, "grad_norm": 0.07187711447477341, "learning_rate": 4.947117332013142e-05, "loss": 0.2386, "step": 38384 }, { "epoch": 3.1096079066753077, "grad_norm": 0.05771234259009361, "learning_rate": 4.9466672667536795e-05, "loss": 0.2127, "step": 38385 }, { "epoch": 3.1096889176928064, "grad_norm": 0.089955173432827, "learning_rate": 4.946217201494217e-05, "loss": 0.2216, "step": 38386 }, { "epoch": 3.1097699287103047, "grad_norm": 0.08648094534873962, "learning_rate": 4.945767136234754e-05, "loss": 0.2784, "step": 38387 }, { "epoch": 3.109850939727803, "grad_norm": 0.06970040500164032, "learning_rate": 4.9453170709752916e-05, "loss": 0.2271, "step": 38388 }, { "epoch": 3.109931950745301, "grad_norm": 0.06475716829299927, "learning_rate": 4.944867005715829e-05, "loss": 0.2032, "step": 38389 }, { "epoch": 3.1100129617628, "grad_norm": 0.07349738478660583, "learning_rate": 4.944416940456366e-05, "loss": 0.2017, "step": 38390 }, { "epoch": 3.110093972780298, "grad_norm": 0.07952949404716492, "learning_rate": 4.9439668751969036e-05, "loss": 0.2741, "step": 38391 }, { "epoch": 3.1101749837977963, "grad_norm": 0.09136699140071869, "learning_rate": 4.943516809937441e-05, "loss": 0.2298, "step": 38392 }, { "epoch": 3.110255994815295, "grad_norm": 0.057764239609241486, "learning_rate": 4.943066744677979e-05, "loss": 0.2019, "step": 38393 }, { "epoch": 3.1103370058327933, "grad_norm": 0.06546784937381744, "learning_rate": 4.942616679418516e-05, "loss": 0.2622, "step": 38394 }, { "epoch": 3.1104180168502915, "grad_norm": 0.0661168321967125, "learning_rate": 4.942166614159053e-05, "loss": 0.2261, "step": 38395 }, { "epoch": 3.11049902786779, "grad_norm": 0.06803473085165024, "learning_rate": 4.941716548899591e-05, "loss": 0.1992, "step": 38396 }, { "epoch": 3.1105800388852884, "grad_norm": 0.09625261276960373, "learning_rate": 4.941266483640128e-05, "loss": 0.261, "step": 38397 }, { "epoch": 3.1106610499027867, "grad_norm": 0.05727479234337807, "learning_rate": 4.940816418380665e-05, "loss": 0.2217, "step": 38398 }, { "epoch": 3.110742060920285, "grad_norm": 0.07928014546632767, "learning_rate": 4.940366353121203e-05, "loss": 0.2105, "step": 38399 }, { "epoch": 3.1108230719377836, "grad_norm": 0.07788312435150146, "learning_rate": 4.93991628786174e-05, "loss": 0.2197, "step": 38400 }, { "epoch": 3.110904082955282, "grad_norm": 0.07708944380283356, "learning_rate": 4.939466222602277e-05, "loss": 0.2597, "step": 38401 }, { "epoch": 3.11098509397278, "grad_norm": 0.06281648576259613, "learning_rate": 4.939016157342815e-05, "loss": 0.2094, "step": 38402 }, { "epoch": 3.111066104990279, "grad_norm": 0.07252870500087738, "learning_rate": 4.938566092083352e-05, "loss": 0.25, "step": 38403 }, { "epoch": 3.111147116007777, "grad_norm": 0.0702609047293663, "learning_rate": 4.938116026823889e-05, "loss": 0.2328, "step": 38404 }, { "epoch": 3.1112281270252753, "grad_norm": 0.06708363443613052, "learning_rate": 4.9376659615644274e-05, "loss": 0.1991, "step": 38405 }, { "epoch": 3.111309138042774, "grad_norm": 0.07520218938589096, "learning_rate": 4.937215896304965e-05, "loss": 0.2102, "step": 38406 }, { "epoch": 3.1113901490602722, "grad_norm": 0.06943734735250473, "learning_rate": 4.9367658310455014e-05, "loss": 0.2571, "step": 38407 }, { "epoch": 3.1114711600777705, "grad_norm": 0.06772951781749725, "learning_rate": 4.9363157657860394e-05, "loss": 0.2482, "step": 38408 }, { "epoch": 3.111552171095269, "grad_norm": 0.06087531894445419, "learning_rate": 4.935865700526577e-05, "loss": 0.249, "step": 38409 }, { "epoch": 3.1116331821127674, "grad_norm": 0.06537861377000809, "learning_rate": 4.9354156352671135e-05, "loss": 0.2136, "step": 38410 }, { "epoch": 3.1117141931302656, "grad_norm": 0.08597944676876068, "learning_rate": 4.9349655700076515e-05, "loss": 0.2539, "step": 38411 }, { "epoch": 3.111795204147764, "grad_norm": 0.07390245795249939, "learning_rate": 4.934515504748189e-05, "loss": 0.2308, "step": 38412 }, { "epoch": 3.1118762151652626, "grad_norm": 0.0940677598118782, "learning_rate": 4.934065439488726e-05, "loss": 0.2728, "step": 38413 }, { "epoch": 3.111957226182761, "grad_norm": 0.06742730736732483, "learning_rate": 4.9336153742292636e-05, "loss": 0.2078, "step": 38414 }, { "epoch": 3.112038237200259, "grad_norm": 0.0731840655207634, "learning_rate": 4.933165308969801e-05, "loss": 0.2605, "step": 38415 }, { "epoch": 3.1121192482177578, "grad_norm": 0.06594830751419067, "learning_rate": 4.932715243710338e-05, "loss": 0.2546, "step": 38416 }, { "epoch": 3.112200259235256, "grad_norm": 0.06969321519136429, "learning_rate": 4.932265178450876e-05, "loss": 0.2551, "step": 38417 }, { "epoch": 3.1122812702527543, "grad_norm": 0.06713997572660446, "learning_rate": 4.931815113191413e-05, "loss": 0.2397, "step": 38418 }, { "epoch": 3.112362281270253, "grad_norm": 0.06411050260066986, "learning_rate": 4.9313650479319504e-05, "loss": 0.2283, "step": 38419 }, { "epoch": 3.112443292287751, "grad_norm": 0.06092459335923195, "learning_rate": 4.930914982672488e-05, "loss": 0.2581, "step": 38420 }, { "epoch": 3.1125243033052494, "grad_norm": 0.05901643633842468, "learning_rate": 4.930464917413025e-05, "loss": 0.2088, "step": 38421 }, { "epoch": 3.1126053143227477, "grad_norm": 0.06839630007743835, "learning_rate": 4.9300148521535625e-05, "loss": 0.2485, "step": 38422 }, { "epoch": 3.1126863253402464, "grad_norm": 0.08143807202577591, "learning_rate": 4.9295647868941e-05, "loss": 0.2186, "step": 38423 }, { "epoch": 3.1127673363577446, "grad_norm": 0.08210988342761993, "learning_rate": 4.929114721634637e-05, "loss": 0.2284, "step": 38424 }, { "epoch": 3.112848347375243, "grad_norm": 0.05726107954978943, "learning_rate": 4.9286646563751746e-05, "loss": 0.2158, "step": 38425 }, { "epoch": 3.1129293583927415, "grad_norm": 0.06085515022277832, "learning_rate": 4.928214591115712e-05, "loss": 0.243, "step": 38426 }, { "epoch": 3.11301036941024, "grad_norm": 0.09158769249916077, "learning_rate": 4.927764525856249e-05, "loss": 0.2211, "step": 38427 }, { "epoch": 3.113091380427738, "grad_norm": 0.08236166834831238, "learning_rate": 4.9273144605967866e-05, "loss": 0.2031, "step": 38428 }, { "epoch": 3.1131723914452367, "grad_norm": 0.06737654656171799, "learning_rate": 4.926864395337324e-05, "loss": 0.238, "step": 38429 }, { "epoch": 3.113253402462735, "grad_norm": 0.06789126247167587, "learning_rate": 4.9264143300778614e-05, "loss": 0.2491, "step": 38430 }, { "epoch": 3.113334413480233, "grad_norm": 0.07835162431001663, "learning_rate": 4.925964264818399e-05, "loss": 0.2356, "step": 38431 }, { "epoch": 3.113415424497732, "grad_norm": 0.060290753841400146, "learning_rate": 4.925514199558936e-05, "loss": 0.2396, "step": 38432 }, { "epoch": 3.11349643551523, "grad_norm": 0.07192988693714142, "learning_rate": 4.9250641342994734e-05, "loss": 0.2081, "step": 38433 }, { "epoch": 3.1135774465327284, "grad_norm": 0.07745447009801865, "learning_rate": 4.924614069040011e-05, "loss": 0.2241, "step": 38434 }, { "epoch": 3.1136584575502266, "grad_norm": 0.09003927558660507, "learning_rate": 4.924164003780549e-05, "loss": 0.2479, "step": 38435 }, { "epoch": 3.1137394685677253, "grad_norm": 0.06698548048734665, "learning_rate": 4.9237139385210855e-05, "loss": 0.232, "step": 38436 }, { "epoch": 3.1138204795852236, "grad_norm": 0.06464772671461105, "learning_rate": 4.923263873261623e-05, "loss": 0.1865, "step": 38437 }, { "epoch": 3.113901490602722, "grad_norm": 0.07534575462341309, "learning_rate": 4.922813808002161e-05, "loss": 0.2207, "step": 38438 }, { "epoch": 3.1139825016202205, "grad_norm": 0.05245482176542282, "learning_rate": 4.9223637427426976e-05, "loss": 0.2074, "step": 38439 }, { "epoch": 3.1140635126377187, "grad_norm": 0.060525473207235336, "learning_rate": 4.921913677483235e-05, "loss": 0.2202, "step": 38440 }, { "epoch": 3.114144523655217, "grad_norm": 0.0742216482758522, "learning_rate": 4.921463612223773e-05, "loss": 0.215, "step": 38441 }, { "epoch": 3.1142255346727157, "grad_norm": 0.07293812930583954, "learning_rate": 4.92101354696431e-05, "loss": 0.2565, "step": 38442 }, { "epoch": 3.114306545690214, "grad_norm": 0.07802662253379822, "learning_rate": 4.920563481704847e-05, "loss": 0.2224, "step": 38443 }, { "epoch": 3.114387556707712, "grad_norm": 0.06432733684778214, "learning_rate": 4.920113416445385e-05, "loss": 0.2281, "step": 38444 }, { "epoch": 3.1144685677252104, "grad_norm": 0.07466696947813034, "learning_rate": 4.919663351185922e-05, "loss": 0.2477, "step": 38445 }, { "epoch": 3.114549578742709, "grad_norm": 0.07490424066781998, "learning_rate": 4.91921328592646e-05, "loss": 0.2081, "step": 38446 }, { "epoch": 3.1146305897602073, "grad_norm": 0.06576748192310333, "learning_rate": 4.918763220666997e-05, "loss": 0.2186, "step": 38447 }, { "epoch": 3.1147116007777056, "grad_norm": 0.05498381704092026, "learning_rate": 4.9183131554075345e-05, "loss": 0.2257, "step": 38448 }, { "epoch": 3.1147926117952043, "grad_norm": 0.07218701392412186, "learning_rate": 4.917863090148072e-05, "loss": 0.2327, "step": 38449 }, { "epoch": 3.1148736228127025, "grad_norm": 0.06278561800718307, "learning_rate": 4.917413024888609e-05, "loss": 0.2252, "step": 38450 }, { "epoch": 3.1149546338302008, "grad_norm": 0.0699433833360672, "learning_rate": 4.9169629596291466e-05, "loss": 0.2236, "step": 38451 }, { "epoch": 3.1150356448476995, "grad_norm": 0.07142642885446548, "learning_rate": 4.916512894369684e-05, "loss": 0.2115, "step": 38452 }, { "epoch": 3.1151166558651977, "grad_norm": 0.07643146812915802, "learning_rate": 4.916062829110221e-05, "loss": 0.2068, "step": 38453 }, { "epoch": 3.115197666882696, "grad_norm": 0.06402745842933655, "learning_rate": 4.915612763850759e-05, "loss": 0.2126, "step": 38454 }, { "epoch": 3.1152786779001946, "grad_norm": 0.09096885472536087, "learning_rate": 4.915162698591296e-05, "loss": 0.2691, "step": 38455 }, { "epoch": 3.115359688917693, "grad_norm": 0.07322259992361069, "learning_rate": 4.9147126333318334e-05, "loss": 0.2385, "step": 38456 }, { "epoch": 3.115440699935191, "grad_norm": 0.07319648563861847, "learning_rate": 4.914262568072371e-05, "loss": 0.2517, "step": 38457 }, { "epoch": 3.1155217109526894, "grad_norm": 0.07214269042015076, "learning_rate": 4.913812502812908e-05, "loss": 0.2342, "step": 38458 }, { "epoch": 3.115602721970188, "grad_norm": 0.06724945455789566, "learning_rate": 4.9133624375534455e-05, "loss": 0.2317, "step": 38459 }, { "epoch": 3.1156837329876863, "grad_norm": 0.10171977430582047, "learning_rate": 4.912912372293983e-05, "loss": 0.2638, "step": 38460 }, { "epoch": 3.1157647440051845, "grad_norm": 0.08508376777172089, "learning_rate": 4.91246230703452e-05, "loss": 0.2443, "step": 38461 }, { "epoch": 3.1158457550226832, "grad_norm": 0.061026692390441895, "learning_rate": 4.9120122417750576e-05, "loss": 0.2006, "step": 38462 }, { "epoch": 3.1159267660401815, "grad_norm": 0.08301940560340881, "learning_rate": 4.911562176515595e-05, "loss": 0.2538, "step": 38463 }, { "epoch": 3.1160077770576797, "grad_norm": 0.07607529312372208, "learning_rate": 4.911112111256132e-05, "loss": 0.2288, "step": 38464 }, { "epoch": 3.1160887880751784, "grad_norm": 0.07203084230422974, "learning_rate": 4.9106620459966697e-05, "loss": 0.2263, "step": 38465 }, { "epoch": 3.1161697990926767, "grad_norm": 0.0560927577316761, "learning_rate": 4.910211980737207e-05, "loss": 0.2014, "step": 38466 }, { "epoch": 3.116250810110175, "grad_norm": 0.0738847628235817, "learning_rate": 4.9097619154777444e-05, "loss": 0.2086, "step": 38467 }, { "epoch": 3.116331821127673, "grad_norm": 0.06414270401000977, "learning_rate": 4.909311850218282e-05, "loss": 0.2061, "step": 38468 }, { "epoch": 3.116412832145172, "grad_norm": 0.06747682392597198, "learning_rate": 4.908861784958819e-05, "loss": 0.245, "step": 38469 }, { "epoch": 3.11649384316267, "grad_norm": 0.05635647103190422, "learning_rate": 4.9084117196993565e-05, "loss": 0.2061, "step": 38470 }, { "epoch": 3.1165748541801683, "grad_norm": 0.07150799036026001, "learning_rate": 4.907961654439894e-05, "loss": 0.2055, "step": 38471 }, { "epoch": 3.116655865197667, "grad_norm": 0.05772681534290314, "learning_rate": 4.907511589180431e-05, "loss": 0.2158, "step": 38472 }, { "epoch": 3.1167368762151653, "grad_norm": 0.05746937170624733, "learning_rate": 4.9070615239209685e-05, "loss": 0.2113, "step": 38473 }, { "epoch": 3.1168178872326635, "grad_norm": 0.06963689625263214, "learning_rate": 4.9066114586615066e-05, "loss": 0.2076, "step": 38474 }, { "epoch": 3.116898898250162, "grad_norm": 0.06944216787815094, "learning_rate": 4.906161393402043e-05, "loss": 0.2228, "step": 38475 }, { "epoch": 3.1169799092676604, "grad_norm": 0.0683133527636528, "learning_rate": 4.9057113281425806e-05, "loss": 0.2156, "step": 38476 }, { "epoch": 3.1170609202851587, "grad_norm": 0.0697324275970459, "learning_rate": 4.9052612628831187e-05, "loss": 0.2047, "step": 38477 }, { "epoch": 3.1171419313026574, "grad_norm": 0.08170359581708908, "learning_rate": 4.904811197623655e-05, "loss": 0.2434, "step": 38478 }, { "epoch": 3.1172229423201556, "grad_norm": 0.07048113644123077, "learning_rate": 4.904361132364193e-05, "loss": 0.2319, "step": 38479 }, { "epoch": 3.117303953337654, "grad_norm": 0.0791177749633789, "learning_rate": 4.903911067104731e-05, "loss": 0.2627, "step": 38480 }, { "epoch": 3.117384964355152, "grad_norm": 0.06898507475852966, "learning_rate": 4.9034610018452674e-05, "loss": 0.2163, "step": 38481 }, { "epoch": 3.117465975372651, "grad_norm": 0.07705669850111008, "learning_rate": 4.9030109365858055e-05, "loss": 0.2502, "step": 38482 }, { "epoch": 3.117546986390149, "grad_norm": 0.07984774559736252, "learning_rate": 4.902560871326343e-05, "loss": 0.2375, "step": 38483 }, { "epoch": 3.1176279974076473, "grad_norm": 0.07025951147079468, "learning_rate": 4.9021108060668795e-05, "loss": 0.2279, "step": 38484 }, { "epoch": 3.117709008425146, "grad_norm": 0.061531197279691696, "learning_rate": 4.9016607408074175e-05, "loss": 0.1944, "step": 38485 }, { "epoch": 3.1177900194426442, "grad_norm": 0.061712365597486496, "learning_rate": 4.901210675547955e-05, "loss": 0.2538, "step": 38486 }, { "epoch": 3.1178710304601425, "grad_norm": 0.0870019868016243, "learning_rate": 4.900760610288492e-05, "loss": 0.2018, "step": 38487 }, { "epoch": 3.117952041477641, "grad_norm": 0.0668628066778183, "learning_rate": 4.9003105450290296e-05, "loss": 0.2187, "step": 38488 }, { "epoch": 3.1180330524951394, "grad_norm": 0.06232215464115143, "learning_rate": 4.899860479769567e-05, "loss": 0.3, "step": 38489 }, { "epoch": 3.1181140635126376, "grad_norm": 0.059925880283117294, "learning_rate": 4.8994104145101043e-05, "loss": 0.2246, "step": 38490 }, { "epoch": 3.118195074530136, "grad_norm": 0.07962461560964584, "learning_rate": 4.898960349250642e-05, "loss": 0.2847, "step": 38491 }, { "epoch": 3.1182760855476346, "grad_norm": 0.0759328305721283, "learning_rate": 4.898510283991179e-05, "loss": 0.2065, "step": 38492 }, { "epoch": 3.118357096565133, "grad_norm": 0.06727610528469086, "learning_rate": 4.8980602187317164e-05, "loss": 0.1977, "step": 38493 }, { "epoch": 3.118438107582631, "grad_norm": 0.06177781522274017, "learning_rate": 4.897610153472254e-05, "loss": 0.2149, "step": 38494 }, { "epoch": 3.1185191186001298, "grad_norm": 0.0664069876074791, "learning_rate": 4.897160088212791e-05, "loss": 0.2184, "step": 38495 }, { "epoch": 3.118600129617628, "grad_norm": 0.07583729922771454, "learning_rate": 4.8967100229533285e-05, "loss": 0.2504, "step": 38496 }, { "epoch": 3.1186811406351262, "grad_norm": 0.062377750873565674, "learning_rate": 4.896259957693866e-05, "loss": 0.1987, "step": 38497 }, { "epoch": 3.118762151652625, "grad_norm": 0.07547123730182648, "learning_rate": 4.895809892434403e-05, "loss": 0.2361, "step": 38498 }, { "epoch": 3.118843162670123, "grad_norm": 0.08136604726314545, "learning_rate": 4.8953598271749406e-05, "loss": 0.2347, "step": 38499 }, { "epoch": 3.1189241736876214, "grad_norm": 0.0768231749534607, "learning_rate": 4.894909761915478e-05, "loss": 0.2109, "step": 38500 }, { "epoch": 3.11900518470512, "grad_norm": 0.07560477405786514, "learning_rate": 4.894459696656015e-05, "loss": 0.2574, "step": 38501 }, { "epoch": 3.1190861957226184, "grad_norm": 0.07469621300697327, "learning_rate": 4.894009631396553e-05, "loss": 0.2551, "step": 38502 }, { "epoch": 3.1191672067401166, "grad_norm": 0.06510797888040543, "learning_rate": 4.89355956613709e-05, "loss": 0.2017, "step": 38503 }, { "epoch": 3.119248217757615, "grad_norm": 0.07766696065664291, "learning_rate": 4.8931095008776274e-05, "loss": 0.2066, "step": 38504 }, { "epoch": 3.1193292287751135, "grad_norm": 0.07333401590585709, "learning_rate": 4.892659435618165e-05, "loss": 0.2208, "step": 38505 }, { "epoch": 3.119410239792612, "grad_norm": 0.061164095997810364, "learning_rate": 4.892209370358702e-05, "loss": 0.2093, "step": 38506 }, { "epoch": 3.11949125081011, "grad_norm": 0.08092789351940155, "learning_rate": 4.8917593050992395e-05, "loss": 0.2249, "step": 38507 }, { "epoch": 3.1195722618276087, "grad_norm": 0.05781802162528038, "learning_rate": 4.891309239839777e-05, "loss": 0.2036, "step": 38508 }, { "epoch": 3.119653272845107, "grad_norm": 0.0585942342877388, "learning_rate": 4.890859174580314e-05, "loss": 0.1811, "step": 38509 }, { "epoch": 3.119734283862605, "grad_norm": 0.07196545600891113, "learning_rate": 4.8904091093208515e-05, "loss": 0.2354, "step": 38510 }, { "epoch": 3.119815294880104, "grad_norm": 0.08211824297904968, "learning_rate": 4.889959044061389e-05, "loss": 0.2284, "step": 38511 }, { "epoch": 3.119896305897602, "grad_norm": 0.07880119234323502, "learning_rate": 4.889508978801926e-05, "loss": 0.2553, "step": 38512 }, { "epoch": 3.1199773169151004, "grad_norm": 0.10465053468942642, "learning_rate": 4.889058913542464e-05, "loss": 0.2787, "step": 38513 }, { "epoch": 3.1200583279325986, "grad_norm": 0.06549686938524246, "learning_rate": 4.888608848283001e-05, "loss": 0.2457, "step": 38514 }, { "epoch": 3.1201393389500973, "grad_norm": 0.07236936688423157, "learning_rate": 4.888158783023539e-05, "loss": 0.2581, "step": 38515 }, { "epoch": 3.1202203499675956, "grad_norm": 0.059712644666433334, "learning_rate": 4.8877087177640764e-05, "loss": 0.2089, "step": 38516 }, { "epoch": 3.120301360985094, "grad_norm": 0.06971476227045059, "learning_rate": 4.887258652504613e-05, "loss": 0.2512, "step": 38517 }, { "epoch": 3.1203823720025925, "grad_norm": 0.06128533557057381, "learning_rate": 4.886808587245151e-05, "loss": 0.2296, "step": 38518 }, { "epoch": 3.1204633830200907, "grad_norm": 0.07567324489355087, "learning_rate": 4.8863585219856885e-05, "loss": 0.2501, "step": 38519 }, { "epoch": 3.120544394037589, "grad_norm": 0.06608163565397263, "learning_rate": 4.885908456726225e-05, "loss": 0.2608, "step": 38520 }, { "epoch": 3.1206254050550877, "grad_norm": 0.06862304359674454, "learning_rate": 4.885458391466763e-05, "loss": 0.2373, "step": 38521 }, { "epoch": 3.120706416072586, "grad_norm": 0.06940841674804688, "learning_rate": 4.8850083262073005e-05, "loss": 0.2527, "step": 38522 }, { "epoch": 3.120787427090084, "grad_norm": 0.07779053598642349, "learning_rate": 4.884558260947837e-05, "loss": 0.2374, "step": 38523 }, { "epoch": 3.120868438107583, "grad_norm": 0.07671964168548584, "learning_rate": 4.884108195688375e-05, "loss": 0.2201, "step": 38524 }, { "epoch": 3.120949449125081, "grad_norm": 0.057076919823884964, "learning_rate": 4.8836581304289126e-05, "loss": 0.2252, "step": 38525 }, { "epoch": 3.1210304601425793, "grad_norm": 0.08262202888727188, "learning_rate": 4.88320806516945e-05, "loss": 0.2759, "step": 38526 }, { "epoch": 3.1211114711600776, "grad_norm": 0.07039379328489304, "learning_rate": 4.8827579999099873e-05, "loss": 0.2373, "step": 38527 }, { "epoch": 3.1211924821775763, "grad_norm": 0.0531785786151886, "learning_rate": 4.882307934650525e-05, "loss": 0.1895, "step": 38528 }, { "epoch": 3.1212734931950745, "grad_norm": 0.08400041610002518, "learning_rate": 4.881857869391062e-05, "loss": 0.2174, "step": 38529 }, { "epoch": 3.1213545042125728, "grad_norm": 0.1000119000673294, "learning_rate": 4.8814078041315994e-05, "loss": 0.2544, "step": 38530 }, { "epoch": 3.1214355152300715, "grad_norm": 0.08154396712779999, "learning_rate": 4.880957738872137e-05, "loss": 0.226, "step": 38531 }, { "epoch": 3.1215165262475697, "grad_norm": 0.06984757632017136, "learning_rate": 4.880507673612674e-05, "loss": 0.2445, "step": 38532 }, { "epoch": 3.121597537265068, "grad_norm": 0.06032893434166908, "learning_rate": 4.8800576083532115e-05, "loss": 0.2255, "step": 38533 }, { "epoch": 3.1216785482825666, "grad_norm": 0.06319891661405563, "learning_rate": 4.879607543093749e-05, "loss": 0.2381, "step": 38534 }, { "epoch": 3.121759559300065, "grad_norm": 0.06383655965328217, "learning_rate": 4.879157477834286e-05, "loss": 0.2084, "step": 38535 }, { "epoch": 3.121840570317563, "grad_norm": 0.06865326315164566, "learning_rate": 4.8787074125748236e-05, "loss": 0.2131, "step": 38536 }, { "epoch": 3.1219215813350614, "grad_norm": 0.08230190724134445, "learning_rate": 4.878257347315361e-05, "loss": 0.2205, "step": 38537 }, { "epoch": 3.12200259235256, "grad_norm": 0.075211301445961, "learning_rate": 4.877807282055898e-05, "loss": 0.2094, "step": 38538 }, { "epoch": 3.1220836033700583, "grad_norm": 0.06502600014209747, "learning_rate": 4.877357216796436e-05, "loss": 0.2047, "step": 38539 }, { "epoch": 3.1221646143875565, "grad_norm": 0.06465815007686615, "learning_rate": 4.876907151536973e-05, "loss": 0.233, "step": 38540 }, { "epoch": 3.1222456254050552, "grad_norm": 0.061203133314847946, "learning_rate": 4.8764570862775104e-05, "loss": 0.2426, "step": 38541 }, { "epoch": 3.1223266364225535, "grad_norm": 0.061528146266937256, "learning_rate": 4.876007021018048e-05, "loss": 0.2538, "step": 38542 }, { "epoch": 3.1224076474400517, "grad_norm": 0.06926919519901276, "learning_rate": 4.875556955758585e-05, "loss": 0.2114, "step": 38543 }, { "epoch": 3.1224886584575504, "grad_norm": 0.07080454379320145, "learning_rate": 4.8751068904991225e-05, "loss": 0.2522, "step": 38544 }, { "epoch": 3.1225696694750487, "grad_norm": 0.07412424683570862, "learning_rate": 4.87465682523966e-05, "loss": 0.2099, "step": 38545 }, { "epoch": 3.122650680492547, "grad_norm": 0.06699513643980026, "learning_rate": 4.874206759980197e-05, "loss": 0.228, "step": 38546 }, { "epoch": 3.1227316915100456, "grad_norm": 0.08302906900644302, "learning_rate": 4.8737566947207346e-05, "loss": 0.2132, "step": 38547 }, { "epoch": 3.122812702527544, "grad_norm": 0.0748620480298996, "learning_rate": 4.873306629461272e-05, "loss": 0.296, "step": 38548 }, { "epoch": 3.122893713545042, "grad_norm": 0.06947177648544312, "learning_rate": 4.872856564201809e-05, "loss": 0.2439, "step": 38549 }, { "epoch": 3.1229747245625403, "grad_norm": 0.07392599433660507, "learning_rate": 4.8724064989423466e-05, "loss": 0.2444, "step": 38550 }, { "epoch": 3.123055735580039, "grad_norm": 0.08208917826414108, "learning_rate": 4.871956433682885e-05, "loss": 0.2224, "step": 38551 }, { "epoch": 3.1231367465975373, "grad_norm": 0.07460086792707443, "learning_rate": 4.871506368423422e-05, "loss": 0.2205, "step": 38552 }, { "epoch": 3.1232177576150355, "grad_norm": 0.08083727955818176, "learning_rate": 4.871056303163959e-05, "loss": 0.2517, "step": 38553 }, { "epoch": 3.123298768632534, "grad_norm": 0.07795874029397964, "learning_rate": 4.870606237904497e-05, "loss": 0.2164, "step": 38554 }, { "epoch": 3.1233797796500324, "grad_norm": 0.07984881848096848, "learning_rate": 4.870156172645034e-05, "loss": 0.2731, "step": 38555 }, { "epoch": 3.1234607906675307, "grad_norm": 0.07941204309463501, "learning_rate": 4.869706107385571e-05, "loss": 0.2455, "step": 38556 }, { "epoch": 3.1235418016850294, "grad_norm": 0.07575903832912445, "learning_rate": 4.869256042126109e-05, "loss": 0.2325, "step": 38557 }, { "epoch": 3.1236228127025276, "grad_norm": 0.07499007135629654, "learning_rate": 4.868805976866646e-05, "loss": 0.2502, "step": 38558 }, { "epoch": 3.123703823720026, "grad_norm": 0.07867011427879333, "learning_rate": 4.868355911607183e-05, "loss": 0.2587, "step": 38559 }, { "epoch": 3.123784834737524, "grad_norm": 0.0736221894621849, "learning_rate": 4.867905846347721e-05, "loss": 0.2328, "step": 38560 }, { "epoch": 3.123865845755023, "grad_norm": 0.08023568987846375, "learning_rate": 4.867455781088258e-05, "loss": 0.2471, "step": 38561 }, { "epoch": 3.123946856772521, "grad_norm": 0.08737244457006454, "learning_rate": 4.867005715828795e-05, "loss": 0.2694, "step": 38562 }, { "epoch": 3.1240278677900193, "grad_norm": 0.06269481778144836, "learning_rate": 4.866555650569333e-05, "loss": 0.2211, "step": 38563 }, { "epoch": 3.124108878807518, "grad_norm": 0.07235285639762878, "learning_rate": 4.8661055853098704e-05, "loss": 0.2377, "step": 38564 }, { "epoch": 3.124189889825016, "grad_norm": 0.06457497179508209, "learning_rate": 4.865655520050408e-05, "loss": 0.1976, "step": 38565 }, { "epoch": 3.1242709008425145, "grad_norm": 0.06922008842229843, "learning_rate": 4.865205454790945e-05, "loss": 0.2261, "step": 38566 }, { "epoch": 3.124351911860013, "grad_norm": 0.05870746448636055, "learning_rate": 4.8647553895314824e-05, "loss": 0.2185, "step": 38567 }, { "epoch": 3.1244329228775114, "grad_norm": 0.07148100435733795, "learning_rate": 4.86430532427202e-05, "loss": 0.2328, "step": 38568 }, { "epoch": 3.1245139338950096, "grad_norm": 0.07290808856487274, "learning_rate": 4.863855259012557e-05, "loss": 0.2128, "step": 38569 }, { "epoch": 3.124594944912508, "grad_norm": 0.08129823952913284, "learning_rate": 4.8634051937530945e-05, "loss": 0.2205, "step": 38570 }, { "epoch": 3.1246759559300066, "grad_norm": 0.08137766271829605, "learning_rate": 4.862955128493632e-05, "loss": 0.2472, "step": 38571 }, { "epoch": 3.124756966947505, "grad_norm": 0.08388211578130722, "learning_rate": 4.862505063234169e-05, "loss": 0.2983, "step": 38572 }, { "epoch": 3.124837977965003, "grad_norm": 0.07012079656124115, "learning_rate": 4.8620549979747066e-05, "loss": 0.2072, "step": 38573 }, { "epoch": 3.1249189889825018, "grad_norm": 0.08775711804628372, "learning_rate": 4.861604932715244e-05, "loss": 0.2144, "step": 38574 }, { "epoch": 3.125, "grad_norm": 0.07253850251436234, "learning_rate": 4.861154867455781e-05, "loss": 0.2097, "step": 38575 }, { "epoch": 3.1250810110174982, "grad_norm": 0.06796213984489441, "learning_rate": 4.860704802196319e-05, "loss": 0.2175, "step": 38576 }, { "epoch": 3.125162022034997, "grad_norm": 0.07270730286836624, "learning_rate": 4.860254736936856e-05, "loss": 0.2226, "step": 38577 }, { "epoch": 3.125243033052495, "grad_norm": 0.056501347571611404, "learning_rate": 4.8598046716773934e-05, "loss": 0.2236, "step": 38578 }, { "epoch": 3.1253240440699934, "grad_norm": 0.06658945977687836, "learning_rate": 4.859354606417931e-05, "loss": 0.2512, "step": 38579 }, { "epoch": 3.1254050550874917, "grad_norm": 0.07839953899383545, "learning_rate": 4.858904541158468e-05, "loss": 0.202, "step": 38580 }, { "epoch": 3.1254860661049904, "grad_norm": 0.07932569831609726, "learning_rate": 4.8584544758990055e-05, "loss": 0.2634, "step": 38581 }, { "epoch": 3.1255670771224886, "grad_norm": 0.06872804462909698, "learning_rate": 4.858004410639543e-05, "loss": 0.3022, "step": 38582 }, { "epoch": 3.125648088139987, "grad_norm": 0.06276540458202362, "learning_rate": 4.85755434538008e-05, "loss": 0.2254, "step": 38583 }, { "epoch": 3.1257290991574855, "grad_norm": 0.0721246674656868, "learning_rate": 4.857104280120618e-05, "loss": 0.1924, "step": 38584 }, { "epoch": 3.125810110174984, "grad_norm": 0.07592836022377014, "learning_rate": 4.856654214861155e-05, "loss": 0.2381, "step": 38585 }, { "epoch": 3.125891121192482, "grad_norm": 0.06872162222862244, "learning_rate": 4.856204149601692e-05, "loss": 0.2617, "step": 38586 }, { "epoch": 3.1259721322099807, "grad_norm": 0.07416519522666931, "learning_rate": 4.85575408434223e-05, "loss": 0.2221, "step": 38587 }, { "epoch": 3.126053143227479, "grad_norm": 0.07375083118677139, "learning_rate": 4.855304019082767e-05, "loss": 0.2604, "step": 38588 }, { "epoch": 3.126134154244977, "grad_norm": 0.07078424096107483, "learning_rate": 4.8548539538233044e-05, "loss": 0.2533, "step": 38589 }, { "epoch": 3.126215165262476, "grad_norm": 0.05443970113992691, "learning_rate": 4.8544038885638424e-05, "loss": 0.2002, "step": 38590 }, { "epoch": 3.126296176279974, "grad_norm": 0.09065693616867065, "learning_rate": 4.85395382330438e-05, "loss": 0.2347, "step": 38591 }, { "epoch": 3.1263771872974724, "grad_norm": 0.07778283953666687, "learning_rate": 4.8535037580449164e-05, "loss": 0.2068, "step": 38592 }, { "epoch": 3.126458198314971, "grad_norm": 0.07738600671291351, "learning_rate": 4.8530536927854545e-05, "loss": 0.2145, "step": 38593 }, { "epoch": 3.1265392093324693, "grad_norm": 0.0660366341471672, "learning_rate": 4.852603627525992e-05, "loss": 0.1892, "step": 38594 }, { "epoch": 3.1266202203499676, "grad_norm": 0.07506325840950012, "learning_rate": 4.8521535622665285e-05, "loss": 0.2487, "step": 38595 }, { "epoch": 3.126701231367466, "grad_norm": 0.0669327825307846, "learning_rate": 4.8517034970070666e-05, "loss": 0.2237, "step": 38596 }, { "epoch": 3.1267822423849645, "grad_norm": 0.06828659027814865, "learning_rate": 4.851253431747604e-05, "loss": 0.2254, "step": 38597 }, { "epoch": 3.1268632534024627, "grad_norm": 0.06882147490978241, "learning_rate": 4.8508033664881406e-05, "loss": 0.2626, "step": 38598 }, { "epoch": 3.126944264419961, "grad_norm": 0.0750584602355957, "learning_rate": 4.8503533012286786e-05, "loss": 0.207, "step": 38599 }, { "epoch": 3.1270252754374597, "grad_norm": 0.0676826611161232, "learning_rate": 4.849903235969216e-05, "loss": 0.2209, "step": 38600 }, { "epoch": 3.127106286454958, "grad_norm": 0.0683126300573349, "learning_rate": 4.849453170709753e-05, "loss": 0.2315, "step": 38601 }, { "epoch": 3.127187297472456, "grad_norm": 0.06574027985334396, "learning_rate": 4.849003105450291e-05, "loss": 0.207, "step": 38602 }, { "epoch": 3.1272683084899544, "grad_norm": 0.06940241903066635, "learning_rate": 4.848553040190828e-05, "loss": 0.2542, "step": 38603 }, { "epoch": 3.127349319507453, "grad_norm": 0.07252786308526993, "learning_rate": 4.848102974931365e-05, "loss": 0.2263, "step": 38604 }, { "epoch": 3.1274303305249513, "grad_norm": 0.06789885461330414, "learning_rate": 4.847652909671903e-05, "loss": 0.2128, "step": 38605 }, { "epoch": 3.1275113415424496, "grad_norm": 0.0669901892542839, "learning_rate": 4.84720284441244e-05, "loss": 0.2043, "step": 38606 }, { "epoch": 3.1275923525599483, "grad_norm": 0.07184362411499023, "learning_rate": 4.8467527791529775e-05, "loss": 0.2282, "step": 38607 }, { "epoch": 3.1276733635774465, "grad_norm": 0.07352180778980255, "learning_rate": 4.846302713893515e-05, "loss": 0.2281, "step": 38608 }, { "epoch": 3.1277543745949448, "grad_norm": 0.06735699623823166, "learning_rate": 4.845852648634052e-05, "loss": 0.2241, "step": 38609 }, { "epoch": 3.1278353856124435, "grad_norm": 0.0694347694516182, "learning_rate": 4.8454025833745896e-05, "loss": 0.2592, "step": 38610 }, { "epoch": 3.1279163966299417, "grad_norm": 0.061657343059778214, "learning_rate": 4.844952518115127e-05, "loss": 0.2214, "step": 38611 }, { "epoch": 3.12799740764744, "grad_norm": 0.06862164288759232, "learning_rate": 4.844502452855664e-05, "loss": 0.2231, "step": 38612 }, { "epoch": 3.1280784186649386, "grad_norm": 0.07311592251062393, "learning_rate": 4.844052387596202e-05, "loss": 0.2551, "step": 38613 }, { "epoch": 3.128159429682437, "grad_norm": 0.06936139613389969, "learning_rate": 4.843602322336739e-05, "loss": 0.2193, "step": 38614 }, { "epoch": 3.128240440699935, "grad_norm": 0.07125812023878098, "learning_rate": 4.8431522570772764e-05, "loss": 0.2687, "step": 38615 }, { "epoch": 3.1283214517174334, "grad_norm": 0.07158618420362473, "learning_rate": 4.842702191817814e-05, "loss": 0.2352, "step": 38616 }, { "epoch": 3.128402462734932, "grad_norm": 0.06227901577949524, "learning_rate": 4.842252126558351e-05, "loss": 0.2616, "step": 38617 }, { "epoch": 3.1284834737524303, "grad_norm": 0.05311376973986626, "learning_rate": 4.8418020612988885e-05, "loss": 0.224, "step": 38618 }, { "epoch": 3.1285644847699285, "grad_norm": 0.08298182487487793, "learning_rate": 4.841351996039426e-05, "loss": 0.2638, "step": 38619 }, { "epoch": 3.1286454957874272, "grad_norm": 0.06601186096668243, "learning_rate": 4.840901930779964e-05, "loss": 0.2168, "step": 38620 }, { "epoch": 3.1287265068049255, "grad_norm": 0.06901419907808304, "learning_rate": 4.8404518655205006e-05, "loss": 0.2298, "step": 38621 }, { "epoch": 3.1288075178224237, "grad_norm": 0.09086368978023529, "learning_rate": 4.840001800261038e-05, "loss": 0.2565, "step": 38622 }, { "epoch": 3.1288885288399224, "grad_norm": 0.07500910013914108, "learning_rate": 4.839551735001576e-05, "loss": 0.2411, "step": 38623 }, { "epoch": 3.1289695398574207, "grad_norm": 0.06747693568468094, "learning_rate": 4.8391016697421127e-05, "loss": 0.2275, "step": 38624 }, { "epoch": 3.129050550874919, "grad_norm": 0.08328603953123093, "learning_rate": 4.83865160448265e-05, "loss": 0.2068, "step": 38625 }, { "epoch": 3.129131561892417, "grad_norm": 0.06553267687559128, "learning_rate": 4.838201539223188e-05, "loss": 0.1888, "step": 38626 }, { "epoch": 3.129212572909916, "grad_norm": 0.06869760900735855, "learning_rate": 4.837751473963725e-05, "loss": 0.2409, "step": 38627 }, { "epoch": 3.129293583927414, "grad_norm": 0.06937336176633835, "learning_rate": 4.837301408704262e-05, "loss": 0.2065, "step": 38628 }, { "epoch": 3.1293745949449123, "grad_norm": 0.09511666744947433, "learning_rate": 4.8368513434448e-05, "loss": 0.2427, "step": 38629 }, { "epoch": 3.129455605962411, "grad_norm": 0.06920617818832397, "learning_rate": 4.836401278185337e-05, "loss": 0.2335, "step": 38630 }, { "epoch": 3.1295366169799093, "grad_norm": 0.07520420104265213, "learning_rate": 4.835951212925874e-05, "loss": 0.2635, "step": 38631 }, { "epoch": 3.1296176279974075, "grad_norm": 0.06603451073169708, "learning_rate": 4.835501147666412e-05, "loss": 0.2203, "step": 38632 }, { "epoch": 3.129698639014906, "grad_norm": 0.07562818378210068, "learning_rate": 4.8350510824069496e-05, "loss": 0.2433, "step": 38633 }, { "epoch": 3.1297796500324044, "grad_norm": 0.0713423416018486, "learning_rate": 4.834601017147486e-05, "loss": 0.1939, "step": 38634 }, { "epoch": 3.1298606610499027, "grad_norm": 0.07066536694765091, "learning_rate": 4.834150951888024e-05, "loss": 0.2573, "step": 38635 }, { "epoch": 3.1299416720674014, "grad_norm": 0.07903990894556046, "learning_rate": 4.8337008866285617e-05, "loss": 0.2318, "step": 38636 }, { "epoch": 3.1300226830848996, "grad_norm": 0.07177235186100006, "learning_rate": 4.8332508213690983e-05, "loss": 0.2103, "step": 38637 }, { "epoch": 3.130103694102398, "grad_norm": 0.07718789577484131, "learning_rate": 4.8328007561096364e-05, "loss": 0.2302, "step": 38638 }, { "epoch": 3.130184705119896, "grad_norm": 0.08384337276220322, "learning_rate": 4.832350690850174e-05, "loss": 0.2397, "step": 38639 }, { "epoch": 3.130265716137395, "grad_norm": 0.07833084464073181, "learning_rate": 4.8319006255907104e-05, "loss": 0.2246, "step": 38640 }, { "epoch": 3.130346727154893, "grad_norm": 0.06530147045850754, "learning_rate": 4.8314505603312485e-05, "loss": 0.2187, "step": 38641 }, { "epoch": 3.1304277381723913, "grad_norm": 0.08604307472705841, "learning_rate": 4.831000495071786e-05, "loss": 0.2247, "step": 38642 }, { "epoch": 3.13050874918989, "grad_norm": 0.07353553175926208, "learning_rate": 4.8305504298123225e-05, "loss": 0.2109, "step": 38643 }, { "epoch": 3.130589760207388, "grad_norm": 0.06441865861415863, "learning_rate": 4.8301003645528605e-05, "loss": 0.2424, "step": 38644 }, { "epoch": 3.1306707712248865, "grad_norm": 0.06363363564014435, "learning_rate": 4.829650299293398e-05, "loss": 0.2517, "step": 38645 }, { "epoch": 3.130751782242385, "grad_norm": 0.06344431638717651, "learning_rate": 4.829200234033935e-05, "loss": 0.245, "step": 38646 }, { "epoch": 3.1308327932598834, "grad_norm": 0.06573443114757538, "learning_rate": 4.8287501687744726e-05, "loss": 0.2111, "step": 38647 }, { "epoch": 3.1309138042773816, "grad_norm": 0.06902682781219482, "learning_rate": 4.82830010351501e-05, "loss": 0.2164, "step": 38648 }, { "epoch": 3.13099481529488, "grad_norm": 0.06718235462903976, "learning_rate": 4.8278500382555473e-05, "loss": 0.2159, "step": 38649 }, { "epoch": 3.1310758263123786, "grad_norm": 0.07870693504810333, "learning_rate": 4.827399972996085e-05, "loss": 0.225, "step": 38650 }, { "epoch": 3.131156837329877, "grad_norm": 0.05864543467760086, "learning_rate": 4.826949907736622e-05, "loss": 0.2387, "step": 38651 }, { "epoch": 3.131237848347375, "grad_norm": 0.07667594403028488, "learning_rate": 4.8264998424771594e-05, "loss": 0.2183, "step": 38652 }, { "epoch": 3.1313188593648738, "grad_norm": 0.07851625978946686, "learning_rate": 4.826049777217697e-05, "loss": 0.2487, "step": 38653 }, { "epoch": 3.131399870382372, "grad_norm": 0.06867916136980057, "learning_rate": 4.825599711958234e-05, "loss": 0.2108, "step": 38654 }, { "epoch": 3.1314808813998702, "grad_norm": 0.08466805517673492, "learning_rate": 4.8251496466987715e-05, "loss": 0.2552, "step": 38655 }, { "epoch": 3.131561892417369, "grad_norm": 0.07399707287549973, "learning_rate": 4.824699581439309e-05, "loss": 0.2248, "step": 38656 }, { "epoch": 3.131642903434867, "grad_norm": 0.059375181794166565, "learning_rate": 4.824249516179846e-05, "loss": 0.2477, "step": 38657 }, { "epoch": 3.1317239144523654, "grad_norm": 0.0669362023472786, "learning_rate": 4.8237994509203836e-05, "loss": 0.2444, "step": 38658 }, { "epoch": 3.131804925469864, "grad_norm": 0.06974013149738312, "learning_rate": 4.8233493856609216e-05, "loss": 0.2411, "step": 38659 }, { "epoch": 3.1318859364873624, "grad_norm": 0.07815289497375488, "learning_rate": 4.822899320401458e-05, "loss": 0.237, "step": 38660 }, { "epoch": 3.1319669475048606, "grad_norm": 0.06880567967891693, "learning_rate": 4.822449255141996e-05, "loss": 0.214, "step": 38661 }, { "epoch": 3.132047958522359, "grad_norm": 0.0771849974989891, "learning_rate": 4.821999189882534e-05, "loss": 0.2473, "step": 38662 }, { "epoch": 3.1321289695398575, "grad_norm": 0.057830676436424255, "learning_rate": 4.8215491246230704e-05, "loss": 0.229, "step": 38663 }, { "epoch": 3.1322099805573558, "grad_norm": 0.09336844086647034, "learning_rate": 4.821099059363608e-05, "loss": 0.2644, "step": 38664 }, { "epoch": 3.132290991574854, "grad_norm": 0.06886488199234009, "learning_rate": 4.820648994104146e-05, "loss": 0.2195, "step": 38665 }, { "epoch": 3.1323720025923527, "grad_norm": 0.06183936074376106, "learning_rate": 4.8201989288446825e-05, "loss": 0.2531, "step": 38666 }, { "epoch": 3.132453013609851, "grad_norm": 0.07469535619020462, "learning_rate": 4.81974886358522e-05, "loss": 0.2538, "step": 38667 }, { "epoch": 3.132534024627349, "grad_norm": 0.0630243569612503, "learning_rate": 4.819298798325758e-05, "loss": 0.207, "step": 38668 }, { "epoch": 3.132615035644848, "grad_norm": 0.07657089084386826, "learning_rate": 4.8188487330662945e-05, "loss": 0.191, "step": 38669 }, { "epoch": 3.132696046662346, "grad_norm": 0.07390504330396652, "learning_rate": 4.818398667806832e-05, "loss": 0.2497, "step": 38670 }, { "epoch": 3.1327770576798444, "grad_norm": 0.060257602483034134, "learning_rate": 4.81794860254737e-05, "loss": 0.2322, "step": 38671 }, { "epoch": 3.1328580686973426, "grad_norm": 0.07124596834182739, "learning_rate": 4.817498537287907e-05, "loss": 0.2334, "step": 38672 }, { "epoch": 3.1329390797148413, "grad_norm": 0.07787282764911652, "learning_rate": 4.817048472028444e-05, "loss": 0.2473, "step": 38673 }, { "epoch": 3.1330200907323396, "grad_norm": 0.10237501561641693, "learning_rate": 4.816598406768982e-05, "loss": 0.2736, "step": 38674 }, { "epoch": 3.133101101749838, "grad_norm": 0.08048980683088303, "learning_rate": 4.8161483415095194e-05, "loss": 0.2468, "step": 38675 }, { "epoch": 3.1331821127673365, "grad_norm": 0.07248836755752563, "learning_rate": 4.815698276250056e-05, "loss": 0.264, "step": 38676 }, { "epoch": 3.1332631237848347, "grad_norm": 0.0636928603053093, "learning_rate": 4.815248210990594e-05, "loss": 0.1894, "step": 38677 }, { "epoch": 3.133344134802333, "grad_norm": 0.07219689339399338, "learning_rate": 4.8147981457311315e-05, "loss": 0.217, "step": 38678 }, { "epoch": 3.1334251458198317, "grad_norm": 0.07658065855503082, "learning_rate": 4.814348080471668e-05, "loss": 0.2181, "step": 38679 }, { "epoch": 3.13350615683733, "grad_norm": 0.09006506949663162, "learning_rate": 4.813898015212206e-05, "loss": 0.2513, "step": 38680 }, { "epoch": 3.133587167854828, "grad_norm": 0.0771331861615181, "learning_rate": 4.8134479499527436e-05, "loss": 0.2224, "step": 38681 }, { "epoch": 3.133668178872327, "grad_norm": 0.09508946537971497, "learning_rate": 4.81299788469328e-05, "loss": 0.2508, "step": 38682 }, { "epoch": 3.133749189889825, "grad_norm": 0.07007356733083725, "learning_rate": 4.812547819433818e-05, "loss": 0.2231, "step": 38683 }, { "epoch": 3.1338302009073233, "grad_norm": 0.07071962207555771, "learning_rate": 4.8120977541743556e-05, "loss": 0.2737, "step": 38684 }, { "epoch": 3.1339112119248216, "grad_norm": 0.07418075948953629, "learning_rate": 4.811647688914893e-05, "loss": 0.2416, "step": 38685 }, { "epoch": 3.1339922229423203, "grad_norm": 0.07185198366641998, "learning_rate": 4.8111976236554304e-05, "loss": 0.2078, "step": 38686 }, { "epoch": 3.1340732339598185, "grad_norm": 0.08141083270311356, "learning_rate": 4.810747558395968e-05, "loss": 0.2263, "step": 38687 }, { "epoch": 3.1341542449773168, "grad_norm": 0.08361703902482986, "learning_rate": 4.810297493136505e-05, "loss": 0.2272, "step": 38688 }, { "epoch": 3.1342352559948155, "grad_norm": 0.0822317898273468, "learning_rate": 4.8098474278770424e-05, "loss": 0.2228, "step": 38689 }, { "epoch": 3.1343162670123137, "grad_norm": 0.07091178745031357, "learning_rate": 4.80939736261758e-05, "loss": 0.2742, "step": 38690 }, { "epoch": 3.134397278029812, "grad_norm": 0.0746161937713623, "learning_rate": 4.808947297358117e-05, "loss": 0.2414, "step": 38691 }, { "epoch": 3.1344782890473106, "grad_norm": 0.07142340391874313, "learning_rate": 4.8084972320986545e-05, "loss": 0.2145, "step": 38692 }, { "epoch": 3.134559300064809, "grad_norm": 0.08708221465349197, "learning_rate": 4.808047166839192e-05, "loss": 0.2405, "step": 38693 }, { "epoch": 3.134640311082307, "grad_norm": 0.07113545387983322, "learning_rate": 4.807597101579729e-05, "loss": 0.2133, "step": 38694 }, { "epoch": 3.1347213220998054, "grad_norm": 0.07318969070911407, "learning_rate": 4.8071470363202666e-05, "loss": 0.233, "step": 38695 }, { "epoch": 3.134802333117304, "grad_norm": 0.0663350448012352, "learning_rate": 4.806696971060804e-05, "loss": 0.2325, "step": 38696 }, { "epoch": 3.1348833441348023, "grad_norm": 0.06916707009077072, "learning_rate": 4.806246905801341e-05, "loss": 0.2597, "step": 38697 }, { "epoch": 3.1349643551523005, "grad_norm": 0.07102689146995544, "learning_rate": 4.8057968405418794e-05, "loss": 0.2161, "step": 38698 }, { "epoch": 3.1350453661697992, "grad_norm": 0.08429417759180069, "learning_rate": 4.805346775282416e-05, "loss": 0.2835, "step": 38699 }, { "epoch": 3.1351263771872975, "grad_norm": 0.0673832818865776, "learning_rate": 4.8048967100229534e-05, "loss": 0.2221, "step": 38700 }, { "epoch": 3.1352073882047957, "grad_norm": 0.0722777470946312, "learning_rate": 4.8044466447634914e-05, "loss": 0.2103, "step": 38701 }, { "epoch": 3.1352883992222944, "grad_norm": 0.0763530358672142, "learning_rate": 4.803996579504028e-05, "loss": 0.24, "step": 38702 }, { "epoch": 3.1353694102397927, "grad_norm": 0.0652783066034317, "learning_rate": 4.8035465142445655e-05, "loss": 0.2175, "step": 38703 }, { "epoch": 3.135450421257291, "grad_norm": 0.0717589259147644, "learning_rate": 4.8030964489851035e-05, "loss": 0.2039, "step": 38704 }, { "epoch": 3.1355314322747896, "grad_norm": 0.058315105736255646, "learning_rate": 4.80264638372564e-05, "loss": 0.2118, "step": 38705 }, { "epoch": 3.135612443292288, "grad_norm": 0.07413259893655777, "learning_rate": 4.8021963184661776e-05, "loss": 0.2673, "step": 38706 }, { "epoch": 3.135693454309786, "grad_norm": 0.06567484140396118, "learning_rate": 4.8017462532067156e-05, "loss": 0.2111, "step": 38707 }, { "epoch": 3.1357744653272843, "grad_norm": 0.06503421813249588, "learning_rate": 4.801296187947252e-05, "loss": 0.2197, "step": 38708 }, { "epoch": 3.135855476344783, "grad_norm": 0.07268182933330536, "learning_rate": 4.8008461226877896e-05, "loss": 0.2642, "step": 38709 }, { "epoch": 3.1359364873622813, "grad_norm": 0.065959133207798, "learning_rate": 4.800396057428328e-05, "loss": 0.2311, "step": 38710 }, { "epoch": 3.1360174983797795, "grad_norm": 0.07882321625947952, "learning_rate": 4.799945992168865e-05, "loss": 0.2721, "step": 38711 }, { "epoch": 3.136098509397278, "grad_norm": 0.08288038522005081, "learning_rate": 4.799495926909402e-05, "loss": 0.2467, "step": 38712 }, { "epoch": 3.1361795204147764, "grad_norm": 0.06765219569206238, "learning_rate": 4.79904586164994e-05, "loss": 0.2218, "step": 38713 }, { "epoch": 3.1362605314322747, "grad_norm": 0.0784728080034256, "learning_rate": 4.798595796390477e-05, "loss": 0.2245, "step": 38714 }, { "epoch": 3.1363415424497734, "grad_norm": 0.07332229614257812, "learning_rate": 4.798145731131014e-05, "loss": 0.2335, "step": 38715 }, { "epoch": 3.1364225534672716, "grad_norm": 0.09900877624750137, "learning_rate": 4.797695665871552e-05, "loss": 0.2265, "step": 38716 }, { "epoch": 3.13650356448477, "grad_norm": 0.09115981310606003, "learning_rate": 4.797245600612089e-05, "loss": 0.2246, "step": 38717 }, { "epoch": 3.136584575502268, "grad_norm": 0.07833981513977051, "learning_rate": 4.796795535352626e-05, "loss": 0.2263, "step": 38718 }, { "epoch": 3.136665586519767, "grad_norm": 0.07188592851161957, "learning_rate": 4.796345470093164e-05, "loss": 0.2478, "step": 38719 }, { "epoch": 3.136746597537265, "grad_norm": 0.07392287254333496, "learning_rate": 4.795895404833701e-05, "loss": 0.2466, "step": 38720 }, { "epoch": 3.1368276085547633, "grad_norm": 0.07458144426345825, "learning_rate": 4.795445339574238e-05, "loss": 0.2373, "step": 38721 }, { "epoch": 3.136908619572262, "grad_norm": 0.06387396156787872, "learning_rate": 4.794995274314776e-05, "loss": 0.2401, "step": 38722 }, { "epoch": 3.13698963058976, "grad_norm": 0.0793103352189064, "learning_rate": 4.7945452090553134e-05, "loss": 0.2443, "step": 38723 }, { "epoch": 3.1370706416072585, "grad_norm": 0.08202017098665237, "learning_rate": 4.794095143795851e-05, "loss": 0.2545, "step": 38724 }, { "epoch": 3.137151652624757, "grad_norm": 0.06801655143499374, "learning_rate": 4.793645078536388e-05, "loss": 0.2316, "step": 38725 }, { "epoch": 3.1372326636422554, "grad_norm": 0.07169846445322037, "learning_rate": 4.7931950132769254e-05, "loss": 0.2517, "step": 38726 }, { "epoch": 3.1373136746597536, "grad_norm": 0.05438097193837166, "learning_rate": 4.792744948017463e-05, "loss": 0.2146, "step": 38727 }, { "epoch": 3.1373946856772523, "grad_norm": 0.06895055621862411, "learning_rate": 4.792294882758e-05, "loss": 0.2305, "step": 38728 }, { "epoch": 3.1374756966947506, "grad_norm": 0.08061118423938751, "learning_rate": 4.7918448174985375e-05, "loss": 0.2277, "step": 38729 }, { "epoch": 3.137556707712249, "grad_norm": 0.0754641741514206, "learning_rate": 4.791394752239075e-05, "loss": 0.222, "step": 38730 }, { "epoch": 3.137637718729747, "grad_norm": 0.09252101182937622, "learning_rate": 4.790944686979612e-05, "loss": 0.264, "step": 38731 }, { "epoch": 3.1377187297472457, "grad_norm": 0.06960175186395645, "learning_rate": 4.7904946217201496e-05, "loss": 0.1871, "step": 38732 }, { "epoch": 3.137799740764744, "grad_norm": 0.06823209673166275, "learning_rate": 4.790044556460687e-05, "loss": 0.2369, "step": 38733 }, { "epoch": 3.1378807517822422, "grad_norm": 0.07666932791471481, "learning_rate": 4.789594491201224e-05, "loss": 0.2539, "step": 38734 }, { "epoch": 3.137961762799741, "grad_norm": 0.06866523623466492, "learning_rate": 4.789144425941762e-05, "loss": 0.2293, "step": 38735 }, { "epoch": 3.138042773817239, "grad_norm": 0.07156126946210861, "learning_rate": 4.788694360682299e-05, "loss": 0.2662, "step": 38736 }, { "epoch": 3.1381237848347374, "grad_norm": 0.0736309066414833, "learning_rate": 4.788244295422837e-05, "loss": 0.2096, "step": 38737 }, { "epoch": 3.138204795852236, "grad_norm": 0.05947743356227875, "learning_rate": 4.787794230163374e-05, "loss": 0.1979, "step": 38738 }, { "epoch": 3.1382858068697344, "grad_norm": 0.05899606645107269, "learning_rate": 4.787344164903911e-05, "loss": 0.2109, "step": 38739 }, { "epoch": 3.1383668178872326, "grad_norm": 0.07126261293888092, "learning_rate": 4.786894099644449e-05, "loss": 0.2453, "step": 38740 }, { "epoch": 3.138447828904731, "grad_norm": 0.07778992503881454, "learning_rate": 4.786444034384986e-05, "loss": 0.2726, "step": 38741 }, { "epoch": 3.1385288399222295, "grad_norm": 0.06148141250014305, "learning_rate": 4.785993969125523e-05, "loss": 0.2053, "step": 38742 }, { "epoch": 3.1386098509397278, "grad_norm": 0.06504407525062561, "learning_rate": 4.785543903866061e-05, "loss": 0.2623, "step": 38743 }, { "epoch": 3.138690861957226, "grad_norm": 0.058437515050172806, "learning_rate": 4.785093838606598e-05, "loss": 0.2091, "step": 38744 }, { "epoch": 3.1387718729747247, "grad_norm": 0.07105541974306107, "learning_rate": 4.784643773347135e-05, "loss": 0.2251, "step": 38745 }, { "epoch": 3.138852883992223, "grad_norm": 0.0854935422539711, "learning_rate": 4.784193708087673e-05, "loss": 0.2837, "step": 38746 }, { "epoch": 3.138933895009721, "grad_norm": 0.06620346009731293, "learning_rate": 4.78374364282821e-05, "loss": 0.1855, "step": 38747 }, { "epoch": 3.13901490602722, "grad_norm": 0.0695490911602974, "learning_rate": 4.7832935775687474e-05, "loss": 0.2548, "step": 38748 }, { "epoch": 3.139095917044718, "grad_norm": 0.05896589532494545, "learning_rate": 4.7828435123092854e-05, "loss": 0.2148, "step": 38749 }, { "epoch": 3.1391769280622164, "grad_norm": 0.06517542153596878, "learning_rate": 4.782393447049823e-05, "loss": 0.2543, "step": 38750 }, { "epoch": 3.139257939079715, "grad_norm": 0.07684143632650375, "learning_rate": 4.7819433817903595e-05, "loss": 0.2843, "step": 38751 }, { "epoch": 3.1393389500972133, "grad_norm": 0.08605005592107773, "learning_rate": 4.7814933165308975e-05, "loss": 0.244, "step": 38752 }, { "epoch": 3.1394199611147116, "grad_norm": 0.07349022477865219, "learning_rate": 4.781043251271435e-05, "loss": 0.2604, "step": 38753 }, { "epoch": 3.13950097213221, "grad_norm": 0.07187152653932571, "learning_rate": 4.7805931860119715e-05, "loss": 0.2178, "step": 38754 }, { "epoch": 3.1395819831497085, "grad_norm": 0.06581337004899979, "learning_rate": 4.7801431207525096e-05, "loss": 0.2162, "step": 38755 }, { "epoch": 3.1396629941672067, "grad_norm": 0.08627573400735855, "learning_rate": 4.779693055493047e-05, "loss": 0.231, "step": 38756 }, { "epoch": 3.139744005184705, "grad_norm": 0.06073044613003731, "learning_rate": 4.7792429902335836e-05, "loss": 0.2178, "step": 38757 }, { "epoch": 3.1398250162022037, "grad_norm": 0.06303252279758453, "learning_rate": 4.7787929249741217e-05, "loss": 0.2319, "step": 38758 }, { "epoch": 3.139906027219702, "grad_norm": 0.08314767479896545, "learning_rate": 4.778342859714659e-05, "loss": 0.2227, "step": 38759 }, { "epoch": 3.1399870382372, "grad_norm": 0.06818264722824097, "learning_rate": 4.777892794455196e-05, "loss": 0.2166, "step": 38760 }, { "epoch": 3.140068049254699, "grad_norm": 0.0643196552991867, "learning_rate": 4.777442729195734e-05, "loss": 0.237, "step": 38761 }, { "epoch": 3.140149060272197, "grad_norm": 0.07213971763849258, "learning_rate": 4.776992663936271e-05, "loss": 0.228, "step": 38762 }, { "epoch": 3.1402300712896953, "grad_norm": 0.06124432384967804, "learning_rate": 4.776542598676808e-05, "loss": 0.1956, "step": 38763 }, { "epoch": 3.1403110823071936, "grad_norm": 0.08000092953443527, "learning_rate": 4.776092533417346e-05, "loss": 0.2412, "step": 38764 }, { "epoch": 3.1403920933246923, "grad_norm": 0.07622207701206207, "learning_rate": 4.775642468157883e-05, "loss": 0.21, "step": 38765 }, { "epoch": 3.1404731043421905, "grad_norm": 0.06574143469333649, "learning_rate": 4.7751924028984205e-05, "loss": 0.1905, "step": 38766 }, { "epoch": 3.1405541153596888, "grad_norm": 0.066102996468544, "learning_rate": 4.774742337638958e-05, "loss": 0.2933, "step": 38767 }, { "epoch": 3.1406351263771874, "grad_norm": 0.06801317632198334, "learning_rate": 4.774292272379495e-05, "loss": 0.2423, "step": 38768 }, { "epoch": 3.1407161373946857, "grad_norm": 0.07819636166095734, "learning_rate": 4.7738422071200326e-05, "loss": 0.2543, "step": 38769 }, { "epoch": 3.140797148412184, "grad_norm": 0.07022461295127869, "learning_rate": 4.77339214186057e-05, "loss": 0.2368, "step": 38770 }, { "epoch": 3.1408781594296826, "grad_norm": 0.06046614423394203, "learning_rate": 4.772942076601107e-05, "loss": 0.2212, "step": 38771 }, { "epoch": 3.140959170447181, "grad_norm": 0.07519835978746414, "learning_rate": 4.772492011341645e-05, "loss": 0.233, "step": 38772 }, { "epoch": 3.141040181464679, "grad_norm": 0.06469036638736725, "learning_rate": 4.772041946082182e-05, "loss": 0.1934, "step": 38773 }, { "epoch": 3.141121192482178, "grad_norm": 0.07506491988897324, "learning_rate": 4.7715918808227194e-05, "loss": 0.2136, "step": 38774 }, { "epoch": 3.141202203499676, "grad_norm": 0.07341165840625763, "learning_rate": 4.771141815563257e-05, "loss": 0.2221, "step": 38775 }, { "epoch": 3.1412832145171743, "grad_norm": 0.05977972596883774, "learning_rate": 4.770691750303794e-05, "loss": 0.1928, "step": 38776 }, { "epoch": 3.1413642255346725, "grad_norm": 0.08076482266187668, "learning_rate": 4.7702416850443315e-05, "loss": 0.2165, "step": 38777 }, { "epoch": 3.1414452365521712, "grad_norm": 0.06527772545814514, "learning_rate": 4.769791619784869e-05, "loss": 0.225, "step": 38778 }, { "epoch": 3.1415262475696695, "grad_norm": 0.09196940809488297, "learning_rate": 4.769341554525407e-05, "loss": 0.2728, "step": 38779 }, { "epoch": 3.1416072585871677, "grad_norm": 0.06105652451515198, "learning_rate": 4.7688914892659436e-05, "loss": 0.1956, "step": 38780 }, { "epoch": 3.1416882696046664, "grad_norm": 0.07764767855405807, "learning_rate": 4.768441424006481e-05, "loss": 0.2564, "step": 38781 }, { "epoch": 3.1417692806221647, "grad_norm": 0.08602216839790344, "learning_rate": 4.767991358747019e-05, "loss": 0.2395, "step": 38782 }, { "epoch": 3.141850291639663, "grad_norm": 0.07574327290058136, "learning_rate": 4.7675412934875557e-05, "loss": 0.2313, "step": 38783 }, { "epoch": 3.141931302657161, "grad_norm": 0.0708719864487648, "learning_rate": 4.767091228228093e-05, "loss": 0.3054, "step": 38784 }, { "epoch": 3.14201231367466, "grad_norm": 0.07440514117479324, "learning_rate": 4.766641162968631e-05, "loss": 0.2894, "step": 38785 }, { "epoch": 3.142093324692158, "grad_norm": 0.04955153539776802, "learning_rate": 4.766191097709168e-05, "loss": 0.2001, "step": 38786 }, { "epoch": 3.1421743357096563, "grad_norm": 0.07119850814342499, "learning_rate": 4.765741032449705e-05, "loss": 0.1937, "step": 38787 }, { "epoch": 3.142255346727155, "grad_norm": 0.07241246104240417, "learning_rate": 4.765290967190243e-05, "loss": 0.218, "step": 38788 }, { "epoch": 3.1423363577446533, "grad_norm": 0.06336984783411026, "learning_rate": 4.76484090193078e-05, "loss": 0.2247, "step": 38789 }, { "epoch": 3.1424173687621515, "grad_norm": 0.0958334356546402, "learning_rate": 4.764390836671317e-05, "loss": 0.2178, "step": 38790 }, { "epoch": 3.14249837977965, "grad_norm": 0.08584460616111755, "learning_rate": 4.763940771411855e-05, "loss": 0.2244, "step": 38791 }, { "epoch": 3.1425793907971484, "grad_norm": 0.07357819378376007, "learning_rate": 4.7634907061523926e-05, "loss": 0.2327, "step": 38792 }, { "epoch": 3.1426604018146467, "grad_norm": 0.07552488893270493, "learning_rate": 4.763040640892929e-05, "loss": 0.2037, "step": 38793 }, { "epoch": 3.1427414128321454, "grad_norm": 0.07349817454814911, "learning_rate": 4.762590575633467e-05, "loss": 0.2213, "step": 38794 }, { "epoch": 3.1428224238496436, "grad_norm": 0.08728177100419998, "learning_rate": 4.762140510374005e-05, "loss": 0.2529, "step": 38795 }, { "epoch": 3.142903434867142, "grad_norm": 0.07271800190210342, "learning_rate": 4.7616904451145413e-05, "loss": 0.2495, "step": 38796 }, { "epoch": 3.1429844458846405, "grad_norm": 0.07111764699220657, "learning_rate": 4.7612403798550794e-05, "loss": 0.2178, "step": 38797 }, { "epoch": 3.143065456902139, "grad_norm": 0.0855024978518486, "learning_rate": 4.760790314595617e-05, "loss": 0.2584, "step": 38798 }, { "epoch": 3.143146467919637, "grad_norm": 0.07055241614580154, "learning_rate": 4.7603402493361534e-05, "loss": 0.2825, "step": 38799 }, { "epoch": 3.1432274789371353, "grad_norm": 0.07589733600616455, "learning_rate": 4.7598901840766915e-05, "loss": 0.2257, "step": 38800 }, { "epoch": 3.143308489954634, "grad_norm": 0.06895039230585098, "learning_rate": 4.759440118817229e-05, "loss": 0.2532, "step": 38801 }, { "epoch": 3.143389500972132, "grad_norm": 0.08680260181427002, "learning_rate": 4.7589900535577655e-05, "loss": 0.2195, "step": 38802 }, { "epoch": 3.1434705119896305, "grad_norm": 0.07699909061193466, "learning_rate": 4.7585399882983035e-05, "loss": 0.2348, "step": 38803 }, { "epoch": 3.143551523007129, "grad_norm": 0.07933976501226425, "learning_rate": 4.758089923038841e-05, "loss": 0.2237, "step": 38804 }, { "epoch": 3.1436325340246274, "grad_norm": 0.0559968575835228, "learning_rate": 4.757639857779378e-05, "loss": 0.2398, "step": 38805 }, { "epoch": 3.1437135450421256, "grad_norm": 0.06536906957626343, "learning_rate": 4.7571897925199156e-05, "loss": 0.224, "step": 38806 }, { "epoch": 3.143794556059624, "grad_norm": 0.0685034990310669, "learning_rate": 4.756739727260453e-05, "loss": 0.2441, "step": 38807 }, { "epoch": 3.1438755670771226, "grad_norm": 0.07415509968996048, "learning_rate": 4.7562896620009903e-05, "loss": 0.2, "step": 38808 }, { "epoch": 3.143956578094621, "grad_norm": 0.06640369445085526, "learning_rate": 4.755839596741528e-05, "loss": 0.2082, "step": 38809 }, { "epoch": 3.144037589112119, "grad_norm": 0.07083263993263245, "learning_rate": 4.755389531482065e-05, "loss": 0.2412, "step": 38810 }, { "epoch": 3.1441186001296177, "grad_norm": 0.08034813404083252, "learning_rate": 4.7549394662226024e-05, "loss": 0.261, "step": 38811 }, { "epoch": 3.144199611147116, "grad_norm": 0.06492961943149567, "learning_rate": 4.75448940096314e-05, "loss": 0.2251, "step": 38812 }, { "epoch": 3.1442806221646142, "grad_norm": 0.07724326848983765, "learning_rate": 4.754039335703677e-05, "loss": 0.2567, "step": 38813 }, { "epoch": 3.144361633182113, "grad_norm": 0.07996118068695068, "learning_rate": 4.7535892704442145e-05, "loss": 0.3033, "step": 38814 }, { "epoch": 3.144442644199611, "grad_norm": 0.07107841223478317, "learning_rate": 4.753139205184752e-05, "loss": 0.23, "step": 38815 }, { "epoch": 3.1445236552171094, "grad_norm": 0.06783238053321838, "learning_rate": 4.752689139925289e-05, "loss": 0.2227, "step": 38816 }, { "epoch": 3.144604666234608, "grad_norm": 0.07720325887203217, "learning_rate": 4.7522390746658266e-05, "loss": 0.2294, "step": 38817 }, { "epoch": 3.1446856772521063, "grad_norm": 0.08420754224061966, "learning_rate": 4.7517890094063646e-05, "loss": 0.2506, "step": 38818 }, { "epoch": 3.1447666882696046, "grad_norm": 0.06361111998558044, "learning_rate": 4.751338944146901e-05, "loss": 0.2415, "step": 38819 }, { "epoch": 3.144847699287103, "grad_norm": 0.07500552386045456, "learning_rate": 4.750888878887439e-05, "loss": 0.2436, "step": 38820 }, { "epoch": 3.1449287103046015, "grad_norm": 0.08437523990869522, "learning_rate": 4.750438813627977e-05, "loss": 0.267, "step": 38821 }, { "epoch": 3.1450097213220998, "grad_norm": 0.06883621960878372, "learning_rate": 4.7499887483685134e-05, "loss": 0.2292, "step": 38822 }, { "epoch": 3.145090732339598, "grad_norm": 0.07928171753883362, "learning_rate": 4.749538683109051e-05, "loss": 0.2373, "step": 38823 }, { "epoch": 3.1451717433570967, "grad_norm": 0.08163254708051682, "learning_rate": 4.749088617849589e-05, "loss": 0.2215, "step": 38824 }, { "epoch": 3.145252754374595, "grad_norm": 0.08181016147136688, "learning_rate": 4.7486385525901255e-05, "loss": 0.2322, "step": 38825 }, { "epoch": 3.145333765392093, "grad_norm": 0.08071200549602509, "learning_rate": 4.748188487330663e-05, "loss": 0.2189, "step": 38826 }, { "epoch": 3.145414776409592, "grad_norm": 0.06839993596076965, "learning_rate": 4.747738422071201e-05, "loss": 0.2245, "step": 38827 }, { "epoch": 3.14549578742709, "grad_norm": 0.07583677768707275, "learning_rate": 4.7472883568117376e-05, "loss": 0.2302, "step": 38828 }, { "epoch": 3.1455767984445884, "grad_norm": 0.06504601240158081, "learning_rate": 4.746838291552275e-05, "loss": 0.2034, "step": 38829 }, { "epoch": 3.1456578094620866, "grad_norm": 0.08468101173639297, "learning_rate": 4.746388226292813e-05, "loss": 0.2392, "step": 38830 }, { "epoch": 3.1457388204795853, "grad_norm": 0.07652562111616135, "learning_rate": 4.74593816103335e-05, "loss": 0.2191, "step": 38831 }, { "epoch": 3.1458198314970836, "grad_norm": 0.06976919621229172, "learning_rate": 4.745488095773887e-05, "loss": 0.25, "step": 38832 }, { "epoch": 3.145900842514582, "grad_norm": 0.08537302166223526, "learning_rate": 4.745038030514425e-05, "loss": 0.21, "step": 38833 }, { "epoch": 3.1459818535320805, "grad_norm": 0.07848033308982849, "learning_rate": 4.7445879652549624e-05, "loss": 0.2535, "step": 38834 }, { "epoch": 3.1460628645495787, "grad_norm": 0.07927160710096359, "learning_rate": 4.744137899995499e-05, "loss": 0.2196, "step": 38835 }, { "epoch": 3.146143875567077, "grad_norm": 0.09206971526145935, "learning_rate": 4.743687834736037e-05, "loss": 0.2628, "step": 38836 }, { "epoch": 3.1462248865845757, "grad_norm": 0.06408418715000153, "learning_rate": 4.7432377694765745e-05, "loss": 0.2262, "step": 38837 }, { "epoch": 3.146305897602074, "grad_norm": 0.07233559340238571, "learning_rate": 4.742787704217111e-05, "loss": 0.2381, "step": 38838 }, { "epoch": 3.146386908619572, "grad_norm": 0.0643056184053421, "learning_rate": 4.742337638957649e-05, "loss": 0.1962, "step": 38839 }, { "epoch": 3.146467919637071, "grad_norm": 0.08704189956188202, "learning_rate": 4.7418875736981866e-05, "loss": 0.2575, "step": 38840 }, { "epoch": 3.146548930654569, "grad_norm": 0.06243178993463516, "learning_rate": 4.741437508438724e-05, "loss": 0.2182, "step": 38841 }, { "epoch": 3.1466299416720673, "grad_norm": 0.06110955774784088, "learning_rate": 4.740987443179261e-05, "loss": 0.2205, "step": 38842 }, { "epoch": 3.1467109526895656, "grad_norm": 0.07244785875082016, "learning_rate": 4.7405373779197986e-05, "loss": 0.2538, "step": 38843 }, { "epoch": 3.1467919637070643, "grad_norm": 0.07918354123830795, "learning_rate": 4.740087312660336e-05, "loss": 0.2611, "step": 38844 }, { "epoch": 3.1468729747245625, "grad_norm": 0.06351567059755325, "learning_rate": 4.7396372474008734e-05, "loss": 0.2168, "step": 38845 }, { "epoch": 3.1469539857420608, "grad_norm": 0.07483189553022385, "learning_rate": 4.739187182141411e-05, "loss": 0.2444, "step": 38846 }, { "epoch": 3.1470349967595594, "grad_norm": 0.06781207770109177, "learning_rate": 4.738737116881948e-05, "loss": 0.2113, "step": 38847 }, { "epoch": 3.1471160077770577, "grad_norm": 0.08817379921674728, "learning_rate": 4.7382870516224854e-05, "loss": 0.2752, "step": 38848 }, { "epoch": 3.147197018794556, "grad_norm": 0.0680769756436348, "learning_rate": 4.737836986363023e-05, "loss": 0.2228, "step": 38849 }, { "epoch": 3.1472780298120546, "grad_norm": 0.06445880234241486, "learning_rate": 4.73738692110356e-05, "loss": 0.2545, "step": 38850 }, { "epoch": 3.147359040829553, "grad_norm": 0.07427272945642471, "learning_rate": 4.7369368558440975e-05, "loss": 0.186, "step": 38851 }, { "epoch": 3.147440051847051, "grad_norm": 0.0860607773065567, "learning_rate": 4.736486790584635e-05, "loss": 0.2141, "step": 38852 }, { "epoch": 3.1475210628645494, "grad_norm": 0.057298026978969574, "learning_rate": 4.736036725325172e-05, "loss": 0.2251, "step": 38853 }, { "epoch": 3.147602073882048, "grad_norm": 0.0735674574971199, "learning_rate": 4.7355866600657096e-05, "loss": 0.2598, "step": 38854 }, { "epoch": 3.1476830848995463, "grad_norm": 0.07864909619092941, "learning_rate": 4.735136594806247e-05, "loss": 0.2058, "step": 38855 }, { "epoch": 3.1477640959170445, "grad_norm": 0.0691840648651123, "learning_rate": 4.734686529546784e-05, "loss": 0.2297, "step": 38856 }, { "epoch": 3.1478451069345432, "grad_norm": 0.0769876092672348, "learning_rate": 4.7342364642873224e-05, "loss": 0.2166, "step": 38857 }, { "epoch": 3.1479261179520415, "grad_norm": 0.08170641958713531, "learning_rate": 4.733786399027859e-05, "loss": 0.2353, "step": 38858 }, { "epoch": 3.1480071289695397, "grad_norm": 0.06473393738269806, "learning_rate": 4.7333363337683964e-05, "loss": 0.1885, "step": 38859 }, { "epoch": 3.1480881399870384, "grad_norm": 0.07050579786300659, "learning_rate": 4.7328862685089344e-05, "loss": 0.2376, "step": 38860 }, { "epoch": 3.1481691510045366, "grad_norm": 0.0721215307712555, "learning_rate": 4.732436203249471e-05, "loss": 0.2477, "step": 38861 }, { "epoch": 3.148250162022035, "grad_norm": 0.08369814604520798, "learning_rate": 4.7319861379900085e-05, "loss": 0.2383, "step": 38862 }, { "epoch": 3.1483311730395336, "grad_norm": 0.0835237056016922, "learning_rate": 4.7315360727305465e-05, "loss": 0.244, "step": 38863 }, { "epoch": 3.148412184057032, "grad_norm": 0.062347088009119034, "learning_rate": 4.731086007471083e-05, "loss": 0.2307, "step": 38864 }, { "epoch": 3.14849319507453, "grad_norm": 0.07704142481088638, "learning_rate": 4.7306359422116206e-05, "loss": 0.2462, "step": 38865 }, { "epoch": 3.1485742060920283, "grad_norm": 0.08721894025802612, "learning_rate": 4.7301858769521586e-05, "loss": 0.2395, "step": 38866 }, { "epoch": 3.148655217109527, "grad_norm": 0.06161055341362953, "learning_rate": 4.729735811692695e-05, "loss": 0.2691, "step": 38867 }, { "epoch": 3.1487362281270252, "grad_norm": 0.07100638747215271, "learning_rate": 4.7292857464332326e-05, "loss": 0.2484, "step": 38868 }, { "epoch": 3.1488172391445235, "grad_norm": 0.08029984682798386, "learning_rate": 4.728835681173771e-05, "loss": 0.2605, "step": 38869 }, { "epoch": 3.148898250162022, "grad_norm": 0.06359708309173584, "learning_rate": 4.728385615914308e-05, "loss": 0.2368, "step": 38870 }, { "epoch": 3.1489792611795204, "grad_norm": 0.06722856312990189, "learning_rate": 4.727935550654845e-05, "loss": 0.2509, "step": 38871 }, { "epoch": 3.1490602721970187, "grad_norm": 0.08963049203157425, "learning_rate": 4.727485485395383e-05, "loss": 0.2237, "step": 38872 }, { "epoch": 3.1491412832145174, "grad_norm": 0.08066631853580475, "learning_rate": 4.72703542013592e-05, "loss": 0.2128, "step": 38873 }, { "epoch": 3.1492222942320156, "grad_norm": 0.07186246663331985, "learning_rate": 4.7265853548764575e-05, "loss": 0.2421, "step": 38874 }, { "epoch": 3.149303305249514, "grad_norm": 0.0749310553073883, "learning_rate": 4.726135289616995e-05, "loss": 0.2125, "step": 38875 }, { "epoch": 3.149384316267012, "grad_norm": 0.0684628114104271, "learning_rate": 4.725685224357532e-05, "loss": 0.2271, "step": 38876 }, { "epoch": 3.149465327284511, "grad_norm": 0.08189097046852112, "learning_rate": 4.7252351590980696e-05, "loss": 0.2347, "step": 38877 }, { "epoch": 3.149546338302009, "grad_norm": 0.06720206886529922, "learning_rate": 4.724785093838607e-05, "loss": 0.2182, "step": 38878 }, { "epoch": 3.1496273493195073, "grad_norm": 0.07193852961063385, "learning_rate": 4.724335028579144e-05, "loss": 0.2394, "step": 38879 }, { "epoch": 3.149708360337006, "grad_norm": 0.06983591616153717, "learning_rate": 4.7238849633196816e-05, "loss": 0.2323, "step": 38880 }, { "epoch": 3.149789371354504, "grad_norm": 0.06906203180551529, "learning_rate": 4.723434898060219e-05, "loss": 0.2282, "step": 38881 }, { "epoch": 3.1498703823720025, "grad_norm": 0.0820295587182045, "learning_rate": 4.7229848328007564e-05, "loss": 0.2498, "step": 38882 }, { "epoch": 3.149951393389501, "grad_norm": 0.07069437205791473, "learning_rate": 4.722534767541294e-05, "loss": 0.2332, "step": 38883 }, { "epoch": 3.1500324044069994, "grad_norm": 0.06761626899242401, "learning_rate": 4.722084702281831e-05, "loss": 0.2758, "step": 38884 }, { "epoch": 3.1501134154244976, "grad_norm": 0.07170595228672028, "learning_rate": 4.7216346370223684e-05, "loss": 0.2273, "step": 38885 }, { "epoch": 3.1501944264419963, "grad_norm": 0.0710739716887474, "learning_rate": 4.721184571762906e-05, "loss": 0.2268, "step": 38886 }, { "epoch": 3.1502754374594946, "grad_norm": 0.08929024636745453, "learning_rate": 4.720734506503443e-05, "loss": 0.245, "step": 38887 }, { "epoch": 3.150356448476993, "grad_norm": 0.0899534523487091, "learning_rate": 4.7202844412439805e-05, "loss": 0.2408, "step": 38888 }, { "epoch": 3.150437459494491, "grad_norm": 0.07042014598846436, "learning_rate": 4.719834375984518e-05, "loss": 0.205, "step": 38889 }, { "epoch": 3.1505184705119897, "grad_norm": 0.06146010756492615, "learning_rate": 4.719384310725055e-05, "loss": 0.2299, "step": 38890 }, { "epoch": 3.150599481529488, "grad_norm": 0.08242175728082657, "learning_rate": 4.7189342454655926e-05, "loss": 0.2746, "step": 38891 }, { "epoch": 3.1506804925469862, "grad_norm": 0.06836254894733429, "learning_rate": 4.71848418020613e-05, "loss": 0.2107, "step": 38892 }, { "epoch": 3.150761503564485, "grad_norm": 0.07505775988101959, "learning_rate": 4.718034114946667e-05, "loss": 0.2615, "step": 38893 }, { "epoch": 3.150842514581983, "grad_norm": 0.07807854562997818, "learning_rate": 4.717584049687205e-05, "loss": 0.2545, "step": 38894 }, { "epoch": 3.1509235255994814, "grad_norm": 0.05729011446237564, "learning_rate": 4.717133984427742e-05, "loss": 0.2179, "step": 38895 }, { "epoch": 3.15100453661698, "grad_norm": 0.06013137102127075, "learning_rate": 4.71668391916828e-05, "loss": 0.2246, "step": 38896 }, { "epoch": 3.1510855476344783, "grad_norm": 0.07943231612443924, "learning_rate": 4.716233853908817e-05, "loss": 0.2535, "step": 38897 }, { "epoch": 3.1511665586519766, "grad_norm": 0.058542679995298386, "learning_rate": 4.715783788649354e-05, "loss": 0.2031, "step": 38898 }, { "epoch": 3.151247569669475, "grad_norm": 0.06623531132936478, "learning_rate": 4.715333723389892e-05, "loss": 0.2113, "step": 38899 }, { "epoch": 3.1513285806869735, "grad_norm": 0.0729438066482544, "learning_rate": 4.714883658130429e-05, "loss": 0.2176, "step": 38900 }, { "epoch": 3.1514095917044718, "grad_norm": 0.07456063479185104, "learning_rate": 4.714433592870966e-05, "loss": 0.2304, "step": 38901 }, { "epoch": 3.15149060272197, "grad_norm": 0.0609896220266819, "learning_rate": 4.713983527611504e-05, "loss": 0.2089, "step": 38902 }, { "epoch": 3.1515716137394687, "grad_norm": 0.05986235663294792, "learning_rate": 4.713533462352041e-05, "loss": 0.2242, "step": 38903 }, { "epoch": 3.151652624756967, "grad_norm": 0.057405129075050354, "learning_rate": 4.713083397092578e-05, "loss": 0.2367, "step": 38904 }, { "epoch": 3.151733635774465, "grad_norm": 0.06006966903805733, "learning_rate": 4.712633331833116e-05, "loss": 0.2318, "step": 38905 }, { "epoch": 3.151814646791964, "grad_norm": 0.0800834521651268, "learning_rate": 4.712183266573653e-05, "loss": 0.2803, "step": 38906 }, { "epoch": 3.151895657809462, "grad_norm": 0.07425723969936371, "learning_rate": 4.711733201314191e-05, "loss": 0.27, "step": 38907 }, { "epoch": 3.1519766688269604, "grad_norm": 0.07813823223114014, "learning_rate": 4.7112831360547284e-05, "loss": 0.2549, "step": 38908 }, { "epoch": 3.152057679844459, "grad_norm": 0.08356890082359314, "learning_rate": 4.710833070795266e-05, "loss": 0.234, "step": 38909 }, { "epoch": 3.1521386908619573, "grad_norm": 0.08138100802898407, "learning_rate": 4.710383005535803e-05, "loss": 0.2303, "step": 38910 }, { "epoch": 3.1522197018794555, "grad_norm": 0.06799155473709106, "learning_rate": 4.7099329402763405e-05, "loss": 0.2413, "step": 38911 }, { "epoch": 3.152300712896954, "grad_norm": 0.07273134589195251, "learning_rate": 4.709482875016878e-05, "loss": 0.1978, "step": 38912 }, { "epoch": 3.1523817239144525, "grad_norm": 0.07886601239442825, "learning_rate": 4.709032809757415e-05, "loss": 0.2509, "step": 38913 }, { "epoch": 3.1524627349319507, "grad_norm": 0.06748409569263458, "learning_rate": 4.7085827444979526e-05, "loss": 0.2317, "step": 38914 }, { "epoch": 3.152543745949449, "grad_norm": 0.07170841842889786, "learning_rate": 4.70813267923849e-05, "loss": 0.2116, "step": 38915 }, { "epoch": 3.1526247569669477, "grad_norm": 0.09538058191537857, "learning_rate": 4.707682613979027e-05, "loss": 0.2623, "step": 38916 }, { "epoch": 3.152705767984446, "grad_norm": 0.06135268881917, "learning_rate": 4.7072325487195647e-05, "loss": 0.2094, "step": 38917 }, { "epoch": 3.152786779001944, "grad_norm": 0.07493219524621964, "learning_rate": 4.706782483460102e-05, "loss": 0.2838, "step": 38918 }, { "epoch": 3.152867790019443, "grad_norm": 0.06381876766681671, "learning_rate": 4.7063324182006394e-05, "loss": 0.2348, "step": 38919 }, { "epoch": 3.152948801036941, "grad_norm": 0.08309265226125717, "learning_rate": 4.705882352941177e-05, "loss": 0.2542, "step": 38920 }, { "epoch": 3.1530298120544393, "grad_norm": 0.06406621634960175, "learning_rate": 4.705432287681714e-05, "loss": 0.2181, "step": 38921 }, { "epoch": 3.1531108230719376, "grad_norm": 0.06495659798383713, "learning_rate": 4.7049822224222515e-05, "loss": 0.2456, "step": 38922 }, { "epoch": 3.1531918340894363, "grad_norm": 0.05980648100376129, "learning_rate": 4.704532157162789e-05, "loss": 0.2007, "step": 38923 }, { "epoch": 3.1532728451069345, "grad_norm": 0.07143256813287735, "learning_rate": 4.704082091903326e-05, "loss": 0.1986, "step": 38924 }, { "epoch": 3.1533538561244328, "grad_norm": 0.06251020729541779, "learning_rate": 4.7036320266438635e-05, "loss": 0.2348, "step": 38925 }, { "epoch": 3.1534348671419314, "grad_norm": 0.0595199279487133, "learning_rate": 4.703181961384401e-05, "loss": 0.2211, "step": 38926 }, { "epoch": 3.1535158781594297, "grad_norm": 0.059528898447752, "learning_rate": 4.702731896124938e-05, "loss": 0.2235, "step": 38927 }, { "epoch": 3.153596889176928, "grad_norm": 0.07514117658138275, "learning_rate": 4.7022818308654756e-05, "loss": 0.2576, "step": 38928 }, { "epoch": 3.1536779001944266, "grad_norm": 0.06593476235866547, "learning_rate": 4.701831765606013e-05, "loss": 0.2625, "step": 38929 }, { "epoch": 3.153758911211925, "grad_norm": 0.0637379065155983, "learning_rate": 4.7013817003465503e-05, "loss": 0.2317, "step": 38930 }, { "epoch": 3.153839922229423, "grad_norm": 0.07333981990814209, "learning_rate": 4.700931635087088e-05, "loss": 0.2393, "step": 38931 }, { "epoch": 3.153920933246922, "grad_norm": 0.06379441171884537, "learning_rate": 4.700481569827625e-05, "loss": 0.2496, "step": 38932 }, { "epoch": 3.15400194426442, "grad_norm": 0.05707908794283867, "learning_rate": 4.7000315045681624e-05, "loss": 0.2441, "step": 38933 }, { "epoch": 3.1540829552819183, "grad_norm": 0.07235048711299896, "learning_rate": 4.6995814393087e-05, "loss": 0.2207, "step": 38934 }, { "epoch": 3.1541639662994165, "grad_norm": 0.07200153917074203, "learning_rate": 4.699131374049237e-05, "loss": 0.2249, "step": 38935 }, { "epoch": 3.154244977316915, "grad_norm": 0.06244270130991936, "learning_rate": 4.6986813087897745e-05, "loss": 0.1957, "step": 38936 }, { "epoch": 3.1543259883344135, "grad_norm": 0.06604592502117157, "learning_rate": 4.698231243530312e-05, "loss": 0.1946, "step": 38937 }, { "epoch": 3.1544069993519117, "grad_norm": 0.08157742023468018, "learning_rate": 4.69778117827085e-05, "loss": 0.2259, "step": 38938 }, { "epoch": 3.1544880103694104, "grad_norm": 0.07753434032201767, "learning_rate": 4.6973311130113866e-05, "loss": 0.2168, "step": 38939 }, { "epoch": 3.1545690213869086, "grad_norm": 0.06853404641151428, "learning_rate": 4.696881047751924e-05, "loss": 0.2584, "step": 38940 }, { "epoch": 3.154650032404407, "grad_norm": 0.0636419877409935, "learning_rate": 4.696430982492462e-05, "loss": 0.2202, "step": 38941 }, { "epoch": 3.1547310434219056, "grad_norm": 0.0700618326663971, "learning_rate": 4.695980917232999e-05, "loss": 0.2059, "step": 38942 }, { "epoch": 3.154812054439404, "grad_norm": 0.057667315006256104, "learning_rate": 4.695530851973537e-05, "loss": 0.2114, "step": 38943 }, { "epoch": 3.154893065456902, "grad_norm": 0.060890886932611465, "learning_rate": 4.695080786714074e-05, "loss": 0.2234, "step": 38944 }, { "epoch": 3.1549740764744003, "grad_norm": 0.06949368119239807, "learning_rate": 4.694630721454611e-05, "loss": 0.2236, "step": 38945 }, { "epoch": 3.155055087491899, "grad_norm": 0.06916090846061707, "learning_rate": 4.694180656195149e-05, "loss": 0.2473, "step": 38946 }, { "epoch": 3.1551360985093972, "grad_norm": 0.0846572294831276, "learning_rate": 4.693730590935686e-05, "loss": 0.2042, "step": 38947 }, { "epoch": 3.1552171095268955, "grad_norm": 0.08265434950590134, "learning_rate": 4.693280525676223e-05, "loss": 0.2485, "step": 38948 }, { "epoch": 3.155298120544394, "grad_norm": 0.09176456183195114, "learning_rate": 4.692830460416761e-05, "loss": 0.2483, "step": 38949 }, { "epoch": 3.1553791315618924, "grad_norm": 0.07183399051427841, "learning_rate": 4.692380395157298e-05, "loss": 0.2667, "step": 38950 }, { "epoch": 3.1554601425793907, "grad_norm": 0.07050126045942307, "learning_rate": 4.6919303298978356e-05, "loss": 0.2226, "step": 38951 }, { "epoch": 3.1555411535968894, "grad_norm": 0.062021028250455856, "learning_rate": 4.691480264638373e-05, "loss": 0.2326, "step": 38952 }, { "epoch": 3.1556221646143876, "grad_norm": 0.09520271420478821, "learning_rate": 4.69103019937891e-05, "loss": 0.2574, "step": 38953 }, { "epoch": 3.155703175631886, "grad_norm": 0.08078912645578384, "learning_rate": 4.690580134119448e-05, "loss": 0.2703, "step": 38954 }, { "epoch": 3.1557841866493845, "grad_norm": 0.06901061534881592, "learning_rate": 4.690130068859985e-05, "loss": 0.2129, "step": 38955 }, { "epoch": 3.155865197666883, "grad_norm": 0.06271662563085556, "learning_rate": 4.6896800036005224e-05, "loss": 0.2145, "step": 38956 }, { "epoch": 3.155946208684381, "grad_norm": 0.058658353984355927, "learning_rate": 4.68922993834106e-05, "loss": 0.1991, "step": 38957 }, { "epoch": 3.1560272197018793, "grad_norm": 0.06902632862329483, "learning_rate": 4.688779873081597e-05, "loss": 0.2, "step": 38958 }, { "epoch": 3.156108230719378, "grad_norm": 0.08432221412658691, "learning_rate": 4.6883298078221345e-05, "loss": 0.2528, "step": 38959 }, { "epoch": 3.156189241736876, "grad_norm": 0.07740698009729385, "learning_rate": 4.687879742562672e-05, "loss": 0.2442, "step": 38960 }, { "epoch": 3.1562702527543745, "grad_norm": 0.06914656609296799, "learning_rate": 4.687429677303209e-05, "loss": 0.1891, "step": 38961 }, { "epoch": 3.156351263771873, "grad_norm": 0.07384679466485977, "learning_rate": 4.6869796120437465e-05, "loss": 0.2505, "step": 38962 }, { "epoch": 3.1564322747893714, "grad_norm": 0.0760929062962532, "learning_rate": 4.686529546784284e-05, "loss": 0.2673, "step": 38963 }, { "epoch": 3.1565132858068696, "grad_norm": 0.0864713042974472, "learning_rate": 4.686079481524821e-05, "loss": 0.2573, "step": 38964 }, { "epoch": 3.1565942968243683, "grad_norm": 0.06917452812194824, "learning_rate": 4.6856294162653586e-05, "loss": 0.2287, "step": 38965 }, { "epoch": 3.1566753078418666, "grad_norm": 0.06080272048711777, "learning_rate": 4.685179351005896e-05, "loss": 0.2278, "step": 38966 }, { "epoch": 3.156756318859365, "grad_norm": 0.07739540934562683, "learning_rate": 4.6847292857464333e-05, "loss": 0.2087, "step": 38967 }, { "epoch": 3.156837329876863, "grad_norm": 0.07544900476932526, "learning_rate": 4.684279220486971e-05, "loss": 0.2143, "step": 38968 }, { "epoch": 3.1569183408943617, "grad_norm": 0.08161510527133942, "learning_rate": 4.683829155227508e-05, "loss": 0.2461, "step": 38969 }, { "epoch": 3.15699935191186, "grad_norm": 0.06656242161989212, "learning_rate": 4.6833790899680454e-05, "loss": 0.2085, "step": 38970 }, { "epoch": 3.1570803629293582, "grad_norm": 0.07763206958770752, "learning_rate": 4.682929024708583e-05, "loss": 0.2502, "step": 38971 }, { "epoch": 3.157161373946857, "grad_norm": 0.07088419049978256, "learning_rate": 4.68247895944912e-05, "loss": 0.2343, "step": 38972 }, { "epoch": 3.157242384964355, "grad_norm": 0.06977204233407974, "learning_rate": 4.6820288941896575e-05, "loss": 0.2188, "step": 38973 }, { "epoch": 3.1573233959818534, "grad_norm": 0.07479950040578842, "learning_rate": 4.681578828930195e-05, "loss": 0.1926, "step": 38974 }, { "epoch": 3.157404406999352, "grad_norm": 0.06984470039606094, "learning_rate": 4.681128763670732e-05, "loss": 0.2006, "step": 38975 }, { "epoch": 3.1574854180168503, "grad_norm": 0.07612467557191849, "learning_rate": 4.68067869841127e-05, "loss": 0.2489, "step": 38976 }, { "epoch": 3.1575664290343486, "grad_norm": 0.07083283364772797, "learning_rate": 4.6802286331518076e-05, "loss": 0.2683, "step": 38977 }, { "epoch": 3.1576474400518473, "grad_norm": 0.07045019418001175, "learning_rate": 4.679778567892344e-05, "loss": 0.2393, "step": 38978 }, { "epoch": 3.1577284510693455, "grad_norm": 0.0706915408372879, "learning_rate": 4.6793285026328824e-05, "loss": 0.2159, "step": 38979 }, { "epoch": 3.1578094620868438, "grad_norm": 0.07124581187963486, "learning_rate": 4.67887843737342e-05, "loss": 0.251, "step": 38980 }, { "epoch": 3.157890473104342, "grad_norm": 0.062179356813430786, "learning_rate": 4.6784283721139564e-05, "loss": 0.2553, "step": 38981 }, { "epoch": 3.1579714841218407, "grad_norm": 0.08533891290426254, "learning_rate": 4.6779783068544944e-05, "loss": 0.2151, "step": 38982 }, { "epoch": 3.158052495139339, "grad_norm": 0.07565402239561081, "learning_rate": 4.677528241595032e-05, "loss": 0.2199, "step": 38983 }, { "epoch": 3.158133506156837, "grad_norm": 0.06953442096710205, "learning_rate": 4.6770781763355685e-05, "loss": 0.252, "step": 38984 }, { "epoch": 3.158214517174336, "grad_norm": 0.06412345916032791, "learning_rate": 4.6766281110761065e-05, "loss": 0.2094, "step": 38985 }, { "epoch": 3.158295528191834, "grad_norm": 0.06534215062856674, "learning_rate": 4.676178045816644e-05, "loss": 0.2428, "step": 38986 }, { "epoch": 3.1583765392093324, "grad_norm": 0.06917164474725723, "learning_rate": 4.6757279805571806e-05, "loss": 0.2255, "step": 38987 }, { "epoch": 3.158457550226831, "grad_norm": 0.06312058866024017, "learning_rate": 4.6752779152977186e-05, "loss": 0.1838, "step": 38988 }, { "epoch": 3.1585385612443293, "grad_norm": 0.08812984824180603, "learning_rate": 4.674827850038256e-05, "loss": 0.2362, "step": 38989 }, { "epoch": 3.1586195722618275, "grad_norm": 0.08063977211713791, "learning_rate": 4.674377784778793e-05, "loss": 0.2505, "step": 38990 }, { "epoch": 3.158700583279326, "grad_norm": 0.06520045548677444, "learning_rate": 4.673927719519331e-05, "loss": 0.2504, "step": 38991 }, { "epoch": 3.1587815942968245, "grad_norm": 0.07683936506509781, "learning_rate": 4.673477654259868e-05, "loss": 0.239, "step": 38992 }, { "epoch": 3.1588626053143227, "grad_norm": 0.06478267163038254, "learning_rate": 4.6730275890004054e-05, "loss": 0.2414, "step": 38993 }, { "epoch": 3.158943616331821, "grad_norm": 0.07072675973176956, "learning_rate": 4.672577523740943e-05, "loss": 0.2405, "step": 38994 }, { "epoch": 3.1590246273493197, "grad_norm": 0.06452537328004837, "learning_rate": 4.67212745848148e-05, "loss": 0.2209, "step": 38995 }, { "epoch": 3.159105638366818, "grad_norm": 0.07300246506929398, "learning_rate": 4.6716773932220175e-05, "loss": 0.2558, "step": 38996 }, { "epoch": 3.159186649384316, "grad_norm": 0.08771176636219025, "learning_rate": 4.671227327962555e-05, "loss": 0.2834, "step": 38997 }, { "epoch": 3.159267660401815, "grad_norm": 0.06920316070318222, "learning_rate": 4.670777262703092e-05, "loss": 0.2021, "step": 38998 }, { "epoch": 3.159348671419313, "grad_norm": 0.07819127291440964, "learning_rate": 4.6703271974436296e-05, "loss": 0.2364, "step": 38999 }, { "epoch": 3.1594296824368113, "grad_norm": 0.06422830373048782, "learning_rate": 4.669877132184167e-05, "loss": 0.2174, "step": 39000 }, { "epoch": 3.15951069345431, "grad_norm": 0.0757855623960495, "learning_rate": 4.669427066924704e-05, "loss": 0.214, "step": 39001 }, { "epoch": 3.1595917044718083, "grad_norm": 0.058662716299295425, "learning_rate": 4.6689770016652416e-05, "loss": 0.174, "step": 39002 }, { "epoch": 3.1596727154893065, "grad_norm": 0.07447095215320587, "learning_rate": 4.668526936405779e-05, "loss": 0.2509, "step": 39003 }, { "epoch": 3.1597537265068047, "grad_norm": 0.06807708740234375, "learning_rate": 4.6680768711463164e-05, "loss": 0.2254, "step": 39004 }, { "epoch": 3.1598347375243034, "grad_norm": 0.0702623799443245, "learning_rate": 4.667626805886854e-05, "loss": 0.2911, "step": 39005 }, { "epoch": 3.1599157485418017, "grad_norm": 0.07235629111528397, "learning_rate": 4.667176740627391e-05, "loss": 0.2326, "step": 39006 }, { "epoch": 3.1599967595593, "grad_norm": 0.0885845422744751, "learning_rate": 4.6667266753679284e-05, "loss": 0.251, "step": 39007 }, { "epoch": 3.1600777705767986, "grad_norm": 0.07090447098016739, "learning_rate": 4.666276610108466e-05, "loss": 0.229, "step": 39008 }, { "epoch": 3.160158781594297, "grad_norm": 0.07172193378210068, "learning_rate": 4.665826544849004e-05, "loss": 0.2568, "step": 39009 }, { "epoch": 3.160239792611795, "grad_norm": 0.07077902555465698, "learning_rate": 4.6653764795895405e-05, "loss": 0.2108, "step": 39010 }, { "epoch": 3.1603208036292934, "grad_norm": 0.07674044370651245, "learning_rate": 4.664926414330078e-05, "loss": 0.2169, "step": 39011 }, { "epoch": 3.160401814646792, "grad_norm": 0.06866537779569626, "learning_rate": 4.664476349070616e-05, "loss": 0.2165, "step": 39012 }, { "epoch": 3.1604828256642903, "grad_norm": 0.083291195333004, "learning_rate": 4.6640262838111526e-05, "loss": 0.2606, "step": 39013 }, { "epoch": 3.1605638366817885, "grad_norm": 0.07825589925050735, "learning_rate": 4.66357621855169e-05, "loss": 0.2251, "step": 39014 }, { "epoch": 3.160644847699287, "grad_norm": 0.0755823627114296, "learning_rate": 4.663126153292228e-05, "loss": 0.2074, "step": 39015 }, { "epoch": 3.1607258587167855, "grad_norm": 0.0745578184723854, "learning_rate": 4.6626760880327654e-05, "loss": 0.2551, "step": 39016 }, { "epoch": 3.1608068697342837, "grad_norm": 0.0651843324303627, "learning_rate": 4.662226022773302e-05, "loss": 0.2119, "step": 39017 }, { "epoch": 3.1608878807517824, "grad_norm": 0.08645545691251755, "learning_rate": 4.66177595751384e-05, "loss": 0.2352, "step": 39018 }, { "epoch": 3.1609688917692806, "grad_norm": 0.05990879610180855, "learning_rate": 4.6613258922543774e-05, "loss": 0.2371, "step": 39019 }, { "epoch": 3.161049902786779, "grad_norm": 0.0780632346868515, "learning_rate": 4.660875826994914e-05, "loss": 0.2135, "step": 39020 }, { "epoch": 3.1611309138042776, "grad_norm": 0.07226870208978653, "learning_rate": 4.660425761735452e-05, "loss": 0.2408, "step": 39021 }, { "epoch": 3.161211924821776, "grad_norm": 0.06137220934033394, "learning_rate": 4.6599756964759895e-05, "loss": 0.24, "step": 39022 }, { "epoch": 3.161292935839274, "grad_norm": 0.0700208768248558, "learning_rate": 4.659525631216526e-05, "loss": 0.2379, "step": 39023 }, { "epoch": 3.1613739468567728, "grad_norm": 0.06638213992118835, "learning_rate": 4.659075565957064e-05, "loss": 0.2173, "step": 39024 }, { "epoch": 3.161454957874271, "grad_norm": 0.09570866078138351, "learning_rate": 4.6586255006976016e-05, "loss": 0.2294, "step": 39025 }, { "epoch": 3.1615359688917692, "grad_norm": 0.06815316528081894, "learning_rate": 4.658175435438138e-05, "loss": 0.2335, "step": 39026 }, { "epoch": 3.1616169799092675, "grad_norm": 0.07303832471370697, "learning_rate": 4.657725370178676e-05, "loss": 0.2293, "step": 39027 }, { "epoch": 3.161697990926766, "grad_norm": 0.07717616856098175, "learning_rate": 4.657275304919214e-05, "loss": 0.2278, "step": 39028 }, { "epoch": 3.1617790019442644, "grad_norm": 0.06783132255077362, "learning_rate": 4.656825239659751e-05, "loss": 0.2411, "step": 39029 }, { "epoch": 3.1618600129617627, "grad_norm": 0.07071271538734436, "learning_rate": 4.6563751744002884e-05, "loss": 0.2536, "step": 39030 }, { "epoch": 3.1619410239792614, "grad_norm": 0.07904054969549179, "learning_rate": 4.655925109140826e-05, "loss": 0.2772, "step": 39031 }, { "epoch": 3.1620220349967596, "grad_norm": 0.0646386668086052, "learning_rate": 4.655475043881363e-05, "loss": 0.215, "step": 39032 }, { "epoch": 3.162103046014258, "grad_norm": 0.07207754254341125, "learning_rate": 4.6550249786219005e-05, "loss": 0.2527, "step": 39033 }, { "epoch": 3.162184057031756, "grad_norm": 0.07161663472652435, "learning_rate": 4.654574913362438e-05, "loss": 0.2577, "step": 39034 }, { "epoch": 3.162265068049255, "grad_norm": 0.060680631548166275, "learning_rate": 4.654124848102975e-05, "loss": 0.1999, "step": 39035 }, { "epoch": 3.162346079066753, "grad_norm": 0.07173289358615875, "learning_rate": 4.6536747828435126e-05, "loss": 0.2046, "step": 39036 }, { "epoch": 3.1624270900842513, "grad_norm": 0.07027477025985718, "learning_rate": 4.65322471758405e-05, "loss": 0.2298, "step": 39037 }, { "epoch": 3.16250810110175, "grad_norm": 0.06637211889028549, "learning_rate": 4.652774652324587e-05, "loss": 0.2421, "step": 39038 }, { "epoch": 3.162589112119248, "grad_norm": 0.06281259655952454, "learning_rate": 4.6523245870651246e-05, "loss": 0.1843, "step": 39039 }, { "epoch": 3.1626701231367464, "grad_norm": 0.08118221908807755, "learning_rate": 4.651874521805662e-05, "loss": 0.1962, "step": 39040 }, { "epoch": 3.162751134154245, "grad_norm": 0.07613281160593033, "learning_rate": 4.6514244565461994e-05, "loss": 0.2431, "step": 39041 }, { "epoch": 3.1628321451717434, "grad_norm": 0.07212066650390625, "learning_rate": 4.650974391286737e-05, "loss": 0.242, "step": 39042 }, { "epoch": 3.1629131561892416, "grad_norm": 0.06668045371770859, "learning_rate": 4.650524326027274e-05, "loss": 0.1964, "step": 39043 }, { "epoch": 3.1629941672067403, "grad_norm": 0.06738068908452988, "learning_rate": 4.6500742607678114e-05, "loss": 0.2564, "step": 39044 }, { "epoch": 3.1630751782242386, "grad_norm": 0.07580120116472244, "learning_rate": 4.6496241955083495e-05, "loss": 0.2398, "step": 39045 }, { "epoch": 3.163156189241737, "grad_norm": 0.07344920188188553, "learning_rate": 4.649174130248886e-05, "loss": 0.2268, "step": 39046 }, { "epoch": 3.163237200259235, "grad_norm": 0.06640015542507172, "learning_rate": 4.6487240649894235e-05, "loss": 0.242, "step": 39047 }, { "epoch": 3.1633182112767337, "grad_norm": 0.07229582220315933, "learning_rate": 4.6482739997299616e-05, "loss": 0.2087, "step": 39048 }, { "epoch": 3.163399222294232, "grad_norm": 0.07315175235271454, "learning_rate": 4.647823934470498e-05, "loss": 0.209, "step": 39049 }, { "epoch": 3.1634802333117302, "grad_norm": 0.07187490165233612, "learning_rate": 4.6473738692110356e-05, "loss": 0.2331, "step": 39050 }, { "epoch": 3.163561244329229, "grad_norm": 0.07890485227108002, "learning_rate": 4.6469238039515736e-05, "loss": 0.2214, "step": 39051 }, { "epoch": 3.163642255346727, "grad_norm": 0.07000371068716049, "learning_rate": 4.64647373869211e-05, "loss": 0.2347, "step": 39052 }, { "epoch": 3.1637232663642254, "grad_norm": 0.07616917043924332, "learning_rate": 4.646023673432648e-05, "loss": 0.2386, "step": 39053 }, { "epoch": 3.163804277381724, "grad_norm": 0.05547773092985153, "learning_rate": 4.645573608173186e-05, "loss": 0.2161, "step": 39054 }, { "epoch": 3.1638852883992223, "grad_norm": 0.06775707751512527, "learning_rate": 4.645123542913723e-05, "loss": 0.2033, "step": 39055 }, { "epoch": 3.1639662994167206, "grad_norm": 0.06869322806596756, "learning_rate": 4.64467347765426e-05, "loss": 0.2232, "step": 39056 }, { "epoch": 3.164047310434219, "grad_norm": 0.08482500165700912, "learning_rate": 4.644223412394798e-05, "loss": 0.2454, "step": 39057 }, { "epoch": 3.1641283214517175, "grad_norm": 0.07148563861846924, "learning_rate": 4.643773347135335e-05, "loss": 0.2288, "step": 39058 }, { "epoch": 3.1642093324692158, "grad_norm": 0.06907016783952713, "learning_rate": 4.643323281875872e-05, "loss": 0.2437, "step": 39059 }, { "epoch": 3.164290343486714, "grad_norm": 0.07077489793300629, "learning_rate": 4.64287321661641e-05, "loss": 0.2361, "step": 39060 }, { "epoch": 3.1643713545042127, "grad_norm": 0.07123490422964096, "learning_rate": 4.642423151356947e-05, "loss": 0.2336, "step": 39061 }, { "epoch": 3.164452365521711, "grad_norm": 0.06324376910924911, "learning_rate": 4.641973086097484e-05, "loss": 0.2325, "step": 39062 }, { "epoch": 3.164533376539209, "grad_norm": 0.054295867681503296, "learning_rate": 4.641523020838022e-05, "loss": 0.2188, "step": 39063 }, { "epoch": 3.164614387556708, "grad_norm": 0.07837305217981339, "learning_rate": 4.641072955578559e-05, "loss": 0.2237, "step": 39064 }, { "epoch": 3.164695398574206, "grad_norm": 0.07684259116649628, "learning_rate": 4.640622890319096e-05, "loss": 0.2184, "step": 39065 }, { "epoch": 3.1647764095917044, "grad_norm": 0.07637816667556763, "learning_rate": 4.640172825059634e-05, "loss": 0.2392, "step": 39066 }, { "epoch": 3.164857420609203, "grad_norm": 0.06445123255252838, "learning_rate": 4.6397227598001714e-05, "loss": 0.2355, "step": 39067 }, { "epoch": 3.1649384316267013, "grad_norm": 0.07527997344732285, "learning_rate": 4.639272694540709e-05, "loss": 0.202, "step": 39068 }, { "epoch": 3.1650194426441995, "grad_norm": 0.07933299988508224, "learning_rate": 4.638822629281246e-05, "loss": 0.2518, "step": 39069 }, { "epoch": 3.165100453661698, "grad_norm": 0.06105326488614082, "learning_rate": 4.6383725640217835e-05, "loss": 0.2122, "step": 39070 }, { "epoch": 3.1651814646791965, "grad_norm": 0.06747523695230484, "learning_rate": 4.637922498762321e-05, "loss": 0.2347, "step": 39071 }, { "epoch": 3.1652624756966947, "grad_norm": 0.06693252921104431, "learning_rate": 4.637472433502858e-05, "loss": 0.1908, "step": 39072 }, { "epoch": 3.165343486714193, "grad_norm": 0.07058507949113846, "learning_rate": 4.6370223682433956e-05, "loss": 0.2492, "step": 39073 }, { "epoch": 3.1654244977316917, "grad_norm": 0.07795606553554535, "learning_rate": 4.636572302983933e-05, "loss": 0.2556, "step": 39074 }, { "epoch": 3.16550550874919, "grad_norm": 0.08045481890439987, "learning_rate": 4.63612223772447e-05, "loss": 0.2276, "step": 39075 }, { "epoch": 3.165586519766688, "grad_norm": 0.07829701155424118, "learning_rate": 4.6356721724650077e-05, "loss": 0.2044, "step": 39076 }, { "epoch": 3.165667530784187, "grad_norm": 0.08137937635183334, "learning_rate": 4.635222107205545e-05, "loss": 0.2658, "step": 39077 }, { "epoch": 3.165748541801685, "grad_norm": 0.06312301009893417, "learning_rate": 4.6347720419460824e-05, "loss": 0.2316, "step": 39078 }, { "epoch": 3.1658295528191833, "grad_norm": 0.07175661623477936, "learning_rate": 4.63432197668662e-05, "loss": 0.2326, "step": 39079 }, { "epoch": 3.1659105638366816, "grad_norm": 0.07733714580535889, "learning_rate": 4.633871911427157e-05, "loss": 0.2684, "step": 39080 }, { "epoch": 3.1659915748541803, "grad_norm": 0.07549124956130981, "learning_rate": 4.633421846167695e-05, "loss": 0.2343, "step": 39081 }, { "epoch": 3.1660725858716785, "grad_norm": 0.0854051411151886, "learning_rate": 4.632971780908232e-05, "loss": 0.2459, "step": 39082 }, { "epoch": 3.1661535968891767, "grad_norm": 0.0745304599404335, "learning_rate": 4.632521715648769e-05, "loss": 0.2441, "step": 39083 }, { "epoch": 3.1662346079066754, "grad_norm": 0.06685039401054382, "learning_rate": 4.632071650389307e-05, "loss": 0.1976, "step": 39084 }, { "epoch": 3.1663156189241737, "grad_norm": 0.0917067676782608, "learning_rate": 4.631621585129844e-05, "loss": 0.206, "step": 39085 }, { "epoch": 3.166396629941672, "grad_norm": 0.06585230678319931, "learning_rate": 4.631171519870381e-05, "loss": 0.203, "step": 39086 }, { "epoch": 3.1664776409591706, "grad_norm": 0.06591516733169556, "learning_rate": 4.630721454610919e-05, "loss": 0.2335, "step": 39087 }, { "epoch": 3.166558651976669, "grad_norm": 0.06399863958358765, "learning_rate": 4.630271389351456e-05, "loss": 0.2319, "step": 39088 }, { "epoch": 3.166639662994167, "grad_norm": 0.06659473478794098, "learning_rate": 4.6298213240919933e-05, "loss": 0.1824, "step": 39089 }, { "epoch": 3.166720674011666, "grad_norm": 0.07460293173789978, "learning_rate": 4.6293712588325314e-05, "loss": 0.2189, "step": 39090 }, { "epoch": 3.166801685029164, "grad_norm": 0.06288280338048935, "learning_rate": 4.628921193573068e-05, "loss": 0.2003, "step": 39091 }, { "epoch": 3.1668826960466623, "grad_norm": 0.08445224165916443, "learning_rate": 4.6284711283136054e-05, "loss": 0.2306, "step": 39092 }, { "epoch": 3.1669637070641605, "grad_norm": 0.09093958884477615, "learning_rate": 4.6280210630541435e-05, "loss": 0.2349, "step": 39093 }, { "epoch": 3.167044718081659, "grad_norm": 0.07478706538677216, "learning_rate": 4.62757099779468e-05, "loss": 0.2249, "step": 39094 }, { "epoch": 3.1671257290991575, "grad_norm": 0.0760730654001236, "learning_rate": 4.6271209325352175e-05, "loss": 0.1897, "step": 39095 }, { "epoch": 3.1672067401166557, "grad_norm": 0.06487081199884415, "learning_rate": 4.6266708672757555e-05, "loss": 0.2018, "step": 39096 }, { "epoch": 3.1672877511341544, "grad_norm": 0.06682021170854568, "learning_rate": 4.626220802016293e-05, "loss": 0.2032, "step": 39097 }, { "epoch": 3.1673687621516526, "grad_norm": 0.06956232339143753, "learning_rate": 4.6257707367568296e-05, "loss": 0.1847, "step": 39098 }, { "epoch": 3.167449773169151, "grad_norm": 0.06547503918409348, "learning_rate": 4.6253206714973676e-05, "loss": 0.213, "step": 39099 }, { "epoch": 3.1675307841866496, "grad_norm": 0.07335997372865677, "learning_rate": 4.624870606237905e-05, "loss": 0.2263, "step": 39100 }, { "epoch": 3.167611795204148, "grad_norm": 0.09277735650539398, "learning_rate": 4.624420540978442e-05, "loss": 0.2525, "step": 39101 }, { "epoch": 3.167692806221646, "grad_norm": 0.06516660749912262, "learning_rate": 4.62397047571898e-05, "loss": 0.214, "step": 39102 }, { "epoch": 3.1677738172391443, "grad_norm": 0.0839308649301529, "learning_rate": 4.623520410459517e-05, "loss": 0.2066, "step": 39103 }, { "epoch": 3.167854828256643, "grad_norm": 0.07529613375663757, "learning_rate": 4.623070345200054e-05, "loss": 0.2172, "step": 39104 }, { "epoch": 3.1679358392741412, "grad_norm": 0.10337688773870468, "learning_rate": 4.622620279940592e-05, "loss": 0.2361, "step": 39105 }, { "epoch": 3.1680168502916395, "grad_norm": 0.062267303466796875, "learning_rate": 4.622170214681129e-05, "loss": 0.2216, "step": 39106 }, { "epoch": 3.168097861309138, "grad_norm": 0.06088486313819885, "learning_rate": 4.621720149421666e-05, "loss": 0.2134, "step": 39107 }, { "epoch": 3.1681788723266364, "grad_norm": 0.06333941221237183, "learning_rate": 4.621270084162204e-05, "loss": 0.2489, "step": 39108 }, { "epoch": 3.1682598833441347, "grad_norm": 0.07425013929605484, "learning_rate": 4.620820018902741e-05, "loss": 0.2517, "step": 39109 }, { "epoch": 3.1683408943616334, "grad_norm": 0.0673804059624672, "learning_rate": 4.6203699536432786e-05, "loss": 0.211, "step": 39110 }, { "epoch": 3.1684219053791316, "grad_norm": 0.07636480033397675, "learning_rate": 4.619919888383816e-05, "loss": 0.233, "step": 39111 }, { "epoch": 3.16850291639663, "grad_norm": 0.05976966768503189, "learning_rate": 4.619469823124353e-05, "loss": 0.2123, "step": 39112 }, { "epoch": 3.1685839274141285, "grad_norm": 0.06142381206154823, "learning_rate": 4.619019757864891e-05, "loss": 0.2222, "step": 39113 }, { "epoch": 3.1686649384316268, "grad_norm": 0.06508833914995193, "learning_rate": 4.618569692605428e-05, "loss": 0.2034, "step": 39114 }, { "epoch": 3.168745949449125, "grad_norm": 0.08630005270242691, "learning_rate": 4.6181196273459654e-05, "loss": 0.2569, "step": 39115 }, { "epoch": 3.1688269604666233, "grad_norm": 0.06716077029705048, "learning_rate": 4.617669562086503e-05, "loss": 0.231, "step": 39116 }, { "epoch": 3.168907971484122, "grad_norm": 0.07154113054275513, "learning_rate": 4.61721949682704e-05, "loss": 0.2275, "step": 39117 }, { "epoch": 3.16898898250162, "grad_norm": 0.08633829653263092, "learning_rate": 4.6167694315675775e-05, "loss": 0.2098, "step": 39118 }, { "epoch": 3.1690699935191184, "grad_norm": 0.08164115250110626, "learning_rate": 4.616319366308115e-05, "loss": 0.2514, "step": 39119 }, { "epoch": 3.169151004536617, "grad_norm": 0.05900833383202553, "learning_rate": 4.615869301048652e-05, "loss": 0.2175, "step": 39120 }, { "epoch": 3.1692320155541154, "grad_norm": 0.058158889412879944, "learning_rate": 4.6154192357891896e-05, "loss": 0.2179, "step": 39121 }, { "epoch": 3.1693130265716136, "grad_norm": 0.06698452681303024, "learning_rate": 4.614969170529727e-05, "loss": 0.1879, "step": 39122 }, { "epoch": 3.1693940375891123, "grad_norm": 0.06863744556903839, "learning_rate": 4.614519105270265e-05, "loss": 0.2186, "step": 39123 }, { "epoch": 3.1694750486066106, "grad_norm": 0.07735446095466614, "learning_rate": 4.6140690400108016e-05, "loss": 0.2531, "step": 39124 }, { "epoch": 3.169556059624109, "grad_norm": 0.07899104058742523, "learning_rate": 4.613618974751339e-05, "loss": 0.2555, "step": 39125 }, { "epoch": 3.169637070641607, "grad_norm": 0.06854166835546494, "learning_rate": 4.613168909491877e-05, "loss": 0.2155, "step": 39126 }, { "epoch": 3.1697180816591057, "grad_norm": 0.06978372484445572, "learning_rate": 4.612718844232414e-05, "loss": 0.2527, "step": 39127 }, { "epoch": 3.169799092676604, "grad_norm": 0.10613539814949036, "learning_rate": 4.612268778972951e-05, "loss": 0.2682, "step": 39128 }, { "epoch": 3.1698801036941022, "grad_norm": 0.07455401122570038, "learning_rate": 4.611818713713489e-05, "loss": 0.2487, "step": 39129 }, { "epoch": 3.169961114711601, "grad_norm": 0.07414111495018005, "learning_rate": 4.611368648454026e-05, "loss": 0.2277, "step": 39130 }, { "epoch": 3.170042125729099, "grad_norm": 0.07571996003389359, "learning_rate": 4.610918583194563e-05, "loss": 0.2065, "step": 39131 }, { "epoch": 3.1701231367465974, "grad_norm": 0.07938110083341599, "learning_rate": 4.610468517935101e-05, "loss": 0.2473, "step": 39132 }, { "epoch": 3.170204147764096, "grad_norm": 0.0692886933684349, "learning_rate": 4.610018452675638e-05, "loss": 0.2334, "step": 39133 }, { "epoch": 3.1702851587815943, "grad_norm": 0.08501394838094711, "learning_rate": 4.609568387416175e-05, "loss": 0.2151, "step": 39134 }, { "epoch": 3.1703661697990926, "grad_norm": 0.0652298629283905, "learning_rate": 4.609118322156713e-05, "loss": 0.2247, "step": 39135 }, { "epoch": 3.1704471808165913, "grad_norm": 0.07804706692695618, "learning_rate": 4.6086682568972506e-05, "loss": 0.2151, "step": 39136 }, { "epoch": 3.1705281918340895, "grad_norm": 0.06097443401813507, "learning_rate": 4.608218191637787e-05, "loss": 0.1911, "step": 39137 }, { "epoch": 3.1706092028515878, "grad_norm": 0.061475615948438644, "learning_rate": 4.6077681263783254e-05, "loss": 0.2282, "step": 39138 }, { "epoch": 3.170690213869086, "grad_norm": 0.0577525831758976, "learning_rate": 4.607318061118863e-05, "loss": 0.2028, "step": 39139 }, { "epoch": 3.1707712248865847, "grad_norm": 0.0783226266503334, "learning_rate": 4.6068679958593994e-05, "loss": 0.2246, "step": 39140 }, { "epoch": 3.170852235904083, "grad_norm": 0.06661267578601837, "learning_rate": 4.6064179305999374e-05, "loss": 0.2263, "step": 39141 }, { "epoch": 3.170933246921581, "grad_norm": 0.06043732538819313, "learning_rate": 4.605967865340475e-05, "loss": 0.2081, "step": 39142 }, { "epoch": 3.17101425793908, "grad_norm": 0.06632187962532043, "learning_rate": 4.6055178000810115e-05, "loss": 0.2239, "step": 39143 }, { "epoch": 3.171095268956578, "grad_norm": 0.0705820843577385, "learning_rate": 4.6050677348215495e-05, "loss": 0.2187, "step": 39144 }, { "epoch": 3.1711762799740764, "grad_norm": 0.08212973922491074, "learning_rate": 4.604617669562087e-05, "loss": 0.2683, "step": 39145 }, { "epoch": 3.171257290991575, "grad_norm": 0.08695808798074722, "learning_rate": 4.6041676043026236e-05, "loss": 0.2334, "step": 39146 }, { "epoch": 3.1713383020090733, "grad_norm": 0.07749685645103455, "learning_rate": 4.6037175390431616e-05, "loss": 0.2731, "step": 39147 }, { "epoch": 3.1714193130265715, "grad_norm": 0.07648932933807373, "learning_rate": 4.603267473783699e-05, "loss": 0.2401, "step": 39148 }, { "epoch": 3.17150032404407, "grad_norm": 0.07884996384382248, "learning_rate": 4.602817408524236e-05, "loss": 0.2483, "step": 39149 }, { "epoch": 3.1715813350615685, "grad_norm": 0.05509842559695244, "learning_rate": 4.602367343264774e-05, "loss": 0.2136, "step": 39150 }, { "epoch": 3.1716623460790667, "grad_norm": 0.06663339585065842, "learning_rate": 4.601917278005311e-05, "loss": 0.2218, "step": 39151 }, { "epoch": 3.171743357096565, "grad_norm": 0.06804881989955902, "learning_rate": 4.6014672127458484e-05, "loss": 0.2708, "step": 39152 }, { "epoch": 3.1718243681140637, "grad_norm": 0.0790795087814331, "learning_rate": 4.601017147486386e-05, "loss": 0.2421, "step": 39153 }, { "epoch": 3.171905379131562, "grad_norm": 0.0650632381439209, "learning_rate": 4.600567082226923e-05, "loss": 0.2207, "step": 39154 }, { "epoch": 3.17198639014906, "grad_norm": 0.06869402527809143, "learning_rate": 4.6001170169674605e-05, "loss": 0.2087, "step": 39155 }, { "epoch": 3.172067401166559, "grad_norm": 0.0867585763335228, "learning_rate": 4.599666951707998e-05, "loss": 0.2352, "step": 39156 }, { "epoch": 3.172148412184057, "grad_norm": 0.06907272338867188, "learning_rate": 4.599216886448535e-05, "loss": 0.2191, "step": 39157 }, { "epoch": 3.1722294232015553, "grad_norm": 0.08108095824718475, "learning_rate": 4.5987668211890726e-05, "loss": 0.3043, "step": 39158 }, { "epoch": 3.172310434219054, "grad_norm": 0.0706065222620964, "learning_rate": 4.59831675592961e-05, "loss": 0.2135, "step": 39159 }, { "epoch": 3.1723914452365523, "grad_norm": 0.07251002639532089, "learning_rate": 4.597866690670147e-05, "loss": 0.2425, "step": 39160 }, { "epoch": 3.1724724562540505, "grad_norm": 0.069508396089077, "learning_rate": 4.5974166254106846e-05, "loss": 0.2001, "step": 39161 }, { "epoch": 3.1725534672715487, "grad_norm": 0.0626758560538292, "learning_rate": 4.596966560151223e-05, "loss": 0.2097, "step": 39162 }, { "epoch": 3.1726344782890474, "grad_norm": 0.07252325862646103, "learning_rate": 4.5965164948917594e-05, "loss": 0.2382, "step": 39163 }, { "epoch": 3.1727154893065457, "grad_norm": 0.07185426354408264, "learning_rate": 4.596066429632297e-05, "loss": 0.2372, "step": 39164 }, { "epoch": 3.172796500324044, "grad_norm": 0.06854097545146942, "learning_rate": 4.595616364372835e-05, "loss": 0.2276, "step": 39165 }, { "epoch": 3.1728775113415426, "grad_norm": 0.08109313994646072, "learning_rate": 4.5951662991133714e-05, "loss": 0.2345, "step": 39166 }, { "epoch": 3.172958522359041, "grad_norm": 0.07841244339942932, "learning_rate": 4.594716233853909e-05, "loss": 0.2221, "step": 39167 }, { "epoch": 3.173039533376539, "grad_norm": 0.06506379693746567, "learning_rate": 4.594266168594447e-05, "loss": 0.2404, "step": 39168 }, { "epoch": 3.173120544394038, "grad_norm": 0.07025019824504852, "learning_rate": 4.5938161033349835e-05, "loss": 0.2172, "step": 39169 }, { "epoch": 3.173201555411536, "grad_norm": 0.06990345567464828, "learning_rate": 4.593366038075521e-05, "loss": 0.2176, "step": 39170 }, { "epoch": 3.1732825664290343, "grad_norm": 0.06490842252969742, "learning_rate": 4.592915972816059e-05, "loss": 0.2048, "step": 39171 }, { "epoch": 3.1733635774465325, "grad_norm": 0.08713230490684509, "learning_rate": 4.5924659075565956e-05, "loss": 0.2451, "step": 39172 }, { "epoch": 3.173444588464031, "grad_norm": 0.07410930097103119, "learning_rate": 4.592015842297133e-05, "loss": 0.2271, "step": 39173 }, { "epoch": 3.1735255994815295, "grad_norm": 0.06233803927898407, "learning_rate": 4.591565777037671e-05, "loss": 0.2077, "step": 39174 }, { "epoch": 3.1736066104990277, "grad_norm": 0.0665794089436531, "learning_rate": 4.5911157117782084e-05, "loss": 0.2672, "step": 39175 }, { "epoch": 3.1736876215165264, "grad_norm": 0.06904125958681107, "learning_rate": 4.590665646518745e-05, "loss": 0.2193, "step": 39176 }, { "epoch": 3.1737686325340246, "grad_norm": 0.0749911218881607, "learning_rate": 4.590215581259283e-05, "loss": 0.2238, "step": 39177 }, { "epoch": 3.173849643551523, "grad_norm": 0.07443109154701233, "learning_rate": 4.5897655159998204e-05, "loss": 0.2185, "step": 39178 }, { "epoch": 3.1739306545690216, "grad_norm": 0.06918110698461533, "learning_rate": 4.589315450740357e-05, "loss": 0.2438, "step": 39179 }, { "epoch": 3.17401166558652, "grad_norm": 0.059091437608003616, "learning_rate": 4.588865385480895e-05, "loss": 0.2122, "step": 39180 }, { "epoch": 3.174092676604018, "grad_norm": 0.08509764820337296, "learning_rate": 4.5884153202214325e-05, "loss": 0.2517, "step": 39181 }, { "epoch": 3.1741736876215167, "grad_norm": 0.07486993074417114, "learning_rate": 4.587965254961969e-05, "loss": 0.2424, "step": 39182 }, { "epoch": 3.174254698639015, "grad_norm": 0.1036665216088295, "learning_rate": 4.587515189702507e-05, "loss": 0.2352, "step": 39183 }, { "epoch": 3.1743357096565132, "grad_norm": 0.07043270021677017, "learning_rate": 4.5870651244430446e-05, "loss": 0.2577, "step": 39184 }, { "epoch": 3.1744167206740115, "grad_norm": 0.07139958441257477, "learning_rate": 4.586615059183581e-05, "loss": 0.2191, "step": 39185 }, { "epoch": 3.17449773169151, "grad_norm": 0.0614413358271122, "learning_rate": 4.586164993924119e-05, "loss": 0.202, "step": 39186 }, { "epoch": 3.1745787427090084, "grad_norm": 0.0673774853348732, "learning_rate": 4.585714928664657e-05, "loss": 0.2389, "step": 39187 }, { "epoch": 3.1746597537265067, "grad_norm": 0.07635167986154556, "learning_rate": 4.585264863405194e-05, "loss": 0.2149, "step": 39188 }, { "epoch": 3.1747407647440054, "grad_norm": 0.07869356125593185, "learning_rate": 4.5848147981457314e-05, "loss": 0.2248, "step": 39189 }, { "epoch": 3.1748217757615036, "grad_norm": 0.06915463507175446, "learning_rate": 4.584364732886269e-05, "loss": 0.2303, "step": 39190 }, { "epoch": 3.174902786779002, "grad_norm": 0.0811949148774147, "learning_rate": 4.583914667626806e-05, "loss": 0.2208, "step": 39191 }, { "epoch": 3.1749837977965005, "grad_norm": 0.06380307674407959, "learning_rate": 4.5834646023673435e-05, "loss": 0.1997, "step": 39192 }, { "epoch": 3.1750648088139988, "grad_norm": 0.08466849476099014, "learning_rate": 4.583014537107881e-05, "loss": 0.2482, "step": 39193 }, { "epoch": 3.175145819831497, "grad_norm": 0.11230959743261337, "learning_rate": 4.582564471848418e-05, "loss": 0.2753, "step": 39194 }, { "epoch": 3.1752268308489953, "grad_norm": 0.062294308096170425, "learning_rate": 4.5821144065889556e-05, "loss": 0.19, "step": 39195 }, { "epoch": 3.175307841866494, "grad_norm": 0.0825783833861351, "learning_rate": 4.581664341329493e-05, "loss": 0.268, "step": 39196 }, { "epoch": 3.175388852883992, "grad_norm": 0.07035788148641586, "learning_rate": 4.58121427607003e-05, "loss": 0.27, "step": 39197 }, { "epoch": 3.1754698639014904, "grad_norm": 0.0746840238571167, "learning_rate": 4.5807642108105677e-05, "loss": 0.2396, "step": 39198 }, { "epoch": 3.175550874918989, "grad_norm": 0.06415130943059921, "learning_rate": 4.580314145551105e-05, "loss": 0.2372, "step": 39199 }, { "epoch": 3.1756318859364874, "grad_norm": 0.07957671582698822, "learning_rate": 4.5798640802916424e-05, "loss": 0.2186, "step": 39200 }, { "epoch": 3.1757128969539856, "grad_norm": 0.06585413962602615, "learning_rate": 4.5794140150321804e-05, "loss": 0.2329, "step": 39201 }, { "epoch": 3.1757939079714843, "grad_norm": 0.06584955751895905, "learning_rate": 4.578963949772717e-05, "loss": 0.1931, "step": 39202 }, { "epoch": 3.1758749189889826, "grad_norm": 0.06202779710292816, "learning_rate": 4.5785138845132545e-05, "loss": 0.2059, "step": 39203 }, { "epoch": 3.175955930006481, "grad_norm": 0.08203383535146713, "learning_rate": 4.5780638192537925e-05, "loss": 0.2704, "step": 39204 }, { "epoch": 3.1760369410239795, "grad_norm": 0.09432760626077652, "learning_rate": 4.577613753994329e-05, "loss": 0.2181, "step": 39205 }, { "epoch": 3.1761179520414777, "grad_norm": 0.06736686825752258, "learning_rate": 4.5771636887348665e-05, "loss": 0.2272, "step": 39206 }, { "epoch": 3.176198963058976, "grad_norm": 0.06527657806873322, "learning_rate": 4.5767136234754046e-05, "loss": 0.1973, "step": 39207 }, { "epoch": 3.176279974076474, "grad_norm": 0.07748841494321823, "learning_rate": 4.576263558215941e-05, "loss": 0.2703, "step": 39208 }, { "epoch": 3.176360985093973, "grad_norm": 0.06551697105169296, "learning_rate": 4.5758134929564786e-05, "loss": 0.2256, "step": 39209 }, { "epoch": 3.176441996111471, "grad_norm": 0.07224316895008087, "learning_rate": 4.5753634276970167e-05, "loss": 0.2528, "step": 39210 }, { "epoch": 3.1765230071289694, "grad_norm": 0.07008182257413864, "learning_rate": 4.574913362437553e-05, "loss": 0.2159, "step": 39211 }, { "epoch": 3.176604018146468, "grad_norm": 0.09387296438217163, "learning_rate": 4.574463297178091e-05, "loss": 0.2536, "step": 39212 }, { "epoch": 3.1766850291639663, "grad_norm": 0.06003788858652115, "learning_rate": 4.574013231918629e-05, "loss": 0.2046, "step": 39213 }, { "epoch": 3.1767660401814646, "grad_norm": 0.06780782341957092, "learning_rate": 4.573563166659166e-05, "loss": 0.2366, "step": 39214 }, { "epoch": 3.176847051198963, "grad_norm": 0.0731881707906723, "learning_rate": 4.573113101399703e-05, "loss": 0.2069, "step": 39215 }, { "epoch": 3.1769280622164615, "grad_norm": 0.07370991259813309, "learning_rate": 4.572663036140241e-05, "loss": 0.1897, "step": 39216 }, { "epoch": 3.1770090732339598, "grad_norm": 0.08127991855144501, "learning_rate": 4.572212970880778e-05, "loss": 0.2508, "step": 39217 }, { "epoch": 3.177090084251458, "grad_norm": 0.07813630998134613, "learning_rate": 4.571762905621315e-05, "loss": 0.2182, "step": 39218 }, { "epoch": 3.1771710952689567, "grad_norm": 0.07162605226039886, "learning_rate": 4.571312840361853e-05, "loss": 0.2367, "step": 39219 }, { "epoch": 3.177252106286455, "grad_norm": 0.06499221175909042, "learning_rate": 4.57086277510239e-05, "loss": 0.2255, "step": 39220 }, { "epoch": 3.177333117303953, "grad_norm": 0.0795610323548317, "learning_rate": 4.570412709842927e-05, "loss": 0.2342, "step": 39221 }, { "epoch": 3.177414128321452, "grad_norm": 0.06543579697608948, "learning_rate": 4.569962644583465e-05, "loss": 0.2007, "step": 39222 }, { "epoch": 3.17749513933895, "grad_norm": 0.06847775727510452, "learning_rate": 4.569512579324002e-05, "loss": 0.2321, "step": 39223 }, { "epoch": 3.1775761503564484, "grad_norm": 0.08107810467481613, "learning_rate": 4.569062514064539e-05, "loss": 0.2443, "step": 39224 }, { "epoch": 3.177657161373947, "grad_norm": 0.06877782195806503, "learning_rate": 4.568612448805077e-05, "loss": 0.2604, "step": 39225 }, { "epoch": 3.1777381723914453, "grad_norm": 0.06588443368673325, "learning_rate": 4.5681623835456144e-05, "loss": 0.2494, "step": 39226 }, { "epoch": 3.1778191834089435, "grad_norm": 0.07563662528991699, "learning_rate": 4.567712318286152e-05, "loss": 0.2448, "step": 39227 }, { "epoch": 3.1779001944264422, "grad_norm": 0.0697861835360527, "learning_rate": 4.567262253026689e-05, "loss": 0.2562, "step": 39228 }, { "epoch": 3.1779812054439405, "grad_norm": 0.07472296804189682, "learning_rate": 4.5668121877672265e-05, "loss": 0.2343, "step": 39229 }, { "epoch": 3.1780622164614387, "grad_norm": 0.058154940605163574, "learning_rate": 4.566362122507764e-05, "loss": 0.1825, "step": 39230 }, { "epoch": 3.178143227478937, "grad_norm": 0.06901900470256805, "learning_rate": 4.565912057248301e-05, "loss": 0.2349, "step": 39231 }, { "epoch": 3.1782242384964356, "grad_norm": 0.06484868377447128, "learning_rate": 4.5654619919888386e-05, "loss": 0.2034, "step": 39232 }, { "epoch": 3.178305249513934, "grad_norm": 0.06554258614778519, "learning_rate": 4.565011926729376e-05, "loss": 0.224, "step": 39233 }, { "epoch": 3.178386260531432, "grad_norm": 0.06775832921266556, "learning_rate": 4.564561861469913e-05, "loss": 0.2217, "step": 39234 }, { "epoch": 3.178467271548931, "grad_norm": 0.07073518633842468, "learning_rate": 4.564111796210451e-05, "loss": 0.2143, "step": 39235 }, { "epoch": 3.178548282566429, "grad_norm": 0.07139239460229874, "learning_rate": 4.563661730950988e-05, "loss": 0.2638, "step": 39236 }, { "epoch": 3.1786292935839273, "grad_norm": 0.07724419981241226, "learning_rate": 4.5632116656915254e-05, "loss": 0.2209, "step": 39237 }, { "epoch": 3.1787103046014256, "grad_norm": 0.06226162612438202, "learning_rate": 4.562761600432063e-05, "loss": 0.2253, "step": 39238 }, { "epoch": 3.1787913156189243, "grad_norm": 0.06873737275600433, "learning_rate": 4.5623115351726e-05, "loss": 0.2261, "step": 39239 }, { "epoch": 3.1788723266364225, "grad_norm": 0.07333312928676605, "learning_rate": 4.561861469913138e-05, "loss": 0.2453, "step": 39240 }, { "epoch": 3.1789533376539207, "grad_norm": 0.07935798913240433, "learning_rate": 4.561411404653675e-05, "loss": 0.2215, "step": 39241 }, { "epoch": 3.1790343486714194, "grad_norm": 0.07684028148651123, "learning_rate": 4.560961339394212e-05, "loss": 0.22, "step": 39242 }, { "epoch": 3.1791153596889177, "grad_norm": 0.08199668675661087, "learning_rate": 4.56051127413475e-05, "loss": 0.2536, "step": 39243 }, { "epoch": 3.179196370706416, "grad_norm": 0.09142853319644928, "learning_rate": 4.560061208875287e-05, "loss": 0.2448, "step": 39244 }, { "epoch": 3.1792773817239146, "grad_norm": 0.06457065045833588, "learning_rate": 4.559611143615824e-05, "loss": 0.2233, "step": 39245 }, { "epoch": 3.179358392741413, "grad_norm": 0.09834888577461243, "learning_rate": 4.559161078356362e-05, "loss": 0.2493, "step": 39246 }, { "epoch": 3.179439403758911, "grad_norm": 0.07715655118227005, "learning_rate": 4.558711013096899e-05, "loss": 0.2241, "step": 39247 }, { "epoch": 3.17952041477641, "grad_norm": 0.08098747581243515, "learning_rate": 4.5582609478374363e-05, "loss": 0.2432, "step": 39248 }, { "epoch": 3.179601425793908, "grad_norm": 0.0630323514342308, "learning_rate": 4.5578108825779744e-05, "loss": 0.2158, "step": 39249 }, { "epoch": 3.1796824368114063, "grad_norm": 0.06567676365375519, "learning_rate": 4.557360817318511e-05, "loss": 0.2456, "step": 39250 }, { "epoch": 3.179763447828905, "grad_norm": 0.06880970299243927, "learning_rate": 4.5569107520590484e-05, "loss": 0.2697, "step": 39251 }, { "epoch": 3.179844458846403, "grad_norm": 0.07647331804037094, "learning_rate": 4.5564606867995865e-05, "loss": 0.2156, "step": 39252 }, { "epoch": 3.1799254698639015, "grad_norm": 0.061718251556158066, "learning_rate": 4.556010621540124e-05, "loss": 0.214, "step": 39253 }, { "epoch": 3.1800064808813997, "grad_norm": 0.06768068671226501, "learning_rate": 4.5555605562806605e-05, "loss": 0.1859, "step": 39254 }, { "epoch": 3.1800874918988984, "grad_norm": 0.0734715387225151, "learning_rate": 4.5551104910211985e-05, "loss": 0.1913, "step": 39255 }, { "epoch": 3.1801685029163966, "grad_norm": 0.0689500942826271, "learning_rate": 4.554660425761736e-05, "loss": 0.2301, "step": 39256 }, { "epoch": 3.180249513933895, "grad_norm": 0.05353236570954323, "learning_rate": 4.5542103605022726e-05, "loss": 0.2004, "step": 39257 }, { "epoch": 3.1803305249513936, "grad_norm": 0.05947539582848549, "learning_rate": 4.5537602952428106e-05, "loss": 0.2278, "step": 39258 }, { "epoch": 3.180411535968892, "grad_norm": 0.06305386871099472, "learning_rate": 4.553310229983348e-05, "loss": 0.2257, "step": 39259 }, { "epoch": 3.18049254698639, "grad_norm": 0.07101128250360489, "learning_rate": 4.552860164723885e-05, "loss": 0.2451, "step": 39260 }, { "epoch": 3.1805735580038883, "grad_norm": 0.06881023198366165, "learning_rate": 4.552410099464423e-05, "loss": 0.2267, "step": 39261 }, { "epoch": 3.180654569021387, "grad_norm": 0.06827352941036224, "learning_rate": 4.55196003420496e-05, "loss": 0.2168, "step": 39262 }, { "epoch": 3.1807355800388852, "grad_norm": 0.07661303132772446, "learning_rate": 4.551509968945497e-05, "loss": 0.221, "step": 39263 }, { "epoch": 3.1808165910563835, "grad_norm": 0.05772950127720833, "learning_rate": 4.551059903686035e-05, "loss": 0.2088, "step": 39264 }, { "epoch": 3.180897602073882, "grad_norm": 0.06366662681102753, "learning_rate": 4.550609838426572e-05, "loss": 0.2454, "step": 39265 }, { "epoch": 3.1809786130913804, "grad_norm": 0.07420886307954788, "learning_rate": 4.5501597731671095e-05, "loss": 0.2599, "step": 39266 }, { "epoch": 3.1810596241088787, "grad_norm": 0.0739051029086113, "learning_rate": 4.549709707907647e-05, "loss": 0.2616, "step": 39267 }, { "epoch": 3.1811406351263773, "grad_norm": 0.08363629132509232, "learning_rate": 4.549259642648184e-05, "loss": 0.2161, "step": 39268 }, { "epoch": 3.1812216461438756, "grad_norm": 0.07953966408967972, "learning_rate": 4.5488095773887216e-05, "loss": 0.2474, "step": 39269 }, { "epoch": 3.181302657161374, "grad_norm": 0.07963499426841736, "learning_rate": 4.548359512129259e-05, "loss": 0.2252, "step": 39270 }, { "epoch": 3.1813836681788725, "grad_norm": 0.06578200310468674, "learning_rate": 4.547909446869796e-05, "loss": 0.2329, "step": 39271 }, { "epoch": 3.1814646791963708, "grad_norm": 0.061785999685525894, "learning_rate": 4.547459381610334e-05, "loss": 0.2612, "step": 39272 }, { "epoch": 3.181545690213869, "grad_norm": 0.06063782051205635, "learning_rate": 4.547009316350871e-05, "loss": 0.2405, "step": 39273 }, { "epoch": 3.1816267012313673, "grad_norm": 0.07053548097610474, "learning_rate": 4.5465592510914084e-05, "loss": 0.1988, "step": 39274 }, { "epoch": 3.181707712248866, "grad_norm": 0.07843134552240372, "learning_rate": 4.546109185831946e-05, "loss": 0.2185, "step": 39275 }, { "epoch": 3.181788723266364, "grad_norm": 0.05935240536928177, "learning_rate": 4.545659120572483e-05, "loss": 0.2044, "step": 39276 }, { "epoch": 3.1818697342838624, "grad_norm": 0.06905554234981537, "learning_rate": 4.5452090553130205e-05, "loss": 0.1804, "step": 39277 }, { "epoch": 3.181950745301361, "grad_norm": 0.06303178519010544, "learning_rate": 4.544758990053558e-05, "loss": 0.2416, "step": 39278 }, { "epoch": 3.1820317563188594, "grad_norm": 0.05552779883146286, "learning_rate": 4.544308924794095e-05, "loss": 0.1982, "step": 39279 }, { "epoch": 3.1821127673363576, "grad_norm": 0.09019738435745239, "learning_rate": 4.5438588595346326e-05, "loss": 0.2264, "step": 39280 }, { "epoch": 3.1821937783538563, "grad_norm": 0.06747749447822571, "learning_rate": 4.54340879427517e-05, "loss": 0.1937, "step": 39281 }, { "epoch": 3.1822747893713546, "grad_norm": 0.07805449515581131, "learning_rate": 4.542958729015708e-05, "loss": 0.1897, "step": 39282 }, { "epoch": 3.182355800388853, "grad_norm": 0.060038719326257706, "learning_rate": 4.5425086637562446e-05, "loss": 0.2494, "step": 39283 }, { "epoch": 3.182436811406351, "grad_norm": 0.06551884859800339, "learning_rate": 4.542058598496782e-05, "loss": 0.2262, "step": 39284 }, { "epoch": 3.1825178224238497, "grad_norm": 0.06679324060678482, "learning_rate": 4.54160853323732e-05, "loss": 0.236, "step": 39285 }, { "epoch": 3.182598833441348, "grad_norm": 0.06539665907621384, "learning_rate": 4.541158467977857e-05, "loss": 0.23, "step": 39286 }, { "epoch": 3.182679844458846, "grad_norm": 0.06999313086271286, "learning_rate": 4.540708402718394e-05, "loss": 0.2264, "step": 39287 }, { "epoch": 3.182760855476345, "grad_norm": 0.0747220367193222, "learning_rate": 4.540258337458932e-05, "loss": 0.2024, "step": 39288 }, { "epoch": 3.182841866493843, "grad_norm": 0.06839533895254135, "learning_rate": 4.539808272199469e-05, "loss": 0.2165, "step": 39289 }, { "epoch": 3.1829228775113414, "grad_norm": 0.054501939564943314, "learning_rate": 4.539358206940006e-05, "loss": 0.2009, "step": 39290 }, { "epoch": 3.18300388852884, "grad_norm": 0.06777974963188171, "learning_rate": 4.538908141680544e-05, "loss": 0.2538, "step": 39291 }, { "epoch": 3.1830848995463383, "grad_norm": 0.06993214040994644, "learning_rate": 4.538458076421081e-05, "loss": 0.229, "step": 39292 }, { "epoch": 3.1831659105638366, "grad_norm": 0.07302381843328476, "learning_rate": 4.538008011161618e-05, "loss": 0.2465, "step": 39293 }, { "epoch": 3.1832469215813353, "grad_norm": 0.06529241055250168, "learning_rate": 4.537557945902156e-05, "loss": 0.2243, "step": 39294 }, { "epoch": 3.1833279325988335, "grad_norm": 0.07419822365045547, "learning_rate": 4.5371078806426936e-05, "loss": 0.2513, "step": 39295 }, { "epoch": 3.1834089436163318, "grad_norm": 0.06903527677059174, "learning_rate": 4.53665781538323e-05, "loss": 0.2284, "step": 39296 }, { "epoch": 3.18348995463383, "grad_norm": 0.0647360309958458, "learning_rate": 4.5362077501237684e-05, "loss": 0.2102, "step": 39297 }, { "epoch": 3.1835709656513287, "grad_norm": 0.07613998651504517, "learning_rate": 4.535757684864306e-05, "loss": 0.2236, "step": 39298 }, { "epoch": 3.183651976668827, "grad_norm": 0.0889119952917099, "learning_rate": 4.5353076196048424e-05, "loss": 0.2244, "step": 39299 }, { "epoch": 3.183732987686325, "grad_norm": 0.07261547446250916, "learning_rate": 4.5348575543453804e-05, "loss": 0.222, "step": 39300 }, { "epoch": 3.183813998703824, "grad_norm": 0.06894300132989883, "learning_rate": 4.534407489085918e-05, "loss": 0.1955, "step": 39301 }, { "epoch": 3.183895009721322, "grad_norm": 0.08977346867322922, "learning_rate": 4.533957423826455e-05, "loss": 0.2679, "step": 39302 }, { "epoch": 3.1839760207388204, "grad_norm": 0.08320219069719315, "learning_rate": 4.5335073585669925e-05, "loss": 0.2693, "step": 39303 }, { "epoch": 3.184057031756319, "grad_norm": 0.08963727951049805, "learning_rate": 4.53305729330753e-05, "loss": 0.2384, "step": 39304 }, { "epoch": 3.1841380427738173, "grad_norm": 0.06358581781387329, "learning_rate": 4.532607228048067e-05, "loss": 0.2049, "step": 39305 }, { "epoch": 3.1842190537913155, "grad_norm": 0.0702756866812706, "learning_rate": 4.5321571627886046e-05, "loss": 0.2305, "step": 39306 }, { "epoch": 3.184300064808814, "grad_norm": 0.0786462277173996, "learning_rate": 4.531707097529142e-05, "loss": 0.2217, "step": 39307 }, { "epoch": 3.1843810758263125, "grad_norm": 0.06890799850225449, "learning_rate": 4.531257032269679e-05, "loss": 0.2644, "step": 39308 }, { "epoch": 3.1844620868438107, "grad_norm": 0.07675221562385559, "learning_rate": 4.530806967010217e-05, "loss": 0.2375, "step": 39309 }, { "epoch": 3.184543097861309, "grad_norm": 0.07103940099477768, "learning_rate": 4.530356901750754e-05, "loss": 0.2751, "step": 39310 }, { "epoch": 3.1846241088788076, "grad_norm": 0.06175985932350159, "learning_rate": 4.5299068364912914e-05, "loss": 0.2273, "step": 39311 }, { "epoch": 3.184705119896306, "grad_norm": 0.06587029248476028, "learning_rate": 4.529456771231829e-05, "loss": 0.2443, "step": 39312 }, { "epoch": 3.184786130913804, "grad_norm": 0.07483391463756561, "learning_rate": 4.529006705972366e-05, "loss": 0.2391, "step": 39313 }, { "epoch": 3.184867141931303, "grad_norm": 0.06790148466825485, "learning_rate": 4.5285566407129035e-05, "loss": 0.2365, "step": 39314 }, { "epoch": 3.184948152948801, "grad_norm": 0.05836471542716026, "learning_rate": 4.528106575453441e-05, "loss": 0.212, "step": 39315 }, { "epoch": 3.1850291639662993, "grad_norm": 0.08337269723415375, "learning_rate": 4.527656510193978e-05, "loss": 0.2291, "step": 39316 }, { "epoch": 3.185110174983798, "grad_norm": 0.07346945255994797, "learning_rate": 4.5272064449345156e-05, "loss": 0.2382, "step": 39317 }, { "epoch": 3.1851911860012962, "grad_norm": 0.07544538378715515, "learning_rate": 4.526756379675053e-05, "loss": 0.2542, "step": 39318 }, { "epoch": 3.1852721970187945, "grad_norm": 0.07884622365236282, "learning_rate": 4.52630631441559e-05, "loss": 0.1987, "step": 39319 }, { "epoch": 3.1853532080362927, "grad_norm": 0.06149621307849884, "learning_rate": 4.5258562491561276e-05, "loss": 0.2165, "step": 39320 }, { "epoch": 3.1854342190537914, "grad_norm": 0.0629139170050621, "learning_rate": 4.525406183896666e-05, "loss": 0.2683, "step": 39321 }, { "epoch": 3.1855152300712897, "grad_norm": 0.05898153409361839, "learning_rate": 4.5249561186372024e-05, "loss": 0.2255, "step": 39322 }, { "epoch": 3.185596241088788, "grad_norm": 0.07770536839962006, "learning_rate": 4.52450605337774e-05, "loss": 0.2499, "step": 39323 }, { "epoch": 3.1856772521062866, "grad_norm": 0.06799954921007156, "learning_rate": 4.524055988118278e-05, "loss": 0.255, "step": 39324 }, { "epoch": 3.185758263123785, "grad_norm": 0.08119504153728485, "learning_rate": 4.5236059228588144e-05, "loss": 0.2339, "step": 39325 }, { "epoch": 3.185839274141283, "grad_norm": 0.07942589372396469, "learning_rate": 4.523155857599352e-05, "loss": 0.2523, "step": 39326 }, { "epoch": 3.185920285158782, "grad_norm": 0.06663884222507477, "learning_rate": 4.52270579233989e-05, "loss": 0.1957, "step": 39327 }, { "epoch": 3.18600129617628, "grad_norm": 0.06974213570356369, "learning_rate": 4.5222557270804265e-05, "loss": 0.2075, "step": 39328 }, { "epoch": 3.1860823071937783, "grad_norm": 0.08034425973892212, "learning_rate": 4.521805661820964e-05, "loss": 0.2451, "step": 39329 }, { "epoch": 3.1861633182112765, "grad_norm": 0.07607818394899368, "learning_rate": 4.521355596561502e-05, "loss": 0.2553, "step": 39330 }, { "epoch": 3.186244329228775, "grad_norm": 0.09670832008123398, "learning_rate": 4.5209055313020386e-05, "loss": 0.2715, "step": 39331 }, { "epoch": 3.1863253402462735, "grad_norm": 0.06915496289730072, "learning_rate": 4.520455466042576e-05, "loss": 0.2111, "step": 39332 }, { "epoch": 3.1864063512637717, "grad_norm": 0.06327734887599945, "learning_rate": 4.520005400783114e-05, "loss": 0.2232, "step": 39333 }, { "epoch": 3.1864873622812704, "grad_norm": 0.06974446773529053, "learning_rate": 4.5195553355236514e-05, "loss": 0.2408, "step": 39334 }, { "epoch": 3.1865683732987686, "grad_norm": 0.06775286048650742, "learning_rate": 4.519105270264189e-05, "loss": 0.1984, "step": 39335 }, { "epoch": 3.186649384316267, "grad_norm": 0.08392871916294098, "learning_rate": 4.518655205004726e-05, "loss": 0.2351, "step": 39336 }, { "epoch": 3.1867303953337656, "grad_norm": 0.0782778263092041, "learning_rate": 4.5182051397452634e-05, "loss": 0.2479, "step": 39337 }, { "epoch": 3.186811406351264, "grad_norm": 0.09481296688318253, "learning_rate": 4.517755074485801e-05, "loss": 0.2382, "step": 39338 }, { "epoch": 3.186892417368762, "grad_norm": 0.08505354076623917, "learning_rate": 4.517305009226338e-05, "loss": 0.2402, "step": 39339 }, { "epoch": 3.1869734283862607, "grad_norm": 0.08199076354503632, "learning_rate": 4.5168549439668755e-05, "loss": 0.2511, "step": 39340 }, { "epoch": 3.187054439403759, "grad_norm": 0.072618268430233, "learning_rate": 4.516404878707413e-05, "loss": 0.2194, "step": 39341 }, { "epoch": 3.1871354504212572, "grad_norm": 0.06208586320281029, "learning_rate": 4.51595481344795e-05, "loss": 0.2061, "step": 39342 }, { "epoch": 3.1872164614387555, "grad_norm": 0.06540694087743759, "learning_rate": 4.5155047481884876e-05, "loss": 0.2373, "step": 39343 }, { "epoch": 3.187297472456254, "grad_norm": 0.05991688743233681, "learning_rate": 4.515054682929025e-05, "loss": 0.2162, "step": 39344 }, { "epoch": 3.1873784834737524, "grad_norm": 0.07174012809991837, "learning_rate": 4.514604617669562e-05, "loss": 0.2339, "step": 39345 }, { "epoch": 3.1874594944912507, "grad_norm": 0.08227608352899551, "learning_rate": 4.5141545524101e-05, "loss": 0.2404, "step": 39346 }, { "epoch": 3.1875405055087493, "grad_norm": 0.09132973104715347, "learning_rate": 4.513704487150637e-05, "loss": 0.2598, "step": 39347 }, { "epoch": 3.1876215165262476, "grad_norm": 0.07459663599729538, "learning_rate": 4.5132544218911744e-05, "loss": 0.1995, "step": 39348 }, { "epoch": 3.187702527543746, "grad_norm": 0.05755617842078209, "learning_rate": 4.512804356631712e-05, "loss": 0.2197, "step": 39349 }, { "epoch": 3.1877835385612445, "grad_norm": 0.0721568837761879, "learning_rate": 4.512354291372249e-05, "loss": 0.2361, "step": 39350 }, { "epoch": 3.1878645495787428, "grad_norm": 0.09603045135736465, "learning_rate": 4.5119042261127865e-05, "loss": 0.2237, "step": 39351 }, { "epoch": 3.187945560596241, "grad_norm": 0.07836206257343292, "learning_rate": 4.511454160853324e-05, "loss": 0.2433, "step": 39352 }, { "epoch": 3.1880265716137393, "grad_norm": 0.0568229965865612, "learning_rate": 4.511004095593861e-05, "loss": 0.207, "step": 39353 }, { "epoch": 3.188107582631238, "grad_norm": 0.07176083326339722, "learning_rate": 4.5105540303343986e-05, "loss": 0.2325, "step": 39354 }, { "epoch": 3.188188593648736, "grad_norm": 0.07750972360372543, "learning_rate": 4.510103965074936e-05, "loss": 0.249, "step": 39355 }, { "epoch": 3.1882696046662344, "grad_norm": 0.07936552911996841, "learning_rate": 4.509653899815473e-05, "loss": 0.2384, "step": 39356 }, { "epoch": 3.188350615683733, "grad_norm": 0.07242981344461441, "learning_rate": 4.5092038345560107e-05, "loss": 0.2362, "step": 39357 }, { "epoch": 3.1884316267012314, "grad_norm": 0.07366965711116791, "learning_rate": 4.508753769296548e-05, "loss": 0.1928, "step": 39358 }, { "epoch": 3.1885126377187296, "grad_norm": 0.060326382517814636, "learning_rate": 4.5083037040370854e-05, "loss": 0.238, "step": 39359 }, { "epoch": 3.1885936487362283, "grad_norm": 0.06053246930241585, "learning_rate": 4.5078536387776234e-05, "loss": 0.2133, "step": 39360 }, { "epoch": 3.1886746597537265, "grad_norm": 0.08124006539583206, "learning_rate": 4.50740357351816e-05, "loss": 0.2512, "step": 39361 }, { "epoch": 3.188755670771225, "grad_norm": 0.07329577207565308, "learning_rate": 4.5069535082586975e-05, "loss": 0.2079, "step": 39362 }, { "epoch": 3.1888366817887235, "grad_norm": 0.08375972509384155, "learning_rate": 4.5065034429992355e-05, "loss": 0.2172, "step": 39363 }, { "epoch": 3.1889176928062217, "grad_norm": 0.0656561627984047, "learning_rate": 4.506053377739772e-05, "loss": 0.2171, "step": 39364 }, { "epoch": 3.18899870382372, "grad_norm": 0.0688033252954483, "learning_rate": 4.5056033124803095e-05, "loss": 0.2515, "step": 39365 }, { "epoch": 3.189079714841218, "grad_norm": 0.07227391749620438, "learning_rate": 4.5051532472208476e-05, "loss": 0.2278, "step": 39366 }, { "epoch": 3.189160725858717, "grad_norm": 0.07597273588180542, "learning_rate": 4.504703181961384e-05, "loss": 0.2374, "step": 39367 }, { "epoch": 3.189241736876215, "grad_norm": 0.07532206922769547, "learning_rate": 4.504253116701922e-05, "loss": 0.2444, "step": 39368 }, { "epoch": 3.1893227478937134, "grad_norm": 0.06747014075517654, "learning_rate": 4.5038030514424597e-05, "loss": 0.24, "step": 39369 }, { "epoch": 3.189403758911212, "grad_norm": 0.0654434859752655, "learning_rate": 4.5033529861829963e-05, "loss": 0.1974, "step": 39370 }, { "epoch": 3.1894847699287103, "grad_norm": 0.06667497009038925, "learning_rate": 4.5029029209235344e-05, "loss": 0.2363, "step": 39371 }, { "epoch": 3.1895657809462086, "grad_norm": 0.07937151938676834, "learning_rate": 4.502452855664072e-05, "loss": 0.2154, "step": 39372 }, { "epoch": 3.1896467919637073, "grad_norm": 0.07885178923606873, "learning_rate": 4.502002790404609e-05, "loss": 0.2385, "step": 39373 }, { "epoch": 3.1897278029812055, "grad_norm": 0.06830567121505737, "learning_rate": 4.5015527251451465e-05, "loss": 0.1888, "step": 39374 }, { "epoch": 3.1898088139987038, "grad_norm": 0.07489234209060669, "learning_rate": 4.501102659885684e-05, "loss": 0.2403, "step": 39375 }, { "epoch": 3.189889825016202, "grad_norm": 0.0693124309182167, "learning_rate": 4.500652594626221e-05, "loss": 0.2068, "step": 39376 }, { "epoch": 3.1899708360337007, "grad_norm": 0.0662122517824173, "learning_rate": 4.5002025293667585e-05, "loss": 0.214, "step": 39377 }, { "epoch": 3.190051847051199, "grad_norm": 0.07063662260770798, "learning_rate": 4.499752464107296e-05, "loss": 0.2014, "step": 39378 }, { "epoch": 3.190132858068697, "grad_norm": 0.06921156495809555, "learning_rate": 4.499302398847833e-05, "loss": 0.2403, "step": 39379 }, { "epoch": 3.190213869086196, "grad_norm": 0.07520690560340881, "learning_rate": 4.4988523335883706e-05, "loss": 0.2592, "step": 39380 }, { "epoch": 3.190294880103694, "grad_norm": 0.042391337454319, "learning_rate": 4.498402268328908e-05, "loss": 0.2146, "step": 39381 }, { "epoch": 3.1903758911211924, "grad_norm": 0.08247774839401245, "learning_rate": 4.4979522030694453e-05, "loss": 0.2418, "step": 39382 }, { "epoch": 3.190456902138691, "grad_norm": 0.06482577323913574, "learning_rate": 4.497502137809983e-05, "loss": 0.2355, "step": 39383 }, { "epoch": 3.1905379131561893, "grad_norm": 0.07847940176725388, "learning_rate": 4.49705207255052e-05, "loss": 0.2225, "step": 39384 }, { "epoch": 3.1906189241736875, "grad_norm": 0.0957542285323143, "learning_rate": 4.4966020072910574e-05, "loss": 0.2325, "step": 39385 }, { "epoch": 3.190699935191186, "grad_norm": 0.07233288884162903, "learning_rate": 4.496151942031595e-05, "loss": 0.2477, "step": 39386 }, { "epoch": 3.1907809462086845, "grad_norm": 0.07749512791633606, "learning_rate": 4.495701876772132e-05, "loss": 0.2174, "step": 39387 }, { "epoch": 3.1908619572261827, "grad_norm": 0.06869810819625854, "learning_rate": 4.4952518115126695e-05, "loss": 0.2335, "step": 39388 }, { "epoch": 3.190942968243681, "grad_norm": 0.06706426292657852, "learning_rate": 4.494801746253207e-05, "loss": 0.1688, "step": 39389 }, { "epoch": 3.1910239792611796, "grad_norm": 0.07330158352851868, "learning_rate": 4.494351680993744e-05, "loss": 0.2111, "step": 39390 }, { "epoch": 3.191104990278678, "grad_norm": 0.06660532206296921, "learning_rate": 4.4939016157342816e-05, "loss": 0.2378, "step": 39391 }, { "epoch": 3.191186001296176, "grad_norm": 0.07427835464477539, "learning_rate": 4.493451550474819e-05, "loss": 0.2075, "step": 39392 }, { "epoch": 3.191267012313675, "grad_norm": 0.08421943336725235, "learning_rate": 4.493001485215356e-05, "loss": 0.254, "step": 39393 }, { "epoch": 3.191348023331173, "grad_norm": 0.0874992161989212, "learning_rate": 4.492551419955894e-05, "loss": 0.2039, "step": 39394 }, { "epoch": 3.1914290343486713, "grad_norm": 0.06732051074504852, "learning_rate": 4.492101354696431e-05, "loss": 0.2368, "step": 39395 }, { "epoch": 3.19151004536617, "grad_norm": 0.06246805191040039, "learning_rate": 4.4916512894369684e-05, "loss": 0.2191, "step": 39396 }, { "epoch": 3.1915910563836682, "grad_norm": 0.06294985860586166, "learning_rate": 4.491201224177506e-05, "loss": 0.2046, "step": 39397 }, { "epoch": 3.1916720674011665, "grad_norm": 0.07085716724395752, "learning_rate": 4.490751158918043e-05, "loss": 0.2596, "step": 39398 }, { "epoch": 3.1917530784186647, "grad_norm": 0.05900321155786514, "learning_rate": 4.490301093658581e-05, "loss": 0.2447, "step": 39399 }, { "epoch": 3.1918340894361634, "grad_norm": 0.06928402185440063, "learning_rate": 4.489851028399118e-05, "loss": 0.1887, "step": 39400 }, { "epoch": 3.1919151004536617, "grad_norm": 0.07420630753040314, "learning_rate": 4.489400963139655e-05, "loss": 0.2255, "step": 39401 }, { "epoch": 3.19199611147116, "grad_norm": 0.07554472237825394, "learning_rate": 4.488950897880193e-05, "loss": 0.2109, "step": 39402 }, { "epoch": 3.1920771224886586, "grad_norm": 0.07431972026824951, "learning_rate": 4.48850083262073e-05, "loss": 0.2052, "step": 39403 }, { "epoch": 3.192158133506157, "grad_norm": 0.07085475325584412, "learning_rate": 4.488050767361268e-05, "loss": 0.2253, "step": 39404 }, { "epoch": 3.192239144523655, "grad_norm": 0.08677297085523605, "learning_rate": 4.487600702101805e-05, "loss": 0.2393, "step": 39405 }, { "epoch": 3.192320155541154, "grad_norm": 0.08472663164138794, "learning_rate": 4.487150636842342e-05, "loss": 0.2295, "step": 39406 }, { "epoch": 3.192401166558652, "grad_norm": 0.07280577719211578, "learning_rate": 4.48670057158288e-05, "loss": 0.2509, "step": 39407 }, { "epoch": 3.1924821775761503, "grad_norm": 0.07977905124425888, "learning_rate": 4.4862505063234174e-05, "loss": 0.2702, "step": 39408 }, { "epoch": 3.192563188593649, "grad_norm": 0.0874275490641594, "learning_rate": 4.485800441063954e-05, "loss": 0.2464, "step": 39409 }, { "epoch": 3.192644199611147, "grad_norm": 0.06713542342185974, "learning_rate": 4.485350375804492e-05, "loss": 0.2273, "step": 39410 }, { "epoch": 3.1927252106286454, "grad_norm": 0.06258574873209, "learning_rate": 4.4849003105450295e-05, "loss": 0.2559, "step": 39411 }, { "epoch": 3.1928062216461437, "grad_norm": 0.06480179727077484, "learning_rate": 4.484450245285567e-05, "loss": 0.2516, "step": 39412 }, { "epoch": 3.1928872326636424, "grad_norm": 0.07016629725694656, "learning_rate": 4.484000180026104e-05, "loss": 0.2385, "step": 39413 }, { "epoch": 3.1929682436811406, "grad_norm": 0.06314028799533844, "learning_rate": 4.4835501147666415e-05, "loss": 0.2396, "step": 39414 }, { "epoch": 3.193049254698639, "grad_norm": 0.07210088521242142, "learning_rate": 4.483100049507179e-05, "loss": 0.2155, "step": 39415 }, { "epoch": 3.1931302657161376, "grad_norm": 0.0667954757809639, "learning_rate": 4.482649984247716e-05, "loss": 0.2494, "step": 39416 }, { "epoch": 3.193211276733636, "grad_norm": 0.07332837581634521, "learning_rate": 4.4821999189882536e-05, "loss": 0.192, "step": 39417 }, { "epoch": 3.193292287751134, "grad_norm": 0.07549264281988144, "learning_rate": 4.481749853728791e-05, "loss": 0.2291, "step": 39418 }, { "epoch": 3.1933732987686327, "grad_norm": 0.06823220103979111, "learning_rate": 4.4812997884693284e-05, "loss": 0.2631, "step": 39419 }, { "epoch": 3.193454309786131, "grad_norm": 0.07784800231456757, "learning_rate": 4.480849723209866e-05, "loss": 0.2375, "step": 39420 }, { "epoch": 3.1935353208036292, "grad_norm": 0.0801076591014862, "learning_rate": 4.480399657950403e-05, "loss": 0.2755, "step": 39421 }, { "epoch": 3.1936163318211275, "grad_norm": 0.0587022602558136, "learning_rate": 4.4799495926909404e-05, "loss": 0.203, "step": 39422 }, { "epoch": 3.193697342838626, "grad_norm": 0.07514984905719757, "learning_rate": 4.479499527431478e-05, "loss": 0.2331, "step": 39423 }, { "epoch": 3.1937783538561244, "grad_norm": 0.07274370640516281, "learning_rate": 4.479049462172015e-05, "loss": 0.199, "step": 39424 }, { "epoch": 3.1938593648736227, "grad_norm": 0.07186318188905716, "learning_rate": 4.4785993969125525e-05, "loss": 0.2202, "step": 39425 }, { "epoch": 3.1939403758911213, "grad_norm": 0.07100582122802734, "learning_rate": 4.47814933165309e-05, "loss": 0.2082, "step": 39426 }, { "epoch": 3.1940213869086196, "grad_norm": 0.07508399337530136, "learning_rate": 4.477699266393627e-05, "loss": 0.2369, "step": 39427 }, { "epoch": 3.194102397926118, "grad_norm": 0.06001093611121178, "learning_rate": 4.4772492011341646e-05, "loss": 0.1918, "step": 39428 }, { "epoch": 3.1941834089436165, "grad_norm": 0.07501449435949326, "learning_rate": 4.476799135874702e-05, "loss": 0.2387, "step": 39429 }, { "epoch": 3.1942644199611148, "grad_norm": 0.07020004838705063, "learning_rate": 4.476349070615239e-05, "loss": 0.236, "step": 39430 }, { "epoch": 3.194345430978613, "grad_norm": 0.06646616011857986, "learning_rate": 4.475899005355777e-05, "loss": 0.2265, "step": 39431 }, { "epoch": 3.1944264419961117, "grad_norm": 0.0961184874176979, "learning_rate": 4.475448940096314e-05, "loss": 0.2664, "step": 39432 }, { "epoch": 3.19450745301361, "grad_norm": 0.06955624371767044, "learning_rate": 4.4749988748368514e-05, "loss": 0.2384, "step": 39433 }, { "epoch": 3.194588464031108, "grad_norm": 0.07705685496330261, "learning_rate": 4.474548809577389e-05, "loss": 0.2422, "step": 39434 }, { "epoch": 3.1946694750486064, "grad_norm": 0.06348368525505066, "learning_rate": 4.474098744317926e-05, "loss": 0.2252, "step": 39435 }, { "epoch": 3.194750486066105, "grad_norm": 0.08564847707748413, "learning_rate": 4.4736486790584635e-05, "loss": 0.2518, "step": 39436 }, { "epoch": 3.1948314970836034, "grad_norm": 0.07809983193874359, "learning_rate": 4.4731986137990015e-05, "loss": 0.2477, "step": 39437 }, { "epoch": 3.1949125081011016, "grad_norm": 0.07049041986465454, "learning_rate": 4.472748548539538e-05, "loss": 0.233, "step": 39438 }, { "epoch": 3.1949935191186003, "grad_norm": 0.07803355157375336, "learning_rate": 4.4722984832800756e-05, "loss": 0.2325, "step": 39439 }, { "epoch": 3.1950745301360985, "grad_norm": 0.07229940593242645, "learning_rate": 4.4718484180206136e-05, "loss": 0.2165, "step": 39440 }, { "epoch": 3.195155541153597, "grad_norm": 0.06585335731506348, "learning_rate": 4.471398352761151e-05, "loss": 0.2107, "step": 39441 }, { "epoch": 3.195236552171095, "grad_norm": 0.05904988572001457, "learning_rate": 4.4709482875016876e-05, "loss": 0.2193, "step": 39442 }, { "epoch": 3.1953175631885937, "grad_norm": 0.06626173108816147, "learning_rate": 4.470498222242226e-05, "loss": 0.1995, "step": 39443 }, { "epoch": 3.195398574206092, "grad_norm": 0.08097077161073685, "learning_rate": 4.470048156982763e-05, "loss": 0.2152, "step": 39444 }, { "epoch": 3.19547958522359, "grad_norm": 0.06193895637989044, "learning_rate": 4.4695980917233e-05, "loss": 0.1984, "step": 39445 }, { "epoch": 3.195560596241089, "grad_norm": 0.06149062514305115, "learning_rate": 4.469148026463838e-05, "loss": 0.179, "step": 39446 }, { "epoch": 3.195641607258587, "grad_norm": 0.07270010560750961, "learning_rate": 4.468697961204375e-05, "loss": 0.2465, "step": 39447 }, { "epoch": 3.1957226182760854, "grad_norm": 0.06477028131484985, "learning_rate": 4.468247895944912e-05, "loss": 0.2283, "step": 39448 }, { "epoch": 3.195803629293584, "grad_norm": 0.06298864632844925, "learning_rate": 4.46779783068545e-05, "loss": 0.2256, "step": 39449 }, { "epoch": 3.1958846403110823, "grad_norm": 0.081520214676857, "learning_rate": 4.467347765425987e-05, "loss": 0.2327, "step": 39450 }, { "epoch": 3.1959656513285806, "grad_norm": 0.08572669327259064, "learning_rate": 4.466897700166524e-05, "loss": 0.2633, "step": 39451 }, { "epoch": 3.1960466623460793, "grad_norm": 0.07522355765104294, "learning_rate": 4.466447634907062e-05, "loss": 0.2173, "step": 39452 }, { "epoch": 3.1961276733635775, "grad_norm": 0.07185790687799454, "learning_rate": 4.465997569647599e-05, "loss": 0.2607, "step": 39453 }, { "epoch": 3.1962086843810757, "grad_norm": 0.07226600497961044, "learning_rate": 4.4655475043881366e-05, "loss": 0.2411, "step": 39454 }, { "epoch": 3.1962896953985744, "grad_norm": 0.06172429025173187, "learning_rate": 4.465097439128674e-05, "loss": 0.1842, "step": 39455 }, { "epoch": 3.1963707064160727, "grad_norm": 0.0709967091679573, "learning_rate": 4.4646473738692114e-05, "loss": 0.2145, "step": 39456 }, { "epoch": 3.196451717433571, "grad_norm": 0.07477506250143051, "learning_rate": 4.464197308609749e-05, "loss": 0.2486, "step": 39457 }, { "epoch": 3.196532728451069, "grad_norm": 0.07171639055013657, "learning_rate": 4.463747243350286e-05, "loss": 0.2086, "step": 39458 }, { "epoch": 3.196613739468568, "grad_norm": 0.060899294912815094, "learning_rate": 4.4632971780908234e-05, "loss": 0.2491, "step": 39459 }, { "epoch": 3.196694750486066, "grad_norm": 0.0627385824918747, "learning_rate": 4.462847112831361e-05, "loss": 0.2359, "step": 39460 }, { "epoch": 3.1967757615035644, "grad_norm": 0.07605332881212234, "learning_rate": 4.462397047571898e-05, "loss": 0.2241, "step": 39461 }, { "epoch": 3.196856772521063, "grad_norm": 0.06061991676688194, "learning_rate": 4.4619469823124355e-05, "loss": 0.2445, "step": 39462 }, { "epoch": 3.1969377835385613, "grad_norm": 0.07427114248275757, "learning_rate": 4.461496917052973e-05, "loss": 0.2162, "step": 39463 }, { "epoch": 3.1970187945560595, "grad_norm": 0.09817933291196823, "learning_rate": 4.46104685179351e-05, "loss": 0.2217, "step": 39464 }, { "epoch": 3.1970998055735578, "grad_norm": 0.07270355522632599, "learning_rate": 4.4605967865340476e-05, "loss": 0.1933, "step": 39465 }, { "epoch": 3.1971808165910565, "grad_norm": 0.06888288259506226, "learning_rate": 4.460146721274585e-05, "loss": 0.2389, "step": 39466 }, { "epoch": 3.1972618276085547, "grad_norm": 0.05891997739672661, "learning_rate": 4.459696656015122e-05, "loss": 0.1841, "step": 39467 }, { "epoch": 3.197342838626053, "grad_norm": 0.07122666388750076, "learning_rate": 4.45924659075566e-05, "loss": 0.2278, "step": 39468 }, { "epoch": 3.1974238496435516, "grad_norm": 0.0724184587597847, "learning_rate": 4.458796525496197e-05, "loss": 0.2311, "step": 39469 }, { "epoch": 3.19750486066105, "grad_norm": 0.08630391210317612, "learning_rate": 4.458346460236735e-05, "loss": 0.2416, "step": 39470 }, { "epoch": 3.197585871678548, "grad_norm": 0.07383977621793747, "learning_rate": 4.457896394977272e-05, "loss": 0.2148, "step": 39471 }, { "epoch": 3.197666882696047, "grad_norm": 0.07253684848546982, "learning_rate": 4.457446329717809e-05, "loss": 0.1938, "step": 39472 }, { "epoch": 3.197747893713545, "grad_norm": 0.08100492507219315, "learning_rate": 4.456996264458347e-05, "loss": 0.2485, "step": 39473 }, { "epoch": 3.1978289047310433, "grad_norm": 0.0740998312830925, "learning_rate": 4.456546199198884e-05, "loss": 0.2162, "step": 39474 }, { "epoch": 3.197909915748542, "grad_norm": 0.08284035325050354, "learning_rate": 4.456096133939421e-05, "loss": 0.2451, "step": 39475 }, { "epoch": 3.1979909267660402, "grad_norm": 0.09450192749500275, "learning_rate": 4.455646068679959e-05, "loss": 0.293, "step": 39476 }, { "epoch": 3.1980719377835385, "grad_norm": 0.06692380458116531, "learning_rate": 4.455196003420496e-05, "loss": 0.227, "step": 39477 }, { "epoch": 3.198152948801037, "grad_norm": 0.0706232488155365, "learning_rate": 4.454745938161033e-05, "loss": 0.251, "step": 39478 }, { "epoch": 3.1982339598185354, "grad_norm": 0.06894442439079285, "learning_rate": 4.454295872901571e-05, "loss": 0.2228, "step": 39479 }, { "epoch": 3.1983149708360337, "grad_norm": 0.061690591275691986, "learning_rate": 4.453845807642109e-05, "loss": 0.183, "step": 39480 }, { "epoch": 3.198395981853532, "grad_norm": 0.07705006748437881, "learning_rate": 4.4533957423826454e-05, "loss": 0.2654, "step": 39481 }, { "epoch": 3.1984769928710306, "grad_norm": 0.06700455397367477, "learning_rate": 4.4529456771231834e-05, "loss": 0.2207, "step": 39482 }, { "epoch": 3.198558003888529, "grad_norm": 0.07812342047691345, "learning_rate": 4.452495611863721e-05, "loss": 0.2465, "step": 39483 }, { "epoch": 3.198639014906027, "grad_norm": 0.07196211814880371, "learning_rate": 4.4520455466042574e-05, "loss": 0.2448, "step": 39484 }, { "epoch": 3.198720025923526, "grad_norm": 0.09784740954637527, "learning_rate": 4.4515954813447955e-05, "loss": 0.237, "step": 39485 }, { "epoch": 3.198801036941024, "grad_norm": 0.061616454273462296, "learning_rate": 4.451145416085333e-05, "loss": 0.222, "step": 39486 }, { "epoch": 3.1988820479585223, "grad_norm": 0.0834859237074852, "learning_rate": 4.4506953508258695e-05, "loss": 0.227, "step": 39487 }, { "epoch": 3.1989630589760205, "grad_norm": 0.07035014033317566, "learning_rate": 4.4502452855664076e-05, "loss": 0.2646, "step": 39488 }, { "epoch": 3.199044069993519, "grad_norm": 0.05911833420395851, "learning_rate": 4.449795220306945e-05, "loss": 0.198, "step": 39489 }, { "epoch": 3.1991250810110174, "grad_norm": 0.0757574588060379, "learning_rate": 4.4493451550474816e-05, "loss": 0.2499, "step": 39490 }, { "epoch": 3.1992060920285157, "grad_norm": 0.07520738989114761, "learning_rate": 4.4488950897880196e-05, "loss": 0.2356, "step": 39491 }, { "epoch": 3.1992871030460144, "grad_norm": 0.07697313278913498, "learning_rate": 4.448445024528557e-05, "loss": 0.2107, "step": 39492 }, { "epoch": 3.1993681140635126, "grad_norm": 0.06699033081531525, "learning_rate": 4.4479949592690944e-05, "loss": 0.2068, "step": 39493 }, { "epoch": 3.199449125081011, "grad_norm": 0.06737679988145828, "learning_rate": 4.447544894009632e-05, "loss": 0.2204, "step": 39494 }, { "epoch": 3.1995301360985096, "grad_norm": 0.07735428959131241, "learning_rate": 4.447094828750169e-05, "loss": 0.2074, "step": 39495 }, { "epoch": 3.199611147116008, "grad_norm": 0.07633911818265915, "learning_rate": 4.4466447634907065e-05, "loss": 0.2447, "step": 39496 }, { "epoch": 3.199692158133506, "grad_norm": 0.07352489233016968, "learning_rate": 4.446194698231244e-05, "loss": 0.1873, "step": 39497 }, { "epoch": 3.1997731691510047, "grad_norm": 0.05530984699726105, "learning_rate": 4.445744632971781e-05, "loss": 0.2343, "step": 39498 }, { "epoch": 3.199854180168503, "grad_norm": 0.06358368694782257, "learning_rate": 4.4452945677123185e-05, "loss": 0.182, "step": 39499 }, { "epoch": 3.1999351911860012, "grad_norm": 0.060369789600372314, "learning_rate": 4.444844502452856e-05, "loss": 0.2203, "step": 39500 }, { "epoch": 3.2000162022034995, "grad_norm": 0.07927286624908447, "learning_rate": 4.444394437193393e-05, "loss": 0.2598, "step": 39501 }, { "epoch": 3.200097213220998, "grad_norm": 0.06475785374641418, "learning_rate": 4.4439443719339306e-05, "loss": 0.2214, "step": 39502 }, { "epoch": 3.2001782242384964, "grad_norm": 0.08461401611566544, "learning_rate": 4.443494306674468e-05, "loss": 0.2505, "step": 39503 }, { "epoch": 3.2002592352559946, "grad_norm": 0.07525065541267395, "learning_rate": 4.443044241415005e-05, "loss": 0.2428, "step": 39504 }, { "epoch": 3.2003402462734933, "grad_norm": 0.05696845054626465, "learning_rate": 4.442594176155543e-05, "loss": 0.2137, "step": 39505 }, { "epoch": 3.2004212572909916, "grad_norm": 0.06406644731760025, "learning_rate": 4.442144110896081e-05, "loss": 0.232, "step": 39506 }, { "epoch": 3.20050226830849, "grad_norm": 0.07079577445983887, "learning_rate": 4.4416940456366174e-05, "loss": 0.2187, "step": 39507 }, { "epoch": 3.2005832793259885, "grad_norm": 0.07825680077075958, "learning_rate": 4.441243980377155e-05, "loss": 0.2354, "step": 39508 }, { "epoch": 3.2006642903434868, "grad_norm": 0.0631539449095726, "learning_rate": 4.440793915117693e-05, "loss": 0.2138, "step": 39509 }, { "epoch": 3.200745301360985, "grad_norm": 0.09467824548482895, "learning_rate": 4.4403438498582295e-05, "loss": 0.2421, "step": 39510 }, { "epoch": 3.2008263123784833, "grad_norm": 0.07566626369953156, "learning_rate": 4.439893784598767e-05, "loss": 0.2237, "step": 39511 }, { "epoch": 3.200907323395982, "grad_norm": 0.07604029029607773, "learning_rate": 4.439443719339305e-05, "loss": 0.2669, "step": 39512 }, { "epoch": 3.20098833441348, "grad_norm": 0.07466589659452438, "learning_rate": 4.4389936540798416e-05, "loss": 0.2227, "step": 39513 }, { "epoch": 3.2010693454309784, "grad_norm": 0.07292468100786209, "learning_rate": 4.438543588820379e-05, "loss": 0.2382, "step": 39514 }, { "epoch": 3.201150356448477, "grad_norm": 0.06553184986114502, "learning_rate": 4.438093523560917e-05, "loss": 0.1965, "step": 39515 }, { "epoch": 3.2012313674659754, "grad_norm": 0.06865046918392181, "learning_rate": 4.4376434583014537e-05, "loss": 0.216, "step": 39516 }, { "epoch": 3.2013123784834736, "grad_norm": 0.06584342569112778, "learning_rate": 4.437193393041991e-05, "loss": 0.2279, "step": 39517 }, { "epoch": 3.2013933895009723, "grad_norm": 0.0877319946885109, "learning_rate": 4.436743327782529e-05, "loss": 0.1958, "step": 39518 }, { "epoch": 3.2014744005184705, "grad_norm": 0.0780157521367073, "learning_rate": 4.4362932625230664e-05, "loss": 0.237, "step": 39519 }, { "epoch": 3.201555411535969, "grad_norm": 0.057421378791332245, "learning_rate": 4.435843197263603e-05, "loss": 0.2052, "step": 39520 }, { "epoch": 3.2016364225534675, "grad_norm": 0.085902638733387, "learning_rate": 4.435393132004141e-05, "loss": 0.2236, "step": 39521 }, { "epoch": 3.2017174335709657, "grad_norm": 0.07996001839637756, "learning_rate": 4.4349430667446785e-05, "loss": 0.2445, "step": 39522 }, { "epoch": 3.201798444588464, "grad_norm": 0.06373366713523865, "learning_rate": 4.434493001485215e-05, "loss": 0.2834, "step": 39523 }, { "epoch": 3.201879455605962, "grad_norm": 0.09339869022369385, "learning_rate": 4.434042936225753e-05, "loss": 0.2771, "step": 39524 }, { "epoch": 3.201960466623461, "grad_norm": 0.07182280719280243, "learning_rate": 4.4335928709662906e-05, "loss": 0.2251, "step": 39525 }, { "epoch": 3.202041477640959, "grad_norm": 0.07255586981773376, "learning_rate": 4.433142805706827e-05, "loss": 0.22, "step": 39526 }, { "epoch": 3.2021224886584574, "grad_norm": 0.09170207381248474, "learning_rate": 4.432692740447365e-05, "loss": 0.3099, "step": 39527 }, { "epoch": 3.202203499675956, "grad_norm": 0.07020127028226852, "learning_rate": 4.4322426751879027e-05, "loss": 0.2411, "step": 39528 }, { "epoch": 3.2022845106934543, "grad_norm": 0.10357783734798431, "learning_rate": 4.4317926099284393e-05, "loss": 0.2265, "step": 39529 }, { "epoch": 3.2023655217109526, "grad_norm": 0.05146079882979393, "learning_rate": 4.4313425446689774e-05, "loss": 0.2233, "step": 39530 }, { "epoch": 3.2024465327284513, "grad_norm": 0.058229777961969376, "learning_rate": 4.430892479409515e-05, "loss": 0.2408, "step": 39531 }, { "epoch": 3.2025275437459495, "grad_norm": 0.06619949638843536, "learning_rate": 4.430442414150052e-05, "loss": 0.2193, "step": 39532 }, { "epoch": 3.2026085547634477, "grad_norm": 0.05871860682964325, "learning_rate": 4.4299923488905895e-05, "loss": 0.2181, "step": 39533 }, { "epoch": 3.202689565780946, "grad_norm": 0.08122187852859497, "learning_rate": 4.429542283631127e-05, "loss": 0.2359, "step": 39534 }, { "epoch": 3.2027705767984447, "grad_norm": 0.06815316528081894, "learning_rate": 4.429092218371664e-05, "loss": 0.2656, "step": 39535 }, { "epoch": 3.202851587815943, "grad_norm": 0.07689882069826126, "learning_rate": 4.4286421531122015e-05, "loss": 0.2508, "step": 39536 }, { "epoch": 3.202932598833441, "grad_norm": 0.05379384383559227, "learning_rate": 4.428192087852739e-05, "loss": 0.1968, "step": 39537 }, { "epoch": 3.20301360985094, "grad_norm": 0.06135953962802887, "learning_rate": 4.427742022593276e-05, "loss": 0.2714, "step": 39538 }, { "epoch": 3.203094620868438, "grad_norm": 0.07056459784507751, "learning_rate": 4.4272919573338136e-05, "loss": 0.2241, "step": 39539 }, { "epoch": 3.2031756318859363, "grad_norm": 0.06879568845033646, "learning_rate": 4.426841892074351e-05, "loss": 0.2303, "step": 39540 }, { "epoch": 3.203256642903435, "grad_norm": 0.05646612122654915, "learning_rate": 4.4263918268148883e-05, "loss": 0.2042, "step": 39541 }, { "epoch": 3.2033376539209333, "grad_norm": 0.06926844269037247, "learning_rate": 4.425941761555426e-05, "loss": 0.2233, "step": 39542 }, { "epoch": 3.2034186649384315, "grad_norm": 0.07639966160058975, "learning_rate": 4.425491696295963e-05, "loss": 0.2522, "step": 39543 }, { "epoch": 3.20349967595593, "grad_norm": 0.05440759286284447, "learning_rate": 4.4250416310365004e-05, "loss": 0.2026, "step": 39544 }, { "epoch": 3.2035806869734285, "grad_norm": 0.06447576731443405, "learning_rate": 4.4245915657770385e-05, "loss": 0.242, "step": 39545 }, { "epoch": 3.2036616979909267, "grad_norm": 0.0778748095035553, "learning_rate": 4.424141500517575e-05, "loss": 0.2519, "step": 39546 }, { "epoch": 3.203742709008425, "grad_norm": 0.07720351964235306, "learning_rate": 4.4236914352581125e-05, "loss": 0.2254, "step": 39547 }, { "epoch": 3.2038237200259236, "grad_norm": 0.07186136394739151, "learning_rate": 4.4232413699986505e-05, "loss": 0.2052, "step": 39548 }, { "epoch": 3.203904731043422, "grad_norm": 0.06151605397462845, "learning_rate": 4.422791304739187e-05, "loss": 0.1893, "step": 39549 }, { "epoch": 3.20398574206092, "grad_norm": 0.07115894556045532, "learning_rate": 4.4223412394797246e-05, "loss": 0.2142, "step": 39550 }, { "epoch": 3.204066753078419, "grad_norm": 0.062248844653367996, "learning_rate": 4.4218911742202626e-05, "loss": 0.2066, "step": 39551 }, { "epoch": 3.204147764095917, "grad_norm": 0.06971555203199387, "learning_rate": 4.421441108960799e-05, "loss": 0.218, "step": 39552 }, { "epoch": 3.2042287751134153, "grad_norm": 0.07507779449224472, "learning_rate": 4.420991043701337e-05, "loss": 0.2896, "step": 39553 }, { "epoch": 3.204309786130914, "grad_norm": 0.08263830095529556, "learning_rate": 4.420540978441875e-05, "loss": 0.2203, "step": 39554 }, { "epoch": 3.2043907971484122, "grad_norm": 0.06579352170228958, "learning_rate": 4.4200909131824114e-05, "loss": 0.2482, "step": 39555 }, { "epoch": 3.2044718081659105, "grad_norm": 0.07024890184402466, "learning_rate": 4.419640847922949e-05, "loss": 0.2254, "step": 39556 }, { "epoch": 3.2045528191834087, "grad_norm": 0.058557070791721344, "learning_rate": 4.419190782663487e-05, "loss": 0.1969, "step": 39557 }, { "epoch": 3.2046338302009074, "grad_norm": 0.07652173936367035, "learning_rate": 4.418740717404024e-05, "loss": 0.2163, "step": 39558 }, { "epoch": 3.2047148412184057, "grad_norm": 0.07383786141872406, "learning_rate": 4.418290652144561e-05, "loss": 0.2454, "step": 39559 }, { "epoch": 3.204795852235904, "grad_norm": 0.07195722311735153, "learning_rate": 4.417840586885099e-05, "loss": 0.2394, "step": 39560 }, { "epoch": 3.2048768632534026, "grad_norm": 0.0840165838599205, "learning_rate": 4.417390521625636e-05, "loss": 0.2379, "step": 39561 }, { "epoch": 3.204957874270901, "grad_norm": 0.07644258439540863, "learning_rate": 4.416940456366173e-05, "loss": 0.2611, "step": 39562 }, { "epoch": 3.205038885288399, "grad_norm": 0.06418444216251373, "learning_rate": 4.416490391106711e-05, "loss": 0.2305, "step": 39563 }, { "epoch": 3.2051198963058978, "grad_norm": 0.07186094671487808, "learning_rate": 4.416040325847248e-05, "loss": 0.2325, "step": 39564 }, { "epoch": 3.205200907323396, "grad_norm": 0.05062396451830864, "learning_rate": 4.415590260587785e-05, "loss": 0.222, "step": 39565 }, { "epoch": 3.2052819183408943, "grad_norm": 0.07012312859296799, "learning_rate": 4.415140195328323e-05, "loss": 0.2384, "step": 39566 }, { "epoch": 3.205362929358393, "grad_norm": 0.0643838495016098, "learning_rate": 4.4146901300688604e-05, "loss": 0.2304, "step": 39567 }, { "epoch": 3.205443940375891, "grad_norm": 0.07166772335767746, "learning_rate": 4.414240064809397e-05, "loss": 0.2272, "step": 39568 }, { "epoch": 3.2055249513933894, "grad_norm": 0.07338915765285492, "learning_rate": 4.413789999549935e-05, "loss": 0.1954, "step": 39569 }, { "epoch": 3.2056059624108877, "grad_norm": 0.07821765542030334, "learning_rate": 4.4133399342904725e-05, "loss": 0.2588, "step": 39570 }, { "epoch": 3.2056869734283864, "grad_norm": 0.06987085193395615, "learning_rate": 4.41288986903101e-05, "loss": 0.232, "step": 39571 }, { "epoch": 3.2057679844458846, "grad_norm": 0.07386733591556549, "learning_rate": 4.412439803771547e-05, "loss": 0.2104, "step": 39572 }, { "epoch": 3.205848995463383, "grad_norm": 0.06136629357933998, "learning_rate": 4.4119897385120846e-05, "loss": 0.2384, "step": 39573 }, { "epoch": 3.2059300064808816, "grad_norm": 0.06790085136890411, "learning_rate": 4.411539673252622e-05, "loss": 0.1765, "step": 39574 }, { "epoch": 3.20601101749838, "grad_norm": 0.0645691379904747, "learning_rate": 4.411089607993159e-05, "loss": 0.1908, "step": 39575 }, { "epoch": 3.206092028515878, "grad_norm": 0.06009569764137268, "learning_rate": 4.4106395427336966e-05, "loss": 0.2207, "step": 39576 }, { "epoch": 3.2061730395333767, "grad_norm": 0.06850836426019669, "learning_rate": 4.410189477474234e-05, "loss": 0.2248, "step": 39577 }, { "epoch": 3.206254050550875, "grad_norm": 0.07242856919765472, "learning_rate": 4.4097394122147714e-05, "loss": 0.2094, "step": 39578 }, { "epoch": 3.2063350615683732, "grad_norm": 0.06407655775547028, "learning_rate": 4.409289346955309e-05, "loss": 0.2143, "step": 39579 }, { "epoch": 3.2064160725858715, "grad_norm": 0.06614059209823608, "learning_rate": 4.408839281695846e-05, "loss": 0.2229, "step": 39580 }, { "epoch": 3.20649708360337, "grad_norm": 0.06821591407060623, "learning_rate": 4.4083892164363834e-05, "loss": 0.2574, "step": 39581 }, { "epoch": 3.2065780946208684, "grad_norm": 0.056826550513505936, "learning_rate": 4.407939151176921e-05, "loss": 0.1807, "step": 39582 }, { "epoch": 3.2066591056383666, "grad_norm": 0.053199753165245056, "learning_rate": 4.407489085917458e-05, "loss": 0.1914, "step": 39583 }, { "epoch": 3.2067401166558653, "grad_norm": 0.0675586462020874, "learning_rate": 4.4070390206579955e-05, "loss": 0.217, "step": 39584 }, { "epoch": 3.2068211276733636, "grad_norm": 0.08078780025243759, "learning_rate": 4.406588955398533e-05, "loss": 0.2329, "step": 39585 }, { "epoch": 3.206902138690862, "grad_norm": 0.06861063092947006, "learning_rate": 4.40613889013907e-05, "loss": 0.2259, "step": 39586 }, { "epoch": 3.2069831497083605, "grad_norm": 0.07793055474758148, "learning_rate": 4.405688824879608e-05, "loss": 0.2086, "step": 39587 }, { "epoch": 3.2070641607258588, "grad_norm": 0.06235434487462044, "learning_rate": 4.405238759620145e-05, "loss": 0.2656, "step": 39588 }, { "epoch": 3.207145171743357, "grad_norm": 0.05257926881313324, "learning_rate": 4.404788694360682e-05, "loss": 0.1803, "step": 39589 }, { "epoch": 3.2072261827608557, "grad_norm": 0.0629105344414711, "learning_rate": 4.4043386291012204e-05, "loss": 0.1994, "step": 39590 }, { "epoch": 3.207307193778354, "grad_norm": 0.060108739882707596, "learning_rate": 4.403888563841757e-05, "loss": 0.2169, "step": 39591 }, { "epoch": 3.207388204795852, "grad_norm": 0.058636438101530075, "learning_rate": 4.4034384985822944e-05, "loss": 0.2089, "step": 39592 }, { "epoch": 3.2074692158133504, "grad_norm": 0.06358274072408676, "learning_rate": 4.4029884333228324e-05, "loss": 0.2394, "step": 39593 }, { "epoch": 3.207550226830849, "grad_norm": 0.05463390052318573, "learning_rate": 4.402538368063369e-05, "loss": 0.1892, "step": 39594 }, { "epoch": 3.2076312378483474, "grad_norm": 0.0995706096291542, "learning_rate": 4.4020883028039065e-05, "loss": 0.2542, "step": 39595 }, { "epoch": 3.2077122488658456, "grad_norm": 0.07657454162836075, "learning_rate": 4.4016382375444445e-05, "loss": 0.2413, "step": 39596 }, { "epoch": 3.2077932598833443, "grad_norm": 0.0618801973760128, "learning_rate": 4.401188172284981e-05, "loss": 0.2223, "step": 39597 }, { "epoch": 3.2078742709008425, "grad_norm": 0.05365942418575287, "learning_rate": 4.4007381070255186e-05, "loss": 0.2279, "step": 39598 }, { "epoch": 3.207955281918341, "grad_norm": 0.06541714072227478, "learning_rate": 4.4002880417660566e-05, "loss": 0.1824, "step": 39599 }, { "epoch": 3.2080362929358395, "grad_norm": 0.06659148633480072, "learning_rate": 4.399837976506594e-05, "loss": 0.2059, "step": 39600 }, { "epoch": 3.2081173039533377, "grad_norm": 0.07549691945314407, "learning_rate": 4.3993879112471306e-05, "loss": 0.2436, "step": 39601 }, { "epoch": 3.208198314970836, "grad_norm": 0.06712198257446289, "learning_rate": 4.398937845987669e-05, "loss": 0.2181, "step": 39602 }, { "epoch": 3.208279325988334, "grad_norm": 0.07315943390130997, "learning_rate": 4.398487780728206e-05, "loss": 0.2334, "step": 39603 }, { "epoch": 3.208360337005833, "grad_norm": 0.06703237444162369, "learning_rate": 4.398037715468743e-05, "loss": 0.2409, "step": 39604 }, { "epoch": 3.208441348023331, "grad_norm": 0.07603147625923157, "learning_rate": 4.397587650209281e-05, "loss": 0.2644, "step": 39605 }, { "epoch": 3.2085223590408294, "grad_norm": 0.07170654088258743, "learning_rate": 4.397137584949818e-05, "loss": 0.247, "step": 39606 }, { "epoch": 3.208603370058328, "grad_norm": 0.08088883757591248, "learning_rate": 4.396687519690355e-05, "loss": 0.2449, "step": 39607 }, { "epoch": 3.2086843810758263, "grad_norm": 0.06888744235038757, "learning_rate": 4.396237454430893e-05, "loss": 0.2266, "step": 39608 }, { "epoch": 3.2087653920933246, "grad_norm": 0.06969289481639862, "learning_rate": 4.39578738917143e-05, "loss": 0.2248, "step": 39609 }, { "epoch": 3.2088464031108233, "grad_norm": 0.07196550071239471, "learning_rate": 4.395337323911967e-05, "loss": 0.213, "step": 39610 }, { "epoch": 3.2089274141283215, "grad_norm": 0.04962148889899254, "learning_rate": 4.394887258652505e-05, "loss": 0.2131, "step": 39611 }, { "epoch": 3.2090084251458197, "grad_norm": 0.06372936069965363, "learning_rate": 4.394437193393042e-05, "loss": 0.2421, "step": 39612 }, { "epoch": 3.2090894361633184, "grad_norm": 0.05823584273457527, "learning_rate": 4.3939871281335796e-05, "loss": 0.2137, "step": 39613 }, { "epoch": 3.2091704471808167, "grad_norm": 0.06661396473646164, "learning_rate": 4.393537062874117e-05, "loss": 0.2285, "step": 39614 }, { "epoch": 3.209251458198315, "grad_norm": 0.07596556842327118, "learning_rate": 4.3930869976146544e-05, "loss": 0.2256, "step": 39615 }, { "epoch": 3.209332469215813, "grad_norm": 0.08950075507164001, "learning_rate": 4.392636932355192e-05, "loss": 0.2299, "step": 39616 }, { "epoch": 3.209413480233312, "grad_norm": 0.060839664191007614, "learning_rate": 4.392186867095729e-05, "loss": 0.1991, "step": 39617 }, { "epoch": 3.20949449125081, "grad_norm": 0.08028296381235123, "learning_rate": 4.3917368018362664e-05, "loss": 0.2459, "step": 39618 }, { "epoch": 3.2095755022683083, "grad_norm": 0.08345425873994827, "learning_rate": 4.391286736576804e-05, "loss": 0.2446, "step": 39619 }, { "epoch": 3.209656513285807, "grad_norm": 0.09471633285284042, "learning_rate": 4.390836671317341e-05, "loss": 0.2773, "step": 39620 }, { "epoch": 3.2097375243033053, "grad_norm": 0.08462955057621002, "learning_rate": 4.3903866060578785e-05, "loss": 0.2488, "step": 39621 }, { "epoch": 3.2098185353208035, "grad_norm": 0.0768325999379158, "learning_rate": 4.389936540798416e-05, "loss": 0.2148, "step": 39622 }, { "epoch": 3.209899546338302, "grad_norm": 0.08202141523361206, "learning_rate": 4.389486475538953e-05, "loss": 0.2505, "step": 39623 }, { "epoch": 3.2099805573558005, "grad_norm": 0.06823612749576569, "learning_rate": 4.3890364102794906e-05, "loss": 0.2475, "step": 39624 }, { "epoch": 3.2100615683732987, "grad_norm": 0.08542759716510773, "learning_rate": 4.388586345020028e-05, "loss": 0.2673, "step": 39625 }, { "epoch": 3.210142579390797, "grad_norm": 0.06803088635206223, "learning_rate": 4.388136279760566e-05, "loss": 0.2603, "step": 39626 }, { "epoch": 3.2102235904082956, "grad_norm": 0.06601130217313766, "learning_rate": 4.387686214501103e-05, "loss": 0.2041, "step": 39627 }, { "epoch": 3.210304601425794, "grad_norm": 0.07751946896314621, "learning_rate": 4.38723614924164e-05, "loss": 0.2676, "step": 39628 }, { "epoch": 3.210385612443292, "grad_norm": 0.06715001910924911, "learning_rate": 4.386786083982178e-05, "loss": 0.2719, "step": 39629 }, { "epoch": 3.210466623460791, "grad_norm": 0.07272826135158539, "learning_rate": 4.386336018722715e-05, "loss": 0.1906, "step": 39630 }, { "epoch": 3.210547634478289, "grad_norm": 0.06991291791200638, "learning_rate": 4.385885953463252e-05, "loss": 0.2383, "step": 39631 }, { "epoch": 3.2106286454957873, "grad_norm": 0.07351662963628769, "learning_rate": 4.38543588820379e-05, "loss": 0.2545, "step": 39632 }, { "epoch": 3.210709656513286, "grad_norm": 0.06192673742771149, "learning_rate": 4.384985822944327e-05, "loss": 0.2378, "step": 39633 }, { "epoch": 3.2107906675307842, "grad_norm": 0.06056393310427666, "learning_rate": 4.384535757684864e-05, "loss": 0.2338, "step": 39634 }, { "epoch": 3.2108716785482825, "grad_norm": 0.07918339222669601, "learning_rate": 4.384085692425402e-05, "loss": 0.2393, "step": 39635 }, { "epoch": 3.210952689565781, "grad_norm": 0.0640377625823021, "learning_rate": 4.383635627165939e-05, "loss": 0.2395, "step": 39636 }, { "epoch": 3.2110337005832794, "grad_norm": 0.0642380639910698, "learning_rate": 4.383185561906476e-05, "loss": 0.2441, "step": 39637 }, { "epoch": 3.2111147116007777, "grad_norm": 0.08133316785097122, "learning_rate": 4.382735496647014e-05, "loss": 0.2384, "step": 39638 }, { "epoch": 3.211195722618276, "grad_norm": 0.06930825859308243, "learning_rate": 4.382285431387552e-05, "loss": 0.2195, "step": 39639 }, { "epoch": 3.2112767336357746, "grad_norm": 0.07919151335954666, "learning_rate": 4.3818353661280884e-05, "loss": 0.2524, "step": 39640 }, { "epoch": 3.211357744653273, "grad_norm": 0.06062348932027817, "learning_rate": 4.3813853008686264e-05, "loss": 0.2116, "step": 39641 }, { "epoch": 3.211438755670771, "grad_norm": 0.06613833457231522, "learning_rate": 4.380935235609164e-05, "loss": 0.2114, "step": 39642 }, { "epoch": 3.2115197666882698, "grad_norm": 0.0750194787979126, "learning_rate": 4.3804851703497005e-05, "loss": 0.2361, "step": 39643 }, { "epoch": 3.211600777705768, "grad_norm": 0.06285949796438217, "learning_rate": 4.3800351050902385e-05, "loss": 0.2189, "step": 39644 }, { "epoch": 3.2116817887232663, "grad_norm": 0.08646561950445175, "learning_rate": 4.379585039830776e-05, "loss": 0.2534, "step": 39645 }, { "epoch": 3.211762799740765, "grad_norm": 0.07352367043495178, "learning_rate": 4.3791349745713125e-05, "loss": 0.2232, "step": 39646 }, { "epoch": 3.211843810758263, "grad_norm": 0.0724693015217781, "learning_rate": 4.3786849093118506e-05, "loss": 0.2481, "step": 39647 }, { "epoch": 3.2119248217757614, "grad_norm": 0.07707555592060089, "learning_rate": 4.378234844052388e-05, "loss": 0.2501, "step": 39648 }, { "epoch": 3.2120058327932597, "grad_norm": 0.06952345371246338, "learning_rate": 4.3777847787929246e-05, "loss": 0.2316, "step": 39649 }, { "epoch": 3.2120868438107584, "grad_norm": 0.07168091088533401, "learning_rate": 4.3773347135334627e-05, "loss": 0.2457, "step": 39650 }, { "epoch": 3.2121678548282566, "grad_norm": 0.06950316578149796, "learning_rate": 4.376884648274e-05, "loss": 0.227, "step": 39651 }, { "epoch": 3.212248865845755, "grad_norm": 0.07516512274742126, "learning_rate": 4.3764345830145374e-05, "loss": 0.233, "step": 39652 }, { "epoch": 3.2123298768632536, "grad_norm": 0.07295958697795868, "learning_rate": 4.375984517755075e-05, "loss": 0.182, "step": 39653 }, { "epoch": 3.212410887880752, "grad_norm": 0.06907928735017776, "learning_rate": 4.375534452495612e-05, "loss": 0.202, "step": 39654 }, { "epoch": 3.21249189889825, "grad_norm": 0.07010002434253693, "learning_rate": 4.3750843872361495e-05, "loss": 0.2223, "step": 39655 }, { "epoch": 3.2125729099157487, "grad_norm": 0.08001629263162613, "learning_rate": 4.374634321976687e-05, "loss": 0.249, "step": 39656 }, { "epoch": 3.212653920933247, "grad_norm": 0.07292798161506653, "learning_rate": 4.374184256717224e-05, "loss": 0.2687, "step": 39657 }, { "epoch": 3.212734931950745, "grad_norm": 0.06464848667383194, "learning_rate": 4.3737341914577615e-05, "loss": 0.221, "step": 39658 }, { "epoch": 3.212815942968244, "grad_norm": 0.06706438958644867, "learning_rate": 4.373284126198299e-05, "loss": 0.2298, "step": 39659 }, { "epoch": 3.212896953985742, "grad_norm": 0.07052440941333771, "learning_rate": 4.372834060938836e-05, "loss": 0.2114, "step": 39660 }, { "epoch": 3.2129779650032404, "grad_norm": 0.07753120362758636, "learning_rate": 4.3723839956793736e-05, "loss": 0.2355, "step": 39661 }, { "epoch": 3.2130589760207386, "grad_norm": 0.09529384970664978, "learning_rate": 4.371933930419911e-05, "loss": 0.2509, "step": 39662 }, { "epoch": 3.2131399870382373, "grad_norm": 0.07285930961370468, "learning_rate": 4.371483865160448e-05, "loss": 0.1771, "step": 39663 }, { "epoch": 3.2132209980557356, "grad_norm": 0.07032152265310287, "learning_rate": 4.371033799900986e-05, "loss": 0.1986, "step": 39664 }, { "epoch": 3.213302009073234, "grad_norm": 0.05863802880048752, "learning_rate": 4.370583734641524e-05, "loss": 0.2618, "step": 39665 }, { "epoch": 3.2133830200907325, "grad_norm": 0.06316442787647247, "learning_rate": 4.3701336693820604e-05, "loss": 0.2136, "step": 39666 }, { "epoch": 3.2134640311082308, "grad_norm": 0.08077801018953323, "learning_rate": 4.369683604122598e-05, "loss": 0.1927, "step": 39667 }, { "epoch": 3.213545042125729, "grad_norm": 0.07931190729141235, "learning_rate": 4.369233538863136e-05, "loss": 0.2187, "step": 39668 }, { "epoch": 3.2136260531432272, "grad_norm": 0.06426815688610077, "learning_rate": 4.3687834736036725e-05, "loss": 0.2464, "step": 39669 }, { "epoch": 3.213707064160726, "grad_norm": 0.06752730160951614, "learning_rate": 4.36833340834421e-05, "loss": 0.2327, "step": 39670 }, { "epoch": 3.213788075178224, "grad_norm": 0.06728702038526535, "learning_rate": 4.367883343084748e-05, "loss": 0.2123, "step": 39671 }, { "epoch": 3.2138690861957224, "grad_norm": 0.07387802749872208, "learning_rate": 4.3674332778252846e-05, "loss": 0.2287, "step": 39672 }, { "epoch": 3.213950097213221, "grad_norm": 0.08815514296293259, "learning_rate": 4.366983212565822e-05, "loss": 0.2163, "step": 39673 }, { "epoch": 3.2140311082307194, "grad_norm": 0.07101006805896759, "learning_rate": 4.36653314730636e-05, "loss": 0.2251, "step": 39674 }, { "epoch": 3.2141121192482176, "grad_norm": 0.07125383615493774, "learning_rate": 4.366083082046897e-05, "loss": 0.2396, "step": 39675 }, { "epoch": 3.2141931302657163, "grad_norm": 0.07291600108146667, "learning_rate": 4.365633016787434e-05, "loss": 0.2282, "step": 39676 }, { "epoch": 3.2142741412832145, "grad_norm": 0.061833396553993225, "learning_rate": 4.365182951527972e-05, "loss": 0.2206, "step": 39677 }, { "epoch": 3.214355152300713, "grad_norm": 0.057680822908878326, "learning_rate": 4.3647328862685094e-05, "loss": 0.1991, "step": 39678 }, { "epoch": 3.2144361633182115, "grad_norm": 0.07487655431032181, "learning_rate": 4.364282821009046e-05, "loss": 0.2321, "step": 39679 }, { "epoch": 3.2145171743357097, "grad_norm": 0.07213618606328964, "learning_rate": 4.363832755749584e-05, "loss": 0.2774, "step": 39680 }, { "epoch": 3.214598185353208, "grad_norm": 0.06856631487607956, "learning_rate": 4.3633826904901215e-05, "loss": 0.2266, "step": 39681 }, { "epoch": 3.2146791963707066, "grad_norm": 0.06372292339801788, "learning_rate": 4.362932625230658e-05, "loss": 0.1977, "step": 39682 }, { "epoch": 3.214760207388205, "grad_norm": 0.06928879767656326, "learning_rate": 4.362482559971196e-05, "loss": 0.2487, "step": 39683 }, { "epoch": 3.214841218405703, "grad_norm": 0.07648466527462006, "learning_rate": 4.3620324947117336e-05, "loss": 0.2154, "step": 39684 }, { "epoch": 3.2149222294232014, "grad_norm": 0.07209611684083939, "learning_rate": 4.36158242945227e-05, "loss": 0.2258, "step": 39685 }, { "epoch": 3.2150032404407, "grad_norm": 0.06907076388597488, "learning_rate": 4.361132364192808e-05, "loss": 0.2124, "step": 39686 }, { "epoch": 3.2150842514581983, "grad_norm": 0.06519749760627747, "learning_rate": 4.360682298933346e-05, "loss": 0.2407, "step": 39687 }, { "epoch": 3.2151652624756966, "grad_norm": 0.06849552690982819, "learning_rate": 4.3602322336738823e-05, "loss": 0.2258, "step": 39688 }, { "epoch": 3.2152462734931953, "grad_norm": 0.08750832825899124, "learning_rate": 4.3597821684144204e-05, "loss": 0.2739, "step": 39689 }, { "epoch": 3.2153272845106935, "grad_norm": 0.06199619174003601, "learning_rate": 4.359332103154958e-05, "loss": 0.2294, "step": 39690 }, { "epoch": 3.2154082955281917, "grad_norm": 0.05395563319325447, "learning_rate": 4.358882037895495e-05, "loss": 0.2295, "step": 39691 }, { "epoch": 3.21548930654569, "grad_norm": 0.08060085028409958, "learning_rate": 4.3584319726360325e-05, "loss": 0.2359, "step": 39692 }, { "epoch": 3.2155703175631887, "grad_norm": 0.07682564109563828, "learning_rate": 4.35798190737657e-05, "loss": 0.2066, "step": 39693 }, { "epoch": 3.215651328580687, "grad_norm": 0.08141874521970749, "learning_rate": 4.357531842117107e-05, "loss": 0.2027, "step": 39694 }, { "epoch": 3.215732339598185, "grad_norm": 0.07336656004190445, "learning_rate": 4.3570817768576445e-05, "loss": 0.2265, "step": 39695 }, { "epoch": 3.215813350615684, "grad_norm": 0.09699318557977676, "learning_rate": 4.356631711598182e-05, "loss": 0.2345, "step": 39696 }, { "epoch": 3.215894361633182, "grad_norm": 0.06387688219547272, "learning_rate": 4.356181646338719e-05, "loss": 0.1987, "step": 39697 }, { "epoch": 3.2159753726506803, "grad_norm": 0.06244372949004173, "learning_rate": 4.3557315810792566e-05, "loss": 0.198, "step": 39698 }, { "epoch": 3.216056383668179, "grad_norm": 0.07071885466575623, "learning_rate": 4.355281515819794e-05, "loss": 0.2346, "step": 39699 }, { "epoch": 3.2161373946856773, "grad_norm": 0.0655641108751297, "learning_rate": 4.3548314505603313e-05, "loss": 0.2039, "step": 39700 }, { "epoch": 3.2162184057031755, "grad_norm": 0.06710990518331528, "learning_rate": 4.354381385300869e-05, "loss": 0.2153, "step": 39701 }, { "epoch": 3.216299416720674, "grad_norm": 0.07572393864393234, "learning_rate": 4.353931320041406e-05, "loss": 0.2441, "step": 39702 }, { "epoch": 3.2163804277381725, "grad_norm": 0.07014419138431549, "learning_rate": 4.3534812547819434e-05, "loss": 0.2369, "step": 39703 }, { "epoch": 3.2164614387556707, "grad_norm": 0.06862316280603409, "learning_rate": 4.3530311895224815e-05, "loss": 0.2343, "step": 39704 }, { "epoch": 3.216542449773169, "grad_norm": 0.08650745451450348, "learning_rate": 4.352581124263018e-05, "loss": 0.2821, "step": 39705 }, { "epoch": 3.2166234607906676, "grad_norm": 0.07822742313146591, "learning_rate": 4.3521310590035555e-05, "loss": 0.2062, "step": 39706 }, { "epoch": 3.216704471808166, "grad_norm": 0.06896897405385971, "learning_rate": 4.3516809937440935e-05, "loss": 0.2286, "step": 39707 }, { "epoch": 3.216785482825664, "grad_norm": 0.05687590688467026, "learning_rate": 4.35123092848463e-05, "loss": 0.2387, "step": 39708 }, { "epoch": 3.216866493843163, "grad_norm": 0.06400833278894424, "learning_rate": 4.3507808632251676e-05, "loss": 0.2247, "step": 39709 }, { "epoch": 3.216947504860661, "grad_norm": 0.0717509388923645, "learning_rate": 4.3503307979657056e-05, "loss": 0.247, "step": 39710 }, { "epoch": 3.2170285158781593, "grad_norm": 0.08033451437950134, "learning_rate": 4.349880732706242e-05, "loss": 0.2517, "step": 39711 }, { "epoch": 3.217109526895658, "grad_norm": 0.06716078519821167, "learning_rate": 4.34943066744678e-05, "loss": 0.2185, "step": 39712 }, { "epoch": 3.2171905379131562, "grad_norm": 0.08149128407239914, "learning_rate": 4.348980602187318e-05, "loss": 0.2789, "step": 39713 }, { "epoch": 3.2172715489306545, "grad_norm": 0.09534599632024765, "learning_rate": 4.3485305369278544e-05, "loss": 0.2281, "step": 39714 }, { "epoch": 3.2173525599481527, "grad_norm": 0.08841525763273239, "learning_rate": 4.348080471668392e-05, "loss": 0.2501, "step": 39715 }, { "epoch": 3.2174335709656514, "grad_norm": 0.059520173817873, "learning_rate": 4.34763040640893e-05, "loss": 0.1922, "step": 39716 }, { "epoch": 3.2175145819831497, "grad_norm": 0.06427840888500214, "learning_rate": 4.347180341149467e-05, "loss": 0.2007, "step": 39717 }, { "epoch": 3.217595593000648, "grad_norm": 0.05985168367624283, "learning_rate": 4.346730275890004e-05, "loss": 0.188, "step": 39718 }, { "epoch": 3.2176766040181466, "grad_norm": 0.06718802452087402, "learning_rate": 4.346280210630542e-05, "loss": 0.2515, "step": 39719 }, { "epoch": 3.217757615035645, "grad_norm": 0.09119051694869995, "learning_rate": 4.345830145371079e-05, "loss": 0.2676, "step": 39720 }, { "epoch": 3.217838626053143, "grad_norm": 0.08295290172100067, "learning_rate": 4.345380080111616e-05, "loss": 0.3019, "step": 39721 }, { "epoch": 3.2179196370706418, "grad_norm": 0.065678171813488, "learning_rate": 4.344930014852154e-05, "loss": 0.2297, "step": 39722 }, { "epoch": 3.21800064808814, "grad_norm": 0.06975282728672028, "learning_rate": 4.344479949592691e-05, "loss": 0.2344, "step": 39723 }, { "epoch": 3.2180816591056383, "grad_norm": 0.08624208718538284, "learning_rate": 4.344029884333228e-05, "loss": 0.2679, "step": 39724 }, { "epoch": 3.218162670123137, "grad_norm": 0.05174512788653374, "learning_rate": 4.343579819073766e-05, "loss": 0.1905, "step": 39725 }, { "epoch": 3.218243681140635, "grad_norm": 0.06753432750701904, "learning_rate": 4.3431297538143034e-05, "loss": 0.245, "step": 39726 }, { "epoch": 3.2183246921581334, "grad_norm": 0.09749765694141388, "learning_rate": 4.342679688554841e-05, "loss": 0.2635, "step": 39727 }, { "epoch": 3.2184057031756317, "grad_norm": 0.06999699026346207, "learning_rate": 4.342229623295378e-05, "loss": 0.2477, "step": 39728 }, { "epoch": 3.2184867141931304, "grad_norm": 0.07547413557767868, "learning_rate": 4.3417795580359155e-05, "loss": 0.2268, "step": 39729 }, { "epoch": 3.2185677252106286, "grad_norm": 0.06009672209620476, "learning_rate": 4.341329492776453e-05, "loss": 0.1921, "step": 39730 }, { "epoch": 3.218648736228127, "grad_norm": 0.07534836232662201, "learning_rate": 4.34087942751699e-05, "loss": 0.2119, "step": 39731 }, { "epoch": 3.2187297472456255, "grad_norm": 0.06061408668756485, "learning_rate": 4.3404293622575276e-05, "loss": 0.2167, "step": 39732 }, { "epoch": 3.218810758263124, "grad_norm": 0.07624932378530502, "learning_rate": 4.339979296998065e-05, "loss": 0.2076, "step": 39733 }, { "epoch": 3.218891769280622, "grad_norm": 0.05887662619352341, "learning_rate": 4.339529231738602e-05, "loss": 0.2435, "step": 39734 }, { "epoch": 3.2189727802981207, "grad_norm": 0.07025136053562164, "learning_rate": 4.3390791664791396e-05, "loss": 0.2048, "step": 39735 }, { "epoch": 3.219053791315619, "grad_norm": 0.06738400459289551, "learning_rate": 4.338629101219677e-05, "loss": 0.2293, "step": 39736 }, { "epoch": 3.219134802333117, "grad_norm": 0.07455737888813019, "learning_rate": 4.3381790359602144e-05, "loss": 0.2085, "step": 39737 }, { "epoch": 3.2192158133506155, "grad_norm": 0.07953792065382004, "learning_rate": 4.337728970700752e-05, "loss": 0.1988, "step": 39738 }, { "epoch": 3.219296824368114, "grad_norm": 0.06335048377513885, "learning_rate": 4.337278905441289e-05, "loss": 0.1996, "step": 39739 }, { "epoch": 3.2193778353856124, "grad_norm": 0.0741506814956665, "learning_rate": 4.3368288401818264e-05, "loss": 0.2117, "step": 39740 }, { "epoch": 3.2194588464031106, "grad_norm": 0.062281083315610886, "learning_rate": 4.336378774922364e-05, "loss": 0.2084, "step": 39741 }, { "epoch": 3.2195398574206093, "grad_norm": 0.05702844262123108, "learning_rate": 4.335928709662901e-05, "loss": 0.2453, "step": 39742 }, { "epoch": 3.2196208684381076, "grad_norm": 0.08654730021953583, "learning_rate": 4.335478644403439e-05, "loss": 0.2111, "step": 39743 }, { "epoch": 3.219701879455606, "grad_norm": 0.07214518636465073, "learning_rate": 4.335028579143976e-05, "loss": 0.2483, "step": 39744 }, { "epoch": 3.2197828904731045, "grad_norm": 0.056577298790216446, "learning_rate": 4.334578513884513e-05, "loss": 0.2138, "step": 39745 }, { "epoch": 3.2198639014906028, "grad_norm": 0.0786077156662941, "learning_rate": 4.334128448625051e-05, "loss": 0.2266, "step": 39746 }, { "epoch": 3.219944912508101, "grad_norm": 0.07313614338636398, "learning_rate": 4.333678383365588e-05, "loss": 0.2651, "step": 39747 }, { "epoch": 3.2200259235255997, "grad_norm": 0.07528704404830933, "learning_rate": 4.333228318106125e-05, "loss": 0.249, "step": 39748 }, { "epoch": 3.220106934543098, "grad_norm": 0.0762871652841568, "learning_rate": 4.3327782528466634e-05, "loss": 0.2261, "step": 39749 }, { "epoch": 3.220187945560596, "grad_norm": 0.07066619396209717, "learning_rate": 4.3323281875872e-05, "loss": 0.2282, "step": 39750 }, { "epoch": 3.2202689565780944, "grad_norm": 0.0701572597026825, "learning_rate": 4.3318781223277374e-05, "loss": 0.2649, "step": 39751 }, { "epoch": 3.220349967595593, "grad_norm": 0.07104917615652084, "learning_rate": 4.3314280570682754e-05, "loss": 0.2487, "step": 39752 }, { "epoch": 3.2204309786130914, "grad_norm": 0.07371274381875992, "learning_rate": 4.330977991808812e-05, "loss": 0.2152, "step": 39753 }, { "epoch": 3.2205119896305896, "grad_norm": 0.057648058980703354, "learning_rate": 4.3305279265493495e-05, "loss": 0.2476, "step": 39754 }, { "epoch": 3.2205930006480883, "grad_norm": 0.06675635278224945, "learning_rate": 4.3300778612898875e-05, "loss": 0.2278, "step": 39755 }, { "epoch": 3.2206740116655865, "grad_norm": 0.061084549874067307, "learning_rate": 4.329627796030424e-05, "loss": 0.2185, "step": 39756 }, { "epoch": 3.220755022683085, "grad_norm": 0.06798110902309418, "learning_rate": 4.3291777307709616e-05, "loss": 0.2031, "step": 39757 }, { "epoch": 3.2208360337005835, "grad_norm": 0.06163044273853302, "learning_rate": 4.3287276655114996e-05, "loss": 0.2724, "step": 39758 }, { "epoch": 3.2209170447180817, "grad_norm": 0.07092875987291336, "learning_rate": 4.328277600252037e-05, "loss": 0.2326, "step": 39759 }, { "epoch": 3.22099805573558, "grad_norm": 0.07430491596460342, "learning_rate": 4.327827534992574e-05, "loss": 0.2233, "step": 39760 }, { "epoch": 3.221079066753078, "grad_norm": 0.06169954314827919, "learning_rate": 4.327377469733112e-05, "loss": 0.2433, "step": 39761 }, { "epoch": 3.221160077770577, "grad_norm": 0.056168779730796814, "learning_rate": 4.326927404473649e-05, "loss": 0.2293, "step": 39762 }, { "epoch": 3.221241088788075, "grad_norm": 0.07749295979738235, "learning_rate": 4.3264773392141864e-05, "loss": 0.2221, "step": 39763 }, { "epoch": 3.2213220998055734, "grad_norm": 0.07110145688056946, "learning_rate": 4.326027273954724e-05, "loss": 0.2397, "step": 39764 }, { "epoch": 3.221403110823072, "grad_norm": 0.0723486915230751, "learning_rate": 4.325577208695261e-05, "loss": 0.2353, "step": 39765 }, { "epoch": 3.2214841218405703, "grad_norm": 0.0709274485707283, "learning_rate": 4.3251271434357985e-05, "loss": 0.2512, "step": 39766 }, { "epoch": 3.2215651328580686, "grad_norm": 0.06597712635993958, "learning_rate": 4.324677078176336e-05, "loss": 0.2468, "step": 39767 }, { "epoch": 3.2216461438755672, "grad_norm": 0.07410014420747757, "learning_rate": 4.324227012916873e-05, "loss": 0.2577, "step": 39768 }, { "epoch": 3.2217271548930655, "grad_norm": 0.06988342851400375, "learning_rate": 4.3237769476574106e-05, "loss": 0.2031, "step": 39769 }, { "epoch": 3.2218081659105637, "grad_norm": 0.09154324978590012, "learning_rate": 4.323326882397948e-05, "loss": 0.2114, "step": 39770 }, { "epoch": 3.2218891769280624, "grad_norm": 0.070953868329525, "learning_rate": 4.322876817138485e-05, "loss": 0.2349, "step": 39771 }, { "epoch": 3.2219701879455607, "grad_norm": 0.07277769595384598, "learning_rate": 4.3224267518790226e-05, "loss": 0.2288, "step": 39772 }, { "epoch": 3.222051198963059, "grad_norm": 0.0824602022767067, "learning_rate": 4.32197668661956e-05, "loss": 0.2354, "step": 39773 }, { "epoch": 3.222132209980557, "grad_norm": 0.07566282898187637, "learning_rate": 4.3215266213600974e-05, "loss": 0.2334, "step": 39774 }, { "epoch": 3.222213220998056, "grad_norm": 0.07005234807729721, "learning_rate": 4.321076556100635e-05, "loss": 0.253, "step": 39775 }, { "epoch": 3.222294232015554, "grad_norm": 0.058707863092422485, "learning_rate": 4.320626490841172e-05, "loss": 0.2302, "step": 39776 }, { "epoch": 3.2223752430330523, "grad_norm": 0.0799265131354332, "learning_rate": 4.3201764255817094e-05, "loss": 0.2804, "step": 39777 }, { "epoch": 3.222456254050551, "grad_norm": 0.08761447668075562, "learning_rate": 4.319726360322247e-05, "loss": 0.2502, "step": 39778 }, { "epoch": 3.2225372650680493, "grad_norm": 0.07801079750061035, "learning_rate": 4.319276295062784e-05, "loss": 0.2351, "step": 39779 }, { "epoch": 3.2226182760855475, "grad_norm": 0.06580349057912827, "learning_rate": 4.3188262298033215e-05, "loss": 0.1748, "step": 39780 }, { "epoch": 3.222699287103046, "grad_norm": 0.07311670482158661, "learning_rate": 4.318376164543859e-05, "loss": 0.2315, "step": 39781 }, { "epoch": 3.2227802981205445, "grad_norm": 0.08054853975772858, "learning_rate": 4.317926099284396e-05, "loss": 0.2481, "step": 39782 }, { "epoch": 3.2228613091380427, "grad_norm": 0.08651946485042572, "learning_rate": 4.3174760340249336e-05, "loss": 0.2907, "step": 39783 }, { "epoch": 3.222942320155541, "grad_norm": 0.058403633534908295, "learning_rate": 4.317025968765471e-05, "loss": 0.223, "step": 39784 }, { "epoch": 3.2230233311730396, "grad_norm": 0.06779738515615463, "learning_rate": 4.316575903506009e-05, "loss": 0.2328, "step": 39785 }, { "epoch": 3.223104342190538, "grad_norm": 0.06688597798347473, "learning_rate": 4.316125838246546e-05, "loss": 0.2056, "step": 39786 }, { "epoch": 3.223185353208036, "grad_norm": 0.08868188410997391, "learning_rate": 4.315675772987083e-05, "loss": 0.229, "step": 39787 }, { "epoch": 3.223266364225535, "grad_norm": 0.06498401612043381, "learning_rate": 4.315225707727621e-05, "loss": 0.1918, "step": 39788 }, { "epoch": 3.223347375243033, "grad_norm": 0.07395123690366745, "learning_rate": 4.314775642468158e-05, "loss": 0.1914, "step": 39789 }, { "epoch": 3.2234283862605313, "grad_norm": 0.0681285709142685, "learning_rate": 4.314325577208695e-05, "loss": 0.221, "step": 39790 }, { "epoch": 3.22350939727803, "grad_norm": 0.09783481061458588, "learning_rate": 4.313875511949233e-05, "loss": 0.2124, "step": 39791 }, { "epoch": 3.2235904082955282, "grad_norm": 0.07654182612895966, "learning_rate": 4.31342544668977e-05, "loss": 0.2031, "step": 39792 }, { "epoch": 3.2236714193130265, "grad_norm": 0.06707725673913956, "learning_rate": 4.312975381430307e-05, "loss": 0.2458, "step": 39793 }, { "epoch": 3.223752430330525, "grad_norm": 0.06937646120786667, "learning_rate": 4.312525316170845e-05, "loss": 0.2395, "step": 39794 }, { "epoch": 3.2238334413480234, "grad_norm": 0.07003714144229889, "learning_rate": 4.312075250911382e-05, "loss": 0.2154, "step": 39795 }, { "epoch": 3.2239144523655217, "grad_norm": 0.06754667311906815, "learning_rate": 4.31162518565192e-05, "loss": 0.2256, "step": 39796 }, { "epoch": 3.22399546338302, "grad_norm": 0.0717126652598381, "learning_rate": 4.311175120392457e-05, "loss": 0.2727, "step": 39797 }, { "epoch": 3.2240764744005186, "grad_norm": 0.06259047985076904, "learning_rate": 4.310725055132995e-05, "loss": 0.2122, "step": 39798 }, { "epoch": 3.224157485418017, "grad_norm": 0.06361819058656693, "learning_rate": 4.310274989873532e-05, "loss": 0.2298, "step": 39799 }, { "epoch": 3.224238496435515, "grad_norm": 0.07437151670455933, "learning_rate": 4.3098249246140694e-05, "loss": 0.2561, "step": 39800 }, { "epoch": 3.2243195074530138, "grad_norm": 0.07215610891580582, "learning_rate": 4.309374859354607e-05, "loss": 0.2253, "step": 39801 }, { "epoch": 3.224400518470512, "grad_norm": 0.08039887994527817, "learning_rate": 4.308924794095144e-05, "loss": 0.232, "step": 39802 }, { "epoch": 3.2244815294880103, "grad_norm": 0.07376774400472641, "learning_rate": 4.3084747288356815e-05, "loss": 0.2657, "step": 39803 }, { "epoch": 3.224562540505509, "grad_norm": 0.0832466259598732, "learning_rate": 4.308024663576219e-05, "loss": 0.2616, "step": 39804 }, { "epoch": 3.224643551523007, "grad_norm": 0.08477449417114258, "learning_rate": 4.307574598316756e-05, "loss": 0.2024, "step": 39805 }, { "epoch": 3.2247245625405054, "grad_norm": 0.059144388884305954, "learning_rate": 4.3071245330572936e-05, "loss": 0.2241, "step": 39806 }, { "epoch": 3.2248055735580037, "grad_norm": 0.06593013554811478, "learning_rate": 4.306674467797831e-05, "loss": 0.1954, "step": 39807 }, { "epoch": 3.2248865845755024, "grad_norm": 0.0681859627366066, "learning_rate": 4.306224402538368e-05, "loss": 0.2068, "step": 39808 }, { "epoch": 3.2249675955930006, "grad_norm": 0.06685998290777206, "learning_rate": 4.3057743372789057e-05, "loss": 0.214, "step": 39809 }, { "epoch": 3.225048606610499, "grad_norm": 0.0786844789981842, "learning_rate": 4.305324272019443e-05, "loss": 0.2236, "step": 39810 }, { "epoch": 3.2251296176279975, "grad_norm": 0.06508845835924149, "learning_rate": 4.3048742067599804e-05, "loss": 0.2363, "step": 39811 }, { "epoch": 3.225210628645496, "grad_norm": 0.06696710735559464, "learning_rate": 4.304424141500518e-05, "loss": 0.2477, "step": 39812 }, { "epoch": 3.225291639662994, "grad_norm": 0.08366618305444717, "learning_rate": 4.303974076241055e-05, "loss": 0.2719, "step": 39813 }, { "epoch": 3.2253726506804927, "grad_norm": 0.06795560568571091, "learning_rate": 4.3035240109815925e-05, "loss": 0.2218, "step": 39814 }, { "epoch": 3.225453661697991, "grad_norm": 0.07089146971702576, "learning_rate": 4.30307394572213e-05, "loss": 0.2224, "step": 39815 }, { "epoch": 3.225534672715489, "grad_norm": 0.05195857584476471, "learning_rate": 4.302623880462667e-05, "loss": 0.223, "step": 39816 }, { "epoch": 3.225615683732988, "grad_norm": 0.064137764275074, "learning_rate": 4.3021738152032045e-05, "loss": 0.2421, "step": 39817 }, { "epoch": 3.225696694750486, "grad_norm": 0.07576876133680344, "learning_rate": 4.301723749943742e-05, "loss": 0.2405, "step": 39818 }, { "epoch": 3.2257777057679844, "grad_norm": 0.07548273354768753, "learning_rate": 4.301273684684279e-05, "loss": 0.2019, "step": 39819 }, { "epoch": 3.2258587167854826, "grad_norm": 0.08538465946912766, "learning_rate": 4.3008236194248166e-05, "loss": 0.2192, "step": 39820 }, { "epoch": 3.2259397278029813, "grad_norm": 0.06191878020763397, "learning_rate": 4.300373554165354e-05, "loss": 0.214, "step": 39821 }, { "epoch": 3.2260207388204796, "grad_norm": 0.07498586922883987, "learning_rate": 4.2999234889058913e-05, "loss": 0.2399, "step": 39822 }, { "epoch": 3.226101749837978, "grad_norm": 0.07455036044120789, "learning_rate": 4.299473423646429e-05, "loss": 0.259, "step": 39823 }, { "epoch": 3.2261827608554765, "grad_norm": 0.06393108516931534, "learning_rate": 4.299023358386967e-05, "loss": 0.2197, "step": 39824 }, { "epoch": 3.2262637718729748, "grad_norm": 0.08502611517906189, "learning_rate": 4.2985732931275034e-05, "loss": 0.2281, "step": 39825 }, { "epoch": 3.226344782890473, "grad_norm": 0.062071532011032104, "learning_rate": 4.298123227868041e-05, "loss": 0.1857, "step": 39826 }, { "epoch": 3.2264257939079717, "grad_norm": 0.06794518232345581, "learning_rate": 4.297673162608579e-05, "loss": 0.2, "step": 39827 }, { "epoch": 3.22650680492547, "grad_norm": 0.07407260686159134, "learning_rate": 4.2972230973491155e-05, "loss": 0.23, "step": 39828 }, { "epoch": 3.226587815942968, "grad_norm": 0.071202851831913, "learning_rate": 4.2967730320896535e-05, "loss": 0.2226, "step": 39829 }, { "epoch": 3.2266688269604664, "grad_norm": 0.06576815247535706, "learning_rate": 4.296322966830191e-05, "loss": 0.2042, "step": 39830 }, { "epoch": 3.226749837977965, "grad_norm": 0.07664418965578079, "learning_rate": 4.2958729015707276e-05, "loss": 0.2397, "step": 39831 }, { "epoch": 3.2268308489954634, "grad_norm": 0.06686937063932419, "learning_rate": 4.2954228363112656e-05, "loss": 0.2074, "step": 39832 }, { "epoch": 3.2269118600129616, "grad_norm": 0.07900944352149963, "learning_rate": 4.294972771051803e-05, "loss": 0.2446, "step": 39833 }, { "epoch": 3.2269928710304603, "grad_norm": 0.07053736597299576, "learning_rate": 4.29452270579234e-05, "loss": 0.2417, "step": 39834 }, { "epoch": 3.2270738820479585, "grad_norm": 0.06694454699754715, "learning_rate": 4.294072640532878e-05, "loss": 0.2184, "step": 39835 }, { "epoch": 3.2271548930654568, "grad_norm": 0.066739022731781, "learning_rate": 4.293622575273415e-05, "loss": 0.2024, "step": 39836 }, { "epoch": 3.2272359040829555, "grad_norm": 0.07889380306005478, "learning_rate": 4.2931725100139524e-05, "loss": 0.2762, "step": 39837 }, { "epoch": 3.2273169151004537, "grad_norm": 0.07989609241485596, "learning_rate": 4.29272244475449e-05, "loss": 0.2569, "step": 39838 }, { "epoch": 3.227397926117952, "grad_norm": 0.08929932117462158, "learning_rate": 4.292272379495027e-05, "loss": 0.2682, "step": 39839 }, { "epoch": 3.2274789371354506, "grad_norm": 0.08622092008590698, "learning_rate": 4.2918223142355645e-05, "loss": 0.235, "step": 39840 }, { "epoch": 3.227559948152949, "grad_norm": 0.05606333166360855, "learning_rate": 4.291372248976102e-05, "loss": 0.2131, "step": 39841 }, { "epoch": 3.227640959170447, "grad_norm": 0.06832990795373917, "learning_rate": 4.290922183716639e-05, "loss": 0.2177, "step": 39842 }, { "epoch": 3.2277219701879454, "grad_norm": 0.06548843532800674, "learning_rate": 4.2904721184571766e-05, "loss": 0.214, "step": 39843 }, { "epoch": 3.227802981205444, "grad_norm": 0.06597808003425598, "learning_rate": 4.290022053197714e-05, "loss": 0.1829, "step": 39844 }, { "epoch": 3.2278839922229423, "grad_norm": 0.06908122450113297, "learning_rate": 4.289571987938251e-05, "loss": 0.2373, "step": 39845 }, { "epoch": 3.2279650032404406, "grad_norm": 0.06956381350755692, "learning_rate": 4.289121922678789e-05, "loss": 0.2024, "step": 39846 }, { "epoch": 3.2280460142579392, "grad_norm": 0.08439984917640686, "learning_rate": 4.288671857419326e-05, "loss": 0.2142, "step": 39847 }, { "epoch": 3.2281270252754375, "grad_norm": 0.07745002955198288, "learning_rate": 4.2882217921598634e-05, "loss": 0.2228, "step": 39848 }, { "epoch": 3.2282080362929357, "grad_norm": 0.07908467203378677, "learning_rate": 4.287771726900401e-05, "loss": 0.2207, "step": 39849 }, { "epoch": 3.2282890473104344, "grad_norm": 0.05995180085301399, "learning_rate": 4.287321661640938e-05, "loss": 0.2602, "step": 39850 }, { "epoch": 3.2283700583279327, "grad_norm": 0.07221881300210953, "learning_rate": 4.2868715963814755e-05, "loss": 0.1869, "step": 39851 }, { "epoch": 3.228451069345431, "grad_norm": 0.07083651423454285, "learning_rate": 4.286421531122013e-05, "loss": 0.2034, "step": 39852 }, { "epoch": 3.228532080362929, "grad_norm": 0.08436165750026703, "learning_rate": 4.28597146586255e-05, "loss": 0.2834, "step": 39853 }, { "epoch": 3.228613091380428, "grad_norm": 0.06885623931884766, "learning_rate": 4.2855214006030875e-05, "loss": 0.2169, "step": 39854 }, { "epoch": 3.228694102397926, "grad_norm": 0.0686570331454277, "learning_rate": 4.285071335343625e-05, "loss": 0.2313, "step": 39855 }, { "epoch": 3.2287751134154243, "grad_norm": 0.06884542107582092, "learning_rate": 4.284621270084162e-05, "loss": 0.2196, "step": 39856 }, { "epoch": 3.228856124432923, "grad_norm": 0.06082329526543617, "learning_rate": 4.2841712048246996e-05, "loss": 0.2205, "step": 39857 }, { "epoch": 3.2289371354504213, "grad_norm": 0.06664751470088959, "learning_rate": 4.283721139565237e-05, "loss": 0.2055, "step": 39858 }, { "epoch": 3.2290181464679195, "grad_norm": 0.0696212649345398, "learning_rate": 4.2832710743057744e-05, "loss": 0.2204, "step": 39859 }, { "epoch": 3.229099157485418, "grad_norm": 0.07365533709526062, "learning_rate": 4.282821009046312e-05, "loss": 0.2293, "step": 39860 }, { "epoch": 3.2291801685029164, "grad_norm": 0.054057564586400986, "learning_rate": 4.282370943786849e-05, "loss": 0.2307, "step": 39861 }, { "epoch": 3.2292611795204147, "grad_norm": 0.06658761948347092, "learning_rate": 4.2819208785273864e-05, "loss": 0.2066, "step": 39862 }, { "epoch": 3.2293421905379134, "grad_norm": 0.0653328150510788, "learning_rate": 4.2814708132679245e-05, "loss": 0.2275, "step": 39863 }, { "epoch": 3.2294232015554116, "grad_norm": 0.06299271434545517, "learning_rate": 4.281020748008461e-05, "loss": 0.206, "step": 39864 }, { "epoch": 3.22950421257291, "grad_norm": 0.06639181077480316, "learning_rate": 4.280570682748999e-05, "loss": 0.2114, "step": 39865 }, { "epoch": 3.229585223590408, "grad_norm": 0.06050573289394379, "learning_rate": 4.2801206174895366e-05, "loss": 0.2398, "step": 39866 }, { "epoch": 3.229666234607907, "grad_norm": 0.07270995527505875, "learning_rate": 4.279670552230073e-05, "loss": 0.202, "step": 39867 }, { "epoch": 3.229747245625405, "grad_norm": 0.07150819897651672, "learning_rate": 4.279220486970611e-05, "loss": 0.2307, "step": 39868 }, { "epoch": 3.2298282566429033, "grad_norm": 0.08548440039157867, "learning_rate": 4.2787704217111486e-05, "loss": 0.2647, "step": 39869 }, { "epoch": 3.229909267660402, "grad_norm": 0.07096060365438461, "learning_rate": 4.278320356451685e-05, "loss": 0.2003, "step": 39870 }, { "epoch": 3.2299902786779002, "grad_norm": 0.07516411691904068, "learning_rate": 4.2778702911922234e-05, "loss": 0.278, "step": 39871 }, { "epoch": 3.2300712896953985, "grad_norm": 0.07198493927717209, "learning_rate": 4.277420225932761e-05, "loss": 0.2163, "step": 39872 }, { "epoch": 3.230152300712897, "grad_norm": 0.08852767944335938, "learning_rate": 4.2769701606732974e-05, "loss": 0.2305, "step": 39873 }, { "epoch": 3.2302333117303954, "grad_norm": 0.08636888116598129, "learning_rate": 4.2765200954138354e-05, "loss": 0.2195, "step": 39874 }, { "epoch": 3.2303143227478937, "grad_norm": 0.0890205055475235, "learning_rate": 4.276070030154373e-05, "loss": 0.2642, "step": 39875 }, { "epoch": 3.230395333765392, "grad_norm": 0.06537780910730362, "learning_rate": 4.27561996489491e-05, "loss": 0.2491, "step": 39876 }, { "epoch": 3.2304763447828906, "grad_norm": 0.0705234557390213, "learning_rate": 4.2751698996354475e-05, "loss": 0.2102, "step": 39877 }, { "epoch": 3.230557355800389, "grad_norm": 0.06498276442289352, "learning_rate": 4.274719834375985e-05, "loss": 0.2042, "step": 39878 }, { "epoch": 3.230638366817887, "grad_norm": 0.092403344810009, "learning_rate": 4.274269769116522e-05, "loss": 0.2329, "step": 39879 }, { "epoch": 3.2307193778353858, "grad_norm": 0.065389484167099, "learning_rate": 4.2738197038570596e-05, "loss": 0.2172, "step": 39880 }, { "epoch": 3.230800388852884, "grad_norm": 0.05993317812681198, "learning_rate": 4.273369638597597e-05, "loss": 0.2092, "step": 39881 }, { "epoch": 3.2308813998703823, "grad_norm": 0.08355020731687546, "learning_rate": 4.272919573338134e-05, "loss": 0.2253, "step": 39882 }, { "epoch": 3.230962410887881, "grad_norm": 0.0723675861954689, "learning_rate": 4.272469508078672e-05, "loss": 0.2647, "step": 39883 }, { "epoch": 3.231043421905379, "grad_norm": 0.08424925804138184, "learning_rate": 4.272019442819209e-05, "loss": 0.2144, "step": 39884 }, { "epoch": 3.2311244329228774, "grad_norm": 0.07128293812274933, "learning_rate": 4.2715693775597464e-05, "loss": 0.2784, "step": 39885 }, { "epoch": 3.231205443940376, "grad_norm": 0.06768849492073059, "learning_rate": 4.271119312300284e-05, "loss": 0.2182, "step": 39886 }, { "epoch": 3.2312864549578744, "grad_norm": 0.0783187672495842, "learning_rate": 4.270669247040821e-05, "loss": 0.2526, "step": 39887 }, { "epoch": 3.2313674659753726, "grad_norm": 0.07558058947324753, "learning_rate": 4.2702191817813585e-05, "loss": 0.2154, "step": 39888 }, { "epoch": 3.231448476992871, "grad_norm": 0.06205056607723236, "learning_rate": 4.269769116521896e-05, "loss": 0.2068, "step": 39889 }, { "epoch": 3.2315294880103695, "grad_norm": 0.07086601853370667, "learning_rate": 4.269319051262433e-05, "loss": 0.246, "step": 39890 }, { "epoch": 3.231610499027868, "grad_norm": 0.08032597601413727, "learning_rate": 4.2688689860029706e-05, "loss": 0.2278, "step": 39891 }, { "epoch": 3.231691510045366, "grad_norm": 0.09461595863103867, "learning_rate": 4.268418920743508e-05, "loss": 0.2683, "step": 39892 }, { "epoch": 3.2317725210628647, "grad_norm": 0.0689258947968483, "learning_rate": 4.267968855484045e-05, "loss": 0.2802, "step": 39893 }, { "epoch": 3.231853532080363, "grad_norm": 0.0880710557103157, "learning_rate": 4.2675187902245826e-05, "loss": 0.2023, "step": 39894 }, { "epoch": 3.231934543097861, "grad_norm": 0.07965899258852005, "learning_rate": 4.26706872496512e-05, "loss": 0.2542, "step": 39895 }, { "epoch": 3.2320155541153595, "grad_norm": 0.07699202001094818, "learning_rate": 4.2666186597056574e-05, "loss": 0.2568, "step": 39896 }, { "epoch": 3.232096565132858, "grad_norm": 0.0677374079823494, "learning_rate": 4.266168594446195e-05, "loss": 0.2427, "step": 39897 }, { "epoch": 3.2321775761503564, "grad_norm": 0.07392044365406036, "learning_rate": 4.265718529186733e-05, "loss": 0.2171, "step": 39898 }, { "epoch": 3.2322585871678546, "grad_norm": 0.07073640823364258, "learning_rate": 4.2652684639272694e-05, "loss": 0.2284, "step": 39899 }, { "epoch": 3.2323395981853533, "grad_norm": 0.0748598575592041, "learning_rate": 4.264818398667807e-05, "loss": 0.1943, "step": 39900 }, { "epoch": 3.2324206092028516, "grad_norm": 0.0716557651758194, "learning_rate": 4.264368333408345e-05, "loss": 0.2442, "step": 39901 }, { "epoch": 3.23250162022035, "grad_norm": 0.07370223104953766, "learning_rate": 4.263918268148882e-05, "loss": 0.2576, "step": 39902 }, { "epoch": 3.2325826312378485, "grad_norm": 0.06414464116096497, "learning_rate": 4.263468202889419e-05, "loss": 0.1914, "step": 39903 }, { "epoch": 3.2326636422553467, "grad_norm": 0.0774352103471756, "learning_rate": 4.263018137629957e-05, "loss": 0.2338, "step": 39904 }, { "epoch": 3.232744653272845, "grad_norm": 0.07154515385627747, "learning_rate": 4.262568072370494e-05, "loss": 0.2327, "step": 39905 }, { "epoch": 3.2328256642903437, "grad_norm": 0.08657229691743851, "learning_rate": 4.262118007111031e-05, "loss": 0.2143, "step": 39906 }, { "epoch": 3.232906675307842, "grad_norm": 0.07232128083705902, "learning_rate": 4.261667941851569e-05, "loss": 0.217, "step": 39907 }, { "epoch": 3.23298768632534, "grad_norm": 0.07182679325342178, "learning_rate": 4.2612178765921064e-05, "loss": 0.249, "step": 39908 }, { "epoch": 3.233068697342839, "grad_norm": 0.08501800894737244, "learning_rate": 4.260767811332643e-05, "loss": 0.2611, "step": 39909 }, { "epoch": 3.233149708360337, "grad_norm": 0.0579972043633461, "learning_rate": 4.260317746073181e-05, "loss": 0.2059, "step": 39910 }, { "epoch": 3.2332307193778353, "grad_norm": 0.06739821285009384, "learning_rate": 4.2598676808137184e-05, "loss": 0.2287, "step": 39911 }, { "epoch": 3.2333117303953336, "grad_norm": 0.06128734350204468, "learning_rate": 4.259417615554255e-05, "loss": 0.2246, "step": 39912 }, { "epoch": 3.2333927414128323, "grad_norm": 0.07265954464673996, "learning_rate": 4.258967550294793e-05, "loss": 0.2316, "step": 39913 }, { "epoch": 3.2334737524303305, "grad_norm": 0.06592036038637161, "learning_rate": 4.2585174850353305e-05, "loss": 0.2136, "step": 39914 }, { "epoch": 3.2335547634478288, "grad_norm": 0.06112872436642647, "learning_rate": 4.258067419775867e-05, "loss": 0.2005, "step": 39915 }, { "epoch": 3.2336357744653275, "grad_norm": 0.07858998328447342, "learning_rate": 4.257617354516405e-05, "loss": 0.259, "step": 39916 }, { "epoch": 3.2337167854828257, "grad_norm": 0.057796552777290344, "learning_rate": 4.2571672892569426e-05, "loss": 0.205, "step": 39917 }, { "epoch": 3.233797796500324, "grad_norm": 0.08636032789945602, "learning_rate": 4.25671722399748e-05, "loss": 0.2348, "step": 39918 }, { "epoch": 3.233878807517822, "grad_norm": 0.06539962440729141, "learning_rate": 4.256267158738017e-05, "loss": 0.2357, "step": 39919 }, { "epoch": 3.233959818535321, "grad_norm": 0.07662045955657959, "learning_rate": 4.255817093478555e-05, "loss": 0.2161, "step": 39920 }, { "epoch": 3.234040829552819, "grad_norm": 0.06934379786252975, "learning_rate": 4.255367028219092e-05, "loss": 0.2061, "step": 39921 }, { "epoch": 3.2341218405703174, "grad_norm": 0.07253503054380417, "learning_rate": 4.2549169629596294e-05, "loss": 0.2175, "step": 39922 }, { "epoch": 3.234202851587816, "grad_norm": 0.09941152483224869, "learning_rate": 4.254466897700167e-05, "loss": 0.2165, "step": 39923 }, { "epoch": 3.2342838626053143, "grad_norm": 0.05564277991652489, "learning_rate": 4.254016832440704e-05, "loss": 0.203, "step": 39924 }, { "epoch": 3.2343648736228126, "grad_norm": 0.07295148819684982, "learning_rate": 4.2535667671812415e-05, "loss": 0.2667, "step": 39925 }, { "epoch": 3.2344458846403112, "grad_norm": 0.06901323795318604, "learning_rate": 4.253116701921779e-05, "loss": 0.2464, "step": 39926 }, { "epoch": 3.2345268956578095, "grad_norm": 0.0735318511724472, "learning_rate": 4.252666636662316e-05, "loss": 0.2284, "step": 39927 }, { "epoch": 3.2346079066753077, "grad_norm": 0.07409574091434479, "learning_rate": 4.2522165714028536e-05, "loss": 0.265, "step": 39928 }, { "epoch": 3.2346889176928064, "grad_norm": 0.07013576477766037, "learning_rate": 4.251766506143391e-05, "loss": 0.2363, "step": 39929 }, { "epoch": 3.2347699287103047, "grad_norm": 0.062335990369319916, "learning_rate": 4.251316440883928e-05, "loss": 0.1896, "step": 39930 }, { "epoch": 3.234850939727803, "grad_norm": 0.0800856202840805, "learning_rate": 4.250866375624466e-05, "loss": 0.2091, "step": 39931 }, { "epoch": 3.234931950745301, "grad_norm": 0.07388359308242798, "learning_rate": 4.250416310365003e-05, "loss": 0.2464, "step": 39932 }, { "epoch": 3.2350129617628, "grad_norm": 0.07145456224679947, "learning_rate": 4.2499662451055404e-05, "loss": 0.2246, "step": 39933 }, { "epoch": 3.235093972780298, "grad_norm": 0.07021401077508926, "learning_rate": 4.2495161798460784e-05, "loss": 0.2272, "step": 39934 }, { "epoch": 3.2351749837977963, "grad_norm": 0.08557034283876419, "learning_rate": 4.249066114586615e-05, "loss": 0.2402, "step": 39935 }, { "epoch": 3.235255994815295, "grad_norm": 0.06644951552152634, "learning_rate": 4.2486160493271525e-05, "loss": 0.2338, "step": 39936 }, { "epoch": 3.2353370058327933, "grad_norm": 0.07197255641222, "learning_rate": 4.2481659840676905e-05, "loss": 0.2147, "step": 39937 }, { "epoch": 3.2354180168502915, "grad_norm": 0.06469669938087463, "learning_rate": 4.247715918808227e-05, "loss": 0.22, "step": 39938 }, { "epoch": 3.23549902786779, "grad_norm": 0.0772833302617073, "learning_rate": 4.2472658535487645e-05, "loss": 0.2134, "step": 39939 }, { "epoch": 3.2355800388852884, "grad_norm": 0.0739804282784462, "learning_rate": 4.2468157882893026e-05, "loss": 0.2352, "step": 39940 }, { "epoch": 3.2356610499027867, "grad_norm": 0.08174054324626923, "learning_rate": 4.246365723029839e-05, "loss": 0.2341, "step": 39941 }, { "epoch": 3.235742060920285, "grad_norm": 0.0759250670671463, "learning_rate": 4.2459156577703766e-05, "loss": 0.2449, "step": 39942 }, { "epoch": 3.2358230719377836, "grad_norm": 0.06863147765398026, "learning_rate": 4.2454655925109147e-05, "loss": 0.2184, "step": 39943 }, { "epoch": 3.235904082955282, "grad_norm": 0.06283069401979446, "learning_rate": 4.245015527251452e-05, "loss": 0.2093, "step": 39944 }, { "epoch": 3.23598509397278, "grad_norm": 0.07585699111223221, "learning_rate": 4.244565461991989e-05, "loss": 0.2897, "step": 39945 }, { "epoch": 3.236066104990279, "grad_norm": 0.06817737966775894, "learning_rate": 4.244115396732527e-05, "loss": 0.1989, "step": 39946 }, { "epoch": 3.236147116007777, "grad_norm": 0.07741937786340714, "learning_rate": 4.243665331473064e-05, "loss": 0.2499, "step": 39947 }, { "epoch": 3.2362281270252753, "grad_norm": 0.06947393715381622, "learning_rate": 4.243215266213601e-05, "loss": 0.1853, "step": 39948 }, { "epoch": 3.236309138042774, "grad_norm": 0.08097727596759796, "learning_rate": 4.242765200954139e-05, "loss": 0.247, "step": 39949 }, { "epoch": 3.2363901490602722, "grad_norm": 0.07119971513748169, "learning_rate": 4.242315135694676e-05, "loss": 0.2289, "step": 39950 }, { "epoch": 3.2364711600777705, "grad_norm": 0.07697370648384094, "learning_rate": 4.241865070435213e-05, "loss": 0.258, "step": 39951 }, { "epoch": 3.236552171095269, "grad_norm": 0.06008782610297203, "learning_rate": 4.241415005175751e-05, "loss": 0.1882, "step": 39952 }, { "epoch": 3.2366331821127674, "grad_norm": 0.08692114800214767, "learning_rate": 4.240964939916288e-05, "loss": 0.2321, "step": 39953 }, { "epoch": 3.2367141931302656, "grad_norm": 0.08523430675268173, "learning_rate": 4.240514874656825e-05, "loss": 0.2368, "step": 39954 }, { "epoch": 3.236795204147764, "grad_norm": 0.08042368292808533, "learning_rate": 4.240064809397363e-05, "loss": 0.23, "step": 39955 }, { "epoch": 3.2368762151652626, "grad_norm": 0.07272639870643616, "learning_rate": 4.2396147441379e-05, "loss": 0.2252, "step": 39956 }, { "epoch": 3.236957226182761, "grad_norm": 0.06889273226261139, "learning_rate": 4.239164678878438e-05, "loss": 0.2267, "step": 39957 }, { "epoch": 3.237038237200259, "grad_norm": 0.06718260794878006, "learning_rate": 4.238714613618975e-05, "loss": 0.2329, "step": 39958 }, { "epoch": 3.2371192482177578, "grad_norm": 0.08459927141666412, "learning_rate": 4.2382645483595124e-05, "loss": 0.2507, "step": 39959 }, { "epoch": 3.237200259235256, "grad_norm": 0.07423102855682373, "learning_rate": 4.23781448310005e-05, "loss": 0.2415, "step": 39960 }, { "epoch": 3.2372812702527543, "grad_norm": 0.06294968724250793, "learning_rate": 4.237364417840587e-05, "loss": 0.2341, "step": 39961 }, { "epoch": 3.237362281270253, "grad_norm": 0.07447552680969238, "learning_rate": 4.2369143525811245e-05, "loss": 0.1979, "step": 39962 }, { "epoch": 3.237443292287751, "grad_norm": 0.0652199238538742, "learning_rate": 4.236464287321662e-05, "loss": 0.2334, "step": 39963 }, { "epoch": 3.2375243033052494, "grad_norm": 0.07575726509094238, "learning_rate": 4.236014222062199e-05, "loss": 0.2712, "step": 39964 }, { "epoch": 3.2376053143227477, "grad_norm": 0.060297705233097076, "learning_rate": 4.2355641568027366e-05, "loss": 0.1856, "step": 39965 }, { "epoch": 3.2376863253402464, "grad_norm": 0.07966022193431854, "learning_rate": 4.235114091543274e-05, "loss": 0.2953, "step": 39966 }, { "epoch": 3.2377673363577446, "grad_norm": 0.07863543182611465, "learning_rate": 4.234664026283811e-05, "loss": 0.1878, "step": 39967 }, { "epoch": 3.237848347375243, "grad_norm": 0.06594900041818619, "learning_rate": 4.2342139610243487e-05, "loss": 0.1895, "step": 39968 }, { "epoch": 3.2379293583927415, "grad_norm": 0.08944254368543625, "learning_rate": 4.233763895764886e-05, "loss": 0.2714, "step": 39969 }, { "epoch": 3.23801036941024, "grad_norm": 0.07178863883018494, "learning_rate": 4.233313830505424e-05, "loss": 0.2505, "step": 39970 }, { "epoch": 3.238091380427738, "grad_norm": 0.0778353214263916, "learning_rate": 4.232863765245961e-05, "loss": 0.2397, "step": 39971 }, { "epoch": 3.2381723914452367, "grad_norm": 0.06244092062115669, "learning_rate": 4.232413699986498e-05, "loss": 0.2348, "step": 39972 }, { "epoch": 3.238253402462735, "grad_norm": 0.07463119179010391, "learning_rate": 4.231963634727036e-05, "loss": 0.2222, "step": 39973 }, { "epoch": 3.238334413480233, "grad_norm": 0.06593679636716843, "learning_rate": 4.231513569467573e-05, "loss": 0.2592, "step": 39974 }, { "epoch": 3.238415424497732, "grad_norm": 0.07185850292444229, "learning_rate": 4.23106350420811e-05, "loss": 0.2656, "step": 39975 }, { "epoch": 3.23849643551523, "grad_norm": 0.060626208782196045, "learning_rate": 4.230613438948648e-05, "loss": 0.1949, "step": 39976 }, { "epoch": 3.2385774465327284, "grad_norm": 0.07208986580371857, "learning_rate": 4.230163373689185e-05, "loss": 0.1869, "step": 39977 }, { "epoch": 3.2386584575502266, "grad_norm": 0.07708663493394852, "learning_rate": 4.229713308429722e-05, "loss": 0.2435, "step": 39978 }, { "epoch": 3.2387394685677253, "grad_norm": 0.08233334124088287, "learning_rate": 4.22926324317026e-05, "loss": 0.2294, "step": 39979 }, { "epoch": 3.2388204795852236, "grad_norm": 0.07808908075094223, "learning_rate": 4.228813177910797e-05, "loss": 0.2123, "step": 39980 }, { "epoch": 3.238901490602722, "grad_norm": 0.07508261501789093, "learning_rate": 4.2283631126513343e-05, "loss": 0.2504, "step": 39981 }, { "epoch": 3.2389825016202205, "grad_norm": 0.06409870833158493, "learning_rate": 4.2279130473918724e-05, "loss": 0.2001, "step": 39982 }, { "epoch": 3.2390635126377187, "grad_norm": 0.0930962786078453, "learning_rate": 4.22746298213241e-05, "loss": 0.2515, "step": 39983 }, { "epoch": 3.239144523655217, "grad_norm": 0.06400201469659805, "learning_rate": 4.2270129168729464e-05, "loss": 0.2159, "step": 39984 }, { "epoch": 3.2392255346727157, "grad_norm": 0.06780712306499481, "learning_rate": 4.2265628516134845e-05, "loss": 0.2247, "step": 39985 }, { "epoch": 3.239306545690214, "grad_norm": 0.07583682984113693, "learning_rate": 4.226112786354022e-05, "loss": 0.2277, "step": 39986 }, { "epoch": 3.239387556707712, "grad_norm": 0.06544255465269089, "learning_rate": 4.2256627210945585e-05, "loss": 0.236, "step": 39987 }, { "epoch": 3.2394685677252104, "grad_norm": 0.05646807700395584, "learning_rate": 4.2252126558350965e-05, "loss": 0.2204, "step": 39988 }, { "epoch": 3.239549578742709, "grad_norm": 0.08052621781826019, "learning_rate": 4.224762590575634e-05, "loss": 0.2119, "step": 39989 }, { "epoch": 3.2396305897602073, "grad_norm": 0.07334674149751663, "learning_rate": 4.2243125253161706e-05, "loss": 0.2209, "step": 39990 }, { "epoch": 3.2397116007777056, "grad_norm": 0.0667957216501236, "learning_rate": 4.2238624600567086e-05, "loss": 0.2564, "step": 39991 }, { "epoch": 3.2397926117952043, "grad_norm": 0.08592815697193146, "learning_rate": 4.223412394797246e-05, "loss": 0.2257, "step": 39992 }, { "epoch": 3.2398736228127025, "grad_norm": 0.06996352225542068, "learning_rate": 4.222962329537783e-05, "loss": 0.2608, "step": 39993 }, { "epoch": 3.2399546338302008, "grad_norm": 0.07029546052217484, "learning_rate": 4.222512264278321e-05, "loss": 0.2647, "step": 39994 }, { "epoch": 3.2400356448476995, "grad_norm": 0.07762210071086884, "learning_rate": 4.222062199018858e-05, "loss": 0.2515, "step": 39995 }, { "epoch": 3.2401166558651977, "grad_norm": 0.06713078916072845, "learning_rate": 4.2216121337593954e-05, "loss": 0.2236, "step": 39996 }, { "epoch": 3.240197666882696, "grad_norm": 0.058481615036726, "learning_rate": 4.221162068499933e-05, "loss": 0.2207, "step": 39997 }, { "epoch": 3.2402786779001946, "grad_norm": 0.06714651733636856, "learning_rate": 4.22071200324047e-05, "loss": 0.2161, "step": 39998 }, { "epoch": 3.240359688917693, "grad_norm": 0.07449248433113098, "learning_rate": 4.2202619379810075e-05, "loss": 0.2118, "step": 39999 }, { "epoch": 3.240440699935191, "grad_norm": 0.07122816145420074, "learning_rate": 4.219811872721545e-05, "loss": 0.2579, "step": 40000 }, { "epoch": 3.2405217109526894, "grad_norm": 0.08362056314945221, "learning_rate": 4.219361807462082e-05, "loss": 0.2342, "step": 40001 }, { "epoch": 3.240602721970188, "grad_norm": 0.06606101989746094, "learning_rate": 4.2189117422026196e-05, "loss": 0.2261, "step": 40002 }, { "epoch": 3.2406837329876863, "grad_norm": 0.06786011904478073, "learning_rate": 4.218461676943157e-05, "loss": 0.2026, "step": 40003 }, { "epoch": 3.2407647440051845, "grad_norm": 0.05847107991576195, "learning_rate": 4.218011611683694e-05, "loss": 0.2293, "step": 40004 }, { "epoch": 3.2408457550226832, "grad_norm": 0.05374923720955849, "learning_rate": 4.217561546424232e-05, "loss": 0.1857, "step": 40005 }, { "epoch": 3.2409267660401815, "grad_norm": 0.0643201693892479, "learning_rate": 4.217111481164769e-05, "loss": 0.2363, "step": 40006 }, { "epoch": 3.2410077770576797, "grad_norm": 0.08065766841173172, "learning_rate": 4.2166614159053064e-05, "loss": 0.3056, "step": 40007 }, { "epoch": 3.2410887880751784, "grad_norm": 0.07379017770290375, "learning_rate": 4.216211350645844e-05, "loss": 0.2356, "step": 40008 }, { "epoch": 3.2411697990926767, "grad_norm": 0.06779438257217407, "learning_rate": 4.215761285386382e-05, "loss": 0.2324, "step": 40009 }, { "epoch": 3.241250810110175, "grad_norm": 0.082339346408844, "learning_rate": 4.2153112201269185e-05, "loss": 0.2111, "step": 40010 }, { "epoch": 3.241331821127673, "grad_norm": 0.06292964518070221, "learning_rate": 4.214861154867456e-05, "loss": 0.2247, "step": 40011 }, { "epoch": 3.241412832145172, "grad_norm": 0.07478726655244827, "learning_rate": 4.214411089607994e-05, "loss": 0.1826, "step": 40012 }, { "epoch": 3.24149384316267, "grad_norm": 0.07911112159490585, "learning_rate": 4.2139610243485306e-05, "loss": 0.2407, "step": 40013 }, { "epoch": 3.2415748541801683, "grad_norm": 0.0716332197189331, "learning_rate": 4.213510959089068e-05, "loss": 0.2314, "step": 40014 }, { "epoch": 3.241655865197667, "grad_norm": 0.07222667336463928, "learning_rate": 4.213060893829606e-05, "loss": 0.2013, "step": 40015 }, { "epoch": 3.2417368762151653, "grad_norm": 0.07332997024059296, "learning_rate": 4.2126108285701426e-05, "loss": 0.2751, "step": 40016 }, { "epoch": 3.2418178872326635, "grad_norm": 0.05805470049381256, "learning_rate": 4.21216076331068e-05, "loss": 0.1887, "step": 40017 }, { "epoch": 3.241898898250162, "grad_norm": 0.07864882051944733, "learning_rate": 4.211710698051218e-05, "loss": 0.2469, "step": 40018 }, { "epoch": 3.2419799092676604, "grad_norm": 0.08058485388755798, "learning_rate": 4.211260632791755e-05, "loss": 0.2614, "step": 40019 }, { "epoch": 3.2420609202851587, "grad_norm": 0.09582924097776413, "learning_rate": 4.210810567532292e-05, "loss": 0.2512, "step": 40020 }, { "epoch": 3.2421419313026574, "grad_norm": 0.07741650193929672, "learning_rate": 4.21036050227283e-05, "loss": 0.2381, "step": 40021 }, { "epoch": 3.2422229423201556, "grad_norm": 0.07290291041135788, "learning_rate": 4.2099104370133675e-05, "loss": 0.2362, "step": 40022 }, { "epoch": 3.242303953337654, "grad_norm": 0.06707461178302765, "learning_rate": 4.209460371753904e-05, "loss": 0.2087, "step": 40023 }, { "epoch": 3.242384964355152, "grad_norm": 0.06291350722312927, "learning_rate": 4.209010306494442e-05, "loss": 0.2387, "step": 40024 }, { "epoch": 3.242465975372651, "grad_norm": 0.07624591886997223, "learning_rate": 4.2085602412349796e-05, "loss": 0.2241, "step": 40025 }, { "epoch": 3.242546986390149, "grad_norm": 0.07474566996097565, "learning_rate": 4.208110175975516e-05, "loss": 0.2405, "step": 40026 }, { "epoch": 3.2426279974076473, "grad_norm": 0.08334729075431824, "learning_rate": 4.207660110716054e-05, "loss": 0.2278, "step": 40027 }, { "epoch": 3.242709008425146, "grad_norm": 0.07016227394342422, "learning_rate": 4.2072100454565916e-05, "loss": 0.2651, "step": 40028 }, { "epoch": 3.2427900194426442, "grad_norm": 0.06835842877626419, "learning_rate": 4.206759980197128e-05, "loss": 0.2312, "step": 40029 }, { "epoch": 3.2428710304601425, "grad_norm": 0.07836335152387619, "learning_rate": 4.2063099149376664e-05, "loss": 0.2378, "step": 40030 }, { "epoch": 3.242952041477641, "grad_norm": 0.08585887402296066, "learning_rate": 4.205859849678204e-05, "loss": 0.2669, "step": 40031 }, { "epoch": 3.2430330524951394, "grad_norm": 0.06686677783727646, "learning_rate": 4.2054097844187404e-05, "loss": 0.2415, "step": 40032 }, { "epoch": 3.2431140635126376, "grad_norm": 0.07130207866430283, "learning_rate": 4.2049597191592784e-05, "loss": 0.2666, "step": 40033 }, { "epoch": 3.243195074530136, "grad_norm": 0.07913440465927124, "learning_rate": 4.204509653899816e-05, "loss": 0.2444, "step": 40034 }, { "epoch": 3.2432760855476346, "grad_norm": 0.06934109330177307, "learning_rate": 4.204059588640353e-05, "loss": 0.2451, "step": 40035 }, { "epoch": 3.243357096565133, "grad_norm": 0.07670455425977707, "learning_rate": 4.2036095233808905e-05, "loss": 0.2396, "step": 40036 }, { "epoch": 3.243438107582631, "grad_norm": 0.0646243691444397, "learning_rate": 4.203159458121428e-05, "loss": 0.1887, "step": 40037 }, { "epoch": 3.2435191186001298, "grad_norm": 0.0583641417324543, "learning_rate": 4.202709392861965e-05, "loss": 0.2231, "step": 40038 }, { "epoch": 3.243600129617628, "grad_norm": 0.0692855641245842, "learning_rate": 4.2022593276025026e-05, "loss": 0.2222, "step": 40039 }, { "epoch": 3.2436811406351262, "grad_norm": 0.08121485263109207, "learning_rate": 4.20180926234304e-05, "loss": 0.284, "step": 40040 }, { "epoch": 3.243762151652625, "grad_norm": 0.07695061713457108, "learning_rate": 4.201359197083577e-05, "loss": 0.2228, "step": 40041 }, { "epoch": 3.243843162670123, "grad_norm": 0.09310201555490494, "learning_rate": 4.200909131824115e-05, "loss": 0.2258, "step": 40042 }, { "epoch": 3.2439241736876214, "grad_norm": 0.07019844651222229, "learning_rate": 4.200459066564652e-05, "loss": 0.2314, "step": 40043 }, { "epoch": 3.24400518470512, "grad_norm": 0.08403699845075607, "learning_rate": 4.2000090013051894e-05, "loss": 0.2421, "step": 40044 }, { "epoch": 3.2440861957226184, "grad_norm": 0.06007116660475731, "learning_rate": 4.199558936045727e-05, "loss": 0.2404, "step": 40045 }, { "epoch": 3.2441672067401166, "grad_norm": 0.07675842195749283, "learning_rate": 4.199108870786264e-05, "loss": 0.2554, "step": 40046 }, { "epoch": 3.244248217757615, "grad_norm": 0.07528151571750641, "learning_rate": 4.1986588055268015e-05, "loss": 0.2055, "step": 40047 }, { "epoch": 3.2443292287751135, "grad_norm": 0.07164599746465683, "learning_rate": 4.1982087402673395e-05, "loss": 0.2471, "step": 40048 }, { "epoch": 3.244410239792612, "grad_norm": 0.08063235133886337, "learning_rate": 4.197758675007876e-05, "loss": 0.2196, "step": 40049 }, { "epoch": 3.24449125081011, "grad_norm": 0.06814529746770859, "learning_rate": 4.1973086097484136e-05, "loss": 0.2558, "step": 40050 }, { "epoch": 3.2445722618276087, "grad_norm": 0.0630267783999443, "learning_rate": 4.1968585444889516e-05, "loss": 0.1958, "step": 40051 }, { "epoch": 3.244653272845107, "grad_norm": 0.07586364448070526, "learning_rate": 4.196408479229488e-05, "loss": 0.2516, "step": 40052 }, { "epoch": 3.244734283862605, "grad_norm": 0.07543324679136276, "learning_rate": 4.1959584139700256e-05, "loss": 0.2222, "step": 40053 }, { "epoch": 3.244815294880104, "grad_norm": 0.07649768888950348, "learning_rate": 4.195508348710564e-05, "loss": 0.2484, "step": 40054 }, { "epoch": 3.244896305897602, "grad_norm": 0.06851378083229065, "learning_rate": 4.1950582834511004e-05, "loss": 0.2299, "step": 40055 }, { "epoch": 3.2449773169151004, "grad_norm": 0.07393316179513931, "learning_rate": 4.194608218191638e-05, "loss": 0.2314, "step": 40056 }, { "epoch": 3.2450583279325986, "grad_norm": 0.06716819852590561, "learning_rate": 4.194158152932176e-05, "loss": 0.2181, "step": 40057 }, { "epoch": 3.2451393389500973, "grad_norm": 0.07790635526180267, "learning_rate": 4.1937080876727124e-05, "loss": 0.2305, "step": 40058 }, { "epoch": 3.2452203499675956, "grad_norm": 0.07181359082460403, "learning_rate": 4.19325802241325e-05, "loss": 0.2227, "step": 40059 }, { "epoch": 3.245301360985094, "grad_norm": 0.08683653920888901, "learning_rate": 4.192807957153788e-05, "loss": 0.1908, "step": 40060 }, { "epoch": 3.2453823720025925, "grad_norm": 0.0615009181201458, "learning_rate": 4.192357891894325e-05, "loss": 0.2266, "step": 40061 }, { "epoch": 3.2454633830200907, "grad_norm": 0.08514515310525894, "learning_rate": 4.191907826634862e-05, "loss": 0.2502, "step": 40062 }, { "epoch": 3.245544394037589, "grad_norm": 0.0665687695145607, "learning_rate": 4.1914577613754e-05, "loss": 0.2217, "step": 40063 }, { "epoch": 3.2456254050550877, "grad_norm": 0.09101278334856033, "learning_rate": 4.191007696115937e-05, "loss": 0.2613, "step": 40064 }, { "epoch": 3.245706416072586, "grad_norm": 0.07187115401029587, "learning_rate": 4.190557630856474e-05, "loss": 0.2, "step": 40065 }, { "epoch": 3.245787427090084, "grad_norm": 0.07283537834882736, "learning_rate": 4.190107565597012e-05, "loss": 0.2134, "step": 40066 }, { "epoch": 3.245868438107583, "grad_norm": 0.06305906176567078, "learning_rate": 4.1896575003375494e-05, "loss": 0.2003, "step": 40067 }, { "epoch": 3.245949449125081, "grad_norm": 0.06886248290538788, "learning_rate": 4.189207435078086e-05, "loss": 0.2297, "step": 40068 }, { "epoch": 3.2460304601425793, "grad_norm": 0.06959490478038788, "learning_rate": 4.188757369818624e-05, "loss": 0.2007, "step": 40069 }, { "epoch": 3.2461114711600776, "grad_norm": 0.07028921693563461, "learning_rate": 4.1883073045591614e-05, "loss": 0.2287, "step": 40070 }, { "epoch": 3.2461924821775763, "grad_norm": 0.09249284863471985, "learning_rate": 4.187857239299698e-05, "loss": 0.2119, "step": 40071 }, { "epoch": 3.2462734931950745, "grad_norm": 0.07930509001016617, "learning_rate": 4.187407174040236e-05, "loss": 0.2291, "step": 40072 }, { "epoch": 3.2463545042125728, "grad_norm": 0.07560622692108154, "learning_rate": 4.1869571087807735e-05, "loss": 0.2331, "step": 40073 }, { "epoch": 3.2464355152300715, "grad_norm": 0.06883285194635391, "learning_rate": 4.186507043521311e-05, "loss": 0.2736, "step": 40074 }, { "epoch": 3.2465165262475697, "grad_norm": 0.05891996994614601, "learning_rate": 4.186056978261848e-05, "loss": 0.2471, "step": 40075 }, { "epoch": 3.246597537265068, "grad_norm": 0.06350000947713852, "learning_rate": 4.1856069130023856e-05, "loss": 0.2182, "step": 40076 }, { "epoch": 3.2466785482825666, "grad_norm": 0.08027451485395432, "learning_rate": 4.185156847742923e-05, "loss": 0.2259, "step": 40077 }, { "epoch": 3.246759559300065, "grad_norm": 0.0639888122677803, "learning_rate": 4.18470678248346e-05, "loss": 0.2068, "step": 40078 }, { "epoch": 3.246840570317563, "grad_norm": 0.07857982814311981, "learning_rate": 4.184256717223998e-05, "loss": 0.2484, "step": 40079 }, { "epoch": 3.2469215813350614, "grad_norm": 0.06648974120616913, "learning_rate": 4.183806651964535e-05, "loss": 0.2119, "step": 40080 }, { "epoch": 3.24700259235256, "grad_norm": 0.07797715067863464, "learning_rate": 4.1833565867050724e-05, "loss": 0.2033, "step": 40081 }, { "epoch": 3.2470836033700583, "grad_norm": 0.0580095537006855, "learning_rate": 4.18290652144561e-05, "loss": 0.1904, "step": 40082 }, { "epoch": 3.2471646143875565, "grad_norm": 0.06907695531845093, "learning_rate": 4.182456456186147e-05, "loss": 0.2335, "step": 40083 }, { "epoch": 3.2472456254050552, "grad_norm": 0.06992605328559875, "learning_rate": 4.1820063909266845e-05, "loss": 0.2249, "step": 40084 }, { "epoch": 3.2473266364225535, "grad_norm": 0.07529851049184799, "learning_rate": 4.181556325667222e-05, "loss": 0.2418, "step": 40085 }, { "epoch": 3.2474076474400517, "grad_norm": 0.05927077680826187, "learning_rate": 4.181106260407759e-05, "loss": 0.2169, "step": 40086 }, { "epoch": 3.2474886584575504, "grad_norm": 0.06652016192674637, "learning_rate": 4.1806561951482966e-05, "loss": 0.1933, "step": 40087 }, { "epoch": 3.2475696694750487, "grad_norm": 0.08537282794713974, "learning_rate": 4.180206129888834e-05, "loss": 0.229, "step": 40088 }, { "epoch": 3.247650680492547, "grad_norm": 0.0686645582318306, "learning_rate": 4.179756064629371e-05, "loss": 0.2335, "step": 40089 }, { "epoch": 3.2477316915100456, "grad_norm": 0.06994078308343887, "learning_rate": 4.179305999369909e-05, "loss": 0.2239, "step": 40090 }, { "epoch": 3.247812702527544, "grad_norm": 0.08000598102807999, "learning_rate": 4.178855934110446e-05, "loss": 0.273, "step": 40091 }, { "epoch": 3.247893713545042, "grad_norm": 0.06356451660394669, "learning_rate": 4.1784058688509834e-05, "loss": 0.2106, "step": 40092 }, { "epoch": 3.2479747245625403, "grad_norm": 0.07621801644563675, "learning_rate": 4.1779558035915214e-05, "loss": 0.2109, "step": 40093 }, { "epoch": 3.248055735580039, "grad_norm": 0.0670614168047905, "learning_rate": 4.177505738332058e-05, "loss": 0.2243, "step": 40094 }, { "epoch": 3.2481367465975373, "grad_norm": 0.06827739626169205, "learning_rate": 4.1770556730725955e-05, "loss": 0.2448, "step": 40095 }, { "epoch": 3.2482177576150355, "grad_norm": 0.0642535388469696, "learning_rate": 4.1766056078131335e-05, "loss": 0.2393, "step": 40096 }, { "epoch": 3.248298768632534, "grad_norm": 0.0656471997499466, "learning_rate": 4.17615554255367e-05, "loss": 0.2621, "step": 40097 }, { "epoch": 3.2483797796500324, "grad_norm": 0.06643085181713104, "learning_rate": 4.1757054772942075e-05, "loss": 0.2005, "step": 40098 }, { "epoch": 3.2484607906675307, "grad_norm": 0.06502383947372437, "learning_rate": 4.1752554120347456e-05, "loss": 0.2495, "step": 40099 }, { "epoch": 3.248541801685029, "grad_norm": 0.06458619236946106, "learning_rate": 4.174805346775282e-05, "loss": 0.2114, "step": 40100 }, { "epoch": 3.2486228127025276, "grad_norm": 0.06655988842248917, "learning_rate": 4.1743552815158196e-05, "loss": 0.229, "step": 40101 }, { "epoch": 3.248703823720026, "grad_norm": 0.06997954100370407, "learning_rate": 4.1739052162563577e-05, "loss": 0.239, "step": 40102 }, { "epoch": 3.248784834737524, "grad_norm": 0.06454320251941681, "learning_rate": 4.173455150996895e-05, "loss": 0.2321, "step": 40103 }, { "epoch": 3.248865845755023, "grad_norm": 0.07845567911863327, "learning_rate": 4.173005085737432e-05, "loss": 0.2179, "step": 40104 }, { "epoch": 3.248946856772521, "grad_norm": 0.07784346491098404, "learning_rate": 4.17255502047797e-05, "loss": 0.269, "step": 40105 }, { "epoch": 3.2490278677900193, "grad_norm": 0.06842927634716034, "learning_rate": 4.172104955218507e-05, "loss": 0.2269, "step": 40106 }, { "epoch": 3.249108878807518, "grad_norm": 0.0676320418715477, "learning_rate": 4.171654889959044e-05, "loss": 0.2394, "step": 40107 }, { "epoch": 3.249189889825016, "grad_norm": 0.08034982532262802, "learning_rate": 4.171204824699582e-05, "loss": 0.2489, "step": 40108 }, { "epoch": 3.2492709008425145, "grad_norm": 0.07106079906225204, "learning_rate": 4.170754759440119e-05, "loss": 0.2396, "step": 40109 }, { "epoch": 3.249351911860013, "grad_norm": 0.07400369644165039, "learning_rate": 4.170304694180656e-05, "loss": 0.2471, "step": 40110 }, { "epoch": 3.2494329228775114, "grad_norm": 0.07235600054264069, "learning_rate": 4.169854628921194e-05, "loss": 0.2298, "step": 40111 }, { "epoch": 3.2495139338950096, "grad_norm": 0.06874226033687592, "learning_rate": 4.169404563661731e-05, "loss": 0.2381, "step": 40112 }, { "epoch": 3.2495949449125083, "grad_norm": 0.07894796133041382, "learning_rate": 4.168954498402268e-05, "loss": 0.2341, "step": 40113 }, { "epoch": 3.2496759559300066, "grad_norm": 0.07245932519435883, "learning_rate": 4.168504433142806e-05, "loss": 0.2202, "step": 40114 }, { "epoch": 3.249756966947505, "grad_norm": 0.06470391154289246, "learning_rate": 4.168054367883343e-05, "loss": 0.2085, "step": 40115 }, { "epoch": 3.249837977965003, "grad_norm": 0.06465082615613937, "learning_rate": 4.167604302623881e-05, "loss": 0.2549, "step": 40116 }, { "epoch": 3.2499189889825018, "grad_norm": 0.06674449145793915, "learning_rate": 4.167154237364418e-05, "loss": 0.2069, "step": 40117 }, { "epoch": 3.25, "grad_norm": 0.07038500905036926, "learning_rate": 4.1667041721049554e-05, "loss": 0.2391, "step": 40118 }, { "epoch": 3.2500810110174982, "grad_norm": 0.07672902941703796, "learning_rate": 4.166254106845493e-05, "loss": 0.225, "step": 40119 }, { "epoch": 3.250162022034997, "grad_norm": 0.07818491011857986, "learning_rate": 4.16580404158603e-05, "loss": 0.2197, "step": 40120 }, { "epoch": 3.250243033052495, "grad_norm": 0.07456718385219574, "learning_rate": 4.1653539763265675e-05, "loss": 0.2212, "step": 40121 }, { "epoch": 3.2503240440699934, "grad_norm": 0.07713481038808823, "learning_rate": 4.164903911067105e-05, "loss": 0.2228, "step": 40122 }, { "epoch": 3.2504050550874917, "grad_norm": 0.06257275491952896, "learning_rate": 4.164453845807642e-05, "loss": 0.2329, "step": 40123 }, { "epoch": 3.2504860661049904, "grad_norm": 0.08117084950208664, "learning_rate": 4.1640037805481796e-05, "loss": 0.2085, "step": 40124 }, { "epoch": 3.2505670771224886, "grad_norm": 0.06761786341667175, "learning_rate": 4.163553715288717e-05, "loss": 0.2545, "step": 40125 }, { "epoch": 3.250648088139987, "grad_norm": 0.08201505988836288, "learning_rate": 4.163103650029254e-05, "loss": 0.2813, "step": 40126 }, { "epoch": 3.2507290991574855, "grad_norm": 0.07313979417085648, "learning_rate": 4.162653584769792e-05, "loss": 0.2312, "step": 40127 }, { "epoch": 3.250810110174984, "grad_norm": 0.0680297464132309, "learning_rate": 4.162203519510329e-05, "loss": 0.238, "step": 40128 }, { "epoch": 3.250891121192482, "grad_norm": 0.07706769555807114, "learning_rate": 4.161753454250867e-05, "loss": 0.2279, "step": 40129 }, { "epoch": 3.2509721322099807, "grad_norm": 0.06838444620370865, "learning_rate": 4.161303388991404e-05, "loss": 0.2087, "step": 40130 }, { "epoch": 3.251053143227479, "grad_norm": 0.08436397463083267, "learning_rate": 4.160853323731941e-05, "loss": 0.2532, "step": 40131 }, { "epoch": 3.251134154244977, "grad_norm": 0.05488111823797226, "learning_rate": 4.160403258472479e-05, "loss": 0.2381, "step": 40132 }, { "epoch": 3.251215165262476, "grad_norm": 0.07039676606655121, "learning_rate": 4.159953193213016e-05, "loss": 0.2248, "step": 40133 }, { "epoch": 3.251296176279974, "grad_norm": 0.0652816966176033, "learning_rate": 4.159503127953553e-05, "loss": 0.2165, "step": 40134 }, { "epoch": 3.2513771872974724, "grad_norm": 0.07050791382789612, "learning_rate": 4.159053062694091e-05, "loss": 0.235, "step": 40135 }, { "epoch": 3.251458198314971, "grad_norm": 0.07297641783952713, "learning_rate": 4.158602997434628e-05, "loss": 0.2507, "step": 40136 }, { "epoch": 3.2515392093324693, "grad_norm": 0.0634954422712326, "learning_rate": 4.158152932175165e-05, "loss": 0.2073, "step": 40137 }, { "epoch": 3.2516202203499676, "grad_norm": 0.07681002467870712, "learning_rate": 4.157702866915703e-05, "loss": 0.2564, "step": 40138 }, { "epoch": 3.251701231367466, "grad_norm": 0.08574255555868149, "learning_rate": 4.15725280165624e-05, "loss": 0.271, "step": 40139 }, { "epoch": 3.2517822423849645, "grad_norm": 0.06906864047050476, "learning_rate": 4.1568027363967773e-05, "loss": 0.1695, "step": 40140 }, { "epoch": 3.2518632534024627, "grad_norm": 0.07001274824142456, "learning_rate": 4.1563526711373154e-05, "loss": 0.2208, "step": 40141 }, { "epoch": 3.251944264419961, "grad_norm": 0.07010383158922195, "learning_rate": 4.155902605877853e-05, "loss": 0.2613, "step": 40142 }, { "epoch": 3.2520252754374597, "grad_norm": 0.07222133874893188, "learning_rate": 4.1554525406183894e-05, "loss": 0.2113, "step": 40143 }, { "epoch": 3.252106286454958, "grad_norm": 0.08615390211343765, "learning_rate": 4.1550024753589275e-05, "loss": 0.2201, "step": 40144 }, { "epoch": 3.252187297472456, "grad_norm": 0.06933669000864029, "learning_rate": 4.154552410099465e-05, "loss": 0.2144, "step": 40145 }, { "epoch": 3.2522683084899544, "grad_norm": 0.07412154227495193, "learning_rate": 4.1541023448400015e-05, "loss": 0.2293, "step": 40146 }, { "epoch": 3.252349319507453, "grad_norm": 0.05812745541334152, "learning_rate": 4.1536522795805395e-05, "loss": 0.1891, "step": 40147 }, { "epoch": 3.2524303305249513, "grad_norm": 0.08077571541070938, "learning_rate": 4.153202214321077e-05, "loss": 0.2291, "step": 40148 }, { "epoch": 3.2525113415424496, "grad_norm": 0.07784144580364227, "learning_rate": 4.1527521490616136e-05, "loss": 0.2176, "step": 40149 }, { "epoch": 3.2525923525599483, "grad_norm": 0.06963331252336502, "learning_rate": 4.1523020838021516e-05, "loss": 0.2368, "step": 40150 }, { "epoch": 3.2526733635774465, "grad_norm": 0.07795493304729462, "learning_rate": 4.151852018542689e-05, "loss": 0.2419, "step": 40151 }, { "epoch": 3.2527543745949448, "grad_norm": 0.05834238603711128, "learning_rate": 4.151401953283226e-05, "loss": 0.1873, "step": 40152 }, { "epoch": 3.2528353856124435, "grad_norm": 0.06364904344081879, "learning_rate": 4.150951888023764e-05, "loss": 0.2099, "step": 40153 }, { "epoch": 3.2529163966299417, "grad_norm": 0.08391781896352768, "learning_rate": 4.150501822764301e-05, "loss": 0.2647, "step": 40154 }, { "epoch": 3.25299740764744, "grad_norm": 0.08501686900854111, "learning_rate": 4.1500517575048384e-05, "loss": 0.2522, "step": 40155 }, { "epoch": 3.2530784186649386, "grad_norm": 0.06751678884029388, "learning_rate": 4.149601692245376e-05, "loss": 0.2084, "step": 40156 }, { "epoch": 3.253159429682437, "grad_norm": 0.07635574042797089, "learning_rate": 4.149151626985913e-05, "loss": 0.2444, "step": 40157 }, { "epoch": 3.253240440699935, "grad_norm": 0.06685523688793182, "learning_rate": 4.1487015617264505e-05, "loss": 0.1723, "step": 40158 }, { "epoch": 3.253321451717434, "grad_norm": 0.06515222042798996, "learning_rate": 4.148251496466988e-05, "loss": 0.2056, "step": 40159 }, { "epoch": 3.253402462734932, "grad_norm": 0.06930624693632126, "learning_rate": 4.147801431207525e-05, "loss": 0.2188, "step": 40160 }, { "epoch": 3.2534834737524303, "grad_norm": 0.0722808688879013, "learning_rate": 4.1473513659480626e-05, "loss": 0.2177, "step": 40161 }, { "epoch": 3.2535644847699285, "grad_norm": 0.07483646273612976, "learning_rate": 4.1469013006886e-05, "loss": 0.258, "step": 40162 }, { "epoch": 3.2536454957874272, "grad_norm": 0.07711175084114075, "learning_rate": 4.146451235429137e-05, "loss": 0.1879, "step": 40163 }, { "epoch": 3.2537265068049255, "grad_norm": 0.08365480601787567, "learning_rate": 4.146001170169675e-05, "loss": 0.2328, "step": 40164 }, { "epoch": 3.2538075178224237, "grad_norm": 0.07842250913381577, "learning_rate": 4.145551104910212e-05, "loss": 0.2462, "step": 40165 }, { "epoch": 3.2538885288399224, "grad_norm": 0.07055499404668808, "learning_rate": 4.1451010396507494e-05, "loss": 0.2611, "step": 40166 }, { "epoch": 3.2539695398574207, "grad_norm": 0.08839535713195801, "learning_rate": 4.144650974391287e-05, "loss": 0.2337, "step": 40167 }, { "epoch": 3.254050550874919, "grad_norm": 0.06849615275859833, "learning_rate": 4.144200909131825e-05, "loss": 0.2392, "step": 40168 }, { "epoch": 3.254131561892417, "grad_norm": 0.0740901306271553, "learning_rate": 4.1437508438723615e-05, "loss": 0.2294, "step": 40169 }, { "epoch": 3.254212572909916, "grad_norm": 0.07550173997879028, "learning_rate": 4.143300778612899e-05, "loss": 0.2075, "step": 40170 }, { "epoch": 3.254293583927414, "grad_norm": 0.06401327252388, "learning_rate": 4.142850713353437e-05, "loss": 0.2083, "step": 40171 }, { "epoch": 3.2543745949449123, "grad_norm": 0.07523950189352036, "learning_rate": 4.1424006480939736e-05, "loss": 0.2428, "step": 40172 }, { "epoch": 3.254455605962411, "grad_norm": 0.06384017318487167, "learning_rate": 4.141950582834511e-05, "loss": 0.2369, "step": 40173 }, { "epoch": 3.2545366169799093, "grad_norm": 0.06851238757371902, "learning_rate": 4.141500517575049e-05, "loss": 0.2115, "step": 40174 }, { "epoch": 3.2546176279974075, "grad_norm": 0.07990797609090805, "learning_rate": 4.1410504523155856e-05, "loss": 0.22, "step": 40175 }, { "epoch": 3.254698639014906, "grad_norm": 0.05939148738980293, "learning_rate": 4.140600387056123e-05, "loss": 0.212, "step": 40176 }, { "epoch": 3.2547796500324044, "grad_norm": 0.0688575804233551, "learning_rate": 4.140150321796661e-05, "loss": 0.2121, "step": 40177 }, { "epoch": 3.2548606610499027, "grad_norm": 0.07078464329242706, "learning_rate": 4.139700256537198e-05, "loss": 0.1916, "step": 40178 }, { "epoch": 3.2549416720674014, "grad_norm": 0.07321570813655853, "learning_rate": 4.139250191277735e-05, "loss": 0.2323, "step": 40179 }, { "epoch": 3.2550226830848996, "grad_norm": 0.07249194383621216, "learning_rate": 4.138800126018273e-05, "loss": 0.2252, "step": 40180 }, { "epoch": 3.255103694102398, "grad_norm": 0.08079152554273605, "learning_rate": 4.1383500607588105e-05, "loss": 0.226, "step": 40181 }, { "epoch": 3.2551847051198965, "grad_norm": 0.0795033872127533, "learning_rate": 4.137899995499347e-05, "loss": 0.2598, "step": 40182 }, { "epoch": 3.255265716137395, "grad_norm": 0.06471917778253555, "learning_rate": 4.137449930239885e-05, "loss": 0.2138, "step": 40183 }, { "epoch": 3.255346727154893, "grad_norm": 0.07006263732910156, "learning_rate": 4.1369998649804226e-05, "loss": 0.2195, "step": 40184 }, { "epoch": 3.2554277381723913, "grad_norm": 0.06144615635275841, "learning_rate": 4.136549799720959e-05, "loss": 0.2555, "step": 40185 }, { "epoch": 3.25550874918989, "grad_norm": 0.0662737786769867, "learning_rate": 4.136099734461497e-05, "loss": 0.1939, "step": 40186 }, { "epoch": 3.255589760207388, "grad_norm": 0.06354191899299622, "learning_rate": 4.1356496692020346e-05, "loss": 0.2214, "step": 40187 }, { "epoch": 3.2556707712248865, "grad_norm": 0.08236868679523468, "learning_rate": 4.135199603942572e-05, "loss": 0.2174, "step": 40188 }, { "epoch": 3.255751782242385, "grad_norm": 0.09432529658079147, "learning_rate": 4.1347495386831094e-05, "loss": 0.2061, "step": 40189 }, { "epoch": 3.2558327932598834, "grad_norm": 0.06357807666063309, "learning_rate": 4.134299473423647e-05, "loss": 0.1987, "step": 40190 }, { "epoch": 3.2559138042773816, "grad_norm": 0.06725596636533737, "learning_rate": 4.133849408164184e-05, "loss": 0.2093, "step": 40191 }, { "epoch": 3.25599481529488, "grad_norm": 0.06739084422588348, "learning_rate": 4.1333993429047214e-05, "loss": 0.2309, "step": 40192 }, { "epoch": 3.2560758263123786, "grad_norm": 0.07015515863895416, "learning_rate": 4.132949277645259e-05, "loss": 0.2096, "step": 40193 }, { "epoch": 3.256156837329877, "grad_norm": 0.06270213425159454, "learning_rate": 4.132499212385796e-05, "loss": 0.2008, "step": 40194 }, { "epoch": 3.256237848347375, "grad_norm": 0.08969100564718246, "learning_rate": 4.1320491471263335e-05, "loss": 0.2107, "step": 40195 }, { "epoch": 3.2563188593648738, "grad_norm": 0.07705602049827576, "learning_rate": 4.131599081866871e-05, "loss": 0.1887, "step": 40196 }, { "epoch": 3.256399870382372, "grad_norm": 0.08064062893390656, "learning_rate": 4.131149016607408e-05, "loss": 0.2065, "step": 40197 }, { "epoch": 3.2564808813998702, "grad_norm": 0.06924363970756531, "learning_rate": 4.1306989513479456e-05, "loss": 0.2621, "step": 40198 }, { "epoch": 3.256561892417369, "grad_norm": 0.06432975083589554, "learning_rate": 4.130248886088483e-05, "loss": 0.229, "step": 40199 }, { "epoch": 3.256642903434867, "grad_norm": 0.0756380558013916, "learning_rate": 4.12979882082902e-05, "loss": 0.2193, "step": 40200 }, { "epoch": 3.2567239144523654, "grad_norm": 0.06310634315013885, "learning_rate": 4.129348755569558e-05, "loss": 0.2315, "step": 40201 }, { "epoch": 3.256804925469864, "grad_norm": 0.0665639266371727, "learning_rate": 4.128898690310095e-05, "loss": 0.2299, "step": 40202 }, { "epoch": 3.2568859364873624, "grad_norm": 0.06902968138456345, "learning_rate": 4.1284486250506324e-05, "loss": 0.2383, "step": 40203 }, { "epoch": 3.2569669475048606, "grad_norm": 0.0826072245836258, "learning_rate": 4.12799855979117e-05, "loss": 0.2294, "step": 40204 }, { "epoch": 3.2570479585223593, "grad_norm": 0.06567224115133286, "learning_rate": 4.127548494531707e-05, "loss": 0.2133, "step": 40205 }, { "epoch": 3.2571289695398575, "grad_norm": 0.0768572986125946, "learning_rate": 4.1270984292722445e-05, "loss": 0.2166, "step": 40206 }, { "epoch": 3.2572099805573558, "grad_norm": 0.06855472177267075, "learning_rate": 4.1266483640127825e-05, "loss": 0.2298, "step": 40207 }, { "epoch": 3.257290991574854, "grad_norm": 0.06466935575008392, "learning_rate": 4.126198298753319e-05, "loss": 0.2315, "step": 40208 }, { "epoch": 3.2573720025923527, "grad_norm": 0.06445963680744171, "learning_rate": 4.1257482334938566e-05, "loss": 0.2114, "step": 40209 }, { "epoch": 3.257453013609851, "grad_norm": 0.06506424397230148, "learning_rate": 4.1252981682343946e-05, "loss": 0.2329, "step": 40210 }, { "epoch": 3.257534024627349, "grad_norm": 0.06592871993780136, "learning_rate": 4.124848102974931e-05, "loss": 0.2447, "step": 40211 }, { "epoch": 3.257615035644848, "grad_norm": 0.05974597483873367, "learning_rate": 4.1243980377154686e-05, "loss": 0.2061, "step": 40212 }, { "epoch": 3.257696046662346, "grad_norm": 0.07356038689613342, "learning_rate": 4.123947972456007e-05, "loss": 0.2689, "step": 40213 }, { "epoch": 3.2577770576798444, "grad_norm": 0.06664173305034637, "learning_rate": 4.1234979071965434e-05, "loss": 0.2183, "step": 40214 }, { "epoch": 3.2578580686973426, "grad_norm": 0.0713634043931961, "learning_rate": 4.123047841937081e-05, "loss": 0.2332, "step": 40215 }, { "epoch": 3.2579390797148413, "grad_norm": 0.07688642293214798, "learning_rate": 4.122597776677619e-05, "loss": 0.2101, "step": 40216 }, { "epoch": 3.2580200907323396, "grad_norm": 0.06881708651781082, "learning_rate": 4.1221477114181554e-05, "loss": 0.2483, "step": 40217 }, { "epoch": 3.258101101749838, "grad_norm": 0.07010731101036072, "learning_rate": 4.121697646158693e-05, "loss": 0.2214, "step": 40218 }, { "epoch": 3.2581821127673365, "grad_norm": 0.06224513053894043, "learning_rate": 4.121247580899231e-05, "loss": 0.1854, "step": 40219 }, { "epoch": 3.2582631237848347, "grad_norm": 0.08643602579832077, "learning_rate": 4.120797515639768e-05, "loss": 0.2819, "step": 40220 }, { "epoch": 3.258344134802333, "grad_norm": 0.08633658289909363, "learning_rate": 4.1203474503803056e-05, "loss": 0.2573, "step": 40221 }, { "epoch": 3.2584251458198317, "grad_norm": 0.06527513265609741, "learning_rate": 4.119897385120843e-05, "loss": 0.2206, "step": 40222 }, { "epoch": 3.25850615683733, "grad_norm": 0.08194277435541153, "learning_rate": 4.11944731986138e-05, "loss": 0.2267, "step": 40223 }, { "epoch": 3.258587167854828, "grad_norm": 0.07805872708559036, "learning_rate": 4.1189972546019176e-05, "loss": 0.2558, "step": 40224 }, { "epoch": 3.258668178872327, "grad_norm": 0.07843924313783646, "learning_rate": 4.118547189342455e-05, "loss": 0.209, "step": 40225 }, { "epoch": 3.258749189889825, "grad_norm": 0.0763542577624321, "learning_rate": 4.1180971240829924e-05, "loss": 0.218, "step": 40226 }, { "epoch": 3.2588302009073233, "grad_norm": 0.07482190430164337, "learning_rate": 4.11764705882353e-05, "loss": 0.24, "step": 40227 }, { "epoch": 3.2589112119248216, "grad_norm": 0.08386291563510895, "learning_rate": 4.117196993564067e-05, "loss": 0.2434, "step": 40228 }, { "epoch": 3.2589922229423203, "grad_norm": 0.08602840453386307, "learning_rate": 4.1167469283046044e-05, "loss": 0.2439, "step": 40229 }, { "epoch": 3.2590732339598185, "grad_norm": 0.08088722825050354, "learning_rate": 4.116296863045142e-05, "loss": 0.2181, "step": 40230 }, { "epoch": 3.2591542449773168, "grad_norm": 0.057865746319293976, "learning_rate": 4.115846797785679e-05, "loss": 0.1942, "step": 40231 }, { "epoch": 3.2592352559948155, "grad_norm": 0.06597369909286499, "learning_rate": 4.1153967325262165e-05, "loss": 0.225, "step": 40232 }, { "epoch": 3.2593162670123137, "grad_norm": 0.0643196552991867, "learning_rate": 4.114946667266754e-05, "loss": 0.2137, "step": 40233 }, { "epoch": 3.259397278029812, "grad_norm": 0.06918679922819138, "learning_rate": 4.114496602007291e-05, "loss": 0.3121, "step": 40234 }, { "epoch": 3.2594782890473106, "grad_norm": 0.07351045310497284, "learning_rate": 4.1140465367478286e-05, "loss": 0.2281, "step": 40235 }, { "epoch": 3.259559300064809, "grad_norm": 0.07151784747838974, "learning_rate": 4.113596471488366e-05, "loss": 0.2064, "step": 40236 }, { "epoch": 3.259640311082307, "grad_norm": 0.06628546118736267, "learning_rate": 4.113146406228903e-05, "loss": 0.2263, "step": 40237 }, { "epoch": 3.2597213220998054, "grad_norm": 0.06502491235733032, "learning_rate": 4.112696340969441e-05, "loss": 0.2323, "step": 40238 }, { "epoch": 3.259802333117304, "grad_norm": 0.06255894899368286, "learning_rate": 4.112246275709978e-05, "loss": 0.1968, "step": 40239 }, { "epoch": 3.2598833441348023, "grad_norm": 0.09315559267997742, "learning_rate": 4.1117962104505154e-05, "loss": 0.2387, "step": 40240 }, { "epoch": 3.2599643551523005, "grad_norm": 0.07603956013917923, "learning_rate": 4.111346145191053e-05, "loss": 0.2689, "step": 40241 }, { "epoch": 3.2600453661697992, "grad_norm": 0.07923232764005661, "learning_rate": 4.11089607993159e-05, "loss": 0.2161, "step": 40242 }, { "epoch": 3.2601263771872975, "grad_norm": 0.07558530569076538, "learning_rate": 4.1104460146721275e-05, "loss": 0.2411, "step": 40243 }, { "epoch": 3.2602073882047957, "grad_norm": 0.0685918852686882, "learning_rate": 4.109995949412665e-05, "loss": 0.2282, "step": 40244 }, { "epoch": 3.2602883992222944, "grad_norm": 0.06940195709466934, "learning_rate": 4.109545884153202e-05, "loss": 0.2205, "step": 40245 }, { "epoch": 3.2603694102397927, "grad_norm": 0.050486329942941666, "learning_rate": 4.1090958188937396e-05, "loss": 0.1769, "step": 40246 }, { "epoch": 3.260450421257291, "grad_norm": 0.06900981813669205, "learning_rate": 4.108645753634277e-05, "loss": 0.1998, "step": 40247 }, { "epoch": 3.2605314322747896, "grad_norm": 0.07367018610239029, "learning_rate": 4.108195688374814e-05, "loss": 0.2257, "step": 40248 }, { "epoch": 3.260612443292288, "grad_norm": 0.07741806656122208, "learning_rate": 4.107745623115352e-05, "loss": 0.2097, "step": 40249 }, { "epoch": 3.260693454309786, "grad_norm": 0.0637543573975563, "learning_rate": 4.107295557855889e-05, "loss": 0.2302, "step": 40250 }, { "epoch": 3.2607744653272843, "grad_norm": 0.08200757205486298, "learning_rate": 4.1068454925964264e-05, "loss": 0.2664, "step": 40251 }, { "epoch": 3.260855476344783, "grad_norm": 0.07200410962104797, "learning_rate": 4.1063954273369644e-05, "loss": 0.1957, "step": 40252 }, { "epoch": 3.2609364873622813, "grad_norm": 0.08035401999950409, "learning_rate": 4.105945362077501e-05, "loss": 0.2495, "step": 40253 }, { "epoch": 3.2610174983797795, "grad_norm": 0.07450471818447113, "learning_rate": 4.1054952968180385e-05, "loss": 0.245, "step": 40254 }, { "epoch": 3.261098509397278, "grad_norm": 0.07003799080848694, "learning_rate": 4.1050452315585765e-05, "loss": 0.24, "step": 40255 }, { "epoch": 3.2611795204147764, "grad_norm": 0.06997160613536835, "learning_rate": 4.104595166299113e-05, "loss": 0.249, "step": 40256 }, { "epoch": 3.2612605314322747, "grad_norm": 0.062063608318567276, "learning_rate": 4.104145101039651e-05, "loss": 0.2189, "step": 40257 }, { "epoch": 3.261341542449773, "grad_norm": 0.08056069165468216, "learning_rate": 4.1036950357801886e-05, "loss": 0.25, "step": 40258 }, { "epoch": 3.2614225534672716, "grad_norm": 0.06770657747983932, "learning_rate": 4.103244970520725e-05, "loss": 0.2254, "step": 40259 }, { "epoch": 3.26150356448477, "grad_norm": 0.06321107596158981, "learning_rate": 4.102794905261263e-05, "loss": 0.2001, "step": 40260 }, { "epoch": 3.261584575502268, "grad_norm": 0.0895034447312355, "learning_rate": 4.1023448400018007e-05, "loss": 0.2436, "step": 40261 }, { "epoch": 3.261665586519767, "grad_norm": 0.06674846261739731, "learning_rate": 4.101894774742338e-05, "loss": 0.2137, "step": 40262 }, { "epoch": 3.261746597537265, "grad_norm": 0.06829430162906647, "learning_rate": 4.1014447094828754e-05, "loss": 0.2496, "step": 40263 }, { "epoch": 3.2618276085547633, "grad_norm": 0.07296734303236008, "learning_rate": 4.100994644223413e-05, "loss": 0.207, "step": 40264 }, { "epoch": 3.261908619572262, "grad_norm": 0.09419719874858856, "learning_rate": 4.10054457896395e-05, "loss": 0.2538, "step": 40265 }, { "epoch": 3.26198963058976, "grad_norm": 0.06881719082593918, "learning_rate": 4.1000945137044875e-05, "loss": 0.2064, "step": 40266 }, { "epoch": 3.2620706416072585, "grad_norm": 0.0725458487868309, "learning_rate": 4.099644448445025e-05, "loss": 0.2233, "step": 40267 }, { "epoch": 3.262151652624757, "grad_norm": 0.06781957298517227, "learning_rate": 4.099194383185562e-05, "loss": 0.2181, "step": 40268 }, { "epoch": 3.2622326636422554, "grad_norm": 0.070824533700943, "learning_rate": 4.0987443179260995e-05, "loss": 0.2251, "step": 40269 }, { "epoch": 3.2623136746597536, "grad_norm": 0.06839597970247269, "learning_rate": 4.098294252666637e-05, "loss": 0.2105, "step": 40270 }, { "epoch": 3.2623946856772523, "grad_norm": 0.05092064291238785, "learning_rate": 4.097844187407174e-05, "loss": 0.1938, "step": 40271 }, { "epoch": 3.2624756966947506, "grad_norm": 0.08955680578947067, "learning_rate": 4.0973941221477116e-05, "loss": 0.2246, "step": 40272 }, { "epoch": 3.262556707712249, "grad_norm": 0.0841311514377594, "learning_rate": 4.096944056888249e-05, "loss": 0.2397, "step": 40273 }, { "epoch": 3.262637718729747, "grad_norm": 0.06255053728818893, "learning_rate": 4.0964939916287863e-05, "loss": 0.2135, "step": 40274 }, { "epoch": 3.2627187297472457, "grad_norm": 0.06263235956430435, "learning_rate": 4.096043926369324e-05, "loss": 0.2218, "step": 40275 }, { "epoch": 3.262799740764744, "grad_norm": 0.0728955864906311, "learning_rate": 4.095593861109861e-05, "loss": 0.2313, "step": 40276 }, { "epoch": 3.2628807517822422, "grad_norm": 0.06179650127887726, "learning_rate": 4.0951437958503984e-05, "loss": 0.218, "step": 40277 }, { "epoch": 3.262961762799741, "grad_norm": 0.06603959947824478, "learning_rate": 4.094693730590936e-05, "loss": 0.2428, "step": 40278 }, { "epoch": 3.263042773817239, "grad_norm": 0.0733291283249855, "learning_rate": 4.094243665331473e-05, "loss": 0.2307, "step": 40279 }, { "epoch": 3.2631237848347374, "grad_norm": 0.0856151282787323, "learning_rate": 4.0937936000720105e-05, "loss": 0.254, "step": 40280 }, { "epoch": 3.2632047958522357, "grad_norm": 0.06327536702156067, "learning_rate": 4.093343534812548e-05, "loss": 0.1955, "step": 40281 }, { "epoch": 3.2632858068697344, "grad_norm": 0.07989270985126495, "learning_rate": 4.092893469553085e-05, "loss": 0.2325, "step": 40282 }, { "epoch": 3.2633668178872326, "grad_norm": 0.0660238191485405, "learning_rate": 4.0924434042936226e-05, "loss": 0.2338, "step": 40283 }, { "epoch": 3.263447828904731, "grad_norm": 0.06523341685533524, "learning_rate": 4.09199333903416e-05, "loss": 0.2042, "step": 40284 }, { "epoch": 3.2635288399222295, "grad_norm": 0.07220982015132904, "learning_rate": 4.091543273774697e-05, "loss": 0.2523, "step": 40285 }, { "epoch": 3.2636098509397278, "grad_norm": 0.0707927718758583, "learning_rate": 4.091093208515235e-05, "loss": 0.2179, "step": 40286 }, { "epoch": 3.263690861957226, "grad_norm": 0.06387905031442642, "learning_rate": 4.090643143255772e-05, "loss": 0.2315, "step": 40287 }, { "epoch": 3.2637718729747247, "grad_norm": 0.07509984076023102, "learning_rate": 4.09019307799631e-05, "loss": 0.2146, "step": 40288 }, { "epoch": 3.263852883992223, "grad_norm": 0.07076673209667206, "learning_rate": 4.089743012736847e-05, "loss": 0.2459, "step": 40289 }, { "epoch": 3.263933895009721, "grad_norm": 0.0829244926571846, "learning_rate": 4.089292947477385e-05, "loss": 0.2001, "step": 40290 }, { "epoch": 3.26401490602722, "grad_norm": 0.08614283800125122, "learning_rate": 4.088842882217922e-05, "loss": 0.2058, "step": 40291 }, { "epoch": 3.264095917044718, "grad_norm": 0.06972739100456238, "learning_rate": 4.088392816958459e-05, "loss": 0.2176, "step": 40292 }, { "epoch": 3.2641769280622164, "grad_norm": 0.07807248830795288, "learning_rate": 4.087942751698997e-05, "loss": 0.2018, "step": 40293 }, { "epoch": 3.264257939079715, "grad_norm": 0.07096074521541595, "learning_rate": 4.087492686439534e-05, "loss": 0.22, "step": 40294 }, { "epoch": 3.2643389500972133, "grad_norm": 0.0878463014960289, "learning_rate": 4.087042621180071e-05, "loss": 0.2645, "step": 40295 }, { "epoch": 3.2644199611147116, "grad_norm": 0.07176758348941803, "learning_rate": 4.086592555920609e-05, "loss": 0.2477, "step": 40296 }, { "epoch": 3.26450097213221, "grad_norm": 0.07876536250114441, "learning_rate": 4.086142490661146e-05, "loss": 0.2319, "step": 40297 }, { "epoch": 3.2645819831497085, "grad_norm": 0.07930036634206772, "learning_rate": 4.085692425401683e-05, "loss": 0.3047, "step": 40298 }, { "epoch": 3.2646629941672067, "grad_norm": 0.07112422585487366, "learning_rate": 4.085242360142221e-05, "loss": 0.2439, "step": 40299 }, { "epoch": 3.264744005184705, "grad_norm": 0.07274965941905975, "learning_rate": 4.0847922948827584e-05, "loss": 0.2461, "step": 40300 }, { "epoch": 3.2648250162022037, "grad_norm": 0.06299559026956558, "learning_rate": 4.084342229623296e-05, "loss": 0.2217, "step": 40301 }, { "epoch": 3.264906027219702, "grad_norm": 0.058486804366111755, "learning_rate": 4.083892164363833e-05, "loss": 0.2019, "step": 40302 }, { "epoch": 3.2649870382372, "grad_norm": 0.07279517501592636, "learning_rate": 4.0834420991043705e-05, "loss": 0.2286, "step": 40303 }, { "epoch": 3.2650680492546984, "grad_norm": 0.07226823270320892, "learning_rate": 4.082992033844908e-05, "loss": 0.2191, "step": 40304 }, { "epoch": 3.265149060272197, "grad_norm": 0.07637904584407806, "learning_rate": 4.082541968585445e-05, "loss": 0.2184, "step": 40305 }, { "epoch": 3.2652300712896953, "grad_norm": 0.0646221861243248, "learning_rate": 4.0820919033259826e-05, "loss": 0.22, "step": 40306 }, { "epoch": 3.2653110823071936, "grad_norm": 0.06766191124916077, "learning_rate": 4.08164183806652e-05, "loss": 0.2598, "step": 40307 }, { "epoch": 3.2653920933246923, "grad_norm": 0.07320290803909302, "learning_rate": 4.081191772807057e-05, "loss": 0.2415, "step": 40308 }, { "epoch": 3.2654731043421905, "grad_norm": 0.0638764277100563, "learning_rate": 4.0807417075475946e-05, "loss": 0.2177, "step": 40309 }, { "epoch": 3.2655541153596888, "grad_norm": 0.07175182551145554, "learning_rate": 4.080291642288132e-05, "loss": 0.2193, "step": 40310 }, { "epoch": 3.2656351263771874, "grad_norm": 0.057568471878767014, "learning_rate": 4.0798415770286694e-05, "loss": 0.2124, "step": 40311 }, { "epoch": 3.2657161373946857, "grad_norm": 0.06688519567251205, "learning_rate": 4.079391511769207e-05, "loss": 0.2058, "step": 40312 }, { "epoch": 3.265797148412184, "grad_norm": 0.06936531513929367, "learning_rate": 4.078941446509744e-05, "loss": 0.2071, "step": 40313 }, { "epoch": 3.2658781594296826, "grad_norm": 0.07027362287044525, "learning_rate": 4.0784913812502814e-05, "loss": 0.2251, "step": 40314 }, { "epoch": 3.265959170447181, "grad_norm": 0.07744862139225006, "learning_rate": 4.078041315990819e-05, "loss": 0.2324, "step": 40315 }, { "epoch": 3.266040181464679, "grad_norm": 0.06890869140625, "learning_rate": 4.077591250731356e-05, "loss": 0.1964, "step": 40316 }, { "epoch": 3.266121192482178, "grad_norm": 0.06374898552894592, "learning_rate": 4.0771411854718935e-05, "loss": 0.2448, "step": 40317 }, { "epoch": 3.266202203499676, "grad_norm": 0.07263403385877609, "learning_rate": 4.076691120212431e-05, "loss": 0.2453, "step": 40318 }, { "epoch": 3.2662832145171743, "grad_norm": 0.08141748607158661, "learning_rate": 4.076241054952968e-05, "loss": 0.2504, "step": 40319 }, { "epoch": 3.2663642255346725, "grad_norm": 0.08458217233419418, "learning_rate": 4.0757909896935056e-05, "loss": 0.2755, "step": 40320 }, { "epoch": 3.2664452365521712, "grad_norm": 0.05991069972515106, "learning_rate": 4.075340924434043e-05, "loss": 0.2408, "step": 40321 }, { "epoch": 3.2665262475696695, "grad_norm": 0.060942504554986954, "learning_rate": 4.07489085917458e-05, "loss": 0.2189, "step": 40322 }, { "epoch": 3.2666072585871677, "grad_norm": 0.07964294403791428, "learning_rate": 4.0744407939151184e-05, "loss": 0.2039, "step": 40323 }, { "epoch": 3.2666882696046664, "grad_norm": 0.08119510114192963, "learning_rate": 4.073990728655655e-05, "loss": 0.2906, "step": 40324 }, { "epoch": 3.2667692806221647, "grad_norm": 0.0648564025759697, "learning_rate": 4.0735406633961924e-05, "loss": 0.2205, "step": 40325 }, { "epoch": 3.266850291639663, "grad_norm": 0.08307959139347076, "learning_rate": 4.0730905981367304e-05, "loss": 0.2415, "step": 40326 }, { "epoch": 3.266931302657161, "grad_norm": 0.0625900849699974, "learning_rate": 4.072640532877268e-05, "loss": 0.2018, "step": 40327 }, { "epoch": 3.26701231367466, "grad_norm": 0.06181721389293671, "learning_rate": 4.0721904676178045e-05, "loss": 0.2271, "step": 40328 }, { "epoch": 3.267093324692158, "grad_norm": 0.06439773738384247, "learning_rate": 4.0717404023583425e-05, "loss": 0.2007, "step": 40329 }, { "epoch": 3.2671743357096563, "grad_norm": 0.06405269354581833, "learning_rate": 4.07129033709888e-05, "loss": 0.2313, "step": 40330 }, { "epoch": 3.267255346727155, "grad_norm": 0.06496585160493851, "learning_rate": 4.0708402718394166e-05, "loss": 0.2219, "step": 40331 }, { "epoch": 3.2673363577446533, "grad_norm": 0.06631439924240112, "learning_rate": 4.0703902065799546e-05, "loss": 0.2238, "step": 40332 }, { "epoch": 3.2674173687621515, "grad_norm": 0.06565840542316437, "learning_rate": 4.069940141320492e-05, "loss": 0.2012, "step": 40333 }, { "epoch": 3.26749837977965, "grad_norm": 0.07592849433422089, "learning_rate": 4.0694900760610286e-05, "loss": 0.2167, "step": 40334 }, { "epoch": 3.2675793907971484, "grad_norm": 0.08509879559278488, "learning_rate": 4.069040010801567e-05, "loss": 0.2258, "step": 40335 }, { "epoch": 3.2676604018146467, "grad_norm": 0.07429090887308121, "learning_rate": 4.068589945542104e-05, "loss": 0.2345, "step": 40336 }, { "epoch": 3.2677414128321454, "grad_norm": 0.05805768445134163, "learning_rate": 4.068139880282641e-05, "loss": 0.245, "step": 40337 }, { "epoch": 3.2678224238496436, "grad_norm": 0.06344527751207352, "learning_rate": 4.067689815023179e-05, "loss": 0.2481, "step": 40338 }, { "epoch": 3.267903434867142, "grad_norm": 0.06499172002077103, "learning_rate": 4.067239749763716e-05, "loss": 0.2008, "step": 40339 }, { "epoch": 3.2679844458846405, "grad_norm": 0.06943495571613312, "learning_rate": 4.0667896845042535e-05, "loss": 0.2198, "step": 40340 }, { "epoch": 3.268065456902139, "grad_norm": 0.08585415780544281, "learning_rate": 4.066339619244791e-05, "loss": 0.2425, "step": 40341 }, { "epoch": 3.268146467919637, "grad_norm": 0.07219972461462021, "learning_rate": 4.065889553985328e-05, "loss": 0.2182, "step": 40342 }, { "epoch": 3.2682274789371353, "grad_norm": 0.07773865759372711, "learning_rate": 4.0654394887258656e-05, "loss": 0.2571, "step": 40343 }, { "epoch": 3.268308489954634, "grad_norm": 0.0748126357793808, "learning_rate": 4.064989423466403e-05, "loss": 0.2264, "step": 40344 }, { "epoch": 3.268389500972132, "grad_norm": 0.06606455147266388, "learning_rate": 4.06453935820694e-05, "loss": 0.2304, "step": 40345 }, { "epoch": 3.2684705119896305, "grad_norm": 0.05831682309508324, "learning_rate": 4.0640892929474776e-05, "loss": 0.2294, "step": 40346 }, { "epoch": 3.268551523007129, "grad_norm": 0.07438673824071884, "learning_rate": 4.063639227688015e-05, "loss": 0.2101, "step": 40347 }, { "epoch": 3.2686325340246274, "grad_norm": 0.08037227392196655, "learning_rate": 4.0631891624285524e-05, "loss": 0.2391, "step": 40348 }, { "epoch": 3.2687135450421256, "grad_norm": 0.07156945019960403, "learning_rate": 4.06273909716909e-05, "loss": 0.2302, "step": 40349 }, { "epoch": 3.268794556059624, "grad_norm": 0.07022353261709213, "learning_rate": 4.062289031909627e-05, "loss": 0.2187, "step": 40350 }, { "epoch": 3.2688755670771226, "grad_norm": 0.071316197514534, "learning_rate": 4.0618389666501644e-05, "loss": 0.2265, "step": 40351 }, { "epoch": 3.268956578094621, "grad_norm": 0.07702403515577316, "learning_rate": 4.061388901390702e-05, "loss": 0.2527, "step": 40352 }, { "epoch": 3.269037589112119, "grad_norm": 0.07936025410890579, "learning_rate": 4.060938836131239e-05, "loss": 0.2151, "step": 40353 }, { "epoch": 3.2691186001296177, "grad_norm": 0.06787516921758652, "learning_rate": 4.0604887708717765e-05, "loss": 0.2043, "step": 40354 }, { "epoch": 3.269199611147116, "grad_norm": 0.08268849551677704, "learning_rate": 4.060038705612314e-05, "loss": 0.2357, "step": 40355 }, { "epoch": 3.2692806221646142, "grad_norm": 0.06543977558612823, "learning_rate": 4.059588640352851e-05, "loss": 0.2564, "step": 40356 }, { "epoch": 3.269361633182113, "grad_norm": 0.059460386633872986, "learning_rate": 4.0591385750933886e-05, "loss": 0.2374, "step": 40357 }, { "epoch": 3.269442644199611, "grad_norm": 0.08025439828634262, "learning_rate": 4.058688509833926e-05, "loss": 0.2432, "step": 40358 }, { "epoch": 3.2695236552171094, "grad_norm": 0.06559722125530243, "learning_rate": 4.058238444574464e-05, "loss": 0.2397, "step": 40359 }, { "epoch": 3.269604666234608, "grad_norm": 0.05569949746131897, "learning_rate": 4.057788379315001e-05, "loss": 0.1952, "step": 40360 }, { "epoch": 3.2696856772521063, "grad_norm": 0.061552103608846664, "learning_rate": 4.057338314055538e-05, "loss": 0.2192, "step": 40361 }, { "epoch": 3.2697666882696046, "grad_norm": 0.06350647658109665, "learning_rate": 4.056888248796076e-05, "loss": 0.2239, "step": 40362 }, { "epoch": 3.2698476992871033, "grad_norm": 0.07742627710103989, "learning_rate": 4.056438183536613e-05, "loss": 0.2484, "step": 40363 }, { "epoch": 3.2699287103046015, "grad_norm": 0.055037204176187515, "learning_rate": 4.05598811827715e-05, "loss": 0.1932, "step": 40364 }, { "epoch": 3.2700097213220998, "grad_norm": 0.06263311207294464, "learning_rate": 4.055538053017688e-05, "loss": 0.2252, "step": 40365 }, { "epoch": 3.270090732339598, "grad_norm": 0.08150181174278259, "learning_rate": 4.0550879877582255e-05, "loss": 0.215, "step": 40366 }, { "epoch": 3.2701717433570967, "grad_norm": 0.058354031294584274, "learning_rate": 4.054637922498762e-05, "loss": 0.2248, "step": 40367 }, { "epoch": 3.270252754374595, "grad_norm": 0.04838745668530464, "learning_rate": 4.0541878572393e-05, "loss": 0.2019, "step": 40368 }, { "epoch": 3.270333765392093, "grad_norm": 0.07810920476913452, "learning_rate": 4.0537377919798376e-05, "loss": 0.2471, "step": 40369 }, { "epoch": 3.270414776409592, "grad_norm": 0.07518798112869263, "learning_rate": 4.053287726720374e-05, "loss": 0.2306, "step": 40370 }, { "epoch": 3.27049578742709, "grad_norm": 0.07304823398590088, "learning_rate": 4.052837661460912e-05, "loss": 0.2235, "step": 40371 }, { "epoch": 3.2705767984445884, "grad_norm": 0.06685718148946762, "learning_rate": 4.05238759620145e-05, "loss": 0.2342, "step": 40372 }, { "epoch": 3.2706578094620866, "grad_norm": 0.07246522605419159, "learning_rate": 4.0519375309419864e-05, "loss": 0.2164, "step": 40373 }, { "epoch": 3.2707388204795853, "grad_norm": 0.06804099678993225, "learning_rate": 4.0514874656825244e-05, "loss": 0.2291, "step": 40374 }, { "epoch": 3.2708198314970836, "grad_norm": 0.06406091898679733, "learning_rate": 4.051037400423062e-05, "loss": 0.2341, "step": 40375 }, { "epoch": 3.270900842514582, "grad_norm": 0.08121266961097717, "learning_rate": 4.0505873351635985e-05, "loss": 0.245, "step": 40376 }, { "epoch": 3.2709818535320805, "grad_norm": 0.0758911669254303, "learning_rate": 4.0501372699041365e-05, "loss": 0.2786, "step": 40377 }, { "epoch": 3.2710628645495787, "grad_norm": 0.06743774563074112, "learning_rate": 4.049687204644674e-05, "loss": 0.2404, "step": 40378 }, { "epoch": 3.271143875567077, "grad_norm": 0.08256151527166367, "learning_rate": 4.049237139385211e-05, "loss": 0.1982, "step": 40379 }, { "epoch": 3.2712248865845757, "grad_norm": 0.06504231691360474, "learning_rate": 4.0487870741257486e-05, "loss": 0.2153, "step": 40380 }, { "epoch": 3.271305897602074, "grad_norm": 0.07067107409238815, "learning_rate": 4.048337008866286e-05, "loss": 0.2151, "step": 40381 }, { "epoch": 3.271386908619572, "grad_norm": 0.07374674826860428, "learning_rate": 4.047886943606823e-05, "loss": 0.2442, "step": 40382 }, { "epoch": 3.271467919637071, "grad_norm": 0.0919092670083046, "learning_rate": 4.0474368783473607e-05, "loss": 0.2523, "step": 40383 }, { "epoch": 3.271548930654569, "grad_norm": 0.08143769204616547, "learning_rate": 4.046986813087898e-05, "loss": 0.237, "step": 40384 }, { "epoch": 3.2716299416720673, "grad_norm": 0.05799148604273796, "learning_rate": 4.0465367478284354e-05, "loss": 0.1916, "step": 40385 }, { "epoch": 3.271710952689566, "grad_norm": 0.07556015253067017, "learning_rate": 4.046086682568973e-05, "loss": 0.2196, "step": 40386 }, { "epoch": 3.2717919637070643, "grad_norm": 0.05838407948613167, "learning_rate": 4.04563661730951e-05, "loss": 0.2266, "step": 40387 }, { "epoch": 3.2718729747245625, "grad_norm": 0.0696408823132515, "learning_rate": 4.0451865520500475e-05, "loss": 0.2339, "step": 40388 }, { "epoch": 3.2719539857420608, "grad_norm": 0.07987789064645767, "learning_rate": 4.044736486790585e-05, "loss": 0.2522, "step": 40389 }, { "epoch": 3.2720349967595594, "grad_norm": 0.08299795538187027, "learning_rate": 4.044286421531122e-05, "loss": 0.2658, "step": 40390 }, { "epoch": 3.2721160077770577, "grad_norm": 0.0716930627822876, "learning_rate": 4.0438363562716595e-05, "loss": 0.2457, "step": 40391 }, { "epoch": 3.272197018794556, "grad_norm": 0.07929502427577972, "learning_rate": 4.0433862910121976e-05, "loss": 0.2447, "step": 40392 }, { "epoch": 3.2722780298120546, "grad_norm": 0.09498481452465057, "learning_rate": 4.042936225752734e-05, "loss": 0.2099, "step": 40393 }, { "epoch": 3.272359040829553, "grad_norm": 0.07571402192115784, "learning_rate": 4.0424861604932716e-05, "loss": 0.2166, "step": 40394 }, { "epoch": 3.272440051847051, "grad_norm": 0.060386765748262405, "learning_rate": 4.0420360952338097e-05, "loss": 0.225, "step": 40395 }, { "epoch": 3.2725210628645494, "grad_norm": 0.06799789518117905, "learning_rate": 4.041586029974346e-05, "loss": 0.2147, "step": 40396 }, { "epoch": 3.272602073882048, "grad_norm": 0.0849473848938942, "learning_rate": 4.041135964714884e-05, "loss": 0.2257, "step": 40397 }, { "epoch": 3.2726830848995463, "grad_norm": 0.07534343749284744, "learning_rate": 4.040685899455422e-05, "loss": 0.2048, "step": 40398 }, { "epoch": 3.2727640959170445, "grad_norm": 0.05903489515185356, "learning_rate": 4.0402358341959584e-05, "loss": 0.1867, "step": 40399 }, { "epoch": 3.2728451069345432, "grad_norm": 0.08578018099069595, "learning_rate": 4.039785768936496e-05, "loss": 0.2531, "step": 40400 }, { "epoch": 3.2729261179520415, "grad_norm": 0.06364873796701431, "learning_rate": 4.039335703677034e-05, "loss": 0.2331, "step": 40401 }, { "epoch": 3.2730071289695397, "grad_norm": 0.08088508993387222, "learning_rate": 4.0388856384175705e-05, "loss": 0.2451, "step": 40402 }, { "epoch": 3.2730881399870384, "grad_norm": 0.07197795808315277, "learning_rate": 4.038435573158108e-05, "loss": 0.2527, "step": 40403 }, { "epoch": 3.2731691510045366, "grad_norm": 0.06356197595596313, "learning_rate": 4.037985507898646e-05, "loss": 0.2293, "step": 40404 }, { "epoch": 3.273250162022035, "grad_norm": 0.0621635764837265, "learning_rate": 4.037535442639183e-05, "loss": 0.2317, "step": 40405 }, { "epoch": 3.2733311730395336, "grad_norm": 0.07144168764352798, "learning_rate": 4.03708537737972e-05, "loss": 0.1993, "step": 40406 }, { "epoch": 3.273412184057032, "grad_norm": 0.06132879480719566, "learning_rate": 4.036635312120258e-05, "loss": 0.2408, "step": 40407 }, { "epoch": 3.27349319507453, "grad_norm": 0.08265918493270874, "learning_rate": 4.036185246860795e-05, "loss": 0.2241, "step": 40408 }, { "epoch": 3.2735742060920288, "grad_norm": 0.06276227533817291, "learning_rate": 4.035735181601332e-05, "loss": 0.2331, "step": 40409 }, { "epoch": 3.273655217109527, "grad_norm": 0.06253093481063843, "learning_rate": 4.03528511634187e-05, "loss": 0.2156, "step": 40410 }, { "epoch": 3.2737362281270252, "grad_norm": 0.0725327879190445, "learning_rate": 4.0348350510824074e-05, "loss": 0.2435, "step": 40411 }, { "epoch": 3.2738172391445235, "grad_norm": 0.0664612203836441, "learning_rate": 4.034384985822944e-05, "loss": 0.2163, "step": 40412 }, { "epoch": 3.273898250162022, "grad_norm": 0.05309682711958885, "learning_rate": 4.033934920563482e-05, "loss": 0.2277, "step": 40413 }, { "epoch": 3.2739792611795204, "grad_norm": 0.0698176920413971, "learning_rate": 4.0334848553040195e-05, "loss": 0.2574, "step": 40414 }, { "epoch": 3.2740602721970187, "grad_norm": 0.07634953409433365, "learning_rate": 4.033034790044556e-05, "loss": 0.2099, "step": 40415 }, { "epoch": 3.2741412832145174, "grad_norm": 0.054520655423402786, "learning_rate": 4.032584724785094e-05, "loss": 0.2122, "step": 40416 }, { "epoch": 3.2742222942320156, "grad_norm": 0.06059606000781059, "learning_rate": 4.0321346595256316e-05, "loss": 0.221, "step": 40417 }, { "epoch": 3.274303305249514, "grad_norm": 0.06204358860850334, "learning_rate": 4.031684594266168e-05, "loss": 0.2128, "step": 40418 }, { "epoch": 3.274384316267012, "grad_norm": 0.06221948191523552, "learning_rate": 4.031234529006706e-05, "loss": 0.1796, "step": 40419 }, { "epoch": 3.274465327284511, "grad_norm": 0.0861034169793129, "learning_rate": 4.0307844637472437e-05, "loss": 0.2372, "step": 40420 }, { "epoch": 3.274546338302009, "grad_norm": 0.08388126641511917, "learning_rate": 4.030334398487781e-05, "loss": 0.232, "step": 40421 }, { "epoch": 3.2746273493195073, "grad_norm": 0.06882581114768982, "learning_rate": 4.0298843332283184e-05, "loss": 0.2234, "step": 40422 }, { "epoch": 3.274708360337006, "grad_norm": 0.07220038771629333, "learning_rate": 4.029434267968856e-05, "loss": 0.2176, "step": 40423 }, { "epoch": 3.274789371354504, "grad_norm": 0.07071521878242493, "learning_rate": 4.028984202709393e-05, "loss": 0.2071, "step": 40424 }, { "epoch": 3.2748703823720025, "grad_norm": 0.07368500530719757, "learning_rate": 4.0285341374499305e-05, "loss": 0.2279, "step": 40425 }, { "epoch": 3.274951393389501, "grad_norm": 0.058155667036771774, "learning_rate": 4.028084072190468e-05, "loss": 0.1896, "step": 40426 }, { "epoch": 3.2750324044069994, "grad_norm": 0.07234590500593185, "learning_rate": 4.027634006931005e-05, "loss": 0.2005, "step": 40427 }, { "epoch": 3.2751134154244976, "grad_norm": 0.08725034445524216, "learning_rate": 4.0271839416715425e-05, "loss": 0.2611, "step": 40428 }, { "epoch": 3.2751944264419963, "grad_norm": 0.0634271427989006, "learning_rate": 4.02673387641208e-05, "loss": 0.2076, "step": 40429 }, { "epoch": 3.2752754374594946, "grad_norm": 0.08502479642629623, "learning_rate": 4.026283811152617e-05, "loss": 0.2315, "step": 40430 }, { "epoch": 3.275356448476993, "grad_norm": 0.0666709840297699, "learning_rate": 4.0258337458931546e-05, "loss": 0.2061, "step": 40431 }, { "epoch": 3.2754374594944915, "grad_norm": 0.06762423366308212, "learning_rate": 4.025383680633692e-05, "loss": 0.212, "step": 40432 }, { "epoch": 3.2755184705119897, "grad_norm": 0.07069918513298035, "learning_rate": 4.0249336153742293e-05, "loss": 0.2186, "step": 40433 }, { "epoch": 3.275599481529488, "grad_norm": 0.08242760598659515, "learning_rate": 4.0244835501147674e-05, "loss": 0.2641, "step": 40434 }, { "epoch": 3.2756804925469862, "grad_norm": 0.07050781697034836, "learning_rate": 4.024033484855304e-05, "loss": 0.2126, "step": 40435 }, { "epoch": 3.275761503564485, "grad_norm": 0.06657261401414871, "learning_rate": 4.0235834195958414e-05, "loss": 0.2132, "step": 40436 }, { "epoch": 3.275842514581983, "grad_norm": 0.07559150457382202, "learning_rate": 4.0231333543363795e-05, "loss": 0.2111, "step": 40437 }, { "epoch": 3.2759235255994814, "grad_norm": 0.08614648133516312, "learning_rate": 4.022683289076916e-05, "loss": 0.2746, "step": 40438 }, { "epoch": 3.27600453661698, "grad_norm": 0.06892319023609161, "learning_rate": 4.0222332238174535e-05, "loss": 0.2025, "step": 40439 }, { "epoch": 3.2760855476344783, "grad_norm": 0.06139829382300377, "learning_rate": 4.0217831585579915e-05, "loss": 0.2142, "step": 40440 }, { "epoch": 3.2761665586519766, "grad_norm": 0.06792233884334564, "learning_rate": 4.021333093298528e-05, "loss": 0.2377, "step": 40441 }, { "epoch": 3.276247569669475, "grad_norm": 0.08298711478710175, "learning_rate": 4.0208830280390656e-05, "loss": 0.2313, "step": 40442 }, { "epoch": 3.2763285806869735, "grad_norm": 0.0739048421382904, "learning_rate": 4.0204329627796036e-05, "loss": 0.2191, "step": 40443 }, { "epoch": 3.2764095917044718, "grad_norm": 0.07907218486070633, "learning_rate": 4.01998289752014e-05, "loss": 0.223, "step": 40444 }, { "epoch": 3.27649060272197, "grad_norm": 0.08246040344238281, "learning_rate": 4.019532832260678e-05, "loss": 0.2294, "step": 40445 }, { "epoch": 3.2765716137394687, "grad_norm": 0.0847393274307251, "learning_rate": 4.019082767001216e-05, "loss": 0.2493, "step": 40446 }, { "epoch": 3.276652624756967, "grad_norm": 0.08675988763570786, "learning_rate": 4.018632701741753e-05, "loss": 0.2401, "step": 40447 }, { "epoch": 3.276733635774465, "grad_norm": 0.06091444939374924, "learning_rate": 4.01818263648229e-05, "loss": 0.2607, "step": 40448 }, { "epoch": 3.276814646791964, "grad_norm": 0.06214442104101181, "learning_rate": 4.017732571222828e-05, "loss": 0.2137, "step": 40449 }, { "epoch": 3.276895657809462, "grad_norm": 0.06971146911382675, "learning_rate": 4.017282505963365e-05, "loss": 0.2195, "step": 40450 }, { "epoch": 3.2769766688269604, "grad_norm": 0.059449177235364914, "learning_rate": 4.016832440703902e-05, "loss": 0.2032, "step": 40451 }, { "epoch": 3.277057679844459, "grad_norm": 0.08385682851076126, "learning_rate": 4.01638237544444e-05, "loss": 0.2421, "step": 40452 }, { "epoch": 3.2771386908619573, "grad_norm": 0.08064158260822296, "learning_rate": 4.015932310184977e-05, "loss": 0.2353, "step": 40453 }, { "epoch": 3.2772197018794555, "grad_norm": 0.09409360587596893, "learning_rate": 4.015482244925514e-05, "loss": 0.2285, "step": 40454 }, { "epoch": 3.277300712896954, "grad_norm": 0.06640844792127609, "learning_rate": 4.015032179666052e-05, "loss": 0.2104, "step": 40455 }, { "epoch": 3.2773817239144525, "grad_norm": 0.07138046622276306, "learning_rate": 4.014582114406589e-05, "loss": 0.2271, "step": 40456 }, { "epoch": 3.2774627349319507, "grad_norm": 0.07712315022945404, "learning_rate": 4.014132049147126e-05, "loss": 0.2353, "step": 40457 }, { "epoch": 3.277543745949449, "grad_norm": 0.06329129636287689, "learning_rate": 4.013681983887664e-05, "loss": 0.2241, "step": 40458 }, { "epoch": 3.2776247569669477, "grad_norm": 0.06462210416793823, "learning_rate": 4.0132319186282014e-05, "loss": 0.224, "step": 40459 }, { "epoch": 3.277705767984446, "grad_norm": 0.0607602559030056, "learning_rate": 4.012781853368739e-05, "loss": 0.2053, "step": 40460 }, { "epoch": 3.277786779001944, "grad_norm": 0.06479512155056, "learning_rate": 4.012331788109276e-05, "loss": 0.1935, "step": 40461 }, { "epoch": 3.2778677900194424, "grad_norm": 0.06079896539449692, "learning_rate": 4.0118817228498135e-05, "loss": 0.2833, "step": 40462 }, { "epoch": 3.277948801036941, "grad_norm": 0.06312386691570282, "learning_rate": 4.011431657590351e-05, "loss": 0.2488, "step": 40463 }, { "epoch": 3.2780298120544393, "grad_norm": 0.0699615329504013, "learning_rate": 4.010981592330888e-05, "loss": 0.2318, "step": 40464 }, { "epoch": 3.2781108230719376, "grad_norm": 0.07368670403957367, "learning_rate": 4.0105315270714256e-05, "loss": 0.2467, "step": 40465 }, { "epoch": 3.2781918340894363, "grad_norm": 0.08029686659574509, "learning_rate": 4.010081461811963e-05, "loss": 0.2667, "step": 40466 }, { "epoch": 3.2782728451069345, "grad_norm": 0.06958233565092087, "learning_rate": 4.0096313965525e-05, "loss": 0.2282, "step": 40467 }, { "epoch": 3.2783538561244328, "grad_norm": 0.059386830776929855, "learning_rate": 4.0091813312930376e-05, "loss": 0.1847, "step": 40468 }, { "epoch": 3.2784348671419314, "grad_norm": 0.0706775113940239, "learning_rate": 4.008731266033575e-05, "loss": 0.2304, "step": 40469 }, { "epoch": 3.2785158781594297, "grad_norm": 0.07206568121910095, "learning_rate": 4.0082812007741124e-05, "loss": 0.1964, "step": 40470 }, { "epoch": 3.278596889176928, "grad_norm": 0.06114106625318527, "learning_rate": 4.00783113551465e-05, "loss": 0.2166, "step": 40471 }, { "epoch": 3.2786779001944266, "grad_norm": 0.06136857345700264, "learning_rate": 4.007381070255187e-05, "loss": 0.2232, "step": 40472 }, { "epoch": 3.278758911211925, "grad_norm": 0.07858065515756607, "learning_rate": 4.006931004995725e-05, "loss": 0.2318, "step": 40473 }, { "epoch": 3.278839922229423, "grad_norm": 0.07737397402524948, "learning_rate": 4.006480939736262e-05, "loss": 0.2279, "step": 40474 }, { "epoch": 3.278920933246922, "grad_norm": 0.06615752726793289, "learning_rate": 4.006030874476799e-05, "loss": 0.2415, "step": 40475 }, { "epoch": 3.27900194426442, "grad_norm": 0.06866739690303802, "learning_rate": 4.005580809217337e-05, "loss": 0.2021, "step": 40476 }, { "epoch": 3.2790829552819183, "grad_norm": 0.06152362748980522, "learning_rate": 4.005130743957874e-05, "loss": 0.2113, "step": 40477 }, { "epoch": 3.2791639662994165, "grad_norm": 0.06418390572071075, "learning_rate": 4.004680678698411e-05, "loss": 0.2114, "step": 40478 }, { "epoch": 3.279244977316915, "grad_norm": 0.06302060931921005, "learning_rate": 4.004230613438949e-05, "loss": 0.2226, "step": 40479 }, { "epoch": 3.2793259883344135, "grad_norm": 0.0747298002243042, "learning_rate": 4.003780548179486e-05, "loss": 0.2506, "step": 40480 }, { "epoch": 3.2794069993519117, "grad_norm": 0.10246086120605469, "learning_rate": 4.003330482920023e-05, "loss": 0.2145, "step": 40481 }, { "epoch": 3.2794880103694104, "grad_norm": 0.07003115862607956, "learning_rate": 4.0028804176605614e-05, "loss": 0.2032, "step": 40482 }, { "epoch": 3.2795690213869086, "grad_norm": 0.06867416203022003, "learning_rate": 4.002430352401098e-05, "loss": 0.2116, "step": 40483 }, { "epoch": 3.279650032404407, "grad_norm": 0.07221326977014542, "learning_rate": 4.0019802871416354e-05, "loss": 0.2057, "step": 40484 }, { "epoch": 3.279731043421905, "grad_norm": 0.0709087997674942, "learning_rate": 4.0015302218821734e-05, "loss": 0.2183, "step": 40485 }, { "epoch": 3.279812054439404, "grad_norm": 0.06863710284233093, "learning_rate": 4.001080156622711e-05, "loss": 0.2072, "step": 40486 }, { "epoch": 3.279893065456902, "grad_norm": 0.07001881301403046, "learning_rate": 4.0006300913632475e-05, "loss": 0.2213, "step": 40487 }, { "epoch": 3.2799740764744003, "grad_norm": 0.06810453534126282, "learning_rate": 4.0001800261037855e-05, "loss": 0.2266, "step": 40488 }, { "epoch": 3.280055087491899, "grad_norm": 0.0766795426607132, "learning_rate": 3.999729960844323e-05, "loss": 0.2426, "step": 40489 }, { "epoch": 3.2801360985093972, "grad_norm": 0.08484476059675217, "learning_rate": 3.9992798955848596e-05, "loss": 0.2461, "step": 40490 }, { "epoch": 3.2802171095268955, "grad_norm": 0.07386729121208191, "learning_rate": 3.9988298303253976e-05, "loss": 0.2298, "step": 40491 }, { "epoch": 3.280298120544394, "grad_norm": 0.0644812285900116, "learning_rate": 3.998379765065935e-05, "loss": 0.2276, "step": 40492 }, { "epoch": 3.2803791315618924, "grad_norm": 0.060633908957242966, "learning_rate": 3.9979296998064716e-05, "loss": 0.2299, "step": 40493 }, { "epoch": 3.2804601425793907, "grad_norm": 0.07635070383548737, "learning_rate": 3.99747963454701e-05, "loss": 0.2038, "step": 40494 }, { "epoch": 3.2805411535968894, "grad_norm": 0.06670946627855301, "learning_rate": 3.997029569287547e-05, "loss": 0.219, "step": 40495 }, { "epoch": 3.2806221646143876, "grad_norm": 0.08006652444601059, "learning_rate": 3.996579504028084e-05, "loss": 0.2482, "step": 40496 }, { "epoch": 3.280703175631886, "grad_norm": 0.07331784814596176, "learning_rate": 3.996129438768622e-05, "loss": 0.2031, "step": 40497 }, { "epoch": 3.2807841866493845, "grad_norm": 0.08051098138093948, "learning_rate": 3.995679373509159e-05, "loss": 0.211, "step": 40498 }, { "epoch": 3.280865197666883, "grad_norm": 0.06945125758647919, "learning_rate": 3.9952293082496965e-05, "loss": 0.2323, "step": 40499 }, { "epoch": 3.280946208684381, "grad_norm": 0.0819791629910469, "learning_rate": 3.994779242990234e-05, "loss": 0.2157, "step": 40500 }, { "epoch": 3.2810272197018793, "grad_norm": 0.07022712379693985, "learning_rate": 3.994329177730771e-05, "loss": 0.2277, "step": 40501 }, { "epoch": 3.281108230719378, "grad_norm": 0.07423558086156845, "learning_rate": 3.9938791124713086e-05, "loss": 0.2255, "step": 40502 }, { "epoch": 3.281189241736876, "grad_norm": 0.0815899595618248, "learning_rate": 3.993429047211846e-05, "loss": 0.2152, "step": 40503 }, { "epoch": 3.2812702527543745, "grad_norm": 0.06895309686660767, "learning_rate": 3.992978981952383e-05, "loss": 0.2385, "step": 40504 }, { "epoch": 3.281351263771873, "grad_norm": 0.06182735040783882, "learning_rate": 3.9925289166929206e-05, "loss": 0.1874, "step": 40505 }, { "epoch": 3.2814322747893714, "grad_norm": 0.09717794507741928, "learning_rate": 3.992078851433458e-05, "loss": 0.2255, "step": 40506 }, { "epoch": 3.2815132858068696, "grad_norm": 0.06994237005710602, "learning_rate": 3.9916287861739954e-05, "loss": 0.2349, "step": 40507 }, { "epoch": 3.281594296824368, "grad_norm": 0.07174836099147797, "learning_rate": 3.991178720914533e-05, "loss": 0.2341, "step": 40508 }, { "epoch": 3.2816753078418666, "grad_norm": 0.06857135891914368, "learning_rate": 3.99072865565507e-05, "loss": 0.2258, "step": 40509 }, { "epoch": 3.281756318859365, "grad_norm": 0.06680644303560257, "learning_rate": 3.9902785903956074e-05, "loss": 0.2283, "step": 40510 }, { "epoch": 3.281837329876863, "grad_norm": 0.058539021760225296, "learning_rate": 3.989828525136145e-05, "loss": 0.214, "step": 40511 }, { "epoch": 3.2819183408943617, "grad_norm": 0.07493918389081955, "learning_rate": 3.989378459876683e-05, "loss": 0.1984, "step": 40512 }, { "epoch": 3.28199935191186, "grad_norm": 0.0886545479297638, "learning_rate": 3.9889283946172195e-05, "loss": 0.2976, "step": 40513 }, { "epoch": 3.2820803629293582, "grad_norm": 0.06237650662660599, "learning_rate": 3.988478329357757e-05, "loss": 0.2202, "step": 40514 }, { "epoch": 3.282161373946857, "grad_norm": 0.07362060993909836, "learning_rate": 3.988028264098295e-05, "loss": 0.2323, "step": 40515 }, { "epoch": 3.282242384964355, "grad_norm": 0.06613124161958694, "learning_rate": 3.9875781988388316e-05, "loss": 0.2699, "step": 40516 }, { "epoch": 3.2823233959818534, "grad_norm": 0.06270667165517807, "learning_rate": 3.987128133579369e-05, "loss": 0.2224, "step": 40517 }, { "epoch": 3.282404406999352, "grad_norm": 0.061522651463747025, "learning_rate": 3.986678068319907e-05, "loss": 0.2401, "step": 40518 }, { "epoch": 3.2824854180168503, "grad_norm": 0.07176893949508667, "learning_rate": 3.986228003060444e-05, "loss": 0.2344, "step": 40519 }, { "epoch": 3.2825664290343486, "grad_norm": 0.07144494354724884, "learning_rate": 3.985777937800981e-05, "loss": 0.2687, "step": 40520 }, { "epoch": 3.2826474400518473, "grad_norm": 0.08052974939346313, "learning_rate": 3.985327872541519e-05, "loss": 0.2198, "step": 40521 }, { "epoch": 3.2827284510693455, "grad_norm": 0.07403968274593353, "learning_rate": 3.984877807282056e-05, "loss": 0.2515, "step": 40522 }, { "epoch": 3.2828094620868438, "grad_norm": 0.06775449961423874, "learning_rate": 3.984427742022593e-05, "loss": 0.2263, "step": 40523 }, { "epoch": 3.282890473104342, "grad_norm": 0.06973139196634293, "learning_rate": 3.983977676763131e-05, "loss": 0.2733, "step": 40524 }, { "epoch": 3.2829714841218407, "grad_norm": 0.07829160243272781, "learning_rate": 3.9835276115036685e-05, "loss": 0.2271, "step": 40525 }, { "epoch": 3.283052495139339, "grad_norm": 0.06388890743255615, "learning_rate": 3.983077546244205e-05, "loss": 0.202, "step": 40526 }, { "epoch": 3.283133506156837, "grad_norm": 0.06818761676549911, "learning_rate": 3.982627480984743e-05, "loss": 0.2385, "step": 40527 }, { "epoch": 3.283214517174336, "grad_norm": 0.06943784654140472, "learning_rate": 3.9821774157252806e-05, "loss": 0.213, "step": 40528 }, { "epoch": 3.283295528191834, "grad_norm": 0.08590265363454819, "learning_rate": 3.981727350465817e-05, "loss": 0.2111, "step": 40529 }, { "epoch": 3.2833765392093324, "grad_norm": 0.06639125943183899, "learning_rate": 3.981277285206355e-05, "loss": 0.278, "step": 40530 }, { "epoch": 3.2834575502268306, "grad_norm": 0.0770188719034195, "learning_rate": 3.980827219946893e-05, "loss": 0.2138, "step": 40531 }, { "epoch": 3.2835385612443293, "grad_norm": 0.06980788707733154, "learning_rate": 3.9803771546874294e-05, "loss": 0.2119, "step": 40532 }, { "epoch": 3.2836195722618275, "grad_norm": 0.07194875180721283, "learning_rate": 3.9799270894279674e-05, "loss": 0.1955, "step": 40533 }, { "epoch": 3.283700583279326, "grad_norm": 0.06898615509271622, "learning_rate": 3.979477024168505e-05, "loss": 0.2319, "step": 40534 }, { "epoch": 3.2837815942968245, "grad_norm": 0.08618927747011185, "learning_rate": 3.9790269589090415e-05, "loss": 0.2448, "step": 40535 }, { "epoch": 3.2838626053143227, "grad_norm": 0.08610135316848755, "learning_rate": 3.9785768936495795e-05, "loss": 0.2309, "step": 40536 }, { "epoch": 3.283943616331821, "grad_norm": 0.06609229743480682, "learning_rate": 3.978126828390117e-05, "loss": 0.2198, "step": 40537 }, { "epoch": 3.2840246273493197, "grad_norm": 0.08390206843614578, "learning_rate": 3.977676763130654e-05, "loss": 0.2391, "step": 40538 }, { "epoch": 3.284105638366818, "grad_norm": 0.06741616129875183, "learning_rate": 3.9772266978711916e-05, "loss": 0.1953, "step": 40539 }, { "epoch": 3.284186649384316, "grad_norm": 0.059727005660533905, "learning_rate": 3.976776632611729e-05, "loss": 0.2071, "step": 40540 }, { "epoch": 3.284267660401815, "grad_norm": 0.06814241409301758, "learning_rate": 3.976326567352266e-05, "loss": 0.2035, "step": 40541 }, { "epoch": 3.284348671419313, "grad_norm": 0.04796573519706726, "learning_rate": 3.9758765020928037e-05, "loss": 0.1851, "step": 40542 }, { "epoch": 3.2844296824368113, "grad_norm": 0.08344428241252899, "learning_rate": 3.975426436833341e-05, "loss": 0.2263, "step": 40543 }, { "epoch": 3.28451069345431, "grad_norm": 0.0701231062412262, "learning_rate": 3.9749763715738784e-05, "loss": 0.2233, "step": 40544 }, { "epoch": 3.2845917044718083, "grad_norm": 0.06695102900266647, "learning_rate": 3.974526306314416e-05, "loss": 0.2326, "step": 40545 }, { "epoch": 3.2846727154893065, "grad_norm": 0.09770773351192474, "learning_rate": 3.974076241054953e-05, "loss": 0.2145, "step": 40546 }, { "epoch": 3.2847537265068047, "grad_norm": 0.08435174822807312, "learning_rate": 3.9736261757954905e-05, "loss": 0.2209, "step": 40547 }, { "epoch": 3.2848347375243034, "grad_norm": 0.0652317926287651, "learning_rate": 3.973176110536028e-05, "loss": 0.2079, "step": 40548 }, { "epoch": 3.2849157485418017, "grad_norm": 0.0690438449382782, "learning_rate": 3.972726045276565e-05, "loss": 0.2337, "step": 40549 }, { "epoch": 3.2849967595593, "grad_norm": 0.07208141684532166, "learning_rate": 3.9722759800171025e-05, "loss": 0.2534, "step": 40550 }, { "epoch": 3.2850777705767986, "grad_norm": 0.0637679249048233, "learning_rate": 3.9718259147576406e-05, "loss": 0.2205, "step": 40551 }, { "epoch": 3.285158781594297, "grad_norm": 0.08957676589488983, "learning_rate": 3.971375849498177e-05, "loss": 0.225, "step": 40552 }, { "epoch": 3.285239792611795, "grad_norm": 0.07437330484390259, "learning_rate": 3.9709257842387146e-05, "loss": 0.2674, "step": 40553 }, { "epoch": 3.2853208036292934, "grad_norm": 0.07008083164691925, "learning_rate": 3.9704757189792527e-05, "loss": 0.222, "step": 40554 }, { "epoch": 3.285401814646792, "grad_norm": 0.08292362093925476, "learning_rate": 3.970025653719789e-05, "loss": 0.242, "step": 40555 }, { "epoch": 3.2854828256642903, "grad_norm": 0.08934476226568222, "learning_rate": 3.969575588460327e-05, "loss": 0.2498, "step": 40556 }, { "epoch": 3.2855638366817885, "grad_norm": 0.0667760893702507, "learning_rate": 3.969125523200865e-05, "loss": 0.2217, "step": 40557 }, { "epoch": 3.285644847699287, "grad_norm": 0.06794371455907822, "learning_rate": 3.9686754579414014e-05, "loss": 0.2221, "step": 40558 }, { "epoch": 3.2857258587167855, "grad_norm": 0.06920351088047028, "learning_rate": 3.968225392681939e-05, "loss": 0.2646, "step": 40559 }, { "epoch": 3.2858068697342837, "grad_norm": 0.06409312784671783, "learning_rate": 3.967775327422477e-05, "loss": 0.2163, "step": 40560 }, { "epoch": 3.2858878807517824, "grad_norm": 0.10056743025779724, "learning_rate": 3.9673252621630135e-05, "loss": 0.2238, "step": 40561 }, { "epoch": 3.2859688917692806, "grad_norm": 0.06588996201753616, "learning_rate": 3.966875196903551e-05, "loss": 0.2304, "step": 40562 }, { "epoch": 3.286049902786779, "grad_norm": 0.06542570888996124, "learning_rate": 3.966425131644089e-05, "loss": 0.2056, "step": 40563 }, { "epoch": 3.2861309138042776, "grad_norm": 0.08661080151796341, "learning_rate": 3.965975066384626e-05, "loss": 0.2425, "step": 40564 }, { "epoch": 3.286211924821776, "grad_norm": 0.0732247456908226, "learning_rate": 3.965525001125163e-05, "loss": 0.2573, "step": 40565 }, { "epoch": 3.286292935839274, "grad_norm": 0.1030229851603508, "learning_rate": 3.965074935865701e-05, "loss": 0.2844, "step": 40566 }, { "epoch": 3.2863739468567728, "grad_norm": 0.07147184759378433, "learning_rate": 3.9646248706062383e-05, "loss": 0.2216, "step": 40567 }, { "epoch": 3.286454957874271, "grad_norm": 0.060462355613708496, "learning_rate": 3.964174805346775e-05, "loss": 0.208, "step": 40568 }, { "epoch": 3.2865359688917692, "grad_norm": 0.06986522674560547, "learning_rate": 3.963724740087313e-05, "loss": 0.2047, "step": 40569 }, { "epoch": 3.2866169799092675, "grad_norm": 0.070008285343647, "learning_rate": 3.9632746748278504e-05, "loss": 0.2261, "step": 40570 }, { "epoch": 3.286697990926766, "grad_norm": 0.0804152712225914, "learning_rate": 3.962824609568387e-05, "loss": 0.2305, "step": 40571 }, { "epoch": 3.2867790019442644, "grad_norm": 0.06697341054677963, "learning_rate": 3.962374544308925e-05, "loss": 0.2189, "step": 40572 }, { "epoch": 3.2868600129617627, "grad_norm": 0.06043664366006851, "learning_rate": 3.9619244790494625e-05, "loss": 0.2129, "step": 40573 }, { "epoch": 3.2869410239792614, "grad_norm": 0.07191526889801025, "learning_rate": 3.961474413789999e-05, "loss": 0.2602, "step": 40574 }, { "epoch": 3.2870220349967596, "grad_norm": 0.07072187960147858, "learning_rate": 3.961024348530537e-05, "loss": 0.2137, "step": 40575 }, { "epoch": 3.287103046014258, "grad_norm": 0.0730489045381546, "learning_rate": 3.9605742832710746e-05, "loss": 0.223, "step": 40576 }, { "epoch": 3.287184057031756, "grad_norm": 0.09229131042957306, "learning_rate": 3.960124218011611e-05, "loss": 0.2081, "step": 40577 }, { "epoch": 3.287265068049255, "grad_norm": 0.0711924135684967, "learning_rate": 3.959674152752149e-05, "loss": 0.1944, "step": 40578 }, { "epoch": 3.287346079066753, "grad_norm": 0.10902519524097443, "learning_rate": 3.959224087492687e-05, "loss": 0.2063, "step": 40579 }, { "epoch": 3.2874270900842513, "grad_norm": 0.0829775333404541, "learning_rate": 3.958774022233224e-05, "loss": 0.2497, "step": 40580 }, { "epoch": 3.28750810110175, "grad_norm": 0.06518006324768066, "learning_rate": 3.9583239569737614e-05, "loss": 0.2016, "step": 40581 }, { "epoch": 3.287589112119248, "grad_norm": 0.0657275915145874, "learning_rate": 3.957873891714299e-05, "loss": 0.2508, "step": 40582 }, { "epoch": 3.2876701231367464, "grad_norm": 0.0721287801861763, "learning_rate": 3.957423826454836e-05, "loss": 0.2437, "step": 40583 }, { "epoch": 3.287751134154245, "grad_norm": 0.057864025235176086, "learning_rate": 3.9569737611953735e-05, "loss": 0.2423, "step": 40584 }, { "epoch": 3.2878321451717434, "grad_norm": 0.08614750951528549, "learning_rate": 3.956523695935911e-05, "loss": 0.2137, "step": 40585 }, { "epoch": 3.2879131561892416, "grad_norm": 0.0680968388915062, "learning_rate": 3.956073630676448e-05, "loss": 0.2327, "step": 40586 }, { "epoch": 3.2879941672067403, "grad_norm": 0.05906018987298012, "learning_rate": 3.9556235654169855e-05, "loss": 0.2161, "step": 40587 }, { "epoch": 3.2880751782242386, "grad_norm": 0.061834465712308884, "learning_rate": 3.955173500157523e-05, "loss": 0.2365, "step": 40588 }, { "epoch": 3.288156189241737, "grad_norm": 0.07073098421096802, "learning_rate": 3.95472343489806e-05, "loss": 0.2204, "step": 40589 }, { "epoch": 3.2882372002592355, "grad_norm": 0.0943407267332077, "learning_rate": 3.9542733696385976e-05, "loss": 0.2297, "step": 40590 }, { "epoch": 3.2883182112767337, "grad_norm": 0.0708475187420845, "learning_rate": 3.953823304379135e-05, "loss": 0.1893, "step": 40591 }, { "epoch": 3.288399222294232, "grad_norm": 0.09488806873559952, "learning_rate": 3.9533732391196723e-05, "loss": 0.2665, "step": 40592 }, { "epoch": 3.2884802333117302, "grad_norm": 0.06400935351848602, "learning_rate": 3.9529231738602104e-05, "loss": 0.228, "step": 40593 }, { "epoch": 3.288561244329229, "grad_norm": 0.0658169537782669, "learning_rate": 3.952473108600747e-05, "loss": 0.1969, "step": 40594 }, { "epoch": 3.288642255346727, "grad_norm": 0.07230688631534576, "learning_rate": 3.9520230433412844e-05, "loss": 0.2115, "step": 40595 }, { "epoch": 3.2887232663642254, "grad_norm": 0.0746084600687027, "learning_rate": 3.9515729780818225e-05, "loss": 0.2529, "step": 40596 }, { "epoch": 3.288804277381724, "grad_norm": 0.0718778520822525, "learning_rate": 3.951122912822359e-05, "loss": 0.2176, "step": 40597 }, { "epoch": 3.2888852883992223, "grad_norm": 0.059904858469963074, "learning_rate": 3.9506728475628965e-05, "loss": 0.2094, "step": 40598 }, { "epoch": 3.2889662994167206, "grad_norm": 0.0675879716873169, "learning_rate": 3.9502227823034345e-05, "loss": 0.2341, "step": 40599 }, { "epoch": 3.289047310434219, "grad_norm": 0.08008304983377457, "learning_rate": 3.949772717043971e-05, "loss": 0.2378, "step": 40600 }, { "epoch": 3.2891283214517175, "grad_norm": 0.084351547062397, "learning_rate": 3.9493226517845086e-05, "loss": 0.1937, "step": 40601 }, { "epoch": 3.2892093324692158, "grad_norm": 0.06926941871643066, "learning_rate": 3.9488725865250466e-05, "loss": 0.2311, "step": 40602 }, { "epoch": 3.289290343486714, "grad_norm": 0.08134906738996506, "learning_rate": 3.948422521265583e-05, "loss": 0.2573, "step": 40603 }, { "epoch": 3.2893713545042127, "grad_norm": 0.0564156100153923, "learning_rate": 3.947972456006121e-05, "loss": 0.1998, "step": 40604 }, { "epoch": 3.289452365521711, "grad_norm": 0.06534619629383087, "learning_rate": 3.947522390746659e-05, "loss": 0.2142, "step": 40605 }, { "epoch": 3.289533376539209, "grad_norm": 0.08344676345586777, "learning_rate": 3.947072325487196e-05, "loss": 0.2201, "step": 40606 }, { "epoch": 3.289614387556708, "grad_norm": 0.08071395009756088, "learning_rate": 3.946622260227733e-05, "loss": 0.2553, "step": 40607 }, { "epoch": 3.289695398574206, "grad_norm": 0.06508363038301468, "learning_rate": 3.946172194968271e-05, "loss": 0.2413, "step": 40608 }, { "epoch": 3.2897764095917044, "grad_norm": 0.07355929911136627, "learning_rate": 3.945722129708808e-05, "loss": 0.2702, "step": 40609 }, { "epoch": 3.289857420609203, "grad_norm": 0.0816044956445694, "learning_rate": 3.945272064449345e-05, "loss": 0.2117, "step": 40610 }, { "epoch": 3.2899384316267013, "grad_norm": 0.07990920543670654, "learning_rate": 3.944821999189883e-05, "loss": 0.2459, "step": 40611 }, { "epoch": 3.2900194426441995, "grad_norm": 0.0657232403755188, "learning_rate": 3.94437193393042e-05, "loss": 0.1885, "step": 40612 }, { "epoch": 3.2901004536616982, "grad_norm": 0.06775541603565216, "learning_rate": 3.943921868670957e-05, "loss": 0.201, "step": 40613 }, { "epoch": 3.2901814646791965, "grad_norm": 0.0732579305768013, "learning_rate": 3.943471803411495e-05, "loss": 0.2058, "step": 40614 }, { "epoch": 3.2902624756966947, "grad_norm": 0.07913044095039368, "learning_rate": 3.943021738152032e-05, "loss": 0.198, "step": 40615 }, { "epoch": 3.290343486714193, "grad_norm": 0.07696747034788132, "learning_rate": 3.94257167289257e-05, "loss": 0.2087, "step": 40616 }, { "epoch": 3.2904244977316917, "grad_norm": 0.0815991535782814, "learning_rate": 3.942121607633107e-05, "loss": 0.2441, "step": 40617 }, { "epoch": 3.29050550874919, "grad_norm": 0.07421345263719559, "learning_rate": 3.9416715423736444e-05, "loss": 0.2168, "step": 40618 }, { "epoch": 3.290586519766688, "grad_norm": 0.07157240062952042, "learning_rate": 3.941221477114182e-05, "loss": 0.2335, "step": 40619 }, { "epoch": 3.290667530784187, "grad_norm": 0.08413759618997574, "learning_rate": 3.940771411854719e-05, "loss": 0.2532, "step": 40620 }, { "epoch": 3.290748541801685, "grad_norm": 0.08253608644008636, "learning_rate": 3.9403213465952565e-05, "loss": 0.2379, "step": 40621 }, { "epoch": 3.2908295528191833, "grad_norm": 0.07659203559160233, "learning_rate": 3.939871281335794e-05, "loss": 0.241, "step": 40622 }, { "epoch": 3.2909105638366816, "grad_norm": 0.06601595133543015, "learning_rate": 3.939421216076331e-05, "loss": 0.2507, "step": 40623 }, { "epoch": 3.2909915748541803, "grad_norm": 0.06105021387338638, "learning_rate": 3.9389711508168686e-05, "loss": 0.2026, "step": 40624 }, { "epoch": 3.2910725858716785, "grad_norm": 0.06365049630403519, "learning_rate": 3.938521085557406e-05, "loss": 0.2026, "step": 40625 }, { "epoch": 3.2911535968891767, "grad_norm": 0.0851701870560646, "learning_rate": 3.938071020297943e-05, "loss": 0.2263, "step": 40626 }, { "epoch": 3.2912346079066754, "grad_norm": 0.06830804795026779, "learning_rate": 3.9376209550384806e-05, "loss": 0.2102, "step": 40627 }, { "epoch": 3.2913156189241737, "grad_norm": 0.06827913969755173, "learning_rate": 3.937170889779018e-05, "loss": 0.1907, "step": 40628 }, { "epoch": 3.291396629941672, "grad_norm": 0.06598909199237823, "learning_rate": 3.9367208245195554e-05, "loss": 0.2028, "step": 40629 }, { "epoch": 3.2914776409591706, "grad_norm": 0.0733598917722702, "learning_rate": 3.936270759260093e-05, "loss": 0.2264, "step": 40630 }, { "epoch": 3.291558651976669, "grad_norm": 0.07068891823291779, "learning_rate": 3.93582069400063e-05, "loss": 0.2373, "step": 40631 }, { "epoch": 3.291639662994167, "grad_norm": 0.08565499633550644, "learning_rate": 3.935370628741168e-05, "loss": 0.2537, "step": 40632 }, { "epoch": 3.291720674011666, "grad_norm": 0.08677096664905548, "learning_rate": 3.934920563481705e-05, "loss": 0.2212, "step": 40633 }, { "epoch": 3.291801685029164, "grad_norm": 0.06935994327068329, "learning_rate": 3.934470498222242e-05, "loss": 0.2052, "step": 40634 }, { "epoch": 3.2918826960466623, "grad_norm": 0.06672733277082443, "learning_rate": 3.93402043296278e-05, "loss": 0.2098, "step": 40635 }, { "epoch": 3.291963707064161, "grad_norm": 0.09355857968330383, "learning_rate": 3.933570367703317e-05, "loss": 0.256, "step": 40636 }, { "epoch": 3.292044718081659, "grad_norm": 0.06566230952739716, "learning_rate": 3.933120302443854e-05, "loss": 0.2027, "step": 40637 }, { "epoch": 3.2921257290991575, "grad_norm": 0.08503949642181396, "learning_rate": 3.932670237184392e-05, "loss": 0.23, "step": 40638 }, { "epoch": 3.2922067401166557, "grad_norm": 0.07533713430166245, "learning_rate": 3.932220171924929e-05, "loss": 0.2462, "step": 40639 }, { "epoch": 3.2922877511341544, "grad_norm": 0.07142367959022522, "learning_rate": 3.931770106665466e-05, "loss": 0.2276, "step": 40640 }, { "epoch": 3.2923687621516526, "grad_norm": 0.07090745121240616, "learning_rate": 3.9313200414060044e-05, "loss": 0.2312, "step": 40641 }, { "epoch": 3.292449773169151, "grad_norm": 0.07475874572992325, "learning_rate": 3.930869976146541e-05, "loss": 0.2095, "step": 40642 }, { "epoch": 3.2925307841866496, "grad_norm": 0.07840246707201004, "learning_rate": 3.9304199108870784e-05, "loss": 0.2488, "step": 40643 }, { "epoch": 3.292611795204148, "grad_norm": 0.07238543778657913, "learning_rate": 3.9299698456276164e-05, "loss": 0.21, "step": 40644 }, { "epoch": 3.292692806221646, "grad_norm": 0.07699737697839737, "learning_rate": 3.929519780368154e-05, "loss": 0.2449, "step": 40645 }, { "epoch": 3.2927738172391443, "grad_norm": 0.07308366894721985, "learning_rate": 3.9290697151086905e-05, "loss": 0.2296, "step": 40646 }, { "epoch": 3.292854828256643, "grad_norm": 0.09641941636800766, "learning_rate": 3.9286196498492285e-05, "loss": 0.2019, "step": 40647 }, { "epoch": 3.2929358392741412, "grad_norm": 0.07292996346950531, "learning_rate": 3.928169584589766e-05, "loss": 0.218, "step": 40648 }, { "epoch": 3.2930168502916395, "grad_norm": 0.0700625404715538, "learning_rate": 3.927719519330303e-05, "loss": 0.2398, "step": 40649 }, { "epoch": 3.293097861309138, "grad_norm": 0.05972727760672569, "learning_rate": 3.9272694540708406e-05, "loss": 0.2302, "step": 40650 }, { "epoch": 3.2931788723266364, "grad_norm": 0.06765510886907578, "learning_rate": 3.926819388811378e-05, "loss": 0.2193, "step": 40651 }, { "epoch": 3.2932598833441347, "grad_norm": 0.09009873867034912, "learning_rate": 3.926369323551915e-05, "loss": 0.2183, "step": 40652 }, { "epoch": 3.2933408943616334, "grad_norm": 0.08619244396686554, "learning_rate": 3.925919258292453e-05, "loss": 0.213, "step": 40653 }, { "epoch": 3.2934219053791316, "grad_norm": 0.0694895014166832, "learning_rate": 3.92546919303299e-05, "loss": 0.2265, "step": 40654 }, { "epoch": 3.29350291639663, "grad_norm": 0.055945686995983124, "learning_rate": 3.9250191277735274e-05, "loss": 0.1879, "step": 40655 }, { "epoch": 3.2935839274141285, "grad_norm": 0.07787401229143143, "learning_rate": 3.924569062514065e-05, "loss": 0.2311, "step": 40656 }, { "epoch": 3.2936649384316268, "grad_norm": 0.0769646093249321, "learning_rate": 3.924118997254602e-05, "loss": 0.2234, "step": 40657 }, { "epoch": 3.293745949449125, "grad_norm": 0.06488602608442307, "learning_rate": 3.9236689319951395e-05, "loss": 0.2414, "step": 40658 }, { "epoch": 3.2938269604666237, "grad_norm": 0.07599738240242004, "learning_rate": 3.923218866735677e-05, "loss": 0.2041, "step": 40659 }, { "epoch": 3.293907971484122, "grad_norm": 0.07230847328901291, "learning_rate": 3.922768801476214e-05, "loss": 0.2485, "step": 40660 }, { "epoch": 3.29398898250162, "grad_norm": 0.06198279932141304, "learning_rate": 3.9223187362167516e-05, "loss": 0.2238, "step": 40661 }, { "epoch": 3.2940699935191184, "grad_norm": 0.07815616577863693, "learning_rate": 3.921868670957289e-05, "loss": 0.2235, "step": 40662 }, { "epoch": 3.294151004536617, "grad_norm": 0.07445298880338669, "learning_rate": 3.921418605697826e-05, "loss": 0.2592, "step": 40663 }, { "epoch": 3.2942320155541154, "grad_norm": 0.06103277578949928, "learning_rate": 3.9209685404383636e-05, "loss": 0.2375, "step": 40664 }, { "epoch": 3.2943130265716136, "grad_norm": 0.07844781875610352, "learning_rate": 3.920518475178901e-05, "loss": 0.1955, "step": 40665 }, { "epoch": 3.2943940375891123, "grad_norm": 0.0629831850528717, "learning_rate": 3.9200684099194384e-05, "loss": 0.2317, "step": 40666 }, { "epoch": 3.2944750486066106, "grad_norm": 0.07163090258836746, "learning_rate": 3.919618344659976e-05, "loss": 0.2178, "step": 40667 }, { "epoch": 3.294556059624109, "grad_norm": 0.09038316458463669, "learning_rate": 3.919168279400513e-05, "loss": 0.2388, "step": 40668 }, { "epoch": 3.294637070641607, "grad_norm": 0.0876893624663353, "learning_rate": 3.9187182141410504e-05, "loss": 0.2163, "step": 40669 }, { "epoch": 3.2947180816591057, "grad_norm": 0.05798359215259552, "learning_rate": 3.918268148881588e-05, "loss": 0.2172, "step": 40670 }, { "epoch": 3.294799092676604, "grad_norm": 0.08011886477470398, "learning_rate": 3.917818083622126e-05, "loss": 0.2366, "step": 40671 }, { "epoch": 3.2948801036941022, "grad_norm": 0.08385702222585678, "learning_rate": 3.9173680183626625e-05, "loss": 0.203, "step": 40672 }, { "epoch": 3.294961114711601, "grad_norm": 0.08129411935806274, "learning_rate": 3.9169179531032e-05, "loss": 0.2321, "step": 40673 }, { "epoch": 3.295042125729099, "grad_norm": 0.06942925602197647, "learning_rate": 3.916467887843738e-05, "loss": 0.2214, "step": 40674 }, { "epoch": 3.2951231367465974, "grad_norm": 0.07936610281467438, "learning_rate": 3.9160178225842746e-05, "loss": 0.2299, "step": 40675 }, { "epoch": 3.295204147764096, "grad_norm": 0.06970565766096115, "learning_rate": 3.915567757324812e-05, "loss": 0.2492, "step": 40676 }, { "epoch": 3.2952851587815943, "grad_norm": 0.07343757152557373, "learning_rate": 3.91511769206535e-05, "loss": 0.1815, "step": 40677 }, { "epoch": 3.2953661697990926, "grad_norm": 0.09622737020254135, "learning_rate": 3.914667626805887e-05, "loss": 0.2402, "step": 40678 }, { "epoch": 3.2954471808165913, "grad_norm": 0.08356326073408127, "learning_rate": 3.914217561546424e-05, "loss": 0.2072, "step": 40679 }, { "epoch": 3.2955281918340895, "grad_norm": 0.06472276896238327, "learning_rate": 3.913767496286962e-05, "loss": 0.2038, "step": 40680 }, { "epoch": 3.2956092028515878, "grad_norm": 0.07029642909765244, "learning_rate": 3.913317431027499e-05, "loss": 0.2132, "step": 40681 }, { "epoch": 3.295690213869086, "grad_norm": 0.07775336503982544, "learning_rate": 3.912867365768037e-05, "loss": 0.2136, "step": 40682 }, { "epoch": 3.2957712248865847, "grad_norm": 0.07341821491718292, "learning_rate": 3.912417300508574e-05, "loss": 0.235, "step": 40683 }, { "epoch": 3.295852235904083, "grad_norm": 0.06530492007732391, "learning_rate": 3.9119672352491115e-05, "loss": 0.2277, "step": 40684 }, { "epoch": 3.295933246921581, "grad_norm": 0.07276744395494461, "learning_rate": 3.911517169989649e-05, "loss": 0.2489, "step": 40685 }, { "epoch": 3.29601425793908, "grad_norm": 0.0678088366985321, "learning_rate": 3.911067104730186e-05, "loss": 0.2075, "step": 40686 }, { "epoch": 3.296095268956578, "grad_norm": 0.07170473784208298, "learning_rate": 3.9106170394707236e-05, "loss": 0.2811, "step": 40687 }, { "epoch": 3.2961762799740764, "grad_norm": 0.08175055682659149, "learning_rate": 3.910166974211261e-05, "loss": 0.2341, "step": 40688 }, { "epoch": 3.2962572909915746, "grad_norm": 0.08029843121767044, "learning_rate": 3.909716908951798e-05, "loss": 0.2256, "step": 40689 }, { "epoch": 3.2963383020090733, "grad_norm": 0.09754343330860138, "learning_rate": 3.909266843692336e-05, "loss": 0.2308, "step": 40690 }, { "epoch": 3.2964193130265715, "grad_norm": 0.06310459226369858, "learning_rate": 3.908816778432873e-05, "loss": 0.2222, "step": 40691 }, { "epoch": 3.29650032404407, "grad_norm": 0.06087411940097809, "learning_rate": 3.9083667131734104e-05, "loss": 0.2274, "step": 40692 }, { "epoch": 3.2965813350615685, "grad_norm": 0.08009161055088043, "learning_rate": 3.907916647913948e-05, "loss": 0.2568, "step": 40693 }, { "epoch": 3.2966623460790667, "grad_norm": 0.06610788404941559, "learning_rate": 3.907466582654485e-05, "loss": 0.2114, "step": 40694 }, { "epoch": 3.296743357096565, "grad_norm": 0.07393686473369598, "learning_rate": 3.9070165173950225e-05, "loss": 0.2408, "step": 40695 }, { "epoch": 3.2968243681140637, "grad_norm": 0.07974232733249664, "learning_rate": 3.90656645213556e-05, "loss": 0.2261, "step": 40696 }, { "epoch": 3.296905379131562, "grad_norm": 0.0787620022892952, "learning_rate": 3.906116386876097e-05, "loss": 0.2442, "step": 40697 }, { "epoch": 3.29698639014906, "grad_norm": 0.12104468047618866, "learning_rate": 3.9056663216166346e-05, "loss": 0.2038, "step": 40698 }, { "epoch": 3.297067401166559, "grad_norm": 0.07490499317646027, "learning_rate": 3.905216256357172e-05, "loss": 0.186, "step": 40699 }, { "epoch": 3.297148412184057, "grad_norm": 0.07450409978628159, "learning_rate": 3.904766191097709e-05, "loss": 0.1863, "step": 40700 }, { "epoch": 3.2972294232015553, "grad_norm": 0.08557897061109543, "learning_rate": 3.9043161258382467e-05, "loss": 0.2356, "step": 40701 }, { "epoch": 3.297310434219054, "grad_norm": 0.0652608573436737, "learning_rate": 3.903866060578784e-05, "loss": 0.2056, "step": 40702 }, { "epoch": 3.2973914452365523, "grad_norm": 0.07898595929145813, "learning_rate": 3.9034159953193214e-05, "loss": 0.294, "step": 40703 }, { "epoch": 3.2974724562540505, "grad_norm": 0.08902262151241302, "learning_rate": 3.902965930059859e-05, "loss": 0.1963, "step": 40704 }, { "epoch": 3.2975534672715487, "grad_norm": 0.06819375604391098, "learning_rate": 3.902515864800396e-05, "loss": 0.2214, "step": 40705 }, { "epoch": 3.2976344782890474, "grad_norm": 0.08384226262569427, "learning_rate": 3.9020657995409335e-05, "loss": 0.218, "step": 40706 }, { "epoch": 3.2977154893065457, "grad_norm": 0.06585050374269485, "learning_rate": 3.901615734281471e-05, "loss": 0.2238, "step": 40707 }, { "epoch": 3.297796500324044, "grad_norm": 0.06808405369520187, "learning_rate": 3.901165669022008e-05, "loss": 0.2746, "step": 40708 }, { "epoch": 3.2978775113415426, "grad_norm": 0.05338800698518753, "learning_rate": 3.9007156037625455e-05, "loss": 0.2007, "step": 40709 }, { "epoch": 3.297958522359041, "grad_norm": 0.07853983342647552, "learning_rate": 3.9002655385030836e-05, "loss": 0.2175, "step": 40710 }, { "epoch": 3.298039533376539, "grad_norm": 0.0728057250380516, "learning_rate": 3.89981547324362e-05, "loss": 0.2283, "step": 40711 }, { "epoch": 3.2981205443940373, "grad_norm": 0.06241932511329651, "learning_rate": 3.8993654079841576e-05, "loss": 0.2096, "step": 40712 }, { "epoch": 3.298201555411536, "grad_norm": 0.07981060445308685, "learning_rate": 3.8989153427246957e-05, "loss": 0.208, "step": 40713 }, { "epoch": 3.2982825664290343, "grad_norm": 0.0637211948633194, "learning_rate": 3.8984652774652323e-05, "loss": 0.2124, "step": 40714 }, { "epoch": 3.2983635774465325, "grad_norm": 0.058054119348526, "learning_rate": 3.89801521220577e-05, "loss": 0.2283, "step": 40715 }, { "epoch": 3.298444588464031, "grad_norm": 0.086003877222538, "learning_rate": 3.897565146946308e-05, "loss": 0.2738, "step": 40716 }, { "epoch": 3.2985255994815295, "grad_norm": 0.06300433725118637, "learning_rate": 3.8971150816868444e-05, "loss": 0.208, "step": 40717 }, { "epoch": 3.2986066104990277, "grad_norm": 0.0810168907046318, "learning_rate": 3.8966650164273825e-05, "loss": 0.2374, "step": 40718 }, { "epoch": 3.2986876215165264, "grad_norm": 0.07268819212913513, "learning_rate": 3.89621495116792e-05, "loss": 0.2419, "step": 40719 }, { "epoch": 3.2987686325340246, "grad_norm": 0.06387283653020859, "learning_rate": 3.8957648859084565e-05, "loss": 0.2113, "step": 40720 }, { "epoch": 3.298849643551523, "grad_norm": 0.08623532950878143, "learning_rate": 3.8953148206489945e-05, "loss": 0.255, "step": 40721 }, { "epoch": 3.2989306545690216, "grad_norm": 0.0770018920302391, "learning_rate": 3.894864755389532e-05, "loss": 0.2395, "step": 40722 }, { "epoch": 3.29901166558652, "grad_norm": 0.06887763738632202, "learning_rate": 3.894414690130069e-05, "loss": 0.2648, "step": 40723 }, { "epoch": 3.299092676604018, "grad_norm": 0.06291645020246506, "learning_rate": 3.8939646248706066e-05, "loss": 0.1935, "step": 40724 }, { "epoch": 3.2991736876215167, "grad_norm": 0.06266850233078003, "learning_rate": 3.893514559611144e-05, "loss": 0.2232, "step": 40725 }, { "epoch": 3.299254698639015, "grad_norm": 0.07828648388385773, "learning_rate": 3.8930644943516813e-05, "loss": 0.2509, "step": 40726 }, { "epoch": 3.2993357096565132, "grad_norm": 0.07096115499734879, "learning_rate": 3.892614429092219e-05, "loss": 0.1945, "step": 40727 }, { "epoch": 3.2994167206740115, "grad_norm": 0.05984153971076012, "learning_rate": 3.892164363832756e-05, "loss": 0.2376, "step": 40728 }, { "epoch": 3.29949773169151, "grad_norm": 0.06432328373193741, "learning_rate": 3.8917142985732934e-05, "loss": 0.2067, "step": 40729 }, { "epoch": 3.2995787427090084, "grad_norm": 0.06568169593811035, "learning_rate": 3.891264233313831e-05, "loss": 0.216, "step": 40730 }, { "epoch": 3.2996597537265067, "grad_norm": 0.08853048086166382, "learning_rate": 3.890814168054368e-05, "loss": 0.2494, "step": 40731 }, { "epoch": 3.2997407647440054, "grad_norm": 0.08453629165887833, "learning_rate": 3.8903641027949055e-05, "loss": 0.2407, "step": 40732 }, { "epoch": 3.2998217757615036, "grad_norm": 0.0698961466550827, "learning_rate": 3.889914037535443e-05, "loss": 0.2613, "step": 40733 }, { "epoch": 3.299902786779002, "grad_norm": 0.07776353508234024, "learning_rate": 3.88946397227598e-05, "loss": 0.2194, "step": 40734 }, { "epoch": 3.2999837977965, "grad_norm": 0.06141514703631401, "learning_rate": 3.8890139070165176e-05, "loss": 0.2003, "step": 40735 }, { "epoch": 3.3000648088139988, "grad_norm": 0.06544289737939835, "learning_rate": 3.888563841757055e-05, "loss": 0.2205, "step": 40736 }, { "epoch": 3.300145819831497, "grad_norm": 0.06846226751804352, "learning_rate": 3.888113776497592e-05, "loss": 0.2053, "step": 40737 }, { "epoch": 3.3002268308489953, "grad_norm": 0.06529880315065384, "learning_rate": 3.88766371123813e-05, "loss": 0.2147, "step": 40738 }, { "epoch": 3.300307841866494, "grad_norm": 0.07303272187709808, "learning_rate": 3.887213645978667e-05, "loss": 0.2141, "step": 40739 }, { "epoch": 3.300388852883992, "grad_norm": 0.0753205344080925, "learning_rate": 3.8867635807192044e-05, "loss": 0.2377, "step": 40740 }, { "epoch": 3.3004698639014904, "grad_norm": 0.06747289001941681, "learning_rate": 3.886313515459742e-05, "loss": 0.2327, "step": 40741 }, { "epoch": 3.300550874918989, "grad_norm": 0.08344721794128418, "learning_rate": 3.885863450200279e-05, "loss": 0.2464, "step": 40742 }, { "epoch": 3.3006318859364874, "grad_norm": 0.08885085582733154, "learning_rate": 3.8854133849408165e-05, "loss": 0.2519, "step": 40743 }, { "epoch": 3.3007128969539856, "grad_norm": 0.06508319079875946, "learning_rate": 3.884963319681354e-05, "loss": 0.2035, "step": 40744 }, { "epoch": 3.3007939079714843, "grad_norm": 0.052353620529174805, "learning_rate": 3.884513254421891e-05, "loss": 0.2429, "step": 40745 }, { "epoch": 3.3008749189889826, "grad_norm": 0.052554935216903687, "learning_rate": 3.8840631891624285e-05, "loss": 0.2111, "step": 40746 }, { "epoch": 3.300955930006481, "grad_norm": 0.0698976144194603, "learning_rate": 3.883613123902966e-05, "loss": 0.2467, "step": 40747 }, { "epoch": 3.3010369410239795, "grad_norm": 0.0756709948182106, "learning_rate": 3.883163058643503e-05, "loss": 0.2193, "step": 40748 }, { "epoch": 3.3011179520414777, "grad_norm": 0.08819136023521423, "learning_rate": 3.8827129933840406e-05, "loss": 0.2592, "step": 40749 }, { "epoch": 3.301198963058976, "grad_norm": 0.06856835633516312, "learning_rate": 3.882262928124578e-05, "loss": 0.2521, "step": 40750 }, { "epoch": 3.301279974076474, "grad_norm": 0.0643986389040947, "learning_rate": 3.881812862865116e-05, "loss": 0.1904, "step": 40751 }, { "epoch": 3.301360985093973, "grad_norm": 0.060446128249168396, "learning_rate": 3.8813627976056534e-05, "loss": 0.1933, "step": 40752 }, { "epoch": 3.301441996111471, "grad_norm": 0.07070574164390564, "learning_rate": 3.88091273234619e-05, "loss": 0.2103, "step": 40753 }, { "epoch": 3.3015230071289694, "grad_norm": 0.06437844783067703, "learning_rate": 3.880462667086728e-05, "loss": 0.2391, "step": 40754 }, { "epoch": 3.301604018146468, "grad_norm": 0.04855308309197426, "learning_rate": 3.8800126018272655e-05, "loss": 0.2116, "step": 40755 }, { "epoch": 3.3016850291639663, "grad_norm": 0.06874486058950424, "learning_rate": 3.879562536567802e-05, "loss": 0.2446, "step": 40756 }, { "epoch": 3.3017660401814646, "grad_norm": 0.08705892413854599, "learning_rate": 3.87911247130834e-05, "loss": 0.2385, "step": 40757 }, { "epoch": 3.301847051198963, "grad_norm": 0.0645841583609581, "learning_rate": 3.8786624060488776e-05, "loss": 0.2009, "step": 40758 }, { "epoch": 3.3019280622164615, "grad_norm": 0.07083959132432938, "learning_rate": 3.878212340789414e-05, "loss": 0.2311, "step": 40759 }, { "epoch": 3.3020090732339598, "grad_norm": 0.07376854866743088, "learning_rate": 3.877762275529952e-05, "loss": 0.1957, "step": 40760 }, { "epoch": 3.302090084251458, "grad_norm": 0.059838343411684036, "learning_rate": 3.8773122102704896e-05, "loss": 0.205, "step": 40761 }, { "epoch": 3.3021710952689567, "grad_norm": 0.05825914815068245, "learning_rate": 3.876862145011026e-05, "loss": 0.1942, "step": 40762 }, { "epoch": 3.302252106286455, "grad_norm": 0.08400315791368484, "learning_rate": 3.8764120797515644e-05, "loss": 0.2344, "step": 40763 }, { "epoch": 3.302333117303953, "grad_norm": 0.06611832231283188, "learning_rate": 3.875962014492102e-05, "loss": 0.2043, "step": 40764 }, { "epoch": 3.302414128321452, "grad_norm": 0.08515192568302155, "learning_rate": 3.875511949232639e-05, "loss": 0.2245, "step": 40765 }, { "epoch": 3.30249513933895, "grad_norm": 0.06319393962621689, "learning_rate": 3.8750618839731764e-05, "loss": 0.2129, "step": 40766 }, { "epoch": 3.3025761503564484, "grad_norm": 0.0669749304652214, "learning_rate": 3.874611818713714e-05, "loss": 0.2245, "step": 40767 }, { "epoch": 3.302657161373947, "grad_norm": 0.06981271505355835, "learning_rate": 3.874161753454251e-05, "loss": 0.2412, "step": 40768 }, { "epoch": 3.3027381723914453, "grad_norm": 0.09386993944644928, "learning_rate": 3.8737116881947885e-05, "loss": 0.2234, "step": 40769 }, { "epoch": 3.3028191834089435, "grad_norm": 0.0718587264418602, "learning_rate": 3.873261622935326e-05, "loss": 0.2105, "step": 40770 }, { "epoch": 3.3029001944264422, "grad_norm": 0.06613459438085556, "learning_rate": 3.872811557675863e-05, "loss": 0.2457, "step": 40771 }, { "epoch": 3.3029812054439405, "grad_norm": 0.05869852751493454, "learning_rate": 3.8723614924164006e-05, "loss": 0.2081, "step": 40772 }, { "epoch": 3.3030622164614387, "grad_norm": 0.07454083859920502, "learning_rate": 3.871911427156938e-05, "loss": 0.2027, "step": 40773 }, { "epoch": 3.303143227478937, "grad_norm": 0.07137610018253326, "learning_rate": 3.871461361897475e-05, "loss": 0.2281, "step": 40774 }, { "epoch": 3.3032242384964356, "grad_norm": 0.07652374356985092, "learning_rate": 3.871011296638013e-05, "loss": 0.2299, "step": 40775 }, { "epoch": 3.303305249513934, "grad_norm": 0.0735517367720604, "learning_rate": 3.87056123137855e-05, "loss": 0.2249, "step": 40776 }, { "epoch": 3.303386260531432, "grad_norm": 0.06907539069652557, "learning_rate": 3.8701111661190874e-05, "loss": 0.2501, "step": 40777 }, { "epoch": 3.303467271548931, "grad_norm": 0.0660337433218956, "learning_rate": 3.869661100859625e-05, "loss": 0.2071, "step": 40778 }, { "epoch": 3.303548282566429, "grad_norm": 0.09033715724945068, "learning_rate": 3.869211035600162e-05, "loss": 0.2094, "step": 40779 }, { "epoch": 3.3036292935839273, "grad_norm": 0.06318441033363342, "learning_rate": 3.8687609703406995e-05, "loss": 0.2159, "step": 40780 }, { "epoch": 3.3037103046014256, "grad_norm": 0.067022405564785, "learning_rate": 3.868310905081237e-05, "loss": 0.2768, "step": 40781 }, { "epoch": 3.3037913156189243, "grad_norm": 0.08411199599504471, "learning_rate": 3.867860839821774e-05, "loss": 0.2385, "step": 40782 }, { "epoch": 3.3038723266364225, "grad_norm": 0.08436904102563858, "learning_rate": 3.8674107745623116e-05, "loss": 0.2055, "step": 40783 }, { "epoch": 3.3039533376539207, "grad_norm": 0.054027408361434937, "learning_rate": 3.8669607093028496e-05, "loss": 0.2361, "step": 40784 }, { "epoch": 3.3040343486714194, "grad_norm": 0.06735050678253174, "learning_rate": 3.866510644043386e-05, "loss": 0.1889, "step": 40785 }, { "epoch": 3.3041153596889177, "grad_norm": 0.0733322873711586, "learning_rate": 3.8660605787839236e-05, "loss": 0.2393, "step": 40786 }, { "epoch": 3.304196370706416, "grad_norm": 0.06890393793582916, "learning_rate": 3.865610513524462e-05, "loss": 0.2059, "step": 40787 }, { "epoch": 3.3042773817239146, "grad_norm": 0.08290769159793854, "learning_rate": 3.8651604482649984e-05, "loss": 0.2401, "step": 40788 }, { "epoch": 3.304358392741413, "grad_norm": 0.07497076690196991, "learning_rate": 3.864710383005536e-05, "loss": 0.2217, "step": 40789 }, { "epoch": 3.304439403758911, "grad_norm": 0.06635642796754837, "learning_rate": 3.864260317746074e-05, "loss": 0.2094, "step": 40790 }, { "epoch": 3.30452041477641, "grad_norm": 0.0692463219165802, "learning_rate": 3.863810252486611e-05, "loss": 0.2341, "step": 40791 }, { "epoch": 3.304601425793908, "grad_norm": 0.08122988045215607, "learning_rate": 3.863360187227148e-05, "loss": 0.2077, "step": 40792 }, { "epoch": 3.3046824368114063, "grad_norm": 0.0768023282289505, "learning_rate": 3.862910121967686e-05, "loss": 0.2513, "step": 40793 }, { "epoch": 3.304763447828905, "grad_norm": 0.0745762512087822, "learning_rate": 3.862460056708223e-05, "loss": 0.2296, "step": 40794 }, { "epoch": 3.304844458846403, "grad_norm": 0.0761742889881134, "learning_rate": 3.86200999144876e-05, "loss": 0.2314, "step": 40795 }, { "epoch": 3.3049254698639015, "grad_norm": 0.07914459705352783, "learning_rate": 3.861559926189298e-05, "loss": 0.2257, "step": 40796 }, { "epoch": 3.3050064808813997, "grad_norm": 0.0910719782114029, "learning_rate": 3.861109860929835e-05, "loss": 0.3021, "step": 40797 }, { "epoch": 3.3050874918988984, "grad_norm": 0.07248116284608841, "learning_rate": 3.860659795670372e-05, "loss": 0.2256, "step": 40798 }, { "epoch": 3.3051685029163966, "grad_norm": 0.07484246790409088, "learning_rate": 3.86020973041091e-05, "loss": 0.2063, "step": 40799 }, { "epoch": 3.305249513933895, "grad_norm": 0.08107568323612213, "learning_rate": 3.8597596651514474e-05, "loss": 0.2075, "step": 40800 }, { "epoch": 3.3053305249513936, "grad_norm": 0.07556001842021942, "learning_rate": 3.859309599891984e-05, "loss": 0.2149, "step": 40801 }, { "epoch": 3.305411535968892, "grad_norm": 0.07405587285757065, "learning_rate": 3.858859534632522e-05, "loss": 0.2081, "step": 40802 }, { "epoch": 3.30549254698639, "grad_norm": 0.05908035486936569, "learning_rate": 3.8584094693730594e-05, "loss": 0.2221, "step": 40803 }, { "epoch": 3.3055735580038883, "grad_norm": 0.09235452115535736, "learning_rate": 3.857959404113597e-05, "loss": 0.2333, "step": 40804 }, { "epoch": 3.305654569021387, "grad_norm": 0.06490433216094971, "learning_rate": 3.857509338854134e-05, "loss": 0.191, "step": 40805 }, { "epoch": 3.3057355800388852, "grad_norm": 0.07053831964731216, "learning_rate": 3.8570592735946715e-05, "loss": 0.2508, "step": 40806 }, { "epoch": 3.3058165910563835, "grad_norm": 0.06669102609157562, "learning_rate": 3.856609208335209e-05, "loss": 0.2089, "step": 40807 }, { "epoch": 3.305897602073882, "grad_norm": 0.07495436817407608, "learning_rate": 3.856159143075746e-05, "loss": 0.212, "step": 40808 }, { "epoch": 3.3059786130913804, "grad_norm": 0.06892978399991989, "learning_rate": 3.8557090778162836e-05, "loss": 0.2598, "step": 40809 }, { "epoch": 3.3060596241088787, "grad_norm": 0.0675312802195549, "learning_rate": 3.855259012556821e-05, "loss": 0.2283, "step": 40810 }, { "epoch": 3.3061406351263773, "grad_norm": 0.07323117554187775, "learning_rate": 3.854808947297358e-05, "loss": 0.2133, "step": 40811 }, { "epoch": 3.3062216461438756, "grad_norm": 0.06845073401927948, "learning_rate": 3.854358882037896e-05, "loss": 0.2194, "step": 40812 }, { "epoch": 3.306302657161374, "grad_norm": 0.09151905030012131, "learning_rate": 3.853908816778433e-05, "loss": 0.2426, "step": 40813 }, { "epoch": 3.3063836681788725, "grad_norm": 0.08238843083381653, "learning_rate": 3.8534587515189704e-05, "loss": 0.2228, "step": 40814 }, { "epoch": 3.3064646791963708, "grad_norm": 0.06988435983657837, "learning_rate": 3.853008686259508e-05, "loss": 0.2416, "step": 40815 }, { "epoch": 3.306545690213869, "grad_norm": 0.06417524069547653, "learning_rate": 3.852558621000045e-05, "loss": 0.2228, "step": 40816 }, { "epoch": 3.3066267012313677, "grad_norm": 0.06799071282148361, "learning_rate": 3.8521085557405825e-05, "loss": 0.2113, "step": 40817 }, { "epoch": 3.306707712248866, "grad_norm": 0.07856892049312592, "learning_rate": 3.85165849048112e-05, "loss": 0.2194, "step": 40818 }, { "epoch": 3.306788723266364, "grad_norm": 0.09208235144615173, "learning_rate": 3.851208425221657e-05, "loss": 0.2542, "step": 40819 }, { "epoch": 3.3068697342838624, "grad_norm": 0.06966143846511841, "learning_rate": 3.850758359962195e-05, "loss": 0.2285, "step": 40820 }, { "epoch": 3.306950745301361, "grad_norm": 0.0819198414683342, "learning_rate": 3.850308294702732e-05, "loss": 0.2144, "step": 40821 }, { "epoch": 3.3070317563188594, "grad_norm": 0.07809050381183624, "learning_rate": 3.849858229443269e-05, "loss": 0.2435, "step": 40822 }, { "epoch": 3.3071127673363576, "grad_norm": 0.0795537456870079, "learning_rate": 3.849408164183807e-05, "loss": 0.2093, "step": 40823 }, { "epoch": 3.3071937783538563, "grad_norm": 0.08147698640823364, "learning_rate": 3.848958098924344e-05, "loss": 0.2786, "step": 40824 }, { "epoch": 3.3072747893713546, "grad_norm": 0.05749053508043289, "learning_rate": 3.8485080336648814e-05, "loss": 0.2318, "step": 40825 }, { "epoch": 3.307355800388853, "grad_norm": 0.07302997261285782, "learning_rate": 3.8480579684054194e-05, "loss": 0.2358, "step": 40826 }, { "epoch": 3.307436811406351, "grad_norm": 0.0854649692773819, "learning_rate": 3.847607903145956e-05, "loss": 0.246, "step": 40827 }, { "epoch": 3.3075178224238497, "grad_norm": 0.06140410900115967, "learning_rate": 3.8471578378864935e-05, "loss": 0.275, "step": 40828 }, { "epoch": 3.307598833441348, "grad_norm": 0.07026208937168121, "learning_rate": 3.8467077726270315e-05, "loss": 0.2614, "step": 40829 }, { "epoch": 3.307679844458846, "grad_norm": 0.05768724903464317, "learning_rate": 3.846257707367569e-05, "loss": 0.2325, "step": 40830 }, { "epoch": 3.307760855476345, "grad_norm": 0.0600363127887249, "learning_rate": 3.8458076421081055e-05, "loss": 0.1755, "step": 40831 }, { "epoch": 3.307841866493843, "grad_norm": 0.08797100186347961, "learning_rate": 3.8453575768486436e-05, "loss": 0.2295, "step": 40832 }, { "epoch": 3.3079228775113414, "grad_norm": 0.07335330545902252, "learning_rate": 3.844907511589181e-05, "loss": 0.2101, "step": 40833 }, { "epoch": 3.30800388852884, "grad_norm": 0.07781440019607544, "learning_rate": 3.8444574463297176e-05, "loss": 0.216, "step": 40834 }, { "epoch": 3.3080848995463383, "grad_norm": 0.08074433356523514, "learning_rate": 3.8440073810702557e-05, "loss": 0.2502, "step": 40835 }, { "epoch": 3.3081659105638366, "grad_norm": 0.05874902382493019, "learning_rate": 3.843557315810793e-05, "loss": 0.2076, "step": 40836 }, { "epoch": 3.3082469215813353, "grad_norm": 0.06767843663692474, "learning_rate": 3.84310725055133e-05, "loss": 0.2357, "step": 40837 }, { "epoch": 3.3083279325988335, "grad_norm": 0.07101554423570633, "learning_rate": 3.842657185291868e-05, "loss": 0.2101, "step": 40838 }, { "epoch": 3.3084089436163318, "grad_norm": 0.06890977919101715, "learning_rate": 3.842207120032405e-05, "loss": 0.2384, "step": 40839 }, { "epoch": 3.3084899546338304, "grad_norm": 0.06549681723117828, "learning_rate": 3.841757054772942e-05, "loss": 0.226, "step": 40840 }, { "epoch": 3.3085709656513287, "grad_norm": 0.06217101216316223, "learning_rate": 3.84130698951348e-05, "loss": 0.2338, "step": 40841 }, { "epoch": 3.308651976668827, "grad_norm": 0.07235632836818695, "learning_rate": 3.840856924254017e-05, "loss": 0.2122, "step": 40842 }, { "epoch": 3.308732987686325, "grad_norm": 0.06837397813796997, "learning_rate": 3.8404068589945545e-05, "loss": 0.2475, "step": 40843 }, { "epoch": 3.308813998703824, "grad_norm": 0.0652531161904335, "learning_rate": 3.839956793735092e-05, "loss": 0.2657, "step": 40844 }, { "epoch": 3.308895009721322, "grad_norm": 0.06572028994560242, "learning_rate": 3.839506728475629e-05, "loss": 0.2221, "step": 40845 }, { "epoch": 3.3089760207388204, "grad_norm": 0.07158073782920837, "learning_rate": 3.8390566632161666e-05, "loss": 0.2385, "step": 40846 }, { "epoch": 3.309057031756319, "grad_norm": 0.07187211513519287, "learning_rate": 3.838606597956704e-05, "loss": 0.2762, "step": 40847 }, { "epoch": 3.3091380427738173, "grad_norm": 0.06367867439985275, "learning_rate": 3.838156532697241e-05, "loss": 0.192, "step": 40848 }, { "epoch": 3.3092190537913155, "grad_norm": 0.0679791048169136, "learning_rate": 3.837706467437779e-05, "loss": 0.2284, "step": 40849 }, { "epoch": 3.309300064808814, "grad_norm": 0.061335284262895584, "learning_rate": 3.837256402178316e-05, "loss": 0.2441, "step": 40850 }, { "epoch": 3.3093810758263125, "grad_norm": 0.06405185163021088, "learning_rate": 3.8368063369188534e-05, "loss": 0.2051, "step": 40851 }, { "epoch": 3.3094620868438107, "grad_norm": 0.07147097587585449, "learning_rate": 3.836356271659391e-05, "loss": 0.2153, "step": 40852 }, { "epoch": 3.309543097861309, "grad_norm": 0.07569832354784012, "learning_rate": 3.835906206399928e-05, "loss": 0.2225, "step": 40853 }, { "epoch": 3.3096241088788076, "grad_norm": 0.08858956396579742, "learning_rate": 3.8354561411404655e-05, "loss": 0.2254, "step": 40854 }, { "epoch": 3.309705119896306, "grad_norm": 0.06722646206617355, "learning_rate": 3.835006075881003e-05, "loss": 0.2188, "step": 40855 }, { "epoch": 3.309786130913804, "grad_norm": 0.06068732589483261, "learning_rate": 3.834556010621541e-05, "loss": 0.234, "step": 40856 }, { "epoch": 3.309867141931303, "grad_norm": 0.0778256431221962, "learning_rate": 3.8341059453620776e-05, "loss": 0.2329, "step": 40857 }, { "epoch": 3.309948152948801, "grad_norm": 0.08266248553991318, "learning_rate": 3.833655880102615e-05, "loss": 0.2201, "step": 40858 }, { "epoch": 3.3100291639662993, "grad_norm": 0.07404722273349762, "learning_rate": 3.833205814843153e-05, "loss": 0.2013, "step": 40859 }, { "epoch": 3.310110174983798, "grad_norm": 0.08323337882757187, "learning_rate": 3.8327557495836897e-05, "loss": 0.2271, "step": 40860 }, { "epoch": 3.3101911860012962, "grad_norm": 0.059005096554756165, "learning_rate": 3.832305684324227e-05, "loss": 0.1849, "step": 40861 }, { "epoch": 3.3102721970187945, "grad_norm": 0.08371363580226898, "learning_rate": 3.831855619064765e-05, "loss": 0.2213, "step": 40862 }, { "epoch": 3.310353208036293, "grad_norm": 0.07347278296947479, "learning_rate": 3.831405553805302e-05, "loss": 0.2097, "step": 40863 }, { "epoch": 3.3104342190537914, "grad_norm": 0.07287754863500595, "learning_rate": 3.830955488545839e-05, "loss": 0.2723, "step": 40864 }, { "epoch": 3.3105152300712897, "grad_norm": 0.06831778585910797, "learning_rate": 3.830505423286377e-05, "loss": 0.2293, "step": 40865 }, { "epoch": 3.310596241088788, "grad_norm": 0.08180266618728638, "learning_rate": 3.830055358026914e-05, "loss": 0.2155, "step": 40866 }, { "epoch": 3.3106772521062866, "grad_norm": 0.0707252249121666, "learning_rate": 3.829605292767451e-05, "loss": 0.2041, "step": 40867 }, { "epoch": 3.310758263123785, "grad_norm": 0.06609929352998734, "learning_rate": 3.829155227507989e-05, "loss": 0.2128, "step": 40868 }, { "epoch": 3.310839274141283, "grad_norm": 0.08587994426488876, "learning_rate": 3.8287051622485266e-05, "loss": 0.2455, "step": 40869 }, { "epoch": 3.310920285158782, "grad_norm": 0.07224773615598679, "learning_rate": 3.828255096989063e-05, "loss": 0.1964, "step": 40870 }, { "epoch": 3.31100129617628, "grad_norm": 0.07214035838842392, "learning_rate": 3.827805031729601e-05, "loss": 0.2122, "step": 40871 }, { "epoch": 3.3110823071937783, "grad_norm": 0.06749515980482101, "learning_rate": 3.827354966470139e-05, "loss": 0.2266, "step": 40872 }, { "epoch": 3.3111633182112765, "grad_norm": 0.07175476849079132, "learning_rate": 3.8269049012106753e-05, "loss": 0.2451, "step": 40873 }, { "epoch": 3.311244329228775, "grad_norm": 0.05612906441092491, "learning_rate": 3.8264548359512134e-05, "loss": 0.2184, "step": 40874 }, { "epoch": 3.3113253402462735, "grad_norm": 0.06670593470335007, "learning_rate": 3.826004770691751e-05, "loss": 0.2125, "step": 40875 }, { "epoch": 3.3114063512637717, "grad_norm": 0.07357032597064972, "learning_rate": 3.8255547054322874e-05, "loss": 0.2017, "step": 40876 }, { "epoch": 3.3114873622812704, "grad_norm": 0.06423185765743256, "learning_rate": 3.8251046401728255e-05, "loss": 0.2121, "step": 40877 }, { "epoch": 3.3115683732987686, "grad_norm": 0.06728553026914597, "learning_rate": 3.824654574913363e-05, "loss": 0.2274, "step": 40878 }, { "epoch": 3.311649384316267, "grad_norm": 0.08222640305757523, "learning_rate": 3.8242045096538995e-05, "loss": 0.2713, "step": 40879 }, { "epoch": 3.3117303953337656, "grad_norm": 0.06782739609479904, "learning_rate": 3.8237544443944375e-05, "loss": 0.2146, "step": 40880 }, { "epoch": 3.311811406351264, "grad_norm": 0.07442223280668259, "learning_rate": 3.823304379134975e-05, "loss": 0.2592, "step": 40881 }, { "epoch": 3.311892417368762, "grad_norm": 0.07705773413181305, "learning_rate": 3.822854313875512e-05, "loss": 0.2149, "step": 40882 }, { "epoch": 3.3119734283862607, "grad_norm": 0.05849326029419899, "learning_rate": 3.8224042486160496e-05, "loss": 0.2335, "step": 40883 }, { "epoch": 3.312054439403759, "grad_norm": 0.06957482546567917, "learning_rate": 3.821954183356587e-05, "loss": 0.2482, "step": 40884 }, { "epoch": 3.3121354504212572, "grad_norm": 0.0883265808224678, "learning_rate": 3.8215041180971243e-05, "loss": 0.2015, "step": 40885 }, { "epoch": 3.3122164614387555, "grad_norm": 0.0719548687338829, "learning_rate": 3.821054052837662e-05, "loss": 0.1951, "step": 40886 }, { "epoch": 3.312297472456254, "grad_norm": 0.09112759679555893, "learning_rate": 3.820603987578199e-05, "loss": 0.2224, "step": 40887 }, { "epoch": 3.3123784834737524, "grad_norm": 0.068062424659729, "learning_rate": 3.8201539223187364e-05, "loss": 0.2267, "step": 40888 }, { "epoch": 3.3124594944912507, "grad_norm": 0.06875590234994888, "learning_rate": 3.819703857059274e-05, "loss": 0.237, "step": 40889 }, { "epoch": 3.3125405055087493, "grad_norm": 0.06804489344358444, "learning_rate": 3.819253791799811e-05, "loss": 0.179, "step": 40890 }, { "epoch": 3.3126215165262476, "grad_norm": 0.05216558277606964, "learning_rate": 3.8188037265403485e-05, "loss": 0.2078, "step": 40891 }, { "epoch": 3.312702527543746, "grad_norm": 0.08079401403665543, "learning_rate": 3.818353661280886e-05, "loss": 0.2436, "step": 40892 }, { "epoch": 3.3127835385612445, "grad_norm": 0.07252391427755356, "learning_rate": 3.817903596021423e-05, "loss": 0.1987, "step": 40893 }, { "epoch": 3.3128645495787428, "grad_norm": 0.06319592148065567, "learning_rate": 3.8174535307619606e-05, "loss": 0.2297, "step": 40894 }, { "epoch": 3.312945560596241, "grad_norm": 0.07093706727027893, "learning_rate": 3.8170034655024986e-05, "loss": 0.2044, "step": 40895 }, { "epoch": 3.3130265716137393, "grad_norm": 0.06530580669641495, "learning_rate": 3.816553400243035e-05, "loss": 0.2554, "step": 40896 }, { "epoch": 3.313107582631238, "grad_norm": 0.08208633214235306, "learning_rate": 3.816103334983573e-05, "loss": 0.2081, "step": 40897 }, { "epoch": 3.313188593648736, "grad_norm": 0.07316944003105164, "learning_rate": 3.815653269724111e-05, "loss": 0.22, "step": 40898 }, { "epoch": 3.3132696046662344, "grad_norm": 0.07189015299081802, "learning_rate": 3.8152032044646474e-05, "loss": 0.2151, "step": 40899 }, { "epoch": 3.313350615683733, "grad_norm": 0.0698755532503128, "learning_rate": 3.814753139205185e-05, "loss": 0.2235, "step": 40900 }, { "epoch": 3.3134316267012314, "grad_norm": 0.0643271952867508, "learning_rate": 3.814303073945723e-05, "loss": 0.2635, "step": 40901 }, { "epoch": 3.3135126377187296, "grad_norm": 0.08225993067026138, "learning_rate": 3.8138530086862595e-05, "loss": 0.2349, "step": 40902 }, { "epoch": 3.3135936487362283, "grad_norm": 0.07047945261001587, "learning_rate": 3.813402943426797e-05, "loss": 0.2403, "step": 40903 }, { "epoch": 3.3136746597537265, "grad_norm": 0.06076255068182945, "learning_rate": 3.812952878167335e-05, "loss": 0.2344, "step": 40904 }, { "epoch": 3.313755670771225, "grad_norm": 0.08206469565629959, "learning_rate": 3.8125028129078716e-05, "loss": 0.2581, "step": 40905 }, { "epoch": 3.3138366817887235, "grad_norm": 0.05962511897087097, "learning_rate": 3.812052747648409e-05, "loss": 0.2062, "step": 40906 }, { "epoch": 3.3139176928062217, "grad_norm": 0.08725281804800034, "learning_rate": 3.811602682388947e-05, "loss": 0.1962, "step": 40907 }, { "epoch": 3.31399870382372, "grad_norm": 0.0772843286395073, "learning_rate": 3.8111526171294836e-05, "loss": 0.2083, "step": 40908 }, { "epoch": 3.314079714841218, "grad_norm": 0.06500102579593658, "learning_rate": 3.810702551870021e-05, "loss": 0.2145, "step": 40909 }, { "epoch": 3.314160725858717, "grad_norm": 0.0693049356341362, "learning_rate": 3.810252486610559e-05, "loss": 0.2264, "step": 40910 }, { "epoch": 3.314241736876215, "grad_norm": 0.0698850080370903, "learning_rate": 3.8098024213510964e-05, "loss": 0.2062, "step": 40911 }, { "epoch": 3.3143227478937134, "grad_norm": 0.07052836567163467, "learning_rate": 3.809352356091633e-05, "loss": 0.2382, "step": 40912 }, { "epoch": 3.314403758911212, "grad_norm": 0.0574011467397213, "learning_rate": 3.808902290832171e-05, "loss": 0.21, "step": 40913 }, { "epoch": 3.3144847699287103, "grad_norm": 0.07263054698705673, "learning_rate": 3.8084522255727085e-05, "loss": 0.2556, "step": 40914 }, { "epoch": 3.3145657809462086, "grad_norm": 0.07969239354133606, "learning_rate": 3.808002160313245e-05, "loss": 0.2419, "step": 40915 }, { "epoch": 3.314646791963707, "grad_norm": 0.09571804851293564, "learning_rate": 3.807552095053783e-05, "loss": 0.2307, "step": 40916 }, { "epoch": 3.3147278029812055, "grad_norm": 0.06808076053857803, "learning_rate": 3.8071020297943206e-05, "loss": 0.2652, "step": 40917 }, { "epoch": 3.3148088139987038, "grad_norm": 0.08558756858110428, "learning_rate": 3.806651964534857e-05, "loss": 0.2409, "step": 40918 }, { "epoch": 3.314889825016202, "grad_norm": 0.0665164515376091, "learning_rate": 3.806201899275395e-05, "loss": 0.1753, "step": 40919 }, { "epoch": 3.3149708360337007, "grad_norm": 0.07719418406486511, "learning_rate": 3.8057518340159326e-05, "loss": 0.2568, "step": 40920 }, { "epoch": 3.315051847051199, "grad_norm": 0.0784536600112915, "learning_rate": 3.805301768756469e-05, "loss": 0.2139, "step": 40921 }, { "epoch": 3.315132858068697, "grad_norm": 0.07965029031038284, "learning_rate": 3.8048517034970074e-05, "loss": 0.2379, "step": 40922 }, { "epoch": 3.315213869086196, "grad_norm": 0.08881150186061859, "learning_rate": 3.804401638237545e-05, "loss": 0.2192, "step": 40923 }, { "epoch": 3.315294880103694, "grad_norm": 0.0813041478395462, "learning_rate": 3.803951572978082e-05, "loss": 0.2427, "step": 40924 }, { "epoch": 3.3153758911211924, "grad_norm": 0.06555255502462387, "learning_rate": 3.8035015077186194e-05, "loss": 0.2041, "step": 40925 }, { "epoch": 3.315456902138691, "grad_norm": 0.08366473764181137, "learning_rate": 3.803051442459157e-05, "loss": 0.2269, "step": 40926 }, { "epoch": 3.3155379131561893, "grad_norm": 0.08231698721647263, "learning_rate": 3.802601377199694e-05, "loss": 0.2259, "step": 40927 }, { "epoch": 3.3156189241736875, "grad_norm": 0.0702444314956665, "learning_rate": 3.8021513119402315e-05, "loss": 0.2219, "step": 40928 }, { "epoch": 3.315699935191186, "grad_norm": 0.07622160017490387, "learning_rate": 3.801701246680769e-05, "loss": 0.2459, "step": 40929 }, { "epoch": 3.3157809462086845, "grad_norm": 0.06236666068434715, "learning_rate": 3.801251181421306e-05, "loss": 0.2388, "step": 40930 }, { "epoch": 3.3158619572261827, "grad_norm": 0.07520124316215515, "learning_rate": 3.8008011161618436e-05, "loss": 0.2309, "step": 40931 }, { "epoch": 3.315942968243681, "grad_norm": 0.06214138865470886, "learning_rate": 3.800351050902381e-05, "loss": 0.2032, "step": 40932 }, { "epoch": 3.3160239792611796, "grad_norm": 0.08071856945753098, "learning_rate": 3.799900985642918e-05, "loss": 0.239, "step": 40933 }, { "epoch": 3.316104990278678, "grad_norm": 0.06837823987007141, "learning_rate": 3.799450920383456e-05, "loss": 0.1834, "step": 40934 }, { "epoch": 3.316186001296176, "grad_norm": 0.06929964572191238, "learning_rate": 3.799000855123993e-05, "loss": 0.2254, "step": 40935 }, { "epoch": 3.316267012313675, "grad_norm": 0.058290932327508926, "learning_rate": 3.7985507898645304e-05, "loss": 0.1865, "step": 40936 }, { "epoch": 3.316348023331173, "grad_norm": 0.06676318496465683, "learning_rate": 3.7981007246050684e-05, "loss": 0.2082, "step": 40937 }, { "epoch": 3.3164290343486713, "grad_norm": 0.0737726241350174, "learning_rate": 3.797650659345605e-05, "loss": 0.1919, "step": 40938 }, { "epoch": 3.3165100453661696, "grad_norm": 0.06238668039441109, "learning_rate": 3.7972005940861425e-05, "loss": 0.204, "step": 40939 }, { "epoch": 3.3165910563836682, "grad_norm": 0.07773415744304657, "learning_rate": 3.7967505288266805e-05, "loss": 0.2373, "step": 40940 }, { "epoch": 3.3166720674011665, "grad_norm": 0.06476813554763794, "learning_rate": 3.796300463567217e-05, "loss": 0.2138, "step": 40941 }, { "epoch": 3.3167530784186647, "grad_norm": 0.06933821737766266, "learning_rate": 3.7958503983077546e-05, "loss": 0.2367, "step": 40942 }, { "epoch": 3.3168340894361634, "grad_norm": 0.0789467841386795, "learning_rate": 3.7954003330482926e-05, "loss": 0.234, "step": 40943 }, { "epoch": 3.3169151004536617, "grad_norm": 0.0722360908985138, "learning_rate": 3.794950267788829e-05, "loss": 0.2598, "step": 40944 }, { "epoch": 3.31699611147116, "grad_norm": 0.07831466943025589, "learning_rate": 3.7945002025293666e-05, "loss": 0.2247, "step": 40945 }, { "epoch": 3.3170771224886586, "grad_norm": 0.07463128119707108, "learning_rate": 3.794050137269905e-05, "loss": 0.2139, "step": 40946 }, { "epoch": 3.317158133506157, "grad_norm": 0.060892872512340546, "learning_rate": 3.7936000720104414e-05, "loss": 0.2258, "step": 40947 }, { "epoch": 3.317239144523655, "grad_norm": 0.07231955975294113, "learning_rate": 3.793150006750979e-05, "loss": 0.237, "step": 40948 }, { "epoch": 3.317320155541154, "grad_norm": 0.07078664749860764, "learning_rate": 3.792699941491517e-05, "loss": 0.2176, "step": 40949 }, { "epoch": 3.317401166558652, "grad_norm": 0.07049831002950668, "learning_rate": 3.792249876232054e-05, "loss": 0.2014, "step": 40950 }, { "epoch": 3.3174821775761503, "grad_norm": 0.06473879516124725, "learning_rate": 3.791799810972591e-05, "loss": 0.2045, "step": 40951 }, { "epoch": 3.317563188593649, "grad_norm": 0.07700739055871964, "learning_rate": 3.791349745713129e-05, "loss": 0.2151, "step": 40952 }, { "epoch": 3.317644199611147, "grad_norm": 0.06077568233013153, "learning_rate": 3.790899680453666e-05, "loss": 0.2349, "step": 40953 }, { "epoch": 3.3177252106286454, "grad_norm": 0.07367191463708878, "learning_rate": 3.790449615194203e-05, "loss": 0.1973, "step": 40954 }, { "epoch": 3.3178062216461437, "grad_norm": 0.08809377253055573, "learning_rate": 3.789999549934741e-05, "loss": 0.2156, "step": 40955 }, { "epoch": 3.3178872326636424, "grad_norm": 0.08473682403564453, "learning_rate": 3.789549484675278e-05, "loss": 0.242, "step": 40956 }, { "epoch": 3.3179682436811406, "grad_norm": 0.06702679395675659, "learning_rate": 3.789099419415815e-05, "loss": 0.2388, "step": 40957 }, { "epoch": 3.318049254698639, "grad_norm": 0.07082941383123398, "learning_rate": 3.788649354156353e-05, "loss": 0.2484, "step": 40958 }, { "epoch": 3.3181302657161376, "grad_norm": 0.06634097546339035, "learning_rate": 3.7881992888968904e-05, "loss": 0.2554, "step": 40959 }, { "epoch": 3.318211276733636, "grad_norm": 0.08151954412460327, "learning_rate": 3.787749223637427e-05, "loss": 0.2588, "step": 40960 }, { "epoch": 3.318292287751134, "grad_norm": 0.06060457602143288, "learning_rate": 3.787299158377965e-05, "loss": 0.2046, "step": 40961 }, { "epoch": 3.3183732987686323, "grad_norm": 0.07713788747787476, "learning_rate": 3.7868490931185024e-05, "loss": 0.2384, "step": 40962 }, { "epoch": 3.318454309786131, "grad_norm": 0.05473591759800911, "learning_rate": 3.78639902785904e-05, "loss": 0.1844, "step": 40963 }, { "epoch": 3.3185353208036292, "grad_norm": 0.06427813321352005, "learning_rate": 3.785948962599577e-05, "loss": 0.1996, "step": 40964 }, { "epoch": 3.3186163318211275, "grad_norm": 0.05841223523020744, "learning_rate": 3.7854988973401145e-05, "loss": 0.2162, "step": 40965 }, { "epoch": 3.318697342838626, "grad_norm": 0.07398484647274017, "learning_rate": 3.785048832080652e-05, "loss": 0.2155, "step": 40966 }, { "epoch": 3.3187783538561244, "grad_norm": 0.07369926571846008, "learning_rate": 3.784598766821189e-05, "loss": 0.2326, "step": 40967 }, { "epoch": 3.3188593648736227, "grad_norm": 0.05897458642721176, "learning_rate": 3.7841487015617266e-05, "loss": 0.1878, "step": 40968 }, { "epoch": 3.3189403758911213, "grad_norm": 0.08481781929731369, "learning_rate": 3.783698636302264e-05, "loss": 0.2356, "step": 40969 }, { "epoch": 3.3190213869086196, "grad_norm": 0.07468573749065399, "learning_rate": 3.783248571042801e-05, "loss": 0.2138, "step": 40970 }, { "epoch": 3.319102397926118, "grad_norm": 0.07561396807432175, "learning_rate": 3.782798505783339e-05, "loss": 0.2199, "step": 40971 }, { "epoch": 3.3191834089436165, "grad_norm": 0.08415020257234573, "learning_rate": 3.782348440523876e-05, "loss": 0.2468, "step": 40972 }, { "epoch": 3.3192644199611148, "grad_norm": 0.06797681003808975, "learning_rate": 3.7818983752644134e-05, "loss": 0.2558, "step": 40973 }, { "epoch": 3.319345430978613, "grad_norm": 0.06277844309806824, "learning_rate": 3.781448310004951e-05, "loss": 0.2838, "step": 40974 }, { "epoch": 3.3194264419961117, "grad_norm": 0.06896187365055084, "learning_rate": 3.780998244745488e-05, "loss": 0.2197, "step": 40975 }, { "epoch": 3.31950745301361, "grad_norm": 0.06196269765496254, "learning_rate": 3.780548179486026e-05, "loss": 0.24, "step": 40976 }, { "epoch": 3.319588464031108, "grad_norm": 0.056846845895051956, "learning_rate": 3.780098114226563e-05, "loss": 0.2185, "step": 40977 }, { "epoch": 3.3196694750486064, "grad_norm": 0.05916735529899597, "learning_rate": 3.7796480489671e-05, "loss": 0.2281, "step": 40978 }, { "epoch": 3.319750486066105, "grad_norm": 0.08651582896709442, "learning_rate": 3.779197983707638e-05, "loss": 0.2281, "step": 40979 }, { "epoch": 3.3198314970836034, "grad_norm": 0.07499776780605316, "learning_rate": 3.778747918448175e-05, "loss": 0.2055, "step": 40980 }, { "epoch": 3.3199125081011016, "grad_norm": 0.08028426021337509, "learning_rate": 3.778297853188712e-05, "loss": 0.2084, "step": 40981 }, { "epoch": 3.3199935191186003, "grad_norm": 0.06728916615247726, "learning_rate": 3.77784778792925e-05, "loss": 0.2382, "step": 40982 }, { "epoch": 3.3200745301360985, "grad_norm": 0.07473582774400711, "learning_rate": 3.777397722669787e-05, "loss": 0.2539, "step": 40983 }, { "epoch": 3.320155541153597, "grad_norm": 0.06037028506398201, "learning_rate": 3.7769476574103244e-05, "loss": 0.2172, "step": 40984 }, { "epoch": 3.320236552171095, "grad_norm": 0.06607451289892197, "learning_rate": 3.7764975921508624e-05, "loss": 0.2263, "step": 40985 }, { "epoch": 3.3203175631885937, "grad_norm": 0.08410777896642685, "learning_rate": 3.776047526891399e-05, "loss": 0.2462, "step": 40986 }, { "epoch": 3.320398574206092, "grad_norm": 0.06139102205634117, "learning_rate": 3.7755974616319365e-05, "loss": 0.2257, "step": 40987 }, { "epoch": 3.32047958522359, "grad_norm": 0.06923295557498932, "learning_rate": 3.7751473963724745e-05, "loss": 0.2722, "step": 40988 }, { "epoch": 3.320560596241089, "grad_norm": 0.06262877583503723, "learning_rate": 3.774697331113012e-05, "loss": 0.2082, "step": 40989 }, { "epoch": 3.320641607258587, "grad_norm": 0.06936154514551163, "learning_rate": 3.7742472658535485e-05, "loss": 0.2164, "step": 40990 }, { "epoch": 3.3207226182760854, "grad_norm": 0.06605419516563416, "learning_rate": 3.7737972005940866e-05, "loss": 0.2194, "step": 40991 }, { "epoch": 3.320803629293584, "grad_norm": 0.07005484402179718, "learning_rate": 3.773347135334624e-05, "loss": 0.2101, "step": 40992 }, { "epoch": 3.3208846403110823, "grad_norm": 0.07076304405927658, "learning_rate": 3.7728970700751606e-05, "loss": 0.2131, "step": 40993 }, { "epoch": 3.3209656513285806, "grad_norm": 0.08401350677013397, "learning_rate": 3.7724470048156987e-05, "loss": 0.2526, "step": 40994 }, { "epoch": 3.3210466623460793, "grad_norm": 0.07341130822896957, "learning_rate": 3.771996939556236e-05, "loss": 0.2047, "step": 40995 }, { "epoch": 3.3211276733635775, "grad_norm": 0.061423882842063904, "learning_rate": 3.771546874296773e-05, "loss": 0.1962, "step": 40996 }, { "epoch": 3.3212086843810757, "grad_norm": 0.06588398665189743, "learning_rate": 3.771096809037311e-05, "loss": 0.1883, "step": 40997 }, { "epoch": 3.3212896953985744, "grad_norm": 0.07813892513513565, "learning_rate": 3.770646743777848e-05, "loss": 0.2181, "step": 40998 }, { "epoch": 3.3213707064160727, "grad_norm": 0.08592596650123596, "learning_rate": 3.770196678518385e-05, "loss": 0.2369, "step": 40999 }, { "epoch": 3.321451717433571, "grad_norm": 0.06253305077552795, "learning_rate": 3.769746613258923e-05, "loss": 0.2143, "step": 41000 }, { "epoch": 3.321532728451069, "grad_norm": 0.07199438661336899, "learning_rate": 3.76929654799946e-05, "loss": 0.191, "step": 41001 }, { "epoch": 3.321613739468568, "grad_norm": 0.08037972450256348, "learning_rate": 3.7688464827399975e-05, "loss": 0.2358, "step": 41002 }, { "epoch": 3.321694750486066, "grad_norm": 0.07553397119045258, "learning_rate": 3.768396417480535e-05, "loss": 0.2193, "step": 41003 }, { "epoch": 3.3217757615035644, "grad_norm": 0.07229546457529068, "learning_rate": 3.767946352221072e-05, "loss": 0.2347, "step": 41004 }, { "epoch": 3.321856772521063, "grad_norm": 0.06054199859499931, "learning_rate": 3.7674962869616096e-05, "loss": 0.2467, "step": 41005 }, { "epoch": 3.3219377835385613, "grad_norm": 0.0800226703286171, "learning_rate": 3.767046221702147e-05, "loss": 0.2404, "step": 41006 }, { "epoch": 3.3220187945560595, "grad_norm": 0.06743893027305603, "learning_rate": 3.7665961564426843e-05, "loss": 0.2017, "step": 41007 }, { "epoch": 3.3220998055735578, "grad_norm": 0.07700297236442566, "learning_rate": 3.766146091183222e-05, "loss": 0.2492, "step": 41008 }, { "epoch": 3.3221808165910565, "grad_norm": 0.07854170352220535, "learning_rate": 3.765696025923759e-05, "loss": 0.219, "step": 41009 }, { "epoch": 3.3222618276085547, "grad_norm": 0.07884832471609116, "learning_rate": 3.7652459606642964e-05, "loss": 0.2217, "step": 41010 }, { "epoch": 3.322342838626053, "grad_norm": 0.07336324453353882, "learning_rate": 3.764795895404834e-05, "loss": 0.2226, "step": 41011 }, { "epoch": 3.3224238496435516, "grad_norm": 0.065107062458992, "learning_rate": 3.764345830145371e-05, "loss": 0.1805, "step": 41012 }, { "epoch": 3.32250486066105, "grad_norm": 0.06360862404108047, "learning_rate": 3.7638957648859085e-05, "loss": 0.2039, "step": 41013 }, { "epoch": 3.322585871678548, "grad_norm": 0.08214043825864792, "learning_rate": 3.763445699626446e-05, "loss": 0.2392, "step": 41014 }, { "epoch": 3.322666882696047, "grad_norm": 0.07553614675998688, "learning_rate": 3.762995634366984e-05, "loss": 0.2276, "step": 41015 }, { "epoch": 3.322747893713545, "grad_norm": 0.06392810493707657, "learning_rate": 3.7625455691075206e-05, "loss": 0.2248, "step": 41016 }, { "epoch": 3.3228289047310433, "grad_norm": 0.06898233294487, "learning_rate": 3.762095503848058e-05, "loss": 0.239, "step": 41017 }, { "epoch": 3.322909915748542, "grad_norm": 0.07211174070835114, "learning_rate": 3.761645438588596e-05, "loss": 0.1974, "step": 41018 }, { "epoch": 3.3229909267660402, "grad_norm": 0.07649188488721848, "learning_rate": 3.761195373329133e-05, "loss": 0.2391, "step": 41019 }, { "epoch": 3.3230719377835385, "grad_norm": 0.06290451437234879, "learning_rate": 3.76074530806967e-05, "loss": 0.2278, "step": 41020 }, { "epoch": 3.323152948801037, "grad_norm": 0.07498381286859512, "learning_rate": 3.760295242810208e-05, "loss": 0.2493, "step": 41021 }, { "epoch": 3.3232339598185354, "grad_norm": 0.06386851519346237, "learning_rate": 3.759845177550745e-05, "loss": 0.2204, "step": 41022 }, { "epoch": 3.3233149708360337, "grad_norm": 0.08008598536252975, "learning_rate": 3.759395112291282e-05, "loss": 0.2542, "step": 41023 }, { "epoch": 3.323395981853532, "grad_norm": 0.0818173736333847, "learning_rate": 3.75894504703182e-05, "loss": 0.2217, "step": 41024 }, { "epoch": 3.3234769928710306, "grad_norm": 0.07507924735546112, "learning_rate": 3.758494981772357e-05, "loss": 0.2582, "step": 41025 }, { "epoch": 3.323558003888529, "grad_norm": 0.08730128407478333, "learning_rate": 3.758044916512894e-05, "loss": 0.2248, "step": 41026 }, { "epoch": 3.323639014906027, "grad_norm": 0.07072755694389343, "learning_rate": 3.757594851253432e-05, "loss": 0.2268, "step": 41027 }, { "epoch": 3.323720025923526, "grad_norm": 0.08612517267465591, "learning_rate": 3.7571447859939696e-05, "loss": 0.2493, "step": 41028 }, { "epoch": 3.323801036941024, "grad_norm": 0.09255276620388031, "learning_rate": 3.756694720734506e-05, "loss": 0.2155, "step": 41029 }, { "epoch": 3.3238820479585223, "grad_norm": 0.07072529196739197, "learning_rate": 3.756244655475044e-05, "loss": 0.2385, "step": 41030 }, { "epoch": 3.3239630589760205, "grad_norm": 0.05725023150444031, "learning_rate": 3.755794590215582e-05, "loss": 0.217, "step": 41031 }, { "epoch": 3.324044069993519, "grad_norm": 0.07195749878883362, "learning_rate": 3.7553445249561183e-05, "loss": 0.2235, "step": 41032 }, { "epoch": 3.3241250810110174, "grad_norm": 0.08406466245651245, "learning_rate": 3.7548944596966564e-05, "loss": 0.2072, "step": 41033 }, { "epoch": 3.3242060920285157, "grad_norm": 0.07305304706096649, "learning_rate": 3.754444394437194e-05, "loss": 0.2314, "step": 41034 }, { "epoch": 3.3242871030460144, "grad_norm": 0.07687455415725708, "learning_rate": 3.7539943291777304e-05, "loss": 0.2263, "step": 41035 }, { "epoch": 3.3243681140635126, "grad_norm": 0.06038808450102806, "learning_rate": 3.7535442639182685e-05, "loss": 0.1994, "step": 41036 }, { "epoch": 3.324449125081011, "grad_norm": 0.07261287420988083, "learning_rate": 3.753094198658806e-05, "loss": 0.2009, "step": 41037 }, { "epoch": 3.3245301360985096, "grad_norm": 0.07470063120126724, "learning_rate": 3.7526441333993425e-05, "loss": 0.233, "step": 41038 }, { "epoch": 3.324611147116008, "grad_norm": 0.15417388081550598, "learning_rate": 3.7521940681398805e-05, "loss": 0.2311, "step": 41039 }, { "epoch": 3.324692158133506, "grad_norm": 0.07478626072406769, "learning_rate": 3.751744002880418e-05, "loss": 0.2175, "step": 41040 }, { "epoch": 3.3247731691510047, "grad_norm": 0.06912209838628769, "learning_rate": 3.751293937620955e-05, "loss": 0.2272, "step": 41041 }, { "epoch": 3.324854180168503, "grad_norm": 0.06295725703239441, "learning_rate": 3.7508438723614926e-05, "loss": 0.2332, "step": 41042 }, { "epoch": 3.3249351911860012, "grad_norm": 0.0667472630739212, "learning_rate": 3.75039380710203e-05, "loss": 0.2394, "step": 41043 }, { "epoch": 3.3250162022035, "grad_norm": 0.06752104312181473, "learning_rate": 3.7499437418425673e-05, "loss": 0.2683, "step": 41044 }, { "epoch": 3.325097213220998, "grad_norm": 0.06008462980389595, "learning_rate": 3.749493676583105e-05, "loss": 0.2224, "step": 41045 }, { "epoch": 3.3251782242384964, "grad_norm": 0.061989929527044296, "learning_rate": 3.749043611323642e-05, "loss": 0.2097, "step": 41046 }, { "epoch": 3.3252592352559946, "grad_norm": 0.0873345360159874, "learning_rate": 3.7485935460641794e-05, "loss": 0.2514, "step": 41047 }, { "epoch": 3.3253402462734933, "grad_norm": 0.059997983276844025, "learning_rate": 3.748143480804717e-05, "loss": 0.2436, "step": 41048 }, { "epoch": 3.3254212572909916, "grad_norm": 0.08212314546108246, "learning_rate": 3.747693415545254e-05, "loss": 0.2218, "step": 41049 }, { "epoch": 3.32550226830849, "grad_norm": 0.0879613533616066, "learning_rate": 3.7472433502857915e-05, "loss": 0.2378, "step": 41050 }, { "epoch": 3.3255832793259885, "grad_norm": 0.06860115379095078, "learning_rate": 3.746793285026329e-05, "loss": 0.2199, "step": 41051 }, { "epoch": 3.3256642903434868, "grad_norm": 0.10232620686292648, "learning_rate": 3.746343219766866e-05, "loss": 0.2238, "step": 41052 }, { "epoch": 3.325745301360985, "grad_norm": 0.06840762495994568, "learning_rate": 3.7458931545074036e-05, "loss": 0.2312, "step": 41053 }, { "epoch": 3.3258263123784833, "grad_norm": 0.07458966225385666, "learning_rate": 3.7454430892479416e-05, "loss": 0.2178, "step": 41054 }, { "epoch": 3.325907323395982, "grad_norm": 0.07784730195999146, "learning_rate": 3.744993023988478e-05, "loss": 0.2165, "step": 41055 }, { "epoch": 3.32598833441348, "grad_norm": 0.07316484302282333, "learning_rate": 3.744542958729016e-05, "loss": 0.206, "step": 41056 }, { "epoch": 3.3260693454309784, "grad_norm": 0.06419821083545685, "learning_rate": 3.744092893469554e-05, "loss": 0.2106, "step": 41057 }, { "epoch": 3.326150356448477, "grad_norm": 0.07934359461069107, "learning_rate": 3.7436428282100904e-05, "loss": 0.2298, "step": 41058 }, { "epoch": 3.3262313674659754, "grad_norm": 0.07370511442422867, "learning_rate": 3.743192762950628e-05, "loss": 0.2486, "step": 41059 }, { "epoch": 3.3263123784834736, "grad_norm": 0.0726943090558052, "learning_rate": 3.742742697691166e-05, "loss": 0.1943, "step": 41060 }, { "epoch": 3.3263933895009723, "grad_norm": 0.0706993043422699, "learning_rate": 3.7422926324317025e-05, "loss": 0.2054, "step": 41061 }, { "epoch": 3.3264744005184705, "grad_norm": 0.08410005271434784, "learning_rate": 3.74184256717224e-05, "loss": 0.225, "step": 41062 }, { "epoch": 3.326555411535969, "grad_norm": 0.06017423793673515, "learning_rate": 3.741392501912778e-05, "loss": 0.2526, "step": 41063 }, { "epoch": 3.3266364225534675, "grad_norm": 0.07366826385259628, "learning_rate": 3.7409424366533146e-05, "loss": 0.2335, "step": 41064 }, { "epoch": 3.3267174335709657, "grad_norm": 0.06251514703035355, "learning_rate": 3.740492371393852e-05, "loss": 0.2121, "step": 41065 }, { "epoch": 3.326798444588464, "grad_norm": 0.10737292468547821, "learning_rate": 3.74004230613439e-05, "loss": 0.2723, "step": 41066 }, { "epoch": 3.3268794556059627, "grad_norm": 0.07564511150121689, "learning_rate": 3.7395922408749266e-05, "loss": 0.2048, "step": 41067 }, { "epoch": 3.326960466623461, "grad_norm": 0.06839878857135773, "learning_rate": 3.739142175615464e-05, "loss": 0.1951, "step": 41068 }, { "epoch": 3.327041477640959, "grad_norm": 0.06925508379936218, "learning_rate": 3.738692110356002e-05, "loss": 0.2168, "step": 41069 }, { "epoch": 3.3271224886584574, "grad_norm": 0.08146601915359497, "learning_rate": 3.7382420450965394e-05, "loss": 0.2619, "step": 41070 }, { "epoch": 3.327203499675956, "grad_norm": 0.08915166556835175, "learning_rate": 3.737791979837076e-05, "loss": 0.2065, "step": 41071 }, { "epoch": 3.3272845106934543, "grad_norm": 0.0576382540166378, "learning_rate": 3.737341914577614e-05, "loss": 0.1842, "step": 41072 }, { "epoch": 3.3273655217109526, "grad_norm": 0.07133428007364273, "learning_rate": 3.7368918493181515e-05, "loss": 0.2147, "step": 41073 }, { "epoch": 3.3274465327284513, "grad_norm": 0.07418784499168396, "learning_rate": 3.736441784058689e-05, "loss": 0.2181, "step": 41074 }, { "epoch": 3.3275275437459495, "grad_norm": 0.08585530519485474, "learning_rate": 3.735991718799226e-05, "loss": 0.2321, "step": 41075 }, { "epoch": 3.3276085547634477, "grad_norm": 0.08041617274284363, "learning_rate": 3.7355416535397636e-05, "loss": 0.2512, "step": 41076 }, { "epoch": 3.327689565780946, "grad_norm": 0.0663224533200264, "learning_rate": 3.735091588280301e-05, "loss": 0.2015, "step": 41077 }, { "epoch": 3.3277705767984447, "grad_norm": 0.05717449635267258, "learning_rate": 3.734641523020838e-05, "loss": 0.1653, "step": 41078 }, { "epoch": 3.327851587815943, "grad_norm": 0.071390800178051, "learning_rate": 3.7341914577613756e-05, "loss": 0.2119, "step": 41079 }, { "epoch": 3.327932598833441, "grad_norm": 0.060663558542728424, "learning_rate": 3.733741392501913e-05, "loss": 0.2232, "step": 41080 }, { "epoch": 3.32801360985094, "grad_norm": 0.05562737211585045, "learning_rate": 3.7332913272424504e-05, "loss": 0.2543, "step": 41081 }, { "epoch": 3.328094620868438, "grad_norm": 0.07524958997964859, "learning_rate": 3.732841261982988e-05, "loss": 0.2296, "step": 41082 }, { "epoch": 3.3281756318859363, "grad_norm": 0.09169891476631165, "learning_rate": 3.732391196723525e-05, "loss": 0.2707, "step": 41083 }, { "epoch": 3.328256642903435, "grad_norm": 0.07389675825834274, "learning_rate": 3.7319411314640624e-05, "loss": 0.2375, "step": 41084 }, { "epoch": 3.3283376539209333, "grad_norm": 0.08331091701984406, "learning_rate": 3.7314910662046e-05, "loss": 0.2336, "step": 41085 }, { "epoch": 3.3284186649384315, "grad_norm": 0.07018539309501648, "learning_rate": 3.731041000945137e-05, "loss": 0.1999, "step": 41086 }, { "epoch": 3.32849967595593, "grad_norm": 0.06626079976558685, "learning_rate": 3.7305909356856745e-05, "loss": 0.2342, "step": 41087 }, { "epoch": 3.3285806869734285, "grad_norm": 0.07911474257707596, "learning_rate": 3.730140870426212e-05, "loss": 0.2656, "step": 41088 }, { "epoch": 3.3286616979909267, "grad_norm": 0.07544355094432831, "learning_rate": 3.729690805166749e-05, "loss": 0.2379, "step": 41089 }, { "epoch": 3.3287427090084254, "grad_norm": 0.07290852069854736, "learning_rate": 3.7292407399072866e-05, "loss": 0.1906, "step": 41090 }, { "epoch": 3.3288237200259236, "grad_norm": 0.07008974999189377, "learning_rate": 3.728790674647824e-05, "loss": 0.2076, "step": 41091 }, { "epoch": 3.328904731043422, "grad_norm": 0.07178022712469101, "learning_rate": 3.728340609388361e-05, "loss": 0.2148, "step": 41092 }, { "epoch": 3.32898574206092, "grad_norm": 0.06797996908426285, "learning_rate": 3.727890544128899e-05, "loss": 0.2439, "step": 41093 }, { "epoch": 3.329066753078419, "grad_norm": 0.06787247955799103, "learning_rate": 3.727440478869436e-05, "loss": 0.2219, "step": 41094 }, { "epoch": 3.329147764095917, "grad_norm": 0.08076586574316025, "learning_rate": 3.7269904136099734e-05, "loss": 0.2366, "step": 41095 }, { "epoch": 3.3292287751134153, "grad_norm": 0.0767500177025795, "learning_rate": 3.7265403483505114e-05, "loss": 0.2551, "step": 41096 }, { "epoch": 3.329309786130914, "grad_norm": 0.09102772176265717, "learning_rate": 3.726090283091048e-05, "loss": 0.251, "step": 41097 }, { "epoch": 3.3293907971484122, "grad_norm": 0.09674298018217087, "learning_rate": 3.7256402178315855e-05, "loss": 0.27, "step": 41098 }, { "epoch": 3.3294718081659105, "grad_norm": 0.06335968524217606, "learning_rate": 3.7251901525721235e-05, "loss": 0.2358, "step": 41099 }, { "epoch": 3.3295528191834087, "grad_norm": 0.07425528764724731, "learning_rate": 3.72474008731266e-05, "loss": 0.229, "step": 41100 }, { "epoch": 3.3296338302009074, "grad_norm": 0.07559535652399063, "learning_rate": 3.7242900220531976e-05, "loss": 0.3009, "step": 41101 }, { "epoch": 3.3297148412184057, "grad_norm": 0.06772898137569427, "learning_rate": 3.7238399567937356e-05, "loss": 0.2082, "step": 41102 }, { "epoch": 3.329795852235904, "grad_norm": 0.07053738087415695, "learning_rate": 3.723389891534272e-05, "loss": 0.2121, "step": 41103 }, { "epoch": 3.3298768632534026, "grad_norm": 0.09082667529582977, "learning_rate": 3.7229398262748096e-05, "loss": 0.2083, "step": 41104 }, { "epoch": 3.329957874270901, "grad_norm": 0.0763431042432785, "learning_rate": 3.722489761015348e-05, "loss": 0.2415, "step": 41105 }, { "epoch": 3.330038885288399, "grad_norm": 0.08213107287883759, "learning_rate": 3.7220396957558844e-05, "loss": 0.2301, "step": 41106 }, { "epoch": 3.3301198963058978, "grad_norm": 0.06864434480667114, "learning_rate": 3.721589630496422e-05, "loss": 0.2031, "step": 41107 }, { "epoch": 3.330200907323396, "grad_norm": 0.06650760769844055, "learning_rate": 3.72113956523696e-05, "loss": 0.2658, "step": 41108 }, { "epoch": 3.3302819183408943, "grad_norm": 0.06854722648859024, "learning_rate": 3.720689499977497e-05, "loss": 0.2103, "step": 41109 }, { "epoch": 3.330362929358393, "grad_norm": 0.06579779833555222, "learning_rate": 3.7202394347180345e-05, "loss": 0.2229, "step": 41110 }, { "epoch": 3.330443940375891, "grad_norm": 0.08219362795352936, "learning_rate": 3.719789369458572e-05, "loss": 0.2209, "step": 41111 }, { "epoch": 3.3305249513933894, "grad_norm": 0.07643623650074005, "learning_rate": 3.719339304199109e-05, "loss": 0.2432, "step": 41112 }, { "epoch": 3.3306059624108877, "grad_norm": 0.07173693925142288, "learning_rate": 3.7188892389396466e-05, "loss": 0.2432, "step": 41113 }, { "epoch": 3.3306869734283864, "grad_norm": 0.07207030057907104, "learning_rate": 3.718439173680184e-05, "loss": 0.2274, "step": 41114 }, { "epoch": 3.3307679844458846, "grad_norm": 0.06946907937526703, "learning_rate": 3.717989108420721e-05, "loss": 0.2349, "step": 41115 }, { "epoch": 3.330848995463383, "grad_norm": 0.06256712228059769, "learning_rate": 3.7175390431612586e-05, "loss": 0.1854, "step": 41116 }, { "epoch": 3.3309300064808816, "grad_norm": 0.06501569598913193, "learning_rate": 3.717088977901796e-05, "loss": 0.2151, "step": 41117 }, { "epoch": 3.33101101749838, "grad_norm": 0.07814828306436539, "learning_rate": 3.7166389126423334e-05, "loss": 0.2147, "step": 41118 }, { "epoch": 3.331092028515878, "grad_norm": 0.07279353588819504, "learning_rate": 3.716188847382871e-05, "loss": 0.2258, "step": 41119 }, { "epoch": 3.3311730395333763, "grad_norm": 0.07200466841459274, "learning_rate": 3.715738782123408e-05, "loss": 0.2295, "step": 41120 }, { "epoch": 3.331254050550875, "grad_norm": 0.08193326741456985, "learning_rate": 3.7152887168639455e-05, "loss": 0.2487, "step": 41121 }, { "epoch": 3.3313350615683732, "grad_norm": 0.0763644427061081, "learning_rate": 3.714838651604483e-05, "loss": 0.2199, "step": 41122 }, { "epoch": 3.3314160725858715, "grad_norm": 0.0799073725938797, "learning_rate": 3.71438858634502e-05, "loss": 0.2549, "step": 41123 }, { "epoch": 3.33149708360337, "grad_norm": 0.08127576112747192, "learning_rate": 3.7139385210855575e-05, "loss": 0.225, "step": 41124 }, { "epoch": 3.3315780946208684, "grad_norm": 0.06715318560600281, "learning_rate": 3.713488455826095e-05, "loss": 0.2203, "step": 41125 }, { "epoch": 3.3316591056383666, "grad_norm": 0.06862284243106842, "learning_rate": 3.713038390566632e-05, "loss": 0.1744, "step": 41126 }, { "epoch": 3.3317401166558653, "grad_norm": 0.07042937725782394, "learning_rate": 3.7125883253071696e-05, "loss": 0.222, "step": 41127 }, { "epoch": 3.3318211276733636, "grad_norm": 0.07680152356624603, "learning_rate": 3.712138260047707e-05, "loss": 0.2143, "step": 41128 }, { "epoch": 3.331902138690862, "grad_norm": 0.06801113486289978, "learning_rate": 3.711688194788244e-05, "loss": 0.2006, "step": 41129 }, { "epoch": 3.3319831497083605, "grad_norm": 0.07561274617910385, "learning_rate": 3.711238129528782e-05, "loss": 0.2103, "step": 41130 }, { "epoch": 3.3320641607258588, "grad_norm": 0.08124644309282303, "learning_rate": 3.710788064269319e-05, "loss": 0.2161, "step": 41131 }, { "epoch": 3.332145171743357, "grad_norm": 0.06947558373212814, "learning_rate": 3.7103379990098564e-05, "loss": 0.2009, "step": 41132 }, { "epoch": 3.3322261827608557, "grad_norm": 0.06849096715450287, "learning_rate": 3.709887933750394e-05, "loss": 0.2314, "step": 41133 }, { "epoch": 3.332307193778354, "grad_norm": 0.08039727807044983, "learning_rate": 3.709437868490931e-05, "loss": 0.2216, "step": 41134 }, { "epoch": 3.332388204795852, "grad_norm": 0.06369374692440033, "learning_rate": 3.708987803231469e-05, "loss": 0.2547, "step": 41135 }, { "epoch": 3.3324692158133504, "grad_norm": 0.0763283222913742, "learning_rate": 3.708537737972006e-05, "loss": 0.2719, "step": 41136 }, { "epoch": 3.332550226830849, "grad_norm": 0.0758407711982727, "learning_rate": 3.708087672712543e-05, "loss": 0.2192, "step": 41137 }, { "epoch": 3.3326312378483474, "grad_norm": 0.0780859962105751, "learning_rate": 3.707637607453081e-05, "loss": 0.2424, "step": 41138 }, { "epoch": 3.3327122488658456, "grad_norm": 0.06562550365924835, "learning_rate": 3.707187542193618e-05, "loss": 0.2204, "step": 41139 }, { "epoch": 3.3327932598833443, "grad_norm": 0.07291289418935776, "learning_rate": 3.706737476934155e-05, "loss": 0.1928, "step": 41140 }, { "epoch": 3.3328742709008425, "grad_norm": 0.07883249223232269, "learning_rate": 3.706287411674693e-05, "loss": 0.2355, "step": 41141 }, { "epoch": 3.332955281918341, "grad_norm": 0.07805757224559784, "learning_rate": 3.70583734641523e-05, "loss": 0.1924, "step": 41142 }, { "epoch": 3.333036292935839, "grad_norm": 0.0664428249001503, "learning_rate": 3.705387281155768e-05, "loss": 0.2141, "step": 41143 }, { "epoch": 3.3331173039533377, "grad_norm": 0.07167979329824448, "learning_rate": 3.7049372158963054e-05, "loss": 0.2545, "step": 41144 }, { "epoch": 3.333198314970836, "grad_norm": 0.06236296892166138, "learning_rate": 3.704487150636842e-05, "loss": 0.1999, "step": 41145 }, { "epoch": 3.333279325988334, "grad_norm": 0.054489415138959885, "learning_rate": 3.70403708537738e-05, "loss": 0.2022, "step": 41146 }, { "epoch": 3.333360337005833, "grad_norm": 0.07027623802423477, "learning_rate": 3.7035870201179175e-05, "loss": 0.2133, "step": 41147 }, { "epoch": 3.333441348023331, "grad_norm": 0.06870032846927643, "learning_rate": 3.703136954858455e-05, "loss": 0.2308, "step": 41148 }, { "epoch": 3.3335223590408294, "grad_norm": 0.06924640387296677, "learning_rate": 3.702686889598992e-05, "loss": 0.1967, "step": 41149 }, { "epoch": 3.333603370058328, "grad_norm": 0.07043696194887161, "learning_rate": 3.7022368243395296e-05, "loss": 0.2301, "step": 41150 }, { "epoch": 3.3336843810758263, "grad_norm": 0.07276766002178192, "learning_rate": 3.701786759080067e-05, "loss": 0.2369, "step": 41151 }, { "epoch": 3.3337653920933246, "grad_norm": 0.09626363217830658, "learning_rate": 3.701336693820604e-05, "loss": 0.2637, "step": 41152 }, { "epoch": 3.3338464031108233, "grad_norm": 0.09370481967926025, "learning_rate": 3.7008866285611417e-05, "loss": 0.2779, "step": 41153 }, { "epoch": 3.3339274141283215, "grad_norm": 0.09347948431968689, "learning_rate": 3.700436563301679e-05, "loss": 0.2279, "step": 41154 }, { "epoch": 3.3340084251458197, "grad_norm": 0.0766562893986702, "learning_rate": 3.6999864980422164e-05, "loss": 0.2301, "step": 41155 }, { "epoch": 3.3340894361633184, "grad_norm": 0.08153241127729416, "learning_rate": 3.699536432782754e-05, "loss": 0.223, "step": 41156 }, { "epoch": 3.3341704471808167, "grad_norm": 0.08658970892429352, "learning_rate": 3.699086367523291e-05, "loss": 0.2521, "step": 41157 }, { "epoch": 3.334251458198315, "grad_norm": 0.07518699765205383, "learning_rate": 3.6986363022638285e-05, "loss": 0.2222, "step": 41158 }, { "epoch": 3.334332469215813, "grad_norm": 0.07649372518062592, "learning_rate": 3.698186237004366e-05, "loss": 0.2444, "step": 41159 }, { "epoch": 3.334413480233312, "grad_norm": 0.07010925561189651, "learning_rate": 3.697736171744903e-05, "loss": 0.2491, "step": 41160 }, { "epoch": 3.33449449125081, "grad_norm": 0.06980877369642258, "learning_rate": 3.6972861064854405e-05, "loss": 0.2384, "step": 41161 }, { "epoch": 3.3345755022683083, "grad_norm": 0.06655539572238922, "learning_rate": 3.696836041225978e-05, "loss": 0.1995, "step": 41162 }, { "epoch": 3.334656513285807, "grad_norm": 0.09244280308485031, "learning_rate": 3.696385975966515e-05, "loss": 0.3013, "step": 41163 }, { "epoch": 3.3347375243033053, "grad_norm": 0.08281079679727554, "learning_rate": 3.6959359107070526e-05, "loss": 0.2368, "step": 41164 }, { "epoch": 3.3348185353208035, "grad_norm": 0.07828114181756973, "learning_rate": 3.69548584544759e-05, "loss": 0.2452, "step": 41165 }, { "epoch": 3.3348995463383018, "grad_norm": 0.06749124079942703, "learning_rate": 3.6950357801881273e-05, "loss": 0.1937, "step": 41166 }, { "epoch": 3.3349805573558005, "grad_norm": 0.0752006322145462, "learning_rate": 3.694585714928665e-05, "loss": 0.2321, "step": 41167 }, { "epoch": 3.3350615683732987, "grad_norm": 0.08764395862817764, "learning_rate": 3.694135649669202e-05, "loss": 0.2135, "step": 41168 }, { "epoch": 3.335142579390797, "grad_norm": 0.07645867764949799, "learning_rate": 3.6936855844097394e-05, "loss": 0.2145, "step": 41169 }, { "epoch": 3.3352235904082956, "grad_norm": 0.06811700761318207, "learning_rate": 3.693235519150277e-05, "loss": 0.2022, "step": 41170 }, { "epoch": 3.335304601425794, "grad_norm": 0.06362304091453552, "learning_rate": 3.692785453890814e-05, "loss": 0.2218, "step": 41171 }, { "epoch": 3.335385612443292, "grad_norm": 0.05618695542216301, "learning_rate": 3.6923353886313515e-05, "loss": 0.1979, "step": 41172 }, { "epoch": 3.335466623460791, "grad_norm": 0.06092815101146698, "learning_rate": 3.691885323371889e-05, "loss": 0.2119, "step": 41173 }, { "epoch": 3.335547634478289, "grad_norm": 0.06255250424146652, "learning_rate": 3.691435258112427e-05, "loss": 0.2096, "step": 41174 }, { "epoch": 3.3356286454957873, "grad_norm": 0.06754370778799057, "learning_rate": 3.6909851928529636e-05, "loss": 0.2277, "step": 41175 }, { "epoch": 3.335709656513286, "grad_norm": 0.0938117578625679, "learning_rate": 3.690535127593501e-05, "loss": 0.272, "step": 41176 }, { "epoch": 3.3357906675307842, "grad_norm": 0.08052606135606766, "learning_rate": 3.690085062334039e-05, "loss": 0.1952, "step": 41177 }, { "epoch": 3.3358716785482825, "grad_norm": 0.06511618942022324, "learning_rate": 3.689634997074576e-05, "loss": 0.231, "step": 41178 }, { "epoch": 3.335952689565781, "grad_norm": 0.05862768739461899, "learning_rate": 3.689184931815114e-05, "loss": 0.1946, "step": 41179 }, { "epoch": 3.3360337005832794, "grad_norm": 0.06720541417598724, "learning_rate": 3.688734866555651e-05, "loss": 0.2034, "step": 41180 }, { "epoch": 3.3361147116007777, "grad_norm": 0.0854513868689537, "learning_rate": 3.688284801296188e-05, "loss": 0.2326, "step": 41181 }, { "epoch": 3.336195722618276, "grad_norm": 0.08422550559043884, "learning_rate": 3.687834736036726e-05, "loss": 0.236, "step": 41182 }, { "epoch": 3.3362767336357746, "grad_norm": 0.06728208065032959, "learning_rate": 3.687384670777263e-05, "loss": 0.2049, "step": 41183 }, { "epoch": 3.336357744653273, "grad_norm": 0.06178736686706543, "learning_rate": 3.6869346055178e-05, "loss": 0.2077, "step": 41184 }, { "epoch": 3.336438755670771, "grad_norm": 0.06015453487634659, "learning_rate": 3.686484540258338e-05, "loss": 0.2575, "step": 41185 }, { "epoch": 3.3365197666882698, "grad_norm": 0.07950470596551895, "learning_rate": 3.686034474998875e-05, "loss": 0.2076, "step": 41186 }, { "epoch": 3.336600777705768, "grad_norm": 0.08321350067853928, "learning_rate": 3.6855844097394126e-05, "loss": 0.2115, "step": 41187 }, { "epoch": 3.3366817887232663, "grad_norm": 0.07731655240058899, "learning_rate": 3.68513434447995e-05, "loss": 0.212, "step": 41188 }, { "epoch": 3.3367627997407645, "grad_norm": 0.08558639883995056, "learning_rate": 3.684684279220487e-05, "loss": 0.254, "step": 41189 }, { "epoch": 3.336843810758263, "grad_norm": 0.07424626499414444, "learning_rate": 3.684234213961025e-05, "loss": 0.2648, "step": 41190 }, { "epoch": 3.3369248217757614, "grad_norm": 0.06944981217384338, "learning_rate": 3.683784148701562e-05, "loss": 0.2381, "step": 41191 }, { "epoch": 3.3370058327932597, "grad_norm": 0.07112687081098557, "learning_rate": 3.6833340834420994e-05, "loss": 0.2338, "step": 41192 }, { "epoch": 3.3370868438107584, "grad_norm": 0.07397808879613876, "learning_rate": 3.682884018182637e-05, "loss": 0.2292, "step": 41193 }, { "epoch": 3.3371678548282566, "grad_norm": 0.07461774349212646, "learning_rate": 3.682433952923174e-05, "loss": 0.2209, "step": 41194 }, { "epoch": 3.337248865845755, "grad_norm": 0.06668192148208618, "learning_rate": 3.6819838876637115e-05, "loss": 0.2313, "step": 41195 }, { "epoch": 3.3373298768632536, "grad_norm": 0.08236929029226303, "learning_rate": 3.681533822404249e-05, "loss": 0.211, "step": 41196 }, { "epoch": 3.337410887880752, "grad_norm": 0.08950665593147278, "learning_rate": 3.681083757144786e-05, "loss": 0.2696, "step": 41197 }, { "epoch": 3.33749189889825, "grad_norm": 0.06646319478750229, "learning_rate": 3.6806336918853236e-05, "loss": 0.2307, "step": 41198 }, { "epoch": 3.3375729099157487, "grad_norm": 0.0690191239118576, "learning_rate": 3.680183626625861e-05, "loss": 0.2302, "step": 41199 }, { "epoch": 3.337653920933247, "grad_norm": 0.06904525309801102, "learning_rate": 3.679733561366398e-05, "loss": 0.1905, "step": 41200 }, { "epoch": 3.337734931950745, "grad_norm": 0.06719229370355606, "learning_rate": 3.6792834961069356e-05, "loss": 0.2102, "step": 41201 }, { "epoch": 3.337815942968244, "grad_norm": 0.07356458902359009, "learning_rate": 3.678833430847473e-05, "loss": 0.1997, "step": 41202 }, { "epoch": 3.337896953985742, "grad_norm": 0.06217246130108833, "learning_rate": 3.6783833655880104e-05, "loss": 0.2282, "step": 41203 }, { "epoch": 3.3379779650032404, "grad_norm": 0.06710822880268097, "learning_rate": 3.677933300328548e-05, "loss": 0.2238, "step": 41204 }, { "epoch": 3.3380589760207386, "grad_norm": 0.07744985073804855, "learning_rate": 3.677483235069085e-05, "loss": 0.2082, "step": 41205 }, { "epoch": 3.3381399870382373, "grad_norm": 0.07299521565437317, "learning_rate": 3.6770331698096224e-05, "loss": 0.2265, "step": 41206 }, { "epoch": 3.3382209980557356, "grad_norm": 0.06934568285942078, "learning_rate": 3.67658310455016e-05, "loss": 0.2083, "step": 41207 }, { "epoch": 3.338302009073234, "grad_norm": 0.06951799243688583, "learning_rate": 3.676133039290697e-05, "loss": 0.2486, "step": 41208 }, { "epoch": 3.3383830200907325, "grad_norm": 0.08006034046411514, "learning_rate": 3.6756829740312345e-05, "loss": 0.2156, "step": 41209 }, { "epoch": 3.3384640311082308, "grad_norm": 0.07656034082174301, "learning_rate": 3.675232908771772e-05, "loss": 0.2395, "step": 41210 }, { "epoch": 3.338545042125729, "grad_norm": 0.08563342690467834, "learning_rate": 3.674782843512309e-05, "loss": 0.2559, "step": 41211 }, { "epoch": 3.3386260531432272, "grad_norm": 0.09620586782693863, "learning_rate": 3.674332778252847e-05, "loss": 0.245, "step": 41212 }, { "epoch": 3.338707064160726, "grad_norm": 0.05890323594212532, "learning_rate": 3.6738827129933846e-05, "loss": 0.2105, "step": 41213 }, { "epoch": 3.338788075178224, "grad_norm": 0.07511017471551895, "learning_rate": 3.673432647733921e-05, "loss": 0.2239, "step": 41214 }, { "epoch": 3.3388690861957224, "grad_norm": 0.05438505485653877, "learning_rate": 3.6729825824744594e-05, "loss": 0.2074, "step": 41215 }, { "epoch": 3.338950097213221, "grad_norm": 0.05938766524195671, "learning_rate": 3.672532517214997e-05, "loss": 0.2266, "step": 41216 }, { "epoch": 3.3390311082307194, "grad_norm": 0.06936460733413696, "learning_rate": 3.6720824519555334e-05, "loss": 0.2513, "step": 41217 }, { "epoch": 3.3391121192482176, "grad_norm": 0.06176231801509857, "learning_rate": 3.6716323866960714e-05, "loss": 0.2257, "step": 41218 }, { "epoch": 3.3391931302657163, "grad_norm": 0.0678444430232048, "learning_rate": 3.671182321436609e-05, "loss": 0.2048, "step": 41219 }, { "epoch": 3.3392741412832145, "grad_norm": 0.07848968356847763, "learning_rate": 3.6707322561771455e-05, "loss": 0.2374, "step": 41220 }, { "epoch": 3.339355152300713, "grad_norm": 0.08253312110900879, "learning_rate": 3.6702821909176835e-05, "loss": 0.2313, "step": 41221 }, { "epoch": 3.3394361633182115, "grad_norm": 0.09484583139419556, "learning_rate": 3.669832125658221e-05, "loss": 0.274, "step": 41222 }, { "epoch": 3.3395171743357097, "grad_norm": 0.06523586064577103, "learning_rate": 3.6693820603987576e-05, "loss": 0.2287, "step": 41223 }, { "epoch": 3.339598185353208, "grad_norm": 0.05800995975732803, "learning_rate": 3.6689319951392956e-05, "loss": 0.2129, "step": 41224 }, { "epoch": 3.3396791963707066, "grad_norm": 0.07219946384429932, "learning_rate": 3.668481929879833e-05, "loss": 0.2564, "step": 41225 }, { "epoch": 3.339760207388205, "grad_norm": 0.06518752127885818, "learning_rate": 3.66803186462037e-05, "loss": 0.2316, "step": 41226 }, { "epoch": 3.339841218405703, "grad_norm": 0.06252483278512955, "learning_rate": 3.667581799360908e-05, "loss": 0.2457, "step": 41227 }, { "epoch": 3.3399222294232014, "grad_norm": 0.07003217935562134, "learning_rate": 3.667131734101445e-05, "loss": 0.2106, "step": 41228 }, { "epoch": 3.3400032404407, "grad_norm": 0.07570035755634308, "learning_rate": 3.6666816688419824e-05, "loss": 0.2339, "step": 41229 }, { "epoch": 3.3400842514581983, "grad_norm": 0.08694077283143997, "learning_rate": 3.66623160358252e-05, "loss": 0.206, "step": 41230 }, { "epoch": 3.3401652624756966, "grad_norm": 0.059254713356494904, "learning_rate": 3.665781538323057e-05, "loss": 0.2416, "step": 41231 }, { "epoch": 3.3402462734931953, "grad_norm": 0.06904411315917969, "learning_rate": 3.6653314730635945e-05, "loss": 0.2027, "step": 41232 }, { "epoch": 3.3403272845106935, "grad_norm": 0.05732307955622673, "learning_rate": 3.664881407804132e-05, "loss": 0.2343, "step": 41233 }, { "epoch": 3.3404082955281917, "grad_norm": 0.07104742527008057, "learning_rate": 3.664431342544669e-05, "loss": 0.247, "step": 41234 }, { "epoch": 3.34048930654569, "grad_norm": 0.0745600238442421, "learning_rate": 3.6639812772852066e-05, "loss": 0.2089, "step": 41235 }, { "epoch": 3.3405703175631887, "grad_norm": 0.07792531698942184, "learning_rate": 3.663531212025744e-05, "loss": 0.2823, "step": 41236 }, { "epoch": 3.340651328580687, "grad_norm": 0.06312679499387741, "learning_rate": 3.663081146766281e-05, "loss": 0.2054, "step": 41237 }, { "epoch": 3.340732339598185, "grad_norm": 0.08045652508735657, "learning_rate": 3.6626310815068186e-05, "loss": 0.2163, "step": 41238 }, { "epoch": 3.340813350615684, "grad_norm": 0.08712105453014374, "learning_rate": 3.662181016247356e-05, "loss": 0.28, "step": 41239 }, { "epoch": 3.340894361633182, "grad_norm": 0.06204592064023018, "learning_rate": 3.6617309509878934e-05, "loss": 0.2088, "step": 41240 }, { "epoch": 3.3409753726506803, "grad_norm": 0.06538501381874084, "learning_rate": 3.661280885728431e-05, "loss": 0.2466, "step": 41241 }, { "epoch": 3.341056383668179, "grad_norm": 0.07625439763069153, "learning_rate": 3.660830820468968e-05, "loss": 0.248, "step": 41242 }, { "epoch": 3.3411373946856773, "grad_norm": 0.06579206883907318, "learning_rate": 3.6603807552095054e-05, "loss": 0.1921, "step": 41243 }, { "epoch": 3.3412184057031755, "grad_norm": 0.07133506238460541, "learning_rate": 3.659930689950043e-05, "loss": 0.2243, "step": 41244 }, { "epoch": 3.341299416720674, "grad_norm": 0.07682822644710541, "learning_rate": 3.659480624690581e-05, "loss": 0.2282, "step": 41245 }, { "epoch": 3.3413804277381725, "grad_norm": 0.07200987637042999, "learning_rate": 3.6590305594311175e-05, "loss": 0.2188, "step": 41246 }, { "epoch": 3.3414614387556707, "grad_norm": 0.06509634107351303, "learning_rate": 3.658580494171655e-05, "loss": 0.2635, "step": 41247 }, { "epoch": 3.3415424497731694, "grad_norm": 0.09091950207948685, "learning_rate": 3.658130428912193e-05, "loss": 0.2183, "step": 41248 }, { "epoch": 3.3416234607906676, "grad_norm": 0.06340814381837845, "learning_rate": 3.6576803636527296e-05, "loss": 0.2341, "step": 41249 }, { "epoch": 3.341704471808166, "grad_norm": 0.0767722800374031, "learning_rate": 3.657230298393267e-05, "loss": 0.2223, "step": 41250 }, { "epoch": 3.341785482825664, "grad_norm": 0.07288037985563278, "learning_rate": 3.656780233133805e-05, "loss": 0.2096, "step": 41251 }, { "epoch": 3.341866493843163, "grad_norm": 0.0707254484295845, "learning_rate": 3.656330167874342e-05, "loss": 0.213, "step": 41252 }, { "epoch": 3.341947504860661, "grad_norm": 0.07349371910095215, "learning_rate": 3.655880102614879e-05, "loss": 0.21, "step": 41253 }, { "epoch": 3.3420285158781593, "grad_norm": 0.07877951860427856, "learning_rate": 3.655430037355417e-05, "loss": 0.256, "step": 41254 }, { "epoch": 3.342109526895658, "grad_norm": 0.07902669906616211, "learning_rate": 3.6549799720959544e-05, "loss": 0.2357, "step": 41255 }, { "epoch": 3.3421905379131562, "grad_norm": 0.06486064195632935, "learning_rate": 3.654529906836491e-05, "loss": 0.2164, "step": 41256 }, { "epoch": 3.3422715489306545, "grad_norm": 0.07269563525915146, "learning_rate": 3.654079841577029e-05, "loss": 0.2057, "step": 41257 }, { "epoch": 3.3423525599481527, "grad_norm": 0.06843418627977371, "learning_rate": 3.6536297763175665e-05, "loss": 0.2091, "step": 41258 }, { "epoch": 3.3424335709656514, "grad_norm": 0.06407822668552399, "learning_rate": 3.653179711058103e-05, "loss": 0.1805, "step": 41259 }, { "epoch": 3.3425145819831497, "grad_norm": 0.06240003928542137, "learning_rate": 3.652729645798641e-05, "loss": 0.2194, "step": 41260 }, { "epoch": 3.342595593000648, "grad_norm": 0.07912351191043854, "learning_rate": 3.6522795805391786e-05, "loss": 0.2346, "step": 41261 }, { "epoch": 3.3426766040181466, "grad_norm": 0.08149971812963486, "learning_rate": 3.651829515279715e-05, "loss": 0.2143, "step": 41262 }, { "epoch": 3.342757615035645, "grad_norm": 0.0701875165104866, "learning_rate": 3.651379450020253e-05, "loss": 0.2038, "step": 41263 }, { "epoch": 3.342838626053143, "grad_norm": 0.07117495685815811, "learning_rate": 3.650929384760791e-05, "loss": 0.2272, "step": 41264 }, { "epoch": 3.3429196370706418, "grad_norm": 0.09499608725309372, "learning_rate": 3.6504793195013274e-05, "loss": 0.2179, "step": 41265 }, { "epoch": 3.34300064808814, "grad_norm": 0.08293692022562027, "learning_rate": 3.6500292542418654e-05, "loss": 0.2042, "step": 41266 }, { "epoch": 3.3430816591056383, "grad_norm": 0.08190654963254929, "learning_rate": 3.649579188982403e-05, "loss": 0.1921, "step": 41267 }, { "epoch": 3.343162670123137, "grad_norm": 0.07378261536359787, "learning_rate": 3.64912912372294e-05, "loss": 0.2202, "step": 41268 }, { "epoch": 3.343243681140635, "grad_norm": 0.0729857087135315, "learning_rate": 3.6486790584634775e-05, "loss": 0.2402, "step": 41269 }, { "epoch": 3.3433246921581334, "grad_norm": 0.07511337101459503, "learning_rate": 3.648228993204015e-05, "loss": 0.2423, "step": 41270 }, { "epoch": 3.343405703175632, "grad_norm": 0.058995719999074936, "learning_rate": 3.647778927944552e-05, "loss": 0.2525, "step": 41271 }, { "epoch": 3.3434867141931304, "grad_norm": 0.06570830196142197, "learning_rate": 3.6473288626850896e-05, "loss": 0.2253, "step": 41272 }, { "epoch": 3.3435677252106286, "grad_norm": 0.07327885925769806, "learning_rate": 3.646878797425627e-05, "loss": 0.245, "step": 41273 }, { "epoch": 3.343648736228127, "grad_norm": 0.08053727447986603, "learning_rate": 3.646428732166164e-05, "loss": 0.2137, "step": 41274 }, { "epoch": 3.3437297472456255, "grad_norm": 0.07759438455104828, "learning_rate": 3.6459786669067017e-05, "loss": 0.2264, "step": 41275 }, { "epoch": 3.343810758263124, "grad_norm": 0.05452917516231537, "learning_rate": 3.645528601647239e-05, "loss": 0.2342, "step": 41276 }, { "epoch": 3.343891769280622, "grad_norm": 0.0786796435713768, "learning_rate": 3.6450785363877764e-05, "loss": 0.2138, "step": 41277 }, { "epoch": 3.3439727802981207, "grad_norm": 0.08116442710161209, "learning_rate": 3.644628471128314e-05, "loss": 0.2576, "step": 41278 }, { "epoch": 3.344053791315619, "grad_norm": 0.07618381828069687, "learning_rate": 3.644178405868851e-05, "loss": 0.2346, "step": 41279 }, { "epoch": 3.344134802333117, "grad_norm": 0.08092188835144043, "learning_rate": 3.6437283406093885e-05, "loss": 0.2129, "step": 41280 }, { "epoch": 3.3442158133506155, "grad_norm": 0.07491854578256607, "learning_rate": 3.6432782753499265e-05, "loss": 0.1987, "step": 41281 }, { "epoch": 3.344296824368114, "grad_norm": 0.08945416659116745, "learning_rate": 3.642828210090463e-05, "loss": 0.2392, "step": 41282 }, { "epoch": 3.3443778353856124, "grad_norm": 0.07511164247989655, "learning_rate": 3.6423781448310005e-05, "loss": 0.2361, "step": 41283 }, { "epoch": 3.3444588464031106, "grad_norm": 0.07991006970405579, "learning_rate": 3.6419280795715386e-05, "loss": 0.2121, "step": 41284 }, { "epoch": 3.3445398574206093, "grad_norm": 0.06376335024833679, "learning_rate": 3.641478014312075e-05, "loss": 0.2125, "step": 41285 }, { "epoch": 3.3446208684381076, "grad_norm": 0.08875862509012222, "learning_rate": 3.6410279490526126e-05, "loss": 0.2539, "step": 41286 }, { "epoch": 3.344701879455606, "grad_norm": 0.0659111961722374, "learning_rate": 3.6405778837931507e-05, "loss": 0.2217, "step": 41287 }, { "epoch": 3.3447828904731045, "grad_norm": 0.0760120376944542, "learning_rate": 3.640127818533687e-05, "loss": 0.2192, "step": 41288 }, { "epoch": 3.3448639014906028, "grad_norm": 0.08299257606267929, "learning_rate": 3.639677753274225e-05, "loss": 0.2269, "step": 41289 }, { "epoch": 3.344944912508101, "grad_norm": 0.07124077528715134, "learning_rate": 3.639227688014763e-05, "loss": 0.2344, "step": 41290 }, { "epoch": 3.3450259235255997, "grad_norm": 0.08746058493852615, "learning_rate": 3.6387776227552994e-05, "loss": 0.222, "step": 41291 }, { "epoch": 3.345106934543098, "grad_norm": 0.07460647821426392, "learning_rate": 3.638327557495837e-05, "loss": 0.2359, "step": 41292 }, { "epoch": 3.345187945560596, "grad_norm": 0.08213378489017487, "learning_rate": 3.637877492236375e-05, "loss": 0.206, "step": 41293 }, { "epoch": 3.345268956578095, "grad_norm": 0.06644278764724731, "learning_rate": 3.637427426976912e-05, "loss": 0.2118, "step": 41294 }, { "epoch": 3.345349967595593, "grad_norm": 0.08180107176303864, "learning_rate": 3.636977361717449e-05, "loss": 0.2258, "step": 41295 }, { "epoch": 3.3454309786130914, "grad_norm": 0.06995628029108047, "learning_rate": 3.636527296457987e-05, "loss": 0.2511, "step": 41296 }, { "epoch": 3.3455119896305896, "grad_norm": 0.08584023267030716, "learning_rate": 3.636077231198524e-05, "loss": 0.2358, "step": 41297 }, { "epoch": 3.3455930006480883, "grad_norm": 0.07145940512418747, "learning_rate": 3.635627165939061e-05, "loss": 0.2136, "step": 41298 }, { "epoch": 3.3456740116655865, "grad_norm": 0.07166958600282669, "learning_rate": 3.635177100679599e-05, "loss": 0.226, "step": 41299 }, { "epoch": 3.345755022683085, "grad_norm": 0.07758349180221558, "learning_rate": 3.634727035420136e-05, "loss": 0.2007, "step": 41300 }, { "epoch": 3.3458360337005835, "grad_norm": 0.0829416960477829, "learning_rate": 3.634276970160673e-05, "loss": 0.2428, "step": 41301 }, { "epoch": 3.3459170447180817, "grad_norm": 0.07670668512582779, "learning_rate": 3.633826904901211e-05, "loss": 0.208, "step": 41302 }, { "epoch": 3.34599805573558, "grad_norm": 0.06386036425828934, "learning_rate": 3.6333768396417484e-05, "loss": 0.2278, "step": 41303 }, { "epoch": 3.346079066753078, "grad_norm": 0.05563236027956009, "learning_rate": 3.632926774382285e-05, "loss": 0.2044, "step": 41304 }, { "epoch": 3.346160077770577, "grad_norm": 0.07629813998937607, "learning_rate": 3.632476709122823e-05, "loss": 0.2544, "step": 41305 }, { "epoch": 3.346241088788075, "grad_norm": 0.08975273370742798, "learning_rate": 3.6320266438633605e-05, "loss": 0.1986, "step": 41306 }, { "epoch": 3.3463220998055734, "grad_norm": 0.05901889130473137, "learning_rate": 3.631576578603898e-05, "loss": 0.2419, "step": 41307 }, { "epoch": 3.346403110823072, "grad_norm": 0.06731534749269485, "learning_rate": 3.631126513344435e-05, "loss": 0.2171, "step": 41308 }, { "epoch": 3.3464841218405703, "grad_norm": 0.0790141150355339, "learning_rate": 3.6306764480849726e-05, "loss": 0.1912, "step": 41309 }, { "epoch": 3.3465651328580686, "grad_norm": 0.06635841727256775, "learning_rate": 3.63022638282551e-05, "loss": 0.1908, "step": 41310 }, { "epoch": 3.3466461438755672, "grad_norm": 0.09180153161287308, "learning_rate": 3.629776317566047e-05, "loss": 0.2472, "step": 41311 }, { "epoch": 3.3467271548930655, "grad_norm": 0.07255294919013977, "learning_rate": 3.629326252306585e-05, "loss": 0.1972, "step": 41312 }, { "epoch": 3.3468081659105637, "grad_norm": 0.06764529645442963, "learning_rate": 3.628876187047122e-05, "loss": 0.2226, "step": 41313 }, { "epoch": 3.3468891769280624, "grad_norm": 0.07115866243839264, "learning_rate": 3.6284261217876594e-05, "loss": 0.2627, "step": 41314 }, { "epoch": 3.3469701879455607, "grad_norm": 0.07019352167844772, "learning_rate": 3.627976056528197e-05, "loss": 0.1983, "step": 41315 }, { "epoch": 3.347051198963059, "grad_norm": 0.06258615106344223, "learning_rate": 3.627525991268734e-05, "loss": 0.2197, "step": 41316 }, { "epoch": 3.3471322099805576, "grad_norm": 0.05955258011817932, "learning_rate": 3.6270759260092715e-05, "loss": 0.2273, "step": 41317 }, { "epoch": 3.347213220998056, "grad_norm": 0.08046617358922958, "learning_rate": 3.626625860749809e-05, "loss": 0.2609, "step": 41318 }, { "epoch": 3.347294232015554, "grad_norm": 0.07196833193302155, "learning_rate": 3.626175795490346e-05, "loss": 0.2844, "step": 41319 }, { "epoch": 3.3473752430330523, "grad_norm": 0.0785125195980072, "learning_rate": 3.625725730230884e-05, "loss": 0.1894, "step": 41320 }, { "epoch": 3.347456254050551, "grad_norm": 0.06598301231861115, "learning_rate": 3.625275664971421e-05, "loss": 0.2484, "step": 41321 }, { "epoch": 3.3475372650680493, "grad_norm": 0.0736595019698143, "learning_rate": 3.624825599711958e-05, "loss": 0.2623, "step": 41322 }, { "epoch": 3.3476182760855475, "grad_norm": 0.0954742580652237, "learning_rate": 3.624375534452496e-05, "loss": 0.2415, "step": 41323 }, { "epoch": 3.347699287103046, "grad_norm": 0.07454533874988556, "learning_rate": 3.623925469193033e-05, "loss": 0.2065, "step": 41324 }, { "epoch": 3.3477802981205445, "grad_norm": 0.06970658898353577, "learning_rate": 3.6234754039335703e-05, "loss": 0.205, "step": 41325 }, { "epoch": 3.3478613091380427, "grad_norm": 0.07173918187618256, "learning_rate": 3.6230253386741084e-05, "loss": 0.2361, "step": 41326 }, { "epoch": 3.347942320155541, "grad_norm": 0.08432280272245407, "learning_rate": 3.622575273414645e-05, "loss": 0.2616, "step": 41327 }, { "epoch": 3.3480233311730396, "grad_norm": 0.06053037941455841, "learning_rate": 3.6221252081551824e-05, "loss": 0.2301, "step": 41328 }, { "epoch": 3.348104342190538, "grad_norm": 0.05914249271154404, "learning_rate": 3.6216751428957205e-05, "loss": 0.2211, "step": 41329 }, { "epoch": 3.348185353208036, "grad_norm": 0.08348757028579712, "learning_rate": 3.621225077636257e-05, "loss": 0.2667, "step": 41330 }, { "epoch": 3.348266364225535, "grad_norm": 0.07757681608200073, "learning_rate": 3.6207750123767945e-05, "loss": 0.2267, "step": 41331 }, { "epoch": 3.348347375243033, "grad_norm": 0.08246699720621109, "learning_rate": 3.6203249471173325e-05, "loss": 0.2224, "step": 41332 }, { "epoch": 3.3484283862605313, "grad_norm": 0.06282779574394226, "learning_rate": 3.61987488185787e-05, "loss": 0.2222, "step": 41333 }, { "epoch": 3.34850939727803, "grad_norm": 0.06982121616601944, "learning_rate": 3.6194248165984066e-05, "loss": 0.2306, "step": 41334 }, { "epoch": 3.3485904082955282, "grad_norm": 0.06591107696294785, "learning_rate": 3.6189747513389446e-05, "loss": 0.2275, "step": 41335 }, { "epoch": 3.3486714193130265, "grad_norm": 0.06394707411527634, "learning_rate": 3.618524686079482e-05, "loss": 0.2253, "step": 41336 }, { "epoch": 3.348752430330525, "grad_norm": 0.08156972378492355, "learning_rate": 3.618074620820019e-05, "loss": 0.2386, "step": 41337 }, { "epoch": 3.3488334413480234, "grad_norm": 0.08084411174058914, "learning_rate": 3.617624555560557e-05, "loss": 0.2309, "step": 41338 }, { "epoch": 3.3489144523655217, "grad_norm": 0.06958631426095963, "learning_rate": 3.617174490301094e-05, "loss": 0.2317, "step": 41339 }, { "epoch": 3.34899546338302, "grad_norm": 0.06364104896783829, "learning_rate": 3.616724425041631e-05, "loss": 0.222, "step": 41340 }, { "epoch": 3.3490764744005186, "grad_norm": 0.06435956805944443, "learning_rate": 3.616274359782169e-05, "loss": 0.2074, "step": 41341 }, { "epoch": 3.349157485418017, "grad_norm": 0.07496075332164764, "learning_rate": 3.615824294522706e-05, "loss": 0.2481, "step": 41342 }, { "epoch": 3.349238496435515, "grad_norm": 0.08156944811344147, "learning_rate": 3.615374229263243e-05, "loss": 0.2184, "step": 41343 }, { "epoch": 3.3493195074530138, "grad_norm": 0.08139017224311829, "learning_rate": 3.614924164003781e-05, "loss": 0.2405, "step": 41344 }, { "epoch": 3.349400518470512, "grad_norm": 0.09298984706401825, "learning_rate": 3.614474098744318e-05, "loss": 0.2581, "step": 41345 }, { "epoch": 3.3494815294880103, "grad_norm": 0.08675330132246017, "learning_rate": 3.6140240334848556e-05, "loss": 0.2504, "step": 41346 }, { "epoch": 3.3495625405055085, "grad_norm": 0.07219713181257248, "learning_rate": 3.613573968225393e-05, "loss": 0.2393, "step": 41347 }, { "epoch": 3.349643551523007, "grad_norm": 0.06823533028364182, "learning_rate": 3.61312390296593e-05, "loss": 0.2374, "step": 41348 }, { "epoch": 3.3497245625405054, "grad_norm": 0.07592404633760452, "learning_rate": 3.612673837706468e-05, "loss": 0.2181, "step": 41349 }, { "epoch": 3.3498055735580037, "grad_norm": 0.07518725842237473, "learning_rate": 3.612223772447005e-05, "loss": 0.2485, "step": 41350 }, { "epoch": 3.3498865845755024, "grad_norm": 0.07888790220022202, "learning_rate": 3.6117737071875424e-05, "loss": 0.2276, "step": 41351 }, { "epoch": 3.3499675955930006, "grad_norm": 0.07398751378059387, "learning_rate": 3.61132364192808e-05, "loss": 0.2346, "step": 41352 }, { "epoch": 3.350048606610499, "grad_norm": 0.07038160413503647, "learning_rate": 3.610873576668617e-05, "loss": 0.2045, "step": 41353 }, { "epoch": 3.3501296176279975, "grad_norm": 0.06793700158596039, "learning_rate": 3.6104235114091545e-05, "loss": 0.2057, "step": 41354 }, { "epoch": 3.350210628645496, "grad_norm": 0.0781884640455246, "learning_rate": 3.609973446149692e-05, "loss": 0.21, "step": 41355 }, { "epoch": 3.350291639662994, "grad_norm": 0.06680549681186676, "learning_rate": 3.609523380890229e-05, "loss": 0.2291, "step": 41356 }, { "epoch": 3.3503726506804927, "grad_norm": 0.05507110431790352, "learning_rate": 3.6090733156307666e-05, "loss": 0.2048, "step": 41357 }, { "epoch": 3.350453661697991, "grad_norm": 0.05875886231660843, "learning_rate": 3.608623250371304e-05, "loss": 0.225, "step": 41358 }, { "epoch": 3.350534672715489, "grad_norm": 0.0633646696805954, "learning_rate": 3.608173185111842e-05, "loss": 0.197, "step": 41359 }, { "epoch": 3.350615683732988, "grad_norm": 0.07051599025726318, "learning_rate": 3.6077231198523786e-05, "loss": 0.2266, "step": 41360 }, { "epoch": 3.350696694750486, "grad_norm": 0.07944469153881073, "learning_rate": 3.607273054592916e-05, "loss": 0.2241, "step": 41361 }, { "epoch": 3.3507777057679844, "grad_norm": 0.06918632984161377, "learning_rate": 3.606822989333454e-05, "loss": 0.2248, "step": 41362 }, { "epoch": 3.3508587167854826, "grad_norm": 0.07134364545345306, "learning_rate": 3.606372924073991e-05, "loss": 0.2256, "step": 41363 }, { "epoch": 3.3509397278029813, "grad_norm": 0.08421452343463898, "learning_rate": 3.605922858814528e-05, "loss": 0.214, "step": 41364 }, { "epoch": 3.3510207388204796, "grad_norm": 0.08210983872413635, "learning_rate": 3.605472793555066e-05, "loss": 0.2212, "step": 41365 }, { "epoch": 3.351101749837978, "grad_norm": 0.07449344545602798, "learning_rate": 3.605022728295603e-05, "loss": 0.2424, "step": 41366 }, { "epoch": 3.3511827608554765, "grad_norm": 0.08094337582588196, "learning_rate": 3.60457266303614e-05, "loss": 0.2194, "step": 41367 }, { "epoch": 3.3512637718729748, "grad_norm": 0.07408936321735382, "learning_rate": 3.604122597776678e-05, "loss": 0.2512, "step": 41368 }, { "epoch": 3.351344782890473, "grad_norm": 0.0715651735663414, "learning_rate": 3.603672532517215e-05, "loss": 0.1979, "step": 41369 }, { "epoch": 3.3514257939079712, "grad_norm": 0.07386700809001923, "learning_rate": 3.603222467257752e-05, "loss": 0.2566, "step": 41370 }, { "epoch": 3.35150680492547, "grad_norm": 0.06657444685697556, "learning_rate": 3.60277240199829e-05, "loss": 0.2044, "step": 41371 }, { "epoch": 3.351587815942968, "grad_norm": 0.0708659291267395, "learning_rate": 3.6023223367388276e-05, "loss": 0.2211, "step": 41372 }, { "epoch": 3.3516688269604664, "grad_norm": 0.0607280470430851, "learning_rate": 3.601872271479364e-05, "loss": 0.2116, "step": 41373 }, { "epoch": 3.351749837977965, "grad_norm": 0.08024850487709045, "learning_rate": 3.6014222062199024e-05, "loss": 0.1993, "step": 41374 }, { "epoch": 3.3518308489954634, "grad_norm": 0.06023063883185387, "learning_rate": 3.60097214096044e-05, "loss": 0.2228, "step": 41375 }, { "epoch": 3.3519118600129616, "grad_norm": 0.08129986375570297, "learning_rate": 3.6005220757009764e-05, "loss": 0.2486, "step": 41376 }, { "epoch": 3.3519928710304603, "grad_norm": 0.09041114151477814, "learning_rate": 3.6000720104415144e-05, "loss": 0.2713, "step": 41377 }, { "epoch": 3.3520738820479585, "grad_norm": 0.06871502846479416, "learning_rate": 3.599621945182052e-05, "loss": 0.2315, "step": 41378 }, { "epoch": 3.3521548930654568, "grad_norm": 0.08983299881219864, "learning_rate": 3.5991718799225885e-05, "loss": 0.2357, "step": 41379 }, { "epoch": 3.3522359040829555, "grad_norm": 0.08149930089712143, "learning_rate": 3.5987218146631265e-05, "loss": 0.2141, "step": 41380 }, { "epoch": 3.3523169151004537, "grad_norm": 0.051490750163793564, "learning_rate": 3.598271749403664e-05, "loss": 0.242, "step": 41381 }, { "epoch": 3.352397926117952, "grad_norm": 0.06942000240087509, "learning_rate": 3.5978216841442006e-05, "loss": 0.1977, "step": 41382 }, { "epoch": 3.3524789371354506, "grad_norm": 0.08682406693696976, "learning_rate": 3.5973716188847386e-05, "loss": 0.2316, "step": 41383 }, { "epoch": 3.352559948152949, "grad_norm": 0.07400385290384293, "learning_rate": 3.596921553625276e-05, "loss": 0.2648, "step": 41384 }, { "epoch": 3.352640959170447, "grad_norm": 0.06687964498996735, "learning_rate": 3.596471488365813e-05, "loss": 0.2522, "step": 41385 }, { "epoch": 3.3527219701879454, "grad_norm": 0.05889909341931343, "learning_rate": 3.596021423106351e-05, "loss": 0.1898, "step": 41386 }, { "epoch": 3.352802981205444, "grad_norm": 0.10397472977638245, "learning_rate": 3.595571357846888e-05, "loss": 0.2495, "step": 41387 }, { "epoch": 3.3528839922229423, "grad_norm": 0.06285160779953003, "learning_rate": 3.5951212925874254e-05, "loss": 0.2641, "step": 41388 }, { "epoch": 3.3529650032404406, "grad_norm": 0.08349147439002991, "learning_rate": 3.594671227327963e-05, "loss": 0.2406, "step": 41389 }, { "epoch": 3.3530460142579392, "grad_norm": 0.07242781668901443, "learning_rate": 3.5942211620685e-05, "loss": 0.2258, "step": 41390 }, { "epoch": 3.3531270252754375, "grad_norm": 0.06428693979978561, "learning_rate": 3.5937710968090375e-05, "loss": 0.2019, "step": 41391 }, { "epoch": 3.3532080362929357, "grad_norm": 0.060086168348789215, "learning_rate": 3.593321031549575e-05, "loss": 0.1769, "step": 41392 }, { "epoch": 3.353289047310434, "grad_norm": 0.07465967535972595, "learning_rate": 3.592870966290112e-05, "loss": 0.2187, "step": 41393 }, { "epoch": 3.3533700583279327, "grad_norm": 0.08104344457387924, "learning_rate": 3.5924209010306496e-05, "loss": 0.2183, "step": 41394 }, { "epoch": 3.353451069345431, "grad_norm": 0.0797872468829155, "learning_rate": 3.591970835771187e-05, "loss": 0.2251, "step": 41395 }, { "epoch": 3.353532080362929, "grad_norm": 0.07382632046937943, "learning_rate": 3.591520770511724e-05, "loss": 0.2221, "step": 41396 }, { "epoch": 3.353613091380428, "grad_norm": 0.06717702001333237, "learning_rate": 3.5910707052522616e-05, "loss": 0.2045, "step": 41397 }, { "epoch": 3.353694102397926, "grad_norm": 0.09043001383543015, "learning_rate": 3.590620639992799e-05, "loss": 0.2273, "step": 41398 }, { "epoch": 3.3537751134154243, "grad_norm": 0.07683952152729034, "learning_rate": 3.5901705747333364e-05, "loss": 0.2321, "step": 41399 }, { "epoch": 3.353856124432923, "grad_norm": 0.060896776616573334, "learning_rate": 3.589720509473874e-05, "loss": 0.2016, "step": 41400 }, { "epoch": 3.3539371354504213, "grad_norm": 0.06365719437599182, "learning_rate": 3.589270444214412e-05, "loss": 0.2161, "step": 41401 }, { "epoch": 3.3540181464679195, "grad_norm": 0.08571047335863113, "learning_rate": 3.5888203789549484e-05, "loss": 0.226, "step": 41402 }, { "epoch": 3.354099157485418, "grad_norm": 0.06898649781942368, "learning_rate": 3.588370313695486e-05, "loss": 0.2279, "step": 41403 }, { "epoch": 3.3541801685029164, "grad_norm": 0.07108738273382187, "learning_rate": 3.587920248436024e-05, "loss": 0.2603, "step": 41404 }, { "epoch": 3.3542611795204147, "grad_norm": 0.07938271015882492, "learning_rate": 3.5874701831765605e-05, "loss": 0.2429, "step": 41405 }, { "epoch": 3.3543421905379134, "grad_norm": 0.08200475573539734, "learning_rate": 3.587020117917098e-05, "loss": 0.2634, "step": 41406 }, { "epoch": 3.3544232015554116, "grad_norm": 0.0656493604183197, "learning_rate": 3.586570052657636e-05, "loss": 0.2378, "step": 41407 }, { "epoch": 3.35450421257291, "grad_norm": 0.08324894309043884, "learning_rate": 3.5861199873981726e-05, "loss": 0.2226, "step": 41408 }, { "epoch": 3.354585223590408, "grad_norm": 0.07772985845804214, "learning_rate": 3.58566992213871e-05, "loss": 0.234, "step": 41409 }, { "epoch": 3.354666234607907, "grad_norm": 0.07231470197439194, "learning_rate": 3.585219856879248e-05, "loss": 0.269, "step": 41410 }, { "epoch": 3.354747245625405, "grad_norm": 0.07173066586256027, "learning_rate": 3.584769791619785e-05, "loss": 0.2278, "step": 41411 }, { "epoch": 3.3548282566429033, "grad_norm": 0.0752212256193161, "learning_rate": 3.584319726360322e-05, "loss": 0.2057, "step": 41412 }, { "epoch": 3.354909267660402, "grad_norm": 0.06340886652469635, "learning_rate": 3.58386966110086e-05, "loss": 0.2561, "step": 41413 }, { "epoch": 3.3549902786779002, "grad_norm": 0.06413735449314117, "learning_rate": 3.5834195958413974e-05, "loss": 0.2133, "step": 41414 }, { "epoch": 3.3550712896953985, "grad_norm": 0.07090727984905243, "learning_rate": 3.582969530581934e-05, "loss": 0.2272, "step": 41415 }, { "epoch": 3.3551523007128967, "grad_norm": 0.05886656045913696, "learning_rate": 3.582519465322472e-05, "loss": 0.206, "step": 41416 }, { "epoch": 3.3552333117303954, "grad_norm": 0.15847265720367432, "learning_rate": 3.5820694000630095e-05, "loss": 0.2223, "step": 41417 }, { "epoch": 3.3553143227478937, "grad_norm": 0.06966357678174973, "learning_rate": 3.581619334803546e-05, "loss": 0.2201, "step": 41418 }, { "epoch": 3.355395333765392, "grad_norm": 0.06600771099328995, "learning_rate": 3.581169269544084e-05, "loss": 0.2487, "step": 41419 }, { "epoch": 3.3554763447828906, "grad_norm": 0.078952357172966, "learning_rate": 3.5807192042846216e-05, "loss": 0.208, "step": 41420 }, { "epoch": 3.355557355800389, "grad_norm": 0.06853339076042175, "learning_rate": 3.580269139025158e-05, "loss": 0.2166, "step": 41421 }, { "epoch": 3.355638366817887, "grad_norm": 0.07810437679290771, "learning_rate": 3.579819073765696e-05, "loss": 0.2307, "step": 41422 }, { "epoch": 3.3557193778353858, "grad_norm": 0.06687109917402267, "learning_rate": 3.579369008506234e-05, "loss": 0.2272, "step": 41423 }, { "epoch": 3.355800388852884, "grad_norm": 0.06530658155679703, "learning_rate": 3.5789189432467704e-05, "loss": 0.2347, "step": 41424 }, { "epoch": 3.3558813998703823, "grad_norm": 0.060046806931495667, "learning_rate": 3.5784688779873084e-05, "loss": 0.2189, "step": 41425 }, { "epoch": 3.355962410887881, "grad_norm": 0.057740457355976105, "learning_rate": 3.578018812727846e-05, "loss": 0.213, "step": 41426 }, { "epoch": 3.356043421905379, "grad_norm": 0.08021904528141022, "learning_rate": 3.577568747468383e-05, "loss": 0.2271, "step": 41427 }, { "epoch": 3.3561244329228774, "grad_norm": 0.06996602565050125, "learning_rate": 3.5771186822089205e-05, "loss": 0.225, "step": 41428 }, { "epoch": 3.356205443940376, "grad_norm": 0.08081180602312088, "learning_rate": 3.576668616949458e-05, "loss": 0.2193, "step": 41429 }, { "epoch": 3.3562864549578744, "grad_norm": 0.07503603398799896, "learning_rate": 3.576218551689995e-05, "loss": 0.2441, "step": 41430 }, { "epoch": 3.3563674659753726, "grad_norm": 0.06850998848676682, "learning_rate": 3.5757684864305326e-05, "loss": 0.2138, "step": 41431 }, { "epoch": 3.356448476992871, "grad_norm": 0.0689677894115448, "learning_rate": 3.57531842117107e-05, "loss": 0.2448, "step": 41432 }, { "epoch": 3.3565294880103695, "grad_norm": 0.08548016846179962, "learning_rate": 3.574868355911607e-05, "loss": 0.2585, "step": 41433 }, { "epoch": 3.356610499027868, "grad_norm": 0.06984124332666397, "learning_rate": 3.5744182906521447e-05, "loss": 0.2284, "step": 41434 }, { "epoch": 3.356691510045366, "grad_norm": 0.06978527456521988, "learning_rate": 3.573968225392682e-05, "loss": 0.2139, "step": 41435 }, { "epoch": 3.3567725210628647, "grad_norm": 0.0799226388335228, "learning_rate": 3.5735181601332194e-05, "loss": 0.2263, "step": 41436 }, { "epoch": 3.356853532080363, "grad_norm": 0.08357705920934677, "learning_rate": 3.573068094873757e-05, "loss": 0.2084, "step": 41437 }, { "epoch": 3.356934543097861, "grad_norm": 0.07593013346195221, "learning_rate": 3.572618029614294e-05, "loss": 0.1942, "step": 41438 }, { "epoch": 3.3570155541153595, "grad_norm": 0.08029653877019882, "learning_rate": 3.5721679643548315e-05, "loss": 0.2392, "step": 41439 }, { "epoch": 3.357096565132858, "grad_norm": 0.062494512647390366, "learning_rate": 3.5717178990953695e-05, "loss": 0.2074, "step": 41440 }, { "epoch": 3.3571775761503564, "grad_norm": 0.07503077387809753, "learning_rate": 3.571267833835906e-05, "loss": 0.2082, "step": 41441 }, { "epoch": 3.3572585871678546, "grad_norm": 0.07346827536821365, "learning_rate": 3.5708177685764435e-05, "loss": 0.2568, "step": 41442 }, { "epoch": 3.3573395981853533, "grad_norm": 0.07334146648645401, "learning_rate": 3.5703677033169816e-05, "loss": 0.2611, "step": 41443 }, { "epoch": 3.3574206092028516, "grad_norm": 0.06703248620033264, "learning_rate": 3.569917638057518e-05, "loss": 0.2296, "step": 41444 }, { "epoch": 3.35750162022035, "grad_norm": 0.07055786997079849, "learning_rate": 3.5694675727980556e-05, "loss": 0.2277, "step": 41445 }, { "epoch": 3.3575826312378485, "grad_norm": 0.08582204580307007, "learning_rate": 3.5690175075385937e-05, "loss": 0.2566, "step": 41446 }, { "epoch": 3.3576636422553467, "grad_norm": 0.07949892431497574, "learning_rate": 3.5685674422791303e-05, "loss": 0.2025, "step": 41447 }, { "epoch": 3.357744653272845, "grad_norm": 0.0567675344645977, "learning_rate": 3.568117377019668e-05, "loss": 0.2358, "step": 41448 }, { "epoch": 3.3578256642903437, "grad_norm": 0.07468612492084503, "learning_rate": 3.567667311760206e-05, "loss": 0.239, "step": 41449 }, { "epoch": 3.357906675307842, "grad_norm": 0.07461059838533401, "learning_rate": 3.5672172465007424e-05, "loss": 0.2381, "step": 41450 }, { "epoch": 3.35798768632534, "grad_norm": 0.05967641994357109, "learning_rate": 3.56676718124128e-05, "loss": 0.2433, "step": 41451 }, { "epoch": 3.358068697342839, "grad_norm": 0.07766906172037125, "learning_rate": 3.566317115981818e-05, "loss": 0.2155, "step": 41452 }, { "epoch": 3.358149708360337, "grad_norm": 0.07030050456523895, "learning_rate": 3.565867050722355e-05, "loss": 0.1952, "step": 41453 }, { "epoch": 3.3582307193778353, "grad_norm": 0.08457501977682114, "learning_rate": 3.565416985462892e-05, "loss": 0.2148, "step": 41454 }, { "epoch": 3.3583117303953336, "grad_norm": 0.07360049337148666, "learning_rate": 3.56496692020343e-05, "loss": 0.213, "step": 41455 }, { "epoch": 3.3583927414128323, "grad_norm": 0.06616782397031784, "learning_rate": 3.564516854943967e-05, "loss": 0.2151, "step": 41456 }, { "epoch": 3.3584737524303305, "grad_norm": 0.07079682499170303, "learning_rate": 3.564066789684504e-05, "loss": 0.2243, "step": 41457 }, { "epoch": 3.3585547634478288, "grad_norm": 0.057243358343839645, "learning_rate": 3.563616724425042e-05, "loss": 0.2123, "step": 41458 }, { "epoch": 3.3586357744653275, "grad_norm": 0.06692282855510712, "learning_rate": 3.5631666591655793e-05, "loss": 0.2198, "step": 41459 }, { "epoch": 3.3587167854828257, "grad_norm": 0.07716784626245499, "learning_rate": 3.562716593906116e-05, "loss": 0.2282, "step": 41460 }, { "epoch": 3.358797796500324, "grad_norm": 0.07021788507699966, "learning_rate": 3.562266528646654e-05, "loss": 0.2209, "step": 41461 }, { "epoch": 3.358878807517822, "grad_norm": 0.07160168141126633, "learning_rate": 3.5618164633871914e-05, "loss": 0.2052, "step": 41462 }, { "epoch": 3.358959818535321, "grad_norm": 0.07971487939357758, "learning_rate": 3.561366398127728e-05, "loss": 0.2111, "step": 41463 }, { "epoch": 3.359040829552819, "grad_norm": 0.06069502979516983, "learning_rate": 3.560916332868266e-05, "loss": 0.2042, "step": 41464 }, { "epoch": 3.3591218405703174, "grad_norm": 0.07485620677471161, "learning_rate": 3.5604662676088035e-05, "loss": 0.2355, "step": 41465 }, { "epoch": 3.359202851587816, "grad_norm": 0.0703471377491951, "learning_rate": 3.560016202349341e-05, "loss": 0.2445, "step": 41466 }, { "epoch": 3.3592838626053143, "grad_norm": 0.05236637964844704, "learning_rate": 3.559566137089878e-05, "loss": 0.1918, "step": 41467 }, { "epoch": 3.3593648736228126, "grad_norm": 0.06970858573913574, "learning_rate": 3.5591160718304156e-05, "loss": 0.1843, "step": 41468 }, { "epoch": 3.3594458846403112, "grad_norm": 0.09697674959897995, "learning_rate": 3.558666006570953e-05, "loss": 0.2533, "step": 41469 }, { "epoch": 3.3595268956578095, "grad_norm": 0.05919763445854187, "learning_rate": 3.55821594131149e-05, "loss": 0.1918, "step": 41470 }, { "epoch": 3.3596079066753077, "grad_norm": 0.07003922760486603, "learning_rate": 3.557765876052028e-05, "loss": 0.2213, "step": 41471 }, { "epoch": 3.3596889176928064, "grad_norm": 0.05733330547809601, "learning_rate": 3.557315810792565e-05, "loss": 0.187, "step": 41472 }, { "epoch": 3.3597699287103047, "grad_norm": 0.06819011270999908, "learning_rate": 3.5568657455331024e-05, "loss": 0.2577, "step": 41473 }, { "epoch": 3.359850939727803, "grad_norm": 0.061745062470436096, "learning_rate": 3.55641568027364e-05, "loss": 0.2011, "step": 41474 }, { "epoch": 3.3599319507453016, "grad_norm": 0.0705268457531929, "learning_rate": 3.555965615014177e-05, "loss": 0.2203, "step": 41475 }, { "epoch": 3.3600129617628, "grad_norm": 0.07620300352573395, "learning_rate": 3.5555155497547145e-05, "loss": 0.2044, "step": 41476 }, { "epoch": 3.360093972780298, "grad_norm": 0.07525172829627991, "learning_rate": 3.555065484495252e-05, "loss": 0.2413, "step": 41477 }, { "epoch": 3.3601749837977963, "grad_norm": 0.07072553038597107, "learning_rate": 3.554615419235789e-05, "loss": 0.2422, "step": 41478 }, { "epoch": 3.360255994815295, "grad_norm": 0.07391635328531265, "learning_rate": 3.554165353976327e-05, "loss": 0.2313, "step": 41479 }, { "epoch": 3.3603370058327933, "grad_norm": 0.06640076637268066, "learning_rate": 3.553715288716864e-05, "loss": 0.2207, "step": 41480 }, { "epoch": 3.3604180168502915, "grad_norm": 0.06782186031341553, "learning_rate": 3.553265223457401e-05, "loss": 0.2443, "step": 41481 }, { "epoch": 3.36049902786779, "grad_norm": 0.07683203369379044, "learning_rate": 3.552815158197939e-05, "loss": 0.2067, "step": 41482 }, { "epoch": 3.3605800388852884, "grad_norm": 0.07086572796106339, "learning_rate": 3.552365092938476e-05, "loss": 0.2251, "step": 41483 }, { "epoch": 3.3606610499027867, "grad_norm": 0.0764269083738327, "learning_rate": 3.5519150276790133e-05, "loss": 0.2153, "step": 41484 }, { "epoch": 3.360742060920285, "grad_norm": 0.08067718893289566, "learning_rate": 3.5514649624195514e-05, "loss": 0.2365, "step": 41485 }, { "epoch": 3.3608230719377836, "grad_norm": 0.0750790387392044, "learning_rate": 3.551014897160088e-05, "loss": 0.2577, "step": 41486 }, { "epoch": 3.360904082955282, "grad_norm": 0.08475764095783234, "learning_rate": 3.5505648319006254e-05, "loss": 0.2226, "step": 41487 }, { "epoch": 3.36098509397278, "grad_norm": 0.0630546286702156, "learning_rate": 3.5501147666411635e-05, "loss": 0.2075, "step": 41488 }, { "epoch": 3.361066104990279, "grad_norm": 0.06775683164596558, "learning_rate": 3.5496647013817e-05, "loss": 0.2018, "step": 41489 }, { "epoch": 3.361147116007777, "grad_norm": 0.07136673480272293, "learning_rate": 3.5492146361222375e-05, "loss": 0.2322, "step": 41490 }, { "epoch": 3.3612281270252753, "grad_norm": 0.08858048915863037, "learning_rate": 3.5487645708627755e-05, "loss": 0.3272, "step": 41491 }, { "epoch": 3.361309138042774, "grad_norm": 0.0807926133275032, "learning_rate": 3.548314505603313e-05, "loss": 0.2342, "step": 41492 }, { "epoch": 3.3613901490602722, "grad_norm": 0.07923466712236404, "learning_rate": 3.5478644403438496e-05, "loss": 0.237, "step": 41493 }, { "epoch": 3.3614711600777705, "grad_norm": 0.06726489961147308, "learning_rate": 3.5474143750843876e-05, "loss": 0.2268, "step": 41494 }, { "epoch": 3.361552171095269, "grad_norm": 0.07273104041814804, "learning_rate": 3.546964309824925e-05, "loss": 0.2161, "step": 41495 }, { "epoch": 3.3616331821127674, "grad_norm": 0.07782911509275436, "learning_rate": 3.546514244565462e-05, "loss": 0.2321, "step": 41496 }, { "epoch": 3.3617141931302656, "grad_norm": 0.06289485096931458, "learning_rate": 3.546064179306e-05, "loss": 0.2091, "step": 41497 }, { "epoch": 3.3617952041477643, "grad_norm": 0.06988698989152908, "learning_rate": 3.545614114046537e-05, "loss": 0.2265, "step": 41498 }, { "epoch": 3.3618762151652626, "grad_norm": 0.08457628637552261, "learning_rate": 3.545164048787074e-05, "loss": 0.2137, "step": 41499 }, { "epoch": 3.361957226182761, "grad_norm": 0.0646219551563263, "learning_rate": 3.544713983527612e-05, "loss": 0.2145, "step": 41500 }, { "epoch": 3.362038237200259, "grad_norm": 0.0836854949593544, "learning_rate": 3.544263918268149e-05, "loss": 0.2149, "step": 41501 }, { "epoch": 3.3621192482177578, "grad_norm": 0.06724182516336441, "learning_rate": 3.5438138530086865e-05, "loss": 0.1955, "step": 41502 }, { "epoch": 3.362200259235256, "grad_norm": 0.08123544603586197, "learning_rate": 3.543363787749224e-05, "loss": 0.2378, "step": 41503 }, { "epoch": 3.3622812702527543, "grad_norm": 0.08634920418262482, "learning_rate": 3.542913722489761e-05, "loss": 0.2397, "step": 41504 }, { "epoch": 3.362362281270253, "grad_norm": 0.07229456305503845, "learning_rate": 3.5424636572302986e-05, "loss": 0.1964, "step": 41505 }, { "epoch": 3.362443292287751, "grad_norm": 0.07100922614336014, "learning_rate": 3.542013591970836e-05, "loss": 0.2391, "step": 41506 }, { "epoch": 3.3625243033052494, "grad_norm": 0.06853149086236954, "learning_rate": 3.541563526711373e-05, "loss": 0.2236, "step": 41507 }, { "epoch": 3.3626053143227477, "grad_norm": 0.07335706055164337, "learning_rate": 3.541113461451911e-05, "loss": 0.2181, "step": 41508 }, { "epoch": 3.3626863253402464, "grad_norm": 0.062075987458229065, "learning_rate": 3.540663396192448e-05, "loss": 0.2111, "step": 41509 }, { "epoch": 3.3627673363577446, "grad_norm": 0.06849157810211182, "learning_rate": 3.5402133309329854e-05, "loss": 0.2474, "step": 41510 }, { "epoch": 3.362848347375243, "grad_norm": 0.07038149982690811, "learning_rate": 3.539763265673523e-05, "loss": 0.2402, "step": 41511 }, { "epoch": 3.3629293583927415, "grad_norm": 0.08231181651353836, "learning_rate": 3.53931320041406e-05, "loss": 0.2719, "step": 41512 }, { "epoch": 3.36301036941024, "grad_norm": 0.07193673402070999, "learning_rate": 3.5388631351545975e-05, "loss": 0.2559, "step": 41513 }, { "epoch": 3.363091380427738, "grad_norm": 0.07190826535224915, "learning_rate": 3.538413069895135e-05, "loss": 0.254, "step": 41514 }, { "epoch": 3.3631723914452367, "grad_norm": 0.059861920773983, "learning_rate": 3.537963004635672e-05, "loss": 0.2133, "step": 41515 }, { "epoch": 3.363253402462735, "grad_norm": 0.09745169430971146, "learning_rate": 3.5375129393762096e-05, "loss": 0.2472, "step": 41516 }, { "epoch": 3.363334413480233, "grad_norm": 0.08337296545505524, "learning_rate": 3.537062874116747e-05, "loss": 0.2274, "step": 41517 }, { "epoch": 3.363415424497732, "grad_norm": 0.08769182115793228, "learning_rate": 3.536612808857285e-05, "loss": 0.2502, "step": 41518 }, { "epoch": 3.36349643551523, "grad_norm": 0.07994231581687927, "learning_rate": 3.5361627435978216e-05, "loss": 0.2367, "step": 41519 }, { "epoch": 3.3635774465327284, "grad_norm": 0.06313978880643845, "learning_rate": 3.535712678338359e-05, "loss": 0.2276, "step": 41520 }, { "epoch": 3.363658457550227, "grad_norm": 0.09761510044336319, "learning_rate": 3.535262613078897e-05, "loss": 0.2405, "step": 41521 }, { "epoch": 3.3637394685677253, "grad_norm": 0.06474805623292923, "learning_rate": 3.534812547819434e-05, "loss": 0.2163, "step": 41522 }, { "epoch": 3.3638204795852236, "grad_norm": 0.061308603733778, "learning_rate": 3.534362482559971e-05, "loss": 0.2052, "step": 41523 }, { "epoch": 3.363901490602722, "grad_norm": 0.06479856371879578, "learning_rate": 3.533912417300509e-05, "loss": 0.2339, "step": 41524 }, { "epoch": 3.3639825016202205, "grad_norm": 0.0835852101445198, "learning_rate": 3.533462352041046e-05, "loss": 0.2184, "step": 41525 }, { "epoch": 3.3640635126377187, "grad_norm": 0.0681370347738266, "learning_rate": 3.533012286781583e-05, "loss": 0.2783, "step": 41526 }, { "epoch": 3.364144523655217, "grad_norm": 0.08533981442451477, "learning_rate": 3.532562221522121e-05, "loss": 0.2507, "step": 41527 }, { "epoch": 3.3642255346727157, "grad_norm": 0.06170447915792465, "learning_rate": 3.532112156262658e-05, "loss": 0.2043, "step": 41528 }, { "epoch": 3.364306545690214, "grad_norm": 0.06911799311637878, "learning_rate": 3.531662091003195e-05, "loss": 0.2144, "step": 41529 }, { "epoch": 3.364387556707712, "grad_norm": 0.0771351158618927, "learning_rate": 3.531212025743733e-05, "loss": 0.2393, "step": 41530 }, { "epoch": 3.3644685677252104, "grad_norm": 0.09463619440793991, "learning_rate": 3.5307619604842706e-05, "loss": 0.2541, "step": 41531 }, { "epoch": 3.364549578742709, "grad_norm": 0.08270283043384552, "learning_rate": 3.530311895224807e-05, "loss": 0.2583, "step": 41532 }, { "epoch": 3.3646305897602073, "grad_norm": 0.08258695900440216, "learning_rate": 3.5298618299653454e-05, "loss": 0.2765, "step": 41533 }, { "epoch": 3.3647116007777056, "grad_norm": 0.07178051024675369, "learning_rate": 3.529411764705883e-05, "loss": 0.2605, "step": 41534 }, { "epoch": 3.3647926117952043, "grad_norm": 0.06768299639225006, "learning_rate": 3.52896169944642e-05, "loss": 0.203, "step": 41535 }, { "epoch": 3.3648736228127025, "grad_norm": 0.060938406735658646, "learning_rate": 3.5285116341869574e-05, "loss": 0.2189, "step": 41536 }, { "epoch": 3.3649546338302008, "grad_norm": 0.056721530854701996, "learning_rate": 3.528061568927495e-05, "loss": 0.2198, "step": 41537 }, { "epoch": 3.3650356448476995, "grad_norm": 0.08980550616979599, "learning_rate": 3.527611503668032e-05, "loss": 0.2538, "step": 41538 }, { "epoch": 3.3651166558651977, "grad_norm": 0.0845855101943016, "learning_rate": 3.5271614384085695e-05, "loss": 0.2587, "step": 41539 }, { "epoch": 3.365197666882696, "grad_norm": 0.06626113504171371, "learning_rate": 3.526711373149107e-05, "loss": 0.2197, "step": 41540 }, { "epoch": 3.3652786779001946, "grad_norm": 0.07001606374979019, "learning_rate": 3.526261307889644e-05, "loss": 0.257, "step": 41541 }, { "epoch": 3.365359688917693, "grad_norm": 0.07456422597169876, "learning_rate": 3.5258112426301816e-05, "loss": 0.1906, "step": 41542 }, { "epoch": 3.365440699935191, "grad_norm": 0.06453201174736023, "learning_rate": 3.525361177370719e-05, "loss": 0.1989, "step": 41543 }, { "epoch": 3.3655217109526894, "grad_norm": 0.07249526679515839, "learning_rate": 3.524911112111256e-05, "loss": 0.216, "step": 41544 }, { "epoch": 3.365602721970188, "grad_norm": 0.06700129806995392, "learning_rate": 3.524461046851794e-05, "loss": 0.2105, "step": 41545 }, { "epoch": 3.3656837329876863, "grad_norm": 0.06668738275766373, "learning_rate": 3.524010981592331e-05, "loss": 0.2296, "step": 41546 }, { "epoch": 3.3657647440051845, "grad_norm": 0.0665898472070694, "learning_rate": 3.5235609163328684e-05, "loss": 0.2088, "step": 41547 }, { "epoch": 3.3658457550226832, "grad_norm": 0.07802686095237732, "learning_rate": 3.523110851073406e-05, "loss": 0.2559, "step": 41548 }, { "epoch": 3.3659267660401815, "grad_norm": 0.09978897869586945, "learning_rate": 3.522660785813943e-05, "loss": 0.2663, "step": 41549 }, { "epoch": 3.3660077770576797, "grad_norm": 0.0862162709236145, "learning_rate": 3.5222107205544805e-05, "loss": 0.2249, "step": 41550 }, { "epoch": 3.3660887880751784, "grad_norm": 0.06248806044459343, "learning_rate": 3.521760655295018e-05, "loss": 0.2346, "step": 41551 }, { "epoch": 3.3661697990926767, "grad_norm": 0.07725557684898376, "learning_rate": 3.521310590035555e-05, "loss": 0.2258, "step": 41552 }, { "epoch": 3.366250810110175, "grad_norm": 0.08579937368631363, "learning_rate": 3.5208605247760926e-05, "loss": 0.2167, "step": 41553 }, { "epoch": 3.366331821127673, "grad_norm": 0.07212509959936142, "learning_rate": 3.52041045951663e-05, "loss": 0.2356, "step": 41554 }, { "epoch": 3.366412832145172, "grad_norm": 0.08952637761831284, "learning_rate": 3.519960394257167e-05, "loss": 0.2534, "step": 41555 }, { "epoch": 3.36649384316267, "grad_norm": 0.05379028245806694, "learning_rate": 3.5195103289977046e-05, "loss": 0.2075, "step": 41556 }, { "epoch": 3.3665748541801683, "grad_norm": 0.07278160005807877, "learning_rate": 3.519060263738243e-05, "loss": 0.2261, "step": 41557 }, { "epoch": 3.366655865197667, "grad_norm": 0.07294684648513794, "learning_rate": 3.5186101984787794e-05, "loss": 0.2228, "step": 41558 }, { "epoch": 3.3667368762151653, "grad_norm": 0.07547971606254578, "learning_rate": 3.518160133219317e-05, "loss": 0.24, "step": 41559 }, { "epoch": 3.3668178872326635, "grad_norm": 0.08458521217107773, "learning_rate": 3.517710067959855e-05, "loss": 0.2331, "step": 41560 }, { "epoch": 3.366898898250162, "grad_norm": 0.07553742080926895, "learning_rate": 3.5172600027003915e-05, "loss": 0.2154, "step": 41561 }, { "epoch": 3.3669799092676604, "grad_norm": 0.07026293128728867, "learning_rate": 3.516809937440929e-05, "loss": 0.221, "step": 41562 }, { "epoch": 3.3670609202851587, "grad_norm": 0.06618590652942657, "learning_rate": 3.516359872181467e-05, "loss": 0.2013, "step": 41563 }, { "epoch": 3.3671419313026574, "grad_norm": 0.08520275354385376, "learning_rate": 3.5159098069220035e-05, "loss": 0.2444, "step": 41564 }, { "epoch": 3.3672229423201556, "grad_norm": 0.07353122532367706, "learning_rate": 3.515459741662541e-05, "loss": 0.2403, "step": 41565 }, { "epoch": 3.367303953337654, "grad_norm": 0.06767702102661133, "learning_rate": 3.515009676403079e-05, "loss": 0.2166, "step": 41566 }, { "epoch": 3.367384964355152, "grad_norm": 0.062399983406066895, "learning_rate": 3.5145596111436156e-05, "loss": 0.2013, "step": 41567 }, { "epoch": 3.367465975372651, "grad_norm": 0.06242172420024872, "learning_rate": 3.514109545884153e-05, "loss": 0.2193, "step": 41568 }, { "epoch": 3.367546986390149, "grad_norm": 0.07391718775033951, "learning_rate": 3.513659480624691e-05, "loss": 0.2046, "step": 41569 }, { "epoch": 3.3676279974076473, "grad_norm": 0.0698646679520607, "learning_rate": 3.513209415365228e-05, "loss": 0.3122, "step": 41570 }, { "epoch": 3.367709008425146, "grad_norm": 0.07431770116090775, "learning_rate": 3.512759350105766e-05, "loss": 0.2247, "step": 41571 }, { "epoch": 3.3677900194426442, "grad_norm": 0.0731852650642395, "learning_rate": 3.512309284846303e-05, "loss": 0.2467, "step": 41572 }, { "epoch": 3.3678710304601425, "grad_norm": 0.06502462178468704, "learning_rate": 3.5118592195868405e-05, "loss": 0.2045, "step": 41573 }, { "epoch": 3.3679520414776407, "grad_norm": 0.06441786885261536, "learning_rate": 3.511409154327378e-05, "loss": 0.2253, "step": 41574 }, { "epoch": 3.3680330524951394, "grad_norm": 0.07801219075918198, "learning_rate": 3.510959089067915e-05, "loss": 0.2156, "step": 41575 }, { "epoch": 3.3681140635126376, "grad_norm": 0.06515826284885406, "learning_rate": 3.5105090238084525e-05, "loss": 0.2164, "step": 41576 }, { "epoch": 3.368195074530136, "grad_norm": 0.08127927780151367, "learning_rate": 3.51005895854899e-05, "loss": 0.2367, "step": 41577 }, { "epoch": 3.3682760855476346, "grad_norm": 0.07091443240642548, "learning_rate": 3.509608893289527e-05, "loss": 0.222, "step": 41578 }, { "epoch": 3.368357096565133, "grad_norm": 0.06333775073289871, "learning_rate": 3.5091588280300646e-05, "loss": 0.243, "step": 41579 }, { "epoch": 3.368438107582631, "grad_norm": 0.06674503535032272, "learning_rate": 3.508708762770602e-05, "loss": 0.22, "step": 41580 }, { "epoch": 3.3685191186001298, "grad_norm": 0.07354476302862167, "learning_rate": 3.508258697511139e-05, "loss": 0.2317, "step": 41581 }, { "epoch": 3.368600129617628, "grad_norm": 0.07980510592460632, "learning_rate": 3.507808632251677e-05, "loss": 0.2393, "step": 41582 }, { "epoch": 3.3686811406351262, "grad_norm": 0.0695723295211792, "learning_rate": 3.507358566992214e-05, "loss": 0.2219, "step": 41583 }, { "epoch": 3.368762151652625, "grad_norm": 0.09566250443458557, "learning_rate": 3.5069085017327514e-05, "loss": 0.2341, "step": 41584 }, { "epoch": 3.368843162670123, "grad_norm": 0.08105815201997757, "learning_rate": 3.506458436473289e-05, "loss": 0.2185, "step": 41585 }, { "epoch": 3.3689241736876214, "grad_norm": 0.05623576045036316, "learning_rate": 3.506008371213826e-05, "loss": 0.2123, "step": 41586 }, { "epoch": 3.36900518470512, "grad_norm": 0.07283440977334976, "learning_rate": 3.5055583059543635e-05, "loss": 0.2149, "step": 41587 }, { "epoch": 3.3690861957226184, "grad_norm": 0.1059572845697403, "learning_rate": 3.505108240694901e-05, "loss": 0.3047, "step": 41588 }, { "epoch": 3.3691672067401166, "grad_norm": 0.08058130741119385, "learning_rate": 3.504658175435438e-05, "loss": 0.2359, "step": 41589 }, { "epoch": 3.369248217757615, "grad_norm": 0.07778458297252655, "learning_rate": 3.5042081101759756e-05, "loss": 0.2128, "step": 41590 }, { "epoch": 3.3693292287751135, "grad_norm": 0.06973801553249359, "learning_rate": 3.503758044916513e-05, "loss": 0.2097, "step": 41591 }, { "epoch": 3.369410239792612, "grad_norm": 0.07245367765426636, "learning_rate": 3.50330797965705e-05, "loss": 0.2282, "step": 41592 }, { "epoch": 3.36949125081011, "grad_norm": 0.0660821944475174, "learning_rate": 3.5028579143975877e-05, "loss": 0.2259, "step": 41593 }, { "epoch": 3.3695722618276087, "grad_norm": 0.08014898747205734, "learning_rate": 3.502407849138125e-05, "loss": 0.2232, "step": 41594 }, { "epoch": 3.369653272845107, "grad_norm": 0.07944493740797043, "learning_rate": 3.5019577838786624e-05, "loss": 0.2678, "step": 41595 }, { "epoch": 3.369734283862605, "grad_norm": 0.06187062710523605, "learning_rate": 3.5015077186192e-05, "loss": 0.2129, "step": 41596 }, { "epoch": 3.3698152948801035, "grad_norm": 0.0689127966761589, "learning_rate": 3.501057653359737e-05, "loss": 0.2384, "step": 41597 }, { "epoch": 3.369896305897602, "grad_norm": 0.07030554860830307, "learning_rate": 3.5006075881002745e-05, "loss": 0.2134, "step": 41598 }, { "epoch": 3.3699773169151004, "grad_norm": 0.057273104786872864, "learning_rate": 3.5001575228408125e-05, "loss": 0.192, "step": 41599 }, { "epoch": 3.3700583279325986, "grad_norm": 0.07547903805971146, "learning_rate": 3.499707457581349e-05, "loss": 0.2211, "step": 41600 }, { "epoch": 3.3701393389500973, "grad_norm": 0.06552055478096008, "learning_rate": 3.4992573923218865e-05, "loss": 0.2295, "step": 41601 }, { "epoch": 3.3702203499675956, "grad_norm": 0.06693745404481888, "learning_rate": 3.4988073270624246e-05, "loss": 0.2524, "step": 41602 }, { "epoch": 3.370301360985094, "grad_norm": 0.06710077077150345, "learning_rate": 3.498357261802961e-05, "loss": 0.2026, "step": 41603 }, { "epoch": 3.3703823720025925, "grad_norm": 0.07833865284919739, "learning_rate": 3.497907196543499e-05, "loss": 0.2409, "step": 41604 }, { "epoch": 3.3704633830200907, "grad_norm": 0.06898022443056107, "learning_rate": 3.4974571312840367e-05, "loss": 0.2081, "step": 41605 }, { "epoch": 3.370544394037589, "grad_norm": 0.08391853421926498, "learning_rate": 3.4970070660245733e-05, "loss": 0.2388, "step": 41606 }, { "epoch": 3.3706254050550877, "grad_norm": 0.06046483665704727, "learning_rate": 3.4965570007651114e-05, "loss": 0.2022, "step": 41607 }, { "epoch": 3.370706416072586, "grad_norm": 0.07326920330524445, "learning_rate": 3.496106935505649e-05, "loss": 0.2629, "step": 41608 }, { "epoch": 3.370787427090084, "grad_norm": 0.061868514865636826, "learning_rate": 3.4956568702461854e-05, "loss": 0.2276, "step": 41609 }, { "epoch": 3.370868438107583, "grad_norm": 0.05633601173758507, "learning_rate": 3.4952068049867235e-05, "loss": 0.2445, "step": 41610 }, { "epoch": 3.370949449125081, "grad_norm": 0.07111150026321411, "learning_rate": 3.494756739727261e-05, "loss": 0.2166, "step": 41611 }, { "epoch": 3.3710304601425793, "grad_norm": 0.0728144571185112, "learning_rate": 3.494306674467798e-05, "loss": 0.203, "step": 41612 }, { "epoch": 3.3711114711600776, "grad_norm": 0.08260970562696457, "learning_rate": 3.4938566092083355e-05, "loss": 0.2125, "step": 41613 }, { "epoch": 3.3711924821775763, "grad_norm": 0.09042169153690338, "learning_rate": 3.493406543948873e-05, "loss": 0.2402, "step": 41614 }, { "epoch": 3.3712734931950745, "grad_norm": 0.09613018482923508, "learning_rate": 3.49295647868941e-05, "loss": 0.2167, "step": 41615 }, { "epoch": 3.3713545042125728, "grad_norm": 0.0749058797955513, "learning_rate": 3.4925064134299476e-05, "loss": 0.2764, "step": 41616 }, { "epoch": 3.3714355152300715, "grad_norm": 0.07069750875234604, "learning_rate": 3.492056348170485e-05, "loss": 0.2284, "step": 41617 }, { "epoch": 3.3715165262475697, "grad_norm": 0.07239100337028503, "learning_rate": 3.4916062829110223e-05, "loss": 0.19, "step": 41618 }, { "epoch": 3.371597537265068, "grad_norm": 0.0704842135310173, "learning_rate": 3.49115621765156e-05, "loss": 0.2203, "step": 41619 }, { "epoch": 3.371678548282566, "grad_norm": 0.05835307016968727, "learning_rate": 3.490706152392097e-05, "loss": 0.2111, "step": 41620 }, { "epoch": 3.371759559300065, "grad_norm": 0.08694245666265488, "learning_rate": 3.4902560871326344e-05, "loss": 0.233, "step": 41621 }, { "epoch": 3.371840570317563, "grad_norm": 0.06228380650281906, "learning_rate": 3.489806021873172e-05, "loss": 0.235, "step": 41622 }, { "epoch": 3.3719215813350614, "grad_norm": 0.07264664769172668, "learning_rate": 3.489355956613709e-05, "loss": 0.2427, "step": 41623 }, { "epoch": 3.37200259235256, "grad_norm": 0.08153603225946426, "learning_rate": 3.4889058913542465e-05, "loss": 0.2491, "step": 41624 }, { "epoch": 3.3720836033700583, "grad_norm": 0.08536143600940704, "learning_rate": 3.488455826094784e-05, "loss": 0.2419, "step": 41625 }, { "epoch": 3.3721646143875565, "grad_norm": 0.0751512423157692, "learning_rate": 3.488005760835321e-05, "loss": 0.2224, "step": 41626 }, { "epoch": 3.3722456254050552, "grad_norm": 0.07146193832159042, "learning_rate": 3.4875556955758586e-05, "loss": 0.2429, "step": 41627 }, { "epoch": 3.3723266364225535, "grad_norm": 0.07874619960784912, "learning_rate": 3.487105630316396e-05, "loss": 0.2047, "step": 41628 }, { "epoch": 3.3724076474400517, "grad_norm": 0.08496353775262833, "learning_rate": 3.486655565056933e-05, "loss": 0.2629, "step": 41629 }, { "epoch": 3.3724886584575504, "grad_norm": 0.08023679256439209, "learning_rate": 3.486205499797471e-05, "loss": 0.2057, "step": 41630 }, { "epoch": 3.3725696694750487, "grad_norm": 0.06785769015550613, "learning_rate": 3.485755434538008e-05, "loss": 0.2537, "step": 41631 }, { "epoch": 3.372650680492547, "grad_norm": 0.0767764076590538, "learning_rate": 3.4853053692785454e-05, "loss": 0.2433, "step": 41632 }, { "epoch": 3.3727316915100456, "grad_norm": 0.07684066146612167, "learning_rate": 3.484855304019083e-05, "loss": 0.2284, "step": 41633 }, { "epoch": 3.372812702527544, "grad_norm": 0.06635360419750214, "learning_rate": 3.48440523875962e-05, "loss": 0.2054, "step": 41634 }, { "epoch": 3.372893713545042, "grad_norm": 0.07786551862955093, "learning_rate": 3.4839551735001575e-05, "loss": 0.2492, "step": 41635 }, { "epoch": 3.3729747245625403, "grad_norm": 0.08272644132375717, "learning_rate": 3.483505108240695e-05, "loss": 0.231, "step": 41636 }, { "epoch": 3.373055735580039, "grad_norm": 0.05650690197944641, "learning_rate": 3.483055042981233e-05, "loss": 0.2144, "step": 41637 }, { "epoch": 3.3731367465975373, "grad_norm": 0.09380374103784561, "learning_rate": 3.48260497772177e-05, "loss": 0.2343, "step": 41638 }, { "epoch": 3.3732177576150355, "grad_norm": 0.0768943652510643, "learning_rate": 3.482154912462307e-05, "loss": 0.2448, "step": 41639 }, { "epoch": 3.373298768632534, "grad_norm": 0.0706968829035759, "learning_rate": 3.481704847202845e-05, "loss": 0.2335, "step": 41640 }, { "epoch": 3.3733797796500324, "grad_norm": 0.07146643102169037, "learning_rate": 3.481254781943382e-05, "loss": 0.2399, "step": 41641 }, { "epoch": 3.3734607906675307, "grad_norm": 0.07195454090833664, "learning_rate": 3.480804716683919e-05, "loss": 0.2083, "step": 41642 }, { "epoch": 3.373541801685029, "grad_norm": 0.07778682559728622, "learning_rate": 3.480354651424457e-05, "loss": 0.212, "step": 41643 }, { "epoch": 3.3736228127025276, "grad_norm": 0.07331526279449463, "learning_rate": 3.4799045861649944e-05, "loss": 0.2222, "step": 41644 }, { "epoch": 3.373703823720026, "grad_norm": 0.06694495677947998, "learning_rate": 3.479454520905531e-05, "loss": 0.2364, "step": 41645 }, { "epoch": 3.373784834737524, "grad_norm": 0.07718578726053238, "learning_rate": 3.479004455646069e-05, "loss": 0.2363, "step": 41646 }, { "epoch": 3.373865845755023, "grad_norm": 0.0727149173617363, "learning_rate": 3.4785543903866065e-05, "loss": 0.2431, "step": 41647 }, { "epoch": 3.373946856772521, "grad_norm": 0.07330166548490524, "learning_rate": 3.478104325127143e-05, "loss": 0.2347, "step": 41648 }, { "epoch": 3.3740278677900193, "grad_norm": 0.0676233097910881, "learning_rate": 3.477654259867681e-05, "loss": 0.2062, "step": 41649 }, { "epoch": 3.374108878807518, "grad_norm": 0.0689397007226944, "learning_rate": 3.4772041946082186e-05, "loss": 0.236, "step": 41650 }, { "epoch": 3.374189889825016, "grad_norm": 0.06201153248548508, "learning_rate": 3.476754129348756e-05, "loss": 0.2163, "step": 41651 }, { "epoch": 3.3742709008425145, "grad_norm": 0.064112089574337, "learning_rate": 3.476304064089293e-05, "loss": 0.2089, "step": 41652 }, { "epoch": 3.374351911860013, "grad_norm": 0.07973623275756836, "learning_rate": 3.4758539988298306e-05, "loss": 0.2264, "step": 41653 }, { "epoch": 3.3744329228775114, "grad_norm": 0.06909281015396118, "learning_rate": 3.475403933570368e-05, "loss": 0.1831, "step": 41654 }, { "epoch": 3.3745139338950096, "grad_norm": 0.06308528780937195, "learning_rate": 3.4749538683109054e-05, "loss": 0.1768, "step": 41655 }, { "epoch": 3.3745949449125083, "grad_norm": 0.08469226956367493, "learning_rate": 3.474503803051443e-05, "loss": 0.2607, "step": 41656 }, { "epoch": 3.3746759559300066, "grad_norm": 0.07713216543197632, "learning_rate": 3.47405373779198e-05, "loss": 0.2268, "step": 41657 }, { "epoch": 3.374756966947505, "grad_norm": 0.0848437175154686, "learning_rate": 3.4736036725325174e-05, "loss": 0.2055, "step": 41658 }, { "epoch": 3.374837977965003, "grad_norm": 0.0557897612452507, "learning_rate": 3.473153607273055e-05, "loss": 0.228, "step": 41659 }, { "epoch": 3.3749189889825018, "grad_norm": 0.07862678170204163, "learning_rate": 3.472703542013592e-05, "loss": 0.241, "step": 41660 }, { "epoch": 3.375, "grad_norm": 0.09499645978212357, "learning_rate": 3.4722534767541295e-05, "loss": 0.1893, "step": 41661 }, { "epoch": 3.3750810110174982, "grad_norm": 0.0774679183959961, "learning_rate": 3.471803411494667e-05, "loss": 0.23, "step": 41662 }, { "epoch": 3.375162022034997, "grad_norm": 0.07637955993413925, "learning_rate": 3.471353346235204e-05, "loss": 0.2347, "step": 41663 }, { "epoch": 3.375243033052495, "grad_norm": 0.0803007259964943, "learning_rate": 3.4709032809757416e-05, "loss": 0.2368, "step": 41664 }, { "epoch": 3.3753240440699934, "grad_norm": 0.0648980364203453, "learning_rate": 3.470453215716279e-05, "loss": 0.2305, "step": 41665 }, { "epoch": 3.3754050550874917, "grad_norm": 0.08036522567272186, "learning_rate": 3.470003150456816e-05, "loss": 0.2259, "step": 41666 }, { "epoch": 3.3754860661049904, "grad_norm": 0.06768336892127991, "learning_rate": 3.469553085197354e-05, "loss": 0.2213, "step": 41667 }, { "epoch": 3.3755670771224886, "grad_norm": 0.09597831964492798, "learning_rate": 3.469103019937891e-05, "loss": 0.2566, "step": 41668 }, { "epoch": 3.375648088139987, "grad_norm": 0.06598563492298126, "learning_rate": 3.4686529546784284e-05, "loss": 0.2495, "step": 41669 }, { "epoch": 3.3757290991574855, "grad_norm": 0.06711921095848083, "learning_rate": 3.468202889418966e-05, "loss": 0.2652, "step": 41670 }, { "epoch": 3.375810110174984, "grad_norm": 0.061890557408332825, "learning_rate": 3.467752824159503e-05, "loss": 0.2294, "step": 41671 }, { "epoch": 3.375891121192482, "grad_norm": 0.0685076043009758, "learning_rate": 3.4673027589000405e-05, "loss": 0.2186, "step": 41672 }, { "epoch": 3.3759721322099807, "grad_norm": 0.07758630812168121, "learning_rate": 3.4668526936405785e-05, "loss": 0.194, "step": 41673 }, { "epoch": 3.376053143227479, "grad_norm": 0.07140947878360748, "learning_rate": 3.466402628381115e-05, "loss": 0.2029, "step": 41674 }, { "epoch": 3.376134154244977, "grad_norm": 0.0828029066324234, "learning_rate": 3.4659525631216526e-05, "loss": 0.2559, "step": 41675 }, { "epoch": 3.376215165262476, "grad_norm": 0.07145032286643982, "learning_rate": 3.4655024978621906e-05, "loss": 0.2119, "step": 41676 }, { "epoch": 3.376296176279974, "grad_norm": 0.06601813435554504, "learning_rate": 3.465052432602728e-05, "loss": 0.2438, "step": 41677 }, { "epoch": 3.3763771872974724, "grad_norm": 0.059962522238492966, "learning_rate": 3.4646023673432646e-05, "loss": 0.2036, "step": 41678 }, { "epoch": 3.376458198314971, "grad_norm": 0.06766490638256073, "learning_rate": 3.464152302083803e-05, "loss": 0.2306, "step": 41679 }, { "epoch": 3.3765392093324693, "grad_norm": 0.07134569436311722, "learning_rate": 3.46370223682434e-05, "loss": 0.2413, "step": 41680 }, { "epoch": 3.3766202203499676, "grad_norm": 0.06784982234239578, "learning_rate": 3.463252171564877e-05, "loss": 0.2278, "step": 41681 }, { "epoch": 3.376701231367466, "grad_norm": 0.07382599264383316, "learning_rate": 3.462802106305415e-05, "loss": 0.2438, "step": 41682 }, { "epoch": 3.3767822423849645, "grad_norm": 0.09235008805990219, "learning_rate": 3.462352041045952e-05, "loss": 0.2344, "step": 41683 }, { "epoch": 3.3768632534024627, "grad_norm": 0.08357124775648117, "learning_rate": 3.461901975786489e-05, "loss": 0.2648, "step": 41684 }, { "epoch": 3.376944264419961, "grad_norm": 0.06024469807744026, "learning_rate": 3.461451910527027e-05, "loss": 0.227, "step": 41685 }, { "epoch": 3.3770252754374597, "grad_norm": 0.07447897642850876, "learning_rate": 3.461001845267564e-05, "loss": 0.2067, "step": 41686 }, { "epoch": 3.377106286454958, "grad_norm": 0.06601405888795853, "learning_rate": 3.460551780008101e-05, "loss": 0.2243, "step": 41687 }, { "epoch": 3.377187297472456, "grad_norm": 0.06824536621570587, "learning_rate": 3.460101714748639e-05, "loss": 0.2331, "step": 41688 }, { "epoch": 3.3772683084899544, "grad_norm": 0.06827297061681747, "learning_rate": 3.459651649489176e-05, "loss": 0.1986, "step": 41689 }, { "epoch": 3.377349319507453, "grad_norm": 0.08309777826070786, "learning_rate": 3.4592015842297136e-05, "loss": 0.2556, "step": 41690 }, { "epoch": 3.3774303305249513, "grad_norm": 0.08225461095571518, "learning_rate": 3.458751518970251e-05, "loss": 0.2468, "step": 41691 }, { "epoch": 3.3775113415424496, "grad_norm": 0.07561355829238892, "learning_rate": 3.4583014537107884e-05, "loss": 0.2368, "step": 41692 }, { "epoch": 3.3775923525599483, "grad_norm": 0.055802978575229645, "learning_rate": 3.457851388451326e-05, "loss": 0.239, "step": 41693 }, { "epoch": 3.3776733635774465, "grad_norm": 0.06992287933826447, "learning_rate": 3.457401323191863e-05, "loss": 0.2551, "step": 41694 }, { "epoch": 3.3777543745949448, "grad_norm": 0.0677470713853836, "learning_rate": 3.4569512579324004e-05, "loss": 0.2556, "step": 41695 }, { "epoch": 3.3778353856124435, "grad_norm": 0.07533083856105804, "learning_rate": 3.456501192672938e-05, "loss": 0.2256, "step": 41696 }, { "epoch": 3.3779163966299417, "grad_norm": 0.08108577132225037, "learning_rate": 3.456051127413475e-05, "loss": 0.2122, "step": 41697 }, { "epoch": 3.37799740764744, "grad_norm": 0.06752969324588776, "learning_rate": 3.4556010621540125e-05, "loss": 0.2487, "step": 41698 }, { "epoch": 3.3780784186649386, "grad_norm": 0.08408936858177185, "learning_rate": 3.45515099689455e-05, "loss": 0.225, "step": 41699 }, { "epoch": 3.378159429682437, "grad_norm": 0.060827113687992096, "learning_rate": 3.454700931635087e-05, "loss": 0.2148, "step": 41700 }, { "epoch": 3.378240440699935, "grad_norm": 0.07266361266374588, "learning_rate": 3.4542508663756246e-05, "loss": 0.2348, "step": 41701 }, { "epoch": 3.378321451717434, "grad_norm": 0.07331918925046921, "learning_rate": 3.453800801116162e-05, "loss": 0.2312, "step": 41702 }, { "epoch": 3.378402462734932, "grad_norm": 0.06699402630329132, "learning_rate": 3.453350735856699e-05, "loss": 0.2344, "step": 41703 }, { "epoch": 3.3784834737524303, "grad_norm": 0.07557890564203262, "learning_rate": 3.452900670597237e-05, "loss": 0.2351, "step": 41704 }, { "epoch": 3.3785644847699285, "grad_norm": 0.07931483536958694, "learning_rate": 3.452450605337774e-05, "loss": 0.2256, "step": 41705 }, { "epoch": 3.3786454957874272, "grad_norm": 0.06666845083236694, "learning_rate": 3.452000540078312e-05, "loss": 0.2349, "step": 41706 }, { "epoch": 3.3787265068049255, "grad_norm": 0.0748247355222702, "learning_rate": 3.451550474818849e-05, "loss": 0.2221, "step": 41707 }, { "epoch": 3.3788075178224237, "grad_norm": 0.08108245581388474, "learning_rate": 3.451100409559386e-05, "loss": 0.2018, "step": 41708 }, { "epoch": 3.3788885288399224, "grad_norm": 0.058831606060266495, "learning_rate": 3.450650344299924e-05, "loss": 0.2051, "step": 41709 }, { "epoch": 3.3789695398574207, "grad_norm": 0.0735163614153862, "learning_rate": 3.450200279040461e-05, "loss": 0.2383, "step": 41710 }, { "epoch": 3.379050550874919, "grad_norm": 0.0725238099694252, "learning_rate": 3.449750213780998e-05, "loss": 0.1845, "step": 41711 }, { "epoch": 3.379131561892417, "grad_norm": 0.07492050528526306, "learning_rate": 3.449300148521536e-05, "loss": 0.2192, "step": 41712 }, { "epoch": 3.379212572909916, "grad_norm": 0.06349498778581619, "learning_rate": 3.448850083262073e-05, "loss": 0.2394, "step": 41713 }, { "epoch": 3.379293583927414, "grad_norm": 0.07114043831825256, "learning_rate": 3.44840001800261e-05, "loss": 0.2025, "step": 41714 }, { "epoch": 3.3793745949449123, "grad_norm": 0.05922449752688408, "learning_rate": 3.447949952743148e-05, "loss": 0.2159, "step": 41715 }, { "epoch": 3.379455605962411, "grad_norm": 0.07918506860733032, "learning_rate": 3.447499887483686e-05, "loss": 0.2496, "step": 41716 }, { "epoch": 3.3795366169799093, "grad_norm": 0.07430548965930939, "learning_rate": 3.4470498222242224e-05, "loss": 0.2284, "step": 41717 }, { "epoch": 3.3796176279974075, "grad_norm": 0.05439443141222, "learning_rate": 3.4465997569647604e-05, "loss": 0.2187, "step": 41718 }, { "epoch": 3.379698639014906, "grad_norm": 0.05918572098016739, "learning_rate": 3.446149691705298e-05, "loss": 0.1868, "step": 41719 }, { "epoch": 3.3797796500324044, "grad_norm": 0.06798598915338516, "learning_rate": 3.4456996264458345e-05, "loss": 0.1913, "step": 41720 }, { "epoch": 3.3798606610499027, "grad_norm": 0.07275962084531784, "learning_rate": 3.4452495611863725e-05, "loss": 0.2509, "step": 41721 }, { "epoch": 3.3799416720674014, "grad_norm": 0.08497357368469238, "learning_rate": 3.44479949592691e-05, "loss": 0.1995, "step": 41722 }, { "epoch": 3.3800226830848996, "grad_norm": 0.04967997223138809, "learning_rate": 3.4443494306674465e-05, "loss": 0.2066, "step": 41723 }, { "epoch": 3.380103694102398, "grad_norm": 0.07163437455892563, "learning_rate": 3.4438993654079846e-05, "loss": 0.22, "step": 41724 }, { "epoch": 3.3801847051198965, "grad_norm": 0.06914282590150833, "learning_rate": 3.443449300148522e-05, "loss": 0.2032, "step": 41725 }, { "epoch": 3.380265716137395, "grad_norm": 0.09515602886676788, "learning_rate": 3.4429992348890586e-05, "loss": 0.2449, "step": 41726 }, { "epoch": 3.380346727154893, "grad_norm": 0.06940874457359314, "learning_rate": 3.4425491696295967e-05, "loss": 0.2025, "step": 41727 }, { "epoch": 3.3804277381723913, "grad_norm": 0.08153418451547623, "learning_rate": 3.442099104370134e-05, "loss": 0.2202, "step": 41728 }, { "epoch": 3.38050874918989, "grad_norm": 0.06321097910404205, "learning_rate": 3.441649039110671e-05, "loss": 0.2001, "step": 41729 }, { "epoch": 3.380589760207388, "grad_norm": 0.07211658358573914, "learning_rate": 3.441198973851209e-05, "loss": 0.2133, "step": 41730 }, { "epoch": 3.3806707712248865, "grad_norm": 0.07158449292182922, "learning_rate": 3.440748908591746e-05, "loss": 0.2732, "step": 41731 }, { "epoch": 3.380751782242385, "grad_norm": 0.06440142542123795, "learning_rate": 3.4402988433322835e-05, "loss": 0.2198, "step": 41732 }, { "epoch": 3.3808327932598834, "grad_norm": 0.06171039864420891, "learning_rate": 3.439848778072821e-05, "loss": 0.2142, "step": 41733 }, { "epoch": 3.3809138042773816, "grad_norm": 0.06435028463602066, "learning_rate": 3.439398712813358e-05, "loss": 0.1972, "step": 41734 }, { "epoch": 3.38099481529488, "grad_norm": 0.058364566415548325, "learning_rate": 3.4389486475538955e-05, "loss": 0.2247, "step": 41735 }, { "epoch": 3.3810758263123786, "grad_norm": 0.06774954497814178, "learning_rate": 3.438498582294433e-05, "loss": 0.2277, "step": 41736 }, { "epoch": 3.381156837329877, "grad_norm": 0.06185242161154747, "learning_rate": 3.43804851703497e-05, "loss": 0.1978, "step": 41737 }, { "epoch": 3.381237848347375, "grad_norm": 0.07757459580898285, "learning_rate": 3.4375984517755076e-05, "loss": 0.2111, "step": 41738 }, { "epoch": 3.3813188593648738, "grad_norm": 0.07607049494981766, "learning_rate": 3.437148386516045e-05, "loss": 0.227, "step": 41739 }, { "epoch": 3.381399870382372, "grad_norm": 0.07231981307268143, "learning_rate": 3.436698321256582e-05, "loss": 0.2336, "step": 41740 }, { "epoch": 3.3814808813998702, "grad_norm": 0.06916042417287827, "learning_rate": 3.43624825599712e-05, "loss": 0.2382, "step": 41741 }, { "epoch": 3.381561892417369, "grad_norm": 0.08472224324941635, "learning_rate": 3.435798190737657e-05, "loss": 0.2729, "step": 41742 }, { "epoch": 3.381642903434867, "grad_norm": 0.06384436786174774, "learning_rate": 3.4353481254781944e-05, "loss": 0.1799, "step": 41743 }, { "epoch": 3.3817239144523654, "grad_norm": 0.07143661379814148, "learning_rate": 3.434898060218732e-05, "loss": 0.2198, "step": 41744 }, { "epoch": 3.381804925469864, "grad_norm": 0.0806695744395256, "learning_rate": 3.43444799495927e-05, "loss": 0.243, "step": 41745 }, { "epoch": 3.3818859364873624, "grad_norm": 0.07502878457307816, "learning_rate": 3.4339979296998065e-05, "loss": 0.2412, "step": 41746 }, { "epoch": 3.3819669475048606, "grad_norm": 0.07038351148366928, "learning_rate": 3.433547864440344e-05, "loss": 0.2576, "step": 41747 }, { "epoch": 3.3820479585223593, "grad_norm": 0.0637182965874672, "learning_rate": 3.433097799180882e-05, "loss": 0.2341, "step": 41748 }, { "epoch": 3.3821289695398575, "grad_norm": 0.06181885302066803, "learning_rate": 3.4326477339214186e-05, "loss": 0.2047, "step": 41749 }, { "epoch": 3.3822099805573558, "grad_norm": 0.06934962421655655, "learning_rate": 3.432197668661956e-05, "loss": 0.1977, "step": 41750 }, { "epoch": 3.382290991574854, "grad_norm": 0.05986544117331505, "learning_rate": 3.431747603402494e-05, "loss": 0.2148, "step": 41751 }, { "epoch": 3.3823720025923527, "grad_norm": 0.06989867985248566, "learning_rate": 3.431297538143031e-05, "loss": 0.2379, "step": 41752 }, { "epoch": 3.382453013609851, "grad_norm": 0.07669881731271744, "learning_rate": 3.430847472883568e-05, "loss": 0.2418, "step": 41753 }, { "epoch": 3.382534024627349, "grad_norm": 0.06954249739646912, "learning_rate": 3.430397407624106e-05, "loss": 0.2336, "step": 41754 }, { "epoch": 3.382615035644848, "grad_norm": 0.0775119811296463, "learning_rate": 3.429947342364643e-05, "loss": 0.191, "step": 41755 }, { "epoch": 3.382696046662346, "grad_norm": 0.0706276074051857, "learning_rate": 3.42949727710518e-05, "loss": 0.2577, "step": 41756 }, { "epoch": 3.3827770576798444, "grad_norm": 0.08283329755067825, "learning_rate": 3.429047211845718e-05, "loss": 0.235, "step": 41757 }, { "epoch": 3.3828580686973426, "grad_norm": 0.07096508890390396, "learning_rate": 3.4285971465862555e-05, "loss": 0.2305, "step": 41758 }, { "epoch": 3.3829390797148413, "grad_norm": 0.07957513630390167, "learning_rate": 3.428147081326792e-05, "loss": 0.21, "step": 41759 }, { "epoch": 3.3830200907323396, "grad_norm": 0.07610480487346649, "learning_rate": 3.42769701606733e-05, "loss": 0.2155, "step": 41760 }, { "epoch": 3.383101101749838, "grad_norm": 0.07064136117696762, "learning_rate": 3.4272469508078676e-05, "loss": 0.2277, "step": 41761 }, { "epoch": 3.3831821127673365, "grad_norm": 0.10831783711910248, "learning_rate": 3.426796885548404e-05, "loss": 0.2739, "step": 41762 }, { "epoch": 3.3832631237848347, "grad_norm": 0.0787920281291008, "learning_rate": 3.426346820288942e-05, "loss": 0.2118, "step": 41763 }, { "epoch": 3.383344134802333, "grad_norm": 0.06703635305166245, "learning_rate": 3.42589675502948e-05, "loss": 0.2023, "step": 41764 }, { "epoch": 3.3834251458198317, "grad_norm": 0.06550689041614532, "learning_rate": 3.4254466897700163e-05, "loss": 0.2192, "step": 41765 }, { "epoch": 3.38350615683733, "grad_norm": 0.08957525342702866, "learning_rate": 3.4249966245105544e-05, "loss": 0.2328, "step": 41766 }, { "epoch": 3.383587167854828, "grad_norm": 0.07353270053863525, "learning_rate": 3.424546559251092e-05, "loss": 0.2321, "step": 41767 }, { "epoch": 3.383668178872327, "grad_norm": 0.06502358615398407, "learning_rate": 3.4240964939916284e-05, "loss": 0.2528, "step": 41768 }, { "epoch": 3.383749189889825, "grad_norm": 0.061305753886699677, "learning_rate": 3.4236464287321665e-05, "loss": 0.1764, "step": 41769 }, { "epoch": 3.3838302009073233, "grad_norm": 0.07350903749465942, "learning_rate": 3.423196363472704e-05, "loss": 0.2103, "step": 41770 }, { "epoch": 3.3839112119248216, "grad_norm": 0.07233782857656479, "learning_rate": 3.422746298213241e-05, "loss": 0.2348, "step": 41771 }, { "epoch": 3.3839922229423203, "grad_norm": 0.06870030611753464, "learning_rate": 3.4222962329537785e-05, "loss": 0.2086, "step": 41772 }, { "epoch": 3.3840732339598185, "grad_norm": 0.06497982144355774, "learning_rate": 3.421846167694316e-05, "loss": 0.1936, "step": 41773 }, { "epoch": 3.3841542449773168, "grad_norm": 0.06733326613903046, "learning_rate": 3.421396102434853e-05, "loss": 0.1935, "step": 41774 }, { "epoch": 3.3842352559948155, "grad_norm": 0.07830017805099487, "learning_rate": 3.4209460371753906e-05, "loss": 0.1875, "step": 41775 }, { "epoch": 3.3843162670123137, "grad_norm": 0.06126867234706879, "learning_rate": 3.420495971915928e-05, "loss": 0.2175, "step": 41776 }, { "epoch": 3.384397278029812, "grad_norm": 0.06090138480067253, "learning_rate": 3.4200459066564653e-05, "loss": 0.2057, "step": 41777 }, { "epoch": 3.3844782890473106, "grad_norm": 0.07315081357955933, "learning_rate": 3.419595841397003e-05, "loss": 0.22, "step": 41778 }, { "epoch": 3.384559300064809, "grad_norm": 0.07009439915418625, "learning_rate": 3.41914577613754e-05, "loss": 0.1932, "step": 41779 }, { "epoch": 3.384640311082307, "grad_norm": 0.0699535682797432, "learning_rate": 3.4186957108780774e-05, "loss": 0.2874, "step": 41780 }, { "epoch": 3.3847213220998054, "grad_norm": 0.08572935312986374, "learning_rate": 3.418245645618615e-05, "loss": 0.233, "step": 41781 }, { "epoch": 3.384802333117304, "grad_norm": 0.06120715290307999, "learning_rate": 3.417795580359152e-05, "loss": 0.1902, "step": 41782 }, { "epoch": 3.3848833441348023, "grad_norm": 0.07531646639108658, "learning_rate": 3.4173455150996895e-05, "loss": 0.2209, "step": 41783 }, { "epoch": 3.3849643551523005, "grad_norm": 0.08339875936508179, "learning_rate": 3.4168954498402275e-05, "loss": 0.2598, "step": 41784 }, { "epoch": 3.3850453661697992, "grad_norm": 0.0988910049200058, "learning_rate": 3.416445384580764e-05, "loss": 0.2628, "step": 41785 }, { "epoch": 3.3851263771872975, "grad_norm": 0.08542412519454956, "learning_rate": 3.4159953193213016e-05, "loss": 0.2276, "step": 41786 }, { "epoch": 3.3852073882047957, "grad_norm": 0.0794190838932991, "learning_rate": 3.4155452540618396e-05, "loss": 0.251, "step": 41787 }, { "epoch": 3.3852883992222944, "grad_norm": 0.05835704505443573, "learning_rate": 3.415095188802376e-05, "loss": 0.1812, "step": 41788 }, { "epoch": 3.3853694102397927, "grad_norm": 0.08290042728185654, "learning_rate": 3.414645123542914e-05, "loss": 0.2212, "step": 41789 }, { "epoch": 3.385450421257291, "grad_norm": 0.07653053849935532, "learning_rate": 3.414195058283452e-05, "loss": 0.2371, "step": 41790 }, { "epoch": 3.3855314322747896, "grad_norm": 0.07488921284675598, "learning_rate": 3.4137449930239884e-05, "loss": 0.2309, "step": 41791 }, { "epoch": 3.385612443292288, "grad_norm": 0.06250148266553879, "learning_rate": 3.413294927764526e-05, "loss": 0.1861, "step": 41792 }, { "epoch": 3.385693454309786, "grad_norm": 0.07396798580884933, "learning_rate": 3.412844862505064e-05, "loss": 0.2872, "step": 41793 }, { "epoch": 3.3857744653272843, "grad_norm": 0.09425674378871918, "learning_rate": 3.4123947972456005e-05, "loss": 0.2424, "step": 41794 }, { "epoch": 3.385855476344783, "grad_norm": 0.07500012218952179, "learning_rate": 3.411944731986138e-05, "loss": 0.2288, "step": 41795 }, { "epoch": 3.3859364873622813, "grad_norm": 0.060686469078063965, "learning_rate": 3.411494666726676e-05, "loss": 0.2192, "step": 41796 }, { "epoch": 3.3860174983797795, "grad_norm": 0.06987955421209335, "learning_rate": 3.411044601467213e-05, "loss": 0.2451, "step": 41797 }, { "epoch": 3.386098509397278, "grad_norm": 0.056687429547309875, "learning_rate": 3.41059453620775e-05, "loss": 0.2562, "step": 41798 }, { "epoch": 3.3861795204147764, "grad_norm": 0.07379105687141418, "learning_rate": 3.410144470948288e-05, "loss": 0.2316, "step": 41799 }, { "epoch": 3.3862605314322747, "grad_norm": 0.0667329877614975, "learning_rate": 3.409694405688825e-05, "loss": 0.2222, "step": 41800 }, { "epoch": 3.386341542449773, "grad_norm": 0.07343947142362595, "learning_rate": 3.409244340429362e-05, "loss": 0.2404, "step": 41801 }, { "epoch": 3.3864225534672716, "grad_norm": 0.07047673314809799, "learning_rate": 3.4087942751699e-05, "loss": 0.1889, "step": 41802 }, { "epoch": 3.38650356448477, "grad_norm": 0.07641629129648209, "learning_rate": 3.4083442099104374e-05, "loss": 0.2693, "step": 41803 }, { "epoch": 3.386584575502268, "grad_norm": 0.0820465087890625, "learning_rate": 3.407894144650974e-05, "loss": 0.2291, "step": 41804 }, { "epoch": 3.386665586519767, "grad_norm": 0.062225062400102615, "learning_rate": 3.407444079391512e-05, "loss": 0.2055, "step": 41805 }, { "epoch": 3.386746597537265, "grad_norm": 0.06488920748233795, "learning_rate": 3.4069940141320495e-05, "loss": 0.2022, "step": 41806 }, { "epoch": 3.3868276085547633, "grad_norm": 0.08183945715427399, "learning_rate": 3.406543948872586e-05, "loss": 0.2388, "step": 41807 }, { "epoch": 3.386908619572262, "grad_norm": 0.08639533072710037, "learning_rate": 3.406093883613124e-05, "loss": 0.2438, "step": 41808 }, { "epoch": 3.38698963058976, "grad_norm": 0.07558859139680862, "learning_rate": 3.4056438183536616e-05, "loss": 0.2301, "step": 41809 }, { "epoch": 3.3870706416072585, "grad_norm": 0.07002075016498566, "learning_rate": 3.405193753094199e-05, "loss": 0.2043, "step": 41810 }, { "epoch": 3.387151652624757, "grad_norm": 0.06354018300771713, "learning_rate": 3.404743687834736e-05, "loss": 0.1763, "step": 41811 }, { "epoch": 3.3872326636422554, "grad_norm": 0.0757867842912674, "learning_rate": 3.4042936225752736e-05, "loss": 0.2062, "step": 41812 }, { "epoch": 3.3873136746597536, "grad_norm": 0.07359560579061508, "learning_rate": 3.403843557315811e-05, "loss": 0.2227, "step": 41813 }, { "epoch": 3.3873946856772523, "grad_norm": 0.08458617329597473, "learning_rate": 3.4033934920563484e-05, "loss": 0.3023, "step": 41814 }, { "epoch": 3.3874756966947506, "grad_norm": 0.07034522294998169, "learning_rate": 3.402943426796886e-05, "loss": 0.2235, "step": 41815 }, { "epoch": 3.387556707712249, "grad_norm": 0.07200920581817627, "learning_rate": 3.402493361537423e-05, "loss": 0.2339, "step": 41816 }, { "epoch": 3.387637718729747, "grad_norm": 0.06833247095346451, "learning_rate": 3.4020432962779604e-05, "loss": 0.2089, "step": 41817 }, { "epoch": 3.3877187297472457, "grad_norm": 0.08015815168619156, "learning_rate": 3.401593231018498e-05, "loss": 0.219, "step": 41818 }, { "epoch": 3.387799740764744, "grad_norm": 0.059349171817302704, "learning_rate": 3.401143165759035e-05, "loss": 0.2029, "step": 41819 }, { "epoch": 3.3878807517822422, "grad_norm": 0.07790016382932663, "learning_rate": 3.4006931004995725e-05, "loss": 0.2399, "step": 41820 }, { "epoch": 3.387961762799741, "grad_norm": 0.057797957211732864, "learning_rate": 3.40024303524011e-05, "loss": 0.2152, "step": 41821 }, { "epoch": 3.388042773817239, "grad_norm": 0.07001172006130219, "learning_rate": 3.399792969980647e-05, "loss": 0.2168, "step": 41822 }, { "epoch": 3.3881237848347374, "grad_norm": 0.06563981622457504, "learning_rate": 3.399342904721185e-05, "loss": 0.2015, "step": 41823 }, { "epoch": 3.3882047958522357, "grad_norm": 0.0651637613773346, "learning_rate": 3.398892839461722e-05, "loss": 0.2154, "step": 41824 }, { "epoch": 3.3882858068697344, "grad_norm": 0.08092446625232697, "learning_rate": 3.398442774202259e-05, "loss": 0.2017, "step": 41825 }, { "epoch": 3.3883668178872326, "grad_norm": 0.07722778618335724, "learning_rate": 3.3979927089427974e-05, "loss": 0.212, "step": 41826 }, { "epoch": 3.388447828904731, "grad_norm": 0.06523314863443375, "learning_rate": 3.397542643683334e-05, "loss": 0.2102, "step": 41827 }, { "epoch": 3.3885288399222295, "grad_norm": 0.0650385171175003, "learning_rate": 3.3970925784238714e-05, "loss": 0.2051, "step": 41828 }, { "epoch": 3.3886098509397278, "grad_norm": 0.06373479217290878, "learning_rate": 3.3966425131644094e-05, "loss": 0.2232, "step": 41829 }, { "epoch": 3.388690861957226, "grad_norm": 0.071293905377388, "learning_rate": 3.396192447904946e-05, "loss": 0.2273, "step": 41830 }, { "epoch": 3.3887718729747247, "grad_norm": 0.0766294002532959, "learning_rate": 3.3957423826454835e-05, "loss": 0.2549, "step": 41831 }, { "epoch": 3.388852883992223, "grad_norm": 0.06483791768550873, "learning_rate": 3.3952923173860215e-05, "loss": 0.2059, "step": 41832 }, { "epoch": 3.388933895009721, "grad_norm": 0.07027419656515121, "learning_rate": 3.394842252126558e-05, "loss": 0.2068, "step": 41833 }, { "epoch": 3.38901490602722, "grad_norm": 0.058067139238119125, "learning_rate": 3.3943921868670956e-05, "loss": 0.198, "step": 41834 }, { "epoch": 3.389095917044718, "grad_norm": 0.06929459422826767, "learning_rate": 3.3939421216076336e-05, "loss": 0.2262, "step": 41835 }, { "epoch": 3.3891769280622164, "grad_norm": 0.07518819719552994, "learning_rate": 3.393492056348171e-05, "loss": 0.2761, "step": 41836 }, { "epoch": 3.389257939079715, "grad_norm": 0.0849183201789856, "learning_rate": 3.3930419910887076e-05, "loss": 0.2318, "step": 41837 }, { "epoch": 3.3893389500972133, "grad_norm": 0.07857996225357056, "learning_rate": 3.392591925829246e-05, "loss": 0.211, "step": 41838 }, { "epoch": 3.3894199611147116, "grad_norm": 0.06797852367162704, "learning_rate": 3.392141860569783e-05, "loss": 0.2219, "step": 41839 }, { "epoch": 3.38950097213221, "grad_norm": 0.05962246283888817, "learning_rate": 3.39169179531032e-05, "loss": 0.1953, "step": 41840 }, { "epoch": 3.3895819831497085, "grad_norm": 0.07180878520011902, "learning_rate": 3.391241730050858e-05, "loss": 0.2332, "step": 41841 }, { "epoch": 3.3896629941672067, "grad_norm": 0.07170886546373367, "learning_rate": 3.390791664791395e-05, "loss": 0.2289, "step": 41842 }, { "epoch": 3.389744005184705, "grad_norm": 0.059607286006212234, "learning_rate": 3.390341599531932e-05, "loss": 0.2315, "step": 41843 }, { "epoch": 3.3898250162022037, "grad_norm": 0.08054961264133453, "learning_rate": 3.38989153427247e-05, "loss": 0.2367, "step": 41844 }, { "epoch": 3.389906027219702, "grad_norm": 0.05538221821188927, "learning_rate": 3.389441469013007e-05, "loss": 0.1927, "step": 41845 }, { "epoch": 3.3899870382372, "grad_norm": 0.0739208310842514, "learning_rate": 3.388991403753544e-05, "loss": 0.2381, "step": 41846 }, { "epoch": 3.3900680492546984, "grad_norm": 0.070955790579319, "learning_rate": 3.388541338494082e-05, "loss": 0.2299, "step": 41847 }, { "epoch": 3.390149060272197, "grad_norm": 0.09719790518283844, "learning_rate": 3.388091273234619e-05, "loss": 0.2684, "step": 41848 }, { "epoch": 3.3902300712896953, "grad_norm": 0.08513044565916061, "learning_rate": 3.3876412079751566e-05, "loss": 0.2464, "step": 41849 }, { "epoch": 3.3903110823071936, "grad_norm": 0.07656481117010117, "learning_rate": 3.387191142715694e-05, "loss": 0.2528, "step": 41850 }, { "epoch": 3.3903920933246923, "grad_norm": 0.07512432336807251, "learning_rate": 3.3867410774562314e-05, "loss": 0.2255, "step": 41851 }, { "epoch": 3.3904731043421905, "grad_norm": 0.07700356096029282, "learning_rate": 3.386291012196769e-05, "loss": 0.2007, "step": 41852 }, { "epoch": 3.3905541153596888, "grad_norm": 0.07253925502300262, "learning_rate": 3.385840946937306e-05, "loss": 0.239, "step": 41853 }, { "epoch": 3.3906351263771874, "grad_norm": 0.06891423463821411, "learning_rate": 3.3853908816778434e-05, "loss": 0.2072, "step": 41854 }, { "epoch": 3.3907161373946857, "grad_norm": 0.0744556412100792, "learning_rate": 3.384940816418381e-05, "loss": 0.2432, "step": 41855 }, { "epoch": 3.390797148412184, "grad_norm": 0.07903169840574265, "learning_rate": 3.384490751158918e-05, "loss": 0.2118, "step": 41856 }, { "epoch": 3.3908781594296826, "grad_norm": 0.0659727156162262, "learning_rate": 3.3840406858994555e-05, "loss": 0.2289, "step": 41857 }, { "epoch": 3.390959170447181, "grad_norm": 0.06620805710554123, "learning_rate": 3.383590620639993e-05, "loss": 0.2178, "step": 41858 }, { "epoch": 3.391040181464679, "grad_norm": 0.0583527609705925, "learning_rate": 3.38314055538053e-05, "loss": 0.1915, "step": 41859 }, { "epoch": 3.391121192482178, "grad_norm": 0.06404381990432739, "learning_rate": 3.3826904901210676e-05, "loss": 0.2422, "step": 41860 }, { "epoch": 3.391202203499676, "grad_norm": 0.06776855140924454, "learning_rate": 3.382240424861605e-05, "loss": 0.2118, "step": 41861 }, { "epoch": 3.3912832145171743, "grad_norm": 0.07318595796823502, "learning_rate": 3.381790359602143e-05, "loss": 0.258, "step": 41862 }, { "epoch": 3.3913642255346725, "grad_norm": 0.06983473151922226, "learning_rate": 3.38134029434268e-05, "loss": 0.2543, "step": 41863 }, { "epoch": 3.3914452365521712, "grad_norm": 0.0680810734629631, "learning_rate": 3.380890229083217e-05, "loss": 0.2119, "step": 41864 }, { "epoch": 3.3915262475696695, "grad_norm": 0.08268336206674576, "learning_rate": 3.380440163823755e-05, "loss": 0.2675, "step": 41865 }, { "epoch": 3.3916072585871677, "grad_norm": 0.08607517182826996, "learning_rate": 3.379990098564292e-05, "loss": 0.2608, "step": 41866 }, { "epoch": 3.3916882696046664, "grad_norm": 0.07086408883333206, "learning_rate": 3.379540033304829e-05, "loss": 0.202, "step": 41867 }, { "epoch": 3.3917692806221647, "grad_norm": 0.08508818596601486, "learning_rate": 3.379089968045367e-05, "loss": 0.2167, "step": 41868 }, { "epoch": 3.391850291639663, "grad_norm": 0.07000960409641266, "learning_rate": 3.378639902785904e-05, "loss": 0.2311, "step": 41869 }, { "epoch": 3.391931302657161, "grad_norm": 0.06851160526275635, "learning_rate": 3.378189837526441e-05, "loss": 0.2337, "step": 41870 }, { "epoch": 3.39201231367466, "grad_norm": 0.07425341755151749, "learning_rate": 3.377739772266979e-05, "loss": 0.2318, "step": 41871 }, { "epoch": 3.392093324692158, "grad_norm": 0.08614206314086914, "learning_rate": 3.377289707007516e-05, "loss": 0.2477, "step": 41872 }, { "epoch": 3.3921743357096563, "grad_norm": 0.06568558514118195, "learning_rate": 3.376839641748053e-05, "loss": 0.2143, "step": 41873 }, { "epoch": 3.392255346727155, "grad_norm": 0.07687494158744812, "learning_rate": 3.376389576488591e-05, "loss": 0.2093, "step": 41874 }, { "epoch": 3.3923363577446533, "grad_norm": 0.08374509960412979, "learning_rate": 3.375939511229129e-05, "loss": 0.2257, "step": 41875 }, { "epoch": 3.3924173687621515, "grad_norm": 0.07319393008947372, "learning_rate": 3.3754894459696654e-05, "loss": 0.2314, "step": 41876 }, { "epoch": 3.39249837977965, "grad_norm": 0.07026918232440948, "learning_rate": 3.3750393807102034e-05, "loss": 0.2239, "step": 41877 }, { "epoch": 3.3925793907971484, "grad_norm": 0.07389682531356812, "learning_rate": 3.374589315450741e-05, "loss": 0.2606, "step": 41878 }, { "epoch": 3.3926604018146467, "grad_norm": 0.07238435745239258, "learning_rate": 3.3741392501912775e-05, "loss": 0.2271, "step": 41879 }, { "epoch": 3.3927414128321454, "grad_norm": 0.09210827946662903, "learning_rate": 3.3736891849318155e-05, "loss": 0.2411, "step": 41880 }, { "epoch": 3.3928224238496436, "grad_norm": 0.06820737570524216, "learning_rate": 3.373239119672353e-05, "loss": 0.2363, "step": 41881 }, { "epoch": 3.392903434867142, "grad_norm": 0.07121577113866806, "learning_rate": 3.3727890544128895e-05, "loss": 0.2266, "step": 41882 }, { "epoch": 3.3929844458846405, "grad_norm": 0.07507503777742386, "learning_rate": 3.3723389891534276e-05, "loss": 0.2798, "step": 41883 }, { "epoch": 3.393065456902139, "grad_norm": 0.07295151799917221, "learning_rate": 3.371888923893965e-05, "loss": 0.2719, "step": 41884 }, { "epoch": 3.393146467919637, "grad_norm": 0.06200207769870758, "learning_rate": 3.3714388586345016e-05, "loss": 0.2045, "step": 41885 }, { "epoch": 3.3932274789371353, "grad_norm": 0.07828261703252792, "learning_rate": 3.3709887933750397e-05, "loss": 0.2602, "step": 41886 }, { "epoch": 3.393308489954634, "grad_norm": 0.07141652703285217, "learning_rate": 3.370538728115577e-05, "loss": 0.2113, "step": 41887 }, { "epoch": 3.393389500972132, "grad_norm": 0.08014405518770218, "learning_rate": 3.370088662856114e-05, "loss": 0.23, "step": 41888 }, { "epoch": 3.3934705119896305, "grad_norm": 0.06970424205064774, "learning_rate": 3.369638597596652e-05, "loss": 0.2211, "step": 41889 }, { "epoch": 3.393551523007129, "grad_norm": 0.07382189482450485, "learning_rate": 3.369188532337189e-05, "loss": 0.244, "step": 41890 }, { "epoch": 3.3936325340246274, "grad_norm": 0.11215295642614365, "learning_rate": 3.3687384670777265e-05, "loss": 0.2367, "step": 41891 }, { "epoch": 3.3937135450421256, "grad_norm": 0.06525477766990662, "learning_rate": 3.368288401818264e-05, "loss": 0.2217, "step": 41892 }, { "epoch": 3.393794556059624, "grad_norm": 0.07368913292884827, "learning_rate": 3.367838336558801e-05, "loss": 0.2218, "step": 41893 }, { "epoch": 3.3938755670771226, "grad_norm": 0.06014920398592949, "learning_rate": 3.3673882712993385e-05, "loss": 0.2072, "step": 41894 }, { "epoch": 3.393956578094621, "grad_norm": 0.06717801094055176, "learning_rate": 3.366938206039876e-05, "loss": 0.2289, "step": 41895 }, { "epoch": 3.394037589112119, "grad_norm": 0.0629340410232544, "learning_rate": 3.366488140780413e-05, "loss": 0.2115, "step": 41896 }, { "epoch": 3.3941186001296177, "grad_norm": 0.07047463953495026, "learning_rate": 3.3660380755209506e-05, "loss": 0.2017, "step": 41897 }, { "epoch": 3.394199611147116, "grad_norm": 0.07417044043540955, "learning_rate": 3.365588010261488e-05, "loss": 0.2439, "step": 41898 }, { "epoch": 3.3942806221646142, "grad_norm": 0.0626869797706604, "learning_rate": 3.3651379450020253e-05, "loss": 0.2207, "step": 41899 }, { "epoch": 3.394361633182113, "grad_norm": 0.08169003576040268, "learning_rate": 3.364687879742563e-05, "loss": 0.2167, "step": 41900 }, { "epoch": 3.394442644199611, "grad_norm": 0.07657211273908615, "learning_rate": 3.3642378144831e-05, "loss": 0.2304, "step": 41901 }, { "epoch": 3.3945236552171094, "grad_norm": 0.07610435783863068, "learning_rate": 3.3637877492236374e-05, "loss": 0.2185, "step": 41902 }, { "epoch": 3.394604666234608, "grad_norm": 0.07222923636436462, "learning_rate": 3.363337683964175e-05, "loss": 0.2256, "step": 41903 }, { "epoch": 3.3946856772521063, "grad_norm": 0.061299409717321396, "learning_rate": 3.362887618704713e-05, "loss": 0.203, "step": 41904 }, { "epoch": 3.3947666882696046, "grad_norm": 0.053315408527851105, "learning_rate": 3.3624375534452495e-05, "loss": 0.1838, "step": 41905 }, { "epoch": 3.3948476992871033, "grad_norm": 0.06681319326162338, "learning_rate": 3.361987488185787e-05, "loss": 0.2455, "step": 41906 }, { "epoch": 3.3949287103046015, "grad_norm": 0.05498621612787247, "learning_rate": 3.361537422926325e-05, "loss": 0.1759, "step": 41907 }, { "epoch": 3.3950097213220998, "grad_norm": 0.06275831907987595, "learning_rate": 3.3610873576668616e-05, "loss": 0.2654, "step": 41908 }, { "epoch": 3.395090732339598, "grad_norm": 0.07026954740285873, "learning_rate": 3.360637292407399e-05, "loss": 0.2193, "step": 41909 }, { "epoch": 3.3951717433570967, "grad_norm": 0.0646926537156105, "learning_rate": 3.360187227147937e-05, "loss": 0.22, "step": 41910 }, { "epoch": 3.395252754374595, "grad_norm": 0.0680764839053154, "learning_rate": 3.359737161888474e-05, "loss": 0.2178, "step": 41911 }, { "epoch": 3.395333765392093, "grad_norm": 0.06526144593954086, "learning_rate": 3.359287096629011e-05, "loss": 0.2329, "step": 41912 }, { "epoch": 3.395414776409592, "grad_norm": 0.07439577579498291, "learning_rate": 3.358837031369549e-05, "loss": 0.2322, "step": 41913 }, { "epoch": 3.39549578742709, "grad_norm": 0.05572812259197235, "learning_rate": 3.358386966110086e-05, "loss": 0.2094, "step": 41914 }, { "epoch": 3.3955767984445884, "grad_norm": 0.07370764017105103, "learning_rate": 3.357936900850623e-05, "loss": 0.2234, "step": 41915 }, { "epoch": 3.3956578094620866, "grad_norm": 0.07162272930145264, "learning_rate": 3.357486835591161e-05, "loss": 0.2457, "step": 41916 }, { "epoch": 3.3957388204795853, "grad_norm": 0.06438523530960083, "learning_rate": 3.3570367703316985e-05, "loss": 0.2496, "step": 41917 }, { "epoch": 3.3958198314970836, "grad_norm": 0.06554609537124634, "learning_rate": 3.356586705072235e-05, "loss": 0.1977, "step": 41918 }, { "epoch": 3.395900842514582, "grad_norm": 0.06860709190368652, "learning_rate": 3.356136639812773e-05, "loss": 0.2366, "step": 41919 }, { "epoch": 3.3959818535320805, "grad_norm": 0.059882767498493195, "learning_rate": 3.3556865745533106e-05, "loss": 0.2326, "step": 41920 }, { "epoch": 3.3960628645495787, "grad_norm": 0.06205613911151886, "learning_rate": 3.355236509293847e-05, "loss": 0.2359, "step": 41921 }, { "epoch": 3.396143875567077, "grad_norm": 0.08020277321338654, "learning_rate": 3.354786444034385e-05, "loss": 0.2034, "step": 41922 }, { "epoch": 3.3962248865845757, "grad_norm": 0.07167024910449982, "learning_rate": 3.354336378774923e-05, "loss": 0.2411, "step": 41923 }, { "epoch": 3.396305897602074, "grad_norm": 0.06740758568048477, "learning_rate": 3.3538863135154593e-05, "loss": 0.2538, "step": 41924 }, { "epoch": 3.396386908619572, "grad_norm": 0.07286737859249115, "learning_rate": 3.3534362482559974e-05, "loss": 0.2211, "step": 41925 }, { "epoch": 3.396467919637071, "grad_norm": 0.09053151309490204, "learning_rate": 3.352986182996535e-05, "loss": 0.2463, "step": 41926 }, { "epoch": 3.396548930654569, "grad_norm": 0.07128996402025223, "learning_rate": 3.3525361177370714e-05, "loss": 0.2289, "step": 41927 }, { "epoch": 3.3966299416720673, "grad_norm": 0.07041050493717194, "learning_rate": 3.3520860524776095e-05, "loss": 0.2312, "step": 41928 }, { "epoch": 3.396710952689566, "grad_norm": 0.07474519312381744, "learning_rate": 3.351635987218147e-05, "loss": 0.256, "step": 41929 }, { "epoch": 3.3967919637070643, "grad_norm": 0.07732457667589188, "learning_rate": 3.351185921958684e-05, "loss": 0.2071, "step": 41930 }, { "epoch": 3.3968729747245625, "grad_norm": 0.06436406075954437, "learning_rate": 3.3507358566992215e-05, "loss": 0.2461, "step": 41931 }, { "epoch": 3.3969539857420608, "grad_norm": 0.07016737014055252, "learning_rate": 3.350285791439759e-05, "loss": 0.2261, "step": 41932 }, { "epoch": 3.3970349967595594, "grad_norm": 0.06618059426546097, "learning_rate": 3.349835726180296e-05, "loss": 0.1887, "step": 41933 }, { "epoch": 3.3971160077770577, "grad_norm": 0.07330776005983353, "learning_rate": 3.3493856609208336e-05, "loss": 0.2334, "step": 41934 }, { "epoch": 3.397197018794556, "grad_norm": 0.08016584813594818, "learning_rate": 3.348935595661371e-05, "loss": 0.1919, "step": 41935 }, { "epoch": 3.3972780298120546, "grad_norm": 0.07798706740140915, "learning_rate": 3.3484855304019084e-05, "loss": 0.1995, "step": 41936 }, { "epoch": 3.397359040829553, "grad_norm": 0.06129057705402374, "learning_rate": 3.348035465142446e-05, "loss": 0.1945, "step": 41937 }, { "epoch": 3.397440051847051, "grad_norm": 0.08152814209461212, "learning_rate": 3.347585399882983e-05, "loss": 0.2431, "step": 41938 }, { "epoch": 3.3975210628645494, "grad_norm": 0.06842713803052902, "learning_rate": 3.3471353346235204e-05, "loss": 0.2326, "step": 41939 }, { "epoch": 3.397602073882048, "grad_norm": 0.06839247792959213, "learning_rate": 3.346685269364058e-05, "loss": 0.2144, "step": 41940 }, { "epoch": 3.3976830848995463, "grad_norm": 0.06789159774780273, "learning_rate": 3.346235204104595e-05, "loss": 0.2215, "step": 41941 }, { "epoch": 3.3977640959170445, "grad_norm": 0.07566022872924805, "learning_rate": 3.3457851388451325e-05, "loss": 0.2042, "step": 41942 }, { "epoch": 3.3978451069345432, "grad_norm": 0.08345601707696915, "learning_rate": 3.3453350735856706e-05, "loss": 0.2257, "step": 41943 }, { "epoch": 3.3979261179520415, "grad_norm": 0.08608893305063248, "learning_rate": 3.344885008326207e-05, "loss": 0.2301, "step": 41944 }, { "epoch": 3.3980071289695397, "grad_norm": 0.066611148416996, "learning_rate": 3.3444349430667446e-05, "loss": 0.2341, "step": 41945 }, { "epoch": 3.3980881399870384, "grad_norm": 0.07658085972070694, "learning_rate": 3.3439848778072826e-05, "loss": 0.2327, "step": 41946 }, { "epoch": 3.3981691510045366, "grad_norm": 0.08586086332798004, "learning_rate": 3.343534812547819e-05, "loss": 0.2459, "step": 41947 }, { "epoch": 3.398250162022035, "grad_norm": 0.06390102207660675, "learning_rate": 3.343084747288357e-05, "loss": 0.2175, "step": 41948 }, { "epoch": 3.3983311730395336, "grad_norm": 0.061589255928993225, "learning_rate": 3.342634682028895e-05, "loss": 0.204, "step": 41949 }, { "epoch": 3.398412184057032, "grad_norm": 0.069948211312294, "learning_rate": 3.3421846167694314e-05, "loss": 0.2128, "step": 41950 }, { "epoch": 3.39849319507453, "grad_norm": 0.08519947528839111, "learning_rate": 3.341734551509969e-05, "loss": 0.2675, "step": 41951 }, { "epoch": 3.3985742060920288, "grad_norm": 0.057270586490631104, "learning_rate": 3.341284486250507e-05, "loss": 0.2281, "step": 41952 }, { "epoch": 3.398655217109527, "grad_norm": 0.0729655846953392, "learning_rate": 3.3408344209910435e-05, "loss": 0.2319, "step": 41953 }, { "epoch": 3.3987362281270252, "grad_norm": 0.0660521537065506, "learning_rate": 3.340384355731581e-05, "loss": 0.229, "step": 41954 }, { "epoch": 3.3988172391445235, "grad_norm": 0.0661383792757988, "learning_rate": 3.339934290472119e-05, "loss": 0.2346, "step": 41955 }, { "epoch": 3.398898250162022, "grad_norm": 0.08104848861694336, "learning_rate": 3.339484225212656e-05, "loss": 0.2119, "step": 41956 }, { "epoch": 3.3989792611795204, "grad_norm": 0.07406584918498993, "learning_rate": 3.339034159953193e-05, "loss": 0.2148, "step": 41957 }, { "epoch": 3.3990602721970187, "grad_norm": 0.07290060818195343, "learning_rate": 3.338584094693731e-05, "loss": 0.2254, "step": 41958 }, { "epoch": 3.3991412832145174, "grad_norm": 0.07462384551763535, "learning_rate": 3.338134029434268e-05, "loss": 0.2184, "step": 41959 }, { "epoch": 3.3992222942320156, "grad_norm": 0.08314083516597748, "learning_rate": 3.337683964174805e-05, "loss": 0.2084, "step": 41960 }, { "epoch": 3.399303305249514, "grad_norm": 0.08253400027751923, "learning_rate": 3.337233898915343e-05, "loss": 0.2203, "step": 41961 }, { "epoch": 3.399384316267012, "grad_norm": 0.07560184597969055, "learning_rate": 3.3367838336558804e-05, "loss": 0.2147, "step": 41962 }, { "epoch": 3.399465327284511, "grad_norm": 0.07779417186975479, "learning_rate": 3.336333768396418e-05, "loss": 0.2544, "step": 41963 }, { "epoch": 3.399546338302009, "grad_norm": 0.06740397214889526, "learning_rate": 3.335883703136955e-05, "loss": 0.2268, "step": 41964 }, { "epoch": 3.3996273493195073, "grad_norm": 0.07044196128845215, "learning_rate": 3.3354336378774925e-05, "loss": 0.2096, "step": 41965 }, { "epoch": 3.399708360337006, "grad_norm": 0.0865856185555458, "learning_rate": 3.33498357261803e-05, "loss": 0.21, "step": 41966 }, { "epoch": 3.399789371354504, "grad_norm": 0.05665763095021248, "learning_rate": 3.334533507358567e-05, "loss": 0.2054, "step": 41967 }, { "epoch": 3.3998703823720025, "grad_norm": 0.07386376708745956, "learning_rate": 3.3340834420991046e-05, "loss": 0.2552, "step": 41968 }, { "epoch": 3.399951393389501, "grad_norm": 0.06082529574632645, "learning_rate": 3.333633376839642e-05, "loss": 0.2202, "step": 41969 }, { "epoch": 3.4000324044069994, "grad_norm": 0.07679502665996552, "learning_rate": 3.333183311580179e-05, "loss": 0.2321, "step": 41970 }, { "epoch": 3.4001134154244976, "grad_norm": 0.07322760671377182, "learning_rate": 3.3327332463207166e-05, "loss": 0.2666, "step": 41971 }, { "epoch": 3.4001944264419963, "grad_norm": 0.06411641091108322, "learning_rate": 3.332283181061254e-05, "loss": 0.2117, "step": 41972 }, { "epoch": 3.4002754374594946, "grad_norm": 0.07164231687784195, "learning_rate": 3.3318331158017914e-05, "loss": 0.2492, "step": 41973 }, { "epoch": 3.400356448476993, "grad_norm": 0.06656774133443832, "learning_rate": 3.331383050542329e-05, "loss": 0.2498, "step": 41974 }, { "epoch": 3.4004374594944915, "grad_norm": 0.07032846659421921, "learning_rate": 3.330932985282866e-05, "loss": 0.2298, "step": 41975 }, { "epoch": 3.4005184705119897, "grad_norm": 0.07302508503198624, "learning_rate": 3.3304829200234034e-05, "loss": 0.2349, "step": 41976 }, { "epoch": 3.400599481529488, "grad_norm": 0.06317179650068283, "learning_rate": 3.330032854763941e-05, "loss": 0.2036, "step": 41977 }, { "epoch": 3.4006804925469862, "grad_norm": 0.08565467596054077, "learning_rate": 3.329582789504478e-05, "loss": 0.2776, "step": 41978 }, { "epoch": 3.400761503564485, "grad_norm": 0.06535403430461884, "learning_rate": 3.3291327242450155e-05, "loss": 0.1986, "step": 41979 }, { "epoch": 3.400842514581983, "grad_norm": 0.0668690949678421, "learning_rate": 3.328682658985553e-05, "loss": 0.1971, "step": 41980 }, { "epoch": 3.4009235255994814, "grad_norm": 0.07167673110961914, "learning_rate": 3.32823259372609e-05, "loss": 0.2251, "step": 41981 }, { "epoch": 3.40100453661698, "grad_norm": 0.07527213543653488, "learning_rate": 3.327782528466628e-05, "loss": 0.2217, "step": 41982 }, { "epoch": 3.4010855476344783, "grad_norm": 0.06892262399196625, "learning_rate": 3.327332463207165e-05, "loss": 0.2242, "step": 41983 }, { "epoch": 3.4011665586519766, "grad_norm": 0.0705183818936348, "learning_rate": 3.326882397947702e-05, "loss": 0.2061, "step": 41984 }, { "epoch": 3.401247569669475, "grad_norm": 0.0694856271147728, "learning_rate": 3.3264323326882404e-05, "loss": 0.2324, "step": 41985 }, { "epoch": 3.4013285806869735, "grad_norm": 0.06926519423723221, "learning_rate": 3.325982267428777e-05, "loss": 0.2091, "step": 41986 }, { "epoch": 3.4014095917044718, "grad_norm": 0.0646764412522316, "learning_rate": 3.3255322021693144e-05, "loss": 0.2188, "step": 41987 }, { "epoch": 3.40149060272197, "grad_norm": 0.09222811460494995, "learning_rate": 3.3250821369098524e-05, "loss": 0.2191, "step": 41988 }, { "epoch": 3.4015716137394687, "grad_norm": 0.06459737569093704, "learning_rate": 3.324632071650389e-05, "loss": 0.2464, "step": 41989 }, { "epoch": 3.401652624756967, "grad_norm": 0.07572778314352036, "learning_rate": 3.3241820063909265e-05, "loss": 0.2692, "step": 41990 }, { "epoch": 3.401733635774465, "grad_norm": 0.0832890048623085, "learning_rate": 3.3237319411314645e-05, "loss": 0.2134, "step": 41991 }, { "epoch": 3.401814646791964, "grad_norm": 0.07352113723754883, "learning_rate": 3.323281875872001e-05, "loss": 0.2078, "step": 41992 }, { "epoch": 3.401895657809462, "grad_norm": 0.08216522634029388, "learning_rate": 3.3228318106125386e-05, "loss": 0.2582, "step": 41993 }, { "epoch": 3.4019766688269604, "grad_norm": 0.07835206389427185, "learning_rate": 3.3223817453530766e-05, "loss": 0.2371, "step": 41994 }, { "epoch": 3.402057679844459, "grad_norm": 0.06377358734607697, "learning_rate": 3.321931680093614e-05, "loss": 0.2443, "step": 41995 }, { "epoch": 3.4021386908619573, "grad_norm": 0.0716206505894661, "learning_rate": 3.321481614834151e-05, "loss": 0.2213, "step": 41996 }, { "epoch": 3.4022197018794555, "grad_norm": 0.0773889496922493, "learning_rate": 3.321031549574689e-05, "loss": 0.2053, "step": 41997 }, { "epoch": 3.402300712896954, "grad_norm": 0.05962640419602394, "learning_rate": 3.320581484315226e-05, "loss": 0.2136, "step": 41998 }, { "epoch": 3.4023817239144525, "grad_norm": 0.07761330157518387, "learning_rate": 3.3201314190557634e-05, "loss": 0.2927, "step": 41999 }, { "epoch": 3.4024627349319507, "grad_norm": 0.06332354247570038, "learning_rate": 3.319681353796301e-05, "loss": 0.2187, "step": 42000 }, { "epoch": 3.402543745949449, "grad_norm": 0.06515365839004517, "learning_rate": 3.319231288536838e-05, "loss": 0.2044, "step": 42001 }, { "epoch": 3.4026247569669477, "grad_norm": 0.07421457767486572, "learning_rate": 3.3187812232773755e-05, "loss": 0.2322, "step": 42002 }, { "epoch": 3.402705767984446, "grad_norm": 0.06750653684139252, "learning_rate": 3.318331158017913e-05, "loss": 0.2325, "step": 42003 }, { "epoch": 3.402786779001944, "grad_norm": 0.0802840143442154, "learning_rate": 3.31788109275845e-05, "loss": 0.2274, "step": 42004 }, { "epoch": 3.4028677900194424, "grad_norm": 0.054571714252233505, "learning_rate": 3.3174310274989876e-05, "loss": 0.2, "step": 42005 }, { "epoch": 3.402948801036941, "grad_norm": 0.05173730105161667, "learning_rate": 3.316980962239525e-05, "loss": 0.2173, "step": 42006 }, { "epoch": 3.4030298120544393, "grad_norm": 0.07291796058416367, "learning_rate": 3.316530896980062e-05, "loss": 0.2111, "step": 42007 }, { "epoch": 3.4031108230719376, "grad_norm": 0.07727206498384476, "learning_rate": 3.3160808317205997e-05, "loss": 0.218, "step": 42008 }, { "epoch": 3.4031918340894363, "grad_norm": 0.07800658047199249, "learning_rate": 3.315630766461137e-05, "loss": 0.2317, "step": 42009 }, { "epoch": 3.4032728451069345, "grad_norm": 0.07881782948970795, "learning_rate": 3.3151807012016744e-05, "loss": 0.2173, "step": 42010 }, { "epoch": 3.4033538561244328, "grad_norm": 0.06382521986961365, "learning_rate": 3.314730635942212e-05, "loss": 0.2202, "step": 42011 }, { "epoch": 3.4034348671419314, "grad_norm": 0.05812549963593483, "learning_rate": 3.314280570682749e-05, "loss": 0.2114, "step": 42012 }, { "epoch": 3.4035158781594297, "grad_norm": 0.08771507441997528, "learning_rate": 3.3138305054232865e-05, "loss": 0.2637, "step": 42013 }, { "epoch": 3.403596889176928, "grad_norm": 0.07088574022054672, "learning_rate": 3.313380440163824e-05, "loss": 0.2169, "step": 42014 }, { "epoch": 3.4036779001944266, "grad_norm": 0.07294151932001114, "learning_rate": 3.312930374904361e-05, "loss": 0.2226, "step": 42015 }, { "epoch": 3.403758911211925, "grad_norm": 0.06775092333555222, "learning_rate": 3.3124803096448985e-05, "loss": 0.2253, "step": 42016 }, { "epoch": 3.403839922229423, "grad_norm": 0.07559601962566376, "learning_rate": 3.312030244385436e-05, "loss": 0.2339, "step": 42017 }, { "epoch": 3.403920933246922, "grad_norm": 0.0761316642165184, "learning_rate": 3.311580179125973e-05, "loss": 0.213, "step": 42018 }, { "epoch": 3.40400194426442, "grad_norm": 0.0763896033167839, "learning_rate": 3.3111301138665106e-05, "loss": 0.191, "step": 42019 }, { "epoch": 3.4040829552819183, "grad_norm": 0.05668618530035019, "learning_rate": 3.310680048607048e-05, "loss": 0.202, "step": 42020 }, { "epoch": 3.4041639662994165, "grad_norm": 0.07589863985776901, "learning_rate": 3.310229983347586e-05, "loss": 0.2269, "step": 42021 }, { "epoch": 3.404244977316915, "grad_norm": 0.06405602395534515, "learning_rate": 3.309779918088123e-05, "loss": 0.1936, "step": 42022 }, { "epoch": 3.4043259883344135, "grad_norm": 0.06749610602855682, "learning_rate": 3.30932985282866e-05, "loss": 0.2179, "step": 42023 }, { "epoch": 3.4044069993519117, "grad_norm": 0.11305932700634003, "learning_rate": 3.308879787569198e-05, "loss": 0.259, "step": 42024 }, { "epoch": 3.4044880103694104, "grad_norm": 0.07221528142690659, "learning_rate": 3.308429722309735e-05, "loss": 0.2161, "step": 42025 }, { "epoch": 3.4045690213869086, "grad_norm": 0.08941753208637238, "learning_rate": 3.307979657050272e-05, "loss": 0.2061, "step": 42026 }, { "epoch": 3.404650032404407, "grad_norm": 0.052454691380262375, "learning_rate": 3.30752959179081e-05, "loss": 0.1872, "step": 42027 }, { "epoch": 3.404731043421905, "grad_norm": 0.07510961592197418, "learning_rate": 3.307079526531347e-05, "loss": 0.2377, "step": 42028 }, { "epoch": 3.404812054439404, "grad_norm": 0.07032472640275955, "learning_rate": 3.306629461271884e-05, "loss": 0.2375, "step": 42029 }, { "epoch": 3.404893065456902, "grad_norm": 0.082360178232193, "learning_rate": 3.306179396012422e-05, "loss": 0.2331, "step": 42030 }, { "epoch": 3.4049740764744003, "grad_norm": 0.07889021933078766, "learning_rate": 3.305729330752959e-05, "loss": 0.2524, "step": 42031 }, { "epoch": 3.405055087491899, "grad_norm": 0.0628356859087944, "learning_rate": 3.305279265493497e-05, "loss": 0.2124, "step": 42032 }, { "epoch": 3.4051360985093972, "grad_norm": 0.07455432415008545, "learning_rate": 3.304829200234034e-05, "loss": 0.2268, "step": 42033 }, { "epoch": 3.4052171095268955, "grad_norm": 0.06572439521551132, "learning_rate": 3.304379134974572e-05, "loss": 0.1964, "step": 42034 }, { "epoch": 3.405298120544394, "grad_norm": 0.0840260237455368, "learning_rate": 3.303929069715109e-05, "loss": 0.2326, "step": 42035 }, { "epoch": 3.4053791315618924, "grad_norm": 0.08687842637300491, "learning_rate": 3.3034790044556464e-05, "loss": 0.264, "step": 42036 }, { "epoch": 3.4054601425793907, "grad_norm": 0.06921391189098358, "learning_rate": 3.303028939196184e-05, "loss": 0.2377, "step": 42037 }, { "epoch": 3.4055411535968894, "grad_norm": 0.08886100351810455, "learning_rate": 3.302578873936721e-05, "loss": 0.2083, "step": 42038 }, { "epoch": 3.4056221646143876, "grad_norm": 0.06395532190799713, "learning_rate": 3.3021288086772585e-05, "loss": 0.2289, "step": 42039 }, { "epoch": 3.405703175631886, "grad_norm": 0.0698009729385376, "learning_rate": 3.301678743417796e-05, "loss": 0.2304, "step": 42040 }, { "epoch": 3.4057841866493845, "grad_norm": 0.05952319875359535, "learning_rate": 3.301228678158333e-05, "loss": 0.2267, "step": 42041 }, { "epoch": 3.405865197666883, "grad_norm": 0.08061492443084717, "learning_rate": 3.3007786128988706e-05, "loss": 0.2407, "step": 42042 }, { "epoch": 3.405946208684381, "grad_norm": 0.0819898247718811, "learning_rate": 3.300328547639408e-05, "loss": 0.2488, "step": 42043 }, { "epoch": 3.4060272197018793, "grad_norm": 0.08814553171396255, "learning_rate": 3.299878482379945e-05, "loss": 0.2529, "step": 42044 }, { "epoch": 3.406108230719378, "grad_norm": 0.056524164974689484, "learning_rate": 3.2994284171204827e-05, "loss": 0.2292, "step": 42045 }, { "epoch": 3.406189241736876, "grad_norm": 0.07974979281425476, "learning_rate": 3.29897835186102e-05, "loss": 0.2689, "step": 42046 }, { "epoch": 3.4062702527543745, "grad_norm": 0.073820099234581, "learning_rate": 3.2985282866015574e-05, "loss": 0.1867, "step": 42047 }, { "epoch": 3.406351263771873, "grad_norm": 0.08438839763402939, "learning_rate": 3.298078221342095e-05, "loss": 0.2103, "step": 42048 }, { "epoch": 3.4064322747893714, "grad_norm": 0.08996589481830597, "learning_rate": 3.297628156082632e-05, "loss": 0.2489, "step": 42049 }, { "epoch": 3.4065132858068696, "grad_norm": 0.07115452736616135, "learning_rate": 3.2971780908231695e-05, "loss": 0.2511, "step": 42050 }, { "epoch": 3.406594296824368, "grad_norm": 0.057320065796375275, "learning_rate": 3.296728025563707e-05, "loss": 0.2197, "step": 42051 }, { "epoch": 3.4066753078418666, "grad_norm": 0.07681119441986084, "learning_rate": 3.296277960304244e-05, "loss": 0.2344, "step": 42052 }, { "epoch": 3.406756318859365, "grad_norm": 0.07666799426078796, "learning_rate": 3.2958278950447815e-05, "loss": 0.227, "step": 42053 }, { "epoch": 3.406837329876863, "grad_norm": 0.13270337879657745, "learning_rate": 3.295377829785319e-05, "loss": 0.186, "step": 42054 }, { "epoch": 3.4069183408943617, "grad_norm": 0.06277471780776978, "learning_rate": 3.294927764525856e-05, "loss": 0.2208, "step": 42055 }, { "epoch": 3.40699935191186, "grad_norm": 0.07769570499658585, "learning_rate": 3.2944776992663936e-05, "loss": 0.221, "step": 42056 }, { "epoch": 3.4070803629293582, "grad_norm": 0.08962923288345337, "learning_rate": 3.294027634006931e-05, "loss": 0.2116, "step": 42057 }, { "epoch": 3.407161373946857, "grad_norm": 0.08468111604452133, "learning_rate": 3.2935775687474683e-05, "loss": 0.2549, "step": 42058 }, { "epoch": 3.407242384964355, "grad_norm": 0.07267171889543533, "learning_rate": 3.293127503488006e-05, "loss": 0.2008, "step": 42059 }, { "epoch": 3.4073233959818534, "grad_norm": 0.09492810070514679, "learning_rate": 3.292677438228543e-05, "loss": 0.2306, "step": 42060 }, { "epoch": 3.407404406999352, "grad_norm": 0.06584914028644562, "learning_rate": 3.2922273729690804e-05, "loss": 0.1956, "step": 42061 }, { "epoch": 3.4074854180168503, "grad_norm": 0.06724611669778824, "learning_rate": 3.291777307709618e-05, "loss": 0.2572, "step": 42062 }, { "epoch": 3.4075664290343486, "grad_norm": 0.066348135471344, "learning_rate": 3.291327242450156e-05, "loss": 0.2384, "step": 42063 }, { "epoch": 3.4076474400518473, "grad_norm": 0.08316512405872345, "learning_rate": 3.2908771771906925e-05, "loss": 0.2129, "step": 42064 }, { "epoch": 3.4077284510693455, "grad_norm": 0.0932237058877945, "learning_rate": 3.2904271119312305e-05, "loss": 0.2358, "step": 42065 }, { "epoch": 3.4078094620868438, "grad_norm": 0.07332948595285416, "learning_rate": 3.289977046671768e-05, "loss": 0.2339, "step": 42066 }, { "epoch": 3.407890473104342, "grad_norm": 0.07958756387233734, "learning_rate": 3.2895269814123046e-05, "loss": 0.2409, "step": 42067 }, { "epoch": 3.4079714841218407, "grad_norm": 0.08175527304410934, "learning_rate": 3.2890769161528426e-05, "loss": 0.2372, "step": 42068 }, { "epoch": 3.408052495139339, "grad_norm": 0.08566399663686752, "learning_rate": 3.28862685089338e-05, "loss": 0.2355, "step": 42069 }, { "epoch": 3.408133506156837, "grad_norm": 0.07212253659963608, "learning_rate": 3.288176785633917e-05, "loss": 0.2183, "step": 42070 }, { "epoch": 3.408214517174336, "grad_norm": 0.08052658289670944, "learning_rate": 3.287726720374455e-05, "loss": 0.215, "step": 42071 }, { "epoch": 3.408295528191834, "grad_norm": 0.07595360279083252, "learning_rate": 3.287276655114992e-05, "loss": 0.2222, "step": 42072 }, { "epoch": 3.4083765392093324, "grad_norm": 0.08452226221561432, "learning_rate": 3.286826589855529e-05, "loss": 0.2687, "step": 42073 }, { "epoch": 3.4084575502268306, "grad_norm": 0.06990109384059906, "learning_rate": 3.286376524596067e-05, "loss": 0.2341, "step": 42074 }, { "epoch": 3.4085385612443293, "grad_norm": 0.0811610296368599, "learning_rate": 3.285926459336604e-05, "loss": 0.2131, "step": 42075 }, { "epoch": 3.4086195722618275, "grad_norm": 0.0643961951136589, "learning_rate": 3.2854763940771415e-05, "loss": 0.2123, "step": 42076 }, { "epoch": 3.408700583279326, "grad_norm": 0.08913237601518631, "learning_rate": 3.285026328817679e-05, "loss": 0.2205, "step": 42077 }, { "epoch": 3.4087815942968245, "grad_norm": 0.07711271941661835, "learning_rate": 3.284576263558216e-05, "loss": 0.2441, "step": 42078 }, { "epoch": 3.4088626053143227, "grad_norm": 0.0935467854142189, "learning_rate": 3.2841261982987536e-05, "loss": 0.2372, "step": 42079 }, { "epoch": 3.408943616331821, "grad_norm": 0.06729929149150848, "learning_rate": 3.283676133039291e-05, "loss": 0.1943, "step": 42080 }, { "epoch": 3.4090246273493197, "grad_norm": 0.07701172679662704, "learning_rate": 3.283226067779828e-05, "loss": 0.2222, "step": 42081 }, { "epoch": 3.409105638366818, "grad_norm": 0.06777993589639664, "learning_rate": 3.282776002520366e-05, "loss": 0.2144, "step": 42082 }, { "epoch": 3.409186649384316, "grad_norm": 0.06507458537817001, "learning_rate": 3.282325937260903e-05, "loss": 0.22, "step": 42083 }, { "epoch": 3.409267660401815, "grad_norm": 0.07021746039390564, "learning_rate": 3.2818758720014404e-05, "loss": 0.2211, "step": 42084 }, { "epoch": 3.409348671419313, "grad_norm": 0.07517733424901962, "learning_rate": 3.281425806741978e-05, "loss": 0.23, "step": 42085 }, { "epoch": 3.4094296824368113, "grad_norm": 0.06435801088809967, "learning_rate": 3.280975741482515e-05, "loss": 0.2325, "step": 42086 }, { "epoch": 3.40951069345431, "grad_norm": 0.09303963929414749, "learning_rate": 3.2805256762230525e-05, "loss": 0.2481, "step": 42087 }, { "epoch": 3.4095917044718083, "grad_norm": 0.06462705135345459, "learning_rate": 3.28007561096359e-05, "loss": 0.2269, "step": 42088 }, { "epoch": 3.4096727154893065, "grad_norm": 0.07319168001413345, "learning_rate": 3.279625545704127e-05, "loss": 0.2431, "step": 42089 }, { "epoch": 3.4097537265068047, "grad_norm": 0.07174596190452576, "learning_rate": 3.2791754804446646e-05, "loss": 0.2157, "step": 42090 }, { "epoch": 3.4098347375243034, "grad_norm": 0.07358871400356293, "learning_rate": 3.278725415185202e-05, "loss": 0.206, "step": 42091 }, { "epoch": 3.4099157485418017, "grad_norm": 0.07019001245498657, "learning_rate": 3.278275349925739e-05, "loss": 0.1846, "step": 42092 }, { "epoch": 3.4099967595593, "grad_norm": 0.08530454337596893, "learning_rate": 3.2778252846662766e-05, "loss": 0.2406, "step": 42093 }, { "epoch": 3.4100777705767986, "grad_norm": 0.0668715164065361, "learning_rate": 3.277375219406814e-05, "loss": 0.2663, "step": 42094 }, { "epoch": 3.410158781594297, "grad_norm": 0.07468143850564957, "learning_rate": 3.2769251541473514e-05, "loss": 0.2462, "step": 42095 }, { "epoch": 3.410239792611795, "grad_norm": 0.06530489772558212, "learning_rate": 3.276475088887889e-05, "loss": 0.2158, "step": 42096 }, { "epoch": 3.4103208036292934, "grad_norm": 0.07643679529428482, "learning_rate": 3.276025023628426e-05, "loss": 0.2148, "step": 42097 }, { "epoch": 3.410401814646792, "grad_norm": 0.08456093817949295, "learning_rate": 3.275574958368964e-05, "loss": 0.2449, "step": 42098 }, { "epoch": 3.4104828256642903, "grad_norm": 0.07500334829092026, "learning_rate": 3.275124893109501e-05, "loss": 0.2554, "step": 42099 }, { "epoch": 3.4105638366817885, "grad_norm": 0.05971658602356911, "learning_rate": 3.274674827850038e-05, "loss": 0.2061, "step": 42100 }, { "epoch": 3.410644847699287, "grad_norm": 0.06988444924354553, "learning_rate": 3.274224762590576e-05, "loss": 0.239, "step": 42101 }, { "epoch": 3.4107258587167855, "grad_norm": 0.0700775682926178, "learning_rate": 3.2737746973311136e-05, "loss": 0.2653, "step": 42102 }, { "epoch": 3.4108068697342837, "grad_norm": 0.08620148152112961, "learning_rate": 3.27332463207165e-05, "loss": 0.209, "step": 42103 }, { "epoch": 3.4108878807517824, "grad_norm": 0.06676038354635239, "learning_rate": 3.272874566812188e-05, "loss": 0.2513, "step": 42104 }, { "epoch": 3.4109688917692806, "grad_norm": 0.07150786370038986, "learning_rate": 3.2724245015527256e-05, "loss": 0.2662, "step": 42105 }, { "epoch": 3.411049902786779, "grad_norm": 0.07146339863538742, "learning_rate": 3.271974436293262e-05, "loss": 0.2214, "step": 42106 }, { "epoch": 3.4111309138042776, "grad_norm": 0.07464755326509476, "learning_rate": 3.2715243710338004e-05, "loss": 0.2759, "step": 42107 }, { "epoch": 3.411211924821776, "grad_norm": 0.08268515020608902, "learning_rate": 3.271074305774338e-05, "loss": 0.2658, "step": 42108 }, { "epoch": 3.411292935839274, "grad_norm": 0.05757330358028412, "learning_rate": 3.2706242405148744e-05, "loss": 0.2068, "step": 42109 }, { "epoch": 3.4113739468567728, "grad_norm": 0.06498200446367264, "learning_rate": 3.2701741752554124e-05, "loss": 0.2338, "step": 42110 }, { "epoch": 3.411454957874271, "grad_norm": 0.08416248857975006, "learning_rate": 3.26972410999595e-05, "loss": 0.232, "step": 42111 }, { "epoch": 3.4115359688917692, "grad_norm": 0.07173217833042145, "learning_rate": 3.2692740447364865e-05, "loss": 0.2071, "step": 42112 }, { "epoch": 3.4116169799092675, "grad_norm": 0.06545745581388474, "learning_rate": 3.2688239794770245e-05, "loss": 0.2342, "step": 42113 }, { "epoch": 3.411697990926766, "grad_norm": 0.0655227079987526, "learning_rate": 3.268373914217562e-05, "loss": 0.2479, "step": 42114 }, { "epoch": 3.4117790019442644, "grad_norm": 0.07191194593906403, "learning_rate": 3.267923848958099e-05, "loss": 0.2217, "step": 42115 }, { "epoch": 3.4118600129617627, "grad_norm": 0.07084406167268753, "learning_rate": 3.2674737836986366e-05, "loss": 0.224, "step": 42116 }, { "epoch": 3.4119410239792614, "grad_norm": 0.06499455124139786, "learning_rate": 3.267023718439174e-05, "loss": 0.2151, "step": 42117 }, { "epoch": 3.4120220349967596, "grad_norm": 0.06535441428422928, "learning_rate": 3.266573653179711e-05, "loss": 0.2235, "step": 42118 }, { "epoch": 3.412103046014258, "grad_norm": 0.064212866127491, "learning_rate": 3.266123587920249e-05, "loss": 0.2189, "step": 42119 }, { "epoch": 3.412184057031756, "grad_norm": 0.06990158557891846, "learning_rate": 3.265673522660786e-05, "loss": 0.2222, "step": 42120 }, { "epoch": 3.412265068049255, "grad_norm": 0.06559919565916061, "learning_rate": 3.2652234574013234e-05, "loss": 0.2046, "step": 42121 }, { "epoch": 3.412346079066753, "grad_norm": 0.08060586452484131, "learning_rate": 3.264773392141861e-05, "loss": 0.2186, "step": 42122 }, { "epoch": 3.4124270900842513, "grad_norm": 0.07627402991056442, "learning_rate": 3.264323326882398e-05, "loss": 0.231, "step": 42123 }, { "epoch": 3.41250810110175, "grad_norm": 0.07456567883491516, "learning_rate": 3.2638732616229355e-05, "loss": 0.2277, "step": 42124 }, { "epoch": 3.412589112119248, "grad_norm": 0.0751354917883873, "learning_rate": 3.263423196363473e-05, "loss": 0.2283, "step": 42125 }, { "epoch": 3.4126701231367464, "grad_norm": 0.08349383622407913, "learning_rate": 3.26297313110401e-05, "loss": 0.207, "step": 42126 }, { "epoch": 3.412751134154245, "grad_norm": 0.07395781576633453, "learning_rate": 3.2625230658445476e-05, "loss": 0.2362, "step": 42127 }, { "epoch": 3.4128321451717434, "grad_norm": 0.06975575536489487, "learning_rate": 3.262073000585085e-05, "loss": 0.209, "step": 42128 }, { "epoch": 3.4129131561892416, "grad_norm": 0.060393065214157104, "learning_rate": 3.261622935325622e-05, "loss": 0.2266, "step": 42129 }, { "epoch": 3.4129941672067403, "grad_norm": 0.08148559927940369, "learning_rate": 3.2611728700661596e-05, "loss": 0.2365, "step": 42130 }, { "epoch": 3.4130751782242386, "grad_norm": 0.07058338075876236, "learning_rate": 3.260722804806697e-05, "loss": 0.2393, "step": 42131 }, { "epoch": 3.413156189241737, "grad_norm": 0.09528659284114838, "learning_rate": 3.2602727395472344e-05, "loss": 0.2786, "step": 42132 }, { "epoch": 3.4132372002592355, "grad_norm": 0.08425279706716537, "learning_rate": 3.259822674287772e-05, "loss": 0.2412, "step": 42133 }, { "epoch": 3.4133182112767337, "grad_norm": 0.07566172629594803, "learning_rate": 3.25937260902831e-05, "loss": 0.2079, "step": 42134 }, { "epoch": 3.413399222294232, "grad_norm": 0.08536390215158463, "learning_rate": 3.2589225437688464e-05, "loss": 0.245, "step": 42135 }, { "epoch": 3.4134802333117302, "grad_norm": 0.05408225953578949, "learning_rate": 3.258472478509384e-05, "loss": 0.1951, "step": 42136 }, { "epoch": 3.413561244329229, "grad_norm": 0.08090090751647949, "learning_rate": 3.258022413249922e-05, "loss": 0.2493, "step": 42137 }, { "epoch": 3.413642255346727, "grad_norm": 0.07815258949995041, "learning_rate": 3.2575723479904585e-05, "loss": 0.2374, "step": 42138 }, { "epoch": 3.4137232663642254, "grad_norm": 0.07899107038974762, "learning_rate": 3.257122282730996e-05, "loss": 0.2339, "step": 42139 }, { "epoch": 3.413804277381724, "grad_norm": 0.08084166795015335, "learning_rate": 3.256672217471534e-05, "loss": 0.2064, "step": 42140 }, { "epoch": 3.4138852883992223, "grad_norm": 0.06659887731075287, "learning_rate": 3.256222152212071e-05, "loss": 0.2341, "step": 42141 }, { "epoch": 3.4139662994167206, "grad_norm": 0.06368932873010635, "learning_rate": 3.255772086952608e-05, "loss": 0.2005, "step": 42142 }, { "epoch": 3.414047310434219, "grad_norm": 0.07554011791944504, "learning_rate": 3.255322021693146e-05, "loss": 0.2267, "step": 42143 }, { "epoch": 3.4141283214517175, "grad_norm": 0.07116712629795074, "learning_rate": 3.2548719564336834e-05, "loss": 0.2392, "step": 42144 }, { "epoch": 3.4142093324692158, "grad_norm": 0.07155577838420868, "learning_rate": 3.25442189117422e-05, "loss": 0.2459, "step": 42145 }, { "epoch": 3.414290343486714, "grad_norm": 0.07143085449934006, "learning_rate": 3.253971825914758e-05, "loss": 0.2184, "step": 42146 }, { "epoch": 3.4143713545042127, "grad_norm": 0.07936885952949524, "learning_rate": 3.2535217606552954e-05, "loss": 0.2474, "step": 42147 }, { "epoch": 3.414452365521711, "grad_norm": 0.06565649062395096, "learning_rate": 3.253071695395832e-05, "loss": 0.2152, "step": 42148 }, { "epoch": 3.414533376539209, "grad_norm": 0.0671452209353447, "learning_rate": 3.25262163013637e-05, "loss": 0.2304, "step": 42149 }, { "epoch": 3.414614387556708, "grad_norm": 0.08637391775846481, "learning_rate": 3.2521715648769075e-05, "loss": 0.209, "step": 42150 }, { "epoch": 3.414695398574206, "grad_norm": 0.082486093044281, "learning_rate": 3.251721499617444e-05, "loss": 0.2421, "step": 42151 }, { "epoch": 3.4147764095917044, "grad_norm": 0.0916626825928688, "learning_rate": 3.251271434357982e-05, "loss": 0.2255, "step": 42152 }, { "epoch": 3.414857420609203, "grad_norm": 0.08788851648569107, "learning_rate": 3.2508213690985196e-05, "loss": 0.246, "step": 42153 }, { "epoch": 3.4149384316267013, "grad_norm": 0.06302738189697266, "learning_rate": 3.250371303839057e-05, "loss": 0.2306, "step": 42154 }, { "epoch": 3.4150194426441995, "grad_norm": 0.0750952884554863, "learning_rate": 3.249921238579594e-05, "loss": 0.2855, "step": 42155 }, { "epoch": 3.4151004536616982, "grad_norm": 0.06570327281951904, "learning_rate": 3.249471173320132e-05, "loss": 0.1777, "step": 42156 }, { "epoch": 3.4151814646791965, "grad_norm": 0.0844057947397232, "learning_rate": 3.249021108060669e-05, "loss": 0.2406, "step": 42157 }, { "epoch": 3.4152624756966947, "grad_norm": 0.0791000947356224, "learning_rate": 3.2485710428012064e-05, "loss": 0.2303, "step": 42158 }, { "epoch": 3.415343486714193, "grad_norm": 0.07193057239055634, "learning_rate": 3.248120977541744e-05, "loss": 0.2337, "step": 42159 }, { "epoch": 3.4154244977316917, "grad_norm": 0.06310512125492096, "learning_rate": 3.247670912282281e-05, "loss": 0.2387, "step": 42160 }, { "epoch": 3.41550550874919, "grad_norm": 0.06961341202259064, "learning_rate": 3.2472208470228185e-05, "loss": 0.1972, "step": 42161 }, { "epoch": 3.415586519766688, "grad_norm": 0.08259893208742142, "learning_rate": 3.246770781763356e-05, "loss": 0.2519, "step": 42162 }, { "epoch": 3.415667530784187, "grad_norm": 0.07711710780858994, "learning_rate": 3.246320716503893e-05, "loss": 0.2417, "step": 42163 }, { "epoch": 3.415748541801685, "grad_norm": 0.07411278784275055, "learning_rate": 3.2458706512444306e-05, "loss": 0.2201, "step": 42164 }, { "epoch": 3.4158295528191833, "grad_norm": 0.06382159888744354, "learning_rate": 3.245420585984968e-05, "loss": 0.2448, "step": 42165 }, { "epoch": 3.4159105638366816, "grad_norm": 0.07473917305469513, "learning_rate": 3.244970520725505e-05, "loss": 0.2149, "step": 42166 }, { "epoch": 3.4159915748541803, "grad_norm": 0.06263245642185211, "learning_rate": 3.244520455466043e-05, "loss": 0.2017, "step": 42167 }, { "epoch": 3.4160725858716785, "grad_norm": 0.08167140185832977, "learning_rate": 3.24407039020658e-05, "loss": 0.2018, "step": 42168 }, { "epoch": 3.4161535968891767, "grad_norm": 0.06857883185148239, "learning_rate": 3.2436203249471174e-05, "loss": 0.2309, "step": 42169 }, { "epoch": 3.4162346079066754, "grad_norm": 0.06811191141605377, "learning_rate": 3.2431702596876554e-05, "loss": 0.2043, "step": 42170 }, { "epoch": 3.4163156189241737, "grad_norm": 0.06153199449181557, "learning_rate": 3.242720194428192e-05, "loss": 0.2259, "step": 42171 }, { "epoch": 3.416396629941672, "grad_norm": 0.07126566022634506, "learning_rate": 3.2422701291687295e-05, "loss": 0.2328, "step": 42172 }, { "epoch": 3.4164776409591706, "grad_norm": 0.06919670104980469, "learning_rate": 3.2418200639092675e-05, "loss": 0.2584, "step": 42173 }, { "epoch": 3.416558651976669, "grad_norm": 0.07292457669973373, "learning_rate": 3.241369998649804e-05, "loss": 0.2119, "step": 42174 }, { "epoch": 3.416639662994167, "grad_norm": 0.08359368145465851, "learning_rate": 3.2409199333903415e-05, "loss": 0.2248, "step": 42175 }, { "epoch": 3.416720674011666, "grad_norm": 0.06527353078126907, "learning_rate": 3.2404698681308796e-05, "loss": 0.2011, "step": 42176 }, { "epoch": 3.416801685029164, "grad_norm": 0.06260892003774643, "learning_rate": 3.240019802871416e-05, "loss": 0.2024, "step": 42177 }, { "epoch": 3.4168826960466623, "grad_norm": 0.06110214442014694, "learning_rate": 3.2395697376119536e-05, "loss": 0.195, "step": 42178 }, { "epoch": 3.416963707064161, "grad_norm": 0.08721933513879776, "learning_rate": 3.2391196723524917e-05, "loss": 0.2181, "step": 42179 }, { "epoch": 3.417044718081659, "grad_norm": 0.07511059194803238, "learning_rate": 3.238669607093029e-05, "loss": 0.2377, "step": 42180 }, { "epoch": 3.4171257290991575, "grad_norm": 0.06601770967245102, "learning_rate": 3.238219541833566e-05, "loss": 0.2181, "step": 42181 }, { "epoch": 3.4172067401166557, "grad_norm": 0.07815555483102798, "learning_rate": 3.237769476574104e-05, "loss": 0.2616, "step": 42182 }, { "epoch": 3.4172877511341544, "grad_norm": 0.08321576565504074, "learning_rate": 3.237319411314641e-05, "loss": 0.2319, "step": 42183 }, { "epoch": 3.4173687621516526, "grad_norm": 0.06954724341630936, "learning_rate": 3.236869346055178e-05, "loss": 0.2003, "step": 42184 }, { "epoch": 3.417449773169151, "grad_norm": 0.07065138220787048, "learning_rate": 3.236419280795716e-05, "loss": 0.205, "step": 42185 }, { "epoch": 3.4175307841866496, "grad_norm": 0.07739468663930893, "learning_rate": 3.235969215536253e-05, "loss": 0.2019, "step": 42186 }, { "epoch": 3.417611795204148, "grad_norm": 0.06493613123893738, "learning_rate": 3.23551915027679e-05, "loss": 0.238, "step": 42187 }, { "epoch": 3.417692806221646, "grad_norm": 0.07237594574689865, "learning_rate": 3.235069085017328e-05, "loss": 0.2355, "step": 42188 }, { "epoch": 3.4177738172391443, "grad_norm": 0.08828700333833694, "learning_rate": 3.234619019757865e-05, "loss": 0.2311, "step": 42189 }, { "epoch": 3.417854828256643, "grad_norm": 0.09129707515239716, "learning_rate": 3.234168954498402e-05, "loss": 0.237, "step": 42190 }, { "epoch": 3.4179358392741412, "grad_norm": 0.06474599242210388, "learning_rate": 3.23371888923894e-05, "loss": 0.2354, "step": 42191 }, { "epoch": 3.4180168502916395, "grad_norm": 0.07153916358947754, "learning_rate": 3.2332688239794773e-05, "loss": 0.1957, "step": 42192 }, { "epoch": 3.418097861309138, "grad_norm": 0.074933722615242, "learning_rate": 3.232818758720015e-05, "loss": 0.282, "step": 42193 }, { "epoch": 3.4181788723266364, "grad_norm": 0.07410770654678345, "learning_rate": 3.232368693460552e-05, "loss": 0.2521, "step": 42194 }, { "epoch": 3.4182598833441347, "grad_norm": 0.0762971043586731, "learning_rate": 3.2319186282010894e-05, "loss": 0.252, "step": 42195 }, { "epoch": 3.4183408943616334, "grad_norm": 0.06339481472969055, "learning_rate": 3.231468562941627e-05, "loss": 0.2324, "step": 42196 }, { "epoch": 3.4184219053791316, "grad_norm": 0.0860566571354866, "learning_rate": 3.231018497682164e-05, "loss": 0.2395, "step": 42197 }, { "epoch": 3.41850291639663, "grad_norm": 0.07654612511396408, "learning_rate": 3.2305684324227015e-05, "loss": 0.2552, "step": 42198 }, { "epoch": 3.4185839274141285, "grad_norm": 0.06669507175683975, "learning_rate": 3.230118367163239e-05, "loss": 0.2329, "step": 42199 }, { "epoch": 3.4186649384316268, "grad_norm": 0.06842575967311859, "learning_rate": 3.229668301903776e-05, "loss": 0.2053, "step": 42200 }, { "epoch": 3.418745949449125, "grad_norm": 0.07196977734565735, "learning_rate": 3.2292182366443136e-05, "loss": 0.2686, "step": 42201 }, { "epoch": 3.4188269604666237, "grad_norm": 0.06731472909450531, "learning_rate": 3.228768171384851e-05, "loss": 0.2441, "step": 42202 }, { "epoch": 3.418907971484122, "grad_norm": 0.09737128764390945, "learning_rate": 3.228318106125388e-05, "loss": 0.2306, "step": 42203 }, { "epoch": 3.41898898250162, "grad_norm": 0.06198712810873985, "learning_rate": 3.227868040865926e-05, "loss": 0.2055, "step": 42204 }, { "epoch": 3.4190699935191184, "grad_norm": 0.0738559439778328, "learning_rate": 3.227417975606463e-05, "loss": 0.2272, "step": 42205 }, { "epoch": 3.419151004536617, "grad_norm": 0.07987718284130096, "learning_rate": 3.226967910347001e-05, "loss": 0.2554, "step": 42206 }, { "epoch": 3.4192320155541154, "grad_norm": 0.07169181108474731, "learning_rate": 3.226517845087538e-05, "loss": 0.1866, "step": 42207 }, { "epoch": 3.4193130265716136, "grad_norm": 0.0694207176566124, "learning_rate": 3.226067779828075e-05, "loss": 0.2508, "step": 42208 }, { "epoch": 3.4193940375891123, "grad_norm": 0.07899581640958786, "learning_rate": 3.225617714568613e-05, "loss": 0.2186, "step": 42209 }, { "epoch": 3.4194750486066106, "grad_norm": 0.05652213841676712, "learning_rate": 3.22516764930915e-05, "loss": 0.211, "step": 42210 }, { "epoch": 3.419556059624109, "grad_norm": 0.06238555163145065, "learning_rate": 3.224717584049687e-05, "loss": 0.2145, "step": 42211 }, { "epoch": 3.419637070641607, "grad_norm": 0.07030345499515533, "learning_rate": 3.224267518790225e-05, "loss": 0.2142, "step": 42212 }, { "epoch": 3.4197180816591057, "grad_norm": 0.06452500075101852, "learning_rate": 3.223817453530762e-05, "loss": 0.2548, "step": 42213 }, { "epoch": 3.419799092676604, "grad_norm": 0.06940393894910812, "learning_rate": 3.223367388271299e-05, "loss": 0.2664, "step": 42214 }, { "epoch": 3.4198801036941022, "grad_norm": 0.06870554387569427, "learning_rate": 3.222917323011837e-05, "loss": 0.1984, "step": 42215 }, { "epoch": 3.419961114711601, "grad_norm": 0.0725892186164856, "learning_rate": 3.222467257752374e-05, "loss": 0.2112, "step": 42216 }, { "epoch": 3.420042125729099, "grad_norm": 0.08166629821062088, "learning_rate": 3.2220171924929113e-05, "loss": 0.2612, "step": 42217 }, { "epoch": 3.4201231367465974, "grad_norm": 0.09648881107568741, "learning_rate": 3.2215671272334494e-05, "loss": 0.2564, "step": 42218 }, { "epoch": 3.420204147764096, "grad_norm": 0.07916789501905441, "learning_rate": 3.221117061973986e-05, "loss": 0.2157, "step": 42219 }, { "epoch": 3.4202851587815943, "grad_norm": 0.07626831531524658, "learning_rate": 3.2206669967145234e-05, "loss": 0.2415, "step": 42220 }, { "epoch": 3.4203661697990926, "grad_norm": 0.06276006996631622, "learning_rate": 3.2202169314550615e-05, "loss": 0.1931, "step": 42221 }, { "epoch": 3.4204471808165913, "grad_norm": 0.06504850834608078, "learning_rate": 3.219766866195599e-05, "loss": 0.2402, "step": 42222 }, { "epoch": 3.4205281918340895, "grad_norm": 0.07947579771280289, "learning_rate": 3.2193168009361355e-05, "loss": 0.2329, "step": 42223 }, { "epoch": 3.4206092028515878, "grad_norm": 0.06318650394678116, "learning_rate": 3.2188667356766735e-05, "loss": 0.2145, "step": 42224 }, { "epoch": 3.420690213869086, "grad_norm": 0.07059568166732788, "learning_rate": 3.218416670417211e-05, "loss": 0.2133, "step": 42225 }, { "epoch": 3.4207712248865847, "grad_norm": 0.06386543810367584, "learning_rate": 3.2179666051577476e-05, "loss": 0.2222, "step": 42226 }, { "epoch": 3.420852235904083, "grad_norm": 0.061109598726034164, "learning_rate": 3.2175165398982856e-05, "loss": 0.2319, "step": 42227 }, { "epoch": 3.420933246921581, "grad_norm": 0.06750620156526566, "learning_rate": 3.217066474638823e-05, "loss": 0.2535, "step": 42228 }, { "epoch": 3.42101425793908, "grad_norm": 0.06807658076286316, "learning_rate": 3.21661640937936e-05, "loss": 0.2076, "step": 42229 }, { "epoch": 3.421095268956578, "grad_norm": 0.06978774815797806, "learning_rate": 3.216166344119898e-05, "loss": 0.2202, "step": 42230 }, { "epoch": 3.4211762799740764, "grad_norm": 0.07130514085292816, "learning_rate": 3.215716278860435e-05, "loss": 0.2647, "step": 42231 }, { "epoch": 3.4212572909915746, "grad_norm": 0.04566334933042526, "learning_rate": 3.215266213600972e-05, "loss": 0.2001, "step": 42232 }, { "epoch": 3.4213383020090733, "grad_norm": 0.06830474734306335, "learning_rate": 3.21481614834151e-05, "loss": 0.2452, "step": 42233 }, { "epoch": 3.4214193130265715, "grad_norm": 0.0593663826584816, "learning_rate": 3.214366083082047e-05, "loss": 0.2232, "step": 42234 }, { "epoch": 3.42150032404407, "grad_norm": 0.06897901743650436, "learning_rate": 3.2139160178225845e-05, "loss": 0.259, "step": 42235 }, { "epoch": 3.4215813350615685, "grad_norm": 0.06421735137701035, "learning_rate": 3.213465952563122e-05, "loss": 0.2263, "step": 42236 }, { "epoch": 3.4216623460790667, "grad_norm": 0.0799030214548111, "learning_rate": 3.213015887303659e-05, "loss": 0.2761, "step": 42237 }, { "epoch": 3.421743357096565, "grad_norm": 0.07802151888608932, "learning_rate": 3.2125658220441966e-05, "loss": 0.219, "step": 42238 }, { "epoch": 3.4218243681140637, "grad_norm": 0.08840132504701614, "learning_rate": 3.212115756784734e-05, "loss": 0.2507, "step": 42239 }, { "epoch": 3.421905379131562, "grad_norm": 0.09409713000059128, "learning_rate": 3.211665691525271e-05, "loss": 0.2447, "step": 42240 }, { "epoch": 3.42198639014906, "grad_norm": 0.08871884644031525, "learning_rate": 3.211215626265809e-05, "loss": 0.2194, "step": 42241 }, { "epoch": 3.422067401166559, "grad_norm": 0.06486831605434418, "learning_rate": 3.210765561006346e-05, "loss": 0.2718, "step": 42242 }, { "epoch": 3.422148412184057, "grad_norm": 0.07197066396474838, "learning_rate": 3.2103154957468834e-05, "loss": 0.2166, "step": 42243 }, { "epoch": 3.4222294232015553, "grad_norm": 0.0681011825799942, "learning_rate": 3.209865430487421e-05, "loss": 0.2244, "step": 42244 }, { "epoch": 3.422310434219054, "grad_norm": 0.07793204486370087, "learning_rate": 3.209415365227958e-05, "loss": 0.2591, "step": 42245 }, { "epoch": 3.4223914452365523, "grad_norm": 0.08512086421251297, "learning_rate": 3.2089652999684955e-05, "loss": 0.2473, "step": 42246 }, { "epoch": 3.4224724562540505, "grad_norm": 0.07522961497306824, "learning_rate": 3.208515234709033e-05, "loss": 0.2238, "step": 42247 }, { "epoch": 3.4225534672715487, "grad_norm": 0.059873390942811966, "learning_rate": 3.208065169449571e-05, "loss": 0.2099, "step": 42248 }, { "epoch": 3.4226344782890474, "grad_norm": 0.07078446447849274, "learning_rate": 3.2076151041901076e-05, "loss": 0.2062, "step": 42249 }, { "epoch": 3.4227154893065457, "grad_norm": 0.08134453743696213, "learning_rate": 3.207165038930645e-05, "loss": 0.2315, "step": 42250 }, { "epoch": 3.422796500324044, "grad_norm": 0.057578153908252716, "learning_rate": 3.206714973671183e-05, "loss": 0.2245, "step": 42251 }, { "epoch": 3.4228775113415426, "grad_norm": 0.08758700639009476, "learning_rate": 3.2062649084117196e-05, "loss": 0.2325, "step": 42252 }, { "epoch": 3.422958522359041, "grad_norm": 0.07909160107374191, "learning_rate": 3.205814843152257e-05, "loss": 0.2182, "step": 42253 }, { "epoch": 3.423039533376539, "grad_norm": 0.07638631761074066, "learning_rate": 3.205364777892795e-05, "loss": 0.2338, "step": 42254 }, { "epoch": 3.4231205443940373, "grad_norm": 0.0887717455625534, "learning_rate": 3.204914712633332e-05, "loss": 0.2464, "step": 42255 }, { "epoch": 3.423201555411536, "grad_norm": 0.07865258306264877, "learning_rate": 3.204464647373869e-05, "loss": 0.2144, "step": 42256 }, { "epoch": 3.4232825664290343, "grad_norm": 0.07845400273799896, "learning_rate": 3.204014582114407e-05, "loss": 0.223, "step": 42257 }, { "epoch": 3.4233635774465325, "grad_norm": 0.0784381777048111, "learning_rate": 3.203564516854944e-05, "loss": 0.2613, "step": 42258 }, { "epoch": 3.423444588464031, "grad_norm": 0.07567717880010605, "learning_rate": 3.203114451595481e-05, "loss": 0.2165, "step": 42259 }, { "epoch": 3.4235255994815295, "grad_norm": 0.07774611562490463, "learning_rate": 3.202664386336019e-05, "loss": 0.2537, "step": 42260 }, { "epoch": 3.4236066104990277, "grad_norm": 0.05703600496053696, "learning_rate": 3.2022143210765566e-05, "loss": 0.1936, "step": 42261 }, { "epoch": 3.4236876215165264, "grad_norm": 0.061656661331653595, "learning_rate": 3.201764255817093e-05, "loss": 0.219, "step": 42262 }, { "epoch": 3.4237686325340246, "grad_norm": 0.06856316328048706, "learning_rate": 3.201314190557631e-05, "loss": 0.2273, "step": 42263 }, { "epoch": 3.423849643551523, "grad_norm": 0.08919894695281982, "learning_rate": 3.2008641252981686e-05, "loss": 0.2685, "step": 42264 }, { "epoch": 3.4239306545690216, "grad_norm": 0.09014753997325897, "learning_rate": 3.200414060038705e-05, "loss": 0.2524, "step": 42265 }, { "epoch": 3.42401166558652, "grad_norm": 0.057525500655174255, "learning_rate": 3.1999639947792434e-05, "loss": 0.2013, "step": 42266 }, { "epoch": 3.424092676604018, "grad_norm": 0.07143286615610123, "learning_rate": 3.199513929519781e-05, "loss": 0.2349, "step": 42267 }, { "epoch": 3.4241736876215167, "grad_norm": 0.06419500708580017, "learning_rate": 3.1990638642603174e-05, "loss": 0.2281, "step": 42268 }, { "epoch": 3.424254698639015, "grad_norm": 0.05905544385313988, "learning_rate": 3.1986137990008554e-05, "loss": 0.2045, "step": 42269 }, { "epoch": 3.4243357096565132, "grad_norm": 0.06558793783187866, "learning_rate": 3.198163733741393e-05, "loss": 0.2118, "step": 42270 }, { "epoch": 3.4244167206740115, "grad_norm": 0.06668176501989365, "learning_rate": 3.1977136684819295e-05, "loss": 0.2378, "step": 42271 }, { "epoch": 3.42449773169151, "grad_norm": 0.05425681918859482, "learning_rate": 3.1972636032224675e-05, "loss": 0.1832, "step": 42272 }, { "epoch": 3.4245787427090084, "grad_norm": 0.05808640643954277, "learning_rate": 3.196813537963005e-05, "loss": 0.2174, "step": 42273 }, { "epoch": 3.4246597537265067, "grad_norm": 0.08048558980226517, "learning_rate": 3.196363472703542e-05, "loss": 0.2616, "step": 42274 }, { "epoch": 3.4247407647440054, "grad_norm": 0.07026704400777817, "learning_rate": 3.1959134074440796e-05, "loss": 0.2646, "step": 42275 }, { "epoch": 3.4248217757615036, "grad_norm": 0.0638585314154625, "learning_rate": 3.195463342184617e-05, "loss": 0.2102, "step": 42276 }, { "epoch": 3.424902786779002, "grad_norm": 0.06224946305155754, "learning_rate": 3.195013276925154e-05, "loss": 0.2134, "step": 42277 }, { "epoch": 3.4249837977965, "grad_norm": 0.07324585318565369, "learning_rate": 3.194563211665692e-05, "loss": 0.2348, "step": 42278 }, { "epoch": 3.4250648088139988, "grad_norm": 0.07813339680433273, "learning_rate": 3.194113146406229e-05, "loss": 0.2311, "step": 42279 }, { "epoch": 3.425145819831497, "grad_norm": 0.07276789844036102, "learning_rate": 3.1936630811467664e-05, "loss": 0.2428, "step": 42280 }, { "epoch": 3.4252268308489953, "grad_norm": 0.06303577870130539, "learning_rate": 3.193213015887304e-05, "loss": 0.1835, "step": 42281 }, { "epoch": 3.425307841866494, "grad_norm": 0.06274379789829254, "learning_rate": 3.192762950627841e-05, "loss": 0.2052, "step": 42282 }, { "epoch": 3.425388852883992, "grad_norm": 0.0668783113360405, "learning_rate": 3.1923128853683785e-05, "loss": 0.2421, "step": 42283 }, { "epoch": 3.4254698639014904, "grad_norm": 0.08327113837003708, "learning_rate": 3.191862820108916e-05, "loss": 0.2334, "step": 42284 }, { "epoch": 3.425550874918989, "grad_norm": 0.06803330034017563, "learning_rate": 3.191412754849453e-05, "loss": 0.1938, "step": 42285 }, { "epoch": 3.4256318859364874, "grad_norm": 0.06721960753202438, "learning_rate": 3.1909626895899906e-05, "loss": 0.2019, "step": 42286 }, { "epoch": 3.4257128969539856, "grad_norm": 0.07778126001358032, "learning_rate": 3.1905126243305286e-05, "loss": 0.2143, "step": 42287 }, { "epoch": 3.4257939079714843, "grad_norm": 0.08955970406532288, "learning_rate": 3.190062559071065e-05, "loss": 0.2377, "step": 42288 }, { "epoch": 3.4258749189889826, "grad_norm": 0.07158930599689484, "learning_rate": 3.1896124938116026e-05, "loss": 0.1812, "step": 42289 }, { "epoch": 3.425955930006481, "grad_norm": 0.06083231419324875, "learning_rate": 3.189162428552141e-05, "loss": 0.1819, "step": 42290 }, { "epoch": 3.4260369410239795, "grad_norm": 0.08491310477256775, "learning_rate": 3.1887123632926774e-05, "loss": 0.258, "step": 42291 }, { "epoch": 3.4261179520414777, "grad_norm": 0.06328444182872772, "learning_rate": 3.188262298033215e-05, "loss": 0.2139, "step": 42292 }, { "epoch": 3.426198963058976, "grad_norm": 0.07530766725540161, "learning_rate": 3.187812232773753e-05, "loss": 0.201, "step": 42293 }, { "epoch": 3.426279974076474, "grad_norm": 0.07389926165342331, "learning_rate": 3.1873621675142894e-05, "loss": 0.2153, "step": 42294 }, { "epoch": 3.426360985093973, "grad_norm": 0.0851617231965065, "learning_rate": 3.186912102254827e-05, "loss": 0.2333, "step": 42295 }, { "epoch": 3.426441996111471, "grad_norm": 0.09308183193206787, "learning_rate": 3.186462036995365e-05, "loss": 0.2593, "step": 42296 }, { "epoch": 3.4265230071289694, "grad_norm": 0.06928377598524094, "learning_rate": 3.1860119717359015e-05, "loss": 0.2096, "step": 42297 }, { "epoch": 3.426604018146468, "grad_norm": 0.06693492829799652, "learning_rate": 3.185561906476439e-05, "loss": 0.2151, "step": 42298 }, { "epoch": 3.4266850291639663, "grad_norm": 0.06244364008307457, "learning_rate": 3.185111841216977e-05, "loss": 0.2239, "step": 42299 }, { "epoch": 3.4267660401814646, "grad_norm": 0.07289474457502365, "learning_rate": 3.184661775957514e-05, "loss": 0.2284, "step": 42300 }, { "epoch": 3.426847051198963, "grad_norm": 0.08296742290258408, "learning_rate": 3.184211710698051e-05, "loss": 0.2659, "step": 42301 }, { "epoch": 3.4269280622164615, "grad_norm": 0.08548368513584137, "learning_rate": 3.183761645438589e-05, "loss": 0.2224, "step": 42302 }, { "epoch": 3.4270090732339598, "grad_norm": 0.07132833451032639, "learning_rate": 3.1833115801791264e-05, "loss": 0.2214, "step": 42303 }, { "epoch": 3.427090084251458, "grad_norm": 0.08157029747962952, "learning_rate": 3.182861514919663e-05, "loss": 0.2263, "step": 42304 }, { "epoch": 3.4271710952689567, "grad_norm": 0.07904928177595139, "learning_rate": 3.182411449660201e-05, "loss": 0.2624, "step": 42305 }, { "epoch": 3.427252106286455, "grad_norm": 0.09257368743419647, "learning_rate": 3.1819613844007384e-05, "loss": 0.2928, "step": 42306 }, { "epoch": 3.427333117303953, "grad_norm": 0.07520698755979538, "learning_rate": 3.181511319141275e-05, "loss": 0.211, "step": 42307 }, { "epoch": 3.427414128321452, "grad_norm": 0.06931690126657486, "learning_rate": 3.181061253881813e-05, "loss": 0.2588, "step": 42308 }, { "epoch": 3.42749513933895, "grad_norm": 0.07930534332990646, "learning_rate": 3.1806111886223505e-05, "loss": 0.2293, "step": 42309 }, { "epoch": 3.4275761503564484, "grad_norm": 0.08392751216888428, "learning_rate": 3.180161123362887e-05, "loss": 0.2733, "step": 42310 }, { "epoch": 3.427657161373947, "grad_norm": 0.06310337781906128, "learning_rate": 3.179711058103425e-05, "loss": 0.2267, "step": 42311 }, { "epoch": 3.4277381723914453, "grad_norm": 0.08746607601642609, "learning_rate": 3.1792609928439626e-05, "loss": 0.1982, "step": 42312 }, { "epoch": 3.4278191834089435, "grad_norm": 0.059589460492134094, "learning_rate": 3.1788109275845e-05, "loss": 0.2267, "step": 42313 }, { "epoch": 3.4279001944264422, "grad_norm": 0.07890890538692474, "learning_rate": 3.178360862325037e-05, "loss": 0.2439, "step": 42314 }, { "epoch": 3.4279812054439405, "grad_norm": 0.07259047776460648, "learning_rate": 3.177910797065575e-05, "loss": 0.2305, "step": 42315 }, { "epoch": 3.4280622164614387, "grad_norm": 0.09121621400117874, "learning_rate": 3.177460731806112e-05, "loss": 0.241, "step": 42316 }, { "epoch": 3.428143227478937, "grad_norm": 0.0720064714550972, "learning_rate": 3.1770106665466494e-05, "loss": 0.1922, "step": 42317 }, { "epoch": 3.4282242384964356, "grad_norm": 0.07883750647306442, "learning_rate": 3.176560601287187e-05, "loss": 0.2434, "step": 42318 }, { "epoch": 3.428305249513934, "grad_norm": 0.07507482171058655, "learning_rate": 3.176110536027724e-05, "loss": 0.2453, "step": 42319 }, { "epoch": 3.428386260531432, "grad_norm": 0.09118028730154037, "learning_rate": 3.1756604707682615e-05, "loss": 0.251, "step": 42320 }, { "epoch": 3.428467271548931, "grad_norm": 0.08633448928594589, "learning_rate": 3.175210405508799e-05, "loss": 0.229, "step": 42321 }, { "epoch": 3.428548282566429, "grad_norm": 0.07962105423212051, "learning_rate": 3.174760340249336e-05, "loss": 0.1936, "step": 42322 }, { "epoch": 3.4286292935839273, "grad_norm": 0.0843440443277359, "learning_rate": 3.1743102749898736e-05, "loss": 0.2523, "step": 42323 }, { "epoch": 3.4287103046014256, "grad_norm": 0.06754980981349945, "learning_rate": 3.173860209730411e-05, "loss": 0.2247, "step": 42324 }, { "epoch": 3.4287913156189243, "grad_norm": 0.0783877968788147, "learning_rate": 3.173410144470948e-05, "loss": 0.2428, "step": 42325 }, { "epoch": 3.4288723266364225, "grad_norm": 0.06497252732515335, "learning_rate": 3.172960079211486e-05, "loss": 0.2004, "step": 42326 }, { "epoch": 3.4289533376539207, "grad_norm": 0.08234023302793503, "learning_rate": 3.172510013952023e-05, "loss": 0.2052, "step": 42327 }, { "epoch": 3.4290343486714194, "grad_norm": 0.06907796114683151, "learning_rate": 3.1720599486925604e-05, "loss": 0.2391, "step": 42328 }, { "epoch": 3.4291153596889177, "grad_norm": 0.06292956322431564, "learning_rate": 3.1716098834330984e-05, "loss": 0.2225, "step": 42329 }, { "epoch": 3.429196370706416, "grad_norm": 0.08327022939920425, "learning_rate": 3.171159818173635e-05, "loss": 0.2288, "step": 42330 }, { "epoch": 3.4292773817239146, "grad_norm": 0.07891655713319778, "learning_rate": 3.1707097529141725e-05, "loss": 0.25, "step": 42331 }, { "epoch": 3.429358392741413, "grad_norm": 0.08023487776517868, "learning_rate": 3.1702596876547105e-05, "loss": 0.2491, "step": 42332 }, { "epoch": 3.429439403758911, "grad_norm": 0.07012294977903366, "learning_rate": 3.169809622395247e-05, "loss": 0.2195, "step": 42333 }, { "epoch": 3.42952041477641, "grad_norm": 0.06975067406892776, "learning_rate": 3.1693595571357845e-05, "loss": 0.2312, "step": 42334 }, { "epoch": 3.429601425793908, "grad_norm": 0.05552361533045769, "learning_rate": 3.1689094918763226e-05, "loss": 0.2132, "step": 42335 }, { "epoch": 3.4296824368114063, "grad_norm": 0.08807866275310516, "learning_rate": 3.168459426616859e-05, "loss": 0.2089, "step": 42336 }, { "epoch": 3.429763447828905, "grad_norm": 0.05967969819903374, "learning_rate": 3.1680093613573966e-05, "loss": 0.2422, "step": 42337 }, { "epoch": 3.429844458846403, "grad_norm": 0.07136420905590057, "learning_rate": 3.1675592960979347e-05, "loss": 0.2563, "step": 42338 }, { "epoch": 3.4299254698639015, "grad_norm": 0.06767350435256958, "learning_rate": 3.167109230838472e-05, "loss": 0.2122, "step": 42339 }, { "epoch": 3.4300064808813997, "grad_norm": 0.07169254124164581, "learning_rate": 3.166659165579009e-05, "loss": 0.2215, "step": 42340 }, { "epoch": 3.4300874918988984, "grad_norm": 0.07639869302511215, "learning_rate": 3.166209100319547e-05, "loss": 0.2776, "step": 42341 }, { "epoch": 3.4301685029163966, "grad_norm": 0.057808637619018555, "learning_rate": 3.165759035060084e-05, "loss": 0.1903, "step": 42342 }, { "epoch": 3.430249513933895, "grad_norm": 0.07123946398496628, "learning_rate": 3.165308969800621e-05, "loss": 0.2204, "step": 42343 }, { "epoch": 3.4303305249513936, "grad_norm": 0.08755022287368774, "learning_rate": 3.164858904541159e-05, "loss": 0.2224, "step": 42344 }, { "epoch": 3.430411535968892, "grad_norm": 0.07249519228935242, "learning_rate": 3.164408839281696e-05, "loss": 0.2239, "step": 42345 }, { "epoch": 3.43049254698639, "grad_norm": 0.08545974642038345, "learning_rate": 3.163958774022233e-05, "loss": 0.2506, "step": 42346 }, { "epoch": 3.4305735580038883, "grad_norm": 0.08462615311145782, "learning_rate": 3.163508708762771e-05, "loss": 0.2436, "step": 42347 }, { "epoch": 3.430654569021387, "grad_norm": 0.06520073860883713, "learning_rate": 3.163058643503308e-05, "loss": 0.226, "step": 42348 }, { "epoch": 3.4307355800388852, "grad_norm": 0.08690381050109863, "learning_rate": 3.162608578243845e-05, "loss": 0.2199, "step": 42349 }, { "epoch": 3.4308165910563835, "grad_norm": 0.07840859144926071, "learning_rate": 3.162158512984383e-05, "loss": 0.2489, "step": 42350 }, { "epoch": 3.430897602073882, "grad_norm": 0.07008511573076248, "learning_rate": 3.1617084477249203e-05, "loss": 0.2561, "step": 42351 }, { "epoch": 3.4309786130913804, "grad_norm": 0.05991966649889946, "learning_rate": 3.161258382465458e-05, "loss": 0.2487, "step": 42352 }, { "epoch": 3.4310596241088787, "grad_norm": 0.06471392512321472, "learning_rate": 3.160808317205995e-05, "loss": 0.2362, "step": 42353 }, { "epoch": 3.4311406351263773, "grad_norm": 0.07750452309846878, "learning_rate": 3.1603582519465324e-05, "loss": 0.2639, "step": 42354 }, { "epoch": 3.4312216461438756, "grad_norm": 0.07131041586399078, "learning_rate": 3.15990818668707e-05, "loss": 0.2227, "step": 42355 }, { "epoch": 3.431302657161374, "grad_norm": 0.07483736425638199, "learning_rate": 3.159458121427607e-05, "loss": 0.2224, "step": 42356 }, { "epoch": 3.4313836681788725, "grad_norm": 0.06175358220934868, "learning_rate": 3.1590080561681445e-05, "loss": 0.2631, "step": 42357 }, { "epoch": 3.4314646791963708, "grad_norm": 0.07144345343112946, "learning_rate": 3.158557990908682e-05, "loss": 0.2151, "step": 42358 }, { "epoch": 3.431545690213869, "grad_norm": 0.07201637327671051, "learning_rate": 3.158107925649219e-05, "loss": 0.2097, "step": 42359 }, { "epoch": 3.4316267012313677, "grad_norm": 0.06086525321006775, "learning_rate": 3.1576578603897566e-05, "loss": 0.2005, "step": 42360 }, { "epoch": 3.431707712248866, "grad_norm": 0.062122467905282974, "learning_rate": 3.157207795130294e-05, "loss": 0.2261, "step": 42361 }, { "epoch": 3.431788723266364, "grad_norm": 0.06578835099935532, "learning_rate": 3.156757729870831e-05, "loss": 0.2348, "step": 42362 }, { "epoch": 3.4318697342838624, "grad_norm": 0.06421864032745361, "learning_rate": 3.156307664611369e-05, "loss": 0.218, "step": 42363 }, { "epoch": 3.431950745301361, "grad_norm": 0.069000244140625, "learning_rate": 3.155857599351906e-05, "loss": 0.2665, "step": 42364 }, { "epoch": 3.4320317563188594, "grad_norm": 0.07800819724798203, "learning_rate": 3.155407534092444e-05, "loss": 0.2314, "step": 42365 }, { "epoch": 3.4321127673363576, "grad_norm": 0.06821920722723007, "learning_rate": 3.154957468832981e-05, "loss": 0.2119, "step": 42366 }, { "epoch": 3.4321937783538563, "grad_norm": 0.05828966572880745, "learning_rate": 3.154507403573518e-05, "loss": 0.2396, "step": 42367 }, { "epoch": 3.4322747893713546, "grad_norm": 0.08024381101131439, "learning_rate": 3.154057338314056e-05, "loss": 0.2105, "step": 42368 }, { "epoch": 3.432355800388853, "grad_norm": 0.0659569799900055, "learning_rate": 3.153607273054593e-05, "loss": 0.2102, "step": 42369 }, { "epoch": 3.432436811406351, "grad_norm": 0.08766531944274902, "learning_rate": 3.15315720779513e-05, "loss": 0.2241, "step": 42370 }, { "epoch": 3.4325178224238497, "grad_norm": 0.08283737301826477, "learning_rate": 3.152707142535668e-05, "loss": 0.2663, "step": 42371 }, { "epoch": 3.432598833441348, "grad_norm": 0.07166048884391785, "learning_rate": 3.152257077276205e-05, "loss": 0.2281, "step": 42372 }, { "epoch": 3.432679844458846, "grad_norm": 0.08123382180929184, "learning_rate": 3.151807012016742e-05, "loss": 0.2408, "step": 42373 }, { "epoch": 3.432760855476345, "grad_norm": 0.07305345684289932, "learning_rate": 3.15135694675728e-05, "loss": 0.2455, "step": 42374 }, { "epoch": 3.432841866493843, "grad_norm": 0.0648297443985939, "learning_rate": 3.150906881497817e-05, "loss": 0.2104, "step": 42375 }, { "epoch": 3.4329228775113414, "grad_norm": 0.0737859457731247, "learning_rate": 3.1504568162383544e-05, "loss": 0.2275, "step": 42376 }, { "epoch": 3.43300388852884, "grad_norm": 0.07172111421823502, "learning_rate": 3.1500067509788924e-05, "loss": 0.2426, "step": 42377 }, { "epoch": 3.4330848995463383, "grad_norm": 0.06762681901454926, "learning_rate": 3.14955668571943e-05, "loss": 0.2571, "step": 42378 }, { "epoch": 3.4331659105638366, "grad_norm": 0.07657989114522934, "learning_rate": 3.1491066204599664e-05, "loss": 0.1949, "step": 42379 }, { "epoch": 3.4332469215813353, "grad_norm": 0.07408314198255539, "learning_rate": 3.1486565552005045e-05, "loss": 0.2221, "step": 42380 }, { "epoch": 3.4333279325988335, "grad_norm": 0.07041918486356735, "learning_rate": 3.148206489941042e-05, "loss": 0.2076, "step": 42381 }, { "epoch": 3.4334089436163318, "grad_norm": 0.0844322219491005, "learning_rate": 3.1477564246815785e-05, "loss": 0.225, "step": 42382 }, { "epoch": 3.4334899546338304, "grad_norm": 0.0668899193406105, "learning_rate": 3.1473063594221166e-05, "loss": 0.2101, "step": 42383 }, { "epoch": 3.4335709656513287, "grad_norm": 0.08455423265695572, "learning_rate": 3.146856294162654e-05, "loss": 0.2501, "step": 42384 }, { "epoch": 3.433651976668827, "grad_norm": 0.07682643830776215, "learning_rate": 3.1464062289031906e-05, "loss": 0.2301, "step": 42385 }, { "epoch": 3.433732987686325, "grad_norm": 0.08980892598628998, "learning_rate": 3.1459561636437286e-05, "loss": 0.2484, "step": 42386 }, { "epoch": 3.433813998703824, "grad_norm": 0.07740815728902817, "learning_rate": 3.145506098384266e-05, "loss": 0.2276, "step": 42387 }, { "epoch": 3.433895009721322, "grad_norm": 0.0819552093744278, "learning_rate": 3.1450560331248034e-05, "loss": 0.2466, "step": 42388 }, { "epoch": 3.4339760207388204, "grad_norm": 0.06842090934515, "learning_rate": 3.144605967865341e-05, "loss": 0.2285, "step": 42389 }, { "epoch": 3.434057031756319, "grad_norm": 0.08851540088653564, "learning_rate": 3.144155902605878e-05, "loss": 0.2125, "step": 42390 }, { "epoch": 3.4341380427738173, "grad_norm": 0.07082171738147736, "learning_rate": 3.1437058373464154e-05, "loss": 0.2133, "step": 42391 }, { "epoch": 3.4342190537913155, "grad_norm": 0.06632804870605469, "learning_rate": 3.143255772086953e-05, "loss": 0.2391, "step": 42392 }, { "epoch": 3.434300064808814, "grad_norm": 0.08337467908859253, "learning_rate": 3.14280570682749e-05, "loss": 0.2033, "step": 42393 }, { "epoch": 3.4343810758263125, "grad_norm": 0.07331390678882599, "learning_rate": 3.1423556415680275e-05, "loss": 0.2282, "step": 42394 }, { "epoch": 3.4344620868438107, "grad_norm": 0.09076662361621857, "learning_rate": 3.141905576308565e-05, "loss": 0.265, "step": 42395 }, { "epoch": 3.434543097861309, "grad_norm": 0.07595141977071762, "learning_rate": 3.141455511049102e-05, "loss": 0.2433, "step": 42396 }, { "epoch": 3.4346241088788076, "grad_norm": 0.06144767627120018, "learning_rate": 3.1410054457896396e-05, "loss": 0.2033, "step": 42397 }, { "epoch": 3.434705119896306, "grad_norm": 0.08792851865291595, "learning_rate": 3.140555380530177e-05, "loss": 0.2632, "step": 42398 }, { "epoch": 3.434786130913804, "grad_norm": 0.05918162316083908, "learning_rate": 3.140105315270714e-05, "loss": 0.2102, "step": 42399 }, { "epoch": 3.434867141931303, "grad_norm": 0.0852525532245636, "learning_rate": 3.139655250011252e-05, "loss": 0.2373, "step": 42400 }, { "epoch": 3.434948152948801, "grad_norm": 0.07010507583618164, "learning_rate": 3.139205184751789e-05, "loss": 0.2158, "step": 42401 }, { "epoch": 3.4350291639662993, "grad_norm": 0.07169964164495468, "learning_rate": 3.1387551194923264e-05, "loss": 0.215, "step": 42402 }, { "epoch": 3.435110174983798, "grad_norm": 0.0834985077381134, "learning_rate": 3.138305054232864e-05, "loss": 0.2279, "step": 42403 }, { "epoch": 3.4351911860012962, "grad_norm": 0.07278002798557281, "learning_rate": 3.137854988973401e-05, "loss": 0.2085, "step": 42404 }, { "epoch": 3.4352721970187945, "grad_norm": 0.062233611941337585, "learning_rate": 3.1374049237139385e-05, "loss": 0.2187, "step": 42405 }, { "epoch": 3.435353208036293, "grad_norm": 0.08605244755744934, "learning_rate": 3.136954858454476e-05, "loss": 0.2261, "step": 42406 }, { "epoch": 3.4354342190537914, "grad_norm": 0.09702229499816895, "learning_rate": 3.136504793195014e-05, "loss": 0.2434, "step": 42407 }, { "epoch": 3.4355152300712897, "grad_norm": 0.06307677924633026, "learning_rate": 3.1360547279355506e-05, "loss": 0.213, "step": 42408 }, { "epoch": 3.435596241088788, "grad_norm": 0.06760883331298828, "learning_rate": 3.135604662676088e-05, "loss": 0.1945, "step": 42409 }, { "epoch": 3.4356772521062866, "grad_norm": 0.06725229322910309, "learning_rate": 3.135154597416626e-05, "loss": 0.2248, "step": 42410 }, { "epoch": 3.435758263123785, "grad_norm": 0.07328040897846222, "learning_rate": 3.1347045321571626e-05, "loss": 0.2482, "step": 42411 }, { "epoch": 3.435839274141283, "grad_norm": 0.07860074937343597, "learning_rate": 3.1342544668977e-05, "loss": 0.1963, "step": 42412 }, { "epoch": 3.435920285158782, "grad_norm": 0.07504335045814514, "learning_rate": 3.133804401638238e-05, "loss": 0.2351, "step": 42413 }, { "epoch": 3.43600129617628, "grad_norm": 0.06371016055345535, "learning_rate": 3.133354336378775e-05, "loss": 0.2212, "step": 42414 }, { "epoch": 3.4360823071937783, "grad_norm": 0.06477545201778412, "learning_rate": 3.132904271119312e-05, "loss": 0.1911, "step": 42415 }, { "epoch": 3.4361633182112765, "grad_norm": 0.07808870077133179, "learning_rate": 3.13245420585985e-05, "loss": 0.2528, "step": 42416 }, { "epoch": 3.436244329228775, "grad_norm": 0.06825084239244461, "learning_rate": 3.132004140600387e-05, "loss": 0.2441, "step": 42417 }, { "epoch": 3.4363253402462735, "grad_norm": 0.07942578196525574, "learning_rate": 3.131554075340924e-05, "loss": 0.2501, "step": 42418 }, { "epoch": 3.4364063512637717, "grad_norm": 0.07507924735546112, "learning_rate": 3.131104010081462e-05, "loss": 0.2097, "step": 42419 }, { "epoch": 3.4364873622812704, "grad_norm": 0.08393153548240662, "learning_rate": 3.1306539448219996e-05, "loss": 0.2316, "step": 42420 }, { "epoch": 3.4365683732987686, "grad_norm": 0.07208391278982162, "learning_rate": 3.130203879562536e-05, "loss": 0.2284, "step": 42421 }, { "epoch": 3.436649384316267, "grad_norm": 0.0677122250199318, "learning_rate": 3.129753814303074e-05, "loss": 0.2468, "step": 42422 }, { "epoch": 3.4367303953337656, "grad_norm": 0.07315247505903244, "learning_rate": 3.1293037490436116e-05, "loss": 0.2323, "step": 42423 }, { "epoch": 3.436811406351264, "grad_norm": 0.07844259589910507, "learning_rate": 3.128853683784149e-05, "loss": 0.2493, "step": 42424 }, { "epoch": 3.436892417368762, "grad_norm": 0.09640884399414062, "learning_rate": 3.1284036185246864e-05, "loss": 0.2681, "step": 42425 }, { "epoch": 3.4369734283862607, "grad_norm": 0.07181743532419205, "learning_rate": 3.127953553265224e-05, "loss": 0.2238, "step": 42426 }, { "epoch": 3.437054439403759, "grad_norm": 0.07064533233642578, "learning_rate": 3.127503488005761e-05, "loss": 0.2211, "step": 42427 }, { "epoch": 3.4371354504212572, "grad_norm": 0.06328742206096649, "learning_rate": 3.1270534227462984e-05, "loss": 0.2169, "step": 42428 }, { "epoch": 3.4372164614387555, "grad_norm": 0.08524394035339355, "learning_rate": 3.126603357486836e-05, "loss": 0.2512, "step": 42429 }, { "epoch": 3.437297472456254, "grad_norm": 0.07788166403770447, "learning_rate": 3.126153292227373e-05, "loss": 0.1988, "step": 42430 }, { "epoch": 3.4373784834737524, "grad_norm": 0.07452834397554398, "learning_rate": 3.1257032269679105e-05, "loss": 0.229, "step": 42431 }, { "epoch": 3.4374594944912507, "grad_norm": 0.06433499604463577, "learning_rate": 3.125253161708448e-05, "loss": 0.2086, "step": 42432 }, { "epoch": 3.4375405055087493, "grad_norm": 0.07874725759029388, "learning_rate": 3.124803096448985e-05, "loss": 0.2252, "step": 42433 }, { "epoch": 3.4376215165262476, "grad_norm": 0.06409906595945358, "learning_rate": 3.1243530311895226e-05, "loss": 0.1908, "step": 42434 }, { "epoch": 3.437702527543746, "grad_norm": 0.07005061954259872, "learning_rate": 3.12390296593006e-05, "loss": 0.2266, "step": 42435 }, { "epoch": 3.4377835385612445, "grad_norm": 0.0679619088768959, "learning_rate": 3.123452900670597e-05, "loss": 0.1863, "step": 42436 }, { "epoch": 3.4378645495787428, "grad_norm": 0.07140480726957321, "learning_rate": 3.123002835411135e-05, "loss": 0.2588, "step": 42437 }, { "epoch": 3.437945560596241, "grad_norm": 0.07242782413959503, "learning_rate": 3.122552770151672e-05, "loss": 0.2201, "step": 42438 }, { "epoch": 3.4380265716137393, "grad_norm": 0.05919842794537544, "learning_rate": 3.1221027048922094e-05, "loss": 0.251, "step": 42439 }, { "epoch": 3.438107582631238, "grad_norm": 0.08086790144443512, "learning_rate": 3.121652639632747e-05, "loss": 0.2448, "step": 42440 }, { "epoch": 3.438188593648736, "grad_norm": 0.0732114315032959, "learning_rate": 3.121202574373284e-05, "loss": 0.2167, "step": 42441 }, { "epoch": 3.4382696046662344, "grad_norm": 0.07175198942422867, "learning_rate": 3.1207525091138215e-05, "loss": 0.1999, "step": 42442 }, { "epoch": 3.438350615683733, "grad_norm": 0.0730404332280159, "learning_rate": 3.120302443854359e-05, "loss": 0.2388, "step": 42443 }, { "epoch": 3.4384316267012314, "grad_norm": 0.07166948169469833, "learning_rate": 3.119852378594896e-05, "loss": 0.2328, "step": 42444 }, { "epoch": 3.4385126377187296, "grad_norm": 0.0726485624909401, "learning_rate": 3.1194023133354336e-05, "loss": 0.2126, "step": 42445 }, { "epoch": 3.4385936487362283, "grad_norm": 0.09775371849536896, "learning_rate": 3.1189522480759716e-05, "loss": 0.2205, "step": 42446 }, { "epoch": 3.4386746597537265, "grad_norm": 0.07552385330200195, "learning_rate": 3.118502182816508e-05, "loss": 0.2254, "step": 42447 }, { "epoch": 3.438755670771225, "grad_norm": 0.06823941320180893, "learning_rate": 3.1180521175570456e-05, "loss": 0.2337, "step": 42448 }, { "epoch": 3.4388366817887235, "grad_norm": 0.06638476252555847, "learning_rate": 3.117602052297584e-05, "loss": 0.2607, "step": 42449 }, { "epoch": 3.4389176928062217, "grad_norm": 0.06269118934869766, "learning_rate": 3.1171519870381204e-05, "loss": 0.194, "step": 42450 }, { "epoch": 3.43899870382372, "grad_norm": 0.07186885923147202, "learning_rate": 3.116701921778658e-05, "loss": 0.209, "step": 42451 }, { "epoch": 3.439079714841218, "grad_norm": 0.06401878595352173, "learning_rate": 3.116251856519196e-05, "loss": 0.2364, "step": 42452 }, { "epoch": 3.439160725858717, "grad_norm": 0.08331364393234253, "learning_rate": 3.1158017912597325e-05, "loss": 0.2699, "step": 42453 }, { "epoch": 3.439241736876215, "grad_norm": 0.07670908421278, "learning_rate": 3.11535172600027e-05, "loss": 0.2253, "step": 42454 }, { "epoch": 3.4393227478937134, "grad_norm": 0.06328018009662628, "learning_rate": 3.114901660740808e-05, "loss": 0.2341, "step": 42455 }, { "epoch": 3.439403758911212, "grad_norm": 0.06630222499370575, "learning_rate": 3.1144515954813445e-05, "loss": 0.2109, "step": 42456 }, { "epoch": 3.4394847699287103, "grad_norm": 0.07363289594650269, "learning_rate": 3.1140015302218826e-05, "loss": 0.258, "step": 42457 }, { "epoch": 3.4395657809462086, "grad_norm": 0.07480932027101517, "learning_rate": 3.11355146496242e-05, "loss": 0.208, "step": 42458 }, { "epoch": 3.439646791963707, "grad_norm": 0.07937634736299515, "learning_rate": 3.113101399702957e-05, "loss": 0.2327, "step": 42459 }, { "epoch": 3.4397278029812055, "grad_norm": 0.07142050564289093, "learning_rate": 3.1126513344434947e-05, "loss": 0.216, "step": 42460 }, { "epoch": 3.4398088139987038, "grad_norm": 0.09153217822313309, "learning_rate": 3.112201269184032e-05, "loss": 0.2545, "step": 42461 }, { "epoch": 3.439889825016202, "grad_norm": 0.07688187062740326, "learning_rate": 3.1117512039245694e-05, "loss": 0.2388, "step": 42462 }, { "epoch": 3.4399708360337007, "grad_norm": 0.07844226807355881, "learning_rate": 3.111301138665107e-05, "loss": 0.2126, "step": 42463 }, { "epoch": 3.440051847051199, "grad_norm": 0.06151213496923447, "learning_rate": 3.110851073405644e-05, "loss": 0.2331, "step": 42464 }, { "epoch": 3.440132858068697, "grad_norm": 0.06838814169168472, "learning_rate": 3.1104010081461815e-05, "loss": 0.2185, "step": 42465 }, { "epoch": 3.440213869086196, "grad_norm": 0.09179142862558365, "learning_rate": 3.109950942886719e-05, "loss": 0.2498, "step": 42466 }, { "epoch": 3.440294880103694, "grad_norm": 0.089014932513237, "learning_rate": 3.109500877627256e-05, "loss": 0.271, "step": 42467 }, { "epoch": 3.4403758911211924, "grad_norm": 0.07677599787712097, "learning_rate": 3.1090508123677935e-05, "loss": 0.2306, "step": 42468 }, { "epoch": 3.440456902138691, "grad_norm": 0.0653160959482193, "learning_rate": 3.108600747108331e-05, "loss": 0.2216, "step": 42469 }, { "epoch": 3.4405379131561893, "grad_norm": 0.06036091968417168, "learning_rate": 3.108150681848868e-05, "loss": 0.2405, "step": 42470 }, { "epoch": 3.4406189241736875, "grad_norm": 0.0778571143746376, "learning_rate": 3.1077006165894056e-05, "loss": 0.2298, "step": 42471 }, { "epoch": 3.440699935191186, "grad_norm": 0.07820609211921692, "learning_rate": 3.107250551329943e-05, "loss": 0.2263, "step": 42472 }, { "epoch": 3.4407809462086845, "grad_norm": 0.07398509234189987, "learning_rate": 3.10680048607048e-05, "loss": 0.238, "step": 42473 }, { "epoch": 3.4408619572261827, "grad_norm": 0.06346040219068527, "learning_rate": 3.106350420811018e-05, "loss": 0.1943, "step": 42474 }, { "epoch": 3.440942968243681, "grad_norm": 0.08498956263065338, "learning_rate": 3.105900355551555e-05, "loss": 0.242, "step": 42475 }, { "epoch": 3.4410239792611796, "grad_norm": 0.08488177508115768, "learning_rate": 3.1054502902920924e-05, "loss": 0.2226, "step": 42476 }, { "epoch": 3.441104990278678, "grad_norm": 0.08173447102308273, "learning_rate": 3.10500022503263e-05, "loss": 0.2493, "step": 42477 }, { "epoch": 3.441186001296176, "grad_norm": 0.08201484382152557, "learning_rate": 3.104550159773167e-05, "loss": 0.2469, "step": 42478 }, { "epoch": 3.441267012313675, "grad_norm": 0.06878629326820374, "learning_rate": 3.1041000945137045e-05, "loss": 0.2157, "step": 42479 }, { "epoch": 3.441348023331173, "grad_norm": 0.08908562362194061, "learning_rate": 3.103650029254242e-05, "loss": 0.2142, "step": 42480 }, { "epoch": 3.4414290343486713, "grad_norm": 0.06992624700069427, "learning_rate": 3.103199963994779e-05, "loss": 0.2487, "step": 42481 }, { "epoch": 3.4415100453661696, "grad_norm": 0.08812936395406723, "learning_rate": 3.1027498987353166e-05, "loss": 0.2578, "step": 42482 }, { "epoch": 3.4415910563836682, "grad_norm": 0.05788000300526619, "learning_rate": 3.102299833475854e-05, "loss": 0.212, "step": 42483 }, { "epoch": 3.4416720674011665, "grad_norm": 0.07030339539051056, "learning_rate": 3.101849768216391e-05, "loss": 0.2193, "step": 42484 }, { "epoch": 3.4417530784186647, "grad_norm": 0.07213641703128815, "learning_rate": 3.101399702956929e-05, "loss": 0.2363, "step": 42485 }, { "epoch": 3.4418340894361634, "grad_norm": 0.07877527922391891, "learning_rate": 3.100949637697466e-05, "loss": 0.2461, "step": 42486 }, { "epoch": 3.4419151004536617, "grad_norm": 0.07526003569364548, "learning_rate": 3.1004995724380034e-05, "loss": 0.2308, "step": 42487 }, { "epoch": 3.44199611147116, "grad_norm": 0.05849730223417282, "learning_rate": 3.1000495071785414e-05, "loss": 0.2031, "step": 42488 }, { "epoch": 3.4420771224886586, "grad_norm": 0.08493011444807053, "learning_rate": 3.099599441919078e-05, "loss": 0.2673, "step": 42489 }, { "epoch": 3.442158133506157, "grad_norm": 0.07385018467903137, "learning_rate": 3.0991493766596155e-05, "loss": 0.2149, "step": 42490 }, { "epoch": 3.442239144523655, "grad_norm": 0.08337543904781342, "learning_rate": 3.0986993114001535e-05, "loss": 0.2323, "step": 42491 }, { "epoch": 3.442320155541154, "grad_norm": 0.0729101151227951, "learning_rate": 3.09824924614069e-05, "loss": 0.2447, "step": 42492 }, { "epoch": 3.442401166558652, "grad_norm": 0.0771954283118248, "learning_rate": 3.097799180881228e-05, "loss": 0.2047, "step": 42493 }, { "epoch": 3.4424821775761503, "grad_norm": 0.06417211145162582, "learning_rate": 3.0973491156217656e-05, "loss": 0.1951, "step": 42494 }, { "epoch": 3.442563188593649, "grad_norm": 0.07637476921081543, "learning_rate": 3.096899050362302e-05, "loss": 0.2544, "step": 42495 }, { "epoch": 3.442644199611147, "grad_norm": 0.08546575158834457, "learning_rate": 3.09644898510284e-05, "loss": 0.2199, "step": 42496 }, { "epoch": 3.4427252106286454, "grad_norm": 0.074388287961483, "learning_rate": 3.095998919843378e-05, "loss": 0.2207, "step": 42497 }, { "epoch": 3.4428062216461437, "grad_norm": 0.07267254590988159, "learning_rate": 3.095548854583915e-05, "loss": 0.2217, "step": 42498 }, { "epoch": 3.4428872326636424, "grad_norm": 0.06971371173858643, "learning_rate": 3.0950987893244524e-05, "loss": 0.2058, "step": 42499 }, { "epoch": 3.4429682436811406, "grad_norm": 0.06479272991418839, "learning_rate": 3.09464872406499e-05, "loss": 0.23, "step": 42500 }, { "epoch": 3.443049254698639, "grad_norm": 0.06856130063533783, "learning_rate": 3.094198658805527e-05, "loss": 0.2244, "step": 42501 }, { "epoch": 3.4431302657161376, "grad_norm": 0.070685975253582, "learning_rate": 3.0937485935460645e-05, "loss": 0.2489, "step": 42502 }, { "epoch": 3.443211276733636, "grad_norm": 0.07081031054258347, "learning_rate": 3.093298528286602e-05, "loss": 0.2575, "step": 42503 }, { "epoch": 3.443292287751134, "grad_norm": 0.08072758466005325, "learning_rate": 3.092848463027139e-05, "loss": 0.2277, "step": 42504 }, { "epoch": 3.4433732987686323, "grad_norm": 0.05815386399626732, "learning_rate": 3.0923983977676765e-05, "loss": 0.2051, "step": 42505 }, { "epoch": 3.443454309786131, "grad_norm": 0.07763691991567612, "learning_rate": 3.091948332508214e-05, "loss": 0.2027, "step": 42506 }, { "epoch": 3.4435353208036292, "grad_norm": 0.0745823010802269, "learning_rate": 3.091498267248751e-05, "loss": 0.2735, "step": 42507 }, { "epoch": 3.4436163318211275, "grad_norm": 0.07333028316497803, "learning_rate": 3.0910482019892886e-05, "loss": 0.218, "step": 42508 }, { "epoch": 3.443697342838626, "grad_norm": 0.08793089538812637, "learning_rate": 3.090598136729826e-05, "loss": 0.2198, "step": 42509 }, { "epoch": 3.4437783538561244, "grad_norm": 0.06536927819252014, "learning_rate": 3.0901480714703633e-05, "loss": 0.232, "step": 42510 }, { "epoch": 3.4438593648736227, "grad_norm": 0.07713781297206879, "learning_rate": 3.089698006210901e-05, "loss": 0.1961, "step": 42511 }, { "epoch": 3.4439403758911213, "grad_norm": 0.08600252866744995, "learning_rate": 3.089247940951438e-05, "loss": 0.2401, "step": 42512 }, { "epoch": 3.4440213869086196, "grad_norm": 0.0582621768116951, "learning_rate": 3.0887978756919754e-05, "loss": 0.2505, "step": 42513 }, { "epoch": 3.444102397926118, "grad_norm": 0.06772822141647339, "learning_rate": 3.088347810432513e-05, "loss": 0.2127, "step": 42514 }, { "epoch": 3.4441834089436165, "grad_norm": 0.08750282973051071, "learning_rate": 3.08789774517305e-05, "loss": 0.2538, "step": 42515 }, { "epoch": 3.4442644199611148, "grad_norm": 0.07079533487558365, "learning_rate": 3.0874476799135875e-05, "loss": 0.25, "step": 42516 }, { "epoch": 3.444345430978613, "grad_norm": 0.08859393000602722, "learning_rate": 3.086997614654125e-05, "loss": 0.221, "step": 42517 }, { "epoch": 3.4444264419961117, "grad_norm": 0.07913894951343536, "learning_rate": 3.086547549394662e-05, "loss": 0.1933, "step": 42518 }, { "epoch": 3.44450745301361, "grad_norm": 0.06824357062578201, "learning_rate": 3.0860974841351996e-05, "loss": 0.1972, "step": 42519 }, { "epoch": 3.444588464031108, "grad_norm": 0.07941274344921112, "learning_rate": 3.085647418875737e-05, "loss": 0.2063, "step": 42520 }, { "epoch": 3.4446694750486064, "grad_norm": 0.06322202831506729, "learning_rate": 3.085197353616274e-05, "loss": 0.2167, "step": 42521 }, { "epoch": 3.444750486066105, "grad_norm": 0.07784758508205414, "learning_rate": 3.084747288356812e-05, "loss": 0.2057, "step": 42522 }, { "epoch": 3.4448314970836034, "grad_norm": 0.07789554446935654, "learning_rate": 3.084297223097349e-05, "loss": 0.2247, "step": 42523 }, { "epoch": 3.4449125081011016, "grad_norm": 0.07606974244117737, "learning_rate": 3.083847157837887e-05, "loss": 0.2362, "step": 42524 }, { "epoch": 3.4449935191186003, "grad_norm": 0.05988244712352753, "learning_rate": 3.083397092578424e-05, "loss": 0.1871, "step": 42525 }, { "epoch": 3.4450745301360985, "grad_norm": 0.07931521534919739, "learning_rate": 3.082947027318962e-05, "loss": 0.2101, "step": 42526 }, { "epoch": 3.445155541153597, "grad_norm": 0.07285697013139725, "learning_rate": 3.082496962059499e-05, "loss": 0.2001, "step": 42527 }, { "epoch": 3.445236552171095, "grad_norm": 0.07134629040956497, "learning_rate": 3.082046896800036e-05, "loss": 0.2359, "step": 42528 }, { "epoch": 3.4453175631885937, "grad_norm": 0.0749007910490036, "learning_rate": 3.081596831540574e-05, "loss": 0.2506, "step": 42529 }, { "epoch": 3.445398574206092, "grad_norm": 0.08527930825948715, "learning_rate": 3.081146766281111e-05, "loss": 0.2467, "step": 42530 }, { "epoch": 3.44547958522359, "grad_norm": 0.08154378086328506, "learning_rate": 3.080696701021648e-05, "loss": 0.2353, "step": 42531 }, { "epoch": 3.445560596241089, "grad_norm": 0.07114233076572418, "learning_rate": 3.080246635762186e-05, "loss": 0.2078, "step": 42532 }, { "epoch": 3.445641607258587, "grad_norm": 0.07162083685398102, "learning_rate": 3.079796570502723e-05, "loss": 0.2133, "step": 42533 }, { "epoch": 3.4457226182760854, "grad_norm": 0.0754132941365242, "learning_rate": 3.07934650524326e-05, "loss": 0.1714, "step": 42534 }, { "epoch": 3.445803629293584, "grad_norm": 0.07492772489786148, "learning_rate": 3.078896439983798e-05, "loss": 0.2537, "step": 42535 }, { "epoch": 3.4458846403110823, "grad_norm": 0.0607149600982666, "learning_rate": 3.0784463747243354e-05, "loss": 0.2311, "step": 42536 }, { "epoch": 3.4459656513285806, "grad_norm": 0.06479800492525101, "learning_rate": 3.077996309464873e-05, "loss": 0.2059, "step": 42537 }, { "epoch": 3.4460466623460793, "grad_norm": 0.07805703580379486, "learning_rate": 3.07754624420541e-05, "loss": 0.2178, "step": 42538 }, { "epoch": 3.4461276733635775, "grad_norm": 0.07639947533607483, "learning_rate": 3.0770961789459475e-05, "loss": 0.2371, "step": 42539 }, { "epoch": 3.4462086843810757, "grad_norm": 0.06437902897596359, "learning_rate": 3.076646113686485e-05, "loss": 0.2374, "step": 42540 }, { "epoch": 3.4462896953985744, "grad_norm": 0.06914235651493073, "learning_rate": 3.076196048427022e-05, "loss": 0.2035, "step": 42541 }, { "epoch": 3.4463707064160727, "grad_norm": 0.0812096893787384, "learning_rate": 3.0757459831675596e-05, "loss": 0.2375, "step": 42542 }, { "epoch": 3.446451717433571, "grad_norm": 0.07674495875835419, "learning_rate": 3.075295917908097e-05, "loss": 0.2185, "step": 42543 }, { "epoch": 3.446532728451069, "grad_norm": 0.09959354251623154, "learning_rate": 3.074845852648634e-05, "loss": 0.2403, "step": 42544 }, { "epoch": 3.446613739468568, "grad_norm": 0.07620353251695633, "learning_rate": 3.0743957873891716e-05, "loss": 0.2011, "step": 42545 }, { "epoch": 3.446694750486066, "grad_norm": 0.07273352146148682, "learning_rate": 3.073945722129709e-05, "loss": 0.2212, "step": 42546 }, { "epoch": 3.4467757615035644, "grad_norm": 0.0625937357544899, "learning_rate": 3.0734956568702464e-05, "loss": 0.2321, "step": 42547 }, { "epoch": 3.446856772521063, "grad_norm": 0.06092469394207001, "learning_rate": 3.073045591610784e-05, "loss": 0.2243, "step": 42548 }, { "epoch": 3.4469377835385613, "grad_norm": 0.06320256739854813, "learning_rate": 3.072595526351321e-05, "loss": 0.2125, "step": 42549 }, { "epoch": 3.4470187945560595, "grad_norm": 0.07723885774612427, "learning_rate": 3.0721454610918584e-05, "loss": 0.1935, "step": 42550 }, { "epoch": 3.4470998055735578, "grad_norm": 0.07916872948408127, "learning_rate": 3.071695395832396e-05, "loss": 0.2217, "step": 42551 }, { "epoch": 3.4471808165910565, "grad_norm": 0.07740025222301483, "learning_rate": 3.071245330572933e-05, "loss": 0.2333, "step": 42552 }, { "epoch": 3.4472618276085547, "grad_norm": 0.08805128186941147, "learning_rate": 3.0707952653134705e-05, "loss": 0.2696, "step": 42553 }, { "epoch": 3.447342838626053, "grad_norm": 0.07534347474575043, "learning_rate": 3.070345200054008e-05, "loss": 0.2199, "step": 42554 }, { "epoch": 3.4474238496435516, "grad_norm": 0.0770130380988121, "learning_rate": 3.069895134794545e-05, "loss": 0.2316, "step": 42555 }, { "epoch": 3.44750486066105, "grad_norm": 0.07147173583507538, "learning_rate": 3.0694450695350826e-05, "loss": 0.2115, "step": 42556 }, { "epoch": 3.447585871678548, "grad_norm": 0.07185516506433487, "learning_rate": 3.06899500427562e-05, "loss": 0.232, "step": 42557 }, { "epoch": 3.447666882696047, "grad_norm": 0.07114926725625992, "learning_rate": 3.068544939016157e-05, "loss": 0.2137, "step": 42558 }, { "epoch": 3.447747893713545, "grad_norm": 0.07756253331899643, "learning_rate": 3.0680948737566954e-05, "loss": 0.2644, "step": 42559 }, { "epoch": 3.4478289047310433, "grad_norm": 0.07162553817033768, "learning_rate": 3.067644808497232e-05, "loss": 0.2374, "step": 42560 }, { "epoch": 3.447909915748542, "grad_norm": 0.0673312172293663, "learning_rate": 3.0671947432377694e-05, "loss": 0.2171, "step": 42561 }, { "epoch": 3.4479909267660402, "grad_norm": 0.09096536785364151, "learning_rate": 3.0667446779783074e-05, "loss": 0.2637, "step": 42562 }, { "epoch": 3.4480719377835385, "grad_norm": 0.0750901997089386, "learning_rate": 3.066294612718844e-05, "loss": 0.2274, "step": 42563 }, { "epoch": 3.448152948801037, "grad_norm": 0.06524889916181564, "learning_rate": 3.0658445474593815e-05, "loss": 0.2416, "step": 42564 }, { "epoch": 3.4482339598185354, "grad_norm": 0.08983972668647766, "learning_rate": 3.0653944821999195e-05, "loss": 0.2537, "step": 42565 }, { "epoch": 3.4483149708360337, "grad_norm": 0.06232641637325287, "learning_rate": 3.064944416940457e-05, "loss": 0.1841, "step": 42566 }, { "epoch": 3.448395981853532, "grad_norm": 0.06145894527435303, "learning_rate": 3.0644943516809936e-05, "loss": 0.1862, "step": 42567 }, { "epoch": 3.4484769928710306, "grad_norm": 0.054291676729917526, "learning_rate": 3.0640442864215316e-05, "loss": 0.2503, "step": 42568 }, { "epoch": 3.448558003888529, "grad_norm": 0.06751246005296707, "learning_rate": 3.063594221162069e-05, "loss": 0.2346, "step": 42569 }, { "epoch": 3.448639014906027, "grad_norm": 0.0678846463561058, "learning_rate": 3.0631441559026056e-05, "loss": 0.2143, "step": 42570 }, { "epoch": 3.448720025923526, "grad_norm": 0.07107531279325485, "learning_rate": 3.062694090643144e-05, "loss": 0.2345, "step": 42571 }, { "epoch": 3.448801036941024, "grad_norm": 0.08533624559640884, "learning_rate": 3.062244025383681e-05, "loss": 0.2227, "step": 42572 }, { "epoch": 3.4488820479585223, "grad_norm": 0.07481777667999268, "learning_rate": 3.061793960124218e-05, "loss": 0.3018, "step": 42573 }, { "epoch": 3.4489630589760205, "grad_norm": 0.07119978964328766, "learning_rate": 3.061343894864756e-05, "loss": 0.2134, "step": 42574 }, { "epoch": 3.449044069993519, "grad_norm": 0.06313995271921158, "learning_rate": 3.060893829605293e-05, "loss": 0.2476, "step": 42575 }, { "epoch": 3.4491250810110174, "grad_norm": 0.086004838347435, "learning_rate": 3.06044376434583e-05, "loss": 0.2488, "step": 42576 }, { "epoch": 3.4492060920285157, "grad_norm": 0.08241745084524155, "learning_rate": 3.059993699086368e-05, "loss": 0.2519, "step": 42577 }, { "epoch": 3.4492871030460144, "grad_norm": 0.08791528642177582, "learning_rate": 3.059543633826905e-05, "loss": 0.2179, "step": 42578 }, { "epoch": 3.4493681140635126, "grad_norm": 0.06407441943883896, "learning_rate": 3.0590935685674426e-05, "loss": 0.2079, "step": 42579 }, { "epoch": 3.449449125081011, "grad_norm": 0.06448465585708618, "learning_rate": 3.05864350330798e-05, "loss": 0.1946, "step": 42580 }, { "epoch": 3.4495301360985096, "grad_norm": 0.06738763302564621, "learning_rate": 3.058193438048517e-05, "loss": 0.2527, "step": 42581 }, { "epoch": 3.449611147116008, "grad_norm": 0.07119040191173553, "learning_rate": 3.0577433727890546e-05, "loss": 0.2128, "step": 42582 }, { "epoch": 3.449692158133506, "grad_norm": 0.08521244674921036, "learning_rate": 3.057293307529592e-05, "loss": 0.2353, "step": 42583 }, { "epoch": 3.4497731691510047, "grad_norm": 0.056287288665771484, "learning_rate": 3.0568432422701294e-05, "loss": 0.1935, "step": 42584 }, { "epoch": 3.449854180168503, "grad_norm": 0.07885636389255524, "learning_rate": 3.056393177010667e-05, "loss": 0.2544, "step": 42585 }, { "epoch": 3.4499351911860012, "grad_norm": 0.07785996794700623, "learning_rate": 3.055943111751204e-05, "loss": 0.2056, "step": 42586 }, { "epoch": 3.4500162022035, "grad_norm": 0.07002022117376328, "learning_rate": 3.0554930464917414e-05, "loss": 0.254, "step": 42587 }, { "epoch": 3.450097213220998, "grad_norm": 0.0954364538192749, "learning_rate": 3.055042981232279e-05, "loss": 0.2299, "step": 42588 }, { "epoch": 3.4501782242384964, "grad_norm": 0.07318907231092453, "learning_rate": 3.054592915972816e-05, "loss": 0.2806, "step": 42589 }, { "epoch": 3.4502592352559946, "grad_norm": 0.08387971669435501, "learning_rate": 3.0541428507133535e-05, "loss": 0.2068, "step": 42590 }, { "epoch": 3.4503402462734933, "grad_norm": 0.06505469977855682, "learning_rate": 3.053692785453891e-05, "loss": 0.2197, "step": 42591 }, { "epoch": 3.4504212572909916, "grad_norm": 0.06631197035312653, "learning_rate": 3.053242720194428e-05, "loss": 0.2046, "step": 42592 }, { "epoch": 3.45050226830849, "grad_norm": 0.06794437021017075, "learning_rate": 3.0527926549349656e-05, "loss": 0.2153, "step": 42593 }, { "epoch": 3.4505832793259885, "grad_norm": 0.07331722229719162, "learning_rate": 3.052342589675503e-05, "loss": 0.2154, "step": 42594 }, { "epoch": 3.4506642903434868, "grad_norm": 0.07726750522851944, "learning_rate": 3.051892524416041e-05, "loss": 0.21, "step": 42595 }, { "epoch": 3.450745301360985, "grad_norm": 0.06050015985965729, "learning_rate": 3.0514424591565777e-05, "loss": 0.1939, "step": 42596 }, { "epoch": 3.4508263123784833, "grad_norm": 0.10080495476722717, "learning_rate": 3.0509923938971154e-05, "loss": 0.2516, "step": 42597 }, { "epoch": 3.450907323395982, "grad_norm": 0.07157032191753387, "learning_rate": 3.0505423286376527e-05, "loss": 0.1925, "step": 42598 }, { "epoch": 3.45098833441348, "grad_norm": 0.05965049937367439, "learning_rate": 3.0500922633781898e-05, "loss": 0.2142, "step": 42599 }, { "epoch": 3.4510693454309784, "grad_norm": 0.08102771639823914, "learning_rate": 3.0496421981187275e-05, "loss": 0.2468, "step": 42600 }, { "epoch": 3.451150356448477, "grad_norm": 0.07460810244083405, "learning_rate": 3.0491921328592648e-05, "loss": 0.2373, "step": 42601 }, { "epoch": 3.4512313674659754, "grad_norm": 0.08648321032524109, "learning_rate": 3.048742067599802e-05, "loss": 0.2106, "step": 42602 }, { "epoch": 3.4513123784834736, "grad_norm": 0.07186142355203629, "learning_rate": 3.0482920023403396e-05, "loss": 0.2229, "step": 42603 }, { "epoch": 3.4513933895009723, "grad_norm": 0.06909585744142532, "learning_rate": 3.047841937080877e-05, "loss": 0.1977, "step": 42604 }, { "epoch": 3.4514744005184705, "grad_norm": 0.08842332661151886, "learning_rate": 3.0473918718214146e-05, "loss": 0.2428, "step": 42605 }, { "epoch": 3.451555411535969, "grad_norm": 0.07989372313022614, "learning_rate": 3.0469418065619516e-05, "loss": 0.2748, "step": 42606 }, { "epoch": 3.4516364225534675, "grad_norm": 0.06668754667043686, "learning_rate": 3.046491741302489e-05, "loss": 0.2262, "step": 42607 }, { "epoch": 3.4517174335709657, "grad_norm": 0.06295004487037659, "learning_rate": 3.0460416760430267e-05, "loss": 0.2197, "step": 42608 }, { "epoch": 3.451798444588464, "grad_norm": 0.08387071639299393, "learning_rate": 3.0455916107835637e-05, "loss": 0.2375, "step": 42609 }, { "epoch": 3.4518794556059627, "grad_norm": 0.07910163700580597, "learning_rate": 3.045141545524101e-05, "loss": 0.2017, "step": 42610 }, { "epoch": 3.451960466623461, "grad_norm": 0.07617625594139099, "learning_rate": 3.0446914802646388e-05, "loss": 0.2246, "step": 42611 }, { "epoch": 3.452041477640959, "grad_norm": 0.07298461347818375, "learning_rate": 3.0442414150051758e-05, "loss": 0.2318, "step": 42612 }, { "epoch": 3.4521224886584574, "grad_norm": 0.07805348932743073, "learning_rate": 3.043791349745713e-05, "loss": 0.2166, "step": 42613 }, { "epoch": 3.452203499675956, "grad_norm": 0.07293111830949783, "learning_rate": 3.043341284486251e-05, "loss": 0.2259, "step": 42614 }, { "epoch": 3.4522845106934543, "grad_norm": 0.08659076690673828, "learning_rate": 3.042891219226788e-05, "loss": 0.1841, "step": 42615 }, { "epoch": 3.4523655217109526, "grad_norm": 0.07256795465946198, "learning_rate": 3.0424411539673252e-05, "loss": 0.2487, "step": 42616 }, { "epoch": 3.4524465327284513, "grad_norm": 0.07146996259689331, "learning_rate": 3.041991088707863e-05, "loss": 0.2341, "step": 42617 }, { "epoch": 3.4525275437459495, "grad_norm": 0.06735842674970627, "learning_rate": 3.0415410234484003e-05, "loss": 0.2418, "step": 42618 }, { "epoch": 3.4526085547634477, "grad_norm": 0.08807352930307388, "learning_rate": 3.0410909581889373e-05, "loss": 0.232, "step": 42619 }, { "epoch": 3.452689565780946, "grad_norm": 0.07156084477901459, "learning_rate": 3.040640892929475e-05, "loss": 0.2337, "step": 42620 }, { "epoch": 3.4527705767984447, "grad_norm": 0.06210574135184288, "learning_rate": 3.0401908276700124e-05, "loss": 0.2258, "step": 42621 }, { "epoch": 3.452851587815943, "grad_norm": 0.0830637589097023, "learning_rate": 3.0397407624105494e-05, "loss": 0.1914, "step": 42622 }, { "epoch": 3.452932598833441, "grad_norm": 0.0659496858716011, "learning_rate": 3.039290697151087e-05, "loss": 0.1955, "step": 42623 }, { "epoch": 3.45301360985094, "grad_norm": 0.08773655444383621, "learning_rate": 3.0388406318916245e-05, "loss": 0.2396, "step": 42624 }, { "epoch": 3.453094620868438, "grad_norm": 0.0867881178855896, "learning_rate": 3.0383905666321615e-05, "loss": 0.2583, "step": 42625 }, { "epoch": 3.4531756318859363, "grad_norm": 0.06408640742301941, "learning_rate": 3.0379405013726992e-05, "loss": 0.2227, "step": 42626 }, { "epoch": 3.453256642903435, "grad_norm": 0.06799597293138504, "learning_rate": 3.0374904361132365e-05, "loss": 0.2251, "step": 42627 }, { "epoch": 3.4533376539209333, "grad_norm": 0.0695396289229393, "learning_rate": 3.0370403708537736e-05, "loss": 0.2336, "step": 42628 }, { "epoch": 3.4534186649384315, "grad_norm": 0.07411754876375198, "learning_rate": 3.0365903055943113e-05, "loss": 0.2278, "step": 42629 }, { "epoch": 3.45349967595593, "grad_norm": 0.09416381269693375, "learning_rate": 3.0361402403348486e-05, "loss": 0.27, "step": 42630 }, { "epoch": 3.4535806869734285, "grad_norm": 0.07336270809173584, "learning_rate": 3.0356901750753863e-05, "loss": 0.2177, "step": 42631 }, { "epoch": 3.4536616979909267, "grad_norm": 0.0843655914068222, "learning_rate": 3.0352401098159233e-05, "loss": 0.2416, "step": 42632 }, { "epoch": 3.4537427090084254, "grad_norm": 0.06593769788742065, "learning_rate": 3.034790044556461e-05, "loss": 0.1886, "step": 42633 }, { "epoch": 3.4538237200259236, "grad_norm": 0.06238039955496788, "learning_rate": 3.0343399792969984e-05, "loss": 0.2111, "step": 42634 }, { "epoch": 3.453904731043422, "grad_norm": 0.07243414968252182, "learning_rate": 3.0338899140375354e-05, "loss": 0.2137, "step": 42635 }, { "epoch": 3.45398574206092, "grad_norm": 0.06280165165662766, "learning_rate": 3.033439848778073e-05, "loss": 0.2483, "step": 42636 }, { "epoch": 3.454066753078419, "grad_norm": 0.0687374547123909, "learning_rate": 3.0329897835186105e-05, "loss": 0.2537, "step": 42637 }, { "epoch": 3.454147764095917, "grad_norm": 0.060395676642656326, "learning_rate": 3.0325397182591475e-05, "loss": 0.2205, "step": 42638 }, { "epoch": 3.4542287751134153, "grad_norm": 0.06727840006351471, "learning_rate": 3.0320896529996852e-05, "loss": 0.226, "step": 42639 }, { "epoch": 3.454309786130914, "grad_norm": 0.07435212284326553, "learning_rate": 3.0316395877402226e-05, "loss": 0.2189, "step": 42640 }, { "epoch": 3.4543907971484122, "grad_norm": 0.08039019256830215, "learning_rate": 3.0311895224807596e-05, "loss": 0.2404, "step": 42641 }, { "epoch": 3.4544718081659105, "grad_norm": 0.08695101737976074, "learning_rate": 3.0307394572212973e-05, "loss": 0.2338, "step": 42642 }, { "epoch": 3.4545528191834087, "grad_norm": 0.06483660638332367, "learning_rate": 3.0302893919618346e-05, "loss": 0.2118, "step": 42643 }, { "epoch": 3.4546338302009074, "grad_norm": 0.06342937052249908, "learning_rate": 3.0298393267023723e-05, "loss": 0.24, "step": 42644 }, { "epoch": 3.4547148412184057, "grad_norm": 0.06715777516365051, "learning_rate": 3.0293892614429094e-05, "loss": 0.2376, "step": 42645 }, { "epoch": 3.454795852235904, "grad_norm": 0.0639479011297226, "learning_rate": 3.0289391961834467e-05, "loss": 0.2272, "step": 42646 }, { "epoch": 3.4548768632534026, "grad_norm": 0.0595063641667366, "learning_rate": 3.0284891309239844e-05, "loss": 0.218, "step": 42647 }, { "epoch": 3.454957874270901, "grad_norm": 0.07153860479593277, "learning_rate": 3.0280390656645214e-05, "loss": 0.2089, "step": 42648 }, { "epoch": 3.455038885288399, "grad_norm": 0.07404875010251999, "learning_rate": 3.0275890004050588e-05, "loss": 0.2961, "step": 42649 }, { "epoch": 3.4551198963058978, "grad_norm": 0.07781454920768738, "learning_rate": 3.0271389351455965e-05, "loss": 0.209, "step": 42650 }, { "epoch": 3.455200907323396, "grad_norm": 0.0751013308763504, "learning_rate": 3.0266888698861335e-05, "loss": 0.2487, "step": 42651 }, { "epoch": 3.4552819183408943, "grad_norm": 0.07977674901485443, "learning_rate": 3.026238804626671e-05, "loss": 0.2522, "step": 42652 }, { "epoch": 3.455362929358393, "grad_norm": 0.07325046509504318, "learning_rate": 3.0257887393672086e-05, "loss": 0.2277, "step": 42653 }, { "epoch": 3.455443940375891, "grad_norm": 0.07782243192195892, "learning_rate": 3.0253386741077456e-05, "loss": 0.1968, "step": 42654 }, { "epoch": 3.4555249513933894, "grad_norm": 0.06764143705368042, "learning_rate": 3.024888608848283e-05, "loss": 0.2293, "step": 42655 }, { "epoch": 3.4556059624108877, "grad_norm": 0.0683240294456482, "learning_rate": 3.0244385435888207e-05, "loss": 0.2326, "step": 42656 }, { "epoch": 3.4556869734283864, "grad_norm": 0.07400035113096237, "learning_rate": 3.023988478329358e-05, "loss": 0.2471, "step": 42657 }, { "epoch": 3.4557679844458846, "grad_norm": 0.08437030017375946, "learning_rate": 3.023538413069895e-05, "loss": 0.2547, "step": 42658 }, { "epoch": 3.455848995463383, "grad_norm": 0.06690854579210281, "learning_rate": 3.0230883478104327e-05, "loss": 0.2286, "step": 42659 }, { "epoch": 3.4559300064808816, "grad_norm": 0.06564860790967941, "learning_rate": 3.02263828255097e-05, "loss": 0.2177, "step": 42660 }, { "epoch": 3.45601101749838, "grad_norm": 0.0681687518954277, "learning_rate": 3.022188217291507e-05, "loss": 0.2529, "step": 42661 }, { "epoch": 3.456092028515878, "grad_norm": 0.07344760745763779, "learning_rate": 3.0217381520320448e-05, "loss": 0.249, "step": 42662 }, { "epoch": 3.4561730395333763, "grad_norm": 0.07287075370550156, "learning_rate": 3.0212880867725822e-05, "loss": 0.2255, "step": 42663 }, { "epoch": 3.456254050550875, "grad_norm": 0.06859376281499863, "learning_rate": 3.0208380215131192e-05, "loss": 0.2455, "step": 42664 }, { "epoch": 3.4563350615683732, "grad_norm": 0.07294625788927078, "learning_rate": 3.020387956253657e-05, "loss": 0.2292, "step": 42665 }, { "epoch": 3.4564160725858715, "grad_norm": 0.07278914749622345, "learning_rate": 3.0199378909941946e-05, "loss": 0.2337, "step": 42666 }, { "epoch": 3.45649708360337, "grad_norm": 0.07642342150211334, "learning_rate": 3.0194878257347313e-05, "loss": 0.2148, "step": 42667 }, { "epoch": 3.4565780946208684, "grad_norm": 0.07237835973501205, "learning_rate": 3.019037760475269e-05, "loss": 0.2142, "step": 42668 }, { "epoch": 3.4566591056383666, "grad_norm": 0.07787852734327316, "learning_rate": 3.0185876952158067e-05, "loss": 0.2466, "step": 42669 }, { "epoch": 3.4567401166558653, "grad_norm": 0.06031728535890579, "learning_rate": 3.018137629956344e-05, "loss": 0.1905, "step": 42670 }, { "epoch": 3.4568211276733636, "grad_norm": 0.07175780087709427, "learning_rate": 3.017687564696881e-05, "loss": 0.2108, "step": 42671 }, { "epoch": 3.456902138690862, "grad_norm": 0.07543084025382996, "learning_rate": 3.0172374994374188e-05, "loss": 0.2642, "step": 42672 }, { "epoch": 3.4569831497083605, "grad_norm": 0.06601322442293167, "learning_rate": 3.016787434177956e-05, "loss": 0.2427, "step": 42673 }, { "epoch": 3.4570641607258588, "grad_norm": 0.06880936771631241, "learning_rate": 3.016337368918493e-05, "loss": 0.2351, "step": 42674 }, { "epoch": 3.457145171743357, "grad_norm": 0.06941180676221848, "learning_rate": 3.015887303659031e-05, "loss": 0.2167, "step": 42675 }, { "epoch": 3.4572261827608557, "grad_norm": 0.08035728335380554, "learning_rate": 3.0154372383995682e-05, "loss": 0.2554, "step": 42676 }, { "epoch": 3.457307193778354, "grad_norm": 0.0804125964641571, "learning_rate": 3.0149871731401052e-05, "loss": 0.2234, "step": 42677 }, { "epoch": 3.457388204795852, "grad_norm": 0.05431007593870163, "learning_rate": 3.014537107880643e-05, "loss": 0.2322, "step": 42678 }, { "epoch": 3.4574692158133504, "grad_norm": 0.06997882574796677, "learning_rate": 3.0140870426211803e-05, "loss": 0.2212, "step": 42679 }, { "epoch": 3.457550226830849, "grad_norm": 0.07095611095428467, "learning_rate": 3.0136369773617173e-05, "loss": 0.2024, "step": 42680 }, { "epoch": 3.4576312378483474, "grad_norm": 0.0627041757106781, "learning_rate": 3.013186912102255e-05, "loss": 0.233, "step": 42681 }, { "epoch": 3.4577122488658456, "grad_norm": 0.059571947902441025, "learning_rate": 3.0127368468427924e-05, "loss": 0.2446, "step": 42682 }, { "epoch": 3.4577932598833443, "grad_norm": 0.06569252908229828, "learning_rate": 3.01228678158333e-05, "loss": 0.234, "step": 42683 }, { "epoch": 3.4578742709008425, "grad_norm": 0.06776624172925949, "learning_rate": 3.011836716323867e-05, "loss": 0.215, "step": 42684 }, { "epoch": 3.457955281918341, "grad_norm": 0.06598973274230957, "learning_rate": 3.0113866510644045e-05, "loss": 0.1994, "step": 42685 }, { "epoch": 3.458036292935839, "grad_norm": 0.06201748922467232, "learning_rate": 3.010936585804942e-05, "loss": 0.2279, "step": 42686 }, { "epoch": 3.4581173039533377, "grad_norm": 0.08058135211467743, "learning_rate": 3.0104865205454792e-05, "loss": 0.2569, "step": 42687 }, { "epoch": 3.458198314970836, "grad_norm": 0.06606631726026535, "learning_rate": 3.0100364552860165e-05, "loss": 0.2738, "step": 42688 }, { "epoch": 3.458279325988334, "grad_norm": 0.06655286252498627, "learning_rate": 3.0095863900265542e-05, "loss": 0.2433, "step": 42689 }, { "epoch": 3.458360337005833, "grad_norm": 0.08217357099056244, "learning_rate": 3.0091363247670913e-05, "loss": 0.2057, "step": 42690 }, { "epoch": 3.458441348023331, "grad_norm": 0.06689063459634781, "learning_rate": 3.0086862595076286e-05, "loss": 0.2438, "step": 42691 }, { "epoch": 3.4585223590408294, "grad_norm": 0.07152974605560303, "learning_rate": 3.0082361942481663e-05, "loss": 0.2558, "step": 42692 }, { "epoch": 3.458603370058328, "grad_norm": 0.0755951926112175, "learning_rate": 3.0077861289887033e-05, "loss": 0.2117, "step": 42693 }, { "epoch": 3.4586843810758263, "grad_norm": 0.08308055996894836, "learning_rate": 3.0073360637292407e-05, "loss": 0.2148, "step": 42694 }, { "epoch": 3.4587653920933246, "grad_norm": 0.07032884657382965, "learning_rate": 3.0068859984697784e-05, "loss": 0.222, "step": 42695 }, { "epoch": 3.4588464031108233, "grad_norm": 0.05964222922921181, "learning_rate": 3.0064359332103158e-05, "loss": 0.2174, "step": 42696 }, { "epoch": 3.4589274141283215, "grad_norm": 0.07548788189888, "learning_rate": 3.0059858679508528e-05, "loss": 0.2334, "step": 42697 }, { "epoch": 3.4590084251458197, "grad_norm": 0.07545255869626999, "learning_rate": 3.0055358026913905e-05, "loss": 0.2385, "step": 42698 }, { "epoch": 3.4590894361633184, "grad_norm": 0.06462171673774719, "learning_rate": 3.005085737431928e-05, "loss": 0.2, "step": 42699 }, { "epoch": 3.4591704471808167, "grad_norm": 0.07996838539838791, "learning_rate": 3.004635672172465e-05, "loss": 0.2185, "step": 42700 }, { "epoch": 3.459251458198315, "grad_norm": 0.0730762779712677, "learning_rate": 3.0041856069130026e-05, "loss": 0.2399, "step": 42701 }, { "epoch": 3.459332469215813, "grad_norm": 0.06021992489695549, "learning_rate": 3.0037355416535403e-05, "loss": 0.2486, "step": 42702 }, { "epoch": 3.459413480233312, "grad_norm": 0.06835592538118362, "learning_rate": 3.003285476394077e-05, "loss": 0.2186, "step": 42703 }, { "epoch": 3.45949449125081, "grad_norm": 0.06691218912601471, "learning_rate": 3.0028354111346146e-05, "loss": 0.239, "step": 42704 }, { "epoch": 3.4595755022683083, "grad_norm": 0.07505834847688675, "learning_rate": 3.0023853458751523e-05, "loss": 0.2934, "step": 42705 }, { "epoch": 3.459656513285807, "grad_norm": 0.06843426823616028, "learning_rate": 3.001935280615689e-05, "loss": 0.2292, "step": 42706 }, { "epoch": 3.4597375243033053, "grad_norm": 0.07168073207139969, "learning_rate": 3.0014852153562267e-05, "loss": 0.2144, "step": 42707 }, { "epoch": 3.4598185353208035, "grad_norm": 0.07357820868492126, "learning_rate": 3.0010351500967644e-05, "loss": 0.1994, "step": 42708 }, { "epoch": 3.4598995463383018, "grad_norm": 0.06848014891147614, "learning_rate": 3.0005850848373018e-05, "loss": 0.2146, "step": 42709 }, { "epoch": 3.4599805573558005, "grad_norm": 0.07098592072725296, "learning_rate": 3.0001350195778388e-05, "loss": 0.2236, "step": 42710 }, { "epoch": 3.4600615683732987, "grad_norm": 0.06135905534029007, "learning_rate": 2.9996849543183765e-05, "loss": 0.2137, "step": 42711 }, { "epoch": 3.460142579390797, "grad_norm": 0.07563517242670059, "learning_rate": 2.999234889058914e-05, "loss": 0.2131, "step": 42712 }, { "epoch": 3.4602235904082956, "grad_norm": 0.07762432843446732, "learning_rate": 2.998784823799451e-05, "loss": 0.2029, "step": 42713 }, { "epoch": 3.460304601425794, "grad_norm": 0.06432337313890457, "learning_rate": 2.9983347585399886e-05, "loss": 0.2375, "step": 42714 }, { "epoch": 3.460385612443292, "grad_norm": 0.07871246337890625, "learning_rate": 2.997884693280526e-05, "loss": 0.269, "step": 42715 }, { "epoch": 3.460466623460791, "grad_norm": 0.07545270025730133, "learning_rate": 2.997434628021063e-05, "loss": 0.2148, "step": 42716 }, { "epoch": 3.460547634478289, "grad_norm": 0.09875917434692383, "learning_rate": 2.9969845627616007e-05, "loss": 0.2502, "step": 42717 }, { "epoch": 3.4606286454957873, "grad_norm": 0.06610118597745895, "learning_rate": 2.996534497502138e-05, "loss": 0.1822, "step": 42718 }, { "epoch": 3.460709656513286, "grad_norm": 0.07913530617952347, "learning_rate": 2.996084432242675e-05, "loss": 0.2197, "step": 42719 }, { "epoch": 3.4607906675307842, "grad_norm": 0.06806863844394684, "learning_rate": 2.9956343669832127e-05, "loss": 0.2239, "step": 42720 }, { "epoch": 3.4608716785482825, "grad_norm": 0.06233429163694382, "learning_rate": 2.99518430172375e-05, "loss": 0.2375, "step": 42721 }, { "epoch": 3.460952689565781, "grad_norm": 0.08534996211528778, "learning_rate": 2.994734236464287e-05, "loss": 0.2351, "step": 42722 }, { "epoch": 3.4610337005832794, "grad_norm": 0.0827810987830162, "learning_rate": 2.9942841712048248e-05, "loss": 0.2308, "step": 42723 }, { "epoch": 3.4611147116007777, "grad_norm": 0.09024398773908615, "learning_rate": 2.9938341059453622e-05, "loss": 0.2467, "step": 42724 }, { "epoch": 3.461195722618276, "grad_norm": 0.07115122675895691, "learning_rate": 2.9933840406859e-05, "loss": 0.2086, "step": 42725 }, { "epoch": 3.4612767336357746, "grad_norm": 0.08028433471918106, "learning_rate": 2.992933975426437e-05, "loss": 0.2335, "step": 42726 }, { "epoch": 3.461357744653273, "grad_norm": 0.0784420445561409, "learning_rate": 2.9924839101669743e-05, "loss": 0.2203, "step": 42727 }, { "epoch": 3.461438755670771, "grad_norm": 0.07200008630752563, "learning_rate": 2.992033844907512e-05, "loss": 0.2139, "step": 42728 }, { "epoch": 3.4615197666882698, "grad_norm": 0.06650616973638535, "learning_rate": 2.991583779648049e-05, "loss": 0.2014, "step": 42729 }, { "epoch": 3.461600777705768, "grad_norm": 0.07266805320978165, "learning_rate": 2.9911337143885863e-05, "loss": 0.2083, "step": 42730 }, { "epoch": 3.4616817887232663, "grad_norm": 0.0538320429623127, "learning_rate": 2.990683649129124e-05, "loss": 0.2266, "step": 42731 }, { "epoch": 3.4617627997407645, "grad_norm": 0.07561052590608597, "learning_rate": 2.990233583869661e-05, "loss": 0.2478, "step": 42732 }, { "epoch": 3.461843810758263, "grad_norm": 0.07131976634263992, "learning_rate": 2.9897835186101984e-05, "loss": 0.2351, "step": 42733 }, { "epoch": 3.4619248217757614, "grad_norm": 0.07233821600675583, "learning_rate": 2.989333453350736e-05, "loss": 0.2601, "step": 42734 }, { "epoch": 3.4620058327932597, "grad_norm": 0.07123769819736481, "learning_rate": 2.988883388091273e-05, "loss": 0.2259, "step": 42735 }, { "epoch": 3.4620868438107584, "grad_norm": 0.08232641220092773, "learning_rate": 2.9884333228318105e-05, "loss": 0.2919, "step": 42736 }, { "epoch": 3.4621678548282566, "grad_norm": 0.07963483780622482, "learning_rate": 2.9879832575723482e-05, "loss": 0.2462, "step": 42737 }, { "epoch": 3.462248865845755, "grad_norm": 0.06596418470144272, "learning_rate": 2.987533192312886e-05, "loss": 0.2181, "step": 42738 }, { "epoch": 3.4623298768632536, "grad_norm": 0.07897919416427612, "learning_rate": 2.9870831270534226e-05, "loss": 0.222, "step": 42739 }, { "epoch": 3.462410887880752, "grad_norm": 0.07320919632911682, "learning_rate": 2.9866330617939603e-05, "loss": 0.2362, "step": 42740 }, { "epoch": 3.46249189889825, "grad_norm": 0.08484750986099243, "learning_rate": 2.986182996534498e-05, "loss": 0.2283, "step": 42741 }, { "epoch": 3.4625729099157487, "grad_norm": 0.05763061344623566, "learning_rate": 2.985732931275035e-05, "loss": 0.2019, "step": 42742 }, { "epoch": 3.462653920933247, "grad_norm": 0.06858108192682266, "learning_rate": 2.9852828660155724e-05, "loss": 0.24, "step": 42743 }, { "epoch": 3.462734931950745, "grad_norm": 0.0650968849658966, "learning_rate": 2.98483280075611e-05, "loss": 0.2225, "step": 42744 }, { "epoch": 3.462815942968244, "grad_norm": 0.0646698996424675, "learning_rate": 2.984382735496647e-05, "loss": 0.2295, "step": 42745 }, { "epoch": 3.462896953985742, "grad_norm": 0.08138510584831238, "learning_rate": 2.9839326702371844e-05, "loss": 0.2516, "step": 42746 }, { "epoch": 3.4629779650032404, "grad_norm": 0.07128766179084778, "learning_rate": 2.983482604977722e-05, "loss": 0.2226, "step": 42747 }, { "epoch": 3.4630589760207386, "grad_norm": 0.05778047442436218, "learning_rate": 2.9830325397182592e-05, "loss": 0.2015, "step": 42748 }, { "epoch": 3.4631399870382373, "grad_norm": 0.06885864585638046, "learning_rate": 2.9825824744587965e-05, "loss": 0.2254, "step": 42749 }, { "epoch": 3.4632209980557356, "grad_norm": 0.07103770226240158, "learning_rate": 2.9821324091993342e-05, "loss": 0.229, "step": 42750 }, { "epoch": 3.463302009073234, "grad_norm": 0.08950777351856232, "learning_rate": 2.9816823439398716e-05, "loss": 0.2434, "step": 42751 }, { "epoch": 3.4633830200907325, "grad_norm": 0.06826742738485336, "learning_rate": 2.9812322786804086e-05, "loss": 0.2148, "step": 42752 }, { "epoch": 3.4634640311082308, "grad_norm": 0.07242395728826523, "learning_rate": 2.9807822134209463e-05, "loss": 0.2436, "step": 42753 }, { "epoch": 3.463545042125729, "grad_norm": 0.07269168645143509, "learning_rate": 2.9803321481614837e-05, "loss": 0.2314, "step": 42754 }, { "epoch": 3.4636260531432272, "grad_norm": 0.06759374588727951, "learning_rate": 2.9798820829020207e-05, "loss": 0.2461, "step": 42755 }, { "epoch": 3.463707064160726, "grad_norm": 0.09427613019943237, "learning_rate": 2.9794320176425584e-05, "loss": 0.2412, "step": 42756 }, { "epoch": 3.463788075178224, "grad_norm": 0.06852936744689941, "learning_rate": 2.9789819523830958e-05, "loss": 0.2618, "step": 42757 }, { "epoch": 3.4638690861957224, "grad_norm": 0.07090926915407181, "learning_rate": 2.9785318871236328e-05, "loss": 0.258, "step": 42758 }, { "epoch": 3.463950097213221, "grad_norm": 0.06704474985599518, "learning_rate": 2.9780818218641705e-05, "loss": 0.2101, "step": 42759 }, { "epoch": 3.4640311082307194, "grad_norm": 0.06343290954828262, "learning_rate": 2.977631756604708e-05, "loss": 0.2247, "step": 42760 }, { "epoch": 3.4641121192482176, "grad_norm": 0.07604165375232697, "learning_rate": 2.977181691345245e-05, "loss": 0.2268, "step": 42761 }, { "epoch": 3.4641931302657163, "grad_norm": 0.0740513950586319, "learning_rate": 2.9767316260857826e-05, "loss": 0.2323, "step": 42762 }, { "epoch": 3.4642741412832145, "grad_norm": 0.07418538630008698, "learning_rate": 2.97628156082632e-05, "loss": 0.2016, "step": 42763 }, { "epoch": 3.464355152300713, "grad_norm": 0.07796464115381241, "learning_rate": 2.9758314955668576e-05, "loss": 0.2353, "step": 42764 }, { "epoch": 3.4644361633182115, "grad_norm": 0.0578065924346447, "learning_rate": 2.9753814303073946e-05, "loss": 0.226, "step": 42765 }, { "epoch": 3.4645171743357097, "grad_norm": 0.060822539031505585, "learning_rate": 2.974931365047932e-05, "loss": 0.2199, "step": 42766 }, { "epoch": 3.464598185353208, "grad_norm": 0.068793386220932, "learning_rate": 2.9744812997884697e-05, "loss": 0.2505, "step": 42767 }, { "epoch": 3.4646791963707066, "grad_norm": 0.08377143740653992, "learning_rate": 2.9740312345290067e-05, "loss": 0.2449, "step": 42768 }, { "epoch": 3.464760207388205, "grad_norm": 0.08305362612009048, "learning_rate": 2.973581169269544e-05, "loss": 0.2428, "step": 42769 }, { "epoch": 3.464841218405703, "grad_norm": 0.07506440579891205, "learning_rate": 2.9731311040100818e-05, "loss": 0.2282, "step": 42770 }, { "epoch": 3.4649222294232014, "grad_norm": 0.056740131229162216, "learning_rate": 2.9726810387506188e-05, "loss": 0.1754, "step": 42771 }, { "epoch": 3.4650032404407, "grad_norm": 0.06253889203071594, "learning_rate": 2.972230973491156e-05, "loss": 0.2289, "step": 42772 }, { "epoch": 3.4650842514581983, "grad_norm": 0.06224200129508972, "learning_rate": 2.971780908231694e-05, "loss": 0.2004, "step": 42773 }, { "epoch": 3.4651652624756966, "grad_norm": 0.06049289181828499, "learning_rate": 2.971330842972231e-05, "loss": 0.2276, "step": 42774 }, { "epoch": 3.4652462734931953, "grad_norm": 0.07351897656917572, "learning_rate": 2.9708807777127682e-05, "loss": 0.2244, "step": 42775 }, { "epoch": 3.4653272845106935, "grad_norm": 0.07887002825737, "learning_rate": 2.970430712453306e-05, "loss": 0.2632, "step": 42776 }, { "epoch": 3.4654082955281917, "grad_norm": 0.07231858372688293, "learning_rate": 2.9699806471938436e-05, "loss": 0.2121, "step": 42777 }, { "epoch": 3.46548930654569, "grad_norm": 0.05875080078840256, "learning_rate": 2.9695305819343807e-05, "loss": 0.2003, "step": 42778 }, { "epoch": 3.4655703175631887, "grad_norm": 0.05856175720691681, "learning_rate": 2.969080516674918e-05, "loss": 0.205, "step": 42779 }, { "epoch": 3.465651328580687, "grad_norm": 0.10160711407661438, "learning_rate": 2.9686304514154557e-05, "loss": 0.234, "step": 42780 }, { "epoch": 3.465732339598185, "grad_norm": 0.08699193596839905, "learning_rate": 2.9681803861559927e-05, "loss": 0.2177, "step": 42781 }, { "epoch": 3.465813350615684, "grad_norm": 0.07056750357151031, "learning_rate": 2.96773032089653e-05, "loss": 0.2105, "step": 42782 }, { "epoch": 3.465894361633182, "grad_norm": 0.08346658945083618, "learning_rate": 2.9672802556370678e-05, "loss": 0.238, "step": 42783 }, { "epoch": 3.4659753726506803, "grad_norm": 0.08231760561466217, "learning_rate": 2.9668301903776048e-05, "loss": 0.2705, "step": 42784 }, { "epoch": 3.466056383668179, "grad_norm": 0.08830294013023376, "learning_rate": 2.9663801251181422e-05, "loss": 0.21, "step": 42785 }, { "epoch": 3.4661373946856773, "grad_norm": 0.1320062279701233, "learning_rate": 2.96593005985868e-05, "loss": 0.2115, "step": 42786 }, { "epoch": 3.4662184057031755, "grad_norm": 0.06545694917440414, "learning_rate": 2.965479994599217e-05, "loss": 0.2369, "step": 42787 }, { "epoch": 3.466299416720674, "grad_norm": 0.07637354731559753, "learning_rate": 2.9650299293397543e-05, "loss": 0.2218, "step": 42788 }, { "epoch": 3.4663804277381725, "grad_norm": 0.0652073547244072, "learning_rate": 2.964579864080292e-05, "loss": 0.2212, "step": 42789 }, { "epoch": 3.4664614387556707, "grad_norm": 0.08444193005561829, "learning_rate": 2.9641297988208293e-05, "loss": 0.2438, "step": 42790 }, { "epoch": 3.4665424497731694, "grad_norm": 0.06109117716550827, "learning_rate": 2.9636797335613663e-05, "loss": 0.2048, "step": 42791 }, { "epoch": 3.4666234607906676, "grad_norm": 0.07534314692020416, "learning_rate": 2.963229668301904e-05, "loss": 0.2061, "step": 42792 }, { "epoch": 3.466704471808166, "grad_norm": 0.072405144572258, "learning_rate": 2.9627796030424414e-05, "loss": 0.2275, "step": 42793 }, { "epoch": 3.466785482825664, "grad_norm": 0.07258030027151108, "learning_rate": 2.9623295377829784e-05, "loss": 0.2439, "step": 42794 }, { "epoch": 3.466866493843163, "grad_norm": 0.08834215998649597, "learning_rate": 2.961879472523516e-05, "loss": 0.2243, "step": 42795 }, { "epoch": 3.466947504860661, "grad_norm": 0.09430413693189621, "learning_rate": 2.9614294072640535e-05, "loss": 0.2272, "step": 42796 }, { "epoch": 3.4670285158781593, "grad_norm": 0.0866408497095108, "learning_rate": 2.9609793420045905e-05, "loss": 0.2357, "step": 42797 }, { "epoch": 3.467109526895658, "grad_norm": 0.06997352838516235, "learning_rate": 2.9605292767451282e-05, "loss": 0.207, "step": 42798 }, { "epoch": 3.4671905379131562, "grad_norm": 0.0892815962433815, "learning_rate": 2.9600792114856656e-05, "loss": 0.2432, "step": 42799 }, { "epoch": 3.4672715489306545, "grad_norm": 0.05779581516981125, "learning_rate": 2.9596291462262026e-05, "loss": 0.242, "step": 42800 }, { "epoch": 3.4673525599481527, "grad_norm": 0.07875336706638336, "learning_rate": 2.9591790809667403e-05, "loss": 0.2588, "step": 42801 }, { "epoch": 3.4674335709656514, "grad_norm": 0.06230032816529274, "learning_rate": 2.9587290157072776e-05, "loss": 0.2007, "step": 42802 }, { "epoch": 3.4675145819831497, "grad_norm": 0.0884285494685173, "learning_rate": 2.9582789504478153e-05, "loss": 0.2151, "step": 42803 }, { "epoch": 3.467595593000648, "grad_norm": 0.07607505470514297, "learning_rate": 2.9578288851883524e-05, "loss": 0.2268, "step": 42804 }, { "epoch": 3.4676766040181466, "grad_norm": 0.06472492963075638, "learning_rate": 2.9573788199288897e-05, "loss": 0.2265, "step": 42805 }, { "epoch": 3.467757615035645, "grad_norm": 0.06241315230727196, "learning_rate": 2.9569287546694274e-05, "loss": 0.2371, "step": 42806 }, { "epoch": 3.467838626053143, "grad_norm": 0.0674692764878273, "learning_rate": 2.9564786894099644e-05, "loss": 0.2064, "step": 42807 }, { "epoch": 3.4679196370706418, "grad_norm": 0.06604752689599991, "learning_rate": 2.9560286241505018e-05, "loss": 0.2242, "step": 42808 }, { "epoch": 3.46800064808814, "grad_norm": 0.07254812121391296, "learning_rate": 2.9555785588910395e-05, "loss": 0.237, "step": 42809 }, { "epoch": 3.4680816591056383, "grad_norm": 0.05818432196974754, "learning_rate": 2.9551284936315765e-05, "loss": 0.2357, "step": 42810 }, { "epoch": 3.468162670123137, "grad_norm": 0.06459543853998184, "learning_rate": 2.9546784283721142e-05, "loss": 0.2142, "step": 42811 }, { "epoch": 3.468243681140635, "grad_norm": 0.06363950669765472, "learning_rate": 2.9542283631126516e-05, "loss": 0.2252, "step": 42812 }, { "epoch": 3.4683246921581334, "grad_norm": 0.07214115560054779, "learning_rate": 2.9537782978531886e-05, "loss": 0.1997, "step": 42813 }, { "epoch": 3.468405703175632, "grad_norm": 0.0833434909582138, "learning_rate": 2.9533282325937263e-05, "loss": 0.2266, "step": 42814 }, { "epoch": 3.4684867141931304, "grad_norm": 0.060628198087215424, "learning_rate": 2.9528781673342637e-05, "loss": 0.2256, "step": 42815 }, { "epoch": 3.4685677252106286, "grad_norm": 0.07649362087249756, "learning_rate": 2.9524281020748014e-05, "loss": 0.2218, "step": 42816 }, { "epoch": 3.468648736228127, "grad_norm": 0.05994151905179024, "learning_rate": 2.9519780368153384e-05, "loss": 0.1972, "step": 42817 }, { "epoch": 3.4687297472456255, "grad_norm": 0.05603544041514397, "learning_rate": 2.9515279715558757e-05, "loss": 0.1852, "step": 42818 }, { "epoch": 3.468810758263124, "grad_norm": 0.0875602439045906, "learning_rate": 2.9510779062964134e-05, "loss": 0.277, "step": 42819 }, { "epoch": 3.468891769280622, "grad_norm": 0.06956169009208679, "learning_rate": 2.9506278410369505e-05, "loss": 0.2408, "step": 42820 }, { "epoch": 3.4689727802981207, "grad_norm": 0.06525658071041107, "learning_rate": 2.9501777757774878e-05, "loss": 0.2127, "step": 42821 }, { "epoch": 3.469053791315619, "grad_norm": 0.053318217396736145, "learning_rate": 2.9497277105180255e-05, "loss": 0.2058, "step": 42822 }, { "epoch": 3.469134802333117, "grad_norm": 0.05750075727701187, "learning_rate": 2.9492776452585626e-05, "loss": 0.1984, "step": 42823 }, { "epoch": 3.4692158133506155, "grad_norm": 0.055032555013895035, "learning_rate": 2.9488275799991e-05, "loss": 0.2495, "step": 42824 }, { "epoch": 3.469296824368114, "grad_norm": 0.06822854280471802, "learning_rate": 2.9483775147396376e-05, "loss": 0.2897, "step": 42825 }, { "epoch": 3.4693778353856124, "grad_norm": 0.06751962006092072, "learning_rate": 2.9479274494801746e-05, "loss": 0.2156, "step": 42826 }, { "epoch": 3.4694588464031106, "grad_norm": 0.06314662098884583, "learning_rate": 2.947477384220712e-05, "loss": 0.2023, "step": 42827 }, { "epoch": 3.4695398574206093, "grad_norm": 0.07605284452438354, "learning_rate": 2.9470273189612497e-05, "loss": 0.2359, "step": 42828 }, { "epoch": 3.4696208684381076, "grad_norm": 0.07924285531044006, "learning_rate": 2.946577253701787e-05, "loss": 0.2344, "step": 42829 }, { "epoch": 3.469701879455606, "grad_norm": 0.07829609513282776, "learning_rate": 2.946127188442324e-05, "loss": 0.2456, "step": 42830 }, { "epoch": 3.4697828904731045, "grad_norm": 0.07626158744096756, "learning_rate": 2.9456771231828618e-05, "loss": 0.2074, "step": 42831 }, { "epoch": 3.4698639014906028, "grad_norm": 0.0745619535446167, "learning_rate": 2.945227057923399e-05, "loss": 0.2177, "step": 42832 }, { "epoch": 3.469944912508101, "grad_norm": 0.06465175747871399, "learning_rate": 2.944776992663936e-05, "loss": 0.207, "step": 42833 }, { "epoch": 3.4700259235255997, "grad_norm": 0.06583263725042343, "learning_rate": 2.944326927404474e-05, "loss": 0.2152, "step": 42834 }, { "epoch": 3.470106934543098, "grad_norm": 0.07880668342113495, "learning_rate": 2.9438768621450112e-05, "loss": 0.2204, "step": 42835 }, { "epoch": 3.470187945560596, "grad_norm": 0.08511168509721756, "learning_rate": 2.9434267968855482e-05, "loss": 0.2511, "step": 42836 }, { "epoch": 3.470268956578095, "grad_norm": 0.0641389861702919, "learning_rate": 2.942976731626086e-05, "loss": 0.2086, "step": 42837 }, { "epoch": 3.470349967595593, "grad_norm": 0.06406936794519424, "learning_rate": 2.9425266663666233e-05, "loss": 0.2147, "step": 42838 }, { "epoch": 3.4704309786130914, "grad_norm": 0.1066378578543663, "learning_rate": 2.9420766011071603e-05, "loss": 0.2282, "step": 42839 }, { "epoch": 3.4705119896305896, "grad_norm": 0.10814861208200455, "learning_rate": 2.941626535847698e-05, "loss": 0.2357, "step": 42840 }, { "epoch": 3.4705930006480883, "grad_norm": 0.06493860483169556, "learning_rate": 2.9411764705882354e-05, "loss": 0.1892, "step": 42841 }, { "epoch": 3.4706740116655865, "grad_norm": 0.06477928161621094, "learning_rate": 2.940726405328773e-05, "loss": 0.2015, "step": 42842 }, { "epoch": 3.470755022683085, "grad_norm": 0.057188913226127625, "learning_rate": 2.94027634006931e-05, "loss": 0.2055, "step": 42843 }, { "epoch": 3.4708360337005835, "grad_norm": 0.06894127279520035, "learning_rate": 2.9398262748098475e-05, "loss": 0.241, "step": 42844 }, { "epoch": 3.4709170447180817, "grad_norm": 0.07011944055557251, "learning_rate": 2.939376209550385e-05, "loss": 0.2181, "step": 42845 }, { "epoch": 3.47099805573558, "grad_norm": 0.07745971530675888, "learning_rate": 2.9389261442909222e-05, "loss": 0.2004, "step": 42846 }, { "epoch": 3.471079066753078, "grad_norm": 0.07370658218860626, "learning_rate": 2.93847607903146e-05, "loss": 0.2229, "step": 42847 }, { "epoch": 3.471160077770577, "grad_norm": 0.07393095642328262, "learning_rate": 2.9380260137719972e-05, "loss": 0.2218, "step": 42848 }, { "epoch": 3.471241088788075, "grad_norm": 0.07989127933979034, "learning_rate": 2.9375759485125343e-05, "loss": 0.2318, "step": 42849 }, { "epoch": 3.4713220998055734, "grad_norm": 0.07488720864057541, "learning_rate": 2.937125883253072e-05, "loss": 0.2146, "step": 42850 }, { "epoch": 3.471403110823072, "grad_norm": 0.057390645146369934, "learning_rate": 2.9366758179936093e-05, "loss": 0.2061, "step": 42851 }, { "epoch": 3.4714841218405703, "grad_norm": 0.05493978410959244, "learning_rate": 2.9362257527341463e-05, "loss": 0.2559, "step": 42852 }, { "epoch": 3.4715651328580686, "grad_norm": 0.056346312165260315, "learning_rate": 2.935775687474684e-05, "loss": 0.2003, "step": 42853 }, { "epoch": 3.4716461438755672, "grad_norm": 0.07699105143547058, "learning_rate": 2.9353256222152214e-05, "loss": 0.2161, "step": 42854 }, { "epoch": 3.4717271548930655, "grad_norm": 0.06203935667872429, "learning_rate": 2.934875556955759e-05, "loss": 0.2139, "step": 42855 }, { "epoch": 3.4718081659105637, "grad_norm": 0.06947055459022522, "learning_rate": 2.934425491696296e-05, "loss": 0.2353, "step": 42856 }, { "epoch": 3.4718891769280624, "grad_norm": 0.07427409291267395, "learning_rate": 2.9339754264368335e-05, "loss": 0.2506, "step": 42857 }, { "epoch": 3.4719701879455607, "grad_norm": 0.0813642218708992, "learning_rate": 2.9335253611773712e-05, "loss": 0.2438, "step": 42858 }, { "epoch": 3.472051198963059, "grad_norm": 0.07532806694507599, "learning_rate": 2.9330752959179082e-05, "loss": 0.2136, "step": 42859 }, { "epoch": 3.4721322099805576, "grad_norm": 0.07544232904911041, "learning_rate": 2.9326252306584456e-05, "loss": 0.252, "step": 42860 }, { "epoch": 3.472213220998056, "grad_norm": 0.07405072450637817, "learning_rate": 2.9321751653989833e-05, "loss": 0.2137, "step": 42861 }, { "epoch": 3.472294232015554, "grad_norm": 0.07314892113208771, "learning_rate": 2.9317251001395203e-05, "loss": 0.2255, "step": 42862 }, { "epoch": 3.4723752430330523, "grad_norm": 0.0625855028629303, "learning_rate": 2.9312750348800576e-05, "loss": 0.2336, "step": 42863 }, { "epoch": 3.472456254050551, "grad_norm": 0.05610349029302597, "learning_rate": 2.9308249696205953e-05, "loss": 0.2248, "step": 42864 }, { "epoch": 3.4725372650680493, "grad_norm": 0.058661460876464844, "learning_rate": 2.9303749043611324e-05, "loss": 0.1841, "step": 42865 }, { "epoch": 3.4726182760855475, "grad_norm": 0.06500184535980225, "learning_rate": 2.9299248391016697e-05, "loss": 0.2377, "step": 42866 }, { "epoch": 3.472699287103046, "grad_norm": 0.07732019573450089, "learning_rate": 2.9294747738422074e-05, "loss": 0.2616, "step": 42867 }, { "epoch": 3.4727802981205445, "grad_norm": 0.07218549400568008, "learning_rate": 2.9290247085827448e-05, "loss": 0.2203, "step": 42868 }, { "epoch": 3.4728613091380427, "grad_norm": 0.07090730220079422, "learning_rate": 2.9285746433232818e-05, "loss": 0.2377, "step": 42869 }, { "epoch": 3.472942320155541, "grad_norm": 0.0621664933860302, "learning_rate": 2.9281245780638195e-05, "loss": 0.238, "step": 42870 }, { "epoch": 3.4730233311730396, "grad_norm": 0.08462394773960114, "learning_rate": 2.927674512804357e-05, "loss": 0.2387, "step": 42871 }, { "epoch": 3.473104342190538, "grad_norm": 0.0961620882153511, "learning_rate": 2.927224447544894e-05, "loss": 0.2399, "step": 42872 }, { "epoch": 3.473185353208036, "grad_norm": 0.07041720300912857, "learning_rate": 2.9267743822854316e-05, "loss": 0.1945, "step": 42873 }, { "epoch": 3.473266364225535, "grad_norm": 0.08104278892278671, "learning_rate": 2.926324317025969e-05, "loss": 0.2402, "step": 42874 }, { "epoch": 3.473347375243033, "grad_norm": 0.08936689049005508, "learning_rate": 2.925874251766506e-05, "loss": 0.2431, "step": 42875 }, { "epoch": 3.4734283862605313, "grad_norm": 0.057427264750003815, "learning_rate": 2.9254241865070437e-05, "loss": 0.1859, "step": 42876 }, { "epoch": 3.47350939727803, "grad_norm": 0.06501757353544235, "learning_rate": 2.924974121247581e-05, "loss": 0.2515, "step": 42877 }, { "epoch": 3.4735904082955282, "grad_norm": 0.06856517493724823, "learning_rate": 2.924524055988118e-05, "loss": 0.2219, "step": 42878 }, { "epoch": 3.4736714193130265, "grad_norm": 0.08182412385940552, "learning_rate": 2.9240739907286557e-05, "loss": 0.2265, "step": 42879 }, { "epoch": 3.473752430330525, "grad_norm": 0.08065382391214371, "learning_rate": 2.9236239254691934e-05, "loss": 0.2306, "step": 42880 }, { "epoch": 3.4738334413480234, "grad_norm": 0.08492909371852875, "learning_rate": 2.92317386020973e-05, "loss": 0.2243, "step": 42881 }, { "epoch": 3.4739144523655217, "grad_norm": 0.07409442961215973, "learning_rate": 2.9227237949502678e-05, "loss": 0.2079, "step": 42882 }, { "epoch": 3.47399546338302, "grad_norm": 0.06843043118715286, "learning_rate": 2.9222737296908055e-05, "loss": 0.2137, "step": 42883 }, { "epoch": 3.4740764744005186, "grad_norm": 0.08158732205629349, "learning_rate": 2.921823664431343e-05, "loss": 0.218, "step": 42884 }, { "epoch": 3.474157485418017, "grad_norm": 0.06149592250585556, "learning_rate": 2.92137359917188e-05, "loss": 0.2288, "step": 42885 }, { "epoch": 3.474238496435515, "grad_norm": 0.08045820891857147, "learning_rate": 2.9209235339124176e-05, "loss": 0.2193, "step": 42886 }, { "epoch": 3.4743195074530138, "grad_norm": 0.07448042184114456, "learning_rate": 2.920473468652955e-05, "loss": 0.2279, "step": 42887 }, { "epoch": 3.474400518470512, "grad_norm": 0.06777680665254593, "learning_rate": 2.920023403393492e-05, "loss": 0.2414, "step": 42888 }, { "epoch": 3.4744815294880103, "grad_norm": 0.07255148887634277, "learning_rate": 2.9195733381340297e-05, "loss": 0.2281, "step": 42889 }, { "epoch": 3.4745625405055085, "grad_norm": 0.0675332099199295, "learning_rate": 2.919123272874567e-05, "loss": 0.1977, "step": 42890 }, { "epoch": 3.474643551523007, "grad_norm": 0.07232514023780823, "learning_rate": 2.918673207615104e-05, "loss": 0.2118, "step": 42891 }, { "epoch": 3.4747245625405054, "grad_norm": 0.07749462872743607, "learning_rate": 2.9182231423556418e-05, "loss": 0.2486, "step": 42892 }, { "epoch": 3.4748055735580037, "grad_norm": 0.06873171776533127, "learning_rate": 2.917773077096179e-05, "loss": 0.2243, "step": 42893 }, { "epoch": 3.4748865845755024, "grad_norm": 0.06732004880905151, "learning_rate": 2.917323011836716e-05, "loss": 0.2039, "step": 42894 }, { "epoch": 3.4749675955930006, "grad_norm": 0.0846250131726265, "learning_rate": 2.916872946577254e-05, "loss": 0.2103, "step": 42895 }, { "epoch": 3.475048606610499, "grad_norm": 0.07390343397855759, "learning_rate": 2.9164228813177912e-05, "loss": 0.1858, "step": 42896 }, { "epoch": 3.4751296176279975, "grad_norm": 0.06746575236320496, "learning_rate": 2.915972816058329e-05, "loss": 0.2331, "step": 42897 }, { "epoch": 3.475210628645496, "grad_norm": 0.08225786685943604, "learning_rate": 2.915522750798866e-05, "loss": 0.2094, "step": 42898 }, { "epoch": 3.475291639662994, "grad_norm": 0.06650905311107635, "learning_rate": 2.9150726855394033e-05, "loss": 0.2149, "step": 42899 }, { "epoch": 3.4753726506804927, "grad_norm": 0.06720145791769028, "learning_rate": 2.914622620279941e-05, "loss": 0.2508, "step": 42900 }, { "epoch": 3.475453661697991, "grad_norm": 0.06689774245023727, "learning_rate": 2.914172555020478e-05, "loss": 0.2164, "step": 42901 }, { "epoch": 3.475534672715489, "grad_norm": 0.06267905980348587, "learning_rate": 2.9137224897610154e-05, "loss": 0.2223, "step": 42902 }, { "epoch": 3.475615683732988, "grad_norm": 0.0679597407579422, "learning_rate": 2.913272424501553e-05, "loss": 0.208, "step": 42903 }, { "epoch": 3.475696694750486, "grad_norm": 0.07159747183322906, "learning_rate": 2.91282235924209e-05, "loss": 0.2249, "step": 42904 }, { "epoch": 3.4757777057679844, "grad_norm": 0.06298567354679108, "learning_rate": 2.9123722939826275e-05, "loss": 0.2318, "step": 42905 }, { "epoch": 3.4758587167854826, "grad_norm": 0.0710611492395401, "learning_rate": 2.911922228723165e-05, "loss": 0.2177, "step": 42906 }, { "epoch": 3.4759397278029813, "grad_norm": 0.05858425050973892, "learning_rate": 2.9114721634637022e-05, "loss": 0.2387, "step": 42907 }, { "epoch": 3.4760207388204796, "grad_norm": 0.07267910987138748, "learning_rate": 2.9110220982042395e-05, "loss": 0.2312, "step": 42908 }, { "epoch": 3.476101749837978, "grad_norm": 0.06025031954050064, "learning_rate": 2.9105720329447772e-05, "loss": 0.2027, "step": 42909 }, { "epoch": 3.4761827608554765, "grad_norm": 0.0847097858786583, "learning_rate": 2.9101219676853146e-05, "loss": 0.2421, "step": 42910 }, { "epoch": 3.4762637718729748, "grad_norm": 0.08266802877187729, "learning_rate": 2.9096719024258516e-05, "loss": 0.2149, "step": 42911 }, { "epoch": 3.476344782890473, "grad_norm": 0.06554539501667023, "learning_rate": 2.9092218371663893e-05, "loss": 0.2361, "step": 42912 }, { "epoch": 3.4764257939079712, "grad_norm": 0.06395899504423141, "learning_rate": 2.908771771906927e-05, "loss": 0.2227, "step": 42913 }, { "epoch": 3.47650680492547, "grad_norm": 0.06811178475618362, "learning_rate": 2.9083217066474637e-05, "loss": 0.2145, "step": 42914 }, { "epoch": 3.476587815942968, "grad_norm": 0.06188002973794937, "learning_rate": 2.9078716413880014e-05, "loss": 0.2076, "step": 42915 }, { "epoch": 3.4766688269604664, "grad_norm": 0.08311402797698975, "learning_rate": 2.907421576128539e-05, "loss": 0.2615, "step": 42916 }, { "epoch": 3.476749837977965, "grad_norm": 0.07584516704082489, "learning_rate": 2.9069715108690758e-05, "loss": 0.232, "step": 42917 }, { "epoch": 3.4768308489954634, "grad_norm": 0.0709504559636116, "learning_rate": 2.9065214456096135e-05, "loss": 0.2111, "step": 42918 }, { "epoch": 3.4769118600129616, "grad_norm": 0.07124251127243042, "learning_rate": 2.9060713803501512e-05, "loss": 0.2293, "step": 42919 }, { "epoch": 3.4769928710304603, "grad_norm": 0.06938184797763824, "learning_rate": 2.905621315090688e-05, "loss": 0.214, "step": 42920 }, { "epoch": 3.4770738820479585, "grad_norm": 0.06147418171167374, "learning_rate": 2.9051712498312256e-05, "loss": 0.2575, "step": 42921 }, { "epoch": 3.4771548930654568, "grad_norm": 0.0724051296710968, "learning_rate": 2.9047211845717633e-05, "loss": 0.2184, "step": 42922 }, { "epoch": 3.4772359040829555, "grad_norm": 0.08725505322217941, "learning_rate": 2.9042711193123006e-05, "loss": 0.2466, "step": 42923 }, { "epoch": 3.4773169151004537, "grad_norm": 0.07237666845321655, "learning_rate": 2.9038210540528376e-05, "loss": 0.2514, "step": 42924 }, { "epoch": 3.477397926117952, "grad_norm": 0.06182453781366348, "learning_rate": 2.9033709887933753e-05, "loss": 0.2208, "step": 42925 }, { "epoch": 3.4774789371354506, "grad_norm": 0.07542017102241516, "learning_rate": 2.9029209235339127e-05, "loss": 0.261, "step": 42926 }, { "epoch": 3.477559948152949, "grad_norm": 0.06871870160102844, "learning_rate": 2.9024708582744497e-05, "loss": 0.2279, "step": 42927 }, { "epoch": 3.477640959170447, "grad_norm": 0.061738964170217514, "learning_rate": 2.9020207930149874e-05, "loss": 0.2213, "step": 42928 }, { "epoch": 3.4777219701879454, "grad_norm": 0.06986607611179352, "learning_rate": 2.9015707277555248e-05, "loss": 0.2009, "step": 42929 }, { "epoch": 3.477802981205444, "grad_norm": 0.10162478685379028, "learning_rate": 2.9011206624960618e-05, "loss": 0.2752, "step": 42930 }, { "epoch": 3.4778839922229423, "grad_norm": 0.07489980012178421, "learning_rate": 2.9006705972365995e-05, "loss": 0.2353, "step": 42931 }, { "epoch": 3.4779650032404406, "grad_norm": 0.0648399144411087, "learning_rate": 2.900220531977137e-05, "loss": 0.2228, "step": 42932 }, { "epoch": 3.4780460142579392, "grad_norm": 0.09491732716560364, "learning_rate": 2.899770466717674e-05, "loss": 0.2694, "step": 42933 }, { "epoch": 3.4781270252754375, "grad_norm": 0.07386920601129532, "learning_rate": 2.8993204014582116e-05, "loss": 0.2206, "step": 42934 }, { "epoch": 3.4782080362929357, "grad_norm": 0.08212792128324509, "learning_rate": 2.898870336198749e-05, "loss": 0.2421, "step": 42935 }, { "epoch": 3.478289047310434, "grad_norm": 0.0611363910138607, "learning_rate": 2.8984202709392866e-05, "loss": 0.2237, "step": 42936 }, { "epoch": 3.4783700583279327, "grad_norm": 0.07726096361875534, "learning_rate": 2.8979702056798237e-05, "loss": 0.277, "step": 42937 }, { "epoch": 3.478451069345431, "grad_norm": 0.05522330850362778, "learning_rate": 2.897520140420361e-05, "loss": 0.1983, "step": 42938 }, { "epoch": 3.478532080362929, "grad_norm": 0.07946809381246567, "learning_rate": 2.8970700751608987e-05, "loss": 0.1909, "step": 42939 }, { "epoch": 3.478613091380428, "grad_norm": 0.06915449351072311, "learning_rate": 2.8966200099014357e-05, "loss": 0.2124, "step": 42940 }, { "epoch": 3.478694102397926, "grad_norm": 0.0850968137383461, "learning_rate": 2.896169944641973e-05, "loss": 0.2116, "step": 42941 }, { "epoch": 3.4787751134154243, "grad_norm": 0.07038792967796326, "learning_rate": 2.8957198793825108e-05, "loss": 0.2848, "step": 42942 }, { "epoch": 3.478856124432923, "grad_norm": 0.06604676693677902, "learning_rate": 2.8952698141230478e-05, "loss": 0.1977, "step": 42943 }, { "epoch": 3.4789371354504213, "grad_norm": 0.09424113482236862, "learning_rate": 2.8948197488635852e-05, "loss": 0.2524, "step": 42944 }, { "epoch": 3.4790181464679195, "grad_norm": 0.08853418380022049, "learning_rate": 2.894369683604123e-05, "loss": 0.2285, "step": 42945 }, { "epoch": 3.479099157485418, "grad_norm": 0.0836792141199112, "learning_rate": 2.89391961834466e-05, "loss": 0.2105, "step": 42946 }, { "epoch": 3.4791801685029164, "grad_norm": 0.07371016591787338, "learning_rate": 2.8934695530851973e-05, "loss": 0.2769, "step": 42947 }, { "epoch": 3.4792611795204147, "grad_norm": 0.08501225709915161, "learning_rate": 2.893019487825735e-05, "loss": 0.2182, "step": 42948 }, { "epoch": 3.4793421905379134, "grad_norm": 0.07889309525489807, "learning_rate": 2.8925694225662727e-05, "loss": 0.2577, "step": 42949 }, { "epoch": 3.4794232015554116, "grad_norm": 0.07091579586267471, "learning_rate": 2.8921193573068093e-05, "loss": 0.2193, "step": 42950 }, { "epoch": 3.47950421257291, "grad_norm": 0.07333512604236603, "learning_rate": 2.891669292047347e-05, "loss": 0.1897, "step": 42951 }, { "epoch": 3.479585223590408, "grad_norm": 0.061077289283275604, "learning_rate": 2.8912192267878847e-05, "loss": 0.2442, "step": 42952 }, { "epoch": 3.479666234607907, "grad_norm": 0.0764218419790268, "learning_rate": 2.8907691615284214e-05, "loss": 0.2307, "step": 42953 }, { "epoch": 3.479747245625405, "grad_norm": 0.05201823636889458, "learning_rate": 2.890319096268959e-05, "loss": 0.2369, "step": 42954 }, { "epoch": 3.4798282566429033, "grad_norm": 0.07234396040439606, "learning_rate": 2.8898690310094968e-05, "loss": 0.223, "step": 42955 }, { "epoch": 3.479909267660402, "grad_norm": 0.07794514298439026, "learning_rate": 2.889418965750034e-05, "loss": 0.2579, "step": 42956 }, { "epoch": 3.4799902786779002, "grad_norm": 0.09208627790212631, "learning_rate": 2.8889689004905712e-05, "loss": 0.222, "step": 42957 }, { "epoch": 3.4800712896953985, "grad_norm": 0.09116863459348679, "learning_rate": 2.888518835231109e-05, "loss": 0.2219, "step": 42958 }, { "epoch": 3.4801523007128967, "grad_norm": 0.10785874724388123, "learning_rate": 2.888068769971646e-05, "loss": 0.238, "step": 42959 }, { "epoch": 3.4802333117303954, "grad_norm": 0.07596461474895477, "learning_rate": 2.8876187047121833e-05, "loss": 0.2486, "step": 42960 }, { "epoch": 3.4803143227478937, "grad_norm": 0.07258222997188568, "learning_rate": 2.887168639452721e-05, "loss": 0.2499, "step": 42961 }, { "epoch": 3.480395333765392, "grad_norm": 0.07630549371242523, "learning_rate": 2.8867185741932583e-05, "loss": 0.2674, "step": 42962 }, { "epoch": 3.4804763447828906, "grad_norm": 0.0784044861793518, "learning_rate": 2.8862685089337954e-05, "loss": 0.205, "step": 42963 }, { "epoch": 3.480557355800389, "grad_norm": 0.06193476542830467, "learning_rate": 2.885818443674333e-05, "loss": 0.1964, "step": 42964 }, { "epoch": 3.480638366817887, "grad_norm": 0.07275792956352234, "learning_rate": 2.8853683784148704e-05, "loss": 0.2368, "step": 42965 }, { "epoch": 3.4807193778353858, "grad_norm": 0.07585861533880234, "learning_rate": 2.8849183131554074e-05, "loss": 0.2616, "step": 42966 }, { "epoch": 3.480800388852884, "grad_norm": 0.06550201028585434, "learning_rate": 2.884468247895945e-05, "loss": 0.2009, "step": 42967 }, { "epoch": 3.4808813998703823, "grad_norm": 0.0743786096572876, "learning_rate": 2.8840181826364825e-05, "loss": 0.2461, "step": 42968 }, { "epoch": 3.480962410887881, "grad_norm": 0.0645429864525795, "learning_rate": 2.8835681173770195e-05, "loss": 0.2319, "step": 42969 }, { "epoch": 3.481043421905379, "grad_norm": 0.07213232666254044, "learning_rate": 2.8831180521175572e-05, "loss": 0.2039, "step": 42970 }, { "epoch": 3.4811244329228774, "grad_norm": 0.06800594180822372, "learning_rate": 2.8826679868580946e-05, "loss": 0.2208, "step": 42971 }, { "epoch": 3.481205443940376, "grad_norm": 0.06744236499071121, "learning_rate": 2.8822179215986316e-05, "loss": 0.2511, "step": 42972 }, { "epoch": 3.4812864549578744, "grad_norm": 0.07173527777194977, "learning_rate": 2.8817678563391693e-05, "loss": 0.2388, "step": 42973 }, { "epoch": 3.4813674659753726, "grad_norm": 0.07332679629325867, "learning_rate": 2.8813177910797067e-05, "loss": 0.2502, "step": 42974 }, { "epoch": 3.481448476992871, "grad_norm": 0.0669303610920906, "learning_rate": 2.8808677258202444e-05, "loss": 0.2514, "step": 42975 }, { "epoch": 3.4815294880103695, "grad_norm": 0.07635664939880371, "learning_rate": 2.8804176605607814e-05, "loss": 0.2145, "step": 42976 }, { "epoch": 3.481610499027868, "grad_norm": 0.06254716217517853, "learning_rate": 2.8799675953013188e-05, "loss": 0.2254, "step": 42977 }, { "epoch": 3.481691510045366, "grad_norm": 0.0803760215640068, "learning_rate": 2.8795175300418565e-05, "loss": 0.2156, "step": 42978 }, { "epoch": 3.4817725210628647, "grad_norm": 0.07645880430936813, "learning_rate": 2.8790674647823935e-05, "loss": 0.2409, "step": 42979 }, { "epoch": 3.481853532080363, "grad_norm": 0.06853979080915451, "learning_rate": 2.878617399522931e-05, "loss": 0.2102, "step": 42980 }, { "epoch": 3.481934543097861, "grad_norm": 0.07574337720870972, "learning_rate": 2.8781673342634685e-05, "loss": 0.2162, "step": 42981 }, { "epoch": 3.4820155541153595, "grad_norm": 0.07896383106708527, "learning_rate": 2.8777172690040056e-05, "loss": 0.2535, "step": 42982 }, { "epoch": 3.482096565132858, "grad_norm": 0.07682851701974869, "learning_rate": 2.877267203744543e-05, "loss": 0.2731, "step": 42983 }, { "epoch": 3.4821775761503564, "grad_norm": 0.07519050687551498, "learning_rate": 2.8768171384850806e-05, "loss": 0.2795, "step": 42984 }, { "epoch": 3.4822585871678546, "grad_norm": 0.07776078581809998, "learning_rate": 2.8763670732256176e-05, "loss": 0.2527, "step": 42985 }, { "epoch": 3.4823395981853533, "grad_norm": 0.07055551558732986, "learning_rate": 2.875917007966155e-05, "loss": 0.2383, "step": 42986 }, { "epoch": 3.4824206092028516, "grad_norm": 0.06520364433526993, "learning_rate": 2.8754669427066927e-05, "loss": 0.2551, "step": 42987 }, { "epoch": 3.48250162022035, "grad_norm": 0.08810622990131378, "learning_rate": 2.8750168774472304e-05, "loss": 0.2372, "step": 42988 }, { "epoch": 3.4825826312378485, "grad_norm": 0.08560158312320709, "learning_rate": 2.874566812187767e-05, "loss": 0.2731, "step": 42989 }, { "epoch": 3.4826636422553467, "grad_norm": 0.07116416841745377, "learning_rate": 2.8741167469283048e-05, "loss": 0.2183, "step": 42990 }, { "epoch": 3.482744653272845, "grad_norm": 0.08705449104309082, "learning_rate": 2.8736666816688425e-05, "loss": 0.2435, "step": 42991 }, { "epoch": 3.4828256642903437, "grad_norm": 0.07582302391529083, "learning_rate": 2.8732166164093795e-05, "loss": 0.2204, "step": 42992 }, { "epoch": 3.482906675307842, "grad_norm": 0.05476190522313118, "learning_rate": 2.872766551149917e-05, "loss": 0.1792, "step": 42993 }, { "epoch": 3.48298768632534, "grad_norm": 0.07866176217794418, "learning_rate": 2.8723164858904546e-05, "loss": 0.2561, "step": 42994 }, { "epoch": 3.483068697342839, "grad_norm": 0.0897442102432251, "learning_rate": 2.8718664206309916e-05, "loss": 0.2552, "step": 42995 }, { "epoch": 3.483149708360337, "grad_norm": 0.07748270034790039, "learning_rate": 2.871416355371529e-05, "loss": 0.246, "step": 42996 }, { "epoch": 3.4832307193778353, "grad_norm": 0.07693302631378174, "learning_rate": 2.8709662901120666e-05, "loss": 0.2287, "step": 42997 }, { "epoch": 3.4833117303953336, "grad_norm": 0.07699127495288849, "learning_rate": 2.8705162248526037e-05, "loss": 0.2412, "step": 42998 }, { "epoch": 3.4833927414128323, "grad_norm": 0.0830434113740921, "learning_rate": 2.870066159593141e-05, "loss": 0.2375, "step": 42999 }, { "epoch": 3.4834737524303305, "grad_norm": 0.08280211687088013, "learning_rate": 2.8696160943336787e-05, "loss": 0.2231, "step": 43000 }, { "epoch": 3.4835547634478288, "grad_norm": 0.07986078411340714, "learning_rate": 2.869166029074216e-05, "loss": 0.2447, "step": 43001 }, { "epoch": 3.4836357744653275, "grad_norm": 0.061485256999731064, "learning_rate": 2.868715963814753e-05, "loss": 0.212, "step": 43002 }, { "epoch": 3.4837167854828257, "grad_norm": 0.06983110308647156, "learning_rate": 2.8682658985552908e-05, "loss": 0.2147, "step": 43003 }, { "epoch": 3.483797796500324, "grad_norm": 0.07056194543838501, "learning_rate": 2.867815833295828e-05, "loss": 0.245, "step": 43004 }, { "epoch": 3.483878807517822, "grad_norm": 0.06419837474822998, "learning_rate": 2.8673657680363652e-05, "loss": 0.1985, "step": 43005 }, { "epoch": 3.483959818535321, "grad_norm": 0.08814400434494019, "learning_rate": 2.866915702776903e-05, "loss": 0.2252, "step": 43006 }, { "epoch": 3.484040829552819, "grad_norm": 0.051433440297842026, "learning_rate": 2.8664656375174402e-05, "loss": 0.2354, "step": 43007 }, { "epoch": 3.4841218405703174, "grad_norm": 0.0714108794927597, "learning_rate": 2.8660155722579773e-05, "loss": 0.26, "step": 43008 }, { "epoch": 3.484202851587816, "grad_norm": 0.07795058935880661, "learning_rate": 2.865565506998515e-05, "loss": 0.2651, "step": 43009 }, { "epoch": 3.4842838626053143, "grad_norm": 0.08348504453897476, "learning_rate": 2.8651154417390523e-05, "loss": 0.2096, "step": 43010 }, { "epoch": 3.4843648736228126, "grad_norm": 0.07615520060062408, "learning_rate": 2.8646653764795893e-05, "loss": 0.2437, "step": 43011 }, { "epoch": 3.4844458846403112, "grad_norm": 0.061513423919677734, "learning_rate": 2.864215311220127e-05, "loss": 0.237, "step": 43012 }, { "epoch": 3.4845268956578095, "grad_norm": 0.06726951152086258, "learning_rate": 2.8637652459606644e-05, "loss": 0.2385, "step": 43013 }, { "epoch": 3.4846079066753077, "grad_norm": 0.05101010948419571, "learning_rate": 2.863315180701202e-05, "loss": 0.167, "step": 43014 }, { "epoch": 3.4846889176928064, "grad_norm": 0.07691899687051773, "learning_rate": 2.862865115441739e-05, "loss": 0.2779, "step": 43015 }, { "epoch": 3.4847699287103047, "grad_norm": 0.06863217055797577, "learning_rate": 2.8624150501822765e-05, "loss": 0.252, "step": 43016 }, { "epoch": 3.484850939727803, "grad_norm": 0.06520593166351318, "learning_rate": 2.8619649849228142e-05, "loss": 0.2115, "step": 43017 }, { "epoch": 3.4849319507453016, "grad_norm": 0.07795144617557526, "learning_rate": 2.8615149196633512e-05, "loss": 0.24, "step": 43018 }, { "epoch": 3.4850129617628, "grad_norm": 0.06358601897954941, "learning_rate": 2.8610648544038886e-05, "loss": 0.2066, "step": 43019 }, { "epoch": 3.485093972780298, "grad_norm": 0.06517867743968964, "learning_rate": 2.8606147891444263e-05, "loss": 0.2033, "step": 43020 }, { "epoch": 3.4851749837977963, "grad_norm": 0.07494281232357025, "learning_rate": 2.8601647238849633e-05, "loss": 0.2197, "step": 43021 }, { "epoch": 3.485255994815295, "grad_norm": 0.062105692923069, "learning_rate": 2.8597146586255006e-05, "loss": 0.1766, "step": 43022 }, { "epoch": 3.4853370058327933, "grad_norm": 0.07513219118118286, "learning_rate": 2.8592645933660383e-05, "loss": 0.2344, "step": 43023 }, { "epoch": 3.4854180168502915, "grad_norm": 0.06747517734766006, "learning_rate": 2.8588145281065754e-05, "loss": 0.1994, "step": 43024 }, { "epoch": 3.48549902786779, "grad_norm": 0.08939173817634583, "learning_rate": 2.858364462847113e-05, "loss": 0.2326, "step": 43025 }, { "epoch": 3.4855800388852884, "grad_norm": 0.0805366039276123, "learning_rate": 2.8579143975876504e-05, "loss": 0.2367, "step": 43026 }, { "epoch": 3.4856610499027867, "grad_norm": 0.07593267410993576, "learning_rate": 2.857464332328188e-05, "loss": 0.2276, "step": 43027 }, { "epoch": 3.485742060920285, "grad_norm": 0.07195086777210236, "learning_rate": 2.857014267068725e-05, "loss": 0.24, "step": 43028 }, { "epoch": 3.4858230719377836, "grad_norm": 0.07583434879779816, "learning_rate": 2.8565642018092625e-05, "loss": 0.2191, "step": 43029 }, { "epoch": 3.485904082955282, "grad_norm": 0.08318190276622772, "learning_rate": 2.8561141365498002e-05, "loss": 0.2208, "step": 43030 }, { "epoch": 3.48598509397278, "grad_norm": 0.06685812771320343, "learning_rate": 2.8556640712903372e-05, "loss": 0.2368, "step": 43031 }, { "epoch": 3.486066104990279, "grad_norm": 0.062124937772750854, "learning_rate": 2.8552140060308746e-05, "loss": 0.2145, "step": 43032 }, { "epoch": 3.486147116007777, "grad_norm": 0.07486393302679062, "learning_rate": 2.8547639407714123e-05, "loss": 0.2283, "step": 43033 }, { "epoch": 3.4862281270252753, "grad_norm": 0.07032036036252975, "learning_rate": 2.8543138755119493e-05, "loss": 0.2012, "step": 43034 }, { "epoch": 3.486309138042774, "grad_norm": 0.07372421026229858, "learning_rate": 2.8538638102524867e-05, "loss": 0.2715, "step": 43035 }, { "epoch": 3.4863901490602722, "grad_norm": 0.06832101941108704, "learning_rate": 2.8534137449930244e-05, "loss": 0.2423, "step": 43036 }, { "epoch": 3.4864711600777705, "grad_norm": 0.07774647325277328, "learning_rate": 2.8529636797335614e-05, "loss": 0.1948, "step": 43037 }, { "epoch": 3.486552171095269, "grad_norm": 0.07262618094682693, "learning_rate": 2.8525136144740987e-05, "loss": 0.2192, "step": 43038 }, { "epoch": 3.4866331821127674, "grad_norm": 0.07207204401493073, "learning_rate": 2.8520635492146364e-05, "loss": 0.2241, "step": 43039 }, { "epoch": 3.4867141931302656, "grad_norm": 0.07984203100204468, "learning_rate": 2.8516134839551735e-05, "loss": 0.2085, "step": 43040 }, { "epoch": 3.4867952041477643, "grad_norm": 0.07520009577274323, "learning_rate": 2.8511634186957108e-05, "loss": 0.2282, "step": 43041 }, { "epoch": 3.4868762151652626, "grad_norm": 0.07236367464065552, "learning_rate": 2.8507133534362485e-05, "loss": 0.2311, "step": 43042 }, { "epoch": 3.486957226182761, "grad_norm": 0.09670182317495346, "learning_rate": 2.850263288176786e-05, "loss": 0.2533, "step": 43043 }, { "epoch": 3.487038237200259, "grad_norm": 0.060888949781656265, "learning_rate": 2.849813222917323e-05, "loss": 0.223, "step": 43044 }, { "epoch": 3.4871192482177578, "grad_norm": 0.07887989282608032, "learning_rate": 2.8493631576578606e-05, "loss": 0.2212, "step": 43045 }, { "epoch": 3.487200259235256, "grad_norm": 0.06833384931087494, "learning_rate": 2.848913092398398e-05, "loss": 0.1889, "step": 43046 }, { "epoch": 3.4872812702527543, "grad_norm": 0.06222878396511078, "learning_rate": 2.848463027138935e-05, "loss": 0.1947, "step": 43047 }, { "epoch": 3.487362281270253, "grad_norm": 0.0773792639374733, "learning_rate": 2.8480129618794727e-05, "loss": 0.2441, "step": 43048 }, { "epoch": 3.487443292287751, "grad_norm": 0.06242474913597107, "learning_rate": 2.84756289662001e-05, "loss": 0.2325, "step": 43049 }, { "epoch": 3.4875243033052494, "grad_norm": 0.06994125247001648, "learning_rate": 2.847112831360547e-05, "loss": 0.204, "step": 43050 }, { "epoch": 3.4876053143227477, "grad_norm": 0.07311102002859116, "learning_rate": 2.8466627661010848e-05, "loss": 0.2739, "step": 43051 }, { "epoch": 3.4876863253402464, "grad_norm": 0.08544214069843292, "learning_rate": 2.846212700841622e-05, "loss": 0.2583, "step": 43052 }, { "epoch": 3.4877673363577446, "grad_norm": 0.07062707096338272, "learning_rate": 2.845762635582159e-05, "loss": 0.2504, "step": 43053 }, { "epoch": 3.487848347375243, "grad_norm": 0.076682910323143, "learning_rate": 2.845312570322697e-05, "loss": 0.2503, "step": 43054 }, { "epoch": 3.4879293583927415, "grad_norm": 0.07357282936573029, "learning_rate": 2.8448625050632342e-05, "loss": 0.2166, "step": 43055 }, { "epoch": 3.48801036941024, "grad_norm": 0.059687837958335876, "learning_rate": 2.844412439803772e-05, "loss": 0.2396, "step": 43056 }, { "epoch": 3.488091380427738, "grad_norm": 0.06864503026008606, "learning_rate": 2.843962374544309e-05, "loss": 0.237, "step": 43057 }, { "epoch": 3.4881723914452367, "grad_norm": 0.05564036965370178, "learning_rate": 2.8435123092848466e-05, "loss": 0.2285, "step": 43058 }, { "epoch": 3.488253402462735, "grad_norm": 0.07747586816549301, "learning_rate": 2.843062244025384e-05, "loss": 0.2396, "step": 43059 }, { "epoch": 3.488334413480233, "grad_norm": 0.08911630511283875, "learning_rate": 2.842612178765921e-05, "loss": 0.2395, "step": 43060 }, { "epoch": 3.488415424497732, "grad_norm": 0.08266628533601761, "learning_rate": 2.8421621135064587e-05, "loss": 0.2303, "step": 43061 }, { "epoch": 3.48849643551523, "grad_norm": 0.0728074312210083, "learning_rate": 2.841712048246996e-05, "loss": 0.2531, "step": 43062 }, { "epoch": 3.4885774465327284, "grad_norm": 0.07155589014291763, "learning_rate": 2.841261982987533e-05, "loss": 0.2534, "step": 43063 }, { "epoch": 3.488658457550227, "grad_norm": 0.07294462621212006, "learning_rate": 2.8408119177280708e-05, "loss": 0.2265, "step": 43064 }, { "epoch": 3.4887394685677253, "grad_norm": 0.06267351657152176, "learning_rate": 2.840361852468608e-05, "loss": 0.1909, "step": 43065 }, { "epoch": 3.4888204795852236, "grad_norm": 0.07813186943531036, "learning_rate": 2.8399117872091452e-05, "loss": 0.265, "step": 43066 }, { "epoch": 3.488901490602722, "grad_norm": 0.055375028401613235, "learning_rate": 2.839461721949683e-05, "loss": 0.1829, "step": 43067 }, { "epoch": 3.4889825016202205, "grad_norm": 0.07581090927124023, "learning_rate": 2.8390116566902202e-05, "loss": 0.2428, "step": 43068 }, { "epoch": 3.4890635126377187, "grad_norm": 0.07282670587301254, "learning_rate": 2.838561591430758e-05, "loss": 0.2169, "step": 43069 }, { "epoch": 3.489144523655217, "grad_norm": 0.08286745846271515, "learning_rate": 2.838111526171295e-05, "loss": 0.2237, "step": 43070 }, { "epoch": 3.4892255346727157, "grad_norm": 0.07731908559799194, "learning_rate": 2.8376614609118323e-05, "loss": 0.1856, "step": 43071 }, { "epoch": 3.489306545690214, "grad_norm": 0.07564588636159897, "learning_rate": 2.83721139565237e-05, "loss": 0.2365, "step": 43072 }, { "epoch": 3.489387556707712, "grad_norm": 0.07171668857336044, "learning_rate": 2.836761330392907e-05, "loss": 0.2576, "step": 43073 }, { "epoch": 3.4894685677252104, "grad_norm": 0.06701047718524933, "learning_rate": 2.8363112651334444e-05, "loss": 0.2432, "step": 43074 }, { "epoch": 3.489549578742709, "grad_norm": 0.0751776471734047, "learning_rate": 2.835861199873982e-05, "loss": 0.2124, "step": 43075 }, { "epoch": 3.4896305897602073, "grad_norm": 0.06368733197450638, "learning_rate": 2.835411134614519e-05, "loss": 0.2255, "step": 43076 }, { "epoch": 3.4897116007777056, "grad_norm": 0.06492561846971512, "learning_rate": 2.8349610693550565e-05, "loss": 0.1783, "step": 43077 }, { "epoch": 3.4897926117952043, "grad_norm": 0.0625244602560997, "learning_rate": 2.8345110040955942e-05, "loss": 0.1977, "step": 43078 }, { "epoch": 3.4898736228127025, "grad_norm": 0.07491852343082428, "learning_rate": 2.8340609388361312e-05, "loss": 0.2121, "step": 43079 }, { "epoch": 3.4899546338302008, "grad_norm": 0.06318768858909607, "learning_rate": 2.8336108735766686e-05, "loss": 0.191, "step": 43080 }, { "epoch": 3.4900356448476995, "grad_norm": 0.07628298550844193, "learning_rate": 2.8331608083172063e-05, "loss": 0.2084, "step": 43081 }, { "epoch": 3.4901166558651977, "grad_norm": 0.09012982994318008, "learning_rate": 2.8327107430577436e-05, "loss": 0.2461, "step": 43082 }, { "epoch": 3.490197666882696, "grad_norm": 0.07702943682670593, "learning_rate": 2.8322606777982806e-05, "loss": 0.2149, "step": 43083 }, { "epoch": 3.4902786779001946, "grad_norm": 0.06603427976369858, "learning_rate": 2.8318106125388183e-05, "loss": 0.2223, "step": 43084 }, { "epoch": 3.490359688917693, "grad_norm": 0.06976785510778427, "learning_rate": 2.8313605472793557e-05, "loss": 0.2085, "step": 43085 }, { "epoch": 3.490440699935191, "grad_norm": 0.05959775671362877, "learning_rate": 2.8309104820198927e-05, "loss": 0.2169, "step": 43086 }, { "epoch": 3.4905217109526894, "grad_norm": 0.06909187883138657, "learning_rate": 2.8304604167604304e-05, "loss": 0.218, "step": 43087 }, { "epoch": 3.490602721970188, "grad_norm": 0.08458421379327774, "learning_rate": 2.8300103515009678e-05, "loss": 0.2573, "step": 43088 }, { "epoch": 3.4906837329876863, "grad_norm": 0.06887239217758179, "learning_rate": 2.8295602862415048e-05, "loss": 0.2332, "step": 43089 }, { "epoch": 3.4907647440051845, "grad_norm": 0.08373768627643585, "learning_rate": 2.8291102209820425e-05, "loss": 0.2235, "step": 43090 }, { "epoch": 3.4908457550226832, "grad_norm": 0.0646241307258606, "learning_rate": 2.82866015572258e-05, "loss": 0.1888, "step": 43091 }, { "epoch": 3.4909267660401815, "grad_norm": 0.06446173042058945, "learning_rate": 2.828210090463117e-05, "loss": 0.1878, "step": 43092 }, { "epoch": 3.4910077770576797, "grad_norm": 0.07939767837524414, "learning_rate": 2.8277600252036546e-05, "loss": 0.2235, "step": 43093 }, { "epoch": 3.4910887880751784, "grad_norm": 0.07496839016675949, "learning_rate": 2.8273099599441923e-05, "loss": 0.1852, "step": 43094 }, { "epoch": 3.4911697990926767, "grad_norm": 0.08608748018741608, "learning_rate": 2.8268598946847296e-05, "loss": 0.2398, "step": 43095 }, { "epoch": 3.491250810110175, "grad_norm": 0.07695958018302917, "learning_rate": 2.8264098294252667e-05, "loss": 0.2398, "step": 43096 }, { "epoch": 3.491331821127673, "grad_norm": 0.07370971143245697, "learning_rate": 2.8259597641658044e-05, "loss": 0.2336, "step": 43097 }, { "epoch": 3.491412832145172, "grad_norm": 0.08485149592161179, "learning_rate": 2.8255096989063417e-05, "loss": 0.2637, "step": 43098 }, { "epoch": 3.49149384316267, "grad_norm": 0.07185976207256317, "learning_rate": 2.8250596336468787e-05, "loss": 0.238, "step": 43099 }, { "epoch": 3.4915748541801683, "grad_norm": 0.06783389300107956, "learning_rate": 2.8246095683874164e-05, "loss": 0.1895, "step": 43100 }, { "epoch": 3.491655865197667, "grad_norm": 0.07479429244995117, "learning_rate": 2.8241595031279538e-05, "loss": 0.2595, "step": 43101 }, { "epoch": 3.4917368762151653, "grad_norm": 0.08388431370258331, "learning_rate": 2.8237094378684908e-05, "loss": 0.2347, "step": 43102 }, { "epoch": 3.4918178872326635, "grad_norm": 0.07217047363519669, "learning_rate": 2.8232593726090285e-05, "loss": 0.2323, "step": 43103 }, { "epoch": 3.491898898250162, "grad_norm": 0.07123685628175735, "learning_rate": 2.822809307349566e-05, "loss": 0.3135, "step": 43104 }, { "epoch": 3.4919799092676604, "grad_norm": 0.06716802716255188, "learning_rate": 2.822359242090103e-05, "loss": 0.1935, "step": 43105 }, { "epoch": 3.4920609202851587, "grad_norm": 0.06284788250923157, "learning_rate": 2.8219091768306406e-05, "loss": 0.204, "step": 43106 }, { "epoch": 3.4921419313026574, "grad_norm": 0.0679827407002449, "learning_rate": 2.821459111571178e-05, "loss": 0.2349, "step": 43107 }, { "epoch": 3.4922229423201556, "grad_norm": 0.07266151905059814, "learning_rate": 2.8210090463117157e-05, "loss": 0.2226, "step": 43108 }, { "epoch": 3.492303953337654, "grad_norm": 0.08697673678398132, "learning_rate": 2.8205589810522527e-05, "loss": 0.255, "step": 43109 }, { "epoch": 3.492384964355152, "grad_norm": 0.0750865638256073, "learning_rate": 2.82010891579279e-05, "loss": 0.2417, "step": 43110 }, { "epoch": 3.492465975372651, "grad_norm": 0.07341280579566956, "learning_rate": 2.8196588505333277e-05, "loss": 0.2504, "step": 43111 }, { "epoch": 3.492546986390149, "grad_norm": 0.07102368026971817, "learning_rate": 2.8192087852738648e-05, "loss": 0.2217, "step": 43112 }, { "epoch": 3.4926279974076473, "grad_norm": 0.06863326579332352, "learning_rate": 2.818758720014402e-05, "loss": 0.1962, "step": 43113 }, { "epoch": 3.492709008425146, "grad_norm": 0.0682842880487442, "learning_rate": 2.8183086547549398e-05, "loss": 0.2033, "step": 43114 }, { "epoch": 3.4927900194426442, "grad_norm": 0.07887216657400131, "learning_rate": 2.817858589495477e-05, "loss": 0.2345, "step": 43115 }, { "epoch": 3.4928710304601425, "grad_norm": 0.08103575557470322, "learning_rate": 2.8174085242360142e-05, "loss": 0.2348, "step": 43116 }, { "epoch": 3.4929520414776407, "grad_norm": 0.07701174169778824, "learning_rate": 2.816958458976552e-05, "loss": 0.2695, "step": 43117 }, { "epoch": 3.4930330524951394, "grad_norm": 0.06716512143611908, "learning_rate": 2.816508393717089e-05, "loss": 0.224, "step": 43118 }, { "epoch": 3.4931140635126376, "grad_norm": 0.08580772578716278, "learning_rate": 2.8160583284576263e-05, "loss": 0.2444, "step": 43119 }, { "epoch": 3.493195074530136, "grad_norm": 0.06725557148456573, "learning_rate": 2.815608263198164e-05, "loss": 0.2023, "step": 43120 }, { "epoch": 3.4932760855476346, "grad_norm": 0.06230286508798599, "learning_rate": 2.8151581979387014e-05, "loss": 0.2167, "step": 43121 }, { "epoch": 3.493357096565133, "grad_norm": 0.08195045590400696, "learning_rate": 2.8147081326792384e-05, "loss": 0.2101, "step": 43122 }, { "epoch": 3.493438107582631, "grad_norm": 0.06331407278776169, "learning_rate": 2.814258067419776e-05, "loss": 0.2322, "step": 43123 }, { "epoch": 3.4935191186001298, "grad_norm": 0.07344713062047958, "learning_rate": 2.8138080021603134e-05, "loss": 0.2243, "step": 43124 }, { "epoch": 3.493600129617628, "grad_norm": 0.07891742140054703, "learning_rate": 2.8133579369008505e-05, "loss": 0.2219, "step": 43125 }, { "epoch": 3.4936811406351262, "grad_norm": 0.06340258568525314, "learning_rate": 2.812907871641388e-05, "loss": 0.23, "step": 43126 }, { "epoch": 3.493762151652625, "grad_norm": 0.07598544657230377, "learning_rate": 2.812457806381926e-05, "loss": 0.247, "step": 43127 }, { "epoch": 3.493843162670123, "grad_norm": 0.07023898512125015, "learning_rate": 2.8120077411224625e-05, "loss": 0.2427, "step": 43128 }, { "epoch": 3.4939241736876214, "grad_norm": 0.06609073281288147, "learning_rate": 2.8115576758630002e-05, "loss": 0.1982, "step": 43129 }, { "epoch": 3.49400518470512, "grad_norm": 0.0786251425743103, "learning_rate": 2.811107610603538e-05, "loss": 0.2497, "step": 43130 }, { "epoch": 3.4940861957226184, "grad_norm": 0.0619148351252079, "learning_rate": 2.8106575453440746e-05, "loss": 0.2363, "step": 43131 }, { "epoch": 3.4941672067401166, "grad_norm": 0.06817974150180817, "learning_rate": 2.8102074800846123e-05, "loss": 0.2371, "step": 43132 }, { "epoch": 3.494248217757615, "grad_norm": 0.053594715893268585, "learning_rate": 2.80975741482515e-05, "loss": 0.1976, "step": 43133 }, { "epoch": 3.4943292287751135, "grad_norm": 0.07943201065063477, "learning_rate": 2.8093073495656874e-05, "loss": 0.2033, "step": 43134 }, { "epoch": 3.494410239792612, "grad_norm": 0.07399984449148178, "learning_rate": 2.8088572843062244e-05, "loss": 0.2317, "step": 43135 }, { "epoch": 3.49449125081011, "grad_norm": 0.07094179093837738, "learning_rate": 2.808407219046762e-05, "loss": 0.2202, "step": 43136 }, { "epoch": 3.4945722618276087, "grad_norm": 0.08724545687437057, "learning_rate": 2.8079571537872995e-05, "loss": 0.253, "step": 43137 }, { "epoch": 3.494653272845107, "grad_norm": 0.07042510062456131, "learning_rate": 2.8075070885278365e-05, "loss": 0.1907, "step": 43138 }, { "epoch": 3.494734283862605, "grad_norm": 0.08044608682394028, "learning_rate": 2.8070570232683742e-05, "loss": 0.243, "step": 43139 }, { "epoch": 3.4948152948801035, "grad_norm": 0.062232423573732376, "learning_rate": 2.8066069580089115e-05, "loss": 0.2121, "step": 43140 }, { "epoch": 3.494896305897602, "grad_norm": 0.07762039452791214, "learning_rate": 2.8061568927494486e-05, "loss": 0.2481, "step": 43141 }, { "epoch": 3.4949773169151004, "grad_norm": 0.0662919357419014, "learning_rate": 2.8057068274899863e-05, "loss": 0.2062, "step": 43142 }, { "epoch": 3.4950583279325986, "grad_norm": 0.08298102766275406, "learning_rate": 2.8052567622305236e-05, "loss": 0.2362, "step": 43143 }, { "epoch": 3.4951393389500973, "grad_norm": 0.08471732586622238, "learning_rate": 2.8048066969710606e-05, "loss": 0.2174, "step": 43144 }, { "epoch": 3.4952203499675956, "grad_norm": 0.06788258254528046, "learning_rate": 2.8043566317115983e-05, "loss": 0.2386, "step": 43145 }, { "epoch": 3.495301360985094, "grad_norm": 0.07684889435768127, "learning_rate": 2.8039065664521357e-05, "loss": 0.2072, "step": 43146 }, { "epoch": 3.4953823720025925, "grad_norm": 0.07309133559465408, "learning_rate": 2.8034565011926734e-05, "loss": 0.2119, "step": 43147 }, { "epoch": 3.4954633830200907, "grad_norm": 0.08018827438354492, "learning_rate": 2.8030064359332104e-05, "loss": 0.2341, "step": 43148 }, { "epoch": 3.495544394037589, "grad_norm": 0.0783516988158226, "learning_rate": 2.8025563706737478e-05, "loss": 0.2104, "step": 43149 }, { "epoch": 3.4956254050550877, "grad_norm": 0.07192262262105942, "learning_rate": 2.8021063054142855e-05, "loss": 0.2282, "step": 43150 }, { "epoch": 3.495706416072586, "grad_norm": 0.07899194210767746, "learning_rate": 2.8016562401548225e-05, "loss": 0.2352, "step": 43151 }, { "epoch": 3.495787427090084, "grad_norm": 0.07733827829360962, "learning_rate": 2.80120617489536e-05, "loss": 0.2129, "step": 43152 }, { "epoch": 3.495868438107583, "grad_norm": 0.0802890807390213, "learning_rate": 2.8007561096358976e-05, "loss": 0.202, "step": 43153 }, { "epoch": 3.495949449125081, "grad_norm": 0.08393776416778564, "learning_rate": 2.8003060443764346e-05, "loss": 0.22, "step": 43154 }, { "epoch": 3.4960304601425793, "grad_norm": 0.06426830589771271, "learning_rate": 2.799855979116972e-05, "loss": 0.2162, "step": 43155 }, { "epoch": 3.4961114711600776, "grad_norm": 0.06880275160074234, "learning_rate": 2.7994059138575096e-05, "loss": 0.245, "step": 43156 }, { "epoch": 3.4961924821775763, "grad_norm": 0.05830361321568489, "learning_rate": 2.7989558485980467e-05, "loss": 0.1964, "step": 43157 }, { "epoch": 3.4962734931950745, "grad_norm": 0.08080387115478516, "learning_rate": 2.798505783338584e-05, "loss": 0.2145, "step": 43158 }, { "epoch": 3.4963545042125728, "grad_norm": 0.056177180260419846, "learning_rate": 2.7980557180791217e-05, "loss": 0.1757, "step": 43159 }, { "epoch": 3.4964355152300715, "grad_norm": 0.07218663394451141, "learning_rate": 2.7976056528196594e-05, "loss": 0.2102, "step": 43160 }, { "epoch": 3.4965165262475697, "grad_norm": 0.061893776059150696, "learning_rate": 2.797155587560196e-05, "loss": 0.2437, "step": 43161 }, { "epoch": 3.496597537265068, "grad_norm": 0.0635591372847557, "learning_rate": 2.7967055223007338e-05, "loss": 0.2191, "step": 43162 }, { "epoch": 3.496678548282566, "grad_norm": 0.07298033684492111, "learning_rate": 2.7962554570412715e-05, "loss": 0.2531, "step": 43163 }, { "epoch": 3.496759559300065, "grad_norm": 0.07126526534557343, "learning_rate": 2.7958053917818082e-05, "loss": 0.2309, "step": 43164 }, { "epoch": 3.496840570317563, "grad_norm": 0.06935898214578629, "learning_rate": 2.795355326522346e-05, "loss": 0.2039, "step": 43165 }, { "epoch": 3.4969215813350614, "grad_norm": 0.07336989790201187, "learning_rate": 2.7949052612628836e-05, "loss": 0.2602, "step": 43166 }, { "epoch": 3.49700259235256, "grad_norm": 0.07263325154781342, "learning_rate": 2.7944551960034203e-05, "loss": 0.2771, "step": 43167 }, { "epoch": 3.4970836033700583, "grad_norm": 0.0619543083012104, "learning_rate": 2.794005130743958e-05, "loss": 0.2135, "step": 43168 }, { "epoch": 3.4971646143875565, "grad_norm": 0.07282905280590057, "learning_rate": 2.7935550654844957e-05, "loss": 0.213, "step": 43169 }, { "epoch": 3.4972456254050552, "grad_norm": 0.0697537288069725, "learning_rate": 2.7931050002250327e-05, "loss": 0.2358, "step": 43170 }, { "epoch": 3.4973266364225535, "grad_norm": 0.06539995968341827, "learning_rate": 2.79265493496557e-05, "loss": 0.2095, "step": 43171 }, { "epoch": 3.4974076474400517, "grad_norm": 0.07993856072425842, "learning_rate": 2.7922048697061077e-05, "loss": 0.222, "step": 43172 }, { "epoch": 3.4974886584575504, "grad_norm": 0.06634227186441422, "learning_rate": 2.791754804446645e-05, "loss": 0.2141, "step": 43173 }, { "epoch": 3.4975696694750487, "grad_norm": 0.07012040168046951, "learning_rate": 2.791304739187182e-05, "loss": 0.2357, "step": 43174 }, { "epoch": 3.497650680492547, "grad_norm": 0.08014750480651855, "learning_rate": 2.7908546739277198e-05, "loss": 0.206, "step": 43175 }, { "epoch": 3.4977316915100456, "grad_norm": 0.0736326202750206, "learning_rate": 2.7904046086682572e-05, "loss": 0.245, "step": 43176 }, { "epoch": 3.497812702527544, "grad_norm": 0.08184340596199036, "learning_rate": 2.7899545434087942e-05, "loss": 0.2249, "step": 43177 }, { "epoch": 3.497893713545042, "grad_norm": 0.09063094854354858, "learning_rate": 2.789504478149332e-05, "loss": 0.2004, "step": 43178 }, { "epoch": 3.4979747245625403, "grad_norm": 0.07432245463132858, "learning_rate": 2.7890544128898693e-05, "loss": 0.2286, "step": 43179 }, { "epoch": 3.498055735580039, "grad_norm": 0.05675497651100159, "learning_rate": 2.7886043476304063e-05, "loss": 0.1832, "step": 43180 }, { "epoch": 3.4981367465975373, "grad_norm": 0.07396359741687775, "learning_rate": 2.788154282370944e-05, "loss": 0.2235, "step": 43181 }, { "epoch": 3.4982177576150355, "grad_norm": 0.0632675364613533, "learning_rate": 2.7877042171114813e-05, "loss": 0.2125, "step": 43182 }, { "epoch": 3.498298768632534, "grad_norm": 0.0689416453242302, "learning_rate": 2.7872541518520184e-05, "loss": 0.2066, "step": 43183 }, { "epoch": 3.4983797796500324, "grad_norm": 0.06183299794793129, "learning_rate": 2.786804086592556e-05, "loss": 0.2191, "step": 43184 }, { "epoch": 3.4984607906675307, "grad_norm": 0.07677946984767914, "learning_rate": 2.7863540213330934e-05, "loss": 0.2463, "step": 43185 }, { "epoch": 3.498541801685029, "grad_norm": 0.06671515852212906, "learning_rate": 2.785903956073631e-05, "loss": 0.2442, "step": 43186 }, { "epoch": 3.4986228127025276, "grad_norm": 0.06701928377151489, "learning_rate": 2.785453890814168e-05, "loss": 0.2063, "step": 43187 }, { "epoch": 3.498703823720026, "grad_norm": 0.07234100997447968, "learning_rate": 2.7850038255547055e-05, "loss": 0.2092, "step": 43188 }, { "epoch": 3.498784834737524, "grad_norm": 0.07436953485012054, "learning_rate": 2.7845537602952432e-05, "loss": 0.2384, "step": 43189 }, { "epoch": 3.498865845755023, "grad_norm": 0.07147400081157684, "learning_rate": 2.7841036950357802e-05, "loss": 0.2286, "step": 43190 }, { "epoch": 3.498946856772521, "grad_norm": 0.0703418031334877, "learning_rate": 2.7836536297763176e-05, "loss": 0.2395, "step": 43191 }, { "epoch": 3.4990278677900193, "grad_norm": 0.08759965747594833, "learning_rate": 2.7832035645168553e-05, "loss": 0.219, "step": 43192 }, { "epoch": 3.499108878807518, "grad_norm": 0.08855955302715302, "learning_rate": 2.7827534992573923e-05, "loss": 0.2187, "step": 43193 }, { "epoch": 3.499189889825016, "grad_norm": 0.07268314808607101, "learning_rate": 2.7823034339979297e-05, "loss": 0.2551, "step": 43194 }, { "epoch": 3.4992709008425145, "grad_norm": 0.07844178378582001, "learning_rate": 2.7818533687384674e-05, "loss": 0.2519, "step": 43195 }, { "epoch": 3.499351911860013, "grad_norm": 0.0641513243317604, "learning_rate": 2.7814033034790044e-05, "loss": 0.2273, "step": 43196 }, { "epoch": 3.4994329228775114, "grad_norm": 0.06389816105365753, "learning_rate": 2.7809532382195418e-05, "loss": 0.2312, "step": 43197 }, { "epoch": 3.4995139338950096, "grad_norm": 0.09214530140161514, "learning_rate": 2.7805031729600795e-05, "loss": 0.2504, "step": 43198 }, { "epoch": 3.4995949449125083, "grad_norm": 0.08582887053489685, "learning_rate": 2.780053107700617e-05, "loss": 0.2388, "step": 43199 }, { "epoch": 3.4996759559300066, "grad_norm": 0.07315149903297424, "learning_rate": 2.779603042441154e-05, "loss": 0.2116, "step": 43200 }, { "epoch": 3.499756966947505, "grad_norm": 0.07323189824819565, "learning_rate": 2.7791529771816915e-05, "loss": 0.2109, "step": 43201 }, { "epoch": 3.499837977965003, "grad_norm": 0.07631869614124298, "learning_rate": 2.7787029119222292e-05, "loss": 0.2315, "step": 43202 }, { "epoch": 3.4999189889825018, "grad_norm": 0.06518463045358658, "learning_rate": 2.7782528466627663e-05, "loss": 0.2602, "step": 43203 }, { "epoch": 3.5, "grad_norm": 0.06721894443035126, "learning_rate": 2.7778027814033036e-05, "loss": 0.2188, "step": 43204 }, { "epoch": 3.5000810110174982, "grad_norm": 0.07444871962070465, "learning_rate": 2.7773527161438413e-05, "loss": 0.2278, "step": 43205 }, { "epoch": 3.5001620220349965, "grad_norm": 0.07968197762966156, "learning_rate": 2.7769026508843783e-05, "loss": 0.2442, "step": 43206 }, { "epoch": 3.500243033052495, "grad_norm": 0.07547379285097122, "learning_rate": 2.7764525856249157e-05, "loss": 0.221, "step": 43207 }, { "epoch": 3.5003240440699934, "grad_norm": 0.0792350247502327, "learning_rate": 2.7760025203654534e-05, "loss": 0.2134, "step": 43208 }, { "epoch": 3.5004050550874917, "grad_norm": 0.054382745176553726, "learning_rate": 2.7755524551059904e-05, "loss": 0.2152, "step": 43209 }, { "epoch": 3.5004860661049904, "grad_norm": 0.07119036465883255, "learning_rate": 2.7751023898465278e-05, "loss": 0.2673, "step": 43210 }, { "epoch": 3.5005670771224886, "grad_norm": 0.07480863481760025, "learning_rate": 2.7746523245870655e-05, "loss": 0.2604, "step": 43211 }, { "epoch": 3.500648088139987, "grad_norm": 0.08311598002910614, "learning_rate": 2.7742022593276025e-05, "loss": 0.2295, "step": 43212 }, { "epoch": 3.5007290991574855, "grad_norm": 0.07891567796468735, "learning_rate": 2.77375219406814e-05, "loss": 0.2204, "step": 43213 }, { "epoch": 3.500810110174984, "grad_norm": 0.07064730674028397, "learning_rate": 2.7733021288086776e-05, "loss": 0.2329, "step": 43214 }, { "epoch": 3.500891121192482, "grad_norm": 0.05172451213002205, "learning_rate": 2.772852063549215e-05, "loss": 0.2341, "step": 43215 }, { "epoch": 3.5009721322099807, "grad_norm": 0.07222268730401993, "learning_rate": 2.772401998289752e-05, "loss": 0.2346, "step": 43216 }, { "epoch": 3.501053143227479, "grad_norm": 0.08112135529518127, "learning_rate": 2.7719519330302896e-05, "loss": 0.2293, "step": 43217 }, { "epoch": 3.501134154244977, "grad_norm": 0.06870246678590775, "learning_rate": 2.771501867770827e-05, "loss": 0.2145, "step": 43218 }, { "epoch": 3.501215165262476, "grad_norm": 0.08160283416509628, "learning_rate": 2.771051802511364e-05, "loss": 0.2234, "step": 43219 }, { "epoch": 3.501296176279974, "grad_norm": 0.07019760459661484, "learning_rate": 2.7706017372519017e-05, "loss": 0.2204, "step": 43220 }, { "epoch": 3.5013771872974724, "grad_norm": 0.07481992244720459, "learning_rate": 2.770151671992439e-05, "loss": 0.2439, "step": 43221 }, { "epoch": 3.501458198314971, "grad_norm": 0.07392154633998871, "learning_rate": 2.769701606732976e-05, "loss": 0.2486, "step": 43222 }, { "epoch": 3.5015392093324693, "grad_norm": 0.08620406687259674, "learning_rate": 2.7692515414735138e-05, "loss": 0.2821, "step": 43223 }, { "epoch": 3.5016202203499676, "grad_norm": 0.05955655127763748, "learning_rate": 2.768801476214051e-05, "loss": 0.2428, "step": 43224 }, { "epoch": 3.5017012313674662, "grad_norm": 0.09394501894712448, "learning_rate": 2.7683514109545882e-05, "loss": 0.266, "step": 43225 }, { "epoch": 3.5017822423849645, "grad_norm": 0.06005888059735298, "learning_rate": 2.767901345695126e-05, "loss": 0.2169, "step": 43226 }, { "epoch": 3.5018632534024627, "grad_norm": 0.07512757182121277, "learning_rate": 2.7674512804356632e-05, "loss": 0.2282, "step": 43227 }, { "epoch": 3.501944264419961, "grad_norm": 0.06217007339000702, "learning_rate": 2.767001215176201e-05, "loss": 0.2056, "step": 43228 }, { "epoch": 3.5020252754374592, "grad_norm": 0.07704039663076401, "learning_rate": 2.766551149916738e-05, "loss": 0.229, "step": 43229 }, { "epoch": 3.502106286454958, "grad_norm": 0.09659653156995773, "learning_rate": 2.7661010846572753e-05, "loss": 0.2121, "step": 43230 }, { "epoch": 3.502187297472456, "grad_norm": 0.07554148137569427, "learning_rate": 2.765651019397813e-05, "loss": 0.2298, "step": 43231 }, { "epoch": 3.5022683084899544, "grad_norm": 0.08793529123067856, "learning_rate": 2.76520095413835e-05, "loss": 0.2959, "step": 43232 }, { "epoch": 3.502349319507453, "grad_norm": 0.07671716064214706, "learning_rate": 2.7647508888788874e-05, "loss": 0.2248, "step": 43233 }, { "epoch": 3.5024303305249513, "grad_norm": 0.07639263570308685, "learning_rate": 2.764300823619425e-05, "loss": 0.2581, "step": 43234 }, { "epoch": 3.5025113415424496, "grad_norm": 0.06445121765136719, "learning_rate": 2.763850758359962e-05, "loss": 0.172, "step": 43235 }, { "epoch": 3.5025923525599483, "grad_norm": 0.059592366218566895, "learning_rate": 2.7634006931004995e-05, "loss": 0.2377, "step": 43236 }, { "epoch": 3.5026733635774465, "grad_norm": 0.06346636265516281, "learning_rate": 2.7629506278410372e-05, "loss": 0.2086, "step": 43237 }, { "epoch": 3.5027543745949448, "grad_norm": 0.07458571344614029, "learning_rate": 2.7625005625815742e-05, "loss": 0.2029, "step": 43238 }, { "epoch": 3.5028353856124435, "grad_norm": 0.08330317586660385, "learning_rate": 2.762050497322112e-05, "loss": 0.2488, "step": 43239 }, { "epoch": 3.5029163966299417, "grad_norm": 0.06813330948352814, "learning_rate": 2.7616004320626493e-05, "loss": 0.2231, "step": 43240 }, { "epoch": 3.50299740764744, "grad_norm": 0.05927650257945061, "learning_rate": 2.761150366803187e-05, "loss": 0.2718, "step": 43241 }, { "epoch": 3.5030784186649386, "grad_norm": 0.07224024087190628, "learning_rate": 2.760700301543724e-05, "loss": 0.2205, "step": 43242 }, { "epoch": 3.503159429682437, "grad_norm": 0.08098310232162476, "learning_rate": 2.7602502362842613e-05, "loss": 0.2277, "step": 43243 }, { "epoch": 3.503240440699935, "grad_norm": 0.07933811098337173, "learning_rate": 2.759800171024799e-05, "loss": 0.2331, "step": 43244 }, { "epoch": 3.503321451717434, "grad_norm": 0.07267658412456512, "learning_rate": 2.759350105765336e-05, "loss": 0.2026, "step": 43245 }, { "epoch": 3.503402462734932, "grad_norm": 0.06561614573001862, "learning_rate": 2.7589000405058734e-05, "loss": 0.2246, "step": 43246 }, { "epoch": 3.5034834737524303, "grad_norm": 0.07257362455129623, "learning_rate": 2.758449975246411e-05, "loss": 0.211, "step": 43247 }, { "epoch": 3.5035644847699285, "grad_norm": 0.08041809499263763, "learning_rate": 2.757999909986948e-05, "loss": 0.2334, "step": 43248 }, { "epoch": 3.5036454957874272, "grad_norm": 0.06276113539934158, "learning_rate": 2.7575498447274855e-05, "loss": 0.2153, "step": 43249 }, { "epoch": 3.5037265068049255, "grad_norm": 0.07225324958562851, "learning_rate": 2.7570997794680232e-05, "loss": 0.2183, "step": 43250 }, { "epoch": 3.5038075178224237, "grad_norm": 0.06255420297384262, "learning_rate": 2.7566497142085602e-05, "loss": 0.1954, "step": 43251 }, { "epoch": 3.503888528839922, "grad_norm": 0.07800949364900589, "learning_rate": 2.7561996489490976e-05, "loss": 0.2321, "step": 43252 }, { "epoch": 3.5039695398574207, "grad_norm": 0.06854311376810074, "learning_rate": 2.7557495836896353e-05, "loss": 0.2122, "step": 43253 }, { "epoch": 3.504050550874919, "grad_norm": 0.07762733846902847, "learning_rate": 2.7552995184301726e-05, "loss": 0.2467, "step": 43254 }, { "epoch": 3.504131561892417, "grad_norm": 0.07714968174695969, "learning_rate": 2.7548494531707097e-05, "loss": 0.2438, "step": 43255 }, { "epoch": 3.504212572909916, "grad_norm": 0.09110550582408905, "learning_rate": 2.7543993879112474e-05, "loss": 0.2134, "step": 43256 }, { "epoch": 3.504293583927414, "grad_norm": 0.07318945974111557, "learning_rate": 2.7539493226517847e-05, "loss": 0.2475, "step": 43257 }, { "epoch": 3.5043745949449123, "grad_norm": 0.06953822076320648, "learning_rate": 2.7534992573923217e-05, "loss": 0.2478, "step": 43258 }, { "epoch": 3.504455605962411, "grad_norm": 0.0579572394490242, "learning_rate": 2.7530491921328594e-05, "loss": 0.234, "step": 43259 }, { "epoch": 3.5045366169799093, "grad_norm": 0.06732886284589767, "learning_rate": 2.7525991268733968e-05, "loss": 0.1999, "step": 43260 }, { "epoch": 3.5046176279974075, "grad_norm": 0.0854315236210823, "learning_rate": 2.7521490616139338e-05, "loss": 0.1762, "step": 43261 }, { "epoch": 3.504698639014906, "grad_norm": 0.0729583278298378, "learning_rate": 2.7516989963544715e-05, "loss": 0.2347, "step": 43262 }, { "epoch": 3.5047796500324044, "grad_norm": 0.08315985649824142, "learning_rate": 2.751248931095009e-05, "loss": 0.2299, "step": 43263 }, { "epoch": 3.5048606610499027, "grad_norm": 0.07033509016036987, "learning_rate": 2.750798865835546e-05, "loss": 0.2541, "step": 43264 }, { "epoch": 3.5049416720674014, "grad_norm": 0.09127448499202728, "learning_rate": 2.7503488005760836e-05, "loss": 0.2135, "step": 43265 }, { "epoch": 3.5050226830848996, "grad_norm": 0.07316549867391586, "learning_rate": 2.749898735316621e-05, "loss": 0.2027, "step": 43266 }, { "epoch": 3.505103694102398, "grad_norm": 0.07362278550863266, "learning_rate": 2.7494486700571587e-05, "loss": 0.2349, "step": 43267 }, { "epoch": 3.5051847051198965, "grad_norm": 0.07606764882802963, "learning_rate": 2.7489986047976957e-05, "loss": 0.2129, "step": 43268 }, { "epoch": 3.505265716137395, "grad_norm": 0.06892552971839905, "learning_rate": 2.748548539538233e-05, "loss": 0.2179, "step": 43269 }, { "epoch": 3.505346727154893, "grad_norm": 0.07052438706159592, "learning_rate": 2.7480984742787708e-05, "loss": 0.2269, "step": 43270 }, { "epoch": 3.5054277381723913, "grad_norm": 0.06831594556570053, "learning_rate": 2.7476484090193078e-05, "loss": 0.22, "step": 43271 }, { "epoch": 3.50550874918989, "grad_norm": 0.06844569742679596, "learning_rate": 2.7471983437598455e-05, "loss": 0.2113, "step": 43272 }, { "epoch": 3.505589760207388, "grad_norm": 0.06712247431278229, "learning_rate": 2.746748278500383e-05, "loss": 0.255, "step": 43273 }, { "epoch": 3.5056707712248865, "grad_norm": 0.06350905448198318, "learning_rate": 2.74629821324092e-05, "loss": 0.2123, "step": 43274 }, { "epoch": 3.5057517822423847, "grad_norm": 0.06915382295846939, "learning_rate": 2.7458481479814576e-05, "loss": 0.246, "step": 43275 }, { "epoch": 3.5058327932598834, "grad_norm": 0.061453476548194885, "learning_rate": 2.745398082721995e-05, "loss": 0.2213, "step": 43276 }, { "epoch": 3.5059138042773816, "grad_norm": 0.07296491414308548, "learning_rate": 2.744948017462532e-05, "loss": 0.2745, "step": 43277 }, { "epoch": 3.50599481529488, "grad_norm": 0.066260926425457, "learning_rate": 2.7444979522030696e-05, "loss": 0.224, "step": 43278 }, { "epoch": 3.5060758263123786, "grad_norm": 0.0701618492603302, "learning_rate": 2.744047886943607e-05, "loss": 0.2119, "step": 43279 }, { "epoch": 3.506156837329877, "grad_norm": 0.07789304852485657, "learning_rate": 2.7435978216841447e-05, "loss": 0.2391, "step": 43280 }, { "epoch": 3.506237848347375, "grad_norm": 0.07035718113183975, "learning_rate": 2.7431477564246817e-05, "loss": 0.2296, "step": 43281 }, { "epoch": 3.5063188593648738, "grad_norm": 0.08160745352506638, "learning_rate": 2.742697691165219e-05, "loss": 0.295, "step": 43282 }, { "epoch": 3.506399870382372, "grad_norm": 0.07453840225934982, "learning_rate": 2.7422476259057568e-05, "loss": 0.2016, "step": 43283 }, { "epoch": 3.5064808813998702, "grad_norm": 0.07481218129396439, "learning_rate": 2.7417975606462938e-05, "loss": 0.1983, "step": 43284 }, { "epoch": 3.506561892417369, "grad_norm": 0.07593325525522232, "learning_rate": 2.741347495386831e-05, "loss": 0.1987, "step": 43285 }, { "epoch": 3.506642903434867, "grad_norm": 0.07317264378070831, "learning_rate": 2.740897430127369e-05, "loss": 0.2239, "step": 43286 }, { "epoch": 3.5067239144523654, "grad_norm": 0.07845775038003922, "learning_rate": 2.740447364867906e-05, "loss": 0.2323, "step": 43287 }, { "epoch": 3.506804925469864, "grad_norm": 0.06483782827854156, "learning_rate": 2.7399972996084432e-05, "loss": 0.2071, "step": 43288 }, { "epoch": 3.5068859364873624, "grad_norm": 0.06465201079845428, "learning_rate": 2.739547234348981e-05, "loss": 0.2113, "step": 43289 }, { "epoch": 3.5069669475048606, "grad_norm": 0.07280424237251282, "learning_rate": 2.739097169089518e-05, "loss": 0.2113, "step": 43290 }, { "epoch": 3.5070479585223593, "grad_norm": 0.07811792194843292, "learning_rate": 2.7386471038300553e-05, "loss": 0.2373, "step": 43291 }, { "epoch": 3.5071289695398575, "grad_norm": 0.07018356025218964, "learning_rate": 2.738197038570593e-05, "loss": 0.213, "step": 43292 }, { "epoch": 3.5072099805573558, "grad_norm": 0.06678864359855652, "learning_rate": 2.7377469733111304e-05, "loss": 0.1966, "step": 43293 }, { "epoch": 3.507290991574854, "grad_norm": 0.07585125416517258, "learning_rate": 2.7372969080516674e-05, "loss": 0.1918, "step": 43294 }, { "epoch": 3.5073720025923527, "grad_norm": 0.07252201437950134, "learning_rate": 2.736846842792205e-05, "loss": 0.209, "step": 43295 }, { "epoch": 3.507453013609851, "grad_norm": 0.07959860563278198, "learning_rate": 2.7363967775327425e-05, "loss": 0.1957, "step": 43296 }, { "epoch": 3.507534024627349, "grad_norm": 0.0687057226896286, "learning_rate": 2.7359467122732795e-05, "loss": 0.2158, "step": 43297 }, { "epoch": 3.5076150356448474, "grad_norm": 0.08582094311714172, "learning_rate": 2.7354966470138172e-05, "loss": 0.2366, "step": 43298 }, { "epoch": 3.507696046662346, "grad_norm": 0.06664074212312698, "learning_rate": 2.7350465817543545e-05, "loss": 0.2462, "step": 43299 }, { "epoch": 3.5077770576798444, "grad_norm": 0.07581663131713867, "learning_rate": 2.7345965164948916e-05, "loss": 0.2236, "step": 43300 }, { "epoch": 3.5078580686973426, "grad_norm": 0.07287465035915375, "learning_rate": 2.7341464512354293e-05, "loss": 0.2285, "step": 43301 }, { "epoch": 3.5079390797148413, "grad_norm": 0.09149542450904846, "learning_rate": 2.7336963859759666e-05, "loss": 0.223, "step": 43302 }, { "epoch": 3.5080200907323396, "grad_norm": 0.06924710422754288, "learning_rate": 2.7332463207165036e-05, "loss": 0.2115, "step": 43303 }, { "epoch": 3.508101101749838, "grad_norm": 0.05233272537589073, "learning_rate": 2.7327962554570413e-05, "loss": 0.184, "step": 43304 }, { "epoch": 3.5081821127673365, "grad_norm": 0.07800295948982239, "learning_rate": 2.732346190197579e-05, "loss": 0.2365, "step": 43305 }, { "epoch": 3.5082631237848347, "grad_norm": 0.06714732199907303, "learning_rate": 2.7318961249381164e-05, "loss": 0.2179, "step": 43306 }, { "epoch": 3.508344134802333, "grad_norm": 0.08950652927160263, "learning_rate": 2.7314460596786534e-05, "loss": 0.2463, "step": 43307 }, { "epoch": 3.5084251458198317, "grad_norm": 0.06976626068353653, "learning_rate": 2.730995994419191e-05, "loss": 0.2343, "step": 43308 }, { "epoch": 3.50850615683733, "grad_norm": 0.06422611325979233, "learning_rate": 2.7305459291597285e-05, "loss": 0.1899, "step": 43309 }, { "epoch": 3.508587167854828, "grad_norm": 0.06161141023039818, "learning_rate": 2.7300958639002655e-05, "loss": 0.235, "step": 43310 }, { "epoch": 3.508668178872327, "grad_norm": 0.06769879907369614, "learning_rate": 2.7296457986408032e-05, "loss": 0.2243, "step": 43311 }, { "epoch": 3.508749189889825, "grad_norm": 0.08198713511228561, "learning_rate": 2.7291957333813406e-05, "loss": 0.2227, "step": 43312 }, { "epoch": 3.5088302009073233, "grad_norm": 0.06843384355306625, "learning_rate": 2.7287456681218776e-05, "loss": 0.226, "step": 43313 }, { "epoch": 3.508911211924822, "grad_norm": 0.07348859310150146, "learning_rate": 2.7282956028624153e-05, "loss": 0.2449, "step": 43314 }, { "epoch": 3.5089922229423203, "grad_norm": 0.06632348895072937, "learning_rate": 2.7278455376029526e-05, "loss": 0.2497, "step": 43315 }, { "epoch": 3.5090732339598185, "grad_norm": 0.0708722472190857, "learning_rate": 2.7273954723434897e-05, "loss": 0.2472, "step": 43316 }, { "epoch": 3.5091542449773168, "grad_norm": 0.05557356774806976, "learning_rate": 2.7269454070840274e-05, "loss": 0.1889, "step": 43317 }, { "epoch": 3.5092352559948155, "grad_norm": 0.07586564868688583, "learning_rate": 2.7264953418245647e-05, "loss": 0.2314, "step": 43318 }, { "epoch": 3.5093162670123137, "grad_norm": 0.0712403655052185, "learning_rate": 2.7260452765651024e-05, "loss": 0.2492, "step": 43319 }, { "epoch": 3.509397278029812, "grad_norm": 0.06740592420101166, "learning_rate": 2.7255952113056394e-05, "loss": 0.1801, "step": 43320 }, { "epoch": 3.50947828904731, "grad_norm": 0.07101822644472122, "learning_rate": 2.7251451460461768e-05, "loss": 0.214, "step": 43321 }, { "epoch": 3.509559300064809, "grad_norm": 0.07239147275686264, "learning_rate": 2.7246950807867145e-05, "loss": 0.2501, "step": 43322 }, { "epoch": 3.509640311082307, "grad_norm": 0.08746582269668579, "learning_rate": 2.7242450155272515e-05, "loss": 0.2814, "step": 43323 }, { "epoch": 3.5097213220998054, "grad_norm": 0.06535092741250992, "learning_rate": 2.723794950267789e-05, "loss": 0.1951, "step": 43324 }, { "epoch": 3.509802333117304, "grad_norm": 0.07915400713682175, "learning_rate": 2.7233448850083266e-05, "loss": 0.2454, "step": 43325 }, { "epoch": 3.5098833441348023, "grad_norm": 0.07169001549482346, "learning_rate": 2.7228948197488636e-05, "loss": 0.2505, "step": 43326 }, { "epoch": 3.5099643551523005, "grad_norm": 0.06705567240715027, "learning_rate": 2.722444754489401e-05, "loss": 0.2369, "step": 43327 }, { "epoch": 3.5100453661697992, "grad_norm": 0.07073170691728592, "learning_rate": 2.7219946892299387e-05, "loss": 0.2528, "step": 43328 }, { "epoch": 3.5101263771872975, "grad_norm": 0.0736280083656311, "learning_rate": 2.7215446239704757e-05, "loss": 0.2452, "step": 43329 }, { "epoch": 3.5102073882047957, "grad_norm": 0.06382174044847488, "learning_rate": 2.721094558711013e-05, "loss": 0.2075, "step": 43330 }, { "epoch": 3.5102883992222944, "grad_norm": 0.0716504231095314, "learning_rate": 2.7206444934515507e-05, "loss": 0.2617, "step": 43331 }, { "epoch": 3.5103694102397927, "grad_norm": 0.06723946332931519, "learning_rate": 2.720194428192088e-05, "loss": 0.225, "step": 43332 }, { "epoch": 3.510450421257291, "grad_norm": 0.0614875927567482, "learning_rate": 2.719744362932625e-05, "loss": 0.2012, "step": 43333 }, { "epoch": 3.5105314322747896, "grad_norm": 0.053596943616867065, "learning_rate": 2.7192942976731628e-05, "loss": 0.2283, "step": 43334 }, { "epoch": 3.510612443292288, "grad_norm": 0.08914054930210114, "learning_rate": 2.7188442324137002e-05, "loss": 0.2425, "step": 43335 }, { "epoch": 3.510693454309786, "grad_norm": 0.08104723691940308, "learning_rate": 2.7183941671542372e-05, "loss": 0.2473, "step": 43336 }, { "epoch": 3.5107744653272848, "grad_norm": 0.06316747516393661, "learning_rate": 2.717944101894775e-05, "loss": 0.2088, "step": 43337 }, { "epoch": 3.510855476344783, "grad_norm": 0.07501699030399323, "learning_rate": 2.7174940366353123e-05, "loss": 0.2572, "step": 43338 }, { "epoch": 3.5109364873622813, "grad_norm": 0.06786436587572098, "learning_rate": 2.7170439713758493e-05, "loss": 0.2194, "step": 43339 }, { "epoch": 3.5110174983797795, "grad_norm": 0.06833315640687943, "learning_rate": 2.716593906116387e-05, "loss": 0.2295, "step": 43340 }, { "epoch": 3.511098509397278, "grad_norm": 0.08341936022043228, "learning_rate": 2.7161438408569247e-05, "loss": 0.2208, "step": 43341 }, { "epoch": 3.5111795204147764, "grad_norm": 0.06512777507305145, "learning_rate": 2.7156937755974614e-05, "loss": 0.2167, "step": 43342 }, { "epoch": 3.5112605314322747, "grad_norm": 0.06528540700674057, "learning_rate": 2.715243710337999e-05, "loss": 0.2188, "step": 43343 }, { "epoch": 3.511341542449773, "grad_norm": 0.084648996591568, "learning_rate": 2.7147936450785368e-05, "loss": 0.2484, "step": 43344 }, { "epoch": 3.5114225534672716, "grad_norm": 0.06107432767748833, "learning_rate": 2.714343579819074e-05, "loss": 0.2201, "step": 43345 }, { "epoch": 3.51150356448477, "grad_norm": 0.0834152102470398, "learning_rate": 2.713893514559611e-05, "loss": 0.2239, "step": 43346 }, { "epoch": 3.511584575502268, "grad_norm": 0.07597782462835312, "learning_rate": 2.713443449300149e-05, "loss": 0.2435, "step": 43347 }, { "epoch": 3.511665586519767, "grad_norm": 0.07404717057943344, "learning_rate": 2.7129933840406862e-05, "loss": 0.236, "step": 43348 }, { "epoch": 3.511746597537265, "grad_norm": 0.06116487458348274, "learning_rate": 2.7125433187812232e-05, "loss": 0.2352, "step": 43349 }, { "epoch": 3.5118276085547633, "grad_norm": 0.069146066904068, "learning_rate": 2.712093253521761e-05, "loss": 0.2009, "step": 43350 }, { "epoch": 3.511908619572262, "grad_norm": 0.07820738852024078, "learning_rate": 2.7116431882622983e-05, "loss": 0.2373, "step": 43351 }, { "epoch": 3.51198963058976, "grad_norm": 0.0879845917224884, "learning_rate": 2.7111931230028353e-05, "loss": 0.2038, "step": 43352 }, { "epoch": 3.5120706416072585, "grad_norm": 0.0692344680428505, "learning_rate": 2.710743057743373e-05, "loss": 0.2639, "step": 43353 }, { "epoch": 3.512151652624757, "grad_norm": 0.08497405797243118, "learning_rate": 2.7102929924839104e-05, "loss": 0.2285, "step": 43354 }, { "epoch": 3.5122326636422554, "grad_norm": 0.07552628964185715, "learning_rate": 2.7098429272244474e-05, "loss": 0.2117, "step": 43355 }, { "epoch": 3.5123136746597536, "grad_norm": 0.07671625167131424, "learning_rate": 2.709392861964985e-05, "loss": 0.2403, "step": 43356 }, { "epoch": 3.5123946856772523, "grad_norm": 0.05206482112407684, "learning_rate": 2.7089427967055225e-05, "loss": 0.2331, "step": 43357 }, { "epoch": 3.5124756966947506, "grad_norm": 0.05869055166840553, "learning_rate": 2.70849273144606e-05, "loss": 0.2128, "step": 43358 }, { "epoch": 3.512556707712249, "grad_norm": 0.07022881507873535, "learning_rate": 2.7080426661865972e-05, "loss": 0.2038, "step": 43359 }, { "epoch": 3.5126377187297475, "grad_norm": 0.09413991868495941, "learning_rate": 2.7075926009271345e-05, "loss": 0.2002, "step": 43360 }, { "epoch": 3.5127187297472457, "grad_norm": 0.073529914021492, "learning_rate": 2.7071425356676722e-05, "loss": 0.2242, "step": 43361 }, { "epoch": 3.512799740764744, "grad_norm": 0.07193224877119064, "learning_rate": 2.7066924704082093e-05, "loss": 0.2196, "step": 43362 }, { "epoch": 3.5128807517822422, "grad_norm": 0.07748138159513474, "learning_rate": 2.7062424051487466e-05, "loss": 0.1935, "step": 43363 }, { "epoch": 3.512961762799741, "grad_norm": 0.09035055339336395, "learning_rate": 2.7057923398892843e-05, "loss": 0.244, "step": 43364 }, { "epoch": 3.513042773817239, "grad_norm": 0.09117685258388519, "learning_rate": 2.7053422746298213e-05, "loss": 0.2753, "step": 43365 }, { "epoch": 3.5131237848347374, "grad_norm": 0.06288989633321762, "learning_rate": 2.7048922093703587e-05, "loss": 0.2242, "step": 43366 }, { "epoch": 3.5132047958522357, "grad_norm": 0.05563490465283394, "learning_rate": 2.7044421441108964e-05, "loss": 0.2217, "step": 43367 }, { "epoch": 3.5132858068697344, "grad_norm": 0.06172913312911987, "learning_rate": 2.7039920788514334e-05, "loss": 0.2102, "step": 43368 }, { "epoch": 3.5133668178872326, "grad_norm": 0.05635415017604828, "learning_rate": 2.7035420135919708e-05, "loss": 0.199, "step": 43369 }, { "epoch": 3.513447828904731, "grad_norm": 0.0707196295261383, "learning_rate": 2.7030919483325085e-05, "loss": 0.2203, "step": 43370 }, { "epoch": 3.5135288399222295, "grad_norm": 0.07637009769678116, "learning_rate": 2.7026418830730455e-05, "loss": 0.2486, "step": 43371 }, { "epoch": 3.5136098509397278, "grad_norm": 0.07130232453346252, "learning_rate": 2.702191817813583e-05, "loss": 0.2888, "step": 43372 }, { "epoch": 3.513690861957226, "grad_norm": 0.07822450250387192, "learning_rate": 2.7017417525541206e-05, "loss": 0.2172, "step": 43373 }, { "epoch": 3.5137718729747247, "grad_norm": 0.07567783445119858, "learning_rate": 2.7012916872946583e-05, "loss": 0.2101, "step": 43374 }, { "epoch": 3.513852883992223, "grad_norm": 0.08278835564851761, "learning_rate": 2.700841622035195e-05, "loss": 0.2311, "step": 43375 }, { "epoch": 3.513933895009721, "grad_norm": 0.07226582616567612, "learning_rate": 2.7003915567757326e-05, "loss": 0.1846, "step": 43376 }, { "epoch": 3.51401490602722, "grad_norm": 0.09501677006483078, "learning_rate": 2.6999414915162703e-05, "loss": 0.2464, "step": 43377 }, { "epoch": 3.514095917044718, "grad_norm": 0.0690857470035553, "learning_rate": 2.699491426256807e-05, "loss": 0.198, "step": 43378 }, { "epoch": 3.5141769280622164, "grad_norm": 0.0640997588634491, "learning_rate": 2.6990413609973447e-05, "loss": 0.2525, "step": 43379 }, { "epoch": 3.514257939079715, "grad_norm": 0.08295673131942749, "learning_rate": 2.6985912957378824e-05, "loss": 0.2663, "step": 43380 }, { "epoch": 3.5143389500972133, "grad_norm": 0.05473535507917404, "learning_rate": 2.698141230478419e-05, "loss": 0.2107, "step": 43381 }, { "epoch": 3.5144199611147116, "grad_norm": 0.0696151927113533, "learning_rate": 2.6976911652189568e-05, "loss": 0.262, "step": 43382 }, { "epoch": 3.5145009721322102, "grad_norm": 0.10080565512180328, "learning_rate": 2.6972410999594945e-05, "loss": 0.2504, "step": 43383 }, { "epoch": 3.5145819831497085, "grad_norm": 0.07367312163114548, "learning_rate": 2.6967910347000315e-05, "loss": 0.2, "step": 43384 }, { "epoch": 3.5146629941672067, "grad_norm": 0.0623532272875309, "learning_rate": 2.696340969440569e-05, "loss": 0.2122, "step": 43385 }, { "epoch": 3.514744005184705, "grad_norm": 0.06241362541913986, "learning_rate": 2.6958909041811066e-05, "loss": 0.1914, "step": 43386 }, { "epoch": 3.5148250162022032, "grad_norm": 0.07094134390354156, "learning_rate": 2.695440838921644e-05, "loss": 0.2357, "step": 43387 }, { "epoch": 3.514906027219702, "grad_norm": 0.07751740515232086, "learning_rate": 2.694990773662181e-05, "loss": 0.2395, "step": 43388 }, { "epoch": 3.5149870382372, "grad_norm": 0.08845694363117218, "learning_rate": 2.6945407084027187e-05, "loss": 0.2242, "step": 43389 }, { "epoch": 3.5150680492546984, "grad_norm": 0.07656298577785492, "learning_rate": 2.694090643143256e-05, "loss": 0.2185, "step": 43390 }, { "epoch": 3.515149060272197, "grad_norm": 0.058742620050907135, "learning_rate": 2.693640577883793e-05, "loss": 0.1817, "step": 43391 }, { "epoch": 3.5152300712896953, "grad_norm": 0.06844554841518402, "learning_rate": 2.6931905126243307e-05, "loss": 0.2017, "step": 43392 }, { "epoch": 3.5153110823071936, "grad_norm": 0.06278139352798462, "learning_rate": 2.692740447364868e-05, "loss": 0.2204, "step": 43393 }, { "epoch": 3.5153920933246923, "grad_norm": 0.06582430005073547, "learning_rate": 2.692290382105405e-05, "loss": 0.2341, "step": 43394 }, { "epoch": 3.5154731043421905, "grad_norm": 0.070784792304039, "learning_rate": 2.6918403168459428e-05, "loss": 0.2349, "step": 43395 }, { "epoch": 3.5155541153596888, "grad_norm": 0.06411685794591904, "learning_rate": 2.6913902515864802e-05, "loss": 0.2045, "step": 43396 }, { "epoch": 3.5156351263771874, "grad_norm": 0.08133114874362946, "learning_rate": 2.6909401863270172e-05, "loss": 0.2236, "step": 43397 }, { "epoch": 3.5157161373946857, "grad_norm": 0.06460542976856232, "learning_rate": 2.690490121067555e-05, "loss": 0.2263, "step": 43398 }, { "epoch": 3.515797148412184, "grad_norm": 0.07408098876476288, "learning_rate": 2.6900400558080923e-05, "loss": 0.19, "step": 43399 }, { "epoch": 3.5158781594296826, "grad_norm": 0.08495966345071793, "learning_rate": 2.68958999054863e-05, "loss": 0.2577, "step": 43400 }, { "epoch": 3.515959170447181, "grad_norm": 0.0630396381020546, "learning_rate": 2.689139925289167e-05, "loss": 0.1878, "step": 43401 }, { "epoch": 3.516040181464679, "grad_norm": 0.07125577330589294, "learning_rate": 2.6886898600297043e-05, "loss": 0.2404, "step": 43402 }, { "epoch": 3.516121192482178, "grad_norm": 0.07317827641963959, "learning_rate": 2.688239794770242e-05, "loss": 0.2285, "step": 43403 }, { "epoch": 3.516202203499676, "grad_norm": 0.07188070565462112, "learning_rate": 2.687789729510779e-05, "loss": 0.2356, "step": 43404 }, { "epoch": 3.5162832145171743, "grad_norm": 0.06981553882360458, "learning_rate": 2.6873396642513164e-05, "loss": 0.2491, "step": 43405 }, { "epoch": 3.516364225534673, "grad_norm": 0.08644460141658783, "learning_rate": 2.686889598991854e-05, "loss": 0.2656, "step": 43406 }, { "epoch": 3.5164452365521712, "grad_norm": 0.08687439560890198, "learning_rate": 2.686439533732391e-05, "loss": 0.2114, "step": 43407 }, { "epoch": 3.5165262475696695, "grad_norm": 0.06241988390684128, "learning_rate": 2.6859894684729285e-05, "loss": 0.2051, "step": 43408 }, { "epoch": 3.5166072585871677, "grad_norm": 0.08557629585266113, "learning_rate": 2.6855394032134662e-05, "loss": 0.2346, "step": 43409 }, { "epoch": 3.516688269604666, "grad_norm": 0.07166421413421631, "learning_rate": 2.6850893379540032e-05, "loss": 0.201, "step": 43410 }, { "epoch": 3.5167692806221647, "grad_norm": 0.08565226197242737, "learning_rate": 2.6846392726945406e-05, "loss": 0.2258, "step": 43411 }, { "epoch": 3.516850291639663, "grad_norm": 0.07092215865850449, "learning_rate": 2.6841892074350783e-05, "loss": 0.2296, "step": 43412 }, { "epoch": 3.516931302657161, "grad_norm": 0.07098492980003357, "learning_rate": 2.683739142175616e-05, "loss": 0.2104, "step": 43413 }, { "epoch": 3.51701231367466, "grad_norm": 0.06782843917608261, "learning_rate": 2.6832890769161527e-05, "loss": 0.196, "step": 43414 }, { "epoch": 3.517093324692158, "grad_norm": 0.07023775577545166, "learning_rate": 2.6828390116566904e-05, "loss": 0.246, "step": 43415 }, { "epoch": 3.5171743357096563, "grad_norm": 0.0655292496085167, "learning_rate": 2.682388946397228e-05, "loss": 0.2372, "step": 43416 }, { "epoch": 3.517255346727155, "grad_norm": 0.10283497720956802, "learning_rate": 2.681938881137765e-05, "loss": 0.2896, "step": 43417 }, { "epoch": 3.5173363577446533, "grad_norm": 0.10663864761590958, "learning_rate": 2.6814888158783025e-05, "loss": 0.2179, "step": 43418 }, { "epoch": 3.5174173687621515, "grad_norm": 0.07355464994907379, "learning_rate": 2.68103875061884e-05, "loss": 0.2129, "step": 43419 }, { "epoch": 3.51749837977965, "grad_norm": 0.07699501514434814, "learning_rate": 2.6805886853593772e-05, "loss": 0.2353, "step": 43420 }, { "epoch": 3.5175793907971484, "grad_norm": 0.0732915848493576, "learning_rate": 2.6801386200999145e-05, "loss": 0.2296, "step": 43421 }, { "epoch": 3.5176604018146467, "grad_norm": 0.07379954308271408, "learning_rate": 2.6796885548404522e-05, "loss": 0.2263, "step": 43422 }, { "epoch": 3.5177414128321454, "grad_norm": 0.11059697717428207, "learning_rate": 2.6792384895809893e-05, "loss": 0.2475, "step": 43423 }, { "epoch": 3.5178224238496436, "grad_norm": 0.0744595155119896, "learning_rate": 2.6787884243215266e-05, "loss": 0.2472, "step": 43424 }, { "epoch": 3.517903434867142, "grad_norm": 0.0822412520647049, "learning_rate": 2.6783383590620643e-05, "loss": 0.2009, "step": 43425 }, { "epoch": 3.5179844458846405, "grad_norm": 0.0705241933465004, "learning_rate": 2.6778882938026017e-05, "loss": 0.2339, "step": 43426 }, { "epoch": 3.518065456902139, "grad_norm": 0.05770557373762131, "learning_rate": 2.6774382285431387e-05, "loss": 0.21, "step": 43427 }, { "epoch": 3.518146467919637, "grad_norm": 0.06801163405179977, "learning_rate": 2.6769881632836764e-05, "loss": 0.2209, "step": 43428 }, { "epoch": 3.5182274789371357, "grad_norm": 0.0825352743268013, "learning_rate": 2.6765380980242138e-05, "loss": 0.2544, "step": 43429 }, { "epoch": 3.518308489954634, "grad_norm": 0.07521167397499084, "learning_rate": 2.6760880327647508e-05, "loss": 0.2237, "step": 43430 }, { "epoch": 3.518389500972132, "grad_norm": 0.06605440378189087, "learning_rate": 2.6756379675052885e-05, "loss": 0.2292, "step": 43431 }, { "epoch": 3.5184705119896305, "grad_norm": 0.06706567108631134, "learning_rate": 2.675187902245826e-05, "loss": 0.2135, "step": 43432 }, { "epoch": 3.5185515230071287, "grad_norm": 0.058007679879665375, "learning_rate": 2.674737836986363e-05, "loss": 0.2274, "step": 43433 }, { "epoch": 3.5186325340246274, "grad_norm": 0.07411615550518036, "learning_rate": 2.6742877717269006e-05, "loss": 0.2037, "step": 43434 }, { "epoch": 3.5187135450421256, "grad_norm": 0.061007410287857056, "learning_rate": 2.673837706467438e-05, "loss": 0.1991, "step": 43435 }, { "epoch": 3.518794556059624, "grad_norm": 0.06855777651071548, "learning_rate": 2.673387641207975e-05, "loss": 0.2241, "step": 43436 }, { "epoch": 3.5188755670771226, "grad_norm": 0.08268239349126816, "learning_rate": 2.6729375759485126e-05, "loss": 0.2109, "step": 43437 }, { "epoch": 3.518956578094621, "grad_norm": 0.06946399062871933, "learning_rate": 2.67248751068905e-05, "loss": 0.2634, "step": 43438 }, { "epoch": 3.519037589112119, "grad_norm": 0.06716107577085495, "learning_rate": 2.6720374454295877e-05, "loss": 0.2045, "step": 43439 }, { "epoch": 3.5191186001296177, "grad_norm": 0.07133271545171738, "learning_rate": 2.6715873801701247e-05, "loss": 0.2299, "step": 43440 }, { "epoch": 3.519199611147116, "grad_norm": 0.09417259693145752, "learning_rate": 2.671137314910662e-05, "loss": 0.2444, "step": 43441 }, { "epoch": 3.5192806221646142, "grad_norm": 0.06738929450511932, "learning_rate": 2.6706872496511998e-05, "loss": 0.2101, "step": 43442 }, { "epoch": 3.519361633182113, "grad_norm": 0.07788601517677307, "learning_rate": 2.6702371843917368e-05, "loss": 0.2391, "step": 43443 }, { "epoch": 3.519442644199611, "grad_norm": 0.07475084066390991, "learning_rate": 2.669787119132274e-05, "loss": 0.2092, "step": 43444 }, { "epoch": 3.5195236552171094, "grad_norm": 0.06062447652220726, "learning_rate": 2.669337053872812e-05, "loss": 0.1965, "step": 43445 }, { "epoch": 3.519604666234608, "grad_norm": 0.057632926851511, "learning_rate": 2.668886988613349e-05, "loss": 0.1866, "step": 43446 }, { "epoch": 3.5196856772521063, "grad_norm": 0.08677884191274643, "learning_rate": 2.6684369233538862e-05, "loss": 0.2396, "step": 43447 }, { "epoch": 3.5197666882696046, "grad_norm": 0.07600312680006027, "learning_rate": 2.667986858094424e-05, "loss": 0.1992, "step": 43448 }, { "epoch": 3.5198476992871033, "grad_norm": 0.08222731947898865, "learning_rate": 2.667536792834961e-05, "loss": 0.2397, "step": 43449 }, { "epoch": 3.5199287103046015, "grad_norm": 0.07942266762256622, "learning_rate": 2.6670867275754983e-05, "loss": 0.225, "step": 43450 }, { "epoch": 3.5200097213220998, "grad_norm": 0.09600380808115005, "learning_rate": 2.666636662316036e-05, "loss": 0.2707, "step": 43451 }, { "epoch": 3.5200907323395985, "grad_norm": 0.08181590586900711, "learning_rate": 2.6661865970565737e-05, "loss": 0.2215, "step": 43452 }, { "epoch": 3.5201717433570967, "grad_norm": 0.06803275644779205, "learning_rate": 2.6657365317971107e-05, "loss": 0.2304, "step": 43453 }, { "epoch": 3.520252754374595, "grad_norm": 0.06476163119077682, "learning_rate": 2.665286466537648e-05, "loss": 0.2317, "step": 43454 }, { "epoch": 3.520333765392093, "grad_norm": 0.06914833933115005, "learning_rate": 2.6648364012781858e-05, "loss": 0.2051, "step": 43455 }, { "epoch": 3.5204147764095914, "grad_norm": 0.07922517508268356, "learning_rate": 2.6643863360187228e-05, "loss": 0.2147, "step": 43456 }, { "epoch": 3.52049578742709, "grad_norm": 0.0863863155245781, "learning_rate": 2.6639362707592602e-05, "loss": 0.2379, "step": 43457 }, { "epoch": 3.5205767984445884, "grad_norm": 0.0803600624203682, "learning_rate": 2.663486205499798e-05, "loss": 0.2228, "step": 43458 }, { "epoch": 3.5206578094620866, "grad_norm": 0.0711086243391037, "learning_rate": 2.663036140240335e-05, "loss": 0.2155, "step": 43459 }, { "epoch": 3.5207388204795853, "grad_norm": 0.07211203128099442, "learning_rate": 2.6625860749808723e-05, "loss": 0.2323, "step": 43460 }, { "epoch": 3.5208198314970836, "grad_norm": 0.06690596044063568, "learning_rate": 2.66213600972141e-05, "loss": 0.217, "step": 43461 }, { "epoch": 3.520900842514582, "grad_norm": 0.06400663405656815, "learning_rate": 2.661685944461947e-05, "loss": 0.2124, "step": 43462 }, { "epoch": 3.5209818535320805, "grad_norm": 0.06587471812963486, "learning_rate": 2.6612358792024843e-05, "loss": 0.207, "step": 43463 }, { "epoch": 3.5210628645495787, "grad_norm": 0.07347527891397476, "learning_rate": 2.660785813943022e-05, "loss": 0.2551, "step": 43464 }, { "epoch": 3.521143875567077, "grad_norm": 0.0675148069858551, "learning_rate": 2.6603357486835594e-05, "loss": 0.2076, "step": 43465 }, { "epoch": 3.5212248865845757, "grad_norm": 0.06692413985729218, "learning_rate": 2.6598856834240964e-05, "loss": 0.2183, "step": 43466 }, { "epoch": 3.521305897602074, "grad_norm": 0.06547009944915771, "learning_rate": 2.659435618164634e-05, "loss": 0.2368, "step": 43467 }, { "epoch": 3.521386908619572, "grad_norm": 0.06786834448575974, "learning_rate": 2.6589855529051715e-05, "loss": 0.2102, "step": 43468 }, { "epoch": 3.521467919637071, "grad_norm": 0.07979714125394821, "learning_rate": 2.6585354876457085e-05, "loss": 0.2283, "step": 43469 }, { "epoch": 3.521548930654569, "grad_norm": 0.08165741711854935, "learning_rate": 2.6580854223862462e-05, "loss": 0.2312, "step": 43470 }, { "epoch": 3.5216299416720673, "grad_norm": 0.06408173590898514, "learning_rate": 2.6576353571267836e-05, "loss": 0.2192, "step": 43471 }, { "epoch": 3.521710952689566, "grad_norm": 0.0637429803609848, "learning_rate": 2.6571852918673206e-05, "loss": 0.2311, "step": 43472 }, { "epoch": 3.5217919637070643, "grad_norm": 0.09980673342943192, "learning_rate": 2.6567352266078583e-05, "loss": 0.2742, "step": 43473 }, { "epoch": 3.5218729747245625, "grad_norm": 0.07004711031913757, "learning_rate": 2.6562851613483956e-05, "loss": 0.2271, "step": 43474 }, { "epoch": 3.5219539857420608, "grad_norm": 0.07771208882331848, "learning_rate": 2.6558350960889327e-05, "loss": 0.256, "step": 43475 }, { "epoch": 3.5220349967595594, "grad_norm": 0.06684209406375885, "learning_rate": 2.6553850308294704e-05, "loss": 0.226, "step": 43476 }, { "epoch": 3.5221160077770577, "grad_norm": 0.07327425479888916, "learning_rate": 2.6549349655700077e-05, "loss": 0.2919, "step": 43477 }, { "epoch": 3.522197018794556, "grad_norm": 0.05451936274766922, "learning_rate": 2.6544849003105454e-05, "loss": 0.1945, "step": 43478 }, { "epoch": 3.522278029812054, "grad_norm": 0.06572496891021729, "learning_rate": 2.6540348350510824e-05, "loss": 0.212, "step": 43479 }, { "epoch": 3.522359040829553, "grad_norm": 0.08500397950410843, "learning_rate": 2.6535847697916198e-05, "loss": 0.2106, "step": 43480 }, { "epoch": 3.522440051847051, "grad_norm": 0.07888907194137573, "learning_rate": 2.6531347045321575e-05, "loss": 0.212, "step": 43481 }, { "epoch": 3.5225210628645494, "grad_norm": 0.07335029542446136, "learning_rate": 2.6526846392726945e-05, "loss": 0.2303, "step": 43482 }, { "epoch": 3.522602073882048, "grad_norm": 0.06485271453857422, "learning_rate": 2.652234574013232e-05, "loss": 0.1977, "step": 43483 }, { "epoch": 3.5226830848995463, "grad_norm": 0.07313435524702072, "learning_rate": 2.6517845087537696e-05, "loss": 0.233, "step": 43484 }, { "epoch": 3.5227640959170445, "grad_norm": 0.07400259375572205, "learning_rate": 2.6513344434943066e-05, "loss": 0.2333, "step": 43485 }, { "epoch": 3.5228451069345432, "grad_norm": 0.07800918072462082, "learning_rate": 2.6508843782348443e-05, "loss": 0.2451, "step": 43486 }, { "epoch": 3.5229261179520415, "grad_norm": 0.0771111249923706, "learning_rate": 2.6504343129753817e-05, "loss": 0.21, "step": 43487 }, { "epoch": 3.5230071289695397, "grad_norm": 0.07032516598701477, "learning_rate": 2.6499842477159187e-05, "loss": 0.2256, "step": 43488 }, { "epoch": 3.5230881399870384, "grad_norm": 0.0727558583021164, "learning_rate": 2.6495341824564564e-05, "loss": 0.2364, "step": 43489 }, { "epoch": 3.5231691510045366, "grad_norm": 0.056780096143484116, "learning_rate": 2.6490841171969938e-05, "loss": 0.2018, "step": 43490 }, { "epoch": 3.523250162022035, "grad_norm": 0.07738316059112549, "learning_rate": 2.6486340519375314e-05, "loss": 0.2495, "step": 43491 }, { "epoch": 3.5233311730395336, "grad_norm": 0.07734917104244232, "learning_rate": 2.6481839866780685e-05, "loss": 0.211, "step": 43492 }, { "epoch": 3.523412184057032, "grad_norm": 0.05843658745288849, "learning_rate": 2.647733921418606e-05, "loss": 0.2026, "step": 43493 }, { "epoch": 3.52349319507453, "grad_norm": 0.06908644735813141, "learning_rate": 2.6472838561591435e-05, "loss": 0.2264, "step": 43494 }, { "epoch": 3.5235742060920288, "grad_norm": 0.07848634570837021, "learning_rate": 2.6468337908996806e-05, "loss": 0.1974, "step": 43495 }, { "epoch": 3.523655217109527, "grad_norm": 0.06460677087306976, "learning_rate": 2.646383725640218e-05, "loss": 0.2461, "step": 43496 }, { "epoch": 3.5237362281270252, "grad_norm": 0.07518939673900604, "learning_rate": 2.6459336603807556e-05, "loss": 0.2351, "step": 43497 }, { "epoch": 3.5238172391445235, "grad_norm": 0.06146230548620224, "learning_rate": 2.6454835951212926e-05, "loss": 0.2307, "step": 43498 }, { "epoch": 3.523898250162022, "grad_norm": 0.08571221679449081, "learning_rate": 2.64503352986183e-05, "loss": 0.2154, "step": 43499 }, { "epoch": 3.5239792611795204, "grad_norm": 0.08313552290201187, "learning_rate": 2.6445834646023677e-05, "loss": 0.2433, "step": 43500 }, { "epoch": 3.5240602721970187, "grad_norm": 0.08192568272352219, "learning_rate": 2.6441333993429047e-05, "loss": 0.2015, "step": 43501 }, { "epoch": 3.524141283214517, "grad_norm": 0.08400919288396835, "learning_rate": 2.643683334083442e-05, "loss": 0.2592, "step": 43502 }, { "epoch": 3.5242222942320156, "grad_norm": 0.06450191885232925, "learning_rate": 2.6432332688239798e-05, "loss": 0.2239, "step": 43503 }, { "epoch": 3.524303305249514, "grad_norm": 0.09003297984600067, "learning_rate": 2.642783203564517e-05, "loss": 0.2483, "step": 43504 }, { "epoch": 3.524384316267012, "grad_norm": 0.07139277458190918, "learning_rate": 2.642333138305054e-05, "loss": 0.2072, "step": 43505 }, { "epoch": 3.524465327284511, "grad_norm": 0.08002956211566925, "learning_rate": 2.641883073045592e-05, "loss": 0.2629, "step": 43506 }, { "epoch": 3.524546338302009, "grad_norm": 0.07315345108509064, "learning_rate": 2.6414330077861292e-05, "loss": 0.2488, "step": 43507 }, { "epoch": 3.5246273493195073, "grad_norm": 0.06532798707485199, "learning_rate": 2.6409829425266662e-05, "loss": 0.2092, "step": 43508 }, { "epoch": 3.524708360337006, "grad_norm": 0.07230593264102936, "learning_rate": 2.640532877267204e-05, "loss": 0.2302, "step": 43509 }, { "epoch": 3.524789371354504, "grad_norm": 0.06936454027891159, "learning_rate": 2.6400828120077413e-05, "loss": 0.2251, "step": 43510 }, { "epoch": 3.5248703823720025, "grad_norm": 0.07026039808988571, "learning_rate": 2.6396327467482783e-05, "loss": 0.2332, "step": 43511 }, { "epoch": 3.524951393389501, "grad_norm": 0.07848269492387772, "learning_rate": 2.639182681488816e-05, "loss": 0.224, "step": 43512 }, { "epoch": 3.5250324044069994, "grad_norm": 0.07015900313854218, "learning_rate": 2.6387326162293534e-05, "loss": 0.2114, "step": 43513 }, { "epoch": 3.5251134154244976, "grad_norm": 0.0776764303445816, "learning_rate": 2.6382825509698904e-05, "loss": 0.2402, "step": 43514 }, { "epoch": 3.5251944264419963, "grad_norm": 0.06507275998592377, "learning_rate": 2.637832485710428e-05, "loss": 0.2209, "step": 43515 }, { "epoch": 3.5252754374594946, "grad_norm": 0.09480078518390656, "learning_rate": 2.6373824204509655e-05, "loss": 0.2453, "step": 43516 }, { "epoch": 3.525356448476993, "grad_norm": 0.057359907776117325, "learning_rate": 2.636932355191503e-05, "loss": 0.233, "step": 43517 }, { "epoch": 3.5254374594944915, "grad_norm": 0.06667699664831161, "learning_rate": 2.6364822899320402e-05, "loss": 0.2, "step": 43518 }, { "epoch": 3.5255184705119897, "grad_norm": 0.06882564723491669, "learning_rate": 2.636032224672578e-05, "loss": 0.2355, "step": 43519 }, { "epoch": 3.525599481529488, "grad_norm": 0.06481310725212097, "learning_rate": 2.6355821594131152e-05, "loss": 0.2272, "step": 43520 }, { "epoch": 3.5256804925469862, "grad_norm": 0.07352931797504425, "learning_rate": 2.6351320941536523e-05, "loss": 0.2435, "step": 43521 }, { "epoch": 3.525761503564485, "grad_norm": 0.07422930747270584, "learning_rate": 2.63468202889419e-05, "loss": 0.2068, "step": 43522 }, { "epoch": 3.525842514581983, "grad_norm": 0.07219050079584122, "learning_rate": 2.6342319636347273e-05, "loss": 0.2251, "step": 43523 }, { "epoch": 3.5259235255994814, "grad_norm": 0.07107728719711304, "learning_rate": 2.6337818983752643e-05, "loss": 0.2168, "step": 43524 }, { "epoch": 3.5260045366169797, "grad_norm": 0.07279662787914276, "learning_rate": 2.633331833115802e-05, "loss": 0.2369, "step": 43525 }, { "epoch": 3.5260855476344783, "grad_norm": 0.07390367239713669, "learning_rate": 2.6328817678563394e-05, "loss": 0.2059, "step": 43526 }, { "epoch": 3.5261665586519766, "grad_norm": 0.07024620473384857, "learning_rate": 2.6324317025968764e-05, "loss": 0.2107, "step": 43527 }, { "epoch": 3.526247569669475, "grad_norm": 0.06905306130647659, "learning_rate": 2.631981637337414e-05, "loss": 0.2138, "step": 43528 }, { "epoch": 3.5263285806869735, "grad_norm": 0.07863122969865799, "learning_rate": 2.6315315720779515e-05, "loss": 0.2485, "step": 43529 }, { "epoch": 3.5264095917044718, "grad_norm": 0.06559717655181885, "learning_rate": 2.6310815068184892e-05, "loss": 0.2174, "step": 43530 }, { "epoch": 3.52649060272197, "grad_norm": 0.06733140349388123, "learning_rate": 2.6306314415590262e-05, "loss": 0.2276, "step": 43531 }, { "epoch": 3.5265716137394687, "grad_norm": 0.05722331628203392, "learning_rate": 2.6301813762995636e-05, "loss": 0.226, "step": 43532 }, { "epoch": 3.526652624756967, "grad_norm": 0.09110940247774124, "learning_rate": 2.6297313110401013e-05, "loss": 0.2204, "step": 43533 }, { "epoch": 3.526733635774465, "grad_norm": 0.08864796161651611, "learning_rate": 2.6292812457806383e-05, "loss": 0.2645, "step": 43534 }, { "epoch": 3.526814646791964, "grad_norm": 0.08280379325151443, "learning_rate": 2.6288311805211756e-05, "loss": 0.2228, "step": 43535 }, { "epoch": 3.526895657809462, "grad_norm": 0.07867808640003204, "learning_rate": 2.6283811152617133e-05, "loss": 0.261, "step": 43536 }, { "epoch": 3.5269766688269604, "grad_norm": 0.10088161379098892, "learning_rate": 2.6279310500022504e-05, "loss": 0.2678, "step": 43537 }, { "epoch": 3.527057679844459, "grad_norm": 0.0921441987156868, "learning_rate": 2.6274809847427877e-05, "loss": 0.2104, "step": 43538 }, { "epoch": 3.5271386908619573, "grad_norm": 0.0665295198559761, "learning_rate": 2.6270309194833254e-05, "loss": 0.2285, "step": 43539 }, { "epoch": 3.5272197018794555, "grad_norm": 0.08360760658979416, "learning_rate": 2.6265808542238624e-05, "loss": 0.2474, "step": 43540 }, { "epoch": 3.5273007128969542, "grad_norm": 0.0686190202832222, "learning_rate": 2.6261307889643998e-05, "loss": 0.1958, "step": 43541 }, { "epoch": 3.5273817239144525, "grad_norm": 0.06415877491235733, "learning_rate": 2.6256807237049375e-05, "loss": 0.2363, "step": 43542 }, { "epoch": 3.5274627349319507, "grad_norm": 0.0608087033033371, "learning_rate": 2.6252306584454745e-05, "loss": 0.2145, "step": 43543 }, { "epoch": 3.527543745949449, "grad_norm": 0.07166855782270432, "learning_rate": 2.624780593186012e-05, "loss": 0.2506, "step": 43544 }, { "epoch": 3.5276247569669477, "grad_norm": 0.06283847987651825, "learning_rate": 2.6243305279265496e-05, "loss": 0.2147, "step": 43545 }, { "epoch": 3.527705767984446, "grad_norm": 0.05699127912521362, "learning_rate": 2.623880462667087e-05, "loss": 0.2245, "step": 43546 }, { "epoch": 3.527786779001944, "grad_norm": 0.08262984454631805, "learning_rate": 2.623430397407624e-05, "loss": 0.2268, "step": 43547 }, { "epoch": 3.5278677900194424, "grad_norm": 0.07195358723402023, "learning_rate": 2.6229803321481617e-05, "loss": 0.212, "step": 43548 }, { "epoch": 3.527948801036941, "grad_norm": 0.048632893711328506, "learning_rate": 2.622530266888699e-05, "loss": 0.2073, "step": 43549 }, { "epoch": 3.5280298120544393, "grad_norm": 0.07497584819793701, "learning_rate": 2.622080201629236e-05, "loss": 0.2036, "step": 43550 }, { "epoch": 3.5281108230719376, "grad_norm": 0.06233372911810875, "learning_rate": 2.6216301363697737e-05, "loss": 0.1991, "step": 43551 }, { "epoch": 3.5281918340894363, "grad_norm": 0.0687880665063858, "learning_rate": 2.621180071110311e-05, "loss": 0.1973, "step": 43552 }, { "epoch": 3.5282728451069345, "grad_norm": 0.0592275932431221, "learning_rate": 2.620730005850848e-05, "loss": 0.211, "step": 43553 }, { "epoch": 3.5283538561244328, "grad_norm": 0.07839930057525635, "learning_rate": 2.6202799405913858e-05, "loss": 0.2164, "step": 43554 }, { "epoch": 3.5284348671419314, "grad_norm": 0.07318031042814255, "learning_rate": 2.6198298753319235e-05, "loss": 0.2062, "step": 43555 }, { "epoch": 3.5285158781594297, "grad_norm": 0.07681858539581299, "learning_rate": 2.6193798100724602e-05, "loss": 0.2095, "step": 43556 }, { "epoch": 3.528596889176928, "grad_norm": 0.0828225240111351, "learning_rate": 2.618929744812998e-05, "loss": 0.2307, "step": 43557 }, { "epoch": 3.5286779001944266, "grad_norm": 0.07809402793645859, "learning_rate": 2.6184796795535356e-05, "loss": 0.2422, "step": 43558 }, { "epoch": 3.528758911211925, "grad_norm": 0.06778375059366226, "learning_rate": 2.618029614294073e-05, "loss": 0.186, "step": 43559 }, { "epoch": 3.528839922229423, "grad_norm": 0.07688381522893906, "learning_rate": 2.61757954903461e-05, "loss": 0.2154, "step": 43560 }, { "epoch": 3.528920933246922, "grad_norm": 0.06942246109247208, "learning_rate": 2.6171294837751477e-05, "loss": 0.2178, "step": 43561 }, { "epoch": 3.52900194426442, "grad_norm": 0.07926230132579803, "learning_rate": 2.616679418515685e-05, "loss": 0.2406, "step": 43562 }, { "epoch": 3.5290829552819183, "grad_norm": 0.05696360394358635, "learning_rate": 2.616229353256222e-05, "loss": 0.2173, "step": 43563 }, { "epoch": 3.529163966299417, "grad_norm": 0.07202611863613129, "learning_rate": 2.6157792879967598e-05, "loss": 0.2429, "step": 43564 }, { "epoch": 3.529244977316915, "grad_norm": 0.07968437671661377, "learning_rate": 2.615329222737297e-05, "loss": 0.2496, "step": 43565 }, { "epoch": 3.5293259883344135, "grad_norm": 0.06798778474330902, "learning_rate": 2.614879157477834e-05, "loss": 0.1897, "step": 43566 }, { "epoch": 3.5294069993519117, "grad_norm": 0.06220000609755516, "learning_rate": 2.614429092218372e-05, "loss": 0.181, "step": 43567 }, { "epoch": 3.5294880103694104, "grad_norm": 0.06689947098493576, "learning_rate": 2.6139790269589092e-05, "loss": 0.213, "step": 43568 }, { "epoch": 3.5295690213869086, "grad_norm": 0.08405417203903198, "learning_rate": 2.6135289616994462e-05, "loss": 0.2039, "step": 43569 }, { "epoch": 3.529650032404407, "grad_norm": 0.0697874054312706, "learning_rate": 2.613078896439984e-05, "loss": 0.196, "step": 43570 }, { "epoch": 3.529731043421905, "grad_norm": 0.06842770427465439, "learning_rate": 2.6126288311805213e-05, "loss": 0.1947, "step": 43571 }, { "epoch": 3.529812054439404, "grad_norm": 0.056984636932611465, "learning_rate": 2.612178765921059e-05, "loss": 0.2377, "step": 43572 }, { "epoch": 3.529893065456902, "grad_norm": 0.094151571393013, "learning_rate": 2.611728700661596e-05, "loss": 0.2242, "step": 43573 }, { "epoch": 3.5299740764744003, "grad_norm": 0.07004104554653168, "learning_rate": 2.6112786354021334e-05, "loss": 0.241, "step": 43574 }, { "epoch": 3.530055087491899, "grad_norm": 0.07609971612691879, "learning_rate": 2.610828570142671e-05, "loss": 0.2417, "step": 43575 }, { "epoch": 3.5301360985093972, "grad_norm": 0.06777872145175934, "learning_rate": 2.610378504883208e-05, "loss": 0.2393, "step": 43576 }, { "epoch": 3.5302171095268955, "grad_norm": 0.07987414300441742, "learning_rate": 2.6099284396237455e-05, "loss": 0.2029, "step": 43577 }, { "epoch": 3.530298120544394, "grad_norm": 0.0721912682056427, "learning_rate": 2.609478374364283e-05, "loss": 0.2173, "step": 43578 }, { "epoch": 3.5303791315618924, "grad_norm": 0.06779767572879791, "learning_rate": 2.6090283091048202e-05, "loss": 0.1965, "step": 43579 }, { "epoch": 3.5304601425793907, "grad_norm": 0.08945897966623306, "learning_rate": 2.6085782438453575e-05, "loss": 0.246, "step": 43580 }, { "epoch": 3.5305411535968894, "grad_norm": 0.054115235805511475, "learning_rate": 2.6081281785858952e-05, "loss": 0.2269, "step": 43581 }, { "epoch": 3.5306221646143876, "grad_norm": 0.06920191645622253, "learning_rate": 2.6076781133264323e-05, "loss": 0.2241, "step": 43582 }, { "epoch": 3.530703175631886, "grad_norm": 0.06602834910154343, "learning_rate": 2.6072280480669696e-05, "loss": 0.25, "step": 43583 }, { "epoch": 3.5307841866493845, "grad_norm": 0.07330073416233063, "learning_rate": 2.6067779828075073e-05, "loss": 0.2399, "step": 43584 }, { "epoch": 3.530865197666883, "grad_norm": 0.07389242202043533, "learning_rate": 2.6063279175480447e-05, "loss": 0.2061, "step": 43585 }, { "epoch": 3.530946208684381, "grad_norm": 0.06531089544296265, "learning_rate": 2.6058778522885817e-05, "loss": 0.2389, "step": 43586 }, { "epoch": 3.5310272197018797, "grad_norm": 0.06592812389135361, "learning_rate": 2.6054277870291194e-05, "loss": 0.2346, "step": 43587 }, { "epoch": 3.531108230719378, "grad_norm": 0.08015481382608414, "learning_rate": 2.604977721769657e-05, "loss": 0.1921, "step": 43588 }, { "epoch": 3.531189241736876, "grad_norm": 0.0655452087521553, "learning_rate": 2.6045276565101938e-05, "loss": 0.2252, "step": 43589 }, { "epoch": 3.5312702527543745, "grad_norm": 0.06559097021818161, "learning_rate": 2.6040775912507315e-05, "loss": 0.2479, "step": 43590 }, { "epoch": 3.531351263771873, "grad_norm": 0.07290159165859222, "learning_rate": 2.6036275259912692e-05, "loss": 0.2125, "step": 43591 }, { "epoch": 3.5314322747893714, "grad_norm": 0.08643457293510437, "learning_rate": 2.603177460731806e-05, "loss": 0.1838, "step": 43592 }, { "epoch": 3.5315132858068696, "grad_norm": 0.08853253722190857, "learning_rate": 2.6027273954723436e-05, "loss": 0.2626, "step": 43593 }, { "epoch": 3.531594296824368, "grad_norm": 0.07214788347482681, "learning_rate": 2.6022773302128813e-05, "loss": 0.2144, "step": 43594 }, { "epoch": 3.5316753078418666, "grad_norm": 0.07845582067966461, "learning_rate": 2.601827264953418e-05, "loss": 0.2285, "step": 43595 }, { "epoch": 3.531756318859365, "grad_norm": 0.08354339003562927, "learning_rate": 2.6013771996939556e-05, "loss": 0.2341, "step": 43596 }, { "epoch": 3.531837329876863, "grad_norm": 0.07601010799407959, "learning_rate": 2.6009271344344933e-05, "loss": 0.224, "step": 43597 }, { "epoch": 3.5319183408943617, "grad_norm": 0.07792460918426514, "learning_rate": 2.6004770691750307e-05, "loss": 0.2374, "step": 43598 }, { "epoch": 3.53199935191186, "grad_norm": 0.07806096225976944, "learning_rate": 2.6000270039155677e-05, "loss": 0.2141, "step": 43599 }, { "epoch": 3.5320803629293582, "grad_norm": 0.07231029868125916, "learning_rate": 2.5995769386561054e-05, "loss": 0.2236, "step": 43600 }, { "epoch": 3.532161373946857, "grad_norm": 0.07012427598237991, "learning_rate": 2.5991268733966428e-05, "loss": 0.2853, "step": 43601 }, { "epoch": 3.532242384964355, "grad_norm": 0.06293465197086334, "learning_rate": 2.5986768081371798e-05, "loss": 0.2437, "step": 43602 }, { "epoch": 3.5323233959818534, "grad_norm": 0.057620417326688766, "learning_rate": 2.5982267428777175e-05, "loss": 0.1869, "step": 43603 }, { "epoch": 3.532404406999352, "grad_norm": 0.08230112493038177, "learning_rate": 2.597776677618255e-05, "loss": 0.2458, "step": 43604 }, { "epoch": 3.5324854180168503, "grad_norm": 0.0726366639137268, "learning_rate": 2.597326612358792e-05, "loss": 0.2376, "step": 43605 }, { "epoch": 3.5325664290343486, "grad_norm": 0.07008222490549088, "learning_rate": 2.5968765470993296e-05, "loss": 0.2074, "step": 43606 }, { "epoch": 3.5326474400518473, "grad_norm": 0.05460607632994652, "learning_rate": 2.596426481839867e-05, "loss": 0.2169, "step": 43607 }, { "epoch": 3.5327284510693455, "grad_norm": 0.07533252239227295, "learning_rate": 2.595976416580404e-05, "loss": 0.2299, "step": 43608 }, { "epoch": 3.5328094620868438, "grad_norm": 0.07314232736825943, "learning_rate": 2.5955263513209417e-05, "loss": 0.2462, "step": 43609 }, { "epoch": 3.5328904731043425, "grad_norm": 0.08383066952228546, "learning_rate": 2.595076286061479e-05, "loss": 0.2775, "step": 43610 }, { "epoch": 3.5329714841218407, "grad_norm": 0.05928299203515053, "learning_rate": 2.5946262208020167e-05, "loss": 0.2412, "step": 43611 }, { "epoch": 3.533052495139339, "grad_norm": 0.09445102512836456, "learning_rate": 2.5941761555425537e-05, "loss": 0.2609, "step": 43612 }, { "epoch": 3.533133506156837, "grad_norm": 0.0719989463686943, "learning_rate": 2.593726090283091e-05, "loss": 0.2429, "step": 43613 }, { "epoch": 3.5332145171743354, "grad_norm": 0.07715129107236862, "learning_rate": 2.5932760250236288e-05, "loss": 0.251, "step": 43614 }, { "epoch": 3.533295528191834, "grad_norm": 0.06272150576114655, "learning_rate": 2.5928259597641658e-05, "loss": 0.2152, "step": 43615 }, { "epoch": 3.5333765392093324, "grad_norm": 0.06173498556017876, "learning_rate": 2.5923758945047032e-05, "loss": 0.2567, "step": 43616 }, { "epoch": 3.5334575502268306, "grad_norm": 0.060172874480485916, "learning_rate": 2.591925829245241e-05, "loss": 0.2426, "step": 43617 }, { "epoch": 3.5335385612443293, "grad_norm": 0.06284452229738235, "learning_rate": 2.591475763985778e-05, "loss": 0.1975, "step": 43618 }, { "epoch": 3.5336195722618275, "grad_norm": 0.07622663676738739, "learning_rate": 2.5910256987263153e-05, "loss": 0.2322, "step": 43619 }, { "epoch": 3.533700583279326, "grad_norm": 0.0806276723742485, "learning_rate": 2.590575633466853e-05, "loss": 0.2461, "step": 43620 }, { "epoch": 3.5337815942968245, "grad_norm": 0.07501956820487976, "learning_rate": 2.59012556820739e-05, "loss": 0.2145, "step": 43621 }, { "epoch": 3.5338626053143227, "grad_norm": 0.06754549592733383, "learning_rate": 2.5896755029479273e-05, "loss": 0.2066, "step": 43622 }, { "epoch": 3.533943616331821, "grad_norm": 0.06591545790433884, "learning_rate": 2.589225437688465e-05, "loss": 0.2396, "step": 43623 }, { "epoch": 3.5340246273493197, "grad_norm": 0.0630035549402237, "learning_rate": 2.5887753724290027e-05, "loss": 0.2335, "step": 43624 }, { "epoch": 3.534105638366818, "grad_norm": 0.06425105035305023, "learning_rate": 2.5883253071695394e-05, "loss": 0.2341, "step": 43625 }, { "epoch": 3.534186649384316, "grad_norm": 0.0694139301776886, "learning_rate": 2.587875241910077e-05, "loss": 0.2123, "step": 43626 }, { "epoch": 3.534267660401815, "grad_norm": 0.09128300100564957, "learning_rate": 2.5874251766506148e-05, "loss": 0.2328, "step": 43627 }, { "epoch": 3.534348671419313, "grad_norm": 0.0733460932970047, "learning_rate": 2.5869751113911515e-05, "loss": 0.2387, "step": 43628 }, { "epoch": 3.5344296824368113, "grad_norm": 0.06711853295564651, "learning_rate": 2.5865250461316892e-05, "loss": 0.2522, "step": 43629 }, { "epoch": 3.53451069345431, "grad_norm": 0.058701638132333755, "learning_rate": 2.586074980872227e-05, "loss": 0.1884, "step": 43630 }, { "epoch": 3.5345917044718083, "grad_norm": 0.07320652157068253, "learning_rate": 2.585624915612764e-05, "loss": 0.2047, "step": 43631 }, { "epoch": 3.5346727154893065, "grad_norm": 0.05535358190536499, "learning_rate": 2.5851748503533013e-05, "loss": 0.1983, "step": 43632 }, { "epoch": 3.534753726506805, "grad_norm": 0.08350860327482224, "learning_rate": 2.584724785093839e-05, "loss": 0.2057, "step": 43633 }, { "epoch": 3.5348347375243034, "grad_norm": 0.0721396878361702, "learning_rate": 2.584274719834376e-05, "loss": 0.2156, "step": 43634 }, { "epoch": 3.5349157485418017, "grad_norm": 0.07066772878170013, "learning_rate": 2.5838246545749134e-05, "loss": 0.2474, "step": 43635 }, { "epoch": 3.5349967595593, "grad_norm": 0.07263556122779846, "learning_rate": 2.583374589315451e-05, "loss": 0.2433, "step": 43636 }, { "epoch": 3.535077770576798, "grad_norm": 0.07293055951595306, "learning_rate": 2.5829245240559884e-05, "loss": 0.2511, "step": 43637 }, { "epoch": 3.535158781594297, "grad_norm": 0.06933064013719559, "learning_rate": 2.5824744587965255e-05, "loss": 0.2128, "step": 43638 }, { "epoch": 3.535239792611795, "grad_norm": 0.08220583200454712, "learning_rate": 2.582024393537063e-05, "loss": 0.2472, "step": 43639 }, { "epoch": 3.5353208036292934, "grad_norm": 0.06579196453094482, "learning_rate": 2.5815743282776005e-05, "loss": 0.2268, "step": 43640 }, { "epoch": 3.535401814646792, "grad_norm": 0.05709078907966614, "learning_rate": 2.5811242630181375e-05, "loss": 0.2105, "step": 43641 }, { "epoch": 3.5354828256642903, "grad_norm": 0.07426831126213074, "learning_rate": 2.5806741977586752e-05, "loss": 0.2047, "step": 43642 }, { "epoch": 3.5355638366817885, "grad_norm": 0.06896337866783142, "learning_rate": 2.5802241324992126e-05, "loss": 0.1821, "step": 43643 }, { "epoch": 3.535644847699287, "grad_norm": 0.061936162412166595, "learning_rate": 2.5797740672397496e-05, "loss": 0.2188, "step": 43644 }, { "epoch": 3.5357258587167855, "grad_norm": 0.07506543397903442, "learning_rate": 2.5793240019802873e-05, "loss": 0.2282, "step": 43645 }, { "epoch": 3.5358068697342837, "grad_norm": 0.07365099340677261, "learning_rate": 2.5788739367208247e-05, "loss": 0.2455, "step": 43646 }, { "epoch": 3.5358878807517824, "grad_norm": 0.07197272032499313, "learning_rate": 2.5784238714613617e-05, "loss": 0.2295, "step": 43647 }, { "epoch": 3.5359688917692806, "grad_norm": 0.07811829447746277, "learning_rate": 2.5779738062018994e-05, "loss": 0.2401, "step": 43648 }, { "epoch": 3.536049902786779, "grad_norm": 0.07260419428348541, "learning_rate": 2.5775237409424368e-05, "loss": 0.2405, "step": 43649 }, { "epoch": 3.5361309138042776, "grad_norm": 0.05985453352332115, "learning_rate": 2.5770736756829745e-05, "loss": 0.2362, "step": 43650 }, { "epoch": 3.536211924821776, "grad_norm": 0.07213029265403748, "learning_rate": 2.5766236104235115e-05, "loss": 0.1907, "step": 43651 }, { "epoch": 3.536292935839274, "grad_norm": 0.08837267756462097, "learning_rate": 2.576173545164049e-05, "loss": 0.2326, "step": 43652 }, { "epoch": 3.5363739468567728, "grad_norm": 0.09892171621322632, "learning_rate": 2.5757234799045865e-05, "loss": 0.2106, "step": 43653 }, { "epoch": 3.536454957874271, "grad_norm": 0.07501991838216782, "learning_rate": 2.5752734146451236e-05, "loss": 0.2417, "step": 43654 }, { "epoch": 3.5365359688917692, "grad_norm": 0.09497696161270142, "learning_rate": 2.574823349385661e-05, "loss": 0.2545, "step": 43655 }, { "epoch": 3.536616979909268, "grad_norm": 0.06474223732948303, "learning_rate": 2.5743732841261986e-05, "loss": 0.2293, "step": 43656 }, { "epoch": 3.536697990926766, "grad_norm": 0.0834692195057869, "learning_rate": 2.5739232188667356e-05, "loss": 0.229, "step": 43657 }, { "epoch": 3.5367790019442644, "grad_norm": 0.0605698861181736, "learning_rate": 2.573473153607273e-05, "loss": 0.1839, "step": 43658 }, { "epoch": 3.5368600129617627, "grad_norm": 0.06913433223962784, "learning_rate": 2.5730230883478107e-05, "loss": 0.2196, "step": 43659 }, { "epoch": 3.536941023979261, "grad_norm": 0.067677341401577, "learning_rate": 2.5725730230883477e-05, "loss": 0.2393, "step": 43660 }, { "epoch": 3.5370220349967596, "grad_norm": 0.06225701421499252, "learning_rate": 2.572122957828885e-05, "loss": 0.2052, "step": 43661 }, { "epoch": 3.537103046014258, "grad_norm": 0.07869463413953781, "learning_rate": 2.5716728925694228e-05, "loss": 0.2149, "step": 43662 }, { "epoch": 3.537184057031756, "grad_norm": 0.08113308250904083, "learning_rate": 2.5712228273099605e-05, "loss": 0.2209, "step": 43663 }, { "epoch": 3.537265068049255, "grad_norm": 0.06685000658035278, "learning_rate": 2.5707727620504975e-05, "loss": 0.2358, "step": 43664 }, { "epoch": 3.537346079066753, "grad_norm": 0.06469444185495377, "learning_rate": 2.570322696791035e-05, "loss": 0.1878, "step": 43665 }, { "epoch": 3.5374270900842513, "grad_norm": 0.07345057278871536, "learning_rate": 2.5698726315315726e-05, "loss": 0.2159, "step": 43666 }, { "epoch": 3.53750810110175, "grad_norm": 0.07360540330410004, "learning_rate": 2.5694225662721096e-05, "loss": 0.2452, "step": 43667 }, { "epoch": 3.537589112119248, "grad_norm": 0.07926061004400253, "learning_rate": 2.568972501012647e-05, "loss": 0.2357, "step": 43668 }, { "epoch": 3.5376701231367464, "grad_norm": 0.07265284657478333, "learning_rate": 2.5685224357531846e-05, "loss": 0.2303, "step": 43669 }, { "epoch": 3.537751134154245, "grad_norm": 0.06345818936824799, "learning_rate": 2.5680723704937217e-05, "loss": 0.2334, "step": 43670 }, { "epoch": 3.5378321451717434, "grad_norm": 0.08022277802228928, "learning_rate": 2.567622305234259e-05, "loss": 0.2569, "step": 43671 }, { "epoch": 3.5379131561892416, "grad_norm": 0.0716053694486618, "learning_rate": 2.5671722399747967e-05, "loss": 0.2109, "step": 43672 }, { "epoch": 3.5379941672067403, "grad_norm": 0.0784161239862442, "learning_rate": 2.5667221747153337e-05, "loss": 0.2048, "step": 43673 }, { "epoch": 3.5380751782242386, "grad_norm": 0.07868270576000214, "learning_rate": 2.566272109455871e-05, "loss": 0.2192, "step": 43674 }, { "epoch": 3.538156189241737, "grad_norm": 0.06503627449274063, "learning_rate": 2.5658220441964088e-05, "loss": 0.2216, "step": 43675 }, { "epoch": 3.5382372002592355, "grad_norm": 0.06809493899345398, "learning_rate": 2.565371978936946e-05, "loss": 0.2062, "step": 43676 }, { "epoch": 3.5383182112767337, "grad_norm": 0.06095467135310173, "learning_rate": 2.5649219136774832e-05, "loss": 0.1965, "step": 43677 }, { "epoch": 3.538399222294232, "grad_norm": 0.08310698717832565, "learning_rate": 2.564471848418021e-05, "loss": 0.2011, "step": 43678 }, { "epoch": 3.5384802333117307, "grad_norm": 0.06410634517669678, "learning_rate": 2.5640217831585582e-05, "loss": 0.2179, "step": 43679 }, { "epoch": 3.538561244329229, "grad_norm": 0.07264720648527145, "learning_rate": 2.5635717178990953e-05, "loss": 0.2191, "step": 43680 }, { "epoch": 3.538642255346727, "grad_norm": 0.06960725039243698, "learning_rate": 2.563121652639633e-05, "loss": 0.1936, "step": 43681 }, { "epoch": 3.5387232663642254, "grad_norm": 0.06291031092405319, "learning_rate": 2.5626715873801703e-05, "loss": 0.1727, "step": 43682 }, { "epoch": 3.5388042773817237, "grad_norm": 0.07345648854970932, "learning_rate": 2.5622215221207073e-05, "loss": 0.2459, "step": 43683 }, { "epoch": 3.5388852883992223, "grad_norm": 0.05809454992413521, "learning_rate": 2.561771456861245e-05, "loss": 0.217, "step": 43684 }, { "epoch": 3.5389662994167206, "grad_norm": 0.06693921983242035, "learning_rate": 2.5613213916017824e-05, "loss": 0.2741, "step": 43685 }, { "epoch": 3.539047310434219, "grad_norm": 0.07190316915512085, "learning_rate": 2.5608713263423194e-05, "loss": 0.2191, "step": 43686 }, { "epoch": 3.5391283214517175, "grad_norm": 0.06388852745294571, "learning_rate": 2.560421261082857e-05, "loss": 0.2488, "step": 43687 }, { "epoch": 3.5392093324692158, "grad_norm": 0.07432588189840317, "learning_rate": 2.5599711958233945e-05, "loss": 0.192, "step": 43688 }, { "epoch": 3.539290343486714, "grad_norm": 0.07199820131063461, "learning_rate": 2.5595211305639322e-05, "loss": 0.2664, "step": 43689 }, { "epoch": 3.5393713545042127, "grad_norm": 0.07324870675802231, "learning_rate": 2.5590710653044692e-05, "loss": 0.2255, "step": 43690 }, { "epoch": 3.539452365521711, "grad_norm": 0.06241829693317413, "learning_rate": 2.5586210000450066e-05, "loss": 0.1873, "step": 43691 }, { "epoch": 3.539533376539209, "grad_norm": 0.06450480967760086, "learning_rate": 2.5581709347855443e-05, "loss": 0.2215, "step": 43692 }, { "epoch": 3.539614387556708, "grad_norm": 0.09523190557956696, "learning_rate": 2.5577208695260813e-05, "loss": 0.1804, "step": 43693 }, { "epoch": 3.539695398574206, "grad_norm": 0.07226721197366714, "learning_rate": 2.5572708042666186e-05, "loss": 0.1974, "step": 43694 }, { "epoch": 3.5397764095917044, "grad_norm": 0.07698127627372742, "learning_rate": 2.5568207390071563e-05, "loss": 0.2496, "step": 43695 }, { "epoch": 3.539857420609203, "grad_norm": 0.09337909519672394, "learning_rate": 2.5563706737476934e-05, "loss": 0.2504, "step": 43696 }, { "epoch": 3.5399384316267013, "grad_norm": 0.06675826013088226, "learning_rate": 2.5559206084882307e-05, "loss": 0.2234, "step": 43697 }, { "epoch": 3.5400194426441995, "grad_norm": 0.07172457128763199, "learning_rate": 2.5554705432287684e-05, "loss": 0.2552, "step": 43698 }, { "epoch": 3.5401004536616982, "grad_norm": 0.06760606169700623, "learning_rate": 2.5550204779693054e-05, "loss": 0.1864, "step": 43699 }, { "epoch": 3.5401814646791965, "grad_norm": 0.07144549489021301, "learning_rate": 2.554570412709843e-05, "loss": 0.2419, "step": 43700 }, { "epoch": 3.5402624756966947, "grad_norm": 0.08824021369218826, "learning_rate": 2.5541203474503805e-05, "loss": 0.2427, "step": 43701 }, { "epoch": 3.540343486714193, "grad_norm": 0.07417906075716019, "learning_rate": 2.5536702821909175e-05, "loss": 0.2503, "step": 43702 }, { "epoch": 3.5404244977316917, "grad_norm": 0.06739521026611328, "learning_rate": 2.5532202169314552e-05, "loss": 0.2274, "step": 43703 }, { "epoch": 3.54050550874919, "grad_norm": 0.08891397714614868, "learning_rate": 2.5527701516719926e-05, "loss": 0.2285, "step": 43704 }, { "epoch": 3.540586519766688, "grad_norm": 0.060347575694322586, "learning_rate": 2.5523200864125303e-05, "loss": 0.1862, "step": 43705 }, { "epoch": 3.5406675307841864, "grad_norm": 0.06603474169969559, "learning_rate": 2.5518700211530673e-05, "loss": 0.249, "step": 43706 }, { "epoch": 3.540748541801685, "grad_norm": 0.08578146994113922, "learning_rate": 2.5514199558936047e-05, "loss": 0.2211, "step": 43707 }, { "epoch": 3.5408295528191833, "grad_norm": 0.0664646178483963, "learning_rate": 2.5509698906341424e-05, "loss": 0.2244, "step": 43708 }, { "epoch": 3.5409105638366816, "grad_norm": 0.06386777758598328, "learning_rate": 2.5505198253746794e-05, "loss": 0.2435, "step": 43709 }, { "epoch": 3.5409915748541803, "grad_norm": 0.06778024137020111, "learning_rate": 2.5500697601152168e-05, "loss": 0.245, "step": 43710 }, { "epoch": 3.5410725858716785, "grad_norm": 0.07709016650915146, "learning_rate": 2.5496196948557544e-05, "loss": 0.2463, "step": 43711 }, { "epoch": 3.5411535968891767, "grad_norm": 0.06232065334916115, "learning_rate": 2.5491696295962915e-05, "loss": 0.2052, "step": 43712 }, { "epoch": 3.5412346079066754, "grad_norm": 0.07343726605176926, "learning_rate": 2.548719564336829e-05, "loss": 0.2242, "step": 43713 }, { "epoch": 3.5413156189241737, "grad_norm": 0.07775748521089554, "learning_rate": 2.5482694990773665e-05, "loss": 0.2272, "step": 43714 }, { "epoch": 3.541396629941672, "grad_norm": 0.08513243496417999, "learning_rate": 2.5478194338179036e-05, "loss": 0.2375, "step": 43715 }, { "epoch": 3.5414776409591706, "grad_norm": 0.07626286894083023, "learning_rate": 2.547369368558441e-05, "loss": 0.2435, "step": 43716 }, { "epoch": 3.541558651976669, "grad_norm": 0.07494010031223297, "learning_rate": 2.5469193032989786e-05, "loss": 0.2179, "step": 43717 }, { "epoch": 3.541639662994167, "grad_norm": 0.07805442065000534, "learning_rate": 2.546469238039516e-05, "loss": 0.2086, "step": 43718 }, { "epoch": 3.541720674011666, "grad_norm": 0.08477230370044708, "learning_rate": 2.546019172780053e-05, "loss": 0.2189, "step": 43719 }, { "epoch": 3.541801685029164, "grad_norm": 0.06619860976934433, "learning_rate": 2.5455691075205907e-05, "loss": 0.2509, "step": 43720 }, { "epoch": 3.5418826960466623, "grad_norm": 0.09968457370996475, "learning_rate": 2.545119042261128e-05, "loss": 0.2586, "step": 43721 }, { "epoch": 3.541963707064161, "grad_norm": 0.07434304803609848, "learning_rate": 2.544668977001665e-05, "loss": 0.2438, "step": 43722 }, { "epoch": 3.542044718081659, "grad_norm": 0.08019180595874786, "learning_rate": 2.5442189117422028e-05, "loss": 0.2362, "step": 43723 }, { "epoch": 3.5421257290991575, "grad_norm": 0.06282759457826614, "learning_rate": 2.54376884648274e-05, "loss": 0.2246, "step": 43724 }, { "epoch": 3.5422067401166557, "grad_norm": 0.07077057659626007, "learning_rate": 2.543318781223277e-05, "loss": 0.2214, "step": 43725 }, { "epoch": 3.5422877511341544, "grad_norm": 0.07170132547616959, "learning_rate": 2.542868715963815e-05, "loss": 0.1954, "step": 43726 }, { "epoch": 3.5423687621516526, "grad_norm": 0.07327235490083694, "learning_rate": 2.5424186507043522e-05, "loss": 0.2473, "step": 43727 }, { "epoch": 3.542449773169151, "grad_norm": 0.07679017633199692, "learning_rate": 2.5419685854448892e-05, "loss": 0.2075, "step": 43728 }, { "epoch": 3.542530784186649, "grad_norm": 0.07175929844379425, "learning_rate": 2.541518520185427e-05, "loss": 0.2231, "step": 43729 }, { "epoch": 3.542611795204148, "grad_norm": 0.07727406173944473, "learning_rate": 2.5410684549259643e-05, "loss": 0.233, "step": 43730 }, { "epoch": 3.542692806221646, "grad_norm": 0.08105980604887009, "learning_rate": 2.540618389666502e-05, "loss": 0.2394, "step": 43731 }, { "epoch": 3.5427738172391443, "grad_norm": 0.0716603472828865, "learning_rate": 2.540168324407039e-05, "loss": 0.2573, "step": 43732 }, { "epoch": 3.542854828256643, "grad_norm": 0.07205747067928314, "learning_rate": 2.5397182591475767e-05, "loss": 0.2198, "step": 43733 }, { "epoch": 3.5429358392741412, "grad_norm": 0.07168762385845184, "learning_rate": 2.539268193888114e-05, "loss": 0.2072, "step": 43734 }, { "epoch": 3.5430168502916395, "grad_norm": 0.07003463804721832, "learning_rate": 2.538818128628651e-05, "loss": 0.2305, "step": 43735 }, { "epoch": 3.543097861309138, "grad_norm": 0.06664938479661942, "learning_rate": 2.5383680633691888e-05, "loss": 0.1891, "step": 43736 }, { "epoch": 3.5431788723266364, "grad_norm": 0.0784919485449791, "learning_rate": 2.537917998109726e-05, "loss": 0.2537, "step": 43737 }, { "epoch": 3.5432598833441347, "grad_norm": 0.060001879930496216, "learning_rate": 2.5374679328502632e-05, "loss": 0.212, "step": 43738 }, { "epoch": 3.5433408943616334, "grad_norm": 0.0571327805519104, "learning_rate": 2.537017867590801e-05, "loss": 0.2013, "step": 43739 }, { "epoch": 3.5434219053791316, "grad_norm": 0.06848404556512833, "learning_rate": 2.5365678023313382e-05, "loss": 0.2151, "step": 43740 }, { "epoch": 3.54350291639663, "grad_norm": 0.0731678456068039, "learning_rate": 2.5361177370718753e-05, "loss": 0.243, "step": 43741 }, { "epoch": 3.5435839274141285, "grad_norm": 0.06608740985393524, "learning_rate": 2.535667671812413e-05, "loss": 0.2395, "step": 43742 }, { "epoch": 3.5436649384316268, "grad_norm": 0.07724490016698837, "learning_rate": 2.5352176065529503e-05, "loss": 0.2537, "step": 43743 }, { "epoch": 3.543745949449125, "grad_norm": 0.07840392738580704, "learning_rate": 2.534767541293488e-05, "loss": 0.2642, "step": 43744 }, { "epoch": 3.5438269604666237, "grad_norm": 0.06792247295379639, "learning_rate": 2.534317476034025e-05, "loss": 0.2715, "step": 43745 }, { "epoch": 3.543907971484122, "grad_norm": 0.08233068883419037, "learning_rate": 2.5338674107745624e-05, "loss": 0.3045, "step": 43746 }, { "epoch": 3.54398898250162, "grad_norm": 0.06983482837677002, "learning_rate": 2.5334173455151e-05, "loss": 0.2062, "step": 43747 }, { "epoch": 3.5440699935191184, "grad_norm": 0.07004276663064957, "learning_rate": 2.532967280255637e-05, "loss": 0.2199, "step": 43748 }, { "epoch": 3.544151004536617, "grad_norm": 0.0649484321475029, "learning_rate": 2.5325172149961745e-05, "loss": 0.1992, "step": 43749 }, { "epoch": 3.5442320155541154, "grad_norm": 0.06359495967626572, "learning_rate": 2.5320671497367122e-05, "loss": 0.207, "step": 43750 }, { "epoch": 3.5443130265716136, "grad_norm": 0.07534097880125046, "learning_rate": 2.5316170844772492e-05, "loss": 0.2242, "step": 43751 }, { "epoch": 3.544394037589112, "grad_norm": 0.07306455075740814, "learning_rate": 2.5311670192177866e-05, "loss": 0.2349, "step": 43752 }, { "epoch": 3.5444750486066106, "grad_norm": 0.07966720312833786, "learning_rate": 2.5307169539583243e-05, "loss": 0.2856, "step": 43753 }, { "epoch": 3.544556059624109, "grad_norm": 0.06478086858987808, "learning_rate": 2.5302668886988613e-05, "loss": 0.2692, "step": 43754 }, { "epoch": 3.544637070641607, "grad_norm": 0.06660716980695724, "learning_rate": 2.5298168234393986e-05, "loss": 0.2397, "step": 43755 }, { "epoch": 3.5447180816591057, "grad_norm": 0.06570249795913696, "learning_rate": 2.5293667581799363e-05, "loss": 0.2331, "step": 43756 }, { "epoch": 3.544799092676604, "grad_norm": 0.07750461995601654, "learning_rate": 2.5289166929204737e-05, "loss": 0.2162, "step": 43757 }, { "epoch": 3.5448801036941022, "grad_norm": 0.07073743641376495, "learning_rate": 2.5284666276610107e-05, "loss": 0.2553, "step": 43758 }, { "epoch": 3.544961114711601, "grad_norm": 0.09300247579813004, "learning_rate": 2.5280165624015484e-05, "loss": 0.2662, "step": 43759 }, { "epoch": 3.545042125729099, "grad_norm": 0.07046713680028915, "learning_rate": 2.5275664971420858e-05, "loss": 0.2389, "step": 43760 }, { "epoch": 3.5451231367465974, "grad_norm": 0.05945826694369316, "learning_rate": 2.5271164318826228e-05, "loss": 0.2003, "step": 43761 }, { "epoch": 3.545204147764096, "grad_norm": 0.052805542945861816, "learning_rate": 2.5266663666231605e-05, "loss": 0.2011, "step": 43762 }, { "epoch": 3.5452851587815943, "grad_norm": 0.07712626457214355, "learning_rate": 2.526216301363698e-05, "loss": 0.2103, "step": 43763 }, { "epoch": 3.5453661697990926, "grad_norm": 0.0709281861782074, "learning_rate": 2.525766236104235e-05, "loss": 0.2384, "step": 43764 }, { "epoch": 3.5454471808165913, "grad_norm": 0.06912218034267426, "learning_rate": 2.5253161708447726e-05, "loss": 0.2296, "step": 43765 }, { "epoch": 3.5455281918340895, "grad_norm": 0.056670188903808594, "learning_rate": 2.5248661055853103e-05, "loss": 0.2324, "step": 43766 }, { "epoch": 3.5456092028515878, "grad_norm": 0.058022577315568924, "learning_rate": 2.524416040325847e-05, "loss": 0.2044, "step": 43767 }, { "epoch": 3.5456902138690864, "grad_norm": 0.0642026960849762, "learning_rate": 2.5239659750663847e-05, "loss": 0.248, "step": 43768 }, { "epoch": 3.5457712248865847, "grad_norm": 0.07394513487815857, "learning_rate": 2.5235159098069224e-05, "loss": 0.2265, "step": 43769 }, { "epoch": 3.545852235904083, "grad_norm": 0.07777998596429825, "learning_rate": 2.5230658445474597e-05, "loss": 0.214, "step": 43770 }, { "epoch": 3.545933246921581, "grad_norm": 0.06933100521564484, "learning_rate": 2.5226157792879967e-05, "loss": 0.243, "step": 43771 }, { "epoch": 3.54601425793908, "grad_norm": 0.07766921818256378, "learning_rate": 2.5221657140285344e-05, "loss": 0.2295, "step": 43772 }, { "epoch": 3.546095268956578, "grad_norm": 0.06734801083803177, "learning_rate": 2.5217156487690718e-05, "loss": 0.2374, "step": 43773 }, { "epoch": 3.5461762799740764, "grad_norm": 0.060280006378889084, "learning_rate": 2.5212655835096088e-05, "loss": 0.1809, "step": 43774 }, { "epoch": 3.5462572909915746, "grad_norm": 0.07323633134365082, "learning_rate": 2.5208155182501465e-05, "loss": 0.2311, "step": 43775 }, { "epoch": 3.5463383020090733, "grad_norm": 0.07583745568990707, "learning_rate": 2.520365452990684e-05, "loss": 0.2303, "step": 43776 }, { "epoch": 3.5464193130265715, "grad_norm": 0.04876444861292839, "learning_rate": 2.519915387731221e-05, "loss": 0.1779, "step": 43777 }, { "epoch": 3.54650032404407, "grad_norm": 0.06728420406579971, "learning_rate": 2.5194653224717586e-05, "loss": 0.2362, "step": 43778 }, { "epoch": 3.5465813350615685, "grad_norm": 0.07322170585393906, "learning_rate": 2.519015257212296e-05, "loss": 0.2153, "step": 43779 }, { "epoch": 3.5466623460790667, "grad_norm": 0.08688737452030182, "learning_rate": 2.518565191952833e-05, "loss": 0.2691, "step": 43780 }, { "epoch": 3.546743357096565, "grad_norm": 0.060829900205135345, "learning_rate": 2.5181151266933707e-05, "loss": 0.2532, "step": 43781 }, { "epoch": 3.5468243681140637, "grad_norm": 0.07676567882299423, "learning_rate": 2.517665061433908e-05, "loss": 0.246, "step": 43782 }, { "epoch": 3.546905379131562, "grad_norm": 0.062333088368177414, "learning_rate": 2.5172149961744457e-05, "loss": 0.2144, "step": 43783 }, { "epoch": 3.54698639014906, "grad_norm": 0.07637736201286316, "learning_rate": 2.5167649309149828e-05, "loss": 0.3052, "step": 43784 }, { "epoch": 3.547067401166559, "grad_norm": 0.1071089506149292, "learning_rate": 2.51631486565552e-05, "loss": 0.2472, "step": 43785 }, { "epoch": 3.547148412184057, "grad_norm": 0.07975732535123825, "learning_rate": 2.5158648003960578e-05, "loss": 0.2158, "step": 43786 }, { "epoch": 3.5472294232015553, "grad_norm": 0.08271283656358719, "learning_rate": 2.515414735136595e-05, "loss": 0.2287, "step": 43787 }, { "epoch": 3.547310434219054, "grad_norm": 0.08874785155057907, "learning_rate": 2.5149646698771322e-05, "loss": 0.2349, "step": 43788 }, { "epoch": 3.5473914452365523, "grad_norm": 0.06711436808109283, "learning_rate": 2.51451460461767e-05, "loss": 0.2569, "step": 43789 }, { "epoch": 3.5474724562540505, "grad_norm": 0.06952887773513794, "learning_rate": 2.514064539358207e-05, "loss": 0.2033, "step": 43790 }, { "epoch": 3.547553467271549, "grad_norm": 0.08235787600278854, "learning_rate": 2.5136144740987443e-05, "loss": 0.2402, "step": 43791 }, { "epoch": 3.5476344782890474, "grad_norm": 0.09327933937311172, "learning_rate": 2.513164408839282e-05, "loss": 0.2153, "step": 43792 }, { "epoch": 3.5477154893065457, "grad_norm": 0.06308753788471222, "learning_rate": 2.512714343579819e-05, "loss": 0.2094, "step": 43793 }, { "epoch": 3.547796500324044, "grad_norm": 0.083448126912117, "learning_rate": 2.5122642783203564e-05, "loss": 0.2456, "step": 43794 }, { "epoch": 3.5478775113415426, "grad_norm": 0.09958131611347198, "learning_rate": 2.511814213060894e-05, "loss": 0.2039, "step": 43795 }, { "epoch": 3.547958522359041, "grad_norm": 0.06535204499959946, "learning_rate": 2.5113641478014314e-05, "loss": 0.2398, "step": 43796 }, { "epoch": 3.548039533376539, "grad_norm": 0.08059913665056229, "learning_rate": 2.5109140825419685e-05, "loss": 0.1909, "step": 43797 }, { "epoch": 3.5481205443940373, "grad_norm": 0.07172200083732605, "learning_rate": 2.510464017282506e-05, "loss": 0.2171, "step": 43798 }, { "epoch": 3.548201555411536, "grad_norm": 0.07303963601589203, "learning_rate": 2.5100139520230435e-05, "loss": 0.2011, "step": 43799 }, { "epoch": 3.5482825664290343, "grad_norm": 0.06889622658491135, "learning_rate": 2.5095638867635805e-05, "loss": 0.1848, "step": 43800 }, { "epoch": 3.5483635774465325, "grad_norm": 0.06545989215373993, "learning_rate": 2.5091138215041182e-05, "loss": 0.2037, "step": 43801 }, { "epoch": 3.548444588464031, "grad_norm": 0.07705027610063553, "learning_rate": 2.508663756244656e-05, "loss": 0.2038, "step": 43802 }, { "epoch": 3.5485255994815295, "grad_norm": 0.08232953399419785, "learning_rate": 2.5082136909851926e-05, "loss": 0.2164, "step": 43803 }, { "epoch": 3.5486066104990277, "grad_norm": 0.0697472095489502, "learning_rate": 2.5077636257257303e-05, "loss": 0.2141, "step": 43804 }, { "epoch": 3.5486876215165264, "grad_norm": 0.07026585936546326, "learning_rate": 2.507313560466268e-05, "loss": 0.2203, "step": 43805 }, { "epoch": 3.5487686325340246, "grad_norm": 0.06228466331958771, "learning_rate": 2.5068634952068047e-05, "loss": 0.2645, "step": 43806 }, { "epoch": 3.548849643551523, "grad_norm": 0.065281443297863, "learning_rate": 2.5064134299473424e-05, "loss": 0.2255, "step": 43807 }, { "epoch": 3.5489306545690216, "grad_norm": 0.07093493640422821, "learning_rate": 2.50596336468788e-05, "loss": 0.2631, "step": 43808 }, { "epoch": 3.54901166558652, "grad_norm": 0.06182321906089783, "learning_rate": 2.5055132994284175e-05, "loss": 0.2215, "step": 43809 }, { "epoch": 3.549092676604018, "grad_norm": 0.0823003426194191, "learning_rate": 2.5050632341689545e-05, "loss": 0.241, "step": 43810 }, { "epoch": 3.5491736876215167, "grad_norm": 0.07107570767402649, "learning_rate": 2.5046131689094922e-05, "loss": 0.2367, "step": 43811 }, { "epoch": 3.549254698639015, "grad_norm": 0.07341833412647247, "learning_rate": 2.5041631036500295e-05, "loss": 0.2445, "step": 43812 }, { "epoch": 3.5493357096565132, "grad_norm": 0.05725165829062462, "learning_rate": 2.5037130383905666e-05, "loss": 0.2779, "step": 43813 }, { "epoch": 3.549416720674012, "grad_norm": 0.06485152244567871, "learning_rate": 2.5032629731311043e-05, "loss": 0.2184, "step": 43814 }, { "epoch": 3.54949773169151, "grad_norm": 0.07398539036512375, "learning_rate": 2.5028129078716416e-05, "loss": 0.2186, "step": 43815 }, { "epoch": 3.5495787427090084, "grad_norm": 0.06690250337123871, "learning_rate": 2.5023628426121786e-05, "loss": 0.213, "step": 43816 }, { "epoch": 3.5496597537265067, "grad_norm": 0.07127828896045685, "learning_rate": 2.5019127773527163e-05, "loss": 0.2559, "step": 43817 }, { "epoch": 3.5497407647440054, "grad_norm": 0.06582987308502197, "learning_rate": 2.5014627120932537e-05, "loss": 0.221, "step": 43818 }, { "epoch": 3.5498217757615036, "grad_norm": 0.06993681192398071, "learning_rate": 2.5010126468337907e-05, "loss": 0.2363, "step": 43819 }, { "epoch": 3.549902786779002, "grad_norm": 0.08037863671779633, "learning_rate": 2.5005625815743284e-05, "loss": 0.2695, "step": 43820 }, { "epoch": 3.5499837977965, "grad_norm": 0.06816840171813965, "learning_rate": 2.5001125163148658e-05, "loss": 0.2376, "step": 43821 }, { "epoch": 3.5500648088139988, "grad_norm": 0.0628628358244896, "learning_rate": 2.499662451055403e-05, "loss": 0.2551, "step": 43822 }, { "epoch": 3.550145819831497, "grad_norm": 0.060273364186286926, "learning_rate": 2.4992123857959405e-05, "loss": 0.2097, "step": 43823 }, { "epoch": 3.5502268308489953, "grad_norm": 0.06653022766113281, "learning_rate": 2.498762320536478e-05, "loss": 0.2405, "step": 43824 }, { "epoch": 3.550307841866494, "grad_norm": 0.06800541281700134, "learning_rate": 2.4983122552770152e-05, "loss": 0.2408, "step": 43825 }, { "epoch": 3.550388852883992, "grad_norm": 0.07112803310155869, "learning_rate": 2.497862190017553e-05, "loss": 0.2259, "step": 43826 }, { "epoch": 3.5504698639014904, "grad_norm": 0.07241787761449814, "learning_rate": 2.49741212475809e-05, "loss": 0.19, "step": 43827 }, { "epoch": 3.550550874918989, "grad_norm": 0.06978960335254669, "learning_rate": 2.4969620594986273e-05, "loss": 0.2314, "step": 43828 }, { "epoch": 3.5506318859364874, "grad_norm": 0.07811637222766876, "learning_rate": 2.496511994239165e-05, "loss": 0.2636, "step": 43829 }, { "epoch": 3.5507128969539856, "grad_norm": 0.07929955422878265, "learning_rate": 2.496061928979702e-05, "loss": 0.214, "step": 43830 }, { "epoch": 3.5507939079714843, "grad_norm": 0.059971027076244354, "learning_rate": 2.4956118637202394e-05, "loss": 0.1951, "step": 43831 }, { "epoch": 3.5508749189889826, "grad_norm": 0.06560038775205612, "learning_rate": 2.495161798460777e-05, "loss": 0.2191, "step": 43832 }, { "epoch": 3.550955930006481, "grad_norm": 0.06976732611656189, "learning_rate": 2.494711733201314e-05, "loss": 0.2603, "step": 43833 }, { "epoch": 3.5510369410239795, "grad_norm": 0.06167634576559067, "learning_rate": 2.4942616679418518e-05, "loss": 0.2475, "step": 43834 }, { "epoch": 3.5511179520414777, "grad_norm": 0.06971214711666107, "learning_rate": 2.493811602682389e-05, "loss": 0.2262, "step": 43835 }, { "epoch": 3.551198963058976, "grad_norm": 0.07255706936120987, "learning_rate": 2.4933615374229262e-05, "loss": 0.233, "step": 43836 }, { "epoch": 3.5512799740764747, "grad_norm": 0.05798391252756119, "learning_rate": 2.492911472163464e-05, "loss": 0.2406, "step": 43837 }, { "epoch": 3.551360985093973, "grad_norm": 0.09059900790452957, "learning_rate": 2.4924614069040012e-05, "loss": 0.2585, "step": 43838 }, { "epoch": 3.551441996111471, "grad_norm": 0.07024770975112915, "learning_rate": 2.4920113416445386e-05, "loss": 0.2306, "step": 43839 }, { "epoch": 3.5515230071289694, "grad_norm": 0.07047968357801437, "learning_rate": 2.491561276385076e-05, "loss": 0.2022, "step": 43840 }, { "epoch": 3.5516040181464676, "grad_norm": 0.07896172255277634, "learning_rate": 2.4911112111256133e-05, "loss": 0.2118, "step": 43841 }, { "epoch": 3.5516850291639663, "grad_norm": 0.06975747644901276, "learning_rate": 2.4906611458661507e-05, "loss": 0.2167, "step": 43842 }, { "epoch": 3.5517660401814646, "grad_norm": 0.06797535717487335, "learning_rate": 2.490211080606688e-05, "loss": 0.2196, "step": 43843 }, { "epoch": 3.551847051198963, "grad_norm": 0.06500260531902313, "learning_rate": 2.4897610153472254e-05, "loss": 0.2352, "step": 43844 }, { "epoch": 3.5519280622164615, "grad_norm": 0.08648168295621872, "learning_rate": 2.4893109500877628e-05, "loss": 0.2259, "step": 43845 }, { "epoch": 3.5520090732339598, "grad_norm": 0.0645139291882515, "learning_rate": 2.4888608848283e-05, "loss": 0.1921, "step": 43846 }, { "epoch": 3.552090084251458, "grad_norm": 0.07572876662015915, "learning_rate": 2.4884108195688378e-05, "loss": 0.2005, "step": 43847 }, { "epoch": 3.5521710952689567, "grad_norm": 0.07796064019203186, "learning_rate": 2.487960754309375e-05, "loss": 0.2647, "step": 43848 }, { "epoch": 3.552252106286455, "grad_norm": 0.06862769275903702, "learning_rate": 2.4875106890499122e-05, "loss": 0.1952, "step": 43849 }, { "epoch": 3.552333117303953, "grad_norm": 0.06902038305997849, "learning_rate": 2.48706062379045e-05, "loss": 0.239, "step": 43850 }, { "epoch": 3.552414128321452, "grad_norm": 0.06956470757722855, "learning_rate": 2.486610558530987e-05, "loss": 0.2132, "step": 43851 }, { "epoch": 3.55249513933895, "grad_norm": 0.0691932812333107, "learning_rate": 2.4861604932715246e-05, "loss": 0.1995, "step": 43852 }, { "epoch": 3.5525761503564484, "grad_norm": 0.05597129836678505, "learning_rate": 2.485710428012062e-05, "loss": 0.2127, "step": 43853 }, { "epoch": 3.552657161373947, "grad_norm": 0.06926069408655167, "learning_rate": 2.485260362752599e-05, "loss": 0.2278, "step": 43854 }, { "epoch": 3.5527381723914453, "grad_norm": 0.06039774417877197, "learning_rate": 2.4848102974931367e-05, "loss": 0.2404, "step": 43855 }, { "epoch": 3.5528191834089435, "grad_norm": 0.07916494458913803, "learning_rate": 2.484360232233674e-05, "loss": 0.2635, "step": 43856 }, { "epoch": 3.5529001944264422, "grad_norm": 0.06923363357782364, "learning_rate": 2.483910166974211e-05, "loss": 0.2183, "step": 43857 }, { "epoch": 3.5529812054439405, "grad_norm": 0.07305173575878143, "learning_rate": 2.4834601017147488e-05, "loss": 0.2313, "step": 43858 }, { "epoch": 3.5530622164614387, "grad_norm": 0.08665354549884796, "learning_rate": 2.483010036455286e-05, "loss": 0.2055, "step": 43859 }, { "epoch": 3.5531432274789374, "grad_norm": 0.07427648454904556, "learning_rate": 2.4825599711958235e-05, "loss": 0.2022, "step": 43860 }, { "epoch": 3.5532242384964356, "grad_norm": 0.053579483181238174, "learning_rate": 2.482109905936361e-05, "loss": 0.1787, "step": 43861 }, { "epoch": 3.553305249513934, "grad_norm": 0.08276137709617615, "learning_rate": 2.4816598406768982e-05, "loss": 0.2655, "step": 43862 }, { "epoch": 3.553386260531432, "grad_norm": 0.07999289780855179, "learning_rate": 2.4812097754174356e-05, "loss": 0.2256, "step": 43863 }, { "epoch": 3.5534672715489304, "grad_norm": 0.058108363300561905, "learning_rate": 2.480759710157973e-05, "loss": 0.2014, "step": 43864 }, { "epoch": 3.553548282566429, "grad_norm": 0.06204414367675781, "learning_rate": 2.4803096448985107e-05, "loss": 0.2261, "step": 43865 }, { "epoch": 3.5536292935839273, "grad_norm": 0.07165088504552841, "learning_rate": 2.4798595796390477e-05, "loss": 0.2246, "step": 43866 }, { "epoch": 3.5537103046014256, "grad_norm": 0.07575501501560211, "learning_rate": 2.479409514379585e-05, "loss": 0.2191, "step": 43867 }, { "epoch": 3.5537913156189243, "grad_norm": 0.07097320258617401, "learning_rate": 2.4789594491201227e-05, "loss": 0.2366, "step": 43868 }, { "epoch": 3.5538723266364225, "grad_norm": 0.06804100424051285, "learning_rate": 2.4785093838606598e-05, "loss": 0.2409, "step": 43869 }, { "epoch": 3.5539533376539207, "grad_norm": 0.100010946393013, "learning_rate": 2.478059318601197e-05, "loss": 0.2307, "step": 43870 }, { "epoch": 3.5540343486714194, "grad_norm": 0.06930720061063766, "learning_rate": 2.4776092533417348e-05, "loss": 0.2133, "step": 43871 }, { "epoch": 3.5541153596889177, "grad_norm": 0.0644577294588089, "learning_rate": 2.477159188082272e-05, "loss": 0.2123, "step": 43872 }, { "epoch": 3.554196370706416, "grad_norm": 0.07674825936555862, "learning_rate": 2.4767091228228095e-05, "loss": 0.2602, "step": 43873 }, { "epoch": 3.5542773817239146, "grad_norm": 0.06586476415395737, "learning_rate": 2.476259057563347e-05, "loss": 0.2086, "step": 43874 }, { "epoch": 3.554358392741413, "grad_norm": 0.07297840714454651, "learning_rate": 2.475808992303884e-05, "loss": 0.2148, "step": 43875 }, { "epoch": 3.554439403758911, "grad_norm": 0.08475550264120102, "learning_rate": 2.4753589270444216e-05, "loss": 0.2141, "step": 43876 }, { "epoch": 3.55452041477641, "grad_norm": 0.06303331255912781, "learning_rate": 2.474908861784959e-05, "loss": 0.2065, "step": 43877 }, { "epoch": 3.554601425793908, "grad_norm": 0.08519791066646576, "learning_rate": 2.4744587965254963e-05, "loss": 0.2199, "step": 43878 }, { "epoch": 3.5546824368114063, "grad_norm": 0.0769948959350586, "learning_rate": 2.4740087312660337e-05, "loss": 0.2515, "step": 43879 }, { "epoch": 3.554763447828905, "grad_norm": 0.0675671175122261, "learning_rate": 2.473558666006571e-05, "loss": 0.2318, "step": 43880 }, { "epoch": 3.554844458846403, "grad_norm": 0.09687194973230362, "learning_rate": 2.4731086007471084e-05, "loss": 0.22, "step": 43881 }, { "epoch": 3.5549254698639015, "grad_norm": 0.07729329913854599, "learning_rate": 2.4726585354876458e-05, "loss": 0.2081, "step": 43882 }, { "epoch": 3.5550064808814, "grad_norm": 0.06892815232276917, "learning_rate": 2.472208470228183e-05, "loss": 0.219, "step": 43883 }, { "epoch": 3.5550874918988984, "grad_norm": 0.08760858327150345, "learning_rate": 2.4717584049687205e-05, "loss": 0.2182, "step": 43884 }, { "epoch": 3.5551685029163966, "grad_norm": 0.08728528022766113, "learning_rate": 2.471308339709258e-05, "loss": 0.242, "step": 43885 }, { "epoch": 3.555249513933895, "grad_norm": 0.060018256306648254, "learning_rate": 2.4708582744497956e-05, "loss": 0.2011, "step": 43886 }, { "epoch": 3.555330524951393, "grad_norm": 0.06998419761657715, "learning_rate": 2.4704082091903326e-05, "loss": 0.2274, "step": 43887 }, { "epoch": 3.555411535968892, "grad_norm": 0.06843650341033936, "learning_rate": 2.46995814393087e-05, "loss": 0.2482, "step": 43888 }, { "epoch": 3.55549254698639, "grad_norm": 0.06852348148822784, "learning_rate": 2.4695080786714076e-05, "loss": 0.2321, "step": 43889 }, { "epoch": 3.5555735580038883, "grad_norm": 0.07453927397727966, "learning_rate": 2.4690580134119447e-05, "loss": 0.2313, "step": 43890 }, { "epoch": 3.555654569021387, "grad_norm": 0.05931844562292099, "learning_rate": 2.4686079481524824e-05, "loss": 0.2634, "step": 43891 }, { "epoch": 3.5557355800388852, "grad_norm": 0.08144770562648773, "learning_rate": 2.4681578828930197e-05, "loss": 0.2287, "step": 43892 }, { "epoch": 3.5558165910563835, "grad_norm": 0.07383275032043457, "learning_rate": 2.4677078176335567e-05, "loss": 0.2394, "step": 43893 }, { "epoch": 3.555897602073882, "grad_norm": 0.06248151510953903, "learning_rate": 2.4672577523740944e-05, "loss": 0.1873, "step": 43894 }, { "epoch": 3.5559786130913804, "grad_norm": 0.06529638916254044, "learning_rate": 2.4668076871146318e-05, "loss": 0.2171, "step": 43895 }, { "epoch": 3.5560596241088787, "grad_norm": 0.07343272864818573, "learning_rate": 2.466357621855169e-05, "loss": 0.2316, "step": 43896 }, { "epoch": 3.5561406351263773, "grad_norm": 0.0745483785867691, "learning_rate": 2.4659075565957065e-05, "loss": 0.2576, "step": 43897 }, { "epoch": 3.5562216461438756, "grad_norm": 0.06922461092472076, "learning_rate": 2.465457491336244e-05, "loss": 0.2411, "step": 43898 }, { "epoch": 3.556302657161374, "grad_norm": 0.07483216375112534, "learning_rate": 2.4650074260767812e-05, "loss": 0.2621, "step": 43899 }, { "epoch": 3.5563836681788725, "grad_norm": 0.06790409982204437, "learning_rate": 2.4645573608173186e-05, "loss": 0.2419, "step": 43900 }, { "epoch": 3.5564646791963708, "grad_norm": 0.0795658528804779, "learning_rate": 2.464107295557856e-05, "loss": 0.1978, "step": 43901 }, { "epoch": 3.556545690213869, "grad_norm": 0.07117760181427002, "learning_rate": 2.4636572302983933e-05, "loss": 0.2248, "step": 43902 }, { "epoch": 3.5566267012313677, "grad_norm": 0.07886912673711777, "learning_rate": 2.4632071650389307e-05, "loss": 0.2056, "step": 43903 }, { "epoch": 3.556707712248866, "grad_norm": 0.06801647692918777, "learning_rate": 2.462757099779468e-05, "loss": 0.2231, "step": 43904 }, { "epoch": 3.556788723266364, "grad_norm": 0.06421799212694168, "learning_rate": 2.4623070345200054e-05, "loss": 0.2224, "step": 43905 }, { "epoch": 3.556869734283863, "grad_norm": 0.06796405464410782, "learning_rate": 2.4618569692605428e-05, "loss": 0.1922, "step": 43906 }, { "epoch": 3.556950745301361, "grad_norm": 0.05750831589102745, "learning_rate": 2.4614069040010805e-05, "loss": 0.2175, "step": 43907 }, { "epoch": 3.5570317563188594, "grad_norm": 0.07587180286645889, "learning_rate": 2.4609568387416175e-05, "loss": 0.2163, "step": 43908 }, { "epoch": 3.5571127673363576, "grad_norm": 0.06804661452770233, "learning_rate": 2.460506773482155e-05, "loss": 0.2091, "step": 43909 }, { "epoch": 3.557193778353856, "grad_norm": 0.0663495659828186, "learning_rate": 2.4600567082226925e-05, "loss": 0.2252, "step": 43910 }, { "epoch": 3.5572747893713546, "grad_norm": 0.07366745918989182, "learning_rate": 2.45960664296323e-05, "loss": 0.2124, "step": 43911 }, { "epoch": 3.557355800388853, "grad_norm": 0.07139547169208527, "learning_rate": 2.4591565777037673e-05, "loss": 0.2407, "step": 43912 }, { "epoch": 3.557436811406351, "grad_norm": 0.06392621994018555, "learning_rate": 2.4587065124443046e-05, "loss": 0.2014, "step": 43913 }, { "epoch": 3.5575178224238497, "grad_norm": 0.08841899037361145, "learning_rate": 2.458256447184842e-05, "loss": 0.2268, "step": 43914 }, { "epoch": 3.557598833441348, "grad_norm": 0.08579627424478531, "learning_rate": 2.4578063819253793e-05, "loss": 0.2625, "step": 43915 }, { "epoch": 3.557679844458846, "grad_norm": 0.06458761543035507, "learning_rate": 2.4573563166659167e-05, "loss": 0.21, "step": 43916 }, { "epoch": 3.557760855476345, "grad_norm": 0.07201027870178223, "learning_rate": 2.456906251406454e-05, "loss": 0.2427, "step": 43917 }, { "epoch": 3.557841866493843, "grad_norm": 0.07014687359333038, "learning_rate": 2.4564561861469914e-05, "loss": 0.2211, "step": 43918 }, { "epoch": 3.5579228775113414, "grad_norm": 0.06852081418037415, "learning_rate": 2.4560061208875288e-05, "loss": 0.2001, "step": 43919 }, { "epoch": 3.55800388852884, "grad_norm": 0.07407641410827637, "learning_rate": 2.455556055628066e-05, "loss": 0.2404, "step": 43920 }, { "epoch": 3.5580848995463383, "grad_norm": 0.07178988307714462, "learning_rate": 2.4551059903686035e-05, "loss": 0.2773, "step": 43921 }, { "epoch": 3.5581659105638366, "grad_norm": 0.07537788897752762, "learning_rate": 2.454655925109141e-05, "loss": 0.2245, "step": 43922 }, { "epoch": 3.5582469215813353, "grad_norm": 0.07685130834579468, "learning_rate": 2.4542058598496782e-05, "loss": 0.2201, "step": 43923 }, { "epoch": 3.5583279325988335, "grad_norm": 0.07150780409574509, "learning_rate": 2.4537557945902156e-05, "loss": 0.2516, "step": 43924 }, { "epoch": 3.5584089436163318, "grad_norm": 0.08379261195659637, "learning_rate": 2.4533057293307533e-05, "loss": 0.2137, "step": 43925 }, { "epoch": 3.5584899546338304, "grad_norm": 0.08677427470684052, "learning_rate": 2.4528556640712903e-05, "loss": 0.2589, "step": 43926 }, { "epoch": 3.5585709656513287, "grad_norm": 0.08629120886325836, "learning_rate": 2.4524055988118277e-05, "loss": 0.2187, "step": 43927 }, { "epoch": 3.558651976668827, "grad_norm": 0.06367433816194534, "learning_rate": 2.4519555335523654e-05, "loss": 0.2112, "step": 43928 }, { "epoch": 3.558732987686325, "grad_norm": 0.08857046067714691, "learning_rate": 2.4515054682929027e-05, "loss": 0.2795, "step": 43929 }, { "epoch": 3.558813998703824, "grad_norm": 0.0720190778374672, "learning_rate": 2.4510554030334397e-05, "loss": 0.2604, "step": 43930 }, { "epoch": 3.558895009721322, "grad_norm": 0.07167576253414154, "learning_rate": 2.4506053377739774e-05, "loss": 0.189, "step": 43931 }, { "epoch": 3.5589760207388204, "grad_norm": 0.059410810470581055, "learning_rate": 2.4501552725145148e-05, "loss": 0.1943, "step": 43932 }, { "epoch": 3.5590570317563186, "grad_norm": 0.07639388740062714, "learning_rate": 2.4497052072550522e-05, "loss": 0.2547, "step": 43933 }, { "epoch": 3.5591380427738173, "grad_norm": 0.07768415659666061, "learning_rate": 2.4492551419955895e-05, "loss": 0.2425, "step": 43934 }, { "epoch": 3.5592190537913155, "grad_norm": 0.08310689777135849, "learning_rate": 2.448805076736127e-05, "loss": 0.2488, "step": 43935 }, { "epoch": 3.559300064808814, "grad_norm": 0.07589665800333023, "learning_rate": 2.4483550114766643e-05, "loss": 0.2233, "step": 43936 }, { "epoch": 3.5593810758263125, "grad_norm": 0.06544729322195053, "learning_rate": 2.4479049462172016e-05, "loss": 0.185, "step": 43937 }, { "epoch": 3.5594620868438107, "grad_norm": 0.09256394952535629, "learning_rate": 2.447454880957739e-05, "loss": 0.2256, "step": 43938 }, { "epoch": 3.559543097861309, "grad_norm": 0.07318930327892303, "learning_rate": 2.4470048156982763e-05, "loss": 0.2212, "step": 43939 }, { "epoch": 3.5596241088788076, "grad_norm": 0.05668797716498375, "learning_rate": 2.4465547504388137e-05, "loss": 0.1985, "step": 43940 }, { "epoch": 3.559705119896306, "grad_norm": 0.06777235120534897, "learning_rate": 2.446104685179351e-05, "loss": 0.2098, "step": 43941 }, { "epoch": 3.559786130913804, "grad_norm": 0.0696289986371994, "learning_rate": 2.4456546199198884e-05, "loss": 0.2132, "step": 43942 }, { "epoch": 3.559867141931303, "grad_norm": 0.0714426040649414, "learning_rate": 2.4452045546604258e-05, "loss": 0.2044, "step": 43943 }, { "epoch": 3.559948152948801, "grad_norm": 0.09928544610738754, "learning_rate": 2.444754489400963e-05, "loss": 0.2655, "step": 43944 }, { "epoch": 3.5600291639662993, "grad_norm": 0.07626913487911224, "learning_rate": 2.4443044241415005e-05, "loss": 0.1946, "step": 43945 }, { "epoch": 3.560110174983798, "grad_norm": 0.10116627812385559, "learning_rate": 2.4438543588820382e-05, "loss": 0.1954, "step": 43946 }, { "epoch": 3.5601911860012962, "grad_norm": 0.06626923382282257, "learning_rate": 2.4434042936225756e-05, "loss": 0.2106, "step": 43947 }, { "epoch": 3.5602721970187945, "grad_norm": 0.08282341063022614, "learning_rate": 2.4429542283631126e-05, "loss": 0.2175, "step": 43948 }, { "epoch": 3.560353208036293, "grad_norm": 0.0847616046667099, "learning_rate": 2.4425041631036503e-05, "loss": 0.2036, "step": 43949 }, { "epoch": 3.5604342190537914, "grad_norm": 0.07245887815952301, "learning_rate": 2.4420540978441876e-05, "loss": 0.2534, "step": 43950 }, { "epoch": 3.5605152300712897, "grad_norm": 0.0700652077794075, "learning_rate": 2.441604032584725e-05, "loss": 0.2381, "step": 43951 }, { "epoch": 3.560596241088788, "grad_norm": 0.06606948375701904, "learning_rate": 2.4411539673252624e-05, "loss": 0.2612, "step": 43952 }, { "epoch": 3.5606772521062866, "grad_norm": 0.08605430275201797, "learning_rate": 2.4407039020657997e-05, "loss": 0.2221, "step": 43953 }, { "epoch": 3.560758263123785, "grad_norm": 0.08067008852958679, "learning_rate": 2.440253836806337e-05, "loss": 0.2909, "step": 43954 }, { "epoch": 3.560839274141283, "grad_norm": 0.08161859959363937, "learning_rate": 2.4398037715468744e-05, "loss": 0.213, "step": 43955 }, { "epoch": 3.5609202851587813, "grad_norm": 0.07555124908685684, "learning_rate": 2.4393537062874118e-05, "loss": 0.1861, "step": 43956 }, { "epoch": 3.56100129617628, "grad_norm": 0.08395944535732269, "learning_rate": 2.438903641027949e-05, "loss": 0.2521, "step": 43957 }, { "epoch": 3.5610823071937783, "grad_norm": 0.08223508298397064, "learning_rate": 2.4384535757684865e-05, "loss": 0.2328, "step": 43958 }, { "epoch": 3.5611633182112765, "grad_norm": 0.06962686032056808, "learning_rate": 2.438003510509024e-05, "loss": 0.2079, "step": 43959 }, { "epoch": 3.561244329228775, "grad_norm": 0.07295332103967667, "learning_rate": 2.4375534452495612e-05, "loss": 0.2196, "step": 43960 }, { "epoch": 3.5613253402462735, "grad_norm": 0.058311235159635544, "learning_rate": 2.4371033799900986e-05, "loss": 0.2089, "step": 43961 }, { "epoch": 3.5614063512637717, "grad_norm": 0.06865894049406052, "learning_rate": 2.436653314730636e-05, "loss": 0.2078, "step": 43962 }, { "epoch": 3.5614873622812704, "grad_norm": 0.06817828863859177, "learning_rate": 2.4362032494711733e-05, "loss": 0.2547, "step": 43963 }, { "epoch": 3.5615683732987686, "grad_norm": 0.05765335261821747, "learning_rate": 2.435753184211711e-05, "loss": 0.1979, "step": 43964 }, { "epoch": 3.561649384316267, "grad_norm": 0.08066023141145706, "learning_rate": 2.4353031189522484e-05, "loss": 0.2031, "step": 43965 }, { "epoch": 3.5617303953337656, "grad_norm": 0.07500291615724564, "learning_rate": 2.4348530536927854e-05, "loss": 0.2186, "step": 43966 }, { "epoch": 3.561811406351264, "grad_norm": 0.07075216621160507, "learning_rate": 2.434402988433323e-05, "loss": 0.2086, "step": 43967 }, { "epoch": 3.561892417368762, "grad_norm": 0.06981159001588821, "learning_rate": 2.4339529231738605e-05, "loss": 0.231, "step": 43968 }, { "epoch": 3.5619734283862607, "grad_norm": 0.07031860947608948, "learning_rate": 2.4335028579143975e-05, "loss": 0.2415, "step": 43969 }, { "epoch": 3.562054439403759, "grad_norm": 0.06598281115293503, "learning_rate": 2.4330527926549352e-05, "loss": 0.2555, "step": 43970 }, { "epoch": 3.5621354504212572, "grad_norm": 0.06520348787307739, "learning_rate": 2.4326027273954725e-05, "loss": 0.2192, "step": 43971 }, { "epoch": 3.562216461438756, "grad_norm": 0.08133994787931442, "learning_rate": 2.43215266213601e-05, "loss": 0.2251, "step": 43972 }, { "epoch": 3.562297472456254, "grad_norm": 0.09172660857439041, "learning_rate": 2.4317025968765473e-05, "loss": 0.2615, "step": 43973 }, { "epoch": 3.5623784834737524, "grad_norm": 0.08263950794935226, "learning_rate": 2.4312525316170846e-05, "loss": 0.234, "step": 43974 }, { "epoch": 3.5624594944912507, "grad_norm": 0.07384882867336273, "learning_rate": 2.430802466357622e-05, "loss": 0.2309, "step": 43975 }, { "epoch": 3.5625405055087493, "grad_norm": 0.0683225616812706, "learning_rate": 2.4303524010981593e-05, "loss": 0.2085, "step": 43976 }, { "epoch": 3.5626215165262476, "grad_norm": 0.06126277148723602, "learning_rate": 2.4299023358386967e-05, "loss": 0.1956, "step": 43977 }, { "epoch": 3.562702527543746, "grad_norm": 0.07429927587509155, "learning_rate": 2.429452270579234e-05, "loss": 0.1984, "step": 43978 }, { "epoch": 3.562783538561244, "grad_norm": 0.06797944009304047, "learning_rate": 2.4290022053197714e-05, "loss": 0.2082, "step": 43979 }, { "epoch": 3.5628645495787428, "grad_norm": 0.07353696972131729, "learning_rate": 2.428552140060309e-05, "loss": 0.213, "step": 43980 }, { "epoch": 3.562945560596241, "grad_norm": 0.06893346458673477, "learning_rate": 2.428102074800846e-05, "loss": 0.1905, "step": 43981 }, { "epoch": 3.5630265716137393, "grad_norm": 0.0654299408197403, "learning_rate": 2.4276520095413835e-05, "loss": 0.2123, "step": 43982 }, { "epoch": 3.563107582631238, "grad_norm": 0.06341482698917389, "learning_rate": 2.4272019442819212e-05, "loss": 0.2231, "step": 43983 }, { "epoch": 3.563188593648736, "grad_norm": 0.06760953366756439, "learning_rate": 2.4267518790224582e-05, "loss": 0.2135, "step": 43984 }, { "epoch": 3.5632696046662344, "grad_norm": 0.0738074854016304, "learning_rate": 2.426301813762996e-05, "loss": 0.2402, "step": 43985 }, { "epoch": 3.563350615683733, "grad_norm": 0.08491325378417969, "learning_rate": 2.4258517485035333e-05, "loss": 0.2373, "step": 43986 }, { "epoch": 3.5634316267012314, "grad_norm": 0.08582518249750137, "learning_rate": 2.4254016832440703e-05, "loss": 0.1853, "step": 43987 }, { "epoch": 3.5635126377187296, "grad_norm": 0.07919377833604813, "learning_rate": 2.424951617984608e-05, "loss": 0.2097, "step": 43988 }, { "epoch": 3.5635936487362283, "grad_norm": 0.06492700427770615, "learning_rate": 2.4245015527251454e-05, "loss": 0.2123, "step": 43989 }, { "epoch": 3.5636746597537265, "grad_norm": 0.07696332037448883, "learning_rate": 2.4240514874656824e-05, "loss": 0.2316, "step": 43990 }, { "epoch": 3.563755670771225, "grad_norm": 0.06934797763824463, "learning_rate": 2.42360142220622e-05, "loss": 0.2295, "step": 43991 }, { "epoch": 3.5638366817887235, "grad_norm": 0.06783980876207352, "learning_rate": 2.4231513569467574e-05, "loss": 0.2046, "step": 43992 }, { "epoch": 3.5639176928062217, "grad_norm": 0.06434223800897598, "learning_rate": 2.4227012916872948e-05, "loss": 0.2079, "step": 43993 }, { "epoch": 3.56399870382372, "grad_norm": 0.06556785106658936, "learning_rate": 2.422251226427832e-05, "loss": 0.2694, "step": 43994 }, { "epoch": 3.5640797148412187, "grad_norm": 0.09260761737823486, "learning_rate": 2.4218011611683695e-05, "loss": 0.2398, "step": 43995 }, { "epoch": 3.564160725858717, "grad_norm": 0.09623724967241287, "learning_rate": 2.421351095908907e-05, "loss": 0.2155, "step": 43996 }, { "epoch": 3.564241736876215, "grad_norm": 0.06439271569252014, "learning_rate": 2.4209010306494442e-05, "loss": 0.2079, "step": 43997 }, { "epoch": 3.5643227478937134, "grad_norm": 0.07777006179094315, "learning_rate": 2.420450965389982e-05, "loss": 0.2026, "step": 43998 }, { "epoch": 3.564403758911212, "grad_norm": 0.06641001999378204, "learning_rate": 2.420000900130519e-05, "loss": 0.2298, "step": 43999 }, { "epoch": 3.5644847699287103, "grad_norm": 0.07535912096500397, "learning_rate": 2.4195508348710563e-05, "loss": 0.2629, "step": 44000 }, { "epoch": 3.5645657809462086, "grad_norm": 0.07521260529756546, "learning_rate": 2.419100769611594e-05, "loss": 0.1767, "step": 44001 }, { "epoch": 3.564646791963707, "grad_norm": 0.0752941370010376, "learning_rate": 2.418650704352131e-05, "loss": 0.1997, "step": 44002 }, { "epoch": 3.5647278029812055, "grad_norm": 0.0634898692369461, "learning_rate": 2.4182006390926684e-05, "loss": 0.2246, "step": 44003 }, { "epoch": 3.5648088139987038, "grad_norm": 0.08196897059679031, "learning_rate": 2.417750573833206e-05, "loss": 0.2227, "step": 44004 }, { "epoch": 3.564889825016202, "grad_norm": 0.06975170969963074, "learning_rate": 2.417300508573743e-05, "loss": 0.2097, "step": 44005 }, { "epoch": 3.5649708360337007, "grad_norm": 0.061027590185403824, "learning_rate": 2.4168504433142808e-05, "loss": 0.2033, "step": 44006 }, { "epoch": 3.565051847051199, "grad_norm": 0.06183544546365738, "learning_rate": 2.4164003780548182e-05, "loss": 0.2268, "step": 44007 }, { "epoch": 3.565132858068697, "grad_norm": 0.06074502319097519, "learning_rate": 2.4159503127953552e-05, "loss": 0.1866, "step": 44008 }, { "epoch": 3.565213869086196, "grad_norm": 0.06271813064813614, "learning_rate": 2.415500247535893e-05, "loss": 0.2076, "step": 44009 }, { "epoch": 3.565294880103694, "grad_norm": 0.08766993880271912, "learning_rate": 2.4150501822764303e-05, "loss": 0.2547, "step": 44010 }, { "epoch": 3.5653758911211924, "grad_norm": 0.06195381283760071, "learning_rate": 2.4146001170169676e-05, "loss": 0.2211, "step": 44011 }, { "epoch": 3.565456902138691, "grad_norm": 0.0778651088476181, "learning_rate": 2.414150051757505e-05, "loss": 0.2419, "step": 44012 }, { "epoch": 3.5655379131561893, "grad_norm": 0.07984745502471924, "learning_rate": 2.4136999864980424e-05, "loss": 0.2673, "step": 44013 }, { "epoch": 3.5656189241736875, "grad_norm": 0.07894036918878555, "learning_rate": 2.4132499212385797e-05, "loss": 0.2124, "step": 44014 }, { "epoch": 3.565699935191186, "grad_norm": 0.06846199929714203, "learning_rate": 2.412799855979117e-05, "loss": 0.1949, "step": 44015 }, { "epoch": 3.5657809462086845, "grad_norm": 0.07311907410621643, "learning_rate": 2.4123497907196544e-05, "loss": 0.2675, "step": 44016 }, { "epoch": 3.5658619572261827, "grad_norm": 0.09844955801963806, "learning_rate": 2.4118997254601918e-05, "loss": 0.2622, "step": 44017 }, { "epoch": 3.5659429682436814, "grad_norm": 0.07872521877288818, "learning_rate": 2.411449660200729e-05, "loss": 0.1942, "step": 44018 }, { "epoch": 3.5660239792611796, "grad_norm": 0.07679609209299088, "learning_rate": 2.410999594941267e-05, "loss": 0.2533, "step": 44019 }, { "epoch": 3.566104990278678, "grad_norm": 0.06353718787431717, "learning_rate": 2.410549529681804e-05, "loss": 0.2144, "step": 44020 }, { "epoch": 3.566186001296176, "grad_norm": 0.06551864743232727, "learning_rate": 2.4100994644223412e-05, "loss": 0.231, "step": 44021 }, { "epoch": 3.566267012313675, "grad_norm": 0.05590434372425079, "learning_rate": 2.409649399162879e-05, "loss": 0.2214, "step": 44022 }, { "epoch": 3.566348023331173, "grad_norm": 0.06585326045751572, "learning_rate": 2.409199333903416e-05, "loss": 0.2054, "step": 44023 }, { "epoch": 3.5664290343486713, "grad_norm": 0.06935842335224152, "learning_rate": 2.4087492686439537e-05, "loss": 0.1959, "step": 44024 }, { "epoch": 3.5665100453661696, "grad_norm": 0.07737302035093307, "learning_rate": 2.408299203384491e-05, "loss": 0.2492, "step": 44025 }, { "epoch": 3.5665910563836682, "grad_norm": 0.07547137141227722, "learning_rate": 2.407849138125028e-05, "loss": 0.2421, "step": 44026 }, { "epoch": 3.5666720674011665, "grad_norm": 0.06433389335870743, "learning_rate": 2.4073990728655657e-05, "loss": 0.2068, "step": 44027 }, { "epoch": 3.5667530784186647, "grad_norm": 0.08710397034883499, "learning_rate": 2.406949007606103e-05, "loss": 0.2787, "step": 44028 }, { "epoch": 3.5668340894361634, "grad_norm": 0.09563450515270233, "learning_rate": 2.40649894234664e-05, "loss": 0.2418, "step": 44029 }, { "epoch": 3.5669151004536617, "grad_norm": 0.06833392381668091, "learning_rate": 2.4060488770871778e-05, "loss": 0.2281, "step": 44030 }, { "epoch": 3.56699611147116, "grad_norm": 0.05998595058917999, "learning_rate": 2.4055988118277152e-05, "loss": 0.2187, "step": 44031 }, { "epoch": 3.5670771224886586, "grad_norm": 0.06386803090572357, "learning_rate": 2.4051487465682525e-05, "loss": 0.2263, "step": 44032 }, { "epoch": 3.567158133506157, "grad_norm": 0.07443685829639435, "learning_rate": 2.40469868130879e-05, "loss": 0.2222, "step": 44033 }, { "epoch": 3.567239144523655, "grad_norm": 0.08675825595855713, "learning_rate": 2.4042486160493273e-05, "loss": 0.2485, "step": 44034 }, { "epoch": 3.567320155541154, "grad_norm": 0.06265902519226074, "learning_rate": 2.4037985507898646e-05, "loss": 0.1981, "step": 44035 }, { "epoch": 3.567401166558652, "grad_norm": 0.08795081079006195, "learning_rate": 2.403348485530402e-05, "loss": 0.2301, "step": 44036 }, { "epoch": 3.5674821775761503, "grad_norm": 0.07681228965520859, "learning_rate": 2.4028984202709397e-05, "loss": 0.2507, "step": 44037 }, { "epoch": 3.567563188593649, "grad_norm": 0.04987798258662224, "learning_rate": 2.4024483550114767e-05, "loss": 0.1904, "step": 44038 }, { "epoch": 3.567644199611147, "grad_norm": 0.07152686268091202, "learning_rate": 2.401998289752014e-05, "loss": 0.2213, "step": 44039 }, { "epoch": 3.5677252106286454, "grad_norm": 0.09124844521284103, "learning_rate": 2.4015482244925518e-05, "loss": 0.27, "step": 44040 }, { "epoch": 3.567806221646144, "grad_norm": 0.07459650933742523, "learning_rate": 2.4010981592330888e-05, "loss": 0.2502, "step": 44041 }, { "epoch": 3.5678872326636424, "grad_norm": 0.06212278828024864, "learning_rate": 2.400648093973626e-05, "loss": 0.193, "step": 44042 }, { "epoch": 3.5679682436811406, "grad_norm": 0.061736591160297394, "learning_rate": 2.400198028714164e-05, "loss": 0.2338, "step": 44043 }, { "epoch": 3.568049254698639, "grad_norm": 0.10944293439388275, "learning_rate": 2.399747963454701e-05, "loss": 0.2443, "step": 44044 }, { "epoch": 3.568130265716137, "grad_norm": 0.07717695832252502, "learning_rate": 2.3992978981952386e-05, "loss": 0.212, "step": 44045 }, { "epoch": 3.568211276733636, "grad_norm": 0.07111458480358124, "learning_rate": 2.398847832935776e-05, "loss": 0.187, "step": 44046 }, { "epoch": 3.568292287751134, "grad_norm": 0.07641852647066116, "learning_rate": 2.398397767676313e-05, "loss": 0.2275, "step": 44047 }, { "epoch": 3.5683732987686323, "grad_norm": 0.07449782639741898, "learning_rate": 2.3979477024168506e-05, "loss": 0.2465, "step": 44048 }, { "epoch": 3.568454309786131, "grad_norm": 0.06614088267087936, "learning_rate": 2.397497637157388e-05, "loss": 0.1915, "step": 44049 }, { "epoch": 3.5685353208036292, "grad_norm": 0.0939965546131134, "learning_rate": 2.3970475718979254e-05, "loss": 0.2293, "step": 44050 }, { "epoch": 3.5686163318211275, "grad_norm": 0.08085455745458603, "learning_rate": 2.3965975066384627e-05, "loss": 0.23, "step": 44051 }, { "epoch": 3.568697342838626, "grad_norm": 0.07873645424842834, "learning_rate": 2.396147441379e-05, "loss": 0.2147, "step": 44052 }, { "epoch": 3.5687783538561244, "grad_norm": 0.0769469141960144, "learning_rate": 2.3956973761195374e-05, "loss": 0.2378, "step": 44053 }, { "epoch": 3.5688593648736227, "grad_norm": 0.0792095884680748, "learning_rate": 2.3952473108600748e-05, "loss": 0.2376, "step": 44054 }, { "epoch": 3.5689403758911213, "grad_norm": 0.06182113662362099, "learning_rate": 2.394797245600612e-05, "loss": 0.2373, "step": 44055 }, { "epoch": 3.5690213869086196, "grad_norm": 0.06288308650255203, "learning_rate": 2.3943471803411495e-05, "loss": 0.2071, "step": 44056 }, { "epoch": 3.569102397926118, "grad_norm": 0.06363116204738617, "learning_rate": 2.393897115081687e-05, "loss": 0.2536, "step": 44057 }, { "epoch": 3.5691834089436165, "grad_norm": 0.07311567664146423, "learning_rate": 2.3934470498222246e-05, "loss": 0.229, "step": 44058 }, { "epoch": 3.5692644199611148, "grad_norm": 0.0979669988155365, "learning_rate": 2.3929969845627616e-05, "loss": 0.2419, "step": 44059 }, { "epoch": 3.569345430978613, "grad_norm": 0.0624614879488945, "learning_rate": 2.392546919303299e-05, "loss": 0.227, "step": 44060 }, { "epoch": 3.5694264419961117, "grad_norm": 0.06651511788368225, "learning_rate": 2.3920968540438367e-05, "loss": 0.2525, "step": 44061 }, { "epoch": 3.56950745301361, "grad_norm": 0.06575529277324677, "learning_rate": 2.3916467887843737e-05, "loss": 0.2151, "step": 44062 }, { "epoch": 3.569588464031108, "grad_norm": 0.062389180064201355, "learning_rate": 2.3911967235249114e-05, "loss": 0.1888, "step": 44063 }, { "epoch": 3.569669475048607, "grad_norm": 0.06860721856355667, "learning_rate": 2.3907466582654487e-05, "loss": 0.2299, "step": 44064 }, { "epoch": 3.569750486066105, "grad_norm": 0.07936166226863861, "learning_rate": 2.3902965930059858e-05, "loss": 0.2001, "step": 44065 }, { "epoch": 3.5698314970836034, "grad_norm": 0.06096033751964569, "learning_rate": 2.3898465277465235e-05, "loss": 0.1796, "step": 44066 }, { "epoch": 3.5699125081011016, "grad_norm": 0.07629892230033875, "learning_rate": 2.3893964624870608e-05, "loss": 0.2292, "step": 44067 }, { "epoch": 3.5699935191186, "grad_norm": 0.08087896555662155, "learning_rate": 2.388946397227598e-05, "loss": 0.2092, "step": 44068 }, { "epoch": 3.5700745301360985, "grad_norm": 0.08344534784555435, "learning_rate": 2.3884963319681355e-05, "loss": 0.2359, "step": 44069 }, { "epoch": 3.570155541153597, "grad_norm": 0.07347863167524338, "learning_rate": 2.388046266708673e-05, "loss": 0.2435, "step": 44070 }, { "epoch": 3.570236552171095, "grad_norm": 0.08139392733573914, "learning_rate": 2.3875962014492103e-05, "loss": 0.2332, "step": 44071 }, { "epoch": 3.5703175631885937, "grad_norm": 0.06455414742231369, "learning_rate": 2.3871461361897476e-05, "loss": 0.2216, "step": 44072 }, { "epoch": 3.570398574206092, "grad_norm": 0.07281234860420227, "learning_rate": 2.386696070930285e-05, "loss": 0.2162, "step": 44073 }, { "epoch": 3.57047958522359, "grad_norm": 0.06973182410001755, "learning_rate": 2.3862460056708223e-05, "loss": 0.2096, "step": 44074 }, { "epoch": 3.570560596241089, "grad_norm": 0.06923564523458481, "learning_rate": 2.3857959404113597e-05, "loss": 0.1975, "step": 44075 }, { "epoch": 3.570641607258587, "grad_norm": 0.0641985535621643, "learning_rate": 2.385345875151897e-05, "loss": 0.1835, "step": 44076 }, { "epoch": 3.5707226182760854, "grad_norm": 0.06189914047718048, "learning_rate": 2.3848958098924344e-05, "loss": 0.2339, "step": 44077 }, { "epoch": 3.570803629293584, "grad_norm": 0.06003765016794205, "learning_rate": 2.3844457446329718e-05, "loss": 0.2617, "step": 44078 }, { "epoch": 3.5708846403110823, "grad_norm": 0.07658020406961441, "learning_rate": 2.3839956793735095e-05, "loss": 0.2086, "step": 44079 }, { "epoch": 3.5709656513285806, "grad_norm": 0.07172147929668427, "learning_rate": 2.3835456141140465e-05, "loss": 0.2026, "step": 44080 }, { "epoch": 3.5710466623460793, "grad_norm": 0.06534678488969803, "learning_rate": 2.383095548854584e-05, "loss": 0.206, "step": 44081 }, { "epoch": 3.5711276733635775, "grad_norm": 0.0649232566356659, "learning_rate": 2.3826454835951216e-05, "loss": 0.2246, "step": 44082 }, { "epoch": 3.5712086843810757, "grad_norm": 0.07970141619443893, "learning_rate": 2.3821954183356586e-05, "loss": 0.216, "step": 44083 }, { "epoch": 3.5712896953985744, "grad_norm": 0.06200230494141579, "learning_rate": 2.3817453530761963e-05, "loss": 0.199, "step": 44084 }, { "epoch": 3.5713707064160727, "grad_norm": 0.0675581619143486, "learning_rate": 2.3812952878167337e-05, "loss": 0.2419, "step": 44085 }, { "epoch": 3.571451717433571, "grad_norm": 0.09428895264863968, "learning_rate": 2.3808452225572707e-05, "loss": 0.2572, "step": 44086 }, { "epoch": 3.5715327284510696, "grad_norm": 0.060645028948783875, "learning_rate": 2.3803951572978084e-05, "loss": 0.2153, "step": 44087 }, { "epoch": 3.571613739468568, "grad_norm": 0.07589592039585114, "learning_rate": 2.3799450920383457e-05, "loss": 0.2024, "step": 44088 }, { "epoch": 3.571694750486066, "grad_norm": 0.07095162570476532, "learning_rate": 2.3794950267788828e-05, "loss": 0.2182, "step": 44089 }, { "epoch": 3.5717757615035644, "grad_norm": 0.07006850838661194, "learning_rate": 2.3790449615194205e-05, "loss": 0.2062, "step": 44090 }, { "epoch": 3.5718567725210626, "grad_norm": 0.0720515325665474, "learning_rate": 2.3785948962599578e-05, "loss": 0.2267, "step": 44091 }, { "epoch": 3.5719377835385613, "grad_norm": 0.06832864880561829, "learning_rate": 2.3781448310004952e-05, "loss": 0.2431, "step": 44092 }, { "epoch": 3.5720187945560595, "grad_norm": 0.0733826756477356, "learning_rate": 2.3776947657410325e-05, "loss": 0.2541, "step": 44093 }, { "epoch": 3.5720998055735578, "grad_norm": 0.0713753029704094, "learning_rate": 2.37724470048157e-05, "loss": 0.23, "step": 44094 }, { "epoch": 3.5721808165910565, "grad_norm": 0.06897611171007156, "learning_rate": 2.3767946352221073e-05, "loss": 0.2151, "step": 44095 }, { "epoch": 3.5722618276085547, "grad_norm": 0.06955330073833466, "learning_rate": 2.3763445699626446e-05, "loss": 0.2112, "step": 44096 }, { "epoch": 3.572342838626053, "grad_norm": 0.07778653502464294, "learning_rate": 2.3758945047031823e-05, "loss": 0.2102, "step": 44097 }, { "epoch": 3.5724238496435516, "grad_norm": 0.07717503607273102, "learning_rate": 2.3754444394437193e-05, "loss": 0.2321, "step": 44098 }, { "epoch": 3.57250486066105, "grad_norm": 0.06059728562831879, "learning_rate": 2.3749943741842567e-05, "loss": 0.2139, "step": 44099 }, { "epoch": 3.572585871678548, "grad_norm": 0.0687459334731102, "learning_rate": 2.3745443089247944e-05, "loss": 0.22, "step": 44100 }, { "epoch": 3.572666882696047, "grad_norm": 0.07666652649641037, "learning_rate": 2.3740942436653314e-05, "loss": 0.2173, "step": 44101 }, { "epoch": 3.572747893713545, "grad_norm": 0.07414747774600983, "learning_rate": 2.3736441784058688e-05, "loss": 0.2318, "step": 44102 }, { "epoch": 3.5728289047310433, "grad_norm": 0.0756225511431694, "learning_rate": 2.3731941131464065e-05, "loss": 0.2279, "step": 44103 }, { "epoch": 3.572909915748542, "grad_norm": 0.07715298235416412, "learning_rate": 2.3727440478869435e-05, "loss": 0.2352, "step": 44104 }, { "epoch": 3.5729909267660402, "grad_norm": 0.0866784080862999, "learning_rate": 2.3722939826274812e-05, "loss": 0.1829, "step": 44105 }, { "epoch": 3.5730719377835385, "grad_norm": 0.0714133158326149, "learning_rate": 2.3718439173680186e-05, "loss": 0.2257, "step": 44106 }, { "epoch": 3.573152948801037, "grad_norm": 0.07470285892486572, "learning_rate": 2.3713938521085556e-05, "loss": 0.1881, "step": 44107 }, { "epoch": 3.5732339598185354, "grad_norm": 0.07752804458141327, "learning_rate": 2.3709437868490933e-05, "loss": 0.2229, "step": 44108 }, { "epoch": 3.5733149708360337, "grad_norm": 0.06709716469049454, "learning_rate": 2.3704937215896306e-05, "loss": 0.2067, "step": 44109 }, { "epoch": 3.5733959818535324, "grad_norm": 0.079058438539505, "learning_rate": 2.370043656330168e-05, "loss": 0.2218, "step": 44110 }, { "epoch": 3.5734769928710306, "grad_norm": 0.07981273531913757, "learning_rate": 2.3695935910707054e-05, "loss": 0.2185, "step": 44111 }, { "epoch": 3.573558003888529, "grad_norm": 0.06988833099603653, "learning_rate": 2.3691435258112427e-05, "loss": 0.2125, "step": 44112 }, { "epoch": 3.573639014906027, "grad_norm": 0.07461047917604446, "learning_rate": 2.36869346055178e-05, "loss": 0.2626, "step": 44113 }, { "epoch": 3.5737200259235253, "grad_norm": 0.07636402547359467, "learning_rate": 2.3682433952923174e-05, "loss": 0.2106, "step": 44114 }, { "epoch": 3.573801036941024, "grad_norm": 0.06755087524652481, "learning_rate": 2.3677933300328548e-05, "loss": 0.2217, "step": 44115 }, { "epoch": 3.5738820479585223, "grad_norm": 0.07610853016376495, "learning_rate": 2.367343264773392e-05, "loss": 0.2001, "step": 44116 }, { "epoch": 3.5739630589760205, "grad_norm": 0.07375774532556534, "learning_rate": 2.3668931995139295e-05, "loss": 0.2158, "step": 44117 }, { "epoch": 3.574044069993519, "grad_norm": 0.07939080893993378, "learning_rate": 2.3664431342544672e-05, "loss": 0.235, "step": 44118 }, { "epoch": 3.5741250810110174, "grad_norm": 0.07518904656171799, "learning_rate": 2.3659930689950042e-05, "loss": 0.2475, "step": 44119 }, { "epoch": 3.5742060920285157, "grad_norm": 0.06927220523357391, "learning_rate": 2.3655430037355416e-05, "loss": 0.2243, "step": 44120 }, { "epoch": 3.5742871030460144, "grad_norm": 0.0808010846376419, "learning_rate": 2.3650929384760793e-05, "loss": 0.2474, "step": 44121 }, { "epoch": 3.5743681140635126, "grad_norm": 0.06906265020370483, "learning_rate": 2.3646428732166163e-05, "loss": 0.2232, "step": 44122 }, { "epoch": 3.574449125081011, "grad_norm": 0.06592061370611191, "learning_rate": 2.364192807957154e-05, "loss": 0.2287, "step": 44123 }, { "epoch": 3.5745301360985096, "grad_norm": 0.07803937792778015, "learning_rate": 2.3637427426976914e-05, "loss": 0.209, "step": 44124 }, { "epoch": 3.574611147116008, "grad_norm": 0.09058477729558945, "learning_rate": 2.3632926774382287e-05, "loss": 0.2473, "step": 44125 }, { "epoch": 3.574692158133506, "grad_norm": 0.06751037389039993, "learning_rate": 2.362842612178766e-05, "loss": 0.2327, "step": 44126 }, { "epoch": 3.5747731691510047, "grad_norm": 0.07758664339780807, "learning_rate": 2.3623925469193035e-05, "loss": 0.2296, "step": 44127 }, { "epoch": 3.574854180168503, "grad_norm": 0.057286325842142105, "learning_rate": 2.3619424816598408e-05, "loss": 0.2316, "step": 44128 }, { "epoch": 3.5749351911860012, "grad_norm": 0.08673517405986786, "learning_rate": 2.3614924164003782e-05, "loss": 0.2512, "step": 44129 }, { "epoch": 3.5750162022035, "grad_norm": 0.07720606029033661, "learning_rate": 2.3610423511409155e-05, "loss": 0.2192, "step": 44130 }, { "epoch": 3.575097213220998, "grad_norm": 0.07449165731668472, "learning_rate": 2.360592285881453e-05, "loss": 0.2266, "step": 44131 }, { "epoch": 3.5751782242384964, "grad_norm": 0.06918004900217056, "learning_rate": 2.3601422206219903e-05, "loss": 0.2318, "step": 44132 }, { "epoch": 3.5752592352559946, "grad_norm": 0.10162968188524246, "learning_rate": 2.3596921553625276e-05, "loss": 0.2452, "step": 44133 }, { "epoch": 3.5753402462734933, "grad_norm": 0.05789915472269058, "learning_rate": 2.359242090103065e-05, "loss": 0.2582, "step": 44134 }, { "epoch": 3.5754212572909916, "grad_norm": 0.08747924119234085, "learning_rate": 2.3587920248436023e-05, "loss": 0.221, "step": 44135 }, { "epoch": 3.57550226830849, "grad_norm": 0.06346782296895981, "learning_rate": 2.35834195958414e-05, "loss": 0.1718, "step": 44136 }, { "epoch": 3.575583279325988, "grad_norm": 0.06199439615011215, "learning_rate": 2.357891894324677e-05, "loss": 0.2305, "step": 44137 }, { "epoch": 3.5756642903434868, "grad_norm": 0.07988022267818451, "learning_rate": 2.3574418290652144e-05, "loss": 0.2175, "step": 44138 }, { "epoch": 3.575745301360985, "grad_norm": 0.06792055815458298, "learning_rate": 2.356991763805752e-05, "loss": 0.2659, "step": 44139 }, { "epoch": 3.5758263123784833, "grad_norm": 0.06658469885587692, "learning_rate": 2.356541698546289e-05, "loss": 0.2286, "step": 44140 }, { "epoch": 3.575907323395982, "grad_norm": 0.08243236690759659, "learning_rate": 2.3560916332868265e-05, "loss": 0.2586, "step": 44141 }, { "epoch": 3.57598833441348, "grad_norm": 0.07459936290979385, "learning_rate": 2.3556415680273642e-05, "loss": 0.2582, "step": 44142 }, { "epoch": 3.5760693454309784, "grad_norm": 0.07289506494998932, "learning_rate": 2.3551915027679016e-05, "loss": 0.2031, "step": 44143 }, { "epoch": 3.576150356448477, "grad_norm": 0.07211292535066605, "learning_rate": 2.354741437508439e-05, "loss": 0.2232, "step": 44144 }, { "epoch": 3.5762313674659754, "grad_norm": 0.06191423907876015, "learning_rate": 2.3542913722489763e-05, "loss": 0.2123, "step": 44145 }, { "epoch": 3.5763123784834736, "grad_norm": 0.07505661994218826, "learning_rate": 2.3538413069895136e-05, "loss": 0.211, "step": 44146 }, { "epoch": 3.5763933895009723, "grad_norm": 0.07626402378082275, "learning_rate": 2.353391241730051e-05, "loss": 0.2534, "step": 44147 }, { "epoch": 3.5764744005184705, "grad_norm": 0.07256495207548141, "learning_rate": 2.3529411764705884e-05, "loss": 0.2379, "step": 44148 }, { "epoch": 3.576555411535969, "grad_norm": 0.07416626811027527, "learning_rate": 2.3524911112111257e-05, "loss": 0.2224, "step": 44149 }, { "epoch": 3.5766364225534675, "grad_norm": 0.07687810063362122, "learning_rate": 2.352041045951663e-05, "loss": 0.2388, "step": 44150 }, { "epoch": 3.5767174335709657, "grad_norm": 0.06605061888694763, "learning_rate": 2.3515909806922004e-05, "loss": 0.225, "step": 44151 }, { "epoch": 3.576798444588464, "grad_norm": 0.07485217601060867, "learning_rate": 2.3511409154327378e-05, "loss": 0.2517, "step": 44152 }, { "epoch": 3.5768794556059627, "grad_norm": 0.05736643448472023, "learning_rate": 2.3506908501732752e-05, "loss": 0.2449, "step": 44153 }, { "epoch": 3.576960466623461, "grad_norm": 0.06579489260911942, "learning_rate": 2.3502407849138125e-05, "loss": 0.2649, "step": 44154 }, { "epoch": 3.577041477640959, "grad_norm": 0.06708400696516037, "learning_rate": 2.34979071965435e-05, "loss": 0.249, "step": 44155 }, { "epoch": 3.5771224886584574, "grad_norm": 0.05705248564481735, "learning_rate": 2.3493406543948873e-05, "loss": 0.2476, "step": 44156 }, { "epoch": 3.577203499675956, "grad_norm": 0.05666022002696991, "learning_rate": 2.348890589135425e-05, "loss": 0.195, "step": 44157 }, { "epoch": 3.5772845106934543, "grad_norm": 0.06512381881475449, "learning_rate": 2.348440523875962e-05, "loss": 0.235, "step": 44158 }, { "epoch": 3.5773655217109526, "grad_norm": 0.07135576754808426, "learning_rate": 2.3479904586164993e-05, "loss": 0.1818, "step": 44159 }, { "epoch": 3.577446532728451, "grad_norm": 0.05715243145823479, "learning_rate": 2.347540393357037e-05, "loss": 0.2448, "step": 44160 }, { "epoch": 3.5775275437459495, "grad_norm": 0.07935239374637604, "learning_rate": 2.3470903280975744e-05, "loss": 0.2379, "step": 44161 }, { "epoch": 3.5776085547634477, "grad_norm": 0.05796222388744354, "learning_rate": 2.3466402628381114e-05, "loss": 0.199, "step": 44162 }, { "epoch": 3.577689565780946, "grad_norm": 0.09846629947423935, "learning_rate": 2.346190197578649e-05, "loss": 0.2093, "step": 44163 }, { "epoch": 3.5777705767984447, "grad_norm": 0.07785390317440033, "learning_rate": 2.3457401323191865e-05, "loss": 0.2151, "step": 44164 }, { "epoch": 3.577851587815943, "grad_norm": 0.06434359401464462, "learning_rate": 2.345290067059724e-05, "loss": 0.2295, "step": 44165 }, { "epoch": 3.577932598833441, "grad_norm": 0.06758967787027359, "learning_rate": 2.3448400018002612e-05, "loss": 0.2413, "step": 44166 }, { "epoch": 3.57801360985094, "grad_norm": 0.08527732640504837, "learning_rate": 2.3443899365407986e-05, "loss": 0.2458, "step": 44167 }, { "epoch": 3.578094620868438, "grad_norm": 0.07337276637554169, "learning_rate": 2.343939871281336e-05, "loss": 0.2312, "step": 44168 }, { "epoch": 3.5781756318859363, "grad_norm": 0.07388912886381149, "learning_rate": 2.3434898060218733e-05, "loss": 0.259, "step": 44169 }, { "epoch": 3.578256642903435, "grad_norm": 0.06941523402929306, "learning_rate": 2.3430397407624106e-05, "loss": 0.2328, "step": 44170 }, { "epoch": 3.5783376539209333, "grad_norm": 0.07117350399494171, "learning_rate": 2.342589675502948e-05, "loss": 0.218, "step": 44171 }, { "epoch": 3.5784186649384315, "grad_norm": 0.0704994648694992, "learning_rate": 2.3421396102434854e-05, "loss": 0.2159, "step": 44172 }, { "epoch": 3.57849967595593, "grad_norm": 0.0686672106385231, "learning_rate": 2.3416895449840227e-05, "loss": 0.2176, "step": 44173 }, { "epoch": 3.5785806869734285, "grad_norm": 0.07033158093690872, "learning_rate": 2.34123947972456e-05, "loss": 0.2051, "step": 44174 }, { "epoch": 3.5786616979909267, "grad_norm": 0.05757991597056389, "learning_rate": 2.3407894144650974e-05, "loss": 0.1934, "step": 44175 }, { "epoch": 3.5787427090084254, "grad_norm": 0.07229195535182953, "learning_rate": 2.340339349205635e-05, "loss": 0.2095, "step": 44176 }, { "epoch": 3.5788237200259236, "grad_norm": 0.07896924018859863, "learning_rate": 2.339889283946172e-05, "loss": 0.1986, "step": 44177 }, { "epoch": 3.578904731043422, "grad_norm": 0.08921925723552704, "learning_rate": 2.33943921868671e-05, "loss": 0.2219, "step": 44178 }, { "epoch": 3.57898574206092, "grad_norm": 0.06069227308034897, "learning_rate": 2.3389891534272472e-05, "loss": 0.1983, "step": 44179 }, { "epoch": 3.579066753078419, "grad_norm": 0.06449750065803528, "learning_rate": 2.3385390881677842e-05, "loss": 0.1994, "step": 44180 }, { "epoch": 3.579147764095917, "grad_norm": 0.06284713745117188, "learning_rate": 2.338089022908322e-05, "loss": 0.2365, "step": 44181 }, { "epoch": 3.5792287751134153, "grad_norm": 0.08436598628759384, "learning_rate": 2.3376389576488593e-05, "loss": 0.2234, "step": 44182 }, { "epoch": 3.5793097861309136, "grad_norm": 0.0867995098233223, "learning_rate": 2.3371888923893967e-05, "loss": 0.2313, "step": 44183 }, { "epoch": 3.5793907971484122, "grad_norm": 0.07555483281612396, "learning_rate": 2.336738827129934e-05, "loss": 0.2104, "step": 44184 }, { "epoch": 3.5794718081659105, "grad_norm": 0.0693432092666626, "learning_rate": 2.3362887618704714e-05, "loss": 0.172, "step": 44185 }, { "epoch": 3.5795528191834087, "grad_norm": 0.07421540468931198, "learning_rate": 2.3358386966110087e-05, "loss": 0.2119, "step": 44186 }, { "epoch": 3.5796338302009074, "grad_norm": 0.07303255051374435, "learning_rate": 2.335388631351546e-05, "loss": 0.234, "step": 44187 }, { "epoch": 3.5797148412184057, "grad_norm": 0.06863290816545486, "learning_rate": 2.3349385660920835e-05, "loss": 0.2207, "step": 44188 }, { "epoch": 3.579795852235904, "grad_norm": 0.06763945519924164, "learning_rate": 2.3344885008326208e-05, "loss": 0.2082, "step": 44189 }, { "epoch": 3.5798768632534026, "grad_norm": 0.0775524452328682, "learning_rate": 2.3340384355731582e-05, "loss": 0.2449, "step": 44190 }, { "epoch": 3.579957874270901, "grad_norm": 0.08569817990064621, "learning_rate": 2.3335883703136955e-05, "loss": 0.2156, "step": 44191 }, { "epoch": 3.580038885288399, "grad_norm": 0.09265130013227463, "learning_rate": 2.333138305054233e-05, "loss": 0.2372, "step": 44192 }, { "epoch": 3.5801198963058978, "grad_norm": 0.06884811818599701, "learning_rate": 2.3326882397947703e-05, "loss": 0.257, "step": 44193 }, { "epoch": 3.580200907323396, "grad_norm": 0.08053863048553467, "learning_rate": 2.332238174535308e-05, "loss": 0.2232, "step": 44194 }, { "epoch": 3.5802819183408943, "grad_norm": 0.08468924462795258, "learning_rate": 2.331788109275845e-05, "loss": 0.2075, "step": 44195 }, { "epoch": 3.580362929358393, "grad_norm": 0.06398703157901764, "learning_rate": 2.3313380440163827e-05, "loss": 0.1933, "step": 44196 }, { "epoch": 3.580443940375891, "grad_norm": 0.06383037567138672, "learning_rate": 2.33088797875692e-05, "loss": 0.2233, "step": 44197 }, { "epoch": 3.5805249513933894, "grad_norm": 0.06736244261264801, "learning_rate": 2.330437913497457e-05, "loss": 0.2269, "step": 44198 }, { "epoch": 3.580605962410888, "grad_norm": 0.09929540008306503, "learning_rate": 2.3299878482379948e-05, "loss": 0.2354, "step": 44199 }, { "epoch": 3.5806869734283864, "grad_norm": 0.08501783013343811, "learning_rate": 2.329537782978532e-05, "loss": 0.2693, "step": 44200 }, { "epoch": 3.5807679844458846, "grad_norm": 0.0647391527891159, "learning_rate": 2.329087717719069e-05, "loss": 0.2211, "step": 44201 }, { "epoch": 3.580848995463383, "grad_norm": 0.07122602313756943, "learning_rate": 2.328637652459607e-05, "loss": 0.2434, "step": 44202 }, { "epoch": 3.5809300064808816, "grad_norm": 0.07307540625333786, "learning_rate": 2.3281875872001442e-05, "loss": 0.2233, "step": 44203 }, { "epoch": 3.58101101749838, "grad_norm": 0.06686873733997345, "learning_rate": 2.3277375219406816e-05, "loss": 0.1938, "step": 44204 }, { "epoch": 3.581092028515878, "grad_norm": 0.08013508468866348, "learning_rate": 2.327287456681219e-05, "loss": 0.2141, "step": 44205 }, { "epoch": 3.5811730395333763, "grad_norm": 0.07765527069568634, "learning_rate": 2.3268373914217563e-05, "loss": 0.2094, "step": 44206 }, { "epoch": 3.581254050550875, "grad_norm": 0.06879838556051254, "learning_rate": 2.3263873261622936e-05, "loss": 0.2456, "step": 44207 }, { "epoch": 3.5813350615683732, "grad_norm": 0.07718323916196823, "learning_rate": 2.325937260902831e-05, "loss": 0.2246, "step": 44208 }, { "epoch": 3.5814160725858715, "grad_norm": 0.07552476227283478, "learning_rate": 2.3254871956433684e-05, "loss": 0.2333, "step": 44209 }, { "epoch": 3.58149708360337, "grad_norm": 0.06853868067264557, "learning_rate": 2.3250371303839057e-05, "loss": 0.2295, "step": 44210 }, { "epoch": 3.5815780946208684, "grad_norm": 0.05678297579288483, "learning_rate": 2.324587065124443e-05, "loss": 0.2204, "step": 44211 }, { "epoch": 3.5816591056383666, "grad_norm": 0.0712512880563736, "learning_rate": 2.3241369998649808e-05, "loss": 0.2131, "step": 44212 }, { "epoch": 3.5817401166558653, "grad_norm": 0.0786905288696289, "learning_rate": 2.3236869346055178e-05, "loss": 0.2262, "step": 44213 }, { "epoch": 3.5818211276733636, "grad_norm": 0.05377919599413872, "learning_rate": 2.323236869346055e-05, "loss": 0.1908, "step": 44214 }, { "epoch": 3.581902138690862, "grad_norm": 0.08514394611120224, "learning_rate": 2.322786804086593e-05, "loss": 0.2215, "step": 44215 }, { "epoch": 3.5819831497083605, "grad_norm": 0.07618945837020874, "learning_rate": 2.32233673882713e-05, "loss": 0.2145, "step": 44216 }, { "epoch": 3.5820641607258588, "grad_norm": 0.07072417438030243, "learning_rate": 2.3218866735676676e-05, "loss": 0.2386, "step": 44217 }, { "epoch": 3.582145171743357, "grad_norm": 0.07065349072217941, "learning_rate": 2.321436608308205e-05, "loss": 0.2201, "step": 44218 }, { "epoch": 3.5822261827608557, "grad_norm": 0.06873950362205505, "learning_rate": 2.320986543048742e-05, "loss": 0.1726, "step": 44219 }, { "epoch": 3.582307193778354, "grad_norm": 0.07127075642347336, "learning_rate": 2.3205364777892797e-05, "loss": 0.2155, "step": 44220 }, { "epoch": 3.582388204795852, "grad_norm": 0.08185587078332901, "learning_rate": 2.320086412529817e-05, "loss": 0.1968, "step": 44221 }, { "epoch": 3.582469215813351, "grad_norm": 0.07481518387794495, "learning_rate": 2.3196363472703544e-05, "loss": 0.2179, "step": 44222 }, { "epoch": 3.582550226830849, "grad_norm": 0.0727316364645958, "learning_rate": 2.3191862820108917e-05, "loss": 0.2648, "step": 44223 }, { "epoch": 3.5826312378483474, "grad_norm": 0.06189148873090744, "learning_rate": 2.318736216751429e-05, "loss": 0.2095, "step": 44224 }, { "epoch": 3.5827122488658456, "grad_norm": 0.0742558166384697, "learning_rate": 2.3182861514919665e-05, "loss": 0.2582, "step": 44225 }, { "epoch": 3.5827932598833443, "grad_norm": 0.07165959477424622, "learning_rate": 2.3178360862325038e-05, "loss": 0.2193, "step": 44226 }, { "epoch": 3.5828742709008425, "grad_norm": 0.069723941385746, "learning_rate": 2.3173860209730412e-05, "loss": 0.2064, "step": 44227 }, { "epoch": 3.582955281918341, "grad_norm": 0.09108491241931915, "learning_rate": 2.3169359557135785e-05, "loss": 0.2345, "step": 44228 }, { "epoch": 3.583036292935839, "grad_norm": 0.07429561018943787, "learning_rate": 2.316485890454116e-05, "loss": 0.1973, "step": 44229 }, { "epoch": 3.5831173039533377, "grad_norm": 0.08311036229133606, "learning_rate": 2.3160358251946536e-05, "loss": 0.2343, "step": 44230 }, { "epoch": 3.583198314970836, "grad_norm": 0.06876590847969055, "learning_rate": 2.3155857599351906e-05, "loss": 0.2563, "step": 44231 }, { "epoch": 3.583279325988334, "grad_norm": 0.06580781936645508, "learning_rate": 2.315135694675728e-05, "loss": 0.21, "step": 44232 }, { "epoch": 3.583360337005833, "grad_norm": 0.07575564086437225, "learning_rate": 2.3146856294162657e-05, "loss": 0.1992, "step": 44233 }, { "epoch": 3.583441348023331, "grad_norm": 0.06198256090283394, "learning_rate": 2.3142355641568027e-05, "loss": 0.2285, "step": 44234 }, { "epoch": 3.5835223590408294, "grad_norm": 0.06165856122970581, "learning_rate": 2.31378549889734e-05, "loss": 0.2144, "step": 44235 }, { "epoch": 3.583603370058328, "grad_norm": 0.06248553469777107, "learning_rate": 2.3133354336378778e-05, "loss": 0.1968, "step": 44236 }, { "epoch": 3.5836843810758263, "grad_norm": 0.06722322851419449, "learning_rate": 2.3128853683784148e-05, "loss": 0.2275, "step": 44237 }, { "epoch": 3.5837653920933246, "grad_norm": 0.07760576903820038, "learning_rate": 2.3124353031189525e-05, "loss": 0.2138, "step": 44238 }, { "epoch": 3.5838464031108233, "grad_norm": 0.08306940644979477, "learning_rate": 2.31198523785949e-05, "loss": 0.2209, "step": 44239 }, { "epoch": 3.5839274141283215, "grad_norm": 0.09010027348995209, "learning_rate": 2.311535172600027e-05, "loss": 0.2129, "step": 44240 }, { "epoch": 3.5840084251458197, "grad_norm": 0.07222206145524979, "learning_rate": 2.3110851073405646e-05, "loss": 0.2182, "step": 44241 }, { "epoch": 3.5840894361633184, "grad_norm": 0.08256030827760696, "learning_rate": 2.310635042081102e-05, "loss": 0.2499, "step": 44242 }, { "epoch": 3.5841704471808167, "grad_norm": 0.08997172117233276, "learning_rate": 2.3101849768216393e-05, "loss": 0.2351, "step": 44243 }, { "epoch": 3.584251458198315, "grad_norm": 0.07810519635677338, "learning_rate": 2.3097349115621767e-05, "loss": 0.2291, "step": 44244 }, { "epoch": 3.5843324692158136, "grad_norm": 0.08510446548461914, "learning_rate": 2.309284846302714e-05, "loss": 0.2035, "step": 44245 }, { "epoch": 3.584413480233312, "grad_norm": 0.07003391534090042, "learning_rate": 2.3088347810432514e-05, "loss": 0.2134, "step": 44246 }, { "epoch": 3.58449449125081, "grad_norm": 0.059189002960920334, "learning_rate": 2.3083847157837887e-05, "loss": 0.2268, "step": 44247 }, { "epoch": 3.5845755022683083, "grad_norm": 0.07633404433727264, "learning_rate": 2.307934650524326e-05, "loss": 0.2364, "step": 44248 }, { "epoch": 3.584656513285807, "grad_norm": 0.0753362625837326, "learning_rate": 2.3074845852648635e-05, "loss": 0.2082, "step": 44249 }, { "epoch": 3.5847375243033053, "grad_norm": 0.07483591139316559, "learning_rate": 2.3070345200054008e-05, "loss": 0.2038, "step": 44250 }, { "epoch": 3.5848185353208035, "grad_norm": 0.07827362418174744, "learning_rate": 2.3065844547459385e-05, "loss": 0.2267, "step": 44251 }, { "epoch": 3.5848995463383018, "grad_norm": 0.08125095814466476, "learning_rate": 2.3061343894864755e-05, "loss": 0.2367, "step": 44252 }, { "epoch": 3.5849805573558005, "grad_norm": 0.08842773735523224, "learning_rate": 2.305684324227013e-05, "loss": 0.2295, "step": 44253 }, { "epoch": 3.5850615683732987, "grad_norm": 0.08604532480239868, "learning_rate": 2.3052342589675506e-05, "loss": 0.2305, "step": 44254 }, { "epoch": 3.585142579390797, "grad_norm": 0.07639449834823608, "learning_rate": 2.3047841937080876e-05, "loss": 0.2107, "step": 44255 }, { "epoch": 3.5852235904082956, "grad_norm": 0.07172799855470657, "learning_rate": 2.3043341284486253e-05, "loss": 0.2284, "step": 44256 }, { "epoch": 3.585304601425794, "grad_norm": 0.08766242116689682, "learning_rate": 2.3038840631891627e-05, "loss": 0.2502, "step": 44257 }, { "epoch": 3.585385612443292, "grad_norm": 0.05565030872821808, "learning_rate": 2.3034339979296997e-05, "loss": 0.2103, "step": 44258 }, { "epoch": 3.585466623460791, "grad_norm": 0.08138461410999298, "learning_rate": 2.3029839326702374e-05, "loss": 0.2241, "step": 44259 }, { "epoch": 3.585547634478289, "grad_norm": 0.08481591194868088, "learning_rate": 2.3025338674107748e-05, "loss": 0.236, "step": 44260 }, { "epoch": 3.5856286454957873, "grad_norm": 0.0756368413567543, "learning_rate": 2.3020838021513118e-05, "loss": 0.2314, "step": 44261 }, { "epoch": 3.585709656513286, "grad_norm": 0.06131047382950783, "learning_rate": 2.3016337368918495e-05, "loss": 0.2096, "step": 44262 }, { "epoch": 3.5857906675307842, "grad_norm": 0.06640103459358215, "learning_rate": 2.301183671632387e-05, "loss": 0.2001, "step": 44263 }, { "epoch": 3.5858716785482825, "grad_norm": 0.06853748112916946, "learning_rate": 2.3007336063729242e-05, "loss": 0.2255, "step": 44264 }, { "epoch": 3.585952689565781, "grad_norm": 0.07723915576934814, "learning_rate": 2.3002835411134616e-05, "loss": 0.2287, "step": 44265 }, { "epoch": 3.5860337005832794, "grad_norm": 0.06693840026855469, "learning_rate": 2.299833475853999e-05, "loss": 0.2201, "step": 44266 }, { "epoch": 3.5861147116007777, "grad_norm": 0.08216645568609238, "learning_rate": 2.2993834105945363e-05, "loss": 0.2183, "step": 44267 }, { "epoch": 3.5861957226182763, "grad_norm": 0.07527400553226471, "learning_rate": 2.2989333453350736e-05, "loss": 0.2227, "step": 44268 }, { "epoch": 3.5862767336357746, "grad_norm": 0.06903623789548874, "learning_rate": 2.2984832800756113e-05, "loss": 0.2249, "step": 44269 }, { "epoch": 3.586357744653273, "grad_norm": 0.08289389312267303, "learning_rate": 2.2980332148161484e-05, "loss": 0.2302, "step": 44270 }, { "epoch": 3.586438755670771, "grad_norm": 0.07931365072727203, "learning_rate": 2.2975831495566857e-05, "loss": 0.2304, "step": 44271 }, { "epoch": 3.5865197666882693, "grad_norm": 0.08668045699596405, "learning_rate": 2.2971330842972234e-05, "loss": 0.2336, "step": 44272 }, { "epoch": 3.586600777705768, "grad_norm": 0.07321757823228836, "learning_rate": 2.2966830190377604e-05, "loss": 0.2303, "step": 44273 }, { "epoch": 3.5866817887232663, "grad_norm": 0.0703120231628418, "learning_rate": 2.2962329537782978e-05, "loss": 0.2143, "step": 44274 }, { "epoch": 3.5867627997407645, "grad_norm": 0.06930385529994965, "learning_rate": 2.2957828885188355e-05, "loss": 0.2813, "step": 44275 }, { "epoch": 3.586843810758263, "grad_norm": 0.058735087513923645, "learning_rate": 2.2953328232593725e-05, "loss": 0.2308, "step": 44276 }, { "epoch": 3.5869248217757614, "grad_norm": 0.06936381757259369, "learning_rate": 2.2948827579999102e-05, "loss": 0.2089, "step": 44277 }, { "epoch": 3.5870058327932597, "grad_norm": 0.059996139258146286, "learning_rate": 2.2944326927404476e-05, "loss": 0.2222, "step": 44278 }, { "epoch": 3.5870868438107584, "grad_norm": 0.058839645236730576, "learning_rate": 2.2939826274809846e-05, "loss": 0.2038, "step": 44279 }, { "epoch": 3.5871678548282566, "grad_norm": 0.06004242226481438, "learning_rate": 2.2935325622215223e-05, "loss": 0.226, "step": 44280 }, { "epoch": 3.587248865845755, "grad_norm": 0.06038869544863701, "learning_rate": 2.2930824969620597e-05, "loss": 0.1797, "step": 44281 }, { "epoch": 3.5873298768632536, "grad_norm": 0.07474492490291595, "learning_rate": 2.292632431702597e-05, "loss": 0.2361, "step": 44282 }, { "epoch": 3.587410887880752, "grad_norm": 0.08312978595495224, "learning_rate": 2.2921823664431344e-05, "loss": 0.2651, "step": 44283 }, { "epoch": 3.58749189889825, "grad_norm": 0.06367320567369461, "learning_rate": 2.2917323011836717e-05, "loss": 0.1975, "step": 44284 }, { "epoch": 3.5875729099157487, "grad_norm": 0.06153202801942825, "learning_rate": 2.291282235924209e-05, "loss": 0.2128, "step": 44285 }, { "epoch": 3.587653920933247, "grad_norm": 0.06177050247788429, "learning_rate": 2.2908321706647465e-05, "loss": 0.21, "step": 44286 }, { "epoch": 3.587734931950745, "grad_norm": 0.07861583679914474, "learning_rate": 2.2903821054052838e-05, "loss": 0.3053, "step": 44287 }, { "epoch": 3.587815942968244, "grad_norm": 0.07128918915987015, "learning_rate": 2.2899320401458212e-05, "loss": 0.228, "step": 44288 }, { "epoch": 3.587896953985742, "grad_norm": 0.0694175511598587, "learning_rate": 2.2894819748863585e-05, "loss": 0.2329, "step": 44289 }, { "epoch": 3.5879779650032404, "grad_norm": 0.07123053073883057, "learning_rate": 2.2890319096268962e-05, "loss": 0.2514, "step": 44290 }, { "epoch": 3.588058976020739, "grad_norm": 0.07652550935745239, "learning_rate": 2.2885818443674333e-05, "loss": 0.2417, "step": 44291 }, { "epoch": 3.5881399870382373, "grad_norm": 0.07941815257072449, "learning_rate": 2.2881317791079706e-05, "loss": 0.231, "step": 44292 }, { "epoch": 3.5882209980557356, "grad_norm": 0.08026675879955292, "learning_rate": 2.2876817138485083e-05, "loss": 0.2154, "step": 44293 }, { "epoch": 3.588302009073234, "grad_norm": 0.07086445391178131, "learning_rate": 2.2872316485890453e-05, "loss": 0.1999, "step": 44294 }, { "epoch": 3.588383020090732, "grad_norm": 0.07486844062805176, "learning_rate": 2.286781583329583e-05, "loss": 0.2385, "step": 44295 }, { "epoch": 3.5884640311082308, "grad_norm": 0.08761772513389587, "learning_rate": 2.2863315180701204e-05, "loss": 0.2555, "step": 44296 }, { "epoch": 3.588545042125729, "grad_norm": 0.08862952888011932, "learning_rate": 2.2858814528106574e-05, "loss": 0.2583, "step": 44297 }, { "epoch": 3.5886260531432272, "grad_norm": 0.07825177162885666, "learning_rate": 2.285431387551195e-05, "loss": 0.2298, "step": 44298 }, { "epoch": 3.588707064160726, "grad_norm": 0.06612584739923477, "learning_rate": 2.2849813222917325e-05, "loss": 0.197, "step": 44299 }, { "epoch": 3.588788075178224, "grad_norm": 0.08789026737213135, "learning_rate": 2.2845312570322695e-05, "loss": 0.2412, "step": 44300 }, { "epoch": 3.5888690861957224, "grad_norm": 0.06065966933965683, "learning_rate": 2.2840811917728072e-05, "loss": 0.227, "step": 44301 }, { "epoch": 3.588950097213221, "grad_norm": 0.07560954242944717, "learning_rate": 2.2836311265133446e-05, "loss": 0.2034, "step": 44302 }, { "epoch": 3.5890311082307194, "grad_norm": 0.0544184185564518, "learning_rate": 2.283181061253882e-05, "loss": 0.2168, "step": 44303 }, { "epoch": 3.5891121192482176, "grad_norm": 0.06866674870252609, "learning_rate": 2.2827309959944193e-05, "loss": 0.2074, "step": 44304 }, { "epoch": 3.5891931302657163, "grad_norm": 0.06643018871545792, "learning_rate": 2.2822809307349567e-05, "loss": 0.2423, "step": 44305 }, { "epoch": 3.5892741412832145, "grad_norm": 0.055913008749485016, "learning_rate": 2.281830865475494e-05, "loss": 0.2113, "step": 44306 }, { "epoch": 3.589355152300713, "grad_norm": 0.05831152945756912, "learning_rate": 2.2813808002160314e-05, "loss": 0.1939, "step": 44307 }, { "epoch": 3.5894361633182115, "grad_norm": 0.06937964260578156, "learning_rate": 2.280930734956569e-05, "loss": 0.2448, "step": 44308 }, { "epoch": 3.5895171743357097, "grad_norm": 0.08315081894397736, "learning_rate": 2.280480669697106e-05, "loss": 0.2412, "step": 44309 }, { "epoch": 3.589598185353208, "grad_norm": 0.0808417946100235, "learning_rate": 2.2800306044376435e-05, "loss": 0.2078, "step": 44310 }, { "epoch": 3.5896791963707066, "grad_norm": 0.055978160351514816, "learning_rate": 2.279580539178181e-05, "loss": 0.185, "step": 44311 }, { "epoch": 3.589760207388205, "grad_norm": 0.06848358362913132, "learning_rate": 2.2791304739187182e-05, "loss": 0.1972, "step": 44312 }, { "epoch": 3.589841218405703, "grad_norm": 0.07640522718429565, "learning_rate": 2.2786804086592555e-05, "loss": 0.2048, "step": 44313 }, { "epoch": 3.589922229423202, "grad_norm": 0.06737855076789856, "learning_rate": 2.2782303433997932e-05, "loss": 0.2057, "step": 44314 }, { "epoch": 3.5900032404407, "grad_norm": 0.08041815459728241, "learning_rate": 2.2777802781403303e-05, "loss": 0.2278, "step": 44315 }, { "epoch": 3.5900842514581983, "grad_norm": 0.06357849389314651, "learning_rate": 2.277330212880868e-05, "loss": 0.2005, "step": 44316 }, { "epoch": 3.5901652624756966, "grad_norm": 0.05959125980734825, "learning_rate": 2.2768801476214053e-05, "loss": 0.1978, "step": 44317 }, { "epoch": 3.590246273493195, "grad_norm": 0.06926389038562775, "learning_rate": 2.2764300823619423e-05, "loss": 0.211, "step": 44318 }, { "epoch": 3.5903272845106935, "grad_norm": 0.07221662253141403, "learning_rate": 2.27598001710248e-05, "loss": 0.2416, "step": 44319 }, { "epoch": 3.5904082955281917, "grad_norm": 0.08942881226539612, "learning_rate": 2.2755299518430174e-05, "loss": 0.2856, "step": 44320 }, { "epoch": 3.59048930654569, "grad_norm": 0.0759754478931427, "learning_rate": 2.2750798865835548e-05, "loss": 0.2244, "step": 44321 }, { "epoch": 3.5905703175631887, "grad_norm": 0.07223279029130936, "learning_rate": 2.274629821324092e-05, "loss": 0.2351, "step": 44322 }, { "epoch": 3.590651328580687, "grad_norm": 0.08292268961668015, "learning_rate": 2.2741797560646295e-05, "loss": 0.1953, "step": 44323 }, { "epoch": 3.590732339598185, "grad_norm": 0.06477510929107666, "learning_rate": 2.273729690805167e-05, "loss": 0.1859, "step": 44324 }, { "epoch": 3.590813350615684, "grad_norm": 0.08031325787305832, "learning_rate": 2.2732796255457042e-05, "loss": 0.2224, "step": 44325 }, { "epoch": 3.590894361633182, "grad_norm": 0.07873866707086563, "learning_rate": 2.2728295602862416e-05, "loss": 0.2035, "step": 44326 }, { "epoch": 3.5909753726506803, "grad_norm": 0.05760648846626282, "learning_rate": 2.272379495026779e-05, "loss": 0.1932, "step": 44327 }, { "epoch": 3.591056383668179, "grad_norm": 0.06344673037528992, "learning_rate": 2.2719294297673163e-05, "loss": 0.1984, "step": 44328 }, { "epoch": 3.5911373946856773, "grad_norm": 0.050335634499788284, "learning_rate": 2.271479364507854e-05, "loss": 0.2229, "step": 44329 }, { "epoch": 3.5912184057031755, "grad_norm": 0.06632192432880402, "learning_rate": 2.271029299248391e-05, "loss": 0.2532, "step": 44330 }, { "epoch": 3.591299416720674, "grad_norm": 0.05906035751104355, "learning_rate": 2.2705792339889284e-05, "loss": 0.1707, "step": 44331 }, { "epoch": 3.5913804277381725, "grad_norm": 0.059910062700510025, "learning_rate": 2.270129168729466e-05, "loss": 0.2044, "step": 44332 }, { "epoch": 3.5914614387556707, "grad_norm": 0.08762989193201065, "learning_rate": 2.269679103470003e-05, "loss": 0.2484, "step": 44333 }, { "epoch": 3.5915424497731694, "grad_norm": 0.07183953374624252, "learning_rate": 2.2692290382105404e-05, "loss": 0.2337, "step": 44334 }, { "epoch": 3.5916234607906676, "grad_norm": 0.05912299081683159, "learning_rate": 2.268778972951078e-05, "loss": 0.2325, "step": 44335 }, { "epoch": 3.591704471808166, "grad_norm": 0.09255866706371307, "learning_rate": 2.268328907691615e-05, "loss": 0.2686, "step": 44336 }, { "epoch": 3.5917854828256646, "grad_norm": 0.08099143952131271, "learning_rate": 2.267878842432153e-05, "loss": 0.2208, "step": 44337 }, { "epoch": 3.591866493843163, "grad_norm": 0.08440107107162476, "learning_rate": 2.2674287771726902e-05, "loss": 0.2394, "step": 44338 }, { "epoch": 3.591947504860661, "grad_norm": 0.07279936969280243, "learning_rate": 2.2669787119132276e-05, "loss": 0.2285, "step": 44339 }, { "epoch": 3.5920285158781593, "grad_norm": 0.07071520388126373, "learning_rate": 2.266528646653765e-05, "loss": 0.2205, "step": 44340 }, { "epoch": 3.5921095268956575, "grad_norm": 0.09466453641653061, "learning_rate": 2.2660785813943023e-05, "loss": 0.2376, "step": 44341 }, { "epoch": 3.5921905379131562, "grad_norm": 0.06887936592102051, "learning_rate": 2.2656285161348397e-05, "loss": 0.1952, "step": 44342 }, { "epoch": 3.5922715489306545, "grad_norm": 0.0704905167222023, "learning_rate": 2.265178450875377e-05, "loss": 0.2008, "step": 44343 }, { "epoch": 3.5923525599481527, "grad_norm": 0.07464319467544556, "learning_rate": 2.2647283856159144e-05, "loss": 0.2208, "step": 44344 }, { "epoch": 3.5924335709656514, "grad_norm": 0.058817021548748016, "learning_rate": 2.2642783203564517e-05, "loss": 0.2325, "step": 44345 }, { "epoch": 3.5925145819831497, "grad_norm": 0.08308090269565582, "learning_rate": 2.263828255096989e-05, "loss": 0.1987, "step": 44346 }, { "epoch": 3.592595593000648, "grad_norm": 0.08090384304523468, "learning_rate": 2.2633781898375265e-05, "loss": 0.2153, "step": 44347 }, { "epoch": 3.5926766040181466, "grad_norm": 0.06967276334762573, "learning_rate": 2.2629281245780638e-05, "loss": 0.2359, "step": 44348 }, { "epoch": 3.592757615035645, "grad_norm": 0.06994897127151489, "learning_rate": 2.2624780593186012e-05, "loss": 0.2142, "step": 44349 }, { "epoch": 3.592838626053143, "grad_norm": 0.0665275827050209, "learning_rate": 2.262027994059139e-05, "loss": 0.21, "step": 44350 }, { "epoch": 3.5929196370706418, "grad_norm": 0.0679289698600769, "learning_rate": 2.261577928799676e-05, "loss": 0.2478, "step": 44351 }, { "epoch": 3.59300064808814, "grad_norm": 0.06700301915407181, "learning_rate": 2.2611278635402133e-05, "loss": 0.2499, "step": 44352 }, { "epoch": 3.5930816591056383, "grad_norm": 0.06900755316019058, "learning_rate": 2.260677798280751e-05, "loss": 0.2314, "step": 44353 }, { "epoch": 3.593162670123137, "grad_norm": 0.07342938333749771, "learning_rate": 2.260227733021288e-05, "loss": 0.1744, "step": 44354 }, { "epoch": 3.593243681140635, "grad_norm": 0.08509702235460281, "learning_rate": 2.2597776677618257e-05, "loss": 0.2156, "step": 44355 }, { "epoch": 3.5933246921581334, "grad_norm": 0.06477095931768417, "learning_rate": 2.259327602502363e-05, "loss": 0.2191, "step": 44356 }, { "epoch": 3.593405703175632, "grad_norm": 0.0756278708577156, "learning_rate": 2.2588775372429004e-05, "loss": 0.2166, "step": 44357 }, { "epoch": 3.5934867141931304, "grad_norm": 0.07322674989700317, "learning_rate": 2.2584274719834378e-05, "loss": 0.2112, "step": 44358 }, { "epoch": 3.5935677252106286, "grad_norm": 0.07110437750816345, "learning_rate": 2.257977406723975e-05, "loss": 0.2165, "step": 44359 }, { "epoch": 3.593648736228127, "grad_norm": 0.07313544303178787, "learning_rate": 2.2575273414645125e-05, "loss": 0.2229, "step": 44360 }, { "epoch": 3.5937297472456255, "grad_norm": 0.06659787148237228, "learning_rate": 2.25707727620505e-05, "loss": 0.2132, "step": 44361 }, { "epoch": 3.593810758263124, "grad_norm": 0.06736768782138824, "learning_rate": 2.2566272109455872e-05, "loss": 0.2004, "step": 44362 }, { "epoch": 3.593891769280622, "grad_norm": 0.0768749788403511, "learning_rate": 2.2561771456861246e-05, "loss": 0.2116, "step": 44363 }, { "epoch": 3.5939727802981203, "grad_norm": 0.06405490636825562, "learning_rate": 2.255727080426662e-05, "loss": 0.2244, "step": 44364 }, { "epoch": 3.594053791315619, "grad_norm": 0.07622356712818146, "learning_rate": 2.2552770151671993e-05, "loss": 0.1906, "step": 44365 }, { "epoch": 3.594134802333117, "grad_norm": 0.08193568140268326, "learning_rate": 2.2548269499077366e-05, "loss": 0.2702, "step": 44366 }, { "epoch": 3.5942158133506155, "grad_norm": 0.06567830592393875, "learning_rate": 2.254376884648274e-05, "loss": 0.2097, "step": 44367 }, { "epoch": 3.594296824368114, "grad_norm": 0.06329605728387833, "learning_rate": 2.2539268193888117e-05, "loss": 0.249, "step": 44368 }, { "epoch": 3.5943778353856124, "grad_norm": 0.09080575406551361, "learning_rate": 2.2534767541293487e-05, "loss": 0.252, "step": 44369 }, { "epoch": 3.5944588464031106, "grad_norm": 0.09907004982233047, "learning_rate": 2.253026688869886e-05, "loss": 0.2272, "step": 44370 }, { "epoch": 3.5945398574206093, "grad_norm": 0.05679634213447571, "learning_rate": 2.2525766236104238e-05, "loss": 0.2007, "step": 44371 }, { "epoch": 3.5946208684381076, "grad_norm": 0.07971812784671783, "learning_rate": 2.252126558350961e-05, "loss": 0.2106, "step": 44372 }, { "epoch": 3.594701879455606, "grad_norm": 0.07787297666072845, "learning_rate": 2.2516764930914982e-05, "loss": 0.2243, "step": 44373 }, { "epoch": 3.5947828904731045, "grad_norm": 0.0740995705127716, "learning_rate": 2.251226427832036e-05, "loss": 0.226, "step": 44374 }, { "epoch": 3.5948639014906028, "grad_norm": 0.08992590010166168, "learning_rate": 2.2507763625725732e-05, "loss": 0.2217, "step": 44375 }, { "epoch": 3.594944912508101, "grad_norm": 0.05741897225379944, "learning_rate": 2.2503262973131106e-05, "loss": 0.241, "step": 44376 }, { "epoch": 3.5950259235255997, "grad_norm": 0.0739036351442337, "learning_rate": 2.249876232053648e-05, "loss": 0.2389, "step": 44377 }, { "epoch": 3.595106934543098, "grad_norm": 0.06869626045227051, "learning_rate": 2.2494261667941853e-05, "loss": 0.2281, "step": 44378 }, { "epoch": 3.595187945560596, "grad_norm": 0.07012733072042465, "learning_rate": 2.2489761015347227e-05, "loss": 0.2321, "step": 44379 }, { "epoch": 3.595268956578095, "grad_norm": 0.07144074887037277, "learning_rate": 2.24852603627526e-05, "loss": 0.2223, "step": 44380 }, { "epoch": 3.595349967595593, "grad_norm": 0.06714019179344177, "learning_rate": 2.2480759710157974e-05, "loss": 0.2119, "step": 44381 }, { "epoch": 3.5954309786130914, "grad_norm": 0.068583644926548, "learning_rate": 2.2476259057563348e-05, "loss": 0.189, "step": 44382 }, { "epoch": 3.5955119896305896, "grad_norm": 0.07480515539646149, "learning_rate": 2.247175840496872e-05, "loss": 0.2178, "step": 44383 }, { "epoch": 3.5955930006480883, "grad_norm": 0.06940307468175888, "learning_rate": 2.2467257752374095e-05, "loss": 0.2033, "step": 44384 }, { "epoch": 3.5956740116655865, "grad_norm": 0.058958880603313446, "learning_rate": 2.246275709977947e-05, "loss": 0.2293, "step": 44385 }, { "epoch": 3.595755022683085, "grad_norm": 0.09506436437368393, "learning_rate": 2.2458256447184842e-05, "loss": 0.2159, "step": 44386 }, { "epoch": 3.595836033700583, "grad_norm": 0.06890156120061874, "learning_rate": 2.2453755794590216e-05, "loss": 0.2072, "step": 44387 }, { "epoch": 3.5959170447180817, "grad_norm": 0.07868379354476929, "learning_rate": 2.244925514199559e-05, "loss": 0.2522, "step": 44388 }, { "epoch": 3.59599805573558, "grad_norm": 0.08256050199270248, "learning_rate": 2.2444754489400966e-05, "loss": 0.2084, "step": 44389 }, { "epoch": 3.596079066753078, "grad_norm": 0.07400922477245331, "learning_rate": 2.244025383680634e-05, "loss": 0.2198, "step": 44390 }, { "epoch": 3.596160077770577, "grad_norm": 0.0915006697177887, "learning_rate": 2.243575318421171e-05, "loss": 0.248, "step": 44391 }, { "epoch": 3.596241088788075, "grad_norm": 0.09188992530107498, "learning_rate": 2.2431252531617087e-05, "loss": 0.2574, "step": 44392 }, { "epoch": 3.5963220998055734, "grad_norm": 0.07915124297142029, "learning_rate": 2.242675187902246e-05, "loss": 0.2752, "step": 44393 }, { "epoch": 3.596403110823072, "grad_norm": 0.062227100133895874, "learning_rate": 2.2422251226427834e-05, "loss": 0.2161, "step": 44394 }, { "epoch": 3.5964841218405703, "grad_norm": 0.06798578798770905, "learning_rate": 2.2417750573833208e-05, "loss": 0.2092, "step": 44395 }, { "epoch": 3.5965651328580686, "grad_norm": 0.06951133906841278, "learning_rate": 2.241324992123858e-05, "loss": 0.19, "step": 44396 }, { "epoch": 3.5966461438755672, "grad_norm": 0.06292098015546799, "learning_rate": 2.2408749268643955e-05, "loss": 0.2326, "step": 44397 }, { "epoch": 3.5967271548930655, "grad_norm": 0.0799744501709938, "learning_rate": 2.240424861604933e-05, "loss": 0.2192, "step": 44398 }, { "epoch": 3.5968081659105637, "grad_norm": 0.0672338604927063, "learning_rate": 2.2399747963454702e-05, "loss": 0.2101, "step": 44399 }, { "epoch": 3.5968891769280624, "grad_norm": 0.08367151021957397, "learning_rate": 2.2395247310860076e-05, "loss": 0.2245, "step": 44400 }, { "epoch": 3.5969701879455607, "grad_norm": 0.07287932187318802, "learning_rate": 2.239074665826545e-05, "loss": 0.2278, "step": 44401 }, { "epoch": 3.597051198963059, "grad_norm": 0.06872007995843887, "learning_rate": 2.2386246005670823e-05, "loss": 0.2436, "step": 44402 }, { "epoch": 3.5971322099805576, "grad_norm": 0.08028792589902878, "learning_rate": 2.2381745353076197e-05, "loss": 0.2507, "step": 44403 }, { "epoch": 3.597213220998056, "grad_norm": 0.08446816354990005, "learning_rate": 2.237724470048157e-05, "loss": 0.2359, "step": 44404 }, { "epoch": 3.597294232015554, "grad_norm": 0.06247890740633011, "learning_rate": 2.2372744047886944e-05, "loss": 0.1942, "step": 44405 }, { "epoch": 3.5973752430330523, "grad_norm": 0.07593336701393127, "learning_rate": 2.2368243395292317e-05, "loss": 0.2303, "step": 44406 }, { "epoch": 3.597456254050551, "grad_norm": 0.06649833917617798, "learning_rate": 2.236374274269769e-05, "loss": 0.2258, "step": 44407 }, { "epoch": 3.5975372650680493, "grad_norm": 0.060069162398576736, "learning_rate": 2.2359242090103068e-05, "loss": 0.241, "step": 44408 }, { "epoch": 3.5976182760855475, "grad_norm": 0.0727672204375267, "learning_rate": 2.2354741437508438e-05, "loss": 0.2355, "step": 44409 }, { "epoch": 3.5976992871030458, "grad_norm": 0.06563632190227509, "learning_rate": 2.2350240784913815e-05, "loss": 0.2434, "step": 44410 }, { "epoch": 3.5977802981205445, "grad_norm": 0.06771665811538696, "learning_rate": 2.234574013231919e-05, "loss": 0.2109, "step": 44411 }, { "epoch": 3.5978613091380427, "grad_norm": 0.07404633611440659, "learning_rate": 2.234123947972456e-05, "loss": 0.2942, "step": 44412 }, { "epoch": 3.597942320155541, "grad_norm": 0.0690755620598793, "learning_rate": 2.2336738827129936e-05, "loss": 0.237, "step": 44413 }, { "epoch": 3.5980233311730396, "grad_norm": 0.06853065639734268, "learning_rate": 2.233223817453531e-05, "loss": 0.1816, "step": 44414 }, { "epoch": 3.598104342190538, "grad_norm": 0.07963988929986954, "learning_rate": 2.2327737521940683e-05, "loss": 0.2502, "step": 44415 }, { "epoch": 3.598185353208036, "grad_norm": 0.07584356516599655, "learning_rate": 2.2323236869346057e-05, "loss": 0.2542, "step": 44416 }, { "epoch": 3.598266364225535, "grad_norm": 0.07289121299982071, "learning_rate": 2.231873621675143e-05, "loss": 0.2325, "step": 44417 }, { "epoch": 3.598347375243033, "grad_norm": 0.0739700198173523, "learning_rate": 2.2314235564156804e-05, "loss": 0.2507, "step": 44418 }, { "epoch": 3.5984283862605313, "grad_norm": 0.07991982251405716, "learning_rate": 2.2309734911562178e-05, "loss": 0.2284, "step": 44419 }, { "epoch": 3.59850939727803, "grad_norm": 0.08472296595573425, "learning_rate": 2.230523425896755e-05, "loss": 0.2609, "step": 44420 }, { "epoch": 3.5985904082955282, "grad_norm": 0.06942620128393173, "learning_rate": 2.2300733606372925e-05, "loss": 0.2495, "step": 44421 }, { "epoch": 3.5986714193130265, "grad_norm": 0.05914752930402756, "learning_rate": 2.22962329537783e-05, "loss": 0.2258, "step": 44422 }, { "epoch": 3.598752430330525, "grad_norm": 0.06569509953260422, "learning_rate": 2.2291732301183675e-05, "loss": 0.2095, "step": 44423 }, { "epoch": 3.5988334413480234, "grad_norm": 0.06737083941698074, "learning_rate": 2.2287231648589046e-05, "loss": 0.1989, "step": 44424 }, { "epoch": 3.5989144523655217, "grad_norm": 0.07976718991994858, "learning_rate": 2.228273099599442e-05, "loss": 0.2179, "step": 44425 }, { "epoch": 3.5989954633830203, "grad_norm": 0.06348604708909988, "learning_rate": 2.2278230343399796e-05, "loss": 0.2015, "step": 44426 }, { "epoch": 3.5990764744005186, "grad_norm": 0.08161592483520508, "learning_rate": 2.2273729690805166e-05, "loss": 0.1966, "step": 44427 }, { "epoch": 3.599157485418017, "grad_norm": 0.0814145877957344, "learning_rate": 2.2269229038210543e-05, "loss": 0.236, "step": 44428 }, { "epoch": 3.599238496435515, "grad_norm": 0.07275941967964172, "learning_rate": 2.2264728385615917e-05, "loss": 0.2234, "step": 44429 }, { "epoch": 3.5993195074530138, "grad_norm": 0.07201056182384491, "learning_rate": 2.2260227733021287e-05, "loss": 0.2436, "step": 44430 }, { "epoch": 3.599400518470512, "grad_norm": 0.06446519494056702, "learning_rate": 2.2255727080426664e-05, "loss": 0.1978, "step": 44431 }, { "epoch": 3.5994815294880103, "grad_norm": 0.0709887146949768, "learning_rate": 2.2251226427832038e-05, "loss": 0.2143, "step": 44432 }, { "epoch": 3.5995625405055085, "grad_norm": 0.06211159750819206, "learning_rate": 2.2246725775237408e-05, "loss": 0.226, "step": 44433 }, { "epoch": 3.599643551523007, "grad_norm": 0.0790424570441246, "learning_rate": 2.2242225122642785e-05, "loss": 0.2229, "step": 44434 }, { "epoch": 3.5997245625405054, "grad_norm": 0.06854367256164551, "learning_rate": 2.223772447004816e-05, "loss": 0.2276, "step": 44435 }, { "epoch": 3.5998055735580037, "grad_norm": 0.08491768687963486, "learning_rate": 2.2233223817453532e-05, "loss": 0.2221, "step": 44436 }, { "epoch": 3.5998865845755024, "grad_norm": 0.08224175870418549, "learning_rate": 2.2228723164858906e-05, "loss": 0.2581, "step": 44437 }, { "epoch": 3.5999675955930006, "grad_norm": 0.07533169537782669, "learning_rate": 2.222422251226428e-05, "loss": 0.2383, "step": 44438 }, { "epoch": 3.600048606610499, "grad_norm": 0.06689838320016861, "learning_rate": 2.2219721859669653e-05, "loss": 0.225, "step": 44439 }, { "epoch": 3.6001296176279975, "grad_norm": 0.07553958892822266, "learning_rate": 2.2215221207075027e-05, "loss": 0.2053, "step": 44440 }, { "epoch": 3.600210628645496, "grad_norm": 0.05749347805976868, "learning_rate": 2.2210720554480404e-05, "loss": 0.2049, "step": 44441 }, { "epoch": 3.600291639662994, "grad_norm": 0.0675349161028862, "learning_rate": 2.2206219901885774e-05, "loss": 0.2247, "step": 44442 }, { "epoch": 3.6003726506804927, "grad_norm": 0.06793300807476044, "learning_rate": 2.2201719249291147e-05, "loss": 0.1925, "step": 44443 }, { "epoch": 3.600453661697991, "grad_norm": 0.07396746426820755, "learning_rate": 2.2197218596696524e-05, "loss": 0.2137, "step": 44444 }, { "epoch": 3.600534672715489, "grad_norm": 0.07258230447769165, "learning_rate": 2.2192717944101895e-05, "loss": 0.2222, "step": 44445 }, { "epoch": 3.600615683732988, "grad_norm": 0.06613799929618835, "learning_rate": 2.2188217291507268e-05, "loss": 0.2336, "step": 44446 }, { "epoch": 3.600696694750486, "grad_norm": 0.095277801156044, "learning_rate": 2.2183716638912645e-05, "loss": 0.2383, "step": 44447 }, { "epoch": 3.6007777057679844, "grad_norm": 0.08406014740467072, "learning_rate": 2.2179215986318015e-05, "loss": 0.2457, "step": 44448 }, { "epoch": 3.600858716785483, "grad_norm": 0.062288954854011536, "learning_rate": 2.2174715333723392e-05, "loss": 0.2496, "step": 44449 }, { "epoch": 3.6009397278029813, "grad_norm": 0.07066863775253296, "learning_rate": 2.2170214681128766e-05, "loss": 0.2206, "step": 44450 }, { "epoch": 3.6010207388204796, "grad_norm": 0.08926728367805481, "learning_rate": 2.2165714028534136e-05, "loss": 0.2167, "step": 44451 }, { "epoch": 3.601101749837978, "grad_norm": 0.06768003851175308, "learning_rate": 2.2161213375939513e-05, "loss": 0.2062, "step": 44452 }, { "epoch": 3.6011827608554765, "grad_norm": 0.07586654275655746, "learning_rate": 2.2156712723344887e-05, "loss": 0.2718, "step": 44453 }, { "epoch": 3.6012637718729748, "grad_norm": 0.07940798252820969, "learning_rate": 2.215221207075026e-05, "loss": 0.2498, "step": 44454 }, { "epoch": 3.601344782890473, "grad_norm": 0.07156102359294891, "learning_rate": 2.2147711418155634e-05, "loss": 0.2055, "step": 44455 }, { "epoch": 3.6014257939079712, "grad_norm": 0.06990113854408264, "learning_rate": 2.2143210765561008e-05, "loss": 0.2067, "step": 44456 }, { "epoch": 3.60150680492547, "grad_norm": 0.06797709316015244, "learning_rate": 2.213871011296638e-05, "loss": 0.2087, "step": 44457 }, { "epoch": 3.601587815942968, "grad_norm": 0.06196364760398865, "learning_rate": 2.2134209460371755e-05, "loss": 0.2331, "step": 44458 }, { "epoch": 3.6016688269604664, "grad_norm": 0.07308321446180344, "learning_rate": 2.212970880777713e-05, "loss": 0.2462, "step": 44459 }, { "epoch": 3.601749837977965, "grad_norm": 0.08339798450469971, "learning_rate": 2.2125208155182502e-05, "loss": 0.1993, "step": 44460 }, { "epoch": 3.6018308489954634, "grad_norm": 0.07711708545684814, "learning_rate": 2.2120707502587876e-05, "loss": 0.181, "step": 44461 }, { "epoch": 3.6019118600129616, "grad_norm": 0.06854071468114853, "learning_rate": 2.2116206849993253e-05, "loss": 0.1748, "step": 44462 }, { "epoch": 3.6019928710304603, "grad_norm": 0.07287586480379105, "learning_rate": 2.2111706197398623e-05, "loss": 0.2096, "step": 44463 }, { "epoch": 3.6020738820479585, "grad_norm": 0.07109333574771881, "learning_rate": 2.2107205544803997e-05, "loss": 0.2754, "step": 44464 }, { "epoch": 3.6021548930654568, "grad_norm": 0.06433144956827164, "learning_rate": 2.2102704892209374e-05, "loss": 0.2237, "step": 44465 }, { "epoch": 3.6022359040829555, "grad_norm": 0.07228867709636688, "learning_rate": 2.2098204239614744e-05, "loss": 0.2134, "step": 44466 }, { "epoch": 3.6023169151004537, "grad_norm": 0.09589815884828568, "learning_rate": 2.209370358702012e-05, "loss": 0.209, "step": 44467 }, { "epoch": 3.602397926117952, "grad_norm": 0.08793746680021286, "learning_rate": 2.2089202934425494e-05, "loss": 0.2464, "step": 44468 }, { "epoch": 3.6024789371354506, "grad_norm": 0.07410049438476562, "learning_rate": 2.2084702281830865e-05, "loss": 0.2563, "step": 44469 }, { "epoch": 3.602559948152949, "grad_norm": 0.07637574523687363, "learning_rate": 2.208020162923624e-05, "loss": 0.2466, "step": 44470 }, { "epoch": 3.602640959170447, "grad_norm": 0.06692693382501602, "learning_rate": 2.2075700976641615e-05, "loss": 0.2339, "step": 44471 }, { "epoch": 3.602721970187946, "grad_norm": 0.06131626293063164, "learning_rate": 2.2071200324046985e-05, "loss": 0.2251, "step": 44472 }, { "epoch": 3.602802981205444, "grad_norm": 0.06866336613893509, "learning_rate": 2.2066699671452362e-05, "loss": 0.2066, "step": 44473 }, { "epoch": 3.6028839922229423, "grad_norm": 0.05573740229010582, "learning_rate": 2.2062199018857736e-05, "loss": 0.193, "step": 44474 }, { "epoch": 3.6029650032404406, "grad_norm": 0.06605105847120285, "learning_rate": 2.205769836626311e-05, "loss": 0.2499, "step": 44475 }, { "epoch": 3.6030460142579392, "grad_norm": 0.06071867421269417, "learning_rate": 2.2053197713668483e-05, "loss": 0.2128, "step": 44476 }, { "epoch": 3.6031270252754375, "grad_norm": 0.06595513224601746, "learning_rate": 2.2048697061073857e-05, "loss": 0.1721, "step": 44477 }, { "epoch": 3.6032080362929357, "grad_norm": 0.07105343788862228, "learning_rate": 2.204419640847923e-05, "loss": 0.2182, "step": 44478 }, { "epoch": 3.603289047310434, "grad_norm": 0.06703602522611618, "learning_rate": 2.2039695755884604e-05, "loss": 0.2671, "step": 44479 }, { "epoch": 3.6033700583279327, "grad_norm": 0.06646741926670074, "learning_rate": 2.2035195103289978e-05, "loss": 0.1988, "step": 44480 }, { "epoch": 3.603451069345431, "grad_norm": 0.07492316514253616, "learning_rate": 2.203069445069535e-05, "loss": 0.2097, "step": 44481 }, { "epoch": 3.603532080362929, "grad_norm": 0.08570502698421478, "learning_rate": 2.2026193798100725e-05, "loss": 0.2202, "step": 44482 }, { "epoch": 3.603613091380428, "grad_norm": 0.07124827057123184, "learning_rate": 2.2021693145506102e-05, "loss": 0.2481, "step": 44483 }, { "epoch": 3.603694102397926, "grad_norm": 0.0758836641907692, "learning_rate": 2.2017192492911472e-05, "loss": 0.1894, "step": 44484 }, { "epoch": 3.6037751134154243, "grad_norm": 0.07663784176111221, "learning_rate": 2.2012691840316846e-05, "loss": 0.253, "step": 44485 }, { "epoch": 3.603856124432923, "grad_norm": 0.07608118653297424, "learning_rate": 2.2008191187722223e-05, "loss": 0.1728, "step": 44486 }, { "epoch": 3.6039371354504213, "grad_norm": 0.08834954351186752, "learning_rate": 2.2003690535127593e-05, "loss": 0.2255, "step": 44487 }, { "epoch": 3.6040181464679195, "grad_norm": 0.09142038226127625, "learning_rate": 2.199918988253297e-05, "loss": 0.2572, "step": 44488 }, { "epoch": 3.604099157485418, "grad_norm": 0.07551602274179459, "learning_rate": 2.1994689229938343e-05, "loss": 0.2013, "step": 44489 }, { "epoch": 3.6041801685029164, "grad_norm": 0.07208868861198425, "learning_rate": 2.1990188577343714e-05, "loss": 0.2482, "step": 44490 }, { "epoch": 3.6042611795204147, "grad_norm": 0.0773419663310051, "learning_rate": 2.198568792474909e-05, "loss": 0.2167, "step": 44491 }, { "epoch": 3.6043421905379134, "grad_norm": 0.0722963884472847, "learning_rate": 2.1981187272154464e-05, "loss": 0.2747, "step": 44492 }, { "epoch": 3.6044232015554116, "grad_norm": 0.08947137743234634, "learning_rate": 2.1976686619559834e-05, "loss": 0.1939, "step": 44493 }, { "epoch": 3.60450421257291, "grad_norm": 0.07143185287714005, "learning_rate": 2.197218596696521e-05, "loss": 0.2449, "step": 44494 }, { "epoch": 3.6045852235904086, "grad_norm": 0.08382023870944977, "learning_rate": 2.1967685314370585e-05, "loss": 0.2386, "step": 44495 }, { "epoch": 3.604666234607907, "grad_norm": 0.07039798051118851, "learning_rate": 2.196318466177596e-05, "loss": 0.2348, "step": 44496 }, { "epoch": 3.604747245625405, "grad_norm": 0.08140499144792557, "learning_rate": 2.1958684009181332e-05, "loss": 0.2135, "step": 44497 }, { "epoch": 3.6048282566429033, "grad_norm": 0.0793289989233017, "learning_rate": 2.1954183356586706e-05, "loss": 0.2307, "step": 44498 }, { "epoch": 3.6049092676604015, "grad_norm": 0.066397525370121, "learning_rate": 2.194968270399208e-05, "loss": 0.2205, "step": 44499 }, { "epoch": 3.6049902786779002, "grad_norm": 0.07063345611095428, "learning_rate": 2.1945182051397453e-05, "loss": 0.2415, "step": 44500 }, { "epoch": 3.6050712896953985, "grad_norm": 0.07360769063234329, "learning_rate": 2.194068139880283e-05, "loss": 0.2323, "step": 44501 }, { "epoch": 3.6051523007128967, "grad_norm": 0.0656171664595604, "learning_rate": 2.19361807462082e-05, "loss": 0.2111, "step": 44502 }, { "epoch": 3.6052333117303954, "grad_norm": 0.08094649016857147, "learning_rate": 2.1931680093613574e-05, "loss": 0.223, "step": 44503 }, { "epoch": 3.6053143227478937, "grad_norm": 0.0766524225473404, "learning_rate": 2.192717944101895e-05, "loss": 0.2723, "step": 44504 }, { "epoch": 3.605395333765392, "grad_norm": 0.057995811104774475, "learning_rate": 2.192267878842432e-05, "loss": 0.2477, "step": 44505 }, { "epoch": 3.6054763447828906, "grad_norm": 0.07008778303861618, "learning_rate": 2.1918178135829695e-05, "loss": 0.2284, "step": 44506 }, { "epoch": 3.605557355800389, "grad_norm": 0.06211728975176811, "learning_rate": 2.191367748323507e-05, "loss": 0.2335, "step": 44507 }, { "epoch": 3.605638366817887, "grad_norm": 0.08289367705583572, "learning_rate": 2.1909176830640442e-05, "loss": 0.2493, "step": 44508 }, { "epoch": 3.6057193778353858, "grad_norm": 0.06763249635696411, "learning_rate": 2.190467617804582e-05, "loss": 0.194, "step": 44509 }, { "epoch": 3.605800388852884, "grad_norm": 0.07699555903673172, "learning_rate": 2.1900175525451192e-05, "loss": 0.2236, "step": 44510 }, { "epoch": 3.6058813998703823, "grad_norm": 0.07511116564273834, "learning_rate": 2.1895674872856563e-05, "loss": 0.2775, "step": 44511 }, { "epoch": 3.605962410887881, "grad_norm": 0.07855696231126785, "learning_rate": 2.189117422026194e-05, "loss": 0.2078, "step": 44512 }, { "epoch": 3.606043421905379, "grad_norm": 0.07785475254058838, "learning_rate": 2.1886673567667313e-05, "loss": 0.2025, "step": 44513 }, { "epoch": 3.6061244329228774, "grad_norm": 0.050321418792009354, "learning_rate": 2.1882172915072687e-05, "loss": 0.1905, "step": 44514 }, { "epoch": 3.606205443940376, "grad_norm": 0.07907307147979736, "learning_rate": 2.187767226247806e-05, "loss": 0.2162, "step": 44515 }, { "epoch": 3.6062864549578744, "grad_norm": 0.08303670585155487, "learning_rate": 2.1873171609883434e-05, "loss": 0.2084, "step": 44516 }, { "epoch": 3.6063674659753726, "grad_norm": 0.07435812801122665, "learning_rate": 2.1868670957288808e-05, "loss": 0.2426, "step": 44517 }, { "epoch": 3.6064484769928713, "grad_norm": 0.06999189406633377, "learning_rate": 2.186417030469418e-05, "loss": 0.2199, "step": 44518 }, { "epoch": 3.6065294880103695, "grad_norm": 0.07246110588312149, "learning_rate": 2.1859669652099555e-05, "loss": 0.2025, "step": 44519 }, { "epoch": 3.606610499027868, "grad_norm": 0.06282434612512589, "learning_rate": 2.185516899950493e-05, "loss": 0.1905, "step": 44520 }, { "epoch": 3.606691510045366, "grad_norm": 0.08029942214488983, "learning_rate": 2.1850668346910302e-05, "loss": 0.248, "step": 44521 }, { "epoch": 3.6067725210628643, "grad_norm": 0.06799287348985672, "learning_rate": 2.184616769431568e-05, "loss": 0.2039, "step": 44522 }, { "epoch": 3.606853532080363, "grad_norm": 0.06617872416973114, "learning_rate": 2.184166704172105e-05, "loss": 0.2185, "step": 44523 }, { "epoch": 3.606934543097861, "grad_norm": 0.06510534137487411, "learning_rate": 2.1837166389126423e-05, "loss": 0.2132, "step": 44524 }, { "epoch": 3.6070155541153595, "grad_norm": 0.08161192387342453, "learning_rate": 2.18326657365318e-05, "loss": 0.2299, "step": 44525 }, { "epoch": 3.607096565132858, "grad_norm": 0.0706726610660553, "learning_rate": 2.182816508393717e-05, "loss": 0.2386, "step": 44526 }, { "epoch": 3.6071775761503564, "grad_norm": 0.08002948760986328, "learning_rate": 2.1823664431342547e-05, "loss": 0.2566, "step": 44527 }, { "epoch": 3.6072585871678546, "grad_norm": 0.0903329849243164, "learning_rate": 2.181916377874792e-05, "loss": 0.2025, "step": 44528 }, { "epoch": 3.6073395981853533, "grad_norm": 0.0838218405842781, "learning_rate": 2.181466312615329e-05, "loss": 0.2238, "step": 44529 }, { "epoch": 3.6074206092028516, "grad_norm": 0.05963883921504021, "learning_rate": 2.1810162473558668e-05, "loss": 0.2197, "step": 44530 }, { "epoch": 3.60750162022035, "grad_norm": 0.06153491139411926, "learning_rate": 2.180566182096404e-05, "loss": 0.2145, "step": 44531 }, { "epoch": 3.6075826312378485, "grad_norm": 0.09146939218044281, "learning_rate": 2.1801161168369412e-05, "loss": 0.1886, "step": 44532 }, { "epoch": 3.6076636422553467, "grad_norm": 0.0709371566772461, "learning_rate": 2.179666051577479e-05, "loss": 0.2091, "step": 44533 }, { "epoch": 3.607744653272845, "grad_norm": 0.0718485489487648, "learning_rate": 2.1792159863180162e-05, "loss": 0.1836, "step": 44534 }, { "epoch": 3.6078256642903437, "grad_norm": 0.07745910435914993, "learning_rate": 2.1787659210585536e-05, "loss": 0.2554, "step": 44535 }, { "epoch": 3.607906675307842, "grad_norm": 0.0641883909702301, "learning_rate": 2.178315855799091e-05, "loss": 0.2313, "step": 44536 }, { "epoch": 3.60798768632534, "grad_norm": 0.07381313294172287, "learning_rate": 2.1778657905396283e-05, "loss": 0.2247, "step": 44537 }, { "epoch": 3.608068697342839, "grad_norm": 0.08087126165628433, "learning_rate": 2.1774157252801657e-05, "loss": 0.262, "step": 44538 }, { "epoch": 3.608149708360337, "grad_norm": 0.08404241502285004, "learning_rate": 2.176965660020703e-05, "loss": 0.2329, "step": 44539 }, { "epoch": 3.6082307193778353, "grad_norm": 0.06949184834957123, "learning_rate": 2.1765155947612407e-05, "loss": 0.2318, "step": 44540 }, { "epoch": 3.608311730395334, "grad_norm": 0.05711599066853523, "learning_rate": 2.1760655295017778e-05, "loss": 0.2243, "step": 44541 }, { "epoch": 3.6083927414128323, "grad_norm": 0.06719649583101273, "learning_rate": 2.175615464242315e-05, "loss": 0.2183, "step": 44542 }, { "epoch": 3.6084737524303305, "grad_norm": 0.07708943635225296, "learning_rate": 2.1751653989828528e-05, "loss": 0.2206, "step": 44543 }, { "epoch": 3.6085547634478288, "grad_norm": 0.06825980544090271, "learning_rate": 2.17471533372339e-05, "loss": 0.2231, "step": 44544 }, { "epoch": 3.608635774465327, "grad_norm": 0.06791453808546066, "learning_rate": 2.1742652684639272e-05, "loss": 0.2441, "step": 44545 }, { "epoch": 3.6087167854828257, "grad_norm": 0.06769877672195435, "learning_rate": 2.173815203204465e-05, "loss": 0.2064, "step": 44546 }, { "epoch": 3.608797796500324, "grad_norm": 0.091277115046978, "learning_rate": 2.173365137945002e-05, "loss": 0.2503, "step": 44547 }, { "epoch": 3.608878807517822, "grad_norm": 0.08346903324127197, "learning_rate": 2.1729150726855396e-05, "loss": 0.2338, "step": 44548 }, { "epoch": 3.608959818535321, "grad_norm": 0.07855582237243652, "learning_rate": 2.172465007426077e-05, "loss": 0.2353, "step": 44549 }, { "epoch": 3.609040829552819, "grad_norm": 0.07885964959859848, "learning_rate": 2.172014942166614e-05, "loss": 0.2253, "step": 44550 }, { "epoch": 3.6091218405703174, "grad_norm": 0.08205586671829224, "learning_rate": 2.1715648769071517e-05, "loss": 0.2436, "step": 44551 }, { "epoch": 3.609202851587816, "grad_norm": 0.07758751511573792, "learning_rate": 2.171114811647689e-05, "loss": 0.2608, "step": 44552 }, { "epoch": 3.6092838626053143, "grad_norm": 0.08244801312685013, "learning_rate": 2.1706647463882264e-05, "loss": 0.2469, "step": 44553 }, { "epoch": 3.6093648736228126, "grad_norm": 0.07393813878297806, "learning_rate": 2.1702146811287638e-05, "loss": 0.2355, "step": 44554 }, { "epoch": 3.6094458846403112, "grad_norm": 0.07184799015522003, "learning_rate": 2.169764615869301e-05, "loss": 0.2044, "step": 44555 }, { "epoch": 3.6095268956578095, "grad_norm": 0.08496291935443878, "learning_rate": 2.1693145506098385e-05, "loss": 0.2218, "step": 44556 }, { "epoch": 3.6096079066753077, "grad_norm": 0.06793703883886337, "learning_rate": 2.168864485350376e-05, "loss": 0.2193, "step": 44557 }, { "epoch": 3.6096889176928064, "grad_norm": 0.07540687918663025, "learning_rate": 2.1684144200909132e-05, "loss": 0.2394, "step": 44558 }, { "epoch": 3.6097699287103047, "grad_norm": 0.08064840734004974, "learning_rate": 2.1679643548314506e-05, "loss": 0.2562, "step": 44559 }, { "epoch": 3.609850939727803, "grad_norm": 0.07052040100097656, "learning_rate": 2.167514289571988e-05, "loss": 0.2064, "step": 44560 }, { "epoch": 3.6099319507453016, "grad_norm": 0.07243770360946655, "learning_rate": 2.1670642243125256e-05, "loss": 0.2232, "step": 44561 }, { "epoch": 3.6100129617628, "grad_norm": 0.08791134506464005, "learning_rate": 2.1666141590530627e-05, "loss": 0.2072, "step": 44562 }, { "epoch": 3.610093972780298, "grad_norm": 0.07799384742975235, "learning_rate": 2.1661640937936e-05, "loss": 0.2253, "step": 44563 }, { "epoch": 3.6101749837977968, "grad_norm": 0.07295829802751541, "learning_rate": 2.1657140285341377e-05, "loss": 0.2282, "step": 44564 }, { "epoch": 3.610255994815295, "grad_norm": 0.06831017136573792, "learning_rate": 2.1652639632746747e-05, "loss": 0.1993, "step": 44565 }, { "epoch": 3.6103370058327933, "grad_norm": 0.07613605260848999, "learning_rate": 2.164813898015212e-05, "loss": 0.1975, "step": 44566 }, { "epoch": 3.6104180168502915, "grad_norm": 0.06792711466550827, "learning_rate": 2.1643638327557498e-05, "loss": 0.2095, "step": 44567 }, { "epoch": 3.6104990278677898, "grad_norm": 0.06990785151720047, "learning_rate": 2.163913767496287e-05, "loss": 0.2229, "step": 44568 }, { "epoch": 3.6105800388852884, "grad_norm": 0.06692981719970703, "learning_rate": 2.1634637022368245e-05, "loss": 0.194, "step": 44569 }, { "epoch": 3.6106610499027867, "grad_norm": 0.061009567230939865, "learning_rate": 2.163013636977362e-05, "loss": 0.2055, "step": 44570 }, { "epoch": 3.610742060920285, "grad_norm": 0.08093198388814926, "learning_rate": 2.1625635717178992e-05, "loss": 0.2446, "step": 44571 }, { "epoch": 3.6108230719377836, "grad_norm": 0.07283247262239456, "learning_rate": 2.1621135064584366e-05, "loss": 0.2425, "step": 44572 }, { "epoch": 3.610904082955282, "grad_norm": 0.07472264766693115, "learning_rate": 2.161663441198974e-05, "loss": 0.2406, "step": 44573 }, { "epoch": 3.61098509397278, "grad_norm": 0.06830692291259766, "learning_rate": 2.1612133759395113e-05, "loss": 0.2285, "step": 44574 }, { "epoch": 3.611066104990279, "grad_norm": 0.07120934873819351, "learning_rate": 2.1607633106800487e-05, "loss": 0.2393, "step": 44575 }, { "epoch": 3.611147116007777, "grad_norm": 0.07623013108968735, "learning_rate": 2.160313245420586e-05, "loss": 0.2172, "step": 44576 }, { "epoch": 3.6112281270252753, "grad_norm": 0.06323181092739105, "learning_rate": 2.1598631801611234e-05, "loss": 0.2188, "step": 44577 }, { "epoch": 3.611309138042774, "grad_norm": 0.07442563772201538, "learning_rate": 2.1594131149016608e-05, "loss": 0.2233, "step": 44578 }, { "epoch": 3.6113901490602722, "grad_norm": 0.07695084065198898, "learning_rate": 2.158963049642198e-05, "loss": 0.2235, "step": 44579 }, { "epoch": 3.6114711600777705, "grad_norm": 0.04874782636761665, "learning_rate": 2.1585129843827355e-05, "loss": 0.2131, "step": 44580 }, { "epoch": 3.611552171095269, "grad_norm": 0.05301962047815323, "learning_rate": 2.158062919123273e-05, "loss": 0.229, "step": 44581 }, { "epoch": 3.6116331821127674, "grad_norm": 0.07020232826471329, "learning_rate": 2.1576128538638105e-05, "loss": 0.224, "step": 44582 }, { "epoch": 3.6117141931302656, "grad_norm": 0.06658996641635895, "learning_rate": 2.1571627886043476e-05, "loss": 0.2266, "step": 44583 }, { "epoch": 3.6117952041477643, "grad_norm": 0.059480443596839905, "learning_rate": 2.156712723344885e-05, "loss": 0.1787, "step": 44584 }, { "epoch": 3.6118762151652626, "grad_norm": 0.06132667139172554, "learning_rate": 2.1562626580854226e-05, "loss": 0.2423, "step": 44585 }, { "epoch": 3.611957226182761, "grad_norm": 0.06131211668252945, "learning_rate": 2.15581259282596e-05, "loss": 0.2182, "step": 44586 }, { "epoch": 3.612038237200259, "grad_norm": 0.07730801403522491, "learning_rate": 2.1553625275664973e-05, "loss": 0.2484, "step": 44587 }, { "epoch": 3.6121192482177578, "grad_norm": 0.07488203793764114, "learning_rate": 2.1549124623070347e-05, "loss": 0.2381, "step": 44588 }, { "epoch": 3.612200259235256, "grad_norm": 0.06727041304111481, "learning_rate": 2.154462397047572e-05, "loss": 0.1783, "step": 44589 }, { "epoch": 3.6122812702527543, "grad_norm": 0.084172323346138, "learning_rate": 2.1540123317881094e-05, "loss": 0.2574, "step": 44590 }, { "epoch": 3.6123622812702525, "grad_norm": 0.07354126125574112, "learning_rate": 2.1535622665286468e-05, "loss": 0.2057, "step": 44591 }, { "epoch": 3.612443292287751, "grad_norm": 0.08954668790102005, "learning_rate": 2.153112201269184e-05, "loss": 0.2535, "step": 44592 }, { "epoch": 3.6125243033052494, "grad_norm": 0.07390826940536499, "learning_rate": 2.1526621360097215e-05, "loss": 0.1942, "step": 44593 }, { "epoch": 3.6126053143227477, "grad_norm": 0.07131165266036987, "learning_rate": 2.152212070750259e-05, "loss": 0.2369, "step": 44594 }, { "epoch": 3.6126863253402464, "grad_norm": 0.07621818780899048, "learning_rate": 2.1517620054907962e-05, "loss": 0.2525, "step": 44595 }, { "epoch": 3.6127673363577446, "grad_norm": 0.06234192103147507, "learning_rate": 2.1513119402313336e-05, "loss": 0.2274, "step": 44596 }, { "epoch": 3.612848347375243, "grad_norm": 0.0673263743519783, "learning_rate": 2.150861874971871e-05, "loss": 0.2239, "step": 44597 }, { "epoch": 3.6129293583927415, "grad_norm": 0.08373507857322693, "learning_rate": 2.1504118097124083e-05, "loss": 0.2688, "step": 44598 }, { "epoch": 3.61301036941024, "grad_norm": 0.0701875388622284, "learning_rate": 2.1499617444529457e-05, "loss": 0.2286, "step": 44599 }, { "epoch": 3.613091380427738, "grad_norm": 0.0799219161272049, "learning_rate": 2.1495116791934834e-05, "loss": 0.2227, "step": 44600 }, { "epoch": 3.6131723914452367, "grad_norm": 0.07932981103658676, "learning_rate": 2.1490616139340204e-05, "loss": 0.2621, "step": 44601 }, { "epoch": 3.613253402462735, "grad_norm": 0.07419607788324356, "learning_rate": 2.1486115486745578e-05, "loss": 0.2149, "step": 44602 }, { "epoch": 3.613334413480233, "grad_norm": 0.0710701197385788, "learning_rate": 2.1481614834150955e-05, "loss": 0.2112, "step": 44603 }, { "epoch": 3.613415424497732, "grad_norm": 0.0597371831536293, "learning_rate": 2.1477114181556328e-05, "loss": 0.1734, "step": 44604 }, { "epoch": 3.61349643551523, "grad_norm": 0.07174075394868851, "learning_rate": 2.14726135289617e-05, "loss": 0.1975, "step": 44605 }, { "epoch": 3.6135774465327284, "grad_norm": 0.06435782462358475, "learning_rate": 2.1468112876367075e-05, "loss": 0.2274, "step": 44606 }, { "epoch": 3.613658457550227, "grad_norm": 0.09974440187215805, "learning_rate": 2.146361222377245e-05, "loss": 0.3172, "step": 44607 }, { "epoch": 3.6137394685677253, "grad_norm": 0.07410252839326859, "learning_rate": 2.1459111571177823e-05, "loss": 0.2305, "step": 44608 }, { "epoch": 3.6138204795852236, "grad_norm": 0.06656160205602646, "learning_rate": 2.1454610918583196e-05, "loss": 0.2305, "step": 44609 }, { "epoch": 3.613901490602722, "grad_norm": 0.07200726866722107, "learning_rate": 2.145011026598857e-05, "loss": 0.2414, "step": 44610 }, { "epoch": 3.6139825016202205, "grad_norm": 0.0850789025425911, "learning_rate": 2.1445609613393943e-05, "loss": 0.2371, "step": 44611 }, { "epoch": 3.6140635126377187, "grad_norm": 0.08625289052724838, "learning_rate": 2.1441108960799317e-05, "loss": 0.2245, "step": 44612 }, { "epoch": 3.614144523655217, "grad_norm": 0.07585910707712173, "learning_rate": 2.143660830820469e-05, "loss": 0.2979, "step": 44613 }, { "epoch": 3.6142255346727152, "grad_norm": 0.07107369601726532, "learning_rate": 2.1432107655610064e-05, "loss": 0.1882, "step": 44614 }, { "epoch": 3.614306545690214, "grad_norm": 0.07323051989078522, "learning_rate": 2.1427607003015438e-05, "loss": 0.2188, "step": 44615 }, { "epoch": 3.614387556707712, "grad_norm": 0.07132337987422943, "learning_rate": 2.142310635042081e-05, "loss": 0.2036, "step": 44616 }, { "epoch": 3.6144685677252104, "grad_norm": 0.07494372874498367, "learning_rate": 2.1418605697826185e-05, "loss": 0.2321, "step": 44617 }, { "epoch": 3.614549578742709, "grad_norm": 0.08323635160923004, "learning_rate": 2.141410504523156e-05, "loss": 0.2297, "step": 44618 }, { "epoch": 3.6146305897602073, "grad_norm": 0.061273686587810516, "learning_rate": 2.1409604392636932e-05, "loss": 0.2212, "step": 44619 }, { "epoch": 3.6147116007777056, "grad_norm": 0.09287270903587341, "learning_rate": 2.1405103740042306e-05, "loss": 0.2586, "step": 44620 }, { "epoch": 3.6147926117952043, "grad_norm": 0.07976991683244705, "learning_rate": 2.1400603087447683e-05, "loss": 0.2058, "step": 44621 }, { "epoch": 3.6148736228127025, "grad_norm": 0.06702376157045364, "learning_rate": 2.1396102434853056e-05, "loss": 0.2451, "step": 44622 }, { "epoch": 3.6149546338302008, "grad_norm": 0.08406377583742142, "learning_rate": 2.1391601782258427e-05, "loss": 0.2201, "step": 44623 }, { "epoch": 3.6150356448476995, "grad_norm": 0.06391263008117676, "learning_rate": 2.1387101129663804e-05, "loss": 0.2116, "step": 44624 }, { "epoch": 3.6151166558651977, "grad_norm": 0.07209109514951706, "learning_rate": 2.1382600477069177e-05, "loss": 0.2178, "step": 44625 }, { "epoch": 3.615197666882696, "grad_norm": 0.05859759449958801, "learning_rate": 2.137809982447455e-05, "loss": 0.1891, "step": 44626 }, { "epoch": 3.6152786779001946, "grad_norm": 0.06623557209968567, "learning_rate": 2.1373599171879924e-05, "loss": 0.2343, "step": 44627 }, { "epoch": 3.615359688917693, "grad_norm": 0.08955825120210648, "learning_rate": 2.1369098519285298e-05, "loss": 0.269, "step": 44628 }, { "epoch": 3.615440699935191, "grad_norm": 0.08716186881065369, "learning_rate": 2.136459786669067e-05, "loss": 0.2382, "step": 44629 }, { "epoch": 3.61552171095269, "grad_norm": 0.06881950795650482, "learning_rate": 2.1360097214096045e-05, "loss": 0.2266, "step": 44630 }, { "epoch": 3.615602721970188, "grad_norm": 0.06224439665675163, "learning_rate": 2.135559656150142e-05, "loss": 0.1997, "step": 44631 }, { "epoch": 3.6156837329876863, "grad_norm": 0.06367988884449005, "learning_rate": 2.1351095908906792e-05, "loss": 0.2104, "step": 44632 }, { "epoch": 3.6157647440051845, "grad_norm": 0.07206551730632782, "learning_rate": 2.1346595256312166e-05, "loss": 0.2131, "step": 44633 }, { "epoch": 3.6158457550226832, "grad_norm": 0.08080728352069855, "learning_rate": 2.134209460371754e-05, "loss": 0.2064, "step": 44634 }, { "epoch": 3.6159267660401815, "grad_norm": 0.09997903555631638, "learning_rate": 2.1337593951122913e-05, "loss": 0.2489, "step": 44635 }, { "epoch": 3.6160077770576797, "grad_norm": 0.07835156470537186, "learning_rate": 2.1333093298528287e-05, "loss": 0.2242, "step": 44636 }, { "epoch": 3.616088788075178, "grad_norm": 0.06274702399969101, "learning_rate": 2.1328592645933664e-05, "loss": 0.243, "step": 44637 }, { "epoch": 3.6161697990926767, "grad_norm": 0.08270495384931564, "learning_rate": 2.1324091993339034e-05, "loss": 0.2314, "step": 44638 }, { "epoch": 3.616250810110175, "grad_norm": 0.0683775395154953, "learning_rate": 2.131959134074441e-05, "loss": 0.1944, "step": 44639 }, { "epoch": 3.616331821127673, "grad_norm": 0.08411401510238647, "learning_rate": 2.1315090688149785e-05, "loss": 0.2662, "step": 44640 }, { "epoch": 3.616412832145172, "grad_norm": 0.07783766090869904, "learning_rate": 2.1310590035555155e-05, "loss": 0.239, "step": 44641 }, { "epoch": 3.61649384316267, "grad_norm": 0.06915144622325897, "learning_rate": 2.1306089382960532e-05, "loss": 0.2263, "step": 44642 }, { "epoch": 3.6165748541801683, "grad_norm": 0.06283178925514221, "learning_rate": 2.1301588730365905e-05, "loss": 0.2191, "step": 44643 }, { "epoch": 3.616655865197667, "grad_norm": 0.07214689999818802, "learning_rate": 2.1297088077771276e-05, "loss": 0.2167, "step": 44644 }, { "epoch": 3.6167368762151653, "grad_norm": 0.07811303436756134, "learning_rate": 2.1292587425176653e-05, "loss": 0.228, "step": 44645 }, { "epoch": 3.6168178872326635, "grad_norm": 0.06575002521276474, "learning_rate": 2.1288086772582026e-05, "loss": 0.2228, "step": 44646 }, { "epoch": 3.616898898250162, "grad_norm": 0.0618891641497612, "learning_rate": 2.12835861199874e-05, "loss": 0.2402, "step": 44647 }, { "epoch": 3.6169799092676604, "grad_norm": 0.06803794205188751, "learning_rate": 2.1279085467392773e-05, "loss": 0.2241, "step": 44648 }, { "epoch": 3.6170609202851587, "grad_norm": 0.09507738053798676, "learning_rate": 2.1274584814798147e-05, "loss": 0.2172, "step": 44649 }, { "epoch": 3.6171419313026574, "grad_norm": 0.07402309775352478, "learning_rate": 2.127008416220352e-05, "loss": 0.2451, "step": 44650 }, { "epoch": 3.6172229423201556, "grad_norm": 0.0824049636721611, "learning_rate": 2.1265583509608894e-05, "loss": 0.2152, "step": 44651 }, { "epoch": 3.617303953337654, "grad_norm": 0.0745280459523201, "learning_rate": 2.1261082857014268e-05, "loss": 0.2252, "step": 44652 }, { "epoch": 3.6173849643551526, "grad_norm": 0.10456760972738266, "learning_rate": 2.125658220441964e-05, "loss": 0.2137, "step": 44653 }, { "epoch": 3.617465975372651, "grad_norm": 0.08100705593824387, "learning_rate": 2.1252081551825015e-05, "loss": 0.246, "step": 44654 }, { "epoch": 3.617546986390149, "grad_norm": 0.07619575411081314, "learning_rate": 2.1247580899230392e-05, "loss": 0.2351, "step": 44655 }, { "epoch": 3.6176279974076473, "grad_norm": 0.06565256416797638, "learning_rate": 2.1243080246635762e-05, "loss": 0.2236, "step": 44656 }, { "epoch": 3.617709008425146, "grad_norm": 0.07464781403541565, "learning_rate": 2.1238579594041136e-05, "loss": 0.2282, "step": 44657 }, { "epoch": 3.6177900194426442, "grad_norm": 0.07250156253576279, "learning_rate": 2.1234078941446513e-05, "loss": 0.2558, "step": 44658 }, { "epoch": 3.6178710304601425, "grad_norm": 0.07820381969213486, "learning_rate": 2.1229578288851883e-05, "loss": 0.2276, "step": 44659 }, { "epoch": 3.6179520414776407, "grad_norm": 0.07306982576847076, "learning_rate": 2.122507763625726e-05, "loss": 0.2289, "step": 44660 }, { "epoch": 3.6180330524951394, "grad_norm": 0.08155608177185059, "learning_rate": 2.1220576983662634e-05, "loss": 0.2199, "step": 44661 }, { "epoch": 3.6181140635126376, "grad_norm": 0.062416404485702515, "learning_rate": 2.1216076331068004e-05, "loss": 0.2089, "step": 44662 }, { "epoch": 3.618195074530136, "grad_norm": 0.09483791142702103, "learning_rate": 2.121157567847338e-05, "loss": 0.2483, "step": 44663 }, { "epoch": 3.6182760855476346, "grad_norm": 0.07709171622991562, "learning_rate": 2.1207075025878754e-05, "loss": 0.1957, "step": 44664 }, { "epoch": 3.618357096565133, "grad_norm": 0.06335264444351196, "learning_rate": 2.1202574373284125e-05, "loss": 0.2168, "step": 44665 }, { "epoch": 3.618438107582631, "grad_norm": 0.07865205407142639, "learning_rate": 2.11980737206895e-05, "loss": 0.2134, "step": 44666 }, { "epoch": 3.6185191186001298, "grad_norm": 0.09624006599187851, "learning_rate": 2.1193573068094875e-05, "loss": 0.2437, "step": 44667 }, { "epoch": 3.618600129617628, "grad_norm": 0.07597095519304276, "learning_rate": 2.118907241550025e-05, "loss": 0.2212, "step": 44668 }, { "epoch": 3.6186811406351262, "grad_norm": 0.06700621545314789, "learning_rate": 2.1184571762905622e-05, "loss": 0.2013, "step": 44669 }, { "epoch": 3.618762151652625, "grad_norm": 0.07764141261577606, "learning_rate": 2.1180071110310996e-05, "loss": 0.2652, "step": 44670 }, { "epoch": 3.618843162670123, "grad_norm": 0.08569448441267014, "learning_rate": 2.117557045771637e-05, "loss": 0.2149, "step": 44671 }, { "epoch": 3.6189241736876214, "grad_norm": 0.08548179268836975, "learning_rate": 2.1171069805121743e-05, "loss": 0.2404, "step": 44672 }, { "epoch": 3.61900518470512, "grad_norm": 0.06488697230815887, "learning_rate": 2.116656915252712e-05, "loss": 0.2117, "step": 44673 }, { "epoch": 3.6190861957226184, "grad_norm": 0.06554609537124634, "learning_rate": 2.116206849993249e-05, "loss": 0.2499, "step": 44674 }, { "epoch": 3.6191672067401166, "grad_norm": 0.05791258439421654, "learning_rate": 2.1157567847337864e-05, "loss": 0.2058, "step": 44675 }, { "epoch": 3.6192482177576153, "grad_norm": 0.06734529137611389, "learning_rate": 2.115306719474324e-05, "loss": 0.2256, "step": 44676 }, { "epoch": 3.6193292287751135, "grad_norm": 0.06773965060710907, "learning_rate": 2.114856654214861e-05, "loss": 0.2161, "step": 44677 }, { "epoch": 3.619410239792612, "grad_norm": 0.06876647472381592, "learning_rate": 2.1144065889553985e-05, "loss": 0.2414, "step": 44678 }, { "epoch": 3.61949125081011, "grad_norm": 0.06989677995443344, "learning_rate": 2.1139565236959362e-05, "loss": 0.2746, "step": 44679 }, { "epoch": 3.6195722618276087, "grad_norm": 0.06485997885465622, "learning_rate": 2.1135064584364732e-05, "loss": 0.1826, "step": 44680 }, { "epoch": 3.619653272845107, "grad_norm": 0.0713554248213768, "learning_rate": 2.113056393177011e-05, "loss": 0.2168, "step": 44681 }, { "epoch": 3.619734283862605, "grad_norm": 0.09202663600444794, "learning_rate": 2.1126063279175483e-05, "loss": 0.2209, "step": 44682 }, { "epoch": 3.6198152948801035, "grad_norm": 0.06724567711353302, "learning_rate": 2.1121562626580853e-05, "loss": 0.1997, "step": 44683 }, { "epoch": 3.619896305897602, "grad_norm": 0.0922047421336174, "learning_rate": 2.111706197398623e-05, "loss": 0.2221, "step": 44684 }, { "epoch": 3.6199773169151004, "grad_norm": 0.053619880229234695, "learning_rate": 2.1112561321391604e-05, "loss": 0.2036, "step": 44685 }, { "epoch": 3.6200583279325986, "grad_norm": 0.07134517282247543, "learning_rate": 2.1108060668796977e-05, "loss": 0.2364, "step": 44686 }, { "epoch": 3.6201393389500973, "grad_norm": 0.07642362266778946, "learning_rate": 2.110356001620235e-05, "loss": 0.2016, "step": 44687 }, { "epoch": 3.6202203499675956, "grad_norm": 0.07684290409088135, "learning_rate": 2.1099059363607724e-05, "loss": 0.2285, "step": 44688 }, { "epoch": 3.620301360985094, "grad_norm": 0.08038019388914108, "learning_rate": 2.1094558711013098e-05, "loss": 0.241, "step": 44689 }, { "epoch": 3.6203823720025925, "grad_norm": 0.06323223561048508, "learning_rate": 2.109005805841847e-05, "loss": 0.1871, "step": 44690 }, { "epoch": 3.6204633830200907, "grad_norm": 0.053566351532936096, "learning_rate": 2.1085557405823845e-05, "loss": 0.1933, "step": 44691 }, { "epoch": 3.620544394037589, "grad_norm": 0.0744895339012146, "learning_rate": 2.108105675322922e-05, "loss": 0.2402, "step": 44692 }, { "epoch": 3.6206254050550877, "grad_norm": 0.07990087568759918, "learning_rate": 2.1076556100634592e-05, "loss": 0.2582, "step": 44693 }, { "epoch": 3.620706416072586, "grad_norm": 0.06844060868024826, "learning_rate": 2.107205544803997e-05, "loss": 0.2262, "step": 44694 }, { "epoch": 3.620787427090084, "grad_norm": 0.058736447244882584, "learning_rate": 2.106755479544534e-05, "loss": 0.1874, "step": 44695 }, { "epoch": 3.620868438107583, "grad_norm": 0.06610717624425888, "learning_rate": 2.1063054142850713e-05, "loss": 0.1953, "step": 44696 }, { "epoch": 3.620949449125081, "grad_norm": 0.06829269230365753, "learning_rate": 2.105855349025609e-05, "loss": 0.219, "step": 44697 }, { "epoch": 3.6210304601425793, "grad_norm": 0.07212626934051514, "learning_rate": 2.105405283766146e-05, "loss": 0.2185, "step": 44698 }, { "epoch": 3.621111471160078, "grad_norm": 0.07561732083559036, "learning_rate": 2.1049552185066837e-05, "loss": 0.2205, "step": 44699 }, { "epoch": 3.6211924821775763, "grad_norm": 0.0672653540968895, "learning_rate": 2.104505153247221e-05, "loss": 0.2368, "step": 44700 }, { "epoch": 3.6212734931950745, "grad_norm": 0.07278675585985184, "learning_rate": 2.104055087987758e-05, "loss": 0.2466, "step": 44701 }, { "epoch": 3.6213545042125728, "grad_norm": 0.07003212720155716, "learning_rate": 2.1036050227282958e-05, "loss": 0.2314, "step": 44702 }, { "epoch": 3.6214355152300715, "grad_norm": 0.07009841501712799, "learning_rate": 2.1031549574688332e-05, "loss": 0.2049, "step": 44703 }, { "epoch": 3.6215165262475697, "grad_norm": 0.07427027076482773, "learning_rate": 2.1027048922093702e-05, "loss": 0.2016, "step": 44704 }, { "epoch": 3.621597537265068, "grad_norm": 0.06905297935009003, "learning_rate": 2.102254826949908e-05, "loss": 0.2347, "step": 44705 }, { "epoch": 3.621678548282566, "grad_norm": 0.06712187081575394, "learning_rate": 2.1018047616904453e-05, "loss": 0.2052, "step": 44706 }, { "epoch": 3.621759559300065, "grad_norm": 0.08001432567834854, "learning_rate": 2.1013546964309826e-05, "loss": 0.2733, "step": 44707 }, { "epoch": 3.621840570317563, "grad_norm": 0.05815906822681427, "learning_rate": 2.10090463117152e-05, "loss": 0.1974, "step": 44708 }, { "epoch": 3.6219215813350614, "grad_norm": 0.0719711035490036, "learning_rate": 2.1004545659120573e-05, "loss": 0.2166, "step": 44709 }, { "epoch": 3.62200259235256, "grad_norm": 0.07093847543001175, "learning_rate": 2.1000045006525947e-05, "loss": 0.2192, "step": 44710 }, { "epoch": 3.6220836033700583, "grad_norm": 0.07297228276729584, "learning_rate": 2.099554435393132e-05, "loss": 0.2077, "step": 44711 }, { "epoch": 3.6221646143875565, "grad_norm": 0.09079265594482422, "learning_rate": 2.0991043701336698e-05, "loss": 0.2513, "step": 44712 }, { "epoch": 3.6222456254050552, "grad_norm": 0.07077782601118088, "learning_rate": 2.0986543048742068e-05, "loss": 0.2004, "step": 44713 }, { "epoch": 3.6223266364225535, "grad_norm": 0.0672997534275055, "learning_rate": 2.098204239614744e-05, "loss": 0.2092, "step": 44714 }, { "epoch": 3.6224076474400517, "grad_norm": 0.06267137825489044, "learning_rate": 2.097754174355282e-05, "loss": 0.1994, "step": 44715 }, { "epoch": 3.6224886584575504, "grad_norm": 0.06340795010328293, "learning_rate": 2.097304109095819e-05, "loss": 0.2151, "step": 44716 }, { "epoch": 3.6225696694750487, "grad_norm": 0.06249105557799339, "learning_rate": 2.0968540438363562e-05, "loss": 0.2197, "step": 44717 }, { "epoch": 3.622650680492547, "grad_norm": 0.07884196192026138, "learning_rate": 2.096403978576894e-05, "loss": 0.2554, "step": 44718 }, { "epoch": 3.6227316915100456, "grad_norm": 0.10559750348329544, "learning_rate": 2.095953913317431e-05, "loss": 0.2637, "step": 44719 }, { "epoch": 3.622812702527544, "grad_norm": 0.06595143675804138, "learning_rate": 2.0955038480579686e-05, "loss": 0.1985, "step": 44720 }, { "epoch": 3.622893713545042, "grad_norm": 0.0710291638970375, "learning_rate": 2.095053782798506e-05, "loss": 0.2555, "step": 44721 }, { "epoch": 3.6229747245625408, "grad_norm": 0.07778099924325943, "learning_rate": 2.094603717539043e-05, "loss": 0.2483, "step": 44722 }, { "epoch": 3.623055735580039, "grad_norm": 0.08554724603891373, "learning_rate": 2.0941536522795807e-05, "loss": 0.2449, "step": 44723 }, { "epoch": 3.6231367465975373, "grad_norm": 0.08926980197429657, "learning_rate": 2.093703587020118e-05, "loss": 0.2237, "step": 44724 }, { "epoch": 3.6232177576150355, "grad_norm": 0.0684860572218895, "learning_rate": 2.0932535217606554e-05, "loss": 0.2221, "step": 44725 }, { "epoch": 3.6232987686325338, "grad_norm": 0.08324017375707626, "learning_rate": 2.0928034565011928e-05, "loss": 0.1966, "step": 44726 }, { "epoch": 3.6233797796500324, "grad_norm": 0.07645716518163681, "learning_rate": 2.09235339124173e-05, "loss": 0.222, "step": 44727 }, { "epoch": 3.6234607906675307, "grad_norm": 0.06555616855621338, "learning_rate": 2.0919033259822675e-05, "loss": 0.2671, "step": 44728 }, { "epoch": 3.623541801685029, "grad_norm": 0.07931575179100037, "learning_rate": 2.091453260722805e-05, "loss": 0.2121, "step": 44729 }, { "epoch": 3.6236228127025276, "grad_norm": 0.08958851546049118, "learning_rate": 2.0910031954633422e-05, "loss": 0.2072, "step": 44730 }, { "epoch": 3.623703823720026, "grad_norm": 0.08067791163921356, "learning_rate": 2.0905531302038796e-05, "loss": 0.2278, "step": 44731 }, { "epoch": 3.623784834737524, "grad_norm": 0.07220610231161118, "learning_rate": 2.090103064944417e-05, "loss": 0.2264, "step": 44732 }, { "epoch": 3.623865845755023, "grad_norm": 0.0624765083193779, "learning_rate": 2.0896529996849547e-05, "loss": 0.1901, "step": 44733 }, { "epoch": 3.623946856772521, "grad_norm": 0.08418891578912735, "learning_rate": 2.0892029344254917e-05, "loss": 0.2468, "step": 44734 }, { "epoch": 3.6240278677900193, "grad_norm": 0.0655638724565506, "learning_rate": 2.088752869166029e-05, "loss": 0.2259, "step": 44735 }, { "epoch": 3.624108878807518, "grad_norm": 0.08135916292667389, "learning_rate": 2.0883028039065667e-05, "loss": 0.2149, "step": 44736 }, { "epoch": 3.624189889825016, "grad_norm": 0.07866069674491882, "learning_rate": 2.0878527386471038e-05, "loss": 0.2547, "step": 44737 }, { "epoch": 3.6242709008425145, "grad_norm": 0.062272604554891586, "learning_rate": 2.087402673387641e-05, "loss": 0.2237, "step": 44738 }, { "epoch": 3.624351911860013, "grad_norm": 0.06917754560709, "learning_rate": 2.0869526081281788e-05, "loss": 0.2302, "step": 44739 }, { "epoch": 3.6244329228775114, "grad_norm": 0.08508767187595367, "learning_rate": 2.086502542868716e-05, "loss": 0.2321, "step": 44740 }, { "epoch": 3.6245139338950096, "grad_norm": 0.06781996786594391, "learning_rate": 2.0860524776092535e-05, "loss": 0.2626, "step": 44741 }, { "epoch": 3.6245949449125083, "grad_norm": 0.07191802561283112, "learning_rate": 2.085602412349791e-05, "loss": 0.2429, "step": 44742 }, { "epoch": 3.6246759559300066, "grad_norm": 0.06998195499181747, "learning_rate": 2.085152347090328e-05, "loss": 0.2048, "step": 44743 }, { "epoch": 3.624756966947505, "grad_norm": 0.06424444913864136, "learning_rate": 2.0847022818308656e-05, "loss": 0.2005, "step": 44744 }, { "epoch": 3.6248379779650035, "grad_norm": 0.06656464189291, "learning_rate": 2.084252216571403e-05, "loss": 0.2245, "step": 44745 }, { "epoch": 3.6249189889825018, "grad_norm": 0.08617588877677917, "learning_rate": 2.0838021513119403e-05, "loss": 0.2295, "step": 44746 }, { "epoch": 3.625, "grad_norm": 0.07731680572032928, "learning_rate": 2.0833520860524777e-05, "loss": 0.2244, "step": 44747 }, { "epoch": 3.6250810110174982, "grad_norm": 0.06422511488199234, "learning_rate": 2.082902020793015e-05, "loss": 0.2516, "step": 44748 }, { "epoch": 3.6251620220349965, "grad_norm": 0.08240256458520889, "learning_rate": 2.0824519555335524e-05, "loss": 0.2162, "step": 44749 }, { "epoch": 3.625243033052495, "grad_norm": 0.0808911994099617, "learning_rate": 2.0820018902740898e-05, "loss": 0.2721, "step": 44750 }, { "epoch": 3.6253240440699934, "grad_norm": 0.06711307168006897, "learning_rate": 2.081551825014627e-05, "loss": 0.2269, "step": 44751 }, { "epoch": 3.6254050550874917, "grad_norm": 0.06413264572620392, "learning_rate": 2.0811017597551645e-05, "loss": 0.2177, "step": 44752 }, { "epoch": 3.6254860661049904, "grad_norm": 0.06592356413602829, "learning_rate": 2.080651694495702e-05, "loss": 0.228, "step": 44753 }, { "epoch": 3.6255670771224886, "grad_norm": 0.07257553189992905, "learning_rate": 2.0802016292362396e-05, "loss": 0.2527, "step": 44754 }, { "epoch": 3.625648088139987, "grad_norm": 0.05981922522187233, "learning_rate": 2.0797515639767766e-05, "loss": 0.2331, "step": 44755 }, { "epoch": 3.6257290991574855, "grad_norm": 0.08513333648443222, "learning_rate": 2.079301498717314e-05, "loss": 0.2322, "step": 44756 }, { "epoch": 3.625810110174984, "grad_norm": 0.062274035066366196, "learning_rate": 2.0788514334578517e-05, "loss": 0.2174, "step": 44757 }, { "epoch": 3.625891121192482, "grad_norm": 0.08532512187957764, "learning_rate": 2.0784013681983887e-05, "loss": 0.2387, "step": 44758 }, { "epoch": 3.6259721322099807, "grad_norm": 0.08419835567474365, "learning_rate": 2.0779513029389264e-05, "loss": 0.2474, "step": 44759 }, { "epoch": 3.626053143227479, "grad_norm": 0.06810888648033142, "learning_rate": 2.0775012376794637e-05, "loss": 0.2138, "step": 44760 }, { "epoch": 3.626134154244977, "grad_norm": 0.07005812227725983, "learning_rate": 2.0770511724200008e-05, "loss": 0.2416, "step": 44761 }, { "epoch": 3.626215165262476, "grad_norm": 0.0587514191865921, "learning_rate": 2.0766011071605385e-05, "loss": 0.1943, "step": 44762 }, { "epoch": 3.626296176279974, "grad_norm": 0.08654726296663284, "learning_rate": 2.0761510419010758e-05, "loss": 0.2777, "step": 44763 }, { "epoch": 3.6263771872974724, "grad_norm": 0.07134860754013062, "learning_rate": 2.075700976641613e-05, "loss": 0.1994, "step": 44764 }, { "epoch": 3.626458198314971, "grad_norm": 0.07229122519493103, "learning_rate": 2.0752509113821505e-05, "loss": 0.2185, "step": 44765 }, { "epoch": 3.6265392093324693, "grad_norm": 0.07302133738994598, "learning_rate": 2.074800846122688e-05, "loss": 0.2114, "step": 44766 }, { "epoch": 3.6266202203499676, "grad_norm": 0.08091484010219574, "learning_rate": 2.0743507808632253e-05, "loss": 0.2138, "step": 44767 }, { "epoch": 3.6267012313674662, "grad_norm": 0.07142447680234909, "learning_rate": 2.0739007156037626e-05, "loss": 0.2521, "step": 44768 }, { "epoch": 3.6267822423849645, "grad_norm": 0.06590250134468079, "learning_rate": 2.0734506503443e-05, "loss": 0.2086, "step": 44769 }, { "epoch": 3.6268632534024627, "grad_norm": 0.07560182362794876, "learning_rate": 2.0730005850848373e-05, "loss": 0.2083, "step": 44770 }, { "epoch": 3.626944264419961, "grad_norm": 0.06524951756000519, "learning_rate": 2.0725505198253747e-05, "loss": 0.2077, "step": 44771 }, { "epoch": 3.6270252754374592, "grad_norm": 0.07804036140441895, "learning_rate": 2.0721004545659124e-05, "loss": 0.2133, "step": 44772 }, { "epoch": 3.627106286454958, "grad_norm": 0.06527618318796158, "learning_rate": 2.0716503893064494e-05, "loss": 0.2157, "step": 44773 }, { "epoch": 3.627187297472456, "grad_norm": 0.07573771476745605, "learning_rate": 2.0712003240469868e-05, "loss": 0.2472, "step": 44774 }, { "epoch": 3.6272683084899544, "grad_norm": 0.08255860209465027, "learning_rate": 2.0707502587875245e-05, "loss": 0.2381, "step": 44775 }, { "epoch": 3.627349319507453, "grad_norm": 0.08117613196372986, "learning_rate": 2.0703001935280615e-05, "loss": 0.2388, "step": 44776 }, { "epoch": 3.6274303305249513, "grad_norm": 0.07737639546394348, "learning_rate": 2.069850128268599e-05, "loss": 0.2194, "step": 44777 }, { "epoch": 3.6275113415424496, "grad_norm": 0.07773453742265701, "learning_rate": 2.0694000630091366e-05, "loss": 0.2374, "step": 44778 }, { "epoch": 3.6275923525599483, "grad_norm": 0.06742957979440689, "learning_rate": 2.0689499977496736e-05, "loss": 0.2675, "step": 44779 }, { "epoch": 3.6276733635774465, "grad_norm": 0.07162263989448547, "learning_rate": 2.0684999324902113e-05, "loss": 0.238, "step": 44780 }, { "epoch": 3.6277543745949448, "grad_norm": 0.07228162884712219, "learning_rate": 2.0680498672307486e-05, "loss": 0.2255, "step": 44781 }, { "epoch": 3.6278353856124435, "grad_norm": 0.0600263848900795, "learning_rate": 2.067599801971286e-05, "loss": 0.2185, "step": 44782 }, { "epoch": 3.6279163966299417, "grad_norm": 0.0768987163901329, "learning_rate": 2.0671497367118234e-05, "loss": 0.2082, "step": 44783 }, { "epoch": 3.62799740764744, "grad_norm": 0.067320816218853, "learning_rate": 2.0666996714523607e-05, "loss": 0.2136, "step": 44784 }, { "epoch": 3.6280784186649386, "grad_norm": 0.06339792907238007, "learning_rate": 2.066249606192898e-05, "loss": 0.2404, "step": 44785 }, { "epoch": 3.628159429682437, "grad_norm": 0.079399473965168, "learning_rate": 2.0657995409334354e-05, "loss": 0.245, "step": 44786 }, { "epoch": 3.628240440699935, "grad_norm": 0.08469132333993912, "learning_rate": 2.0653494756739728e-05, "loss": 0.2405, "step": 44787 }, { "epoch": 3.628321451717434, "grad_norm": 0.06696237623691559, "learning_rate": 2.06489941041451e-05, "loss": 0.2162, "step": 44788 }, { "epoch": 3.628402462734932, "grad_norm": 0.06755819171667099, "learning_rate": 2.0644493451550475e-05, "loss": 0.2242, "step": 44789 }, { "epoch": 3.6284834737524303, "grad_norm": 0.07293059676885605, "learning_rate": 2.063999279895585e-05, "loss": 0.2206, "step": 44790 }, { "epoch": 3.6285644847699285, "grad_norm": 0.07739424705505371, "learning_rate": 2.0635492146361222e-05, "loss": 0.2512, "step": 44791 }, { "epoch": 3.6286454957874272, "grad_norm": 0.061704766005277634, "learning_rate": 2.0630991493766596e-05, "loss": 0.2304, "step": 44792 }, { "epoch": 3.6287265068049255, "grad_norm": 0.089394211769104, "learning_rate": 2.0626490841171973e-05, "loss": 0.2322, "step": 44793 }, { "epoch": 3.6288075178224237, "grad_norm": 0.06811096519231796, "learning_rate": 2.0621990188577343e-05, "loss": 0.2104, "step": 44794 }, { "epoch": 3.628888528839922, "grad_norm": 0.08393049985170364, "learning_rate": 2.0617489535982717e-05, "loss": 0.2022, "step": 44795 }, { "epoch": 3.6289695398574207, "grad_norm": 0.0783284604549408, "learning_rate": 2.0612988883388094e-05, "loss": 0.2322, "step": 44796 }, { "epoch": 3.629050550874919, "grad_norm": 0.0840248391032219, "learning_rate": 2.0608488230793464e-05, "loss": 0.2561, "step": 44797 }, { "epoch": 3.629131561892417, "grad_norm": 0.07756663858890533, "learning_rate": 2.060398757819884e-05, "loss": 0.2345, "step": 44798 }, { "epoch": 3.629212572909916, "grad_norm": 0.08550877869129181, "learning_rate": 2.0599486925604215e-05, "loss": 0.241, "step": 44799 }, { "epoch": 3.629293583927414, "grad_norm": 0.0756603330373764, "learning_rate": 2.0594986273009588e-05, "loss": 0.2138, "step": 44800 }, { "epoch": 3.6293745949449123, "grad_norm": 0.07740037888288498, "learning_rate": 2.0590485620414962e-05, "loss": 0.2471, "step": 44801 }, { "epoch": 3.629455605962411, "grad_norm": 0.07371073216199875, "learning_rate": 2.0585984967820335e-05, "loss": 0.212, "step": 44802 }, { "epoch": 3.6295366169799093, "grad_norm": 0.07205826789140701, "learning_rate": 2.058148431522571e-05, "loss": 0.2403, "step": 44803 }, { "epoch": 3.6296176279974075, "grad_norm": 0.07409129291772842, "learning_rate": 2.0576983662631083e-05, "loss": 0.231, "step": 44804 }, { "epoch": 3.629698639014906, "grad_norm": 0.06722891330718994, "learning_rate": 2.0572483010036456e-05, "loss": 0.2325, "step": 44805 }, { "epoch": 3.6297796500324044, "grad_norm": 0.10076708346605301, "learning_rate": 2.056798235744183e-05, "loss": 0.2289, "step": 44806 }, { "epoch": 3.6298606610499027, "grad_norm": 0.07837366312742233, "learning_rate": 2.0563481704847203e-05, "loss": 0.2313, "step": 44807 }, { "epoch": 3.6299416720674014, "grad_norm": 0.06908228248357773, "learning_rate": 2.0558981052252577e-05, "loss": 0.1936, "step": 44808 }, { "epoch": 3.6300226830848996, "grad_norm": 0.08010173588991165, "learning_rate": 2.055448039965795e-05, "loss": 0.2201, "step": 44809 }, { "epoch": 3.630103694102398, "grad_norm": 0.0739486813545227, "learning_rate": 2.0549979747063324e-05, "loss": 0.1951, "step": 44810 }, { "epoch": 3.6301847051198965, "grad_norm": 0.07065515965223312, "learning_rate": 2.0545479094468698e-05, "loss": 0.2685, "step": 44811 }, { "epoch": 3.630265716137395, "grad_norm": 0.07847166061401367, "learning_rate": 2.054097844187407e-05, "loss": 0.2361, "step": 44812 }, { "epoch": 3.630346727154893, "grad_norm": 0.07692558318376541, "learning_rate": 2.0536477789279445e-05, "loss": 0.2746, "step": 44813 }, { "epoch": 3.6304277381723913, "grad_norm": 0.05729920044541359, "learning_rate": 2.0531977136684822e-05, "loss": 0.2212, "step": 44814 }, { "epoch": 3.63050874918989, "grad_norm": 0.07073798030614853, "learning_rate": 2.0527476484090192e-05, "loss": 0.2149, "step": 44815 }, { "epoch": 3.630589760207388, "grad_norm": 0.07140544056892395, "learning_rate": 2.0522975831495566e-05, "loss": 0.2652, "step": 44816 }, { "epoch": 3.6306707712248865, "grad_norm": 0.0805303305387497, "learning_rate": 2.0518475178900943e-05, "loss": 0.231, "step": 44817 }, { "epoch": 3.6307517822423847, "grad_norm": 0.0681585893034935, "learning_rate": 2.0513974526306316e-05, "loss": 0.2598, "step": 44818 }, { "epoch": 3.6308327932598834, "grad_norm": 0.0736590027809143, "learning_rate": 2.050947387371169e-05, "loss": 0.1995, "step": 44819 }, { "epoch": 3.6309138042773816, "grad_norm": 0.07716482877731323, "learning_rate": 2.0504973221117064e-05, "loss": 0.2254, "step": 44820 }, { "epoch": 3.63099481529488, "grad_norm": 0.08331847190856934, "learning_rate": 2.0500472568522437e-05, "loss": 0.244, "step": 44821 }, { "epoch": 3.6310758263123786, "grad_norm": 0.07626423239707947, "learning_rate": 2.049597191592781e-05, "loss": 0.2124, "step": 44822 }, { "epoch": 3.631156837329877, "grad_norm": 0.07067970186471939, "learning_rate": 2.0491471263333185e-05, "loss": 0.2237, "step": 44823 }, { "epoch": 3.631237848347375, "grad_norm": 0.06633996218442917, "learning_rate": 2.0486970610738558e-05, "loss": 0.2167, "step": 44824 }, { "epoch": 3.6313188593648738, "grad_norm": 0.06839239597320557, "learning_rate": 2.0482469958143932e-05, "loss": 0.1972, "step": 44825 }, { "epoch": 3.631399870382372, "grad_norm": 0.07643083482980728, "learning_rate": 2.0477969305549305e-05, "loss": 0.2385, "step": 44826 }, { "epoch": 3.6314808813998702, "grad_norm": 0.08145930618047714, "learning_rate": 2.047346865295468e-05, "loss": 0.2476, "step": 44827 }, { "epoch": 3.631561892417369, "grad_norm": 0.08592727780342102, "learning_rate": 2.0468968000360053e-05, "loss": 0.2525, "step": 44828 }, { "epoch": 3.631642903434867, "grad_norm": 0.06574290245771408, "learning_rate": 2.0464467347765426e-05, "loss": 0.2167, "step": 44829 }, { "epoch": 3.6317239144523654, "grad_norm": 0.08025723695755005, "learning_rate": 2.04599666951708e-05, "loss": 0.246, "step": 44830 }, { "epoch": 3.631804925469864, "grad_norm": 0.08162990212440491, "learning_rate": 2.0455466042576173e-05, "loss": 0.2437, "step": 44831 }, { "epoch": 3.6318859364873624, "grad_norm": 0.07384198904037476, "learning_rate": 2.045096538998155e-05, "loss": 0.2257, "step": 44832 }, { "epoch": 3.6319669475048606, "grad_norm": 0.07432828843593597, "learning_rate": 2.0446464737386924e-05, "loss": 0.2154, "step": 44833 }, { "epoch": 3.6320479585223593, "grad_norm": 0.0657813623547554, "learning_rate": 2.0441964084792294e-05, "loss": 0.2132, "step": 44834 }, { "epoch": 3.6321289695398575, "grad_norm": 0.06444726884365082, "learning_rate": 2.043746343219767e-05, "loss": 0.2374, "step": 44835 }, { "epoch": 3.6322099805573558, "grad_norm": 0.06306802481412888, "learning_rate": 2.0432962779603045e-05, "loss": 0.2344, "step": 44836 }, { "epoch": 3.632290991574854, "grad_norm": 0.07714951783418655, "learning_rate": 2.0428462127008415e-05, "loss": 0.2334, "step": 44837 }, { "epoch": 3.6323720025923527, "grad_norm": 0.05176575109362602, "learning_rate": 2.0423961474413792e-05, "loss": 0.2235, "step": 44838 }, { "epoch": 3.632453013609851, "grad_norm": 0.07315241545438766, "learning_rate": 2.0419460821819166e-05, "loss": 0.2138, "step": 44839 }, { "epoch": 3.632534024627349, "grad_norm": 0.0775456428527832, "learning_rate": 2.041496016922454e-05, "loss": 0.272, "step": 44840 }, { "epoch": 3.6326150356448474, "grad_norm": 0.07913167774677277, "learning_rate": 2.0410459516629913e-05, "loss": 0.2366, "step": 44841 }, { "epoch": 3.632696046662346, "grad_norm": 0.07824068516492844, "learning_rate": 2.0405958864035286e-05, "loss": 0.2362, "step": 44842 }, { "epoch": 3.6327770576798444, "grad_norm": 0.07198026031255722, "learning_rate": 2.040145821144066e-05, "loss": 0.2302, "step": 44843 }, { "epoch": 3.6328580686973426, "grad_norm": 0.058635443449020386, "learning_rate": 2.0396957558846034e-05, "loss": 0.2049, "step": 44844 }, { "epoch": 3.6329390797148413, "grad_norm": 0.06972447782754898, "learning_rate": 2.0392456906251407e-05, "loss": 0.2108, "step": 44845 }, { "epoch": 3.6330200907323396, "grad_norm": 0.0633692741394043, "learning_rate": 2.038795625365678e-05, "loss": 0.2304, "step": 44846 }, { "epoch": 3.633101101749838, "grad_norm": 0.06997337937355042, "learning_rate": 2.0383455601062154e-05, "loss": 0.2454, "step": 44847 }, { "epoch": 3.6331821127673365, "grad_norm": 0.06759506464004517, "learning_rate": 2.0378954948467528e-05, "loss": 0.2419, "step": 44848 }, { "epoch": 3.6332631237848347, "grad_norm": 0.09851627051830292, "learning_rate": 2.03744542958729e-05, "loss": 0.2466, "step": 44849 }, { "epoch": 3.633344134802333, "grad_norm": 0.06574554741382599, "learning_rate": 2.0369953643278275e-05, "loss": 0.2095, "step": 44850 }, { "epoch": 3.6334251458198317, "grad_norm": 0.07231997698545456, "learning_rate": 2.0365452990683652e-05, "loss": 0.2424, "step": 44851 }, { "epoch": 3.63350615683733, "grad_norm": 0.07679183781147003, "learning_rate": 2.0360952338089022e-05, "loss": 0.2449, "step": 44852 }, { "epoch": 3.633587167854828, "grad_norm": 0.07032614201307297, "learning_rate": 2.03564516854944e-05, "loss": 0.2703, "step": 44853 }, { "epoch": 3.633668178872327, "grad_norm": 0.07566996663808823, "learning_rate": 2.0351951032899773e-05, "loss": 0.2033, "step": 44854 }, { "epoch": 3.633749189889825, "grad_norm": 0.07417989522218704, "learning_rate": 2.0347450380305143e-05, "loss": 0.1984, "step": 44855 }, { "epoch": 3.6338302009073233, "grad_norm": 0.07153971493244171, "learning_rate": 2.034294972771052e-05, "loss": 0.1825, "step": 44856 }, { "epoch": 3.633911211924822, "grad_norm": 0.08344955742359161, "learning_rate": 2.0338449075115894e-05, "loss": 0.2373, "step": 44857 }, { "epoch": 3.6339922229423203, "grad_norm": 0.0726422518491745, "learning_rate": 2.0333948422521267e-05, "loss": 0.222, "step": 44858 }, { "epoch": 3.6340732339598185, "grad_norm": 0.07534100860357285, "learning_rate": 2.032944776992664e-05, "loss": 0.2417, "step": 44859 }, { "epoch": 3.6341542449773168, "grad_norm": 0.07408151030540466, "learning_rate": 2.0324947117332015e-05, "loss": 0.2375, "step": 44860 }, { "epoch": 3.6342352559948155, "grad_norm": 0.07573696970939636, "learning_rate": 2.0320446464737388e-05, "loss": 0.1934, "step": 44861 }, { "epoch": 3.6343162670123137, "grad_norm": 0.0753345638513565, "learning_rate": 2.0315945812142762e-05, "loss": 0.2019, "step": 44862 }, { "epoch": 3.634397278029812, "grad_norm": 0.07289306819438934, "learning_rate": 2.0311445159548135e-05, "loss": 0.265, "step": 44863 }, { "epoch": 3.63447828904731, "grad_norm": 0.0770828127861023, "learning_rate": 2.030694450695351e-05, "loss": 0.2307, "step": 44864 }, { "epoch": 3.634559300064809, "grad_norm": 0.07336337864398956, "learning_rate": 2.0302443854358883e-05, "loss": 0.2183, "step": 44865 }, { "epoch": 3.634640311082307, "grad_norm": 0.06365802139043808, "learning_rate": 2.0297943201764256e-05, "loss": 0.2023, "step": 44866 }, { "epoch": 3.6347213220998054, "grad_norm": 0.08312965929508209, "learning_rate": 2.029344254916963e-05, "loss": 0.2136, "step": 44867 }, { "epoch": 3.634802333117304, "grad_norm": 0.07333937287330627, "learning_rate": 2.0288941896575003e-05, "loss": 0.2503, "step": 44868 }, { "epoch": 3.6348833441348023, "grad_norm": 0.06631088256835938, "learning_rate": 2.028444124398038e-05, "loss": 0.2232, "step": 44869 }, { "epoch": 3.6349643551523005, "grad_norm": 0.08965929597616196, "learning_rate": 2.027994059138575e-05, "loss": 0.2516, "step": 44870 }, { "epoch": 3.6350453661697992, "grad_norm": 0.06641999632120132, "learning_rate": 2.0275439938791128e-05, "loss": 0.2144, "step": 44871 }, { "epoch": 3.6351263771872975, "grad_norm": 0.06216254085302353, "learning_rate": 2.02709392861965e-05, "loss": 0.2177, "step": 44872 }, { "epoch": 3.6352073882047957, "grad_norm": 0.07060118019580841, "learning_rate": 2.026643863360187e-05, "loss": 0.2131, "step": 44873 }, { "epoch": 3.6352883992222944, "grad_norm": 0.055240385234355927, "learning_rate": 2.026193798100725e-05, "loss": 0.172, "step": 44874 }, { "epoch": 3.6353694102397927, "grad_norm": 0.091565802693367, "learning_rate": 2.0257437328412622e-05, "loss": 0.2922, "step": 44875 }, { "epoch": 3.635450421257291, "grad_norm": 0.06432320922613144, "learning_rate": 2.0252936675817992e-05, "loss": 0.2116, "step": 44876 }, { "epoch": 3.6355314322747896, "grad_norm": 0.08700551092624664, "learning_rate": 2.024843602322337e-05, "loss": 0.2635, "step": 44877 }, { "epoch": 3.635612443292288, "grad_norm": 0.062044791877269745, "learning_rate": 2.0243935370628743e-05, "loss": 0.2336, "step": 44878 }, { "epoch": 3.635693454309786, "grad_norm": 0.07158152014017105, "learning_rate": 2.0239434718034116e-05, "loss": 0.2155, "step": 44879 }, { "epoch": 3.6357744653272848, "grad_norm": 0.07280169427394867, "learning_rate": 2.023493406543949e-05, "loss": 0.2107, "step": 44880 }, { "epoch": 3.635855476344783, "grad_norm": 0.06739906966686249, "learning_rate": 2.0230433412844864e-05, "loss": 0.1984, "step": 44881 }, { "epoch": 3.6359364873622813, "grad_norm": 0.06984557956457138, "learning_rate": 2.0225932760250237e-05, "loss": 0.2176, "step": 44882 }, { "epoch": 3.6360174983797795, "grad_norm": 0.0813283696770668, "learning_rate": 2.022143210765561e-05, "loss": 0.2505, "step": 44883 }, { "epoch": 3.636098509397278, "grad_norm": 0.0551692433655262, "learning_rate": 2.0216931455060988e-05, "loss": 0.2228, "step": 44884 }, { "epoch": 3.6361795204147764, "grad_norm": 0.06857820600271225, "learning_rate": 2.0212430802466358e-05, "loss": 0.2151, "step": 44885 }, { "epoch": 3.6362605314322747, "grad_norm": 0.07335050404071808, "learning_rate": 2.020793014987173e-05, "loss": 0.2334, "step": 44886 }, { "epoch": 3.636341542449773, "grad_norm": 0.09249746799468994, "learning_rate": 2.020342949727711e-05, "loss": 0.2194, "step": 44887 }, { "epoch": 3.6364225534672716, "grad_norm": 0.06488305330276489, "learning_rate": 2.019892884468248e-05, "loss": 0.2164, "step": 44888 }, { "epoch": 3.63650356448477, "grad_norm": 0.0670824721455574, "learning_rate": 2.0194428192087852e-05, "loss": 0.233, "step": 44889 }, { "epoch": 3.636584575502268, "grad_norm": 0.06511478871107101, "learning_rate": 2.018992753949323e-05, "loss": 0.2027, "step": 44890 }, { "epoch": 3.636665586519767, "grad_norm": 0.08423558622598648, "learning_rate": 2.01854268868986e-05, "loss": 0.233, "step": 44891 }, { "epoch": 3.636746597537265, "grad_norm": 0.06754543632268906, "learning_rate": 2.0180926234303977e-05, "loss": 0.234, "step": 44892 }, { "epoch": 3.6368276085547633, "grad_norm": 0.07313832640647888, "learning_rate": 2.017642558170935e-05, "loss": 0.2247, "step": 44893 }, { "epoch": 3.636908619572262, "grad_norm": 0.07286649197340012, "learning_rate": 2.017192492911472e-05, "loss": 0.201, "step": 44894 }, { "epoch": 3.63698963058976, "grad_norm": 0.09970583766698837, "learning_rate": 2.0167424276520097e-05, "loss": 0.2809, "step": 44895 }, { "epoch": 3.6370706416072585, "grad_norm": 0.0816020667552948, "learning_rate": 2.016292362392547e-05, "loss": 0.2441, "step": 44896 }, { "epoch": 3.637151652624757, "grad_norm": 0.0763312503695488, "learning_rate": 2.015842297133084e-05, "loss": 0.2461, "step": 44897 }, { "epoch": 3.6372326636422554, "grad_norm": 0.08208746463060379, "learning_rate": 2.0153922318736218e-05, "loss": 0.228, "step": 44898 }, { "epoch": 3.6373136746597536, "grad_norm": 0.06755077093839645, "learning_rate": 2.0149421666141592e-05, "loss": 0.2177, "step": 44899 }, { "epoch": 3.6373946856772523, "grad_norm": 0.06575210392475128, "learning_rate": 2.0144921013546966e-05, "loss": 0.2265, "step": 44900 }, { "epoch": 3.6374756966947506, "grad_norm": 0.08353587239980698, "learning_rate": 2.014042036095234e-05, "loss": 0.2128, "step": 44901 }, { "epoch": 3.637556707712249, "grad_norm": 0.07195543497800827, "learning_rate": 2.0135919708357713e-05, "loss": 0.1838, "step": 44902 }, { "epoch": 3.6376377187297475, "grad_norm": 0.0775909349322319, "learning_rate": 2.0131419055763086e-05, "loss": 0.2209, "step": 44903 }, { "epoch": 3.6377187297472457, "grad_norm": 0.06664683669805527, "learning_rate": 2.012691840316846e-05, "loss": 0.2363, "step": 44904 }, { "epoch": 3.637799740764744, "grad_norm": 0.08048176020383835, "learning_rate": 2.0122417750573837e-05, "loss": 0.2668, "step": 44905 }, { "epoch": 3.6378807517822422, "grad_norm": 0.07442770153284073, "learning_rate": 2.0117917097979207e-05, "loss": 0.2433, "step": 44906 }, { "epoch": 3.637961762799741, "grad_norm": 0.07577343285083771, "learning_rate": 2.011341644538458e-05, "loss": 0.2512, "step": 44907 }, { "epoch": 3.638042773817239, "grad_norm": 0.08586008101701736, "learning_rate": 2.0108915792789958e-05, "loss": 0.2508, "step": 44908 }, { "epoch": 3.6381237848347374, "grad_norm": 0.07474494725465775, "learning_rate": 2.0104415140195328e-05, "loss": 0.2261, "step": 44909 }, { "epoch": 3.6382047958522357, "grad_norm": 0.061617929488420486, "learning_rate": 2.00999144876007e-05, "loss": 0.2242, "step": 44910 }, { "epoch": 3.6382858068697344, "grad_norm": 0.07338300347328186, "learning_rate": 2.009541383500608e-05, "loss": 0.2138, "step": 44911 }, { "epoch": 3.6383668178872326, "grad_norm": 0.07744680345058441, "learning_rate": 2.009091318241145e-05, "loss": 0.2349, "step": 44912 }, { "epoch": 3.638447828904731, "grad_norm": 0.06322570890188217, "learning_rate": 2.0086412529816826e-05, "loss": 0.214, "step": 44913 }, { "epoch": 3.6385288399222295, "grad_norm": 0.06235915422439575, "learning_rate": 2.00819118772222e-05, "loss": 0.2225, "step": 44914 }, { "epoch": 3.6386098509397278, "grad_norm": 0.06920929253101349, "learning_rate": 2.007741122462757e-05, "loss": 0.2169, "step": 44915 }, { "epoch": 3.638690861957226, "grad_norm": 0.07871360331773758, "learning_rate": 2.0072910572032947e-05, "loss": 0.2381, "step": 44916 }, { "epoch": 3.6387718729747247, "grad_norm": 0.06010260805487633, "learning_rate": 2.006840991943832e-05, "loss": 0.2352, "step": 44917 }, { "epoch": 3.638852883992223, "grad_norm": 0.07746662944555283, "learning_rate": 2.0063909266843694e-05, "loss": 0.2243, "step": 44918 }, { "epoch": 3.638933895009721, "grad_norm": 0.0740174651145935, "learning_rate": 2.0059408614249067e-05, "loss": 0.2233, "step": 44919 }, { "epoch": 3.63901490602722, "grad_norm": 0.07685885578393936, "learning_rate": 2.005490796165444e-05, "loss": 0.204, "step": 44920 }, { "epoch": 3.639095917044718, "grad_norm": 0.07137834280729294, "learning_rate": 2.0050407309059815e-05, "loss": 0.2065, "step": 44921 }, { "epoch": 3.6391769280622164, "grad_norm": 0.08067955821752548, "learning_rate": 2.0045906656465188e-05, "loss": 0.2387, "step": 44922 }, { "epoch": 3.639257939079715, "grad_norm": 0.06330350041389465, "learning_rate": 2.0041406003870562e-05, "loss": 0.2375, "step": 44923 }, { "epoch": 3.6393389500972133, "grad_norm": 0.0679686963558197, "learning_rate": 2.0036905351275935e-05, "loss": 0.2031, "step": 44924 }, { "epoch": 3.6394199611147116, "grad_norm": 0.08646734058856964, "learning_rate": 2.003240469868131e-05, "loss": 0.2335, "step": 44925 }, { "epoch": 3.6395009721322102, "grad_norm": 0.08247414231300354, "learning_rate": 2.0027904046086686e-05, "loss": 0.2442, "step": 44926 }, { "epoch": 3.6395819831497085, "grad_norm": 0.08173704147338867, "learning_rate": 2.0023403393492056e-05, "loss": 0.2308, "step": 44927 }, { "epoch": 3.6396629941672067, "grad_norm": 0.06407459825277328, "learning_rate": 2.001890274089743e-05, "loss": 0.1961, "step": 44928 }, { "epoch": 3.639744005184705, "grad_norm": 0.07259169965982437, "learning_rate": 2.0014402088302807e-05, "loss": 0.2154, "step": 44929 }, { "epoch": 3.6398250162022032, "grad_norm": 0.07888376712799072, "learning_rate": 2.0009901435708177e-05, "loss": 0.2135, "step": 44930 }, { "epoch": 3.639906027219702, "grad_norm": 0.06632392108440399, "learning_rate": 2.0005400783113554e-05, "loss": 0.2069, "step": 44931 }, { "epoch": 3.6399870382372, "grad_norm": 0.08187742531299591, "learning_rate": 2.0000900130518928e-05, "loss": 0.1845, "step": 44932 }, { "epoch": 3.6400680492546984, "grad_norm": 0.07673794031143188, "learning_rate": 1.9996399477924298e-05, "loss": 0.2302, "step": 44933 }, { "epoch": 3.640149060272197, "grad_norm": 0.06572210788726807, "learning_rate": 1.9991898825329675e-05, "loss": 0.2324, "step": 44934 }, { "epoch": 3.6402300712896953, "grad_norm": 0.05955757200717926, "learning_rate": 1.998739817273505e-05, "loss": 0.2286, "step": 44935 }, { "epoch": 3.6403110823071936, "grad_norm": 0.08277115225791931, "learning_rate": 1.998289752014042e-05, "loss": 0.2379, "step": 44936 }, { "epoch": 3.6403920933246923, "grad_norm": 0.06754428893327713, "learning_rate": 1.9978396867545796e-05, "loss": 0.2311, "step": 44937 }, { "epoch": 3.6404731043421905, "grad_norm": 0.06916896998882294, "learning_rate": 1.997389621495117e-05, "loss": 0.206, "step": 44938 }, { "epoch": 3.6405541153596888, "grad_norm": 0.07250376790761948, "learning_rate": 1.9969395562356543e-05, "loss": 0.2244, "step": 44939 }, { "epoch": 3.6406351263771874, "grad_norm": 0.0685395821928978, "learning_rate": 1.9964894909761916e-05, "loss": 0.2167, "step": 44940 }, { "epoch": 3.6407161373946857, "grad_norm": 0.07498252391815186, "learning_rate": 1.996039425716729e-05, "loss": 0.2003, "step": 44941 }, { "epoch": 3.640797148412184, "grad_norm": 0.07298076897859573, "learning_rate": 1.9955893604572664e-05, "loss": 0.2286, "step": 44942 }, { "epoch": 3.6408781594296826, "grad_norm": 0.07976315170526505, "learning_rate": 1.9951392951978037e-05, "loss": 0.1953, "step": 44943 }, { "epoch": 3.640959170447181, "grad_norm": 0.08286472409963608, "learning_rate": 1.9946892299383414e-05, "loss": 0.2218, "step": 44944 }, { "epoch": 3.641040181464679, "grad_norm": 0.07928077131509781, "learning_rate": 1.9942391646788784e-05, "loss": 0.2501, "step": 44945 }, { "epoch": 3.641121192482178, "grad_norm": 0.07734663784503937, "learning_rate": 1.9937890994194158e-05, "loss": 0.2303, "step": 44946 }, { "epoch": 3.641202203499676, "grad_norm": 0.07841353863477707, "learning_rate": 1.9933390341599535e-05, "loss": 0.2627, "step": 44947 }, { "epoch": 3.6412832145171743, "grad_norm": 0.08440117537975311, "learning_rate": 1.9928889689004905e-05, "loss": 0.212, "step": 44948 }, { "epoch": 3.641364225534673, "grad_norm": 0.06597091257572174, "learning_rate": 1.992438903641028e-05, "loss": 0.205, "step": 44949 }, { "epoch": 3.6414452365521712, "grad_norm": 0.055673886090517044, "learning_rate": 1.9919888383815656e-05, "loss": 0.2006, "step": 44950 }, { "epoch": 3.6415262475696695, "grad_norm": 0.07033570110797882, "learning_rate": 1.9915387731221026e-05, "loss": 0.2611, "step": 44951 }, { "epoch": 3.6416072585871677, "grad_norm": 0.06624488532543182, "learning_rate": 1.9910887078626403e-05, "loss": 0.2301, "step": 44952 }, { "epoch": 3.641688269604666, "grad_norm": 0.07321982830762863, "learning_rate": 1.9906386426031777e-05, "loss": 0.2513, "step": 44953 }, { "epoch": 3.6417692806221647, "grad_norm": 0.07193963974714279, "learning_rate": 1.9901885773437147e-05, "loss": 0.2259, "step": 44954 }, { "epoch": 3.641850291639663, "grad_norm": 0.07336988300085068, "learning_rate": 1.9897385120842524e-05, "loss": 0.232, "step": 44955 }, { "epoch": 3.641931302657161, "grad_norm": 0.09553751349449158, "learning_rate": 1.9892884468247897e-05, "loss": 0.2257, "step": 44956 }, { "epoch": 3.64201231367466, "grad_norm": 0.06732214242219925, "learning_rate": 1.988838381565327e-05, "loss": 0.2179, "step": 44957 }, { "epoch": 3.642093324692158, "grad_norm": 0.05843079090118408, "learning_rate": 1.9883883163058645e-05, "loss": 0.1975, "step": 44958 }, { "epoch": 3.6421743357096563, "grad_norm": 0.052807748317718506, "learning_rate": 1.9879382510464018e-05, "loss": 0.2034, "step": 44959 }, { "epoch": 3.642255346727155, "grad_norm": 0.05599704384803772, "learning_rate": 1.9874881857869392e-05, "loss": 0.2139, "step": 44960 }, { "epoch": 3.6423363577446533, "grad_norm": 0.07424107939004898, "learning_rate": 1.9870381205274765e-05, "loss": 0.2366, "step": 44961 }, { "epoch": 3.6424173687621515, "grad_norm": 0.08620074391365051, "learning_rate": 1.986588055268014e-05, "loss": 0.2823, "step": 44962 }, { "epoch": 3.64249837977965, "grad_norm": 0.07798013836145401, "learning_rate": 1.9861379900085513e-05, "loss": 0.2068, "step": 44963 }, { "epoch": 3.6425793907971484, "grad_norm": 0.0656246617436409, "learning_rate": 1.9856879247490886e-05, "loss": 0.2024, "step": 44964 }, { "epoch": 3.6426604018146467, "grad_norm": 0.08685047924518585, "learning_rate": 1.9852378594896263e-05, "loss": 0.2391, "step": 44965 }, { "epoch": 3.6427414128321454, "grad_norm": 0.07338472455739975, "learning_rate": 1.9847877942301633e-05, "loss": 0.2311, "step": 44966 }, { "epoch": 3.6428224238496436, "grad_norm": 0.072905994951725, "learning_rate": 1.9843377289707007e-05, "loss": 0.2356, "step": 44967 }, { "epoch": 3.642903434867142, "grad_norm": 0.08200103044509888, "learning_rate": 1.9838876637112384e-05, "loss": 0.2411, "step": 44968 }, { "epoch": 3.6429844458846405, "grad_norm": 0.07015134394168854, "learning_rate": 1.9834375984517754e-05, "loss": 0.209, "step": 44969 }, { "epoch": 3.643065456902139, "grad_norm": 0.07618094980716705, "learning_rate": 1.982987533192313e-05, "loss": 0.2246, "step": 44970 }, { "epoch": 3.643146467919637, "grad_norm": 0.07204940170049667, "learning_rate": 1.9825374679328505e-05, "loss": 0.231, "step": 44971 }, { "epoch": 3.6432274789371357, "grad_norm": 0.06646368652582169, "learning_rate": 1.9820874026733875e-05, "loss": 0.2465, "step": 44972 }, { "epoch": 3.643308489954634, "grad_norm": 0.06563836336135864, "learning_rate": 1.9816373374139252e-05, "loss": 0.2139, "step": 44973 }, { "epoch": 3.643389500972132, "grad_norm": 0.07280927151441574, "learning_rate": 1.9811872721544626e-05, "loss": 0.2233, "step": 44974 }, { "epoch": 3.6434705119896305, "grad_norm": 0.07386231422424316, "learning_rate": 1.9807372068949996e-05, "loss": 0.1915, "step": 44975 }, { "epoch": 3.6435515230071287, "grad_norm": 0.06035052239894867, "learning_rate": 1.9802871416355373e-05, "loss": 0.2234, "step": 44976 }, { "epoch": 3.6436325340246274, "grad_norm": 0.06255166232585907, "learning_rate": 1.9798370763760747e-05, "loss": 0.2322, "step": 44977 }, { "epoch": 3.6437135450421256, "grad_norm": 0.06946348398923874, "learning_rate": 1.979387011116612e-05, "loss": 0.2437, "step": 44978 }, { "epoch": 3.643794556059624, "grad_norm": 0.06545937806367874, "learning_rate": 1.9789369458571494e-05, "loss": 0.2059, "step": 44979 }, { "epoch": 3.6438755670771226, "grad_norm": 0.07262036204338074, "learning_rate": 1.9784868805976867e-05, "loss": 0.2138, "step": 44980 }, { "epoch": 3.643956578094621, "grad_norm": 0.06998401880264282, "learning_rate": 1.978036815338224e-05, "loss": 0.2435, "step": 44981 }, { "epoch": 3.644037589112119, "grad_norm": 0.07168823480606079, "learning_rate": 1.9775867500787615e-05, "loss": 0.2259, "step": 44982 }, { "epoch": 3.6441186001296177, "grad_norm": 0.06272865831851959, "learning_rate": 1.9771366848192988e-05, "loss": 0.228, "step": 44983 }, { "epoch": 3.644199611147116, "grad_norm": 0.060105741024017334, "learning_rate": 1.9766866195598362e-05, "loss": 0.2234, "step": 44984 }, { "epoch": 3.6442806221646142, "grad_norm": 0.07992798089981079, "learning_rate": 1.9762365543003735e-05, "loss": 0.2194, "step": 44985 }, { "epoch": 3.644361633182113, "grad_norm": 0.0626491978764534, "learning_rate": 1.9757864890409112e-05, "loss": 0.2328, "step": 44986 }, { "epoch": 3.644442644199611, "grad_norm": 0.07601386308670044, "learning_rate": 1.9753364237814483e-05, "loss": 0.205, "step": 44987 }, { "epoch": 3.6445236552171094, "grad_norm": 0.06073329225182533, "learning_rate": 1.9748863585219856e-05, "loss": 0.2212, "step": 44988 }, { "epoch": 3.644604666234608, "grad_norm": 0.09335854649543762, "learning_rate": 1.9744362932625233e-05, "loss": 0.2202, "step": 44989 }, { "epoch": 3.6446856772521063, "grad_norm": 0.08301527053117752, "learning_rate": 1.9739862280030603e-05, "loss": 0.2568, "step": 44990 }, { "epoch": 3.6447666882696046, "grad_norm": 0.06001551076769829, "learning_rate": 1.973536162743598e-05, "loss": 0.1837, "step": 44991 }, { "epoch": 3.6448476992871033, "grad_norm": 0.07624075561761856, "learning_rate": 1.9730860974841354e-05, "loss": 0.2352, "step": 44992 }, { "epoch": 3.6449287103046015, "grad_norm": 0.0669778510928154, "learning_rate": 1.9726360322246724e-05, "loss": 0.2127, "step": 44993 }, { "epoch": 3.6450097213220998, "grad_norm": 0.0638890415430069, "learning_rate": 1.97218596696521e-05, "loss": 0.2323, "step": 44994 }, { "epoch": 3.6450907323395985, "grad_norm": 0.06809230148792267, "learning_rate": 1.9717359017057475e-05, "loss": 0.2385, "step": 44995 }, { "epoch": 3.6451717433570967, "grad_norm": 0.06901613622903824, "learning_rate": 1.971285836446285e-05, "loss": 0.2086, "step": 44996 }, { "epoch": 3.645252754374595, "grad_norm": 0.07063661515712738, "learning_rate": 1.9708357711868222e-05, "loss": 0.2189, "step": 44997 }, { "epoch": 3.645333765392093, "grad_norm": 0.06737226247787476, "learning_rate": 1.9703857059273596e-05, "loss": 0.2521, "step": 44998 }, { "epoch": 3.6454147764095914, "grad_norm": 0.06689809262752533, "learning_rate": 1.969935640667897e-05, "loss": 0.2213, "step": 44999 }, { "epoch": 3.64549578742709, "grad_norm": 0.07541733980178833, "learning_rate": 1.9694855754084343e-05, "loss": 0.2074, "step": 45000 }, { "epoch": 3.6455767984445884, "grad_norm": 0.06847432255744934, "learning_rate": 1.9690355101489716e-05, "loss": 0.2044, "step": 45001 }, { "epoch": 3.6456578094620866, "grad_norm": 0.0679175928235054, "learning_rate": 1.968585444889509e-05, "loss": 0.22, "step": 45002 }, { "epoch": 3.6457388204795853, "grad_norm": 0.07706435024738312, "learning_rate": 1.9681353796300464e-05, "loss": 0.199, "step": 45003 }, { "epoch": 3.6458198314970836, "grad_norm": 0.07787571847438812, "learning_rate": 1.967685314370584e-05, "loss": 0.2034, "step": 45004 }, { "epoch": 3.645900842514582, "grad_norm": 0.0687023401260376, "learning_rate": 1.967235249111121e-05, "loss": 0.2242, "step": 45005 }, { "epoch": 3.6459818535320805, "grad_norm": 0.07250736653804779, "learning_rate": 1.9667851838516584e-05, "loss": 0.2299, "step": 45006 }, { "epoch": 3.6460628645495787, "grad_norm": 0.05602612718939781, "learning_rate": 1.966335118592196e-05, "loss": 0.1778, "step": 45007 }, { "epoch": 3.646143875567077, "grad_norm": 0.0714431181550026, "learning_rate": 1.965885053332733e-05, "loss": 0.23, "step": 45008 }, { "epoch": 3.6462248865845757, "grad_norm": 0.06440123170614243, "learning_rate": 1.9654349880732705e-05, "loss": 0.2232, "step": 45009 }, { "epoch": 3.646305897602074, "grad_norm": 0.06279275566339493, "learning_rate": 1.9649849228138082e-05, "loss": 0.2395, "step": 45010 }, { "epoch": 3.646386908619572, "grad_norm": 0.06747173517942429, "learning_rate": 1.9645348575543452e-05, "loss": 0.2352, "step": 45011 }, { "epoch": 3.646467919637071, "grad_norm": 0.06958412379026413, "learning_rate": 1.964084792294883e-05, "loss": 0.2271, "step": 45012 }, { "epoch": 3.646548930654569, "grad_norm": 0.08571343868970871, "learning_rate": 1.9636347270354203e-05, "loss": 0.2349, "step": 45013 }, { "epoch": 3.6466299416720673, "grad_norm": 0.06197122111916542, "learning_rate": 1.9631846617759577e-05, "loss": 0.2058, "step": 45014 }, { "epoch": 3.646710952689566, "grad_norm": 0.06550417840480804, "learning_rate": 1.962734596516495e-05, "loss": 0.229, "step": 45015 }, { "epoch": 3.6467919637070643, "grad_norm": 0.0715557187795639, "learning_rate": 1.9622845312570324e-05, "loss": 0.2679, "step": 45016 }, { "epoch": 3.6468729747245625, "grad_norm": 0.06895950436592102, "learning_rate": 1.9618344659975697e-05, "loss": 0.1977, "step": 45017 }, { "epoch": 3.6469539857420608, "grad_norm": 0.06980966776609421, "learning_rate": 1.961384400738107e-05, "loss": 0.249, "step": 45018 }, { "epoch": 3.6470349967595594, "grad_norm": 0.06876849383115768, "learning_rate": 1.9609343354786445e-05, "loss": 0.2313, "step": 45019 }, { "epoch": 3.6471160077770577, "grad_norm": 0.07544594258069992, "learning_rate": 1.9604842702191818e-05, "loss": 0.2278, "step": 45020 }, { "epoch": 3.647197018794556, "grad_norm": 0.08122597634792328, "learning_rate": 1.9600342049597192e-05, "loss": 0.2362, "step": 45021 }, { "epoch": 3.647278029812054, "grad_norm": 0.06714284420013428, "learning_rate": 1.9595841397002565e-05, "loss": 0.1946, "step": 45022 }, { "epoch": 3.647359040829553, "grad_norm": 0.06398520618677139, "learning_rate": 1.959134074440794e-05, "loss": 0.2302, "step": 45023 }, { "epoch": 3.647440051847051, "grad_norm": 0.07198784500360489, "learning_rate": 1.9586840091813313e-05, "loss": 0.2032, "step": 45024 }, { "epoch": 3.6475210628645494, "grad_norm": 0.06664520502090454, "learning_rate": 1.958233943921869e-05, "loss": 0.2328, "step": 45025 }, { "epoch": 3.647602073882048, "grad_norm": 0.06397178024053574, "learning_rate": 1.957783878662406e-05, "loss": 0.213, "step": 45026 }, { "epoch": 3.6476830848995463, "grad_norm": 0.058583084493875504, "learning_rate": 1.9573338134029433e-05, "loss": 0.203, "step": 45027 }, { "epoch": 3.6477640959170445, "grad_norm": 0.06231728196144104, "learning_rate": 1.956883748143481e-05, "loss": 0.2317, "step": 45028 }, { "epoch": 3.6478451069345432, "grad_norm": 0.08897008001804352, "learning_rate": 1.9564336828840184e-05, "loss": 0.2107, "step": 45029 }, { "epoch": 3.6479261179520415, "grad_norm": 0.08053074032068253, "learning_rate": 1.9559836176245558e-05, "loss": 0.1974, "step": 45030 }, { "epoch": 3.6480071289695397, "grad_norm": 0.07111253589391708, "learning_rate": 1.955533552365093e-05, "loss": 0.2069, "step": 45031 }, { "epoch": 3.6480881399870384, "grad_norm": 0.07549881190061569, "learning_rate": 1.9550834871056305e-05, "loss": 0.2328, "step": 45032 }, { "epoch": 3.6481691510045366, "grad_norm": 0.058063820004463196, "learning_rate": 1.954633421846168e-05, "loss": 0.1701, "step": 45033 }, { "epoch": 3.648250162022035, "grad_norm": 0.09114842861890793, "learning_rate": 1.9541833565867052e-05, "loss": 0.2278, "step": 45034 }, { "epoch": 3.6483311730395336, "grad_norm": 0.08632674813270569, "learning_rate": 1.9537332913272426e-05, "loss": 0.2388, "step": 45035 }, { "epoch": 3.648412184057032, "grad_norm": 0.07076793164014816, "learning_rate": 1.95328322606778e-05, "loss": 0.2131, "step": 45036 }, { "epoch": 3.64849319507453, "grad_norm": 0.08358518034219742, "learning_rate": 1.9528331608083173e-05, "loss": 0.2379, "step": 45037 }, { "epoch": 3.6485742060920288, "grad_norm": 0.058723390102386475, "learning_rate": 1.9523830955488546e-05, "loss": 0.2178, "step": 45038 }, { "epoch": 3.648655217109527, "grad_norm": 0.09624432027339935, "learning_rate": 1.951933030289392e-05, "loss": 0.2327, "step": 45039 }, { "epoch": 3.6487362281270252, "grad_norm": 0.0518026165664196, "learning_rate": 1.9514829650299294e-05, "loss": 0.22, "step": 45040 }, { "epoch": 3.6488172391445235, "grad_norm": 0.07486674934625626, "learning_rate": 1.9510328997704667e-05, "loss": 0.1927, "step": 45041 }, { "epoch": 3.648898250162022, "grad_norm": 0.0642525851726532, "learning_rate": 1.950582834511004e-05, "loss": 0.195, "step": 45042 }, { "epoch": 3.6489792611795204, "grad_norm": 0.0849383994936943, "learning_rate": 1.9501327692515418e-05, "loss": 0.2345, "step": 45043 }, { "epoch": 3.6490602721970187, "grad_norm": 0.05372621491551399, "learning_rate": 1.9496827039920788e-05, "loss": 0.2072, "step": 45044 }, { "epoch": 3.649141283214517, "grad_norm": 0.07009585201740265, "learning_rate": 1.9492326387326162e-05, "loss": 0.2236, "step": 45045 }, { "epoch": 3.6492222942320156, "grad_norm": 0.07817225158214569, "learning_rate": 1.948782573473154e-05, "loss": 0.1953, "step": 45046 }, { "epoch": 3.649303305249514, "grad_norm": 0.06566929817199707, "learning_rate": 1.9483325082136912e-05, "loss": 0.2251, "step": 45047 }, { "epoch": 3.649384316267012, "grad_norm": 0.0773925706744194, "learning_rate": 1.9478824429542283e-05, "loss": 0.2595, "step": 45048 }, { "epoch": 3.649465327284511, "grad_norm": 0.08280571550130844, "learning_rate": 1.947432377694766e-05, "loss": 0.234, "step": 45049 }, { "epoch": 3.649546338302009, "grad_norm": 0.060975659638643265, "learning_rate": 1.9469823124353033e-05, "loss": 0.175, "step": 45050 }, { "epoch": 3.6496273493195073, "grad_norm": 0.0727422684431076, "learning_rate": 1.9465322471758407e-05, "loss": 0.2361, "step": 45051 }, { "epoch": 3.649708360337006, "grad_norm": 0.06734203547239304, "learning_rate": 1.946082181916378e-05, "loss": 0.2283, "step": 45052 }, { "epoch": 3.649789371354504, "grad_norm": 0.0899474248290062, "learning_rate": 1.9456321166569154e-05, "loss": 0.2251, "step": 45053 }, { "epoch": 3.6498703823720025, "grad_norm": 0.05988341569900513, "learning_rate": 1.9451820513974528e-05, "loss": 0.2213, "step": 45054 }, { "epoch": 3.649951393389501, "grad_norm": 0.0768548995256424, "learning_rate": 1.94473198613799e-05, "loss": 0.2439, "step": 45055 }, { "epoch": 3.6500324044069994, "grad_norm": 0.0809468999505043, "learning_rate": 1.9442819208785275e-05, "loss": 0.2152, "step": 45056 }, { "epoch": 3.6501134154244976, "grad_norm": 0.08236753940582275, "learning_rate": 1.943831855619065e-05, "loss": 0.234, "step": 45057 }, { "epoch": 3.6501944264419963, "grad_norm": 0.05949265882372856, "learning_rate": 1.9433817903596022e-05, "loss": 0.1956, "step": 45058 }, { "epoch": 3.6502754374594946, "grad_norm": 0.07996305078268051, "learning_rate": 1.9429317251001396e-05, "loss": 0.2295, "step": 45059 }, { "epoch": 3.650356448476993, "grad_norm": 0.0879431888461113, "learning_rate": 1.942481659840677e-05, "loss": 0.2368, "step": 45060 }, { "epoch": 3.6504374594944915, "grad_norm": 0.09156335890293121, "learning_rate": 1.9420315945812143e-05, "loss": 0.2713, "step": 45061 }, { "epoch": 3.6505184705119897, "grad_norm": 0.08023006469011307, "learning_rate": 1.9415815293217516e-05, "loss": 0.2367, "step": 45062 }, { "epoch": 3.650599481529488, "grad_norm": 0.054662078619003296, "learning_rate": 1.941131464062289e-05, "loss": 0.1989, "step": 45063 }, { "epoch": 3.6506804925469862, "grad_norm": 0.08293486386537552, "learning_rate": 1.9406813988028267e-05, "loss": 0.2082, "step": 45064 }, { "epoch": 3.650761503564485, "grad_norm": 0.06276928633451462, "learning_rate": 1.940231333543364e-05, "loss": 0.2343, "step": 45065 }, { "epoch": 3.650842514581983, "grad_norm": 0.0665750503540039, "learning_rate": 1.939781268283901e-05, "loss": 0.2015, "step": 45066 }, { "epoch": 3.6509235255994814, "grad_norm": 0.08064727485179901, "learning_rate": 1.9393312030244388e-05, "loss": 0.2151, "step": 45067 }, { "epoch": 3.6510045366169797, "grad_norm": 0.08324388414621353, "learning_rate": 1.938881137764976e-05, "loss": 0.2388, "step": 45068 }, { "epoch": 3.6510855476344783, "grad_norm": 0.0781935527920723, "learning_rate": 1.938431072505513e-05, "loss": 0.2498, "step": 45069 }, { "epoch": 3.6511665586519766, "grad_norm": 0.07621213793754578, "learning_rate": 1.937981007246051e-05, "loss": 0.1805, "step": 45070 }, { "epoch": 3.651247569669475, "grad_norm": 0.08255457133054733, "learning_rate": 1.9375309419865882e-05, "loss": 0.2374, "step": 45071 }, { "epoch": 3.6513285806869735, "grad_norm": 0.07902143150568008, "learning_rate": 1.9370808767271256e-05, "loss": 0.2346, "step": 45072 }, { "epoch": 3.6514095917044718, "grad_norm": 0.09876828640699387, "learning_rate": 1.936630811467663e-05, "loss": 0.2745, "step": 45073 }, { "epoch": 3.65149060272197, "grad_norm": 0.05991111323237419, "learning_rate": 1.9361807462082003e-05, "loss": 0.2077, "step": 45074 }, { "epoch": 3.6515716137394687, "grad_norm": 0.07003296166658401, "learning_rate": 1.9357306809487377e-05, "loss": 0.2011, "step": 45075 }, { "epoch": 3.651652624756967, "grad_norm": 0.07214841991662979, "learning_rate": 1.935280615689275e-05, "loss": 0.2229, "step": 45076 }, { "epoch": 3.651733635774465, "grad_norm": 0.07503457367420197, "learning_rate": 1.9348305504298124e-05, "loss": 0.2282, "step": 45077 }, { "epoch": 3.651814646791964, "grad_norm": 0.08666056394577026, "learning_rate": 1.9343804851703497e-05, "loss": 0.2401, "step": 45078 }, { "epoch": 3.651895657809462, "grad_norm": 0.0745568796992302, "learning_rate": 1.933930419910887e-05, "loss": 0.2199, "step": 45079 }, { "epoch": 3.6519766688269604, "grad_norm": 0.06492049247026443, "learning_rate": 1.9334803546514248e-05, "loss": 0.2247, "step": 45080 }, { "epoch": 3.652057679844459, "grad_norm": 0.07261046767234802, "learning_rate": 1.9330302893919618e-05, "loss": 0.2182, "step": 45081 }, { "epoch": 3.6521386908619573, "grad_norm": 0.07351624965667725, "learning_rate": 1.9325802241324992e-05, "loss": 0.2581, "step": 45082 }, { "epoch": 3.6522197018794555, "grad_norm": 0.06611928343772888, "learning_rate": 1.932130158873037e-05, "loss": 0.2083, "step": 45083 }, { "epoch": 3.6523007128969542, "grad_norm": 0.06513310968875885, "learning_rate": 1.931680093613574e-05, "loss": 0.2314, "step": 45084 }, { "epoch": 3.6523817239144525, "grad_norm": 0.0680699497461319, "learning_rate": 1.9312300283541116e-05, "loss": 0.2342, "step": 45085 }, { "epoch": 3.6524627349319507, "grad_norm": 0.07630762457847595, "learning_rate": 1.930779963094649e-05, "loss": 0.2275, "step": 45086 }, { "epoch": 3.652543745949449, "grad_norm": 0.06224377825856209, "learning_rate": 1.930329897835186e-05, "loss": 0.2298, "step": 45087 }, { "epoch": 3.6526247569669477, "grad_norm": 0.06828049570322037, "learning_rate": 1.9298798325757237e-05, "loss": 0.2415, "step": 45088 }, { "epoch": 3.652705767984446, "grad_norm": 0.07400552183389664, "learning_rate": 1.929429767316261e-05, "loss": 0.1922, "step": 45089 }, { "epoch": 3.652786779001944, "grad_norm": 0.0771031454205513, "learning_rate": 1.9289797020567984e-05, "loss": 0.2652, "step": 45090 }, { "epoch": 3.6528677900194424, "grad_norm": 0.06022996827960014, "learning_rate": 1.9285296367973358e-05, "loss": 0.2384, "step": 45091 }, { "epoch": 3.652948801036941, "grad_norm": 0.06557485461235046, "learning_rate": 1.928079571537873e-05, "loss": 0.1857, "step": 45092 }, { "epoch": 3.6530298120544393, "grad_norm": 0.06742022931575775, "learning_rate": 1.9276295062784105e-05, "loss": 0.1977, "step": 45093 }, { "epoch": 3.6531108230719376, "grad_norm": 0.05643368512392044, "learning_rate": 1.927179441018948e-05, "loss": 0.2088, "step": 45094 }, { "epoch": 3.6531918340894363, "grad_norm": 0.06223396584391594, "learning_rate": 1.9267293757594852e-05, "loss": 0.2098, "step": 45095 }, { "epoch": 3.6532728451069345, "grad_norm": 0.06473883986473083, "learning_rate": 1.9262793105000226e-05, "loss": 0.1957, "step": 45096 }, { "epoch": 3.6533538561244328, "grad_norm": 0.07481816411018372, "learning_rate": 1.92582924524056e-05, "loss": 0.2152, "step": 45097 }, { "epoch": 3.6534348671419314, "grad_norm": 0.07294804602861404, "learning_rate": 1.9253791799810976e-05, "loss": 0.1953, "step": 45098 }, { "epoch": 3.6535158781594297, "grad_norm": 0.08691900223493576, "learning_rate": 1.9249291147216346e-05, "loss": 0.2632, "step": 45099 }, { "epoch": 3.653596889176928, "grad_norm": 0.07867283374071121, "learning_rate": 1.924479049462172e-05, "loss": 0.2454, "step": 45100 }, { "epoch": 3.6536779001944266, "grad_norm": 0.07713471353054047, "learning_rate": 1.9240289842027097e-05, "loss": 0.2289, "step": 45101 }, { "epoch": 3.653758911211925, "grad_norm": 0.06068172678351402, "learning_rate": 1.9235789189432467e-05, "loss": 0.2252, "step": 45102 }, { "epoch": 3.653839922229423, "grad_norm": 0.07757342606782913, "learning_rate": 1.9231288536837844e-05, "loss": 0.2017, "step": 45103 }, { "epoch": 3.653920933246922, "grad_norm": 0.07680576294660568, "learning_rate": 1.9226787884243218e-05, "loss": 0.2258, "step": 45104 }, { "epoch": 3.65400194426442, "grad_norm": 0.056338656693696976, "learning_rate": 1.9222287231648588e-05, "loss": 0.2216, "step": 45105 }, { "epoch": 3.6540829552819183, "grad_norm": 0.09292469918727875, "learning_rate": 1.9217786579053965e-05, "loss": 0.2781, "step": 45106 }, { "epoch": 3.654163966299417, "grad_norm": 0.09539378434419632, "learning_rate": 1.921328592645934e-05, "loss": 0.2077, "step": 45107 }, { "epoch": 3.654244977316915, "grad_norm": 0.06527606397867203, "learning_rate": 1.920878527386471e-05, "loss": 0.2205, "step": 45108 }, { "epoch": 3.6543259883344135, "grad_norm": 0.07580121606588364, "learning_rate": 1.9204284621270086e-05, "loss": 0.2244, "step": 45109 }, { "epoch": 3.6544069993519117, "grad_norm": 0.07603007555007935, "learning_rate": 1.919978396867546e-05, "loss": 0.1822, "step": 45110 }, { "epoch": 3.6544880103694104, "grad_norm": 0.08321841061115265, "learning_rate": 1.9195283316080833e-05, "loss": 0.2211, "step": 45111 }, { "epoch": 3.6545690213869086, "grad_norm": 0.07010239362716675, "learning_rate": 1.9190782663486207e-05, "loss": 0.1972, "step": 45112 }, { "epoch": 3.654650032404407, "grad_norm": 0.07282821834087372, "learning_rate": 1.918628201089158e-05, "loss": 0.2093, "step": 45113 }, { "epoch": 3.654731043421905, "grad_norm": 0.07374906539916992, "learning_rate": 1.9181781358296954e-05, "loss": 0.2246, "step": 45114 }, { "epoch": 3.654812054439404, "grad_norm": 0.0666646808385849, "learning_rate": 1.9177280705702327e-05, "loss": 0.2218, "step": 45115 }, { "epoch": 3.654893065456902, "grad_norm": 0.06913021951913834, "learning_rate": 1.9172780053107704e-05, "loss": 0.2354, "step": 45116 }, { "epoch": 3.6549740764744003, "grad_norm": 0.06549743562936783, "learning_rate": 1.9168279400513075e-05, "loss": 0.2382, "step": 45117 }, { "epoch": 3.655055087491899, "grad_norm": 0.07928057760000229, "learning_rate": 1.9163778747918448e-05, "loss": 0.208, "step": 45118 }, { "epoch": 3.6551360985093972, "grad_norm": 0.07727041840553284, "learning_rate": 1.9159278095323825e-05, "loss": 0.1919, "step": 45119 }, { "epoch": 3.6552171095268955, "grad_norm": 0.05975821986794472, "learning_rate": 1.9154777442729196e-05, "loss": 0.2016, "step": 45120 }, { "epoch": 3.655298120544394, "grad_norm": 0.06781353056430817, "learning_rate": 1.915027679013457e-05, "loss": 0.1934, "step": 45121 }, { "epoch": 3.6553791315618924, "grad_norm": 0.06860139220952988, "learning_rate": 1.9145776137539946e-05, "loss": 0.2195, "step": 45122 }, { "epoch": 3.6554601425793907, "grad_norm": 0.07479297369718552, "learning_rate": 1.9141275484945316e-05, "loss": 0.264, "step": 45123 }, { "epoch": 3.6555411535968894, "grad_norm": 0.05585840344429016, "learning_rate": 1.9136774832350693e-05, "loss": 0.2115, "step": 45124 }, { "epoch": 3.6556221646143876, "grad_norm": 0.07544923573732376, "learning_rate": 1.9132274179756067e-05, "loss": 0.2128, "step": 45125 }, { "epoch": 3.655703175631886, "grad_norm": 0.08912745863199234, "learning_rate": 1.9127773527161437e-05, "loss": 0.2231, "step": 45126 }, { "epoch": 3.6557841866493845, "grad_norm": 0.0821533054113388, "learning_rate": 1.9123272874566814e-05, "loss": 0.232, "step": 45127 }, { "epoch": 3.655865197666883, "grad_norm": 0.07527308911085129, "learning_rate": 1.9118772221972188e-05, "loss": 0.2264, "step": 45128 }, { "epoch": 3.655946208684381, "grad_norm": 0.06770501285791397, "learning_rate": 1.911427156937756e-05, "loss": 0.2187, "step": 45129 }, { "epoch": 3.6560272197018797, "grad_norm": 0.07356284558773041, "learning_rate": 1.9109770916782935e-05, "loss": 0.2079, "step": 45130 }, { "epoch": 3.656108230719378, "grad_norm": 0.07901307195425034, "learning_rate": 1.910527026418831e-05, "loss": 0.2404, "step": 45131 }, { "epoch": 3.656189241736876, "grad_norm": 0.07238209992647171, "learning_rate": 1.9100769611593682e-05, "loss": 0.2047, "step": 45132 }, { "epoch": 3.6562702527543745, "grad_norm": 0.07739842683076859, "learning_rate": 1.9096268958999056e-05, "loss": 0.2203, "step": 45133 }, { "epoch": 3.656351263771873, "grad_norm": 0.07297014445066452, "learning_rate": 1.909176830640443e-05, "loss": 0.215, "step": 45134 }, { "epoch": 3.6564322747893714, "grad_norm": 0.07107474654912949, "learning_rate": 1.9087267653809803e-05, "loss": 0.2489, "step": 45135 }, { "epoch": 3.6565132858068696, "grad_norm": 0.07162254303693771, "learning_rate": 1.9082767001215177e-05, "loss": 0.226, "step": 45136 }, { "epoch": 3.656594296824368, "grad_norm": 0.08277851343154907, "learning_rate": 1.9078266348620554e-05, "loss": 0.2264, "step": 45137 }, { "epoch": 3.6566753078418666, "grad_norm": 0.07660046964883804, "learning_rate": 1.9073765696025924e-05, "loss": 0.2185, "step": 45138 }, { "epoch": 3.656756318859365, "grad_norm": 0.05719228461384773, "learning_rate": 1.9069265043431297e-05, "loss": 0.1851, "step": 45139 }, { "epoch": 3.656837329876863, "grad_norm": 0.07676658034324646, "learning_rate": 1.9064764390836674e-05, "loss": 0.2416, "step": 45140 }, { "epoch": 3.6569183408943617, "grad_norm": 0.08541837334632874, "learning_rate": 1.9060263738242045e-05, "loss": 0.2255, "step": 45141 }, { "epoch": 3.65699935191186, "grad_norm": 0.08627410978078842, "learning_rate": 1.9055763085647418e-05, "loss": 0.238, "step": 45142 }, { "epoch": 3.6570803629293582, "grad_norm": 0.06201465427875519, "learning_rate": 1.9051262433052795e-05, "loss": 0.1987, "step": 45143 }, { "epoch": 3.657161373946857, "grad_norm": 0.06707758456468582, "learning_rate": 1.9046761780458165e-05, "loss": 0.2494, "step": 45144 }, { "epoch": 3.657242384964355, "grad_norm": 0.05820019543170929, "learning_rate": 1.9042261127863542e-05, "loss": 0.1994, "step": 45145 }, { "epoch": 3.6573233959818534, "grad_norm": 0.0598413459956646, "learning_rate": 1.9037760475268916e-05, "loss": 0.2162, "step": 45146 }, { "epoch": 3.657404406999352, "grad_norm": 0.06863729655742645, "learning_rate": 1.9033259822674286e-05, "loss": 0.2072, "step": 45147 }, { "epoch": 3.6574854180168503, "grad_norm": 0.09531652182340622, "learning_rate": 1.9028759170079663e-05, "loss": 0.2424, "step": 45148 }, { "epoch": 3.6575664290343486, "grad_norm": 0.10843674093484879, "learning_rate": 1.9024258517485037e-05, "loss": 0.2341, "step": 45149 }, { "epoch": 3.6576474400518473, "grad_norm": 0.06694623082876205, "learning_rate": 1.901975786489041e-05, "loss": 0.232, "step": 45150 }, { "epoch": 3.6577284510693455, "grad_norm": 0.07675240933895111, "learning_rate": 1.9015257212295784e-05, "loss": 0.2522, "step": 45151 }, { "epoch": 3.6578094620868438, "grad_norm": 0.05678509548306465, "learning_rate": 1.9010756559701158e-05, "loss": 0.2597, "step": 45152 }, { "epoch": 3.6578904731043425, "grad_norm": 0.06341982632875443, "learning_rate": 1.900625590710653e-05, "loss": 0.2404, "step": 45153 }, { "epoch": 3.6579714841218407, "grad_norm": 0.0864272266626358, "learning_rate": 1.9001755254511905e-05, "loss": 0.2441, "step": 45154 }, { "epoch": 3.658052495139339, "grad_norm": 0.06379323452711105, "learning_rate": 1.899725460191728e-05, "loss": 0.2627, "step": 45155 }, { "epoch": 3.658133506156837, "grad_norm": 0.06465981900691986, "learning_rate": 1.8992753949322652e-05, "loss": 0.2306, "step": 45156 }, { "epoch": 3.6582145171743354, "grad_norm": 0.07326333969831467, "learning_rate": 1.8988253296728026e-05, "loss": 0.244, "step": 45157 }, { "epoch": 3.658295528191834, "grad_norm": 0.07888638228178024, "learning_rate": 1.8983752644133403e-05, "loss": 0.2279, "step": 45158 }, { "epoch": 3.6583765392093324, "grad_norm": 0.06322069466114044, "learning_rate": 1.8979251991538773e-05, "loss": 0.2152, "step": 45159 }, { "epoch": 3.6584575502268306, "grad_norm": 0.08328167349100113, "learning_rate": 1.8974751338944146e-05, "loss": 0.2313, "step": 45160 }, { "epoch": 3.6585385612443293, "grad_norm": 0.07311400771141052, "learning_rate": 1.8970250686349523e-05, "loss": 0.2291, "step": 45161 }, { "epoch": 3.6586195722618275, "grad_norm": 0.07768473774194717, "learning_rate": 1.8965750033754894e-05, "loss": 0.2011, "step": 45162 }, { "epoch": 3.658700583279326, "grad_norm": 0.07638784497976303, "learning_rate": 1.896124938116027e-05, "loss": 0.2685, "step": 45163 }, { "epoch": 3.6587815942968245, "grad_norm": 0.07639626413583755, "learning_rate": 1.8956748728565644e-05, "loss": 0.2492, "step": 45164 }, { "epoch": 3.6588626053143227, "grad_norm": 0.06662027537822723, "learning_rate": 1.8952248075971014e-05, "loss": 0.2351, "step": 45165 }, { "epoch": 3.658943616331821, "grad_norm": 0.06432883441448212, "learning_rate": 1.894774742337639e-05, "loss": 0.2176, "step": 45166 }, { "epoch": 3.6590246273493197, "grad_norm": 0.08156169950962067, "learning_rate": 1.8943246770781765e-05, "loss": 0.2194, "step": 45167 }, { "epoch": 3.659105638366818, "grad_norm": 0.07922739535570145, "learning_rate": 1.8938746118187135e-05, "loss": 0.2315, "step": 45168 }, { "epoch": 3.659186649384316, "grad_norm": 0.0675259456038475, "learning_rate": 1.8934245465592512e-05, "loss": 0.2337, "step": 45169 }, { "epoch": 3.659267660401815, "grad_norm": 0.07149109244346619, "learning_rate": 1.8929744812997886e-05, "loss": 0.1945, "step": 45170 }, { "epoch": 3.659348671419313, "grad_norm": 0.09225580841302872, "learning_rate": 1.892524416040326e-05, "loss": 0.2229, "step": 45171 }, { "epoch": 3.6594296824368113, "grad_norm": 0.0599265918135643, "learning_rate": 1.8920743507808633e-05, "loss": 0.1962, "step": 45172 }, { "epoch": 3.65951069345431, "grad_norm": 0.08336354047060013, "learning_rate": 1.8916242855214007e-05, "loss": 0.2145, "step": 45173 }, { "epoch": 3.6595917044718083, "grad_norm": 0.08190027624368668, "learning_rate": 1.891174220261938e-05, "loss": 0.2341, "step": 45174 }, { "epoch": 3.6596727154893065, "grad_norm": 0.08267060667276382, "learning_rate": 1.8907241550024754e-05, "loss": 0.2275, "step": 45175 }, { "epoch": 3.659753726506805, "grad_norm": 0.06503156572580338, "learning_rate": 1.890274089743013e-05, "loss": 0.2179, "step": 45176 }, { "epoch": 3.6598347375243034, "grad_norm": 0.08606370538473129, "learning_rate": 1.88982402448355e-05, "loss": 0.2286, "step": 45177 }, { "epoch": 3.6599157485418017, "grad_norm": 0.06882736086845398, "learning_rate": 1.8893739592240875e-05, "loss": 0.2437, "step": 45178 }, { "epoch": 3.6599967595593, "grad_norm": 0.06398408859968185, "learning_rate": 1.888923893964625e-05, "loss": 0.1888, "step": 45179 }, { "epoch": 3.660077770576798, "grad_norm": 0.06924501061439514, "learning_rate": 1.8884738287051622e-05, "loss": 0.2021, "step": 45180 }, { "epoch": 3.660158781594297, "grad_norm": 0.07223118096590042, "learning_rate": 1.8880237634456995e-05, "loss": 0.2387, "step": 45181 }, { "epoch": 3.660239792611795, "grad_norm": 0.07276555150747299, "learning_rate": 1.8875736981862372e-05, "loss": 0.2485, "step": 45182 }, { "epoch": 3.6603208036292934, "grad_norm": 0.06627250462770462, "learning_rate": 1.8871236329267743e-05, "loss": 0.2028, "step": 45183 }, { "epoch": 3.660401814646792, "grad_norm": 0.07248107343912125, "learning_rate": 1.886673567667312e-05, "loss": 0.2226, "step": 45184 }, { "epoch": 3.6604828256642903, "grad_norm": 0.0946134552359581, "learning_rate": 1.8862235024078493e-05, "loss": 0.2456, "step": 45185 }, { "epoch": 3.6605638366817885, "grad_norm": 0.08283112943172455, "learning_rate": 1.8857734371483863e-05, "loss": 0.2349, "step": 45186 }, { "epoch": 3.660644847699287, "grad_norm": 0.0703137218952179, "learning_rate": 1.885323371888924e-05, "loss": 0.2016, "step": 45187 }, { "epoch": 3.6607258587167855, "grad_norm": 0.0719614326953888, "learning_rate": 1.8848733066294614e-05, "loss": 0.2342, "step": 45188 }, { "epoch": 3.6608068697342837, "grad_norm": 0.07138366997241974, "learning_rate": 1.8844232413699988e-05, "loss": 0.225, "step": 45189 }, { "epoch": 3.6608878807517824, "grad_norm": 0.10369545966386795, "learning_rate": 1.883973176110536e-05, "loss": 0.239, "step": 45190 }, { "epoch": 3.6609688917692806, "grad_norm": 0.06715277582406998, "learning_rate": 1.8835231108510735e-05, "loss": 0.2064, "step": 45191 }, { "epoch": 3.661049902786779, "grad_norm": 0.07604720443487167, "learning_rate": 1.883073045591611e-05, "loss": 0.2189, "step": 45192 }, { "epoch": 3.6611309138042776, "grad_norm": 0.08600092679262161, "learning_rate": 1.8826229803321482e-05, "loss": 0.245, "step": 45193 }, { "epoch": 3.661211924821776, "grad_norm": 0.07176908105611801, "learning_rate": 1.8821729150726856e-05, "loss": 0.2597, "step": 45194 }, { "epoch": 3.661292935839274, "grad_norm": 0.08605284988880157, "learning_rate": 1.881722849813223e-05, "loss": 0.2165, "step": 45195 }, { "epoch": 3.6613739468567728, "grad_norm": 0.0742536336183548, "learning_rate": 1.8812727845537603e-05, "loss": 0.2322, "step": 45196 }, { "epoch": 3.661454957874271, "grad_norm": 0.07331538945436478, "learning_rate": 1.880822719294298e-05, "loss": 0.2215, "step": 45197 }, { "epoch": 3.6615359688917692, "grad_norm": 0.07890629023313522, "learning_rate": 1.880372654034835e-05, "loss": 0.233, "step": 45198 }, { "epoch": 3.661616979909268, "grad_norm": 0.06850796937942505, "learning_rate": 1.8799225887753724e-05, "loss": 0.203, "step": 45199 }, { "epoch": 3.661697990926766, "grad_norm": 0.06160985678434372, "learning_rate": 1.87947252351591e-05, "loss": 0.201, "step": 45200 }, { "epoch": 3.6617790019442644, "grad_norm": 0.0723324865102768, "learning_rate": 1.879022458256447e-05, "loss": 0.206, "step": 45201 }, { "epoch": 3.6618600129617627, "grad_norm": 0.07769309729337692, "learning_rate": 1.8785723929969848e-05, "loss": 0.2628, "step": 45202 }, { "epoch": 3.661941023979261, "grad_norm": 0.0801674947142601, "learning_rate": 1.878122327737522e-05, "loss": 0.2018, "step": 45203 }, { "epoch": 3.6620220349967596, "grad_norm": 0.06425900757312775, "learning_rate": 1.8776722624780592e-05, "loss": 0.2094, "step": 45204 }, { "epoch": 3.662103046014258, "grad_norm": 0.0725947692990303, "learning_rate": 1.877222197218597e-05, "loss": 0.2432, "step": 45205 }, { "epoch": 3.662184057031756, "grad_norm": 0.07440198957920074, "learning_rate": 1.8767721319591342e-05, "loss": 0.2038, "step": 45206 }, { "epoch": 3.662265068049255, "grad_norm": 0.07127074152231216, "learning_rate": 1.8763220666996713e-05, "loss": 0.214, "step": 45207 }, { "epoch": 3.662346079066753, "grad_norm": 0.061125315725803375, "learning_rate": 1.875872001440209e-05, "loss": 0.1954, "step": 45208 }, { "epoch": 3.6624270900842513, "grad_norm": 0.06937211006879807, "learning_rate": 1.8754219361807463e-05, "loss": 0.2045, "step": 45209 }, { "epoch": 3.66250810110175, "grad_norm": 0.0728205218911171, "learning_rate": 1.8749718709212837e-05, "loss": 0.238, "step": 45210 }, { "epoch": 3.662589112119248, "grad_norm": 0.09144295752048492, "learning_rate": 1.874521805661821e-05, "loss": 0.2327, "step": 45211 }, { "epoch": 3.6626701231367464, "grad_norm": 0.0762929916381836, "learning_rate": 1.8740717404023584e-05, "loss": 0.2427, "step": 45212 }, { "epoch": 3.662751134154245, "grad_norm": 0.07527792453765869, "learning_rate": 1.8736216751428958e-05, "loss": 0.2229, "step": 45213 }, { "epoch": 3.6628321451717434, "grad_norm": 0.06261378526687622, "learning_rate": 1.873171609883433e-05, "loss": 0.2333, "step": 45214 }, { "epoch": 3.6629131561892416, "grad_norm": 0.0688212513923645, "learning_rate": 1.8727215446239708e-05, "loss": 0.2074, "step": 45215 }, { "epoch": 3.6629941672067403, "grad_norm": 0.07364526391029358, "learning_rate": 1.872271479364508e-05, "loss": 0.261, "step": 45216 }, { "epoch": 3.6630751782242386, "grad_norm": 0.07287118583917618, "learning_rate": 1.8718214141050452e-05, "loss": 0.2361, "step": 45217 }, { "epoch": 3.663156189241737, "grad_norm": 0.07098571956157684, "learning_rate": 1.871371348845583e-05, "loss": 0.2548, "step": 45218 }, { "epoch": 3.6632372002592355, "grad_norm": 0.0735175609588623, "learning_rate": 1.87092128358612e-05, "loss": 0.2525, "step": 45219 }, { "epoch": 3.6633182112767337, "grad_norm": 0.07706687599420547, "learning_rate": 1.8704712183266573e-05, "loss": 0.2814, "step": 45220 }, { "epoch": 3.663399222294232, "grad_norm": 0.07573897391557693, "learning_rate": 1.870021153067195e-05, "loss": 0.2213, "step": 45221 }, { "epoch": 3.6634802333117307, "grad_norm": 0.06459297239780426, "learning_rate": 1.869571087807732e-05, "loss": 0.2569, "step": 45222 }, { "epoch": 3.663561244329229, "grad_norm": 0.06621073186397552, "learning_rate": 1.8691210225482697e-05, "loss": 0.2595, "step": 45223 }, { "epoch": 3.663642255346727, "grad_norm": 0.07406672835350037, "learning_rate": 1.868670957288807e-05, "loss": 0.211, "step": 45224 }, { "epoch": 3.6637232663642254, "grad_norm": 0.05716541036963463, "learning_rate": 1.8682208920293444e-05, "loss": 0.2225, "step": 45225 }, { "epoch": 3.6638042773817237, "grad_norm": 0.061790212988853455, "learning_rate": 1.8677708267698818e-05, "loss": 0.2116, "step": 45226 }, { "epoch": 3.6638852883992223, "grad_norm": 0.06380802392959595, "learning_rate": 1.867320761510419e-05, "loss": 0.2551, "step": 45227 }, { "epoch": 3.6639662994167206, "grad_norm": 0.06814054399728775, "learning_rate": 1.8668706962509565e-05, "loss": 0.2179, "step": 45228 }, { "epoch": 3.664047310434219, "grad_norm": 0.08436217904090881, "learning_rate": 1.866420630991494e-05, "loss": 0.2342, "step": 45229 }, { "epoch": 3.6641283214517175, "grad_norm": 0.07426043599843979, "learning_rate": 1.8659705657320312e-05, "loss": 0.2463, "step": 45230 }, { "epoch": 3.6642093324692158, "grad_norm": 0.0674593448638916, "learning_rate": 1.8655205004725686e-05, "loss": 0.2012, "step": 45231 }, { "epoch": 3.664290343486714, "grad_norm": 0.08109057694673538, "learning_rate": 1.865070435213106e-05, "loss": 0.2572, "step": 45232 }, { "epoch": 3.6643713545042127, "grad_norm": 0.07163460552692413, "learning_rate": 1.8646203699536433e-05, "loss": 0.211, "step": 45233 }, { "epoch": 3.664452365521711, "grad_norm": 0.07475652545690536, "learning_rate": 1.8641703046941807e-05, "loss": 0.2136, "step": 45234 }, { "epoch": 3.664533376539209, "grad_norm": 0.07576707750558853, "learning_rate": 1.863720239434718e-05, "loss": 0.206, "step": 45235 }, { "epoch": 3.664614387556708, "grad_norm": 0.067570261657238, "learning_rate": 1.8632701741752557e-05, "loss": 0.2316, "step": 45236 }, { "epoch": 3.664695398574206, "grad_norm": 0.057533759623765945, "learning_rate": 1.8628201089157927e-05, "loss": 0.2106, "step": 45237 }, { "epoch": 3.6647764095917044, "grad_norm": 0.0835278183221817, "learning_rate": 1.86237004365633e-05, "loss": 0.2861, "step": 45238 }, { "epoch": 3.664857420609203, "grad_norm": 0.07156947255134583, "learning_rate": 1.8619199783968678e-05, "loss": 0.2521, "step": 45239 }, { "epoch": 3.6649384316267013, "grad_norm": 0.06274692714214325, "learning_rate": 1.8614699131374048e-05, "loss": 0.2358, "step": 45240 }, { "epoch": 3.6650194426441995, "grad_norm": 0.07600396871566772, "learning_rate": 1.8610198478779422e-05, "loss": 0.2267, "step": 45241 }, { "epoch": 3.6651004536616982, "grad_norm": 0.08536671102046967, "learning_rate": 1.86056978261848e-05, "loss": 0.2078, "step": 45242 }, { "epoch": 3.6651814646791965, "grad_norm": 0.08486076444387436, "learning_rate": 1.8601197173590172e-05, "loss": 0.2428, "step": 45243 }, { "epoch": 3.6652624756966947, "grad_norm": 0.0657820925116539, "learning_rate": 1.8596696520995546e-05, "loss": 0.2172, "step": 45244 }, { "epoch": 3.665343486714193, "grad_norm": 0.08165675401687622, "learning_rate": 1.859219586840092e-05, "loss": 0.2448, "step": 45245 }, { "epoch": 3.6654244977316917, "grad_norm": 0.07033119350671768, "learning_rate": 1.8587695215806293e-05, "loss": 0.2219, "step": 45246 }, { "epoch": 3.66550550874919, "grad_norm": 0.07771047949790955, "learning_rate": 1.8583194563211667e-05, "loss": 0.252, "step": 45247 }, { "epoch": 3.665586519766688, "grad_norm": 0.06715802103281021, "learning_rate": 1.857869391061704e-05, "loss": 0.2521, "step": 45248 }, { "epoch": 3.6656675307841864, "grad_norm": 0.088836170732975, "learning_rate": 1.8574193258022414e-05, "loss": 0.2596, "step": 45249 }, { "epoch": 3.665748541801685, "grad_norm": 0.06336906552314758, "learning_rate": 1.8569692605427788e-05, "loss": 0.2287, "step": 45250 }, { "epoch": 3.6658295528191833, "grad_norm": 0.07023075222969055, "learning_rate": 1.856519195283316e-05, "loss": 0.2233, "step": 45251 }, { "epoch": 3.6659105638366816, "grad_norm": 0.05971920117735863, "learning_rate": 1.8560691300238535e-05, "loss": 0.1959, "step": 45252 }, { "epoch": 3.6659915748541803, "grad_norm": 0.06513191014528275, "learning_rate": 1.855619064764391e-05, "loss": 0.2232, "step": 45253 }, { "epoch": 3.6660725858716785, "grad_norm": 0.08198903501033783, "learning_rate": 1.8551689995049282e-05, "loss": 0.1973, "step": 45254 }, { "epoch": 3.6661535968891767, "grad_norm": 0.056683849543333054, "learning_rate": 1.8547189342454656e-05, "loss": 0.2219, "step": 45255 }, { "epoch": 3.6662346079066754, "grad_norm": 0.08191985636949539, "learning_rate": 1.854268868986003e-05, "loss": 0.2283, "step": 45256 }, { "epoch": 3.6663156189241737, "grad_norm": 0.08315394073724747, "learning_rate": 1.8538188037265406e-05, "loss": 0.2311, "step": 45257 }, { "epoch": 3.666396629941672, "grad_norm": 0.06751669198274612, "learning_rate": 1.8533687384670776e-05, "loss": 0.2159, "step": 45258 }, { "epoch": 3.6664776409591706, "grad_norm": 0.06708385795354843, "learning_rate": 1.852918673207615e-05, "loss": 0.203, "step": 45259 }, { "epoch": 3.666558651976669, "grad_norm": 0.07147734612226486, "learning_rate": 1.8524686079481527e-05, "loss": 0.2724, "step": 45260 }, { "epoch": 3.666639662994167, "grad_norm": 0.06914891302585602, "learning_rate": 1.85201854268869e-05, "loss": 0.2364, "step": 45261 }, { "epoch": 3.666720674011666, "grad_norm": 0.06511449813842773, "learning_rate": 1.8515684774292274e-05, "loss": 0.2261, "step": 45262 }, { "epoch": 3.666801685029164, "grad_norm": 0.0676787719130516, "learning_rate": 1.8511184121697648e-05, "loss": 0.2022, "step": 45263 }, { "epoch": 3.6668826960466623, "grad_norm": 0.09487228840589523, "learning_rate": 1.850668346910302e-05, "loss": 0.2466, "step": 45264 }, { "epoch": 3.666963707064161, "grad_norm": 0.07777979969978333, "learning_rate": 1.8502182816508395e-05, "loss": 0.2491, "step": 45265 }, { "epoch": 3.667044718081659, "grad_norm": 0.07166828215122223, "learning_rate": 1.849768216391377e-05, "loss": 0.1753, "step": 45266 }, { "epoch": 3.6671257290991575, "grad_norm": 0.06455834209918976, "learning_rate": 1.8493181511319142e-05, "loss": 0.1964, "step": 45267 }, { "epoch": 3.6672067401166557, "grad_norm": 0.07258601486682892, "learning_rate": 1.8488680858724516e-05, "loss": 0.2158, "step": 45268 }, { "epoch": 3.6672877511341544, "grad_norm": 0.0714699998497963, "learning_rate": 1.848418020612989e-05, "loss": 0.2503, "step": 45269 }, { "epoch": 3.6673687621516526, "grad_norm": 0.0788547545671463, "learning_rate": 1.8479679553535263e-05, "loss": 0.2433, "step": 45270 }, { "epoch": 3.667449773169151, "grad_norm": 0.08070094883441925, "learning_rate": 1.8475178900940637e-05, "loss": 0.2471, "step": 45271 }, { "epoch": 3.667530784186649, "grad_norm": 0.09129707515239716, "learning_rate": 1.847067824834601e-05, "loss": 0.2362, "step": 45272 }, { "epoch": 3.667611795204148, "grad_norm": 0.08472602814435959, "learning_rate": 1.8466177595751384e-05, "loss": 0.2054, "step": 45273 }, { "epoch": 3.667692806221646, "grad_norm": 0.0719936415553093, "learning_rate": 1.8461676943156758e-05, "loss": 0.2079, "step": 45274 }, { "epoch": 3.6677738172391443, "grad_norm": 0.07852540165185928, "learning_rate": 1.8457176290562135e-05, "loss": 0.2535, "step": 45275 }, { "epoch": 3.667854828256643, "grad_norm": 0.08097584545612335, "learning_rate": 1.8452675637967505e-05, "loss": 0.2581, "step": 45276 }, { "epoch": 3.6679358392741412, "grad_norm": 0.07683178037405014, "learning_rate": 1.844817498537288e-05, "loss": 0.2382, "step": 45277 }, { "epoch": 3.6680168502916395, "grad_norm": 0.06702481955289841, "learning_rate": 1.8443674332778255e-05, "loss": 0.2282, "step": 45278 }, { "epoch": 3.668097861309138, "grad_norm": 0.07527042180299759, "learning_rate": 1.843917368018363e-05, "loss": 0.186, "step": 45279 }, { "epoch": 3.6681788723266364, "grad_norm": 0.06990735977888107, "learning_rate": 1.8434673027589e-05, "loss": 0.2354, "step": 45280 }, { "epoch": 3.6682598833441347, "grad_norm": 0.08135319501161575, "learning_rate": 1.8430172374994376e-05, "loss": 0.228, "step": 45281 }, { "epoch": 3.6683408943616334, "grad_norm": 0.07040378451347351, "learning_rate": 1.842567172239975e-05, "loss": 0.2158, "step": 45282 }, { "epoch": 3.6684219053791316, "grad_norm": 0.09205630421638489, "learning_rate": 1.8421171069805123e-05, "loss": 0.2466, "step": 45283 }, { "epoch": 3.66850291639663, "grad_norm": 0.06662517786026001, "learning_rate": 1.8416670417210497e-05, "loss": 0.2095, "step": 45284 }, { "epoch": 3.6685839274141285, "grad_norm": 0.07559280097484589, "learning_rate": 1.841216976461587e-05, "loss": 0.2235, "step": 45285 }, { "epoch": 3.6686649384316268, "grad_norm": 0.08209975808858871, "learning_rate": 1.8407669112021244e-05, "loss": 0.2599, "step": 45286 }, { "epoch": 3.668745949449125, "grad_norm": 0.07545511424541473, "learning_rate": 1.8403168459426618e-05, "loss": 0.229, "step": 45287 }, { "epoch": 3.6688269604666237, "grad_norm": 0.05978429690003395, "learning_rate": 1.839866780683199e-05, "loss": 0.2133, "step": 45288 }, { "epoch": 3.668907971484122, "grad_norm": 0.07269906252622604, "learning_rate": 1.8394167154237365e-05, "loss": 0.2143, "step": 45289 }, { "epoch": 3.66898898250162, "grad_norm": 0.0637771263718605, "learning_rate": 1.838966650164274e-05, "loss": 0.1974, "step": 45290 }, { "epoch": 3.6690699935191184, "grad_norm": 0.08513235300779343, "learning_rate": 1.8385165849048112e-05, "loss": 0.2258, "step": 45291 }, { "epoch": 3.669151004536617, "grad_norm": 0.08326667547225952, "learning_rate": 1.8380665196453486e-05, "loss": 0.2099, "step": 45292 }, { "epoch": 3.6692320155541154, "grad_norm": 0.07560274749994278, "learning_rate": 1.837616454385886e-05, "loss": 0.2221, "step": 45293 }, { "epoch": 3.6693130265716136, "grad_norm": 0.07517266273498535, "learning_rate": 1.8371663891264236e-05, "loss": 0.2181, "step": 45294 }, { "epoch": 3.669394037589112, "grad_norm": 0.07194165140390396, "learning_rate": 1.8367163238669607e-05, "loss": 0.2318, "step": 45295 }, { "epoch": 3.6694750486066106, "grad_norm": 0.07393741607666016, "learning_rate": 1.8362662586074984e-05, "loss": 0.2431, "step": 45296 }, { "epoch": 3.669556059624109, "grad_norm": 0.0838971957564354, "learning_rate": 1.8358161933480357e-05, "loss": 0.2495, "step": 45297 }, { "epoch": 3.669637070641607, "grad_norm": 0.08950010687112808, "learning_rate": 1.8353661280885727e-05, "loss": 0.2088, "step": 45298 }, { "epoch": 3.6697180816591057, "grad_norm": 0.06360174715518951, "learning_rate": 1.8349160628291104e-05, "loss": 0.2234, "step": 45299 }, { "epoch": 3.669799092676604, "grad_norm": 0.07285406440496445, "learning_rate": 1.8344659975696478e-05, "loss": 0.2441, "step": 45300 }, { "epoch": 3.6698801036941022, "grad_norm": 0.06920921802520752, "learning_rate": 1.834015932310185e-05, "loss": 0.2242, "step": 45301 }, { "epoch": 3.669961114711601, "grad_norm": 0.07369273155927658, "learning_rate": 1.8335658670507225e-05, "loss": 0.2412, "step": 45302 }, { "epoch": 3.670042125729099, "grad_norm": 0.08306174725294113, "learning_rate": 1.83311580179126e-05, "loss": 0.2479, "step": 45303 }, { "epoch": 3.6701231367465974, "grad_norm": 0.071095310151577, "learning_rate": 1.8326657365317972e-05, "loss": 0.2298, "step": 45304 }, { "epoch": 3.670204147764096, "grad_norm": 0.07723124325275421, "learning_rate": 1.8322156712723346e-05, "loss": 0.2093, "step": 45305 }, { "epoch": 3.6702851587815943, "grad_norm": 0.06528105586767197, "learning_rate": 1.831765606012872e-05, "loss": 0.227, "step": 45306 }, { "epoch": 3.6703661697990926, "grad_norm": 0.07839982211589813, "learning_rate": 1.8313155407534093e-05, "loss": 0.1812, "step": 45307 }, { "epoch": 3.6704471808165913, "grad_norm": 0.06877532601356506, "learning_rate": 1.8308654754939467e-05, "loss": 0.2531, "step": 45308 }, { "epoch": 3.6705281918340895, "grad_norm": 0.06471942365169525, "learning_rate": 1.830415410234484e-05, "loss": 0.2218, "step": 45309 }, { "epoch": 3.6706092028515878, "grad_norm": 0.06157661974430084, "learning_rate": 1.8299653449750214e-05, "loss": 0.2209, "step": 45310 }, { "epoch": 3.6706902138690864, "grad_norm": 0.05959887430071831, "learning_rate": 1.8295152797155588e-05, "loss": 0.2317, "step": 45311 }, { "epoch": 3.6707712248865847, "grad_norm": 0.09365382045507431, "learning_rate": 1.8290652144560965e-05, "loss": 0.2105, "step": 45312 }, { "epoch": 3.670852235904083, "grad_norm": 0.059252869337797165, "learning_rate": 1.8286151491966335e-05, "loss": 0.2384, "step": 45313 }, { "epoch": 3.670933246921581, "grad_norm": 0.07261160761117935, "learning_rate": 1.828165083937171e-05, "loss": 0.2129, "step": 45314 }, { "epoch": 3.67101425793908, "grad_norm": 0.07140535116195679, "learning_rate": 1.8277150186777085e-05, "loss": 0.2484, "step": 45315 }, { "epoch": 3.671095268956578, "grad_norm": 0.06605353951454163, "learning_rate": 1.8272649534182456e-05, "loss": 0.2345, "step": 45316 }, { "epoch": 3.6711762799740764, "grad_norm": 0.0692841038107872, "learning_rate": 1.8268148881587833e-05, "loss": 0.2096, "step": 45317 }, { "epoch": 3.6712572909915746, "grad_norm": 0.07458701729774475, "learning_rate": 1.8263648228993206e-05, "loss": 0.2243, "step": 45318 }, { "epoch": 3.6713383020090733, "grad_norm": 0.07618386298418045, "learning_rate": 1.8259147576398576e-05, "loss": 0.2843, "step": 45319 }, { "epoch": 3.6714193130265715, "grad_norm": 0.06535560637712479, "learning_rate": 1.8254646923803953e-05, "loss": 0.2373, "step": 45320 }, { "epoch": 3.67150032404407, "grad_norm": 0.07870413362979889, "learning_rate": 1.8250146271209327e-05, "loss": 0.2164, "step": 45321 }, { "epoch": 3.6715813350615685, "grad_norm": 0.06687940657138824, "learning_rate": 1.82456456186147e-05, "loss": 0.2201, "step": 45322 }, { "epoch": 3.6716623460790667, "grad_norm": 0.05875231698155403, "learning_rate": 1.8241144966020074e-05, "loss": 0.2444, "step": 45323 }, { "epoch": 3.671743357096565, "grad_norm": 0.07024704664945602, "learning_rate": 1.8236644313425448e-05, "loss": 0.2528, "step": 45324 }, { "epoch": 3.6718243681140637, "grad_norm": 0.07065678387880325, "learning_rate": 1.823214366083082e-05, "loss": 0.2109, "step": 45325 }, { "epoch": 3.671905379131562, "grad_norm": 0.07673554122447968, "learning_rate": 1.8227643008236195e-05, "loss": 0.2085, "step": 45326 }, { "epoch": 3.67198639014906, "grad_norm": 0.08821848034858704, "learning_rate": 1.822314235564157e-05, "loss": 0.266, "step": 45327 }, { "epoch": 3.672067401166559, "grad_norm": 0.09151603281497955, "learning_rate": 1.8218641703046942e-05, "loss": 0.2364, "step": 45328 }, { "epoch": 3.672148412184057, "grad_norm": 0.07013548910617828, "learning_rate": 1.8214141050452316e-05, "loss": 0.2041, "step": 45329 }, { "epoch": 3.6722294232015553, "grad_norm": 0.07981032878160477, "learning_rate": 1.8209640397857693e-05, "loss": 0.2669, "step": 45330 }, { "epoch": 3.672310434219054, "grad_norm": 0.0923733338713646, "learning_rate": 1.8205139745263063e-05, "loss": 0.1997, "step": 45331 }, { "epoch": 3.6723914452365523, "grad_norm": 0.0668359100818634, "learning_rate": 1.8200639092668437e-05, "loss": 0.2247, "step": 45332 }, { "epoch": 3.6724724562540505, "grad_norm": 0.05784869194030762, "learning_rate": 1.8196138440073814e-05, "loss": 0.244, "step": 45333 }, { "epoch": 3.672553467271549, "grad_norm": 0.06569833308458328, "learning_rate": 1.8191637787479184e-05, "loss": 0.2083, "step": 45334 }, { "epoch": 3.6726344782890474, "grad_norm": 0.07707594335079193, "learning_rate": 1.818713713488456e-05, "loss": 0.2344, "step": 45335 }, { "epoch": 3.6727154893065457, "grad_norm": 0.06917215138673782, "learning_rate": 1.8182636482289934e-05, "loss": 0.1973, "step": 45336 }, { "epoch": 3.672796500324044, "grad_norm": 0.061087533831596375, "learning_rate": 1.8178135829695305e-05, "loss": 0.233, "step": 45337 }, { "epoch": 3.6728775113415426, "grad_norm": 0.07393475621938705, "learning_rate": 1.817363517710068e-05, "loss": 0.2184, "step": 45338 }, { "epoch": 3.672958522359041, "grad_norm": 0.08241662383079529, "learning_rate": 1.8169134524506055e-05, "loss": 0.2612, "step": 45339 }, { "epoch": 3.673039533376539, "grad_norm": 0.06417885422706604, "learning_rate": 1.8164633871911426e-05, "loss": 0.2379, "step": 45340 }, { "epoch": 3.6731205443940373, "grad_norm": 0.07297766953706741, "learning_rate": 1.8160133219316803e-05, "loss": 0.2243, "step": 45341 }, { "epoch": 3.673201555411536, "grad_norm": 0.07211649417877197, "learning_rate": 1.8155632566722176e-05, "loss": 0.2282, "step": 45342 }, { "epoch": 3.6732825664290343, "grad_norm": 0.07847045361995697, "learning_rate": 1.815113191412755e-05, "loss": 0.258, "step": 45343 }, { "epoch": 3.6733635774465325, "grad_norm": 0.07077674567699432, "learning_rate": 1.8146631261532923e-05, "loss": 0.2415, "step": 45344 }, { "epoch": 3.673444588464031, "grad_norm": 0.07668691873550415, "learning_rate": 1.8142130608938297e-05, "loss": 0.2154, "step": 45345 }, { "epoch": 3.6735255994815295, "grad_norm": 0.066067636013031, "learning_rate": 1.813762995634367e-05, "loss": 0.2505, "step": 45346 }, { "epoch": 3.6736066104990277, "grad_norm": 0.06294489651918411, "learning_rate": 1.8133129303749044e-05, "loss": 0.2048, "step": 45347 }, { "epoch": 3.6736876215165264, "grad_norm": 0.07754607498645782, "learning_rate": 1.812862865115442e-05, "loss": 0.224, "step": 45348 }, { "epoch": 3.6737686325340246, "grad_norm": 0.07878319174051285, "learning_rate": 1.812412799855979e-05, "loss": 0.2153, "step": 45349 }, { "epoch": 3.673849643551523, "grad_norm": 0.07131267338991165, "learning_rate": 1.8119627345965165e-05, "loss": 0.2618, "step": 45350 }, { "epoch": 3.6739306545690216, "grad_norm": 0.08097372204065323, "learning_rate": 1.8115126693370542e-05, "loss": 0.2629, "step": 45351 }, { "epoch": 3.67401166558652, "grad_norm": 0.07331506907939911, "learning_rate": 1.8110626040775912e-05, "loss": 0.2477, "step": 45352 }, { "epoch": 3.674092676604018, "grad_norm": 0.07750751078128815, "learning_rate": 1.8106125388181286e-05, "loss": 0.2604, "step": 45353 }, { "epoch": 3.6741736876215167, "grad_norm": 0.06813794374465942, "learning_rate": 1.8101624735586663e-05, "loss": 0.2211, "step": 45354 }, { "epoch": 3.674254698639015, "grad_norm": 0.06250642240047455, "learning_rate": 1.8097124082992033e-05, "loss": 0.2105, "step": 45355 }, { "epoch": 3.6743357096565132, "grad_norm": 0.06140798330307007, "learning_rate": 1.809262343039741e-05, "loss": 0.2277, "step": 45356 }, { "epoch": 3.674416720674012, "grad_norm": 0.061542950570583344, "learning_rate": 1.8088122777802784e-05, "loss": 0.208, "step": 45357 }, { "epoch": 3.67449773169151, "grad_norm": 0.07049670815467834, "learning_rate": 1.8083622125208154e-05, "loss": 0.1901, "step": 45358 }, { "epoch": 3.6745787427090084, "grad_norm": 0.08683442324399948, "learning_rate": 1.807912147261353e-05, "loss": 0.2543, "step": 45359 }, { "epoch": 3.6746597537265067, "grad_norm": 0.060977209359407425, "learning_rate": 1.8074620820018904e-05, "loss": 0.2064, "step": 45360 }, { "epoch": 3.6747407647440054, "grad_norm": 0.06796345859766006, "learning_rate": 1.8070120167424278e-05, "loss": 0.2111, "step": 45361 }, { "epoch": 3.6748217757615036, "grad_norm": 0.06472915410995483, "learning_rate": 1.806561951482965e-05, "loss": 0.2397, "step": 45362 }, { "epoch": 3.674902786779002, "grad_norm": 0.06002800911664963, "learning_rate": 1.8061118862235025e-05, "loss": 0.2346, "step": 45363 }, { "epoch": 3.6749837977965, "grad_norm": 0.06987227499485016, "learning_rate": 1.80566182096404e-05, "loss": 0.1946, "step": 45364 }, { "epoch": 3.6750648088139988, "grad_norm": 0.0703306794166565, "learning_rate": 1.8052117557045772e-05, "loss": 0.2144, "step": 45365 }, { "epoch": 3.675145819831497, "grad_norm": 0.07858558744192123, "learning_rate": 1.8047616904451146e-05, "loss": 0.2181, "step": 45366 }, { "epoch": 3.6752268308489953, "grad_norm": 0.0637904480099678, "learning_rate": 1.804311625185652e-05, "loss": 0.1904, "step": 45367 }, { "epoch": 3.675307841866494, "grad_norm": 0.0818314254283905, "learning_rate": 1.8038615599261893e-05, "loss": 0.2519, "step": 45368 }, { "epoch": 3.675388852883992, "grad_norm": 0.067131906747818, "learning_rate": 1.803411494666727e-05, "loss": 0.234, "step": 45369 }, { "epoch": 3.6754698639014904, "grad_norm": 0.07786626368761063, "learning_rate": 1.802961429407264e-05, "loss": 0.2228, "step": 45370 }, { "epoch": 3.675550874918989, "grad_norm": 0.0704607367515564, "learning_rate": 1.8025113641478014e-05, "loss": 0.2205, "step": 45371 }, { "epoch": 3.6756318859364874, "grad_norm": 0.06771155446767807, "learning_rate": 1.802061298888339e-05, "loss": 0.1937, "step": 45372 }, { "epoch": 3.6757128969539856, "grad_norm": 0.07659830898046494, "learning_rate": 1.801611233628876e-05, "loss": 0.2322, "step": 45373 }, { "epoch": 3.6757939079714843, "grad_norm": 0.08131053298711777, "learning_rate": 1.8011611683694138e-05, "loss": 0.2694, "step": 45374 }, { "epoch": 3.6758749189889826, "grad_norm": 0.06618554890155792, "learning_rate": 1.8007111031099512e-05, "loss": 0.2322, "step": 45375 }, { "epoch": 3.675955930006481, "grad_norm": 0.07669448107481003, "learning_rate": 1.8002610378504882e-05, "loss": 0.229, "step": 45376 }, { "epoch": 3.6760369410239795, "grad_norm": 0.07624157518148422, "learning_rate": 1.799810972591026e-05, "loss": 0.2206, "step": 45377 }, { "epoch": 3.6761179520414777, "grad_norm": 0.06907004117965698, "learning_rate": 1.7993609073315633e-05, "loss": 0.2133, "step": 45378 }, { "epoch": 3.676198963058976, "grad_norm": 0.09575460106134415, "learning_rate": 1.7989108420721003e-05, "loss": 0.2484, "step": 45379 }, { "epoch": 3.6762799740764747, "grad_norm": 0.07262253761291504, "learning_rate": 1.798460776812638e-05, "loss": 0.2409, "step": 45380 }, { "epoch": 3.676360985093973, "grad_norm": 0.06717146933078766, "learning_rate": 1.7980107115531753e-05, "loss": 0.2061, "step": 45381 }, { "epoch": 3.676441996111471, "grad_norm": 0.0591311901807785, "learning_rate": 1.7975606462937127e-05, "loss": 0.2206, "step": 45382 }, { "epoch": 3.6765230071289694, "grad_norm": 0.0643257200717926, "learning_rate": 1.79711058103425e-05, "loss": 0.2219, "step": 45383 }, { "epoch": 3.6766040181464676, "grad_norm": 0.07790892571210861, "learning_rate": 1.7966605157747874e-05, "loss": 0.2244, "step": 45384 }, { "epoch": 3.6766850291639663, "grad_norm": 0.08132940530776978, "learning_rate": 1.7962104505153248e-05, "loss": 0.2378, "step": 45385 }, { "epoch": 3.6767660401814646, "grad_norm": 0.0682375505566597, "learning_rate": 1.795760385255862e-05, "loss": 0.2069, "step": 45386 }, { "epoch": 3.676847051198963, "grad_norm": 0.0725887343287468, "learning_rate": 1.7953103199963995e-05, "loss": 0.2119, "step": 45387 }, { "epoch": 3.6769280622164615, "grad_norm": 0.07341375201940536, "learning_rate": 1.794860254736937e-05, "loss": 0.1964, "step": 45388 }, { "epoch": 3.6770090732339598, "grad_norm": 0.07751181721687317, "learning_rate": 1.7944101894774742e-05, "loss": 0.2544, "step": 45389 }, { "epoch": 3.677090084251458, "grad_norm": 0.08196587860584259, "learning_rate": 1.793960124218012e-05, "loss": 0.2033, "step": 45390 }, { "epoch": 3.6771710952689567, "grad_norm": 0.07888181507587433, "learning_rate": 1.793510058958549e-05, "loss": 0.2222, "step": 45391 }, { "epoch": 3.677252106286455, "grad_norm": 0.07049840688705444, "learning_rate": 1.7930599936990863e-05, "loss": 0.2432, "step": 45392 }, { "epoch": 3.677333117303953, "grad_norm": 0.07308931648731232, "learning_rate": 1.792609928439624e-05, "loss": 0.2591, "step": 45393 }, { "epoch": 3.677414128321452, "grad_norm": 0.06875211000442505, "learning_rate": 1.792159863180161e-05, "loss": 0.2524, "step": 45394 }, { "epoch": 3.67749513933895, "grad_norm": 0.0841333344578743, "learning_rate": 1.7917097979206987e-05, "loss": 0.2547, "step": 45395 }, { "epoch": 3.6775761503564484, "grad_norm": 0.0642288327217102, "learning_rate": 1.791259732661236e-05, "loss": 0.2641, "step": 45396 }, { "epoch": 3.677657161373947, "grad_norm": 0.07159867137670517, "learning_rate": 1.790809667401773e-05, "loss": 0.2248, "step": 45397 }, { "epoch": 3.6777381723914453, "grad_norm": 0.06401391327381134, "learning_rate": 1.7903596021423108e-05, "loss": 0.2335, "step": 45398 }, { "epoch": 3.6778191834089435, "grad_norm": 0.07349655032157898, "learning_rate": 1.789909536882848e-05, "loss": 0.245, "step": 45399 }, { "epoch": 3.6779001944264422, "grad_norm": 0.08266490697860718, "learning_rate": 1.7894594716233852e-05, "loss": 0.2315, "step": 45400 }, { "epoch": 3.6779812054439405, "grad_norm": 0.06793235242366791, "learning_rate": 1.789009406363923e-05, "loss": 0.2172, "step": 45401 }, { "epoch": 3.6780622164614387, "grad_norm": 0.08288303017616272, "learning_rate": 1.7885593411044602e-05, "loss": 0.244, "step": 45402 }, { "epoch": 3.6781432274789374, "grad_norm": 0.07585656642913818, "learning_rate": 1.7881092758449976e-05, "loss": 0.2338, "step": 45403 }, { "epoch": 3.6782242384964356, "grad_norm": 0.07964345812797546, "learning_rate": 1.787659210585535e-05, "loss": 0.2392, "step": 45404 }, { "epoch": 3.678305249513934, "grad_norm": 0.06705871969461441, "learning_rate": 1.7872091453260723e-05, "loss": 0.2142, "step": 45405 }, { "epoch": 3.678386260531432, "grad_norm": 0.06813385337591171, "learning_rate": 1.7867590800666097e-05, "loss": 0.2099, "step": 45406 }, { "epoch": 3.6784672715489304, "grad_norm": 0.08102632313966751, "learning_rate": 1.786309014807147e-05, "loss": 0.2217, "step": 45407 }, { "epoch": 3.678548282566429, "grad_norm": 0.06885459274053574, "learning_rate": 1.7858589495476847e-05, "loss": 0.2094, "step": 45408 }, { "epoch": 3.6786292935839273, "grad_norm": 0.06557776778936386, "learning_rate": 1.7854088842882218e-05, "loss": 0.2062, "step": 45409 }, { "epoch": 3.6787103046014256, "grad_norm": 0.06176763400435448, "learning_rate": 1.784958819028759e-05, "loss": 0.2385, "step": 45410 }, { "epoch": 3.6787913156189243, "grad_norm": 0.06020414084196091, "learning_rate": 1.7845087537692968e-05, "loss": 0.2034, "step": 45411 }, { "epoch": 3.6788723266364225, "grad_norm": 0.09392343461513519, "learning_rate": 1.784058688509834e-05, "loss": 0.2268, "step": 45412 }, { "epoch": 3.6789533376539207, "grad_norm": 0.06853241473436356, "learning_rate": 1.7836086232503712e-05, "loss": 0.2186, "step": 45413 }, { "epoch": 3.6790343486714194, "grad_norm": 0.05744815245270729, "learning_rate": 1.783158557990909e-05, "loss": 0.2195, "step": 45414 }, { "epoch": 3.6791153596889177, "grad_norm": 0.06485334038734436, "learning_rate": 1.782708492731446e-05, "loss": 0.1801, "step": 45415 }, { "epoch": 3.679196370706416, "grad_norm": 0.06390785425901413, "learning_rate": 1.7822584274719836e-05, "loss": 0.2253, "step": 45416 }, { "epoch": 3.6792773817239146, "grad_norm": 0.07828384637832642, "learning_rate": 1.781808362212521e-05, "loss": 0.2429, "step": 45417 }, { "epoch": 3.679358392741413, "grad_norm": 0.06546802073717117, "learning_rate": 1.781358296953058e-05, "loss": 0.2563, "step": 45418 }, { "epoch": 3.679439403758911, "grad_norm": 0.06929804384708405, "learning_rate": 1.7809082316935957e-05, "loss": 0.2069, "step": 45419 }, { "epoch": 3.67952041477641, "grad_norm": 0.06710172444581985, "learning_rate": 1.780458166434133e-05, "loss": 0.1934, "step": 45420 }, { "epoch": 3.679601425793908, "grad_norm": 0.08107573539018631, "learning_rate": 1.7800081011746704e-05, "loss": 0.2343, "step": 45421 }, { "epoch": 3.6796824368114063, "grad_norm": 0.062125902622938156, "learning_rate": 1.7795580359152078e-05, "loss": 0.177, "step": 45422 }, { "epoch": 3.679763447828905, "grad_norm": 0.07452069222927094, "learning_rate": 1.779107970655745e-05, "loss": 0.1993, "step": 45423 }, { "epoch": 3.679844458846403, "grad_norm": 0.07609012722969055, "learning_rate": 1.7786579053962825e-05, "loss": 0.3101, "step": 45424 }, { "epoch": 3.6799254698639015, "grad_norm": 0.06842630356550217, "learning_rate": 1.77820784013682e-05, "loss": 0.2318, "step": 45425 }, { "epoch": 3.6800064808814, "grad_norm": 0.05769021809101105, "learning_rate": 1.7777577748773572e-05, "loss": 0.1899, "step": 45426 }, { "epoch": 3.6800874918988984, "grad_norm": 0.0755428820848465, "learning_rate": 1.7773077096178946e-05, "loss": 0.2317, "step": 45427 }, { "epoch": 3.6801685029163966, "grad_norm": 0.0577971413731575, "learning_rate": 1.776857644358432e-05, "loss": 0.1978, "step": 45428 }, { "epoch": 3.680249513933895, "grad_norm": 0.0823320671916008, "learning_rate": 1.7764075790989697e-05, "loss": 0.2494, "step": 45429 }, { "epoch": 3.680330524951393, "grad_norm": 0.0709446594119072, "learning_rate": 1.7759575138395067e-05, "loss": 0.2439, "step": 45430 }, { "epoch": 3.680411535968892, "grad_norm": 0.08453939855098724, "learning_rate": 1.775507448580044e-05, "loss": 0.2154, "step": 45431 }, { "epoch": 3.68049254698639, "grad_norm": 0.06712406128644943, "learning_rate": 1.7750573833205817e-05, "loss": 0.2371, "step": 45432 }, { "epoch": 3.6805735580038883, "grad_norm": 0.09066680818796158, "learning_rate": 1.7746073180611188e-05, "loss": 0.2732, "step": 45433 }, { "epoch": 3.680654569021387, "grad_norm": 0.08785214275121689, "learning_rate": 1.7741572528016565e-05, "loss": 0.2374, "step": 45434 }, { "epoch": 3.6807355800388852, "grad_norm": 0.06791125982999802, "learning_rate": 1.7737071875421938e-05, "loss": 0.1922, "step": 45435 }, { "epoch": 3.6808165910563835, "grad_norm": 0.06534958630800247, "learning_rate": 1.773257122282731e-05, "loss": 0.2348, "step": 45436 }, { "epoch": 3.680897602073882, "grad_norm": 0.06191590055823326, "learning_rate": 1.7728070570232685e-05, "loss": 0.2252, "step": 45437 }, { "epoch": 3.6809786130913804, "grad_norm": 0.07369256019592285, "learning_rate": 1.772356991763806e-05, "loss": 0.236, "step": 45438 }, { "epoch": 3.6810596241088787, "grad_norm": 0.07055247575044632, "learning_rate": 1.7719069265043433e-05, "loss": 0.2381, "step": 45439 }, { "epoch": 3.6811406351263773, "grad_norm": 0.06439191102981567, "learning_rate": 1.7714568612448806e-05, "loss": 0.2122, "step": 45440 }, { "epoch": 3.6812216461438756, "grad_norm": 0.08796586841344833, "learning_rate": 1.771006795985418e-05, "loss": 0.2482, "step": 45441 }, { "epoch": 3.681302657161374, "grad_norm": 0.07205065339803696, "learning_rate": 1.7705567307259553e-05, "loss": 0.2202, "step": 45442 }, { "epoch": 3.6813836681788725, "grad_norm": 0.0916776955127716, "learning_rate": 1.7701066654664927e-05, "loss": 0.2375, "step": 45443 }, { "epoch": 3.6814646791963708, "grad_norm": 0.08529650419950485, "learning_rate": 1.76965660020703e-05, "loss": 0.2101, "step": 45444 }, { "epoch": 3.681545690213869, "grad_norm": 0.07912997901439667, "learning_rate": 1.7692065349475674e-05, "loss": 0.2204, "step": 45445 }, { "epoch": 3.6816267012313677, "grad_norm": 0.0730692520737648, "learning_rate": 1.7687564696881048e-05, "loss": 0.2319, "step": 45446 }, { "epoch": 3.681707712248866, "grad_norm": 0.09680872410535812, "learning_rate": 1.7683064044286425e-05, "loss": 0.2498, "step": 45447 }, { "epoch": 3.681788723266364, "grad_norm": 0.07463949173688889, "learning_rate": 1.7678563391691795e-05, "loss": 0.2261, "step": 45448 }, { "epoch": 3.681869734283863, "grad_norm": 0.06920517235994339, "learning_rate": 1.767406273909717e-05, "loss": 0.2513, "step": 45449 }, { "epoch": 3.681950745301361, "grad_norm": 0.07529915124177933, "learning_rate": 1.7669562086502546e-05, "loss": 0.2541, "step": 45450 }, { "epoch": 3.6820317563188594, "grad_norm": 0.058915991336107254, "learning_rate": 1.7665061433907916e-05, "loss": 0.1903, "step": 45451 }, { "epoch": 3.6821127673363576, "grad_norm": 0.0809895247220993, "learning_rate": 1.766056078131329e-05, "loss": 0.2207, "step": 45452 }, { "epoch": 3.682193778353856, "grad_norm": 0.0563754104077816, "learning_rate": 1.7656060128718666e-05, "loss": 0.227, "step": 45453 }, { "epoch": 3.6822747893713546, "grad_norm": 0.09436178207397461, "learning_rate": 1.7651559476124037e-05, "loss": 0.2434, "step": 45454 }, { "epoch": 3.682355800388853, "grad_norm": 0.08379881083965302, "learning_rate": 1.7647058823529414e-05, "loss": 0.2354, "step": 45455 }, { "epoch": 3.682436811406351, "grad_norm": 0.06248372420668602, "learning_rate": 1.7642558170934787e-05, "loss": 0.2303, "step": 45456 }, { "epoch": 3.6825178224238497, "grad_norm": 0.0720660537481308, "learning_rate": 1.763805751834016e-05, "loss": 0.2351, "step": 45457 }, { "epoch": 3.682598833441348, "grad_norm": 0.07073300331830978, "learning_rate": 1.7633556865745534e-05, "loss": 0.2239, "step": 45458 }, { "epoch": 3.682679844458846, "grad_norm": 0.0651804730296135, "learning_rate": 1.7629056213150908e-05, "loss": 0.2001, "step": 45459 }, { "epoch": 3.682760855476345, "grad_norm": 0.06839761137962341, "learning_rate": 1.762455556055628e-05, "loss": 0.192, "step": 45460 }, { "epoch": 3.682841866493843, "grad_norm": 0.06647450476884842, "learning_rate": 1.7620054907961655e-05, "loss": 0.2151, "step": 45461 }, { "epoch": 3.6829228775113414, "grad_norm": 0.06537552177906036, "learning_rate": 1.761555425536703e-05, "loss": 0.2231, "step": 45462 }, { "epoch": 3.68300388852884, "grad_norm": 0.0682782530784607, "learning_rate": 1.7611053602772402e-05, "loss": 0.1934, "step": 45463 }, { "epoch": 3.6830848995463383, "grad_norm": 0.07463079690933228, "learning_rate": 1.7606552950177776e-05, "loss": 0.2753, "step": 45464 }, { "epoch": 3.6831659105638366, "grad_norm": 0.06640588492155075, "learning_rate": 1.760205229758315e-05, "loss": 0.2724, "step": 45465 }, { "epoch": 3.6832469215813353, "grad_norm": 0.06108155474066734, "learning_rate": 1.7597551644988523e-05, "loss": 0.2318, "step": 45466 }, { "epoch": 3.6833279325988335, "grad_norm": 0.07797868549823761, "learning_rate": 1.7593050992393897e-05, "loss": 0.2367, "step": 45467 }, { "epoch": 3.6834089436163318, "grad_norm": 0.06545042991638184, "learning_rate": 1.7588550339799274e-05, "loss": 0.2378, "step": 45468 }, { "epoch": 3.6834899546338304, "grad_norm": 0.06987342238426208, "learning_rate": 1.7584049687204644e-05, "loss": 0.2367, "step": 45469 }, { "epoch": 3.6835709656513287, "grad_norm": 0.0632578432559967, "learning_rate": 1.7579549034610018e-05, "loss": 0.2142, "step": 45470 }, { "epoch": 3.683651976668827, "grad_norm": 0.058642685413360596, "learning_rate": 1.7575048382015395e-05, "loss": 0.1965, "step": 45471 }, { "epoch": 3.683732987686325, "grad_norm": 0.07923772931098938, "learning_rate": 1.7570547729420765e-05, "loss": 0.2682, "step": 45472 }, { "epoch": 3.683813998703824, "grad_norm": 0.05873147025704384, "learning_rate": 1.756604707682614e-05, "loss": 0.1975, "step": 45473 }, { "epoch": 3.683895009721322, "grad_norm": 0.07675875723361969, "learning_rate": 1.7561546424231515e-05, "loss": 0.2062, "step": 45474 }, { "epoch": 3.6839760207388204, "grad_norm": 0.07482752203941345, "learning_rate": 1.755704577163689e-05, "loss": 0.2373, "step": 45475 }, { "epoch": 3.6840570317563186, "grad_norm": 0.075741246342659, "learning_rate": 1.7552545119042263e-05, "loss": 0.2375, "step": 45476 }, { "epoch": 3.6841380427738173, "grad_norm": 0.07882252335548401, "learning_rate": 1.7548044466447636e-05, "loss": 0.2564, "step": 45477 }, { "epoch": 3.6842190537913155, "grad_norm": 0.07127309590578079, "learning_rate": 1.754354381385301e-05, "loss": 0.2293, "step": 45478 }, { "epoch": 3.684300064808814, "grad_norm": 0.09695709496736526, "learning_rate": 1.7539043161258383e-05, "loss": 0.2266, "step": 45479 }, { "epoch": 3.6843810758263125, "grad_norm": 0.06037114933133125, "learning_rate": 1.7534542508663757e-05, "loss": 0.2087, "step": 45480 }, { "epoch": 3.6844620868438107, "grad_norm": 0.08382045477628708, "learning_rate": 1.753004185606913e-05, "loss": 0.2198, "step": 45481 }, { "epoch": 3.684543097861309, "grad_norm": 0.08488260954618454, "learning_rate": 1.7525541203474504e-05, "loss": 0.2517, "step": 45482 }, { "epoch": 3.6846241088788076, "grad_norm": 0.08701542764902115, "learning_rate": 1.7521040550879878e-05, "loss": 0.2522, "step": 45483 }, { "epoch": 3.684705119896306, "grad_norm": 0.06933901458978653, "learning_rate": 1.751653989828525e-05, "loss": 0.2533, "step": 45484 }, { "epoch": 3.684786130913804, "grad_norm": 0.07655087113380432, "learning_rate": 1.7512039245690625e-05, "loss": 0.2582, "step": 45485 }, { "epoch": 3.684867141931303, "grad_norm": 0.07492398470640182, "learning_rate": 1.7507538593096e-05, "loss": 0.2234, "step": 45486 }, { "epoch": 3.684948152948801, "grad_norm": 0.06873263418674469, "learning_rate": 1.7503037940501372e-05, "loss": 0.2216, "step": 45487 }, { "epoch": 3.6850291639662993, "grad_norm": 0.07171261310577393, "learning_rate": 1.7498537287906746e-05, "loss": 0.2238, "step": 45488 }, { "epoch": 3.685110174983798, "grad_norm": 0.08120883256196976, "learning_rate": 1.7494036635312123e-05, "loss": 0.2817, "step": 45489 }, { "epoch": 3.6851911860012962, "grad_norm": 0.0914805680513382, "learning_rate": 1.7489535982717496e-05, "loss": 0.2555, "step": 45490 }, { "epoch": 3.6852721970187945, "grad_norm": 0.06005210801959038, "learning_rate": 1.7485035330122867e-05, "loss": 0.1913, "step": 45491 }, { "epoch": 3.685353208036293, "grad_norm": 0.0790003314614296, "learning_rate": 1.7480534677528244e-05, "loss": 0.2199, "step": 45492 }, { "epoch": 3.6854342190537914, "grad_norm": 0.09822786599397659, "learning_rate": 1.7476034024933617e-05, "loss": 0.2557, "step": 45493 }, { "epoch": 3.6855152300712897, "grad_norm": 0.08229862153530121, "learning_rate": 1.747153337233899e-05, "loss": 0.2445, "step": 45494 }, { "epoch": 3.685596241088788, "grad_norm": 0.07014301419258118, "learning_rate": 1.7467032719744365e-05, "loss": 0.2522, "step": 45495 }, { "epoch": 3.6856772521062866, "grad_norm": 0.06729548424482346, "learning_rate": 1.7462532067149738e-05, "loss": 0.216, "step": 45496 }, { "epoch": 3.685758263123785, "grad_norm": 0.06353531032800674, "learning_rate": 1.7458031414555112e-05, "loss": 0.2392, "step": 45497 }, { "epoch": 3.685839274141283, "grad_norm": 0.06674017757177353, "learning_rate": 1.7453530761960485e-05, "loss": 0.2371, "step": 45498 }, { "epoch": 3.6859202851587813, "grad_norm": 0.07018964737653732, "learning_rate": 1.744903010936586e-05, "loss": 0.2566, "step": 45499 }, { "epoch": 3.68600129617628, "grad_norm": 0.06647379696369171, "learning_rate": 1.7444529456771233e-05, "loss": 0.2034, "step": 45500 }, { "epoch": 3.6860823071937783, "grad_norm": 0.08332670480012894, "learning_rate": 1.7440028804176606e-05, "loss": 0.2416, "step": 45501 }, { "epoch": 3.6861633182112765, "grad_norm": 0.07239510118961334, "learning_rate": 1.743552815158198e-05, "loss": 0.1988, "step": 45502 }, { "epoch": 3.686244329228775, "grad_norm": 0.08834824711084366, "learning_rate": 1.7431027498987353e-05, "loss": 0.234, "step": 45503 }, { "epoch": 3.6863253402462735, "grad_norm": 0.06722290068864822, "learning_rate": 1.7426526846392727e-05, "loss": 0.236, "step": 45504 }, { "epoch": 3.6864063512637717, "grad_norm": 0.07778163254261017, "learning_rate": 1.74220261937981e-05, "loss": 0.2244, "step": 45505 }, { "epoch": 3.6864873622812704, "grad_norm": 0.06533099710941315, "learning_rate": 1.7417525541203474e-05, "loss": 0.181, "step": 45506 }, { "epoch": 3.6865683732987686, "grad_norm": 0.07727663218975067, "learning_rate": 1.741302488860885e-05, "loss": 0.2273, "step": 45507 }, { "epoch": 3.686649384316267, "grad_norm": 0.08147032558917999, "learning_rate": 1.7408524236014225e-05, "loss": 0.2455, "step": 45508 }, { "epoch": 3.6867303953337656, "grad_norm": 0.07328832149505615, "learning_rate": 1.7404023583419595e-05, "loss": 0.2112, "step": 45509 }, { "epoch": 3.686811406351264, "grad_norm": 0.08036161959171295, "learning_rate": 1.7399522930824972e-05, "loss": 0.2313, "step": 45510 }, { "epoch": 3.686892417368762, "grad_norm": 0.060651037842035294, "learning_rate": 1.7395022278230346e-05, "loss": 0.181, "step": 45511 }, { "epoch": 3.6869734283862607, "grad_norm": 0.06498783826828003, "learning_rate": 1.7390521625635716e-05, "loss": 0.2286, "step": 45512 }, { "epoch": 3.687054439403759, "grad_norm": 0.07742474973201752, "learning_rate": 1.7386020973041093e-05, "loss": 0.2486, "step": 45513 }, { "epoch": 3.6871354504212572, "grad_norm": 0.08565546572208405, "learning_rate": 1.7381520320446466e-05, "loss": 0.2706, "step": 45514 }, { "epoch": 3.687216461438756, "grad_norm": 0.05786954611539841, "learning_rate": 1.737701966785184e-05, "loss": 0.2004, "step": 45515 }, { "epoch": 3.687297472456254, "grad_norm": 0.07234077900648117, "learning_rate": 1.7372519015257214e-05, "loss": 0.2385, "step": 45516 }, { "epoch": 3.6873784834737524, "grad_norm": 0.06998328864574432, "learning_rate": 1.7368018362662587e-05, "loss": 0.2232, "step": 45517 }, { "epoch": 3.6874594944912507, "grad_norm": 0.08726705610752106, "learning_rate": 1.736351771006796e-05, "loss": 0.2643, "step": 45518 }, { "epoch": 3.6875405055087493, "grad_norm": 0.06822450459003448, "learning_rate": 1.7359017057473334e-05, "loss": 0.1948, "step": 45519 }, { "epoch": 3.6876215165262476, "grad_norm": 0.08558908849954605, "learning_rate": 1.7354516404878708e-05, "loss": 0.214, "step": 45520 }, { "epoch": 3.687702527543746, "grad_norm": 0.0689425840973854, "learning_rate": 1.735001575228408e-05, "loss": 0.213, "step": 45521 }, { "epoch": 3.687783538561244, "grad_norm": 0.06478188931941986, "learning_rate": 1.7345515099689455e-05, "loss": 0.2408, "step": 45522 }, { "epoch": 3.6878645495787428, "grad_norm": 0.06854352355003357, "learning_rate": 1.734101444709483e-05, "loss": 0.2573, "step": 45523 }, { "epoch": 3.687945560596241, "grad_norm": 0.09567511081695557, "learning_rate": 1.7336513794500202e-05, "loss": 0.2461, "step": 45524 }, { "epoch": 3.6880265716137393, "grad_norm": 0.08633017539978027, "learning_rate": 1.7332013141905576e-05, "loss": 0.2201, "step": 45525 }, { "epoch": 3.688107582631238, "grad_norm": 0.07063650339841843, "learning_rate": 1.7327512489310953e-05, "loss": 0.1685, "step": 45526 }, { "epoch": 3.688188593648736, "grad_norm": 0.07147509604692459, "learning_rate": 1.7323011836716323e-05, "loss": 0.2507, "step": 45527 }, { "epoch": 3.6882696046662344, "grad_norm": 0.08037501573562622, "learning_rate": 1.73185111841217e-05, "loss": 0.2337, "step": 45528 }, { "epoch": 3.688350615683733, "grad_norm": 0.06710902601480484, "learning_rate": 1.7314010531527074e-05, "loss": 0.2117, "step": 45529 }, { "epoch": 3.6884316267012314, "grad_norm": 0.07238079607486725, "learning_rate": 1.7309509878932444e-05, "loss": 0.2188, "step": 45530 }, { "epoch": 3.6885126377187296, "grad_norm": 0.07218148559331894, "learning_rate": 1.730500922633782e-05, "loss": 0.227, "step": 45531 }, { "epoch": 3.6885936487362283, "grad_norm": 0.09336385875940323, "learning_rate": 1.7300508573743195e-05, "loss": 0.2507, "step": 45532 }, { "epoch": 3.6886746597537265, "grad_norm": 0.08338413387537003, "learning_rate": 1.7296007921148568e-05, "loss": 0.2168, "step": 45533 }, { "epoch": 3.688755670771225, "grad_norm": 0.07480164617300034, "learning_rate": 1.7291507268553942e-05, "loss": 0.2406, "step": 45534 }, { "epoch": 3.6888366817887235, "grad_norm": 0.061553705483675, "learning_rate": 1.7287006615959315e-05, "loss": 0.1878, "step": 45535 }, { "epoch": 3.6889176928062217, "grad_norm": 0.08677572011947632, "learning_rate": 1.728250596336469e-05, "loss": 0.226, "step": 45536 }, { "epoch": 3.68899870382372, "grad_norm": 0.096988245844841, "learning_rate": 1.7278005310770063e-05, "loss": 0.289, "step": 45537 }, { "epoch": 3.6890797148412187, "grad_norm": 0.06231336668133736, "learning_rate": 1.7273504658175436e-05, "loss": 0.2039, "step": 45538 }, { "epoch": 3.689160725858717, "grad_norm": 0.07273116707801819, "learning_rate": 1.726900400558081e-05, "loss": 0.2481, "step": 45539 }, { "epoch": 3.689241736876215, "grad_norm": 0.06380755454301834, "learning_rate": 1.7264503352986183e-05, "loss": 0.2278, "step": 45540 }, { "epoch": 3.6893227478937134, "grad_norm": 0.07865901291370392, "learning_rate": 1.726000270039156e-05, "loss": 0.237, "step": 45541 }, { "epoch": 3.689403758911212, "grad_norm": 0.08549056202173233, "learning_rate": 1.725550204779693e-05, "loss": 0.196, "step": 45542 }, { "epoch": 3.6894847699287103, "grad_norm": 0.07149454206228256, "learning_rate": 1.7251001395202304e-05, "loss": 0.2474, "step": 45543 }, { "epoch": 3.6895657809462086, "grad_norm": 0.07113026827573776, "learning_rate": 1.724650074260768e-05, "loss": 0.2095, "step": 45544 }, { "epoch": 3.689646791963707, "grad_norm": 0.07473193854093552, "learning_rate": 1.724200009001305e-05, "loss": 0.2236, "step": 45545 }, { "epoch": 3.6897278029812055, "grad_norm": 0.08211532980203629, "learning_rate": 1.723749943741843e-05, "loss": 0.2621, "step": 45546 }, { "epoch": 3.6898088139987038, "grad_norm": 0.07540342956781387, "learning_rate": 1.7232998784823802e-05, "loss": 0.246, "step": 45547 }, { "epoch": 3.689889825016202, "grad_norm": 0.06144927814602852, "learning_rate": 1.7228498132229172e-05, "loss": 0.1974, "step": 45548 }, { "epoch": 3.6899708360337007, "grad_norm": 0.06908820569515228, "learning_rate": 1.722399747963455e-05, "loss": 0.2061, "step": 45549 }, { "epoch": 3.690051847051199, "grad_norm": 0.07357897609472275, "learning_rate": 1.7219496827039923e-05, "loss": 0.1908, "step": 45550 }, { "epoch": 3.690132858068697, "grad_norm": 0.07140400260686874, "learning_rate": 1.7214996174445293e-05, "loss": 0.2132, "step": 45551 }, { "epoch": 3.690213869086196, "grad_norm": 0.07593827694654465, "learning_rate": 1.721049552185067e-05, "loss": 0.2472, "step": 45552 }, { "epoch": 3.690294880103694, "grad_norm": 0.07725334167480469, "learning_rate": 1.7205994869256044e-05, "loss": 0.2395, "step": 45553 }, { "epoch": 3.6903758911211924, "grad_norm": 0.08335854858160019, "learning_rate": 1.7201494216661417e-05, "loss": 0.2146, "step": 45554 }, { "epoch": 3.690456902138691, "grad_norm": 0.06782727688550949, "learning_rate": 1.719699356406679e-05, "loss": 0.2199, "step": 45555 }, { "epoch": 3.6905379131561893, "grad_norm": 0.08147486299276352, "learning_rate": 1.7192492911472164e-05, "loss": 0.2419, "step": 45556 }, { "epoch": 3.6906189241736875, "grad_norm": 0.08341187983751297, "learning_rate": 1.7187992258877538e-05, "loss": 0.2484, "step": 45557 }, { "epoch": 3.690699935191186, "grad_norm": 0.07176518440246582, "learning_rate": 1.718349160628291e-05, "loss": 0.2306, "step": 45558 }, { "epoch": 3.6907809462086845, "grad_norm": 0.071227066218853, "learning_rate": 1.7178990953688285e-05, "loss": 0.2252, "step": 45559 }, { "epoch": 3.6908619572261827, "grad_norm": 0.06585358828306198, "learning_rate": 1.717449030109366e-05, "loss": 0.2561, "step": 45560 }, { "epoch": 3.6909429682436814, "grad_norm": 0.10919830203056335, "learning_rate": 1.7169989648499032e-05, "loss": 0.2707, "step": 45561 }, { "epoch": 3.6910239792611796, "grad_norm": 0.08778324723243713, "learning_rate": 1.716548899590441e-05, "loss": 0.2543, "step": 45562 }, { "epoch": 3.691104990278678, "grad_norm": 0.06133211776614189, "learning_rate": 1.716098834330978e-05, "loss": 0.2007, "step": 45563 }, { "epoch": 3.691186001296176, "grad_norm": 0.06192644312977791, "learning_rate": 1.7156487690715153e-05, "loss": 0.2036, "step": 45564 }, { "epoch": 3.691267012313675, "grad_norm": 0.07539551705121994, "learning_rate": 1.715198703812053e-05, "loss": 0.2636, "step": 45565 }, { "epoch": 3.691348023331173, "grad_norm": 0.06967008113861084, "learning_rate": 1.71474863855259e-05, "loss": 0.2439, "step": 45566 }, { "epoch": 3.6914290343486713, "grad_norm": 0.08135908842086792, "learning_rate": 1.7142985732931278e-05, "loss": 0.2111, "step": 45567 }, { "epoch": 3.6915100453661696, "grad_norm": 0.07239343971014023, "learning_rate": 1.713848508033665e-05, "loss": 0.235, "step": 45568 }, { "epoch": 3.6915910563836682, "grad_norm": 0.07607700675725937, "learning_rate": 1.713398442774202e-05, "loss": 0.2198, "step": 45569 }, { "epoch": 3.6916720674011665, "grad_norm": 0.0712643638253212, "learning_rate": 1.71294837751474e-05, "loss": 0.2369, "step": 45570 }, { "epoch": 3.6917530784186647, "grad_norm": 0.06370777636766434, "learning_rate": 1.7124983122552772e-05, "loss": 0.1989, "step": 45571 }, { "epoch": 3.6918340894361634, "grad_norm": 0.06736060231924057, "learning_rate": 1.7120482469958142e-05, "loss": 0.2148, "step": 45572 }, { "epoch": 3.6919151004536617, "grad_norm": 0.05656532570719719, "learning_rate": 1.711598181736352e-05, "loss": 0.1881, "step": 45573 }, { "epoch": 3.69199611147116, "grad_norm": 0.07122478634119034, "learning_rate": 1.7111481164768893e-05, "loss": 0.2198, "step": 45574 }, { "epoch": 3.6920771224886586, "grad_norm": 0.08074306696653366, "learning_rate": 1.7106980512174266e-05, "loss": 0.2857, "step": 45575 }, { "epoch": 3.692158133506157, "grad_norm": 0.07378386706113815, "learning_rate": 1.710247985957964e-05, "loss": 0.203, "step": 45576 }, { "epoch": 3.692239144523655, "grad_norm": 0.06506312638521194, "learning_rate": 1.7097979206985014e-05, "loss": 0.2338, "step": 45577 }, { "epoch": 3.692320155541154, "grad_norm": 0.0626230537891388, "learning_rate": 1.7093478554390387e-05, "loss": 0.2434, "step": 45578 }, { "epoch": 3.692401166558652, "grad_norm": 0.09149368107318878, "learning_rate": 1.708897790179576e-05, "loss": 0.2593, "step": 45579 }, { "epoch": 3.6924821775761503, "grad_norm": 0.07215756922960281, "learning_rate": 1.7084477249201138e-05, "loss": 0.219, "step": 45580 }, { "epoch": 3.692563188593649, "grad_norm": 0.07592841237783432, "learning_rate": 1.7079976596606508e-05, "loss": 0.2666, "step": 45581 }, { "epoch": 3.692644199611147, "grad_norm": 0.0961163192987442, "learning_rate": 1.707547594401188e-05, "loss": 0.2635, "step": 45582 }, { "epoch": 3.6927252106286454, "grad_norm": 0.08747298270463943, "learning_rate": 1.707097529141726e-05, "loss": 0.233, "step": 45583 }, { "epoch": 3.692806221646144, "grad_norm": 0.08068632334470749, "learning_rate": 1.706647463882263e-05, "loss": 0.2191, "step": 45584 }, { "epoch": 3.6928872326636424, "grad_norm": 0.07764396071434021, "learning_rate": 1.7061973986228002e-05, "loss": 0.2235, "step": 45585 }, { "epoch": 3.6929682436811406, "grad_norm": 0.07049860060214996, "learning_rate": 1.705747333363338e-05, "loss": 0.2229, "step": 45586 }, { "epoch": 3.693049254698639, "grad_norm": 0.06508810073137283, "learning_rate": 1.705297268103875e-05, "loss": 0.2215, "step": 45587 }, { "epoch": 3.693130265716137, "grad_norm": 0.06824822723865509, "learning_rate": 1.7048472028444127e-05, "loss": 0.1971, "step": 45588 }, { "epoch": 3.693211276733636, "grad_norm": 0.07630736380815506, "learning_rate": 1.70439713758495e-05, "loss": 0.2508, "step": 45589 }, { "epoch": 3.693292287751134, "grad_norm": 0.06600065529346466, "learning_rate": 1.703947072325487e-05, "loss": 0.2006, "step": 45590 }, { "epoch": 3.6933732987686323, "grad_norm": 0.0757368728518486, "learning_rate": 1.7034970070660247e-05, "loss": 0.235, "step": 45591 }, { "epoch": 3.693454309786131, "grad_norm": 0.0807269960641861, "learning_rate": 1.703046941806562e-05, "loss": 0.2203, "step": 45592 }, { "epoch": 3.6935353208036292, "grad_norm": 0.0880458652973175, "learning_rate": 1.7025968765470995e-05, "loss": 0.1987, "step": 45593 }, { "epoch": 3.6936163318211275, "grad_norm": 0.07349114865064621, "learning_rate": 1.7021468112876368e-05, "loss": 0.242, "step": 45594 }, { "epoch": 3.693697342838626, "grad_norm": 0.07621750235557556, "learning_rate": 1.7016967460281742e-05, "loss": 0.2247, "step": 45595 }, { "epoch": 3.6937783538561244, "grad_norm": 0.07071096450090408, "learning_rate": 1.7012466807687115e-05, "loss": 0.2282, "step": 45596 }, { "epoch": 3.6938593648736227, "grad_norm": 0.05581112951040268, "learning_rate": 1.700796615509249e-05, "loss": 0.196, "step": 45597 }, { "epoch": 3.6939403758911213, "grad_norm": 0.08093393594026566, "learning_rate": 1.7003465502497863e-05, "loss": 0.2417, "step": 45598 }, { "epoch": 3.6940213869086196, "grad_norm": 0.07044923305511475, "learning_rate": 1.6998964849903236e-05, "loss": 0.2366, "step": 45599 }, { "epoch": 3.694102397926118, "grad_norm": 0.0671868696808815, "learning_rate": 1.699446419730861e-05, "loss": 0.1992, "step": 45600 }, { "epoch": 3.6941834089436165, "grad_norm": 0.06780609488487244, "learning_rate": 1.6989963544713987e-05, "loss": 0.1734, "step": 45601 }, { "epoch": 3.6942644199611148, "grad_norm": 0.07801494002342224, "learning_rate": 1.6985462892119357e-05, "loss": 0.2503, "step": 45602 }, { "epoch": 3.694345430978613, "grad_norm": 0.07000548392534256, "learning_rate": 1.698096223952473e-05, "loss": 0.2211, "step": 45603 }, { "epoch": 3.6944264419961117, "grad_norm": 0.09492857754230499, "learning_rate": 1.6976461586930108e-05, "loss": 0.2736, "step": 45604 }, { "epoch": 3.69450745301361, "grad_norm": 0.08605027943849564, "learning_rate": 1.6971960934335478e-05, "loss": 0.2609, "step": 45605 }, { "epoch": 3.694588464031108, "grad_norm": 0.07421616464853287, "learning_rate": 1.6967460281740855e-05, "loss": 0.2275, "step": 45606 }, { "epoch": 3.694669475048607, "grad_norm": 0.08524744212627411, "learning_rate": 1.696295962914623e-05, "loss": 0.2311, "step": 45607 }, { "epoch": 3.694750486066105, "grad_norm": 0.06704798340797424, "learning_rate": 1.69584589765516e-05, "loss": 0.2533, "step": 45608 }, { "epoch": 3.6948314970836034, "grad_norm": 0.049864400178194046, "learning_rate": 1.6953958323956976e-05, "loss": 0.2026, "step": 45609 }, { "epoch": 3.6949125081011016, "grad_norm": 0.07128980755805969, "learning_rate": 1.694945767136235e-05, "loss": 0.2313, "step": 45610 }, { "epoch": 3.6949935191186, "grad_norm": 0.06947165727615356, "learning_rate": 1.694495701876772e-05, "loss": 0.2192, "step": 45611 }, { "epoch": 3.6950745301360985, "grad_norm": 0.06866627931594849, "learning_rate": 1.6940456366173096e-05, "loss": 0.2308, "step": 45612 }, { "epoch": 3.695155541153597, "grad_norm": 0.07855135202407837, "learning_rate": 1.693595571357847e-05, "loss": 0.2266, "step": 45613 }, { "epoch": 3.695236552171095, "grad_norm": 0.06514532119035721, "learning_rate": 1.6931455060983844e-05, "loss": 0.2095, "step": 45614 }, { "epoch": 3.6953175631885937, "grad_norm": 0.058014001697301865, "learning_rate": 1.6926954408389217e-05, "loss": 0.2925, "step": 45615 }, { "epoch": 3.695398574206092, "grad_norm": 0.06363331526517868, "learning_rate": 1.692245375579459e-05, "loss": 0.2324, "step": 45616 }, { "epoch": 3.69547958522359, "grad_norm": 0.06826046854257584, "learning_rate": 1.6917953103199964e-05, "loss": 0.2338, "step": 45617 }, { "epoch": 3.695560596241089, "grad_norm": 0.08055315911769867, "learning_rate": 1.6913452450605338e-05, "loss": 0.2187, "step": 45618 }, { "epoch": 3.695641607258587, "grad_norm": 0.07960078120231628, "learning_rate": 1.6908951798010715e-05, "loss": 0.2421, "step": 45619 }, { "epoch": 3.6957226182760854, "grad_norm": 0.05961255356669426, "learning_rate": 1.6904451145416085e-05, "loss": 0.2017, "step": 45620 }, { "epoch": 3.695803629293584, "grad_norm": 0.07862067967653275, "learning_rate": 1.689995049282146e-05, "loss": 0.2251, "step": 45621 }, { "epoch": 3.6958846403110823, "grad_norm": 0.07465961575508118, "learning_rate": 1.6895449840226836e-05, "loss": 0.222, "step": 45622 }, { "epoch": 3.6959656513285806, "grad_norm": 0.07449892908334732, "learning_rate": 1.6890949187632206e-05, "loss": 0.2295, "step": 45623 }, { "epoch": 3.6960466623460793, "grad_norm": 0.0704660639166832, "learning_rate": 1.688644853503758e-05, "loss": 0.2101, "step": 45624 }, { "epoch": 3.6961276733635775, "grad_norm": 0.07495720684528351, "learning_rate": 1.6881947882442957e-05, "loss": 0.2571, "step": 45625 }, { "epoch": 3.6962086843810757, "grad_norm": 0.06861800700426102, "learning_rate": 1.6877447229848327e-05, "loss": 0.2147, "step": 45626 }, { "epoch": 3.6962896953985744, "grad_norm": 0.07803268730640411, "learning_rate": 1.6872946577253704e-05, "loss": 0.2491, "step": 45627 }, { "epoch": 3.6963707064160727, "grad_norm": 0.0892467275261879, "learning_rate": 1.6868445924659077e-05, "loss": 0.2448, "step": 45628 }, { "epoch": 3.696451717433571, "grad_norm": 0.07141023129224777, "learning_rate": 1.6863945272064448e-05, "loss": 0.215, "step": 45629 }, { "epoch": 3.6965327284510696, "grad_norm": 0.06902121752500534, "learning_rate": 1.6859444619469825e-05, "loss": 0.2112, "step": 45630 }, { "epoch": 3.696613739468568, "grad_norm": 0.058419931679964066, "learning_rate": 1.6854943966875198e-05, "loss": 0.196, "step": 45631 }, { "epoch": 3.696694750486066, "grad_norm": 0.06349142640829086, "learning_rate": 1.685044331428057e-05, "loss": 0.2146, "step": 45632 }, { "epoch": 3.6967757615035644, "grad_norm": 0.07252699136734009, "learning_rate": 1.6845942661685945e-05, "loss": 0.2223, "step": 45633 }, { "epoch": 3.6968567725210626, "grad_norm": 0.09001344442367554, "learning_rate": 1.684144200909132e-05, "loss": 0.253, "step": 45634 }, { "epoch": 3.6969377835385613, "grad_norm": 0.058856382966041565, "learning_rate": 1.6836941356496693e-05, "loss": 0.2569, "step": 45635 }, { "epoch": 3.6970187945560595, "grad_norm": 0.0726538598537445, "learning_rate": 1.6832440703902066e-05, "loss": 0.216, "step": 45636 }, { "epoch": 3.6970998055735578, "grad_norm": 0.06102930009365082, "learning_rate": 1.682794005130744e-05, "loss": 0.221, "step": 45637 }, { "epoch": 3.6971808165910565, "grad_norm": 0.06977268308401108, "learning_rate": 1.6823439398712814e-05, "loss": 0.2021, "step": 45638 }, { "epoch": 3.6972618276085547, "grad_norm": 0.06037342548370361, "learning_rate": 1.6818938746118187e-05, "loss": 0.2138, "step": 45639 }, { "epoch": 3.697342838626053, "grad_norm": 0.07608211785554886, "learning_rate": 1.6814438093523564e-05, "loss": 0.2652, "step": 45640 }, { "epoch": 3.6974238496435516, "grad_norm": 0.07579577714204788, "learning_rate": 1.6809937440928934e-05, "loss": 0.2378, "step": 45641 }, { "epoch": 3.69750486066105, "grad_norm": 0.08454374969005585, "learning_rate": 1.6805436788334308e-05, "loss": 0.2475, "step": 45642 }, { "epoch": 3.697585871678548, "grad_norm": 0.07013771682977676, "learning_rate": 1.6800936135739685e-05, "loss": 0.2294, "step": 45643 }, { "epoch": 3.697666882696047, "grad_norm": 0.07883304357528687, "learning_rate": 1.6796435483145055e-05, "loss": 0.2483, "step": 45644 }, { "epoch": 3.697747893713545, "grad_norm": 0.08239931613206863, "learning_rate": 1.679193483055043e-05, "loss": 0.2347, "step": 45645 }, { "epoch": 3.6978289047310433, "grad_norm": 0.06723158061504364, "learning_rate": 1.6787434177955806e-05, "loss": 0.2109, "step": 45646 }, { "epoch": 3.697909915748542, "grad_norm": 0.08971650153398514, "learning_rate": 1.6782933525361176e-05, "loss": 0.217, "step": 45647 }, { "epoch": 3.6979909267660402, "grad_norm": 0.08268365263938904, "learning_rate": 1.6778432872766553e-05, "loss": 0.2135, "step": 45648 }, { "epoch": 3.6980719377835385, "grad_norm": 0.06352110952138901, "learning_rate": 1.6773932220171927e-05, "loss": 0.1921, "step": 45649 }, { "epoch": 3.698152948801037, "grad_norm": 0.08296002447605133, "learning_rate": 1.6769431567577297e-05, "loss": 0.2022, "step": 45650 }, { "epoch": 3.6982339598185354, "grad_norm": 0.07762415707111359, "learning_rate": 1.6764930914982674e-05, "loss": 0.2247, "step": 45651 }, { "epoch": 3.6983149708360337, "grad_norm": 0.06467720866203308, "learning_rate": 1.6760430262388047e-05, "loss": 0.2153, "step": 45652 }, { "epoch": 3.6983959818535324, "grad_norm": 0.07439886033535004, "learning_rate": 1.675592960979342e-05, "loss": 0.2479, "step": 45653 }, { "epoch": 3.6984769928710306, "grad_norm": 0.05738019570708275, "learning_rate": 1.6751428957198795e-05, "loss": 0.212, "step": 45654 }, { "epoch": 3.698558003888529, "grad_norm": 0.07403714954853058, "learning_rate": 1.6746928304604168e-05, "loss": 0.2277, "step": 45655 }, { "epoch": 3.698639014906027, "grad_norm": 0.07722627371549606, "learning_rate": 1.6742427652009542e-05, "loss": 0.2012, "step": 45656 }, { "epoch": 3.6987200259235253, "grad_norm": 0.0807010680437088, "learning_rate": 1.6737926999414915e-05, "loss": 0.2054, "step": 45657 }, { "epoch": 3.698801036941024, "grad_norm": 0.08320476114749908, "learning_rate": 1.673342634682029e-05, "loss": 0.2418, "step": 45658 }, { "epoch": 3.6988820479585223, "grad_norm": 0.0703231692314148, "learning_rate": 1.6728925694225663e-05, "loss": 0.2332, "step": 45659 }, { "epoch": 3.6989630589760205, "grad_norm": 0.06997893750667572, "learning_rate": 1.6724425041631036e-05, "loss": 0.2516, "step": 45660 }, { "epoch": 3.699044069993519, "grad_norm": 0.06285632401704788, "learning_rate": 1.6719924389036413e-05, "loss": 0.2355, "step": 45661 }, { "epoch": 3.6991250810110174, "grad_norm": 0.07425893098115921, "learning_rate": 1.6715423736441783e-05, "loss": 0.2049, "step": 45662 }, { "epoch": 3.6992060920285157, "grad_norm": 0.07462365180253983, "learning_rate": 1.6710923083847157e-05, "loss": 0.217, "step": 45663 }, { "epoch": 3.6992871030460144, "grad_norm": 0.07210385799407959, "learning_rate": 1.6706422431252534e-05, "loss": 0.2285, "step": 45664 }, { "epoch": 3.6993681140635126, "grad_norm": 0.06601511687040329, "learning_rate": 1.6701921778657904e-05, "loss": 0.2469, "step": 45665 }, { "epoch": 3.699449125081011, "grad_norm": 0.07676741480827332, "learning_rate": 1.669742112606328e-05, "loss": 0.2265, "step": 45666 }, { "epoch": 3.6995301360985096, "grad_norm": 0.058619819581508636, "learning_rate": 1.6692920473468655e-05, "loss": 0.2046, "step": 45667 }, { "epoch": 3.699611147116008, "grad_norm": 0.07764916867017746, "learning_rate": 1.6688419820874025e-05, "loss": 0.2205, "step": 45668 }, { "epoch": 3.699692158133506, "grad_norm": 0.06930316984653473, "learning_rate": 1.6683919168279402e-05, "loss": 0.2196, "step": 45669 }, { "epoch": 3.6997731691510047, "grad_norm": 0.08031480759382248, "learning_rate": 1.6679418515684776e-05, "loss": 0.2509, "step": 45670 }, { "epoch": 3.699854180168503, "grad_norm": 0.07526817917823792, "learning_rate": 1.667491786309015e-05, "loss": 0.2521, "step": 45671 }, { "epoch": 3.6999351911860012, "grad_norm": 0.0837547555565834, "learning_rate": 1.6670417210495523e-05, "loss": 0.2373, "step": 45672 }, { "epoch": 3.7000162022035, "grad_norm": 0.11006888747215271, "learning_rate": 1.6665916557900896e-05, "loss": 0.2406, "step": 45673 }, { "epoch": 3.700097213220998, "grad_norm": 0.060090772807598114, "learning_rate": 1.666141590530627e-05, "loss": 0.2311, "step": 45674 }, { "epoch": 3.7001782242384964, "grad_norm": 0.07697679847478867, "learning_rate": 1.6656915252711644e-05, "loss": 0.2144, "step": 45675 }, { "epoch": 3.7002592352559946, "grad_norm": 0.075130395591259, "learning_rate": 1.6652414600117017e-05, "loss": 0.2075, "step": 45676 }, { "epoch": 3.7003402462734933, "grad_norm": 0.06719817966222763, "learning_rate": 1.664791394752239e-05, "loss": 0.262, "step": 45677 }, { "epoch": 3.7004212572909916, "grad_norm": 0.07148056477308273, "learning_rate": 1.6643413294927764e-05, "loss": 0.2244, "step": 45678 }, { "epoch": 3.70050226830849, "grad_norm": 0.07177882641553879, "learning_rate": 1.663891264233314e-05, "loss": 0.2164, "step": 45679 }, { "epoch": 3.700583279325988, "grad_norm": 0.06076192483305931, "learning_rate": 1.663441198973851e-05, "loss": 0.2045, "step": 45680 }, { "epoch": 3.7006642903434868, "grad_norm": 0.05700770765542984, "learning_rate": 1.6629911337143885e-05, "loss": 0.2023, "step": 45681 }, { "epoch": 3.700745301360985, "grad_norm": 0.07019194215536118, "learning_rate": 1.6625410684549262e-05, "loss": 0.2374, "step": 45682 }, { "epoch": 3.7008263123784833, "grad_norm": 0.059162236750125885, "learning_rate": 1.6620910031954632e-05, "loss": 0.2114, "step": 45683 }, { "epoch": 3.700907323395982, "grad_norm": 0.07613490521907806, "learning_rate": 1.6616409379360006e-05, "loss": 0.2293, "step": 45684 }, { "epoch": 3.70098833441348, "grad_norm": 0.08490029722452164, "learning_rate": 1.6611908726765383e-05, "loss": 0.1955, "step": 45685 }, { "epoch": 3.7010693454309784, "grad_norm": 0.07354255020618439, "learning_rate": 1.6607408074170757e-05, "loss": 0.2268, "step": 45686 }, { "epoch": 3.701150356448477, "grad_norm": 0.0803595781326294, "learning_rate": 1.660290742157613e-05, "loss": 0.2502, "step": 45687 }, { "epoch": 3.7012313674659754, "grad_norm": 0.07173190265893936, "learning_rate": 1.6598406768981504e-05, "loss": 0.2447, "step": 45688 }, { "epoch": 3.7013123784834736, "grad_norm": 0.08350685238838196, "learning_rate": 1.6593906116386877e-05, "loss": 0.2585, "step": 45689 }, { "epoch": 3.7013933895009723, "grad_norm": 0.06672927737236023, "learning_rate": 1.658940546379225e-05, "loss": 0.21, "step": 45690 }, { "epoch": 3.7014744005184705, "grad_norm": 0.07220529764890671, "learning_rate": 1.6584904811197625e-05, "loss": 0.2327, "step": 45691 }, { "epoch": 3.701555411535969, "grad_norm": 0.07617419213056564, "learning_rate": 1.6580404158602998e-05, "loss": 0.2163, "step": 45692 }, { "epoch": 3.7016364225534675, "grad_norm": 0.06999776512384415, "learning_rate": 1.6575903506008372e-05, "loss": 0.2125, "step": 45693 }, { "epoch": 3.7017174335709657, "grad_norm": 0.07320782542228699, "learning_rate": 1.6571402853413745e-05, "loss": 0.2258, "step": 45694 }, { "epoch": 3.701798444588464, "grad_norm": 0.07582980394363403, "learning_rate": 1.656690220081912e-05, "loss": 0.213, "step": 45695 }, { "epoch": 3.7018794556059627, "grad_norm": 0.05617687106132507, "learning_rate": 1.6562401548224493e-05, "loss": 0.1939, "step": 45696 }, { "epoch": 3.701960466623461, "grad_norm": 0.058806758373975754, "learning_rate": 1.6557900895629866e-05, "loss": 0.2449, "step": 45697 }, { "epoch": 3.702041477640959, "grad_norm": 0.0760156661272049, "learning_rate": 1.655340024303524e-05, "loss": 0.238, "step": 45698 }, { "epoch": 3.7021224886584574, "grad_norm": 0.06211948022246361, "learning_rate": 1.6548899590440613e-05, "loss": 0.2389, "step": 45699 }, { "epoch": 3.702203499675956, "grad_norm": 0.0635630339384079, "learning_rate": 1.654439893784599e-05, "loss": 0.217, "step": 45700 }, { "epoch": 3.7022845106934543, "grad_norm": 0.07854915410280228, "learning_rate": 1.653989828525136e-05, "loss": 0.2497, "step": 45701 }, { "epoch": 3.7023655217109526, "grad_norm": 0.06777413934469223, "learning_rate": 1.6535397632656734e-05, "loss": 0.24, "step": 45702 }, { "epoch": 3.702446532728451, "grad_norm": 0.06990412622690201, "learning_rate": 1.653089698006211e-05, "loss": 0.2094, "step": 45703 }, { "epoch": 3.7025275437459495, "grad_norm": 0.06906888633966446, "learning_rate": 1.6526396327467485e-05, "loss": 0.2311, "step": 45704 }, { "epoch": 3.7026085547634477, "grad_norm": 0.06097817420959473, "learning_rate": 1.652189567487286e-05, "loss": 0.1887, "step": 45705 }, { "epoch": 3.702689565780946, "grad_norm": 0.07380296289920807, "learning_rate": 1.6517395022278232e-05, "loss": 0.2236, "step": 45706 }, { "epoch": 3.7027705767984447, "grad_norm": 0.07449514418840408, "learning_rate": 1.6512894369683606e-05, "loss": 0.2055, "step": 45707 }, { "epoch": 3.702851587815943, "grad_norm": 0.07819279283285141, "learning_rate": 1.650839371708898e-05, "loss": 0.2339, "step": 45708 }, { "epoch": 3.702932598833441, "grad_norm": 0.06513992697000504, "learning_rate": 1.6503893064494353e-05, "loss": 0.216, "step": 45709 }, { "epoch": 3.70301360985094, "grad_norm": 0.08240225911140442, "learning_rate": 1.6499392411899726e-05, "loss": 0.2141, "step": 45710 }, { "epoch": 3.703094620868438, "grad_norm": 0.08612393587827682, "learning_rate": 1.64948917593051e-05, "loss": 0.2298, "step": 45711 }, { "epoch": 3.7031756318859363, "grad_norm": 0.06854256242513657, "learning_rate": 1.6490391106710474e-05, "loss": 0.2128, "step": 45712 }, { "epoch": 3.703256642903435, "grad_norm": 0.05733377858996391, "learning_rate": 1.6485890454115847e-05, "loss": 0.2351, "step": 45713 }, { "epoch": 3.7033376539209333, "grad_norm": 0.06590426713228226, "learning_rate": 1.648138980152122e-05, "loss": 0.2482, "step": 45714 }, { "epoch": 3.7034186649384315, "grad_norm": 0.06614598631858826, "learning_rate": 1.6476889148926595e-05, "loss": 0.2188, "step": 45715 }, { "epoch": 3.70349967595593, "grad_norm": 0.06795570999383926, "learning_rate": 1.6472388496331968e-05, "loss": 0.2445, "step": 45716 }, { "epoch": 3.7035806869734285, "grad_norm": 0.07279584556818008, "learning_rate": 1.6467887843737342e-05, "loss": 0.2135, "step": 45717 }, { "epoch": 3.7036616979909267, "grad_norm": 0.0802987739443779, "learning_rate": 1.6463387191142715e-05, "loss": 0.2216, "step": 45718 }, { "epoch": 3.7037427090084254, "grad_norm": 0.07141979783773422, "learning_rate": 1.645888653854809e-05, "loss": 0.2343, "step": 45719 }, { "epoch": 3.7038237200259236, "grad_norm": 0.058926984667778015, "learning_rate": 1.6454385885953463e-05, "loss": 0.2207, "step": 45720 }, { "epoch": 3.703904731043422, "grad_norm": 0.07551530748605728, "learning_rate": 1.644988523335884e-05, "loss": 0.2285, "step": 45721 }, { "epoch": 3.70398574206092, "grad_norm": 0.08013749122619629, "learning_rate": 1.6445384580764213e-05, "loss": 0.2577, "step": 45722 }, { "epoch": 3.704066753078419, "grad_norm": 0.0685802772641182, "learning_rate": 1.6440883928169583e-05, "loss": 0.2172, "step": 45723 }, { "epoch": 3.704147764095917, "grad_norm": 0.07025080174207687, "learning_rate": 1.643638327557496e-05, "loss": 0.2394, "step": 45724 }, { "epoch": 3.7042287751134153, "grad_norm": 0.07615825533866882, "learning_rate": 1.6431882622980334e-05, "loss": 0.2186, "step": 45725 }, { "epoch": 3.7043097861309136, "grad_norm": 0.07653851807117462, "learning_rate": 1.6427381970385708e-05, "loss": 0.2106, "step": 45726 }, { "epoch": 3.7043907971484122, "grad_norm": 0.07739897817373276, "learning_rate": 1.642288131779108e-05, "loss": 0.2066, "step": 45727 }, { "epoch": 3.7044718081659105, "grad_norm": 0.07978235185146332, "learning_rate": 1.6418380665196455e-05, "loss": 0.252, "step": 45728 }, { "epoch": 3.7045528191834087, "grad_norm": 0.07335931062698364, "learning_rate": 1.641388001260183e-05, "loss": 0.1746, "step": 45729 }, { "epoch": 3.7046338302009074, "grad_norm": 0.08139511197805405, "learning_rate": 1.6409379360007202e-05, "loss": 0.2766, "step": 45730 }, { "epoch": 3.7047148412184057, "grad_norm": 0.06404054909944534, "learning_rate": 1.6404878707412576e-05, "loss": 0.1958, "step": 45731 }, { "epoch": 3.704795852235904, "grad_norm": 0.062324702739715576, "learning_rate": 1.640037805481795e-05, "loss": 0.2066, "step": 45732 }, { "epoch": 3.7048768632534026, "grad_norm": 0.08526472002267838, "learning_rate": 1.6395877402223323e-05, "loss": 0.2312, "step": 45733 }, { "epoch": 3.704957874270901, "grad_norm": 0.06586598604917526, "learning_rate": 1.6391376749628696e-05, "loss": 0.2201, "step": 45734 }, { "epoch": 3.705038885288399, "grad_norm": 0.08027635514736176, "learning_rate": 1.638687609703407e-05, "loss": 0.2203, "step": 45735 }, { "epoch": 3.7051198963058978, "grad_norm": 0.0849221721291542, "learning_rate": 1.6382375444439444e-05, "loss": 0.2481, "step": 45736 }, { "epoch": 3.705200907323396, "grad_norm": 0.08749920129776001, "learning_rate": 1.637787479184482e-05, "loss": 0.2237, "step": 45737 }, { "epoch": 3.7052819183408943, "grad_norm": 0.07888040691614151, "learning_rate": 1.637337413925019e-05, "loss": 0.224, "step": 45738 }, { "epoch": 3.705362929358393, "grad_norm": 0.08087297528982162, "learning_rate": 1.6368873486655568e-05, "loss": 0.2175, "step": 45739 }, { "epoch": 3.705443940375891, "grad_norm": 0.0801047757267952, "learning_rate": 1.636437283406094e-05, "loss": 0.2298, "step": 45740 }, { "epoch": 3.7055249513933894, "grad_norm": 0.06400655955076218, "learning_rate": 1.635987218146631e-05, "loss": 0.2214, "step": 45741 }, { "epoch": 3.705605962410888, "grad_norm": 0.06360850483179092, "learning_rate": 1.635537152887169e-05, "loss": 0.2173, "step": 45742 }, { "epoch": 3.7056869734283864, "grad_norm": 0.06993993371725082, "learning_rate": 1.6350870876277062e-05, "loss": 0.2188, "step": 45743 }, { "epoch": 3.7057679844458846, "grad_norm": 0.08145015686750412, "learning_rate": 1.6346370223682432e-05, "loss": 0.2529, "step": 45744 }, { "epoch": 3.705848995463383, "grad_norm": 0.08533214777708054, "learning_rate": 1.634186957108781e-05, "loss": 0.2268, "step": 45745 }, { "epoch": 3.7059300064808816, "grad_norm": 0.08632870018482208, "learning_rate": 1.6337368918493183e-05, "loss": 0.2359, "step": 45746 }, { "epoch": 3.70601101749838, "grad_norm": 0.07463864982128143, "learning_rate": 1.6332868265898557e-05, "loss": 0.2247, "step": 45747 }, { "epoch": 3.706092028515878, "grad_norm": 0.06898374110460281, "learning_rate": 1.632836761330393e-05, "loss": 0.2013, "step": 45748 }, { "epoch": 3.7061730395333763, "grad_norm": 0.06634795665740967, "learning_rate": 1.6323866960709304e-05, "loss": 0.1984, "step": 45749 }, { "epoch": 3.706254050550875, "grad_norm": 0.08424997329711914, "learning_rate": 1.6319366308114677e-05, "loss": 0.2292, "step": 45750 }, { "epoch": 3.7063350615683732, "grad_norm": 0.08333491533994675, "learning_rate": 1.631486565552005e-05, "loss": 0.2562, "step": 45751 }, { "epoch": 3.7064160725858715, "grad_norm": 0.07007382810115814, "learning_rate": 1.6310365002925425e-05, "loss": 0.2016, "step": 45752 }, { "epoch": 3.70649708360337, "grad_norm": 0.07411225140094757, "learning_rate": 1.6305864350330798e-05, "loss": 0.2552, "step": 45753 }, { "epoch": 3.7065780946208684, "grad_norm": 0.08287201076745987, "learning_rate": 1.6301363697736172e-05, "loss": 0.2521, "step": 45754 }, { "epoch": 3.7066591056383666, "grad_norm": 0.07097062468528748, "learning_rate": 1.629686304514155e-05, "loss": 0.2275, "step": 45755 }, { "epoch": 3.7067401166558653, "grad_norm": 0.07343895733356476, "learning_rate": 1.629236239254692e-05, "loss": 0.197, "step": 45756 }, { "epoch": 3.7068211276733636, "grad_norm": 0.06564123928546906, "learning_rate": 1.6287861739952293e-05, "loss": 0.2087, "step": 45757 }, { "epoch": 3.706902138690862, "grad_norm": 0.0649932399392128, "learning_rate": 1.628336108735767e-05, "loss": 0.2356, "step": 45758 }, { "epoch": 3.7069831497083605, "grad_norm": 0.07513502985239029, "learning_rate": 1.627886043476304e-05, "loss": 0.2562, "step": 45759 }, { "epoch": 3.7070641607258588, "grad_norm": 0.07147255539894104, "learning_rate": 1.6274359782168417e-05, "loss": 0.2592, "step": 45760 }, { "epoch": 3.707145171743357, "grad_norm": 0.0936325266957283, "learning_rate": 1.626985912957379e-05, "loss": 0.2757, "step": 45761 }, { "epoch": 3.7072261827608557, "grad_norm": 0.06833754479885101, "learning_rate": 1.626535847697916e-05, "loss": 0.2258, "step": 45762 }, { "epoch": 3.707307193778354, "grad_norm": 0.08121831715106964, "learning_rate": 1.6260857824384538e-05, "loss": 0.211, "step": 45763 }, { "epoch": 3.707388204795852, "grad_norm": 0.0541922003030777, "learning_rate": 1.625635717178991e-05, "loss": 0.1754, "step": 45764 }, { "epoch": 3.707469215813351, "grad_norm": 0.07351001352071762, "learning_rate": 1.6251856519195285e-05, "loss": 0.2369, "step": 45765 }, { "epoch": 3.707550226830849, "grad_norm": 0.1002897396683693, "learning_rate": 1.624735586660066e-05, "loss": 0.2499, "step": 45766 }, { "epoch": 3.7076312378483474, "grad_norm": 0.053547605872154236, "learning_rate": 1.6242855214006032e-05, "loss": 0.2213, "step": 45767 }, { "epoch": 3.7077122488658456, "grad_norm": 0.06925728917121887, "learning_rate": 1.6238354561411406e-05, "loss": 0.2017, "step": 45768 }, { "epoch": 3.7077932598833443, "grad_norm": 0.06350000202655792, "learning_rate": 1.623385390881678e-05, "loss": 0.2297, "step": 45769 }, { "epoch": 3.7078742709008425, "grad_norm": 0.06750939041376114, "learning_rate": 1.6229353256222153e-05, "loss": 0.1952, "step": 45770 }, { "epoch": 3.707955281918341, "grad_norm": 0.0888800173997879, "learning_rate": 1.6224852603627526e-05, "loss": 0.2372, "step": 45771 }, { "epoch": 3.708036292935839, "grad_norm": 0.06730051338672638, "learning_rate": 1.62203519510329e-05, "loss": 0.2262, "step": 45772 }, { "epoch": 3.7081173039533377, "grad_norm": 0.09187749773263931, "learning_rate": 1.6215851298438277e-05, "loss": 0.2275, "step": 45773 }, { "epoch": 3.708198314970836, "grad_norm": 0.06515898555517197, "learning_rate": 1.6211350645843647e-05, "loss": 0.199, "step": 45774 }, { "epoch": 3.708279325988334, "grad_norm": 0.07909893989562988, "learning_rate": 1.620684999324902e-05, "loss": 0.2394, "step": 45775 }, { "epoch": 3.708360337005833, "grad_norm": 0.0797453448176384, "learning_rate": 1.6202349340654398e-05, "loss": 0.2138, "step": 45776 }, { "epoch": 3.708441348023331, "grad_norm": 0.06687083095312119, "learning_rate": 1.6197848688059768e-05, "loss": 0.228, "step": 45777 }, { "epoch": 3.7085223590408294, "grad_norm": 0.0814642459154129, "learning_rate": 1.6193348035465145e-05, "loss": 0.1993, "step": 45778 }, { "epoch": 3.708603370058328, "grad_norm": 0.07986731082201004, "learning_rate": 1.618884738287052e-05, "loss": 0.251, "step": 45779 }, { "epoch": 3.7086843810758263, "grad_norm": 0.06904210150241852, "learning_rate": 1.618434673027589e-05, "loss": 0.2449, "step": 45780 }, { "epoch": 3.7087653920933246, "grad_norm": 0.06334845721721649, "learning_rate": 1.6179846077681266e-05, "loss": 0.2306, "step": 45781 }, { "epoch": 3.7088464031108233, "grad_norm": 0.06799927353858948, "learning_rate": 1.617534542508664e-05, "loss": 0.2138, "step": 45782 }, { "epoch": 3.7089274141283215, "grad_norm": 0.07648967951536179, "learning_rate": 1.617084477249201e-05, "loss": 0.2229, "step": 45783 }, { "epoch": 3.7090084251458197, "grad_norm": 0.05069386214017868, "learning_rate": 1.6166344119897387e-05, "loss": 0.1997, "step": 45784 }, { "epoch": 3.7090894361633184, "grad_norm": 0.07770735770463943, "learning_rate": 1.616184346730276e-05, "loss": 0.2248, "step": 45785 }, { "epoch": 3.7091704471808167, "grad_norm": 0.08397640287876129, "learning_rate": 1.6157342814708134e-05, "loss": 0.2367, "step": 45786 }, { "epoch": 3.709251458198315, "grad_norm": 0.059977419674396515, "learning_rate": 1.6152842162113508e-05, "loss": 0.2107, "step": 45787 }, { "epoch": 3.7093324692158136, "grad_norm": 0.0799662172794342, "learning_rate": 1.614834150951888e-05, "loss": 0.2496, "step": 45788 }, { "epoch": 3.709413480233312, "grad_norm": 0.058951485902071, "learning_rate": 1.6143840856924255e-05, "loss": 0.211, "step": 45789 }, { "epoch": 3.70949449125081, "grad_norm": 0.07071194797754288, "learning_rate": 1.613934020432963e-05, "loss": 0.2352, "step": 45790 }, { "epoch": 3.7095755022683083, "grad_norm": 0.05729727819561958, "learning_rate": 1.6134839551735005e-05, "loss": 0.1978, "step": 45791 }, { "epoch": 3.709656513285807, "grad_norm": 0.06328321993350983, "learning_rate": 1.6130338899140376e-05, "loss": 0.2145, "step": 45792 }, { "epoch": 3.7097375243033053, "grad_norm": 0.07447624206542969, "learning_rate": 1.612583824654575e-05, "loss": 0.2312, "step": 45793 }, { "epoch": 3.7098185353208035, "grad_norm": 0.06918629258871078, "learning_rate": 1.6121337593951126e-05, "loss": 0.2223, "step": 45794 }, { "epoch": 3.7098995463383018, "grad_norm": 0.059608764946460724, "learning_rate": 1.6116836941356496e-05, "loss": 0.2501, "step": 45795 }, { "epoch": 3.7099805573558005, "grad_norm": 0.07887037843465805, "learning_rate": 1.611233628876187e-05, "loss": 0.2245, "step": 45796 }, { "epoch": 3.7100615683732987, "grad_norm": 0.06873272359371185, "learning_rate": 1.6107835636167247e-05, "loss": 0.2225, "step": 45797 }, { "epoch": 3.710142579390797, "grad_norm": 0.09617675095796585, "learning_rate": 1.6103334983572617e-05, "loss": 0.2345, "step": 45798 }, { "epoch": 3.7102235904082956, "grad_norm": 0.07761441171169281, "learning_rate": 1.6098834330977994e-05, "loss": 0.2673, "step": 45799 }, { "epoch": 3.710304601425794, "grad_norm": 0.09091390669345856, "learning_rate": 1.6094333678383368e-05, "loss": 0.2659, "step": 45800 }, { "epoch": 3.710385612443292, "grad_norm": 0.06801667809486389, "learning_rate": 1.6089833025788738e-05, "loss": 0.2206, "step": 45801 }, { "epoch": 3.710466623460791, "grad_norm": 0.07345068454742432, "learning_rate": 1.6085332373194115e-05, "loss": 0.2409, "step": 45802 }, { "epoch": 3.710547634478289, "grad_norm": 0.08711735904216766, "learning_rate": 1.608083172059949e-05, "loss": 0.2144, "step": 45803 }, { "epoch": 3.7106286454957873, "grad_norm": 0.06707657873630524, "learning_rate": 1.607633106800486e-05, "loss": 0.2124, "step": 45804 }, { "epoch": 3.710709656513286, "grad_norm": 0.08471059054136276, "learning_rate": 1.6071830415410236e-05, "loss": 0.2525, "step": 45805 }, { "epoch": 3.7107906675307842, "grad_norm": 0.0858485996723175, "learning_rate": 1.606732976281561e-05, "loss": 0.2109, "step": 45806 }, { "epoch": 3.7108716785482825, "grad_norm": 0.08600111305713654, "learning_rate": 1.6062829110220983e-05, "loss": 0.2317, "step": 45807 }, { "epoch": 3.710952689565781, "grad_norm": 0.06662708520889282, "learning_rate": 1.6058328457626357e-05, "loss": 0.2328, "step": 45808 }, { "epoch": 3.7110337005832794, "grad_norm": 0.05524429306387901, "learning_rate": 1.605382780503173e-05, "loss": 0.2432, "step": 45809 }, { "epoch": 3.7111147116007777, "grad_norm": 0.07598711550235748, "learning_rate": 1.6049327152437104e-05, "loss": 0.2376, "step": 45810 }, { "epoch": 3.7111957226182763, "grad_norm": 0.0858222246170044, "learning_rate": 1.6044826499842477e-05, "loss": 0.2052, "step": 45811 }, { "epoch": 3.7112767336357746, "grad_norm": 0.06710031628608704, "learning_rate": 1.6040325847247854e-05, "loss": 0.2005, "step": 45812 }, { "epoch": 3.711357744653273, "grad_norm": 0.07461856305599213, "learning_rate": 1.6035825194653225e-05, "loss": 0.2281, "step": 45813 }, { "epoch": 3.711438755670771, "grad_norm": 0.06366579234600067, "learning_rate": 1.6031324542058598e-05, "loss": 0.2136, "step": 45814 }, { "epoch": 3.7115197666882693, "grad_norm": 0.07957981526851654, "learning_rate": 1.6026823889463975e-05, "loss": 0.2297, "step": 45815 }, { "epoch": 3.711600777705768, "grad_norm": 0.06554809212684631, "learning_rate": 1.6022323236869345e-05, "loss": 0.23, "step": 45816 }, { "epoch": 3.7116817887232663, "grad_norm": 0.06822206825017929, "learning_rate": 1.601782258427472e-05, "loss": 0.204, "step": 45817 }, { "epoch": 3.7117627997407645, "grad_norm": 0.07761096954345703, "learning_rate": 1.6013321931680096e-05, "loss": 0.2212, "step": 45818 }, { "epoch": 3.711843810758263, "grad_norm": 0.05761200562119484, "learning_rate": 1.6008821279085466e-05, "loss": 0.2218, "step": 45819 }, { "epoch": 3.7119248217757614, "grad_norm": 0.06800486892461777, "learning_rate": 1.6004320626490843e-05, "loss": 0.202, "step": 45820 }, { "epoch": 3.7120058327932597, "grad_norm": 0.08612167090177536, "learning_rate": 1.5999819973896217e-05, "loss": 0.2504, "step": 45821 }, { "epoch": 3.7120868438107584, "grad_norm": 0.0777377337217331, "learning_rate": 1.5995319321301587e-05, "loss": 0.2192, "step": 45822 }, { "epoch": 3.7121678548282566, "grad_norm": 0.0771232396364212, "learning_rate": 1.5990818668706964e-05, "loss": 0.233, "step": 45823 }, { "epoch": 3.712248865845755, "grad_norm": 0.0695451945066452, "learning_rate": 1.5986318016112338e-05, "loss": 0.2525, "step": 45824 }, { "epoch": 3.7123298768632536, "grad_norm": 0.08157804608345032, "learning_rate": 1.598181736351771e-05, "loss": 0.2623, "step": 45825 }, { "epoch": 3.712410887880752, "grad_norm": 0.07781726866960526, "learning_rate": 1.5977316710923085e-05, "loss": 0.2288, "step": 45826 }, { "epoch": 3.71249189889825, "grad_norm": 0.08144915848970413, "learning_rate": 1.597281605832846e-05, "loss": 0.245, "step": 45827 }, { "epoch": 3.7125729099157487, "grad_norm": 0.0604521706700325, "learning_rate": 1.5968315405733832e-05, "loss": 0.1962, "step": 45828 }, { "epoch": 3.712653920933247, "grad_norm": 0.056658048182725906, "learning_rate": 1.5963814753139206e-05, "loss": 0.1805, "step": 45829 }, { "epoch": 3.712734931950745, "grad_norm": 0.06701702624559402, "learning_rate": 1.595931410054458e-05, "loss": 0.2013, "step": 45830 }, { "epoch": 3.712815942968244, "grad_norm": 0.07957711070775986, "learning_rate": 1.5954813447949953e-05, "loss": 0.2567, "step": 45831 }, { "epoch": 3.712896953985742, "grad_norm": 0.07916118204593658, "learning_rate": 1.5950312795355326e-05, "loss": 0.2017, "step": 45832 }, { "epoch": 3.7129779650032404, "grad_norm": 0.06471701711416245, "learning_rate": 1.5945812142760703e-05, "loss": 0.2304, "step": 45833 }, { "epoch": 3.713058976020739, "grad_norm": 0.0795748233795166, "learning_rate": 1.5941311490166074e-05, "loss": 0.2586, "step": 45834 }, { "epoch": 3.7131399870382373, "grad_norm": 0.07704074680805206, "learning_rate": 1.5936810837571447e-05, "loss": 0.2104, "step": 45835 }, { "epoch": 3.7132209980557356, "grad_norm": 0.09900951385498047, "learning_rate": 1.5932310184976824e-05, "loss": 0.2398, "step": 45836 }, { "epoch": 3.713302009073234, "grad_norm": 0.08793462067842484, "learning_rate": 1.5927809532382194e-05, "loss": 0.2122, "step": 45837 }, { "epoch": 3.713383020090732, "grad_norm": 0.07224460691213608, "learning_rate": 1.592330887978757e-05, "loss": 0.2259, "step": 45838 }, { "epoch": 3.7134640311082308, "grad_norm": 0.06400369852781296, "learning_rate": 1.5918808227192945e-05, "loss": 0.2213, "step": 45839 }, { "epoch": 3.713545042125729, "grad_norm": 0.06662207841873169, "learning_rate": 1.5914307574598315e-05, "loss": 0.2238, "step": 45840 }, { "epoch": 3.7136260531432272, "grad_norm": 0.07715484499931335, "learning_rate": 1.5909806922003692e-05, "loss": 0.2254, "step": 45841 }, { "epoch": 3.713707064160726, "grad_norm": 0.08571289479732513, "learning_rate": 1.5905306269409066e-05, "loss": 0.227, "step": 45842 }, { "epoch": 3.713788075178224, "grad_norm": 0.08624354749917984, "learning_rate": 1.5900805616814436e-05, "loss": 0.2444, "step": 45843 }, { "epoch": 3.7138690861957224, "grad_norm": 0.07141070067882538, "learning_rate": 1.5896304964219813e-05, "loss": 0.2302, "step": 45844 }, { "epoch": 3.713950097213221, "grad_norm": 0.07181796431541443, "learning_rate": 1.5891804311625187e-05, "loss": 0.2712, "step": 45845 }, { "epoch": 3.7140311082307194, "grad_norm": 0.08645230531692505, "learning_rate": 1.588730365903056e-05, "loss": 0.2544, "step": 45846 }, { "epoch": 3.7141121192482176, "grad_norm": 0.08005709946155548, "learning_rate": 1.5882803006435934e-05, "loss": 0.2271, "step": 45847 }, { "epoch": 3.7141931302657163, "grad_norm": 0.06787635385990143, "learning_rate": 1.5878302353841307e-05, "loss": 0.2092, "step": 45848 }, { "epoch": 3.7142741412832145, "grad_norm": 0.07872307300567627, "learning_rate": 1.587380170124668e-05, "loss": 0.1951, "step": 45849 }, { "epoch": 3.714355152300713, "grad_norm": 0.06552909314632416, "learning_rate": 1.5869301048652055e-05, "loss": 0.2069, "step": 45850 }, { "epoch": 3.7144361633182115, "grad_norm": 0.06676136702299118, "learning_rate": 1.586480039605743e-05, "loss": 0.2355, "step": 45851 }, { "epoch": 3.7145171743357097, "grad_norm": 0.07451087981462479, "learning_rate": 1.5860299743462802e-05, "loss": 0.2289, "step": 45852 }, { "epoch": 3.714598185353208, "grad_norm": 0.08217606693506241, "learning_rate": 1.5855799090868175e-05, "loss": 0.2305, "step": 45853 }, { "epoch": 3.7146791963707066, "grad_norm": 0.08405117690563202, "learning_rate": 1.5851298438273552e-05, "loss": 0.2559, "step": 45854 }, { "epoch": 3.714760207388205, "grad_norm": 0.049762554466724396, "learning_rate": 1.5846797785678923e-05, "loss": 0.229, "step": 45855 }, { "epoch": 3.714841218405703, "grad_norm": 0.07197723537683487, "learning_rate": 1.5842297133084296e-05, "loss": 0.2394, "step": 45856 }, { "epoch": 3.714922229423202, "grad_norm": 0.07479429244995117, "learning_rate": 1.5837796480489673e-05, "loss": 0.2368, "step": 45857 }, { "epoch": 3.7150032404407, "grad_norm": 0.0639149472117424, "learning_rate": 1.5833295827895044e-05, "loss": 0.2262, "step": 45858 }, { "epoch": 3.7150842514581983, "grad_norm": 0.06918316334486008, "learning_rate": 1.582879517530042e-05, "loss": 0.2128, "step": 45859 }, { "epoch": 3.7151652624756966, "grad_norm": 0.08518219739198685, "learning_rate": 1.5824294522705794e-05, "loss": 0.265, "step": 45860 }, { "epoch": 3.715246273493195, "grad_norm": 0.07202792167663574, "learning_rate": 1.5819793870111164e-05, "loss": 0.2217, "step": 45861 }, { "epoch": 3.7153272845106935, "grad_norm": 0.06419474631547928, "learning_rate": 1.581529321751654e-05, "loss": 0.2155, "step": 45862 }, { "epoch": 3.7154082955281917, "grad_norm": 0.07064595073461533, "learning_rate": 1.5810792564921915e-05, "loss": 0.1976, "step": 45863 }, { "epoch": 3.71548930654569, "grad_norm": 0.0711565911769867, "learning_rate": 1.580629191232729e-05, "loss": 0.2549, "step": 45864 }, { "epoch": 3.7155703175631887, "grad_norm": 0.06996343284845352, "learning_rate": 1.5801791259732662e-05, "loss": 0.1958, "step": 45865 }, { "epoch": 3.715651328580687, "grad_norm": 0.06321109086275101, "learning_rate": 1.5797290607138036e-05, "loss": 0.2098, "step": 45866 }, { "epoch": 3.715732339598185, "grad_norm": 0.07822613418102264, "learning_rate": 1.579278995454341e-05, "loss": 0.2079, "step": 45867 }, { "epoch": 3.715813350615684, "grad_norm": 0.08399234712123871, "learning_rate": 1.5788289301948783e-05, "loss": 0.2388, "step": 45868 }, { "epoch": 3.715894361633182, "grad_norm": 0.06259548664093018, "learning_rate": 1.5783788649354157e-05, "loss": 0.2342, "step": 45869 }, { "epoch": 3.7159753726506803, "grad_norm": 0.08495364338159561, "learning_rate": 1.577928799675953e-05, "loss": 0.2148, "step": 45870 }, { "epoch": 3.716056383668179, "grad_norm": 0.0643046498298645, "learning_rate": 1.5774787344164904e-05, "loss": 0.2099, "step": 45871 }, { "epoch": 3.7161373946856773, "grad_norm": 0.06250204145908356, "learning_rate": 1.577028669157028e-05, "loss": 0.1988, "step": 45872 }, { "epoch": 3.7162184057031755, "grad_norm": 0.0693424865603447, "learning_rate": 1.576578603897565e-05, "loss": 0.2287, "step": 45873 }, { "epoch": 3.716299416720674, "grad_norm": 0.07121361792087555, "learning_rate": 1.5761285386381025e-05, "loss": 0.235, "step": 45874 }, { "epoch": 3.7163804277381725, "grad_norm": 0.07043685764074326, "learning_rate": 1.57567847337864e-05, "loss": 0.2493, "step": 45875 }, { "epoch": 3.7164614387556707, "grad_norm": 0.07397102564573288, "learning_rate": 1.5752284081191772e-05, "loss": 0.2426, "step": 45876 }, { "epoch": 3.7165424497731694, "grad_norm": 0.0942964181303978, "learning_rate": 1.574778342859715e-05, "loss": 0.2565, "step": 45877 }, { "epoch": 3.7166234607906676, "grad_norm": 0.08170833438634872, "learning_rate": 1.5743282776002522e-05, "loss": 0.2895, "step": 45878 }, { "epoch": 3.716704471808166, "grad_norm": 0.07267139106988907, "learning_rate": 1.5738782123407893e-05, "loss": 0.22, "step": 45879 }, { "epoch": 3.7167854828256646, "grad_norm": 0.06435095518827438, "learning_rate": 1.573428147081327e-05, "loss": 0.2089, "step": 45880 }, { "epoch": 3.716866493843163, "grad_norm": 0.07378797233104706, "learning_rate": 1.5729780818218643e-05, "loss": 0.2211, "step": 45881 }, { "epoch": 3.716947504860661, "grad_norm": 0.08473730087280273, "learning_rate": 1.5725280165624017e-05, "loss": 0.2114, "step": 45882 }, { "epoch": 3.7170285158781593, "grad_norm": 0.06100822240114212, "learning_rate": 1.572077951302939e-05, "loss": 0.2234, "step": 45883 }, { "epoch": 3.7171095268956575, "grad_norm": 0.08396106958389282, "learning_rate": 1.5716278860434764e-05, "loss": 0.2526, "step": 45884 }, { "epoch": 3.7171905379131562, "grad_norm": 0.08896417915821075, "learning_rate": 1.5711778207840138e-05, "loss": 0.2871, "step": 45885 }, { "epoch": 3.7172715489306545, "grad_norm": 0.06886778771877289, "learning_rate": 1.570727755524551e-05, "loss": 0.2309, "step": 45886 }, { "epoch": 3.7173525599481527, "grad_norm": 0.06046053022146225, "learning_rate": 1.5702776902650885e-05, "loss": 0.2314, "step": 45887 }, { "epoch": 3.7174335709656514, "grad_norm": 0.0786987841129303, "learning_rate": 1.569827625005626e-05, "loss": 0.2126, "step": 45888 }, { "epoch": 3.7175145819831497, "grad_norm": 0.08876615762710571, "learning_rate": 1.5693775597461632e-05, "loss": 0.2403, "step": 45889 }, { "epoch": 3.717595593000648, "grad_norm": 0.06113407388329506, "learning_rate": 1.5689274944867006e-05, "loss": 0.211, "step": 45890 }, { "epoch": 3.7176766040181466, "grad_norm": 0.06730061769485474, "learning_rate": 1.568477429227238e-05, "loss": 0.2186, "step": 45891 }, { "epoch": 3.717757615035645, "grad_norm": 0.07700332999229431, "learning_rate": 1.5680273639677753e-05, "loss": 0.2191, "step": 45892 }, { "epoch": 3.717838626053143, "grad_norm": 0.06986545771360397, "learning_rate": 1.567577298708313e-05, "loss": 0.2744, "step": 45893 }, { "epoch": 3.7179196370706418, "grad_norm": 0.07376525551080704, "learning_rate": 1.56712723344885e-05, "loss": 0.2353, "step": 45894 }, { "epoch": 3.71800064808814, "grad_norm": 0.08193989098072052, "learning_rate": 1.5666771681893874e-05, "loss": 0.25, "step": 45895 }, { "epoch": 3.7180816591056383, "grad_norm": 0.06663582473993301, "learning_rate": 1.566227102929925e-05, "loss": 0.2445, "step": 45896 }, { "epoch": 3.718162670123137, "grad_norm": 0.08676223456859589, "learning_rate": 1.565777037670462e-05, "loss": 0.2503, "step": 45897 }, { "epoch": 3.718243681140635, "grad_norm": 0.05338404327630997, "learning_rate": 1.5653269724109998e-05, "loss": 0.197, "step": 45898 }, { "epoch": 3.7183246921581334, "grad_norm": 0.08339407294988632, "learning_rate": 1.564876907151537e-05, "loss": 0.2584, "step": 45899 }, { "epoch": 3.718405703175632, "grad_norm": 0.08016695827245712, "learning_rate": 1.5644268418920745e-05, "loss": 0.2498, "step": 45900 }, { "epoch": 3.7184867141931304, "grad_norm": 0.06563158333301544, "learning_rate": 1.563976776632612e-05, "loss": 0.2013, "step": 45901 }, { "epoch": 3.7185677252106286, "grad_norm": 0.07572820782661438, "learning_rate": 1.5635267113731492e-05, "loss": 0.2516, "step": 45902 }, { "epoch": 3.718648736228127, "grad_norm": 0.0666859969496727, "learning_rate": 1.5630766461136866e-05, "loss": 0.2378, "step": 45903 }, { "epoch": 3.7187297472456255, "grad_norm": 0.0793486088514328, "learning_rate": 1.562626580854224e-05, "loss": 0.2311, "step": 45904 }, { "epoch": 3.718810758263124, "grad_norm": 0.07341185212135315, "learning_rate": 1.5621765155947613e-05, "loss": 0.2338, "step": 45905 }, { "epoch": 3.718891769280622, "grad_norm": 0.07319411635398865, "learning_rate": 1.5617264503352987e-05, "loss": 0.1935, "step": 45906 }, { "epoch": 3.7189727802981203, "grad_norm": 0.0693352222442627, "learning_rate": 1.561276385075836e-05, "loss": 0.2155, "step": 45907 }, { "epoch": 3.719053791315619, "grad_norm": 0.07195544987916946, "learning_rate": 1.5608263198163734e-05, "loss": 0.2404, "step": 45908 }, { "epoch": 3.719134802333117, "grad_norm": 0.08030025660991669, "learning_rate": 1.5603762545569107e-05, "loss": 0.2222, "step": 45909 }, { "epoch": 3.7192158133506155, "grad_norm": 0.0607793889939785, "learning_rate": 1.559926189297448e-05, "loss": 0.1801, "step": 45910 }, { "epoch": 3.719296824368114, "grad_norm": 0.0874897688627243, "learning_rate": 1.5594761240379858e-05, "loss": 0.2549, "step": 45911 }, { "epoch": 3.7193778353856124, "grad_norm": 0.08436858654022217, "learning_rate": 1.5590260587785228e-05, "loss": 0.23, "step": 45912 }, { "epoch": 3.7194588464031106, "grad_norm": 0.06420475244522095, "learning_rate": 1.5585759935190602e-05, "loss": 0.216, "step": 45913 }, { "epoch": 3.7195398574206093, "grad_norm": 0.0704539343714714, "learning_rate": 1.558125928259598e-05, "loss": 0.258, "step": 45914 }, { "epoch": 3.7196208684381076, "grad_norm": 0.07884382456541061, "learning_rate": 1.557675863000135e-05, "loss": 0.2243, "step": 45915 }, { "epoch": 3.719701879455606, "grad_norm": 0.07424849271774292, "learning_rate": 1.5572257977406723e-05, "loss": 0.2347, "step": 45916 }, { "epoch": 3.7197828904731045, "grad_norm": 0.0690733939409256, "learning_rate": 1.55677573248121e-05, "loss": 0.2222, "step": 45917 }, { "epoch": 3.7198639014906028, "grad_norm": 0.06833771616220474, "learning_rate": 1.5563256672217473e-05, "loss": 0.2025, "step": 45918 }, { "epoch": 3.719944912508101, "grad_norm": 0.07035204023122787, "learning_rate": 1.5558756019622847e-05, "loss": 0.2337, "step": 45919 }, { "epoch": 3.7200259235255997, "grad_norm": 0.07200058549642563, "learning_rate": 1.555425536702822e-05, "loss": 0.2247, "step": 45920 }, { "epoch": 3.720106934543098, "grad_norm": 0.07681140303611755, "learning_rate": 1.5549754714433594e-05, "loss": 0.2033, "step": 45921 }, { "epoch": 3.720187945560596, "grad_norm": 0.076214499771595, "learning_rate": 1.5545254061838968e-05, "loss": 0.237, "step": 45922 }, { "epoch": 3.720268956578095, "grad_norm": 0.060587845742702484, "learning_rate": 1.554075340924434e-05, "loss": 0.2238, "step": 45923 }, { "epoch": 3.720349967595593, "grad_norm": 0.05092794820666313, "learning_rate": 1.5536252756649715e-05, "loss": 0.2015, "step": 45924 }, { "epoch": 3.7204309786130914, "grad_norm": 0.08442278951406479, "learning_rate": 1.553175210405509e-05, "loss": 0.2763, "step": 45925 }, { "epoch": 3.7205119896305896, "grad_norm": 0.07512500137090683, "learning_rate": 1.5527251451460462e-05, "loss": 0.2318, "step": 45926 }, { "epoch": 3.7205930006480883, "grad_norm": 0.07768537849187851, "learning_rate": 1.5522750798865836e-05, "loss": 0.2908, "step": 45927 }, { "epoch": 3.7206740116655865, "grad_norm": 0.07198259234428406, "learning_rate": 1.551825014627121e-05, "loss": 0.2386, "step": 45928 }, { "epoch": 3.720755022683085, "grad_norm": 0.06927481293678284, "learning_rate": 1.5513749493676583e-05, "loss": 0.221, "step": 45929 }, { "epoch": 3.720836033700583, "grad_norm": 0.06755954027175903, "learning_rate": 1.5509248841081956e-05, "loss": 0.2161, "step": 45930 }, { "epoch": 3.7209170447180817, "grad_norm": 0.08039725571870804, "learning_rate": 1.550474818848733e-05, "loss": 0.2205, "step": 45931 }, { "epoch": 3.72099805573558, "grad_norm": 0.06722427904605865, "learning_rate": 1.5500247535892707e-05, "loss": 0.2134, "step": 45932 }, { "epoch": 3.721079066753078, "grad_norm": 0.07862570881843567, "learning_rate": 1.5495746883298077e-05, "loss": 0.2321, "step": 45933 }, { "epoch": 3.721160077770577, "grad_norm": 0.0632789134979248, "learning_rate": 1.549124623070345e-05, "loss": 0.2378, "step": 45934 }, { "epoch": 3.721241088788075, "grad_norm": 0.06634281575679779, "learning_rate": 1.5486745578108828e-05, "loss": 0.2357, "step": 45935 }, { "epoch": 3.7213220998055734, "grad_norm": 0.07613595575094223, "learning_rate": 1.54822449255142e-05, "loss": 0.2295, "step": 45936 }, { "epoch": 3.721403110823072, "grad_norm": 0.0799480453133583, "learning_rate": 1.5477744272919575e-05, "loss": 0.2222, "step": 45937 }, { "epoch": 3.7214841218405703, "grad_norm": 0.07500126212835312, "learning_rate": 1.547324362032495e-05, "loss": 0.2368, "step": 45938 }, { "epoch": 3.7215651328580686, "grad_norm": 0.06687241792678833, "learning_rate": 1.5468742967730322e-05, "loss": 0.2251, "step": 45939 }, { "epoch": 3.7216461438755672, "grad_norm": 0.06396705657243729, "learning_rate": 1.5464242315135696e-05, "loss": 0.2216, "step": 45940 }, { "epoch": 3.7217271548930655, "grad_norm": 0.06962263584136963, "learning_rate": 1.545974166254107e-05, "loss": 0.2254, "step": 45941 }, { "epoch": 3.7218081659105637, "grad_norm": 0.06181929260492325, "learning_rate": 1.5455241009946443e-05, "loss": 0.2235, "step": 45942 }, { "epoch": 3.7218891769280624, "grad_norm": 0.07112901657819748, "learning_rate": 1.5450740357351817e-05, "loss": 0.2422, "step": 45943 }, { "epoch": 3.7219701879455607, "grad_norm": 0.07527320086956024, "learning_rate": 1.544623970475719e-05, "loss": 0.217, "step": 45944 }, { "epoch": 3.722051198963059, "grad_norm": 0.08394154906272888, "learning_rate": 1.5441739052162564e-05, "loss": 0.2236, "step": 45945 }, { "epoch": 3.7221322099805576, "grad_norm": 0.07716701924800873, "learning_rate": 1.5437238399567938e-05, "loss": 0.2107, "step": 45946 }, { "epoch": 3.722213220998056, "grad_norm": 0.06959621608257294, "learning_rate": 1.543273774697331e-05, "loss": 0.2397, "step": 45947 }, { "epoch": 3.722294232015554, "grad_norm": 0.06327780336141586, "learning_rate": 1.5428237094378685e-05, "loss": 0.1967, "step": 45948 }, { "epoch": 3.7223752430330523, "grad_norm": 0.0753185898065567, "learning_rate": 1.542373644178406e-05, "loss": 0.2441, "step": 45949 }, { "epoch": 3.722456254050551, "grad_norm": 0.062304459512233734, "learning_rate": 1.5419235789189435e-05, "loss": 0.239, "step": 45950 }, { "epoch": 3.7225372650680493, "grad_norm": 0.10284210741519928, "learning_rate": 1.541473513659481e-05, "loss": 0.2458, "step": 45951 }, { "epoch": 3.7226182760855475, "grad_norm": 0.07348388433456421, "learning_rate": 1.541023448400018e-05, "loss": 0.2355, "step": 45952 }, { "epoch": 3.7226992871030458, "grad_norm": 0.06129736825823784, "learning_rate": 1.5405733831405556e-05, "loss": 0.197, "step": 45953 }, { "epoch": 3.7227802981205445, "grad_norm": 0.07383542507886887, "learning_rate": 1.540123317881093e-05, "loss": 0.2116, "step": 45954 }, { "epoch": 3.7228613091380427, "grad_norm": 0.07537295669317245, "learning_rate": 1.53967325262163e-05, "loss": 0.2375, "step": 45955 }, { "epoch": 3.722942320155541, "grad_norm": 0.06659074127674103, "learning_rate": 1.5392231873621677e-05, "loss": 0.2277, "step": 45956 }, { "epoch": 3.7230233311730396, "grad_norm": 0.07395710796117783, "learning_rate": 1.538773122102705e-05, "loss": 0.2246, "step": 45957 }, { "epoch": 3.723104342190538, "grad_norm": 0.07573916763067245, "learning_rate": 1.5383230568432424e-05, "loss": 0.2409, "step": 45958 }, { "epoch": 3.723185353208036, "grad_norm": 0.06378524005413055, "learning_rate": 1.5378729915837798e-05, "loss": 0.2007, "step": 45959 }, { "epoch": 3.723266364225535, "grad_norm": 0.0667334794998169, "learning_rate": 1.537422926324317e-05, "loss": 0.2533, "step": 45960 }, { "epoch": 3.723347375243033, "grad_norm": 0.06946897506713867, "learning_rate": 1.5369728610648545e-05, "loss": 0.1876, "step": 45961 }, { "epoch": 3.7234283862605313, "grad_norm": 0.06870533525943756, "learning_rate": 1.536522795805392e-05, "loss": 0.1848, "step": 45962 }, { "epoch": 3.72350939727803, "grad_norm": 0.0735953152179718, "learning_rate": 1.5360727305459292e-05, "loss": 0.201, "step": 45963 }, { "epoch": 3.7235904082955282, "grad_norm": 0.063729427754879, "learning_rate": 1.5356226652864666e-05, "loss": 0.2096, "step": 45964 }, { "epoch": 3.7236714193130265, "grad_norm": 0.06849828362464905, "learning_rate": 1.535172600027004e-05, "loss": 0.2433, "step": 45965 }, { "epoch": 3.723752430330525, "grad_norm": 0.08339733630418777, "learning_rate": 1.5347225347675413e-05, "loss": 0.2275, "step": 45966 }, { "epoch": 3.7238334413480234, "grad_norm": 0.07081194967031479, "learning_rate": 1.5342724695080787e-05, "loss": 0.1976, "step": 45967 }, { "epoch": 3.7239144523655217, "grad_norm": 0.08006429672241211, "learning_rate": 1.533822404248616e-05, "loss": 0.2303, "step": 45968 }, { "epoch": 3.7239954633830203, "grad_norm": 0.0940973311662674, "learning_rate": 1.5333723389891537e-05, "loss": 0.2473, "step": 45969 }, { "epoch": 3.7240764744005186, "grad_norm": 0.0769064798951149, "learning_rate": 1.5329222737296907e-05, "loss": 0.2101, "step": 45970 }, { "epoch": 3.724157485418017, "grad_norm": 0.06387367844581604, "learning_rate": 1.5324722084702284e-05, "loss": 0.2787, "step": 45971 }, { "epoch": 3.724238496435515, "grad_norm": 0.07406259328126907, "learning_rate": 1.5320221432107658e-05, "loss": 0.234, "step": 45972 }, { "epoch": 3.7243195074530138, "grad_norm": 0.06665657460689545, "learning_rate": 1.5315720779513028e-05, "loss": 0.2568, "step": 45973 }, { "epoch": 3.724400518470512, "grad_norm": 0.0778389424085617, "learning_rate": 1.5311220126918405e-05, "loss": 0.2208, "step": 45974 }, { "epoch": 3.7244815294880103, "grad_norm": 0.06476572155952454, "learning_rate": 1.530671947432378e-05, "loss": 0.2047, "step": 45975 }, { "epoch": 3.7245625405055085, "grad_norm": 0.07310286909341812, "learning_rate": 1.530221882172915e-05, "loss": 0.2068, "step": 45976 }, { "epoch": 3.724643551523007, "grad_norm": 0.06835059821605682, "learning_rate": 1.5297718169134526e-05, "loss": 0.2077, "step": 45977 }, { "epoch": 3.7247245625405054, "grad_norm": 0.08028055727481842, "learning_rate": 1.52932175165399e-05, "loss": 0.2292, "step": 45978 }, { "epoch": 3.7248055735580037, "grad_norm": 0.06749919801950455, "learning_rate": 1.5288716863945273e-05, "loss": 0.1826, "step": 45979 }, { "epoch": 3.7248865845755024, "grad_norm": 0.05876849219202995, "learning_rate": 1.5284216211350647e-05, "loss": 0.2335, "step": 45980 }, { "epoch": 3.7249675955930006, "grad_norm": 0.07642512768507004, "learning_rate": 1.527971555875602e-05, "loss": 0.2121, "step": 45981 }, { "epoch": 3.725048606610499, "grad_norm": 0.06255366653203964, "learning_rate": 1.5275214906161394e-05, "loss": 0.2031, "step": 45982 }, { "epoch": 3.7251296176279975, "grad_norm": 0.06796018779277802, "learning_rate": 1.5270714253566768e-05, "loss": 0.2456, "step": 45983 }, { "epoch": 3.725210628645496, "grad_norm": 0.0734555795788765, "learning_rate": 1.526621360097214e-05, "loss": 0.2187, "step": 45984 }, { "epoch": 3.725291639662994, "grad_norm": 0.06981045752763748, "learning_rate": 1.5261712948377515e-05, "loss": 0.2197, "step": 45985 }, { "epoch": 3.7253726506804927, "grad_norm": 0.07305343449115753, "learning_rate": 1.5257212295782888e-05, "loss": 0.2002, "step": 45986 }, { "epoch": 3.725453661697991, "grad_norm": 0.08334986120462418, "learning_rate": 1.5252711643188264e-05, "loss": 0.2253, "step": 45987 }, { "epoch": 3.725534672715489, "grad_norm": 0.08052529394626617, "learning_rate": 1.5248210990593637e-05, "loss": 0.2338, "step": 45988 }, { "epoch": 3.725615683732988, "grad_norm": 0.08272536844015121, "learning_rate": 1.524371033799901e-05, "loss": 0.2334, "step": 45989 }, { "epoch": 3.725696694750486, "grad_norm": 0.07033362984657288, "learning_rate": 1.5239209685404385e-05, "loss": 0.2335, "step": 45990 }, { "epoch": 3.7257777057679844, "grad_norm": 0.06358537077903748, "learning_rate": 1.5234709032809758e-05, "loss": 0.2542, "step": 45991 }, { "epoch": 3.725858716785483, "grad_norm": 0.08354093134403229, "learning_rate": 1.5230208380215133e-05, "loss": 0.2444, "step": 45992 }, { "epoch": 3.7259397278029813, "grad_norm": 0.07609385251998901, "learning_rate": 1.5225707727620505e-05, "loss": 0.2156, "step": 45993 }, { "epoch": 3.7260207388204796, "grad_norm": 0.07945896685123444, "learning_rate": 1.5221207075025879e-05, "loss": 0.2156, "step": 45994 }, { "epoch": 3.726101749837978, "grad_norm": 0.07363783568143845, "learning_rate": 1.5216706422431254e-05, "loss": 0.2019, "step": 45995 }, { "epoch": 3.7261827608554765, "grad_norm": 0.07394561171531677, "learning_rate": 1.5212205769836626e-05, "loss": 0.1975, "step": 45996 }, { "epoch": 3.7262637718729748, "grad_norm": 0.0713035985827446, "learning_rate": 1.5207705117242001e-05, "loss": 0.2476, "step": 45997 }, { "epoch": 3.726344782890473, "grad_norm": 0.06830364465713501, "learning_rate": 1.5203204464647375e-05, "loss": 0.2395, "step": 45998 }, { "epoch": 3.7264257939079712, "grad_norm": 0.07303628325462341, "learning_rate": 1.5198703812052747e-05, "loss": 0.2143, "step": 45999 }, { "epoch": 3.72650680492547, "grad_norm": 0.07435938715934753, "learning_rate": 1.5194203159458122e-05, "loss": 0.2648, "step": 46000 }, { "epoch": 3.726587815942968, "grad_norm": 0.0673922747373581, "learning_rate": 1.5189702506863496e-05, "loss": 0.2256, "step": 46001 }, { "epoch": 3.7266688269604664, "grad_norm": 0.06162109971046448, "learning_rate": 1.5185201854268868e-05, "loss": 0.2254, "step": 46002 }, { "epoch": 3.726749837977965, "grad_norm": 0.10011584311723709, "learning_rate": 1.5180701201674243e-05, "loss": 0.2665, "step": 46003 }, { "epoch": 3.7268308489954634, "grad_norm": 0.07240945100784302, "learning_rate": 1.5176200549079617e-05, "loss": 0.2359, "step": 46004 }, { "epoch": 3.7269118600129616, "grad_norm": 0.07601157575845718, "learning_rate": 1.5171699896484992e-05, "loss": 0.2338, "step": 46005 }, { "epoch": 3.7269928710304603, "grad_norm": 0.08337153494358063, "learning_rate": 1.5167199243890366e-05, "loss": 0.2743, "step": 46006 }, { "epoch": 3.7270738820479585, "grad_norm": 0.07271520048379898, "learning_rate": 1.5162698591295738e-05, "loss": 0.2244, "step": 46007 }, { "epoch": 3.7271548930654568, "grad_norm": 0.07792726159095764, "learning_rate": 1.5158197938701113e-05, "loss": 0.247, "step": 46008 }, { "epoch": 3.7272359040829555, "grad_norm": 0.07396295666694641, "learning_rate": 1.5153697286106486e-05, "loss": 0.265, "step": 46009 }, { "epoch": 3.7273169151004537, "grad_norm": 0.06522729247808456, "learning_rate": 1.5149196633511862e-05, "loss": 0.2406, "step": 46010 }, { "epoch": 3.727397926117952, "grad_norm": 0.07633328437805176, "learning_rate": 1.5144695980917234e-05, "loss": 0.2499, "step": 46011 }, { "epoch": 3.7274789371354506, "grad_norm": 0.07195275276899338, "learning_rate": 1.5140195328322607e-05, "loss": 0.2147, "step": 46012 }, { "epoch": 3.727559948152949, "grad_norm": 0.07411804050207138, "learning_rate": 1.5135694675727983e-05, "loss": 0.2065, "step": 46013 }, { "epoch": 3.727640959170447, "grad_norm": 0.06602322310209274, "learning_rate": 1.5131194023133354e-05, "loss": 0.2006, "step": 46014 }, { "epoch": 3.727721970187946, "grad_norm": 0.07067742198705673, "learning_rate": 1.5126693370538728e-05, "loss": 0.2145, "step": 46015 }, { "epoch": 3.727802981205444, "grad_norm": 0.06092274561524391, "learning_rate": 1.5122192717944103e-05, "loss": 0.2087, "step": 46016 }, { "epoch": 3.7278839922229423, "grad_norm": 0.07934263348579407, "learning_rate": 1.5117692065349475e-05, "loss": 0.2166, "step": 46017 }, { "epoch": 3.7279650032404406, "grad_norm": 0.07236896455287933, "learning_rate": 1.511319141275485e-05, "loss": 0.2496, "step": 46018 }, { "epoch": 3.7280460142579392, "grad_norm": 0.07020833343267441, "learning_rate": 1.5108690760160224e-05, "loss": 0.2294, "step": 46019 }, { "epoch": 3.7281270252754375, "grad_norm": 0.0577675886452198, "learning_rate": 1.5104190107565596e-05, "loss": 0.2161, "step": 46020 }, { "epoch": 3.7282080362929357, "grad_norm": 0.06877872347831726, "learning_rate": 1.5099689454970973e-05, "loss": 0.2433, "step": 46021 }, { "epoch": 3.728289047310434, "grad_norm": 0.05976353585720062, "learning_rate": 1.5095188802376345e-05, "loss": 0.2133, "step": 46022 }, { "epoch": 3.7283700583279327, "grad_norm": 0.07601095736026764, "learning_rate": 1.509068814978172e-05, "loss": 0.2678, "step": 46023 }, { "epoch": 3.728451069345431, "grad_norm": 0.09052573144435883, "learning_rate": 1.5086187497187094e-05, "loss": 0.2468, "step": 46024 }, { "epoch": 3.728532080362929, "grad_norm": 0.0643225833773613, "learning_rate": 1.5081686844592466e-05, "loss": 0.2259, "step": 46025 }, { "epoch": 3.728613091380428, "grad_norm": 0.07924170792102814, "learning_rate": 1.5077186191997841e-05, "loss": 0.2399, "step": 46026 }, { "epoch": 3.728694102397926, "grad_norm": 0.07029829919338226, "learning_rate": 1.5072685539403215e-05, "loss": 0.2177, "step": 46027 }, { "epoch": 3.7287751134154243, "grad_norm": 0.07222612202167511, "learning_rate": 1.5068184886808587e-05, "loss": 0.2465, "step": 46028 }, { "epoch": 3.728856124432923, "grad_norm": 0.08449197560548782, "learning_rate": 1.5063684234213962e-05, "loss": 0.2017, "step": 46029 }, { "epoch": 3.7289371354504213, "grad_norm": 0.05938245728611946, "learning_rate": 1.5059183581619335e-05, "loss": 0.2076, "step": 46030 }, { "epoch": 3.7290181464679195, "grad_norm": 0.07024046778678894, "learning_rate": 1.505468292902471e-05, "loss": 0.2041, "step": 46031 }, { "epoch": 3.729099157485418, "grad_norm": 0.07162220031023026, "learning_rate": 1.5050182276430083e-05, "loss": 0.2144, "step": 46032 }, { "epoch": 3.7291801685029164, "grad_norm": 0.06670884788036346, "learning_rate": 1.5045681623835456e-05, "loss": 0.2002, "step": 46033 }, { "epoch": 3.7292611795204147, "grad_norm": 0.0706491693854332, "learning_rate": 1.5041180971240832e-05, "loss": 0.1878, "step": 46034 }, { "epoch": 3.7293421905379134, "grad_norm": 0.07656066864728928, "learning_rate": 1.5036680318646203e-05, "loss": 0.1964, "step": 46035 }, { "epoch": 3.7294232015554116, "grad_norm": 0.07184910029172897, "learning_rate": 1.5032179666051579e-05, "loss": 0.2506, "step": 46036 }, { "epoch": 3.72950421257291, "grad_norm": 0.07077179104089737, "learning_rate": 1.5027679013456952e-05, "loss": 0.208, "step": 46037 }, { "epoch": 3.7295852235904086, "grad_norm": 0.0829695537686348, "learning_rate": 1.5023178360862324e-05, "loss": 0.229, "step": 46038 }, { "epoch": 3.729666234607907, "grad_norm": 0.06951703131198883, "learning_rate": 1.5018677708267701e-05, "loss": 0.2148, "step": 46039 }, { "epoch": 3.729747245625405, "grad_norm": 0.0725255087018013, "learning_rate": 1.5014177055673073e-05, "loss": 0.2519, "step": 46040 }, { "epoch": 3.7298282566429033, "grad_norm": 0.06751804798841476, "learning_rate": 1.5009676403078445e-05, "loss": 0.223, "step": 46041 }, { "epoch": 3.7299092676604015, "grad_norm": 0.08554006367921829, "learning_rate": 1.5005175750483822e-05, "loss": 0.2673, "step": 46042 }, { "epoch": 3.7299902786779002, "grad_norm": 0.06487467885017395, "learning_rate": 1.5000675097889194e-05, "loss": 0.2243, "step": 46043 }, { "epoch": 3.7300712896953985, "grad_norm": 0.08155498653650284, "learning_rate": 1.499617444529457e-05, "loss": 0.2768, "step": 46044 }, { "epoch": 3.7301523007128967, "grad_norm": 0.06040952354669571, "learning_rate": 1.4991673792699943e-05, "loss": 0.2139, "step": 46045 }, { "epoch": 3.7302333117303954, "grad_norm": 0.07658139616250992, "learning_rate": 1.4987173140105315e-05, "loss": 0.2356, "step": 46046 }, { "epoch": 3.7303143227478937, "grad_norm": 0.08683796226978302, "learning_rate": 1.498267248751069e-05, "loss": 0.2153, "step": 46047 }, { "epoch": 3.730395333765392, "grad_norm": 0.08210854977369308, "learning_rate": 1.4978171834916064e-05, "loss": 0.2168, "step": 46048 }, { "epoch": 3.7304763447828906, "grad_norm": 0.08197548985481262, "learning_rate": 1.4973671182321436e-05, "loss": 0.2389, "step": 46049 }, { "epoch": 3.730557355800389, "grad_norm": 0.06574133038520813, "learning_rate": 1.4969170529726811e-05, "loss": 0.2428, "step": 46050 }, { "epoch": 3.730638366817887, "grad_norm": 0.082312673330307, "learning_rate": 1.4964669877132185e-05, "loss": 0.2285, "step": 46051 }, { "epoch": 3.7307193778353858, "grad_norm": 0.0676484927535057, "learning_rate": 1.496016922453756e-05, "loss": 0.2057, "step": 46052 }, { "epoch": 3.730800388852884, "grad_norm": 0.06505704671144485, "learning_rate": 1.4955668571942932e-05, "loss": 0.2257, "step": 46053 }, { "epoch": 3.7308813998703823, "grad_norm": 0.0626121386885643, "learning_rate": 1.4951167919348305e-05, "loss": 0.2512, "step": 46054 }, { "epoch": 3.730962410887881, "grad_norm": 0.07555672526359558, "learning_rate": 1.494666726675368e-05, "loss": 0.1947, "step": 46055 }, { "epoch": 3.731043421905379, "grad_norm": 0.05888738855719566, "learning_rate": 1.4942166614159053e-05, "loss": 0.208, "step": 46056 }, { "epoch": 3.7311244329228774, "grad_norm": 0.0674942135810852, "learning_rate": 1.493766596156443e-05, "loss": 0.2275, "step": 46057 }, { "epoch": 3.731205443940376, "grad_norm": 0.05916551873087883, "learning_rate": 1.4933165308969801e-05, "loss": 0.1929, "step": 46058 }, { "epoch": 3.7312864549578744, "grad_norm": 0.08539033681154251, "learning_rate": 1.4928664656375175e-05, "loss": 0.2194, "step": 46059 }, { "epoch": 3.7313674659753726, "grad_norm": 0.06798946112394333, "learning_rate": 1.492416400378055e-05, "loss": 0.2181, "step": 46060 }, { "epoch": 3.7314484769928713, "grad_norm": 0.06484483182430267, "learning_rate": 1.4919663351185922e-05, "loss": 0.2043, "step": 46061 }, { "epoch": 3.7315294880103695, "grad_norm": 0.0604698583483696, "learning_rate": 1.4915162698591296e-05, "loss": 0.1975, "step": 46062 }, { "epoch": 3.731610499027868, "grad_norm": 0.0822327584028244, "learning_rate": 1.4910662045996671e-05, "loss": 0.2609, "step": 46063 }, { "epoch": 3.731691510045366, "grad_norm": 0.09345957636833191, "learning_rate": 1.4906161393402043e-05, "loss": 0.2302, "step": 46064 }, { "epoch": 3.7317725210628643, "grad_norm": 0.06493161618709564, "learning_rate": 1.4901660740807418e-05, "loss": 0.1933, "step": 46065 }, { "epoch": 3.731853532080363, "grad_norm": 0.0702953115105629, "learning_rate": 1.4897160088212792e-05, "loss": 0.1897, "step": 46066 }, { "epoch": 3.731934543097861, "grad_norm": 0.06461524218320847, "learning_rate": 1.4892659435618164e-05, "loss": 0.2407, "step": 46067 }, { "epoch": 3.7320155541153595, "grad_norm": 0.08654788136482239, "learning_rate": 1.488815878302354e-05, "loss": 0.236, "step": 46068 }, { "epoch": 3.732096565132858, "grad_norm": 0.0883837565779686, "learning_rate": 1.4883658130428913e-05, "loss": 0.2058, "step": 46069 }, { "epoch": 3.7321775761503564, "grad_norm": 0.06732512265443802, "learning_rate": 1.4879157477834288e-05, "loss": 0.2224, "step": 46070 }, { "epoch": 3.7322585871678546, "grad_norm": 0.06484067440032959, "learning_rate": 1.487465682523966e-05, "loss": 0.2419, "step": 46071 }, { "epoch": 3.7323395981853533, "grad_norm": 0.0646497905254364, "learning_rate": 1.4870156172645034e-05, "loss": 0.1876, "step": 46072 }, { "epoch": 3.7324206092028516, "grad_norm": 0.101152203977108, "learning_rate": 1.4865655520050409e-05, "loss": 0.2442, "step": 46073 }, { "epoch": 3.73250162022035, "grad_norm": 0.08255450427532196, "learning_rate": 1.486115486745578e-05, "loss": 0.2307, "step": 46074 }, { "epoch": 3.7325826312378485, "grad_norm": 0.08730298280715942, "learning_rate": 1.4856654214861154e-05, "loss": 0.2408, "step": 46075 }, { "epoch": 3.7326636422553467, "grad_norm": 0.0731593444943428, "learning_rate": 1.485215356226653e-05, "loss": 0.1948, "step": 46076 }, { "epoch": 3.732744653272845, "grad_norm": 0.07262025028467178, "learning_rate": 1.4847652909671903e-05, "loss": 0.2244, "step": 46077 }, { "epoch": 3.7328256642903437, "grad_norm": 0.07813366502523422, "learning_rate": 1.4843152257077279e-05, "loss": 0.2623, "step": 46078 }, { "epoch": 3.732906675307842, "grad_norm": 0.06445509940385818, "learning_rate": 1.483865160448265e-05, "loss": 0.2185, "step": 46079 }, { "epoch": 3.73298768632534, "grad_norm": 0.07868556678295135, "learning_rate": 1.4834150951888024e-05, "loss": 0.212, "step": 46080 }, { "epoch": 3.733068697342839, "grad_norm": 0.0866679698228836, "learning_rate": 1.48296502992934e-05, "loss": 0.2424, "step": 46081 }, { "epoch": 3.733149708360337, "grad_norm": 0.06397400051355362, "learning_rate": 1.4825149646698771e-05, "loss": 0.2272, "step": 46082 }, { "epoch": 3.7332307193778353, "grad_norm": 0.07194807380437851, "learning_rate": 1.4820648994104147e-05, "loss": 0.2261, "step": 46083 }, { "epoch": 3.733311730395334, "grad_norm": 0.06789196282625198, "learning_rate": 1.481614834150952e-05, "loss": 0.2246, "step": 46084 }, { "epoch": 3.7333927414128323, "grad_norm": 0.07250261306762695, "learning_rate": 1.4811647688914892e-05, "loss": 0.2306, "step": 46085 }, { "epoch": 3.7334737524303305, "grad_norm": 0.10532908141613007, "learning_rate": 1.4807147036320267e-05, "loss": 0.2482, "step": 46086 }, { "epoch": 3.7335547634478288, "grad_norm": 0.062470242381095886, "learning_rate": 1.4802646383725641e-05, "loss": 0.1862, "step": 46087 }, { "epoch": 3.733635774465327, "grad_norm": 0.07458098232746124, "learning_rate": 1.4798145731131013e-05, "loss": 0.2172, "step": 46088 }, { "epoch": 3.7337167854828257, "grad_norm": 0.07555191963911057, "learning_rate": 1.4793645078536388e-05, "loss": 0.2453, "step": 46089 }, { "epoch": 3.733797796500324, "grad_norm": 0.08217816799879074, "learning_rate": 1.4789144425941762e-05, "loss": 0.2386, "step": 46090 }, { "epoch": 3.733878807517822, "grad_norm": 0.07929562777280807, "learning_rate": 1.4784643773347137e-05, "loss": 0.2048, "step": 46091 }, { "epoch": 3.733959818535321, "grad_norm": 0.09434565156698227, "learning_rate": 1.4780143120752509e-05, "loss": 0.238, "step": 46092 }, { "epoch": 3.734040829552819, "grad_norm": 0.07045267522335052, "learning_rate": 1.4775642468157883e-05, "loss": 0.2308, "step": 46093 }, { "epoch": 3.7341218405703174, "grad_norm": 0.0724666640162468, "learning_rate": 1.4771141815563258e-05, "loss": 0.1918, "step": 46094 }, { "epoch": 3.734202851587816, "grad_norm": 0.08512706309556961, "learning_rate": 1.4766641162968632e-05, "loss": 0.2463, "step": 46095 }, { "epoch": 3.7342838626053143, "grad_norm": 0.06601796299219131, "learning_rate": 1.4762140510374007e-05, "loss": 0.2152, "step": 46096 }, { "epoch": 3.7343648736228126, "grad_norm": 0.0734667181968689, "learning_rate": 1.4757639857779379e-05, "loss": 0.2308, "step": 46097 }, { "epoch": 3.7344458846403112, "grad_norm": 0.076157346367836, "learning_rate": 1.4753139205184752e-05, "loss": 0.1982, "step": 46098 }, { "epoch": 3.7345268956578095, "grad_norm": 0.06739217787981033, "learning_rate": 1.4748638552590128e-05, "loss": 0.267, "step": 46099 }, { "epoch": 3.7346079066753077, "grad_norm": 0.06588942557573318, "learning_rate": 1.47441378999955e-05, "loss": 0.2426, "step": 46100 }, { "epoch": 3.7346889176928064, "grad_norm": 0.08635196089744568, "learning_rate": 1.4739637247400873e-05, "loss": 0.27, "step": 46101 }, { "epoch": 3.7347699287103047, "grad_norm": 0.06514254957437515, "learning_rate": 1.4735136594806248e-05, "loss": 0.2243, "step": 46102 }, { "epoch": 3.734850939727803, "grad_norm": 0.06313839554786682, "learning_rate": 1.473063594221162e-05, "loss": 0.2219, "step": 46103 }, { "epoch": 3.7349319507453016, "grad_norm": 0.06508518010377884, "learning_rate": 1.4726135289616996e-05, "loss": 0.2105, "step": 46104 }, { "epoch": 3.7350129617628, "grad_norm": 0.06646306067705154, "learning_rate": 1.472163463702237e-05, "loss": 0.2061, "step": 46105 }, { "epoch": 3.735093972780298, "grad_norm": 0.07501812279224396, "learning_rate": 1.4717133984427741e-05, "loss": 0.2194, "step": 46106 }, { "epoch": 3.7351749837977968, "grad_norm": 0.08135095983743668, "learning_rate": 1.4712633331833116e-05, "loss": 0.2341, "step": 46107 }, { "epoch": 3.735255994815295, "grad_norm": 0.07120555639266968, "learning_rate": 1.470813267923849e-05, "loss": 0.23, "step": 46108 }, { "epoch": 3.7353370058327933, "grad_norm": 0.05750159174203873, "learning_rate": 1.4703632026643865e-05, "loss": 0.2264, "step": 46109 }, { "epoch": 3.7354180168502915, "grad_norm": 0.060754384845495224, "learning_rate": 1.4699131374049237e-05, "loss": 0.2099, "step": 46110 }, { "epoch": 3.7354990278677898, "grad_norm": 0.08677901327610016, "learning_rate": 1.4694630721454611e-05, "loss": 0.2165, "step": 46111 }, { "epoch": 3.7355800388852884, "grad_norm": 0.07808344811201096, "learning_rate": 1.4690130068859986e-05, "loss": 0.2626, "step": 46112 }, { "epoch": 3.7356610499027867, "grad_norm": 0.08093435317277908, "learning_rate": 1.468562941626536e-05, "loss": 0.2381, "step": 46113 }, { "epoch": 3.735742060920285, "grad_norm": 0.07375194877386093, "learning_rate": 1.4681128763670732e-05, "loss": 0.196, "step": 46114 }, { "epoch": 3.7358230719377836, "grad_norm": 0.07342427223920822, "learning_rate": 1.4676628111076107e-05, "loss": 0.1924, "step": 46115 }, { "epoch": 3.735904082955282, "grad_norm": 0.06829115003347397, "learning_rate": 1.467212745848148e-05, "loss": 0.2165, "step": 46116 }, { "epoch": 3.73598509397278, "grad_norm": 0.07790973037481308, "learning_rate": 1.4667626805886856e-05, "loss": 0.2195, "step": 46117 }, { "epoch": 3.736066104990279, "grad_norm": 0.06781540811061859, "learning_rate": 1.4663126153292228e-05, "loss": 0.1872, "step": 46118 }, { "epoch": 3.736147116007777, "grad_norm": 0.08341260254383087, "learning_rate": 1.4658625500697601e-05, "loss": 0.2518, "step": 46119 }, { "epoch": 3.7362281270252753, "grad_norm": 0.08755706995725632, "learning_rate": 1.4654124848102977e-05, "loss": 0.268, "step": 46120 }, { "epoch": 3.736309138042774, "grad_norm": 0.08430002629756927, "learning_rate": 1.4649624195508349e-05, "loss": 0.2045, "step": 46121 }, { "epoch": 3.7363901490602722, "grad_norm": 0.09913341701030731, "learning_rate": 1.4645123542913724e-05, "loss": 0.2489, "step": 46122 }, { "epoch": 3.7364711600777705, "grad_norm": 0.08157473057508469, "learning_rate": 1.4640622890319098e-05, "loss": 0.2011, "step": 46123 }, { "epoch": 3.736552171095269, "grad_norm": 0.06985532492399216, "learning_rate": 1.463612223772447e-05, "loss": 0.1987, "step": 46124 }, { "epoch": 3.7366331821127674, "grad_norm": 0.0689232274889946, "learning_rate": 1.4631621585129845e-05, "loss": 0.1943, "step": 46125 }, { "epoch": 3.7367141931302656, "grad_norm": 0.07660727947950363, "learning_rate": 1.4627120932535218e-05, "loss": 0.2355, "step": 46126 }, { "epoch": 3.7367952041477643, "grad_norm": 0.07977120578289032, "learning_rate": 1.462262027994059e-05, "loss": 0.2234, "step": 46127 }, { "epoch": 3.7368762151652626, "grad_norm": 0.09145716577768326, "learning_rate": 1.4618119627345967e-05, "loss": 0.2729, "step": 46128 }, { "epoch": 3.736957226182761, "grad_norm": 0.06459056586027145, "learning_rate": 1.4613618974751339e-05, "loss": 0.2561, "step": 46129 }, { "epoch": 3.737038237200259, "grad_norm": 0.07937449961900711, "learning_rate": 1.4609118322156714e-05, "loss": 0.2047, "step": 46130 }, { "epoch": 3.7371192482177578, "grad_norm": 0.08180252462625504, "learning_rate": 1.4604617669562088e-05, "loss": 0.2194, "step": 46131 }, { "epoch": 3.737200259235256, "grad_norm": 0.06420495361089706, "learning_rate": 1.460011701696746e-05, "loss": 0.1774, "step": 46132 }, { "epoch": 3.7372812702527543, "grad_norm": 0.0610925629734993, "learning_rate": 1.4595616364372835e-05, "loss": 0.191, "step": 46133 }, { "epoch": 3.7373622812702525, "grad_norm": 0.06337723135948181, "learning_rate": 1.4591115711778209e-05, "loss": 0.2092, "step": 46134 }, { "epoch": 3.737443292287751, "grad_norm": 0.0656128078699112, "learning_rate": 1.458661505918358e-05, "loss": 0.2142, "step": 46135 }, { "epoch": 3.7375243033052494, "grad_norm": 0.0756414532661438, "learning_rate": 1.4582114406588956e-05, "loss": 0.2215, "step": 46136 }, { "epoch": 3.7376053143227477, "grad_norm": 0.06451795995235443, "learning_rate": 1.457761375399433e-05, "loss": 0.2521, "step": 46137 }, { "epoch": 3.7376863253402464, "grad_norm": 0.07743828743696213, "learning_rate": 1.4573113101399705e-05, "loss": 0.2147, "step": 46138 }, { "epoch": 3.7377673363577446, "grad_norm": 0.0701700821518898, "learning_rate": 1.4568612448805077e-05, "loss": 0.2088, "step": 46139 }, { "epoch": 3.737848347375243, "grad_norm": 0.08126933127641678, "learning_rate": 1.456411179621045e-05, "loss": 0.251, "step": 46140 }, { "epoch": 3.7379293583927415, "grad_norm": 0.07062458992004395, "learning_rate": 1.4559611143615826e-05, "loss": 0.2112, "step": 46141 }, { "epoch": 3.73801036941024, "grad_norm": 0.08064697682857513, "learning_rate": 1.4555110491021198e-05, "loss": 0.2212, "step": 46142 }, { "epoch": 3.738091380427738, "grad_norm": 0.06433752179145813, "learning_rate": 1.4550609838426573e-05, "loss": 0.2224, "step": 46143 }, { "epoch": 3.7381723914452367, "grad_norm": 0.0687873363494873, "learning_rate": 1.4546109185831947e-05, "loss": 0.2246, "step": 46144 }, { "epoch": 3.738253402462735, "grad_norm": 0.06918583065271378, "learning_rate": 1.4541608533237318e-05, "loss": 0.211, "step": 46145 }, { "epoch": 3.738334413480233, "grad_norm": 0.07868772000074387, "learning_rate": 1.4537107880642695e-05, "loss": 0.2275, "step": 46146 }, { "epoch": 3.738415424497732, "grad_norm": 0.06755036860704422, "learning_rate": 1.4532607228048067e-05, "loss": 0.195, "step": 46147 }, { "epoch": 3.73849643551523, "grad_norm": 0.06591461598873138, "learning_rate": 1.452810657545344e-05, "loss": 0.1961, "step": 46148 }, { "epoch": 3.7385774465327284, "grad_norm": 0.090169258415699, "learning_rate": 1.4523605922858816e-05, "loss": 0.243, "step": 46149 }, { "epoch": 3.738658457550227, "grad_norm": 0.07051538676023483, "learning_rate": 1.4519105270264188e-05, "loss": 0.1963, "step": 46150 }, { "epoch": 3.7387394685677253, "grad_norm": 0.08839746564626694, "learning_rate": 1.4514604617669563e-05, "loss": 0.2587, "step": 46151 }, { "epoch": 3.7388204795852236, "grad_norm": 0.06953133642673492, "learning_rate": 1.4510103965074937e-05, "loss": 0.1979, "step": 46152 }, { "epoch": 3.738901490602722, "grad_norm": 0.08543260395526886, "learning_rate": 1.4505603312480309e-05, "loss": 0.2256, "step": 46153 }, { "epoch": 3.7389825016202205, "grad_norm": 0.06941500306129456, "learning_rate": 1.4501102659885684e-05, "loss": 0.2061, "step": 46154 }, { "epoch": 3.7390635126377187, "grad_norm": 0.05090836063027382, "learning_rate": 1.4496602007291058e-05, "loss": 0.2119, "step": 46155 }, { "epoch": 3.739144523655217, "grad_norm": 0.06198856979608536, "learning_rate": 1.4492101354696433e-05, "loss": 0.2057, "step": 46156 }, { "epoch": 3.7392255346727152, "grad_norm": 0.07832913100719452, "learning_rate": 1.4487600702101805e-05, "loss": 0.1959, "step": 46157 }, { "epoch": 3.739306545690214, "grad_norm": 0.07277149707078934, "learning_rate": 1.4483100049507179e-05, "loss": 0.2088, "step": 46158 }, { "epoch": 3.739387556707712, "grad_norm": 0.08063270896673203, "learning_rate": 1.4478599396912554e-05, "loss": 0.2205, "step": 46159 }, { "epoch": 3.7394685677252104, "grad_norm": 0.058787040412425995, "learning_rate": 1.4474098744317926e-05, "loss": 0.1775, "step": 46160 }, { "epoch": 3.739549578742709, "grad_norm": 0.0893818661570549, "learning_rate": 1.44695980917233e-05, "loss": 0.2183, "step": 46161 }, { "epoch": 3.7396305897602073, "grad_norm": 0.0777546837925911, "learning_rate": 1.4465097439128675e-05, "loss": 0.2185, "step": 46162 }, { "epoch": 3.7397116007777056, "grad_norm": 0.0717715248465538, "learning_rate": 1.4460596786534047e-05, "loss": 0.2058, "step": 46163 }, { "epoch": 3.7397926117952043, "grad_norm": 0.07632574439048767, "learning_rate": 1.4456096133939424e-05, "loss": 0.2519, "step": 46164 }, { "epoch": 3.7398736228127025, "grad_norm": 0.07262930274009705, "learning_rate": 1.4451595481344796e-05, "loss": 0.2087, "step": 46165 }, { "epoch": 3.7399546338302008, "grad_norm": 0.07834277302026749, "learning_rate": 1.444709482875017e-05, "loss": 0.2157, "step": 46166 }, { "epoch": 3.7400356448476995, "grad_norm": 0.07401188462972641, "learning_rate": 1.4442594176155545e-05, "loss": 0.2323, "step": 46167 }, { "epoch": 3.7401166558651977, "grad_norm": 0.06235014274716377, "learning_rate": 1.4438093523560916e-05, "loss": 0.2128, "step": 46168 }, { "epoch": 3.740197666882696, "grad_norm": 0.09011727571487427, "learning_rate": 1.4433592870966292e-05, "loss": 0.251, "step": 46169 }, { "epoch": 3.7402786779001946, "grad_norm": 0.07866238057613373, "learning_rate": 1.4429092218371665e-05, "loss": 0.2425, "step": 46170 }, { "epoch": 3.740359688917693, "grad_norm": 0.06134997680783272, "learning_rate": 1.4424591565777037e-05, "loss": 0.191, "step": 46171 }, { "epoch": 3.740440699935191, "grad_norm": 0.05997680500149727, "learning_rate": 1.4420090913182413e-05, "loss": 0.2054, "step": 46172 }, { "epoch": 3.74052171095269, "grad_norm": 0.06824682652950287, "learning_rate": 1.4415590260587786e-05, "loss": 0.2271, "step": 46173 }, { "epoch": 3.740602721970188, "grad_norm": 0.062017668038606644, "learning_rate": 1.4411089607993158e-05, "loss": 0.2037, "step": 46174 }, { "epoch": 3.7406837329876863, "grad_norm": 0.06739827990531921, "learning_rate": 1.4406588955398533e-05, "loss": 0.2437, "step": 46175 }, { "epoch": 3.7407647440051845, "grad_norm": 0.0841369777917862, "learning_rate": 1.4402088302803907e-05, "loss": 0.1901, "step": 46176 }, { "epoch": 3.7408457550226832, "grad_norm": 0.06553324311971664, "learning_rate": 1.4397587650209282e-05, "loss": 0.211, "step": 46177 }, { "epoch": 3.7409267660401815, "grad_norm": 0.07525978982448578, "learning_rate": 1.4393086997614654e-05, "loss": 0.2238, "step": 46178 }, { "epoch": 3.7410077770576797, "grad_norm": 0.07539505511522293, "learning_rate": 1.4388586345020028e-05, "loss": 0.211, "step": 46179 }, { "epoch": 3.741088788075178, "grad_norm": 0.063744455575943, "learning_rate": 1.4384085692425403e-05, "loss": 0.2007, "step": 46180 }, { "epoch": 3.7411697990926767, "grad_norm": 0.07325825095176697, "learning_rate": 1.4379585039830775e-05, "loss": 0.2334, "step": 46181 }, { "epoch": 3.741250810110175, "grad_norm": 0.07868387550115585, "learning_rate": 1.4375084387236152e-05, "loss": 0.2157, "step": 46182 }, { "epoch": 3.741331821127673, "grad_norm": 0.07169875502586365, "learning_rate": 1.4370583734641524e-05, "loss": 0.2132, "step": 46183 }, { "epoch": 3.741412832145172, "grad_norm": 0.07317327708005905, "learning_rate": 1.4366083082046897e-05, "loss": 0.253, "step": 46184 }, { "epoch": 3.74149384316267, "grad_norm": 0.06858827918767929, "learning_rate": 1.4361582429452273e-05, "loss": 0.2038, "step": 46185 }, { "epoch": 3.7415748541801683, "grad_norm": 0.05808888003230095, "learning_rate": 1.4357081776857645e-05, "loss": 0.1893, "step": 46186 }, { "epoch": 3.741655865197667, "grad_norm": 0.06428666412830353, "learning_rate": 1.4352581124263018e-05, "loss": 0.2119, "step": 46187 }, { "epoch": 3.7417368762151653, "grad_norm": 0.07074768841266632, "learning_rate": 1.4348080471668394e-05, "loss": 0.2288, "step": 46188 }, { "epoch": 3.7418178872326635, "grad_norm": 0.08114055544137955, "learning_rate": 1.4343579819073765e-05, "loss": 0.2495, "step": 46189 }, { "epoch": 3.741898898250162, "grad_norm": 0.07321310043334961, "learning_rate": 1.433907916647914e-05, "loss": 0.2476, "step": 46190 }, { "epoch": 3.7419799092676604, "grad_norm": 0.08425623923540115, "learning_rate": 1.4334578513884514e-05, "loss": 0.2302, "step": 46191 }, { "epoch": 3.7420609202851587, "grad_norm": 0.06360992789268494, "learning_rate": 1.4330077861289886e-05, "loss": 0.2127, "step": 46192 }, { "epoch": 3.7421419313026574, "grad_norm": 0.07066339999437332, "learning_rate": 1.4325577208695262e-05, "loss": 0.2265, "step": 46193 }, { "epoch": 3.7422229423201556, "grad_norm": 0.07735880464315414, "learning_rate": 1.4321076556100635e-05, "loss": 0.1989, "step": 46194 }, { "epoch": 3.742303953337654, "grad_norm": 0.07494205236434937, "learning_rate": 1.431657590350601e-05, "loss": 0.2466, "step": 46195 }, { "epoch": 3.7423849643551526, "grad_norm": 0.07548823207616806, "learning_rate": 1.4312075250911382e-05, "loss": 0.25, "step": 46196 }, { "epoch": 3.742465975372651, "grad_norm": 0.0775391012430191, "learning_rate": 1.4307574598316756e-05, "loss": 0.2154, "step": 46197 }, { "epoch": 3.742546986390149, "grad_norm": 0.06699874252080917, "learning_rate": 1.4303073945722131e-05, "loss": 0.2235, "step": 46198 }, { "epoch": 3.7426279974076473, "grad_norm": 0.07778064161539078, "learning_rate": 1.4298573293127503e-05, "loss": 0.2105, "step": 46199 }, { "epoch": 3.742709008425146, "grad_norm": 0.06393593549728394, "learning_rate": 1.4294072640532877e-05, "loss": 0.1742, "step": 46200 }, { "epoch": 3.7427900194426442, "grad_norm": 0.0789911299943924, "learning_rate": 1.4289571987938252e-05, "loss": 0.2344, "step": 46201 }, { "epoch": 3.7428710304601425, "grad_norm": 0.07193242013454437, "learning_rate": 1.4285071335343626e-05, "loss": 0.1964, "step": 46202 }, { "epoch": 3.7429520414776407, "grad_norm": 0.04981725290417671, "learning_rate": 1.4280570682749001e-05, "loss": 0.1949, "step": 46203 }, { "epoch": 3.7430330524951394, "grad_norm": 0.05438205972313881, "learning_rate": 1.4276070030154373e-05, "loss": 0.1834, "step": 46204 }, { "epoch": 3.7431140635126376, "grad_norm": 0.07782084494829178, "learning_rate": 1.4271569377559747e-05, "loss": 0.2375, "step": 46205 }, { "epoch": 3.743195074530136, "grad_norm": 0.08021578937768936, "learning_rate": 1.4267068724965122e-05, "loss": 0.2317, "step": 46206 }, { "epoch": 3.7432760855476346, "grad_norm": 0.06510843336582184, "learning_rate": 1.4262568072370494e-05, "loss": 0.1998, "step": 46207 }, { "epoch": 3.743357096565133, "grad_norm": 0.08877057582139969, "learning_rate": 1.4258067419775867e-05, "loss": 0.2367, "step": 46208 }, { "epoch": 3.743438107582631, "grad_norm": 0.06314340233802795, "learning_rate": 1.4253566767181243e-05, "loss": 0.2043, "step": 46209 }, { "epoch": 3.7435191186001298, "grad_norm": 0.06458821147680283, "learning_rate": 1.4249066114586615e-05, "loss": 0.1905, "step": 46210 }, { "epoch": 3.743600129617628, "grad_norm": 0.054313499480485916, "learning_rate": 1.424456546199199e-05, "loss": 0.2063, "step": 46211 }, { "epoch": 3.7436811406351262, "grad_norm": 0.059741366654634476, "learning_rate": 1.4240064809397363e-05, "loss": 0.1971, "step": 46212 }, { "epoch": 3.743762151652625, "grad_norm": 0.07452462613582611, "learning_rate": 1.4235564156802735e-05, "loss": 0.2196, "step": 46213 }, { "epoch": 3.743843162670123, "grad_norm": 0.07014549523591995, "learning_rate": 1.423106350420811e-05, "loss": 0.2512, "step": 46214 }, { "epoch": 3.7439241736876214, "grad_norm": 0.08021154999732971, "learning_rate": 1.4226562851613484e-05, "loss": 0.2251, "step": 46215 }, { "epoch": 3.74400518470512, "grad_norm": 0.06592398136854172, "learning_rate": 1.422206219901886e-05, "loss": 0.2422, "step": 46216 }, { "epoch": 3.7440861957226184, "grad_norm": 0.0847722515463829, "learning_rate": 1.4217561546424233e-05, "loss": 0.2682, "step": 46217 }, { "epoch": 3.7441672067401166, "grad_norm": 0.07960616797208786, "learning_rate": 1.4213060893829605e-05, "loss": 0.2091, "step": 46218 }, { "epoch": 3.7442482177576153, "grad_norm": 0.06076245382428169, "learning_rate": 1.420856024123498e-05, "loss": 0.2446, "step": 46219 }, { "epoch": 3.7443292287751135, "grad_norm": 0.07363170385360718, "learning_rate": 1.4204059588640354e-05, "loss": 0.218, "step": 46220 }, { "epoch": 3.744410239792612, "grad_norm": 0.062311843037605286, "learning_rate": 1.4199558936045726e-05, "loss": 0.2067, "step": 46221 }, { "epoch": 3.74449125081011, "grad_norm": 0.05965553969144821, "learning_rate": 1.4195058283451101e-05, "loss": 0.218, "step": 46222 }, { "epoch": 3.7445722618276087, "grad_norm": 0.08739668875932693, "learning_rate": 1.4190557630856475e-05, "loss": 0.2213, "step": 46223 }, { "epoch": 3.744653272845107, "grad_norm": 0.08150233328342438, "learning_rate": 1.418605697826185e-05, "loss": 0.2578, "step": 46224 }, { "epoch": 3.744734283862605, "grad_norm": 0.09309550374746323, "learning_rate": 1.4181556325667222e-05, "loss": 0.2188, "step": 46225 }, { "epoch": 3.7448152948801035, "grad_norm": 0.0847107321023941, "learning_rate": 1.4177055673072596e-05, "loss": 0.227, "step": 46226 }, { "epoch": 3.744896305897602, "grad_norm": 0.06954275071620941, "learning_rate": 1.4172555020477971e-05, "loss": 0.2429, "step": 46227 }, { "epoch": 3.7449773169151004, "grad_norm": 0.0696917250752449, "learning_rate": 1.4168054367883343e-05, "loss": 0.2489, "step": 46228 }, { "epoch": 3.7450583279325986, "grad_norm": 0.08773817121982574, "learning_rate": 1.4163553715288718e-05, "loss": 0.2053, "step": 46229 }, { "epoch": 3.7451393389500973, "grad_norm": 0.06404940038919449, "learning_rate": 1.4159053062694092e-05, "loss": 0.2722, "step": 46230 }, { "epoch": 3.7452203499675956, "grad_norm": 0.06850332766771317, "learning_rate": 1.4154552410099464e-05, "loss": 0.2227, "step": 46231 }, { "epoch": 3.745301360985094, "grad_norm": 0.06635698676109314, "learning_rate": 1.4150051757504839e-05, "loss": 0.1951, "step": 46232 }, { "epoch": 3.7453823720025925, "grad_norm": 0.0643756315112114, "learning_rate": 1.4145551104910213e-05, "loss": 0.2088, "step": 46233 }, { "epoch": 3.7454633830200907, "grad_norm": 0.08907685428857803, "learning_rate": 1.4141050452315584e-05, "loss": 0.2359, "step": 46234 }, { "epoch": 3.745544394037589, "grad_norm": 0.07513679563999176, "learning_rate": 1.4136549799720961e-05, "loss": 0.1924, "step": 46235 }, { "epoch": 3.7456254050550877, "grad_norm": 0.06546442955732346, "learning_rate": 1.4132049147126333e-05, "loss": 0.2136, "step": 46236 }, { "epoch": 3.745706416072586, "grad_norm": 0.0697525292634964, "learning_rate": 1.4127548494531709e-05, "loss": 0.2123, "step": 46237 }, { "epoch": 3.745787427090084, "grad_norm": 0.07150597870349884, "learning_rate": 1.4123047841937082e-05, "loss": 0.1814, "step": 46238 }, { "epoch": 3.745868438107583, "grad_norm": 0.08265596628189087, "learning_rate": 1.4118547189342454e-05, "loss": 0.2069, "step": 46239 }, { "epoch": 3.745949449125081, "grad_norm": 0.06267092376947403, "learning_rate": 1.411404653674783e-05, "loss": 0.2089, "step": 46240 }, { "epoch": 3.7460304601425793, "grad_norm": 0.06582377105951309, "learning_rate": 1.4109545884153203e-05, "loss": 0.265, "step": 46241 }, { "epoch": 3.746111471160078, "grad_norm": 0.061006009578704834, "learning_rate": 1.4105045231558578e-05, "loss": 0.2243, "step": 46242 }, { "epoch": 3.7461924821775763, "grad_norm": 0.06815444678068161, "learning_rate": 1.410054457896395e-05, "loss": 0.2261, "step": 46243 }, { "epoch": 3.7462734931950745, "grad_norm": 0.06948699802160263, "learning_rate": 1.4096043926369324e-05, "loss": 0.2476, "step": 46244 }, { "epoch": 3.7463545042125728, "grad_norm": 0.06912975758314133, "learning_rate": 1.4091543273774699e-05, "loss": 0.2313, "step": 46245 }, { "epoch": 3.7464355152300715, "grad_norm": 0.06383688002824783, "learning_rate": 1.4087042621180071e-05, "loss": 0.2016, "step": 46246 }, { "epoch": 3.7465165262475697, "grad_norm": 0.08401155471801758, "learning_rate": 1.4082541968585445e-05, "loss": 0.2159, "step": 46247 }, { "epoch": 3.746597537265068, "grad_norm": 0.06746657192707062, "learning_rate": 1.407804131599082e-05, "loss": 0.2241, "step": 46248 }, { "epoch": 3.746678548282566, "grad_norm": 0.08890854567289352, "learning_rate": 1.4073540663396192e-05, "loss": 0.2303, "step": 46249 }, { "epoch": 3.746759559300065, "grad_norm": 0.06370741128921509, "learning_rate": 1.4069040010801567e-05, "loss": 0.2286, "step": 46250 }, { "epoch": 3.746840570317563, "grad_norm": 0.05881880968809128, "learning_rate": 1.406453935820694e-05, "loss": 0.195, "step": 46251 }, { "epoch": 3.7469215813350614, "grad_norm": 0.06718897074460983, "learning_rate": 1.4060038705612313e-05, "loss": 0.2372, "step": 46252 }, { "epoch": 3.74700259235256, "grad_norm": 0.06468194723129272, "learning_rate": 1.405553805301769e-05, "loss": 0.1888, "step": 46253 }, { "epoch": 3.7470836033700583, "grad_norm": 0.06398289650678635, "learning_rate": 1.4051037400423062e-05, "loss": 0.1908, "step": 46254 }, { "epoch": 3.7471646143875565, "grad_norm": 0.06329668313264847, "learning_rate": 1.4046536747828437e-05, "loss": 0.2213, "step": 46255 }, { "epoch": 3.7472456254050552, "grad_norm": 0.07826821506023407, "learning_rate": 1.404203609523381e-05, "loss": 0.1977, "step": 46256 }, { "epoch": 3.7473266364225535, "grad_norm": 0.0786462053656578, "learning_rate": 1.4037535442639182e-05, "loss": 0.2486, "step": 46257 }, { "epoch": 3.7474076474400517, "grad_norm": 0.07066361606121063, "learning_rate": 1.4033034790044558e-05, "loss": 0.2057, "step": 46258 }, { "epoch": 3.7474886584575504, "grad_norm": 0.07975254207849503, "learning_rate": 1.4028534137449931e-05, "loss": 0.2555, "step": 46259 }, { "epoch": 3.7475696694750487, "grad_norm": 0.08178477734327316, "learning_rate": 1.4024033484855303e-05, "loss": 0.2112, "step": 46260 }, { "epoch": 3.747650680492547, "grad_norm": 0.07174079120159149, "learning_rate": 1.4019532832260678e-05, "loss": 0.1987, "step": 46261 }, { "epoch": 3.7477316915100456, "grad_norm": 0.06191396713256836, "learning_rate": 1.4015032179666052e-05, "loss": 0.2372, "step": 46262 }, { "epoch": 3.747812702527544, "grad_norm": 0.0650116503238678, "learning_rate": 1.4010531527071427e-05, "loss": 0.2416, "step": 46263 }, { "epoch": 3.747893713545042, "grad_norm": 0.07233452051877975, "learning_rate": 1.40060308744768e-05, "loss": 0.2519, "step": 46264 }, { "epoch": 3.7479747245625408, "grad_norm": 0.06753069162368774, "learning_rate": 1.4001530221882173e-05, "loss": 0.1924, "step": 46265 }, { "epoch": 3.748055735580039, "grad_norm": 0.08413957059383392, "learning_rate": 1.3997029569287548e-05, "loss": 0.2399, "step": 46266 }, { "epoch": 3.7481367465975373, "grad_norm": 0.07612863928079605, "learning_rate": 1.399252891669292e-05, "loss": 0.1912, "step": 46267 }, { "epoch": 3.7482177576150355, "grad_norm": 0.06556925177574158, "learning_rate": 1.3988028264098297e-05, "loss": 0.2088, "step": 46268 }, { "epoch": 3.7482987686325338, "grad_norm": 0.07448197156190872, "learning_rate": 1.3983527611503669e-05, "loss": 0.2145, "step": 46269 }, { "epoch": 3.7483797796500324, "grad_norm": 0.07443884760141373, "learning_rate": 1.3979026958909041e-05, "loss": 0.1992, "step": 46270 }, { "epoch": 3.7484607906675307, "grad_norm": 0.07081007212400436, "learning_rate": 1.3974526306314418e-05, "loss": 0.2568, "step": 46271 }, { "epoch": 3.748541801685029, "grad_norm": 0.06729474663734436, "learning_rate": 1.397002565371979e-05, "loss": 0.1957, "step": 46272 }, { "epoch": 3.7486228127025276, "grad_norm": 0.06812798231840134, "learning_rate": 1.3965525001125163e-05, "loss": 0.2147, "step": 46273 }, { "epoch": 3.748703823720026, "grad_norm": 0.07089778780937195, "learning_rate": 1.3961024348530539e-05, "loss": 0.2223, "step": 46274 }, { "epoch": 3.748784834737524, "grad_norm": 0.06887844949960709, "learning_rate": 1.395652369593591e-05, "loss": 0.2128, "step": 46275 }, { "epoch": 3.748865845755023, "grad_norm": 0.0692133754491806, "learning_rate": 1.3952023043341286e-05, "loss": 0.2063, "step": 46276 }, { "epoch": 3.748946856772521, "grad_norm": 0.059787359088659286, "learning_rate": 1.394752239074666e-05, "loss": 0.2294, "step": 46277 }, { "epoch": 3.7490278677900193, "grad_norm": 0.055009905248880386, "learning_rate": 1.3943021738152031e-05, "loss": 0.194, "step": 46278 }, { "epoch": 3.749108878807518, "grad_norm": 0.07308180630207062, "learning_rate": 1.3938521085557407e-05, "loss": 0.2311, "step": 46279 }, { "epoch": 3.749189889825016, "grad_norm": 0.0746115893125534, "learning_rate": 1.393402043296278e-05, "loss": 0.247, "step": 46280 }, { "epoch": 3.7492709008425145, "grad_norm": 0.058564692735672, "learning_rate": 1.3929519780368156e-05, "loss": 0.1998, "step": 46281 }, { "epoch": 3.749351911860013, "grad_norm": 0.06961791962385178, "learning_rate": 1.3925019127773528e-05, "loss": 0.2499, "step": 46282 }, { "epoch": 3.7494329228775114, "grad_norm": 0.06499256938695908, "learning_rate": 1.3920518475178901e-05, "loss": 0.213, "step": 46283 }, { "epoch": 3.7495139338950096, "grad_norm": 0.0894896611571312, "learning_rate": 1.3916017822584276e-05, "loss": 0.2478, "step": 46284 }, { "epoch": 3.7495949449125083, "grad_norm": 0.08614595234394073, "learning_rate": 1.3911517169989648e-05, "loss": 0.209, "step": 46285 }, { "epoch": 3.7496759559300066, "grad_norm": 0.06723782420158386, "learning_rate": 1.3907016517395022e-05, "loss": 0.2103, "step": 46286 }, { "epoch": 3.749756966947505, "grad_norm": 0.07532931864261627, "learning_rate": 1.3902515864800397e-05, "loss": 0.2022, "step": 46287 }, { "epoch": 3.7498379779650035, "grad_norm": 0.08021144568920135, "learning_rate": 1.389801521220577e-05, "loss": 0.2217, "step": 46288 }, { "epoch": 3.7499189889825018, "grad_norm": 0.07499111443758011, "learning_rate": 1.3893514559611146e-05, "loss": 0.2636, "step": 46289 }, { "epoch": 3.75, "grad_norm": 0.10113779455423355, "learning_rate": 1.3889013907016518e-05, "loss": 0.2517, "step": 46290 }, { "epoch": 3.7500810110174982, "grad_norm": 0.05415019765496254, "learning_rate": 1.3884513254421892e-05, "loss": 0.2003, "step": 46291 }, { "epoch": 3.7501620220349965, "grad_norm": 0.06942694634199142, "learning_rate": 1.3880012601827267e-05, "loss": 0.2233, "step": 46292 }, { "epoch": 3.750243033052495, "grad_norm": 0.07195769250392914, "learning_rate": 1.3875511949232639e-05, "loss": 0.1826, "step": 46293 }, { "epoch": 3.7503240440699934, "grad_norm": 0.05428317189216614, "learning_rate": 1.3871011296638012e-05, "loss": 0.21, "step": 46294 }, { "epoch": 3.7504050550874917, "grad_norm": 0.050274260342121124, "learning_rate": 1.3866510644043388e-05, "loss": 0.2175, "step": 46295 }, { "epoch": 3.7504860661049904, "grad_norm": 0.06542641669511795, "learning_rate": 1.386200999144876e-05, "loss": 0.2478, "step": 46296 }, { "epoch": 3.7505670771224886, "grad_norm": 0.07026327401399612, "learning_rate": 1.3857509338854135e-05, "loss": 0.2171, "step": 46297 }, { "epoch": 3.750648088139987, "grad_norm": 0.07940451055765152, "learning_rate": 1.3853008686259509e-05, "loss": 0.2378, "step": 46298 }, { "epoch": 3.7507290991574855, "grad_norm": 0.08603038638830185, "learning_rate": 1.384850803366488e-05, "loss": 0.2269, "step": 46299 }, { "epoch": 3.750810110174984, "grad_norm": 0.07071874290704727, "learning_rate": 1.3844007381070256e-05, "loss": 0.2226, "step": 46300 }, { "epoch": 3.750891121192482, "grad_norm": 0.0803099051117897, "learning_rate": 1.383950672847563e-05, "loss": 0.2084, "step": 46301 }, { "epoch": 3.7509721322099807, "grad_norm": 0.07697241753339767, "learning_rate": 1.3835006075881005e-05, "loss": 0.214, "step": 46302 }, { "epoch": 3.751053143227479, "grad_norm": 0.06945455074310303, "learning_rate": 1.3830505423286377e-05, "loss": 0.2187, "step": 46303 }, { "epoch": 3.751134154244977, "grad_norm": 0.0705169215798378, "learning_rate": 1.382600477069175e-05, "loss": 0.2639, "step": 46304 }, { "epoch": 3.751215165262476, "grad_norm": 0.0709058865904808, "learning_rate": 1.3821504118097126e-05, "loss": 0.2424, "step": 46305 }, { "epoch": 3.751296176279974, "grad_norm": 0.0878700241446495, "learning_rate": 1.3817003465502497e-05, "loss": 0.2605, "step": 46306 }, { "epoch": 3.7513771872974724, "grad_norm": 0.0711369663476944, "learning_rate": 1.3812502812907871e-05, "loss": 0.2516, "step": 46307 }, { "epoch": 3.751458198314971, "grad_norm": 0.07304982095956802, "learning_rate": 1.3808002160313246e-05, "loss": 0.2469, "step": 46308 }, { "epoch": 3.7515392093324693, "grad_norm": 0.0640924721956253, "learning_rate": 1.380350150771862e-05, "loss": 0.1904, "step": 46309 }, { "epoch": 3.7516202203499676, "grad_norm": 0.0668100118637085, "learning_rate": 1.3799000855123995e-05, "loss": 0.2016, "step": 46310 }, { "epoch": 3.7517012313674662, "grad_norm": 0.0843009278178215, "learning_rate": 1.3794500202529367e-05, "loss": 0.229, "step": 46311 }, { "epoch": 3.7517822423849645, "grad_norm": 0.07822394371032715, "learning_rate": 1.378999954993474e-05, "loss": 0.2195, "step": 46312 }, { "epoch": 3.7518632534024627, "grad_norm": 0.07780709862709045, "learning_rate": 1.3785498897340116e-05, "loss": 0.2342, "step": 46313 }, { "epoch": 3.751944264419961, "grad_norm": 0.06893333792686462, "learning_rate": 1.3780998244745488e-05, "loss": 0.232, "step": 46314 }, { "epoch": 3.7520252754374592, "grad_norm": 0.06503161042928696, "learning_rate": 1.3776497592150863e-05, "loss": 0.24, "step": 46315 }, { "epoch": 3.752106286454958, "grad_norm": 0.06948009878396988, "learning_rate": 1.3771996939556237e-05, "loss": 0.2304, "step": 46316 }, { "epoch": 3.752187297472456, "grad_norm": 0.07049506157636642, "learning_rate": 1.3767496286961609e-05, "loss": 0.1865, "step": 46317 }, { "epoch": 3.7522683084899544, "grad_norm": 0.07123062759637833, "learning_rate": 1.3762995634366984e-05, "loss": 0.2311, "step": 46318 }, { "epoch": 3.752349319507453, "grad_norm": 0.07314299792051315, "learning_rate": 1.3758494981772358e-05, "loss": 0.236, "step": 46319 }, { "epoch": 3.7524303305249513, "grad_norm": 0.07087218761444092, "learning_rate": 1.375399432917773e-05, "loss": 0.2167, "step": 46320 }, { "epoch": 3.7525113415424496, "grad_norm": 0.09059132635593414, "learning_rate": 1.3749493676583105e-05, "loss": 0.2234, "step": 46321 }, { "epoch": 3.7525923525599483, "grad_norm": 0.06358359009027481, "learning_rate": 1.3744993023988478e-05, "loss": 0.2308, "step": 46322 }, { "epoch": 3.7526733635774465, "grad_norm": 0.06344583630561829, "learning_rate": 1.3740492371393854e-05, "loss": 0.2081, "step": 46323 }, { "epoch": 3.7527543745949448, "grad_norm": 0.0792200043797493, "learning_rate": 1.3735991718799227e-05, "loss": 0.2335, "step": 46324 }, { "epoch": 3.7528353856124435, "grad_norm": 0.06948511302471161, "learning_rate": 1.37314910662046e-05, "loss": 0.2181, "step": 46325 }, { "epoch": 3.7529163966299417, "grad_norm": 0.06803888082504272, "learning_rate": 1.3726990413609975e-05, "loss": 0.2079, "step": 46326 }, { "epoch": 3.75299740764744, "grad_norm": 0.06263738125562668, "learning_rate": 1.3722489761015348e-05, "loss": 0.2381, "step": 46327 }, { "epoch": 3.7530784186649386, "grad_norm": 0.06731061637401581, "learning_rate": 1.3717989108420723e-05, "loss": 0.2008, "step": 46328 }, { "epoch": 3.753159429682437, "grad_norm": 0.07265542447566986, "learning_rate": 1.3713488455826095e-05, "loss": 0.2197, "step": 46329 }, { "epoch": 3.753240440699935, "grad_norm": 0.08350956439971924, "learning_rate": 1.3708987803231469e-05, "loss": 0.2395, "step": 46330 }, { "epoch": 3.753321451717434, "grad_norm": 0.07093897461891174, "learning_rate": 1.3704487150636844e-05, "loss": 0.2328, "step": 46331 }, { "epoch": 3.753402462734932, "grad_norm": 0.10086548328399658, "learning_rate": 1.3699986498042216e-05, "loss": 0.2574, "step": 46332 }, { "epoch": 3.7534834737524303, "grad_norm": 0.06807972490787506, "learning_rate": 1.369548584544759e-05, "loss": 0.2357, "step": 46333 }, { "epoch": 3.7535644847699285, "grad_norm": 0.09186132252216339, "learning_rate": 1.3690985192852965e-05, "loss": 0.2512, "step": 46334 }, { "epoch": 3.7536454957874272, "grad_norm": 0.06752340495586395, "learning_rate": 1.3686484540258337e-05, "loss": 0.212, "step": 46335 }, { "epoch": 3.7537265068049255, "grad_norm": 0.07947950065135956, "learning_rate": 1.3681983887663712e-05, "loss": 0.2415, "step": 46336 }, { "epoch": 3.7538075178224237, "grad_norm": 0.07616253942251205, "learning_rate": 1.3677483235069086e-05, "loss": 0.2219, "step": 46337 }, { "epoch": 3.753888528839922, "grad_norm": 0.074013851583004, "learning_rate": 1.3672982582474458e-05, "loss": 0.2383, "step": 46338 }, { "epoch": 3.7539695398574207, "grad_norm": 0.07347376644611359, "learning_rate": 1.3668481929879833e-05, "loss": 0.1882, "step": 46339 }, { "epoch": 3.754050550874919, "grad_norm": 0.06440410017967224, "learning_rate": 1.3663981277285207e-05, "loss": 0.2126, "step": 46340 }, { "epoch": 3.754131561892417, "grad_norm": 0.061819300055503845, "learning_rate": 1.3659480624690582e-05, "loss": 0.2163, "step": 46341 }, { "epoch": 3.754212572909916, "grad_norm": 0.09030622243881226, "learning_rate": 1.3654979972095956e-05, "loss": 0.2193, "step": 46342 }, { "epoch": 3.754293583927414, "grad_norm": 0.06111063435673714, "learning_rate": 1.3650479319501328e-05, "loss": 0.2516, "step": 46343 }, { "epoch": 3.7543745949449123, "grad_norm": 0.07280724495649338, "learning_rate": 1.3645978666906703e-05, "loss": 0.2058, "step": 46344 }, { "epoch": 3.754455605962411, "grad_norm": 0.0639389231801033, "learning_rate": 1.3641478014312076e-05, "loss": 0.2089, "step": 46345 }, { "epoch": 3.7545366169799093, "grad_norm": 0.08013645559549332, "learning_rate": 1.3636977361717448e-05, "loss": 0.2546, "step": 46346 }, { "epoch": 3.7546176279974075, "grad_norm": 0.08095403015613556, "learning_rate": 1.3632476709122824e-05, "loss": 0.218, "step": 46347 }, { "epoch": 3.754698639014906, "grad_norm": 0.06311283260583878, "learning_rate": 1.3627976056528197e-05, "loss": 0.2036, "step": 46348 }, { "epoch": 3.7547796500324044, "grad_norm": 0.07423945516347885, "learning_rate": 1.3623475403933573e-05, "loss": 0.2177, "step": 46349 }, { "epoch": 3.7548606610499027, "grad_norm": 0.07765256613492966, "learning_rate": 1.3618974751338944e-05, "loss": 0.2528, "step": 46350 }, { "epoch": 3.7549416720674014, "grad_norm": 0.05665789544582367, "learning_rate": 1.3614474098744318e-05, "loss": 0.2193, "step": 46351 }, { "epoch": 3.7550226830848996, "grad_norm": 0.07820606231689453, "learning_rate": 1.3609973446149693e-05, "loss": 0.2629, "step": 46352 }, { "epoch": 3.755103694102398, "grad_norm": 0.06132517755031586, "learning_rate": 1.3605472793555065e-05, "loss": 0.2099, "step": 46353 }, { "epoch": 3.7551847051198965, "grad_norm": 0.07045713812112808, "learning_rate": 1.360097214096044e-05, "loss": 0.2298, "step": 46354 }, { "epoch": 3.755265716137395, "grad_norm": 0.07467834651470184, "learning_rate": 1.3596471488365814e-05, "loss": 0.2029, "step": 46355 }, { "epoch": 3.755346727154893, "grad_norm": 0.07477845996618271, "learning_rate": 1.3591970835771186e-05, "loss": 0.2221, "step": 46356 }, { "epoch": 3.7554277381723913, "grad_norm": 0.06774298846721649, "learning_rate": 1.3587470183176561e-05, "loss": 0.2328, "step": 46357 }, { "epoch": 3.75550874918989, "grad_norm": 0.06271317601203918, "learning_rate": 1.3582969530581935e-05, "loss": 0.2151, "step": 46358 }, { "epoch": 3.755589760207388, "grad_norm": 0.07417146861553192, "learning_rate": 1.3578468877987307e-05, "loss": 0.2502, "step": 46359 }, { "epoch": 3.7556707712248865, "grad_norm": 0.07773631066083908, "learning_rate": 1.3573968225392684e-05, "loss": 0.2285, "step": 46360 }, { "epoch": 3.7557517822423847, "grad_norm": 0.07055094838142395, "learning_rate": 1.3569467572798056e-05, "loss": 0.2339, "step": 46361 }, { "epoch": 3.7558327932598834, "grad_norm": 0.07049763202667236, "learning_rate": 1.3564966920203431e-05, "loss": 0.2265, "step": 46362 }, { "epoch": 3.7559138042773816, "grad_norm": 0.07336530089378357, "learning_rate": 1.3560466267608805e-05, "loss": 0.1979, "step": 46363 }, { "epoch": 3.75599481529488, "grad_norm": 0.0836474746465683, "learning_rate": 1.3555965615014177e-05, "loss": 0.223, "step": 46364 }, { "epoch": 3.7560758263123786, "grad_norm": 0.076979860663414, "learning_rate": 1.3551464962419552e-05, "loss": 0.2199, "step": 46365 }, { "epoch": 3.756156837329877, "grad_norm": 0.05993069335818291, "learning_rate": 1.3546964309824925e-05, "loss": 0.2014, "step": 46366 }, { "epoch": 3.756237848347375, "grad_norm": 0.07212058454751968, "learning_rate": 1.35424636572303e-05, "loss": 0.2198, "step": 46367 }, { "epoch": 3.7563188593648738, "grad_norm": 0.06068550795316696, "learning_rate": 1.3537963004635673e-05, "loss": 0.2511, "step": 46368 }, { "epoch": 3.756399870382372, "grad_norm": 0.0721498504281044, "learning_rate": 1.3533462352041046e-05, "loss": 0.2439, "step": 46369 }, { "epoch": 3.7564808813998702, "grad_norm": 0.07898570597171783, "learning_rate": 1.3528961699446422e-05, "loss": 0.2429, "step": 46370 }, { "epoch": 3.756561892417369, "grad_norm": 0.06375134736299515, "learning_rate": 1.3524461046851793e-05, "loss": 0.2478, "step": 46371 }, { "epoch": 3.756642903434867, "grad_norm": 0.08874838799238205, "learning_rate": 1.3519960394257167e-05, "loss": 0.2353, "step": 46372 }, { "epoch": 3.7567239144523654, "grad_norm": 0.07910864055156708, "learning_rate": 1.3515459741662542e-05, "loss": 0.2272, "step": 46373 }, { "epoch": 3.756804925469864, "grad_norm": 0.07340925931930542, "learning_rate": 1.3510959089067914e-05, "loss": 0.2493, "step": 46374 }, { "epoch": 3.7568859364873624, "grad_norm": 0.0714777335524559, "learning_rate": 1.3506458436473291e-05, "loss": 0.2262, "step": 46375 }, { "epoch": 3.7569669475048606, "grad_norm": 0.08851076662540436, "learning_rate": 1.3501957783878663e-05, "loss": 0.2639, "step": 46376 }, { "epoch": 3.7570479585223593, "grad_norm": 0.06556161493062973, "learning_rate": 1.3497457131284035e-05, "loss": 0.2123, "step": 46377 }, { "epoch": 3.7571289695398575, "grad_norm": 0.0682496726512909, "learning_rate": 1.3492956478689412e-05, "loss": 0.2372, "step": 46378 }, { "epoch": 3.7572099805573558, "grad_norm": 0.07470902800559998, "learning_rate": 1.3488455826094784e-05, "loss": 0.2004, "step": 46379 }, { "epoch": 3.757290991574854, "grad_norm": 0.0711580142378807, "learning_rate": 1.3483955173500158e-05, "loss": 0.2295, "step": 46380 }, { "epoch": 3.7573720025923527, "grad_norm": 0.06948163360357285, "learning_rate": 1.3479454520905533e-05, "loss": 0.2627, "step": 46381 }, { "epoch": 3.757453013609851, "grad_norm": 0.08253484219312668, "learning_rate": 1.3474953868310905e-05, "loss": 0.2362, "step": 46382 }, { "epoch": 3.757534024627349, "grad_norm": 0.0675487071275711, "learning_rate": 1.347045321571628e-05, "loss": 0.2605, "step": 46383 }, { "epoch": 3.7576150356448474, "grad_norm": 0.0675104409456253, "learning_rate": 1.3465952563121654e-05, "loss": 0.1905, "step": 46384 }, { "epoch": 3.757696046662346, "grad_norm": 0.07248274981975555, "learning_rate": 1.3461451910527026e-05, "loss": 0.2062, "step": 46385 }, { "epoch": 3.7577770576798444, "grad_norm": 0.06134706735610962, "learning_rate": 1.3456951257932401e-05, "loss": 0.1993, "step": 46386 }, { "epoch": 3.7578580686973426, "grad_norm": 0.07231275737285614, "learning_rate": 1.3452450605337775e-05, "loss": 0.2197, "step": 46387 }, { "epoch": 3.7579390797148413, "grad_norm": 0.07492779940366745, "learning_rate": 1.344794995274315e-05, "loss": 0.1983, "step": 46388 }, { "epoch": 3.7580200907323396, "grad_norm": 0.05680622532963753, "learning_rate": 1.3443449300148522e-05, "loss": 0.2109, "step": 46389 }, { "epoch": 3.758101101749838, "grad_norm": 0.06529416888952255, "learning_rate": 1.3438948647553895e-05, "loss": 0.2125, "step": 46390 }, { "epoch": 3.7581821127673365, "grad_norm": 0.07710691541433334, "learning_rate": 1.343444799495927e-05, "loss": 0.2367, "step": 46391 }, { "epoch": 3.7582631237848347, "grad_norm": 0.0891771912574768, "learning_rate": 1.3429947342364643e-05, "loss": 0.2411, "step": 46392 }, { "epoch": 3.758344134802333, "grad_norm": 0.07291162014007568, "learning_rate": 1.3425446689770016e-05, "loss": 0.1957, "step": 46393 }, { "epoch": 3.7584251458198317, "grad_norm": 0.07850871235132217, "learning_rate": 1.3420946037175391e-05, "loss": 0.2449, "step": 46394 }, { "epoch": 3.75850615683733, "grad_norm": 0.07013242691755295, "learning_rate": 1.3416445384580763e-05, "loss": 0.256, "step": 46395 }, { "epoch": 3.758587167854828, "grad_norm": 0.09972033649682999, "learning_rate": 1.341194473198614e-05, "loss": 0.1975, "step": 46396 }, { "epoch": 3.758668178872327, "grad_norm": 0.06596890836954117, "learning_rate": 1.3407444079391512e-05, "loss": 0.1988, "step": 46397 }, { "epoch": 3.758749189889825, "grad_norm": 0.07399401068687439, "learning_rate": 1.3402943426796886e-05, "loss": 0.1893, "step": 46398 }, { "epoch": 3.7588302009073233, "grad_norm": 0.06135550141334534, "learning_rate": 1.3398442774202261e-05, "loss": 0.1996, "step": 46399 }, { "epoch": 3.758911211924822, "grad_norm": 0.06000867113471031, "learning_rate": 1.3393942121607633e-05, "loss": 0.1857, "step": 46400 }, { "epoch": 3.7589922229423203, "grad_norm": 0.06590692698955536, "learning_rate": 1.3389441469013008e-05, "loss": 0.2164, "step": 46401 }, { "epoch": 3.7590732339598185, "grad_norm": 0.06405100971460342, "learning_rate": 1.3384940816418382e-05, "loss": 0.1724, "step": 46402 }, { "epoch": 3.7591542449773168, "grad_norm": 0.06549198925495148, "learning_rate": 1.3380440163823754e-05, "loss": 0.2295, "step": 46403 }, { "epoch": 3.7592352559948155, "grad_norm": 0.06221283972263336, "learning_rate": 1.337593951122913e-05, "loss": 0.2328, "step": 46404 }, { "epoch": 3.7593162670123137, "grad_norm": 0.07838263362646103, "learning_rate": 1.3371438858634503e-05, "loss": 0.2247, "step": 46405 }, { "epoch": 3.759397278029812, "grad_norm": 0.07541157305240631, "learning_rate": 1.3366938206039875e-05, "loss": 0.2444, "step": 46406 }, { "epoch": 3.75947828904731, "grad_norm": 0.06589043140411377, "learning_rate": 1.336243755344525e-05, "loss": 0.1966, "step": 46407 }, { "epoch": 3.759559300064809, "grad_norm": 0.06893175840377808, "learning_rate": 1.3357936900850624e-05, "loss": 0.1866, "step": 46408 }, { "epoch": 3.759640311082307, "grad_norm": 0.07315077632665634, "learning_rate": 1.3353436248255999e-05, "loss": 0.2394, "step": 46409 }, { "epoch": 3.7597213220998054, "grad_norm": 0.06642105430364609, "learning_rate": 1.334893559566137e-05, "loss": 0.2194, "step": 46410 }, { "epoch": 3.759802333117304, "grad_norm": 0.06641194224357605, "learning_rate": 1.3344434943066744e-05, "loss": 0.2434, "step": 46411 }, { "epoch": 3.7598833441348023, "grad_norm": 0.07174092531204224, "learning_rate": 1.333993429047212e-05, "loss": 0.2248, "step": 46412 }, { "epoch": 3.7599643551523005, "grad_norm": 0.08386538177728653, "learning_rate": 1.3335433637877492e-05, "loss": 0.2254, "step": 46413 }, { "epoch": 3.7600453661697992, "grad_norm": 0.07093953341245651, "learning_rate": 1.3330932985282869e-05, "loss": 0.2036, "step": 46414 }, { "epoch": 3.7601263771872975, "grad_norm": 0.06664960831403732, "learning_rate": 1.332643233268824e-05, "loss": 0.1924, "step": 46415 }, { "epoch": 3.7602073882047957, "grad_norm": 0.07157678157091141, "learning_rate": 1.3321931680093614e-05, "loss": 0.2119, "step": 46416 }, { "epoch": 3.7602883992222944, "grad_norm": 0.07037477195262909, "learning_rate": 1.331743102749899e-05, "loss": 0.1769, "step": 46417 }, { "epoch": 3.7603694102397927, "grad_norm": 0.07439891993999481, "learning_rate": 1.3312930374904361e-05, "loss": 0.2255, "step": 46418 }, { "epoch": 3.760450421257291, "grad_norm": 0.0761137455701828, "learning_rate": 1.3308429722309735e-05, "loss": 0.2132, "step": 46419 }, { "epoch": 3.7605314322747896, "grad_norm": 0.061975765973329544, "learning_rate": 1.330392906971511e-05, "loss": 0.1905, "step": 46420 }, { "epoch": 3.760612443292288, "grad_norm": 0.06800936907529831, "learning_rate": 1.3299428417120482e-05, "loss": 0.2167, "step": 46421 }, { "epoch": 3.760693454309786, "grad_norm": 0.06478226184844971, "learning_rate": 1.3294927764525857e-05, "loss": 0.2169, "step": 46422 }, { "epoch": 3.7607744653272848, "grad_norm": 0.06802545487880707, "learning_rate": 1.3290427111931231e-05, "loss": 0.1845, "step": 46423 }, { "epoch": 3.760855476344783, "grad_norm": 0.09708104282617569, "learning_rate": 1.3285926459336603e-05, "loss": 0.2462, "step": 46424 }, { "epoch": 3.7609364873622813, "grad_norm": 0.06333111971616745, "learning_rate": 1.3281425806741978e-05, "loss": 0.209, "step": 46425 }, { "epoch": 3.7610174983797795, "grad_norm": 0.07289381325244904, "learning_rate": 1.3276925154147352e-05, "loss": 0.1954, "step": 46426 }, { "epoch": 3.761098509397278, "grad_norm": 0.06891093403100967, "learning_rate": 1.3272424501552727e-05, "loss": 0.216, "step": 46427 }, { "epoch": 3.7611795204147764, "grad_norm": 0.06211341172456741, "learning_rate": 1.3267923848958099e-05, "loss": 0.217, "step": 46428 }, { "epoch": 3.7612605314322747, "grad_norm": 0.07642439752817154, "learning_rate": 1.3263423196363473e-05, "loss": 0.2142, "step": 46429 }, { "epoch": 3.761341542449773, "grad_norm": 0.06167442724108696, "learning_rate": 1.3258922543768848e-05, "loss": 0.2275, "step": 46430 }, { "epoch": 3.7614225534672716, "grad_norm": 0.0749720111489296, "learning_rate": 1.3254421891174222e-05, "loss": 0.2434, "step": 46431 }, { "epoch": 3.76150356448477, "grad_norm": 0.07655184715986252, "learning_rate": 1.3249921238579593e-05, "loss": 0.2593, "step": 46432 }, { "epoch": 3.761584575502268, "grad_norm": 0.07166169583797455, "learning_rate": 1.3245420585984969e-05, "loss": 0.2194, "step": 46433 }, { "epoch": 3.761665586519767, "grad_norm": 0.07488848268985748, "learning_rate": 1.3240919933390342e-05, "loss": 0.228, "step": 46434 }, { "epoch": 3.761746597537265, "grad_norm": 0.07291141152381897, "learning_rate": 1.3236419280795718e-05, "loss": 0.2271, "step": 46435 }, { "epoch": 3.7618276085547633, "grad_norm": 0.06775379180908203, "learning_rate": 1.323191862820109e-05, "loss": 0.2234, "step": 46436 }, { "epoch": 3.761908619572262, "grad_norm": 0.09158001095056534, "learning_rate": 1.3227417975606463e-05, "loss": 0.2341, "step": 46437 }, { "epoch": 3.76198963058976, "grad_norm": 0.07301585376262665, "learning_rate": 1.3222917323011838e-05, "loss": 0.2097, "step": 46438 }, { "epoch": 3.7620706416072585, "grad_norm": 0.06108618527650833, "learning_rate": 1.321841667041721e-05, "loss": 0.2379, "step": 46439 }, { "epoch": 3.762151652624757, "grad_norm": 0.08330275863409042, "learning_rate": 1.3213916017822586e-05, "loss": 0.2056, "step": 46440 }, { "epoch": 3.7622326636422554, "grad_norm": 0.08911581337451935, "learning_rate": 1.320941536522796e-05, "loss": 0.2901, "step": 46441 }, { "epoch": 3.7623136746597536, "grad_norm": 0.0730244368314743, "learning_rate": 1.3204914712633331e-05, "loss": 0.2179, "step": 46442 }, { "epoch": 3.7623946856772523, "grad_norm": 0.08556057512760162, "learning_rate": 1.3200414060038706e-05, "loss": 0.2335, "step": 46443 }, { "epoch": 3.7624756966947506, "grad_norm": 0.07071968913078308, "learning_rate": 1.319591340744408e-05, "loss": 0.2253, "step": 46444 }, { "epoch": 3.762556707712249, "grad_norm": 0.07956349104642868, "learning_rate": 1.3191412754849452e-05, "loss": 0.2367, "step": 46445 }, { "epoch": 3.7626377187297475, "grad_norm": 0.06337764859199524, "learning_rate": 1.3186912102254827e-05, "loss": 0.1947, "step": 46446 }, { "epoch": 3.7627187297472457, "grad_norm": 0.060510363429784775, "learning_rate": 1.3182411449660201e-05, "loss": 0.2314, "step": 46447 }, { "epoch": 3.762799740764744, "grad_norm": 0.062422338873147964, "learning_rate": 1.3177910797065576e-05, "loss": 0.1888, "step": 46448 }, { "epoch": 3.7628807517822422, "grad_norm": 0.07294078171253204, "learning_rate": 1.317341014447095e-05, "loss": 0.2152, "step": 46449 }, { "epoch": 3.762961762799741, "grad_norm": 0.0869693011045456, "learning_rate": 1.3168909491876322e-05, "loss": 0.227, "step": 46450 }, { "epoch": 3.763042773817239, "grad_norm": 0.08141772449016571, "learning_rate": 1.3164408839281697e-05, "loss": 0.2268, "step": 46451 }, { "epoch": 3.7631237848347374, "grad_norm": 0.07432594150304794, "learning_rate": 1.315990818668707e-05, "loss": 0.1958, "step": 46452 }, { "epoch": 3.7632047958522357, "grad_norm": 0.07830096781253815, "learning_rate": 1.3155407534092446e-05, "loss": 0.2131, "step": 46453 }, { "epoch": 3.7632858068697344, "grad_norm": 0.06278984993696213, "learning_rate": 1.3150906881497818e-05, "loss": 0.2175, "step": 46454 }, { "epoch": 3.7633668178872326, "grad_norm": 0.06053311750292778, "learning_rate": 1.3146406228903191e-05, "loss": 0.2049, "step": 46455 }, { "epoch": 3.763447828904731, "grad_norm": 0.0874246209859848, "learning_rate": 1.3141905576308567e-05, "loss": 0.2286, "step": 46456 }, { "epoch": 3.7635288399222295, "grad_norm": 0.05780855566263199, "learning_rate": 1.3137404923713939e-05, "loss": 0.2368, "step": 46457 }, { "epoch": 3.7636098509397278, "grad_norm": 0.057224925607442856, "learning_rate": 1.3132904271119312e-05, "loss": 0.1832, "step": 46458 }, { "epoch": 3.763690861957226, "grad_norm": 0.08486709743738174, "learning_rate": 1.3128403618524688e-05, "loss": 0.1981, "step": 46459 }, { "epoch": 3.7637718729747247, "grad_norm": 0.08651462197303772, "learning_rate": 1.312390296593006e-05, "loss": 0.2557, "step": 46460 }, { "epoch": 3.763852883992223, "grad_norm": 0.05747639387845993, "learning_rate": 1.3119402313335435e-05, "loss": 0.1794, "step": 46461 }, { "epoch": 3.763933895009721, "grad_norm": 0.04803988337516785, "learning_rate": 1.3114901660740808e-05, "loss": 0.2113, "step": 46462 }, { "epoch": 3.76401490602722, "grad_norm": 0.0615316703915596, "learning_rate": 1.311040100814618e-05, "loss": 0.2208, "step": 46463 }, { "epoch": 3.764095917044718, "grad_norm": 0.09276503324508667, "learning_rate": 1.3105900355551556e-05, "loss": 0.2528, "step": 46464 }, { "epoch": 3.7641769280622164, "grad_norm": 0.07706604897975922, "learning_rate": 1.3101399702956929e-05, "loss": 0.2107, "step": 46465 }, { "epoch": 3.764257939079715, "grad_norm": 0.0845157578587532, "learning_rate": 1.3096899050362301e-05, "loss": 0.2054, "step": 46466 }, { "epoch": 3.7643389500972133, "grad_norm": 0.08171258866786957, "learning_rate": 1.3092398397767678e-05, "loss": 0.2411, "step": 46467 }, { "epoch": 3.7644199611147116, "grad_norm": 0.08288677036762238, "learning_rate": 1.308789774517305e-05, "loss": 0.206, "step": 46468 }, { "epoch": 3.7645009721322102, "grad_norm": 0.06667889654636383, "learning_rate": 1.3083397092578425e-05, "loss": 0.2449, "step": 46469 }, { "epoch": 3.7645819831497085, "grad_norm": 0.060472164303064346, "learning_rate": 1.3078896439983799e-05, "loss": 0.2135, "step": 46470 }, { "epoch": 3.7646629941672067, "grad_norm": 0.07328839600086212, "learning_rate": 1.307439578738917e-05, "loss": 0.2034, "step": 46471 }, { "epoch": 3.764744005184705, "grad_norm": 0.06965433806180954, "learning_rate": 1.3069895134794546e-05, "loss": 0.1813, "step": 46472 }, { "epoch": 3.7648250162022032, "grad_norm": 0.08212985843420029, "learning_rate": 1.306539448219992e-05, "loss": 0.2399, "step": 46473 }, { "epoch": 3.764906027219702, "grad_norm": 0.06601189821958542, "learning_rate": 1.3060893829605295e-05, "loss": 0.202, "step": 46474 }, { "epoch": 3.7649870382372, "grad_norm": 0.08131560683250427, "learning_rate": 1.3056393177010667e-05, "loss": 0.2358, "step": 46475 }, { "epoch": 3.7650680492546984, "grad_norm": 0.07608696073293686, "learning_rate": 1.305189252441604e-05, "loss": 0.1951, "step": 46476 }, { "epoch": 3.765149060272197, "grad_norm": 0.07890737056732178, "learning_rate": 1.3047391871821416e-05, "loss": 0.2582, "step": 46477 }, { "epoch": 3.7652300712896953, "grad_norm": 0.09459563344717026, "learning_rate": 1.3042891219226788e-05, "loss": 0.2385, "step": 46478 }, { "epoch": 3.7653110823071936, "grad_norm": 0.06472989171743393, "learning_rate": 1.3038390566632161e-05, "loss": 0.1975, "step": 46479 }, { "epoch": 3.7653920933246923, "grad_norm": 0.10067077726125717, "learning_rate": 1.3033889914037537e-05, "loss": 0.2349, "step": 46480 }, { "epoch": 3.7654731043421905, "grad_norm": 0.07255268096923828, "learning_rate": 1.3029389261442908e-05, "loss": 0.2272, "step": 46481 }, { "epoch": 3.7655541153596888, "grad_norm": 0.06430191546678543, "learning_rate": 1.3024888608848285e-05, "loss": 0.1932, "step": 46482 }, { "epoch": 3.7656351263771874, "grad_norm": 0.08290670067071915, "learning_rate": 1.3020387956253657e-05, "loss": 0.2751, "step": 46483 }, { "epoch": 3.7657161373946857, "grad_norm": 0.0648224726319313, "learning_rate": 1.301588730365903e-05, "loss": 0.2181, "step": 46484 }, { "epoch": 3.765797148412184, "grad_norm": 0.07298358529806137, "learning_rate": 1.3011386651064406e-05, "loss": 0.2469, "step": 46485 }, { "epoch": 3.7658781594296826, "grad_norm": 0.07706646621227264, "learning_rate": 1.3006885998469778e-05, "loss": 0.2174, "step": 46486 }, { "epoch": 3.765959170447181, "grad_norm": 0.07118535041809082, "learning_rate": 1.3002385345875153e-05, "loss": 0.223, "step": 46487 }, { "epoch": 3.766040181464679, "grad_norm": 0.09811242669820786, "learning_rate": 1.2997884693280527e-05, "loss": 0.2232, "step": 46488 }, { "epoch": 3.766121192482178, "grad_norm": 0.0811871886253357, "learning_rate": 1.2993384040685899e-05, "loss": 0.2519, "step": 46489 }, { "epoch": 3.766202203499676, "grad_norm": 0.09767928719520569, "learning_rate": 1.2988883388091274e-05, "loss": 0.2349, "step": 46490 }, { "epoch": 3.7662832145171743, "grad_norm": 0.07688478380441666, "learning_rate": 1.2984382735496648e-05, "loss": 0.2133, "step": 46491 }, { "epoch": 3.766364225534673, "grad_norm": 0.0702710896730423, "learning_rate": 1.297988208290202e-05, "loss": 0.1993, "step": 46492 }, { "epoch": 3.7664452365521712, "grad_norm": 0.0628383606672287, "learning_rate": 1.2975381430307395e-05, "loss": 0.197, "step": 46493 }, { "epoch": 3.7665262475696695, "grad_norm": 0.06867960095405579, "learning_rate": 1.2970880777712769e-05, "loss": 0.2463, "step": 46494 }, { "epoch": 3.7666072585871677, "grad_norm": 0.08608069270849228, "learning_rate": 1.2966380125118144e-05, "loss": 0.224, "step": 46495 }, { "epoch": 3.766688269604666, "grad_norm": 0.06254792213439941, "learning_rate": 1.2961879472523516e-05, "loss": 0.2338, "step": 46496 }, { "epoch": 3.7667692806221647, "grad_norm": 0.0764245092868805, "learning_rate": 1.295737881992889e-05, "loss": 0.1998, "step": 46497 }, { "epoch": 3.766850291639663, "grad_norm": 0.08328287303447723, "learning_rate": 1.2952878167334265e-05, "loss": 0.226, "step": 46498 }, { "epoch": 3.766931302657161, "grad_norm": 0.08136755973100662, "learning_rate": 1.2948377514739637e-05, "loss": 0.2651, "step": 46499 }, { "epoch": 3.76701231367466, "grad_norm": 0.08298899978399277, "learning_rate": 1.2943876862145014e-05, "loss": 0.2291, "step": 46500 }, { "epoch": 3.767093324692158, "grad_norm": 0.06559506058692932, "learning_rate": 1.2939376209550386e-05, "loss": 0.2238, "step": 46501 }, { "epoch": 3.7671743357096563, "grad_norm": 0.08516070246696472, "learning_rate": 1.2934875556955758e-05, "loss": 0.2544, "step": 46502 }, { "epoch": 3.767255346727155, "grad_norm": 0.06356891244649887, "learning_rate": 1.2930374904361135e-05, "loss": 0.2192, "step": 46503 }, { "epoch": 3.7673363577446533, "grad_norm": 0.07620159536600113, "learning_rate": 1.2925874251766506e-05, "loss": 0.2559, "step": 46504 }, { "epoch": 3.7674173687621515, "grad_norm": 0.06729257851839066, "learning_rate": 1.292137359917188e-05, "loss": 0.1718, "step": 46505 }, { "epoch": 3.76749837977965, "grad_norm": 0.0670362114906311, "learning_rate": 1.2916872946577255e-05, "loss": 0.1767, "step": 46506 }, { "epoch": 3.7675793907971484, "grad_norm": 0.07508490234613419, "learning_rate": 1.2912372293982627e-05, "loss": 0.2409, "step": 46507 }, { "epoch": 3.7676604018146467, "grad_norm": 0.07815518975257874, "learning_rate": 1.2907871641388003e-05, "loss": 0.2148, "step": 46508 }, { "epoch": 3.7677414128321454, "grad_norm": 0.07622785121202469, "learning_rate": 1.2903370988793376e-05, "loss": 0.2404, "step": 46509 }, { "epoch": 3.7678224238496436, "grad_norm": 0.07081098109483719, "learning_rate": 1.2898870336198748e-05, "loss": 0.1935, "step": 46510 }, { "epoch": 3.767903434867142, "grad_norm": 0.0799146220088005, "learning_rate": 1.2894369683604123e-05, "loss": 0.2275, "step": 46511 }, { "epoch": 3.7679844458846405, "grad_norm": 0.08372996002435684, "learning_rate": 1.2889869031009497e-05, "loss": 0.2283, "step": 46512 }, { "epoch": 3.768065456902139, "grad_norm": 0.07471054792404175, "learning_rate": 1.2885368378414872e-05, "loss": 0.2373, "step": 46513 }, { "epoch": 3.768146467919637, "grad_norm": 0.07189594954252243, "learning_rate": 1.2880867725820244e-05, "loss": 0.1982, "step": 46514 }, { "epoch": 3.7682274789371357, "grad_norm": 0.07205279171466827, "learning_rate": 1.2876367073225618e-05, "loss": 0.2298, "step": 46515 }, { "epoch": 3.768308489954634, "grad_norm": 0.06825879216194153, "learning_rate": 1.2871866420630993e-05, "loss": 0.2154, "step": 46516 }, { "epoch": 3.768389500972132, "grad_norm": 0.06254030764102936, "learning_rate": 1.2867365768036365e-05, "loss": 0.2073, "step": 46517 }, { "epoch": 3.7684705119896305, "grad_norm": 0.06677734106779099, "learning_rate": 1.2862865115441739e-05, "loss": 0.1922, "step": 46518 }, { "epoch": 3.7685515230071287, "grad_norm": 0.06440742313861847, "learning_rate": 1.2858364462847114e-05, "loss": 0.2021, "step": 46519 }, { "epoch": 3.7686325340246274, "grad_norm": 0.08662337809801102, "learning_rate": 1.2853863810252487e-05, "loss": 0.2315, "step": 46520 }, { "epoch": 3.7687135450421256, "grad_norm": 0.06901124119758606, "learning_rate": 1.2849363157657863e-05, "loss": 0.1859, "step": 46521 }, { "epoch": 3.768794556059624, "grad_norm": 0.08281880617141724, "learning_rate": 1.2844862505063235e-05, "loss": 0.2549, "step": 46522 }, { "epoch": 3.7688755670771226, "grad_norm": 0.07393230497837067, "learning_rate": 1.2840361852468608e-05, "loss": 0.2464, "step": 46523 }, { "epoch": 3.768956578094621, "grad_norm": 0.07908966392278671, "learning_rate": 1.2835861199873984e-05, "loss": 0.2262, "step": 46524 }, { "epoch": 3.769037589112119, "grad_norm": 0.08938897401094437, "learning_rate": 1.2831360547279356e-05, "loss": 0.2342, "step": 46525 }, { "epoch": 3.7691186001296177, "grad_norm": 0.0999879539012909, "learning_rate": 1.282685989468473e-05, "loss": 0.2072, "step": 46526 }, { "epoch": 3.769199611147116, "grad_norm": 0.05360986292362213, "learning_rate": 1.2822359242090104e-05, "loss": 0.1837, "step": 46527 }, { "epoch": 3.7692806221646142, "grad_norm": 0.06904050707817078, "learning_rate": 1.2817858589495476e-05, "loss": 0.2043, "step": 46528 }, { "epoch": 3.769361633182113, "grad_norm": 0.0860978439450264, "learning_rate": 1.2813357936900852e-05, "loss": 0.2197, "step": 46529 }, { "epoch": 3.769442644199611, "grad_norm": 0.0657324492931366, "learning_rate": 1.2808857284306225e-05, "loss": 0.2159, "step": 46530 }, { "epoch": 3.7695236552171094, "grad_norm": 0.06086688116192818, "learning_rate": 1.2804356631711597e-05, "loss": 0.214, "step": 46531 }, { "epoch": 3.769604666234608, "grad_norm": 0.07376797497272491, "learning_rate": 1.2799855979116972e-05, "loss": 0.249, "step": 46532 }, { "epoch": 3.7696856772521063, "grad_norm": 0.0695323571562767, "learning_rate": 1.2795355326522346e-05, "loss": 0.2095, "step": 46533 }, { "epoch": 3.7697666882696046, "grad_norm": 0.0635128989815712, "learning_rate": 1.2790854673927721e-05, "loss": 0.1936, "step": 46534 }, { "epoch": 3.7698476992871033, "grad_norm": 0.07343027740716934, "learning_rate": 1.2786354021333093e-05, "loss": 0.2146, "step": 46535 }, { "epoch": 3.7699287103046015, "grad_norm": 0.06408240646123886, "learning_rate": 1.2781853368738467e-05, "loss": 0.2182, "step": 46536 }, { "epoch": 3.7700097213220998, "grad_norm": 0.08215359598398209, "learning_rate": 1.2777352716143842e-05, "loss": 0.2187, "step": 46537 }, { "epoch": 3.7700907323395985, "grad_norm": 0.07449215650558472, "learning_rate": 1.2772852063549216e-05, "loss": 0.2312, "step": 46538 }, { "epoch": 3.7701717433570967, "grad_norm": 0.07269278913736343, "learning_rate": 1.2768351410954588e-05, "loss": 0.2118, "step": 46539 }, { "epoch": 3.770252754374595, "grad_norm": 0.0680905431509018, "learning_rate": 1.2763850758359963e-05, "loss": 0.1955, "step": 46540 }, { "epoch": 3.770333765392093, "grad_norm": 0.06702621281147003, "learning_rate": 1.2759350105765337e-05, "loss": 0.2244, "step": 46541 }, { "epoch": 3.7704147764095914, "grad_norm": 0.09051982313394547, "learning_rate": 1.2754849453170712e-05, "loss": 0.2369, "step": 46542 }, { "epoch": 3.77049578742709, "grad_norm": 0.07590014487504959, "learning_rate": 1.2750348800576084e-05, "loss": 0.2358, "step": 46543 }, { "epoch": 3.7705767984445884, "grad_norm": 0.0708058774471283, "learning_rate": 1.2745848147981457e-05, "loss": 0.2117, "step": 46544 }, { "epoch": 3.7706578094620866, "grad_norm": 0.0713592991232872, "learning_rate": 1.2741347495386833e-05, "loss": 0.2053, "step": 46545 }, { "epoch": 3.7707388204795853, "grad_norm": 0.07061046361923218, "learning_rate": 1.2736846842792205e-05, "loss": 0.2271, "step": 46546 }, { "epoch": 3.7708198314970836, "grad_norm": 0.08287011831998825, "learning_rate": 1.273234619019758e-05, "loss": 0.2432, "step": 46547 }, { "epoch": 3.770900842514582, "grad_norm": 0.07770534604787827, "learning_rate": 1.2727845537602953e-05, "loss": 0.2462, "step": 46548 }, { "epoch": 3.7709818535320805, "grad_norm": 0.07193398475646973, "learning_rate": 1.2723344885008325e-05, "loss": 0.218, "step": 46549 }, { "epoch": 3.7710628645495787, "grad_norm": 0.06820261478424072, "learning_rate": 1.27188442324137e-05, "loss": 0.236, "step": 46550 }, { "epoch": 3.771143875567077, "grad_norm": 0.07764853537082672, "learning_rate": 1.2714343579819074e-05, "loss": 0.2336, "step": 46551 }, { "epoch": 3.7712248865845757, "grad_norm": 0.07523754239082336, "learning_rate": 1.2709842927224446e-05, "loss": 0.2062, "step": 46552 }, { "epoch": 3.771305897602074, "grad_norm": 0.07026119530200958, "learning_rate": 1.2705342274629821e-05, "loss": 0.2321, "step": 46553 }, { "epoch": 3.771386908619572, "grad_norm": 0.06876692920923233, "learning_rate": 1.2700841622035195e-05, "loss": 0.2054, "step": 46554 }, { "epoch": 3.771467919637071, "grad_norm": 0.06335452198982239, "learning_rate": 1.269634096944057e-05, "loss": 0.1917, "step": 46555 }, { "epoch": 3.771548930654569, "grad_norm": 0.07451608031988144, "learning_rate": 1.2691840316845944e-05, "loss": 0.255, "step": 46556 }, { "epoch": 3.7716299416720673, "grad_norm": 0.07577665895223618, "learning_rate": 1.2687339664251316e-05, "loss": 0.2117, "step": 46557 }, { "epoch": 3.771710952689566, "grad_norm": 0.07046057283878326, "learning_rate": 1.2682839011656691e-05, "loss": 0.2154, "step": 46558 }, { "epoch": 3.7717919637070643, "grad_norm": 0.09273316711187363, "learning_rate": 1.2678338359062065e-05, "loss": 0.3072, "step": 46559 }, { "epoch": 3.7718729747245625, "grad_norm": 0.07682998478412628, "learning_rate": 1.267383770646744e-05, "loss": 0.1781, "step": 46560 }, { "epoch": 3.7719539857420608, "grad_norm": 0.0753307119011879, "learning_rate": 1.2669337053872812e-05, "loss": 0.2, "step": 46561 }, { "epoch": 3.7720349967595594, "grad_norm": 0.06994988024234772, "learning_rate": 1.2664836401278186e-05, "loss": 0.2132, "step": 46562 }, { "epoch": 3.7721160077770577, "grad_norm": 0.08743558824062347, "learning_rate": 1.2660335748683561e-05, "loss": 0.2153, "step": 46563 }, { "epoch": 3.772197018794556, "grad_norm": 0.06563448905944824, "learning_rate": 1.2655835096088933e-05, "loss": 0.2165, "step": 46564 }, { "epoch": 3.772278029812054, "grad_norm": 0.0646919459104538, "learning_rate": 1.2651334443494306e-05, "loss": 0.1949, "step": 46565 }, { "epoch": 3.772359040829553, "grad_norm": 0.06894893944263458, "learning_rate": 1.2646833790899682e-05, "loss": 0.2411, "step": 46566 }, { "epoch": 3.772440051847051, "grad_norm": 0.06888721883296967, "learning_rate": 1.2642333138305054e-05, "loss": 0.2475, "step": 46567 }, { "epoch": 3.7725210628645494, "grad_norm": 0.0673423707485199, "learning_rate": 1.2637832485710429e-05, "loss": 0.2775, "step": 46568 }, { "epoch": 3.772602073882048, "grad_norm": 0.06546924263238907, "learning_rate": 1.2633331833115803e-05, "loss": 0.248, "step": 46569 }, { "epoch": 3.7726830848995463, "grad_norm": 0.06502770632505417, "learning_rate": 1.2628831180521174e-05, "loss": 0.2148, "step": 46570 }, { "epoch": 3.7727640959170445, "grad_norm": 0.07278035581111908, "learning_rate": 1.2624330527926551e-05, "loss": 0.2191, "step": 46571 }, { "epoch": 3.7728451069345432, "grad_norm": 0.0673990324139595, "learning_rate": 1.2619829875331923e-05, "loss": 0.2305, "step": 46572 }, { "epoch": 3.7729261179520415, "grad_norm": 0.07896624505519867, "learning_rate": 1.2615329222737299e-05, "loss": 0.2684, "step": 46573 }, { "epoch": 3.7730071289695397, "grad_norm": 0.06549516320228577, "learning_rate": 1.2610828570142672e-05, "loss": 0.2069, "step": 46574 }, { "epoch": 3.7730881399870384, "grad_norm": 0.06927746534347534, "learning_rate": 1.2606327917548044e-05, "loss": 0.1988, "step": 46575 }, { "epoch": 3.7731691510045366, "grad_norm": 0.07695776224136353, "learning_rate": 1.260182726495342e-05, "loss": 0.2593, "step": 46576 }, { "epoch": 3.773250162022035, "grad_norm": 0.0668836161494255, "learning_rate": 1.2597326612358793e-05, "loss": 0.2381, "step": 46577 }, { "epoch": 3.7733311730395336, "grad_norm": 0.0662456750869751, "learning_rate": 1.2592825959764165e-05, "loss": 0.2232, "step": 46578 }, { "epoch": 3.773412184057032, "grad_norm": 0.07020401209592819, "learning_rate": 1.258832530716954e-05, "loss": 0.2331, "step": 46579 }, { "epoch": 3.77349319507453, "grad_norm": 0.07420952618122101, "learning_rate": 1.2583824654574914e-05, "loss": 0.2642, "step": 46580 }, { "epoch": 3.7735742060920288, "grad_norm": 0.0619262158870697, "learning_rate": 1.2579324001980289e-05, "loss": 0.2069, "step": 46581 }, { "epoch": 3.773655217109527, "grad_norm": 0.06421462446451187, "learning_rate": 1.2574823349385661e-05, "loss": 0.1812, "step": 46582 }, { "epoch": 3.7737362281270252, "grad_norm": 0.06833804398775101, "learning_rate": 1.2570322696791035e-05, "loss": 0.1944, "step": 46583 }, { "epoch": 3.7738172391445235, "grad_norm": 0.08516917377710342, "learning_rate": 1.256582204419641e-05, "loss": 0.2427, "step": 46584 }, { "epoch": 3.773898250162022, "grad_norm": 0.08085508644580841, "learning_rate": 1.2561321391601782e-05, "loss": 0.232, "step": 46585 }, { "epoch": 3.7739792611795204, "grad_norm": 0.07463680952787399, "learning_rate": 1.2556820739007157e-05, "loss": 0.2403, "step": 46586 }, { "epoch": 3.7740602721970187, "grad_norm": 0.07635892182588577, "learning_rate": 1.255232008641253e-05, "loss": 0.2527, "step": 46587 }, { "epoch": 3.774141283214517, "grad_norm": 0.07656805962324142, "learning_rate": 1.2547819433817903e-05, "loss": 0.2201, "step": 46588 }, { "epoch": 3.7742222942320156, "grad_norm": 0.08051590621471405, "learning_rate": 1.254331878122328e-05, "loss": 0.2406, "step": 46589 }, { "epoch": 3.774303305249514, "grad_norm": 0.08109819889068604, "learning_rate": 1.2538818128628652e-05, "loss": 0.2175, "step": 46590 }, { "epoch": 3.774384316267012, "grad_norm": 0.07000181823968887, "learning_rate": 1.2534317476034023e-05, "loss": 0.1995, "step": 46591 }, { "epoch": 3.774465327284511, "grad_norm": 0.08554103225469589, "learning_rate": 1.25298168234394e-05, "loss": 0.2115, "step": 46592 }, { "epoch": 3.774546338302009, "grad_norm": 0.06602445989847183, "learning_rate": 1.2525316170844772e-05, "loss": 0.2711, "step": 46593 }, { "epoch": 3.7746273493195073, "grad_norm": 0.06864854693412781, "learning_rate": 1.2520815518250148e-05, "loss": 0.2129, "step": 46594 }, { "epoch": 3.774708360337006, "grad_norm": 0.06876920908689499, "learning_rate": 1.2516314865655521e-05, "loss": 0.2075, "step": 46595 }, { "epoch": 3.774789371354504, "grad_norm": 0.0704292431473732, "learning_rate": 1.2511814213060893e-05, "loss": 0.2022, "step": 46596 }, { "epoch": 3.7748703823720025, "grad_norm": 0.06541324406862259, "learning_rate": 1.2507313560466268e-05, "loss": 0.2333, "step": 46597 }, { "epoch": 3.774951393389501, "grad_norm": 0.08963797986507416, "learning_rate": 1.2502812907871642e-05, "loss": 0.2533, "step": 46598 }, { "epoch": 3.7750324044069994, "grad_norm": 0.0644608810544014, "learning_rate": 1.2498312255277016e-05, "loss": 0.2091, "step": 46599 }, { "epoch": 3.7751134154244976, "grad_norm": 0.07457215338945389, "learning_rate": 1.249381160268239e-05, "loss": 0.2618, "step": 46600 }, { "epoch": 3.7751944264419963, "grad_norm": 0.06820184737443924, "learning_rate": 1.2489310950087765e-05, "loss": 0.2127, "step": 46601 }, { "epoch": 3.7752754374594946, "grad_norm": 0.060140807181596756, "learning_rate": 1.2484810297493137e-05, "loss": 0.2029, "step": 46602 }, { "epoch": 3.775356448476993, "grad_norm": 0.05878528580069542, "learning_rate": 1.248030964489851e-05, "loss": 0.2461, "step": 46603 }, { "epoch": 3.7754374594944915, "grad_norm": 0.0732777863740921, "learning_rate": 1.2475808992303885e-05, "loss": 0.2469, "step": 46604 }, { "epoch": 3.7755184705119897, "grad_norm": 0.07616322487592697, "learning_rate": 1.2471308339709259e-05, "loss": 0.2513, "step": 46605 }, { "epoch": 3.775599481529488, "grad_norm": 0.07453019171953201, "learning_rate": 1.2466807687114631e-05, "loss": 0.2097, "step": 46606 }, { "epoch": 3.7756804925469862, "grad_norm": 0.07242731750011444, "learning_rate": 1.2462307034520006e-05, "loss": 0.2427, "step": 46607 }, { "epoch": 3.775761503564485, "grad_norm": 0.07155609875917435, "learning_rate": 1.245780638192538e-05, "loss": 0.236, "step": 46608 }, { "epoch": 3.775842514581983, "grad_norm": 0.07473588734865189, "learning_rate": 1.2453305729330753e-05, "loss": 0.2572, "step": 46609 }, { "epoch": 3.7759235255994814, "grad_norm": 0.06979592144489288, "learning_rate": 1.2448805076736127e-05, "loss": 0.2232, "step": 46610 }, { "epoch": 3.7760045366169797, "grad_norm": 0.0813089907169342, "learning_rate": 1.24443044241415e-05, "loss": 0.2052, "step": 46611 }, { "epoch": 3.7760855476344783, "grad_norm": 0.08803302049636841, "learning_rate": 1.2439803771546874e-05, "loss": 0.2069, "step": 46612 }, { "epoch": 3.7761665586519766, "grad_norm": 0.06549622863531113, "learning_rate": 1.243530311895225e-05, "loss": 0.2477, "step": 46613 }, { "epoch": 3.776247569669475, "grad_norm": 0.07154756784439087, "learning_rate": 1.2430802466357623e-05, "loss": 0.2317, "step": 46614 }, { "epoch": 3.7763285806869735, "grad_norm": 0.06701797246932983, "learning_rate": 1.2426301813762995e-05, "loss": 0.211, "step": 46615 }, { "epoch": 3.7764095917044718, "grad_norm": 0.06661625951528549, "learning_rate": 1.242180116116837e-05, "loss": 0.2281, "step": 46616 }, { "epoch": 3.77649060272197, "grad_norm": 0.06580673903226852, "learning_rate": 1.2417300508573744e-05, "loss": 0.2164, "step": 46617 }, { "epoch": 3.7765716137394687, "grad_norm": 0.0783298909664154, "learning_rate": 1.2412799855979118e-05, "loss": 0.2132, "step": 46618 }, { "epoch": 3.776652624756967, "grad_norm": 0.10125415027141571, "learning_rate": 1.2408299203384491e-05, "loss": 0.2638, "step": 46619 }, { "epoch": 3.776733635774465, "grad_norm": 0.06981062144041061, "learning_rate": 1.2403798550789865e-05, "loss": 0.1883, "step": 46620 }, { "epoch": 3.776814646791964, "grad_norm": 0.06791463494300842, "learning_rate": 1.2399297898195238e-05, "loss": 0.2145, "step": 46621 }, { "epoch": 3.776895657809462, "grad_norm": 0.06609434634447098, "learning_rate": 1.2394797245600614e-05, "loss": 0.2248, "step": 46622 }, { "epoch": 3.7769766688269604, "grad_norm": 0.065769724547863, "learning_rate": 1.2390296593005986e-05, "loss": 0.2087, "step": 46623 }, { "epoch": 3.777057679844459, "grad_norm": 0.07486658543348312, "learning_rate": 1.238579594041136e-05, "loss": 0.2623, "step": 46624 }, { "epoch": 3.7771386908619573, "grad_norm": 0.06038758158683777, "learning_rate": 1.2381295287816734e-05, "loss": 0.1932, "step": 46625 }, { "epoch": 3.7772197018794555, "grad_norm": 0.07746127247810364, "learning_rate": 1.2376794635222108e-05, "loss": 0.2262, "step": 46626 }, { "epoch": 3.7773007128969542, "grad_norm": 0.07121626287698746, "learning_rate": 1.2372293982627482e-05, "loss": 0.2153, "step": 46627 }, { "epoch": 3.7773817239144525, "grad_norm": 0.07847585529088974, "learning_rate": 1.2367793330032855e-05, "loss": 0.1917, "step": 46628 }, { "epoch": 3.7774627349319507, "grad_norm": 0.0689721554517746, "learning_rate": 1.2363292677438229e-05, "loss": 0.2269, "step": 46629 }, { "epoch": 3.777543745949449, "grad_norm": 0.08667279779911041, "learning_rate": 1.2358792024843602e-05, "loss": 0.222, "step": 46630 }, { "epoch": 3.7776247569669477, "grad_norm": 0.06442601978778839, "learning_rate": 1.2354291372248978e-05, "loss": 0.2319, "step": 46631 }, { "epoch": 3.777705767984446, "grad_norm": 0.0721411257982254, "learning_rate": 1.234979071965435e-05, "loss": 0.1926, "step": 46632 }, { "epoch": 3.777786779001944, "grad_norm": 0.06399735063314438, "learning_rate": 1.2345290067059723e-05, "loss": 0.1966, "step": 46633 }, { "epoch": 3.7778677900194424, "grad_norm": 0.0750143975019455, "learning_rate": 1.2340789414465099e-05, "loss": 0.2406, "step": 46634 }, { "epoch": 3.777948801036941, "grad_norm": 0.08481192588806152, "learning_rate": 1.2336288761870472e-05, "loss": 0.2353, "step": 46635 }, { "epoch": 3.7780298120544393, "grad_norm": 0.060849450528621674, "learning_rate": 1.2331788109275846e-05, "loss": 0.2453, "step": 46636 }, { "epoch": 3.7781108230719376, "grad_norm": 0.06765329837799072, "learning_rate": 1.232728745668122e-05, "loss": 0.1992, "step": 46637 }, { "epoch": 3.7781918340894363, "grad_norm": 0.07488850504159927, "learning_rate": 1.2322786804086593e-05, "loss": 0.2427, "step": 46638 }, { "epoch": 3.7782728451069345, "grad_norm": 0.07653640955686569, "learning_rate": 1.2318286151491967e-05, "loss": 0.2297, "step": 46639 }, { "epoch": 3.7783538561244328, "grad_norm": 0.07188255339860916, "learning_rate": 1.231378549889734e-05, "loss": 0.2335, "step": 46640 }, { "epoch": 3.7784348671419314, "grad_norm": 0.07283948361873627, "learning_rate": 1.2309284846302714e-05, "loss": 0.2368, "step": 46641 }, { "epoch": 3.7785158781594297, "grad_norm": 0.056375570595264435, "learning_rate": 1.2304784193708087e-05, "loss": 0.1894, "step": 46642 }, { "epoch": 3.778596889176928, "grad_norm": 0.07126890867948532, "learning_rate": 1.2300283541113463e-05, "loss": 0.2171, "step": 46643 }, { "epoch": 3.7786779001944266, "grad_norm": 0.07158375531435013, "learning_rate": 1.2295782888518836e-05, "loss": 0.1882, "step": 46644 }, { "epoch": 3.778758911211925, "grad_norm": 0.072811059653759, "learning_rate": 1.229128223592421e-05, "loss": 0.2374, "step": 46645 }, { "epoch": 3.778839922229423, "grad_norm": 0.06893569976091385, "learning_rate": 1.2286781583329584e-05, "loss": 0.2205, "step": 46646 }, { "epoch": 3.778920933246922, "grad_norm": 0.07594628632068634, "learning_rate": 1.2282280930734957e-05, "loss": 0.2129, "step": 46647 }, { "epoch": 3.77900194426442, "grad_norm": 0.06287289410829544, "learning_rate": 1.227778027814033e-05, "loss": 0.2281, "step": 46648 }, { "epoch": 3.7790829552819183, "grad_norm": 0.07265198230743408, "learning_rate": 1.2273279625545704e-05, "loss": 0.2196, "step": 46649 }, { "epoch": 3.779163966299417, "grad_norm": 0.05850711092352867, "learning_rate": 1.2268778972951078e-05, "loss": 0.2111, "step": 46650 }, { "epoch": 3.779244977316915, "grad_norm": 0.06421723961830139, "learning_rate": 1.2264278320356452e-05, "loss": 0.2301, "step": 46651 }, { "epoch": 3.7793259883344135, "grad_norm": 0.06834607571363449, "learning_rate": 1.2259777667761827e-05, "loss": 0.2464, "step": 46652 }, { "epoch": 3.7794069993519117, "grad_norm": 0.06908293813467026, "learning_rate": 1.2255277015167199e-05, "loss": 0.1841, "step": 46653 }, { "epoch": 3.7794880103694104, "grad_norm": 0.06474535912275314, "learning_rate": 1.2250776362572574e-05, "loss": 0.1876, "step": 46654 }, { "epoch": 3.7795690213869086, "grad_norm": 0.07207901775836945, "learning_rate": 1.2246275709977948e-05, "loss": 0.2289, "step": 46655 }, { "epoch": 3.779650032404407, "grad_norm": 0.05818149074912071, "learning_rate": 1.2241775057383321e-05, "loss": 0.1996, "step": 46656 }, { "epoch": 3.779731043421905, "grad_norm": 0.0827532559633255, "learning_rate": 1.2237274404788695e-05, "loss": 0.2101, "step": 46657 }, { "epoch": 3.779812054439404, "grad_norm": 0.09072255343198776, "learning_rate": 1.2232773752194068e-05, "loss": 0.256, "step": 46658 }, { "epoch": 3.779893065456902, "grad_norm": 0.07674512267112732, "learning_rate": 1.2228273099599442e-05, "loss": 0.2505, "step": 46659 }, { "epoch": 3.7799740764744003, "grad_norm": 0.060501959174871445, "learning_rate": 1.2223772447004816e-05, "loss": 0.2001, "step": 46660 }, { "epoch": 3.780055087491899, "grad_norm": 0.06538248807191849, "learning_rate": 1.2219271794410191e-05, "loss": 0.2453, "step": 46661 }, { "epoch": 3.7801360985093972, "grad_norm": 0.07560870796442032, "learning_rate": 1.2214771141815563e-05, "loss": 0.1887, "step": 46662 }, { "epoch": 3.7802171095268955, "grad_norm": 0.08395927399396896, "learning_rate": 1.2210270489220938e-05, "loss": 0.1986, "step": 46663 }, { "epoch": 3.780298120544394, "grad_norm": 0.05529021471738815, "learning_rate": 1.2205769836626312e-05, "loss": 0.1969, "step": 46664 }, { "epoch": 3.7803791315618924, "grad_norm": 0.09511775523424149, "learning_rate": 1.2201269184031685e-05, "loss": 0.233, "step": 46665 }, { "epoch": 3.7804601425793907, "grad_norm": 0.06025183945894241, "learning_rate": 1.2196768531437059e-05, "loss": 0.2091, "step": 46666 }, { "epoch": 3.7805411535968894, "grad_norm": 0.07725583016872406, "learning_rate": 1.2192267878842433e-05, "loss": 0.2462, "step": 46667 }, { "epoch": 3.7806221646143876, "grad_norm": 0.07531146705150604, "learning_rate": 1.2187767226247806e-05, "loss": 0.1891, "step": 46668 }, { "epoch": 3.780703175631886, "grad_norm": 0.06599050760269165, "learning_rate": 1.218326657365318e-05, "loss": 0.2141, "step": 46669 }, { "epoch": 3.7807841866493845, "grad_norm": 0.08106547594070435, "learning_rate": 1.2178765921058555e-05, "loss": 0.2459, "step": 46670 }, { "epoch": 3.780865197666883, "grad_norm": 0.07727594673633575, "learning_rate": 1.2174265268463927e-05, "loss": 0.2672, "step": 46671 }, { "epoch": 3.780946208684381, "grad_norm": 0.07024858891963959, "learning_rate": 1.2169764615869302e-05, "loss": 0.1984, "step": 46672 }, { "epoch": 3.7810272197018797, "grad_norm": 0.05269385129213333, "learning_rate": 1.2165263963274676e-05, "loss": 0.1987, "step": 46673 }, { "epoch": 3.781108230719378, "grad_norm": 0.07926183193922043, "learning_rate": 1.216076331068005e-05, "loss": 0.225, "step": 46674 }, { "epoch": 3.781189241736876, "grad_norm": 0.08314008265733719, "learning_rate": 1.2156262658085423e-05, "loss": 0.2404, "step": 46675 }, { "epoch": 3.7812702527543745, "grad_norm": 0.08210525661706924, "learning_rate": 1.2151762005490797e-05, "loss": 0.2526, "step": 46676 }, { "epoch": 3.781351263771873, "grad_norm": 0.08011859655380249, "learning_rate": 1.214726135289617e-05, "loss": 0.2305, "step": 46677 }, { "epoch": 3.7814322747893714, "grad_norm": 0.07893484830856323, "learning_rate": 1.2142760700301546e-05, "loss": 0.2348, "step": 46678 }, { "epoch": 3.7815132858068696, "grad_norm": 0.06235817074775696, "learning_rate": 1.2138260047706918e-05, "loss": 0.1975, "step": 46679 }, { "epoch": 3.781594296824368, "grad_norm": 0.0649525374174118, "learning_rate": 1.2133759395112291e-05, "loss": 0.2165, "step": 46680 }, { "epoch": 3.7816753078418666, "grad_norm": 0.06788064539432526, "learning_rate": 1.2129258742517666e-05, "loss": 0.1885, "step": 46681 }, { "epoch": 3.781756318859365, "grad_norm": 0.07292314618825912, "learning_rate": 1.212475808992304e-05, "loss": 0.2394, "step": 46682 }, { "epoch": 3.781837329876863, "grad_norm": 0.06189664825797081, "learning_rate": 1.2120257437328412e-05, "loss": 0.2327, "step": 46683 }, { "epoch": 3.7819183408943617, "grad_norm": 0.054467201232910156, "learning_rate": 1.2115756784733787e-05, "loss": 0.1965, "step": 46684 }, { "epoch": 3.78199935191186, "grad_norm": 0.06842909753322601, "learning_rate": 1.211125613213916e-05, "loss": 0.2055, "step": 46685 }, { "epoch": 3.7820803629293582, "grad_norm": 0.07629824429750443, "learning_rate": 1.2106755479544534e-05, "loss": 0.2288, "step": 46686 }, { "epoch": 3.782161373946857, "grad_norm": 0.06854870170354843, "learning_rate": 1.210225482694991e-05, "loss": 0.2034, "step": 46687 }, { "epoch": 3.782242384964355, "grad_norm": 0.08357995003461838, "learning_rate": 1.2097754174355282e-05, "loss": 0.2294, "step": 46688 }, { "epoch": 3.7823233959818534, "grad_norm": 0.05101915821433067, "learning_rate": 1.2093253521760655e-05, "loss": 0.1895, "step": 46689 }, { "epoch": 3.782404406999352, "grad_norm": 0.0792124941945076, "learning_rate": 1.208875286916603e-05, "loss": 0.216, "step": 46690 }, { "epoch": 3.7824854180168503, "grad_norm": 0.07666429132223129, "learning_rate": 1.2084252216571404e-05, "loss": 0.2374, "step": 46691 }, { "epoch": 3.7825664290343486, "grad_norm": 0.08312845975160599, "learning_rate": 1.2079751563976776e-05, "loss": 0.2384, "step": 46692 }, { "epoch": 3.7826474400518473, "grad_norm": 0.08143067359924316, "learning_rate": 1.2075250911382151e-05, "loss": 0.2452, "step": 46693 }, { "epoch": 3.7827284510693455, "grad_norm": 0.06815093010663986, "learning_rate": 1.2070750258787525e-05, "loss": 0.2508, "step": 46694 }, { "epoch": 3.7828094620868438, "grad_norm": 0.07493395358324051, "learning_rate": 1.2066249606192899e-05, "loss": 0.2458, "step": 46695 }, { "epoch": 3.7828904731043425, "grad_norm": 0.079828180372715, "learning_rate": 1.2061748953598272e-05, "loss": 0.2392, "step": 46696 }, { "epoch": 3.7829714841218407, "grad_norm": 0.0668768361210823, "learning_rate": 1.2057248301003646e-05, "loss": 0.2474, "step": 46697 }, { "epoch": 3.783052495139339, "grad_norm": 0.06913848966360092, "learning_rate": 1.205274764840902e-05, "loss": 0.2297, "step": 46698 }, { "epoch": 3.783133506156837, "grad_norm": 0.07236789911985397, "learning_rate": 1.2048246995814395e-05, "loss": 0.2377, "step": 46699 }, { "epoch": 3.7832145171743354, "grad_norm": 0.06536900997161865, "learning_rate": 1.2043746343219768e-05, "loss": 0.2262, "step": 46700 }, { "epoch": 3.783295528191834, "grad_norm": 0.08164361864328384, "learning_rate": 1.203924569062514e-05, "loss": 0.2633, "step": 46701 }, { "epoch": 3.7833765392093324, "grad_norm": 0.0710197389125824, "learning_rate": 1.2034745038030515e-05, "loss": 0.2106, "step": 46702 }, { "epoch": 3.7834575502268306, "grad_norm": 0.08425655961036682, "learning_rate": 1.2030244385435889e-05, "loss": 0.2288, "step": 46703 }, { "epoch": 3.7835385612443293, "grad_norm": 0.06478645652532578, "learning_rate": 1.2025743732841263e-05, "loss": 0.2321, "step": 46704 }, { "epoch": 3.7836195722618275, "grad_norm": 0.06343191862106323, "learning_rate": 1.2021243080246636e-05, "loss": 0.2063, "step": 46705 }, { "epoch": 3.783700583279326, "grad_norm": 0.08273705095052719, "learning_rate": 1.201674242765201e-05, "loss": 0.2683, "step": 46706 }, { "epoch": 3.7837815942968245, "grad_norm": 0.05265715345740318, "learning_rate": 1.2012241775057383e-05, "loss": 0.2125, "step": 46707 }, { "epoch": 3.7838626053143227, "grad_norm": 0.06474486738443375, "learning_rate": 1.2007741122462759e-05, "loss": 0.1809, "step": 46708 }, { "epoch": 3.783943616331821, "grad_norm": 0.07659123092889786, "learning_rate": 1.200324046986813e-05, "loss": 0.2185, "step": 46709 }, { "epoch": 3.7840246273493197, "grad_norm": 0.0784589946269989, "learning_rate": 1.1998739817273504e-05, "loss": 0.2195, "step": 46710 }, { "epoch": 3.784105638366818, "grad_norm": 0.0916212946176529, "learning_rate": 1.199423916467888e-05, "loss": 0.2416, "step": 46711 }, { "epoch": 3.784186649384316, "grad_norm": 0.0649249330163002, "learning_rate": 1.1989738512084253e-05, "loss": 0.2101, "step": 46712 }, { "epoch": 3.784267660401815, "grad_norm": 0.0661170706152916, "learning_rate": 1.1985237859489627e-05, "loss": 0.2285, "step": 46713 }, { "epoch": 3.784348671419313, "grad_norm": 0.08094004541635513, "learning_rate": 1.1980737206895e-05, "loss": 0.2624, "step": 46714 }, { "epoch": 3.7844296824368113, "grad_norm": 0.06313817203044891, "learning_rate": 1.1976236554300374e-05, "loss": 0.1953, "step": 46715 }, { "epoch": 3.78451069345431, "grad_norm": 0.07197370380163193, "learning_rate": 1.1971735901705748e-05, "loss": 0.2086, "step": 46716 }, { "epoch": 3.7845917044718083, "grad_norm": 0.060122050344944, "learning_rate": 1.1967235249111123e-05, "loss": 0.2372, "step": 46717 }, { "epoch": 3.7846727154893065, "grad_norm": 0.0652763694524765, "learning_rate": 1.1962734596516495e-05, "loss": 0.2288, "step": 46718 }, { "epoch": 3.784753726506805, "grad_norm": 0.07435501366853714, "learning_rate": 1.1958233943921868e-05, "loss": 0.2219, "step": 46719 }, { "epoch": 3.7848347375243034, "grad_norm": 0.06599126011133194, "learning_rate": 1.1953733291327244e-05, "loss": 0.221, "step": 46720 }, { "epoch": 3.7849157485418017, "grad_norm": 0.08771032840013504, "learning_rate": 1.1949232638732617e-05, "loss": 0.2255, "step": 46721 }, { "epoch": 3.7849967595593, "grad_norm": 0.08068722486495972, "learning_rate": 1.194473198613799e-05, "loss": 0.24, "step": 46722 }, { "epoch": 3.785077770576798, "grad_norm": 0.06992881000041962, "learning_rate": 1.1940231333543365e-05, "loss": 0.2148, "step": 46723 }, { "epoch": 3.785158781594297, "grad_norm": 0.06782852113246918, "learning_rate": 1.1935730680948738e-05, "loss": 0.2005, "step": 46724 }, { "epoch": 3.785239792611795, "grad_norm": 0.07253803312778473, "learning_rate": 1.1931230028354112e-05, "loss": 0.2628, "step": 46725 }, { "epoch": 3.7853208036292934, "grad_norm": 0.05372246354818344, "learning_rate": 1.1926729375759485e-05, "loss": 0.1846, "step": 46726 }, { "epoch": 3.785401814646792, "grad_norm": 0.0695682018995285, "learning_rate": 1.1922228723164859e-05, "loss": 0.2172, "step": 46727 }, { "epoch": 3.7854828256642903, "grad_norm": 0.07446936517953873, "learning_rate": 1.1917728070570233e-05, "loss": 0.2159, "step": 46728 }, { "epoch": 3.7855638366817885, "grad_norm": 0.07652425020933151, "learning_rate": 1.1913227417975608e-05, "loss": 0.2145, "step": 46729 }, { "epoch": 3.785644847699287, "grad_norm": 0.07526133209466934, "learning_rate": 1.1908726765380981e-05, "loss": 0.2344, "step": 46730 }, { "epoch": 3.7857258587167855, "grad_norm": 0.06465907394886017, "learning_rate": 1.1904226112786353e-05, "loss": 0.2097, "step": 46731 }, { "epoch": 3.7858068697342837, "grad_norm": 0.055496908724308014, "learning_rate": 1.1899725460191729e-05, "loss": 0.204, "step": 46732 }, { "epoch": 3.7858878807517824, "grad_norm": 0.07140771299600601, "learning_rate": 1.1895224807597102e-05, "loss": 0.1999, "step": 46733 }, { "epoch": 3.7859688917692806, "grad_norm": 0.07559964805841446, "learning_rate": 1.1890724155002476e-05, "loss": 0.2583, "step": 46734 }, { "epoch": 3.786049902786779, "grad_norm": 0.0660320520401001, "learning_rate": 1.188622350240785e-05, "loss": 0.231, "step": 46735 }, { "epoch": 3.7861309138042776, "grad_norm": 0.06079673767089844, "learning_rate": 1.1881722849813223e-05, "loss": 0.2112, "step": 46736 }, { "epoch": 3.786211924821776, "grad_norm": 0.07198115438222885, "learning_rate": 1.1877222197218597e-05, "loss": 0.1936, "step": 46737 }, { "epoch": 3.786292935839274, "grad_norm": 0.07653490453958511, "learning_rate": 1.1872721544623972e-05, "loss": 0.2059, "step": 46738 }, { "epoch": 3.7863739468567728, "grad_norm": 0.07065163552761078, "learning_rate": 1.1868220892029344e-05, "loss": 0.197, "step": 46739 }, { "epoch": 3.786454957874271, "grad_norm": 0.08107943832874298, "learning_rate": 1.1863720239434717e-05, "loss": 0.2431, "step": 46740 }, { "epoch": 3.7865359688917692, "grad_norm": 0.056850139051675797, "learning_rate": 1.1859219586840093e-05, "loss": 0.2156, "step": 46741 }, { "epoch": 3.786616979909268, "grad_norm": 0.10843037068843842, "learning_rate": 1.1854718934245466e-05, "loss": 0.2501, "step": 46742 }, { "epoch": 3.786697990926766, "grad_norm": 0.0759415253996849, "learning_rate": 1.185021828165084e-05, "loss": 0.2182, "step": 46743 }, { "epoch": 3.7867790019442644, "grad_norm": 0.07525965571403503, "learning_rate": 1.1845717629056214e-05, "loss": 0.237, "step": 46744 }, { "epoch": 3.7868600129617627, "grad_norm": 0.07234998047351837, "learning_rate": 1.1841216976461587e-05, "loss": 0.2268, "step": 46745 }, { "epoch": 3.786941023979261, "grad_norm": 0.05831919237971306, "learning_rate": 1.183671632386696e-05, "loss": 0.2266, "step": 46746 }, { "epoch": 3.7870220349967596, "grad_norm": 0.07513531297445297, "learning_rate": 1.1832215671272336e-05, "loss": 0.2511, "step": 46747 }, { "epoch": 3.787103046014258, "grad_norm": 0.06514651328325272, "learning_rate": 1.1827715018677708e-05, "loss": 0.1951, "step": 46748 }, { "epoch": 3.787184057031756, "grad_norm": 0.07075642794370651, "learning_rate": 1.1823214366083082e-05, "loss": 0.2222, "step": 46749 }, { "epoch": 3.787265068049255, "grad_norm": 0.09682905673980713, "learning_rate": 1.1818713713488457e-05, "loss": 0.2183, "step": 46750 }, { "epoch": 3.787346079066753, "grad_norm": 0.07281651347875595, "learning_rate": 1.181421306089383e-05, "loss": 0.2481, "step": 46751 }, { "epoch": 3.7874270900842513, "grad_norm": 0.055947430431842804, "learning_rate": 1.1809712408299204e-05, "loss": 0.2086, "step": 46752 }, { "epoch": 3.78750810110175, "grad_norm": 0.07676409929990768, "learning_rate": 1.1805211755704578e-05, "loss": 0.2487, "step": 46753 }, { "epoch": 3.787589112119248, "grad_norm": 0.07692111283540726, "learning_rate": 1.1800711103109951e-05, "loss": 0.2786, "step": 46754 }, { "epoch": 3.7876701231367464, "grad_norm": 0.06674608588218689, "learning_rate": 1.1796210450515325e-05, "loss": 0.2249, "step": 46755 }, { "epoch": 3.787751134154245, "grad_norm": 0.06333047896623611, "learning_rate": 1.17917097979207e-05, "loss": 0.2187, "step": 46756 }, { "epoch": 3.7878321451717434, "grad_norm": 0.07365437597036362, "learning_rate": 1.1787209145326072e-05, "loss": 0.2089, "step": 46757 }, { "epoch": 3.7879131561892416, "grad_norm": 0.09069455415010452, "learning_rate": 1.1782708492731446e-05, "loss": 0.2148, "step": 46758 }, { "epoch": 3.7879941672067403, "grad_norm": 0.07776268571615219, "learning_rate": 1.1778207840136821e-05, "loss": 0.2311, "step": 46759 }, { "epoch": 3.7880751782242386, "grad_norm": 0.08250845968723297, "learning_rate": 1.1773707187542195e-05, "loss": 0.2531, "step": 46760 }, { "epoch": 3.788156189241737, "grad_norm": 0.08193209767341614, "learning_rate": 1.1769206534947568e-05, "loss": 0.2188, "step": 46761 }, { "epoch": 3.7882372002592355, "grad_norm": 0.07312662899494171, "learning_rate": 1.1764705882352942e-05, "loss": 0.2247, "step": 46762 }, { "epoch": 3.7883182112767337, "grad_norm": 0.07228390127420425, "learning_rate": 1.1760205229758315e-05, "loss": 0.2473, "step": 46763 }, { "epoch": 3.788399222294232, "grad_norm": 0.0762069895863533, "learning_rate": 1.1755704577163689e-05, "loss": 0.206, "step": 46764 }, { "epoch": 3.7884802333117307, "grad_norm": 0.06151983514428139, "learning_rate": 1.1751203924569063e-05, "loss": 0.1886, "step": 46765 }, { "epoch": 3.788561244329229, "grad_norm": 0.07053109258413315, "learning_rate": 1.1746703271974436e-05, "loss": 0.227, "step": 46766 }, { "epoch": 3.788642255346727, "grad_norm": 0.06008979305624962, "learning_rate": 1.174220261937981e-05, "loss": 0.1776, "step": 46767 }, { "epoch": 3.7887232663642254, "grad_norm": 0.06683298200368881, "learning_rate": 1.1737701966785185e-05, "loss": 0.2484, "step": 46768 }, { "epoch": 3.7888042773817237, "grad_norm": 0.07074110209941864, "learning_rate": 1.1733201314190557e-05, "loss": 0.1979, "step": 46769 }, { "epoch": 3.7888852883992223, "grad_norm": 0.07903765887022018, "learning_rate": 1.1728700661595932e-05, "loss": 0.2239, "step": 46770 }, { "epoch": 3.7889662994167206, "grad_norm": 0.08197832852602005, "learning_rate": 1.1724200009001306e-05, "loss": 0.2449, "step": 46771 }, { "epoch": 3.789047310434219, "grad_norm": 0.07146139442920685, "learning_rate": 1.171969935640668e-05, "loss": 0.2572, "step": 46772 }, { "epoch": 3.7891283214517175, "grad_norm": 0.061843667179346085, "learning_rate": 1.1715198703812053e-05, "loss": 0.2238, "step": 46773 }, { "epoch": 3.7892093324692158, "grad_norm": 0.07453931123018265, "learning_rate": 1.1710698051217427e-05, "loss": 0.1794, "step": 46774 }, { "epoch": 3.789290343486714, "grad_norm": 0.06958021223545074, "learning_rate": 1.17061973986228e-05, "loss": 0.2162, "step": 46775 }, { "epoch": 3.7893713545042127, "grad_norm": 0.09496792405843735, "learning_rate": 1.1701696746028176e-05, "loss": 0.2395, "step": 46776 }, { "epoch": 3.789452365521711, "grad_norm": 0.06393705308437347, "learning_rate": 1.169719609343355e-05, "loss": 0.2149, "step": 46777 }, { "epoch": 3.789533376539209, "grad_norm": 0.07385969907045364, "learning_rate": 1.1692695440838921e-05, "loss": 0.2408, "step": 46778 }, { "epoch": 3.789614387556708, "grad_norm": 0.06838518381118774, "learning_rate": 1.1688194788244296e-05, "loss": 0.2357, "step": 46779 }, { "epoch": 3.789695398574206, "grad_norm": 0.07647363841533661, "learning_rate": 1.168369413564967e-05, "loss": 0.2594, "step": 46780 }, { "epoch": 3.7897764095917044, "grad_norm": 0.0688256323337555, "learning_rate": 1.1679193483055044e-05, "loss": 0.2013, "step": 46781 }, { "epoch": 3.789857420609203, "grad_norm": 0.07052019238471985, "learning_rate": 1.1674692830460417e-05, "loss": 0.2156, "step": 46782 }, { "epoch": 3.7899384316267013, "grad_norm": 0.07364077866077423, "learning_rate": 1.1670192177865791e-05, "loss": 0.2067, "step": 46783 }, { "epoch": 3.7900194426441995, "grad_norm": 0.0811486765742302, "learning_rate": 1.1665691525271165e-05, "loss": 0.2418, "step": 46784 }, { "epoch": 3.7901004536616982, "grad_norm": 0.08562948554754257, "learning_rate": 1.166119087267654e-05, "loss": 0.2343, "step": 46785 }, { "epoch": 3.7901814646791965, "grad_norm": 0.08794140070676804, "learning_rate": 1.1656690220081913e-05, "loss": 0.2201, "step": 46786 }, { "epoch": 3.7902624756966947, "grad_norm": 0.07163339853286743, "learning_rate": 1.1652189567487285e-05, "loss": 0.2319, "step": 46787 }, { "epoch": 3.790343486714193, "grad_norm": 0.06453966349363327, "learning_rate": 1.164768891489266e-05, "loss": 0.2019, "step": 46788 }, { "epoch": 3.7904244977316917, "grad_norm": 0.0652361512184143, "learning_rate": 1.1643188262298034e-05, "loss": 0.2053, "step": 46789 }, { "epoch": 3.79050550874919, "grad_norm": 0.06884594261646271, "learning_rate": 1.1638687609703408e-05, "loss": 0.2362, "step": 46790 }, { "epoch": 3.790586519766688, "grad_norm": 0.07537364959716797, "learning_rate": 1.1634186957108781e-05, "loss": 0.2326, "step": 46791 }, { "epoch": 3.7906675307841864, "grad_norm": 0.06967596709728241, "learning_rate": 1.1629686304514155e-05, "loss": 0.1997, "step": 46792 }, { "epoch": 3.790748541801685, "grad_norm": 0.08205866068601608, "learning_rate": 1.1625185651919529e-05, "loss": 0.2289, "step": 46793 }, { "epoch": 3.7908295528191833, "grad_norm": 0.08043761551380157, "learning_rate": 1.1620684999324904e-05, "loss": 0.2037, "step": 46794 }, { "epoch": 3.7909105638366816, "grad_norm": 0.07155504822731018, "learning_rate": 1.1616184346730276e-05, "loss": 0.2216, "step": 46795 }, { "epoch": 3.7909915748541803, "grad_norm": 0.07402777671813965, "learning_rate": 1.161168369413565e-05, "loss": 0.237, "step": 46796 }, { "epoch": 3.7910725858716785, "grad_norm": 0.07951734960079193, "learning_rate": 1.1607183041541025e-05, "loss": 0.2707, "step": 46797 }, { "epoch": 3.7911535968891767, "grad_norm": 0.07128282636404037, "learning_rate": 1.1602682388946398e-05, "loss": 0.2531, "step": 46798 }, { "epoch": 3.7912346079066754, "grad_norm": 0.07474765181541443, "learning_rate": 1.1598181736351772e-05, "loss": 0.2073, "step": 46799 }, { "epoch": 3.7913156189241737, "grad_norm": 0.06123476102948189, "learning_rate": 1.1593681083757146e-05, "loss": 0.2372, "step": 46800 }, { "epoch": 3.791396629941672, "grad_norm": 0.08566850423812866, "learning_rate": 1.1589180431162519e-05, "loss": 0.2129, "step": 46801 }, { "epoch": 3.7914776409591706, "grad_norm": 0.0908622220158577, "learning_rate": 1.1584679778567893e-05, "loss": 0.2277, "step": 46802 }, { "epoch": 3.791558651976669, "grad_norm": 0.05587979778647423, "learning_rate": 1.1580179125973268e-05, "loss": 0.2102, "step": 46803 }, { "epoch": 3.791639662994167, "grad_norm": 0.08635761588811874, "learning_rate": 1.157567847337864e-05, "loss": 0.208, "step": 46804 }, { "epoch": 3.791720674011666, "grad_norm": 0.07278425991535187, "learning_rate": 1.1571177820784014e-05, "loss": 0.2413, "step": 46805 }, { "epoch": 3.791801685029164, "grad_norm": 0.08281299471855164, "learning_rate": 1.1566677168189389e-05, "loss": 0.2106, "step": 46806 }, { "epoch": 3.7918826960466623, "grad_norm": 0.07331330329179764, "learning_rate": 1.1562176515594762e-05, "loss": 0.2529, "step": 46807 }, { "epoch": 3.791963707064161, "grad_norm": 0.07193376123905182, "learning_rate": 1.1557675863000134e-05, "loss": 0.2057, "step": 46808 }, { "epoch": 3.792044718081659, "grad_norm": 0.0806722342967987, "learning_rate": 1.155317521040551e-05, "loss": 0.2446, "step": 46809 }, { "epoch": 3.7921257290991575, "grad_norm": 0.0638408288359642, "learning_rate": 1.1548674557810883e-05, "loss": 0.24, "step": 46810 }, { "epoch": 3.7922067401166557, "grad_norm": 0.07036413997411728, "learning_rate": 1.1544173905216257e-05, "loss": 0.2169, "step": 46811 }, { "epoch": 3.7922877511341544, "grad_norm": 0.06902635842561722, "learning_rate": 1.153967325262163e-05, "loss": 0.2231, "step": 46812 }, { "epoch": 3.7923687621516526, "grad_norm": 0.06593969464302063, "learning_rate": 1.1535172600027004e-05, "loss": 0.2089, "step": 46813 }, { "epoch": 3.792449773169151, "grad_norm": 0.07559620589017868, "learning_rate": 1.1530671947432378e-05, "loss": 0.2276, "step": 46814 }, { "epoch": 3.792530784186649, "grad_norm": 0.06421814113855362, "learning_rate": 1.1526171294837753e-05, "loss": 0.2729, "step": 46815 }, { "epoch": 3.792611795204148, "grad_norm": 0.06696971505880356, "learning_rate": 1.1521670642243127e-05, "loss": 0.2587, "step": 46816 }, { "epoch": 3.792692806221646, "grad_norm": 0.08010052144527435, "learning_rate": 1.1517169989648498e-05, "loss": 0.1825, "step": 46817 }, { "epoch": 3.7927738172391443, "grad_norm": 0.08656228333711624, "learning_rate": 1.1512669337053874e-05, "loss": 0.2341, "step": 46818 }, { "epoch": 3.792854828256643, "grad_norm": 0.07407809793949127, "learning_rate": 1.1508168684459247e-05, "loss": 0.2599, "step": 46819 }, { "epoch": 3.7929358392741412, "grad_norm": 0.07218343764543533, "learning_rate": 1.1503668031864621e-05, "loss": 0.2203, "step": 46820 }, { "epoch": 3.7930168502916395, "grad_norm": 0.07398039847612381, "learning_rate": 1.1499167379269995e-05, "loss": 0.2177, "step": 46821 }, { "epoch": 3.793097861309138, "grad_norm": 0.06317486613988876, "learning_rate": 1.1494666726675368e-05, "loss": 0.2141, "step": 46822 }, { "epoch": 3.7931788723266364, "grad_norm": 0.07027360796928406, "learning_rate": 1.1490166074080742e-05, "loss": 0.2265, "step": 46823 }, { "epoch": 3.7932598833441347, "grad_norm": 0.07729087769985199, "learning_rate": 1.1485665421486117e-05, "loss": 0.2256, "step": 46824 }, { "epoch": 3.7933408943616334, "grad_norm": 0.07687903195619583, "learning_rate": 1.1481164768891489e-05, "loss": 0.2307, "step": 46825 }, { "epoch": 3.7934219053791316, "grad_norm": 0.07208074629306793, "learning_rate": 1.1476664116296863e-05, "loss": 0.2155, "step": 46826 }, { "epoch": 3.79350291639663, "grad_norm": 0.07030342519283295, "learning_rate": 1.1472163463702238e-05, "loss": 0.2055, "step": 46827 }, { "epoch": 3.7935839274141285, "grad_norm": 0.06092243641614914, "learning_rate": 1.1467662811107612e-05, "loss": 0.2253, "step": 46828 }, { "epoch": 3.7936649384316268, "grad_norm": 0.08080343902111053, "learning_rate": 1.1463162158512985e-05, "loss": 0.2315, "step": 46829 }, { "epoch": 3.793745949449125, "grad_norm": 0.06889671087265015, "learning_rate": 1.1458661505918359e-05, "loss": 0.2071, "step": 46830 }, { "epoch": 3.7938269604666237, "grad_norm": 0.061423543840646744, "learning_rate": 1.1454160853323732e-05, "loss": 0.1851, "step": 46831 }, { "epoch": 3.793907971484122, "grad_norm": 0.06487643718719482, "learning_rate": 1.1449660200729106e-05, "loss": 0.225, "step": 46832 }, { "epoch": 3.79398898250162, "grad_norm": 0.0830056294798851, "learning_rate": 1.1445159548134481e-05, "loss": 0.217, "step": 46833 }, { "epoch": 3.7940699935191184, "grad_norm": 0.06530779600143433, "learning_rate": 1.1440658895539853e-05, "loss": 0.2175, "step": 46834 }, { "epoch": 3.794151004536617, "grad_norm": 0.07887805998325348, "learning_rate": 1.1436158242945227e-05, "loss": 0.2445, "step": 46835 }, { "epoch": 3.7942320155541154, "grad_norm": 0.06664882600307465, "learning_rate": 1.1431657590350602e-05, "loss": 0.2433, "step": 46836 }, { "epoch": 3.7943130265716136, "grad_norm": 0.06435833871364594, "learning_rate": 1.1427156937755976e-05, "loss": 0.2001, "step": 46837 }, { "epoch": 3.794394037589112, "grad_norm": 0.062327224761247635, "learning_rate": 1.1422656285161348e-05, "loss": 0.2213, "step": 46838 }, { "epoch": 3.7944750486066106, "grad_norm": 0.07332039624452591, "learning_rate": 1.1418155632566723e-05, "loss": 0.1763, "step": 46839 }, { "epoch": 3.794556059624109, "grad_norm": 0.09450053423643112, "learning_rate": 1.1413654979972096e-05, "loss": 0.2553, "step": 46840 }, { "epoch": 3.794637070641607, "grad_norm": 0.06693083792924881, "learning_rate": 1.140915432737747e-05, "loss": 0.1962, "step": 46841 }, { "epoch": 3.7947180816591057, "grad_norm": 0.07547756284475327, "learning_rate": 1.1404653674782845e-05, "loss": 0.217, "step": 46842 }, { "epoch": 3.794799092676604, "grad_norm": 0.06735184788703918, "learning_rate": 1.1400153022188217e-05, "loss": 0.2216, "step": 46843 }, { "epoch": 3.7948801036941022, "grad_norm": 0.06307090818881989, "learning_rate": 1.1395652369593591e-05, "loss": 0.2395, "step": 46844 }, { "epoch": 3.794961114711601, "grad_norm": 0.06501543521881104, "learning_rate": 1.1391151716998966e-05, "loss": 0.2125, "step": 46845 }, { "epoch": 3.795042125729099, "grad_norm": 0.09293830394744873, "learning_rate": 1.138665106440434e-05, "loss": 0.2492, "step": 46846 }, { "epoch": 3.7951231367465974, "grad_norm": 0.06201693415641785, "learning_rate": 1.1382150411809712e-05, "loss": 0.2137, "step": 46847 }, { "epoch": 3.795204147764096, "grad_norm": 0.08595079183578491, "learning_rate": 1.1377649759215087e-05, "loss": 0.2071, "step": 46848 }, { "epoch": 3.7952851587815943, "grad_norm": 0.07711045444011688, "learning_rate": 1.137314910662046e-05, "loss": 0.2485, "step": 46849 }, { "epoch": 3.7953661697990926, "grad_norm": 0.08922844380140305, "learning_rate": 1.1368648454025834e-05, "loss": 0.23, "step": 46850 }, { "epoch": 3.7954471808165913, "grad_norm": 0.07273554801940918, "learning_rate": 1.1364147801431208e-05, "loss": 0.2015, "step": 46851 }, { "epoch": 3.7955281918340895, "grad_norm": 0.06916754692792892, "learning_rate": 1.1359647148836581e-05, "loss": 0.2357, "step": 46852 }, { "epoch": 3.7956092028515878, "grad_norm": 0.08604365587234497, "learning_rate": 1.1355146496241955e-05, "loss": 0.2403, "step": 46853 }, { "epoch": 3.7956902138690864, "grad_norm": 0.07010132074356079, "learning_rate": 1.135064584364733e-05, "loss": 0.2159, "step": 46854 }, { "epoch": 3.7957712248865847, "grad_norm": 0.08245091885328293, "learning_rate": 1.1346145191052702e-05, "loss": 0.2221, "step": 46855 }, { "epoch": 3.795852235904083, "grad_norm": 0.07700005918741226, "learning_rate": 1.1341644538458076e-05, "loss": 0.2152, "step": 46856 }, { "epoch": 3.795933246921581, "grad_norm": 0.08508516103029251, "learning_rate": 1.1337143885863451e-05, "loss": 0.2322, "step": 46857 }, { "epoch": 3.79601425793908, "grad_norm": 0.08205549418926239, "learning_rate": 1.1332643233268825e-05, "loss": 0.1954, "step": 46858 }, { "epoch": 3.796095268956578, "grad_norm": 0.06523613631725311, "learning_rate": 1.1328142580674198e-05, "loss": 0.2025, "step": 46859 }, { "epoch": 3.7961762799740764, "grad_norm": 0.08023390918970108, "learning_rate": 1.1323641928079572e-05, "loss": 0.2702, "step": 46860 }, { "epoch": 3.7962572909915746, "grad_norm": 0.08578156679868698, "learning_rate": 1.1319141275484946e-05, "loss": 0.2349, "step": 46861 }, { "epoch": 3.7963383020090733, "grad_norm": 0.06631217896938324, "learning_rate": 1.1314640622890319e-05, "loss": 0.2388, "step": 46862 }, { "epoch": 3.7964193130265715, "grad_norm": 0.07069762051105499, "learning_rate": 1.1310139970295694e-05, "loss": 0.2054, "step": 46863 }, { "epoch": 3.79650032404407, "grad_norm": 0.07219357043504715, "learning_rate": 1.1305639317701066e-05, "loss": 0.2596, "step": 46864 }, { "epoch": 3.7965813350615685, "grad_norm": 0.05612671747803688, "learning_rate": 1.130113866510644e-05, "loss": 0.2227, "step": 46865 }, { "epoch": 3.7966623460790667, "grad_norm": 0.08102644979953766, "learning_rate": 1.1296638012511815e-05, "loss": 0.2556, "step": 46866 }, { "epoch": 3.796743357096565, "grad_norm": 0.05597464740276337, "learning_rate": 1.1292137359917189e-05, "loss": 0.2073, "step": 46867 }, { "epoch": 3.7968243681140637, "grad_norm": 0.060989245772361755, "learning_rate": 1.1287636707322562e-05, "loss": 0.2173, "step": 46868 }, { "epoch": 3.796905379131562, "grad_norm": 0.07221823185682297, "learning_rate": 1.1283136054727936e-05, "loss": 0.2246, "step": 46869 }, { "epoch": 3.79698639014906, "grad_norm": 0.07029426097869873, "learning_rate": 1.127863540213331e-05, "loss": 0.2219, "step": 46870 }, { "epoch": 3.797067401166559, "grad_norm": 0.06306787580251694, "learning_rate": 1.1274134749538683e-05, "loss": 0.1918, "step": 46871 }, { "epoch": 3.797148412184057, "grad_norm": 0.0684630498290062, "learning_rate": 1.1269634096944059e-05, "loss": 0.2177, "step": 46872 }, { "epoch": 3.7972294232015553, "grad_norm": 0.06849277019500732, "learning_rate": 1.126513344434943e-05, "loss": 0.2131, "step": 46873 }, { "epoch": 3.797310434219054, "grad_norm": 0.08130700141191483, "learning_rate": 1.1260632791754806e-05, "loss": 0.2531, "step": 46874 }, { "epoch": 3.7973914452365523, "grad_norm": 0.0685175359249115, "learning_rate": 1.125613213916018e-05, "loss": 0.2292, "step": 46875 }, { "epoch": 3.7974724562540505, "grad_norm": 0.054702240973711014, "learning_rate": 1.1251631486565553e-05, "loss": 0.1895, "step": 46876 }, { "epoch": 3.797553467271549, "grad_norm": 0.07435936480760574, "learning_rate": 1.1247130833970927e-05, "loss": 0.2479, "step": 46877 }, { "epoch": 3.7976344782890474, "grad_norm": 0.062439724802970886, "learning_rate": 1.12426301813763e-05, "loss": 0.2002, "step": 46878 }, { "epoch": 3.7977154893065457, "grad_norm": 0.07218343019485474, "learning_rate": 1.1238129528781674e-05, "loss": 0.1966, "step": 46879 }, { "epoch": 3.797796500324044, "grad_norm": 0.055278975516557693, "learning_rate": 1.1233628876187047e-05, "loss": 0.2051, "step": 46880 }, { "epoch": 3.7978775113415426, "grad_norm": 0.07765598595142365, "learning_rate": 1.1229128223592421e-05, "loss": 0.2112, "step": 46881 }, { "epoch": 3.797958522359041, "grad_norm": 0.07940090447664261, "learning_rate": 1.1224627570997795e-05, "loss": 0.2491, "step": 46882 }, { "epoch": 3.798039533376539, "grad_norm": 0.06536324322223663, "learning_rate": 1.122012691840317e-05, "loss": 0.2599, "step": 46883 }, { "epoch": 3.7981205443940373, "grad_norm": 0.06517624109983444, "learning_rate": 1.1215626265808543e-05, "loss": 0.1863, "step": 46884 }, { "epoch": 3.798201555411536, "grad_norm": 0.06190231814980507, "learning_rate": 1.1211125613213917e-05, "loss": 0.2044, "step": 46885 }, { "epoch": 3.7982825664290343, "grad_norm": 0.0682215765118599, "learning_rate": 1.120662496061929e-05, "loss": 0.2717, "step": 46886 }, { "epoch": 3.7983635774465325, "grad_norm": 0.08895383030176163, "learning_rate": 1.1202124308024664e-05, "loss": 0.2608, "step": 46887 }, { "epoch": 3.798444588464031, "grad_norm": 0.07362218201160431, "learning_rate": 1.1197623655430038e-05, "loss": 0.224, "step": 46888 }, { "epoch": 3.7985255994815295, "grad_norm": 0.06116807460784912, "learning_rate": 1.1193123002835411e-05, "loss": 0.2436, "step": 46889 }, { "epoch": 3.7986066104990277, "grad_norm": 0.06734014302492142, "learning_rate": 1.1188622350240785e-05, "loss": 0.2235, "step": 46890 }, { "epoch": 3.7986876215165264, "grad_norm": 0.06932863593101501, "learning_rate": 1.1184121697646159e-05, "loss": 0.2126, "step": 46891 }, { "epoch": 3.7987686325340246, "grad_norm": 0.06863842904567719, "learning_rate": 1.1179621045051534e-05, "loss": 0.2258, "step": 46892 }, { "epoch": 3.798849643551523, "grad_norm": 0.06340306252241135, "learning_rate": 1.1175120392456908e-05, "loss": 0.2538, "step": 46893 }, { "epoch": 3.7989306545690216, "grad_norm": 0.0711321234703064, "learning_rate": 1.117061973986228e-05, "loss": 0.2343, "step": 46894 }, { "epoch": 3.79901166558652, "grad_norm": 0.08093009144067764, "learning_rate": 1.1166119087267655e-05, "loss": 0.2195, "step": 46895 }, { "epoch": 3.799092676604018, "grad_norm": 0.08460341393947601, "learning_rate": 1.1161618434673028e-05, "loss": 0.2329, "step": 46896 }, { "epoch": 3.7991736876215167, "grad_norm": 0.06339055299758911, "learning_rate": 1.1157117782078402e-05, "loss": 0.2479, "step": 46897 }, { "epoch": 3.799254698639015, "grad_norm": 0.06539372354745865, "learning_rate": 1.1152617129483776e-05, "loss": 0.2171, "step": 46898 }, { "epoch": 3.7993357096565132, "grad_norm": 0.0670798122882843, "learning_rate": 1.114811647688915e-05, "loss": 0.2258, "step": 46899 }, { "epoch": 3.799416720674012, "grad_norm": 0.07918045669794083, "learning_rate": 1.1143615824294523e-05, "loss": 0.2393, "step": 46900 }, { "epoch": 3.79949773169151, "grad_norm": 0.08320426940917969, "learning_rate": 1.1139115171699898e-05, "loss": 0.211, "step": 46901 }, { "epoch": 3.7995787427090084, "grad_norm": 0.07436665892601013, "learning_rate": 1.1134614519105272e-05, "loss": 0.2237, "step": 46902 }, { "epoch": 3.7996597537265067, "grad_norm": 0.0866570994257927, "learning_rate": 1.1130113866510644e-05, "loss": 0.2256, "step": 46903 }, { "epoch": 3.7997407647440054, "grad_norm": 0.07788897305727005, "learning_rate": 1.1125613213916019e-05, "loss": 0.2382, "step": 46904 }, { "epoch": 3.7998217757615036, "grad_norm": 0.06670193374156952, "learning_rate": 1.1121112561321393e-05, "loss": 0.2062, "step": 46905 }, { "epoch": 3.799902786779002, "grad_norm": 0.0638703927397728, "learning_rate": 1.1116611908726766e-05, "loss": 0.2127, "step": 46906 }, { "epoch": 3.7999837977965, "grad_norm": 0.06693035364151001, "learning_rate": 1.111211125613214e-05, "loss": 0.2352, "step": 46907 }, { "epoch": 3.8000648088139988, "grad_norm": 0.08188080787658691, "learning_rate": 1.1107610603537513e-05, "loss": 0.2332, "step": 46908 }, { "epoch": 3.800145819831497, "grad_norm": 0.06827425211668015, "learning_rate": 1.1103109950942887e-05, "loss": 0.2249, "step": 46909 }, { "epoch": 3.8002268308489953, "grad_norm": 0.06886734068393707, "learning_rate": 1.1098609298348262e-05, "loss": 0.2149, "step": 46910 }, { "epoch": 3.800307841866494, "grad_norm": 0.08088071644306183, "learning_rate": 1.1094108645753634e-05, "loss": 0.2399, "step": 46911 }, { "epoch": 3.800388852883992, "grad_norm": 0.08014461398124695, "learning_rate": 1.1089607993159008e-05, "loss": 0.2109, "step": 46912 }, { "epoch": 3.8004698639014904, "grad_norm": 0.08599234372377396, "learning_rate": 1.1085107340564383e-05, "loss": 0.2075, "step": 46913 }, { "epoch": 3.800550874918989, "grad_norm": 0.07755023241043091, "learning_rate": 1.1080606687969757e-05, "loss": 0.2339, "step": 46914 }, { "epoch": 3.8006318859364874, "grad_norm": 0.06985119730234146, "learning_rate": 1.107610603537513e-05, "loss": 0.2058, "step": 46915 }, { "epoch": 3.8007128969539856, "grad_norm": 0.0760570541024208, "learning_rate": 1.1071605382780504e-05, "loss": 0.1955, "step": 46916 }, { "epoch": 3.8007939079714843, "grad_norm": 0.06698282063007355, "learning_rate": 1.1067104730185877e-05, "loss": 0.1952, "step": 46917 }, { "epoch": 3.8008749189889826, "grad_norm": 0.07477567344903946, "learning_rate": 1.1062604077591251e-05, "loss": 0.214, "step": 46918 }, { "epoch": 3.800955930006481, "grad_norm": 0.07477416098117828, "learning_rate": 1.1058103424996626e-05, "loss": 0.2861, "step": 46919 }, { "epoch": 3.8010369410239795, "grad_norm": 0.07379210740327835, "learning_rate": 1.1053602772401998e-05, "loss": 0.2168, "step": 46920 }, { "epoch": 3.8011179520414777, "grad_norm": 0.07364281266927719, "learning_rate": 1.1049102119807372e-05, "loss": 0.2101, "step": 46921 }, { "epoch": 3.801198963058976, "grad_norm": 0.06869223713874817, "learning_rate": 1.1044601467212747e-05, "loss": 0.1867, "step": 46922 }, { "epoch": 3.8012799740764747, "grad_norm": 0.072836734354496, "learning_rate": 1.104010081461812e-05, "loss": 0.2036, "step": 46923 }, { "epoch": 3.801360985093973, "grad_norm": 0.060940057039260864, "learning_rate": 1.1035600162023493e-05, "loss": 0.196, "step": 46924 }, { "epoch": 3.801441996111471, "grad_norm": 0.07574928551912308, "learning_rate": 1.1031099509428868e-05, "loss": 0.2082, "step": 46925 }, { "epoch": 3.8015230071289694, "grad_norm": 0.08537424355745316, "learning_rate": 1.1026598856834242e-05, "loss": 0.233, "step": 46926 }, { "epoch": 3.8016040181464676, "grad_norm": 0.06580768525600433, "learning_rate": 1.1022098204239615e-05, "loss": 0.197, "step": 46927 }, { "epoch": 3.8016850291639663, "grad_norm": 0.06638256460428238, "learning_rate": 1.1017597551644989e-05, "loss": 0.2215, "step": 46928 }, { "epoch": 3.8017660401814646, "grad_norm": 0.09222010523080826, "learning_rate": 1.1013096899050362e-05, "loss": 0.2341, "step": 46929 }, { "epoch": 3.801847051198963, "grad_norm": 0.07473092526197433, "learning_rate": 1.1008596246455736e-05, "loss": 0.2388, "step": 46930 }, { "epoch": 3.8019280622164615, "grad_norm": 0.07252812385559082, "learning_rate": 1.1004095593861111e-05, "loss": 0.2267, "step": 46931 }, { "epoch": 3.8020090732339598, "grad_norm": 0.077595055103302, "learning_rate": 1.0999594941266485e-05, "loss": 0.2154, "step": 46932 }, { "epoch": 3.802090084251458, "grad_norm": 0.06827554106712341, "learning_rate": 1.0995094288671857e-05, "loss": 0.2222, "step": 46933 }, { "epoch": 3.8021710952689567, "grad_norm": 0.07482986897230148, "learning_rate": 1.0990593636077232e-05, "loss": 0.1913, "step": 46934 }, { "epoch": 3.802252106286455, "grad_norm": 0.07571776211261749, "learning_rate": 1.0986092983482606e-05, "loss": 0.2262, "step": 46935 }, { "epoch": 3.802333117303953, "grad_norm": 0.08740594238042831, "learning_rate": 1.098159233088798e-05, "loss": 0.2704, "step": 46936 }, { "epoch": 3.802414128321452, "grad_norm": 0.08148932456970215, "learning_rate": 1.0977091678293353e-05, "loss": 0.1973, "step": 46937 }, { "epoch": 3.80249513933895, "grad_norm": 0.07424408197402954, "learning_rate": 1.0972591025698727e-05, "loss": 0.1896, "step": 46938 }, { "epoch": 3.8025761503564484, "grad_norm": 0.06565883755683899, "learning_rate": 1.09680903731041e-05, "loss": 0.193, "step": 46939 }, { "epoch": 3.802657161373947, "grad_norm": 0.0705159530043602, "learning_rate": 1.0963589720509475e-05, "loss": 0.218, "step": 46940 }, { "epoch": 3.8027381723914453, "grad_norm": 0.07602539658546448, "learning_rate": 1.0959089067914847e-05, "loss": 0.2325, "step": 46941 }, { "epoch": 3.8028191834089435, "grad_norm": 0.08233897387981415, "learning_rate": 1.0954588415320221e-05, "loss": 0.2499, "step": 46942 }, { "epoch": 3.8029001944264422, "grad_norm": 0.07934436202049255, "learning_rate": 1.0950087762725596e-05, "loss": 0.2348, "step": 46943 }, { "epoch": 3.8029812054439405, "grad_norm": 0.06771920621395111, "learning_rate": 1.094558711013097e-05, "loss": 0.2272, "step": 46944 }, { "epoch": 3.8030622164614387, "grad_norm": 0.0798344835639, "learning_rate": 1.0941086457536343e-05, "loss": 0.2718, "step": 46945 }, { "epoch": 3.8031432274789374, "grad_norm": 0.062396757304668427, "learning_rate": 1.0936585804941717e-05, "loss": 0.2419, "step": 46946 }, { "epoch": 3.8032242384964356, "grad_norm": 0.0885952040553093, "learning_rate": 1.093208515234709e-05, "loss": 0.2712, "step": 46947 }, { "epoch": 3.803305249513934, "grad_norm": 0.06853952258825302, "learning_rate": 1.0927584499752464e-05, "loss": 0.2351, "step": 46948 }, { "epoch": 3.803386260531432, "grad_norm": 0.05572928115725517, "learning_rate": 1.092308384715784e-05, "loss": 0.181, "step": 46949 }, { "epoch": 3.8034672715489304, "grad_norm": 0.07789760828018188, "learning_rate": 1.0918583194563211e-05, "loss": 0.2282, "step": 46950 }, { "epoch": 3.803548282566429, "grad_norm": 0.06651580333709717, "learning_rate": 1.0914082541968585e-05, "loss": 0.2261, "step": 46951 }, { "epoch": 3.8036292935839273, "grad_norm": 0.07149095833301544, "learning_rate": 1.090958188937396e-05, "loss": 0.2143, "step": 46952 }, { "epoch": 3.8037103046014256, "grad_norm": 0.07038377225399017, "learning_rate": 1.0905081236779334e-05, "loss": 0.2192, "step": 46953 }, { "epoch": 3.8037913156189243, "grad_norm": 0.07646401226520538, "learning_rate": 1.0900580584184706e-05, "loss": 0.2406, "step": 46954 }, { "epoch": 3.8038723266364225, "grad_norm": 0.09022786468267441, "learning_rate": 1.0896079931590081e-05, "loss": 0.2274, "step": 46955 }, { "epoch": 3.8039533376539207, "grad_norm": 0.0737614631652832, "learning_rate": 1.0891579278995455e-05, "loss": 0.2387, "step": 46956 }, { "epoch": 3.8040343486714194, "grad_norm": 0.06396791338920593, "learning_rate": 1.0887078626400828e-05, "loss": 0.2446, "step": 46957 }, { "epoch": 3.8041153596889177, "grad_norm": 0.09200059622526169, "learning_rate": 1.0882577973806204e-05, "loss": 0.225, "step": 46958 }, { "epoch": 3.804196370706416, "grad_norm": 0.07705798000097275, "learning_rate": 1.0878077321211576e-05, "loss": 0.2335, "step": 46959 }, { "epoch": 3.8042773817239146, "grad_norm": 0.06533969193696976, "learning_rate": 1.087357666861695e-05, "loss": 0.2366, "step": 46960 }, { "epoch": 3.804358392741413, "grad_norm": 0.06600935012102127, "learning_rate": 1.0869076016022324e-05, "loss": 0.2587, "step": 46961 }, { "epoch": 3.804439403758911, "grad_norm": 0.06288919597864151, "learning_rate": 1.0864575363427698e-05, "loss": 0.2536, "step": 46962 }, { "epoch": 3.80452041477641, "grad_norm": 0.09356596320867538, "learning_rate": 1.086007471083307e-05, "loss": 0.2503, "step": 46963 }, { "epoch": 3.804601425793908, "grad_norm": 0.0647389367222786, "learning_rate": 1.0855574058238445e-05, "loss": 0.2205, "step": 46964 }, { "epoch": 3.8046824368114063, "grad_norm": 0.06880980730056763, "learning_rate": 1.0851073405643819e-05, "loss": 0.2179, "step": 46965 }, { "epoch": 3.804763447828905, "grad_norm": 0.06399885565042496, "learning_rate": 1.0846572753049192e-05, "loss": 0.2462, "step": 46966 }, { "epoch": 3.804844458846403, "grad_norm": 0.07660163193941116, "learning_rate": 1.0842072100454566e-05, "loss": 0.2162, "step": 46967 }, { "epoch": 3.8049254698639015, "grad_norm": 0.06323090195655823, "learning_rate": 1.083757144785994e-05, "loss": 0.2289, "step": 46968 }, { "epoch": 3.8050064808814, "grad_norm": 0.05920364335179329, "learning_rate": 1.0833070795265313e-05, "loss": 0.2378, "step": 46969 }, { "epoch": 3.8050874918988984, "grad_norm": 0.07590261846780777, "learning_rate": 1.0828570142670689e-05, "loss": 0.205, "step": 46970 }, { "epoch": 3.8051685029163966, "grad_norm": 0.07777293771505356, "learning_rate": 1.082406949007606e-05, "loss": 0.2218, "step": 46971 }, { "epoch": 3.805249513933895, "grad_norm": 0.09066983312368393, "learning_rate": 1.0819568837481436e-05, "loss": 0.2632, "step": 46972 }, { "epoch": 3.805330524951393, "grad_norm": 0.07806780189275742, "learning_rate": 1.081506818488681e-05, "loss": 0.1912, "step": 46973 }, { "epoch": 3.805411535968892, "grad_norm": 0.0826018676161766, "learning_rate": 1.0810567532292183e-05, "loss": 0.2424, "step": 46974 }, { "epoch": 3.80549254698639, "grad_norm": 0.08856657892465591, "learning_rate": 1.0806066879697557e-05, "loss": 0.2551, "step": 46975 }, { "epoch": 3.8055735580038883, "grad_norm": 0.06531494110822678, "learning_rate": 1.080156622710293e-05, "loss": 0.223, "step": 46976 }, { "epoch": 3.805654569021387, "grad_norm": 0.07311249524354935, "learning_rate": 1.0797065574508304e-05, "loss": 0.2724, "step": 46977 }, { "epoch": 3.8057355800388852, "grad_norm": 0.060811128467321396, "learning_rate": 1.0792564921913677e-05, "loss": 0.2253, "step": 46978 }, { "epoch": 3.8058165910563835, "grad_norm": 0.06457604467868805, "learning_rate": 1.0788064269319053e-05, "loss": 0.2496, "step": 46979 }, { "epoch": 3.805897602073882, "grad_norm": 0.08397595584392548, "learning_rate": 1.0783563616724425e-05, "loss": 0.2444, "step": 46980 }, { "epoch": 3.8059786130913804, "grad_norm": 0.07146035879850388, "learning_rate": 1.07790629641298e-05, "loss": 0.2103, "step": 46981 }, { "epoch": 3.8060596241088787, "grad_norm": 0.08192295581102371, "learning_rate": 1.0774562311535174e-05, "loss": 0.2084, "step": 46982 }, { "epoch": 3.8061406351263773, "grad_norm": 0.057976480573415756, "learning_rate": 1.0770061658940547e-05, "loss": 0.2378, "step": 46983 }, { "epoch": 3.8062216461438756, "grad_norm": 0.05698695778846741, "learning_rate": 1.076556100634592e-05, "loss": 0.1849, "step": 46984 }, { "epoch": 3.806302657161374, "grad_norm": 0.06821150332689285, "learning_rate": 1.0761060353751294e-05, "loss": 0.2393, "step": 46985 }, { "epoch": 3.8063836681788725, "grad_norm": 0.07215702533721924, "learning_rate": 1.0756559701156668e-05, "loss": 0.2485, "step": 46986 }, { "epoch": 3.8064646791963708, "grad_norm": 0.08539849519729614, "learning_rate": 1.0752059048562042e-05, "loss": 0.2288, "step": 46987 }, { "epoch": 3.806545690213869, "grad_norm": 0.08093149214982986, "learning_rate": 1.0747558395967417e-05, "loss": 0.2345, "step": 46988 }, { "epoch": 3.8066267012313677, "grad_norm": 0.06490255147218704, "learning_rate": 1.0743057743372789e-05, "loss": 0.2143, "step": 46989 }, { "epoch": 3.806707712248866, "grad_norm": 0.08042438328266144, "learning_rate": 1.0738557090778164e-05, "loss": 0.2459, "step": 46990 }, { "epoch": 3.806788723266364, "grad_norm": 0.09989763796329498, "learning_rate": 1.0734056438183538e-05, "loss": 0.2358, "step": 46991 }, { "epoch": 3.806869734283863, "grad_norm": 0.06737187504768372, "learning_rate": 1.0729555785588911e-05, "loss": 0.2002, "step": 46992 }, { "epoch": 3.806950745301361, "grad_norm": 0.0732245147228241, "learning_rate": 1.0725055132994285e-05, "loss": 0.2308, "step": 46993 }, { "epoch": 3.8070317563188594, "grad_norm": 0.08116856962442398, "learning_rate": 1.0720554480399658e-05, "loss": 0.2367, "step": 46994 }, { "epoch": 3.8071127673363576, "grad_norm": 0.08011330664157867, "learning_rate": 1.0716053827805032e-05, "loss": 0.2478, "step": 46995 }, { "epoch": 3.807193778353856, "grad_norm": 0.06650631129741669, "learning_rate": 1.0711553175210406e-05, "loss": 0.2343, "step": 46996 }, { "epoch": 3.8072747893713546, "grad_norm": 0.075144462287426, "learning_rate": 1.070705252261578e-05, "loss": 0.2326, "step": 46997 }, { "epoch": 3.807355800388853, "grad_norm": 0.05947421118617058, "learning_rate": 1.0702551870021153e-05, "loss": 0.1932, "step": 46998 }, { "epoch": 3.807436811406351, "grad_norm": 0.08503783494234085, "learning_rate": 1.0698051217426528e-05, "loss": 0.2376, "step": 46999 }, { "epoch": 3.8075178224238497, "grad_norm": 0.06805943697690964, "learning_rate": 1.0693550564831902e-05, "loss": 0.1997, "step": 47000 }, { "epoch": 3.807598833441348, "grad_norm": 0.07300718128681183, "learning_rate": 1.0689049912237275e-05, "loss": 0.2294, "step": 47001 }, { "epoch": 3.807679844458846, "grad_norm": 0.06472918391227722, "learning_rate": 1.0684549259642649e-05, "loss": 0.2222, "step": 47002 }, { "epoch": 3.807760855476345, "grad_norm": 0.08821234107017517, "learning_rate": 1.0680048607048023e-05, "loss": 0.2618, "step": 47003 }, { "epoch": 3.807841866493843, "grad_norm": 0.061438728123903275, "learning_rate": 1.0675547954453396e-05, "loss": 0.2295, "step": 47004 }, { "epoch": 3.8079228775113414, "grad_norm": 0.0627550482749939, "learning_rate": 1.067104730185877e-05, "loss": 0.2232, "step": 47005 }, { "epoch": 3.80800388852884, "grad_norm": 0.07976122945547104, "learning_rate": 1.0666546649264143e-05, "loss": 0.2091, "step": 47006 }, { "epoch": 3.8080848995463383, "grad_norm": 0.06467067450284958, "learning_rate": 1.0662045996669517e-05, "loss": 0.2178, "step": 47007 }, { "epoch": 3.8081659105638366, "grad_norm": 0.07180802524089813, "learning_rate": 1.0657545344074892e-05, "loss": 0.1963, "step": 47008 }, { "epoch": 3.8082469215813353, "grad_norm": 0.06795401126146317, "learning_rate": 1.0653044691480266e-05, "loss": 0.2451, "step": 47009 }, { "epoch": 3.8083279325988335, "grad_norm": 0.10496599227190018, "learning_rate": 1.0648544038885638e-05, "loss": 0.2441, "step": 47010 }, { "epoch": 3.8084089436163318, "grad_norm": 0.0665077492594719, "learning_rate": 1.0644043386291013e-05, "loss": 0.2213, "step": 47011 }, { "epoch": 3.8084899546338304, "grad_norm": 0.07035201042890549, "learning_rate": 1.0639542733696387e-05, "loss": 0.2269, "step": 47012 }, { "epoch": 3.8085709656513287, "grad_norm": 0.07154831290245056, "learning_rate": 1.063504208110176e-05, "loss": 0.2242, "step": 47013 }, { "epoch": 3.808651976668827, "grad_norm": 0.0710812658071518, "learning_rate": 1.0630541428507134e-05, "loss": 0.2438, "step": 47014 }, { "epoch": 3.808732987686325, "grad_norm": 0.07493715733289719, "learning_rate": 1.0626040775912508e-05, "loss": 0.1937, "step": 47015 }, { "epoch": 3.808813998703824, "grad_norm": 0.0741448625922203, "learning_rate": 1.0621540123317881e-05, "loss": 0.2626, "step": 47016 }, { "epoch": 3.808895009721322, "grad_norm": 0.07966463267803192, "learning_rate": 1.0617039470723256e-05, "loss": 0.2063, "step": 47017 }, { "epoch": 3.8089760207388204, "grad_norm": 0.0726398155093193, "learning_rate": 1.061253881812863e-05, "loss": 0.2499, "step": 47018 }, { "epoch": 3.8090570317563186, "grad_norm": 0.07825640588998795, "learning_rate": 1.0608038165534002e-05, "loss": 0.2546, "step": 47019 }, { "epoch": 3.8091380427738173, "grad_norm": 0.07793726027011871, "learning_rate": 1.0603537512939377e-05, "loss": 0.22, "step": 47020 }, { "epoch": 3.8092190537913155, "grad_norm": 0.06841432303190231, "learning_rate": 1.059903686034475e-05, "loss": 0.186, "step": 47021 }, { "epoch": 3.809300064808814, "grad_norm": 0.06918314844369888, "learning_rate": 1.0594536207750124e-05, "loss": 0.2519, "step": 47022 }, { "epoch": 3.8093810758263125, "grad_norm": 0.07555932551622391, "learning_rate": 1.0590035555155498e-05, "loss": 0.2431, "step": 47023 }, { "epoch": 3.8094620868438107, "grad_norm": 0.06252846866846085, "learning_rate": 1.0585534902560872e-05, "loss": 0.2124, "step": 47024 }, { "epoch": 3.809543097861309, "grad_norm": 0.07834303379058838, "learning_rate": 1.0581034249966245e-05, "loss": 0.2625, "step": 47025 }, { "epoch": 3.8096241088788076, "grad_norm": 0.08204419165849686, "learning_rate": 1.057653359737162e-05, "loss": 0.2187, "step": 47026 }, { "epoch": 3.809705119896306, "grad_norm": 0.06510677188634872, "learning_rate": 1.0572032944776992e-05, "loss": 0.2681, "step": 47027 }, { "epoch": 3.809786130913804, "grad_norm": 0.06332890689373016, "learning_rate": 1.0567532292182366e-05, "loss": 0.2501, "step": 47028 }, { "epoch": 3.809867141931303, "grad_norm": 0.057162806391716, "learning_rate": 1.0563031639587741e-05, "loss": 0.2032, "step": 47029 }, { "epoch": 3.809948152948801, "grad_norm": 0.06294887512922287, "learning_rate": 1.0558530986993115e-05, "loss": 0.2121, "step": 47030 }, { "epoch": 3.8100291639662993, "grad_norm": 0.06234182044863701, "learning_rate": 1.0554030334398489e-05, "loss": 0.2268, "step": 47031 }, { "epoch": 3.810110174983798, "grad_norm": 0.06718173623085022, "learning_rate": 1.0549529681803862e-05, "loss": 0.2252, "step": 47032 }, { "epoch": 3.8101911860012962, "grad_norm": 0.07185987383127213, "learning_rate": 1.0545029029209236e-05, "loss": 0.24, "step": 47033 }, { "epoch": 3.8102721970187945, "grad_norm": 0.07447812706232071, "learning_rate": 1.054052837661461e-05, "loss": 0.2539, "step": 47034 }, { "epoch": 3.810353208036293, "grad_norm": 0.06620065122842789, "learning_rate": 1.0536027724019985e-05, "loss": 0.2141, "step": 47035 }, { "epoch": 3.8104342190537914, "grad_norm": 0.07579522579908371, "learning_rate": 1.0531527071425357e-05, "loss": 0.227, "step": 47036 }, { "epoch": 3.8105152300712897, "grad_norm": 0.07805155217647552, "learning_rate": 1.052702641883073e-05, "loss": 0.2272, "step": 47037 }, { "epoch": 3.810596241088788, "grad_norm": 0.06929910182952881, "learning_rate": 1.0522525766236105e-05, "loss": 0.2378, "step": 47038 }, { "epoch": 3.8106772521062866, "grad_norm": 0.0670161247253418, "learning_rate": 1.0518025113641479e-05, "loss": 0.1883, "step": 47039 }, { "epoch": 3.810758263123785, "grad_norm": 0.06071063503623009, "learning_rate": 1.0513524461046851e-05, "loss": 0.2017, "step": 47040 }, { "epoch": 3.810839274141283, "grad_norm": 0.07034777849912643, "learning_rate": 1.0509023808452226e-05, "loss": 0.2325, "step": 47041 }, { "epoch": 3.8109202851587813, "grad_norm": 0.0668913945555687, "learning_rate": 1.05045231558576e-05, "loss": 0.2328, "step": 47042 }, { "epoch": 3.81100129617628, "grad_norm": 0.07246463745832443, "learning_rate": 1.0500022503262973e-05, "loss": 0.2207, "step": 47043 }, { "epoch": 3.8110823071937783, "grad_norm": 0.07312662899494171, "learning_rate": 1.0495521850668349e-05, "loss": 0.1934, "step": 47044 }, { "epoch": 3.8111633182112765, "grad_norm": 0.08061020821332932, "learning_rate": 1.049102119807372e-05, "loss": 0.2648, "step": 47045 }, { "epoch": 3.811244329228775, "grad_norm": 0.06351017206907272, "learning_rate": 1.0486520545479094e-05, "loss": 0.2229, "step": 47046 }, { "epoch": 3.8113253402462735, "grad_norm": 0.06795711815357208, "learning_rate": 1.048201989288447e-05, "loss": 0.2055, "step": 47047 }, { "epoch": 3.8114063512637717, "grad_norm": 0.06773922592401505, "learning_rate": 1.0477519240289843e-05, "loss": 0.2385, "step": 47048 }, { "epoch": 3.8114873622812704, "grad_norm": 0.07266738265752792, "learning_rate": 1.0473018587695215e-05, "loss": 0.2618, "step": 47049 }, { "epoch": 3.8115683732987686, "grad_norm": 0.08389297127723694, "learning_rate": 1.046851793510059e-05, "loss": 0.2324, "step": 47050 }, { "epoch": 3.811649384316267, "grad_norm": 0.06552331149578094, "learning_rate": 1.0464017282505964e-05, "loss": 0.2226, "step": 47051 }, { "epoch": 3.8117303953337656, "grad_norm": 0.07753307372331619, "learning_rate": 1.0459516629911338e-05, "loss": 0.2392, "step": 47052 }, { "epoch": 3.811811406351264, "grad_norm": 0.07049240171909332, "learning_rate": 1.0455015977316711e-05, "loss": 0.1888, "step": 47053 }, { "epoch": 3.811892417368762, "grad_norm": 0.07749372720718384, "learning_rate": 1.0450515324722085e-05, "loss": 0.2226, "step": 47054 }, { "epoch": 3.8119734283862607, "grad_norm": 0.07106560468673706, "learning_rate": 1.0446014672127458e-05, "loss": 0.2248, "step": 47055 }, { "epoch": 3.812054439403759, "grad_norm": 0.0729873925447464, "learning_rate": 1.0441514019532834e-05, "loss": 0.2439, "step": 47056 }, { "epoch": 3.8121354504212572, "grad_norm": 0.06278953701257706, "learning_rate": 1.0437013366938206e-05, "loss": 0.1792, "step": 47057 }, { "epoch": 3.812216461438756, "grad_norm": 0.08032325655221939, "learning_rate": 1.043251271434358e-05, "loss": 0.2306, "step": 47058 }, { "epoch": 3.812297472456254, "grad_norm": 0.07312075048685074, "learning_rate": 1.0428012061748955e-05, "loss": 0.2361, "step": 47059 }, { "epoch": 3.8123784834737524, "grad_norm": 0.09086312353610992, "learning_rate": 1.0423511409154328e-05, "loss": 0.2511, "step": 47060 }, { "epoch": 3.8124594944912507, "grad_norm": 0.07702907919883728, "learning_rate": 1.0419010756559702e-05, "loss": 0.1923, "step": 47061 }, { "epoch": 3.8125405055087493, "grad_norm": 0.09750400483608246, "learning_rate": 1.0414510103965075e-05, "loss": 0.2327, "step": 47062 }, { "epoch": 3.8126215165262476, "grad_norm": 0.0724315345287323, "learning_rate": 1.0410009451370449e-05, "loss": 0.2207, "step": 47063 }, { "epoch": 3.812702527543746, "grad_norm": 0.06693287193775177, "learning_rate": 1.0405508798775823e-05, "loss": 0.2127, "step": 47064 }, { "epoch": 3.812783538561244, "grad_norm": 0.07102993130683899, "learning_rate": 1.0401008146181198e-05, "loss": 0.2034, "step": 47065 }, { "epoch": 3.8128645495787428, "grad_norm": 0.07117815315723419, "learning_rate": 1.039650749358657e-05, "loss": 0.2773, "step": 47066 }, { "epoch": 3.812945560596241, "grad_norm": 0.06364418566226959, "learning_rate": 1.0392006840991943e-05, "loss": 0.243, "step": 47067 }, { "epoch": 3.8130265716137393, "grad_norm": 0.10085894912481308, "learning_rate": 1.0387506188397319e-05, "loss": 0.2299, "step": 47068 }, { "epoch": 3.813107582631238, "grad_norm": 0.06981120258569717, "learning_rate": 1.0383005535802692e-05, "loss": 0.1809, "step": 47069 }, { "epoch": 3.813188593648736, "grad_norm": 0.06516273319721222, "learning_rate": 1.0378504883208064e-05, "loss": 0.1991, "step": 47070 }, { "epoch": 3.8132696046662344, "grad_norm": 0.08382715284824371, "learning_rate": 1.037400423061344e-05, "loss": 0.2271, "step": 47071 }, { "epoch": 3.813350615683733, "grad_norm": 0.08625270426273346, "learning_rate": 1.0369503578018813e-05, "loss": 0.2135, "step": 47072 }, { "epoch": 3.8134316267012314, "grad_norm": 0.07801330834627151, "learning_rate": 1.0365002925424187e-05, "loss": 0.2405, "step": 47073 }, { "epoch": 3.8135126377187296, "grad_norm": 0.08635073155164719, "learning_rate": 1.0360502272829562e-05, "loss": 0.2164, "step": 47074 }, { "epoch": 3.8135936487362283, "grad_norm": 0.07388366758823395, "learning_rate": 1.0356001620234934e-05, "loss": 0.2158, "step": 47075 }, { "epoch": 3.8136746597537265, "grad_norm": 0.06860913336277008, "learning_rate": 1.0351500967640307e-05, "loss": 0.2161, "step": 47076 }, { "epoch": 3.813755670771225, "grad_norm": 0.07081514596939087, "learning_rate": 1.0347000315045683e-05, "loss": 0.2519, "step": 47077 }, { "epoch": 3.8138366817887235, "grad_norm": 0.07424487173557281, "learning_rate": 1.0342499662451056e-05, "loss": 0.2206, "step": 47078 }, { "epoch": 3.8139176928062217, "grad_norm": 0.07972376048564911, "learning_rate": 1.033799900985643e-05, "loss": 0.2378, "step": 47079 }, { "epoch": 3.81399870382372, "grad_norm": 0.07115171104669571, "learning_rate": 1.0333498357261804e-05, "loss": 0.2134, "step": 47080 }, { "epoch": 3.8140797148412187, "grad_norm": 0.07627512514591217, "learning_rate": 1.0328997704667177e-05, "loss": 0.19, "step": 47081 }, { "epoch": 3.814160725858717, "grad_norm": 0.07725601643323898, "learning_rate": 1.032449705207255e-05, "loss": 0.2713, "step": 47082 }, { "epoch": 3.814241736876215, "grad_norm": 0.07265699654817581, "learning_rate": 1.0319996399477924e-05, "loss": 0.2425, "step": 47083 }, { "epoch": 3.8143227478937134, "grad_norm": 0.07284402847290039, "learning_rate": 1.0315495746883298e-05, "loss": 0.2464, "step": 47084 }, { "epoch": 3.814403758911212, "grad_norm": 0.07427023351192474, "learning_rate": 1.0310995094288672e-05, "loss": 0.2099, "step": 47085 }, { "epoch": 3.8144847699287103, "grad_norm": 0.05913237854838371, "learning_rate": 1.0306494441694047e-05, "loss": 0.1989, "step": 47086 }, { "epoch": 3.8145657809462086, "grad_norm": 0.07824961096048355, "learning_rate": 1.030199378909942e-05, "loss": 0.2294, "step": 47087 }, { "epoch": 3.814646791963707, "grad_norm": 0.06919219344854355, "learning_rate": 1.0297493136504794e-05, "loss": 0.2452, "step": 47088 }, { "epoch": 3.8147278029812055, "grad_norm": 0.09075573086738586, "learning_rate": 1.0292992483910168e-05, "loss": 0.2306, "step": 47089 }, { "epoch": 3.8148088139987038, "grad_norm": 0.07499659061431885, "learning_rate": 1.0288491831315541e-05, "loss": 0.1971, "step": 47090 }, { "epoch": 3.814889825016202, "grad_norm": 0.06980670988559723, "learning_rate": 1.0283991178720915e-05, "loss": 0.2373, "step": 47091 }, { "epoch": 3.8149708360337007, "grad_norm": 0.06824301928281784, "learning_rate": 1.0279490526126289e-05, "loss": 0.2478, "step": 47092 }, { "epoch": 3.815051847051199, "grad_norm": 0.06680431962013245, "learning_rate": 1.0274989873531662e-05, "loss": 0.2322, "step": 47093 }, { "epoch": 3.815132858068697, "grad_norm": 0.06365301460027695, "learning_rate": 1.0270489220937036e-05, "loss": 0.2449, "step": 47094 }, { "epoch": 3.815213869086196, "grad_norm": 0.07429978251457214, "learning_rate": 1.0265988568342411e-05, "loss": 0.2178, "step": 47095 }, { "epoch": 3.815294880103694, "grad_norm": 0.07181497663259506, "learning_rate": 1.0261487915747783e-05, "loss": 0.2281, "step": 47096 }, { "epoch": 3.8153758911211924, "grad_norm": 0.06791014224290848, "learning_rate": 1.0256987263153158e-05, "loss": 0.2259, "step": 47097 }, { "epoch": 3.815456902138691, "grad_norm": 0.08312923461198807, "learning_rate": 1.0252486610558532e-05, "loss": 0.2231, "step": 47098 }, { "epoch": 3.8155379131561893, "grad_norm": 0.07397427409887314, "learning_rate": 1.0247985957963905e-05, "loss": 0.2101, "step": 47099 }, { "epoch": 3.8156189241736875, "grad_norm": 0.08106406778097153, "learning_rate": 1.0243485305369279e-05, "loss": 0.2218, "step": 47100 }, { "epoch": 3.815699935191186, "grad_norm": 0.07498326152563095, "learning_rate": 1.0238984652774653e-05, "loss": 0.2559, "step": 47101 }, { "epoch": 3.8157809462086845, "grad_norm": 0.07188771665096283, "learning_rate": 1.0234484000180026e-05, "loss": 0.2475, "step": 47102 }, { "epoch": 3.8158619572261827, "grad_norm": 0.07121875882148743, "learning_rate": 1.02299833475854e-05, "loss": 0.238, "step": 47103 }, { "epoch": 3.8159429682436814, "grad_norm": 0.067873515188694, "learning_rate": 1.0225482694990775e-05, "loss": 0.1926, "step": 47104 }, { "epoch": 3.8160239792611796, "grad_norm": 0.07478199154138565, "learning_rate": 1.0220982042396147e-05, "loss": 0.245, "step": 47105 }, { "epoch": 3.816104990278678, "grad_norm": 0.06381670385599136, "learning_rate": 1.0216481389801522e-05, "loss": 0.265, "step": 47106 }, { "epoch": 3.816186001296176, "grad_norm": 0.07955259829759598, "learning_rate": 1.0211980737206896e-05, "loss": 0.2075, "step": 47107 }, { "epoch": 3.816267012313675, "grad_norm": 0.09649759531021118, "learning_rate": 1.020748008461227e-05, "loss": 0.236, "step": 47108 }, { "epoch": 3.816348023331173, "grad_norm": 0.06495945900678635, "learning_rate": 1.0202979432017643e-05, "loss": 0.2247, "step": 47109 }, { "epoch": 3.8164290343486713, "grad_norm": 0.07418833673000336, "learning_rate": 1.0198478779423017e-05, "loss": 0.2183, "step": 47110 }, { "epoch": 3.8165100453661696, "grad_norm": 0.0698823407292366, "learning_rate": 1.019397812682839e-05, "loss": 0.1966, "step": 47111 }, { "epoch": 3.8165910563836682, "grad_norm": 0.08725552260875702, "learning_rate": 1.0189477474233764e-05, "loss": 0.2079, "step": 47112 }, { "epoch": 3.8166720674011665, "grad_norm": 0.060063328593969345, "learning_rate": 1.0184976821639138e-05, "loss": 0.1972, "step": 47113 }, { "epoch": 3.8167530784186647, "grad_norm": 0.06648138910531998, "learning_rate": 1.0180476169044511e-05, "loss": 0.2085, "step": 47114 }, { "epoch": 3.8168340894361634, "grad_norm": 0.06786450743675232, "learning_rate": 1.0175975516449886e-05, "loss": 0.1979, "step": 47115 }, { "epoch": 3.8169151004536617, "grad_norm": 0.06979463994503021, "learning_rate": 1.017147486385526e-05, "loss": 0.2417, "step": 47116 }, { "epoch": 3.81699611147116, "grad_norm": 0.08767568320035934, "learning_rate": 1.0166974211260634e-05, "loss": 0.2051, "step": 47117 }, { "epoch": 3.8170771224886586, "grad_norm": 0.061907850205898285, "learning_rate": 1.0162473558666007e-05, "loss": 0.2234, "step": 47118 }, { "epoch": 3.817158133506157, "grad_norm": 0.06363208591938019, "learning_rate": 1.0157972906071381e-05, "loss": 0.1849, "step": 47119 }, { "epoch": 3.817239144523655, "grad_norm": 0.07457547634840012, "learning_rate": 1.0153472253476755e-05, "loss": 0.2401, "step": 47120 }, { "epoch": 3.817320155541154, "grad_norm": 0.07944779843091965, "learning_rate": 1.0148971600882128e-05, "loss": 0.2281, "step": 47121 }, { "epoch": 3.817401166558652, "grad_norm": 0.07503432035446167, "learning_rate": 1.0144470948287502e-05, "loss": 0.2235, "step": 47122 }, { "epoch": 3.8174821775761503, "grad_norm": 0.07481568306684494, "learning_rate": 1.0139970295692875e-05, "loss": 0.2295, "step": 47123 }, { "epoch": 3.817563188593649, "grad_norm": 0.07879388332366943, "learning_rate": 1.013546964309825e-05, "loss": 0.2554, "step": 47124 }, { "epoch": 3.817644199611147, "grad_norm": 0.0720246359705925, "learning_rate": 1.0130968990503624e-05, "loss": 0.1888, "step": 47125 }, { "epoch": 3.8177252106286454, "grad_norm": 0.08811241388320923, "learning_rate": 1.0126468337908996e-05, "loss": 0.2323, "step": 47126 }, { "epoch": 3.817806221646144, "grad_norm": 0.06210675463080406, "learning_rate": 1.0121967685314371e-05, "loss": 0.2095, "step": 47127 }, { "epoch": 3.8178872326636424, "grad_norm": 0.0800977572798729, "learning_rate": 1.0117467032719745e-05, "loss": 0.2031, "step": 47128 }, { "epoch": 3.8179682436811406, "grad_norm": 0.06897516548633575, "learning_rate": 1.0112966380125119e-05, "loss": 0.2186, "step": 47129 }, { "epoch": 3.818049254698639, "grad_norm": 0.06213768571615219, "learning_rate": 1.0108465727530494e-05, "loss": 0.229, "step": 47130 }, { "epoch": 3.818130265716137, "grad_norm": 0.07513062655925751, "learning_rate": 1.0103965074935866e-05, "loss": 0.2606, "step": 47131 }, { "epoch": 3.818211276733636, "grad_norm": 0.0878511592745781, "learning_rate": 1.009946442234124e-05, "loss": 0.2577, "step": 47132 }, { "epoch": 3.818292287751134, "grad_norm": 0.10174962133169174, "learning_rate": 1.0094963769746615e-05, "loss": 0.2628, "step": 47133 }, { "epoch": 3.8183732987686323, "grad_norm": 0.08247354626655579, "learning_rate": 1.0090463117151988e-05, "loss": 0.223, "step": 47134 }, { "epoch": 3.818454309786131, "grad_norm": 0.10168582201004028, "learning_rate": 1.008596246455736e-05, "loss": 0.2132, "step": 47135 }, { "epoch": 3.8185353208036292, "grad_norm": 0.06533244997262955, "learning_rate": 1.0081461811962736e-05, "loss": 0.2634, "step": 47136 }, { "epoch": 3.8186163318211275, "grad_norm": 0.05962993949651718, "learning_rate": 1.0076961159368109e-05, "loss": 0.2111, "step": 47137 }, { "epoch": 3.818697342838626, "grad_norm": 0.06415306776762009, "learning_rate": 1.0072460506773483e-05, "loss": 0.2213, "step": 47138 }, { "epoch": 3.8187783538561244, "grad_norm": 0.0562756322324276, "learning_rate": 1.0067959854178856e-05, "loss": 0.19, "step": 47139 }, { "epoch": 3.8188593648736227, "grad_norm": 0.06657944619655609, "learning_rate": 1.006345920158423e-05, "loss": 0.2282, "step": 47140 }, { "epoch": 3.8189403758911213, "grad_norm": 0.06836859881877899, "learning_rate": 1.0058958548989604e-05, "loss": 0.2284, "step": 47141 }, { "epoch": 3.8190213869086196, "grad_norm": 0.07675183564424515, "learning_rate": 1.0054457896394979e-05, "loss": 0.2459, "step": 47142 }, { "epoch": 3.819102397926118, "grad_norm": 0.08422303944826126, "learning_rate": 1.004995724380035e-05, "loss": 0.2848, "step": 47143 }, { "epoch": 3.8191834089436165, "grad_norm": 0.07882612198591232, "learning_rate": 1.0045456591205724e-05, "loss": 0.2573, "step": 47144 }, { "epoch": 3.8192644199611148, "grad_norm": 0.058982573449611664, "learning_rate": 1.00409559386111e-05, "loss": 0.2202, "step": 47145 }, { "epoch": 3.819345430978613, "grad_norm": 0.07594690471887589, "learning_rate": 1.0036455286016473e-05, "loss": 0.2612, "step": 47146 }, { "epoch": 3.8194264419961117, "grad_norm": 0.06958582997322083, "learning_rate": 1.0031954633421847e-05, "loss": 0.1971, "step": 47147 }, { "epoch": 3.81950745301361, "grad_norm": 0.09242573380470276, "learning_rate": 1.002745398082722e-05, "loss": 0.2689, "step": 47148 }, { "epoch": 3.819588464031108, "grad_norm": 0.08315322548151016, "learning_rate": 1.0022953328232594e-05, "loss": 0.2526, "step": 47149 }, { "epoch": 3.819669475048607, "grad_norm": 0.06824847310781479, "learning_rate": 1.0018452675637968e-05, "loss": 0.2435, "step": 47150 }, { "epoch": 3.819750486066105, "grad_norm": 0.07796567678451538, "learning_rate": 1.0013952023043343e-05, "loss": 0.221, "step": 47151 }, { "epoch": 3.8198314970836034, "grad_norm": 0.07060608267784119, "learning_rate": 1.0009451370448715e-05, "loss": 0.2133, "step": 47152 }, { "epoch": 3.8199125081011016, "grad_norm": 0.07457584887742996, "learning_rate": 1.0004950717854088e-05, "loss": 0.2156, "step": 47153 }, { "epoch": 3.8199935191186, "grad_norm": 0.09381989389657974, "learning_rate": 1.0000450065259464e-05, "loss": 0.2183, "step": 47154 }, { "epoch": 3.8200745301360985, "grad_norm": 0.0633532926440239, "learning_rate": 9.995949412664837e-06, "loss": 0.2166, "step": 47155 }, { "epoch": 3.820155541153597, "grad_norm": 0.06335076689720154, "learning_rate": 9.99144876007021e-06, "loss": 0.1908, "step": 47156 }, { "epoch": 3.820236552171095, "grad_norm": 0.0725879818201065, "learning_rate": 9.986948107475585e-06, "loss": 0.2631, "step": 47157 }, { "epoch": 3.8203175631885937, "grad_norm": 0.07099777460098267, "learning_rate": 9.982447454880958e-06, "loss": 0.2009, "step": 47158 }, { "epoch": 3.820398574206092, "grad_norm": 0.08141002804040909, "learning_rate": 9.977946802286332e-06, "loss": 0.2486, "step": 47159 }, { "epoch": 3.82047958522359, "grad_norm": 0.06455133110284805, "learning_rate": 9.973446149691707e-06, "loss": 0.2197, "step": 47160 }, { "epoch": 3.820560596241089, "grad_norm": 0.08412550389766693, "learning_rate": 9.968945497097079e-06, "loss": 0.2283, "step": 47161 }, { "epoch": 3.820641607258587, "grad_norm": 0.08144482970237732, "learning_rate": 9.964444844502453e-06, "loss": 0.198, "step": 47162 }, { "epoch": 3.8207226182760854, "grad_norm": 0.06888513267040253, "learning_rate": 9.959944191907828e-06, "loss": 0.2045, "step": 47163 }, { "epoch": 3.820803629293584, "grad_norm": 0.06686069816350937, "learning_rate": 9.955443539313202e-06, "loss": 0.1933, "step": 47164 }, { "epoch": 3.8208846403110823, "grad_norm": 0.1000877320766449, "learning_rate": 9.950942886718573e-06, "loss": 0.2362, "step": 47165 }, { "epoch": 3.8209656513285806, "grad_norm": 0.07413871586322784, "learning_rate": 9.946442234123949e-06, "loss": 0.2607, "step": 47166 }, { "epoch": 3.8210466623460793, "grad_norm": 0.07616769522428513, "learning_rate": 9.941941581529322e-06, "loss": 0.2294, "step": 47167 }, { "epoch": 3.8211276733635775, "grad_norm": 0.08212704211473465, "learning_rate": 9.937440928934696e-06, "loss": 0.208, "step": 47168 }, { "epoch": 3.8212086843810757, "grad_norm": 0.06919477134943008, "learning_rate": 9.93294027634007e-06, "loss": 0.2393, "step": 47169 }, { "epoch": 3.8212896953985744, "grad_norm": 0.07358191162347794, "learning_rate": 9.928439623745443e-06, "loss": 0.2176, "step": 47170 }, { "epoch": 3.8213707064160727, "grad_norm": 0.0823577418923378, "learning_rate": 9.923938971150817e-06, "loss": 0.2783, "step": 47171 }, { "epoch": 3.821451717433571, "grad_norm": 0.06688446551561356, "learning_rate": 9.919438318556192e-06, "loss": 0.2079, "step": 47172 }, { "epoch": 3.8215327284510696, "grad_norm": 0.0666053518652916, "learning_rate": 9.914937665961566e-06, "loss": 0.2031, "step": 47173 }, { "epoch": 3.821613739468568, "grad_norm": 0.08584258705377579, "learning_rate": 9.910437013366938e-06, "loss": 0.2129, "step": 47174 }, { "epoch": 3.821694750486066, "grad_norm": 0.07315590977668762, "learning_rate": 9.905936360772313e-06, "loss": 0.2045, "step": 47175 }, { "epoch": 3.8217757615035644, "grad_norm": 0.05562111362814903, "learning_rate": 9.901435708177686e-06, "loss": 0.181, "step": 47176 }, { "epoch": 3.8218567725210626, "grad_norm": 0.07396908849477768, "learning_rate": 9.89693505558306e-06, "loss": 0.2028, "step": 47177 }, { "epoch": 3.8219377835385613, "grad_norm": 0.0769689679145813, "learning_rate": 9.892434402988434e-06, "loss": 0.2081, "step": 47178 }, { "epoch": 3.8220187945560595, "grad_norm": 0.09181632101535797, "learning_rate": 9.887933750393807e-06, "loss": 0.2465, "step": 47179 }, { "epoch": 3.8220998055735578, "grad_norm": 0.07666897028684616, "learning_rate": 9.883433097799181e-06, "loss": 0.2228, "step": 47180 }, { "epoch": 3.8221808165910565, "grad_norm": 0.06831949949264526, "learning_rate": 9.878932445204556e-06, "loss": 0.2263, "step": 47181 }, { "epoch": 3.8222618276085547, "grad_norm": 0.06521642953157425, "learning_rate": 9.874431792609928e-06, "loss": 0.2294, "step": 47182 }, { "epoch": 3.822342838626053, "grad_norm": 0.07536672800779343, "learning_rate": 9.869931140015302e-06, "loss": 0.2143, "step": 47183 }, { "epoch": 3.8224238496435516, "grad_norm": 0.08660756051540375, "learning_rate": 9.865430487420677e-06, "loss": 0.2777, "step": 47184 }, { "epoch": 3.82250486066105, "grad_norm": 0.0728485956788063, "learning_rate": 9.86092983482605e-06, "loss": 0.2351, "step": 47185 }, { "epoch": 3.822585871678548, "grad_norm": 0.06775187700986862, "learning_rate": 9.856429182231424e-06, "loss": 0.2358, "step": 47186 }, { "epoch": 3.822666882696047, "grad_norm": 0.07535045593976974, "learning_rate": 9.851928529636798e-06, "loss": 0.2201, "step": 47187 }, { "epoch": 3.822747893713545, "grad_norm": 0.07898474484682083, "learning_rate": 9.847427877042171e-06, "loss": 0.2667, "step": 47188 }, { "epoch": 3.8228289047310433, "grad_norm": 0.08030825108289719, "learning_rate": 9.842927224447545e-06, "loss": 0.2676, "step": 47189 }, { "epoch": 3.822909915748542, "grad_norm": 0.07572636753320694, "learning_rate": 9.83842657185292e-06, "loss": 0.2874, "step": 47190 }, { "epoch": 3.8229909267660402, "grad_norm": 0.07902305573225021, "learning_rate": 9.833925919258292e-06, "loss": 0.2218, "step": 47191 }, { "epoch": 3.8230719377835385, "grad_norm": 0.07862576842308044, "learning_rate": 9.829425266663666e-06, "loss": 0.2515, "step": 47192 }, { "epoch": 3.823152948801037, "grad_norm": 0.07469526678323746, "learning_rate": 9.824924614069041e-06, "loss": 0.221, "step": 47193 }, { "epoch": 3.8232339598185354, "grad_norm": 0.07735791057348251, "learning_rate": 9.820423961474415e-06, "loss": 0.2396, "step": 47194 }, { "epoch": 3.8233149708360337, "grad_norm": 0.06498521566390991, "learning_rate": 9.815923308879788e-06, "loss": 0.2311, "step": 47195 }, { "epoch": 3.8233959818535324, "grad_norm": 0.07805728167295456, "learning_rate": 9.811422656285162e-06, "loss": 0.2454, "step": 47196 }, { "epoch": 3.8234769928710306, "grad_norm": 0.065180204808712, "learning_rate": 9.806922003690536e-06, "loss": 0.2308, "step": 47197 }, { "epoch": 3.823558003888529, "grad_norm": 0.07017407566308975, "learning_rate": 9.802421351095909e-06, "loss": 0.2326, "step": 47198 }, { "epoch": 3.823639014906027, "grad_norm": 0.08247072994709015, "learning_rate": 9.797920698501283e-06, "loss": 0.23, "step": 47199 }, { "epoch": 3.8237200259235253, "grad_norm": 0.06262676417827606, "learning_rate": 9.793420045906656e-06, "loss": 0.2121, "step": 47200 }, { "epoch": 3.823801036941024, "grad_norm": 0.10090301930904388, "learning_rate": 9.78891939331203e-06, "loss": 0.2373, "step": 47201 }, { "epoch": 3.8238820479585223, "grad_norm": 0.08574521541595459, "learning_rate": 9.784418740717405e-06, "loss": 0.2366, "step": 47202 }, { "epoch": 3.8239630589760205, "grad_norm": 0.0797303095459938, "learning_rate": 9.779918088122779e-06, "loss": 0.2047, "step": 47203 }, { "epoch": 3.824044069993519, "grad_norm": 0.07990000396966934, "learning_rate": 9.775417435528152e-06, "loss": 0.194, "step": 47204 }, { "epoch": 3.8241250810110174, "grad_norm": 0.06872112303972244, "learning_rate": 9.770916782933526e-06, "loss": 0.1983, "step": 47205 }, { "epoch": 3.8242060920285157, "grad_norm": 0.06418676674365997, "learning_rate": 9.7664161303389e-06, "loss": 0.2198, "step": 47206 }, { "epoch": 3.8242871030460144, "grad_norm": 0.0675126165151596, "learning_rate": 9.761915477744273e-06, "loss": 0.191, "step": 47207 }, { "epoch": 3.8243681140635126, "grad_norm": 0.0721009373664856, "learning_rate": 9.757414825149647e-06, "loss": 0.2582, "step": 47208 }, { "epoch": 3.824449125081011, "grad_norm": 0.05475969240069389, "learning_rate": 9.75291417255502e-06, "loss": 0.1922, "step": 47209 }, { "epoch": 3.8245301360985096, "grad_norm": 0.07951171696186066, "learning_rate": 9.748413519960394e-06, "loss": 0.2298, "step": 47210 }, { "epoch": 3.824611147116008, "grad_norm": 0.059227924793958664, "learning_rate": 9.74391286736577e-06, "loss": 0.2276, "step": 47211 }, { "epoch": 3.824692158133506, "grad_norm": 0.06179160252213478, "learning_rate": 9.739412214771141e-06, "loss": 0.2209, "step": 47212 }, { "epoch": 3.8247731691510047, "grad_norm": 0.06057712808251381, "learning_rate": 9.734911562176517e-06, "loss": 0.1778, "step": 47213 }, { "epoch": 3.824854180168503, "grad_norm": 0.06524763256311417, "learning_rate": 9.73041090958189e-06, "loss": 0.2312, "step": 47214 }, { "epoch": 3.8249351911860012, "grad_norm": 0.06625210493803024, "learning_rate": 9.725910256987264e-06, "loss": 0.2464, "step": 47215 }, { "epoch": 3.8250162022035, "grad_norm": 0.05672033131122589, "learning_rate": 9.721409604392637e-06, "loss": 0.2107, "step": 47216 }, { "epoch": 3.825097213220998, "grad_norm": 0.08716113865375519, "learning_rate": 9.716908951798011e-06, "loss": 0.2429, "step": 47217 }, { "epoch": 3.8251782242384964, "grad_norm": 0.09075607359409332, "learning_rate": 9.712408299203385e-06, "loss": 0.2329, "step": 47218 }, { "epoch": 3.8252592352559946, "grad_norm": 0.08668406307697296, "learning_rate": 9.707907646608758e-06, "loss": 0.2409, "step": 47219 }, { "epoch": 3.8253402462734933, "grad_norm": 0.07025102525949478, "learning_rate": 9.703406994014133e-06, "loss": 0.1961, "step": 47220 }, { "epoch": 3.8254212572909916, "grad_norm": 0.05928102135658264, "learning_rate": 9.698906341419505e-06, "loss": 0.2193, "step": 47221 }, { "epoch": 3.82550226830849, "grad_norm": 0.0666092187166214, "learning_rate": 9.69440568882488e-06, "loss": 0.2267, "step": 47222 }, { "epoch": 3.825583279325988, "grad_norm": 0.07980141043663025, "learning_rate": 9.689905036230254e-06, "loss": 0.232, "step": 47223 }, { "epoch": 3.8256642903434868, "grad_norm": 0.09296827018260956, "learning_rate": 9.685404383635628e-06, "loss": 0.2557, "step": 47224 }, { "epoch": 3.825745301360985, "grad_norm": 0.08211761713027954, "learning_rate": 9.680903731041001e-06, "loss": 0.2107, "step": 47225 }, { "epoch": 3.8258263123784833, "grad_norm": 0.0850875973701477, "learning_rate": 9.676403078446375e-06, "loss": 0.2087, "step": 47226 }, { "epoch": 3.825907323395982, "grad_norm": 0.0714445635676384, "learning_rate": 9.671902425851749e-06, "loss": 0.2302, "step": 47227 }, { "epoch": 3.82598833441348, "grad_norm": 0.08113661408424377, "learning_rate": 9.667401773257124e-06, "loss": 0.2145, "step": 47228 }, { "epoch": 3.8260693454309784, "grad_norm": 0.0769774541258812, "learning_rate": 9.662901120662496e-06, "loss": 0.2095, "step": 47229 }, { "epoch": 3.826150356448477, "grad_norm": 0.0717720314860344, "learning_rate": 9.65840046806787e-06, "loss": 0.2377, "step": 47230 }, { "epoch": 3.8262313674659754, "grad_norm": 0.06568815559148788, "learning_rate": 9.653899815473245e-06, "loss": 0.2288, "step": 47231 }, { "epoch": 3.8263123784834736, "grad_norm": 0.0737098678946495, "learning_rate": 9.649399162878618e-06, "loss": 0.2059, "step": 47232 }, { "epoch": 3.8263933895009723, "grad_norm": 0.06918147206306458, "learning_rate": 9.644898510283992e-06, "loss": 0.2238, "step": 47233 }, { "epoch": 3.8264744005184705, "grad_norm": 0.0679457038640976, "learning_rate": 9.640397857689366e-06, "loss": 0.2414, "step": 47234 }, { "epoch": 3.826555411535969, "grad_norm": 0.07485439628362656, "learning_rate": 9.63589720509474e-06, "loss": 0.1962, "step": 47235 }, { "epoch": 3.8266364225534675, "grad_norm": 0.07543614506721497, "learning_rate": 9.631396552500113e-06, "loss": 0.2554, "step": 47236 }, { "epoch": 3.8267174335709657, "grad_norm": 0.07579874992370605, "learning_rate": 9.626895899905488e-06, "loss": 0.1838, "step": 47237 }, { "epoch": 3.826798444588464, "grad_norm": 0.06702306121587753, "learning_rate": 9.62239524731086e-06, "loss": 0.246, "step": 47238 }, { "epoch": 3.8268794556059627, "grad_norm": 0.06871180236339569, "learning_rate": 9.617894594716234e-06, "loss": 0.232, "step": 47239 }, { "epoch": 3.826960466623461, "grad_norm": 0.0695720836520195, "learning_rate": 9.613393942121609e-06, "loss": 0.2181, "step": 47240 }, { "epoch": 3.827041477640959, "grad_norm": 0.07191573828458786, "learning_rate": 9.608893289526983e-06, "loss": 0.2541, "step": 47241 }, { "epoch": 3.8271224886584574, "grad_norm": 0.07103823870420456, "learning_rate": 9.604392636932354e-06, "loss": 0.2188, "step": 47242 }, { "epoch": 3.827203499675956, "grad_norm": 0.057614583522081375, "learning_rate": 9.59989198433773e-06, "loss": 0.247, "step": 47243 }, { "epoch": 3.8272845106934543, "grad_norm": 0.08780107647180557, "learning_rate": 9.595391331743103e-06, "loss": 0.2499, "step": 47244 }, { "epoch": 3.8273655217109526, "grad_norm": 0.07042431086301804, "learning_rate": 9.590890679148477e-06, "loss": 0.2408, "step": 47245 }, { "epoch": 3.827446532728451, "grad_norm": 0.09787672013044357, "learning_rate": 9.586390026553852e-06, "loss": 0.233, "step": 47246 }, { "epoch": 3.8275275437459495, "grad_norm": 0.05550922080874443, "learning_rate": 9.581889373959224e-06, "loss": 0.254, "step": 47247 }, { "epoch": 3.8276085547634477, "grad_norm": 0.06615682691335678, "learning_rate": 9.577388721364598e-06, "loss": 0.2103, "step": 47248 }, { "epoch": 3.827689565780946, "grad_norm": 0.0725463330745697, "learning_rate": 9.572888068769973e-06, "loss": 0.2129, "step": 47249 }, { "epoch": 3.8277705767984447, "grad_norm": 0.08158998191356659, "learning_rate": 9.568387416175347e-06, "loss": 0.2423, "step": 47250 }, { "epoch": 3.827851587815943, "grad_norm": 0.07956718653440475, "learning_rate": 9.563886763580719e-06, "loss": 0.2078, "step": 47251 }, { "epoch": 3.827932598833441, "grad_norm": 0.07327364385128021, "learning_rate": 9.559386110986094e-06, "loss": 0.2675, "step": 47252 }, { "epoch": 3.82801360985094, "grad_norm": 0.0754021480679512, "learning_rate": 9.554885458391467e-06, "loss": 0.2211, "step": 47253 }, { "epoch": 3.828094620868438, "grad_norm": 0.07733095437288284, "learning_rate": 9.550384805796841e-06, "loss": 0.2136, "step": 47254 }, { "epoch": 3.8281756318859363, "grad_norm": 0.0626547783613205, "learning_rate": 9.545884153202215e-06, "loss": 0.2049, "step": 47255 }, { "epoch": 3.828256642903435, "grad_norm": 0.0786009356379509, "learning_rate": 9.541383500607588e-06, "loss": 0.226, "step": 47256 }, { "epoch": 3.8283376539209333, "grad_norm": 0.07420649379491806, "learning_rate": 9.536882848012962e-06, "loss": 0.1857, "step": 47257 }, { "epoch": 3.8284186649384315, "grad_norm": 0.08987807482481003, "learning_rate": 9.532382195418337e-06, "loss": 0.2385, "step": 47258 }, { "epoch": 3.82849967595593, "grad_norm": 0.08396057784557343, "learning_rate": 9.527881542823709e-06, "loss": 0.227, "step": 47259 }, { "epoch": 3.8285806869734285, "grad_norm": 0.08503457903862, "learning_rate": 9.523380890229083e-06, "loss": 0.2937, "step": 47260 }, { "epoch": 3.8286616979909267, "grad_norm": 0.07922942191362381, "learning_rate": 9.518880237634458e-06, "loss": 0.2653, "step": 47261 }, { "epoch": 3.8287427090084254, "grad_norm": 0.07605444639921188, "learning_rate": 9.514379585039832e-06, "loss": 0.2179, "step": 47262 }, { "epoch": 3.8288237200259236, "grad_norm": 0.062154121696949005, "learning_rate": 9.509878932445205e-06, "loss": 0.2167, "step": 47263 }, { "epoch": 3.828904731043422, "grad_norm": 0.059175748378038406, "learning_rate": 9.505378279850579e-06, "loss": 0.2376, "step": 47264 }, { "epoch": 3.82898574206092, "grad_norm": 0.06250623613595963, "learning_rate": 9.500877627255952e-06, "loss": 0.2287, "step": 47265 }, { "epoch": 3.829066753078419, "grad_norm": 0.06687180697917938, "learning_rate": 9.496376974661326e-06, "loss": 0.2141, "step": 47266 }, { "epoch": 3.829147764095917, "grad_norm": 0.08554664254188538, "learning_rate": 9.491876322066701e-06, "loss": 0.2137, "step": 47267 }, { "epoch": 3.8292287751134153, "grad_norm": 0.07720212638378143, "learning_rate": 9.487375669472073e-06, "loss": 0.2481, "step": 47268 }, { "epoch": 3.8293097861309136, "grad_norm": 0.07368236035108566, "learning_rate": 9.482875016877447e-06, "loss": 0.1931, "step": 47269 }, { "epoch": 3.8293907971484122, "grad_norm": 0.07523944973945618, "learning_rate": 9.478374364282822e-06, "loss": 0.198, "step": 47270 }, { "epoch": 3.8294718081659105, "grad_norm": 0.07883183658123016, "learning_rate": 9.473873711688196e-06, "loss": 0.2222, "step": 47271 }, { "epoch": 3.8295528191834087, "grad_norm": 0.06268235296010971, "learning_rate": 9.469373059093568e-06, "loss": 0.2399, "step": 47272 }, { "epoch": 3.8296338302009074, "grad_norm": 0.06675557792186737, "learning_rate": 9.464872406498943e-06, "loss": 0.2021, "step": 47273 }, { "epoch": 3.8297148412184057, "grad_norm": 0.07962408661842346, "learning_rate": 9.460371753904317e-06, "loss": 0.2065, "step": 47274 }, { "epoch": 3.829795852235904, "grad_norm": 0.06950665265321732, "learning_rate": 9.45587110130969e-06, "loss": 0.2035, "step": 47275 }, { "epoch": 3.8298768632534026, "grad_norm": 0.07848667353391647, "learning_rate": 9.451370448715065e-06, "loss": 0.2406, "step": 47276 }, { "epoch": 3.829957874270901, "grad_norm": 0.07826760411262512, "learning_rate": 9.446869796120437e-06, "loss": 0.2433, "step": 47277 }, { "epoch": 3.830038885288399, "grad_norm": 0.06589142233133316, "learning_rate": 9.442369143525811e-06, "loss": 0.2277, "step": 47278 }, { "epoch": 3.8301198963058978, "grad_norm": 0.0657556802034378, "learning_rate": 9.437868490931186e-06, "loss": 0.2145, "step": 47279 }, { "epoch": 3.830200907323396, "grad_norm": 0.08655353635549545, "learning_rate": 9.43336783833656e-06, "loss": 0.2212, "step": 47280 }, { "epoch": 3.8302819183408943, "grad_norm": 0.06460024416446686, "learning_rate": 9.428867185741932e-06, "loss": 0.2437, "step": 47281 }, { "epoch": 3.830362929358393, "grad_norm": 0.06946887820959091, "learning_rate": 9.424366533147307e-06, "loss": 0.2367, "step": 47282 }, { "epoch": 3.830443940375891, "grad_norm": 0.07267460972070694, "learning_rate": 9.41986588055268e-06, "loss": 0.2402, "step": 47283 }, { "epoch": 3.8305249513933894, "grad_norm": 0.06545892357826233, "learning_rate": 9.415365227958054e-06, "loss": 0.243, "step": 47284 }, { "epoch": 3.830605962410888, "grad_norm": 0.07193044573068619, "learning_rate": 9.410864575363428e-06, "loss": 0.2309, "step": 47285 }, { "epoch": 3.8306869734283864, "grad_norm": 0.07626271992921829, "learning_rate": 9.406363922768801e-06, "loss": 0.2003, "step": 47286 }, { "epoch": 3.8307679844458846, "grad_norm": 0.06547614932060242, "learning_rate": 9.401863270174175e-06, "loss": 0.2529, "step": 47287 }, { "epoch": 3.830848995463383, "grad_norm": 0.07183431833982468, "learning_rate": 9.39736261757955e-06, "loss": 0.1978, "step": 47288 }, { "epoch": 3.8309300064808816, "grad_norm": 0.06502663344144821, "learning_rate": 9.392861964984924e-06, "loss": 0.2103, "step": 47289 }, { "epoch": 3.83101101749838, "grad_norm": 0.0723065510392189, "learning_rate": 9.388361312390296e-06, "loss": 0.2106, "step": 47290 }, { "epoch": 3.831092028515878, "grad_norm": 0.06067093834280968, "learning_rate": 9.383860659795671e-06, "loss": 0.2475, "step": 47291 }, { "epoch": 3.8311730395333763, "grad_norm": 0.0675438940525055, "learning_rate": 9.379360007201045e-06, "loss": 0.2143, "step": 47292 }, { "epoch": 3.831254050550875, "grad_norm": 0.08510125428438187, "learning_rate": 9.374859354606418e-06, "loss": 0.2459, "step": 47293 }, { "epoch": 3.8313350615683732, "grad_norm": 0.08360667526721954, "learning_rate": 9.370358702011792e-06, "loss": 0.2058, "step": 47294 }, { "epoch": 3.8314160725858715, "grad_norm": 0.06987543404102325, "learning_rate": 9.365858049417166e-06, "loss": 0.2325, "step": 47295 }, { "epoch": 3.83149708360337, "grad_norm": 0.07574071735143661, "learning_rate": 9.36135739682254e-06, "loss": 0.2243, "step": 47296 }, { "epoch": 3.8315780946208684, "grad_norm": 0.06666197627782822, "learning_rate": 9.356856744227914e-06, "loss": 0.2158, "step": 47297 }, { "epoch": 3.8316591056383666, "grad_norm": 0.08488959819078445, "learning_rate": 9.352356091633286e-06, "loss": 0.2025, "step": 47298 }, { "epoch": 3.8317401166558653, "grad_norm": 0.07799491286277771, "learning_rate": 9.34785543903866e-06, "loss": 0.209, "step": 47299 }, { "epoch": 3.8318211276733636, "grad_norm": 0.07381635904312134, "learning_rate": 9.343354786444035e-06, "loss": 0.2521, "step": 47300 }, { "epoch": 3.831902138690862, "grad_norm": 0.05815286561846733, "learning_rate": 9.338854133849409e-06, "loss": 0.2023, "step": 47301 }, { "epoch": 3.8319831497083605, "grad_norm": 0.08299100399017334, "learning_rate": 9.334353481254782e-06, "loss": 0.1874, "step": 47302 }, { "epoch": 3.8320641607258588, "grad_norm": 0.0680263414978981, "learning_rate": 9.329852828660156e-06, "loss": 0.2688, "step": 47303 }, { "epoch": 3.832145171743357, "grad_norm": 0.07331136614084244, "learning_rate": 9.32535217606553e-06, "loss": 0.2756, "step": 47304 }, { "epoch": 3.8322261827608557, "grad_norm": 0.07893449068069458, "learning_rate": 9.320851523470903e-06, "loss": 0.2511, "step": 47305 }, { "epoch": 3.832307193778354, "grad_norm": 0.06692652404308319, "learning_rate": 9.316350870876279e-06, "loss": 0.1834, "step": 47306 }, { "epoch": 3.832388204795852, "grad_norm": 0.10165359824895859, "learning_rate": 9.31185021828165e-06, "loss": 0.2666, "step": 47307 }, { "epoch": 3.832469215813351, "grad_norm": 0.06731588393449783, "learning_rate": 9.307349565687024e-06, "loss": 0.2005, "step": 47308 }, { "epoch": 3.832550226830849, "grad_norm": 0.09456529468297958, "learning_rate": 9.3028489130924e-06, "loss": 0.2468, "step": 47309 }, { "epoch": 3.8326312378483474, "grad_norm": 0.07092951238155365, "learning_rate": 9.298348260497773e-06, "loss": 0.2123, "step": 47310 }, { "epoch": 3.8327122488658456, "grad_norm": 0.07524535059928894, "learning_rate": 9.293847607903147e-06, "loss": 0.227, "step": 47311 }, { "epoch": 3.8327932598833443, "grad_norm": 0.0925503671169281, "learning_rate": 9.28934695530852e-06, "loss": 0.2284, "step": 47312 }, { "epoch": 3.8328742709008425, "grad_norm": 0.0901738703250885, "learning_rate": 9.284846302713894e-06, "loss": 0.2441, "step": 47313 }, { "epoch": 3.832955281918341, "grad_norm": 0.06687404960393906, "learning_rate": 9.280345650119267e-06, "loss": 0.2427, "step": 47314 }, { "epoch": 3.833036292935839, "grad_norm": 0.07051212340593338, "learning_rate": 9.275844997524641e-06, "loss": 0.2208, "step": 47315 }, { "epoch": 3.8331173039533377, "grad_norm": 0.08045321702957153, "learning_rate": 9.271344344930015e-06, "loss": 0.2244, "step": 47316 }, { "epoch": 3.833198314970836, "grad_norm": 0.06755134463310242, "learning_rate": 9.266843692335388e-06, "loss": 0.1857, "step": 47317 }, { "epoch": 3.833279325988334, "grad_norm": 0.06752123683691025, "learning_rate": 9.262343039740764e-06, "loss": 0.2187, "step": 47318 }, { "epoch": 3.833360337005833, "grad_norm": 0.07143384963274002, "learning_rate": 9.257842387146137e-06, "loss": 0.2296, "step": 47319 }, { "epoch": 3.833441348023331, "grad_norm": 0.06815242767333984, "learning_rate": 9.25334173455151e-06, "loss": 0.1858, "step": 47320 }, { "epoch": 3.8335223590408294, "grad_norm": 0.08003578335046768, "learning_rate": 9.248841081956884e-06, "loss": 0.2072, "step": 47321 }, { "epoch": 3.833603370058328, "grad_norm": 0.07147976011037827, "learning_rate": 9.244340429362258e-06, "loss": 0.2295, "step": 47322 }, { "epoch": 3.8336843810758263, "grad_norm": 0.07325585186481476, "learning_rate": 9.239839776767632e-06, "loss": 0.2423, "step": 47323 }, { "epoch": 3.8337653920933246, "grad_norm": 0.06136981397867203, "learning_rate": 9.235339124173005e-06, "loss": 0.1829, "step": 47324 }, { "epoch": 3.8338464031108233, "grad_norm": 0.07087627798318863, "learning_rate": 9.230838471578379e-06, "loss": 0.1777, "step": 47325 }, { "epoch": 3.8339274141283215, "grad_norm": 0.07408089935779572, "learning_rate": 9.226337818983752e-06, "loss": 0.2293, "step": 47326 }, { "epoch": 3.8340084251458197, "grad_norm": 0.08621295541524887, "learning_rate": 9.221837166389128e-06, "loss": 0.2657, "step": 47327 }, { "epoch": 3.8340894361633184, "grad_norm": 0.06372913718223572, "learning_rate": 9.2173365137945e-06, "loss": 0.2012, "step": 47328 }, { "epoch": 3.8341704471808167, "grad_norm": 0.07300638407468796, "learning_rate": 9.212835861199875e-06, "loss": 0.2104, "step": 47329 }, { "epoch": 3.834251458198315, "grad_norm": 0.05497792363166809, "learning_rate": 9.208335208605248e-06, "loss": 0.2106, "step": 47330 }, { "epoch": 3.8343324692158136, "grad_norm": 0.06223801150918007, "learning_rate": 9.203834556010622e-06, "loss": 0.1981, "step": 47331 }, { "epoch": 3.834413480233312, "grad_norm": 0.08193052560091019, "learning_rate": 9.199333903415996e-06, "loss": 0.2798, "step": 47332 }, { "epoch": 3.83449449125081, "grad_norm": 0.07334339618682861, "learning_rate": 9.19483325082137e-06, "loss": 0.2237, "step": 47333 }, { "epoch": 3.8345755022683083, "grad_norm": 0.07783214747905731, "learning_rate": 9.190332598226743e-06, "loss": 0.2113, "step": 47334 }, { "epoch": 3.834656513285807, "grad_norm": 0.07094062864780426, "learning_rate": 9.185831945632118e-06, "loss": 0.2396, "step": 47335 }, { "epoch": 3.8347375243033053, "grad_norm": 0.07203572988510132, "learning_rate": 9.181331293037492e-06, "loss": 0.2329, "step": 47336 }, { "epoch": 3.8348185353208035, "grad_norm": 0.06293873488903046, "learning_rate": 9.176830640442864e-06, "loss": 0.2344, "step": 47337 }, { "epoch": 3.8348995463383018, "grad_norm": 0.07198001444339752, "learning_rate": 9.172329987848239e-06, "loss": 0.2002, "step": 47338 }, { "epoch": 3.8349805573558005, "grad_norm": 0.07168415933847427, "learning_rate": 9.167829335253613e-06, "loss": 0.2593, "step": 47339 }, { "epoch": 3.8350615683732987, "grad_norm": 0.06184476613998413, "learning_rate": 9.163328682658986e-06, "loss": 0.2313, "step": 47340 }, { "epoch": 3.835142579390797, "grad_norm": 0.05653228610754013, "learning_rate": 9.15882803006436e-06, "loss": 0.1854, "step": 47341 }, { "epoch": 3.8352235904082956, "grad_norm": 0.06754690408706665, "learning_rate": 9.154327377469733e-06, "loss": 0.2537, "step": 47342 }, { "epoch": 3.835304601425794, "grad_norm": 0.07572982460260391, "learning_rate": 9.149826724875107e-06, "loss": 0.264, "step": 47343 }, { "epoch": 3.835385612443292, "grad_norm": 0.07662025839090347, "learning_rate": 9.145326072280482e-06, "loss": 0.2123, "step": 47344 }, { "epoch": 3.835466623460791, "grad_norm": 0.07293036580085754, "learning_rate": 9.140825419685854e-06, "loss": 0.231, "step": 47345 }, { "epoch": 3.835547634478289, "grad_norm": 0.061868395656347275, "learning_rate": 9.136324767091228e-06, "loss": 0.2088, "step": 47346 }, { "epoch": 3.8356286454957873, "grad_norm": 0.05718451365828514, "learning_rate": 9.131824114496603e-06, "loss": 0.2455, "step": 47347 }, { "epoch": 3.835709656513286, "grad_norm": 0.06627894937992096, "learning_rate": 9.127323461901977e-06, "loss": 0.2288, "step": 47348 }, { "epoch": 3.8357906675307842, "grad_norm": 0.08713270723819733, "learning_rate": 9.12282280930735e-06, "loss": 0.2371, "step": 47349 }, { "epoch": 3.8358716785482825, "grad_norm": 0.05953427404165268, "learning_rate": 9.118322156712724e-06, "loss": 0.214, "step": 47350 }, { "epoch": 3.835952689565781, "grad_norm": 0.06580721586942673, "learning_rate": 9.113821504118098e-06, "loss": 0.2079, "step": 47351 }, { "epoch": 3.8360337005832794, "grad_norm": 0.08798111230134964, "learning_rate": 9.109320851523471e-06, "loss": 0.2701, "step": 47352 }, { "epoch": 3.8361147116007777, "grad_norm": 0.0650232806801796, "learning_rate": 9.104820198928846e-06, "loss": 0.2124, "step": 47353 }, { "epoch": 3.8361957226182763, "grad_norm": 0.0631076991558075, "learning_rate": 9.100319546334218e-06, "loss": 0.2093, "step": 47354 }, { "epoch": 3.8362767336357746, "grad_norm": 0.0658016949892044, "learning_rate": 9.095818893739592e-06, "loss": 0.2163, "step": 47355 }, { "epoch": 3.836357744653273, "grad_norm": 0.06750229001045227, "learning_rate": 9.091318241144967e-06, "loss": 0.2013, "step": 47356 }, { "epoch": 3.836438755670771, "grad_norm": 0.07032305002212524, "learning_rate": 9.08681758855034e-06, "loss": 0.2533, "step": 47357 }, { "epoch": 3.8365197666882693, "grad_norm": 0.0590241402387619, "learning_rate": 9.082316935955713e-06, "loss": 0.2212, "step": 47358 }, { "epoch": 3.836600777705768, "grad_norm": 0.06706096976995468, "learning_rate": 9.077816283361088e-06, "loss": 0.2238, "step": 47359 }, { "epoch": 3.8366817887232663, "grad_norm": 0.08542868494987488, "learning_rate": 9.073315630766462e-06, "loss": 0.2208, "step": 47360 }, { "epoch": 3.8367627997407645, "grad_norm": 0.06941386312246323, "learning_rate": 9.068814978171835e-06, "loss": 0.2698, "step": 47361 }, { "epoch": 3.836843810758263, "grad_norm": 0.060006238520145416, "learning_rate": 9.06431432557721e-06, "loss": 0.1835, "step": 47362 }, { "epoch": 3.8369248217757614, "grad_norm": 0.06161088868975639, "learning_rate": 9.059813672982582e-06, "loss": 0.204, "step": 47363 }, { "epoch": 3.8370058327932597, "grad_norm": 0.0850311890244484, "learning_rate": 9.055313020387956e-06, "loss": 0.2353, "step": 47364 }, { "epoch": 3.8370868438107584, "grad_norm": 0.06477893143892288, "learning_rate": 9.050812367793331e-06, "loss": 0.2299, "step": 47365 }, { "epoch": 3.8371678548282566, "grad_norm": 0.06894608587026596, "learning_rate": 9.046311715198705e-06, "loss": 0.232, "step": 47366 }, { "epoch": 3.837248865845755, "grad_norm": 0.07398436218500137, "learning_rate": 9.041811062604077e-06, "loss": 0.2443, "step": 47367 }, { "epoch": 3.8373298768632536, "grad_norm": 0.07592013478279114, "learning_rate": 9.037310410009452e-06, "loss": 0.2166, "step": 47368 }, { "epoch": 3.837410887880752, "grad_norm": 0.07261329144239426, "learning_rate": 9.032809757414826e-06, "loss": 0.2163, "step": 47369 }, { "epoch": 3.83749189889825, "grad_norm": 0.0708971843123436, "learning_rate": 9.0283091048202e-06, "loss": 0.256, "step": 47370 }, { "epoch": 3.8375729099157487, "grad_norm": 0.07381059974431992, "learning_rate": 9.023808452225573e-06, "loss": 0.2307, "step": 47371 }, { "epoch": 3.837653920933247, "grad_norm": 0.04990634694695473, "learning_rate": 9.019307799630947e-06, "loss": 0.2111, "step": 47372 }, { "epoch": 3.837734931950745, "grad_norm": 0.06506974995136261, "learning_rate": 9.01480714703632e-06, "loss": 0.2267, "step": 47373 }, { "epoch": 3.837815942968244, "grad_norm": 0.08760213106870651, "learning_rate": 9.010306494441695e-06, "loss": 0.2154, "step": 47374 }, { "epoch": 3.837896953985742, "grad_norm": 0.07203999906778336, "learning_rate": 9.005805841847069e-06, "loss": 0.2392, "step": 47375 }, { "epoch": 3.8379779650032404, "grad_norm": 0.0878889188170433, "learning_rate": 9.001305189252441e-06, "loss": 0.2725, "step": 47376 }, { "epoch": 3.838058976020739, "grad_norm": 0.08162927627563477, "learning_rate": 8.996804536657816e-06, "loss": 0.217, "step": 47377 }, { "epoch": 3.8381399870382373, "grad_norm": 0.07394520938396454, "learning_rate": 8.99230388406319e-06, "loss": 0.185, "step": 47378 }, { "epoch": 3.8382209980557356, "grad_norm": 0.0606919601559639, "learning_rate": 8.987803231468564e-06, "loss": 0.2269, "step": 47379 }, { "epoch": 3.838302009073234, "grad_norm": 0.07362279295921326, "learning_rate": 8.983302578873937e-06, "loss": 0.2205, "step": 47380 }, { "epoch": 3.838383020090732, "grad_norm": 0.06929288804531097, "learning_rate": 8.97880192627931e-06, "loss": 0.1886, "step": 47381 }, { "epoch": 3.8384640311082308, "grad_norm": 0.05175120756030083, "learning_rate": 8.974301273684684e-06, "loss": 0.1816, "step": 47382 }, { "epoch": 3.838545042125729, "grad_norm": 0.06899210810661316, "learning_rate": 8.96980062109006e-06, "loss": 0.2392, "step": 47383 }, { "epoch": 3.8386260531432272, "grad_norm": 0.07615984231233597, "learning_rate": 8.965299968495432e-06, "loss": 0.2399, "step": 47384 }, { "epoch": 3.838707064160726, "grad_norm": 0.08293159306049347, "learning_rate": 8.960799315900805e-06, "loss": 0.2523, "step": 47385 }, { "epoch": 3.838788075178224, "grad_norm": 0.06818405538797379, "learning_rate": 8.95629866330618e-06, "loss": 0.2465, "step": 47386 }, { "epoch": 3.8388690861957224, "grad_norm": 0.07864660769701004, "learning_rate": 8.951798010711554e-06, "loss": 0.2487, "step": 47387 }, { "epoch": 3.838950097213221, "grad_norm": 0.06878384947776794, "learning_rate": 8.947297358116926e-06, "loss": 0.2125, "step": 47388 }, { "epoch": 3.8390311082307194, "grad_norm": 0.08059203624725342, "learning_rate": 8.942796705522301e-06, "loss": 0.1948, "step": 47389 }, { "epoch": 3.8391121192482176, "grad_norm": 0.07346974313259125, "learning_rate": 8.938296052927675e-06, "loss": 0.2186, "step": 47390 }, { "epoch": 3.8391931302657163, "grad_norm": 0.06083647534251213, "learning_rate": 8.933795400333048e-06, "loss": 0.2405, "step": 47391 }, { "epoch": 3.8392741412832145, "grad_norm": 0.07009142637252808, "learning_rate": 8.929294747738424e-06, "loss": 0.2472, "step": 47392 }, { "epoch": 3.839355152300713, "grad_norm": 0.07019902765750885, "learning_rate": 8.924794095143796e-06, "loss": 0.1912, "step": 47393 }, { "epoch": 3.8394361633182115, "grad_norm": 0.07780317962169647, "learning_rate": 8.92029344254917e-06, "loss": 0.2321, "step": 47394 }, { "epoch": 3.8395171743357097, "grad_norm": 0.061078991740942, "learning_rate": 8.915792789954545e-06, "loss": 0.1962, "step": 47395 }, { "epoch": 3.839598185353208, "grad_norm": 0.07453559339046478, "learning_rate": 8.911292137359918e-06, "loss": 0.2545, "step": 47396 }, { "epoch": 3.8396791963707066, "grad_norm": 0.0780182033777237, "learning_rate": 8.90679148476529e-06, "loss": 0.2747, "step": 47397 }, { "epoch": 3.839760207388205, "grad_norm": 0.07100244611501694, "learning_rate": 8.902290832170665e-06, "loss": 0.2418, "step": 47398 }, { "epoch": 3.839841218405703, "grad_norm": 0.053392693400382996, "learning_rate": 8.897790179576039e-06, "loss": 0.2013, "step": 47399 }, { "epoch": 3.839922229423202, "grad_norm": 0.052919309586286545, "learning_rate": 8.893289526981413e-06, "loss": 0.2098, "step": 47400 }, { "epoch": 3.8400032404407, "grad_norm": 0.0662708431482315, "learning_rate": 8.888788874386786e-06, "loss": 0.2121, "step": 47401 }, { "epoch": 3.8400842514581983, "grad_norm": 0.07481697201728821, "learning_rate": 8.88428822179216e-06, "loss": 0.2062, "step": 47402 }, { "epoch": 3.8401652624756966, "grad_norm": 0.07575065642595291, "learning_rate": 8.879787569197533e-06, "loss": 0.2033, "step": 47403 }, { "epoch": 3.840246273493195, "grad_norm": 0.06485338509082794, "learning_rate": 8.875286916602909e-06, "loss": 0.2298, "step": 47404 }, { "epoch": 3.8403272845106935, "grad_norm": 0.06933486461639404, "learning_rate": 8.870786264008282e-06, "loss": 0.2329, "step": 47405 }, { "epoch": 3.8404082955281917, "grad_norm": 0.07605458796024323, "learning_rate": 8.866285611413654e-06, "loss": 0.2347, "step": 47406 }, { "epoch": 3.84048930654569, "grad_norm": 0.07894845306873322, "learning_rate": 8.86178495881903e-06, "loss": 0.2167, "step": 47407 }, { "epoch": 3.8405703175631887, "grad_norm": 0.0782691240310669, "learning_rate": 8.857284306224403e-06, "loss": 0.264, "step": 47408 }, { "epoch": 3.840651328580687, "grad_norm": 0.07806120067834854, "learning_rate": 8.852783653629777e-06, "loss": 0.2047, "step": 47409 }, { "epoch": 3.840732339598185, "grad_norm": 0.06680679321289062, "learning_rate": 8.84828300103515e-06, "loss": 0.1997, "step": 47410 }, { "epoch": 3.840813350615684, "grad_norm": 0.09053415805101395, "learning_rate": 8.843782348440524e-06, "loss": 0.2392, "step": 47411 }, { "epoch": 3.840894361633182, "grad_norm": 0.08757054805755615, "learning_rate": 8.839281695845897e-06, "loss": 0.2489, "step": 47412 }, { "epoch": 3.8409753726506803, "grad_norm": 0.10022596269845963, "learning_rate": 8.834781043251273e-06, "loss": 0.2216, "step": 47413 }, { "epoch": 3.841056383668179, "grad_norm": 0.06728453189134598, "learning_rate": 8.830280390656645e-06, "loss": 0.1942, "step": 47414 }, { "epoch": 3.8411373946856773, "grad_norm": 0.07960285991430283, "learning_rate": 8.825779738062018e-06, "loss": 0.2327, "step": 47415 }, { "epoch": 3.8412184057031755, "grad_norm": 0.0784974917769432, "learning_rate": 8.821279085467394e-06, "loss": 0.2411, "step": 47416 }, { "epoch": 3.841299416720674, "grad_norm": 0.06307407468557358, "learning_rate": 8.816778432872767e-06, "loss": 0.198, "step": 47417 }, { "epoch": 3.8413804277381725, "grad_norm": 0.07177886366844177, "learning_rate": 8.81227778027814e-06, "loss": 0.2221, "step": 47418 }, { "epoch": 3.8414614387556707, "grad_norm": 0.07093466073274612, "learning_rate": 8.807777127683514e-06, "loss": 0.2285, "step": 47419 }, { "epoch": 3.8415424497731694, "grad_norm": 0.07861575484275818, "learning_rate": 8.803276475088888e-06, "loss": 0.2226, "step": 47420 }, { "epoch": 3.8416234607906676, "grad_norm": 0.08279623091220856, "learning_rate": 8.798775822494262e-06, "loss": 0.2354, "step": 47421 }, { "epoch": 3.841704471808166, "grad_norm": 0.07097911089658737, "learning_rate": 8.794275169899637e-06, "loss": 0.1956, "step": 47422 }, { "epoch": 3.8417854828256646, "grad_norm": 0.08817955106496811, "learning_rate": 8.789774517305009e-06, "loss": 0.2312, "step": 47423 }, { "epoch": 3.841866493843163, "grad_norm": 0.06684723496437073, "learning_rate": 8.785273864710382e-06, "loss": 0.2451, "step": 47424 }, { "epoch": 3.841947504860661, "grad_norm": 0.07929820567369461, "learning_rate": 8.780773212115758e-06, "loss": 0.2249, "step": 47425 }, { "epoch": 3.8420285158781593, "grad_norm": 0.069803386926651, "learning_rate": 8.776272559521131e-06, "loss": 0.2111, "step": 47426 }, { "epoch": 3.8421095268956575, "grad_norm": 0.07593082636594772, "learning_rate": 8.771771906926505e-06, "loss": 0.1928, "step": 47427 }, { "epoch": 3.8421905379131562, "grad_norm": 0.0848335325717926, "learning_rate": 8.767271254331879e-06, "loss": 0.2202, "step": 47428 }, { "epoch": 3.8422715489306545, "grad_norm": 0.07958927750587463, "learning_rate": 8.762770601737252e-06, "loss": 0.2404, "step": 47429 }, { "epoch": 3.8423525599481527, "grad_norm": 0.08629477769136429, "learning_rate": 8.758269949142626e-06, "loss": 0.2137, "step": 47430 }, { "epoch": 3.8424335709656514, "grad_norm": 0.0668138712644577, "learning_rate": 8.753769296548e-06, "loss": 0.2097, "step": 47431 }, { "epoch": 3.8425145819831497, "grad_norm": 0.06344747543334961, "learning_rate": 8.749268643953373e-06, "loss": 0.2491, "step": 47432 }, { "epoch": 3.842595593000648, "grad_norm": 0.05329788476228714, "learning_rate": 8.744767991358748e-06, "loss": 0.1815, "step": 47433 }, { "epoch": 3.8426766040181466, "grad_norm": 0.06972850859165192, "learning_rate": 8.740267338764122e-06, "loss": 0.234, "step": 47434 }, { "epoch": 3.842757615035645, "grad_norm": 0.06892770528793335, "learning_rate": 8.735766686169495e-06, "loss": 0.2041, "step": 47435 }, { "epoch": 3.842838626053143, "grad_norm": 0.07953707873821259, "learning_rate": 8.731266033574869e-06, "loss": 0.2542, "step": 47436 }, { "epoch": 3.8429196370706418, "grad_norm": 0.09926214069128036, "learning_rate": 8.726765380980243e-06, "loss": 0.2195, "step": 47437 }, { "epoch": 3.84300064808814, "grad_norm": 0.08672218024730682, "learning_rate": 8.722264728385616e-06, "loss": 0.2131, "step": 47438 }, { "epoch": 3.8430816591056383, "grad_norm": 0.07387741655111313, "learning_rate": 8.71776407579099e-06, "loss": 0.2321, "step": 47439 }, { "epoch": 3.843162670123137, "grad_norm": 0.10262610018253326, "learning_rate": 8.713263423196363e-06, "loss": 0.2587, "step": 47440 }, { "epoch": 3.843243681140635, "grad_norm": 0.0705871507525444, "learning_rate": 8.708762770601737e-06, "loss": 0.2413, "step": 47441 }, { "epoch": 3.8433246921581334, "grad_norm": 0.0629960298538208, "learning_rate": 8.704262118007112e-06, "loss": 0.2274, "step": 47442 }, { "epoch": 3.843405703175632, "grad_norm": 0.06705893576145172, "learning_rate": 8.699761465412486e-06, "loss": 0.2104, "step": 47443 }, { "epoch": 3.8434867141931304, "grad_norm": 0.0768185630440712, "learning_rate": 8.695260812817858e-06, "loss": 0.2321, "step": 47444 }, { "epoch": 3.8435677252106286, "grad_norm": 0.07738561928272247, "learning_rate": 8.690760160223233e-06, "loss": 0.2219, "step": 47445 }, { "epoch": 3.843648736228127, "grad_norm": 0.087811179459095, "learning_rate": 8.686259507628607e-06, "loss": 0.2231, "step": 47446 }, { "epoch": 3.8437297472456255, "grad_norm": 0.0710371658205986, "learning_rate": 8.68175885503398e-06, "loss": 0.2017, "step": 47447 }, { "epoch": 3.843810758263124, "grad_norm": 0.06479393690824509, "learning_rate": 8.677258202439354e-06, "loss": 0.1882, "step": 47448 }, { "epoch": 3.843891769280622, "grad_norm": 0.06025944650173187, "learning_rate": 8.672757549844728e-06, "loss": 0.2261, "step": 47449 }, { "epoch": 3.8439727802981203, "grad_norm": 0.05862006917595863, "learning_rate": 8.668256897250101e-06, "loss": 0.213, "step": 47450 }, { "epoch": 3.844053791315619, "grad_norm": 0.07549209147691727, "learning_rate": 8.663756244655476e-06, "loss": 0.2305, "step": 47451 }, { "epoch": 3.844134802333117, "grad_norm": 0.07217980176210403, "learning_rate": 8.65925559206085e-06, "loss": 0.2431, "step": 47452 }, { "epoch": 3.8442158133506155, "grad_norm": 0.07103247195482254, "learning_rate": 8.654754939466222e-06, "loss": 0.2087, "step": 47453 }, { "epoch": 3.844296824368114, "grad_norm": 0.06943337619304657, "learning_rate": 8.650254286871597e-06, "loss": 0.2815, "step": 47454 }, { "epoch": 3.8443778353856124, "grad_norm": 0.06522897630929947, "learning_rate": 8.645753634276971e-06, "loss": 0.1963, "step": 47455 }, { "epoch": 3.8444588464031106, "grad_norm": 0.07871834933757782, "learning_rate": 8.641252981682345e-06, "loss": 0.2545, "step": 47456 }, { "epoch": 3.8445398574206093, "grad_norm": 0.054104987531900406, "learning_rate": 8.636752329087718e-06, "loss": 0.2043, "step": 47457 }, { "epoch": 3.8446208684381076, "grad_norm": 0.06654616445302963, "learning_rate": 8.632251676493092e-06, "loss": 0.2432, "step": 47458 }, { "epoch": 3.844701879455606, "grad_norm": 0.09337121993303299, "learning_rate": 8.627751023898465e-06, "loss": 0.2573, "step": 47459 }, { "epoch": 3.8447828904731045, "grad_norm": 0.0734034851193428, "learning_rate": 8.62325037130384e-06, "loss": 0.237, "step": 47460 }, { "epoch": 3.8448639014906028, "grad_norm": 0.0719435065984726, "learning_rate": 8.618749718709214e-06, "loss": 0.1828, "step": 47461 }, { "epoch": 3.844944912508101, "grad_norm": 0.0561227910220623, "learning_rate": 8.614249066114586e-06, "loss": 0.1634, "step": 47462 }, { "epoch": 3.8450259235255997, "grad_norm": 0.06853722035884857, "learning_rate": 8.609748413519961e-06, "loss": 0.2296, "step": 47463 }, { "epoch": 3.845106934543098, "grad_norm": 0.07261405140161514, "learning_rate": 8.605247760925335e-06, "loss": 0.2562, "step": 47464 }, { "epoch": 3.845187945560596, "grad_norm": 0.06826271861791611, "learning_rate": 8.600747108330709e-06, "loss": 0.2318, "step": 47465 }, { "epoch": 3.845268956578095, "grad_norm": 0.07757086306810379, "learning_rate": 8.596246455736082e-06, "loss": 0.1849, "step": 47466 }, { "epoch": 3.845349967595593, "grad_norm": 0.07977145165205002, "learning_rate": 8.591745803141456e-06, "loss": 0.2338, "step": 47467 }, { "epoch": 3.8454309786130914, "grad_norm": 0.06785174459218979, "learning_rate": 8.58724515054683e-06, "loss": 0.2292, "step": 47468 }, { "epoch": 3.8455119896305896, "grad_norm": 0.07069353014230728, "learning_rate": 8.582744497952205e-06, "loss": 0.2281, "step": 47469 }, { "epoch": 3.8455930006480883, "grad_norm": 0.0715603157877922, "learning_rate": 8.578243845357577e-06, "loss": 0.2052, "step": 47470 }, { "epoch": 3.8456740116655865, "grad_norm": 0.08521236479282379, "learning_rate": 8.57374319276295e-06, "loss": 0.2068, "step": 47471 }, { "epoch": 3.845755022683085, "grad_norm": 0.06399007141590118, "learning_rate": 8.569242540168326e-06, "loss": 0.2399, "step": 47472 }, { "epoch": 3.845836033700583, "grad_norm": 0.06669558584690094, "learning_rate": 8.5647418875737e-06, "loss": 0.2103, "step": 47473 }, { "epoch": 3.8459170447180817, "grad_norm": 0.06358367949724197, "learning_rate": 8.560241234979071e-06, "loss": 0.2175, "step": 47474 }, { "epoch": 3.84599805573558, "grad_norm": 0.07232925295829773, "learning_rate": 8.555740582384446e-06, "loss": 0.264, "step": 47475 }, { "epoch": 3.846079066753078, "grad_norm": 0.0711951032280922, "learning_rate": 8.55123992978982e-06, "loss": 0.2351, "step": 47476 }, { "epoch": 3.846160077770577, "grad_norm": 0.07407108694314957, "learning_rate": 8.546739277195194e-06, "loss": 0.248, "step": 47477 }, { "epoch": 3.846241088788075, "grad_norm": 0.06144161522388458, "learning_rate": 8.542238624600569e-06, "loss": 0.2372, "step": 47478 }, { "epoch": 3.8463220998055734, "grad_norm": 0.07053658366203308, "learning_rate": 8.53773797200594e-06, "loss": 0.2325, "step": 47479 }, { "epoch": 3.846403110823072, "grad_norm": 0.08187015354633331, "learning_rate": 8.533237319411314e-06, "loss": 0.2449, "step": 47480 }, { "epoch": 3.8464841218405703, "grad_norm": 0.06435208022594452, "learning_rate": 8.52873666681669e-06, "loss": 0.2177, "step": 47481 }, { "epoch": 3.8465651328580686, "grad_norm": 0.06776495277881622, "learning_rate": 8.524236014222063e-06, "loss": 0.197, "step": 47482 }, { "epoch": 3.8466461438755672, "grad_norm": 0.07252959907054901, "learning_rate": 8.519735361627435e-06, "loss": 0.2276, "step": 47483 }, { "epoch": 3.8467271548930655, "grad_norm": 0.08307579159736633, "learning_rate": 8.51523470903281e-06, "loss": 0.2332, "step": 47484 }, { "epoch": 3.8468081659105637, "grad_norm": 0.07153511792421341, "learning_rate": 8.510734056438184e-06, "loss": 0.2307, "step": 47485 }, { "epoch": 3.8468891769280624, "grad_norm": 0.07924754917621613, "learning_rate": 8.506233403843558e-06, "loss": 0.2227, "step": 47486 }, { "epoch": 3.8469701879455607, "grad_norm": 0.08361861109733582, "learning_rate": 8.501732751248931e-06, "loss": 0.2129, "step": 47487 }, { "epoch": 3.847051198963059, "grad_norm": 0.06627753376960754, "learning_rate": 8.497232098654305e-06, "loss": 0.2173, "step": 47488 }, { "epoch": 3.8471322099805576, "grad_norm": 0.08294981718063354, "learning_rate": 8.492731446059679e-06, "loss": 0.2502, "step": 47489 }, { "epoch": 3.847213220998056, "grad_norm": 0.060485485941171646, "learning_rate": 8.488230793465054e-06, "loss": 0.1837, "step": 47490 }, { "epoch": 3.847294232015554, "grad_norm": 0.06141113489866257, "learning_rate": 8.483730140870427e-06, "loss": 0.2074, "step": 47491 }, { "epoch": 3.8473752430330523, "grad_norm": 0.07824747264385223, "learning_rate": 8.4792294882758e-06, "loss": 0.2496, "step": 47492 }, { "epoch": 3.847456254050551, "grad_norm": 0.058931589126586914, "learning_rate": 8.474728835681175e-06, "loss": 0.1947, "step": 47493 }, { "epoch": 3.8475372650680493, "grad_norm": 0.08158572763204575, "learning_rate": 8.470228183086548e-06, "loss": 0.2319, "step": 47494 }, { "epoch": 3.8476182760855475, "grad_norm": 0.06126769632101059, "learning_rate": 8.465727530491922e-06, "loss": 0.2286, "step": 47495 }, { "epoch": 3.8476992871030458, "grad_norm": 0.05657956004142761, "learning_rate": 8.461226877897295e-06, "loss": 0.1731, "step": 47496 }, { "epoch": 3.8477802981205445, "grad_norm": 0.07108405232429504, "learning_rate": 8.456726225302669e-06, "loss": 0.2344, "step": 47497 }, { "epoch": 3.8478613091380427, "grad_norm": 0.07252726703882217, "learning_rate": 8.452225572708043e-06, "loss": 0.218, "step": 47498 }, { "epoch": 3.847942320155541, "grad_norm": 0.07801450788974762, "learning_rate": 8.447724920113418e-06, "loss": 0.2097, "step": 47499 }, { "epoch": 3.8480233311730396, "grad_norm": 0.06933198869228363, "learning_rate": 8.44322426751879e-06, "loss": 0.2117, "step": 47500 }, { "epoch": 3.848104342190538, "grad_norm": 0.06913826614618301, "learning_rate": 8.438723614924163e-06, "loss": 0.2026, "step": 47501 }, { "epoch": 3.848185353208036, "grad_norm": 0.08163614571094513, "learning_rate": 8.434222962329539e-06, "loss": 0.1878, "step": 47502 }, { "epoch": 3.848266364225535, "grad_norm": 0.06842586398124695, "learning_rate": 8.429722309734912e-06, "loss": 0.2108, "step": 47503 }, { "epoch": 3.848347375243033, "grad_norm": 0.07575143128633499, "learning_rate": 8.425221657140284e-06, "loss": 0.2302, "step": 47504 }, { "epoch": 3.8484283862605313, "grad_norm": 0.06495438516139984, "learning_rate": 8.42072100454566e-06, "loss": 0.1875, "step": 47505 }, { "epoch": 3.84850939727803, "grad_norm": 0.06851106882095337, "learning_rate": 8.416220351951033e-06, "loss": 0.2367, "step": 47506 }, { "epoch": 3.8485904082955282, "grad_norm": 0.08247493207454681, "learning_rate": 8.411719699356407e-06, "loss": 0.2409, "step": 47507 }, { "epoch": 3.8486714193130265, "grad_norm": 0.07875152677297592, "learning_rate": 8.407219046761782e-06, "loss": 0.246, "step": 47508 }, { "epoch": 3.848752430330525, "grad_norm": 0.08067942410707474, "learning_rate": 8.402718394167154e-06, "loss": 0.2295, "step": 47509 }, { "epoch": 3.8488334413480234, "grad_norm": 0.07155679911375046, "learning_rate": 8.398217741572528e-06, "loss": 0.2452, "step": 47510 }, { "epoch": 3.8489144523655217, "grad_norm": 0.06344729661941528, "learning_rate": 8.393717088977903e-06, "loss": 0.2275, "step": 47511 }, { "epoch": 3.8489954633830203, "grad_norm": 0.06181887909770012, "learning_rate": 8.389216436383276e-06, "loss": 0.1943, "step": 47512 }, { "epoch": 3.8490764744005186, "grad_norm": 0.07348298281431198, "learning_rate": 8.384715783788648e-06, "loss": 0.1927, "step": 47513 }, { "epoch": 3.849157485418017, "grad_norm": 0.06353019177913666, "learning_rate": 8.380215131194024e-06, "loss": 0.2485, "step": 47514 }, { "epoch": 3.849238496435515, "grad_norm": 0.07545365393161774, "learning_rate": 8.375714478599397e-06, "loss": 0.2549, "step": 47515 }, { "epoch": 3.8493195074530138, "grad_norm": 0.07624714821577072, "learning_rate": 8.371213826004771e-06, "loss": 0.2302, "step": 47516 }, { "epoch": 3.849400518470512, "grad_norm": 0.06423209607601166, "learning_rate": 8.366713173410144e-06, "loss": 0.2249, "step": 47517 }, { "epoch": 3.8494815294880103, "grad_norm": 0.08231019973754883, "learning_rate": 8.362212520815518e-06, "loss": 0.2052, "step": 47518 }, { "epoch": 3.8495625405055085, "grad_norm": 0.06847333908081055, "learning_rate": 8.357711868220892e-06, "loss": 0.1864, "step": 47519 }, { "epoch": 3.849643551523007, "grad_norm": 0.07198107242584229, "learning_rate": 8.353211215626267e-06, "loss": 0.2418, "step": 47520 }, { "epoch": 3.8497245625405054, "grad_norm": 0.06648959964513779, "learning_rate": 8.34871056303164e-06, "loss": 0.2507, "step": 47521 }, { "epoch": 3.8498055735580037, "grad_norm": 0.06837810575962067, "learning_rate": 8.344209910437012e-06, "loss": 0.2359, "step": 47522 }, { "epoch": 3.8498865845755024, "grad_norm": 0.06529515236616135, "learning_rate": 8.339709257842388e-06, "loss": 0.2084, "step": 47523 }, { "epoch": 3.8499675955930006, "grad_norm": 0.058206506073474884, "learning_rate": 8.335208605247761e-06, "loss": 0.186, "step": 47524 }, { "epoch": 3.850048606610499, "grad_norm": 0.07718270272016525, "learning_rate": 8.330707952653135e-06, "loss": 0.2177, "step": 47525 }, { "epoch": 3.8501296176279975, "grad_norm": 0.07879050821065903, "learning_rate": 8.326207300058509e-06, "loss": 0.2429, "step": 47526 }, { "epoch": 3.850210628645496, "grad_norm": 0.0804138109087944, "learning_rate": 8.321706647463882e-06, "loss": 0.2276, "step": 47527 }, { "epoch": 3.850291639662994, "grad_norm": 0.07796037197113037, "learning_rate": 8.317205994869256e-06, "loss": 0.2471, "step": 47528 }, { "epoch": 3.8503726506804927, "grad_norm": 0.0824827328324318, "learning_rate": 8.312705342274631e-06, "loss": 0.2451, "step": 47529 }, { "epoch": 3.850453661697991, "grad_norm": 0.06677177548408508, "learning_rate": 8.308204689680003e-06, "loss": 0.2119, "step": 47530 }, { "epoch": 3.850534672715489, "grad_norm": 0.09421838819980621, "learning_rate": 8.303704037085378e-06, "loss": 0.2343, "step": 47531 }, { "epoch": 3.850615683732988, "grad_norm": 0.06931822001934052, "learning_rate": 8.299203384490752e-06, "loss": 0.2017, "step": 47532 }, { "epoch": 3.850696694750486, "grad_norm": 0.07877426594495773, "learning_rate": 8.294702731896126e-06, "loss": 0.2199, "step": 47533 }, { "epoch": 3.8507777057679844, "grad_norm": 0.07271779328584671, "learning_rate": 8.290202079301499e-06, "loss": 0.2334, "step": 47534 }, { "epoch": 3.850858716785483, "grad_norm": 0.0686742514371872, "learning_rate": 8.285701426706873e-06, "loss": 0.2049, "step": 47535 }, { "epoch": 3.8509397278029813, "grad_norm": 0.08275677263736725, "learning_rate": 8.281200774112246e-06, "loss": 0.2126, "step": 47536 }, { "epoch": 3.8510207388204796, "grad_norm": 0.059194862842559814, "learning_rate": 8.27670012151762e-06, "loss": 0.1964, "step": 47537 }, { "epoch": 3.851101749837978, "grad_norm": 0.05792196840047836, "learning_rate": 8.272199468922995e-06, "loss": 0.1838, "step": 47538 }, { "epoch": 3.8511827608554765, "grad_norm": 0.07841230928897858, "learning_rate": 8.267698816328367e-06, "loss": 0.2224, "step": 47539 }, { "epoch": 3.8512637718729748, "grad_norm": 0.06587585806846619, "learning_rate": 8.263198163733742e-06, "loss": 0.2365, "step": 47540 }, { "epoch": 3.851344782890473, "grad_norm": 0.06362347304821014, "learning_rate": 8.258697511139116e-06, "loss": 0.2163, "step": 47541 }, { "epoch": 3.8514257939079712, "grad_norm": 0.06370340287685394, "learning_rate": 8.25419685854449e-06, "loss": 0.2243, "step": 47542 }, { "epoch": 3.85150680492547, "grad_norm": 0.07428768277168274, "learning_rate": 8.249696205949863e-06, "loss": 0.2428, "step": 47543 }, { "epoch": 3.851587815942968, "grad_norm": 0.08028055727481842, "learning_rate": 8.245195553355237e-06, "loss": 0.2172, "step": 47544 }, { "epoch": 3.8516688269604664, "grad_norm": 0.06244838237762451, "learning_rate": 8.24069490076061e-06, "loss": 0.2239, "step": 47545 }, { "epoch": 3.851749837977965, "grad_norm": 0.07794243097305298, "learning_rate": 8.236194248165984e-06, "loss": 0.2254, "step": 47546 }, { "epoch": 3.8518308489954634, "grad_norm": 0.07461123168468475, "learning_rate": 8.231693595571358e-06, "loss": 0.2043, "step": 47547 }, { "epoch": 3.8519118600129616, "grad_norm": 0.07418804615736008, "learning_rate": 8.227192942976731e-06, "loss": 0.2453, "step": 47548 }, { "epoch": 3.8519928710304603, "grad_norm": 0.06374292820692062, "learning_rate": 8.222692290382107e-06, "loss": 0.205, "step": 47549 }, { "epoch": 3.8520738820479585, "grad_norm": 0.061300892382860184, "learning_rate": 8.21819163778748e-06, "loss": 0.223, "step": 47550 }, { "epoch": 3.8521548930654568, "grad_norm": 0.06799773126840591, "learning_rate": 8.213690985192854e-06, "loss": 0.2759, "step": 47551 }, { "epoch": 3.8522359040829555, "grad_norm": 0.07855547964572906, "learning_rate": 8.209190332598227e-06, "loss": 0.2448, "step": 47552 }, { "epoch": 3.8523169151004537, "grad_norm": 0.07066880166530609, "learning_rate": 8.204689680003601e-06, "loss": 0.2482, "step": 47553 }, { "epoch": 3.852397926117952, "grad_norm": 0.08537422120571136, "learning_rate": 8.200189027408975e-06, "loss": 0.2267, "step": 47554 }, { "epoch": 3.8524789371354506, "grad_norm": 0.07067983597517014, "learning_rate": 8.195688374814348e-06, "loss": 0.2373, "step": 47555 }, { "epoch": 3.852559948152949, "grad_norm": 0.058485932648181915, "learning_rate": 8.191187722219722e-06, "loss": 0.1834, "step": 47556 }, { "epoch": 3.852640959170447, "grad_norm": 0.06360162794589996, "learning_rate": 8.186687069625095e-06, "loss": 0.1954, "step": 47557 }, { "epoch": 3.852721970187946, "grad_norm": 0.07332653552293777, "learning_rate": 8.18218641703047e-06, "loss": 0.2465, "step": 47558 }, { "epoch": 3.852802981205444, "grad_norm": 0.07406601309776306, "learning_rate": 8.177685764435844e-06, "loss": 0.1827, "step": 47559 }, { "epoch": 3.8528839922229423, "grad_norm": 0.06085878983139992, "learning_rate": 8.173185111841216e-06, "loss": 0.247, "step": 47560 }, { "epoch": 3.8529650032404406, "grad_norm": 0.07216692715883255, "learning_rate": 8.168684459246591e-06, "loss": 0.2053, "step": 47561 }, { "epoch": 3.8530460142579392, "grad_norm": 0.07375588268041611, "learning_rate": 8.164183806651965e-06, "loss": 0.2224, "step": 47562 }, { "epoch": 3.8531270252754375, "grad_norm": 0.07635340839624405, "learning_rate": 8.159683154057339e-06, "loss": 0.2446, "step": 47563 }, { "epoch": 3.8532080362929357, "grad_norm": 0.08759939670562744, "learning_rate": 8.155182501462712e-06, "loss": 0.2265, "step": 47564 }, { "epoch": 3.853289047310434, "grad_norm": 0.0754440650343895, "learning_rate": 8.150681848868086e-06, "loss": 0.2195, "step": 47565 }, { "epoch": 3.8533700583279327, "grad_norm": 0.08304356038570404, "learning_rate": 8.14618119627346e-06, "loss": 0.2309, "step": 47566 }, { "epoch": 3.853451069345431, "grad_norm": 0.07643993943929672, "learning_rate": 8.141680543678835e-06, "loss": 0.2244, "step": 47567 }, { "epoch": 3.853532080362929, "grad_norm": 0.08395697921514511, "learning_rate": 8.137179891084208e-06, "loss": 0.1951, "step": 47568 }, { "epoch": 3.853613091380428, "grad_norm": 0.07898442447185516, "learning_rate": 8.13267923848958e-06, "loss": 0.2543, "step": 47569 }, { "epoch": 3.853694102397926, "grad_norm": 0.0748404785990715, "learning_rate": 8.128178585894956e-06, "loss": 0.2381, "step": 47570 }, { "epoch": 3.8537751134154243, "grad_norm": 0.05667143315076828, "learning_rate": 8.12367793330033e-06, "loss": 0.2349, "step": 47571 }, { "epoch": 3.853856124432923, "grad_norm": 0.058684345334768295, "learning_rate": 8.119177280705703e-06, "loss": 0.2218, "step": 47572 }, { "epoch": 3.8539371354504213, "grad_norm": 0.05510832741856575, "learning_rate": 8.114676628111076e-06, "loss": 0.1891, "step": 47573 }, { "epoch": 3.8540181464679195, "grad_norm": 0.07431642711162567, "learning_rate": 8.11017597551645e-06, "loss": 0.2462, "step": 47574 }, { "epoch": 3.854099157485418, "grad_norm": 0.07371494174003601, "learning_rate": 8.105675322921824e-06, "loss": 0.218, "step": 47575 }, { "epoch": 3.8541801685029164, "grad_norm": 0.07297206670045853, "learning_rate": 8.101174670327199e-06, "loss": 0.23, "step": 47576 }, { "epoch": 3.8542611795204147, "grad_norm": 0.07745885103940964, "learning_rate": 8.096674017732573e-06, "loss": 0.2658, "step": 47577 }, { "epoch": 3.8543421905379134, "grad_norm": 0.07080569863319397, "learning_rate": 8.092173365137944e-06, "loss": 0.2377, "step": 47578 }, { "epoch": 3.8544232015554116, "grad_norm": 0.07197723537683487, "learning_rate": 8.08767271254332e-06, "loss": 0.2297, "step": 47579 }, { "epoch": 3.85450421257291, "grad_norm": 0.0773099884390831, "learning_rate": 8.083172059948693e-06, "loss": 0.2535, "step": 47580 }, { "epoch": 3.8545852235904086, "grad_norm": 0.06363910436630249, "learning_rate": 8.078671407354067e-06, "loss": 0.2199, "step": 47581 }, { "epoch": 3.854666234607907, "grad_norm": 0.06943506747484207, "learning_rate": 8.07417075475944e-06, "loss": 0.2324, "step": 47582 }, { "epoch": 3.854747245625405, "grad_norm": 0.0709523931145668, "learning_rate": 8.069670102164814e-06, "loss": 0.2093, "step": 47583 }, { "epoch": 3.8548282566429033, "grad_norm": 0.06644728779792786, "learning_rate": 8.065169449570188e-06, "loss": 0.1882, "step": 47584 }, { "epoch": 3.8549092676604015, "grad_norm": 0.06858302652835846, "learning_rate": 8.060668796975563e-06, "loss": 0.2329, "step": 47585 }, { "epoch": 3.8549902786779002, "grad_norm": 0.06228305771946907, "learning_rate": 8.056168144380935e-06, "loss": 0.2061, "step": 47586 }, { "epoch": 3.8550712896953985, "grad_norm": 0.07189958542585373, "learning_rate": 8.051667491786309e-06, "loss": 0.2127, "step": 47587 }, { "epoch": 3.8551523007128967, "grad_norm": 0.07673601806163788, "learning_rate": 8.047166839191684e-06, "loss": 0.181, "step": 47588 }, { "epoch": 3.8552333117303954, "grad_norm": 0.07827714085578918, "learning_rate": 8.042666186597057e-06, "loss": 0.2282, "step": 47589 }, { "epoch": 3.8553143227478937, "grad_norm": 0.08124902099370956, "learning_rate": 8.03816553400243e-06, "loss": 0.2034, "step": 47590 }, { "epoch": 3.855395333765392, "grad_norm": 0.08442587405443192, "learning_rate": 8.033664881407805e-06, "loss": 0.2236, "step": 47591 }, { "epoch": 3.8554763447828906, "grad_norm": 0.06765611469745636, "learning_rate": 8.029164228813178e-06, "loss": 0.2152, "step": 47592 }, { "epoch": 3.855557355800389, "grad_norm": 0.06260386109352112, "learning_rate": 8.024663576218552e-06, "loss": 0.2243, "step": 47593 }, { "epoch": 3.855638366817887, "grad_norm": 0.06030689924955368, "learning_rate": 8.020162923623927e-06, "loss": 0.2254, "step": 47594 }, { "epoch": 3.8557193778353858, "grad_norm": 0.06486286222934723, "learning_rate": 8.015662271029299e-06, "loss": 0.2008, "step": 47595 }, { "epoch": 3.855800388852884, "grad_norm": 0.06872467696666718, "learning_rate": 8.011161618434673e-06, "loss": 0.2458, "step": 47596 }, { "epoch": 3.8558813998703823, "grad_norm": 0.06587346643209457, "learning_rate": 8.006660965840048e-06, "loss": 0.2315, "step": 47597 }, { "epoch": 3.855962410887881, "grad_norm": 0.07045286148786545, "learning_rate": 8.002160313245422e-06, "loss": 0.1869, "step": 47598 }, { "epoch": 3.856043421905379, "grad_norm": 0.09421037882566452, "learning_rate": 7.997659660650794e-06, "loss": 0.2215, "step": 47599 }, { "epoch": 3.8561244329228774, "grad_norm": 0.09723097085952759, "learning_rate": 7.993159008056169e-06, "loss": 0.2381, "step": 47600 }, { "epoch": 3.856205443940376, "grad_norm": 0.0791918933391571, "learning_rate": 7.988658355461542e-06, "loss": 0.2435, "step": 47601 }, { "epoch": 3.8562864549578744, "grad_norm": 0.06977692991495132, "learning_rate": 7.984157702866916e-06, "loss": 0.2726, "step": 47602 }, { "epoch": 3.8563674659753726, "grad_norm": 0.09157250821590424, "learning_rate": 7.97965705027229e-06, "loss": 0.234, "step": 47603 }, { "epoch": 3.8564484769928713, "grad_norm": 0.07410857826471329, "learning_rate": 7.975156397677663e-06, "loss": 0.2527, "step": 47604 }, { "epoch": 3.8565294880103695, "grad_norm": 0.05430734157562256, "learning_rate": 7.970655745083037e-06, "loss": 0.209, "step": 47605 }, { "epoch": 3.856610499027868, "grad_norm": 0.08034774661064148, "learning_rate": 7.966155092488412e-06, "loss": 0.208, "step": 47606 }, { "epoch": 3.856691510045366, "grad_norm": 0.07333406805992126, "learning_rate": 7.961654439893786e-06, "loss": 0.2328, "step": 47607 }, { "epoch": 3.8567725210628643, "grad_norm": 0.06615491956472397, "learning_rate": 7.957153787299158e-06, "loss": 0.1903, "step": 47608 }, { "epoch": 3.856853532080363, "grad_norm": 0.07822062820196152, "learning_rate": 7.952653134704533e-06, "loss": 0.2189, "step": 47609 }, { "epoch": 3.856934543097861, "grad_norm": 0.06766904890537262, "learning_rate": 7.948152482109907e-06, "loss": 0.2208, "step": 47610 }, { "epoch": 3.8570155541153595, "grad_norm": 0.08099085837602615, "learning_rate": 7.94365182951528e-06, "loss": 0.2364, "step": 47611 }, { "epoch": 3.857096565132858, "grad_norm": 0.06338600814342499, "learning_rate": 7.939151176920654e-06, "loss": 0.2428, "step": 47612 }, { "epoch": 3.8571775761503564, "grad_norm": 0.06889151781797409, "learning_rate": 7.934650524326027e-06, "loss": 0.232, "step": 47613 }, { "epoch": 3.8572585871678546, "grad_norm": 0.08298153430223465, "learning_rate": 7.930149871731401e-06, "loss": 0.2129, "step": 47614 }, { "epoch": 3.8573395981853533, "grad_norm": 0.09067533910274506, "learning_rate": 7.925649219136776e-06, "loss": 0.2504, "step": 47615 }, { "epoch": 3.8574206092028516, "grad_norm": 0.07296990603208542, "learning_rate": 7.921148566542148e-06, "loss": 0.208, "step": 47616 }, { "epoch": 3.85750162022035, "grad_norm": 0.09865029901266098, "learning_rate": 7.916647913947522e-06, "loss": 0.2474, "step": 47617 }, { "epoch": 3.8575826312378485, "grad_norm": 0.06511934846639633, "learning_rate": 7.912147261352897e-06, "loss": 0.2175, "step": 47618 }, { "epoch": 3.8576636422553467, "grad_norm": 0.08340641856193542, "learning_rate": 7.90764660875827e-06, "loss": 0.2267, "step": 47619 }, { "epoch": 3.857744653272845, "grad_norm": 0.06559319794178009, "learning_rate": 7.903145956163644e-06, "loss": 0.2252, "step": 47620 }, { "epoch": 3.8578256642903437, "grad_norm": 0.07104986160993576, "learning_rate": 7.898645303569018e-06, "loss": 0.2364, "step": 47621 }, { "epoch": 3.857906675307842, "grad_norm": 0.06943206489086151, "learning_rate": 7.894144650974391e-06, "loss": 0.2119, "step": 47622 }, { "epoch": 3.85798768632534, "grad_norm": 0.05858639255166054, "learning_rate": 7.889643998379765e-06, "loss": 0.2225, "step": 47623 }, { "epoch": 3.858068697342839, "grad_norm": 0.0890335813164711, "learning_rate": 7.88514334578514e-06, "loss": 0.2403, "step": 47624 }, { "epoch": 3.858149708360337, "grad_norm": 0.061061326414346695, "learning_rate": 7.880642693190512e-06, "loss": 0.2216, "step": 47625 }, { "epoch": 3.8582307193778353, "grad_norm": 0.049848560243844986, "learning_rate": 7.876142040595886e-06, "loss": 0.2138, "step": 47626 }, { "epoch": 3.858311730395334, "grad_norm": 0.08001705259084702, "learning_rate": 7.871641388001261e-06, "loss": 0.2296, "step": 47627 }, { "epoch": 3.8583927414128323, "grad_norm": 0.09020736813545227, "learning_rate": 7.867140735406635e-06, "loss": 0.2792, "step": 47628 }, { "epoch": 3.8584737524303305, "grad_norm": 0.09471940994262695, "learning_rate": 7.862640082812008e-06, "loss": 0.2496, "step": 47629 }, { "epoch": 3.8585547634478288, "grad_norm": 0.08428974449634552, "learning_rate": 7.858139430217382e-06, "loss": 0.2456, "step": 47630 }, { "epoch": 3.858635774465327, "grad_norm": 0.0708090215921402, "learning_rate": 7.853638777622756e-06, "loss": 0.2004, "step": 47631 }, { "epoch": 3.8587167854828257, "grad_norm": 0.07518108189105988, "learning_rate": 7.84913812502813e-06, "loss": 0.2188, "step": 47632 }, { "epoch": 3.858797796500324, "grad_norm": 0.07420327514410019, "learning_rate": 7.844637472433503e-06, "loss": 0.2057, "step": 47633 }, { "epoch": 3.858878807517822, "grad_norm": 0.08655344694852829, "learning_rate": 7.840136819838876e-06, "loss": 0.2228, "step": 47634 }, { "epoch": 3.858959818535321, "grad_norm": 0.0829836055636406, "learning_rate": 7.83563616724425e-06, "loss": 0.2225, "step": 47635 }, { "epoch": 3.859040829552819, "grad_norm": 0.06697103381156921, "learning_rate": 7.831135514649625e-06, "loss": 0.1977, "step": 47636 }, { "epoch": 3.8591218405703174, "grad_norm": 0.0661018043756485, "learning_rate": 7.826634862054999e-06, "loss": 0.2293, "step": 47637 }, { "epoch": 3.859202851587816, "grad_norm": 0.07524219155311584, "learning_rate": 7.822134209460373e-06, "loss": 0.2315, "step": 47638 }, { "epoch": 3.8592838626053143, "grad_norm": 0.06726586073637009, "learning_rate": 7.817633556865746e-06, "loss": 0.2015, "step": 47639 }, { "epoch": 3.8593648736228126, "grad_norm": 0.06795717775821686, "learning_rate": 7.81313290427112e-06, "loss": 0.2322, "step": 47640 }, { "epoch": 3.8594458846403112, "grad_norm": 0.07243220508098602, "learning_rate": 7.808632251676493e-06, "loss": 0.2447, "step": 47641 }, { "epoch": 3.8595268956578095, "grad_norm": 0.06129208952188492, "learning_rate": 7.804131599081867e-06, "loss": 0.2262, "step": 47642 }, { "epoch": 3.8596079066753077, "grad_norm": 0.06558024883270264, "learning_rate": 7.79963094648724e-06, "loss": 0.2312, "step": 47643 }, { "epoch": 3.8596889176928064, "grad_norm": 0.07786918431520462, "learning_rate": 7.795130293892614e-06, "loss": 0.198, "step": 47644 }, { "epoch": 3.8597699287103047, "grad_norm": 0.06113965064287186, "learning_rate": 7.79062964129799e-06, "loss": 0.2327, "step": 47645 }, { "epoch": 3.859850939727803, "grad_norm": 0.06368988752365112, "learning_rate": 7.786128988703361e-06, "loss": 0.2153, "step": 47646 }, { "epoch": 3.8599319507453016, "grad_norm": 0.06596409529447556, "learning_rate": 7.781628336108737e-06, "loss": 0.2012, "step": 47647 }, { "epoch": 3.8600129617628, "grad_norm": 0.06855443865060806, "learning_rate": 7.77712768351411e-06, "loss": 0.1867, "step": 47648 }, { "epoch": 3.860093972780298, "grad_norm": 0.08048706501722336, "learning_rate": 7.772627030919484e-06, "loss": 0.231, "step": 47649 }, { "epoch": 3.8601749837977968, "grad_norm": 0.06914348155260086, "learning_rate": 7.768126378324857e-06, "loss": 0.1876, "step": 47650 }, { "epoch": 3.860255994815295, "grad_norm": 0.059210408478975296, "learning_rate": 7.763625725730231e-06, "loss": 0.1888, "step": 47651 }, { "epoch": 3.8603370058327933, "grad_norm": 0.07270028442144394, "learning_rate": 7.759125073135605e-06, "loss": 0.2167, "step": 47652 }, { "epoch": 3.8604180168502915, "grad_norm": 0.07310686260461807, "learning_rate": 7.754624420540978e-06, "loss": 0.219, "step": 47653 }, { "epoch": 3.8604990278677898, "grad_norm": 0.07835370302200317, "learning_rate": 7.750123767946354e-06, "loss": 0.2336, "step": 47654 }, { "epoch": 3.8605800388852884, "grad_norm": 0.06291946768760681, "learning_rate": 7.745623115351725e-06, "loss": 0.2268, "step": 47655 }, { "epoch": 3.8606610499027867, "grad_norm": 0.07243097573518753, "learning_rate": 7.7411224627571e-06, "loss": 0.2729, "step": 47656 }, { "epoch": 3.860742060920285, "grad_norm": 0.07852333039045334, "learning_rate": 7.736621810162474e-06, "loss": 0.2527, "step": 47657 }, { "epoch": 3.8608230719377836, "grad_norm": 0.0972912460565567, "learning_rate": 7.732121157567848e-06, "loss": 0.2353, "step": 47658 }, { "epoch": 3.860904082955282, "grad_norm": 0.05944295972585678, "learning_rate": 7.727620504973222e-06, "loss": 0.1901, "step": 47659 }, { "epoch": 3.86098509397278, "grad_norm": 0.06972935795783997, "learning_rate": 7.723119852378595e-06, "loss": 0.2063, "step": 47660 }, { "epoch": 3.861066104990279, "grad_norm": 0.07527864724397659, "learning_rate": 7.718619199783969e-06, "loss": 0.2041, "step": 47661 }, { "epoch": 3.861147116007777, "grad_norm": 0.06941043585538864, "learning_rate": 7.714118547189342e-06, "loss": 0.2499, "step": 47662 }, { "epoch": 3.8612281270252753, "grad_norm": 0.06668348610401154, "learning_rate": 7.709617894594718e-06, "loss": 0.2021, "step": 47663 }, { "epoch": 3.861309138042774, "grad_norm": 0.06672746688127518, "learning_rate": 7.70511724200009e-06, "loss": 0.2108, "step": 47664 }, { "epoch": 3.8613901490602722, "grad_norm": 0.07876428961753845, "learning_rate": 7.700616589405465e-06, "loss": 0.2696, "step": 47665 }, { "epoch": 3.8614711600777705, "grad_norm": 0.07343681901693344, "learning_rate": 7.696115936810838e-06, "loss": 0.2279, "step": 47666 }, { "epoch": 3.861552171095269, "grad_norm": 0.06768275797367096, "learning_rate": 7.691615284216212e-06, "loss": 0.233, "step": 47667 }, { "epoch": 3.8616331821127674, "grad_norm": 0.07021360844373703, "learning_rate": 7.687114631621586e-06, "loss": 0.2214, "step": 47668 }, { "epoch": 3.8617141931302656, "grad_norm": 0.06176622211933136, "learning_rate": 7.68261397902696e-06, "loss": 0.2481, "step": 47669 }, { "epoch": 3.8617952041477643, "grad_norm": 0.08668699860572815, "learning_rate": 7.678113326432333e-06, "loss": 0.2582, "step": 47670 }, { "epoch": 3.8618762151652626, "grad_norm": 0.07205960154533386, "learning_rate": 7.673612673837706e-06, "loss": 0.2159, "step": 47671 }, { "epoch": 3.861957226182761, "grad_norm": 0.0772663801908493, "learning_rate": 7.66911202124308e-06, "loss": 0.2653, "step": 47672 }, { "epoch": 3.862038237200259, "grad_norm": 0.10105203837156296, "learning_rate": 7.664611368648454e-06, "loss": 0.2585, "step": 47673 }, { "epoch": 3.8621192482177578, "grad_norm": 0.06276296079158783, "learning_rate": 7.660110716053829e-06, "loss": 0.1984, "step": 47674 }, { "epoch": 3.862200259235256, "grad_norm": 0.06691402941942215, "learning_rate": 7.655610063459203e-06, "loss": 0.2076, "step": 47675 }, { "epoch": 3.8622812702527543, "grad_norm": 0.06842536479234695, "learning_rate": 7.651109410864575e-06, "loss": 0.2234, "step": 47676 }, { "epoch": 3.8623622812702525, "grad_norm": 0.08435767143964767, "learning_rate": 7.64660875826995e-06, "loss": 0.2745, "step": 47677 }, { "epoch": 3.862443292287751, "grad_norm": 0.06818706542253494, "learning_rate": 7.642108105675323e-06, "loss": 0.2251, "step": 47678 }, { "epoch": 3.8625243033052494, "grad_norm": 0.07116655260324478, "learning_rate": 7.637607453080697e-06, "loss": 0.2734, "step": 47679 }, { "epoch": 3.8626053143227477, "grad_norm": 0.06385378539562225, "learning_rate": 7.63310680048607e-06, "loss": 0.2295, "step": 47680 }, { "epoch": 3.8626863253402464, "grad_norm": 0.07078246772289276, "learning_rate": 7.628606147891444e-06, "loss": 0.2238, "step": 47681 }, { "epoch": 3.8627673363577446, "grad_norm": 0.09647151082754135, "learning_rate": 7.624105495296819e-06, "loss": 0.2137, "step": 47682 }, { "epoch": 3.862848347375243, "grad_norm": 0.07162981480360031, "learning_rate": 7.619604842702192e-06, "loss": 0.2156, "step": 47683 }, { "epoch": 3.8629293583927415, "grad_norm": 0.06130341812968254, "learning_rate": 7.615104190107567e-06, "loss": 0.2207, "step": 47684 }, { "epoch": 3.86301036941024, "grad_norm": 0.0788789615035057, "learning_rate": 7.6106035375129395e-06, "loss": 0.2426, "step": 47685 }, { "epoch": 3.863091380427738, "grad_norm": 0.07394041866064072, "learning_rate": 7.606102884918313e-06, "loss": 0.2206, "step": 47686 }, { "epoch": 3.8631723914452367, "grad_norm": 0.07278414815664291, "learning_rate": 7.6016022323236875e-06, "loss": 0.2395, "step": 47687 }, { "epoch": 3.863253402462735, "grad_norm": 0.07134126871824265, "learning_rate": 7.597101579729061e-06, "loss": 0.2305, "step": 47688 }, { "epoch": 3.863334413480233, "grad_norm": 0.07982486486434937, "learning_rate": 7.592600927134434e-06, "loss": 0.2611, "step": 47689 }, { "epoch": 3.863415424497732, "grad_norm": 0.06409557163715363, "learning_rate": 7.588100274539808e-06, "loss": 0.2097, "step": 47690 }, { "epoch": 3.86349643551523, "grad_norm": 0.0836891233921051, "learning_rate": 7.583599621945183e-06, "loss": 0.214, "step": 47691 }, { "epoch": 3.8635774465327284, "grad_norm": 0.06255742907524109, "learning_rate": 7.579098969350556e-06, "loss": 0.2042, "step": 47692 }, { "epoch": 3.863658457550227, "grad_norm": 0.08783721923828125, "learning_rate": 7.574598316755931e-06, "loss": 0.2644, "step": 47693 }, { "epoch": 3.8637394685677253, "grad_norm": 0.05096682161092758, "learning_rate": 7.570097664161304e-06, "loss": 0.166, "step": 47694 }, { "epoch": 3.8638204795852236, "grad_norm": 0.07620611041784286, "learning_rate": 7.565597011566677e-06, "loss": 0.2563, "step": 47695 }, { "epoch": 3.863901490602722, "grad_norm": 0.0640488713979721, "learning_rate": 7.561096358972052e-06, "loss": 0.2226, "step": 47696 }, { "epoch": 3.8639825016202205, "grad_norm": 0.05692597106099129, "learning_rate": 7.556595706377425e-06, "loss": 0.2158, "step": 47697 }, { "epoch": 3.8640635126377187, "grad_norm": 0.08596763014793396, "learning_rate": 7.552095053782798e-06, "loss": 0.2586, "step": 47698 }, { "epoch": 3.864144523655217, "grad_norm": 0.06715045869350433, "learning_rate": 7.5475944011881725e-06, "loss": 0.2337, "step": 47699 }, { "epoch": 3.8642255346727152, "grad_norm": 0.06749710440635681, "learning_rate": 7.543093748593547e-06, "loss": 0.2108, "step": 47700 }, { "epoch": 3.864306545690214, "grad_norm": 0.08679332584142685, "learning_rate": 7.5385930959989205e-06, "loss": 0.2405, "step": 47701 }, { "epoch": 3.864387556707712, "grad_norm": 0.07425546646118164, "learning_rate": 7.534092443404293e-06, "loss": 0.2116, "step": 47702 }, { "epoch": 3.8644685677252104, "grad_norm": 0.07318223267793655, "learning_rate": 7.529591790809668e-06, "loss": 0.2119, "step": 47703 }, { "epoch": 3.864549578742709, "grad_norm": 0.0675434023141861, "learning_rate": 7.525091138215041e-06, "loss": 0.2143, "step": 47704 }, { "epoch": 3.8646305897602073, "grad_norm": 0.07143059372901917, "learning_rate": 7.520590485620416e-06, "loss": 0.2349, "step": 47705 }, { "epoch": 3.8647116007777056, "grad_norm": 0.07418014854192734, "learning_rate": 7.516089833025789e-06, "loss": 0.2372, "step": 47706 }, { "epoch": 3.8647926117952043, "grad_norm": 0.0735107809305191, "learning_rate": 7.511589180431162e-06, "loss": 0.2023, "step": 47707 }, { "epoch": 3.8648736228127025, "grad_norm": 0.06702899187803268, "learning_rate": 7.507088527836537e-06, "loss": 0.2527, "step": 47708 }, { "epoch": 3.8649546338302008, "grad_norm": 0.0680052861571312, "learning_rate": 7.502587875241911e-06, "loss": 0.2181, "step": 47709 }, { "epoch": 3.8650356448476995, "grad_norm": 0.07556014508008957, "learning_rate": 7.498087222647285e-06, "loss": 0.1964, "step": 47710 }, { "epoch": 3.8651166558651977, "grad_norm": 0.06902261078357697, "learning_rate": 7.493586570052657e-06, "loss": 0.212, "step": 47711 }, { "epoch": 3.865197666882696, "grad_norm": 0.07958406209945679, "learning_rate": 7.489085917458032e-06, "loss": 0.2346, "step": 47712 }, { "epoch": 3.8652786779001946, "grad_norm": 0.076631560921669, "learning_rate": 7.4845852648634055e-06, "loss": 0.2123, "step": 47713 }, { "epoch": 3.865359688917693, "grad_norm": 0.06917714327573776, "learning_rate": 7.48008461226878e-06, "loss": 0.2468, "step": 47714 }, { "epoch": 3.865440699935191, "grad_norm": 0.08754683285951614, "learning_rate": 7.475583959674153e-06, "loss": 0.2226, "step": 47715 }, { "epoch": 3.86552171095269, "grad_norm": 0.07786770164966583, "learning_rate": 7.471083307079526e-06, "loss": 0.2222, "step": 47716 }, { "epoch": 3.865602721970188, "grad_norm": 0.06999102234840393, "learning_rate": 7.466582654484901e-06, "loss": 0.2052, "step": 47717 }, { "epoch": 3.8656837329876863, "grad_norm": 0.0734010711312294, "learning_rate": 7.462082001890275e-06, "loss": 0.1994, "step": 47718 }, { "epoch": 3.8657647440051845, "grad_norm": 0.07305295020341873, "learning_rate": 7.457581349295648e-06, "loss": 0.2284, "step": 47719 }, { "epoch": 3.8658457550226832, "grad_norm": 0.07487478107213974, "learning_rate": 7.4530806967010215e-06, "loss": 0.2056, "step": 47720 }, { "epoch": 3.8659267660401815, "grad_norm": 0.07713651657104492, "learning_rate": 7.448580044106396e-06, "loss": 0.2125, "step": 47721 }, { "epoch": 3.8660077770576797, "grad_norm": 0.07192755490541458, "learning_rate": 7.44407939151177e-06, "loss": 0.2273, "step": 47722 }, { "epoch": 3.866088788075178, "grad_norm": 0.07173226028680801, "learning_rate": 7.439578738917144e-06, "loss": 0.2011, "step": 47723 }, { "epoch": 3.8661697990926767, "grad_norm": 0.09787497669458389, "learning_rate": 7.435078086322517e-06, "loss": 0.2243, "step": 47724 }, { "epoch": 3.866250810110175, "grad_norm": 0.09983297437429428, "learning_rate": 7.43057743372789e-06, "loss": 0.277, "step": 47725 }, { "epoch": 3.866331821127673, "grad_norm": 0.0823056623339653, "learning_rate": 7.426076781133265e-06, "loss": 0.2216, "step": 47726 }, { "epoch": 3.866412832145172, "grad_norm": 0.059470660984516144, "learning_rate": 7.421576128538639e-06, "loss": 0.2341, "step": 47727 }, { "epoch": 3.86649384316267, "grad_norm": 0.06996002048254013, "learning_rate": 7.417075475944012e-06, "loss": 0.2243, "step": 47728 }, { "epoch": 3.8665748541801683, "grad_norm": 0.07670509815216064, "learning_rate": 7.412574823349386e-06, "loss": 0.1981, "step": 47729 }, { "epoch": 3.866655865197667, "grad_norm": 0.07600407302379608, "learning_rate": 7.40807417075476e-06, "loss": 0.2125, "step": 47730 }, { "epoch": 3.8667368762151653, "grad_norm": 0.0708894208073616, "learning_rate": 7.403573518160134e-06, "loss": 0.2284, "step": 47731 }, { "epoch": 3.8668178872326635, "grad_norm": 0.07632531970739365, "learning_rate": 7.3990728655655065e-06, "loss": 0.2346, "step": 47732 }, { "epoch": 3.866898898250162, "grad_norm": 0.06774915009737015, "learning_rate": 7.394572212970881e-06, "loss": 0.2466, "step": 47733 }, { "epoch": 3.8669799092676604, "grad_norm": 0.07281273603439331, "learning_rate": 7.3900715603762545e-06, "loss": 0.2315, "step": 47734 }, { "epoch": 3.8670609202851587, "grad_norm": 0.06819894909858704, "learning_rate": 7.385570907781629e-06, "loss": 0.2103, "step": 47735 }, { "epoch": 3.8671419313026574, "grad_norm": 0.07330294698476791, "learning_rate": 7.381070255187003e-06, "loss": 0.2339, "step": 47736 }, { "epoch": 3.8672229423201556, "grad_norm": 0.0813525915145874, "learning_rate": 7.376569602592376e-06, "loss": 0.2204, "step": 47737 }, { "epoch": 3.867303953337654, "grad_norm": 0.05763068050146103, "learning_rate": 7.37206894999775e-06, "loss": 0.2022, "step": 47738 }, { "epoch": 3.8673849643551526, "grad_norm": 0.07878169417381287, "learning_rate": 7.367568297403124e-06, "loss": 0.2099, "step": 47739 }, { "epoch": 3.867465975372651, "grad_norm": 0.06456834822893143, "learning_rate": 7.363067644808498e-06, "loss": 0.2102, "step": 47740 }, { "epoch": 3.867546986390149, "grad_norm": 0.06957262009382248, "learning_rate": 7.358566992213871e-06, "loss": 0.2178, "step": 47741 }, { "epoch": 3.8676279974076473, "grad_norm": 0.06936120241880417, "learning_rate": 7.354066339619245e-06, "loss": 0.2231, "step": 47742 }, { "epoch": 3.867709008425146, "grad_norm": 0.08154875040054321, "learning_rate": 7.349565687024619e-06, "loss": 0.2084, "step": 47743 }, { "epoch": 3.8677900194426442, "grad_norm": 0.07597959786653519, "learning_rate": 7.345065034429993e-06, "loss": 0.2453, "step": 47744 }, { "epoch": 3.8678710304601425, "grad_norm": 0.05550116300582886, "learning_rate": 7.340564381835366e-06, "loss": 0.2199, "step": 47745 }, { "epoch": 3.8679520414776407, "grad_norm": 0.06986594945192337, "learning_rate": 7.33606372924074e-06, "loss": 0.2177, "step": 47746 }, { "epoch": 3.8680330524951394, "grad_norm": 0.09499722719192505, "learning_rate": 7.331563076646114e-06, "loss": 0.2531, "step": 47747 }, { "epoch": 3.8681140635126376, "grad_norm": 0.07436975091695786, "learning_rate": 7.327062424051488e-06, "loss": 0.2138, "step": 47748 }, { "epoch": 3.868195074530136, "grad_norm": 0.08323274552822113, "learning_rate": 7.322561771456862e-06, "loss": 0.2353, "step": 47749 }, { "epoch": 3.8682760855476346, "grad_norm": 0.06948281079530716, "learning_rate": 7.318061118862235e-06, "loss": 0.2377, "step": 47750 }, { "epoch": 3.868357096565133, "grad_norm": 0.08530833572149277, "learning_rate": 7.313560466267609e-06, "loss": 0.2491, "step": 47751 }, { "epoch": 3.868438107582631, "grad_norm": 0.07238127291202545, "learning_rate": 7.309059813672984e-06, "loss": 0.2442, "step": 47752 }, { "epoch": 3.8685191186001298, "grad_norm": 0.06473894417285919, "learning_rate": 7.304559161078357e-06, "loss": 0.2375, "step": 47753 }, { "epoch": 3.868600129617628, "grad_norm": 0.06814999133348465, "learning_rate": 7.30005850848373e-06, "loss": 0.2127, "step": 47754 }, { "epoch": 3.8686811406351262, "grad_norm": 0.08416423946619034, "learning_rate": 7.295557855889104e-06, "loss": 0.2159, "step": 47755 }, { "epoch": 3.868762151652625, "grad_norm": 0.06730145215988159, "learning_rate": 7.291057203294478e-06, "loss": 0.2514, "step": 47756 }, { "epoch": 3.868843162670123, "grad_norm": 0.061186712235212326, "learning_rate": 7.2865565506998525e-06, "loss": 0.1958, "step": 47757 }, { "epoch": 3.8689241736876214, "grad_norm": 0.07807473093271255, "learning_rate": 7.282055898105225e-06, "loss": 0.1886, "step": 47758 }, { "epoch": 3.86900518470512, "grad_norm": 0.07652845978736877, "learning_rate": 7.277555245510599e-06, "loss": 0.2328, "step": 47759 }, { "epoch": 3.8690861957226184, "grad_norm": 0.07402211427688599, "learning_rate": 7.273054592915973e-06, "loss": 0.2609, "step": 47760 }, { "epoch": 3.8691672067401166, "grad_norm": 0.05133407190442085, "learning_rate": 7.268553940321348e-06, "loss": 0.2124, "step": 47761 }, { "epoch": 3.8692482177576153, "grad_norm": 0.08077628910541534, "learning_rate": 7.26405328772672e-06, "loss": 0.2662, "step": 47762 }, { "epoch": 3.8693292287751135, "grad_norm": 0.06384014338254929, "learning_rate": 7.259552635132094e-06, "loss": 0.2063, "step": 47763 }, { "epoch": 3.869410239792612, "grad_norm": 0.06897179782390594, "learning_rate": 7.2550519825374685e-06, "loss": 0.227, "step": 47764 }, { "epoch": 3.86949125081011, "grad_norm": 0.0741807222366333, "learning_rate": 7.250551329942842e-06, "loss": 0.1986, "step": 47765 }, { "epoch": 3.8695722618276087, "grad_norm": 0.07519097626209259, "learning_rate": 7.246050677348217e-06, "loss": 0.2383, "step": 47766 }, { "epoch": 3.869653272845107, "grad_norm": 0.07148484885692596, "learning_rate": 7.241550024753589e-06, "loss": 0.2481, "step": 47767 }, { "epoch": 3.869734283862605, "grad_norm": 0.07364317774772644, "learning_rate": 7.237049372158963e-06, "loss": 0.2098, "step": 47768 }, { "epoch": 3.8698152948801035, "grad_norm": 0.07559014856815338, "learning_rate": 7.232548719564337e-06, "loss": 0.2394, "step": 47769 }, { "epoch": 3.869896305897602, "grad_norm": 0.07173273712396622, "learning_rate": 7.228048066969712e-06, "loss": 0.2245, "step": 47770 }, { "epoch": 3.8699773169151004, "grad_norm": 0.07271765172481537, "learning_rate": 7.223547414375085e-06, "loss": 0.1872, "step": 47771 }, { "epoch": 3.8700583279325986, "grad_norm": 0.06458864361047745, "learning_rate": 7.219046761780458e-06, "loss": 0.2289, "step": 47772 }, { "epoch": 3.8701393389500973, "grad_norm": 0.07476481795310974, "learning_rate": 7.214546109185833e-06, "loss": 0.2548, "step": 47773 }, { "epoch": 3.8702203499675956, "grad_norm": 0.07546652108430862, "learning_rate": 7.210045456591206e-06, "loss": 0.2514, "step": 47774 }, { "epoch": 3.870301360985094, "grad_norm": 0.06332651525735855, "learning_rate": 7.205544803996579e-06, "loss": 0.1829, "step": 47775 }, { "epoch": 3.8703823720025925, "grad_norm": 0.07313700765371323, "learning_rate": 7.2010441514019535e-06, "loss": 0.2082, "step": 47776 }, { "epoch": 3.8704633830200907, "grad_norm": 0.06601651757955551, "learning_rate": 7.196543498807327e-06, "loss": 0.2213, "step": 47777 }, { "epoch": 3.870544394037589, "grad_norm": 0.07746600359678268, "learning_rate": 7.1920428462127015e-06, "loss": 0.2392, "step": 47778 }, { "epoch": 3.8706254050550877, "grad_norm": 0.08070232719182968, "learning_rate": 7.187542193618076e-06, "loss": 0.2264, "step": 47779 }, { "epoch": 3.870706416072586, "grad_norm": 0.06326809525489807, "learning_rate": 7.183041541023449e-06, "loss": 0.2145, "step": 47780 }, { "epoch": 3.870787427090084, "grad_norm": 0.058843906968832016, "learning_rate": 7.178540888428822e-06, "loss": 0.1969, "step": 47781 }, { "epoch": 3.870868438107583, "grad_norm": 0.05918506532907486, "learning_rate": 7.174040235834197e-06, "loss": 0.2175, "step": 47782 }, { "epoch": 3.870949449125081, "grad_norm": 0.071584552526474, "learning_rate": 7.16953958323957e-06, "loss": 0.1764, "step": 47783 }, { "epoch": 3.8710304601425793, "grad_norm": 0.05277230218052864, "learning_rate": 7.165038930644943e-06, "loss": 0.2319, "step": 47784 }, { "epoch": 3.871111471160078, "grad_norm": 0.07672587037086487, "learning_rate": 7.160538278050318e-06, "loss": 0.209, "step": 47785 }, { "epoch": 3.8711924821775763, "grad_norm": 0.08189419656991959, "learning_rate": 7.156037625455691e-06, "loss": 0.2412, "step": 47786 }, { "epoch": 3.8712734931950745, "grad_norm": 0.06885275989770889, "learning_rate": 7.151536972861066e-06, "loss": 0.2351, "step": 47787 }, { "epoch": 3.8713545042125728, "grad_norm": 0.07223542034626007, "learning_rate": 7.147036320266438e-06, "loss": 0.2375, "step": 47788 }, { "epoch": 3.8714355152300715, "grad_norm": 0.07468521595001221, "learning_rate": 7.142535667671813e-06, "loss": 0.2284, "step": 47789 }, { "epoch": 3.8715165262475697, "grad_norm": 0.06014881283044815, "learning_rate": 7.1380350150771865e-06, "loss": 0.1972, "step": 47790 }, { "epoch": 3.871597537265068, "grad_norm": 0.0729193314909935, "learning_rate": 7.133534362482561e-06, "loss": 0.2033, "step": 47791 }, { "epoch": 3.871678548282566, "grad_norm": 0.0688919946551323, "learning_rate": 7.129033709887934e-06, "loss": 0.2597, "step": 47792 }, { "epoch": 3.871759559300065, "grad_norm": 0.08578962832689285, "learning_rate": 7.124533057293307e-06, "loss": 0.2209, "step": 47793 }, { "epoch": 3.871840570317563, "grad_norm": 0.06939221173524857, "learning_rate": 7.120032404698682e-06, "loss": 0.2107, "step": 47794 }, { "epoch": 3.8719215813350614, "grad_norm": 0.07219339162111282, "learning_rate": 7.115531752104055e-06, "loss": 0.2238, "step": 47795 }, { "epoch": 3.87200259235256, "grad_norm": 0.06591325253248215, "learning_rate": 7.11103109950943e-06, "loss": 0.2357, "step": 47796 }, { "epoch": 3.8720836033700583, "grad_norm": 0.06282512098550797, "learning_rate": 7.1065304469148025e-06, "loss": 0.209, "step": 47797 }, { "epoch": 3.8721646143875565, "grad_norm": 0.07997066527605057, "learning_rate": 7.102029794320177e-06, "loss": 0.2465, "step": 47798 }, { "epoch": 3.8722456254050552, "grad_norm": 0.08144117146730423, "learning_rate": 7.097529141725551e-06, "loss": 0.2519, "step": 47799 }, { "epoch": 3.8723266364225535, "grad_norm": 0.06622984260320663, "learning_rate": 7.093028489130925e-06, "loss": 0.1985, "step": 47800 }, { "epoch": 3.8724076474400517, "grad_norm": 0.06841832399368286, "learning_rate": 7.088527836536298e-06, "loss": 0.2252, "step": 47801 }, { "epoch": 3.8724886584575504, "grad_norm": 0.06852193176746368, "learning_rate": 7.084027183941671e-06, "loss": 0.2244, "step": 47802 }, { "epoch": 3.8725696694750487, "grad_norm": 0.09205551445484161, "learning_rate": 7.079526531347046e-06, "loss": 0.2398, "step": 47803 }, { "epoch": 3.872650680492547, "grad_norm": 0.07046569138765335, "learning_rate": 7.0750258787524195e-06, "loss": 0.2459, "step": 47804 }, { "epoch": 3.8727316915100456, "grad_norm": 0.0705006867647171, "learning_rate": 7.070525226157792e-06, "loss": 0.2379, "step": 47805 }, { "epoch": 3.872812702527544, "grad_norm": 0.06745926290750504, "learning_rate": 7.066024573563167e-06, "loss": 0.2444, "step": 47806 }, { "epoch": 3.872893713545042, "grad_norm": 0.06479792296886444, "learning_rate": 7.061523920968541e-06, "loss": 0.1962, "step": 47807 }, { "epoch": 3.8729747245625408, "grad_norm": 0.07828095555305481, "learning_rate": 7.057023268373915e-06, "loss": 0.2304, "step": 47808 }, { "epoch": 3.873055735580039, "grad_norm": 0.05744514614343643, "learning_rate": 7.052522615779289e-06, "loss": 0.2172, "step": 47809 }, { "epoch": 3.8731367465975373, "grad_norm": 0.06972257792949677, "learning_rate": 7.048021963184662e-06, "loss": 0.2432, "step": 47810 }, { "epoch": 3.8732177576150355, "grad_norm": 0.058357443660497665, "learning_rate": 7.0435213105900355e-06, "loss": 0.1878, "step": 47811 }, { "epoch": 3.8732987686325338, "grad_norm": 0.06073423847556114, "learning_rate": 7.03902065799541e-06, "loss": 0.2022, "step": 47812 }, { "epoch": 3.8733797796500324, "grad_norm": 0.059745658189058304, "learning_rate": 7.034520005400784e-06, "loss": 0.1826, "step": 47813 }, { "epoch": 3.8734607906675307, "grad_norm": 0.07808952778577805, "learning_rate": 7.030019352806156e-06, "loss": 0.2375, "step": 47814 }, { "epoch": 3.873541801685029, "grad_norm": 0.07170235365629196, "learning_rate": 7.025518700211531e-06, "loss": 0.2244, "step": 47815 }, { "epoch": 3.8736228127025276, "grad_norm": 0.07987401634454727, "learning_rate": 7.021018047616905e-06, "loss": 0.2086, "step": 47816 }, { "epoch": 3.873703823720026, "grad_norm": 0.0680292397737503, "learning_rate": 7.016517395022279e-06, "loss": 0.2353, "step": 47817 }, { "epoch": 3.873784834737524, "grad_norm": 0.06402581185102463, "learning_rate": 7.012016742427652e-06, "loss": 0.1892, "step": 47818 }, { "epoch": 3.873865845755023, "grad_norm": 0.07108760625123978, "learning_rate": 7.007516089833026e-06, "loss": 0.2752, "step": 47819 }, { "epoch": 3.873946856772521, "grad_norm": 0.07389482855796814, "learning_rate": 7.0030154372384e-06, "loss": 0.2215, "step": 47820 }, { "epoch": 3.8740278677900193, "grad_norm": 0.07002807408571243, "learning_rate": 6.998514784643774e-06, "loss": 0.2196, "step": 47821 }, { "epoch": 3.874108878807518, "grad_norm": 0.07865195721387863, "learning_rate": 6.9940141320491485e-06, "loss": 0.2383, "step": 47822 }, { "epoch": 3.874189889825016, "grad_norm": 0.07489572465419769, "learning_rate": 6.9895134794545205e-06, "loss": 0.2034, "step": 47823 }, { "epoch": 3.8742709008425145, "grad_norm": 0.0735786184668541, "learning_rate": 6.985012826859895e-06, "loss": 0.2558, "step": 47824 }, { "epoch": 3.874351911860013, "grad_norm": 0.07100185006856918, "learning_rate": 6.980512174265269e-06, "loss": 0.2405, "step": 47825 }, { "epoch": 3.8744329228775114, "grad_norm": 0.06581882387399673, "learning_rate": 6.976011521670643e-06, "loss": 0.1944, "step": 47826 }, { "epoch": 3.8745139338950096, "grad_norm": 0.06488057225942612, "learning_rate": 6.971510869076016e-06, "loss": 0.224, "step": 47827 }, { "epoch": 3.8745949449125083, "grad_norm": 0.07766662538051605, "learning_rate": 6.96701021648139e-06, "loss": 0.234, "step": 47828 }, { "epoch": 3.8746759559300066, "grad_norm": 0.06239504739642143, "learning_rate": 6.962509563886764e-06, "loss": 0.1958, "step": 47829 }, { "epoch": 3.874756966947505, "grad_norm": 0.06142323091626167, "learning_rate": 6.958008911292138e-06, "loss": 0.219, "step": 47830 }, { "epoch": 3.8748379779650035, "grad_norm": 0.07786870002746582, "learning_rate": 6.953508258697511e-06, "loss": 0.239, "step": 47831 }, { "epoch": 3.8749189889825018, "grad_norm": 0.062448933720588684, "learning_rate": 6.949007606102885e-06, "loss": 0.2096, "step": 47832 }, { "epoch": 3.875, "grad_norm": 0.05940355360507965, "learning_rate": 6.944506953508259e-06, "loss": 0.1905, "step": 47833 }, { "epoch": 3.8750810110174982, "grad_norm": 0.05543764308094978, "learning_rate": 6.9400063009136335e-06, "loss": 0.1915, "step": 47834 }, { "epoch": 3.8751620220349965, "grad_norm": 0.08461619913578033, "learning_rate": 6.935505648319006e-06, "loss": 0.2633, "step": 47835 }, { "epoch": 3.875243033052495, "grad_norm": 0.0797097235918045, "learning_rate": 6.93100499572438e-06, "loss": 0.2521, "step": 47836 }, { "epoch": 3.8753240440699934, "grad_norm": 0.07515867799520493, "learning_rate": 6.926504343129754e-06, "loss": 0.2429, "step": 47837 }, { "epoch": 3.8754050550874917, "grad_norm": 0.0636037066578865, "learning_rate": 6.922003690535128e-06, "loss": 0.1941, "step": 47838 }, { "epoch": 3.8754860661049904, "grad_norm": 0.0706285685300827, "learning_rate": 6.917503037940502e-06, "loss": 0.2353, "step": 47839 }, { "epoch": 3.8755670771224886, "grad_norm": 0.0567207969725132, "learning_rate": 6.913002385345875e-06, "loss": 0.1933, "step": 47840 }, { "epoch": 3.875648088139987, "grad_norm": 0.07477942109107971, "learning_rate": 6.908501732751249e-06, "loss": 0.2183, "step": 47841 }, { "epoch": 3.8757290991574855, "grad_norm": 0.057915329933166504, "learning_rate": 6.904001080156623e-06, "loss": 0.2068, "step": 47842 }, { "epoch": 3.875810110174984, "grad_norm": 0.0781538337469101, "learning_rate": 6.899500427561998e-06, "loss": 0.1967, "step": 47843 }, { "epoch": 3.875891121192482, "grad_norm": 0.07939945161342621, "learning_rate": 6.89499977496737e-06, "loss": 0.2356, "step": 47844 }, { "epoch": 3.8759721322099807, "grad_norm": 0.05997350439429283, "learning_rate": 6.890499122372744e-06, "loss": 0.2141, "step": 47845 }, { "epoch": 3.876053143227479, "grad_norm": 0.06055265665054321, "learning_rate": 6.885998469778118e-06, "loss": 0.2461, "step": 47846 }, { "epoch": 3.876134154244977, "grad_norm": 0.07572788745164871, "learning_rate": 6.881497817183492e-06, "loss": 0.2126, "step": 47847 }, { "epoch": 3.876215165262476, "grad_norm": 0.06472624838352203, "learning_rate": 6.876997164588865e-06, "loss": 0.189, "step": 47848 }, { "epoch": 3.876296176279974, "grad_norm": 0.08194227516651154, "learning_rate": 6.872496511994239e-06, "loss": 0.1988, "step": 47849 }, { "epoch": 3.8763771872974724, "grad_norm": 0.08095631748437881, "learning_rate": 6.867995859399614e-06, "loss": 0.2483, "step": 47850 }, { "epoch": 3.876458198314971, "grad_norm": 0.075938880443573, "learning_rate": 6.863495206804987e-06, "loss": 0.2077, "step": 47851 }, { "epoch": 3.8765392093324693, "grad_norm": 0.07606460154056549, "learning_rate": 6.858994554210362e-06, "loss": 0.2077, "step": 47852 }, { "epoch": 3.8766202203499676, "grad_norm": 0.06377337127923965, "learning_rate": 6.8544939016157345e-06, "loss": 0.2375, "step": 47853 }, { "epoch": 3.8767012313674662, "grad_norm": 0.06751275062561035, "learning_rate": 6.849993249021108e-06, "loss": 0.2163, "step": 47854 }, { "epoch": 3.8767822423849645, "grad_norm": 0.06989561021327972, "learning_rate": 6.8454925964264825e-06, "loss": 0.2183, "step": 47855 }, { "epoch": 3.8768632534024627, "grad_norm": 0.05857057496905327, "learning_rate": 6.840991943831856e-06, "loss": 0.2215, "step": 47856 }, { "epoch": 3.876944264419961, "grad_norm": 0.09082645177841187, "learning_rate": 6.836491291237229e-06, "loss": 0.2536, "step": 47857 }, { "epoch": 3.8770252754374592, "grad_norm": 0.0694134309887886, "learning_rate": 6.831990638642603e-06, "loss": 0.196, "step": 47858 }, { "epoch": 3.877106286454958, "grad_norm": 0.07403349131345749, "learning_rate": 6.827489986047978e-06, "loss": 0.2341, "step": 47859 }, { "epoch": 3.877187297472456, "grad_norm": 0.061810147017240524, "learning_rate": 6.822989333453351e-06, "loss": 0.2047, "step": 47860 }, { "epoch": 3.8772683084899544, "grad_norm": 0.07295320928096771, "learning_rate": 6.818488680858724e-06, "loss": 0.257, "step": 47861 }, { "epoch": 3.877349319507453, "grad_norm": 0.06367763131856918, "learning_rate": 6.813988028264099e-06, "loss": 0.2078, "step": 47862 }, { "epoch": 3.8774303305249513, "grad_norm": 0.084407739341259, "learning_rate": 6.809487375669472e-06, "loss": 0.1852, "step": 47863 }, { "epoch": 3.8775113415424496, "grad_norm": 0.07967525720596313, "learning_rate": 6.804986723074847e-06, "loss": 0.2155, "step": 47864 }, { "epoch": 3.8775923525599483, "grad_norm": 0.0668228268623352, "learning_rate": 6.80048607048022e-06, "loss": 0.2117, "step": 47865 }, { "epoch": 3.8776733635774465, "grad_norm": 0.06333678960800171, "learning_rate": 6.795985417885593e-06, "loss": 0.1996, "step": 47866 }, { "epoch": 3.8777543745949448, "grad_norm": 0.07337368279695511, "learning_rate": 6.7914847652909675e-06, "loss": 0.2246, "step": 47867 }, { "epoch": 3.8778353856124435, "grad_norm": 0.0809299424290657, "learning_rate": 6.786984112696342e-06, "loss": 0.2155, "step": 47868 }, { "epoch": 3.8779163966299417, "grad_norm": 0.0768105685710907, "learning_rate": 6.7824834601017155e-06, "loss": 0.205, "step": 47869 }, { "epoch": 3.87799740764744, "grad_norm": 0.07321664690971375, "learning_rate": 6.777982807507088e-06, "loss": 0.2521, "step": 47870 }, { "epoch": 3.8780784186649386, "grad_norm": 0.06322470307350159, "learning_rate": 6.773482154912463e-06, "loss": 0.2193, "step": 47871 }, { "epoch": 3.878159429682437, "grad_norm": 0.0774395614862442, "learning_rate": 6.768981502317836e-06, "loss": 0.2615, "step": 47872 }, { "epoch": 3.878240440699935, "grad_norm": 0.07283959537744522, "learning_rate": 6.764480849723211e-06, "loss": 0.2156, "step": 47873 }, { "epoch": 3.878321451717434, "grad_norm": 0.0624484121799469, "learning_rate": 6.7599801971285835e-06, "loss": 0.227, "step": 47874 }, { "epoch": 3.878402462734932, "grad_norm": 0.07772690057754517, "learning_rate": 6.755479544533957e-06, "loss": 0.2378, "step": 47875 }, { "epoch": 3.8784834737524303, "grad_norm": 0.06441731750965118, "learning_rate": 6.750978891939332e-06, "loss": 0.2066, "step": 47876 }, { "epoch": 3.8785644847699285, "grad_norm": 0.07770302146673203, "learning_rate": 6.746478239344706e-06, "loss": 0.2372, "step": 47877 }, { "epoch": 3.8786454957874272, "grad_norm": 0.07686355710029602, "learning_rate": 6.741977586750079e-06, "loss": 0.2351, "step": 47878 }, { "epoch": 3.8787265068049255, "grad_norm": 0.09807416796684265, "learning_rate": 6.737476934155452e-06, "loss": 0.2255, "step": 47879 }, { "epoch": 3.8788075178224237, "grad_norm": 0.06583904474973679, "learning_rate": 6.732976281560827e-06, "loss": 0.2109, "step": 47880 }, { "epoch": 3.878888528839922, "grad_norm": 0.07584837824106216, "learning_rate": 6.7284756289662005e-06, "loss": 0.2419, "step": 47881 }, { "epoch": 3.8789695398574207, "grad_norm": 0.05641576275229454, "learning_rate": 6.723974976371575e-06, "loss": 0.2139, "step": 47882 }, { "epoch": 3.879050550874919, "grad_norm": 0.07774477452039719, "learning_rate": 6.719474323776948e-06, "loss": 0.2532, "step": 47883 }, { "epoch": 3.879131561892417, "grad_norm": 0.06663135439157486, "learning_rate": 6.714973671182321e-06, "loss": 0.2139, "step": 47884 }, { "epoch": 3.879212572909916, "grad_norm": 0.07237344980239868, "learning_rate": 6.710473018587696e-06, "loss": 0.188, "step": 47885 }, { "epoch": 3.879293583927414, "grad_norm": 0.0650007426738739, "learning_rate": 6.70597236599307e-06, "loss": 0.2663, "step": 47886 }, { "epoch": 3.8793745949449123, "grad_norm": 0.07538321614265442, "learning_rate": 6.701471713398443e-06, "loss": 0.2219, "step": 47887 }, { "epoch": 3.879455605962411, "grad_norm": 0.0606030635535717, "learning_rate": 6.6969710608038165e-06, "loss": 0.2124, "step": 47888 }, { "epoch": 3.8795366169799093, "grad_norm": 0.07034909725189209, "learning_rate": 6.692470408209191e-06, "loss": 0.2611, "step": 47889 }, { "epoch": 3.8796176279974075, "grad_norm": 0.055674366652965546, "learning_rate": 6.687969755614565e-06, "loss": 0.1966, "step": 47890 }, { "epoch": 3.879698639014906, "grad_norm": 0.06106676533818245, "learning_rate": 6.683469103019937e-06, "loss": 0.184, "step": 47891 }, { "epoch": 3.8797796500324044, "grad_norm": 0.06368546187877655, "learning_rate": 6.678968450425312e-06, "loss": 0.2065, "step": 47892 }, { "epoch": 3.8798606610499027, "grad_norm": 0.08897584676742554, "learning_rate": 6.674467797830685e-06, "loss": 0.2394, "step": 47893 }, { "epoch": 3.8799416720674014, "grad_norm": 0.06789989769458771, "learning_rate": 6.66996714523606e-06, "loss": 0.2254, "step": 47894 }, { "epoch": 3.8800226830848996, "grad_norm": 0.057918865233659744, "learning_rate": 6.665466492641434e-06, "loss": 0.2368, "step": 47895 }, { "epoch": 3.880103694102398, "grad_norm": 0.06686865538358688, "learning_rate": 6.660965840046807e-06, "loss": 0.1985, "step": 47896 }, { "epoch": 3.8801847051198965, "grad_norm": 0.07179523259401321, "learning_rate": 6.656465187452181e-06, "loss": 0.2398, "step": 47897 }, { "epoch": 3.880265716137395, "grad_norm": 0.09124450385570526, "learning_rate": 6.651964534857555e-06, "loss": 0.2308, "step": 47898 }, { "epoch": 3.880346727154893, "grad_norm": 0.06625550985336304, "learning_rate": 6.647463882262929e-06, "loss": 0.2009, "step": 47899 }, { "epoch": 3.8804277381723913, "grad_norm": 0.08759491890668869, "learning_rate": 6.6429632296683015e-06, "loss": 0.2358, "step": 47900 }, { "epoch": 3.88050874918989, "grad_norm": 0.08081474155187607, "learning_rate": 6.638462577073676e-06, "loss": 0.2259, "step": 47901 }, { "epoch": 3.880589760207388, "grad_norm": 0.08028419315814972, "learning_rate": 6.6339619244790495e-06, "loss": 0.2476, "step": 47902 }, { "epoch": 3.8806707712248865, "grad_norm": 0.0740567073225975, "learning_rate": 6.629461271884424e-06, "loss": 0.2456, "step": 47903 }, { "epoch": 3.8807517822423847, "grad_norm": 0.06059883162379265, "learning_rate": 6.624960619289797e-06, "loss": 0.2061, "step": 47904 }, { "epoch": 3.8808327932598834, "grad_norm": 0.06069719418883324, "learning_rate": 6.620459966695171e-06, "loss": 0.1905, "step": 47905 }, { "epoch": 3.8809138042773816, "grad_norm": 0.07481228560209274, "learning_rate": 6.615959314100545e-06, "loss": 0.2472, "step": 47906 }, { "epoch": 3.88099481529488, "grad_norm": 0.0695427656173706, "learning_rate": 6.611458661505919e-06, "loss": 0.2197, "step": 47907 }, { "epoch": 3.8810758263123786, "grad_norm": 0.07964444160461426, "learning_rate": 6.606958008911293e-06, "loss": 0.2039, "step": 47908 }, { "epoch": 3.881156837329877, "grad_norm": 0.06760581582784653, "learning_rate": 6.602457356316666e-06, "loss": 0.24, "step": 47909 }, { "epoch": 3.881237848347375, "grad_norm": 0.08289235085248947, "learning_rate": 6.59795670372204e-06, "loss": 0.2075, "step": 47910 }, { "epoch": 3.8813188593648738, "grad_norm": 0.05402611568570137, "learning_rate": 6.593456051127414e-06, "loss": 0.1988, "step": 47911 }, { "epoch": 3.881399870382372, "grad_norm": 0.06173928454518318, "learning_rate": 6.588955398532788e-06, "loss": 0.2041, "step": 47912 }, { "epoch": 3.8814808813998702, "grad_norm": 0.06798407435417175, "learning_rate": 6.584454745938161e-06, "loss": 0.2773, "step": 47913 }, { "epoch": 3.881561892417369, "grad_norm": 0.07197803258895874, "learning_rate": 6.579954093343535e-06, "loss": 0.2325, "step": 47914 }, { "epoch": 3.881642903434867, "grad_norm": 0.0872403159737587, "learning_rate": 6.575453440748909e-06, "loss": 0.2524, "step": 47915 }, { "epoch": 3.8817239144523654, "grad_norm": 0.07195434719324112, "learning_rate": 6.570952788154283e-06, "loss": 0.2117, "step": 47916 }, { "epoch": 3.881804925469864, "grad_norm": 0.05543294548988342, "learning_rate": 6.566452135559656e-06, "loss": 0.1988, "step": 47917 }, { "epoch": 3.8818859364873624, "grad_norm": 0.0677102655172348, "learning_rate": 6.56195148296503e-06, "loss": 0.2404, "step": 47918 }, { "epoch": 3.8819669475048606, "grad_norm": 0.07490658760070801, "learning_rate": 6.557450830370404e-06, "loss": 0.2111, "step": 47919 }, { "epoch": 3.8820479585223593, "grad_norm": 0.06448684632778168, "learning_rate": 6.552950177775778e-06, "loss": 0.1931, "step": 47920 }, { "epoch": 3.8821289695398575, "grad_norm": 0.06765627861022949, "learning_rate": 6.5484495251811505e-06, "loss": 0.2379, "step": 47921 }, { "epoch": 3.8822099805573558, "grad_norm": 0.05987018346786499, "learning_rate": 6.543948872586525e-06, "loss": 0.2281, "step": 47922 }, { "epoch": 3.882290991574854, "grad_norm": 0.07392005622386932, "learning_rate": 6.5394482199918994e-06, "loss": 0.2199, "step": 47923 }, { "epoch": 3.8823720025923527, "grad_norm": 0.07084795087575912, "learning_rate": 6.534947567397273e-06, "loss": 0.2391, "step": 47924 }, { "epoch": 3.882453013609851, "grad_norm": 0.06273969262838364, "learning_rate": 6.5304469148026475e-06, "loss": 0.2309, "step": 47925 }, { "epoch": 3.882534024627349, "grad_norm": 0.08438969403505325, "learning_rate": 6.52594626220802e-06, "loss": 0.2198, "step": 47926 }, { "epoch": 3.8826150356448474, "grad_norm": 0.0724400132894516, "learning_rate": 6.521445609613394e-06, "loss": 0.2152, "step": 47927 }, { "epoch": 3.882696046662346, "grad_norm": 0.060646384954452515, "learning_rate": 6.516944957018768e-06, "loss": 0.2088, "step": 47928 }, { "epoch": 3.8827770576798444, "grad_norm": 0.07137048989534378, "learning_rate": 6.512444304424143e-06, "loss": 0.2723, "step": 47929 }, { "epoch": 3.8828580686973426, "grad_norm": 0.07384997606277466, "learning_rate": 6.507943651829515e-06, "loss": 0.2015, "step": 47930 }, { "epoch": 3.8829390797148413, "grad_norm": 0.082190603017807, "learning_rate": 6.503442999234889e-06, "loss": 0.2473, "step": 47931 }, { "epoch": 3.8830200907323396, "grad_norm": 0.07313903421163559, "learning_rate": 6.4989423466402635e-06, "loss": 0.2301, "step": 47932 }, { "epoch": 3.883101101749838, "grad_norm": 0.0644293874502182, "learning_rate": 6.494441694045637e-06, "loss": 0.2226, "step": 47933 }, { "epoch": 3.8831821127673365, "grad_norm": 0.0785566046833992, "learning_rate": 6.48994104145101e-06, "loss": 0.2528, "step": 47934 }, { "epoch": 3.8832631237848347, "grad_norm": 0.08307922631502151, "learning_rate": 6.485440388856384e-06, "loss": 0.207, "step": 47935 }, { "epoch": 3.883344134802333, "grad_norm": 0.07435651123523712, "learning_rate": 6.480939736261758e-06, "loss": 0.2639, "step": 47936 }, { "epoch": 3.8834251458198317, "grad_norm": 0.06681717187166214, "learning_rate": 6.476439083667132e-06, "loss": 0.1967, "step": 47937 }, { "epoch": 3.88350615683733, "grad_norm": 0.06831416487693787, "learning_rate": 6.471938431072507e-06, "loss": 0.2202, "step": 47938 }, { "epoch": 3.883587167854828, "grad_norm": 0.06847839057445526, "learning_rate": 6.467437778477879e-06, "loss": 0.1872, "step": 47939 }, { "epoch": 3.883668178872327, "grad_norm": 0.07324326038360596, "learning_rate": 6.462937125883253e-06, "loss": 0.2338, "step": 47940 }, { "epoch": 3.883749189889825, "grad_norm": 0.06632672995328903, "learning_rate": 6.458436473288628e-06, "loss": 0.2185, "step": 47941 }, { "epoch": 3.8838302009073233, "grad_norm": 0.09587211906909943, "learning_rate": 6.453935820694001e-06, "loss": 0.2197, "step": 47942 }, { "epoch": 3.883911211924822, "grad_norm": 0.07188690453767776, "learning_rate": 6.449435168099374e-06, "loss": 0.206, "step": 47943 }, { "epoch": 3.8839922229423203, "grad_norm": 0.06359101831912994, "learning_rate": 6.4449345155047485e-06, "loss": 0.1849, "step": 47944 }, { "epoch": 3.8840732339598185, "grad_norm": 0.08528807014226913, "learning_rate": 6.440433862910122e-06, "loss": 0.2048, "step": 47945 }, { "epoch": 3.8841542449773168, "grad_norm": 0.09145567566156387, "learning_rate": 6.4359332103154965e-06, "loss": 0.2217, "step": 47946 }, { "epoch": 3.8842352559948155, "grad_norm": 0.0667794719338417, "learning_rate": 6.431432557720869e-06, "loss": 0.2307, "step": 47947 }, { "epoch": 3.8843162670123137, "grad_norm": 0.08978570997714996, "learning_rate": 6.426931905126244e-06, "loss": 0.2436, "step": 47948 }, { "epoch": 3.884397278029812, "grad_norm": 0.09070317447185516, "learning_rate": 6.422431252531617e-06, "loss": 0.2234, "step": 47949 }, { "epoch": 3.88447828904731, "grad_norm": 0.07344508171081543, "learning_rate": 6.417930599936992e-06, "loss": 0.1885, "step": 47950 }, { "epoch": 3.884559300064809, "grad_norm": 0.0661882534623146, "learning_rate": 6.413429947342365e-06, "loss": 0.1988, "step": 47951 }, { "epoch": 3.884640311082307, "grad_norm": 0.07575134187936783, "learning_rate": 6.408929294747738e-06, "loss": 0.2317, "step": 47952 }, { "epoch": 3.8847213220998054, "grad_norm": 0.08309940248727798, "learning_rate": 6.404428642153113e-06, "loss": 0.2172, "step": 47953 }, { "epoch": 3.884802333117304, "grad_norm": 0.06687768548727036, "learning_rate": 6.399927989558486e-06, "loss": 0.211, "step": 47954 }, { "epoch": 3.8848833441348023, "grad_norm": 0.10296569019556046, "learning_rate": 6.395427336963861e-06, "loss": 0.2346, "step": 47955 }, { "epoch": 3.8849643551523005, "grad_norm": 0.0639154314994812, "learning_rate": 6.390926684369233e-06, "loss": 0.2162, "step": 47956 }, { "epoch": 3.8850453661697992, "grad_norm": 0.07917410880327225, "learning_rate": 6.386426031774608e-06, "loss": 0.2054, "step": 47957 }, { "epoch": 3.8851263771872975, "grad_norm": 0.08458743989467621, "learning_rate": 6.3819253791799815e-06, "loss": 0.2153, "step": 47958 }, { "epoch": 3.8852073882047957, "grad_norm": 0.07421503216028214, "learning_rate": 6.377424726585356e-06, "loss": 0.2597, "step": 47959 }, { "epoch": 3.8852883992222944, "grad_norm": 0.08566386252641678, "learning_rate": 6.372924073990729e-06, "loss": 0.2534, "step": 47960 }, { "epoch": 3.8853694102397927, "grad_norm": 0.07812653481960297, "learning_rate": 6.368423421396102e-06, "loss": 0.2537, "step": 47961 }, { "epoch": 3.885450421257291, "grad_norm": 0.09011181443929672, "learning_rate": 6.363922768801477e-06, "loss": 0.2197, "step": 47962 }, { "epoch": 3.8855314322747896, "grad_norm": 0.07989250868558884, "learning_rate": 6.35942211620685e-06, "loss": 0.2303, "step": 47963 }, { "epoch": 3.885612443292288, "grad_norm": 0.08667084574699402, "learning_rate": 6.354921463612223e-06, "loss": 0.247, "step": 47964 }, { "epoch": 3.885693454309786, "grad_norm": 0.07098482549190521, "learning_rate": 6.3504208110175975e-06, "loss": 0.2297, "step": 47965 }, { "epoch": 3.8857744653272848, "grad_norm": 0.06345818936824799, "learning_rate": 6.345920158422972e-06, "loss": 0.1796, "step": 47966 }, { "epoch": 3.885855476344783, "grad_norm": 0.06717762351036072, "learning_rate": 6.341419505828346e-06, "loss": 0.2043, "step": 47967 }, { "epoch": 3.8859364873622813, "grad_norm": 0.06535407900810242, "learning_rate": 6.33691885323372e-06, "loss": 0.2326, "step": 47968 }, { "epoch": 3.8860174983797795, "grad_norm": 0.07947888225317001, "learning_rate": 6.332418200639093e-06, "loss": 0.2643, "step": 47969 }, { "epoch": 3.886098509397278, "grad_norm": 0.07099038362503052, "learning_rate": 6.327917548044466e-06, "loss": 0.2183, "step": 47970 }, { "epoch": 3.8861795204147764, "grad_norm": 0.08765587210655212, "learning_rate": 6.323416895449841e-06, "loss": 0.2197, "step": 47971 }, { "epoch": 3.8862605314322747, "grad_norm": 0.06536171585321426, "learning_rate": 6.3189162428552145e-06, "loss": 0.1928, "step": 47972 }, { "epoch": 3.886341542449773, "grad_norm": 0.08230137079954147, "learning_rate": 6.314415590260587e-06, "loss": 0.231, "step": 47973 }, { "epoch": 3.8864225534672716, "grad_norm": 0.06578972190618515, "learning_rate": 6.309914937665962e-06, "loss": 0.2168, "step": 47974 }, { "epoch": 3.88650356448477, "grad_norm": 0.06204579770565033, "learning_rate": 6.305414285071336e-06, "loss": 0.2093, "step": 47975 }, { "epoch": 3.886584575502268, "grad_norm": 0.06162213534116745, "learning_rate": 6.30091363247671e-06, "loss": 0.2351, "step": 47976 }, { "epoch": 3.886665586519767, "grad_norm": 0.07075859606266022, "learning_rate": 6.2964129798820825e-06, "loss": 0.1968, "step": 47977 }, { "epoch": 3.886746597537265, "grad_norm": 0.08070050179958344, "learning_rate": 6.291912327287457e-06, "loss": 0.2021, "step": 47978 }, { "epoch": 3.8868276085547633, "grad_norm": 0.07428973913192749, "learning_rate": 6.2874116746928305e-06, "loss": 0.249, "step": 47979 }, { "epoch": 3.886908619572262, "grad_norm": 0.08572898060083389, "learning_rate": 6.282911022098205e-06, "loss": 0.2257, "step": 47980 }, { "epoch": 3.88698963058976, "grad_norm": 0.06448810547590256, "learning_rate": 6.278410369503579e-06, "loss": 0.1896, "step": 47981 }, { "epoch": 3.8870706416072585, "grad_norm": 0.05051233246922493, "learning_rate": 6.273909716908951e-06, "loss": 0.2156, "step": 47982 }, { "epoch": 3.887151652624757, "grad_norm": 0.08261623233556747, "learning_rate": 6.269409064314326e-06, "loss": 0.2675, "step": 47983 }, { "epoch": 3.8872326636422554, "grad_norm": 0.05112019553780556, "learning_rate": 6.2649084117197e-06, "loss": 0.2252, "step": 47984 }, { "epoch": 3.8873136746597536, "grad_norm": 0.09426064044237137, "learning_rate": 6.260407759125074e-06, "loss": 0.2285, "step": 47985 }, { "epoch": 3.8873946856772523, "grad_norm": 0.08980914205312729, "learning_rate": 6.255907106530447e-06, "loss": 0.2585, "step": 47986 }, { "epoch": 3.8874756966947506, "grad_norm": 0.0838213637471199, "learning_rate": 6.251406453935821e-06, "loss": 0.1998, "step": 47987 }, { "epoch": 3.887556707712249, "grad_norm": 0.07811640948057175, "learning_rate": 6.246905801341195e-06, "loss": 0.2306, "step": 47988 }, { "epoch": 3.8876377187297475, "grad_norm": 0.08223123103380203, "learning_rate": 6.242405148746568e-06, "loss": 0.2695, "step": 47989 }, { "epoch": 3.8877187297472457, "grad_norm": 0.0751977488398552, "learning_rate": 6.237904496151943e-06, "loss": 0.2244, "step": 47990 }, { "epoch": 3.887799740764744, "grad_norm": 0.06286336481571198, "learning_rate": 6.2334038435573155e-06, "loss": 0.202, "step": 47991 }, { "epoch": 3.8878807517822422, "grad_norm": 0.08371991664171219, "learning_rate": 6.22890319096269e-06, "loss": 0.229, "step": 47992 }, { "epoch": 3.887961762799741, "grad_norm": 0.054850783199071884, "learning_rate": 6.2244025383680635e-06, "loss": 0.2143, "step": 47993 }, { "epoch": 3.888042773817239, "grad_norm": 0.056063272058963776, "learning_rate": 6.219901885773437e-06, "loss": 0.1868, "step": 47994 }, { "epoch": 3.8881237848347374, "grad_norm": 0.07119081169366837, "learning_rate": 6.2154012331788116e-06, "loss": 0.2235, "step": 47995 }, { "epoch": 3.8882047958522357, "grad_norm": 0.10805629193782806, "learning_rate": 6.210900580584185e-06, "loss": 0.1982, "step": 47996 }, { "epoch": 3.8882858068697344, "grad_norm": 0.0731663703918457, "learning_rate": 6.206399927989559e-06, "loss": 0.2528, "step": 47997 }, { "epoch": 3.8883668178872326, "grad_norm": 0.07942456007003784, "learning_rate": 6.201899275394932e-06, "loss": 0.2717, "step": 47998 }, { "epoch": 3.888447828904731, "grad_norm": 0.0707758367061615, "learning_rate": 6.197398622800307e-06, "loss": 0.2002, "step": 47999 }, { "epoch": 3.8885288399222295, "grad_norm": 0.06386274099349976, "learning_rate": 6.19289797020568e-06, "loss": 0.2274, "step": 48000 }, { "epoch": 3.8886098509397278, "grad_norm": 0.07666601985692978, "learning_rate": 6.188397317611054e-06, "loss": 0.2049, "step": 48001 }, { "epoch": 3.888690861957226, "grad_norm": 0.07881522923707962, "learning_rate": 6.183896665016428e-06, "loss": 0.2338, "step": 48002 }, { "epoch": 3.8887718729747247, "grad_norm": 0.07827413827180862, "learning_rate": 6.179396012421801e-06, "loss": 0.2311, "step": 48003 }, { "epoch": 3.888852883992223, "grad_norm": 0.08375756442546844, "learning_rate": 6.174895359827175e-06, "loss": 0.2089, "step": 48004 }, { "epoch": 3.888933895009721, "grad_norm": 0.06420580297708511, "learning_rate": 6.170394707232549e-06, "loss": 0.2367, "step": 48005 }, { "epoch": 3.88901490602722, "grad_norm": 0.08399081230163574, "learning_rate": 6.165894054637923e-06, "loss": 0.2573, "step": 48006 }, { "epoch": 3.889095917044718, "grad_norm": 0.07225336134433746, "learning_rate": 6.1613934020432965e-06, "loss": 0.2114, "step": 48007 }, { "epoch": 3.8891769280622164, "grad_norm": 0.07664166390895844, "learning_rate": 6.15689274944867e-06, "loss": 0.2658, "step": 48008 }, { "epoch": 3.889257939079715, "grad_norm": 0.07305591553449631, "learning_rate": 6.152392096854044e-06, "loss": 0.2189, "step": 48009 }, { "epoch": 3.8893389500972133, "grad_norm": 0.06890987604856491, "learning_rate": 6.147891444259418e-06, "loss": 0.2175, "step": 48010 }, { "epoch": 3.8894199611147116, "grad_norm": 0.06536436080932617, "learning_rate": 6.143390791664792e-06, "loss": 0.251, "step": 48011 }, { "epoch": 3.8895009721322102, "grad_norm": 0.07678571343421936, "learning_rate": 6.138890139070165e-06, "loss": 0.2241, "step": 48012 }, { "epoch": 3.8895819831497085, "grad_norm": 0.06775952875614166, "learning_rate": 6.134389486475539e-06, "loss": 0.2281, "step": 48013 }, { "epoch": 3.8896629941672067, "grad_norm": 0.09381411224603653, "learning_rate": 6.129888833880913e-06, "loss": 0.2554, "step": 48014 }, { "epoch": 3.889744005184705, "grad_norm": 0.05685148388147354, "learning_rate": 6.125388181286287e-06, "loss": 0.239, "step": 48015 }, { "epoch": 3.8898250162022032, "grad_norm": 0.07663068175315857, "learning_rate": 6.120887528691661e-06, "loss": 0.2633, "step": 48016 }, { "epoch": 3.889906027219702, "grad_norm": 0.0696638897061348, "learning_rate": 6.116386876097034e-06, "loss": 0.2184, "step": 48017 }, { "epoch": 3.8899870382372, "grad_norm": 0.06149803474545479, "learning_rate": 6.111886223502408e-06, "loss": 0.2162, "step": 48018 }, { "epoch": 3.8900680492546984, "grad_norm": 0.07225798070430756, "learning_rate": 6.1073855709077814e-06, "loss": 0.2452, "step": 48019 }, { "epoch": 3.890149060272197, "grad_norm": 0.06644891202449799, "learning_rate": 6.102884918313156e-06, "loss": 0.2403, "step": 48020 }, { "epoch": 3.8902300712896953, "grad_norm": 0.07242048531770706, "learning_rate": 6.0983842657185295e-06, "loss": 0.2397, "step": 48021 }, { "epoch": 3.8903110823071936, "grad_norm": 0.07406973838806152, "learning_rate": 6.093883613123903e-06, "loss": 0.223, "step": 48022 }, { "epoch": 3.8903920933246923, "grad_norm": 0.06930666416883469, "learning_rate": 6.0893829605292775e-06, "loss": 0.2271, "step": 48023 }, { "epoch": 3.8904731043421905, "grad_norm": 0.07608542591333389, "learning_rate": 6.084882307934651e-06, "loss": 0.2076, "step": 48024 }, { "epoch": 3.8905541153596888, "grad_norm": 0.06247393414378166, "learning_rate": 6.080381655340025e-06, "loss": 0.2051, "step": 48025 }, { "epoch": 3.8906351263771874, "grad_norm": 0.061167728155851364, "learning_rate": 6.075881002745398e-06, "loss": 0.2088, "step": 48026 }, { "epoch": 3.8907161373946857, "grad_norm": 0.06517788022756577, "learning_rate": 6.071380350150773e-06, "loss": 0.241, "step": 48027 }, { "epoch": 3.890797148412184, "grad_norm": 0.07304881513118744, "learning_rate": 6.0668796975561456e-06, "loss": 0.2088, "step": 48028 }, { "epoch": 3.8908781594296826, "grad_norm": 0.06430026143789291, "learning_rate": 6.06237904496152e-06, "loss": 0.235, "step": 48029 }, { "epoch": 3.890959170447181, "grad_norm": 0.06506291031837463, "learning_rate": 6.057878392366894e-06, "loss": 0.1662, "step": 48030 }, { "epoch": 3.891040181464679, "grad_norm": 0.06876938790082932, "learning_rate": 6.053377739772267e-06, "loss": 0.2141, "step": 48031 }, { "epoch": 3.891121192482178, "grad_norm": 0.08055271953344345, "learning_rate": 6.048877087177641e-06, "loss": 0.2293, "step": 48032 }, { "epoch": 3.891202203499676, "grad_norm": 0.07058732956647873, "learning_rate": 6.044376434583015e-06, "loss": 0.2289, "step": 48033 }, { "epoch": 3.8912832145171743, "grad_norm": 0.086977019906044, "learning_rate": 6.039875781988388e-06, "loss": 0.2359, "step": 48034 }, { "epoch": 3.891364225534673, "grad_norm": 0.07203526794910431, "learning_rate": 6.0353751293937625e-06, "loss": 0.2086, "step": 48035 }, { "epoch": 3.8914452365521712, "grad_norm": 0.06266345083713531, "learning_rate": 6.030874476799136e-06, "loss": 0.1983, "step": 48036 }, { "epoch": 3.8915262475696695, "grad_norm": 0.07486934214830399, "learning_rate": 6.02637382420451e-06, "loss": 0.235, "step": 48037 }, { "epoch": 3.8916072585871677, "grad_norm": 0.07059779763221741, "learning_rate": 6.021873171609884e-06, "loss": 0.2082, "step": 48038 }, { "epoch": 3.891688269604666, "grad_norm": 0.06780023127794266, "learning_rate": 6.017372519015258e-06, "loss": 0.2632, "step": 48039 }, { "epoch": 3.8917692806221647, "grad_norm": 0.0676514059305191, "learning_rate": 6.012871866420631e-06, "loss": 0.2406, "step": 48040 }, { "epoch": 3.891850291639663, "grad_norm": 0.06825751811265945, "learning_rate": 6.008371213826005e-06, "loss": 0.2402, "step": 48041 }, { "epoch": 3.891931302657161, "grad_norm": 0.06496651470661163, "learning_rate": 6.003870561231379e-06, "loss": 0.2294, "step": 48042 }, { "epoch": 3.89201231367466, "grad_norm": 0.0673632025718689, "learning_rate": 5.999369908636752e-06, "loss": 0.206, "step": 48043 }, { "epoch": 3.892093324692158, "grad_norm": 0.059508103877305984, "learning_rate": 5.994869256042127e-06, "loss": 0.2146, "step": 48044 }, { "epoch": 3.8921743357096563, "grad_norm": 0.07362375408411026, "learning_rate": 5.9903686034475e-06, "loss": 0.2195, "step": 48045 }, { "epoch": 3.892255346727155, "grad_norm": 0.07658713310956955, "learning_rate": 5.985867950852874e-06, "loss": 0.2371, "step": 48046 }, { "epoch": 3.8923363577446533, "grad_norm": 0.06624037772417068, "learning_rate": 5.981367298258247e-06, "loss": 0.1921, "step": 48047 }, { "epoch": 3.8924173687621515, "grad_norm": 0.08692865073680878, "learning_rate": 5.976866645663622e-06, "loss": 0.2362, "step": 48048 }, { "epoch": 3.89249837977965, "grad_norm": 0.07544679939746857, "learning_rate": 5.972365993068995e-06, "loss": 0.221, "step": 48049 }, { "epoch": 3.8925793907971484, "grad_norm": 0.05986184626817703, "learning_rate": 5.967865340474369e-06, "loss": 0.2001, "step": 48050 }, { "epoch": 3.8926604018146467, "grad_norm": 0.08265157043933868, "learning_rate": 5.963364687879743e-06, "loss": 0.3066, "step": 48051 }, { "epoch": 3.8927414128321454, "grad_norm": 0.07029622048139572, "learning_rate": 5.958864035285116e-06, "loss": 0.2507, "step": 48052 }, { "epoch": 3.8928224238496436, "grad_norm": 0.07296372205018997, "learning_rate": 5.954363382690491e-06, "loss": 0.2628, "step": 48053 }, { "epoch": 3.892903434867142, "grad_norm": 0.08202331513166428, "learning_rate": 5.949862730095864e-06, "loss": 0.236, "step": 48054 }, { "epoch": 3.8929844458846405, "grad_norm": 0.07177530229091644, "learning_rate": 5.945362077501238e-06, "loss": 0.2239, "step": 48055 }, { "epoch": 3.893065456902139, "grad_norm": 0.06391312927007675, "learning_rate": 5.9408614249066115e-06, "loss": 0.2415, "step": 48056 }, { "epoch": 3.893146467919637, "grad_norm": 0.07368528842926025, "learning_rate": 5.936360772311986e-06, "loss": 0.2102, "step": 48057 }, { "epoch": 3.8932274789371357, "grad_norm": 0.09229487180709839, "learning_rate": 5.931860119717359e-06, "loss": 0.2683, "step": 48058 }, { "epoch": 3.893308489954634, "grad_norm": 0.07881072908639908, "learning_rate": 5.927359467122733e-06, "loss": 0.205, "step": 48059 }, { "epoch": 3.893389500972132, "grad_norm": 0.08213283866643906, "learning_rate": 5.922858814528107e-06, "loss": 0.2216, "step": 48060 }, { "epoch": 3.8934705119896305, "grad_norm": 0.06197618693113327, "learning_rate": 5.91835816193348e-06, "loss": 0.2131, "step": 48061 }, { "epoch": 3.8935515230071287, "grad_norm": 0.06296448409557343, "learning_rate": 5.913857509338854e-06, "loss": 0.2414, "step": 48062 }, { "epoch": 3.8936325340246274, "grad_norm": 0.057206038385629654, "learning_rate": 5.9093568567442285e-06, "loss": 0.2052, "step": 48063 }, { "epoch": 3.8937135450421256, "grad_norm": 0.06184779480099678, "learning_rate": 5.904856204149602e-06, "loss": 0.1972, "step": 48064 }, { "epoch": 3.893794556059624, "grad_norm": 0.06405945867300034, "learning_rate": 5.900355551554976e-06, "loss": 0.1939, "step": 48065 }, { "epoch": 3.8938755670771226, "grad_norm": 0.08158735185861588, "learning_rate": 5.89585489896035e-06, "loss": 0.2208, "step": 48066 }, { "epoch": 3.893956578094621, "grad_norm": 0.06656576693058014, "learning_rate": 5.891354246365723e-06, "loss": 0.2169, "step": 48067 }, { "epoch": 3.894037589112119, "grad_norm": 0.06311673671007156, "learning_rate": 5.886853593771097e-06, "loss": 0.2384, "step": 48068 }, { "epoch": 3.8941186001296177, "grad_norm": 0.0772138312458992, "learning_rate": 5.882352941176471e-06, "loss": 0.2311, "step": 48069 }, { "epoch": 3.894199611147116, "grad_norm": 0.0871867686510086, "learning_rate": 5.8778522885818445e-06, "loss": 0.22, "step": 48070 }, { "epoch": 3.8942806221646142, "grad_norm": 0.07590407133102417, "learning_rate": 5.873351635987218e-06, "loss": 0.2274, "step": 48071 }, { "epoch": 3.894361633182113, "grad_norm": 0.0857917070388794, "learning_rate": 5.868850983392593e-06, "loss": 0.2184, "step": 48072 }, { "epoch": 3.894442644199611, "grad_norm": 0.07525145262479782, "learning_rate": 5.864350330797966e-06, "loss": 0.1988, "step": 48073 }, { "epoch": 3.8945236552171094, "grad_norm": 0.06835979968309402, "learning_rate": 5.85984967820334e-06, "loss": 0.2214, "step": 48074 }, { "epoch": 3.894604666234608, "grad_norm": 0.06695519387722015, "learning_rate": 5.855349025608713e-06, "loss": 0.2288, "step": 48075 }, { "epoch": 3.8946856772521063, "grad_norm": 0.08336454629898071, "learning_rate": 5.850848373014088e-06, "loss": 0.2016, "step": 48076 }, { "epoch": 3.8947666882696046, "grad_norm": 0.06542129814624786, "learning_rate": 5.846347720419461e-06, "loss": 0.1926, "step": 48077 }, { "epoch": 3.8948476992871033, "grad_norm": 0.07468671351671219, "learning_rate": 5.841847067824835e-06, "loss": 0.2031, "step": 48078 }, { "epoch": 3.8949287103046015, "grad_norm": 0.08390598744153976, "learning_rate": 5.837346415230209e-06, "loss": 0.2465, "step": 48079 }, { "epoch": 3.8950097213220998, "grad_norm": 0.0719221755862236, "learning_rate": 5.832845762635582e-06, "loss": 0.2255, "step": 48080 }, { "epoch": 3.8950907323395985, "grad_norm": 0.07787550985813141, "learning_rate": 5.828345110040957e-06, "loss": 0.1899, "step": 48081 }, { "epoch": 3.8951717433570967, "grad_norm": 0.06885991245508194, "learning_rate": 5.82384445744633e-06, "loss": 0.1858, "step": 48082 }, { "epoch": 3.895252754374595, "grad_norm": 0.08002043515443802, "learning_rate": 5.819343804851704e-06, "loss": 0.2095, "step": 48083 }, { "epoch": 3.895333765392093, "grad_norm": 0.06390658766031265, "learning_rate": 5.8148431522570775e-06, "loss": 0.2274, "step": 48084 }, { "epoch": 3.8954147764095914, "grad_norm": 0.0804852768778801, "learning_rate": 5.810342499662452e-06, "loss": 0.2423, "step": 48085 }, { "epoch": 3.89549578742709, "grad_norm": 0.06975232064723969, "learning_rate": 5.805841847067825e-06, "loss": 0.198, "step": 48086 }, { "epoch": 3.8955767984445884, "grad_norm": 0.09129457175731659, "learning_rate": 5.801341194473199e-06, "loss": 0.1861, "step": 48087 }, { "epoch": 3.8956578094620866, "grad_norm": 0.07378769665956497, "learning_rate": 5.796840541878573e-06, "loss": 0.2115, "step": 48088 }, { "epoch": 3.8957388204795853, "grad_norm": 0.07973496615886688, "learning_rate": 5.792339889283946e-06, "loss": 0.2271, "step": 48089 }, { "epoch": 3.8958198314970836, "grad_norm": 0.07622456550598145, "learning_rate": 5.78783923668932e-06, "loss": 0.2271, "step": 48090 }, { "epoch": 3.895900842514582, "grad_norm": 0.06588424742221832, "learning_rate": 5.7833385840946944e-06, "loss": 0.2054, "step": 48091 }, { "epoch": 3.8959818535320805, "grad_norm": 0.06296347826719284, "learning_rate": 5.778837931500067e-06, "loss": 0.1969, "step": 48092 }, { "epoch": 3.8960628645495787, "grad_norm": 0.07207436114549637, "learning_rate": 5.774337278905442e-06, "loss": 0.2263, "step": 48093 }, { "epoch": 3.896143875567077, "grad_norm": 0.07418610155582428, "learning_rate": 5.769836626310815e-06, "loss": 0.2296, "step": 48094 }, { "epoch": 3.8962248865845757, "grad_norm": 0.08219721913337708, "learning_rate": 5.765335973716189e-06, "loss": 0.2525, "step": 48095 }, { "epoch": 3.896305897602074, "grad_norm": 0.06975866854190826, "learning_rate": 5.760835321121563e-06, "loss": 0.2565, "step": 48096 }, { "epoch": 3.896386908619572, "grad_norm": 0.06241089478135109, "learning_rate": 5.756334668526937e-06, "loss": 0.2418, "step": 48097 }, { "epoch": 3.896467919637071, "grad_norm": 0.0809803307056427, "learning_rate": 5.7518340159323105e-06, "loss": 0.2403, "step": 48098 }, { "epoch": 3.896548930654569, "grad_norm": 0.06909254193305969, "learning_rate": 5.747333363337684e-06, "loss": 0.2054, "step": 48099 }, { "epoch": 3.8966299416720673, "grad_norm": 0.06723940372467041, "learning_rate": 5.7428327107430586e-06, "loss": 0.2459, "step": 48100 }, { "epoch": 3.896710952689566, "grad_norm": 0.07118137925863266, "learning_rate": 5.738332058148431e-06, "loss": 0.2237, "step": 48101 }, { "epoch": 3.8967919637070643, "grad_norm": 0.07935565710067749, "learning_rate": 5.733831405553806e-06, "loss": 0.2189, "step": 48102 }, { "epoch": 3.8968729747245625, "grad_norm": 0.08424384146928787, "learning_rate": 5.729330752959179e-06, "loss": 0.2415, "step": 48103 }, { "epoch": 3.8969539857420608, "grad_norm": 0.07037527859210968, "learning_rate": 5.724830100364553e-06, "loss": 0.1857, "step": 48104 }, { "epoch": 3.8970349967595594, "grad_norm": 0.06845095008611679, "learning_rate": 5.7203294477699266e-06, "loss": 0.2233, "step": 48105 }, { "epoch": 3.8971160077770577, "grad_norm": 0.08850236237049103, "learning_rate": 5.715828795175301e-06, "loss": 0.279, "step": 48106 }, { "epoch": 3.897197018794556, "grad_norm": 0.06084459275007248, "learning_rate": 5.711328142580674e-06, "loss": 0.201, "step": 48107 }, { "epoch": 3.897278029812054, "grad_norm": 0.07755912840366364, "learning_rate": 5.706827489986048e-06, "loss": 0.2315, "step": 48108 }, { "epoch": 3.897359040829553, "grad_norm": 0.06639105826616287, "learning_rate": 5.702326837391423e-06, "loss": 0.2502, "step": 48109 }, { "epoch": 3.897440051847051, "grad_norm": 0.07094542682170868, "learning_rate": 5.6978261847967954e-06, "loss": 0.1805, "step": 48110 }, { "epoch": 3.8975210628645494, "grad_norm": 0.07018940150737762, "learning_rate": 5.69332553220217e-06, "loss": 0.1724, "step": 48111 }, { "epoch": 3.897602073882048, "grad_norm": 0.06900765746831894, "learning_rate": 5.6888248796075435e-06, "loss": 0.2331, "step": 48112 }, { "epoch": 3.8976830848995463, "grad_norm": 0.0696382075548172, "learning_rate": 5.684324227012917e-06, "loss": 0.2468, "step": 48113 }, { "epoch": 3.8977640959170445, "grad_norm": 0.06245898827910423, "learning_rate": 5.679823574418291e-06, "loss": 0.2286, "step": 48114 }, { "epoch": 3.8978451069345432, "grad_norm": 0.07008770108222961, "learning_rate": 5.675322921823665e-06, "loss": 0.2411, "step": 48115 }, { "epoch": 3.8979261179520415, "grad_norm": 0.05966051667928696, "learning_rate": 5.670822269229038e-06, "loss": 0.1998, "step": 48116 }, { "epoch": 3.8980071289695397, "grad_norm": 0.07350362837314606, "learning_rate": 5.666321616634412e-06, "loss": 0.2289, "step": 48117 }, { "epoch": 3.8980881399870384, "grad_norm": 0.06739611178636551, "learning_rate": 5.661820964039786e-06, "loss": 0.2126, "step": 48118 }, { "epoch": 3.8981691510045366, "grad_norm": 0.06899629533290863, "learning_rate": 5.6573203114451596e-06, "loss": 0.2236, "step": 48119 }, { "epoch": 3.898250162022035, "grad_norm": 0.06305073946714401, "learning_rate": 5.652819658850533e-06, "loss": 0.2044, "step": 48120 }, { "epoch": 3.8983311730395336, "grad_norm": 0.0846519023180008, "learning_rate": 5.648319006255908e-06, "loss": 0.2821, "step": 48121 }, { "epoch": 3.898412184057032, "grad_norm": 0.06912169605493546, "learning_rate": 5.643818353661281e-06, "loss": 0.2427, "step": 48122 }, { "epoch": 3.89849319507453, "grad_norm": 0.07149054855108261, "learning_rate": 5.639317701066655e-06, "loss": 0.2001, "step": 48123 }, { "epoch": 3.8985742060920288, "grad_norm": 0.05685052648186684, "learning_rate": 5.634817048472029e-06, "loss": 0.2235, "step": 48124 }, { "epoch": 3.898655217109527, "grad_norm": 0.0757313072681427, "learning_rate": 5.630316395877403e-06, "loss": 0.2373, "step": 48125 }, { "epoch": 3.8987362281270252, "grad_norm": 0.08734872937202454, "learning_rate": 5.6258157432827765e-06, "loss": 0.2707, "step": 48126 }, { "epoch": 3.8988172391445235, "grad_norm": 0.07042701542377472, "learning_rate": 5.62131509068815e-06, "loss": 0.2077, "step": 48127 }, { "epoch": 3.898898250162022, "grad_norm": 0.06531260907649994, "learning_rate": 5.616814438093524e-06, "loss": 0.2404, "step": 48128 }, { "epoch": 3.8989792611795204, "grad_norm": 0.06730636954307556, "learning_rate": 5.612313785498897e-06, "loss": 0.2318, "step": 48129 }, { "epoch": 3.8990602721970187, "grad_norm": 0.08711560815572739, "learning_rate": 5.607813132904272e-06, "loss": 0.2223, "step": 48130 }, { "epoch": 3.899141283214517, "grad_norm": 0.06874527782201767, "learning_rate": 5.603312480309645e-06, "loss": 0.2563, "step": 48131 }, { "epoch": 3.8992222942320156, "grad_norm": 0.08103624731302261, "learning_rate": 5.598811827715019e-06, "loss": 0.2288, "step": 48132 }, { "epoch": 3.899303305249514, "grad_norm": 0.07119531184434891, "learning_rate": 5.5943111751203925e-06, "loss": 0.2334, "step": 48133 }, { "epoch": 3.899384316267012, "grad_norm": 0.06922445446252823, "learning_rate": 5.589810522525767e-06, "loss": 0.2301, "step": 48134 }, { "epoch": 3.899465327284511, "grad_norm": 0.07962538301944733, "learning_rate": 5.58530986993114e-06, "loss": 0.2631, "step": 48135 }, { "epoch": 3.899546338302009, "grad_norm": 0.07506944984197617, "learning_rate": 5.580809217336514e-06, "loss": 0.2203, "step": 48136 }, { "epoch": 3.8996273493195073, "grad_norm": 0.08897983282804489, "learning_rate": 5.576308564741888e-06, "loss": 0.2219, "step": 48137 }, { "epoch": 3.899708360337006, "grad_norm": 0.06361392885446548, "learning_rate": 5.571807912147261e-06, "loss": 0.2409, "step": 48138 }, { "epoch": 3.899789371354504, "grad_norm": 0.07459505647420883, "learning_rate": 5.567307259552636e-06, "loss": 0.2043, "step": 48139 }, { "epoch": 3.8998703823720025, "grad_norm": 0.059818338602781296, "learning_rate": 5.5628066069580095e-06, "loss": 0.2136, "step": 48140 }, { "epoch": 3.899951393389501, "grad_norm": 0.08024513721466064, "learning_rate": 5.558305954363383e-06, "loss": 0.2156, "step": 48141 }, { "epoch": 3.9000324044069994, "grad_norm": 0.06556698679924011, "learning_rate": 5.553805301768757e-06, "loss": 0.2035, "step": 48142 }, { "epoch": 3.9001134154244976, "grad_norm": 0.08273103833198547, "learning_rate": 5.549304649174131e-06, "loss": 0.251, "step": 48143 }, { "epoch": 3.9001944264419963, "grad_norm": 0.07010772824287415, "learning_rate": 5.544803996579504e-06, "loss": 0.2426, "step": 48144 }, { "epoch": 3.9002754374594946, "grad_norm": 0.07389405369758606, "learning_rate": 5.540303343984878e-06, "loss": 0.2398, "step": 48145 }, { "epoch": 3.900356448476993, "grad_norm": 0.051351238042116165, "learning_rate": 5.535802691390252e-06, "loss": 0.2364, "step": 48146 }, { "epoch": 3.9004374594944915, "grad_norm": 0.0765320435166359, "learning_rate": 5.5313020387956255e-06, "loss": 0.2237, "step": 48147 }, { "epoch": 3.9005184705119897, "grad_norm": 0.07203897833824158, "learning_rate": 5.526801386200999e-06, "loss": 0.2089, "step": 48148 }, { "epoch": 3.900599481529488, "grad_norm": 0.06963177770376205, "learning_rate": 5.522300733606374e-06, "loss": 0.21, "step": 48149 }, { "epoch": 3.9006804925469862, "grad_norm": 0.07618485391139984, "learning_rate": 5.517800081011746e-06, "loss": 0.2284, "step": 48150 }, { "epoch": 3.900761503564485, "grad_norm": 0.08152638375759125, "learning_rate": 5.513299428417121e-06, "loss": 0.2354, "step": 48151 }, { "epoch": 3.900842514581983, "grad_norm": 0.07824641466140747, "learning_rate": 5.508798775822494e-06, "loss": 0.2582, "step": 48152 }, { "epoch": 3.9009235255994814, "grad_norm": 0.07195582240819931, "learning_rate": 5.504298123227868e-06, "loss": 0.1995, "step": 48153 }, { "epoch": 3.9010045366169797, "grad_norm": 0.07551611214876175, "learning_rate": 5.4997974706332424e-06, "loss": 0.2055, "step": 48154 }, { "epoch": 3.9010855476344783, "grad_norm": 0.06853777915239334, "learning_rate": 5.495296818038616e-06, "loss": 0.1924, "step": 48155 }, { "epoch": 3.9011665586519766, "grad_norm": 0.09760008007287979, "learning_rate": 5.49079616544399e-06, "loss": 0.2279, "step": 48156 }, { "epoch": 3.901247569669475, "grad_norm": 0.060199085623025894, "learning_rate": 5.486295512849363e-06, "loss": 0.1821, "step": 48157 }, { "epoch": 3.9013285806869735, "grad_norm": 0.07289555668830872, "learning_rate": 5.481794860254738e-06, "loss": 0.241, "step": 48158 }, { "epoch": 3.9014095917044718, "grad_norm": 0.08234255015850067, "learning_rate": 5.4772942076601105e-06, "loss": 0.2462, "step": 48159 }, { "epoch": 3.90149060272197, "grad_norm": 0.07204477488994598, "learning_rate": 5.472793555065485e-06, "loss": 0.2149, "step": 48160 }, { "epoch": 3.9015716137394687, "grad_norm": 0.0728304460644722, "learning_rate": 5.4682929024708585e-06, "loss": 0.2578, "step": 48161 }, { "epoch": 3.901652624756967, "grad_norm": 0.08322672545909882, "learning_rate": 5.463792249876232e-06, "loss": 0.2468, "step": 48162 }, { "epoch": 3.901733635774465, "grad_norm": 0.07119564712047577, "learning_rate": 5.459291597281606e-06, "loss": 0.2027, "step": 48163 }, { "epoch": 3.901814646791964, "grad_norm": 0.06412103772163391, "learning_rate": 5.45479094468698e-06, "loss": 0.2256, "step": 48164 }, { "epoch": 3.901895657809462, "grad_norm": 0.07036664336919785, "learning_rate": 5.450290292092353e-06, "loss": 0.2439, "step": 48165 }, { "epoch": 3.9019766688269604, "grad_norm": 0.07236220687627792, "learning_rate": 5.445789639497727e-06, "loss": 0.232, "step": 48166 }, { "epoch": 3.902057679844459, "grad_norm": 0.07063250243663788, "learning_rate": 5.441288986903102e-06, "loss": 0.1938, "step": 48167 }, { "epoch": 3.9021386908619573, "grad_norm": 0.08155050873756409, "learning_rate": 5.436788334308475e-06, "loss": 0.207, "step": 48168 }, { "epoch": 3.9022197018794555, "grad_norm": 0.09283901005983353, "learning_rate": 5.432287681713849e-06, "loss": 0.2062, "step": 48169 }, { "epoch": 3.9023007128969542, "grad_norm": 0.07058636099100113, "learning_rate": 5.427787029119223e-06, "loss": 0.2262, "step": 48170 }, { "epoch": 3.9023817239144525, "grad_norm": 0.07090901583433151, "learning_rate": 5.423286376524596e-06, "loss": 0.217, "step": 48171 }, { "epoch": 3.9024627349319507, "grad_norm": 0.06685709208250046, "learning_rate": 5.41878572392997e-06, "loss": 0.2017, "step": 48172 }, { "epoch": 3.902543745949449, "grad_norm": 0.0714755430817604, "learning_rate": 5.414285071335344e-06, "loss": 0.2207, "step": 48173 }, { "epoch": 3.9026247569669477, "grad_norm": 0.06478474289178848, "learning_rate": 5.409784418740718e-06, "loss": 0.1963, "step": 48174 }, { "epoch": 3.902705767984446, "grad_norm": 0.059301264584064484, "learning_rate": 5.4052837661460915e-06, "loss": 0.1878, "step": 48175 }, { "epoch": 3.902786779001944, "grad_norm": 0.07554680854082108, "learning_rate": 5.400783113551465e-06, "loss": 0.196, "step": 48176 }, { "epoch": 3.9028677900194424, "grad_norm": 0.071644127368927, "learning_rate": 5.396282460956839e-06, "loss": 0.2209, "step": 48177 }, { "epoch": 3.902948801036941, "grad_norm": 0.07129331678152084, "learning_rate": 5.391781808362212e-06, "loss": 0.1822, "step": 48178 }, { "epoch": 3.9030298120544393, "grad_norm": 0.08478628844022751, "learning_rate": 5.387281155767587e-06, "loss": 0.2269, "step": 48179 }, { "epoch": 3.9031108230719376, "grad_norm": 0.06071711704134941, "learning_rate": 5.38278050317296e-06, "loss": 0.2276, "step": 48180 }, { "epoch": 3.9031918340894363, "grad_norm": 0.0666237398982048, "learning_rate": 5.378279850578334e-06, "loss": 0.2171, "step": 48181 }, { "epoch": 3.9032728451069345, "grad_norm": 0.07781444489955902, "learning_rate": 5.373779197983708e-06, "loss": 0.2382, "step": 48182 }, { "epoch": 3.9033538561244328, "grad_norm": 0.07283270359039307, "learning_rate": 5.369278545389082e-06, "loss": 0.2168, "step": 48183 }, { "epoch": 3.9034348671419314, "grad_norm": 0.07119312882423401, "learning_rate": 5.364777892794456e-06, "loss": 0.2312, "step": 48184 }, { "epoch": 3.9035158781594297, "grad_norm": 0.06919389218091965, "learning_rate": 5.360277240199829e-06, "loss": 0.2035, "step": 48185 }, { "epoch": 3.903596889176928, "grad_norm": 0.09564153850078583, "learning_rate": 5.355776587605203e-06, "loss": 0.2171, "step": 48186 }, { "epoch": 3.9036779001944266, "grad_norm": 0.06871563196182251, "learning_rate": 5.3512759350105764e-06, "loss": 0.2065, "step": 48187 }, { "epoch": 3.903758911211925, "grad_norm": 0.07969631999731064, "learning_rate": 5.346775282415951e-06, "loss": 0.2581, "step": 48188 }, { "epoch": 3.903839922229423, "grad_norm": 0.07645699381828308, "learning_rate": 5.3422746298213245e-06, "loss": 0.2623, "step": 48189 }, { "epoch": 3.903920933246922, "grad_norm": 0.07592182606458664, "learning_rate": 5.337773977226698e-06, "loss": 0.2024, "step": 48190 }, { "epoch": 3.90400194426442, "grad_norm": 0.07032636553049088, "learning_rate": 5.333273324632072e-06, "loss": 0.2, "step": 48191 }, { "epoch": 3.9040829552819183, "grad_norm": 0.06590494513511658, "learning_rate": 5.328772672037446e-06, "loss": 0.2019, "step": 48192 }, { "epoch": 3.904163966299417, "grad_norm": 0.07423686981201172, "learning_rate": 5.324272019442819e-06, "loss": 0.2317, "step": 48193 }, { "epoch": 3.904244977316915, "grad_norm": 0.0696929395198822, "learning_rate": 5.319771366848193e-06, "loss": 0.2315, "step": 48194 }, { "epoch": 3.9043259883344135, "grad_norm": 0.08379188925027847, "learning_rate": 5.315270714253567e-06, "loss": 0.2312, "step": 48195 }, { "epoch": 3.9044069993519117, "grad_norm": 0.07730215787887573, "learning_rate": 5.3107700616589406e-06, "loss": 0.2267, "step": 48196 }, { "epoch": 3.9044880103694104, "grad_norm": 0.07866203784942627, "learning_rate": 5.306269409064315e-06, "loss": 0.2381, "step": 48197 }, { "epoch": 3.9045690213869086, "grad_norm": 0.08509163558483124, "learning_rate": 5.301768756469689e-06, "loss": 0.2215, "step": 48198 }, { "epoch": 3.904650032404407, "grad_norm": 0.07737905532121658, "learning_rate": 5.297268103875062e-06, "loss": 0.2104, "step": 48199 }, { "epoch": 3.904731043421905, "grad_norm": 0.08024188876152039, "learning_rate": 5.292767451280436e-06, "loss": 0.2141, "step": 48200 }, { "epoch": 3.904812054439404, "grad_norm": 0.0622677244246006, "learning_rate": 5.28826679868581e-06, "loss": 0.1994, "step": 48201 }, { "epoch": 3.904893065456902, "grad_norm": 0.08390653878450394, "learning_rate": 5.283766146091183e-06, "loss": 0.2637, "step": 48202 }, { "epoch": 3.9049740764744003, "grad_norm": 0.07863643020391464, "learning_rate": 5.2792654934965575e-06, "loss": 0.2527, "step": 48203 }, { "epoch": 3.905055087491899, "grad_norm": 0.07339465618133545, "learning_rate": 5.274764840901931e-06, "loss": 0.2585, "step": 48204 }, { "epoch": 3.9051360985093972, "grad_norm": 0.06816806644201279, "learning_rate": 5.270264188307305e-06, "loss": 0.1963, "step": 48205 }, { "epoch": 3.9052171095268955, "grad_norm": 0.07438135147094727, "learning_rate": 5.265763535712678e-06, "loss": 0.2358, "step": 48206 }, { "epoch": 3.905298120544394, "grad_norm": 0.09304346889257431, "learning_rate": 5.261262883118053e-06, "loss": 0.2418, "step": 48207 }, { "epoch": 3.9053791315618924, "grad_norm": 0.07517294585704803, "learning_rate": 5.2567622305234255e-06, "loss": 0.2178, "step": 48208 }, { "epoch": 3.9054601425793907, "grad_norm": 0.06006520986557007, "learning_rate": 5.2522615779288e-06, "loss": 0.2233, "step": 48209 }, { "epoch": 3.9055411535968894, "grad_norm": 0.061688102781772614, "learning_rate": 5.247760925334174e-06, "loss": 0.1981, "step": 48210 }, { "epoch": 3.9056221646143876, "grad_norm": 0.11346621811389923, "learning_rate": 5.243260272739547e-06, "loss": 0.2113, "step": 48211 }, { "epoch": 3.905703175631886, "grad_norm": 0.06851883232593536, "learning_rate": 5.238759620144922e-06, "loss": 0.2678, "step": 48212 }, { "epoch": 3.9057841866493845, "grad_norm": 0.07190129160881042, "learning_rate": 5.234258967550295e-06, "loss": 0.2227, "step": 48213 }, { "epoch": 3.905865197666883, "grad_norm": 0.07402346283197403, "learning_rate": 5.229758314955669e-06, "loss": 0.23, "step": 48214 }, { "epoch": 3.905946208684381, "grad_norm": 0.0806785300374031, "learning_rate": 5.225257662361042e-06, "loss": 0.2571, "step": 48215 }, { "epoch": 3.9060272197018797, "grad_norm": 0.06920921057462692, "learning_rate": 5.220757009766417e-06, "loss": 0.2173, "step": 48216 }, { "epoch": 3.906108230719378, "grad_norm": 0.06963902711868286, "learning_rate": 5.21625635717179e-06, "loss": 0.19, "step": 48217 }, { "epoch": 3.906189241736876, "grad_norm": 0.06644149869680405, "learning_rate": 5.211755704577164e-06, "loss": 0.2332, "step": 48218 }, { "epoch": 3.9062702527543745, "grad_norm": 0.06656293570995331, "learning_rate": 5.207255051982538e-06, "loss": 0.1953, "step": 48219 }, { "epoch": 3.906351263771873, "grad_norm": 0.07155372947454453, "learning_rate": 5.202754399387911e-06, "loss": 0.2218, "step": 48220 }, { "epoch": 3.9064322747893714, "grad_norm": 0.06269364804029465, "learning_rate": 5.198253746793285e-06, "loss": 0.1846, "step": 48221 }, { "epoch": 3.9065132858068696, "grad_norm": 0.06916271895170212, "learning_rate": 5.193753094198659e-06, "loss": 0.2343, "step": 48222 }, { "epoch": 3.906594296824368, "grad_norm": 0.07343260943889618, "learning_rate": 5.189252441604032e-06, "loss": 0.2274, "step": 48223 }, { "epoch": 3.9066753078418666, "grad_norm": 0.08011267334222794, "learning_rate": 5.1847517890094065e-06, "loss": 0.2191, "step": 48224 }, { "epoch": 3.906756318859365, "grad_norm": 0.07275541871786118, "learning_rate": 5.180251136414781e-06, "loss": 0.2005, "step": 48225 }, { "epoch": 3.906837329876863, "grad_norm": 0.08077385276556015, "learning_rate": 5.175750483820154e-06, "loss": 0.2796, "step": 48226 }, { "epoch": 3.9069183408943617, "grad_norm": 0.08260789513587952, "learning_rate": 5.171249831225528e-06, "loss": 0.2247, "step": 48227 }, { "epoch": 3.90699935191186, "grad_norm": 0.05853225663304329, "learning_rate": 5.166749178630902e-06, "loss": 0.1863, "step": 48228 }, { "epoch": 3.9070803629293582, "grad_norm": 0.08310955762863159, "learning_rate": 5.162248526036275e-06, "loss": 0.2158, "step": 48229 }, { "epoch": 3.907161373946857, "grad_norm": 0.07807844877243042, "learning_rate": 5.157747873441649e-06, "loss": 0.2443, "step": 48230 }, { "epoch": 3.907242384964355, "grad_norm": 0.07691746205091476, "learning_rate": 5.1532472208470235e-06, "loss": 0.2138, "step": 48231 }, { "epoch": 3.9073233959818534, "grad_norm": 0.07122568041086197, "learning_rate": 5.148746568252397e-06, "loss": 0.2198, "step": 48232 }, { "epoch": 3.907404406999352, "grad_norm": 0.06453979015350342, "learning_rate": 5.144245915657771e-06, "loss": 0.2408, "step": 48233 }, { "epoch": 3.9074854180168503, "grad_norm": 0.07711661607027054, "learning_rate": 5.139745263063144e-06, "loss": 0.201, "step": 48234 }, { "epoch": 3.9075664290343486, "grad_norm": 0.08223751932382584, "learning_rate": 5.135244610468518e-06, "loss": 0.2204, "step": 48235 }, { "epoch": 3.9076474400518473, "grad_norm": 0.08006234467029572, "learning_rate": 5.1307439578738915e-06, "loss": 0.2498, "step": 48236 }, { "epoch": 3.9077284510693455, "grad_norm": 0.06987843662500381, "learning_rate": 5.126243305279266e-06, "loss": 0.2437, "step": 48237 }, { "epoch": 3.9078094620868438, "grad_norm": 0.06831565499305725, "learning_rate": 5.1217426526846395e-06, "loss": 0.1795, "step": 48238 }, { "epoch": 3.9078904731043425, "grad_norm": 0.0886467844247818, "learning_rate": 5.117242000090013e-06, "loss": 0.2054, "step": 48239 }, { "epoch": 3.9079714841218407, "grad_norm": 0.09053325653076172, "learning_rate": 5.112741347495388e-06, "loss": 0.2823, "step": 48240 }, { "epoch": 3.908052495139339, "grad_norm": 0.07012274116277695, "learning_rate": 5.108240694900761e-06, "loss": 0.2037, "step": 48241 }, { "epoch": 3.908133506156837, "grad_norm": 0.07728597521781921, "learning_rate": 5.103740042306135e-06, "loss": 0.2068, "step": 48242 }, { "epoch": 3.9082145171743354, "grad_norm": 0.06844566017389297, "learning_rate": 5.099239389711508e-06, "loss": 0.1873, "step": 48243 }, { "epoch": 3.908295528191834, "grad_norm": 0.0649128332734108, "learning_rate": 5.094738737116882e-06, "loss": 0.1839, "step": 48244 }, { "epoch": 3.9083765392093324, "grad_norm": 0.06959527730941772, "learning_rate": 5.090238084522256e-06, "loss": 0.2126, "step": 48245 }, { "epoch": 3.9084575502268306, "grad_norm": 0.0665079727768898, "learning_rate": 5.08573743192763e-06, "loss": 0.2057, "step": 48246 }, { "epoch": 3.9085385612443293, "grad_norm": 0.09884722530841827, "learning_rate": 5.081236779333004e-06, "loss": 0.2282, "step": 48247 }, { "epoch": 3.9086195722618275, "grad_norm": 0.07287062704563141, "learning_rate": 5.076736126738377e-06, "loss": 0.2146, "step": 48248 }, { "epoch": 3.908700583279326, "grad_norm": 0.07356756180524826, "learning_rate": 5.072235474143751e-06, "loss": 0.2424, "step": 48249 }, { "epoch": 3.9087815942968245, "grad_norm": 0.0727347582578659, "learning_rate": 5.067734821549125e-06, "loss": 0.1947, "step": 48250 }, { "epoch": 3.9088626053143227, "grad_norm": 0.06179845333099365, "learning_rate": 5.063234168954498e-06, "loss": 0.2131, "step": 48251 }, { "epoch": 3.908943616331821, "grad_norm": 0.08347577601671219, "learning_rate": 5.0587335163598725e-06, "loss": 0.2417, "step": 48252 }, { "epoch": 3.9090246273493197, "grad_norm": 0.07791955024003983, "learning_rate": 5.054232863765247e-06, "loss": 0.2335, "step": 48253 }, { "epoch": 3.909105638366818, "grad_norm": 0.07559989392757416, "learning_rate": 5.04973221117062e-06, "loss": 0.203, "step": 48254 }, { "epoch": 3.909186649384316, "grad_norm": 0.06269510090351105, "learning_rate": 5.045231558575994e-06, "loss": 0.2261, "step": 48255 }, { "epoch": 3.909267660401815, "grad_norm": 0.06181592866778374, "learning_rate": 5.040730905981368e-06, "loss": 0.2074, "step": 48256 }, { "epoch": 3.909348671419313, "grad_norm": 0.07220128178596497, "learning_rate": 5.036230253386741e-06, "loss": 0.2157, "step": 48257 }, { "epoch": 3.9094296824368113, "grad_norm": 0.07272609323263168, "learning_rate": 5.031729600792115e-06, "loss": 0.2441, "step": 48258 }, { "epoch": 3.90951069345431, "grad_norm": 0.0771983340382576, "learning_rate": 5.0272289481974894e-06, "loss": 0.2157, "step": 48259 }, { "epoch": 3.9095917044718083, "grad_norm": 0.08448202162981033, "learning_rate": 5.022728295602862e-06, "loss": 0.207, "step": 48260 }, { "epoch": 3.9096727154893065, "grad_norm": 0.07362006604671478, "learning_rate": 5.018227643008237e-06, "loss": 0.2275, "step": 48261 }, { "epoch": 3.909753726506805, "grad_norm": 0.05870373547077179, "learning_rate": 5.01372699041361e-06, "loss": 0.2229, "step": 48262 }, { "epoch": 3.9098347375243034, "grad_norm": 0.07628266513347626, "learning_rate": 5.009226337818984e-06, "loss": 0.2208, "step": 48263 }, { "epoch": 3.9099157485418017, "grad_norm": 0.08400886505842209, "learning_rate": 5.0047256852243574e-06, "loss": 0.2524, "step": 48264 }, { "epoch": 3.9099967595593, "grad_norm": 0.0675138533115387, "learning_rate": 5.000225032629732e-06, "loss": 0.1857, "step": 48265 }, { "epoch": 3.910077770576798, "grad_norm": 0.08384126424789429, "learning_rate": 4.995724380035105e-06, "loss": 0.2271, "step": 48266 }, { "epoch": 3.910158781594297, "grad_norm": 0.0884663388133049, "learning_rate": 4.991223727440479e-06, "loss": 0.267, "step": 48267 }, { "epoch": 3.910239792611795, "grad_norm": 0.06389939039945602, "learning_rate": 4.9867230748458536e-06, "loss": 0.2102, "step": 48268 }, { "epoch": 3.9103208036292934, "grad_norm": 0.060799382627010345, "learning_rate": 4.982222422251226e-06, "loss": 0.2267, "step": 48269 }, { "epoch": 3.910401814646792, "grad_norm": 0.07198642194271088, "learning_rate": 4.977721769656601e-06, "loss": 0.2249, "step": 48270 }, { "epoch": 3.9104828256642903, "grad_norm": 0.0687641128897667, "learning_rate": 4.973221117061974e-06, "loss": 0.1983, "step": 48271 }, { "epoch": 3.9105638366817885, "grad_norm": 0.07991275936365128, "learning_rate": 4.968720464467348e-06, "loss": 0.253, "step": 48272 }, { "epoch": 3.910644847699287, "grad_norm": 0.07373076677322388, "learning_rate": 4.9642198118727216e-06, "loss": 0.235, "step": 48273 }, { "epoch": 3.9107258587167855, "grad_norm": 0.07304935157299042, "learning_rate": 4.959719159278096e-06, "loss": 0.1938, "step": 48274 }, { "epoch": 3.9108068697342837, "grad_norm": 0.06456219404935837, "learning_rate": 4.955218506683469e-06, "loss": 0.2212, "step": 48275 }, { "epoch": 3.9108878807517824, "grad_norm": 0.06691355258226395, "learning_rate": 4.950717854088843e-06, "loss": 0.198, "step": 48276 }, { "epoch": 3.9109688917692806, "grad_norm": 0.07013965398073196, "learning_rate": 4.946217201494217e-06, "loss": 0.2197, "step": 48277 }, { "epoch": 3.911049902786779, "grad_norm": 0.0730832889676094, "learning_rate": 4.9417165488995904e-06, "loss": 0.1978, "step": 48278 }, { "epoch": 3.9111309138042776, "grad_norm": 0.06756222993135452, "learning_rate": 4.937215896304964e-06, "loss": 0.2059, "step": 48279 }, { "epoch": 3.911211924821776, "grad_norm": 0.05772652477025986, "learning_rate": 4.9327152437103385e-06, "loss": 0.2107, "step": 48280 }, { "epoch": 3.911292935839274, "grad_norm": 0.07321856915950775, "learning_rate": 4.928214591115712e-06, "loss": 0.2468, "step": 48281 }, { "epoch": 3.9113739468567728, "grad_norm": 0.07220222055912018, "learning_rate": 4.923713938521086e-06, "loss": 0.2097, "step": 48282 }, { "epoch": 3.911454957874271, "grad_norm": 0.08239828050136566, "learning_rate": 4.91921328592646e-06, "loss": 0.2462, "step": 48283 }, { "epoch": 3.9115359688917692, "grad_norm": 0.053629614412784576, "learning_rate": 4.914712633331833e-06, "loss": 0.1856, "step": 48284 }, { "epoch": 3.911616979909268, "grad_norm": 0.07192451506853104, "learning_rate": 4.910211980737207e-06, "loss": 0.2489, "step": 48285 }, { "epoch": 3.911697990926766, "grad_norm": 0.07000795751810074, "learning_rate": 4.905711328142581e-06, "loss": 0.2101, "step": 48286 }, { "epoch": 3.9117790019442644, "grad_norm": 0.06989791244268417, "learning_rate": 4.9012106755479546e-06, "loss": 0.2376, "step": 48287 }, { "epoch": 3.9118600129617627, "grad_norm": 0.08663799613714218, "learning_rate": 4.896710022953328e-06, "loss": 0.2485, "step": 48288 }, { "epoch": 3.911941023979261, "grad_norm": 0.07052966952323914, "learning_rate": 4.892209370358703e-06, "loss": 0.1894, "step": 48289 }, { "epoch": 3.9120220349967596, "grad_norm": 0.05605921521782875, "learning_rate": 4.887708717764076e-06, "loss": 0.1966, "step": 48290 }, { "epoch": 3.912103046014258, "grad_norm": 0.06981483846902847, "learning_rate": 4.88320806516945e-06, "loss": 0.2424, "step": 48291 }, { "epoch": 3.912184057031756, "grad_norm": 0.05360450968146324, "learning_rate": 4.878707412574823e-06, "loss": 0.1981, "step": 48292 }, { "epoch": 3.912265068049255, "grad_norm": 0.07205212116241455, "learning_rate": 4.874206759980197e-06, "loss": 0.1913, "step": 48293 }, { "epoch": 3.912346079066753, "grad_norm": 0.06900376826524734, "learning_rate": 4.869706107385571e-06, "loss": 0.2177, "step": 48294 }, { "epoch": 3.9124270900842513, "grad_norm": 0.07426581531763077, "learning_rate": 4.865205454790945e-06, "loss": 0.2662, "step": 48295 }, { "epoch": 3.91250810110175, "grad_norm": 0.0741383284330368, "learning_rate": 4.860704802196319e-06, "loss": 0.2323, "step": 48296 }, { "epoch": 3.912589112119248, "grad_norm": 0.07219462841749191, "learning_rate": 4.856204149601692e-06, "loss": 0.2199, "step": 48297 }, { "epoch": 3.9126701231367464, "grad_norm": 0.06240931153297424, "learning_rate": 4.851703497007067e-06, "loss": 0.2398, "step": 48298 }, { "epoch": 3.912751134154245, "grad_norm": 0.0665903314948082, "learning_rate": 4.84720284441244e-06, "loss": 0.2174, "step": 48299 }, { "epoch": 3.9128321451717434, "grad_norm": 0.06555341929197311, "learning_rate": 4.842702191817814e-06, "loss": 0.1886, "step": 48300 }, { "epoch": 3.9129131561892416, "grad_norm": 0.06640680134296417, "learning_rate": 4.8382015392231875e-06, "loss": 0.1735, "step": 48301 }, { "epoch": 3.9129941672067403, "grad_norm": 0.07242927700281143, "learning_rate": 4.833700886628562e-06, "loss": 0.2254, "step": 48302 }, { "epoch": 3.9130751782242386, "grad_norm": 0.06851553171873093, "learning_rate": 4.829200234033935e-06, "loss": 0.1927, "step": 48303 }, { "epoch": 3.913156189241737, "grad_norm": 0.07618321478366852, "learning_rate": 4.824699581439309e-06, "loss": 0.2333, "step": 48304 }, { "epoch": 3.9132372002592355, "grad_norm": 0.07696599513292313, "learning_rate": 4.820198928844683e-06, "loss": 0.2151, "step": 48305 }, { "epoch": 3.9133182112767337, "grad_norm": 0.07403657585382462, "learning_rate": 4.815698276250056e-06, "loss": 0.2144, "step": 48306 }, { "epoch": 3.913399222294232, "grad_norm": 0.07694181799888611, "learning_rate": 4.81119762365543e-06, "loss": 0.2745, "step": 48307 }, { "epoch": 3.9134802333117307, "grad_norm": 0.06381962448358536, "learning_rate": 4.8066969710608045e-06, "loss": 0.2746, "step": 48308 }, { "epoch": 3.913561244329229, "grad_norm": 0.06298644095659256, "learning_rate": 4.802196318466177e-06, "loss": 0.1824, "step": 48309 }, { "epoch": 3.913642255346727, "grad_norm": 0.07825368642807007, "learning_rate": 4.797695665871552e-06, "loss": 0.2237, "step": 48310 }, { "epoch": 3.9137232663642254, "grad_norm": 0.05910641327500343, "learning_rate": 4.793195013276926e-06, "loss": 0.1973, "step": 48311 }, { "epoch": 3.9138042773817237, "grad_norm": 0.08719029277563095, "learning_rate": 4.788694360682299e-06, "loss": 0.2168, "step": 48312 }, { "epoch": 3.9138852883992223, "grad_norm": 0.07956347614526749, "learning_rate": 4.784193708087673e-06, "loss": 0.2315, "step": 48313 }, { "epoch": 3.9139662994167206, "grad_norm": 0.07194239646196365, "learning_rate": 4.779693055493047e-06, "loss": 0.2449, "step": 48314 }, { "epoch": 3.914047310434219, "grad_norm": 0.06650910526514053, "learning_rate": 4.7751924028984205e-06, "loss": 0.2138, "step": 48315 }, { "epoch": 3.9141283214517175, "grad_norm": 0.07512077689170837, "learning_rate": 4.770691750303794e-06, "loss": 0.2245, "step": 48316 }, { "epoch": 3.9142093324692158, "grad_norm": 0.09508345276117325, "learning_rate": 4.766191097709169e-06, "loss": 0.3053, "step": 48317 }, { "epoch": 3.914290343486714, "grad_norm": 0.07228030264377594, "learning_rate": 4.761690445114541e-06, "loss": 0.2202, "step": 48318 }, { "epoch": 3.9143713545042127, "grad_norm": 0.07119833678007126, "learning_rate": 4.757189792519916e-06, "loss": 0.1919, "step": 48319 }, { "epoch": 3.914452365521711, "grad_norm": 0.059811804443597794, "learning_rate": 4.752689139925289e-06, "loss": 0.1986, "step": 48320 }, { "epoch": 3.914533376539209, "grad_norm": 0.06910062581300735, "learning_rate": 4.748188487330663e-06, "loss": 0.2151, "step": 48321 }, { "epoch": 3.914614387556708, "grad_norm": 0.06613852828741074, "learning_rate": 4.743687834736037e-06, "loss": 0.2022, "step": 48322 }, { "epoch": 3.914695398574206, "grad_norm": 0.06570880860090256, "learning_rate": 4.739187182141411e-06, "loss": 0.1867, "step": 48323 }, { "epoch": 3.9147764095917044, "grad_norm": 0.0788615420460701, "learning_rate": 4.734686529546784e-06, "loss": 0.2258, "step": 48324 }, { "epoch": 3.914857420609203, "grad_norm": 0.0693746879696846, "learning_rate": 4.730185876952158e-06, "loss": 0.2217, "step": 48325 }, { "epoch": 3.9149384316267013, "grad_norm": 0.08763092756271362, "learning_rate": 4.725685224357533e-06, "loss": 0.2442, "step": 48326 }, { "epoch": 3.9150194426441995, "grad_norm": 0.06549570709466934, "learning_rate": 4.7211845717629055e-06, "loss": 0.2142, "step": 48327 }, { "epoch": 3.9151004536616982, "grad_norm": 0.08535436540842056, "learning_rate": 4.71668391916828e-06, "loss": 0.249, "step": 48328 }, { "epoch": 3.9151814646791965, "grad_norm": 0.0782715380191803, "learning_rate": 4.7121832665736535e-06, "loss": 0.2268, "step": 48329 }, { "epoch": 3.9152624756966947, "grad_norm": 0.07017436623573303, "learning_rate": 4.707682613979027e-06, "loss": 0.231, "step": 48330 }, { "epoch": 3.915343486714193, "grad_norm": 0.06549299508333206, "learning_rate": 4.703181961384401e-06, "loss": 0.2067, "step": 48331 }, { "epoch": 3.9154244977316917, "grad_norm": 0.07573448866605759, "learning_rate": 4.698681308789775e-06, "loss": 0.2563, "step": 48332 }, { "epoch": 3.91550550874919, "grad_norm": 0.07193144410848618, "learning_rate": 4.694180656195148e-06, "loss": 0.2246, "step": 48333 }, { "epoch": 3.915586519766688, "grad_norm": 0.0836506187915802, "learning_rate": 4.689680003600522e-06, "loss": 0.2372, "step": 48334 }, { "epoch": 3.9156675307841864, "grad_norm": 0.07173941284418106, "learning_rate": 4.685179351005896e-06, "loss": 0.2322, "step": 48335 }, { "epoch": 3.915748541801685, "grad_norm": 0.0687011256814003, "learning_rate": 4.68067869841127e-06, "loss": 0.2385, "step": 48336 }, { "epoch": 3.9158295528191833, "grad_norm": 0.06658374518156052, "learning_rate": 4.676178045816643e-06, "loss": 0.2497, "step": 48337 }, { "epoch": 3.9159105638366816, "grad_norm": 0.0663018673658371, "learning_rate": 4.671677393222018e-06, "loss": 0.197, "step": 48338 }, { "epoch": 3.9159915748541803, "grad_norm": 0.058599188923835754, "learning_rate": 4.667176740627391e-06, "loss": 0.2165, "step": 48339 }, { "epoch": 3.9160725858716785, "grad_norm": 0.06904032081365585, "learning_rate": 4.662676088032765e-06, "loss": 0.2292, "step": 48340 }, { "epoch": 3.9161535968891767, "grad_norm": 0.08278106898069382, "learning_rate": 4.658175435438139e-06, "loss": 0.2045, "step": 48341 }, { "epoch": 3.9162346079066754, "grad_norm": 0.0761694386601448, "learning_rate": 4.653674782843512e-06, "loss": 0.2144, "step": 48342 }, { "epoch": 3.9163156189241737, "grad_norm": 0.07530530542135239, "learning_rate": 4.6491741302488865e-06, "loss": 0.2537, "step": 48343 }, { "epoch": 3.916396629941672, "grad_norm": 0.08102653175592422, "learning_rate": 4.64467347765426e-06, "loss": 0.2358, "step": 48344 }, { "epoch": 3.9164776409591706, "grad_norm": 0.07261081039905548, "learning_rate": 4.640172825059634e-06, "loss": 0.249, "step": 48345 }, { "epoch": 3.916558651976669, "grad_norm": 0.09279650449752808, "learning_rate": 4.635672172465007e-06, "loss": 0.2493, "step": 48346 }, { "epoch": 3.916639662994167, "grad_norm": 0.08426755666732788, "learning_rate": 4.631171519870382e-06, "loss": 0.2518, "step": 48347 }, { "epoch": 3.916720674011666, "grad_norm": 0.06982297450304031, "learning_rate": 4.626670867275755e-06, "loss": 0.2133, "step": 48348 }, { "epoch": 3.916801685029164, "grad_norm": 0.062374137341976166, "learning_rate": 4.622170214681129e-06, "loss": 0.2058, "step": 48349 }, { "epoch": 3.9168826960466623, "grad_norm": 0.06334874778985977, "learning_rate": 4.617669562086503e-06, "loss": 0.2218, "step": 48350 }, { "epoch": 3.916963707064161, "grad_norm": 0.07470595091581345, "learning_rate": 4.613168909491876e-06, "loss": 0.2633, "step": 48351 }, { "epoch": 3.917044718081659, "grad_norm": 0.06525980681180954, "learning_rate": 4.60866825689725e-06, "loss": 0.2238, "step": 48352 }, { "epoch": 3.9171257290991575, "grad_norm": 0.0697428286075592, "learning_rate": 4.604167604302624e-06, "loss": 0.2647, "step": 48353 }, { "epoch": 3.9172067401166557, "grad_norm": 0.07528168708086014, "learning_rate": 4.599666951707998e-06, "loss": 0.2266, "step": 48354 }, { "epoch": 3.9172877511341544, "grad_norm": 0.07216835767030716, "learning_rate": 4.5951662991133714e-06, "loss": 0.2087, "step": 48355 }, { "epoch": 3.9173687621516526, "grad_norm": 0.06816212087869644, "learning_rate": 4.590665646518746e-06, "loss": 0.1967, "step": 48356 }, { "epoch": 3.917449773169151, "grad_norm": 0.07805240899324417, "learning_rate": 4.5861649939241195e-06, "loss": 0.2293, "step": 48357 }, { "epoch": 3.917530784186649, "grad_norm": 0.06178323179483414, "learning_rate": 4.581664341329493e-06, "loss": 0.2204, "step": 48358 }, { "epoch": 3.917611795204148, "grad_norm": 0.07420182973146439, "learning_rate": 4.577163688734867e-06, "loss": 0.2364, "step": 48359 }, { "epoch": 3.917692806221646, "grad_norm": 0.06499535590410233, "learning_rate": 4.572663036140241e-06, "loss": 0.2054, "step": 48360 }, { "epoch": 3.9177738172391443, "grad_norm": 0.07386469841003418, "learning_rate": 4.568162383545614e-06, "loss": 0.224, "step": 48361 }, { "epoch": 3.917854828256643, "grad_norm": 0.07269761711359024, "learning_rate": 4.563661730950988e-06, "loss": 0.1961, "step": 48362 }, { "epoch": 3.9179358392741412, "grad_norm": 0.07524976879358292, "learning_rate": 4.559161078356362e-06, "loss": 0.22, "step": 48363 }, { "epoch": 3.9180168502916395, "grad_norm": 0.07413894683122635, "learning_rate": 4.5546604257617356e-06, "loss": 0.2123, "step": 48364 }, { "epoch": 3.918097861309138, "grad_norm": 0.06486944854259491, "learning_rate": 4.550159773167109e-06, "loss": 0.2077, "step": 48365 }, { "epoch": 3.9181788723266364, "grad_norm": 0.06430947780609131, "learning_rate": 4.545659120572484e-06, "loss": 0.2326, "step": 48366 }, { "epoch": 3.9182598833441347, "grad_norm": 0.08823602646589279, "learning_rate": 4.541158467977856e-06, "loss": 0.2792, "step": 48367 }, { "epoch": 3.9183408943616334, "grad_norm": 0.05794184282422066, "learning_rate": 4.536657815383231e-06, "loss": 0.2169, "step": 48368 }, { "epoch": 3.9184219053791316, "grad_norm": 0.08605548739433289, "learning_rate": 4.532157162788605e-06, "loss": 0.2541, "step": 48369 }, { "epoch": 3.91850291639663, "grad_norm": 0.08970436453819275, "learning_rate": 4.527656510193978e-06, "loss": 0.2634, "step": 48370 }, { "epoch": 3.9185839274141285, "grad_norm": 0.07923775911331177, "learning_rate": 4.5231558575993525e-06, "loss": 0.2305, "step": 48371 }, { "epoch": 3.9186649384316268, "grad_norm": 0.07092005014419556, "learning_rate": 4.518655205004726e-06, "loss": 0.2317, "step": 48372 }, { "epoch": 3.918745949449125, "grad_norm": 0.08713727444410324, "learning_rate": 4.5141545524101e-06, "loss": 0.2262, "step": 48373 }, { "epoch": 3.9188269604666237, "grad_norm": 0.06912411004304886, "learning_rate": 4.509653899815473e-06, "loss": 0.2173, "step": 48374 }, { "epoch": 3.918907971484122, "grad_norm": 0.0842410996556282, "learning_rate": 4.505153247220848e-06, "loss": 0.2268, "step": 48375 }, { "epoch": 3.91898898250162, "grad_norm": 0.06546398997306824, "learning_rate": 4.5006525946262205e-06, "loss": 0.2216, "step": 48376 }, { "epoch": 3.9190699935191184, "grad_norm": 0.09463486075401306, "learning_rate": 4.496151942031595e-06, "loss": 0.3016, "step": 48377 }, { "epoch": 3.919151004536617, "grad_norm": 0.0747930109500885, "learning_rate": 4.4916512894369686e-06, "loss": 0.2222, "step": 48378 }, { "epoch": 3.9192320155541154, "grad_norm": 0.06386351585388184, "learning_rate": 4.487150636842342e-06, "loss": 0.204, "step": 48379 }, { "epoch": 3.9193130265716136, "grad_norm": 0.06935272365808487, "learning_rate": 4.482649984247716e-06, "loss": 0.227, "step": 48380 }, { "epoch": 3.919394037589112, "grad_norm": 0.0631873607635498, "learning_rate": 4.47814933165309e-06, "loss": 0.2332, "step": 48381 }, { "epoch": 3.9194750486066106, "grad_norm": 0.07327888160943985, "learning_rate": 4.473648679058463e-06, "loss": 0.225, "step": 48382 }, { "epoch": 3.919556059624109, "grad_norm": 0.07921938598155975, "learning_rate": 4.469148026463837e-06, "loss": 0.2254, "step": 48383 }, { "epoch": 3.919637070641607, "grad_norm": 0.06906044483184814, "learning_rate": 4.464647373869212e-06, "loss": 0.2089, "step": 48384 }, { "epoch": 3.9197180816591057, "grad_norm": 0.0595407597720623, "learning_rate": 4.460146721274585e-06, "loss": 0.1853, "step": 48385 }, { "epoch": 3.919799092676604, "grad_norm": 0.08025841414928436, "learning_rate": 4.455646068679959e-06, "loss": 0.2379, "step": 48386 }, { "epoch": 3.9198801036941022, "grad_norm": 0.06751351803541183, "learning_rate": 4.451145416085333e-06, "loss": 0.1824, "step": 48387 }, { "epoch": 3.919961114711601, "grad_norm": 0.07128855586051941, "learning_rate": 4.446644763490706e-06, "loss": 0.2065, "step": 48388 }, { "epoch": 3.920042125729099, "grad_norm": 0.07042013853788376, "learning_rate": 4.44214411089608e-06, "loss": 0.2286, "step": 48389 }, { "epoch": 3.9201231367465974, "grad_norm": 0.07936536520719528, "learning_rate": 4.437643458301454e-06, "loss": 0.2273, "step": 48390 }, { "epoch": 3.920204147764096, "grad_norm": 0.07564130425453186, "learning_rate": 4.433142805706827e-06, "loss": 0.2124, "step": 48391 }, { "epoch": 3.9202851587815943, "grad_norm": 0.0812234953045845, "learning_rate": 4.4286421531122015e-06, "loss": 0.2354, "step": 48392 }, { "epoch": 3.9203661697990926, "grad_norm": 0.07354864478111267, "learning_rate": 4.424141500517575e-06, "loss": 0.2063, "step": 48393 }, { "epoch": 3.9204471808165913, "grad_norm": 0.060475897043943405, "learning_rate": 4.419640847922949e-06, "loss": 0.1771, "step": 48394 }, { "epoch": 3.9205281918340895, "grad_norm": 0.06647646427154541, "learning_rate": 4.415140195328322e-06, "loss": 0.2066, "step": 48395 }, { "epoch": 3.9206092028515878, "grad_norm": 0.07337382435798645, "learning_rate": 4.410639542733697e-06, "loss": 0.2287, "step": 48396 }, { "epoch": 3.9206902138690864, "grad_norm": 0.07359199970960617, "learning_rate": 4.40613889013907e-06, "loss": 0.244, "step": 48397 }, { "epoch": 3.9207712248865847, "grad_norm": 0.074060820043087, "learning_rate": 4.401638237544444e-06, "loss": 0.2418, "step": 48398 }, { "epoch": 3.920852235904083, "grad_norm": 0.07838385552167892, "learning_rate": 4.3971375849498185e-06, "loss": 0.2612, "step": 48399 }, { "epoch": 3.920933246921581, "grad_norm": 0.07551511377096176, "learning_rate": 4.392636932355191e-06, "loss": 0.2409, "step": 48400 }, { "epoch": 3.92101425793908, "grad_norm": 0.08527684211730957, "learning_rate": 4.388136279760566e-06, "loss": 0.204, "step": 48401 }, { "epoch": 3.921095268956578, "grad_norm": 0.06834513694047928, "learning_rate": 4.383635627165939e-06, "loss": 0.2157, "step": 48402 }, { "epoch": 3.9211762799740764, "grad_norm": 0.06227676197886467, "learning_rate": 4.379134974571313e-06, "loss": 0.2034, "step": 48403 }, { "epoch": 3.9212572909915746, "grad_norm": 0.07426196336746216, "learning_rate": 4.3746343219766865e-06, "loss": 0.2606, "step": 48404 }, { "epoch": 3.9213383020090733, "grad_norm": 0.07575037330389023, "learning_rate": 4.370133669382061e-06, "loss": 0.22, "step": 48405 }, { "epoch": 3.9214193130265715, "grad_norm": 0.06229977682232857, "learning_rate": 4.3656330167874345e-06, "loss": 0.2217, "step": 48406 }, { "epoch": 3.92150032404407, "grad_norm": 0.07139471918344498, "learning_rate": 4.361132364192808e-06, "loss": 0.2437, "step": 48407 }, { "epoch": 3.9215813350615685, "grad_norm": 0.06911724805831909, "learning_rate": 4.356631711598182e-06, "loss": 0.1807, "step": 48408 }, { "epoch": 3.9216623460790667, "grad_norm": 0.07534075528383255, "learning_rate": 4.352131059003556e-06, "loss": 0.2281, "step": 48409 }, { "epoch": 3.921743357096565, "grad_norm": 0.07420405745506287, "learning_rate": 4.347630406408929e-06, "loss": 0.2289, "step": 48410 }, { "epoch": 3.9218243681140637, "grad_norm": 0.07598921656608582, "learning_rate": 4.343129753814303e-06, "loss": 0.2104, "step": 48411 }, { "epoch": 3.921905379131562, "grad_norm": 0.061629679054021835, "learning_rate": 4.338629101219677e-06, "loss": 0.2472, "step": 48412 }, { "epoch": 3.92198639014906, "grad_norm": 0.07201918214559555, "learning_rate": 4.334128448625051e-06, "loss": 0.1946, "step": 48413 }, { "epoch": 3.922067401166559, "grad_norm": 0.07076304405927658, "learning_rate": 4.329627796030425e-06, "loss": 0.221, "step": 48414 }, { "epoch": 3.922148412184057, "grad_norm": 0.0704672783613205, "learning_rate": 4.325127143435799e-06, "loss": 0.2151, "step": 48415 }, { "epoch": 3.9222294232015553, "grad_norm": 0.06875283271074295, "learning_rate": 4.320626490841172e-06, "loss": 0.1926, "step": 48416 }, { "epoch": 3.922310434219054, "grad_norm": 0.05916054546833038, "learning_rate": 4.316125838246546e-06, "loss": 0.2194, "step": 48417 }, { "epoch": 3.9223914452365523, "grad_norm": 0.060847945511341095, "learning_rate": 4.31162518565192e-06, "loss": 0.2302, "step": 48418 }, { "epoch": 3.9224724562540505, "grad_norm": 0.07540497928857803, "learning_rate": 4.307124533057293e-06, "loss": 0.2381, "step": 48419 }, { "epoch": 3.922553467271549, "grad_norm": 0.07262145727872849, "learning_rate": 4.3026238804626675e-06, "loss": 0.209, "step": 48420 }, { "epoch": 3.9226344782890474, "grad_norm": 0.061988167464733124, "learning_rate": 4.298123227868041e-06, "loss": 0.1932, "step": 48421 }, { "epoch": 3.9227154893065457, "grad_norm": 0.06330009549856186, "learning_rate": 4.293622575273415e-06, "loss": 0.222, "step": 48422 }, { "epoch": 3.922796500324044, "grad_norm": 0.08140560984611511, "learning_rate": 4.289121922678788e-06, "loss": 0.2162, "step": 48423 }, { "epoch": 3.9228775113415426, "grad_norm": 0.07807426899671555, "learning_rate": 4.284621270084163e-06, "loss": 0.2249, "step": 48424 }, { "epoch": 3.922958522359041, "grad_norm": 0.07387779653072357, "learning_rate": 4.2801206174895355e-06, "loss": 0.2144, "step": 48425 }, { "epoch": 3.923039533376539, "grad_norm": 0.06375554203987122, "learning_rate": 4.27561996489491e-06, "loss": 0.2085, "step": 48426 }, { "epoch": 3.9231205443940373, "grad_norm": 0.07752867043018341, "learning_rate": 4.2711193123002844e-06, "loss": 0.2388, "step": 48427 }, { "epoch": 3.923201555411536, "grad_norm": 0.07950585335493088, "learning_rate": 4.266618659705657e-06, "loss": 0.2315, "step": 48428 }, { "epoch": 3.9232825664290343, "grad_norm": 0.0622391402721405, "learning_rate": 4.262118007111032e-06, "loss": 0.2157, "step": 48429 }, { "epoch": 3.9233635774465325, "grad_norm": 0.0833074301481247, "learning_rate": 4.257617354516405e-06, "loss": 0.2555, "step": 48430 }, { "epoch": 3.923444588464031, "grad_norm": 0.06539009511470795, "learning_rate": 4.253116701921779e-06, "loss": 0.2225, "step": 48431 }, { "epoch": 3.9235255994815295, "grad_norm": 0.07489827275276184, "learning_rate": 4.2486160493271525e-06, "loss": 0.2581, "step": 48432 }, { "epoch": 3.9236066104990277, "grad_norm": 0.06998512893915176, "learning_rate": 4.244115396732527e-06, "loss": 0.176, "step": 48433 }, { "epoch": 3.9236876215165264, "grad_norm": 0.06104741990566254, "learning_rate": 4.2396147441379e-06, "loss": 0.2076, "step": 48434 }, { "epoch": 3.9237686325340246, "grad_norm": 0.07176847755908966, "learning_rate": 4.235114091543274e-06, "loss": 0.2334, "step": 48435 }, { "epoch": 3.923849643551523, "grad_norm": 0.05985512211918831, "learning_rate": 4.230613438948648e-06, "loss": 0.2031, "step": 48436 }, { "epoch": 3.9239306545690216, "grad_norm": 0.06858471781015396, "learning_rate": 4.226112786354021e-06, "loss": 0.249, "step": 48437 }, { "epoch": 3.92401166558652, "grad_norm": 0.07977214455604553, "learning_rate": 4.221612133759395e-06, "loss": 0.2112, "step": 48438 }, { "epoch": 3.924092676604018, "grad_norm": 0.07993137091398239, "learning_rate": 4.217111481164769e-06, "loss": 0.1969, "step": 48439 }, { "epoch": 3.9241736876215167, "grad_norm": 0.06767088174819946, "learning_rate": 4.212610828570142e-06, "loss": 0.2476, "step": 48440 }, { "epoch": 3.924254698639015, "grad_norm": 0.0661626085639, "learning_rate": 4.2081101759755166e-06, "loss": 0.2613, "step": 48441 }, { "epoch": 3.9243357096565132, "grad_norm": 0.07554125785827637, "learning_rate": 4.203609523380891e-06, "loss": 0.207, "step": 48442 }, { "epoch": 3.924416720674012, "grad_norm": 0.06287429481744766, "learning_rate": 4.199108870786264e-06, "loss": 0.2114, "step": 48443 }, { "epoch": 3.92449773169151, "grad_norm": 0.07063333690166473, "learning_rate": 4.194608218191638e-06, "loss": 0.2113, "step": 48444 }, { "epoch": 3.9245787427090084, "grad_norm": 0.074994757771492, "learning_rate": 4.190107565597012e-06, "loss": 0.2693, "step": 48445 }, { "epoch": 3.9246597537265067, "grad_norm": 0.07786433398723602, "learning_rate": 4.1856069130023854e-06, "loss": 0.2375, "step": 48446 }, { "epoch": 3.9247407647440054, "grad_norm": 0.08209618926048279, "learning_rate": 4.181106260407759e-06, "loss": 0.2616, "step": 48447 }, { "epoch": 3.9248217757615036, "grad_norm": 0.060023292899131775, "learning_rate": 4.1766056078131335e-06, "loss": 0.2174, "step": 48448 }, { "epoch": 3.924902786779002, "grad_norm": 0.08991419523954391, "learning_rate": 4.172104955218506e-06, "loss": 0.2533, "step": 48449 }, { "epoch": 3.9249837977965, "grad_norm": 0.07790505886077881, "learning_rate": 4.167604302623881e-06, "loss": 0.2452, "step": 48450 }, { "epoch": 3.9250648088139988, "grad_norm": 0.07460498064756393, "learning_rate": 4.163103650029254e-06, "loss": 0.2162, "step": 48451 }, { "epoch": 3.925145819831497, "grad_norm": 0.07503633946180344, "learning_rate": 4.158602997434628e-06, "loss": 0.1964, "step": 48452 }, { "epoch": 3.9252268308489953, "grad_norm": 0.06638554483652115, "learning_rate": 4.1541023448400015e-06, "loss": 0.1769, "step": 48453 }, { "epoch": 3.925307841866494, "grad_norm": 0.0684497132897377, "learning_rate": 4.149601692245376e-06, "loss": 0.1895, "step": 48454 }, { "epoch": 3.925388852883992, "grad_norm": 0.06736957281827927, "learning_rate": 4.1451010396507496e-06, "loss": 0.2522, "step": 48455 }, { "epoch": 3.9254698639014904, "grad_norm": 0.07692014425992966, "learning_rate": 4.140600387056123e-06, "loss": 0.2327, "step": 48456 }, { "epoch": 3.925550874918989, "grad_norm": 0.07177066057920456, "learning_rate": 4.136099734461498e-06, "loss": 0.198, "step": 48457 }, { "epoch": 3.9256318859364874, "grad_norm": 0.05650921165943146, "learning_rate": 4.131599081866871e-06, "loss": 0.2021, "step": 48458 }, { "epoch": 3.9257128969539856, "grad_norm": 0.07662155479192734, "learning_rate": 4.127098429272245e-06, "loss": 0.1941, "step": 48459 }, { "epoch": 3.9257939079714843, "grad_norm": 0.07912242412567139, "learning_rate": 4.1225977766776184e-06, "loss": 0.2419, "step": 48460 }, { "epoch": 3.9258749189889826, "grad_norm": 0.09148503839969635, "learning_rate": 4.118097124082992e-06, "loss": 0.1989, "step": 48461 }, { "epoch": 3.925955930006481, "grad_norm": 0.072085440158844, "learning_rate": 4.113596471488366e-06, "loss": 0.2192, "step": 48462 }, { "epoch": 3.9260369410239795, "grad_norm": 0.0772864818572998, "learning_rate": 4.10909581889374e-06, "loss": 0.2057, "step": 48463 }, { "epoch": 3.9261179520414777, "grad_norm": 0.07115405797958374, "learning_rate": 4.104595166299114e-06, "loss": 0.2298, "step": 48464 }, { "epoch": 3.926198963058976, "grad_norm": 0.08247821033000946, "learning_rate": 4.100094513704487e-06, "loss": 0.2068, "step": 48465 }, { "epoch": 3.9262799740764747, "grad_norm": 0.09296160936355591, "learning_rate": 4.095593861109861e-06, "loss": 0.2661, "step": 48466 }, { "epoch": 3.926360985093973, "grad_norm": 0.0736137181520462, "learning_rate": 4.091093208515235e-06, "loss": 0.2183, "step": 48467 }, { "epoch": 3.926441996111471, "grad_norm": 0.07801380753517151, "learning_rate": 4.086592555920608e-06, "loss": 0.2269, "step": 48468 }, { "epoch": 3.9265230071289694, "grad_norm": 0.07622180134057999, "learning_rate": 4.0820919033259826e-06, "loss": 0.2498, "step": 48469 }, { "epoch": 3.9266040181464676, "grad_norm": 0.08292534947395325, "learning_rate": 4.077591250731356e-06, "loss": 0.2366, "step": 48470 }, { "epoch": 3.9266850291639663, "grad_norm": 0.06659669429063797, "learning_rate": 4.07309059813673e-06, "loss": 0.2112, "step": 48471 }, { "epoch": 3.9267660401814646, "grad_norm": 0.07388713955879211, "learning_rate": 4.068589945542104e-06, "loss": 0.2077, "step": 48472 }, { "epoch": 3.926847051198963, "grad_norm": 0.09441567212343216, "learning_rate": 4.064089292947478e-06, "loss": 0.2082, "step": 48473 }, { "epoch": 3.9269280622164615, "grad_norm": 0.06913454085588455, "learning_rate": 4.059588640352851e-06, "loss": 0.2489, "step": 48474 }, { "epoch": 3.9270090732339598, "grad_norm": 0.0766616016626358, "learning_rate": 4.055087987758225e-06, "loss": 0.2211, "step": 48475 }, { "epoch": 3.927090084251458, "grad_norm": 0.07652173191308975, "learning_rate": 4.0505873351635995e-06, "loss": 0.1975, "step": 48476 }, { "epoch": 3.9271710952689567, "grad_norm": 0.06538225710391998, "learning_rate": 4.046086682568972e-06, "loss": 0.2346, "step": 48477 }, { "epoch": 3.927252106286455, "grad_norm": 0.06540494412183762, "learning_rate": 4.041586029974347e-06, "loss": 0.1988, "step": 48478 }, { "epoch": 3.927333117303953, "grad_norm": 0.07058582454919815, "learning_rate": 4.03708537737972e-06, "loss": 0.2295, "step": 48479 }, { "epoch": 3.927414128321452, "grad_norm": 0.06702663749456406, "learning_rate": 4.032584724785094e-06, "loss": 0.2176, "step": 48480 }, { "epoch": 3.92749513933895, "grad_norm": 0.07759080082178116, "learning_rate": 4.0280840721904675e-06, "loss": 0.2394, "step": 48481 }, { "epoch": 3.9275761503564484, "grad_norm": 0.07646369934082031, "learning_rate": 4.023583419595842e-06, "loss": 0.2168, "step": 48482 }, { "epoch": 3.927657161373947, "grad_norm": 0.0533691830933094, "learning_rate": 4.019082767001215e-06, "loss": 0.2078, "step": 48483 }, { "epoch": 3.9277381723914453, "grad_norm": 0.0704951360821724, "learning_rate": 4.014582114406589e-06, "loss": 0.2282, "step": 48484 }, { "epoch": 3.9278191834089435, "grad_norm": 0.07538004964590073, "learning_rate": 4.010081461811964e-06, "loss": 0.2543, "step": 48485 }, { "epoch": 3.9279001944264422, "grad_norm": 0.08612173050642014, "learning_rate": 4.005580809217336e-06, "loss": 0.2632, "step": 48486 }, { "epoch": 3.9279812054439405, "grad_norm": 0.07190410047769547, "learning_rate": 4.001080156622711e-06, "loss": 0.216, "step": 48487 }, { "epoch": 3.9280622164614387, "grad_norm": 0.07013415545225143, "learning_rate": 3.996579504028084e-06, "loss": 0.1973, "step": 48488 }, { "epoch": 3.9281432274789374, "grad_norm": 0.06593007594347, "learning_rate": 3.992078851433458e-06, "loss": 0.2084, "step": 48489 }, { "epoch": 3.9282242384964356, "grad_norm": 0.07043427973985672, "learning_rate": 3.987578198838832e-06, "loss": 0.2394, "step": 48490 }, { "epoch": 3.928305249513934, "grad_norm": 0.06437504291534424, "learning_rate": 3.983077546244206e-06, "loss": 0.171, "step": 48491 }, { "epoch": 3.928386260531432, "grad_norm": 0.07793059200048447, "learning_rate": 3.978576893649579e-06, "loss": 0.1901, "step": 48492 }, { "epoch": 3.9284672715489304, "grad_norm": 0.0729973241686821, "learning_rate": 3.974076241054953e-06, "loss": 0.1898, "step": 48493 }, { "epoch": 3.928548282566429, "grad_norm": 0.08081178367137909, "learning_rate": 3.969575588460327e-06, "loss": 0.2452, "step": 48494 }, { "epoch": 3.9286292935839273, "grad_norm": 0.08382073789834976, "learning_rate": 3.9650749358657005e-06, "loss": 0.2119, "step": 48495 }, { "epoch": 3.9287103046014256, "grad_norm": 0.07415829598903656, "learning_rate": 3.960574283271074e-06, "loss": 0.1941, "step": 48496 }, { "epoch": 3.9287913156189243, "grad_norm": 0.06375744938850403, "learning_rate": 3.9560736306764485e-06, "loss": 0.2514, "step": 48497 }, { "epoch": 3.9288723266364225, "grad_norm": 0.07081378996372223, "learning_rate": 3.951572978081822e-06, "loss": 0.2356, "step": 48498 }, { "epoch": 3.9289533376539207, "grad_norm": 0.06521176546812057, "learning_rate": 3.947072325487196e-06, "loss": 0.2177, "step": 48499 }, { "epoch": 3.9290343486714194, "grad_norm": 0.07624541968107224, "learning_rate": 3.94257167289257e-06, "loss": 0.1909, "step": 48500 }, { "epoch": 3.9291153596889177, "grad_norm": 0.08679436892271042, "learning_rate": 3.938071020297943e-06, "loss": 0.2492, "step": 48501 }, { "epoch": 3.929196370706416, "grad_norm": 0.06400018185377121, "learning_rate": 3.933570367703317e-06, "loss": 0.2234, "step": 48502 }, { "epoch": 3.9292773817239146, "grad_norm": 0.07425360381603241, "learning_rate": 3.929069715108691e-06, "loss": 0.2425, "step": 48503 }, { "epoch": 3.929358392741413, "grad_norm": 0.08528902381658554, "learning_rate": 3.924569062514065e-06, "loss": 0.2177, "step": 48504 }, { "epoch": 3.929439403758911, "grad_norm": 0.05845775082707405, "learning_rate": 3.920068409919438e-06, "loss": 0.1795, "step": 48505 }, { "epoch": 3.92952041477641, "grad_norm": 0.06745034456253052, "learning_rate": 3.915567757324813e-06, "loss": 0.2215, "step": 48506 }, { "epoch": 3.929601425793908, "grad_norm": 0.07435566931962967, "learning_rate": 3.911067104730186e-06, "loss": 0.2253, "step": 48507 }, { "epoch": 3.9296824368114063, "grad_norm": 0.08194363862276077, "learning_rate": 3.90656645213556e-06, "loss": 0.2381, "step": 48508 }, { "epoch": 3.929763447828905, "grad_norm": 0.07673655450344086, "learning_rate": 3.9020657995409335e-06, "loss": 0.2508, "step": 48509 }, { "epoch": 3.929844458846403, "grad_norm": 0.06954783946275711, "learning_rate": 3.897565146946307e-06, "loss": 0.2466, "step": 48510 }, { "epoch": 3.9299254698639015, "grad_norm": 0.05643027275800705, "learning_rate": 3.893064494351681e-06, "loss": 0.2215, "step": 48511 }, { "epoch": 3.9300064808814, "grad_norm": 0.06844652444124222, "learning_rate": 3.888563841757055e-06, "loss": 0.2043, "step": 48512 }, { "epoch": 3.9300874918988984, "grad_norm": 0.07119164615869522, "learning_rate": 3.884063189162429e-06, "loss": 0.2427, "step": 48513 }, { "epoch": 3.9301685029163966, "grad_norm": 0.07002280652523041, "learning_rate": 3.879562536567802e-06, "loss": 0.2559, "step": 48514 }, { "epoch": 3.930249513933895, "grad_norm": 0.06633292883634567, "learning_rate": 3.875061883973177e-06, "loss": 0.2434, "step": 48515 }, { "epoch": 3.930330524951393, "grad_norm": 0.06863868981599808, "learning_rate": 3.87056123137855e-06, "loss": 0.2181, "step": 48516 }, { "epoch": 3.930411535968892, "grad_norm": 0.0700906440615654, "learning_rate": 3.866060578783924e-06, "loss": 0.2224, "step": 48517 }, { "epoch": 3.93049254698639, "grad_norm": 0.07740800082683563, "learning_rate": 3.861559926189298e-06, "loss": 0.1978, "step": 48518 }, { "epoch": 3.9305735580038883, "grad_norm": 0.07479584217071533, "learning_rate": 3.857059273594671e-06, "loss": 0.2067, "step": 48519 }, { "epoch": 3.930654569021387, "grad_norm": 0.07812254130840302, "learning_rate": 3.852558621000045e-06, "loss": 0.2554, "step": 48520 }, { "epoch": 3.9307355800388852, "grad_norm": 0.08209620416164398, "learning_rate": 3.848057968405419e-06, "loss": 0.2061, "step": 48521 }, { "epoch": 3.9308165910563835, "grad_norm": 0.06698629260063171, "learning_rate": 3.843557315810793e-06, "loss": 0.2113, "step": 48522 }, { "epoch": 3.930897602073882, "grad_norm": 0.0727633461356163, "learning_rate": 3.8390566632161664e-06, "loss": 0.2138, "step": 48523 }, { "epoch": 3.9309786130913804, "grad_norm": 0.07083755731582642, "learning_rate": 3.83455601062154e-06, "loss": 0.2036, "step": 48524 }, { "epoch": 3.9310596241088787, "grad_norm": 0.07174476236104965, "learning_rate": 3.8300553580269145e-06, "loss": 0.2266, "step": 48525 }, { "epoch": 3.9311406351263773, "grad_norm": 0.07710791379213333, "learning_rate": 3.825554705432287e-06, "loss": 0.2302, "step": 48526 }, { "epoch": 3.9312216461438756, "grad_norm": 0.07124129682779312, "learning_rate": 3.821054052837662e-06, "loss": 0.2003, "step": 48527 }, { "epoch": 3.931302657161374, "grad_norm": 0.07960180193185806, "learning_rate": 3.816553400243035e-06, "loss": 0.2458, "step": 48528 }, { "epoch": 3.9313836681788725, "grad_norm": 0.08520202338695526, "learning_rate": 3.8120527476484093e-06, "loss": 0.2287, "step": 48529 }, { "epoch": 3.9314646791963708, "grad_norm": 0.06994429975748062, "learning_rate": 3.8075520950537834e-06, "loss": 0.2281, "step": 48530 }, { "epoch": 3.931545690213869, "grad_norm": 0.07213662564754486, "learning_rate": 3.8030514424591565e-06, "loss": 0.1761, "step": 48531 }, { "epoch": 3.9316267012313677, "grad_norm": 0.07031689584255219, "learning_rate": 3.7985507898645306e-06, "loss": 0.2011, "step": 48532 }, { "epoch": 3.931707712248866, "grad_norm": 0.0783347338438034, "learning_rate": 3.794050137269904e-06, "loss": 0.2522, "step": 48533 }, { "epoch": 3.931788723266364, "grad_norm": 0.07658717036247253, "learning_rate": 3.789549484675278e-06, "loss": 0.2342, "step": 48534 }, { "epoch": 3.931869734283863, "grad_norm": 0.07086281478404999, "learning_rate": 3.785048832080652e-06, "loss": 0.2134, "step": 48535 }, { "epoch": 3.931950745301361, "grad_norm": 0.06902775913476944, "learning_rate": 3.780548179486026e-06, "loss": 0.1919, "step": 48536 }, { "epoch": 3.9320317563188594, "grad_norm": 0.06554694473743439, "learning_rate": 3.776047526891399e-06, "loss": 0.2314, "step": 48537 }, { "epoch": 3.9321127673363576, "grad_norm": 0.08830972760915756, "learning_rate": 3.7715468742967735e-06, "loss": 0.2198, "step": 48538 }, { "epoch": 3.932193778353856, "grad_norm": 0.07285527884960175, "learning_rate": 3.7670462217021466e-06, "loss": 0.2034, "step": 48539 }, { "epoch": 3.9322747893713546, "grad_norm": 0.08034533262252808, "learning_rate": 3.7625455691075207e-06, "loss": 0.2121, "step": 48540 }, { "epoch": 3.932355800388853, "grad_norm": 0.07474691420793533, "learning_rate": 3.7580449165128947e-06, "loss": 0.2456, "step": 48541 }, { "epoch": 3.932436811406351, "grad_norm": 0.06668350100517273, "learning_rate": 3.7535442639182683e-06, "loss": 0.239, "step": 48542 }, { "epoch": 3.9325178224238497, "grad_norm": 0.06283923983573914, "learning_rate": 3.7490436113236423e-06, "loss": 0.1962, "step": 48543 }, { "epoch": 3.932598833441348, "grad_norm": 0.08116850256919861, "learning_rate": 3.744542958729016e-06, "loss": 0.256, "step": 48544 }, { "epoch": 3.932679844458846, "grad_norm": 0.07628383487462997, "learning_rate": 3.74004230613439e-06, "loss": 0.2212, "step": 48545 }, { "epoch": 3.932760855476345, "grad_norm": 0.07699400186538696, "learning_rate": 3.735541653539763e-06, "loss": 0.194, "step": 48546 }, { "epoch": 3.932841866493843, "grad_norm": 0.08026984333992004, "learning_rate": 3.7310410009451376e-06, "loss": 0.1822, "step": 48547 }, { "epoch": 3.9329228775113414, "grad_norm": 0.0703667625784874, "learning_rate": 3.7265403483505108e-06, "loss": 0.2552, "step": 48548 }, { "epoch": 3.93300388852884, "grad_norm": 0.06922667473554611, "learning_rate": 3.722039695755885e-06, "loss": 0.2323, "step": 48549 }, { "epoch": 3.9330848995463383, "grad_norm": 0.07332278788089752, "learning_rate": 3.7175390431612584e-06, "loss": 0.2274, "step": 48550 }, { "epoch": 3.9331659105638366, "grad_norm": 0.07613363862037659, "learning_rate": 3.7130383905666324e-06, "loss": 0.2087, "step": 48551 }, { "epoch": 3.9332469215813353, "grad_norm": 0.09742595255374908, "learning_rate": 3.708537737972006e-06, "loss": 0.2368, "step": 48552 }, { "epoch": 3.9333279325988335, "grad_norm": 0.0734672024846077, "learning_rate": 3.70403708537738e-06, "loss": 0.2262, "step": 48553 }, { "epoch": 3.9334089436163318, "grad_norm": 0.06810317933559418, "learning_rate": 3.6995364327827532e-06, "loss": 0.2113, "step": 48554 }, { "epoch": 3.9334899546338304, "grad_norm": 0.07320161908864975, "learning_rate": 3.6950357801881273e-06, "loss": 0.2495, "step": 48555 }, { "epoch": 3.9335709656513287, "grad_norm": 0.07830044627189636, "learning_rate": 3.6905351275935017e-06, "loss": 0.1991, "step": 48556 }, { "epoch": 3.933651976668827, "grad_norm": 0.06398749351501465, "learning_rate": 3.686034474998875e-06, "loss": 0.2219, "step": 48557 }, { "epoch": 3.933732987686325, "grad_norm": 0.07395961135625839, "learning_rate": 3.681533822404249e-06, "loss": 0.211, "step": 48558 }, { "epoch": 3.933813998703824, "grad_norm": 0.0828605592250824, "learning_rate": 3.6770331698096225e-06, "loss": 0.2104, "step": 48559 }, { "epoch": 3.933895009721322, "grad_norm": 0.06945844739675522, "learning_rate": 3.6725325172149965e-06, "loss": 0.2022, "step": 48560 }, { "epoch": 3.9339760207388204, "grad_norm": 0.08338592201471329, "learning_rate": 3.66803186462037e-06, "loss": 0.2406, "step": 48561 }, { "epoch": 3.9340570317563186, "grad_norm": 0.07173708081245422, "learning_rate": 3.663531212025744e-06, "loss": 0.2097, "step": 48562 }, { "epoch": 3.9341380427738173, "grad_norm": 0.06706194579601288, "learning_rate": 3.6590305594311174e-06, "loss": 0.2191, "step": 48563 }, { "epoch": 3.9342190537913155, "grad_norm": 0.06909432262182236, "learning_rate": 3.654529906836492e-06, "loss": 0.226, "step": 48564 }, { "epoch": 3.934300064808814, "grad_norm": 0.07070121169090271, "learning_rate": 3.650029254241865e-06, "loss": 0.2187, "step": 48565 }, { "epoch": 3.9343810758263125, "grad_norm": 0.07241905480623245, "learning_rate": 3.645528601647239e-06, "loss": 0.2526, "step": 48566 }, { "epoch": 3.9344620868438107, "grad_norm": 0.059708014130592346, "learning_rate": 3.6410279490526126e-06, "loss": 0.2184, "step": 48567 }, { "epoch": 3.934543097861309, "grad_norm": 0.08609946072101593, "learning_rate": 3.6365272964579866e-06, "loss": 0.2065, "step": 48568 }, { "epoch": 3.9346241088788076, "grad_norm": 0.07386572659015656, "learning_rate": 3.63202664386336e-06, "loss": 0.2091, "step": 48569 }, { "epoch": 3.934705119896306, "grad_norm": 0.06636470556259155, "learning_rate": 3.6275259912687343e-06, "loss": 0.2014, "step": 48570 }, { "epoch": 3.934786130913804, "grad_norm": 0.07500702887773514, "learning_rate": 3.6230253386741083e-06, "loss": 0.2051, "step": 48571 }, { "epoch": 3.934867141931303, "grad_norm": 0.06589839607477188, "learning_rate": 3.6185246860794815e-06, "loss": 0.2176, "step": 48572 }, { "epoch": 3.934948152948801, "grad_norm": 0.06833402067422867, "learning_rate": 3.614024033484856e-06, "loss": 0.2436, "step": 48573 }, { "epoch": 3.9350291639662993, "grad_norm": 0.07177960872650146, "learning_rate": 3.609523380890229e-06, "loss": 0.263, "step": 48574 }, { "epoch": 3.935110174983798, "grad_norm": 0.06268391013145447, "learning_rate": 3.605022728295603e-06, "loss": 0.2019, "step": 48575 }, { "epoch": 3.9351911860012962, "grad_norm": 0.0845642238855362, "learning_rate": 3.6005220757009767e-06, "loss": 0.2184, "step": 48576 }, { "epoch": 3.9352721970187945, "grad_norm": 0.05564136430621147, "learning_rate": 3.5960214231063508e-06, "loss": 0.2024, "step": 48577 }, { "epoch": 3.935353208036293, "grad_norm": 0.06291015446186066, "learning_rate": 3.5915207705117244e-06, "loss": 0.2304, "step": 48578 }, { "epoch": 3.9354342190537914, "grad_norm": 0.07351517677307129, "learning_rate": 3.5870201179170984e-06, "loss": 0.234, "step": 48579 }, { "epoch": 3.9355152300712897, "grad_norm": 0.07256698608398438, "learning_rate": 3.5825194653224716e-06, "loss": 0.2334, "step": 48580 }, { "epoch": 3.935596241088788, "grad_norm": 0.08631699532270432, "learning_rate": 3.5780188127278456e-06, "loss": 0.2292, "step": 48581 }, { "epoch": 3.9356772521062866, "grad_norm": 0.06245209649205208, "learning_rate": 3.573518160133219e-06, "loss": 0.2052, "step": 48582 }, { "epoch": 3.935758263123785, "grad_norm": 0.06966308504343033, "learning_rate": 3.5690175075385932e-06, "loss": 0.2312, "step": 48583 }, { "epoch": 3.935839274141283, "grad_norm": 0.057079412043094635, "learning_rate": 3.564516854943967e-06, "loss": 0.2015, "step": 48584 }, { "epoch": 3.9359202851587813, "grad_norm": 0.0594225637614727, "learning_rate": 3.560016202349341e-06, "loss": 0.2156, "step": 48585 }, { "epoch": 3.93600129617628, "grad_norm": 0.06722415238618851, "learning_rate": 3.555515549754715e-06, "loss": 0.1988, "step": 48586 }, { "epoch": 3.9360823071937783, "grad_norm": 0.06993760913610458, "learning_rate": 3.5510148971600885e-06, "loss": 0.2112, "step": 48587 }, { "epoch": 3.9361633182112765, "grad_norm": 0.08021251857280731, "learning_rate": 3.5465142445654625e-06, "loss": 0.2323, "step": 48588 }, { "epoch": 3.936244329228775, "grad_norm": 0.07788848876953125, "learning_rate": 3.5420135919708357e-06, "loss": 0.2113, "step": 48589 }, { "epoch": 3.9363253402462735, "grad_norm": 0.06252850592136383, "learning_rate": 3.5375129393762097e-06, "loss": 0.214, "step": 48590 }, { "epoch": 3.9364063512637717, "grad_norm": 0.0875447690486908, "learning_rate": 3.5330122867815833e-06, "loss": 0.2773, "step": 48591 }, { "epoch": 3.9364873622812704, "grad_norm": 0.067319855093956, "learning_rate": 3.5285116341869574e-06, "loss": 0.2524, "step": 48592 }, { "epoch": 3.9365683732987686, "grad_norm": 0.06517969071865082, "learning_rate": 3.524010981592331e-06, "loss": 0.2369, "step": 48593 }, { "epoch": 3.936649384316267, "grad_norm": 0.06585026532411575, "learning_rate": 3.519510328997705e-06, "loss": 0.2006, "step": 48594 }, { "epoch": 3.9367303953337656, "grad_norm": 0.0712442547082901, "learning_rate": 3.515009676403078e-06, "loss": 0.1914, "step": 48595 }, { "epoch": 3.936811406351264, "grad_norm": 0.07155847549438477, "learning_rate": 3.5105090238084526e-06, "loss": 0.2596, "step": 48596 }, { "epoch": 3.936892417368762, "grad_norm": 0.0668448656797409, "learning_rate": 3.506008371213826e-06, "loss": 0.2167, "step": 48597 }, { "epoch": 3.9369734283862607, "grad_norm": 0.06561889499425888, "learning_rate": 3.5015077186192e-06, "loss": 0.1859, "step": 48598 }, { "epoch": 3.937054439403759, "grad_norm": 0.05717025324702263, "learning_rate": 3.4970070660245743e-06, "loss": 0.2039, "step": 48599 }, { "epoch": 3.9371354504212572, "grad_norm": 0.0749245211482048, "learning_rate": 3.4925064134299475e-06, "loss": 0.2545, "step": 48600 }, { "epoch": 3.937216461438756, "grad_norm": 0.0730648785829544, "learning_rate": 3.4880057608353215e-06, "loss": 0.2204, "step": 48601 }, { "epoch": 3.937297472456254, "grad_norm": 0.06548863649368286, "learning_rate": 3.483505108240695e-06, "loss": 0.2004, "step": 48602 }, { "epoch": 3.9373784834737524, "grad_norm": 0.06222781166434288, "learning_rate": 3.479004455646069e-06, "loss": 0.1914, "step": 48603 }, { "epoch": 3.9374594944912507, "grad_norm": 0.061245959252119064, "learning_rate": 3.4745038030514423e-06, "loss": 0.2335, "step": 48604 }, { "epoch": 3.9375405055087493, "grad_norm": 0.09785652905702591, "learning_rate": 3.4700031504568167e-06, "loss": 0.2717, "step": 48605 }, { "epoch": 3.9376215165262476, "grad_norm": 0.07860812544822693, "learning_rate": 3.46550249786219e-06, "loss": 0.2264, "step": 48606 }, { "epoch": 3.937702527543746, "grad_norm": 0.08016818761825562, "learning_rate": 3.461001845267564e-06, "loss": 0.2651, "step": 48607 }, { "epoch": 3.937783538561244, "grad_norm": 0.07426834851503372, "learning_rate": 3.4565011926729376e-06, "loss": 0.2745, "step": 48608 }, { "epoch": 3.9378645495787428, "grad_norm": 0.0777416005730629, "learning_rate": 3.4520005400783116e-06, "loss": 0.2511, "step": 48609 }, { "epoch": 3.937945560596241, "grad_norm": 0.08751657605171204, "learning_rate": 3.447499887483685e-06, "loss": 0.2446, "step": 48610 }, { "epoch": 3.9380265716137393, "grad_norm": 0.08031098544597626, "learning_rate": 3.442999234889059e-06, "loss": 0.2586, "step": 48611 }, { "epoch": 3.938107582631238, "grad_norm": 0.0816061943769455, "learning_rate": 3.4384985822944324e-06, "loss": 0.2311, "step": 48612 }, { "epoch": 3.938188593648736, "grad_norm": 0.07019905745983124, "learning_rate": 3.433997929699807e-06, "loss": 0.2114, "step": 48613 }, { "epoch": 3.9382696046662344, "grad_norm": 0.061768822371959686, "learning_rate": 3.429497277105181e-06, "loss": 0.2184, "step": 48614 }, { "epoch": 3.938350615683733, "grad_norm": 0.07346883416175842, "learning_rate": 3.424996624510554e-06, "loss": 0.2302, "step": 48615 }, { "epoch": 3.9384316267012314, "grad_norm": 0.078464575111866, "learning_rate": 3.420495971915928e-06, "loss": 0.1974, "step": 48616 }, { "epoch": 3.9385126377187296, "grad_norm": 0.08362342417240143, "learning_rate": 3.4159953193213017e-06, "loss": 0.2104, "step": 48617 }, { "epoch": 3.9385936487362283, "grad_norm": 0.07425287365913391, "learning_rate": 3.4114946667266757e-06, "loss": 0.1987, "step": 48618 }, { "epoch": 3.9386746597537265, "grad_norm": 0.05362372472882271, "learning_rate": 3.4069940141320493e-06, "loss": 0.1655, "step": 48619 }, { "epoch": 3.938755670771225, "grad_norm": 0.0676586851477623, "learning_rate": 3.4024933615374233e-06, "loss": 0.1871, "step": 48620 }, { "epoch": 3.9388366817887235, "grad_norm": 0.06317394226789474, "learning_rate": 3.3979927089427965e-06, "loss": 0.2735, "step": 48621 }, { "epoch": 3.9389176928062217, "grad_norm": 0.06809334456920624, "learning_rate": 3.393492056348171e-06, "loss": 0.2171, "step": 48622 }, { "epoch": 3.93899870382372, "grad_norm": 0.0660707876086235, "learning_rate": 3.388991403753544e-06, "loss": 0.2299, "step": 48623 }, { "epoch": 3.9390797148412187, "grad_norm": 0.08089926093816757, "learning_rate": 3.384490751158918e-06, "loss": 0.2247, "step": 48624 }, { "epoch": 3.939160725858717, "grad_norm": 0.06330505758523941, "learning_rate": 3.3799900985642918e-06, "loss": 0.2135, "step": 48625 }, { "epoch": 3.939241736876215, "grad_norm": 0.06716379523277283, "learning_rate": 3.375489445969666e-06, "loss": 0.232, "step": 48626 }, { "epoch": 3.9393227478937134, "grad_norm": 0.06354763358831406, "learning_rate": 3.3709887933750394e-06, "loss": 0.2079, "step": 48627 }, { "epoch": 3.939403758911212, "grad_norm": 0.07505752146244049, "learning_rate": 3.3664881407804134e-06, "loss": 0.2218, "step": 48628 }, { "epoch": 3.9394847699287103, "grad_norm": 0.08010073751211166, "learning_rate": 3.3619874881857875e-06, "loss": 0.2033, "step": 48629 }, { "epoch": 3.9395657809462086, "grad_norm": 0.07689400762319565, "learning_rate": 3.3574868355911606e-06, "loss": 0.2303, "step": 48630 }, { "epoch": 3.939646791963707, "grad_norm": 0.0748937800526619, "learning_rate": 3.352986182996535e-06, "loss": 0.2066, "step": 48631 }, { "epoch": 3.9397278029812055, "grad_norm": 0.08342572301626205, "learning_rate": 3.3484855304019083e-06, "loss": 0.2457, "step": 48632 }, { "epoch": 3.9398088139987038, "grad_norm": 0.08893360197544098, "learning_rate": 3.3439848778072823e-06, "loss": 0.2338, "step": 48633 }, { "epoch": 3.939889825016202, "grad_norm": 0.06578105688095093, "learning_rate": 3.339484225212656e-06, "loss": 0.2092, "step": 48634 }, { "epoch": 3.9399708360337007, "grad_norm": 0.06503915041685104, "learning_rate": 3.33498357261803e-06, "loss": 0.2476, "step": 48635 }, { "epoch": 3.940051847051199, "grad_norm": 0.0719245970249176, "learning_rate": 3.3304829200234035e-06, "loss": 0.2677, "step": 48636 }, { "epoch": 3.940132858068697, "grad_norm": 0.06504833698272705, "learning_rate": 3.3259822674287776e-06, "loss": 0.2252, "step": 48637 }, { "epoch": 3.940213869086196, "grad_norm": 0.08234488219022751, "learning_rate": 3.3214816148341507e-06, "loss": 0.2319, "step": 48638 }, { "epoch": 3.940294880103694, "grad_norm": 0.06820017844438553, "learning_rate": 3.3169809622395248e-06, "loss": 0.1929, "step": 48639 }, { "epoch": 3.9403758911211924, "grad_norm": 0.05836334079504013, "learning_rate": 3.3124803096448984e-06, "loss": 0.2172, "step": 48640 }, { "epoch": 3.940456902138691, "grad_norm": 0.06549455970525742, "learning_rate": 3.3079796570502724e-06, "loss": 0.2515, "step": 48641 }, { "epoch": 3.9405379131561893, "grad_norm": 0.07402468472719193, "learning_rate": 3.3034790044556464e-06, "loss": 0.2103, "step": 48642 }, { "epoch": 3.9406189241736875, "grad_norm": 0.06810709089040756, "learning_rate": 3.29897835186102e-06, "loss": 0.2017, "step": 48643 }, { "epoch": 3.940699935191186, "grad_norm": 0.06563235074281693, "learning_rate": 3.294477699266394e-06, "loss": 0.2042, "step": 48644 }, { "epoch": 3.9407809462086845, "grad_norm": 0.08226774632930756, "learning_rate": 3.2899770466717677e-06, "loss": 0.2507, "step": 48645 }, { "epoch": 3.9408619572261827, "grad_norm": 0.06738846004009247, "learning_rate": 3.2854763940771417e-06, "loss": 0.2144, "step": 48646 }, { "epoch": 3.9409429682436814, "grad_norm": 0.06427872180938721, "learning_rate": 3.280975741482515e-06, "loss": 0.2197, "step": 48647 }, { "epoch": 3.9410239792611796, "grad_norm": 0.07773533463478088, "learning_rate": 3.276475088887889e-06, "loss": 0.2123, "step": 48648 }, { "epoch": 3.941104990278678, "grad_norm": 0.08229377865791321, "learning_rate": 3.2719744362932625e-06, "loss": 0.2247, "step": 48649 }, { "epoch": 3.941186001296176, "grad_norm": 0.0808233693242073, "learning_rate": 3.2674737836986365e-06, "loss": 0.2419, "step": 48650 }, { "epoch": 3.941267012313675, "grad_norm": 0.08312023431062698, "learning_rate": 3.26297313110401e-06, "loss": 0.2376, "step": 48651 }, { "epoch": 3.941348023331173, "grad_norm": 0.0641668364405632, "learning_rate": 3.258472478509384e-06, "loss": 0.2425, "step": 48652 }, { "epoch": 3.9414290343486713, "grad_norm": 0.06582210958003998, "learning_rate": 3.2539718259147573e-06, "loss": 0.2453, "step": 48653 }, { "epoch": 3.9415100453661696, "grad_norm": 0.07841324061155319, "learning_rate": 3.2494711733201318e-06, "loss": 0.2307, "step": 48654 }, { "epoch": 3.9415910563836682, "grad_norm": 0.0673704668879509, "learning_rate": 3.244970520725505e-06, "loss": 0.2163, "step": 48655 }, { "epoch": 3.9416720674011665, "grad_norm": 0.0752340778708458, "learning_rate": 3.240469868130879e-06, "loss": 0.2501, "step": 48656 }, { "epoch": 3.9417530784186647, "grad_norm": 0.07765194028615952, "learning_rate": 3.2359692155362534e-06, "loss": 0.2192, "step": 48657 }, { "epoch": 3.9418340894361634, "grad_norm": 0.06525509059429169, "learning_rate": 3.2314685629416266e-06, "loss": 0.223, "step": 48658 }, { "epoch": 3.9419151004536617, "grad_norm": 0.06445495784282684, "learning_rate": 3.2269679103470006e-06, "loss": 0.2472, "step": 48659 }, { "epoch": 3.94199611147116, "grad_norm": 0.06818129867315292, "learning_rate": 3.2224672577523742e-06, "loss": 0.2138, "step": 48660 }, { "epoch": 3.9420771224886586, "grad_norm": 0.07735411822795868, "learning_rate": 3.2179666051577483e-06, "loss": 0.2252, "step": 48661 }, { "epoch": 3.942158133506157, "grad_norm": 0.07455040514469147, "learning_rate": 3.213465952563122e-06, "loss": 0.2383, "step": 48662 }, { "epoch": 3.942239144523655, "grad_norm": 0.07096820324659348, "learning_rate": 3.208965299968496e-06, "loss": 0.2368, "step": 48663 }, { "epoch": 3.942320155541154, "grad_norm": 0.06698424369096756, "learning_rate": 3.204464647373869e-06, "loss": 0.2426, "step": 48664 }, { "epoch": 3.942401166558652, "grad_norm": 0.05881981924176216, "learning_rate": 3.199963994779243e-06, "loss": 0.1789, "step": 48665 }, { "epoch": 3.9424821775761503, "grad_norm": 0.07194176316261292, "learning_rate": 3.1954633421846167e-06, "loss": 0.2194, "step": 48666 }, { "epoch": 3.942563188593649, "grad_norm": 0.06709557771682739, "learning_rate": 3.1909626895899907e-06, "loss": 0.2023, "step": 48667 }, { "epoch": 3.942644199611147, "grad_norm": 0.08858643472194672, "learning_rate": 3.1864620369953643e-06, "loss": 0.2341, "step": 48668 }, { "epoch": 3.9427252106286454, "grad_norm": 0.08386151492595673, "learning_rate": 3.1819613844007384e-06, "loss": 0.2279, "step": 48669 }, { "epoch": 3.942806221646144, "grad_norm": 0.07175328582525253, "learning_rate": 3.1774607318061115e-06, "loss": 0.2233, "step": 48670 }, { "epoch": 3.9428872326636424, "grad_norm": 0.0729939341545105, "learning_rate": 3.172960079211486e-06, "loss": 0.2228, "step": 48671 }, { "epoch": 3.9429682436811406, "grad_norm": 0.07817017287015915, "learning_rate": 3.16845942661686e-06, "loss": 0.23, "step": 48672 }, { "epoch": 3.943049254698639, "grad_norm": 0.06868911534547806, "learning_rate": 3.163958774022233e-06, "loss": 0.2288, "step": 48673 }, { "epoch": 3.943130265716137, "grad_norm": 0.07988391816616058, "learning_rate": 3.1594581214276072e-06, "loss": 0.2186, "step": 48674 }, { "epoch": 3.943211276733636, "grad_norm": 0.062225591391325, "learning_rate": 3.154957468832981e-06, "loss": 0.2169, "step": 48675 }, { "epoch": 3.943292287751134, "grad_norm": 0.07657849788665771, "learning_rate": 3.150456816238355e-06, "loss": 0.2065, "step": 48676 }, { "epoch": 3.9433732987686323, "grad_norm": 0.09984945505857468, "learning_rate": 3.1459561636437285e-06, "loss": 0.2521, "step": 48677 }, { "epoch": 3.943454309786131, "grad_norm": 0.08694443851709366, "learning_rate": 3.1414555110491025e-06, "loss": 0.2002, "step": 48678 }, { "epoch": 3.9435353208036292, "grad_norm": 0.06631725281476974, "learning_rate": 3.1369548584544757e-06, "loss": 0.1917, "step": 48679 }, { "epoch": 3.9436163318211275, "grad_norm": 0.06001517176628113, "learning_rate": 3.13245420585985e-06, "loss": 0.203, "step": 48680 }, { "epoch": 3.943697342838626, "grad_norm": 0.060536958277225494, "learning_rate": 3.1279535532652233e-06, "loss": 0.2354, "step": 48681 }, { "epoch": 3.9437783538561244, "grad_norm": 0.07151622325181961, "learning_rate": 3.1234529006705973e-06, "loss": 0.2221, "step": 48682 }, { "epoch": 3.9438593648736227, "grad_norm": 0.07806843519210815, "learning_rate": 3.1189522480759714e-06, "loss": 0.1761, "step": 48683 }, { "epoch": 3.9439403758911213, "grad_norm": 0.061488077044487, "learning_rate": 3.114451595481345e-06, "loss": 0.1776, "step": 48684 }, { "epoch": 3.9440213869086196, "grad_norm": 0.08169107884168625, "learning_rate": 3.1099509428867186e-06, "loss": 0.2266, "step": 48685 }, { "epoch": 3.944102397926118, "grad_norm": 0.0697530210018158, "learning_rate": 3.1054502902920926e-06, "loss": 0.1994, "step": 48686 }, { "epoch": 3.9441834089436165, "grad_norm": 0.06424905359745026, "learning_rate": 3.100949637697466e-06, "loss": 0.2624, "step": 48687 }, { "epoch": 3.9442644199611148, "grad_norm": 0.0686437338590622, "learning_rate": 3.09644898510284e-06, "loss": 0.199, "step": 48688 }, { "epoch": 3.944345430978613, "grad_norm": 0.07491966336965561, "learning_rate": 3.091948332508214e-06, "loss": 0.2548, "step": 48689 }, { "epoch": 3.9444264419961117, "grad_norm": 0.08908192068338394, "learning_rate": 3.0874476799135874e-06, "loss": 0.2572, "step": 48690 }, { "epoch": 3.94450745301361, "grad_norm": 0.07038780301809311, "learning_rate": 3.0829470273189614e-06, "loss": 0.2381, "step": 48691 }, { "epoch": 3.944588464031108, "grad_norm": 0.06175041198730469, "learning_rate": 3.078446374724335e-06, "loss": 0.251, "step": 48692 }, { "epoch": 3.944669475048607, "grad_norm": 0.07298500090837479, "learning_rate": 3.073945722129709e-06, "loss": 0.2434, "step": 48693 }, { "epoch": 3.944750486066105, "grad_norm": 0.07891558855772018, "learning_rate": 3.0694450695350827e-06, "loss": 0.246, "step": 48694 }, { "epoch": 3.9448314970836034, "grad_norm": 0.07210226356983185, "learning_rate": 3.0649444169404567e-06, "loss": 0.2258, "step": 48695 }, { "epoch": 3.9449125081011016, "grad_norm": 0.07565312087535858, "learning_rate": 3.0604437643458303e-06, "loss": 0.2653, "step": 48696 }, { "epoch": 3.9449935191186, "grad_norm": 0.06832912564277649, "learning_rate": 3.055943111751204e-06, "loss": 0.1948, "step": 48697 }, { "epoch": 3.9450745301360985, "grad_norm": 0.07523760199546814, "learning_rate": 3.051442459156578e-06, "loss": 0.2343, "step": 48698 }, { "epoch": 3.945155541153597, "grad_norm": 0.0894690528512001, "learning_rate": 3.0469418065619515e-06, "loss": 0.2323, "step": 48699 }, { "epoch": 3.945236552171095, "grad_norm": 0.06247549131512642, "learning_rate": 3.0424411539673256e-06, "loss": 0.2019, "step": 48700 }, { "epoch": 3.9453175631885937, "grad_norm": 0.06587101519107819, "learning_rate": 3.037940501372699e-06, "loss": 0.2405, "step": 48701 }, { "epoch": 3.945398574206092, "grad_norm": 0.07532519102096558, "learning_rate": 3.0334398487780728e-06, "loss": 0.1945, "step": 48702 }, { "epoch": 3.94547958522359, "grad_norm": 0.06462553888559341, "learning_rate": 3.028939196183447e-06, "loss": 0.2456, "step": 48703 }, { "epoch": 3.945560596241089, "grad_norm": 0.05730045959353447, "learning_rate": 3.0244385435888204e-06, "loss": 0.1876, "step": 48704 }, { "epoch": 3.945641607258587, "grad_norm": 0.08065466582775116, "learning_rate": 3.019937890994194e-06, "loss": 0.2045, "step": 48705 }, { "epoch": 3.9457226182760854, "grad_norm": 0.08913881331682205, "learning_rate": 3.015437238399568e-06, "loss": 0.2335, "step": 48706 }, { "epoch": 3.945803629293584, "grad_norm": 0.0686987042427063, "learning_rate": 3.010936585804942e-06, "loss": 0.2315, "step": 48707 }, { "epoch": 3.9458846403110823, "grad_norm": 0.07149714231491089, "learning_rate": 3.0064359332103157e-06, "loss": 0.1983, "step": 48708 }, { "epoch": 3.9459656513285806, "grad_norm": 0.09264829754829407, "learning_rate": 3.0019352806156897e-06, "loss": 0.2337, "step": 48709 }, { "epoch": 3.9460466623460793, "grad_norm": 0.06271969527006149, "learning_rate": 2.9974346280210633e-06, "loss": 0.2145, "step": 48710 }, { "epoch": 3.9461276733635775, "grad_norm": 0.06298379600048065, "learning_rate": 2.992933975426437e-06, "loss": 0.1905, "step": 48711 }, { "epoch": 3.9462086843810757, "grad_norm": 0.09435836970806122, "learning_rate": 2.988433322831811e-06, "loss": 0.2418, "step": 48712 }, { "epoch": 3.9462896953985744, "grad_norm": 0.06588099151849747, "learning_rate": 2.9839326702371845e-06, "loss": 0.2022, "step": 48713 }, { "epoch": 3.9463707064160727, "grad_norm": 0.06596425175666809, "learning_rate": 2.979432017642558e-06, "loss": 0.1856, "step": 48714 }, { "epoch": 3.946451717433571, "grad_norm": 0.06752095371484756, "learning_rate": 2.974931365047932e-06, "loss": 0.2004, "step": 48715 }, { "epoch": 3.9465327284510696, "grad_norm": 0.07633611559867859, "learning_rate": 2.9704307124533058e-06, "loss": 0.2349, "step": 48716 }, { "epoch": 3.946613739468568, "grad_norm": 0.08213532716035843, "learning_rate": 2.9659300598586794e-06, "loss": 0.2315, "step": 48717 }, { "epoch": 3.946694750486066, "grad_norm": 0.06958873569965363, "learning_rate": 2.9614294072640534e-06, "loss": 0.2222, "step": 48718 }, { "epoch": 3.9467757615035644, "grad_norm": 0.0933084487915039, "learning_rate": 2.956928754669427e-06, "loss": 0.2254, "step": 48719 }, { "epoch": 3.9468567725210626, "grad_norm": 0.07122278958559036, "learning_rate": 2.952428102074801e-06, "loss": 0.2559, "step": 48720 }, { "epoch": 3.9469377835385613, "grad_norm": 0.08493492752313614, "learning_rate": 2.947927449480175e-06, "loss": 0.2415, "step": 48721 }, { "epoch": 3.9470187945560595, "grad_norm": 0.07494331151247025, "learning_rate": 2.9434267968855487e-06, "loss": 0.2334, "step": 48722 }, { "epoch": 3.9470998055735578, "grad_norm": 0.07146378606557846, "learning_rate": 2.9389261442909223e-06, "loss": 0.217, "step": 48723 }, { "epoch": 3.9471808165910565, "grad_norm": 0.05992691218852997, "learning_rate": 2.9344254916962963e-06, "loss": 0.2004, "step": 48724 }, { "epoch": 3.9472618276085547, "grad_norm": 0.07522998005151749, "learning_rate": 2.92992483910167e-06, "loss": 0.1857, "step": 48725 }, { "epoch": 3.947342838626053, "grad_norm": 0.07378930598497391, "learning_rate": 2.925424186507044e-06, "loss": 0.1981, "step": 48726 }, { "epoch": 3.9474238496435516, "grad_norm": 0.08331488817930222, "learning_rate": 2.9209235339124175e-06, "loss": 0.2355, "step": 48727 }, { "epoch": 3.94750486066105, "grad_norm": 0.08735651522874832, "learning_rate": 2.916422881317791e-06, "loss": 0.2182, "step": 48728 }, { "epoch": 3.947585871678548, "grad_norm": 0.06788183748722076, "learning_rate": 2.911922228723165e-06, "loss": 0.2102, "step": 48729 }, { "epoch": 3.947666882696047, "grad_norm": 0.0716099888086319, "learning_rate": 2.9074215761285388e-06, "loss": 0.2277, "step": 48730 }, { "epoch": 3.947747893713545, "grad_norm": 0.08619321882724762, "learning_rate": 2.9029209235339124e-06, "loss": 0.2234, "step": 48731 }, { "epoch": 3.9478289047310433, "grad_norm": 0.0653134360909462, "learning_rate": 2.8984202709392864e-06, "loss": 0.2295, "step": 48732 }, { "epoch": 3.947909915748542, "grad_norm": 0.08110470324754715, "learning_rate": 2.89391961834466e-06, "loss": 0.2422, "step": 48733 }, { "epoch": 3.9479909267660402, "grad_norm": 0.08325320482254028, "learning_rate": 2.8894189657500336e-06, "loss": 0.22, "step": 48734 }, { "epoch": 3.9480719377835385, "grad_norm": 0.07577069103717804, "learning_rate": 2.8849183131554076e-06, "loss": 0.1979, "step": 48735 }, { "epoch": 3.948152948801037, "grad_norm": 0.07868307828903198, "learning_rate": 2.8804176605607816e-06, "loss": 0.241, "step": 48736 }, { "epoch": 3.9482339598185354, "grad_norm": 0.07431667298078537, "learning_rate": 2.8759170079661552e-06, "loss": 0.2376, "step": 48737 }, { "epoch": 3.9483149708360337, "grad_norm": 0.08248012512922287, "learning_rate": 2.8714163553715293e-06, "loss": 0.2488, "step": 48738 }, { "epoch": 3.9483959818535324, "grad_norm": 0.07398643344640732, "learning_rate": 2.866915702776903e-06, "loss": 0.1939, "step": 48739 }, { "epoch": 3.9484769928710306, "grad_norm": 0.06579329073429108, "learning_rate": 2.8624150501822765e-06, "loss": 0.2379, "step": 48740 }, { "epoch": 3.948558003888529, "grad_norm": 0.06077032908797264, "learning_rate": 2.8579143975876505e-06, "loss": 0.2155, "step": 48741 }, { "epoch": 3.948639014906027, "grad_norm": 0.07503858208656311, "learning_rate": 2.853413744993024e-06, "loss": 0.1803, "step": 48742 }, { "epoch": 3.9487200259235253, "grad_norm": 0.08480262011289597, "learning_rate": 2.8489130923983977e-06, "loss": 0.2527, "step": 48743 }, { "epoch": 3.948801036941024, "grad_norm": 0.062442317605018616, "learning_rate": 2.8444124398037717e-06, "loss": 0.2152, "step": 48744 }, { "epoch": 3.9488820479585223, "grad_norm": 0.07862494140863419, "learning_rate": 2.8399117872091453e-06, "loss": 0.2158, "step": 48745 }, { "epoch": 3.9489630589760205, "grad_norm": 0.07681937515735626, "learning_rate": 2.835411134614519e-06, "loss": 0.2325, "step": 48746 }, { "epoch": 3.949044069993519, "grad_norm": 0.0824529230594635, "learning_rate": 2.830910482019893e-06, "loss": 0.2331, "step": 48747 }, { "epoch": 3.9491250810110174, "grad_norm": 0.06473097205162048, "learning_rate": 2.8264098294252666e-06, "loss": 0.2245, "step": 48748 }, { "epoch": 3.9492060920285157, "grad_norm": 0.08356422930955887, "learning_rate": 2.8219091768306406e-06, "loss": 0.2531, "step": 48749 }, { "epoch": 3.9492871030460144, "grad_norm": 0.0842425748705864, "learning_rate": 2.8174085242360146e-06, "loss": 0.2154, "step": 48750 }, { "epoch": 3.9493681140635126, "grad_norm": 0.05836378410458565, "learning_rate": 2.8129078716413882e-06, "loss": 0.2178, "step": 48751 }, { "epoch": 3.949449125081011, "grad_norm": 0.060801442712545395, "learning_rate": 2.808407219046762e-06, "loss": 0.2085, "step": 48752 }, { "epoch": 3.9495301360985096, "grad_norm": 0.06836865842342377, "learning_rate": 2.803906566452136e-06, "loss": 0.2641, "step": 48753 }, { "epoch": 3.949611147116008, "grad_norm": 0.06536520272493362, "learning_rate": 2.7994059138575095e-06, "loss": 0.228, "step": 48754 }, { "epoch": 3.949692158133506, "grad_norm": 0.0712912380695343, "learning_rate": 2.7949052612628835e-06, "loss": 0.2276, "step": 48755 }, { "epoch": 3.9497731691510047, "grad_norm": 0.060562681406736374, "learning_rate": 2.790404608668257e-06, "loss": 0.1713, "step": 48756 }, { "epoch": 3.949854180168503, "grad_norm": 0.07603670656681061, "learning_rate": 2.7859039560736307e-06, "loss": 0.2296, "step": 48757 }, { "epoch": 3.9499351911860012, "grad_norm": 0.08450186252593994, "learning_rate": 2.7814033034790047e-06, "loss": 0.2233, "step": 48758 }, { "epoch": 3.9500162022035, "grad_norm": 0.07135901600122452, "learning_rate": 2.7769026508843783e-06, "loss": 0.2052, "step": 48759 }, { "epoch": 3.950097213220998, "grad_norm": 0.07295495271682739, "learning_rate": 2.772401998289752e-06, "loss": 0.2177, "step": 48760 }, { "epoch": 3.9501782242384964, "grad_norm": 0.07470090687274933, "learning_rate": 2.767901345695126e-06, "loss": 0.2413, "step": 48761 }, { "epoch": 3.9502592352559946, "grad_norm": 0.08046706765890121, "learning_rate": 2.7634006931004996e-06, "loss": 0.2657, "step": 48762 }, { "epoch": 3.9503402462734933, "grad_norm": 0.06358887255191803, "learning_rate": 2.758900040505873e-06, "loss": 0.2253, "step": 48763 }, { "epoch": 3.9504212572909916, "grad_norm": 0.0710747092962265, "learning_rate": 2.754399387911247e-06, "loss": 0.2431, "step": 48764 }, { "epoch": 3.95050226830849, "grad_norm": 0.08081281930208206, "learning_rate": 2.7498987353166212e-06, "loss": 0.2467, "step": 48765 }, { "epoch": 3.950583279325988, "grad_norm": 0.07632328569889069, "learning_rate": 2.745398082721995e-06, "loss": 0.2273, "step": 48766 }, { "epoch": 3.9506642903434868, "grad_norm": 0.09434718638658524, "learning_rate": 2.740897430127369e-06, "loss": 0.2381, "step": 48767 }, { "epoch": 3.950745301360985, "grad_norm": 0.0825645849108696, "learning_rate": 2.7363967775327425e-06, "loss": 0.2702, "step": 48768 }, { "epoch": 3.9508263123784833, "grad_norm": 0.07329137623310089, "learning_rate": 2.731896124938116e-06, "loss": 0.228, "step": 48769 }, { "epoch": 3.950907323395982, "grad_norm": 0.06830742955207825, "learning_rate": 2.72739547234349e-06, "loss": 0.2123, "step": 48770 }, { "epoch": 3.95098833441348, "grad_norm": 0.06959126144647598, "learning_rate": 2.7228948197488637e-06, "loss": 0.2137, "step": 48771 }, { "epoch": 3.9510693454309784, "grad_norm": 0.07482346892356873, "learning_rate": 2.7183941671542373e-06, "loss": 0.2218, "step": 48772 }, { "epoch": 3.951150356448477, "grad_norm": 0.0668899193406105, "learning_rate": 2.7138935145596113e-06, "loss": 0.2187, "step": 48773 }, { "epoch": 3.9512313674659754, "grad_norm": 0.061617180705070496, "learning_rate": 2.709392861964985e-06, "loss": 0.2002, "step": 48774 }, { "epoch": 3.9513123784834736, "grad_norm": 0.08560971915721893, "learning_rate": 2.704892209370359e-06, "loss": 0.2289, "step": 48775 }, { "epoch": 3.9513933895009723, "grad_norm": 0.08538944274187088, "learning_rate": 2.7003915567757326e-06, "loss": 0.2263, "step": 48776 }, { "epoch": 3.9514744005184705, "grad_norm": 0.07387255877256393, "learning_rate": 2.695890904181106e-06, "loss": 0.2137, "step": 48777 }, { "epoch": 3.951555411535969, "grad_norm": 0.07042668759822845, "learning_rate": 2.69139025158648e-06, "loss": 0.1982, "step": 48778 }, { "epoch": 3.9516364225534675, "grad_norm": 0.09200357645750046, "learning_rate": 2.686889598991854e-06, "loss": 0.21, "step": 48779 }, { "epoch": 3.9517174335709657, "grad_norm": 0.11538971960544586, "learning_rate": 2.682388946397228e-06, "loss": 0.2395, "step": 48780 }, { "epoch": 3.951798444588464, "grad_norm": 0.09120815247297287, "learning_rate": 2.6778882938026014e-06, "loss": 0.266, "step": 48781 }, { "epoch": 3.9518794556059627, "grad_norm": 0.06618034094572067, "learning_rate": 2.6733876412079754e-06, "loss": 0.1992, "step": 48782 }, { "epoch": 3.951960466623461, "grad_norm": 0.07401160895824432, "learning_rate": 2.668886988613349e-06, "loss": 0.207, "step": 48783 }, { "epoch": 3.952041477640959, "grad_norm": 0.08141220360994339, "learning_rate": 2.664386336018723e-06, "loss": 0.2225, "step": 48784 }, { "epoch": 3.9521224886584574, "grad_norm": 0.06158602237701416, "learning_rate": 2.6598856834240967e-06, "loss": 0.1807, "step": 48785 }, { "epoch": 3.952203499675956, "grad_norm": 0.07168226689100266, "learning_rate": 2.6553850308294703e-06, "loss": 0.1947, "step": 48786 }, { "epoch": 3.9522845106934543, "grad_norm": 0.10002150386571884, "learning_rate": 2.6508843782348443e-06, "loss": 0.227, "step": 48787 }, { "epoch": 3.9523655217109526, "grad_norm": 0.060031428933143616, "learning_rate": 2.646383725640218e-06, "loss": 0.2223, "step": 48788 }, { "epoch": 3.952446532728451, "grad_norm": 0.07038126140832901, "learning_rate": 2.6418830730455915e-06, "loss": 0.22, "step": 48789 }, { "epoch": 3.9525275437459495, "grad_norm": 0.06693422794342041, "learning_rate": 2.6373824204509655e-06, "loss": 0.2072, "step": 48790 }, { "epoch": 3.9526085547634477, "grad_norm": 0.07882367074489594, "learning_rate": 2.632881767856339e-06, "loss": 0.2548, "step": 48791 }, { "epoch": 3.952689565780946, "grad_norm": 0.06965406239032745, "learning_rate": 2.6283811152617127e-06, "loss": 0.23, "step": 48792 }, { "epoch": 3.9527705767984447, "grad_norm": 0.060692980885505676, "learning_rate": 2.623880462667087e-06, "loss": 0.2233, "step": 48793 }, { "epoch": 3.952851587815943, "grad_norm": 0.06531994789838791, "learning_rate": 2.619379810072461e-06, "loss": 0.2353, "step": 48794 }, { "epoch": 3.952932598833441, "grad_norm": 0.06411170959472656, "learning_rate": 2.6148791574778344e-06, "loss": 0.2196, "step": 48795 }, { "epoch": 3.95301360985094, "grad_norm": 0.08259644359350204, "learning_rate": 2.6103785048832084e-06, "loss": 0.2309, "step": 48796 }, { "epoch": 3.953094620868438, "grad_norm": 0.07093773037195206, "learning_rate": 2.605877852288582e-06, "loss": 0.2239, "step": 48797 }, { "epoch": 3.9531756318859363, "grad_norm": 0.06640201807022095, "learning_rate": 2.6013771996939556e-06, "loss": 0.2004, "step": 48798 }, { "epoch": 3.953256642903435, "grad_norm": 0.07526429742574692, "learning_rate": 2.5968765470993297e-06, "loss": 0.2639, "step": 48799 }, { "epoch": 3.9533376539209333, "grad_norm": 0.06677767634391785, "learning_rate": 2.5923758945047033e-06, "loss": 0.2551, "step": 48800 }, { "epoch": 3.9534186649384315, "grad_norm": 0.06592489778995514, "learning_rate": 2.587875241910077e-06, "loss": 0.2013, "step": 48801 }, { "epoch": 3.95349967595593, "grad_norm": 0.07010385394096375, "learning_rate": 2.583374589315451e-06, "loss": 0.2542, "step": 48802 }, { "epoch": 3.9535806869734285, "grad_norm": 0.05288232862949371, "learning_rate": 2.5788739367208245e-06, "loss": 0.2347, "step": 48803 }, { "epoch": 3.9536616979909267, "grad_norm": 0.07920708507299423, "learning_rate": 2.5743732841261985e-06, "loss": 0.2599, "step": 48804 }, { "epoch": 3.9537427090084254, "grad_norm": 0.07881376147270203, "learning_rate": 2.569872631531572e-06, "loss": 0.2526, "step": 48805 }, { "epoch": 3.9538237200259236, "grad_norm": 0.07075386494398117, "learning_rate": 2.5653719789369457e-06, "loss": 0.2357, "step": 48806 }, { "epoch": 3.953904731043422, "grad_norm": 0.0738983005285263, "learning_rate": 2.5608713263423198e-06, "loss": 0.2094, "step": 48807 }, { "epoch": 3.95398574206092, "grad_norm": 0.06531957536935806, "learning_rate": 2.556370673747694e-06, "loss": 0.1814, "step": 48808 }, { "epoch": 3.954066753078419, "grad_norm": 0.07314100116491318, "learning_rate": 2.5518700211530674e-06, "loss": 0.2491, "step": 48809 }, { "epoch": 3.954147764095917, "grad_norm": 0.06974782794713974, "learning_rate": 2.547369368558441e-06, "loss": 0.2168, "step": 48810 }, { "epoch": 3.9542287751134153, "grad_norm": 0.07167565077543259, "learning_rate": 2.542868715963815e-06, "loss": 0.2411, "step": 48811 }, { "epoch": 3.9543097861309136, "grad_norm": 0.057915229350328445, "learning_rate": 2.5383680633691886e-06, "loss": 0.2, "step": 48812 }, { "epoch": 3.9543907971484122, "grad_norm": 0.0739583894610405, "learning_rate": 2.5338674107745627e-06, "loss": 0.2263, "step": 48813 }, { "epoch": 3.9544718081659105, "grad_norm": 0.08724256604909897, "learning_rate": 2.5293667581799363e-06, "loss": 0.2154, "step": 48814 }, { "epoch": 3.9545528191834087, "grad_norm": 0.08466099947690964, "learning_rate": 2.52486610558531e-06, "loss": 0.2697, "step": 48815 }, { "epoch": 3.9546338302009074, "grad_norm": 0.06930121034383774, "learning_rate": 2.520365452990684e-06, "loss": 0.1939, "step": 48816 }, { "epoch": 3.9547148412184057, "grad_norm": 0.08779342472553253, "learning_rate": 2.5158648003960575e-06, "loss": 0.2267, "step": 48817 }, { "epoch": 3.954795852235904, "grad_norm": 0.06339520961046219, "learning_rate": 2.511364147801431e-06, "loss": 0.2178, "step": 48818 }, { "epoch": 3.9548768632534026, "grad_norm": 0.06660187989473343, "learning_rate": 2.506863495206805e-06, "loss": 0.2536, "step": 48819 }, { "epoch": 3.954957874270901, "grad_norm": 0.06031389907002449, "learning_rate": 2.5023628426121787e-06, "loss": 0.1746, "step": 48820 }, { "epoch": 3.955038885288399, "grad_norm": 0.07642403244972229, "learning_rate": 2.4978621900175523e-06, "loss": 0.2516, "step": 48821 }, { "epoch": 3.9551198963058978, "grad_norm": 0.06653022021055222, "learning_rate": 2.4933615374229268e-06, "loss": 0.2368, "step": 48822 }, { "epoch": 3.955200907323396, "grad_norm": 0.05918882414698601, "learning_rate": 2.4888608848283004e-06, "loss": 0.2194, "step": 48823 }, { "epoch": 3.9552819183408943, "grad_norm": 0.090346559882164, "learning_rate": 2.484360232233674e-06, "loss": 0.226, "step": 48824 }, { "epoch": 3.955362929358393, "grad_norm": 0.06755279004573822, "learning_rate": 2.479859579639048e-06, "loss": 0.2484, "step": 48825 }, { "epoch": 3.955443940375891, "grad_norm": 0.06781566888093948, "learning_rate": 2.4753589270444216e-06, "loss": 0.1975, "step": 48826 }, { "epoch": 3.9555249513933894, "grad_norm": 0.07741211354732513, "learning_rate": 2.4708582744497952e-06, "loss": 0.2321, "step": 48827 }, { "epoch": 3.955605962410888, "grad_norm": 0.07529742270708084, "learning_rate": 2.4663576218551692e-06, "loss": 0.2329, "step": 48828 }, { "epoch": 3.9556869734283864, "grad_norm": 0.06810367852449417, "learning_rate": 2.461856969260543e-06, "loss": 0.2081, "step": 48829 }, { "epoch": 3.9557679844458846, "grad_norm": 0.06922873854637146, "learning_rate": 2.4573563166659165e-06, "loss": 0.2262, "step": 48830 }, { "epoch": 3.955848995463383, "grad_norm": 0.07545153796672821, "learning_rate": 2.4528556640712905e-06, "loss": 0.2295, "step": 48831 }, { "epoch": 3.9559300064808816, "grad_norm": 0.07698988914489746, "learning_rate": 2.448355011476664e-06, "loss": 0.2162, "step": 48832 }, { "epoch": 3.95601101749838, "grad_norm": 0.06655178219079971, "learning_rate": 2.443854358882038e-06, "loss": 0.2242, "step": 48833 }, { "epoch": 3.956092028515878, "grad_norm": 0.06900296360254288, "learning_rate": 2.4393537062874117e-06, "loss": 0.2319, "step": 48834 }, { "epoch": 3.9561730395333763, "grad_norm": 0.0699462965130806, "learning_rate": 2.4348530536927853e-06, "loss": 0.2429, "step": 48835 }, { "epoch": 3.956254050550875, "grad_norm": 0.07188187539577484, "learning_rate": 2.4303524010981593e-06, "loss": 0.2344, "step": 48836 }, { "epoch": 3.9563350615683732, "grad_norm": 0.07619171589612961, "learning_rate": 2.4258517485035334e-06, "loss": 0.2031, "step": 48837 }, { "epoch": 3.9564160725858715, "grad_norm": 0.0603308379650116, "learning_rate": 2.421351095908907e-06, "loss": 0.2033, "step": 48838 }, { "epoch": 3.95649708360337, "grad_norm": 0.0679556280374527, "learning_rate": 2.416850443314281e-06, "loss": 0.2109, "step": 48839 }, { "epoch": 3.9565780946208684, "grad_norm": 0.06686028093099594, "learning_rate": 2.4123497907196546e-06, "loss": 0.2583, "step": 48840 }, { "epoch": 3.9566591056383666, "grad_norm": 0.07004310190677643, "learning_rate": 2.407849138125028e-06, "loss": 0.232, "step": 48841 }, { "epoch": 3.9567401166558653, "grad_norm": 0.05624713748693466, "learning_rate": 2.4033484855304022e-06, "loss": 0.2123, "step": 48842 }, { "epoch": 3.9568211276733636, "grad_norm": 0.0720772072672844, "learning_rate": 2.398847832935776e-06, "loss": 0.2112, "step": 48843 }, { "epoch": 3.956902138690862, "grad_norm": 0.07022903114557266, "learning_rate": 2.3943471803411494e-06, "loss": 0.199, "step": 48844 }, { "epoch": 3.9569831497083605, "grad_norm": 0.0740879699587822, "learning_rate": 2.3898465277465235e-06, "loss": 0.2469, "step": 48845 }, { "epoch": 3.9570641607258588, "grad_norm": 0.07837864011526108, "learning_rate": 2.385345875151897e-06, "loss": 0.2414, "step": 48846 }, { "epoch": 3.957145171743357, "grad_norm": 0.06510287523269653, "learning_rate": 2.3808452225572707e-06, "loss": 0.2119, "step": 48847 }, { "epoch": 3.9572261827608557, "grad_norm": 0.06796301901340485, "learning_rate": 2.3763445699626447e-06, "loss": 0.1879, "step": 48848 }, { "epoch": 3.957307193778354, "grad_norm": 0.0652165561914444, "learning_rate": 2.3718439173680183e-06, "loss": 0.2306, "step": 48849 }, { "epoch": 3.957388204795852, "grad_norm": 0.08012350648641586, "learning_rate": 2.367343264773392e-06, "loss": 0.2511, "step": 48850 }, { "epoch": 3.957469215813351, "grad_norm": 0.06359118223190308, "learning_rate": 2.3628426121787664e-06, "loss": 0.1956, "step": 48851 }, { "epoch": 3.957550226830849, "grad_norm": 0.09778131544589996, "learning_rate": 2.35834195958414e-06, "loss": 0.2337, "step": 48852 }, { "epoch": 3.9576312378483474, "grad_norm": 0.07835190743207932, "learning_rate": 2.3538413069895136e-06, "loss": 0.2037, "step": 48853 }, { "epoch": 3.9577122488658456, "grad_norm": 0.07351531833410263, "learning_rate": 2.3493406543948876e-06, "loss": 0.276, "step": 48854 }, { "epoch": 3.9577932598833443, "grad_norm": 0.0607091523706913, "learning_rate": 2.344840001800261e-06, "loss": 0.2262, "step": 48855 }, { "epoch": 3.9578742709008425, "grad_norm": 0.06498251855373383, "learning_rate": 2.340339349205635e-06, "loss": 0.2159, "step": 48856 }, { "epoch": 3.957955281918341, "grad_norm": 0.07029843330383301, "learning_rate": 2.335838696611009e-06, "loss": 0.1888, "step": 48857 }, { "epoch": 3.958036292935839, "grad_norm": 0.07102137804031372, "learning_rate": 2.3313380440163824e-06, "loss": 0.2604, "step": 48858 }, { "epoch": 3.9581173039533377, "grad_norm": 0.0782492458820343, "learning_rate": 2.326837391421756e-06, "loss": 0.2622, "step": 48859 }, { "epoch": 3.958198314970836, "grad_norm": 0.06843463331460953, "learning_rate": 2.32233673882713e-06, "loss": 0.2179, "step": 48860 }, { "epoch": 3.958279325988334, "grad_norm": 0.07084222882986069, "learning_rate": 2.3178360862325037e-06, "loss": 0.1941, "step": 48861 }, { "epoch": 3.958360337005833, "grad_norm": 0.0704958513379097, "learning_rate": 2.3133354336378777e-06, "loss": 0.226, "step": 48862 }, { "epoch": 3.958441348023331, "grad_norm": 0.06495588272809982, "learning_rate": 2.3088347810432513e-06, "loss": 0.2503, "step": 48863 }, { "epoch": 3.9585223590408294, "grad_norm": 0.0666610449552536, "learning_rate": 2.304334128448625e-06, "loss": 0.2016, "step": 48864 }, { "epoch": 3.958603370058328, "grad_norm": 0.0791105106472969, "learning_rate": 2.299833475853999e-06, "loss": 0.211, "step": 48865 }, { "epoch": 3.9586843810758263, "grad_norm": 0.08585468679666519, "learning_rate": 2.295332823259373e-06, "loss": 0.2206, "step": 48866 }, { "epoch": 3.9587653920933246, "grad_norm": 0.07571440190076828, "learning_rate": 2.2908321706647466e-06, "loss": 0.2165, "step": 48867 }, { "epoch": 3.9588464031108233, "grad_norm": 0.07079493254423141, "learning_rate": 2.2863315180701206e-06, "loss": 0.2351, "step": 48868 }, { "epoch": 3.9589274141283215, "grad_norm": 0.08157934993505478, "learning_rate": 2.281830865475494e-06, "loss": 0.209, "step": 48869 }, { "epoch": 3.9590084251458197, "grad_norm": 0.058229394257068634, "learning_rate": 2.2773302128808678e-06, "loss": 0.216, "step": 48870 }, { "epoch": 3.9590894361633184, "grad_norm": 0.06710825115442276, "learning_rate": 2.272829560286242e-06, "loss": 0.2343, "step": 48871 }, { "epoch": 3.9591704471808167, "grad_norm": 0.07763312757015228, "learning_rate": 2.2683289076916154e-06, "loss": 0.2398, "step": 48872 }, { "epoch": 3.959251458198315, "grad_norm": 0.07329755276441574, "learning_rate": 2.263828255096989e-06, "loss": 0.1929, "step": 48873 }, { "epoch": 3.9593324692158136, "grad_norm": 0.052778083831071854, "learning_rate": 2.259327602502363e-06, "loss": 0.2198, "step": 48874 }, { "epoch": 3.959413480233312, "grad_norm": 0.06382036954164505, "learning_rate": 2.2548269499077366e-06, "loss": 0.2334, "step": 48875 }, { "epoch": 3.95949449125081, "grad_norm": 0.06736230105161667, "learning_rate": 2.2503262973131103e-06, "loss": 0.1772, "step": 48876 }, { "epoch": 3.9595755022683083, "grad_norm": 0.09311016649007797, "learning_rate": 2.2458256447184843e-06, "loss": 0.2461, "step": 48877 }, { "epoch": 3.959656513285807, "grad_norm": 0.067777618765831, "learning_rate": 2.241324992123858e-06, "loss": 0.2162, "step": 48878 }, { "epoch": 3.9597375243033053, "grad_norm": 0.07997038960456848, "learning_rate": 2.2368243395292315e-06, "loss": 0.241, "step": 48879 }, { "epoch": 3.9598185353208035, "grad_norm": 0.07150693982839584, "learning_rate": 2.232323686934606e-06, "loss": 0.2265, "step": 48880 }, { "epoch": 3.9598995463383018, "grad_norm": 0.06763894110918045, "learning_rate": 2.2278230343399795e-06, "loss": 0.2165, "step": 48881 }, { "epoch": 3.9599805573558005, "grad_norm": 0.07978539168834686, "learning_rate": 2.223322381745353e-06, "loss": 0.2443, "step": 48882 }, { "epoch": 3.9600615683732987, "grad_norm": 0.06002137437462807, "learning_rate": 2.218821729150727e-06, "loss": 0.249, "step": 48883 }, { "epoch": 3.960142579390797, "grad_norm": 0.06918959319591522, "learning_rate": 2.2143210765561008e-06, "loss": 0.2428, "step": 48884 }, { "epoch": 3.9602235904082956, "grad_norm": 0.06803537160158157, "learning_rate": 2.2098204239614744e-06, "loss": 0.2001, "step": 48885 }, { "epoch": 3.960304601425794, "grad_norm": 0.06053481996059418, "learning_rate": 2.2053197713668484e-06, "loss": 0.1956, "step": 48886 }, { "epoch": 3.960385612443292, "grad_norm": 0.062240056693553925, "learning_rate": 2.200819118772222e-06, "loss": 0.2152, "step": 48887 }, { "epoch": 3.960466623460791, "grad_norm": 0.06525016576051712, "learning_rate": 2.1963184661775956e-06, "loss": 0.1922, "step": 48888 }, { "epoch": 3.960547634478289, "grad_norm": 0.07532958686351776, "learning_rate": 2.1918178135829696e-06, "loss": 0.2079, "step": 48889 }, { "epoch": 3.9606286454957873, "grad_norm": 0.08174377679824829, "learning_rate": 2.1873171609883432e-06, "loss": 0.2211, "step": 48890 }, { "epoch": 3.960709656513286, "grad_norm": 0.07705192267894745, "learning_rate": 2.1828165083937173e-06, "loss": 0.2116, "step": 48891 }, { "epoch": 3.9607906675307842, "grad_norm": 0.08387063443660736, "learning_rate": 2.178315855799091e-06, "loss": 0.1979, "step": 48892 }, { "epoch": 3.9608716785482825, "grad_norm": 0.07427896559238434, "learning_rate": 2.1738152032044645e-06, "loss": 0.2114, "step": 48893 }, { "epoch": 3.960952689565781, "grad_norm": 0.0671217292547226, "learning_rate": 2.1693145506098385e-06, "loss": 0.2102, "step": 48894 }, { "epoch": 3.9610337005832794, "grad_norm": 0.0731186717748642, "learning_rate": 2.1648138980152125e-06, "loss": 0.2358, "step": 48895 }, { "epoch": 3.9611147116007777, "grad_norm": 0.08874718844890594, "learning_rate": 2.160313245420586e-06, "loss": 0.2466, "step": 48896 }, { "epoch": 3.9611957226182763, "grad_norm": 0.08343454450368881, "learning_rate": 2.15581259282596e-06, "loss": 0.2146, "step": 48897 }, { "epoch": 3.9612767336357746, "grad_norm": 0.07566836476325989, "learning_rate": 2.1513119402313338e-06, "loss": 0.2411, "step": 48898 }, { "epoch": 3.961357744653273, "grad_norm": 0.08290202170610428, "learning_rate": 2.1468112876367074e-06, "loss": 0.2512, "step": 48899 }, { "epoch": 3.961438755670771, "grad_norm": 0.07356373220682144, "learning_rate": 2.1423106350420814e-06, "loss": 0.2484, "step": 48900 }, { "epoch": 3.9615197666882693, "grad_norm": 0.08584269881248474, "learning_rate": 2.137809982447455e-06, "loss": 0.2302, "step": 48901 }, { "epoch": 3.961600777705768, "grad_norm": 0.06294574588537216, "learning_rate": 2.1333093298528286e-06, "loss": 0.2307, "step": 48902 }, { "epoch": 3.9616817887232663, "grad_norm": 0.06678062677383423, "learning_rate": 2.1288086772582026e-06, "loss": 0.2059, "step": 48903 }, { "epoch": 3.9617627997407645, "grad_norm": 0.06297358125448227, "learning_rate": 2.1243080246635762e-06, "loss": 0.2176, "step": 48904 }, { "epoch": 3.961843810758263, "grad_norm": 0.07585348933935165, "learning_rate": 2.11980737206895e-06, "loss": 0.2497, "step": 48905 }, { "epoch": 3.9619248217757614, "grad_norm": 0.07568811625242233, "learning_rate": 2.115306719474324e-06, "loss": 0.2114, "step": 48906 }, { "epoch": 3.9620058327932597, "grad_norm": 0.07441581040620804, "learning_rate": 2.1108060668796975e-06, "loss": 0.287, "step": 48907 }, { "epoch": 3.9620868438107584, "grad_norm": 0.06748269498348236, "learning_rate": 2.106305414285071e-06, "loss": 0.2269, "step": 48908 }, { "epoch": 3.9621678548282566, "grad_norm": 0.07350584864616394, "learning_rate": 2.1018047616904455e-06, "loss": 0.2298, "step": 48909 }, { "epoch": 3.962248865845755, "grad_norm": 0.07611532509326935, "learning_rate": 2.097304109095819e-06, "loss": 0.236, "step": 48910 }, { "epoch": 3.9623298768632536, "grad_norm": 0.07814755290746689, "learning_rate": 2.0928034565011927e-06, "loss": 0.2699, "step": 48911 }, { "epoch": 3.962410887880752, "grad_norm": 0.07571461796760559, "learning_rate": 2.0883028039065667e-06, "loss": 0.2033, "step": 48912 }, { "epoch": 3.96249189889825, "grad_norm": 0.08205527067184448, "learning_rate": 2.0838021513119403e-06, "loss": 0.2449, "step": 48913 }, { "epoch": 3.9625729099157487, "grad_norm": 0.06475473195314407, "learning_rate": 2.079301498717314e-06, "loss": 0.2245, "step": 48914 }, { "epoch": 3.962653920933247, "grad_norm": 0.06566368043422699, "learning_rate": 2.074800846122688e-06, "loss": 0.1976, "step": 48915 }, { "epoch": 3.962734931950745, "grad_norm": 0.08065775781869888, "learning_rate": 2.0703001935280616e-06, "loss": 0.2169, "step": 48916 }, { "epoch": 3.962815942968244, "grad_norm": 0.06467075645923615, "learning_rate": 2.0657995409334356e-06, "loss": 0.2001, "step": 48917 }, { "epoch": 3.962896953985742, "grad_norm": 0.06762147694826126, "learning_rate": 2.0612988883388092e-06, "loss": 0.2267, "step": 48918 }, { "epoch": 3.9629779650032404, "grad_norm": 0.09147316217422485, "learning_rate": 2.056798235744183e-06, "loss": 0.2093, "step": 48919 }, { "epoch": 3.963058976020739, "grad_norm": 0.050940994173288345, "learning_rate": 2.052297583149557e-06, "loss": 0.2315, "step": 48920 }, { "epoch": 3.9631399870382373, "grad_norm": 0.06474053114652634, "learning_rate": 2.0477969305549304e-06, "loss": 0.2059, "step": 48921 }, { "epoch": 3.9632209980557356, "grad_norm": 0.06484822183847427, "learning_rate": 2.043296277960304e-06, "loss": 0.2061, "step": 48922 }, { "epoch": 3.963302009073234, "grad_norm": 0.0787152349948883, "learning_rate": 2.038795625365678e-06, "loss": 0.2333, "step": 48923 }, { "epoch": 3.963383020090732, "grad_norm": 0.07503559440374374, "learning_rate": 2.034294972771052e-06, "loss": 0.2119, "step": 48924 }, { "epoch": 3.9634640311082308, "grad_norm": 0.06612849235534668, "learning_rate": 2.0297943201764257e-06, "loss": 0.2236, "step": 48925 }, { "epoch": 3.963545042125729, "grad_norm": 0.07943277806043625, "learning_rate": 2.0252936675817997e-06, "loss": 0.1927, "step": 48926 }, { "epoch": 3.9636260531432272, "grad_norm": 0.08900720626115799, "learning_rate": 2.0207930149871733e-06, "loss": 0.2616, "step": 48927 }, { "epoch": 3.963707064160726, "grad_norm": 0.07647895067930222, "learning_rate": 2.016292362392547e-06, "loss": 0.2124, "step": 48928 }, { "epoch": 3.963788075178224, "grad_norm": 0.08729333430528641, "learning_rate": 2.011791709797921e-06, "loss": 0.225, "step": 48929 }, { "epoch": 3.9638690861957224, "grad_norm": 0.06749184429645538, "learning_rate": 2.0072910572032946e-06, "loss": 0.2568, "step": 48930 }, { "epoch": 3.963950097213221, "grad_norm": 0.07493914663791656, "learning_rate": 2.002790404608668e-06, "loss": 0.2483, "step": 48931 }, { "epoch": 3.9640311082307194, "grad_norm": 0.07415442168712616, "learning_rate": 1.998289752014042e-06, "loss": 0.2709, "step": 48932 }, { "epoch": 3.9641121192482176, "grad_norm": 0.055181387811899185, "learning_rate": 1.993789099419416e-06, "loss": 0.1915, "step": 48933 }, { "epoch": 3.9641931302657163, "grad_norm": 0.0670875683426857, "learning_rate": 1.9892884468247894e-06, "loss": 0.2323, "step": 48934 }, { "epoch": 3.9642741412832145, "grad_norm": 0.06657449901103973, "learning_rate": 1.9847877942301634e-06, "loss": 0.2334, "step": 48935 }, { "epoch": 3.964355152300713, "grad_norm": 0.06159026175737381, "learning_rate": 1.980287141635537e-06, "loss": 0.1616, "step": 48936 }, { "epoch": 3.9644361633182115, "grad_norm": 0.06844350695610046, "learning_rate": 1.975786489040911e-06, "loss": 0.2003, "step": 48937 }, { "epoch": 3.9645171743357097, "grad_norm": 0.07265136390924454, "learning_rate": 1.971285836446285e-06, "loss": 0.2481, "step": 48938 }, { "epoch": 3.964598185353208, "grad_norm": 0.07296200096607208, "learning_rate": 1.9667851838516587e-06, "loss": 0.2074, "step": 48939 }, { "epoch": 3.9646791963707066, "grad_norm": 0.07084230333566666, "learning_rate": 1.9622845312570323e-06, "loss": 0.2267, "step": 48940 }, { "epoch": 3.964760207388205, "grad_norm": 0.06154124438762665, "learning_rate": 1.9577838786624063e-06, "loss": 0.218, "step": 48941 }, { "epoch": 3.964841218405703, "grad_norm": 0.05799073725938797, "learning_rate": 1.95328322606778e-06, "loss": 0.2076, "step": 48942 }, { "epoch": 3.964922229423202, "grad_norm": 0.06497079133987427, "learning_rate": 1.9487825734731535e-06, "loss": 0.2096, "step": 48943 }, { "epoch": 3.9650032404407, "grad_norm": 0.07975282520055771, "learning_rate": 1.9442819208785276e-06, "loss": 0.2206, "step": 48944 }, { "epoch": 3.9650842514581983, "grad_norm": 0.06756958365440369, "learning_rate": 1.939781268283901e-06, "loss": 0.2282, "step": 48945 }, { "epoch": 3.9651652624756966, "grad_norm": 0.08484717458486557, "learning_rate": 1.935280615689275e-06, "loss": 0.2343, "step": 48946 }, { "epoch": 3.965246273493195, "grad_norm": 0.07329592853784561, "learning_rate": 1.930779963094649e-06, "loss": 0.2059, "step": 48947 }, { "epoch": 3.9653272845106935, "grad_norm": 0.05684712901711464, "learning_rate": 1.9262793105000224e-06, "loss": 0.228, "step": 48948 }, { "epoch": 3.9654082955281917, "grad_norm": 0.05936472862958908, "learning_rate": 1.9217786579053964e-06, "loss": 0.2148, "step": 48949 }, { "epoch": 3.96548930654569, "grad_norm": 0.07850753515958786, "learning_rate": 1.91727800531077e-06, "loss": 0.2136, "step": 48950 }, { "epoch": 3.9655703175631887, "grad_norm": 0.0721631646156311, "learning_rate": 1.9127773527161436e-06, "loss": 0.1994, "step": 48951 }, { "epoch": 3.965651328580687, "grad_norm": 0.06391099095344543, "learning_rate": 1.9082767001215177e-06, "loss": 0.2079, "step": 48952 }, { "epoch": 3.965732339598185, "grad_norm": 0.06663154065608978, "learning_rate": 1.9037760475268917e-06, "loss": 0.2159, "step": 48953 }, { "epoch": 3.965813350615684, "grad_norm": 0.060114867985248566, "learning_rate": 1.8992753949322653e-06, "loss": 0.2138, "step": 48954 }, { "epoch": 3.965894361633182, "grad_norm": 0.07615365833044052, "learning_rate": 1.894774742337639e-06, "loss": 0.2277, "step": 48955 }, { "epoch": 3.9659753726506803, "grad_norm": 0.06525284796953201, "learning_rate": 1.890274089743013e-06, "loss": 0.2232, "step": 48956 }, { "epoch": 3.966056383668179, "grad_norm": 0.06666583567857742, "learning_rate": 1.8857734371483867e-06, "loss": 0.2199, "step": 48957 }, { "epoch": 3.9661373946856773, "grad_norm": 0.07888475805521011, "learning_rate": 1.8812727845537603e-06, "loss": 0.2368, "step": 48958 }, { "epoch": 3.9662184057031755, "grad_norm": 0.05811379477381706, "learning_rate": 1.8767721319591341e-06, "loss": 0.2513, "step": 48959 }, { "epoch": 3.966299416720674, "grad_norm": 0.07906852662563324, "learning_rate": 1.872271479364508e-06, "loss": 0.2457, "step": 48960 }, { "epoch": 3.9663804277381725, "grad_norm": 0.07660603523254395, "learning_rate": 1.8677708267698816e-06, "loss": 0.2245, "step": 48961 }, { "epoch": 3.9664614387556707, "grad_norm": 0.07632768154144287, "learning_rate": 1.8632701741752554e-06, "loss": 0.2333, "step": 48962 }, { "epoch": 3.9665424497731694, "grad_norm": 0.07934503257274628, "learning_rate": 1.8587695215806292e-06, "loss": 0.2479, "step": 48963 }, { "epoch": 3.9666234607906676, "grad_norm": 0.06583593785762787, "learning_rate": 1.854268868986003e-06, "loss": 0.1923, "step": 48964 }, { "epoch": 3.966704471808166, "grad_norm": 0.07885053008794785, "learning_rate": 1.8497682163913766e-06, "loss": 0.2003, "step": 48965 }, { "epoch": 3.9667854828256646, "grad_norm": 0.07885368168354034, "learning_rate": 1.8452675637967509e-06, "loss": 0.2529, "step": 48966 }, { "epoch": 3.966866493843163, "grad_norm": 0.05907120555639267, "learning_rate": 1.8407669112021245e-06, "loss": 0.2194, "step": 48967 }, { "epoch": 3.966947504860661, "grad_norm": 0.05766279622912407, "learning_rate": 1.8362662586074983e-06, "loss": 0.2244, "step": 48968 }, { "epoch": 3.9670285158781593, "grad_norm": 0.08073402941226959, "learning_rate": 1.831765606012872e-06, "loss": 0.1892, "step": 48969 }, { "epoch": 3.9671095268956575, "grad_norm": 0.08297807723283768, "learning_rate": 1.827264953418246e-06, "loss": 0.2275, "step": 48970 }, { "epoch": 3.9671905379131562, "grad_norm": 0.07996219396591187, "learning_rate": 1.8227643008236195e-06, "loss": 0.2753, "step": 48971 }, { "epoch": 3.9672715489306545, "grad_norm": 0.06728029996156693, "learning_rate": 1.8182636482289933e-06, "loss": 0.2084, "step": 48972 }, { "epoch": 3.9673525599481527, "grad_norm": 0.07134328782558441, "learning_rate": 1.8137629956343671e-06, "loss": 0.2387, "step": 48973 }, { "epoch": 3.9674335709656514, "grad_norm": 0.06936592608690262, "learning_rate": 1.8092623430397407e-06, "loss": 0.226, "step": 48974 }, { "epoch": 3.9675145819831497, "grad_norm": 0.060569167137145996, "learning_rate": 1.8047616904451146e-06, "loss": 0.2323, "step": 48975 }, { "epoch": 3.967595593000648, "grad_norm": 0.06395695358514786, "learning_rate": 1.8002610378504884e-06, "loss": 0.2362, "step": 48976 }, { "epoch": 3.9676766040181466, "grad_norm": 0.0779598280787468, "learning_rate": 1.7957603852558622e-06, "loss": 0.215, "step": 48977 }, { "epoch": 3.967757615035645, "grad_norm": 0.06363376975059509, "learning_rate": 1.7912597326612358e-06, "loss": 0.1997, "step": 48978 }, { "epoch": 3.967838626053143, "grad_norm": 0.07200288027524948, "learning_rate": 1.7867590800666096e-06, "loss": 0.2048, "step": 48979 }, { "epoch": 3.9679196370706418, "grad_norm": 0.07821229100227356, "learning_rate": 1.7822584274719834e-06, "loss": 0.2087, "step": 48980 }, { "epoch": 3.96800064808814, "grad_norm": 0.062123917043209076, "learning_rate": 1.7777577748773574e-06, "loss": 0.2389, "step": 48981 }, { "epoch": 3.9680816591056383, "grad_norm": 0.06906332820653915, "learning_rate": 1.7732571222827313e-06, "loss": 0.2279, "step": 48982 }, { "epoch": 3.968162670123137, "grad_norm": 0.08044512569904327, "learning_rate": 1.7687564696881049e-06, "loss": 0.2301, "step": 48983 }, { "epoch": 3.968243681140635, "grad_norm": 0.06454446911811829, "learning_rate": 1.7642558170934787e-06, "loss": 0.1969, "step": 48984 }, { "epoch": 3.9683246921581334, "grad_norm": 0.08553464710712433, "learning_rate": 1.7597551644988525e-06, "loss": 0.2333, "step": 48985 }, { "epoch": 3.968405703175632, "grad_norm": 0.06968870759010315, "learning_rate": 1.7552545119042263e-06, "loss": 0.2341, "step": 48986 }, { "epoch": 3.9684867141931304, "grad_norm": 0.06505095958709717, "learning_rate": 1.7507538593096e-06, "loss": 0.2056, "step": 48987 }, { "epoch": 3.9685677252106286, "grad_norm": 0.07731178402900696, "learning_rate": 1.7462532067149737e-06, "loss": 0.216, "step": 48988 }, { "epoch": 3.968648736228127, "grad_norm": 0.05740072578191757, "learning_rate": 1.7417525541203475e-06, "loss": 0.2334, "step": 48989 }, { "epoch": 3.9687297472456255, "grad_norm": 0.06639726459980011, "learning_rate": 1.7372519015257211e-06, "loss": 0.2386, "step": 48990 }, { "epoch": 3.968810758263124, "grad_norm": 0.0703684538602829, "learning_rate": 1.732751248931095e-06, "loss": 0.2012, "step": 48991 }, { "epoch": 3.968891769280622, "grad_norm": 0.07246779650449753, "learning_rate": 1.7282505963364688e-06, "loss": 0.2345, "step": 48992 }, { "epoch": 3.9689727802981203, "grad_norm": 0.060479771345853806, "learning_rate": 1.7237499437418426e-06, "loss": 0.1812, "step": 48993 }, { "epoch": 3.969053791315619, "grad_norm": 0.06786667555570602, "learning_rate": 1.7192492911472162e-06, "loss": 0.2288, "step": 48994 }, { "epoch": 3.969134802333117, "grad_norm": 0.07820719480514526, "learning_rate": 1.7147486385525904e-06, "loss": 0.2097, "step": 48995 }, { "epoch": 3.9692158133506155, "grad_norm": 0.06689919531345367, "learning_rate": 1.710247985957964e-06, "loss": 0.2155, "step": 48996 }, { "epoch": 3.969296824368114, "grad_norm": 0.07324912399053574, "learning_rate": 1.7057473333633379e-06, "loss": 0.2158, "step": 48997 }, { "epoch": 3.9693778353856124, "grad_norm": 0.07657838612794876, "learning_rate": 1.7012466807687117e-06, "loss": 0.2181, "step": 48998 }, { "epoch": 3.9694588464031106, "grad_norm": 0.07740187644958496, "learning_rate": 1.6967460281740855e-06, "loss": 0.2351, "step": 48999 }, { "epoch": 3.9695398574206093, "grad_norm": 0.048127420246601105, "learning_rate": 1.692245375579459e-06, "loss": 0.2236, "step": 49000 }, { "epoch": 3.9696208684381076, "grad_norm": 0.07730501145124435, "learning_rate": 1.687744722984833e-06, "loss": 0.2137, "step": 49001 }, { "epoch": 3.969701879455606, "grad_norm": 0.07064519822597504, "learning_rate": 1.6832440703902067e-06, "loss": 0.2536, "step": 49002 }, { "epoch": 3.9697828904731045, "grad_norm": 0.07275062054395676, "learning_rate": 1.6787434177955803e-06, "loss": 0.2273, "step": 49003 }, { "epoch": 3.9698639014906028, "grad_norm": 0.08206738531589508, "learning_rate": 1.6742427652009541e-06, "loss": 0.2448, "step": 49004 }, { "epoch": 3.969944912508101, "grad_norm": 0.07399595528841019, "learning_rate": 1.669742112606328e-06, "loss": 0.2251, "step": 49005 }, { "epoch": 3.9700259235255997, "grad_norm": 0.07586342096328735, "learning_rate": 1.6652414600117018e-06, "loss": 0.2214, "step": 49006 }, { "epoch": 3.970106934543098, "grad_norm": 0.07879979908466339, "learning_rate": 1.6607408074170754e-06, "loss": 0.2474, "step": 49007 }, { "epoch": 3.970187945560596, "grad_norm": 0.061993636190891266, "learning_rate": 1.6562401548224492e-06, "loss": 0.2171, "step": 49008 }, { "epoch": 3.970268956578095, "grad_norm": 0.0825590267777443, "learning_rate": 1.6517395022278232e-06, "loss": 0.2091, "step": 49009 }, { "epoch": 3.970349967595593, "grad_norm": 0.07057903707027435, "learning_rate": 1.647238849633197e-06, "loss": 0.2551, "step": 49010 }, { "epoch": 3.9704309786130914, "grad_norm": 0.07474622130393982, "learning_rate": 1.6427381970385708e-06, "loss": 0.2076, "step": 49011 }, { "epoch": 3.9705119896305896, "grad_norm": 0.06988409161567688, "learning_rate": 1.6382375444439444e-06, "loss": 0.2123, "step": 49012 }, { "epoch": 3.9705930006480883, "grad_norm": 0.09014685451984406, "learning_rate": 1.6337368918493183e-06, "loss": 0.2558, "step": 49013 }, { "epoch": 3.9706740116655865, "grad_norm": 0.05605039373040199, "learning_rate": 1.629236239254692e-06, "loss": 0.2196, "step": 49014 }, { "epoch": 3.970755022683085, "grad_norm": 0.06118842214345932, "learning_rate": 1.6247355866600659e-06, "loss": 0.2252, "step": 49015 }, { "epoch": 3.970836033700583, "grad_norm": 0.0719040036201477, "learning_rate": 1.6202349340654395e-06, "loss": 0.2198, "step": 49016 }, { "epoch": 3.9709170447180817, "grad_norm": 0.07295656204223633, "learning_rate": 1.6157342814708133e-06, "loss": 0.2345, "step": 49017 }, { "epoch": 3.97099805573558, "grad_norm": 0.07177040725946426, "learning_rate": 1.6112336288761871e-06, "loss": 0.2319, "step": 49018 }, { "epoch": 3.971079066753078, "grad_norm": 0.06768198311328888, "learning_rate": 1.606732976281561e-06, "loss": 0.244, "step": 49019 }, { "epoch": 3.971160077770577, "grad_norm": 0.07683272659778595, "learning_rate": 1.6022323236869345e-06, "loss": 0.2378, "step": 49020 }, { "epoch": 3.971241088788075, "grad_norm": 0.06740285456180573, "learning_rate": 1.5977316710923084e-06, "loss": 0.2183, "step": 49021 }, { "epoch": 3.9713220998055734, "grad_norm": 0.0736578181385994, "learning_rate": 1.5932310184976822e-06, "loss": 0.2054, "step": 49022 }, { "epoch": 3.971403110823072, "grad_norm": 0.10093529522418976, "learning_rate": 1.5887303659030558e-06, "loss": 0.2277, "step": 49023 }, { "epoch": 3.9714841218405703, "grad_norm": 0.08745209872722626, "learning_rate": 1.58422971330843e-06, "loss": 0.2632, "step": 49024 }, { "epoch": 3.9715651328580686, "grad_norm": 0.07922547310590744, "learning_rate": 1.5797290607138036e-06, "loss": 0.2189, "step": 49025 }, { "epoch": 3.9716461438755672, "grad_norm": 0.06714785844087601, "learning_rate": 1.5752284081191774e-06, "loss": 0.1954, "step": 49026 }, { "epoch": 3.9717271548930655, "grad_norm": 0.07381433248519897, "learning_rate": 1.5707277555245512e-06, "loss": 0.2117, "step": 49027 }, { "epoch": 3.9718081659105637, "grad_norm": 0.06595097482204437, "learning_rate": 1.566227102929925e-06, "loss": 0.2259, "step": 49028 }, { "epoch": 3.9718891769280624, "grad_norm": 0.07303611189126968, "learning_rate": 1.5617264503352987e-06, "loss": 0.2157, "step": 49029 }, { "epoch": 3.9719701879455607, "grad_norm": 0.09227094054222107, "learning_rate": 1.5572257977406725e-06, "loss": 0.2355, "step": 49030 }, { "epoch": 3.972051198963059, "grad_norm": 0.07859620451927185, "learning_rate": 1.5527251451460463e-06, "loss": 0.2492, "step": 49031 }, { "epoch": 3.9721322099805576, "grad_norm": 0.07130131870508194, "learning_rate": 1.54822449255142e-06, "loss": 0.2507, "step": 49032 }, { "epoch": 3.972213220998056, "grad_norm": 0.08751437067985535, "learning_rate": 1.5437238399567937e-06, "loss": 0.22, "step": 49033 }, { "epoch": 3.972294232015554, "grad_norm": 0.07616513967514038, "learning_rate": 1.5392231873621675e-06, "loss": 0.2547, "step": 49034 }, { "epoch": 3.9723752430330523, "grad_norm": 0.07148608565330505, "learning_rate": 1.5347225347675413e-06, "loss": 0.2185, "step": 49035 }, { "epoch": 3.972456254050551, "grad_norm": 0.07048953324556351, "learning_rate": 1.5302218821729152e-06, "loss": 0.2212, "step": 49036 }, { "epoch": 3.9725372650680493, "grad_norm": 0.06672168523073196, "learning_rate": 1.525721229578289e-06, "loss": 0.1841, "step": 49037 }, { "epoch": 3.9726182760855475, "grad_norm": 0.07018128037452698, "learning_rate": 1.5212205769836628e-06, "loss": 0.1995, "step": 49038 }, { "epoch": 3.9726992871030458, "grad_norm": 0.07781115919351578, "learning_rate": 1.5167199243890364e-06, "loss": 0.2275, "step": 49039 }, { "epoch": 3.9727802981205445, "grad_norm": 0.06993318349123001, "learning_rate": 1.5122192717944102e-06, "loss": 0.2015, "step": 49040 }, { "epoch": 3.9728613091380427, "grad_norm": 0.0710805132985115, "learning_rate": 1.507718619199784e-06, "loss": 0.2146, "step": 49041 }, { "epoch": 3.972942320155541, "grad_norm": 0.08362104743719101, "learning_rate": 1.5032179666051578e-06, "loss": 0.2333, "step": 49042 }, { "epoch": 3.9730233311730396, "grad_norm": 0.07563608884811401, "learning_rate": 1.4987173140105317e-06, "loss": 0.214, "step": 49043 }, { "epoch": 3.973104342190538, "grad_norm": 0.07376503944396973, "learning_rate": 1.4942166614159055e-06, "loss": 0.1837, "step": 49044 }, { "epoch": 3.973185353208036, "grad_norm": 0.07147838920354843, "learning_rate": 1.489716008821279e-06, "loss": 0.2029, "step": 49045 }, { "epoch": 3.973266364225535, "grad_norm": 0.08322165906429291, "learning_rate": 1.4852153562266529e-06, "loss": 0.2219, "step": 49046 }, { "epoch": 3.973347375243033, "grad_norm": 0.07755421847105026, "learning_rate": 1.4807147036320267e-06, "loss": 0.2314, "step": 49047 }, { "epoch": 3.9734283862605313, "grad_norm": 0.06550732254981995, "learning_rate": 1.4762140510374005e-06, "loss": 0.218, "step": 49048 }, { "epoch": 3.97350939727803, "grad_norm": 0.062065187841653824, "learning_rate": 1.4717133984427743e-06, "loss": 0.2148, "step": 49049 }, { "epoch": 3.9735904082955282, "grad_norm": 0.06424777954816818, "learning_rate": 1.4672127458481481e-06, "loss": 0.223, "step": 49050 }, { "epoch": 3.9736714193130265, "grad_norm": 0.06383474171161652, "learning_rate": 1.462712093253522e-06, "loss": 0.216, "step": 49051 }, { "epoch": 3.973752430330525, "grad_norm": 0.07289434969425201, "learning_rate": 1.4582114406588956e-06, "loss": 0.2535, "step": 49052 }, { "epoch": 3.9738334413480234, "grad_norm": 0.08658458292484283, "learning_rate": 1.4537107880642694e-06, "loss": 0.2414, "step": 49053 }, { "epoch": 3.9739144523655217, "grad_norm": 0.050192661583423615, "learning_rate": 1.4492101354696432e-06, "loss": 0.2052, "step": 49054 }, { "epoch": 3.9739954633830203, "grad_norm": 0.061246853321790695, "learning_rate": 1.4447094828750168e-06, "loss": 0.2015, "step": 49055 }, { "epoch": 3.9740764744005186, "grad_norm": 0.08989216387271881, "learning_rate": 1.4402088302803908e-06, "loss": 0.2483, "step": 49056 }, { "epoch": 3.974157485418017, "grad_norm": 0.06548626720905304, "learning_rate": 1.4357081776857646e-06, "loss": 0.2245, "step": 49057 }, { "epoch": 3.974238496435515, "grad_norm": 0.09296084940433502, "learning_rate": 1.4312075250911382e-06, "loss": 0.211, "step": 49058 }, { "epoch": 3.9743195074530138, "grad_norm": 0.08216899633407593, "learning_rate": 1.426706872496512e-06, "loss": 0.2369, "step": 49059 }, { "epoch": 3.974400518470512, "grad_norm": 0.06864972412586212, "learning_rate": 1.4222062199018859e-06, "loss": 0.2543, "step": 49060 }, { "epoch": 3.9744815294880103, "grad_norm": 0.06113716587424278, "learning_rate": 1.4177055673072595e-06, "loss": 0.2291, "step": 49061 }, { "epoch": 3.9745625405055085, "grad_norm": 0.07123496383428574, "learning_rate": 1.4132049147126333e-06, "loss": 0.192, "step": 49062 }, { "epoch": 3.974643551523007, "grad_norm": 0.07263696938753128, "learning_rate": 1.4087042621180073e-06, "loss": 0.2148, "step": 49063 }, { "epoch": 3.9747245625405054, "grad_norm": 0.07747448235750198, "learning_rate": 1.404203609523381e-06, "loss": 0.2068, "step": 49064 }, { "epoch": 3.9748055735580037, "grad_norm": 0.06405054777860641, "learning_rate": 1.3997029569287547e-06, "loss": 0.2361, "step": 49065 }, { "epoch": 3.9748865845755024, "grad_norm": 0.08656203746795654, "learning_rate": 1.3952023043341286e-06, "loss": 0.2595, "step": 49066 }, { "epoch": 3.9749675955930006, "grad_norm": 0.06898082792758942, "learning_rate": 1.3907016517395024e-06, "loss": 0.1922, "step": 49067 }, { "epoch": 3.975048606610499, "grad_norm": 0.08081252872943878, "learning_rate": 1.386200999144876e-06, "loss": 0.2472, "step": 49068 }, { "epoch": 3.9751296176279975, "grad_norm": 0.1028057113289833, "learning_rate": 1.3817003465502498e-06, "loss": 0.2194, "step": 49069 }, { "epoch": 3.975210628645496, "grad_norm": 0.0644211396574974, "learning_rate": 1.3771996939556236e-06, "loss": 0.2284, "step": 49070 }, { "epoch": 3.975291639662994, "grad_norm": 0.07005336135625839, "learning_rate": 1.3726990413609974e-06, "loss": 0.2258, "step": 49071 }, { "epoch": 3.9753726506804927, "grad_norm": 0.07396363466978073, "learning_rate": 1.3681983887663712e-06, "loss": 0.2059, "step": 49072 }, { "epoch": 3.975453661697991, "grad_norm": 0.08447012305259705, "learning_rate": 1.363697736171745e-06, "loss": 0.2223, "step": 49073 }, { "epoch": 3.975534672715489, "grad_norm": 0.07208585739135742, "learning_rate": 1.3591970835771186e-06, "loss": 0.2292, "step": 49074 }, { "epoch": 3.975615683732988, "grad_norm": 0.07174715399742126, "learning_rate": 1.3546964309824925e-06, "loss": 0.2476, "step": 49075 }, { "epoch": 3.975696694750486, "grad_norm": 0.06257397681474686, "learning_rate": 1.3501957783878663e-06, "loss": 0.2405, "step": 49076 }, { "epoch": 3.9757777057679844, "grad_norm": 0.07049359381198883, "learning_rate": 1.34569512579324e-06, "loss": 0.2424, "step": 49077 }, { "epoch": 3.975858716785483, "grad_norm": 0.06485053151845932, "learning_rate": 1.341194473198614e-06, "loss": 0.2209, "step": 49078 }, { "epoch": 3.9759397278029813, "grad_norm": 0.07532617449760437, "learning_rate": 1.3366938206039877e-06, "loss": 0.2469, "step": 49079 }, { "epoch": 3.9760207388204796, "grad_norm": 0.07897515594959259, "learning_rate": 1.3321931680093615e-06, "loss": 0.2256, "step": 49080 }, { "epoch": 3.976101749837978, "grad_norm": 0.07902484387159348, "learning_rate": 1.3276925154147351e-06, "loss": 0.2179, "step": 49081 }, { "epoch": 3.9761827608554765, "grad_norm": 0.07536326348781586, "learning_rate": 1.323191862820109e-06, "loss": 0.2311, "step": 49082 }, { "epoch": 3.9762637718729748, "grad_norm": 0.1022820770740509, "learning_rate": 1.3186912102254828e-06, "loss": 0.2329, "step": 49083 }, { "epoch": 3.976344782890473, "grad_norm": 0.08580672740936279, "learning_rate": 1.3141905576308564e-06, "loss": 0.2162, "step": 49084 }, { "epoch": 3.9764257939079712, "grad_norm": 0.06963769346475601, "learning_rate": 1.3096899050362304e-06, "loss": 0.2087, "step": 49085 }, { "epoch": 3.97650680492547, "grad_norm": 0.06818298995494843, "learning_rate": 1.3051892524416042e-06, "loss": 0.1951, "step": 49086 }, { "epoch": 3.976587815942968, "grad_norm": 0.0778006762266159, "learning_rate": 1.3006885998469778e-06, "loss": 0.2266, "step": 49087 }, { "epoch": 3.9766688269604664, "grad_norm": 0.06969862431287766, "learning_rate": 1.2961879472523516e-06, "loss": 0.2652, "step": 49088 }, { "epoch": 3.976749837977965, "grad_norm": 0.07298214733600616, "learning_rate": 1.2916872946577254e-06, "loss": 0.2344, "step": 49089 }, { "epoch": 3.9768308489954634, "grad_norm": 0.07190950959920883, "learning_rate": 1.2871866420630993e-06, "loss": 0.2142, "step": 49090 }, { "epoch": 3.9769118600129616, "grad_norm": 0.07995373755693436, "learning_rate": 1.2826859894684729e-06, "loss": 0.1973, "step": 49091 }, { "epoch": 3.9769928710304603, "grad_norm": 0.0613616481423378, "learning_rate": 1.278185336873847e-06, "loss": 0.2093, "step": 49092 }, { "epoch": 3.9770738820479585, "grad_norm": 0.06585995107889175, "learning_rate": 1.2736846842792205e-06, "loss": 0.2344, "step": 49093 }, { "epoch": 3.9771548930654568, "grad_norm": 0.07106611132621765, "learning_rate": 1.2691840316845943e-06, "loss": 0.2236, "step": 49094 }, { "epoch": 3.9772359040829555, "grad_norm": 0.08742845058441162, "learning_rate": 1.2646833790899681e-06, "loss": 0.2795, "step": 49095 }, { "epoch": 3.9773169151004537, "grad_norm": 0.06771603226661682, "learning_rate": 1.260182726495342e-06, "loss": 0.1974, "step": 49096 }, { "epoch": 3.977397926117952, "grad_norm": 0.07327091693878174, "learning_rate": 1.2556820739007155e-06, "loss": 0.2038, "step": 49097 }, { "epoch": 3.9774789371354506, "grad_norm": 0.08291606605052948, "learning_rate": 1.2511814213060894e-06, "loss": 0.2405, "step": 49098 }, { "epoch": 3.977559948152949, "grad_norm": 0.07166020572185516, "learning_rate": 1.2466807687114634e-06, "loss": 0.2089, "step": 49099 }, { "epoch": 3.977640959170447, "grad_norm": 0.0633031502366066, "learning_rate": 1.242180116116837e-06, "loss": 0.2083, "step": 49100 }, { "epoch": 3.977721970187946, "grad_norm": 0.07655423879623413, "learning_rate": 1.2376794635222108e-06, "loss": 0.2237, "step": 49101 }, { "epoch": 3.977802981205444, "grad_norm": 0.06801167130470276, "learning_rate": 1.2331788109275846e-06, "loss": 0.2008, "step": 49102 }, { "epoch": 3.9778839922229423, "grad_norm": 0.09980975836515427, "learning_rate": 1.2286781583329582e-06, "loss": 0.2363, "step": 49103 }, { "epoch": 3.9779650032404406, "grad_norm": 0.08504997938871384, "learning_rate": 1.224177505738332e-06, "loss": 0.241, "step": 49104 }, { "epoch": 3.9780460142579392, "grad_norm": 0.07652637362480164, "learning_rate": 1.2196768531437059e-06, "loss": 0.2589, "step": 49105 }, { "epoch": 3.9781270252754375, "grad_norm": 0.06277074664831161, "learning_rate": 1.2151762005490797e-06, "loss": 0.2218, "step": 49106 }, { "epoch": 3.9782080362929357, "grad_norm": 0.06175466999411583, "learning_rate": 1.2106755479544535e-06, "loss": 0.2167, "step": 49107 }, { "epoch": 3.978289047310434, "grad_norm": 0.06797228753566742, "learning_rate": 1.2061748953598273e-06, "loss": 0.1944, "step": 49108 }, { "epoch": 3.9783700583279327, "grad_norm": 0.07684358209371567, "learning_rate": 1.2016742427652011e-06, "loss": 0.2701, "step": 49109 }, { "epoch": 3.978451069345431, "grad_norm": 0.06503838300704956, "learning_rate": 1.1971735901705747e-06, "loss": 0.1913, "step": 49110 }, { "epoch": 3.978532080362929, "grad_norm": 0.06464909762144089, "learning_rate": 1.1926729375759485e-06, "loss": 0.1783, "step": 49111 }, { "epoch": 3.978613091380428, "grad_norm": 0.09183213114738464, "learning_rate": 1.1881722849813223e-06, "loss": 0.2243, "step": 49112 }, { "epoch": 3.978694102397926, "grad_norm": 0.05835896357893944, "learning_rate": 1.183671632386696e-06, "loss": 0.1829, "step": 49113 }, { "epoch": 3.9787751134154243, "grad_norm": 0.07257760316133499, "learning_rate": 1.17917097979207e-06, "loss": 0.2254, "step": 49114 }, { "epoch": 3.978856124432923, "grad_norm": 0.06942275911569595, "learning_rate": 1.1746703271974438e-06, "loss": 0.2233, "step": 49115 }, { "epoch": 3.9789371354504213, "grad_norm": 0.0803317055106163, "learning_rate": 1.1701696746028174e-06, "loss": 0.2426, "step": 49116 }, { "epoch": 3.9790181464679195, "grad_norm": 0.08098768442869186, "learning_rate": 1.1656690220081912e-06, "loss": 0.2706, "step": 49117 }, { "epoch": 3.979099157485418, "grad_norm": 0.07547670602798462, "learning_rate": 1.161168369413565e-06, "loss": 0.2932, "step": 49118 }, { "epoch": 3.9791801685029164, "grad_norm": 0.060938771814107895, "learning_rate": 1.1566677168189388e-06, "loss": 0.2588, "step": 49119 }, { "epoch": 3.9792611795204147, "grad_norm": 0.0760703831911087, "learning_rate": 1.1521670642243124e-06, "loss": 0.2226, "step": 49120 }, { "epoch": 3.9793421905379134, "grad_norm": 0.0770978331565857, "learning_rate": 1.1476664116296865e-06, "loss": 0.2161, "step": 49121 }, { "epoch": 3.9794232015554116, "grad_norm": 0.08614175021648407, "learning_rate": 1.1431657590350603e-06, "loss": 0.2087, "step": 49122 }, { "epoch": 3.97950421257291, "grad_norm": 0.07408390194177628, "learning_rate": 1.1386651064404339e-06, "loss": 0.2756, "step": 49123 }, { "epoch": 3.9795852235904086, "grad_norm": 0.07045788317918777, "learning_rate": 1.1341644538458077e-06, "loss": 0.25, "step": 49124 }, { "epoch": 3.979666234607907, "grad_norm": 0.06843449175357819, "learning_rate": 1.1296638012511815e-06, "loss": 0.235, "step": 49125 }, { "epoch": 3.979747245625405, "grad_norm": 0.06088000163435936, "learning_rate": 1.1251631486565551e-06, "loss": 0.1662, "step": 49126 }, { "epoch": 3.9798282566429033, "grad_norm": 0.09108071774244308, "learning_rate": 1.120662496061929e-06, "loss": 0.1939, "step": 49127 }, { "epoch": 3.9799092676604015, "grad_norm": 0.08113054931163788, "learning_rate": 1.116161843467303e-06, "loss": 0.2037, "step": 49128 }, { "epoch": 3.9799902786779002, "grad_norm": 0.06638524681329727, "learning_rate": 1.1116611908726766e-06, "loss": 0.2, "step": 49129 }, { "epoch": 3.9800712896953985, "grad_norm": 0.07080232352018356, "learning_rate": 1.1071605382780504e-06, "loss": 0.2203, "step": 49130 }, { "epoch": 3.9801523007128967, "grad_norm": 0.06715063750743866, "learning_rate": 1.1026598856834242e-06, "loss": 0.2034, "step": 49131 }, { "epoch": 3.9802333117303954, "grad_norm": 0.07261967658996582, "learning_rate": 1.0981592330887978e-06, "loss": 0.2407, "step": 49132 }, { "epoch": 3.9803143227478937, "grad_norm": 0.08318926393985748, "learning_rate": 1.0936585804941716e-06, "loss": 0.1799, "step": 49133 }, { "epoch": 3.980395333765392, "grad_norm": 0.07309187203645706, "learning_rate": 1.0891579278995454e-06, "loss": 0.2382, "step": 49134 }, { "epoch": 3.9804763447828906, "grad_norm": 0.07083363831043243, "learning_rate": 1.0846572753049192e-06, "loss": 0.2248, "step": 49135 }, { "epoch": 3.980557355800389, "grad_norm": 0.07788734883069992, "learning_rate": 1.080156622710293e-06, "loss": 0.2758, "step": 49136 }, { "epoch": 3.980638366817887, "grad_norm": 0.07184889167547226, "learning_rate": 1.0756559701156669e-06, "loss": 0.2069, "step": 49137 }, { "epoch": 3.9807193778353858, "grad_norm": 0.05995340272784233, "learning_rate": 1.0711553175210407e-06, "loss": 0.1908, "step": 49138 }, { "epoch": 3.980800388852884, "grad_norm": 0.06793571263551712, "learning_rate": 1.0666546649264143e-06, "loss": 0.2183, "step": 49139 }, { "epoch": 3.9808813998703823, "grad_norm": 0.0675991103053093, "learning_rate": 1.0621540123317881e-06, "loss": 0.2088, "step": 49140 }, { "epoch": 3.980962410887881, "grad_norm": 0.06840286403894424, "learning_rate": 1.057653359737162e-06, "loss": 0.2176, "step": 49141 }, { "epoch": 3.981043421905379, "grad_norm": 0.07902107387781143, "learning_rate": 1.0531527071425355e-06, "loss": 0.2281, "step": 49142 }, { "epoch": 3.9811244329228774, "grad_norm": 0.06392716616392136, "learning_rate": 1.0486520545479096e-06, "loss": 0.2276, "step": 49143 }, { "epoch": 3.981205443940376, "grad_norm": 0.07346402853727341, "learning_rate": 1.0441514019532834e-06, "loss": 0.2349, "step": 49144 }, { "epoch": 3.9812864549578744, "grad_norm": 0.07689003646373749, "learning_rate": 1.039650749358657e-06, "loss": 0.2228, "step": 49145 }, { "epoch": 3.9813674659753726, "grad_norm": 0.0874786451458931, "learning_rate": 1.0351500967640308e-06, "loss": 0.2419, "step": 49146 }, { "epoch": 3.9814484769928713, "grad_norm": 0.07139569520950317, "learning_rate": 1.0306494441694046e-06, "loss": 0.252, "step": 49147 }, { "epoch": 3.9815294880103695, "grad_norm": 0.08377675712108612, "learning_rate": 1.0261487915747784e-06, "loss": 0.2182, "step": 49148 }, { "epoch": 3.981610499027868, "grad_norm": 0.0838305801153183, "learning_rate": 1.021648138980152e-06, "loss": 0.237, "step": 49149 }, { "epoch": 3.981691510045366, "grad_norm": 0.07349993288516998, "learning_rate": 1.017147486385526e-06, "loss": 0.2277, "step": 49150 }, { "epoch": 3.9817725210628643, "grad_norm": 0.07764997333288193, "learning_rate": 1.0126468337908999e-06, "loss": 0.2132, "step": 49151 }, { "epoch": 3.981853532080363, "grad_norm": 0.07144745439291, "learning_rate": 1.0081461811962735e-06, "loss": 0.2223, "step": 49152 }, { "epoch": 3.981934543097861, "grad_norm": 0.06616366654634476, "learning_rate": 1.0036455286016473e-06, "loss": 0.2418, "step": 49153 }, { "epoch": 3.9820155541153595, "grad_norm": 0.0774175375699997, "learning_rate": 9.99144876007021e-07, "loss": 0.2486, "step": 49154 }, { "epoch": 3.982096565132858, "grad_norm": 0.08736024051904678, "learning_rate": 9.946442234123947e-07, "loss": 0.2434, "step": 49155 }, { "epoch": 3.9821775761503564, "grad_norm": 0.06919243931770325, "learning_rate": 9.901435708177685e-07, "loss": 0.2726, "step": 49156 }, { "epoch": 3.9822585871678546, "grad_norm": 0.07379870116710663, "learning_rate": 9.856429182231425e-07, "loss": 0.2308, "step": 49157 }, { "epoch": 3.9823395981853533, "grad_norm": 0.05552654340863228, "learning_rate": 9.811422656285161e-07, "loss": 0.2211, "step": 49158 }, { "epoch": 3.9824206092028516, "grad_norm": 0.07735942304134369, "learning_rate": 9.7664161303389e-07, "loss": 0.2101, "step": 49159 }, { "epoch": 3.98250162022035, "grad_norm": 0.07008510828018188, "learning_rate": 9.721409604392638e-07, "loss": 0.2402, "step": 49160 }, { "epoch": 3.9825826312378485, "grad_norm": 0.06766650825738907, "learning_rate": 9.676403078446376e-07, "loss": 0.2189, "step": 49161 }, { "epoch": 3.9826636422553467, "grad_norm": 0.06831780821084976, "learning_rate": 9.631396552500112e-07, "loss": 0.1818, "step": 49162 }, { "epoch": 3.982744653272845, "grad_norm": 0.07361584156751633, "learning_rate": 9.58639002655385e-07, "loss": 0.2391, "step": 49163 }, { "epoch": 3.9828256642903437, "grad_norm": 0.07479403913021088, "learning_rate": 9.541383500607588e-07, "loss": 0.2105, "step": 49164 }, { "epoch": 3.982906675307842, "grad_norm": 0.07533962279558182, "learning_rate": 9.496376974661326e-07, "loss": 0.2128, "step": 49165 }, { "epoch": 3.98298768632534, "grad_norm": 0.07377752661705017, "learning_rate": 9.451370448715065e-07, "loss": 0.2785, "step": 49166 }, { "epoch": 3.983068697342839, "grad_norm": 0.06769118458032608, "learning_rate": 9.406363922768802e-07, "loss": 0.2386, "step": 49167 }, { "epoch": 3.983149708360337, "grad_norm": 0.06710786372423172, "learning_rate": 9.36135739682254e-07, "loss": 0.2021, "step": 49168 }, { "epoch": 3.9832307193778353, "grad_norm": 0.07649517804384232, "learning_rate": 9.316350870876277e-07, "loss": 0.2129, "step": 49169 }, { "epoch": 3.983311730395334, "grad_norm": 0.07384105026721954, "learning_rate": 9.271344344930015e-07, "loss": 0.206, "step": 49170 }, { "epoch": 3.9833927414128323, "grad_norm": 0.07594034075737, "learning_rate": 9.226337818983754e-07, "loss": 0.2499, "step": 49171 }, { "epoch": 3.9834737524303305, "grad_norm": 0.07467283308506012, "learning_rate": 9.181331293037491e-07, "loss": 0.236, "step": 49172 }, { "epoch": 3.9835547634478288, "grad_norm": 0.07646078616380692, "learning_rate": 9.13632476709123e-07, "loss": 0.2835, "step": 49173 }, { "epoch": 3.983635774465327, "grad_norm": 0.079698346555233, "learning_rate": 9.091318241144967e-07, "loss": 0.2269, "step": 49174 }, { "epoch": 3.9837167854828257, "grad_norm": 0.08032261580228806, "learning_rate": 9.046311715198704e-07, "loss": 0.2175, "step": 49175 }, { "epoch": 3.983797796500324, "grad_norm": 0.08225135505199432, "learning_rate": 9.001305189252442e-07, "loss": 0.2507, "step": 49176 }, { "epoch": 3.983878807517822, "grad_norm": 0.08566189557313919, "learning_rate": 8.956298663306179e-07, "loss": 0.2036, "step": 49177 }, { "epoch": 3.983959818535321, "grad_norm": 0.07229893654584885, "learning_rate": 8.911292137359917e-07, "loss": 0.2109, "step": 49178 }, { "epoch": 3.984040829552819, "grad_norm": 0.07611256837844849, "learning_rate": 8.866285611413656e-07, "loss": 0.2151, "step": 49179 }, { "epoch": 3.9841218405703174, "grad_norm": 0.06633207947015762, "learning_rate": 8.821279085467393e-07, "loss": 0.2284, "step": 49180 }, { "epoch": 3.984202851587816, "grad_norm": 0.0713014304637909, "learning_rate": 8.776272559521132e-07, "loss": 0.2267, "step": 49181 }, { "epoch": 3.9842838626053143, "grad_norm": 0.05671551451086998, "learning_rate": 8.731266033574869e-07, "loss": 0.231, "step": 49182 }, { "epoch": 3.9843648736228126, "grad_norm": 0.08908804506063461, "learning_rate": 8.686259507628606e-07, "loss": 0.243, "step": 49183 }, { "epoch": 3.9844458846403112, "grad_norm": 0.08369207382202148, "learning_rate": 8.641252981682344e-07, "loss": 0.2124, "step": 49184 }, { "epoch": 3.9845268956578095, "grad_norm": 0.075348399579525, "learning_rate": 8.596246455736081e-07, "loss": 0.2084, "step": 49185 }, { "epoch": 3.9846079066753077, "grad_norm": 0.07731541991233826, "learning_rate": 8.55123992978982e-07, "loss": 0.2089, "step": 49186 }, { "epoch": 3.9846889176928064, "grad_norm": 0.06858506798744202, "learning_rate": 8.506233403843558e-07, "loss": 0.1975, "step": 49187 }, { "epoch": 3.9847699287103047, "grad_norm": 0.06854752451181412, "learning_rate": 8.461226877897295e-07, "loss": 0.2154, "step": 49188 }, { "epoch": 3.984850939727803, "grad_norm": 0.0722111314535141, "learning_rate": 8.416220351951034e-07, "loss": 0.2499, "step": 49189 }, { "epoch": 3.9849319507453016, "grad_norm": 0.07207582145929337, "learning_rate": 8.371213826004771e-07, "loss": 0.2138, "step": 49190 }, { "epoch": 3.9850129617628, "grad_norm": 0.07529518753290176, "learning_rate": 8.326207300058509e-07, "loss": 0.2467, "step": 49191 }, { "epoch": 3.985093972780298, "grad_norm": 0.0700589045882225, "learning_rate": 8.281200774112246e-07, "loss": 0.237, "step": 49192 }, { "epoch": 3.9851749837977968, "grad_norm": 0.06596378237009048, "learning_rate": 8.236194248165985e-07, "loss": 0.19, "step": 49193 }, { "epoch": 3.985255994815295, "grad_norm": 0.058588989078998566, "learning_rate": 8.191187722219722e-07, "loss": 0.2075, "step": 49194 }, { "epoch": 3.9853370058327933, "grad_norm": 0.08515679091215134, "learning_rate": 8.14618119627346e-07, "loss": 0.2337, "step": 49195 }, { "epoch": 3.9854180168502915, "grad_norm": 0.08090581744909286, "learning_rate": 8.101174670327197e-07, "loss": 0.2216, "step": 49196 }, { "epoch": 3.9854990278677898, "grad_norm": 0.06988976150751114, "learning_rate": 8.056168144380936e-07, "loss": 0.2209, "step": 49197 }, { "epoch": 3.9855800388852884, "grad_norm": 0.08122329413890839, "learning_rate": 8.011161618434673e-07, "loss": 0.2666, "step": 49198 }, { "epoch": 3.9856610499027867, "grad_norm": 0.06371399760246277, "learning_rate": 7.966155092488411e-07, "loss": 0.2513, "step": 49199 }, { "epoch": 3.985742060920285, "grad_norm": 0.07604499161243439, "learning_rate": 7.92114856654215e-07, "loss": 0.2035, "step": 49200 }, { "epoch": 3.9858230719377836, "grad_norm": 0.07374300062656403, "learning_rate": 7.876142040595887e-07, "loss": 0.2181, "step": 49201 }, { "epoch": 3.985904082955282, "grad_norm": 0.07626064866781235, "learning_rate": 7.831135514649625e-07, "loss": 0.239, "step": 49202 }, { "epoch": 3.98598509397278, "grad_norm": 0.05903629586100578, "learning_rate": 7.786128988703362e-07, "loss": 0.2137, "step": 49203 }, { "epoch": 3.986066104990279, "grad_norm": 0.07717857509851456, "learning_rate": 7.7411224627571e-07, "loss": 0.2144, "step": 49204 }, { "epoch": 3.986147116007777, "grad_norm": 0.07685103267431259, "learning_rate": 7.696115936810838e-07, "loss": 0.2063, "step": 49205 }, { "epoch": 3.9862281270252753, "grad_norm": 0.06778260320425034, "learning_rate": 7.651109410864576e-07, "loss": 0.2426, "step": 49206 }, { "epoch": 3.986309138042774, "grad_norm": 0.06964470446109772, "learning_rate": 7.606102884918314e-07, "loss": 0.2167, "step": 49207 }, { "epoch": 3.9863901490602722, "grad_norm": 0.0588604211807251, "learning_rate": 7.561096358972051e-07, "loss": 0.2095, "step": 49208 }, { "epoch": 3.9864711600777705, "grad_norm": 0.06138395890593529, "learning_rate": 7.516089833025789e-07, "loss": 0.2115, "step": 49209 }, { "epoch": 3.986552171095269, "grad_norm": 0.07522926479578018, "learning_rate": 7.471083307079527e-07, "loss": 0.228, "step": 49210 }, { "epoch": 3.9866331821127674, "grad_norm": 0.06714662909507751, "learning_rate": 7.426076781133264e-07, "loss": 0.2457, "step": 49211 }, { "epoch": 3.9867141931302656, "grad_norm": 0.06937988847494125, "learning_rate": 7.381070255187003e-07, "loss": 0.2112, "step": 49212 }, { "epoch": 3.9867952041477643, "grad_norm": 0.06987298280000687, "learning_rate": 7.336063729240741e-07, "loss": 0.2207, "step": 49213 }, { "epoch": 3.9868762151652626, "grad_norm": 0.07640345394611359, "learning_rate": 7.291057203294478e-07, "loss": 0.2241, "step": 49214 }, { "epoch": 3.986957226182761, "grad_norm": 0.06703965365886688, "learning_rate": 7.246050677348216e-07, "loss": 0.2068, "step": 49215 }, { "epoch": 3.987038237200259, "grad_norm": 0.06497196108102798, "learning_rate": 7.201044151401954e-07, "loss": 0.2513, "step": 49216 }, { "epoch": 3.9871192482177578, "grad_norm": 0.08134781569242477, "learning_rate": 7.156037625455691e-07, "loss": 0.2198, "step": 49217 }, { "epoch": 3.987200259235256, "grad_norm": 0.07386305928230286, "learning_rate": 7.111031099509429e-07, "loss": 0.2293, "step": 49218 }, { "epoch": 3.9872812702527543, "grad_norm": 0.06610637903213501, "learning_rate": 7.066024573563166e-07, "loss": 0.2115, "step": 49219 }, { "epoch": 3.9873622812702525, "grad_norm": 0.073157899081707, "learning_rate": 7.021018047616905e-07, "loss": 0.2124, "step": 49220 }, { "epoch": 3.987443292287751, "grad_norm": 0.0644218772649765, "learning_rate": 6.976011521670643e-07, "loss": 0.2333, "step": 49221 }, { "epoch": 3.9875243033052494, "grad_norm": 0.07077537477016449, "learning_rate": 6.93100499572438e-07, "loss": 0.1877, "step": 49222 }, { "epoch": 3.9876053143227477, "grad_norm": 0.0648820549249649, "learning_rate": 6.885998469778118e-07, "loss": 0.2393, "step": 49223 }, { "epoch": 3.9876863253402464, "grad_norm": 0.06722790747880936, "learning_rate": 6.840991943831856e-07, "loss": 0.2105, "step": 49224 }, { "epoch": 3.9877673363577446, "grad_norm": 0.07644420117139816, "learning_rate": 6.795985417885593e-07, "loss": 0.2234, "step": 49225 }, { "epoch": 3.987848347375243, "grad_norm": 0.07702749967575073, "learning_rate": 6.750978891939331e-07, "loss": 0.2136, "step": 49226 }, { "epoch": 3.9879293583927415, "grad_norm": 0.06729872524738312, "learning_rate": 6.70597236599307e-07, "loss": 0.191, "step": 49227 }, { "epoch": 3.98801036941024, "grad_norm": 0.07493147253990173, "learning_rate": 6.660965840046808e-07, "loss": 0.2039, "step": 49228 }, { "epoch": 3.988091380427738, "grad_norm": 0.06625507026910782, "learning_rate": 6.615959314100545e-07, "loss": 0.2119, "step": 49229 }, { "epoch": 3.9881723914452367, "grad_norm": 0.052606746554374695, "learning_rate": 6.570952788154282e-07, "loss": 0.2399, "step": 49230 }, { "epoch": 3.988253402462735, "grad_norm": 0.06827935576438904, "learning_rate": 6.525946262208021e-07, "loss": 0.2288, "step": 49231 }, { "epoch": 3.988334413480233, "grad_norm": 0.08367155492305756, "learning_rate": 6.480939736261758e-07, "loss": 0.2511, "step": 49232 }, { "epoch": 3.988415424497732, "grad_norm": 0.08619492501020432, "learning_rate": 6.435933210315496e-07, "loss": 0.2279, "step": 49233 }, { "epoch": 3.98849643551523, "grad_norm": 0.08508004993200302, "learning_rate": 6.390926684369234e-07, "loss": 0.2145, "step": 49234 }, { "epoch": 3.9885774465327284, "grad_norm": 0.07652024179697037, "learning_rate": 6.345920158422972e-07, "loss": 0.3088, "step": 49235 }, { "epoch": 3.988658457550227, "grad_norm": 0.09197687357664108, "learning_rate": 6.30091363247671e-07, "loss": 0.2427, "step": 49236 }, { "epoch": 3.9887394685677253, "grad_norm": 0.05859651789069176, "learning_rate": 6.255907106530447e-07, "loss": 0.2136, "step": 49237 }, { "epoch": 3.9888204795852236, "grad_norm": 0.08267036080360413, "learning_rate": 6.210900580584185e-07, "loss": 0.2678, "step": 49238 }, { "epoch": 3.988901490602722, "grad_norm": 0.05907399207353592, "learning_rate": 6.165894054637923e-07, "loss": 0.2066, "step": 49239 }, { "epoch": 3.9889825016202205, "grad_norm": 0.07600349932909012, "learning_rate": 6.12088752869166e-07, "loss": 0.2264, "step": 49240 }, { "epoch": 3.9890635126377187, "grad_norm": 0.08025678247213364, "learning_rate": 6.075881002745398e-07, "loss": 0.2387, "step": 49241 }, { "epoch": 3.989144523655217, "grad_norm": 0.06902442127466202, "learning_rate": 6.030874476799137e-07, "loss": 0.2211, "step": 49242 }, { "epoch": 3.9892255346727152, "grad_norm": 0.08164741843938828, "learning_rate": 5.985867950852874e-07, "loss": 0.2501, "step": 49243 }, { "epoch": 3.989306545690214, "grad_norm": 0.058244526386260986, "learning_rate": 5.940861424906612e-07, "loss": 0.2106, "step": 49244 }, { "epoch": 3.989387556707712, "grad_norm": 0.06369130313396454, "learning_rate": 5.89585489896035e-07, "loss": 0.1867, "step": 49245 }, { "epoch": 3.9894685677252104, "grad_norm": 0.05307641252875328, "learning_rate": 5.850848373014087e-07, "loss": 0.1732, "step": 49246 }, { "epoch": 3.989549578742709, "grad_norm": 0.06525785475969315, "learning_rate": 5.805841847067825e-07, "loss": 0.2048, "step": 49247 }, { "epoch": 3.9896305897602073, "grad_norm": 0.06935491412878036, "learning_rate": 5.760835321121562e-07, "loss": 0.1843, "step": 49248 }, { "epoch": 3.9897116007777056, "grad_norm": 0.058017581701278687, "learning_rate": 5.715828795175301e-07, "loss": 0.21, "step": 49249 }, { "epoch": 3.9897926117952043, "grad_norm": 0.0743686705827713, "learning_rate": 5.670822269229039e-07, "loss": 0.2, "step": 49250 }, { "epoch": 3.9898736228127025, "grad_norm": 0.07183623313903809, "learning_rate": 5.625815743282776e-07, "loss": 0.2428, "step": 49251 }, { "epoch": 3.9899546338302008, "grad_norm": 0.06011229380965233, "learning_rate": 5.580809217336515e-07, "loss": 0.2315, "step": 49252 }, { "epoch": 3.9900356448476995, "grad_norm": 0.0707809180021286, "learning_rate": 5.535802691390252e-07, "loss": 0.2014, "step": 49253 }, { "epoch": 3.9901166558651977, "grad_norm": 0.06794430315494537, "learning_rate": 5.490796165443989e-07, "loss": 0.2359, "step": 49254 }, { "epoch": 3.990197666882696, "grad_norm": 0.07199983298778534, "learning_rate": 5.445789639497727e-07, "loss": 0.2156, "step": 49255 }, { "epoch": 3.9902786779001946, "grad_norm": 0.10244887322187424, "learning_rate": 5.400783113551465e-07, "loss": 0.2666, "step": 49256 }, { "epoch": 3.990359688917693, "grad_norm": 0.058576490730047226, "learning_rate": 5.355776587605203e-07, "loss": 0.197, "step": 49257 }, { "epoch": 3.990440699935191, "grad_norm": 0.07232537120580673, "learning_rate": 5.310770061658941e-07, "loss": 0.203, "step": 49258 }, { "epoch": 3.99052171095269, "grad_norm": 0.07895112782716751, "learning_rate": 5.265763535712678e-07, "loss": 0.1973, "step": 49259 }, { "epoch": 3.990602721970188, "grad_norm": 0.0817318707704544, "learning_rate": 5.220757009766417e-07, "loss": 0.2765, "step": 49260 }, { "epoch": 3.9906837329876863, "grad_norm": 0.06556425988674164, "learning_rate": 5.175750483820154e-07, "loss": 0.2576, "step": 49261 }, { "epoch": 3.9907647440051845, "grad_norm": 0.07194989919662476, "learning_rate": 5.130743957873892e-07, "loss": 0.2051, "step": 49262 }, { "epoch": 3.9908457550226832, "grad_norm": 0.07065384835004807, "learning_rate": 5.08573743192763e-07, "loss": 0.2483, "step": 49263 }, { "epoch": 3.9909267660401815, "grad_norm": 0.07403761893510818, "learning_rate": 5.040730905981367e-07, "loss": 0.2408, "step": 49264 }, { "epoch": 3.9910077770576797, "grad_norm": 0.06701704114675522, "learning_rate": 4.995724380035106e-07, "loss": 0.1916, "step": 49265 }, { "epoch": 3.991088788075178, "grad_norm": 0.07365620136260986, "learning_rate": 4.950717854088843e-07, "loss": 0.2207, "step": 49266 }, { "epoch": 3.9911697990926767, "grad_norm": 0.07243810594081879, "learning_rate": 4.905711328142581e-07, "loss": 0.1908, "step": 49267 }, { "epoch": 3.991250810110175, "grad_norm": 0.06687340140342712, "learning_rate": 4.860704802196319e-07, "loss": 0.2387, "step": 49268 }, { "epoch": 3.991331821127673, "grad_norm": 0.07321549952030182, "learning_rate": 4.815698276250056e-07, "loss": 0.2107, "step": 49269 }, { "epoch": 3.991412832145172, "grad_norm": 0.06943608075380325, "learning_rate": 4.770691750303794e-07, "loss": 0.2445, "step": 49270 }, { "epoch": 3.99149384316267, "grad_norm": 0.06473980844020844, "learning_rate": 4.7256852243575323e-07, "loss": 0.1774, "step": 49271 }, { "epoch": 3.9915748541801683, "grad_norm": 0.08363772183656693, "learning_rate": 4.68067869841127e-07, "loss": 0.2475, "step": 49272 }, { "epoch": 3.991655865197667, "grad_norm": 0.08146770298480988, "learning_rate": 4.6356721724650075e-07, "loss": 0.2079, "step": 49273 }, { "epoch": 3.9917368762151653, "grad_norm": 0.07280329614877701, "learning_rate": 4.5906656465187457e-07, "loss": 0.2295, "step": 49274 }, { "epoch": 3.9918178872326635, "grad_norm": 0.08392112702131271, "learning_rate": 4.5456591205724833e-07, "loss": 0.2488, "step": 49275 }, { "epoch": 3.991898898250162, "grad_norm": 0.06567014008760452, "learning_rate": 4.500652594626221e-07, "loss": 0.2126, "step": 49276 }, { "epoch": 3.9919799092676604, "grad_norm": 0.06484349817037582, "learning_rate": 4.4556460686799585e-07, "loss": 0.1919, "step": 49277 }, { "epoch": 3.9920609202851587, "grad_norm": 0.07832783460617065, "learning_rate": 4.4106395427336967e-07, "loss": 0.2157, "step": 49278 }, { "epoch": 3.9921419313026574, "grad_norm": 0.0768279954791069, "learning_rate": 4.3656330167874343e-07, "loss": 0.2463, "step": 49279 }, { "epoch": 3.9922229423201556, "grad_norm": 0.06600246578454971, "learning_rate": 4.320626490841172e-07, "loss": 0.2531, "step": 49280 }, { "epoch": 3.992303953337654, "grad_norm": 0.0632348507642746, "learning_rate": 4.27561996489491e-07, "loss": 0.2087, "step": 49281 }, { "epoch": 3.9923849643551526, "grad_norm": 0.10359801352024078, "learning_rate": 4.2306134389486477e-07, "loss": 0.1958, "step": 49282 }, { "epoch": 3.992465975372651, "grad_norm": 0.0757150650024414, "learning_rate": 4.1856069130023853e-07, "loss": 0.2425, "step": 49283 }, { "epoch": 3.992546986390149, "grad_norm": 0.07178405672311783, "learning_rate": 4.140600387056123e-07, "loss": 0.2043, "step": 49284 }, { "epoch": 3.9926279974076473, "grad_norm": 0.09844893217086792, "learning_rate": 4.095593861109861e-07, "loss": 0.2262, "step": 49285 }, { "epoch": 3.992709008425146, "grad_norm": 0.07299178838729858, "learning_rate": 4.0505873351635987e-07, "loss": 0.2082, "step": 49286 }, { "epoch": 3.9927900194426442, "grad_norm": 0.08776606619358063, "learning_rate": 4.0055808092173363e-07, "loss": 0.2229, "step": 49287 }, { "epoch": 3.9928710304601425, "grad_norm": 0.06883752346038818, "learning_rate": 3.960574283271075e-07, "loss": 0.1953, "step": 49288 }, { "epoch": 3.9929520414776407, "grad_norm": 0.06461341679096222, "learning_rate": 3.9155677573248126e-07, "loss": 0.2013, "step": 49289 }, { "epoch": 3.9930330524951394, "grad_norm": 0.058432161808013916, "learning_rate": 3.87056123137855e-07, "loss": 0.2223, "step": 49290 }, { "epoch": 3.9931140635126376, "grad_norm": 0.06838857382535934, "learning_rate": 3.825554705432288e-07, "loss": 0.2357, "step": 49291 }, { "epoch": 3.993195074530136, "grad_norm": 0.07145708054304123, "learning_rate": 3.7805481794860255e-07, "loss": 0.2288, "step": 49292 }, { "epoch": 3.9932760855476346, "grad_norm": 0.06667095422744751, "learning_rate": 3.7355416535397637e-07, "loss": 0.22, "step": 49293 }, { "epoch": 3.993357096565133, "grad_norm": 0.06883816421031952, "learning_rate": 3.6905351275935013e-07, "loss": 0.2138, "step": 49294 }, { "epoch": 3.993438107582631, "grad_norm": 0.07182978838682175, "learning_rate": 3.645528601647239e-07, "loss": 0.2382, "step": 49295 }, { "epoch": 3.9935191186001298, "grad_norm": 0.08456697314977646, "learning_rate": 3.600522075700977e-07, "loss": 0.2418, "step": 49296 }, { "epoch": 3.993600129617628, "grad_norm": 0.06518872827291489, "learning_rate": 3.5555155497547147e-07, "loss": 0.2274, "step": 49297 }, { "epoch": 3.9936811406351262, "grad_norm": 0.07191832363605499, "learning_rate": 3.5105090238084523e-07, "loss": 0.2078, "step": 49298 }, { "epoch": 3.993762151652625, "grad_norm": 0.07570493221282959, "learning_rate": 3.46550249786219e-07, "loss": 0.1972, "step": 49299 }, { "epoch": 3.993843162670123, "grad_norm": 0.0672534927725792, "learning_rate": 3.420495971915928e-07, "loss": 0.2069, "step": 49300 }, { "epoch": 3.9939241736876214, "grad_norm": 0.08297023177146912, "learning_rate": 3.3754894459696657e-07, "loss": 0.2224, "step": 49301 }, { "epoch": 3.99400518470512, "grad_norm": 0.0740421712398529, "learning_rate": 3.330482920023404e-07, "loss": 0.2122, "step": 49302 }, { "epoch": 3.9940861957226184, "grad_norm": 0.07864964753389359, "learning_rate": 3.285476394077141e-07, "loss": 0.2519, "step": 49303 }, { "epoch": 3.9941672067401166, "grad_norm": 0.07417288422584534, "learning_rate": 3.240469868130879e-07, "loss": 0.2207, "step": 49304 }, { "epoch": 3.9942482177576153, "grad_norm": 0.06941584497690201, "learning_rate": 3.195463342184617e-07, "loss": 0.2537, "step": 49305 }, { "epoch": 3.9943292287751135, "grad_norm": 0.07486558705568314, "learning_rate": 3.150456816238355e-07, "loss": 0.2112, "step": 49306 }, { "epoch": 3.994410239792612, "grad_norm": 0.06437824666500092, "learning_rate": 3.1054502902920925e-07, "loss": 0.2062, "step": 49307 }, { "epoch": 3.99449125081011, "grad_norm": 0.07233487814664841, "learning_rate": 3.06044376434583e-07, "loss": 0.2241, "step": 49308 }, { "epoch": 3.9945722618276087, "grad_norm": 0.06914579123258591, "learning_rate": 3.015437238399568e-07, "loss": 0.1937, "step": 49309 }, { "epoch": 3.994653272845107, "grad_norm": 0.07031723856925964, "learning_rate": 2.970430712453306e-07, "loss": 0.2298, "step": 49310 }, { "epoch": 3.994734283862605, "grad_norm": 0.0636644959449768, "learning_rate": 2.9254241865070435e-07, "loss": 0.2067, "step": 49311 }, { "epoch": 3.9948152948801035, "grad_norm": 0.08062580972909927, "learning_rate": 2.880417660560781e-07, "loss": 0.2336, "step": 49312 }, { "epoch": 3.994896305897602, "grad_norm": 0.07863558083772659, "learning_rate": 2.8354111346145193e-07, "loss": 0.202, "step": 49313 }, { "epoch": 3.9949773169151004, "grad_norm": 0.0746251717209816, "learning_rate": 2.7904046086682574e-07, "loss": 0.2108, "step": 49314 }, { "epoch": 3.9950583279325986, "grad_norm": 0.06599914282560349, "learning_rate": 2.7453980827219945e-07, "loss": 0.203, "step": 49315 }, { "epoch": 3.9951393389500973, "grad_norm": 0.0633784607052803, "learning_rate": 2.7003915567757327e-07, "loss": 0.214, "step": 49316 }, { "epoch": 3.9952203499675956, "grad_norm": 0.07074976712465286, "learning_rate": 2.6553850308294703e-07, "loss": 0.2186, "step": 49317 }, { "epoch": 3.995301360985094, "grad_norm": 0.08301674574613571, "learning_rate": 2.6103785048832084e-07, "loss": 0.2604, "step": 49318 }, { "epoch": 3.9953823720025925, "grad_norm": 0.07280449569225311, "learning_rate": 2.565371978936946e-07, "loss": 0.2365, "step": 49319 }, { "epoch": 3.9954633830200907, "grad_norm": 0.06467054784297943, "learning_rate": 2.5203654529906837e-07, "loss": 0.2106, "step": 49320 }, { "epoch": 3.995544394037589, "grad_norm": 0.07908143103122711, "learning_rate": 2.4753589270444213e-07, "loss": 0.2395, "step": 49321 }, { "epoch": 3.9956254050550877, "grad_norm": 0.06821177899837494, "learning_rate": 2.4303524010981594e-07, "loss": 0.2398, "step": 49322 }, { "epoch": 3.995706416072586, "grad_norm": 0.07683097571134567, "learning_rate": 2.385345875151897e-07, "loss": 0.2641, "step": 49323 }, { "epoch": 3.995787427090084, "grad_norm": 0.0672578513622284, "learning_rate": 2.340339349205635e-07, "loss": 0.201, "step": 49324 }, { "epoch": 3.995868438107583, "grad_norm": 0.10541965812444687, "learning_rate": 2.2953328232593728e-07, "loss": 0.1953, "step": 49325 }, { "epoch": 3.995949449125081, "grad_norm": 0.07959267497062683, "learning_rate": 2.2503262973131105e-07, "loss": 0.2044, "step": 49326 }, { "epoch": 3.9960304601425793, "grad_norm": 0.07636445015668869, "learning_rate": 2.2053197713668483e-07, "loss": 0.2205, "step": 49327 }, { "epoch": 3.996111471160078, "grad_norm": 0.07331163436174393, "learning_rate": 2.160313245420586e-07, "loss": 0.2267, "step": 49328 }, { "epoch": 3.9961924821775763, "grad_norm": 0.06154733523726463, "learning_rate": 2.1153067194743239e-07, "loss": 0.2145, "step": 49329 }, { "epoch": 3.9962734931950745, "grad_norm": 0.07518443465232849, "learning_rate": 2.0703001935280615e-07, "loss": 0.2381, "step": 49330 }, { "epoch": 3.9963545042125728, "grad_norm": 0.06782988458871841, "learning_rate": 2.0252936675817994e-07, "loss": 0.2327, "step": 49331 }, { "epoch": 3.9964355152300715, "grad_norm": 0.08090611547231674, "learning_rate": 1.9802871416355375e-07, "loss": 0.2294, "step": 49332 }, { "epoch": 3.9965165262475697, "grad_norm": 0.05896861106157303, "learning_rate": 1.935280615689275e-07, "loss": 0.2398, "step": 49333 }, { "epoch": 3.996597537265068, "grad_norm": 0.06596443057060242, "learning_rate": 1.8902740897430128e-07, "loss": 0.2233, "step": 49334 }, { "epoch": 3.996678548282566, "grad_norm": 0.0906447172164917, "learning_rate": 1.8452675637967506e-07, "loss": 0.25, "step": 49335 }, { "epoch": 3.996759559300065, "grad_norm": 0.07093532383441925, "learning_rate": 1.8002610378504885e-07, "loss": 0.2599, "step": 49336 }, { "epoch": 3.996840570317563, "grad_norm": 0.07621223479509354, "learning_rate": 1.7552545119042261e-07, "loss": 0.2288, "step": 49337 }, { "epoch": 3.9969215813350614, "grad_norm": 0.06504885852336884, "learning_rate": 1.710247985957964e-07, "loss": 0.1997, "step": 49338 }, { "epoch": 3.99700259235256, "grad_norm": 0.07198863476514816, "learning_rate": 1.665241460011702e-07, "loss": 0.2141, "step": 49339 }, { "epoch": 3.9970836033700583, "grad_norm": 0.06278213858604431, "learning_rate": 1.6202349340654395e-07, "loss": 0.219, "step": 49340 }, { "epoch": 3.9971646143875565, "grad_norm": 0.06734520196914673, "learning_rate": 1.5752284081191774e-07, "loss": 0.2033, "step": 49341 }, { "epoch": 3.9972456254050552, "grad_norm": 0.05727878585457802, "learning_rate": 1.530221882172915e-07, "loss": 0.2046, "step": 49342 }, { "epoch": 3.9973266364225535, "grad_norm": 0.08659632503986359, "learning_rate": 1.485215356226653e-07, "loss": 0.2383, "step": 49343 }, { "epoch": 3.9974076474400517, "grad_norm": 0.06606918573379517, "learning_rate": 1.4402088302803906e-07, "loss": 0.1683, "step": 49344 }, { "epoch": 3.9974886584575504, "grad_norm": 0.07601425051689148, "learning_rate": 1.3952023043341287e-07, "loss": 0.2367, "step": 49345 }, { "epoch": 3.9975696694750487, "grad_norm": 0.07500436156988144, "learning_rate": 1.3501957783878663e-07, "loss": 0.2605, "step": 49346 }, { "epoch": 3.997650680492547, "grad_norm": 0.07162158936262131, "learning_rate": 1.3051892524416042e-07, "loss": 0.2128, "step": 49347 }, { "epoch": 3.9977316915100456, "grad_norm": 0.07516414672136307, "learning_rate": 1.2601827264953418e-07, "loss": 0.1984, "step": 49348 }, { "epoch": 3.997812702527544, "grad_norm": 0.07357397675514221, "learning_rate": 1.2151762005490797e-07, "loss": 0.2305, "step": 49349 }, { "epoch": 3.997893713545042, "grad_norm": 0.07578955590724945, "learning_rate": 1.1701696746028175e-07, "loss": 0.2729, "step": 49350 }, { "epoch": 3.9979747245625408, "grad_norm": 0.08693145960569382, "learning_rate": 1.1251631486565552e-07, "loss": 0.2081, "step": 49351 }, { "epoch": 3.998055735580039, "grad_norm": 0.06208210065960884, "learning_rate": 1.080156622710293e-07, "loss": 0.1848, "step": 49352 }, { "epoch": 3.9981367465975373, "grad_norm": 0.06679573655128479, "learning_rate": 1.0351500967640307e-07, "loss": 0.2396, "step": 49353 }, { "epoch": 3.9982177576150355, "grad_norm": 0.07043427228927612, "learning_rate": 9.901435708177688e-08, "loss": 0.2277, "step": 49354 }, { "epoch": 3.9982987686325338, "grad_norm": 0.06167418509721756, "learning_rate": 9.451370448715064e-08, "loss": 0.2084, "step": 49355 }, { "epoch": 3.9983797796500324, "grad_norm": 0.058673106133937836, "learning_rate": 9.001305189252443e-08, "loss": 0.2388, "step": 49356 }, { "epoch": 3.9984607906675307, "grad_norm": 0.0721500888466835, "learning_rate": 8.55123992978982e-08, "loss": 0.2257, "step": 49357 }, { "epoch": 3.998541801685029, "grad_norm": 0.07805679738521576, "learning_rate": 8.101174670327198e-08, "loss": 0.285, "step": 49358 }, { "epoch": 3.9986228127025276, "grad_norm": 0.0756671130657196, "learning_rate": 7.651109410864575e-08, "loss": 0.2225, "step": 49359 }, { "epoch": 3.998703823720026, "grad_norm": 0.0644574761390686, "learning_rate": 7.201044151401953e-08, "loss": 0.2034, "step": 49360 }, { "epoch": 3.998784834737524, "grad_norm": 0.09972898662090302, "learning_rate": 6.750978891939332e-08, "loss": 0.2121, "step": 49361 }, { "epoch": 3.998865845755023, "grad_norm": 0.07944905012845993, "learning_rate": 6.300913632476709e-08, "loss": 0.2141, "step": 49362 }, { "epoch": 3.998946856772521, "grad_norm": 0.07724474370479584, "learning_rate": 5.8508483730140874e-08, "loss": 0.2399, "step": 49363 }, { "epoch": 3.9990278677900193, "grad_norm": 0.06207426264882088, "learning_rate": 5.400783113551465e-08, "loss": 0.2202, "step": 49364 }, { "epoch": 3.999108878807518, "grad_norm": 0.07973155379295349, "learning_rate": 4.950717854088844e-08, "loss": 0.2378, "step": 49365 }, { "epoch": 3.999189889825016, "grad_norm": 0.07270248979330063, "learning_rate": 4.500652594626221e-08, "loss": 0.2186, "step": 49366 }, { "epoch": 3.9992709008425145, "grad_norm": 0.06034340709447861, "learning_rate": 4.050587335163599e-08, "loss": 0.1782, "step": 49367 }, { "epoch": 3.999351911860013, "grad_norm": 0.08102718740701675, "learning_rate": 3.6005220757009764e-08, "loss": 0.2278, "step": 49368 }, { "epoch": 3.9994329228775114, "grad_norm": 0.060907378792762756, "learning_rate": 3.1504568162383546e-08, "loss": 0.2229, "step": 49369 }, { "epoch": 3.9995139338950096, "grad_norm": 0.07012410461902618, "learning_rate": 2.7003915567757325e-08, "loss": 0.2241, "step": 49370 }, { "epoch": 3.9995949449125083, "grad_norm": 0.0700206607580185, "learning_rate": 2.2503262973131107e-08, "loss": 0.2083, "step": 49371 }, { "epoch": 3.9996759559300066, "grad_norm": 0.08111918717622757, "learning_rate": 1.8002610378504882e-08, "loss": 0.2562, "step": 49372 }, { "epoch": 3.999756966947505, "grad_norm": 0.07391811162233353, "learning_rate": 1.3501957783878662e-08, "loss": 0.1937, "step": 49373 }, { "epoch": 3.9998379779650035, "grad_norm": 0.05770977959036827, "learning_rate": 9.001305189252441e-09, "loss": 0.191, "step": 49374 }, { "epoch": 3.9999189889825018, "grad_norm": 0.05946144461631775, "learning_rate": 4.5006525946262205e-09, "loss": 0.1792, "step": 49375 }, { "epoch": 4.0, "grad_norm": 0.07257899641990662, "learning_rate": 0.0, "loss": 0.2477, "step": 49376 } ], "logging_steps": 1, "max_steps": 49376, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.2393504748092785e+19, "train_batch_size": 4, "trial_name": null, "trial_params": null }