|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 100.0, |
|
"eval_steps": 500, |
|
"global_step": 364600, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.13713658804168952, |
|
"grad_norm": 2.6796674728393555, |
|
"learning_rate": 4.993143170597916e-05, |
|
"loss": 6.6471, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.27427317608337903, |
|
"grad_norm": 2.4919307231903076, |
|
"learning_rate": 4.986286341195831e-05, |
|
"loss": 5.8655, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.4114097641250686, |
|
"grad_norm": 2.4707608222961426, |
|
"learning_rate": 4.9794295117937464e-05, |
|
"loss": 5.5305, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.5485463521667581, |
|
"grad_norm": 2.745155096054077, |
|
"learning_rate": 4.972572682391663e-05, |
|
"loss": 5.3124, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.6856829402084477, |
|
"grad_norm": 2.7914953231811523, |
|
"learning_rate": 4.965715852989578e-05, |
|
"loss": 5.1281, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.8228195282501372, |
|
"grad_norm": 2.9527249336242676, |
|
"learning_rate": 4.958859023587493e-05, |
|
"loss": 4.9858, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.9599561162918266, |
|
"grad_norm": 2.5611634254455566, |
|
"learning_rate": 4.952002194185409e-05, |
|
"loss": 4.8776, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.0970927043335161, |
|
"grad_norm": 3.1537859439849854, |
|
"learning_rate": 4.9451453647833245e-05, |
|
"loss": 4.7393, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.2342292923752056, |
|
"grad_norm": 2.9716005325317383, |
|
"learning_rate": 4.9382885353812394e-05, |
|
"loss": 4.67, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.3713658804168953, |
|
"grad_norm": 3.2810921669006348, |
|
"learning_rate": 4.931431705979155e-05, |
|
"loss": 4.5978, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.5085024684585848, |
|
"grad_norm": 3.3998653888702393, |
|
"learning_rate": 4.9245748765770713e-05, |
|
"loss": 4.5642, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.6456390565002743, |
|
"grad_norm": 3.440001964569092, |
|
"learning_rate": 4.917718047174987e-05, |
|
"loss": 4.5106, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.7827756445419638, |
|
"grad_norm": 3.469703435897827, |
|
"learning_rate": 4.910861217772902e-05, |
|
"loss": 4.4556, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.9199122325836533, |
|
"grad_norm": 3.6554129123687744, |
|
"learning_rate": 4.9040043883708175e-05, |
|
"loss": 4.418, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 2.0570488206253428, |
|
"grad_norm": 3.7117955684661865, |
|
"learning_rate": 4.897147558968733e-05, |
|
"loss": 4.3516, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 2.1941854086670323, |
|
"grad_norm": 3.9816272258758545, |
|
"learning_rate": 4.890290729566648e-05, |
|
"loss": 4.2883, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 2.3313219967087218, |
|
"grad_norm": 3.5806946754455566, |
|
"learning_rate": 4.8834339001645644e-05, |
|
"loss": 4.2377, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 2.4684585847504112, |
|
"grad_norm": 3.8521053791046143, |
|
"learning_rate": 4.87657707076248e-05, |
|
"loss": 4.2398, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 2.6055951727921007, |
|
"grad_norm": 3.8129312992095947, |
|
"learning_rate": 4.8697202413603956e-05, |
|
"loss": 4.2187, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 2.7427317608337907, |
|
"grad_norm": 3.7518739700317383, |
|
"learning_rate": 4.8628634119583105e-05, |
|
"loss": 4.1825, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 2.8798683488754797, |
|
"grad_norm": 4.074005603790283, |
|
"learning_rate": 4.856006582556226e-05, |
|
"loss": 4.1529, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 3.0170049369171696, |
|
"grad_norm": 3.723891258239746, |
|
"learning_rate": 4.849149753154142e-05, |
|
"loss": 4.1431, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 3.154141524958859, |
|
"grad_norm": 4.242587089538574, |
|
"learning_rate": 4.842292923752057e-05, |
|
"loss": 4.027, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 3.2912781130005486, |
|
"grad_norm": 4.178415775299072, |
|
"learning_rate": 4.835436094349973e-05, |
|
"loss": 4.0418, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 3.428414701042238, |
|
"grad_norm": 4.147921085357666, |
|
"learning_rate": 4.8285792649478886e-05, |
|
"loss": 4.0158, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 3.5655512890839276, |
|
"grad_norm": 4.463027000427246, |
|
"learning_rate": 4.821722435545804e-05, |
|
"loss": 4.014, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 3.702687877125617, |
|
"grad_norm": 3.6492116451263428, |
|
"learning_rate": 4.814865606143719e-05, |
|
"loss": 3.9874, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 3.8398244651673066, |
|
"grad_norm": 4.560110092163086, |
|
"learning_rate": 4.808008776741635e-05, |
|
"loss": 3.99, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 3.976961053208996, |
|
"grad_norm": 3.869370222091675, |
|
"learning_rate": 4.8011519473395504e-05, |
|
"loss": 3.9618, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 4.1140976412506856, |
|
"grad_norm": 4.145757675170898, |
|
"learning_rate": 4.794295117937466e-05, |
|
"loss": 3.8752, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 4.2512342292923755, |
|
"grad_norm": 3.9991416931152344, |
|
"learning_rate": 4.7874382885353817e-05, |
|
"loss": 3.8646, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 4.3883708173340645, |
|
"grad_norm": 4.006019115447998, |
|
"learning_rate": 4.780581459133297e-05, |
|
"loss": 3.8571, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 4.5255074053757545, |
|
"grad_norm": 3.8029978275299072, |
|
"learning_rate": 4.773724629731213e-05, |
|
"loss": 3.8605, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 4.6626439934174435, |
|
"grad_norm": 4.239439010620117, |
|
"learning_rate": 4.766867800329128e-05, |
|
"loss": 3.8272, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 4.799780581459133, |
|
"grad_norm": 4.278761863708496, |
|
"learning_rate": 4.7600109709270434e-05, |
|
"loss": 3.8232, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 4.9369171695008225, |
|
"grad_norm": 4.346251964569092, |
|
"learning_rate": 4.753154141524959e-05, |
|
"loss": 3.8141, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 5.074053757542512, |
|
"grad_norm": 4.909966468811035, |
|
"learning_rate": 4.746297312122875e-05, |
|
"loss": 3.7563, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 5.2111903455842015, |
|
"grad_norm": 4.102847099304199, |
|
"learning_rate": 4.73944048272079e-05, |
|
"loss": 3.7147, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 5.348326933625891, |
|
"grad_norm": 4.887523174285889, |
|
"learning_rate": 4.732583653318706e-05, |
|
"loss": 3.6985, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 5.485463521667581, |
|
"grad_norm": 4.481743812561035, |
|
"learning_rate": 4.7257268239166215e-05, |
|
"loss": 3.7095, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 5.62260010970927, |
|
"grad_norm": 4.673679828643799, |
|
"learning_rate": 4.7188699945145365e-05, |
|
"loss": 3.7015, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 5.75973669775096, |
|
"grad_norm": 4.764498233795166, |
|
"learning_rate": 4.712013165112452e-05, |
|
"loss": 3.6906, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 5.896873285792649, |
|
"grad_norm": 4.535381317138672, |
|
"learning_rate": 4.705156335710368e-05, |
|
"loss": 3.6911, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 6.034009873834339, |
|
"grad_norm": 4.882272720336914, |
|
"learning_rate": 4.698299506308283e-05, |
|
"loss": 3.6483, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 6.171146461876028, |
|
"grad_norm": 4.5370354652404785, |
|
"learning_rate": 4.691442676906199e-05, |
|
"loss": 3.5739, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 6.308283049917718, |
|
"grad_norm": 4.514719486236572, |
|
"learning_rate": 4.6845858475041146e-05, |
|
"loss": 3.5716, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 6.445419637959407, |
|
"grad_norm": 5.22483491897583, |
|
"learning_rate": 4.67772901810203e-05, |
|
"loss": 3.5767, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 6.582556226001097, |
|
"grad_norm": 4.764497756958008, |
|
"learning_rate": 4.670872188699945e-05, |
|
"loss": 3.5682, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 6.719692814042786, |
|
"grad_norm": 4.6897406578063965, |
|
"learning_rate": 4.664015359297861e-05, |
|
"loss": 3.5883, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 6.856829402084476, |
|
"grad_norm": 4.739509105682373, |
|
"learning_rate": 4.6571585298957763e-05, |
|
"loss": 3.5627, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 6.993965990126165, |
|
"grad_norm": 4.331806182861328, |
|
"learning_rate": 4.650301700493692e-05, |
|
"loss": 3.5664, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 7.131102578167855, |
|
"grad_norm": 4.402791500091553, |
|
"learning_rate": 4.6434448710916076e-05, |
|
"loss": 3.4629, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 7.268239166209545, |
|
"grad_norm": 4.822177410125732, |
|
"learning_rate": 4.636588041689523e-05, |
|
"loss": 3.4411, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 7.405375754251234, |
|
"grad_norm": 4.601207733154297, |
|
"learning_rate": 4.629731212287439e-05, |
|
"loss": 3.4691, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 7.542512342292924, |
|
"grad_norm": 5.0039215087890625, |
|
"learning_rate": 4.622874382885354e-05, |
|
"loss": 3.4672, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 7.679648930334613, |
|
"grad_norm": 4.404879093170166, |
|
"learning_rate": 4.6160175534832694e-05, |
|
"loss": 3.465, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 7.816785518376303, |
|
"grad_norm": 4.750667095184326, |
|
"learning_rate": 4.609160724081185e-05, |
|
"loss": 3.4425, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 7.953922106417992, |
|
"grad_norm": 5.396721363067627, |
|
"learning_rate": 4.6023038946791006e-05, |
|
"loss": 3.4686, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 8.091058694459681, |
|
"grad_norm": 4.806807518005371, |
|
"learning_rate": 4.595447065277016e-05, |
|
"loss": 3.3741, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 8.228195282501371, |
|
"grad_norm": 4.791159629821777, |
|
"learning_rate": 4.588590235874932e-05, |
|
"loss": 3.336, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 8.365331870543061, |
|
"grad_norm": 5.24031925201416, |
|
"learning_rate": 4.5817334064728475e-05, |
|
"loss": 3.3489, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 8.502468458584751, |
|
"grad_norm": 4.839347839355469, |
|
"learning_rate": 4.5748765770707624e-05, |
|
"loss": 3.3387, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 8.63960504662644, |
|
"grad_norm": 5.201210021972656, |
|
"learning_rate": 4.568019747668678e-05, |
|
"loss": 3.3739, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 8.776741634668129, |
|
"grad_norm": 4.874946117401123, |
|
"learning_rate": 4.5611629182665936e-05, |
|
"loss": 3.3543, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 8.913878222709819, |
|
"grad_norm": 4.564042091369629, |
|
"learning_rate": 4.554306088864509e-05, |
|
"loss": 3.3685, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 9.051014810751509, |
|
"grad_norm": 5.104782581329346, |
|
"learning_rate": 4.547449259462425e-05, |
|
"loss": 3.3079, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 9.188151398793199, |
|
"grad_norm": 5.117952823638916, |
|
"learning_rate": 4.5405924300603405e-05, |
|
"loss": 3.2292, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 9.325287986834887, |
|
"grad_norm": 5.0324387550354, |
|
"learning_rate": 4.533735600658256e-05, |
|
"loss": 3.2464, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 9.462424574876577, |
|
"grad_norm": 5.019642353057861, |
|
"learning_rate": 4.526878771256171e-05, |
|
"loss": 3.2425, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 9.599561162918267, |
|
"grad_norm": 4.830804824829102, |
|
"learning_rate": 4.5200219418540867e-05, |
|
"loss": 3.257, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 9.736697750959957, |
|
"grad_norm": 4.778350830078125, |
|
"learning_rate": 4.513165112452002e-05, |
|
"loss": 3.256, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 9.873834339001645, |
|
"grad_norm": 5.261332988739014, |
|
"learning_rate": 4.506308283049918e-05, |
|
"loss": 3.2568, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 10.010970927043335, |
|
"grad_norm": 5.060239315032959, |
|
"learning_rate": 4.4994514536478335e-05, |
|
"loss": 3.2595, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 10.148107515085025, |
|
"grad_norm": 4.848392009735107, |
|
"learning_rate": 4.492594624245749e-05, |
|
"loss": 3.1429, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 10.285244103126715, |
|
"grad_norm": 5.145500183105469, |
|
"learning_rate": 4.485737794843665e-05, |
|
"loss": 3.1512, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 10.422380691168403, |
|
"grad_norm": 4.9423041343688965, |
|
"learning_rate": 4.47888096544158e-05, |
|
"loss": 3.1507, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 10.559517279210093, |
|
"grad_norm": 4.46920108795166, |
|
"learning_rate": 4.472024136039495e-05, |
|
"loss": 3.1502, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 10.696653867251783, |
|
"grad_norm": 4.90908670425415, |
|
"learning_rate": 4.465167306637411e-05, |
|
"loss": 3.1577, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 10.833790455293473, |
|
"grad_norm": 4.850174903869629, |
|
"learning_rate": 4.4583104772353265e-05, |
|
"loss": 3.1667, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 10.970927043335163, |
|
"grad_norm": 4.480921268463135, |
|
"learning_rate": 4.451453647833242e-05, |
|
"loss": 3.1676, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 11.10806363137685, |
|
"grad_norm": 4.8438801765441895, |
|
"learning_rate": 4.444596818431158e-05, |
|
"loss": 3.0481, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 11.24520021941854, |
|
"grad_norm": 5.078440189361572, |
|
"learning_rate": 4.4377399890290734e-05, |
|
"loss": 3.0403, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 11.38233680746023, |
|
"grad_norm": 4.893128395080566, |
|
"learning_rate": 4.430883159626989e-05, |
|
"loss": 3.0463, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 11.51947339550192, |
|
"grad_norm": 5.149147033691406, |
|
"learning_rate": 4.424026330224904e-05, |
|
"loss": 3.0649, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 11.656609983543609, |
|
"grad_norm": 4.947761058807373, |
|
"learning_rate": 4.4171695008228196e-05, |
|
"loss": 3.0579, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 11.793746571585299, |
|
"grad_norm": 5.356738567352295, |
|
"learning_rate": 4.410312671420735e-05, |
|
"loss": 3.0662, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 11.930883159626989, |
|
"grad_norm": 5.635279655456543, |
|
"learning_rate": 4.403455842018651e-05, |
|
"loss": 3.0744, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 12.068019747668679, |
|
"grad_norm": 5.142524242401123, |
|
"learning_rate": 4.3965990126165664e-05, |
|
"loss": 3.0006, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 12.205156335710367, |
|
"grad_norm": 4.920190334320068, |
|
"learning_rate": 4.389742183214482e-05, |
|
"loss": 2.9331, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 12.342292923752057, |
|
"grad_norm": 5.261963367462158, |
|
"learning_rate": 4.3828853538123976e-05, |
|
"loss": 2.9692, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 12.479429511793747, |
|
"grad_norm": 5.450014114379883, |
|
"learning_rate": 4.3760285244103126e-05, |
|
"loss": 2.9458, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 12.616566099835437, |
|
"grad_norm": 5.4277520179748535, |
|
"learning_rate": 4.369171695008228e-05, |
|
"loss": 2.9547, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 12.753702687877126, |
|
"grad_norm": 5.046356201171875, |
|
"learning_rate": 4.362314865606144e-05, |
|
"loss": 2.953, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 12.890839275918815, |
|
"grad_norm": 4.98581075668335, |
|
"learning_rate": 4.3554580362040594e-05, |
|
"loss": 2.9482, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 13.027975863960505, |
|
"grad_norm": 5.0181450843811035, |
|
"learning_rate": 4.348601206801975e-05, |
|
"loss": 2.9263, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 13.165112452002194, |
|
"grad_norm": 5.356304168701172, |
|
"learning_rate": 4.341744377399891e-05, |
|
"loss": 2.8199, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 13.302249040043884, |
|
"grad_norm": 5.0527825355529785, |
|
"learning_rate": 4.334887547997806e-05, |
|
"loss": 2.8353, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 13.439385628085573, |
|
"grad_norm": 5.287441253662109, |
|
"learning_rate": 4.328030718595721e-05, |
|
"loss": 2.8235, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 13.576522216127263, |
|
"grad_norm": 5.292849540710449, |
|
"learning_rate": 4.321173889193637e-05, |
|
"loss": 2.8426, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 13.713658804168952, |
|
"grad_norm": 5.380087852478027, |
|
"learning_rate": 4.3143170597915525e-05, |
|
"loss": 2.8293, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 13.850795392210642, |
|
"grad_norm": 5.534645080566406, |
|
"learning_rate": 4.307460230389468e-05, |
|
"loss": 2.8535, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 13.98793198025233, |
|
"grad_norm": 5.294557571411133, |
|
"learning_rate": 4.300603400987384e-05, |
|
"loss": 2.8432, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 14.12506856829402, |
|
"grad_norm": 5.039003849029541, |
|
"learning_rate": 4.293746571585299e-05, |
|
"loss": 2.7162, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 14.26220515633571, |
|
"grad_norm": 5.455623149871826, |
|
"learning_rate": 4.286889742183215e-05, |
|
"loss": 2.712, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 14.3993417443774, |
|
"grad_norm": 5.256813049316406, |
|
"learning_rate": 4.28003291278113e-05, |
|
"loss": 2.7246, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 14.53647833241909, |
|
"grad_norm": 5.521039962768555, |
|
"learning_rate": 4.2731760833790455e-05, |
|
"loss": 2.7471, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 14.673614920460778, |
|
"grad_norm": 5.75991153717041, |
|
"learning_rate": 4.266319253976961e-05, |
|
"loss": 2.7177, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 14.810751508502468, |
|
"grad_norm": 4.9295759201049805, |
|
"learning_rate": 4.259462424574877e-05, |
|
"loss": 2.7111, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 14.947888096544158, |
|
"grad_norm": 4.961513042449951, |
|
"learning_rate": 4.252605595172792e-05, |
|
"loss": 2.7263, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 15.085024684585848, |
|
"grad_norm": 4.933211803436279, |
|
"learning_rate": 4.245748765770708e-05, |
|
"loss": 2.6599, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 15.222161272627536, |
|
"grad_norm": 5.510207176208496, |
|
"learning_rate": 4.2388919363686236e-05, |
|
"loss": 2.6078, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 15.359297860669226, |
|
"grad_norm": 5.186633110046387, |
|
"learning_rate": 4.2320351069665385e-05, |
|
"loss": 2.6238, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 15.496434448710916, |
|
"grad_norm": 5.6987690925598145, |
|
"learning_rate": 4.225178277564454e-05, |
|
"loss": 2.6189, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 15.633571036752606, |
|
"grad_norm": 5.060766696929932, |
|
"learning_rate": 4.21832144816237e-05, |
|
"loss": 2.6261, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 15.770707624794294, |
|
"grad_norm": 5.581600666046143, |
|
"learning_rate": 4.2114646187602854e-05, |
|
"loss": 2.6096, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 15.907844212835984, |
|
"grad_norm": 5.272013187408447, |
|
"learning_rate": 4.204607789358201e-05, |
|
"loss": 2.6243, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 16.044980800877674, |
|
"grad_norm": 5.0031538009643555, |
|
"learning_rate": 4.1977509599561166e-05, |
|
"loss": 2.5654, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 16.182117388919362, |
|
"grad_norm": 5.4185872077941895, |
|
"learning_rate": 4.190894130554032e-05, |
|
"loss": 2.4769, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 16.319253976961054, |
|
"grad_norm": 5.633464336395264, |
|
"learning_rate": 4.184037301151947e-05, |
|
"loss": 2.4867, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 16.456390565002742, |
|
"grad_norm": 5.207147598266602, |
|
"learning_rate": 4.177180471749863e-05, |
|
"loss": 2.5209, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 16.593527153044434, |
|
"grad_norm": 5.337882995605469, |
|
"learning_rate": 4.170323642347779e-05, |
|
"loss": 2.5095, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 16.730663741086122, |
|
"grad_norm": 5.710779666900635, |
|
"learning_rate": 4.163466812945694e-05, |
|
"loss": 2.5256, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 16.86780032912781, |
|
"grad_norm": 4.833573818206787, |
|
"learning_rate": 4.1566099835436096e-05, |
|
"loss": 2.5221, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 17.004936917169502, |
|
"grad_norm": 4.590396404266357, |
|
"learning_rate": 4.149753154141525e-05, |
|
"loss": 2.5274, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 17.14207350521119, |
|
"grad_norm": 5.467580318450928, |
|
"learning_rate": 4.142896324739441e-05, |
|
"loss": 2.3651, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 17.27921009325288, |
|
"grad_norm": 5.374948024749756, |
|
"learning_rate": 4.136039495337356e-05, |
|
"loss": 2.3904, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 17.41634668129457, |
|
"grad_norm": 5.345193386077881, |
|
"learning_rate": 4.1291826659352714e-05, |
|
"loss": 2.4162, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 17.553483269336258, |
|
"grad_norm": 5.317601680755615, |
|
"learning_rate": 4.122325836533188e-05, |
|
"loss": 2.4228, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 17.69061985737795, |
|
"grad_norm": 5.649726390838623, |
|
"learning_rate": 4.1154690071311026e-05, |
|
"loss": 2.398, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 17.827756445419638, |
|
"grad_norm": 4.870903015136719, |
|
"learning_rate": 4.108612177729018e-05, |
|
"loss": 2.4126, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 17.964893033461326, |
|
"grad_norm": 5.537862300872803, |
|
"learning_rate": 4.101755348326934e-05, |
|
"loss": 2.4315, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 18.102029621503018, |
|
"grad_norm": 5.414814472198486, |
|
"learning_rate": 4.0948985189248495e-05, |
|
"loss": 2.3059, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 18.239166209544706, |
|
"grad_norm": 5.167638301849365, |
|
"learning_rate": 4.0880416895227644e-05, |
|
"loss": 2.2847, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 18.376302797586398, |
|
"grad_norm": 5.151243209838867, |
|
"learning_rate": 4.08118486012068e-05, |
|
"loss": 2.2914, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 18.513439385628086, |
|
"grad_norm": 5.785707473754883, |
|
"learning_rate": 4.0743280307185963e-05, |
|
"loss": 2.3047, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 18.650575973669774, |
|
"grad_norm": 4.904608249664307, |
|
"learning_rate": 4.067471201316512e-05, |
|
"loss": 2.3021, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 18.787712561711466, |
|
"grad_norm": 5.454782009124756, |
|
"learning_rate": 4.060614371914427e-05, |
|
"loss": 2.3305, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 18.924849149753154, |
|
"grad_norm": 5.2010650634765625, |
|
"learning_rate": 4.0537575425123425e-05, |
|
"loss": 2.3182, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 19.061985737794842, |
|
"grad_norm": 5.094666481018066, |
|
"learning_rate": 4.046900713110258e-05, |
|
"loss": 2.2601, |
|
"step": 69500 |
|
}, |
|
{ |
|
"epoch": 19.199122325836534, |
|
"grad_norm": 5.217191696166992, |
|
"learning_rate": 4.040043883708173e-05, |
|
"loss": 2.1853, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 19.336258913878222, |
|
"grad_norm": 5.011998653411865, |
|
"learning_rate": 4.033187054306089e-05, |
|
"loss": 2.1981, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 19.473395501919914, |
|
"grad_norm": 5.134762287139893, |
|
"learning_rate": 4.026330224904005e-05, |
|
"loss": 2.2176, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 19.610532089961602, |
|
"grad_norm": 5.362982273101807, |
|
"learning_rate": 4.0194733955019206e-05, |
|
"loss": 2.2141, |
|
"step": 71500 |
|
}, |
|
{ |
|
"epoch": 19.74766867800329, |
|
"grad_norm": 5.136562347412109, |
|
"learning_rate": 4.0126165660998355e-05, |
|
"loss": 2.2004, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 19.88480526604498, |
|
"grad_norm": 5.2206220626831055, |
|
"learning_rate": 4.005759736697751e-05, |
|
"loss": 2.2177, |
|
"step": 72500 |
|
}, |
|
{ |
|
"epoch": 20.02194185408667, |
|
"grad_norm": 5.294692516326904, |
|
"learning_rate": 3.998902907295667e-05, |
|
"loss": 2.1987, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 20.15907844212836, |
|
"grad_norm": 5.572756767272949, |
|
"learning_rate": 3.992046077893582e-05, |
|
"loss": 2.0699, |
|
"step": 73500 |
|
}, |
|
{ |
|
"epoch": 20.29621503017005, |
|
"grad_norm": 5.801488876342773, |
|
"learning_rate": 3.985189248491497e-05, |
|
"loss": 2.0924, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 20.433351618211738, |
|
"grad_norm": 5.149176120758057, |
|
"learning_rate": 3.9783324190894136e-05, |
|
"loss": 2.114, |
|
"step": 74500 |
|
}, |
|
{ |
|
"epoch": 20.57048820625343, |
|
"grad_norm": 5.202007293701172, |
|
"learning_rate": 3.971475589687329e-05, |
|
"loss": 2.1022, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 20.707624794295118, |
|
"grad_norm": 4.496254920959473, |
|
"learning_rate": 3.964618760285244e-05, |
|
"loss": 2.1327, |
|
"step": 75500 |
|
}, |
|
{ |
|
"epoch": 20.844761382336806, |
|
"grad_norm": 5.123493194580078, |
|
"learning_rate": 3.95776193088316e-05, |
|
"loss": 2.1265, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 20.981897970378498, |
|
"grad_norm": 5.082859516143799, |
|
"learning_rate": 3.9509051014810754e-05, |
|
"loss": 2.1341, |
|
"step": 76500 |
|
}, |
|
{ |
|
"epoch": 21.119034558420186, |
|
"grad_norm": 4.651580810546875, |
|
"learning_rate": 3.9440482720789904e-05, |
|
"loss": 2.0016, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 21.256171146461877, |
|
"grad_norm": 5.409528732299805, |
|
"learning_rate": 3.9371914426769066e-05, |
|
"loss": 2.0007, |
|
"step": 77500 |
|
}, |
|
{ |
|
"epoch": 21.393307734503566, |
|
"grad_norm": 5.502586841583252, |
|
"learning_rate": 3.930334613274822e-05, |
|
"loss": 2.0057, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 21.530444322545254, |
|
"grad_norm": 5.030213356018066, |
|
"learning_rate": 3.923477783872738e-05, |
|
"loss": 2.0159, |
|
"step": 78500 |
|
}, |
|
{ |
|
"epoch": 21.667580910586945, |
|
"grad_norm": 4.999740123748779, |
|
"learning_rate": 3.916620954470653e-05, |
|
"loss": 2.0237, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 21.804717498628634, |
|
"grad_norm": 5.182149887084961, |
|
"learning_rate": 3.9097641250685684e-05, |
|
"loss": 2.0275, |
|
"step": 79500 |
|
}, |
|
{ |
|
"epoch": 21.941854086670325, |
|
"grad_norm": 5.282116889953613, |
|
"learning_rate": 3.902907295666484e-05, |
|
"loss": 2.0473, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 22.078990674712013, |
|
"grad_norm": 4.748703956604004, |
|
"learning_rate": 3.896050466264399e-05, |
|
"loss": 1.9621, |
|
"step": 80500 |
|
}, |
|
{ |
|
"epoch": 22.2161272627537, |
|
"grad_norm": 4.832570552825928, |
|
"learning_rate": 3.889193636862315e-05, |
|
"loss": 1.9008, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 22.353263850795393, |
|
"grad_norm": 5.199923992156982, |
|
"learning_rate": 3.882336807460231e-05, |
|
"loss": 1.9096, |
|
"step": 81500 |
|
}, |
|
{ |
|
"epoch": 22.49040043883708, |
|
"grad_norm": 5.1267499923706055, |
|
"learning_rate": 3.8754799780581465e-05, |
|
"loss": 1.9247, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 22.62753702687877, |
|
"grad_norm": 4.7476606369018555, |
|
"learning_rate": 3.8686231486560615e-05, |
|
"loss": 1.9216, |
|
"step": 82500 |
|
}, |
|
{ |
|
"epoch": 22.76467361492046, |
|
"grad_norm": 5.416210174560547, |
|
"learning_rate": 3.861766319253977e-05, |
|
"loss": 1.9493, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 22.90181020296215, |
|
"grad_norm": 5.211349010467529, |
|
"learning_rate": 3.854909489851893e-05, |
|
"loss": 1.9391, |
|
"step": 83500 |
|
}, |
|
{ |
|
"epoch": 23.03894679100384, |
|
"grad_norm": 5.296257495880127, |
|
"learning_rate": 3.8480526604498076e-05, |
|
"loss": 1.9047, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 23.17608337904553, |
|
"grad_norm": 5.259824752807617, |
|
"learning_rate": 3.841195831047724e-05, |
|
"loss": 1.803, |
|
"step": 84500 |
|
}, |
|
{ |
|
"epoch": 23.313219967087218, |
|
"grad_norm": 4.756730079650879, |
|
"learning_rate": 3.8343390016456395e-05, |
|
"loss": 1.8172, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 23.45035655512891, |
|
"grad_norm": 5.009732723236084, |
|
"learning_rate": 3.827482172243555e-05, |
|
"loss": 1.8314, |
|
"step": 85500 |
|
}, |
|
{ |
|
"epoch": 23.587493143170597, |
|
"grad_norm": 5.3414082527160645, |
|
"learning_rate": 3.82062534284147e-05, |
|
"loss": 1.8489, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 23.72462973121229, |
|
"grad_norm": 4.76619815826416, |
|
"learning_rate": 3.813768513439386e-05, |
|
"loss": 1.8542, |
|
"step": 86500 |
|
}, |
|
{ |
|
"epoch": 23.861766319253977, |
|
"grad_norm": 5.249925136566162, |
|
"learning_rate": 3.806911684037301e-05, |
|
"loss": 1.8639, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 23.998902907295665, |
|
"grad_norm": 4.97225284576416, |
|
"learning_rate": 3.800054854635216e-05, |
|
"loss": 1.8802, |
|
"step": 87500 |
|
}, |
|
{ |
|
"epoch": 24.136039495337357, |
|
"grad_norm": 5.291701793670654, |
|
"learning_rate": 3.7931980252331326e-05, |
|
"loss": 1.7144, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 24.273176083379045, |
|
"grad_norm": 5.1743340492248535, |
|
"learning_rate": 3.786341195831048e-05, |
|
"loss": 1.7312, |
|
"step": 88500 |
|
}, |
|
{ |
|
"epoch": 24.410312671420733, |
|
"grad_norm": 5.3917646408081055, |
|
"learning_rate": 3.779484366428964e-05, |
|
"loss": 1.7488, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 24.547449259462425, |
|
"grad_norm": 4.806937217712402, |
|
"learning_rate": 3.772627537026879e-05, |
|
"loss": 1.7637, |
|
"step": 89500 |
|
}, |
|
{ |
|
"epoch": 24.684585847504113, |
|
"grad_norm": 5.0730156898498535, |
|
"learning_rate": 3.7657707076247944e-05, |
|
"loss": 1.7668, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 24.821722435545805, |
|
"grad_norm": 4.786214828491211, |
|
"learning_rate": 3.75891387822271e-05, |
|
"loss": 1.7769, |
|
"step": 90500 |
|
}, |
|
{ |
|
"epoch": 24.958859023587493, |
|
"grad_norm": 5.39318323135376, |
|
"learning_rate": 3.752057048820625e-05, |
|
"loss": 1.7903, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 25.09599561162918, |
|
"grad_norm": 4.981703281402588, |
|
"learning_rate": 3.745200219418541e-05, |
|
"loss": 1.6718, |
|
"step": 91500 |
|
}, |
|
{ |
|
"epoch": 25.233132199670873, |
|
"grad_norm": 4.901900291442871, |
|
"learning_rate": 3.738343390016457e-05, |
|
"loss": 1.6542, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 25.37026878771256, |
|
"grad_norm": 5.158128261566162, |
|
"learning_rate": 3.7314865606143724e-05, |
|
"loss": 1.6573, |
|
"step": 92500 |
|
}, |
|
{ |
|
"epoch": 25.507405375754253, |
|
"grad_norm": 4.649386882781982, |
|
"learning_rate": 3.7246297312122874e-05, |
|
"loss": 1.6773, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 25.64454196379594, |
|
"grad_norm": 4.9402666091918945, |
|
"learning_rate": 3.717772901810203e-05, |
|
"loss": 1.687, |
|
"step": 93500 |
|
}, |
|
{ |
|
"epoch": 25.78167855183763, |
|
"grad_norm": 5.1116180419921875, |
|
"learning_rate": 3.7109160724081186e-05, |
|
"loss": 1.701, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 25.91881513987932, |
|
"grad_norm": 5.389803886413574, |
|
"learning_rate": 3.704059243006034e-05, |
|
"loss": 1.7042, |
|
"step": 94500 |
|
}, |
|
{ |
|
"epoch": 26.05595172792101, |
|
"grad_norm": 5.371042251586914, |
|
"learning_rate": 3.69720241360395e-05, |
|
"loss": 1.637, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 26.193088315962697, |
|
"grad_norm": 5.292448997497559, |
|
"learning_rate": 3.6903455842018655e-05, |
|
"loss": 1.5579, |
|
"step": 95500 |
|
}, |
|
{ |
|
"epoch": 26.33022490400439, |
|
"grad_norm": 5.034709453582764, |
|
"learning_rate": 3.683488754799781e-05, |
|
"loss": 1.5781, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 26.467361492046077, |
|
"grad_norm": 4.979785919189453, |
|
"learning_rate": 3.676631925397696e-05, |
|
"loss": 1.6006, |
|
"step": 96500 |
|
}, |
|
{ |
|
"epoch": 26.60449808008777, |
|
"grad_norm": 4.940494537353516, |
|
"learning_rate": 3.6697750959956116e-05, |
|
"loss": 1.6032, |
|
"step": 97000 |
|
}, |
|
{ |
|
"epoch": 26.741634668129457, |
|
"grad_norm": 5.339479923248291, |
|
"learning_rate": 3.662918266593527e-05, |
|
"loss": 1.6248, |
|
"step": 97500 |
|
}, |
|
{ |
|
"epoch": 26.878771256171145, |
|
"grad_norm": 5.139049530029297, |
|
"learning_rate": 3.656061437191443e-05, |
|
"loss": 1.6189, |
|
"step": 98000 |
|
}, |
|
{ |
|
"epoch": 27.015907844212837, |
|
"grad_norm": 4.733531951904297, |
|
"learning_rate": 3.6492046077893585e-05, |
|
"loss": 1.6215, |
|
"step": 98500 |
|
}, |
|
{ |
|
"epoch": 27.153044432254525, |
|
"grad_norm": 5.294017791748047, |
|
"learning_rate": 3.642347778387274e-05, |
|
"loss": 1.4842, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 27.290181020296217, |
|
"grad_norm": 5.071205139160156, |
|
"learning_rate": 3.63549094898519e-05, |
|
"loss": 1.5071, |
|
"step": 99500 |
|
}, |
|
{ |
|
"epoch": 27.427317608337905, |
|
"grad_norm": 5.08548641204834, |
|
"learning_rate": 3.628634119583105e-05, |
|
"loss": 1.5179, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 27.564454196379593, |
|
"grad_norm": 5.183330059051514, |
|
"learning_rate": 3.62177729018102e-05, |
|
"loss": 1.5282, |
|
"step": 100500 |
|
}, |
|
{ |
|
"epoch": 27.701590784421285, |
|
"grad_norm": 4.851142406463623, |
|
"learning_rate": 3.614920460778936e-05, |
|
"loss": 1.5419, |
|
"step": 101000 |
|
}, |
|
{ |
|
"epoch": 27.838727372462973, |
|
"grad_norm": 4.878331661224365, |
|
"learning_rate": 3.6080636313768515e-05, |
|
"loss": 1.5537, |
|
"step": 101500 |
|
}, |
|
{ |
|
"epoch": 27.97586396050466, |
|
"grad_norm": 5.406539440155029, |
|
"learning_rate": 3.601206801974767e-05, |
|
"loss": 1.5562, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 28.113000548546353, |
|
"grad_norm": 5.543664455413818, |
|
"learning_rate": 3.594349972572683e-05, |
|
"loss": 1.4368, |
|
"step": 102500 |
|
}, |
|
{ |
|
"epoch": 28.25013713658804, |
|
"grad_norm": 5.570579528808594, |
|
"learning_rate": 3.5874931431705984e-05, |
|
"loss": 1.4252, |
|
"step": 103000 |
|
}, |
|
{ |
|
"epoch": 28.387273724629733, |
|
"grad_norm": 4.777440547943115, |
|
"learning_rate": 3.580636313768513e-05, |
|
"loss": 1.4427, |
|
"step": 103500 |
|
}, |
|
{ |
|
"epoch": 28.52441031267142, |
|
"grad_norm": 4.820840835571289, |
|
"learning_rate": 3.573779484366429e-05, |
|
"loss": 1.4574, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 28.66154690071311, |
|
"grad_norm": 4.499929904937744, |
|
"learning_rate": 3.5669226549643445e-05, |
|
"loss": 1.4651, |
|
"step": 104500 |
|
}, |
|
{ |
|
"epoch": 28.7986834887548, |
|
"grad_norm": 4.876035213470459, |
|
"learning_rate": 3.56006582556226e-05, |
|
"loss": 1.4721, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 28.93582007679649, |
|
"grad_norm": 5.974823951721191, |
|
"learning_rate": 3.553208996160176e-05, |
|
"loss": 1.4854, |
|
"step": 105500 |
|
}, |
|
{ |
|
"epoch": 29.07295666483818, |
|
"grad_norm": 5.119105815887451, |
|
"learning_rate": 3.5463521667580914e-05, |
|
"loss": 1.4078, |
|
"step": 106000 |
|
}, |
|
{ |
|
"epoch": 29.21009325287987, |
|
"grad_norm": 4.832869052886963, |
|
"learning_rate": 3.539495337356007e-05, |
|
"loss": 1.3521, |
|
"step": 106500 |
|
}, |
|
{ |
|
"epoch": 29.347229840921557, |
|
"grad_norm": 5.020029544830322, |
|
"learning_rate": 3.532638507953922e-05, |
|
"loss": 1.3673, |
|
"step": 107000 |
|
}, |
|
{ |
|
"epoch": 29.48436642896325, |
|
"grad_norm": 5.573171615600586, |
|
"learning_rate": 3.5257816785518376e-05, |
|
"loss": 1.3926, |
|
"step": 107500 |
|
}, |
|
{ |
|
"epoch": 29.621503017004937, |
|
"grad_norm": 5.574306488037109, |
|
"learning_rate": 3.518924849149753e-05, |
|
"loss": 1.4032, |
|
"step": 108000 |
|
}, |
|
{ |
|
"epoch": 29.758639605046625, |
|
"grad_norm": 5.316165924072266, |
|
"learning_rate": 3.512068019747669e-05, |
|
"loss": 1.4039, |
|
"step": 108500 |
|
}, |
|
{ |
|
"epoch": 29.895776193088317, |
|
"grad_norm": 5.210799217224121, |
|
"learning_rate": 3.5052111903455844e-05, |
|
"loss": 1.4099, |
|
"step": 109000 |
|
}, |
|
{ |
|
"epoch": 30.032912781130005, |
|
"grad_norm": 4.973813056945801, |
|
"learning_rate": 3.4983543609435e-05, |
|
"loss": 1.3881, |
|
"step": 109500 |
|
}, |
|
{ |
|
"epoch": 30.170049369171696, |
|
"grad_norm": 4.447306156158447, |
|
"learning_rate": 3.4914975315414157e-05, |
|
"loss": 1.2853, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 30.307185957213385, |
|
"grad_norm": 5.200187683105469, |
|
"learning_rate": 3.4846407021393306e-05, |
|
"loss": 1.2992, |
|
"step": 110500 |
|
}, |
|
{ |
|
"epoch": 30.444322545255073, |
|
"grad_norm": 5.067360877990723, |
|
"learning_rate": 3.477783872737246e-05, |
|
"loss": 1.3248, |
|
"step": 111000 |
|
}, |
|
{ |
|
"epoch": 30.581459133296764, |
|
"grad_norm": 5.2174391746521, |
|
"learning_rate": 3.470927043335162e-05, |
|
"loss": 1.326, |
|
"step": 111500 |
|
}, |
|
{ |
|
"epoch": 30.718595721338453, |
|
"grad_norm": 5.6121392250061035, |
|
"learning_rate": 3.4640702139330774e-05, |
|
"loss": 1.3509, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 30.855732309380144, |
|
"grad_norm": 5.090517997741699, |
|
"learning_rate": 3.457213384530993e-05, |
|
"loss": 1.3437, |
|
"step": 112500 |
|
}, |
|
{ |
|
"epoch": 30.992868897421832, |
|
"grad_norm": 4.977377414703369, |
|
"learning_rate": 3.450356555128909e-05, |
|
"loss": 1.3577, |
|
"step": 113000 |
|
}, |
|
{ |
|
"epoch": 31.13000548546352, |
|
"grad_norm": 5.1490478515625, |
|
"learning_rate": 3.443499725726824e-05, |
|
"loss": 1.2348, |
|
"step": 113500 |
|
}, |
|
{ |
|
"epoch": 31.267142073505212, |
|
"grad_norm": 4.903263092041016, |
|
"learning_rate": 3.436642896324739e-05, |
|
"loss": 1.2272, |
|
"step": 114000 |
|
}, |
|
{ |
|
"epoch": 31.4042786615469, |
|
"grad_norm": 5.068541049957275, |
|
"learning_rate": 3.429786066922655e-05, |
|
"loss": 1.2484, |
|
"step": 114500 |
|
}, |
|
{ |
|
"epoch": 31.54141524958859, |
|
"grad_norm": 5.064205169677734, |
|
"learning_rate": 3.4229292375205705e-05, |
|
"loss": 1.2704, |
|
"step": 115000 |
|
}, |
|
{ |
|
"epoch": 31.67855183763028, |
|
"grad_norm": 5.463748455047607, |
|
"learning_rate": 3.416072408118486e-05, |
|
"loss": 1.2753, |
|
"step": 115500 |
|
}, |
|
{ |
|
"epoch": 31.81568842567197, |
|
"grad_norm": 4.637465476989746, |
|
"learning_rate": 3.409215578716402e-05, |
|
"loss": 1.2875, |
|
"step": 116000 |
|
}, |
|
{ |
|
"epoch": 31.95282501371366, |
|
"grad_norm": 4.767406463623047, |
|
"learning_rate": 3.402358749314317e-05, |
|
"loss": 1.2919, |
|
"step": 116500 |
|
}, |
|
{ |
|
"epoch": 32.08996160175535, |
|
"grad_norm": 4.907227993011475, |
|
"learning_rate": 3.395501919912233e-05, |
|
"loss": 1.2129, |
|
"step": 117000 |
|
}, |
|
{ |
|
"epoch": 32.22709818979704, |
|
"grad_norm": 4.724886417388916, |
|
"learning_rate": 3.388645090510148e-05, |
|
"loss": 1.1743, |
|
"step": 117500 |
|
}, |
|
{ |
|
"epoch": 32.364234777838725, |
|
"grad_norm": 5.002569198608398, |
|
"learning_rate": 3.3817882611080635e-05, |
|
"loss": 1.1865, |
|
"step": 118000 |
|
}, |
|
{ |
|
"epoch": 32.501371365880416, |
|
"grad_norm": 4.655109405517578, |
|
"learning_rate": 3.374931431705979e-05, |
|
"loss": 1.2088, |
|
"step": 118500 |
|
}, |
|
{ |
|
"epoch": 32.63850795392211, |
|
"grad_norm": 5.301872730255127, |
|
"learning_rate": 3.368074602303895e-05, |
|
"loss": 1.2133, |
|
"step": 119000 |
|
}, |
|
{ |
|
"epoch": 32.77564454196379, |
|
"grad_norm": 5.0290846824646, |
|
"learning_rate": 3.3612177729018103e-05, |
|
"loss": 1.224, |
|
"step": 119500 |
|
}, |
|
{ |
|
"epoch": 32.912781130005484, |
|
"grad_norm": 5.073773384094238, |
|
"learning_rate": 3.354360943499726e-05, |
|
"loss": 1.2292, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 33.049917718047176, |
|
"grad_norm": 5.129011631011963, |
|
"learning_rate": 3.3475041140976416e-05, |
|
"loss": 1.1887, |
|
"step": 120500 |
|
}, |
|
{ |
|
"epoch": 33.18705430608887, |
|
"grad_norm": 5.234120845794678, |
|
"learning_rate": 3.340647284695557e-05, |
|
"loss": 1.1145, |
|
"step": 121000 |
|
}, |
|
{ |
|
"epoch": 33.32419089413055, |
|
"grad_norm": 5.61316442489624, |
|
"learning_rate": 3.333790455293472e-05, |
|
"loss": 1.1246, |
|
"step": 121500 |
|
}, |
|
{ |
|
"epoch": 33.461327482172244, |
|
"grad_norm": 5.373575210571289, |
|
"learning_rate": 3.326933625891388e-05, |
|
"loss": 1.1392, |
|
"step": 122000 |
|
}, |
|
{ |
|
"epoch": 33.598464070213936, |
|
"grad_norm": 5.573062419891357, |
|
"learning_rate": 3.3200767964893034e-05, |
|
"loss": 1.1565, |
|
"step": 122500 |
|
}, |
|
{ |
|
"epoch": 33.73560065825562, |
|
"grad_norm": 5.016828536987305, |
|
"learning_rate": 3.313219967087219e-05, |
|
"loss": 1.1566, |
|
"step": 123000 |
|
}, |
|
{ |
|
"epoch": 33.87273724629731, |
|
"grad_norm": 5.493660926818848, |
|
"learning_rate": 3.3063631376851346e-05, |
|
"loss": 1.1775, |
|
"step": 123500 |
|
}, |
|
{ |
|
"epoch": 34.009873834339004, |
|
"grad_norm": 4.961755275726318, |
|
"learning_rate": 3.29950630828305e-05, |
|
"loss": 1.182, |
|
"step": 124000 |
|
}, |
|
{ |
|
"epoch": 34.14701042238069, |
|
"grad_norm": 5.307010173797607, |
|
"learning_rate": 3.292649478880966e-05, |
|
"loss": 1.0434, |
|
"step": 124500 |
|
}, |
|
{ |
|
"epoch": 34.28414701042238, |
|
"grad_norm": 5.011436462402344, |
|
"learning_rate": 3.285792649478881e-05, |
|
"loss": 1.0732, |
|
"step": 125000 |
|
}, |
|
{ |
|
"epoch": 34.42128359846407, |
|
"grad_norm": 5.172646522521973, |
|
"learning_rate": 3.2789358200767964e-05, |
|
"loss": 1.0886, |
|
"step": 125500 |
|
}, |
|
{ |
|
"epoch": 34.55842018650576, |
|
"grad_norm": 5.302252769470215, |
|
"learning_rate": 3.272078990674713e-05, |
|
"loss": 1.0978, |
|
"step": 126000 |
|
}, |
|
{ |
|
"epoch": 34.69555677454745, |
|
"grad_norm": 5.635678768157959, |
|
"learning_rate": 3.2652221612726276e-05, |
|
"loss": 1.1098, |
|
"step": 126500 |
|
}, |
|
{ |
|
"epoch": 34.83269336258914, |
|
"grad_norm": 5.217731475830078, |
|
"learning_rate": 3.258365331870543e-05, |
|
"loss": 1.1163, |
|
"step": 127000 |
|
}, |
|
{ |
|
"epoch": 34.96982995063083, |
|
"grad_norm": 5.012636661529541, |
|
"learning_rate": 3.251508502468459e-05, |
|
"loss": 1.1212, |
|
"step": 127500 |
|
}, |
|
{ |
|
"epoch": 35.106966538672516, |
|
"grad_norm": 4.536286354064941, |
|
"learning_rate": 3.2446516730663745e-05, |
|
"loss": 1.0247, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 35.24410312671421, |
|
"grad_norm": 5.208780288696289, |
|
"learning_rate": 3.2377948436642894e-05, |
|
"loss": 1.0127, |
|
"step": 128500 |
|
}, |
|
{ |
|
"epoch": 35.3812397147559, |
|
"grad_norm": 5.084893226623535, |
|
"learning_rate": 3.230938014262205e-05, |
|
"loss": 1.0279, |
|
"step": 129000 |
|
}, |
|
{ |
|
"epoch": 35.518376302797584, |
|
"grad_norm": 4.847336769104004, |
|
"learning_rate": 3.224081184860121e-05, |
|
"loss": 1.0429, |
|
"step": 129500 |
|
}, |
|
{ |
|
"epoch": 35.655512890839276, |
|
"grad_norm": 5.661252021789551, |
|
"learning_rate": 3.217224355458036e-05, |
|
"loss": 1.0581, |
|
"step": 130000 |
|
}, |
|
{ |
|
"epoch": 35.79264947888097, |
|
"grad_norm": 5.379410743713379, |
|
"learning_rate": 3.210367526055952e-05, |
|
"loss": 1.0666, |
|
"step": 130500 |
|
}, |
|
{ |
|
"epoch": 35.92978606692265, |
|
"grad_norm": 5.579956531524658, |
|
"learning_rate": 3.2035106966538675e-05, |
|
"loss": 1.0739, |
|
"step": 131000 |
|
}, |
|
{ |
|
"epoch": 36.066922654964344, |
|
"grad_norm": 5.134979248046875, |
|
"learning_rate": 3.196653867251783e-05, |
|
"loss": 1.01, |
|
"step": 131500 |
|
}, |
|
{ |
|
"epoch": 36.204059243006036, |
|
"grad_norm": 5.556998252868652, |
|
"learning_rate": 3.189797037849698e-05, |
|
"loss": 0.9573, |
|
"step": 132000 |
|
}, |
|
{ |
|
"epoch": 36.34119583104772, |
|
"grad_norm": 5.259885787963867, |
|
"learning_rate": 3.182940208447614e-05, |
|
"loss": 0.9718, |
|
"step": 132500 |
|
}, |
|
{ |
|
"epoch": 36.47833241908941, |
|
"grad_norm": 5.2222208976745605, |
|
"learning_rate": 3.17608337904553e-05, |
|
"loss": 0.9924, |
|
"step": 133000 |
|
}, |
|
{ |
|
"epoch": 36.615469007131104, |
|
"grad_norm": 5.009335041046143, |
|
"learning_rate": 3.169226549643445e-05, |
|
"loss": 0.9983, |
|
"step": 133500 |
|
}, |
|
{ |
|
"epoch": 36.752605595172795, |
|
"grad_norm": 4.928483009338379, |
|
"learning_rate": 3.1623697202413605e-05, |
|
"loss": 1.0232, |
|
"step": 134000 |
|
}, |
|
{ |
|
"epoch": 36.88974218321448, |
|
"grad_norm": 5.5725226402282715, |
|
"learning_rate": 3.155512890839276e-05, |
|
"loss": 1.0157, |
|
"step": 134500 |
|
}, |
|
{ |
|
"epoch": 37.02687877125617, |
|
"grad_norm": 5.25609827041626, |
|
"learning_rate": 3.148656061437192e-05, |
|
"loss": 1.0, |
|
"step": 135000 |
|
}, |
|
{ |
|
"epoch": 37.16401535929786, |
|
"grad_norm": 5.325344085693359, |
|
"learning_rate": 3.141799232035107e-05, |
|
"loss": 0.9007, |
|
"step": 135500 |
|
}, |
|
{ |
|
"epoch": 37.30115194733955, |
|
"grad_norm": 5.14201021194458, |
|
"learning_rate": 3.134942402633022e-05, |
|
"loss": 0.9298, |
|
"step": 136000 |
|
}, |
|
{ |
|
"epoch": 37.43828853538124, |
|
"grad_norm": 5.08565092086792, |
|
"learning_rate": 3.1280855732309386e-05, |
|
"loss": 0.941, |
|
"step": 136500 |
|
}, |
|
{ |
|
"epoch": 37.57542512342293, |
|
"grad_norm": 5.582076549530029, |
|
"learning_rate": 3.1212287438288536e-05, |
|
"loss": 0.9441, |
|
"step": 137000 |
|
}, |
|
{ |
|
"epoch": 37.712561711464616, |
|
"grad_norm": 5.214138031005859, |
|
"learning_rate": 3.114371914426769e-05, |
|
"loss": 0.9642, |
|
"step": 137500 |
|
}, |
|
{ |
|
"epoch": 37.84969829950631, |
|
"grad_norm": 5.962718963623047, |
|
"learning_rate": 3.107515085024685e-05, |
|
"loss": 0.9764, |
|
"step": 138000 |
|
}, |
|
{ |
|
"epoch": 37.986834887548, |
|
"grad_norm": 5.05949592590332, |
|
"learning_rate": 3.1006582556226004e-05, |
|
"loss": 0.9785, |
|
"step": 138500 |
|
}, |
|
{ |
|
"epoch": 38.123971475589684, |
|
"grad_norm": 5.427227020263672, |
|
"learning_rate": 3.0938014262205153e-05, |
|
"loss": 0.8783, |
|
"step": 139000 |
|
}, |
|
{ |
|
"epoch": 38.261108063631376, |
|
"grad_norm": 5.215878009796143, |
|
"learning_rate": 3.086944596818431e-05, |
|
"loss": 0.8717, |
|
"step": 139500 |
|
}, |
|
{ |
|
"epoch": 38.39824465167307, |
|
"grad_norm": 5.433798789978027, |
|
"learning_rate": 3.080087767416347e-05, |
|
"loss": 0.8879, |
|
"step": 140000 |
|
}, |
|
{ |
|
"epoch": 38.53538123971476, |
|
"grad_norm": 5.417360782623291, |
|
"learning_rate": 3.073230938014262e-05, |
|
"loss": 0.9018, |
|
"step": 140500 |
|
}, |
|
{ |
|
"epoch": 38.672517827756444, |
|
"grad_norm": 5.553948879241943, |
|
"learning_rate": 3.066374108612178e-05, |
|
"loss": 0.9187, |
|
"step": 141000 |
|
}, |
|
{ |
|
"epoch": 38.809654415798136, |
|
"grad_norm": 5.142756938934326, |
|
"learning_rate": 3.0595172792100934e-05, |
|
"loss": 0.9246, |
|
"step": 141500 |
|
}, |
|
{ |
|
"epoch": 38.94679100383983, |
|
"grad_norm": 5.797046184539795, |
|
"learning_rate": 3.052660449808009e-05, |
|
"loss": 0.9304, |
|
"step": 142000 |
|
}, |
|
{ |
|
"epoch": 39.08392759188151, |
|
"grad_norm": 4.4715986251831055, |
|
"learning_rate": 3.0458036204059243e-05, |
|
"loss": 0.859, |
|
"step": 142500 |
|
}, |
|
{ |
|
"epoch": 39.221064179923204, |
|
"grad_norm": 4.92647123336792, |
|
"learning_rate": 3.03894679100384e-05, |
|
"loss": 0.8293, |
|
"step": 143000 |
|
}, |
|
{ |
|
"epoch": 39.358200767964895, |
|
"grad_norm": 5.064645767211914, |
|
"learning_rate": 3.0320899616017556e-05, |
|
"loss": 0.8431, |
|
"step": 143500 |
|
}, |
|
{ |
|
"epoch": 39.49533735600658, |
|
"grad_norm": 5.243420600891113, |
|
"learning_rate": 3.025233132199671e-05, |
|
"loss": 0.8615, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 39.63247394404827, |
|
"grad_norm": 6.133671760559082, |
|
"learning_rate": 3.0183763027975865e-05, |
|
"loss": 0.8714, |
|
"step": 144500 |
|
}, |
|
{ |
|
"epoch": 39.76961053208996, |
|
"grad_norm": 5.261296272277832, |
|
"learning_rate": 3.011519473395502e-05, |
|
"loss": 0.8801, |
|
"step": 145000 |
|
}, |
|
{ |
|
"epoch": 39.90674712013165, |
|
"grad_norm": 5.25457239151001, |
|
"learning_rate": 3.0046626439934177e-05, |
|
"loss": 0.8869, |
|
"step": 145500 |
|
}, |
|
{ |
|
"epoch": 40.04388370817334, |
|
"grad_norm": 5.886989116668701, |
|
"learning_rate": 2.997805814591333e-05, |
|
"loss": 0.8569, |
|
"step": 146000 |
|
}, |
|
{ |
|
"epoch": 40.18102029621503, |
|
"grad_norm": 4.354552745819092, |
|
"learning_rate": 2.9909489851892486e-05, |
|
"loss": 0.7847, |
|
"step": 146500 |
|
}, |
|
{ |
|
"epoch": 40.31815688425672, |
|
"grad_norm": 5.114023208618164, |
|
"learning_rate": 2.9840921557871642e-05, |
|
"loss": 0.8053, |
|
"step": 147000 |
|
}, |
|
{ |
|
"epoch": 40.45529347229841, |
|
"grad_norm": 5.665450572967529, |
|
"learning_rate": 2.9772353263850798e-05, |
|
"loss": 0.8053, |
|
"step": 147500 |
|
}, |
|
{ |
|
"epoch": 40.5924300603401, |
|
"grad_norm": 4.803800106048584, |
|
"learning_rate": 2.970378496982995e-05, |
|
"loss": 0.8227, |
|
"step": 148000 |
|
}, |
|
{ |
|
"epoch": 40.72956664838179, |
|
"grad_norm": 5.80670690536499, |
|
"learning_rate": 2.9635216675809107e-05, |
|
"loss": 0.8375, |
|
"step": 148500 |
|
}, |
|
{ |
|
"epoch": 40.866703236423476, |
|
"grad_norm": 5.025584697723389, |
|
"learning_rate": 2.9566648381788263e-05, |
|
"loss": 0.8358, |
|
"step": 149000 |
|
}, |
|
{ |
|
"epoch": 41.00383982446517, |
|
"grad_norm": 4.726833343505859, |
|
"learning_rate": 2.9498080087767416e-05, |
|
"loss": 0.8472, |
|
"step": 149500 |
|
}, |
|
{ |
|
"epoch": 41.14097641250686, |
|
"grad_norm": 5.068787097930908, |
|
"learning_rate": 2.9429511793746572e-05, |
|
"loss": 0.7477, |
|
"step": 150000 |
|
}, |
|
{ |
|
"epoch": 41.278113000548544, |
|
"grad_norm": 4.701972484588623, |
|
"learning_rate": 2.936094349972573e-05, |
|
"loss": 0.7578, |
|
"step": 150500 |
|
}, |
|
{ |
|
"epoch": 41.415249588590235, |
|
"grad_norm": 4.899438858032227, |
|
"learning_rate": 2.9292375205704885e-05, |
|
"loss": 0.7685, |
|
"step": 151000 |
|
}, |
|
{ |
|
"epoch": 41.55238617663193, |
|
"grad_norm": 5.0015482902526855, |
|
"learning_rate": 2.9223806911684037e-05, |
|
"loss": 0.7812, |
|
"step": 151500 |
|
}, |
|
{ |
|
"epoch": 41.68952276467361, |
|
"grad_norm": 4.952108860015869, |
|
"learning_rate": 2.9155238617663194e-05, |
|
"loss": 0.7886, |
|
"step": 152000 |
|
}, |
|
{ |
|
"epoch": 41.8266593527153, |
|
"grad_norm": 5.88131046295166, |
|
"learning_rate": 2.908667032364235e-05, |
|
"loss": 0.7972, |
|
"step": 152500 |
|
}, |
|
{ |
|
"epoch": 41.963795940756995, |
|
"grad_norm": 5.144876956939697, |
|
"learning_rate": 2.9018102029621502e-05, |
|
"loss": 0.8097, |
|
"step": 153000 |
|
}, |
|
{ |
|
"epoch": 42.10093252879869, |
|
"grad_norm": 5.848343849182129, |
|
"learning_rate": 2.894953373560066e-05, |
|
"loss": 0.7389, |
|
"step": 153500 |
|
}, |
|
{ |
|
"epoch": 42.23806911684037, |
|
"grad_norm": 5.04640007019043, |
|
"learning_rate": 2.8880965441579815e-05, |
|
"loss": 0.7163, |
|
"step": 154000 |
|
}, |
|
{ |
|
"epoch": 42.37520570488206, |
|
"grad_norm": 5.1840128898620605, |
|
"learning_rate": 2.881239714755897e-05, |
|
"loss": 0.732, |
|
"step": 154500 |
|
}, |
|
{ |
|
"epoch": 42.512342292923755, |
|
"grad_norm": 5.124771595001221, |
|
"learning_rate": 2.8743828853538124e-05, |
|
"loss": 0.7431, |
|
"step": 155000 |
|
}, |
|
{ |
|
"epoch": 42.64947888096544, |
|
"grad_norm": 4.6923089027404785, |
|
"learning_rate": 2.867526055951728e-05, |
|
"loss": 0.7582, |
|
"step": 155500 |
|
}, |
|
{ |
|
"epoch": 42.78661546900713, |
|
"grad_norm": 5.027599334716797, |
|
"learning_rate": 2.8606692265496436e-05, |
|
"loss": 0.7561, |
|
"step": 156000 |
|
}, |
|
{ |
|
"epoch": 42.92375205704882, |
|
"grad_norm": 4.931192398071289, |
|
"learning_rate": 2.853812397147559e-05, |
|
"loss": 0.7642, |
|
"step": 156500 |
|
}, |
|
{ |
|
"epoch": 43.06088864509051, |
|
"grad_norm": 4.427544593811035, |
|
"learning_rate": 2.8469555677454745e-05, |
|
"loss": 0.7286, |
|
"step": 157000 |
|
}, |
|
{ |
|
"epoch": 43.1980252331322, |
|
"grad_norm": 5.119362831115723, |
|
"learning_rate": 2.84009873834339e-05, |
|
"loss": 0.6782, |
|
"step": 157500 |
|
}, |
|
{ |
|
"epoch": 43.33516182117389, |
|
"grad_norm": 4.8863749504089355, |
|
"learning_rate": 2.833241908941306e-05, |
|
"loss": 0.6933, |
|
"step": 158000 |
|
}, |
|
{ |
|
"epoch": 43.472298409215576, |
|
"grad_norm": 5.453842639923096, |
|
"learning_rate": 2.826385079539221e-05, |
|
"loss": 0.7039, |
|
"step": 158500 |
|
}, |
|
{ |
|
"epoch": 43.60943499725727, |
|
"grad_norm": 4.8158721923828125, |
|
"learning_rate": 2.8195282501371366e-05, |
|
"loss": 0.711, |
|
"step": 159000 |
|
}, |
|
{ |
|
"epoch": 43.74657158529896, |
|
"grad_norm": 5.3100905418396, |
|
"learning_rate": 2.8126714207350523e-05, |
|
"loss": 0.7238, |
|
"step": 159500 |
|
}, |
|
{ |
|
"epoch": 43.88370817334065, |
|
"grad_norm": 4.8812031745910645, |
|
"learning_rate": 2.8058145913329675e-05, |
|
"loss": 0.7283, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 44.020844761382335, |
|
"grad_norm": 5.003659725189209, |
|
"learning_rate": 2.798957761930883e-05, |
|
"loss": 0.7262, |
|
"step": 160500 |
|
}, |
|
{ |
|
"epoch": 44.15798134942403, |
|
"grad_norm": 5.185481548309326, |
|
"learning_rate": 2.7921009325287988e-05, |
|
"loss": 0.6417, |
|
"step": 161000 |
|
}, |
|
{ |
|
"epoch": 44.29511793746572, |
|
"grad_norm": 4.771406173706055, |
|
"learning_rate": 2.7852441031267147e-05, |
|
"loss": 0.6564, |
|
"step": 161500 |
|
}, |
|
{ |
|
"epoch": 44.4322545255074, |
|
"grad_norm": 5.313647270202637, |
|
"learning_rate": 2.7783872737246297e-05, |
|
"loss": 0.6727, |
|
"step": 162000 |
|
}, |
|
{ |
|
"epoch": 44.569391113549095, |
|
"grad_norm": 5.134614944458008, |
|
"learning_rate": 2.7715304443225453e-05, |
|
"loss": 0.6784, |
|
"step": 162500 |
|
}, |
|
{ |
|
"epoch": 44.70652770159079, |
|
"grad_norm": 4.888493537902832, |
|
"learning_rate": 2.764673614920461e-05, |
|
"loss": 0.6869, |
|
"step": 163000 |
|
}, |
|
{ |
|
"epoch": 44.84366428963247, |
|
"grad_norm": 5.336511135101318, |
|
"learning_rate": 2.7578167855183762e-05, |
|
"loss": 0.692, |
|
"step": 163500 |
|
}, |
|
{ |
|
"epoch": 44.98080087767416, |
|
"grad_norm": 5.053600311279297, |
|
"learning_rate": 2.7509599561162918e-05, |
|
"loss": 0.7041, |
|
"step": 164000 |
|
}, |
|
{ |
|
"epoch": 45.117937465715855, |
|
"grad_norm": 4.778295993804932, |
|
"learning_rate": 2.7441031267142074e-05, |
|
"loss": 0.6192, |
|
"step": 164500 |
|
}, |
|
{ |
|
"epoch": 45.25507405375754, |
|
"grad_norm": 5.197367191314697, |
|
"learning_rate": 2.7372462973121234e-05, |
|
"loss": 0.6225, |
|
"step": 165000 |
|
}, |
|
{ |
|
"epoch": 45.39221064179923, |
|
"grad_norm": 5.395830154418945, |
|
"learning_rate": 2.7303894679100383e-05, |
|
"loss": 0.6342, |
|
"step": 165500 |
|
}, |
|
{ |
|
"epoch": 45.52934722984092, |
|
"grad_norm": 5.031848430633545, |
|
"learning_rate": 2.723532638507954e-05, |
|
"loss": 0.6448, |
|
"step": 166000 |
|
}, |
|
{ |
|
"epoch": 45.666483817882614, |
|
"grad_norm": 5.896296977996826, |
|
"learning_rate": 2.71667580910587e-05, |
|
"loss": 0.6532, |
|
"step": 166500 |
|
}, |
|
{ |
|
"epoch": 45.8036204059243, |
|
"grad_norm": 5.266870021820068, |
|
"learning_rate": 2.7098189797037848e-05, |
|
"loss": 0.6583, |
|
"step": 167000 |
|
}, |
|
{ |
|
"epoch": 45.94075699396599, |
|
"grad_norm": 5.225521087646484, |
|
"learning_rate": 2.7029621503017004e-05, |
|
"loss": 0.6648, |
|
"step": 167500 |
|
}, |
|
{ |
|
"epoch": 46.07789358200768, |
|
"grad_norm": 5.04818058013916, |
|
"learning_rate": 2.696105320899616e-05, |
|
"loss": 0.6178, |
|
"step": 168000 |
|
}, |
|
{ |
|
"epoch": 46.21503017004937, |
|
"grad_norm": 4.635532855987549, |
|
"learning_rate": 2.689248491497532e-05, |
|
"loss": 0.5913, |
|
"step": 168500 |
|
}, |
|
{ |
|
"epoch": 46.35216675809106, |
|
"grad_norm": 5.3561906814575195, |
|
"learning_rate": 2.682391662095447e-05, |
|
"loss": 0.5949, |
|
"step": 169000 |
|
}, |
|
{ |
|
"epoch": 46.48930334613275, |
|
"grad_norm": 5.117276191711426, |
|
"learning_rate": 2.6755348326933626e-05, |
|
"loss": 0.6108, |
|
"step": 169500 |
|
}, |
|
{ |
|
"epoch": 46.626439934174435, |
|
"grad_norm": 5.213390350341797, |
|
"learning_rate": 2.6686780032912785e-05, |
|
"loss": 0.6224, |
|
"step": 170000 |
|
}, |
|
{ |
|
"epoch": 46.76357652221613, |
|
"grad_norm": 5.088405609130859, |
|
"learning_rate": 2.6618211738891935e-05, |
|
"loss": 0.6281, |
|
"step": 170500 |
|
}, |
|
{ |
|
"epoch": 46.90071311025782, |
|
"grad_norm": 5.051976680755615, |
|
"learning_rate": 2.654964344487109e-05, |
|
"loss": 0.6368, |
|
"step": 171000 |
|
}, |
|
{ |
|
"epoch": 47.0378496982995, |
|
"grad_norm": 4.881986141204834, |
|
"learning_rate": 2.648107515085025e-05, |
|
"loss": 0.6152, |
|
"step": 171500 |
|
}, |
|
{ |
|
"epoch": 47.174986286341195, |
|
"grad_norm": 5.066763401031494, |
|
"learning_rate": 2.6412506856829406e-05, |
|
"loss": 0.564, |
|
"step": 172000 |
|
}, |
|
{ |
|
"epoch": 47.31212287438289, |
|
"grad_norm": 4.73757791519165, |
|
"learning_rate": 2.6343938562808556e-05, |
|
"loss": 0.5697, |
|
"step": 172500 |
|
}, |
|
{ |
|
"epoch": 47.44925946242458, |
|
"grad_norm": 4.839804172515869, |
|
"learning_rate": 2.6275370268787712e-05, |
|
"loss": 0.5825, |
|
"step": 173000 |
|
}, |
|
{ |
|
"epoch": 47.58639605046626, |
|
"grad_norm": 5.461195945739746, |
|
"learning_rate": 2.620680197476687e-05, |
|
"loss": 0.5853, |
|
"step": 173500 |
|
}, |
|
{ |
|
"epoch": 47.723532638507955, |
|
"grad_norm": 4.896440029144287, |
|
"learning_rate": 2.613823368074602e-05, |
|
"loss": 0.593, |
|
"step": 174000 |
|
}, |
|
{ |
|
"epoch": 47.860669226549646, |
|
"grad_norm": 4.847322463989258, |
|
"learning_rate": 2.6069665386725177e-05, |
|
"loss": 0.6, |
|
"step": 174500 |
|
}, |
|
{ |
|
"epoch": 47.99780581459133, |
|
"grad_norm": 4.478647708892822, |
|
"learning_rate": 2.6001097092704337e-05, |
|
"loss": 0.6104, |
|
"step": 175000 |
|
}, |
|
{ |
|
"epoch": 48.13494240263302, |
|
"grad_norm": 5.172453880310059, |
|
"learning_rate": 2.5932528798683493e-05, |
|
"loss": 0.5294, |
|
"step": 175500 |
|
}, |
|
{ |
|
"epoch": 48.272078990674714, |
|
"grad_norm": 4.307365894317627, |
|
"learning_rate": 2.5863960504662642e-05, |
|
"loss": 0.5392, |
|
"step": 176000 |
|
}, |
|
{ |
|
"epoch": 48.4092155787164, |
|
"grad_norm": 4.813899517059326, |
|
"learning_rate": 2.57953922106418e-05, |
|
"loss": 0.5521, |
|
"step": 176500 |
|
}, |
|
{ |
|
"epoch": 48.54635216675809, |
|
"grad_norm": 5.233691215515137, |
|
"learning_rate": 2.5726823916620958e-05, |
|
"loss": 0.5565, |
|
"step": 177000 |
|
}, |
|
{ |
|
"epoch": 48.68348875479978, |
|
"grad_norm": 5.3576979637146, |
|
"learning_rate": 2.5658255622600114e-05, |
|
"loss": 0.5663, |
|
"step": 177500 |
|
}, |
|
{ |
|
"epoch": 48.82062534284147, |
|
"grad_norm": 5.31622314453125, |
|
"learning_rate": 2.5589687328579264e-05, |
|
"loss": 0.5754, |
|
"step": 178000 |
|
}, |
|
{ |
|
"epoch": 48.95776193088316, |
|
"grad_norm": 5.2634148597717285, |
|
"learning_rate": 2.5521119034558423e-05, |
|
"loss": 0.58, |
|
"step": 178500 |
|
}, |
|
{ |
|
"epoch": 49.09489851892485, |
|
"grad_norm": 4.982797622680664, |
|
"learning_rate": 2.545255074053758e-05, |
|
"loss": 0.5266, |
|
"step": 179000 |
|
}, |
|
{ |
|
"epoch": 49.23203510696654, |
|
"grad_norm": 4.663660526275635, |
|
"learning_rate": 2.538398244651673e-05, |
|
"loss": 0.5101, |
|
"step": 179500 |
|
}, |
|
{ |
|
"epoch": 49.36917169500823, |
|
"grad_norm": 4.653820991516113, |
|
"learning_rate": 2.5315414152495888e-05, |
|
"loss": 0.5206, |
|
"step": 180000 |
|
}, |
|
{ |
|
"epoch": 49.50630828304992, |
|
"grad_norm": 4.846981048583984, |
|
"learning_rate": 2.5246845858475044e-05, |
|
"loss": 0.5349, |
|
"step": 180500 |
|
}, |
|
{ |
|
"epoch": 49.64344487109161, |
|
"grad_norm": 4.962299346923828, |
|
"learning_rate": 2.51782775644542e-05, |
|
"loss": 0.5378, |
|
"step": 181000 |
|
}, |
|
{ |
|
"epoch": 49.780581459133295, |
|
"grad_norm": 4.924633979797363, |
|
"learning_rate": 2.510970927043335e-05, |
|
"loss": 0.5446, |
|
"step": 181500 |
|
}, |
|
{ |
|
"epoch": 49.917718047174986, |
|
"grad_norm": 5.435749053955078, |
|
"learning_rate": 2.504114097641251e-05, |
|
"loss": 0.5535, |
|
"step": 182000 |
|
}, |
|
{ |
|
"epoch": 50.05485463521668, |
|
"grad_norm": 4.581083297729492, |
|
"learning_rate": 2.4972572682391662e-05, |
|
"loss": 0.5224, |
|
"step": 182500 |
|
}, |
|
{ |
|
"epoch": 50.19199122325836, |
|
"grad_norm": 4.435048580169678, |
|
"learning_rate": 2.490400438837082e-05, |
|
"loss": 0.4925, |
|
"step": 183000 |
|
}, |
|
{ |
|
"epoch": 50.329127811300054, |
|
"grad_norm": 4.9870710372924805, |
|
"learning_rate": 2.4835436094349975e-05, |
|
"loss": 0.4966, |
|
"step": 183500 |
|
}, |
|
{ |
|
"epoch": 50.466264399341746, |
|
"grad_norm": 4.312280178070068, |
|
"learning_rate": 2.476686780032913e-05, |
|
"loss": 0.505, |
|
"step": 184000 |
|
}, |
|
{ |
|
"epoch": 50.60340098738343, |
|
"grad_norm": 4.78123664855957, |
|
"learning_rate": 2.4698299506308284e-05, |
|
"loss": 0.508, |
|
"step": 184500 |
|
}, |
|
{ |
|
"epoch": 50.74053757542512, |
|
"grad_norm": 5.319374084472656, |
|
"learning_rate": 2.462973121228744e-05, |
|
"loss": 0.5149, |
|
"step": 185000 |
|
}, |
|
{ |
|
"epoch": 50.877674163466814, |
|
"grad_norm": 4.26421594619751, |
|
"learning_rate": 2.4561162918266596e-05, |
|
"loss": 0.5278, |
|
"step": 185500 |
|
}, |
|
{ |
|
"epoch": 51.014810751508506, |
|
"grad_norm": 4.891973495483398, |
|
"learning_rate": 2.449259462424575e-05, |
|
"loss": 0.522, |
|
"step": 186000 |
|
}, |
|
{ |
|
"epoch": 51.15194733955019, |
|
"grad_norm": 5.03622579574585, |
|
"learning_rate": 2.4424026330224905e-05, |
|
"loss": 0.4602, |
|
"step": 186500 |
|
}, |
|
{ |
|
"epoch": 51.28908392759188, |
|
"grad_norm": 4.524442195892334, |
|
"learning_rate": 2.435545803620406e-05, |
|
"loss": 0.4689, |
|
"step": 187000 |
|
}, |
|
{ |
|
"epoch": 51.426220515633574, |
|
"grad_norm": 4.18233060836792, |
|
"learning_rate": 2.4286889742183217e-05, |
|
"loss": 0.4786, |
|
"step": 187500 |
|
}, |
|
{ |
|
"epoch": 51.56335710367526, |
|
"grad_norm": 4.806675434112549, |
|
"learning_rate": 2.421832144816237e-05, |
|
"loss": 0.4886, |
|
"step": 188000 |
|
}, |
|
{ |
|
"epoch": 51.70049369171695, |
|
"grad_norm": 4.611050128936768, |
|
"learning_rate": 2.4149753154141526e-05, |
|
"loss": 0.4901, |
|
"step": 188500 |
|
}, |
|
{ |
|
"epoch": 51.83763027975864, |
|
"grad_norm": 5.323733806610107, |
|
"learning_rate": 2.4081184860120682e-05, |
|
"loss": 0.5021, |
|
"step": 189000 |
|
}, |
|
{ |
|
"epoch": 51.97476686780033, |
|
"grad_norm": 4.821100234985352, |
|
"learning_rate": 2.4012616566099835e-05, |
|
"loss": 0.5053, |
|
"step": 189500 |
|
}, |
|
{ |
|
"epoch": 52.11190345584202, |
|
"grad_norm": 4.823397159576416, |
|
"learning_rate": 2.394404827207899e-05, |
|
"loss": 0.4498, |
|
"step": 190000 |
|
}, |
|
{ |
|
"epoch": 52.24904004388371, |
|
"grad_norm": 4.650783061981201, |
|
"learning_rate": 2.3875479978058147e-05, |
|
"loss": 0.4506, |
|
"step": 190500 |
|
}, |
|
{ |
|
"epoch": 52.386176631925395, |
|
"grad_norm": 5.3509697914123535, |
|
"learning_rate": 2.3806911684037304e-05, |
|
"loss": 0.4492, |
|
"step": 191000 |
|
}, |
|
{ |
|
"epoch": 52.523313219967086, |
|
"grad_norm": 5.251642227172852, |
|
"learning_rate": 2.3738343390016456e-05, |
|
"loss": 0.4665, |
|
"step": 191500 |
|
}, |
|
{ |
|
"epoch": 52.66044980800878, |
|
"grad_norm": 4.471257209777832, |
|
"learning_rate": 2.3669775095995613e-05, |
|
"loss": 0.4686, |
|
"step": 192000 |
|
}, |
|
{ |
|
"epoch": 52.79758639605047, |
|
"grad_norm": 4.814416885375977, |
|
"learning_rate": 2.360120680197477e-05, |
|
"loss": 0.4761, |
|
"step": 192500 |
|
}, |
|
{ |
|
"epoch": 52.934722984092154, |
|
"grad_norm": 5.369185924530029, |
|
"learning_rate": 2.353263850795392e-05, |
|
"loss": 0.4826, |
|
"step": 193000 |
|
}, |
|
{ |
|
"epoch": 53.071859572133846, |
|
"grad_norm": 4.826727867126465, |
|
"learning_rate": 2.3464070213933078e-05, |
|
"loss": 0.4517, |
|
"step": 193500 |
|
}, |
|
{ |
|
"epoch": 53.20899616017554, |
|
"grad_norm": 4.9067583084106445, |
|
"learning_rate": 2.3395501919912234e-05, |
|
"loss": 0.4241, |
|
"step": 194000 |
|
}, |
|
{ |
|
"epoch": 53.34613274821722, |
|
"grad_norm": 5.361186981201172, |
|
"learning_rate": 2.332693362589139e-05, |
|
"loss": 0.4334, |
|
"step": 194500 |
|
}, |
|
{ |
|
"epoch": 53.483269336258914, |
|
"grad_norm": 4.9540300369262695, |
|
"learning_rate": 2.3258365331870543e-05, |
|
"loss": 0.4379, |
|
"step": 195000 |
|
}, |
|
{ |
|
"epoch": 53.620405924300606, |
|
"grad_norm": 5.23082971572876, |
|
"learning_rate": 2.31897970378497e-05, |
|
"loss": 0.4495, |
|
"step": 195500 |
|
}, |
|
{ |
|
"epoch": 53.75754251234229, |
|
"grad_norm": 4.608271598815918, |
|
"learning_rate": 2.3121228743828855e-05, |
|
"loss": 0.4521, |
|
"step": 196000 |
|
}, |
|
{ |
|
"epoch": 53.89467910038398, |
|
"grad_norm": 4.835067272186279, |
|
"learning_rate": 2.305266044980801e-05, |
|
"loss": 0.4616, |
|
"step": 196500 |
|
}, |
|
{ |
|
"epoch": 54.031815688425674, |
|
"grad_norm": 4.397408485412598, |
|
"learning_rate": 2.2984092155787164e-05, |
|
"loss": 0.4483, |
|
"step": 197000 |
|
}, |
|
{ |
|
"epoch": 54.16895227646736, |
|
"grad_norm": 4.769198894500732, |
|
"learning_rate": 2.291552386176632e-05, |
|
"loss": 0.4038, |
|
"step": 197500 |
|
}, |
|
{ |
|
"epoch": 54.30608886450905, |
|
"grad_norm": 4.403786659240723, |
|
"learning_rate": 2.2846955567745476e-05, |
|
"loss": 0.4146, |
|
"step": 198000 |
|
}, |
|
{ |
|
"epoch": 54.44322545255074, |
|
"grad_norm": 4.6071696281433105, |
|
"learning_rate": 2.277838727372463e-05, |
|
"loss": 0.4199, |
|
"step": 198500 |
|
}, |
|
{ |
|
"epoch": 54.58036204059243, |
|
"grad_norm": 4.638876438140869, |
|
"learning_rate": 2.2709818979703785e-05, |
|
"loss": 0.4268, |
|
"step": 199000 |
|
}, |
|
{ |
|
"epoch": 54.71749862863412, |
|
"grad_norm": 4.671108722686768, |
|
"learning_rate": 2.264125068568294e-05, |
|
"loss": 0.4302, |
|
"step": 199500 |
|
}, |
|
{ |
|
"epoch": 54.85463521667581, |
|
"grad_norm": 5.265748977661133, |
|
"learning_rate": 2.2572682391662098e-05, |
|
"loss": 0.44, |
|
"step": 200000 |
|
}, |
|
{ |
|
"epoch": 54.9917718047175, |
|
"grad_norm": 5.179275989532471, |
|
"learning_rate": 2.250411409764125e-05, |
|
"loss": 0.4381, |
|
"step": 200500 |
|
}, |
|
{ |
|
"epoch": 55.128908392759186, |
|
"grad_norm": 4.084758758544922, |
|
"learning_rate": 2.2435545803620407e-05, |
|
"loss": 0.3888, |
|
"step": 201000 |
|
}, |
|
{ |
|
"epoch": 55.26604498080088, |
|
"grad_norm": 4.465928554534912, |
|
"learning_rate": 2.2366977509599563e-05, |
|
"loss": 0.3935, |
|
"step": 201500 |
|
}, |
|
{ |
|
"epoch": 55.40318156884257, |
|
"grad_norm": 4.657350540161133, |
|
"learning_rate": 2.2298409215578716e-05, |
|
"loss": 0.3963, |
|
"step": 202000 |
|
}, |
|
{ |
|
"epoch": 55.540318156884254, |
|
"grad_norm": 4.591371059417725, |
|
"learning_rate": 2.2229840921557872e-05, |
|
"loss": 0.4052, |
|
"step": 202500 |
|
}, |
|
{ |
|
"epoch": 55.677454744925946, |
|
"grad_norm": 4.821173191070557, |
|
"learning_rate": 2.2161272627537028e-05, |
|
"loss": 0.4135, |
|
"step": 203000 |
|
}, |
|
{ |
|
"epoch": 55.81459133296764, |
|
"grad_norm": 4.650514125823975, |
|
"learning_rate": 2.2092704333516184e-05, |
|
"loss": 0.4171, |
|
"step": 203500 |
|
}, |
|
{ |
|
"epoch": 55.95172792100932, |
|
"grad_norm": 4.952467441558838, |
|
"learning_rate": 2.2024136039495337e-05, |
|
"loss": 0.4238, |
|
"step": 204000 |
|
}, |
|
{ |
|
"epoch": 56.088864509051014, |
|
"grad_norm": 4.717243671417236, |
|
"learning_rate": 2.1955567745474493e-05, |
|
"loss": 0.3891, |
|
"step": 204500 |
|
}, |
|
{ |
|
"epoch": 56.226001097092706, |
|
"grad_norm": 4.069623947143555, |
|
"learning_rate": 2.188699945145365e-05, |
|
"loss": 0.373, |
|
"step": 205000 |
|
}, |
|
{ |
|
"epoch": 56.3631376851344, |
|
"grad_norm": 4.447889804840088, |
|
"learning_rate": 2.1818431157432802e-05, |
|
"loss": 0.3799, |
|
"step": 205500 |
|
}, |
|
{ |
|
"epoch": 56.50027427317608, |
|
"grad_norm": 4.514695644378662, |
|
"learning_rate": 2.174986286341196e-05, |
|
"loss": 0.39, |
|
"step": 206000 |
|
}, |
|
{ |
|
"epoch": 56.637410861217774, |
|
"grad_norm": 5.111133098602295, |
|
"learning_rate": 2.1681294569391114e-05, |
|
"loss": 0.3919, |
|
"step": 206500 |
|
}, |
|
{ |
|
"epoch": 56.774547449259465, |
|
"grad_norm": 4.48080587387085, |
|
"learning_rate": 2.161272627537027e-05, |
|
"loss": 0.3965, |
|
"step": 207000 |
|
}, |
|
{ |
|
"epoch": 56.91168403730115, |
|
"grad_norm": 4.876768589019775, |
|
"learning_rate": 2.1544157981349423e-05, |
|
"loss": 0.4046, |
|
"step": 207500 |
|
}, |
|
{ |
|
"epoch": 57.04882062534284, |
|
"grad_norm": 4.336927890777588, |
|
"learning_rate": 2.1475589687328583e-05, |
|
"loss": 0.3817, |
|
"step": 208000 |
|
}, |
|
{ |
|
"epoch": 57.18595721338453, |
|
"grad_norm": 4.570804595947266, |
|
"learning_rate": 2.1407021393307736e-05, |
|
"loss": 0.357, |
|
"step": 208500 |
|
}, |
|
{ |
|
"epoch": 57.32309380142622, |
|
"grad_norm": 4.742151737213135, |
|
"learning_rate": 2.133845309928689e-05, |
|
"loss": 0.3648, |
|
"step": 209000 |
|
}, |
|
{ |
|
"epoch": 57.46023038946791, |
|
"grad_norm": 4.641295433044434, |
|
"learning_rate": 2.1269884805266048e-05, |
|
"loss": 0.3716, |
|
"step": 209500 |
|
}, |
|
{ |
|
"epoch": 57.5973669775096, |
|
"grad_norm": 5.475332736968994, |
|
"learning_rate": 2.12013165112452e-05, |
|
"loss": 0.3731, |
|
"step": 210000 |
|
}, |
|
{ |
|
"epoch": 57.734503565551286, |
|
"grad_norm": 4.712151050567627, |
|
"learning_rate": 2.1132748217224357e-05, |
|
"loss": 0.3809, |
|
"step": 210500 |
|
}, |
|
{ |
|
"epoch": 57.87164015359298, |
|
"grad_norm": 4.409310817718506, |
|
"learning_rate": 2.106417992320351e-05, |
|
"loss": 0.3812, |
|
"step": 211000 |
|
}, |
|
{ |
|
"epoch": 58.00877674163467, |
|
"grad_norm": 4.162150859832764, |
|
"learning_rate": 2.099561162918267e-05, |
|
"loss": 0.383, |
|
"step": 211500 |
|
}, |
|
{ |
|
"epoch": 58.14591332967636, |
|
"grad_norm": 4.273313999176025, |
|
"learning_rate": 2.0927043335161822e-05, |
|
"loss": 0.3407, |
|
"step": 212000 |
|
}, |
|
{ |
|
"epoch": 58.283049917718046, |
|
"grad_norm": 4.508772850036621, |
|
"learning_rate": 2.0858475041140975e-05, |
|
"loss": 0.3489, |
|
"step": 212500 |
|
}, |
|
{ |
|
"epoch": 58.42018650575974, |
|
"grad_norm": 5.550928592681885, |
|
"learning_rate": 2.0789906747120134e-05, |
|
"loss": 0.3526, |
|
"step": 213000 |
|
}, |
|
{ |
|
"epoch": 58.55732309380143, |
|
"grad_norm": 4.722227096557617, |
|
"learning_rate": 2.0721338453099287e-05, |
|
"loss": 0.3576, |
|
"step": 213500 |
|
}, |
|
{ |
|
"epoch": 58.694459681843114, |
|
"grad_norm": 4.649284839630127, |
|
"learning_rate": 2.0652770159078443e-05, |
|
"loss": 0.3605, |
|
"step": 214000 |
|
}, |
|
{ |
|
"epoch": 58.831596269884805, |
|
"grad_norm": 4.80319881439209, |
|
"learning_rate": 2.05842018650576e-05, |
|
"loss": 0.3655, |
|
"step": 214500 |
|
}, |
|
{ |
|
"epoch": 58.9687328579265, |
|
"grad_norm": 5.22609806060791, |
|
"learning_rate": 2.0515633571036756e-05, |
|
"loss": 0.3714, |
|
"step": 215000 |
|
}, |
|
{ |
|
"epoch": 59.10586944596818, |
|
"grad_norm": 5.241272926330566, |
|
"learning_rate": 2.044706527701591e-05, |
|
"loss": 0.3345, |
|
"step": 215500 |
|
}, |
|
{ |
|
"epoch": 59.24300603400987, |
|
"grad_norm": 4.466114044189453, |
|
"learning_rate": 2.037849698299506e-05, |
|
"loss": 0.3314, |
|
"step": 216000 |
|
}, |
|
{ |
|
"epoch": 59.380142622051565, |
|
"grad_norm": 4.289991855621338, |
|
"learning_rate": 2.030992868897422e-05, |
|
"loss": 0.3353, |
|
"step": 216500 |
|
}, |
|
{ |
|
"epoch": 59.51727921009325, |
|
"grad_norm": 4.458993911743164, |
|
"learning_rate": 2.0241360394953374e-05, |
|
"loss": 0.3385, |
|
"step": 217000 |
|
}, |
|
{ |
|
"epoch": 59.65441579813494, |
|
"grad_norm": 4.6871724128723145, |
|
"learning_rate": 2.017279210093253e-05, |
|
"loss": 0.3452, |
|
"step": 217500 |
|
}, |
|
{ |
|
"epoch": 59.79155238617663, |
|
"grad_norm": 4.503798484802246, |
|
"learning_rate": 2.0104223806911686e-05, |
|
"loss": 0.3476, |
|
"step": 218000 |
|
}, |
|
{ |
|
"epoch": 59.928688974218325, |
|
"grad_norm": 5.342411041259766, |
|
"learning_rate": 2.0035655512890842e-05, |
|
"loss": 0.3553, |
|
"step": 218500 |
|
}, |
|
{ |
|
"epoch": 60.06582556226001, |
|
"grad_norm": 5.1502180099487305, |
|
"learning_rate": 1.9967087218869995e-05, |
|
"loss": 0.3331, |
|
"step": 219000 |
|
}, |
|
{ |
|
"epoch": 60.2029621503017, |
|
"grad_norm": 4.446504592895508, |
|
"learning_rate": 1.9898518924849148e-05, |
|
"loss": 0.3166, |
|
"step": 219500 |
|
}, |
|
{ |
|
"epoch": 60.34009873834339, |
|
"grad_norm": 4.185482025146484, |
|
"learning_rate": 1.9829950630828307e-05, |
|
"loss": 0.3204, |
|
"step": 220000 |
|
}, |
|
{ |
|
"epoch": 60.47723532638508, |
|
"grad_norm": 4.356864929199219, |
|
"learning_rate": 1.976138233680746e-05, |
|
"loss": 0.3262, |
|
"step": 220500 |
|
}, |
|
{ |
|
"epoch": 60.61437191442677, |
|
"grad_norm": 4.678393840789795, |
|
"learning_rate": 1.9692814042786616e-05, |
|
"loss": 0.3311, |
|
"step": 221000 |
|
}, |
|
{ |
|
"epoch": 60.75150850246846, |
|
"grad_norm": 5.001060962677002, |
|
"learning_rate": 1.9624245748765772e-05, |
|
"loss": 0.3354, |
|
"step": 221500 |
|
}, |
|
{ |
|
"epoch": 60.888645090510146, |
|
"grad_norm": 5.079350471496582, |
|
"learning_rate": 1.955567745474493e-05, |
|
"loss": 0.3404, |
|
"step": 222000 |
|
}, |
|
{ |
|
"epoch": 61.02578167855184, |
|
"grad_norm": 4.419836044311523, |
|
"learning_rate": 1.948710916072408e-05, |
|
"loss": 0.331, |
|
"step": 222500 |
|
}, |
|
{ |
|
"epoch": 61.16291826659353, |
|
"grad_norm": 4.383386611938477, |
|
"learning_rate": 1.9418540866703238e-05, |
|
"loss": 0.3028, |
|
"step": 223000 |
|
}, |
|
{ |
|
"epoch": 61.30005485463521, |
|
"grad_norm": 4.333778381347656, |
|
"learning_rate": 1.9349972572682394e-05, |
|
"loss": 0.3055, |
|
"step": 223500 |
|
}, |
|
{ |
|
"epoch": 61.437191442676905, |
|
"grad_norm": 4.988595008850098, |
|
"learning_rate": 1.9281404278661547e-05, |
|
"loss": 0.3112, |
|
"step": 224000 |
|
}, |
|
{ |
|
"epoch": 61.5743280307186, |
|
"grad_norm": 5.163971900939941, |
|
"learning_rate": 1.9212835984640703e-05, |
|
"loss": 0.3153, |
|
"step": 224500 |
|
}, |
|
{ |
|
"epoch": 61.71146461876029, |
|
"grad_norm": 3.907899856567383, |
|
"learning_rate": 1.914426769061986e-05, |
|
"loss": 0.3228, |
|
"step": 225000 |
|
}, |
|
{ |
|
"epoch": 61.84860120680197, |
|
"grad_norm": 4.212146282196045, |
|
"learning_rate": 1.9075699396599015e-05, |
|
"loss": 0.325, |
|
"step": 225500 |
|
}, |
|
{ |
|
"epoch": 61.985737794843665, |
|
"grad_norm": 4.616479873657227, |
|
"learning_rate": 1.9007131102578168e-05, |
|
"loss": 0.3263, |
|
"step": 226000 |
|
}, |
|
{ |
|
"epoch": 62.12287438288536, |
|
"grad_norm": 4.422669887542725, |
|
"learning_rate": 1.8938562808557324e-05, |
|
"loss": 0.2962, |
|
"step": 226500 |
|
}, |
|
{ |
|
"epoch": 62.26001097092704, |
|
"grad_norm": 4.242331027984619, |
|
"learning_rate": 1.886999451453648e-05, |
|
"loss": 0.2926, |
|
"step": 227000 |
|
}, |
|
{ |
|
"epoch": 62.39714755896873, |
|
"grad_norm": 4.647274494171143, |
|
"learning_rate": 1.8801426220515633e-05, |
|
"loss": 0.2985, |
|
"step": 227500 |
|
}, |
|
{ |
|
"epoch": 62.534284147010425, |
|
"grad_norm": 4.557641983032227, |
|
"learning_rate": 1.873285792649479e-05, |
|
"loss": 0.3027, |
|
"step": 228000 |
|
}, |
|
{ |
|
"epoch": 62.67142073505211, |
|
"grad_norm": 4.458461284637451, |
|
"learning_rate": 1.8664289632473945e-05, |
|
"loss": 0.3088, |
|
"step": 228500 |
|
}, |
|
{ |
|
"epoch": 62.8085573230938, |
|
"grad_norm": 4.6789727210998535, |
|
"learning_rate": 1.85957213384531e-05, |
|
"loss": 0.3105, |
|
"step": 229000 |
|
}, |
|
{ |
|
"epoch": 62.94569391113549, |
|
"grad_norm": 4.642698287963867, |
|
"learning_rate": 1.8527153044432254e-05, |
|
"loss": 0.3154, |
|
"step": 229500 |
|
}, |
|
{ |
|
"epoch": 63.08283049917718, |
|
"grad_norm": 4.549673557281494, |
|
"learning_rate": 1.845858475041141e-05, |
|
"loss": 0.2929, |
|
"step": 230000 |
|
}, |
|
{ |
|
"epoch": 63.21996708721887, |
|
"grad_norm": 4.2093119621276855, |
|
"learning_rate": 1.8390016456390567e-05, |
|
"loss": 0.2838, |
|
"step": 230500 |
|
}, |
|
{ |
|
"epoch": 63.35710367526056, |
|
"grad_norm": 4.682537078857422, |
|
"learning_rate": 1.832144816236972e-05, |
|
"loss": 0.2853, |
|
"step": 231000 |
|
}, |
|
{ |
|
"epoch": 63.49424026330225, |
|
"grad_norm": 4.815731048583984, |
|
"learning_rate": 1.8252879868348876e-05, |
|
"loss": 0.2885, |
|
"step": 231500 |
|
}, |
|
{ |
|
"epoch": 63.63137685134394, |
|
"grad_norm": 5.170729160308838, |
|
"learning_rate": 1.818431157432803e-05, |
|
"loss": 0.2957, |
|
"step": 232000 |
|
}, |
|
{ |
|
"epoch": 63.76851343938563, |
|
"grad_norm": 4.020371913909912, |
|
"learning_rate": 1.8115743280307188e-05, |
|
"loss": 0.2985, |
|
"step": 232500 |
|
}, |
|
{ |
|
"epoch": 63.90565002742732, |
|
"grad_norm": 4.983353137969971, |
|
"learning_rate": 1.804717498628634e-05, |
|
"loss": 0.3007, |
|
"step": 233000 |
|
}, |
|
{ |
|
"epoch": 64.04278661546901, |
|
"grad_norm": 4.521115303039551, |
|
"learning_rate": 1.7978606692265497e-05, |
|
"loss": 0.2887, |
|
"step": 233500 |
|
}, |
|
{ |
|
"epoch": 64.1799232035107, |
|
"grad_norm": 4.261961460113525, |
|
"learning_rate": 1.7910038398244653e-05, |
|
"loss": 0.2695, |
|
"step": 234000 |
|
}, |
|
{ |
|
"epoch": 64.31705979155238, |
|
"grad_norm": 4.490432262420654, |
|
"learning_rate": 1.7841470104223806e-05, |
|
"loss": 0.2726, |
|
"step": 234500 |
|
}, |
|
{ |
|
"epoch": 64.45419637959408, |
|
"grad_norm": 4.353551864624023, |
|
"learning_rate": 1.7772901810202962e-05, |
|
"loss": 0.2774, |
|
"step": 235000 |
|
}, |
|
{ |
|
"epoch": 64.59133296763576, |
|
"grad_norm": 4.908097743988037, |
|
"learning_rate": 1.7704333516182118e-05, |
|
"loss": 0.2837, |
|
"step": 235500 |
|
}, |
|
{ |
|
"epoch": 64.72846955567745, |
|
"grad_norm": 4.305734157562256, |
|
"learning_rate": 1.7635765222161274e-05, |
|
"loss": 0.2836, |
|
"step": 236000 |
|
}, |
|
{ |
|
"epoch": 64.86560614371915, |
|
"grad_norm": 5.043435096740723, |
|
"learning_rate": 1.7567196928140427e-05, |
|
"loss": 0.2897, |
|
"step": 236500 |
|
}, |
|
{ |
|
"epoch": 65.00274273176083, |
|
"grad_norm": 4.230961322784424, |
|
"learning_rate": 1.7498628634119583e-05, |
|
"loss": 0.2911, |
|
"step": 237000 |
|
}, |
|
{ |
|
"epoch": 65.13987931980252, |
|
"grad_norm": 4.4803056716918945, |
|
"learning_rate": 1.743006034009874e-05, |
|
"loss": 0.259, |
|
"step": 237500 |
|
}, |
|
{ |
|
"epoch": 65.27701590784422, |
|
"grad_norm": 3.8626787662506104, |
|
"learning_rate": 1.7361492046077896e-05, |
|
"loss": 0.2618, |
|
"step": 238000 |
|
}, |
|
{ |
|
"epoch": 65.4141524958859, |
|
"grad_norm": 4.65452766418457, |
|
"learning_rate": 1.729292375205705e-05, |
|
"loss": 0.265, |
|
"step": 238500 |
|
}, |
|
{ |
|
"epoch": 65.55128908392759, |
|
"grad_norm": 4.291559219360352, |
|
"learning_rate": 1.7224355458036205e-05, |
|
"loss": 0.2709, |
|
"step": 239000 |
|
}, |
|
{ |
|
"epoch": 65.68842567196928, |
|
"grad_norm": 4.508846282958984, |
|
"learning_rate": 1.715578716401536e-05, |
|
"loss": 0.2721, |
|
"step": 239500 |
|
}, |
|
{ |
|
"epoch": 65.82556226001097, |
|
"grad_norm": 5.093057632446289, |
|
"learning_rate": 1.7087218869994513e-05, |
|
"loss": 0.2769, |
|
"step": 240000 |
|
}, |
|
{ |
|
"epoch": 65.96269884805265, |
|
"grad_norm": 4.549623012542725, |
|
"learning_rate": 1.7018650575973673e-05, |
|
"loss": 0.2773, |
|
"step": 240500 |
|
}, |
|
{ |
|
"epoch": 66.09983543609435, |
|
"grad_norm": 4.05508279800415, |
|
"learning_rate": 1.6950082281952826e-05, |
|
"loss": 0.2553, |
|
"step": 241000 |
|
}, |
|
{ |
|
"epoch": 66.23697202413604, |
|
"grad_norm": 3.7369630336761475, |
|
"learning_rate": 1.6881513987931982e-05, |
|
"loss": 0.2528, |
|
"step": 241500 |
|
}, |
|
{ |
|
"epoch": 66.37410861217774, |
|
"grad_norm": 3.813990831375122, |
|
"learning_rate": 1.6812945693911135e-05, |
|
"loss": 0.2526, |
|
"step": 242000 |
|
}, |
|
{ |
|
"epoch": 66.51124520021942, |
|
"grad_norm": 3.993372917175293, |
|
"learning_rate": 1.674437739989029e-05, |
|
"loss": 0.2592, |
|
"step": 242500 |
|
}, |
|
{ |
|
"epoch": 66.6483817882611, |
|
"grad_norm": 4.96673059463501, |
|
"learning_rate": 1.6675809105869447e-05, |
|
"loss": 0.2611, |
|
"step": 243000 |
|
}, |
|
{ |
|
"epoch": 66.7855183763028, |
|
"grad_norm": 4.10557746887207, |
|
"learning_rate": 1.66072408118486e-05, |
|
"loss": 0.2648, |
|
"step": 243500 |
|
}, |
|
{ |
|
"epoch": 66.92265496434449, |
|
"grad_norm": 4.813425064086914, |
|
"learning_rate": 1.653867251782776e-05, |
|
"loss": 0.2656, |
|
"step": 244000 |
|
}, |
|
{ |
|
"epoch": 67.05979155238617, |
|
"grad_norm": 4.064112186431885, |
|
"learning_rate": 1.6470104223806912e-05, |
|
"loss": 0.2536, |
|
"step": 244500 |
|
}, |
|
{ |
|
"epoch": 67.19692814042787, |
|
"grad_norm": 4.719504356384277, |
|
"learning_rate": 1.640153592978607e-05, |
|
"loss": 0.2411, |
|
"step": 245000 |
|
}, |
|
{ |
|
"epoch": 67.33406472846956, |
|
"grad_norm": 4.4745588302612305, |
|
"learning_rate": 1.633296763576522e-05, |
|
"loss": 0.2505, |
|
"step": 245500 |
|
}, |
|
{ |
|
"epoch": 67.47120131651124, |
|
"grad_norm": 4.499454021453857, |
|
"learning_rate": 1.6264399341744377e-05, |
|
"loss": 0.2493, |
|
"step": 246000 |
|
}, |
|
{ |
|
"epoch": 67.60833790455294, |
|
"grad_norm": 3.987778663635254, |
|
"learning_rate": 1.6195831047723534e-05, |
|
"loss": 0.2503, |
|
"step": 246500 |
|
}, |
|
{ |
|
"epoch": 67.74547449259462, |
|
"grad_norm": 4.4290618896484375, |
|
"learning_rate": 1.6127262753702686e-05, |
|
"loss": 0.2552, |
|
"step": 247000 |
|
}, |
|
{ |
|
"epoch": 67.88261108063631, |
|
"grad_norm": 4.531731605529785, |
|
"learning_rate": 1.6058694459681846e-05, |
|
"loss": 0.2579, |
|
"step": 247500 |
|
}, |
|
{ |
|
"epoch": 68.01974766867801, |
|
"grad_norm": 3.8032639026641846, |
|
"learning_rate": 1.5990126165661e-05, |
|
"loss": 0.2562, |
|
"step": 248000 |
|
}, |
|
{ |
|
"epoch": 68.15688425671969, |
|
"grad_norm": 3.864058017730713, |
|
"learning_rate": 1.5921557871640155e-05, |
|
"loss": 0.234, |
|
"step": 248500 |
|
}, |
|
{ |
|
"epoch": 68.29402084476138, |
|
"grad_norm": 3.7496285438537598, |
|
"learning_rate": 1.585298957761931e-05, |
|
"loss": 0.2362, |
|
"step": 249000 |
|
}, |
|
{ |
|
"epoch": 68.43115743280308, |
|
"grad_norm": 3.9640090465545654, |
|
"learning_rate": 1.5784421283598467e-05, |
|
"loss": 0.2406, |
|
"step": 249500 |
|
}, |
|
{ |
|
"epoch": 68.56829402084476, |
|
"grad_norm": 4.273751258850098, |
|
"learning_rate": 1.571585298957762e-05, |
|
"loss": 0.2402, |
|
"step": 250000 |
|
}, |
|
{ |
|
"epoch": 68.70543060888645, |
|
"grad_norm": 3.934805393218994, |
|
"learning_rate": 1.5647284695556773e-05, |
|
"loss": 0.2437, |
|
"step": 250500 |
|
}, |
|
{ |
|
"epoch": 68.84256719692814, |
|
"grad_norm": 3.652498245239258, |
|
"learning_rate": 1.5578716401535932e-05, |
|
"loss": 0.2467, |
|
"step": 251000 |
|
}, |
|
{ |
|
"epoch": 68.97970378496983, |
|
"grad_norm": 3.7606563568115234, |
|
"learning_rate": 1.5510148107515085e-05, |
|
"loss": 0.2489, |
|
"step": 251500 |
|
}, |
|
{ |
|
"epoch": 69.11684037301151, |
|
"grad_norm": 4.354647636413574, |
|
"learning_rate": 1.544157981349424e-05, |
|
"loss": 0.2272, |
|
"step": 252000 |
|
}, |
|
{ |
|
"epoch": 69.25397696105321, |
|
"grad_norm": 3.411524772644043, |
|
"learning_rate": 1.5373011519473397e-05, |
|
"loss": 0.2274, |
|
"step": 252500 |
|
}, |
|
{ |
|
"epoch": 69.3911135490949, |
|
"grad_norm": 4.171504020690918, |
|
"learning_rate": 1.5304443225452554e-05, |
|
"loss": 0.2305, |
|
"step": 253000 |
|
}, |
|
{ |
|
"epoch": 69.52825013713658, |
|
"grad_norm": 4.308210372924805, |
|
"learning_rate": 1.5235874931431706e-05, |
|
"loss": 0.2319, |
|
"step": 253500 |
|
}, |
|
{ |
|
"epoch": 69.66538672517828, |
|
"grad_norm": 4.150519847869873, |
|
"learning_rate": 1.516730663741086e-05, |
|
"loss": 0.2344, |
|
"step": 254000 |
|
}, |
|
{ |
|
"epoch": 69.80252331321996, |
|
"grad_norm": 4.316656112670898, |
|
"learning_rate": 1.5098738343390017e-05, |
|
"loss": 0.2391, |
|
"step": 254500 |
|
}, |
|
{ |
|
"epoch": 69.93965990126166, |
|
"grad_norm": 4.44851541519165, |
|
"learning_rate": 1.5030170049369171e-05, |
|
"loss": 0.2385, |
|
"step": 255000 |
|
}, |
|
{ |
|
"epoch": 70.07679648930335, |
|
"grad_norm": 4.209973335266113, |
|
"learning_rate": 1.4961601755348328e-05, |
|
"loss": 0.225, |
|
"step": 255500 |
|
}, |
|
{ |
|
"epoch": 70.21393307734503, |
|
"grad_norm": 4.037484169006348, |
|
"learning_rate": 1.4893033461327482e-05, |
|
"loss": 0.2179, |
|
"step": 256000 |
|
}, |
|
{ |
|
"epoch": 70.35106966538673, |
|
"grad_norm": 3.6946587562561035, |
|
"learning_rate": 1.482446516730664e-05, |
|
"loss": 0.2222, |
|
"step": 256500 |
|
}, |
|
{ |
|
"epoch": 70.48820625342842, |
|
"grad_norm": 4.2428717613220215, |
|
"learning_rate": 1.4755896873285793e-05, |
|
"loss": 0.2211, |
|
"step": 257000 |
|
}, |
|
{ |
|
"epoch": 70.6253428414701, |
|
"grad_norm": 3.7683310508728027, |
|
"learning_rate": 1.4687328579264947e-05, |
|
"loss": 0.2259, |
|
"step": 257500 |
|
}, |
|
{ |
|
"epoch": 70.7624794295118, |
|
"grad_norm": 4.147058486938477, |
|
"learning_rate": 1.4618760285244103e-05, |
|
"loss": 0.2283, |
|
"step": 258000 |
|
}, |
|
{ |
|
"epoch": 70.89961601755348, |
|
"grad_norm": 4.305523872375488, |
|
"learning_rate": 1.4550191991223258e-05, |
|
"loss": 0.2316, |
|
"step": 258500 |
|
}, |
|
{ |
|
"epoch": 71.03675260559517, |
|
"grad_norm": 4.284609317779541, |
|
"learning_rate": 1.4481623697202416e-05, |
|
"loss": 0.2254, |
|
"step": 259000 |
|
}, |
|
{ |
|
"epoch": 71.17388919363687, |
|
"grad_norm": 3.876636028289795, |
|
"learning_rate": 1.4413055403181569e-05, |
|
"loss": 0.2084, |
|
"step": 259500 |
|
}, |
|
{ |
|
"epoch": 71.31102578167855, |
|
"grad_norm": 4.208460330963135, |
|
"learning_rate": 1.4344487109160726e-05, |
|
"loss": 0.2141, |
|
"step": 260000 |
|
}, |
|
{ |
|
"epoch": 71.44816236972024, |
|
"grad_norm": 3.976590156555176, |
|
"learning_rate": 1.427591881513988e-05, |
|
"loss": 0.2146, |
|
"step": 260500 |
|
}, |
|
{ |
|
"epoch": 71.58529895776194, |
|
"grad_norm": 3.778451442718506, |
|
"learning_rate": 1.4207350521119034e-05, |
|
"loss": 0.2163, |
|
"step": 261000 |
|
}, |
|
{ |
|
"epoch": 71.72243554580362, |
|
"grad_norm": 4.75286340713501, |
|
"learning_rate": 1.4138782227098192e-05, |
|
"loss": 0.2194, |
|
"step": 261500 |
|
}, |
|
{ |
|
"epoch": 71.8595721338453, |
|
"grad_norm": 3.755993366241455, |
|
"learning_rate": 1.4070213933077344e-05, |
|
"loss": 0.2236, |
|
"step": 262000 |
|
}, |
|
{ |
|
"epoch": 71.996708721887, |
|
"grad_norm": 4.23431396484375, |
|
"learning_rate": 1.4001645639056502e-05, |
|
"loss": 0.224, |
|
"step": 262500 |
|
}, |
|
{ |
|
"epoch": 72.13384530992869, |
|
"grad_norm": 4.001950263977051, |
|
"learning_rate": 1.3933077345035655e-05, |
|
"loss": 0.2022, |
|
"step": 263000 |
|
}, |
|
{ |
|
"epoch": 72.27098189797037, |
|
"grad_norm": 3.7588768005371094, |
|
"learning_rate": 1.3864509051014813e-05, |
|
"loss": 0.2043, |
|
"step": 263500 |
|
}, |
|
{ |
|
"epoch": 72.40811848601207, |
|
"grad_norm": 4.171288013458252, |
|
"learning_rate": 1.3795940756993966e-05, |
|
"loss": 0.2065, |
|
"step": 264000 |
|
}, |
|
{ |
|
"epoch": 72.54525507405376, |
|
"grad_norm": 4.1884636878967285, |
|
"learning_rate": 1.3727372462973123e-05, |
|
"loss": 0.2081, |
|
"step": 264500 |
|
}, |
|
{ |
|
"epoch": 72.68239166209544, |
|
"grad_norm": 4.019055366516113, |
|
"learning_rate": 1.3658804168952278e-05, |
|
"loss": 0.212, |
|
"step": 265000 |
|
}, |
|
{ |
|
"epoch": 72.81952825013714, |
|
"grad_norm": 3.9061167240142822, |
|
"learning_rate": 1.359023587493143e-05, |
|
"loss": 0.2128, |
|
"step": 265500 |
|
}, |
|
{ |
|
"epoch": 72.95666483817882, |
|
"grad_norm": 4.590092182159424, |
|
"learning_rate": 1.3521667580910589e-05, |
|
"loss": 0.2149, |
|
"step": 266000 |
|
}, |
|
{ |
|
"epoch": 73.09380142622051, |
|
"grad_norm": 4.069841384887695, |
|
"learning_rate": 1.3453099286889741e-05, |
|
"loss": 0.2016, |
|
"step": 266500 |
|
}, |
|
{ |
|
"epoch": 73.23093801426221, |
|
"grad_norm": 3.7650821208953857, |
|
"learning_rate": 1.33845309928689e-05, |
|
"loss": 0.1969, |
|
"step": 267000 |
|
}, |
|
{ |
|
"epoch": 73.36807460230389, |
|
"grad_norm": 3.8244950771331787, |
|
"learning_rate": 1.3315962698848054e-05, |
|
"loss": 0.1984, |
|
"step": 267500 |
|
}, |
|
{ |
|
"epoch": 73.50521119034559, |
|
"grad_norm": 3.6921212673187256, |
|
"learning_rate": 1.324739440482721e-05, |
|
"loss": 0.2026, |
|
"step": 268000 |
|
}, |
|
{ |
|
"epoch": 73.64234777838728, |
|
"grad_norm": 4.225021839141846, |
|
"learning_rate": 1.3178826110806364e-05, |
|
"loss": 0.2036, |
|
"step": 268500 |
|
}, |
|
{ |
|
"epoch": 73.77948436642896, |
|
"grad_norm": 4.311788082122803, |
|
"learning_rate": 1.3110257816785517e-05, |
|
"loss": 0.2052, |
|
"step": 269000 |
|
}, |
|
{ |
|
"epoch": 73.91662095447066, |
|
"grad_norm": 4.360690116882324, |
|
"learning_rate": 1.3041689522764675e-05, |
|
"loss": 0.2081, |
|
"step": 269500 |
|
}, |
|
{ |
|
"epoch": 74.05375754251234, |
|
"grad_norm": 3.889430522918701, |
|
"learning_rate": 1.297312122874383e-05, |
|
"loss": 0.2006, |
|
"step": 270000 |
|
}, |
|
{ |
|
"epoch": 74.19089413055403, |
|
"grad_norm": 4.069758892059326, |
|
"learning_rate": 1.2904552934722986e-05, |
|
"loss": 0.1903, |
|
"step": 270500 |
|
}, |
|
{ |
|
"epoch": 74.32803071859573, |
|
"grad_norm": 3.5697872638702393, |
|
"learning_rate": 1.283598464070214e-05, |
|
"loss": 0.1921, |
|
"step": 271000 |
|
}, |
|
{ |
|
"epoch": 74.46516730663741, |
|
"grad_norm": 4.888301849365234, |
|
"learning_rate": 1.2767416346681296e-05, |
|
"loss": 0.1976, |
|
"step": 271500 |
|
}, |
|
{ |
|
"epoch": 74.6023038946791, |
|
"grad_norm": 4.195688247680664, |
|
"learning_rate": 1.269884805266045e-05, |
|
"loss": 0.1956, |
|
"step": 272000 |
|
}, |
|
{ |
|
"epoch": 74.7394404827208, |
|
"grad_norm": 3.5373120307922363, |
|
"learning_rate": 1.2630279758639604e-05, |
|
"loss": 0.1976, |
|
"step": 272500 |
|
}, |
|
{ |
|
"epoch": 74.87657707076248, |
|
"grad_norm": 4.081260681152344, |
|
"learning_rate": 1.2561711464618761e-05, |
|
"loss": 0.1993, |
|
"step": 273000 |
|
}, |
|
{ |
|
"epoch": 75.01371365880416, |
|
"grad_norm": 3.637251615524292, |
|
"learning_rate": 1.2493143170597916e-05, |
|
"loss": 0.1993, |
|
"step": 273500 |
|
}, |
|
{ |
|
"epoch": 75.15085024684586, |
|
"grad_norm": 4.6371355056762695, |
|
"learning_rate": 1.2424574876577072e-05, |
|
"loss": 0.1847, |
|
"step": 274000 |
|
}, |
|
{ |
|
"epoch": 75.28798683488755, |
|
"grad_norm": 3.781407594680786, |
|
"learning_rate": 1.2356006582556227e-05, |
|
"loss": 0.1862, |
|
"step": 274500 |
|
}, |
|
{ |
|
"epoch": 75.42512342292923, |
|
"grad_norm": 3.249769926071167, |
|
"learning_rate": 1.2287438288535381e-05, |
|
"loss": 0.189, |
|
"step": 275000 |
|
}, |
|
{ |
|
"epoch": 75.56226001097093, |
|
"grad_norm": 3.62080717086792, |
|
"learning_rate": 1.2218869994514537e-05, |
|
"loss": 0.1904, |
|
"step": 275500 |
|
}, |
|
{ |
|
"epoch": 75.69939659901262, |
|
"grad_norm": 3.6299779415130615, |
|
"learning_rate": 1.2150301700493692e-05, |
|
"loss": 0.1913, |
|
"step": 276000 |
|
}, |
|
{ |
|
"epoch": 75.8365331870543, |
|
"grad_norm": 4.178566932678223, |
|
"learning_rate": 1.2081733406472848e-05, |
|
"loss": 0.1916, |
|
"step": 276500 |
|
}, |
|
{ |
|
"epoch": 75.973669775096, |
|
"grad_norm": 3.7569074630737305, |
|
"learning_rate": 1.2013165112452002e-05, |
|
"loss": 0.1932, |
|
"step": 277000 |
|
}, |
|
{ |
|
"epoch": 76.11080636313768, |
|
"grad_norm": 3.6671714782714844, |
|
"learning_rate": 1.1944596818431158e-05, |
|
"loss": 0.1789, |
|
"step": 277500 |
|
}, |
|
{ |
|
"epoch": 76.24794295117937, |
|
"grad_norm": 4.360944747924805, |
|
"learning_rate": 1.1876028524410313e-05, |
|
"loss": 0.1799, |
|
"step": 278000 |
|
}, |
|
{ |
|
"epoch": 76.38507953922107, |
|
"grad_norm": 4.378243446350098, |
|
"learning_rate": 1.1807460230389467e-05, |
|
"loss": 0.1815, |
|
"step": 278500 |
|
}, |
|
{ |
|
"epoch": 76.52221612726275, |
|
"grad_norm": 3.7712574005126953, |
|
"learning_rate": 1.1738891936368624e-05, |
|
"loss": 0.1849, |
|
"step": 279000 |
|
}, |
|
{ |
|
"epoch": 76.65935271530444, |
|
"grad_norm": 3.6135239601135254, |
|
"learning_rate": 1.1670323642347778e-05, |
|
"loss": 0.1851, |
|
"step": 279500 |
|
}, |
|
{ |
|
"epoch": 76.79648930334614, |
|
"grad_norm": 4.262831687927246, |
|
"learning_rate": 1.1601755348326934e-05, |
|
"loss": 0.187, |
|
"step": 280000 |
|
}, |
|
{ |
|
"epoch": 76.93362589138782, |
|
"grad_norm": 3.7981927394866943, |
|
"learning_rate": 1.153318705430609e-05, |
|
"loss": 0.1867, |
|
"step": 280500 |
|
}, |
|
{ |
|
"epoch": 77.07076247942952, |
|
"grad_norm": 3.799161434173584, |
|
"learning_rate": 1.1464618760285245e-05, |
|
"loss": 0.1781, |
|
"step": 281000 |
|
}, |
|
{ |
|
"epoch": 77.2078990674712, |
|
"grad_norm": 3.511946201324463, |
|
"learning_rate": 1.1396050466264401e-05, |
|
"loss": 0.1721, |
|
"step": 281500 |
|
}, |
|
{ |
|
"epoch": 77.34503565551289, |
|
"grad_norm": 3.6062841415405273, |
|
"learning_rate": 1.1327482172243554e-05, |
|
"loss": 0.1768, |
|
"step": 282000 |
|
}, |
|
{ |
|
"epoch": 77.48217224355459, |
|
"grad_norm": 3.6229002475738525, |
|
"learning_rate": 1.125891387822271e-05, |
|
"loss": 0.1761, |
|
"step": 282500 |
|
}, |
|
{ |
|
"epoch": 77.61930883159627, |
|
"grad_norm": 4.036831378936768, |
|
"learning_rate": 1.1190345584201866e-05, |
|
"loss": 0.1775, |
|
"step": 283000 |
|
}, |
|
{ |
|
"epoch": 77.75644541963796, |
|
"grad_norm": 3.842072010040283, |
|
"learning_rate": 1.112177729018102e-05, |
|
"loss": 0.1794, |
|
"step": 283500 |
|
}, |
|
{ |
|
"epoch": 77.89358200767965, |
|
"grad_norm": 4.432040691375732, |
|
"learning_rate": 1.1053208996160177e-05, |
|
"loss": 0.1809, |
|
"step": 284000 |
|
}, |
|
{ |
|
"epoch": 78.03071859572134, |
|
"grad_norm": 3.7242350578308105, |
|
"learning_rate": 1.0984640702139331e-05, |
|
"loss": 0.1777, |
|
"step": 284500 |
|
}, |
|
{ |
|
"epoch": 78.16785518376302, |
|
"grad_norm": 3.5870072841644287, |
|
"learning_rate": 1.0916072408118487e-05, |
|
"loss": 0.1685, |
|
"step": 285000 |
|
}, |
|
{ |
|
"epoch": 78.30499177180472, |
|
"grad_norm": 4.315713405609131, |
|
"learning_rate": 1.0847504114097642e-05, |
|
"loss": 0.1691, |
|
"step": 285500 |
|
}, |
|
{ |
|
"epoch": 78.44212835984641, |
|
"grad_norm": 4.229913234710693, |
|
"learning_rate": 1.0778935820076796e-05, |
|
"loss": 0.1694, |
|
"step": 286000 |
|
}, |
|
{ |
|
"epoch": 78.57926494788809, |
|
"grad_norm": 4.238448143005371, |
|
"learning_rate": 1.0710367526055953e-05, |
|
"loss": 0.1722, |
|
"step": 286500 |
|
}, |
|
{ |
|
"epoch": 78.71640153592979, |
|
"grad_norm": 3.810060739517212, |
|
"learning_rate": 1.0641799232035107e-05, |
|
"loss": 0.1739, |
|
"step": 287000 |
|
}, |
|
{ |
|
"epoch": 78.85353812397148, |
|
"grad_norm": 3.8846802711486816, |
|
"learning_rate": 1.0573230938014263e-05, |
|
"loss": 0.1743, |
|
"step": 287500 |
|
}, |
|
{ |
|
"epoch": 78.99067471201316, |
|
"grad_norm": 3.194765567779541, |
|
"learning_rate": 1.0504662643993418e-05, |
|
"loss": 0.1771, |
|
"step": 288000 |
|
}, |
|
{ |
|
"epoch": 79.12781130005486, |
|
"grad_norm": 3.9391047954559326, |
|
"learning_rate": 1.0436094349972574e-05, |
|
"loss": 0.1636, |
|
"step": 288500 |
|
}, |
|
{ |
|
"epoch": 79.26494788809654, |
|
"grad_norm": 4.282817840576172, |
|
"learning_rate": 1.0367526055951728e-05, |
|
"loss": 0.1637, |
|
"step": 289000 |
|
}, |
|
{ |
|
"epoch": 79.40208447613823, |
|
"grad_norm": 3.725553512573242, |
|
"learning_rate": 1.0298957761930883e-05, |
|
"loss": 0.167, |
|
"step": 289500 |
|
}, |
|
{ |
|
"epoch": 79.53922106417993, |
|
"grad_norm": 3.7785303592681885, |
|
"learning_rate": 1.0230389467910039e-05, |
|
"loss": 0.1674, |
|
"step": 290000 |
|
}, |
|
{ |
|
"epoch": 79.67635765222161, |
|
"grad_norm": 3.667619228363037, |
|
"learning_rate": 1.0161821173889193e-05, |
|
"loss": 0.1661, |
|
"step": 290500 |
|
}, |
|
{ |
|
"epoch": 79.8134942402633, |
|
"grad_norm": 3.732048273086548, |
|
"learning_rate": 1.009325287986835e-05, |
|
"loss": 0.1696, |
|
"step": 291000 |
|
}, |
|
{ |
|
"epoch": 79.950630828305, |
|
"grad_norm": 4.32537841796875, |
|
"learning_rate": 1.0024684585847504e-05, |
|
"loss": 0.1697, |
|
"step": 291500 |
|
}, |
|
{ |
|
"epoch": 80.08776741634668, |
|
"grad_norm": 3.7802329063415527, |
|
"learning_rate": 9.95611629182666e-06, |
|
"loss": 0.1632, |
|
"step": 292000 |
|
}, |
|
{ |
|
"epoch": 80.22490400438836, |
|
"grad_norm": 4.236711025238037, |
|
"learning_rate": 9.887547997805815e-06, |
|
"loss": 0.1587, |
|
"step": 292500 |
|
}, |
|
{ |
|
"epoch": 80.36204059243006, |
|
"grad_norm": 3.8807108402252197, |
|
"learning_rate": 9.818979703784971e-06, |
|
"loss": 0.1588, |
|
"step": 293000 |
|
}, |
|
{ |
|
"epoch": 80.49917718047175, |
|
"grad_norm": 3.935448408126831, |
|
"learning_rate": 9.750411409764125e-06, |
|
"loss": 0.1598, |
|
"step": 293500 |
|
}, |
|
{ |
|
"epoch": 80.63631376851345, |
|
"grad_norm": 3.9982056617736816, |
|
"learning_rate": 9.68184311574328e-06, |
|
"loss": 0.1623, |
|
"step": 294000 |
|
}, |
|
{ |
|
"epoch": 80.77345035655513, |
|
"grad_norm": 4.14504337310791, |
|
"learning_rate": 9.613274821722436e-06, |
|
"loss": 0.1641, |
|
"step": 294500 |
|
}, |
|
{ |
|
"epoch": 80.91058694459682, |
|
"grad_norm": 3.4991772174835205, |
|
"learning_rate": 9.54470652770159e-06, |
|
"loss": 0.1647, |
|
"step": 295000 |
|
}, |
|
{ |
|
"epoch": 81.04772353263851, |
|
"grad_norm": 3.483520030975342, |
|
"learning_rate": 9.476138233680747e-06, |
|
"loss": 0.1608, |
|
"step": 295500 |
|
}, |
|
{ |
|
"epoch": 81.1848601206802, |
|
"grad_norm": 3.9691319465637207, |
|
"learning_rate": 9.407569939659903e-06, |
|
"loss": 0.1542, |
|
"step": 296000 |
|
}, |
|
{ |
|
"epoch": 81.32199670872188, |
|
"grad_norm": 4.031587600708008, |
|
"learning_rate": 9.339001645639057e-06, |
|
"loss": 0.1551, |
|
"step": 296500 |
|
}, |
|
{ |
|
"epoch": 81.45913329676358, |
|
"grad_norm": 3.9734628200531006, |
|
"learning_rate": 9.270433351618212e-06, |
|
"loss": 0.1533, |
|
"step": 297000 |
|
}, |
|
{ |
|
"epoch": 81.59626988480527, |
|
"grad_norm": 3.245915651321411, |
|
"learning_rate": 9.201865057597366e-06, |
|
"loss": 0.1573, |
|
"step": 297500 |
|
}, |
|
{ |
|
"epoch": 81.73340647284695, |
|
"grad_norm": 3.699833393096924, |
|
"learning_rate": 9.133296763576522e-06, |
|
"loss": 0.1575, |
|
"step": 298000 |
|
}, |
|
{ |
|
"epoch": 81.87054306088865, |
|
"grad_norm": 3.8309028148651123, |
|
"learning_rate": 9.064728469555677e-06, |
|
"loss": 0.1581, |
|
"step": 298500 |
|
}, |
|
{ |
|
"epoch": 82.00767964893033, |
|
"grad_norm": 4.079482078552246, |
|
"learning_rate": 8.996160175534833e-06, |
|
"loss": 0.1593, |
|
"step": 299000 |
|
}, |
|
{ |
|
"epoch": 82.14481623697202, |
|
"grad_norm": 3.2036027908325195, |
|
"learning_rate": 8.92759188151399e-06, |
|
"loss": 0.1485, |
|
"step": 299500 |
|
}, |
|
{ |
|
"epoch": 82.28195282501372, |
|
"grad_norm": 3.7567873001098633, |
|
"learning_rate": 8.859023587493144e-06, |
|
"loss": 0.1502, |
|
"step": 300000 |
|
}, |
|
{ |
|
"epoch": 82.4190894130554, |
|
"grad_norm": 4.391474723815918, |
|
"learning_rate": 8.7904552934723e-06, |
|
"loss": 0.1517, |
|
"step": 300500 |
|
}, |
|
{ |
|
"epoch": 82.55622600109709, |
|
"grad_norm": 2.887322187423706, |
|
"learning_rate": 8.721886999451453e-06, |
|
"loss": 0.1535, |
|
"step": 301000 |
|
}, |
|
{ |
|
"epoch": 82.69336258913879, |
|
"grad_norm": 3.5882978439331055, |
|
"learning_rate": 8.653318705430609e-06, |
|
"loss": 0.1525, |
|
"step": 301500 |
|
}, |
|
{ |
|
"epoch": 82.83049917718047, |
|
"grad_norm": 3.38724946975708, |
|
"learning_rate": 8.584750411409765e-06, |
|
"loss": 0.1541, |
|
"step": 302000 |
|
}, |
|
{ |
|
"epoch": 82.96763576522216, |
|
"grad_norm": 3.582343578338623, |
|
"learning_rate": 8.51618211738892e-06, |
|
"loss": 0.1539, |
|
"step": 302500 |
|
}, |
|
{ |
|
"epoch": 83.10477235326385, |
|
"grad_norm": 3.700831413269043, |
|
"learning_rate": 8.447613823368076e-06, |
|
"loss": 0.1472, |
|
"step": 303000 |
|
}, |
|
{ |
|
"epoch": 83.24190894130554, |
|
"grad_norm": 3.810107707977295, |
|
"learning_rate": 8.37904552934723e-06, |
|
"loss": 0.1438, |
|
"step": 303500 |
|
}, |
|
{ |
|
"epoch": 83.37904552934722, |
|
"grad_norm": 3.461057424545288, |
|
"learning_rate": 8.310477235326386e-06, |
|
"loss": 0.1468, |
|
"step": 304000 |
|
}, |
|
{ |
|
"epoch": 83.51618211738892, |
|
"grad_norm": 3.1016461849212646, |
|
"learning_rate": 8.24190894130554e-06, |
|
"loss": 0.147, |
|
"step": 304500 |
|
}, |
|
{ |
|
"epoch": 83.6533187054306, |
|
"grad_norm": 3.615780830383301, |
|
"learning_rate": 8.173340647284695e-06, |
|
"loss": 0.1484, |
|
"step": 305000 |
|
}, |
|
{ |
|
"epoch": 83.79045529347229, |
|
"grad_norm": 3.3265013694763184, |
|
"learning_rate": 8.104772353263851e-06, |
|
"loss": 0.1468, |
|
"step": 305500 |
|
}, |
|
{ |
|
"epoch": 83.92759188151399, |
|
"grad_norm": 3.722999334335327, |
|
"learning_rate": 8.036204059243006e-06, |
|
"loss": 0.1503, |
|
"step": 306000 |
|
}, |
|
{ |
|
"epoch": 84.06472846955567, |
|
"grad_norm": 3.4315872192382812, |
|
"learning_rate": 7.967635765222162e-06, |
|
"loss": 0.1465, |
|
"step": 306500 |
|
}, |
|
{ |
|
"epoch": 84.20186505759737, |
|
"grad_norm": 3.664315700531006, |
|
"learning_rate": 7.899067471201317e-06, |
|
"loss": 0.1414, |
|
"step": 307000 |
|
}, |
|
{ |
|
"epoch": 84.33900164563906, |
|
"grad_norm": 2.992607831954956, |
|
"learning_rate": 7.830499177180473e-06, |
|
"loss": 0.1412, |
|
"step": 307500 |
|
}, |
|
{ |
|
"epoch": 84.47613823368074, |
|
"grad_norm": 3.560657024383545, |
|
"learning_rate": 7.761930883159627e-06, |
|
"loss": 0.1425, |
|
"step": 308000 |
|
}, |
|
{ |
|
"epoch": 84.61327482172244, |
|
"grad_norm": 4.001883506774902, |
|
"learning_rate": 7.693362589138782e-06, |
|
"loss": 0.145, |
|
"step": 308500 |
|
}, |
|
{ |
|
"epoch": 84.75041140976413, |
|
"grad_norm": 3.371948480606079, |
|
"learning_rate": 7.624794295117937e-06, |
|
"loss": 0.1476, |
|
"step": 309000 |
|
}, |
|
{ |
|
"epoch": 84.88754799780581, |
|
"grad_norm": 3.9280834197998047, |
|
"learning_rate": 7.556226001097093e-06, |
|
"loss": 0.1455, |
|
"step": 309500 |
|
}, |
|
{ |
|
"epoch": 85.02468458584751, |
|
"grad_norm": 3.2914552688598633, |
|
"learning_rate": 7.4876577070762485e-06, |
|
"loss": 0.1434, |
|
"step": 310000 |
|
}, |
|
{ |
|
"epoch": 85.1618211738892, |
|
"grad_norm": 3.4161980152130127, |
|
"learning_rate": 7.419089413055404e-06, |
|
"loss": 0.138, |
|
"step": 310500 |
|
}, |
|
{ |
|
"epoch": 85.29895776193088, |
|
"grad_norm": 3.9036171436309814, |
|
"learning_rate": 7.350521119034559e-06, |
|
"loss": 0.1393, |
|
"step": 311000 |
|
}, |
|
{ |
|
"epoch": 85.43609434997258, |
|
"grad_norm": 3.8328452110290527, |
|
"learning_rate": 7.2819528250137145e-06, |
|
"loss": 0.1389, |
|
"step": 311500 |
|
}, |
|
{ |
|
"epoch": 85.57323093801426, |
|
"grad_norm": 3.2638742923736572, |
|
"learning_rate": 7.21338453099287e-06, |
|
"loss": 0.1387, |
|
"step": 312000 |
|
}, |
|
{ |
|
"epoch": 85.71036752605595, |
|
"grad_norm": 3.8440749645233154, |
|
"learning_rate": 7.144816236972024e-06, |
|
"loss": 0.1413, |
|
"step": 312500 |
|
}, |
|
{ |
|
"epoch": 85.84750411409765, |
|
"grad_norm": 4.172990798950195, |
|
"learning_rate": 7.07624794295118e-06, |
|
"loss": 0.1409, |
|
"step": 313000 |
|
}, |
|
{ |
|
"epoch": 85.98464070213933, |
|
"grad_norm": 3.7025864124298096, |
|
"learning_rate": 7.007679648930335e-06, |
|
"loss": 0.1412, |
|
"step": 313500 |
|
}, |
|
{ |
|
"epoch": 86.12177729018102, |
|
"grad_norm": 2.4466094970703125, |
|
"learning_rate": 6.93911135490949e-06, |
|
"loss": 0.1346, |
|
"step": 314000 |
|
}, |
|
{ |
|
"epoch": 86.25891387822271, |
|
"grad_norm": 3.610511541366577, |
|
"learning_rate": 6.870543060888646e-06, |
|
"loss": 0.1346, |
|
"step": 314500 |
|
}, |
|
{ |
|
"epoch": 86.3960504662644, |
|
"grad_norm": 3.2303617000579834, |
|
"learning_rate": 6.801974766867801e-06, |
|
"loss": 0.1357, |
|
"step": 315000 |
|
}, |
|
{ |
|
"epoch": 86.53318705430608, |
|
"grad_norm": 3.74819016456604, |
|
"learning_rate": 6.733406472846956e-06, |
|
"loss": 0.1377, |
|
"step": 315500 |
|
}, |
|
{ |
|
"epoch": 86.67032364234778, |
|
"grad_norm": 3.3001086711883545, |
|
"learning_rate": 6.664838178826111e-06, |
|
"loss": 0.1374, |
|
"step": 316000 |
|
}, |
|
{ |
|
"epoch": 86.80746023038947, |
|
"grad_norm": 3.8687660694122314, |
|
"learning_rate": 6.596269884805266e-06, |
|
"loss": 0.1381, |
|
"step": 316500 |
|
}, |
|
{ |
|
"epoch": 86.94459681843115, |
|
"grad_norm": 3.627427101135254, |
|
"learning_rate": 6.527701590784421e-06, |
|
"loss": 0.1374, |
|
"step": 317000 |
|
}, |
|
{ |
|
"epoch": 87.08173340647285, |
|
"grad_norm": 3.2286431789398193, |
|
"learning_rate": 6.459133296763577e-06, |
|
"loss": 0.1327, |
|
"step": 317500 |
|
}, |
|
{ |
|
"epoch": 87.21886999451453, |
|
"grad_norm": 2.8570611476898193, |
|
"learning_rate": 6.390565002742732e-06, |
|
"loss": 0.1322, |
|
"step": 318000 |
|
}, |
|
{ |
|
"epoch": 87.35600658255622, |
|
"grad_norm": 3.3692467212677, |
|
"learning_rate": 6.321996708721887e-06, |
|
"loss": 0.1315, |
|
"step": 318500 |
|
}, |
|
{ |
|
"epoch": 87.49314317059792, |
|
"grad_norm": 3.5185604095458984, |
|
"learning_rate": 6.253428414701043e-06, |
|
"loss": 0.1327, |
|
"step": 319000 |
|
}, |
|
{ |
|
"epoch": 87.6302797586396, |
|
"grad_norm": 3.416106700897217, |
|
"learning_rate": 6.184860120680198e-06, |
|
"loss": 0.1328, |
|
"step": 319500 |
|
}, |
|
{ |
|
"epoch": 87.7674163466813, |
|
"grad_norm": 2.7670998573303223, |
|
"learning_rate": 6.116291826659353e-06, |
|
"loss": 0.1325, |
|
"step": 320000 |
|
}, |
|
{ |
|
"epoch": 87.90455293472299, |
|
"grad_norm": 3.5294463634490967, |
|
"learning_rate": 6.047723532638509e-06, |
|
"loss": 0.1355, |
|
"step": 320500 |
|
}, |
|
{ |
|
"epoch": 88.04168952276467, |
|
"grad_norm": 2.728625535964966, |
|
"learning_rate": 5.979155238617663e-06, |
|
"loss": 0.1316, |
|
"step": 321000 |
|
}, |
|
{ |
|
"epoch": 88.17882611080637, |
|
"grad_norm": 3.675401449203491, |
|
"learning_rate": 5.9105869445968184e-06, |
|
"loss": 0.1279, |
|
"step": 321500 |
|
}, |
|
{ |
|
"epoch": 88.31596269884805, |
|
"grad_norm": 3.3878486156463623, |
|
"learning_rate": 5.842018650575974e-06, |
|
"loss": 0.1286, |
|
"step": 322000 |
|
}, |
|
{ |
|
"epoch": 88.45309928688974, |
|
"grad_norm": 3.215028762817383, |
|
"learning_rate": 5.773450356555129e-06, |
|
"loss": 0.1285, |
|
"step": 322500 |
|
}, |
|
{ |
|
"epoch": 88.59023587493144, |
|
"grad_norm": 3.3920953273773193, |
|
"learning_rate": 5.704882062534284e-06, |
|
"loss": 0.1309, |
|
"step": 323000 |
|
}, |
|
{ |
|
"epoch": 88.72737246297312, |
|
"grad_norm": 4.03735876083374, |
|
"learning_rate": 5.63631376851344e-06, |
|
"loss": 0.1288, |
|
"step": 323500 |
|
}, |
|
{ |
|
"epoch": 88.8645090510148, |
|
"grad_norm": 3.8700907230377197, |
|
"learning_rate": 5.567745474492595e-06, |
|
"loss": 0.1314, |
|
"step": 324000 |
|
}, |
|
{ |
|
"epoch": 89.0016456390565, |
|
"grad_norm": 3.8290393352508545, |
|
"learning_rate": 5.4991771804717495e-06, |
|
"loss": 0.1296, |
|
"step": 324500 |
|
}, |
|
{ |
|
"epoch": 89.13878222709819, |
|
"grad_norm": 3.1456034183502197, |
|
"learning_rate": 5.430608886450905e-06, |
|
"loss": 0.1225, |
|
"step": 325000 |
|
}, |
|
{ |
|
"epoch": 89.27591881513987, |
|
"grad_norm": 3.4296352863311768, |
|
"learning_rate": 5.362040592430061e-06, |
|
"loss": 0.1241, |
|
"step": 325500 |
|
}, |
|
{ |
|
"epoch": 89.41305540318157, |
|
"grad_norm": 3.2781150341033936, |
|
"learning_rate": 5.293472298409216e-06, |
|
"loss": 0.1245, |
|
"step": 326000 |
|
}, |
|
{ |
|
"epoch": 89.55019199122326, |
|
"grad_norm": 2.664435625076294, |
|
"learning_rate": 5.224904004388371e-06, |
|
"loss": 0.1266, |
|
"step": 326500 |
|
}, |
|
{ |
|
"epoch": 89.68732857926494, |
|
"grad_norm": 4.348361015319824, |
|
"learning_rate": 5.156335710367526e-06, |
|
"loss": 0.127, |
|
"step": 327000 |
|
}, |
|
{ |
|
"epoch": 89.82446516730664, |
|
"grad_norm": 3.075655698776245, |
|
"learning_rate": 5.0877674163466815e-06, |
|
"loss": 0.1255, |
|
"step": 327500 |
|
}, |
|
{ |
|
"epoch": 89.96160175534833, |
|
"grad_norm": 3.5324909687042236, |
|
"learning_rate": 5.019199122325837e-06, |
|
"loss": 0.1269, |
|
"step": 328000 |
|
}, |
|
{ |
|
"epoch": 90.09873834339001, |
|
"grad_norm": 2.883422374725342, |
|
"learning_rate": 4.950630828304992e-06, |
|
"loss": 0.1237, |
|
"step": 328500 |
|
}, |
|
{ |
|
"epoch": 90.23587493143171, |
|
"grad_norm": 3.225177049636841, |
|
"learning_rate": 4.8820625342841474e-06, |
|
"loss": 0.1221, |
|
"step": 329000 |
|
}, |
|
{ |
|
"epoch": 90.3730115194734, |
|
"grad_norm": 3.199986457824707, |
|
"learning_rate": 4.813494240263303e-06, |
|
"loss": 0.1225, |
|
"step": 329500 |
|
}, |
|
{ |
|
"epoch": 90.51014810751508, |
|
"grad_norm": 3.1552860736846924, |
|
"learning_rate": 4.744925946242457e-06, |
|
"loss": 0.1231, |
|
"step": 330000 |
|
}, |
|
{ |
|
"epoch": 90.64728469555678, |
|
"grad_norm": 3.491950750350952, |
|
"learning_rate": 4.6763576522216126e-06, |
|
"loss": 0.1227, |
|
"step": 330500 |
|
}, |
|
{ |
|
"epoch": 90.78442128359846, |
|
"grad_norm": 3.0924017429351807, |
|
"learning_rate": 4.607789358200768e-06, |
|
"loss": 0.1235, |
|
"step": 331000 |
|
}, |
|
{ |
|
"epoch": 90.92155787164015, |
|
"grad_norm": 3.1873390674591064, |
|
"learning_rate": 4.539221064179924e-06, |
|
"loss": 0.1217, |
|
"step": 331500 |
|
}, |
|
{ |
|
"epoch": 91.05869445968185, |
|
"grad_norm": 3.9850494861602783, |
|
"learning_rate": 4.4706527701590785e-06, |
|
"loss": 0.122, |
|
"step": 332000 |
|
}, |
|
{ |
|
"epoch": 91.19583104772353, |
|
"grad_norm": 3.238954782485962, |
|
"learning_rate": 4.402084476138234e-06, |
|
"loss": 0.1207, |
|
"step": 332500 |
|
}, |
|
{ |
|
"epoch": 91.33296763576523, |
|
"grad_norm": 2.354977607727051, |
|
"learning_rate": 4.333516182117389e-06, |
|
"loss": 0.1207, |
|
"step": 333000 |
|
}, |
|
{ |
|
"epoch": 91.47010422380691, |
|
"grad_norm": 3.9573888778686523, |
|
"learning_rate": 4.2649478880965445e-06, |
|
"loss": 0.1195, |
|
"step": 333500 |
|
}, |
|
{ |
|
"epoch": 91.6072408118486, |
|
"grad_norm": 3.099452495574951, |
|
"learning_rate": 4.196379594075699e-06, |
|
"loss": 0.1199, |
|
"step": 334000 |
|
}, |
|
{ |
|
"epoch": 91.7443773998903, |
|
"grad_norm": 2.886826753616333, |
|
"learning_rate": 4.127811300054855e-06, |
|
"loss": 0.1191, |
|
"step": 334500 |
|
}, |
|
{ |
|
"epoch": 91.88151398793198, |
|
"grad_norm": 2.478618860244751, |
|
"learning_rate": 4.0592430060340105e-06, |
|
"loss": 0.121, |
|
"step": 335000 |
|
}, |
|
{ |
|
"epoch": 92.01865057597367, |
|
"grad_norm": 3.46500301361084, |
|
"learning_rate": 3.990674712013166e-06, |
|
"loss": 0.1201, |
|
"step": 335500 |
|
}, |
|
{ |
|
"epoch": 92.15578716401536, |
|
"grad_norm": 2.743831157684326, |
|
"learning_rate": 3.92210641799232e-06, |
|
"loss": 0.1162, |
|
"step": 336000 |
|
}, |
|
{ |
|
"epoch": 92.29292375205705, |
|
"grad_norm": 3.4375343322753906, |
|
"learning_rate": 3.853538123971476e-06, |
|
"loss": 0.1159, |
|
"step": 336500 |
|
}, |
|
{ |
|
"epoch": 92.43006034009873, |
|
"grad_norm": 3.173588991165161, |
|
"learning_rate": 3.7849698299506313e-06, |
|
"loss": 0.1183, |
|
"step": 337000 |
|
}, |
|
{ |
|
"epoch": 92.56719692814043, |
|
"grad_norm": 3.2577898502349854, |
|
"learning_rate": 3.7164015359297867e-06, |
|
"loss": 0.1167, |
|
"step": 337500 |
|
}, |
|
{ |
|
"epoch": 92.70433351618212, |
|
"grad_norm": 3.3100554943084717, |
|
"learning_rate": 3.647833241908941e-06, |
|
"loss": 0.1173, |
|
"step": 338000 |
|
}, |
|
{ |
|
"epoch": 92.8414701042238, |
|
"grad_norm": 3.179342269897461, |
|
"learning_rate": 3.579264947888097e-06, |
|
"loss": 0.1165, |
|
"step": 338500 |
|
}, |
|
{ |
|
"epoch": 92.9786066922655, |
|
"grad_norm": 3.096334218978882, |
|
"learning_rate": 3.510696653867252e-06, |
|
"loss": 0.1176, |
|
"step": 339000 |
|
}, |
|
{ |
|
"epoch": 93.11574328030719, |
|
"grad_norm": 2.9532058238983154, |
|
"learning_rate": 3.4421283598464067e-06, |
|
"loss": 0.1142, |
|
"step": 339500 |
|
}, |
|
{ |
|
"epoch": 93.25287986834887, |
|
"grad_norm": 3.717654228210449, |
|
"learning_rate": 3.3735600658255624e-06, |
|
"loss": 0.1143, |
|
"step": 340000 |
|
}, |
|
{ |
|
"epoch": 93.39001645639057, |
|
"grad_norm": 3.084181308746338, |
|
"learning_rate": 3.3049917718047177e-06, |
|
"loss": 0.1146, |
|
"step": 340500 |
|
}, |
|
{ |
|
"epoch": 93.52715304443225, |
|
"grad_norm": 3.636079788208008, |
|
"learning_rate": 3.236423477783873e-06, |
|
"loss": 0.1147, |
|
"step": 341000 |
|
}, |
|
{ |
|
"epoch": 93.66428963247394, |
|
"grad_norm": 2.80279278755188, |
|
"learning_rate": 3.167855183763028e-06, |
|
"loss": 0.1153, |
|
"step": 341500 |
|
}, |
|
{ |
|
"epoch": 93.80142622051564, |
|
"grad_norm": 2.7597951889038086, |
|
"learning_rate": 3.0992868897421833e-06, |
|
"loss": 0.1133, |
|
"step": 342000 |
|
}, |
|
{ |
|
"epoch": 93.93856280855732, |
|
"grad_norm": 3.1757214069366455, |
|
"learning_rate": 3.0307185957213386e-06, |
|
"loss": 0.1159, |
|
"step": 342500 |
|
}, |
|
{ |
|
"epoch": 94.075699396599, |
|
"grad_norm": 3.245447874069214, |
|
"learning_rate": 2.962150301700494e-06, |
|
"loss": 0.1129, |
|
"step": 343000 |
|
}, |
|
{ |
|
"epoch": 94.2128359846407, |
|
"grad_norm": 2.7797350883483887, |
|
"learning_rate": 2.8935820076796493e-06, |
|
"loss": 0.1117, |
|
"step": 343500 |
|
}, |
|
{ |
|
"epoch": 94.34997257268239, |
|
"grad_norm": 3.2236897945404053, |
|
"learning_rate": 2.825013713658804e-06, |
|
"loss": 0.112, |
|
"step": 344000 |
|
}, |
|
{ |
|
"epoch": 94.48710916072407, |
|
"grad_norm": 3.792973756790161, |
|
"learning_rate": 2.7564454196379595e-06, |
|
"loss": 0.1118, |
|
"step": 344500 |
|
}, |
|
{ |
|
"epoch": 94.62424574876577, |
|
"grad_norm": 2.6465868949890137, |
|
"learning_rate": 2.687877125617115e-06, |
|
"loss": 0.1124, |
|
"step": 345000 |
|
}, |
|
{ |
|
"epoch": 94.76138233680746, |
|
"grad_norm": 2.944362163543701, |
|
"learning_rate": 2.61930883159627e-06, |
|
"loss": 0.113, |
|
"step": 345500 |
|
}, |
|
{ |
|
"epoch": 94.89851892484916, |
|
"grad_norm": 3.0111756324768066, |
|
"learning_rate": 2.550740537575425e-06, |
|
"loss": 0.1114, |
|
"step": 346000 |
|
}, |
|
{ |
|
"epoch": 95.03565551289084, |
|
"grad_norm": 3.691293954849243, |
|
"learning_rate": 2.4821722435545808e-06, |
|
"loss": 0.1119, |
|
"step": 346500 |
|
}, |
|
{ |
|
"epoch": 95.17279210093253, |
|
"grad_norm": 2.5828378200531006, |
|
"learning_rate": 2.4136039495337357e-06, |
|
"loss": 0.1106, |
|
"step": 347000 |
|
}, |
|
{ |
|
"epoch": 95.30992868897422, |
|
"grad_norm": 3.733536720275879, |
|
"learning_rate": 2.3450356555128906e-06, |
|
"loss": 0.1091, |
|
"step": 347500 |
|
}, |
|
{ |
|
"epoch": 95.44706527701591, |
|
"grad_norm": 3.203916311264038, |
|
"learning_rate": 2.2764673614920463e-06, |
|
"loss": 0.1102, |
|
"step": 348000 |
|
}, |
|
{ |
|
"epoch": 95.5842018650576, |
|
"grad_norm": 2.8628923892974854, |
|
"learning_rate": 2.2078990674712012e-06, |
|
"loss": 0.1123, |
|
"step": 348500 |
|
}, |
|
{ |
|
"epoch": 95.72133845309929, |
|
"grad_norm": 3.761380195617676, |
|
"learning_rate": 2.1393307734503565e-06, |
|
"loss": 0.1097, |
|
"step": 349000 |
|
}, |
|
{ |
|
"epoch": 95.85847504114098, |
|
"grad_norm": 2.951045036315918, |
|
"learning_rate": 2.070762479429512e-06, |
|
"loss": 0.1106, |
|
"step": 349500 |
|
}, |
|
{ |
|
"epoch": 95.99561162918266, |
|
"grad_norm": 3.6867475509643555, |
|
"learning_rate": 2.002194185408667e-06, |
|
"loss": 0.1098, |
|
"step": 350000 |
|
}, |
|
{ |
|
"epoch": 96.13274821722436, |
|
"grad_norm": 3.162787675857544, |
|
"learning_rate": 1.933625891387822e-06, |
|
"loss": 0.1077, |
|
"step": 350500 |
|
}, |
|
{ |
|
"epoch": 96.26988480526605, |
|
"grad_norm": 3.5869784355163574, |
|
"learning_rate": 1.8650575973669776e-06, |
|
"loss": 0.1084, |
|
"step": 351000 |
|
}, |
|
{ |
|
"epoch": 96.40702139330773, |
|
"grad_norm": 3.4423720836639404, |
|
"learning_rate": 1.7964893033461327e-06, |
|
"loss": 0.1075, |
|
"step": 351500 |
|
}, |
|
{ |
|
"epoch": 96.54415798134943, |
|
"grad_norm": 3.4415297508239746, |
|
"learning_rate": 1.727921009325288e-06, |
|
"loss": 0.1081, |
|
"step": 352000 |
|
}, |
|
{ |
|
"epoch": 96.68129456939111, |
|
"grad_norm": 2.9299986362457275, |
|
"learning_rate": 1.6593527153044432e-06, |
|
"loss": 0.1088, |
|
"step": 352500 |
|
}, |
|
{ |
|
"epoch": 96.8184311574328, |
|
"grad_norm": 3.395812511444092, |
|
"learning_rate": 1.5907844212835987e-06, |
|
"loss": 0.1073, |
|
"step": 353000 |
|
}, |
|
{ |
|
"epoch": 96.9555677454745, |
|
"grad_norm": 3.1126651763916016, |
|
"learning_rate": 1.5222161272627538e-06, |
|
"loss": 0.1086, |
|
"step": 353500 |
|
}, |
|
{ |
|
"epoch": 97.09270433351618, |
|
"grad_norm": 2.898881435394287, |
|
"learning_rate": 1.453647833241909e-06, |
|
"loss": 0.1067, |
|
"step": 354000 |
|
}, |
|
{ |
|
"epoch": 97.22984092155787, |
|
"grad_norm": 3.300261974334717, |
|
"learning_rate": 1.3850795392210643e-06, |
|
"loss": 0.1082, |
|
"step": 354500 |
|
}, |
|
{ |
|
"epoch": 97.36697750959956, |
|
"grad_norm": 3.1039366722106934, |
|
"learning_rate": 1.3165112452002194e-06, |
|
"loss": 0.1073, |
|
"step": 355000 |
|
}, |
|
{ |
|
"epoch": 97.50411409764125, |
|
"grad_norm": 3.544015645980835, |
|
"learning_rate": 1.2479429511793747e-06, |
|
"loss": 0.1075, |
|
"step": 355500 |
|
}, |
|
{ |
|
"epoch": 97.64125068568293, |
|
"grad_norm": 2.692314624786377, |
|
"learning_rate": 1.17937465715853e-06, |
|
"loss": 0.1075, |
|
"step": 356000 |
|
}, |
|
{ |
|
"epoch": 97.77838727372463, |
|
"grad_norm": 2.966008186340332, |
|
"learning_rate": 1.1108063631376851e-06, |
|
"loss": 0.1077, |
|
"step": 356500 |
|
}, |
|
{ |
|
"epoch": 97.91552386176632, |
|
"grad_norm": 2.9783902168273926, |
|
"learning_rate": 1.0422380691168404e-06, |
|
"loss": 0.1076, |
|
"step": 357000 |
|
}, |
|
{ |
|
"epoch": 98.052660449808, |
|
"grad_norm": 3.1313674449920654, |
|
"learning_rate": 9.736697750959958e-07, |
|
"loss": 0.1052, |
|
"step": 357500 |
|
}, |
|
{ |
|
"epoch": 98.1897970378497, |
|
"grad_norm": 3.143101453781128, |
|
"learning_rate": 9.05101481075151e-07, |
|
"loss": 0.1038, |
|
"step": 358000 |
|
}, |
|
{ |
|
"epoch": 98.32693362589139, |
|
"grad_norm": 3.30661940574646, |
|
"learning_rate": 8.365331870543062e-07, |
|
"loss": 0.1059, |
|
"step": 358500 |
|
}, |
|
{ |
|
"epoch": 98.46407021393308, |
|
"grad_norm": 3.1109259128570557, |
|
"learning_rate": 7.679648930334613e-07, |
|
"loss": 0.1062, |
|
"step": 359000 |
|
}, |
|
{ |
|
"epoch": 98.60120680197477, |
|
"grad_norm": 3.4787518978118896, |
|
"learning_rate": 6.993965990126166e-07, |
|
"loss": 0.1037, |
|
"step": 359500 |
|
}, |
|
{ |
|
"epoch": 98.73834339001645, |
|
"grad_norm": 3.0321710109710693, |
|
"learning_rate": 6.308283049917719e-07, |
|
"loss": 0.1056, |
|
"step": 360000 |
|
}, |
|
{ |
|
"epoch": 98.87547997805815, |
|
"grad_norm": 3.13843035697937, |
|
"learning_rate": 5.622600109709271e-07, |
|
"loss": 0.1058, |
|
"step": 360500 |
|
}, |
|
{ |
|
"epoch": 99.01261656609984, |
|
"grad_norm": 2.8458125591278076, |
|
"learning_rate": 4.936917169500823e-07, |
|
"loss": 0.1051, |
|
"step": 361000 |
|
}, |
|
{ |
|
"epoch": 99.14975315414152, |
|
"grad_norm": 2.781649589538574, |
|
"learning_rate": 4.2512342292923756e-07, |
|
"loss": 0.1039, |
|
"step": 361500 |
|
}, |
|
{ |
|
"epoch": 99.28688974218322, |
|
"grad_norm": 3.680230140686035, |
|
"learning_rate": 3.565551289083928e-07, |
|
"loss": 0.1046, |
|
"step": 362000 |
|
}, |
|
{ |
|
"epoch": 99.4240263302249, |
|
"grad_norm": 3.4057164192199707, |
|
"learning_rate": 2.87986834887548e-07, |
|
"loss": 0.1064, |
|
"step": 362500 |
|
}, |
|
{ |
|
"epoch": 99.56116291826659, |
|
"grad_norm": 3.2353737354278564, |
|
"learning_rate": 2.1941854086670326e-07, |
|
"loss": 0.1056, |
|
"step": 363000 |
|
}, |
|
{ |
|
"epoch": 99.69829950630829, |
|
"grad_norm": 3.273487091064453, |
|
"learning_rate": 1.5085024684585848e-07, |
|
"loss": 0.1034, |
|
"step": 363500 |
|
}, |
|
{ |
|
"epoch": 99.83543609434997, |
|
"grad_norm": 2.404613733291626, |
|
"learning_rate": 8.228195282501371e-08, |
|
"loss": 0.1058, |
|
"step": 364000 |
|
}, |
|
{ |
|
"epoch": 99.97257268239166, |
|
"grad_norm": 3.081162691116333, |
|
"learning_rate": 1.3713658804168954e-08, |
|
"loss": 0.1047, |
|
"step": 364500 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"step": 364600, |
|
"total_flos": 1.1071101974803907e+18, |
|
"train_loss": 1.123260397659967, |
|
"train_runtime": 96380.4057, |
|
"train_samples_per_second": 121.035, |
|
"train_steps_per_second": 3.783 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 364600, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 100, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.1071101974803907e+18, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|