|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 4819, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.00020751193193608634, |
|
"grad_norm": 23.81510217286444, |
|
"learning_rate": 2.0746887966804982e-08, |
|
"loss": 1.3923, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0010375596596804316, |
|
"grad_norm": 23.986426679578184, |
|
"learning_rate": 1.037344398340249e-07, |
|
"loss": 1.4149, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.002075119319360863, |
|
"grad_norm": 21.906176707284583, |
|
"learning_rate": 2.074688796680498e-07, |
|
"loss": 1.4046, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.003112678979041295, |
|
"grad_norm": 15.167476223997951, |
|
"learning_rate": 3.112033195020747e-07, |
|
"loss": 1.3592, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.004150238638721726, |
|
"grad_norm": 9.140589856353586, |
|
"learning_rate": 4.149377593360996e-07, |
|
"loss": 1.2548, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.005187798298402158, |
|
"grad_norm": 10.826361201441884, |
|
"learning_rate": 5.186721991701245e-07, |
|
"loss": 1.1628, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.00622535795808259, |
|
"grad_norm": 9.062979414726044, |
|
"learning_rate": 6.224066390041494e-07, |
|
"loss": 1.0633, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.007262917617763021, |
|
"grad_norm": 3.7471401092788903, |
|
"learning_rate": 7.261410788381744e-07, |
|
"loss": 1.0125, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.008300477277443452, |
|
"grad_norm": 3.4052281488293517, |
|
"learning_rate": 8.298755186721992e-07, |
|
"loss": 0.9673, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.009338036937123885, |
|
"grad_norm": 3.4155319860158584, |
|
"learning_rate": 9.336099585062241e-07, |
|
"loss": 0.9582, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.010375596596804317, |
|
"grad_norm": 3.0460439284849947, |
|
"learning_rate": 1.037344398340249e-06, |
|
"loss": 0.9393, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.011413156256484747, |
|
"grad_norm": 3.0573711664380117, |
|
"learning_rate": 1.141078838174274e-06, |
|
"loss": 0.928, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.01245071591616518, |
|
"grad_norm": 3.104368834698698, |
|
"learning_rate": 1.2448132780082988e-06, |
|
"loss": 0.9128, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.013488275575845612, |
|
"grad_norm": 3.1380190764833094, |
|
"learning_rate": 1.3485477178423237e-06, |
|
"loss": 0.9156, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.014525835235526042, |
|
"grad_norm": 3.1006570296836182, |
|
"learning_rate": 1.4522821576763488e-06, |
|
"loss": 0.905, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.015563394895206474, |
|
"grad_norm": 3.127744100257649, |
|
"learning_rate": 1.5560165975103735e-06, |
|
"loss": 0.9163, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.016600954554886905, |
|
"grad_norm": 3.038437602227199, |
|
"learning_rate": 1.6597510373443984e-06, |
|
"loss": 0.8884, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.017638514214567337, |
|
"grad_norm": 2.9596644094641413, |
|
"learning_rate": 1.7634854771784235e-06, |
|
"loss": 0.8923, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.01867607387424777, |
|
"grad_norm": 2.958917381373649, |
|
"learning_rate": 1.8672199170124482e-06, |
|
"loss": 0.8939, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.0197136335339282, |
|
"grad_norm": 3.13129582575627, |
|
"learning_rate": 1.970954356846473e-06, |
|
"loss": 0.8671, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.020751193193608634, |
|
"grad_norm": 3.212698731191082, |
|
"learning_rate": 2.074688796680498e-06, |
|
"loss": 0.8846, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.021788752853289062, |
|
"grad_norm": 2.9778959899019273, |
|
"learning_rate": 2.178423236514523e-06, |
|
"loss": 0.8747, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.022826312512969495, |
|
"grad_norm": 3.02722706158588, |
|
"learning_rate": 2.282157676348548e-06, |
|
"loss": 0.8841, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.023863872172649927, |
|
"grad_norm": 3.104436315782777, |
|
"learning_rate": 2.385892116182573e-06, |
|
"loss": 0.8646, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.02490143183233036, |
|
"grad_norm": 3.0389426041639847, |
|
"learning_rate": 2.4896265560165977e-06, |
|
"loss": 0.8769, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.02593899149201079, |
|
"grad_norm": 3.0481198307330772, |
|
"learning_rate": 2.5933609958506228e-06, |
|
"loss": 0.8597, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.026976551151691223, |
|
"grad_norm": 3.02592811695882, |
|
"learning_rate": 2.6970954356846475e-06, |
|
"loss": 0.8668, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.028014110811371652, |
|
"grad_norm": 3.2120467143006954, |
|
"learning_rate": 2.8008298755186726e-06, |
|
"loss": 0.8651, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.029051670471052084, |
|
"grad_norm": 2.9463572893956633, |
|
"learning_rate": 2.9045643153526977e-06, |
|
"loss": 0.8648, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.030089230130732517, |
|
"grad_norm": 3.2030553684980054, |
|
"learning_rate": 3.008298755186722e-06, |
|
"loss": 0.861, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.03112678979041295, |
|
"grad_norm": 3.134802130941582, |
|
"learning_rate": 3.112033195020747e-06, |
|
"loss": 0.8572, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.03216434945009338, |
|
"grad_norm": 3.0051811744191075, |
|
"learning_rate": 3.215767634854772e-06, |
|
"loss": 0.8548, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.03320190910977381, |
|
"grad_norm": 2.9087769254486013, |
|
"learning_rate": 3.319502074688797e-06, |
|
"loss": 0.857, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.034239468769454245, |
|
"grad_norm": 3.251836422833884, |
|
"learning_rate": 3.423236514522822e-06, |
|
"loss": 0.8479, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.035277028429134674, |
|
"grad_norm": 2.988883877751971, |
|
"learning_rate": 3.526970954356847e-06, |
|
"loss": 0.8573, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.03631458808881511, |
|
"grad_norm": 2.9670335974516155, |
|
"learning_rate": 3.6307053941908714e-06, |
|
"loss": 0.8509, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.03735214774849554, |
|
"grad_norm": 3.0847182089806333, |
|
"learning_rate": 3.7344398340248965e-06, |
|
"loss": 0.8421, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.03838970740817597, |
|
"grad_norm": 2.8264906420734843, |
|
"learning_rate": 3.838174273858922e-06, |
|
"loss": 0.8484, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.0394272670678564, |
|
"grad_norm": 2.974170420918456, |
|
"learning_rate": 3.941908713692946e-06, |
|
"loss": 0.8474, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.04046482672753683, |
|
"grad_norm": 3.077208581236518, |
|
"learning_rate": 4.045643153526971e-06, |
|
"loss": 0.8275, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.04150238638721727, |
|
"grad_norm": 3.134791812920161, |
|
"learning_rate": 4.149377593360996e-06, |
|
"loss": 0.8419, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.042539946046897696, |
|
"grad_norm": 3.2479409082629935, |
|
"learning_rate": 4.253112033195021e-06, |
|
"loss": 0.8466, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.043577505706578125, |
|
"grad_norm": 2.8988040172466767, |
|
"learning_rate": 4.356846473029046e-06, |
|
"loss": 0.8489, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.04461506536625856, |
|
"grad_norm": 3.0923511376582797, |
|
"learning_rate": 4.460580912863071e-06, |
|
"loss": 0.8455, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.04565262502593899, |
|
"grad_norm": 3.1659114421543895, |
|
"learning_rate": 4.564315352697096e-06, |
|
"loss": 0.8366, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.046690184685619425, |
|
"grad_norm": 2.9510079953090953, |
|
"learning_rate": 4.66804979253112e-06, |
|
"loss": 0.8492, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.047727744345299854, |
|
"grad_norm": 2.9528441973334503, |
|
"learning_rate": 4.771784232365146e-06, |
|
"loss": 0.8317, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.04876530400498029, |
|
"grad_norm": 3.009602261579148, |
|
"learning_rate": 4.875518672199171e-06, |
|
"loss": 0.8282, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.04980286366466072, |
|
"grad_norm": 3.0538602837457702, |
|
"learning_rate": 4.979253112033195e-06, |
|
"loss": 0.8379, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.05084042332434115, |
|
"grad_norm": 3.067939083048346, |
|
"learning_rate": 5.08298755186722e-06, |
|
"loss": 0.8255, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.05187798298402158, |
|
"grad_norm": 2.8381217796695233, |
|
"learning_rate": 5.1867219917012455e-06, |
|
"loss": 0.8424, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.05291554264370201, |
|
"grad_norm": 3.4784967911008247, |
|
"learning_rate": 5.29045643153527e-06, |
|
"loss": 0.8288, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.05395310230338245, |
|
"grad_norm": 3.308889130459959, |
|
"learning_rate": 5.394190871369295e-06, |
|
"loss": 0.8458, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.054990661963062876, |
|
"grad_norm": 3.043690822549995, |
|
"learning_rate": 5.4979253112033204e-06, |
|
"loss": 0.8233, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.056028221622743304, |
|
"grad_norm": 3.168880712945065, |
|
"learning_rate": 5.601659751037345e-06, |
|
"loss": 0.831, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.05706578128242374, |
|
"grad_norm": 3.1149299465392644, |
|
"learning_rate": 5.70539419087137e-06, |
|
"loss": 0.8363, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.05810334094210417, |
|
"grad_norm": 3.1047167518774814, |
|
"learning_rate": 5.809128630705395e-06, |
|
"loss": 0.8391, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.059140900601784605, |
|
"grad_norm": 2.8999047365073927, |
|
"learning_rate": 5.91286307053942e-06, |
|
"loss": 0.8368, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.06017846026146503, |
|
"grad_norm": 3.066022536608264, |
|
"learning_rate": 6.016597510373444e-06, |
|
"loss": 0.8379, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.06121601992114547, |
|
"grad_norm": 3.116983544786236, |
|
"learning_rate": 6.1203319502074694e-06, |
|
"loss": 0.8169, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.0622535795808259, |
|
"grad_norm": 7.508628361145243, |
|
"learning_rate": 6.224066390041494e-06, |
|
"loss": 0.8186, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.06329113924050633, |
|
"grad_norm": 2.959364581244697, |
|
"learning_rate": 6.327800829875519e-06, |
|
"loss": 0.823, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.06432869890018676, |
|
"grad_norm": 3.3247616385013554, |
|
"learning_rate": 6.431535269709544e-06, |
|
"loss": 0.8336, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.06536625855986719, |
|
"grad_norm": 2.97755825450701, |
|
"learning_rate": 6.535269709543569e-06, |
|
"loss": 0.8403, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.06640381821954762, |
|
"grad_norm": 2.920924218399819, |
|
"learning_rate": 6.639004149377594e-06, |
|
"loss": 0.8352, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.06744137787922806, |
|
"grad_norm": 2.996226962381477, |
|
"learning_rate": 6.742738589211619e-06, |
|
"loss": 0.8149, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.06847893753890849, |
|
"grad_norm": 2.9599991745332743, |
|
"learning_rate": 6.846473029045644e-06, |
|
"loss": 0.8227, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.06951649719858892, |
|
"grad_norm": 3.3083929869948374, |
|
"learning_rate": 6.950207468879669e-06, |
|
"loss": 0.8293, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.07055405685826935, |
|
"grad_norm": 2.921592069295814, |
|
"learning_rate": 7.053941908713694e-06, |
|
"loss": 0.8063, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.07159161651794978, |
|
"grad_norm": 2.9356160908302296, |
|
"learning_rate": 7.157676348547719e-06, |
|
"loss": 0.8249, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.07262917617763022, |
|
"grad_norm": 3.127667490017175, |
|
"learning_rate": 7.261410788381743e-06, |
|
"loss": 0.805, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.07366673583731065, |
|
"grad_norm": 2.811701116040028, |
|
"learning_rate": 7.365145228215769e-06, |
|
"loss": 0.8141, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.07470429549699108, |
|
"grad_norm": 3.428565494126869, |
|
"learning_rate": 7.468879668049793e-06, |
|
"loss": 0.8137, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.0757418551566715, |
|
"grad_norm": 3.0132405721717794, |
|
"learning_rate": 7.572614107883818e-06, |
|
"loss": 0.8121, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.07677941481635193, |
|
"grad_norm": 3.0044504000522045, |
|
"learning_rate": 7.676348547717844e-06, |
|
"loss": 0.8112, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.07781697447603238, |
|
"grad_norm": 2.938773166801736, |
|
"learning_rate": 7.780082987551869e-06, |
|
"loss": 0.8071, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.0788545341357128, |
|
"grad_norm": 2.816233639978962, |
|
"learning_rate": 7.883817427385892e-06, |
|
"loss": 0.8269, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.07989209379539323, |
|
"grad_norm": 3.2016060279962297, |
|
"learning_rate": 7.987551867219918e-06, |
|
"loss": 0.8173, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.08092965345507366, |
|
"grad_norm": 3.0338701288383567, |
|
"learning_rate": 8.091286307053943e-06, |
|
"loss": 0.7997, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.08196721311475409, |
|
"grad_norm": 2.8900162172973025, |
|
"learning_rate": 8.195020746887967e-06, |
|
"loss": 0.8296, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.08300477277443453, |
|
"grad_norm": 2.775870448004777, |
|
"learning_rate": 8.298755186721992e-06, |
|
"loss": 0.8139, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.08404233243411496, |
|
"grad_norm": 2.997946086685837, |
|
"learning_rate": 8.402489626556017e-06, |
|
"loss": 0.8244, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.08507989209379539, |
|
"grad_norm": 2.79051986833281, |
|
"learning_rate": 8.506224066390042e-06, |
|
"loss": 0.8207, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.08611745175347582, |
|
"grad_norm": 2.8248639051971334, |
|
"learning_rate": 8.609958506224068e-06, |
|
"loss": 0.805, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.08715501141315625, |
|
"grad_norm": 2.9096736006801485, |
|
"learning_rate": 8.713692946058093e-06, |
|
"loss": 0.8037, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.08819257107283669, |
|
"grad_norm": 2.940919950600375, |
|
"learning_rate": 8.817427385892117e-06, |
|
"loss": 0.8186, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.08923013073251712, |
|
"grad_norm": 2.896130509805648, |
|
"learning_rate": 8.921161825726142e-06, |
|
"loss": 0.8245, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.09026769039219755, |
|
"grad_norm": 2.8401858033389713, |
|
"learning_rate": 9.024896265560167e-06, |
|
"loss": 0.8081, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.09130525005187798, |
|
"grad_norm": 2.86912546506336, |
|
"learning_rate": 9.128630705394191e-06, |
|
"loss": 0.8164, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.09234280971155842, |
|
"grad_norm": 2.7242600466916453, |
|
"learning_rate": 9.232365145228218e-06, |
|
"loss": 0.8073, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.09338036937123885, |
|
"grad_norm": 3.044646841531881, |
|
"learning_rate": 9.33609958506224e-06, |
|
"loss": 0.8145, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.09441792903091928, |
|
"grad_norm": 2.8295153855299198, |
|
"learning_rate": 9.439834024896265e-06, |
|
"loss": 0.7957, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.09545548869059971, |
|
"grad_norm": 2.910948125766166, |
|
"learning_rate": 9.543568464730292e-06, |
|
"loss": 0.8053, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.09649304835028014, |
|
"grad_norm": 2.8219575214924597, |
|
"learning_rate": 9.647302904564317e-06, |
|
"loss": 0.8128, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.09753060800996058, |
|
"grad_norm": 2.9677851670315167, |
|
"learning_rate": 9.751037344398341e-06, |
|
"loss": 0.8061, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.09856816766964101, |
|
"grad_norm": 3.1531286571785326, |
|
"learning_rate": 9.854771784232366e-06, |
|
"loss": 0.7962, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.09960572732932144, |
|
"grad_norm": 3.1075603430133105, |
|
"learning_rate": 9.95850622406639e-06, |
|
"loss": 0.8072, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.10064328698900186, |
|
"grad_norm": 2.844518989686556, |
|
"learning_rate": 9.99998819398724e-06, |
|
"loss": 0.8198, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.1016808466486823, |
|
"grad_norm": 2.8894422413511127, |
|
"learning_rate": 9.999916046333384e-06, |
|
"loss": 0.8146, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.10271840630836274, |
|
"grad_norm": 2.8815754440809878, |
|
"learning_rate": 9.999778310866921e-06, |
|
"loss": 0.7899, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.10375596596804317, |
|
"grad_norm": 2.885119932296298, |
|
"learning_rate": 9.999574989394634e-06, |
|
"loss": 0.8013, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.1047935256277236, |
|
"grad_norm": 2.8642122606932174, |
|
"learning_rate": 9.99930608458365e-06, |
|
"loss": 0.805, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.10583108528740402, |
|
"grad_norm": 2.9310957652228082, |
|
"learning_rate": 9.998971599961405e-06, |
|
"loss": 0.7915, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.10686864494708445, |
|
"grad_norm": 2.8531320874329746, |
|
"learning_rate": 9.998571539915592e-06, |
|
"loss": 0.7981, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.1079062046067649, |
|
"grad_norm": 2.839970691252372, |
|
"learning_rate": 9.998105909694117e-06, |
|
"loss": 0.7999, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.10894376426644532, |
|
"grad_norm": 2.853880263858815, |
|
"learning_rate": 9.997574715405011e-06, |
|
"loss": 0.8311, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.10998132392612575, |
|
"grad_norm": 2.9338168378898337, |
|
"learning_rate": 9.996977964016371e-06, |
|
"loss": 0.8005, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.11101888358580618, |
|
"grad_norm": 2.7032896393481254, |
|
"learning_rate": 9.996315663356247e-06, |
|
"loss": 0.8003, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.11205644324548661, |
|
"grad_norm": 2.761074984380135, |
|
"learning_rate": 9.995587822112558e-06, |
|
"loss": 0.8044, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.11309400290516705, |
|
"grad_norm": 2.6815711646329556, |
|
"learning_rate": 9.994794449832966e-06, |
|
"loss": 0.7887, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.11413156256484748, |
|
"grad_norm": 2.8649570007362657, |
|
"learning_rate": 9.993935556924756e-06, |
|
"loss": 0.7776, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.11516912222452791, |
|
"grad_norm": 2.9788872729907547, |
|
"learning_rate": 9.993011154654702e-06, |
|
"loss": 0.7778, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.11620668188420834, |
|
"grad_norm": 2.771583627699142, |
|
"learning_rate": 9.992021255148907e-06, |
|
"loss": 0.7876, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.11724424154388878, |
|
"grad_norm": 3.156864019145989, |
|
"learning_rate": 9.990965871392662e-06, |
|
"loss": 0.7924, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.11828180120356921, |
|
"grad_norm": 2.6854537961356653, |
|
"learning_rate": 9.989845017230258e-06, |
|
"loss": 0.7841, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.11931936086324964, |
|
"grad_norm": 3.194287668643439, |
|
"learning_rate": 9.988658707364819e-06, |
|
"loss": 0.7807, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.12035692052293007, |
|
"grad_norm": 2.7638444846888173, |
|
"learning_rate": 9.9874069573581e-06, |
|
"loss": 0.7846, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.1213944801826105, |
|
"grad_norm": 2.7802968983308936, |
|
"learning_rate": 9.986089783630286e-06, |
|
"loss": 0.775, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.12243203984229094, |
|
"grad_norm": 2.8469947394820103, |
|
"learning_rate": 9.984707203459774e-06, |
|
"loss": 0.7672, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.12346959950197137, |
|
"grad_norm": 2.9990572740062493, |
|
"learning_rate": 9.983259234982951e-06, |
|
"loss": 0.7779, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.1245071591616518, |
|
"grad_norm": 3.1849226285333345, |
|
"learning_rate": 9.981745897193955e-06, |
|
"loss": 0.7714, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.12554471882133222, |
|
"grad_norm": 2.835199675947756, |
|
"learning_rate": 9.98016720994442e-06, |
|
"loss": 0.7784, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.12658227848101267, |
|
"grad_norm": 2.8294757615641886, |
|
"learning_rate": 9.978523193943222e-06, |
|
"loss": 0.7905, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.12761983814069308, |
|
"grad_norm": 2.6228325540550657, |
|
"learning_rate": 9.976813870756209e-06, |
|
"loss": 0.7695, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.12865739780037352, |
|
"grad_norm": 2.6754726256364902, |
|
"learning_rate": 9.975039262805907e-06, |
|
"loss": 0.7784, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.12969495746005397, |
|
"grad_norm": 2.6428193830815663, |
|
"learning_rate": 9.973199393371242e-06, |
|
"loss": 0.7768, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.13073251711973438, |
|
"grad_norm": 2.7019164377169824, |
|
"learning_rate": 9.97129428658722e-06, |
|
"loss": 0.7787, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.13177007677941482, |
|
"grad_norm": 2.7193094088840635, |
|
"learning_rate": 9.969323967444616e-06, |
|
"loss": 0.7691, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.13280763643909524, |
|
"grad_norm": 2.9547462574056227, |
|
"learning_rate": 9.96728846178965e-06, |
|
"loss": 0.7791, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.13384519609877568, |
|
"grad_norm": 2.7070337933579673, |
|
"learning_rate": 9.965187796323643e-06, |
|
"loss": 0.7793, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.13488275575845612, |
|
"grad_norm": 2.846568092264262, |
|
"learning_rate": 9.96302199860267e-06, |
|
"loss": 0.7657, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.13592031541813654, |
|
"grad_norm": 2.6430732770071885, |
|
"learning_rate": 9.96079109703719e-06, |
|
"loss": 0.7613, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.13695787507781698, |
|
"grad_norm": 2.564352165321478, |
|
"learning_rate": 9.95849512089169e-06, |
|
"loss": 0.7716, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.1379954347374974, |
|
"grad_norm": 2.787666582812226, |
|
"learning_rate": 9.956134100284285e-06, |
|
"loss": 0.7788, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.13903299439717784, |
|
"grad_norm": 2.565898584923949, |
|
"learning_rate": 9.95370806618633e-06, |
|
"loss": 0.7612, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.14007055405685828, |
|
"grad_norm": 2.9186277859262244, |
|
"learning_rate": 9.951217050422013e-06, |
|
"loss": 0.7787, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.1411081137165387, |
|
"grad_norm": 2.7573443860042546, |
|
"learning_rate": 9.94866108566794e-06, |
|
"loss": 0.7556, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.14214567337621914, |
|
"grad_norm": 2.840469194753, |
|
"learning_rate": 9.946040205452699e-06, |
|
"loss": 0.7456, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 0.14318323303589955, |
|
"grad_norm": 2.55261542401345, |
|
"learning_rate": 9.943354444156428e-06, |
|
"loss": 0.7789, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.14422079269558, |
|
"grad_norm": 2.6744948199976535, |
|
"learning_rate": 9.940603837010358e-06, |
|
"loss": 0.773, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 0.14525835235526044, |
|
"grad_norm": 2.7013083342024107, |
|
"learning_rate": 9.937788420096362e-06, |
|
"loss": 0.7735, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.14629591201494085, |
|
"grad_norm": 2.731487086002391, |
|
"learning_rate": 9.934908230346462e-06, |
|
"loss": 0.7523, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 0.1473334716746213, |
|
"grad_norm": 2.6033671380903334, |
|
"learning_rate": 9.931963305542363e-06, |
|
"loss": 0.7517, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.1483710313343017, |
|
"grad_norm": 2.662624062037032, |
|
"learning_rate": 9.92895368431495e-06, |
|
"loss": 0.7659, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 0.14940859099398215, |
|
"grad_norm": 3.714026552589638, |
|
"learning_rate": 9.925879406143779e-06, |
|
"loss": 0.7646, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.1504461506536626, |
|
"grad_norm": 3.3049933455540073, |
|
"learning_rate": 9.922740511356565e-06, |
|
"loss": 0.7681, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.151483710313343, |
|
"grad_norm": 2.7320515915400816, |
|
"learning_rate": 9.919537041128647e-06, |
|
"loss": 0.746, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.15252126997302345, |
|
"grad_norm": 2.635817269274888, |
|
"learning_rate": 9.916269037482452e-06, |
|
"loss": 0.7306, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.15355882963270387, |
|
"grad_norm": 2.662103702116443, |
|
"learning_rate": 9.912936543286939e-06, |
|
"loss": 0.7536, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.1545963892923843, |
|
"grad_norm": 2.85326362399482, |
|
"learning_rate": 9.909539602257048e-06, |
|
"loss": 0.7673, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 0.15563394895206475, |
|
"grad_norm": 2.567231081949611, |
|
"learning_rate": 9.90607825895311e-06, |
|
"loss": 0.738, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.15667150861174517, |
|
"grad_norm": 2.869479144385386, |
|
"learning_rate": 9.902552558780276e-06, |
|
"loss": 0.7598, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 0.1577090682714256, |
|
"grad_norm": 2.730929830901487, |
|
"learning_rate": 9.898962547987913e-06, |
|
"loss": 0.748, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.15874662793110603, |
|
"grad_norm": 2.793985046920194, |
|
"learning_rate": 9.895308273669007e-06, |
|
"loss": 0.7328, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 0.15978418759078647, |
|
"grad_norm": 2.7966738638089246, |
|
"learning_rate": 9.89158978375953e-06, |
|
"loss": 0.7676, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.1608217472504669, |
|
"grad_norm": 2.6679897290967576, |
|
"learning_rate": 9.887807127037827e-06, |
|
"loss": 0.7295, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.16185930691014733, |
|
"grad_norm": 2.5457872650558455, |
|
"learning_rate": 9.88396035312397e-06, |
|
"loss": 0.728, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.16289686656982777, |
|
"grad_norm": 2.8332820570127626, |
|
"learning_rate": 9.880049512479097e-06, |
|
"loss": 0.7421, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 0.16393442622950818, |
|
"grad_norm": 2.7639539461730114, |
|
"learning_rate": 9.876074656404773e-06, |
|
"loss": 0.7534, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.16497198588918863, |
|
"grad_norm": 2.7213075641667928, |
|
"learning_rate": 9.872035837042292e-06, |
|
"loss": 0.7363, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 0.16600954554886907, |
|
"grad_norm": 2.6178852546410183, |
|
"learning_rate": 9.86793310737201e-06, |
|
"loss": 0.7318, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.16704710520854948, |
|
"grad_norm": 2.7566382614918585, |
|
"learning_rate": 9.863766521212646e-06, |
|
"loss": 0.7507, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 0.16808466486822993, |
|
"grad_norm": 2.75430515948912, |
|
"learning_rate": 9.859536133220569e-06, |
|
"loss": 0.7481, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.16912222452791034, |
|
"grad_norm": 2.7773285238684813, |
|
"learning_rate": 9.855241998889091e-06, |
|
"loss": 0.7456, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 0.17015978418759078, |
|
"grad_norm": 2.9942393187975904, |
|
"learning_rate": 9.850884174547734e-06, |
|
"loss": 0.7512, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.17119734384727123, |
|
"grad_norm": 2.8176703833149706, |
|
"learning_rate": 9.846462717361489e-06, |
|
"loss": 0.7229, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.17223490350695164, |
|
"grad_norm": 2.606112734187681, |
|
"learning_rate": 9.841977685330074e-06, |
|
"loss": 0.7544, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.17327246316663208, |
|
"grad_norm": 2.7643282324128324, |
|
"learning_rate": 9.837429137287164e-06, |
|
"loss": 0.7233, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 0.1743100228263125, |
|
"grad_norm": 2.58906230921786, |
|
"learning_rate": 9.832817132899622e-06, |
|
"loss": 0.7496, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.17534758248599294, |
|
"grad_norm": 2.9173083959154913, |
|
"learning_rate": 9.828141732666722e-06, |
|
"loss": 0.7405, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 0.17638514214567338, |
|
"grad_norm": 2.807434396354819, |
|
"learning_rate": 9.823402997919346e-06, |
|
"loss": 0.7032, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.1774227018053538, |
|
"grad_norm": 2.7235521219589343, |
|
"learning_rate": 9.818600990819193e-06, |
|
"loss": 0.7162, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 0.17846026146503424, |
|
"grad_norm": 3.0405865223857247, |
|
"learning_rate": 9.813735774357942e-06, |
|
"loss": 0.7286, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.17949782112471468, |
|
"grad_norm": 2.6580664453159786, |
|
"learning_rate": 9.80880741235645e-06, |
|
"loss": 0.7153, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 0.1805353807843951, |
|
"grad_norm": 2.6234079932920764, |
|
"learning_rate": 9.803815969463898e-06, |
|
"loss": 0.7267, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.18157294044407554, |
|
"grad_norm": 2.7632489453595457, |
|
"learning_rate": 9.798761511156948e-06, |
|
"loss": 0.7198, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.18261050010375596, |
|
"grad_norm": 2.607364882820461, |
|
"learning_rate": 9.79364410373889e-06, |
|
"loss": 0.7197, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.1836480597634364, |
|
"grad_norm": 2.7165078647946856, |
|
"learning_rate": 9.78846381433876e-06, |
|
"loss": 0.7311, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 0.18468561942311684, |
|
"grad_norm": 2.510454703638443, |
|
"learning_rate": 9.783220710910471e-06, |
|
"loss": 0.7318, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.18572317908279726, |
|
"grad_norm": 2.638416867941834, |
|
"learning_rate": 9.777914862231912e-06, |
|
"loss": 0.73, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 0.1867607387424777, |
|
"grad_norm": 2.7242863471244148, |
|
"learning_rate": 9.772546337904054e-06, |
|
"loss": 0.7191, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.18779829840215811, |
|
"grad_norm": 2.6641307804734806, |
|
"learning_rate": 9.767115208350035e-06, |
|
"loss": 0.7207, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 0.18883585806183856, |
|
"grad_norm": 2.4803694238796905, |
|
"learning_rate": 9.761621544814232e-06, |
|
"loss": 0.7366, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.189873417721519, |
|
"grad_norm": 2.54201756384705, |
|
"learning_rate": 9.756065419361329e-06, |
|
"loss": 0.6971, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 0.19091097738119941, |
|
"grad_norm": 2.663357207599627, |
|
"learning_rate": 9.750446904875374e-06, |
|
"loss": 0.7093, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.19194853704087986, |
|
"grad_norm": 2.665290508123204, |
|
"learning_rate": 9.744766075058817e-06, |
|
"loss": 0.7092, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.19298609670056027, |
|
"grad_norm": 2.561102249391226, |
|
"learning_rate": 9.739023004431553e-06, |
|
"loss": 0.7022, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.19402365636024071, |
|
"grad_norm": 2.8532699081444344, |
|
"learning_rate": 9.733217768329934e-06, |
|
"loss": 0.7125, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 0.19506121601992116, |
|
"grad_norm": 2.658165458053829, |
|
"learning_rate": 9.727350442905786e-06, |
|
"loss": 0.713, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.19609877567960157, |
|
"grad_norm": 2.8346689183428233, |
|
"learning_rate": 9.721421105125409e-06, |
|
"loss": 0.7111, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 0.19713633533928202, |
|
"grad_norm": 2.691363256166988, |
|
"learning_rate": 9.715429832768566e-06, |
|
"loss": 0.6997, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.19817389499896243, |
|
"grad_norm": 2.670459556620767, |
|
"learning_rate": 9.709376704427471e-06, |
|
"loss": 0.7002, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 0.19921145465864287, |
|
"grad_norm": 2.8298140051123624, |
|
"learning_rate": 9.703261799505743e-06, |
|
"loss": 0.6919, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.20024901431832332, |
|
"grad_norm": 2.690706447862383, |
|
"learning_rate": 9.697085198217378e-06, |
|
"loss": 0.6951, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 0.20128657397800373, |
|
"grad_norm": 2.6190530004747536, |
|
"learning_rate": 9.690846981585689e-06, |
|
"loss": 0.7088, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.20232413363768417, |
|
"grad_norm": 2.896807508207353, |
|
"learning_rate": 9.684547231442248e-06, |
|
"loss": 0.7036, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.2033616932973646, |
|
"grad_norm": 2.653583975726927, |
|
"learning_rate": 9.678186030425806e-06, |
|
"loss": 0.7014, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.20439925295704503, |
|
"grad_norm": 2.7468040879725857, |
|
"learning_rate": 9.67176346198122e-06, |
|
"loss": 0.6887, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 0.20543681261672547, |
|
"grad_norm": 2.6425850355422607, |
|
"learning_rate": 9.665279610358347e-06, |
|
"loss": 0.6912, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.2064743722764059, |
|
"grad_norm": 2.607466586766701, |
|
"learning_rate": 9.658734560610942e-06, |
|
"loss": 0.6986, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 0.20751193193608633, |
|
"grad_norm": 2.5797036833444684, |
|
"learning_rate": 9.652128398595548e-06, |
|
"loss": 0.6893, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.20854949159576675, |
|
"grad_norm": 2.627466198960634, |
|
"learning_rate": 9.645461210970363e-06, |
|
"loss": 0.6939, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 0.2095870512554472, |
|
"grad_norm": 2.7514054708664486, |
|
"learning_rate": 9.638733085194105e-06, |
|
"loss": 0.6879, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.21062461091512763, |
|
"grad_norm": 2.762232274342595, |
|
"learning_rate": 9.631944109524867e-06, |
|
"loss": 0.7206, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 0.21166217057480805, |
|
"grad_norm": 2.573942783710098, |
|
"learning_rate": 9.625094373018957e-06, |
|
"loss": 0.672, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.2126997302344885, |
|
"grad_norm": 2.9862062253264154, |
|
"learning_rate": 9.61818396552973e-06, |
|
"loss": 0.7027, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 0.2137372898941689, |
|
"grad_norm": 2.604158278569754, |
|
"learning_rate": 9.61121297770641e-06, |
|
"loss": 0.6832, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.21477484955384935, |
|
"grad_norm": 2.847316696943195, |
|
"learning_rate": 9.604181500992904e-06, |
|
"loss": 0.6799, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 0.2158124092135298, |
|
"grad_norm": 2.779271975810847, |
|
"learning_rate": 9.597089627626594e-06, |
|
"loss": 0.6804, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.2168499688732102, |
|
"grad_norm": 2.5638323571099546, |
|
"learning_rate": 9.589937450637134e-06, |
|
"loss": 0.6837, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 0.21788752853289065, |
|
"grad_norm": 2.7020801988998384, |
|
"learning_rate": 9.58272506384523e-06, |
|
"loss": 0.684, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.21892508819257106, |
|
"grad_norm": 2.549003888633134, |
|
"learning_rate": 9.5754525618614e-06, |
|
"loss": 0.6871, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 0.2199626478522515, |
|
"grad_norm": 2.8154214183624284, |
|
"learning_rate": 9.568120040084752e-06, |
|
"loss": 0.6652, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.22100020751193195, |
|
"grad_norm": 2.6474463880567884, |
|
"learning_rate": 9.56072759470171e-06, |
|
"loss": 0.6896, |
|
"step": 1065 |
|
}, |
|
{ |
|
"epoch": 0.22203776717161236, |
|
"grad_norm": 2.623449606437885, |
|
"learning_rate": 9.553275322684769e-06, |
|
"loss": 0.6731, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.2230753268312928, |
|
"grad_norm": 2.5852337807326324, |
|
"learning_rate": 9.545763321791213e-06, |
|
"loss": 0.6914, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.22411288649097322, |
|
"grad_norm": 2.5768442254313424, |
|
"learning_rate": 9.538191690561838e-06, |
|
"loss": 0.6827, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.22515044615065366, |
|
"grad_norm": 2.616849381186977, |
|
"learning_rate": 9.530560528319657e-06, |
|
"loss": 0.6861, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 0.2261880058103341, |
|
"grad_norm": 2.542283706279765, |
|
"learning_rate": 9.522869935168601e-06, |
|
"loss": 0.6673, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.22722556547001452, |
|
"grad_norm": 2.6428940149632916, |
|
"learning_rate": 9.515120011992199e-06, |
|
"loss": 0.6595, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 0.22826312512969496, |
|
"grad_norm": 2.6516305122608324, |
|
"learning_rate": 9.507310860452258e-06, |
|
"loss": 0.6508, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.22930068478937538, |
|
"grad_norm": 2.618199521456939, |
|
"learning_rate": 9.499442582987535e-06, |
|
"loss": 0.672, |
|
"step": 1105 |
|
}, |
|
{ |
|
"epoch": 0.23033824444905582, |
|
"grad_norm": 2.580520201322647, |
|
"learning_rate": 9.491515282812383e-06, |
|
"loss": 0.6798, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.23137580410873626, |
|
"grad_norm": 2.5578402107468285, |
|
"learning_rate": 9.483529063915405e-06, |
|
"loss": 0.6575, |
|
"step": 1115 |
|
}, |
|
{ |
|
"epoch": 0.23241336376841668, |
|
"grad_norm": 2.5898669244363743, |
|
"learning_rate": 9.475484031058081e-06, |
|
"loss": 0.6686, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.23345092342809712, |
|
"grad_norm": 2.581092169980836, |
|
"learning_rate": 9.46738028977341e-06, |
|
"loss": 0.676, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.23448848308777756, |
|
"grad_norm": 2.564857684910852, |
|
"learning_rate": 9.459217946364508e-06, |
|
"loss": 0.6603, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.23552604274745798, |
|
"grad_norm": 2.6352764131909394, |
|
"learning_rate": 9.450997107903222e-06, |
|
"loss": 0.673, |
|
"step": 1135 |
|
}, |
|
{ |
|
"epoch": 0.23656360240713842, |
|
"grad_norm": 2.7623386766168494, |
|
"learning_rate": 9.442717882228727e-06, |
|
"loss": 0.6713, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.23760116206681883, |
|
"grad_norm": 2.646657632225148, |
|
"learning_rate": 9.434380377946104e-06, |
|
"loss": 0.6714, |
|
"step": 1145 |
|
}, |
|
{ |
|
"epoch": 0.23863872172649928, |
|
"grad_norm": 2.547166874529112, |
|
"learning_rate": 9.425984704424927e-06, |
|
"loss": 0.6664, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.23967628138617972, |
|
"grad_norm": 2.5806952200679225, |
|
"learning_rate": 9.417530971797812e-06, |
|
"loss": 0.6733, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 0.24071384104586013, |
|
"grad_norm": 2.574640603606341, |
|
"learning_rate": 9.409019290958993e-06, |
|
"loss": 0.6737, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.24175140070554058, |
|
"grad_norm": 2.5060886593359113, |
|
"learning_rate": 9.400449773562849e-06, |
|
"loss": 0.6762, |
|
"step": 1165 |
|
}, |
|
{ |
|
"epoch": 0.242788960365221, |
|
"grad_norm": 2.716564112603813, |
|
"learning_rate": 9.391822532022445e-06, |
|
"loss": 0.6551, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.24382652002490143, |
|
"grad_norm": 2.7171895315845216, |
|
"learning_rate": 9.383137679508063e-06, |
|
"loss": 0.6561, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 0.24486407968458188, |
|
"grad_norm": 2.58866525923876, |
|
"learning_rate": 9.374395329945714e-06, |
|
"loss": 0.6586, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.2459016393442623, |
|
"grad_norm": 2.641911339118785, |
|
"learning_rate": 9.365595598015635e-06, |
|
"loss": 0.6879, |
|
"step": 1185 |
|
}, |
|
{ |
|
"epoch": 0.24693919900394273, |
|
"grad_norm": 2.6209891495095716, |
|
"learning_rate": 9.356738599150805e-06, |
|
"loss": 0.6562, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.24797675866362315, |
|
"grad_norm": 2.537931458295151, |
|
"learning_rate": 9.347824449535406e-06, |
|
"loss": 0.671, |
|
"step": 1195 |
|
}, |
|
{ |
|
"epoch": 0.2490143183233036, |
|
"grad_norm": 2.6373525596001777, |
|
"learning_rate": 9.338853266103318e-06, |
|
"loss": 0.6469, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.250051877982984, |
|
"grad_norm": 2.49609290349947, |
|
"learning_rate": 9.329825166536578e-06, |
|
"loss": 0.6494, |
|
"step": 1205 |
|
}, |
|
{ |
|
"epoch": 0.25108943764266445, |
|
"grad_norm": 2.597719369713314, |
|
"learning_rate": 9.32074026926383e-06, |
|
"loss": 0.6644, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.2521269973023449, |
|
"grad_norm": 2.622902413291871, |
|
"learning_rate": 9.31159869345879e-06, |
|
"loss": 0.6277, |
|
"step": 1215 |
|
}, |
|
{ |
|
"epoch": 0.25316455696202533, |
|
"grad_norm": 2.4773937545219615, |
|
"learning_rate": 9.302400559038658e-06, |
|
"loss": 0.6435, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.2542021166217057, |
|
"grad_norm": 2.417435834815959, |
|
"learning_rate": 9.293145986662567e-06, |
|
"loss": 0.6551, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 0.25523967628138616, |
|
"grad_norm": 2.7620863467259134, |
|
"learning_rate": 9.283835097729984e-06, |
|
"loss": 0.6524, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.2562772359410666, |
|
"grad_norm": 2.661471370041919, |
|
"learning_rate": 9.27446801437913e-06, |
|
"loss": 0.6564, |
|
"step": 1235 |
|
}, |
|
{ |
|
"epoch": 0.25731479560074705, |
|
"grad_norm": 2.6022353359733996, |
|
"learning_rate": 9.265044859485369e-06, |
|
"loss": 0.6504, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.2583523552604275, |
|
"grad_norm": 2.67544454301597, |
|
"learning_rate": 9.2555657566596e-06, |
|
"loss": 0.6379, |
|
"step": 1245 |
|
}, |
|
{ |
|
"epoch": 0.25938991492010793, |
|
"grad_norm": 2.7294894592664742, |
|
"learning_rate": 9.246030830246633e-06, |
|
"loss": 0.653, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.2604274745797883, |
|
"grad_norm": 2.4934502710929642, |
|
"learning_rate": 9.236440205323564e-06, |
|
"loss": 0.6504, |
|
"step": 1255 |
|
}, |
|
{ |
|
"epoch": 0.26146503423946876, |
|
"grad_norm": 2.732333826367612, |
|
"learning_rate": 9.226794007698128e-06, |
|
"loss": 0.6417, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.2625025938991492, |
|
"grad_norm": 2.6288344277507023, |
|
"learning_rate": 9.217092363907047e-06, |
|
"loss": 0.6193, |
|
"step": 1265 |
|
}, |
|
{ |
|
"epoch": 0.26354015355882965, |
|
"grad_norm": 2.7169648920990483, |
|
"learning_rate": 9.207335401214379e-06, |
|
"loss": 0.6536, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.2645777132185101, |
|
"grad_norm": 2.520924205856965, |
|
"learning_rate": 9.197523247609839e-06, |
|
"loss": 0.6375, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 0.2656152728781905, |
|
"grad_norm": 2.515144443600501, |
|
"learning_rate": 9.187656031807129e-06, |
|
"loss": 0.6442, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.2666528325378709, |
|
"grad_norm": 2.5704491825236473, |
|
"learning_rate": 9.177733883242244e-06, |
|
"loss": 0.6586, |
|
"step": 1285 |
|
}, |
|
{ |
|
"epoch": 0.26769039219755136, |
|
"grad_norm": 2.8282989819254634, |
|
"learning_rate": 9.167756932071769e-06, |
|
"loss": 0.6609, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.2687279518572318, |
|
"grad_norm": 2.485389299186558, |
|
"learning_rate": 9.157725309171183e-06, |
|
"loss": 0.6459, |
|
"step": 1295 |
|
}, |
|
{ |
|
"epoch": 0.26976551151691225, |
|
"grad_norm": 2.7788274917257327, |
|
"learning_rate": 9.147639146133142e-06, |
|
"loss": 0.6433, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.27080307117659264, |
|
"grad_norm": 2.521611630972388, |
|
"learning_rate": 9.137498575265736e-06, |
|
"loss": 0.6271, |
|
"step": 1305 |
|
}, |
|
{ |
|
"epoch": 0.2718406308362731, |
|
"grad_norm": 2.67661014478125, |
|
"learning_rate": 9.12730372959077e-06, |
|
"loss": 0.6551, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.2728781904959535, |
|
"grad_norm": 2.678821798528819, |
|
"learning_rate": 9.11705474284202e-06, |
|
"loss": 0.6275, |
|
"step": 1315 |
|
}, |
|
{ |
|
"epoch": 0.27391575015563396, |
|
"grad_norm": 2.5914261808444574, |
|
"learning_rate": 9.106751749463463e-06, |
|
"loss": 0.6401, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.2749533098153144, |
|
"grad_norm": 2.5454105011810664, |
|
"learning_rate": 9.09639488460753e-06, |
|
"loss": 0.6275, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 0.2759908694749948, |
|
"grad_norm": 2.746226513585246, |
|
"learning_rate": 9.08598428413333e-06, |
|
"loss": 0.6199, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.27702842913467524, |
|
"grad_norm": 2.4978350247576886, |
|
"learning_rate": 9.075520084604849e-06, |
|
"loss": 0.6081, |
|
"step": 1335 |
|
}, |
|
{ |
|
"epoch": 0.2780659887943557, |
|
"grad_norm": 2.565415348716322, |
|
"learning_rate": 9.065002423289189e-06, |
|
"loss": 0.6117, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.2791035484540361, |
|
"grad_norm": 2.5742091961939892, |
|
"learning_rate": 9.054431438154745e-06, |
|
"loss": 0.613, |
|
"step": 1345 |
|
}, |
|
{ |
|
"epoch": 0.28014110811371656, |
|
"grad_norm": 2.5626823209269642, |
|
"learning_rate": 9.043807267869403e-06, |
|
"loss": 0.624, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.28117866777339695, |
|
"grad_norm": 2.4910808560943942, |
|
"learning_rate": 9.033130051798725e-06, |
|
"loss": 0.6314, |
|
"step": 1355 |
|
}, |
|
{ |
|
"epoch": 0.2822162274330774, |
|
"grad_norm": 2.4573499982125524, |
|
"learning_rate": 9.022399930004106e-06, |
|
"loss": 0.625, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.28325378709275784, |
|
"grad_norm": 2.531960067205625, |
|
"learning_rate": 9.011617043240956e-06, |
|
"loss": 0.6261, |
|
"step": 1365 |
|
}, |
|
{ |
|
"epoch": 0.2842913467524383, |
|
"grad_norm": 2.6336708253712917, |
|
"learning_rate": 9.000781532956844e-06, |
|
"loss": 0.6057, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.2853289064121187, |
|
"grad_norm": 2.5416291249522063, |
|
"learning_rate": 8.989893541289636e-06, |
|
"loss": 0.6114, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 0.2863664660717991, |
|
"grad_norm": 2.6016836625846227, |
|
"learning_rate": 8.978953211065645e-06, |
|
"loss": 0.6308, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.28740402573147955, |
|
"grad_norm": 2.670421258864357, |
|
"learning_rate": 8.96796068579774e-06, |
|
"loss": 0.6373, |
|
"step": 1385 |
|
}, |
|
{ |
|
"epoch": 0.28844158539116, |
|
"grad_norm": 2.6256018056635195, |
|
"learning_rate": 8.956916109683488e-06, |
|
"loss": 0.6136, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.28947914505084044, |
|
"grad_norm": 2.665986383822575, |
|
"learning_rate": 8.945819627603235e-06, |
|
"loss": 0.6294, |
|
"step": 1395 |
|
}, |
|
{ |
|
"epoch": 0.2905167047105209, |
|
"grad_norm": 2.620184006600422, |
|
"learning_rate": 8.934671385118224e-06, |
|
"loss": 0.6154, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.29155426437020127, |
|
"grad_norm": 2.5773178200606797, |
|
"learning_rate": 8.923471528468675e-06, |
|
"loss": 0.6263, |
|
"step": 1405 |
|
}, |
|
{ |
|
"epoch": 0.2925918240298817, |
|
"grad_norm": 2.528035051025028, |
|
"learning_rate": 8.912220204571878e-06, |
|
"loss": 0.6139, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.29362938368956215, |
|
"grad_norm": 2.6830704473334053, |
|
"learning_rate": 8.900917561020255e-06, |
|
"loss": 0.6256, |
|
"step": 1415 |
|
}, |
|
{ |
|
"epoch": 0.2946669433492426, |
|
"grad_norm": 2.5701558374316256, |
|
"learning_rate": 8.889563746079428e-06, |
|
"loss": 0.6163, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.29570450300892304, |
|
"grad_norm": 2.542727546558585, |
|
"learning_rate": 8.878158908686276e-06, |
|
"loss": 0.6214, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 0.2967420626686034, |
|
"grad_norm": 2.6289522750495684, |
|
"learning_rate": 8.86670319844698e-06, |
|
"loss": 0.6184, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.29777962232828387, |
|
"grad_norm": 2.605979434187302, |
|
"learning_rate": 8.855196765635055e-06, |
|
"loss": 0.6148, |
|
"step": 1435 |
|
}, |
|
{ |
|
"epoch": 0.2988171819879643, |
|
"grad_norm": 2.5378051705763136, |
|
"learning_rate": 8.843639761189392e-06, |
|
"loss": 0.6309, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.29985474164764475, |
|
"grad_norm": 3.122542869019648, |
|
"learning_rate": 8.83203233671226e-06, |
|
"loss": 0.6148, |
|
"step": 1445 |
|
}, |
|
{ |
|
"epoch": 0.3008923013073252, |
|
"grad_norm": 2.7847080279432355, |
|
"learning_rate": 8.820374644467334e-06, |
|
"loss": 0.6149, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.3019298609670056, |
|
"grad_norm": 2.7571745781556176, |
|
"learning_rate": 8.808666837377688e-06, |
|
"loss": 0.6043, |
|
"step": 1455 |
|
}, |
|
{ |
|
"epoch": 0.302967420626686, |
|
"grad_norm": 2.6729505157429645, |
|
"learning_rate": 8.796909069023793e-06, |
|
"loss": 0.6091, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.30400498028636647, |
|
"grad_norm": 2.5670622136089123, |
|
"learning_rate": 8.7851014936415e-06, |
|
"loss": 0.5973, |
|
"step": 1465 |
|
}, |
|
{ |
|
"epoch": 0.3050425399460469, |
|
"grad_norm": 2.5809462309222586, |
|
"learning_rate": 8.77324426612002e-06, |
|
"loss": 0.601, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.30608009960572735, |
|
"grad_norm": 2.6928125038468695, |
|
"learning_rate": 8.761337541999884e-06, |
|
"loss": 0.603, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 0.30711765926540774, |
|
"grad_norm": 2.5859004844354887, |
|
"learning_rate": 8.749381477470915e-06, |
|
"loss": 0.5902, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.3081552189250882, |
|
"grad_norm": 2.706857250295533, |
|
"learning_rate": 8.73737622937017e-06, |
|
"loss": 0.6068, |
|
"step": 1485 |
|
}, |
|
{ |
|
"epoch": 0.3091927785847686, |
|
"grad_norm": 2.5825402406347995, |
|
"learning_rate": 8.725321955179886e-06, |
|
"loss": 0.5943, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.31023033824444907, |
|
"grad_norm": 2.564859545646307, |
|
"learning_rate": 8.713218813025412e-06, |
|
"loss": 0.6166, |
|
"step": 1495 |
|
}, |
|
{ |
|
"epoch": 0.3112678979041295, |
|
"grad_norm": 2.4788537811048514, |
|
"learning_rate": 8.70106696167314e-06, |
|
"loss": 0.6107, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.3123054575638099, |
|
"grad_norm": 2.5086302932261857, |
|
"learning_rate": 8.688866560528414e-06, |
|
"loss": 0.5953, |
|
"step": 1505 |
|
}, |
|
{ |
|
"epoch": 0.31334301722349034, |
|
"grad_norm": 2.641157195225993, |
|
"learning_rate": 8.676617769633449e-06, |
|
"loss": 0.5942, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.3143805768831708, |
|
"grad_norm": 2.5041601695953952, |
|
"learning_rate": 8.66432074966522e-06, |
|
"loss": 0.614, |
|
"step": 1515 |
|
}, |
|
{ |
|
"epoch": 0.3154181365428512, |
|
"grad_norm": 2.6463153946217988, |
|
"learning_rate": 8.651975661933368e-06, |
|
"loss": 0.6046, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.31645569620253167, |
|
"grad_norm": 2.5329735196813186, |
|
"learning_rate": 8.639582668378068e-06, |
|
"loss": 0.5939, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 0.31749325586221205, |
|
"grad_norm": 2.626444758743183, |
|
"learning_rate": 8.627141931567918e-06, |
|
"loss": 0.5955, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.3185308155218925, |
|
"grad_norm": 2.4901380695636672, |
|
"learning_rate": 8.614653614697804e-06, |
|
"loss": 0.5887, |
|
"step": 1535 |
|
}, |
|
{ |
|
"epoch": 0.31956837518157294, |
|
"grad_norm": 2.562038119980849, |
|
"learning_rate": 8.602117881586748e-06, |
|
"loss": 0.5887, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.3206059348412534, |
|
"grad_norm": 2.705668712762943, |
|
"learning_rate": 8.589534896675782e-06, |
|
"loss": 0.6155, |
|
"step": 1545 |
|
}, |
|
{ |
|
"epoch": 0.3216434945009338, |
|
"grad_norm": 2.4700710695591144, |
|
"learning_rate": 8.576904825025763e-06, |
|
"loss": 0.5805, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.3226810541606142, |
|
"grad_norm": 2.569997778071618, |
|
"learning_rate": 8.56422783231523e-06, |
|
"loss": 0.5861, |
|
"step": 1555 |
|
}, |
|
{ |
|
"epoch": 0.32371861382029465, |
|
"grad_norm": 2.6800463634463454, |
|
"learning_rate": 8.551504084838217e-06, |
|
"loss": 0.5888, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.3247561734799751, |
|
"grad_norm": 2.614913869620652, |
|
"learning_rate": 8.538733749502084e-06, |
|
"loss": 0.5916, |
|
"step": 1565 |
|
}, |
|
{ |
|
"epoch": 0.32579373313965554, |
|
"grad_norm": 2.5392913094762015, |
|
"learning_rate": 8.525916993825312e-06, |
|
"loss": 0.5845, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.326831292799336, |
|
"grad_norm": 2.5970061681752714, |
|
"learning_rate": 8.51305398593532e-06, |
|
"loss": 0.5885, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 0.32786885245901637, |
|
"grad_norm": 2.892436571898512, |
|
"learning_rate": 8.50014489456625e-06, |
|
"loss": 0.5735, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.3289064121186968, |
|
"grad_norm": 2.6138781972835874, |
|
"learning_rate": 8.487189889056758e-06, |
|
"loss": 0.5559, |
|
"step": 1585 |
|
}, |
|
{ |
|
"epoch": 0.32994397177837725, |
|
"grad_norm": 2.623576409499823, |
|
"learning_rate": 8.474189139347795e-06, |
|
"loss": 0.5846, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.3309815314380577, |
|
"grad_norm": 2.468823382349316, |
|
"learning_rate": 8.461142815980368e-06, |
|
"loss": 0.5986, |
|
"step": 1595 |
|
}, |
|
{ |
|
"epoch": 0.33201909109773814, |
|
"grad_norm": 2.672237640145444, |
|
"learning_rate": 8.448051090093315e-06, |
|
"loss": 0.591, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.3330566507574185, |
|
"grad_norm": 2.4728672068736937, |
|
"learning_rate": 8.434914133421053e-06, |
|
"loss": 0.5845, |
|
"step": 1605 |
|
}, |
|
{ |
|
"epoch": 0.33409421041709897, |
|
"grad_norm": 2.5895072383457087, |
|
"learning_rate": 8.421732118291326e-06, |
|
"loss": 0.5782, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.3351317700767794, |
|
"grad_norm": 2.5040782851199137, |
|
"learning_rate": 8.408505217622942e-06, |
|
"loss": 0.5815, |
|
"step": 1615 |
|
}, |
|
{ |
|
"epoch": 0.33616932973645985, |
|
"grad_norm": 3.6988626884139846, |
|
"learning_rate": 8.395233604923515e-06, |
|
"loss": 0.5843, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.3372068893961403, |
|
"grad_norm": 2.5828684207028547, |
|
"learning_rate": 8.381917454287175e-06, |
|
"loss": 0.5793, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 0.3382444490558207, |
|
"grad_norm": 2.638686184144453, |
|
"learning_rate": 8.368556940392295e-06, |
|
"loss": 0.5841, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.3392820087155011, |
|
"grad_norm": 2.650538574655116, |
|
"learning_rate": 8.355152238499192e-06, |
|
"loss": 0.5875, |
|
"step": 1635 |
|
}, |
|
{ |
|
"epoch": 0.34031956837518157, |
|
"grad_norm": 2.486723068143089, |
|
"learning_rate": 8.341703524447834e-06, |
|
"loss": 0.5752, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.341357128034862, |
|
"grad_norm": 2.542548425745693, |
|
"learning_rate": 8.328210974655534e-06, |
|
"loss": 0.582, |
|
"step": 1645 |
|
}, |
|
{ |
|
"epoch": 0.34239468769454245, |
|
"grad_norm": 2.5238474016255346, |
|
"learning_rate": 8.314674766114625e-06, |
|
"loss": 0.5886, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.34343224735422284, |
|
"grad_norm": 2.6663652283930164, |
|
"learning_rate": 8.301095076390151e-06, |
|
"loss": 0.5703, |
|
"step": 1655 |
|
}, |
|
{ |
|
"epoch": 0.3444698070139033, |
|
"grad_norm": 2.5916048021671765, |
|
"learning_rate": 8.287472083617534e-06, |
|
"loss": 0.5578, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.3455073666735837, |
|
"grad_norm": 2.64819187278271, |
|
"learning_rate": 8.273805966500233e-06, |
|
"loss": 0.566, |
|
"step": 1665 |
|
}, |
|
{ |
|
"epoch": 0.34654492633326417, |
|
"grad_norm": 2.7355426760241115, |
|
"learning_rate": 8.260096904307404e-06, |
|
"loss": 0.5724, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.3475824859929446, |
|
"grad_norm": 2.5288788248400422, |
|
"learning_rate": 8.246345076871548e-06, |
|
"loss": 0.5852, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 0.348620045652625, |
|
"grad_norm": 2.4333668936065727, |
|
"learning_rate": 8.232550664586145e-06, |
|
"loss": 0.562, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.34965760531230544, |
|
"grad_norm": 2.666095931772747, |
|
"learning_rate": 8.218713848403306e-06, |
|
"loss": 0.5761, |
|
"step": 1685 |
|
}, |
|
{ |
|
"epoch": 0.3506951649719859, |
|
"grad_norm": 2.585591858891525, |
|
"learning_rate": 8.204834809831377e-06, |
|
"loss": 0.579, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.3517327246316663, |
|
"grad_norm": 2.5419925763820457, |
|
"learning_rate": 8.190913730932567e-06, |
|
"loss": 0.5792, |
|
"step": 1695 |
|
}, |
|
{ |
|
"epoch": 0.35277028429134677, |
|
"grad_norm": 2.5691748657197575, |
|
"learning_rate": 8.176950794320572e-06, |
|
"loss": 0.5647, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.35380784395102716, |
|
"grad_norm": 2.5977820134449603, |
|
"learning_rate": 8.16294618315816e-06, |
|
"loss": 0.5708, |
|
"step": 1705 |
|
}, |
|
{ |
|
"epoch": 0.3548454036107076, |
|
"grad_norm": 2.423522377595987, |
|
"learning_rate": 8.148900081154773e-06, |
|
"loss": 0.5666, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.35588296327038804, |
|
"grad_norm": 2.569485264156882, |
|
"learning_rate": 8.134812672564131e-06, |
|
"loss": 0.5504, |
|
"step": 1715 |
|
}, |
|
{ |
|
"epoch": 0.3569205229300685, |
|
"grad_norm": 2.758198111046606, |
|
"learning_rate": 8.1206841421818e-06, |
|
"loss": 0.5691, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.3579580825897489, |
|
"grad_norm": 2.5786351112814545, |
|
"learning_rate": 8.10651467534277e-06, |
|
"loss": 0.57, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 0.35899564224942937, |
|
"grad_norm": 2.4962078269510015, |
|
"learning_rate": 8.092304457919028e-06, |
|
"loss": 0.557, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.36003320190910976, |
|
"grad_norm": 2.5552381714765384, |
|
"learning_rate": 8.078053676317124e-06, |
|
"loss": 0.5673, |
|
"step": 1735 |
|
}, |
|
{ |
|
"epoch": 0.3610707615687902, |
|
"grad_norm": 2.611892754049072, |
|
"learning_rate": 8.06376251747571e-06, |
|
"loss": 0.5535, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.36210832122847064, |
|
"grad_norm": 2.4981862280677545, |
|
"learning_rate": 8.049431168863107e-06, |
|
"loss": 0.5543, |
|
"step": 1745 |
|
}, |
|
{ |
|
"epoch": 0.3631458808881511, |
|
"grad_norm": 2.590859008799774, |
|
"learning_rate": 8.035059818474833e-06, |
|
"loss": 0.5688, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.3641834405478315, |
|
"grad_norm": 2.7049066460444657, |
|
"learning_rate": 8.02064865483114e-06, |
|
"loss": 0.5666, |
|
"step": 1755 |
|
}, |
|
{ |
|
"epoch": 0.3652210002075119, |
|
"grad_norm": 2.50555658991844, |
|
"learning_rate": 8.00619786697454e-06, |
|
"loss": 0.553, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.36625855986719236, |
|
"grad_norm": 2.4491820476275805, |
|
"learning_rate": 7.991707644467335e-06, |
|
"loss": 0.5635, |
|
"step": 1765 |
|
}, |
|
{ |
|
"epoch": 0.3672961195268728, |
|
"grad_norm": 2.560789498381096, |
|
"learning_rate": 7.97717817738911e-06, |
|
"loss": 0.5408, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.36833367918655324, |
|
"grad_norm": 2.6052621436325416, |
|
"learning_rate": 7.962609656334262e-06, |
|
"loss": 0.5488, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 0.3693712388462337, |
|
"grad_norm": 2.5278436185793582, |
|
"learning_rate": 7.94800227240948e-06, |
|
"loss": 0.5573, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.37040879850591407, |
|
"grad_norm": 2.5168973470980607, |
|
"learning_rate": 7.933356217231261e-06, |
|
"loss": 0.5358, |
|
"step": 1785 |
|
}, |
|
{ |
|
"epoch": 0.3714463581655945, |
|
"grad_norm": 2.5427476959285475, |
|
"learning_rate": 7.918671682923371e-06, |
|
"loss": 0.557, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.37248391782527496, |
|
"grad_norm": 2.5597051499240315, |
|
"learning_rate": 7.90394886211434e-06, |
|
"loss": 0.5443, |
|
"step": 1795 |
|
}, |
|
{ |
|
"epoch": 0.3735214774849554, |
|
"grad_norm": 2.6134854223700033, |
|
"learning_rate": 7.889187947934939e-06, |
|
"loss": 0.5643, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.37455903714463584, |
|
"grad_norm": 2.5638490649548507, |
|
"learning_rate": 7.874389134015627e-06, |
|
"loss": 0.5515, |
|
"step": 1805 |
|
}, |
|
{ |
|
"epoch": 0.37559659680431623, |
|
"grad_norm": 2.638245607620569, |
|
"learning_rate": 7.859552614484035e-06, |
|
"loss": 0.5512, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.37663415646399667, |
|
"grad_norm": 2.5439284853350683, |
|
"learning_rate": 7.844678583962403e-06, |
|
"loss": 0.5357, |
|
"step": 1815 |
|
}, |
|
{ |
|
"epoch": 0.3776717161236771, |
|
"grad_norm": 2.7382120697501264, |
|
"learning_rate": 7.829767237565027e-06, |
|
"loss": 0.5499, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.37870927578335756, |
|
"grad_norm": 2.693790343217592, |
|
"learning_rate": 7.814818770895718e-06, |
|
"loss": 0.5447, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 0.379746835443038, |
|
"grad_norm": 2.5002163516357685, |
|
"learning_rate": 7.79983338004521e-06, |
|
"loss": 0.5548, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.3807843951027184, |
|
"grad_norm": 2.5140469437451514, |
|
"learning_rate": 7.784811261588605e-06, |
|
"loss": 0.5396, |
|
"step": 1835 |
|
}, |
|
{ |
|
"epoch": 0.38182195476239883, |
|
"grad_norm": 2.3632709986503384, |
|
"learning_rate": 7.769752612582793e-06, |
|
"loss": 0.5455, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.3828595144220793, |
|
"grad_norm": 2.6174115493874415, |
|
"learning_rate": 7.754657630563855e-06, |
|
"loss": 0.5501, |
|
"step": 1845 |
|
}, |
|
{ |
|
"epoch": 0.3838970740817597, |
|
"grad_norm": 2.662869705594198, |
|
"learning_rate": 7.739526513544492e-06, |
|
"loss": 0.5458, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.38493463374144016, |
|
"grad_norm": 2.595342018363226, |
|
"learning_rate": 7.724359460011406e-06, |
|
"loss": 0.5484, |
|
"step": 1855 |
|
}, |
|
{ |
|
"epoch": 0.38597219340112054, |
|
"grad_norm": 2.574988659923898, |
|
"learning_rate": 7.709156668922715e-06, |
|
"loss": 0.5465, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.387009753060801, |
|
"grad_norm": 2.696253501363756, |
|
"learning_rate": 7.693918339705327e-06, |
|
"loss": 0.5416, |
|
"step": 1865 |
|
}, |
|
{ |
|
"epoch": 0.38804731272048143, |
|
"grad_norm": 2.571358317228633, |
|
"learning_rate": 7.678644672252334e-06, |
|
"loss": 0.5432, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.3890848723801619, |
|
"grad_norm": 2.6773383342412007, |
|
"learning_rate": 7.663335866920389e-06, |
|
"loss": 0.5435, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 0.3901224320398423, |
|
"grad_norm": 2.605715400204335, |
|
"learning_rate": 7.647992124527076e-06, |
|
"loss": 0.5394, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.3911599916995227, |
|
"grad_norm": 2.542289996117672, |
|
"learning_rate": 7.632613646348273e-06, |
|
"loss": 0.5365, |
|
"step": 1885 |
|
}, |
|
{ |
|
"epoch": 0.39219755135920314, |
|
"grad_norm": 2.640322094229756, |
|
"learning_rate": 7.617200634115516e-06, |
|
"loss": 0.5473, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.3932351110188836, |
|
"grad_norm": 2.5708221170868355, |
|
"learning_rate": 7.601753290013353e-06, |
|
"loss": 0.5209, |
|
"step": 1895 |
|
}, |
|
{ |
|
"epoch": 0.39427267067856403, |
|
"grad_norm": 2.525915351063816, |
|
"learning_rate": 7.586271816676687e-06, |
|
"loss": 0.5288, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.3953102303382445, |
|
"grad_norm": 2.5364307727837234, |
|
"learning_rate": 7.570756417188123e-06, |
|
"loss": 0.5429, |
|
"step": 1905 |
|
}, |
|
{ |
|
"epoch": 0.39634778999792486, |
|
"grad_norm": 2.559190451038623, |
|
"learning_rate": 7.555207295075303e-06, |
|
"loss": 0.5128, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.3973853496576053, |
|
"grad_norm": 2.7079589585625095, |
|
"learning_rate": 7.539624654308231e-06, |
|
"loss": 0.5333, |
|
"step": 1915 |
|
}, |
|
{ |
|
"epoch": 0.39842290931728574, |
|
"grad_norm": 2.4387259072975267, |
|
"learning_rate": 7.5240086992966045e-06, |
|
"loss": 0.5334, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.3994604689769662, |
|
"grad_norm": 2.5588774252471818, |
|
"learning_rate": 7.508359634887128e-06, |
|
"loss": 0.5429, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 0.40049802863664663, |
|
"grad_norm": 2.50003757681576, |
|
"learning_rate": 7.4926776663608305e-06, |
|
"loss": 0.5353, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.401535588296327, |
|
"grad_norm": 2.471663404094866, |
|
"learning_rate": 7.476962999430368e-06, |
|
"loss": 0.5373, |
|
"step": 1935 |
|
}, |
|
{ |
|
"epoch": 0.40257314795600746, |
|
"grad_norm": 2.4541757324292215, |
|
"learning_rate": 7.461215840237329e-06, |
|
"loss": 0.5278, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.4036107076156879, |
|
"grad_norm": 2.5133909898638738, |
|
"learning_rate": 7.4454363953495255e-06, |
|
"loss": 0.5224, |
|
"step": 1945 |
|
}, |
|
{ |
|
"epoch": 0.40464826727536835, |
|
"grad_norm": 2.456759039414412, |
|
"learning_rate": 7.429624871758289e-06, |
|
"loss": 0.5274, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.4056858269350488, |
|
"grad_norm": 2.6254399143011025, |
|
"learning_rate": 7.41378147687575e-06, |
|
"loss": 0.539, |
|
"step": 1955 |
|
}, |
|
{ |
|
"epoch": 0.4067233865947292, |
|
"grad_norm": 2.77782256038311, |
|
"learning_rate": 7.397906418532124e-06, |
|
"loss": 0.5255, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.4077609462544096, |
|
"grad_norm": 2.401784319404263, |
|
"learning_rate": 7.381999904972974e-06, |
|
"loss": 0.5373, |
|
"step": 1965 |
|
}, |
|
{ |
|
"epoch": 0.40879850591409006, |
|
"grad_norm": 2.4340739184784996, |
|
"learning_rate": 7.366062144856494e-06, |
|
"loss": 0.5292, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.4098360655737705, |
|
"grad_norm": 2.5139056739660104, |
|
"learning_rate": 7.350093347250754e-06, |
|
"loss": 0.524, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 0.41087362523345095, |
|
"grad_norm": 2.513276255127495, |
|
"learning_rate": 7.334093721630976e-06, |
|
"loss": 0.5231, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.41191118489313133, |
|
"grad_norm": 2.6310824706160636, |
|
"learning_rate": 7.318063477876775e-06, |
|
"loss": 0.5233, |
|
"step": 1985 |
|
}, |
|
{ |
|
"epoch": 0.4129487445528118, |
|
"grad_norm": 2.491475901603011, |
|
"learning_rate": 7.302002826269401e-06, |
|
"loss": 0.5341, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.4139863042124922, |
|
"grad_norm": 2.506660610799657, |
|
"learning_rate": 7.285911977488995e-06, |
|
"loss": 0.5182, |
|
"step": 1995 |
|
}, |
|
{ |
|
"epoch": 0.41502386387217266, |
|
"grad_norm": 2.4794467040525507, |
|
"learning_rate": 7.269791142611819e-06, |
|
"loss": 0.5305, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.4160614235318531, |
|
"grad_norm": 2.4409844087069197, |
|
"learning_rate": 7.253640533107482e-06, |
|
"loss": 0.5097, |
|
"step": 2005 |
|
}, |
|
{ |
|
"epoch": 0.4170989831915335, |
|
"grad_norm": 2.5831589194501334, |
|
"learning_rate": 7.23746036083617e-06, |
|
"loss": 0.509, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.41813654285121393, |
|
"grad_norm": 2.5848519997923134, |
|
"learning_rate": 7.221250838045866e-06, |
|
"loss": 0.5212, |
|
"step": 2015 |
|
}, |
|
{ |
|
"epoch": 0.4191741025108944, |
|
"grad_norm": 2.4649428551198507, |
|
"learning_rate": 7.205012177369573e-06, |
|
"loss": 0.5097, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.4202116621705748, |
|
"grad_norm": 2.6470337729349334, |
|
"learning_rate": 7.188744591822514e-06, |
|
"loss": 0.5265, |
|
"step": 2025 |
|
}, |
|
{ |
|
"epoch": 0.42124922183025526, |
|
"grad_norm": 2.6777002487915427, |
|
"learning_rate": 7.17244829479934e-06, |
|
"loss": 0.5132, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.42228678148993565, |
|
"grad_norm": 2.628723636023145, |
|
"learning_rate": 7.156123500071337e-06, |
|
"loss": 0.5383, |
|
"step": 2035 |
|
}, |
|
{ |
|
"epoch": 0.4233243411496161, |
|
"grad_norm": 2.6077956409975322, |
|
"learning_rate": 7.139770421783616e-06, |
|
"loss": 0.5143, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.42436190080929653, |
|
"grad_norm": 2.5195148593267165, |
|
"learning_rate": 7.1233892744523055e-06, |
|
"loss": 0.5292, |
|
"step": 2045 |
|
}, |
|
{ |
|
"epoch": 0.425399460468977, |
|
"grad_norm": 2.4869546800490276, |
|
"learning_rate": 7.1069802729617385e-06, |
|
"loss": 0.5219, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.4264370201286574, |
|
"grad_norm": 2.498457038824186, |
|
"learning_rate": 7.090543632561632e-06, |
|
"loss": 0.5227, |
|
"step": 2055 |
|
}, |
|
{ |
|
"epoch": 0.4274745797883378, |
|
"grad_norm": 2.5334280776647855, |
|
"learning_rate": 7.0740795688642635e-06, |
|
"loss": 0.5174, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.42851213944801825, |
|
"grad_norm": 2.4142684408837667, |
|
"learning_rate": 7.057588297841645e-06, |
|
"loss": 0.5154, |
|
"step": 2065 |
|
}, |
|
{ |
|
"epoch": 0.4295496991076987, |
|
"grad_norm": 2.7421341811351967, |
|
"learning_rate": 7.041070035822687e-06, |
|
"loss": 0.4983, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.43058725876737913, |
|
"grad_norm": 2.385233500154077, |
|
"learning_rate": 7.024524999490364e-06, |
|
"loss": 0.535, |
|
"step": 2075 |
|
}, |
|
{ |
|
"epoch": 0.4316248184270596, |
|
"grad_norm": 2.493022029900477, |
|
"learning_rate": 7.007953405878867e-06, |
|
"loss": 0.5036, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.43266237808673996, |
|
"grad_norm": 2.5007769964543507, |
|
"learning_rate": 6.991355472370762e-06, |
|
"loss": 0.5288, |
|
"step": 2085 |
|
}, |
|
{ |
|
"epoch": 0.4336999377464204, |
|
"grad_norm": 2.4139386188608842, |
|
"learning_rate": 6.974731416694135e-06, |
|
"loss": 0.5142, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.43473749740610085, |
|
"grad_norm": 2.5053762871669303, |
|
"learning_rate": 6.958081456919737e-06, |
|
"loss": 0.502, |
|
"step": 2095 |
|
}, |
|
{ |
|
"epoch": 0.4357750570657813, |
|
"grad_norm": 2.5446406360038267, |
|
"learning_rate": 6.941405811458126e-06, |
|
"loss": 0.5079, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.43681261672546173, |
|
"grad_norm": 2.5024786886158465, |
|
"learning_rate": 6.924704699056792e-06, |
|
"loss": 0.5102, |
|
"step": 2105 |
|
}, |
|
{ |
|
"epoch": 0.4378501763851421, |
|
"grad_norm": 2.5991293471527754, |
|
"learning_rate": 6.907978338797304e-06, |
|
"loss": 0.5033, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.43888773604482256, |
|
"grad_norm": 2.45095417799665, |
|
"learning_rate": 6.891226950092422e-06, |
|
"loss": 0.5033, |
|
"step": 2115 |
|
}, |
|
{ |
|
"epoch": 0.439925295704503, |
|
"grad_norm": 2.5068078607240762, |
|
"learning_rate": 6.874450752683223e-06, |
|
"loss": 0.5131, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.44096285536418345, |
|
"grad_norm": 2.615502411671901, |
|
"learning_rate": 6.85764996663622e-06, |
|
"loss": 0.514, |
|
"step": 2125 |
|
}, |
|
{ |
|
"epoch": 0.4420004150238639, |
|
"grad_norm": 2.483179103106064, |
|
"learning_rate": 6.840824812340476e-06, |
|
"loss": 0.482, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.4430379746835443, |
|
"grad_norm": 2.615056810287462, |
|
"learning_rate": 6.82397551050471e-06, |
|
"loss": 0.4981, |
|
"step": 2135 |
|
}, |
|
{ |
|
"epoch": 0.4440755343432247, |
|
"grad_norm": 2.5081191557582962, |
|
"learning_rate": 6.807102282154406e-06, |
|
"loss": 0.5038, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.44511309400290516, |
|
"grad_norm": 2.4228335632525857, |
|
"learning_rate": 6.790205348628902e-06, |
|
"loss": 0.5116, |
|
"step": 2145 |
|
}, |
|
{ |
|
"epoch": 0.4461506536625856, |
|
"grad_norm": 2.4865567217795834, |
|
"learning_rate": 6.773284931578508e-06, |
|
"loss": 0.4923, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.44718821332226605, |
|
"grad_norm": 2.5028391081664707, |
|
"learning_rate": 6.756341252961575e-06, |
|
"loss": 0.507, |
|
"step": 2155 |
|
}, |
|
{ |
|
"epoch": 0.44822577298194644, |
|
"grad_norm": 2.4437292124023755, |
|
"learning_rate": 6.739374535041601e-06, |
|
"loss": 0.5041, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.4492633326416269, |
|
"grad_norm": 2.4804781219836705, |
|
"learning_rate": 6.722385000384305e-06, |
|
"loss": 0.5071, |
|
"step": 2165 |
|
}, |
|
{ |
|
"epoch": 0.4503008923013073, |
|
"grad_norm": 2.613493029619529, |
|
"learning_rate": 6.705372871854713e-06, |
|
"loss": 0.5045, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.45133845196098776, |
|
"grad_norm": 2.5550452506734613, |
|
"learning_rate": 6.688338372614232e-06, |
|
"loss": 0.4954, |
|
"step": 2175 |
|
}, |
|
{ |
|
"epoch": 0.4523760116206682, |
|
"grad_norm": 2.5823699134656795, |
|
"learning_rate": 6.671281726117721e-06, |
|
"loss": 0.5029, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.4534135712803486, |
|
"grad_norm": 2.6550826128938287, |
|
"learning_rate": 6.654203156110565e-06, |
|
"loss": 0.4942, |
|
"step": 2185 |
|
}, |
|
{ |
|
"epoch": 0.45445113094002904, |
|
"grad_norm": 2.4268474548849976, |
|
"learning_rate": 6.6371028866257355e-06, |
|
"loss": 0.5027, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.4554886905997095, |
|
"grad_norm": 2.437142297358699, |
|
"learning_rate": 6.6199811419808525e-06, |
|
"loss": 0.4949, |
|
"step": 2195 |
|
}, |
|
{ |
|
"epoch": 0.4565262502593899, |
|
"grad_norm": 2.494477319925722, |
|
"learning_rate": 6.602838146775243e-06, |
|
"loss": 0.4796, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.45756380991907036, |
|
"grad_norm": 2.5244330207655223, |
|
"learning_rate": 6.585674125886996e-06, |
|
"loss": 0.5066, |
|
"step": 2205 |
|
}, |
|
{ |
|
"epoch": 0.45860136957875075, |
|
"grad_norm": 2.6127909654783608, |
|
"learning_rate": 6.568489304470007e-06, |
|
"loss": 0.4909, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.4596389292384312, |
|
"grad_norm": 2.5733537948603105, |
|
"learning_rate": 6.551283907951031e-06, |
|
"loss": 0.4886, |
|
"step": 2215 |
|
}, |
|
{ |
|
"epoch": 0.46067648889811164, |
|
"grad_norm": 2.627653050074194, |
|
"learning_rate": 6.534058162026724e-06, |
|
"loss": 0.4871, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.4617140485577921, |
|
"grad_norm": 2.493707567539853, |
|
"learning_rate": 6.516812292660675e-06, |
|
"loss": 0.5115, |
|
"step": 2225 |
|
}, |
|
{ |
|
"epoch": 0.4627516082174725, |
|
"grad_norm": 2.576298574074138, |
|
"learning_rate": 6.499546526080457e-06, |
|
"loss": 0.4935, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.46378916787715296, |
|
"grad_norm": 2.3441840780878227, |
|
"learning_rate": 6.482261088774642e-06, |
|
"loss": 0.4918, |
|
"step": 2235 |
|
}, |
|
{ |
|
"epoch": 0.46482672753683335, |
|
"grad_norm": 2.5855232225238702, |
|
"learning_rate": 6.464956207489843e-06, |
|
"loss": 0.5009, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.4658642871965138, |
|
"grad_norm": 2.578617818062702, |
|
"learning_rate": 6.447632109227735e-06, |
|
"loss": 0.4931, |
|
"step": 2245 |
|
}, |
|
{ |
|
"epoch": 0.46690184685619424, |
|
"grad_norm": 2.6300654942955752, |
|
"learning_rate": 6.4302890212420735e-06, |
|
"loss": 0.4924, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.4679394065158747, |
|
"grad_norm": 2.535272976711584, |
|
"learning_rate": 6.412927171035721e-06, |
|
"loss": 0.4864, |
|
"step": 2255 |
|
}, |
|
{ |
|
"epoch": 0.4689769661755551, |
|
"grad_norm": 2.497392960102334, |
|
"learning_rate": 6.3955467863576555e-06, |
|
"loss": 0.502, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.4700145258352355, |
|
"grad_norm": 2.476885100955309, |
|
"learning_rate": 6.37814809519999e-06, |
|
"loss": 0.498, |
|
"step": 2265 |
|
}, |
|
{ |
|
"epoch": 0.47105208549491595, |
|
"grad_norm": 2.3650744048382726, |
|
"learning_rate": 6.360731325794975e-06, |
|
"loss": 0.486, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.4720896451545964, |
|
"grad_norm": 2.469078585494707, |
|
"learning_rate": 6.343296706612008e-06, |
|
"loss": 0.4745, |
|
"step": 2275 |
|
}, |
|
{ |
|
"epoch": 0.47312720481427684, |
|
"grad_norm": 2.5527304594928326, |
|
"learning_rate": 6.325844466354637e-06, |
|
"loss": 0.4959, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.4741647644739573, |
|
"grad_norm": 2.388876155874213, |
|
"learning_rate": 6.308374833957556e-06, |
|
"loss": 0.4787, |
|
"step": 2285 |
|
}, |
|
{ |
|
"epoch": 0.47520232413363767, |
|
"grad_norm": 2.642090924554838, |
|
"learning_rate": 6.290888038583611e-06, |
|
"loss": 0.4951, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.4762398837933181, |
|
"grad_norm": 2.5493215911731557, |
|
"learning_rate": 6.273384309620785e-06, |
|
"loss": 0.4799, |
|
"step": 2295 |
|
}, |
|
{ |
|
"epoch": 0.47727744345299855, |
|
"grad_norm": 2.3751142920110944, |
|
"learning_rate": 6.25586387667919e-06, |
|
"loss": 0.4841, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.478315003112679, |
|
"grad_norm": 2.4695590079808625, |
|
"learning_rate": 6.238326969588062e-06, |
|
"loss": 0.4739, |
|
"step": 2305 |
|
}, |
|
{ |
|
"epoch": 0.47935256277235944, |
|
"grad_norm": 2.6302561474616937, |
|
"learning_rate": 6.220773818392738e-06, |
|
"loss": 0.4809, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.4803901224320398, |
|
"grad_norm": 2.5254595904398744, |
|
"learning_rate": 6.203204653351642e-06, |
|
"loss": 0.4964, |
|
"step": 2315 |
|
}, |
|
{ |
|
"epoch": 0.48142768209172027, |
|
"grad_norm": 2.4902195244274288, |
|
"learning_rate": 6.185619704933267e-06, |
|
"loss": 0.4654, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.4824652417514007, |
|
"grad_norm": 2.5224684668035007, |
|
"learning_rate": 6.168019203813143e-06, |
|
"loss": 0.479, |
|
"step": 2325 |
|
}, |
|
{ |
|
"epoch": 0.48350280141108115, |
|
"grad_norm": 2.531197814844583, |
|
"learning_rate": 6.15040338087082e-06, |
|
"loss": 0.4756, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.4845403610707616, |
|
"grad_norm": 2.3546555565826135, |
|
"learning_rate": 6.132772467186841e-06, |
|
"loss": 0.4649, |
|
"step": 2335 |
|
}, |
|
{ |
|
"epoch": 0.485577920730442, |
|
"grad_norm": 2.3784120564924622, |
|
"learning_rate": 6.115126694039699e-06, |
|
"loss": 0.4709, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.4866154803901224, |
|
"grad_norm": 2.56815940386139, |
|
"learning_rate": 6.097466292902815e-06, |
|
"loss": 0.486, |
|
"step": 2345 |
|
}, |
|
{ |
|
"epoch": 0.48765304004980287, |
|
"grad_norm": 2.5301845486075103, |
|
"learning_rate": 6.079791495441491e-06, |
|
"loss": 0.4754, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.4886905997094833, |
|
"grad_norm": 2.4650710165826806, |
|
"learning_rate": 6.062102533509886e-06, |
|
"loss": 0.4663, |
|
"step": 2355 |
|
}, |
|
{ |
|
"epoch": 0.48972815936916375, |
|
"grad_norm": 2.7587452612942145, |
|
"learning_rate": 6.044399639147957e-06, |
|
"loss": 0.4632, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.49076571902884414, |
|
"grad_norm": 2.543480240365517, |
|
"learning_rate": 6.026683044578427e-06, |
|
"loss": 0.4689, |
|
"step": 2365 |
|
}, |
|
{ |
|
"epoch": 0.4918032786885246, |
|
"grad_norm": 2.4525809117166366, |
|
"learning_rate": 6.008952982203737e-06, |
|
"loss": 0.4843, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.492840838348205, |
|
"grad_norm": 2.417071327082838, |
|
"learning_rate": 5.991209684602991e-06, |
|
"loss": 0.4677, |
|
"step": 2375 |
|
}, |
|
{ |
|
"epoch": 0.49387839800788547, |
|
"grad_norm": 2.8159423032357394, |
|
"learning_rate": 5.9734533845289144e-06, |
|
"loss": 0.466, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.4949159576675659, |
|
"grad_norm": 2.5331145358141023, |
|
"learning_rate": 5.955684314904795e-06, |
|
"loss": 0.491, |
|
"step": 2385 |
|
}, |
|
{ |
|
"epoch": 0.4959535173272463, |
|
"grad_norm": 2.5850294725860103, |
|
"learning_rate": 5.937902708821427e-06, |
|
"loss": 0.4727, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.49699107698692674, |
|
"grad_norm": 2.3455874492137627, |
|
"learning_rate": 5.920108799534059e-06, |
|
"loss": 0.4699, |
|
"step": 2395 |
|
}, |
|
{ |
|
"epoch": 0.4980286366466072, |
|
"grad_norm": 2.496007013879939, |
|
"learning_rate": 5.902302820459324e-06, |
|
"loss": 0.4599, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.4990661963062876, |
|
"grad_norm": 2.412699996479322, |
|
"learning_rate": 5.884485005172189e-06, |
|
"loss": 0.474, |
|
"step": 2405 |
|
}, |
|
{ |
|
"epoch": 0.500103755965968, |
|
"grad_norm": 2.6197335120279805, |
|
"learning_rate": 5.866655587402886e-06, |
|
"loss": 0.4815, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.5011413156256485, |
|
"grad_norm": 2.632568563481647, |
|
"learning_rate": 5.8488148010338445e-06, |
|
"loss": 0.474, |
|
"step": 2415 |
|
}, |
|
{ |
|
"epoch": 0.5021788752853289, |
|
"grad_norm": 2.383359491818944, |
|
"learning_rate": 5.8309628800966225e-06, |
|
"loss": 0.464, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.5032164349450093, |
|
"grad_norm": 2.687191301722998, |
|
"learning_rate": 5.813100058768841e-06, |
|
"loss": 0.4671, |
|
"step": 2425 |
|
}, |
|
{ |
|
"epoch": 0.5042539946046898, |
|
"grad_norm": 2.455559246352499, |
|
"learning_rate": 5.795226571371114e-06, |
|
"loss": 0.4682, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.5052915542643702, |
|
"grad_norm": 2.589166158482144, |
|
"learning_rate": 5.777342652363963e-06, |
|
"loss": 0.4756, |
|
"step": 2435 |
|
}, |
|
{ |
|
"epoch": 0.5063291139240507, |
|
"grad_norm": 2.5173329990266295, |
|
"learning_rate": 5.759448536344753e-06, |
|
"loss": 0.4849, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.5073666735837311, |
|
"grad_norm": 2.3813334648732227, |
|
"learning_rate": 5.741544458044611e-06, |
|
"loss": 0.4725, |
|
"step": 2445 |
|
}, |
|
{ |
|
"epoch": 0.5084042332434114, |
|
"grad_norm": 2.4645035637647683, |
|
"learning_rate": 5.723630652325349e-06, |
|
"loss": 0.4523, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.5094417929030919, |
|
"grad_norm": 2.5330130007551848, |
|
"learning_rate": 5.705707354176377e-06, |
|
"loss": 0.4655, |
|
"step": 2455 |
|
}, |
|
{ |
|
"epoch": 0.5104793525627723, |
|
"grad_norm": 2.3938015606881886, |
|
"learning_rate": 5.687774798711627e-06, |
|
"loss": 0.468, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.5115169122224528, |
|
"grad_norm": 2.647072227845224, |
|
"learning_rate": 5.669833221166469e-06, |
|
"loss": 0.4695, |
|
"step": 2465 |
|
}, |
|
{ |
|
"epoch": 0.5125544718821332, |
|
"grad_norm": 2.4713289117715544, |
|
"learning_rate": 5.651882856894615e-06, |
|
"loss": 0.4617, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.5135920315418137, |
|
"grad_norm": 2.494015444510411, |
|
"learning_rate": 5.633923941365049e-06, |
|
"loss": 0.4659, |
|
"step": 2475 |
|
}, |
|
{ |
|
"epoch": 0.5146295912014941, |
|
"grad_norm": 2.3455915787636807, |
|
"learning_rate": 5.615956710158921e-06, |
|
"loss": 0.4563, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.5156671508611745, |
|
"grad_norm": 2.453795440026443, |
|
"learning_rate": 5.597981398966468e-06, |
|
"loss": 0.4698, |
|
"step": 2485 |
|
}, |
|
{ |
|
"epoch": 0.516704710520855, |
|
"grad_norm": 2.3862841672431143, |
|
"learning_rate": 5.579998243583919e-06, |
|
"loss": 0.4583, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.5177422701805354, |
|
"grad_norm": 2.412824706136417, |
|
"learning_rate": 5.562007479910396e-06, |
|
"loss": 0.4714, |
|
"step": 2495 |
|
}, |
|
{ |
|
"epoch": 0.5187798298402159, |
|
"grad_norm": 2.5121998422157543, |
|
"learning_rate": 5.544009343944834e-06, |
|
"loss": 0.4597, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.5198173894998962, |
|
"grad_norm": 2.479588579717285, |
|
"learning_rate": 5.526004071782868e-06, |
|
"loss": 0.461, |
|
"step": 2505 |
|
}, |
|
{ |
|
"epoch": 0.5208549491595766, |
|
"grad_norm": 2.4676776770935365, |
|
"learning_rate": 5.507991899613746e-06, |
|
"loss": 0.4632, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.5218925088192571, |
|
"grad_norm": 2.4315208171491838, |
|
"learning_rate": 5.489973063717233e-06, |
|
"loss": 0.4702, |
|
"step": 2515 |
|
}, |
|
{ |
|
"epoch": 0.5229300684789375, |
|
"grad_norm": 2.430974288239865, |
|
"learning_rate": 5.471947800460502e-06, |
|
"loss": 0.4389, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.523967628138618, |
|
"grad_norm": 2.481418769699562, |
|
"learning_rate": 5.453916346295043e-06, |
|
"loss": 0.4516, |
|
"step": 2525 |
|
}, |
|
{ |
|
"epoch": 0.5250051877982984, |
|
"grad_norm": 2.454530070786792, |
|
"learning_rate": 5.435878937753553e-06, |
|
"loss": 0.4461, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.5260427474579789, |
|
"grad_norm": 2.412430721171224, |
|
"learning_rate": 5.417835811446839e-06, |
|
"loss": 0.4516, |
|
"step": 2535 |
|
}, |
|
{ |
|
"epoch": 0.5270803071176593, |
|
"grad_norm": 2.5095500970858566, |
|
"learning_rate": 5.3997872040607154e-06, |
|
"loss": 0.4647, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.5281178667773397, |
|
"grad_norm": 2.47241909883305, |
|
"learning_rate": 5.3817333523528895e-06, |
|
"loss": 0.4529, |
|
"step": 2545 |
|
}, |
|
{ |
|
"epoch": 0.5291554264370202, |
|
"grad_norm": 2.5164134259250677, |
|
"learning_rate": 5.363674493149868e-06, |
|
"loss": 0.4584, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.5301929860967005, |
|
"grad_norm": 2.4698416937451073, |
|
"learning_rate": 5.345610863343843e-06, |
|
"loss": 0.4479, |
|
"step": 2555 |
|
}, |
|
{ |
|
"epoch": 0.531230545756381, |
|
"grad_norm": 2.388109532933269, |
|
"learning_rate": 5.327542699889586e-06, |
|
"loss": 0.4527, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.5322681054160614, |
|
"grad_norm": 2.532256574198705, |
|
"learning_rate": 5.309470239801343e-06, |
|
"loss": 0.4541, |
|
"step": 2565 |
|
}, |
|
{ |
|
"epoch": 0.5333056650757418, |
|
"grad_norm": 2.414906181196464, |
|
"learning_rate": 5.291393720149716e-06, |
|
"loss": 0.4415, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.5343432247354223, |
|
"grad_norm": 2.5860824474566146, |
|
"learning_rate": 5.273313378058566e-06, |
|
"loss": 0.4377, |
|
"step": 2575 |
|
}, |
|
{ |
|
"epoch": 0.5353807843951027, |
|
"grad_norm": 2.6418841514226967, |
|
"learning_rate": 5.255229450701893e-06, |
|
"loss": 0.4342, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.5364183440547832, |
|
"grad_norm": 2.4497315903217247, |
|
"learning_rate": 5.237142175300726e-06, |
|
"loss": 0.4533, |
|
"step": 2585 |
|
}, |
|
{ |
|
"epoch": 0.5374559037144636, |
|
"grad_norm": 2.4935146868944265, |
|
"learning_rate": 5.219051789120015e-06, |
|
"loss": 0.44, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.538493463374144, |
|
"grad_norm": 2.4091632402760346, |
|
"learning_rate": 5.200958529465517e-06, |
|
"loss": 0.454, |
|
"step": 2595 |
|
}, |
|
{ |
|
"epoch": 0.5395310230338245, |
|
"grad_norm": 2.460401303833881, |
|
"learning_rate": 5.182862633680683e-06, |
|
"loss": 0.4512, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.5405685826935048, |
|
"grad_norm": 2.49980869010523, |
|
"learning_rate": 5.164764339143542e-06, |
|
"loss": 0.4531, |
|
"step": 2605 |
|
}, |
|
{ |
|
"epoch": 0.5416061423531853, |
|
"grad_norm": 2.3750857632390767, |
|
"learning_rate": 5.14666388326359e-06, |
|
"loss": 0.4548, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.5426437020128657, |
|
"grad_norm": 2.402759567147983, |
|
"learning_rate": 5.128561503478676e-06, |
|
"loss": 0.4582, |
|
"step": 2615 |
|
}, |
|
{ |
|
"epoch": 0.5436812616725462, |
|
"grad_norm": 2.387820424005501, |
|
"learning_rate": 5.110457437251886e-06, |
|
"loss": 0.4413, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.5447188213322266, |
|
"grad_norm": 2.3796099453754764, |
|
"learning_rate": 5.092351922068427e-06, |
|
"loss": 0.4524, |
|
"step": 2625 |
|
}, |
|
{ |
|
"epoch": 0.545756380991907, |
|
"grad_norm": 2.3669548551534563, |
|
"learning_rate": 5.0742451954325156e-06, |
|
"loss": 0.4473, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.5467939406515875, |
|
"grad_norm": 2.463149244250711, |
|
"learning_rate": 5.056137494864259e-06, |
|
"loss": 0.447, |
|
"step": 2635 |
|
}, |
|
{ |
|
"epoch": 0.5478315003112679, |
|
"grad_norm": 2.3744815980543494, |
|
"learning_rate": 5.0380290578965375e-06, |
|
"loss": 0.4404, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.5488690599709484, |
|
"grad_norm": 2.3391410347348796, |
|
"learning_rate": 5.019920122071896e-06, |
|
"loss": 0.4388, |
|
"step": 2645 |
|
}, |
|
{ |
|
"epoch": 0.5499066196306288, |
|
"grad_norm": 2.3768157404751085, |
|
"learning_rate": 5.00181092493942e-06, |
|
"loss": 0.4386, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.5509441792903091, |
|
"grad_norm": 2.6059542471826824, |
|
"learning_rate": 4.983701704051625e-06, |
|
"loss": 0.4528, |
|
"step": 2655 |
|
}, |
|
{ |
|
"epoch": 0.5519817389499896, |
|
"grad_norm": 2.417585242968673, |
|
"learning_rate": 4.965592696961335e-06, |
|
"loss": 0.4501, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.55301929860967, |
|
"grad_norm": 2.4947079086664123, |
|
"learning_rate": 4.947484141218572e-06, |
|
"loss": 0.4385, |
|
"step": 2665 |
|
}, |
|
{ |
|
"epoch": 0.5540568582693505, |
|
"grad_norm": 2.5191730683436604, |
|
"learning_rate": 4.929376274367438e-06, |
|
"loss": 0.4324, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.5550944179290309, |
|
"grad_norm": 2.4257268049363336, |
|
"learning_rate": 4.911269333942994e-06, |
|
"loss": 0.4388, |
|
"step": 2675 |
|
}, |
|
{ |
|
"epoch": 0.5561319775887114, |
|
"grad_norm": 2.5184876591899648, |
|
"learning_rate": 4.893163557468155e-06, |
|
"loss": 0.4316, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.5571695372483918, |
|
"grad_norm": 2.380468990224694, |
|
"learning_rate": 4.87505918245056e-06, |
|
"loss": 0.437, |
|
"step": 2685 |
|
}, |
|
{ |
|
"epoch": 0.5582070969080722, |
|
"grad_norm": 2.3265663935377727, |
|
"learning_rate": 4.856956446379472e-06, |
|
"loss": 0.4307, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.5592446565677527, |
|
"grad_norm": 2.3400451782909646, |
|
"learning_rate": 4.838855586722647e-06, |
|
"loss": 0.4351, |
|
"step": 2695 |
|
}, |
|
{ |
|
"epoch": 0.5602822162274331, |
|
"grad_norm": 2.4589440190512675, |
|
"learning_rate": 4.820756840923232e-06, |
|
"loss": 0.4447, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.5613197758871135, |
|
"grad_norm": 2.4951528488343184, |
|
"learning_rate": 4.802660446396642e-06, |
|
"loss": 0.4445, |
|
"step": 2705 |
|
}, |
|
{ |
|
"epoch": 0.5623573355467939, |
|
"grad_norm": 2.666275326085189, |
|
"learning_rate": 4.784566640527451e-06, |
|
"loss": 0.4406, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.5633948952064743, |
|
"grad_norm": 2.4165213736699522, |
|
"learning_rate": 4.766475660666271e-06, |
|
"loss": 0.4222, |
|
"step": 2715 |
|
}, |
|
{ |
|
"epoch": 0.5644324548661548, |
|
"grad_norm": 2.345464539142852, |
|
"learning_rate": 4.748387744126649e-06, |
|
"loss": 0.4355, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.5654700145258352, |
|
"grad_norm": 2.5576836547836215, |
|
"learning_rate": 4.730303128181944e-06, |
|
"loss": 0.4289, |
|
"step": 2725 |
|
}, |
|
{ |
|
"epoch": 0.5665075741855157, |
|
"grad_norm": 2.5283882321035858, |
|
"learning_rate": 4.712222050062219e-06, |
|
"loss": 0.4283, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.5675451338451961, |
|
"grad_norm": 2.556873104995108, |
|
"learning_rate": 4.694144746951131e-06, |
|
"loss": 0.434, |
|
"step": 2735 |
|
}, |
|
{ |
|
"epoch": 0.5685826935048766, |
|
"grad_norm": 2.280037463289983, |
|
"learning_rate": 4.676071455982811e-06, |
|
"loss": 0.4294, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.569620253164557, |
|
"grad_norm": 2.3570715386535555, |
|
"learning_rate": 4.658002414238771e-06, |
|
"loss": 0.4357, |
|
"step": 2745 |
|
}, |
|
{ |
|
"epoch": 0.5706578128242374, |
|
"grad_norm": 2.4558176026821217, |
|
"learning_rate": 4.63993785874477e-06, |
|
"loss": 0.4421, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.5716953724839178, |
|
"grad_norm": 2.4779834247504082, |
|
"learning_rate": 4.621878026467725e-06, |
|
"loss": 0.4336, |
|
"step": 2755 |
|
}, |
|
{ |
|
"epoch": 0.5727329321435982, |
|
"grad_norm": 2.3737617720589483, |
|
"learning_rate": 4.603823154312593e-06, |
|
"loss": 0.4263, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.5737704918032787, |
|
"grad_norm": 2.4648165069495147, |
|
"learning_rate": 4.585773479119265e-06, |
|
"loss": 0.4487, |
|
"step": 2765 |
|
}, |
|
{ |
|
"epoch": 0.5748080514629591, |
|
"grad_norm": 2.3105842246846064, |
|
"learning_rate": 4.567729237659459e-06, |
|
"loss": 0.4252, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.5758456111226395, |
|
"grad_norm": 2.560822029486094, |
|
"learning_rate": 4.549690666633615e-06, |
|
"loss": 0.4432, |
|
"step": 2775 |
|
}, |
|
{ |
|
"epoch": 0.57688317078232, |
|
"grad_norm": 2.6130981185416, |
|
"learning_rate": 4.531658002667787e-06, |
|
"loss": 0.4402, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.5779207304420004, |
|
"grad_norm": 2.363414455159581, |
|
"learning_rate": 4.51363148231055e-06, |
|
"loss": 0.4351, |
|
"step": 2785 |
|
}, |
|
{ |
|
"epoch": 0.5789582901016809, |
|
"grad_norm": 2.363075041688293, |
|
"learning_rate": 4.495611342029875e-06, |
|
"loss": 0.428, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.5799958497613613, |
|
"grad_norm": 2.4754540310783733, |
|
"learning_rate": 4.477597818210054e-06, |
|
"loss": 0.4246, |
|
"step": 2795 |
|
}, |
|
{ |
|
"epoch": 0.5810334094210418, |
|
"grad_norm": 2.6816555150046755, |
|
"learning_rate": 4.459591147148575e-06, |
|
"loss": 0.4253, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.5820709690807221, |
|
"grad_norm": 2.4036860880645894, |
|
"learning_rate": 4.441591565053041e-06, |
|
"loss": 0.4272, |
|
"step": 2805 |
|
}, |
|
{ |
|
"epoch": 0.5831085287404025, |
|
"grad_norm": 2.4802074251251156, |
|
"learning_rate": 4.423599308038057e-06, |
|
"loss": 0.4209, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.584146088400083, |
|
"grad_norm": 2.415585886184946, |
|
"learning_rate": 4.405614612122145e-06, |
|
"loss": 0.4226, |
|
"step": 2815 |
|
}, |
|
{ |
|
"epoch": 0.5851836480597634, |
|
"grad_norm": 2.4533356227902816, |
|
"learning_rate": 4.387637713224638e-06, |
|
"loss": 0.4343, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.5862212077194439, |
|
"grad_norm": 2.7240629036546284, |
|
"learning_rate": 4.36966884716259e-06, |
|
"loss": 0.4287, |
|
"step": 2825 |
|
}, |
|
{ |
|
"epoch": 0.5872587673791243, |
|
"grad_norm": 2.3878324516316276, |
|
"learning_rate": 4.3517082496476845e-06, |
|
"loss": 0.4184, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.5882963270388047, |
|
"grad_norm": 2.422038027754476, |
|
"learning_rate": 4.333756156283136e-06, |
|
"loss": 0.4209, |
|
"step": 2835 |
|
}, |
|
{ |
|
"epoch": 0.5893338866984852, |
|
"grad_norm": 2.5027178602080697, |
|
"learning_rate": 4.315812802560609e-06, |
|
"loss": 0.4151, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.5903714463581656, |
|
"grad_norm": 2.440654195509635, |
|
"learning_rate": 4.2978784238571145e-06, |
|
"loss": 0.424, |
|
"step": 2845 |
|
}, |
|
{ |
|
"epoch": 0.5914090060178461, |
|
"grad_norm": 2.4534970838374206, |
|
"learning_rate": 4.279953255431944e-06, |
|
"loss": 0.427, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.5924465656775264, |
|
"grad_norm": 2.4846011931620087, |
|
"learning_rate": 4.262037532423556e-06, |
|
"loss": 0.4376, |
|
"step": 2855 |
|
}, |
|
{ |
|
"epoch": 0.5934841253372068, |
|
"grad_norm": 2.4555820059315, |
|
"learning_rate": 4.244131489846519e-06, |
|
"loss": 0.4102, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.5945216849968873, |
|
"grad_norm": 2.443517931338813, |
|
"learning_rate": 4.2262353625884054e-06, |
|
"loss": 0.4138, |
|
"step": 2865 |
|
}, |
|
{ |
|
"epoch": 0.5955592446565677, |
|
"grad_norm": 2.369110831081435, |
|
"learning_rate": 4.208349385406729e-06, |
|
"loss": 0.4364, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.5965968043162482, |
|
"grad_norm": 2.3567775981536423, |
|
"learning_rate": 4.190473792925851e-06, |
|
"loss": 0.4277, |
|
"step": 2875 |
|
}, |
|
{ |
|
"epoch": 0.5976343639759286, |
|
"grad_norm": 2.3822530664354216, |
|
"learning_rate": 4.1726088196339106e-06, |
|
"loss": 0.4266, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.5986719236356091, |
|
"grad_norm": 2.3958405655171293, |
|
"learning_rate": 4.154754699879748e-06, |
|
"loss": 0.4177, |
|
"step": 2885 |
|
}, |
|
{ |
|
"epoch": 0.5997094832952895, |
|
"grad_norm": 2.3762721908121573, |
|
"learning_rate": 4.136911667869827e-06, |
|
"loss": 0.4146, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.60074704295497, |
|
"grad_norm": 2.2932296484786137, |
|
"learning_rate": 4.119079957665163e-06, |
|
"loss": 0.4074, |
|
"step": 2895 |
|
}, |
|
{ |
|
"epoch": 0.6017846026146504, |
|
"grad_norm": 2.446668454803684, |
|
"learning_rate": 4.101259803178265e-06, |
|
"loss": 0.4318, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.6028221622743307, |
|
"grad_norm": 2.361867602889005, |
|
"learning_rate": 4.083451438170039e-06, |
|
"loss": 0.4098, |
|
"step": 2905 |
|
}, |
|
{ |
|
"epoch": 0.6038597219340112, |
|
"grad_norm": 2.323202296910631, |
|
"learning_rate": 4.065655096246755e-06, |
|
"loss": 0.408, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.6048972815936916, |
|
"grad_norm": 2.3845839128387722, |
|
"learning_rate": 4.047871010856959e-06, |
|
"loss": 0.4071, |
|
"step": 2915 |
|
}, |
|
{ |
|
"epoch": 0.605934841253372, |
|
"grad_norm": 2.317266250356725, |
|
"learning_rate": 4.03009941528842e-06, |
|
"loss": 0.4051, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.6069724009130525, |
|
"grad_norm": 2.4278074780829852, |
|
"learning_rate": 4.012340542665067e-06, |
|
"loss": 0.4002, |
|
"step": 2925 |
|
}, |
|
{ |
|
"epoch": 0.6080099605727329, |
|
"grad_norm": 2.4948657955155853, |
|
"learning_rate": 3.994594625943936e-06, |
|
"loss": 0.4103, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.6090475202324134, |
|
"grad_norm": 2.4097506379402596, |
|
"learning_rate": 3.976861897912106e-06, |
|
"loss": 0.4137, |
|
"step": 2935 |
|
}, |
|
{ |
|
"epoch": 0.6100850798920938, |
|
"grad_norm": 2.695103229335665, |
|
"learning_rate": 3.959142591183652e-06, |
|
"loss": 0.4184, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.6111226395517743, |
|
"grad_norm": 2.433881870125308, |
|
"learning_rate": 3.9414369381965904e-06, |
|
"loss": 0.4084, |
|
"step": 2945 |
|
}, |
|
{ |
|
"epoch": 0.6121601992114547, |
|
"grad_norm": 2.424853521797893, |
|
"learning_rate": 3.92374517120983e-06, |
|
"loss": 0.4157, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.6131977588711351, |
|
"grad_norm": 2.3289318124697926, |
|
"learning_rate": 3.90606752230013e-06, |
|
"loss": 0.4002, |
|
"step": 2955 |
|
}, |
|
{ |
|
"epoch": 0.6142353185308155, |
|
"grad_norm": 2.4465640629382186, |
|
"learning_rate": 3.888404223359045e-06, |
|
"loss": 0.4057, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.6152728781904959, |
|
"grad_norm": 2.572631649766761, |
|
"learning_rate": 3.870755506089899e-06, |
|
"loss": 0.4144, |
|
"step": 2965 |
|
}, |
|
{ |
|
"epoch": 0.6163104378501764, |
|
"grad_norm": 2.4132420332744204, |
|
"learning_rate": 3.8531216020047246e-06, |
|
"loss": 0.4116, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.6173479975098568, |
|
"grad_norm": 2.3892278386798593, |
|
"learning_rate": 3.835502742421251e-06, |
|
"loss": 0.4093, |
|
"step": 2975 |
|
}, |
|
{ |
|
"epoch": 0.6183855571695372, |
|
"grad_norm": 2.2641401615974406, |
|
"learning_rate": 3.8178991584598474e-06, |
|
"loss": 0.4131, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.6194231168292177, |
|
"grad_norm": 2.438762066594083, |
|
"learning_rate": 3.8003110810405065e-06, |
|
"loss": 0.4064, |
|
"step": 2985 |
|
}, |
|
{ |
|
"epoch": 0.6204606764888981, |
|
"grad_norm": 2.4506479186056396, |
|
"learning_rate": 3.782738740879806e-06, |
|
"loss": 0.4052, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.6214982361485786, |
|
"grad_norm": 2.5283170692610617, |
|
"learning_rate": 3.7651823684878884e-06, |
|
"loss": 0.396, |
|
"step": 2995 |
|
}, |
|
{ |
|
"epoch": 0.622535795808259, |
|
"grad_norm": 2.4749410345861453, |
|
"learning_rate": 3.7476421941654318e-06, |
|
"loss": 0.4193, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.6235733554679395, |
|
"grad_norm": 2.317549282593909, |
|
"learning_rate": 3.7301184480006337e-06, |
|
"loss": 0.3973, |
|
"step": 3005 |
|
}, |
|
{ |
|
"epoch": 0.6246109151276198, |
|
"grad_norm": 2.5252500112854275, |
|
"learning_rate": 3.712611359866188e-06, |
|
"loss": 0.4147, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 0.6256484747873002, |
|
"grad_norm": 2.4400394840947475, |
|
"learning_rate": 3.6951211594162784e-06, |
|
"loss": 0.4089, |
|
"step": 3015 |
|
}, |
|
{ |
|
"epoch": 0.6266860344469807, |
|
"grad_norm": 2.349957863539589, |
|
"learning_rate": 3.677648076083549e-06, |
|
"loss": 0.3992, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.6277235941066611, |
|
"grad_norm": 2.3832033455743833, |
|
"learning_rate": 3.6601923390761156e-06, |
|
"loss": 0.4131, |
|
"step": 3025 |
|
}, |
|
{ |
|
"epoch": 0.6287611537663416, |
|
"grad_norm": 2.576394180910392, |
|
"learning_rate": 3.6427541773745433e-06, |
|
"loss": 0.3968, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 0.629798713426022, |
|
"grad_norm": 2.4507894648738877, |
|
"learning_rate": 3.6253338197288505e-06, |
|
"loss": 0.4023, |
|
"step": 3035 |
|
}, |
|
{ |
|
"epoch": 0.6308362730857024, |
|
"grad_norm": 2.3382065135829997, |
|
"learning_rate": 3.607931494655504e-06, |
|
"loss": 0.3918, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.6318738327453829, |
|
"grad_norm": 2.4244914619480373, |
|
"learning_rate": 3.5905474304344225e-06, |
|
"loss": 0.4117, |
|
"step": 3045 |
|
}, |
|
{ |
|
"epoch": 0.6329113924050633, |
|
"grad_norm": 2.567189154890026, |
|
"learning_rate": 3.573181855105986e-06, |
|
"loss": 0.42, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.6339489520647438, |
|
"grad_norm": 2.278238684757535, |
|
"learning_rate": 3.555834996468039e-06, |
|
"loss": 0.4041, |
|
"step": 3055 |
|
}, |
|
{ |
|
"epoch": 0.6349865117244241, |
|
"grad_norm": 2.504690078277028, |
|
"learning_rate": 3.538507082072905e-06, |
|
"loss": 0.3944, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.6360240713841046, |
|
"grad_norm": 2.508152236947373, |
|
"learning_rate": 3.5211983392243996e-06, |
|
"loss": 0.4, |
|
"step": 3065 |
|
}, |
|
{ |
|
"epoch": 0.637061631043785, |
|
"grad_norm": 2.3910619945271683, |
|
"learning_rate": 3.503908994974856e-06, |
|
"loss": 0.4093, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 0.6380991907034654, |
|
"grad_norm": 2.3248985179294652, |
|
"learning_rate": 3.4866392761221303e-06, |
|
"loss": 0.4065, |
|
"step": 3075 |
|
}, |
|
{ |
|
"epoch": 0.6391367503631459, |
|
"grad_norm": 2.361597466745494, |
|
"learning_rate": 3.4693894092066483e-06, |
|
"loss": 0.3907, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.6401743100228263, |
|
"grad_norm": 2.4949060134883743, |
|
"learning_rate": 3.452159620508414e-06, |
|
"loss": 0.4004, |
|
"step": 3085 |
|
}, |
|
{ |
|
"epoch": 0.6412118696825068, |
|
"grad_norm": 2.3586999565378886, |
|
"learning_rate": 3.4349501360440556e-06, |
|
"loss": 0.3977, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 0.6422494293421872, |
|
"grad_norm": 2.2746540643844426, |
|
"learning_rate": 3.417761181563849e-06, |
|
"loss": 0.3949, |
|
"step": 3095 |
|
}, |
|
{ |
|
"epoch": 0.6432869890018676, |
|
"grad_norm": 2.4940700061779064, |
|
"learning_rate": 3.4005929825487684e-06, |
|
"loss": 0.4011, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.6443245486615481, |
|
"grad_norm": 2.3950183894327934, |
|
"learning_rate": 3.383445764207516e-06, |
|
"loss": 0.408, |
|
"step": 3105 |
|
}, |
|
{ |
|
"epoch": 0.6453621083212284, |
|
"grad_norm": 2.30382353091722, |
|
"learning_rate": 3.366319751473579e-06, |
|
"loss": 0.4022, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 0.6463996679809089, |
|
"grad_norm": 2.4321346720182766, |
|
"learning_rate": 3.3492151690022712e-06, |
|
"loss": 0.3986, |
|
"step": 3115 |
|
}, |
|
{ |
|
"epoch": 0.6474372276405893, |
|
"grad_norm": 2.3061011632873285, |
|
"learning_rate": 3.332132241167793e-06, |
|
"loss": 0.3972, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.6484747873002698, |
|
"grad_norm": 2.4917204012266327, |
|
"learning_rate": 3.3150711920602765e-06, |
|
"loss": 0.4042, |
|
"step": 3125 |
|
}, |
|
{ |
|
"epoch": 0.6495123469599502, |
|
"grad_norm": 2.190798555385459, |
|
"learning_rate": 3.2980322454828617e-06, |
|
"loss": 0.3917, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 0.6505499066196306, |
|
"grad_norm": 2.343964465011578, |
|
"learning_rate": 3.281015624948746e-06, |
|
"loss": 0.3893, |
|
"step": 3135 |
|
}, |
|
{ |
|
"epoch": 0.6515874662793111, |
|
"grad_norm": 2.61426487111248, |
|
"learning_rate": 3.264021553678264e-06, |
|
"loss": 0.4087, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.6526250259389915, |
|
"grad_norm": 2.3381914892622553, |
|
"learning_rate": 3.247050254595947e-06, |
|
"loss": 0.3996, |
|
"step": 3145 |
|
}, |
|
{ |
|
"epoch": 0.653662585598672, |
|
"grad_norm": 2.4097371109758945, |
|
"learning_rate": 3.2301019503276144e-06, |
|
"loss": 0.404, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.6547001452583524, |
|
"grad_norm": 2.3425157027203127, |
|
"learning_rate": 3.2131768631974375e-06, |
|
"loss": 0.4025, |
|
"step": 3155 |
|
}, |
|
{ |
|
"epoch": 0.6557377049180327, |
|
"grad_norm": 2.405066414410578, |
|
"learning_rate": 3.196275215225032e-06, |
|
"loss": 0.4095, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.6567752645777132, |
|
"grad_norm": 2.332707293053418, |
|
"learning_rate": 3.179397228122547e-06, |
|
"loss": 0.4, |
|
"step": 3165 |
|
}, |
|
{ |
|
"epoch": 0.6578128242373936, |
|
"grad_norm": 2.374933561243685, |
|
"learning_rate": 3.162543123291749e-06, |
|
"loss": 0.3887, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 0.6588503838970741, |
|
"grad_norm": 2.265067002723173, |
|
"learning_rate": 3.1457131218211263e-06, |
|
"loss": 0.3974, |
|
"step": 3175 |
|
}, |
|
{ |
|
"epoch": 0.6598879435567545, |
|
"grad_norm": 2.363694486377073, |
|
"learning_rate": 3.1289074444829783e-06, |
|
"loss": 0.3932, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.660925503216435, |
|
"grad_norm": 2.5691413705019603, |
|
"learning_rate": 3.1121263117305355e-06, |
|
"loss": 0.3848, |
|
"step": 3185 |
|
}, |
|
{ |
|
"epoch": 0.6619630628761154, |
|
"grad_norm": 2.3700655398167005, |
|
"learning_rate": 3.0953699436950464e-06, |
|
"loss": 0.3942, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 0.6630006225357958, |
|
"grad_norm": 2.5079197198404932, |
|
"learning_rate": 3.0786385601829114e-06, |
|
"loss": 0.3921, |
|
"step": 3195 |
|
}, |
|
{ |
|
"epoch": 0.6640381821954763, |
|
"grad_norm": 2.3572529532760136, |
|
"learning_rate": 3.061932380672783e-06, |
|
"loss": 0.389, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.6650757418551567, |
|
"grad_norm": 2.3406368361183643, |
|
"learning_rate": 3.0452516243126955e-06, |
|
"loss": 0.3942, |
|
"step": 3205 |
|
}, |
|
{ |
|
"epoch": 0.666113301514837, |
|
"grad_norm": 2.2910811346177953, |
|
"learning_rate": 3.0285965099171864e-06, |
|
"loss": 0.3954, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 0.6671508611745175, |
|
"grad_norm": 2.3711327251353254, |
|
"learning_rate": 3.0119672559644313e-06, |
|
"loss": 0.3825, |
|
"step": 3215 |
|
}, |
|
{ |
|
"epoch": 0.6681884208341979, |
|
"grad_norm": 2.346432027672407, |
|
"learning_rate": 2.995364080593368e-06, |
|
"loss": 0.3862, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.6692259804938784, |
|
"grad_norm": 2.25488355968479, |
|
"learning_rate": 2.978787201600847e-06, |
|
"loss": 0.3869, |
|
"step": 3225 |
|
}, |
|
{ |
|
"epoch": 0.6702635401535588, |
|
"grad_norm": 2.5145720297012204, |
|
"learning_rate": 2.9622368364387626e-06, |
|
"loss": 0.3979, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 0.6713010998132393, |
|
"grad_norm": 2.430288285642923, |
|
"learning_rate": 2.9457132022112156e-06, |
|
"loss": 0.3876, |
|
"step": 3235 |
|
}, |
|
{ |
|
"epoch": 0.6723386594729197, |
|
"grad_norm": 2.52733248085438, |
|
"learning_rate": 2.9292165156716447e-06, |
|
"loss": 0.3918, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.6733762191326002, |
|
"grad_norm": 2.4117862477617464, |
|
"learning_rate": 2.9127469932200034e-06, |
|
"loss": 0.3904, |
|
"step": 3245 |
|
}, |
|
{ |
|
"epoch": 0.6744137787922806, |
|
"grad_norm": 2.544838045591555, |
|
"learning_rate": 2.89630485089991e-06, |
|
"loss": 0.3968, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.675451338451961, |
|
"grad_norm": 2.3114731154281687, |
|
"learning_rate": 2.879890304395816e-06, |
|
"loss": 0.389, |
|
"step": 3255 |
|
}, |
|
{ |
|
"epoch": 0.6764888981116414, |
|
"grad_norm": 2.214278091013657, |
|
"learning_rate": 2.8635035690301725e-06, |
|
"loss": 0.368, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.6775264577713218, |
|
"grad_norm": 2.4150879417081033, |
|
"learning_rate": 2.847144859760622e-06, |
|
"loss": 0.3997, |
|
"step": 3265 |
|
}, |
|
{ |
|
"epoch": 0.6785640174310023, |
|
"grad_norm": 2.2503456214277766, |
|
"learning_rate": 2.8308143911771555e-06, |
|
"loss": 0.3785, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 0.6796015770906827, |
|
"grad_norm": 2.400256629243757, |
|
"learning_rate": 2.8145123774993075e-06, |
|
"loss": 0.3873, |
|
"step": 3275 |
|
}, |
|
{ |
|
"epoch": 0.6806391367503631, |
|
"grad_norm": 2.4464548102871393, |
|
"learning_rate": 2.798239032573362e-06, |
|
"loss": 0.3811, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.6816766964100436, |
|
"grad_norm": 2.5094627463784973, |
|
"learning_rate": 2.7819945698695148e-06, |
|
"loss": 0.387, |
|
"step": 3285 |
|
}, |
|
{ |
|
"epoch": 0.682714256069724, |
|
"grad_norm": 2.4588062818595944, |
|
"learning_rate": 2.765779202479103e-06, |
|
"loss": 0.3848, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 0.6837518157294045, |
|
"grad_norm": 2.2530973533871035, |
|
"learning_rate": 2.749593143111793e-06, |
|
"loss": 0.3641, |
|
"step": 3295 |
|
}, |
|
{ |
|
"epoch": 0.6847893753890849, |
|
"grad_norm": 2.3169454419169684, |
|
"learning_rate": 2.733436604092797e-06, |
|
"loss": 0.378, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.6858269350487654, |
|
"grad_norm": 2.5056936406549917, |
|
"learning_rate": 2.7173097973600806e-06, |
|
"loss": 0.3837, |
|
"step": 3305 |
|
}, |
|
{ |
|
"epoch": 0.6868644947084457, |
|
"grad_norm": 2.315516361770829, |
|
"learning_rate": 2.7012129344615933e-06, |
|
"loss": 0.3797, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 0.6879020543681261, |
|
"grad_norm": 2.3765574424439504, |
|
"learning_rate": 2.6851462265524862e-06, |
|
"loss": 0.3821, |
|
"step": 3315 |
|
}, |
|
{ |
|
"epoch": 0.6889396140278066, |
|
"grad_norm": 2.632664268427571, |
|
"learning_rate": 2.6691098843923464e-06, |
|
"loss": 0.3869, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.689977173687487, |
|
"grad_norm": 2.4558633695260554, |
|
"learning_rate": 2.65310411834242e-06, |
|
"loss": 0.3778, |
|
"step": 3325 |
|
}, |
|
{ |
|
"epoch": 0.6910147333471675, |
|
"grad_norm": 2.3802011004843053, |
|
"learning_rate": 2.637129138362877e-06, |
|
"loss": 0.3818, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 0.6920522930068479, |
|
"grad_norm": 2.3545532106676625, |
|
"learning_rate": 2.62118515401003e-06, |
|
"loss": 0.3745, |
|
"step": 3335 |
|
}, |
|
{ |
|
"epoch": 0.6930898526665283, |
|
"grad_norm": 2.2370151266527913, |
|
"learning_rate": 2.6052723744336027e-06, |
|
"loss": 0.382, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.6941274123262088, |
|
"grad_norm": 2.3233925949072645, |
|
"learning_rate": 2.589391008373982e-06, |
|
"loss": 0.3901, |
|
"step": 3345 |
|
}, |
|
{ |
|
"epoch": 0.6951649719858892, |
|
"grad_norm": 2.385447861484446, |
|
"learning_rate": 2.5735412641594804e-06, |
|
"loss": 0.3804, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.6962025316455697, |
|
"grad_norm": 2.3578389155683217, |
|
"learning_rate": 2.5577233497035943e-06, |
|
"loss": 0.3888, |
|
"step": 3355 |
|
}, |
|
{ |
|
"epoch": 0.69724009130525, |
|
"grad_norm": 2.3006918024124965, |
|
"learning_rate": 2.541937472502293e-06, |
|
"loss": 0.3661, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.6982776509649304, |
|
"grad_norm": 2.303381728187516, |
|
"learning_rate": 2.526183839631283e-06, |
|
"loss": 0.3827, |
|
"step": 3365 |
|
}, |
|
{ |
|
"epoch": 0.6993152106246109, |
|
"grad_norm": 2.324125134648238, |
|
"learning_rate": 2.5104626577433022e-06, |
|
"loss": 0.376, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 0.7003527702842913, |
|
"grad_norm": 2.4863826311733614, |
|
"learning_rate": 2.4947741330653942e-06, |
|
"loss": 0.3765, |
|
"step": 3375 |
|
}, |
|
{ |
|
"epoch": 0.7013903299439718, |
|
"grad_norm": 2.309178058770916, |
|
"learning_rate": 2.4791184713962207e-06, |
|
"loss": 0.3665, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.7024278896036522, |
|
"grad_norm": 2.338910146993185, |
|
"learning_rate": 2.463495878103352e-06, |
|
"loss": 0.3601, |
|
"step": 3385 |
|
}, |
|
{ |
|
"epoch": 0.7034654492633327, |
|
"grad_norm": 2.386442857917931, |
|
"learning_rate": 2.4479065581205673e-06, |
|
"loss": 0.3775, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 0.7045030089230131, |
|
"grad_norm": 2.4264131593130913, |
|
"learning_rate": 2.4323507159451887e-06, |
|
"loss": 0.3775, |
|
"step": 3395 |
|
}, |
|
{ |
|
"epoch": 0.7055405685826935, |
|
"grad_norm": 35.69245904532589, |
|
"learning_rate": 2.416828555635368e-06, |
|
"loss": 0.3836, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.706578128242374, |
|
"grad_norm": 2.3981493771293385, |
|
"learning_rate": 2.4013402808074356e-06, |
|
"loss": 0.3648, |
|
"step": 3405 |
|
}, |
|
{ |
|
"epoch": 0.7076156879020543, |
|
"grad_norm": 2.458681006711934, |
|
"learning_rate": 2.3858860946332148e-06, |
|
"loss": 0.3706, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 0.7086532475617348, |
|
"grad_norm": 2.5072358874605545, |
|
"learning_rate": 2.3704661998373652e-06, |
|
"loss": 0.3786, |
|
"step": 3415 |
|
}, |
|
{ |
|
"epoch": 0.7096908072214152, |
|
"grad_norm": 2.4783372667192576, |
|
"learning_rate": 2.3550807986947133e-06, |
|
"loss": 0.3635, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.7107283668810956, |
|
"grad_norm": 2.583412985853997, |
|
"learning_rate": 2.3397300930276116e-06, |
|
"loss": 0.3574, |
|
"step": 3425 |
|
}, |
|
{ |
|
"epoch": 0.7117659265407761, |
|
"grad_norm": 2.39052336222071, |
|
"learning_rate": 2.3244142842032823e-06, |
|
"loss": 0.3642, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 0.7128034862004565, |
|
"grad_norm": 2.2998480766526974, |
|
"learning_rate": 2.309133573131181e-06, |
|
"loss": 0.3659, |
|
"step": 3435 |
|
}, |
|
{ |
|
"epoch": 0.713841045860137, |
|
"grad_norm": 2.452595068182353, |
|
"learning_rate": 2.2938881602603496e-06, |
|
"loss": 0.3767, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 0.7148786055198174, |
|
"grad_norm": 2.4427339177937597, |
|
"learning_rate": 2.2786782455768113e-06, |
|
"loss": 0.3701, |
|
"step": 3445 |
|
}, |
|
{ |
|
"epoch": 0.7159161651794979, |
|
"grad_norm": 2.5344881209783647, |
|
"learning_rate": 2.2635040286009163e-06, |
|
"loss": 0.3636, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.7169537248391783, |
|
"grad_norm": 2.4517299777866914, |
|
"learning_rate": 2.2483657083847487e-06, |
|
"loss": 0.3638, |
|
"step": 3455 |
|
}, |
|
{ |
|
"epoch": 0.7179912844988587, |
|
"grad_norm": 2.333164872282216, |
|
"learning_rate": 2.233263483509505e-06, |
|
"loss": 0.3652, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 0.7190288441585391, |
|
"grad_norm": 2.536346581261384, |
|
"learning_rate": 2.218197552082893e-06, |
|
"loss": 0.3843, |
|
"step": 3465 |
|
}, |
|
{ |
|
"epoch": 0.7200664038182195, |
|
"grad_norm": 2.3170765412707444, |
|
"learning_rate": 2.203168111736524e-06, |
|
"loss": 0.3702, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 0.7211039634779, |
|
"grad_norm": 2.3720426965424926, |
|
"learning_rate": 2.1881753596233334e-06, |
|
"loss": 0.3661, |
|
"step": 3475 |
|
}, |
|
{ |
|
"epoch": 0.7221415231375804, |
|
"grad_norm": 2.4784864232706427, |
|
"learning_rate": 2.173219492414988e-06, |
|
"loss": 0.3557, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 0.7231790827972608, |
|
"grad_norm": 2.2572170707401598, |
|
"learning_rate": 2.1583007062993037e-06, |
|
"loss": 0.3626, |
|
"step": 3485 |
|
}, |
|
{ |
|
"epoch": 0.7242166424569413, |
|
"grad_norm": 2.3751613572262618, |
|
"learning_rate": 2.1434191969776787e-06, |
|
"loss": 0.372, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 0.7252542021166217, |
|
"grad_norm": 2.403916983756595, |
|
"learning_rate": 2.1285751596625153e-06, |
|
"loss": 0.3793, |
|
"step": 3495 |
|
}, |
|
{ |
|
"epoch": 0.7262917617763022, |
|
"grad_norm": 2.422300937120251, |
|
"learning_rate": 2.1137687890746733e-06, |
|
"loss": 0.3757, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.7273293214359826, |
|
"grad_norm": 2.3278136492746904, |
|
"learning_rate": 2.099000279440905e-06, |
|
"loss": 0.3695, |
|
"step": 3505 |
|
}, |
|
{ |
|
"epoch": 0.728366881095663, |
|
"grad_norm": 2.3521512237809774, |
|
"learning_rate": 2.0842698244913146e-06, |
|
"loss": 0.3577, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 0.7294044407553434, |
|
"grad_norm": 2.4420360123708837, |
|
"learning_rate": 2.0695776174568054e-06, |
|
"loss": 0.3617, |
|
"step": 3515 |
|
}, |
|
{ |
|
"epoch": 0.7304420004150238, |
|
"grad_norm": 2.355326415228057, |
|
"learning_rate": 2.054923851066561e-06, |
|
"loss": 0.366, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.7314795600747043, |
|
"grad_norm": 2.408952585713157, |
|
"learning_rate": 2.0403087175455044e-06, |
|
"loss": 0.3592, |
|
"step": 3525 |
|
}, |
|
{ |
|
"epoch": 0.7325171197343847, |
|
"grad_norm": 2.3814394489123556, |
|
"learning_rate": 2.025732408611786e-06, |
|
"loss": 0.3702, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 0.7335546793940652, |
|
"grad_norm": 2.505258380168417, |
|
"learning_rate": 2.0111951154742526e-06, |
|
"loss": 0.3541, |
|
"step": 3535 |
|
}, |
|
{ |
|
"epoch": 0.7345922390537456, |
|
"grad_norm": 2.2378548590102962, |
|
"learning_rate": 1.9966970288299666e-06, |
|
"loss": 0.367, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 0.735629798713426, |
|
"grad_norm": 2.5282472044177657, |
|
"learning_rate": 1.982238338861673e-06, |
|
"loss": 0.372, |
|
"step": 3545 |
|
}, |
|
{ |
|
"epoch": 0.7366673583731065, |
|
"grad_norm": 2.395346352470493, |
|
"learning_rate": 1.9678192352353282e-06, |
|
"loss": 0.3647, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.7377049180327869, |
|
"grad_norm": 2.377816912239941, |
|
"learning_rate": 1.9534399070976013e-06, |
|
"loss": 0.3694, |
|
"step": 3555 |
|
}, |
|
{ |
|
"epoch": 0.7387424776924674, |
|
"grad_norm": 2.395069006251528, |
|
"learning_rate": 1.9391005430733973e-06, |
|
"loss": 0.3643, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 0.7397800373521477, |
|
"grad_norm": 2.360921498446891, |
|
"learning_rate": 1.924801331263375e-06, |
|
"loss": 0.3592, |
|
"step": 3565 |
|
}, |
|
{ |
|
"epoch": 0.7408175970118281, |
|
"grad_norm": 2.4026862396255977, |
|
"learning_rate": 1.9105424592414905e-06, |
|
"loss": 0.3596, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 0.7418551566715086, |
|
"grad_norm": 2.3493974707324323, |
|
"learning_rate": 1.8963241140525302e-06, |
|
"loss": 0.352, |
|
"step": 3575 |
|
}, |
|
{ |
|
"epoch": 0.742892716331189, |
|
"grad_norm": 2.255383769774033, |
|
"learning_rate": 1.8821464822096587e-06, |
|
"loss": 0.3545, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 0.7439302759908695, |
|
"grad_norm": 2.3439036527893804, |
|
"learning_rate": 1.8680097496919663e-06, |
|
"loss": 0.3614, |
|
"step": 3585 |
|
}, |
|
{ |
|
"epoch": 0.7449678356505499, |
|
"grad_norm": 2.402995035755627, |
|
"learning_rate": 1.8539141019420459e-06, |
|
"loss": 0.3591, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 0.7460053953102304, |
|
"grad_norm": 2.3095366349656845, |
|
"learning_rate": 1.8398597238635375e-06, |
|
"loss": 0.3555, |
|
"step": 3595 |
|
}, |
|
{ |
|
"epoch": 0.7470429549699108, |
|
"grad_norm": 2.454441768372595, |
|
"learning_rate": 1.825846799818722e-06, |
|
"loss": 0.3645, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.7480805146295912, |
|
"grad_norm": 2.3783282437356883, |
|
"learning_rate": 1.8118755136260941e-06, |
|
"loss": 0.364, |
|
"step": 3605 |
|
}, |
|
{ |
|
"epoch": 0.7491180742892717, |
|
"grad_norm": 2.4804513717759975, |
|
"learning_rate": 1.7979460485579486e-06, |
|
"loss": 0.3577, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 0.750155633948952, |
|
"grad_norm": 2.399533389844388, |
|
"learning_rate": 1.784058587337984e-06, |
|
"loss": 0.3548, |
|
"step": 3615 |
|
}, |
|
{ |
|
"epoch": 0.7511931936086325, |
|
"grad_norm": 2.5287966988574992, |
|
"learning_rate": 1.7702133121388999e-06, |
|
"loss": 0.3702, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 0.7522307532683129, |
|
"grad_norm": 2.5280174627826097, |
|
"learning_rate": 1.7564104045800101e-06, |
|
"loss": 0.3717, |
|
"step": 3625 |
|
}, |
|
{ |
|
"epoch": 0.7532683129279933, |
|
"grad_norm": 2.454120509191578, |
|
"learning_rate": 1.7426500457248552e-06, |
|
"loss": 0.3508, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 0.7543058725876738, |
|
"grad_norm": 2.2917885535880176, |
|
"learning_rate": 1.7289324160788346e-06, |
|
"loss": 0.3509, |
|
"step": 3635 |
|
}, |
|
{ |
|
"epoch": 0.7553434322473542, |
|
"grad_norm": 2.478759913910432, |
|
"learning_rate": 1.7152576955868338e-06, |
|
"loss": 0.3597, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 0.7563809919070347, |
|
"grad_norm": 2.3448679544687256, |
|
"learning_rate": 1.701626063630869e-06, |
|
"loss": 0.367, |
|
"step": 3645 |
|
}, |
|
{ |
|
"epoch": 0.7574185515667151, |
|
"grad_norm": 2.3452502536112485, |
|
"learning_rate": 1.6880376990277202e-06, |
|
"loss": 0.3548, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.7584561112263956, |
|
"grad_norm": 2.3587859019469084, |
|
"learning_rate": 1.674492780026611e-06, |
|
"loss": 0.3635, |
|
"step": 3655 |
|
}, |
|
{ |
|
"epoch": 0.759493670886076, |
|
"grad_norm": 2.4281427875718635, |
|
"learning_rate": 1.6609914843068403e-06, |
|
"loss": 0.3535, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 0.7605312305457563, |
|
"grad_norm": 2.644672013859547, |
|
"learning_rate": 1.6475339889754755e-06, |
|
"loss": 0.3546, |
|
"step": 3665 |
|
}, |
|
{ |
|
"epoch": 0.7615687902054368, |
|
"grad_norm": 2.231863917742126, |
|
"learning_rate": 1.6341204705650155e-06, |
|
"loss": 0.3565, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 0.7626063498651172, |
|
"grad_norm": 2.3627064704364003, |
|
"learning_rate": 1.6207511050310842e-06, |
|
"loss": 0.3601, |
|
"step": 3675 |
|
}, |
|
{ |
|
"epoch": 0.7636439095247977, |
|
"grad_norm": 2.5482612194562746, |
|
"learning_rate": 1.6074260677501102e-06, |
|
"loss": 0.3606, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.7646814691844781, |
|
"grad_norm": 2.4794463771443187, |
|
"learning_rate": 1.5941455335170408e-06, |
|
"loss": 0.3538, |
|
"step": 3685 |
|
}, |
|
{ |
|
"epoch": 0.7657190288441585, |
|
"grad_norm": 2.6762628509028565, |
|
"learning_rate": 1.5809096765430387e-06, |
|
"loss": 0.3517, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 0.766756588503839, |
|
"grad_norm": 2.2429463729622565, |
|
"learning_rate": 1.5677186704532016e-06, |
|
"loss": 0.3457, |
|
"step": 3695 |
|
}, |
|
{ |
|
"epoch": 0.7677941481635194, |
|
"grad_norm": 2.36484474879296, |
|
"learning_rate": 1.5545726882842782e-06, |
|
"loss": 0.3573, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.7688317078231999, |
|
"grad_norm": 2.294696024445774, |
|
"learning_rate": 1.5414719024824127e-06, |
|
"loss": 0.3509, |
|
"step": 3705 |
|
}, |
|
{ |
|
"epoch": 0.7698692674828803, |
|
"grad_norm": 2.51233601030323, |
|
"learning_rate": 1.5284164849008648e-06, |
|
"loss": 0.3589, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 0.7709068271425606, |
|
"grad_norm": 2.473322215306534, |
|
"learning_rate": 1.515406606797763e-06, |
|
"loss": 0.3491, |
|
"step": 3715 |
|
}, |
|
{ |
|
"epoch": 0.7719443868022411, |
|
"grad_norm": 2.2061318656296067, |
|
"learning_rate": 1.5024424388338682e-06, |
|
"loss": 0.3499, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 0.7729819464619215, |
|
"grad_norm": 2.435209027478781, |
|
"learning_rate": 1.4895241510703157e-06, |
|
"loss": 0.3643, |
|
"step": 3725 |
|
}, |
|
{ |
|
"epoch": 0.774019506121602, |
|
"grad_norm": 2.369611296713981, |
|
"learning_rate": 1.4766519129663992e-06, |
|
"loss": 0.3483, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 0.7750570657812824, |
|
"grad_norm": 2.355834071175694, |
|
"learning_rate": 1.4638258933773425e-06, |
|
"loss": 0.3519, |
|
"step": 3735 |
|
}, |
|
{ |
|
"epoch": 0.7760946254409629, |
|
"grad_norm": 2.469740057096309, |
|
"learning_rate": 1.451046260552086e-06, |
|
"loss": 0.3475, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 0.7771321851006433, |
|
"grad_norm": 2.4205796114015077, |
|
"learning_rate": 1.438313182131073e-06, |
|
"loss": 0.3523, |
|
"step": 3745 |
|
}, |
|
{ |
|
"epoch": 0.7781697447603237, |
|
"grad_norm": 2.252004484682199, |
|
"learning_rate": 1.4256268251440631e-06, |
|
"loss": 0.3501, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.7792073044200042, |
|
"grad_norm": 2.3823736067008556, |
|
"learning_rate": 1.412987356007931e-06, |
|
"loss": 0.3439, |
|
"step": 3755 |
|
}, |
|
{ |
|
"epoch": 0.7802448640796846, |
|
"grad_norm": 2.440498718677342, |
|
"learning_rate": 1.4003949405244888e-06, |
|
"loss": 0.3545, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 0.781282423739365, |
|
"grad_norm": 2.4557991100819905, |
|
"learning_rate": 1.3878497438783035e-06, |
|
"loss": 0.3476, |
|
"step": 3765 |
|
}, |
|
{ |
|
"epoch": 0.7823199833990454, |
|
"grad_norm": 2.3768196794994956, |
|
"learning_rate": 1.3753519306345443e-06, |
|
"loss": 0.3566, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 0.7833575430587258, |
|
"grad_norm": 2.3763354198113427, |
|
"learning_rate": 1.3629016647368077e-06, |
|
"loss": 0.3508, |
|
"step": 3775 |
|
}, |
|
{ |
|
"epoch": 0.7843951027184063, |
|
"grad_norm": 2.5905901892830725, |
|
"learning_rate": 1.3504991095049774e-06, |
|
"loss": 0.3499, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 0.7854326623780867, |
|
"grad_norm": 2.4256417735699407, |
|
"learning_rate": 1.338144427633079e-06, |
|
"loss": 0.3504, |
|
"step": 3785 |
|
}, |
|
{ |
|
"epoch": 0.7864702220377672, |
|
"grad_norm": 2.4053687510971664, |
|
"learning_rate": 1.3258377811871481e-06, |
|
"loss": 0.3484, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 0.7875077816974476, |
|
"grad_norm": 2.4549151530337654, |
|
"learning_rate": 1.3135793316030958e-06, |
|
"loss": 0.3482, |
|
"step": 3795 |
|
}, |
|
{ |
|
"epoch": 0.7885453413571281, |
|
"grad_norm": 2.4124091117521576, |
|
"learning_rate": 1.3013692396846028e-06, |
|
"loss": 0.3417, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.7895829010168085, |
|
"grad_norm": 2.492914244103795, |
|
"learning_rate": 1.2892076656010017e-06, |
|
"loss": 0.346, |
|
"step": 3805 |
|
}, |
|
{ |
|
"epoch": 0.790620460676489, |
|
"grad_norm": 2.5012298456270807, |
|
"learning_rate": 1.277094768885182e-06, |
|
"loss": 0.3359, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 0.7916580203361693, |
|
"grad_norm": 2.3887971930046, |
|
"learning_rate": 1.2650307084314872e-06, |
|
"loss": 0.3434, |
|
"step": 3815 |
|
}, |
|
{ |
|
"epoch": 0.7926955799958497, |
|
"grad_norm": 2.4930229591528605, |
|
"learning_rate": 1.2530156424936469e-06, |
|
"loss": 0.362, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 0.7937331396555302, |
|
"grad_norm": 2.386338375070925, |
|
"learning_rate": 1.241049728682684e-06, |
|
"loss": 0.3484, |
|
"step": 3825 |
|
}, |
|
{ |
|
"epoch": 0.7947706993152106, |
|
"grad_norm": 2.303121828761511, |
|
"learning_rate": 1.229133123964853e-06, |
|
"loss": 0.355, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 0.795808258974891, |
|
"grad_norm": 2.418730350979906, |
|
"learning_rate": 1.2172659846595924e-06, |
|
"loss": 0.3534, |
|
"step": 3835 |
|
}, |
|
{ |
|
"epoch": 0.7968458186345715, |
|
"grad_norm": 2.4002454150472143, |
|
"learning_rate": 1.2054484664374533e-06, |
|
"loss": 0.3484, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.7978833782942519, |
|
"grad_norm": 2.453421003455541, |
|
"learning_rate": 1.1936807243180743e-06, |
|
"loss": 0.3493, |
|
"step": 3845 |
|
}, |
|
{ |
|
"epoch": 0.7989209379539324, |
|
"grad_norm": 2.4746199776766473, |
|
"learning_rate": 1.1819629126681398e-06, |
|
"loss": 0.3387, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 0.7999584976136128, |
|
"grad_norm": 2.38307853790052, |
|
"learning_rate": 1.1702951851993598e-06, |
|
"loss": 0.3483, |
|
"step": 3855 |
|
}, |
|
{ |
|
"epoch": 0.8009960572732933, |
|
"grad_norm": 2.4103963437096114, |
|
"learning_rate": 1.1586776949664453e-06, |
|
"loss": 0.3423, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 0.8020336169329736, |
|
"grad_norm": 2.3551774825792924, |
|
"learning_rate": 1.1471105943651117e-06, |
|
"loss": 0.341, |
|
"step": 3865 |
|
}, |
|
{ |
|
"epoch": 0.803071176592654, |
|
"grad_norm": 2.518707782804587, |
|
"learning_rate": 1.1355940351300715e-06, |
|
"loss": 0.349, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 0.8041087362523345, |
|
"grad_norm": 2.3344993783154964, |
|
"learning_rate": 1.1241281683330486e-06, |
|
"loss": 0.3519, |
|
"step": 3875 |
|
}, |
|
{ |
|
"epoch": 0.8051462959120149, |
|
"grad_norm": 2.3344944133616665, |
|
"learning_rate": 1.1127131443807887e-06, |
|
"loss": 0.3412, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 0.8061838555716954, |
|
"grad_norm": 2.3019549791312754, |
|
"learning_rate": 1.1013491130131027e-06, |
|
"loss": 0.3346, |
|
"step": 3885 |
|
}, |
|
{ |
|
"epoch": 0.8072214152313758, |
|
"grad_norm": 2.3790731919640415, |
|
"learning_rate": 1.0900362233008804e-06, |
|
"loss": 0.3414, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 0.8082589748910562, |
|
"grad_norm": 2.4317034804688453, |
|
"learning_rate": 1.0787746236441538e-06, |
|
"loss": 0.3521, |
|
"step": 3895 |
|
}, |
|
{ |
|
"epoch": 0.8092965345507367, |
|
"grad_norm": 2.387356188471124, |
|
"learning_rate": 1.0675644617701402e-06, |
|
"loss": 0.3528, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.8103340942104171, |
|
"grad_norm": 2.53615626667828, |
|
"learning_rate": 1.0564058847313108e-06, |
|
"loss": 0.351, |
|
"step": 3905 |
|
}, |
|
{ |
|
"epoch": 0.8113716538700976, |
|
"grad_norm": 2.4249799199654376, |
|
"learning_rate": 1.0452990389034507e-06, |
|
"loss": 0.3392, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 0.8124092135297779, |
|
"grad_norm": 2.3915606563087946, |
|
"learning_rate": 1.0342440699837537e-06, |
|
"loss": 0.3361, |
|
"step": 3915 |
|
}, |
|
{ |
|
"epoch": 0.8134467731894583, |
|
"grad_norm": 2.5405240392917094, |
|
"learning_rate": 1.0232411229888994e-06, |
|
"loss": 0.3457, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 0.8144843328491388, |
|
"grad_norm": 2.424497940803967, |
|
"learning_rate": 1.0122903422531588e-06, |
|
"loss": 0.3424, |
|
"step": 3925 |
|
}, |
|
{ |
|
"epoch": 0.8155218925088192, |
|
"grad_norm": 2.4383046197013187, |
|
"learning_rate": 1.001391871426492e-06, |
|
"loss": 0.3559, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 0.8165594521684997, |
|
"grad_norm": 2.3911832191085054, |
|
"learning_rate": 9.90545853472673e-07, |
|
"loss": 0.3429, |
|
"step": 3935 |
|
}, |
|
{ |
|
"epoch": 0.8175970118281801, |
|
"grad_norm": 2.279983678354184, |
|
"learning_rate": 9.797524306674104e-07, |
|
"loss": 0.3437, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 0.8186345714878606, |
|
"grad_norm": 2.50140074824066, |
|
"learning_rate": 9.69011744596477e-07, |
|
"loss": 0.3402, |
|
"step": 3945 |
|
}, |
|
{ |
|
"epoch": 0.819672131147541, |
|
"grad_norm": 2.4365828792120294, |
|
"learning_rate": 9.583239361538638e-07, |
|
"loss": 0.3417, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 0.8207096908072214, |
|
"grad_norm": 2.3579736935494955, |
|
"learning_rate": 9.476891455399168e-07, |
|
"loss": 0.3346, |
|
"step": 3955 |
|
}, |
|
{ |
|
"epoch": 0.8217472504669019, |
|
"grad_norm": 2.5090769048416566, |
|
"learning_rate": 9.371075122595103e-07, |
|
"loss": 0.3231, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 0.8227848101265823, |
|
"grad_norm": 2.439152419655965, |
|
"learning_rate": 9.265791751202113e-07, |
|
"loss": 0.3437, |
|
"step": 3965 |
|
}, |
|
{ |
|
"epoch": 0.8238223697862627, |
|
"grad_norm": 2.4534357310565214, |
|
"learning_rate": 9.161042722304609e-07, |
|
"loss": 0.3409, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 0.8248599294459431, |
|
"grad_norm": 2.2889519820080357, |
|
"learning_rate": 9.056829409977574e-07, |
|
"loss": 0.3423, |
|
"step": 3975 |
|
}, |
|
{ |
|
"epoch": 0.8258974891056236, |
|
"grad_norm": 2.6079615773622544, |
|
"learning_rate": 8.953153181268609e-07, |
|
"loss": 0.3412, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 0.826935048765304, |
|
"grad_norm": 2.462659677967327, |
|
"learning_rate": 8.850015396179962e-07, |
|
"loss": 0.3399, |
|
"step": 3985 |
|
}, |
|
{ |
|
"epoch": 0.8279726084249844, |
|
"grad_norm": 2.453876430295839, |
|
"learning_rate": 8.747417407650704e-07, |
|
"loss": 0.3368, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 0.8290101680846649, |
|
"grad_norm": 2.5260638258531722, |
|
"learning_rate": 8.645360561538935e-07, |
|
"loss": 0.3506, |
|
"step": 3995 |
|
}, |
|
{ |
|
"epoch": 0.8300477277443453, |
|
"grad_norm": 2.392012536793574, |
|
"learning_rate": 8.543846196604239e-07, |
|
"loss": 0.3434, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.8310852874040258, |
|
"grad_norm": 2.4984903138745587, |
|
"learning_rate": 8.442875644489962e-07, |
|
"loss": 0.3351, |
|
"step": 4005 |
|
}, |
|
{ |
|
"epoch": 0.8321228470637062, |
|
"grad_norm": 2.464203117108214, |
|
"learning_rate": 8.342450229705889e-07, |
|
"loss": 0.3455, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 0.8331604067233866, |
|
"grad_norm": 2.489210802914686, |
|
"learning_rate": 8.2425712696108e-07, |
|
"loss": 0.33, |
|
"step": 4015 |
|
}, |
|
{ |
|
"epoch": 0.834197966383067, |
|
"grad_norm": 2.306914933986251, |
|
"learning_rate": 8.143240074395198e-07, |
|
"loss": 0.3418, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 0.8352355260427474, |
|
"grad_norm": 2.513591795617815, |
|
"learning_rate": 8.044457947064116e-07, |
|
"loss": 0.3418, |
|
"step": 4025 |
|
}, |
|
{ |
|
"epoch": 0.8362730857024279, |
|
"grad_norm": 2.3537491795821524, |
|
"learning_rate": 7.946226183420047e-07, |
|
"loss": 0.3479, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 0.8373106453621083, |
|
"grad_norm": 2.422572559987635, |
|
"learning_rate": 7.848546072045932e-07, |
|
"loss": 0.3446, |
|
"step": 4035 |
|
}, |
|
{ |
|
"epoch": 0.8383482050217888, |
|
"grad_norm": 2.3037996424039084, |
|
"learning_rate": 7.75141889428826e-07, |
|
"loss": 0.3257, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 0.8393857646814692, |
|
"grad_norm": 2.3570961713348186, |
|
"learning_rate": 7.654845924240228e-07, |
|
"loss": 0.3341, |
|
"step": 4045 |
|
}, |
|
{ |
|
"epoch": 0.8404233243411496, |
|
"grad_norm": 2.3236115130562762, |
|
"learning_rate": 7.558828428725102e-07, |
|
"loss": 0.3328, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 0.8414608840008301, |
|
"grad_norm": 2.4888053313991603, |
|
"learning_rate": 7.463367667279515e-07, |
|
"loss": 0.3429, |
|
"step": 4055 |
|
}, |
|
{ |
|
"epoch": 0.8424984436605105, |
|
"grad_norm": 2.4813791916370334, |
|
"learning_rate": 7.368464892137006e-07, |
|
"loss": 0.3412, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 0.843536003320191, |
|
"grad_norm": 2.3113527571110737, |
|
"learning_rate": 7.274121348211582e-07, |
|
"loss": 0.3475, |
|
"step": 4065 |
|
}, |
|
{ |
|
"epoch": 0.8445735629798713, |
|
"grad_norm": 2.587556104836598, |
|
"learning_rate": 7.180338273081327e-07, |
|
"loss": 0.3354, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 0.8456111226395517, |
|
"grad_norm": 2.296420055640647, |
|
"learning_rate": 7.087116896972268e-07, |
|
"loss": 0.3357, |
|
"step": 4075 |
|
}, |
|
{ |
|
"epoch": 0.8466486822992322, |
|
"grad_norm": 2.3266676608899663, |
|
"learning_rate": 6.994458442742163e-07, |
|
"loss": 0.3362, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 0.8476862419589126, |
|
"grad_norm": 2.6045589584755984, |
|
"learning_rate": 6.902364125864496e-07, |
|
"loss": 0.34, |
|
"step": 4085 |
|
}, |
|
{ |
|
"epoch": 0.8487238016185931, |
|
"grad_norm": 2.352378817551258, |
|
"learning_rate": 6.810835154412487e-07, |
|
"loss": 0.339, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 0.8497613612782735, |
|
"grad_norm": 2.3547116125315446, |
|
"learning_rate": 6.719872729043331e-07, |
|
"loss": 0.3378, |
|
"step": 4095 |
|
}, |
|
{ |
|
"epoch": 0.850798920937954, |
|
"grad_norm": 2.321427219175219, |
|
"learning_rate": 6.629478042982346e-07, |
|
"loss": 0.3229, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.8518364805976344, |
|
"grad_norm": 2.521475742171532, |
|
"learning_rate": 6.539652282007386e-07, |
|
"loss": 0.3376, |
|
"step": 4105 |
|
}, |
|
{ |
|
"epoch": 0.8528740402573148, |
|
"grad_norm": 2.3926877720494573, |
|
"learning_rate": 6.450396624433286e-07, |
|
"loss": 0.3325, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 0.8539115999169953, |
|
"grad_norm": 2.425081699530235, |
|
"learning_rate": 6.361712241096374e-07, |
|
"loss": 0.3314, |
|
"step": 4115 |
|
}, |
|
{ |
|
"epoch": 0.8549491595766756, |
|
"grad_norm": 2.487314370898736, |
|
"learning_rate": 6.273600295339111e-07, |
|
"loss": 0.3352, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 0.855986719236356, |
|
"grad_norm": 2.4556339075670723, |
|
"learning_rate": 6.186061942994864e-07, |
|
"loss": 0.3338, |
|
"step": 4125 |
|
}, |
|
{ |
|
"epoch": 0.8570242788960365, |
|
"grad_norm": 2.417547682627604, |
|
"learning_rate": 6.099098332372733e-07, |
|
"loss": 0.3299, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 0.8580618385557169, |
|
"grad_norm": 2.611592074254707, |
|
"learning_rate": 6.012710604242478e-07, |
|
"loss": 0.3331, |
|
"step": 4135 |
|
}, |
|
{ |
|
"epoch": 0.8590993982153974, |
|
"grad_norm": 2.448052874786836, |
|
"learning_rate": 5.926899891819521e-07, |
|
"loss": 0.3376, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 0.8601369578750778, |
|
"grad_norm": 2.475864292987021, |
|
"learning_rate": 5.841667320750188e-07, |
|
"loss": 0.3437, |
|
"step": 4145 |
|
}, |
|
{ |
|
"epoch": 0.8611745175347583, |
|
"grad_norm": 2.373719046516931, |
|
"learning_rate": 5.757014009096801e-07, |
|
"loss": 0.3357, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 0.8622120771944387, |
|
"grad_norm": 2.3162935725557032, |
|
"learning_rate": 5.672941067323124e-07, |
|
"loss": 0.3305, |
|
"step": 4155 |
|
}, |
|
{ |
|
"epoch": 0.8632496368541192, |
|
"grad_norm": 2.3285428336849736, |
|
"learning_rate": 5.589449598279762e-07, |
|
"loss": 0.3278, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 0.8642871965137996, |
|
"grad_norm": 2.6975663812633823, |
|
"learning_rate": 5.506540697189638e-07, |
|
"loss": 0.3235, |
|
"step": 4165 |
|
}, |
|
{ |
|
"epoch": 0.8653247561734799, |
|
"grad_norm": 2.385391836897359, |
|
"learning_rate": 5.424215451633719e-07, |
|
"loss": 0.3366, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 0.8663623158331604, |
|
"grad_norm": 2.509848709680898, |
|
"learning_rate": 5.342474941536701e-07, |
|
"loss": 0.3349, |
|
"step": 4175 |
|
}, |
|
{ |
|
"epoch": 0.8673998754928408, |
|
"grad_norm": 2.4992507740450063, |
|
"learning_rate": 5.261320239152851e-07, |
|
"loss": 0.3248, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 0.8684374351525213, |
|
"grad_norm": 2.4274228991197826, |
|
"learning_rate": 5.180752409051892e-07, |
|
"loss": 0.3276, |
|
"step": 4185 |
|
}, |
|
{ |
|
"epoch": 0.8694749948122017, |
|
"grad_norm": 2.3127395460474975, |
|
"learning_rate": 5.100772508105139e-07, |
|
"loss": 0.318, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 0.8705125544718821, |
|
"grad_norm": 2.468734058758486, |
|
"learning_rate": 5.021381585471563e-07, |
|
"loss": 0.3332, |
|
"step": 4195 |
|
}, |
|
{ |
|
"epoch": 0.8715501141315626, |
|
"grad_norm": 2.4531926615746564, |
|
"learning_rate": 4.942580682584041e-07, |
|
"loss": 0.3286, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.872587673791243, |
|
"grad_norm": 2.377223115961875, |
|
"learning_rate": 4.864370833135673e-07, |
|
"loss": 0.3342, |
|
"step": 4205 |
|
}, |
|
{ |
|
"epoch": 0.8736252334509235, |
|
"grad_norm": 2.449018104688608, |
|
"learning_rate": 4.786753063066318e-07, |
|
"loss": 0.3355, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 0.8746627931106039, |
|
"grad_norm": 2.4173614341699685, |
|
"learning_rate": 4.7097283905489956e-07, |
|
"loss": 0.3288, |
|
"step": 4215 |
|
}, |
|
{ |
|
"epoch": 0.8757003527702842, |
|
"grad_norm": 2.603118901855438, |
|
"learning_rate": 4.633297825976635e-07, |
|
"loss": 0.3371, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 0.8767379124299647, |
|
"grad_norm": 2.4773560425700727, |
|
"learning_rate": 4.5574623719487787e-07, |
|
"loss": 0.3256, |
|
"step": 4225 |
|
}, |
|
{ |
|
"epoch": 0.8777754720896451, |
|
"grad_norm": 2.4564917228085053, |
|
"learning_rate": 4.482223023258453e-07, |
|
"loss": 0.337, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 0.8788130317493256, |
|
"grad_norm": 2.4727605694693553, |
|
"learning_rate": 4.407580766879066e-07, |
|
"loss": 0.3221, |
|
"step": 4235 |
|
}, |
|
{ |
|
"epoch": 0.879850591409006, |
|
"grad_norm": 2.593564549202061, |
|
"learning_rate": 4.333536581951542e-07, |
|
"loss": 0.3364, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 0.8808881510686865, |
|
"grad_norm": 2.3065543830266204, |
|
"learning_rate": 4.2600914397714023e-07, |
|
"loss": 0.3266, |
|
"step": 4245 |
|
}, |
|
{ |
|
"epoch": 0.8819257107283669, |
|
"grad_norm": 2.3918281813023783, |
|
"learning_rate": 4.1872463037760823e-07, |
|
"loss": 0.3311, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 0.8829632703880473, |
|
"grad_norm": 2.5113381009120417, |
|
"learning_rate": 4.1150021295322306e-07, |
|
"loss": 0.3373, |
|
"step": 4255 |
|
}, |
|
{ |
|
"epoch": 0.8840008300477278, |
|
"grad_norm": 2.4230720157484935, |
|
"learning_rate": 4.043359864723262e-07, |
|
"loss": 0.3329, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 0.8850383897074082, |
|
"grad_norm": 2.616499204627849, |
|
"learning_rate": 3.972320449136829e-07, |
|
"loss": 0.3295, |
|
"step": 4265 |
|
}, |
|
{ |
|
"epoch": 0.8860759493670886, |
|
"grad_norm": 2.5866660581654677, |
|
"learning_rate": 3.90188481465254e-07, |
|
"loss": 0.3276, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 0.887113509026769, |
|
"grad_norm": 2.4795883896583364, |
|
"learning_rate": 3.8320538852297694e-07, |
|
"loss": 0.3339, |
|
"step": 4275 |
|
}, |
|
{ |
|
"epoch": 0.8881510686864494, |
|
"grad_norm": 2.3898148517858244, |
|
"learning_rate": 3.762828576895472e-07, |
|
"loss": 0.3373, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 0.8891886283461299, |
|
"grad_norm": 2.4124000504118044, |
|
"learning_rate": 3.694209797732201e-07, |
|
"loss": 0.332, |
|
"step": 4285 |
|
}, |
|
{ |
|
"epoch": 0.8902261880058103, |
|
"grad_norm": 2.724223147572049, |
|
"learning_rate": 3.6261984478662025e-07, |
|
"loss": 0.3417, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 0.8912637476654908, |
|
"grad_norm": 2.4453049065554278, |
|
"learning_rate": 3.558795419455596e-07, |
|
"loss": 0.3198, |
|
"step": 4295 |
|
}, |
|
{ |
|
"epoch": 0.8923013073251712, |
|
"grad_norm": 2.3658613859590165, |
|
"learning_rate": 3.492001596678651e-07, |
|
"loss": 0.3248, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.8933388669848517, |
|
"grad_norm": 2.415791137686327, |
|
"learning_rate": 3.4258178557222354e-07, |
|
"loss": 0.3238, |
|
"step": 4305 |
|
}, |
|
{ |
|
"epoch": 0.8943764266445321, |
|
"grad_norm": 2.2801295759850566, |
|
"learning_rate": 3.3602450647702847e-07, |
|
"loss": 0.3317, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 0.8954139863042125, |
|
"grad_norm": 2.532814151310614, |
|
"learning_rate": 3.295284083992434e-07, |
|
"loss": 0.3217, |
|
"step": 4315 |
|
}, |
|
{ |
|
"epoch": 0.8964515459638929, |
|
"grad_norm": 2.5019562185543993, |
|
"learning_rate": 3.2309357655326945e-07, |
|
"loss": 0.333, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 0.8974891056235733, |
|
"grad_norm": 2.2943698094322955, |
|
"learning_rate": 3.167200953498367e-07, |
|
"loss": 0.3181, |
|
"step": 4325 |
|
}, |
|
{ |
|
"epoch": 0.8985266652832538, |
|
"grad_norm": 2.405841184133046, |
|
"learning_rate": 3.1040804839488406e-07, |
|
"loss": 0.3339, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 0.8995642249429342, |
|
"grad_norm": 2.386316529188201, |
|
"learning_rate": 3.041575184884732e-07, |
|
"loss": 0.3341, |
|
"step": 4335 |
|
}, |
|
{ |
|
"epoch": 0.9006017846026146, |
|
"grad_norm": 2.517011507851569, |
|
"learning_rate": 2.979685876236982e-07, |
|
"loss": 0.3346, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 0.9016393442622951, |
|
"grad_norm": 2.5508437593952094, |
|
"learning_rate": 2.918413369856105e-07, |
|
"loss": 0.3258, |
|
"step": 4345 |
|
}, |
|
{ |
|
"epoch": 0.9026769039219755, |
|
"grad_norm": 2.457005114059522, |
|
"learning_rate": 2.857758469501509e-07, |
|
"loss": 0.3197, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 0.903714463581656, |
|
"grad_norm": 2.3964711097696836, |
|
"learning_rate": 2.7977219708310134e-07, |
|
"loss": 0.3288, |
|
"step": 4355 |
|
}, |
|
{ |
|
"epoch": 0.9047520232413364, |
|
"grad_norm": 2.4092985201196084, |
|
"learning_rate": 2.7383046613903676e-07, |
|
"loss": 0.325, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 0.9057895829010169, |
|
"grad_norm": 2.519420182065836, |
|
"learning_rate": 2.679507320602931e-07, |
|
"loss": 0.325, |
|
"step": 4365 |
|
}, |
|
{ |
|
"epoch": 0.9068271425606972, |
|
"grad_norm": 2.4612555403219214, |
|
"learning_rate": 2.6213307197594353e-07, |
|
"loss": 0.3261, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 0.9078647022203776, |
|
"grad_norm": 2.568118436273184, |
|
"learning_rate": 2.5637756220079135e-07, |
|
"loss": 0.3246, |
|
"step": 4375 |
|
}, |
|
{ |
|
"epoch": 0.9089022618800581, |
|
"grad_norm": 2.5397440534691373, |
|
"learning_rate": 2.506842782343627e-07, |
|
"loss": 0.3162, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 0.9099398215397385, |
|
"grad_norm": 2.4925975703816245, |
|
"learning_rate": 2.4505329475991823e-07, |
|
"loss": 0.331, |
|
"step": 4385 |
|
}, |
|
{ |
|
"epoch": 0.910977381199419, |
|
"grad_norm": 3.3523863369544986, |
|
"learning_rate": 2.3948468564347904e-07, |
|
"loss": 0.3277, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 0.9120149408590994, |
|
"grad_norm": 2.575554252160631, |
|
"learning_rate": 2.3397852393284792e-07, |
|
"loss": 0.3404, |
|
"step": 4395 |
|
}, |
|
{ |
|
"epoch": 0.9130525005187798, |
|
"grad_norm": 2.481671747693274, |
|
"learning_rate": 2.2853488185665796e-07, |
|
"loss": 0.3206, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.9140900601784603, |
|
"grad_norm": 2.4222594768146415, |
|
"learning_rate": 2.231538308234249e-07, |
|
"loss": 0.3216, |
|
"step": 4405 |
|
}, |
|
{ |
|
"epoch": 0.9151276198381407, |
|
"grad_norm": 2.579114792911807, |
|
"learning_rate": 2.178354414206063e-07, |
|
"loss": 0.3267, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 0.9161651794978212, |
|
"grad_norm": 2.472569881694958, |
|
"learning_rate": 2.125797834136789e-07, |
|
"loss": 0.3212, |
|
"step": 4415 |
|
}, |
|
{ |
|
"epoch": 0.9172027391575015, |
|
"grad_norm": 2.4091565145640605, |
|
"learning_rate": 2.0738692574522324e-07, |
|
"loss": 0.3246, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 0.9182402988171819, |
|
"grad_norm": 2.4603025192987644, |
|
"learning_rate": 2.0225693653401824e-07, |
|
"loss": 0.3221, |
|
"step": 4425 |
|
}, |
|
{ |
|
"epoch": 0.9192778584768624, |
|
"grad_norm": 2.536120994058227, |
|
"learning_rate": 1.9718988307414866e-07, |
|
"loss": 0.3289, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 0.9203154181365428, |
|
"grad_norm": 2.423050703239737, |
|
"learning_rate": 1.921858318341191e-07, |
|
"loss": 0.3258, |
|
"step": 4435 |
|
}, |
|
{ |
|
"epoch": 0.9213529777962233, |
|
"grad_norm": 2.429414838855423, |
|
"learning_rate": 1.8724484845598855e-07, |
|
"loss": 0.3196, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 0.9223905374559037, |
|
"grad_norm": 2.7662680215892004, |
|
"learning_rate": 1.8236699775450338e-07, |
|
"loss": 0.3258, |
|
"step": 4445 |
|
}, |
|
{ |
|
"epoch": 0.9234280971155842, |
|
"grad_norm": 2.45993148237687, |
|
"learning_rate": 1.7755234371624908e-07, |
|
"loss": 0.318, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 0.9244656567752646, |
|
"grad_norm": 2.4765798027549644, |
|
"learning_rate": 1.7280094949881144e-07, |
|
"loss": 0.3287, |
|
"step": 4455 |
|
}, |
|
{ |
|
"epoch": 0.925503216434945, |
|
"grad_norm": 2.5990110307214027, |
|
"learning_rate": 1.6811287742994897e-07, |
|
"loss": 0.3203, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 0.9265407760946255, |
|
"grad_norm": 2.7248202388018563, |
|
"learning_rate": 1.6348818900677077e-07, |
|
"loss": 0.3196, |
|
"step": 4465 |
|
}, |
|
{ |
|
"epoch": 0.9275783357543059, |
|
"grad_norm": 2.360215127100717, |
|
"learning_rate": 1.5892694489493598e-07, |
|
"loss": 0.3266, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 0.9286158954139863, |
|
"grad_norm": 2.3701662902928415, |
|
"learning_rate": 1.5442920492785396e-07, |
|
"loss": 0.3217, |
|
"step": 4475 |
|
}, |
|
{ |
|
"epoch": 0.9296534550736667, |
|
"grad_norm": 2.5408968036091593, |
|
"learning_rate": 1.4999502810590094e-07, |
|
"loss": 0.3147, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 0.9306910147333471, |
|
"grad_norm": 2.3872752954425276, |
|
"learning_rate": 1.456244725956446e-07, |
|
"loss": 0.3314, |
|
"step": 4485 |
|
}, |
|
{ |
|
"epoch": 0.9317285743930276, |
|
"grad_norm": 2.510401039927896, |
|
"learning_rate": 1.4131759572908354e-07, |
|
"loss": 0.3233, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 0.932766134052708, |
|
"grad_norm": 2.452160220911644, |
|
"learning_rate": 1.370744540028929e-07, |
|
"loss": 0.3216, |
|
"step": 4495 |
|
}, |
|
{ |
|
"epoch": 0.9338036937123885, |
|
"grad_norm": 2.5517139832044826, |
|
"learning_rate": 1.328951030776826e-07, |
|
"loss": 0.322, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.9348412533720689, |
|
"grad_norm": 2.600821629157991, |
|
"learning_rate": 1.2877959777727212e-07, |
|
"loss": 0.328, |
|
"step": 4505 |
|
}, |
|
{ |
|
"epoch": 0.9358788130317494, |
|
"grad_norm": 2.4402893254633207, |
|
"learning_rate": 1.2472799208796517e-07, |
|
"loss": 0.3352, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 0.9369163726914298, |
|
"grad_norm": 2.4890223515899748, |
|
"learning_rate": 1.2074033915784543e-07, |
|
"loss": 0.3273, |
|
"step": 4515 |
|
}, |
|
{ |
|
"epoch": 0.9379539323511102, |
|
"grad_norm": 2.541162954952577, |
|
"learning_rate": 1.168166912960772e-07, |
|
"loss": 0.3198, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 0.9389914920107906, |
|
"grad_norm": 2.510264694405438, |
|
"learning_rate": 1.1295709997222182e-07, |
|
"loss": 0.3278, |
|
"step": 4525 |
|
}, |
|
{ |
|
"epoch": 0.940029051670471, |
|
"grad_norm": 2.4830766706425895, |
|
"learning_rate": 1.0916161581555895e-07, |
|
"loss": 0.3153, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 0.9410666113301515, |
|
"grad_norm": 2.3227486456210222, |
|
"learning_rate": 1.0543028861442539e-07, |
|
"loss": 0.3177, |
|
"step": 4535 |
|
}, |
|
{ |
|
"epoch": 0.9421041709898319, |
|
"grad_norm": 2.5419821606370174, |
|
"learning_rate": 1.0176316731556112e-07, |
|
"loss": 0.3322, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 0.9431417306495123, |
|
"grad_norm": 2.6673497102822536, |
|
"learning_rate": 9.816030002346766e-08, |
|
"loss": 0.3319, |
|
"step": 4545 |
|
}, |
|
{ |
|
"epoch": 0.9441792903091928, |
|
"grad_norm": 2.3937271331548904, |
|
"learning_rate": 9.462173399977348e-08, |
|
"loss": 0.3267, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 0.9452168499688732, |
|
"grad_norm": 2.3407051661833123, |
|
"learning_rate": 9.11475156626207e-08, |
|
"loss": 0.3103, |
|
"step": 4555 |
|
}, |
|
{ |
|
"epoch": 0.9462544096285537, |
|
"grad_norm": 2.5377283482688755, |
|
"learning_rate": 8.773769058605053e-08, |
|
"loss": 0.3356, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 0.9472919692882341, |
|
"grad_norm": 2.3498925102259904, |
|
"learning_rate": 8.439230349940708e-08, |
|
"loss": 0.3254, |
|
"step": 4565 |
|
}, |
|
{ |
|
"epoch": 0.9483295289479146, |
|
"grad_norm": 2.3416477533349145, |
|
"learning_rate": 8.111139828675175e-08, |
|
"loss": 0.3293, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 0.9493670886075949, |
|
"grad_norm": 2.559009235835814, |
|
"learning_rate": 7.78950179862864e-08, |
|
"loss": 0.3185, |
|
"step": 4575 |
|
}, |
|
{ |
|
"epoch": 0.9504046482672753, |
|
"grad_norm": 2.586458213825969, |
|
"learning_rate": 7.474320478978946e-08, |
|
"loss": 0.3246, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 0.9514422079269558, |
|
"grad_norm": 2.3485714776360407, |
|
"learning_rate": 7.16560000420613e-08, |
|
"loss": 0.3238, |
|
"step": 4585 |
|
}, |
|
{ |
|
"epoch": 0.9524797675866362, |
|
"grad_norm": 2.369694918601893, |
|
"learning_rate": 6.863344424038354e-08, |
|
"loss": 0.3103, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 0.9535173272463167, |
|
"grad_norm": 2.4252802076691538, |
|
"learning_rate": 6.567557703398675e-08, |
|
"loss": 0.3273, |
|
"step": 4595 |
|
}, |
|
{ |
|
"epoch": 0.9545548869059971, |
|
"grad_norm": 2.552882664412225, |
|
"learning_rate": 6.278243722352973e-08, |
|
"loss": 0.3182, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.9555924465656775, |
|
"grad_norm": 2.5353013413397507, |
|
"learning_rate": 5.995406276059267e-08, |
|
"loss": 0.3335, |
|
"step": 4605 |
|
}, |
|
{ |
|
"epoch": 0.956630006225358, |
|
"grad_norm": 2.4812443315722934, |
|
"learning_rate": 5.719049074717764e-08, |
|
"loss": 0.3191, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 0.9576675658850384, |
|
"grad_norm": 2.427622171598608, |
|
"learning_rate": 5.4491757435220505e-08, |
|
"loss": 0.3236, |
|
"step": 4615 |
|
}, |
|
{ |
|
"epoch": 0.9587051255447189, |
|
"grad_norm": 2.601332407638946, |
|
"learning_rate": 5.185789822612086e-08, |
|
"loss": 0.3335, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 0.9597426852043992, |
|
"grad_norm": 2.4380419721098976, |
|
"learning_rate": 4.9288947670270146e-08, |
|
"loss": 0.3193, |
|
"step": 4625 |
|
}, |
|
{ |
|
"epoch": 0.9607802448640796, |
|
"grad_norm": 2.36715515753985, |
|
"learning_rate": 4.678493946660423e-08, |
|
"loss": 0.3228, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 0.9618178045237601, |
|
"grad_norm": 2.4303066509314677, |
|
"learning_rate": 4.434590646215819e-08, |
|
"loss": 0.3266, |
|
"step": 4635 |
|
}, |
|
{ |
|
"epoch": 0.9628553641834405, |
|
"grad_norm": 2.865610610806469, |
|
"learning_rate": 4.1971880651638376e-08, |
|
"loss": 0.3247, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 0.963892923843121, |
|
"grad_norm": 2.5407706997414268, |
|
"learning_rate": 3.966289317699878e-08, |
|
"loss": 0.3288, |
|
"step": 4645 |
|
}, |
|
{ |
|
"epoch": 0.9649304835028014, |
|
"grad_norm": 2.4204826042021437, |
|
"learning_rate": 3.74189743270359e-08, |
|
"loss": 0.3145, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 0.9659680431624819, |
|
"grad_norm": 2.2543367363310716, |
|
"learning_rate": 3.5240153536988954e-08, |
|
"loss": 0.3307, |
|
"step": 4655 |
|
}, |
|
{ |
|
"epoch": 0.9670056028221623, |
|
"grad_norm": 2.3642879401404677, |
|
"learning_rate": 3.312645938815695e-08, |
|
"loss": 0.3111, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 0.9680431624818427, |
|
"grad_norm": 2.551087158396502, |
|
"learning_rate": 3.107791960752005e-08, |
|
"loss": 0.3202, |
|
"step": 4665 |
|
}, |
|
{ |
|
"epoch": 0.9690807221415232, |
|
"grad_norm": 2.431842119612971, |
|
"learning_rate": 2.909456106737818e-08, |
|
"loss": 0.3271, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 0.9701182818012035, |
|
"grad_norm": 2.469650411851549, |
|
"learning_rate": 2.7176409784998027e-08, |
|
"loss": 0.3225, |
|
"step": 4675 |
|
}, |
|
{ |
|
"epoch": 0.971155841460884, |
|
"grad_norm": 2.6570472493964337, |
|
"learning_rate": 2.5323490922271044e-08, |
|
"loss": 0.3233, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 0.9721934011205644, |
|
"grad_norm": 2.792428588323876, |
|
"learning_rate": 2.3535828785384296e-08, |
|
"loss": 0.3263, |
|
"step": 4685 |
|
}, |
|
{ |
|
"epoch": 0.9732309607802448, |
|
"grad_norm": 2.3558732345665456, |
|
"learning_rate": 2.1813446824502372e-08, |
|
"loss": 0.3136, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 0.9742685204399253, |
|
"grad_norm": 2.356877231683961, |
|
"learning_rate": 2.0156367633455965e-08, |
|
"loss": 0.3239, |
|
"step": 4695 |
|
}, |
|
{ |
|
"epoch": 0.9753060800996057, |
|
"grad_norm": 2.4529847227301818, |
|
"learning_rate": 1.8564612949451555e-08, |
|
"loss": 0.3168, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.9763436397592862, |
|
"grad_norm": 2.382662217743029, |
|
"learning_rate": 1.7038203652781083e-08, |
|
"loss": 0.3234, |
|
"step": 4705 |
|
}, |
|
{ |
|
"epoch": 0.9773811994189666, |
|
"grad_norm": 2.333476690943818, |
|
"learning_rate": 1.5577159766548832e-08, |
|
"loss": 0.3274, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 0.9784187590786471, |
|
"grad_norm": 2.402141955867801, |
|
"learning_rate": 1.4181500456412755e-08, |
|
"loss": 0.3215, |
|
"step": 4715 |
|
}, |
|
{ |
|
"epoch": 0.9794563187383275, |
|
"grad_norm": 2.501308170912781, |
|
"learning_rate": 1.2851244030328004e-08, |
|
"loss": 0.3212, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 0.9804938783980078, |
|
"grad_norm": 2.6177200025512626, |
|
"learning_rate": 1.1586407938308785e-08, |
|
"loss": 0.3136, |
|
"step": 4725 |
|
}, |
|
{ |
|
"epoch": 0.9815314380576883, |
|
"grad_norm": 2.548386853689889, |
|
"learning_rate": 1.0387008772200779e-08, |
|
"loss": 0.342, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 0.9825689977173687, |
|
"grad_norm": 2.552595538532663, |
|
"learning_rate": 9.253062265461855e-09, |
|
"loss": 0.3234, |
|
"step": 4735 |
|
}, |
|
{ |
|
"epoch": 0.9836065573770492, |
|
"grad_norm": 2.489168583450402, |
|
"learning_rate": 8.184583292955572e-09, |
|
"loss": 0.318, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 0.9846441170367296, |
|
"grad_norm": 2.380606466941288, |
|
"learning_rate": 7.181585870757457e-09, |
|
"loss": 0.326, |
|
"step": 4745 |
|
}, |
|
{ |
|
"epoch": 0.98568167669641, |
|
"grad_norm": 2.4173383787142297, |
|
"learning_rate": 6.2440831559690275e-09, |
|
"loss": 0.3185, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 0.9867192363560905, |
|
"grad_norm": 2.542878596032387, |
|
"learning_rate": 5.372087446547935e-09, |
|
"loss": 0.3252, |
|
"step": 4755 |
|
}, |
|
{ |
|
"epoch": 0.9877567960157709, |
|
"grad_norm": 2.4745977048628918, |
|
"learning_rate": 4.565610181144209e-09, |
|
"loss": 0.3304, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 0.9887943556754514, |
|
"grad_norm": 2.4052051630616895, |
|
"learning_rate": 3.824661938951479e-09, |
|
"loss": 0.3243, |
|
"step": 4765 |
|
}, |
|
{ |
|
"epoch": 0.9898319153351318, |
|
"grad_norm": 2.6094698060669264, |
|
"learning_rate": 3.1492524395682065e-09, |
|
"loss": 0.3189, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 0.9908694749948121, |
|
"grad_norm": 2.3476458011761134, |
|
"learning_rate": 2.5393905428688913e-09, |
|
"loss": 0.3231, |
|
"step": 4775 |
|
}, |
|
{ |
|
"epoch": 0.9919070346544926, |
|
"grad_norm": 2.3445329353887456, |
|
"learning_rate": 1.9950842488891674e-09, |
|
"loss": 0.3355, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 0.992944594314173, |
|
"grad_norm": 2.4693805125337094, |
|
"learning_rate": 1.5163406977219963e-09, |
|
"loss": 0.3256, |
|
"step": 4785 |
|
}, |
|
{ |
|
"epoch": 0.9939821539738535, |
|
"grad_norm": 2.563629879915604, |
|
"learning_rate": 1.103166169420522e-09, |
|
"loss": 0.3254, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 0.9950197136335339, |
|
"grad_norm": 2.494757369109791, |
|
"learning_rate": 7.555660839181356e-10, |
|
"loss": 0.3088, |
|
"step": 4795 |
|
}, |
|
{ |
|
"epoch": 0.9960572732932144, |
|
"grad_norm": 2.373070173051403, |
|
"learning_rate": 4.735450009579756e-10, |
|
"loss": 0.323, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.9970948329528948, |
|
"grad_norm": 2.397230200383968, |
|
"learning_rate": 2.571066200307559e-10, |
|
"loss": 0.3303, |
|
"step": 4805 |
|
}, |
|
{ |
|
"epoch": 0.9981323926125752, |
|
"grad_norm": 2.6318567902913985, |
|
"learning_rate": 1.0625378032813604e-10, |
|
"loss": 0.3156, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 0.9991699522722557, |
|
"grad_norm": 2.3100958526551696, |
|
"learning_rate": 2.0988460705528846e-11, |
|
"loss": 0.3192, |
|
"step": 4815 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 0.2939795255661011, |
|
"eval_runtime": 0.9809, |
|
"eval_samples_per_second": 3.058, |
|
"eval_steps_per_second": 1.019, |
|
"step": 4819 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 4819, |
|
"total_flos": 504500280360960.0, |
|
"train_loss": 0.5211530197307415, |
|
"train_runtime": 55738.8815, |
|
"train_samples_per_second": 1.383, |
|
"train_steps_per_second": 0.086 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 4819, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 504500280360960.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|