|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.9965217391304346, |
|
"eval_steps": 500, |
|
"global_step": 574, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0034782608695652175, |
|
"grad_norm": 0.1545655359230834, |
|
"learning_rate": 3.448275862068966e-06, |
|
"loss": 0.1804, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.006956521739130435, |
|
"grad_norm": 0.15798307731453395, |
|
"learning_rate": 6.896551724137932e-06, |
|
"loss": 0.1443, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.010434782608695653, |
|
"grad_norm": 0.14372383095748037, |
|
"learning_rate": 1.0344827586206897e-05, |
|
"loss": 0.1414, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.01391304347826087, |
|
"grad_norm": 0.2420744995778043, |
|
"learning_rate": 1.3793103448275863e-05, |
|
"loss": 0.1926, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.017391304347826087, |
|
"grad_norm": 0.1463366912249852, |
|
"learning_rate": 1.7241379310344828e-05, |
|
"loss": 0.1598, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.020869565217391306, |
|
"grad_norm": 0.2742107559459329, |
|
"learning_rate": 2.0689655172413793e-05, |
|
"loss": 0.2451, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.02434782608695652, |
|
"grad_norm": 0.1545956455873269, |
|
"learning_rate": 2.413793103448276e-05, |
|
"loss": 0.1467, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.02782608695652174, |
|
"grad_norm": 0.11833712816221738, |
|
"learning_rate": 2.7586206896551727e-05, |
|
"loss": 0.1146, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.03130434782608696, |
|
"grad_norm": 0.1636683202816951, |
|
"learning_rate": 3.103448275862069e-05, |
|
"loss": 0.1511, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.034782608695652174, |
|
"grad_norm": 0.12096851431359755, |
|
"learning_rate": 3.4482758620689657e-05, |
|
"loss": 0.1392, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.03826086956521739, |
|
"grad_norm": 0.20113450226273455, |
|
"learning_rate": 3.793103448275862e-05, |
|
"loss": 0.1829, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.04173913043478261, |
|
"grad_norm": 0.1724183342324261, |
|
"learning_rate": 4.1379310344827587e-05, |
|
"loss": 0.1393, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.04521739130434783, |
|
"grad_norm": 0.16317141755627293, |
|
"learning_rate": 4.482758620689655e-05, |
|
"loss": 0.1569, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.04869565217391304, |
|
"grad_norm": 0.20158026184467487, |
|
"learning_rate": 4.827586206896552e-05, |
|
"loss": 0.1719, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.05217391304347826, |
|
"grad_norm": 0.19268086804807166, |
|
"learning_rate": 5.172413793103449e-05, |
|
"loss": 0.1783, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.05565217391304348, |
|
"grad_norm": 0.15367624679456215, |
|
"learning_rate": 5.517241379310345e-05, |
|
"loss": 0.1457, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.059130434782608696, |
|
"grad_norm": 0.16131163703415627, |
|
"learning_rate": 5.862068965517241e-05, |
|
"loss": 0.1741, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.06260869565217392, |
|
"grad_norm": 0.1513439967052575, |
|
"learning_rate": 6.206896551724138e-05, |
|
"loss": 0.1567, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.06608695652173913, |
|
"grad_norm": 0.11397034244477378, |
|
"learning_rate": 6.551724137931034e-05, |
|
"loss": 0.1448, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.06956521739130435, |
|
"grad_norm": 0.18890710907597627, |
|
"learning_rate": 6.896551724137931e-05, |
|
"loss": 0.1576, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.07304347826086957, |
|
"grad_norm": 0.17148715059837027, |
|
"learning_rate": 7.241379310344828e-05, |
|
"loss": 0.1531, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.07652173913043478, |
|
"grad_norm": 0.15845773761518642, |
|
"learning_rate": 7.586206896551724e-05, |
|
"loss": 0.1795, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.16432205778499775, |
|
"learning_rate": 7.931034482758621e-05, |
|
"loss": 0.1455, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.08347826086956522, |
|
"grad_norm": 0.18507516537799124, |
|
"learning_rate": 8.275862068965517e-05, |
|
"loss": 0.1792, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.08695652173913043, |
|
"grad_norm": 0.1489906198108428, |
|
"learning_rate": 8.620689655172413e-05, |
|
"loss": 0.1575, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.09043478260869565, |
|
"grad_norm": 0.19257597111889158, |
|
"learning_rate": 8.96551724137931e-05, |
|
"loss": 0.1977, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.09391304347826086, |
|
"grad_norm": 0.15869513580726594, |
|
"learning_rate": 9.310344827586207e-05, |
|
"loss": 0.1491, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.09739130434782609, |
|
"grad_norm": 0.23763138206897608, |
|
"learning_rate": 9.655172413793105e-05, |
|
"loss": 0.2305, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.10086956521739131, |
|
"grad_norm": 0.19313130092481448, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1991, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.10434782608695652, |
|
"grad_norm": 0.15957163254805692, |
|
"learning_rate": 0.00010344827586206898, |
|
"loss": 0.1494, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.10782608695652174, |
|
"grad_norm": 0.15175494387195537, |
|
"learning_rate": 0.00010689655172413792, |
|
"loss": 0.1539, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.11130434782608696, |
|
"grad_norm": 0.1577067484050021, |
|
"learning_rate": 0.0001103448275862069, |
|
"loss": 0.1481, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.11478260869565217, |
|
"grad_norm": 0.09295501667856695, |
|
"learning_rate": 0.00011379310344827588, |
|
"loss": 0.1018, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.11826086956521739, |
|
"grad_norm": 0.13149067291539926, |
|
"learning_rate": 0.00011724137931034482, |
|
"loss": 0.1176, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.12173913043478261, |
|
"grad_norm": 0.15815867098069847, |
|
"learning_rate": 0.0001206896551724138, |
|
"loss": 0.1315, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.12521739130434784, |
|
"grad_norm": 0.1228801998135233, |
|
"learning_rate": 0.00012413793103448277, |
|
"loss": 0.1226, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.12869565217391304, |
|
"grad_norm": 0.14615808183921733, |
|
"learning_rate": 0.00012758620689655174, |
|
"loss": 0.1351, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.13217391304347825, |
|
"grad_norm": 0.13959696283916806, |
|
"learning_rate": 0.00013103448275862068, |
|
"loss": 0.1265, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.1356521739130435, |
|
"grad_norm": 0.1674438071444559, |
|
"learning_rate": 0.00013448275862068965, |
|
"loss": 0.1763, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.1391304347826087, |
|
"grad_norm": 0.14248711889055726, |
|
"learning_rate": 0.00013793103448275863, |
|
"loss": 0.1273, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.1426086956521739, |
|
"grad_norm": 0.12483278168498144, |
|
"learning_rate": 0.0001413793103448276, |
|
"loss": 0.1158, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.14608695652173914, |
|
"grad_norm": 0.12252417486446492, |
|
"learning_rate": 0.00014482758620689657, |
|
"loss": 0.0978, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.14956521739130435, |
|
"grad_norm": 0.1379518468653693, |
|
"learning_rate": 0.00014827586206896554, |
|
"loss": 0.1265, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.15304347826086956, |
|
"grad_norm": 0.1523565561366162, |
|
"learning_rate": 0.00015172413793103449, |
|
"loss": 0.1823, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.1565217391304348, |
|
"grad_norm": 0.1801898533175253, |
|
"learning_rate": 0.00015517241379310346, |
|
"loss": 0.1999, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 0.13012748020707876, |
|
"learning_rate": 0.00015862068965517243, |
|
"loss": 0.1409, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.1634782608695652, |
|
"grad_norm": 0.1413893808116691, |
|
"learning_rate": 0.00016206896551724137, |
|
"loss": 0.1262, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.16695652173913045, |
|
"grad_norm": 0.16233434268275468, |
|
"learning_rate": 0.00016551724137931035, |
|
"loss": 0.1467, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.17043478260869566, |
|
"grad_norm": 0.15079503853002107, |
|
"learning_rate": 0.00016896551724137932, |
|
"loss": 0.1058, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.17391304347826086, |
|
"grad_norm": 0.15412291289995766, |
|
"learning_rate": 0.00017241379310344826, |
|
"loss": 0.168, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.17739130434782607, |
|
"grad_norm": 0.1722020517750421, |
|
"learning_rate": 0.00017586206896551723, |
|
"loss": 0.1183, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.1808695652173913, |
|
"grad_norm": 0.10905711916480021, |
|
"learning_rate": 0.0001793103448275862, |
|
"loss": 0.1093, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.18434782608695652, |
|
"grad_norm": 0.16963364557672264, |
|
"learning_rate": 0.00018275862068965518, |
|
"loss": 0.1557, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.18782608695652173, |
|
"grad_norm": 0.15154120729033607, |
|
"learning_rate": 0.00018620689655172415, |
|
"loss": 0.1594, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.19130434782608696, |
|
"grad_norm": 0.13757866713331232, |
|
"learning_rate": 0.00018965517241379312, |
|
"loss": 0.1407, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.19478260869565217, |
|
"grad_norm": 0.08797746875562075, |
|
"learning_rate": 0.0001931034482758621, |
|
"loss": 0.0941, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.19826086956521738, |
|
"grad_norm": 0.18086221573643768, |
|
"learning_rate": 0.00019655172413793104, |
|
"loss": 0.1781, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.20173913043478262, |
|
"grad_norm": 0.17700454857957337, |
|
"learning_rate": 0.0002, |
|
"loss": 0.1879, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.20521739130434782, |
|
"grad_norm": 0.1558083475840659, |
|
"learning_rate": 0.00019999814660065618, |
|
"loss": 0.1831, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.20869565217391303, |
|
"grad_norm": 0.1032213761254349, |
|
"learning_rate": 0.00019999258647132646, |
|
"loss": 0.1188, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.21217391304347827, |
|
"grad_norm": 0.14893393244118194, |
|
"learning_rate": 0.00019998331981811366, |
|
"loss": 0.1554, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.21565217391304348, |
|
"grad_norm": 0.14353596472572114, |
|
"learning_rate": 0.00019997034698451395, |
|
"loss": 0.1807, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.21913043478260869, |
|
"grad_norm": 0.1051492618618541, |
|
"learning_rate": 0.00019995366845140415, |
|
"loss": 0.1278, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.22260869565217392, |
|
"grad_norm": 0.15519178380797527, |
|
"learning_rate": 0.00019993328483702393, |
|
"loss": 0.1718, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.22608695652173913, |
|
"grad_norm": 0.16979535445201727, |
|
"learning_rate": 0.00019990919689695286, |
|
"loss": 0.1759, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.22956521739130434, |
|
"grad_norm": 0.19955078650794816, |
|
"learning_rate": 0.0001998814055240823, |
|
"loss": 0.1659, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.23304347826086957, |
|
"grad_norm": 0.21069141049146595, |
|
"learning_rate": 0.00019984991174858257, |
|
"loss": 0.1591, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.23652173913043478, |
|
"grad_norm": 0.10858740428706376, |
|
"learning_rate": 0.00019981471673786452, |
|
"loss": 0.1143, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.12877038648097636, |
|
"learning_rate": 0.00019977582179653633, |
|
"loss": 0.113, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.24347826086956523, |
|
"grad_norm": 0.15092333453545853, |
|
"learning_rate": 0.00019973322836635518, |
|
"loss": 0.183, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.24695652173913044, |
|
"grad_norm": 0.12997966260226232, |
|
"learning_rate": 0.00019968693802617374, |
|
"loss": 0.144, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.25043478260869567, |
|
"grad_norm": 0.12761141406209162, |
|
"learning_rate": 0.00019963695249188183, |
|
"loss": 0.1292, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.2539130434782609, |
|
"grad_norm": 0.16597376098252953, |
|
"learning_rate": 0.00019958327361634248, |
|
"loss": 0.1645, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.2573913043478261, |
|
"grad_norm": 0.10098015772720864, |
|
"learning_rate": 0.00019952590338932356, |
|
"loss": 0.1067, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.2608695652173913, |
|
"grad_norm": 0.15925018221087978, |
|
"learning_rate": 0.00019946484393742399, |
|
"loss": 0.1554, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.2643478260869565, |
|
"grad_norm": 0.1532302933171606, |
|
"learning_rate": 0.0001994000975239946, |
|
"loss": 0.1817, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.2678260869565217, |
|
"grad_norm": 0.15154786378403498, |
|
"learning_rate": 0.00019933166654905466, |
|
"loss": 0.1467, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.271304347826087, |
|
"grad_norm": 0.15690138906152937, |
|
"learning_rate": 0.00019925955354920265, |
|
"loss": 0.1373, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.2747826086956522, |
|
"grad_norm": 0.1859438689490505, |
|
"learning_rate": 0.0001991837611975223, |
|
"loss": 0.1932, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.2782608695652174, |
|
"grad_norm": 0.14861843675913228, |
|
"learning_rate": 0.00019910429230348347, |
|
"loss": 0.1675, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.2817391304347826, |
|
"grad_norm": 0.14218774514095903, |
|
"learning_rate": 0.00019902114981283812, |
|
"loss": 0.1283, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.2852173913043478, |
|
"grad_norm": 0.15988803314683084, |
|
"learning_rate": 0.00019893433680751103, |
|
"loss": 0.1336, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.288695652173913, |
|
"grad_norm": 0.15975061567872123, |
|
"learning_rate": 0.0001988438565054855, |
|
"loss": 0.1676, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.2921739130434783, |
|
"grad_norm": 0.0903484060539206, |
|
"learning_rate": 0.00019874971226068415, |
|
"loss": 0.0909, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.2956521739130435, |
|
"grad_norm": 0.12570120193815287, |
|
"learning_rate": 0.00019865190756284467, |
|
"loss": 0.1333, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.2991304347826087, |
|
"grad_norm": 0.12595056424947598, |
|
"learning_rate": 0.0001985504460373903, |
|
"loss": 0.1092, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.3026086956521739, |
|
"grad_norm": 0.13479356357232541, |
|
"learning_rate": 0.0001984453314452955, |
|
"loss": 0.1478, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.3060869565217391, |
|
"grad_norm": 0.13307683198992498, |
|
"learning_rate": 0.00019833656768294662, |
|
"loss": 0.146, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.3095652173913043, |
|
"grad_norm": 0.14686125301552883, |
|
"learning_rate": 0.0001982241587819974, |
|
"loss": 0.1285, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.3130434782608696, |
|
"grad_norm": 0.12720833595582368, |
|
"learning_rate": 0.00019810810890921943, |
|
"loss": 0.1437, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.3165217391304348, |
|
"grad_norm": 0.13968930311918126, |
|
"learning_rate": 0.00019798842236634797, |
|
"loss": 0.1291, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.16133982393912974, |
|
"learning_rate": 0.00019786510358992213, |
|
"loss": 0.2008, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.3234782608695652, |
|
"grad_norm": 0.1266301495042648, |
|
"learning_rate": 0.00019773815715112074, |
|
"loss": 0.1372, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.3269565217391304, |
|
"grad_norm": 0.12427333520991247, |
|
"learning_rate": 0.00019760758775559274, |
|
"loss": 0.1432, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.33043478260869563, |
|
"grad_norm": 0.13028439018276217, |
|
"learning_rate": 0.0001974734002432827, |
|
"loss": 0.1354, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.3339130434782609, |
|
"grad_norm": 0.13268075146491365, |
|
"learning_rate": 0.00019733559958825167, |
|
"loss": 0.1189, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.3373913043478261, |
|
"grad_norm": 0.2048660606818272, |
|
"learning_rate": 0.00019719419089849247, |
|
"loss": 0.1566, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.3408695652173913, |
|
"grad_norm": 0.11124284248033606, |
|
"learning_rate": 0.00019704917941574051, |
|
"loss": 0.1299, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.3443478260869565, |
|
"grad_norm": 0.1415128364022893, |
|
"learning_rate": 0.00019690057051527965, |
|
"loss": 0.1396, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.34782608695652173, |
|
"grad_norm": 0.10665529705089029, |
|
"learning_rate": 0.00019674836970574254, |
|
"loss": 0.1314, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.35130434782608694, |
|
"grad_norm": 0.14169554362167064, |
|
"learning_rate": 0.00019659258262890683, |
|
"loss": 0.1281, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.35478260869565215, |
|
"grad_norm": 0.16648182361835823, |
|
"learning_rate": 0.00019643321505948585, |
|
"loss": 0.1511, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.3582608695652174, |
|
"grad_norm": 0.15512935363008726, |
|
"learning_rate": 0.00019627027290491458, |
|
"loss": 0.1362, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.3617391304347826, |
|
"grad_norm": 0.14829391492240007, |
|
"learning_rate": 0.00019610376220513068, |
|
"loss": 0.16, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.3652173913043478, |
|
"grad_norm": 0.1721382097621375, |
|
"learning_rate": 0.00019593368913235052, |
|
"loss": 0.1927, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.36869565217391304, |
|
"grad_norm": 0.1073039991014123, |
|
"learning_rate": 0.0001957600599908406, |
|
"loss": 0.1077, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.37217391304347824, |
|
"grad_norm": 0.1765959958499992, |
|
"learning_rate": 0.00019558288121668363, |
|
"loss": 0.1679, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.37565217391304345, |
|
"grad_norm": 0.13247232361226763, |
|
"learning_rate": 0.00019540215937754007, |
|
"loss": 0.1201, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.3791304347826087, |
|
"grad_norm": 0.13402863250728775, |
|
"learning_rate": 0.0001952179011724047, |
|
"loss": 0.1331, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.3826086956521739, |
|
"grad_norm": 0.15379139900705738, |
|
"learning_rate": 0.00019503011343135825, |
|
"loss": 0.1507, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.38608695652173913, |
|
"grad_norm": 0.12569941197730944, |
|
"learning_rate": 0.00019483880311531424, |
|
"loss": 0.1245, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.38956521739130434, |
|
"grad_norm": 0.13176534371798201, |
|
"learning_rate": 0.00019464397731576094, |
|
"loss": 0.1346, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.39304347826086955, |
|
"grad_norm": 0.1308496741778078, |
|
"learning_rate": 0.00019444564325449853, |
|
"loss": 0.1528, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.39652173913043476, |
|
"grad_norm": 0.11662685828907265, |
|
"learning_rate": 0.00019424380828337144, |
|
"loss": 0.1042, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.15311025163121064, |
|
"learning_rate": 0.0001940384798839957, |
|
"loss": 0.124, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.40347826086956523, |
|
"grad_norm": 0.14271720010282954, |
|
"learning_rate": 0.00019382966566748168, |
|
"loss": 0.1385, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.40695652173913044, |
|
"grad_norm": 0.21076081706460564, |
|
"learning_rate": 0.00019361737337415206, |
|
"loss": 0.2177, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.41043478260869565, |
|
"grad_norm": 0.1326954013355056, |
|
"learning_rate": 0.0001934016108732548, |
|
"loss": 0.1491, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.41391304347826086, |
|
"grad_norm": 0.10972822431140547, |
|
"learning_rate": 0.00019318238616267141, |
|
"loss": 0.1135, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.41739130434782606, |
|
"grad_norm": 0.11664553001228962, |
|
"learning_rate": 0.00019295970736862064, |
|
"loss": 0.1335, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.42086956521739133, |
|
"grad_norm": 0.12037673410124465, |
|
"learning_rate": 0.00019273358274535704, |
|
"loss": 0.0989, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.42434782608695654, |
|
"grad_norm": 0.13278062849114713, |
|
"learning_rate": 0.00019250402067486522, |
|
"loss": 0.1328, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.42782608695652175, |
|
"grad_norm": 0.13381559738712595, |
|
"learning_rate": 0.00019227102966654896, |
|
"loss": 0.1296, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.43130434782608695, |
|
"grad_norm": 0.1646662488521753, |
|
"learning_rate": 0.00019203461835691594, |
|
"loss": 0.1581, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.43478260869565216, |
|
"grad_norm": 0.15934887298251812, |
|
"learning_rate": 0.00019179479550925747, |
|
"loss": 0.1627, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.43826086956521737, |
|
"grad_norm": 0.1410826901549644, |
|
"learning_rate": 0.00019155157001332374, |
|
"loss": 0.1789, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.44173913043478263, |
|
"grad_norm": 0.16699816673214457, |
|
"learning_rate": 0.0001913049508849942, |
|
"loss": 0.1608, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.44521739130434784, |
|
"grad_norm": 0.11736817608666682, |
|
"learning_rate": 0.00019105494726594344, |
|
"loss": 0.1387, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.44869565217391305, |
|
"grad_norm": 0.13490354839004873, |
|
"learning_rate": 0.00019080156842330242, |
|
"loss": 0.1355, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.45217391304347826, |
|
"grad_norm": 0.166052611822799, |
|
"learning_rate": 0.00019054482374931467, |
|
"loss": 0.1628, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.45565217391304347, |
|
"grad_norm": 0.10962794054522577, |
|
"learning_rate": 0.00019028472276098844, |
|
"loss": 0.1109, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.4591304347826087, |
|
"grad_norm": 0.10757925577294936, |
|
"learning_rate": 0.00019002127509974376, |
|
"loss": 0.1124, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.46260869565217394, |
|
"grad_norm": 0.14061789137211347, |
|
"learning_rate": 0.00018975449053105505, |
|
"loss": 0.1445, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.46608695652173915, |
|
"grad_norm": 0.1096963245848753, |
|
"learning_rate": 0.00018948437894408918, |
|
"loss": 0.1265, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.46956521739130436, |
|
"grad_norm": 0.12314690150275322, |
|
"learning_rate": 0.00018921095035133898, |
|
"loss": 0.1202, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.47304347826086957, |
|
"grad_norm": 0.1779920573282376, |
|
"learning_rate": 0.0001889342148882519, |
|
"loss": 0.1997, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.4765217391304348, |
|
"grad_norm": 0.13319522745287313, |
|
"learning_rate": 0.00018865418281285444, |
|
"loss": 0.1402, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 0.12083080356885761, |
|
"learning_rate": 0.00018837086450537193, |
|
"loss": 0.1238, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.4834782608695652, |
|
"grad_norm": 0.1582932839712108, |
|
"learning_rate": 0.00018808427046784366, |
|
"loss": 0.1499, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.48695652173913045, |
|
"grad_norm": 0.14876994205070418, |
|
"learning_rate": 0.00018779441132373362, |
|
"loss": 0.1557, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.49043478260869566, |
|
"grad_norm": 0.17699025587530975, |
|
"learning_rate": 0.0001875012978175368, |
|
"loss": 0.1967, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.49391304347826087, |
|
"grad_norm": 0.14037478538346934, |
|
"learning_rate": 0.00018720494081438078, |
|
"loss": 0.1596, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.4973913043478261, |
|
"grad_norm": 0.11128336848068965, |
|
"learning_rate": 0.00018690535129962306, |
|
"loss": 0.1013, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.5008695652173913, |
|
"grad_norm": 0.15354451724868373, |
|
"learning_rate": 0.00018660254037844388, |
|
"loss": 0.1812, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.5043478260869565, |
|
"grad_norm": 0.17621002427736646, |
|
"learning_rate": 0.00018629651927543447, |
|
"loss": 0.22, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.5078260869565218, |
|
"grad_norm": 0.11412894846283952, |
|
"learning_rate": 0.000185987299334181, |
|
"loss": 0.1277, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.5113043478260869, |
|
"grad_norm": 0.10330685267150483, |
|
"learning_rate": 0.0001856748920168443, |
|
"loss": 0.1149, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.5147826086956522, |
|
"grad_norm": 0.16038774046228474, |
|
"learning_rate": 0.00018535930890373466, |
|
"loss": 0.1614, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.5182608695652174, |
|
"grad_norm": 0.12341631086149, |
|
"learning_rate": 0.00018504056169288275, |
|
"loss": 0.1243, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.5217391304347826, |
|
"grad_norm": 0.14222035267405325, |
|
"learning_rate": 0.00018471866219960602, |
|
"loss": 0.1591, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.5252173913043479, |
|
"grad_norm": 0.15381954436682013, |
|
"learning_rate": 0.0001843936223560707, |
|
"loss": 0.1411, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.528695652173913, |
|
"grad_norm": 0.16749949682456056, |
|
"learning_rate": 0.0001840654542108494, |
|
"loss": 0.173, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.5321739130434783, |
|
"grad_norm": 0.16138212597769477, |
|
"learning_rate": 0.0001837341699284746, |
|
"loss": 0.1378, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.5356521739130434, |
|
"grad_norm": 0.11820972909841256, |
|
"learning_rate": 0.0001833997817889878, |
|
"loss": 0.1415, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.5391304347826087, |
|
"grad_norm": 0.1732254350869074, |
|
"learning_rate": 0.00018306230218748413, |
|
"loss": 0.1565, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.542608695652174, |
|
"grad_norm": 0.12134029048709205, |
|
"learning_rate": 0.000182721743633653, |
|
"loss": 0.1354, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.5460869565217391, |
|
"grad_norm": 0.15757519533817987, |
|
"learning_rate": 0.00018237811875131444, |
|
"loss": 0.1783, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.5495652173913044, |
|
"grad_norm": 0.1389328342147638, |
|
"learning_rate": 0.0001820314402779511, |
|
"loss": 0.1373, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.5530434782608695, |
|
"grad_norm": 0.13113073991864377, |
|
"learning_rate": 0.00018168172106423607, |
|
"loss": 0.1272, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.5565217391304348, |
|
"grad_norm": 0.14093537485863689, |
|
"learning_rate": 0.00018132897407355657, |
|
"loss": 0.1364, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 0.1407116914405213, |
|
"learning_rate": 0.00018097321238153338, |
|
"loss": 0.1329, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.5634782608695652, |
|
"grad_norm": 0.14535376492750982, |
|
"learning_rate": 0.00018061444917553629, |
|
"loss": 0.1692, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.5669565217391305, |
|
"grad_norm": 0.14031883322639, |
|
"learning_rate": 0.00018025269775419507, |
|
"loss": 0.1356, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.5704347826086956, |
|
"grad_norm": 0.1551541472991319, |
|
"learning_rate": 0.00017988797152690671, |
|
"loss": 0.148, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.5739130434782609, |
|
"grad_norm": 0.16740550198996068, |
|
"learning_rate": 0.00017952028401333817, |
|
"loss": 0.1643, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.577391304347826, |
|
"grad_norm": 0.11979937989365573, |
|
"learning_rate": 0.00017914964884292544, |
|
"loss": 0.1282, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.5808695652173913, |
|
"grad_norm": 0.11342656946095574, |
|
"learning_rate": 0.00017877607975436805, |
|
"loss": 0.1192, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.5843478260869566, |
|
"grad_norm": 0.12812233079916055, |
|
"learning_rate": 0.00017839959059512016, |
|
"loss": 0.1513, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.5878260869565217, |
|
"grad_norm": 0.12442713946144991, |
|
"learning_rate": 0.00017802019532087694, |
|
"loss": 0.1456, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.591304347826087, |
|
"grad_norm": 0.13585627394105457, |
|
"learning_rate": 0.00017763790799505747, |
|
"loss": 0.155, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.5947826086956521, |
|
"grad_norm": 0.10995274239294903, |
|
"learning_rate": 0.00017725274278828325, |
|
"loss": 0.1008, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.5982608695652174, |
|
"grad_norm": 0.13574783390341455, |
|
"learning_rate": 0.0001768647139778532, |
|
"loss": 0.1766, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.6017391304347826, |
|
"grad_norm": 0.12560446559496083, |
|
"learning_rate": 0.00017647383594721416, |
|
"loss": 0.1378, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.6052173913043478, |
|
"grad_norm": 0.24726328454376442, |
|
"learning_rate": 0.0001760801231854278, |
|
"loss": 0.2, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.6086956521739131, |
|
"grad_norm": 0.1300492912908485, |
|
"learning_rate": 0.00017568359028663364, |
|
"loss": 0.1353, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.6121739130434782, |
|
"grad_norm": 0.12024702168048951, |
|
"learning_rate": 0.00017528425194950794, |
|
"loss": 0.1346, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.6156521739130435, |
|
"grad_norm": 0.13400618019089086, |
|
"learning_rate": 0.000174882122976719, |
|
"loss": 0.147, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.6191304347826087, |
|
"grad_norm": 0.10665251622268654, |
|
"learning_rate": 0.0001744772182743782, |
|
"loss": 0.1269, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.6226086956521739, |
|
"grad_norm": 0.12190300959390951, |
|
"learning_rate": 0.00017406955285148782, |
|
"loss": 0.1263, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.6260869565217392, |
|
"grad_norm": 0.08623960123094311, |
|
"learning_rate": 0.0001736591418193844, |
|
"loss": 0.1075, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.6295652173913043, |
|
"grad_norm": 0.15899695178173323, |
|
"learning_rate": 0.00017324600039117863, |
|
"loss": 0.1335, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.6330434782608696, |
|
"grad_norm": 0.12405567103892874, |
|
"learning_rate": 0.00017283014388119159, |
|
"loss": 0.1261, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.6365217391304347, |
|
"grad_norm": 0.12227415658908525, |
|
"learning_rate": 0.000172411587704387, |
|
"loss": 0.1394, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 0.10299259784769293, |
|
"learning_rate": 0.0001719903473757996, |
|
"loss": 0.1179, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.6434782608695652, |
|
"grad_norm": 0.18072288336432377, |
|
"learning_rate": 0.00017156643850996047, |
|
"loss": 0.1678, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.6469565217391304, |
|
"grad_norm": 0.13931470098249313, |
|
"learning_rate": 0.0001711398768203178, |
|
"loss": 0.1468, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.6504347826086957, |
|
"grad_norm": 0.142891653601056, |
|
"learning_rate": 0.00017071067811865476, |
|
"loss": 0.1699, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.6539130434782608, |
|
"grad_norm": 0.1543203031358245, |
|
"learning_rate": 0.00017027885831450318, |
|
"loss": 0.163, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.6573913043478261, |
|
"grad_norm": 0.08881257657108957, |
|
"learning_rate": 0.0001698444334145539, |
|
"loss": 0.0956, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.6608695652173913, |
|
"grad_norm": 0.1437015724786564, |
|
"learning_rate": 0.0001694074195220634, |
|
"loss": 0.1531, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.6643478260869565, |
|
"grad_norm": 0.15239548568770145, |
|
"learning_rate": 0.0001689678328362569, |
|
"loss": 0.1583, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.6678260869565218, |
|
"grad_norm": 0.12999990256807817, |
|
"learning_rate": 0.00016852568965172791, |
|
"loss": 0.1241, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.671304347826087, |
|
"grad_norm": 0.16058602233359284, |
|
"learning_rate": 0.00016808100635783423, |
|
"loss": 0.1901, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.6747826086956522, |
|
"grad_norm": 0.09752013699351626, |
|
"learning_rate": 0.00016763379943809028, |
|
"loss": 0.1104, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.6782608695652174, |
|
"grad_norm": 0.1171558354901818, |
|
"learning_rate": 0.00016718408546955636, |
|
"loss": 0.1393, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.6817391304347826, |
|
"grad_norm": 0.12541030208785753, |
|
"learning_rate": 0.00016673188112222394, |
|
"loss": 0.1339, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.6852173913043478, |
|
"grad_norm": 0.16378504667963803, |
|
"learning_rate": 0.00016627720315839784, |
|
"loss": 0.1896, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.688695652173913, |
|
"grad_norm": 0.1254436356043883, |
|
"learning_rate": 0.0001658200684320748, |
|
"loss": 0.155, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.6921739130434783, |
|
"grad_norm": 0.10926424609512125, |
|
"learning_rate": 0.00016536049388831894, |
|
"loss": 0.1333, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.6956521739130435, |
|
"grad_norm": 0.12166335086653808, |
|
"learning_rate": 0.00016489849656263337, |
|
"loss": 0.1307, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.6991304347826087, |
|
"grad_norm": 0.09726778569787221, |
|
"learning_rate": 0.00016443409358032887, |
|
"loss": 0.1093, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.7026086956521739, |
|
"grad_norm": 0.18623972301385774, |
|
"learning_rate": 0.00016396730215588915, |
|
"loss": 0.1329, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.7060869565217391, |
|
"grad_norm": 0.1036420764487769, |
|
"learning_rate": 0.00016349813959233255, |
|
"loss": 0.1066, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.7095652173913043, |
|
"grad_norm": 0.15859483282291995, |
|
"learning_rate": 0.00016302662328057088, |
|
"loss": 0.1236, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.7130434782608696, |
|
"grad_norm": 0.1352010399451213, |
|
"learning_rate": 0.00016255277069876454, |
|
"loss": 0.1556, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.7165217391304348, |
|
"grad_norm": 0.0847816136200446, |
|
"learning_rate": 0.00016207659941167485, |
|
"loss": 0.1033, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 0.13868944339810388, |
|
"learning_rate": 0.00016159812707001282, |
|
"loss": 0.1583, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.7234782608695652, |
|
"grad_norm": 0.11403894766591344, |
|
"learning_rate": 0.00016111737140978494, |
|
"loss": 0.1193, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.7269565217391304, |
|
"grad_norm": 0.11921529189670015, |
|
"learning_rate": 0.00016063435025163569, |
|
"loss": 0.1272, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.7304347826086957, |
|
"grad_norm": 0.16113792796352755, |
|
"learning_rate": 0.00016014908150018703, |
|
"loss": 0.1972, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.7339130434782609, |
|
"grad_norm": 0.12349845734675136, |
|
"learning_rate": 0.00015966158314337472, |
|
"loss": 0.1462, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.7373913043478261, |
|
"grad_norm": 0.1502644739489071, |
|
"learning_rate": 0.00015917187325178138, |
|
"loss": 0.1626, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.7408695652173913, |
|
"grad_norm": 0.14447398546355603, |
|
"learning_rate": 0.00015867996997796685, |
|
"loss": 0.1653, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.7443478260869565, |
|
"grad_norm": 0.13747896173823398, |
|
"learning_rate": 0.0001581858915557953, |
|
"loss": 0.1436, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.7478260869565218, |
|
"grad_norm": 0.14978167508747187, |
|
"learning_rate": 0.00015768965629975914, |
|
"loss": 0.146, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.7513043478260869, |
|
"grad_norm": 0.10530370902507546, |
|
"learning_rate": 0.0001571912826043003, |
|
"loss": 0.1067, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.7547826086956522, |
|
"grad_norm": 0.15065236331393017, |
|
"learning_rate": 0.00015669078894312848, |
|
"loss": 0.1278, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.7582608695652174, |
|
"grad_norm": 0.13038147931466645, |
|
"learning_rate": 0.00015618819386853606, |
|
"loss": 0.1363, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.7617391304347826, |
|
"grad_norm": 0.12241560985671367, |
|
"learning_rate": 0.0001556835160107107, |
|
"loss": 0.1381, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.7652173913043478, |
|
"grad_norm": 0.1032079433563102, |
|
"learning_rate": 0.0001551767740770446, |
|
"loss": 0.1329, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.768695652173913, |
|
"grad_norm": 0.10420850780658172, |
|
"learning_rate": 0.00015466798685144113, |
|
"loss": 0.108, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.7721739130434783, |
|
"grad_norm": 0.12440213702363168, |
|
"learning_rate": 0.00015415717319361847, |
|
"loss": 0.1378, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.7756521739130435, |
|
"grad_norm": 0.1441063665454779, |
|
"learning_rate": 0.00015364435203841058, |
|
"loss": 0.1546, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.7791304347826087, |
|
"grad_norm": 0.10283016985275265, |
|
"learning_rate": 0.00015312954239506533, |
|
"loss": 0.1398, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.782608695652174, |
|
"grad_norm": 0.11879627421875508, |
|
"learning_rate": 0.0001526127633465398, |
|
"loss": 0.1394, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.7860869565217391, |
|
"grad_norm": 0.1340444040194527, |
|
"learning_rate": 0.00015209403404879303, |
|
"loss": 0.1371, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.7895652173913044, |
|
"grad_norm": 0.15078724481486633, |
|
"learning_rate": 0.00015157337373007578, |
|
"loss": 0.1626, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.7930434782608695, |
|
"grad_norm": 0.14991040307874806, |
|
"learning_rate": 0.0001510508016902179, |
|
"loss": 0.1563, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.7965217391304348, |
|
"grad_norm": 0.11713195212511589, |
|
"learning_rate": 0.00015052633729991294, |
|
"loss": 0.1372, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.10665559275288661, |
|
"learning_rate": 0.00015000000000000001, |
|
"loss": 0.1174, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.8034782608695652, |
|
"grad_norm": 0.15701030356110557, |
|
"learning_rate": 0.00014947180930074326, |
|
"loss": 0.1575, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.8069565217391305, |
|
"grad_norm": 0.11847918443040721, |
|
"learning_rate": 0.00014894178478110857, |
|
"loss": 0.1203, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.8104347826086956, |
|
"grad_norm": 0.1285162400608025, |
|
"learning_rate": 0.0001484099460880379, |
|
"loss": 0.133, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.8139130434782609, |
|
"grad_norm": 0.1512166257756219, |
|
"learning_rate": 0.00014787631293572092, |
|
"loss": 0.1584, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.8173913043478261, |
|
"grad_norm": 0.1584657384276377, |
|
"learning_rate": 0.00014734090510486433, |
|
"loss": 0.176, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.8208695652173913, |
|
"grad_norm": 0.10354148249587801, |
|
"learning_rate": 0.0001468037424419586, |
|
"loss": 0.1288, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.8243478260869566, |
|
"grad_norm": 0.11214117311491091, |
|
"learning_rate": 0.0001462648448585423, |
|
"loss": 0.1221, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.8278260869565217, |
|
"grad_norm": 0.14772445459512365, |
|
"learning_rate": 0.00014572423233046386, |
|
"loss": 0.1329, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.831304347826087, |
|
"grad_norm": 0.14615479240284515, |
|
"learning_rate": 0.0001451819248971415, |
|
"loss": 0.1643, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.8347826086956521, |
|
"grad_norm": 0.12753795686628652, |
|
"learning_rate": 0.00014463794266081993, |
|
"loss": 0.1557, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.8382608695652174, |
|
"grad_norm": 0.13887522594093168, |
|
"learning_rate": 0.00014409230578582566, |
|
"loss": 0.1639, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.8417391304347827, |
|
"grad_norm": 0.16912324583465613, |
|
"learning_rate": 0.00014354503449781912, |
|
"loss": 0.1688, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.8452173913043478, |
|
"grad_norm": 0.09449246440948272, |
|
"learning_rate": 0.0001429961490830453, |
|
"loss": 0.0993, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.8486956521739131, |
|
"grad_norm": 0.10550648117339549, |
|
"learning_rate": 0.00014244566988758152, |
|
"loss": 0.1356, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.8521739130434782, |
|
"grad_norm": 0.10969662638776663, |
|
"learning_rate": 0.00014189361731658338, |
|
"loss": 0.1239, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.8556521739130435, |
|
"grad_norm": 0.14808204518572862, |
|
"learning_rate": 0.00014134001183352832, |
|
"loss": 0.1579, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.8591304347826086, |
|
"grad_norm": 0.13859857433183218, |
|
"learning_rate": 0.00014078487395945713, |
|
"loss": 0.1747, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.8626086956521739, |
|
"grad_norm": 0.13502318508676295, |
|
"learning_rate": 0.00014022822427221324, |
|
"loss": 0.1558, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.8660869565217392, |
|
"grad_norm": 0.11993193249652914, |
|
"learning_rate": 0.00013967008340567998, |
|
"loss": 0.1318, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.8695652173913043, |
|
"grad_norm": 0.14432862128479182, |
|
"learning_rate": 0.0001391104720490156, |
|
"loss": 0.1718, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.8730434782608696, |
|
"grad_norm": 0.10960589296514184, |
|
"learning_rate": 0.0001385494109458866, |
|
"loss": 0.1216, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.8765217391304347, |
|
"grad_norm": 0.1444495982064661, |
|
"learning_rate": 0.00013798692089369855, |
|
"loss": 0.1511, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 0.14195714442676055, |
|
"learning_rate": 0.00013742302274282533, |
|
"loss": 0.164, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.8834782608695653, |
|
"grad_norm": 0.15939971031248268, |
|
"learning_rate": 0.00013685773739583617, |
|
"loss": 0.1589, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.8869565217391304, |
|
"grad_norm": 0.10567415705517683, |
|
"learning_rate": 0.00013629108580672094, |
|
"loss": 0.1006, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.8904347826086957, |
|
"grad_norm": 0.12878257656430525, |
|
"learning_rate": 0.0001357230889801133, |
|
"loss": 0.1267, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.8939130434782608, |
|
"grad_norm": 0.11395046485825466, |
|
"learning_rate": 0.0001351537679705121, |
|
"loss": 0.134, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.8973913043478261, |
|
"grad_norm": 0.13632342342499126, |
|
"learning_rate": 0.00013458314388150114, |
|
"loss": 0.1598, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.9008695652173913, |
|
"grad_norm": 0.16308025278021065, |
|
"learning_rate": 0.00013401123786496664, |
|
"loss": 0.2041, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.9043478260869565, |
|
"grad_norm": 0.10241355755764081, |
|
"learning_rate": 0.00013343807112031327, |
|
"loss": 0.1081, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.9078260869565218, |
|
"grad_norm": 0.1310395387251736, |
|
"learning_rate": 0.00013286366489367846, |
|
"loss": 0.158, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.9113043478260869, |
|
"grad_norm": 0.13100096116141785, |
|
"learning_rate": 0.00013228804047714463, |
|
"loss": 0.1607, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.9147826086956522, |
|
"grad_norm": 0.11969415969012737, |
|
"learning_rate": 0.00013171121920795014, |
|
"loss": 0.1308, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.9182608695652174, |
|
"grad_norm": 0.1295097570140744, |
|
"learning_rate": 0.00013113322246769817, |
|
"loss": 0.1502, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.9217391304347826, |
|
"grad_norm": 0.11814028103328439, |
|
"learning_rate": 0.00013055407168156437, |
|
"loss": 0.1241, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.9252173913043479, |
|
"grad_norm": 0.11218111509954955, |
|
"learning_rate": 0.00012997378831750242, |
|
"loss": 0.1381, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.928695652173913, |
|
"grad_norm": 0.12021997514568723, |
|
"learning_rate": 0.00012939239388544852, |
|
"loss": 0.1395, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.9321739130434783, |
|
"grad_norm": 0.12114779793419364, |
|
"learning_rate": 0.00012880990993652377, |
|
"loss": 0.117, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.9356521739130435, |
|
"grad_norm": 0.1690185626815269, |
|
"learning_rate": 0.00012822635806223557, |
|
"loss": 0.2055, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.9391304347826087, |
|
"grad_norm": 0.10540099318141671, |
|
"learning_rate": 0.00012764175989367718, |
|
"loss": 0.1292, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.9426086956521739, |
|
"grad_norm": 0.1123676795677547, |
|
"learning_rate": 0.00012705613710072575, |
|
"loss": 0.1401, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.9460869565217391, |
|
"grad_norm": 0.12163076229024251, |
|
"learning_rate": 0.00012646951139123934, |
|
"loss": 0.1393, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.9495652173913044, |
|
"grad_norm": 0.10635388207764115, |
|
"learning_rate": 0.00012588190451025207, |
|
"loss": 0.1192, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.9530434782608695, |
|
"grad_norm": 0.1324746367162532, |
|
"learning_rate": 0.00012529333823916807, |
|
"loss": 0.1674, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.9565217391304348, |
|
"grad_norm": 0.12690900530317173, |
|
"learning_rate": 0.00012470383439495416, |
|
"loss": 0.164, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 0.12178811089584775, |
|
"learning_rate": 0.0001241134148293311, |
|
"loss": 0.1472, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.9634782608695652, |
|
"grad_norm": 0.09558226725121408, |
|
"learning_rate": 0.0001235221014279636, |
|
"loss": 0.1107, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.9669565217391304, |
|
"grad_norm": 0.11947361537383715, |
|
"learning_rate": 0.00012292991610964903, |
|
"loss": 0.1454, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.9704347826086956, |
|
"grad_norm": 0.09245448807939725, |
|
"learning_rate": 0.000122336880825505, |
|
"loss": 0.1063, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.9739130434782609, |
|
"grad_norm": 0.12313564570662155, |
|
"learning_rate": 0.00012174301755815571, |
|
"loss": 0.1482, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.9773913043478261, |
|
"grad_norm": 0.14222809451041388, |
|
"learning_rate": 0.00012114834832091691, |
|
"loss": 0.1905, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 0.9808695652173913, |
|
"grad_norm": 0.10079732072591296, |
|
"learning_rate": 0.00012055289515698007, |
|
"loss": 0.1114, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.9843478260869565, |
|
"grad_norm": 0.0893949612581931, |
|
"learning_rate": 0.00011995668013859529, |
|
"loss": 0.1057, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 0.9878260869565217, |
|
"grad_norm": 0.0986410641315097, |
|
"learning_rate": 0.00011935972536625302, |
|
"loss": 0.111, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.991304347826087, |
|
"grad_norm": 0.10054024829355615, |
|
"learning_rate": 0.00011876205296786493, |
|
"loss": 0.0972, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.9947826086956522, |
|
"grad_norm": 0.12467802363495945, |
|
"learning_rate": 0.00011816368509794364, |
|
"loss": 0.147, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.9982608695652174, |
|
"grad_norm": 0.08424816142149656, |
|
"learning_rate": 0.00011756464393678153, |
|
"loss": 0.103, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.9982608695652174, |
|
"eval_loss": 0.1444740742444992, |
|
"eval_runtime": 52.3252, |
|
"eval_samples_per_second": 4.568, |
|
"eval_steps_per_second": 0.573, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 1.0017391304347827, |
|
"grad_norm": 0.11878547881930412, |
|
"learning_rate": 0.00011696495168962847, |
|
"loss": 0.1385, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 1.0052173913043478, |
|
"grad_norm": 0.09391887138015648, |
|
"learning_rate": 0.00011636463058586881, |
|
"loss": 0.0826, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 1.008695652173913, |
|
"grad_norm": 0.1221171087699073, |
|
"learning_rate": 0.00011576370287819736, |
|
"loss": 0.1305, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.0121739130434784, |
|
"grad_norm": 0.08852002687146088, |
|
"learning_rate": 0.0001151621908417945, |
|
"loss": 0.0893, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 1.0156521739130435, |
|
"grad_norm": 0.11159916956566551, |
|
"learning_rate": 0.00011456011677350051, |
|
"loss": 0.1112, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 1.0191304347826087, |
|
"grad_norm": 0.10003818148322566, |
|
"learning_rate": 0.000113957502990989, |
|
"loss": 0.091, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 1.0226086956521738, |
|
"grad_norm": 0.16412668815167833, |
|
"learning_rate": 0.0001133543718319398, |
|
"loss": 0.0684, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 1.0260869565217392, |
|
"grad_norm": 0.12591860799015855, |
|
"learning_rate": 0.0001127507456532108, |
|
"loss": 0.1155, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 1.0295652173913044, |
|
"grad_norm": 0.09691052326677896, |
|
"learning_rate": 0.00011214664683000927, |
|
"loss": 0.0655, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 1.0330434782608695, |
|
"grad_norm": 0.11401647857375072, |
|
"learning_rate": 0.00011154209775506241, |
|
"loss": 0.0819, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 1.0365217391304349, |
|
"grad_norm": 0.12069848422212905, |
|
"learning_rate": 0.00011093712083778746, |
|
"loss": 0.0827, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"grad_norm": 0.11216573920077354, |
|
"learning_rate": 0.00011033173850346082, |
|
"loss": 0.0754, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 1.0434782608695652, |
|
"grad_norm": 0.14906810717855873, |
|
"learning_rate": 0.0001097259731923869, |
|
"loss": 0.0888, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.0469565217391303, |
|
"grad_norm": 0.17640102936065463, |
|
"learning_rate": 0.00010911984735906635, |
|
"loss": 0.0987, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 1.0504347826086957, |
|
"grad_norm": 0.10731016230700624, |
|
"learning_rate": 0.00010851338347136357, |
|
"loss": 0.0654, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 1.0539130434782609, |
|
"grad_norm": 0.13955232812110846, |
|
"learning_rate": 0.000107906604009674, |
|
"loss": 0.0766, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 1.057391304347826, |
|
"grad_norm": 0.13869916502517549, |
|
"learning_rate": 0.00010729953146609076, |
|
"loss": 0.0905, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 1.0608695652173914, |
|
"grad_norm": 0.16180614723177286, |
|
"learning_rate": 0.00010669218834357091, |
|
"loss": 0.1025, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 1.0643478260869565, |
|
"grad_norm": 0.09389888673848854, |
|
"learning_rate": 0.00010608459715510139, |
|
"loss": 0.0613, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 1.0678260869565217, |
|
"grad_norm": 0.11083339472481404, |
|
"learning_rate": 0.00010547678042286436, |
|
"loss": 0.0705, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 1.0713043478260869, |
|
"grad_norm": 0.15345557779758465, |
|
"learning_rate": 0.00010486876067740252, |
|
"loss": 0.0878, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 1.0747826086956522, |
|
"grad_norm": 0.12649607806775048, |
|
"learning_rate": 0.00010426056045678376, |
|
"loss": 0.0879, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 1.0782608695652174, |
|
"grad_norm": 0.14680466140336335, |
|
"learning_rate": 0.0001036522023057659, |
|
"loss": 0.0958, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.0817391304347825, |
|
"grad_norm": 0.11612953696390602, |
|
"learning_rate": 0.0001030437087749609, |
|
"loss": 0.0736, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 1.085217391304348, |
|
"grad_norm": 0.11879942840457153, |
|
"learning_rate": 0.00010243510241999899, |
|
"loss": 0.0723, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 1.088695652173913, |
|
"grad_norm": 0.13060110667263794, |
|
"learning_rate": 0.0001018264058006925, |
|
"loss": 0.0935, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 1.0921739130434782, |
|
"grad_norm": 0.14907408553806142, |
|
"learning_rate": 0.00010121764148019976, |
|
"loss": 0.1067, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 1.0956521739130434, |
|
"grad_norm": 0.09945695753413593, |
|
"learning_rate": 0.00010060883202418862, |
|
"loss": 0.0717, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 1.0991304347826087, |
|
"grad_norm": 0.14172732221333895, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0965, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 1.102608695652174, |
|
"grad_norm": 0.1308399790176956, |
|
"learning_rate": 9.93911679758114e-05, |
|
"loss": 0.1035, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 1.106086956521739, |
|
"grad_norm": 0.11697945837103665, |
|
"learning_rate": 9.878235851980025e-05, |
|
"loss": 0.0904, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 1.1095652173913044, |
|
"grad_norm": 0.12653991847887303, |
|
"learning_rate": 9.817359419930751e-05, |
|
"loss": 0.0856, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 1.1130434782608696, |
|
"grad_norm": 0.1217289403364997, |
|
"learning_rate": 9.756489758000105e-05, |
|
"loss": 0.0868, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.1165217391304347, |
|
"grad_norm": 0.11310356101526439, |
|
"learning_rate": 9.69562912250391e-05, |
|
"loss": 0.0866, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"grad_norm": 0.10719359269477195, |
|
"learning_rate": 9.63477976942341e-05, |
|
"loss": 0.0716, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 1.1234782608695653, |
|
"grad_norm": 0.1512816323423573, |
|
"learning_rate": 9.573943954321626e-05, |
|
"loss": 0.104, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 1.1269565217391304, |
|
"grad_norm": 0.09749679838740939, |
|
"learning_rate": 9.513123932259751e-05, |
|
"loss": 0.0767, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 1.1304347826086956, |
|
"grad_norm": 0.12636925131896773, |
|
"learning_rate": 9.452321957713564e-05, |
|
"loss": 0.0874, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 1.133913043478261, |
|
"grad_norm": 0.08724868085956655, |
|
"learning_rate": 9.391540284489862e-05, |
|
"loss": 0.0675, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 1.137391304347826, |
|
"grad_norm": 0.09917562166921519, |
|
"learning_rate": 9.330781165642907e-05, |
|
"loss": 0.0835, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 1.1408695652173912, |
|
"grad_norm": 0.11005238071063954, |
|
"learning_rate": 9.270046853390925e-05, |
|
"loss": 0.0926, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 1.1443478260869564, |
|
"grad_norm": 0.13592915315342272, |
|
"learning_rate": 9.209339599032601e-05, |
|
"loss": 0.0921, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 1.1478260869565218, |
|
"grad_norm": 0.09959026553962852, |
|
"learning_rate": 9.148661652863642e-05, |
|
"loss": 0.0669, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.151304347826087, |
|
"grad_norm": 0.12926733392574546, |
|
"learning_rate": 9.088015264093365e-05, |
|
"loss": 0.0882, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 1.154782608695652, |
|
"grad_norm": 0.12554624045521445, |
|
"learning_rate": 9.027402680761309e-05, |
|
"loss": 0.0988, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 1.1582608695652175, |
|
"grad_norm": 0.1672440454873292, |
|
"learning_rate": 8.966826149653923e-05, |
|
"loss": 0.1213, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 1.1617391304347826, |
|
"grad_norm": 0.11985957465820539, |
|
"learning_rate": 8.906287916221259e-05, |
|
"loss": 0.0868, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 1.1652173913043478, |
|
"grad_norm": 0.1272151243776101, |
|
"learning_rate": 8.845790224493763e-05, |
|
"loss": 0.0936, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 1.1686956521739131, |
|
"grad_norm": 0.1328045736153317, |
|
"learning_rate": 8.785335316999078e-05, |
|
"loss": 0.1051, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 1.1721739130434783, |
|
"grad_norm": 0.09448312790900673, |
|
"learning_rate": 8.724925434678923e-05, |
|
"loss": 0.0735, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 1.1756521739130434, |
|
"grad_norm": 0.13775516158820159, |
|
"learning_rate": 8.664562816806022e-05, |
|
"loss": 0.0826, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 1.1791304347826088, |
|
"grad_norm": 0.095050504784669, |
|
"learning_rate": 8.604249700901101e-05, |
|
"loss": 0.0606, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 1.182608695652174, |
|
"grad_norm": 0.10883208791380891, |
|
"learning_rate": 8.543988322649954e-05, |
|
"loss": 0.0776, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.1860869565217391, |
|
"grad_norm": 0.1432959854298642, |
|
"learning_rate": 8.483780915820553e-05, |
|
"loss": 0.105, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 1.1895652173913043, |
|
"grad_norm": 0.1934560716364753, |
|
"learning_rate": 8.423629712180265e-05, |
|
"loss": 0.1167, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 1.1930434782608696, |
|
"grad_norm": 0.14737287305329302, |
|
"learning_rate": 8.363536941413121e-05, |
|
"loss": 0.0952, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 1.1965217391304348, |
|
"grad_norm": 0.1535547643880873, |
|
"learning_rate": 8.303504831037154e-05, |
|
"loss": 0.1146, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"grad_norm": 0.15481576726903015, |
|
"learning_rate": 8.243535606321848e-05, |
|
"loss": 0.1088, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 1.203478260869565, |
|
"grad_norm": 0.1589929120048658, |
|
"learning_rate": 8.183631490205637e-05, |
|
"loss": 0.1288, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 1.2069565217391305, |
|
"grad_norm": 0.12926833828040588, |
|
"learning_rate": 8.12379470321351e-05, |
|
"loss": 0.0779, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 1.2104347826086956, |
|
"grad_norm": 0.10432967192535712, |
|
"learning_rate": 8.064027463374702e-05, |
|
"loss": 0.0733, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 1.2139130434782608, |
|
"grad_norm": 0.1423904166119135, |
|
"learning_rate": 8.004331986140474e-05, |
|
"loss": 0.097, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 1.2173913043478262, |
|
"grad_norm": 0.16415634432026194, |
|
"learning_rate": 7.944710484301995e-05, |
|
"loss": 0.1044, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.2208695652173913, |
|
"grad_norm": 0.14367056293640723, |
|
"learning_rate": 7.88516516790831e-05, |
|
"loss": 0.108, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 1.2243478260869565, |
|
"grad_norm": 0.09627642646890802, |
|
"learning_rate": 7.825698244184431e-05, |
|
"loss": 0.0716, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 1.2278260869565218, |
|
"grad_norm": 0.12349504031653168, |
|
"learning_rate": 7.766311917449501e-05, |
|
"loss": 0.0846, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 1.231304347826087, |
|
"grad_norm": 0.11917707968673376, |
|
"learning_rate": 7.707008389035101e-05, |
|
"loss": 0.0893, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 1.2347826086956522, |
|
"grad_norm": 0.14958731827081473, |
|
"learning_rate": 7.647789857203645e-05, |
|
"loss": 0.1005, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 1.2382608695652173, |
|
"grad_norm": 0.09807418540274827, |
|
"learning_rate": 7.588658517066892e-05, |
|
"loss": 0.0777, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 1.2417391304347827, |
|
"grad_norm": 0.13031128610452009, |
|
"learning_rate": 7.529616560504585e-05, |
|
"loss": 0.0877, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 1.2452173913043478, |
|
"grad_norm": 0.15458552977098033, |
|
"learning_rate": 7.470666176083192e-05, |
|
"loss": 0.1006, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 1.248695652173913, |
|
"grad_norm": 0.10086297540969145, |
|
"learning_rate": 7.411809548974792e-05, |
|
"loss": 0.0771, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 1.2521739130434781, |
|
"grad_norm": 0.10503599360725659, |
|
"learning_rate": 7.353048860876064e-05, |
|
"loss": 0.0699, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.2556521739130435, |
|
"grad_norm": 0.11445411107296893, |
|
"learning_rate": 7.294386289927425e-05, |
|
"loss": 0.0878, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 1.2591304347826087, |
|
"grad_norm": 0.09163778675554561, |
|
"learning_rate": 7.235824010632283e-05, |
|
"loss": 0.0774, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 1.2626086956521738, |
|
"grad_norm": 0.12753545759992949, |
|
"learning_rate": 7.177364193776441e-05, |
|
"loss": 0.0891, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 1.2660869565217392, |
|
"grad_norm": 0.10783034916975004, |
|
"learning_rate": 7.119009006347625e-05, |
|
"loss": 0.0727, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 1.2695652173913043, |
|
"grad_norm": 0.12242485363979573, |
|
"learning_rate": 7.060760611455152e-05, |
|
"loss": 0.0628, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 1.2730434782608695, |
|
"grad_norm": 0.0974356463850898, |
|
"learning_rate": 7.002621168249759e-05, |
|
"loss": 0.0791, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 1.2765217391304349, |
|
"grad_norm": 0.11983018538507342, |
|
"learning_rate": 6.944592831843566e-05, |
|
"loss": 0.067, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"grad_norm": 0.1364747598273945, |
|
"learning_rate": 6.886677753230184e-05, |
|
"loss": 0.0905, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 1.2834782608695652, |
|
"grad_norm": 0.13965549240604952, |
|
"learning_rate": 6.82887807920499e-05, |
|
"loss": 0.0965, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 1.2869565217391306, |
|
"grad_norm": 0.1361838338173524, |
|
"learning_rate": 6.77119595228554e-05, |
|
"loss": 0.0884, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 1.2904347826086957, |
|
"grad_norm": 0.1554086553741736, |
|
"learning_rate": 6.713633510632157e-05, |
|
"loss": 0.1058, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 1.2939130434782609, |
|
"grad_norm": 0.13154153458769796, |
|
"learning_rate": 6.656192887968675e-05, |
|
"loss": 0.1069, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 1.297391304347826, |
|
"grad_norm": 0.12317336873376321, |
|
"learning_rate": 6.598876213503339e-05, |
|
"loss": 0.0855, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 1.3008695652173912, |
|
"grad_norm": 0.12111523304638382, |
|
"learning_rate": 6.541685611849887e-05, |
|
"loss": 0.0796, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 1.3043478260869565, |
|
"grad_norm": 0.11822393281008113, |
|
"learning_rate": 6.484623202948789e-05, |
|
"loss": 0.0678, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 1.3078260869565217, |
|
"grad_norm": 0.14902345594338023, |
|
"learning_rate": 6.427691101988673e-05, |
|
"loss": 0.095, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 1.3113043478260868, |
|
"grad_norm": 0.1804018948634972, |
|
"learning_rate": 6.370891419327907e-05, |
|
"loss": 0.1282, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 1.3147826086956522, |
|
"grad_norm": 0.11547994985396455, |
|
"learning_rate": 6.314226260416382e-05, |
|
"loss": 0.0794, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 1.3182608695652174, |
|
"grad_norm": 0.13442398839445116, |
|
"learning_rate": 6.257697725717468e-05, |
|
"loss": 0.0828, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 1.3217391304347825, |
|
"grad_norm": 0.16157920308299395, |
|
"learning_rate": 6.201307910630146e-05, |
|
"loss": 0.0862, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.325217391304348, |
|
"grad_norm": 0.09483163105782791, |
|
"learning_rate": 6.145058905411343e-05, |
|
"loss": 0.0602, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 1.328695652173913, |
|
"grad_norm": 0.1326696358587778, |
|
"learning_rate": 6.0889527950984416e-05, |
|
"loss": 0.081, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 1.3321739130434782, |
|
"grad_norm": 0.09578653192083227, |
|
"learning_rate": 6.0329916594320054e-05, |
|
"loss": 0.0632, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 1.3356521739130436, |
|
"grad_norm": 0.1445496359915367, |
|
"learning_rate": 5.977177572778678e-05, |
|
"loss": 0.1043, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 1.3391304347826087, |
|
"grad_norm": 0.11696872605657838, |
|
"learning_rate": 5.921512604054289e-05, |
|
"loss": 0.075, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 1.342608695652174, |
|
"grad_norm": 0.10474941138685831, |
|
"learning_rate": 5.865998816647171e-05, |
|
"loss": 0.0808, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 1.3460869565217393, |
|
"grad_norm": 0.12195030923899196, |
|
"learning_rate": 5.8106382683416635e-05, |
|
"loss": 0.0906, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 1.3495652173913044, |
|
"grad_norm": 0.1247261310171403, |
|
"learning_rate": 5.755433011241851e-05, |
|
"loss": 0.0799, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 1.3530434782608696, |
|
"grad_norm": 0.12001527150963033, |
|
"learning_rate": 5.7003850916954705e-05, |
|
"loss": 0.0737, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 1.3565217391304347, |
|
"grad_norm": 0.12921970865724472, |
|
"learning_rate": 5.645496550218089e-05, |
|
"loss": 0.0802, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.3599999999999999, |
|
"grad_norm": 0.14148810186262428, |
|
"learning_rate": 5.5907694214174344e-05, |
|
"loss": 0.0998, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 1.3634782608695653, |
|
"grad_norm": 0.1822115264684952, |
|
"learning_rate": 5.536205733918007e-05, |
|
"loss": 0.1139, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 1.3669565217391304, |
|
"grad_norm": 0.11275316954836014, |
|
"learning_rate": 5.4818075102858526e-05, |
|
"loss": 0.0839, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 1.3704347826086956, |
|
"grad_norm": 0.1049274592340904, |
|
"learning_rate": 5.4275767669536146e-05, |
|
"loss": 0.078, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 1.373913043478261, |
|
"grad_norm": 0.1275403647919897, |
|
"learning_rate": 5.373515514145772e-05, |
|
"loss": 0.0882, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 1.377391304347826, |
|
"grad_norm": 0.1414442736987841, |
|
"learning_rate": 5.3196257558041386e-05, |
|
"loss": 0.0905, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 1.3808695652173912, |
|
"grad_norm": 0.1647573834843455, |
|
"learning_rate": 5.265909489513567e-05, |
|
"loss": 0.0868, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 1.3843478260869566, |
|
"grad_norm": 0.14978728162298646, |
|
"learning_rate": 5.212368706427912e-05, |
|
"loss": 0.0967, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 1.3878260869565218, |
|
"grad_norm": 0.13582863247078658, |
|
"learning_rate": 5.159005391196213e-05, |
|
"loss": 0.0888, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 1.391304347826087, |
|
"grad_norm": 0.11281045642311609, |
|
"learning_rate": 5.105821521889147e-05, |
|
"loss": 0.0899, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.3947826086956523, |
|
"grad_norm": 0.1525391794429011, |
|
"learning_rate": 5.052819069925676e-05, |
|
"loss": 0.1121, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 1.3982608695652174, |
|
"grad_norm": 0.10553540876961562, |
|
"learning_rate": 5.000000000000002e-05, |
|
"loss": 0.0667, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 1.4017391304347826, |
|
"grad_norm": 0.14272542918507544, |
|
"learning_rate": 4.947366270008707e-05, |
|
"loss": 0.1049, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 1.4052173913043478, |
|
"grad_norm": 0.11523131534313182, |
|
"learning_rate": 4.894919830978212e-05, |
|
"loss": 0.083, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 1.4086956521739131, |
|
"grad_norm": 0.11250758245733375, |
|
"learning_rate": 4.8426626269924266e-05, |
|
"loss": 0.0822, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 1.4121739130434783, |
|
"grad_norm": 0.13451779717959741, |
|
"learning_rate": 4.790596595120699e-05, |
|
"loss": 0.0967, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 1.4156521739130434, |
|
"grad_norm": 0.17014026695649226, |
|
"learning_rate": 4.738723665346021e-05, |
|
"loss": 0.0952, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 1.4191304347826086, |
|
"grad_norm": 0.11335400231382785, |
|
"learning_rate": 4.687045760493468e-05, |
|
"loss": 0.0765, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 1.422608695652174, |
|
"grad_norm": 0.13153029025610707, |
|
"learning_rate": 4.635564796158945e-05, |
|
"loss": 0.0942, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 1.4260869565217391, |
|
"grad_norm": 0.14072727769903307, |
|
"learning_rate": 4.5842826806381544e-05, |
|
"loss": 0.1033, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.4295652173913043, |
|
"grad_norm": 0.19021079673592267, |
|
"learning_rate": 4.533201314855891e-05, |
|
"loss": 0.0908, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 1.4330434782608696, |
|
"grad_norm": 0.1282315437032552, |
|
"learning_rate": 4.48232259229554e-05, |
|
"loss": 0.0923, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 1.4365217391304348, |
|
"grad_norm": 0.10482566251391306, |
|
"learning_rate": 4.431648398928933e-05, |
|
"loss": 0.0769, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"grad_norm": 0.0989285401022153, |
|
"learning_rate": 4.381180613146395e-05, |
|
"loss": 0.0627, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 1.4434782608695653, |
|
"grad_norm": 0.15004726013623923, |
|
"learning_rate": 4.3309211056871546e-05, |
|
"loss": 0.107, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 1.4469565217391305, |
|
"grad_norm": 0.10917064763259954, |
|
"learning_rate": 4.280871739569972e-05, |
|
"loss": 0.0723, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 1.4504347826086956, |
|
"grad_norm": 0.14217337210991582, |
|
"learning_rate": 4.231034370024088e-05, |
|
"loss": 0.0876, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 1.453913043478261, |
|
"grad_norm": 0.12259499737310682, |
|
"learning_rate": 4.181410844420474e-05, |
|
"loss": 0.072, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 1.4573913043478262, |
|
"grad_norm": 0.1383064965783125, |
|
"learning_rate": 4.132003002203314e-05, |
|
"loss": 0.1001, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 1.4608695652173913, |
|
"grad_norm": 0.15628614353703477, |
|
"learning_rate": 4.0828126748218654e-05, |
|
"loss": 0.1024, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.4643478260869565, |
|
"grad_norm": 0.15540806197515133, |
|
"learning_rate": 4.0338416856625294e-05, |
|
"loss": 0.1064, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 1.4678260869565216, |
|
"grad_norm": 0.12867401972303838, |
|
"learning_rate": 3.985091849981297e-05, |
|
"loss": 0.0814, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 1.471304347826087, |
|
"grad_norm": 0.10461015345788115, |
|
"learning_rate": 3.936564974836431e-05, |
|
"loss": 0.0551, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 1.4747826086956521, |
|
"grad_norm": 0.17422707198524348, |
|
"learning_rate": 3.8882628590215074e-05, |
|
"loss": 0.1068, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 1.4782608695652173, |
|
"grad_norm": 0.11823762504382565, |
|
"learning_rate": 3.840187292998717e-05, |
|
"loss": 0.0847, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 1.4817391304347827, |
|
"grad_norm": 0.14190454091036495, |
|
"learning_rate": 3.7923400588325155e-05, |
|
"loss": 0.0985, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 1.4852173913043478, |
|
"grad_norm": 0.1487917306625744, |
|
"learning_rate": 3.7447229301235445e-05, |
|
"loss": 0.0972, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 1.488695652173913, |
|
"grad_norm": 0.11307811508469943, |
|
"learning_rate": 3.697337671942913e-05, |
|
"loss": 0.0769, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 1.4921739130434784, |
|
"grad_norm": 0.12456291954504964, |
|
"learning_rate": 3.6501860407667465e-05, |
|
"loss": 0.0757, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 1.4956521739130435, |
|
"grad_norm": 0.14812964550659216, |
|
"learning_rate": 3.60326978441109e-05, |
|
"loss": 0.1029, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.4991304347826087, |
|
"grad_norm": 0.1681784734853534, |
|
"learning_rate": 3.556590641967115e-05, |
|
"loss": 0.1252, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 1.502608695652174, |
|
"grad_norm": 0.14613030602008723, |
|
"learning_rate": 3.510150343736668e-05, |
|
"loss": 0.0912, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 1.5060869565217392, |
|
"grad_norm": 0.15179818766879094, |
|
"learning_rate": 3.463950611168111e-05, |
|
"loss": 0.0858, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 1.5095652173913043, |
|
"grad_norm": 0.12461414121764455, |
|
"learning_rate": 3.4179931567925216e-05, |
|
"loss": 0.0824, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 1.5130434782608697, |
|
"grad_norm": 0.11765068168074926, |
|
"learning_rate": 3.372279684160221e-05, |
|
"loss": 0.0862, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 1.5165217391304346, |
|
"grad_norm": 0.14280556708472175, |
|
"learning_rate": 3.3268118877776066e-05, |
|
"loss": 0.0954, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"grad_norm": 0.11285620318100742, |
|
"learning_rate": 3.281591453044366e-05, |
|
"loss": 0.0735, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 1.5234782608695652, |
|
"grad_norm": 0.10694921241597416, |
|
"learning_rate": 3.236620056190972e-05, |
|
"loss": 0.069, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 1.5269565217391303, |
|
"grad_norm": 0.12484188708941266, |
|
"learning_rate": 3.191899364216581e-05, |
|
"loss": 0.083, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 1.5304347826086957, |
|
"grad_norm": 0.15429288005492145, |
|
"learning_rate": 3.147431034827208e-05, |
|
"loss": 0.1033, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.5339130434782609, |
|
"grad_norm": 0.1253058317602747, |
|
"learning_rate": 3.103216716374312e-05, |
|
"loss": 0.0751, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 1.537391304347826, |
|
"grad_norm": 0.11203979862187523, |
|
"learning_rate": 3.059258047793661e-05, |
|
"loss": 0.0804, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 1.5408695652173914, |
|
"grad_norm": 0.13184136276253297, |
|
"learning_rate": 3.0155566585446117e-05, |
|
"loss": 0.0892, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 1.5443478260869565, |
|
"grad_norm": 0.10496670695439927, |
|
"learning_rate": 2.9721141685496823e-05, |
|
"loss": 0.08, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 1.5478260869565217, |
|
"grad_norm": 0.11136343180704414, |
|
"learning_rate": 2.9289321881345254e-05, |
|
"loss": 0.0764, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 1.551304347826087, |
|
"grad_norm": 0.14576709922104164, |
|
"learning_rate": 2.8860123179682242e-05, |
|
"loss": 0.1061, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 1.5547826086956522, |
|
"grad_norm": 0.09499364976886815, |
|
"learning_rate": 2.8433561490039573e-05, |
|
"loss": 0.0745, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 1.5582608695652174, |
|
"grad_norm": 0.12469651410155881, |
|
"learning_rate": 2.800965262420043e-05, |
|
"loss": 0.086, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 1.5617391304347827, |
|
"grad_norm": 0.0950193427692519, |
|
"learning_rate": 2.7588412295613043e-05, |
|
"loss": 0.0548, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 1.5652173913043477, |
|
"grad_norm": 0.1436085195291988, |
|
"learning_rate": 2.716985611880841e-05, |
|
"loss": 0.0923, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.568695652173913, |
|
"grad_norm": 0.1220012073528301, |
|
"learning_rate": 2.675399960882138e-05, |
|
"loss": 0.0835, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 1.5721739130434784, |
|
"grad_norm": 0.14250023280956398, |
|
"learning_rate": 2.6340858180615646e-05, |
|
"loss": 0.0817, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 1.5756521739130434, |
|
"grad_norm": 0.14016261789642684, |
|
"learning_rate": 2.593044714851218e-05, |
|
"loss": 0.1009, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 1.5791304347826087, |
|
"grad_norm": 0.1519687009324273, |
|
"learning_rate": 2.5522781725621813e-05, |
|
"loss": 0.0936, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 1.5826086956521739, |
|
"grad_norm": 0.10018240850657148, |
|
"learning_rate": 2.511787702328102e-05, |
|
"loss": 0.0695, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 1.586086956521739, |
|
"grad_norm": 0.15832897678113741, |
|
"learning_rate": 2.471574805049206e-05, |
|
"loss": 0.103, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 1.5895652173913044, |
|
"grad_norm": 0.09635042116603919, |
|
"learning_rate": 2.4316409713366352e-05, |
|
"loss": 0.0713, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 1.5930434782608696, |
|
"grad_norm": 0.16551038949811617, |
|
"learning_rate": 2.3919876814572194e-05, |
|
"loss": 0.1165, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 1.5965217391304347, |
|
"grad_norm": 0.1591761285439053, |
|
"learning_rate": 2.352616405278586e-05, |
|
"loss": 0.1065, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"grad_norm": 0.1257794232379624, |
|
"learning_rate": 2.3135286022146785e-05, |
|
"loss": 0.0878, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.6034782608695652, |
|
"grad_norm": 0.13064370809940834, |
|
"learning_rate": 2.2747257211716757e-05, |
|
"loss": 0.0878, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 1.6069565217391304, |
|
"grad_norm": 0.1373673611302553, |
|
"learning_rate": 2.236209200494258e-05, |
|
"loss": 0.08, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 1.6104347826086958, |
|
"grad_norm": 0.15683223957755238, |
|
"learning_rate": 2.1979804679123106e-05, |
|
"loss": 0.097, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 1.613913043478261, |
|
"grad_norm": 0.11215372603755155, |
|
"learning_rate": 2.1600409404879874e-05, |
|
"loss": 0.0759, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 1.617391304347826, |
|
"grad_norm": 0.12472859826284394, |
|
"learning_rate": 2.122392024563199e-05, |
|
"loss": 0.0798, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 1.6208695652173915, |
|
"grad_norm": 0.14167323311602448, |
|
"learning_rate": 2.0850351157074598e-05, |
|
"loss": 0.1025, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 1.6243478260869564, |
|
"grad_norm": 0.13106838058233283, |
|
"learning_rate": 2.047971598666184e-05, |
|
"loss": 0.0966, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 1.6278260869565218, |
|
"grad_norm": 0.12245656492036927, |
|
"learning_rate": 2.011202847309329e-05, |
|
"loss": 0.0858, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 1.631304347826087, |
|
"grad_norm": 0.15076412437271922, |
|
"learning_rate": 1.9747302245804945e-05, |
|
"loss": 0.0988, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 1.634782608695652, |
|
"grad_norm": 0.1890224571658569, |
|
"learning_rate": 1.9385550824463727e-05, |
|
"loss": 0.141, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.6382608695652174, |
|
"grad_norm": 0.12643818292640252, |
|
"learning_rate": 1.9026787618466646e-05, |
|
"loss": 0.0821, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 1.6417391304347826, |
|
"grad_norm": 0.11974342973177961, |
|
"learning_rate": 1.8671025926443465e-05, |
|
"loss": 0.0852, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 1.6452173913043477, |
|
"grad_norm": 0.11053773314022491, |
|
"learning_rate": 1.8318278935763955e-05, |
|
"loss": 0.0693, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 1.6486956521739131, |
|
"grad_norm": 0.12718860708539992, |
|
"learning_rate": 1.7968559722048906e-05, |
|
"loss": 0.0759, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 1.6521739130434783, |
|
"grad_norm": 0.11472304774066805, |
|
"learning_rate": 1.762188124868557e-05, |
|
"loss": 0.0822, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 1.6556521739130434, |
|
"grad_norm": 0.1586172339858714, |
|
"learning_rate": 1.7278256366347035e-05, |
|
"loss": 0.1156, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 1.6591304347826088, |
|
"grad_norm": 0.16408772559550205, |
|
"learning_rate": 1.6937697812515894e-05, |
|
"loss": 0.0918, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 1.662608695652174, |
|
"grad_norm": 0.12800527362364758, |
|
"learning_rate": 1.660021821101222e-05, |
|
"loss": 0.0789, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 1.666086956521739, |
|
"grad_norm": 0.15521778399290198, |
|
"learning_rate": 1.626583007152539e-05, |
|
"loss": 0.0987, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 1.6695652173913045, |
|
"grad_norm": 0.14944005207844402, |
|
"learning_rate": 1.5934545789150623e-05, |
|
"loss": 0.1133, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.6730434782608694, |
|
"grad_norm": 0.12173810785220801, |
|
"learning_rate": 1.5606377643929304e-05, |
|
"loss": 0.0794, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 1.6765217391304348, |
|
"grad_norm": 0.12290655885053603, |
|
"learning_rate": 1.5281337800393968e-05, |
|
"loss": 0.0717, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 1.6800000000000002, |
|
"grad_norm": 0.13763340851307898, |
|
"learning_rate": 1.4959438307117247e-05, |
|
"loss": 0.095, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 1.683478260869565, |
|
"grad_norm": 0.10678789082393463, |
|
"learning_rate": 1.4640691096265358e-05, |
|
"loss": 0.0838, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 1.6869565217391305, |
|
"grad_norm": 0.12694424997511286, |
|
"learning_rate": 1.4325107983155694e-05, |
|
"loss": 0.0884, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 1.6904347826086956, |
|
"grad_norm": 0.13805939087384794, |
|
"learning_rate": 1.401270066581899e-05, |
|
"loss": 0.0884, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 1.6939130434782608, |
|
"grad_norm": 0.1116542985760522, |
|
"learning_rate": 1.3703480724565577e-05, |
|
"loss": 0.0819, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 1.6973913043478261, |
|
"grad_norm": 0.130701148914566, |
|
"learning_rate": 1.339745962155613e-05, |
|
"loss": 0.0942, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 1.7008695652173913, |
|
"grad_norm": 0.12303229923584438, |
|
"learning_rate": 1.3094648700376954e-05, |
|
"loss": 0.0968, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 1.7043478260869565, |
|
"grad_norm": 0.10050903994662669, |
|
"learning_rate": 1.2795059185619229e-05, |
|
"loss": 0.064, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 1.7078260869565218, |
|
"grad_norm": 0.13529518412698788, |
|
"learning_rate": 1.249870218246323e-05, |
|
"loss": 0.0891, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 1.711304347826087, |
|
"grad_norm": 0.11568064512791533, |
|
"learning_rate": 1.2205588676266388e-05, |
|
"loss": 0.0841, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 1.7147826086956521, |
|
"grad_norm": 0.11324213029173631, |
|
"learning_rate": 1.1915729532156372e-05, |
|
"loss": 0.0693, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 1.7182608695652175, |
|
"grad_norm": 0.12078490458473878, |
|
"learning_rate": 1.1629135494628096e-05, |
|
"loss": 0.0809, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 1.7217391304347827, |
|
"grad_norm": 0.15619885447728415, |
|
"learning_rate": 1.134581718714558e-05, |
|
"loss": 0.0982, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 1.7252173913043478, |
|
"grad_norm": 0.13958396553029748, |
|
"learning_rate": 1.1065785111748117e-05, |
|
"loss": 0.1006, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 1.7286956521739132, |
|
"grad_norm": 0.11936287781907709, |
|
"learning_rate": 1.0789049648661043e-05, |
|
"loss": 0.0778, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 1.7321739130434781, |
|
"grad_norm": 0.13994107260501892, |
|
"learning_rate": 1.0515621055910817e-05, |
|
"loss": 0.0994, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 1.7356521739130435, |
|
"grad_norm": 0.10069177741815626, |
|
"learning_rate": 1.0245509468944992e-05, |
|
"loss": 0.0798, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 1.7391304347826086, |
|
"grad_norm": 0.1520239032704441, |
|
"learning_rate": 9.978724900256265e-06, |
|
"loss": 0.0936, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.7426086956521738, |
|
"grad_norm": 0.12537489299552443, |
|
"learning_rate": 9.715277239011578e-06, |
|
"loss": 0.0759, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 1.7460869565217392, |
|
"grad_norm": 0.16914167358101417, |
|
"learning_rate": 9.455176250685338e-06, |
|
"loss": 0.1159, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 1.7495652173913043, |
|
"grad_norm": 0.12340433382499669, |
|
"learning_rate": 9.198431576697608e-06, |
|
"loss": 0.0809, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 1.7530434782608695, |
|
"grad_norm": 0.16038700994407892, |
|
"learning_rate": 8.945052734056581e-06, |
|
"loss": 0.0927, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 1.7565217391304349, |
|
"grad_norm": 0.18736397280927972, |
|
"learning_rate": 8.695049115005837e-06, |
|
"loss": 0.1138, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"grad_norm": 0.11455094890434803, |
|
"learning_rate": 8.448429986676298e-06, |
|
"loss": 0.0876, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 1.7634782608695652, |
|
"grad_norm": 0.13381829396413253, |
|
"learning_rate": 8.205204490742536e-06, |
|
"loss": 0.0932, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 1.7669565217391305, |
|
"grad_norm": 0.10231732967595585, |
|
"learning_rate": 7.96538164308407e-06, |
|
"loss": 0.0702, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 1.7704347826086957, |
|
"grad_norm": 0.0947188798552471, |
|
"learning_rate": 7.728970333451035e-06, |
|
"loss": 0.0706, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 1.7739130434782608, |
|
"grad_norm": 0.09733737409054823, |
|
"learning_rate": 7.4959793251348055e-06, |
|
"loss": 0.0644, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 1.7773913043478262, |
|
"grad_norm": 0.11169634637379897, |
|
"learning_rate": 7.2664172546429655e-06, |
|
"loss": 0.0709, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 1.7808695652173911, |
|
"grad_norm": 0.12974806998277916, |
|
"learning_rate": 7.040292631379386e-06, |
|
"loss": 0.0856, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 1.7843478260869565, |
|
"grad_norm": 0.13011819014873824, |
|
"learning_rate": 6.817613837328573e-06, |
|
"loss": 0.0924, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 1.787826086956522, |
|
"grad_norm": 0.1508887480796253, |
|
"learning_rate": 6.598389126745208e-06, |
|
"loss": 0.1101, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 1.7913043478260868, |
|
"grad_norm": 0.1528558553271661, |
|
"learning_rate": 6.382626625847921e-06, |
|
"loss": 0.1014, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 1.7947826086956522, |
|
"grad_norm": 0.13295695013628608, |
|
"learning_rate": 6.170334332518324e-06, |
|
"loss": 0.0866, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 1.7982608695652174, |
|
"grad_norm": 0.16036744040311404, |
|
"learning_rate": 5.961520116004327e-06, |
|
"loss": 0.1076, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 1.8017391304347825, |
|
"grad_norm": 0.11717096876409042, |
|
"learning_rate": 5.756191716628556e-06, |
|
"loss": 0.0688, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 1.8052173913043479, |
|
"grad_norm": 0.11484830279438352, |
|
"learning_rate": 5.554356745501454e-06, |
|
"loss": 0.0694, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 1.808695652173913, |
|
"grad_norm": 0.17176181086966022, |
|
"learning_rate": 5.3560226842390596e-06, |
|
"loss": 0.1032, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 1.8121739130434782, |
|
"grad_norm": 0.11739088349195866, |
|
"learning_rate": 5.1611968846857815e-06, |
|
"loss": 0.0732, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 1.8156521739130436, |
|
"grad_norm": 0.13709017479262753, |
|
"learning_rate": 4.969886568641757e-06, |
|
"loss": 0.0918, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 1.8191304347826087, |
|
"grad_norm": 0.1280476174629274, |
|
"learning_rate": 4.7820988275953045e-06, |
|
"loss": 0.0938, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 1.8226086956521739, |
|
"grad_norm": 0.11201422652339658, |
|
"learning_rate": 4.597840622459937e-06, |
|
"loss": 0.0814, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 1.8260869565217392, |
|
"grad_norm": 0.09871056879272744, |
|
"learning_rate": 4.417118783316388e-06, |
|
"loss": 0.072, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 1.8295652173913044, |
|
"grad_norm": 0.10542472286239411, |
|
"learning_rate": 4.2399400091594154e-06, |
|
"loss": 0.068, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 1.8330434782608696, |
|
"grad_norm": 0.14017893040374907, |
|
"learning_rate": 4.066310867649481e-06, |
|
"loss": 0.1032, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 1.836521739130435, |
|
"grad_norm": 0.11855048113345314, |
|
"learning_rate": 3.896237794869339e-06, |
|
"loss": 0.0783, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 1.8399999999999999, |
|
"grad_norm": 0.1244102175680237, |
|
"learning_rate": 3.729727095085422e-06, |
|
"loss": 0.0922, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 1.8434782608695652, |
|
"grad_norm": 0.12180644294551433, |
|
"learning_rate": 3.566784940514145e-06, |
|
"loss": 0.0807, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 1.8469565217391304, |
|
"grad_norm": 0.09761026100653182, |
|
"learning_rate": 3.40741737109318e-06, |
|
"loss": 0.0641, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 1.8504347826086955, |
|
"grad_norm": 0.09710029722289329, |
|
"learning_rate": 3.2516302942574793e-06, |
|
"loss": 0.067, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 1.853913043478261, |
|
"grad_norm": 0.10724535703528021, |
|
"learning_rate": 3.0994294847203733e-06, |
|
"loss": 0.0743, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 1.857391304347826, |
|
"grad_norm": 0.13083100814230067, |
|
"learning_rate": 2.9508205842594728e-06, |
|
"loss": 0.0754, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 1.8608695652173912, |
|
"grad_norm": 0.12672158607204304, |
|
"learning_rate": 2.8058091015075394e-06, |
|
"loss": 0.078, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 1.8643478260869566, |
|
"grad_norm": 0.17103224377006737, |
|
"learning_rate": 2.6644004117483356e-06, |
|
"loss": 0.0922, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 1.8678260869565217, |
|
"grad_norm": 0.134150142101436, |
|
"learning_rate": 2.526599756717285e-06, |
|
"loss": 0.1002, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 1.871304347826087, |
|
"grad_norm": 0.129521169878982, |
|
"learning_rate": 2.392412244407294e-06, |
|
"loss": 0.0836, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 1.8747826086956523, |
|
"grad_norm": 0.10885289790789841, |
|
"learning_rate": 2.26184284887927e-06, |
|
"loss": 0.0774, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 1.8782608695652174, |
|
"grad_norm": 0.10488094490283079, |
|
"learning_rate": 2.134896410077891e-06, |
|
"loss": 0.0789, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 1.8817391304347826, |
|
"grad_norm": 0.11889491296378912, |
|
"learning_rate": 2.011577633652062e-06, |
|
"loss": 0.0782, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 1.885217391304348, |
|
"grad_norm": 0.12096235669049085, |
|
"learning_rate": 1.8918910907805732e-06, |
|
"loss": 0.0881, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 1.8886956521739129, |
|
"grad_norm": 0.1106479394276716, |
|
"learning_rate": 1.7758412180026273e-06, |
|
"loss": 0.0802, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 1.8921739130434783, |
|
"grad_norm": 0.12821924742613686, |
|
"learning_rate": 1.6634323170533928e-06, |
|
"loss": 0.0911, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 1.8956521739130436, |
|
"grad_norm": 0.15604807612172736, |
|
"learning_rate": 1.5546685547045192e-06, |
|
"loss": 0.1, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 1.8991304347826086, |
|
"grad_norm": 0.1478681396223387, |
|
"learning_rate": 1.4495539626097288e-06, |
|
"loss": 0.0804, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 1.902608695652174, |
|
"grad_norm": 0.13421748048136942, |
|
"learning_rate": 1.348092437155346e-06, |
|
"loss": 0.089, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 1.906086956521739, |
|
"grad_norm": 0.11687932254739727, |
|
"learning_rate": 1.2502877393158586e-06, |
|
"loss": 0.0871, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 1.9095652173913042, |
|
"grad_norm": 0.15643926713744022, |
|
"learning_rate": 1.1561434945145277e-06, |
|
"loss": 0.104, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 1.9130434782608696, |
|
"grad_norm": 0.10696169647909613, |
|
"learning_rate": 1.0656631924889749e-06, |
|
"loss": 0.0716, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.9165217391304348, |
|
"grad_norm": 0.14019705935951768, |
|
"learning_rate": 9.788501871618728e-07, |
|
"loss": 0.0898, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"grad_norm": 0.15767772433554056, |
|
"learning_rate": 8.957076965165235e-07, |
|
"loss": 0.1015, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 1.9234782608695653, |
|
"grad_norm": 0.12202925229447881, |
|
"learning_rate": 8.162388024777201e-07, |
|
"loss": 0.0889, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 1.9269565217391305, |
|
"grad_norm": 0.14213284579860058, |
|
"learning_rate": 7.404464507973608e-07, |
|
"loss": 0.1061, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 1.9304347826086956, |
|
"grad_norm": 0.11946138428666646, |
|
"learning_rate": 6.683334509453465e-07, |
|
"loss": 0.0756, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 1.933913043478261, |
|
"grad_norm": 0.1776730484619494, |
|
"learning_rate": 5.999024760054095e-07, |
|
"loss": 0.1156, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 1.9373913043478261, |
|
"grad_norm": 0.15552558119011417, |
|
"learning_rate": 5.351560625760254e-07, |
|
"loss": 0.1111, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 1.9408695652173913, |
|
"grad_norm": 0.1269110866764246, |
|
"learning_rate": 4.7409661067642217e-07, |
|
"loss": 0.0929, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 1.9443478260869567, |
|
"grad_norm": 0.10309350272790443, |
|
"learning_rate": 4.167263836575286e-07, |
|
"loss": 0.0547, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 1.9478260869565216, |
|
"grad_norm": 0.12377918248036159, |
|
"learning_rate": 3.630475081181861e-07, |
|
"loss": 0.0808, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 1.951304347826087, |
|
"grad_norm": 0.12729430798666608, |
|
"learning_rate": 3.1306197382624526e-07, |
|
"loss": 0.077, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 1.9547826086956521, |
|
"grad_norm": 0.11766868772742071, |
|
"learning_rate": 2.667716336448356e-07, |
|
"loss": 0.0871, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 1.9582608695652173, |
|
"grad_norm": 0.12138412723458143, |
|
"learning_rate": 2.2417820346367635e-07, |
|
"loss": 0.0983, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 1.9617391304347827, |
|
"grad_norm": 0.12163696179721654, |
|
"learning_rate": 1.8528326213548274e-07, |
|
"loss": 0.0855, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 1.9652173913043478, |
|
"grad_norm": 0.1569270166290431, |
|
"learning_rate": 1.50088251417424e-07, |
|
"loss": 0.1015, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 1.968695652173913, |
|
"grad_norm": 0.12730784199491677, |
|
"learning_rate": 1.1859447591769934e-07, |
|
"loss": 0.0878, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 1.9721739130434783, |
|
"grad_norm": 0.12648022636737355, |
|
"learning_rate": 9.080310304716567e-08, |
|
"loss": 0.0842, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 1.9756521739130435, |
|
"grad_norm": 0.11283992913356376, |
|
"learning_rate": 6.671516297606095e-08, |
|
"loss": 0.0834, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 1.9791304347826086, |
|
"grad_norm": 0.10119868305303333, |
|
"learning_rate": 4.6331548595845984e-08, |
|
"loss": 0.0667, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 1.982608695652174, |
|
"grad_norm": 0.1227080883131745, |
|
"learning_rate": 2.965301548606414e-08, |
|
"loss": 0.0873, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 1.9860869565217392, |
|
"grad_norm": 0.158380237566967, |
|
"learning_rate": 1.6680181886352676e-08, |
|
"loss": 0.1049, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 1.9895652173913043, |
|
"grad_norm": 0.17246726825049064, |
|
"learning_rate": 7.413528673549941e-09, |
|
"loss": 0.0969, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 1.9930434782608697, |
|
"grad_norm": 0.15178078485673158, |
|
"learning_rate": 1.8533993438318852e-09, |
|
"loss": 0.0884, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 1.9965217391304346, |
|
"grad_norm": 0.1411963796704214, |
|
"learning_rate": 0.0, |
|
"loss": 0.0874, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 1.9965217391304346, |
|
"eval_loss": 0.14970487356185913, |
|
"eval_runtime": 49.8439, |
|
"eval_samples_per_second": 4.795, |
|
"eval_steps_per_second": 0.602, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 1.9965217391304346, |
|
"step": 574, |
|
"total_flos": 465841769250816.0, |
|
"train_loss": 0.11642231966144947, |
|
"train_runtime": 5186.3709, |
|
"train_samples_per_second": 1.772, |
|
"train_steps_per_second": 0.111 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 574, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 465841769250816.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|