|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 10.0, |
|
"global_step": 127150, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.980338183248132e-05, |
|
"loss": 8.0518, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.9606763664962646e-05, |
|
"loss": 3.358, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.9410145497443964e-05, |
|
"loss": 2.9014, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.921352732992528e-05, |
|
"loss": 2.6617, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.901690916240661e-05, |
|
"loss": 2.4945, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.882029099488793e-05, |
|
"loss": 2.3686, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.862367282736925e-05, |
|
"loss": 2.2674, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.8427054659850575e-05, |
|
"loss": 2.1889, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.8230436492331893e-05, |
|
"loss": 2.1107, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.803381832481321e-05, |
|
"loss": 2.0427, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.783720015729454e-05, |
|
"loss": 1.9907, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.7640581989775855e-05, |
|
"loss": 1.9401, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.744396382225718e-05, |
|
"loss": 1.9027, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.72473456547385e-05, |
|
"loss": 1.8655, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.705072748721982e-05, |
|
"loss": 1.823, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.685410931970115e-05, |
|
"loss": 1.7995, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 4.6657491152182466e-05, |
|
"loss": 1.7682, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 4.6460872984663785e-05, |
|
"loss": 1.7367, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 4.626425481714511e-05, |
|
"loss": 1.7111, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 4.606763664962643e-05, |
|
"loss": 1.6903, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.5871018482107746e-05, |
|
"loss": 1.6655, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 4.567440031458907e-05, |
|
"loss": 1.6468, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 4.547778214707039e-05, |
|
"loss": 1.6196, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 4.528116397955171e-05, |
|
"loss": 1.6017, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 4.508454581203303e-05, |
|
"loss": 1.5889, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 4.488792764451436e-05, |
|
"loss": 1.5679, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 4.4691309476995676e-05, |
|
"loss": 1.5419, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 4.4494691309477e-05, |
|
"loss": 1.5339, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 4.429807314195832e-05, |
|
"loss": 1.5189, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 4.410145497443964e-05, |
|
"loss": 1.5071, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 4.390483680692096e-05, |
|
"loss": 1.4936, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 4.370821863940228e-05, |
|
"loss": 1.4849, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 4.3511600471883605e-05, |
|
"loss": 1.4671, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 4.3314982304364923e-05, |
|
"loss": 1.4519, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 4.311836413684625e-05, |
|
"loss": 1.4411, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 4.2921745969327573e-05, |
|
"loss": 1.4355, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 4.272512780180889e-05, |
|
"loss": 1.4224, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 4.252850963429021e-05, |
|
"loss": 1.4221, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 4.2331891466771535e-05, |
|
"loss": 1.4053, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 4.213527329925285e-05, |
|
"loss": 1.394, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 4.193865513173417e-05, |
|
"loss": 1.3904, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 4.1742036964215496e-05, |
|
"loss": 1.377, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 4.1545418796696815e-05, |
|
"loss": 1.3735, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 4.134880062917813e-05, |
|
"loss": 1.364, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 4.115218246165946e-05, |
|
"loss": 1.3605, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 4.095556429414078e-05, |
|
"loss": 1.3539, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 4.07589461266221e-05, |
|
"loss": 1.3397, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 4.0562327959103426e-05, |
|
"loss": 1.3326, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 4.0365709791584744e-05, |
|
"loss": 1.3295, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 4.016909162406606e-05, |
|
"loss": 1.3297, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 3.997247345654739e-05, |
|
"loss": 1.312, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 3.9775855289028706e-05, |
|
"loss": 1.2981, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 3.957923712151003e-05, |
|
"loss": 1.2988, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 3.938261895399135e-05, |
|
"loss": 1.289, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 3.9186000786472674e-05, |
|
"loss": 1.2861, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 3.8989382618954e-05, |
|
"loss": 1.2823, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 3.879276445143532e-05, |
|
"loss": 1.2718, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 3.8596146283916635e-05, |
|
"loss": 1.2673, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 3.839952811639796e-05, |
|
"loss": 1.2672, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 3.820290994887928e-05, |
|
"loss": 1.2538, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 3.80062917813606e-05, |
|
"loss": 1.2533, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 3.780967361384192e-05, |
|
"loss": 1.2519, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 3.761305544632324e-05, |
|
"loss": 1.2437, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 3.741643727880456e-05, |
|
"loss": 1.2363, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 3.721981911128588e-05, |
|
"loss": 1.2397, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 3.702320094376721e-05, |
|
"loss": 1.2328, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 3.6826582776248526e-05, |
|
"loss": 1.2336, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 3.662996460872985e-05, |
|
"loss": 1.2271, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 3.643334644121117e-05, |
|
"loss": 1.2199, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 3.623672827369249e-05, |
|
"loss": 1.2167, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 3.604011010617381e-05, |
|
"loss": 1.2115, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 3.584349193865513e-05, |
|
"loss": 1.2095, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 3.564687377113645e-05, |
|
"loss": 1.2043, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 3.5450255603617774e-05, |
|
"loss": 1.2029, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 3.52536374360991e-05, |
|
"loss": 1.1952, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 3.5057019268580424e-05, |
|
"loss": 1.1916, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 3.486040110106174e-05, |
|
"loss": 1.183, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"learning_rate": 3.466378293354306e-05, |
|
"loss": 1.1823, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 3.11, |
|
"learning_rate": 3.4467164766024386e-05, |
|
"loss": 1.1806, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"learning_rate": 3.4270546598505704e-05, |
|
"loss": 1.182, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"learning_rate": 3.407392843098702e-05, |
|
"loss": 1.1705, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"learning_rate": 3.387731026346835e-05, |
|
"loss": 1.174, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"learning_rate": 3.3680692095949665e-05, |
|
"loss": 1.1634, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"learning_rate": 3.3484073928430984e-05, |
|
"loss": 1.1655, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 3.34, |
|
"learning_rate": 3.328745576091231e-05, |
|
"loss": 1.1622, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"learning_rate": 3.3090837593393633e-05, |
|
"loss": 1.161, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 3.42, |
|
"learning_rate": 3.289421942587495e-05, |
|
"loss": 1.1495, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 3.46, |
|
"learning_rate": 3.269760125835628e-05, |
|
"loss": 1.1522, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"learning_rate": 3.2500983090837595e-05, |
|
"loss": 1.1483, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"learning_rate": 3.230436492331891e-05, |
|
"loss": 1.1516, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 3.58, |
|
"learning_rate": 3.210774675580024e-05, |
|
"loss": 1.1475, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 3.62, |
|
"learning_rate": 3.1911128588281556e-05, |
|
"loss": 1.1386, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 3.66, |
|
"learning_rate": 3.1714510420762875e-05, |
|
"loss": 1.142, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"learning_rate": 3.15178922532442e-05, |
|
"loss": 1.1403, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"learning_rate": 3.1321274085725525e-05, |
|
"loss": 1.1328, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 3.78, |
|
"learning_rate": 3.112465591820685e-05, |
|
"loss": 1.1257, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 3.81, |
|
"learning_rate": 3.092803775068817e-05, |
|
"loss": 1.1259, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 3.85, |
|
"learning_rate": 3.0731419583169486e-05, |
|
"loss": 1.128, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 3.89, |
|
"learning_rate": 3.053480141565081e-05, |
|
"loss": 1.1299, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"learning_rate": 3.033818324813213e-05, |
|
"loss": 1.1285, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 3.97, |
|
"learning_rate": 3.0141565080613447e-05, |
|
"loss": 1.1224, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"learning_rate": 2.9944946913094772e-05, |
|
"loss": 1.116, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"learning_rate": 2.9748328745576094e-05, |
|
"loss": 1.1125, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 4.09, |
|
"learning_rate": 2.9551710578057412e-05, |
|
"loss": 1.1031, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 4.13, |
|
"learning_rate": 2.9355092410538737e-05, |
|
"loss": 1.1091, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 4.17, |
|
"learning_rate": 2.9158474243020055e-05, |
|
"loss": 1.1058, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 4.21, |
|
"learning_rate": 2.8961856075501377e-05, |
|
"loss": 1.1025, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 4.25, |
|
"learning_rate": 2.8765237907982702e-05, |
|
"loss": 1.1076, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 4.29, |
|
"learning_rate": 2.856861974046402e-05, |
|
"loss": 1.1052, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 4.33, |
|
"learning_rate": 2.837200157294534e-05, |
|
"loss": 1.0995, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 4.36, |
|
"learning_rate": 2.8175383405426664e-05, |
|
"loss": 1.0948, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"learning_rate": 2.7978765237907985e-05, |
|
"loss": 1.0985, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 4.44, |
|
"learning_rate": 2.7782147070389303e-05, |
|
"loss": 1.0969, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 4.48, |
|
"learning_rate": 2.758552890287063e-05, |
|
"loss": 1.0921, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 4.52, |
|
"learning_rate": 2.7388910735351947e-05, |
|
"loss": 1.0913, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 4.56, |
|
"learning_rate": 2.719229256783327e-05, |
|
"loss": 1.0883, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 4.6, |
|
"learning_rate": 2.699567440031459e-05, |
|
"loss": 1.0859, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 4.64, |
|
"learning_rate": 2.679905623279591e-05, |
|
"loss": 1.0898, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 4.68, |
|
"learning_rate": 2.6602438065277236e-05, |
|
"loss": 1.0809, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 4.72, |
|
"learning_rate": 2.6405819897758555e-05, |
|
"loss": 1.084, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 4.76, |
|
"learning_rate": 2.6209201730239873e-05, |
|
"loss": 1.0767, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"learning_rate": 2.6012583562721198e-05, |
|
"loss": 1.0709, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 4.84, |
|
"learning_rate": 2.581596539520252e-05, |
|
"loss": 1.0789, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 4.88, |
|
"learning_rate": 2.5619347227683838e-05, |
|
"loss": 1.0779, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 4.92, |
|
"learning_rate": 2.5422729060165163e-05, |
|
"loss": 1.0732, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 4.95, |
|
"learning_rate": 2.522611089264648e-05, |
|
"loss": 1.0718, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"learning_rate": 2.5029492725127802e-05, |
|
"loss": 1.0665, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 5.03, |
|
"learning_rate": 2.4832874557609124e-05, |
|
"loss": 1.0673, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 5.07, |
|
"learning_rate": 2.4636256390090446e-05, |
|
"loss": 1.0603, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 5.11, |
|
"learning_rate": 2.4439638222571767e-05, |
|
"loss": 1.0557, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 5.15, |
|
"learning_rate": 2.4243020055053085e-05, |
|
"loss": 1.0563, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 5.19, |
|
"learning_rate": 2.404640188753441e-05, |
|
"loss": 1.0557, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 5.23, |
|
"learning_rate": 2.3849783720015732e-05, |
|
"loss": 1.0586, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 5.27, |
|
"learning_rate": 2.3653165552497054e-05, |
|
"loss": 1.0574, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 5.31, |
|
"learning_rate": 2.3456547384978372e-05, |
|
"loss": 1.0578, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 5.35, |
|
"learning_rate": 2.3259929217459694e-05, |
|
"loss": 1.0569, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 5.39, |
|
"learning_rate": 2.3063311049941015e-05, |
|
"loss": 1.0541, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 5.43, |
|
"learning_rate": 2.2866692882422337e-05, |
|
"loss": 1.0532, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 5.47, |
|
"learning_rate": 2.267007471490366e-05, |
|
"loss": 1.0538, |
|
"step": 69500 |
|
}, |
|
{ |
|
"epoch": 5.51, |
|
"learning_rate": 2.247345654738498e-05, |
|
"loss": 1.0467, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 5.54, |
|
"learning_rate": 2.2276838379866298e-05, |
|
"loss": 1.0483, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 5.58, |
|
"learning_rate": 2.2080220212347623e-05, |
|
"loss": 1.0565, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 5.62, |
|
"learning_rate": 2.1883602044828945e-05, |
|
"loss": 1.0494, |
|
"step": 71500 |
|
}, |
|
{ |
|
"epoch": 5.66, |
|
"learning_rate": 2.1686983877310266e-05, |
|
"loss": 1.0423, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 5.7, |
|
"learning_rate": 2.1490365709791585e-05, |
|
"loss": 1.0449, |
|
"step": 72500 |
|
}, |
|
{ |
|
"epoch": 5.74, |
|
"learning_rate": 2.1293747542272906e-05, |
|
"loss": 1.0447, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 5.78, |
|
"learning_rate": 2.1097129374754228e-05, |
|
"loss": 1.0411, |
|
"step": 73500 |
|
}, |
|
{ |
|
"epoch": 5.82, |
|
"learning_rate": 2.090051120723555e-05, |
|
"loss": 1.0424, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 5.86, |
|
"learning_rate": 2.070389303971687e-05, |
|
"loss": 1.0394, |
|
"step": 74500 |
|
}, |
|
{ |
|
"epoch": 5.9, |
|
"learning_rate": 2.0507274872198193e-05, |
|
"loss": 1.035, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 5.94, |
|
"learning_rate": 2.031065670467951e-05, |
|
"loss": 1.0343, |
|
"step": 75500 |
|
}, |
|
{ |
|
"epoch": 5.98, |
|
"learning_rate": 2.0114038537160836e-05, |
|
"loss": 1.0371, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 6.02, |
|
"learning_rate": 1.9917420369642157e-05, |
|
"loss": 1.0269, |
|
"step": 76500 |
|
}, |
|
{ |
|
"epoch": 6.06, |
|
"learning_rate": 1.972080220212348e-05, |
|
"loss": 1.0361, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 6.1, |
|
"learning_rate": 1.9524184034604797e-05, |
|
"loss": 1.0291, |
|
"step": 77500 |
|
}, |
|
{ |
|
"epoch": 6.13, |
|
"learning_rate": 1.932756586708612e-05, |
|
"loss": 1.0275, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 6.17, |
|
"learning_rate": 1.913094769956744e-05, |
|
"loss": 1.0308, |
|
"step": 78500 |
|
}, |
|
{ |
|
"epoch": 6.21, |
|
"learning_rate": 1.8934329532048762e-05, |
|
"loss": 1.0259, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 6.25, |
|
"learning_rate": 1.8737711364530084e-05, |
|
"loss": 1.0238, |
|
"step": 79500 |
|
}, |
|
{ |
|
"epoch": 6.29, |
|
"learning_rate": 1.8541093197011405e-05, |
|
"loss": 1.0263, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 6.33, |
|
"learning_rate": 1.8344475029492724e-05, |
|
"loss": 1.024, |
|
"step": 80500 |
|
}, |
|
{ |
|
"epoch": 6.37, |
|
"learning_rate": 1.814785686197405e-05, |
|
"loss": 1.0217, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 6.41, |
|
"learning_rate": 1.795123869445537e-05, |
|
"loss": 1.0229, |
|
"step": 81500 |
|
}, |
|
{ |
|
"epoch": 6.45, |
|
"learning_rate": 1.7754620526936692e-05, |
|
"loss": 1.0211, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 6.49, |
|
"learning_rate": 1.755800235941801e-05, |
|
"loss": 1.0143, |
|
"step": 82500 |
|
}, |
|
{ |
|
"epoch": 6.53, |
|
"learning_rate": 1.736138419189933e-05, |
|
"loss": 1.0234, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 6.57, |
|
"learning_rate": 1.7164766024380653e-05, |
|
"loss": 1.0182, |
|
"step": 83500 |
|
}, |
|
{ |
|
"epoch": 6.61, |
|
"learning_rate": 1.6968147856861975e-05, |
|
"loss": 1.0163, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 6.65, |
|
"learning_rate": 1.6771529689343296e-05, |
|
"loss": 1.0143, |
|
"step": 84500 |
|
}, |
|
{ |
|
"epoch": 6.69, |
|
"learning_rate": 1.6574911521824618e-05, |
|
"loss": 1.0184, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 6.72, |
|
"learning_rate": 1.6378293354305936e-05, |
|
"loss": 1.0183, |
|
"step": 85500 |
|
}, |
|
{ |
|
"epoch": 6.76, |
|
"learning_rate": 1.618167518678726e-05, |
|
"loss": 1.0174, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 6.8, |
|
"learning_rate": 1.5985057019268583e-05, |
|
"loss": 1.0151, |
|
"step": 86500 |
|
}, |
|
{ |
|
"epoch": 6.84, |
|
"learning_rate": 1.5788438851749904e-05, |
|
"loss": 1.0116, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 6.88, |
|
"learning_rate": 1.5591820684231223e-05, |
|
"loss": 1.0114, |
|
"step": 87500 |
|
}, |
|
{ |
|
"epoch": 6.92, |
|
"learning_rate": 1.5395202516712544e-05, |
|
"loss": 1.0144, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 6.96, |
|
"learning_rate": 1.5198584349193868e-05, |
|
"loss": 1.0106, |
|
"step": 88500 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 1.5001966181675187e-05, |
|
"loss": 1.0196, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 7.04, |
|
"learning_rate": 1.4805348014156509e-05, |
|
"loss": 1.0041, |
|
"step": 89500 |
|
}, |
|
{ |
|
"epoch": 7.08, |
|
"learning_rate": 1.460872984663783e-05, |
|
"loss": 1.0061, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 7.12, |
|
"learning_rate": 1.441211167911915e-05, |
|
"loss": 1.0013, |
|
"step": 90500 |
|
}, |
|
{ |
|
"epoch": 7.16, |
|
"learning_rate": 1.4215493511600472e-05, |
|
"loss": 1.0085, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 7.2, |
|
"learning_rate": 1.4018875344081794e-05, |
|
"loss": 1.0028, |
|
"step": 91500 |
|
}, |
|
{ |
|
"epoch": 7.24, |
|
"learning_rate": 1.3822257176563117e-05, |
|
"loss": 1.0028, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 7.27, |
|
"learning_rate": 1.3625639009044435e-05, |
|
"loss": 0.9988, |
|
"step": 92500 |
|
}, |
|
{ |
|
"epoch": 7.31, |
|
"learning_rate": 1.3429020841525759e-05, |
|
"loss": 1.0067, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 7.35, |
|
"learning_rate": 1.323240267400708e-05, |
|
"loss": 1.0072, |
|
"step": 93500 |
|
}, |
|
{ |
|
"epoch": 7.39, |
|
"learning_rate": 1.30357845064884e-05, |
|
"loss": 1.0001, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 7.43, |
|
"learning_rate": 1.2839166338969722e-05, |
|
"loss": 1.0012, |
|
"step": 94500 |
|
}, |
|
{ |
|
"epoch": 7.47, |
|
"learning_rate": 1.2642548171451043e-05, |
|
"loss": 1.0003, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 7.51, |
|
"learning_rate": 1.2445930003932365e-05, |
|
"loss": 0.9877, |
|
"step": 95500 |
|
}, |
|
{ |
|
"epoch": 7.55, |
|
"learning_rate": 1.2249311836413685e-05, |
|
"loss": 1.0001, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 7.59, |
|
"learning_rate": 1.2052693668895006e-05, |
|
"loss": 0.9954, |
|
"step": 96500 |
|
}, |
|
{ |
|
"epoch": 7.63, |
|
"learning_rate": 1.1856075501376328e-05, |
|
"loss": 0.9963, |
|
"step": 97000 |
|
}, |
|
{ |
|
"epoch": 7.67, |
|
"learning_rate": 1.1659457333857648e-05, |
|
"loss": 0.994, |
|
"step": 97500 |
|
}, |
|
{ |
|
"epoch": 7.71, |
|
"learning_rate": 1.1462839166338971e-05, |
|
"loss": 1.003, |
|
"step": 98000 |
|
}, |
|
{ |
|
"epoch": 7.75, |
|
"learning_rate": 1.1266220998820291e-05, |
|
"loss": 0.9931, |
|
"step": 98500 |
|
}, |
|
{ |
|
"epoch": 7.79, |
|
"learning_rate": 1.1069602831301613e-05, |
|
"loss": 0.9983, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 7.83, |
|
"learning_rate": 1.0872984663782934e-05, |
|
"loss": 0.9922, |
|
"step": 99500 |
|
}, |
|
{ |
|
"epoch": 7.86, |
|
"learning_rate": 1.0676366496264254e-05, |
|
"loss": 0.9968, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 7.9, |
|
"learning_rate": 1.0479748328745578e-05, |
|
"loss": 0.9956, |
|
"step": 100500 |
|
}, |
|
{ |
|
"epoch": 7.94, |
|
"learning_rate": 1.0283130161226898e-05, |
|
"loss": 0.991, |
|
"step": 101000 |
|
}, |
|
{ |
|
"epoch": 7.98, |
|
"learning_rate": 1.008651199370822e-05, |
|
"loss": 0.9915, |
|
"step": 101500 |
|
}, |
|
{ |
|
"epoch": 8.02, |
|
"learning_rate": 9.88989382618954e-06, |
|
"loss": 0.9859, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 8.06, |
|
"learning_rate": 9.69327565867086e-06, |
|
"loss": 0.986, |
|
"step": 102500 |
|
}, |
|
{ |
|
"epoch": 8.1, |
|
"learning_rate": 9.496657491152184e-06, |
|
"loss": 0.9937, |
|
"step": 103000 |
|
}, |
|
{ |
|
"epoch": 8.14, |
|
"learning_rate": 9.300039323633504e-06, |
|
"loss": 0.9863, |
|
"step": 103500 |
|
}, |
|
{ |
|
"epoch": 8.18, |
|
"learning_rate": 9.103421156114826e-06, |
|
"loss": 0.9836, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 8.22, |
|
"learning_rate": 8.906802988596147e-06, |
|
"loss": 0.9857, |
|
"step": 104500 |
|
}, |
|
{ |
|
"epoch": 8.26, |
|
"learning_rate": 8.710184821077467e-06, |
|
"loss": 0.989, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 8.3, |
|
"learning_rate": 8.51356665355879e-06, |
|
"loss": 0.9886, |
|
"step": 105500 |
|
}, |
|
{ |
|
"epoch": 8.34, |
|
"learning_rate": 8.31694848604011e-06, |
|
"loss": 0.9885, |
|
"step": 106000 |
|
}, |
|
{ |
|
"epoch": 8.38, |
|
"learning_rate": 8.120330318521432e-06, |
|
"loss": 0.9847, |
|
"step": 106500 |
|
}, |
|
{ |
|
"epoch": 8.42, |
|
"learning_rate": 7.923712151002753e-06, |
|
"loss": 0.9846, |
|
"step": 107000 |
|
}, |
|
{ |
|
"epoch": 8.45, |
|
"learning_rate": 7.727093983484073e-06, |
|
"loss": 0.9882, |
|
"step": 107500 |
|
}, |
|
{ |
|
"epoch": 8.49, |
|
"learning_rate": 7.530475815965396e-06, |
|
"loss": 0.9866, |
|
"step": 108000 |
|
}, |
|
{ |
|
"epoch": 8.53, |
|
"learning_rate": 7.333857648446717e-06, |
|
"loss": 0.9823, |
|
"step": 108500 |
|
}, |
|
{ |
|
"epoch": 8.57, |
|
"learning_rate": 7.137239480928037e-06, |
|
"loss": 0.9841, |
|
"step": 109000 |
|
}, |
|
{ |
|
"epoch": 8.61, |
|
"learning_rate": 6.94062131340936e-06, |
|
"loss": 0.9844, |
|
"step": 109500 |
|
}, |
|
{ |
|
"epoch": 8.65, |
|
"learning_rate": 6.7440031458906806e-06, |
|
"loss": 0.9817, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 8.69, |
|
"learning_rate": 6.547384978372002e-06, |
|
"loss": 0.9854, |
|
"step": 110500 |
|
}, |
|
{ |
|
"epoch": 8.73, |
|
"learning_rate": 6.350766810853323e-06, |
|
"loss": 0.9845, |
|
"step": 111000 |
|
}, |
|
{ |
|
"epoch": 8.77, |
|
"learning_rate": 6.1541486433346445e-06, |
|
"loss": 0.984, |
|
"step": 111500 |
|
}, |
|
{ |
|
"epoch": 8.81, |
|
"learning_rate": 5.957530475815966e-06, |
|
"loss": 0.9865, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 8.85, |
|
"learning_rate": 5.760912308297287e-06, |
|
"loss": 0.9765, |
|
"step": 112500 |
|
}, |
|
{ |
|
"epoch": 8.89, |
|
"learning_rate": 5.564294140778608e-06, |
|
"loss": 0.9867, |
|
"step": 113000 |
|
}, |
|
{ |
|
"epoch": 8.93, |
|
"learning_rate": 5.367675973259929e-06, |
|
"loss": 0.986, |
|
"step": 113500 |
|
}, |
|
{ |
|
"epoch": 8.97, |
|
"learning_rate": 5.171057805741251e-06, |
|
"loss": 0.9817, |
|
"step": 114000 |
|
}, |
|
{ |
|
"epoch": 9.01, |
|
"learning_rate": 4.9744396382225725e-06, |
|
"loss": 0.9774, |
|
"step": 114500 |
|
}, |
|
{ |
|
"epoch": 9.04, |
|
"learning_rate": 4.777821470703893e-06, |
|
"loss": 0.9811, |
|
"step": 115000 |
|
}, |
|
{ |
|
"epoch": 9.08, |
|
"learning_rate": 4.581203303185214e-06, |
|
"loss": 0.9759, |
|
"step": 115500 |
|
}, |
|
{ |
|
"epoch": 9.12, |
|
"learning_rate": 4.384585135666536e-06, |
|
"loss": 0.9769, |
|
"step": 116000 |
|
}, |
|
{ |
|
"epoch": 9.16, |
|
"learning_rate": 4.187966968147857e-06, |
|
"loss": 0.9807, |
|
"step": 116500 |
|
}, |
|
{ |
|
"epoch": 9.2, |
|
"learning_rate": 3.991348800629179e-06, |
|
"loss": 0.9822, |
|
"step": 117000 |
|
}, |
|
{ |
|
"epoch": 9.24, |
|
"learning_rate": 3.794730633110499e-06, |
|
"loss": 0.9843, |
|
"step": 117500 |
|
}, |
|
{ |
|
"epoch": 9.28, |
|
"learning_rate": 3.5981124655918208e-06, |
|
"loss": 0.9777, |
|
"step": 118000 |
|
}, |
|
{ |
|
"epoch": 9.32, |
|
"learning_rate": 3.401494298073142e-06, |
|
"loss": 0.977, |
|
"step": 118500 |
|
}, |
|
{ |
|
"epoch": 9.36, |
|
"learning_rate": 3.2048761305544636e-06, |
|
"loss": 0.9785, |
|
"step": 119000 |
|
}, |
|
{ |
|
"epoch": 9.4, |
|
"learning_rate": 3.0082579630357847e-06, |
|
"loss": 0.9756, |
|
"step": 119500 |
|
}, |
|
{ |
|
"epoch": 9.44, |
|
"learning_rate": 2.811639795517106e-06, |
|
"loss": 0.9803, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 9.48, |
|
"learning_rate": 2.615021627998427e-06, |
|
"loss": 0.9774, |
|
"step": 120500 |
|
}, |
|
{ |
|
"epoch": 9.52, |
|
"learning_rate": 2.4184034604797483e-06, |
|
"loss": 0.984, |
|
"step": 121000 |
|
}, |
|
{ |
|
"epoch": 9.56, |
|
"learning_rate": 2.22178529296107e-06, |
|
"loss": 0.9753, |
|
"step": 121500 |
|
}, |
|
{ |
|
"epoch": 9.59, |
|
"learning_rate": 2.025167125442391e-06, |
|
"loss": 0.9764, |
|
"step": 122000 |
|
}, |
|
{ |
|
"epoch": 9.63, |
|
"learning_rate": 1.8285489579237123e-06, |
|
"loss": 0.9756, |
|
"step": 122500 |
|
}, |
|
{ |
|
"epoch": 9.67, |
|
"learning_rate": 1.6319307904050335e-06, |
|
"loss": 0.9733, |
|
"step": 123000 |
|
}, |
|
{ |
|
"epoch": 9.71, |
|
"learning_rate": 1.4353126228863549e-06, |
|
"loss": 0.9726, |
|
"step": 123500 |
|
}, |
|
{ |
|
"epoch": 9.75, |
|
"learning_rate": 1.238694455367676e-06, |
|
"loss": 0.9783, |
|
"step": 124000 |
|
}, |
|
{ |
|
"epoch": 9.79, |
|
"learning_rate": 1.0420762878489972e-06, |
|
"loss": 0.9786, |
|
"step": 124500 |
|
}, |
|
{ |
|
"epoch": 9.83, |
|
"learning_rate": 8.454581203303186e-07, |
|
"loss": 0.9802, |
|
"step": 125000 |
|
}, |
|
{ |
|
"epoch": 9.87, |
|
"learning_rate": 6.488399528116398e-07, |
|
"loss": 0.979, |
|
"step": 125500 |
|
}, |
|
{ |
|
"epoch": 9.91, |
|
"learning_rate": 4.522217852929611e-07, |
|
"loss": 0.978, |
|
"step": 126000 |
|
}, |
|
{ |
|
"epoch": 9.95, |
|
"learning_rate": 2.556036177742823e-07, |
|
"loss": 0.9718, |
|
"step": 126500 |
|
}, |
|
{ |
|
"epoch": 9.99, |
|
"learning_rate": 5.898545025560362e-08, |
|
"loss": 0.9811, |
|
"step": 127000 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 127150, |
|
"total_flos": 5.807966088205763e+17, |
|
"train_loss": 1.219492632603017, |
|
"train_runtime": 160361.9384, |
|
"train_samples_per_second": 38.059, |
|
"train_steps_per_second": 0.793 |
|
} |
|
], |
|
"max_steps": 127150, |
|
"num_train_epochs": 10, |
|
"total_flos": 5.807966088205763e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|