|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.1529425258791255, |
|
"eval_steps": 1000000, |
|
"global_step": 225243, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5.000000000000001e-07, |
|
"loss": 9.9239, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 9.1673, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.5e-06, |
|
"loss": 8.7549, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"loss": 8.2851, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 2.5e-06, |
|
"loss": 7.7885, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 3e-06, |
|
"loss": 7.4014, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 3.5e-06, |
|
"loss": 7.1544, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.000000000000001e-06, |
|
"loss": 7.0253, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.5e-06, |
|
"loss": 6.9305, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 5e-06, |
|
"loss": 6.8678, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 5.500000000000001e-06, |
|
"loss": 6.814, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 6e-06, |
|
"loss": 6.7725, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 6.5000000000000004e-06, |
|
"loss": 6.734, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 7e-06, |
|
"loss": 6.6882, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 7.500000000000001e-06, |
|
"loss": 6.6641, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 8.000000000000001e-06, |
|
"loss": 6.6342, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 8.5e-06, |
|
"loss": 6.6129, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 9e-06, |
|
"loss": 6.5907, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 9.5e-06, |
|
"loss": 6.5534, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1e-05, |
|
"loss": 6.5351, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.999521627232806e-06, |
|
"loss": 6.5133, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 9.99904325446561e-06, |
|
"loss": 6.4921, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 9.998564881698415e-06, |
|
"loss": 6.472, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 9.99808650893122e-06, |
|
"loss": 6.4614, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 9.997608136164025e-06, |
|
"loss": 6.4463, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 9.99712976339683e-06, |
|
"loss": 6.4302, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 9.996651390629635e-06, |
|
"loss": 6.4184, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 9.99617301786244e-06, |
|
"loss": 6.4077, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 9.995694645095244e-06, |
|
"loss": 6.3951, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 9.99521627232805e-06, |
|
"loss": 6.3849, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 9.994737899560854e-06, |
|
"loss": 6.3742, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 9.994259526793659e-06, |
|
"loss": 6.3584, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 9.993781154026464e-06, |
|
"loss": 6.3541, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 9.99330278125927e-06, |
|
"loss": 6.338, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 9.992824408492075e-06, |
|
"loss": 6.3329, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 9.992346035724878e-06, |
|
"loss": 6.3238, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 9.991867662957683e-06, |
|
"loss": 6.3103, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 9.991389290190488e-06, |
|
"loss": 6.3042, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 9.990910917423293e-06, |
|
"loss": 6.3042, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 9.9904325446561e-06, |
|
"loss": 6.2976, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 9.989954171888904e-06, |
|
"loss": 6.2935, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 9.989475799121709e-06, |
|
"loss": 6.2808, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 9.988997426354512e-06, |
|
"loss": 6.2811, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 9.988519053587317e-06, |
|
"loss": 6.275, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 9.988040680820124e-06, |
|
"loss": 6.2721, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 9.987562308052929e-06, |
|
"loss": 6.2596, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 9.987083935285733e-06, |
|
"loss": 6.25, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 9.986605562518538e-06, |
|
"loss": 6.251, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 9.986127189751343e-06, |
|
"loss": 6.2465, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 9.985648816984146e-06, |
|
"loss": 6.2406, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 9.985170444216953e-06, |
|
"loss": 6.2365, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 9.984692071449758e-06, |
|
"loss": 6.2353, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 9.984213698682562e-06, |
|
"loss": 6.2313, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 9.983735325915367e-06, |
|
"loss": 6.2208, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 9.983256953148172e-06, |
|
"loss": 6.2279, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 9.982778580380977e-06, |
|
"loss": 6.2163, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 9.982300207613782e-06, |
|
"loss": 6.206, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 9.981821834846587e-06, |
|
"loss": 6.2043, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 9.981343462079391e-06, |
|
"loss": 6.2042, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 9.980865089312196e-06, |
|
"loss": 6.2034, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 9.980386716545001e-06, |
|
"loss": 6.201, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 9.979908343777806e-06, |
|
"loss": 6.1837, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 9.979429971010611e-06, |
|
"loss": 6.1866, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 9.978951598243416e-06, |
|
"loss": 6.1925, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 9.97847322547622e-06, |
|
"loss": 6.1772, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 9.977994852709025e-06, |
|
"loss": 6.1805, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 9.977516479941832e-06, |
|
"loss": 6.1788, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 9.977038107174635e-06, |
|
"loss": 6.1714, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 9.97655973440744e-06, |
|
"loss": 6.1686, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 9.976081361640245e-06, |
|
"loss": 6.1714, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 9.97560298887305e-06, |
|
"loss": 6.1704, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 9.975124616105856e-06, |
|
"loss": 6.1579, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 9.974646243338661e-06, |
|
"loss": 6.1669, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 9.974167870571466e-06, |
|
"loss": 6.1611, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 9.973689497804269e-06, |
|
"loss": 6.1548, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 9.973211125037074e-06, |
|
"loss": 6.1569, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 9.972732752269879e-06, |
|
"loss": 6.1528, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 9.972254379502685e-06, |
|
"loss": 6.1402, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 9.97177600673549e-06, |
|
"loss": 6.1469, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 9.971297633968295e-06, |
|
"loss": 6.1441, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 9.9708192612011e-06, |
|
"loss": 6.1391, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 9.970340888433903e-06, |
|
"loss": 6.1449, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 9.96986251566671e-06, |
|
"loss": 6.1341, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 9.969384142899514e-06, |
|
"loss": 6.1357, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 9.96890577013232e-06, |
|
"loss": 6.1371, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 9.968427397365124e-06, |
|
"loss": 6.1304, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 9.967949024597929e-06, |
|
"loss": 6.1308, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 9.967470651830734e-06, |
|
"loss": 6.1265, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 9.966992279063539e-06, |
|
"loss": 6.1159, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 9.966513906296343e-06, |
|
"loss": 6.1206, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 9.966035533529148e-06, |
|
"loss": 6.1177, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 9.965557160761953e-06, |
|
"loss": 6.1157, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 9.965078787994758e-06, |
|
"loss": 6.1145, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 9.964600415227563e-06, |
|
"loss": 6.106, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 9.964122042460368e-06, |
|
"loss": 6.1095, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 9.963643669693172e-06, |
|
"loss": 6.1017, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 9.963165296925977e-06, |
|
"loss": 6.0923, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 9.962686924158782e-06, |
|
"loss": 6.0763, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 9.962208551391587e-06, |
|
"loss": 6.057, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 9.961730178624392e-06, |
|
"loss": 6.0377, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 9.961251805857197e-06, |
|
"loss": 6.0221, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 9.960773433090002e-06, |
|
"loss": 6.0077, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 9.960295060322806e-06, |
|
"loss": 5.9924, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 9.959816687555611e-06, |
|
"loss": 5.985, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 9.959338314788418e-06, |
|
"loss": 5.9684, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 9.958859942021221e-06, |
|
"loss": 5.9564, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 9.958381569254026e-06, |
|
"loss": 5.9449, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 9.95790319648683e-06, |
|
"loss": 5.9338, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 9.957424823719635e-06, |
|
"loss": 5.9249, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 9.95694645095244e-06, |
|
"loss": 5.9231, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 9.956468078185247e-06, |
|
"loss": 5.9075, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 9.955989705418052e-06, |
|
"loss": 5.9082, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 9.955511332650855e-06, |
|
"loss": 5.899, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 9.95503295988366e-06, |
|
"loss": 5.8856, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 9.954554587116465e-06, |
|
"loss": 5.8841, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 9.954076214349271e-06, |
|
"loss": 5.8765, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 9.953597841582076e-06, |
|
"loss": 5.8661, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 9.95311946881488e-06, |
|
"loss": 5.8583, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 9.952641096047686e-06, |
|
"loss": 5.8525, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 9.952162723280489e-06, |
|
"loss": 5.8418, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 9.951684350513294e-06, |
|
"loss": 5.8411, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 9.9512059777461e-06, |
|
"loss": 5.8394, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 9.950727604978905e-06, |
|
"loss": 5.8241, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 9.95024923221171e-06, |
|
"loss": 5.8221, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 9.949770859444515e-06, |
|
"loss": 5.8117, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 9.94929248667732e-06, |
|
"loss": 5.8019, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 9.948814113910124e-06, |
|
"loss": 5.798, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 9.94833574114293e-06, |
|
"loss": 5.788, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 9.947857368375734e-06, |
|
"loss": 5.7772, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 9.947378995608539e-06, |
|
"loss": 5.7652, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 9.946900622841344e-06, |
|
"loss": 5.7641, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 9.946422250074149e-06, |
|
"loss": 5.7417, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 9.945943877306953e-06, |
|
"loss": 5.7279, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 9.945465504539758e-06, |
|
"loss": 5.7096, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 9.944987131772563e-06, |
|
"loss": 5.6962, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 9.944508759005368e-06, |
|
"loss": 5.6763, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 9.944030386238173e-06, |
|
"loss": 5.6662, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 9.943552013470978e-06, |
|
"loss": 5.6343, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 9.943073640703783e-06, |
|
"loss": 5.5925, |
|
"step": 69500 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 9.942595267936587e-06, |
|
"loss": 5.5517, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 9.942116895169392e-06, |
|
"loss": 5.5147, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 9.941638522402197e-06, |
|
"loss": 5.4691, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 9.941160149635002e-06, |
|
"loss": 5.4319, |
|
"step": 71500 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 9.940681776867808e-06, |
|
"loss": 5.3928, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 9.940203404100612e-06, |
|
"loss": 5.3468, |
|
"step": 72500 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 9.939725031333416e-06, |
|
"loss": 5.3136, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 9.939246658566221e-06, |
|
"loss": 5.2761, |
|
"step": 73500 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 9.938768285799026e-06, |
|
"loss": 5.2384, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 9.938289913031833e-06, |
|
"loss": 5.2061, |
|
"step": 74500 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 9.937811540264638e-06, |
|
"loss": 5.1755, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 9.937333167497442e-06, |
|
"loss": 5.1405, |
|
"step": 75500 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 9.936854794730246e-06, |
|
"loss": 5.11, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 9.93637642196305e-06, |
|
"loss": 5.0771, |
|
"step": 76500 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.935898049195855e-06, |
|
"loss": 5.0511, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.935419676428662e-06, |
|
"loss": 5.0296, |
|
"step": 77500 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 9.934941303661467e-06, |
|
"loss": 4.9985, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 9.934462930894271e-06, |
|
"loss": 4.9759, |
|
"step": 78500 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 9.933984558127076e-06, |
|
"loss": 4.942, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 9.93350618535988e-06, |
|
"loss": 4.9226, |
|
"step": 79500 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 9.933027812592686e-06, |
|
"loss": 4.8993, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 9.932549439825491e-06, |
|
"loss": 4.8738, |
|
"step": 80500 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 9.932071067058296e-06, |
|
"loss": 4.8467, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 9.9315926942911e-06, |
|
"loss": 4.8251, |
|
"step": 81500 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 9.931114321523905e-06, |
|
"loss": 4.813, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 9.93063594875671e-06, |
|
"loss": 4.784, |
|
"step": 82500 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 9.930157575989515e-06, |
|
"loss": 4.7694, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 9.92967920322232e-06, |
|
"loss": 4.7434, |
|
"step": 83500 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 9.929200830455125e-06, |
|
"loss": 4.7263, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 9.92872245768793e-06, |
|
"loss": 4.7086, |
|
"step": 84500 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 9.928244084920734e-06, |
|
"loss": 4.6852, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 9.92776571215354e-06, |
|
"loss": 4.6643, |
|
"step": 85500 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 9.927287339386344e-06, |
|
"loss": 4.6441, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 9.926808966619149e-06, |
|
"loss": 4.624, |
|
"step": 86500 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 9.926330593851954e-06, |
|
"loss": 4.5976, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 9.925852221084759e-06, |
|
"loss": 4.5814, |
|
"step": 87500 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 9.925373848317564e-06, |
|
"loss": 4.5606, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 9.924895475550368e-06, |
|
"loss": 4.5413, |
|
"step": 88500 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 9.924417102783173e-06, |
|
"loss": 4.5017, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 9.923938730015978e-06, |
|
"loss": 4.4623, |
|
"step": 89500 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 9.923460357248783e-06, |
|
"loss": 4.4205, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 9.922981984481588e-06, |
|
"loss": 4.3811, |
|
"step": 90500 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 9.922503611714394e-06, |
|
"loss": 4.3612, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 9.922025238947197e-06, |
|
"loss": 4.3201, |
|
"step": 91500 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 9.921546866180002e-06, |
|
"loss": 4.3, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 9.921068493412807e-06, |
|
"loss": 4.2616, |
|
"step": 92500 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 9.920590120645612e-06, |
|
"loss": 4.2337, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 9.920111747878419e-06, |
|
"loss": 4.2122, |
|
"step": 93500 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 9.919633375111223e-06, |
|
"loss": 4.185, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 9.919155002344028e-06, |
|
"loss": 4.162, |
|
"step": 94500 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 9.918676629576831e-06, |
|
"loss": 4.1374, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 9.918198256809636e-06, |
|
"loss": 4.1102, |
|
"step": 95500 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 9.917719884042441e-06, |
|
"loss": 4.0982, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 9.917241511275248e-06, |
|
"loss": 4.0628, |
|
"step": 96500 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 9.916763138508052e-06, |
|
"loss": 4.0464, |
|
"step": 97000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 9.916284765740857e-06, |
|
"loss": 4.0196, |
|
"step": 97500 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 9.915806392973662e-06, |
|
"loss": 4.0049, |
|
"step": 98000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 9.915328020206465e-06, |
|
"loss": 3.9919, |
|
"step": 98500 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 9.914849647439272e-06, |
|
"loss": 3.9626, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 9.914371274672077e-06, |
|
"loss": 3.9469, |
|
"step": 99500 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 9.913892901904882e-06, |
|
"loss": 3.9354, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 9.913414529137686e-06, |
|
"loss": 3.9084, |
|
"step": 100500 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 9.912936156370491e-06, |
|
"loss": 3.8966, |
|
"step": 101000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 9.912457783603296e-06, |
|
"loss": 3.8735, |
|
"step": 101500 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 9.911979410836101e-06, |
|
"loss": 3.8618, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 9.911501038068906e-06, |
|
"loss": 3.8407, |
|
"step": 102500 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 9.91102266530171e-06, |
|
"loss": 3.8246, |
|
"step": 103000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 9.910544292534515e-06, |
|
"loss": 3.8059, |
|
"step": 103500 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 9.91006591976732e-06, |
|
"loss": 3.796, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 9.909587547000125e-06, |
|
"loss": 3.7835, |
|
"step": 104500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 9.90910917423293e-06, |
|
"loss": 3.7657, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 9.908630801465735e-06, |
|
"loss": 3.7459, |
|
"step": 105500 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 9.90815242869854e-06, |
|
"loss": 3.7431, |
|
"step": 106000 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 9.907674055931345e-06, |
|
"loss": 3.7274, |
|
"step": 106500 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 9.90719568316415e-06, |
|
"loss": 3.7172, |
|
"step": 107000 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 9.906717310396954e-06, |
|
"loss": 3.7015, |
|
"step": 107500 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 9.906238937629759e-06, |
|
"loss": 3.6916, |
|
"step": 108000 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 9.905760564862564e-06, |
|
"loss": 3.6755, |
|
"step": 108500 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 9.905282192095369e-06, |
|
"loss": 3.6646, |
|
"step": 109000 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 9.904803819328174e-06, |
|
"loss": 3.6548, |
|
"step": 109500 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 9.90432544656098e-06, |
|
"loss": 3.6433, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 9.903847073793785e-06, |
|
"loss": 3.6269, |
|
"step": 110500 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 9.903368701026588e-06, |
|
"loss": 3.6216, |
|
"step": 111000 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 9.902890328259393e-06, |
|
"loss": 3.6056, |
|
"step": 111500 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 9.902411955492198e-06, |
|
"loss": 3.5949, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 9.901933582725003e-06, |
|
"loss": 3.5931, |
|
"step": 112500 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 9.90145520995781e-06, |
|
"loss": 3.5786, |
|
"step": 113000 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 9.900976837190614e-06, |
|
"loss": 3.5733, |
|
"step": 113500 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 9.900498464423419e-06, |
|
"loss": 3.5628, |
|
"step": 114000 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 9.900020091656222e-06, |
|
"loss": 3.5453, |
|
"step": 114500 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 9.899541718889027e-06, |
|
"loss": 3.5472, |
|
"step": 115000 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 9.899063346121833e-06, |
|
"loss": 3.5334, |
|
"step": 115500 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 9.898584973354638e-06, |
|
"loss": 3.5187, |
|
"step": 116000 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 9.898106600587443e-06, |
|
"loss": 3.5137, |
|
"step": 116500 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 9.897628227820248e-06, |
|
"loss": 3.5029, |
|
"step": 117000 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 9.897149855053053e-06, |
|
"loss": 3.4994, |
|
"step": 117500 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 9.896671482285856e-06, |
|
"loss": 3.4834, |
|
"step": 118000 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 9.896193109518663e-06, |
|
"loss": 3.4748, |
|
"step": 118500 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 9.895714736751467e-06, |
|
"loss": 3.4802, |
|
"step": 119000 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 9.895236363984272e-06, |
|
"loss": 3.4635, |
|
"step": 119500 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 9.894757991217077e-06, |
|
"loss": 3.4572, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 9.894279618449882e-06, |
|
"loss": 3.4505, |
|
"step": 120500 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 9.893801245682687e-06, |
|
"loss": 3.4351, |
|
"step": 121000 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 9.893322872915492e-06, |
|
"loss": 3.4333, |
|
"step": 121500 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 9.892844500148296e-06, |
|
"loss": 3.4241, |
|
"step": 122000 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 9.892366127381101e-06, |
|
"loss": 3.4255, |
|
"step": 122500 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 9.891887754613906e-06, |
|
"loss": 3.4108, |
|
"step": 123000 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 9.891409381846711e-06, |
|
"loss": 3.4036, |
|
"step": 123500 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 9.890931009079516e-06, |
|
"loss": 3.3975, |
|
"step": 124000 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 9.89045263631232e-06, |
|
"loss": 3.3904, |
|
"step": 124500 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 9.889974263545126e-06, |
|
"loss": 3.387, |
|
"step": 125000 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 9.88949589077793e-06, |
|
"loss": 3.3716, |
|
"step": 125500 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 9.889017518010735e-06, |
|
"loss": 3.369, |
|
"step": 126000 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 9.88853914524354e-06, |
|
"loss": 3.3587, |
|
"step": 126500 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 9.888060772476345e-06, |
|
"loss": 3.3638, |
|
"step": 127000 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 9.88758239970915e-06, |
|
"loss": 3.3531, |
|
"step": 127500 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 9.887104026941955e-06, |
|
"loss": 3.3412, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 9.88662565417476e-06, |
|
"loss": 3.3392, |
|
"step": 128500 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 9.886147281407564e-06, |
|
"loss": 3.327, |
|
"step": 129000 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 9.88566890864037e-06, |
|
"loss": 3.3197, |
|
"step": 129500 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 9.885190535873174e-06, |
|
"loss": 3.3237, |
|
"step": 130000 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 9.884712163105979e-06, |
|
"loss": 3.3081, |
|
"step": 130500 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 9.884233790338784e-06, |
|
"loss": 3.3071, |
|
"step": 131000 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 9.883755417571589e-06, |
|
"loss": 3.301, |
|
"step": 131500 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 9.883277044804395e-06, |
|
"loss": 3.3011, |
|
"step": 132000 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 9.8827986720372e-06, |
|
"loss": 3.2862, |
|
"step": 132500 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 9.882320299270005e-06, |
|
"loss": 3.2838, |
|
"step": 133000 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 9.881841926502808e-06, |
|
"loss": 3.2749, |
|
"step": 133500 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 9.881363553735613e-06, |
|
"loss": 3.2778, |
|
"step": 134000 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 9.880885180968418e-06, |
|
"loss": 3.2691, |
|
"step": 134500 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 9.880406808201224e-06, |
|
"loss": 3.2645, |
|
"step": 135000 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 9.879928435434029e-06, |
|
"loss": 3.2589, |
|
"step": 135500 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 9.879450062666834e-06, |
|
"loss": 3.2441, |
|
"step": 136000 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 9.878971689899639e-06, |
|
"loss": 3.2469, |
|
"step": 136500 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 9.878493317132442e-06, |
|
"loss": 3.2443, |
|
"step": 137000 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 9.878014944365248e-06, |
|
"loss": 3.2431, |
|
"step": 137500 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 9.877536571598053e-06, |
|
"loss": 3.2268, |
|
"step": 138000 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 9.877058198830858e-06, |
|
"loss": 3.219, |
|
"step": 138500 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 9.876579826063663e-06, |
|
"loss": 3.2265, |
|
"step": 139000 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 9.876101453296468e-06, |
|
"loss": 3.2182, |
|
"step": 139500 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 9.875623080529273e-06, |
|
"loss": 3.2138, |
|
"step": 140000 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 9.875144707762077e-06, |
|
"loss": 3.204, |
|
"step": 140500 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 9.874666334994882e-06, |
|
"loss": 3.1989, |
|
"step": 141000 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 9.874187962227687e-06, |
|
"loss": 3.2008, |
|
"step": 141500 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 9.873709589460492e-06, |
|
"loss": 3.1909, |
|
"step": 142000 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 9.873231216693297e-06, |
|
"loss": 3.1902, |
|
"step": 142500 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 9.872752843926102e-06, |
|
"loss": 3.1796, |
|
"step": 143000 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 9.872274471158907e-06, |
|
"loss": 3.1792, |
|
"step": 143500 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 9.871796098391711e-06, |
|
"loss": 3.1798, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 9.871317725624516e-06, |
|
"loss": 3.1621, |
|
"step": 144500 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 9.870839352857321e-06, |
|
"loss": 3.168, |
|
"step": 145000 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 9.870360980090128e-06, |
|
"loss": 3.1598, |
|
"step": 145500 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 9.86988260732293e-06, |
|
"loss": 3.155, |
|
"step": 146000 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 9.869404234555736e-06, |
|
"loss": 3.1539, |
|
"step": 146500 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 9.86892586178854e-06, |
|
"loss": 3.1468, |
|
"step": 147000 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 9.868447489021345e-06, |
|
"loss": 3.136, |
|
"step": 147500 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 9.86796911625415e-06, |
|
"loss": 3.1355, |
|
"step": 148000 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 9.867490743486957e-06, |
|
"loss": 3.1361, |
|
"step": 148500 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 9.867012370719761e-06, |
|
"loss": 3.1299, |
|
"step": 149000 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 9.866533997952565e-06, |
|
"loss": 3.1259, |
|
"step": 149500 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 9.86605562518537e-06, |
|
"loss": 3.1176, |
|
"step": 150000 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 9.865577252418174e-06, |
|
"loss": 3.126, |
|
"step": 150500 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 9.865098879650981e-06, |
|
"loss": 3.1132, |
|
"step": 151000 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 9.864620506883786e-06, |
|
"loss": 3.1072, |
|
"step": 151500 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 9.86414213411659e-06, |
|
"loss": 3.1036, |
|
"step": 152000 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 9.863663761349395e-06, |
|
"loss": 3.1007, |
|
"step": 152500 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 9.863185388582199e-06, |
|
"loss": 3.0971, |
|
"step": 153000 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 9.862707015815003e-06, |
|
"loss": 3.0893, |
|
"step": 153500 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 9.86222864304781e-06, |
|
"loss": 3.0835, |
|
"step": 154000 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 9.861750270280615e-06, |
|
"loss": 3.0827, |
|
"step": 154500 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 9.86127189751342e-06, |
|
"loss": 3.0792, |
|
"step": 155000 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 9.860793524746224e-06, |
|
"loss": 3.0726, |
|
"step": 155500 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 9.86031515197903e-06, |
|
"loss": 3.0696, |
|
"step": 156000 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 9.859836779211834e-06, |
|
"loss": 3.0624, |
|
"step": 156500 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 9.859358406444639e-06, |
|
"loss": 3.0654, |
|
"step": 157000 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 9.858880033677444e-06, |
|
"loss": 3.0591, |
|
"step": 157500 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 9.858401660910249e-06, |
|
"loss": 3.0501, |
|
"step": 158000 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 9.857923288143054e-06, |
|
"loss": 3.0534, |
|
"step": 158500 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 9.857444915375858e-06, |
|
"loss": 3.0498, |
|
"step": 159000 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 9.856966542608663e-06, |
|
"loss": 3.0412, |
|
"step": 159500 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 9.856488169841468e-06, |
|
"loss": 3.0357, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 9.856009797074273e-06, |
|
"loss": 3.0351, |
|
"step": 160500 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 9.855531424307078e-06, |
|
"loss": 3.034, |
|
"step": 161000 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 9.855053051539883e-06, |
|
"loss": 3.0332, |
|
"step": 161500 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 9.854574678772687e-06, |
|
"loss": 3.0269, |
|
"step": 162000 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 9.854096306005492e-06, |
|
"loss": 3.0232, |
|
"step": 162500 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 9.853617933238297e-06, |
|
"loss": 3.0266, |
|
"step": 163000 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 9.853139560471102e-06, |
|
"loss": 3.0164, |
|
"step": 163500 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 9.852661187703907e-06, |
|
"loss": 3.0115, |
|
"step": 164000 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 9.852182814936712e-06, |
|
"loss": 3.0088, |
|
"step": 164500 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 9.851704442169517e-06, |
|
"loss": 3.0049, |
|
"step": 165000 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 9.851226069402321e-06, |
|
"loss": 2.9978, |
|
"step": 165500 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 9.850747696635126e-06, |
|
"loss": 3.0032, |
|
"step": 166000 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 9.850269323867931e-06, |
|
"loss": 2.993, |
|
"step": 166500 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 9.849790951100736e-06, |
|
"loss": 2.9939, |
|
"step": 167000 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 9.849312578333542e-06, |
|
"loss": 2.9906, |
|
"step": 167500 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 9.848834205566347e-06, |
|
"loss": 2.9855, |
|
"step": 168000 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 9.84835583279915e-06, |
|
"loss": 2.9814, |
|
"step": 168500 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 9.847877460031955e-06, |
|
"loss": 2.9848, |
|
"step": 169000 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 9.84739908726476e-06, |
|
"loss": 2.9744, |
|
"step": 169500 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 9.846920714497565e-06, |
|
"loss": 2.9682, |
|
"step": 170000 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 9.846442341730372e-06, |
|
"loss": 2.9689, |
|
"step": 170500 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 9.845963968963176e-06, |
|
"loss": 2.9649, |
|
"step": 171000 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 9.845485596195981e-06, |
|
"loss": 2.9616, |
|
"step": 171500 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 9.845007223428784e-06, |
|
"loss": 2.9621, |
|
"step": 172000 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 9.84452885066159e-06, |
|
"loss": 2.9569, |
|
"step": 172500 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 9.844050477894396e-06, |
|
"loss": 2.951, |
|
"step": 173000 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 9.8435721051272e-06, |
|
"loss": 2.9469, |
|
"step": 173500 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 9.843093732360005e-06, |
|
"loss": 2.9409, |
|
"step": 174000 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 9.84261535959281e-06, |
|
"loss": 2.9422, |
|
"step": 174500 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 9.842136986825615e-06, |
|
"loss": 2.938, |
|
"step": 175000 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 9.841658614058418e-06, |
|
"loss": 2.9334, |
|
"step": 175500 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 9.841180241291225e-06, |
|
"loss": 2.9355, |
|
"step": 176000 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 9.84070186852403e-06, |
|
"loss": 2.9299, |
|
"step": 176500 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 9.840223495756835e-06, |
|
"loss": 2.9272, |
|
"step": 177000 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 9.83974512298964e-06, |
|
"loss": 2.919, |
|
"step": 177500 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 9.839266750222444e-06, |
|
"loss": 2.928, |
|
"step": 178000 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 9.838788377455249e-06, |
|
"loss": 2.9123, |
|
"step": 178500 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 9.838310004688054e-06, |
|
"loss": 2.9111, |
|
"step": 179000 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 9.837831631920859e-06, |
|
"loss": 2.912, |
|
"step": 179500 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 9.837353259153664e-06, |
|
"loss": 2.906, |
|
"step": 180000 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 9.836874886386468e-06, |
|
"loss": 2.9096, |
|
"step": 180500 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 9.836396513619273e-06, |
|
"loss": 2.9066, |
|
"step": 181000 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 9.835918140852078e-06, |
|
"loss": 2.9049, |
|
"step": 181500 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 9.835439768084883e-06, |
|
"loss": 2.9004, |
|
"step": 182000 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 9.834961395317688e-06, |
|
"loss": 2.8895, |
|
"step": 182500 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 9.834483022550493e-06, |
|
"loss": 2.8974, |
|
"step": 183000 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 9.834004649783298e-06, |
|
"loss": 2.8893, |
|
"step": 183500 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 9.833526277016104e-06, |
|
"loss": 2.8864, |
|
"step": 184000 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 9.833047904248907e-06, |
|
"loss": 2.8876, |
|
"step": 184500 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 9.832569531481712e-06, |
|
"loss": 2.8786, |
|
"step": 185000 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 9.832091158714517e-06, |
|
"loss": 2.8743, |
|
"step": 185500 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 9.831612785947322e-06, |
|
"loss": 2.8711, |
|
"step": 186000 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 9.831134413180127e-06, |
|
"loss": 2.8704, |
|
"step": 186500 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 9.830656040412933e-06, |
|
"loss": 2.8686, |
|
"step": 187000 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 9.830177667645738e-06, |
|
"loss": 2.8641, |
|
"step": 187500 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 9.829699294878541e-06, |
|
"loss": 2.8697, |
|
"step": 188000 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 9.829220922111346e-06, |
|
"loss": 2.8572, |
|
"step": 188500 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 9.828742549344151e-06, |
|
"loss": 2.8619, |
|
"step": 189000 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 9.828264176576957e-06, |
|
"loss": 2.8614, |
|
"step": 189500 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 9.827785803809762e-06, |
|
"loss": 2.855, |
|
"step": 190000 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 9.827307431042567e-06, |
|
"loss": 2.8589, |
|
"step": 190500 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 9.826829058275372e-06, |
|
"loss": 2.8497, |
|
"step": 191000 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 9.826350685508175e-06, |
|
"loss": 2.8471, |
|
"step": 191500 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 9.825872312740982e-06, |
|
"loss": 2.8485, |
|
"step": 192000 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 9.825393939973786e-06, |
|
"loss": 2.8452, |
|
"step": 192500 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 9.824915567206591e-06, |
|
"loss": 2.8429, |
|
"step": 193000 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 9.824437194439396e-06, |
|
"loss": 2.84, |
|
"step": 193500 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 9.823958821672201e-06, |
|
"loss": 2.8344, |
|
"step": 194000 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 9.823480448905006e-06, |
|
"loss": 2.8337, |
|
"step": 194500 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 9.82300207613781e-06, |
|
"loss": 2.8341, |
|
"step": 195000 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 9.822523703370616e-06, |
|
"loss": 2.8372, |
|
"step": 195500 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 9.82204533060342e-06, |
|
"loss": 2.8268, |
|
"step": 196000 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 9.821566957836225e-06, |
|
"loss": 2.8248, |
|
"step": 196500 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 9.82108858506903e-06, |
|
"loss": 2.8233, |
|
"step": 197000 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 9.820610212301835e-06, |
|
"loss": 2.8141, |
|
"step": 197500 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 9.82013183953464e-06, |
|
"loss": 2.818, |
|
"step": 198000 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 9.819653466767445e-06, |
|
"loss": 2.8156, |
|
"step": 198500 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 9.81917509400025e-06, |
|
"loss": 2.8165, |
|
"step": 199000 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 9.818696721233054e-06, |
|
"loss": 2.8092, |
|
"step": 199500 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 9.81821834846586e-06, |
|
"loss": 2.803, |
|
"step": 200000 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 9.817739975698664e-06, |
|
"loss": 2.8103, |
|
"step": 200500 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 9.817261602931469e-06, |
|
"loss": 2.8056, |
|
"step": 201000 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 9.816783230164274e-06, |
|
"loss": 2.8062, |
|
"step": 201500 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 9.816304857397079e-06, |
|
"loss": 2.8035, |
|
"step": 202000 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 9.815826484629883e-06, |
|
"loss": 2.8014, |
|
"step": 202500 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 9.81534811186269e-06, |
|
"loss": 2.7975, |
|
"step": 203000 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 9.814869739095493e-06, |
|
"loss": 2.7922, |
|
"step": 203500 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 9.814391366328298e-06, |
|
"loss": 2.7891, |
|
"step": 204000 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 9.813912993561103e-06, |
|
"loss": 2.7872, |
|
"step": 204500 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 9.813434620793908e-06, |
|
"loss": 2.7839, |
|
"step": 205000 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 9.812956248026712e-06, |
|
"loss": 2.7831, |
|
"step": 205500 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 9.812477875259519e-06, |
|
"loss": 2.7802, |
|
"step": 206000 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 9.811999502492324e-06, |
|
"loss": 2.7764, |
|
"step": 206500 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 9.811521129725127e-06, |
|
"loss": 2.774, |
|
"step": 207000 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 9.811042756957932e-06, |
|
"loss": 2.7793, |
|
"step": 207500 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 9.810564384190737e-06, |
|
"loss": 2.7738, |
|
"step": 208000 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 9.810086011423543e-06, |
|
"loss": 2.7756, |
|
"step": 208500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 9.809607638656348e-06, |
|
"loss": 2.7736, |
|
"step": 209000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 9.809129265889153e-06, |
|
"loss": 2.7643, |
|
"step": 209500 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 9.808650893121958e-06, |
|
"loss": 2.7643, |
|
"step": 210000 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 9.808172520354761e-06, |
|
"loss": 2.7615, |
|
"step": 210500 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 9.807694147587566e-06, |
|
"loss": 2.7653, |
|
"step": 211000 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 9.807215774820372e-06, |
|
"loss": 2.757, |
|
"step": 211500 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 9.806737402053177e-06, |
|
"loss": 2.7607, |
|
"step": 212000 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 9.806259029285982e-06, |
|
"loss": 2.7567, |
|
"step": 212500 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 9.805780656518787e-06, |
|
"loss": 2.7563, |
|
"step": 213000 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 9.805302283751592e-06, |
|
"loss": 2.7533, |
|
"step": 213500 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 9.804823910984397e-06, |
|
"loss": 2.7443, |
|
"step": 214000 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 9.804345538217201e-06, |
|
"loss": 2.7415, |
|
"step": 214500 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 9.803867165450006e-06, |
|
"loss": 2.7493, |
|
"step": 215000 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 9.803388792682811e-06, |
|
"loss": 2.741, |
|
"step": 215500 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 9.802910419915616e-06, |
|
"loss": 2.7392, |
|
"step": 216000 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 9.80243204714842e-06, |
|
"loss": 2.734, |
|
"step": 216500 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 9.801953674381226e-06, |
|
"loss": 2.7402, |
|
"step": 217000 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 9.80147530161403e-06, |
|
"loss": 2.7282, |
|
"step": 217500 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 9.800996928846835e-06, |
|
"loss": 2.7278, |
|
"step": 218000 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 9.80051855607964e-06, |
|
"loss": 2.7316, |
|
"step": 218500 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 9.800040183312445e-06, |
|
"loss": 2.7258, |
|
"step": 219000 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 9.79956181054525e-06, |
|
"loss": 2.7267, |
|
"step": 219500 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 9.799083437778055e-06, |
|
"loss": 2.728, |
|
"step": 220000 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 9.79860506501086e-06, |
|
"loss": 2.7308, |
|
"step": 220500 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 9.798126692243664e-06, |
|
"loss": 2.7204, |
|
"step": 221000 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 9.79764831947647e-06, |
|
"loss": 2.7143, |
|
"step": 221500 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 9.797169946709274e-06, |
|
"loss": 2.7151, |
|
"step": 222000 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 9.79669157394208e-06, |
|
"loss": 2.7091, |
|
"step": 222500 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 9.796213201174884e-06, |
|
"loss": 2.7201, |
|
"step": 223000 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 9.795734828407689e-06, |
|
"loss": 2.7186, |
|
"step": 223500 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 9.795256455640493e-06, |
|
"loss": 2.7121, |
|
"step": 224000 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 9.794778082873298e-06, |
|
"loss": 2.7096, |
|
"step": 224500 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 9.794299710106105e-06, |
|
"loss": 2.7105, |
|
"step": 225000 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"step": 225243, |
|
"total_flos": 3.118398678566437e+18, |
|
"train_loss": 4.290504575642074, |
|
"train_runtime": 197999.5648, |
|
"train_samples_per_second": 2958.975, |
|
"train_steps_per_second": 52.839 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 10462100, |
|
"num_train_epochs": 100, |
|
"save_steps": 1000000, |
|
"total_flos": 3.118398678566437e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|