|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 19.346941507161024, |
|
"global_step": 9320, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9.989377682403434e-06, |
|
"loss": 1.5134, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.978755364806867e-06, |
|
"loss": 1.518, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.968133047210302e-06, |
|
"loss": 1.5192, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 9.957510729613735e-06, |
|
"loss": 1.5191, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 9.946888412017168e-06, |
|
"loss": 1.5168, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 9.936266094420601e-06, |
|
"loss": 1.5148, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 9.925643776824036e-06, |
|
"loss": 1.518, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 9.915021459227469e-06, |
|
"loss": 1.5194, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 9.904399141630902e-06, |
|
"loss": 1.5159, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 9.893776824034335e-06, |
|
"loss": 1.5157, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 9.88315450643777e-06, |
|
"loss": 1.5205, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 9.872532188841201e-06, |
|
"loss": 1.5164, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 9.861909871244636e-06, |
|
"loss": 1.5171, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 9.85128755364807e-06, |
|
"loss": 1.5206, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 9.840665236051502e-06, |
|
"loss": 1.516, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 9.830042918454937e-06, |
|
"loss": 1.5183, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_loss": 1.4158966541290283, |
|
"eval_runtime": 75.2493, |
|
"eval_samples_per_second": 531.566, |
|
"eval_steps_per_second": 8.306, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 9.81942060085837e-06, |
|
"loss": 1.521, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 9.808798283261803e-06, |
|
"loss": 1.5193, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 9.798175965665236e-06, |
|
"loss": 1.52, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 9.787553648068671e-06, |
|
"loss": 1.5226, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 9.776931330472104e-06, |
|
"loss": 1.5168, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 9.766309012875537e-06, |
|
"loss": 1.5187, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 9.75568669527897e-06, |
|
"loss": 1.5169, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 9.745064377682405e-06, |
|
"loss": 1.5178, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 9.734442060085837e-06, |
|
"loss": 1.5172, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 9.723819742489271e-06, |
|
"loss": 1.518, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 9.713197424892705e-06, |
|
"loss": 1.5193, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 9.702575107296138e-06, |
|
"loss": 1.5177, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 9.69195278969957e-06, |
|
"loss": 1.5213, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 9.681330472103006e-06, |
|
"loss": 1.5195, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 9.670708154506439e-06, |
|
"loss": 1.5245, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 9.660085836909872e-06, |
|
"loss": 1.5188, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"eval_loss": 1.4158122539520264, |
|
"eval_runtime": 75.1458, |
|
"eval_samples_per_second": 532.298, |
|
"eval_steps_per_second": 8.317, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 9.649463519313305e-06, |
|
"loss": 1.5178, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 9.63884120171674e-06, |
|
"loss": 1.5199, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 9.628218884120173e-06, |
|
"loss": 1.5212, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 9.617596566523606e-06, |
|
"loss": 1.5176, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 9.60697424892704e-06, |
|
"loss": 1.5199, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 9.596351931330472e-06, |
|
"loss": 1.5209, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 9.585729613733907e-06, |
|
"loss": 1.5191, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 9.57510729613734e-06, |
|
"loss": 1.5182, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 9.564484978540773e-06, |
|
"loss": 1.5212, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 9.553862660944206e-06, |
|
"loss": 1.5203, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 9.54324034334764e-06, |
|
"loss": 1.52, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 9.532618025751074e-06, |
|
"loss": 1.5187, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 9.521995708154507e-06, |
|
"loss": 1.5214, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 9.51137339055794e-06, |
|
"loss": 1.5199, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 9.500751072961375e-06, |
|
"loss": 1.6614, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 9.490128755364808e-06, |
|
"loss": 1.5205, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"eval_loss": 1.4153233766555786, |
|
"eval_runtime": 75.1691, |
|
"eval_samples_per_second": 532.133, |
|
"eval_steps_per_second": 8.315, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 9.479506437768241e-06, |
|
"loss": 1.5163, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 9.468884120171674e-06, |
|
"loss": 1.5208, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 9.458261802575107e-06, |
|
"loss": 1.5188, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 9.447639484978542e-06, |
|
"loss": 1.5187, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 9.437017167381975e-06, |
|
"loss": 1.518, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 9.426394849785408e-06, |
|
"loss": 1.5191, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 9.415772532188841e-06, |
|
"loss": 1.5184, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 9.405150214592276e-06, |
|
"loss": 1.518, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 9.39452789699571e-06, |
|
"loss": 1.5166, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 9.383905579399142e-06, |
|
"loss": 1.5191, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 9.373283261802575e-06, |
|
"loss": 1.5176, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 9.36266094420601e-06, |
|
"loss": 1.5216, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 9.352038626609442e-06, |
|
"loss": 1.5194, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 9.341416309012876e-06, |
|
"loss": 1.5153, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 9.33079399141631e-06, |
|
"loss": 1.52, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 9.320171673819743e-06, |
|
"loss": 1.5213, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"eval_loss": 1.4161808490753174, |
|
"eval_runtime": 75.313, |
|
"eval_samples_per_second": 531.117, |
|
"eval_steps_per_second": 8.299, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 9.309549356223177e-06, |
|
"loss": 1.5178, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 9.29892703862661e-06, |
|
"loss": 1.5195, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 9.288304721030044e-06, |
|
"loss": 1.5157, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 9.277682403433477e-06, |
|
"loss": 1.5179, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 9.267060085836911e-06, |
|
"loss": 1.5172, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 9.256437768240345e-06, |
|
"loss": 1.5173, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 9.245815450643778e-06, |
|
"loss": 1.5156, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 9.23519313304721e-06, |
|
"loss": 1.5144, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 9.224570815450646e-06, |
|
"loss": 1.5199, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 9.213948497854079e-06, |
|
"loss": 1.5204, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 9.203326180257512e-06, |
|
"loss": 1.516, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 9.192703862660945e-06, |
|
"loss": 1.5206, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 9.18208154506438e-06, |
|
"loss": 1.5188, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 9.171459227467811e-06, |
|
"loss": 1.5211, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 9.160836909871246e-06, |
|
"loss": 1.5184, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 9.150214592274679e-06, |
|
"loss": 1.5195, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"eval_loss": 1.4168281555175781, |
|
"eval_runtime": 75.9377, |
|
"eval_samples_per_second": 526.747, |
|
"eval_steps_per_second": 8.23, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 9.139592274678112e-06, |
|
"loss": 1.5188, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 9.128969957081545e-06, |
|
"loss": 1.5167, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 9.11834763948498e-06, |
|
"loss": 1.5207, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 9.107725321888413e-06, |
|
"loss": 1.52, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 9.097103004291846e-06, |
|
"loss": 1.5186, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 9.08648068669528e-06, |
|
"loss": 1.5206, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 9.075858369098712e-06, |
|
"loss": 1.5193, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 9.065236051502147e-06, |
|
"loss": 1.5194, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 9.05461373390558e-06, |
|
"loss": 1.522, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 9.043991416309015e-06, |
|
"loss": 1.5169, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 9.033369098712446e-06, |
|
"loss": 1.5164, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 9.022746781115881e-06, |
|
"loss": 1.5207, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 9.012124463519314e-06, |
|
"loss": 1.5193, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 9.001502145922747e-06, |
|
"loss": 1.6613, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 8.99087982832618e-06, |
|
"loss": 1.518, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 8.980257510729615e-06, |
|
"loss": 1.5194, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"eval_loss": 1.4150291681289673, |
|
"eval_runtime": 75.9845, |
|
"eval_samples_per_second": 526.423, |
|
"eval_steps_per_second": 8.225, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 8.969635193133048e-06, |
|
"loss": 1.5201, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 8.959012875536481e-06, |
|
"loss": 1.5199, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 8.948390557939914e-06, |
|
"loss": 1.5185, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 8.93776824034335e-06, |
|
"loss": 1.5191, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 8.927145922746782e-06, |
|
"loss": 1.5187, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 8.916523605150215e-06, |
|
"loss": 1.5172, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 8.905901287553649e-06, |
|
"loss": 1.5166, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 8.895278969957082e-06, |
|
"loss": 1.5216, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 8.884656652360516e-06, |
|
"loss": 1.5186, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 8.87403433476395e-06, |
|
"loss": 1.5182, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 8.863412017167383e-06, |
|
"loss": 1.5194, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 8.852789699570816e-06, |
|
"loss": 1.515, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 8.84216738197425e-06, |
|
"loss": 1.5183, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 8.831545064377682e-06, |
|
"loss": 1.5166, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 8.820922746781117e-06, |
|
"loss": 1.5186, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 8.81030042918455e-06, |
|
"loss": 1.5182, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"eval_loss": 1.4141547679901123, |
|
"eval_runtime": 75.9015, |
|
"eval_samples_per_second": 526.999, |
|
"eval_steps_per_second": 8.234, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 8.799678111587985e-06, |
|
"loss": 1.5164, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 8.789055793991418e-06, |
|
"loss": 1.5165, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 8.77843347639485e-06, |
|
"loss": 1.5216, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 8.767811158798284e-06, |
|
"loss": 1.5161, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 8.757188841201717e-06, |
|
"loss": 1.5189, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 8.746566523605152e-06, |
|
"loss": 1.5192, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 8.735944206008585e-06, |
|
"loss": 1.515, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 8.725321888412018e-06, |
|
"loss": 1.5185, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 8.714699570815451e-06, |
|
"loss": 1.5156, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 8.704077253218886e-06, |
|
"loss": 1.5191, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 8.693454935622319e-06, |
|
"loss": 1.5164, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 8.682832618025752e-06, |
|
"loss": 1.5216, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 8.672210300429185e-06, |
|
"loss": 1.5187, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 8.66158798283262e-06, |
|
"loss": 1.5172, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 8.650965665236051e-06, |
|
"loss": 1.5202, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 8.640343347639486e-06, |
|
"loss": 1.5182, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"eval_loss": 1.4131401777267456, |
|
"eval_runtime": 76.5921, |
|
"eval_samples_per_second": 522.247, |
|
"eval_steps_per_second": 8.16, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 8.629721030042919e-06, |
|
"loss": 1.5177, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 8.619098712446352e-06, |
|
"loss": 1.5204, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 8.608476394849785e-06, |
|
"loss": 1.5183, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 8.59785407725322e-06, |
|
"loss": 1.52, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 8.587231759656653e-06, |
|
"loss": 1.5214, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 8.576609442060086e-06, |
|
"loss": 1.5193, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 8.565987124463521e-06, |
|
"loss": 1.5205, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 8.555364806866953e-06, |
|
"loss": 1.5196, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 8.544742489270387e-06, |
|
"loss": 1.5187, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 8.53412017167382e-06, |
|
"loss": 1.5198, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 8.523497854077255e-06, |
|
"loss": 1.5212, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 8.512875536480687e-06, |
|
"loss": 1.6612, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 8.502253218884121e-06, |
|
"loss": 1.519, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 8.491630901287554e-06, |
|
"loss": 1.5206, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"learning_rate": 8.48100858369099e-06, |
|
"loss": 1.5206, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"learning_rate": 8.47038626609442e-06, |
|
"loss": 1.5177, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"eval_loss": 1.4166858196258545, |
|
"eval_runtime": 77.5531, |
|
"eval_samples_per_second": 515.776, |
|
"eval_steps_per_second": 8.059, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 3.11, |
|
"learning_rate": 8.459763948497855e-06, |
|
"loss": 1.5197, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"learning_rate": 8.449141630901289e-06, |
|
"loss": 1.5183, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"learning_rate": 8.438519313304722e-06, |
|
"loss": 1.5188, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"learning_rate": 8.427896995708155e-06, |
|
"loss": 1.5197, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 8.41727467811159e-06, |
|
"loss": 1.5174, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"learning_rate": 8.406652360515023e-06, |
|
"loss": 1.5163, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 3.24, |
|
"learning_rate": 8.396030042918456e-06, |
|
"loss": 1.5189, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"learning_rate": 8.385407725321889e-06, |
|
"loss": 1.521, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"learning_rate": 8.374785407725322e-06, |
|
"loss": 1.5168, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"learning_rate": 8.364163090128757e-06, |
|
"loss": 1.5162, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"learning_rate": 8.35354077253219e-06, |
|
"loss": 1.5172, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"learning_rate": 8.342918454935623e-06, |
|
"loss": 1.5211, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"learning_rate": 8.332296137339056e-06, |
|
"loss": 1.5183, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"learning_rate": 8.32167381974249e-06, |
|
"loss": 1.5188, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 3.41, |
|
"learning_rate": 8.311051502145922e-06, |
|
"loss": 1.5199, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 3.43, |
|
"learning_rate": 8.300429184549357e-06, |
|
"loss": 1.5201, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 3.43, |
|
"eval_loss": 1.415603756904602, |
|
"eval_runtime": 78.5248, |
|
"eval_samples_per_second": 509.393, |
|
"eval_steps_per_second": 7.959, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"learning_rate": 8.28980686695279e-06, |
|
"loss": 1.5193, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"learning_rate": 8.279184549356225e-06, |
|
"loss": 1.5183, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"learning_rate": 8.268562231759658e-06, |
|
"loss": 1.515, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 3.52, |
|
"learning_rate": 8.257939914163091e-06, |
|
"loss": 1.5145, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"learning_rate": 8.247317596566524e-06, |
|
"loss": 1.5166, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"learning_rate": 8.236695278969959e-06, |
|
"loss": 1.5163, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 3.58, |
|
"learning_rate": 8.226072961373392e-06, |
|
"loss": 1.5206, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"learning_rate": 8.215450643776825e-06, |
|
"loss": 1.5164, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 3.63, |
|
"learning_rate": 8.204828326180258e-06, |
|
"loss": 1.5185, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"learning_rate": 8.194206008583691e-06, |
|
"loss": 1.5182, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"learning_rate": 8.183583690987126e-06, |
|
"loss": 1.5219, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 3.69, |
|
"learning_rate": 8.17296137339056e-06, |
|
"loss": 1.519, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 3.71, |
|
"learning_rate": 8.162339055793992e-06, |
|
"loss": 1.5169, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 3.73, |
|
"learning_rate": 8.151716738197425e-06, |
|
"loss": 1.5203, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"learning_rate": 8.14109442060086e-06, |
|
"loss": 1.5184, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 3.78, |
|
"learning_rate": 8.130472103004292e-06, |
|
"loss": 1.5173, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 3.78, |
|
"eval_loss": 1.4110851287841797, |
|
"eval_runtime": 79.1992, |
|
"eval_samples_per_second": 505.055, |
|
"eval_steps_per_second": 7.891, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"learning_rate": 8.119849785407726e-06, |
|
"loss": 1.5198, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 3.82, |
|
"learning_rate": 8.10922746781116e-06, |
|
"loss": 1.5187, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"learning_rate": 8.098605150214593e-06, |
|
"loss": 1.5205, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 3.86, |
|
"learning_rate": 8.087982832618026e-06, |
|
"loss": 1.5195, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 3.88, |
|
"learning_rate": 8.07736051502146e-06, |
|
"loss": 1.5187, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 3.9, |
|
"learning_rate": 8.066738197424893e-06, |
|
"loss": 1.5187, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"learning_rate": 8.056115879828327e-06, |
|
"loss": 1.5196, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 3.95, |
|
"learning_rate": 8.045493562231761e-06, |
|
"loss": 1.5214, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 3.97, |
|
"learning_rate": 8.034871244635194e-06, |
|
"loss": 1.5195, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"learning_rate": 8.024248927038628e-06, |
|
"loss": 1.5206, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"learning_rate": 8.01362660944206e-06, |
|
"loss": 1.66, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"learning_rate": 8.003004291845495e-06, |
|
"loss": 1.5193, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"learning_rate": 7.992381974248929e-06, |
|
"loss": 1.5179, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"learning_rate": 7.981759656652362e-06, |
|
"loss": 1.517, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"learning_rate": 7.971137339055795e-06, |
|
"loss": 1.5177, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 4.12, |
|
"learning_rate": 7.96051502145923e-06, |
|
"loss": 1.52, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 4.12, |
|
"eval_loss": 1.4117424488067627, |
|
"eval_runtime": 79.9929, |
|
"eval_samples_per_second": 500.044, |
|
"eval_steps_per_second": 7.813, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 4.14, |
|
"learning_rate": 7.949892703862661e-06, |
|
"loss": 1.5204, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 4.16, |
|
"learning_rate": 7.939270386266096e-06, |
|
"loss": 1.5208, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"learning_rate": 7.928648068669529e-06, |
|
"loss": 1.5167, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 4.21, |
|
"learning_rate": 7.918025751072962e-06, |
|
"loss": 1.5161, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 4.23, |
|
"learning_rate": 7.907403433476395e-06, |
|
"loss": 1.5168, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 4.25, |
|
"learning_rate": 7.89678111587983e-06, |
|
"loss": 1.5211, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 4.27, |
|
"learning_rate": 7.886158798283263e-06, |
|
"loss": 1.5203, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 4.29, |
|
"learning_rate": 7.875536480686696e-06, |
|
"loss": 1.5189, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 4.31, |
|
"learning_rate": 7.864914163090129e-06, |
|
"loss": 1.518, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 4.33, |
|
"learning_rate": 7.854291845493562e-06, |
|
"loss": 1.5172, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 4.36, |
|
"learning_rate": 7.843669527896997e-06, |
|
"loss": 1.5191, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 4.38, |
|
"learning_rate": 7.83304721030043e-06, |
|
"loss": 1.5206, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"learning_rate": 7.822424892703863e-06, |
|
"loss": 1.5184, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 4.42, |
|
"learning_rate": 7.811802575107296e-06, |
|
"loss": 1.5221, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 4.44, |
|
"learning_rate": 7.801180257510731e-06, |
|
"loss": 1.5175, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 4.46, |
|
"learning_rate": 7.790557939914162e-06, |
|
"loss": 1.5184, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 4.46, |
|
"eval_loss": 1.4151064157485962, |
|
"eval_runtime": 81.878, |
|
"eval_samples_per_second": 488.532, |
|
"eval_steps_per_second": 7.633, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 4.48, |
|
"learning_rate": 7.779935622317597e-06, |
|
"loss": 1.5166, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 4.51, |
|
"learning_rate": 7.76931330472103e-06, |
|
"loss": 1.5172, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 4.53, |
|
"learning_rate": 7.758690987124465e-06, |
|
"loss": 1.5164, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 4.55, |
|
"learning_rate": 7.748068669527898e-06, |
|
"loss": 1.5145, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 4.57, |
|
"learning_rate": 7.737446351931331e-06, |
|
"loss": 1.5202, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 4.59, |
|
"learning_rate": 7.726824034334764e-06, |
|
"loss": 1.517, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 4.61, |
|
"learning_rate": 7.7162017167382e-06, |
|
"loss": 1.5185, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 4.63, |
|
"learning_rate": 7.70557939914163e-06, |
|
"loss": 1.5155, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 4.66, |
|
"learning_rate": 7.694957081545065e-06, |
|
"loss": 1.5193, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 4.68, |
|
"learning_rate": 7.684334763948498e-06, |
|
"loss": 1.5214, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 4.7, |
|
"learning_rate": 7.673712446351932e-06, |
|
"loss": 1.5178, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 4.72, |
|
"learning_rate": 7.663090128755365e-06, |
|
"loss": 1.518, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 4.74, |
|
"learning_rate": 7.6524678111588e-06, |
|
"loss": 1.5197, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 4.76, |
|
"learning_rate": 7.641845493562233e-06, |
|
"loss": 1.5167, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 4.78, |
|
"learning_rate": 7.631223175965666e-06, |
|
"loss": 1.5202, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"learning_rate": 7.620600858369098e-06, |
|
"loss": 1.5198, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"eval_loss": 1.4096823930740356, |
|
"eval_runtime": 80.6519, |
|
"eval_samples_per_second": 495.959, |
|
"eval_steps_per_second": 7.749, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 4.83, |
|
"learning_rate": 7.609978540772531e-06, |
|
"loss": 1.5202, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 4.85, |
|
"learning_rate": 7.599356223175966e-06, |
|
"loss": 1.5164, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 4.87, |
|
"learning_rate": 7.5887339055794e-06, |
|
"loss": 1.5208, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 4.89, |
|
"learning_rate": 7.578111587982832e-06, |
|
"loss": 1.5176, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 4.91, |
|
"learning_rate": 7.567489270386266e-06, |
|
"loss": 1.5201, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 4.93, |
|
"learning_rate": 7.5568669527897e-06, |
|
"loss": 1.5181, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 4.96, |
|
"learning_rate": 7.546244635193132e-06, |
|
"loss": 1.519, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 4.98, |
|
"learning_rate": 7.535622317596566e-06, |
|
"loss": 1.5198, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 7.525e-06, |
|
"loss": 1.5171, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"learning_rate": 7.514377682403433e-06, |
|
"loss": 1.6592, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 5.04, |
|
"learning_rate": 7.503755364806867e-06, |
|
"loss": 1.5173, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 5.06, |
|
"learning_rate": 7.4931330472103e-06, |
|
"loss": 1.5182, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 5.09, |
|
"learning_rate": 7.482510729613735e-06, |
|
"loss": 1.5192, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 5.11, |
|
"learning_rate": 7.471888412017167e-06, |
|
"loss": 1.5198, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 5.13, |
|
"learning_rate": 7.4612660944206e-06, |
|
"loss": 1.5163, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 5.15, |
|
"learning_rate": 7.450643776824033e-06, |
|
"loss": 1.5202, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 5.15, |
|
"eval_loss": 1.4161678552627563, |
|
"eval_runtime": 75.8006, |
|
"eval_samples_per_second": 527.7, |
|
"eval_steps_per_second": 8.245, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 5.17, |
|
"learning_rate": 7.440021459227468e-06, |
|
"loss": 1.5173, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 5.19, |
|
"learning_rate": 7.429399141630901e-06, |
|
"loss": 1.5165, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 5.21, |
|
"learning_rate": 7.418776824034335e-06, |
|
"loss": 1.5169, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 5.24, |
|
"learning_rate": 7.408154506437768e-06, |
|
"loss": 1.5196, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 5.26, |
|
"learning_rate": 7.397532188841201e-06, |
|
"loss": 1.5176, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 5.28, |
|
"learning_rate": 7.386909871244635e-06, |
|
"loss": 1.5205, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 5.3, |
|
"learning_rate": 7.376287553648069e-06, |
|
"loss": 1.518, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 5.32, |
|
"learning_rate": 7.3656652360515015e-06, |
|
"loss": 1.5171, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 5.34, |
|
"learning_rate": 7.355042918454935e-06, |
|
"loss": 1.5211, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 5.36, |
|
"learning_rate": 7.344420600858369e-06, |
|
"loss": 1.5201, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 5.39, |
|
"learning_rate": 7.333798283261802e-06, |
|
"loss": 1.5177, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 5.41, |
|
"learning_rate": 7.3231759656652355e-06, |
|
"loss": 1.5194, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 5.43, |
|
"learning_rate": 7.3125536480686695e-06, |
|
"loss": 1.5175, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 5.45, |
|
"learning_rate": 7.3019313304721026e-06, |
|
"loss": 1.5177, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 5.47, |
|
"learning_rate": 7.291309012875537e-06, |
|
"loss": 1.5184, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 5.49, |
|
"learning_rate": 7.2806866952789704e-06, |
|
"loss": 1.5166, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 5.49, |
|
"eval_loss": 1.4130014181137085, |
|
"eval_runtime": 75.8444, |
|
"eval_samples_per_second": 527.396, |
|
"eval_steps_per_second": 8.241, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 5.51, |
|
"learning_rate": 7.2700643776824035e-06, |
|
"loss": 1.5158, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 5.54, |
|
"learning_rate": 7.2594420600858375e-06, |
|
"loss": 1.5163, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 5.56, |
|
"learning_rate": 7.2488197424892706e-06, |
|
"loss": 1.5173, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 5.58, |
|
"learning_rate": 7.238197424892704e-06, |
|
"loss": 1.5206, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 5.6, |
|
"learning_rate": 7.227575107296138e-06, |
|
"loss": 1.515, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 5.62, |
|
"learning_rate": 7.216952789699571e-06, |
|
"loss": 1.5196, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 5.64, |
|
"learning_rate": 7.206330472103005e-06, |
|
"loss": 1.5179, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 5.66, |
|
"learning_rate": 7.195708154506439e-06, |
|
"loss": 1.5226, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 5.69, |
|
"learning_rate": 7.185085836909871e-06, |
|
"loss": 1.5206, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 5.71, |
|
"learning_rate": 7.174463519313305e-06, |
|
"loss": 1.5173, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 5.73, |
|
"learning_rate": 7.163841201716739e-06, |
|
"loss": 1.5169, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 5.75, |
|
"learning_rate": 7.153218884120171e-06, |
|
"loss": 1.5199, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 5.77, |
|
"learning_rate": 7.142596566523605e-06, |
|
"loss": 1.5179, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 5.79, |
|
"learning_rate": 7.13197424892704e-06, |
|
"loss": 1.5202, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 5.81, |
|
"learning_rate": 7.121351931330473e-06, |
|
"loss": 1.5202, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 5.84, |
|
"learning_rate": 7.110729613733906e-06, |
|
"loss": 1.5184, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 5.84, |
|
"eval_loss": 1.4138652086257935, |
|
"eval_runtime": 75.865, |
|
"eval_samples_per_second": 527.252, |
|
"eval_steps_per_second": 8.238, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 5.86, |
|
"learning_rate": 7.100107296137338e-06, |
|
"loss": 1.5209, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 5.88, |
|
"learning_rate": 7.089484978540771e-06, |
|
"loss": 1.5183, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 5.9, |
|
"learning_rate": 7.078862660944206e-06, |
|
"loss": 1.5196, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 5.92, |
|
"learning_rate": 7.06824034334764e-06, |
|
"loss": 1.5169, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 5.94, |
|
"learning_rate": 7.057618025751072e-06, |
|
"loss": 1.5194, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 5.96, |
|
"learning_rate": 7.046995708154507e-06, |
|
"loss": 1.5211, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 5.99, |
|
"learning_rate": 7.03637339055794e-06, |
|
"loss": 1.5191, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 6.01, |
|
"learning_rate": 7.025751072961372e-06, |
|
"loss": 1.6597, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 6.03, |
|
"learning_rate": 7.015128755364807e-06, |
|
"loss": 1.5205, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 6.05, |
|
"learning_rate": 7.00450643776824e-06, |
|
"loss": 1.5186, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 6.07, |
|
"learning_rate": 6.993884120171674e-06, |
|
"loss": 1.517, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 6.09, |
|
"learning_rate": 6.983261802575107e-06, |
|
"loss": 1.5152, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 6.12, |
|
"learning_rate": 6.97263948497854e-06, |
|
"loss": 1.5173, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 6.14, |
|
"learning_rate": 6.962017167381975e-06, |
|
"loss": 1.5206, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 6.16, |
|
"learning_rate": 6.951394849785407e-06, |
|
"loss": 1.5178, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 6.18, |
|
"learning_rate": 6.9407725321888405e-06, |
|
"loss": 1.5174, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 6.18, |
|
"eval_loss": 1.4127614498138428, |
|
"eval_runtime": 75.8874, |
|
"eval_samples_per_second": 527.097, |
|
"eval_steps_per_second": 8.236, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 6.2, |
|
"learning_rate": 6.930150214592274e-06, |
|
"loss": 1.5191, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 6.22, |
|
"learning_rate": 6.919527896995708e-06, |
|
"loss": 1.5183, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 6.24, |
|
"learning_rate": 6.9089055793991415e-06, |
|
"loss": 1.5223, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 6.27, |
|
"learning_rate": 6.898283261802575e-06, |
|
"loss": 1.5183, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 6.29, |
|
"learning_rate": 6.8876609442060085e-06, |
|
"loss": 1.5192, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 6.31, |
|
"learning_rate": 6.877038626609442e-06, |
|
"loss": 1.5175, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 6.33, |
|
"learning_rate": 6.8664163090128755e-06, |
|
"loss": 1.5176, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 6.35, |
|
"learning_rate": 6.8557939914163095e-06, |
|
"loss": 1.5202, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 6.37, |
|
"learning_rate": 6.845171673819742e-06, |
|
"loss": 1.5187, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 6.39, |
|
"learning_rate": 6.834549356223176e-06, |
|
"loss": 1.5184, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 6.42, |
|
"learning_rate": 6.82392703862661e-06, |
|
"loss": 1.5202, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 6.44, |
|
"learning_rate": 6.813304721030042e-06, |
|
"loss": 1.5158, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 6.46, |
|
"learning_rate": 6.8026824034334775e-06, |
|
"loss": 1.5215, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 6.48, |
|
"learning_rate": 6.79206008583691e-06, |
|
"loss": 1.5158, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 6.5, |
|
"learning_rate": 6.781437768240343e-06, |
|
"loss": 1.5161, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 6.52, |
|
"learning_rate": 6.770815450643778e-06, |
|
"loss": 1.5161, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 6.52, |
|
"eval_loss": 1.412627100944519, |
|
"eval_runtime": 75.8175, |
|
"eval_samples_per_second": 527.583, |
|
"eval_steps_per_second": 8.243, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 6.54, |
|
"learning_rate": 6.760193133047211e-06, |
|
"loss": 1.5151, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 6.57, |
|
"learning_rate": 6.749570815450644e-06, |
|
"loss": 1.5175, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 6.59, |
|
"learning_rate": 6.738948497854078e-06, |
|
"loss": 1.5197, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 6.61, |
|
"learning_rate": 6.728326180257511e-06, |
|
"loss": 1.5172, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 6.63, |
|
"learning_rate": 6.717703862660944e-06, |
|
"loss": 1.5193, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 6.65, |
|
"learning_rate": 6.707081545064378e-06, |
|
"loss": 1.5191, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 6.67, |
|
"learning_rate": 6.696459227467811e-06, |
|
"loss": 1.5202, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 6.69, |
|
"learning_rate": 6.685836909871245e-06, |
|
"loss": 1.5184, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 6.72, |
|
"learning_rate": 6.675214592274679e-06, |
|
"loss": 1.5173, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 6.74, |
|
"learning_rate": 6.664592274678111e-06, |
|
"loss": 1.5202, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 6.76, |
|
"learning_rate": 6.653969957081545e-06, |
|
"loss": 1.517, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 6.78, |
|
"learning_rate": 6.643347639484979e-06, |
|
"loss": 1.5171, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 6.8, |
|
"learning_rate": 6.632725321888411e-06, |
|
"loss": 1.52, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 6.82, |
|
"learning_rate": 6.622103004291845e-06, |
|
"loss": 1.5191, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 6.84, |
|
"learning_rate": 6.61148068669528e-06, |
|
"loss": 1.5169, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 6.87, |
|
"learning_rate": 6.600858369098713e-06, |
|
"loss": 1.5175, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 6.87, |
|
"eval_loss": 1.4094613790512085, |
|
"eval_runtime": 75.8182, |
|
"eval_samples_per_second": 527.578, |
|
"eval_steps_per_second": 8.243, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 6.89, |
|
"learning_rate": 6.590236051502146e-06, |
|
"loss": 1.5178, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 6.91, |
|
"learning_rate": 6.579613733905578e-06, |
|
"loss": 1.5206, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 6.93, |
|
"learning_rate": 6.5689914163090115e-06, |
|
"loss": 1.519, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 6.95, |
|
"learning_rate": 6.558369098712447e-06, |
|
"loss": 1.5162, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 6.97, |
|
"learning_rate": 6.54774678111588e-06, |
|
"loss": 1.5203, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 6.99, |
|
"learning_rate": 6.5371244635193125e-06, |
|
"loss": 1.5193, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 7.02, |
|
"learning_rate": 6.526502145922747e-06, |
|
"loss": 1.6616, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 7.04, |
|
"learning_rate": 6.51587982832618e-06, |
|
"loss": 1.5154, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 7.06, |
|
"learning_rate": 6.505257510729613e-06, |
|
"loss": 1.5169, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 7.08, |
|
"learning_rate": 6.494635193133047e-06, |
|
"loss": 1.5204, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 7.1, |
|
"learning_rate": 6.4840128755364805e-06, |
|
"loss": 1.5175, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 7.12, |
|
"learning_rate": 6.4733905579399144e-06, |
|
"loss": 1.5195, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 7.15, |
|
"learning_rate": 6.4627682403433475e-06, |
|
"loss": 1.5165, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 7.17, |
|
"learning_rate": 6.452145922746781e-06, |
|
"loss": 1.5161, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 7.19, |
|
"learning_rate": 6.441523605150215e-06, |
|
"loss": 1.5182, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 7.21, |
|
"learning_rate": 6.430901287553648e-06, |
|
"loss": 1.5169, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 7.21, |
|
"eval_loss": 1.4117693901062012, |
|
"eval_runtime": 75.551, |
|
"eval_samples_per_second": 529.443, |
|
"eval_steps_per_second": 8.273, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 7.23, |
|
"learning_rate": 6.420278969957081e-06, |
|
"loss": 1.5192, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 7.25, |
|
"learning_rate": 6.409656652360514e-06, |
|
"loss": 1.5188, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 7.27, |
|
"learning_rate": 6.399034334763949e-06, |
|
"loss": 1.5219, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 7.3, |
|
"learning_rate": 6.388412017167382e-06, |
|
"loss": 1.5202, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 7.32, |
|
"learning_rate": 6.377789699570816e-06, |
|
"loss": 1.5176, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 7.34, |
|
"learning_rate": 6.367167381974249e-06, |
|
"loss": 1.5178, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 7.36, |
|
"learning_rate": 6.356545064377682e-06, |
|
"loss": 1.5185, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 7.38, |
|
"learning_rate": 6.345922746781116e-06, |
|
"loss": 1.52, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 7.4, |
|
"learning_rate": 6.33530042918455e-06, |
|
"loss": 1.5186, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 7.42, |
|
"learning_rate": 6.324678111587982e-06, |
|
"loss": 1.5197, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 7.45, |
|
"learning_rate": 6.314055793991417e-06, |
|
"loss": 1.5171, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 7.47, |
|
"learning_rate": 6.30343347639485e-06, |
|
"loss": 1.5193, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 7.49, |
|
"learning_rate": 6.292811158798282e-06, |
|
"loss": 1.5145, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 7.51, |
|
"learning_rate": 6.282188841201718e-06, |
|
"loss": 1.5151, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 7.53, |
|
"learning_rate": 6.27156652360515e-06, |
|
"loss": 1.5172, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 7.55, |
|
"learning_rate": 6.260944206008583e-06, |
|
"loss": 1.516, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 7.55, |
|
"eval_loss": 1.411309003829956, |
|
"eval_runtime": 75.5679, |
|
"eval_samples_per_second": 529.326, |
|
"eval_steps_per_second": 8.271, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 7.57, |
|
"learning_rate": 6.250321888412018e-06, |
|
"loss": 1.5177, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 7.6, |
|
"learning_rate": 6.239699570815451e-06, |
|
"loss": 1.5179, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 7.62, |
|
"learning_rate": 6.229077253218884e-06, |
|
"loss": 1.5194, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 7.64, |
|
"learning_rate": 6.218454935622318e-06, |
|
"loss": 1.5171, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 7.66, |
|
"learning_rate": 6.207832618025751e-06, |
|
"loss": 1.5225, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 7.68, |
|
"learning_rate": 6.197210300429184e-06, |
|
"loss": 1.5184, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 7.7, |
|
"learning_rate": 6.186587982832618e-06, |
|
"loss": 1.5148, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 7.72, |
|
"learning_rate": 6.175965665236051e-06, |
|
"loss": 1.5159, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 7.75, |
|
"learning_rate": 6.165343347639485e-06, |
|
"loss": 1.5202, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 7.77, |
|
"learning_rate": 6.154721030042919e-06, |
|
"loss": 1.5146, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 7.79, |
|
"learning_rate": 6.144098712446351e-06, |
|
"loss": 1.5195, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 7.81, |
|
"learning_rate": 6.133476394849785e-06, |
|
"loss": 1.5179, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 7.83, |
|
"learning_rate": 6.122854077253219e-06, |
|
"loss": 1.5217, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 7.85, |
|
"learning_rate": 6.1122317596566515e-06, |
|
"loss": 1.5221, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 7.87, |
|
"learning_rate": 6.1016094420600854e-06, |
|
"loss": 1.518, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 7.9, |
|
"learning_rate": 6.09098712446352e-06, |
|
"loss": 1.5182, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 7.9, |
|
"eval_loss": 1.4096906185150146, |
|
"eval_runtime": 75.6834, |
|
"eval_samples_per_second": 528.517, |
|
"eval_steps_per_second": 8.258, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 7.92, |
|
"learning_rate": 6.080364806866953e-06, |
|
"loss": 1.5166, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 7.94, |
|
"learning_rate": 6.069742489270387e-06, |
|
"loss": 1.5203, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 7.96, |
|
"learning_rate": 6.059120171673819e-06, |
|
"loss": 1.5183, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 7.98, |
|
"learning_rate": 6.048497854077252e-06, |
|
"loss": 1.5186, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 6.037875536480687e-06, |
|
"loss": 1.6603, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 8.03, |
|
"learning_rate": 6.02725321888412e-06, |
|
"loss": 1.5197, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 8.05, |
|
"learning_rate": 6.016630901287553e-06, |
|
"loss": 1.521, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 8.07, |
|
"learning_rate": 6.0060085836909875e-06, |
|
"loss": 1.5189, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 8.09, |
|
"learning_rate": 5.995386266094421e-06, |
|
"loss": 1.5204, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 8.11, |
|
"learning_rate": 5.9847639484978546e-06, |
|
"loss": 1.5182, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 8.13, |
|
"learning_rate": 5.974141630901288e-06, |
|
"loss": 1.5174, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 8.15, |
|
"learning_rate": 5.963519313304721e-06, |
|
"loss": 1.5173, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 8.18, |
|
"learning_rate": 5.952896995708155e-06, |
|
"loss": 1.5165, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 8.2, |
|
"learning_rate": 5.942274678111588e-06, |
|
"loss": 1.5148, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 8.22, |
|
"learning_rate": 5.931652360515021e-06, |
|
"loss": 1.5177, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 8.24, |
|
"learning_rate": 5.921030042918456e-06, |
|
"loss": 1.5195, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 8.24, |
|
"eval_loss": 1.4117556810379028, |
|
"eval_runtime": 75.1101, |
|
"eval_samples_per_second": 532.551, |
|
"eval_steps_per_second": 8.321, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 8.26, |
|
"learning_rate": 5.910407725321888e-06, |
|
"loss": 1.5196, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 8.28, |
|
"learning_rate": 5.899785407725321e-06, |
|
"loss": 1.5199, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 8.3, |
|
"learning_rate": 5.889163090128754e-06, |
|
"loss": 1.5181, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 8.33, |
|
"learning_rate": 5.878540772532189e-06, |
|
"loss": 1.5141, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 8.02, |
|
"learning_rate": 5.867918454935622e-06, |
|
"loss": 1.5185, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 8.04, |
|
"learning_rate": 5.857296137339056e-06, |
|
"loss": 1.5208, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 8.06, |
|
"learning_rate": 5.846673819742489e-06, |
|
"loss": 1.5179, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 8.09, |
|
"learning_rate": 5.836051502145922e-06, |
|
"loss": 1.5178, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 8.11, |
|
"learning_rate": 5.825429184549356e-06, |
|
"loss": 1.5186, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 8.13, |
|
"learning_rate": 5.81480686695279e-06, |
|
"loss": 1.5197, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 8.15, |
|
"learning_rate": 5.804184549356222e-06, |
|
"loss": 1.5179, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 8.17, |
|
"learning_rate": 5.793562231759657e-06, |
|
"loss": 1.5192, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 8.19, |
|
"learning_rate": 5.78293991416309e-06, |
|
"loss": 1.5174, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 8.21, |
|
"learning_rate": 5.772317596566522e-06, |
|
"loss": 1.5159, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 8.24, |
|
"learning_rate": 5.761695278969958e-06, |
|
"loss": 1.5194, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 8.26, |
|
"learning_rate": 5.75107296137339e-06, |
|
"loss": 1.5187, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 8.26, |
|
"eval_loss": 1.4119081497192383, |
|
"eval_runtime": 75.3322, |
|
"eval_samples_per_second": 530.981, |
|
"eval_steps_per_second": 8.297, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 8.28, |
|
"learning_rate": 5.740450643776823e-06, |
|
"loss": 1.5213, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 8.3, |
|
"learning_rate": 5.729828326180258e-06, |
|
"loss": 1.5207, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 8.32, |
|
"learning_rate": 5.719206008583691e-06, |
|
"loss": 1.5175, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 8.34, |
|
"learning_rate": 5.708583690987124e-06, |
|
"loss": 1.5199, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 8.36, |
|
"learning_rate": 5.697961373390558e-06, |
|
"loss": 1.5199, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 8.39, |
|
"learning_rate": 5.687339055793991e-06, |
|
"loss": 1.5171, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 8.41, |
|
"learning_rate": 5.6767167381974245e-06, |
|
"loss": 1.5184, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 8.43, |
|
"learning_rate": 5.666094420600858e-06, |
|
"loss": 1.5198, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 8.45, |
|
"learning_rate": 5.6554721030042915e-06, |
|
"loss": 1.5176, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 8.47, |
|
"learning_rate": 5.6448497854077255e-06, |
|
"loss": 1.5203, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 8.49, |
|
"learning_rate": 5.634227467811159e-06, |
|
"loss": 1.5168, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 8.51, |
|
"learning_rate": 5.623605150214592e-06, |
|
"loss": 1.5165, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 8.54, |
|
"learning_rate": 5.612982832618026e-06, |
|
"loss": 1.5179, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 8.56, |
|
"learning_rate": 5.6023605150214595e-06, |
|
"loss": 1.5199, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 8.58, |
|
"learning_rate": 5.591738197424892e-06, |
|
"loss": 1.5183, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 8.6, |
|
"learning_rate": 5.581115879828326e-06, |
|
"loss": 1.5149, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 8.6, |
|
"eval_loss": 1.4132717847824097, |
|
"eval_runtime": 75.3167, |
|
"eval_samples_per_second": 531.09, |
|
"eval_steps_per_second": 8.298, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 8.62, |
|
"learning_rate": 5.5704935622317605e-06, |
|
"loss": 1.5219, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 8.64, |
|
"learning_rate": 5.559871244635194e-06, |
|
"loss": 1.5165, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 8.66, |
|
"learning_rate": 5.5492489270386275e-06, |
|
"loss": 1.5201, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 8.69, |
|
"learning_rate": 5.538626609442059e-06, |
|
"loss": 1.5194, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 8.71, |
|
"learning_rate": 5.528004291845492e-06, |
|
"loss": 1.5175, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 8.73, |
|
"learning_rate": 5.517381974248928e-06, |
|
"loss": 1.5168, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 8.75, |
|
"learning_rate": 5.50675965665236e-06, |
|
"loss": 1.5181, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 8.77, |
|
"learning_rate": 5.496137339055794e-06, |
|
"loss": 1.5192, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 8.79, |
|
"learning_rate": 5.485515021459228e-06, |
|
"loss": 1.5203, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 8.81, |
|
"learning_rate": 5.474892703862661e-06, |
|
"loss": 1.5199, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 8.84, |
|
"learning_rate": 5.464270386266095e-06, |
|
"loss": 1.5191, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 8.86, |
|
"learning_rate": 5.453648068669528e-06, |
|
"loss": 1.5191, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 8.88, |
|
"learning_rate": 5.443025751072961e-06, |
|
"loss": 1.519, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 8.9, |
|
"learning_rate": 5.432403433476395e-06, |
|
"loss": 1.5198, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 8.92, |
|
"learning_rate": 5.421781115879828e-06, |
|
"loss": 1.5212, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 8.94, |
|
"learning_rate": 5.411158798283261e-06, |
|
"loss": 1.5183, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 8.94, |
|
"eval_loss": 1.4096869230270386, |
|
"eval_runtime": 75.5469, |
|
"eval_samples_per_second": 529.472, |
|
"eval_steps_per_second": 8.273, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 8.96, |
|
"learning_rate": 5.400536480686696e-06, |
|
"loss": 1.5217, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 8.99, |
|
"learning_rate": 5.389914163090128e-06, |
|
"loss": 1.5194, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 9.01, |
|
"learning_rate": 5.379291845493561e-06, |
|
"loss": 1.6613, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 9.03, |
|
"learning_rate": 5.368669527896994e-06, |
|
"loss": 1.5163, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 9.05, |
|
"learning_rate": 5.358047210300429e-06, |
|
"loss": 1.5187, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 9.07, |
|
"learning_rate": 5.347424892703862e-06, |
|
"loss": 1.5219, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"learning_rate": 5.336802575107296e-06, |
|
"loss": 1.5155, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 9.12, |
|
"learning_rate": 5.326180257510729e-06, |
|
"loss": 1.5201, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 9.14, |
|
"learning_rate": 5.315557939914162e-06, |
|
"loss": 1.5186, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 9.16, |
|
"learning_rate": 5.304935622317597e-06, |
|
"loss": 1.519, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 9.18, |
|
"learning_rate": 5.29431330472103e-06, |
|
"loss": 1.5198, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 9.2, |
|
"learning_rate": 5.2836909871244625e-06, |
|
"loss": 1.5179, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 9.22, |
|
"learning_rate": 5.273068669527897e-06, |
|
"loss": 1.518, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 9.24, |
|
"learning_rate": 5.26244635193133e-06, |
|
"loss": 1.5194, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 9.27, |
|
"learning_rate": 5.251824034334765e-06, |
|
"loss": 1.5188, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 9.29, |
|
"learning_rate": 5.241201716738198e-06, |
|
"loss": 1.5192, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 9.29, |
|
"eval_loss": 1.4101494550704956, |
|
"eval_runtime": 75.3568, |
|
"eval_samples_per_second": 530.808, |
|
"eval_steps_per_second": 8.294, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 9.31, |
|
"learning_rate": 5.2305793991416305e-06, |
|
"loss": 1.515, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 9.33, |
|
"learning_rate": 5.219957081545064e-06, |
|
"loss": 1.5177, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 9.35, |
|
"learning_rate": 5.2093347639484984e-06, |
|
"loss": 1.5199, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 9.37, |
|
"learning_rate": 5.1987124463519315e-06, |
|
"loss": 1.5191, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 9.39, |
|
"learning_rate": 5.188090128755365e-06, |
|
"loss": 1.518, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 9.42, |
|
"learning_rate": 5.1774678111587986e-06, |
|
"loss": 1.5198, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 9.44, |
|
"learning_rate": 5.166845493562232e-06, |
|
"loss": 1.519, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 9.46, |
|
"learning_rate": 5.156223175965665e-06, |
|
"loss": 1.519, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 9.48, |
|
"learning_rate": 5.145600858369099e-06, |
|
"loss": 1.5173, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 9.5, |
|
"learning_rate": 5.134978540772532e-06, |
|
"loss": 1.5176, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 9.52, |
|
"learning_rate": 5.124356223175966e-06, |
|
"loss": 1.5165, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 9.54, |
|
"learning_rate": 5.1137339055794e-06, |
|
"loss": 1.5119, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 9.57, |
|
"learning_rate": 5.103111587982832e-06, |
|
"loss": 1.518, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 9.59, |
|
"learning_rate": 5.092489270386266e-06, |
|
"loss": 1.5174, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 9.61, |
|
"learning_rate": 5.0818669527897e-06, |
|
"loss": 1.5171, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 9.63, |
|
"learning_rate": 5.071244635193132e-06, |
|
"loss": 1.5191, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 9.63, |
|
"eval_loss": 1.414576768875122, |
|
"eval_runtime": 75.373, |
|
"eval_samples_per_second": 530.694, |
|
"eval_steps_per_second": 8.292, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 9.65, |
|
"learning_rate": 5.060622317596566e-06, |
|
"loss": 1.5188, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 9.67, |
|
"learning_rate": 5.050000000000001e-06, |
|
"loss": 1.5213, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 9.69, |
|
"learning_rate": 5.039377682403434e-06, |
|
"loss": 1.5196, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 9.72, |
|
"learning_rate": 5.028755364806867e-06, |
|
"loss": 1.5159, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 9.74, |
|
"learning_rate": 5.018133047210299e-06, |
|
"loss": 1.5182, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 9.76, |
|
"learning_rate": 5.007510729613734e-06, |
|
"loss": 1.5166, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 9.78, |
|
"learning_rate": 4.996888412017167e-06, |
|
"loss": 1.5171, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 9.8, |
|
"learning_rate": 4.9862660944206e-06, |
|
"loss": 1.5191, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 9.82, |
|
"learning_rate": 4.975643776824034e-06, |
|
"loss": 1.5206, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 9.84, |
|
"learning_rate": 4.965021459227468e-06, |
|
"loss": 1.5189, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 9.87, |
|
"learning_rate": 4.954399141630901e-06, |
|
"loss": 1.5213, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 9.89, |
|
"learning_rate": 4.943776824034335e-06, |
|
"loss": 1.5168, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 9.91, |
|
"learning_rate": 4.933154506437768e-06, |
|
"loss": 1.5192, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 9.93, |
|
"learning_rate": 4.922532188841201e-06, |
|
"loss": 1.5167, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 9.95, |
|
"learning_rate": 4.911909871244635e-06, |
|
"loss": 1.5174, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 9.97, |
|
"learning_rate": 4.901287553648069e-06, |
|
"loss": 1.5192, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 9.97, |
|
"eval_loss": 1.4164894819259644, |
|
"eval_runtime": 75.1669, |
|
"eval_samples_per_second": 532.149, |
|
"eval_steps_per_second": 8.315, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 9.99, |
|
"learning_rate": 4.8906652360515014e-06, |
|
"loss": 1.5199, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 10.02, |
|
"learning_rate": 4.880042918454935e-06, |
|
"loss": 1.6615, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 10.04, |
|
"learning_rate": 4.869420600858369e-06, |
|
"loss": 1.5158, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 10.06, |
|
"learning_rate": 4.858798283261803e-06, |
|
"loss": 1.5177, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 10.08, |
|
"learning_rate": 4.8481759656652355e-06, |
|
"loss": 1.5187, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 10.1, |
|
"learning_rate": 4.8375536480686694e-06, |
|
"loss": 1.5207, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 10.12, |
|
"learning_rate": 4.826931330472103e-06, |
|
"loss": 1.5193, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 10.15, |
|
"learning_rate": 4.8163090128755365e-06, |
|
"loss": 1.5177, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 10.17, |
|
"learning_rate": 4.8056866952789696e-06, |
|
"loss": 1.5195, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 10.19, |
|
"learning_rate": 4.7950643776824035e-06, |
|
"loss": 1.5171, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 10.21, |
|
"learning_rate": 4.784442060085837e-06, |
|
"loss": 1.5149, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 10.23, |
|
"learning_rate": 4.7738197424892706e-06, |
|
"loss": 1.5177, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 10.25, |
|
"learning_rate": 4.763197424892704e-06, |
|
"loss": 1.5178, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 10.27, |
|
"learning_rate": 4.752575107296137e-06, |
|
"loss": 1.5182, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 10.3, |
|
"learning_rate": 4.741952789699571e-06, |
|
"loss": 1.5177, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 10.32, |
|
"learning_rate": 4.731330472103005e-06, |
|
"loss": 1.5164, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 10.32, |
|
"eval_loss": 1.411863923072815, |
|
"eval_runtime": 75.1375, |
|
"eval_samples_per_second": 532.358, |
|
"eval_steps_per_second": 8.318, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 10.34, |
|
"learning_rate": 4.7207081545064386e-06, |
|
"loss": 1.5154, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 10.36, |
|
"learning_rate": 4.710085836909871e-06, |
|
"loss": 1.5185, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 10.38, |
|
"learning_rate": 4.699463519313305e-06, |
|
"loss": 1.5185, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 10.4, |
|
"learning_rate": 4.688841201716739e-06, |
|
"loss": 1.5181, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 10.42, |
|
"learning_rate": 4.678218884120171e-06, |
|
"loss": 1.5178, |
|
"step": 5010 |
|
}, |
|
{ |
|
"epoch": 10.45, |
|
"learning_rate": 4.667596566523605e-06, |
|
"loss": 1.5163, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 10.47, |
|
"learning_rate": 4.656974248927039e-06, |
|
"loss": 1.5222, |
|
"step": 5030 |
|
}, |
|
{ |
|
"epoch": 10.49, |
|
"learning_rate": 4.646351931330472e-06, |
|
"loss": 1.5118, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 10.51, |
|
"learning_rate": 4.635729613733905e-06, |
|
"loss": 1.5157, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 10.53, |
|
"learning_rate": 4.625107296137339e-06, |
|
"loss": 1.5164, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 10.55, |
|
"learning_rate": 4.614484978540773e-06, |
|
"loss": 1.5154, |
|
"step": 5070 |
|
}, |
|
{ |
|
"epoch": 10.57, |
|
"learning_rate": 4.603862660944206e-06, |
|
"loss": 1.519, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 10.6, |
|
"learning_rate": 4.59324034334764e-06, |
|
"loss": 1.5173, |
|
"step": 5090 |
|
}, |
|
{ |
|
"epoch": 10.62, |
|
"learning_rate": 4.582618025751073e-06, |
|
"loss": 1.5173, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 10.64, |
|
"learning_rate": 4.571995708154506e-06, |
|
"loss": 1.5178, |
|
"step": 5110 |
|
}, |
|
{ |
|
"epoch": 10.66, |
|
"learning_rate": 4.56137339055794e-06, |
|
"loss": 1.5235, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 10.66, |
|
"eval_loss": 1.4089233875274658, |
|
"eval_runtime": 75.36, |
|
"eval_samples_per_second": 530.785, |
|
"eval_steps_per_second": 8.294, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 10.68, |
|
"learning_rate": 4.550751072961374e-06, |
|
"loss": 1.5191, |
|
"step": 5130 |
|
}, |
|
{ |
|
"epoch": 10.7, |
|
"learning_rate": 4.540128755364806e-06, |
|
"loss": 1.5176, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 10.72, |
|
"learning_rate": 4.52950643776824e-06, |
|
"loss": 1.5161, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 10.75, |
|
"learning_rate": 4.518884120171674e-06, |
|
"loss": 1.521, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 10.77, |
|
"learning_rate": 4.508261802575106e-06, |
|
"loss": 1.5174, |
|
"step": 5170 |
|
}, |
|
{ |
|
"epoch": 10.79, |
|
"learning_rate": 4.49763948497854e-06, |
|
"loss": 1.5192, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 10.81, |
|
"learning_rate": 4.487017167381974e-06, |
|
"loss": 1.5172, |
|
"step": 5190 |
|
}, |
|
{ |
|
"epoch": 10.83, |
|
"learning_rate": 4.476394849785408e-06, |
|
"loss": 1.5201, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 10.85, |
|
"learning_rate": 4.4657725321888405e-06, |
|
"loss": 1.5213, |
|
"step": 5210 |
|
}, |
|
{ |
|
"epoch": 10.87, |
|
"learning_rate": 4.455150214592274e-06, |
|
"loss": 1.5197, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 10.9, |
|
"learning_rate": 4.444527896995708e-06, |
|
"loss": 1.517, |
|
"step": 5230 |
|
}, |
|
{ |
|
"epoch": 10.92, |
|
"learning_rate": 4.4339055793991414e-06, |
|
"loss": 1.5144, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 10.94, |
|
"learning_rate": 4.423283261802575e-06, |
|
"loss": 1.5191, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 10.96, |
|
"learning_rate": 4.4126609442060085e-06, |
|
"loss": 1.5206, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 10.98, |
|
"learning_rate": 4.4020386266094416e-06, |
|
"loss": 1.5202, |
|
"step": 5270 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"learning_rate": 4.3914163090128755e-06, |
|
"loss": 1.6571, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_loss": 1.4120959043502808, |
|
"eval_runtime": 75.422, |
|
"eval_samples_per_second": 530.349, |
|
"eval_steps_per_second": 8.287, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 11.03, |
|
"learning_rate": 4.3807939914163095e-06, |
|
"loss": 1.5169, |
|
"step": 5290 |
|
}, |
|
{ |
|
"epoch": 11.05, |
|
"learning_rate": 4.370171673819742e-06, |
|
"loss": 1.5192, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 11.07, |
|
"learning_rate": 4.359549356223176e-06, |
|
"loss": 1.5176, |
|
"step": 5310 |
|
}, |
|
{ |
|
"epoch": 11.09, |
|
"learning_rate": 4.34892703862661e-06, |
|
"loss": 1.5213, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 11.11, |
|
"learning_rate": 4.3383047210300435e-06, |
|
"loss": 1.5185, |
|
"step": 5330 |
|
}, |
|
{ |
|
"epoch": 11.13, |
|
"learning_rate": 4.327682403433476e-06, |
|
"loss": 1.5159, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 11.15, |
|
"learning_rate": 4.31706008583691e-06, |
|
"loss": 1.5148, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 11.18, |
|
"learning_rate": 4.306437768240344e-06, |
|
"loss": 1.5167, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 11.2, |
|
"learning_rate": 4.295815450643777e-06, |
|
"loss": 1.5164, |
|
"step": 5370 |
|
}, |
|
{ |
|
"epoch": 11.22, |
|
"learning_rate": 4.28519313304721e-06, |
|
"loss": 1.5171, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 11.24, |
|
"learning_rate": 4.274570815450644e-06, |
|
"loss": 1.5181, |
|
"step": 5390 |
|
}, |
|
{ |
|
"epoch": 11.26, |
|
"learning_rate": 4.263948497854077e-06, |
|
"loss": 1.5188, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 11.28, |
|
"learning_rate": 4.253326180257511e-06, |
|
"loss": 1.5194, |
|
"step": 5410 |
|
}, |
|
{ |
|
"epoch": 11.3, |
|
"learning_rate": 4.242703862660944e-06, |
|
"loss": 1.5175, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 11.33, |
|
"learning_rate": 4.232081545064378e-06, |
|
"loss": 1.5138, |
|
"step": 5430 |
|
}, |
|
{ |
|
"epoch": 11.35, |
|
"learning_rate": 4.221459227467811e-06, |
|
"loss": 1.5184, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 11.35, |
|
"eval_loss": 1.4102325439453125, |
|
"eval_runtime": 75.0945, |
|
"eval_samples_per_second": 532.662, |
|
"eval_steps_per_second": 8.323, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 11.37, |
|
"learning_rate": 4.210836909871245e-06, |
|
"loss": 1.5191, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 11.39, |
|
"learning_rate": 4.200214592274679e-06, |
|
"loss": 1.5189, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 11.41, |
|
"learning_rate": 4.189592274678111e-06, |
|
"loss": 1.517, |
|
"step": 5470 |
|
}, |
|
{ |
|
"epoch": 11.43, |
|
"learning_rate": 4.178969957081545e-06, |
|
"loss": 1.5159, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 11.45, |
|
"learning_rate": 4.168347639484979e-06, |
|
"loss": 1.5183, |
|
"step": 5490 |
|
}, |
|
{ |
|
"epoch": 11.48, |
|
"learning_rate": 4.157725321888411e-06, |
|
"loss": 1.5185, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 11.5, |
|
"learning_rate": 4.147103004291845e-06, |
|
"loss": 1.5148, |
|
"step": 5510 |
|
}, |
|
{ |
|
"epoch": 11.52, |
|
"learning_rate": 4.136480686695279e-06, |
|
"loss": 1.5146, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 11.54, |
|
"learning_rate": 4.125858369098712e-06, |
|
"loss": 1.5176, |
|
"step": 5530 |
|
}, |
|
{ |
|
"epoch": 11.56, |
|
"learning_rate": 4.115236051502145e-06, |
|
"loss": 1.5153, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 11.58, |
|
"learning_rate": 4.104613733905579e-06, |
|
"loss": 1.5208, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 11.6, |
|
"learning_rate": 4.093991416309013e-06, |
|
"loss": 1.5162, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 11.63, |
|
"learning_rate": 4.083369098712446e-06, |
|
"loss": 1.5196, |
|
"step": 5570 |
|
}, |
|
{ |
|
"epoch": 11.65, |
|
"learning_rate": 4.07274678111588e-06, |
|
"loss": 1.517, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 11.67, |
|
"learning_rate": 4.062124463519313e-06, |
|
"loss": 1.5217, |
|
"step": 5590 |
|
}, |
|
{ |
|
"epoch": 11.69, |
|
"learning_rate": 4.051502145922746e-06, |
|
"loss": 1.5185, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 11.69, |
|
"eval_loss": 1.4111074209213257, |
|
"eval_runtime": 74.9661, |
|
"eval_samples_per_second": 533.575, |
|
"eval_steps_per_second": 8.337, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 11.71, |
|
"learning_rate": 4.04087982832618e-06, |
|
"loss": 1.5178, |
|
"step": 5610 |
|
}, |
|
{ |
|
"epoch": 11.73, |
|
"learning_rate": 4.030257510729614e-06, |
|
"loss": 1.5198, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 11.75, |
|
"learning_rate": 4.0196351931330465e-06, |
|
"loss": 1.5173, |
|
"step": 5630 |
|
}, |
|
{ |
|
"epoch": 11.78, |
|
"learning_rate": 4.0090128755364805e-06, |
|
"loss": 1.5193, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 11.8, |
|
"learning_rate": 3.998390557939914e-06, |
|
"loss": 1.5197, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 11.82, |
|
"learning_rate": 3.987768240343348e-06, |
|
"loss": 1.516, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 11.84, |
|
"learning_rate": 3.977145922746781e-06, |
|
"loss": 1.5179, |
|
"step": 5670 |
|
}, |
|
{ |
|
"epoch": 11.86, |
|
"learning_rate": 3.9665236051502145e-06, |
|
"loss": 1.516, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 11.88, |
|
"learning_rate": 3.9559012875536485e-06, |
|
"loss": 1.5155, |
|
"step": 5690 |
|
}, |
|
{ |
|
"epoch": 11.9, |
|
"learning_rate": 3.945278969957081e-06, |
|
"loss": 1.5188, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 11.93, |
|
"learning_rate": 3.934656652360515e-06, |
|
"loss": 1.5197, |
|
"step": 5710 |
|
}, |
|
{ |
|
"epoch": 11.95, |
|
"learning_rate": 3.924034334763949e-06, |
|
"loss": 1.5186, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 11.97, |
|
"learning_rate": 3.913412017167382e-06, |
|
"loss": 1.5194, |
|
"step": 5730 |
|
}, |
|
{ |
|
"epoch": 11.99, |
|
"learning_rate": 3.902789699570816e-06, |
|
"loss": 1.519, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 12.01, |
|
"learning_rate": 3.89216738197425e-06, |
|
"loss": 1.6582, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 12.03, |
|
"learning_rate": 3.881545064377682e-06, |
|
"loss": 1.5172, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 12.03, |
|
"eval_loss": 1.414238452911377, |
|
"eval_runtime": 75.0838, |
|
"eval_samples_per_second": 532.738, |
|
"eval_steps_per_second": 8.324, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 12.06, |
|
"learning_rate": 3.870922746781116e-06, |
|
"loss": 1.5144, |
|
"step": 5770 |
|
}, |
|
{ |
|
"epoch": 12.08, |
|
"learning_rate": 3.86030042918455e-06, |
|
"loss": 1.5208, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 12.1, |
|
"learning_rate": 3.849678111587984e-06, |
|
"loss": 1.5146, |
|
"step": 5790 |
|
}, |
|
{ |
|
"epoch": 12.12, |
|
"learning_rate": 3.839055793991416e-06, |
|
"loss": 1.5187, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 12.14, |
|
"learning_rate": 3.82843347639485e-06, |
|
"loss": 1.5155, |
|
"step": 5810 |
|
}, |
|
{ |
|
"epoch": 12.16, |
|
"learning_rate": 3.817811158798284e-06, |
|
"loss": 1.5178, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 12.18, |
|
"learning_rate": 3.8071888412017165e-06, |
|
"loss": 1.5184, |
|
"step": 5830 |
|
}, |
|
{ |
|
"epoch": 12.21, |
|
"learning_rate": 3.7965665236051504e-06, |
|
"loss": 1.5136, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 12.23, |
|
"learning_rate": 3.785944206008584e-06, |
|
"loss": 1.5176, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 12.25, |
|
"learning_rate": 3.775321888412017e-06, |
|
"loss": 1.5168, |
|
"step": 5860 |
|
}, |
|
{ |
|
"epoch": 12.27, |
|
"learning_rate": 3.7646995708154505e-06, |
|
"loss": 1.5187, |
|
"step": 5870 |
|
}, |
|
{ |
|
"epoch": 12.29, |
|
"learning_rate": 3.754077253218884e-06, |
|
"loss": 1.5168, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 12.31, |
|
"learning_rate": 3.743454935622318e-06, |
|
"loss": 1.5164, |
|
"step": 5890 |
|
}, |
|
{ |
|
"epoch": 12.33, |
|
"learning_rate": 3.7328326180257507e-06, |
|
"loss": 1.5146, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 12.36, |
|
"learning_rate": 3.722210300429185e-06, |
|
"loss": 1.5185, |
|
"step": 5910 |
|
}, |
|
{ |
|
"epoch": 12.38, |
|
"learning_rate": 3.7115879828326186e-06, |
|
"loss": 1.5189, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 12.38, |
|
"eval_loss": 1.4129457473754883, |
|
"eval_runtime": 75.5813, |
|
"eval_samples_per_second": 529.231, |
|
"eval_steps_per_second": 8.269, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 12.4, |
|
"learning_rate": 3.7009656652360516e-06, |
|
"loss": 1.5164, |
|
"step": 5930 |
|
}, |
|
{ |
|
"epoch": 12.42, |
|
"learning_rate": 3.6903433476394847e-06, |
|
"loss": 1.5186, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 12.44, |
|
"learning_rate": 3.6797210300429187e-06, |
|
"loss": 1.5153, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 12.46, |
|
"learning_rate": 3.6690987124463514e-06, |
|
"loss": 1.5184, |
|
"step": 5960 |
|
}, |
|
{ |
|
"epoch": 12.48, |
|
"learning_rate": 3.6584763948497853e-06, |
|
"loss": 1.5147, |
|
"step": 5970 |
|
}, |
|
{ |
|
"epoch": 12.51, |
|
"learning_rate": 3.6478540772532197e-06, |
|
"loss": 1.5169, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 12.53, |
|
"learning_rate": 3.637231759656652e-06, |
|
"loss": 1.516, |
|
"step": 5990 |
|
}, |
|
{ |
|
"epoch": 12.55, |
|
"learning_rate": 3.6266094420600854e-06, |
|
"loss": 1.5143, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 12.57, |
|
"learning_rate": 3.6159871244635194e-06, |
|
"loss": 1.5171, |
|
"step": 6010 |
|
}, |
|
{ |
|
"epoch": 12.59, |
|
"learning_rate": 3.6053648068669533e-06, |
|
"loss": 1.5169, |
|
"step": 6020 |
|
}, |
|
{ |
|
"epoch": 12.61, |
|
"learning_rate": 3.594742489270386e-06, |
|
"loss": 1.5168, |
|
"step": 6030 |
|
}, |
|
{ |
|
"epoch": 12.63, |
|
"learning_rate": 3.58412017167382e-06, |
|
"loss": 1.5174, |
|
"step": 6040 |
|
}, |
|
{ |
|
"epoch": 12.66, |
|
"learning_rate": 3.5734978540772534e-06, |
|
"loss": 1.5191, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 12.68, |
|
"learning_rate": 3.562875536480686e-06, |
|
"loss": 1.5196, |
|
"step": 6060 |
|
}, |
|
{ |
|
"epoch": 12.7, |
|
"learning_rate": 3.55225321888412e-06, |
|
"loss": 1.5149, |
|
"step": 6070 |
|
}, |
|
{ |
|
"epoch": 12.72, |
|
"learning_rate": 3.541630901287554e-06, |
|
"loss": 1.5147, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 12.72, |
|
"eval_loss": 1.4089046716690063, |
|
"eval_runtime": 75.931, |
|
"eval_samples_per_second": 526.794, |
|
"eval_steps_per_second": 8.231, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 12.74, |
|
"learning_rate": 3.5310085836909867e-06, |
|
"loss": 1.5196, |
|
"step": 6090 |
|
}, |
|
{ |
|
"epoch": 12.76, |
|
"learning_rate": 3.5203862660944198e-06, |
|
"loss": 1.5154, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 12.78, |
|
"learning_rate": 3.509763948497854e-06, |
|
"loss": 1.5179, |
|
"step": 6110 |
|
}, |
|
{ |
|
"epoch": 12.81, |
|
"learning_rate": 3.499141630901288e-06, |
|
"loss": 1.5162, |
|
"step": 6120 |
|
}, |
|
{ |
|
"epoch": 12.83, |
|
"learning_rate": 3.4885193133047207e-06, |
|
"loss": 1.5202, |
|
"step": 6130 |
|
}, |
|
{ |
|
"epoch": 12.85, |
|
"learning_rate": 3.4778969957081547e-06, |
|
"loss": 1.5191, |
|
"step": 6140 |
|
}, |
|
{ |
|
"epoch": 12.87, |
|
"learning_rate": 3.4672746781115886e-06, |
|
"loss": 1.5194, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 12.89, |
|
"learning_rate": 3.456652360515021e-06, |
|
"loss": 1.5155, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 12.91, |
|
"learning_rate": 3.4460300429184544e-06, |
|
"loss": 1.5197, |
|
"step": 6170 |
|
}, |
|
{ |
|
"epoch": 12.93, |
|
"learning_rate": 3.4354077253218888e-06, |
|
"loss": 1.5175, |
|
"step": 6180 |
|
}, |
|
{ |
|
"epoch": 12.96, |
|
"learning_rate": 3.424785407725321e-06, |
|
"loss": 1.5163, |
|
"step": 6190 |
|
}, |
|
{ |
|
"epoch": 12.98, |
|
"learning_rate": 3.4141630901287554e-06, |
|
"loss": 1.5197, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"learning_rate": 3.403540772532189e-06, |
|
"loss": 1.5171, |
|
"step": 6210 |
|
}, |
|
{ |
|
"epoch": 13.02, |
|
"learning_rate": 3.3929184549356216e-06, |
|
"loss": 1.6585, |
|
"step": 6220 |
|
}, |
|
{ |
|
"epoch": 13.04, |
|
"learning_rate": 3.3822961373390555e-06, |
|
"loss": 1.5188, |
|
"step": 6230 |
|
}, |
|
{ |
|
"epoch": 13.06, |
|
"learning_rate": 3.371673819742489e-06, |
|
"loss": 1.5177, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 13.06, |
|
"eval_loss": 1.4098169803619385, |
|
"eval_runtime": 75.6507, |
|
"eval_samples_per_second": 528.746, |
|
"eval_steps_per_second": 8.262, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 13.09, |
|
"learning_rate": 3.361051502145923e-06, |
|
"loss": 1.5155, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 13.11, |
|
"learning_rate": 3.3504291845493556e-06, |
|
"loss": 1.5182, |
|
"step": 6260 |
|
}, |
|
{ |
|
"epoch": 13.13, |
|
"learning_rate": 3.3398068669527896e-06, |
|
"loss": 1.5186, |
|
"step": 6270 |
|
}, |
|
{ |
|
"epoch": 13.15, |
|
"learning_rate": 3.3291845493562235e-06, |
|
"loss": 1.5174, |
|
"step": 6280 |
|
}, |
|
{ |
|
"epoch": 13.17, |
|
"learning_rate": 3.318562231759656e-06, |
|
"loss": 1.5167, |
|
"step": 6290 |
|
}, |
|
{ |
|
"epoch": 13.19, |
|
"learning_rate": 3.30793991416309e-06, |
|
"loss": 1.516, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 13.21, |
|
"learning_rate": 3.2973175965665236e-06, |
|
"loss": 1.517, |
|
"step": 6310 |
|
}, |
|
{ |
|
"epoch": 13.24, |
|
"learning_rate": 3.2866952789699567e-06, |
|
"loss": 1.5188, |
|
"step": 6320 |
|
}, |
|
{ |
|
"epoch": 13.26, |
|
"learning_rate": 3.2760729613733907e-06, |
|
"loss": 1.5159, |
|
"step": 6330 |
|
}, |
|
{ |
|
"epoch": 13.28, |
|
"learning_rate": 3.265450643776824e-06, |
|
"loss": 1.5168, |
|
"step": 6340 |
|
}, |
|
{ |
|
"epoch": 13.3, |
|
"learning_rate": 3.254828326180258e-06, |
|
"loss": 1.516, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 13.32, |
|
"learning_rate": 3.244206008583691e-06, |
|
"loss": 1.5141, |
|
"step": 6360 |
|
}, |
|
{ |
|
"epoch": 13.34, |
|
"learning_rate": 3.2335836909871243e-06, |
|
"loss": 1.5172, |
|
"step": 6370 |
|
}, |
|
{ |
|
"epoch": 13.36, |
|
"learning_rate": 3.2229613733905583e-06, |
|
"loss": 1.5169, |
|
"step": 6380 |
|
}, |
|
{ |
|
"epoch": 13.39, |
|
"learning_rate": 3.212339055793991e-06, |
|
"loss": 1.5169, |
|
"step": 6390 |
|
}, |
|
{ |
|
"epoch": 13.41, |
|
"learning_rate": 3.2017167381974253e-06, |
|
"loss": 1.5164, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 13.41, |
|
"eval_loss": 1.40966796875, |
|
"eval_runtime": 75.1896, |
|
"eval_samples_per_second": 531.988, |
|
"eval_steps_per_second": 8.312, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 13.43, |
|
"learning_rate": 3.191094420600859e-06, |
|
"loss": 1.5162, |
|
"step": 6410 |
|
}, |
|
{ |
|
"epoch": 13.45, |
|
"learning_rate": 3.180472103004292e-06, |
|
"loss": 1.5146, |
|
"step": 6420 |
|
}, |
|
{ |
|
"epoch": 13.47, |
|
"learning_rate": 3.169849785407725e-06, |
|
"loss": 1.5186, |
|
"step": 6430 |
|
}, |
|
{ |
|
"epoch": 13.49, |
|
"learning_rate": 3.159227467811159e-06, |
|
"loss": 1.5142, |
|
"step": 6440 |
|
}, |
|
{ |
|
"epoch": 13.51, |
|
"learning_rate": 3.1486051502145916e-06, |
|
"loss": 1.5143, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 13.54, |
|
"learning_rate": 3.1379828326180256e-06, |
|
"loss": 1.5159, |
|
"step": 6460 |
|
}, |
|
{ |
|
"epoch": 13.56, |
|
"learning_rate": 3.12736051502146e-06, |
|
"loss": 1.5143, |
|
"step": 6470 |
|
}, |
|
{ |
|
"epoch": 13.58, |
|
"learning_rate": 3.1167381974248926e-06, |
|
"loss": 1.5186, |
|
"step": 6480 |
|
}, |
|
{ |
|
"epoch": 13.6, |
|
"learning_rate": 3.1061158798283257e-06, |
|
"loss": 1.5153, |
|
"step": 6490 |
|
}, |
|
{ |
|
"epoch": 13.62, |
|
"learning_rate": 3.0954935622317596e-06, |
|
"loss": 1.5184, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 13.64, |
|
"learning_rate": 3.0848712446351936e-06, |
|
"loss": 1.5186, |
|
"step": 6510 |
|
}, |
|
{ |
|
"epoch": 13.66, |
|
"learning_rate": 3.0742489270386263e-06, |
|
"loss": 1.5228, |
|
"step": 6520 |
|
}, |
|
{ |
|
"epoch": 13.69, |
|
"learning_rate": 3.0636266094420598e-06, |
|
"loss": 1.5177, |
|
"step": 6530 |
|
}, |
|
{ |
|
"epoch": 13.71, |
|
"learning_rate": 3.0530042918454937e-06, |
|
"loss": 1.5168, |
|
"step": 6540 |
|
}, |
|
{ |
|
"epoch": 13.73, |
|
"learning_rate": 3.0423819742489264e-06, |
|
"loss": 1.5179, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 13.75, |
|
"learning_rate": 3.0317596566523603e-06, |
|
"loss": 1.5188, |
|
"step": 6560 |
|
}, |
|
{ |
|
"epoch": 13.75, |
|
"eval_loss": 1.4108576774597168, |
|
"eval_runtime": 74.9353, |
|
"eval_samples_per_second": 533.794, |
|
"eval_steps_per_second": 8.341, |
|
"step": 6560 |
|
}, |
|
{ |
|
"epoch": 13.77, |
|
"learning_rate": 3.0211373390557943e-06, |
|
"loss": 1.5163, |
|
"step": 6570 |
|
}, |
|
{ |
|
"epoch": 13.79, |
|
"learning_rate": 3.010515021459227e-06, |
|
"loss": 1.5186, |
|
"step": 6580 |
|
}, |
|
{ |
|
"epoch": 13.81, |
|
"learning_rate": 2.99989270386266e-06, |
|
"loss": 1.5162, |
|
"step": 6590 |
|
}, |
|
{ |
|
"epoch": 13.84, |
|
"learning_rate": 2.9892703862660944e-06, |
|
"loss": 1.5188, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 13.86, |
|
"learning_rate": 2.9786480686695283e-06, |
|
"loss": 1.5164, |
|
"step": 6610 |
|
}, |
|
{ |
|
"epoch": 13.88, |
|
"learning_rate": 2.968025751072961e-06, |
|
"loss": 1.5167, |
|
"step": 6620 |
|
}, |
|
{ |
|
"epoch": 13.9, |
|
"learning_rate": 2.957403433476395e-06, |
|
"loss": 1.5175, |
|
"step": 6630 |
|
}, |
|
{ |
|
"epoch": 13.92, |
|
"learning_rate": 2.946781115879829e-06, |
|
"loss": 1.5165, |
|
"step": 6640 |
|
}, |
|
{ |
|
"epoch": 13.94, |
|
"learning_rate": 2.936158798283261e-06, |
|
"loss": 1.5203, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 13.96, |
|
"learning_rate": 2.9255364806866947e-06, |
|
"loss": 1.5196, |
|
"step": 6660 |
|
}, |
|
{ |
|
"epoch": 13.99, |
|
"learning_rate": 2.914914163090129e-06, |
|
"loss": 1.5157, |
|
"step": 6670 |
|
}, |
|
{ |
|
"epoch": 14.01, |
|
"learning_rate": 2.9042918454935613e-06, |
|
"loss": 1.6583, |
|
"step": 6680 |
|
}, |
|
{ |
|
"epoch": 14.03, |
|
"learning_rate": 2.8936695278969956e-06, |
|
"loss": 1.518, |
|
"step": 6690 |
|
}, |
|
{ |
|
"epoch": 14.05, |
|
"learning_rate": 2.883047210300429e-06, |
|
"loss": 1.5165, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 14.07, |
|
"learning_rate": 2.8724248927038627e-06, |
|
"loss": 1.5166, |
|
"step": 6710 |
|
}, |
|
{ |
|
"epoch": 14.09, |
|
"learning_rate": 2.8618025751072958e-06, |
|
"loss": 1.5158, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 14.09, |
|
"eval_loss": 1.4134007692337036, |
|
"eval_runtime": 74.8707, |
|
"eval_samples_per_second": 534.254, |
|
"eval_steps_per_second": 8.348, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 14.12, |
|
"learning_rate": 2.8511802575107293e-06, |
|
"loss": 1.5155, |
|
"step": 6730 |
|
}, |
|
{ |
|
"epoch": 14.14, |
|
"learning_rate": 2.8405579399141632e-06, |
|
"loss": 1.5165, |
|
"step": 6740 |
|
}, |
|
{ |
|
"epoch": 14.16, |
|
"learning_rate": 2.829935622317596e-06, |
|
"loss": 1.5155, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 14.18, |
|
"learning_rate": 2.81931330472103e-06, |
|
"loss": 1.5167, |
|
"step": 6760 |
|
}, |
|
{ |
|
"epoch": 14.2, |
|
"learning_rate": 2.8086909871244638e-06, |
|
"loss": 1.5166, |
|
"step": 6770 |
|
}, |
|
{ |
|
"epoch": 14.22, |
|
"learning_rate": 2.7980686695278965e-06, |
|
"loss": 1.5153, |
|
"step": 6780 |
|
}, |
|
{ |
|
"epoch": 14.24, |
|
"learning_rate": 2.7874463519313304e-06, |
|
"loss": 1.5183, |
|
"step": 6790 |
|
}, |
|
{ |
|
"epoch": 14.27, |
|
"learning_rate": 2.776824034334764e-06, |
|
"loss": 1.5178, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 14.29, |
|
"learning_rate": 2.766201716738197e-06, |
|
"loss": 1.5166, |
|
"step": 6810 |
|
}, |
|
{ |
|
"epoch": 14.31, |
|
"learning_rate": 2.755579399141631e-06, |
|
"loss": 1.5171, |
|
"step": 6820 |
|
}, |
|
{ |
|
"epoch": 14.33, |
|
"learning_rate": 2.7449570815450645e-06, |
|
"loss": 1.5144, |
|
"step": 6830 |
|
}, |
|
{ |
|
"epoch": 14.35, |
|
"learning_rate": 2.7343347639484984e-06, |
|
"loss": 1.5171, |
|
"step": 6840 |
|
}, |
|
{ |
|
"epoch": 14.37, |
|
"learning_rate": 2.723712446351931e-06, |
|
"loss": 1.5167, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 14.39, |
|
"learning_rate": 2.7130901287553646e-06, |
|
"loss": 1.5165, |
|
"step": 6860 |
|
}, |
|
{ |
|
"epoch": 14.42, |
|
"learning_rate": 2.7024678111587985e-06, |
|
"loss": 1.5195, |
|
"step": 6870 |
|
}, |
|
{ |
|
"epoch": 14.44, |
|
"learning_rate": 2.691845493562231e-06, |
|
"loss": 1.5134, |
|
"step": 6880 |
|
}, |
|
{ |
|
"epoch": 14.44, |
|
"eval_loss": 1.4091237783432007, |
|
"eval_runtime": 74.8243, |
|
"eval_samples_per_second": 534.586, |
|
"eval_steps_per_second": 8.353, |
|
"step": 6880 |
|
}, |
|
{ |
|
"epoch": 14.46, |
|
"learning_rate": 2.6812231759656656e-06, |
|
"loss": 1.519, |
|
"step": 6890 |
|
}, |
|
{ |
|
"epoch": 14.48, |
|
"learning_rate": 2.670600858369099e-06, |
|
"loss": 1.5172, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 14.5, |
|
"learning_rate": 2.659978540772532e-06, |
|
"loss": 1.5146, |
|
"step": 6910 |
|
}, |
|
{ |
|
"epoch": 14.52, |
|
"learning_rate": 2.6493562231759653e-06, |
|
"loss": 1.5169, |
|
"step": 6920 |
|
}, |
|
{ |
|
"epoch": 14.54, |
|
"learning_rate": 2.6387339055793992e-06, |
|
"loss": 1.5138, |
|
"step": 6930 |
|
}, |
|
{ |
|
"epoch": 14.57, |
|
"learning_rate": 2.6281115879828336e-06, |
|
"loss": 1.5179, |
|
"step": 6940 |
|
}, |
|
{ |
|
"epoch": 14.59, |
|
"learning_rate": 2.617489270386266e-06, |
|
"loss": 1.5163, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 14.61, |
|
"learning_rate": 2.6068669527897e-06, |
|
"loss": 1.5131, |
|
"step": 6960 |
|
}, |
|
{ |
|
"epoch": 14.63, |
|
"learning_rate": 2.596244635193133e-06, |
|
"loss": 1.5183, |
|
"step": 6970 |
|
}, |
|
{ |
|
"epoch": 14.65, |
|
"learning_rate": 2.585622317596566e-06, |
|
"loss": 1.5165, |
|
"step": 6980 |
|
}, |
|
{ |
|
"epoch": 14.67, |
|
"learning_rate": 2.575e-06, |
|
"loss": 1.5195, |
|
"step": 6990 |
|
}, |
|
{ |
|
"epoch": 14.69, |
|
"learning_rate": 2.564377682403434e-06, |
|
"loss": 1.517, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 14.72, |
|
"learning_rate": 2.5537553648068665e-06, |
|
"loss": 1.5183, |
|
"step": 7010 |
|
}, |
|
{ |
|
"epoch": 14.74, |
|
"learning_rate": 2.5431330472103e-06, |
|
"loss": 1.5186, |
|
"step": 7020 |
|
}, |
|
{ |
|
"epoch": 14.76, |
|
"learning_rate": 2.532510729613734e-06, |
|
"loss": 1.5175, |
|
"step": 7030 |
|
}, |
|
{ |
|
"epoch": 14.78, |
|
"learning_rate": 2.5218884120171667e-06, |
|
"loss": 1.5167, |
|
"step": 7040 |
|
}, |
|
{ |
|
"epoch": 14.78, |
|
"eval_loss": 1.4089406728744507, |
|
"eval_runtime": 74.9005, |
|
"eval_samples_per_second": 534.042, |
|
"eval_steps_per_second": 8.344, |
|
"step": 7040 |
|
}, |
|
{ |
|
"epoch": 14.8, |
|
"learning_rate": 2.5112660944206006e-06, |
|
"loss": 1.5191, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 14.82, |
|
"learning_rate": 2.5006437768240345e-06, |
|
"loss": 1.5178, |
|
"step": 7060 |
|
}, |
|
{ |
|
"epoch": 14.84, |
|
"learning_rate": 2.490021459227468e-06, |
|
"loss": 1.517, |
|
"step": 7070 |
|
}, |
|
{ |
|
"epoch": 14.87, |
|
"learning_rate": 2.4793991416309007e-06, |
|
"loss": 1.5222, |
|
"step": 7080 |
|
}, |
|
{ |
|
"epoch": 14.89, |
|
"learning_rate": 2.4687768240343347e-06, |
|
"loss": 1.5156, |
|
"step": 7090 |
|
}, |
|
{ |
|
"epoch": 14.91, |
|
"learning_rate": 2.4581545064377686e-06, |
|
"loss": 1.5181, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 14.93, |
|
"learning_rate": 2.4475321888412013e-06, |
|
"loss": 1.5154, |
|
"step": 7110 |
|
}, |
|
{ |
|
"epoch": 14.95, |
|
"learning_rate": 2.4369098712446352e-06, |
|
"loss": 1.5175, |
|
"step": 7120 |
|
}, |
|
{ |
|
"epoch": 14.97, |
|
"learning_rate": 2.4262875536480687e-06, |
|
"loss": 1.5198, |
|
"step": 7130 |
|
}, |
|
{ |
|
"epoch": 14.99, |
|
"learning_rate": 2.4156652360515014e-06, |
|
"loss": 1.5161, |
|
"step": 7140 |
|
}, |
|
{ |
|
"epoch": 15.02, |
|
"learning_rate": 2.4050429184549354e-06, |
|
"loss": 1.6582, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 15.04, |
|
"learning_rate": 2.3944206008583693e-06, |
|
"loss": 1.516, |
|
"step": 7160 |
|
}, |
|
{ |
|
"epoch": 15.06, |
|
"learning_rate": 2.383798283261803e-06, |
|
"loss": 1.5172, |
|
"step": 7170 |
|
}, |
|
{ |
|
"epoch": 15.08, |
|
"learning_rate": 2.373175965665236e-06, |
|
"loss": 1.5129, |
|
"step": 7180 |
|
}, |
|
{ |
|
"epoch": 15.1, |
|
"learning_rate": 2.3625536480686694e-06, |
|
"loss": 1.5181, |
|
"step": 7190 |
|
}, |
|
{ |
|
"epoch": 15.12, |
|
"learning_rate": 2.3519313304721034e-06, |
|
"loss": 1.5163, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 15.12, |
|
"eval_loss": 1.4139931201934814, |
|
"eval_runtime": 74.8677, |
|
"eval_samples_per_second": 534.276, |
|
"eval_steps_per_second": 8.348, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 15.15, |
|
"learning_rate": 2.341309012875536e-06, |
|
"loss": 1.5156, |
|
"step": 7210 |
|
}, |
|
{ |
|
"epoch": 15.17, |
|
"learning_rate": 2.33068669527897e-06, |
|
"loss": 1.5178, |
|
"step": 7220 |
|
}, |
|
{ |
|
"epoch": 15.19, |
|
"learning_rate": 2.3200643776824035e-06, |
|
"loss": 1.5163, |
|
"step": 7230 |
|
}, |
|
{ |
|
"epoch": 15.21, |
|
"learning_rate": 2.3094420600858366e-06, |
|
"loss": 1.5156, |
|
"step": 7240 |
|
}, |
|
{ |
|
"epoch": 15.23, |
|
"learning_rate": 2.29881974248927e-06, |
|
"loss": 1.5184, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 15.25, |
|
"learning_rate": 2.288197424892704e-06, |
|
"loss": 1.5173, |
|
"step": 7260 |
|
}, |
|
{ |
|
"epoch": 15.27, |
|
"learning_rate": 2.2775751072961367e-06, |
|
"loss": 1.5169, |
|
"step": 7270 |
|
}, |
|
{ |
|
"epoch": 15.3, |
|
"learning_rate": 2.2669527896995707e-06, |
|
"loss": 1.5184, |
|
"step": 7280 |
|
}, |
|
{ |
|
"epoch": 15.32, |
|
"learning_rate": 2.256330472103004e-06, |
|
"loss": 1.5125, |
|
"step": 7290 |
|
}, |
|
{ |
|
"epoch": 15.34, |
|
"learning_rate": 2.245708154506438e-06, |
|
"loss": 1.5161, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 15.36, |
|
"learning_rate": 2.235085836909871e-06, |
|
"loss": 1.5154, |
|
"step": 7310 |
|
}, |
|
{ |
|
"epoch": 15.38, |
|
"learning_rate": 2.2244635193133047e-06, |
|
"loss": 1.518, |
|
"step": 7320 |
|
}, |
|
{ |
|
"epoch": 15.4, |
|
"learning_rate": 2.2138412017167387e-06, |
|
"loss": 1.5171, |
|
"step": 7330 |
|
}, |
|
{ |
|
"epoch": 15.42, |
|
"learning_rate": 2.2032188841201713e-06, |
|
"loss": 1.5158, |
|
"step": 7340 |
|
}, |
|
{ |
|
"epoch": 15.45, |
|
"learning_rate": 2.192596566523605e-06, |
|
"loss": 1.5165, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 15.47, |
|
"learning_rate": 2.181974248927039e-06, |
|
"loss": 1.5172, |
|
"step": 7360 |
|
}, |
|
{ |
|
"epoch": 15.47, |
|
"eval_loss": 1.4083021879196167, |
|
"eval_runtime": 74.9039, |
|
"eval_samples_per_second": 534.018, |
|
"eval_steps_per_second": 8.344, |
|
"step": 7360 |
|
}, |
|
{ |
|
"epoch": 15.49, |
|
"learning_rate": 2.1713519313304715e-06, |
|
"loss": 1.5105, |
|
"step": 7370 |
|
}, |
|
{ |
|
"epoch": 15.51, |
|
"learning_rate": 2.1607296137339054e-06, |
|
"loss": 1.5149, |
|
"step": 7380 |
|
}, |
|
{ |
|
"epoch": 15.53, |
|
"learning_rate": 2.1501072961373394e-06, |
|
"loss": 1.5168, |
|
"step": 7390 |
|
}, |
|
{ |
|
"epoch": 15.55, |
|
"learning_rate": 2.139484978540773e-06, |
|
"loss": 1.5151, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 15.57, |
|
"learning_rate": 2.1288626609442056e-06, |
|
"loss": 1.5163, |
|
"step": 7410 |
|
}, |
|
{ |
|
"epoch": 15.6, |
|
"learning_rate": 2.1182403433476395e-06, |
|
"loss": 1.5149, |
|
"step": 7420 |
|
}, |
|
{ |
|
"epoch": 15.62, |
|
"learning_rate": 2.1076180257510734e-06, |
|
"loss": 1.516, |
|
"step": 7430 |
|
}, |
|
{ |
|
"epoch": 15.64, |
|
"learning_rate": 2.096995708154506e-06, |
|
"loss": 1.5134, |
|
"step": 7440 |
|
}, |
|
{ |
|
"epoch": 15.66, |
|
"learning_rate": 2.08637339055794e-06, |
|
"loss": 1.5204, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 15.68, |
|
"learning_rate": 2.0757510729613736e-06, |
|
"loss": 1.5173, |
|
"step": 7460 |
|
}, |
|
{ |
|
"epoch": 15.7, |
|
"learning_rate": 2.0651287553648062e-06, |
|
"loss": 1.5172, |
|
"step": 7470 |
|
}, |
|
{ |
|
"epoch": 15.72, |
|
"learning_rate": 2.05450643776824e-06, |
|
"loss": 1.5164, |
|
"step": 7480 |
|
}, |
|
{ |
|
"epoch": 15.75, |
|
"learning_rate": 2.043884120171674e-06, |
|
"loss": 1.5144, |
|
"step": 7490 |
|
}, |
|
{ |
|
"epoch": 15.77, |
|
"learning_rate": 2.033261802575107e-06, |
|
"loss": 1.515, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 15.79, |
|
"learning_rate": 2.0226394849785403e-06, |
|
"loss": 1.5178, |
|
"step": 7510 |
|
}, |
|
{ |
|
"epoch": 15.81, |
|
"learning_rate": 2.0120171673819743e-06, |
|
"loss": 1.5153, |
|
"step": 7520 |
|
}, |
|
{ |
|
"epoch": 15.81, |
|
"eval_loss": 1.4108531475067139, |
|
"eval_runtime": 74.8832, |
|
"eval_samples_per_second": 534.165, |
|
"eval_steps_per_second": 8.346, |
|
"step": 7520 |
|
}, |
|
{ |
|
"epoch": 15.83, |
|
"learning_rate": 2.001394849785408e-06, |
|
"loss": 1.5179, |
|
"step": 7530 |
|
}, |
|
{ |
|
"epoch": 15.85, |
|
"learning_rate": 1.990772532188841e-06, |
|
"loss": 1.5193, |
|
"step": 7540 |
|
}, |
|
{ |
|
"epoch": 15.87, |
|
"learning_rate": 1.980150214592275e-06, |
|
"loss": 1.516, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 15.9, |
|
"learning_rate": 1.9695278969957087e-06, |
|
"loss": 1.5181, |
|
"step": 7560 |
|
}, |
|
{ |
|
"epoch": 15.92, |
|
"learning_rate": 1.9589055793991414e-06, |
|
"loss": 1.5148, |
|
"step": 7570 |
|
}, |
|
{ |
|
"epoch": 15.94, |
|
"learning_rate": 1.948283261802575e-06, |
|
"loss": 1.516, |
|
"step": 7580 |
|
}, |
|
{ |
|
"epoch": 15.96, |
|
"learning_rate": 1.937660944206009e-06, |
|
"loss": 1.5202, |
|
"step": 7590 |
|
}, |
|
{ |
|
"epoch": 15.98, |
|
"learning_rate": 1.9270386266094416e-06, |
|
"loss": 1.5186, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"learning_rate": 1.9164163090128755e-06, |
|
"loss": 1.6598, |
|
"step": 7610 |
|
}, |
|
{ |
|
"epoch": 16.03, |
|
"learning_rate": 1.9057939914163096e-06, |
|
"loss": 1.5168, |
|
"step": 7620 |
|
}, |
|
{ |
|
"epoch": 16.05, |
|
"learning_rate": 1.895171673819742e-06, |
|
"loss": 1.5168, |
|
"step": 7630 |
|
}, |
|
{ |
|
"epoch": 16.07, |
|
"learning_rate": 1.8845493562231756e-06, |
|
"loss": 1.5153, |
|
"step": 7640 |
|
}, |
|
{ |
|
"epoch": 16.09, |
|
"learning_rate": 1.8739270386266096e-06, |
|
"loss": 1.5147, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 16.11, |
|
"learning_rate": 1.8633047210300433e-06, |
|
"loss": 1.5162, |
|
"step": 7660 |
|
}, |
|
{ |
|
"epoch": 16.13, |
|
"learning_rate": 1.8526824034334762e-06, |
|
"loss": 1.5151, |
|
"step": 7670 |
|
}, |
|
{ |
|
"epoch": 16.15, |
|
"learning_rate": 1.8420600858369101e-06, |
|
"loss": 1.5164, |
|
"step": 7680 |
|
}, |
|
{ |
|
"epoch": 16.15, |
|
"eval_loss": 1.4092837572097778, |
|
"eval_runtime": 74.9552, |
|
"eval_samples_per_second": 533.652, |
|
"eval_steps_per_second": 8.338, |
|
"step": 7680 |
|
}, |
|
{ |
|
"epoch": 16.18, |
|
"learning_rate": 1.8314377682403436e-06, |
|
"loss": 1.5168, |
|
"step": 7690 |
|
}, |
|
{ |
|
"epoch": 16.2, |
|
"learning_rate": 1.8208154506437763e-06, |
|
"loss": 1.5179, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 16.22, |
|
"learning_rate": 1.8101931330472102e-06, |
|
"loss": 1.515, |
|
"step": 7710 |
|
}, |
|
{ |
|
"epoch": 16.24, |
|
"learning_rate": 1.7995708154506442e-06, |
|
"loss": 1.5175, |
|
"step": 7720 |
|
}, |
|
{ |
|
"epoch": 16.26, |
|
"learning_rate": 1.7889484978540769e-06, |
|
"loss": 1.5177, |
|
"step": 7730 |
|
}, |
|
{ |
|
"epoch": 16.28, |
|
"learning_rate": 1.7783261802575102e-06, |
|
"loss": 1.5157, |
|
"step": 7740 |
|
}, |
|
{ |
|
"epoch": 16.3, |
|
"learning_rate": 1.7677038626609443e-06, |
|
"loss": 1.5161, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 16.33, |
|
"learning_rate": 1.7570815450643783e-06, |
|
"loss": 1.5166, |
|
"step": 7760 |
|
}, |
|
{ |
|
"epoch": 16.02, |
|
"learning_rate": 1.7464592274678111e-06, |
|
"loss": 1.5157, |
|
"step": 7770 |
|
}, |
|
{ |
|
"epoch": 16.04, |
|
"learning_rate": 1.7358369098712449e-06, |
|
"loss": 1.5163, |
|
"step": 7780 |
|
}, |
|
{ |
|
"epoch": 16.06, |
|
"learning_rate": 1.7252145922746786e-06, |
|
"loss": 1.5171, |
|
"step": 7790 |
|
}, |
|
{ |
|
"epoch": 16.09, |
|
"learning_rate": 1.714592274678111e-06, |
|
"loss": 1.516, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 16.11, |
|
"learning_rate": 1.7039699570815448e-06, |
|
"loss": 1.5139, |
|
"step": 7810 |
|
}, |
|
{ |
|
"epoch": 16.13, |
|
"learning_rate": 1.693347639484979e-06, |
|
"loss": 1.5152, |
|
"step": 7820 |
|
}, |
|
{ |
|
"epoch": 16.15, |
|
"learning_rate": 1.6827253218884116e-06, |
|
"loss": 1.5139, |
|
"step": 7830 |
|
}, |
|
{ |
|
"epoch": 16.17, |
|
"learning_rate": 1.6721030042918458e-06, |
|
"loss": 1.5164, |
|
"step": 7840 |
|
}, |
|
{ |
|
"epoch": 16.17, |
|
"eval_loss": 1.4107787609100342, |
|
"eval_runtime": 90.0318, |
|
"eval_samples_per_second": 444.288, |
|
"eval_steps_per_second": 6.942, |
|
"step": 7840 |
|
}, |
|
{ |
|
"epoch": 16.19, |
|
"learning_rate": 1.661480686695279e-06, |
|
"loss": 1.5149, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 16.21, |
|
"learning_rate": 1.6508583690987118e-06, |
|
"loss": 1.5136, |
|
"step": 7860 |
|
}, |
|
{ |
|
"epoch": 16.24, |
|
"learning_rate": 1.6402360515021457e-06, |
|
"loss": 1.5168, |
|
"step": 7870 |
|
}, |
|
{ |
|
"epoch": 16.26, |
|
"learning_rate": 1.6296137339055796e-06, |
|
"loss": 1.5128, |
|
"step": 7880 |
|
}, |
|
{ |
|
"epoch": 16.28, |
|
"learning_rate": 1.6189914163090132e-06, |
|
"loss": 1.5193, |
|
"step": 7890 |
|
}, |
|
{ |
|
"epoch": 16.3, |
|
"learning_rate": 1.6083690987124462e-06, |
|
"loss": 1.5132, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 16.32, |
|
"learning_rate": 1.5977467811158798e-06, |
|
"loss": 1.5139, |
|
"step": 7910 |
|
}, |
|
{ |
|
"epoch": 16.34, |
|
"learning_rate": 1.5871244635193137e-06, |
|
"loss": 1.5131, |
|
"step": 7920 |
|
}, |
|
{ |
|
"epoch": 16.36, |
|
"learning_rate": 1.5765021459227464e-06, |
|
"loss": 1.5167, |
|
"step": 7930 |
|
}, |
|
{ |
|
"epoch": 16.39, |
|
"learning_rate": 1.5658798283261803e-06, |
|
"loss": 1.5147, |
|
"step": 7940 |
|
}, |
|
{ |
|
"epoch": 16.41, |
|
"learning_rate": 1.5552575107296138e-06, |
|
"loss": 1.5131, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 16.43, |
|
"learning_rate": 1.544635193133047e-06, |
|
"loss": 1.5183, |
|
"step": 7960 |
|
}, |
|
{ |
|
"epoch": 16.45, |
|
"learning_rate": 1.5340128755364804e-06, |
|
"loss": 1.5162, |
|
"step": 7970 |
|
}, |
|
{ |
|
"epoch": 16.47, |
|
"learning_rate": 1.5233905579399144e-06, |
|
"loss": 1.5194, |
|
"step": 7980 |
|
}, |
|
{ |
|
"epoch": 16.49, |
|
"learning_rate": 1.5127682403433483e-06, |
|
"loss": 1.5126, |
|
"step": 7990 |
|
}, |
|
{ |
|
"epoch": 16.51, |
|
"learning_rate": 1.502145922746781e-06, |
|
"loss": 1.515, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 16.51, |
|
"eval_loss": 1.410232424736023, |
|
"eval_runtime": 88.626, |
|
"eval_samples_per_second": 451.335, |
|
"eval_steps_per_second": 7.052, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 16.54, |
|
"learning_rate": 1.4915236051502147e-06, |
|
"loss": 1.5139, |
|
"step": 8010 |
|
}, |
|
{ |
|
"epoch": 16.56, |
|
"learning_rate": 1.4809012875536485e-06, |
|
"loss": 1.5157, |
|
"step": 8020 |
|
}, |
|
{ |
|
"epoch": 16.58, |
|
"learning_rate": 1.470278969957081e-06, |
|
"loss": 1.5185, |
|
"step": 8030 |
|
}, |
|
{ |
|
"epoch": 16.6, |
|
"learning_rate": 1.459656652360515e-06, |
|
"loss": 1.5148, |
|
"step": 8040 |
|
}, |
|
{ |
|
"epoch": 16.62, |
|
"learning_rate": 1.449034334763949e-06, |
|
"loss": 1.5171, |
|
"step": 8050 |
|
}, |
|
{ |
|
"epoch": 16.64, |
|
"learning_rate": 1.438412017167382e-06, |
|
"loss": 1.5161, |
|
"step": 8060 |
|
}, |
|
{ |
|
"epoch": 16.66, |
|
"learning_rate": 1.4277896995708152e-06, |
|
"loss": 1.5214, |
|
"step": 8070 |
|
}, |
|
{ |
|
"epoch": 16.69, |
|
"learning_rate": 1.417167381974249e-06, |
|
"loss": 1.5153, |
|
"step": 8080 |
|
}, |
|
{ |
|
"epoch": 16.71, |
|
"learning_rate": 1.4065450643776818e-06, |
|
"loss": 1.5146, |
|
"step": 8090 |
|
}, |
|
{ |
|
"epoch": 16.73, |
|
"learning_rate": 1.3959227467811158e-06, |
|
"loss": 1.5181, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 16.75, |
|
"learning_rate": 1.38530042918455e-06, |
|
"loss": 1.519, |
|
"step": 8110 |
|
}, |
|
{ |
|
"epoch": 16.77, |
|
"learning_rate": 1.3746781115879832e-06, |
|
"loss": 1.515, |
|
"step": 8120 |
|
}, |
|
{ |
|
"epoch": 16.79, |
|
"learning_rate": 1.3640557939914159e-06, |
|
"loss": 1.5184, |
|
"step": 8130 |
|
}, |
|
{ |
|
"epoch": 16.81, |
|
"learning_rate": 1.3534334763948498e-06, |
|
"loss": 1.5149, |
|
"step": 8140 |
|
}, |
|
{ |
|
"epoch": 16.84, |
|
"learning_rate": 1.3428111587982836e-06, |
|
"loss": 1.5181, |
|
"step": 8150 |
|
}, |
|
{ |
|
"epoch": 16.86, |
|
"learning_rate": 1.3321888412017164e-06, |
|
"loss": 1.5164, |
|
"step": 8160 |
|
}, |
|
{ |
|
"epoch": 16.86, |
|
"eval_loss": 1.4089733362197876, |
|
"eval_runtime": 75.6986, |
|
"eval_samples_per_second": 528.411, |
|
"eval_steps_per_second": 8.256, |
|
"step": 8160 |
|
}, |
|
{ |
|
"epoch": 16.88, |
|
"learning_rate": 1.32156652360515e-06, |
|
"loss": 1.5162, |
|
"step": 8170 |
|
}, |
|
{ |
|
"epoch": 16.9, |
|
"learning_rate": 1.310944206008584e-06, |
|
"loss": 1.5192, |
|
"step": 8180 |
|
}, |
|
{ |
|
"epoch": 16.92, |
|
"learning_rate": 1.3003218884120166e-06, |
|
"loss": 1.5175, |
|
"step": 8190 |
|
}, |
|
{ |
|
"epoch": 16.94, |
|
"learning_rate": 1.2896995708154505e-06, |
|
"loss": 1.5167, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 16.96, |
|
"learning_rate": 1.2790772532188845e-06, |
|
"loss": 1.5187, |
|
"step": 8210 |
|
}, |
|
{ |
|
"epoch": 16.99, |
|
"learning_rate": 1.268454935622318e-06, |
|
"loss": 1.5179, |
|
"step": 8220 |
|
}, |
|
{ |
|
"epoch": 17.01, |
|
"learning_rate": 1.2578326180257504e-06, |
|
"loss": 1.6602, |
|
"step": 8230 |
|
}, |
|
{ |
|
"epoch": 17.03, |
|
"learning_rate": 1.2472103004291846e-06, |
|
"loss": 1.5216, |
|
"step": 8240 |
|
}, |
|
{ |
|
"epoch": 17.05, |
|
"learning_rate": 1.2365879828326185e-06, |
|
"loss": 1.5173, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 17.07, |
|
"learning_rate": 1.2259656652360512e-06, |
|
"loss": 1.5189, |
|
"step": 8260 |
|
}, |
|
{ |
|
"epoch": 17.09, |
|
"learning_rate": 1.2153433476394851e-06, |
|
"loss": 1.5138, |
|
"step": 8270 |
|
}, |
|
{ |
|
"epoch": 17.12, |
|
"learning_rate": 1.2047210300429187e-06, |
|
"loss": 1.5169, |
|
"step": 8280 |
|
}, |
|
{ |
|
"epoch": 17.14, |
|
"learning_rate": 1.1940987124463513e-06, |
|
"loss": 1.5151, |
|
"step": 8290 |
|
}, |
|
{ |
|
"epoch": 17.16, |
|
"learning_rate": 1.1834763948497853e-06, |
|
"loss": 1.5167, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 17.18, |
|
"learning_rate": 1.1728540772532192e-06, |
|
"loss": 1.519, |
|
"step": 8310 |
|
}, |
|
{ |
|
"epoch": 17.2, |
|
"learning_rate": 1.1622317596566519e-06, |
|
"loss": 1.5163, |
|
"step": 8320 |
|
}, |
|
{ |
|
"epoch": 17.2, |
|
"eval_loss": 1.4109740257263184, |
|
"eval_runtime": 75.6126, |
|
"eval_samples_per_second": 529.012, |
|
"eval_steps_per_second": 8.266, |
|
"step": 8320 |
|
}, |
|
{ |
|
"epoch": 17.22, |
|
"learning_rate": 1.1516094420600858e-06, |
|
"loss": 1.5149, |
|
"step": 8330 |
|
}, |
|
{ |
|
"epoch": 17.24, |
|
"learning_rate": 1.1409871244635193e-06, |
|
"loss": 1.5184, |
|
"step": 8340 |
|
}, |
|
{ |
|
"epoch": 17.27, |
|
"learning_rate": 1.1303648068669533e-06, |
|
"loss": 1.5157, |
|
"step": 8350 |
|
}, |
|
{ |
|
"epoch": 17.29, |
|
"learning_rate": 1.119742489270386e-06, |
|
"loss": 1.5178, |
|
"step": 8360 |
|
}, |
|
{ |
|
"epoch": 17.31, |
|
"learning_rate": 1.10912017167382e-06, |
|
"loss": 1.5144, |
|
"step": 8370 |
|
}, |
|
{ |
|
"epoch": 17.33, |
|
"learning_rate": 1.0984978540772534e-06, |
|
"loss": 1.5142, |
|
"step": 8380 |
|
}, |
|
{ |
|
"epoch": 17.35, |
|
"learning_rate": 1.0878755364806865e-06, |
|
"loss": 1.5178, |
|
"step": 8390 |
|
}, |
|
{ |
|
"epoch": 17.37, |
|
"learning_rate": 1.07725321888412e-06, |
|
"loss": 1.5145, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 17.39, |
|
"learning_rate": 1.066630901287554e-06, |
|
"loss": 1.5142, |
|
"step": 8410 |
|
}, |
|
{ |
|
"epoch": 17.42, |
|
"learning_rate": 1.0560085836909866e-06, |
|
"loss": 1.5178, |
|
"step": 8420 |
|
}, |
|
{ |
|
"epoch": 17.44, |
|
"learning_rate": 1.0453862660944206e-06, |
|
"loss": 1.5136, |
|
"step": 8430 |
|
}, |
|
{ |
|
"epoch": 17.46, |
|
"learning_rate": 1.0347639484978543e-06, |
|
"loss": 1.5158, |
|
"step": 8440 |
|
}, |
|
{ |
|
"epoch": 17.48, |
|
"learning_rate": 1.024141630901288e-06, |
|
"loss": 1.5147, |
|
"step": 8450 |
|
}, |
|
{ |
|
"epoch": 17.5, |
|
"learning_rate": 1.0135193133047207e-06, |
|
"loss": 1.5132, |
|
"step": 8460 |
|
}, |
|
{ |
|
"epoch": 17.52, |
|
"learning_rate": 1.0028969957081547e-06, |
|
"loss": 1.5158, |
|
"step": 8470 |
|
}, |
|
{ |
|
"epoch": 17.54, |
|
"learning_rate": 9.922746781115884e-07, |
|
"loss": 1.5142, |
|
"step": 8480 |
|
}, |
|
{ |
|
"epoch": 17.54, |
|
"eval_loss": 1.4122473001480103, |
|
"eval_runtime": 75.6407, |
|
"eval_samples_per_second": 528.816, |
|
"eval_steps_per_second": 8.263, |
|
"step": 8480 |
|
}, |
|
{ |
|
"epoch": 17.57, |
|
"learning_rate": 9.816523605150213e-07, |
|
"loss": 1.5191, |
|
"step": 8490 |
|
}, |
|
{ |
|
"epoch": 17.59, |
|
"learning_rate": 9.71030042918455e-07, |
|
"loss": 1.5172, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 17.61, |
|
"learning_rate": 9.604077253218887e-07, |
|
"loss": 1.5157, |
|
"step": 8510 |
|
}, |
|
{ |
|
"epoch": 17.63, |
|
"learning_rate": 9.497854077253216e-07, |
|
"loss": 1.5165, |
|
"step": 8520 |
|
}, |
|
{ |
|
"epoch": 17.65, |
|
"learning_rate": 9.391630901287551e-07, |
|
"loss": 1.5156, |
|
"step": 8530 |
|
}, |
|
{ |
|
"epoch": 17.67, |
|
"learning_rate": 9.285407725321891e-07, |
|
"loss": 1.5191, |
|
"step": 8540 |
|
}, |
|
{ |
|
"epoch": 17.69, |
|
"learning_rate": 9.179184549356219e-07, |
|
"loss": 1.5147, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 17.72, |
|
"learning_rate": 9.072961373390556e-07, |
|
"loss": 1.5154, |
|
"step": 8560 |
|
}, |
|
{ |
|
"epoch": 17.74, |
|
"learning_rate": 8.966738197424894e-07, |
|
"loss": 1.5166, |
|
"step": 8570 |
|
}, |
|
{ |
|
"epoch": 17.76, |
|
"learning_rate": 8.860515021459234e-07, |
|
"loss": 1.5149, |
|
"step": 8580 |
|
}, |
|
{ |
|
"epoch": 17.78, |
|
"learning_rate": 8.754291845493559e-07, |
|
"loss": 1.5154, |
|
"step": 8590 |
|
}, |
|
{ |
|
"epoch": 17.8, |
|
"learning_rate": 8.648068669527898e-07, |
|
"loss": 1.5185, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 17.82, |
|
"learning_rate": 8.541845493562236e-07, |
|
"loss": 1.517, |
|
"step": 8610 |
|
}, |
|
{ |
|
"epoch": 17.84, |
|
"learning_rate": 8.435622317596563e-07, |
|
"loss": 1.5174, |
|
"step": 8620 |
|
}, |
|
{ |
|
"epoch": 17.87, |
|
"learning_rate": 8.329399141630901e-07, |
|
"loss": 1.5211, |
|
"step": 8630 |
|
}, |
|
{ |
|
"epoch": 17.89, |
|
"learning_rate": 8.223175965665239e-07, |
|
"loss": 1.5166, |
|
"step": 8640 |
|
}, |
|
{ |
|
"epoch": 17.89, |
|
"eval_loss": 1.409213662147522, |
|
"eval_runtime": 75.6047, |
|
"eval_samples_per_second": 529.068, |
|
"eval_steps_per_second": 8.267, |
|
"step": 8640 |
|
}, |
|
{ |
|
"epoch": 17.91, |
|
"learning_rate": 8.116952789699566e-07, |
|
"loss": 1.5189, |
|
"step": 8650 |
|
}, |
|
{ |
|
"epoch": 17.93, |
|
"learning_rate": 8.010729613733904e-07, |
|
"loss": 1.5177, |
|
"step": 8660 |
|
}, |
|
{ |
|
"epoch": 17.95, |
|
"learning_rate": 7.904506437768243e-07, |
|
"loss": 1.5154, |
|
"step": 8670 |
|
}, |
|
{ |
|
"epoch": 17.97, |
|
"learning_rate": 7.79828326180258e-07, |
|
"loss": 1.5186, |
|
"step": 8680 |
|
}, |
|
{ |
|
"epoch": 17.99, |
|
"learning_rate": 7.692060085836908e-07, |
|
"loss": 1.5185, |
|
"step": 8690 |
|
}, |
|
{ |
|
"epoch": 18.02, |
|
"learning_rate": 7.585836909871245e-07, |
|
"loss": 1.6583, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 18.04, |
|
"learning_rate": 7.479613733905582e-07, |
|
"loss": 1.517, |
|
"step": 8710 |
|
}, |
|
{ |
|
"epoch": 18.06, |
|
"learning_rate": 7.37339055793991e-07, |
|
"loss": 1.5154, |
|
"step": 8720 |
|
}, |
|
{ |
|
"epoch": 18.08, |
|
"learning_rate": 7.26716738197425e-07, |
|
"loss": 1.5173, |
|
"step": 8730 |
|
}, |
|
{ |
|
"epoch": 18.1, |
|
"learning_rate": 7.160944206008587e-07, |
|
"loss": 1.5173, |
|
"step": 8740 |
|
}, |
|
{ |
|
"epoch": 18.12, |
|
"learning_rate": 7.054721030042915e-07, |
|
"loss": 1.5159, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 18.15, |
|
"learning_rate": 6.948497854077252e-07, |
|
"loss": 1.5156, |
|
"step": 8760 |
|
}, |
|
{ |
|
"epoch": 18.17, |
|
"learning_rate": 6.84227467811159e-07, |
|
"loss": 1.5149, |
|
"step": 8770 |
|
}, |
|
{ |
|
"epoch": 18.19, |
|
"learning_rate": 6.736051502145918e-07, |
|
"loss": 1.5168, |
|
"step": 8780 |
|
}, |
|
{ |
|
"epoch": 18.21, |
|
"learning_rate": 6.629828326180255e-07, |
|
"loss": 1.5156, |
|
"step": 8790 |
|
}, |
|
{ |
|
"epoch": 18.23, |
|
"learning_rate": 6.523605150214595e-07, |
|
"loss": 1.5172, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 18.23, |
|
"eval_loss": 1.405761480331421, |
|
"eval_runtime": 75.6165, |
|
"eval_samples_per_second": 528.985, |
|
"eval_steps_per_second": 8.265, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 18.25, |
|
"learning_rate": 6.417381974248932e-07, |
|
"loss": 1.5152, |
|
"step": 8810 |
|
}, |
|
{ |
|
"epoch": 18.27, |
|
"learning_rate": 6.31115879828326e-07, |
|
"loss": 1.5184, |
|
"step": 8820 |
|
}, |
|
{ |
|
"epoch": 18.3, |
|
"learning_rate": 6.204935622317597e-07, |
|
"loss": 1.518, |
|
"step": 8830 |
|
}, |
|
{ |
|
"epoch": 18.32, |
|
"learning_rate": 6.098712446351936e-07, |
|
"loss": 1.5147, |
|
"step": 8840 |
|
}, |
|
{ |
|
"epoch": 18.34, |
|
"learning_rate": 5.992489270386262e-07, |
|
"loss": 1.5164, |
|
"step": 8850 |
|
}, |
|
{ |
|
"epoch": 18.36, |
|
"learning_rate": 5.886266094420601e-07, |
|
"loss": 1.5162, |
|
"step": 8860 |
|
}, |
|
{ |
|
"epoch": 18.38, |
|
"learning_rate": 5.780042918454939e-07, |
|
"loss": 1.5188, |
|
"step": 8870 |
|
}, |
|
{ |
|
"epoch": 18.4, |
|
"learning_rate": 5.673819742489266e-07, |
|
"loss": 1.5148, |
|
"step": 8880 |
|
}, |
|
{ |
|
"epoch": 18.42, |
|
"learning_rate": 5.567596566523604e-07, |
|
"loss": 1.5172, |
|
"step": 8890 |
|
}, |
|
{ |
|
"epoch": 18.45, |
|
"learning_rate": 5.461373390557942e-07, |
|
"loss": 1.5137, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 18.47, |
|
"learning_rate": 5.355150214592269e-07, |
|
"loss": 1.5179, |
|
"step": 8910 |
|
}, |
|
{ |
|
"epoch": 18.49, |
|
"learning_rate": 5.248927038626608e-07, |
|
"loss": 1.5154, |
|
"step": 8920 |
|
}, |
|
{ |
|
"epoch": 18.51, |
|
"learning_rate": 5.142703862660946e-07, |
|
"loss": 1.5135, |
|
"step": 8930 |
|
}, |
|
{ |
|
"epoch": 18.53, |
|
"learning_rate": 5.036480686695283e-07, |
|
"loss": 1.5171, |
|
"step": 8940 |
|
}, |
|
{ |
|
"epoch": 18.55, |
|
"learning_rate": 4.930257510729611e-07, |
|
"loss": 1.5136, |
|
"step": 8950 |
|
}, |
|
{ |
|
"epoch": 18.57, |
|
"learning_rate": 4.824034334763949e-07, |
|
"loss": 1.5153, |
|
"step": 8960 |
|
}, |
|
{ |
|
"epoch": 18.57, |
|
"eval_loss": 1.411158561706543, |
|
"eval_runtime": 75.4883, |
|
"eval_samples_per_second": 529.884, |
|
"eval_steps_per_second": 8.279, |
|
"step": 8960 |
|
}, |
|
{ |
|
"epoch": 18.6, |
|
"learning_rate": 4.717811158798287e-07, |
|
"loss": 1.5157, |
|
"step": 8970 |
|
}, |
|
{ |
|
"epoch": 18.62, |
|
"learning_rate": 4.611587982832614e-07, |
|
"loss": 1.5166, |
|
"step": 8980 |
|
}, |
|
{ |
|
"epoch": 18.64, |
|
"learning_rate": 4.505364806866952e-07, |
|
"loss": 1.5136, |
|
"step": 8990 |
|
}, |
|
{ |
|
"epoch": 18.66, |
|
"learning_rate": 4.39914163090129e-07, |
|
"loss": 1.5197, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 18.68, |
|
"learning_rate": 4.2929184549356173e-07, |
|
"loss": 1.5174, |
|
"step": 9010 |
|
}, |
|
{ |
|
"epoch": 18.7, |
|
"learning_rate": 4.1866952789699556e-07, |
|
"loss": 1.5153, |
|
"step": 9020 |
|
}, |
|
{ |
|
"epoch": 18.72, |
|
"learning_rate": 4.080472103004293e-07, |
|
"loss": 1.5167, |
|
"step": 9030 |
|
}, |
|
{ |
|
"epoch": 18.75, |
|
"learning_rate": 3.974248927038632e-07, |
|
"loss": 1.5149, |
|
"step": 9040 |
|
}, |
|
{ |
|
"epoch": 18.77, |
|
"learning_rate": 3.868025751072959e-07, |
|
"loss": 1.5155, |
|
"step": 9050 |
|
}, |
|
{ |
|
"epoch": 18.79, |
|
"learning_rate": 3.761802575107297e-07, |
|
"loss": 1.5176, |
|
"step": 9060 |
|
}, |
|
{ |
|
"epoch": 18.81, |
|
"learning_rate": 3.655579399141635e-07, |
|
"loss": 1.5194, |
|
"step": 9070 |
|
}, |
|
{ |
|
"epoch": 18.83, |
|
"learning_rate": 3.549356223175962e-07, |
|
"loss": 1.5176, |
|
"step": 9080 |
|
}, |
|
{ |
|
"epoch": 18.85, |
|
"learning_rate": 3.4431330472103003e-07, |
|
"loss": 1.5181, |
|
"step": 9090 |
|
}, |
|
{ |
|
"epoch": 18.87, |
|
"learning_rate": 3.3369098712446387e-07, |
|
"loss": 1.5143, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 18.9, |
|
"learning_rate": 3.2306866952789654e-07, |
|
"loss": 1.5157, |
|
"step": 9110 |
|
}, |
|
{ |
|
"epoch": 18.92, |
|
"learning_rate": 3.124463519313304e-07, |
|
"loss": 1.517, |
|
"step": 9120 |
|
}, |
|
{ |
|
"epoch": 18.92, |
|
"eval_loss": 1.4098219871520996, |
|
"eval_runtime": 75.6146, |
|
"eval_samples_per_second": 528.998, |
|
"eval_steps_per_second": 8.266, |
|
"step": 9120 |
|
}, |
|
{ |
|
"epoch": 18.94, |
|
"learning_rate": 3.018240343347642e-07, |
|
"loss": 1.5181, |
|
"step": 9130 |
|
}, |
|
{ |
|
"epoch": 18.96, |
|
"learning_rate": 2.912017167381969e-07, |
|
"loss": 1.5167, |
|
"step": 9140 |
|
}, |
|
{ |
|
"epoch": 18.98, |
|
"learning_rate": 2.805793991416307e-07, |
|
"loss": 1.519, |
|
"step": 9150 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"learning_rate": 2.699570815450645e-07, |
|
"loss": 1.66, |
|
"step": 9160 |
|
}, |
|
{ |
|
"epoch": 19.03, |
|
"learning_rate": 2.5933476394849834e-07, |
|
"loss": 1.5183, |
|
"step": 9170 |
|
}, |
|
{ |
|
"epoch": 19.05, |
|
"learning_rate": 2.4871244635193106e-07, |
|
"loss": 1.5174, |
|
"step": 9180 |
|
}, |
|
{ |
|
"epoch": 19.07, |
|
"learning_rate": 2.3809012875536484e-07, |
|
"loss": 1.5166, |
|
"step": 9190 |
|
}, |
|
{ |
|
"epoch": 19.09, |
|
"learning_rate": 2.2746781115879863e-07, |
|
"loss": 1.5178, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 19.11, |
|
"learning_rate": 2.1684549356223138e-07, |
|
"loss": 1.5152, |
|
"step": 9210 |
|
}, |
|
{ |
|
"epoch": 19.13, |
|
"learning_rate": 2.0622317596566518e-07, |
|
"loss": 1.5159, |
|
"step": 9220 |
|
}, |
|
{ |
|
"epoch": 19.15, |
|
"learning_rate": 1.95600858369099e-07, |
|
"loss": 1.5153, |
|
"step": 9230 |
|
}, |
|
{ |
|
"epoch": 19.18, |
|
"learning_rate": 1.849785407725317e-07, |
|
"loss": 1.5167, |
|
"step": 9240 |
|
}, |
|
{ |
|
"epoch": 19.2, |
|
"learning_rate": 1.7435622317596553e-07, |
|
"loss": 1.5139, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 19.22, |
|
"learning_rate": 1.637339055793993e-07, |
|
"loss": 1.5161, |
|
"step": 9260 |
|
}, |
|
{ |
|
"epoch": 19.24, |
|
"learning_rate": 1.5311158798283315e-07, |
|
"loss": 1.5158, |
|
"step": 9270 |
|
}, |
|
{ |
|
"epoch": 19.26, |
|
"learning_rate": 1.4248927038626584e-07, |
|
"loss": 1.5163, |
|
"step": 9280 |
|
}, |
|
{ |
|
"epoch": 19.26, |
|
"eval_loss": 1.4113389253616333, |
|
"eval_runtime": 75.6638, |
|
"eval_samples_per_second": 528.654, |
|
"eval_steps_per_second": 8.26, |
|
"step": 9280 |
|
}, |
|
{ |
|
"epoch": 19.28, |
|
"learning_rate": 1.3186695278969965e-07, |
|
"loss": 1.5164, |
|
"step": 9290 |
|
}, |
|
{ |
|
"epoch": 19.3, |
|
"learning_rate": 1.2124463519313346e-07, |
|
"loss": 1.5164, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 19.33, |
|
"learning_rate": 1.1062231759656619e-07, |
|
"loss": 1.5135, |
|
"step": 9310 |
|
}, |
|
{ |
|
"epoch": 19.35, |
|
"learning_rate": 1e-07, |
|
"loss": 1.5163, |
|
"step": 9320 |
|
}, |
|
{ |
|
"before_init_mem_cpu": 1174028288, |
|
"before_init_mem_gpu": 0, |
|
"epoch": 19.35, |
|
"init_mem_cpu_alloc_delta": 2783887360, |
|
"init_mem_cpu_peaked_delta": 414412800, |
|
"init_mem_gpu_alloc_delta": 497994240, |
|
"init_mem_gpu_peaked_delta": 512, |
|
"step": 9320, |
|
"total_flos": 2.013804511704069e+19, |
|
"train_loss": 0.2542688299146333, |
|
"train_mem_cpu_alloc_delta": 8443502592, |
|
"train_mem_cpu_peaked_delta": 383119360, |
|
"train_mem_gpu_alloc_delta": 2006973440, |
|
"train_mem_gpu_peaked_delta": 15468135424, |
|
"train_runtime": 73469.8819, |
|
"train_samples_per_second": 1041.258, |
|
"train_steps_per_second": 0.127 |
|
} |
|
], |
|
"max_steps": 9320, |
|
"num_train_epochs": 20, |
|
"total_flos": 2.013804511704069e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|