|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 7.940507276253302, |
|
"global_step": 520000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 2.9999999999999997e-06, |
|
"loss": 0.8784, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 5.999999999999999e-06, |
|
"loss": 0.77, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 8.999999999999999e-06, |
|
"loss": 0.7664, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.1999999999999999e-05, |
|
"loss": 0.7655, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.4999999999999999e-05, |
|
"loss": 0.765, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_runtime": 1.3797, |
|
"eval_samples_per_second": 724.791, |
|
"eval_steps_per_second": 11.597, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.7999999999999997e-05, |
|
"loss": 0.7647, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 2.1e-05, |
|
"loss": 0.7644, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.3999999999999997e-05, |
|
"loss": 0.7638, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 2.6999999999999996e-05, |
|
"loss": 0.7633, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.9999999999999997e-05, |
|
"loss": 0.76, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_runtime": 1.1376, |
|
"eval_samples_per_second": 879.066, |
|
"eval_steps_per_second": 14.065, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 3.2999999999999996e-05, |
|
"loss": 0.7148, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 3.5999999999999994e-05, |
|
"loss": 0.6963, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 3.9e-05, |
|
"loss": 0.6755, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.2e-05, |
|
"loss": 0.6516, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.4999999999999996e-05, |
|
"loss": 0.6412, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_runtime": 1.1689, |
|
"eval_samples_per_second": 855.472, |
|
"eval_steps_per_second": 13.688, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.7999999999999994e-05, |
|
"loss": 0.6348, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 5.1e-05, |
|
"loss": 0.6295, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 5.399999999999999e-05, |
|
"loss": 0.6224, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 5.6999999999999996e-05, |
|
"loss": 0.6169, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 5.9999999999999995e-05, |
|
"loss": 0.6113, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_runtime": 1.0179, |
|
"eval_samples_per_second": 982.441, |
|
"eval_steps_per_second": 15.719, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 6.299999999999999e-05, |
|
"loss": 0.6074, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 6.599999999999999e-05, |
|
"loss": 0.6039, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 6.9e-05, |
|
"loss": 0.6005, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 7.199999999999999e-05, |
|
"loss": 0.5968, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 7.5e-05, |
|
"loss": 0.5932, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eval_runtime": 1.1249, |
|
"eval_samples_per_second": 888.989, |
|
"eval_steps_per_second": 14.224, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 7.8e-05, |
|
"loss": 0.5912, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 8.1e-05, |
|
"loss": 0.58, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 8.4e-05, |
|
"loss": 0.5698, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 8.699999999999999e-05, |
|
"loss": 0.5639, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 8.999999999999999e-05, |
|
"loss": 0.5601, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"eval_runtime": 1.0096, |
|
"eval_samples_per_second": 990.512, |
|
"eval_steps_per_second": 15.848, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 9.3e-05, |
|
"loss": 0.5536, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 9.599999999999999e-05, |
|
"loss": 0.5496, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 9.9e-05, |
|
"loss": 0.5458, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.000102, |
|
"loss": 0.5426, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.00010499999999999999, |
|
"loss": 0.5394, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"eval_runtime": 1.3102, |
|
"eval_samples_per_second": 763.27, |
|
"eval_steps_per_second": 12.212, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.00010799999999999998, |
|
"loss": 0.5345, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.00011099999999999999, |
|
"loss": 0.5302, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.00011399999999999999, |
|
"loss": 0.527, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.000117, |
|
"loss": 0.5232, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00011999999999999999, |
|
"loss": 0.5202, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eval_runtime": 1.0146, |
|
"eval_samples_per_second": 985.598, |
|
"eval_steps_per_second": 15.77, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.00012299999999999998, |
|
"loss": 0.5163, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.00012599999999999997, |
|
"loss": 0.5126, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.000129, |
|
"loss": 0.5094, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.00013199999999999998, |
|
"loss": 0.5061, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.000135, |
|
"loss": 0.5036, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"eval_runtime": 1.0362, |
|
"eval_samples_per_second": 965.067, |
|
"eval_steps_per_second": 15.441, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.000138, |
|
"loss": 0.4995, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.00014099999999999998, |
|
"loss": 0.4967, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.00014399999999999998, |
|
"loss": 0.4934, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.000147, |
|
"loss": 0.4898, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.00015, |
|
"loss": 0.4863, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_runtime": 1.0374, |
|
"eval_samples_per_second": 963.96, |
|
"eval_steps_per_second": 15.423, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.0001499996172456075, |
|
"loss": 0.4824, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.00014999846898661572, |
|
"loss": 0.4778, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 0.00014999655523558183, |
|
"loss": 0.474, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.00014999387601343436, |
|
"loss": 0.4694, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 0.00014999043134947282, |
|
"loss": 0.4651, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_runtime": 1.0465, |
|
"eval_samples_per_second": 955.566, |
|
"eval_steps_per_second": 15.289, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 0.00014998622128136748, |
|
"loss": 0.4608, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.000149981245855159, |
|
"loss": 0.4566, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 0.00014997550512525784, |
|
"loss": 0.4523, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 0.0001499689991544437, |
|
"loss": 0.4483, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 0.00014996172801386482, |
|
"loss": 0.4447, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"eval_runtime": 1.2288, |
|
"eval_samples_per_second": 813.826, |
|
"eval_steps_per_second": 13.021, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 0.00014995369178303722, |
|
"loss": 0.4408, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.0001499448905498439, |
|
"loss": 0.4381, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 0.00014993532441053364, |
|
"loss": 0.434, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 0.0001499249934697203, |
|
"loss": 0.4316, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 0.0001499138978403813, |
|
"loss": 0.4275, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_runtime": 1.0345, |
|
"eval_samples_per_second": 966.655, |
|
"eval_steps_per_second": 15.466, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 0.00014990203764385677, |
|
"loss": 0.425, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 0.00014988941300984784, |
|
"loss": 0.422, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 0.0001498760240764155, |
|
"loss": 0.4191, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 0.000149861870989979, |
|
"loss": 0.4164, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 0.0001498469539053142, |
|
"loss": 0.4138, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"eval_runtime": 1.1341, |
|
"eval_samples_per_second": 881.784, |
|
"eval_steps_per_second": 14.109, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 0.00014983127298555198, |
|
"loss": 0.4114, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 0.00014981482840217632, |
|
"loss": 0.4086, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 0.00014979762033502262, |
|
"loss": 0.4066, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 0.00014977964897227547, |
|
"loss": 0.4042, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 0.00014976091451046687, |
|
"loss": 0.402, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"eval_runtime": 1.0331, |
|
"eval_samples_per_second": 967.957, |
|
"eval_steps_per_second": 15.487, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 0.00014974141715447386, |
|
"loss": 0.3999, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 0.00014972115711751644, |
|
"loss": 0.398, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 0.00014970013462115505, |
|
"loss": 0.3971, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 0.00014967834989528843, |
|
"loss": 0.3942, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 0.00014965580317815078, |
|
"loss": 0.3926, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"eval_runtime": 1.084, |
|
"eval_samples_per_second": 922.521, |
|
"eval_steps_per_second": 14.76, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 0.00014963249471630944, |
|
"loss": 0.3906, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 0.000149608424764662, |
|
"loss": 0.391, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 0.0001495835935864336, |
|
"loss": 0.3875, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 0.00014955800145317397, |
|
"loss": 0.3861, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 0.00014953164864475466, |
|
"loss": 0.3844, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"eval_runtime": 1.0992, |
|
"eval_samples_per_second": 909.734, |
|
"eval_steps_per_second": 14.556, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 0.0001495045354493657, |
|
"loss": 0.3829, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 0.00014947666216351272, |
|
"loss": 0.3815, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 0.00014944802909201344, |
|
"loss": 0.38, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 0.00014941863654799456, |
|
"loss": 0.3789, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 0.00014938848485288825, |
|
"loss": 0.3785, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"eval_runtime": 0.9266, |
|
"eval_samples_per_second": 1079.167, |
|
"eval_steps_per_second": 17.267, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 0.0001493575743364286, |
|
"loss": 0.3766, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 0.00014932590533664808, |
|
"loss": 0.3745, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 0.0001492934781998738, |
|
"loss": 0.3741, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 0.0001492602932807237, |
|
"loss": 0.3729, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 0.00014922635094210277, |
|
"loss": 0.3709, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"eval_runtime": 0.9895, |
|
"eval_samples_per_second": 1010.579, |
|
"eval_steps_per_second": 16.169, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 0.000149191651555199, |
|
"loss": 0.3699, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 0.0001491561954994793, |
|
"loss": 0.3688, |
|
"step": 97000 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 0.00014911998316268537, |
|
"loss": 0.3678, |
|
"step": 98000 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 0.00014908301494082963, |
|
"loss": 0.3666, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 0.00014904529123819054, |
|
"loss": 0.3654, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"eval_runtime": 1.0046, |
|
"eval_samples_per_second": 995.424, |
|
"eval_steps_per_second": 15.927, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 0.00014900681246730852, |
|
"loss": 0.3643, |
|
"step": 101000 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 0.00014896757904898125, |
|
"loss": 0.3646, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 0.00014892759141225904, |
|
"loss": 0.3628, |
|
"step": 103000 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 0.00014888684999444035, |
|
"loss": 0.3616, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 0.00014884535524106675, |
|
"loss": 0.3604, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"eval_runtime": 1.0499, |
|
"eval_samples_per_second": 952.499, |
|
"eval_steps_per_second": 15.24, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 0.00014880310760591824, |
|
"loss": 0.3594, |
|
"step": 106000 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 0.0001487601075510082, |
|
"loss": 0.3597, |
|
"step": 107000 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 0.0001487163555465783, |
|
"loss": 0.3583, |
|
"step": 108000 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 0.0001486718520710935, |
|
"loss": 0.3583, |
|
"step": 109000 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 0.00014862659761123663, |
|
"loss": 0.3558, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"eval_runtime": 1.0153, |
|
"eval_samples_per_second": 984.91, |
|
"eval_steps_per_second": 15.759, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 0.00014858059266190327, |
|
"loss": 0.3552, |
|
"step": 111000 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 0.00014853383772619612, |
|
"loss": 0.3544, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 0.00014848633331541967, |
|
"loss": 0.3537, |
|
"step": 113000 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 0.0001484380799490746, |
|
"loss": 0.3524, |
|
"step": 114000 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 0.00014838907815485194, |
|
"loss": 0.3519, |
|
"step": 115000 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"eval_runtime": 1.003, |
|
"eval_samples_per_second": 997.001, |
|
"eval_steps_per_second": 15.952, |
|
"step": 115000 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 0.00014833932846862748, |
|
"loss": 0.3511, |
|
"step": 116000 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 0.00014828883143445582, |
|
"loss": 0.3502, |
|
"step": 117000 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 0.0001482375876045644, |
|
"loss": 0.3493, |
|
"step": 118000 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 0.0001481855975393476, |
|
"loss": 0.3489, |
|
"step": 119000 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 0.0001481328618073604, |
|
"loss": 0.3482, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"eval_runtime": 1.0596, |
|
"eval_samples_per_second": 943.744, |
|
"eval_steps_per_second": 15.1, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 0.0001480793809853123, |
|
"loss": 0.3478, |
|
"step": 121000 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 0.00014802515565806107, |
|
"loss": 0.3468, |
|
"step": 122000 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 0.00014797018641860612, |
|
"loss": 0.346, |
|
"step": 123000 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 0.0001479144738680823, |
|
"loss": 0.3474, |
|
"step": 124000 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 0.00014785801861575312, |
|
"loss": 0.3447, |
|
"step": 125000 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"eval_runtime": 0.9375, |
|
"eval_samples_per_second": 1066.699, |
|
"eval_steps_per_second": 17.067, |
|
"step": 125000 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 0.00014780082127900416, |
|
"loss": 0.3439, |
|
"step": 126000 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 0.00014774288248333635, |
|
"loss": 0.3436, |
|
"step": 127000 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 0.00014768420286235908, |
|
"loss": 0.3429, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 0.00014762478305778328, |
|
"loss": 0.3422, |
|
"step": 129000 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 0.0001475646237194144, |
|
"loss": 0.3414, |
|
"step": 130000 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"eval_runtime": 1.0085, |
|
"eval_samples_per_second": 991.553, |
|
"eval_steps_per_second": 15.865, |
|
"step": 130000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 0.00014750372550514533, |
|
"loss": 0.3409, |
|
"step": 131000 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 0.0001474420890809492, |
|
"loss": 0.3401, |
|
"step": 132000 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 0.00014737971512087202, |
|
"loss": 0.3396, |
|
"step": 133000 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 0.00014731660430702552, |
|
"loss": 0.339, |
|
"step": 134000 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 0.00014725275732957937, |
|
"loss": 0.3402, |
|
"step": 135000 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"eval_runtime": 1.1005, |
|
"eval_samples_per_second": 908.652, |
|
"eval_steps_per_second": 14.538, |
|
"step": 135000 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 0.00014718817488675387, |
|
"loss": 0.3379, |
|
"step": 136000 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 0.00014712285768481235, |
|
"loss": 0.3371, |
|
"step": 137000 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 0.00014705680643805323, |
|
"loss": 0.3368, |
|
"step": 138000 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 0.00014699002186880232, |
|
"loss": 0.3363, |
|
"step": 139000 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 0.00014692250470740503, |
|
"loss": 0.3361, |
|
"step": 140000 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"eval_runtime": 1.0104, |
|
"eval_samples_per_second": 989.716, |
|
"eval_steps_per_second": 15.835, |
|
"step": 140000 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 0.00014685425569221819, |
|
"loss": 0.3353, |
|
"step": 141000 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 0.00014678527556960207, |
|
"loss": 0.3346, |
|
"step": 142000 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 0.0001467155650939123, |
|
"loss": 0.3342, |
|
"step": 143000 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 0.00014664512502749141, |
|
"loss": 0.3338, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 0.00014657395614066075, |
|
"loss": 0.3334, |
|
"step": 145000 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"eval_runtime": 1.0369, |
|
"eval_samples_per_second": 964.439, |
|
"eval_steps_per_second": 15.431, |
|
"step": 145000 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 0.0001465020592117118, |
|
"loss": 0.3327, |
|
"step": 146000 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 0.0001464294350268979, |
|
"loss": 0.3324, |
|
"step": 147000 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 0.00014635608438042546, |
|
"loss": 0.3319, |
|
"step": 148000 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 0.00014628200807444543, |
|
"loss": 0.3313, |
|
"step": 149000 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 0.0001462072069190444, |
|
"loss": 0.3307, |
|
"step": 150000 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"eval_runtime": 1.0431, |
|
"eval_samples_per_second": 958.687, |
|
"eval_steps_per_second": 15.339, |
|
"step": 150000 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 0.00014613168173223585, |
|
"loss": 0.3308, |
|
"step": 151000 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 0.00014605543333995113, |
|
"loss": 0.3302, |
|
"step": 152000 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 0.00014597846257603038, |
|
"loss": 0.3294, |
|
"step": 153000 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 0.0001459007702822136, |
|
"loss": 0.329, |
|
"step": 154000 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 0.00014582235730813128, |
|
"loss": 0.3283, |
|
"step": 155000 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"eval_runtime": 1.0629, |
|
"eval_samples_per_second": 940.817, |
|
"eval_steps_per_second": 15.053, |
|
"step": 155000 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 0.00014574322451129507, |
|
"loss": 0.3281, |
|
"step": 156000 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 0.00014566337275708863, |
|
"loss": 0.328, |
|
"step": 157000 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 0.0001455828029187579, |
|
"loss": 0.3272, |
|
"step": 158000 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 0.00014550151587740178, |
|
"loss": 0.3269, |
|
"step": 159000 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 0.00014541951252196225, |
|
"loss": 0.3265, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"eval_runtime": 1.0199, |
|
"eval_samples_per_second": 980.452, |
|
"eval_steps_per_second": 15.687, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 0.00014533679374921493, |
|
"loss": 0.3259, |
|
"step": 161000 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 0.00014525336046375905, |
|
"loss": 0.3254, |
|
"step": 162000 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 0.00014516921357800766, |
|
"loss": 0.3251, |
|
"step": 163000 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 0.00014508435401217759, |
|
"loss": 0.3244, |
|
"step": 164000 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 0.00014499878269427948, |
|
"loss": 0.3243, |
|
"step": 165000 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"eval_runtime": 1.0655, |
|
"eval_samples_per_second": 938.486, |
|
"eval_steps_per_second": 15.016, |
|
"step": 165000 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 0.00014491250056010758, |
|
"loss": 0.3236, |
|
"step": 166000 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 0.00014482550855322943, |
|
"loss": 0.3233, |
|
"step": 167000 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 0.0001447378076249757, |
|
"loss": 0.3231, |
|
"step": 168000 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 0.00014464939873442973, |
|
"loss": 0.3228, |
|
"step": 169000 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 0.00014456028284841693, |
|
"loss": 0.3221, |
|
"step": 170000 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"eval_runtime": 1.1756, |
|
"eval_samples_per_second": 850.656, |
|
"eval_steps_per_second": 13.611, |
|
"step": 170000 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 0.00014447046094149437, |
|
"loss": 0.3221, |
|
"step": 171000 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 0.00014437993399594003, |
|
"loss": 0.3216, |
|
"step": 172000 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 0.0001442887030017421, |
|
"loss": 0.3217, |
|
"step": 173000 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 0.00014419676895658807, |
|
"loss": 0.3208, |
|
"step": 174000 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 0.000144104132865854, |
|
"loss": 0.3207, |
|
"step": 175000 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"eval_runtime": 1.0679, |
|
"eval_samples_per_second": 936.423, |
|
"eval_steps_per_second": 14.983, |
|
"step": 175000 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 0.0001440107957425933, |
|
"loss": 0.3203, |
|
"step": 176000 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 0.0001439167586075258, |
|
"loss": 0.3201, |
|
"step": 177000 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 0.0001438220224890265, |
|
"loss": 0.3191, |
|
"step": 178000 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 0.00014372658842311449, |
|
"loss": 0.3195, |
|
"step": 179000 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 0.00014363045745344137, |
|
"loss": 0.3191, |
|
"step": 180000 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"eval_runtime": 1.0169, |
|
"eval_samples_per_second": 983.42, |
|
"eval_steps_per_second": 15.735, |
|
"step": 180000 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 0.00014353363063128005, |
|
"loss": 0.3183, |
|
"step": 181000 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 0.0001434361090155131, |
|
"loss": 0.3177, |
|
"step": 182000 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 0.00014333789367262136, |
|
"loss": 0.3178, |
|
"step": 183000 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 0.00014323898567667202, |
|
"loss": 0.3177, |
|
"step": 184000 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 0.00014313938610930712, |
|
"loss": 0.3171, |
|
"step": 185000 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"eval_runtime": 1.0441, |
|
"eval_samples_per_second": 957.721, |
|
"eval_steps_per_second": 15.324, |
|
"step": 185000 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 0.00014303909605973154, |
|
"loss": 0.3167, |
|
"step": 186000 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 0.0001429381166247012, |
|
"loss": 0.3168, |
|
"step": 187000 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 0.00014283644890851103, |
|
"loss": 0.3164, |
|
"step": 188000 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 0.00014273409402298291, |
|
"loss": 0.3161, |
|
"step": 189000 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 0.00014263105308745343, |
|
"loss": 0.3155, |
|
"step": 190000 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"eval_runtime": 1.0119, |
|
"eval_samples_per_second": 988.212, |
|
"eval_steps_per_second": 15.811, |
|
"step": 190000 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 0.00014252732722876176, |
|
"loss": 0.3149, |
|
"step": 191000 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 0.0001424229175812373, |
|
"loss": 0.3149, |
|
"step": 192000 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 0.00014231782528668717, |
|
"loss": 0.3146, |
|
"step": 193000 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 0.00014221205149438394, |
|
"loss": 0.3145, |
|
"step": 194000 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 0.0001421055973610528, |
|
"loss": 0.3138, |
|
"step": 195000 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"eval_runtime": 1.0908, |
|
"eval_samples_per_second": 916.734, |
|
"eval_steps_per_second": 14.668, |
|
"step": 195000 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 0.00014199846405085913, |
|
"loss": 0.3137, |
|
"step": 196000 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 0.00014189065273539564, |
|
"loss": 0.3135, |
|
"step": 197000 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 0.00014178216459366958, |
|
"loss": 0.3137, |
|
"step": 198000 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 0.00014167300081208988, |
|
"loss": 0.3131, |
|
"step": 199000 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 0.00014156316258445421, |
|
"loss": 0.3125, |
|
"step": 200000 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"eval_runtime": 1.1346, |
|
"eval_samples_per_second": 881.333, |
|
"eval_steps_per_second": 14.101, |
|
"step": 200000 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"learning_rate": 0.00014145265111193583, |
|
"loss": 0.3121, |
|
"step": 201000 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"learning_rate": 0.00014134146760307043, |
|
"loss": 0.3122, |
|
"step": 202000 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 0.00014122961327374313, |
|
"loss": 0.3131, |
|
"step": 203000 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 0.0001411170893471749, |
|
"loss": 0.3116, |
|
"step": 204000 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"learning_rate": 0.00014100389705390938, |
|
"loss": 0.311, |
|
"step": 205000 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"eval_runtime": 1.1239, |
|
"eval_samples_per_second": 889.731, |
|
"eval_steps_per_second": 14.236, |
|
"step": 205000 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"learning_rate": 0.0001408900376317994, |
|
"loss": 0.311, |
|
"step": 206000 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"learning_rate": 0.0001407755123259933, |
|
"loss": 0.3108, |
|
"step": 207000 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"learning_rate": 0.00014066032238892152, |
|
"loss": 0.3104, |
|
"step": 208000 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"learning_rate": 0.00014054446908028272, |
|
"loss": 0.3102, |
|
"step": 209000 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"learning_rate": 0.00014042795366703018, |
|
"loss": 0.3097, |
|
"step": 210000 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"eval_runtime": 1.0233, |
|
"eval_samples_per_second": 977.233, |
|
"eval_steps_per_second": 15.636, |
|
"step": 210000 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"learning_rate": 0.0001403107774233577, |
|
"loss": 0.3098, |
|
"step": 211000 |
|
}, |
|
{ |
|
"epoch": 3.24, |
|
"learning_rate": 0.00014019294163068597, |
|
"loss": 0.3093, |
|
"step": 212000 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"learning_rate": 0.00014007444757764835, |
|
"loss": 0.3093, |
|
"step": 213000 |
|
}, |
|
{ |
|
"epoch": 3.27, |
|
"learning_rate": 0.0001399552965600768, |
|
"loss": 0.3088, |
|
"step": 214000 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"learning_rate": 0.0001398354898809877, |
|
"loss": 0.3089, |
|
"step": 215000 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"eval_runtime": 1.0098, |
|
"eval_samples_per_second": 990.287, |
|
"eval_steps_per_second": 15.845, |
|
"step": 215000 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"learning_rate": 0.0001397150288505678, |
|
"loss": 0.3315, |
|
"step": 216000 |
|
}, |
|
{ |
|
"epoch": 3.31, |
|
"learning_rate": 0.00013959391478615959, |
|
"loss": 0.628, |
|
"step": 217000 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"learning_rate": 0.00013947214901224706, |
|
"loss": 0.3112, |
|
"step": 218000 |
|
}, |
|
{ |
|
"epoch": 3.34, |
|
"learning_rate": 0.0001393497328604412, |
|
"loss": 0.3094, |
|
"step": 219000 |
|
}, |
|
{ |
|
"epoch": 3.36, |
|
"learning_rate": 0.00013922666766946545, |
|
"loss": 0.3082, |
|
"step": 220000 |
|
}, |
|
{ |
|
"epoch": 3.36, |
|
"eval_runtime": 1.0751, |
|
"eval_samples_per_second": 930.139, |
|
"eval_steps_per_second": 14.882, |
|
"step": 220000 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"learning_rate": 0.00013910295478514106, |
|
"loss": 0.3079, |
|
"step": 221000 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"learning_rate": 0.0001389785955603722, |
|
"loss": 0.3077, |
|
"step": 222000 |
|
}, |
|
{ |
|
"epoch": 3.41, |
|
"learning_rate": 0.00013885359135513154, |
|
"loss": 0.3073, |
|
"step": 223000 |
|
}, |
|
{ |
|
"epoch": 3.42, |
|
"learning_rate": 0.000138727943536445, |
|
"loss": 0.3064, |
|
"step": 224000 |
|
}, |
|
{ |
|
"epoch": 3.44, |
|
"learning_rate": 0.00013860165347837698, |
|
"loss": 0.3066, |
|
"step": 225000 |
|
}, |
|
{ |
|
"epoch": 3.44, |
|
"eval_runtime": 1.0901, |
|
"eval_samples_per_second": 917.309, |
|
"eval_steps_per_second": 14.677, |
|
"step": 225000 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"learning_rate": 0.00013847472256201535, |
|
"loss": 0.306, |
|
"step": 226000 |
|
}, |
|
{ |
|
"epoch": 3.47, |
|
"learning_rate": 0.00013834715217545625, |
|
"loss": 0.3058, |
|
"step": 227000 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"learning_rate": 0.000138218943713789, |
|
"loss": 0.3056, |
|
"step": 228000 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"learning_rate": 0.0001380900985790808, |
|
"loss": 0.3054, |
|
"step": 229000 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"learning_rate": 0.00013796061818036138, |
|
"loss": 0.3051, |
|
"step": 230000 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"eval_runtime": 1.0217, |
|
"eval_samples_per_second": 978.715, |
|
"eval_steps_per_second": 15.659, |
|
"step": 230000 |
|
}, |
|
{ |
|
"epoch": 3.53, |
|
"learning_rate": 0.00013783050393360768, |
|
"loss": 0.3048, |
|
"step": 231000 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"learning_rate": 0.0001376997572617282, |
|
"loss": 0.305, |
|
"step": 232000 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"learning_rate": 0.00013756837959454766, |
|
"loss": 0.3042, |
|
"step": 233000 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"learning_rate": 0.0001374363723687911, |
|
"loss": 0.3042, |
|
"step": 234000 |
|
}, |
|
{ |
|
"epoch": 3.59, |
|
"learning_rate": 0.00013730373702806846, |
|
"loss": 0.304, |
|
"step": 235000 |
|
}, |
|
{ |
|
"epoch": 3.59, |
|
"eval_runtime": 1.0392, |
|
"eval_samples_per_second": 962.319, |
|
"eval_steps_per_second": 15.397, |
|
"step": 235000 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"learning_rate": 0.00013717047502285855, |
|
"loss": 0.3036, |
|
"step": 236000 |
|
}, |
|
{ |
|
"epoch": 3.62, |
|
"learning_rate": 0.0001370365878104933, |
|
"loss": 0.3036, |
|
"step": 237000 |
|
}, |
|
{ |
|
"epoch": 3.63, |
|
"learning_rate": 0.00013690207685514185, |
|
"loss": 0.3031, |
|
"step": 238000 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"learning_rate": 0.0001367669436277944, |
|
"loss": 0.3032, |
|
"step": 239000 |
|
}, |
|
{ |
|
"epoch": 3.66, |
|
"learning_rate": 0.0001366311896062463, |
|
"loss": 0.3036, |
|
"step": 240000 |
|
}, |
|
{ |
|
"epoch": 3.66, |
|
"eval_runtime": 1.0097, |
|
"eval_samples_per_second": 990.396, |
|
"eval_steps_per_second": 15.846, |
|
"step": 240000 |
|
}, |
|
{ |
|
"epoch": 3.68, |
|
"learning_rate": 0.00013649481627508181, |
|
"loss": 0.3031, |
|
"step": 241000 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"learning_rate": 0.0001363578251256578, |
|
"loss": 0.3023, |
|
"step": 242000 |
|
}, |
|
{ |
|
"epoch": 3.71, |
|
"learning_rate": 0.00013622021765608754, |
|
"loss": 0.3022, |
|
"step": 243000 |
|
}, |
|
{ |
|
"epoch": 3.73, |
|
"learning_rate": 0.00013608199537122425, |
|
"loss": 0.3017, |
|
"step": 244000 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"learning_rate": 0.0001359431597826447, |
|
"loss": 0.3019, |
|
"step": 245000 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"eval_runtime": 1.0744, |
|
"eval_samples_per_second": 930.717, |
|
"eval_steps_per_second": 14.891, |
|
"step": 245000 |
|
}, |
|
{ |
|
"epoch": 3.76, |
|
"learning_rate": 0.0001358037124086327, |
|
"loss": 0.3015, |
|
"step": 246000 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"learning_rate": 0.00013566365477416233, |
|
"loss": 0.3018, |
|
"step": 247000 |
|
}, |
|
{ |
|
"epoch": 3.79, |
|
"learning_rate": 0.00013552298841088144, |
|
"loss": 0.3013, |
|
"step": 248000 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"learning_rate": 0.00013538171485709486, |
|
"loss": 0.3006, |
|
"step": 249000 |
|
}, |
|
{ |
|
"epoch": 3.82, |
|
"learning_rate": 0.00013523983565774753, |
|
"loss": 0.3008, |
|
"step": 250000 |
|
}, |
|
{ |
|
"epoch": 3.82, |
|
"eval_runtime": 1.0168, |
|
"eval_samples_per_second": 983.434, |
|
"eval_steps_per_second": 15.735, |
|
"step": 250000 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"learning_rate": 0.00013509735236440766, |
|
"loss": 0.3003, |
|
"step": 251000 |
|
}, |
|
{ |
|
"epoch": 3.85, |
|
"learning_rate": 0.00013495426653524972, |
|
"loss": 0.3, |
|
"step": 252000 |
|
}, |
|
{ |
|
"epoch": 3.86, |
|
"learning_rate": 0.00013481057973503742, |
|
"loss": 0.3, |
|
"step": 253000 |
|
}, |
|
{ |
|
"epoch": 3.88, |
|
"learning_rate": 0.00013466629353510651, |
|
"loss": 0.2997, |
|
"step": 254000 |
|
}, |
|
{ |
|
"epoch": 3.89, |
|
"learning_rate": 0.00013452140951334787, |
|
"loss": 0.2995, |
|
"step": 255000 |
|
}, |
|
{ |
|
"epoch": 3.89, |
|
"eval_runtime": 0.8192, |
|
"eval_samples_per_second": 1220.744, |
|
"eval_steps_per_second": 19.532, |
|
"step": 255000 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"learning_rate": 0.00013437592925418985, |
|
"loss": 0.2996, |
|
"step": 256000 |
|
}, |
|
{ |
|
"epoch": 3.92, |
|
"learning_rate": 0.00013422985434858133, |
|
"loss": 0.299, |
|
"step": 257000 |
|
}, |
|
{ |
|
"epoch": 3.94, |
|
"learning_rate": 0.00013408318639397405, |
|
"loss": 0.2987, |
|
"step": 258000 |
|
}, |
|
{ |
|
"epoch": 3.95, |
|
"learning_rate": 0.00013393592699430525, |
|
"loss": 0.2986, |
|
"step": 259000 |
|
}, |
|
{ |
|
"epoch": 3.97, |
|
"learning_rate": 0.00013378807775998012, |
|
"loss": 0.2984, |
|
"step": 260000 |
|
}, |
|
{ |
|
"epoch": 3.97, |
|
"eval_runtime": 1.0461, |
|
"eval_samples_per_second": 955.963, |
|
"eval_steps_per_second": 15.295, |
|
"step": 260000 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"learning_rate": 0.00013363964030785422, |
|
"loss": 0.2983, |
|
"step": 261000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 0.00013349061626121578, |
|
"loss": 0.2982, |
|
"step": 262000 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"learning_rate": 0.00013334100724976783, |
|
"loss": 0.2977, |
|
"step": 263000 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"learning_rate": 0.0001331908149096106, |
|
"loss": 0.2976, |
|
"step": 264000 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"learning_rate": 0.00013304004088322342, |
|
"loss": 0.2978, |
|
"step": 265000 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"eval_runtime": 1.0225, |
|
"eval_samples_per_second": 978.001, |
|
"eval_steps_per_second": 15.648, |
|
"step": 265000 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"learning_rate": 0.00013288868681944692, |
|
"loss": 0.2971, |
|
"step": 266000 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"learning_rate": 0.00013273675437346487, |
|
"loss": 0.2972, |
|
"step": 267000 |
|
}, |
|
{ |
|
"epoch": 4.09, |
|
"learning_rate": 0.00013258424520678618, |
|
"loss": 0.2969, |
|
"step": 268000 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"learning_rate": 0.00013243116098722663, |
|
"loss": 0.2968, |
|
"step": 269000 |
|
}, |
|
{ |
|
"epoch": 4.12, |
|
"learning_rate": 0.00013227750338889077, |
|
"loss": 0.2966, |
|
"step": 270000 |
|
}, |
|
{ |
|
"epoch": 4.12, |
|
"eval_runtime": 1.1084, |
|
"eval_samples_per_second": 902.192, |
|
"eval_steps_per_second": 14.435, |
|
"step": 270000 |
|
}, |
|
{ |
|
"epoch": 4.14, |
|
"learning_rate": 0.00013212327409215343, |
|
"loss": 0.296, |
|
"step": 271000 |
|
}, |
|
{ |
|
"epoch": 4.15, |
|
"learning_rate": 0.0001319684747836415, |
|
"loss": 0.2958, |
|
"step": 272000 |
|
}, |
|
{ |
|
"epoch": 4.17, |
|
"learning_rate": 0.0001318131071562154, |
|
"loss": 0.2961, |
|
"step": 273000 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"learning_rate": 0.00013165717290895067, |
|
"loss": 0.2957, |
|
"step": 274000 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"learning_rate": 0.0001315006737471192, |
|
"loss": 0.2955, |
|
"step": 275000 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"eval_runtime": 1.0552, |
|
"eval_samples_per_second": 947.654, |
|
"eval_steps_per_second": 15.162, |
|
"step": 275000 |
|
}, |
|
{ |
|
"epoch": 4.21, |
|
"learning_rate": 0.0001313436113821708, |
|
"loss": 0.2952, |
|
"step": 276000 |
|
}, |
|
{ |
|
"epoch": 4.23, |
|
"learning_rate": 0.00013118598753171425, |
|
"loss": 0.2951, |
|
"step": 277000 |
|
}, |
|
{ |
|
"epoch": 4.25, |
|
"learning_rate": 0.0001310278039194988, |
|
"loss": 0.2951, |
|
"step": 278000 |
|
}, |
|
{ |
|
"epoch": 4.26, |
|
"learning_rate": 0.00013086906227539506, |
|
"loss": 0.2952, |
|
"step": 279000 |
|
}, |
|
{ |
|
"epoch": 4.28, |
|
"learning_rate": 0.00013070976433537623, |
|
"loss": 0.2946, |
|
"step": 280000 |
|
}, |
|
{ |
|
"epoch": 4.28, |
|
"eval_runtime": 1.0293, |
|
"eval_samples_per_second": 971.532, |
|
"eval_steps_per_second": 15.545, |
|
"step": 280000 |
|
}, |
|
{ |
|
"epoch": 4.29, |
|
"learning_rate": 0.00013054991184149905, |
|
"loss": 0.2946, |
|
"step": 281000 |
|
}, |
|
{ |
|
"epoch": 4.31, |
|
"learning_rate": 0.00013038950654188476, |
|
"loss": 0.2942, |
|
"step": 282000 |
|
}, |
|
{ |
|
"epoch": 4.32, |
|
"learning_rate": 0.00013022855019070005, |
|
"loss": 0.2941, |
|
"step": 283000 |
|
}, |
|
{ |
|
"epoch": 4.34, |
|
"learning_rate": 0.0001300670445481378, |
|
"loss": 0.2937, |
|
"step": 284000 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"learning_rate": 0.0001299049913803978, |
|
"loss": 0.2937, |
|
"step": 285000 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"eval_runtime": 1.0469, |
|
"eval_samples_per_second": 955.197, |
|
"eval_steps_per_second": 15.283, |
|
"step": 285000 |
|
}, |
|
{ |
|
"epoch": 4.37, |
|
"learning_rate": 0.00012974239245966754, |
|
"loss": 0.2934, |
|
"step": 286000 |
|
}, |
|
{ |
|
"epoch": 4.38, |
|
"learning_rate": 0.0001295792495641028, |
|
"loss": 0.2962, |
|
"step": 287000 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"learning_rate": 0.00012941556447780813, |
|
"loss": 0.2931, |
|
"step": 288000 |
|
}, |
|
{ |
|
"epoch": 4.41, |
|
"learning_rate": 0.0001292513389908174, |
|
"loss": 0.2931, |
|
"step": 289000 |
|
}, |
|
{ |
|
"epoch": 4.43, |
|
"learning_rate": 0.0001290865748990742, |
|
"loss": 0.2932, |
|
"step": 290000 |
|
}, |
|
{ |
|
"epoch": 4.43, |
|
"eval_runtime": 1.0143, |
|
"eval_samples_per_second": 985.898, |
|
"eval_steps_per_second": 15.774, |
|
"step": 290000 |
|
}, |
|
{ |
|
"epoch": 4.44, |
|
"learning_rate": 0.00012892127400441228, |
|
"loss": 0.2923, |
|
"step": 291000 |
|
}, |
|
{ |
|
"epoch": 4.46, |
|
"learning_rate": 0.00012875543811453576, |
|
"loss": 0.2919, |
|
"step": 292000 |
|
}, |
|
{ |
|
"epoch": 4.47, |
|
"learning_rate": 0.0001285890690429993, |
|
"loss": 0.2931, |
|
"step": 293000 |
|
}, |
|
{ |
|
"epoch": 4.49, |
|
"learning_rate": 0.00012842216860918846, |
|
"loss": 0.292, |
|
"step": 294000 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"learning_rate": 0.0001282547386382996, |
|
"loss": 0.2914, |
|
"step": 295000 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"eval_runtime": 1.0329, |
|
"eval_samples_per_second": 968.135, |
|
"eval_steps_per_second": 15.49, |
|
"step": 295000 |
|
}, |
|
{ |
|
"epoch": 4.52, |
|
"learning_rate": 0.0001280867809613201, |
|
"loss": 0.2919, |
|
"step": 296000 |
|
}, |
|
{ |
|
"epoch": 4.54, |
|
"learning_rate": 0.0001279182974150082, |
|
"loss": 0.2915, |
|
"step": 297000 |
|
}, |
|
{ |
|
"epoch": 4.55, |
|
"learning_rate": 0.00012774928984187297, |
|
"loss": 0.2914, |
|
"step": 298000 |
|
}, |
|
{ |
|
"epoch": 4.57, |
|
"learning_rate": 0.00012757976009015413, |
|
"loss": 0.2908, |
|
"step": 299000 |
|
}, |
|
{ |
|
"epoch": 4.58, |
|
"learning_rate": 0.0001274097100138019, |
|
"loss": 0.2909, |
|
"step": 300000 |
|
}, |
|
{ |
|
"epoch": 4.58, |
|
"eval_runtime": 1.0054, |
|
"eval_samples_per_second": 994.612, |
|
"eval_steps_per_second": 15.914, |
|
"step": 300000 |
|
}, |
|
{ |
|
"epoch": 4.6, |
|
"learning_rate": 0.00012723914147245663, |
|
"loss": 0.2906, |
|
"step": 301000 |
|
}, |
|
{ |
|
"epoch": 4.61, |
|
"learning_rate": 0.00012706805633142863, |
|
"loss": 0.2906, |
|
"step": 302000 |
|
}, |
|
{ |
|
"epoch": 4.63, |
|
"learning_rate": 0.00012689645646167755, |
|
"loss": 0.2902, |
|
"step": 303000 |
|
}, |
|
{ |
|
"epoch": 4.64, |
|
"learning_rate": 0.00012672434373979207, |
|
"loss": 0.291, |
|
"step": 304000 |
|
}, |
|
{ |
|
"epoch": 4.66, |
|
"learning_rate": 0.00012655172004796936, |
|
"loss": 0.2899, |
|
"step": 305000 |
|
}, |
|
{ |
|
"epoch": 4.66, |
|
"eval_runtime": 1.0975, |
|
"eval_samples_per_second": 911.158, |
|
"eval_steps_per_second": 14.579, |
|
"step": 305000 |
|
}, |
|
{ |
|
"epoch": 4.67, |
|
"learning_rate": 0.00012637858727399448, |
|
"loss": 0.2898, |
|
"step": 306000 |
|
}, |
|
{ |
|
"epoch": 4.69, |
|
"learning_rate": 0.00012620494731121966, |
|
"loss": 0.2896, |
|
"step": 307000 |
|
}, |
|
{ |
|
"epoch": 4.7, |
|
"learning_rate": 0.00012603080205854372, |
|
"loss": 0.2894, |
|
"step": 308000 |
|
}, |
|
{ |
|
"epoch": 4.72, |
|
"learning_rate": 0.00012585615342039126, |
|
"loss": 0.2894, |
|
"step": 309000 |
|
}, |
|
{ |
|
"epoch": 4.73, |
|
"learning_rate": 0.0001256810033066918, |
|
"loss": 0.2894, |
|
"step": 310000 |
|
}, |
|
{ |
|
"epoch": 4.73, |
|
"eval_runtime": 1.0481, |
|
"eval_samples_per_second": 954.11, |
|
"eval_steps_per_second": 15.266, |
|
"step": 310000 |
|
}, |
|
{ |
|
"epoch": 4.75, |
|
"learning_rate": 0.0001255053536328589, |
|
"loss": 0.2887, |
|
"step": 311000 |
|
}, |
|
{ |
|
"epoch": 4.76, |
|
"learning_rate": 0.0001253292063197693, |
|
"loss": 0.2887, |
|
"step": 312000 |
|
}, |
|
{ |
|
"epoch": 4.78, |
|
"learning_rate": 0.0001251525632937418, |
|
"loss": 0.2886, |
|
"step": 313000 |
|
}, |
|
{ |
|
"epoch": 4.79, |
|
"learning_rate": 0.00012497542648651615, |
|
"loss": 0.2887, |
|
"step": 314000 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"learning_rate": 0.00012479779783523216, |
|
"loss": 0.2883, |
|
"step": 315000 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"eval_runtime": 1.0333, |
|
"eval_samples_per_second": 967.804, |
|
"eval_steps_per_second": 15.485, |
|
"step": 315000 |
|
}, |
|
{ |
|
"epoch": 4.83, |
|
"learning_rate": 0.00012461967928240828, |
|
"loss": 0.2883, |
|
"step": 316000 |
|
}, |
|
{ |
|
"epoch": 4.84, |
|
"learning_rate": 0.00012444107277592047, |
|
"loss": 0.2877, |
|
"step": 317000 |
|
}, |
|
{ |
|
"epoch": 4.86, |
|
"learning_rate": 0.0001242619802689809, |
|
"loss": 0.2879, |
|
"step": 318000 |
|
}, |
|
{ |
|
"epoch": 4.87, |
|
"learning_rate": 0.00012408240372011647, |
|
"loss": 0.2876, |
|
"step": 319000 |
|
}, |
|
{ |
|
"epoch": 4.89, |
|
"learning_rate": 0.0001239023450931476, |
|
"loss": 0.2874, |
|
"step": 320000 |
|
}, |
|
{ |
|
"epoch": 4.89, |
|
"eval_runtime": 1.04, |
|
"eval_samples_per_second": 961.537, |
|
"eval_steps_per_second": 15.385, |
|
"step": 320000 |
|
}, |
|
{ |
|
"epoch": 4.9, |
|
"learning_rate": 0.00012372180635716656, |
|
"loss": 0.2874, |
|
"step": 321000 |
|
}, |
|
{ |
|
"epoch": 4.92, |
|
"learning_rate": 0.00012354078948651604, |
|
"loss": 0.2873, |
|
"step": 322000 |
|
}, |
|
{ |
|
"epoch": 4.93, |
|
"learning_rate": 0.00012335929646076758, |
|
"loss": 0.2868, |
|
"step": 323000 |
|
}, |
|
{ |
|
"epoch": 4.95, |
|
"learning_rate": 0.00012317732926469976, |
|
"loss": 0.2871, |
|
"step": 324000 |
|
}, |
|
{ |
|
"epoch": 4.96, |
|
"learning_rate": 0.00012299488988827675, |
|
"loss": 0.2869, |
|
"step": 325000 |
|
}, |
|
{ |
|
"epoch": 4.96, |
|
"eval_runtime": 1.3977, |
|
"eval_samples_per_second": 715.452, |
|
"eval_steps_per_second": 11.447, |
|
"step": 325000 |
|
}, |
|
{ |
|
"epoch": 4.98, |
|
"learning_rate": 0.0001228119803266263, |
|
"loss": 0.2867, |
|
"step": 326000 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"learning_rate": 0.0001226286025800181, |
|
"loss": 0.2866, |
|
"step": 327000 |
|
}, |
|
{ |
|
"epoch": 5.01, |
|
"learning_rate": 0.00012244475865384177, |
|
"loss": 0.2862, |
|
"step": 328000 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"learning_rate": 0.00012226045055858505, |
|
"loss": 0.2858, |
|
"step": 329000 |
|
}, |
|
{ |
|
"epoch": 5.04, |
|
"learning_rate": 0.00012207568030981174, |
|
"loss": 0.2859, |
|
"step": 330000 |
|
}, |
|
{ |
|
"epoch": 5.04, |
|
"eval_runtime": 1.1314, |
|
"eval_samples_per_second": 883.862, |
|
"eval_steps_per_second": 14.142, |
|
"step": 330000 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"learning_rate": 0.00012189044992813972, |
|
"loss": 0.2858, |
|
"step": 331000 |
|
}, |
|
{ |
|
"epoch": 5.07, |
|
"learning_rate": 0.0001217047614392187, |
|
"loss": 0.2857, |
|
"step": 332000 |
|
}, |
|
{ |
|
"epoch": 5.08, |
|
"learning_rate": 0.00012151861687370828, |
|
"loss": 0.2857, |
|
"step": 333000 |
|
}, |
|
{ |
|
"epoch": 5.1, |
|
"learning_rate": 0.00012133201826725558, |
|
"loss": 0.2852, |
|
"step": 334000 |
|
}, |
|
{ |
|
"epoch": 5.12, |
|
"learning_rate": 0.0001211449676604731, |
|
"loss": 0.2853, |
|
"step": 335000 |
|
}, |
|
{ |
|
"epoch": 5.12, |
|
"eval_runtime": 1.3419, |
|
"eval_samples_per_second": 745.216, |
|
"eval_steps_per_second": 11.923, |
|
"step": 335000 |
|
}, |
|
{ |
|
"epoch": 5.13, |
|
"learning_rate": 0.00012095746709891632, |
|
"loss": 0.2852, |
|
"step": 336000 |
|
}, |
|
{ |
|
"epoch": 5.15, |
|
"learning_rate": 0.00012076951863306127, |
|
"loss": 0.285, |
|
"step": 337000 |
|
}, |
|
{ |
|
"epoch": 5.16, |
|
"learning_rate": 0.0001205811243182823, |
|
"loss": 0.2848, |
|
"step": 338000 |
|
}, |
|
{ |
|
"epoch": 5.18, |
|
"learning_rate": 0.00012039228621482949, |
|
"loss": 0.2858, |
|
"step": 339000 |
|
}, |
|
{ |
|
"epoch": 5.19, |
|
"learning_rate": 0.00012020300638780604, |
|
"loss": 0.2845, |
|
"step": 340000 |
|
}, |
|
{ |
|
"epoch": 5.19, |
|
"eval_runtime": 1.2559, |
|
"eval_samples_per_second": 796.26, |
|
"eval_steps_per_second": 12.74, |
|
"step": 340000 |
|
}, |
|
{ |
|
"epoch": 5.21, |
|
"learning_rate": 0.00012001328690714582, |
|
"loss": 0.284, |
|
"step": 341000 |
|
}, |
|
{ |
|
"epoch": 5.22, |
|
"learning_rate": 0.00011982312984759068, |
|
"loss": 0.2845, |
|
"step": 342000 |
|
}, |
|
{ |
|
"epoch": 5.24, |
|
"learning_rate": 0.00011963253728866778, |
|
"loss": 0.2841, |
|
"step": 343000 |
|
}, |
|
{ |
|
"epoch": 5.25, |
|
"learning_rate": 0.00011944151131466675, |
|
"loss": 0.284, |
|
"step": 344000 |
|
}, |
|
{ |
|
"epoch": 5.27, |
|
"learning_rate": 0.00011925005401461709, |
|
"loss": 0.2836, |
|
"step": 345000 |
|
}, |
|
{ |
|
"epoch": 5.27, |
|
"eval_runtime": 1.1037, |
|
"eval_samples_per_second": 906.031, |
|
"eval_steps_per_second": 14.496, |
|
"step": 345000 |
|
}, |
|
{ |
|
"epoch": 5.28, |
|
"learning_rate": 0.00011905816748226513, |
|
"loss": 0.2834, |
|
"step": 346000 |
|
}, |
|
{ |
|
"epoch": 5.3, |
|
"learning_rate": 0.00011886585381605125, |
|
"loss": 0.2835, |
|
"step": 347000 |
|
}, |
|
{ |
|
"epoch": 5.31, |
|
"learning_rate": 0.00011867311511908693, |
|
"loss": 0.2832, |
|
"step": 348000 |
|
}, |
|
{ |
|
"epoch": 5.33, |
|
"learning_rate": 0.00011847995349913162, |
|
"loss": 0.2828, |
|
"step": 349000 |
|
}, |
|
{ |
|
"epoch": 5.34, |
|
"learning_rate": 0.00011828637106856989, |
|
"loss": 0.2828, |
|
"step": 350000 |
|
}, |
|
{ |
|
"epoch": 5.34, |
|
"eval_runtime": 1.0295, |
|
"eval_samples_per_second": 971.32, |
|
"eval_steps_per_second": 15.541, |
|
"step": 350000 |
|
}, |
|
{ |
|
"epoch": 5.36, |
|
"learning_rate": 0.00011809236994438816, |
|
"loss": 0.2831, |
|
"step": 351000 |
|
}, |
|
{ |
|
"epoch": 5.38, |
|
"learning_rate": 0.00011789795224815164, |
|
"loss": 0.2827, |
|
"step": 352000 |
|
}, |
|
{ |
|
"epoch": 5.39, |
|
"learning_rate": 0.00011770312010598116, |
|
"loss": 0.282, |
|
"step": 353000 |
|
}, |
|
{ |
|
"epoch": 5.41, |
|
"learning_rate": 0.00011750787564852973, |
|
"loss": 0.2822, |
|
"step": 354000 |
|
}, |
|
{ |
|
"epoch": 5.42, |
|
"learning_rate": 0.00011731222101095955, |
|
"loss": 0.2825, |
|
"step": 355000 |
|
}, |
|
{ |
|
"epoch": 5.42, |
|
"eval_runtime": 1.0697, |
|
"eval_samples_per_second": 934.885, |
|
"eval_steps_per_second": 14.958, |
|
"step": 355000 |
|
}, |
|
{ |
|
"epoch": 5.44, |
|
"learning_rate": 0.00011711615833291833, |
|
"loss": 0.2822, |
|
"step": 356000 |
|
}, |
|
{ |
|
"epoch": 5.45, |
|
"learning_rate": 0.0001169196897585161, |
|
"loss": 0.2824, |
|
"step": 357000 |
|
}, |
|
{ |
|
"epoch": 5.47, |
|
"learning_rate": 0.00011672281743630175, |
|
"loss": 0.2818, |
|
"step": 358000 |
|
}, |
|
{ |
|
"epoch": 5.48, |
|
"learning_rate": 0.0001165255435192394, |
|
"loss": 0.2815, |
|
"step": 359000 |
|
}, |
|
{ |
|
"epoch": 5.5, |
|
"learning_rate": 0.00011632787016468506, |
|
"loss": 0.2819, |
|
"step": 360000 |
|
}, |
|
{ |
|
"epoch": 5.5, |
|
"eval_runtime": 1.1008, |
|
"eval_samples_per_second": 908.433, |
|
"eval_steps_per_second": 14.535, |
|
"step": 360000 |
|
}, |
|
{ |
|
"epoch": 5.51, |
|
"learning_rate": 0.0001161297995343628, |
|
"loss": 0.2815, |
|
"step": 361000 |
|
}, |
|
{ |
|
"epoch": 5.53, |
|
"learning_rate": 0.00011593133379434138, |
|
"loss": 0.2815, |
|
"step": 362000 |
|
}, |
|
{ |
|
"epoch": 5.54, |
|
"learning_rate": 0.00011573247511501028, |
|
"loss": 0.2811, |
|
"step": 363000 |
|
}, |
|
{ |
|
"epoch": 5.56, |
|
"learning_rate": 0.00011553322567105619, |
|
"loss": 0.2807, |
|
"step": 364000 |
|
}, |
|
{ |
|
"epoch": 5.57, |
|
"learning_rate": 0.00011533358764143905, |
|
"loss": 0.2808, |
|
"step": 365000 |
|
}, |
|
{ |
|
"epoch": 5.57, |
|
"eval_runtime": 1.1301, |
|
"eval_samples_per_second": 884.842, |
|
"eval_steps_per_second": 14.157, |
|
"step": 365000 |
|
}, |
|
{ |
|
"epoch": 5.59, |
|
"learning_rate": 0.00011513356320936841, |
|
"loss": 0.2808, |
|
"step": 366000 |
|
}, |
|
{ |
|
"epoch": 5.6, |
|
"learning_rate": 0.00011493315456227943, |
|
"loss": 0.2817, |
|
"step": 367000 |
|
}, |
|
{ |
|
"epoch": 5.62, |
|
"learning_rate": 0.00011473236389180894, |
|
"loss": 0.2803, |
|
"step": 368000 |
|
}, |
|
{ |
|
"epoch": 5.63, |
|
"learning_rate": 0.00011453119339377154, |
|
"loss": 0.2803, |
|
"step": 369000 |
|
}, |
|
{ |
|
"epoch": 5.65, |
|
"learning_rate": 0.00011432964526813558, |
|
"loss": 0.2817, |
|
"step": 370000 |
|
}, |
|
{ |
|
"epoch": 5.65, |
|
"eval_runtime": 1.2187, |
|
"eval_samples_per_second": 820.56, |
|
"eval_steps_per_second": 13.129, |
|
"step": 370000 |
|
}, |
|
{ |
|
"epoch": 5.67, |
|
"learning_rate": 0.00011412772171899904, |
|
"loss": 0.2819, |
|
"step": 371000 |
|
}, |
|
{ |
|
"epoch": 5.68, |
|
"learning_rate": 0.00011392542495456556, |
|
"loss": 0.28, |
|
"step": 372000 |
|
}, |
|
{ |
|
"epoch": 5.7, |
|
"learning_rate": 0.00011372275718712006, |
|
"loss": 0.2797, |
|
"step": 373000 |
|
}, |
|
{ |
|
"epoch": 5.71, |
|
"learning_rate": 0.00011351972063300484, |
|
"loss": 0.2797, |
|
"step": 374000 |
|
}, |
|
{ |
|
"epoch": 5.73, |
|
"learning_rate": 0.00011331631751259515, |
|
"loss": 0.2801, |
|
"step": 375000 |
|
}, |
|
{ |
|
"epoch": 5.73, |
|
"eval_runtime": 1.0146, |
|
"eval_samples_per_second": 985.631, |
|
"eval_steps_per_second": 15.77, |
|
"step": 375000 |
|
}, |
|
{ |
|
"epoch": 5.74, |
|
"learning_rate": 0.00011311255005027487, |
|
"loss": 0.2789, |
|
"step": 376000 |
|
}, |
|
{ |
|
"epoch": 5.76, |
|
"learning_rate": 0.00011290842047441232, |
|
"loss": 0.2791, |
|
"step": 377000 |
|
}, |
|
{ |
|
"epoch": 5.77, |
|
"learning_rate": 0.00011270393101733585, |
|
"loss": 0.279, |
|
"step": 378000 |
|
}, |
|
{ |
|
"epoch": 5.79, |
|
"learning_rate": 0.00011249908391530946, |
|
"loss": 0.279, |
|
"step": 379000 |
|
}, |
|
{ |
|
"epoch": 5.8, |
|
"learning_rate": 0.00011229388140850814, |
|
"loss": 0.279, |
|
"step": 380000 |
|
}, |
|
{ |
|
"epoch": 5.8, |
|
"eval_runtime": 1.2375, |
|
"eval_samples_per_second": 808.112, |
|
"eval_steps_per_second": 12.93, |
|
"step": 380000 |
|
}, |
|
{ |
|
"epoch": 5.82, |
|
"learning_rate": 0.00011208832574099368, |
|
"loss": 0.2788, |
|
"step": 381000 |
|
}, |
|
{ |
|
"epoch": 5.83, |
|
"learning_rate": 0.00011188241916068993, |
|
"loss": 0.2785, |
|
"step": 382000 |
|
}, |
|
{ |
|
"epoch": 5.85, |
|
"learning_rate": 0.00011167616391935826, |
|
"loss": 0.2783, |
|
"step": 383000 |
|
}, |
|
{ |
|
"epoch": 5.86, |
|
"learning_rate": 0.00011146956227257293, |
|
"loss": 0.2785, |
|
"step": 384000 |
|
}, |
|
{ |
|
"epoch": 5.88, |
|
"learning_rate": 0.00011126261647969645, |
|
"loss": 0.2781, |
|
"step": 385000 |
|
}, |
|
{ |
|
"epoch": 5.88, |
|
"eval_runtime": 1.0191, |
|
"eval_samples_per_second": 981.273, |
|
"eval_steps_per_second": 15.7, |
|
"step": 385000 |
|
}, |
|
{ |
|
"epoch": 5.89, |
|
"learning_rate": 0.00011105532880385487, |
|
"loss": 0.2782, |
|
"step": 386000 |
|
}, |
|
{ |
|
"epoch": 5.91, |
|
"learning_rate": 0.00011084770151191299, |
|
"loss": 0.2782, |
|
"step": 387000 |
|
}, |
|
{ |
|
"epoch": 5.92, |
|
"learning_rate": 0.00011063973687444962, |
|
"loss": 0.2779, |
|
"step": 388000 |
|
}, |
|
{ |
|
"epoch": 5.94, |
|
"learning_rate": 0.00011043143716573272, |
|
"loss": 0.2774, |
|
"step": 389000 |
|
}, |
|
{ |
|
"epoch": 5.96, |
|
"learning_rate": 0.00011022280466369448, |
|
"loss": 0.2776, |
|
"step": 390000 |
|
}, |
|
{ |
|
"epoch": 5.96, |
|
"eval_runtime": 1.0236, |
|
"eval_samples_per_second": 976.954, |
|
"eval_steps_per_second": 15.631, |
|
"step": 390000 |
|
}, |
|
{ |
|
"epoch": 5.97, |
|
"learning_rate": 0.00011001384164990662, |
|
"loss": 0.2775, |
|
"step": 391000 |
|
}, |
|
{ |
|
"epoch": 5.99, |
|
"learning_rate": 0.00010980455040955506, |
|
"loss": 0.2769, |
|
"step": 392000 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 0.00010959493323141538, |
|
"loss": 0.2773, |
|
"step": 393000 |
|
}, |
|
{ |
|
"epoch": 6.02, |
|
"learning_rate": 0.00010938499240782739, |
|
"loss": 0.277, |
|
"step": 394000 |
|
}, |
|
{ |
|
"epoch": 6.03, |
|
"learning_rate": 0.00010917473023467032, |
|
"loss": 0.277, |
|
"step": 395000 |
|
}, |
|
{ |
|
"epoch": 6.03, |
|
"eval_runtime": 1.0769, |
|
"eval_samples_per_second": 928.59, |
|
"eval_steps_per_second": 14.857, |
|
"step": 395000 |
|
}, |
|
{ |
|
"epoch": 6.05, |
|
"learning_rate": 0.00010896414901133761, |
|
"loss": 0.2766, |
|
"step": 396000 |
|
}, |
|
{ |
|
"epoch": 6.06, |
|
"learning_rate": 0.00010875325104071177, |
|
"loss": 0.2768, |
|
"step": 397000 |
|
}, |
|
{ |
|
"epoch": 6.08, |
|
"learning_rate": 0.00010854203862913927, |
|
"loss": 0.2765, |
|
"step": 398000 |
|
}, |
|
{ |
|
"epoch": 6.09, |
|
"learning_rate": 0.00010833051408640509, |
|
"loss": 0.2763, |
|
"step": 399000 |
|
}, |
|
{ |
|
"epoch": 6.11, |
|
"learning_rate": 0.00010811867972570786, |
|
"loss": 0.2767, |
|
"step": 400000 |
|
}, |
|
{ |
|
"epoch": 6.11, |
|
"eval_runtime": 1.1081, |
|
"eval_samples_per_second": 902.417, |
|
"eval_steps_per_second": 14.439, |
|
"step": 400000 |
|
}, |
|
{ |
|
"epoch": 6.12, |
|
"learning_rate": 0.00010790653786363416, |
|
"loss": 0.2759, |
|
"step": 401000 |
|
}, |
|
{ |
|
"epoch": 6.14, |
|
"learning_rate": 0.00010769409082013337, |
|
"loss": 0.2759, |
|
"step": 402000 |
|
}, |
|
{ |
|
"epoch": 6.15, |
|
"learning_rate": 0.00010748134091849238, |
|
"loss": 0.2757, |
|
"step": 403000 |
|
}, |
|
{ |
|
"epoch": 6.17, |
|
"learning_rate": 0.00010726829048531, |
|
"loss": 0.2762, |
|
"step": 404000 |
|
}, |
|
{ |
|
"epoch": 6.18, |
|
"learning_rate": 0.00010705494185047165, |
|
"loss": 0.276, |
|
"step": 405000 |
|
}, |
|
{ |
|
"epoch": 6.18, |
|
"eval_runtime": 1.1676, |
|
"eval_samples_per_second": 856.476, |
|
"eval_steps_per_second": 13.704, |
|
"step": 405000 |
|
}, |
|
{ |
|
"epoch": 6.2, |
|
"learning_rate": 0.0001068412973471238, |
|
"loss": 0.2754, |
|
"step": 406000 |
|
}, |
|
{ |
|
"epoch": 6.21, |
|
"learning_rate": 0.00010662735931164853, |
|
"loss": 0.2755, |
|
"step": 407000 |
|
}, |
|
{ |
|
"epoch": 6.23, |
|
"learning_rate": 0.0001064131300836379, |
|
"loss": 0.2752, |
|
"step": 408000 |
|
}, |
|
{ |
|
"epoch": 6.25, |
|
"learning_rate": 0.0001061986120058684, |
|
"loss": 0.2748, |
|
"step": 409000 |
|
}, |
|
{ |
|
"epoch": 6.26, |
|
"learning_rate": 0.00010598380742427543, |
|
"loss": 0.2749, |
|
"step": 410000 |
|
}, |
|
{ |
|
"epoch": 6.26, |
|
"eval_runtime": 1.0797, |
|
"eval_samples_per_second": 926.22, |
|
"eval_steps_per_second": 14.82, |
|
"step": 410000 |
|
}, |
|
{ |
|
"epoch": 6.28, |
|
"learning_rate": 0.00010576871868792746, |
|
"loss": 0.275, |
|
"step": 411000 |
|
}, |
|
{ |
|
"epoch": 6.29, |
|
"learning_rate": 0.0001055533481490004, |
|
"loss": 0.2746, |
|
"step": 412000 |
|
}, |
|
{ |
|
"epoch": 6.31, |
|
"learning_rate": 0.000105337698162752, |
|
"loss": 0.2741, |
|
"step": 413000 |
|
}, |
|
{ |
|
"epoch": 6.32, |
|
"learning_rate": 0.00010512177108749594, |
|
"loss": 0.2746, |
|
"step": 414000 |
|
}, |
|
{ |
|
"epoch": 6.34, |
|
"learning_rate": 0.00010490556928457616, |
|
"loss": 0.2743, |
|
"step": 415000 |
|
}, |
|
{ |
|
"epoch": 6.34, |
|
"eval_runtime": 1.0107, |
|
"eval_samples_per_second": 989.389, |
|
"eval_steps_per_second": 15.83, |
|
"step": 415000 |
|
}, |
|
{ |
|
"epoch": 6.35, |
|
"learning_rate": 0.00010468909511834088, |
|
"loss": 0.2741, |
|
"step": 416000 |
|
}, |
|
{ |
|
"epoch": 6.37, |
|
"learning_rate": 0.00010447235095611692, |
|
"loss": 0.2738, |
|
"step": 417000 |
|
}, |
|
{ |
|
"epoch": 6.38, |
|
"learning_rate": 0.00010425533916818376, |
|
"loss": 0.2738, |
|
"step": 418000 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"learning_rate": 0.00010403806212774747, |
|
"loss": 0.2742, |
|
"step": 419000 |
|
}, |
|
{ |
|
"epoch": 6.41, |
|
"learning_rate": 0.000103820522210915, |
|
"loss": 0.2737, |
|
"step": 420000 |
|
}, |
|
{ |
|
"epoch": 6.41, |
|
"eval_runtime": 1.055, |
|
"eval_samples_per_second": 947.861, |
|
"eval_steps_per_second": 15.166, |
|
"step": 420000 |
|
}, |
|
{ |
|
"epoch": 6.43, |
|
"learning_rate": 0.00010360272179666802, |
|
"loss": 0.2742, |
|
"step": 421000 |
|
}, |
|
{ |
|
"epoch": 6.44, |
|
"learning_rate": 0.00010338466326683697, |
|
"loss": 0.2733, |
|
"step": 422000 |
|
}, |
|
{ |
|
"epoch": 6.46, |
|
"learning_rate": 0.00010316634900607497, |
|
"loss": 0.2737, |
|
"step": 423000 |
|
}, |
|
{ |
|
"epoch": 6.47, |
|
"learning_rate": 0.00010294778140183182, |
|
"loss": 0.2732, |
|
"step": 424000 |
|
}, |
|
{ |
|
"epoch": 6.49, |
|
"learning_rate": 0.00010272896284432785, |
|
"loss": 0.2733, |
|
"step": 425000 |
|
}, |
|
{ |
|
"epoch": 6.49, |
|
"eval_runtime": 1.0035, |
|
"eval_samples_per_second": 996.544, |
|
"eval_steps_per_second": 15.945, |
|
"step": 425000 |
|
}, |
|
{ |
|
"epoch": 6.51, |
|
"learning_rate": 0.00010250989572652766, |
|
"loss": 0.2728, |
|
"step": 426000 |
|
}, |
|
{ |
|
"epoch": 6.52, |
|
"learning_rate": 0.00010229058244411427, |
|
"loss": 0.2729, |
|
"step": 427000 |
|
}, |
|
{ |
|
"epoch": 6.54, |
|
"learning_rate": 0.00010207102539546251, |
|
"loss": 0.2728, |
|
"step": 428000 |
|
}, |
|
{ |
|
"epoch": 6.55, |
|
"learning_rate": 0.00010185122698161311, |
|
"loss": 0.2726, |
|
"step": 429000 |
|
}, |
|
{ |
|
"epoch": 6.57, |
|
"learning_rate": 0.00010163118960624632, |
|
"loss": 0.2725, |
|
"step": 430000 |
|
}, |
|
{ |
|
"epoch": 6.57, |
|
"eval_runtime": 1.0983, |
|
"eval_samples_per_second": 910.508, |
|
"eval_steps_per_second": 14.568, |
|
"step": 430000 |
|
}, |
|
{ |
|
"epoch": 6.58, |
|
"learning_rate": 0.00010141091567565561, |
|
"loss": 0.2727, |
|
"step": 431000 |
|
}, |
|
{ |
|
"epoch": 6.6, |
|
"learning_rate": 0.00010119040759872142, |
|
"loss": 0.2725, |
|
"step": 432000 |
|
}, |
|
{ |
|
"epoch": 6.61, |
|
"learning_rate": 0.00010096966778688472, |
|
"loss": 0.2721, |
|
"step": 433000 |
|
}, |
|
{ |
|
"epoch": 6.63, |
|
"learning_rate": 0.00010074869865412074, |
|
"loss": 0.272, |
|
"step": 434000 |
|
}, |
|
{ |
|
"epoch": 6.64, |
|
"learning_rate": 0.00010052750261691254, |
|
"loss": 0.2721, |
|
"step": 435000 |
|
}, |
|
{ |
|
"epoch": 6.64, |
|
"eval_runtime": 0.9895, |
|
"eval_samples_per_second": 1010.612, |
|
"eval_steps_per_second": 16.17, |
|
"step": 435000 |
|
}, |
|
{ |
|
"epoch": 6.66, |
|
"learning_rate": 0.0001003060820942245, |
|
"loss": 0.2716, |
|
"step": 436000 |
|
}, |
|
{ |
|
"epoch": 6.67, |
|
"learning_rate": 0.00010008443950747599, |
|
"loss": 0.2716, |
|
"step": 437000 |
|
}, |
|
{ |
|
"epoch": 6.69, |
|
"learning_rate": 9.986257728051483e-05, |
|
"loss": 0.2717, |
|
"step": 438000 |
|
}, |
|
{ |
|
"epoch": 6.7, |
|
"learning_rate": 9.964049783959082e-05, |
|
"loss": 0.2716, |
|
"step": 439000 |
|
}, |
|
{ |
|
"epoch": 6.72, |
|
"learning_rate": 9.94182036133291e-05, |
|
"loss": 0.2715, |
|
"step": 440000 |
|
}, |
|
{ |
|
"epoch": 6.72, |
|
"eval_runtime": 1.0245, |
|
"eval_samples_per_second": 976.131, |
|
"eval_steps_per_second": 15.618, |
|
"step": 440000 |
|
}, |
|
{ |
|
"epoch": 6.73, |
|
"learning_rate": 9.919569703270376e-05, |
|
"loss": 0.2716, |
|
"step": 441000 |
|
}, |
|
{ |
|
"epoch": 6.75, |
|
"learning_rate": 9.89729805310111e-05, |
|
"loss": 0.2711, |
|
"step": 442000 |
|
}, |
|
{ |
|
"epoch": 6.76, |
|
"learning_rate": 9.875005654384307e-05, |
|
"loss": 0.2712, |
|
"step": 443000 |
|
}, |
|
{ |
|
"epoch": 6.78, |
|
"learning_rate": 9.852692750906071e-05, |
|
"loss": 0.2717, |
|
"step": 444000 |
|
}, |
|
{ |
|
"epoch": 6.8, |
|
"learning_rate": 9.830359586676737e-05, |
|
"loss": 0.2722, |
|
"step": 445000 |
|
}, |
|
{ |
|
"epoch": 6.8, |
|
"eval_runtime": 1.1145, |
|
"eval_samples_per_second": 897.295, |
|
"eval_steps_per_second": 14.357, |
|
"step": 445000 |
|
}, |
|
{ |
|
"epoch": 6.81, |
|
"learning_rate": 9.808006405928215e-05, |
|
"loss": 0.2703, |
|
"step": 446000 |
|
}, |
|
{ |
|
"epoch": 6.83, |
|
"learning_rate": 9.785633453111306e-05, |
|
"loss": 0.2705, |
|
"step": 447000 |
|
}, |
|
{ |
|
"epoch": 6.84, |
|
"learning_rate": 9.763240972893037e-05, |
|
"loss": 0.27, |
|
"step": 448000 |
|
}, |
|
{ |
|
"epoch": 6.86, |
|
"learning_rate": 9.740829210153984e-05, |
|
"loss": 0.2703, |
|
"step": 449000 |
|
}, |
|
{ |
|
"epoch": 6.87, |
|
"learning_rate": 9.718398409985593e-05, |
|
"loss": 0.27, |
|
"step": 450000 |
|
}, |
|
{ |
|
"epoch": 6.87, |
|
"eval_runtime": 0.9938, |
|
"eval_samples_per_second": 1006.215, |
|
"eval_steps_per_second": 16.099, |
|
"step": 450000 |
|
}, |
|
{ |
|
"epoch": 6.89, |
|
"learning_rate": 9.695948817687504e-05, |
|
"loss": 0.2699, |
|
"step": 451000 |
|
}, |
|
{ |
|
"epoch": 6.9, |
|
"learning_rate": 9.673480678764858e-05, |
|
"loss": 0.2698, |
|
"step": 452000 |
|
}, |
|
{ |
|
"epoch": 6.92, |
|
"learning_rate": 9.650994238925626e-05, |
|
"loss": 0.2699, |
|
"step": 453000 |
|
}, |
|
{ |
|
"epoch": 6.93, |
|
"learning_rate": 9.628489744077911e-05, |
|
"loss": 0.2696, |
|
"step": 454000 |
|
}, |
|
{ |
|
"epoch": 6.95, |
|
"learning_rate": 9.60596744032726e-05, |
|
"loss": 0.2699, |
|
"step": 455000 |
|
}, |
|
{ |
|
"epoch": 6.95, |
|
"eval_runtime": 1.0008, |
|
"eval_samples_per_second": 999.165, |
|
"eval_steps_per_second": 15.987, |
|
"step": 455000 |
|
}, |
|
{ |
|
"epoch": 6.96, |
|
"learning_rate": 9.583427573973982e-05, |
|
"loss": 0.2696, |
|
"step": 456000 |
|
}, |
|
{ |
|
"epoch": 6.98, |
|
"learning_rate": 9.560870391510441e-05, |
|
"loss": 0.2695, |
|
"step": 457000 |
|
}, |
|
{ |
|
"epoch": 6.99, |
|
"learning_rate": 9.538296139618371e-05, |
|
"loss": 0.2691, |
|
"step": 458000 |
|
}, |
|
{ |
|
"epoch": 7.01, |
|
"learning_rate": 9.515705065166178e-05, |
|
"loss": 0.2693, |
|
"step": 459000 |
|
}, |
|
{ |
|
"epoch": 7.02, |
|
"learning_rate": 9.493097415206228e-05, |
|
"loss": 0.2688, |
|
"step": 460000 |
|
}, |
|
{ |
|
"epoch": 7.02, |
|
"eval_runtime": 1.0225, |
|
"eval_samples_per_second": 978.034, |
|
"eval_steps_per_second": 15.649, |
|
"step": 460000 |
|
}, |
|
{ |
|
"epoch": 7.04, |
|
"learning_rate": 9.47047343697216e-05, |
|
"loss": 0.269, |
|
"step": 461000 |
|
}, |
|
{ |
|
"epoch": 7.05, |
|
"learning_rate": 9.447833377876176e-05, |
|
"loss": 0.269, |
|
"step": 462000 |
|
}, |
|
{ |
|
"epoch": 7.07, |
|
"learning_rate": 9.425177485506336e-05, |
|
"loss": 0.2688, |
|
"step": 463000 |
|
}, |
|
{ |
|
"epoch": 7.09, |
|
"learning_rate": 9.402506007623848e-05, |
|
"loss": 0.269, |
|
"step": 464000 |
|
}, |
|
{ |
|
"epoch": 7.1, |
|
"learning_rate": 9.379819192160362e-05, |
|
"loss": 0.2692, |
|
"step": 465000 |
|
}, |
|
{ |
|
"epoch": 7.1, |
|
"eval_runtime": 1.1401, |
|
"eval_samples_per_second": 877.142, |
|
"eval_steps_per_second": 14.034, |
|
"step": 465000 |
|
}, |
|
{ |
|
"epoch": 7.12, |
|
"learning_rate": 9.357117287215258e-05, |
|
"loss": 0.2682, |
|
"step": 466000 |
|
}, |
|
{ |
|
"epoch": 7.13, |
|
"learning_rate": 9.334400541052928e-05, |
|
"loss": 0.2683, |
|
"step": 467000 |
|
}, |
|
{ |
|
"epoch": 7.15, |
|
"learning_rate": 9.311669202100073e-05, |
|
"loss": 0.2693, |
|
"step": 468000 |
|
}, |
|
{ |
|
"epoch": 7.16, |
|
"learning_rate": 9.288923518942968e-05, |
|
"loss": 0.2683, |
|
"step": 469000 |
|
}, |
|
{ |
|
"epoch": 7.18, |
|
"learning_rate": 9.26616374032477e-05, |
|
"loss": 0.2677, |
|
"step": 470000 |
|
}, |
|
{ |
|
"epoch": 7.18, |
|
"eval_runtime": 0.8954, |
|
"eval_samples_per_second": 1116.774, |
|
"eval_steps_per_second": 17.868, |
|
"step": 470000 |
|
}, |
|
{ |
|
"epoch": 7.19, |
|
"learning_rate": 9.243390115142761e-05, |
|
"loss": 0.2678, |
|
"step": 471000 |
|
}, |
|
{ |
|
"epoch": 7.21, |
|
"learning_rate": 9.220602892445661e-05, |
|
"loss": 0.2678, |
|
"step": 472000 |
|
}, |
|
{ |
|
"epoch": 7.22, |
|
"learning_rate": 9.197802321430889e-05, |
|
"loss": 0.2679, |
|
"step": 473000 |
|
}, |
|
{ |
|
"epoch": 7.24, |
|
"learning_rate": 9.174988651441833e-05, |
|
"loss": 0.2673, |
|
"step": 474000 |
|
}, |
|
{ |
|
"epoch": 7.25, |
|
"learning_rate": 9.152162131965137e-05, |
|
"loss": 0.2675, |
|
"step": 475000 |
|
}, |
|
{ |
|
"epoch": 7.25, |
|
"eval_runtime": 1.0353, |
|
"eval_samples_per_second": 965.922, |
|
"eval_steps_per_second": 15.455, |
|
"step": 475000 |
|
}, |
|
{ |
|
"epoch": 7.27, |
|
"learning_rate": 9.129323012627956e-05, |
|
"loss": 0.2693, |
|
"step": 476000 |
|
}, |
|
{ |
|
"epoch": 7.28, |
|
"learning_rate": 9.106471543195244e-05, |
|
"loss": 0.2675, |
|
"step": 477000 |
|
}, |
|
{ |
|
"epoch": 7.3, |
|
"learning_rate": 9.08360797356701e-05, |
|
"loss": 0.2679, |
|
"step": 478000 |
|
}, |
|
{ |
|
"epoch": 7.31, |
|
"learning_rate": 9.060732553775582e-05, |
|
"loss": 0.2672, |
|
"step": 479000 |
|
}, |
|
{ |
|
"epoch": 7.33, |
|
"learning_rate": 9.037845533982892e-05, |
|
"loss": 0.267, |
|
"step": 480000 |
|
}, |
|
{ |
|
"epoch": 7.33, |
|
"eval_runtime": 1.0347, |
|
"eval_samples_per_second": 966.468, |
|
"eval_steps_per_second": 15.463, |
|
"step": 480000 |
|
}, |
|
{ |
|
"epoch": 7.34, |
|
"learning_rate": 9.014947164477721e-05, |
|
"loss": 0.2663, |
|
"step": 481000 |
|
}, |
|
{ |
|
"epoch": 7.36, |
|
"learning_rate": 8.992037695672967e-05, |
|
"loss": 0.267, |
|
"step": 482000 |
|
}, |
|
{ |
|
"epoch": 7.38, |
|
"learning_rate": 8.969117378102912e-05, |
|
"loss": 0.2665, |
|
"step": 483000 |
|
}, |
|
{ |
|
"epoch": 7.39, |
|
"learning_rate": 8.946186462420478e-05, |
|
"loss": 0.2662, |
|
"step": 484000 |
|
}, |
|
{ |
|
"epoch": 7.41, |
|
"learning_rate": 8.923245199394482e-05, |
|
"loss": 0.2662, |
|
"step": 485000 |
|
}, |
|
{ |
|
"epoch": 7.41, |
|
"eval_runtime": 1.0079, |
|
"eval_samples_per_second": 992.191, |
|
"eval_steps_per_second": 15.875, |
|
"step": 485000 |
|
}, |
|
{ |
|
"epoch": 7.42, |
|
"learning_rate": 8.900293839906903e-05, |
|
"loss": 0.2664, |
|
"step": 486000 |
|
}, |
|
{ |
|
"epoch": 7.44, |
|
"learning_rate": 8.87733263495013e-05, |
|
"loss": 0.2658, |
|
"step": 487000 |
|
}, |
|
{ |
|
"epoch": 7.45, |
|
"learning_rate": 8.85436183562422e-05, |
|
"loss": 0.2659, |
|
"step": 488000 |
|
}, |
|
{ |
|
"epoch": 7.47, |
|
"learning_rate": 8.83138169313416e-05, |
|
"loss": 0.2663, |
|
"step": 489000 |
|
}, |
|
{ |
|
"epoch": 7.48, |
|
"learning_rate": 8.808392458787103e-05, |
|
"loss": 0.2656, |
|
"step": 490000 |
|
}, |
|
{ |
|
"epoch": 7.48, |
|
"eval_runtime": 1.075, |
|
"eval_samples_per_second": 930.213, |
|
"eval_steps_per_second": 14.883, |
|
"step": 490000 |
|
}, |
|
{ |
|
"epoch": 7.5, |
|
"learning_rate": 8.78539438398963e-05, |
|
"loss": 0.2655, |
|
"step": 491000 |
|
}, |
|
{ |
|
"epoch": 7.51, |
|
"learning_rate": 8.762387720245008e-05, |
|
"loss": 0.2656, |
|
"step": 492000 |
|
}, |
|
{ |
|
"epoch": 7.53, |
|
"learning_rate": 8.73937271915042e-05, |
|
"loss": 0.2655, |
|
"step": 493000 |
|
}, |
|
{ |
|
"epoch": 7.54, |
|
"learning_rate": 8.716349632394235e-05, |
|
"loss": 0.2652, |
|
"step": 494000 |
|
}, |
|
{ |
|
"epoch": 7.56, |
|
"learning_rate": 8.69331871175324e-05, |
|
"loss": 0.2651, |
|
"step": 495000 |
|
}, |
|
{ |
|
"epoch": 7.56, |
|
"eval_runtime": 1.1978, |
|
"eval_samples_per_second": 834.871, |
|
"eval_steps_per_second": 13.358, |
|
"step": 495000 |
|
}, |
|
{ |
|
"epoch": 7.57, |
|
"learning_rate": 8.67028020908989e-05, |
|
"loss": 0.2647, |
|
"step": 496000 |
|
}, |
|
{ |
|
"epoch": 7.59, |
|
"learning_rate": 8.647234376349565e-05, |
|
"loss": 0.2653, |
|
"step": 497000 |
|
}, |
|
{ |
|
"epoch": 7.6, |
|
"learning_rate": 8.624181465557794e-05, |
|
"loss": 0.2649, |
|
"step": 498000 |
|
}, |
|
{ |
|
"epoch": 7.62, |
|
"learning_rate": 8.601121728817519e-05, |
|
"loss": 0.2647, |
|
"step": 499000 |
|
}, |
|
{ |
|
"epoch": 7.64, |
|
"learning_rate": 8.578055418306327e-05, |
|
"loss": 0.2654, |
|
"step": 500000 |
|
}, |
|
{ |
|
"epoch": 7.64, |
|
"eval_runtime": 1.1022, |
|
"eval_samples_per_second": 907.298, |
|
"eval_steps_per_second": 14.517, |
|
"step": 500000 |
|
}, |
|
{ |
|
"epoch": 7.65, |
|
"learning_rate": 8.55498278627369e-05, |
|
"loss": 0.2646, |
|
"step": 501000 |
|
}, |
|
{ |
|
"epoch": 7.67, |
|
"learning_rate": 8.531904085038221e-05, |
|
"loss": 0.2646, |
|
"step": 502000 |
|
}, |
|
{ |
|
"epoch": 7.68, |
|
"learning_rate": 8.508819566984897e-05, |
|
"loss": 0.2641, |
|
"step": 503000 |
|
}, |
|
{ |
|
"epoch": 7.7, |
|
"learning_rate": 8.485729484562307e-05, |
|
"loss": 0.2641, |
|
"step": 504000 |
|
}, |
|
{ |
|
"epoch": 7.71, |
|
"learning_rate": 8.462634090279895e-05, |
|
"loss": 0.264, |
|
"step": 505000 |
|
}, |
|
{ |
|
"epoch": 7.71, |
|
"eval_runtime": 1.0129, |
|
"eval_samples_per_second": 987.309, |
|
"eval_steps_per_second": 15.797, |
|
"step": 505000 |
|
}, |
|
{ |
|
"epoch": 7.73, |
|
"learning_rate": 8.439533636705194e-05, |
|
"loss": 0.2635, |
|
"step": 506000 |
|
}, |
|
{ |
|
"epoch": 7.74, |
|
"learning_rate": 8.416428376461061e-05, |
|
"loss": 0.2644, |
|
"step": 507000 |
|
}, |
|
{ |
|
"epoch": 7.76, |
|
"learning_rate": 8.393318562222916e-05, |
|
"loss": 0.2642, |
|
"step": 508000 |
|
}, |
|
{ |
|
"epoch": 7.77, |
|
"learning_rate": 8.370204446715997e-05, |
|
"loss": 0.2638, |
|
"step": 509000 |
|
}, |
|
{ |
|
"epoch": 7.79, |
|
"learning_rate": 8.347086282712556e-05, |
|
"loss": 0.2637, |
|
"step": 510000 |
|
}, |
|
{ |
|
"epoch": 7.79, |
|
"eval_runtime": 1.1071, |
|
"eval_samples_per_second": 903.278, |
|
"eval_steps_per_second": 14.452, |
|
"step": 510000 |
|
}, |
|
{ |
|
"epoch": 7.8, |
|
"learning_rate": 8.323964323029136e-05, |
|
"loss": 0.2633, |
|
"step": 511000 |
|
}, |
|
{ |
|
"epoch": 7.82, |
|
"learning_rate": 8.300838820523784e-05, |
|
"loss": 0.2634, |
|
"step": 512000 |
|
}, |
|
{ |
|
"epoch": 7.83, |
|
"learning_rate": 8.277710028093289e-05, |
|
"loss": 0.263, |
|
"step": 513000 |
|
}, |
|
{ |
|
"epoch": 7.85, |
|
"learning_rate": 8.254578198670421e-05, |
|
"loss": 0.2632, |
|
"step": 514000 |
|
}, |
|
{ |
|
"epoch": 7.86, |
|
"learning_rate": 8.231443585221157e-05, |
|
"loss": 0.2629, |
|
"step": 515000 |
|
}, |
|
{ |
|
"epoch": 7.86, |
|
"eval_runtime": 1.0457, |
|
"eval_samples_per_second": 956.256, |
|
"eval_steps_per_second": 15.3, |
|
"step": 515000 |
|
}, |
|
{ |
|
"epoch": 7.88, |
|
"learning_rate": 8.208306440741926e-05, |
|
"loss": 0.2626, |
|
"step": 516000 |
|
}, |
|
{ |
|
"epoch": 7.89, |
|
"learning_rate": 8.185167018256834e-05, |
|
"loss": 0.2629, |
|
"step": 517000 |
|
}, |
|
{ |
|
"epoch": 7.91, |
|
"learning_rate": 8.162025570814896e-05, |
|
"loss": 0.2625, |
|
"step": 518000 |
|
}, |
|
{ |
|
"epoch": 7.93, |
|
"learning_rate": 8.138882351487275e-05, |
|
"loss": 0.2623, |
|
"step": 519000 |
|
}, |
|
{ |
|
"epoch": 7.94, |
|
"learning_rate": 8.115737613364511e-05, |
|
"loss": 0.2626, |
|
"step": 520000 |
|
}, |
|
{ |
|
"epoch": 7.94, |
|
"eval_runtime": 1.0504, |
|
"eval_samples_per_second": 952.036, |
|
"eval_steps_per_second": 15.233, |
|
"step": 520000 |
|
} |
|
], |
|
"max_steps": 1000000, |
|
"num_train_epochs": 16, |
|
"total_flos": 3.6452089741010102e+22, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|