|
{ |
|
"best_metric": 0.14816446602344513, |
|
"best_model_checkpoint": "./mistral7b/13-02-24-Weni-ZeroShot-3.3.0-Mistral-7b-Multilanguage-3.1.0_zeroshot-2_max_steps-4968_batch_128_2024-02-13_03/checkpoint-4840", |
|
"epoch": 148.92307692307693, |
|
"eval_steps": 20, |
|
"global_step": 4840, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 2.82258064516129e-06, |
|
"loss": 1.6698, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"eval_loss": 1.6584945917129517, |
|
"eval_runtime": 26.1472, |
|
"eval_samples_per_second": 17.669, |
|
"eval_steps_per_second": 0.574, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 1.0483870967741936e-05, |
|
"loss": 1.6069, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"eval_loss": 1.5037014484405518, |
|
"eval_runtime": 26.055, |
|
"eval_samples_per_second": 17.732, |
|
"eval_steps_per_second": 0.576, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 1.8548387096774193e-05, |
|
"loss": 1.3817, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"eval_loss": 1.246311902999878, |
|
"eval_runtime": 26.0429, |
|
"eval_samples_per_second": 17.74, |
|
"eval_steps_per_second": 0.576, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 2.661290322580645e-05, |
|
"loss": 1.1419, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"eval_loss": 1.0472239255905151, |
|
"eval_runtime": 26.0923, |
|
"eval_samples_per_second": 17.706, |
|
"eval_steps_per_second": 0.575, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"learning_rate": 3.467741935483872e-05, |
|
"loss": 0.9906, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"eval_loss": 0.9301682710647583, |
|
"eval_runtime": 26.1937, |
|
"eval_samples_per_second": 17.638, |
|
"eval_steps_per_second": 0.573, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 3.69, |
|
"learning_rate": 4.2741935483870973e-05, |
|
"loss": 0.8634, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 3.69, |
|
"eval_loss": 0.7793559432029724, |
|
"eval_runtime": 26.0879, |
|
"eval_samples_per_second": 17.709, |
|
"eval_steps_per_second": 0.575, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 4.31, |
|
"learning_rate": 5.080645161290323e-05, |
|
"loss": 0.7382, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 4.31, |
|
"eval_loss": 0.7010539174079895, |
|
"eval_runtime": 26.032, |
|
"eval_samples_per_second": 17.747, |
|
"eval_steps_per_second": 0.576, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 4.92, |
|
"learning_rate": 5.887096774193549e-05, |
|
"loss": 0.6869, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 4.92, |
|
"eval_loss": 0.6714752316474915, |
|
"eval_runtime": 44.7747, |
|
"eval_samples_per_second": 10.318, |
|
"eval_steps_per_second": 0.335, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 5.54, |
|
"learning_rate": 6.693548387096774e-05, |
|
"loss": 0.6623, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 5.54, |
|
"eval_loss": 0.6569082140922546, |
|
"eval_runtime": 44.9201, |
|
"eval_samples_per_second": 10.285, |
|
"eval_steps_per_second": 0.334, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 6.15, |
|
"learning_rate": 7.500000000000001e-05, |
|
"loss": 0.6508, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 6.15, |
|
"eval_loss": 0.6456889510154724, |
|
"eval_runtime": 44.7964, |
|
"eval_samples_per_second": 10.313, |
|
"eval_steps_per_second": 0.335, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 6.77, |
|
"learning_rate": 8.306451612903227e-05, |
|
"loss": 0.6394, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 6.77, |
|
"eval_loss": 0.6361492276191711, |
|
"eval_runtime": 26.7207, |
|
"eval_samples_per_second": 17.29, |
|
"eval_steps_per_second": 0.561, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 7.38, |
|
"learning_rate": 9.112903225806452e-05, |
|
"loss": 0.6289, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 7.38, |
|
"eval_loss": 0.6280443072319031, |
|
"eval_runtime": 26.6969, |
|
"eval_samples_per_second": 17.305, |
|
"eval_steps_per_second": 0.562, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 9.919354838709678e-05, |
|
"loss": 0.6239, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_loss": 0.6212936043739319, |
|
"eval_runtime": 26.7173, |
|
"eval_samples_per_second": 17.292, |
|
"eval_steps_per_second": 0.561, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 8.62, |
|
"learning_rate": 0.00010725806451612903, |
|
"loss": 0.6171, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 8.62, |
|
"eval_loss": 0.614993155002594, |
|
"eval_runtime": 26.7242, |
|
"eval_samples_per_second": 17.288, |
|
"eval_steps_per_second": 0.561, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 9.23, |
|
"learning_rate": 0.00011532258064516131, |
|
"loss": 0.6096, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 9.23, |
|
"eval_loss": 0.608863353729248, |
|
"eval_runtime": 26.7229, |
|
"eval_samples_per_second": 17.289, |
|
"eval_steps_per_second": 0.561, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 9.85, |
|
"learning_rate": 0.00012338709677419356, |
|
"loss": 0.6048, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 9.85, |
|
"eval_loss": 0.6036637425422668, |
|
"eval_runtime": 26.725, |
|
"eval_samples_per_second": 17.287, |
|
"eval_steps_per_second": 0.561, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 10.46, |
|
"learning_rate": 0.0001314516129032258, |
|
"loss": 0.5986, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 10.46, |
|
"eval_loss": 0.5977433323860168, |
|
"eval_runtime": 26.7485, |
|
"eval_samples_per_second": 17.272, |
|
"eval_steps_per_second": 0.561, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 11.08, |
|
"learning_rate": 0.0001395161290322581, |
|
"loss": 0.5914, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 11.08, |
|
"eval_loss": 0.591983437538147, |
|
"eval_runtime": 44.868, |
|
"eval_samples_per_second": 10.297, |
|
"eval_steps_per_second": 0.334, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 11.69, |
|
"learning_rate": 0.00014758064516129032, |
|
"loss": 0.5871, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 11.69, |
|
"eval_loss": 0.5865333676338196, |
|
"eval_runtime": 44.9887, |
|
"eval_samples_per_second": 10.269, |
|
"eval_steps_per_second": 0.333, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 12.31, |
|
"learning_rate": 0.0001556451612903226, |
|
"loss": 0.5808, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 12.31, |
|
"eval_loss": 0.5812229514122009, |
|
"eval_runtime": 26.7165, |
|
"eval_samples_per_second": 17.293, |
|
"eval_steps_per_second": 0.561, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 12.92, |
|
"learning_rate": 0.00016370967741935485, |
|
"loss": 0.5746, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 12.92, |
|
"eval_loss": 0.5760770440101624, |
|
"eval_runtime": 26.7316, |
|
"eval_samples_per_second": 17.283, |
|
"eval_steps_per_second": 0.561, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 13.54, |
|
"learning_rate": 0.00017177419354838711, |
|
"loss": 0.5684, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 13.54, |
|
"eval_loss": 0.5711672306060791, |
|
"eval_runtime": 26.708, |
|
"eval_samples_per_second": 17.298, |
|
"eval_steps_per_second": 0.562, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 14.15, |
|
"learning_rate": 0.00017983870967741935, |
|
"loss": 0.5641, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 14.15, |
|
"eval_loss": 0.5648314356803894, |
|
"eval_runtime": 26.7361, |
|
"eval_samples_per_second": 17.28, |
|
"eval_steps_per_second": 0.561, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 14.77, |
|
"learning_rate": 0.00018790322580645164, |
|
"loss": 0.5573, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 14.77, |
|
"eval_loss": 0.5593515634536743, |
|
"eval_runtime": 26.7412, |
|
"eval_samples_per_second": 17.277, |
|
"eval_steps_per_second": 0.561, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 15.38, |
|
"learning_rate": 0.00019596774193548388, |
|
"loss": 0.5517, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 15.38, |
|
"eval_loss": 0.5539582967758179, |
|
"eval_runtime": 26.732, |
|
"eval_samples_per_second": 17.283, |
|
"eval_steps_per_second": 0.561, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"learning_rate": 0.00019999753245902063, |
|
"loss": 0.5447, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_loss": 0.54853355884552, |
|
"eval_runtime": 26.7332, |
|
"eval_samples_per_second": 17.282, |
|
"eval_steps_per_second": 0.561, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 16.62, |
|
"learning_rate": 0.00019997779286183058, |
|
"loss": 0.5372, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 16.62, |
|
"eval_loss": 0.5420479774475098, |
|
"eval_runtime": 26.733, |
|
"eval_samples_per_second": 17.282, |
|
"eval_steps_per_second": 0.561, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 17.23, |
|
"learning_rate": 0.00019993831756406357, |
|
"loss": 0.5314, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 17.23, |
|
"eval_loss": 0.5360643267631531, |
|
"eval_runtime": 26.736, |
|
"eval_samples_per_second": 17.28, |
|
"eval_steps_per_second": 0.561, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 17.85, |
|
"learning_rate": 0.0001998791143581767, |
|
"loss": 0.5248, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 17.85, |
|
"eval_loss": 0.5307183265686035, |
|
"eval_runtime": 26.7116, |
|
"eval_samples_per_second": 17.296, |
|
"eval_steps_per_second": 0.562, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 18.46, |
|
"learning_rate": 0.00019980019493093267, |
|
"loss": 0.5195, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 18.46, |
|
"eval_loss": 0.5241357684135437, |
|
"eval_runtime": 26.7372, |
|
"eval_samples_per_second": 17.279, |
|
"eval_steps_per_second": 0.561, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 19.08, |
|
"learning_rate": 0.00019970157486109296, |
|
"loss": 0.5136, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 19.08, |
|
"eval_loss": 0.5182597637176514, |
|
"eval_runtime": 26.7038, |
|
"eval_samples_per_second": 17.301, |
|
"eval_steps_per_second": 0.562, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 19.69, |
|
"learning_rate": 0.00019958327361634248, |
|
"loss": 0.5036, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 19.69, |
|
"eval_loss": 0.5129547715187073, |
|
"eval_runtime": 26.7294, |
|
"eval_samples_per_second": 17.284, |
|
"eval_steps_per_second": 0.561, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 20.31, |
|
"learning_rate": 0.00019944531454944663, |
|
"loss": 0.4996, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 20.31, |
|
"eval_loss": 0.5069959163665771, |
|
"eval_runtime": 26.7503, |
|
"eval_samples_per_second": 17.271, |
|
"eval_steps_per_second": 0.561, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 20.92, |
|
"learning_rate": 0.0001992877248936415, |
|
"loss": 0.4941, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 20.92, |
|
"eval_loss": 0.5006260871887207, |
|
"eval_runtime": 26.7128, |
|
"eval_samples_per_second": 17.295, |
|
"eval_steps_per_second": 0.562, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 21.54, |
|
"learning_rate": 0.000199110535757258, |
|
"loss": 0.4838, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 21.54, |
|
"eval_loss": 0.4946294128894806, |
|
"eval_runtime": 26.7236, |
|
"eval_samples_per_second": 17.288, |
|
"eval_steps_per_second": 0.561, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 22.15, |
|
"learning_rate": 0.00019891378211758096, |
|
"loss": 0.4795, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 22.15, |
|
"eval_loss": 0.4879631996154785, |
|
"eval_runtime": 26.733, |
|
"eval_samples_per_second": 17.282, |
|
"eval_steps_per_second": 0.561, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 22.77, |
|
"learning_rate": 0.0001986975028139447, |
|
"loss": 0.4722, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 22.77, |
|
"eval_loss": 0.48206356167793274, |
|
"eval_runtime": 26.7335, |
|
"eval_samples_per_second": 17.282, |
|
"eval_steps_per_second": 0.561, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 23.38, |
|
"learning_rate": 0.00019846174054006607, |
|
"loss": 0.464, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 23.38, |
|
"eval_loss": 0.4757327735424042, |
|
"eval_runtime": 26.722, |
|
"eval_samples_per_second": 17.289, |
|
"eval_steps_per_second": 0.561, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"learning_rate": 0.00019820654183561658, |
|
"loss": 0.4605, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_loss": 0.47157037258148193, |
|
"eval_runtime": 26.6987, |
|
"eval_samples_per_second": 17.304, |
|
"eval_steps_per_second": 0.562, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 24.62, |
|
"learning_rate": 0.00019793195707703567, |
|
"loss": 0.4524, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 24.62, |
|
"eval_loss": 0.46418875455856323, |
|
"eval_runtime": 26.7165, |
|
"eval_samples_per_second": 17.293, |
|
"eval_steps_per_second": 0.561, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 25.23, |
|
"learning_rate": 0.00019763804046758602, |
|
"loss": 0.4461, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 25.23, |
|
"eval_loss": 0.45834338665008545, |
|
"eval_runtime": 26.7468, |
|
"eval_samples_per_second": 17.273, |
|
"eval_steps_per_second": 0.561, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 25.85, |
|
"learning_rate": 0.00019732485002665415, |
|
"loss": 0.4393, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 25.85, |
|
"eval_loss": 0.4538223147392273, |
|
"eval_runtime": 26.746, |
|
"eval_samples_per_second": 17.274, |
|
"eval_steps_per_second": 0.561, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 26.46, |
|
"learning_rate": 0.00019699244757829702, |
|
"loss": 0.4337, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 26.46, |
|
"eval_loss": 0.4459408223628998, |
|
"eval_runtime": 26.705, |
|
"eval_samples_per_second": 17.3, |
|
"eval_steps_per_second": 0.562, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 27.08, |
|
"learning_rate": 0.0001966408987390381, |
|
"loss": 0.4269, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 27.08, |
|
"eval_loss": 0.43985316157341003, |
|
"eval_runtime": 26.7756, |
|
"eval_samples_per_second": 17.255, |
|
"eval_steps_per_second": 0.56, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 27.69, |
|
"learning_rate": 0.00019627027290491458, |
|
"loss": 0.4191, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 27.69, |
|
"eval_loss": 0.43426012992858887, |
|
"eval_runtime": 26.7408, |
|
"eval_samples_per_second": 17.277, |
|
"eval_steps_per_second": 0.561, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 28.31, |
|
"learning_rate": 0.00019588064323777853, |
|
"loss": 0.4138, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 28.31, |
|
"eval_loss": 0.4298732876777649, |
|
"eval_runtime": 26.7628, |
|
"eval_samples_per_second": 17.263, |
|
"eval_steps_per_second": 0.56, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 28.92, |
|
"learning_rate": 0.00019549296276462325, |
|
"loss": 0.408, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 28.92, |
|
"eval_loss": 0.42369845509529114, |
|
"eval_runtime": 26.7274, |
|
"eval_samples_per_second": 17.286, |
|
"eval_steps_per_second": 0.561, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 29.54, |
|
"learning_rate": 0.00019506650024792317, |
|
"loss": 0.4001, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 29.54, |
|
"eval_loss": 0.4199902415275574, |
|
"eval_runtime": 26.7231, |
|
"eval_samples_per_second": 17.288, |
|
"eval_steps_per_second": 0.561, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 30.15, |
|
"learning_rate": 0.0001946212715239476, |
|
"loss": 0.3978, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 30.15, |
|
"eval_loss": 0.41251733899116516, |
|
"eval_runtime": 26.7147, |
|
"eval_samples_per_second": 17.294, |
|
"eval_steps_per_second": 0.561, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 30.77, |
|
"learning_rate": 0.00019415736448122193, |
|
"loss": 0.3891, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 30.77, |
|
"eval_loss": 0.40809690952301025, |
|
"eval_runtime": 26.7253, |
|
"eval_samples_per_second": 17.287, |
|
"eval_steps_per_second": 0.561, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 31.38, |
|
"learning_rate": 0.0001936748706953874, |
|
"loss": 0.3861, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 31.38, |
|
"eval_loss": 0.40387141704559326, |
|
"eval_runtime": 26.7175, |
|
"eval_samples_per_second": 17.292, |
|
"eval_steps_per_second": 0.561, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"learning_rate": 0.00019317388541112396, |
|
"loss": 0.3806, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_loss": 0.3994995355606079, |
|
"eval_runtime": 26.6886, |
|
"eval_samples_per_second": 17.311, |
|
"eval_steps_per_second": 0.562, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 32.62, |
|
"learning_rate": 0.000192654507523349, |
|
"loss": 0.3744, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 32.62, |
|
"eval_loss": 0.3944377303123474, |
|
"eval_runtime": 43.1554, |
|
"eval_samples_per_second": 10.705, |
|
"eval_steps_per_second": 0.348, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 33.23, |
|
"learning_rate": 0.00019211683955769538, |
|
"loss": 0.3704, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 33.23, |
|
"eval_loss": 0.3890739977359772, |
|
"eval_runtime": 44.8442, |
|
"eval_samples_per_second": 10.302, |
|
"eval_steps_per_second": 0.334, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 33.85, |
|
"learning_rate": 0.00019156098765027262, |
|
"loss": 0.3642, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 33.85, |
|
"eval_loss": 0.3840695321559906, |
|
"eval_runtime": 44.6934, |
|
"eval_samples_per_second": 10.337, |
|
"eval_steps_per_second": 0.336, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 34.46, |
|
"learning_rate": 0.00019098706152671576, |
|
"loss": 0.3578, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 34.46, |
|
"eval_loss": 0.37998247146606445, |
|
"eval_runtime": 44.349, |
|
"eval_samples_per_second": 10.417, |
|
"eval_steps_per_second": 0.338, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 35.08, |
|
"learning_rate": 0.00019039517448052535, |
|
"loss": 0.3547, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 35.08, |
|
"eval_loss": 0.3763927221298218, |
|
"eval_runtime": 44.8048, |
|
"eval_samples_per_second": 10.311, |
|
"eval_steps_per_second": 0.335, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 35.69, |
|
"learning_rate": 0.00018978544335070314, |
|
"loss": 0.3494, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 35.69, |
|
"eval_loss": 0.37159162759780884, |
|
"eval_runtime": 44.97, |
|
"eval_samples_per_second": 10.274, |
|
"eval_steps_per_second": 0.334, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 36.31, |
|
"learning_rate": 0.0001891579884986881, |
|
"loss": 0.3449, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 36.31, |
|
"eval_loss": 0.36737060546875, |
|
"eval_runtime": 44.3891, |
|
"eval_samples_per_second": 10.408, |
|
"eval_steps_per_second": 0.338, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 36.92, |
|
"learning_rate": 0.00018851293378459685, |
|
"loss": 0.3409, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 36.92, |
|
"eval_loss": 0.3632607161998749, |
|
"eval_runtime": 44.9198, |
|
"eval_samples_per_second": 10.285, |
|
"eval_steps_per_second": 0.334, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 37.54, |
|
"learning_rate": 0.0001878504065427736, |
|
"loss": 0.3339, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 37.54, |
|
"eval_loss": 0.3598220944404602, |
|
"eval_runtime": 44.8761, |
|
"eval_samples_per_second": 10.295, |
|
"eval_steps_per_second": 0.334, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 38.15, |
|
"learning_rate": 0.00018717053755665437, |
|
"loss": 0.3301, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 38.15, |
|
"eval_loss": 0.35608917474746704, |
|
"eval_runtime": 44.9887, |
|
"eval_samples_per_second": 10.269, |
|
"eval_steps_per_second": 0.333, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 38.77, |
|
"learning_rate": 0.00018647346103295003, |
|
"loss": 0.3267, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 38.77, |
|
"eval_loss": 0.3520090579986572, |
|
"eval_runtime": 40.8773, |
|
"eval_samples_per_second": 11.302, |
|
"eval_steps_per_second": 0.367, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 39.38, |
|
"learning_rate": 0.00018575931457515382, |
|
"loss": 0.3247, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 39.38, |
|
"eval_loss": 0.34774109721183777, |
|
"eval_runtime": 45.7461, |
|
"eval_samples_per_second": 10.099, |
|
"eval_steps_per_second": 0.328, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"learning_rate": 0.00018502823915637846, |
|
"loss": 0.3196, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_loss": 0.34475430846214294, |
|
"eval_runtime": 44.903, |
|
"eval_samples_per_second": 10.289, |
|
"eval_steps_per_second": 0.334, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 40.62, |
|
"learning_rate": 0.00018428037909152785, |
|
"loss": 0.3155, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 40.62, |
|
"eval_loss": 0.3413088619709015, |
|
"eval_runtime": 44.6571, |
|
"eval_samples_per_second": 10.345, |
|
"eval_steps_per_second": 0.336, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 41.23, |
|
"learning_rate": 0.00018351588200880907, |
|
"loss": 0.311, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 41.23, |
|
"eval_loss": 0.3366176187992096, |
|
"eval_runtime": 45.0459, |
|
"eval_samples_per_second": 10.256, |
|
"eval_steps_per_second": 0.333, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 41.85, |
|
"learning_rate": 0.00018273489882059062, |
|
"loss": 0.3059, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 41.85, |
|
"eval_loss": 0.3341914713382721, |
|
"eval_runtime": 45.038, |
|
"eval_samples_per_second": 10.258, |
|
"eval_steps_per_second": 0.333, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 42.46, |
|
"learning_rate": 0.0001819375836936121, |
|
"loss": 0.3047, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 42.46, |
|
"eval_loss": 0.33061912655830383, |
|
"eval_runtime": 26.7201, |
|
"eval_samples_per_second": 17.29, |
|
"eval_steps_per_second": 0.561, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 43.08, |
|
"learning_rate": 0.00018112409401855158, |
|
"loss": 0.3006, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 43.08, |
|
"eval_loss": 0.32672399282455444, |
|
"eval_runtime": 45.0029, |
|
"eval_samples_per_second": 10.266, |
|
"eval_steps_per_second": 0.333, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 43.69, |
|
"learning_rate": 0.00018029459037895658, |
|
"loss": 0.2967, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 43.69, |
|
"eval_loss": 0.32409900426864624, |
|
"eval_runtime": 45.8902, |
|
"eval_samples_per_second": 10.067, |
|
"eval_steps_per_second": 0.327, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 44.31, |
|
"learning_rate": 0.00017944923651954474, |
|
"loss": 0.2924, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 44.31, |
|
"eval_loss": 0.3199877142906189, |
|
"eval_runtime": 44.6863, |
|
"eval_samples_per_second": 10.339, |
|
"eval_steps_per_second": 0.336, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 44.92, |
|
"learning_rate": 0.00017858819931388032, |
|
"loss": 0.2876, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 44.92, |
|
"eval_loss": 0.3171309530735016, |
|
"eval_runtime": 45.7535, |
|
"eval_samples_per_second": 10.098, |
|
"eval_steps_per_second": 0.328, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 45.54, |
|
"learning_rate": 0.0001777116487314335, |
|
"loss": 0.2848, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 45.54, |
|
"eval_loss": 0.3142802119255066, |
|
"eval_runtime": 45.7609, |
|
"eval_samples_per_second": 10.096, |
|
"eval_steps_per_second": 0.328, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 46.15, |
|
"learning_rate": 0.00017681975780402807, |
|
"loss": 0.2836, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 46.15, |
|
"eval_loss": 0.31119367480278015, |
|
"eval_runtime": 45.7107, |
|
"eval_samples_per_second": 10.107, |
|
"eval_steps_per_second": 0.328, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 46.77, |
|
"learning_rate": 0.00017591270259168477, |
|
"loss": 0.2786, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 46.77, |
|
"eval_loss": 0.3083397448062897, |
|
"eval_runtime": 45.8269, |
|
"eval_samples_per_second": 10.081, |
|
"eval_steps_per_second": 0.327, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 47.38, |
|
"learning_rate": 0.00017499066214786708, |
|
"loss": 0.2766, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 47.38, |
|
"eval_loss": 0.307574063539505, |
|
"eval_runtime": 45.6083, |
|
"eval_samples_per_second": 10.13, |
|
"eval_steps_per_second": 0.329, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"learning_rate": 0.00017405381848413571, |
|
"loss": 0.273, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_loss": 0.3024856150150299, |
|
"eval_runtime": 45.6794, |
|
"eval_samples_per_second": 10.114, |
|
"eval_steps_per_second": 0.328, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 48.62, |
|
"learning_rate": 0.0001731023565342195, |
|
"loss": 0.2691, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 48.62, |
|
"eval_loss": 0.3002566397190094, |
|
"eval_runtime": 45.8955, |
|
"eval_samples_per_second": 10.066, |
|
"eval_steps_per_second": 0.327, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 49.23, |
|
"learning_rate": 0.00017213646411750935, |
|
"loss": 0.2657, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 49.23, |
|
"eval_loss": 0.29747503995895386, |
|
"eval_runtime": 45.7917, |
|
"eval_samples_per_second": 10.089, |
|
"eval_steps_per_second": 0.328, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 49.85, |
|
"learning_rate": 0.00017115633190198238, |
|
"loss": 0.2615, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 49.85, |
|
"eval_loss": 0.2955474853515625, |
|
"eval_runtime": 45.8407, |
|
"eval_samples_per_second": 10.078, |
|
"eval_steps_per_second": 0.327, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 50.46, |
|
"learning_rate": 0.000170162153366564, |
|
"loss": 0.2614, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 50.46, |
|
"eval_loss": 0.2920401394367218, |
|
"eval_runtime": 45.835, |
|
"eval_samples_per_second": 10.08, |
|
"eval_steps_per_second": 0.327, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 51.08, |
|
"learning_rate": 0.00016915412476293512, |
|
"loss": 0.2587, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 51.08, |
|
"eval_loss": 0.28886348009109497, |
|
"eval_runtime": 45.8928, |
|
"eval_samples_per_second": 10.067, |
|
"eval_steps_per_second": 0.327, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 51.69, |
|
"learning_rate": 0.00016813244507679165, |
|
"loss": 0.2543, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 51.69, |
|
"eval_loss": 0.28654780983924866, |
|
"eval_runtime": 45.8541, |
|
"eval_samples_per_second": 10.075, |
|
"eval_steps_per_second": 0.327, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 52.31, |
|
"learning_rate": 0.0001670973159885648, |
|
"loss": 0.2507, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 52.31, |
|
"eval_loss": 0.28392159938812256, |
|
"eval_runtime": 46.0394, |
|
"eval_samples_per_second": 10.035, |
|
"eval_steps_per_second": 0.326, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 52.92, |
|
"learning_rate": 0.000166048941833609, |
|
"loss": 0.2512, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 52.92, |
|
"eval_loss": 0.28163596987724304, |
|
"eval_runtime": 45.9037, |
|
"eval_samples_per_second": 10.065, |
|
"eval_steps_per_second": 0.327, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 53.54, |
|
"learning_rate": 0.00016498752956186605, |
|
"loss": 0.2446, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 53.54, |
|
"eval_loss": 0.27993378043174744, |
|
"eval_runtime": 45.8612, |
|
"eval_samples_per_second": 10.074, |
|
"eval_steps_per_second": 0.327, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 54.15, |
|
"learning_rate": 0.00016391328869701306, |
|
"loss": 0.2428, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 54.15, |
|
"eval_loss": 0.27705731987953186, |
|
"eval_runtime": 45.8648, |
|
"eval_samples_per_second": 10.073, |
|
"eval_steps_per_second": 0.327, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 54.77, |
|
"learning_rate": 0.00016282643129510212, |
|
"loss": 0.2421, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 54.77, |
|
"eval_loss": 0.27512410283088684, |
|
"eval_runtime": 45.742, |
|
"eval_samples_per_second": 10.1, |
|
"eval_steps_per_second": 0.328, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 55.38, |
|
"learning_rate": 0.00016172717190270045, |
|
"loss": 0.24, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 55.38, |
|
"eval_loss": 0.2725893259048462, |
|
"eval_runtime": 26.75, |
|
"eval_samples_per_second": 17.271, |
|
"eval_steps_per_second": 0.561, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"learning_rate": 0.00016061572751453862, |
|
"loss": 0.2379, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_loss": 0.2703319787979126, |
|
"eval_runtime": 26.7376, |
|
"eval_samples_per_second": 17.279, |
|
"eval_steps_per_second": 0.561, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 56.62, |
|
"learning_rate": 0.0001594923175306756, |
|
"loss": 0.2348, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 56.62, |
|
"eval_loss": 0.2680712342262268, |
|
"eval_runtime": 26.7571, |
|
"eval_samples_per_second": 17.266, |
|
"eval_steps_per_second": 0.561, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 57.23, |
|
"learning_rate": 0.00015835716371318908, |
|
"loss": 0.2318, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 57.23, |
|
"eval_loss": 0.2659159302711487, |
|
"eval_runtime": 26.7348, |
|
"eval_samples_per_second": 17.281, |
|
"eval_steps_per_second": 0.561, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 57.85, |
|
"learning_rate": 0.00015721049014239943, |
|
"loss": 0.2288, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 57.85, |
|
"eval_loss": 0.263480007648468, |
|
"eval_runtime": 26.7538, |
|
"eval_samples_per_second": 17.269, |
|
"eval_steps_per_second": 0.561, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 58.46, |
|
"learning_rate": 0.0001560525231726359, |
|
"loss": 0.2288, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 58.46, |
|
"eval_loss": 0.2611861228942871, |
|
"eval_runtime": 26.7314, |
|
"eval_samples_per_second": 17.283, |
|
"eval_steps_per_second": 0.561, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 59.08, |
|
"learning_rate": 0.00015488349138755448, |
|
"loss": 0.2239, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 59.08, |
|
"eval_loss": 0.25924989581108093, |
|
"eval_runtime": 26.7587, |
|
"eval_samples_per_second": 17.265, |
|
"eval_steps_per_second": 0.561, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 59.69, |
|
"learning_rate": 0.0001537036255550147, |
|
"loss": 0.2233, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 59.69, |
|
"eval_loss": 0.2566944360733032, |
|
"eval_runtime": 26.7636, |
|
"eval_samples_per_second": 17.262, |
|
"eval_steps_per_second": 0.56, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 60.31, |
|
"learning_rate": 0.0001525131585815264, |
|
"loss": 0.2199, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 60.31, |
|
"eval_loss": 0.25529178977012634, |
|
"eval_runtime": 26.7593, |
|
"eval_samples_per_second": 17.265, |
|
"eval_steps_per_second": 0.561, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 60.92, |
|
"learning_rate": 0.00015131232546627355, |
|
"loss": 0.219, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 60.92, |
|
"eval_loss": 0.2528415322303772, |
|
"eval_runtime": 26.7591, |
|
"eval_samples_per_second": 17.265, |
|
"eval_steps_per_second": 0.561, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 61.54, |
|
"learning_rate": 0.0001501013632547252, |
|
"loss": 0.217, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 61.54, |
|
"eval_loss": 0.25113263726234436, |
|
"eval_runtime": 26.7433, |
|
"eval_samples_per_second": 17.275, |
|
"eval_steps_per_second": 0.561, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 62.15, |
|
"learning_rate": 0.00014888051099184256, |
|
"loss": 0.2154, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 62.15, |
|
"eval_loss": 0.24899105727672577, |
|
"eval_runtime": 26.7472, |
|
"eval_samples_per_second": 17.273, |
|
"eval_steps_per_second": 0.561, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 62.77, |
|
"learning_rate": 0.0001476500096748913, |
|
"loss": 0.2126, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 62.77, |
|
"eval_loss": 0.24699197709560394, |
|
"eval_runtime": 26.7455, |
|
"eval_samples_per_second": 17.274, |
|
"eval_steps_per_second": 0.561, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 63.38, |
|
"learning_rate": 0.00014641010220586858, |
|
"loss": 0.2085, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 63.38, |
|
"eval_loss": 0.24530422687530518, |
|
"eval_runtime": 26.7332, |
|
"eval_samples_per_second": 17.282, |
|
"eval_steps_per_second": 0.561, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"learning_rate": 0.0001451610333435538, |
|
"loss": 0.2088, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_loss": 0.24252080917358398, |
|
"eval_runtime": 26.7344, |
|
"eval_samples_per_second": 17.281, |
|
"eval_steps_per_second": 0.561, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 64.62, |
|
"learning_rate": 0.00014390304965519312, |
|
"loss": 0.207, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 64.62, |
|
"eval_loss": 0.24115830659866333, |
|
"eval_runtime": 26.7241, |
|
"eval_samples_per_second": 17.288, |
|
"eval_steps_per_second": 0.561, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 65.23, |
|
"learning_rate": 0.00014263639946782695, |
|
"loss": 0.2066, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 65.23, |
|
"eval_loss": 0.23876874148845673, |
|
"eval_runtime": 26.7538, |
|
"eval_samples_per_second": 17.269, |
|
"eval_steps_per_second": 0.561, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 65.85, |
|
"learning_rate": 0.00014136133281926987, |
|
"loss": 0.2021, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 65.85, |
|
"eval_loss": 0.2371101826429367, |
|
"eval_runtime": 26.7047, |
|
"eval_samples_per_second": 17.3, |
|
"eval_steps_per_second": 0.562, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 66.46, |
|
"learning_rate": 0.00014007810140875295, |
|
"loss": 0.2016, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 66.46, |
|
"eval_loss": 0.2353435754776001, |
|
"eval_runtime": 26.7324, |
|
"eval_samples_per_second": 17.282, |
|
"eval_steps_per_second": 0.561, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 67.08, |
|
"learning_rate": 0.00013878695854723826, |
|
"loss": 0.1986, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 67.08, |
|
"eval_loss": 0.23351863026618958, |
|
"eval_runtime": 45.6122, |
|
"eval_samples_per_second": 10.129, |
|
"eval_steps_per_second": 0.329, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 67.69, |
|
"learning_rate": 0.0001374881591074148, |
|
"loss": 0.1965, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 67.69, |
|
"eval_loss": 0.23166298866271973, |
|
"eval_runtime": 45.6494, |
|
"eval_samples_per_second": 10.121, |
|
"eval_steps_per_second": 0.329, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 68.31, |
|
"learning_rate": 0.0001361819594733868, |
|
"loss": 0.1969, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 68.31, |
|
"eval_loss": 0.23032891750335693, |
|
"eval_runtime": 26.7108, |
|
"eval_samples_per_second": 17.296, |
|
"eval_steps_per_second": 0.562, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 68.92, |
|
"learning_rate": 0.00013486861749006286, |
|
"loss": 0.1957, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 68.92, |
|
"eval_loss": 0.22889761626720428, |
|
"eval_runtime": 26.7424, |
|
"eval_samples_per_second": 17.276, |
|
"eval_steps_per_second": 0.561, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 69.54, |
|
"learning_rate": 0.0001335483924122575, |
|
"loss": 0.1918, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 69.54, |
|
"eval_loss": 0.2264855057001114, |
|
"eval_runtime": 26.7477, |
|
"eval_samples_per_second": 17.273, |
|
"eval_steps_per_second": 0.561, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 70.15, |
|
"learning_rate": 0.00013222154485351375, |
|
"loss": 0.1913, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 70.15, |
|
"eval_loss": 0.22507672011852264, |
|
"eval_runtime": 45.5727, |
|
"eval_samples_per_second": 10.138, |
|
"eval_steps_per_second": 0.329, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 70.77, |
|
"learning_rate": 0.0001308883367346581, |
|
"loss": 0.1892, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 70.77, |
|
"eval_loss": 0.22298868000507355, |
|
"eval_runtime": 26.731, |
|
"eval_samples_per_second": 17.283, |
|
"eval_steps_per_second": 0.561, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 71.38, |
|
"learning_rate": 0.00012954903123209687, |
|
"loss": 0.1885, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 71.38, |
|
"eval_loss": 0.2219810038805008, |
|
"eval_runtime": 26.7121, |
|
"eval_samples_per_second": 17.296, |
|
"eval_steps_per_second": 0.562, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"learning_rate": 0.0001282038927258651, |
|
"loss": 0.1876, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"eval_loss": 0.2204855978488922, |
|
"eval_runtime": 45.041, |
|
"eval_samples_per_second": 10.257, |
|
"eval_steps_per_second": 0.333, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 72.62, |
|
"learning_rate": 0.0001268531867474377, |
|
"loss": 0.1855, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 72.62, |
|
"eval_loss": 0.21908599138259888, |
|
"eval_runtime": 45.1286, |
|
"eval_samples_per_second": 10.237, |
|
"eval_steps_per_second": 0.332, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 73.23, |
|
"learning_rate": 0.00012549717992731317, |
|
"loss": 0.1841, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 73.23, |
|
"eval_loss": 0.21735349297523499, |
|
"eval_runtime": 26.5835, |
|
"eval_samples_per_second": 17.379, |
|
"eval_steps_per_second": 0.564, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 73.85, |
|
"learning_rate": 0.0001241361399423808, |
|
"loss": 0.1819, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 73.85, |
|
"eval_loss": 0.21623647212982178, |
|
"eval_runtime": 26.7905, |
|
"eval_samples_per_second": 17.245, |
|
"eval_steps_per_second": 0.56, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 74.46, |
|
"learning_rate": 0.0001227703354630807, |
|
"loss": 0.1812, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 74.46, |
|
"eval_loss": 0.21434533596038818, |
|
"eval_runtime": 26.7405, |
|
"eval_samples_per_second": 17.277, |
|
"eval_steps_per_second": 0.561, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 75.08, |
|
"learning_rate": 0.0001214000361003683, |
|
"loss": 0.1801, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 75.08, |
|
"eval_loss": 0.21285748481750488, |
|
"eval_runtime": 26.7466, |
|
"eval_samples_per_second": 17.273, |
|
"eval_steps_per_second": 0.561, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 75.69, |
|
"learning_rate": 0.00012002551235249268, |
|
"loss": 0.1773, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 75.69, |
|
"eval_loss": 0.21103879809379578, |
|
"eval_runtime": 26.7635, |
|
"eval_samples_per_second": 17.262, |
|
"eval_steps_per_second": 0.56, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 76.31, |
|
"learning_rate": 0.00011864703555160028, |
|
"loss": 0.1771, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 76.31, |
|
"eval_loss": 0.21016329526901245, |
|
"eval_runtime": 26.7904, |
|
"eval_samples_per_second": 17.245, |
|
"eval_steps_per_second": 0.56, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 76.92, |
|
"learning_rate": 0.00011726487781017337, |
|
"loss": 0.1752, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 76.92, |
|
"eval_loss": 0.2086782455444336, |
|
"eval_runtime": 44.9559, |
|
"eval_samples_per_second": 10.277, |
|
"eval_steps_per_second": 0.334, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 77.54, |
|
"learning_rate": 0.00011587931196731505, |
|
"loss": 0.1742, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 77.54, |
|
"eval_loss": 0.2075587958097458, |
|
"eval_runtime": 44.9658, |
|
"eval_samples_per_second": 10.274, |
|
"eval_steps_per_second": 0.334, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 78.15, |
|
"learning_rate": 0.00011449061153489055, |
|
"loss": 0.1735, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 78.15, |
|
"eval_loss": 0.20602142810821533, |
|
"eval_runtime": 26.6678, |
|
"eval_samples_per_second": 17.324, |
|
"eval_steps_per_second": 0.562, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 78.77, |
|
"learning_rate": 0.00011309905064353575, |
|
"loss": 0.1715, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 78.77, |
|
"eval_loss": 0.20492884516716003, |
|
"eval_runtime": 26.7865, |
|
"eval_samples_per_second": 17.248, |
|
"eval_steps_per_second": 0.56, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 79.38, |
|
"learning_rate": 0.00011170490398854336, |
|
"loss": 0.1707, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 79.38, |
|
"eval_loss": 0.2027878314256668, |
|
"eval_runtime": 42.4616, |
|
"eval_samples_per_second": 10.88, |
|
"eval_steps_per_second": 0.353, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"learning_rate": 0.0001103084467756382, |
|
"loss": 0.1701, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_loss": 0.201686292886734, |
|
"eval_runtime": 45.0476, |
|
"eval_samples_per_second": 10.256, |
|
"eval_steps_per_second": 0.333, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 80.62, |
|
"learning_rate": 0.00010890995466665108, |
|
"loss": 0.1675, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 80.62, |
|
"eval_loss": 0.2003440409898758, |
|
"eval_runtime": 45.0995, |
|
"eval_samples_per_second": 10.244, |
|
"eval_steps_per_second": 0.333, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 81.23, |
|
"learning_rate": 0.00010750970372510307, |
|
"loss": 0.1663, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 81.23, |
|
"eval_loss": 0.19951596856117249, |
|
"eval_runtime": 45.0074, |
|
"eval_samples_per_second": 10.265, |
|
"eval_steps_per_second": 0.333, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 81.85, |
|
"learning_rate": 0.00010610797036171014, |
|
"loss": 0.1653, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 81.85, |
|
"eval_loss": 0.19835925102233887, |
|
"eval_runtime": 45.0031, |
|
"eval_samples_per_second": 10.266, |
|
"eval_steps_per_second": 0.333, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 82.46, |
|
"learning_rate": 0.00010470503127981977, |
|
"loss": 0.165, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 82.46, |
|
"eval_loss": 0.19678974151611328, |
|
"eval_runtime": 45.1089, |
|
"eval_samples_per_second": 10.242, |
|
"eval_steps_per_second": 0.333, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 83.08, |
|
"learning_rate": 0.0001033011634207891, |
|
"loss": 0.1644, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 83.08, |
|
"eval_loss": 0.19566014409065247, |
|
"eval_runtime": 26.7641, |
|
"eval_samples_per_second": 17.262, |
|
"eval_steps_per_second": 0.56, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 83.69, |
|
"learning_rate": 0.00010189664390931682, |
|
"loss": 0.1631, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 83.69, |
|
"eval_loss": 0.19463180005550385, |
|
"eval_runtime": 26.7406, |
|
"eval_samples_per_second": 17.277, |
|
"eval_steps_per_second": 0.561, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 84.31, |
|
"learning_rate": 0.00010049174999873823, |
|
"loss": 0.162, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 84.31, |
|
"eval_loss": 0.1935625970363617, |
|
"eval_runtime": 26.7779, |
|
"eval_samples_per_second": 17.253, |
|
"eval_steps_per_second": 0.56, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 84.92, |
|
"learning_rate": 9.908675901629543e-05, |
|
"loss": 0.1604, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 84.92, |
|
"eval_loss": 0.19222331047058105, |
|
"eval_runtime": 26.7706, |
|
"eval_samples_per_second": 17.258, |
|
"eval_steps_per_second": 0.56, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 85.54, |
|
"learning_rate": 9.768194830839252e-05, |
|
"loss": 0.1598, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 85.54, |
|
"eval_loss": 0.19124871492385864, |
|
"eval_runtime": 26.778, |
|
"eval_samples_per_second": 17.253, |
|
"eval_steps_per_second": 0.56, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 86.15, |
|
"learning_rate": 9.627759518584733e-05, |
|
"loss": 0.1583, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 86.15, |
|
"eval_loss": 0.19053253531455994, |
|
"eval_runtime": 26.7797, |
|
"eval_samples_per_second": 17.252, |
|
"eval_steps_per_second": 0.56, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 86.77, |
|
"learning_rate": 9.487397686914985e-05, |
|
"loss": 0.1581, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 86.77, |
|
"eval_loss": 0.18871891498565674, |
|
"eval_runtime": 26.767, |
|
"eval_samples_per_second": 17.26, |
|
"eval_steps_per_second": 0.56, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 87.38, |
|
"learning_rate": 9.347137043373885e-05, |
|
"loss": 0.1569, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 87.38, |
|
"eval_loss": 0.18785762786865234, |
|
"eval_runtime": 26.7805, |
|
"eval_samples_per_second": 17.251, |
|
"eval_steps_per_second": 0.56, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 88.0, |
|
"learning_rate": 9.20700527553069e-05, |
|
"loss": 0.1553, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 88.0, |
|
"eval_loss": 0.18669484555721283, |
|
"eval_runtime": 26.7742, |
|
"eval_samples_per_second": 17.255, |
|
"eval_steps_per_second": 0.56, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 88.62, |
|
"learning_rate": 9.067030045514476e-05, |
|
"loss": 0.154, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 88.62, |
|
"eval_loss": 0.1860661506652832, |
|
"eval_runtime": 26.7794, |
|
"eval_samples_per_second": 17.252, |
|
"eval_steps_per_second": 0.56, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 89.23, |
|
"learning_rate": 8.927238984553626e-05, |
|
"loss": 0.1549, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 89.23, |
|
"eval_loss": 0.18506208062171936, |
|
"eval_runtime": 26.7725, |
|
"eval_samples_per_second": 17.257, |
|
"eval_steps_per_second": 0.56, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 89.85, |
|
"learning_rate": 8.787659687521403e-05, |
|
"loss": 0.1528, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 89.85, |
|
"eval_loss": 0.18385158479213715, |
|
"eval_runtime": 26.763, |
|
"eval_samples_per_second": 17.263, |
|
"eval_steps_per_second": 0.56, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 90.46, |
|
"learning_rate": 8.648319707488682e-05, |
|
"loss": 0.1523, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 90.46, |
|
"eval_loss": 0.18269173800945282, |
|
"eval_runtime": 26.7762, |
|
"eval_samples_per_second": 17.254, |
|
"eval_steps_per_second": 0.56, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 91.08, |
|
"learning_rate": 8.509246550284961e-05, |
|
"loss": 0.1513, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 91.08, |
|
"eval_loss": 0.18222320079803467, |
|
"eval_runtime": 26.788, |
|
"eval_samples_per_second": 17.247, |
|
"eval_steps_per_second": 0.56, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 91.69, |
|
"learning_rate": 8.37046766906869e-05, |
|
"loss": 0.1503, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 91.69, |
|
"eval_loss": 0.18125151097774506, |
|
"eval_runtime": 26.7603, |
|
"eval_samples_per_second": 17.264, |
|
"eval_steps_per_second": 0.561, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 92.31, |
|
"learning_rate": 8.232010458907992e-05, |
|
"loss": 0.1502, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 92.31, |
|
"eval_loss": 0.1806618869304657, |
|
"eval_runtime": 26.7921, |
|
"eval_samples_per_second": 17.244, |
|
"eval_steps_per_second": 0.56, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 92.92, |
|
"learning_rate": 8.093902251372853e-05, |
|
"loss": 0.1481, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 92.92, |
|
"eval_loss": 0.17959125339984894, |
|
"eval_runtime": 26.7772, |
|
"eval_samples_per_second": 17.254, |
|
"eval_steps_per_second": 0.56, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 93.54, |
|
"learning_rate": 7.956170309139842e-05, |
|
"loss": 0.1475, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 93.54, |
|
"eval_loss": 0.1786828190088272, |
|
"eval_runtime": 26.773, |
|
"eval_samples_per_second": 17.256, |
|
"eval_steps_per_second": 0.56, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 94.15, |
|
"learning_rate": 7.825698244184431e-05, |
|
"loss": 0.1469, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 94.15, |
|
"eval_loss": 0.17820928990840912, |
|
"eval_runtime": 26.7735, |
|
"eval_samples_per_second": 17.256, |
|
"eval_steps_per_second": 0.56, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 94.77, |
|
"learning_rate": 7.68877814745228e-05, |
|
"loss": 0.1472, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 94.77, |
|
"eval_loss": 0.17709802091121674, |
|
"eval_runtime": 26.7597, |
|
"eval_samples_per_second": 17.265, |
|
"eval_steps_per_second": 0.561, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 95.38, |
|
"learning_rate": 7.552314287861831e-05, |
|
"loss": 0.1461, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 95.38, |
|
"eval_loss": 0.1761600822210312, |
|
"eval_runtime": 26.7646, |
|
"eval_samples_per_second": 17.262, |
|
"eval_steps_per_second": 0.56, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 96.0, |
|
"learning_rate": 7.416333603493977e-05, |
|
"loss": 0.145, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 96.0, |
|
"eval_loss": 0.17534850537776947, |
|
"eval_runtime": 26.7471, |
|
"eval_samples_per_second": 17.273, |
|
"eval_steps_per_second": 0.561, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 96.62, |
|
"learning_rate": 7.280862937050435e-05, |
|
"loss": 0.143, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 96.62, |
|
"eval_loss": 0.1751878708600998, |
|
"eval_runtime": 26.7568, |
|
"eval_samples_per_second": 17.267, |
|
"eval_steps_per_second": 0.561, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 97.23, |
|
"learning_rate": 7.152662566194701e-05, |
|
"loss": 0.1436, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 97.23, |
|
"eval_loss": 0.17516781389713287, |
|
"eval_runtime": 26.8034, |
|
"eval_samples_per_second": 17.237, |
|
"eval_steps_per_second": 0.56, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 97.85, |
|
"learning_rate": 7.018263255002402e-05, |
|
"loss": 0.1426, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 97.85, |
|
"eval_loss": 0.17369630932807922, |
|
"eval_runtime": 26.781, |
|
"eval_samples_per_second": 17.251, |
|
"eval_steps_per_second": 0.56, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 98.46, |
|
"learning_rate": 6.884452541156719e-05, |
|
"loss": 0.1427, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 98.46, |
|
"eval_loss": 0.17294321954250336, |
|
"eval_runtime": 26.7722, |
|
"eval_samples_per_second": 17.257, |
|
"eval_steps_per_second": 0.56, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 99.08, |
|
"learning_rate": 6.751256839005342e-05, |
|
"loss": 0.142, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 99.08, |
|
"eval_loss": 0.17208707332611084, |
|
"eval_runtime": 26.7495, |
|
"eval_samples_per_second": 17.271, |
|
"eval_steps_per_second": 0.561, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 99.69, |
|
"learning_rate": 6.625314525914243e-05, |
|
"loss": 0.1411, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 99.69, |
|
"eval_loss": 0.17149858176708221, |
|
"eval_runtime": 26.756, |
|
"eval_samples_per_second": 17.267, |
|
"eval_steps_per_second": 0.561, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 100.31, |
|
"learning_rate": 6.493393606401967e-05, |
|
"loss": 0.1406, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 100.31, |
|
"eval_loss": 0.1708817183971405, |
|
"eval_runtime": 26.7681, |
|
"eval_samples_per_second": 17.259, |
|
"eval_steps_per_second": 0.56, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 100.92, |
|
"learning_rate": 6.36216489394732e-05, |
|
"loss": 0.1403, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 100.92, |
|
"eval_loss": 0.16994836926460266, |
|
"eval_runtime": 26.7644, |
|
"eval_samples_per_second": 17.262, |
|
"eval_steps_per_second": 0.56, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 101.54, |
|
"learning_rate": 6.231654293208744e-05, |
|
"loss": 0.1401, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 101.54, |
|
"eval_loss": 0.16944177448749542, |
|
"eval_runtime": 26.7612, |
|
"eval_samples_per_second": 17.264, |
|
"eval_steps_per_second": 0.561, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 102.15, |
|
"learning_rate": 6.101887567088831e-05, |
|
"loss": 0.1377, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 102.15, |
|
"eval_loss": 0.16865964233875275, |
|
"eval_runtime": 26.7843, |
|
"eval_samples_per_second": 17.249, |
|
"eval_steps_per_second": 0.56, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 102.77, |
|
"learning_rate": 5.972890331648686e-05, |
|
"loss": 0.1383, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 102.77, |
|
"eval_loss": 0.16790008544921875, |
|
"eval_runtime": 26.7761, |
|
"eval_samples_per_second": 17.254, |
|
"eval_steps_per_second": 0.56, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 103.38, |
|
"learning_rate": 5.8446880510513144e-05, |
|
"loss": 0.1378, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 103.38, |
|
"eval_loss": 0.1674834042787552, |
|
"eval_runtime": 26.7598, |
|
"eval_samples_per_second": 17.265, |
|
"eval_steps_per_second": 0.561, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 104.0, |
|
"learning_rate": 5.717306032534962e-05, |
|
"loss": 0.1372, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 104.0, |
|
"eval_loss": 0.16623561084270477, |
|
"eval_runtime": 26.77, |
|
"eval_samples_per_second": 17.258, |
|
"eval_steps_per_second": 0.56, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 104.62, |
|
"learning_rate": 5.5907694214174344e-05, |
|
"loss": 0.1362, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 104.62, |
|
"eval_loss": 0.16605305671691895, |
|
"eval_runtime": 26.7686, |
|
"eval_samples_per_second": 17.259, |
|
"eval_steps_per_second": 0.56, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 105.23, |
|
"learning_rate": 5.4651031961324364e-05, |
|
"loss": 0.1343, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 105.23, |
|
"eval_loss": 0.16553008556365967, |
|
"eval_runtime": 26.7736, |
|
"eval_samples_per_second": 17.256, |
|
"eval_steps_per_second": 0.56, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 105.85, |
|
"learning_rate": 5.3403321632987425e-05, |
|
"loss": 0.1357, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 105.85, |
|
"eval_loss": 0.16530947387218475, |
|
"eval_runtime": 26.7838, |
|
"eval_samples_per_second": 17.249, |
|
"eval_steps_per_second": 0.56, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 106.46, |
|
"learning_rate": 5.2164809528234015e-05, |
|
"loss": 0.1344, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 106.46, |
|
"eval_loss": 0.1647026091814041, |
|
"eval_runtime": 26.7756, |
|
"eval_samples_per_second": 17.254, |
|
"eval_steps_per_second": 0.56, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 107.08, |
|
"learning_rate": 5.0935740130397494e-05, |
|
"loss": 0.1339, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 107.08, |
|
"eval_loss": 0.16388827562332153, |
|
"eval_runtime": 26.7918, |
|
"eval_samples_per_second": 17.244, |
|
"eval_steps_per_second": 0.56, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 107.69, |
|
"learning_rate": 4.971635605881291e-05, |
|
"loss": 0.1336, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 107.69, |
|
"eval_loss": 0.16345228254795074, |
|
"eval_runtime": 26.7727, |
|
"eval_samples_per_second": 17.256, |
|
"eval_steps_per_second": 0.56, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 108.31, |
|
"learning_rate": 4.850689802092378e-05, |
|
"loss": 0.1333, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 108.31, |
|
"eval_loss": 0.16290676593780518, |
|
"eval_runtime": 26.7927, |
|
"eval_samples_per_second": 17.243, |
|
"eval_steps_per_second": 0.56, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 108.92, |
|
"learning_rate": 4.730760476476611e-05, |
|
"loss": 0.1332, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 108.92, |
|
"eval_loss": 0.1624392867088318, |
|
"eval_runtime": 26.7947, |
|
"eval_samples_per_second": 17.242, |
|
"eval_steps_per_second": 0.56, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 109.54, |
|
"learning_rate": 4.611871303183952e-05, |
|
"loss": 0.1322, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 109.54, |
|
"eval_loss": 0.1618904024362564, |
|
"eval_runtime": 26.7757, |
|
"eval_samples_per_second": 17.254, |
|
"eval_steps_per_second": 0.56, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 110.15, |
|
"learning_rate": 4.4940457510374136e-05, |
|
"loss": 0.1327, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 110.15, |
|
"eval_loss": 0.16122287511825562, |
|
"eval_runtime": 26.8152, |
|
"eval_samples_per_second": 17.229, |
|
"eval_steps_per_second": 0.559, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 110.77, |
|
"learning_rate": 4.3773070789003026e-05, |
|
"loss": 0.1311, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 110.77, |
|
"eval_loss": 0.16065527498722076, |
|
"eval_runtime": 26.7937, |
|
"eval_samples_per_second": 17.243, |
|
"eval_steps_per_second": 0.56, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 111.38, |
|
"learning_rate": 4.261678331084884e-05, |
|
"loss": 0.132, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 111.38, |
|
"eval_loss": 0.16051311790943146, |
|
"eval_runtime": 26.7716, |
|
"eval_samples_per_second": 17.257, |
|
"eval_steps_per_second": 0.56, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 112.0, |
|
"learning_rate": 4.147182332803439e-05, |
|
"loss": 0.131, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 112.0, |
|
"eval_loss": 0.16002397239208221, |
|
"eval_runtime": 26.809, |
|
"eval_samples_per_second": 17.233, |
|
"eval_steps_per_second": 0.56, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 112.62, |
|
"learning_rate": 4.0338416856625294e-05, |
|
"loss": 0.1298, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 112.62, |
|
"eval_loss": 0.15938027203083038, |
|
"eval_runtime": 26.7668, |
|
"eval_samples_per_second": 17.26, |
|
"eval_steps_per_second": 0.56, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 113.23, |
|
"learning_rate": 3.921678763201434e-05, |
|
"loss": 0.13, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 113.23, |
|
"eval_loss": 0.1591978669166565, |
|
"eval_runtime": 26.7683, |
|
"eval_samples_per_second": 17.259, |
|
"eval_steps_per_second": 0.56, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 113.85, |
|
"learning_rate": 3.810715706475575e-05, |
|
"loss": 0.1302, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 113.85, |
|
"eval_loss": 0.15868616104125977, |
|
"eval_runtime": 26.7596, |
|
"eval_samples_per_second": 17.265, |
|
"eval_steps_per_second": 0.561, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 114.46, |
|
"learning_rate": 3.70097441968588e-05, |
|
"loss": 0.1292, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 114.46, |
|
"eval_loss": 0.1582469791173935, |
|
"eval_runtime": 26.8259, |
|
"eval_samples_per_second": 17.222, |
|
"eval_steps_per_second": 0.559, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 115.08, |
|
"learning_rate": 3.592476565854854e-05, |
|
"loss": 0.1284, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 115.08, |
|
"eval_loss": 0.15772594511508942, |
|
"eval_runtime": 26.7663, |
|
"eval_samples_per_second": 17.261, |
|
"eval_steps_per_second": 0.56, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 115.69, |
|
"learning_rate": 3.485243562550297e-05, |
|
"loss": 0.1278, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 115.69, |
|
"eval_loss": 0.1572510004043579, |
|
"eval_runtime": 26.8195, |
|
"eval_samples_per_second": 17.226, |
|
"eval_steps_per_second": 0.559, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 116.31, |
|
"learning_rate": 3.379296577657434e-05, |
|
"loss": 0.1281, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 116.31, |
|
"eval_loss": 0.156888946890831, |
|
"eval_runtime": 26.7809, |
|
"eval_samples_per_second": 17.251, |
|
"eval_steps_per_second": 0.56, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 116.92, |
|
"learning_rate": 3.2746565252003815e-05, |
|
"loss": 0.1277, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 116.92, |
|
"eval_loss": 0.15669023990631104, |
|
"eval_runtime": 26.7251, |
|
"eval_samples_per_second": 17.287, |
|
"eval_steps_per_second": 0.561, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 117.54, |
|
"learning_rate": 3.1713440612136924e-05, |
|
"loss": 0.1266, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 117.54, |
|
"eval_loss": 0.1565510779619217, |
|
"eval_runtime": 26.7847, |
|
"eval_samples_per_second": 17.249, |
|
"eval_steps_per_second": 0.56, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 118.15, |
|
"learning_rate": 3.069379579664835e-05, |
|
"loss": 0.1279, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 118.15, |
|
"eval_loss": 0.15575794875621796, |
|
"eval_runtime": 26.785, |
|
"eval_samples_per_second": 17.248, |
|
"eval_steps_per_second": 0.56, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 118.77, |
|
"learning_rate": 2.9737802267115754e-05, |
|
"loss": 0.1261, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 118.77, |
|
"eval_loss": 0.15544316172599792, |
|
"eval_runtime": 26.805, |
|
"eval_samples_per_second": 17.236, |
|
"eval_steps_per_second": 0.56, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 119.38, |
|
"learning_rate": 2.8745019577809483e-05, |
|
"loss": 0.1271, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 119.38, |
|
"eval_loss": 0.15522195398807526, |
|
"eval_runtime": 26.7839, |
|
"eval_samples_per_second": 17.249, |
|
"eval_steps_per_second": 0.56, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 120.0, |
|
"learning_rate": 2.7766302681695688e-05, |
|
"loss": 0.1263, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 120.0, |
|
"eval_loss": 0.1547752469778061, |
|
"eval_runtime": 26.8002, |
|
"eval_samples_per_second": 17.239, |
|
"eval_steps_per_second": 0.56, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 120.62, |
|
"learning_rate": 2.6801844778314467e-05, |
|
"loss": 0.1254, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 120.62, |
|
"eval_loss": 0.15464647114276886, |
|
"eval_runtime": 26.8029, |
|
"eval_samples_per_second": 17.237, |
|
"eval_steps_per_second": 0.56, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 121.23, |
|
"learning_rate": 2.5851836252468897e-05, |
|
"loss": 0.1255, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 121.23, |
|
"eval_loss": 0.15429826080799103, |
|
"eval_runtime": 26.8105, |
|
"eval_samples_per_second": 17.232, |
|
"eval_steps_per_second": 0.559, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 121.85, |
|
"learning_rate": 2.491646463664261e-05, |
|
"loss": 0.1261, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 121.85, |
|
"eval_loss": 0.1540435552597046, |
|
"eval_runtime": 26.8082, |
|
"eval_samples_per_second": 17.234, |
|
"eval_steps_per_second": 0.56, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 122.46, |
|
"learning_rate": 2.399591457398106e-05, |
|
"loss": 0.1257, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 122.46, |
|
"eval_loss": 0.15359282493591309, |
|
"eval_runtime": 26.7751, |
|
"eval_samples_per_second": 17.255, |
|
"eval_steps_per_second": 0.56, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 123.08, |
|
"learning_rate": 2.3090367781842413e-05, |
|
"loss": 0.1246, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 123.08, |
|
"eval_loss": 0.15332242846488953, |
|
"eval_runtime": 26.7777, |
|
"eval_samples_per_second": 17.253, |
|
"eval_steps_per_second": 0.56, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 123.69, |
|
"learning_rate": 2.2200003015926705e-05, |
|
"loss": 0.1247, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 123.69, |
|
"eval_loss": 0.15318149328231812, |
|
"eval_runtime": 26.7776, |
|
"eval_samples_per_second": 17.253, |
|
"eval_steps_per_second": 0.56, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 124.31, |
|
"learning_rate": 2.1324996034989165e-05, |
|
"loss": 0.1252, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 124.31, |
|
"eval_loss": 0.15291614830493927, |
|
"eval_runtime": 26.7677, |
|
"eval_samples_per_second": 17.26, |
|
"eval_steps_per_second": 0.56, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 124.92, |
|
"learning_rate": 2.046551956614534e-05, |
|
"loss": 0.1249, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 124.92, |
|
"eval_loss": 0.15260463953018188, |
|
"eval_runtime": 26.7848, |
|
"eval_samples_per_second": 17.249, |
|
"eval_steps_per_second": 0.56, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 125.54, |
|
"learning_rate": 1.9621743270774597e-05, |
|
"loss": 0.1242, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 125.54, |
|
"eval_loss": 0.15242013335227966, |
|
"eval_runtime": 26.7819, |
|
"eval_samples_per_second": 17.25, |
|
"eval_steps_per_second": 0.56, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 126.15, |
|
"learning_rate": 1.8793833711028773e-05, |
|
"loss": 0.1239, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 126.15, |
|
"eval_loss": 0.1519923359155655, |
|
"eval_runtime": 26.789, |
|
"eval_samples_per_second": 17.246, |
|
"eval_steps_per_second": 0.56, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 126.77, |
|
"learning_rate": 1.7981954316952786e-05, |
|
"loss": 0.1231, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 126.77, |
|
"eval_loss": 0.15172038972377777, |
|
"eval_runtime": 26.788, |
|
"eval_samples_per_second": 17.246, |
|
"eval_steps_per_second": 0.56, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 127.38, |
|
"learning_rate": 1.718626535422332e-05, |
|
"loss": 0.1235, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 127.38, |
|
"eval_loss": 0.15152348577976227, |
|
"eval_runtime": 26.7862, |
|
"eval_samples_per_second": 17.248, |
|
"eval_steps_per_second": 0.56, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 128.0, |
|
"learning_rate": 1.6406923892512284e-05, |
|
"loss": 0.123, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 128.0, |
|
"eval_loss": 0.151360422372818, |
|
"eval_runtime": 26.8008, |
|
"eval_samples_per_second": 17.238, |
|
"eval_steps_per_second": 0.56, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 128.62, |
|
"learning_rate": 1.5644083774481043e-05, |
|
"loss": 0.123, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 128.62, |
|
"eval_loss": 0.1512284278869629, |
|
"eval_runtime": 26.7984, |
|
"eval_samples_per_second": 17.24, |
|
"eval_steps_per_second": 0.56, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 129.23, |
|
"learning_rate": 1.489789558541187e-05, |
|
"loss": 0.1235, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 129.23, |
|
"eval_loss": 0.15105997025966644, |
|
"eval_runtime": 26.7894, |
|
"eval_samples_per_second": 17.246, |
|
"eval_steps_per_second": 0.56, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 129.85, |
|
"learning_rate": 1.4168506623482202e-05, |
|
"loss": 0.1222, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 129.85, |
|
"eval_loss": 0.15094506740570068, |
|
"eval_runtime": 26.7743, |
|
"eval_samples_per_second": 17.255, |
|
"eval_steps_per_second": 0.56, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 130.46, |
|
"learning_rate": 1.3456060870687937e-05, |
|
"loss": 0.1221, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 130.46, |
|
"eval_loss": 0.1505899429321289, |
|
"eval_runtime": 26.7534, |
|
"eval_samples_per_second": 17.269, |
|
"eval_steps_per_second": 0.561, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 131.08, |
|
"learning_rate": 1.2760698964421091e-05, |
|
"loss": 0.1212, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 131.08, |
|
"eval_loss": 0.15049409866333008, |
|
"eval_runtime": 26.7596, |
|
"eval_samples_per_second": 17.265, |
|
"eval_steps_per_second": 0.561, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 131.69, |
|
"learning_rate": 1.2082558169708081e-05, |
|
"loss": 0.122, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 131.69, |
|
"eval_loss": 0.15041232109069824, |
|
"eval_runtime": 26.7564, |
|
"eval_samples_per_second": 17.267, |
|
"eval_steps_per_second": 0.561, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 132.31, |
|
"learning_rate": 1.1421772352113336e-05, |
|
"loss": 0.1225, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 132.31, |
|
"eval_loss": 0.1501646488904953, |
|
"eval_runtime": 26.7249, |
|
"eval_samples_per_second": 17.287, |
|
"eval_steps_per_second": 0.561, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 132.92, |
|
"learning_rate": 1.0778471951314229e-05, |
|
"loss": 0.1213, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 132.92, |
|
"eval_loss": 0.15006287395954132, |
|
"eval_runtime": 26.7285, |
|
"eval_samples_per_second": 17.285, |
|
"eval_steps_per_second": 0.561, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 133.54, |
|
"learning_rate": 1.015278395535203e-05, |
|
"loss": 0.1225, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 133.54, |
|
"eval_loss": 0.14982885122299194, |
|
"eval_runtime": 26.7639, |
|
"eval_samples_per_second": 17.262, |
|
"eval_steps_per_second": 0.56, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 134.15, |
|
"learning_rate": 9.54483187556453e-06, |
|
"loss": 0.1219, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 134.15, |
|
"eval_loss": 0.14970383048057556, |
|
"eval_runtime": 26.7778, |
|
"eval_samples_per_second": 17.253, |
|
"eval_steps_per_second": 0.56, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 134.77, |
|
"learning_rate": 8.954735722204689e-06, |
|
"loss": 0.1213, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 134.77, |
|
"eval_loss": 0.14960302412509918, |
|
"eval_runtime": 26.7677, |
|
"eval_samples_per_second": 17.26, |
|
"eval_steps_per_second": 0.56, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 135.38, |
|
"learning_rate": 8.382611980750532e-06, |
|
"loss": 0.1216, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 135.38, |
|
"eval_loss": 0.14945241808891296, |
|
"eval_runtime": 26.7712, |
|
"eval_samples_per_second": 17.257, |
|
"eval_steps_per_second": 0.56, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 136.0, |
|
"learning_rate": 7.828573588910859e-06, |
|
"loss": 0.1211, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 136.0, |
|
"eval_loss": 0.1492658108472824, |
|
"eval_runtime": 26.7652, |
|
"eval_samples_per_second": 17.261, |
|
"eval_steps_per_second": 0.56, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 136.62, |
|
"learning_rate": 7.292729914331142e-06, |
|
"loss": 0.1216, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 136.62, |
|
"eval_loss": 0.14914917945861816, |
|
"eval_runtime": 26.7705, |
|
"eval_samples_per_second": 17.258, |
|
"eval_steps_per_second": 0.56, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 137.23, |
|
"learning_rate": 6.775186733004424e-06, |
|
"loss": 0.1197, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 137.23, |
|
"eval_loss": 0.14917601644992828, |
|
"eval_runtime": 26.7525, |
|
"eval_samples_per_second": 17.269, |
|
"eval_steps_per_second": 0.561, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 137.85, |
|
"learning_rate": 6.276046208390873e-06, |
|
"loss": 0.1203, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 137.85, |
|
"eval_loss": 0.14903923869132996, |
|
"eval_runtime": 26.762, |
|
"eval_samples_per_second": 17.263, |
|
"eval_steps_per_second": 0.56, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 138.46, |
|
"learning_rate": 5.795406871250797e-06, |
|
"loss": 0.1209, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 138.46, |
|
"eval_loss": 0.14884509146213531, |
|
"eval_runtime": 26.758, |
|
"eval_samples_per_second": 17.266, |
|
"eval_steps_per_second": 0.561, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 139.08, |
|
"learning_rate": 5.333363600194396e-06, |
|
"loss": 0.1197, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 139.08, |
|
"eval_loss": 0.14882220327854156, |
|
"eval_runtime": 26.765, |
|
"eval_samples_per_second": 17.261, |
|
"eval_steps_per_second": 0.56, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 139.69, |
|
"learning_rate": 4.890007602952828e-06, |
|
"loss": 0.1202, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 139.69, |
|
"eval_loss": 0.1487365961074829, |
|
"eval_runtime": 26.7652, |
|
"eval_samples_per_second": 17.261, |
|
"eval_steps_per_second": 0.56, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 140.31, |
|
"learning_rate": 4.46542639837364e-06, |
|
"loss": 0.121, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 140.31, |
|
"eval_loss": 0.1486121267080307, |
|
"eval_runtime": 26.748, |
|
"eval_samples_per_second": 17.272, |
|
"eval_steps_per_second": 0.561, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 140.92, |
|
"learning_rate": 4.059703799144476e-06, |
|
"loss": 0.1202, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 140.92, |
|
"eval_loss": 0.1485925018787384, |
|
"eval_runtime": 26.769, |
|
"eval_samples_per_second": 17.259, |
|
"eval_steps_per_second": 0.56, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 141.54, |
|
"learning_rate": 3.6729198952483724e-06, |
|
"loss": 0.1194, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 141.54, |
|
"eval_loss": 0.14861957728862762, |
|
"eval_runtime": 26.7652, |
|
"eval_samples_per_second": 17.261, |
|
"eval_steps_per_second": 0.56, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 142.15, |
|
"learning_rate": 3.305151038153964e-06, |
|
"loss": 0.1199, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 142.15, |
|
"eval_loss": 0.1484871208667755, |
|
"eval_runtime": 26.7519, |
|
"eval_samples_per_second": 17.27, |
|
"eval_steps_per_second": 0.561, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 142.77, |
|
"learning_rate": 2.956469825743613e-06, |
|
"loss": 0.1201, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 142.77, |
|
"eval_loss": 0.14845435321331024, |
|
"eval_runtime": 26.7438, |
|
"eval_samples_per_second": 17.275, |
|
"eval_steps_per_second": 0.561, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 143.38, |
|
"learning_rate": 2.6269450879825243e-06, |
|
"loss": 0.1198, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 143.38, |
|
"eval_loss": 0.1484329104423523, |
|
"eval_runtime": 26.7558, |
|
"eval_samples_per_second": 17.267, |
|
"eval_steps_per_second": 0.561, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 144.0, |
|
"learning_rate": 2.316641873331704e-06, |
|
"loss": 0.12, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 144.0, |
|
"eval_loss": 0.14837703108787537, |
|
"eval_runtime": 26.7449, |
|
"eval_samples_per_second": 17.274, |
|
"eval_steps_per_second": 0.561, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 144.62, |
|
"learning_rate": 2.025621435907221e-06, |
|
"loss": 0.1197, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 144.62, |
|
"eval_loss": 0.14830969274044037, |
|
"eval_runtime": 26.7686, |
|
"eval_samples_per_second": 17.259, |
|
"eval_steps_per_second": 0.56, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 145.23, |
|
"learning_rate": 1.753941223388733e-06, |
|
"loss": 0.1195, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 145.23, |
|
"eval_loss": 0.14829565584659576, |
|
"eval_runtime": 26.7574, |
|
"eval_samples_per_second": 17.266, |
|
"eval_steps_per_second": 0.561, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 145.85, |
|
"learning_rate": 1.5016548656791697e-06, |
|
"loss": 0.1206, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 145.85, |
|
"eval_loss": 0.1482698619365692, |
|
"eval_runtime": 26.744, |
|
"eval_samples_per_second": 17.275, |
|
"eval_steps_per_second": 0.561, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 146.46, |
|
"learning_rate": 1.2688121643181893e-06, |
|
"loss": 0.1211, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 146.46, |
|
"eval_loss": 0.1482834368944168, |
|
"eval_runtime": 26.7646, |
|
"eval_samples_per_second": 17.262, |
|
"eval_steps_per_second": 0.56, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 147.08, |
|
"learning_rate": 1.0554590826512778e-06, |
|
"loss": 0.1196, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 147.08, |
|
"eval_loss": 0.14823544025421143, |
|
"eval_runtime": 26.7343, |
|
"eval_samples_per_second": 17.281, |
|
"eval_steps_per_second": 0.561, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 147.69, |
|
"learning_rate": 8.61637736756582e-07, |
|
"loss": 0.1197, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 147.69, |
|
"eval_loss": 0.1482395976781845, |
|
"eval_runtime": 26.7411, |
|
"eval_samples_per_second": 17.277, |
|
"eval_steps_per_second": 0.561, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 148.31, |
|
"learning_rate": 6.873863871311614e-07, |
|
"loss": 0.1212, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 148.31, |
|
"eval_loss": 0.14818722009658813, |
|
"eval_runtime": 26.7764, |
|
"eval_samples_per_second": 17.254, |
|
"eval_steps_per_second": 0.56, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 148.92, |
|
"learning_rate": 5.32739431138285e-07, |
|
"loss": 0.1205, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 148.92, |
|
"eval_loss": 0.14816446602344513, |
|
"eval_runtime": 26.7592, |
|
"eval_samples_per_second": 17.265, |
|
"eval_steps_per_second": 0.561, |
|
"step": 4840 |
|
} |
|
], |
|
"logging_steps": 20, |
|
"max_steps": 4968, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 156, |
|
"save_steps": 20, |
|
"total_flos": 9998909350871040.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|