|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.8507242605603262, |
|
"global_step": 4000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.62e-07, |
|
"loss": 0.6957, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3.42e-07, |
|
"loss": 0.6958, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 5.22e-07, |
|
"loss": 0.6916, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 7.02e-07, |
|
"loss": 0.6959, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 8.820000000000001e-07, |
|
"loss": 0.6927, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.062e-06, |
|
"loss": 0.6921, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.2420000000000001e-06, |
|
"loss": 0.691, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.4220000000000001e-06, |
|
"loss": 0.6892, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.6019999999999999e-06, |
|
"loss": 0.6881, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.782e-06, |
|
"loss": 0.6857, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.962e-06, |
|
"loss": 0.6836, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.142e-06, |
|
"loss": 0.6802, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.322e-06, |
|
"loss": 0.6714, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.4840000000000003e-06, |
|
"loss": 0.6661, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 2.664e-06, |
|
"loss": 0.6599, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 2.826e-06, |
|
"loss": 0.6524, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.006e-06, |
|
"loss": 0.6362, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.168e-06, |
|
"loss": 0.632, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 3.348e-06, |
|
"loss": 0.6301, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 3.5280000000000004e-06, |
|
"loss": 0.6272, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 3.708e-06, |
|
"loss": 0.6172, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 3.87e-06, |
|
"loss": 0.6146, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.05e-06, |
|
"loss": 0.606, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.23e-06, |
|
"loss": 0.6194, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.392e-06, |
|
"loss": 0.5863, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_webgpt_accuracy": 0.547752808988764, |
|
"eval_webgpt_loss": 0.7199520468711853, |
|
"eval_webgpt_runtime": 152.9037, |
|
"eval_webgpt_samples_per_second": 25.611, |
|
"eval_webgpt_steps_per_second": 2.564, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_hfsummary_accuracy": 0.630263277211861, |
|
"eval_hfsummary_loss": 0.6598580479621887, |
|
"eval_hfsummary_runtime": 2471.5424, |
|
"eval_hfsummary_samples_per_second": 13.386, |
|
"eval_hfsummary_steps_per_second": 1.339, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_gptsynthetic_accuracy": 0.9978883861236802, |
|
"eval_gptsynthetic_loss": 0.09524902701377869, |
|
"eval_gptsynthetic_runtime": 116.7014, |
|
"eval_gptsynthetic_samples_per_second": 28.406, |
|
"eval_gptsynthetic_steps_per_second": 2.845, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.5720000000000004e-06, |
|
"loss": 0.5818, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.752e-06, |
|
"loss": 0.5817, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.932e-06, |
|
"loss": 0.5713, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 5.1119999999999995e-06, |
|
"loss": 0.5787, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 5.2919999999999995e-06, |
|
"loss": 0.566, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 5.472e-06, |
|
"loss": 0.5765, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 5.652e-06, |
|
"loss": 0.5286, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 5.814000000000001e-06, |
|
"loss": 0.5644, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 5.9940000000000005e-06, |
|
"loss": 0.5278, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 6.1740000000000005e-06, |
|
"loss": 0.5376, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 6.354e-06, |
|
"loss": 0.5396, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 6.534e-06, |
|
"loss": 0.517, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 6.7140000000000004e-06, |
|
"loss": 0.5001, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 6.894e-06, |
|
"loss": 0.5069, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 7.074e-06, |
|
"loss": 0.5102, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 7.254e-06, |
|
"loss": 0.5105, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 7.4339999999999995e-06, |
|
"loss": 0.5332, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 7.614e-06, |
|
"loss": 0.5076, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 7.794e-06, |
|
"loss": 0.5088, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 7.974e-06, |
|
"loss": 0.4968, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 8.154e-06, |
|
"loss": 0.4902, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 8.334e-06, |
|
"loss": 0.5097, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 8.514e-06, |
|
"loss": 0.5038, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 8.694e-06, |
|
"loss": 0.5082, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 8.874e-06, |
|
"loss": 0.4887, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_webgpt_accuracy": 0.5623084780388151, |
|
"eval_webgpt_loss": 0.7031316161155701, |
|
"eval_webgpt_runtime": 152.8509, |
|
"eval_webgpt_samples_per_second": 25.62, |
|
"eval_webgpt_steps_per_second": 2.565, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_hfsummary_accuracy": 0.6744249312335641, |
|
"eval_hfsummary_loss": 0.6107630729675293, |
|
"eval_hfsummary_runtime": 2471.6908, |
|
"eval_hfsummary_samples_per_second": 13.385, |
|
"eval_hfsummary_steps_per_second": 1.339, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_gptsynthetic_accuracy": 0.9987933634992459, |
|
"eval_gptsynthetic_loss": 0.023942505940794945, |
|
"eval_gptsynthetic_runtime": 116.8138, |
|
"eval_gptsynthetic_samples_per_second": 28.379, |
|
"eval_gptsynthetic_steps_per_second": 2.842, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 8.99293563579278e-06, |
|
"loss": 0.4875, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 8.969387755102041e-06, |
|
"loss": 0.483, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 8.945839874411304e-06, |
|
"loss": 0.5057, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 8.922291993720566e-06, |
|
"loss": 0.4923, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 8.898744113029827e-06, |
|
"loss": 0.4623, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 8.87519623233909e-06, |
|
"loss": 0.4735, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 8.851648351648352e-06, |
|
"loss": 0.4516, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 8.828100470957614e-06, |
|
"loss": 0.4542, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 8.804552590266875e-06, |
|
"loss": 0.465, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 8.781004709576138e-06, |
|
"loss": 0.4577, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 8.7574568288854e-06, |
|
"loss": 0.4994, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 8.733908948194663e-06, |
|
"loss": 0.4565, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 8.710361067503925e-06, |
|
"loss": 0.4617, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 8.686813186813188e-06, |
|
"loss": 0.4598, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 8.663265306122449e-06, |
|
"loss": 0.4441, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 8.639717425431711e-06, |
|
"loss": 0.4935, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 8.616169544740974e-06, |
|
"loss": 0.4646, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 8.592621664050236e-06, |
|
"loss": 0.4771, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 8.569073783359497e-06, |
|
"loss": 0.4754, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 8.54552590266876e-06, |
|
"loss": 0.4684, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 8.521978021978022e-06, |
|
"loss": 0.4691, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 8.498430141287285e-06, |
|
"loss": 0.4807, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 8.474882260596547e-06, |
|
"loss": 0.4597, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 8.45133437990581e-06, |
|
"loss": 0.455, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 8.42778649921507e-06, |
|
"loss": 0.4561, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_webgpt_accuracy": 0.5702247191011236, |
|
"eval_webgpt_loss": 0.6994287371635437, |
|
"eval_webgpt_runtime": 152.9386, |
|
"eval_webgpt_samples_per_second": 25.605, |
|
"eval_webgpt_steps_per_second": 2.563, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_hfsummary_accuracy": 0.6827373575552399, |
|
"eval_hfsummary_loss": 0.60725337266922, |
|
"eval_hfsummary_runtime": 2470.4498, |
|
"eval_hfsummary_samples_per_second": 13.391, |
|
"eval_hfsummary_steps_per_second": 1.339, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_gptsynthetic_accuracy": 0.9975867269984917, |
|
"eval_gptsynthetic_loss": 0.01110268384218216, |
|
"eval_gptsynthetic_runtime": 116.8749, |
|
"eval_gptsynthetic_samples_per_second": 28.364, |
|
"eval_gptsynthetic_steps_per_second": 2.841, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 8.404238618524333e-06, |
|
"loss": 0.4578, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 8.380690737833595e-06, |
|
"loss": 0.4798, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 8.359497645211932e-06, |
|
"loss": 0.4402, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 8.335949764521193e-06, |
|
"loss": 0.4632, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 8.312401883830455e-06, |
|
"loss": 0.4505, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 8.288854003139718e-06, |
|
"loss": 0.4641, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 8.26530612244898e-06, |
|
"loss": 0.4373, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 8.241758241758241e-06, |
|
"loss": 0.4192, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 8.218210361067504e-06, |
|
"loss": 0.4671, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 8.194662480376766e-06, |
|
"loss": 0.4481, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 8.171114599686029e-06, |
|
"loss": 0.4481, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 8.147566718995291e-06, |
|
"loss": 0.4317, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 8.124018838304554e-06, |
|
"loss": 0.4517, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 8.100470957613814e-06, |
|
"loss": 0.4282, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 8.076923076923077e-06, |
|
"loss": 0.4543, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 8.05337519623234e-06, |
|
"loss": 0.4303, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 8.029827315541602e-06, |
|
"loss": 0.4423, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 8.006279434850863e-06, |
|
"loss": 0.4624, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 7.982731554160125e-06, |
|
"loss": 0.4318, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 7.959183673469388e-06, |
|
"loss": 0.4374, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 7.93563579277865e-06, |
|
"loss": 0.4271, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 7.912087912087913e-06, |
|
"loss": 0.4535, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 7.888540031397175e-06, |
|
"loss": 0.4393, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 7.864992150706436e-06, |
|
"loss": 0.4513, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 7.841444270015699e-06, |
|
"loss": 0.4259, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"eval_webgpt_accuracy": 0.5720122574055159, |
|
"eval_webgpt_loss": 0.6880369782447815, |
|
"eval_webgpt_runtime": 153.0376, |
|
"eval_webgpt_samples_per_second": 25.588, |
|
"eval_webgpt_steps_per_second": 2.561, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"eval_hfsummary_accuracy": 0.6844905238339933, |
|
"eval_hfsummary_loss": 0.6170333027839661, |
|
"eval_hfsummary_runtime": 2474.9308, |
|
"eval_hfsummary_samples_per_second": 13.367, |
|
"eval_hfsummary_steps_per_second": 1.337, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"eval_gptsynthetic_accuracy": 0.9978883861236802, |
|
"eval_gptsynthetic_loss": 0.014577150344848633, |
|
"eval_gptsynthetic_runtime": 116.8198, |
|
"eval_gptsynthetic_samples_per_second": 28.377, |
|
"eval_gptsynthetic_steps_per_second": 2.842, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 7.817896389324961e-06, |
|
"loss": 0.449, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 7.794348508634224e-06, |
|
"loss": 0.4274, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 7.770800627943485e-06, |
|
"loss": 0.4014, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 7.747252747252747e-06, |
|
"loss": 0.4356, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 7.72370486656201e-06, |
|
"loss": 0.4378, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 7.700156985871272e-06, |
|
"loss": 0.4298, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 7.676609105180535e-06, |
|
"loss": 0.4514, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 7.653061224489796e-06, |
|
"loss": 0.4526, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 7.629513343799058e-06, |
|
"loss": 0.4192, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 7.60596546310832e-06, |
|
"loss": 0.4433, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 7.582417582417582e-06, |
|
"loss": 0.4199, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 7.558869701726845e-06, |
|
"loss": 0.4419, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 7.535321821036107e-06, |
|
"loss": 0.4163, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 7.511773940345369e-06, |
|
"loss": 0.411, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 7.4882260596546314e-06, |
|
"loss": 0.4492, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 7.464678178963893e-06, |
|
"loss": 0.454, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 7.441130298273156e-06, |
|
"loss": 0.4311, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 7.417582417582417e-06, |
|
"loss": 0.4368, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 7.39403453689168e-06, |
|
"loss": 0.4264, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 7.3704866562009415e-06, |
|
"loss": 0.4163, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 7.346938775510204e-06, |
|
"loss": 0.4214, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 7.3233908948194665e-06, |
|
"loss": 0.4263, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 7.299843014128729e-06, |
|
"loss": 0.4233, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 7.276295133437991e-06, |
|
"loss": 0.4331, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 7.252747252747253e-06, |
|
"loss": 0.4241, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_webgpt_accuracy": 0.5806945863125639, |
|
"eval_webgpt_loss": 0.6766389608383179, |
|
"eval_webgpt_runtime": 153.0189, |
|
"eval_webgpt_samples_per_second": 25.592, |
|
"eval_webgpt_steps_per_second": 2.562, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_hfsummary_accuracy": 0.6927122691412508, |
|
"eval_hfsummary_loss": 0.6117472648620605, |
|
"eval_hfsummary_runtime": 2473.4013, |
|
"eval_hfsummary_samples_per_second": 13.376, |
|
"eval_hfsummary_steps_per_second": 1.338, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_gptsynthetic_accuracy": 0.9987933634992459, |
|
"eval_gptsynthetic_loss": 0.007306403946131468, |
|
"eval_gptsynthetic_runtime": 116.8034, |
|
"eval_gptsynthetic_samples_per_second": 28.381, |
|
"eval_gptsynthetic_steps_per_second": 2.842, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 7.229199372056515e-06, |
|
"loss": 0.3976, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 7.205651491365777e-06, |
|
"loss": 0.4221, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 7.182103610675039e-06, |
|
"loss": 0.4225, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 7.158555729984302e-06, |
|
"loss": 0.3961, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 7.135007849293563e-06, |
|
"loss": 0.4348, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 7.111459968602826e-06, |
|
"loss": 0.4146, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 7.087912087912088e-06, |
|
"loss": 0.4122, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 7.064364207221351e-06, |
|
"loss": 0.4126, |
|
"step": 1330 |
|
}, |
|
|