|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 9.99825479930192, |
|
"global_step": 2860, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 2.9999999999999997e-05, |
|
"loss": 12.9083, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 5.9999999999999995e-05, |
|
"loss": 12.5233, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 8.699999999999999e-05, |
|
"loss": 12.1986, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00011099999999999999, |
|
"loss": 9.567, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.000138, |
|
"loss": 6.7421, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.000168, |
|
"loss": 4.3567, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.000198, |
|
"loss": 3.5698, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.00022799999999999999, |
|
"loss": 3.2567, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.000258, |
|
"loss": 3.145, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00028799999999999995, |
|
"loss": 3.2802, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_loss": 3.8881919384002686, |
|
"eval_runtime": 112.7654, |
|
"eval_samples_per_second": 12.867, |
|
"eval_steps_per_second": 0.807, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.0002993478260869565, |
|
"loss": 3.6105, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00029826086956521737, |
|
"loss": 3.1704, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.00029717391304347825, |
|
"loss": 3.1124, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.0002960869565217391, |
|
"loss": 3.1266, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.00029499999999999996, |
|
"loss": 3.2521, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.00029391304347826084, |
|
"loss": 3.6024, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.0002928260869565217, |
|
"loss": 3.2086, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.0002917391304347826, |
|
"loss": 3.1616, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.00029065217391304344, |
|
"loss": 3.1408, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.0002895652173913043, |
|
"loss": 3.2254, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_loss": 3.885490894317627, |
|
"eval_runtime": 111.0614, |
|
"eval_samples_per_second": 13.065, |
|
"eval_steps_per_second": 0.819, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.0002884782608695652, |
|
"loss": 3.6835, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.00028739130434782603, |
|
"loss": 3.1449, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.0002863043478260869, |
|
"loss": 3.2003, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 0.0002852173913043478, |
|
"loss": 3.0836, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.0002841304347826087, |
|
"loss": 3.1128, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 0.0002830434782608695, |
|
"loss": 3.3853, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 0.0002819565217391304, |
|
"loss": 3.102, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 0.0002808695652173913, |
|
"loss": 3.0516, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 0.0002797826086956521, |
|
"loss": 3.5681, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 0.00027869565217391305, |
|
"loss": 3.1697, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"eval_loss": 3.419605255126953, |
|
"eval_runtime": 111.0117, |
|
"eval_samples_per_second": 13.071, |
|
"eval_steps_per_second": 0.82, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 0.0002776086956521739, |
|
"loss": 3.0489, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 0.00027652173913043476, |
|
"loss": 3.0347, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 0.00027543478260869564, |
|
"loss": 3.0201, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 0.00027434782608695647, |
|
"loss": 3.2079, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 0.00027326086956521735, |
|
"loss": 3.0785, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 0.00027217391304347824, |
|
"loss": 2.9747, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 0.0002710869565217391, |
|
"loss": 2.9664, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 0.00027, |
|
"loss": 2.9724, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 0.00026891304347826083, |
|
"loss": 3.1082, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 0.0002678260869565217, |
|
"loss": 3.0258, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"eval_loss": 3.195385217666626, |
|
"eval_runtime": 110.895, |
|
"eval_samples_per_second": 13.084, |
|
"eval_steps_per_second": 0.821, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 0.0002667391304347826, |
|
"loss": 2.9719, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 0.00026565217391304343, |
|
"loss": 2.9884, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 0.00026456521739130437, |
|
"loss": 2.9625, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 0.0002634782608695652, |
|
"loss": 3.0735, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 0.0002623913043478261, |
|
"loss": 2.9998, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 0.00026130434782608696, |
|
"loss": 2.9621, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 0.0002602173913043478, |
|
"loss": 2.9433, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 0.0002591304347826087, |
|
"loss": 2.9256, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 0.00025804347826086956, |
|
"loss": 2.9996, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 0.00025695652173913044, |
|
"loss": 2.8553, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"eval_loss": 3.017104387283325, |
|
"eval_runtime": 110.9455, |
|
"eval_samples_per_second": 13.078, |
|
"eval_steps_per_second": 0.82, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 0.00025586956521739127, |
|
"loss": 2.7029, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 0.00025478260869565215, |
|
"loss": 2.4464, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 0.00025369565217391303, |
|
"loss": 2.2426, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 0.00025260869565217386, |
|
"loss": 2.2914, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 0.00025152173913043475, |
|
"loss": 1.9541, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 0.00025043478260869563, |
|
"loss": 1.5822, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 0.0002493478260869565, |
|
"loss": 1.5446, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 0.0002482608695652174, |
|
"loss": 2.0892, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 0.0002471739130434782, |
|
"loss": 1.3977, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 0.0002460869565217391, |
|
"loss": 1.2436, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"eval_loss": 2.155662775039673, |
|
"eval_runtime": 111.1811, |
|
"eval_samples_per_second": 13.051, |
|
"eval_steps_per_second": 0.818, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 0.000245, |
|
"loss": 1.1344, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 0.00024391304347826085, |
|
"loss": 1.1795, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 0.0002428260869565217, |
|
"loss": 2.1046, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 0.00024173913043478261, |
|
"loss": 1.0792, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 0.00024065217391304347, |
|
"loss": 1.157, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 0.00023956521739130433, |
|
"loss": 1.0428, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 0.00023847826086956518, |
|
"loss": 1.1063, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 0.00023739130434782607, |
|
"loss": 1.6716, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 0.00023630434782608692, |
|
"loss": 0.9602, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 0.00023521739130434778, |
|
"loss": 0.9642, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"eval_loss": 1.5505317449569702, |
|
"eval_runtime": 111.9226, |
|
"eval_samples_per_second": 12.964, |
|
"eval_steps_per_second": 0.813, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 0.0002341304347826087, |
|
"loss": 0.8874, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 0.00023304347826086954, |
|
"loss": 0.9596, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 0.00023195652173913043, |
|
"loss": 1.3932, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 0.00023086956521739128, |
|
"loss": 0.8699, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 0.00022978260869565214, |
|
"loss": 0.8153, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 0.00022869565217391302, |
|
"loss": 0.9178, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 0.00022760869565217388, |
|
"loss": 0.9647, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 0.00022652173913043476, |
|
"loss": 1.0555, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 0.00022543478260869565, |
|
"loss": 0.8834, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 0.0002243478260869565, |
|
"loss": 0.752, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"eval_loss": 0.9008368253707886, |
|
"eval_runtime": 111.0482, |
|
"eval_samples_per_second": 13.066, |
|
"eval_steps_per_second": 0.819, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 0.00022326086956521736, |
|
"loss": 0.8197, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 0.00022217391304347824, |
|
"loss": 0.7771, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 0.0002210869565217391, |
|
"loss": 0.9506, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 0.00021999999999999995, |
|
"loss": 0.7518, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 0.00021891304347826086, |
|
"loss": 0.7297, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 0.00021782608695652172, |
|
"loss": 0.8188, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 0.0002167391304347826, |
|
"loss": 0.6478, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"learning_rate": 0.00021565217391304346, |
|
"loss": 0.6546, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 3.11, |
|
"learning_rate": 0.00021456521739130432, |
|
"loss": 0.7012, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"learning_rate": 0.0002134782608695652, |
|
"loss": 0.6164, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"eval_loss": 0.6567979454994202, |
|
"eval_runtime": 111.2282, |
|
"eval_samples_per_second": 13.045, |
|
"eval_steps_per_second": 0.818, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"learning_rate": 0.00021239130434782605, |
|
"loss": 0.7296, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"learning_rate": 0.00021130434782608694, |
|
"loss": 0.6166, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"learning_rate": 0.00021021739130434782, |
|
"loss": 0.6354, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 3.29, |
|
"learning_rate": 0.00020913043478260868, |
|
"loss": 0.6388, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 3.32, |
|
"learning_rate": 0.00020804347826086953, |
|
"loss": 0.6069, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 3.36, |
|
"learning_rate": 0.00020695652173913042, |
|
"loss": 0.7107, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"learning_rate": 0.00020586956521739127, |
|
"loss": 0.6162, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 3.43, |
|
"learning_rate": 0.00020478260869565213, |
|
"loss": 0.567, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 3.46, |
|
"learning_rate": 0.00020369565217391304, |
|
"loss": 0.5679, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"learning_rate": 0.0002026086956521739, |
|
"loss": 0.6362, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"eval_loss": 0.5460886359214783, |
|
"eval_runtime": 110.911, |
|
"eval_samples_per_second": 13.083, |
|
"eval_steps_per_second": 0.82, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.53, |
|
"learning_rate": 0.00020152173913043478, |
|
"loss": 0.6771, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"learning_rate": 0.00020043478260869563, |
|
"loss": 0.6144, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"learning_rate": 0.0001993478260869565, |
|
"loss": 0.583, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"learning_rate": 0.00019826086956521737, |
|
"loss": 0.5773, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"learning_rate": 0.00019717391304347826, |
|
"loss": 0.5981, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 3.71, |
|
"learning_rate": 0.0001960869565217391, |
|
"loss": 0.6162, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"learning_rate": 0.000195, |
|
"loss": 0.5387, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"learning_rate": 0.00019391304347826085, |
|
"loss": 0.5651, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 3.81, |
|
"learning_rate": 0.0001928260869565217, |
|
"loss": 0.544, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"learning_rate": 0.0001917391304347826, |
|
"loss": 0.5539, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"eval_loss": 0.5009284615516663, |
|
"eval_runtime": 111.9745, |
|
"eval_samples_per_second": 12.958, |
|
"eval_steps_per_second": 0.813, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 3.88, |
|
"learning_rate": 0.00019065217391304345, |
|
"loss": 0.6894, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"learning_rate": 0.00018956521739130436, |
|
"loss": 0.5409, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 3.95, |
|
"learning_rate": 0.00018847826086956521, |
|
"loss": 0.5676, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 3.98, |
|
"learning_rate": 0.00018739130434782607, |
|
"loss": 0.5543, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"learning_rate": 0.00018630434782608695, |
|
"loss": 0.6413, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"learning_rate": 0.0001852173913043478, |
|
"loss": 0.4664, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 4.09, |
|
"learning_rate": 0.00018413043478260867, |
|
"loss": 0.5098, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 4.13, |
|
"learning_rate": 0.00018304347826086955, |
|
"loss": 0.4832, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 4.16, |
|
"learning_rate": 0.00018195652173913043, |
|
"loss": 0.4859, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"learning_rate": 0.0001808695652173913, |
|
"loss": 0.5144, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"eval_loss": 0.4911543130874634, |
|
"eval_runtime": 111.7063, |
|
"eval_samples_per_second": 12.989, |
|
"eval_steps_per_second": 0.815, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 4.23, |
|
"learning_rate": 0.00017978260869565217, |
|
"loss": 0.4853, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 4.27, |
|
"learning_rate": 0.00017869565217391303, |
|
"loss": 0.5149, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 4.3, |
|
"learning_rate": 0.00017760869565217388, |
|
"loss": 0.4608, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 4.34, |
|
"learning_rate": 0.00017652173913043477, |
|
"loss": 0.4917, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 4.37, |
|
"learning_rate": 0.00017543478260869562, |
|
"loss": 0.4615, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"learning_rate": 0.00017434782608695653, |
|
"loss": 0.4175, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 4.44, |
|
"learning_rate": 0.0001732608695652174, |
|
"loss": 0.4752, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 4.47, |
|
"learning_rate": 0.00017217391304347825, |
|
"loss": 0.4979, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 4.51, |
|
"learning_rate": 0.00017108695652173913, |
|
"loss": 0.4272, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 4.54, |
|
"learning_rate": 0.00016999999999999999, |
|
"loss": 0.5067, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 4.54, |
|
"eval_loss": 0.4602111279964447, |
|
"eval_runtime": 111.5487, |
|
"eval_samples_per_second": 13.008, |
|
"eval_steps_per_second": 0.816, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 4.58, |
|
"learning_rate": 0.00016891304347826084, |
|
"loss": 0.483, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 4.61, |
|
"learning_rate": 0.0001678260869565217, |
|
"loss": 0.4671, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 4.65, |
|
"learning_rate": 0.0001667391304347826, |
|
"loss": 0.4806, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 4.68, |
|
"learning_rate": 0.00016565217391304346, |
|
"loss": 0.4798, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 4.72, |
|
"learning_rate": 0.00016456521739130435, |
|
"loss": 0.5021, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 4.75, |
|
"learning_rate": 0.0001634782608695652, |
|
"loss": 0.4445, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 4.79, |
|
"learning_rate": 0.00016239130434782606, |
|
"loss": 0.4462, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 4.82, |
|
"learning_rate": 0.00016130434782608694, |
|
"loss": 0.5033, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 4.86, |
|
"learning_rate": 0.0001602173913043478, |
|
"loss": 0.4577, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 4.89, |
|
"learning_rate": 0.0001591304347826087, |
|
"loss": 0.4999, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 4.89, |
|
"eval_loss": 0.4281094968318939, |
|
"eval_runtime": 111.6556, |
|
"eval_samples_per_second": 12.995, |
|
"eval_steps_per_second": 0.815, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 4.93, |
|
"learning_rate": 0.00015804347826086956, |
|
"loss": 0.4366, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 4.96, |
|
"learning_rate": 0.00015695652173913042, |
|
"loss": 0.4696, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 0.00015586956521739128, |
|
"loss": 0.491, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 5.03, |
|
"learning_rate": 0.00015478260869565216, |
|
"loss": 0.4202, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 5.07, |
|
"learning_rate": 0.00015369565217391302, |
|
"loss": 0.4307, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 5.1, |
|
"learning_rate": 0.00015260869565217387, |
|
"loss": 0.4103, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 5.14, |
|
"learning_rate": 0.00015152173913043478, |
|
"loss": 0.4157, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 5.17, |
|
"learning_rate": 0.00015043478260869564, |
|
"loss": 0.4326, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 5.21, |
|
"learning_rate": 0.00014934782608695652, |
|
"loss": 0.3842, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 5.24, |
|
"learning_rate": 0.00014826086956521738, |
|
"loss": 0.4072, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 5.24, |
|
"eval_loss": 0.416363924741745, |
|
"eval_runtime": 111.6811, |
|
"eval_samples_per_second": 12.992, |
|
"eval_steps_per_second": 0.815, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 5.28, |
|
"learning_rate": 0.00014717391304347823, |
|
"loss": 0.4093, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 5.31, |
|
"learning_rate": 0.00014608695652173912, |
|
"loss": 0.4013, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 5.35, |
|
"learning_rate": 0.000145, |
|
"loss": 0.4534, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 5.38, |
|
"learning_rate": 0.00014391304347826086, |
|
"loss": 0.3509, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 5.42, |
|
"learning_rate": 0.00014282608695652174, |
|
"loss": 0.4058, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 5.45, |
|
"learning_rate": 0.0001417391304347826, |
|
"loss": 0.3956, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 5.49, |
|
"learning_rate": 0.00014065217391304345, |
|
"loss": 0.415, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 5.52, |
|
"learning_rate": 0.00013956521739130434, |
|
"loss": 0.4255, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 5.56, |
|
"learning_rate": 0.00013847826086956522, |
|
"loss": 0.3925, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 5.59, |
|
"learning_rate": 0.00013739130434782607, |
|
"loss": 0.3855, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 5.59, |
|
"eval_loss": 0.43422192335128784, |
|
"eval_runtime": 111.3707, |
|
"eval_samples_per_second": 13.029, |
|
"eval_steps_per_second": 0.817, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 5.63, |
|
"learning_rate": 0.00013630434782608693, |
|
"loss": 0.4364, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 5.66, |
|
"learning_rate": 0.00013521739130434781, |
|
"loss": 0.3908, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 5.7, |
|
"learning_rate": 0.0001341304347826087, |
|
"loss": 0.4419, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 5.73, |
|
"learning_rate": 0.00013304347826086955, |
|
"loss": 0.3667, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 5.77, |
|
"learning_rate": 0.0001319565217391304, |
|
"loss": 0.3971, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 5.8, |
|
"learning_rate": 0.0001308695652173913, |
|
"loss": 0.4102, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 5.84, |
|
"learning_rate": 0.00012978260869565218, |
|
"loss": 0.4063, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 5.87, |
|
"learning_rate": 0.00012869565217391303, |
|
"loss": 0.4576, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 5.91, |
|
"learning_rate": 0.00012760869565217392, |
|
"loss": 0.3524, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 5.94, |
|
"learning_rate": 0.00012652173913043477, |
|
"loss": 0.4075, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 5.94, |
|
"eval_loss": 0.40860703587532043, |
|
"eval_runtime": 112.0658, |
|
"eval_samples_per_second": 12.948, |
|
"eval_steps_per_second": 0.812, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 5.98, |
|
"learning_rate": 0.00012543478260869563, |
|
"loss": 0.3744, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 6.01, |
|
"learning_rate": 0.0001243478260869565, |
|
"loss": 0.3863, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 6.05, |
|
"learning_rate": 0.0001232608695652174, |
|
"loss": 0.338, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 6.08, |
|
"learning_rate": 0.00012217391304347825, |
|
"loss": 0.3789, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 6.12, |
|
"learning_rate": 0.00012108695652173912, |
|
"loss": 0.3253, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 6.15, |
|
"learning_rate": 0.00011999999999999999, |
|
"loss": 0.3878, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 6.19, |
|
"learning_rate": 0.00011891304347826086, |
|
"loss": 0.3732, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 6.22, |
|
"learning_rate": 0.00011782608695652173, |
|
"loss": 0.3494, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 6.26, |
|
"learning_rate": 0.00011673913043478258, |
|
"loss": 0.3701, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 6.29, |
|
"learning_rate": 0.00011565217391304347, |
|
"loss": 0.347, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 6.29, |
|
"eval_loss": 0.42197009921073914, |
|
"eval_runtime": 111.9078, |
|
"eval_samples_per_second": 12.966, |
|
"eval_steps_per_second": 0.813, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 6.33, |
|
"learning_rate": 0.00011456521739130434, |
|
"loss": 0.3484, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 6.36, |
|
"learning_rate": 0.00011347826086956521, |
|
"loss": 0.3328, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"learning_rate": 0.00011239130434782608, |
|
"loss": 0.3444, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 6.43, |
|
"learning_rate": 0.00011130434782608695, |
|
"loss": 0.321, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 6.47, |
|
"learning_rate": 0.00011021739130434782, |
|
"loss": 0.4089, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 6.5, |
|
"learning_rate": 0.00010913043478260867, |
|
"loss": 0.335, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 6.54, |
|
"learning_rate": 0.00010804347826086956, |
|
"loss": 0.376, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 6.57, |
|
"learning_rate": 0.00010695652173913043, |
|
"loss": 0.3324, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 6.61, |
|
"learning_rate": 0.0001058695652173913, |
|
"loss": 0.3359, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 6.64, |
|
"learning_rate": 0.00010478260869565216, |
|
"loss": 0.3708, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 6.64, |
|
"eval_loss": 0.4211980998516083, |
|
"eval_runtime": 113.1099, |
|
"eval_samples_per_second": 12.828, |
|
"eval_steps_per_second": 0.805, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 6.68, |
|
"learning_rate": 0.00010369565217391303, |
|
"loss": 0.3512, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 6.71, |
|
"learning_rate": 0.0001026086956521739, |
|
"loss": 0.3381, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 6.75, |
|
"learning_rate": 0.00010152173913043479, |
|
"loss": 0.2925, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 6.78, |
|
"learning_rate": 0.00010043478260869564, |
|
"loss": 0.3805, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 6.82, |
|
"learning_rate": 9.934782608695651e-05, |
|
"loss": 0.3803, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 6.85, |
|
"learning_rate": 9.826086956521738e-05, |
|
"loss": 0.3499, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 6.89, |
|
"learning_rate": 9.717391304347825e-05, |
|
"loss": 0.3419, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 6.92, |
|
"learning_rate": 9.608695652173912e-05, |
|
"loss": 0.2918, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 6.96, |
|
"learning_rate": 9.499999999999999e-05, |
|
"loss": 0.3602, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 6.99, |
|
"learning_rate": 9.391304347826087e-05, |
|
"loss": 0.3788, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 6.99, |
|
"eval_loss": 0.419994056224823, |
|
"eval_runtime": 111.5298, |
|
"eval_samples_per_second": 13.01, |
|
"eval_steps_per_second": 0.816, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 7.03, |
|
"learning_rate": 9.282608695652173e-05, |
|
"loss": 0.3368, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 7.06, |
|
"learning_rate": 9.17391304347826e-05, |
|
"loss": 0.3186, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 7.1, |
|
"learning_rate": 9.065217391304346e-05, |
|
"loss": 0.3134, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 7.13, |
|
"learning_rate": 8.956521739130434e-05, |
|
"loss": 0.3021, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 7.17, |
|
"learning_rate": 8.847826086956521e-05, |
|
"loss": 0.3168, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 7.2, |
|
"learning_rate": 8.739130434782608e-05, |
|
"loss": 0.2573, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 7.24, |
|
"learning_rate": 8.630434782608696e-05, |
|
"loss": 0.3107, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 7.27, |
|
"learning_rate": 8.521739130434782e-05, |
|
"loss": 0.2961, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 7.31, |
|
"learning_rate": 8.413043478260869e-05, |
|
"loss": 0.3487, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 7.34, |
|
"learning_rate": 8.304347826086954e-05, |
|
"loss": 0.337, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 7.34, |
|
"eval_loss": 0.41391661763191223, |
|
"eval_runtime": 111.525, |
|
"eval_samples_per_second": 13.011, |
|
"eval_steps_per_second": 0.816, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 7.38, |
|
"learning_rate": 8.195652173913043e-05, |
|
"loss": 0.3379, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 7.41, |
|
"learning_rate": 8.08695652173913e-05, |
|
"loss": 0.3168, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 7.45, |
|
"learning_rate": 7.978260869565217e-05, |
|
"loss": 0.3339, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 7.48, |
|
"learning_rate": 7.869565217391304e-05, |
|
"loss": 0.3712, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 7.52, |
|
"learning_rate": 7.76086956521739e-05, |
|
"loss": 0.2807, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 7.55, |
|
"learning_rate": 7.652173913043478e-05, |
|
"loss": 0.3018, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 7.59, |
|
"learning_rate": 7.543478260869563e-05, |
|
"loss": 0.311, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 7.62, |
|
"learning_rate": 7.434782608695652e-05, |
|
"loss": 0.309, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 7.66, |
|
"learning_rate": 7.326086956521738e-05, |
|
"loss": 0.3175, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 7.69, |
|
"learning_rate": 7.217391304347825e-05, |
|
"loss": 0.3045, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 7.69, |
|
"eval_loss": 0.4140127897262573, |
|
"eval_runtime": 111.986, |
|
"eval_samples_per_second": 12.957, |
|
"eval_steps_per_second": 0.813, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 7.73, |
|
"learning_rate": 7.108695652173912e-05, |
|
"loss": 0.3065, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 7.76, |
|
"learning_rate": 7e-05, |
|
"loss": 0.2784, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 7.8, |
|
"learning_rate": 6.891304347826086e-05, |
|
"loss": 0.3367, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 7.83, |
|
"learning_rate": 6.782608695652173e-05, |
|
"loss": 0.3176, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 7.87, |
|
"learning_rate": 6.67391304347826e-05, |
|
"loss": 0.3126, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 7.9, |
|
"learning_rate": 6.565217391304347e-05, |
|
"loss": 0.3586, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 7.94, |
|
"learning_rate": 6.456521739130434e-05, |
|
"loss": 0.3199, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 7.97, |
|
"learning_rate": 6.347826086956521e-05, |
|
"loss": 0.3302, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 8.01, |
|
"learning_rate": 6.239130434782608e-05, |
|
"loss": 0.346, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 8.04, |
|
"learning_rate": 6.130434782608695e-05, |
|
"loss": 0.2547, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 8.04, |
|
"eval_loss": 0.41661086678504944, |
|
"eval_runtime": 111.4943, |
|
"eval_samples_per_second": 13.014, |
|
"eval_steps_per_second": 0.816, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 8.08, |
|
"learning_rate": 6.021739130434782e-05, |
|
"loss": 0.2923, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 8.11, |
|
"learning_rate": 5.913043478260869e-05, |
|
"loss": 0.2889, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 8.15, |
|
"learning_rate": 5.804347826086956e-05, |
|
"loss": 0.2858, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 8.18, |
|
"learning_rate": 5.695652173913043e-05, |
|
"loss": 0.3082, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 8.22, |
|
"learning_rate": 5.58695652173913e-05, |
|
"loss": 0.2721, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 8.25, |
|
"learning_rate": 5.478260869565217e-05, |
|
"loss": 0.2911, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 8.29, |
|
"learning_rate": 5.3695652173913046e-05, |
|
"loss": 0.3015, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 8.32, |
|
"learning_rate": 5.260869565217391e-05, |
|
"loss": 0.3471, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 8.36, |
|
"learning_rate": 5.152173913043478e-05, |
|
"loss": 0.3053, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 8.39, |
|
"learning_rate": 5.043478260869565e-05, |
|
"loss": 0.2584, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 8.39, |
|
"eval_loss": 0.408309668302536, |
|
"eval_runtime": 111.5385, |
|
"eval_samples_per_second": 13.009, |
|
"eval_steps_per_second": 0.816, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 8.43, |
|
"learning_rate": 4.934782608695652e-05, |
|
"loss": 0.3029, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 8.46, |
|
"learning_rate": 4.826086956521738e-05, |
|
"loss": 0.2933, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 8.5, |
|
"learning_rate": 4.717391304347826e-05, |
|
"loss": 0.2807, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 8.53, |
|
"learning_rate": 4.6086956521739126e-05, |
|
"loss": 0.2899, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 8.57, |
|
"learning_rate": 4.4999999999999996e-05, |
|
"loss": 0.229, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 8.6, |
|
"learning_rate": 4.3913043478260866e-05, |
|
"loss": 0.2919, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 8.64, |
|
"learning_rate": 4.2826086956521735e-05, |
|
"loss": 0.2989, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 8.67, |
|
"learning_rate": 4.1739130434782605e-05, |
|
"loss": 0.2919, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 8.71, |
|
"learning_rate": 4.065217391304348e-05, |
|
"loss": 0.2988, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 8.74, |
|
"learning_rate": 3.9565217391304344e-05, |
|
"loss": 0.2673, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 8.74, |
|
"eval_loss": 0.39945822954177856, |
|
"eval_runtime": 111.4334, |
|
"eval_samples_per_second": 13.021, |
|
"eval_steps_per_second": 0.817, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 8.77, |
|
"learning_rate": 3.8478260869565214e-05, |
|
"loss": 0.2668, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 8.81, |
|
"learning_rate": 3.7391304347826084e-05, |
|
"loss": 0.3159, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 8.84, |
|
"learning_rate": 3.630434782608695e-05, |
|
"loss": 0.3037, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 8.88, |
|
"learning_rate": 3.521739130434782e-05, |
|
"loss": 0.2869, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 8.91, |
|
"learning_rate": 3.413043478260869e-05, |
|
"loss": 0.2792, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 8.95, |
|
"learning_rate": 3.304347826086956e-05, |
|
"loss": 0.2939, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 8.98, |
|
"learning_rate": 3.195652173913043e-05, |
|
"loss": 0.2781, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 9.02, |
|
"learning_rate": 3.08695652173913e-05, |
|
"loss": 0.3154, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 9.06, |
|
"learning_rate": 2.978260869565217e-05, |
|
"loss": 0.301, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"learning_rate": 2.869565217391304e-05, |
|
"loss": 0.2765, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"eval_loss": 0.4026023745536804, |
|
"eval_runtime": 111.3595, |
|
"eval_samples_per_second": 13.03, |
|
"eval_steps_per_second": 0.817, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 9.13, |
|
"learning_rate": 2.760869565217391e-05, |
|
"loss": 0.2801, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 9.16, |
|
"learning_rate": 2.652173913043478e-05, |
|
"loss": 0.263, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 9.2, |
|
"learning_rate": 2.543478260869565e-05, |
|
"loss": 0.2586, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 9.23, |
|
"learning_rate": 2.4347826086956516e-05, |
|
"loss": 0.2496, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 9.27, |
|
"learning_rate": 2.3260869565217393e-05, |
|
"loss": 0.269, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 9.3, |
|
"learning_rate": 2.217391304347826e-05, |
|
"loss": 0.296, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 9.34, |
|
"learning_rate": 2.108695652173913e-05, |
|
"loss": 0.2531, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 9.37, |
|
"learning_rate": 1.9999999999999998e-05, |
|
"loss": 0.2871, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 9.4, |
|
"learning_rate": 1.8913043478260868e-05, |
|
"loss": 0.2616, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 9.44, |
|
"learning_rate": 1.7826086956521738e-05, |
|
"loss": 0.2453, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 9.44, |
|
"eval_loss": 0.4079287052154541, |
|
"eval_runtime": 111.4001, |
|
"eval_samples_per_second": 13.025, |
|
"eval_steps_per_second": 0.817, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 9.47, |
|
"learning_rate": 1.6739130434782607e-05, |
|
"loss": 0.3043, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 9.51, |
|
"learning_rate": 1.5652173913043477e-05, |
|
"loss": 0.2297, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 9.54, |
|
"learning_rate": 1.4565217391304347e-05, |
|
"loss": 0.2844, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 9.58, |
|
"learning_rate": 1.3478260869565216e-05, |
|
"loss": 0.2485, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 9.61, |
|
"learning_rate": 1.2391304347826086e-05, |
|
"loss": 0.2695, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 9.65, |
|
"learning_rate": 1.1304347826086956e-05, |
|
"loss": 0.3031, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 9.68, |
|
"learning_rate": 1.0217391304347825e-05, |
|
"loss": 0.2716, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 9.72, |
|
"learning_rate": 9.130434782608695e-06, |
|
"loss": 0.2424, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 9.75, |
|
"learning_rate": 8.043478260869565e-06, |
|
"loss": 0.2652, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 9.79, |
|
"learning_rate": 6.956521739130434e-06, |
|
"loss": 0.2883, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 9.79, |
|
"eval_loss": 0.4038572311401367, |
|
"eval_runtime": 111.7946, |
|
"eval_samples_per_second": 12.979, |
|
"eval_steps_per_second": 0.814, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 9.82, |
|
"learning_rate": 5.869565217391305e-06, |
|
"loss": 0.2743, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 9.86, |
|
"learning_rate": 4.7826086956521735e-06, |
|
"loss": 0.2823, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 9.89, |
|
"learning_rate": 3.695652173913043e-06, |
|
"loss": 0.2766, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 9.93, |
|
"learning_rate": 2.608695652173913e-06, |
|
"loss": 0.2881, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 9.96, |
|
"learning_rate": 1.5217391304347823e-06, |
|
"loss": 0.3219, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 4.3478260869565214e-07, |
|
"loss": 0.294, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 2860, |
|
"total_flos": 6.827433558323335e+18, |
|
"train_loss": 1.1227069218675574, |
|
"train_runtime": 9275.3367, |
|
"train_samples_per_second": 4.936, |
|
"train_steps_per_second": 0.308 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_loss": 0.40599533915519714, |
|
"eval_runtime": 111.6061, |
|
"eval_samples_per_second": 13.001, |
|
"eval_steps_per_second": 0.815, |
|
"step": 2860 |
|
} |
|
], |
|
"max_steps": 2860, |
|
"num_train_epochs": 10, |
|
"total_flos": 6.827433558323335e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|