|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 6.915300209591912, |
|
"global_step": 3500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 5.555555555555555e-05, |
|
"loss": 13.7427, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0001111111111111111, |
|
"loss": 8.8821, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00016666666666666666, |
|
"loss": 2.5857, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0002222222222222222, |
|
"loss": 1.3518, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0002777777777777778, |
|
"loss": 1.0054, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0003333333333333333, |
|
"loss": 0.864, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0003888888888888889, |
|
"loss": 0.8669, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0004444444444444444, |
|
"loss": 0.9395, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0005, |
|
"loss": 1.0424, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0004985515643105447, |
|
"loss": 0.6298, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_loss": 1.61441171169281, |
|
"eval_rouge1": 0.07727272727272727, |
|
"eval_rouge2": 0.061111111111111116, |
|
"eval_rougeL": 0.06893939393939394, |
|
"eval_rougeLsum": 0.08333333333333333, |
|
"eval_runtime": 91.9317, |
|
"eval_samples_per_second": 0.218, |
|
"eval_steps_per_second": 0.218, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0004971031286210893, |
|
"loss": 0.5699, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0004956546929316338, |
|
"loss": 0.6074, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0004942062572421785, |
|
"loss": 0.6291, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0004927578215527231, |
|
"loss": 0.5098, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0004913093858632677, |
|
"loss": 0.4448, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0004898609501738123, |
|
"loss": 0.4508, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.0004884125144843569, |
|
"loss": 0.4255, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00048696407879490153, |
|
"loss": 0.3705, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0004855156431054461, |
|
"loss": 0.4009, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00048406720741599077, |
|
"loss": 0.4317, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_loss": 1.176011323928833, |
|
"eval_rouge1": 0.16153846153846152, |
|
"eval_rouge2": 0.06988636363636364, |
|
"eval_rougeL": 0.15999999999999998, |
|
"eval_rougeLsum": 0.15999999999999998, |
|
"eval_runtime": 88.2692, |
|
"eval_samples_per_second": 0.227, |
|
"eval_steps_per_second": 0.227, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.00048261877172653536, |
|
"loss": 0.3679, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00048117033603707995, |
|
"loss": 0.3252, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.0004797219003476246, |
|
"loss": 0.3707, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.0004782734646581692, |
|
"loss": 0.3543, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.0004768250289687138, |
|
"loss": 0.3434, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.0004753765932792584, |
|
"loss": 0.4007, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.000473928157589803, |
|
"loss": 0.3552, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.00047247972190034765, |
|
"loss": 0.388, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.00047103128621089224, |
|
"loss": 0.307, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.0004695828505214368, |
|
"loss": 0.2588, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"eval_loss": 1.0633952617645264, |
|
"eval_rouge1": 0.08, |
|
"eval_rouge2": 0.05555555555555556, |
|
"eval_rougeL": 0.08414141414141416, |
|
"eval_rougeLsum": 0.08383838383838385, |
|
"eval_runtime": 88.5138, |
|
"eval_samples_per_second": 0.226, |
|
"eval_steps_per_second": 0.226, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00046813441483198147, |
|
"loss": 0.2965, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.00046668597914252606, |
|
"loss": 0.3096, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.0004652375434530707, |
|
"loss": 0.301, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.00046378910776361535, |
|
"loss": 0.3153, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.00046234067207415994, |
|
"loss": 0.3549, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.0004608922363847045, |
|
"loss": 0.356, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.00045944380069524917, |
|
"loss": 0.3278, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.00045799536500579376, |
|
"loss": 0.2985, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.00045654692931633835, |
|
"loss": 0.3152, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.000455098493626883, |
|
"loss": 0.2665, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_loss": 1.044259786605835, |
|
"eval_rouge1": 0.06307692307692307, |
|
"eval_rouge2": 0.021590909090909088, |
|
"eval_rougeL": 0.06307692307692307, |
|
"eval_rougeLsum": 0.06307692307692307, |
|
"eval_runtime": 80.5878, |
|
"eval_samples_per_second": 0.248, |
|
"eval_steps_per_second": 0.248, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 0.0004536500579374276, |
|
"loss": 0.2323, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 0.00045220162224797217, |
|
"loss": 0.2222, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 0.0004507531865585168, |
|
"loss": 0.2516, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.0004493047508690614, |
|
"loss": 0.2851, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 0.000447856315179606, |
|
"loss": 0.2677, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 0.00044640787949015064, |
|
"loss": 0.2447, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 0.00044495944380069523, |
|
"loss": 0.3186, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.0004435110081112398, |
|
"loss": 0.3035, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 0.0004420625724217845, |
|
"loss": 0.3036, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 0.0004406141367323291, |
|
"loss": 0.1972, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_loss": 1.0465357303619385, |
|
"eval_rouge1": 0.1908791208791209, |
|
"eval_rouge2": 0.10681818181818181, |
|
"eval_rougeL": 0.17934065934065935, |
|
"eval_rougeLsum": 0.19159340659340657, |
|
"eval_runtime": 84.3482, |
|
"eval_samples_per_second": 0.237, |
|
"eval_steps_per_second": 0.237, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 0.00043916570104287375, |
|
"loss": 0.279, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 0.00043771726535341834, |
|
"loss": 0.272, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 0.00043626882966396293, |
|
"loss": 0.2272, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 0.0004348203939745076, |
|
"loss": 0.2495, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 0.00043337195828505216, |
|
"loss": 0.1965, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 0.00043192352259559675, |
|
"loss": 0.2364, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 0.0004304750869061414, |
|
"loss": 0.2478, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 0.000429026651216686, |
|
"loss": 0.2046, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 0.0004275782155272306, |
|
"loss": 0.2661, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 0.0004261297798377752, |
|
"loss": 0.2041, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"eval_loss": 0.9551488757133484, |
|
"eval_rouge1": 0.09038461538461538, |
|
"eval_rouge2": 0.05051948051948052, |
|
"eval_rougeL": 0.09679487179487178, |
|
"eval_rougeLsum": 0.09871794871794871, |
|
"eval_runtime": 89.0139, |
|
"eval_samples_per_second": 0.225, |
|
"eval_steps_per_second": 0.225, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 0.0004246813441483198, |
|
"loss": 0.2816, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 0.0004232329084588644, |
|
"loss": 0.1904, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 0.00042178447276940904, |
|
"loss": 0.21, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 0.0004203360370799537, |
|
"loss": 0.1662, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 0.0004188876013904983, |
|
"loss": 0.3052, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 0.0004174391657010429, |
|
"loss": 0.1744, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 0.0004159907300115875, |
|
"loss": 0.288, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 0.0004145422943221321, |
|
"loss": 0.2303, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 0.00041309385863267674, |
|
"loss": 0.2866, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 0.00041164542294322133, |
|
"loss": 0.238, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"eval_loss": 0.9423090219497681, |
|
"eval_rouge1": 0.1, |
|
"eval_rouge2": 0.07291666666666667, |
|
"eval_rougeL": 0.1, |
|
"eval_rougeLsum": 0.10333333333333335, |
|
"eval_runtime": 88.0456, |
|
"eval_samples_per_second": 0.227, |
|
"eval_steps_per_second": 0.227, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 0.0004101969872537659, |
|
"loss": 0.2342, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 0.00040874855156431057, |
|
"loss": 0.2716, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 0.00040730011587485516, |
|
"loss": 0.2453, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 0.00040585168018539974, |
|
"loss": 0.2313, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 0.0004044032444959444, |
|
"loss": 0.2306, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 0.000402954808806489, |
|
"loss": 0.1773, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 0.00040150637311703357, |
|
"loss": 0.1957, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 0.0004000579374275782, |
|
"loss": 0.2758, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 0.0003986095017381228, |
|
"loss": 0.2649, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 0.0003971610660486675, |
|
"loss": 0.275, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"eval_loss": 0.9273136258125305, |
|
"eval_rouge1": 0.14666666666666667, |
|
"eval_rouge2": 0.10977272727272727, |
|
"eval_rougeL": 0.15038461538461537, |
|
"eval_rougeLsum": 0.15153846153846154, |
|
"eval_runtime": 87.8017, |
|
"eval_samples_per_second": 0.228, |
|
"eval_steps_per_second": 0.228, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 0.0003957126303592121, |
|
"loss": 0.2102, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 0.0003942641946697567, |
|
"loss": 0.2146, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 0.0003928157589803013, |
|
"loss": 0.1918, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 0.0003913673232908459, |
|
"loss": 0.2512, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 0.0003899188876013905, |
|
"loss": 0.2499, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 0.00038847045191193515, |
|
"loss": 0.228, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 0.00038702201622247974, |
|
"loss": 0.2507, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 0.0003855735805330243, |
|
"loss": 0.1735, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 0.00038412514484356897, |
|
"loss": 0.2752, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 0.00038267670915411356, |
|
"loss": 0.2379, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"eval_loss": 0.9023244976997375, |
|
"eval_rouge1": 0.1, |
|
"eval_rouge2": 0.08333333333333333, |
|
"eval_rougeL": 0.1, |
|
"eval_rougeLsum": 0.1, |
|
"eval_runtime": 80.7798, |
|
"eval_samples_per_second": 0.248, |
|
"eval_steps_per_second": 0.248, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 0.00038122827346465815, |
|
"loss": 0.1993, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 0.0003797798377752028, |
|
"loss": 0.2058, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 0.0003783314020857474, |
|
"loss": 0.2675, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 0.00037688296639629197, |
|
"loss": 0.1928, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 0.0003754345307068366, |
|
"loss": 0.1903, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 0.00037398609501738126, |
|
"loss": 0.1967, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 0.00037253765932792585, |
|
"loss": 0.2044, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 0.0003710892236384705, |
|
"loss": 0.2027, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 0.0003696407879490151, |
|
"loss": 0.25, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 0.00036819235225955967, |
|
"loss": 0.2896, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"eval_loss": 0.9184179306030273, |
|
"eval_rouge1": 0.19, |
|
"eval_rouge2": 0.1, |
|
"eval_rougeL": 0.18893939393939393, |
|
"eval_rougeLsum": 0.19848484848484846, |
|
"eval_runtime": 81.9559, |
|
"eval_samples_per_second": 0.244, |
|
"eval_steps_per_second": 0.244, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 0.0003667439165701043, |
|
"loss": 0.1462, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 0.0003652954808806489, |
|
"loss": 0.1944, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 0.0003638470451911935, |
|
"loss": 0.2296, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 0.00036239860950173814, |
|
"loss": 0.2446, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 0.00036095017381228273, |
|
"loss": 0.1448, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 0.0003595017381228273, |
|
"loss": 0.1507, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 0.00035805330243337196, |
|
"loss": 0.1636, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 0.00035660486674391655, |
|
"loss": 0.1909, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 0.0003551564310544612, |
|
"loss": 0.1895, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 0.0003537079953650058, |
|
"loss": 0.2663, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"eval_loss": 0.9002671241760254, |
|
"eval_rouge1": 0.07948717948717948, |
|
"eval_rouge2": 0.06779220779220778, |
|
"eval_rougeL": 0.08782051282051281, |
|
"eval_rougeLsum": 0.08333333333333333, |
|
"eval_runtime": 84.9078, |
|
"eval_samples_per_second": 0.236, |
|
"eval_steps_per_second": 0.236, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 0.0003522595596755504, |
|
"loss": 0.1672, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 0.00035081112398609507, |
|
"loss": 0.1909, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 0.00034936268829663966, |
|
"loss": 0.1675, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 0.00034791425260718425, |
|
"loss": 0.2636, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 0.0003464658169177289, |
|
"loss": 0.2119, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 0.0003450173812282735, |
|
"loss": 0.2114, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 0.0003435689455388181, |
|
"loss": 0.1456, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 0.0003421205098493627, |
|
"loss": 0.1993, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 0.0003406720741599073, |
|
"loss": 0.1467, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 0.0003392236384704519, |
|
"loss": 0.237, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"eval_loss": 0.9139176607131958, |
|
"eval_rouge1": 0.19904761904761903, |
|
"eval_rouge2": 0.10285714285714284, |
|
"eval_rougeL": 0.19511904761904764, |
|
"eval_rougeLsum": 0.20619047619047615, |
|
"eval_runtime": 85.9903, |
|
"eval_samples_per_second": 0.233, |
|
"eval_steps_per_second": 0.233, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 0.00033777520278099654, |
|
"loss": 0.1996, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 0.00033632676709154113, |
|
"loss": 0.1725, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 0.0003348783314020857, |
|
"loss": 0.2089, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 0.00033342989571263036, |
|
"loss": 0.183, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 0.00033198146002317495, |
|
"loss": 0.1631, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 0.00033053302433371954, |
|
"loss": 0.1889, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 0.00032908458864426424, |
|
"loss": 0.172, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 0.00032763615295480883, |
|
"loss": 0.1236, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 0.0003261877172653534, |
|
"loss": 0.1682, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 0.00032473928157589806, |
|
"loss": 0.2019, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"eval_loss": 0.920964241027832, |
|
"eval_rouge1": 0.11282051282051282, |
|
"eval_rouge2": 0.03636363636363636, |
|
"eval_rougeL": 0.1128205128205128, |
|
"eval_rougeLsum": 0.11607142857142858, |
|
"eval_runtime": 82.9262, |
|
"eval_samples_per_second": 0.241, |
|
"eval_steps_per_second": 0.241, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 0.00032329084588644265, |
|
"loss": 0.1681, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 0.00032184241019698724, |
|
"loss": 0.2372, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 0.0003203939745075319, |
|
"loss": 0.1343, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 0.0003189455388180765, |
|
"loss": 0.2125, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 0.0003174971031286211, |
|
"loss": 0.2217, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 0.0003160486674391657, |
|
"loss": 0.1542, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 0.0003146002317497103, |
|
"loss": 0.171, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 0.00031315179606025494, |
|
"loss": 0.1808, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 0.00031170336037079953, |
|
"loss": 0.1423, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 0.0003102549246813441, |
|
"loss": 0.1794, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"eval_loss": 0.9037507772445679, |
|
"eval_rouge1": 0.11666666666666667, |
|
"eval_rouge2": 0.08636363636363635, |
|
"eval_rougeL": 0.11833333333333333, |
|
"eval_rougeLsum": 0.12064102564102563, |
|
"eval_runtime": 84.5851, |
|
"eval_samples_per_second": 0.236, |
|
"eval_steps_per_second": 0.236, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 0.00030880648899188877, |
|
"loss": 0.2313, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 0.00030735805330243336, |
|
"loss": 0.1548, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 0.000305909617612978, |
|
"loss": 0.2318, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 0.00030446118192352264, |
|
"loss": 0.1959, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 0.00030301274623406723, |
|
"loss": 0.1438, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 0.0003015643105446118, |
|
"loss": 0.1953, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 0.00030011587485515647, |
|
"loss": 0.1542, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 0.00029866743916570106, |
|
"loss": 0.1693, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 0.00029721900347624565, |
|
"loss": 0.1836, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 0.0002957705677867903, |
|
"loss": 0.1847, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"eval_loss": 0.8892697095870972, |
|
"eval_rouge1": 0.14335664335664336, |
|
"eval_rouge2": 0.13131313131313133, |
|
"eval_rougeL": 0.14375624375624377, |
|
"eval_rougeLsum": 0.14725274725274726, |
|
"eval_runtime": 86.985, |
|
"eval_samples_per_second": 0.23, |
|
"eval_steps_per_second": 0.23, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 0.0002943221320973349, |
|
"loss": 0.1245, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 0.00029287369640787947, |
|
"loss": 0.1917, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 0.0002914252607184241, |
|
"loss": 0.205, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 0.0002899768250289687, |
|
"loss": 0.1493, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"learning_rate": 0.0002885283893395133, |
|
"loss": 0.1596, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"learning_rate": 0.00028707995365005794, |
|
"loss": 0.1689, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 0.0002856315179606025, |
|
"loss": 0.1371, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 0.0002841830822711471, |
|
"loss": 0.1676, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"learning_rate": 0.0002827346465816918, |
|
"loss": 0.1441, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"learning_rate": 0.0002812862108922364, |
|
"loss": 0.1436, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"eval_loss": 0.8872199058532715, |
|
"eval_rouge1": 0.16825396825396824, |
|
"eval_rouge2": 0.05833333333333333, |
|
"eval_rougeL": 0.1650793650793651, |
|
"eval_rougeLsum": 0.17285714285714288, |
|
"eval_runtime": 83.5131, |
|
"eval_samples_per_second": 0.239, |
|
"eval_steps_per_second": 0.239, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"learning_rate": 0.000279837775202781, |
|
"loss": 0.2173, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 0.00027838933951332564, |
|
"loss": 0.1457, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"learning_rate": 0.0002769409038238702, |
|
"loss": 0.1503, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 3.24, |
|
"learning_rate": 0.00027549246813441487, |
|
"loss": 0.1371, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"learning_rate": 0.00027404403244495946, |
|
"loss": 0.1331, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"learning_rate": 0.00027259559675550405, |
|
"loss": 0.2068, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"learning_rate": 0.0002711471610660487, |
|
"loss": 0.2001, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 3.32, |
|
"learning_rate": 0.0002696987253765933, |
|
"loss": 0.177, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 3.34, |
|
"learning_rate": 0.00026825028968713787, |
|
"loss": 0.1772, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 3.36, |
|
"learning_rate": 0.0002668018539976825, |
|
"loss": 0.138, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 3.36, |
|
"eval_loss": 0.8929020762443542, |
|
"eval_rouge1": 0.22999999999999998, |
|
"eval_rouge2": 0.12491883116883117, |
|
"eval_rougeL": 0.22615384615384615, |
|
"eval_rougeLsum": 0.23115384615384618, |
|
"eval_runtime": 86.6494, |
|
"eval_samples_per_second": 0.231, |
|
"eval_steps_per_second": 0.231, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"learning_rate": 0.0002653534183082271, |
|
"loss": 0.164, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"learning_rate": 0.0002639049826187717, |
|
"loss": 0.1249, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 3.42, |
|
"learning_rate": 0.00026245654692931634, |
|
"loss": 0.1356, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 3.44, |
|
"learning_rate": 0.000261008111239861, |
|
"loss": 0.1374, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 3.46, |
|
"learning_rate": 0.00025955967555040557, |
|
"loss": 0.2013, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"learning_rate": 0.0002581112398609502, |
|
"loss": 0.1337, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"learning_rate": 0.0002566628041714948, |
|
"loss": 0.1226, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 3.52, |
|
"learning_rate": 0.0002552143684820394, |
|
"loss": 0.1166, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"learning_rate": 0.00025376593279258404, |
|
"loss": 0.2308, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"learning_rate": 0.00025231749710312863, |
|
"loss": 0.1265, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"eval_loss": 0.9203845858573914, |
|
"eval_rouge1": 0.17454545454545453, |
|
"eval_rouge2": 0.07291666666666667, |
|
"eval_rougeL": 0.16999999999999998, |
|
"eval_rougeLsum": 0.17727272727272728, |
|
"eval_runtime": 87.9704, |
|
"eval_samples_per_second": 0.227, |
|
"eval_steps_per_second": 0.227, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 3.58, |
|
"learning_rate": 0.0002508690614136732, |
|
"loss": 0.1526, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"learning_rate": 0.00024942062572421786, |
|
"loss": 0.2201, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 3.62, |
|
"learning_rate": 0.00024797219003476245, |
|
"loss": 0.1271, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"learning_rate": 0.0002465237543453071, |
|
"loss": 0.1749, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"learning_rate": 0.0002450753186558517, |
|
"loss": 0.133, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"learning_rate": 0.0002436268829663963, |
|
"loss": 0.2259, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 3.69, |
|
"learning_rate": 0.00024217844727694092, |
|
"loss": 0.1549, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 3.71, |
|
"learning_rate": 0.00024073001158748554, |
|
"loss": 0.1173, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 3.73, |
|
"learning_rate": 0.00023928157589803013, |
|
"loss": 0.1337, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"learning_rate": 0.00023783314020857474, |
|
"loss": 0.1828, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"eval_loss": 0.9094276428222656, |
|
"eval_rouge1": 0.18, |
|
"eval_rouge2": 0.14886363636363636, |
|
"eval_rougeL": 0.18, |
|
"eval_rougeLsum": 0.18615384615384614, |
|
"eval_runtime": 84.5106, |
|
"eval_samples_per_second": 0.237, |
|
"eval_steps_per_second": 0.237, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"learning_rate": 0.00023638470451911936, |
|
"loss": 0.1821, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 3.79, |
|
"learning_rate": 0.00023493626882966395, |
|
"loss": 0.1257, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 3.81, |
|
"learning_rate": 0.00023348783314020857, |
|
"loss": 0.172, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"learning_rate": 0.0002320393974507532, |
|
"loss": 0.1833, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 3.85, |
|
"learning_rate": 0.0002305909617612978, |
|
"loss": 0.1334, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"learning_rate": 0.00022914252607184242, |
|
"loss": 0.1736, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 3.89, |
|
"learning_rate": 0.00022769409038238703, |
|
"loss": 0.1163, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"learning_rate": 0.00022624565469293165, |
|
"loss": 0.1844, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"learning_rate": 0.00022479721900347624, |
|
"loss": 0.1358, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 3.95, |
|
"learning_rate": 0.00022334878331402086, |
|
"loss": 0.1447, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 3.95, |
|
"eval_loss": 0.89415442943573, |
|
"eval_rouge1": 0.19, |
|
"eval_rouge2": 0.09886363636363635, |
|
"eval_rougeL": 0.18615384615384617, |
|
"eval_rougeLsum": 0.19615384615384615, |
|
"eval_runtime": 84.0506, |
|
"eval_samples_per_second": 0.238, |
|
"eval_steps_per_second": 0.238, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 3.97, |
|
"learning_rate": 0.00022190034762456547, |
|
"loss": 0.1566, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"learning_rate": 0.0002204519119351101, |
|
"loss": 0.1132, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"learning_rate": 0.0002190034762456547, |
|
"loss": 0.1013, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"learning_rate": 0.00021755504055619932, |
|
"loss": 0.1554, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"learning_rate": 0.0002161066048667439, |
|
"loss": 0.1405, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 4.07, |
|
"learning_rate": 0.00021465816917728853, |
|
"loss": 0.152, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 4.09, |
|
"learning_rate": 0.00021320973348783315, |
|
"loss": 0.1591, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"learning_rate": 0.00021176129779837773, |
|
"loss": 0.152, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 4.13, |
|
"learning_rate": 0.00021031286210892235, |
|
"loss": 0.1059, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 4.15, |
|
"learning_rate": 0.000208864426419467, |
|
"loss": 0.099, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 4.15, |
|
"eval_loss": 0.9297454953193665, |
|
"eval_rouge1": 0.23864468864468863, |
|
"eval_rouge2": 0.15, |
|
"eval_rougeL": 0.23516483516483513, |
|
"eval_rougeLsum": 0.24514652014652014, |
|
"eval_runtime": 85.7086, |
|
"eval_samples_per_second": 0.233, |
|
"eval_steps_per_second": 0.233, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 4.17, |
|
"learning_rate": 0.0002074159907300116, |
|
"loss": 0.0916, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 4.19, |
|
"learning_rate": 0.0002059675550405562, |
|
"loss": 0.2006, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 4.21, |
|
"learning_rate": 0.00020451911935110082, |
|
"loss": 0.144, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 4.23, |
|
"learning_rate": 0.00020307068366164544, |
|
"loss": 0.1893, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 4.25, |
|
"learning_rate": 0.00020162224797219002, |
|
"loss": 0.1697, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 4.27, |
|
"learning_rate": 0.00020017381228273464, |
|
"loss": 0.1101, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 4.29, |
|
"learning_rate": 0.00019872537659327929, |
|
"loss": 0.1539, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 4.31, |
|
"learning_rate": 0.00019727694090382387, |
|
"loss": 0.1038, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 4.33, |
|
"learning_rate": 0.0001958285052143685, |
|
"loss": 0.1466, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"learning_rate": 0.0001943800695249131, |
|
"loss": 0.1366, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"eval_loss": 0.9124263525009155, |
|
"eval_rouge1": 0.12, |
|
"eval_rouge2": 0.07291666666666667, |
|
"eval_rougeL": 0.12, |
|
"eval_rougeLsum": 0.12454545454545454, |
|
"eval_runtime": 91.5173, |
|
"eval_samples_per_second": 0.219, |
|
"eval_steps_per_second": 0.219, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 4.37, |
|
"learning_rate": 0.0001929316338354577, |
|
"loss": 0.1298, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 4.39, |
|
"learning_rate": 0.00019148319814600231, |
|
"loss": 0.1886, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 4.41, |
|
"learning_rate": 0.00019003476245654693, |
|
"loss": 0.1579, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 4.43, |
|
"learning_rate": 0.00018858632676709152, |
|
"loss": 0.1078, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 4.45, |
|
"learning_rate": 0.00018713789107763616, |
|
"loss": 0.1509, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 4.47, |
|
"learning_rate": 0.00018568945538818078, |
|
"loss": 0.108, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 4.49, |
|
"learning_rate": 0.0001842410196987254, |
|
"loss": 0.1305, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"learning_rate": 0.00018279258400927, |
|
"loss": 0.1257, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 4.52, |
|
"learning_rate": 0.0001813441483198146, |
|
"loss": 0.117, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 4.54, |
|
"learning_rate": 0.00017989571263035922, |
|
"loss": 0.1519, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 4.54, |
|
"eval_loss": 0.9040172696113586, |
|
"eval_rouge1": 0.18727272727272726, |
|
"eval_rouge2": 0.09861111111111112, |
|
"eval_rougeL": 0.18333333333333332, |
|
"eval_rougeLsum": 0.1906060606060606, |
|
"eval_runtime": 82.9032, |
|
"eval_samples_per_second": 0.241, |
|
"eval_steps_per_second": 0.241, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 4.56, |
|
"learning_rate": 0.0001784472769409038, |
|
"loss": 0.1536, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 4.58, |
|
"learning_rate": 0.00017699884125144843, |
|
"loss": 0.1159, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 4.6, |
|
"learning_rate": 0.00017555040556199307, |
|
"loss": 0.1257, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 4.62, |
|
"learning_rate": 0.00017410196987253766, |
|
"loss": 0.1698, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 4.64, |
|
"learning_rate": 0.00017265353418308228, |
|
"loss": 0.1369, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 4.66, |
|
"learning_rate": 0.0001712050984936269, |
|
"loss": 0.0809, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 4.68, |
|
"learning_rate": 0.00016975666280417148, |
|
"loss": 0.1003, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 4.7, |
|
"learning_rate": 0.0001683082271147161, |
|
"loss": 0.1066, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 4.72, |
|
"learning_rate": 0.00016685979142526072, |
|
"loss": 0.1621, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 4.74, |
|
"learning_rate": 0.00016541135573580533, |
|
"loss": 0.119, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 4.74, |
|
"eval_loss": 0.9120545387268066, |
|
"eval_rouge1": 0.12, |
|
"eval_rouge2": 0.04583333333333333, |
|
"eval_rougeL": 0.11285714285714285, |
|
"eval_rougeLsum": 0.12285714285714286, |
|
"eval_runtime": 84.1673, |
|
"eval_samples_per_second": 0.238, |
|
"eval_steps_per_second": 0.238, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 4.76, |
|
"learning_rate": 0.00016396292004634995, |
|
"loss": 0.1185, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 4.78, |
|
"learning_rate": 0.00016251448435689457, |
|
"loss": 0.1657, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"learning_rate": 0.00016106604866743918, |
|
"loss": 0.1233, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 4.82, |
|
"learning_rate": 0.00015961761297798377, |
|
"loss": 0.1611, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 4.84, |
|
"learning_rate": 0.0001581691772885284, |
|
"loss": 0.1613, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 4.86, |
|
"learning_rate": 0.000156720741599073, |
|
"loss": 0.1107, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 4.88, |
|
"learning_rate": 0.0001552723059096176, |
|
"loss": 0.1436, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 4.9, |
|
"learning_rate": 0.00015382387022016221, |
|
"loss": 0.129, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 4.92, |
|
"learning_rate": 0.00015237543453070686, |
|
"loss": 0.1586, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 4.94, |
|
"learning_rate": 0.00015092699884125145, |
|
"loss": 0.1364, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 4.94, |
|
"eval_loss": 0.9120429754257202, |
|
"eval_rouge1": 0.20904761904761907, |
|
"eval_rouge2": 0.12583333333333332, |
|
"eval_rougeL": 0.20666666666666664, |
|
"eval_rougeLsum": 0.21904761904761902, |
|
"eval_runtime": 82.9463, |
|
"eval_samples_per_second": 0.241, |
|
"eval_steps_per_second": 0.241, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 4.96, |
|
"learning_rate": 0.00014947856315179606, |
|
"loss": 0.1688, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 4.98, |
|
"learning_rate": 0.00014803012746234068, |
|
"loss": 0.1385, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 0.0001465816917728853, |
|
"loss": 0.1592, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"learning_rate": 0.0001451332560834299, |
|
"loss": 0.1014, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 5.04, |
|
"learning_rate": 0.0001436848203939745, |
|
"loss": 0.0796, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 5.06, |
|
"learning_rate": 0.00014223638470451912, |
|
"loss": 0.0981, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 5.08, |
|
"learning_rate": 0.00014078794901506374, |
|
"loss": 0.093, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 5.1, |
|
"learning_rate": 0.00013933951332560835, |
|
"loss": 0.1599, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 5.12, |
|
"learning_rate": 0.00013789107763615297, |
|
"loss": 0.1223, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 5.14, |
|
"learning_rate": 0.00013644264194669756, |
|
"loss": 0.1, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 5.14, |
|
"eval_loss": 0.9408878087997437, |
|
"eval_rouge1": 0.12507936507936507, |
|
"eval_rouge2": 0.08333333333333333, |
|
"eval_rougeL": 0.12396825396825398, |
|
"eval_rougeLsum": 0.13111111111111112, |
|
"eval_runtime": 96.2103, |
|
"eval_samples_per_second": 0.208, |
|
"eval_steps_per_second": 0.208, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 5.16, |
|
"learning_rate": 0.00013499420625724218, |
|
"loss": 0.1284, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 5.18, |
|
"learning_rate": 0.0001335457705677868, |
|
"loss": 0.1523, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 5.2, |
|
"learning_rate": 0.00013209733487833138, |
|
"loss": 0.1051, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 5.22, |
|
"learning_rate": 0.00013064889918887603, |
|
"loss": 0.1216, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 5.24, |
|
"learning_rate": 0.00012920046349942064, |
|
"loss": 0.1219, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 5.26, |
|
"learning_rate": 0.00012775202780996523, |
|
"loss": 0.1482, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 5.28, |
|
"learning_rate": 0.00012630359212050985, |
|
"loss": 0.1076, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 5.3, |
|
"learning_rate": 0.00012485515643105447, |
|
"loss": 0.121, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 5.32, |
|
"learning_rate": 0.00012340672074159908, |
|
"loss": 0.1448, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 5.34, |
|
"learning_rate": 0.0001219582850521437, |
|
"loss": 0.1683, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 5.34, |
|
"eval_loss": 0.9422550201416016, |
|
"eval_rouge1": 0.13818181818181818, |
|
"eval_rouge2": 0.0951010101010101, |
|
"eval_rougeL": 0.13713286713286715, |
|
"eval_rougeLsum": 0.14174825174825176, |
|
"eval_runtime": 96.365, |
|
"eval_samples_per_second": 0.208, |
|
"eval_steps_per_second": 0.208, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 5.36, |
|
"learning_rate": 0.0001205098493626883, |
|
"loss": 0.103, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 5.37, |
|
"learning_rate": 0.0001190614136732329, |
|
"loss": 0.1434, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 5.39, |
|
"learning_rate": 0.00011761297798377752, |
|
"loss": 0.1419, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 5.41, |
|
"learning_rate": 0.00011616454229432214, |
|
"loss": 0.1145, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 5.43, |
|
"learning_rate": 0.00011471610660486674, |
|
"loss": 0.1302, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 5.45, |
|
"learning_rate": 0.00011326767091541136, |
|
"loss": 0.0718, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 5.47, |
|
"learning_rate": 0.00011181923522595596, |
|
"loss": 0.1166, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 5.49, |
|
"learning_rate": 0.0001103707995365006, |
|
"loss": 0.1265, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 5.51, |
|
"learning_rate": 0.0001089223638470452, |
|
"loss": 0.0972, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 5.53, |
|
"learning_rate": 0.0001074739281575898, |
|
"loss": 0.1395, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 5.53, |
|
"eval_loss": 0.9336325526237488, |
|
"eval_rouge1": 0.16115384615384615, |
|
"eval_rouge2": 0.12329545454545454, |
|
"eval_rougeL": 0.15999999999999998, |
|
"eval_rougeLsum": 0.16307692307692306, |
|
"eval_runtime": 93.5346, |
|
"eval_samples_per_second": 0.214, |
|
"eval_steps_per_second": 0.214, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 5.55, |
|
"learning_rate": 0.00010602549246813442, |
|
"loss": 0.0808, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 5.57, |
|
"learning_rate": 0.00010457705677867903, |
|
"loss": 0.1205, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 5.59, |
|
"learning_rate": 0.00010312862108922364, |
|
"loss": 0.119, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 5.61, |
|
"learning_rate": 0.00010168018539976825, |
|
"loss": 0.1357, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 5.63, |
|
"learning_rate": 0.00010023174971031286, |
|
"loss": 0.1144, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 5.65, |
|
"learning_rate": 9.878331402085749e-05, |
|
"loss": 0.138, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 5.67, |
|
"learning_rate": 9.733487833140209e-05, |
|
"loss": 0.0998, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 5.69, |
|
"learning_rate": 9.588644264194669e-05, |
|
"loss": 0.1437, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 5.71, |
|
"learning_rate": 9.443800695249131e-05, |
|
"loss": 0.1053, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 5.73, |
|
"learning_rate": 9.298957126303593e-05, |
|
"loss": 0.1067, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 5.73, |
|
"eval_loss": 0.9290033578872681, |
|
"eval_rouge1": 0.2234265734265734, |
|
"eval_rouge2": 0.13156565656565655, |
|
"eval_rougeL": 0.21744755244755246, |
|
"eval_rougeLsum": 0.2169230769230769, |
|
"eval_runtime": 91.8958, |
|
"eval_samples_per_second": 0.218, |
|
"eval_steps_per_second": 0.218, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 5.75, |
|
"learning_rate": 9.154113557358054e-05, |
|
"loss": 0.1225, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 5.77, |
|
"learning_rate": 9.009269988412515e-05, |
|
"loss": 0.0867, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 5.79, |
|
"learning_rate": 8.864426419466975e-05, |
|
"loss": 0.1325, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 5.81, |
|
"learning_rate": 8.719582850521438e-05, |
|
"loss": 0.118, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 5.83, |
|
"learning_rate": 8.574739281575898e-05, |
|
"loss": 0.112, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 5.85, |
|
"learning_rate": 8.429895712630359e-05, |
|
"loss": 0.1326, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 5.87, |
|
"learning_rate": 8.28505214368482e-05, |
|
"loss": 0.1506, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 5.89, |
|
"learning_rate": 8.140208574739282e-05, |
|
"loss": 0.1499, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 5.91, |
|
"learning_rate": 7.995365005793744e-05, |
|
"loss": 0.1092, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 5.93, |
|
"learning_rate": 7.850521436848204e-05, |
|
"loss": 0.1104, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 5.93, |
|
"eval_loss": 0.9244877696037292, |
|
"eval_rouge1": 0.2, |
|
"eval_rouge2": 0.1, |
|
"eval_rougeL": 0.19153846153846155, |
|
"eval_rougeLsum": 0.19153846153846155, |
|
"eval_runtime": 93.7022, |
|
"eval_samples_per_second": 0.213, |
|
"eval_steps_per_second": 0.213, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 5.95, |
|
"learning_rate": 7.705677867902664e-05, |
|
"loss": 0.0824, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 5.97, |
|
"learning_rate": 7.560834298957127e-05, |
|
"loss": 0.1048, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 5.99, |
|
"learning_rate": 7.415990730011588e-05, |
|
"loss": 0.1295, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 6.01, |
|
"learning_rate": 7.271147161066048e-05, |
|
"loss": 0.1296, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 6.03, |
|
"learning_rate": 7.12630359212051e-05, |
|
"loss": 0.1146, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 6.05, |
|
"learning_rate": 6.981460023174971e-05, |
|
"loss": 0.1158, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 6.07, |
|
"learning_rate": 6.836616454229433e-05, |
|
"loss": 0.094, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 6.09, |
|
"learning_rate": 6.691772885283893e-05, |
|
"loss": 0.1141, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 6.11, |
|
"learning_rate": 6.546929316338354e-05, |
|
"loss": 0.1025, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 6.13, |
|
"learning_rate": 6.402085747392817e-05, |
|
"loss": 0.1474, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 6.13, |
|
"eval_loss": 0.9422538876533508, |
|
"eval_rouge1": 0.20069541569541566, |
|
"eval_rouge2": 0.10303030303030303, |
|
"eval_rougeL": 0.19625097125097124, |
|
"eval_rougeLsum": 0.19848096348096347, |
|
"eval_runtime": 94.7246, |
|
"eval_samples_per_second": 0.211, |
|
"eval_steps_per_second": 0.211, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 6.15, |
|
"learning_rate": 6.257242178447277e-05, |
|
"loss": 0.09, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 6.17, |
|
"learning_rate": 6.112398609501739e-05, |
|
"loss": 0.1235, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 6.19, |
|
"learning_rate": 5.9675550405561996e-05, |
|
"loss": 0.0733, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 6.21, |
|
"learning_rate": 5.822711471610661e-05, |
|
"loss": 0.1035, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 6.22, |
|
"learning_rate": 5.6778679026651216e-05, |
|
"loss": 0.1027, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 6.24, |
|
"learning_rate": 5.533024333719583e-05, |
|
"loss": 0.0863, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 6.26, |
|
"learning_rate": 5.388180764774044e-05, |
|
"loss": 0.095, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 6.28, |
|
"learning_rate": 5.243337195828506e-05, |
|
"loss": 0.1103, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 6.3, |
|
"learning_rate": 5.098493626882966e-05, |
|
"loss": 0.1325, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 6.32, |
|
"learning_rate": 4.953650057937428e-05, |
|
"loss": 0.1052, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 6.32, |
|
"eval_loss": 0.9328528642654419, |
|
"eval_rouge1": 0.2023076923076923, |
|
"eval_rouge2": 0.1102272727272727, |
|
"eval_rougeL": 0.19999999999999998, |
|
"eval_rougeLsum": 0.2, |
|
"eval_runtime": 92.8302, |
|
"eval_samples_per_second": 0.215, |
|
"eval_steps_per_second": 0.215, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 6.34, |
|
"learning_rate": 4.808806488991889e-05, |
|
"loss": 0.1293, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 6.36, |
|
"learning_rate": 4.6639629200463506e-05, |
|
"loss": 0.1301, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 6.38, |
|
"learning_rate": 4.519119351100811e-05, |
|
"loss": 0.0873, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"learning_rate": 4.3742757821552725e-05, |
|
"loss": 0.1017, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 6.42, |
|
"learning_rate": 4.2294322132097335e-05, |
|
"loss": 0.0846, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 6.44, |
|
"learning_rate": 4.084588644264195e-05, |
|
"loss": 0.0898, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 6.46, |
|
"learning_rate": 3.9397450753186555e-05, |
|
"loss": 0.1494, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 6.48, |
|
"learning_rate": 3.794901506373117e-05, |
|
"loss": 0.0742, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 6.5, |
|
"learning_rate": 3.650057937427578e-05, |
|
"loss": 0.0793, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 6.52, |
|
"learning_rate": 3.50521436848204e-05, |
|
"loss": 0.1203, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 6.52, |
|
"eval_loss": 0.9380243420600891, |
|
"eval_rouge1": 0.2023076923076923, |
|
"eval_rouge2": 0.1102272727272727, |
|
"eval_rougeL": 0.19999999999999998, |
|
"eval_rougeLsum": 0.2, |
|
"eval_runtime": 94.5066, |
|
"eval_samples_per_second": 0.212, |
|
"eval_steps_per_second": 0.212, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 6.54, |
|
"learning_rate": 3.360370799536501e-05, |
|
"loss": 0.1257, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 6.56, |
|
"learning_rate": 3.215527230590962e-05, |
|
"loss": 0.1177, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 6.58, |
|
"learning_rate": 3.070683661645423e-05, |
|
"loss": 0.1359, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 6.6, |
|
"learning_rate": 2.9258400926998842e-05, |
|
"loss": 0.1303, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 6.62, |
|
"learning_rate": 2.7809965237543452e-05, |
|
"loss": 0.0968, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 6.64, |
|
"learning_rate": 2.6361529548088065e-05, |
|
"loss": 0.1061, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 6.66, |
|
"learning_rate": 2.4913093858632675e-05, |
|
"loss": 0.1307, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 6.68, |
|
"learning_rate": 2.346465816917729e-05, |
|
"loss": 0.0981, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 6.7, |
|
"learning_rate": 2.20162224797219e-05, |
|
"loss": 0.0901, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 6.72, |
|
"learning_rate": 2.0567786790266515e-05, |
|
"loss": 0.1125, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 6.72, |
|
"eval_loss": 0.9421626925468445, |
|
"eval_rouge1": 0.18958041958041955, |
|
"eval_rouge2": 0.0977272727272727, |
|
"eval_rougeL": 0.18615384615384614, |
|
"eval_rougeLsum": 0.19, |
|
"eval_runtime": 96.4239, |
|
"eval_samples_per_second": 0.207, |
|
"eval_steps_per_second": 0.207, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 6.74, |
|
"learning_rate": 1.9119351100811125e-05, |
|
"loss": 0.0823, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 6.76, |
|
"learning_rate": 1.767091541135574e-05, |
|
"loss": 0.1016, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 6.78, |
|
"learning_rate": 1.6222479721900348e-05, |
|
"loss": 0.1172, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 6.8, |
|
"learning_rate": 1.477404403244496e-05, |
|
"loss": 0.0959, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 6.82, |
|
"learning_rate": 1.3325608342989572e-05, |
|
"loss": 0.1534, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 6.84, |
|
"learning_rate": 1.1877172653534183e-05, |
|
"loss": 0.125, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 6.86, |
|
"learning_rate": 1.0428736964078795e-05, |
|
"loss": 0.1221, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 6.88, |
|
"learning_rate": 8.980301274623406e-06, |
|
"loss": 0.1391, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 6.9, |
|
"learning_rate": 7.531865585168019e-06, |
|
"loss": 0.0986, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 6.92, |
|
"learning_rate": 6.083429895712631e-06, |
|
"loss": 0.1323, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 6.92, |
|
"eval_loss": 0.9433181881904602, |
|
"eval_rouge1": 0.19, |
|
"eval_rouge2": 0.0977272727272727, |
|
"eval_rougeL": 0.18615384615384617, |
|
"eval_rougeLsum": 0.19, |
|
"eval_runtime": 94.6833, |
|
"eval_samples_per_second": 0.211, |
|
"eval_steps_per_second": 0.211, |
|
"step": 3500 |
|
} |
|
], |
|
"max_steps": 3542, |
|
"num_train_epochs": 7, |
|
"total_flos": 8.516307584906035e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|