Vignesh-M's picture
Upload 12 files
8d56454 verified
raw
history blame contribute delete
No virus
82.5 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 9.879792873874985,
"global_step": 5000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.02,
"learning_rate": 5.555555555555555e-05,
"loss": 13.7427,
"step": 10
},
{
"epoch": 0.04,
"learning_rate": 0.0001111111111111111,
"loss": 8.8821,
"step": 20
},
{
"epoch": 0.06,
"learning_rate": 0.00016666666666666666,
"loss": 2.5857,
"step": 30
},
{
"epoch": 0.08,
"learning_rate": 0.0002222222222222222,
"loss": 1.3518,
"step": 40
},
{
"epoch": 0.1,
"learning_rate": 0.0002777777777777778,
"loss": 1.0054,
"step": 50
},
{
"epoch": 0.12,
"learning_rate": 0.0003333333333333333,
"loss": 0.864,
"step": 60
},
{
"epoch": 0.14,
"learning_rate": 0.0003888888888888889,
"loss": 0.8669,
"step": 70
},
{
"epoch": 0.16,
"learning_rate": 0.0004444444444444444,
"loss": 0.9395,
"step": 80
},
{
"epoch": 0.18,
"learning_rate": 0.0005,
"loss": 1.0424,
"step": 90
},
{
"epoch": 0.2,
"learning_rate": 0.0004985515643105447,
"loss": 0.6298,
"step": 100
},
{
"epoch": 0.2,
"eval_loss": 1.61441171169281,
"eval_rouge1": 0.07727272727272727,
"eval_rouge2": 0.061111111111111116,
"eval_rougeL": 0.06893939393939394,
"eval_rougeLsum": 0.08333333333333333,
"eval_runtime": 91.9317,
"eval_samples_per_second": 0.218,
"eval_steps_per_second": 0.218,
"step": 100
},
{
"epoch": 0.22,
"learning_rate": 0.0004971031286210893,
"loss": 0.5699,
"step": 110
},
{
"epoch": 0.24,
"learning_rate": 0.0004956546929316338,
"loss": 0.6074,
"step": 120
},
{
"epoch": 0.26,
"learning_rate": 0.0004942062572421785,
"loss": 0.6291,
"step": 130
},
{
"epoch": 0.28,
"learning_rate": 0.0004927578215527231,
"loss": 0.5098,
"step": 140
},
{
"epoch": 0.3,
"learning_rate": 0.0004913093858632677,
"loss": 0.4448,
"step": 150
},
{
"epoch": 0.32,
"learning_rate": 0.0004898609501738123,
"loss": 0.4508,
"step": 160
},
{
"epoch": 0.34,
"learning_rate": 0.0004884125144843569,
"loss": 0.4255,
"step": 170
},
{
"epoch": 0.36,
"learning_rate": 0.00048696407879490153,
"loss": 0.3705,
"step": 180
},
{
"epoch": 0.37,
"learning_rate": 0.0004855156431054461,
"loss": 0.4009,
"step": 190
},
{
"epoch": 0.39,
"learning_rate": 0.00048406720741599077,
"loss": 0.4317,
"step": 200
},
{
"epoch": 0.39,
"eval_loss": 1.176011323928833,
"eval_rouge1": 0.16153846153846152,
"eval_rouge2": 0.06988636363636364,
"eval_rougeL": 0.15999999999999998,
"eval_rougeLsum": 0.15999999999999998,
"eval_runtime": 88.2692,
"eval_samples_per_second": 0.227,
"eval_steps_per_second": 0.227,
"step": 200
},
{
"epoch": 0.41,
"learning_rate": 0.00048261877172653536,
"loss": 0.3679,
"step": 210
},
{
"epoch": 0.43,
"learning_rate": 0.00048117033603707995,
"loss": 0.3252,
"step": 220
},
{
"epoch": 0.45,
"learning_rate": 0.0004797219003476246,
"loss": 0.3707,
"step": 230
},
{
"epoch": 0.47,
"learning_rate": 0.0004782734646581692,
"loss": 0.3543,
"step": 240
},
{
"epoch": 0.49,
"learning_rate": 0.0004768250289687138,
"loss": 0.3434,
"step": 250
},
{
"epoch": 0.51,
"learning_rate": 0.0004753765932792584,
"loss": 0.4007,
"step": 260
},
{
"epoch": 0.53,
"learning_rate": 0.000473928157589803,
"loss": 0.3552,
"step": 270
},
{
"epoch": 0.55,
"learning_rate": 0.00047247972190034765,
"loss": 0.388,
"step": 280
},
{
"epoch": 0.57,
"learning_rate": 0.00047103128621089224,
"loss": 0.307,
"step": 290
},
{
"epoch": 0.59,
"learning_rate": 0.0004695828505214368,
"loss": 0.2588,
"step": 300
},
{
"epoch": 0.59,
"eval_loss": 1.0633952617645264,
"eval_rouge1": 0.08,
"eval_rouge2": 0.05555555555555556,
"eval_rougeL": 0.08414141414141416,
"eval_rougeLsum": 0.08383838383838385,
"eval_runtime": 88.5138,
"eval_samples_per_second": 0.226,
"eval_steps_per_second": 0.226,
"step": 300
},
{
"epoch": 0.61,
"learning_rate": 0.00046813441483198147,
"loss": 0.2965,
"step": 310
},
{
"epoch": 0.63,
"learning_rate": 0.00046668597914252606,
"loss": 0.3096,
"step": 320
},
{
"epoch": 0.65,
"learning_rate": 0.0004652375434530707,
"loss": 0.301,
"step": 330
},
{
"epoch": 0.67,
"learning_rate": 0.00046378910776361535,
"loss": 0.3153,
"step": 340
},
{
"epoch": 0.69,
"learning_rate": 0.00046234067207415994,
"loss": 0.3549,
"step": 350
},
{
"epoch": 0.71,
"learning_rate": 0.0004608922363847045,
"loss": 0.356,
"step": 360
},
{
"epoch": 0.73,
"learning_rate": 0.00045944380069524917,
"loss": 0.3278,
"step": 370
},
{
"epoch": 0.75,
"learning_rate": 0.00045799536500579376,
"loss": 0.2985,
"step": 380
},
{
"epoch": 0.77,
"learning_rate": 0.00045654692931633835,
"loss": 0.3152,
"step": 390
},
{
"epoch": 0.79,
"learning_rate": 0.000455098493626883,
"loss": 0.2665,
"step": 400
},
{
"epoch": 0.79,
"eval_loss": 1.044259786605835,
"eval_rouge1": 0.06307692307692307,
"eval_rouge2": 0.021590909090909088,
"eval_rougeL": 0.06307692307692307,
"eval_rougeLsum": 0.06307692307692307,
"eval_runtime": 80.5878,
"eval_samples_per_second": 0.248,
"eval_steps_per_second": 0.248,
"step": 400
},
{
"epoch": 0.81,
"learning_rate": 0.0004536500579374276,
"loss": 0.2323,
"step": 410
},
{
"epoch": 0.83,
"learning_rate": 0.00045220162224797217,
"loss": 0.2222,
"step": 420
},
{
"epoch": 0.85,
"learning_rate": 0.0004507531865585168,
"loss": 0.2516,
"step": 430
},
{
"epoch": 0.87,
"learning_rate": 0.0004493047508690614,
"loss": 0.2851,
"step": 440
},
{
"epoch": 0.89,
"learning_rate": 0.000447856315179606,
"loss": 0.2677,
"step": 450
},
{
"epoch": 0.91,
"learning_rate": 0.00044640787949015064,
"loss": 0.2447,
"step": 460
},
{
"epoch": 0.93,
"learning_rate": 0.00044495944380069523,
"loss": 0.3186,
"step": 470
},
{
"epoch": 0.95,
"learning_rate": 0.0004435110081112398,
"loss": 0.3035,
"step": 480
},
{
"epoch": 0.97,
"learning_rate": 0.0004420625724217845,
"loss": 0.3036,
"step": 490
},
{
"epoch": 0.99,
"learning_rate": 0.0004406141367323291,
"loss": 0.1972,
"step": 500
},
{
"epoch": 0.99,
"eval_loss": 1.0465357303619385,
"eval_rouge1": 0.1908791208791209,
"eval_rouge2": 0.10681818181818181,
"eval_rougeL": 0.17934065934065935,
"eval_rougeLsum": 0.19159340659340657,
"eval_runtime": 84.3482,
"eval_samples_per_second": 0.237,
"eval_steps_per_second": 0.237,
"step": 500
},
{
"epoch": 1.01,
"learning_rate": 0.00043916570104287375,
"loss": 0.279,
"step": 510
},
{
"epoch": 1.03,
"learning_rate": 0.00043771726535341834,
"loss": 0.272,
"step": 520
},
{
"epoch": 1.05,
"learning_rate": 0.00043626882966396293,
"loss": 0.2272,
"step": 530
},
{
"epoch": 1.07,
"learning_rate": 0.0004348203939745076,
"loss": 0.2495,
"step": 540
},
{
"epoch": 1.09,
"learning_rate": 0.00043337195828505216,
"loss": 0.1965,
"step": 550
},
{
"epoch": 1.11,
"learning_rate": 0.00043192352259559675,
"loss": 0.2364,
"step": 560
},
{
"epoch": 1.13,
"learning_rate": 0.0004304750869061414,
"loss": 0.2478,
"step": 570
},
{
"epoch": 1.15,
"learning_rate": 0.000429026651216686,
"loss": 0.2046,
"step": 580
},
{
"epoch": 1.17,
"learning_rate": 0.0004275782155272306,
"loss": 0.2661,
"step": 590
},
{
"epoch": 1.19,
"learning_rate": 0.0004261297798377752,
"loss": 0.2041,
"step": 600
},
{
"epoch": 1.19,
"eval_loss": 0.9551488757133484,
"eval_rouge1": 0.09038461538461538,
"eval_rouge2": 0.05051948051948052,
"eval_rougeL": 0.09679487179487178,
"eval_rougeLsum": 0.09871794871794871,
"eval_runtime": 89.0139,
"eval_samples_per_second": 0.225,
"eval_steps_per_second": 0.225,
"step": 600
},
{
"epoch": 1.21,
"learning_rate": 0.0004246813441483198,
"loss": 0.2816,
"step": 610
},
{
"epoch": 1.22,
"learning_rate": 0.0004232329084588644,
"loss": 0.1904,
"step": 620
},
{
"epoch": 1.24,
"learning_rate": 0.00042178447276940904,
"loss": 0.21,
"step": 630
},
{
"epoch": 1.26,
"learning_rate": 0.0004203360370799537,
"loss": 0.1662,
"step": 640
},
{
"epoch": 1.28,
"learning_rate": 0.0004188876013904983,
"loss": 0.3052,
"step": 650
},
{
"epoch": 1.3,
"learning_rate": 0.0004174391657010429,
"loss": 0.1744,
"step": 660
},
{
"epoch": 1.32,
"learning_rate": 0.0004159907300115875,
"loss": 0.288,
"step": 670
},
{
"epoch": 1.34,
"learning_rate": 0.0004145422943221321,
"loss": 0.2303,
"step": 680
},
{
"epoch": 1.36,
"learning_rate": 0.00041309385863267674,
"loss": 0.2866,
"step": 690
},
{
"epoch": 1.38,
"learning_rate": 0.00041164542294322133,
"loss": 0.238,
"step": 700
},
{
"epoch": 1.38,
"eval_loss": 0.9423090219497681,
"eval_rouge1": 0.1,
"eval_rouge2": 0.07291666666666667,
"eval_rougeL": 0.1,
"eval_rougeLsum": 0.10333333333333335,
"eval_runtime": 88.0456,
"eval_samples_per_second": 0.227,
"eval_steps_per_second": 0.227,
"step": 700
},
{
"epoch": 1.4,
"learning_rate": 0.0004101969872537659,
"loss": 0.2342,
"step": 710
},
{
"epoch": 1.42,
"learning_rate": 0.00040874855156431057,
"loss": 0.2716,
"step": 720
},
{
"epoch": 1.44,
"learning_rate": 0.00040730011587485516,
"loss": 0.2453,
"step": 730
},
{
"epoch": 1.46,
"learning_rate": 0.00040585168018539974,
"loss": 0.2313,
"step": 740
},
{
"epoch": 1.48,
"learning_rate": 0.0004044032444959444,
"loss": 0.2306,
"step": 750
},
{
"epoch": 1.5,
"learning_rate": 0.000402954808806489,
"loss": 0.1773,
"step": 760
},
{
"epoch": 1.52,
"learning_rate": 0.00040150637311703357,
"loss": 0.1957,
"step": 770
},
{
"epoch": 1.54,
"learning_rate": 0.0004000579374275782,
"loss": 0.2758,
"step": 780
},
{
"epoch": 1.56,
"learning_rate": 0.0003986095017381228,
"loss": 0.2649,
"step": 790
},
{
"epoch": 1.58,
"learning_rate": 0.0003971610660486675,
"loss": 0.275,
"step": 800
},
{
"epoch": 1.58,
"eval_loss": 0.9273136258125305,
"eval_rouge1": 0.14666666666666667,
"eval_rouge2": 0.10977272727272727,
"eval_rougeL": 0.15038461538461537,
"eval_rougeLsum": 0.15153846153846154,
"eval_runtime": 87.8017,
"eval_samples_per_second": 0.228,
"eval_steps_per_second": 0.228,
"step": 800
},
{
"epoch": 1.6,
"learning_rate": 0.0003957126303592121,
"loss": 0.2102,
"step": 810
},
{
"epoch": 1.62,
"learning_rate": 0.0003942641946697567,
"loss": 0.2146,
"step": 820
},
{
"epoch": 1.64,
"learning_rate": 0.0003928157589803013,
"loss": 0.1918,
"step": 830
},
{
"epoch": 1.66,
"learning_rate": 0.0003913673232908459,
"loss": 0.2512,
"step": 840
},
{
"epoch": 1.68,
"learning_rate": 0.0003899188876013905,
"loss": 0.2499,
"step": 850
},
{
"epoch": 1.7,
"learning_rate": 0.00038847045191193515,
"loss": 0.228,
"step": 860
},
{
"epoch": 1.72,
"learning_rate": 0.00038702201622247974,
"loss": 0.2507,
"step": 870
},
{
"epoch": 1.74,
"learning_rate": 0.0003855735805330243,
"loss": 0.1735,
"step": 880
},
{
"epoch": 1.76,
"learning_rate": 0.00038412514484356897,
"loss": 0.2752,
"step": 890
},
{
"epoch": 1.78,
"learning_rate": 0.00038267670915411356,
"loss": 0.2379,
"step": 900
},
{
"epoch": 1.78,
"eval_loss": 0.9023244976997375,
"eval_rouge1": 0.1,
"eval_rouge2": 0.08333333333333333,
"eval_rougeL": 0.1,
"eval_rougeLsum": 0.1,
"eval_runtime": 80.7798,
"eval_samples_per_second": 0.248,
"eval_steps_per_second": 0.248,
"step": 900
},
{
"epoch": 1.8,
"learning_rate": 0.00038122827346465815,
"loss": 0.1993,
"step": 910
},
{
"epoch": 1.82,
"learning_rate": 0.0003797798377752028,
"loss": 0.2058,
"step": 920
},
{
"epoch": 1.84,
"learning_rate": 0.0003783314020857474,
"loss": 0.2675,
"step": 930
},
{
"epoch": 1.86,
"learning_rate": 0.00037688296639629197,
"loss": 0.1928,
"step": 940
},
{
"epoch": 1.88,
"learning_rate": 0.0003754345307068366,
"loss": 0.1903,
"step": 950
},
{
"epoch": 1.9,
"learning_rate": 0.00037398609501738126,
"loss": 0.1967,
"step": 960
},
{
"epoch": 1.92,
"learning_rate": 0.00037253765932792585,
"loss": 0.2044,
"step": 970
},
{
"epoch": 1.94,
"learning_rate": 0.0003710892236384705,
"loss": 0.2027,
"step": 980
},
{
"epoch": 1.95,
"learning_rate": 0.0003696407879490151,
"loss": 0.25,
"step": 990
},
{
"epoch": 1.97,
"learning_rate": 0.00036819235225955967,
"loss": 0.2896,
"step": 1000
},
{
"epoch": 1.97,
"eval_loss": 0.9184179306030273,
"eval_rouge1": 0.19,
"eval_rouge2": 0.1,
"eval_rougeL": 0.18893939393939393,
"eval_rougeLsum": 0.19848484848484846,
"eval_runtime": 81.9559,
"eval_samples_per_second": 0.244,
"eval_steps_per_second": 0.244,
"step": 1000
},
{
"epoch": 1.99,
"learning_rate": 0.0003667439165701043,
"loss": 0.1462,
"step": 1010
},
{
"epoch": 2.02,
"learning_rate": 0.0003652954808806489,
"loss": 0.1944,
"step": 1020
},
{
"epoch": 2.04,
"learning_rate": 0.0003638470451911935,
"loss": 0.2296,
"step": 1030
},
{
"epoch": 2.06,
"learning_rate": 0.00036239860950173814,
"loss": 0.2446,
"step": 1040
},
{
"epoch": 2.07,
"learning_rate": 0.00036095017381228273,
"loss": 0.1448,
"step": 1050
},
{
"epoch": 2.09,
"learning_rate": 0.0003595017381228273,
"loss": 0.1507,
"step": 1060
},
{
"epoch": 2.11,
"learning_rate": 0.00035805330243337196,
"loss": 0.1636,
"step": 1070
},
{
"epoch": 2.13,
"learning_rate": 0.00035660486674391655,
"loss": 0.1909,
"step": 1080
},
{
"epoch": 2.15,
"learning_rate": 0.0003551564310544612,
"loss": 0.1895,
"step": 1090
},
{
"epoch": 2.17,
"learning_rate": 0.0003537079953650058,
"loss": 0.2663,
"step": 1100
},
{
"epoch": 2.17,
"eval_loss": 0.9002671241760254,
"eval_rouge1": 0.07948717948717948,
"eval_rouge2": 0.06779220779220778,
"eval_rougeL": 0.08782051282051281,
"eval_rougeLsum": 0.08333333333333333,
"eval_runtime": 84.9078,
"eval_samples_per_second": 0.236,
"eval_steps_per_second": 0.236,
"step": 1100
},
{
"epoch": 2.19,
"learning_rate": 0.0003522595596755504,
"loss": 0.1672,
"step": 1110
},
{
"epoch": 2.21,
"learning_rate": 0.00035081112398609507,
"loss": 0.1909,
"step": 1120
},
{
"epoch": 2.23,
"learning_rate": 0.00034936268829663966,
"loss": 0.1675,
"step": 1130
},
{
"epoch": 2.25,
"learning_rate": 0.00034791425260718425,
"loss": 0.2636,
"step": 1140
},
{
"epoch": 2.27,
"learning_rate": 0.0003464658169177289,
"loss": 0.2119,
"step": 1150
},
{
"epoch": 2.29,
"learning_rate": 0.0003450173812282735,
"loss": 0.2114,
"step": 1160
},
{
"epoch": 2.31,
"learning_rate": 0.0003435689455388181,
"loss": 0.1456,
"step": 1170
},
{
"epoch": 2.33,
"learning_rate": 0.0003421205098493627,
"loss": 0.1993,
"step": 1180
},
{
"epoch": 2.35,
"learning_rate": 0.0003406720741599073,
"loss": 0.1467,
"step": 1190
},
{
"epoch": 2.37,
"learning_rate": 0.0003392236384704519,
"loss": 0.237,
"step": 1200
},
{
"epoch": 2.37,
"eval_loss": 0.9139176607131958,
"eval_rouge1": 0.19904761904761903,
"eval_rouge2": 0.10285714285714284,
"eval_rougeL": 0.19511904761904764,
"eval_rougeLsum": 0.20619047619047615,
"eval_runtime": 85.9903,
"eval_samples_per_second": 0.233,
"eval_steps_per_second": 0.233,
"step": 1200
},
{
"epoch": 2.39,
"learning_rate": 0.00033777520278099654,
"loss": 0.1996,
"step": 1210
},
{
"epoch": 2.41,
"learning_rate": 0.00033632676709154113,
"loss": 0.1725,
"step": 1220
},
{
"epoch": 2.43,
"learning_rate": 0.0003348783314020857,
"loss": 0.2089,
"step": 1230
},
{
"epoch": 2.45,
"learning_rate": 0.00033342989571263036,
"loss": 0.183,
"step": 1240
},
{
"epoch": 2.47,
"learning_rate": 0.00033198146002317495,
"loss": 0.1631,
"step": 1250
},
{
"epoch": 2.49,
"learning_rate": 0.00033053302433371954,
"loss": 0.1889,
"step": 1260
},
{
"epoch": 2.51,
"learning_rate": 0.00032908458864426424,
"loss": 0.172,
"step": 1270
},
{
"epoch": 2.53,
"learning_rate": 0.00032763615295480883,
"loss": 0.1236,
"step": 1280
},
{
"epoch": 2.55,
"learning_rate": 0.0003261877172653534,
"loss": 0.1682,
"step": 1290
},
{
"epoch": 2.57,
"learning_rate": 0.00032473928157589806,
"loss": 0.2019,
"step": 1300
},
{
"epoch": 2.57,
"eval_loss": 0.920964241027832,
"eval_rouge1": 0.11282051282051282,
"eval_rouge2": 0.03636363636363636,
"eval_rougeL": 0.1128205128205128,
"eval_rougeLsum": 0.11607142857142858,
"eval_runtime": 82.9262,
"eval_samples_per_second": 0.241,
"eval_steps_per_second": 0.241,
"step": 1300
},
{
"epoch": 2.59,
"learning_rate": 0.00032329084588644265,
"loss": 0.1681,
"step": 1310
},
{
"epoch": 2.61,
"learning_rate": 0.00032184241019698724,
"loss": 0.2372,
"step": 1320
},
{
"epoch": 2.63,
"learning_rate": 0.0003203939745075319,
"loss": 0.1343,
"step": 1330
},
{
"epoch": 2.65,
"learning_rate": 0.0003189455388180765,
"loss": 0.2125,
"step": 1340
},
{
"epoch": 2.67,
"learning_rate": 0.0003174971031286211,
"loss": 0.2217,
"step": 1350
},
{
"epoch": 2.69,
"learning_rate": 0.0003160486674391657,
"loss": 0.1542,
"step": 1360
},
{
"epoch": 2.71,
"learning_rate": 0.0003146002317497103,
"loss": 0.171,
"step": 1370
},
{
"epoch": 2.73,
"learning_rate": 0.00031315179606025494,
"loss": 0.1808,
"step": 1380
},
{
"epoch": 2.75,
"learning_rate": 0.00031170336037079953,
"loss": 0.1423,
"step": 1390
},
{
"epoch": 2.77,
"learning_rate": 0.0003102549246813441,
"loss": 0.1794,
"step": 1400
},
{
"epoch": 2.77,
"eval_loss": 0.9037507772445679,
"eval_rouge1": 0.11666666666666667,
"eval_rouge2": 0.08636363636363635,
"eval_rougeL": 0.11833333333333333,
"eval_rougeLsum": 0.12064102564102563,
"eval_runtime": 84.5851,
"eval_samples_per_second": 0.236,
"eval_steps_per_second": 0.236,
"step": 1400
},
{
"epoch": 2.79,
"learning_rate": 0.00030880648899188877,
"loss": 0.2313,
"step": 1410
},
{
"epoch": 2.8,
"learning_rate": 0.00030735805330243336,
"loss": 0.1548,
"step": 1420
},
{
"epoch": 2.82,
"learning_rate": 0.000305909617612978,
"loss": 0.2318,
"step": 1430
},
{
"epoch": 2.84,
"learning_rate": 0.00030446118192352264,
"loss": 0.1959,
"step": 1440
},
{
"epoch": 2.86,
"learning_rate": 0.00030301274623406723,
"loss": 0.1438,
"step": 1450
},
{
"epoch": 2.88,
"learning_rate": 0.0003015643105446118,
"loss": 0.1953,
"step": 1460
},
{
"epoch": 2.9,
"learning_rate": 0.00030011587485515647,
"loss": 0.1542,
"step": 1470
},
{
"epoch": 2.92,
"learning_rate": 0.00029866743916570106,
"loss": 0.1693,
"step": 1480
},
{
"epoch": 2.94,
"learning_rate": 0.00029721900347624565,
"loss": 0.1836,
"step": 1490
},
{
"epoch": 2.96,
"learning_rate": 0.0002957705677867903,
"loss": 0.1847,
"step": 1500
},
{
"epoch": 2.96,
"eval_loss": 0.8892697095870972,
"eval_rouge1": 0.14335664335664336,
"eval_rouge2": 0.13131313131313133,
"eval_rougeL": 0.14375624375624377,
"eval_rougeLsum": 0.14725274725274726,
"eval_runtime": 86.985,
"eval_samples_per_second": 0.23,
"eval_steps_per_second": 0.23,
"step": 1500
},
{
"epoch": 2.98,
"learning_rate": 0.0002943221320973349,
"loss": 0.1245,
"step": 1510
},
{
"epoch": 3.0,
"learning_rate": 0.00029287369640787947,
"loss": 0.1917,
"step": 1520
},
{
"epoch": 3.02,
"learning_rate": 0.0002914252607184241,
"loss": 0.205,
"step": 1530
},
{
"epoch": 3.04,
"learning_rate": 0.0002899768250289687,
"loss": 0.1493,
"step": 1540
},
{
"epoch": 3.06,
"learning_rate": 0.0002885283893395133,
"loss": 0.1596,
"step": 1550
},
{
"epoch": 3.08,
"learning_rate": 0.00028707995365005794,
"loss": 0.1689,
"step": 1560
},
{
"epoch": 3.1,
"learning_rate": 0.0002856315179606025,
"loss": 0.1371,
"step": 1570
},
{
"epoch": 3.12,
"learning_rate": 0.0002841830822711471,
"loss": 0.1676,
"step": 1580
},
{
"epoch": 3.14,
"learning_rate": 0.0002827346465816918,
"loss": 0.1441,
"step": 1590
},
{
"epoch": 3.16,
"learning_rate": 0.0002812862108922364,
"loss": 0.1436,
"step": 1600
},
{
"epoch": 3.16,
"eval_loss": 0.8872199058532715,
"eval_rouge1": 0.16825396825396824,
"eval_rouge2": 0.05833333333333333,
"eval_rougeL": 0.1650793650793651,
"eval_rougeLsum": 0.17285714285714288,
"eval_runtime": 83.5131,
"eval_samples_per_second": 0.239,
"eval_steps_per_second": 0.239,
"step": 1600
},
{
"epoch": 3.18,
"learning_rate": 0.000279837775202781,
"loss": 0.2173,
"step": 1610
},
{
"epoch": 3.2,
"learning_rate": 0.00027838933951332564,
"loss": 0.1457,
"step": 1620
},
{
"epoch": 3.22,
"learning_rate": 0.0002769409038238702,
"loss": 0.1503,
"step": 1630
},
{
"epoch": 3.24,
"learning_rate": 0.00027549246813441487,
"loss": 0.1371,
"step": 1640
},
{
"epoch": 3.26,
"learning_rate": 0.00027404403244495946,
"loss": 0.1331,
"step": 1650
},
{
"epoch": 3.28,
"learning_rate": 0.00027259559675550405,
"loss": 0.2068,
"step": 1660
},
{
"epoch": 3.3,
"learning_rate": 0.0002711471610660487,
"loss": 0.2001,
"step": 1670
},
{
"epoch": 3.32,
"learning_rate": 0.0002696987253765933,
"loss": 0.177,
"step": 1680
},
{
"epoch": 3.34,
"learning_rate": 0.00026825028968713787,
"loss": 0.1772,
"step": 1690
},
{
"epoch": 3.36,
"learning_rate": 0.0002668018539976825,
"loss": 0.138,
"step": 1700
},
{
"epoch": 3.36,
"eval_loss": 0.8929020762443542,
"eval_rouge1": 0.22999999999999998,
"eval_rouge2": 0.12491883116883117,
"eval_rougeL": 0.22615384615384615,
"eval_rougeLsum": 0.23115384615384618,
"eval_runtime": 86.6494,
"eval_samples_per_second": 0.231,
"eval_steps_per_second": 0.231,
"step": 1700
},
{
"epoch": 3.38,
"learning_rate": 0.0002653534183082271,
"loss": 0.164,
"step": 1710
},
{
"epoch": 3.4,
"learning_rate": 0.0002639049826187717,
"loss": 0.1249,
"step": 1720
},
{
"epoch": 3.42,
"learning_rate": 0.00026245654692931634,
"loss": 0.1356,
"step": 1730
},
{
"epoch": 3.44,
"learning_rate": 0.000261008111239861,
"loss": 0.1374,
"step": 1740
},
{
"epoch": 3.46,
"learning_rate": 0.00025955967555040557,
"loss": 0.2013,
"step": 1750
},
{
"epoch": 3.48,
"learning_rate": 0.0002581112398609502,
"loss": 0.1337,
"step": 1760
},
{
"epoch": 3.5,
"learning_rate": 0.0002566628041714948,
"loss": 0.1226,
"step": 1770
},
{
"epoch": 3.52,
"learning_rate": 0.0002552143684820394,
"loss": 0.1166,
"step": 1780
},
{
"epoch": 3.54,
"learning_rate": 0.00025376593279258404,
"loss": 0.2308,
"step": 1790
},
{
"epoch": 3.56,
"learning_rate": 0.00025231749710312863,
"loss": 0.1265,
"step": 1800
},
{
"epoch": 3.56,
"eval_loss": 0.9203845858573914,
"eval_rouge1": 0.17454545454545453,
"eval_rouge2": 0.07291666666666667,
"eval_rougeL": 0.16999999999999998,
"eval_rougeLsum": 0.17727272727272728,
"eval_runtime": 87.9704,
"eval_samples_per_second": 0.227,
"eval_steps_per_second": 0.227,
"step": 1800
},
{
"epoch": 3.58,
"learning_rate": 0.0002508690614136732,
"loss": 0.1526,
"step": 1810
},
{
"epoch": 3.6,
"learning_rate": 0.00024942062572421786,
"loss": 0.2201,
"step": 1820
},
{
"epoch": 3.62,
"learning_rate": 0.00024797219003476245,
"loss": 0.1271,
"step": 1830
},
{
"epoch": 3.64,
"learning_rate": 0.0002465237543453071,
"loss": 0.1749,
"step": 1840
},
{
"epoch": 3.65,
"learning_rate": 0.0002450753186558517,
"loss": 0.133,
"step": 1850
},
{
"epoch": 3.67,
"learning_rate": 0.0002436268829663963,
"loss": 0.2259,
"step": 1860
},
{
"epoch": 3.69,
"learning_rate": 0.00024217844727694092,
"loss": 0.1549,
"step": 1870
},
{
"epoch": 3.71,
"learning_rate": 0.00024073001158748554,
"loss": 0.1173,
"step": 1880
},
{
"epoch": 3.73,
"learning_rate": 0.00023928157589803013,
"loss": 0.1337,
"step": 1890
},
{
"epoch": 3.75,
"learning_rate": 0.00023783314020857474,
"loss": 0.1828,
"step": 1900
},
{
"epoch": 3.75,
"eval_loss": 0.9094276428222656,
"eval_rouge1": 0.18,
"eval_rouge2": 0.14886363636363636,
"eval_rougeL": 0.18,
"eval_rougeLsum": 0.18615384615384614,
"eval_runtime": 84.5106,
"eval_samples_per_second": 0.237,
"eval_steps_per_second": 0.237,
"step": 1900
},
{
"epoch": 3.77,
"learning_rate": 0.00023638470451911936,
"loss": 0.1821,
"step": 1910
},
{
"epoch": 3.79,
"learning_rate": 0.00023493626882966395,
"loss": 0.1257,
"step": 1920
},
{
"epoch": 3.81,
"learning_rate": 0.00023348783314020857,
"loss": 0.172,
"step": 1930
},
{
"epoch": 3.83,
"learning_rate": 0.0002320393974507532,
"loss": 0.1833,
"step": 1940
},
{
"epoch": 3.85,
"learning_rate": 0.0002305909617612978,
"loss": 0.1334,
"step": 1950
},
{
"epoch": 3.87,
"learning_rate": 0.00022914252607184242,
"loss": 0.1736,
"step": 1960
},
{
"epoch": 3.89,
"learning_rate": 0.00022769409038238703,
"loss": 0.1163,
"step": 1970
},
{
"epoch": 3.91,
"learning_rate": 0.00022624565469293165,
"loss": 0.1844,
"step": 1980
},
{
"epoch": 3.93,
"learning_rate": 0.00022479721900347624,
"loss": 0.1358,
"step": 1990
},
{
"epoch": 3.95,
"learning_rate": 0.00022334878331402086,
"loss": 0.1447,
"step": 2000
},
{
"epoch": 3.95,
"eval_loss": 0.89415442943573,
"eval_rouge1": 0.19,
"eval_rouge2": 0.09886363636363635,
"eval_rougeL": 0.18615384615384617,
"eval_rougeLsum": 0.19615384615384615,
"eval_runtime": 84.0506,
"eval_samples_per_second": 0.238,
"eval_steps_per_second": 0.238,
"step": 2000
},
{
"epoch": 3.97,
"learning_rate": 0.00022190034762456547,
"loss": 0.1566,
"step": 2010
},
{
"epoch": 3.99,
"learning_rate": 0.0002204519119351101,
"loss": 0.1132,
"step": 2020
},
{
"epoch": 4.01,
"learning_rate": 0.0002190034762456547,
"loss": 0.1013,
"step": 2030
},
{
"epoch": 4.03,
"learning_rate": 0.00021755504055619932,
"loss": 0.1554,
"step": 2040
},
{
"epoch": 4.05,
"learning_rate": 0.0002161066048667439,
"loss": 0.1405,
"step": 2050
},
{
"epoch": 4.07,
"learning_rate": 0.00021465816917728853,
"loss": 0.152,
"step": 2060
},
{
"epoch": 4.09,
"learning_rate": 0.00021320973348783315,
"loss": 0.1591,
"step": 2070
},
{
"epoch": 4.11,
"learning_rate": 0.00021176129779837773,
"loss": 0.152,
"step": 2080
},
{
"epoch": 4.13,
"learning_rate": 0.00021031286210892235,
"loss": 0.1059,
"step": 2090
},
{
"epoch": 4.15,
"learning_rate": 0.000208864426419467,
"loss": 0.099,
"step": 2100
},
{
"epoch": 4.15,
"eval_loss": 0.9297454953193665,
"eval_rouge1": 0.23864468864468863,
"eval_rouge2": 0.15,
"eval_rougeL": 0.23516483516483513,
"eval_rougeLsum": 0.24514652014652014,
"eval_runtime": 85.7086,
"eval_samples_per_second": 0.233,
"eval_steps_per_second": 0.233,
"step": 2100
},
{
"epoch": 4.17,
"learning_rate": 0.0002074159907300116,
"loss": 0.0916,
"step": 2110
},
{
"epoch": 4.19,
"learning_rate": 0.0002059675550405562,
"loss": 0.2006,
"step": 2120
},
{
"epoch": 4.21,
"learning_rate": 0.00020451911935110082,
"loss": 0.144,
"step": 2130
},
{
"epoch": 4.23,
"learning_rate": 0.00020307068366164544,
"loss": 0.1893,
"step": 2140
},
{
"epoch": 4.25,
"learning_rate": 0.00020162224797219002,
"loss": 0.1697,
"step": 2150
},
{
"epoch": 4.27,
"learning_rate": 0.00020017381228273464,
"loss": 0.1101,
"step": 2160
},
{
"epoch": 4.29,
"learning_rate": 0.00019872537659327929,
"loss": 0.1539,
"step": 2170
},
{
"epoch": 4.31,
"learning_rate": 0.00019727694090382387,
"loss": 0.1038,
"step": 2180
},
{
"epoch": 4.33,
"learning_rate": 0.0001958285052143685,
"loss": 0.1466,
"step": 2190
},
{
"epoch": 4.35,
"learning_rate": 0.0001943800695249131,
"loss": 0.1366,
"step": 2200
},
{
"epoch": 4.35,
"eval_loss": 0.9124263525009155,
"eval_rouge1": 0.12,
"eval_rouge2": 0.07291666666666667,
"eval_rougeL": 0.12,
"eval_rougeLsum": 0.12454545454545454,
"eval_runtime": 91.5173,
"eval_samples_per_second": 0.219,
"eval_steps_per_second": 0.219,
"step": 2200
},
{
"epoch": 4.37,
"learning_rate": 0.0001929316338354577,
"loss": 0.1298,
"step": 2210
},
{
"epoch": 4.39,
"learning_rate": 0.00019148319814600231,
"loss": 0.1886,
"step": 2220
},
{
"epoch": 4.41,
"learning_rate": 0.00019003476245654693,
"loss": 0.1579,
"step": 2230
},
{
"epoch": 4.43,
"learning_rate": 0.00018858632676709152,
"loss": 0.1078,
"step": 2240
},
{
"epoch": 4.45,
"learning_rate": 0.00018713789107763616,
"loss": 0.1509,
"step": 2250
},
{
"epoch": 4.47,
"learning_rate": 0.00018568945538818078,
"loss": 0.108,
"step": 2260
},
{
"epoch": 4.49,
"learning_rate": 0.0001842410196987254,
"loss": 0.1305,
"step": 2270
},
{
"epoch": 4.5,
"learning_rate": 0.00018279258400927,
"loss": 0.1257,
"step": 2280
},
{
"epoch": 4.52,
"learning_rate": 0.0001813441483198146,
"loss": 0.117,
"step": 2290
},
{
"epoch": 4.54,
"learning_rate": 0.00017989571263035922,
"loss": 0.1519,
"step": 2300
},
{
"epoch": 4.54,
"eval_loss": 0.9040172696113586,
"eval_rouge1": 0.18727272727272726,
"eval_rouge2": 0.09861111111111112,
"eval_rougeL": 0.18333333333333332,
"eval_rougeLsum": 0.1906060606060606,
"eval_runtime": 82.9032,
"eval_samples_per_second": 0.241,
"eval_steps_per_second": 0.241,
"step": 2300
},
{
"epoch": 4.56,
"learning_rate": 0.0001784472769409038,
"loss": 0.1536,
"step": 2310
},
{
"epoch": 4.58,
"learning_rate": 0.00017699884125144843,
"loss": 0.1159,
"step": 2320
},
{
"epoch": 4.6,
"learning_rate": 0.00017555040556199307,
"loss": 0.1257,
"step": 2330
},
{
"epoch": 4.62,
"learning_rate": 0.00017410196987253766,
"loss": 0.1698,
"step": 2340
},
{
"epoch": 4.64,
"learning_rate": 0.00017265353418308228,
"loss": 0.1369,
"step": 2350
},
{
"epoch": 4.66,
"learning_rate": 0.0001712050984936269,
"loss": 0.0809,
"step": 2360
},
{
"epoch": 4.68,
"learning_rate": 0.00016975666280417148,
"loss": 0.1003,
"step": 2370
},
{
"epoch": 4.7,
"learning_rate": 0.0001683082271147161,
"loss": 0.1066,
"step": 2380
},
{
"epoch": 4.72,
"learning_rate": 0.00016685979142526072,
"loss": 0.1621,
"step": 2390
},
{
"epoch": 4.74,
"learning_rate": 0.00016541135573580533,
"loss": 0.119,
"step": 2400
},
{
"epoch": 4.74,
"eval_loss": 0.9120545387268066,
"eval_rouge1": 0.12,
"eval_rouge2": 0.04583333333333333,
"eval_rougeL": 0.11285714285714285,
"eval_rougeLsum": 0.12285714285714286,
"eval_runtime": 84.1673,
"eval_samples_per_second": 0.238,
"eval_steps_per_second": 0.238,
"step": 2400
},
{
"epoch": 4.76,
"learning_rate": 0.00016396292004634995,
"loss": 0.1185,
"step": 2410
},
{
"epoch": 4.78,
"learning_rate": 0.00016251448435689457,
"loss": 0.1657,
"step": 2420
},
{
"epoch": 4.8,
"learning_rate": 0.00016106604866743918,
"loss": 0.1233,
"step": 2430
},
{
"epoch": 4.82,
"learning_rate": 0.00015961761297798377,
"loss": 0.1611,
"step": 2440
},
{
"epoch": 4.84,
"learning_rate": 0.0001581691772885284,
"loss": 0.1613,
"step": 2450
},
{
"epoch": 4.86,
"learning_rate": 0.000156720741599073,
"loss": 0.1107,
"step": 2460
},
{
"epoch": 4.88,
"learning_rate": 0.0001552723059096176,
"loss": 0.1436,
"step": 2470
},
{
"epoch": 4.9,
"learning_rate": 0.00015382387022016221,
"loss": 0.129,
"step": 2480
},
{
"epoch": 4.92,
"learning_rate": 0.00015237543453070686,
"loss": 0.1586,
"step": 2490
},
{
"epoch": 4.94,
"learning_rate": 0.00015092699884125145,
"loss": 0.1364,
"step": 2500
},
{
"epoch": 4.94,
"eval_loss": 0.9120429754257202,
"eval_rouge1": 0.20904761904761907,
"eval_rouge2": 0.12583333333333332,
"eval_rougeL": 0.20666666666666664,
"eval_rougeLsum": 0.21904761904761902,
"eval_runtime": 82.9463,
"eval_samples_per_second": 0.241,
"eval_steps_per_second": 0.241,
"step": 2500
},
{
"epoch": 4.96,
"learning_rate": 0.00014947856315179606,
"loss": 0.1688,
"step": 2510
},
{
"epoch": 4.98,
"learning_rate": 0.00014803012746234068,
"loss": 0.1385,
"step": 2520
},
{
"epoch": 5.0,
"learning_rate": 0.0001465816917728853,
"loss": 0.1592,
"step": 2530
},
{
"epoch": 5.02,
"learning_rate": 0.0001451332560834299,
"loss": 0.1014,
"step": 2540
},
{
"epoch": 5.04,
"learning_rate": 0.0001436848203939745,
"loss": 0.0796,
"step": 2550
},
{
"epoch": 5.06,
"learning_rate": 0.00014223638470451912,
"loss": 0.0981,
"step": 2560
},
{
"epoch": 5.08,
"learning_rate": 0.00014078794901506374,
"loss": 0.093,
"step": 2570
},
{
"epoch": 5.1,
"learning_rate": 0.00013933951332560835,
"loss": 0.1599,
"step": 2580
},
{
"epoch": 5.12,
"learning_rate": 0.00013789107763615297,
"loss": 0.1223,
"step": 2590
},
{
"epoch": 5.14,
"learning_rate": 0.00013644264194669756,
"loss": 0.1,
"step": 2600
},
{
"epoch": 5.14,
"eval_loss": 0.9408878087997437,
"eval_rouge1": 0.12507936507936507,
"eval_rouge2": 0.08333333333333333,
"eval_rougeL": 0.12396825396825398,
"eval_rougeLsum": 0.13111111111111112,
"eval_runtime": 96.2103,
"eval_samples_per_second": 0.208,
"eval_steps_per_second": 0.208,
"step": 2600
},
{
"epoch": 5.16,
"learning_rate": 0.00013499420625724218,
"loss": 0.1284,
"step": 2610
},
{
"epoch": 5.18,
"learning_rate": 0.0001335457705677868,
"loss": 0.1523,
"step": 2620
},
{
"epoch": 5.2,
"learning_rate": 0.00013209733487833138,
"loss": 0.1051,
"step": 2630
},
{
"epoch": 5.22,
"learning_rate": 0.00013064889918887603,
"loss": 0.1216,
"step": 2640
},
{
"epoch": 5.24,
"learning_rate": 0.00012920046349942064,
"loss": 0.1219,
"step": 2650
},
{
"epoch": 5.26,
"learning_rate": 0.00012775202780996523,
"loss": 0.1482,
"step": 2660
},
{
"epoch": 5.28,
"learning_rate": 0.00012630359212050985,
"loss": 0.1076,
"step": 2670
},
{
"epoch": 5.3,
"learning_rate": 0.00012485515643105447,
"loss": 0.121,
"step": 2680
},
{
"epoch": 5.32,
"learning_rate": 0.00012340672074159908,
"loss": 0.1448,
"step": 2690
},
{
"epoch": 5.34,
"learning_rate": 0.0001219582850521437,
"loss": 0.1683,
"step": 2700
},
{
"epoch": 5.34,
"eval_loss": 0.9422550201416016,
"eval_rouge1": 0.13818181818181818,
"eval_rouge2": 0.0951010101010101,
"eval_rougeL": 0.13713286713286715,
"eval_rougeLsum": 0.14174825174825176,
"eval_runtime": 96.365,
"eval_samples_per_second": 0.208,
"eval_steps_per_second": 0.208,
"step": 2700
},
{
"epoch": 5.36,
"learning_rate": 0.0001205098493626883,
"loss": 0.103,
"step": 2710
},
{
"epoch": 5.37,
"learning_rate": 0.0001190614136732329,
"loss": 0.1434,
"step": 2720
},
{
"epoch": 5.39,
"learning_rate": 0.00011761297798377752,
"loss": 0.1419,
"step": 2730
},
{
"epoch": 5.41,
"learning_rate": 0.00011616454229432214,
"loss": 0.1145,
"step": 2740
},
{
"epoch": 5.43,
"learning_rate": 0.00011471610660486674,
"loss": 0.1302,
"step": 2750
},
{
"epoch": 5.45,
"learning_rate": 0.00011326767091541136,
"loss": 0.0718,
"step": 2760
},
{
"epoch": 5.47,
"learning_rate": 0.00011181923522595596,
"loss": 0.1166,
"step": 2770
},
{
"epoch": 5.49,
"learning_rate": 0.0001103707995365006,
"loss": 0.1265,
"step": 2780
},
{
"epoch": 5.51,
"learning_rate": 0.0001089223638470452,
"loss": 0.0972,
"step": 2790
},
{
"epoch": 5.53,
"learning_rate": 0.0001074739281575898,
"loss": 0.1395,
"step": 2800
},
{
"epoch": 5.53,
"eval_loss": 0.9336325526237488,
"eval_rouge1": 0.16115384615384615,
"eval_rouge2": 0.12329545454545454,
"eval_rougeL": 0.15999999999999998,
"eval_rougeLsum": 0.16307692307692306,
"eval_runtime": 93.5346,
"eval_samples_per_second": 0.214,
"eval_steps_per_second": 0.214,
"step": 2800
},
{
"epoch": 5.55,
"learning_rate": 0.00010602549246813442,
"loss": 0.0808,
"step": 2810
},
{
"epoch": 5.57,
"learning_rate": 0.00010457705677867903,
"loss": 0.1205,
"step": 2820
},
{
"epoch": 5.59,
"learning_rate": 0.00010312862108922364,
"loss": 0.119,
"step": 2830
},
{
"epoch": 5.61,
"learning_rate": 0.00010168018539976825,
"loss": 0.1357,
"step": 2840
},
{
"epoch": 5.63,
"learning_rate": 0.00010023174971031286,
"loss": 0.1144,
"step": 2850
},
{
"epoch": 5.65,
"learning_rate": 9.878331402085749e-05,
"loss": 0.138,
"step": 2860
},
{
"epoch": 5.67,
"learning_rate": 9.733487833140209e-05,
"loss": 0.0998,
"step": 2870
},
{
"epoch": 5.69,
"learning_rate": 9.588644264194669e-05,
"loss": 0.1437,
"step": 2880
},
{
"epoch": 5.71,
"learning_rate": 9.443800695249131e-05,
"loss": 0.1053,
"step": 2890
},
{
"epoch": 5.73,
"learning_rate": 9.298957126303593e-05,
"loss": 0.1067,
"step": 2900
},
{
"epoch": 5.73,
"eval_loss": 0.9290033578872681,
"eval_rouge1": 0.2234265734265734,
"eval_rouge2": 0.13156565656565655,
"eval_rougeL": 0.21744755244755246,
"eval_rougeLsum": 0.2169230769230769,
"eval_runtime": 91.8958,
"eval_samples_per_second": 0.218,
"eval_steps_per_second": 0.218,
"step": 2900
},
{
"epoch": 5.75,
"learning_rate": 9.154113557358054e-05,
"loss": 0.1225,
"step": 2910
},
{
"epoch": 5.77,
"learning_rate": 9.009269988412515e-05,
"loss": 0.0867,
"step": 2920
},
{
"epoch": 5.79,
"learning_rate": 8.864426419466975e-05,
"loss": 0.1325,
"step": 2930
},
{
"epoch": 5.81,
"learning_rate": 8.719582850521438e-05,
"loss": 0.118,
"step": 2940
},
{
"epoch": 5.83,
"learning_rate": 8.574739281575898e-05,
"loss": 0.112,
"step": 2950
},
{
"epoch": 5.85,
"learning_rate": 8.429895712630359e-05,
"loss": 0.1326,
"step": 2960
},
{
"epoch": 5.87,
"learning_rate": 8.28505214368482e-05,
"loss": 0.1506,
"step": 2970
},
{
"epoch": 5.89,
"learning_rate": 8.140208574739282e-05,
"loss": 0.1499,
"step": 2980
},
{
"epoch": 5.91,
"learning_rate": 7.995365005793744e-05,
"loss": 0.1092,
"step": 2990
},
{
"epoch": 5.93,
"learning_rate": 7.850521436848204e-05,
"loss": 0.1104,
"step": 3000
},
{
"epoch": 5.93,
"eval_loss": 0.9244877696037292,
"eval_rouge1": 0.2,
"eval_rouge2": 0.1,
"eval_rougeL": 0.19153846153846155,
"eval_rougeLsum": 0.19153846153846155,
"eval_runtime": 93.7022,
"eval_samples_per_second": 0.213,
"eval_steps_per_second": 0.213,
"step": 3000
},
{
"epoch": 5.95,
"learning_rate": 7.705677867902664e-05,
"loss": 0.0824,
"step": 3010
},
{
"epoch": 5.97,
"learning_rate": 7.560834298957127e-05,
"loss": 0.1048,
"step": 3020
},
{
"epoch": 5.99,
"learning_rate": 7.415990730011588e-05,
"loss": 0.1295,
"step": 3030
},
{
"epoch": 6.01,
"learning_rate": 7.271147161066048e-05,
"loss": 0.1296,
"step": 3040
},
{
"epoch": 6.03,
"learning_rate": 7.12630359212051e-05,
"loss": 0.1146,
"step": 3050
},
{
"epoch": 6.05,
"learning_rate": 6.981460023174971e-05,
"loss": 0.1158,
"step": 3060
},
{
"epoch": 6.07,
"learning_rate": 6.836616454229433e-05,
"loss": 0.094,
"step": 3070
},
{
"epoch": 6.09,
"learning_rate": 6.691772885283893e-05,
"loss": 0.1141,
"step": 3080
},
{
"epoch": 6.11,
"learning_rate": 6.546929316338354e-05,
"loss": 0.1025,
"step": 3090
},
{
"epoch": 6.13,
"learning_rate": 6.402085747392817e-05,
"loss": 0.1474,
"step": 3100
},
{
"epoch": 6.13,
"eval_loss": 0.9422538876533508,
"eval_rouge1": 0.20069541569541566,
"eval_rouge2": 0.10303030303030303,
"eval_rougeL": 0.19625097125097124,
"eval_rougeLsum": 0.19848096348096347,
"eval_runtime": 94.7246,
"eval_samples_per_second": 0.211,
"eval_steps_per_second": 0.211,
"step": 3100
},
{
"epoch": 6.15,
"learning_rate": 6.257242178447277e-05,
"loss": 0.09,
"step": 3110
},
{
"epoch": 6.17,
"learning_rate": 6.112398609501739e-05,
"loss": 0.1235,
"step": 3120
},
{
"epoch": 6.19,
"learning_rate": 5.9675550405561996e-05,
"loss": 0.0733,
"step": 3130
},
{
"epoch": 6.21,
"learning_rate": 5.822711471610661e-05,
"loss": 0.1035,
"step": 3140
},
{
"epoch": 6.22,
"learning_rate": 5.6778679026651216e-05,
"loss": 0.1027,
"step": 3150
},
{
"epoch": 6.24,
"learning_rate": 5.533024333719583e-05,
"loss": 0.0863,
"step": 3160
},
{
"epoch": 6.26,
"learning_rate": 5.388180764774044e-05,
"loss": 0.095,
"step": 3170
},
{
"epoch": 6.28,
"learning_rate": 5.243337195828506e-05,
"loss": 0.1103,
"step": 3180
},
{
"epoch": 6.3,
"learning_rate": 5.098493626882966e-05,
"loss": 0.1325,
"step": 3190
},
{
"epoch": 6.32,
"learning_rate": 4.953650057937428e-05,
"loss": 0.1052,
"step": 3200
},
{
"epoch": 6.32,
"eval_loss": 0.9328528642654419,
"eval_rouge1": 0.2023076923076923,
"eval_rouge2": 0.1102272727272727,
"eval_rougeL": 0.19999999999999998,
"eval_rougeLsum": 0.2,
"eval_runtime": 92.8302,
"eval_samples_per_second": 0.215,
"eval_steps_per_second": 0.215,
"step": 3200
},
{
"epoch": 6.34,
"learning_rate": 4.808806488991889e-05,
"loss": 0.1293,
"step": 3210
},
{
"epoch": 6.36,
"learning_rate": 4.6639629200463506e-05,
"loss": 0.1301,
"step": 3220
},
{
"epoch": 6.38,
"learning_rate": 4.519119351100811e-05,
"loss": 0.0873,
"step": 3230
},
{
"epoch": 6.4,
"learning_rate": 4.3742757821552725e-05,
"loss": 0.1017,
"step": 3240
},
{
"epoch": 6.42,
"learning_rate": 4.2294322132097335e-05,
"loss": 0.0846,
"step": 3250
},
{
"epoch": 6.44,
"learning_rate": 4.084588644264195e-05,
"loss": 0.0898,
"step": 3260
},
{
"epoch": 6.46,
"learning_rate": 3.9397450753186555e-05,
"loss": 0.1494,
"step": 3270
},
{
"epoch": 6.48,
"learning_rate": 3.794901506373117e-05,
"loss": 0.0742,
"step": 3280
},
{
"epoch": 6.5,
"learning_rate": 3.650057937427578e-05,
"loss": 0.0793,
"step": 3290
},
{
"epoch": 6.52,
"learning_rate": 3.50521436848204e-05,
"loss": 0.1203,
"step": 3300
},
{
"epoch": 6.52,
"eval_loss": 0.9380243420600891,
"eval_rouge1": 0.2023076923076923,
"eval_rouge2": 0.1102272727272727,
"eval_rougeL": 0.19999999999999998,
"eval_rougeLsum": 0.2,
"eval_runtime": 94.5066,
"eval_samples_per_second": 0.212,
"eval_steps_per_second": 0.212,
"step": 3300
},
{
"epoch": 6.54,
"learning_rate": 3.360370799536501e-05,
"loss": 0.1257,
"step": 3310
},
{
"epoch": 6.56,
"learning_rate": 3.215527230590962e-05,
"loss": 0.1177,
"step": 3320
},
{
"epoch": 6.58,
"learning_rate": 3.070683661645423e-05,
"loss": 0.1359,
"step": 3330
},
{
"epoch": 6.6,
"learning_rate": 2.9258400926998842e-05,
"loss": 0.1303,
"step": 3340
},
{
"epoch": 6.62,
"learning_rate": 2.7809965237543452e-05,
"loss": 0.0968,
"step": 3350
},
{
"epoch": 6.64,
"learning_rate": 2.6361529548088065e-05,
"loss": 0.1061,
"step": 3360
},
{
"epoch": 6.66,
"learning_rate": 2.4913093858632675e-05,
"loss": 0.1307,
"step": 3370
},
{
"epoch": 6.68,
"learning_rate": 2.346465816917729e-05,
"loss": 0.0981,
"step": 3380
},
{
"epoch": 6.7,
"learning_rate": 2.20162224797219e-05,
"loss": 0.0901,
"step": 3390
},
{
"epoch": 6.72,
"learning_rate": 2.0567786790266515e-05,
"loss": 0.1125,
"step": 3400
},
{
"epoch": 6.72,
"eval_loss": 0.9421626925468445,
"eval_rouge1": 0.18958041958041955,
"eval_rouge2": 0.0977272727272727,
"eval_rougeL": 0.18615384615384614,
"eval_rougeLsum": 0.19,
"eval_runtime": 96.4239,
"eval_samples_per_second": 0.207,
"eval_steps_per_second": 0.207,
"step": 3400
},
{
"epoch": 6.74,
"learning_rate": 1.9119351100811125e-05,
"loss": 0.0823,
"step": 3410
},
{
"epoch": 6.76,
"learning_rate": 1.767091541135574e-05,
"loss": 0.1016,
"step": 3420
},
{
"epoch": 6.78,
"learning_rate": 1.6222479721900348e-05,
"loss": 0.1172,
"step": 3430
},
{
"epoch": 6.8,
"learning_rate": 1.477404403244496e-05,
"loss": 0.0959,
"step": 3440
},
{
"epoch": 6.82,
"learning_rate": 1.3325608342989572e-05,
"loss": 0.1534,
"step": 3450
},
{
"epoch": 6.84,
"learning_rate": 1.1877172653534183e-05,
"loss": 0.125,
"step": 3460
},
{
"epoch": 6.86,
"learning_rate": 1.0428736964078795e-05,
"loss": 0.1221,
"step": 3470
},
{
"epoch": 6.88,
"learning_rate": 8.980301274623406e-06,
"loss": 0.1391,
"step": 3480
},
{
"epoch": 6.9,
"learning_rate": 7.531865585168019e-06,
"loss": 0.0986,
"step": 3490
},
{
"epoch": 6.92,
"learning_rate": 6.083429895712631e-06,
"loss": 0.1323,
"step": 3500
},
{
"epoch": 6.92,
"eval_loss": 0.9433181881904602,
"eval_rouge1": 0.19,
"eval_rouge2": 0.0977272727272727,
"eval_rougeL": 0.18615384615384617,
"eval_rougeLsum": 0.19,
"eval_runtime": 94.6833,
"eval_samples_per_second": 0.211,
"eval_steps_per_second": 0.211,
"step": 3500
},
{
"epoch": 6.94,
"learning_rate": 0.00015593561368209256,
"loss": 0.1147,
"step": 3510
},
{
"epoch": 6.95,
"learning_rate": 0.00015492957746478874,
"loss": 0.0879,
"step": 3520
},
{
"epoch": 6.97,
"learning_rate": 0.0001539235412474849,
"loss": 0.095,
"step": 3530
},
{
"epoch": 6.99,
"learning_rate": 0.00015291750503018109,
"loss": 0.1277,
"step": 3540
},
{
"epoch": 7.02,
"learning_rate": 0.00015191146881287726,
"loss": 0.1332,
"step": 3550
},
{
"epoch": 7.04,
"learning_rate": 0.00015090543259557344,
"loss": 0.1055,
"step": 3560
},
{
"epoch": 7.06,
"learning_rate": 0.00014989939637826964,
"loss": 0.1114,
"step": 3570
},
{
"epoch": 7.07,
"learning_rate": 0.00014889336016096582,
"loss": 0.0983,
"step": 3580
},
{
"epoch": 7.09,
"learning_rate": 0.00014788732394366196,
"loss": 0.0823,
"step": 3590
},
{
"epoch": 7.11,
"learning_rate": 0.00014688128772635814,
"loss": 0.0949,
"step": 3600
},
{
"epoch": 7.11,
"eval_loss": 0.9529324769973755,
"eval_rouge1": 0.1603205128205128,
"eval_rouge2": 0.09454545454545453,
"eval_rougeL": 0.16115384615384615,
"eval_rougeLsum": 0.15993589743589742,
"eval_runtime": 96.2453,
"eval_samples_per_second": 0.208,
"eval_steps_per_second": 0.208,
"step": 3600
},
{
"epoch": 7.13,
"learning_rate": 0.00014587525150905434,
"loss": 0.0698,
"step": 3610
},
{
"epoch": 7.15,
"learning_rate": 0.00014486921529175052,
"loss": 0.0694,
"step": 3620
},
{
"epoch": 7.17,
"learning_rate": 0.0001438631790744467,
"loss": 0.1078,
"step": 3630
},
{
"epoch": 7.19,
"learning_rate": 0.00014285714285714284,
"loss": 0.1292,
"step": 3640
},
{
"epoch": 7.21,
"learning_rate": 0.00014185110663983904,
"loss": 0.1175,
"step": 3650
},
{
"epoch": 7.23,
"learning_rate": 0.00014084507042253522,
"loss": 0.1168,
"step": 3660
},
{
"epoch": 7.25,
"learning_rate": 0.0001398390342052314,
"loss": 0.0948,
"step": 3670
},
{
"epoch": 7.27,
"learning_rate": 0.00013883299798792757,
"loss": 0.1314,
"step": 3680
},
{
"epoch": 7.29,
"learning_rate": 0.00013782696177062375,
"loss": 0.1068,
"step": 3690
},
{
"epoch": 7.31,
"learning_rate": 0.00013682092555331992,
"loss": 0.1059,
"step": 3700
},
{
"epoch": 7.31,
"eval_loss": 0.9520353078842163,
"eval_rouge1": 0.13832167832167833,
"eval_rouge2": 0.0977272727272727,
"eval_rougeL": 0.14185314685314687,
"eval_rougeLsum": 0.13999999999999999,
"eval_runtime": 91.3536,
"eval_samples_per_second": 0.219,
"eval_steps_per_second": 0.219,
"step": 3700
},
{
"epoch": 7.33,
"learning_rate": 0.0001358148893360161,
"loss": 0.0945,
"step": 3710
},
{
"epoch": 7.35,
"learning_rate": 0.00013480885311871227,
"loss": 0.1298,
"step": 3720
},
{
"epoch": 7.37,
"learning_rate": 0.00013380281690140845,
"loss": 0.0972,
"step": 3730
},
{
"epoch": 7.39,
"learning_rate": 0.00013279678068410465,
"loss": 0.1007,
"step": 3740
},
{
"epoch": 7.41,
"learning_rate": 0.0001317907444668008,
"loss": 0.1194,
"step": 3750
},
{
"epoch": 7.43,
"learning_rate": 0.00013078470824949697,
"loss": 0.1416,
"step": 3760
},
{
"epoch": 7.45,
"learning_rate": 0.00012977867203219315,
"loss": 0.1112,
"step": 3770
},
{
"epoch": 7.47,
"learning_rate": 0.00012877263581488935,
"loss": 0.1232,
"step": 3780
},
{
"epoch": 7.49,
"learning_rate": 0.00012776659959758553,
"loss": 0.1053,
"step": 3790
},
{
"epoch": 7.51,
"learning_rate": 0.0001267605633802817,
"loss": 0.1482,
"step": 3800
},
{
"epoch": 7.51,
"eval_loss": 0.9513714909553528,
"eval_rouge1": 0.21115384615384616,
"eval_rouge2": 0.12045454545454545,
"eval_rougeL": 0.20999999999999996,
"eval_rougeLsum": 0.20730769230769228,
"eval_runtime": 90.7686,
"eval_samples_per_second": 0.22,
"eval_steps_per_second": 0.22,
"step": 3800
},
{
"epoch": 7.53,
"learning_rate": 0.00012575452716297785,
"loss": 0.1281,
"step": 3810
},
{
"epoch": 7.55,
"learning_rate": 0.00012474849094567405,
"loss": 0.1547,
"step": 3820
},
{
"epoch": 7.57,
"learning_rate": 0.00012374245472837023,
"loss": 0.1283,
"step": 3830
},
{
"epoch": 7.59,
"learning_rate": 0.0001227364185110664,
"loss": 0.174,
"step": 3840
},
{
"epoch": 7.61,
"learning_rate": 0.00012173038229376258,
"loss": 0.0827,
"step": 3850
},
{
"epoch": 7.63,
"learning_rate": 0.00012072434607645876,
"loss": 0.1174,
"step": 3860
},
{
"epoch": 7.65,
"learning_rate": 0.00011971830985915493,
"loss": 0.0914,
"step": 3870
},
{
"epoch": 7.67,
"learning_rate": 0.0001187122736418511,
"loss": 0.1205,
"step": 3880
},
{
"epoch": 7.69,
"learning_rate": 0.00011770623742454728,
"loss": 0.0821,
"step": 3890
},
{
"epoch": 7.71,
"learning_rate": 0.00011670020120724347,
"loss": 0.1268,
"step": 3900
},
{
"epoch": 7.71,
"eval_loss": 0.938602089881897,
"eval_rouge1": 0.20384615384615384,
"eval_rouge2": 0.10909090909090909,
"eval_rougeL": 0.20153846153846153,
"eval_rougeLsum": 0.20076923076923076,
"eval_runtime": 89.8217,
"eval_samples_per_second": 0.223,
"eval_steps_per_second": 0.223,
"step": 3900
},
{
"epoch": 7.73,
"learning_rate": 0.00011569416498993963,
"loss": 0.0964,
"step": 3910
},
{
"epoch": 7.75,
"learning_rate": 0.00011468812877263582,
"loss": 0.0878,
"step": 3920
},
{
"epoch": 7.77,
"learning_rate": 0.00011368209255533198,
"loss": 0.1205,
"step": 3930
},
{
"epoch": 7.79,
"learning_rate": 0.00011267605633802817,
"loss": 0.0916,
"step": 3940
},
{
"epoch": 7.8,
"learning_rate": 0.00011167002012072435,
"loss": 0.1021,
"step": 3950
},
{
"epoch": 7.82,
"learning_rate": 0.00011066398390342052,
"loss": 0.0843,
"step": 3960
},
{
"epoch": 7.84,
"learning_rate": 0.0001096579476861167,
"loss": 0.0947,
"step": 3970
},
{
"epoch": 7.86,
"learning_rate": 0.00010865191146881289,
"loss": 0.0884,
"step": 3980
},
{
"epoch": 7.88,
"learning_rate": 0.00010764587525150905,
"loss": 0.0943,
"step": 3990
},
{
"epoch": 7.9,
"learning_rate": 0.00010663983903420524,
"loss": 0.089,
"step": 4000
},
{
"epoch": 7.9,
"eval_loss": 0.9426229596138,
"eval_rouge1": 0.15076923076923077,
"eval_rouge2": 0.1181818181818182,
"eval_rougeL": 0.15615384615384614,
"eval_rougeLsum": 0.15384615384615383,
"eval_runtime": 90.9011,
"eval_samples_per_second": 0.22,
"eval_steps_per_second": 0.22,
"step": 4000
},
{
"epoch": 7.92,
"learning_rate": 0.00010563380281690141,
"loss": 0.1264,
"step": 4010
},
{
"epoch": 7.94,
"learning_rate": 0.00010462776659959759,
"loss": 0.1026,
"step": 4020
},
{
"epoch": 7.96,
"learning_rate": 0.00010362173038229377,
"loss": 0.1348,
"step": 4030
},
{
"epoch": 7.98,
"learning_rate": 0.00010261569416498995,
"loss": 0.1893,
"step": 4040
},
{
"epoch": 8.0,
"learning_rate": 0.00010160965794768612,
"loss": 0.1048,
"step": 4050
},
{
"epoch": 8.02,
"learning_rate": 0.0001006036217303823,
"loss": 0.1576,
"step": 4060
},
{
"epoch": 8.04,
"learning_rate": 9.959758551307847e-05,
"loss": 0.1084,
"step": 4070
},
{
"epoch": 8.06,
"learning_rate": 9.859154929577464e-05,
"loss": 0.089,
"step": 4080
},
{
"epoch": 8.08,
"learning_rate": 9.758551307847083e-05,
"loss": 0.0989,
"step": 4090
},
{
"epoch": 8.1,
"learning_rate": 9.6579476861167e-05,
"loss": 0.108,
"step": 4100
},
{
"epoch": 8.1,
"eval_loss": 0.9726575016975403,
"eval_rouge1": 0.1383333333333333,
"eval_rouge2": 0.10340909090909092,
"eval_rougeL": 0.14448717948717948,
"eval_rougeLsum": 0.13666666666666666,
"eval_runtime": 89.426,
"eval_samples_per_second": 0.224,
"eval_steps_per_second": 0.224,
"step": 4100
},
{
"epoch": 8.12,
"learning_rate": 9.557344064386318e-05,
"loss": 0.0576,
"step": 4110
},
{
"epoch": 8.14,
"learning_rate": 9.456740442655936e-05,
"loss": 0.0937,
"step": 4120
},
{
"epoch": 8.16,
"learning_rate": 9.356136820925553e-05,
"loss": 0.0814,
"step": 4130
},
{
"epoch": 8.18,
"learning_rate": 9.255533199195171e-05,
"loss": 0.0832,
"step": 4140
},
{
"epoch": 8.2,
"learning_rate": 9.15492957746479e-05,
"loss": 0.0881,
"step": 4150
},
{
"epoch": 8.22,
"learning_rate": 9.054325955734406e-05,
"loss": 0.0785,
"step": 4160
},
{
"epoch": 8.24,
"learning_rate": 8.953722334004025e-05,
"loss": 0.1046,
"step": 4170
},
{
"epoch": 8.26,
"learning_rate": 8.853118712273642e-05,
"loss": 0.1137,
"step": 4180
},
{
"epoch": 8.28,
"learning_rate": 8.75251509054326e-05,
"loss": 0.0966,
"step": 4190
},
{
"epoch": 8.3,
"learning_rate": 8.651911468812877e-05,
"loss": 0.1292,
"step": 4200
},
{
"epoch": 8.3,
"eval_loss": 0.9639500379562378,
"eval_rouge1": 0.21000000000000002,
"eval_rouge2": 0.12563131313131312,
"eval_rougeL": 0.2097902097902098,
"eval_rougeLsum": 0.20979020979020976,
"eval_runtime": 86.8779,
"eval_samples_per_second": 0.23,
"eval_steps_per_second": 0.23,
"step": 4200
},
{
"epoch": 8.32,
"learning_rate": 8.551307847082495e-05,
"loss": 0.099,
"step": 4210
},
{
"epoch": 8.34,
"learning_rate": 8.450704225352113e-05,
"loss": 0.082,
"step": 4220
},
{
"epoch": 8.36,
"learning_rate": 8.350100603621731e-05,
"loss": 0.1007,
"step": 4230
},
{
"epoch": 8.38,
"learning_rate": 8.249496981891348e-05,
"loss": 0.0826,
"step": 4240
},
{
"epoch": 8.4,
"learning_rate": 8.148893360160967e-05,
"loss": 0.0823,
"step": 4250
},
{
"epoch": 8.42,
"learning_rate": 8.048289738430584e-05,
"loss": 0.0863,
"step": 4260
},
{
"epoch": 8.44,
"learning_rate": 7.9476861167002e-05,
"loss": 0.1037,
"step": 4270
},
{
"epoch": 8.46,
"learning_rate": 7.847082494969819e-05,
"loss": 0.097,
"step": 4280
},
{
"epoch": 8.48,
"learning_rate": 7.746478873239437e-05,
"loss": 0.0589,
"step": 4290
},
{
"epoch": 8.5,
"learning_rate": 7.645875251509054e-05,
"loss": 0.0868,
"step": 4300
},
{
"epoch": 8.5,
"eval_loss": 0.9618169069290161,
"eval_rouge1": 0.15,
"eval_rouge2": 0.09431818181818181,
"eval_rougeL": 0.15076923076923077,
"eval_rougeLsum": 0.1465384615384615,
"eval_runtime": 86.2134,
"eval_samples_per_second": 0.232,
"eval_steps_per_second": 0.232,
"step": 4300
},
{
"epoch": 8.52,
"learning_rate": 7.545271629778672e-05,
"loss": 0.0964,
"step": 4310
},
{
"epoch": 8.54,
"learning_rate": 7.444668008048291e-05,
"loss": 0.1144,
"step": 4320
},
{
"epoch": 8.56,
"learning_rate": 7.344064386317907e-05,
"loss": 0.1029,
"step": 4330
},
{
"epoch": 8.58,
"learning_rate": 7.243460764587526e-05,
"loss": 0.0978,
"step": 4340
},
{
"epoch": 8.6,
"learning_rate": 7.142857142857142e-05,
"loss": 0.142,
"step": 4350
},
{
"epoch": 8.62,
"learning_rate": 7.042253521126761e-05,
"loss": 0.0957,
"step": 4360
},
{
"epoch": 8.64,
"learning_rate": 6.941649899396378e-05,
"loss": 0.0896,
"step": 4370
},
{
"epoch": 8.65,
"learning_rate": 6.841046277665996e-05,
"loss": 0.0998,
"step": 4380
},
{
"epoch": 8.67,
"learning_rate": 6.740442655935614e-05,
"loss": 0.0828,
"step": 4390
},
{
"epoch": 8.69,
"learning_rate": 6.639839034205232e-05,
"loss": 0.1023,
"step": 4400
},
{
"epoch": 8.69,
"eval_loss": 0.9609012603759766,
"eval_rouge1": 0.18,
"eval_rouge2": 0.075,
"eval_rougeL": 0.18,
"eval_rougeLsum": 0.18,
"eval_runtime": 85.733,
"eval_samples_per_second": 0.233,
"eval_steps_per_second": 0.233,
"step": 4400
},
{
"epoch": 8.71,
"learning_rate": 6.539235412474849e-05,
"loss": 0.1324,
"step": 4410
},
{
"epoch": 8.73,
"learning_rate": 6.438631790744468e-05,
"loss": 0.1107,
"step": 4420
},
{
"epoch": 8.75,
"learning_rate": 6.338028169014085e-05,
"loss": 0.0756,
"step": 4430
},
{
"epoch": 8.77,
"learning_rate": 6.237424547283703e-05,
"loss": 0.1019,
"step": 4440
},
{
"epoch": 8.79,
"learning_rate": 6.13682092555332e-05,
"loss": 0.1232,
"step": 4450
},
{
"epoch": 8.81,
"learning_rate": 6.036217303822938e-05,
"loss": 0.1186,
"step": 4460
},
{
"epoch": 8.83,
"learning_rate": 5.935613682092555e-05,
"loss": 0.1093,
"step": 4470
},
{
"epoch": 8.85,
"learning_rate": 5.8350100603621735e-05,
"loss": 0.1009,
"step": 4480
},
{
"epoch": 8.87,
"learning_rate": 5.734406438631791e-05,
"loss": 0.0878,
"step": 4490
},
{
"epoch": 8.89,
"learning_rate": 5.6338028169014086e-05,
"loss": 0.1102,
"step": 4500
},
{
"epoch": 8.89,
"eval_loss": 0.9644363522529602,
"eval_rouge1": 0.14615384615384613,
"eval_rouge2": 0.1,
"eval_rougeL": 0.15115384615384614,
"eval_rougeLsum": 0.145,
"eval_runtime": 83.9759,
"eval_samples_per_second": 0.238,
"eval_steps_per_second": 0.238,
"step": 4500
},
{
"epoch": 8.91,
"learning_rate": 5.533199195171026e-05,
"loss": 0.1363,
"step": 4510
},
{
"epoch": 8.93,
"learning_rate": 5.4325955734406444e-05,
"loss": 0.0738,
"step": 4520
},
{
"epoch": 8.95,
"learning_rate": 5.331991951710262e-05,
"loss": 0.0649,
"step": 4530
},
{
"epoch": 8.97,
"learning_rate": 5.2313883299798795e-05,
"loss": 0.0971,
"step": 4540
},
{
"epoch": 8.99,
"learning_rate": 5.130784708249498e-05,
"loss": 0.0688,
"step": 4550
},
{
"epoch": 9.01,
"learning_rate": 5.030181086519115e-05,
"loss": 0.0837,
"step": 4560
},
{
"epoch": 9.03,
"learning_rate": 4.929577464788732e-05,
"loss": 0.0891,
"step": 4570
},
{
"epoch": 9.05,
"learning_rate": 4.82897384305835e-05,
"loss": 0.0744,
"step": 4580
},
{
"epoch": 9.07,
"learning_rate": 4.728370221327968e-05,
"loss": 0.0906,
"step": 4590
},
{
"epoch": 9.09,
"learning_rate": 4.6277665995975854e-05,
"loss": 0.1102,
"step": 4600
},
{
"epoch": 9.09,
"eval_loss": 0.9806769490242004,
"eval_rouge1": 0.12615384615384617,
"eval_rouge2": 0.08636363636363635,
"eval_rougeL": 0.13615384615384613,
"eval_rougeLsum": 0.12615384615384617,
"eval_runtime": 88.592,
"eval_samples_per_second": 0.226,
"eval_steps_per_second": 0.226,
"step": 4600
},
{
"epoch": 9.11,
"learning_rate": 4.527162977867203e-05,
"loss": 0.072,
"step": 4610
},
{
"epoch": 9.13,
"learning_rate": 4.426559356136821e-05,
"loss": 0.0729,
"step": 4620
},
{
"epoch": 9.15,
"learning_rate": 4.325955734406439e-05,
"loss": 0.0884,
"step": 4630
},
{
"epoch": 9.17,
"learning_rate": 4.225352112676056e-05,
"loss": 0.0782,
"step": 4640
},
{
"epoch": 9.19,
"learning_rate": 4.124748490945674e-05,
"loss": 0.0879,
"step": 4650
},
{
"epoch": 9.21,
"learning_rate": 4.024144869215292e-05,
"loss": 0.1,
"step": 4660
},
{
"epoch": 9.23,
"learning_rate": 3.9235412474849096e-05,
"loss": 0.0867,
"step": 4670
},
{
"epoch": 9.25,
"learning_rate": 3.822937625754527e-05,
"loss": 0.0881,
"step": 4680
},
{
"epoch": 9.27,
"learning_rate": 3.7223340040241454e-05,
"loss": 0.0903,
"step": 4690
},
{
"epoch": 9.29,
"learning_rate": 3.621730382293763e-05,
"loss": 0.0942,
"step": 4700
},
{
"epoch": 9.29,
"eval_loss": 0.9865831136703491,
"eval_rouge1": 0.13999999999999999,
"eval_rouge2": 0.09772727272727272,
"eval_rougeL": 0.14615384615384613,
"eval_rougeLsum": 0.13999999999999999,
"eval_runtime": 86.651,
"eval_samples_per_second": 0.231,
"eval_steps_per_second": 0.231,
"step": 4700
},
{
"epoch": 9.31,
"learning_rate": 3.5211267605633805e-05,
"loss": 0.1079,
"step": 4710
},
{
"epoch": 9.33,
"learning_rate": 3.420523138832998e-05,
"loss": 0.0807,
"step": 4720
},
{
"epoch": 9.35,
"learning_rate": 3.319919517102616e-05,
"loss": 0.105,
"step": 4730
},
{
"epoch": 9.37,
"learning_rate": 3.219315895372234e-05,
"loss": 0.095,
"step": 4740
},
{
"epoch": 9.39,
"learning_rate": 3.118712273641851e-05,
"loss": 0.0965,
"step": 4750
},
{
"epoch": 9.41,
"learning_rate": 3.018108651911469e-05,
"loss": 0.1001,
"step": 4760
},
{
"epoch": 9.43,
"learning_rate": 2.9175050301810868e-05,
"loss": 0.0782,
"step": 4770
},
{
"epoch": 9.45,
"learning_rate": 2.8169014084507043e-05,
"loss": 0.115,
"step": 4780
},
{
"epoch": 9.47,
"learning_rate": 2.7162977867203222e-05,
"loss": 0.071,
"step": 4790
},
{
"epoch": 9.49,
"learning_rate": 2.6156941649899397e-05,
"loss": 0.129,
"step": 4800
},
{
"epoch": 9.49,
"eval_loss": 0.9853466153144836,
"eval_rouge1": 0.12837606837606838,
"eval_rouge2": 0.08636363636363635,
"eval_rougeL": 0.13615384615384613,
"eval_rougeLsum": 0.1294871794871795,
"eval_runtime": 86.6444,
"eval_samples_per_second": 0.231,
"eval_steps_per_second": 0.231,
"step": 4800
},
{
"epoch": 9.5,
"learning_rate": 2.5150905432595576e-05,
"loss": 0.1285,
"step": 4810
},
{
"epoch": 9.52,
"learning_rate": 2.414486921529175e-05,
"loss": 0.0747,
"step": 4820
},
{
"epoch": 9.54,
"learning_rate": 2.3138832997987927e-05,
"loss": 0.0702,
"step": 4830
},
{
"epoch": 9.56,
"learning_rate": 2.2132796780684106e-05,
"loss": 0.1029,
"step": 4840
},
{
"epoch": 9.58,
"learning_rate": 2.112676056338028e-05,
"loss": 0.102,
"step": 4850
},
{
"epoch": 9.6,
"learning_rate": 2.012072434607646e-05,
"loss": 0.0909,
"step": 4860
},
{
"epoch": 9.62,
"learning_rate": 1.9114688128772636e-05,
"loss": 0.0849,
"step": 4870
},
{
"epoch": 9.64,
"learning_rate": 1.8108651911468815e-05,
"loss": 0.1216,
"step": 4880
},
{
"epoch": 9.66,
"learning_rate": 1.710261569416499e-05,
"loss": 0.1016,
"step": 4890
},
{
"epoch": 9.68,
"learning_rate": 1.609657947686117e-05,
"loss": 0.0949,
"step": 4900
},
{
"epoch": 9.68,
"eval_loss": 0.9819391965866089,
"eval_rouge1": 0.1911111111111111,
"eval_rouge2": 0.09772727272727272,
"eval_rougeL": 0.19615384615384612,
"eval_rougeLsum": 0.1923076923076923,
"eval_runtime": 87.241,
"eval_samples_per_second": 0.229,
"eval_steps_per_second": 0.229,
"step": 4900
},
{
"epoch": 9.7,
"learning_rate": 1.5090543259557344e-05,
"loss": 0.0692,
"step": 4910
},
{
"epoch": 9.72,
"learning_rate": 1.4084507042253522e-05,
"loss": 0.0653,
"step": 4920
},
{
"epoch": 9.74,
"learning_rate": 1.3078470824949699e-05,
"loss": 0.0797,
"step": 4930
},
{
"epoch": 9.76,
"learning_rate": 1.2072434607645874e-05,
"loss": 0.0905,
"step": 4940
},
{
"epoch": 9.78,
"learning_rate": 1.1066398390342053e-05,
"loss": 0.0868,
"step": 4950
},
{
"epoch": 9.8,
"learning_rate": 1.006036217303823e-05,
"loss": 0.0964,
"step": 4960
},
{
"epoch": 9.82,
"learning_rate": 9.054325955734407e-06,
"loss": 0.0913,
"step": 4970
},
{
"epoch": 9.84,
"learning_rate": 8.048289738430584e-06,
"loss": 0.0708,
"step": 4980
},
{
"epoch": 9.86,
"learning_rate": 7.042253521126761e-06,
"loss": 0.1102,
"step": 4990
},
{
"epoch": 9.88,
"learning_rate": 6.036217303822937e-06,
"loss": 0.0852,
"step": 5000
},
{
"epoch": 9.88,
"eval_loss": 0.9852367639541626,
"eval_rouge1": 0.12615384615384617,
"eval_rouge2": 0.08636363636363635,
"eval_rougeL": 0.13615384615384613,
"eval_rougeLsum": 0.12615384615384617,
"eval_runtime": 87.473,
"eval_samples_per_second": 0.229,
"eval_steps_per_second": 0.229,
"step": 5000
}
],
"max_steps": 5060,
"num_train_epochs": 10,
"total_flos": 1.2175308798022656e+17,
"trial_name": null,
"trial_params": null
}