pubmed-summarisation-pegasus / trainer_state.json
root
updated by abhijithneilabraham
9d18855
raw history blame
No virus
29.1 kB
{
"best_metric": 1.4883581399917603,
"best_model_checkpoint": "ccdv_pegasus_xsum_summarization/checkpoint-13500",
"epoch": 2.0012007204322595,
"global_step": 30000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.03,
"learning_rate": 4.944411091099104e-05,
"loss": 1.3482,
"step": 500
},
{
"epoch": 0.03,
"eval_gen_len": 190.7041,
"eval_loss": 1.5671061277389526,
"eval_rouge1": 43.9725,
"eval_rouge2": 20.8852,
"eval_rougeL": 29.6036,
"eval_rougeLsum": 39.2595,
"eval_runtime": 10745.9844,
"eval_samples_per_second": 0.617,
"eval_steps_per_second": 0.077,
"step": 500
},
{
"epoch": 0.07,
"learning_rate": 4.8888221821982085e-05,
"loss": 1.0335,
"step": 1000
},
{
"epoch": 0.07,
"eval_gen_len": 159.8545,
"eval_loss": 1.5465657711029053,
"eval_rouge1": 44.9236,
"eval_rouge2": 21.1853,
"eval_rougeL": 30.4447,
"eval_rougeLsum": 39.9918,
"eval_runtime": 10231.1208,
"eval_samples_per_second": 0.648,
"eval_steps_per_second": 0.081,
"step": 1000
},
{
"epoch": 0.1,
"learning_rate": 4.8332332732973116e-05,
"loss": 1.0184,
"step": 1500
},
{
"epoch": 0.1,
"eval_gen_len": 143.5453,
"eval_loss": 1.5334348678588867,
"eval_rouge1": 44.9483,
"eval_rouge2": 20.9962,
"eval_rougeL": 30.5328,
"eval_rougeLsum": 40.0531,
"eval_runtime": 8769.3615,
"eval_samples_per_second": 0.756,
"eval_steps_per_second": 0.095,
"step": 1500
},
{
"epoch": 0.13,
"learning_rate": 4.777644364396416e-05,
"loss": 1.0015,
"step": 2000
},
{
"epoch": 0.13,
"eval_gen_len": 137.856,
"eval_loss": 1.529853343963623,
"eval_rouge1": 45.9034,
"eval_rouge2": 21.784,
"eval_rougeL": 31.4025,
"eval_rougeLsum": 40.8983,
"eval_runtime": 7582.4229,
"eval_samples_per_second": 0.875,
"eval_steps_per_second": 0.109,
"step": 2000
},
{
"epoch": 0.17,
"learning_rate": 4.72205545549552e-05,
"loss": 1.0101,
"step": 2500
},
{
"epoch": 0.17,
"eval_gen_len": 134.5485,
"eval_loss": 1.5291049480438232,
"eval_rouge1": 45.6738,
"eval_rouge2": 21.5853,
"eval_rougeL": 31.1439,
"eval_rougeLsum": 40.7442,
"eval_runtime": 6945.0608,
"eval_samples_per_second": 0.955,
"eval_steps_per_second": 0.12,
"step": 2500
},
{
"epoch": 0.2,
"learning_rate": 4.6664665465946236e-05,
"loss": 0.9973,
"step": 3000
},
{
"epoch": 0.2,
"eval_gen_len": 131.2587,
"eval_loss": 1.523977518081665,
"eval_rouge1": 45.5052,
"eval_rouge2": 21.4202,
"eval_rougeL": 31.1499,
"eval_rougeLsum": 40.5736,
"eval_runtime": 6458.4712,
"eval_samples_per_second": 1.027,
"eval_steps_per_second": 0.129,
"step": 3000
},
{
"epoch": 0.23,
"learning_rate": 4.6108776376937274e-05,
"loss": 0.9855,
"step": 3500
},
{
"epoch": 0.23,
"eval_gen_len": 131.0582,
"eval_loss": 1.5234577655792236,
"eval_rouge1": 45.8336,
"eval_rouge2": 21.7072,
"eval_rougeL": 31.439,
"eval_rougeLsum": 40.9387,
"eval_runtime": 6345.3859,
"eval_samples_per_second": 1.045,
"eval_steps_per_second": 0.131,
"step": 3500
},
{
"epoch": 0.27,
"learning_rate": 4.555288728792831e-05,
"loss": 0.9868,
"step": 4000
},
{
"epoch": 0.27,
"eval_gen_len": 127.9753,
"eval_loss": 1.5183237791061401,
"eval_rouge1": 45.6348,
"eval_rouge2": 21.5462,
"eval_rougeL": 31.3009,
"eval_rougeLsum": 40.6469,
"eval_runtime": 6091.2782,
"eval_samples_per_second": 1.089,
"eval_steps_per_second": 0.136,
"step": 4000
},
{
"epoch": 0.3,
"learning_rate": 4.4996998198919356e-05,
"loss": 0.9802,
"step": 4500
},
{
"epoch": 0.3,
"eval_gen_len": 127.78,
"eval_loss": 1.5132805109024048,
"eval_rouge1": 45.4357,
"eval_rouge2": 21.3339,
"eval_rougeL": 31.1304,
"eval_rougeLsum": 40.531,
"eval_runtime": 5970.8563,
"eval_samples_per_second": 1.111,
"eval_steps_per_second": 0.139,
"step": 4500
},
{
"epoch": 0.33,
"learning_rate": 4.4441109109910394e-05,
"loss": 0.9743,
"step": 5000
},
{
"epoch": 0.33,
"eval_gen_len": 126.9619,
"eval_loss": 1.5101301670074463,
"eval_rouge1": 45.4845,
"eval_rouge2": 21.4302,
"eval_rougeL": 31.2033,
"eval_rougeLsum": 40.5934,
"eval_runtime": 5820.9525,
"eval_samples_per_second": 1.14,
"eval_steps_per_second": 0.143,
"step": 5000
},
{
"epoch": 0.37,
"learning_rate": 4.388522002090143e-05,
"loss": 0.972,
"step": 5500
},
{
"epoch": 0.37,
"eval_gen_len": 127.1796,
"eval_loss": 1.5053614377975464,
"eval_rouge1": 45.196,
"eval_rouge2": 21.1882,
"eval_rougeL": 30.9407,
"eval_rougeLsum": 40.2648,
"eval_runtime": 5768.324,
"eval_samples_per_second": 1.15,
"eval_steps_per_second": 0.144,
"step": 5500
},
{
"epoch": 0.4,
"learning_rate": 4.332933093189247e-05,
"loss": 0.9651,
"step": 6000
},
{
"epoch": 0.4,
"eval_gen_len": 126.9254,
"eval_loss": 1.5030862092971802,
"eval_rouge1": 45.4822,
"eval_rouge2": 21.4363,
"eval_rougeL": 31.1422,
"eval_rougeLsum": 40.5397,
"eval_runtime": 5665.8916,
"eval_samples_per_second": 1.171,
"eval_steps_per_second": 0.146,
"step": 6000
},
{
"epoch": 0.43,
"learning_rate": 4.277344184288351e-05,
"loss": 0.9758,
"step": 6500
},
{
"epoch": 0.43,
"eval_gen_len": 126.4933,
"eval_loss": 1.495548963546753,
"eval_rouge1": 45.299,
"eval_rouge2": 21.346,
"eval_rougeL": 31.0361,
"eval_rougeLsum": 40.3325,
"eval_runtime": 5589.2093,
"eval_samples_per_second": 1.187,
"eval_steps_per_second": 0.149,
"step": 6500
},
{
"epoch": 0.47,
"learning_rate": 4.221755275387455e-05,
"loss": 0.9652,
"step": 7000
},
{
"epoch": 0.47,
"eval_gen_len": 126.0859,
"eval_loss": 1.4975615739822388,
"eval_rouge1": 45.4694,
"eval_rouge2": 21.5044,
"eval_rougeL": 31.1786,
"eval_rougeLsum": 40.5032,
"eval_runtime": 5569.8623,
"eval_samples_per_second": 1.191,
"eval_steps_per_second": 0.149,
"step": 7000
},
{
"epoch": 0.5,
"learning_rate": 4.166166366486558e-05,
"loss": 0.9601,
"step": 7500
},
{
"epoch": 0.5,
"eval_gen_len": 126.8815,
"eval_loss": 1.4945002794265747,
"eval_rouge1": 45.1971,
"eval_rouge2": 21.2682,
"eval_rougeL": 30.9321,
"eval_rougeLsum": 40.2959,
"eval_runtime": 5557.7856,
"eval_samples_per_second": 1.193,
"eval_steps_per_second": 0.149,
"step": 7500
},
{
"epoch": 0.53,
"learning_rate": 4.110577457585663e-05,
"loss": 0.9502,
"step": 8000
},
{
"epoch": 0.53,
"eval_gen_len": 126.4628,
"eval_loss": 1.49406898021698,
"eval_rouge1": 45.5653,
"eval_rouge2": 21.5655,
"eval_rougeL": 31.2703,
"eval_rougeLsum": 40.5622,
"eval_runtime": 5535.3927,
"eval_samples_per_second": 1.198,
"eval_steps_per_second": 0.15,
"step": 8000
},
{
"epoch": 0.57,
"learning_rate": 4.0549885486847665e-05,
"loss": 0.9537,
"step": 8500
},
{
"epoch": 0.57,
"eval_gen_len": 126.5709,
"eval_loss": 1.4941043853759766,
"eval_rouge1": 45.2806,
"eval_rouge2": 21.2587,
"eval_rougeL": 30.93,
"eval_rougeLsum": 40.354,
"eval_runtime": 5533.5879,
"eval_samples_per_second": 1.199,
"eval_steps_per_second": 0.15,
"step": 8500
},
{
"epoch": 0.6,
"learning_rate": 3.999399639783871e-05,
"loss": 0.9629,
"step": 9000
},
{
"epoch": 0.6,
"eval_gen_len": 126.547,
"eval_loss": 1.4939745664596558,
"eval_rouge1": 45.2474,
"eval_rouge2": 21.275,
"eval_rougeL": 30.9302,
"eval_rougeLsum": 40.3377,
"eval_runtime": 5530.7272,
"eval_samples_per_second": 1.199,
"eval_steps_per_second": 0.15,
"step": 9000
},
{
"epoch": 0.63,
"learning_rate": 3.943810730882974e-05,
"loss": 0.9528,
"step": 9500
},
{
"epoch": 0.63,
"eval_gen_len": 126.768,
"eval_loss": 1.4947481155395508,
"eval_rouge1": 45.3619,
"eval_rouge2": 21.3754,
"eval_rougeL": 31.0723,
"eval_rougeLsum": 40.4162,
"eval_runtime": 5524.5717,
"eval_samples_per_second": 1.201,
"eval_steps_per_second": 0.15,
"step": 9500
},
{
"epoch": 0.67,
"learning_rate": 3.888221821982078e-05,
"loss": 0.9532,
"step": 10000
},
{
"epoch": 0.67,
"eval_gen_len": 126.5323,
"eval_loss": 1.4923893213272095,
"eval_rouge1": 45.5763,
"eval_rouge2": 21.6469,
"eval_rougeL": 31.2585,
"eval_rougeLsum": 40.5722,
"eval_runtime": 5518.2912,
"eval_samples_per_second": 1.202,
"eval_steps_per_second": 0.15,
"step": 10000
},
{
"epoch": 0.7,
"learning_rate": 3.832632913081182e-05,
"loss": 0.945,
"step": 10500
},
{
"epoch": 0.7,
"eval_gen_len": 126.69,
"eval_loss": 1.4898710250854492,
"eval_rouge1": 45.2629,
"eval_rouge2": 21.3471,
"eval_rougeL": 31.0405,
"eval_rougeLsum": 40.3211,
"eval_runtime": 6184.8714,
"eval_samples_per_second": 1.072,
"eval_steps_per_second": 0.134,
"step": 10500
},
{
"epoch": 0.73,
"learning_rate": 3.777044004180286e-05,
"loss": 0.9464,
"step": 11000
},
{
"epoch": 0.73,
"eval_gen_len": 126.9052,
"eval_loss": 1.489205002784729,
"eval_rouge1": 45.3769,
"eval_rouge2": 21.3457,
"eval_rougeL": 30.9968,
"eval_rougeLsum": 40.388,
"eval_runtime": 5520.5499,
"eval_samples_per_second": 1.202,
"eval_steps_per_second": 0.15,
"step": 11000
},
{
"epoch": 0.77,
"learning_rate": 3.7214550952793906e-05,
"loss": 0.9544,
"step": 11500
},
{
"epoch": 0.77,
"eval_gen_len": 126.5739,
"eval_loss": 1.4892535209655762,
"eval_rouge1": 45.411,
"eval_rouge2": 21.3852,
"eval_rougeL": 31.0295,
"eval_rougeLsum": 40.4881,
"eval_runtime": 5521.4271,
"eval_samples_per_second": 1.201,
"eval_steps_per_second": 0.15,
"step": 11500
},
{
"epoch": 0.8,
"learning_rate": 3.6658661863784937e-05,
"loss": 0.9467,
"step": 12000
},
{
"epoch": 0.8,
"eval_gen_len": 126.7315,
"eval_loss": 1.4929231405258179,
"eval_rouge1": 45.4345,
"eval_rouge2": 21.4378,
"eval_rougeL": 31.1163,
"eval_rougeLsum": 40.4393,
"eval_runtime": 5524.2145,
"eval_samples_per_second": 1.201,
"eval_steps_per_second": 0.15,
"step": 12000
},
{
"epoch": 0.83,
"learning_rate": 3.610277277477598e-05,
"loss": 0.9517,
"step": 12500
},
{
"epoch": 0.83,
"eval_gen_len": 126.58,
"eval_loss": 1.4917516708374023,
"eval_rouge1": 45.3614,
"eval_rouge2": 21.3396,
"eval_rougeL": 30.9925,
"eval_rougeLsum": 40.3636,
"eval_runtime": 5514.201,
"eval_samples_per_second": 1.203,
"eval_steps_per_second": 0.151,
"step": 12500
},
{
"epoch": 0.87,
"learning_rate": 3.554688368576702e-05,
"loss": 0.9497,
"step": 13000
},
{
"epoch": 0.87,
"eval_gen_len": 126.7977,
"eval_loss": 1.4918133020401,
"eval_rouge1": 45.2485,
"eval_rouge2": 21.2367,
"eval_rougeL": 30.9282,
"eval_rougeLsum": 40.3438,
"eval_runtime": 6509.3818,
"eval_samples_per_second": 1.019,
"eval_steps_per_second": 0.128,
"step": 13000
},
{
"epoch": 0.9,
"learning_rate": 3.499099459675806e-05,
"loss": 0.9386,
"step": 13500
},
{
"epoch": 0.9,
"eval_gen_len": 126.7524,
"eval_loss": 1.4883581399917603,
"eval_rouge1": 45.5038,
"eval_rouge2": 21.5064,
"eval_rougeL": 31.2132,
"eval_rougeLsum": 40.5696,
"eval_runtime": 5529.57,
"eval_samples_per_second": 1.2,
"eval_steps_per_second": 0.15,
"step": 13500
},
{
"epoch": 0.93,
"learning_rate": 3.4435105507749095e-05,
"loss": 0.9473,
"step": 14000
},
{
"epoch": 0.93,
"eval_gen_len": 126.6534,
"eval_loss": 1.4918317794799805,
"eval_rouge1": 45.2367,
"eval_rouge2": 21.2615,
"eval_rougeL": 30.9179,
"eval_rougeLsum": 40.2548,
"eval_runtime": 5515.735,
"eval_samples_per_second": 1.203,
"eval_steps_per_second": 0.15,
"step": 14000
},
{
"epoch": 0.97,
"learning_rate": 3.387921641874013e-05,
"loss": 0.9235,
"step": 14500
},
{
"epoch": 0.97,
"eval_gen_len": 126.5972,
"eval_loss": 1.4897193908691406,
"eval_rouge1": 45.8027,
"eval_rouge2": 21.7228,
"eval_rougeL": 31.3946,
"eval_rougeLsum": 40.764,
"eval_runtime": 5518.2889,
"eval_samples_per_second": 1.202,
"eval_steps_per_second": 0.15,
"step": 14500
},
{
"epoch": 1.0,
"learning_rate": 3.332332732973118e-05,
"loss": 0.9344,
"step": 15000
},
{
"epoch": 1.0,
"eval_gen_len": 126.9212,
"eval_loss": 1.4973394870758057,
"eval_rouge1": 44.8773,
"eval_rouge2": 20.9475,
"eval_rougeL": 30.5827,
"eval_rougeLsum": 39.9489,
"eval_runtime": 5510.2549,
"eval_samples_per_second": 1.204,
"eval_steps_per_second": 0.151,
"step": 15000
},
{
"epoch": 1.03,
"learning_rate": 3.2767438240722215e-05,
"loss": 0.9139,
"step": 15500
},
{
"epoch": 1.03,
"eval_gen_len": 126.692,
"eval_loss": 1.5064738988876343,
"eval_rouge1": 45.4207,
"eval_rouge2": 21.3856,
"eval_rougeL": 31.0837,
"eval_rougeLsum": 40.4414,
"eval_runtime": 5526.017,
"eval_samples_per_second": 1.2,
"eval_steps_per_second": 0.15,
"step": 15500
},
{
"epoch": 1.07,
"learning_rate": 3.221154915171325e-05,
"loss": 0.8939,
"step": 16000
},
{
"epoch": 1.07,
"eval_gen_len": 126.5179,
"eval_loss": 1.508902668952942,
"eval_rouge1": 45.5575,
"eval_rouge2": 21.5153,
"eval_rougeL": 31.2115,
"eval_rougeLsum": 40.5517,
"eval_runtime": 5698.8075,
"eval_samples_per_second": 1.164,
"eval_steps_per_second": 0.146,
"step": 16000
},
{
"epoch": 1.1,
"learning_rate": 3.165566006270429e-05,
"loss": 0.8968,
"step": 16500
},
{
"epoch": 1.1,
"eval_gen_len": 126.5447,
"eval_loss": 1.5106098651885986,
"eval_rouge1": 45.4574,
"eval_rouge2": 21.4786,
"eval_rougeL": 31.1065,
"eval_rougeLsum": 40.495,
"eval_runtime": 5606.9538,
"eval_samples_per_second": 1.183,
"eval_steps_per_second": 0.148,
"step": 16500
},
{
"epoch": 1.13,
"learning_rate": 3.109977097369533e-05,
"loss": 0.8999,
"step": 17000
},
{
"epoch": 1.13,
"eval_gen_len": 126.6894,
"eval_loss": 1.5100876092910767,
"eval_rouge1": 45.4805,
"eval_rouge2": 21.4579,
"eval_rougeL": 31.1062,
"eval_rougeLsum": 40.5138,
"eval_runtime": 5594.355,
"eval_samples_per_second": 1.186,
"eval_steps_per_second": 0.148,
"step": 17000
},
{
"epoch": 1.17,
"learning_rate": 3.054388188468637e-05,
"loss": 0.903,
"step": 17500
},
{
"epoch": 1.17,
"eval_gen_len": 126.5988,
"eval_loss": 1.5103389024734497,
"eval_rouge1": 45.495,
"eval_rouge2": 21.4395,
"eval_rougeL": 31.1445,
"eval_rougeLsum": 40.4949,
"eval_runtime": 5586.6059,
"eval_samples_per_second": 1.187,
"eval_steps_per_second": 0.149,
"step": 17500
},
{
"epoch": 1.2,
"learning_rate": 2.9987992795677407e-05,
"loss": 0.8988,
"step": 18000
},
{
"epoch": 1.2,
"eval_gen_len": 126.5643,
"eval_loss": 1.5120760202407837,
"eval_rouge1": 45.2764,
"eval_rouge2": 21.2652,
"eval_rougeL": 30.944,
"eval_rougeLsum": 40.3249,
"eval_runtime": 5558.8098,
"eval_samples_per_second": 1.193,
"eval_steps_per_second": 0.149,
"step": 18000
},
{
"epoch": 1.23,
"learning_rate": 2.9432103706668445e-05,
"loss": 0.9027,
"step": 18500
},
{
"epoch": 1.23,
"eval_gen_len": 126.8441,
"eval_loss": 1.5092076063156128,
"eval_rouge1": 45.4884,
"eval_rouge2": 21.4334,
"eval_rougeL": 31.0499,
"eval_rougeLsum": 40.4796,
"eval_runtime": 5536.9856,
"eval_samples_per_second": 1.198,
"eval_steps_per_second": 0.15,
"step": 18500
},
{
"epoch": 1.27,
"learning_rate": 2.8876214617659486e-05,
"loss": 0.9044,
"step": 19000
},
{
"epoch": 1.27,
"eval_gen_len": 126.8737,
"eval_loss": 1.5079020261764526,
"eval_rouge1": 45.5708,
"eval_rouge2": 21.5358,
"eval_rougeL": 31.1862,
"eval_rougeLsum": 40.594,
"eval_runtime": 5524.867,
"eval_samples_per_second": 1.201,
"eval_steps_per_second": 0.15,
"step": 19000
},
{
"epoch": 1.3,
"learning_rate": 2.8320325528650527e-05,
"loss": 0.906,
"step": 19500
},
{
"epoch": 1.3,
"eval_gen_len": 126.8627,
"eval_loss": 1.5116254091262817,
"eval_rouge1": 45.4542,
"eval_rouge2": 21.4172,
"eval_rougeL": 31.0754,
"eval_rougeLsum": 40.439,
"eval_runtime": 5524.341,
"eval_samples_per_second": 1.201,
"eval_steps_per_second": 0.15,
"step": 19500
},
{
"epoch": 1.33,
"learning_rate": 2.776443643964157e-05,
"loss": 0.8994,
"step": 20000
},
{
"epoch": 1.33,
"eval_gen_len": 126.8206,
"eval_loss": 1.5085355043411255,
"eval_rouge1": 45.5424,
"eval_rouge2": 21.5009,
"eval_rougeL": 31.1428,
"eval_rougeLsum": 40.5667,
"eval_runtime": 5528.1375,
"eval_samples_per_second": 1.2,
"eval_steps_per_second": 0.15,
"step": 20000
},
{
"epoch": 1.37,
"learning_rate": 2.7208547350632603e-05,
"loss": 0.9088,
"step": 20500
},
{
"epoch": 1.37,
"eval_gen_len": 126.7414,
"eval_loss": 1.5124515295028687,
"eval_rouge1": 45.3129,
"eval_rouge2": 21.2629,
"eval_rougeL": 30.9461,
"eval_rougeLsum": 40.3271,
"eval_runtime": 5534.3419,
"eval_samples_per_second": 1.199,
"eval_steps_per_second": 0.15,
"step": 20500
},
{
"epoch": 1.4,
"learning_rate": 2.665265826162364e-05,
"loss": 0.8983,
"step": 21000
},
{
"epoch": 1.4,
"eval_gen_len": 126.357,
"eval_loss": 1.5135449171066284,
"eval_rouge1": 45.6846,
"eval_rouge2": 21.6282,
"eval_rougeL": 31.2929,
"eval_rougeLsum": 40.6821,
"eval_runtime": 5538.2932,
"eval_samples_per_second": 1.198,
"eval_steps_per_second": 0.15,
"step": 21000
},
{
"epoch": 1.43,
"learning_rate": 2.6096769172614682e-05,
"loss": 0.907,
"step": 21500
},
{
"epoch": 1.43,
"eval_gen_len": 127.0029,
"eval_loss": 1.5076923370361328,
"eval_rouge1": 45.4873,
"eval_rouge2": 21.455,
"eval_rougeL": 31.1193,
"eval_rougeLsum": 40.5128,
"eval_runtime": 5539.9922,
"eval_samples_per_second": 1.197,
"eval_steps_per_second": 0.15,
"step": 21500
},
{
"epoch": 1.47,
"learning_rate": 2.5540880083605723e-05,
"loss": 0.9097,
"step": 22000
},
{
"epoch": 1.47,
"eval_gen_len": 126.8553,
"eval_loss": 1.5052434206008911,
"eval_rouge1": 45.5988,
"eval_rouge2": 21.6134,
"eval_rougeL": 31.247,
"eval_rougeLsum": 40.58,
"eval_runtime": 5539.7468,
"eval_samples_per_second": 1.197,
"eval_steps_per_second": 0.15,
"step": 22000
},
{
"epoch": 1.5,
"learning_rate": 2.498499099459676e-05,
"loss": 0.9033,
"step": 22500
},
{
"epoch": 1.5,
"eval_gen_len": 127.0048,
"eval_loss": 1.5133850574493408,
"eval_rouge1": 45.3223,
"eval_rouge2": 21.2968,
"eval_rougeL": 30.9357,
"eval_rougeLsum": 40.3813,
"eval_runtime": 9404.5,
"eval_samples_per_second": 0.705,
"eval_steps_per_second": 0.088,
"step": 22500
},
{
"epoch": 1.53,
"learning_rate": 2.44291019055878e-05,
"loss": 0.8925,
"step": 23000
},
{
"epoch": 1.53,
"eval_gen_len": 126.7316,
"eval_loss": 1.510839819908142,
"eval_rouge1": 45.6747,
"eval_rouge2": 21.6374,
"eval_rougeL": 31.31,
"eval_rougeLsum": 40.7015,
"eval_runtime": 22732.2519,
"eval_samples_per_second": 0.292,
"eval_steps_per_second": 0.037,
"step": 23000
},
{
"epoch": 1.57,
"learning_rate": 2.387321281657884e-05,
"loss": 0.8913,
"step": 23500
},
{
"epoch": 1.57,
"eval_gen_len": 126.6869,
"eval_loss": 1.5129714012145996,
"eval_rouge1": 45.6531,
"eval_rouge2": 21.6354,
"eval_rougeL": 31.2956,
"eval_rougeLsum": 40.6555,
"eval_runtime": 6945.8776,
"eval_samples_per_second": 0.955,
"eval_steps_per_second": 0.119,
"step": 23500
},
{
"epoch": 1.6,
"learning_rate": 2.3317323727569874e-05,
"loss": 0.8931,
"step": 24000
},
{
"epoch": 1.6,
"eval_gen_len": 126.4862,
"eval_loss": 1.5111068487167358,
"eval_rouge1": 45.7876,
"eval_rouge2": 21.7115,
"eval_rougeL": 31.3274,
"eval_rougeLsum": 40.7579,
"eval_runtime": 5539.5619,
"eval_samples_per_second": 1.197,
"eval_steps_per_second": 0.15,
"step": 24000
},
{
"epoch": 1.63,
"learning_rate": 2.2761434638560915e-05,
"loss": 0.9009,
"step": 24500
},
{
"epoch": 1.63,
"eval_gen_len": 126.6229,
"eval_loss": 1.5083845853805542,
"eval_rouge1": 45.6359,
"eval_rouge2": 21.583,
"eval_rougeL": 31.2775,
"eval_rougeLsum": 40.6351,
"eval_runtime": 5545.0209,
"eval_samples_per_second": 1.196,
"eval_steps_per_second": 0.15,
"step": 24500
},
{
"epoch": 1.67,
"learning_rate": 2.2205545549551953e-05,
"loss": 0.8925,
"step": 25000
},
{
"epoch": 1.67,
"eval_gen_len": 126.8396,
"eval_loss": 1.5094473361968994,
"eval_rouge1": 45.397,
"eval_rouge2": 21.4266,
"eval_rougeL": 31.082,
"eval_rougeLsum": 40.4261,
"eval_runtime": 5534.6802,
"eval_samples_per_second": 1.198,
"eval_steps_per_second": 0.15,
"step": 25000
},
{
"epoch": 1.7,
"learning_rate": 2.1649656460542994e-05,
"loss": 0.8991,
"step": 25500
},
{
"epoch": 1.7,
"eval_gen_len": 126.722,
"eval_loss": 1.512014627456665,
"eval_rouge1": 45.2851,
"eval_rouge2": 21.2798,
"eval_rougeL": 30.8973,
"eval_rougeLsum": 40.2787,
"eval_runtime": 5538.1327,
"eval_samples_per_second": 1.198,
"eval_steps_per_second": 0.15,
"step": 25500
},
{
"epoch": 1.73,
"learning_rate": 2.1093767371534032e-05,
"loss": 0.9019,
"step": 26000
},
{
"epoch": 1.73,
"eval_gen_len": 126.8048,
"eval_loss": 1.510252833366394,
"eval_rouge1": 45.2905,
"eval_rouge2": 21.2992,
"eval_rougeL": 30.9204,
"eval_rougeLsum": 40.3262,
"eval_runtime": 5535.5354,
"eval_samples_per_second": 1.198,
"eval_steps_per_second": 0.15,
"step": 26000
},
{
"epoch": 1.77,
"learning_rate": 2.0537878282525073e-05,
"loss": 0.891,
"step": 26500
},
{
"epoch": 1.77,
"eval_gen_len": 126.2902,
"eval_loss": 1.5112383365631104,
"eval_rouge1": 45.7091,
"eval_rouge2": 21.6159,
"eval_rougeL": 31.2889,
"eval_rougeLsum": 40.6986,
"eval_runtime": 5537.5343,
"eval_samples_per_second": 1.198,
"eval_steps_per_second": 0.15,
"step": 26500
},
{
"epoch": 1.8,
"learning_rate": 1.998198919351611e-05,
"loss": 0.898,
"step": 27000
},
{
"epoch": 1.8,
"eval_gen_len": 126.5218,
"eval_loss": 1.5084278583526611,
"eval_rouge1": 45.4964,
"eval_rouge2": 21.4702,
"eval_rougeL": 31.177,
"eval_rougeLsum": 40.5432,
"eval_runtime": 5530.5865,
"eval_samples_per_second": 1.199,
"eval_steps_per_second": 0.15,
"step": 27000
},
{
"epoch": 1.83,
"learning_rate": 1.942610010450715e-05,
"loss": 0.8839,
"step": 27500
},
{
"epoch": 1.83,
"eval_gen_len": 126.8648,
"eval_loss": 1.5090144872665405,
"eval_rouge1": 45.6279,
"eval_rouge2": 21.5346,
"eval_rougeL": 31.252,
"eval_rougeLsum": 40.6096,
"eval_runtime": 5522.5033,
"eval_samples_per_second": 1.201,
"eval_steps_per_second": 0.15,
"step": 27500
},
{
"epoch": 1.87,
"learning_rate": 1.8870211015498187e-05,
"loss": 0.8899,
"step": 28000
},
{
"epoch": 1.87,
"eval_gen_len": 126.8498,
"eval_loss": 1.5073039531707764,
"eval_rouge1": 45.6406,
"eval_rouge2": 21.5551,
"eval_rougeL": 31.2519,
"eval_rougeLsum": 40.6425,
"eval_runtime": 5520.5026,
"eval_samples_per_second": 1.202,
"eval_steps_per_second": 0.15,
"step": 28000
},
{
"epoch": 1.9,
"learning_rate": 1.8314321926489228e-05,
"loss": 0.8904,
"step": 28500
},
{
"epoch": 1.9,
"eval_gen_len": 126.689,
"eval_loss": 1.5086652040481567,
"eval_rouge1": 45.7334,
"eval_rouge2": 21.7071,
"eval_rougeL": 31.3069,
"eval_rougeLsum": 40.6992,
"eval_runtime": 5519.0861,
"eval_samples_per_second": 1.202,
"eval_steps_per_second": 0.15,
"step": 28500
},
{
"epoch": 1.93,
"learning_rate": 1.7758432837480266e-05,
"loss": 0.8958,
"step": 29000
},
{
"epoch": 1.93,
"eval_gen_len": 126.9157,
"eval_loss": 1.5112992525100708,
"eval_rouge1": 45.4618,
"eval_rouge2": 21.4623,
"eval_rougeL": 31.0914,
"eval_rougeLsum": 40.4648,
"eval_runtime": 5520.0088,
"eval_samples_per_second": 1.202,
"eval_steps_per_second": 0.15,
"step": 29000
},
{
"epoch": 1.97,
"learning_rate": 1.7202543748471307e-05,
"loss": 0.8991,
"step": 29500
},
{
"epoch": 1.97,
"eval_gen_len": 126.7855,
"eval_loss": 1.5126971006393433,
"eval_rouge1": 45.6364,
"eval_rouge2": 21.5467,
"eval_rougeL": 31.2001,
"eval_rougeLsum": 40.5946,
"eval_runtime": 5532.4138,
"eval_samples_per_second": 1.199,
"eval_steps_per_second": 0.15,
"step": 29500
},
{
"epoch": 2.0,
"learning_rate": 1.6646654659462345e-05,
"loss": 0.889,
"step": 30000
},
{
"epoch": 2.0,
"eval_gen_len": 126.6989,
"eval_loss": 1.5128982067108154,
"eval_rouge1": 45.3668,
"eval_rouge2": 21.3563,
"eval_rougeL": 30.998,
"eval_rougeLsum": 40.3714,
"eval_runtime": 5736.9272,
"eval_samples_per_second": 1.156,
"eval_steps_per_second": 0.145,
"step": 30000
}
],
"max_steps": 44973,
"num_train_epochs": 3,
"total_flos": 7.801293866564321e+17,
"trial_name": null,
"trial_params": null
}