chatcell-small / trainer_state.json
sadgaj's picture
Upload 13 files
00336ed verified
raw
history blame
249 kB
{
"best_metric": 42.0728,
"best_model_checkpoint": "/newdisk1/lkw/chatcell/t5_model/v22_small_50_0_100%/checkpoint-185000",
"epoch": 49.29942916450441,
"eval_steps": 5000,
"global_step": 190000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.03,
"learning_rate": 3.997924234561495e-05,
"loss": 7.3867,
"step": 100
},
{
"epoch": 0.05,
"learning_rate": 3.995848469122989e-05,
"loss": 5.0855,
"step": 200
},
{
"epoch": 0.08,
"learning_rate": 3.993772703684484e-05,
"loss": 5.0196,
"step": 300
},
{
"epoch": 0.1,
"learning_rate": 3.991696938245979e-05,
"loss": 4.9903,
"step": 400
},
{
"epoch": 0.13,
"learning_rate": 3.9896211728074735e-05,
"loss": 4.9698,
"step": 500
},
{
"epoch": 0.16,
"learning_rate": 3.987545407368968e-05,
"loss": 4.9409,
"step": 600
},
{
"epoch": 0.18,
"learning_rate": 3.9854696419304625e-05,
"loss": 4.924,
"step": 700
},
{
"epoch": 0.21,
"learning_rate": 3.983393876491956e-05,
"loss": 4.9034,
"step": 800
},
{
"epoch": 0.23,
"learning_rate": 3.981318111053451e-05,
"loss": 4.8898,
"step": 900
},
{
"epoch": 0.26,
"learning_rate": 3.979242345614946e-05,
"loss": 4.8703,
"step": 1000
},
{
"epoch": 0.29,
"learning_rate": 3.9771665801764405e-05,
"loss": 4.8548,
"step": 1100
},
{
"epoch": 0.31,
"learning_rate": 3.975090814737935e-05,
"loss": 4.8347,
"step": 1200
},
{
"epoch": 0.34,
"learning_rate": 3.9730150492994295e-05,
"loss": 4.8244,
"step": 1300
},
{
"epoch": 0.36,
"learning_rate": 3.970939283860924e-05,
"loss": 4.8042,
"step": 1400
},
{
"epoch": 0.39,
"learning_rate": 3.9688635184224185e-05,
"loss": 4.7933,
"step": 1500
},
{
"epoch": 0.42,
"learning_rate": 3.966787752983913e-05,
"loss": 4.7756,
"step": 1600
},
{
"epoch": 0.44,
"learning_rate": 3.964711987545408e-05,
"loss": 4.7667,
"step": 1700
},
{
"epoch": 0.47,
"learning_rate": 3.9626362221069026e-05,
"loss": 4.7527,
"step": 1800
},
{
"epoch": 0.49,
"learning_rate": 3.960560456668397e-05,
"loss": 4.7391,
"step": 1900
},
{
"epoch": 0.52,
"learning_rate": 3.9584846912298916e-05,
"loss": 4.73,
"step": 2000
},
{
"epoch": 0.54,
"learning_rate": 3.956408925791386e-05,
"loss": 4.7012,
"step": 2100
},
{
"epoch": 0.57,
"learning_rate": 3.9543331603528806e-05,
"loss": 4.6942,
"step": 2200
},
{
"epoch": 0.6,
"learning_rate": 3.952257394914375e-05,
"loss": 4.6838,
"step": 2300
},
{
"epoch": 0.62,
"learning_rate": 3.9501816294758696e-05,
"loss": 4.669,
"step": 2400
},
{
"epoch": 0.65,
"learning_rate": 3.948105864037364e-05,
"loss": 4.6536,
"step": 2500
},
{
"epoch": 0.67,
"learning_rate": 3.9460300985988586e-05,
"loss": 4.6464,
"step": 2600
},
{
"epoch": 0.7,
"learning_rate": 3.943954333160353e-05,
"loss": 4.6331,
"step": 2700
},
{
"epoch": 0.73,
"learning_rate": 3.9418785677218476e-05,
"loss": 4.6069,
"step": 2800
},
{
"epoch": 0.75,
"learning_rate": 3.939802802283342e-05,
"loss": 4.6039,
"step": 2900
},
{
"epoch": 0.78,
"learning_rate": 3.937727036844837e-05,
"loss": 4.5902,
"step": 3000
},
{
"epoch": 0.8,
"learning_rate": 3.935651271406332e-05,
"loss": 4.5851,
"step": 3100
},
{
"epoch": 0.83,
"learning_rate": 3.933575505967826e-05,
"loss": 4.5493,
"step": 3200
},
{
"epoch": 0.86,
"learning_rate": 3.931499740529321e-05,
"loss": 4.5515,
"step": 3300
},
{
"epoch": 0.88,
"learning_rate": 3.929423975090815e-05,
"loss": 4.5391,
"step": 3400
},
{
"epoch": 0.91,
"learning_rate": 3.92734820965231e-05,
"loss": 4.5266,
"step": 3500
},
{
"epoch": 0.93,
"learning_rate": 3.925272444213804e-05,
"loss": 4.518,
"step": 3600
},
{
"epoch": 0.96,
"learning_rate": 3.923196678775299e-05,
"loss": 4.5006,
"step": 3700
},
{
"epoch": 0.99,
"learning_rate": 3.921120913336793e-05,
"loss": 4.4839,
"step": 3800
},
{
"epoch": 1.01,
"learning_rate": 3.919045147898288e-05,
"loss": 4.4753,
"step": 3900
},
{
"epoch": 1.04,
"learning_rate": 3.916969382459782e-05,
"loss": 4.4482,
"step": 4000
},
{
"epoch": 1.06,
"learning_rate": 3.914893617021277e-05,
"loss": 4.446,
"step": 4100
},
{
"epoch": 1.09,
"learning_rate": 3.912817851582771e-05,
"loss": 4.4486,
"step": 4200
},
{
"epoch": 1.12,
"learning_rate": 3.9107420861442664e-05,
"loss": 4.4225,
"step": 4300
},
{
"epoch": 1.14,
"learning_rate": 3.908666320705761e-05,
"loss": 4.4129,
"step": 4400
},
{
"epoch": 1.17,
"learning_rate": 3.9065905552672554e-05,
"loss": 4.401,
"step": 4500
},
{
"epoch": 1.19,
"learning_rate": 3.90451478982875e-05,
"loss": 4.3917,
"step": 4600
},
{
"epoch": 1.22,
"learning_rate": 3.9024390243902444e-05,
"loss": 4.372,
"step": 4700
},
{
"epoch": 1.25,
"learning_rate": 3.900363258951739e-05,
"loss": 4.3572,
"step": 4800
},
{
"epoch": 1.27,
"learning_rate": 3.8982874935132334e-05,
"loss": 4.35,
"step": 4900
},
{
"epoch": 1.3,
"learning_rate": 3.896211728074728e-05,
"loss": 4.3412,
"step": 5000
},
{
"epoch": 1.3,
"eval_gen_len": 12.5597,
"eval_loss": 4.288102149963379,
"eval_rouge1": 13.4415,
"eval_rouge2": 1.5683,
"eval_rougeL": 12.4048,
"eval_rougeLsum": 12.4255,
"eval_runtime": 81.4031,
"eval_samples_per_second": 47.345,
"eval_steps_per_second": 5.921,
"step": 5000
},
{
"epoch": 1.32,
"learning_rate": 3.8941359626362224e-05,
"loss": 4.3273,
"step": 5100
},
{
"epoch": 1.35,
"learning_rate": 3.892060197197717e-05,
"loss": 4.3152,
"step": 5200
},
{
"epoch": 1.38,
"learning_rate": 3.8899844317592114e-05,
"loss": 4.3043,
"step": 5300
},
{
"epoch": 1.4,
"learning_rate": 3.887908666320706e-05,
"loss": 4.287,
"step": 5400
},
{
"epoch": 1.43,
"learning_rate": 3.8858329008822004e-05,
"loss": 4.2814,
"step": 5500
},
{
"epoch": 1.45,
"learning_rate": 3.8837571354436955e-05,
"loss": 4.267,
"step": 5600
},
{
"epoch": 1.48,
"learning_rate": 3.88168137000519e-05,
"loss": 4.2597,
"step": 5700
},
{
"epoch": 1.5,
"learning_rate": 3.8796056045666845e-05,
"loss": 4.2513,
"step": 5800
},
{
"epoch": 1.53,
"learning_rate": 3.877529839128179e-05,
"loss": 4.2313,
"step": 5900
},
{
"epoch": 1.56,
"learning_rate": 3.8754540736896735e-05,
"loss": 4.2101,
"step": 6000
},
{
"epoch": 1.58,
"learning_rate": 3.873378308251168e-05,
"loss": 4.2142,
"step": 6100
},
{
"epoch": 1.61,
"learning_rate": 3.8713025428126625e-05,
"loss": 4.2015,
"step": 6200
},
{
"epoch": 1.63,
"learning_rate": 3.869226777374157e-05,
"loss": 4.1821,
"step": 6300
},
{
"epoch": 1.66,
"learning_rate": 3.8671510119356515e-05,
"loss": 4.1763,
"step": 6400
},
{
"epoch": 1.69,
"learning_rate": 3.865075246497146e-05,
"loss": 4.1673,
"step": 6500
},
{
"epoch": 1.71,
"learning_rate": 3.8629994810586405e-05,
"loss": 4.1591,
"step": 6600
},
{
"epoch": 1.74,
"learning_rate": 3.860923715620135e-05,
"loss": 4.1596,
"step": 6700
},
{
"epoch": 1.76,
"learning_rate": 3.8588479501816295e-05,
"loss": 4.1356,
"step": 6800
},
{
"epoch": 1.79,
"learning_rate": 3.856772184743125e-05,
"loss": 4.1433,
"step": 6900
},
{
"epoch": 1.82,
"learning_rate": 3.854696419304619e-05,
"loss": 4.1157,
"step": 7000
},
{
"epoch": 1.84,
"learning_rate": 3.852620653866114e-05,
"loss": 4.0836,
"step": 7100
},
{
"epoch": 1.87,
"learning_rate": 3.850544888427608e-05,
"loss": 4.1102,
"step": 7200
},
{
"epoch": 1.89,
"learning_rate": 3.8484691229891027e-05,
"loss": 4.0818,
"step": 7300
},
{
"epoch": 1.92,
"learning_rate": 3.846393357550597e-05,
"loss": 4.0663,
"step": 7400
},
{
"epoch": 1.95,
"learning_rate": 3.8443175921120917e-05,
"loss": 4.07,
"step": 7500
},
{
"epoch": 1.97,
"learning_rate": 3.842241826673586e-05,
"loss": 4.0689,
"step": 7600
},
{
"epoch": 2.0,
"learning_rate": 3.8401660612350806e-05,
"loss": 4.0442,
"step": 7700
},
{
"epoch": 2.02,
"learning_rate": 3.838090295796575e-05,
"loss": 4.0421,
"step": 7800
},
{
"epoch": 2.05,
"learning_rate": 3.8360145303580696e-05,
"loss": 4.0381,
"step": 7900
},
{
"epoch": 2.08,
"learning_rate": 3.833938764919564e-05,
"loss": 4.0242,
"step": 8000
},
{
"epoch": 2.1,
"learning_rate": 3.8318629994810586e-05,
"loss": 4.0184,
"step": 8100
},
{
"epoch": 2.13,
"learning_rate": 3.829787234042554e-05,
"loss": 4.0159,
"step": 8200
},
{
"epoch": 2.15,
"learning_rate": 3.827711468604048e-05,
"loss": 4.0,
"step": 8300
},
{
"epoch": 2.18,
"learning_rate": 3.825635703165543e-05,
"loss": 3.9939,
"step": 8400
},
{
"epoch": 2.21,
"learning_rate": 3.823559937727037e-05,
"loss": 3.9851,
"step": 8500
},
{
"epoch": 2.23,
"learning_rate": 3.821484172288532e-05,
"loss": 3.9711,
"step": 8600
},
{
"epoch": 2.26,
"learning_rate": 3.819408406850026e-05,
"loss": 3.9732,
"step": 8700
},
{
"epoch": 2.28,
"learning_rate": 3.817332641411521e-05,
"loss": 3.9666,
"step": 8800
},
{
"epoch": 2.31,
"learning_rate": 3.815256875973015e-05,
"loss": 3.9528,
"step": 8900
},
{
"epoch": 2.34,
"learning_rate": 3.81318111053451e-05,
"loss": 3.9496,
"step": 9000
},
{
"epoch": 2.36,
"learning_rate": 3.811105345096004e-05,
"loss": 3.9426,
"step": 9100
},
{
"epoch": 2.39,
"learning_rate": 3.809029579657499e-05,
"loss": 3.9496,
"step": 9200
},
{
"epoch": 2.41,
"learning_rate": 3.806953814218993e-05,
"loss": 3.9376,
"step": 9300
},
{
"epoch": 2.44,
"learning_rate": 3.804878048780488e-05,
"loss": 3.9404,
"step": 9400
},
{
"epoch": 2.46,
"learning_rate": 3.802802283341983e-05,
"loss": 3.9204,
"step": 9500
},
{
"epoch": 2.49,
"learning_rate": 3.8007265179034774e-05,
"loss": 3.8957,
"step": 9600
},
{
"epoch": 2.52,
"learning_rate": 3.798650752464972e-05,
"loss": 3.9224,
"step": 9700
},
{
"epoch": 2.54,
"learning_rate": 3.7965749870264664e-05,
"loss": 3.9076,
"step": 9800
},
{
"epoch": 2.57,
"learning_rate": 3.794499221587961e-05,
"loss": 3.897,
"step": 9900
},
{
"epoch": 2.59,
"learning_rate": 3.7924234561494554e-05,
"loss": 3.8995,
"step": 10000
},
{
"epoch": 2.59,
"eval_gen_len": 12.2688,
"eval_loss": 3.854327917098999,
"eval_rouge1": 24.0743,
"eval_rouge2": 1.9282,
"eval_rougeL": 20.7914,
"eval_rougeLsum": 20.7945,
"eval_runtime": 81.1697,
"eval_samples_per_second": 47.481,
"eval_steps_per_second": 5.938,
"step": 10000
},
{
"epoch": 2.62,
"learning_rate": 3.79034769071095e-05,
"loss": 3.895,
"step": 10100
},
{
"epoch": 2.65,
"learning_rate": 3.7882719252724444e-05,
"loss": 3.8924,
"step": 10200
},
{
"epoch": 2.67,
"learning_rate": 3.786196159833939e-05,
"loss": 3.894,
"step": 10300
},
{
"epoch": 2.7,
"learning_rate": 3.7841203943954334e-05,
"loss": 3.8961,
"step": 10400
},
{
"epoch": 2.72,
"learning_rate": 3.782044628956928e-05,
"loss": 3.8752,
"step": 10500
},
{
"epoch": 2.75,
"learning_rate": 3.7799688635184224e-05,
"loss": 3.8819,
"step": 10600
},
{
"epoch": 2.78,
"learning_rate": 3.777893098079917e-05,
"loss": 3.8756,
"step": 10700
},
{
"epoch": 2.8,
"learning_rate": 3.775817332641412e-05,
"loss": 3.8762,
"step": 10800
},
{
"epoch": 2.83,
"learning_rate": 3.7737415672029066e-05,
"loss": 3.849,
"step": 10900
},
{
"epoch": 2.85,
"learning_rate": 3.771665801764401e-05,
"loss": 3.8517,
"step": 11000
},
{
"epoch": 2.88,
"learning_rate": 3.7695900363258956e-05,
"loss": 3.8561,
"step": 11100
},
{
"epoch": 2.91,
"learning_rate": 3.76751427088739e-05,
"loss": 3.8511,
"step": 11200
},
{
"epoch": 2.93,
"learning_rate": 3.7654385054488846e-05,
"loss": 3.8213,
"step": 11300
},
{
"epoch": 2.96,
"learning_rate": 3.763362740010379e-05,
"loss": 3.8539,
"step": 11400
},
{
"epoch": 2.98,
"learning_rate": 3.7612869745718736e-05,
"loss": 3.844,
"step": 11500
},
{
"epoch": 3.01,
"learning_rate": 3.759211209133368e-05,
"loss": 3.8349,
"step": 11600
},
{
"epoch": 3.04,
"learning_rate": 3.7571354436948626e-05,
"loss": 3.8363,
"step": 11700
},
{
"epoch": 3.06,
"learning_rate": 3.755059678256357e-05,
"loss": 3.8444,
"step": 11800
},
{
"epoch": 3.09,
"learning_rate": 3.7529839128178515e-05,
"loss": 3.8336,
"step": 11900
},
{
"epoch": 3.11,
"learning_rate": 3.750908147379346e-05,
"loss": 3.8238,
"step": 12000
},
{
"epoch": 3.14,
"learning_rate": 3.748832381940841e-05,
"loss": 3.7921,
"step": 12100
},
{
"epoch": 3.17,
"learning_rate": 3.746756616502336e-05,
"loss": 3.8307,
"step": 12200
},
{
"epoch": 3.19,
"learning_rate": 3.74468085106383e-05,
"loss": 3.8104,
"step": 12300
},
{
"epoch": 3.22,
"learning_rate": 3.742605085625325e-05,
"loss": 3.8083,
"step": 12400
},
{
"epoch": 3.24,
"learning_rate": 3.740529320186819e-05,
"loss": 3.8045,
"step": 12500
},
{
"epoch": 3.27,
"learning_rate": 3.738453554748314e-05,
"loss": 3.814,
"step": 12600
},
{
"epoch": 3.3,
"learning_rate": 3.736377789309808e-05,
"loss": 3.816,
"step": 12700
},
{
"epoch": 3.32,
"learning_rate": 3.734302023871303e-05,
"loss": 3.8151,
"step": 12800
},
{
"epoch": 3.35,
"learning_rate": 3.732226258432797e-05,
"loss": 3.8171,
"step": 12900
},
{
"epoch": 3.37,
"learning_rate": 3.730150492994292e-05,
"loss": 3.7895,
"step": 13000
},
{
"epoch": 3.4,
"learning_rate": 3.728074727555786e-05,
"loss": 3.7782,
"step": 13100
},
{
"epoch": 3.43,
"learning_rate": 3.725998962117281e-05,
"loss": 3.7925,
"step": 13200
},
{
"epoch": 3.45,
"learning_rate": 3.723923196678775e-05,
"loss": 3.7955,
"step": 13300
},
{
"epoch": 3.48,
"learning_rate": 3.7218474312402704e-05,
"loss": 3.7964,
"step": 13400
},
{
"epoch": 3.5,
"learning_rate": 3.719771665801765e-05,
"loss": 3.7809,
"step": 13500
},
{
"epoch": 3.53,
"learning_rate": 3.7176959003632593e-05,
"loss": 3.816,
"step": 13600
},
{
"epoch": 3.55,
"learning_rate": 3.715620134924754e-05,
"loss": 3.795,
"step": 13700
},
{
"epoch": 3.58,
"learning_rate": 3.713544369486248e-05,
"loss": 3.7816,
"step": 13800
},
{
"epoch": 3.61,
"learning_rate": 3.711468604047743e-05,
"loss": 3.784,
"step": 13900
},
{
"epoch": 3.63,
"learning_rate": 3.709392838609237e-05,
"loss": 3.7755,
"step": 14000
},
{
"epoch": 3.66,
"learning_rate": 3.7073170731707325e-05,
"loss": 3.7809,
"step": 14100
},
{
"epoch": 3.68,
"learning_rate": 3.705241307732227e-05,
"loss": 3.7854,
"step": 14200
},
{
"epoch": 3.71,
"learning_rate": 3.703165542293721e-05,
"loss": 3.7811,
"step": 14300
},
{
"epoch": 3.74,
"learning_rate": 3.701089776855215e-05,
"loss": 3.7859,
"step": 14400
},
{
"epoch": 3.76,
"learning_rate": 3.69901401141671e-05,
"loss": 3.771,
"step": 14500
},
{
"epoch": 3.79,
"learning_rate": 3.696938245978204e-05,
"loss": 3.7613,
"step": 14600
},
{
"epoch": 3.81,
"learning_rate": 3.6948624805396995e-05,
"loss": 3.7879,
"step": 14700
},
{
"epoch": 3.84,
"learning_rate": 3.692786715101194e-05,
"loss": 3.775,
"step": 14800
},
{
"epoch": 3.87,
"learning_rate": 3.6907109496626885e-05,
"loss": 3.7578,
"step": 14900
},
{
"epoch": 3.89,
"learning_rate": 3.688635184224183e-05,
"loss": 3.7673,
"step": 15000
},
{
"epoch": 3.89,
"eval_gen_len": 12.2688,
"eval_loss": 3.7353405952453613,
"eval_rouge1": 25.2952,
"eval_rouge2": 2.0011,
"eval_rougeL": 21.2449,
"eval_rougeLsum": 21.2222,
"eval_runtime": 81.3221,
"eval_samples_per_second": 47.392,
"eval_steps_per_second": 5.927,
"step": 15000
},
{
"epoch": 3.92,
"learning_rate": 3.6865594187856775e-05,
"loss": 3.7752,
"step": 15100
},
{
"epoch": 3.94,
"learning_rate": 3.684483653347172e-05,
"loss": 3.7749,
"step": 15200
},
{
"epoch": 3.97,
"learning_rate": 3.6824078879086665e-05,
"loss": 3.7379,
"step": 15300
},
{
"epoch": 4.0,
"learning_rate": 3.6803321224701616e-05,
"loss": 3.756,
"step": 15400
},
{
"epoch": 4.02,
"learning_rate": 3.678256357031656e-05,
"loss": 3.7516,
"step": 15500
},
{
"epoch": 4.05,
"learning_rate": 3.6761805915931506e-05,
"loss": 3.7543,
"step": 15600
},
{
"epoch": 4.07,
"learning_rate": 3.674104826154645e-05,
"loss": 3.7594,
"step": 15700
},
{
"epoch": 4.1,
"learning_rate": 3.6720290607161396e-05,
"loss": 3.7529,
"step": 15800
},
{
"epoch": 4.13,
"learning_rate": 3.6699532952776334e-05,
"loss": 3.7571,
"step": 15900
},
{
"epoch": 4.15,
"learning_rate": 3.667877529839128e-05,
"loss": 3.7617,
"step": 16000
},
{
"epoch": 4.18,
"learning_rate": 3.665801764400623e-05,
"loss": 3.7688,
"step": 16100
},
{
"epoch": 4.2,
"learning_rate": 3.6637259989621176e-05,
"loss": 3.7514,
"step": 16200
},
{
"epoch": 4.23,
"learning_rate": 3.661650233523612e-05,
"loss": 3.7574,
"step": 16300
},
{
"epoch": 4.26,
"learning_rate": 3.6595744680851066e-05,
"loss": 3.766,
"step": 16400
},
{
"epoch": 4.28,
"learning_rate": 3.657498702646601e-05,
"loss": 3.7406,
"step": 16500
},
{
"epoch": 4.31,
"learning_rate": 3.6554229372080956e-05,
"loss": 3.7226,
"step": 16600
},
{
"epoch": 4.33,
"learning_rate": 3.653347171769591e-05,
"loss": 3.7476,
"step": 16700
},
{
"epoch": 4.36,
"learning_rate": 3.651271406331085e-05,
"loss": 3.7391,
"step": 16800
},
{
"epoch": 4.39,
"learning_rate": 3.64919564089258e-05,
"loss": 3.7396,
"step": 16900
},
{
"epoch": 4.41,
"learning_rate": 3.647119875454074e-05,
"loss": 3.7518,
"step": 17000
},
{
"epoch": 4.44,
"learning_rate": 3.645044110015569e-05,
"loss": 3.7414,
"step": 17100
},
{
"epoch": 4.46,
"learning_rate": 3.642968344577063e-05,
"loss": 3.747,
"step": 17200
},
{
"epoch": 4.49,
"learning_rate": 3.640892579138558e-05,
"loss": 3.7615,
"step": 17300
},
{
"epoch": 4.51,
"learning_rate": 3.638816813700052e-05,
"loss": 3.7379,
"step": 17400
},
{
"epoch": 4.54,
"learning_rate": 3.636741048261547e-05,
"loss": 3.7485,
"step": 17500
},
{
"epoch": 4.57,
"learning_rate": 3.634665282823041e-05,
"loss": 3.7474,
"step": 17600
},
{
"epoch": 4.59,
"learning_rate": 3.632589517384536e-05,
"loss": 3.7209,
"step": 17700
},
{
"epoch": 4.62,
"learning_rate": 3.63051375194603e-05,
"loss": 3.7424,
"step": 17800
},
{
"epoch": 4.64,
"learning_rate": 3.628437986507525e-05,
"loss": 3.7427,
"step": 17900
},
{
"epoch": 4.67,
"learning_rate": 3.62636222106902e-05,
"loss": 3.7466,
"step": 18000
},
{
"epoch": 4.7,
"learning_rate": 3.6242864556305144e-05,
"loss": 3.7372,
"step": 18100
},
{
"epoch": 4.72,
"learning_rate": 3.622210690192009e-05,
"loss": 3.7325,
"step": 18200
},
{
"epoch": 4.75,
"learning_rate": 3.6201349247535034e-05,
"loss": 3.7315,
"step": 18300
},
{
"epoch": 4.77,
"learning_rate": 3.618059159314998e-05,
"loss": 3.7248,
"step": 18400
},
{
"epoch": 4.8,
"learning_rate": 3.6159833938764924e-05,
"loss": 3.7315,
"step": 18500
},
{
"epoch": 4.83,
"learning_rate": 3.613907628437987e-05,
"loss": 3.7164,
"step": 18600
},
{
"epoch": 4.85,
"learning_rate": 3.6118318629994814e-05,
"loss": 3.7248,
"step": 18700
},
{
"epoch": 4.88,
"learning_rate": 3.609756097560976e-05,
"loss": 3.7286,
"step": 18800
},
{
"epoch": 4.9,
"learning_rate": 3.6076803321224704e-05,
"loss": 3.7321,
"step": 18900
},
{
"epoch": 4.93,
"learning_rate": 3.605604566683965e-05,
"loss": 3.7238,
"step": 19000
},
{
"epoch": 4.96,
"learning_rate": 3.6035288012454594e-05,
"loss": 3.7334,
"step": 19100
},
{
"epoch": 4.98,
"learning_rate": 3.601453035806954e-05,
"loss": 3.7116,
"step": 19200
},
{
"epoch": 5.01,
"learning_rate": 3.599377270368449e-05,
"loss": 3.7395,
"step": 19300
},
{
"epoch": 5.03,
"learning_rate": 3.5973015049299435e-05,
"loss": 3.7334,
"step": 19400
},
{
"epoch": 5.06,
"learning_rate": 3.595225739491438e-05,
"loss": 3.7274,
"step": 19500
},
{
"epoch": 5.09,
"learning_rate": 3.5931499740529325e-05,
"loss": 3.7216,
"step": 19600
},
{
"epoch": 5.11,
"learning_rate": 3.591074208614427e-05,
"loss": 3.7238,
"step": 19700
},
{
"epoch": 5.14,
"learning_rate": 3.5889984431759215e-05,
"loss": 3.7158,
"step": 19800
},
{
"epoch": 5.16,
"learning_rate": 3.586922677737416e-05,
"loss": 3.7302,
"step": 19900
},
{
"epoch": 5.19,
"learning_rate": 3.5848469122989105e-05,
"loss": 3.7137,
"step": 20000
},
{
"epoch": 5.19,
"eval_gen_len": 12.2688,
"eval_loss": 3.690828561782837,
"eval_rouge1": 26.1164,
"eval_rouge2": 2.0984,
"eval_rougeL": 22.1244,
"eval_rougeLsum": 22.1089,
"eval_runtime": 81.0762,
"eval_samples_per_second": 47.536,
"eval_steps_per_second": 5.945,
"step": 20000
},
{
"epoch": 5.22,
"learning_rate": 3.582771146860405e-05,
"loss": 3.7286,
"step": 20100
},
{
"epoch": 5.24,
"learning_rate": 3.5806953814218995e-05,
"loss": 3.7333,
"step": 20200
},
{
"epoch": 5.27,
"learning_rate": 3.578619615983394e-05,
"loss": 3.7041,
"step": 20300
},
{
"epoch": 5.29,
"learning_rate": 3.5765438505448885e-05,
"loss": 3.6972,
"step": 20400
},
{
"epoch": 5.32,
"learning_rate": 3.574468085106383e-05,
"loss": 3.7198,
"step": 20500
},
{
"epoch": 5.35,
"learning_rate": 3.572392319667878e-05,
"loss": 3.7212,
"step": 20600
},
{
"epoch": 5.37,
"learning_rate": 3.570316554229373e-05,
"loss": 3.7162,
"step": 20700
},
{
"epoch": 5.4,
"learning_rate": 3.568240788790867e-05,
"loss": 3.7239,
"step": 20800
},
{
"epoch": 5.42,
"learning_rate": 3.566165023352362e-05,
"loss": 3.7266,
"step": 20900
},
{
"epoch": 5.45,
"learning_rate": 3.564089257913856e-05,
"loss": 3.7136,
"step": 21000
},
{
"epoch": 5.47,
"learning_rate": 3.562013492475351e-05,
"loss": 3.7118,
"step": 21100
},
{
"epoch": 5.5,
"learning_rate": 3.559937727036845e-05,
"loss": 3.7149,
"step": 21200
},
{
"epoch": 5.53,
"learning_rate": 3.55786196159834e-05,
"loss": 3.7064,
"step": 21300
},
{
"epoch": 5.55,
"learning_rate": 3.555786196159834e-05,
"loss": 3.6967,
"step": 21400
},
{
"epoch": 5.58,
"learning_rate": 3.5537104307213287e-05,
"loss": 3.7106,
"step": 21500
},
{
"epoch": 5.6,
"learning_rate": 3.551634665282823e-05,
"loss": 3.6849,
"step": 21600
},
{
"epoch": 5.63,
"learning_rate": 3.5495588998443177e-05,
"loss": 3.6902,
"step": 21700
},
{
"epoch": 5.66,
"learning_rate": 3.547483134405812e-05,
"loss": 3.6975,
"step": 21800
},
{
"epoch": 5.68,
"learning_rate": 3.545407368967307e-05,
"loss": 3.7142,
"step": 21900
},
{
"epoch": 5.71,
"learning_rate": 3.543331603528802e-05,
"loss": 3.6887,
"step": 22000
},
{
"epoch": 5.73,
"learning_rate": 3.541255838090296e-05,
"loss": 3.6966,
"step": 22100
},
{
"epoch": 5.76,
"learning_rate": 3.539180072651791e-05,
"loss": 3.7046,
"step": 22200
},
{
"epoch": 5.79,
"learning_rate": 3.537104307213285e-05,
"loss": 3.7046,
"step": 22300
},
{
"epoch": 5.81,
"learning_rate": 3.53502854177478e-05,
"loss": 3.7108,
"step": 22400
},
{
"epoch": 5.84,
"learning_rate": 3.532952776336274e-05,
"loss": 3.6994,
"step": 22500
},
{
"epoch": 5.86,
"learning_rate": 3.530877010897769e-05,
"loss": 3.6976,
"step": 22600
},
{
"epoch": 5.89,
"learning_rate": 3.528801245459263e-05,
"loss": 3.7163,
"step": 22700
},
{
"epoch": 5.92,
"learning_rate": 3.526725480020758e-05,
"loss": 3.7022,
"step": 22800
},
{
"epoch": 5.94,
"learning_rate": 3.524649714582252e-05,
"loss": 3.6893,
"step": 22900
},
{
"epoch": 5.97,
"learning_rate": 3.522573949143747e-05,
"loss": 3.6954,
"step": 23000
},
{
"epoch": 5.99,
"learning_rate": 3.520498183705241e-05,
"loss": 3.7143,
"step": 23100
},
{
"epoch": 6.02,
"learning_rate": 3.5184224182667365e-05,
"loss": 3.6649,
"step": 23200
},
{
"epoch": 6.05,
"learning_rate": 3.516346652828231e-05,
"loss": 3.6953,
"step": 23300
},
{
"epoch": 6.07,
"learning_rate": 3.5142708873897255e-05,
"loss": 3.7025,
"step": 23400
},
{
"epoch": 6.1,
"learning_rate": 3.51219512195122e-05,
"loss": 3.7151,
"step": 23500
},
{
"epoch": 6.12,
"learning_rate": 3.5101193565127144e-05,
"loss": 3.6985,
"step": 23600
},
{
"epoch": 6.15,
"learning_rate": 3.508043591074209e-05,
"loss": 3.7094,
"step": 23700
},
{
"epoch": 6.18,
"learning_rate": 3.5059678256357034e-05,
"loss": 3.6963,
"step": 23800
},
{
"epoch": 6.2,
"learning_rate": 3.503892060197198e-05,
"loss": 3.7083,
"step": 23900
},
{
"epoch": 6.23,
"learning_rate": 3.5018162947586924e-05,
"loss": 3.6809,
"step": 24000
},
{
"epoch": 6.25,
"learning_rate": 3.499740529320187e-05,
"loss": 3.6915,
"step": 24100
},
{
"epoch": 6.28,
"learning_rate": 3.4976647638816814e-05,
"loss": 3.6886,
"step": 24200
},
{
"epoch": 6.31,
"learning_rate": 3.495588998443176e-05,
"loss": 3.7067,
"step": 24300
},
{
"epoch": 6.33,
"learning_rate": 3.4935132330046704e-05,
"loss": 3.6912,
"step": 24400
},
{
"epoch": 6.36,
"learning_rate": 3.4914374675661656e-05,
"loss": 3.7084,
"step": 24500
},
{
"epoch": 6.38,
"learning_rate": 3.48936170212766e-05,
"loss": 3.6849,
"step": 24600
},
{
"epoch": 6.41,
"learning_rate": 3.4872859366891546e-05,
"loss": 3.7204,
"step": 24700
},
{
"epoch": 6.43,
"learning_rate": 3.485210171250649e-05,
"loss": 3.7057,
"step": 24800
},
{
"epoch": 6.46,
"learning_rate": 3.4831344058121436e-05,
"loss": 3.7103,
"step": 24900
},
{
"epoch": 6.49,
"learning_rate": 3.481058640373638e-05,
"loss": 3.6905,
"step": 25000
},
{
"epoch": 6.49,
"eval_gen_len": 12.2701,
"eval_loss": 3.667330503463745,
"eval_rouge1": 26.7248,
"eval_rouge2": 2.0984,
"eval_rougeL": 22.7285,
"eval_rougeLsum": 22.7508,
"eval_runtime": 81.3332,
"eval_samples_per_second": 47.385,
"eval_steps_per_second": 5.926,
"step": 25000
},
{
"epoch": 6.51,
"learning_rate": 3.4789828749351326e-05,
"loss": 3.6919,
"step": 25100
},
{
"epoch": 6.54,
"learning_rate": 3.476907109496627e-05,
"loss": 3.7111,
"step": 25200
},
{
"epoch": 6.56,
"learning_rate": 3.4748313440581216e-05,
"loss": 3.6901,
"step": 25300
},
{
"epoch": 6.59,
"learning_rate": 3.472755578619616e-05,
"loss": 3.6921,
"step": 25400
},
{
"epoch": 6.62,
"learning_rate": 3.4706798131811106e-05,
"loss": 3.6951,
"step": 25500
},
{
"epoch": 6.64,
"learning_rate": 3.468604047742605e-05,
"loss": 3.7004,
"step": 25600
},
{
"epoch": 6.67,
"learning_rate": 3.4665282823040996e-05,
"loss": 3.6868,
"step": 25700
},
{
"epoch": 6.69,
"learning_rate": 3.464452516865594e-05,
"loss": 3.6954,
"step": 25800
},
{
"epoch": 6.72,
"learning_rate": 3.462376751427089e-05,
"loss": 3.7049,
"step": 25900
},
{
"epoch": 6.75,
"learning_rate": 3.460300985988584e-05,
"loss": 3.6947,
"step": 26000
},
{
"epoch": 6.77,
"learning_rate": 3.458225220550078e-05,
"loss": 3.6963,
"step": 26100
},
{
"epoch": 6.8,
"learning_rate": 3.456149455111573e-05,
"loss": 3.6925,
"step": 26200
},
{
"epoch": 6.82,
"learning_rate": 3.454073689673067e-05,
"loss": 3.6845,
"step": 26300
},
{
"epoch": 6.85,
"learning_rate": 3.451997924234562e-05,
"loss": 3.6786,
"step": 26400
},
{
"epoch": 6.88,
"learning_rate": 3.449922158796056e-05,
"loss": 3.6842,
"step": 26500
},
{
"epoch": 6.9,
"learning_rate": 3.447846393357551e-05,
"loss": 3.6805,
"step": 26600
},
{
"epoch": 6.93,
"learning_rate": 3.445770627919045e-05,
"loss": 3.7005,
"step": 26700
},
{
"epoch": 6.95,
"learning_rate": 3.44369486248054e-05,
"loss": 3.683,
"step": 26800
},
{
"epoch": 6.98,
"learning_rate": 3.441619097042034e-05,
"loss": 3.6775,
"step": 26900
},
{
"epoch": 7.01,
"learning_rate": 3.439543331603529e-05,
"loss": 3.6823,
"step": 27000
},
{
"epoch": 7.03,
"learning_rate": 3.437467566165023e-05,
"loss": 3.6702,
"step": 27100
},
{
"epoch": 7.06,
"learning_rate": 3.4353918007265184e-05,
"loss": 3.6796,
"step": 27200
},
{
"epoch": 7.08,
"learning_rate": 3.433316035288013e-05,
"loss": 3.6707,
"step": 27300
},
{
"epoch": 7.11,
"learning_rate": 3.4312402698495074e-05,
"loss": 3.6907,
"step": 27400
},
{
"epoch": 7.14,
"learning_rate": 3.429164504411002e-05,
"loss": 3.6784,
"step": 27500
},
{
"epoch": 7.16,
"learning_rate": 3.4270887389724963e-05,
"loss": 3.6915,
"step": 27600
},
{
"epoch": 7.19,
"learning_rate": 3.425012973533991e-05,
"loss": 3.6872,
"step": 27700
},
{
"epoch": 7.21,
"learning_rate": 3.422937208095486e-05,
"loss": 3.694,
"step": 27800
},
{
"epoch": 7.24,
"learning_rate": 3.42086144265698e-05,
"loss": 3.6703,
"step": 27900
},
{
"epoch": 7.27,
"learning_rate": 3.418785677218474e-05,
"loss": 3.6963,
"step": 28000
},
{
"epoch": 7.29,
"learning_rate": 3.416709911779969e-05,
"loss": 3.6891,
"step": 28100
},
{
"epoch": 7.32,
"learning_rate": 3.414634146341463e-05,
"loss": 3.7015,
"step": 28200
},
{
"epoch": 7.34,
"learning_rate": 3.412558380902958e-05,
"loss": 3.689,
"step": 28300
},
{
"epoch": 7.37,
"learning_rate": 3.410482615464452e-05,
"loss": 3.6655,
"step": 28400
},
{
"epoch": 7.39,
"learning_rate": 3.4084068500259475e-05,
"loss": 3.6964,
"step": 28500
},
{
"epoch": 7.42,
"learning_rate": 3.406331084587442e-05,
"loss": 3.6859,
"step": 28600
},
{
"epoch": 7.45,
"learning_rate": 3.4042553191489365e-05,
"loss": 3.6751,
"step": 28700
},
{
"epoch": 7.47,
"learning_rate": 3.402179553710431e-05,
"loss": 3.6822,
"step": 28800
},
{
"epoch": 7.5,
"learning_rate": 3.4001037882719255e-05,
"loss": 3.6945,
"step": 28900
},
{
"epoch": 7.52,
"learning_rate": 3.39802802283342e-05,
"loss": 3.6647,
"step": 29000
},
{
"epoch": 7.55,
"learning_rate": 3.395952257394915e-05,
"loss": 3.6802,
"step": 29100
},
{
"epoch": 7.58,
"learning_rate": 3.3938764919564097e-05,
"loss": 3.6815,
"step": 29200
},
{
"epoch": 7.6,
"learning_rate": 3.391800726517904e-05,
"loss": 3.6923,
"step": 29300
},
{
"epoch": 7.63,
"learning_rate": 3.389724961079398e-05,
"loss": 3.6765,
"step": 29400
},
{
"epoch": 7.65,
"learning_rate": 3.3876491956408925e-05,
"loss": 3.6841,
"step": 29500
},
{
"epoch": 7.68,
"learning_rate": 3.385573430202387e-05,
"loss": 3.669,
"step": 29600
},
{
"epoch": 7.71,
"learning_rate": 3.3834976647638815e-05,
"loss": 3.6691,
"step": 29700
},
{
"epoch": 7.73,
"learning_rate": 3.3814218993253766e-05,
"loss": 3.6699,
"step": 29800
},
{
"epoch": 7.76,
"learning_rate": 3.379346133886871e-05,
"loss": 3.6903,
"step": 29900
},
{
"epoch": 7.78,
"learning_rate": 3.3772703684483656e-05,
"loss": 3.6904,
"step": 30000
},
{
"epoch": 7.78,
"eval_gen_len": 12.2865,
"eval_loss": 3.6548309326171875,
"eval_rouge1": 27.2987,
"eval_rouge2": 2.0984,
"eval_rougeL": 23.3145,
"eval_rougeLsum": 23.3265,
"eval_runtime": 80.6803,
"eval_samples_per_second": 47.769,
"eval_steps_per_second": 5.974,
"step": 30000
},
{
"epoch": 7.81,
"learning_rate": 3.37519460300986e-05,
"loss": 3.69,
"step": 30100
},
{
"epoch": 7.84,
"learning_rate": 3.3731188375713546e-05,
"loss": 3.6648,
"step": 30200
},
{
"epoch": 7.86,
"learning_rate": 3.371043072132849e-05,
"loss": 3.6907,
"step": 30300
},
{
"epoch": 7.89,
"learning_rate": 3.368967306694344e-05,
"loss": 3.6636,
"step": 30400
},
{
"epoch": 7.91,
"learning_rate": 3.366891541255839e-05,
"loss": 3.6756,
"step": 30500
},
{
"epoch": 7.94,
"learning_rate": 3.364815775817333e-05,
"loss": 3.6653,
"step": 30600
},
{
"epoch": 7.97,
"learning_rate": 3.362740010378828e-05,
"loss": 3.6831,
"step": 30700
},
{
"epoch": 7.99,
"learning_rate": 3.360664244940322e-05,
"loss": 3.6724,
"step": 30800
},
{
"epoch": 8.02,
"learning_rate": 3.358588479501816e-05,
"loss": 3.6916,
"step": 30900
},
{
"epoch": 8.04,
"learning_rate": 3.3565127140633106e-05,
"loss": 3.6814,
"step": 31000
},
{
"epoch": 8.07,
"learning_rate": 3.354436948624806e-05,
"loss": 3.6791,
"step": 31100
},
{
"epoch": 8.1,
"learning_rate": 3.3523611831863e-05,
"loss": 3.6805,
"step": 31200
},
{
"epoch": 8.12,
"learning_rate": 3.350285417747795e-05,
"loss": 3.6936,
"step": 31300
},
{
"epoch": 8.15,
"learning_rate": 3.348209652309289e-05,
"loss": 3.6695,
"step": 31400
},
{
"epoch": 8.17,
"learning_rate": 3.346133886870784e-05,
"loss": 3.66,
"step": 31500
},
{
"epoch": 8.2,
"learning_rate": 3.344058121432278e-05,
"loss": 3.6621,
"step": 31600
},
{
"epoch": 8.23,
"learning_rate": 3.3419823559937734e-05,
"loss": 3.6892,
"step": 31700
},
{
"epoch": 8.25,
"learning_rate": 3.339906590555268e-05,
"loss": 3.6252,
"step": 31800
},
{
"epoch": 8.28,
"learning_rate": 3.3378308251167624e-05,
"loss": 3.6804,
"step": 31900
},
{
"epoch": 8.3,
"learning_rate": 3.335755059678257e-05,
"loss": 3.6879,
"step": 32000
},
{
"epoch": 8.33,
"learning_rate": 3.3336792942397514e-05,
"loss": 3.6683,
"step": 32100
},
{
"epoch": 8.35,
"learning_rate": 3.331603528801246e-05,
"loss": 3.6801,
"step": 32200
},
{
"epoch": 8.38,
"learning_rate": 3.3295277633627404e-05,
"loss": 3.6568,
"step": 32300
},
{
"epoch": 8.41,
"learning_rate": 3.327451997924235e-05,
"loss": 3.6621,
"step": 32400
},
{
"epoch": 8.43,
"learning_rate": 3.3253762324857294e-05,
"loss": 3.6686,
"step": 32500
},
{
"epoch": 8.46,
"learning_rate": 3.323300467047224e-05,
"loss": 3.6942,
"step": 32600
},
{
"epoch": 8.48,
"learning_rate": 3.3212247016087184e-05,
"loss": 3.6566,
"step": 32700
},
{
"epoch": 8.51,
"learning_rate": 3.319148936170213e-05,
"loss": 3.678,
"step": 32800
},
{
"epoch": 8.54,
"learning_rate": 3.3170731707317074e-05,
"loss": 3.6526,
"step": 32900
},
{
"epoch": 8.56,
"learning_rate": 3.3149974052932026e-05,
"loss": 3.6796,
"step": 33000
},
{
"epoch": 8.59,
"learning_rate": 3.312921639854697e-05,
"loss": 3.671,
"step": 33100
},
{
"epoch": 8.61,
"learning_rate": 3.3108458744161916e-05,
"loss": 3.6556,
"step": 33200
},
{
"epoch": 8.64,
"learning_rate": 3.308770108977686e-05,
"loss": 3.6819,
"step": 33300
},
{
"epoch": 8.67,
"learning_rate": 3.3066943435391806e-05,
"loss": 3.6574,
"step": 33400
},
{
"epoch": 8.69,
"learning_rate": 3.304618578100675e-05,
"loss": 3.6426,
"step": 33500
},
{
"epoch": 8.72,
"learning_rate": 3.3025428126621695e-05,
"loss": 3.6796,
"step": 33600
},
{
"epoch": 8.74,
"learning_rate": 3.300467047223664e-05,
"loss": 3.6892,
"step": 33700
},
{
"epoch": 8.77,
"learning_rate": 3.2983912817851585e-05,
"loss": 3.6702,
"step": 33800
},
{
"epoch": 8.8,
"learning_rate": 3.296315516346653e-05,
"loss": 3.6809,
"step": 33900
},
{
"epoch": 8.82,
"learning_rate": 3.2942397509081475e-05,
"loss": 3.6658,
"step": 34000
},
{
"epoch": 8.85,
"learning_rate": 3.292163985469642e-05,
"loss": 3.6747,
"step": 34100
},
{
"epoch": 8.87,
"learning_rate": 3.2900882200311365e-05,
"loss": 3.6703,
"step": 34200
},
{
"epoch": 8.9,
"learning_rate": 3.288012454592632e-05,
"loss": 3.6684,
"step": 34300
},
{
"epoch": 8.93,
"learning_rate": 3.285936689154126e-05,
"loss": 3.6565,
"step": 34400
},
{
"epoch": 8.95,
"learning_rate": 3.283860923715621e-05,
"loss": 3.6783,
"step": 34500
},
{
"epoch": 8.98,
"learning_rate": 3.281785158277115e-05,
"loss": 3.6727,
"step": 34600
},
{
"epoch": 9.0,
"learning_rate": 3.27970939283861e-05,
"loss": 3.6891,
"step": 34700
},
{
"epoch": 9.03,
"learning_rate": 3.277633627400104e-05,
"loss": 3.6667,
"step": 34800
},
{
"epoch": 9.06,
"learning_rate": 3.275557861961599e-05,
"loss": 3.6713,
"step": 34900
},
{
"epoch": 9.08,
"learning_rate": 3.273482096523093e-05,
"loss": 3.6643,
"step": 35000
},
{
"epoch": 9.08,
"eval_gen_len": 12.2763,
"eval_loss": 3.646332263946533,
"eval_rouge1": 27.3301,
"eval_rouge2": 2.0984,
"eval_rougeL": 23.3261,
"eval_rougeLsum": 23.3581,
"eval_runtime": 76.595,
"eval_samples_per_second": 50.317,
"eval_steps_per_second": 6.293,
"step": 35000
},
{
"epoch": 9.11,
"learning_rate": 3.271406331084588e-05,
"loss": 3.6574,
"step": 35100
},
{
"epoch": 9.13,
"learning_rate": 3.269330565646082e-05,
"loss": 3.6598,
"step": 35200
},
{
"epoch": 9.16,
"learning_rate": 3.267254800207577e-05,
"loss": 3.6643,
"step": 35300
},
{
"epoch": 9.19,
"learning_rate": 3.265179034769071e-05,
"loss": 3.6675,
"step": 35400
},
{
"epoch": 9.21,
"learning_rate": 3.263103269330566e-05,
"loss": 3.6809,
"step": 35500
},
{
"epoch": 9.24,
"learning_rate": 3.26102750389206e-05,
"loss": 3.6862,
"step": 35600
},
{
"epoch": 9.26,
"learning_rate": 3.258951738453555e-05,
"loss": 3.662,
"step": 35700
},
{
"epoch": 9.29,
"learning_rate": 3.25687597301505e-05,
"loss": 3.671,
"step": 35800
},
{
"epoch": 9.31,
"learning_rate": 3.254800207576544e-05,
"loss": 3.6771,
"step": 35900
},
{
"epoch": 9.34,
"learning_rate": 3.252724442138039e-05,
"loss": 3.6686,
"step": 36000
},
{
"epoch": 9.37,
"learning_rate": 3.250648676699533e-05,
"loss": 3.6508,
"step": 36100
},
{
"epoch": 9.39,
"learning_rate": 3.248572911261028e-05,
"loss": 3.6553,
"step": 36200
},
{
"epoch": 9.42,
"learning_rate": 3.246497145822522e-05,
"loss": 3.665,
"step": 36300
},
{
"epoch": 9.44,
"learning_rate": 3.244421380384017e-05,
"loss": 3.6832,
"step": 36400
},
{
"epoch": 9.47,
"learning_rate": 3.242345614945511e-05,
"loss": 3.6666,
"step": 36500
},
{
"epoch": 9.5,
"learning_rate": 3.240269849507006e-05,
"loss": 3.6608,
"step": 36600
},
{
"epoch": 9.52,
"learning_rate": 3.2381940840685e-05,
"loss": 3.6624,
"step": 36700
},
{
"epoch": 9.55,
"learning_rate": 3.236118318629995e-05,
"loss": 3.6643,
"step": 36800
},
{
"epoch": 9.57,
"learning_rate": 3.234042553191489e-05,
"loss": 3.6563,
"step": 36900
},
{
"epoch": 9.6,
"learning_rate": 3.2319667877529845e-05,
"loss": 3.6471,
"step": 37000
},
{
"epoch": 9.63,
"learning_rate": 3.229891022314479e-05,
"loss": 3.659,
"step": 37100
},
{
"epoch": 9.65,
"learning_rate": 3.2278152568759735e-05,
"loss": 3.6758,
"step": 37200
},
{
"epoch": 9.68,
"learning_rate": 3.225739491437468e-05,
"loss": 3.6391,
"step": 37300
},
{
"epoch": 9.7,
"learning_rate": 3.2236637259989625e-05,
"loss": 3.6669,
"step": 37400
},
{
"epoch": 9.73,
"learning_rate": 3.221587960560457e-05,
"loss": 3.6713,
"step": 37500
},
{
"epoch": 9.76,
"learning_rate": 3.2195121951219514e-05,
"loss": 3.6773,
"step": 37600
},
{
"epoch": 9.78,
"learning_rate": 3.217436429683446e-05,
"loss": 3.643,
"step": 37700
},
{
"epoch": 9.81,
"learning_rate": 3.2153606642449404e-05,
"loss": 3.6637,
"step": 37800
},
{
"epoch": 9.83,
"learning_rate": 3.213284898806435e-05,
"loss": 3.6277,
"step": 37900
},
{
"epoch": 9.86,
"learning_rate": 3.2112091333679294e-05,
"loss": 3.676,
"step": 38000
},
{
"epoch": 9.89,
"learning_rate": 3.209133367929424e-05,
"loss": 3.6501,
"step": 38100
},
{
"epoch": 9.91,
"learning_rate": 3.2070576024909184e-05,
"loss": 3.6605,
"step": 38200
},
{
"epoch": 9.94,
"learning_rate": 3.2049818370524136e-05,
"loss": 3.6792,
"step": 38300
},
{
"epoch": 9.96,
"learning_rate": 3.202906071613908e-05,
"loss": 3.6747,
"step": 38400
},
{
"epoch": 9.99,
"learning_rate": 3.2008303061754026e-05,
"loss": 3.6834,
"step": 38500
},
{
"epoch": 10.02,
"learning_rate": 3.198754540736897e-05,
"loss": 3.6586,
"step": 38600
},
{
"epoch": 10.04,
"learning_rate": 3.1966787752983916e-05,
"loss": 3.6534,
"step": 38700
},
{
"epoch": 10.07,
"learning_rate": 3.194603009859886e-05,
"loss": 3.6691,
"step": 38800
},
{
"epoch": 10.09,
"learning_rate": 3.1925272444213806e-05,
"loss": 3.655,
"step": 38900
},
{
"epoch": 10.12,
"learning_rate": 3.190451478982875e-05,
"loss": 3.6692,
"step": 39000
},
{
"epoch": 10.15,
"learning_rate": 3.1883757135443696e-05,
"loss": 3.6568,
"step": 39100
},
{
"epoch": 10.17,
"learning_rate": 3.186299948105864e-05,
"loss": 3.6839,
"step": 39200
},
{
"epoch": 10.2,
"learning_rate": 3.1842241826673586e-05,
"loss": 3.6715,
"step": 39300
},
{
"epoch": 10.22,
"learning_rate": 3.182148417228853e-05,
"loss": 3.6353,
"step": 39400
},
{
"epoch": 10.25,
"learning_rate": 3.1800726517903476e-05,
"loss": 3.6357,
"step": 39500
},
{
"epoch": 10.28,
"learning_rate": 3.177996886351843e-05,
"loss": 3.6646,
"step": 39600
},
{
"epoch": 10.3,
"learning_rate": 3.175921120913337e-05,
"loss": 3.6664,
"step": 39700
},
{
"epoch": 10.33,
"learning_rate": 3.173845355474832e-05,
"loss": 3.6654,
"step": 39800
},
{
"epoch": 10.35,
"learning_rate": 3.171769590036326e-05,
"loss": 3.6732,
"step": 39900
},
{
"epoch": 10.38,
"learning_rate": 3.169693824597821e-05,
"loss": 3.6677,
"step": 40000
},
{
"epoch": 10.38,
"eval_gen_len": 12.2836,
"eval_loss": 3.640160322189331,
"eval_rouge1": 27.5354,
"eval_rouge2": 2.095,
"eval_rougeL": 23.5496,
"eval_rougeLsum": 23.563,
"eval_runtime": 75.9134,
"eval_samples_per_second": 50.768,
"eval_steps_per_second": 6.349,
"step": 40000
},
{
"epoch": 10.4,
"learning_rate": 3.167618059159315e-05,
"loss": 3.6737,
"step": 40100
},
{
"epoch": 10.43,
"learning_rate": 3.16554229372081e-05,
"loss": 3.6693,
"step": 40200
},
{
"epoch": 10.46,
"learning_rate": 3.163466528282304e-05,
"loss": 3.6675,
"step": 40300
},
{
"epoch": 10.48,
"learning_rate": 3.161390762843799e-05,
"loss": 3.6627,
"step": 40400
},
{
"epoch": 10.51,
"learning_rate": 3.159314997405293e-05,
"loss": 3.6404,
"step": 40500
},
{
"epoch": 10.53,
"learning_rate": 3.157239231966788e-05,
"loss": 3.6688,
"step": 40600
},
{
"epoch": 10.56,
"learning_rate": 3.155163466528282e-05,
"loss": 3.6564,
"step": 40700
},
{
"epoch": 10.59,
"learning_rate": 3.153087701089777e-05,
"loss": 3.6103,
"step": 40800
},
{
"epoch": 10.61,
"learning_rate": 3.151011935651272e-05,
"loss": 3.6527,
"step": 40900
},
{
"epoch": 10.64,
"learning_rate": 3.1489361702127664e-05,
"loss": 3.6814,
"step": 41000
},
{
"epoch": 10.66,
"learning_rate": 3.146860404774261e-05,
"loss": 3.6341,
"step": 41100
},
{
"epoch": 10.69,
"learning_rate": 3.1447846393357554e-05,
"loss": 3.6594,
"step": 41200
},
{
"epoch": 10.72,
"learning_rate": 3.14270887389725e-05,
"loss": 3.6601,
"step": 41300
},
{
"epoch": 10.74,
"learning_rate": 3.1406331084587444e-05,
"loss": 3.6622,
"step": 41400
},
{
"epoch": 10.77,
"learning_rate": 3.138557343020239e-05,
"loss": 3.627,
"step": 41500
},
{
"epoch": 10.79,
"learning_rate": 3.1364815775817334e-05,
"loss": 3.6415,
"step": 41600
},
{
"epoch": 10.82,
"learning_rate": 3.134405812143228e-05,
"loss": 3.6322,
"step": 41700
},
{
"epoch": 10.85,
"learning_rate": 3.1323300467047223e-05,
"loss": 3.6771,
"step": 41800
},
{
"epoch": 10.87,
"learning_rate": 3.130254281266217e-05,
"loss": 3.6656,
"step": 41900
},
{
"epoch": 10.9,
"learning_rate": 3.1281785158277113e-05,
"loss": 3.6416,
"step": 42000
},
{
"epoch": 10.92,
"learning_rate": 3.126102750389206e-05,
"loss": 3.6719,
"step": 42100
},
{
"epoch": 10.95,
"learning_rate": 3.124026984950701e-05,
"loss": 3.6718,
"step": 42200
},
{
"epoch": 10.98,
"learning_rate": 3.1219512195121955e-05,
"loss": 3.6597,
"step": 42300
},
{
"epoch": 11.0,
"learning_rate": 3.11987545407369e-05,
"loss": 3.6492,
"step": 42400
},
{
"epoch": 11.03,
"learning_rate": 3.1177996886351845e-05,
"loss": 3.6484,
"step": 42500
},
{
"epoch": 11.05,
"learning_rate": 3.115723923196679e-05,
"loss": 3.6498,
"step": 42600
},
{
"epoch": 11.08,
"learning_rate": 3.1136481577581735e-05,
"loss": 3.6788,
"step": 42700
},
{
"epoch": 11.11,
"learning_rate": 3.111572392319669e-05,
"loss": 3.6653,
"step": 42800
},
{
"epoch": 11.13,
"learning_rate": 3.1094966268811625e-05,
"loss": 3.6476,
"step": 42900
},
{
"epoch": 11.16,
"learning_rate": 3.107420861442657e-05,
"loss": 3.671,
"step": 43000
},
{
"epoch": 11.18,
"learning_rate": 3.1053450960041515e-05,
"loss": 3.6251,
"step": 43100
},
{
"epoch": 11.21,
"learning_rate": 3.103269330565646e-05,
"loss": 3.6515,
"step": 43200
},
{
"epoch": 11.24,
"learning_rate": 3.1011935651271405e-05,
"loss": 3.6637,
"step": 43300
},
{
"epoch": 11.26,
"learning_rate": 3.099117799688635e-05,
"loss": 3.6481,
"step": 43400
},
{
"epoch": 11.29,
"learning_rate": 3.09704203425013e-05,
"loss": 3.6647,
"step": 43500
},
{
"epoch": 11.31,
"learning_rate": 3.0949662688116246e-05,
"loss": 3.6511,
"step": 43600
},
{
"epoch": 11.34,
"learning_rate": 3.092890503373119e-05,
"loss": 3.6438,
"step": 43700
},
{
"epoch": 11.36,
"learning_rate": 3.0908147379346136e-05,
"loss": 3.6742,
"step": 43800
},
{
"epoch": 11.39,
"learning_rate": 3.088738972496108e-05,
"loss": 3.6609,
"step": 43900
},
{
"epoch": 11.42,
"learning_rate": 3.0866632070576026e-05,
"loss": 3.6299,
"step": 44000
},
{
"epoch": 11.44,
"learning_rate": 3.084587441619098e-05,
"loss": 3.6595,
"step": 44100
},
{
"epoch": 11.47,
"learning_rate": 3.082511676180592e-05,
"loss": 3.6559,
"step": 44200
},
{
"epoch": 11.49,
"learning_rate": 3.080435910742087e-05,
"loss": 3.6838,
"step": 44300
},
{
"epoch": 11.52,
"learning_rate": 3.078360145303581e-05,
"loss": 3.667,
"step": 44400
},
{
"epoch": 11.55,
"learning_rate": 3.076284379865075e-05,
"loss": 3.6619,
"step": 44500
},
{
"epoch": 11.57,
"learning_rate": 3.0742086144265696e-05,
"loss": 3.6601,
"step": 44600
},
{
"epoch": 11.6,
"learning_rate": 3.072132848988064e-05,
"loss": 3.6608,
"step": 44700
},
{
"epoch": 11.62,
"learning_rate": 3.070057083549559e-05,
"loss": 3.654,
"step": 44800
},
{
"epoch": 11.65,
"learning_rate": 3.067981318111054e-05,
"loss": 3.6641,
"step": 44900
},
{
"epoch": 11.68,
"learning_rate": 3.065905552672548e-05,
"loss": 3.6526,
"step": 45000
},
{
"epoch": 11.68,
"eval_gen_len": 12.2807,
"eval_loss": 3.6362619400024414,
"eval_rouge1": 28.1357,
"eval_rouge2": 2.3033,
"eval_rougeL": 24.161,
"eval_rougeLsum": 24.1576,
"eval_runtime": 76.1206,
"eval_samples_per_second": 50.63,
"eval_steps_per_second": 6.332,
"step": 45000
},
{
"epoch": 11.7,
"learning_rate": 3.063829787234043e-05,
"loss": 3.6808,
"step": 45100
},
{
"epoch": 11.73,
"learning_rate": 3.061754021795537e-05,
"loss": 3.6443,
"step": 45200
},
{
"epoch": 11.75,
"learning_rate": 3.059678256357032e-05,
"loss": 3.6565,
"step": 45300
},
{
"epoch": 11.78,
"learning_rate": 3.057602490918527e-05,
"loss": 3.6611,
"step": 45400
},
{
"epoch": 11.81,
"learning_rate": 3.0555267254800214e-05,
"loss": 3.6532,
"step": 45500
},
{
"epoch": 11.83,
"learning_rate": 3.053450960041516e-05,
"loss": 3.655,
"step": 45600
},
{
"epoch": 11.86,
"learning_rate": 3.05137519460301e-05,
"loss": 3.6631,
"step": 45700
},
{
"epoch": 11.88,
"learning_rate": 3.0492994291645046e-05,
"loss": 3.6478,
"step": 45800
},
{
"epoch": 11.91,
"learning_rate": 3.047223663725999e-05,
"loss": 3.65,
"step": 45900
},
{
"epoch": 11.94,
"learning_rate": 3.0451478982874936e-05,
"loss": 3.6404,
"step": 46000
},
{
"epoch": 11.96,
"learning_rate": 3.0430721328489884e-05,
"loss": 3.6551,
"step": 46100
},
{
"epoch": 11.99,
"learning_rate": 3.040996367410483e-05,
"loss": 3.6547,
"step": 46200
},
{
"epoch": 12.01,
"learning_rate": 3.0389206019719774e-05,
"loss": 3.6557,
"step": 46300
},
{
"epoch": 12.04,
"learning_rate": 3.036844836533472e-05,
"loss": 3.6672,
"step": 46400
},
{
"epoch": 12.07,
"learning_rate": 3.0347690710949664e-05,
"loss": 3.6625,
"step": 46500
},
{
"epoch": 12.09,
"learning_rate": 3.032693305656461e-05,
"loss": 3.6332,
"step": 46600
},
{
"epoch": 12.12,
"learning_rate": 3.0306175402179554e-05,
"loss": 3.6408,
"step": 46700
},
{
"epoch": 12.14,
"learning_rate": 3.0285417747794502e-05,
"loss": 3.6518,
"step": 46800
},
{
"epoch": 12.17,
"learning_rate": 3.0264660093409447e-05,
"loss": 3.6416,
"step": 46900
},
{
"epoch": 12.2,
"learning_rate": 3.0243902439024392e-05,
"loss": 3.6405,
"step": 47000
},
{
"epoch": 12.22,
"learning_rate": 3.0223144784639337e-05,
"loss": 3.659,
"step": 47100
},
{
"epoch": 12.25,
"learning_rate": 3.0202387130254282e-05,
"loss": 3.6342,
"step": 47200
},
{
"epoch": 12.27,
"learning_rate": 3.0181629475869227e-05,
"loss": 3.6718,
"step": 47300
},
{
"epoch": 12.3,
"learning_rate": 3.0160871821484176e-05,
"loss": 3.6391,
"step": 47400
},
{
"epoch": 12.32,
"learning_rate": 3.014011416709912e-05,
"loss": 3.6564,
"step": 47500
},
{
"epoch": 12.35,
"learning_rate": 3.0119356512714066e-05,
"loss": 3.6481,
"step": 47600
},
{
"epoch": 12.38,
"learning_rate": 3.009859885832901e-05,
"loss": 3.6446,
"step": 47700
},
{
"epoch": 12.4,
"learning_rate": 3.0077841203943955e-05,
"loss": 3.6356,
"step": 47800
},
{
"epoch": 12.43,
"learning_rate": 3.00570835495589e-05,
"loss": 3.6563,
"step": 47900
},
{
"epoch": 12.45,
"learning_rate": 3.0036325895173845e-05,
"loss": 3.65,
"step": 48000
},
{
"epoch": 12.48,
"learning_rate": 3.0015568240788794e-05,
"loss": 3.6357,
"step": 48100
},
{
"epoch": 12.51,
"learning_rate": 2.999481058640374e-05,
"loss": 3.6648,
"step": 48200
},
{
"epoch": 12.53,
"learning_rate": 2.9974052932018684e-05,
"loss": 3.6636,
"step": 48300
},
{
"epoch": 12.56,
"learning_rate": 2.995329527763363e-05,
"loss": 3.6416,
"step": 48400
},
{
"epoch": 12.58,
"learning_rate": 2.9932537623248574e-05,
"loss": 3.638,
"step": 48500
},
{
"epoch": 12.61,
"learning_rate": 2.991177996886352e-05,
"loss": 3.6637,
"step": 48600
},
{
"epoch": 12.64,
"learning_rate": 2.9891022314478467e-05,
"loss": 3.6412,
"step": 48700
},
{
"epoch": 12.66,
"learning_rate": 2.9870264660093412e-05,
"loss": 3.6471,
"step": 48800
},
{
"epoch": 12.69,
"learning_rate": 2.9849507005708357e-05,
"loss": 3.6729,
"step": 48900
},
{
"epoch": 12.71,
"learning_rate": 2.9828749351323302e-05,
"loss": 3.6692,
"step": 49000
},
{
"epoch": 12.74,
"learning_rate": 2.9807991696938247e-05,
"loss": 3.6498,
"step": 49100
},
{
"epoch": 12.77,
"learning_rate": 2.9787234042553192e-05,
"loss": 3.6471,
"step": 49200
},
{
"epoch": 12.79,
"learning_rate": 2.9766476388168137e-05,
"loss": 3.6535,
"step": 49300
},
{
"epoch": 12.82,
"learning_rate": 2.9745718733783085e-05,
"loss": 3.6526,
"step": 49400
},
{
"epoch": 12.84,
"learning_rate": 2.972496107939803e-05,
"loss": 3.6555,
"step": 49500
},
{
"epoch": 12.87,
"learning_rate": 2.9704203425012975e-05,
"loss": 3.6368,
"step": 49600
},
{
"epoch": 12.9,
"learning_rate": 2.968344577062792e-05,
"loss": 3.6546,
"step": 49700
},
{
"epoch": 12.92,
"learning_rate": 2.9662688116242865e-05,
"loss": 3.6621,
"step": 49800
},
{
"epoch": 12.95,
"learning_rate": 2.964193046185781e-05,
"loss": 3.6332,
"step": 49900
},
{
"epoch": 12.97,
"learning_rate": 2.962117280747276e-05,
"loss": 3.652,
"step": 50000
},
{
"epoch": 12.97,
"eval_gen_len": 12.2789,
"eval_loss": 3.6313188076019287,
"eval_rouge1": 31.2625,
"eval_rouge2": 3.8392,
"eval_rougeL": 27.2814,
"eval_rougeLsum": 27.2856,
"eval_runtime": 76.4786,
"eval_samples_per_second": 50.393,
"eval_steps_per_second": 6.302,
"step": 50000
},
{
"epoch": 13.0,
"learning_rate": 2.9600415153087703e-05,
"loss": 3.6524,
"step": 50100
},
{
"epoch": 13.03,
"learning_rate": 2.9579657498702648e-05,
"loss": 3.666,
"step": 50200
},
{
"epoch": 13.05,
"learning_rate": 2.9558899844317593e-05,
"loss": 3.6342,
"step": 50300
},
{
"epoch": 13.08,
"learning_rate": 2.9538142189932538e-05,
"loss": 3.6535,
"step": 50400
},
{
"epoch": 13.1,
"learning_rate": 2.9517384535547483e-05,
"loss": 3.6558,
"step": 50500
},
{
"epoch": 13.13,
"learning_rate": 2.9496626881162428e-05,
"loss": 3.6452,
"step": 50600
},
{
"epoch": 13.16,
"learning_rate": 2.947586922677738e-05,
"loss": 3.6442,
"step": 50700
},
{
"epoch": 13.18,
"learning_rate": 2.945511157239232e-05,
"loss": 3.6576,
"step": 50800
},
{
"epoch": 13.21,
"learning_rate": 2.9434353918007266e-05,
"loss": 3.6663,
"step": 50900
},
{
"epoch": 13.23,
"learning_rate": 2.941359626362221e-05,
"loss": 3.6328,
"step": 51000
},
{
"epoch": 13.26,
"learning_rate": 2.9392838609237156e-05,
"loss": 3.6412,
"step": 51100
},
{
"epoch": 13.28,
"learning_rate": 2.93720809548521e-05,
"loss": 3.6603,
"step": 51200
},
{
"epoch": 13.31,
"learning_rate": 2.9351323300467053e-05,
"loss": 3.6423,
"step": 51300
},
{
"epoch": 13.34,
"learning_rate": 2.9330565646081998e-05,
"loss": 3.6527,
"step": 51400
},
{
"epoch": 13.36,
"learning_rate": 2.9309807991696943e-05,
"loss": 3.6655,
"step": 51500
},
{
"epoch": 13.39,
"learning_rate": 2.9289050337311885e-05,
"loss": 3.6603,
"step": 51600
},
{
"epoch": 13.41,
"learning_rate": 2.926829268292683e-05,
"loss": 3.6243,
"step": 51700
},
{
"epoch": 13.44,
"learning_rate": 2.9247535028541774e-05,
"loss": 3.6392,
"step": 51800
},
{
"epoch": 13.47,
"learning_rate": 2.922677737415672e-05,
"loss": 3.6298,
"step": 51900
},
{
"epoch": 13.49,
"learning_rate": 2.920601971977167e-05,
"loss": 3.6615,
"step": 52000
},
{
"epoch": 13.52,
"learning_rate": 2.9185262065386616e-05,
"loss": 3.6398,
"step": 52100
},
{
"epoch": 13.54,
"learning_rate": 2.916450441100156e-05,
"loss": 3.6652,
"step": 52200
},
{
"epoch": 13.57,
"learning_rate": 2.9143746756616506e-05,
"loss": 3.6702,
"step": 52300
},
{
"epoch": 13.6,
"learning_rate": 2.9122989102231448e-05,
"loss": 3.6696,
"step": 52400
},
{
"epoch": 13.62,
"learning_rate": 2.9102231447846393e-05,
"loss": 3.654,
"step": 52500
},
{
"epoch": 13.65,
"learning_rate": 2.9081473793461344e-05,
"loss": 3.6507,
"step": 52600
},
{
"epoch": 13.67,
"learning_rate": 2.906071613907629e-05,
"loss": 3.6341,
"step": 52700
},
{
"epoch": 13.7,
"learning_rate": 2.9039958484691234e-05,
"loss": 3.625,
"step": 52800
},
{
"epoch": 13.73,
"learning_rate": 2.901920083030618e-05,
"loss": 3.651,
"step": 52900
},
{
"epoch": 13.75,
"learning_rate": 2.8998443175921124e-05,
"loss": 3.6476,
"step": 53000
},
{
"epoch": 13.78,
"learning_rate": 2.8977685521536066e-05,
"loss": 3.6234,
"step": 53100
},
{
"epoch": 13.8,
"learning_rate": 2.895692786715101e-05,
"loss": 3.6568,
"step": 53200
},
{
"epoch": 13.83,
"learning_rate": 2.8936170212765963e-05,
"loss": 3.6294,
"step": 53300
},
{
"epoch": 13.86,
"learning_rate": 2.8915412558380908e-05,
"loss": 3.6513,
"step": 53400
},
{
"epoch": 13.88,
"learning_rate": 2.8894654903995852e-05,
"loss": 3.6455,
"step": 53500
},
{
"epoch": 13.91,
"learning_rate": 2.8873897249610797e-05,
"loss": 3.6467,
"step": 53600
},
{
"epoch": 13.93,
"learning_rate": 2.8853139595225742e-05,
"loss": 3.6241,
"step": 53700
},
{
"epoch": 13.96,
"learning_rate": 2.8832381940840687e-05,
"loss": 3.6388,
"step": 53800
},
{
"epoch": 13.99,
"learning_rate": 2.8811624286455636e-05,
"loss": 3.6207,
"step": 53900
},
{
"epoch": 14.01,
"learning_rate": 2.879086663207058e-05,
"loss": 3.665,
"step": 54000
},
{
"epoch": 14.04,
"learning_rate": 2.8770108977685526e-05,
"loss": 3.6486,
"step": 54100
},
{
"epoch": 14.06,
"learning_rate": 2.874935132330047e-05,
"loss": 3.6488,
"step": 54200
},
{
"epoch": 14.09,
"learning_rate": 2.8728593668915416e-05,
"loss": 3.6435,
"step": 54300
},
{
"epoch": 14.12,
"learning_rate": 2.870783601453036e-05,
"loss": 3.6244,
"step": 54400
},
{
"epoch": 14.14,
"learning_rate": 2.8687078360145306e-05,
"loss": 3.6598,
"step": 54500
},
{
"epoch": 14.17,
"learning_rate": 2.8666320705760254e-05,
"loss": 3.6124,
"step": 54600
},
{
"epoch": 14.19,
"learning_rate": 2.86455630513752e-05,
"loss": 3.6635,
"step": 54700
},
{
"epoch": 14.22,
"learning_rate": 2.8624805396990144e-05,
"loss": 3.6364,
"step": 54800
},
{
"epoch": 14.24,
"learning_rate": 2.860404774260509e-05,
"loss": 3.6522,
"step": 54900
},
{
"epoch": 14.27,
"learning_rate": 2.8583290088220034e-05,
"loss": 3.6544,
"step": 55000
},
{
"epoch": 14.27,
"eval_gen_len": 12.2802,
"eval_loss": 3.628124713897705,
"eval_rouge1": 31.4632,
"eval_rouge2": 4.1625,
"eval_rougeL": 27.459,
"eval_rougeLsum": 27.4835,
"eval_runtime": 76.195,
"eval_samples_per_second": 50.581,
"eval_steps_per_second": 6.326,
"step": 55000
},
{
"epoch": 14.3,
"learning_rate": 2.856253243383498e-05,
"loss": 3.6345,
"step": 55100
},
{
"epoch": 14.32,
"learning_rate": 2.8541774779449927e-05,
"loss": 3.6543,
"step": 55200
},
{
"epoch": 14.35,
"learning_rate": 2.8521017125064872e-05,
"loss": 3.6405,
"step": 55300
},
{
"epoch": 14.37,
"learning_rate": 2.8500259470679817e-05,
"loss": 3.6421,
"step": 55400
},
{
"epoch": 14.4,
"learning_rate": 2.8479501816294762e-05,
"loss": 3.6653,
"step": 55500
},
{
"epoch": 14.43,
"learning_rate": 2.8458744161909707e-05,
"loss": 3.6695,
"step": 55600
},
{
"epoch": 14.45,
"learning_rate": 2.8437986507524652e-05,
"loss": 3.6273,
"step": 55700
},
{
"epoch": 14.48,
"learning_rate": 2.8417228853139597e-05,
"loss": 3.6402,
"step": 55800
},
{
"epoch": 14.5,
"learning_rate": 2.8396471198754545e-05,
"loss": 3.6508,
"step": 55900
},
{
"epoch": 14.53,
"learning_rate": 2.837571354436949e-05,
"loss": 3.6514,
"step": 56000
},
{
"epoch": 14.56,
"learning_rate": 2.8354955889984435e-05,
"loss": 3.6448,
"step": 56100
},
{
"epoch": 14.58,
"learning_rate": 2.833419823559938e-05,
"loss": 3.6388,
"step": 56200
},
{
"epoch": 14.61,
"learning_rate": 2.8313440581214325e-05,
"loss": 3.632,
"step": 56300
},
{
"epoch": 14.63,
"learning_rate": 2.829268292682927e-05,
"loss": 3.6273,
"step": 56400
},
{
"epoch": 14.66,
"learning_rate": 2.8271925272444215e-05,
"loss": 3.6467,
"step": 56500
},
{
"epoch": 14.69,
"learning_rate": 2.8251167618059163e-05,
"loss": 3.6213,
"step": 56600
},
{
"epoch": 14.71,
"learning_rate": 2.823040996367411e-05,
"loss": 3.6341,
"step": 56700
},
{
"epoch": 14.74,
"learning_rate": 2.8209652309289053e-05,
"loss": 3.6443,
"step": 56800
},
{
"epoch": 14.76,
"learning_rate": 2.8188894654904e-05,
"loss": 3.6283,
"step": 56900
},
{
"epoch": 14.79,
"learning_rate": 2.8168137000518943e-05,
"loss": 3.6681,
"step": 57000
},
{
"epoch": 14.82,
"learning_rate": 2.8147379346133888e-05,
"loss": 3.6549,
"step": 57100
},
{
"epoch": 14.84,
"learning_rate": 2.8126621691748837e-05,
"loss": 3.6512,
"step": 57200
},
{
"epoch": 14.87,
"learning_rate": 2.810586403736378e-05,
"loss": 3.6574,
"step": 57300
},
{
"epoch": 14.89,
"learning_rate": 2.8085106382978727e-05,
"loss": 3.6357,
"step": 57400
},
{
"epoch": 14.92,
"learning_rate": 2.806434872859367e-05,
"loss": 3.6537,
"step": 57500
},
{
"epoch": 14.95,
"learning_rate": 2.8043591074208617e-05,
"loss": 3.6414,
"step": 57600
},
{
"epoch": 14.97,
"learning_rate": 2.802283341982356e-05,
"loss": 3.6496,
"step": 57700
},
{
"epoch": 15.0,
"learning_rate": 2.8002075765438506e-05,
"loss": 3.6444,
"step": 57800
},
{
"epoch": 15.02,
"learning_rate": 2.7981318111053455e-05,
"loss": 3.652,
"step": 57900
},
{
"epoch": 15.05,
"learning_rate": 2.79605604566684e-05,
"loss": 3.6259,
"step": 58000
},
{
"epoch": 15.08,
"learning_rate": 2.7939802802283345e-05,
"loss": 3.6478,
"step": 58100
},
{
"epoch": 15.1,
"learning_rate": 2.791904514789829e-05,
"loss": 3.6543,
"step": 58200
},
{
"epoch": 15.13,
"learning_rate": 2.7898287493513235e-05,
"loss": 3.6468,
"step": 58300
},
{
"epoch": 15.15,
"learning_rate": 2.787752983912818e-05,
"loss": 3.6576,
"step": 58400
},
{
"epoch": 15.18,
"learning_rate": 2.7856772184743128e-05,
"loss": 3.6458,
"step": 58500
},
{
"epoch": 15.2,
"learning_rate": 2.7836014530358073e-05,
"loss": 3.6502,
"step": 58600
},
{
"epoch": 15.23,
"learning_rate": 2.7815256875973018e-05,
"loss": 3.6211,
"step": 58700
},
{
"epoch": 15.26,
"learning_rate": 2.7794499221587963e-05,
"loss": 3.645,
"step": 58800
},
{
"epoch": 15.28,
"learning_rate": 2.7773741567202908e-05,
"loss": 3.6441,
"step": 58900
},
{
"epoch": 15.31,
"learning_rate": 2.7752983912817853e-05,
"loss": 3.6321,
"step": 59000
},
{
"epoch": 15.33,
"learning_rate": 2.7732226258432798e-05,
"loss": 3.6391,
"step": 59100
},
{
"epoch": 15.36,
"learning_rate": 2.7711468604047746e-05,
"loss": 3.6461,
"step": 59200
},
{
"epoch": 15.39,
"learning_rate": 2.769071094966269e-05,
"loss": 3.619,
"step": 59300
},
{
"epoch": 15.41,
"learning_rate": 2.7669953295277636e-05,
"loss": 3.6244,
"step": 59400
},
{
"epoch": 15.44,
"learning_rate": 2.764919564089258e-05,
"loss": 3.664,
"step": 59500
},
{
"epoch": 15.46,
"learning_rate": 2.7628437986507526e-05,
"loss": 3.6522,
"step": 59600
},
{
"epoch": 15.49,
"learning_rate": 2.760768033212247e-05,
"loss": 3.6308,
"step": 59700
},
{
"epoch": 15.52,
"learning_rate": 2.758692267773742e-05,
"loss": 3.6505,
"step": 59800
},
{
"epoch": 15.54,
"learning_rate": 2.7566165023352364e-05,
"loss": 3.6457,
"step": 59900
},
{
"epoch": 15.57,
"learning_rate": 2.754540736896731e-05,
"loss": 3.6381,
"step": 60000
},
{
"epoch": 15.57,
"eval_gen_len": 12.2794,
"eval_loss": 3.6244375705718994,
"eval_rouge1": 34.0589,
"eval_rouge2": 5.2472,
"eval_rougeL": 30.0323,
"eval_rougeLsum": 30.0873,
"eval_runtime": 76.347,
"eval_samples_per_second": 50.48,
"eval_steps_per_second": 6.313,
"step": 60000
},
{
"epoch": 15.59,
"learning_rate": 2.7524649714582254e-05,
"loss": 3.6259,
"step": 60100
},
{
"epoch": 15.62,
"learning_rate": 2.75038920601972e-05,
"loss": 3.6488,
"step": 60200
},
{
"epoch": 15.65,
"learning_rate": 2.7483134405812144e-05,
"loss": 3.6415,
"step": 60300
},
{
"epoch": 15.67,
"learning_rate": 2.746237675142709e-05,
"loss": 3.6595,
"step": 60400
},
{
"epoch": 15.7,
"learning_rate": 2.7441619097042038e-05,
"loss": 3.636,
"step": 60500
},
{
"epoch": 15.72,
"learning_rate": 2.7420861442656982e-05,
"loss": 3.6107,
"step": 60600
},
{
"epoch": 15.75,
"learning_rate": 2.7400103788271927e-05,
"loss": 3.6465,
"step": 60700
},
{
"epoch": 15.78,
"learning_rate": 2.7379346133886872e-05,
"loss": 3.6097,
"step": 60800
},
{
"epoch": 15.8,
"learning_rate": 2.7358588479501817e-05,
"loss": 3.6584,
"step": 60900
},
{
"epoch": 15.83,
"learning_rate": 2.7337830825116762e-05,
"loss": 3.6508,
"step": 61000
},
{
"epoch": 15.85,
"learning_rate": 2.731707317073171e-05,
"loss": 3.6474,
"step": 61100
},
{
"epoch": 15.88,
"learning_rate": 2.7296315516346656e-05,
"loss": 3.5899,
"step": 61200
},
{
"epoch": 15.91,
"learning_rate": 2.72755578619616e-05,
"loss": 3.624,
"step": 61300
},
{
"epoch": 15.93,
"learning_rate": 2.7254800207576546e-05,
"loss": 3.6318,
"step": 61400
},
{
"epoch": 15.96,
"learning_rate": 2.723404255319149e-05,
"loss": 3.6407,
"step": 61500
},
{
"epoch": 15.98,
"learning_rate": 2.7213284898806436e-05,
"loss": 3.6605,
"step": 61600
},
{
"epoch": 16.01,
"learning_rate": 2.719252724442138e-05,
"loss": 3.6452,
"step": 61700
},
{
"epoch": 16.04,
"learning_rate": 2.717176959003633e-05,
"loss": 3.646,
"step": 61800
},
{
"epoch": 16.06,
"learning_rate": 2.7151011935651274e-05,
"loss": 3.6475,
"step": 61900
},
{
"epoch": 16.09,
"learning_rate": 2.713025428126622e-05,
"loss": 3.6483,
"step": 62000
},
{
"epoch": 16.11,
"learning_rate": 2.7109496626881164e-05,
"loss": 3.6051,
"step": 62100
},
{
"epoch": 16.14,
"learning_rate": 2.708873897249611e-05,
"loss": 3.6369,
"step": 62200
},
{
"epoch": 16.17,
"learning_rate": 2.7067981318111054e-05,
"loss": 3.6364,
"step": 62300
},
{
"epoch": 16.19,
"learning_rate": 2.7047223663726002e-05,
"loss": 3.6398,
"step": 62400
},
{
"epoch": 16.22,
"learning_rate": 2.7026466009340947e-05,
"loss": 3.6348,
"step": 62500
},
{
"epoch": 16.24,
"learning_rate": 2.7005708354955892e-05,
"loss": 3.6484,
"step": 62600
},
{
"epoch": 16.27,
"learning_rate": 2.6984950700570837e-05,
"loss": 3.6168,
"step": 62700
},
{
"epoch": 16.29,
"learning_rate": 2.6964193046185782e-05,
"loss": 3.6328,
"step": 62800
},
{
"epoch": 16.32,
"learning_rate": 2.6943435391800727e-05,
"loss": 3.6456,
"step": 62900
},
{
"epoch": 16.35,
"learning_rate": 2.6922677737415672e-05,
"loss": 3.6292,
"step": 63000
},
{
"epoch": 16.37,
"learning_rate": 2.690192008303062e-05,
"loss": 3.6523,
"step": 63100
},
{
"epoch": 16.4,
"learning_rate": 2.6881162428645565e-05,
"loss": 3.6296,
"step": 63200
},
{
"epoch": 16.42,
"learning_rate": 2.686040477426051e-05,
"loss": 3.6355,
"step": 63300
},
{
"epoch": 16.45,
"learning_rate": 2.6839647119875455e-05,
"loss": 3.627,
"step": 63400
},
{
"epoch": 16.48,
"learning_rate": 2.68188894654904e-05,
"loss": 3.6334,
"step": 63500
},
{
"epoch": 16.5,
"learning_rate": 2.6798131811105345e-05,
"loss": 3.6365,
"step": 63600
},
{
"epoch": 16.53,
"learning_rate": 2.6777374156720293e-05,
"loss": 3.6399,
"step": 63700
},
{
"epoch": 16.55,
"learning_rate": 2.675661650233524e-05,
"loss": 3.6597,
"step": 63800
},
{
"epoch": 16.58,
"learning_rate": 2.6735858847950183e-05,
"loss": 3.6349,
"step": 63900
},
{
"epoch": 16.61,
"learning_rate": 2.671510119356513e-05,
"loss": 3.6331,
"step": 64000
},
{
"epoch": 16.63,
"learning_rate": 2.6694343539180073e-05,
"loss": 3.6382,
"step": 64100
},
{
"epoch": 16.66,
"learning_rate": 2.6673585884795018e-05,
"loss": 3.6401,
"step": 64200
},
{
"epoch": 16.68,
"learning_rate": 2.6652828230409963e-05,
"loss": 3.6546,
"step": 64300
},
{
"epoch": 16.71,
"learning_rate": 2.663207057602491e-05,
"loss": 3.6492,
"step": 64400
},
{
"epoch": 16.74,
"learning_rate": 2.6611312921639857e-05,
"loss": 3.6681,
"step": 64500
},
{
"epoch": 16.76,
"learning_rate": 2.65905552672548e-05,
"loss": 3.6566,
"step": 64600
},
{
"epoch": 16.79,
"learning_rate": 2.6569797612869746e-05,
"loss": 3.6528,
"step": 64700
},
{
"epoch": 16.81,
"learning_rate": 2.654903995848469e-05,
"loss": 3.6492,
"step": 64800
},
{
"epoch": 16.84,
"learning_rate": 2.6528282304099636e-05,
"loss": 3.6372,
"step": 64900
},
{
"epoch": 16.87,
"learning_rate": 2.6507524649714588e-05,
"loss": 3.63,
"step": 65000
},
{
"epoch": 16.87,
"eval_gen_len": 12.2807,
"eval_loss": 3.6208901405334473,
"eval_rouge1": 35.2903,
"eval_rouge2": 5.4605,
"eval_rougeL": 31.2651,
"eval_rougeLsum": 31.3001,
"eval_runtime": 76.0126,
"eval_samples_per_second": 50.702,
"eval_steps_per_second": 6.341,
"step": 65000
},
{
"epoch": 16.89,
"learning_rate": 2.648676699532953e-05,
"loss": 3.6417,
"step": 65100
},
{
"epoch": 16.92,
"learning_rate": 2.6466009340944475e-05,
"loss": 3.6349,
"step": 65200
},
{
"epoch": 16.94,
"learning_rate": 2.644525168655942e-05,
"loss": 3.6443,
"step": 65300
},
{
"epoch": 16.97,
"learning_rate": 2.6424494032174365e-05,
"loss": 3.639,
"step": 65400
},
{
"epoch": 17.0,
"learning_rate": 2.640373637778931e-05,
"loss": 3.6413,
"step": 65500
},
{
"epoch": 17.02,
"learning_rate": 2.6382978723404255e-05,
"loss": 3.646,
"step": 65600
},
{
"epoch": 17.05,
"learning_rate": 2.6362221069019206e-05,
"loss": 3.6391,
"step": 65700
},
{
"epoch": 17.07,
"learning_rate": 2.634146341463415e-05,
"loss": 3.6542,
"step": 65800
},
{
"epoch": 17.1,
"learning_rate": 2.6320705760249093e-05,
"loss": 3.6401,
"step": 65900
},
{
"epoch": 17.13,
"learning_rate": 2.6299948105864038e-05,
"loss": 3.6508,
"step": 66000
},
{
"epoch": 17.15,
"learning_rate": 2.6279190451478983e-05,
"loss": 3.6418,
"step": 66100
},
{
"epoch": 17.18,
"learning_rate": 2.6258432797093928e-05,
"loss": 3.6355,
"step": 66200
},
{
"epoch": 17.2,
"learning_rate": 2.6237675142708873e-05,
"loss": 3.628,
"step": 66300
},
{
"epoch": 17.23,
"learning_rate": 2.6216917488323825e-05,
"loss": 3.6268,
"step": 66400
},
{
"epoch": 17.25,
"learning_rate": 2.619615983393877e-05,
"loss": 3.6455,
"step": 66500
},
{
"epoch": 17.28,
"learning_rate": 2.6175402179553714e-05,
"loss": 3.6476,
"step": 66600
},
{
"epoch": 17.31,
"learning_rate": 2.6154644525168656e-05,
"loss": 3.62,
"step": 66700
},
{
"epoch": 17.33,
"learning_rate": 2.61338868707836e-05,
"loss": 3.6338,
"step": 66800
},
{
"epoch": 17.36,
"learning_rate": 2.6113129216398546e-05,
"loss": 3.6408,
"step": 66900
},
{
"epoch": 17.38,
"learning_rate": 2.6092371562013498e-05,
"loss": 3.6332,
"step": 67000
},
{
"epoch": 17.41,
"learning_rate": 2.6071613907628443e-05,
"loss": 3.6388,
"step": 67100
},
{
"epoch": 17.44,
"learning_rate": 2.6050856253243388e-05,
"loss": 3.6309,
"step": 67200
},
{
"epoch": 17.46,
"learning_rate": 2.6030098598858333e-05,
"loss": 3.6142,
"step": 67300
},
{
"epoch": 17.49,
"learning_rate": 2.6009340944473274e-05,
"loss": 3.6209,
"step": 67400
},
{
"epoch": 17.51,
"learning_rate": 2.598858329008822e-05,
"loss": 3.6563,
"step": 67500
},
{
"epoch": 17.54,
"learning_rate": 2.5967825635703164e-05,
"loss": 3.6453,
"step": 67600
},
{
"epoch": 17.57,
"learning_rate": 2.5947067981318116e-05,
"loss": 3.6369,
"step": 67700
},
{
"epoch": 17.59,
"learning_rate": 2.592631032693306e-05,
"loss": 3.6374,
"step": 67800
},
{
"epoch": 17.62,
"learning_rate": 2.5905552672548006e-05,
"loss": 3.6093,
"step": 67900
},
{
"epoch": 17.64,
"learning_rate": 2.588479501816295e-05,
"loss": 3.6268,
"step": 68000
},
{
"epoch": 17.67,
"learning_rate": 2.5864037363777896e-05,
"loss": 3.6367,
"step": 68100
},
{
"epoch": 17.7,
"learning_rate": 2.5843279709392837e-05,
"loss": 3.6321,
"step": 68200
},
{
"epoch": 17.72,
"learning_rate": 2.582252205500779e-05,
"loss": 3.6407,
"step": 68300
},
{
"epoch": 17.75,
"learning_rate": 2.5801764400622734e-05,
"loss": 3.643,
"step": 68400
},
{
"epoch": 17.77,
"learning_rate": 2.578100674623768e-05,
"loss": 3.6505,
"step": 68500
},
{
"epoch": 17.8,
"learning_rate": 2.5760249091852624e-05,
"loss": 3.6214,
"step": 68600
},
{
"epoch": 17.83,
"learning_rate": 2.573949143746757e-05,
"loss": 3.6302,
"step": 68700
},
{
"epoch": 17.85,
"learning_rate": 2.5718733783082514e-05,
"loss": 3.6455,
"step": 68800
},
{
"epoch": 17.88,
"learning_rate": 2.5697976128697455e-05,
"loss": 3.6369,
"step": 68900
},
{
"epoch": 17.9,
"learning_rate": 2.5677218474312407e-05,
"loss": 3.6254,
"step": 69000
},
{
"epoch": 17.93,
"learning_rate": 2.5656460819927352e-05,
"loss": 3.6399,
"step": 69100
},
{
"epoch": 17.96,
"learning_rate": 2.5635703165542297e-05,
"loss": 3.6317,
"step": 69200
},
{
"epoch": 17.98,
"learning_rate": 2.5614945511157242e-05,
"loss": 3.6274,
"step": 69300
},
{
"epoch": 18.01,
"learning_rate": 2.5594187856772187e-05,
"loss": 3.6413,
"step": 69400
},
{
"epoch": 18.03,
"learning_rate": 2.5573430202387132e-05,
"loss": 3.5785,
"step": 69500
},
{
"epoch": 18.06,
"learning_rate": 2.555267254800208e-05,
"loss": 3.6545,
"step": 69600
},
{
"epoch": 18.09,
"learning_rate": 2.5531914893617025e-05,
"loss": 3.6421,
"step": 69700
},
{
"epoch": 18.11,
"learning_rate": 2.551115723923197e-05,
"loss": 3.6563,
"step": 69800
},
{
"epoch": 18.14,
"learning_rate": 2.5490399584846915e-05,
"loss": 3.6307,
"step": 69900
},
{
"epoch": 18.16,
"learning_rate": 2.546964193046186e-05,
"loss": 3.6466,
"step": 70000
},
{
"epoch": 18.16,
"eval_gen_len": 12.2802,
"eval_loss": 3.6190104484558105,
"eval_rouge1": 35.9674,
"eval_rouge2": 5.5141,
"eval_rougeL": 31.9434,
"eval_rougeLsum": 31.9335,
"eval_runtime": 75.2606,
"eval_samples_per_second": 51.209,
"eval_steps_per_second": 6.404,
"step": 70000
},
{
"epoch": 18.19,
"learning_rate": 2.5448884276076805e-05,
"loss": 3.6478,
"step": 70100
},
{
"epoch": 18.21,
"learning_rate": 2.542812662169175e-05,
"loss": 3.6251,
"step": 70200
},
{
"epoch": 18.24,
"learning_rate": 2.54073689673067e-05,
"loss": 3.6281,
"step": 70300
},
{
"epoch": 18.27,
"learning_rate": 2.5386611312921644e-05,
"loss": 3.6251,
"step": 70400
},
{
"epoch": 18.29,
"learning_rate": 2.536585365853659e-05,
"loss": 3.6252,
"step": 70500
},
{
"epoch": 18.32,
"learning_rate": 2.5345096004151533e-05,
"loss": 3.6214,
"step": 70600
},
{
"epoch": 18.34,
"learning_rate": 2.532433834976648e-05,
"loss": 3.6205,
"step": 70700
},
{
"epoch": 18.37,
"learning_rate": 2.5303580695381423e-05,
"loss": 3.6499,
"step": 70800
},
{
"epoch": 18.4,
"learning_rate": 2.5282823040996372e-05,
"loss": 3.6373,
"step": 70900
},
{
"epoch": 18.42,
"learning_rate": 2.5262065386611317e-05,
"loss": 3.6431,
"step": 71000
},
{
"epoch": 18.45,
"learning_rate": 2.5241307732226262e-05,
"loss": 3.6412,
"step": 71100
},
{
"epoch": 18.47,
"learning_rate": 2.5220550077841207e-05,
"loss": 3.6491,
"step": 71200
},
{
"epoch": 18.5,
"learning_rate": 2.519979242345615e-05,
"loss": 3.6163,
"step": 71300
},
{
"epoch": 18.53,
"learning_rate": 2.5179034769071097e-05,
"loss": 3.6328,
"step": 71400
},
{
"epoch": 18.55,
"learning_rate": 2.515827711468604e-05,
"loss": 3.623,
"step": 71500
},
{
"epoch": 18.58,
"learning_rate": 2.513751946030099e-05,
"loss": 3.6301,
"step": 71600
},
{
"epoch": 18.6,
"learning_rate": 2.5116761805915935e-05,
"loss": 3.6226,
"step": 71700
},
{
"epoch": 18.63,
"learning_rate": 2.509600415153088e-05,
"loss": 3.6297,
"step": 71800
},
{
"epoch": 18.66,
"learning_rate": 2.5075246497145825e-05,
"loss": 3.6523,
"step": 71900
},
{
"epoch": 18.68,
"learning_rate": 2.505448884276077e-05,
"loss": 3.6711,
"step": 72000
},
{
"epoch": 18.71,
"learning_rate": 2.5033731188375715e-05,
"loss": 3.6309,
"step": 72100
},
{
"epoch": 18.73,
"learning_rate": 2.5012973533990663e-05,
"loss": 3.6492,
"step": 72200
},
{
"epoch": 18.76,
"learning_rate": 2.4992215879605608e-05,
"loss": 3.6338,
"step": 72300
},
{
"epoch": 18.79,
"learning_rate": 2.4971458225220553e-05,
"loss": 3.6364,
"step": 72400
},
{
"epoch": 18.81,
"learning_rate": 2.4950700570835498e-05,
"loss": 3.634,
"step": 72500
},
{
"epoch": 18.84,
"learning_rate": 2.4929942916450443e-05,
"loss": 3.6298,
"step": 72600
},
{
"epoch": 18.86,
"learning_rate": 2.4909185262065388e-05,
"loss": 3.6512,
"step": 72700
},
{
"epoch": 18.89,
"learning_rate": 2.4888427607680333e-05,
"loss": 3.6493,
"step": 72800
},
{
"epoch": 18.92,
"learning_rate": 2.486766995329528e-05,
"loss": 3.6528,
"step": 72900
},
{
"epoch": 18.94,
"learning_rate": 2.4846912298910226e-05,
"loss": 3.6366,
"step": 73000
},
{
"epoch": 18.97,
"learning_rate": 2.482615464452517e-05,
"loss": 3.6066,
"step": 73100
},
{
"epoch": 18.99,
"learning_rate": 2.4805396990140116e-05,
"loss": 3.6273,
"step": 73200
},
{
"epoch": 19.02,
"learning_rate": 2.478463933575506e-05,
"loss": 3.6446,
"step": 73300
},
{
"epoch": 19.05,
"learning_rate": 2.4763881681370006e-05,
"loss": 3.6293,
"step": 73400
},
{
"epoch": 19.07,
"learning_rate": 2.4743124026984954e-05,
"loss": 3.6043,
"step": 73500
},
{
"epoch": 19.1,
"learning_rate": 2.47223663725999e-05,
"loss": 3.6257,
"step": 73600
},
{
"epoch": 19.12,
"learning_rate": 2.4701608718214844e-05,
"loss": 3.6326,
"step": 73700
},
{
"epoch": 19.15,
"learning_rate": 2.468085106382979e-05,
"loss": 3.6104,
"step": 73800
},
{
"epoch": 19.17,
"learning_rate": 2.4660093409444734e-05,
"loss": 3.6372,
"step": 73900
},
{
"epoch": 19.2,
"learning_rate": 2.463933575505968e-05,
"loss": 3.6068,
"step": 74000
},
{
"epoch": 19.23,
"learning_rate": 2.4618578100674624e-05,
"loss": 3.6266,
"step": 74100
},
{
"epoch": 19.25,
"learning_rate": 2.4597820446289573e-05,
"loss": 3.63,
"step": 74200
},
{
"epoch": 19.28,
"learning_rate": 2.4577062791904518e-05,
"loss": 3.6421,
"step": 74300
},
{
"epoch": 19.3,
"learning_rate": 2.4556305137519463e-05,
"loss": 3.6375,
"step": 74400
},
{
"epoch": 19.33,
"learning_rate": 2.4535547483134408e-05,
"loss": 3.6254,
"step": 74500
},
{
"epoch": 19.36,
"learning_rate": 2.4514789828749353e-05,
"loss": 3.6355,
"step": 74600
},
{
"epoch": 19.38,
"learning_rate": 2.4494032174364297e-05,
"loss": 3.6511,
"step": 74700
},
{
"epoch": 19.41,
"learning_rate": 2.4473274519979246e-05,
"loss": 3.6639,
"step": 74800
},
{
"epoch": 19.43,
"learning_rate": 2.445251686559419e-05,
"loss": 3.6405,
"step": 74900
},
{
"epoch": 19.46,
"learning_rate": 2.4431759211209136e-05,
"loss": 3.6307,
"step": 75000
},
{
"epoch": 19.46,
"eval_gen_len": 12.2797,
"eval_loss": 3.616529703140259,
"eval_rouge1": 37.1937,
"eval_rouge2": 6.2828,
"eval_rougeL": 33.1867,
"eval_rougeLsum": 33.1809,
"eval_runtime": 75.7672,
"eval_samples_per_second": 50.866,
"eval_steps_per_second": 6.362,
"step": 75000
},
{
"epoch": 19.49,
"learning_rate": 2.441100155682408e-05,
"loss": 3.6128,
"step": 75100
},
{
"epoch": 19.51,
"learning_rate": 2.4390243902439026e-05,
"loss": 3.6349,
"step": 75200
},
{
"epoch": 19.54,
"learning_rate": 2.436948624805397e-05,
"loss": 3.6422,
"step": 75300
},
{
"epoch": 19.56,
"learning_rate": 2.4348728593668916e-05,
"loss": 3.6391,
"step": 75400
},
{
"epoch": 19.59,
"learning_rate": 2.4327970939283864e-05,
"loss": 3.6343,
"step": 75500
},
{
"epoch": 19.62,
"learning_rate": 2.430721328489881e-05,
"loss": 3.6291,
"step": 75600
},
{
"epoch": 19.64,
"learning_rate": 2.4286455630513754e-05,
"loss": 3.6087,
"step": 75700
},
{
"epoch": 19.67,
"learning_rate": 2.42656979761287e-05,
"loss": 3.6324,
"step": 75800
},
{
"epoch": 19.69,
"learning_rate": 2.4244940321743644e-05,
"loss": 3.648,
"step": 75900
},
{
"epoch": 19.72,
"learning_rate": 2.422418266735859e-05,
"loss": 3.6319,
"step": 76000
},
{
"epoch": 19.75,
"learning_rate": 2.4203425012973534e-05,
"loss": 3.6331,
"step": 76100
},
{
"epoch": 19.77,
"learning_rate": 2.4182667358588482e-05,
"loss": 3.6391,
"step": 76200
},
{
"epoch": 19.8,
"learning_rate": 2.4161909704203427e-05,
"loss": 3.6544,
"step": 76300
},
{
"epoch": 19.82,
"learning_rate": 2.4141152049818372e-05,
"loss": 3.6293,
"step": 76400
},
{
"epoch": 19.85,
"learning_rate": 2.4120394395433317e-05,
"loss": 3.6295,
"step": 76500
},
{
"epoch": 19.88,
"learning_rate": 2.4099636741048262e-05,
"loss": 3.6216,
"step": 76600
},
{
"epoch": 19.9,
"learning_rate": 2.4078879086663207e-05,
"loss": 3.6345,
"step": 76700
},
{
"epoch": 19.93,
"learning_rate": 2.4058121432278155e-05,
"loss": 3.6347,
"step": 76800
},
{
"epoch": 19.95,
"learning_rate": 2.40373637778931e-05,
"loss": 3.6191,
"step": 76900
},
{
"epoch": 19.98,
"learning_rate": 2.4016606123508045e-05,
"loss": 3.6214,
"step": 77000
},
{
"epoch": 20.01,
"learning_rate": 2.399584846912299e-05,
"loss": 3.6456,
"step": 77100
},
{
"epoch": 20.03,
"learning_rate": 2.3975090814737935e-05,
"loss": 3.6474,
"step": 77200
},
{
"epoch": 20.06,
"learning_rate": 2.395433316035288e-05,
"loss": 3.6347,
"step": 77300
},
{
"epoch": 20.08,
"learning_rate": 2.3933575505967825e-05,
"loss": 3.626,
"step": 77400
},
{
"epoch": 20.11,
"learning_rate": 2.3912817851582774e-05,
"loss": 3.6323,
"step": 77500
},
{
"epoch": 20.13,
"learning_rate": 2.389206019719772e-05,
"loss": 3.6282,
"step": 77600
},
{
"epoch": 20.16,
"learning_rate": 2.3871302542812663e-05,
"loss": 3.6296,
"step": 77700
},
{
"epoch": 20.19,
"learning_rate": 2.385054488842761e-05,
"loss": 3.6191,
"step": 77800
},
{
"epoch": 20.21,
"learning_rate": 2.3829787234042553e-05,
"loss": 3.6191,
"step": 77900
},
{
"epoch": 20.24,
"learning_rate": 2.38090295796575e-05,
"loss": 3.6518,
"step": 78000
},
{
"epoch": 20.26,
"learning_rate": 2.3788271925272447e-05,
"loss": 3.6158,
"step": 78100
},
{
"epoch": 20.29,
"learning_rate": 2.3767514270887392e-05,
"loss": 3.5855,
"step": 78200
},
{
"epoch": 20.32,
"learning_rate": 2.3746756616502337e-05,
"loss": 3.6417,
"step": 78300
},
{
"epoch": 20.34,
"learning_rate": 2.372599896211728e-05,
"loss": 3.5877,
"step": 78400
},
{
"epoch": 20.37,
"learning_rate": 2.3705241307732227e-05,
"loss": 3.6276,
"step": 78500
},
{
"epoch": 20.39,
"learning_rate": 2.368448365334717e-05,
"loss": 3.6331,
"step": 78600
},
{
"epoch": 20.42,
"learning_rate": 2.3663725998962117e-05,
"loss": 3.6065,
"step": 78700
},
{
"epoch": 20.45,
"learning_rate": 2.3642968344577065e-05,
"loss": 3.6643,
"step": 78800
},
{
"epoch": 20.47,
"learning_rate": 2.362221069019201e-05,
"loss": 3.6337,
"step": 78900
},
{
"epoch": 20.5,
"learning_rate": 2.3601453035806955e-05,
"loss": 3.6463,
"step": 79000
},
{
"epoch": 20.52,
"learning_rate": 2.35806953814219e-05,
"loss": 3.646,
"step": 79100
},
{
"epoch": 20.55,
"learning_rate": 2.3559937727036845e-05,
"loss": 3.6284,
"step": 79200
},
{
"epoch": 20.58,
"learning_rate": 2.353918007265179e-05,
"loss": 3.6241,
"step": 79300
},
{
"epoch": 20.6,
"learning_rate": 2.3518422418266738e-05,
"loss": 3.6432,
"step": 79400
},
{
"epoch": 20.63,
"learning_rate": 2.3497664763881683e-05,
"loss": 3.5956,
"step": 79500
},
{
"epoch": 20.65,
"learning_rate": 2.3476907109496628e-05,
"loss": 3.6143,
"step": 79600
},
{
"epoch": 20.68,
"learning_rate": 2.3456149455111573e-05,
"loss": 3.6383,
"step": 79700
},
{
"epoch": 20.71,
"learning_rate": 2.3435391800726518e-05,
"loss": 3.6151,
"step": 79800
},
{
"epoch": 20.73,
"learning_rate": 2.3414634146341463e-05,
"loss": 3.6192,
"step": 79900
},
{
"epoch": 20.76,
"learning_rate": 2.3393876491956408e-05,
"loss": 3.6267,
"step": 80000
},
{
"epoch": 20.76,
"eval_gen_len": 12.2774,
"eval_loss": 3.6140060424804688,
"eval_rouge1": 37.7966,
"eval_rouge2": 6.6257,
"eval_rougeL": 33.7751,
"eval_rougeLsum": 33.8284,
"eval_runtime": 75.5711,
"eval_samples_per_second": 50.998,
"eval_steps_per_second": 6.378,
"step": 80000
},
{
"epoch": 20.78,
"learning_rate": 2.337311883757136e-05,
"loss": 3.6422,
"step": 80100
},
{
"epoch": 20.81,
"learning_rate": 2.33523611831863e-05,
"loss": 3.6305,
"step": 80200
},
{
"epoch": 20.84,
"learning_rate": 2.3331603528801246e-05,
"loss": 3.6263,
"step": 80300
},
{
"epoch": 20.86,
"learning_rate": 2.331084587441619e-05,
"loss": 3.6409,
"step": 80400
},
{
"epoch": 20.89,
"learning_rate": 2.3290088220031136e-05,
"loss": 3.6349,
"step": 80500
},
{
"epoch": 20.91,
"learning_rate": 2.326933056564608e-05,
"loss": 3.6293,
"step": 80600
},
{
"epoch": 20.94,
"learning_rate": 2.3248572911261033e-05,
"loss": 3.6439,
"step": 80700
},
{
"epoch": 20.97,
"learning_rate": 2.3227815256875978e-05,
"loss": 3.6455,
"step": 80800
},
{
"epoch": 20.99,
"learning_rate": 2.3207057602490923e-05,
"loss": 3.6419,
"step": 80900
},
{
"epoch": 21.02,
"learning_rate": 2.3186299948105864e-05,
"loss": 3.6248,
"step": 81000
},
{
"epoch": 21.04,
"learning_rate": 2.316554229372081e-05,
"loss": 3.6155,
"step": 81100
},
{
"epoch": 21.07,
"learning_rate": 2.3144784639335754e-05,
"loss": 3.5999,
"step": 81200
},
{
"epoch": 21.09,
"learning_rate": 2.31240269849507e-05,
"loss": 3.6312,
"step": 81300
},
{
"epoch": 21.12,
"learning_rate": 2.310326933056565e-05,
"loss": 3.6309,
"step": 81400
},
{
"epoch": 21.15,
"learning_rate": 2.3082511676180596e-05,
"loss": 3.6292,
"step": 81500
},
{
"epoch": 21.17,
"learning_rate": 2.306175402179554e-05,
"loss": 3.6346,
"step": 81600
},
{
"epoch": 21.2,
"learning_rate": 2.3040996367410483e-05,
"loss": 3.6325,
"step": 81700
},
{
"epoch": 21.22,
"learning_rate": 2.3020238713025427e-05,
"loss": 3.6513,
"step": 81800
},
{
"epoch": 21.25,
"learning_rate": 2.2999481058640372e-05,
"loss": 3.618,
"step": 81900
},
{
"epoch": 21.28,
"learning_rate": 2.2978723404255324e-05,
"loss": 3.6536,
"step": 82000
},
{
"epoch": 21.3,
"learning_rate": 2.295796574987027e-05,
"loss": 3.6265,
"step": 82100
},
{
"epoch": 21.33,
"learning_rate": 2.2937208095485214e-05,
"loss": 3.6115,
"step": 82200
},
{
"epoch": 21.35,
"learning_rate": 2.291645044110016e-05,
"loss": 3.6246,
"step": 82300
},
{
"epoch": 21.38,
"learning_rate": 2.2895692786715104e-05,
"loss": 3.6248,
"step": 82400
},
{
"epoch": 21.41,
"learning_rate": 2.2874935132330046e-05,
"loss": 3.6398,
"step": 82500
},
{
"epoch": 21.43,
"learning_rate": 2.285417747794499e-05,
"loss": 3.6227,
"step": 82600
},
{
"epoch": 21.46,
"learning_rate": 2.2833419823559942e-05,
"loss": 3.6327,
"step": 82700
},
{
"epoch": 21.48,
"learning_rate": 2.2812662169174887e-05,
"loss": 3.6141,
"step": 82800
},
{
"epoch": 21.51,
"learning_rate": 2.2791904514789832e-05,
"loss": 3.6388,
"step": 82900
},
{
"epoch": 21.54,
"learning_rate": 2.2771146860404777e-05,
"loss": 3.5972,
"step": 83000
},
{
"epoch": 21.56,
"learning_rate": 2.2750389206019722e-05,
"loss": 3.6391,
"step": 83100
},
{
"epoch": 21.59,
"learning_rate": 2.2729631551634664e-05,
"loss": 3.6326,
"step": 83200
},
{
"epoch": 21.61,
"learning_rate": 2.2708873897249616e-05,
"loss": 3.6511,
"step": 83300
},
{
"epoch": 21.64,
"learning_rate": 2.268811624286456e-05,
"loss": 3.6438,
"step": 83400
},
{
"epoch": 21.67,
"learning_rate": 2.2667358588479505e-05,
"loss": 3.6262,
"step": 83500
},
{
"epoch": 21.69,
"learning_rate": 2.264660093409445e-05,
"loss": 3.6197,
"step": 83600
},
{
"epoch": 21.72,
"learning_rate": 2.2625843279709395e-05,
"loss": 3.6237,
"step": 83700
},
{
"epoch": 21.74,
"learning_rate": 2.260508562532434e-05,
"loss": 3.6164,
"step": 83800
},
{
"epoch": 21.77,
"learning_rate": 2.2584327970939285e-05,
"loss": 3.6177,
"step": 83900
},
{
"epoch": 21.8,
"learning_rate": 2.2563570316554234e-05,
"loss": 3.6335,
"step": 84000
},
{
"epoch": 21.82,
"learning_rate": 2.254281266216918e-05,
"loss": 3.6037,
"step": 84100
},
{
"epoch": 21.85,
"learning_rate": 2.2522055007784124e-05,
"loss": 3.6256,
"step": 84200
},
{
"epoch": 21.87,
"learning_rate": 2.250129735339907e-05,
"loss": 3.6176,
"step": 84300
},
{
"epoch": 21.9,
"learning_rate": 2.2480539699014014e-05,
"loss": 3.638,
"step": 84400
},
{
"epoch": 21.93,
"learning_rate": 2.245978204462896e-05,
"loss": 3.6471,
"step": 84500
},
{
"epoch": 21.95,
"learning_rate": 2.2439024390243907e-05,
"loss": 3.6242,
"step": 84600
},
{
"epoch": 21.98,
"learning_rate": 2.2418266735858852e-05,
"loss": 3.6208,
"step": 84700
},
{
"epoch": 22.0,
"learning_rate": 2.2397509081473797e-05,
"loss": 3.5985,
"step": 84800
},
{
"epoch": 22.03,
"learning_rate": 2.2376751427088742e-05,
"loss": 3.6171,
"step": 84900
},
{
"epoch": 22.06,
"learning_rate": 2.2355993772703687e-05,
"loss": 3.6323,
"step": 85000
},
{
"epoch": 22.06,
"eval_gen_len": 12.2784,
"eval_loss": 3.6118929386138916,
"eval_rouge1": 38.1155,
"eval_rouge2": 7.0586,
"eval_rougeL": 34.0831,
"eval_rougeLsum": 34.1346,
"eval_runtime": 76.1382,
"eval_samples_per_second": 50.618,
"eval_steps_per_second": 6.331,
"step": 85000
},
{
"epoch": 22.08,
"learning_rate": 2.2335236118318632e-05,
"loss": 3.6053,
"step": 85100
},
{
"epoch": 22.11,
"learning_rate": 2.2314478463933577e-05,
"loss": 3.6152,
"step": 85200
},
{
"epoch": 22.13,
"learning_rate": 2.2293720809548525e-05,
"loss": 3.6379,
"step": 85300
},
{
"epoch": 22.16,
"learning_rate": 2.227296315516347e-05,
"loss": 3.6244,
"step": 85400
},
{
"epoch": 22.18,
"learning_rate": 2.2252205500778415e-05,
"loss": 3.6135,
"step": 85500
},
{
"epoch": 22.21,
"learning_rate": 2.223144784639336e-05,
"loss": 3.5963,
"step": 85600
},
{
"epoch": 22.24,
"learning_rate": 2.2210690192008305e-05,
"loss": 3.6271,
"step": 85700
},
{
"epoch": 22.26,
"learning_rate": 2.218993253762325e-05,
"loss": 3.6101,
"step": 85800
},
{
"epoch": 22.29,
"learning_rate": 2.2169174883238198e-05,
"loss": 3.6205,
"step": 85900
},
{
"epoch": 22.31,
"learning_rate": 2.2148417228853143e-05,
"loss": 3.6117,
"step": 86000
},
{
"epoch": 22.34,
"learning_rate": 2.2127659574468088e-05,
"loss": 3.6298,
"step": 86100
},
{
"epoch": 22.37,
"learning_rate": 2.2106901920083033e-05,
"loss": 3.6262,
"step": 86200
},
{
"epoch": 22.39,
"learning_rate": 2.2086144265697978e-05,
"loss": 3.6343,
"step": 86300
},
{
"epoch": 22.42,
"learning_rate": 2.2065386611312923e-05,
"loss": 3.6137,
"step": 86400
},
{
"epoch": 22.44,
"learning_rate": 2.2044628956927868e-05,
"loss": 3.6192,
"step": 86500
},
{
"epoch": 22.47,
"learning_rate": 2.2023871302542816e-05,
"loss": 3.6328,
"step": 86600
},
{
"epoch": 22.5,
"learning_rate": 2.200311364815776e-05,
"loss": 3.6345,
"step": 86700
},
{
"epoch": 22.52,
"learning_rate": 2.1982355993772706e-05,
"loss": 3.6275,
"step": 86800
},
{
"epoch": 22.55,
"learning_rate": 2.196159833938765e-05,
"loss": 3.6173,
"step": 86900
},
{
"epoch": 22.57,
"learning_rate": 2.1940840685002596e-05,
"loss": 3.654,
"step": 87000
},
{
"epoch": 22.6,
"learning_rate": 2.192008303061754e-05,
"loss": 3.6459,
"step": 87100
},
{
"epoch": 22.63,
"learning_rate": 2.1899325376232486e-05,
"loss": 3.6171,
"step": 87200
},
{
"epoch": 22.65,
"learning_rate": 2.1878567721847435e-05,
"loss": 3.6092,
"step": 87300
},
{
"epoch": 22.68,
"learning_rate": 2.185781006746238e-05,
"loss": 3.6502,
"step": 87400
},
{
"epoch": 22.7,
"learning_rate": 2.1837052413077325e-05,
"loss": 3.6308,
"step": 87500
},
{
"epoch": 22.73,
"learning_rate": 2.181629475869227e-05,
"loss": 3.6208,
"step": 87600
},
{
"epoch": 22.76,
"learning_rate": 2.1795537104307214e-05,
"loss": 3.6293,
"step": 87700
},
{
"epoch": 22.78,
"learning_rate": 2.177477944992216e-05,
"loss": 3.6228,
"step": 87800
},
{
"epoch": 22.81,
"learning_rate": 2.1754021795537108e-05,
"loss": 3.6328,
"step": 87900
},
{
"epoch": 22.83,
"learning_rate": 2.1733264141152053e-05,
"loss": 3.6413,
"step": 88000
},
{
"epoch": 22.86,
"learning_rate": 2.1712506486766998e-05,
"loss": 3.623,
"step": 88100
},
{
"epoch": 22.89,
"learning_rate": 2.1691748832381943e-05,
"loss": 3.6183,
"step": 88200
},
{
"epoch": 22.91,
"learning_rate": 2.1670991177996888e-05,
"loss": 3.6458,
"step": 88300
},
{
"epoch": 22.94,
"learning_rate": 2.1650233523611833e-05,
"loss": 3.6135,
"step": 88400
},
{
"epoch": 22.96,
"learning_rate": 2.1629475869226778e-05,
"loss": 3.6387,
"step": 88500
},
{
"epoch": 22.99,
"learning_rate": 2.1608718214841726e-05,
"loss": 3.6328,
"step": 88600
},
{
"epoch": 23.02,
"learning_rate": 2.158796056045667e-05,
"loss": 3.642,
"step": 88700
},
{
"epoch": 23.04,
"learning_rate": 2.1567202906071616e-05,
"loss": 3.6133,
"step": 88800
},
{
"epoch": 23.07,
"learning_rate": 2.154644525168656e-05,
"loss": 3.6341,
"step": 88900
},
{
"epoch": 23.09,
"learning_rate": 2.1525687597301506e-05,
"loss": 3.6534,
"step": 89000
},
{
"epoch": 23.12,
"learning_rate": 2.150492994291645e-05,
"loss": 3.6197,
"step": 89100
},
{
"epoch": 23.14,
"learning_rate": 2.14841722885314e-05,
"loss": 3.6146,
"step": 89200
},
{
"epoch": 23.17,
"learning_rate": 2.1463414634146344e-05,
"loss": 3.619,
"step": 89300
},
{
"epoch": 23.2,
"learning_rate": 2.144265697976129e-05,
"loss": 3.6136,
"step": 89400
},
{
"epoch": 23.22,
"learning_rate": 2.1421899325376234e-05,
"loss": 3.6261,
"step": 89500
},
{
"epoch": 23.25,
"learning_rate": 2.140114167099118e-05,
"loss": 3.6204,
"step": 89600
},
{
"epoch": 23.27,
"learning_rate": 2.1380384016606124e-05,
"loss": 3.6304,
"step": 89700
},
{
"epoch": 23.3,
"learning_rate": 2.135962636222107e-05,
"loss": 3.6339,
"step": 89800
},
{
"epoch": 23.33,
"learning_rate": 2.1338868707836017e-05,
"loss": 3.643,
"step": 89900
},
{
"epoch": 23.35,
"learning_rate": 2.1318111053450962e-05,
"loss": 3.6424,
"step": 90000
},
{
"epoch": 23.35,
"eval_gen_len": 12.2787,
"eval_loss": 3.610205888748169,
"eval_rouge1": 38.4968,
"eval_rouge2": 7.1642,
"eval_rougeL": 34.4804,
"eval_rougeLsum": 34.4687,
"eval_runtime": 76.2319,
"eval_samples_per_second": 50.556,
"eval_steps_per_second": 6.323,
"step": 90000
},
{
"epoch": 23.38,
"learning_rate": 2.1297353399065907e-05,
"loss": 3.6358,
"step": 90100
},
{
"epoch": 23.4,
"learning_rate": 2.1276595744680852e-05,
"loss": 3.6221,
"step": 90200
},
{
"epoch": 23.43,
"learning_rate": 2.1255838090295797e-05,
"loss": 3.6116,
"step": 90300
},
{
"epoch": 23.46,
"learning_rate": 2.1235080435910742e-05,
"loss": 3.6152,
"step": 90400
},
{
"epoch": 23.48,
"learning_rate": 2.121432278152569e-05,
"loss": 3.6206,
"step": 90500
},
{
"epoch": 23.51,
"learning_rate": 2.1193565127140635e-05,
"loss": 3.6349,
"step": 90600
},
{
"epoch": 23.53,
"learning_rate": 2.117280747275558e-05,
"loss": 3.616,
"step": 90700
},
{
"epoch": 23.56,
"learning_rate": 2.1152049818370525e-05,
"loss": 3.6248,
"step": 90800
},
{
"epoch": 23.59,
"learning_rate": 2.113129216398547e-05,
"loss": 3.6108,
"step": 90900
},
{
"epoch": 23.61,
"learning_rate": 2.1110534509600415e-05,
"loss": 3.6109,
"step": 91000
},
{
"epoch": 23.64,
"learning_rate": 2.108977685521536e-05,
"loss": 3.6205,
"step": 91100
},
{
"epoch": 23.66,
"learning_rate": 2.106901920083031e-05,
"loss": 3.616,
"step": 91200
},
{
"epoch": 23.69,
"learning_rate": 2.1048261546445254e-05,
"loss": 3.6276,
"step": 91300
},
{
"epoch": 23.72,
"learning_rate": 2.10275038920602e-05,
"loss": 3.6425,
"step": 91400
},
{
"epoch": 23.74,
"learning_rate": 2.1006746237675144e-05,
"loss": 3.6308,
"step": 91500
},
{
"epoch": 23.77,
"learning_rate": 2.098598858329009e-05,
"loss": 3.6103,
"step": 91600
},
{
"epoch": 23.79,
"learning_rate": 2.0965230928905034e-05,
"loss": 3.6167,
"step": 91700
},
{
"epoch": 23.82,
"learning_rate": 2.0944473274519982e-05,
"loss": 3.6349,
"step": 91800
},
{
"epoch": 23.85,
"learning_rate": 2.0923715620134927e-05,
"loss": 3.6212,
"step": 91900
},
{
"epoch": 23.87,
"learning_rate": 2.0902957965749872e-05,
"loss": 3.6271,
"step": 92000
},
{
"epoch": 23.9,
"learning_rate": 2.0882200311364817e-05,
"loss": 3.6428,
"step": 92100
},
{
"epoch": 23.92,
"learning_rate": 2.0861442656979762e-05,
"loss": 3.6327,
"step": 92200
},
{
"epoch": 23.95,
"learning_rate": 2.0840685002594707e-05,
"loss": 3.6309,
"step": 92300
},
{
"epoch": 23.98,
"learning_rate": 2.081992734820965e-05,
"loss": 3.6299,
"step": 92400
},
{
"epoch": 24.0,
"learning_rate": 2.07991696938246e-05,
"loss": 3.6317,
"step": 92500
},
{
"epoch": 24.03,
"learning_rate": 2.0778412039439545e-05,
"loss": 3.631,
"step": 92600
},
{
"epoch": 24.05,
"learning_rate": 2.075765438505449e-05,
"loss": 3.6313,
"step": 92700
},
{
"epoch": 24.08,
"learning_rate": 2.0736896730669435e-05,
"loss": 3.6339,
"step": 92800
},
{
"epoch": 24.1,
"learning_rate": 2.071613907628438e-05,
"loss": 3.627,
"step": 92900
},
{
"epoch": 24.13,
"learning_rate": 2.0695381421899325e-05,
"loss": 3.5829,
"step": 93000
},
{
"epoch": 24.16,
"learning_rate": 2.0674623767514273e-05,
"loss": 3.6276,
"step": 93100
},
{
"epoch": 24.18,
"learning_rate": 2.0653866113129218e-05,
"loss": 3.634,
"step": 93200
},
{
"epoch": 24.21,
"learning_rate": 2.0633108458744163e-05,
"loss": 3.6445,
"step": 93300
},
{
"epoch": 24.23,
"learning_rate": 2.0612350804359108e-05,
"loss": 3.6152,
"step": 93400
},
{
"epoch": 24.26,
"learning_rate": 2.0591593149974053e-05,
"loss": 3.6221,
"step": 93500
},
{
"epoch": 24.29,
"learning_rate": 2.0570835495588998e-05,
"loss": 3.6276,
"step": 93600
},
{
"epoch": 24.31,
"learning_rate": 2.0550077841203943e-05,
"loss": 3.6177,
"step": 93700
},
{
"epoch": 24.34,
"learning_rate": 2.052932018681889e-05,
"loss": 3.6295,
"step": 93800
},
{
"epoch": 24.36,
"learning_rate": 2.0508562532433836e-05,
"loss": 3.6136,
"step": 93900
},
{
"epoch": 24.39,
"learning_rate": 2.048780487804878e-05,
"loss": 3.6271,
"step": 94000
},
{
"epoch": 24.42,
"learning_rate": 2.0467047223663726e-05,
"loss": 3.6286,
"step": 94100
},
{
"epoch": 24.44,
"learning_rate": 2.044628956927867e-05,
"loss": 3.6246,
"step": 94200
},
{
"epoch": 24.47,
"learning_rate": 2.0425531914893616e-05,
"loss": 3.6044,
"step": 94300
},
{
"epoch": 24.49,
"learning_rate": 2.0404774260508568e-05,
"loss": 3.6185,
"step": 94400
},
{
"epoch": 24.52,
"learning_rate": 2.038401660612351e-05,
"loss": 3.6121,
"step": 94500
},
{
"epoch": 24.55,
"learning_rate": 2.0363258951738455e-05,
"loss": 3.6212,
"step": 94600
},
{
"epoch": 24.57,
"learning_rate": 2.03425012973534e-05,
"loss": 3.656,
"step": 94700
},
{
"epoch": 24.6,
"learning_rate": 2.0321743642968344e-05,
"loss": 3.592,
"step": 94800
},
{
"epoch": 24.62,
"learning_rate": 2.030098598858329e-05,
"loss": 3.6093,
"step": 94900
},
{
"epoch": 24.65,
"learning_rate": 2.0280228334198234e-05,
"loss": 3.6256,
"step": 95000
},
{
"epoch": 24.65,
"eval_gen_len": 12.2776,
"eval_loss": 3.6076412200927734,
"eval_rouge1": 39.6155,
"eval_rouge2": 7.3309,
"eval_rougeL": 35.6135,
"eval_rougeLsum": 35.6554,
"eval_runtime": 76.2409,
"eval_samples_per_second": 50.55,
"eval_steps_per_second": 6.322,
"step": 95000
},
{
"epoch": 24.68,
"learning_rate": 2.0259470679813186e-05,
"loss": 3.6108,
"step": 95100
},
{
"epoch": 24.7,
"learning_rate": 2.023871302542813e-05,
"loss": 3.6275,
"step": 95200
},
{
"epoch": 24.73,
"learning_rate": 2.0217955371043073e-05,
"loss": 3.607,
"step": 95300
},
{
"epoch": 24.75,
"learning_rate": 2.0197197716658018e-05,
"loss": 3.6376,
"step": 95400
},
{
"epoch": 24.78,
"learning_rate": 2.0176440062272963e-05,
"loss": 3.6322,
"step": 95500
},
{
"epoch": 24.81,
"learning_rate": 2.0155682407887908e-05,
"loss": 3.6188,
"step": 95600
},
{
"epoch": 24.83,
"learning_rate": 2.013492475350286e-05,
"loss": 3.6069,
"step": 95700
},
{
"epoch": 24.86,
"learning_rate": 2.0114167099117804e-05,
"loss": 3.6363,
"step": 95800
},
{
"epoch": 24.88,
"learning_rate": 2.009340944473275e-05,
"loss": 3.636,
"step": 95900
},
{
"epoch": 24.91,
"learning_rate": 2.007265179034769e-05,
"loss": 3.6337,
"step": 96000
},
{
"epoch": 24.94,
"learning_rate": 2.0051894135962636e-05,
"loss": 3.6192,
"step": 96100
},
{
"epoch": 24.96,
"learning_rate": 2.003113648157758e-05,
"loss": 3.6057,
"step": 96200
},
{
"epoch": 24.99,
"learning_rate": 2.0010378827192526e-05,
"loss": 3.6318,
"step": 96300
},
{
"epoch": 25.01,
"learning_rate": 1.9989621172807474e-05,
"loss": 3.628,
"step": 96400
},
{
"epoch": 25.04,
"learning_rate": 1.996886351842242e-05,
"loss": 3.6271,
"step": 96500
},
{
"epoch": 25.06,
"learning_rate": 1.9948105864037367e-05,
"loss": 3.5885,
"step": 96600
},
{
"epoch": 25.09,
"learning_rate": 1.9927348209652312e-05,
"loss": 3.6315,
"step": 96700
},
{
"epoch": 25.12,
"learning_rate": 1.9906590555267254e-05,
"loss": 3.632,
"step": 96800
},
{
"epoch": 25.14,
"learning_rate": 1.9885832900882202e-05,
"loss": 3.6198,
"step": 96900
},
{
"epoch": 25.17,
"learning_rate": 1.9865075246497147e-05,
"loss": 3.5705,
"step": 97000
},
{
"epoch": 25.19,
"learning_rate": 1.9844317592112092e-05,
"loss": 3.624,
"step": 97100
},
{
"epoch": 25.22,
"learning_rate": 1.982355993772704e-05,
"loss": 3.6397,
"step": 97200
},
{
"epoch": 25.25,
"learning_rate": 1.9802802283341986e-05,
"loss": 3.6237,
"step": 97300
},
{
"epoch": 25.27,
"learning_rate": 1.978204462895693e-05,
"loss": 3.6085,
"step": 97400
},
{
"epoch": 25.3,
"learning_rate": 1.9761286974571876e-05,
"loss": 3.649,
"step": 97500
},
{
"epoch": 25.32,
"learning_rate": 1.974052932018682e-05,
"loss": 3.6188,
"step": 97600
},
{
"epoch": 25.35,
"learning_rate": 1.9719771665801765e-05,
"loss": 3.5814,
"step": 97700
},
{
"epoch": 25.38,
"learning_rate": 1.969901401141671e-05,
"loss": 3.63,
"step": 97800
},
{
"epoch": 25.4,
"learning_rate": 1.967825635703166e-05,
"loss": 3.6045,
"step": 97900
},
{
"epoch": 25.43,
"learning_rate": 1.9657498702646604e-05,
"loss": 3.6146,
"step": 98000
},
{
"epoch": 25.45,
"learning_rate": 1.963674104826155e-05,
"loss": 3.6045,
"step": 98100
},
{
"epoch": 25.48,
"learning_rate": 1.9615983393876494e-05,
"loss": 3.6275,
"step": 98200
},
{
"epoch": 25.51,
"learning_rate": 1.959522573949144e-05,
"loss": 3.6167,
"step": 98300
},
{
"epoch": 25.53,
"learning_rate": 1.9574468085106384e-05,
"loss": 3.6221,
"step": 98400
},
{
"epoch": 25.56,
"learning_rate": 1.9553710430721332e-05,
"loss": 3.6447,
"step": 98500
},
{
"epoch": 25.58,
"learning_rate": 1.9532952776336277e-05,
"loss": 3.6243,
"step": 98600
},
{
"epoch": 25.61,
"learning_rate": 1.9512195121951222e-05,
"loss": 3.6212,
"step": 98700
},
{
"epoch": 25.64,
"learning_rate": 1.9491437467566167e-05,
"loss": 3.6,
"step": 98800
},
{
"epoch": 25.66,
"learning_rate": 1.9470679813181112e-05,
"loss": 3.6268,
"step": 98900
},
{
"epoch": 25.69,
"learning_rate": 1.9449922158796057e-05,
"loss": 3.6228,
"step": 99000
},
{
"epoch": 25.71,
"learning_rate": 1.9429164504411002e-05,
"loss": 3.6269,
"step": 99100
},
{
"epoch": 25.74,
"learning_rate": 1.940840685002595e-05,
"loss": 3.6146,
"step": 99200
},
{
"epoch": 25.77,
"learning_rate": 1.9387649195640895e-05,
"loss": 3.6174,
"step": 99300
},
{
"epoch": 25.79,
"learning_rate": 1.936689154125584e-05,
"loss": 3.6065,
"step": 99400
},
{
"epoch": 25.82,
"learning_rate": 1.9346133886870785e-05,
"loss": 3.6325,
"step": 99500
},
{
"epoch": 25.84,
"learning_rate": 1.932537623248573e-05,
"loss": 3.5995,
"step": 99600
},
{
"epoch": 25.87,
"learning_rate": 1.9304618578100675e-05,
"loss": 3.6314,
"step": 99700
},
{
"epoch": 25.9,
"learning_rate": 1.9283860923715623e-05,
"loss": 3.5918,
"step": 99800
},
{
"epoch": 25.92,
"learning_rate": 1.926310326933057e-05,
"loss": 3.6165,
"step": 99900
},
{
"epoch": 25.95,
"learning_rate": 1.9242345614945513e-05,
"loss": 3.6295,
"step": 100000
},
{
"epoch": 25.95,
"eval_gen_len": 12.2774,
"eval_loss": 3.606283664703369,
"eval_rouge1": 39.4383,
"eval_rouge2": 7.6179,
"eval_rougeL": 35.444,
"eval_rougeLsum": 35.4606,
"eval_runtime": 75.8245,
"eval_samples_per_second": 50.828,
"eval_steps_per_second": 6.357,
"step": 100000
},
{
"epoch": 25.97,
"learning_rate": 1.9221587960560458e-05,
"loss": 3.6279,
"step": 100100
},
{
"epoch": 26.0,
"learning_rate": 1.9200830306175403e-05,
"loss": 3.6296,
"step": 100200
},
{
"epoch": 26.02,
"learning_rate": 1.9180072651790348e-05,
"loss": 3.5905,
"step": 100300
},
{
"epoch": 26.05,
"learning_rate": 1.9159314997405293e-05,
"loss": 3.5854,
"step": 100400
},
{
"epoch": 26.08,
"learning_rate": 1.913855734302024e-05,
"loss": 3.6127,
"step": 100500
},
{
"epoch": 26.1,
"learning_rate": 1.9117799688635186e-05,
"loss": 3.6121,
"step": 100600
},
{
"epoch": 26.13,
"learning_rate": 1.909704203425013e-05,
"loss": 3.6261,
"step": 100700
},
{
"epoch": 26.15,
"learning_rate": 1.9076284379865076e-05,
"loss": 3.6273,
"step": 100800
},
{
"epoch": 26.18,
"learning_rate": 1.905552672548002e-05,
"loss": 3.6397,
"step": 100900
},
{
"epoch": 26.21,
"learning_rate": 1.9034769071094966e-05,
"loss": 3.6361,
"step": 101000
},
{
"epoch": 26.23,
"learning_rate": 1.9014011416709915e-05,
"loss": 3.626,
"step": 101100
},
{
"epoch": 26.26,
"learning_rate": 1.899325376232486e-05,
"loss": 3.6197,
"step": 101200
},
{
"epoch": 26.28,
"learning_rate": 1.8972496107939805e-05,
"loss": 3.6215,
"step": 101300
},
{
"epoch": 26.31,
"learning_rate": 1.895173845355475e-05,
"loss": 3.6079,
"step": 101400
},
{
"epoch": 26.34,
"learning_rate": 1.8930980799169695e-05,
"loss": 3.6268,
"step": 101500
},
{
"epoch": 26.36,
"learning_rate": 1.891022314478464e-05,
"loss": 3.6208,
"step": 101600
},
{
"epoch": 26.39,
"learning_rate": 1.8889465490399585e-05,
"loss": 3.6079,
"step": 101700
},
{
"epoch": 26.41,
"learning_rate": 1.8868707836014533e-05,
"loss": 3.6288,
"step": 101800
},
{
"epoch": 26.44,
"learning_rate": 1.8847950181629478e-05,
"loss": 3.616,
"step": 101900
},
{
"epoch": 26.47,
"learning_rate": 1.8827192527244423e-05,
"loss": 3.6295,
"step": 102000
},
{
"epoch": 26.49,
"learning_rate": 1.8806434872859368e-05,
"loss": 3.6294,
"step": 102100
},
{
"epoch": 26.52,
"learning_rate": 1.8785677218474313e-05,
"loss": 3.5978,
"step": 102200
},
{
"epoch": 26.54,
"learning_rate": 1.8764919564089258e-05,
"loss": 3.6268,
"step": 102300
},
{
"epoch": 26.57,
"learning_rate": 1.8744161909704206e-05,
"loss": 3.6098,
"step": 102400
},
{
"epoch": 26.6,
"learning_rate": 1.872340425531915e-05,
"loss": 3.6064,
"step": 102500
},
{
"epoch": 26.62,
"learning_rate": 1.8702646600934096e-05,
"loss": 3.6046,
"step": 102600
},
{
"epoch": 26.65,
"learning_rate": 1.868188894654904e-05,
"loss": 3.6195,
"step": 102700
},
{
"epoch": 26.67,
"learning_rate": 1.8661131292163986e-05,
"loss": 3.6087,
"step": 102800
},
{
"epoch": 26.7,
"learning_rate": 1.864037363777893e-05,
"loss": 3.6082,
"step": 102900
},
{
"epoch": 26.73,
"learning_rate": 1.8619615983393876e-05,
"loss": 3.6098,
"step": 103000
},
{
"epoch": 26.75,
"learning_rate": 1.8598858329008824e-05,
"loss": 3.6233,
"step": 103100
},
{
"epoch": 26.78,
"learning_rate": 1.857810067462377e-05,
"loss": 3.6173,
"step": 103200
},
{
"epoch": 26.8,
"learning_rate": 1.8557343020238714e-05,
"loss": 3.6218,
"step": 103300
},
{
"epoch": 26.83,
"learning_rate": 1.8536585365853663e-05,
"loss": 3.6211,
"step": 103400
},
{
"epoch": 26.86,
"learning_rate": 1.8515827711468604e-05,
"loss": 3.6139,
"step": 103500
},
{
"epoch": 26.88,
"learning_rate": 1.849507005708355e-05,
"loss": 3.6078,
"step": 103600
},
{
"epoch": 26.91,
"learning_rate": 1.8474312402698497e-05,
"loss": 3.6302,
"step": 103700
},
{
"epoch": 26.93,
"learning_rate": 1.8453554748313442e-05,
"loss": 3.606,
"step": 103800
},
{
"epoch": 26.96,
"learning_rate": 1.8432797093928387e-05,
"loss": 3.6303,
"step": 103900
},
{
"epoch": 26.98,
"learning_rate": 1.8412039439543332e-05,
"loss": 3.6284,
"step": 104000
},
{
"epoch": 27.01,
"learning_rate": 1.839128178515828e-05,
"loss": 3.63,
"step": 104100
},
{
"epoch": 27.04,
"learning_rate": 1.8370524130773226e-05,
"loss": 3.5811,
"step": 104200
},
{
"epoch": 27.06,
"learning_rate": 1.8349766476388167e-05,
"loss": 3.6216,
"step": 104300
},
{
"epoch": 27.09,
"learning_rate": 1.8329008822003116e-05,
"loss": 3.6315,
"step": 104400
},
{
"epoch": 27.11,
"learning_rate": 1.830825116761806e-05,
"loss": 3.6269,
"step": 104500
},
{
"epoch": 27.14,
"learning_rate": 1.8287493513233006e-05,
"loss": 3.6195,
"step": 104600
},
{
"epoch": 27.17,
"learning_rate": 1.8266735858847954e-05,
"loss": 3.6127,
"step": 104700
},
{
"epoch": 27.19,
"learning_rate": 1.82459782044629e-05,
"loss": 3.6216,
"step": 104800
},
{
"epoch": 27.22,
"learning_rate": 1.8225220550077844e-05,
"loss": 3.6112,
"step": 104900
},
{
"epoch": 27.24,
"learning_rate": 1.820446289569279e-05,
"loss": 3.6186,
"step": 105000
},
{
"epoch": 27.24,
"eval_gen_len": 12.2769,
"eval_loss": 3.6040432453155518,
"eval_rouge1": 40.2581,
"eval_rouge2": 7.9107,
"eval_rougeL": 36.2296,
"eval_rougeLsum": 36.2565,
"eval_runtime": 75.7656,
"eval_samples_per_second": 50.867,
"eval_steps_per_second": 6.362,
"step": 105000
},
{
"epoch": 27.27,
"learning_rate": 1.8183705241307734e-05,
"loss": 3.6093,
"step": 105100
},
{
"epoch": 27.3,
"learning_rate": 1.816294758692268e-05,
"loss": 3.6051,
"step": 105200
},
{
"epoch": 27.32,
"learning_rate": 1.8142189932537624e-05,
"loss": 3.6303,
"step": 105300
},
{
"epoch": 27.35,
"learning_rate": 1.8121432278152572e-05,
"loss": 3.6031,
"step": 105400
},
{
"epoch": 27.37,
"learning_rate": 1.8100674623767517e-05,
"loss": 3.6177,
"step": 105500
},
{
"epoch": 27.4,
"learning_rate": 1.8079916969382462e-05,
"loss": 3.6258,
"step": 105600
},
{
"epoch": 27.43,
"learning_rate": 1.8059159314997407e-05,
"loss": 3.642,
"step": 105700
},
{
"epoch": 27.45,
"learning_rate": 1.8038401660612352e-05,
"loss": 3.62,
"step": 105800
},
{
"epoch": 27.48,
"learning_rate": 1.8017644006227297e-05,
"loss": 3.6238,
"step": 105900
},
{
"epoch": 27.5,
"learning_rate": 1.7996886351842245e-05,
"loss": 3.6382,
"step": 106000
},
{
"epoch": 27.53,
"learning_rate": 1.797612869745719e-05,
"loss": 3.5665,
"step": 106100
},
{
"epoch": 27.56,
"learning_rate": 1.7955371043072135e-05,
"loss": 3.6275,
"step": 106200
},
{
"epoch": 27.58,
"learning_rate": 1.793461338868708e-05,
"loss": 3.6112,
"step": 106300
},
{
"epoch": 27.61,
"learning_rate": 1.7913855734302025e-05,
"loss": 3.6087,
"step": 106400
},
{
"epoch": 27.63,
"learning_rate": 1.789309807991697e-05,
"loss": 3.5993,
"step": 106500
},
{
"epoch": 27.66,
"learning_rate": 1.7872340425531915e-05,
"loss": 3.6198,
"step": 106600
},
{
"epoch": 27.69,
"learning_rate": 1.7851582771146863e-05,
"loss": 3.6191,
"step": 106700
},
{
"epoch": 27.71,
"learning_rate": 1.783082511676181e-05,
"loss": 3.6086,
"step": 106800
},
{
"epoch": 27.74,
"learning_rate": 1.7810067462376753e-05,
"loss": 3.6147,
"step": 106900
},
{
"epoch": 27.76,
"learning_rate": 1.77893098079917e-05,
"loss": 3.6102,
"step": 107000
},
{
"epoch": 27.79,
"learning_rate": 1.7768552153606643e-05,
"loss": 3.6216,
"step": 107100
},
{
"epoch": 27.82,
"learning_rate": 1.7747794499221588e-05,
"loss": 3.6088,
"step": 107200
},
{
"epoch": 27.84,
"learning_rate": 1.7727036844836537e-05,
"loss": 3.6231,
"step": 107300
},
{
"epoch": 27.87,
"learning_rate": 1.770627919045148e-05,
"loss": 3.6177,
"step": 107400
},
{
"epoch": 27.89,
"learning_rate": 1.7685521536066427e-05,
"loss": 3.6116,
"step": 107500
},
{
"epoch": 27.92,
"learning_rate": 1.766476388168137e-05,
"loss": 3.6395,
"step": 107600
},
{
"epoch": 27.94,
"learning_rate": 1.7644006227296316e-05,
"loss": 3.6175,
"step": 107700
},
{
"epoch": 27.97,
"learning_rate": 1.762324857291126e-05,
"loss": 3.6288,
"step": 107800
},
{
"epoch": 28.0,
"learning_rate": 1.7602490918526206e-05,
"loss": 3.6269,
"step": 107900
},
{
"epoch": 28.02,
"learning_rate": 1.7581733264141155e-05,
"loss": 3.6127,
"step": 108000
},
{
"epoch": 28.05,
"learning_rate": 1.75609756097561e-05,
"loss": 3.626,
"step": 108100
},
{
"epoch": 28.07,
"learning_rate": 1.7540217955371045e-05,
"loss": 3.5972,
"step": 108200
},
{
"epoch": 28.1,
"learning_rate": 1.751946030098599e-05,
"loss": 3.6151,
"step": 108300
},
{
"epoch": 28.13,
"learning_rate": 1.7498702646600935e-05,
"loss": 3.6286,
"step": 108400
},
{
"epoch": 28.15,
"learning_rate": 1.747794499221588e-05,
"loss": 3.5928,
"step": 108500
},
{
"epoch": 28.18,
"learning_rate": 1.7457187337830828e-05,
"loss": 3.6095,
"step": 108600
},
{
"epoch": 28.2,
"learning_rate": 1.7436429683445773e-05,
"loss": 3.6135,
"step": 108700
},
{
"epoch": 28.23,
"learning_rate": 1.7415672029060718e-05,
"loss": 3.6138,
"step": 108800
},
{
"epoch": 28.26,
"learning_rate": 1.7394914374675663e-05,
"loss": 3.6359,
"step": 108900
},
{
"epoch": 28.28,
"learning_rate": 1.7374156720290608e-05,
"loss": 3.6128,
"step": 109000
},
{
"epoch": 28.31,
"learning_rate": 1.7353399065905553e-05,
"loss": 3.5651,
"step": 109100
},
{
"epoch": 28.33,
"learning_rate": 1.7332641411520498e-05,
"loss": 3.6242,
"step": 109200
},
{
"epoch": 28.36,
"learning_rate": 1.7311883757135446e-05,
"loss": 3.6071,
"step": 109300
},
{
"epoch": 28.39,
"learning_rate": 1.729112610275039e-05,
"loss": 3.6131,
"step": 109400
},
{
"epoch": 28.41,
"learning_rate": 1.7270368448365336e-05,
"loss": 3.6289,
"step": 109500
},
{
"epoch": 28.44,
"learning_rate": 1.724961079398028e-05,
"loss": 3.6139,
"step": 109600
},
{
"epoch": 28.46,
"learning_rate": 1.7228853139595226e-05,
"loss": 3.6368,
"step": 109700
},
{
"epoch": 28.49,
"learning_rate": 1.720809548521017e-05,
"loss": 3.6212,
"step": 109800
},
{
"epoch": 28.52,
"learning_rate": 1.7187337830825116e-05,
"loss": 3.625,
"step": 109900
},
{
"epoch": 28.54,
"learning_rate": 1.7166580176440064e-05,
"loss": 3.6196,
"step": 110000
},
{
"epoch": 28.54,
"eval_gen_len": 12.2761,
"eval_loss": 3.60282039642334,
"eval_rouge1": 40.2885,
"eval_rouge2": 7.7568,
"eval_rougeL": 36.2473,
"eval_rougeLsum": 36.286,
"eval_runtime": 75.5525,
"eval_samples_per_second": 51.011,
"eval_steps_per_second": 6.38,
"step": 110000
},
{
"epoch": 28.57,
"learning_rate": 1.714582252205501e-05,
"loss": 3.6141,
"step": 110100
},
{
"epoch": 28.59,
"learning_rate": 1.7125064867669954e-05,
"loss": 3.624,
"step": 110200
},
{
"epoch": 28.62,
"learning_rate": 1.71043072132849e-05,
"loss": 3.6169,
"step": 110300
},
{
"epoch": 28.65,
"learning_rate": 1.7083549558899844e-05,
"loss": 3.6252,
"step": 110400
},
{
"epoch": 28.67,
"learning_rate": 1.706279190451479e-05,
"loss": 3.6386,
"step": 110500
},
{
"epoch": 28.7,
"learning_rate": 1.7042034250129737e-05,
"loss": 3.6126,
"step": 110600
},
{
"epoch": 28.72,
"learning_rate": 1.7021276595744682e-05,
"loss": 3.6023,
"step": 110700
},
{
"epoch": 28.75,
"learning_rate": 1.7000518941359627e-05,
"loss": 3.5926,
"step": 110800
},
{
"epoch": 28.78,
"learning_rate": 1.6979761286974576e-05,
"loss": 3.5743,
"step": 110900
},
{
"epoch": 28.8,
"learning_rate": 1.695900363258952e-05,
"loss": 3.5902,
"step": 111000
},
{
"epoch": 28.83,
"learning_rate": 1.6938245978204462e-05,
"loss": 3.5791,
"step": 111100
},
{
"epoch": 28.85,
"learning_rate": 1.6917488323819407e-05,
"loss": 3.6161,
"step": 111200
},
{
"epoch": 28.88,
"learning_rate": 1.6896730669434356e-05,
"loss": 3.6272,
"step": 111300
},
{
"epoch": 28.91,
"learning_rate": 1.68759730150493e-05,
"loss": 3.6376,
"step": 111400
},
{
"epoch": 28.93,
"learning_rate": 1.6855215360664246e-05,
"loss": 3.6284,
"step": 111500
},
{
"epoch": 28.96,
"learning_rate": 1.6834457706279194e-05,
"loss": 3.6144,
"step": 111600
},
{
"epoch": 28.98,
"learning_rate": 1.681370005189414e-05,
"loss": 3.6209,
"step": 111700
},
{
"epoch": 29.01,
"learning_rate": 1.679294239750908e-05,
"loss": 3.6326,
"step": 111800
},
{
"epoch": 29.03,
"learning_rate": 1.677218474312403e-05,
"loss": 3.6165,
"step": 111900
},
{
"epoch": 29.06,
"learning_rate": 1.6751427088738974e-05,
"loss": 3.6292,
"step": 112000
},
{
"epoch": 29.09,
"learning_rate": 1.673066943435392e-05,
"loss": 3.6099,
"step": 112100
},
{
"epoch": 29.11,
"learning_rate": 1.6709911779968867e-05,
"loss": 3.6171,
"step": 112200
},
{
"epoch": 29.14,
"learning_rate": 1.6689154125583812e-05,
"loss": 3.6182,
"step": 112300
},
{
"epoch": 29.16,
"learning_rate": 1.6668396471198757e-05,
"loss": 3.6187,
"step": 112400
},
{
"epoch": 29.19,
"learning_rate": 1.6647638816813702e-05,
"loss": 3.6385,
"step": 112500
},
{
"epoch": 29.22,
"learning_rate": 1.6626881162428647e-05,
"loss": 3.6077,
"step": 112600
},
{
"epoch": 29.24,
"learning_rate": 1.6606123508043592e-05,
"loss": 3.6391,
"step": 112700
},
{
"epoch": 29.27,
"learning_rate": 1.6585365853658537e-05,
"loss": 3.6257,
"step": 112800
},
{
"epoch": 29.29,
"learning_rate": 1.6564608199273485e-05,
"loss": 3.5785,
"step": 112900
},
{
"epoch": 29.32,
"learning_rate": 1.654385054488843e-05,
"loss": 3.6128,
"step": 113000
},
{
"epoch": 29.35,
"learning_rate": 1.6523092890503375e-05,
"loss": 3.6107,
"step": 113100
},
{
"epoch": 29.37,
"learning_rate": 1.650233523611832e-05,
"loss": 3.609,
"step": 113200
},
{
"epoch": 29.4,
"learning_rate": 1.6481577581733265e-05,
"loss": 3.6079,
"step": 113300
},
{
"epoch": 29.42,
"learning_rate": 1.646081992734821e-05,
"loss": 3.6008,
"step": 113400
},
{
"epoch": 29.45,
"learning_rate": 1.644006227296316e-05,
"loss": 3.6209,
"step": 113500
},
{
"epoch": 29.48,
"learning_rate": 1.6419304618578103e-05,
"loss": 3.6079,
"step": 113600
},
{
"epoch": 29.5,
"learning_rate": 1.639854696419305e-05,
"loss": 3.6135,
"step": 113700
},
{
"epoch": 29.53,
"learning_rate": 1.6377789309807993e-05,
"loss": 3.6233,
"step": 113800
},
{
"epoch": 29.55,
"learning_rate": 1.635703165542294e-05,
"loss": 3.5894,
"step": 113900
},
{
"epoch": 29.58,
"learning_rate": 1.6336274001037883e-05,
"loss": 3.6198,
"step": 114000
},
{
"epoch": 29.61,
"learning_rate": 1.631551634665283e-05,
"loss": 3.6168,
"step": 114100
},
{
"epoch": 29.63,
"learning_rate": 1.6294758692267777e-05,
"loss": 3.6076,
"step": 114200
},
{
"epoch": 29.66,
"learning_rate": 1.627400103788272e-05,
"loss": 3.6185,
"step": 114300
},
{
"epoch": 29.68,
"learning_rate": 1.6253243383497667e-05,
"loss": 3.6049,
"step": 114400
},
{
"epoch": 29.71,
"learning_rate": 1.623248572911261e-05,
"loss": 3.5918,
"step": 114500
},
{
"epoch": 29.74,
"learning_rate": 1.6211728074727557e-05,
"loss": 3.6093,
"step": 114600
},
{
"epoch": 29.76,
"learning_rate": 1.61909704203425e-05,
"loss": 3.6138,
"step": 114700
},
{
"epoch": 29.79,
"learning_rate": 1.6170212765957446e-05,
"loss": 3.6159,
"step": 114800
},
{
"epoch": 29.81,
"learning_rate": 1.6149455111572395e-05,
"loss": 3.6266,
"step": 114900
},
{
"epoch": 29.84,
"learning_rate": 1.612869745718734e-05,
"loss": 3.6218,
"step": 115000
},
{
"epoch": 29.84,
"eval_gen_len": 12.2766,
"eval_loss": 3.6007707118988037,
"eval_rouge1": 40.3343,
"eval_rouge2": 8.055,
"eval_rougeL": 36.3397,
"eval_rougeLsum": 36.3797,
"eval_runtime": 75.7103,
"eval_samples_per_second": 50.905,
"eval_steps_per_second": 6.366,
"step": 115000
},
{
"epoch": 29.87,
"learning_rate": 1.6107939802802285e-05,
"loss": 3.6301,
"step": 115100
},
{
"epoch": 29.89,
"learning_rate": 1.608718214841723e-05,
"loss": 3.615,
"step": 115200
},
{
"epoch": 29.92,
"learning_rate": 1.6066424494032175e-05,
"loss": 3.5828,
"step": 115300
},
{
"epoch": 29.94,
"learning_rate": 1.604566683964712e-05,
"loss": 3.6307,
"step": 115400
},
{
"epoch": 29.97,
"learning_rate": 1.6024909185262068e-05,
"loss": 3.5944,
"step": 115500
},
{
"epoch": 29.99,
"learning_rate": 1.6004151530877013e-05,
"loss": 3.6015,
"step": 115600
},
{
"epoch": 30.02,
"learning_rate": 1.5983393876491958e-05,
"loss": 3.6145,
"step": 115700
},
{
"epoch": 30.05,
"learning_rate": 1.5962636222106903e-05,
"loss": 3.5917,
"step": 115800
},
{
"epoch": 30.07,
"learning_rate": 1.5941878567721848e-05,
"loss": 3.6101,
"step": 115900
},
{
"epoch": 30.1,
"learning_rate": 1.5921120913336793e-05,
"loss": 3.6065,
"step": 116000
},
{
"epoch": 30.12,
"learning_rate": 1.5900363258951738e-05,
"loss": 3.6199,
"step": 116100
},
{
"epoch": 30.15,
"learning_rate": 1.5879605604566686e-05,
"loss": 3.6231,
"step": 116200
},
{
"epoch": 30.18,
"learning_rate": 1.585884795018163e-05,
"loss": 3.6277,
"step": 116300
},
{
"epoch": 30.2,
"learning_rate": 1.5838090295796576e-05,
"loss": 3.6115,
"step": 116400
},
{
"epoch": 30.23,
"learning_rate": 1.581733264141152e-05,
"loss": 3.6093,
"step": 116500
},
{
"epoch": 30.25,
"learning_rate": 1.5796574987026466e-05,
"loss": 3.6299,
"step": 116600
},
{
"epoch": 30.28,
"learning_rate": 1.577581733264141e-05,
"loss": 3.6238,
"step": 116700
},
{
"epoch": 30.31,
"learning_rate": 1.575505967825636e-05,
"loss": 3.6178,
"step": 116800
},
{
"epoch": 30.33,
"learning_rate": 1.5734302023871304e-05,
"loss": 3.5967,
"step": 116900
},
{
"epoch": 30.36,
"learning_rate": 1.571354436948625e-05,
"loss": 3.5902,
"step": 117000
},
{
"epoch": 30.38,
"learning_rate": 1.5692786715101194e-05,
"loss": 3.6247,
"step": 117100
},
{
"epoch": 30.41,
"learning_rate": 1.567202906071614e-05,
"loss": 3.5995,
"step": 117200
},
{
"epoch": 30.44,
"learning_rate": 1.5651271406331084e-05,
"loss": 3.5888,
"step": 117300
},
{
"epoch": 30.46,
"learning_rate": 1.563051375194603e-05,
"loss": 3.6221,
"step": 117400
},
{
"epoch": 30.49,
"learning_rate": 1.5609756097560978e-05,
"loss": 3.6046,
"step": 117500
},
{
"epoch": 30.51,
"learning_rate": 1.5588998443175923e-05,
"loss": 3.6324,
"step": 117600
},
{
"epoch": 30.54,
"learning_rate": 1.5568240788790867e-05,
"loss": 3.6292,
"step": 117700
},
{
"epoch": 30.57,
"learning_rate": 1.5547483134405812e-05,
"loss": 3.6127,
"step": 117800
},
{
"epoch": 30.59,
"learning_rate": 1.5526725480020757e-05,
"loss": 3.5948,
"step": 117900
},
{
"epoch": 30.62,
"learning_rate": 1.5505967825635702e-05,
"loss": 3.6113,
"step": 118000
},
{
"epoch": 30.64,
"learning_rate": 1.548521017125065e-05,
"loss": 3.6061,
"step": 118100
},
{
"epoch": 30.67,
"learning_rate": 1.5464452516865596e-05,
"loss": 3.6211,
"step": 118200
},
{
"epoch": 30.7,
"learning_rate": 1.544369486248054e-05,
"loss": 3.6006,
"step": 118300
},
{
"epoch": 30.72,
"learning_rate": 1.542293720809549e-05,
"loss": 3.5968,
"step": 118400
},
{
"epoch": 30.75,
"learning_rate": 1.5402179553710434e-05,
"loss": 3.6215,
"step": 118500
},
{
"epoch": 30.77,
"learning_rate": 1.5381421899325376e-05,
"loss": 3.6059,
"step": 118600
},
{
"epoch": 30.8,
"learning_rate": 1.536066424494032e-05,
"loss": 3.6082,
"step": 118700
},
{
"epoch": 30.83,
"learning_rate": 1.533990659055527e-05,
"loss": 3.6177,
"step": 118800
},
{
"epoch": 30.85,
"learning_rate": 1.5319148936170214e-05,
"loss": 3.6112,
"step": 118900
},
{
"epoch": 30.88,
"learning_rate": 1.529839128178516e-05,
"loss": 3.6266,
"step": 119000
},
{
"epoch": 30.9,
"learning_rate": 1.5277633627400107e-05,
"loss": 3.6059,
"step": 119100
},
{
"epoch": 30.93,
"learning_rate": 1.525687597301505e-05,
"loss": 3.6226,
"step": 119200
},
{
"epoch": 30.95,
"learning_rate": 1.5236118318629995e-05,
"loss": 3.6271,
"step": 119300
},
{
"epoch": 30.98,
"learning_rate": 1.5215360664244942e-05,
"loss": 3.6145,
"step": 119400
},
{
"epoch": 31.01,
"learning_rate": 1.5194603009859887e-05,
"loss": 3.611,
"step": 119500
},
{
"epoch": 31.03,
"learning_rate": 1.5173845355474832e-05,
"loss": 3.5825,
"step": 119600
},
{
"epoch": 31.06,
"learning_rate": 1.5153087701089777e-05,
"loss": 3.5962,
"step": 119700
},
{
"epoch": 31.08,
"learning_rate": 1.5132330046704724e-05,
"loss": 3.5957,
"step": 119800
},
{
"epoch": 31.11,
"learning_rate": 1.5111572392319669e-05,
"loss": 3.6402,
"step": 119900
},
{
"epoch": 31.14,
"learning_rate": 1.5090814737934614e-05,
"loss": 3.6194,
"step": 120000
},
{
"epoch": 31.14,
"eval_gen_len": 12.2761,
"eval_loss": 3.598875045776367,
"eval_rouge1": 40.1727,
"eval_rouge2": 8.0263,
"eval_rougeL": 36.1356,
"eval_rougeLsum": 36.148,
"eval_runtime": 75.8469,
"eval_samples_per_second": 50.813,
"eval_steps_per_second": 6.355,
"step": 120000
},
{
"epoch": 31.16,
"learning_rate": 1.507005708354956e-05,
"loss": 3.6332,
"step": 120100
},
{
"epoch": 31.19,
"learning_rate": 1.5049299429164505e-05,
"loss": 3.6044,
"step": 120200
},
{
"epoch": 31.21,
"learning_rate": 1.502854177477945e-05,
"loss": 3.6413,
"step": 120300
},
{
"epoch": 31.24,
"learning_rate": 1.5007784120394397e-05,
"loss": 3.6182,
"step": 120400
},
{
"epoch": 31.27,
"learning_rate": 1.4987026466009342e-05,
"loss": 3.6115,
"step": 120500
},
{
"epoch": 31.29,
"learning_rate": 1.4966268811624287e-05,
"loss": 3.6008,
"step": 120600
},
{
"epoch": 31.32,
"learning_rate": 1.4945511157239233e-05,
"loss": 3.6245,
"step": 120700
},
{
"epoch": 31.34,
"learning_rate": 1.4924753502854178e-05,
"loss": 3.6105,
"step": 120800
},
{
"epoch": 31.37,
"learning_rate": 1.4903995848469123e-05,
"loss": 3.6147,
"step": 120900
},
{
"epoch": 31.4,
"learning_rate": 1.4883238194084068e-05,
"loss": 3.6158,
"step": 121000
},
{
"epoch": 31.42,
"learning_rate": 1.4862480539699015e-05,
"loss": 3.6337,
"step": 121100
},
{
"epoch": 31.45,
"learning_rate": 1.484172288531396e-05,
"loss": 3.5977,
"step": 121200
},
{
"epoch": 31.47,
"learning_rate": 1.4820965230928905e-05,
"loss": 3.6235,
"step": 121300
},
{
"epoch": 31.5,
"learning_rate": 1.4800207576543852e-05,
"loss": 3.5937,
"step": 121400
},
{
"epoch": 31.53,
"learning_rate": 1.4779449922158797e-05,
"loss": 3.604,
"step": 121500
},
{
"epoch": 31.55,
"learning_rate": 1.4758692267773742e-05,
"loss": 3.6288,
"step": 121600
},
{
"epoch": 31.58,
"learning_rate": 1.473793461338869e-05,
"loss": 3.6092,
"step": 121700
},
{
"epoch": 31.6,
"learning_rate": 1.4717176959003633e-05,
"loss": 3.6009,
"step": 121800
},
{
"epoch": 31.63,
"learning_rate": 1.4696419304618578e-05,
"loss": 3.6203,
"step": 121900
},
{
"epoch": 31.66,
"learning_rate": 1.4675661650233527e-05,
"loss": 3.6227,
"step": 122000
},
{
"epoch": 31.68,
"learning_rate": 1.4654903995848471e-05,
"loss": 3.6076,
"step": 122100
},
{
"epoch": 31.71,
"learning_rate": 1.4634146341463415e-05,
"loss": 3.6155,
"step": 122200
},
{
"epoch": 31.73,
"learning_rate": 1.461338868707836e-05,
"loss": 3.6202,
"step": 122300
},
{
"epoch": 31.76,
"learning_rate": 1.4592631032693308e-05,
"loss": 3.6015,
"step": 122400
},
{
"epoch": 31.79,
"learning_rate": 1.4571873378308253e-05,
"loss": 3.6127,
"step": 122500
},
{
"epoch": 31.81,
"learning_rate": 1.4551115723923196e-05,
"loss": 3.6039,
"step": 122600
},
{
"epoch": 31.84,
"learning_rate": 1.4530358069538145e-05,
"loss": 3.6027,
"step": 122700
},
{
"epoch": 31.86,
"learning_rate": 1.450960041515309e-05,
"loss": 3.6075,
"step": 122800
},
{
"epoch": 31.89,
"learning_rate": 1.4488842760768033e-05,
"loss": 3.5994,
"step": 122900
},
{
"epoch": 31.91,
"learning_rate": 1.4468085106382981e-05,
"loss": 3.6048,
"step": 123000
},
{
"epoch": 31.94,
"learning_rate": 1.4447327451997926e-05,
"loss": 3.6216,
"step": 123100
},
{
"epoch": 31.97,
"learning_rate": 1.4426569797612871e-05,
"loss": 3.6034,
"step": 123200
},
{
"epoch": 31.99,
"learning_rate": 1.4405812143227818e-05,
"loss": 3.5946,
"step": 123300
},
{
"epoch": 32.02,
"learning_rate": 1.4385054488842763e-05,
"loss": 3.6223,
"step": 123400
},
{
"epoch": 32.04,
"learning_rate": 1.4364296834457708e-05,
"loss": 3.6146,
"step": 123500
},
{
"epoch": 32.07,
"learning_rate": 1.4343539180072653e-05,
"loss": 3.6067,
"step": 123600
},
{
"epoch": 32.1,
"learning_rate": 1.43227815256876e-05,
"loss": 3.6073,
"step": 123700
},
{
"epoch": 32.12,
"learning_rate": 1.4302023871302544e-05,
"loss": 3.6005,
"step": 123800
},
{
"epoch": 32.15,
"learning_rate": 1.428126621691749e-05,
"loss": 3.6215,
"step": 123900
},
{
"epoch": 32.17,
"learning_rate": 1.4260508562532436e-05,
"loss": 3.6146,
"step": 124000
},
{
"epoch": 32.2,
"learning_rate": 1.4239750908147381e-05,
"loss": 3.6269,
"step": 124100
},
{
"epoch": 32.23,
"learning_rate": 1.4218993253762326e-05,
"loss": 3.6096,
"step": 124200
},
{
"epoch": 32.25,
"learning_rate": 1.4198235599377273e-05,
"loss": 3.5796,
"step": 124300
},
{
"epoch": 32.28,
"learning_rate": 1.4177477944992218e-05,
"loss": 3.6166,
"step": 124400
},
{
"epoch": 32.3,
"learning_rate": 1.4156720290607163e-05,
"loss": 3.6222,
"step": 124500
},
{
"epoch": 32.33,
"learning_rate": 1.4135962636222108e-05,
"loss": 3.6166,
"step": 124600
},
{
"epoch": 32.36,
"learning_rate": 1.4115204981837054e-05,
"loss": 3.626,
"step": 124700
},
{
"epoch": 32.38,
"learning_rate": 1.4094447327452e-05,
"loss": 3.6202,
"step": 124800
},
{
"epoch": 32.41,
"learning_rate": 1.4073689673066944e-05,
"loss": 3.5897,
"step": 124900
},
{
"epoch": 32.43,
"learning_rate": 1.405293201868189e-05,
"loss": 3.619,
"step": 125000
},
{
"epoch": 32.43,
"eval_gen_len": 12.2763,
"eval_loss": 3.597046136856079,
"eval_rouge1": 40.9033,
"eval_rouge2": 8.2223,
"eval_rougeL": 36.8949,
"eval_rougeLsum": 36.9021,
"eval_runtime": 75.7787,
"eval_samples_per_second": 50.859,
"eval_steps_per_second": 6.361,
"step": 125000
},
{
"epoch": 32.46,
"learning_rate": 1.4032174364296836e-05,
"loss": 3.6003,
"step": 125100
},
{
"epoch": 32.49,
"learning_rate": 1.401141670991178e-05,
"loss": 3.6193,
"step": 125200
},
{
"epoch": 32.51,
"learning_rate": 1.3990659055526727e-05,
"loss": 3.5954,
"step": 125300
},
{
"epoch": 32.54,
"learning_rate": 1.3969901401141672e-05,
"loss": 3.6219,
"step": 125400
},
{
"epoch": 32.56,
"learning_rate": 1.3949143746756617e-05,
"loss": 3.6268,
"step": 125500
},
{
"epoch": 32.59,
"learning_rate": 1.3928386092371564e-05,
"loss": 3.6083,
"step": 125600
},
{
"epoch": 32.62,
"learning_rate": 1.3907628437986509e-05,
"loss": 3.6312,
"step": 125700
},
{
"epoch": 32.64,
"learning_rate": 1.3886870783601454e-05,
"loss": 3.5935,
"step": 125800
},
{
"epoch": 32.67,
"learning_rate": 1.3866113129216399e-05,
"loss": 3.5712,
"step": 125900
},
{
"epoch": 32.69,
"learning_rate": 1.3845355474831346e-05,
"loss": 3.597,
"step": 126000
},
{
"epoch": 32.72,
"learning_rate": 1.382459782044629e-05,
"loss": 3.6079,
"step": 126100
},
{
"epoch": 32.75,
"learning_rate": 1.3803840166061235e-05,
"loss": 3.5818,
"step": 126200
},
{
"epoch": 32.77,
"learning_rate": 1.3783082511676182e-05,
"loss": 3.5973,
"step": 126300
},
{
"epoch": 32.8,
"learning_rate": 1.3762324857291127e-05,
"loss": 3.5802,
"step": 126400
},
{
"epoch": 32.82,
"learning_rate": 1.3741567202906072e-05,
"loss": 3.6062,
"step": 126500
},
{
"epoch": 32.85,
"learning_rate": 1.3720809548521019e-05,
"loss": 3.6021,
"step": 126600
},
{
"epoch": 32.87,
"learning_rate": 1.3700051894135964e-05,
"loss": 3.6181,
"step": 126700
},
{
"epoch": 32.9,
"learning_rate": 1.3679294239750909e-05,
"loss": 3.623,
"step": 126800
},
{
"epoch": 32.93,
"learning_rate": 1.3658536585365855e-05,
"loss": 3.6192,
"step": 126900
},
{
"epoch": 32.95,
"learning_rate": 1.36377789309808e-05,
"loss": 3.6072,
"step": 127000
},
{
"epoch": 32.98,
"learning_rate": 1.3617021276595745e-05,
"loss": 3.6354,
"step": 127100
},
{
"epoch": 33.0,
"learning_rate": 1.359626362221069e-05,
"loss": 3.566,
"step": 127200
},
{
"epoch": 33.03,
"learning_rate": 1.3575505967825637e-05,
"loss": 3.6098,
"step": 127300
},
{
"epoch": 33.06,
"learning_rate": 1.3554748313440582e-05,
"loss": 3.6211,
"step": 127400
},
{
"epoch": 33.08,
"learning_rate": 1.3533990659055527e-05,
"loss": 3.6143,
"step": 127500
},
{
"epoch": 33.11,
"learning_rate": 1.3513233004670474e-05,
"loss": 3.6278,
"step": 127600
},
{
"epoch": 33.13,
"learning_rate": 1.3492475350285418e-05,
"loss": 3.6094,
"step": 127700
},
{
"epoch": 33.16,
"learning_rate": 1.3471717695900363e-05,
"loss": 3.5985,
"step": 127800
},
{
"epoch": 33.19,
"learning_rate": 1.345096004151531e-05,
"loss": 3.5683,
"step": 127900
},
{
"epoch": 33.21,
"learning_rate": 1.3430202387130255e-05,
"loss": 3.617,
"step": 128000
},
{
"epoch": 33.24,
"learning_rate": 1.34094447327452e-05,
"loss": 3.6051,
"step": 128100
},
{
"epoch": 33.26,
"learning_rate": 1.3388687078360147e-05,
"loss": 3.6116,
"step": 128200
},
{
"epoch": 33.29,
"learning_rate": 1.3367929423975092e-05,
"loss": 3.6178,
"step": 128300
},
{
"epoch": 33.32,
"learning_rate": 1.3347171769590037e-05,
"loss": 3.6324,
"step": 128400
},
{
"epoch": 33.34,
"learning_rate": 1.3326414115204982e-05,
"loss": 3.5958,
"step": 128500
},
{
"epoch": 33.37,
"learning_rate": 1.3305656460819928e-05,
"loss": 3.6233,
"step": 128600
},
{
"epoch": 33.39,
"learning_rate": 1.3284898806434873e-05,
"loss": 3.5967,
"step": 128700
},
{
"epoch": 33.42,
"learning_rate": 1.3264141152049818e-05,
"loss": 3.6049,
"step": 128800
},
{
"epoch": 33.45,
"learning_rate": 1.3243383497664765e-05,
"loss": 3.6029,
"step": 128900
},
{
"epoch": 33.47,
"learning_rate": 1.322262584327971e-05,
"loss": 3.5684,
"step": 129000
},
{
"epoch": 33.5,
"learning_rate": 1.3201868188894655e-05,
"loss": 3.5898,
"step": 129100
},
{
"epoch": 33.52,
"learning_rate": 1.3181110534509603e-05,
"loss": 3.6096,
"step": 129200
},
{
"epoch": 33.55,
"learning_rate": 1.3160352880124546e-05,
"loss": 3.6109,
"step": 129300
},
{
"epoch": 33.58,
"learning_rate": 1.3139595225739491e-05,
"loss": 3.6187,
"step": 129400
},
{
"epoch": 33.6,
"learning_rate": 1.3118837571354436e-05,
"loss": 3.6139,
"step": 129500
},
{
"epoch": 33.63,
"learning_rate": 1.3098079916969385e-05,
"loss": 3.6026,
"step": 129600
},
{
"epoch": 33.65,
"learning_rate": 1.3077322262584328e-05,
"loss": 3.5973,
"step": 129700
},
{
"epoch": 33.68,
"learning_rate": 1.3056564608199273e-05,
"loss": 3.5847,
"step": 129800
},
{
"epoch": 33.71,
"learning_rate": 1.3035806953814221e-05,
"loss": 3.6199,
"step": 129900
},
{
"epoch": 33.73,
"learning_rate": 1.3015049299429166e-05,
"loss": 3.6116,
"step": 130000
},
{
"epoch": 33.73,
"eval_gen_len": 12.2763,
"eval_loss": 3.5960943698883057,
"eval_rouge1": 41.6154,
"eval_rouge2": 8.5729,
"eval_rougeL": 37.598,
"eval_rougeLsum": 37.6264,
"eval_runtime": 75.6128,
"eval_samples_per_second": 50.97,
"eval_steps_per_second": 6.375,
"step": 130000
},
{
"epoch": 33.76,
"learning_rate": 1.299429164504411e-05,
"loss": 3.6062,
"step": 130100
},
{
"epoch": 33.78,
"learning_rate": 1.2973533990659058e-05,
"loss": 3.6131,
"step": 130200
},
{
"epoch": 33.81,
"learning_rate": 1.2952776336274003e-05,
"loss": 3.6163,
"step": 130300
},
{
"epoch": 33.83,
"learning_rate": 1.2932018681888948e-05,
"loss": 3.6133,
"step": 130400
},
{
"epoch": 33.86,
"learning_rate": 1.2911261027503895e-05,
"loss": 3.6176,
"step": 130500
},
{
"epoch": 33.89,
"learning_rate": 1.289050337311884e-05,
"loss": 3.588,
"step": 130600
},
{
"epoch": 33.91,
"learning_rate": 1.2869745718733784e-05,
"loss": 3.6007,
"step": 130700
},
{
"epoch": 33.94,
"learning_rate": 1.2848988064348728e-05,
"loss": 3.5726,
"step": 130800
},
{
"epoch": 33.96,
"learning_rate": 1.2828230409963676e-05,
"loss": 3.5923,
"step": 130900
},
{
"epoch": 33.99,
"learning_rate": 1.2807472755578621e-05,
"loss": 3.5719,
"step": 131000
},
{
"epoch": 34.02,
"learning_rate": 1.2786715101193566e-05,
"loss": 3.6186,
"step": 131100
},
{
"epoch": 34.04,
"learning_rate": 1.2765957446808513e-05,
"loss": 3.6011,
"step": 131200
},
{
"epoch": 34.07,
"learning_rate": 1.2745199792423458e-05,
"loss": 3.6053,
"step": 131300
},
{
"epoch": 34.09,
"learning_rate": 1.2724442138038403e-05,
"loss": 3.6126,
"step": 131400
},
{
"epoch": 34.12,
"learning_rate": 1.270368448365335e-05,
"loss": 3.6255,
"step": 131500
},
{
"epoch": 34.15,
"learning_rate": 1.2682926829268294e-05,
"loss": 3.6124,
"step": 131600
},
{
"epoch": 34.17,
"learning_rate": 1.266216917488324e-05,
"loss": 3.6038,
"step": 131700
},
{
"epoch": 34.2,
"learning_rate": 1.2641411520498186e-05,
"loss": 3.6156,
"step": 131800
},
{
"epoch": 34.22,
"learning_rate": 1.2620653866113131e-05,
"loss": 3.609,
"step": 131900
},
{
"epoch": 34.25,
"learning_rate": 1.2599896211728076e-05,
"loss": 3.6114,
"step": 132000
},
{
"epoch": 34.28,
"learning_rate": 1.257913855734302e-05,
"loss": 3.591,
"step": 132100
},
{
"epoch": 34.3,
"learning_rate": 1.2558380902957967e-05,
"loss": 3.6103,
"step": 132200
},
{
"epoch": 34.33,
"learning_rate": 1.2537623248572912e-05,
"loss": 3.6059,
"step": 132300
},
{
"epoch": 34.35,
"learning_rate": 1.2516865594187857e-05,
"loss": 3.6076,
"step": 132400
},
{
"epoch": 34.38,
"learning_rate": 1.2496107939802804e-05,
"loss": 3.5984,
"step": 132500
},
{
"epoch": 34.41,
"learning_rate": 1.2475350285417749e-05,
"loss": 3.6029,
"step": 132600
},
{
"epoch": 34.43,
"learning_rate": 1.2454592631032694e-05,
"loss": 3.6089,
"step": 132700
},
{
"epoch": 34.46,
"learning_rate": 1.243383497664764e-05,
"loss": 3.5939,
"step": 132800
},
{
"epoch": 34.48,
"learning_rate": 1.2413077322262586e-05,
"loss": 3.6122,
"step": 132900
},
{
"epoch": 34.51,
"learning_rate": 1.239231966787753e-05,
"loss": 3.6043,
"step": 133000
},
{
"epoch": 34.54,
"learning_rate": 1.2371562013492477e-05,
"loss": 3.6155,
"step": 133100
},
{
"epoch": 34.56,
"learning_rate": 1.2350804359107422e-05,
"loss": 3.5631,
"step": 133200
},
{
"epoch": 34.59,
"learning_rate": 1.2330046704722367e-05,
"loss": 3.6253,
"step": 133300
},
{
"epoch": 34.61,
"learning_rate": 1.2309289050337312e-05,
"loss": 3.625,
"step": 133400
},
{
"epoch": 34.64,
"learning_rate": 1.2288531395952259e-05,
"loss": 3.5985,
"step": 133500
},
{
"epoch": 34.67,
"learning_rate": 1.2267773741567204e-05,
"loss": 3.6154,
"step": 133600
},
{
"epoch": 34.69,
"learning_rate": 1.2247016087182149e-05,
"loss": 3.6042,
"step": 133700
},
{
"epoch": 34.72,
"learning_rate": 1.2226258432797095e-05,
"loss": 3.5926,
"step": 133800
},
{
"epoch": 34.74,
"learning_rate": 1.220550077841204e-05,
"loss": 3.5777,
"step": 133900
},
{
"epoch": 34.77,
"learning_rate": 1.2184743124026985e-05,
"loss": 3.6164,
"step": 134000
},
{
"epoch": 34.8,
"learning_rate": 1.2163985469641932e-05,
"loss": 3.5981,
"step": 134100
},
{
"epoch": 34.82,
"learning_rate": 1.2143227815256877e-05,
"loss": 3.619,
"step": 134200
},
{
"epoch": 34.85,
"learning_rate": 1.2122470160871822e-05,
"loss": 3.5911,
"step": 134300
},
{
"epoch": 34.87,
"learning_rate": 1.2101712506486767e-05,
"loss": 3.6282,
"step": 134400
},
{
"epoch": 34.9,
"learning_rate": 1.2080954852101714e-05,
"loss": 3.618,
"step": 134500
},
{
"epoch": 34.92,
"learning_rate": 1.2060197197716659e-05,
"loss": 3.612,
"step": 134600
},
{
"epoch": 34.95,
"learning_rate": 1.2039439543331604e-05,
"loss": 3.5997,
"step": 134700
},
{
"epoch": 34.98,
"learning_rate": 1.201868188894655e-05,
"loss": 3.6071,
"step": 134800
},
{
"epoch": 35.0,
"learning_rate": 1.1997924234561495e-05,
"loss": 3.6195,
"step": 134900
},
{
"epoch": 35.03,
"learning_rate": 1.197716658017644e-05,
"loss": 3.6,
"step": 135000
},
{
"epoch": 35.03,
"eval_gen_len": 12.2766,
"eval_loss": 3.5951695442199707,
"eval_rouge1": 41.4914,
"eval_rouge2": 8.5934,
"eval_rougeL": 37.4486,
"eval_rougeLsum": 37.4712,
"eval_runtime": 75.6467,
"eval_samples_per_second": 50.947,
"eval_steps_per_second": 6.372,
"step": 135000
},
{
"epoch": 35.05,
"learning_rate": 1.1956408925791387e-05,
"loss": 3.6107,
"step": 135100
},
{
"epoch": 35.08,
"learning_rate": 1.1935651271406332e-05,
"loss": 3.613,
"step": 135200
},
{
"epoch": 35.11,
"learning_rate": 1.1914893617021277e-05,
"loss": 3.6227,
"step": 135300
},
{
"epoch": 35.13,
"learning_rate": 1.1894135962636223e-05,
"loss": 3.62,
"step": 135400
},
{
"epoch": 35.16,
"learning_rate": 1.1873378308251168e-05,
"loss": 3.6185,
"step": 135500
},
{
"epoch": 35.18,
"learning_rate": 1.1852620653866113e-05,
"loss": 3.5743,
"step": 135600
},
{
"epoch": 35.21,
"learning_rate": 1.1831862999481058e-05,
"loss": 3.5984,
"step": 135700
},
{
"epoch": 35.24,
"learning_rate": 1.1811105345096005e-05,
"loss": 3.5912,
"step": 135800
},
{
"epoch": 35.26,
"learning_rate": 1.179034769071095e-05,
"loss": 3.6083,
"step": 135900
},
{
"epoch": 35.29,
"learning_rate": 1.1769590036325895e-05,
"loss": 3.604,
"step": 136000
},
{
"epoch": 35.31,
"learning_rate": 1.1748832381940842e-05,
"loss": 3.6075,
"step": 136100
},
{
"epoch": 35.34,
"learning_rate": 1.1728074727555786e-05,
"loss": 3.6021,
"step": 136200
},
{
"epoch": 35.37,
"learning_rate": 1.1707317073170731e-05,
"loss": 3.6198,
"step": 136300
},
{
"epoch": 35.39,
"learning_rate": 1.168655941878568e-05,
"loss": 3.6024,
"step": 136400
},
{
"epoch": 35.42,
"learning_rate": 1.1665801764400623e-05,
"loss": 3.6125,
"step": 136500
},
{
"epoch": 35.44,
"learning_rate": 1.1645044110015568e-05,
"loss": 3.5958,
"step": 136600
},
{
"epoch": 35.47,
"learning_rate": 1.1624286455630516e-05,
"loss": 3.5855,
"step": 136700
},
{
"epoch": 35.5,
"learning_rate": 1.1603528801245461e-05,
"loss": 3.5616,
"step": 136800
},
{
"epoch": 35.52,
"learning_rate": 1.1582771146860405e-05,
"loss": 3.6106,
"step": 136900
},
{
"epoch": 35.55,
"learning_rate": 1.156201349247535e-05,
"loss": 3.6263,
"step": 137000
},
{
"epoch": 35.57,
"learning_rate": 1.1541255838090298e-05,
"loss": 3.6063,
"step": 137100
},
{
"epoch": 35.6,
"learning_rate": 1.1520498183705241e-05,
"loss": 3.608,
"step": 137200
},
{
"epoch": 35.63,
"learning_rate": 1.1499740529320186e-05,
"loss": 3.6091,
"step": 137300
},
{
"epoch": 35.65,
"learning_rate": 1.1478982874935135e-05,
"loss": 3.6027,
"step": 137400
},
{
"epoch": 35.68,
"learning_rate": 1.145822522055008e-05,
"loss": 3.6037,
"step": 137500
},
{
"epoch": 35.7,
"learning_rate": 1.1437467566165023e-05,
"loss": 3.5811,
"step": 137600
},
{
"epoch": 35.73,
"learning_rate": 1.1416709911779971e-05,
"loss": 3.609,
"step": 137700
},
{
"epoch": 35.76,
"learning_rate": 1.1395952257394916e-05,
"loss": 3.6111,
"step": 137800
},
{
"epoch": 35.78,
"learning_rate": 1.1375194603009861e-05,
"loss": 3.598,
"step": 137900
},
{
"epoch": 35.81,
"learning_rate": 1.1354436948624808e-05,
"loss": 3.6237,
"step": 138000
},
{
"epoch": 35.83,
"learning_rate": 1.1333679294239753e-05,
"loss": 3.6093,
"step": 138100
},
{
"epoch": 35.86,
"learning_rate": 1.1312921639854698e-05,
"loss": 3.6202,
"step": 138200
},
{
"epoch": 35.88,
"learning_rate": 1.1292163985469643e-05,
"loss": 3.5869,
"step": 138300
},
{
"epoch": 35.91,
"learning_rate": 1.127140633108459e-05,
"loss": 3.6183,
"step": 138400
},
{
"epoch": 35.94,
"learning_rate": 1.1250648676699534e-05,
"loss": 3.6089,
"step": 138500
},
{
"epoch": 35.96,
"learning_rate": 1.122989102231448e-05,
"loss": 3.6135,
"step": 138600
},
{
"epoch": 35.99,
"learning_rate": 1.1209133367929426e-05,
"loss": 3.6,
"step": 138700
},
{
"epoch": 36.01,
"learning_rate": 1.1188375713544371e-05,
"loss": 3.5911,
"step": 138800
},
{
"epoch": 36.04,
"learning_rate": 1.1167618059159316e-05,
"loss": 3.5952,
"step": 138900
},
{
"epoch": 36.07,
"learning_rate": 1.1146860404774263e-05,
"loss": 3.6088,
"step": 139000
},
{
"epoch": 36.09,
"learning_rate": 1.1126102750389208e-05,
"loss": 3.6096,
"step": 139100
},
{
"epoch": 36.12,
"learning_rate": 1.1105345096004152e-05,
"loss": 3.5832,
"step": 139200
},
{
"epoch": 36.14,
"learning_rate": 1.1084587441619099e-05,
"loss": 3.6094,
"step": 139300
},
{
"epoch": 36.17,
"learning_rate": 1.1063829787234044e-05,
"loss": 3.62,
"step": 139400
},
{
"epoch": 36.2,
"learning_rate": 1.1043072132848989e-05,
"loss": 3.5665,
"step": 139500
},
{
"epoch": 36.22,
"learning_rate": 1.1022314478463934e-05,
"loss": 3.5996,
"step": 139600
},
{
"epoch": 36.25,
"learning_rate": 1.100155682407888e-05,
"loss": 3.579,
"step": 139700
},
{
"epoch": 36.27,
"learning_rate": 1.0980799169693826e-05,
"loss": 3.6229,
"step": 139800
},
{
"epoch": 36.3,
"learning_rate": 1.096004151530877e-05,
"loss": 3.6143,
"step": 139900
},
{
"epoch": 36.33,
"learning_rate": 1.0939283860923717e-05,
"loss": 3.5896,
"step": 140000
},
{
"epoch": 36.33,
"eval_gen_len": 12.2763,
"eval_loss": 3.5935587882995605,
"eval_rouge1": 41.7094,
"eval_rouge2": 8.5817,
"eval_rougeL": 37.677,
"eval_rougeLsum": 37.72,
"eval_runtime": 75.9538,
"eval_samples_per_second": 50.741,
"eval_steps_per_second": 6.346,
"step": 140000
},
{
"epoch": 36.35,
"learning_rate": 1.0918526206538662e-05,
"loss": 3.6109,
"step": 140100
},
{
"epoch": 36.38,
"learning_rate": 1.0897768552153607e-05,
"loss": 3.5871,
"step": 140200
},
{
"epoch": 36.4,
"learning_rate": 1.0877010897768554e-05,
"loss": 3.5966,
"step": 140300
},
{
"epoch": 36.43,
"learning_rate": 1.0856253243383499e-05,
"loss": 3.5886,
"step": 140400
},
{
"epoch": 36.46,
"learning_rate": 1.0835495588998444e-05,
"loss": 3.6128,
"step": 140500
},
{
"epoch": 36.48,
"learning_rate": 1.0814737934613389e-05,
"loss": 3.5788,
"step": 140600
},
{
"epoch": 36.51,
"learning_rate": 1.0793980280228335e-05,
"loss": 3.5739,
"step": 140700
},
{
"epoch": 36.53,
"learning_rate": 1.077322262584328e-05,
"loss": 3.6301,
"step": 140800
},
{
"epoch": 36.56,
"learning_rate": 1.0752464971458225e-05,
"loss": 3.5858,
"step": 140900
},
{
"epoch": 36.59,
"learning_rate": 1.0731707317073172e-05,
"loss": 3.5925,
"step": 141000
},
{
"epoch": 36.61,
"learning_rate": 1.0710949662688117e-05,
"loss": 3.6174,
"step": 141100
},
{
"epoch": 36.64,
"learning_rate": 1.0690192008303062e-05,
"loss": 3.6002,
"step": 141200
},
{
"epoch": 36.66,
"learning_rate": 1.0669434353918009e-05,
"loss": 3.6139,
"step": 141300
},
{
"epoch": 36.69,
"learning_rate": 1.0648676699532954e-05,
"loss": 3.6358,
"step": 141400
},
{
"epoch": 36.72,
"learning_rate": 1.0627919045147899e-05,
"loss": 3.6055,
"step": 141500
},
{
"epoch": 36.74,
"learning_rate": 1.0607161390762845e-05,
"loss": 3.6213,
"step": 141600
},
{
"epoch": 36.77,
"learning_rate": 1.058640373637779e-05,
"loss": 3.5881,
"step": 141700
},
{
"epoch": 36.79,
"learning_rate": 1.0565646081992735e-05,
"loss": 3.6262,
"step": 141800
},
{
"epoch": 36.82,
"learning_rate": 1.054488842760768e-05,
"loss": 3.6123,
"step": 141900
},
{
"epoch": 36.84,
"learning_rate": 1.0524130773222627e-05,
"loss": 3.6091,
"step": 142000
},
{
"epoch": 36.87,
"learning_rate": 1.0503373118837572e-05,
"loss": 3.6199,
"step": 142100
},
{
"epoch": 36.9,
"learning_rate": 1.0482615464452517e-05,
"loss": 3.6182,
"step": 142200
},
{
"epoch": 36.92,
"learning_rate": 1.0461857810067463e-05,
"loss": 3.5861,
"step": 142300
},
{
"epoch": 36.95,
"learning_rate": 1.0441100155682408e-05,
"loss": 3.5922,
"step": 142400
},
{
"epoch": 36.97,
"learning_rate": 1.0420342501297353e-05,
"loss": 3.6228,
"step": 142500
},
{
"epoch": 37.0,
"learning_rate": 1.03995848469123e-05,
"loss": 3.5662,
"step": 142600
},
{
"epoch": 37.03,
"learning_rate": 1.0378827192527245e-05,
"loss": 3.5931,
"step": 142700
},
{
"epoch": 37.05,
"learning_rate": 1.035806953814219e-05,
"loss": 3.6024,
"step": 142800
},
{
"epoch": 37.08,
"learning_rate": 1.0337311883757137e-05,
"loss": 3.5865,
"step": 142900
},
{
"epoch": 37.1,
"learning_rate": 1.0316554229372082e-05,
"loss": 3.6059,
"step": 143000
},
{
"epoch": 37.13,
"learning_rate": 1.0295796574987027e-05,
"loss": 3.6064,
"step": 143100
},
{
"epoch": 37.16,
"learning_rate": 1.0275038920601972e-05,
"loss": 3.5973,
"step": 143200
},
{
"epoch": 37.18,
"learning_rate": 1.0254281266216918e-05,
"loss": 3.6123,
"step": 143300
},
{
"epoch": 37.21,
"learning_rate": 1.0233523611831863e-05,
"loss": 3.5877,
"step": 143400
},
{
"epoch": 37.23,
"learning_rate": 1.0212765957446808e-05,
"loss": 3.6036,
"step": 143500
},
{
"epoch": 37.26,
"learning_rate": 1.0192008303061755e-05,
"loss": 3.6061,
"step": 143600
},
{
"epoch": 37.29,
"learning_rate": 1.01712506486767e-05,
"loss": 3.6231,
"step": 143700
},
{
"epoch": 37.31,
"learning_rate": 1.0150492994291645e-05,
"loss": 3.6303,
"step": 143800
},
{
"epoch": 37.34,
"learning_rate": 1.0129735339906593e-05,
"loss": 3.5989,
"step": 143900
},
{
"epoch": 37.36,
"learning_rate": 1.0108977685521536e-05,
"loss": 3.6216,
"step": 144000
},
{
"epoch": 37.39,
"learning_rate": 1.0088220031136481e-05,
"loss": 3.5965,
"step": 144100
},
{
"epoch": 37.42,
"learning_rate": 1.006746237675143e-05,
"loss": 3.6071,
"step": 144200
},
{
"epoch": 37.44,
"learning_rate": 1.0046704722366375e-05,
"loss": 3.6013,
"step": 144300
},
{
"epoch": 37.47,
"learning_rate": 1.0025947067981318e-05,
"loss": 3.6158,
"step": 144400
},
{
"epoch": 37.49,
"learning_rate": 1.0005189413596263e-05,
"loss": 3.601,
"step": 144500
},
{
"epoch": 37.52,
"learning_rate": 9.98443175921121e-06,
"loss": 3.5991,
"step": 144600
},
{
"epoch": 37.55,
"learning_rate": 9.963674104826156e-06,
"loss": 3.5952,
"step": 144700
},
{
"epoch": 37.57,
"learning_rate": 9.942916450441101e-06,
"loss": 3.6145,
"step": 144800
},
{
"epoch": 37.6,
"learning_rate": 9.922158796056046e-06,
"loss": 3.5897,
"step": 144900
},
{
"epoch": 37.62,
"learning_rate": 9.901401141670993e-06,
"loss": 3.5978,
"step": 145000
},
{
"epoch": 37.62,
"eval_gen_len": 12.2774,
"eval_loss": 3.592960834503174,
"eval_rouge1": 41.3445,
"eval_rouge2": 8.728,
"eval_rougeL": 37.3647,
"eval_rougeLsum": 37.3694,
"eval_runtime": 75.9693,
"eval_samples_per_second": 50.731,
"eval_steps_per_second": 6.345,
"step": 145000
},
{
"epoch": 37.65,
"learning_rate": 9.880643487285938e-06,
"loss": 3.6027,
"step": 145100
},
{
"epoch": 37.68,
"learning_rate": 9.859885832900883e-06,
"loss": 3.5978,
"step": 145200
},
{
"epoch": 37.7,
"learning_rate": 9.83912817851583e-06,
"loss": 3.5987,
"step": 145300
},
{
"epoch": 37.73,
"learning_rate": 9.818370524130774e-06,
"loss": 3.6126,
"step": 145400
},
{
"epoch": 37.75,
"learning_rate": 9.79761286974572e-06,
"loss": 3.6129,
"step": 145500
},
{
"epoch": 37.78,
"learning_rate": 9.776855215360666e-06,
"loss": 3.6129,
"step": 145600
},
{
"epoch": 37.8,
"learning_rate": 9.756097560975611e-06,
"loss": 3.5921,
"step": 145700
},
{
"epoch": 37.83,
"learning_rate": 9.735339906590556e-06,
"loss": 3.6137,
"step": 145800
},
{
"epoch": 37.86,
"learning_rate": 9.714582252205501e-06,
"loss": 3.6208,
"step": 145900
},
{
"epoch": 37.88,
"learning_rate": 9.693824597820448e-06,
"loss": 3.6154,
"step": 146000
},
{
"epoch": 37.91,
"learning_rate": 9.673066943435393e-06,
"loss": 3.6112,
"step": 146100
},
{
"epoch": 37.93,
"learning_rate": 9.652309289050338e-06,
"loss": 3.5969,
"step": 146200
},
{
"epoch": 37.96,
"learning_rate": 9.631551634665284e-06,
"loss": 3.6015,
"step": 146300
},
{
"epoch": 37.99,
"learning_rate": 9.610793980280229e-06,
"loss": 3.6054,
"step": 146400
},
{
"epoch": 38.01,
"learning_rate": 9.590036325895174e-06,
"loss": 3.6147,
"step": 146500
},
{
"epoch": 38.04,
"learning_rate": 9.56927867151012e-06,
"loss": 3.5835,
"step": 146600
},
{
"epoch": 38.06,
"learning_rate": 9.548521017125066e-06,
"loss": 3.5983,
"step": 146700
},
{
"epoch": 38.09,
"learning_rate": 9.52776336274001e-06,
"loss": 3.6203,
"step": 146800
},
{
"epoch": 38.12,
"learning_rate": 9.507005708354957e-06,
"loss": 3.6003,
"step": 146900
},
{
"epoch": 38.14,
"learning_rate": 9.486248053969902e-06,
"loss": 3.5873,
"step": 147000
},
{
"epoch": 38.17,
"learning_rate": 9.465490399584847e-06,
"loss": 3.5896,
"step": 147100
},
{
"epoch": 38.19,
"learning_rate": 9.444732745199792e-06,
"loss": 3.6103,
"step": 147200
},
{
"epoch": 38.22,
"learning_rate": 9.423975090814739e-06,
"loss": 3.6003,
"step": 147300
},
{
"epoch": 38.25,
"learning_rate": 9.403217436429684e-06,
"loss": 3.6127,
"step": 147400
},
{
"epoch": 38.27,
"learning_rate": 9.382459782044629e-06,
"loss": 3.6222,
"step": 147500
},
{
"epoch": 38.3,
"learning_rate": 9.361702127659576e-06,
"loss": 3.5931,
"step": 147600
},
{
"epoch": 38.32,
"learning_rate": 9.34094447327452e-06,
"loss": 3.5879,
"step": 147700
},
{
"epoch": 38.35,
"learning_rate": 9.320186818889465e-06,
"loss": 3.6139,
"step": 147800
},
{
"epoch": 38.38,
"learning_rate": 9.299429164504412e-06,
"loss": 3.5875,
"step": 147900
},
{
"epoch": 38.4,
"learning_rate": 9.278671510119357e-06,
"loss": 3.6209,
"step": 148000
},
{
"epoch": 38.43,
"learning_rate": 9.257913855734302e-06,
"loss": 3.6078,
"step": 148100
},
{
"epoch": 38.45,
"learning_rate": 9.237156201349249e-06,
"loss": 3.6101,
"step": 148200
},
{
"epoch": 38.48,
"learning_rate": 9.216398546964194e-06,
"loss": 3.6003,
"step": 148300
},
{
"epoch": 38.51,
"learning_rate": 9.19564089257914e-06,
"loss": 3.5993,
"step": 148400
},
{
"epoch": 38.53,
"learning_rate": 9.174883238194084e-06,
"loss": 3.5977,
"step": 148500
},
{
"epoch": 38.56,
"learning_rate": 9.15412558380903e-06,
"loss": 3.6018,
"step": 148600
},
{
"epoch": 38.58,
"learning_rate": 9.133367929423977e-06,
"loss": 3.5985,
"step": 148700
},
{
"epoch": 38.61,
"learning_rate": 9.112610275038922e-06,
"loss": 3.6143,
"step": 148800
},
{
"epoch": 38.64,
"learning_rate": 9.091852620653867e-06,
"loss": 3.6048,
"step": 148900
},
{
"epoch": 38.66,
"learning_rate": 9.071094966268812e-06,
"loss": 3.6186,
"step": 149000
},
{
"epoch": 38.69,
"learning_rate": 9.050337311883759e-06,
"loss": 3.5875,
"step": 149100
},
{
"epoch": 38.71,
"learning_rate": 9.029579657498703e-06,
"loss": 3.6165,
"step": 149200
},
{
"epoch": 38.74,
"learning_rate": 9.008822003113648e-06,
"loss": 3.595,
"step": 149300
},
{
"epoch": 38.76,
"learning_rate": 8.988064348728595e-06,
"loss": 3.6045,
"step": 149400
},
{
"epoch": 38.79,
"learning_rate": 8.96730669434354e-06,
"loss": 3.5736,
"step": 149500
},
{
"epoch": 38.82,
"learning_rate": 8.946549039958485e-06,
"loss": 3.6124,
"step": 149600
},
{
"epoch": 38.84,
"learning_rate": 8.925791385573432e-06,
"loss": 3.5944,
"step": 149700
},
{
"epoch": 38.87,
"learning_rate": 8.905033731188377e-06,
"loss": 3.6054,
"step": 149800
},
{
"epoch": 38.89,
"learning_rate": 8.884276076803322e-06,
"loss": 3.602,
"step": 149900
},
{
"epoch": 38.92,
"learning_rate": 8.863518422418268e-06,
"loss": 3.6121,
"step": 150000
},
{
"epoch": 38.92,
"eval_gen_len": 12.2769,
"eval_loss": 3.591606855392456,
"eval_rouge1": 41.9288,
"eval_rouge2": 8.6859,
"eval_rougeL": 37.8925,
"eval_rougeLsum": 37.9125,
"eval_runtime": 75.6977,
"eval_samples_per_second": 50.913,
"eval_steps_per_second": 6.367,
"step": 150000
},
{
"epoch": 38.95,
"learning_rate": 8.842760768033213e-06,
"loss": 3.617,
"step": 150100
},
{
"epoch": 38.97,
"learning_rate": 8.822003113648158e-06,
"loss": 3.5798,
"step": 150200
},
{
"epoch": 39.0,
"learning_rate": 8.801245459263103e-06,
"loss": 3.5933,
"step": 150300
},
{
"epoch": 39.02,
"learning_rate": 8.78048780487805e-06,
"loss": 3.594,
"step": 150400
},
{
"epoch": 39.05,
"learning_rate": 8.759730150492995e-06,
"loss": 3.6042,
"step": 150500
},
{
"epoch": 39.08,
"learning_rate": 8.73897249610794e-06,
"loss": 3.6139,
"step": 150600
},
{
"epoch": 39.1,
"learning_rate": 8.718214841722886e-06,
"loss": 3.6119,
"step": 150700
},
{
"epoch": 39.13,
"learning_rate": 8.697457187337831e-06,
"loss": 3.5806,
"step": 150800
},
{
"epoch": 39.15,
"learning_rate": 8.676699532952776e-06,
"loss": 3.5956,
"step": 150900
},
{
"epoch": 39.18,
"learning_rate": 8.655941878567723e-06,
"loss": 3.5559,
"step": 151000
},
{
"epoch": 39.21,
"learning_rate": 8.635184224182668e-06,
"loss": 3.6148,
"step": 151100
},
{
"epoch": 39.23,
"learning_rate": 8.614426569797613e-06,
"loss": 3.5941,
"step": 151200
},
{
"epoch": 39.26,
"learning_rate": 8.593668915412558e-06,
"loss": 3.5734,
"step": 151300
},
{
"epoch": 39.28,
"learning_rate": 8.572911261027505e-06,
"loss": 3.5983,
"step": 151400
},
{
"epoch": 39.31,
"learning_rate": 8.55215360664245e-06,
"loss": 3.6197,
"step": 151500
},
{
"epoch": 39.34,
"learning_rate": 8.531395952257395e-06,
"loss": 3.6068,
"step": 151600
},
{
"epoch": 39.36,
"learning_rate": 8.510638297872341e-06,
"loss": 3.5989,
"step": 151700
},
{
"epoch": 39.39,
"learning_rate": 8.489880643487288e-06,
"loss": 3.6053,
"step": 151800
},
{
"epoch": 39.41,
"learning_rate": 8.469122989102231e-06,
"loss": 3.6161,
"step": 151900
},
{
"epoch": 39.44,
"learning_rate": 8.448365334717178e-06,
"loss": 3.6292,
"step": 152000
},
{
"epoch": 39.47,
"learning_rate": 8.427607680332123e-06,
"loss": 3.6121,
"step": 152100
},
{
"epoch": 39.49,
"learning_rate": 8.40685002594707e-06,
"loss": 3.6004,
"step": 152200
},
{
"epoch": 39.52,
"learning_rate": 8.386092371562014e-06,
"loss": 3.6157,
"step": 152300
},
{
"epoch": 39.54,
"learning_rate": 8.36533471717696e-06,
"loss": 3.622,
"step": 152400
},
{
"epoch": 39.57,
"learning_rate": 8.344577062791906e-06,
"loss": 3.6119,
"step": 152500
},
{
"epoch": 39.6,
"learning_rate": 8.323819408406851e-06,
"loss": 3.6092,
"step": 152600
},
{
"epoch": 39.62,
"learning_rate": 8.303061754021796e-06,
"loss": 3.5876,
"step": 152700
},
{
"epoch": 39.65,
"learning_rate": 8.282304099636743e-06,
"loss": 3.6196,
"step": 152800
},
{
"epoch": 39.67,
"learning_rate": 8.261546445251688e-06,
"loss": 3.6121,
"step": 152900
},
{
"epoch": 39.7,
"learning_rate": 8.240788790866633e-06,
"loss": 3.6184,
"step": 153000
},
{
"epoch": 39.72,
"learning_rate": 8.22003113648158e-06,
"loss": 3.5772,
"step": 153100
},
{
"epoch": 39.75,
"learning_rate": 8.199273482096524e-06,
"loss": 3.6062,
"step": 153200
},
{
"epoch": 39.78,
"learning_rate": 8.17851582771147e-06,
"loss": 3.6152,
"step": 153300
},
{
"epoch": 39.8,
"learning_rate": 8.157758173326414e-06,
"loss": 3.5901,
"step": 153400
},
{
"epoch": 39.83,
"learning_rate": 8.13700051894136e-06,
"loss": 3.5957,
"step": 153500
},
{
"epoch": 39.85,
"learning_rate": 8.116242864556306e-06,
"loss": 3.6184,
"step": 153600
},
{
"epoch": 39.88,
"learning_rate": 8.09548521017125e-06,
"loss": 3.5991,
"step": 153700
},
{
"epoch": 39.91,
"learning_rate": 8.074727555786197e-06,
"loss": 3.6069,
"step": 153800
},
{
"epoch": 39.93,
"learning_rate": 8.053969901401142e-06,
"loss": 3.5976,
"step": 153900
},
{
"epoch": 39.96,
"learning_rate": 8.033212247016087e-06,
"loss": 3.5954,
"step": 154000
},
{
"epoch": 39.98,
"learning_rate": 8.012454592631034e-06,
"loss": 3.6192,
"step": 154100
},
{
"epoch": 40.01,
"learning_rate": 7.991696938245979e-06,
"loss": 3.5992,
"step": 154200
},
{
"epoch": 40.04,
"learning_rate": 7.970939283860924e-06,
"loss": 3.6007,
"step": 154300
},
{
"epoch": 40.06,
"learning_rate": 7.950181629475869e-06,
"loss": 3.5923,
"step": 154400
},
{
"epoch": 40.09,
"learning_rate": 7.929423975090816e-06,
"loss": 3.6094,
"step": 154500
},
{
"epoch": 40.11,
"learning_rate": 7.90866632070576e-06,
"loss": 3.618,
"step": 154600
},
{
"epoch": 40.14,
"learning_rate": 7.887908666320706e-06,
"loss": 3.6084,
"step": 154700
},
{
"epoch": 40.17,
"learning_rate": 7.867151011935652e-06,
"loss": 3.5987,
"step": 154800
},
{
"epoch": 40.19,
"learning_rate": 7.846393357550597e-06,
"loss": 3.5991,
"step": 154900
},
{
"epoch": 40.22,
"learning_rate": 7.825635703165542e-06,
"loss": 3.6154,
"step": 155000
},
{
"epoch": 40.22,
"eval_gen_len": 12.2763,
"eval_loss": 3.590810775756836,
"eval_rouge1": 41.7854,
"eval_rouge2": 8.6556,
"eval_rougeL": 37.7725,
"eval_rougeLsum": 37.7749,
"eval_runtime": 76.5307,
"eval_samples_per_second": 50.359,
"eval_steps_per_second": 6.298,
"step": 155000
},
{
"epoch": 40.24,
"learning_rate": 7.804878048780489e-06,
"loss": 3.626,
"step": 155100
},
{
"epoch": 40.27,
"learning_rate": 7.784120394395434e-06,
"loss": 3.6092,
"step": 155200
},
{
"epoch": 40.3,
"learning_rate": 7.763362740010379e-06,
"loss": 3.5901,
"step": 155300
},
{
"epoch": 40.32,
"learning_rate": 7.742605085625325e-06,
"loss": 3.6067,
"step": 155400
},
{
"epoch": 40.35,
"learning_rate": 7.72184743124027e-06,
"loss": 3.6106,
"step": 155500
},
{
"epoch": 40.37,
"learning_rate": 7.701089776855217e-06,
"loss": 3.6024,
"step": 155600
},
{
"epoch": 40.4,
"learning_rate": 7.68033212247016e-06,
"loss": 3.6023,
"step": 155700
},
{
"epoch": 40.43,
"learning_rate": 7.659574468085107e-06,
"loss": 3.5921,
"step": 155800
},
{
"epoch": 40.45,
"learning_rate": 7.638816813700054e-06,
"loss": 3.6076,
"step": 155900
},
{
"epoch": 40.48,
"learning_rate": 7.618059159314998e-06,
"loss": 3.6178,
"step": 156000
},
{
"epoch": 40.5,
"learning_rate": 7.5973015049299435e-06,
"loss": 3.6246,
"step": 156100
},
{
"epoch": 40.53,
"learning_rate": 7.5765438505448885e-06,
"loss": 3.591,
"step": 156200
},
{
"epoch": 40.56,
"learning_rate": 7.555786196159834e-06,
"loss": 3.5834,
"step": 156300
},
{
"epoch": 40.58,
"learning_rate": 7.53502854177478e-06,
"loss": 3.6077,
"step": 156400
},
{
"epoch": 40.61,
"learning_rate": 7.514270887389725e-06,
"loss": 3.6109,
"step": 156500
},
{
"epoch": 40.63,
"learning_rate": 7.493513233004671e-06,
"loss": 3.6024,
"step": 156600
},
{
"epoch": 40.66,
"learning_rate": 7.472755578619617e-06,
"loss": 3.6082,
"step": 156700
},
{
"epoch": 40.69,
"learning_rate": 7.451997924234562e-06,
"loss": 3.6214,
"step": 156800
},
{
"epoch": 40.71,
"learning_rate": 7.4312402698495075e-06,
"loss": 3.5791,
"step": 156900
},
{
"epoch": 40.74,
"learning_rate": 7.4104826154644525e-06,
"loss": 3.6122,
"step": 157000
},
{
"epoch": 40.76,
"learning_rate": 7.389724961079398e-06,
"loss": 3.6106,
"step": 157100
},
{
"epoch": 40.79,
"learning_rate": 7.368967306694345e-06,
"loss": 3.6073,
"step": 157200
},
{
"epoch": 40.81,
"learning_rate": 7.348209652309289e-06,
"loss": 3.6024,
"step": 157300
},
{
"epoch": 40.84,
"learning_rate": 7.327451997924236e-06,
"loss": 3.5964,
"step": 157400
},
{
"epoch": 40.87,
"learning_rate": 7.30669434353918e-06,
"loss": 3.5994,
"step": 157500
},
{
"epoch": 40.89,
"learning_rate": 7.2859366891541265e-06,
"loss": 3.6074,
"step": 157600
},
{
"epoch": 40.92,
"learning_rate": 7.265179034769072e-06,
"loss": 3.5946,
"step": 157700
},
{
"epoch": 40.94,
"learning_rate": 7.2444213803840165e-06,
"loss": 3.5902,
"step": 157800
},
{
"epoch": 40.97,
"learning_rate": 7.223663725998963e-06,
"loss": 3.59,
"step": 157900
},
{
"epoch": 41.0,
"learning_rate": 7.202906071613909e-06,
"loss": 3.6009,
"step": 158000
},
{
"epoch": 41.02,
"learning_rate": 7.182148417228854e-06,
"loss": 3.5957,
"step": 158100
},
{
"epoch": 41.05,
"learning_rate": 7.1613907628438e-06,
"loss": 3.5913,
"step": 158200
},
{
"epoch": 41.07,
"learning_rate": 7.140633108458745e-06,
"loss": 3.5876,
"step": 158300
},
{
"epoch": 41.1,
"learning_rate": 7.1198754540736905e-06,
"loss": 3.6022,
"step": 158400
},
{
"epoch": 41.13,
"learning_rate": 7.099117799688636e-06,
"loss": 3.6123,
"step": 158500
},
{
"epoch": 41.15,
"learning_rate": 7.078360145303581e-06,
"loss": 3.5919,
"step": 158600
},
{
"epoch": 41.18,
"learning_rate": 7.057602490918527e-06,
"loss": 3.6186,
"step": 158700
},
{
"epoch": 41.2,
"learning_rate": 7.036844836533472e-06,
"loss": 3.6035,
"step": 158800
},
{
"epoch": 41.23,
"learning_rate": 7.016087182148418e-06,
"loss": 3.5958,
"step": 158900
},
{
"epoch": 41.26,
"learning_rate": 6.995329527763364e-06,
"loss": 3.5873,
"step": 159000
},
{
"epoch": 41.28,
"learning_rate": 6.974571873378309e-06,
"loss": 3.6196,
"step": 159100
},
{
"epoch": 41.31,
"learning_rate": 6.9538142189932545e-06,
"loss": 3.6018,
"step": 159200
},
{
"epoch": 41.33,
"learning_rate": 6.9330565646081994e-06,
"loss": 3.5975,
"step": 159300
},
{
"epoch": 41.36,
"learning_rate": 6.912298910223145e-06,
"loss": 3.5927,
"step": 159400
},
{
"epoch": 41.39,
"learning_rate": 6.891541255838091e-06,
"loss": 3.6108,
"step": 159500
},
{
"epoch": 41.41,
"learning_rate": 6.870783601453036e-06,
"loss": 3.617,
"step": 159600
},
{
"epoch": 41.44,
"learning_rate": 6.850025947067982e-06,
"loss": 3.6035,
"step": 159700
},
{
"epoch": 41.46,
"learning_rate": 6.829268292682928e-06,
"loss": 3.6088,
"step": 159800
},
{
"epoch": 41.49,
"learning_rate": 6.808510638297873e-06,
"loss": 3.6014,
"step": 159900
},
{
"epoch": 41.52,
"learning_rate": 6.7877529839128185e-06,
"loss": 3.574,
"step": 160000
},
{
"epoch": 41.52,
"eval_gen_len": 12.2779,
"eval_loss": 3.5903589725494385,
"eval_rouge1": 41.8234,
"eval_rouge2": 8.7383,
"eval_rougeL": 37.8126,
"eval_rougeLsum": 37.813,
"eval_runtime": 76.3545,
"eval_samples_per_second": 50.475,
"eval_steps_per_second": 6.313,
"step": 160000
},
{
"epoch": 41.54,
"learning_rate": 6.7669953295277634e-06,
"loss": 3.6005,
"step": 160100
},
{
"epoch": 41.57,
"learning_rate": 6.746237675142709e-06,
"loss": 3.571,
"step": 160200
},
{
"epoch": 41.59,
"learning_rate": 6.725480020757655e-06,
"loss": 3.616,
"step": 160300
},
{
"epoch": 41.62,
"learning_rate": 6.7047223663726e-06,
"loss": 3.605,
"step": 160400
},
{
"epoch": 41.65,
"learning_rate": 6.683964711987546e-06,
"loss": 3.5991,
"step": 160500
},
{
"epoch": 41.67,
"learning_rate": 6.663207057602491e-06,
"loss": 3.6063,
"step": 160600
},
{
"epoch": 41.7,
"learning_rate": 6.642449403217437e-06,
"loss": 3.5771,
"step": 160700
},
{
"epoch": 41.72,
"learning_rate": 6.6216917488323824e-06,
"loss": 3.6011,
"step": 160800
},
{
"epoch": 41.75,
"learning_rate": 6.600934094447327e-06,
"loss": 3.6073,
"step": 160900
},
{
"epoch": 41.77,
"learning_rate": 6.580176440062273e-06,
"loss": 3.6129,
"step": 161000
},
{
"epoch": 41.8,
"learning_rate": 6.559418785677218e-06,
"loss": 3.5917,
"step": 161100
},
{
"epoch": 41.83,
"learning_rate": 6.538661131292164e-06,
"loss": 3.6049,
"step": 161200
},
{
"epoch": 41.85,
"learning_rate": 6.517903476907111e-06,
"loss": 3.5975,
"step": 161300
},
{
"epoch": 41.88,
"learning_rate": 6.497145822522055e-06,
"loss": 3.5925,
"step": 161400
},
{
"epoch": 41.9,
"learning_rate": 6.4763881681370014e-06,
"loss": 3.6042,
"step": 161500
},
{
"epoch": 41.93,
"learning_rate": 6.455630513751947e-06,
"loss": 3.5978,
"step": 161600
},
{
"epoch": 41.96,
"learning_rate": 6.434872859366892e-06,
"loss": 3.5942,
"step": 161700
},
{
"epoch": 41.98,
"learning_rate": 6.414115204981838e-06,
"loss": 3.6045,
"step": 161800
},
{
"epoch": 42.01,
"learning_rate": 6.393357550596783e-06,
"loss": 3.5946,
"step": 161900
},
{
"epoch": 42.03,
"learning_rate": 6.372599896211729e-06,
"loss": 3.6214,
"step": 162000
},
{
"epoch": 42.06,
"learning_rate": 6.351842241826675e-06,
"loss": 3.6172,
"step": 162100
},
{
"epoch": 42.09,
"learning_rate": 6.33108458744162e-06,
"loss": 3.6064,
"step": 162200
},
{
"epoch": 42.11,
"learning_rate": 6.3103269330565654e-06,
"loss": 3.5931,
"step": 162300
},
{
"epoch": 42.14,
"learning_rate": 6.28956927867151e-06,
"loss": 3.5975,
"step": 162400
},
{
"epoch": 42.16,
"learning_rate": 6.268811624286456e-06,
"loss": 3.5952,
"step": 162500
},
{
"epoch": 42.19,
"learning_rate": 6.248053969901402e-06,
"loss": 3.6057,
"step": 162600
},
{
"epoch": 42.22,
"learning_rate": 6.227296315516347e-06,
"loss": 3.5965,
"step": 162700
},
{
"epoch": 42.24,
"learning_rate": 6.206538661131293e-06,
"loss": 3.6234,
"step": 162800
},
{
"epoch": 42.27,
"learning_rate": 6.185781006746239e-06,
"loss": 3.5977,
"step": 162900
},
{
"epoch": 42.29,
"learning_rate": 6.165023352361184e-06,
"loss": 3.6099,
"step": 163000
},
{
"epoch": 42.32,
"learning_rate": 6.144265697976129e-06,
"loss": 3.5625,
"step": 163100
},
{
"epoch": 42.35,
"learning_rate": 6.123508043591074e-06,
"loss": 3.6009,
"step": 163200
},
{
"epoch": 42.37,
"learning_rate": 6.10275038920602e-06,
"loss": 3.5834,
"step": 163300
},
{
"epoch": 42.4,
"learning_rate": 6.081992734820966e-06,
"loss": 3.564,
"step": 163400
},
{
"epoch": 42.42,
"learning_rate": 6.061235080435911e-06,
"loss": 3.6205,
"step": 163500
},
{
"epoch": 42.45,
"learning_rate": 6.040477426050857e-06,
"loss": 3.5879,
"step": 163600
},
{
"epoch": 42.48,
"learning_rate": 6.019719771665802e-06,
"loss": 3.6,
"step": 163700
},
{
"epoch": 42.5,
"learning_rate": 5.9989621172807476e-06,
"loss": 3.5986,
"step": 163800
},
{
"epoch": 42.53,
"learning_rate": 5.978204462895693e-06,
"loss": 3.6023,
"step": 163900
},
{
"epoch": 42.55,
"learning_rate": 5.957446808510638e-06,
"loss": 3.6012,
"step": 164000
},
{
"epoch": 42.58,
"learning_rate": 5.936689154125584e-06,
"loss": 3.5837,
"step": 164100
},
{
"epoch": 42.61,
"learning_rate": 5.915931499740529e-06,
"loss": 3.6029,
"step": 164200
},
{
"epoch": 42.63,
"learning_rate": 5.895173845355475e-06,
"loss": 3.6119,
"step": 164300
},
{
"epoch": 42.66,
"learning_rate": 5.874416190970421e-06,
"loss": 3.5922,
"step": 164400
},
{
"epoch": 42.68,
"learning_rate": 5.853658536585366e-06,
"loss": 3.6092,
"step": 164500
},
{
"epoch": 42.71,
"learning_rate": 5.8329008822003115e-06,
"loss": 3.5925,
"step": 164600
},
{
"epoch": 42.73,
"learning_rate": 5.812143227815258e-06,
"loss": 3.6074,
"step": 164700
},
{
"epoch": 42.76,
"learning_rate": 5.791385573430202e-06,
"loss": 3.6085,
"step": 164800
},
{
"epoch": 42.79,
"learning_rate": 5.770627919045149e-06,
"loss": 3.6101,
"step": 164900
},
{
"epoch": 42.81,
"learning_rate": 5.749870264660093e-06,
"loss": 3.5595,
"step": 165000
},
{
"epoch": 42.81,
"eval_gen_len": 12.2761,
"eval_loss": 3.5897645950317383,
"eval_rouge1": 41.7274,
"eval_rouge2": 8.7877,
"eval_rougeL": 37.7333,
"eval_rougeLsum": 37.7388,
"eval_runtime": 76.3561,
"eval_samples_per_second": 50.474,
"eval_steps_per_second": 6.313,
"step": 165000
},
{
"epoch": 42.84,
"learning_rate": 5.72911261027504e-06,
"loss": 3.624,
"step": 165100
},
{
"epoch": 42.86,
"learning_rate": 5.708354955889986e-06,
"loss": 3.6039,
"step": 165200
},
{
"epoch": 42.89,
"learning_rate": 5.6875973015049306e-06,
"loss": 3.6057,
"step": 165300
},
{
"epoch": 42.92,
"learning_rate": 5.666839647119876e-06,
"loss": 3.5824,
"step": 165400
},
{
"epoch": 42.94,
"learning_rate": 5.646081992734821e-06,
"loss": 3.5717,
"step": 165500
},
{
"epoch": 42.97,
"learning_rate": 5.625324338349767e-06,
"loss": 3.5933,
"step": 165600
},
{
"epoch": 42.99,
"learning_rate": 5.604566683964713e-06,
"loss": 3.5948,
"step": 165700
},
{
"epoch": 43.02,
"learning_rate": 5.583809029579658e-06,
"loss": 3.5862,
"step": 165800
},
{
"epoch": 43.05,
"learning_rate": 5.563051375194604e-06,
"loss": 3.5895,
"step": 165900
},
{
"epoch": 43.07,
"learning_rate": 5.5422937208095496e-06,
"loss": 3.6074,
"step": 166000
},
{
"epoch": 43.1,
"learning_rate": 5.5215360664244945e-06,
"loss": 3.6064,
"step": 166100
},
{
"epoch": 43.12,
"learning_rate": 5.50077841203944e-06,
"loss": 3.6145,
"step": 166200
},
{
"epoch": 43.15,
"learning_rate": 5.480020757654385e-06,
"loss": 3.5731,
"step": 166300
},
{
"epoch": 43.18,
"learning_rate": 5.459263103269331e-06,
"loss": 3.6008,
"step": 166400
},
{
"epoch": 43.2,
"learning_rate": 5.438505448884277e-06,
"loss": 3.6109,
"step": 166500
},
{
"epoch": 43.23,
"learning_rate": 5.417747794499222e-06,
"loss": 3.6112,
"step": 166600
},
{
"epoch": 43.25,
"learning_rate": 5.396990140114168e-06,
"loss": 3.5723,
"step": 166700
},
{
"epoch": 43.28,
"learning_rate": 5.376232485729113e-06,
"loss": 3.6063,
"step": 166800
},
{
"epoch": 43.31,
"learning_rate": 5.3554748313440585e-06,
"loss": 3.6037,
"step": 166900
},
{
"epoch": 43.33,
"learning_rate": 5.334717176959004e-06,
"loss": 3.5815,
"step": 167000
},
{
"epoch": 43.36,
"learning_rate": 5.313959522573949e-06,
"loss": 3.6042,
"step": 167100
},
{
"epoch": 43.38,
"learning_rate": 5.293201868188895e-06,
"loss": 3.621,
"step": 167200
},
{
"epoch": 43.41,
"learning_rate": 5.27244421380384e-06,
"loss": 3.5913,
"step": 167300
},
{
"epoch": 43.44,
"learning_rate": 5.251686559418786e-06,
"loss": 3.5908,
"step": 167400
},
{
"epoch": 43.46,
"learning_rate": 5.230928905033732e-06,
"loss": 3.5799,
"step": 167500
},
{
"epoch": 43.49,
"learning_rate": 5.210171250648677e-06,
"loss": 3.5886,
"step": 167600
},
{
"epoch": 43.51,
"learning_rate": 5.1894135962636225e-06,
"loss": 3.6089,
"step": 167700
},
{
"epoch": 43.54,
"learning_rate": 5.168655941878568e-06,
"loss": 3.5928,
"step": 167800
},
{
"epoch": 43.57,
"learning_rate": 5.147898287493513e-06,
"loss": 3.5681,
"step": 167900
},
{
"epoch": 43.59,
"learning_rate": 5.127140633108459e-06,
"loss": 3.611,
"step": 168000
},
{
"epoch": 43.62,
"learning_rate": 5.106382978723404e-06,
"loss": 3.612,
"step": 168100
},
{
"epoch": 43.64,
"learning_rate": 5.08562532433835e-06,
"loss": 3.5959,
"step": 168200
},
{
"epoch": 43.67,
"learning_rate": 5.0648676699532965e-06,
"loss": 3.5898,
"step": 168300
},
{
"epoch": 43.69,
"learning_rate": 5.044110015568241e-06,
"loss": 3.5836,
"step": 168400
},
{
"epoch": 43.72,
"learning_rate": 5.023352361183187e-06,
"loss": 3.5974,
"step": 168500
},
{
"epoch": 43.75,
"learning_rate": 5.0025947067981314e-06,
"loss": 3.6092,
"step": 168600
},
{
"epoch": 43.77,
"learning_rate": 4.981837052413078e-06,
"loss": 3.6135,
"step": 168700
},
{
"epoch": 43.8,
"learning_rate": 4.961079398028023e-06,
"loss": 3.6055,
"step": 168800
},
{
"epoch": 43.82,
"learning_rate": 4.940321743642969e-06,
"loss": 3.5622,
"step": 168900
},
{
"epoch": 43.85,
"learning_rate": 4.919564089257915e-06,
"loss": 3.6091,
"step": 169000
},
{
"epoch": 43.88,
"learning_rate": 4.89880643487286e-06,
"loss": 3.5868,
"step": 169100
},
{
"epoch": 43.9,
"learning_rate": 4.8780487804878055e-06,
"loss": 3.6074,
"step": 169200
},
{
"epoch": 43.93,
"learning_rate": 4.8572911261027505e-06,
"loss": 3.6067,
"step": 169300
},
{
"epoch": 43.95,
"learning_rate": 4.836533471717696e-06,
"loss": 3.604,
"step": 169400
},
{
"epoch": 43.98,
"learning_rate": 4.815775817332642e-06,
"loss": 3.5873,
"step": 169500
},
{
"epoch": 44.01,
"learning_rate": 4.795018162947587e-06,
"loss": 3.6056,
"step": 169600
},
{
"epoch": 44.03,
"learning_rate": 4.774260508562533e-06,
"loss": 3.6109,
"step": 169700
},
{
"epoch": 44.06,
"learning_rate": 4.753502854177479e-06,
"loss": 3.6052,
"step": 169800
},
{
"epoch": 44.08,
"learning_rate": 4.732745199792424e-06,
"loss": 3.6063,
"step": 169900
},
{
"epoch": 44.11,
"learning_rate": 4.7119875454073695e-06,
"loss": 3.5897,
"step": 170000
},
{
"epoch": 44.11,
"eval_gen_len": 12.2763,
"eval_loss": 3.588927984237671,
"eval_rouge1": 41.8372,
"eval_rouge2": 8.7034,
"eval_rougeL": 37.8307,
"eval_rougeLsum": 37.841,
"eval_runtime": 89.0261,
"eval_samples_per_second": 43.291,
"eval_steps_per_second": 5.414,
"step": 170000
},
{
"epoch": 44.14,
"learning_rate": 4.6912298910223144e-06,
"loss": 3.5712,
"step": 170100
},
{
"epoch": 44.16,
"learning_rate": 4.67047223663726e-06,
"loss": 3.6044,
"step": 170200
},
{
"epoch": 44.19,
"learning_rate": 4.649714582252206e-06,
"loss": 3.6017,
"step": 170300
},
{
"epoch": 44.21,
"learning_rate": 4.628956927867151e-06,
"loss": 3.5832,
"step": 170400
},
{
"epoch": 44.24,
"learning_rate": 4.608199273482097e-06,
"loss": 3.6089,
"step": 170500
},
{
"epoch": 44.27,
"learning_rate": 4.587441619097042e-06,
"loss": 3.6157,
"step": 170600
},
{
"epoch": 44.29,
"learning_rate": 4.5666839647119885e-06,
"loss": 3.586,
"step": 170700
},
{
"epoch": 44.32,
"learning_rate": 4.5459263103269334e-06,
"loss": 3.6016,
"step": 170800
},
{
"epoch": 44.34,
"learning_rate": 4.525168655941879e-06,
"loss": 3.6018,
"step": 170900
},
{
"epoch": 44.37,
"learning_rate": 4.504411001556824e-06,
"loss": 3.5958,
"step": 171000
},
{
"epoch": 44.4,
"learning_rate": 4.48365334717177e-06,
"loss": 3.5539,
"step": 171100
},
{
"epoch": 44.42,
"learning_rate": 4.462895692786716e-06,
"loss": 3.5958,
"step": 171200
},
{
"epoch": 44.45,
"learning_rate": 4.442138038401661e-06,
"loss": 3.5845,
"step": 171300
},
{
"epoch": 44.47,
"learning_rate": 4.421380384016607e-06,
"loss": 3.5792,
"step": 171400
},
{
"epoch": 44.5,
"learning_rate": 4.400622729631552e-06,
"loss": 3.6142,
"step": 171500
},
{
"epoch": 44.53,
"learning_rate": 4.379865075246497e-06,
"loss": 3.6097,
"step": 171600
},
{
"epoch": 44.55,
"learning_rate": 4.359107420861443e-06,
"loss": 3.6089,
"step": 171700
},
{
"epoch": 44.58,
"learning_rate": 4.338349766476388e-06,
"loss": 3.6002,
"step": 171800
},
{
"epoch": 44.6,
"learning_rate": 4.317592112091334e-06,
"loss": 3.5973,
"step": 171900
},
{
"epoch": 44.63,
"learning_rate": 4.296834457706279e-06,
"loss": 3.6005,
"step": 172000
},
{
"epoch": 44.65,
"learning_rate": 4.276076803321225e-06,
"loss": 3.6128,
"step": 172100
},
{
"epoch": 44.68,
"learning_rate": 4.255319148936171e-06,
"loss": 3.5831,
"step": 172200
},
{
"epoch": 44.71,
"learning_rate": 4.234561494551116e-06,
"loss": 3.6096,
"step": 172300
},
{
"epoch": 44.73,
"learning_rate": 4.213803840166061e-06,
"loss": 3.5979,
"step": 172400
},
{
"epoch": 44.76,
"learning_rate": 4.193046185781007e-06,
"loss": 3.6007,
"step": 172500
},
{
"epoch": 44.78,
"learning_rate": 4.172288531395953e-06,
"loss": 3.5962,
"step": 172600
},
{
"epoch": 44.81,
"learning_rate": 4.151530877010898e-06,
"loss": 3.6065,
"step": 172700
},
{
"epoch": 44.84,
"learning_rate": 4.130773222625844e-06,
"loss": 3.5907,
"step": 172800
},
{
"epoch": 44.86,
"learning_rate": 4.11001556824079e-06,
"loss": 3.5942,
"step": 172900
},
{
"epoch": 44.89,
"learning_rate": 4.089257913855735e-06,
"loss": 3.6046,
"step": 173000
},
{
"epoch": 44.91,
"learning_rate": 4.06850025947068e-06,
"loss": 3.5897,
"step": 173100
},
{
"epoch": 44.94,
"learning_rate": 4.047742605085625e-06,
"loss": 3.6068,
"step": 173200
},
{
"epoch": 44.97,
"learning_rate": 4.026984950700571e-06,
"loss": 3.5986,
"step": 173300
},
{
"epoch": 44.99,
"learning_rate": 4.006227296315517e-06,
"loss": 3.6101,
"step": 173400
},
{
"epoch": 45.02,
"learning_rate": 3.985469641930462e-06,
"loss": 3.6068,
"step": 173500
},
{
"epoch": 45.04,
"learning_rate": 3.964711987545408e-06,
"loss": 3.6006,
"step": 173600
},
{
"epoch": 45.07,
"learning_rate": 3.943954333160353e-06,
"loss": 3.5955,
"step": 173700
},
{
"epoch": 45.1,
"learning_rate": 3.9231966787752986e-06,
"loss": 3.6147,
"step": 173800
},
{
"epoch": 45.12,
"learning_rate": 3.902439024390244e-06,
"loss": 3.6057,
"step": 173900
},
{
"epoch": 45.15,
"learning_rate": 3.881681370005189e-06,
"loss": 3.6,
"step": 174000
},
{
"epoch": 45.17,
"learning_rate": 3.860923715620135e-06,
"loss": 3.6016,
"step": 174100
},
{
"epoch": 45.2,
"learning_rate": 3.84016606123508e-06,
"loss": 3.6131,
"step": 174200
},
{
"epoch": 45.23,
"learning_rate": 3.819408406850027e-06,
"loss": 3.6014,
"step": 174300
},
{
"epoch": 45.25,
"learning_rate": 3.7986507524649718e-06,
"loss": 3.6189,
"step": 174400
},
{
"epoch": 45.28,
"learning_rate": 3.777893098079917e-06,
"loss": 3.601,
"step": 174500
},
{
"epoch": 45.3,
"learning_rate": 3.7571354436948626e-06,
"loss": 3.6047,
"step": 174600
},
{
"epoch": 45.33,
"learning_rate": 3.7363777893098084e-06,
"loss": 3.6055,
"step": 174700
},
{
"epoch": 45.36,
"learning_rate": 3.7156201349247538e-06,
"loss": 3.5924,
"step": 174800
},
{
"epoch": 45.38,
"learning_rate": 3.694862480539699e-06,
"loss": 3.5623,
"step": 174900
},
{
"epoch": 45.41,
"learning_rate": 3.6741048261546445e-06,
"loss": 3.603,
"step": 175000
},
{
"epoch": 45.41,
"eval_gen_len": 12.2761,
"eval_loss": 3.588848114013672,
"eval_rouge1": 41.7972,
"eval_rouge2": 8.7107,
"eval_rougeL": 37.7776,
"eval_rougeLsum": 37.818,
"eval_runtime": 75.5493,
"eval_samples_per_second": 51.013,
"eval_steps_per_second": 6.38,
"step": 175000
},
{
"epoch": 45.43,
"learning_rate": 3.65334717176959e-06,
"loss": 3.5854,
"step": 175100
},
{
"epoch": 45.46,
"learning_rate": 3.632589517384536e-06,
"loss": 3.6205,
"step": 175200
},
{
"epoch": 45.49,
"learning_rate": 3.6118318629994816e-06,
"loss": 3.5974,
"step": 175300
},
{
"epoch": 45.51,
"learning_rate": 3.591074208614427e-06,
"loss": 3.5544,
"step": 175400
},
{
"epoch": 45.54,
"learning_rate": 3.5703165542293723e-06,
"loss": 3.6007,
"step": 175500
},
{
"epoch": 45.56,
"learning_rate": 3.549558899844318e-06,
"loss": 3.5852,
"step": 175600
},
{
"epoch": 45.59,
"learning_rate": 3.5288012454592636e-06,
"loss": 3.5849,
"step": 175700
},
{
"epoch": 45.61,
"learning_rate": 3.508043591074209e-06,
"loss": 3.6133,
"step": 175800
},
{
"epoch": 45.64,
"learning_rate": 3.4872859366891543e-06,
"loss": 3.6079,
"step": 175900
},
{
"epoch": 45.67,
"learning_rate": 3.4665282823040997e-06,
"loss": 3.6056,
"step": 176000
},
{
"epoch": 45.69,
"learning_rate": 3.4457706279190455e-06,
"loss": 3.6158,
"step": 176100
},
{
"epoch": 45.72,
"learning_rate": 3.425012973533991e-06,
"loss": 3.5834,
"step": 176200
},
{
"epoch": 45.74,
"learning_rate": 3.4042553191489363e-06,
"loss": 3.6131,
"step": 176300
},
{
"epoch": 45.77,
"learning_rate": 3.3834976647638817e-06,
"loss": 3.599,
"step": 176400
},
{
"epoch": 45.8,
"learning_rate": 3.3627400103788275e-06,
"loss": 3.5958,
"step": 176500
},
{
"epoch": 45.82,
"learning_rate": 3.341982355993773e-06,
"loss": 3.603,
"step": 176600
},
{
"epoch": 45.85,
"learning_rate": 3.3212247016087183e-06,
"loss": 3.6061,
"step": 176700
},
{
"epoch": 45.87,
"learning_rate": 3.3004670472236637e-06,
"loss": 3.6092,
"step": 176800
},
{
"epoch": 45.9,
"learning_rate": 3.279709392838609e-06,
"loss": 3.5822,
"step": 176900
},
{
"epoch": 45.93,
"learning_rate": 3.2589517384535553e-06,
"loss": 3.6168,
"step": 177000
},
{
"epoch": 45.95,
"learning_rate": 3.2381940840685007e-06,
"loss": 3.5881,
"step": 177100
},
{
"epoch": 45.98,
"learning_rate": 3.217436429683446e-06,
"loss": 3.5902,
"step": 177200
},
{
"epoch": 46.0,
"learning_rate": 3.1966787752983915e-06,
"loss": 3.6043,
"step": 177300
},
{
"epoch": 46.03,
"learning_rate": 3.1759211209133373e-06,
"loss": 3.622,
"step": 177400
},
{
"epoch": 46.06,
"learning_rate": 3.1551634665282827e-06,
"loss": 3.596,
"step": 177500
},
{
"epoch": 46.08,
"learning_rate": 3.134405812143228e-06,
"loss": 3.6066,
"step": 177600
},
{
"epoch": 46.11,
"learning_rate": 3.1136481577581735e-06,
"loss": 3.5779,
"step": 177700
},
{
"epoch": 46.13,
"learning_rate": 3.0928905033731193e-06,
"loss": 3.5979,
"step": 177800
},
{
"epoch": 46.16,
"learning_rate": 3.0721328489880647e-06,
"loss": 3.6115,
"step": 177900
},
{
"epoch": 46.19,
"learning_rate": 3.05137519460301e-06,
"loss": 3.5964,
"step": 178000
},
{
"epoch": 46.21,
"learning_rate": 3.0306175402179555e-06,
"loss": 3.601,
"step": 178100
},
{
"epoch": 46.24,
"learning_rate": 3.009859885832901e-06,
"loss": 3.605,
"step": 178200
},
{
"epoch": 46.26,
"learning_rate": 2.9891022314478467e-06,
"loss": 3.5961,
"step": 178300
},
{
"epoch": 46.29,
"learning_rate": 2.968344577062792e-06,
"loss": 3.5992,
"step": 178400
},
{
"epoch": 46.32,
"learning_rate": 2.9475869226777375e-06,
"loss": 3.5911,
"step": 178500
},
{
"epoch": 46.34,
"learning_rate": 2.926829268292683e-06,
"loss": 3.6145,
"step": 178600
},
{
"epoch": 46.37,
"learning_rate": 2.906071613907629e-06,
"loss": 3.6187,
"step": 178700
},
{
"epoch": 46.39,
"learning_rate": 2.8853139595225745e-06,
"loss": 3.5984,
"step": 178800
},
{
"epoch": 46.42,
"learning_rate": 2.86455630513752e-06,
"loss": 3.6209,
"step": 178900
},
{
"epoch": 46.45,
"learning_rate": 2.8437986507524653e-06,
"loss": 3.6118,
"step": 179000
},
{
"epoch": 46.47,
"learning_rate": 2.8230409963674107e-06,
"loss": 3.5919,
"step": 179100
},
{
"epoch": 46.5,
"learning_rate": 2.8022833419823565e-06,
"loss": 3.5834,
"step": 179200
},
{
"epoch": 46.52,
"learning_rate": 2.781525687597302e-06,
"loss": 3.6115,
"step": 179300
},
{
"epoch": 46.55,
"learning_rate": 2.7607680332122473e-06,
"loss": 3.5936,
"step": 179400
},
{
"epoch": 46.57,
"learning_rate": 2.7400103788271927e-06,
"loss": 3.6,
"step": 179500
},
{
"epoch": 46.6,
"learning_rate": 2.7192527244421385e-06,
"loss": 3.5933,
"step": 179600
},
{
"epoch": 46.63,
"learning_rate": 2.698495070057084e-06,
"loss": 3.6119,
"step": 179700
},
{
"epoch": 46.65,
"learning_rate": 2.6777374156720293e-06,
"loss": 3.5724,
"step": 179800
},
{
"epoch": 46.68,
"learning_rate": 2.6569797612869746e-06,
"loss": 3.6003,
"step": 179900
},
{
"epoch": 46.7,
"learning_rate": 2.63622210690192e-06,
"loss": 3.5871,
"step": 180000
},
{
"epoch": 46.7,
"eval_gen_len": 12.2763,
"eval_loss": 3.588209390640259,
"eval_rouge1": 42.036,
"eval_rouge2": 8.9032,
"eval_rougeL": 38.038,
"eval_rougeLsum": 38.0396,
"eval_runtime": 75.5618,
"eval_samples_per_second": 51.005,
"eval_steps_per_second": 6.379,
"step": 180000
},
{
"epoch": 46.73,
"learning_rate": 2.615464452516866e-06,
"loss": 3.6057,
"step": 180100
},
{
"epoch": 46.76,
"learning_rate": 2.5947067981318112e-06,
"loss": 3.6021,
"step": 180200
},
{
"epoch": 46.78,
"learning_rate": 2.5739491437467566e-06,
"loss": 3.6029,
"step": 180300
},
{
"epoch": 46.81,
"learning_rate": 2.553191489361702e-06,
"loss": 3.5808,
"step": 180400
},
{
"epoch": 46.83,
"learning_rate": 2.5324338349766483e-06,
"loss": 3.5926,
"step": 180500
},
{
"epoch": 46.86,
"learning_rate": 2.5116761805915937e-06,
"loss": 3.5993,
"step": 180600
},
{
"epoch": 46.89,
"learning_rate": 2.490918526206539e-06,
"loss": 3.5895,
"step": 180700
},
{
"epoch": 46.91,
"learning_rate": 2.4701608718214844e-06,
"loss": 3.5926,
"step": 180800
},
{
"epoch": 46.94,
"learning_rate": 2.44940321743643e-06,
"loss": 3.5833,
"step": 180900
},
{
"epoch": 46.96,
"learning_rate": 2.4286455630513752e-06,
"loss": 3.5937,
"step": 181000
},
{
"epoch": 46.99,
"learning_rate": 2.407887908666321e-06,
"loss": 3.5956,
"step": 181100
},
{
"epoch": 47.02,
"learning_rate": 2.3871302542812664e-06,
"loss": 3.5819,
"step": 181200
},
{
"epoch": 47.04,
"learning_rate": 2.366372599896212e-06,
"loss": 3.5829,
"step": 181300
},
{
"epoch": 47.07,
"learning_rate": 2.3456149455111572e-06,
"loss": 3.5862,
"step": 181400
},
{
"epoch": 47.09,
"learning_rate": 2.324857291126103e-06,
"loss": 3.596,
"step": 181500
},
{
"epoch": 47.12,
"learning_rate": 2.3040996367410484e-06,
"loss": 3.5806,
"step": 181600
},
{
"epoch": 47.15,
"learning_rate": 2.2833419823559942e-06,
"loss": 3.6108,
"step": 181700
},
{
"epoch": 47.17,
"learning_rate": 2.2625843279709396e-06,
"loss": 3.6012,
"step": 181800
},
{
"epoch": 47.2,
"learning_rate": 2.241826673585885e-06,
"loss": 3.6083,
"step": 181900
},
{
"epoch": 47.22,
"learning_rate": 2.2210690192008304e-06,
"loss": 3.6121,
"step": 182000
},
{
"epoch": 47.25,
"learning_rate": 2.200311364815776e-06,
"loss": 3.5922,
"step": 182100
},
{
"epoch": 47.28,
"learning_rate": 2.1795537104307216e-06,
"loss": 3.6015,
"step": 182200
},
{
"epoch": 47.3,
"learning_rate": 2.158796056045667e-06,
"loss": 3.5939,
"step": 182300
},
{
"epoch": 47.33,
"learning_rate": 2.1380384016606124e-06,
"loss": 3.5858,
"step": 182400
},
{
"epoch": 47.35,
"learning_rate": 2.117280747275558e-06,
"loss": 3.596,
"step": 182500
},
{
"epoch": 47.38,
"learning_rate": 2.0965230928905036e-06,
"loss": 3.5993,
"step": 182600
},
{
"epoch": 47.41,
"learning_rate": 2.075765438505449e-06,
"loss": 3.6232,
"step": 182700
},
{
"epoch": 47.43,
"learning_rate": 2.055007784120395e-06,
"loss": 3.6277,
"step": 182800
},
{
"epoch": 47.46,
"learning_rate": 2.03425012973534e-06,
"loss": 3.6111,
"step": 182900
},
{
"epoch": 47.48,
"learning_rate": 2.0134924753502856e-06,
"loss": 3.5782,
"step": 183000
},
{
"epoch": 47.51,
"learning_rate": 1.992734820965231e-06,
"loss": 3.6124,
"step": 183100
},
{
"epoch": 47.54,
"learning_rate": 1.9719771665801764e-06,
"loss": 3.5887,
"step": 183200
},
{
"epoch": 47.56,
"learning_rate": 1.951219512195122e-06,
"loss": 3.5961,
"step": 183300
},
{
"epoch": 47.59,
"learning_rate": 1.9304618578100676e-06,
"loss": 3.6037,
"step": 183400
},
{
"epoch": 47.61,
"learning_rate": 1.9097042034250134e-06,
"loss": 3.5906,
"step": 183500
},
{
"epoch": 47.64,
"learning_rate": 1.8889465490399586e-06,
"loss": 3.6008,
"step": 183600
},
{
"epoch": 47.66,
"learning_rate": 1.8681888946549042e-06,
"loss": 3.6003,
"step": 183700
},
{
"epoch": 47.69,
"learning_rate": 1.8474312402698496e-06,
"loss": 3.5995,
"step": 183800
},
{
"epoch": 47.72,
"learning_rate": 1.826673585884795e-06,
"loss": 3.6065,
"step": 183900
},
{
"epoch": 47.74,
"learning_rate": 1.8059159314997408e-06,
"loss": 3.603,
"step": 184000
},
{
"epoch": 47.77,
"learning_rate": 1.7851582771146862e-06,
"loss": 3.6115,
"step": 184100
},
{
"epoch": 47.79,
"learning_rate": 1.7644006227296318e-06,
"loss": 3.607,
"step": 184200
},
{
"epoch": 47.82,
"learning_rate": 1.7436429683445772e-06,
"loss": 3.5868,
"step": 184300
},
{
"epoch": 47.85,
"learning_rate": 1.7228853139595228e-06,
"loss": 3.5955,
"step": 184400
},
{
"epoch": 47.87,
"learning_rate": 1.7021276595744682e-06,
"loss": 3.5936,
"step": 184500
},
{
"epoch": 47.9,
"learning_rate": 1.6813700051894138e-06,
"loss": 3.5856,
"step": 184600
},
{
"epoch": 47.92,
"learning_rate": 1.6606123508043592e-06,
"loss": 3.5826,
"step": 184700
},
{
"epoch": 47.95,
"learning_rate": 1.6398546964193045e-06,
"loss": 3.596,
"step": 184800
},
{
"epoch": 47.98,
"learning_rate": 1.6190970420342504e-06,
"loss": 3.5876,
"step": 184900
},
{
"epoch": 48.0,
"learning_rate": 1.5983393876491958e-06,
"loss": 3.5989,
"step": 185000
},
{
"epoch": 48.0,
"eval_gen_len": 12.2766,
"eval_loss": 3.588301181793213,
"eval_rouge1": 42.0728,
"eval_rouge2": 8.8115,
"eval_rougeL": 38.0677,
"eval_rougeLsum": 38.0791,
"eval_runtime": 76.2595,
"eval_samples_per_second": 50.538,
"eval_steps_per_second": 6.321,
"step": 185000
},
{
"epoch": 48.03,
"learning_rate": 1.5775817332641414e-06,
"loss": 3.5737,
"step": 185100
},
{
"epoch": 48.05,
"learning_rate": 1.5568240788790867e-06,
"loss": 3.5853,
"step": 185200
},
{
"epoch": 48.08,
"learning_rate": 1.5360664244940324e-06,
"loss": 3.5972,
"step": 185300
},
{
"epoch": 48.11,
"learning_rate": 1.5153087701089777e-06,
"loss": 3.5913,
"step": 185400
},
{
"epoch": 48.13,
"learning_rate": 1.4945511157239233e-06,
"loss": 3.6013,
"step": 185500
},
{
"epoch": 48.16,
"learning_rate": 1.4737934613388687e-06,
"loss": 3.6086,
"step": 185600
},
{
"epoch": 48.18,
"learning_rate": 1.4530358069538146e-06,
"loss": 3.6082,
"step": 185700
},
{
"epoch": 48.21,
"learning_rate": 1.43227815256876e-06,
"loss": 3.6127,
"step": 185800
},
{
"epoch": 48.24,
"learning_rate": 1.4115204981837053e-06,
"loss": 3.5891,
"step": 185900
},
{
"epoch": 48.26,
"learning_rate": 1.390762843798651e-06,
"loss": 3.5713,
"step": 186000
},
{
"epoch": 48.29,
"learning_rate": 1.3700051894135963e-06,
"loss": 3.6016,
"step": 186100
},
{
"epoch": 48.31,
"learning_rate": 1.349247535028542e-06,
"loss": 3.6099,
"step": 186200
},
{
"epoch": 48.34,
"learning_rate": 1.3284898806434873e-06,
"loss": 3.6008,
"step": 186300
},
{
"epoch": 48.37,
"learning_rate": 1.307732226258433e-06,
"loss": 3.6014,
"step": 186400
},
{
"epoch": 48.39,
"learning_rate": 1.2869745718733783e-06,
"loss": 3.5783,
"step": 186500
},
{
"epoch": 48.42,
"learning_rate": 1.2662169174883241e-06,
"loss": 3.6134,
"step": 186600
},
{
"epoch": 48.44,
"learning_rate": 1.2454592631032695e-06,
"loss": 3.6004,
"step": 186700
},
{
"epoch": 48.47,
"learning_rate": 1.224701608718215e-06,
"loss": 3.5965,
"step": 186800
},
{
"epoch": 48.5,
"learning_rate": 1.2039439543331605e-06,
"loss": 3.6013,
"step": 186900
},
{
"epoch": 48.52,
"learning_rate": 1.183186299948106e-06,
"loss": 3.6047,
"step": 187000
},
{
"epoch": 48.55,
"learning_rate": 1.1624286455630515e-06,
"loss": 3.6083,
"step": 187100
},
{
"epoch": 48.57,
"learning_rate": 1.1416709911779971e-06,
"loss": 3.5758,
"step": 187200
},
{
"epoch": 48.6,
"learning_rate": 1.1209133367929425e-06,
"loss": 3.5949,
"step": 187300
},
{
"epoch": 48.62,
"learning_rate": 1.100155682407888e-06,
"loss": 3.6031,
"step": 187400
},
{
"epoch": 48.65,
"learning_rate": 1.0793980280228335e-06,
"loss": 3.6055,
"step": 187500
},
{
"epoch": 48.68,
"learning_rate": 1.058640373637779e-06,
"loss": 3.5967,
"step": 187600
},
{
"epoch": 48.7,
"learning_rate": 1.0378827192527245e-06,
"loss": 3.6363,
"step": 187700
},
{
"epoch": 48.73,
"learning_rate": 1.01712506486767e-06,
"loss": 3.5949,
"step": 187800
},
{
"epoch": 48.75,
"learning_rate": 9.963674104826155e-07,
"loss": 3.6098,
"step": 187900
},
{
"epoch": 48.78,
"learning_rate": 9.75609756097561e-07,
"loss": 3.5849,
"step": 188000
},
{
"epoch": 48.81,
"learning_rate": 9.548521017125067e-07,
"loss": 3.5787,
"step": 188100
},
{
"epoch": 48.83,
"learning_rate": 9.340944473274521e-07,
"loss": 3.6023,
"step": 188200
},
{
"epoch": 48.86,
"learning_rate": 9.133367929423975e-07,
"loss": 3.596,
"step": 188300
},
{
"epoch": 48.88,
"learning_rate": 8.925791385573431e-07,
"loss": 3.5904,
"step": 188400
},
{
"epoch": 48.91,
"learning_rate": 8.718214841722886e-07,
"loss": 3.5774,
"step": 188500
},
{
"epoch": 48.94,
"learning_rate": 8.510638297872341e-07,
"loss": 3.5695,
"step": 188600
},
{
"epoch": 48.96,
"learning_rate": 8.303061754021796e-07,
"loss": 3.61,
"step": 188700
},
{
"epoch": 48.99,
"learning_rate": 8.095485210171252e-07,
"loss": 3.5872,
"step": 188800
},
{
"epoch": 49.01,
"learning_rate": 7.887908666320707e-07,
"loss": 3.6107,
"step": 188900
},
{
"epoch": 49.04,
"learning_rate": 7.680332122470162e-07,
"loss": 3.5763,
"step": 189000
},
{
"epoch": 49.07,
"learning_rate": 7.472755578619617e-07,
"loss": 3.5971,
"step": 189100
},
{
"epoch": 49.09,
"learning_rate": 7.265179034769073e-07,
"loss": 3.6019,
"step": 189200
},
{
"epoch": 49.12,
"learning_rate": 7.057602490918527e-07,
"loss": 3.6155,
"step": 189300
},
{
"epoch": 49.14,
"learning_rate": 6.850025947067982e-07,
"loss": 3.5935,
"step": 189400
},
{
"epoch": 49.17,
"learning_rate": 6.642449403217437e-07,
"loss": 3.6014,
"step": 189500
},
{
"epoch": 49.2,
"learning_rate": 6.434872859366892e-07,
"loss": 3.6099,
"step": 189600
},
{
"epoch": 49.22,
"learning_rate": 6.227296315516348e-07,
"loss": 3.5569,
"step": 189700
},
{
"epoch": 49.25,
"learning_rate": 6.019719771665803e-07,
"loss": 3.6024,
"step": 189800
},
{
"epoch": 49.27,
"learning_rate": 5.812143227815258e-07,
"loss": 3.5954,
"step": 189900
},
{
"epoch": 49.3,
"learning_rate": 5.604566683964713e-07,
"loss": 3.5932,
"step": 190000
},
{
"epoch": 49.3,
"eval_gen_len": 12.2761,
"eval_loss": 3.588038682937622,
"eval_rouge1": 41.988,
"eval_rouge2": 8.8748,
"eval_rougeL": 37.9796,
"eval_rougeLsum": 37.984,
"eval_runtime": 75.4468,
"eval_samples_per_second": 51.082,
"eval_steps_per_second": 6.389,
"step": 190000
}
],
"logging_steps": 100,
"max_steps": 192700,
"num_train_epochs": 50,
"save_steps": 5000,
"total_flos": 9.256502644614758e+16,
"trial_name": null,
"trial_params": null
}