Ht5-small / trainer_state.json
Amin
first version
c47993e
raw
history blame
122 kB
{
"best_metric": 2.459300994873047,
"best_model_checkpoint": "results/models/t5-small-NewsRoom1/checkpoint-487005",
"epoch": 5.0,
"global_step": 487005,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 1.9979630599275163e-05,
"loss": 3.5575,
"step": 500
},
{
"epoch": 0.01,
"learning_rate": 1.995909692918964e-05,
"loss": 3.2397,
"step": 1000
},
{
"epoch": 0.02,
"learning_rate": 1.9938563259104117e-05,
"loss": 3.2235,
"step": 1500
},
{
"epoch": 0.02,
"learning_rate": 1.9918029589018594e-05,
"loss": 3.1572,
"step": 2000
},
{
"epoch": 0.03,
"learning_rate": 1.989749591893307e-05,
"loss": 3.1626,
"step": 2500
},
{
"epoch": 0.03,
"learning_rate": 1.987696224884755e-05,
"loss": 3.1051,
"step": 3000
},
{
"epoch": 0.04,
"learning_rate": 1.9856428578762026e-05,
"loss": 3.1111,
"step": 3500
},
{
"epoch": 0.04,
"learning_rate": 1.9835894908676503e-05,
"loss": 3.0974,
"step": 4000
},
{
"epoch": 0.05,
"learning_rate": 1.9815402305931152e-05,
"loss": 3.0898,
"step": 4500
},
{
"epoch": 0.05,
"learning_rate": 1.97949097031858e-05,
"loss": 3.1047,
"step": 5000
},
{
"epoch": 0.06,
"learning_rate": 1.9774376033100278e-05,
"loss": 3.0322,
"step": 5500
},
{
"epoch": 0.06,
"learning_rate": 1.9753842363014755e-05,
"loss": 3.0404,
"step": 6000
},
{
"epoch": 0.07,
"learning_rate": 1.9733308692929233e-05,
"loss": 3.0298,
"step": 6500
},
{
"epoch": 0.07,
"learning_rate": 1.971277502284371e-05,
"loss": 3.0271,
"step": 7000
},
{
"epoch": 0.08,
"learning_rate": 1.9692241352758187e-05,
"loss": 3.015,
"step": 7500
},
{
"epoch": 0.08,
"learning_rate": 1.9671707682672664e-05,
"loss": 3.0293,
"step": 8000
},
{
"epoch": 0.09,
"learning_rate": 1.965117401258714e-05,
"loss": 2.9866,
"step": 8500
},
{
"epoch": 0.09,
"learning_rate": 1.963068140984179e-05,
"loss": 2.9875,
"step": 9000
},
{
"epoch": 0.1,
"learning_rate": 1.9610147739756268e-05,
"loss": 2.9972,
"step": 9500
},
{
"epoch": 0.1,
"learning_rate": 1.9589614069670745e-05,
"loss": 2.9819,
"step": 10000
},
{
"epoch": 0.11,
"learning_rate": 1.9569080399585222e-05,
"loss": 2.9796,
"step": 10500
},
{
"epoch": 0.11,
"learning_rate": 1.9548628864180043e-05,
"loss": 2.9545,
"step": 11000
},
{
"epoch": 0.12,
"learning_rate": 1.952809519409452e-05,
"loss": 2.95,
"step": 11500
},
{
"epoch": 0.12,
"learning_rate": 1.9507561524008997e-05,
"loss": 2.978,
"step": 12000
},
{
"epoch": 0.13,
"learning_rate": 1.948702785392347e-05,
"loss": 2.9684,
"step": 12500
},
{
"epoch": 0.13,
"learning_rate": 1.9466494183837948e-05,
"loss": 2.959,
"step": 13000
},
{
"epoch": 0.14,
"learning_rate": 1.9445960513752425e-05,
"loss": 2.968,
"step": 13500
},
{
"epoch": 0.14,
"learning_rate": 1.9425426843666903e-05,
"loss": 2.9581,
"step": 14000
},
{
"epoch": 0.15,
"learning_rate": 1.940489317358138e-05,
"loss": 2.9357,
"step": 14500
},
{
"epoch": 0.15,
"learning_rate": 1.9384400570836032e-05,
"loss": 2.9308,
"step": 15000
},
{
"epoch": 0.16,
"learning_rate": 1.9363866900750506e-05,
"loss": 2.9518,
"step": 15500
},
{
"epoch": 0.16,
"learning_rate": 1.9343333230664983e-05,
"loss": 2.9479,
"step": 16000
},
{
"epoch": 0.17,
"learning_rate": 1.932279956057946e-05,
"loss": 2.9299,
"step": 16500
},
{
"epoch": 0.17,
"learning_rate": 1.930230695783411e-05,
"loss": 2.9688,
"step": 17000
},
{
"epoch": 0.18,
"learning_rate": 1.9281773287748587e-05,
"loss": 2.9159,
"step": 17500
},
{
"epoch": 0.18,
"learning_rate": 1.9261239617663067e-05,
"loss": 2.9034,
"step": 18000
},
{
"epoch": 0.19,
"learning_rate": 1.9240705947577544e-05,
"loss": 2.9215,
"step": 18500
},
{
"epoch": 0.2,
"learning_rate": 1.922021334483219e-05,
"loss": 2.9202,
"step": 19000
},
{
"epoch": 0.2,
"learning_rate": 1.9199679674746667e-05,
"loss": 2.8881,
"step": 19500
},
{
"epoch": 0.21,
"learning_rate": 1.9179146004661144e-05,
"loss": 2.9043,
"step": 20000
},
{
"epoch": 0.21,
"learning_rate": 1.915861233457562e-05,
"loss": 2.8839,
"step": 20500
},
{
"epoch": 0.22,
"learning_rate": 1.913811973183027e-05,
"loss": 2.8857,
"step": 21000
},
{
"epoch": 0.22,
"learning_rate": 1.9117586061744748e-05,
"loss": 2.8636,
"step": 21500
},
{
"epoch": 0.23,
"learning_rate": 1.9097052391659225e-05,
"loss": 2.8856,
"step": 22000
},
{
"epoch": 0.23,
"learning_rate": 1.9076559788913874e-05,
"loss": 2.869,
"step": 22500
},
{
"epoch": 0.24,
"learning_rate": 1.905602611882835e-05,
"loss": 2.9347,
"step": 23000
},
{
"epoch": 0.24,
"learning_rate": 1.9035492448742828e-05,
"loss": 2.88,
"step": 23500
},
{
"epoch": 0.25,
"learning_rate": 1.9014958778657305e-05,
"loss": 2.8891,
"step": 24000
},
{
"epoch": 0.25,
"learning_rate": 1.8994425108571783e-05,
"loss": 2.8673,
"step": 24500
},
{
"epoch": 0.26,
"learning_rate": 1.897393250582643e-05,
"loss": 2.8301,
"step": 25000
},
{
"epoch": 0.26,
"learning_rate": 1.895339883574091e-05,
"loss": 2.8586,
"step": 25500
},
{
"epoch": 0.27,
"learning_rate": 1.8932865165655386e-05,
"loss": 2.8351,
"step": 26000
},
{
"epoch": 0.27,
"learning_rate": 1.8912331495569863e-05,
"loss": 2.8603,
"step": 26500
},
{
"epoch": 0.28,
"learning_rate": 1.889179782548434e-05,
"loss": 2.8528,
"step": 27000
},
{
"epoch": 0.28,
"learning_rate": 1.8871264155398818e-05,
"loss": 2.8671,
"step": 27500
},
{
"epoch": 0.29,
"learning_rate": 1.8850730485313295e-05,
"loss": 2.8824,
"step": 28000
},
{
"epoch": 0.29,
"learning_rate": 1.8830196815227772e-05,
"loss": 2.8712,
"step": 28500
},
{
"epoch": 0.3,
"learning_rate": 1.880970421248242e-05,
"loss": 2.8514,
"step": 29000
},
{
"epoch": 0.3,
"learning_rate": 1.8789170542396898e-05,
"loss": 2.8822,
"step": 29500
},
{
"epoch": 0.31,
"learning_rate": 1.8768636872311375e-05,
"loss": 2.868,
"step": 30000
},
{
"epoch": 0.31,
"learning_rate": 1.8748103202225853e-05,
"loss": 2.8742,
"step": 30500
},
{
"epoch": 0.32,
"learning_rate": 1.8727610599480498e-05,
"loss": 2.8513,
"step": 31000
},
{
"epoch": 0.32,
"learning_rate": 1.8707076929394975e-05,
"loss": 2.8506,
"step": 31500
},
{
"epoch": 0.33,
"learning_rate": 1.8686543259309453e-05,
"loss": 2.8392,
"step": 32000
},
{
"epoch": 0.33,
"learning_rate": 1.8666009589223933e-05,
"loss": 2.8767,
"step": 32500
},
{
"epoch": 0.34,
"learning_rate": 1.864551698647858e-05,
"loss": 2.8435,
"step": 33000
},
{
"epoch": 0.34,
"learning_rate": 1.8625024383733228e-05,
"loss": 2.8289,
"step": 33500
},
{
"epoch": 0.35,
"learning_rate": 1.8604490713647705e-05,
"loss": 2.8443,
"step": 34000
},
{
"epoch": 0.35,
"learning_rate": 1.8583957043562182e-05,
"loss": 2.8266,
"step": 34500
},
{
"epoch": 0.36,
"learning_rate": 1.856342337347666e-05,
"loss": 2.857,
"step": 35000
},
{
"epoch": 0.36,
"learning_rate": 1.8542889703391136e-05,
"loss": 2.8371,
"step": 35500
},
{
"epoch": 0.37,
"learning_rate": 1.8522356033305614e-05,
"loss": 2.8395,
"step": 36000
},
{
"epoch": 0.37,
"learning_rate": 1.850182236322009e-05,
"loss": 2.8286,
"step": 36500
},
{
"epoch": 0.38,
"learning_rate": 1.8481288693134568e-05,
"loss": 2.8074,
"step": 37000
},
{
"epoch": 0.39,
"learning_rate": 1.8460796090389217e-05,
"loss": 2.8464,
"step": 37500
},
{
"epoch": 0.39,
"learning_rate": 1.8440262420303694e-05,
"loss": 2.8489,
"step": 38000
},
{
"epoch": 0.4,
"learning_rate": 1.841972875021817e-05,
"loss": 2.8259,
"step": 38500
},
{
"epoch": 0.4,
"learning_rate": 1.839919508013265e-05,
"loss": 2.8229,
"step": 39000
},
{
"epoch": 0.41,
"learning_rate": 1.8378702477387298e-05,
"loss": 2.7956,
"step": 39500
},
{
"epoch": 0.41,
"learning_rate": 1.8358209874641947e-05,
"loss": 2.837,
"step": 40000
},
{
"epoch": 0.42,
"learning_rate": 1.8337676204556424e-05,
"loss": 2.8233,
"step": 40500
},
{
"epoch": 0.42,
"learning_rate": 1.83171425344709e-05,
"loss": 2.8293,
"step": 41000
},
{
"epoch": 0.43,
"learning_rate": 1.8296608864385378e-05,
"loss": 2.8451,
"step": 41500
},
{
"epoch": 0.43,
"learning_rate": 1.8276075194299855e-05,
"loss": 2.8166,
"step": 42000
},
{
"epoch": 0.44,
"learning_rate": 1.8255541524214333e-05,
"loss": 2.8388,
"step": 42500
},
{
"epoch": 0.44,
"learning_rate": 1.823500785412881e-05,
"loss": 2.8199,
"step": 43000
},
{
"epoch": 0.45,
"learning_rate": 1.8214474184043287e-05,
"loss": 2.829,
"step": 43500
},
{
"epoch": 0.45,
"learning_rate": 1.8193981581297936e-05,
"loss": 2.8076,
"step": 44000
},
{
"epoch": 0.46,
"learning_rate": 1.8173447911212413e-05,
"loss": 2.8422,
"step": 44500
},
{
"epoch": 0.46,
"learning_rate": 1.815291424112689e-05,
"loss": 2.8232,
"step": 45000
},
{
"epoch": 0.47,
"learning_rate": 1.8132380571041364e-05,
"loss": 2.8369,
"step": 45500
},
{
"epoch": 0.47,
"learning_rate": 1.8111887968296017e-05,
"loss": 2.8246,
"step": 46000
},
{
"epoch": 0.48,
"learning_rate": 1.8091354298210494e-05,
"loss": 2.8111,
"step": 46500
},
{
"epoch": 0.48,
"learning_rate": 1.807082062812497e-05,
"loss": 2.7974,
"step": 47000
},
{
"epoch": 0.49,
"learning_rate": 1.8050286958039448e-05,
"loss": 2.8035,
"step": 47500
},
{
"epoch": 0.49,
"learning_rate": 1.8029794355294094e-05,
"loss": 2.7829,
"step": 48000
},
{
"epoch": 0.5,
"learning_rate": 1.800926068520857e-05,
"loss": 2.8093,
"step": 48500
},
{
"epoch": 0.5,
"learning_rate": 1.7988727015123048e-05,
"loss": 2.7819,
"step": 49000
},
{
"epoch": 0.51,
"learning_rate": 1.7968193345037525e-05,
"loss": 2.8044,
"step": 49500
},
{
"epoch": 0.51,
"learning_rate": 1.7947659674952006e-05,
"loss": 2.7933,
"step": 50000
},
{
"epoch": 0.52,
"learning_rate": 1.792716707220665e-05,
"loss": 2.8061,
"step": 50500
},
{
"epoch": 0.52,
"learning_rate": 1.790663340212113e-05,
"loss": 2.8124,
"step": 51000
},
{
"epoch": 0.53,
"learning_rate": 1.7886099732035606e-05,
"loss": 2.7989,
"step": 51500
},
{
"epoch": 0.53,
"learning_rate": 1.7865566061950083e-05,
"loss": 2.7809,
"step": 52000
},
{
"epoch": 0.54,
"learning_rate": 1.7845073459204732e-05,
"loss": 2.7934,
"step": 52500
},
{
"epoch": 0.54,
"learning_rate": 1.782453978911921e-05,
"loss": 2.7905,
"step": 53000
},
{
"epoch": 0.55,
"learning_rate": 1.7804006119033686e-05,
"loss": 2.7978,
"step": 53500
},
{
"epoch": 0.55,
"learning_rate": 1.7783472448948164e-05,
"loss": 2.7977,
"step": 54000
},
{
"epoch": 0.56,
"learning_rate": 1.7762979846202813e-05,
"loss": 2.7847,
"step": 54500
},
{
"epoch": 0.56,
"learning_rate": 1.774244617611729e-05,
"loss": 2.8054,
"step": 55000
},
{
"epoch": 0.57,
"learning_rate": 1.7721912506031767e-05,
"loss": 2.7806,
"step": 55500
},
{
"epoch": 0.57,
"learning_rate": 1.7701378835946244e-05,
"loss": 2.788,
"step": 56000
},
{
"epoch": 0.58,
"learning_rate": 1.7680886233200893e-05,
"loss": 2.7815,
"step": 56500
},
{
"epoch": 0.59,
"learning_rate": 1.766035256311537e-05,
"loss": 2.7515,
"step": 57000
},
{
"epoch": 0.59,
"learning_rate": 1.7639818893029848e-05,
"loss": 2.7862,
"step": 57500
},
{
"epoch": 0.6,
"learning_rate": 1.7619285222944325e-05,
"loss": 2.7743,
"step": 58000
},
{
"epoch": 0.6,
"learning_rate": 1.7598833687539142e-05,
"loss": 2.7668,
"step": 58500
},
{
"epoch": 0.61,
"learning_rate": 1.757830001745362e-05,
"loss": 2.8068,
"step": 59000
},
{
"epoch": 0.61,
"learning_rate": 1.75577663473681e-05,
"loss": 2.7708,
"step": 59500
},
{
"epoch": 0.62,
"learning_rate": 1.7537232677282577e-05,
"loss": 2.7488,
"step": 60000
},
{
"epoch": 0.62,
"learning_rate": 1.7516699007197054e-05,
"loss": 2.8045,
"step": 60500
},
{
"epoch": 0.63,
"learning_rate": 1.749616533711153e-05,
"loss": 2.7621,
"step": 61000
},
{
"epoch": 0.63,
"learning_rate": 1.747563166702601e-05,
"loss": 2.7739,
"step": 61500
},
{
"epoch": 0.64,
"learning_rate": 1.7455097996940486e-05,
"loss": 2.7787,
"step": 62000
},
{
"epoch": 0.64,
"learning_rate": 1.743460539419513e-05,
"loss": 2.7455,
"step": 62500
},
{
"epoch": 0.65,
"learning_rate": 1.741407172410961e-05,
"loss": 2.7489,
"step": 63000
},
{
"epoch": 0.65,
"learning_rate": 1.739353805402409e-05,
"loss": 2.7444,
"step": 63500
},
{
"epoch": 0.66,
"learning_rate": 1.7373045451278735e-05,
"loss": 2.7777,
"step": 64000
},
{
"epoch": 0.66,
"learning_rate": 1.7352511781193212e-05,
"loss": 2.7936,
"step": 64500
},
{
"epoch": 0.67,
"learning_rate": 1.733197811110769e-05,
"loss": 2.7491,
"step": 65000
},
{
"epoch": 0.67,
"learning_rate": 1.7311444441022167e-05,
"loss": 2.7712,
"step": 65500
},
{
"epoch": 0.68,
"learning_rate": 1.7290910770936644e-05,
"loss": 2.7617,
"step": 66000
},
{
"epoch": 0.68,
"learning_rate": 1.727037710085112e-05,
"loss": 2.7422,
"step": 66500
},
{
"epoch": 0.69,
"learning_rate": 1.7249843430765598e-05,
"loss": 2.7406,
"step": 67000
},
{
"epoch": 0.69,
"learning_rate": 1.722930976068008e-05,
"loss": 2.7828,
"step": 67500
},
{
"epoch": 0.7,
"learning_rate": 1.7208817157934724e-05,
"loss": 2.7263,
"step": 68000
},
{
"epoch": 0.7,
"learning_rate": 1.71882834878492e-05,
"loss": 2.7545,
"step": 68500
},
{
"epoch": 0.71,
"learning_rate": 1.716774981776368e-05,
"loss": 2.7511,
"step": 69000
},
{
"epoch": 0.71,
"learning_rate": 1.7147257215018328e-05,
"loss": 2.7539,
"step": 69500
},
{
"epoch": 0.72,
"learning_rate": 1.7126723544932805e-05,
"loss": 2.7397,
"step": 70000
},
{
"epoch": 0.72,
"learning_rate": 1.7106189874847282e-05,
"loss": 2.7778,
"step": 70500
},
{
"epoch": 0.73,
"learning_rate": 1.708565620476176e-05,
"loss": 2.7383,
"step": 71000
},
{
"epoch": 0.73,
"learning_rate": 1.7065122534676236e-05,
"loss": 2.7689,
"step": 71500
},
{
"epoch": 0.74,
"learning_rate": 1.7044588864590714e-05,
"loss": 2.7666,
"step": 72000
},
{
"epoch": 0.74,
"learning_rate": 1.702405519450519e-05,
"loss": 2.7912,
"step": 72500
},
{
"epoch": 0.75,
"learning_rate": 1.7003521524419668e-05,
"loss": 2.7468,
"step": 73000
},
{
"epoch": 0.75,
"learning_rate": 1.698306998901449e-05,
"loss": 2.7189,
"step": 73500
},
{
"epoch": 0.76,
"learning_rate": 1.6962536318928966e-05,
"loss": 2.7461,
"step": 74000
},
{
"epoch": 0.76,
"learning_rate": 1.6942002648843443e-05,
"loss": 2.7588,
"step": 74500
},
{
"epoch": 0.77,
"learning_rate": 1.692146897875792e-05,
"loss": 2.7358,
"step": 75000
},
{
"epoch": 0.78,
"learning_rate": 1.6900935308672398e-05,
"loss": 2.7604,
"step": 75500
},
{
"epoch": 0.78,
"learning_rate": 1.6880401638586875e-05,
"loss": 2.7661,
"step": 76000
},
{
"epoch": 0.79,
"learning_rate": 1.6859867968501352e-05,
"loss": 2.75,
"step": 76500
},
{
"epoch": 0.79,
"learning_rate": 1.683933429841583e-05,
"loss": 2.7526,
"step": 77000
},
{
"epoch": 0.8,
"learning_rate": 1.6818800628330306e-05,
"loss": 2.7351,
"step": 77500
},
{
"epoch": 0.8,
"learning_rate": 1.6798308025584955e-05,
"loss": 2.7663,
"step": 78000
},
{
"epoch": 0.81,
"learning_rate": 1.6777774355499433e-05,
"loss": 2.7209,
"step": 78500
},
{
"epoch": 0.81,
"learning_rate": 1.675724068541391e-05,
"loss": 2.762,
"step": 79000
},
{
"epoch": 0.82,
"learning_rate": 1.6736707015328387e-05,
"loss": 2.7819,
"step": 79500
},
{
"epoch": 0.82,
"learning_rate": 1.6716214412583033e-05,
"loss": 2.7384,
"step": 80000
},
{
"epoch": 0.83,
"learning_rate": 1.669568074249751e-05,
"loss": 2.7595,
"step": 80500
},
{
"epoch": 0.83,
"learning_rate": 1.6675147072411987e-05,
"loss": 2.7407,
"step": 81000
},
{
"epoch": 0.84,
"learning_rate": 1.6654613402326468e-05,
"loss": 2.7268,
"step": 81500
},
{
"epoch": 0.84,
"learning_rate": 1.6634120799581117e-05,
"loss": 2.7237,
"step": 82000
},
{
"epoch": 0.85,
"learning_rate": 1.661358712949559e-05,
"loss": 2.7743,
"step": 82500
},
{
"epoch": 0.85,
"learning_rate": 1.6593053459410068e-05,
"loss": 2.7322,
"step": 83000
},
{
"epoch": 0.86,
"learning_rate": 1.6572519789324545e-05,
"loss": 2.7411,
"step": 83500
},
{
"epoch": 0.86,
"learning_rate": 1.6552027186579194e-05,
"loss": 2.779,
"step": 84000
},
{
"epoch": 0.87,
"learning_rate": 1.6531493516493674e-05,
"loss": 2.7452,
"step": 84500
},
{
"epoch": 0.87,
"learning_rate": 1.651095984640815e-05,
"loss": 2.7376,
"step": 85000
},
{
"epoch": 0.88,
"learning_rate": 1.6490426176322625e-05,
"loss": 2.7268,
"step": 85500
},
{
"epoch": 0.88,
"learning_rate": 1.6469933573577274e-05,
"loss": 2.7354,
"step": 86000
},
{
"epoch": 0.89,
"learning_rate": 1.644939990349175e-05,
"loss": 2.7492,
"step": 86500
},
{
"epoch": 0.89,
"learning_rate": 1.642886623340623e-05,
"loss": 2.7243,
"step": 87000
},
{
"epoch": 0.9,
"learning_rate": 1.6408332563320706e-05,
"loss": 2.7543,
"step": 87500
},
{
"epoch": 0.9,
"learning_rate": 1.6387839960575355e-05,
"loss": 2.7325,
"step": 88000
},
{
"epoch": 0.91,
"learning_rate": 1.6367306290489832e-05,
"loss": 2.7412,
"step": 88500
},
{
"epoch": 0.91,
"learning_rate": 1.634677262040431e-05,
"loss": 2.7478,
"step": 89000
},
{
"epoch": 0.92,
"learning_rate": 1.6326238950318786e-05,
"loss": 2.7506,
"step": 89500
},
{
"epoch": 0.92,
"learning_rate": 1.6305746347573435e-05,
"loss": 2.7484,
"step": 90000
},
{
"epoch": 0.93,
"learning_rate": 1.6285212677487913e-05,
"loss": 2.7401,
"step": 90500
},
{
"epoch": 0.93,
"learning_rate": 1.626467900740239e-05,
"loss": 2.7286,
"step": 91000
},
{
"epoch": 0.94,
"learning_rate": 1.6244145337316867e-05,
"loss": 2.7509,
"step": 91500
},
{
"epoch": 0.94,
"learning_rate": 1.6223652734571516e-05,
"loss": 2.7376,
"step": 92000
},
{
"epoch": 0.95,
"learning_rate": 1.6203119064485993e-05,
"loss": 2.709,
"step": 92500
},
{
"epoch": 0.95,
"learning_rate": 1.618258539440047e-05,
"loss": 2.723,
"step": 93000
},
{
"epoch": 0.96,
"learning_rate": 1.6162051724314948e-05,
"loss": 2.7194,
"step": 93500
},
{
"epoch": 0.97,
"learning_rate": 1.6141559121569593e-05,
"loss": 2.7385,
"step": 94000
},
{
"epoch": 0.97,
"learning_rate": 1.612102545148407e-05,
"loss": 2.6993,
"step": 94500
},
{
"epoch": 0.98,
"learning_rate": 1.610049178139855e-05,
"loss": 2.7298,
"step": 95000
},
{
"epoch": 0.98,
"learning_rate": 1.6079958111313028e-05,
"loss": 2.718,
"step": 95500
},
{
"epoch": 0.99,
"learning_rate": 1.6059465508567674e-05,
"loss": 2.752,
"step": 96000
},
{
"epoch": 0.99,
"learning_rate": 1.603893183848215e-05,
"loss": 2.7448,
"step": 96500
},
{
"epoch": 1.0,
"learning_rate": 1.6018398168396628e-05,
"loss": 2.724,
"step": 97000
},
{
"epoch": 1.0,
"eval_gen_len": 14.378,
"eval_loss": 2.594017744064331,
"eval_rouge1": 32.3136,
"eval_rouge2": 13.6987,
"eval_rougeL": 29.7482,
"eval_rougeLsum": 29.7976,
"eval_runtime": 64.9599,
"eval_samples_per_second": 15.394,
"eval_steps_per_second": 1.924,
"step": 97401
},
{
"epoch": 1.0,
"learning_rate": 1.5997864498311105e-05,
"loss": 2.7119,
"step": 97500
},
{
"epoch": 1.01,
"learning_rate": 1.5977371895565754e-05,
"loss": 2.7325,
"step": 98000
},
{
"epoch": 1.01,
"learning_rate": 1.5956838225480235e-05,
"loss": 2.6739,
"step": 98500
},
{
"epoch": 1.02,
"learning_rate": 1.593630455539471e-05,
"loss": 2.6834,
"step": 99000
},
{
"epoch": 1.02,
"learning_rate": 1.5915770885309186e-05,
"loss": 2.7296,
"step": 99500
},
{
"epoch": 1.03,
"learning_rate": 1.5895278282563835e-05,
"loss": 2.723,
"step": 100000
},
{
"epoch": 1.03,
"learning_rate": 1.5874744612478312e-05,
"loss": 2.6704,
"step": 100500
},
{
"epoch": 1.04,
"learning_rate": 1.585421094239279e-05,
"loss": 2.6961,
"step": 101000
},
{
"epoch": 1.04,
"learning_rate": 1.5833677272307267e-05,
"loss": 2.7187,
"step": 101500
},
{
"epoch": 1.05,
"learning_rate": 1.5813184669561916e-05,
"loss": 2.6922,
"step": 102000
},
{
"epoch": 1.05,
"learning_rate": 1.5792650999476393e-05,
"loss": 2.7033,
"step": 102500
},
{
"epoch": 1.06,
"learning_rate": 1.577211732939087e-05,
"loss": 2.7269,
"step": 103000
},
{
"epoch": 1.06,
"learning_rate": 1.5751583659305347e-05,
"loss": 2.6851,
"step": 103500
},
{
"epoch": 1.07,
"learning_rate": 1.5731049989219824e-05,
"loss": 2.6726,
"step": 104000
},
{
"epoch": 1.07,
"learning_rate": 1.5710598453814642e-05,
"loss": 2.7191,
"step": 104500
},
{
"epoch": 1.08,
"learning_rate": 1.5690064783729122e-05,
"loss": 2.706,
"step": 105000
},
{
"epoch": 1.08,
"learning_rate": 1.56695311136436e-05,
"loss": 2.6976,
"step": 105500
},
{
"epoch": 1.09,
"learning_rate": 1.5648997443558077e-05,
"loss": 2.7006,
"step": 106000
},
{
"epoch": 1.09,
"learning_rate": 1.5628463773472554e-05,
"loss": 2.6924,
"step": 106500
},
{
"epoch": 1.1,
"learning_rate": 1.560793010338703e-05,
"loss": 2.6632,
"step": 107000
},
{
"epoch": 1.1,
"learning_rate": 1.5587396433301508e-05,
"loss": 2.7045,
"step": 107500
},
{
"epoch": 1.11,
"learning_rate": 1.5566862763215985e-05,
"loss": 2.6816,
"step": 108000
},
{
"epoch": 1.11,
"learning_rate": 1.5546370160470634e-05,
"loss": 2.6965,
"step": 108500
},
{
"epoch": 1.12,
"learning_rate": 1.552583649038511e-05,
"loss": 2.6934,
"step": 109000
},
{
"epoch": 1.12,
"learning_rate": 1.550530282029959e-05,
"loss": 2.7123,
"step": 109500
},
{
"epoch": 1.13,
"learning_rate": 1.5484769150214066e-05,
"loss": 2.6684,
"step": 110000
},
{
"epoch": 1.13,
"learning_rate": 1.546427654746871e-05,
"loss": 2.6811,
"step": 110500
},
{
"epoch": 1.14,
"learning_rate": 1.544374287738319e-05,
"loss": 2.6575,
"step": 111000
},
{
"epoch": 1.14,
"learning_rate": 1.5423209207297666e-05,
"loss": 2.702,
"step": 111500
},
{
"epoch": 1.15,
"learning_rate": 1.5402675537212143e-05,
"loss": 2.7073,
"step": 112000
},
{
"epoch": 1.16,
"learning_rate": 1.5382182934466792e-05,
"loss": 2.6536,
"step": 112500
},
{
"epoch": 1.16,
"learning_rate": 1.536164926438127e-05,
"loss": 2.6625,
"step": 113000
},
{
"epoch": 1.17,
"learning_rate": 1.5341115594295747e-05,
"loss": 2.7035,
"step": 113500
},
{
"epoch": 1.17,
"learning_rate": 1.5320622991550396e-05,
"loss": 2.6826,
"step": 114000
},
{
"epoch": 1.18,
"learning_rate": 1.5300089321464873e-05,
"loss": 2.7117,
"step": 114500
},
{
"epoch": 1.18,
"learning_rate": 1.527955565137935e-05,
"loss": 2.6746,
"step": 115000
},
{
"epoch": 1.19,
"learning_rate": 1.5259021981293827e-05,
"loss": 2.7035,
"step": 115500
},
{
"epoch": 1.19,
"learning_rate": 1.5238529378548476e-05,
"loss": 2.6901,
"step": 116000
},
{
"epoch": 1.2,
"learning_rate": 1.5217995708462953e-05,
"loss": 2.6582,
"step": 116500
},
{
"epoch": 1.2,
"learning_rate": 1.519746203837743e-05,
"loss": 2.6899,
"step": 117000
},
{
"epoch": 1.21,
"learning_rate": 1.5176928368291908e-05,
"loss": 2.677,
"step": 117500
},
{
"epoch": 1.21,
"learning_rate": 1.5156394698206385e-05,
"loss": 2.7053,
"step": 118000
},
{
"epoch": 1.22,
"learning_rate": 1.5135861028120862e-05,
"loss": 2.6964,
"step": 118500
},
{
"epoch": 1.22,
"learning_rate": 1.511532735803534e-05,
"loss": 2.6845,
"step": 119000
},
{
"epoch": 1.23,
"learning_rate": 1.5094793687949818e-05,
"loss": 2.687,
"step": 119500
},
{
"epoch": 1.23,
"learning_rate": 1.5074260017864295e-05,
"loss": 2.6311,
"step": 120000
},
{
"epoch": 1.24,
"learning_rate": 1.5053726347778773e-05,
"loss": 2.6907,
"step": 120500
},
{
"epoch": 1.24,
"learning_rate": 1.503323374503342e-05,
"loss": 2.6775,
"step": 121000
},
{
"epoch": 1.25,
"learning_rate": 1.5012700074947897e-05,
"loss": 2.6635,
"step": 121500
},
{
"epoch": 1.25,
"learning_rate": 1.4992166404862374e-05,
"loss": 2.6797,
"step": 122000
},
{
"epoch": 1.26,
"learning_rate": 1.4971632734776851e-05,
"loss": 2.6788,
"step": 122500
},
{
"epoch": 1.26,
"learning_rate": 1.4951099064691329e-05,
"loss": 2.6853,
"step": 123000
},
{
"epoch": 1.27,
"learning_rate": 1.4930565394605808e-05,
"loss": 2.6984,
"step": 123500
},
{
"epoch": 1.27,
"learning_rate": 1.4910031724520285e-05,
"loss": 2.7143,
"step": 124000
},
{
"epoch": 1.28,
"learning_rate": 1.4889498054434762e-05,
"loss": 2.6955,
"step": 124500
},
{
"epoch": 1.28,
"learning_rate": 1.486900545168941e-05,
"loss": 2.6878,
"step": 125000
},
{
"epoch": 1.29,
"learning_rate": 1.4848471781603886e-05,
"loss": 2.6728,
"step": 125500
},
{
"epoch": 1.29,
"learning_rate": 1.4827938111518364e-05,
"loss": 2.6604,
"step": 126000
},
{
"epoch": 1.3,
"learning_rate": 1.4807404441432839e-05,
"loss": 2.6752,
"step": 126500
},
{
"epoch": 1.3,
"learning_rate": 1.478691183868749e-05,
"loss": 2.6586,
"step": 127000
},
{
"epoch": 1.31,
"learning_rate": 1.4766378168601967e-05,
"loss": 2.6779,
"step": 127500
},
{
"epoch": 1.31,
"learning_rate": 1.4745885565856614e-05,
"loss": 2.6706,
"step": 128000
},
{
"epoch": 1.32,
"learning_rate": 1.4725351895771092e-05,
"loss": 2.6445,
"step": 128500
},
{
"epoch": 1.32,
"learning_rate": 1.4704818225685569e-05,
"loss": 2.6967,
"step": 129000
},
{
"epoch": 1.33,
"learning_rate": 1.4684284555600046e-05,
"loss": 2.657,
"step": 129500
},
{
"epoch": 1.33,
"learning_rate": 1.4663750885514523e-05,
"loss": 2.6834,
"step": 130000
},
{
"epoch": 1.34,
"learning_rate": 1.4643258282769172e-05,
"loss": 2.6453,
"step": 130500
},
{
"epoch": 1.34,
"learning_rate": 1.462272461268365e-05,
"loss": 2.6937,
"step": 131000
},
{
"epoch": 1.35,
"learning_rate": 1.4602190942598126e-05,
"loss": 2.6599,
"step": 131500
},
{
"epoch": 1.36,
"learning_rate": 1.4581657272512604e-05,
"loss": 2.6725,
"step": 132000
},
{
"epoch": 1.36,
"learning_rate": 1.4561123602427081e-05,
"loss": 2.6797,
"step": 132500
},
{
"epoch": 1.37,
"learning_rate": 1.4540589932341558e-05,
"loss": 2.6563,
"step": 133000
},
{
"epoch": 1.37,
"learning_rate": 1.4520056262256035e-05,
"loss": 2.6849,
"step": 133500
},
{
"epoch": 1.38,
"learning_rate": 1.4499522592170512e-05,
"loss": 2.6705,
"step": 134000
},
{
"epoch": 1.38,
"learning_rate": 1.4479029989425161e-05,
"loss": 2.6557,
"step": 134500
},
{
"epoch": 1.39,
"learning_rate": 1.4458496319339639e-05,
"loss": 2.7131,
"step": 135000
},
{
"epoch": 1.39,
"learning_rate": 1.4437962649254116e-05,
"loss": 2.6721,
"step": 135500
},
{
"epoch": 1.4,
"learning_rate": 1.4417428979168593e-05,
"loss": 2.6652,
"step": 136000
},
{
"epoch": 1.4,
"learning_rate": 1.439693637642324e-05,
"loss": 2.6549,
"step": 136500
},
{
"epoch": 1.41,
"learning_rate": 1.4376443773677891e-05,
"loss": 2.6765,
"step": 137000
},
{
"epoch": 1.41,
"learning_rate": 1.4355910103592368e-05,
"loss": 2.6764,
"step": 137500
},
{
"epoch": 1.42,
"learning_rate": 1.4335376433506845e-05,
"loss": 2.6518,
"step": 138000
},
{
"epoch": 1.42,
"learning_rate": 1.4314842763421321e-05,
"loss": 2.7014,
"step": 138500
},
{
"epoch": 1.43,
"learning_rate": 1.4294309093335798e-05,
"loss": 2.6446,
"step": 139000
},
{
"epoch": 1.43,
"learning_rate": 1.4273775423250275e-05,
"loss": 2.6551,
"step": 139500
},
{
"epoch": 1.44,
"learning_rate": 1.4253241753164752e-05,
"loss": 2.6676,
"step": 140000
},
{
"epoch": 1.44,
"learning_rate": 1.423270808307923e-05,
"loss": 2.6614,
"step": 140500
},
{
"epoch": 1.45,
"learning_rate": 1.421221548033388e-05,
"loss": 2.6662,
"step": 141000
},
{
"epoch": 1.45,
"learning_rate": 1.4191681810248356e-05,
"loss": 2.6525,
"step": 141500
},
{
"epoch": 1.46,
"learning_rate": 1.4171189207503005e-05,
"loss": 2.6505,
"step": 142000
},
{
"epoch": 1.46,
"learning_rate": 1.4150655537417482e-05,
"loss": 2.7146,
"step": 142500
},
{
"epoch": 1.47,
"learning_rate": 1.413012186733196e-05,
"loss": 2.6776,
"step": 143000
},
{
"epoch": 1.47,
"learning_rate": 1.4109588197246435e-05,
"loss": 2.6868,
"step": 143500
},
{
"epoch": 1.48,
"learning_rate": 1.4089054527160912e-05,
"loss": 2.6372,
"step": 144000
},
{
"epoch": 1.48,
"learning_rate": 1.4068520857075389e-05,
"loss": 2.6477,
"step": 144500
},
{
"epoch": 1.49,
"learning_rate": 1.4047987186989868e-05,
"loss": 2.6651,
"step": 145000
},
{
"epoch": 1.49,
"learning_rate": 1.4027453516904345e-05,
"loss": 2.6542,
"step": 145500
},
{
"epoch": 1.5,
"learning_rate": 1.4006960914158994e-05,
"loss": 2.6695,
"step": 146000
},
{
"epoch": 1.5,
"learning_rate": 1.398642724407347e-05,
"loss": 2.6435,
"step": 146500
},
{
"epoch": 1.51,
"learning_rate": 1.3965893573987947e-05,
"loss": 2.6432,
"step": 147000
},
{
"epoch": 1.51,
"learning_rate": 1.3945400971242596e-05,
"loss": 2.6672,
"step": 147500
},
{
"epoch": 1.52,
"learning_rate": 1.3924867301157075e-05,
"loss": 2.6579,
"step": 148000
},
{
"epoch": 1.52,
"learning_rate": 1.3904333631071552e-05,
"loss": 2.6796,
"step": 148500
},
{
"epoch": 1.53,
"learning_rate": 1.388379996098603e-05,
"loss": 2.6714,
"step": 149000
},
{
"epoch": 1.53,
"learning_rate": 1.3863266290900506e-05,
"loss": 2.6398,
"step": 149500
},
{
"epoch": 1.54,
"learning_rate": 1.3842732620814982e-05,
"loss": 2.7002,
"step": 150000
},
{
"epoch": 1.55,
"learning_rate": 1.3822240018069631e-05,
"loss": 2.6604,
"step": 150500
},
{
"epoch": 1.55,
"learning_rate": 1.3801706347984108e-05,
"loss": 2.6441,
"step": 151000
},
{
"epoch": 1.56,
"learning_rate": 1.3781172677898584e-05,
"loss": 2.6678,
"step": 151500
},
{
"epoch": 1.56,
"learning_rate": 1.3760639007813064e-05,
"loss": 2.6336,
"step": 152000
},
{
"epoch": 1.57,
"learning_rate": 1.3740105337727541e-05,
"loss": 2.675,
"step": 152500
},
{
"epoch": 1.57,
"learning_rate": 1.3719571667642017e-05,
"loss": 2.642,
"step": 153000
},
{
"epoch": 1.58,
"learning_rate": 1.3699037997556494e-05,
"loss": 2.6444,
"step": 153500
},
{
"epoch": 1.58,
"learning_rate": 1.3678504327470971e-05,
"loss": 2.6615,
"step": 154000
},
{
"epoch": 1.59,
"learning_rate": 1.365801172472562e-05,
"loss": 2.6338,
"step": 154500
},
{
"epoch": 1.59,
"learning_rate": 1.3637478054640096e-05,
"loss": 2.6943,
"step": 155000
},
{
"epoch": 1.6,
"learning_rate": 1.3616944384554573e-05,
"loss": 2.6665,
"step": 155500
},
{
"epoch": 1.6,
"learning_rate": 1.3596410714469053e-05,
"loss": 2.6514,
"step": 156000
},
{
"epoch": 1.61,
"learning_rate": 1.35759181117237e-05,
"loss": 2.6531,
"step": 156500
},
{
"epoch": 1.61,
"learning_rate": 1.3555384441638178e-05,
"loss": 2.6352,
"step": 157000
},
{
"epoch": 1.62,
"learning_rate": 1.3534850771552655e-05,
"loss": 2.6602,
"step": 157500
},
{
"epoch": 1.62,
"learning_rate": 1.351431710146713e-05,
"loss": 2.642,
"step": 158000
},
{
"epoch": 1.63,
"learning_rate": 1.349382449872178e-05,
"loss": 2.6788,
"step": 158500
},
{
"epoch": 1.63,
"learning_rate": 1.3473290828636259e-05,
"loss": 2.6793,
"step": 159000
},
{
"epoch": 1.64,
"learning_rate": 1.3452757158550736e-05,
"loss": 2.6424,
"step": 159500
},
{
"epoch": 1.64,
"learning_rate": 1.3432223488465213e-05,
"loss": 2.7017,
"step": 160000
},
{
"epoch": 1.65,
"learning_rate": 1.341173088571986e-05,
"loss": 2.6175,
"step": 160500
},
{
"epoch": 1.65,
"learning_rate": 1.3391197215634337e-05,
"loss": 2.6276,
"step": 161000
},
{
"epoch": 1.66,
"learning_rate": 1.3370663545548815e-05,
"loss": 2.6621,
"step": 161500
},
{
"epoch": 1.66,
"learning_rate": 1.3350170942803462e-05,
"loss": 2.6435,
"step": 162000
},
{
"epoch": 1.67,
"learning_rate": 1.332963727271794e-05,
"loss": 2.6881,
"step": 162500
},
{
"epoch": 1.67,
"learning_rate": 1.3309103602632418e-05,
"loss": 2.6399,
"step": 163000
},
{
"epoch": 1.68,
"learning_rate": 1.3288569932546895e-05,
"loss": 2.6619,
"step": 163500
},
{
"epoch": 1.68,
"learning_rate": 1.3268036262461372e-05,
"loss": 2.6493,
"step": 164000
},
{
"epoch": 1.69,
"learning_rate": 1.324750259237585e-05,
"loss": 2.6573,
"step": 164500
},
{
"epoch": 1.69,
"learning_rate": 1.3226968922290327e-05,
"loss": 2.668,
"step": 165000
},
{
"epoch": 1.7,
"learning_rate": 1.3206435252204804e-05,
"loss": 2.6345,
"step": 165500
},
{
"epoch": 1.7,
"learning_rate": 1.3185942649459451e-05,
"loss": 2.665,
"step": 166000
},
{
"epoch": 1.71,
"learning_rate": 1.316540897937393e-05,
"loss": 2.6491,
"step": 166500
},
{
"epoch": 1.71,
"learning_rate": 1.3144875309288407e-05,
"loss": 2.6337,
"step": 167000
},
{
"epoch": 1.72,
"learning_rate": 1.3124341639202885e-05,
"loss": 2.6246,
"step": 167500
},
{
"epoch": 1.72,
"learning_rate": 1.3103807969117362e-05,
"loss": 2.6633,
"step": 168000
},
{
"epoch": 1.73,
"learning_rate": 1.3083274299031839e-05,
"loss": 2.6581,
"step": 168500
},
{
"epoch": 1.74,
"learning_rate": 1.3062740628946316e-05,
"loss": 2.6849,
"step": 169000
},
{
"epoch": 1.74,
"learning_rate": 1.3042206958860793e-05,
"loss": 2.647,
"step": 169500
},
{
"epoch": 1.75,
"learning_rate": 1.3021714356115442e-05,
"loss": 2.6492,
"step": 170000
},
{
"epoch": 1.75,
"learning_rate": 1.300118068602992e-05,
"loss": 2.6167,
"step": 170500
},
{
"epoch": 1.76,
"learning_rate": 1.2980647015944397e-05,
"loss": 2.6717,
"step": 171000
},
{
"epoch": 1.76,
"learning_rate": 1.2960113345858874e-05,
"loss": 2.6636,
"step": 171500
},
{
"epoch": 1.77,
"learning_rate": 1.2939620743113521e-05,
"loss": 2.6242,
"step": 172000
},
{
"epoch": 1.77,
"learning_rate": 1.2919087073027998e-05,
"loss": 2.6444,
"step": 172500
},
{
"epoch": 1.78,
"learning_rate": 1.2898594470282646e-05,
"loss": 2.6382,
"step": 173000
},
{
"epoch": 1.78,
"learning_rate": 1.2878060800197125e-05,
"loss": 2.6251,
"step": 173500
},
{
"epoch": 1.79,
"learning_rate": 1.2857527130111602e-05,
"loss": 2.6521,
"step": 174000
},
{
"epoch": 1.79,
"learning_rate": 1.2836993460026079e-05,
"loss": 2.6421,
"step": 174500
},
{
"epoch": 1.8,
"learning_rate": 1.2816459789940556e-05,
"loss": 2.6558,
"step": 175000
},
{
"epoch": 1.8,
"learning_rate": 1.2795926119855033e-05,
"loss": 2.6497,
"step": 175500
},
{
"epoch": 1.81,
"learning_rate": 1.277539244976951e-05,
"loss": 2.6507,
"step": 176000
},
{
"epoch": 1.81,
"learning_rate": 1.2754858779683988e-05,
"loss": 2.6507,
"step": 176500
},
{
"epoch": 1.82,
"learning_rate": 1.2734366176938635e-05,
"loss": 2.6641,
"step": 177000
},
{
"epoch": 1.82,
"learning_rate": 1.2713832506853114e-05,
"loss": 2.6628,
"step": 177500
},
{
"epoch": 1.83,
"learning_rate": 1.2693298836767591e-05,
"loss": 2.6347,
"step": 178000
},
{
"epoch": 1.83,
"learning_rate": 1.2672765166682068e-05,
"loss": 2.6648,
"step": 178500
},
{
"epoch": 1.84,
"learning_rate": 1.2652272563936716e-05,
"loss": 2.6192,
"step": 179000
},
{
"epoch": 1.84,
"learning_rate": 1.2631738893851193e-05,
"loss": 2.6273,
"step": 179500
},
{
"epoch": 1.85,
"learning_rate": 1.261120522376567e-05,
"loss": 2.638,
"step": 180000
},
{
"epoch": 1.85,
"learning_rate": 1.2590671553680147e-05,
"loss": 2.6297,
"step": 180500
},
{
"epoch": 1.86,
"learning_rate": 1.2570178950934798e-05,
"loss": 2.6306,
"step": 181000
},
{
"epoch": 1.86,
"learning_rate": 1.2549645280849273e-05,
"loss": 2.6197,
"step": 181500
},
{
"epoch": 1.87,
"learning_rate": 1.252911161076375e-05,
"loss": 2.6327,
"step": 182000
},
{
"epoch": 1.87,
"learning_rate": 1.2508577940678228e-05,
"loss": 2.6493,
"step": 182500
},
{
"epoch": 1.88,
"learning_rate": 1.2488085337932877e-05,
"loss": 2.6695,
"step": 183000
},
{
"epoch": 1.88,
"learning_rate": 1.2467551667847352e-05,
"loss": 2.6641,
"step": 183500
},
{
"epoch": 1.89,
"learning_rate": 1.244701799776183e-05,
"loss": 2.6565,
"step": 184000
},
{
"epoch": 1.89,
"learning_rate": 1.242648432767631e-05,
"loss": 2.6743,
"step": 184500
},
{
"epoch": 1.9,
"learning_rate": 1.2405991724930957e-05,
"loss": 2.6213,
"step": 185000
},
{
"epoch": 1.9,
"learning_rate": 1.2385499122185605e-05,
"loss": 2.6396,
"step": 185500
},
{
"epoch": 1.91,
"learning_rate": 1.2364965452100082e-05,
"loss": 2.6562,
"step": 186000
},
{
"epoch": 1.91,
"learning_rate": 1.2344431782014559e-05,
"loss": 2.6467,
"step": 186500
},
{
"epoch": 1.92,
"learning_rate": 1.2323898111929036e-05,
"loss": 2.6323,
"step": 187000
},
{
"epoch": 1.93,
"learning_rate": 1.2303364441843515e-05,
"loss": 2.6235,
"step": 187500
},
{
"epoch": 1.93,
"learning_rate": 1.2282830771757992e-05,
"loss": 2.6383,
"step": 188000
},
{
"epoch": 1.94,
"learning_rate": 1.226229710167247e-05,
"loss": 2.6293,
"step": 188500
},
{
"epoch": 1.94,
"learning_rate": 1.2241763431586947e-05,
"loss": 2.6463,
"step": 189000
},
{
"epoch": 1.95,
"learning_rate": 1.2221270828841594e-05,
"loss": 2.639,
"step": 189500
},
{
"epoch": 1.95,
"learning_rate": 1.2200737158756071e-05,
"loss": 2.6436,
"step": 190000
},
{
"epoch": 1.96,
"learning_rate": 1.2180203488670548e-05,
"loss": 2.6464,
"step": 190500
},
{
"epoch": 1.96,
"learning_rate": 1.2159669818585026e-05,
"loss": 2.6449,
"step": 191000
},
{
"epoch": 1.97,
"learning_rate": 1.2139177215839675e-05,
"loss": 2.615,
"step": 191500
},
{
"epoch": 1.97,
"learning_rate": 1.2118643545754152e-05,
"loss": 2.6306,
"step": 192000
},
{
"epoch": 1.98,
"learning_rate": 1.2098109875668629e-05,
"loss": 2.6308,
"step": 192500
},
{
"epoch": 1.98,
"learning_rate": 1.2077576205583106e-05,
"loss": 2.6156,
"step": 193000
},
{
"epoch": 1.99,
"learning_rate": 1.2057042535497583e-05,
"loss": 2.6197,
"step": 193500
},
{
"epoch": 1.99,
"learning_rate": 1.203650886541206e-05,
"loss": 2.596,
"step": 194000
},
{
"epoch": 2.0,
"learning_rate": 1.2015975195326538e-05,
"loss": 2.6856,
"step": 194500
},
{
"epoch": 2.0,
"eval_gen_len": 14.239,
"eval_loss": 2.5211517810821533,
"eval_rouge1": 32.6384,
"eval_rouge2": 14.1175,
"eval_rougeL": 30.0462,
"eval_rougeLsum": 30.1515,
"eval_runtime": 63.6279,
"eval_samples_per_second": 15.716,
"eval_steps_per_second": 1.965,
"step": 194802
},
{
"epoch": 2.0,
"learning_rate": 1.1995441525241013e-05,
"loss": 2.6536,
"step": 195000
},
{
"epoch": 2.01,
"learning_rate": 1.1974948922495664e-05,
"loss": 2.5967,
"step": 195500
},
{
"epoch": 2.01,
"learning_rate": 1.1954415252410141e-05,
"loss": 2.6196,
"step": 196000
},
{
"epoch": 2.02,
"learning_rate": 1.1933881582324618e-05,
"loss": 2.6051,
"step": 196500
},
{
"epoch": 2.02,
"learning_rate": 1.1913347912239095e-05,
"loss": 2.6344,
"step": 197000
},
{
"epoch": 2.03,
"learning_rate": 1.1892855309493743e-05,
"loss": 2.6343,
"step": 197500
},
{
"epoch": 2.03,
"learning_rate": 1.187232163940822e-05,
"loss": 2.5969,
"step": 198000
},
{
"epoch": 2.04,
"learning_rate": 1.1851787969322699e-05,
"loss": 2.6364,
"step": 198500
},
{
"epoch": 2.04,
"learning_rate": 1.1831254299237176e-05,
"loss": 2.5896,
"step": 199000
},
{
"epoch": 2.05,
"learning_rate": 1.1810761696491823e-05,
"loss": 2.6468,
"step": 199500
},
{
"epoch": 2.05,
"learning_rate": 1.17902280264063e-05,
"loss": 2.6083,
"step": 200000
},
{
"epoch": 2.06,
"learning_rate": 1.1769694356320778e-05,
"loss": 2.6013,
"step": 200500
},
{
"epoch": 2.06,
"learning_rate": 1.1749160686235255e-05,
"loss": 2.6303,
"step": 201000
},
{
"epoch": 2.07,
"learning_rate": 1.1728668083489902e-05,
"loss": 2.616,
"step": 201500
},
{
"epoch": 2.07,
"learning_rate": 1.1708134413404381e-05,
"loss": 2.6251,
"step": 202000
},
{
"epoch": 2.08,
"learning_rate": 1.1687600743318858e-05,
"loss": 2.6259,
"step": 202500
},
{
"epoch": 2.08,
"learning_rate": 1.1667067073233336e-05,
"loss": 2.6299,
"step": 203000
},
{
"epoch": 2.09,
"learning_rate": 1.1646574470487983e-05,
"loss": 2.6205,
"step": 203500
},
{
"epoch": 2.09,
"learning_rate": 1.162604080040246e-05,
"loss": 2.6091,
"step": 204000
},
{
"epoch": 2.1,
"learning_rate": 1.1605507130316937e-05,
"loss": 2.6182,
"step": 204500
},
{
"epoch": 2.1,
"learning_rate": 1.1584973460231414e-05,
"loss": 2.6144,
"step": 205000
},
{
"epoch": 2.11,
"learning_rate": 1.1564480857486065e-05,
"loss": 2.6163,
"step": 205500
},
{
"epoch": 2.11,
"learning_rate": 1.1543947187400542e-05,
"loss": 2.606,
"step": 206000
},
{
"epoch": 2.12,
"learning_rate": 1.1523413517315018e-05,
"loss": 2.6081,
"step": 206500
},
{
"epoch": 2.13,
"learning_rate": 1.1502879847229495e-05,
"loss": 2.6181,
"step": 207000
},
{
"epoch": 2.13,
"learning_rate": 1.1482387244484144e-05,
"loss": 2.5952,
"step": 207500
},
{
"epoch": 2.14,
"learning_rate": 1.1461853574398621e-05,
"loss": 2.6011,
"step": 208000
},
{
"epoch": 2.14,
"learning_rate": 1.1441319904313097e-05,
"loss": 2.6163,
"step": 208500
},
{
"epoch": 2.15,
"learning_rate": 1.1420827301567747e-05,
"loss": 2.6228,
"step": 209000
},
{
"epoch": 2.15,
"learning_rate": 1.1400293631482225e-05,
"loss": 2.638,
"step": 209500
},
{
"epoch": 2.16,
"learning_rate": 1.1379759961396702e-05,
"loss": 2.6202,
"step": 210000
},
{
"epoch": 2.16,
"learning_rate": 1.1359226291311179e-05,
"loss": 2.5915,
"step": 210500
},
{
"epoch": 2.17,
"learning_rate": 1.1338692621225656e-05,
"loss": 2.6379,
"step": 211000
},
{
"epoch": 2.17,
"learning_rate": 1.1318158951140132e-05,
"loss": 2.5958,
"step": 211500
},
{
"epoch": 2.18,
"learning_rate": 1.1297625281054609e-05,
"loss": 2.6,
"step": 212000
},
{
"epoch": 2.18,
"learning_rate": 1.1277091610969086e-05,
"loss": 2.5956,
"step": 212500
},
{
"epoch": 2.19,
"learning_rate": 1.1256599008223737e-05,
"loss": 2.6104,
"step": 213000
},
{
"epoch": 2.19,
"learning_rate": 1.1236065338138214e-05,
"loss": 2.6338,
"step": 213500
},
{
"epoch": 2.2,
"learning_rate": 1.1215531668052691e-05,
"loss": 2.6188,
"step": 214000
},
{
"epoch": 2.2,
"learning_rate": 1.1194997997967168e-05,
"loss": 2.6236,
"step": 214500
},
{
"epoch": 2.21,
"learning_rate": 1.1174505395221816e-05,
"loss": 2.5834,
"step": 215000
},
{
"epoch": 2.21,
"learning_rate": 1.1153971725136293e-05,
"loss": 2.6136,
"step": 215500
},
{
"epoch": 2.22,
"learning_rate": 1.1133438055050772e-05,
"loss": 2.6168,
"step": 216000
},
{
"epoch": 2.22,
"learning_rate": 1.1112904384965249e-05,
"loss": 2.6097,
"step": 216500
},
{
"epoch": 2.23,
"learning_rate": 1.1092411782219896e-05,
"loss": 2.5822,
"step": 217000
},
{
"epoch": 2.23,
"learning_rate": 1.1071878112134373e-05,
"loss": 2.5863,
"step": 217500
},
{
"epoch": 2.24,
"learning_rate": 1.105134444204885e-05,
"loss": 2.5925,
"step": 218000
},
{
"epoch": 2.24,
"learning_rate": 1.1030810771963328e-05,
"loss": 2.5834,
"step": 218500
},
{
"epoch": 2.25,
"learning_rate": 1.1010318169217975e-05,
"loss": 2.6185,
"step": 219000
},
{
"epoch": 2.25,
"learning_rate": 1.0989784499132454e-05,
"loss": 2.5897,
"step": 219500
},
{
"epoch": 2.26,
"learning_rate": 1.0969250829046931e-05,
"loss": 2.6144,
"step": 220000
},
{
"epoch": 2.26,
"learning_rate": 1.0948717158961408e-05,
"loss": 2.5723,
"step": 220500
},
{
"epoch": 2.27,
"learning_rate": 1.0928224556216056e-05,
"loss": 2.617,
"step": 221000
},
{
"epoch": 2.27,
"learning_rate": 1.0907690886130533e-05,
"loss": 2.588,
"step": 221500
},
{
"epoch": 2.28,
"learning_rate": 1.088715721604501e-05,
"loss": 2.6123,
"step": 222000
},
{
"epoch": 2.28,
"learning_rate": 1.0866623545959487e-05,
"loss": 2.6046,
"step": 222500
},
{
"epoch": 2.29,
"learning_rate": 1.0846130943214138e-05,
"loss": 2.6093,
"step": 223000
},
{
"epoch": 2.29,
"learning_rate": 1.0825597273128613e-05,
"loss": 2.6046,
"step": 223500
},
{
"epoch": 2.3,
"learning_rate": 1.080506360304309e-05,
"loss": 2.6244,
"step": 224000
},
{
"epoch": 2.3,
"learning_rate": 1.0784529932957568e-05,
"loss": 2.5966,
"step": 224500
},
{
"epoch": 2.31,
"learning_rate": 1.0764037330212215e-05,
"loss": 2.6191,
"step": 225000
},
{
"epoch": 2.32,
"learning_rate": 1.0743503660126692e-05,
"loss": 2.6419,
"step": 225500
},
{
"epoch": 2.32,
"learning_rate": 1.072296999004117e-05,
"loss": 2.6193,
"step": 226000
},
{
"epoch": 2.33,
"learning_rate": 1.0702436319955648e-05,
"loss": 2.6015,
"step": 226500
},
{
"epoch": 2.33,
"learning_rate": 1.0681943717210297e-05,
"loss": 2.6035,
"step": 227000
},
{
"epoch": 2.34,
"learning_rate": 1.0661410047124775e-05,
"loss": 2.5822,
"step": 227500
},
{
"epoch": 2.34,
"learning_rate": 1.0640876377039252e-05,
"loss": 2.6084,
"step": 228000
},
{
"epoch": 2.35,
"learning_rate": 1.0620342706953727e-05,
"loss": 2.5711,
"step": 228500
},
{
"epoch": 2.35,
"learning_rate": 1.0599850104208376e-05,
"loss": 2.5969,
"step": 229000
},
{
"epoch": 2.36,
"learning_rate": 1.0579316434122855e-05,
"loss": 2.5911,
"step": 229500
},
{
"epoch": 2.36,
"learning_rate": 1.0558782764037332e-05,
"loss": 2.608,
"step": 230000
},
{
"epoch": 2.37,
"learning_rate": 1.053824909395181e-05,
"loss": 2.63,
"step": 230500
},
{
"epoch": 2.37,
"learning_rate": 1.0517756491206457e-05,
"loss": 2.5886,
"step": 231000
},
{
"epoch": 2.38,
"learning_rate": 1.0497222821120934e-05,
"loss": 2.6097,
"step": 231500
},
{
"epoch": 2.38,
"learning_rate": 1.0476689151035411e-05,
"loss": 2.6133,
"step": 232000
},
{
"epoch": 2.39,
"learning_rate": 1.0456155480949888e-05,
"loss": 2.6208,
"step": 232500
},
{
"epoch": 2.39,
"learning_rate": 1.0435662878204537e-05,
"loss": 2.6012,
"step": 233000
},
{
"epoch": 2.4,
"learning_rate": 1.0415129208119015e-05,
"loss": 2.632,
"step": 233500
},
{
"epoch": 2.4,
"learning_rate": 1.0394595538033492e-05,
"loss": 2.5796,
"step": 234000
},
{
"epoch": 2.41,
"learning_rate": 1.0374061867947969e-05,
"loss": 2.6075,
"step": 234500
},
{
"epoch": 2.41,
"learning_rate": 1.0353610332542786e-05,
"loss": 2.6212,
"step": 235000
},
{
"epoch": 2.42,
"learning_rate": 1.0333076662457264e-05,
"loss": 2.5728,
"step": 235500
},
{
"epoch": 2.42,
"learning_rate": 1.0312542992371744e-05,
"loss": 2.5845,
"step": 236000
},
{
"epoch": 2.43,
"learning_rate": 1.0292009322286221e-05,
"loss": 2.5888,
"step": 236500
},
{
"epoch": 2.43,
"learning_rate": 1.0271475652200697e-05,
"loss": 2.5924,
"step": 237000
},
{
"epoch": 2.44,
"learning_rate": 1.0250941982115174e-05,
"loss": 2.6477,
"step": 237500
},
{
"epoch": 2.44,
"learning_rate": 1.0230408312029651e-05,
"loss": 2.613,
"step": 238000
},
{
"epoch": 2.45,
"learning_rate": 1.0209874641944128e-05,
"loss": 2.576,
"step": 238500
},
{
"epoch": 2.45,
"learning_rate": 1.0189382039198776e-05,
"loss": 2.617,
"step": 239000
},
{
"epoch": 2.46,
"learning_rate": 1.0168848369113253e-05,
"loss": 2.5819,
"step": 239500
},
{
"epoch": 2.46,
"learning_rate": 1.0148314699027732e-05,
"loss": 2.5933,
"step": 240000
},
{
"epoch": 2.47,
"learning_rate": 1.0127781028942209e-05,
"loss": 2.6146,
"step": 240500
},
{
"epoch": 2.47,
"learning_rate": 1.0107247358856686e-05,
"loss": 2.6096,
"step": 241000
},
{
"epoch": 2.48,
"learning_rate": 1.0086754756111335e-05,
"loss": 2.609,
"step": 241500
},
{
"epoch": 2.48,
"learning_rate": 1.006622108602581e-05,
"loss": 2.5756,
"step": 242000
},
{
"epoch": 2.49,
"learning_rate": 1.0045687415940288e-05,
"loss": 2.5681,
"step": 242500
},
{
"epoch": 2.49,
"learning_rate": 1.0025153745854765e-05,
"loss": 2.5871,
"step": 243000
},
{
"epoch": 2.5,
"learning_rate": 1.0004661143109416e-05,
"loss": 2.602,
"step": 243500
},
{
"epoch": 2.51,
"learning_rate": 9.984127473023891e-06,
"loss": 2.6109,
"step": 244000
},
{
"epoch": 2.51,
"learning_rate": 9.96363487027854e-06,
"loss": 2.5935,
"step": 244500
},
{
"epoch": 2.52,
"learning_rate": 9.943101200193017e-06,
"loss": 2.5964,
"step": 245000
},
{
"epoch": 2.52,
"learning_rate": 9.922567530107495e-06,
"loss": 2.5973,
"step": 245500
},
{
"epoch": 2.53,
"learning_rate": 9.902033860021972e-06,
"loss": 2.5965,
"step": 246000
},
{
"epoch": 2.53,
"learning_rate": 9.881500189936449e-06,
"loss": 2.6214,
"step": 246500
},
{
"epoch": 2.54,
"learning_rate": 9.860966519850926e-06,
"loss": 2.5965,
"step": 247000
},
{
"epoch": 2.54,
"learning_rate": 9.840432849765403e-06,
"loss": 2.5984,
"step": 247500
},
{
"epoch": 2.55,
"learning_rate": 9.81989917967988e-06,
"loss": 2.6208,
"step": 248000
},
{
"epoch": 2.55,
"learning_rate": 9.79940657693453e-06,
"loss": 2.584,
"step": 248500
},
{
"epoch": 2.56,
"learning_rate": 9.778872906849007e-06,
"loss": 2.614,
"step": 249000
},
{
"epoch": 2.56,
"learning_rate": 9.758339236763484e-06,
"loss": 2.5768,
"step": 249500
},
{
"epoch": 2.57,
"learning_rate": 9.737805566677961e-06,
"loss": 2.5877,
"step": 250000
},
{
"epoch": 2.57,
"learning_rate": 9.717312963932609e-06,
"loss": 2.6115,
"step": 250500
},
{
"epoch": 2.58,
"learning_rate": 9.696779293847086e-06,
"loss": 2.5976,
"step": 251000
},
{
"epoch": 2.58,
"learning_rate": 9.676245623761565e-06,
"loss": 2.5741,
"step": 251500
},
{
"epoch": 2.59,
"learning_rate": 9.655711953676042e-06,
"loss": 2.597,
"step": 252000
},
{
"epoch": 2.59,
"learning_rate": 9.635219350930689e-06,
"loss": 2.6006,
"step": 252500
},
{
"epoch": 2.6,
"learning_rate": 9.614685680845168e-06,
"loss": 2.5908,
"step": 253000
},
{
"epoch": 2.6,
"learning_rate": 9.594152010759643e-06,
"loss": 2.6164,
"step": 253500
},
{
"epoch": 2.61,
"learning_rate": 9.57361834067412e-06,
"loss": 2.6051,
"step": 254000
},
{
"epoch": 2.61,
"learning_rate": 9.55312573792877e-06,
"loss": 2.5831,
"step": 254500
},
{
"epoch": 2.62,
"learning_rate": 9.532592067843247e-06,
"loss": 2.5701,
"step": 255000
},
{
"epoch": 2.62,
"learning_rate": 9.512058397757724e-06,
"loss": 2.613,
"step": 255500
},
{
"epoch": 2.63,
"learning_rate": 9.491524727672201e-06,
"loss": 2.6147,
"step": 256000
},
{
"epoch": 2.63,
"learning_rate": 9.47103212492685e-06,
"loss": 2.606,
"step": 256500
},
{
"epoch": 2.64,
"learning_rate": 9.450498454841327e-06,
"loss": 2.5975,
"step": 257000
},
{
"epoch": 2.64,
"learning_rate": 9.429964784755805e-06,
"loss": 2.5789,
"step": 257500
},
{
"epoch": 2.65,
"learning_rate": 9.409431114670282e-06,
"loss": 2.5324,
"step": 258000
},
{
"epoch": 2.65,
"learning_rate": 9.388938511924929e-06,
"loss": 2.6155,
"step": 258500
},
{
"epoch": 2.66,
"learning_rate": 9.368404841839406e-06,
"loss": 2.5899,
"step": 259000
},
{
"epoch": 2.66,
"learning_rate": 9.347871171753883e-06,
"loss": 2.6095,
"step": 259500
},
{
"epoch": 2.67,
"learning_rate": 9.32733750166836e-06,
"loss": 2.6055,
"step": 260000
},
{
"epoch": 2.67,
"learning_rate": 9.30684489892301e-06,
"loss": 2.6007,
"step": 260500
},
{
"epoch": 2.68,
"learning_rate": 9.286311228837487e-06,
"loss": 2.6287,
"step": 261000
},
{
"epoch": 2.68,
"learning_rate": 9.265777558751964e-06,
"loss": 2.6304,
"step": 261500
},
{
"epoch": 2.69,
"learning_rate": 9.245243888666441e-06,
"loss": 2.5703,
"step": 262000
},
{
"epoch": 2.7,
"learning_rate": 9.22475128592109e-06,
"loss": 2.5771,
"step": 262500
},
{
"epoch": 2.7,
"learning_rate": 9.204217615835567e-06,
"loss": 2.5947,
"step": 263000
},
{
"epoch": 2.71,
"learning_rate": 9.183683945750045e-06,
"loss": 2.6205,
"step": 263500
},
{
"epoch": 2.71,
"learning_rate": 9.163150275664522e-06,
"loss": 2.5745,
"step": 264000
},
{
"epoch": 2.72,
"learning_rate": 9.142616605578999e-06,
"loss": 2.6126,
"step": 264500
},
{
"epoch": 2.72,
"learning_rate": 9.122124002833648e-06,
"loss": 2.5981,
"step": 265000
},
{
"epoch": 2.73,
"learning_rate": 9.101590332748125e-06,
"loss": 2.5772,
"step": 265500
},
{
"epoch": 2.73,
"learning_rate": 9.081056662662602e-06,
"loss": 2.6235,
"step": 266000
},
{
"epoch": 2.74,
"learning_rate": 9.06052299257708e-06,
"loss": 2.5577,
"step": 266500
},
{
"epoch": 2.74,
"learning_rate": 9.040030389831727e-06,
"loss": 2.6086,
"step": 267000
},
{
"epoch": 2.75,
"learning_rate": 9.019496719746204e-06,
"loss": 2.5849,
"step": 267500
},
{
"epoch": 2.75,
"learning_rate": 8.998963049660681e-06,
"loss": 2.5951,
"step": 268000
},
{
"epoch": 2.76,
"learning_rate": 8.978429379575158e-06,
"loss": 2.5432,
"step": 268500
},
{
"epoch": 2.76,
"learning_rate": 8.957936776829808e-06,
"loss": 2.596,
"step": 269000
},
{
"epoch": 2.77,
"learning_rate": 8.937403106744285e-06,
"loss": 2.6096,
"step": 269500
},
{
"epoch": 2.77,
"learning_rate": 8.916869436658762e-06,
"loss": 2.6013,
"step": 270000
},
{
"epoch": 2.78,
"learning_rate": 8.896335766573239e-06,
"loss": 2.5926,
"step": 270500
},
{
"epoch": 2.78,
"learning_rate": 8.875843163827888e-06,
"loss": 2.5741,
"step": 271000
},
{
"epoch": 2.79,
"learning_rate": 8.855309493742365e-06,
"loss": 2.5585,
"step": 271500
},
{
"epoch": 2.79,
"learning_rate": 8.83477582365684e-06,
"loss": 2.6125,
"step": 272000
},
{
"epoch": 2.8,
"learning_rate": 8.81424215357132e-06,
"loss": 2.5581,
"step": 272500
},
{
"epoch": 2.8,
"learning_rate": 8.793749550825967e-06,
"loss": 2.5723,
"step": 273000
},
{
"epoch": 2.81,
"learning_rate": 8.773215880740444e-06,
"loss": 2.5726,
"step": 273500
},
{
"epoch": 2.81,
"learning_rate": 8.752682210654923e-06,
"loss": 2.6084,
"step": 274000
},
{
"epoch": 2.82,
"learning_rate": 8.7321485405694e-06,
"loss": 2.5814,
"step": 274500
},
{
"epoch": 2.82,
"learning_rate": 8.711655937824048e-06,
"loss": 2.6171,
"step": 275000
},
{
"epoch": 2.83,
"learning_rate": 8.691122267738525e-06,
"loss": 2.6089,
"step": 275500
},
{
"epoch": 2.83,
"learning_rate": 8.670588597653002e-06,
"loss": 2.5841,
"step": 276000
},
{
"epoch": 2.84,
"learning_rate": 8.650054927567479e-06,
"loss": 2.5735,
"step": 276500
},
{
"epoch": 2.84,
"learning_rate": 8.629562324822128e-06,
"loss": 2.5961,
"step": 277000
},
{
"epoch": 2.85,
"learning_rate": 8.609028654736605e-06,
"loss": 2.5913,
"step": 277500
},
{
"epoch": 2.85,
"learning_rate": 8.588494984651082e-06,
"loss": 2.5947,
"step": 278000
},
{
"epoch": 2.86,
"learning_rate": 8.56796131456556e-06,
"loss": 2.6138,
"step": 278500
},
{
"epoch": 2.86,
"learning_rate": 8.547468711820209e-06,
"loss": 2.584,
"step": 279000
},
{
"epoch": 2.87,
"learning_rate": 8.526935041734686e-06,
"loss": 2.5909,
"step": 279500
},
{
"epoch": 2.87,
"learning_rate": 8.506401371649163e-06,
"loss": 2.5691,
"step": 280000
},
{
"epoch": 2.88,
"learning_rate": 8.485867701563639e-06,
"loss": 2.5852,
"step": 280500
},
{
"epoch": 2.88,
"learning_rate": 8.46541616615846e-06,
"loss": 2.5937,
"step": 281000
},
{
"epoch": 2.89,
"learning_rate": 8.444882496072937e-06,
"loss": 2.5956,
"step": 281500
},
{
"epoch": 2.9,
"learning_rate": 8.424348825987414e-06,
"loss": 2.627,
"step": 282000
},
{
"epoch": 2.9,
"learning_rate": 8.403815155901891e-06,
"loss": 2.6027,
"step": 282500
},
{
"epoch": 2.91,
"learning_rate": 8.383281485816368e-06,
"loss": 2.5815,
"step": 283000
},
{
"epoch": 2.91,
"learning_rate": 8.362747815730845e-06,
"loss": 2.5685,
"step": 283500
},
{
"epoch": 2.92,
"learning_rate": 8.342214145645323e-06,
"loss": 2.6211,
"step": 284000
},
{
"epoch": 2.92,
"learning_rate": 8.321721542899972e-06,
"loss": 2.6028,
"step": 284500
},
{
"epoch": 2.93,
"learning_rate": 8.301187872814449e-06,
"loss": 2.5959,
"step": 285000
},
{
"epoch": 2.93,
"learning_rate": 8.280654202728924e-06,
"loss": 2.5821,
"step": 285500
},
{
"epoch": 2.94,
"learning_rate": 8.260120532643403e-06,
"loss": 2.5694,
"step": 286000
},
{
"epoch": 2.94,
"learning_rate": 8.23958686255788e-06,
"loss": 2.5806,
"step": 286500
},
{
"epoch": 2.95,
"learning_rate": 8.219053192472357e-06,
"loss": 2.5853,
"step": 287000
},
{
"epoch": 2.95,
"learning_rate": 8.198519522386835e-06,
"loss": 2.561,
"step": 287500
},
{
"epoch": 2.96,
"learning_rate": 8.177985852301312e-06,
"loss": 2.5861,
"step": 288000
},
{
"epoch": 2.96,
"learning_rate": 8.15749324955596e-06,
"loss": 2.5743,
"step": 288500
},
{
"epoch": 2.97,
"learning_rate": 8.136959579470436e-06,
"loss": 2.5937,
"step": 289000
},
{
"epoch": 2.97,
"learning_rate": 8.116425909384914e-06,
"loss": 2.5558,
"step": 289500
},
{
"epoch": 2.98,
"learning_rate": 8.095892239299392e-06,
"loss": 2.644,
"step": 290000
},
{
"epoch": 2.98,
"learning_rate": 8.07539963655404e-06,
"loss": 2.576,
"step": 290500
},
{
"epoch": 2.99,
"learning_rate": 8.054865966468517e-06,
"loss": 2.614,
"step": 291000
},
{
"epoch": 2.99,
"learning_rate": 8.034332296382996e-06,
"loss": 2.5867,
"step": 291500
},
{
"epoch": 3.0,
"learning_rate": 8.013798626297471e-06,
"loss": 2.5847,
"step": 292000
},
{
"epoch": 3.0,
"eval_gen_len": 14.18,
"eval_loss": 2.4842422008514404,
"eval_rouge1": 32.8849,
"eval_rouge2": 14.1714,
"eval_rougeL": 30.3938,
"eval_rougeLsum": 30.5032,
"eval_runtime": 64.3777,
"eval_samples_per_second": 15.533,
"eval_steps_per_second": 1.942,
"step": 292203
},
{
"epoch": 3.0,
"learning_rate": 7.99330602355212e-06,
"loss": 2.6098,
"step": 292500
},
{
"epoch": 3.01,
"learning_rate": 7.972772353466598e-06,
"loss": 2.5892,
"step": 293000
},
{
"epoch": 3.01,
"learning_rate": 7.952238683381075e-06,
"loss": 2.6083,
"step": 293500
},
{
"epoch": 3.02,
"learning_rate": 7.931705013295552e-06,
"loss": 2.5594,
"step": 294000
},
{
"epoch": 3.02,
"learning_rate": 7.911212410550201e-06,
"loss": 2.578,
"step": 294500
},
{
"epoch": 3.03,
"learning_rate": 7.890678740464678e-06,
"loss": 2.5775,
"step": 295000
},
{
"epoch": 3.03,
"learning_rate": 7.870145070379155e-06,
"loss": 2.5704,
"step": 295500
},
{
"epoch": 3.04,
"learning_rate": 7.849611400293632e-06,
"loss": 2.5723,
"step": 296000
},
{
"epoch": 3.04,
"learning_rate": 7.829118797548281e-06,
"loss": 2.5669,
"step": 296500
},
{
"epoch": 3.05,
"learning_rate": 7.808626194802929e-06,
"loss": 2.588,
"step": 297000
},
{
"epoch": 3.05,
"learning_rate": 7.788092524717406e-06,
"loss": 2.5423,
"step": 297500
},
{
"epoch": 3.06,
"learning_rate": 7.767558854631883e-06,
"loss": 2.6014,
"step": 298000
},
{
"epoch": 3.06,
"learning_rate": 7.74702518454636e-06,
"loss": 2.5682,
"step": 298500
},
{
"epoch": 3.07,
"learning_rate": 7.726491514460838e-06,
"loss": 2.5787,
"step": 299000
},
{
"epoch": 3.07,
"learning_rate": 7.705957844375315e-06,
"loss": 2.5539,
"step": 299500
},
{
"epoch": 3.08,
"learning_rate": 7.685424174289794e-06,
"loss": 2.5781,
"step": 300000
},
{
"epoch": 3.09,
"learning_rate": 7.664890504204269e-06,
"loss": 2.5441,
"step": 300500
},
{
"epoch": 3.09,
"learning_rate": 7.644397901458918e-06,
"loss": 2.5906,
"step": 301000
},
{
"epoch": 3.1,
"learning_rate": 7.6238642313733945e-06,
"loss": 2.5589,
"step": 301500
},
{
"epoch": 3.1,
"learning_rate": 7.6033305612878725e-06,
"loss": 2.5654,
"step": 302000
},
{
"epoch": 3.11,
"learning_rate": 7.58279689120235e-06,
"loss": 2.5716,
"step": 302500
},
{
"epoch": 3.11,
"learning_rate": 7.562304288456998e-06,
"loss": 2.5695,
"step": 303000
},
{
"epoch": 3.12,
"learning_rate": 7.541770618371476e-06,
"loss": 2.5949,
"step": 303500
},
{
"epoch": 3.12,
"learning_rate": 7.521236948285952e-06,
"loss": 2.5542,
"step": 304000
},
{
"epoch": 3.13,
"learning_rate": 7.5007032782004294e-06,
"loss": 2.5522,
"step": 304500
},
{
"epoch": 3.13,
"learning_rate": 7.4802106754550785e-06,
"loss": 2.5402,
"step": 305000
},
{
"epoch": 3.14,
"learning_rate": 7.459677005369556e-06,
"loss": 2.5999,
"step": 305500
},
{
"epoch": 3.14,
"learning_rate": 7.439143335284033e-06,
"loss": 2.5563,
"step": 306000
},
{
"epoch": 3.15,
"learning_rate": 7.418609665198509e-06,
"loss": 2.5637,
"step": 306500
},
{
"epoch": 3.15,
"learning_rate": 7.398075995112986e-06,
"loss": 2.5705,
"step": 307000
},
{
"epoch": 3.16,
"learning_rate": 7.377583392367635e-06,
"loss": 2.5639,
"step": 307500
},
{
"epoch": 3.16,
"learning_rate": 7.3570497222821126e-06,
"loss": 2.5505,
"step": 308000
},
{
"epoch": 3.17,
"learning_rate": 7.33651605219659e-06,
"loss": 2.5897,
"step": 308500
},
{
"epoch": 3.17,
"learning_rate": 7.315982382111068e-06,
"loss": 2.5572,
"step": 309000
},
{
"epoch": 3.18,
"learning_rate": 7.295489779365715e-06,
"loss": 2.5474,
"step": 309500
},
{
"epoch": 3.18,
"learning_rate": 7.274956109280192e-06,
"loss": 2.5405,
"step": 310000
},
{
"epoch": 3.19,
"learning_rate": 7.25442243919467e-06,
"loss": 2.5853,
"step": 310500
},
{
"epoch": 3.19,
"learning_rate": 7.2338887691091475e-06,
"loss": 2.5544,
"step": 311000
},
{
"epoch": 3.2,
"learning_rate": 7.213396166363795e-06,
"loss": 2.5801,
"step": 311500
},
{
"epoch": 3.2,
"learning_rate": 7.192862496278274e-06,
"loss": 2.6023,
"step": 312000
},
{
"epoch": 3.21,
"learning_rate": 7.17232882619275e-06,
"loss": 2.5458,
"step": 312500
},
{
"epoch": 3.21,
"learning_rate": 7.151795156107227e-06,
"loss": 2.5735,
"step": 313000
},
{
"epoch": 3.22,
"learning_rate": 7.131302553361876e-06,
"loss": 2.547,
"step": 313500
},
{
"epoch": 3.22,
"learning_rate": 7.1107688832763534e-06,
"loss": 2.59,
"step": 314000
},
{
"epoch": 3.23,
"learning_rate": 7.090235213190831e-06,
"loss": 2.5819,
"step": 314500
},
{
"epoch": 3.23,
"learning_rate": 7.069701543105307e-06,
"loss": 2.5353,
"step": 315000
},
{
"epoch": 3.24,
"learning_rate": 7.049250007700127e-06,
"loss": 2.5629,
"step": 315500
},
{
"epoch": 3.24,
"learning_rate": 7.028716337614604e-06,
"loss": 2.5804,
"step": 316000
},
{
"epoch": 3.25,
"learning_rate": 7.008182667529081e-06,
"loss": 2.5637,
"step": 316500
},
{
"epoch": 3.25,
"learning_rate": 6.987648997443559e-06,
"loss": 2.6034,
"step": 317000
},
{
"epoch": 3.26,
"learning_rate": 6.967115327358036e-06,
"loss": 2.5929,
"step": 317500
},
{
"epoch": 3.26,
"learning_rate": 6.946581657272513e-06,
"loss": 2.5733,
"step": 318000
},
{
"epoch": 3.27,
"learning_rate": 6.92604798718699e-06,
"loss": 2.5497,
"step": 318500
},
{
"epoch": 3.28,
"learning_rate": 6.905514317101468e-06,
"loss": 2.5831,
"step": 319000
},
{
"epoch": 3.28,
"learning_rate": 6.884980647015945e-06,
"loss": 2.5796,
"step": 319500
},
{
"epoch": 3.29,
"learning_rate": 6.864488044270593e-06,
"loss": 2.5689,
"step": 320000
},
{
"epoch": 3.29,
"learning_rate": 6.84395437418507e-06,
"loss": 2.5814,
"step": 320500
},
{
"epoch": 3.3,
"learning_rate": 6.823420704099548e-06,
"loss": 2.5869,
"step": 321000
},
{
"epoch": 3.3,
"learning_rate": 6.802887034014025e-06,
"loss": 2.5768,
"step": 321500
},
{
"epoch": 3.31,
"learning_rate": 6.782394431268673e-06,
"loss": 2.5517,
"step": 322000
},
{
"epoch": 3.31,
"learning_rate": 6.761860761183151e-06,
"loss": 2.5505,
"step": 322500
},
{
"epoch": 3.32,
"learning_rate": 6.7413270910976284e-06,
"loss": 2.5703,
"step": 323000
},
{
"epoch": 3.32,
"learning_rate": 6.720793421012105e-06,
"loss": 2.5399,
"step": 323500
},
{
"epoch": 3.33,
"learning_rate": 6.700300818266754e-06,
"loss": 2.5217,
"step": 324000
},
{
"epoch": 3.33,
"learning_rate": 6.679767148181231e-06,
"loss": 2.5858,
"step": 324500
},
{
"epoch": 3.34,
"learning_rate": 6.659233478095708e-06,
"loss": 2.5503,
"step": 325000
},
{
"epoch": 3.34,
"learning_rate": 6.638699808010185e-06,
"loss": 2.5654,
"step": 325500
},
{
"epoch": 3.35,
"learning_rate": 6.6182482726050045e-06,
"loss": 2.5682,
"step": 326000
},
{
"epoch": 3.35,
"learning_rate": 6.597714602519482e-06,
"loss": 2.5831,
"step": 326500
},
{
"epoch": 3.36,
"learning_rate": 6.577180932433959e-06,
"loss": 2.5548,
"step": 327000
},
{
"epoch": 3.36,
"learning_rate": 6.556647262348437e-06,
"loss": 2.5693,
"step": 327500
},
{
"epoch": 3.37,
"learning_rate": 6.536113592262914e-06,
"loss": 2.5505,
"step": 328000
},
{
"epoch": 3.37,
"learning_rate": 6.5155799221773904e-06,
"loss": 2.5547,
"step": 328500
},
{
"epoch": 3.38,
"learning_rate": 6.495046252091868e-06,
"loss": 2.5548,
"step": 329000
},
{
"epoch": 3.38,
"learning_rate": 6.474512582006346e-06,
"loss": 2.5613,
"step": 329500
},
{
"epoch": 3.39,
"learning_rate": 6.454019979260994e-06,
"loss": 2.5908,
"step": 330000
},
{
"epoch": 3.39,
"learning_rate": 6.433486309175471e-06,
"loss": 2.5816,
"step": 330500
},
{
"epoch": 3.4,
"learning_rate": 6.412952639089949e-06,
"loss": 2.581,
"step": 331000
},
{
"epoch": 3.4,
"learning_rate": 6.392418969004425e-06,
"loss": 2.5566,
"step": 331500
},
{
"epoch": 3.41,
"learning_rate": 6.371885298918903e-06,
"loss": 2.5769,
"step": 332000
},
{
"epoch": 3.41,
"learning_rate": 6.35135162883338e-06,
"loss": 2.5591,
"step": 332500
},
{
"epoch": 3.42,
"learning_rate": 6.330817958747857e-06,
"loss": 2.5916,
"step": 333000
},
{
"epoch": 3.42,
"learning_rate": 6.310284288662335e-06,
"loss": 2.5778,
"step": 333500
},
{
"epoch": 3.43,
"learning_rate": 6.289791685916982e-06,
"loss": 2.5726,
"step": 334000
},
{
"epoch": 3.43,
"learning_rate": 6.2692580158314595e-06,
"loss": 2.5671,
"step": 334500
},
{
"epoch": 3.44,
"learning_rate": 6.2487243457459376e-06,
"loss": 2.5546,
"step": 335000
},
{
"epoch": 3.44,
"learning_rate": 6.228190675660415e-06,
"loss": 2.5633,
"step": 335500
},
{
"epoch": 3.45,
"learning_rate": 6.207698072915063e-06,
"loss": 2.557,
"step": 336000
},
{
"epoch": 3.45,
"learning_rate": 6.187164402829541e-06,
"loss": 2.5664,
"step": 336500
},
{
"epoch": 3.46,
"learning_rate": 6.166630732744018e-06,
"loss": 2.5559,
"step": 337000
},
{
"epoch": 3.47,
"learning_rate": 6.1460970626584945e-06,
"loss": 2.5573,
"step": 337500
},
{
"epoch": 3.47,
"learning_rate": 6.125604459913143e-06,
"loss": 2.5808,
"step": 338000
},
{
"epoch": 3.48,
"learning_rate": 6.105070789827621e-06,
"loss": 2.5853,
"step": 338500
},
{
"epoch": 3.48,
"learning_rate": 6.084537119742098e-06,
"loss": 2.5694,
"step": 339000
},
{
"epoch": 3.49,
"learning_rate": 6.064003449656575e-06,
"loss": 2.5655,
"step": 339500
},
{
"epoch": 3.49,
"learning_rate": 6.043510846911223e-06,
"loss": 2.5472,
"step": 340000
},
{
"epoch": 3.5,
"learning_rate": 6.0229771768257e-06,
"loss": 2.5605,
"step": 340500
},
{
"epoch": 3.5,
"learning_rate": 6.002443506740178e-06,
"loss": 2.5799,
"step": 341000
},
{
"epoch": 3.51,
"learning_rate": 5.981909836654655e-06,
"loss": 2.5311,
"step": 341500
},
{
"epoch": 3.51,
"learning_rate": 5.961376166569133e-06,
"loss": 2.5579,
"step": 342000
},
{
"epoch": 3.52,
"learning_rate": 5.94088356382378e-06,
"loss": 2.5476,
"step": 342500
},
{
"epoch": 3.52,
"learning_rate": 5.920390961078429e-06,
"loss": 2.5484,
"step": 343000
},
{
"epoch": 3.53,
"learning_rate": 5.899857290992906e-06,
"loss": 2.5756,
"step": 343500
},
{
"epoch": 3.53,
"learning_rate": 5.8793236209073835e-06,
"loss": 2.5241,
"step": 344000
},
{
"epoch": 3.54,
"learning_rate": 5.858789950821861e-06,
"loss": 2.5444,
"step": 344500
},
{
"epoch": 3.54,
"learning_rate": 5.838256280736337e-06,
"loss": 2.5398,
"step": 345000
},
{
"epoch": 3.55,
"learning_rate": 5.817722610650816e-06,
"loss": 2.5498,
"step": 345500
},
{
"epoch": 3.55,
"learning_rate": 5.797188940565292e-06,
"loss": 2.566,
"step": 346000
},
{
"epoch": 3.56,
"learning_rate": 5.7766552704797695e-06,
"loss": 2.574,
"step": 346500
},
{
"epoch": 3.56,
"learning_rate": 5.7561626677344185e-06,
"loss": 2.5921,
"step": 347000
},
{
"epoch": 3.57,
"learning_rate": 5.735628997648896e-06,
"loss": 2.5325,
"step": 347500
},
{
"epoch": 3.57,
"learning_rate": 5.715095327563373e-06,
"loss": 2.5733,
"step": 348000
},
{
"epoch": 3.58,
"learning_rate": 5.694561657477849e-06,
"loss": 2.5372,
"step": 348500
},
{
"epoch": 3.58,
"learning_rate": 5.674069054732498e-06,
"loss": 2.5424,
"step": 349000
},
{
"epoch": 3.59,
"learning_rate": 5.653535384646975e-06,
"loss": 2.5745,
"step": 349500
},
{
"epoch": 3.59,
"learning_rate": 5.633001714561453e-06,
"loss": 2.5407,
"step": 350000
},
{
"epoch": 3.6,
"learning_rate": 5.61246804447593e-06,
"loss": 2.5545,
"step": 350500
},
{
"epoch": 3.6,
"learning_rate": 5.591975441730578e-06,
"loss": 2.5532,
"step": 351000
},
{
"epoch": 3.61,
"learning_rate": 5.571441771645055e-06,
"loss": 2.5653,
"step": 351500
},
{
"epoch": 3.61,
"learning_rate": 5.550908101559532e-06,
"loss": 2.5588,
"step": 352000
},
{
"epoch": 3.62,
"learning_rate": 5.53037443147401e-06,
"loss": 2.5499,
"step": 352500
},
{
"epoch": 3.62,
"learning_rate": 5.5098407613884875e-06,
"loss": 2.5769,
"step": 353000
},
{
"epoch": 3.63,
"learning_rate": 5.489348158643135e-06,
"loss": 2.5594,
"step": 353500
},
{
"epoch": 3.63,
"learning_rate": 5.468814488557614e-06,
"loss": 2.5792,
"step": 354000
},
{
"epoch": 3.64,
"learning_rate": 5.44828081847209e-06,
"loss": 2.5614,
"step": 354500
},
{
"epoch": 3.64,
"learning_rate": 5.427747148386567e-06,
"loss": 2.5843,
"step": 355000
},
{
"epoch": 3.65,
"learning_rate": 5.4072545456412154e-06,
"loss": 2.5558,
"step": 355500
},
{
"epoch": 3.65,
"learning_rate": 5.3867208755556935e-06,
"loss": 2.5836,
"step": 356000
},
{
"epoch": 3.66,
"learning_rate": 5.366187205470171e-06,
"loss": 2.5505,
"step": 356500
},
{
"epoch": 3.67,
"learning_rate": 5.345653535384647e-06,
"loss": 2.5709,
"step": 357000
},
{
"epoch": 3.67,
"learning_rate": 5.325160932639296e-06,
"loss": 2.5546,
"step": 357500
},
{
"epoch": 3.68,
"learning_rate": 5.304627262553773e-06,
"loss": 2.5987,
"step": 358000
},
{
"epoch": 3.68,
"learning_rate": 5.28409359246825e-06,
"loss": 2.5515,
"step": 358500
},
{
"epoch": 3.69,
"learning_rate": 5.263559922382728e-06,
"loss": 2.5559,
"step": 359000
},
{
"epoch": 3.69,
"learning_rate": 5.243067319637376e-06,
"loss": 2.5584,
"step": 359500
},
{
"epoch": 3.7,
"learning_rate": 5.222533649551853e-06,
"loss": 2.5676,
"step": 360000
},
{
"epoch": 3.7,
"learning_rate": 5.20199997946633e-06,
"loss": 2.5724,
"step": 360500
},
{
"epoch": 3.71,
"learning_rate": 5.181507376720979e-06,
"loss": 2.5489,
"step": 361000
},
{
"epoch": 3.71,
"learning_rate": 5.160973706635456e-06,
"loss": 2.5659,
"step": 361500
},
{
"epoch": 3.72,
"learning_rate": 5.140440036549933e-06,
"loss": 2.5627,
"step": 362000
},
{
"epoch": 3.72,
"learning_rate": 5.11990636646441e-06,
"loss": 2.5339,
"step": 362500
},
{
"epoch": 3.73,
"learning_rate": 5.099372696378888e-06,
"loss": 2.5638,
"step": 363000
},
{
"epoch": 3.73,
"learning_rate": 5.078839026293365e-06,
"loss": 2.5694,
"step": 363500
},
{
"epoch": 3.74,
"learning_rate": 5.058305356207842e-06,
"loss": 2.5592,
"step": 364000
},
{
"epoch": 3.74,
"learning_rate": 5.0377716861223195e-06,
"loss": 2.5519,
"step": 364500
},
{
"epoch": 3.75,
"learning_rate": 5.017279083376968e-06,
"loss": 2.5775,
"step": 365000
},
{
"epoch": 3.75,
"learning_rate": 4.996745413291445e-06,
"loss": 2.5731,
"step": 365500
},
{
"epoch": 3.76,
"learning_rate": 4.976211743205922e-06,
"loss": 2.5421,
"step": 366000
},
{
"epoch": 3.76,
"learning_rate": 4.955678073120399e-06,
"loss": 2.5494,
"step": 366500
},
{
"epoch": 3.77,
"learning_rate": 4.935185470375048e-06,
"loss": 2.5624,
"step": 367000
},
{
"epoch": 3.77,
"learning_rate": 4.9146518002895245e-06,
"loss": 2.5722,
"step": 367500
},
{
"epoch": 3.78,
"learning_rate": 4.894118130204003e-06,
"loss": 2.56,
"step": 368000
},
{
"epoch": 3.78,
"learning_rate": 4.87358446011848e-06,
"loss": 2.5629,
"step": 368500
},
{
"epoch": 3.79,
"learning_rate": 4.853091857373128e-06,
"loss": 2.5359,
"step": 369000
},
{
"epoch": 3.79,
"learning_rate": 4.832558187287605e-06,
"loss": 2.5635,
"step": 369500
},
{
"epoch": 3.8,
"learning_rate": 4.812024517202082e-06,
"loss": 2.5447,
"step": 370000
},
{
"epoch": 3.8,
"learning_rate": 4.79149084711656e-06,
"loss": 2.5798,
"step": 370500
},
{
"epoch": 3.81,
"learning_rate": 4.7709982443712085e-06,
"loss": 2.5582,
"step": 371000
},
{
"epoch": 3.81,
"learning_rate": 4.750464574285686e-06,
"loss": 2.5558,
"step": 371500
},
{
"epoch": 3.82,
"learning_rate": 4.729930904200163e-06,
"loss": 2.5458,
"step": 372000
},
{
"epoch": 3.82,
"learning_rate": 4.70939723411464e-06,
"loss": 2.5491,
"step": 372500
},
{
"epoch": 3.83,
"learning_rate": 4.688904631369288e-06,
"loss": 2.5444,
"step": 373000
},
{
"epoch": 3.83,
"learning_rate": 4.6683709612837654e-06,
"loss": 2.5637,
"step": 373500
},
{
"epoch": 3.84,
"learning_rate": 4.647837291198243e-06,
"loss": 2.5462,
"step": 374000
},
{
"epoch": 3.84,
"learning_rate": 4.62730362111272e-06,
"loss": 2.5801,
"step": 374500
},
{
"epoch": 3.85,
"learning_rate": 4.606811018367368e-06,
"loss": 2.5811,
"step": 375000
},
{
"epoch": 3.86,
"learning_rate": 4.586277348281846e-06,
"loss": 2.5696,
"step": 375500
},
{
"epoch": 3.86,
"learning_rate": 4.565743678196322e-06,
"loss": 2.5353,
"step": 376000
},
{
"epoch": 3.87,
"learning_rate": 4.5452100081108e-06,
"loss": 2.5476,
"step": 376500
},
{
"epoch": 3.87,
"learning_rate": 4.5246763380252776e-06,
"loss": 2.5637,
"step": 377000
},
{
"epoch": 3.88,
"learning_rate": 4.504183735279926e-06,
"loss": 2.5978,
"step": 377500
},
{
"epoch": 3.88,
"learning_rate": 4.483650065194403e-06,
"loss": 2.5526,
"step": 378000
},
{
"epoch": 3.89,
"learning_rate": 4.46311639510888e-06,
"loss": 2.5606,
"step": 378500
},
{
"epoch": 3.89,
"learning_rate": 4.442582725023357e-06,
"loss": 2.5618,
"step": 379000
},
{
"epoch": 3.9,
"learning_rate": 4.4220901222780055e-06,
"loss": 2.5707,
"step": 379500
},
{
"epoch": 3.9,
"learning_rate": 4.401556452192483e-06,
"loss": 2.5567,
"step": 380000
},
{
"epoch": 3.91,
"learning_rate": 4.38102278210696e-06,
"loss": 2.5751,
"step": 380500
},
{
"epoch": 3.91,
"learning_rate": 4.360489112021438e-06,
"loss": 2.5543,
"step": 381000
},
{
"epoch": 3.92,
"learning_rate": 4.339996509276086e-06,
"loss": 2.5537,
"step": 381500
},
{
"epoch": 3.92,
"learning_rate": 4.319462839190563e-06,
"loss": 2.5465,
"step": 382000
},
{
"epoch": 3.93,
"learning_rate": 4.29892916910504e-06,
"loss": 2.5502,
"step": 382500
},
{
"epoch": 3.93,
"learning_rate": 4.278395499019518e-06,
"loss": 2.5531,
"step": 383000
},
{
"epoch": 3.94,
"learning_rate": 4.257902896274166e-06,
"loss": 2.5419,
"step": 383500
},
{
"epoch": 3.94,
"learning_rate": 4.237369226188644e-06,
"loss": 2.5731,
"step": 384000
},
{
"epoch": 3.95,
"learning_rate": 4.21683555610312e-06,
"loss": 2.5539,
"step": 384500
},
{
"epoch": 3.95,
"learning_rate": 4.196301886017597e-06,
"loss": 2.5286,
"step": 385000
},
{
"epoch": 3.96,
"learning_rate": 4.1758092832722455e-06,
"loss": 2.5725,
"step": 385500
},
{
"epoch": 3.96,
"learning_rate": 4.1552756131867235e-06,
"loss": 2.553,
"step": 386000
},
{
"epoch": 3.97,
"learning_rate": 4.134741943101201e-06,
"loss": 2.5295,
"step": 386500
},
{
"epoch": 3.97,
"learning_rate": 4.114208273015678e-06,
"loss": 2.5762,
"step": 387000
},
{
"epoch": 3.98,
"learning_rate": 4.093715670270326e-06,
"loss": 2.5528,
"step": 387500
},
{
"epoch": 3.98,
"learning_rate": 4.073182000184803e-06,
"loss": 2.5377,
"step": 388000
},
{
"epoch": 3.99,
"learning_rate": 4.0526483300992805e-06,
"loss": 2.5382,
"step": 388500
},
{
"epoch": 3.99,
"learning_rate": 4.032114660013758e-06,
"loss": 2.5778,
"step": 389000
},
{
"epoch": 4.0,
"learning_rate": 4.011622057268406e-06,
"loss": 2.5578,
"step": 389500
},
{
"epoch": 4.0,
"eval_gen_len": 14.226,
"eval_loss": 2.4658281803131104,
"eval_rouge1": 33.1247,
"eval_rouge2": 14.4487,
"eval_rougeL": 30.3793,
"eval_rougeLsum": 30.5158,
"eval_runtime": 65.1503,
"eval_samples_per_second": 15.349,
"eval_steps_per_second": 1.919,
"step": 389604
},
{
"epoch": 4.0,
"learning_rate": 3.991088387182884e-06,
"loss": 2.5441,
"step": 390000
},
{
"epoch": 4.01,
"learning_rate": 3.970554717097361e-06,
"loss": 2.5623,
"step": 390500
},
{
"epoch": 4.01,
"learning_rate": 3.950021047011837e-06,
"loss": 2.5435,
"step": 391000
},
{
"epoch": 4.02,
"learning_rate": 3.929528444266486e-06,
"loss": 2.5428,
"step": 391500
},
{
"epoch": 4.02,
"learning_rate": 3.908994774180964e-06,
"loss": 2.51,
"step": 392000
},
{
"epoch": 4.03,
"learning_rate": 3.888461104095441e-06,
"loss": 2.5402,
"step": 392500
},
{
"epoch": 4.03,
"learning_rate": 3.867927434009918e-06,
"loss": 2.5662,
"step": 393000
},
{
"epoch": 4.04,
"learning_rate": 3.847434831264566e-06,
"loss": 2.5443,
"step": 393500
},
{
"epoch": 4.05,
"learning_rate": 3.826901161179043e-06,
"loss": 2.5292,
"step": 394000
},
{
"epoch": 4.05,
"learning_rate": 3.806367491093521e-06,
"loss": 2.54,
"step": 394500
},
{
"epoch": 4.06,
"learning_rate": 3.785833821007998e-06,
"loss": 2.5755,
"step": 395000
},
{
"epoch": 4.06,
"learning_rate": 3.7653412182626463e-06,
"loss": 2.5804,
"step": 395500
},
{
"epoch": 4.07,
"learning_rate": 3.744807548177124e-06,
"loss": 2.5354,
"step": 396000
},
{
"epoch": 4.07,
"learning_rate": 3.724273878091601e-06,
"loss": 2.5347,
"step": 396500
},
{
"epoch": 4.08,
"learning_rate": 3.703740208006078e-06,
"loss": 2.5257,
"step": 397000
},
{
"epoch": 4.08,
"learning_rate": 3.6832476052607264e-06,
"loss": 2.5587,
"step": 397500
},
{
"epoch": 4.09,
"learning_rate": 3.662713935175204e-06,
"loss": 2.5545,
"step": 398000
},
{
"epoch": 4.09,
"learning_rate": 3.642180265089681e-06,
"loss": 2.5325,
"step": 398500
},
{
"epoch": 4.1,
"learning_rate": 3.6216465950041584e-06,
"loss": 2.5691,
"step": 399000
},
{
"epoch": 4.1,
"learning_rate": 3.601153992258807e-06,
"loss": 2.556,
"step": 399500
},
{
"epoch": 4.11,
"learning_rate": 3.5806203221732838e-06,
"loss": 2.5294,
"step": 400000
},
{
"epoch": 4.11,
"learning_rate": 3.5600866520877614e-06,
"loss": 2.5724,
"step": 400500
},
{
"epoch": 4.12,
"learning_rate": 3.5395529820022386e-06,
"loss": 2.5741,
"step": 401000
},
{
"epoch": 4.12,
"learning_rate": 3.5190603792568867e-06,
"loss": 2.5525,
"step": 401500
},
{
"epoch": 4.13,
"learning_rate": 3.4985267091713643e-06,
"loss": 2.5642,
"step": 402000
},
{
"epoch": 4.13,
"learning_rate": 3.477993039085841e-06,
"loss": 2.5298,
"step": 402500
},
{
"epoch": 4.14,
"learning_rate": 3.4574593690003183e-06,
"loss": 2.5205,
"step": 403000
},
{
"epoch": 4.14,
"learning_rate": 3.4369667662549665e-06,
"loss": 2.5503,
"step": 403500
},
{
"epoch": 4.15,
"learning_rate": 3.416433096169444e-06,
"loss": 2.5291,
"step": 404000
},
{
"epoch": 4.15,
"learning_rate": 3.3958994260839213e-06,
"loss": 2.5487,
"step": 404500
},
{
"epoch": 4.16,
"learning_rate": 3.375365755998399e-06,
"loss": 2.536,
"step": 405000
},
{
"epoch": 4.16,
"learning_rate": 3.354873153253047e-06,
"loss": 2.5518,
"step": 405500
},
{
"epoch": 4.17,
"learning_rate": 3.3343394831675242e-06,
"loss": 2.5262,
"step": 406000
},
{
"epoch": 4.17,
"learning_rate": 3.313805813082002e-06,
"loss": 2.544,
"step": 406500
},
{
"epoch": 4.18,
"learning_rate": 3.2932721429964786e-06,
"loss": 2.5418,
"step": 407000
},
{
"epoch": 4.18,
"learning_rate": 3.272779540251127e-06,
"loss": 2.5587,
"step": 407500
},
{
"epoch": 4.19,
"learning_rate": 3.2522458701656044e-06,
"loss": 2.5786,
"step": 408000
},
{
"epoch": 4.19,
"learning_rate": 3.2317122000800816e-06,
"loss": 2.5272,
"step": 408500
},
{
"epoch": 4.2,
"learning_rate": 3.2111785299945588e-06,
"loss": 2.556,
"step": 409000
},
{
"epoch": 4.2,
"learning_rate": 3.190685927249207e-06,
"loss": 2.5468,
"step": 409500
},
{
"epoch": 4.21,
"learning_rate": 3.1701522571636845e-06,
"loss": 2.5786,
"step": 410000
},
{
"epoch": 4.21,
"learning_rate": 3.1496185870781613e-06,
"loss": 2.5543,
"step": 410500
},
{
"epoch": 4.22,
"learning_rate": 3.129084916992639e-06,
"loss": 2.5549,
"step": 411000
},
{
"epoch": 4.22,
"learning_rate": 3.108551246907116e-06,
"loss": 2.5092,
"step": 411500
},
{
"epoch": 4.23,
"learning_rate": 3.0880175768215937e-06,
"loss": 2.5441,
"step": 412000
},
{
"epoch": 4.24,
"learning_rate": 3.0674839067360705e-06,
"loss": 2.5641,
"step": 412500
},
{
"epoch": 4.24,
"learning_rate": 3.046950236650548e-06,
"loss": 2.5564,
"step": 413000
},
{
"epoch": 4.25,
"learning_rate": 3.0264576339051967e-06,
"loss": 2.5076,
"step": 413500
},
{
"epoch": 4.25,
"learning_rate": 3.0059239638196735e-06,
"loss": 2.5449,
"step": 414000
},
{
"epoch": 4.26,
"learning_rate": 2.9853902937341506e-06,
"loss": 2.5608,
"step": 414500
},
{
"epoch": 4.26,
"learning_rate": 2.9648566236486283e-06,
"loss": 2.5437,
"step": 415000
},
{
"epoch": 4.27,
"learning_rate": 2.9443640209032764e-06,
"loss": 2.547,
"step": 415500
},
{
"epoch": 4.27,
"learning_rate": 2.9238303508177536e-06,
"loss": 2.5329,
"step": 416000
},
{
"epoch": 4.28,
"learning_rate": 2.9032966807322312e-06,
"loss": 2.5452,
"step": 416500
},
{
"epoch": 4.28,
"learning_rate": 2.8828040779868794e-06,
"loss": 2.5384,
"step": 417000
},
{
"epoch": 4.29,
"learning_rate": 2.862270407901356e-06,
"loss": 2.5517,
"step": 417500
},
{
"epoch": 4.29,
"learning_rate": 2.8417367378158338e-06,
"loss": 2.546,
"step": 418000
},
{
"epoch": 4.3,
"learning_rate": 2.821203067730311e-06,
"loss": 2.5606,
"step": 418500
},
{
"epoch": 4.3,
"learning_rate": 2.8006693976447886e-06,
"loss": 2.5544,
"step": 419000
},
{
"epoch": 4.31,
"learning_rate": 2.7801357275592653e-06,
"loss": 2.5416,
"step": 419500
},
{
"epoch": 4.31,
"learning_rate": 2.7596020574737425e-06,
"loss": 2.5364,
"step": 420000
},
{
"epoch": 4.32,
"learning_rate": 2.73906838738822e-06,
"loss": 2.541,
"step": 420500
},
{
"epoch": 4.32,
"learning_rate": 2.7185757846428683e-06,
"loss": 2.5646,
"step": 421000
},
{
"epoch": 4.33,
"learning_rate": 2.6980421145573455e-06,
"loss": 2.5587,
"step": 421500
},
{
"epoch": 4.33,
"learning_rate": 2.677508444471823e-06,
"loss": 2.5614,
"step": 422000
},
{
"epoch": 4.34,
"learning_rate": 2.6569747743863e-06,
"loss": 2.5268,
"step": 422500
},
{
"epoch": 4.34,
"learning_rate": 2.6364821716409484e-06,
"loss": 2.5468,
"step": 423000
},
{
"epoch": 4.35,
"learning_rate": 2.615948501555426e-06,
"loss": 2.5255,
"step": 423500
},
{
"epoch": 4.35,
"learning_rate": 2.595414831469903e-06,
"loss": 2.5398,
"step": 424000
},
{
"epoch": 4.36,
"learning_rate": 2.5748811613843804e-06,
"loss": 2.5561,
"step": 424500
},
{
"epoch": 4.36,
"learning_rate": 2.5543885586390286e-06,
"loss": 2.5396,
"step": 425000
},
{
"epoch": 4.37,
"learning_rate": 2.533854888553506e-06,
"loss": 2.5366,
"step": 425500
},
{
"epoch": 4.37,
"learning_rate": 2.513362285808154e-06,
"loss": 2.5748,
"step": 426000
},
{
"epoch": 4.38,
"learning_rate": 2.4928286157226316e-06,
"loss": 2.5557,
"step": 426500
},
{
"epoch": 4.38,
"learning_rate": 2.4722949456371088e-06,
"loss": 2.5178,
"step": 427000
},
{
"epoch": 4.39,
"learning_rate": 2.451761275551586e-06,
"loss": 2.557,
"step": 427500
},
{
"epoch": 4.39,
"learning_rate": 2.431227605466063e-06,
"loss": 2.5357,
"step": 428000
},
{
"epoch": 4.4,
"learning_rate": 2.4106939353805403e-06,
"loss": 2.5228,
"step": 428500
},
{
"epoch": 4.4,
"learning_rate": 2.3901602652950175e-06,
"loss": 2.5376,
"step": 429000
},
{
"epoch": 4.41,
"learning_rate": 2.3696265952094947e-06,
"loss": 2.5418,
"step": 429500
},
{
"epoch": 4.41,
"learning_rate": 2.3491339924641433e-06,
"loss": 2.5534,
"step": 430000
},
{
"epoch": 4.42,
"learning_rate": 2.3286003223786205e-06,
"loss": 2.5183,
"step": 430500
},
{
"epoch": 4.43,
"learning_rate": 2.3080666522930977e-06,
"loss": 2.5207,
"step": 431000
},
{
"epoch": 4.43,
"learning_rate": 2.2875329822075753e-06,
"loss": 2.5431,
"step": 431500
},
{
"epoch": 4.44,
"learning_rate": 2.2670403794622234e-06,
"loss": 2.5484,
"step": 432000
},
{
"epoch": 4.44,
"learning_rate": 2.2465067093767006e-06,
"loss": 2.5322,
"step": 432500
},
{
"epoch": 4.45,
"learning_rate": 2.225973039291178e-06,
"loss": 2.5424,
"step": 433000
},
{
"epoch": 4.45,
"learning_rate": 2.2054393692056554e-06,
"loss": 2.5249,
"step": 433500
},
{
"epoch": 4.46,
"learning_rate": 2.184905699120132e-06,
"loss": 2.532,
"step": 434000
},
{
"epoch": 4.46,
"learning_rate": 2.164413096374781e-06,
"loss": 2.5406,
"step": 434500
},
{
"epoch": 4.47,
"learning_rate": 2.143879426289258e-06,
"loss": 2.5651,
"step": 435000
},
{
"epoch": 4.47,
"learning_rate": 2.123345756203735e-06,
"loss": 2.5343,
"step": 435500
},
{
"epoch": 4.48,
"learning_rate": 2.1028120861182124e-06,
"loss": 2.5554,
"step": 436000
},
{
"epoch": 4.48,
"learning_rate": 2.082319483372861e-06,
"loss": 2.5519,
"step": 436500
},
{
"epoch": 4.49,
"learning_rate": 2.061785813287338e-06,
"loss": 2.5579,
"step": 437000
},
{
"epoch": 4.49,
"learning_rate": 2.0412521432018153e-06,
"loss": 2.5515,
"step": 437500
},
{
"epoch": 4.5,
"learning_rate": 2.0207184731162925e-06,
"loss": 2.5662,
"step": 438000
},
{
"epoch": 4.5,
"learning_rate": 2.000225870370941e-06,
"loss": 2.5343,
"step": 438500
},
{
"epoch": 4.51,
"learning_rate": 1.9796922002854183e-06,
"loss": 2.5598,
"step": 439000
},
{
"epoch": 4.51,
"learning_rate": 1.9591585301998955e-06,
"loss": 2.5431,
"step": 439500
},
{
"epoch": 4.52,
"learning_rate": 1.9386248601143727e-06,
"loss": 2.5741,
"step": 440000
},
{
"epoch": 4.52,
"learning_rate": 1.9181322573690213e-06,
"loss": 2.5596,
"step": 440500
},
{
"epoch": 4.53,
"learning_rate": 1.8975985872834982e-06,
"loss": 2.5372,
"step": 441000
},
{
"epoch": 4.53,
"learning_rate": 1.8770649171979754e-06,
"loss": 2.5461,
"step": 441500
},
{
"epoch": 4.54,
"learning_rate": 1.8565312471124528e-06,
"loss": 2.564,
"step": 442000
},
{
"epoch": 4.54,
"learning_rate": 1.836038644367101e-06,
"loss": 2.5384,
"step": 442500
},
{
"epoch": 4.55,
"learning_rate": 1.8155049742815784e-06,
"loss": 2.5388,
"step": 443000
},
{
"epoch": 4.55,
"learning_rate": 1.7949713041960556e-06,
"loss": 2.5461,
"step": 443500
},
{
"epoch": 4.56,
"learning_rate": 1.774437634110533e-06,
"loss": 2.5616,
"step": 444000
},
{
"epoch": 4.56,
"learning_rate": 1.7539450313651811e-06,
"loss": 2.5392,
"step": 444500
},
{
"epoch": 4.57,
"learning_rate": 1.7334113612796585e-06,
"loss": 2.5661,
"step": 445000
},
{
"epoch": 4.57,
"learning_rate": 1.7128776911941357e-06,
"loss": 2.5373,
"step": 445500
},
{
"epoch": 4.58,
"learning_rate": 1.6923440211086131e-06,
"loss": 2.5193,
"step": 446000
},
{
"epoch": 4.58,
"learning_rate": 1.6718514183632615e-06,
"loss": 2.517,
"step": 446500
},
{
"epoch": 4.59,
"learning_rate": 1.6513177482777385e-06,
"loss": 2.5266,
"step": 447000
},
{
"epoch": 4.59,
"learning_rate": 1.6307840781922157e-06,
"loss": 2.5429,
"step": 447500
},
{
"epoch": 4.6,
"learning_rate": 1.610250408106693e-06,
"loss": 2.5305,
"step": 448000
},
{
"epoch": 4.6,
"learning_rate": 1.5897167380211703e-06,
"loss": 2.5723,
"step": 448500
},
{
"epoch": 4.61,
"learning_rate": 1.5692241352758186e-06,
"loss": 2.5435,
"step": 449000
},
{
"epoch": 4.61,
"learning_rate": 1.5486904651902958e-06,
"loss": 2.5542,
"step": 449500
},
{
"epoch": 4.62,
"learning_rate": 1.5281567951047732e-06,
"loss": 2.5342,
"step": 450000
},
{
"epoch": 4.63,
"learning_rate": 1.5076231250192504e-06,
"loss": 2.5589,
"step": 450500
},
{
"epoch": 4.63,
"learning_rate": 1.4871305222738988e-06,
"loss": 2.5397,
"step": 451000
},
{
"epoch": 4.64,
"learning_rate": 1.4665968521883762e-06,
"loss": 2.5361,
"step": 451500
},
{
"epoch": 4.64,
"learning_rate": 1.4460631821028534e-06,
"loss": 2.5638,
"step": 452000
},
{
"epoch": 4.65,
"learning_rate": 1.4255295120173304e-06,
"loss": 2.532,
"step": 452500
},
{
"epoch": 4.65,
"learning_rate": 1.4050369092719787e-06,
"loss": 2.5609,
"step": 453000
},
{
"epoch": 4.66,
"learning_rate": 1.384503239186456e-06,
"loss": 2.5257,
"step": 453500
},
{
"epoch": 4.66,
"learning_rate": 1.3639695691009333e-06,
"loss": 2.5254,
"step": 454000
},
{
"epoch": 4.67,
"learning_rate": 1.3434358990154105e-06,
"loss": 2.5407,
"step": 454500
},
{
"epoch": 4.67,
"learning_rate": 1.3229843636102299e-06,
"loss": 2.5279,
"step": 455000
},
{
"epoch": 4.68,
"learning_rate": 1.3024506935247073e-06,
"loss": 2.5276,
"step": 455500
},
{
"epoch": 4.68,
"learning_rate": 1.2819170234391844e-06,
"loss": 2.5704,
"step": 456000
},
{
"epoch": 4.69,
"learning_rate": 1.2613833533536618e-06,
"loss": 2.5349,
"step": 456500
},
{
"epoch": 4.69,
"learning_rate": 1.240849683268139e-06,
"loss": 2.5449,
"step": 457000
},
{
"epoch": 4.7,
"learning_rate": 1.2203160131826162e-06,
"loss": 2.5408,
"step": 457500
},
{
"epoch": 4.7,
"learning_rate": 1.1997823430970936e-06,
"loss": 2.5165,
"step": 458000
},
{
"epoch": 4.71,
"learning_rate": 1.1792486730115708e-06,
"loss": 2.563,
"step": 458500
},
{
"epoch": 4.71,
"learning_rate": 1.1587560702662192e-06,
"loss": 2.5678,
"step": 459000
},
{
"epoch": 4.72,
"learning_rate": 1.1382224001806964e-06,
"loss": 2.5404,
"step": 459500
},
{
"epoch": 4.72,
"learning_rate": 1.1176887300951736e-06,
"loss": 2.5691,
"step": 460000
},
{
"epoch": 4.73,
"learning_rate": 1.097155060009651e-06,
"loss": 2.516,
"step": 460500
},
{
"epoch": 4.73,
"learning_rate": 1.0766624572642991e-06,
"loss": 2.5326,
"step": 461000
},
{
"epoch": 4.74,
"learning_rate": 1.0561287871787765e-06,
"loss": 2.5426,
"step": 461500
},
{
"epoch": 4.74,
"learning_rate": 1.0355951170932537e-06,
"loss": 2.51,
"step": 462000
},
{
"epoch": 4.75,
"learning_rate": 1.0150614470077311e-06,
"loss": 2.5328,
"step": 462500
},
{
"epoch": 4.75,
"learning_rate": 9.945688442623793e-07,
"loss": 2.5352,
"step": 463000
},
{
"epoch": 4.76,
"learning_rate": 9.740351741768565e-07,
"loss": 2.4885,
"step": 463500
},
{
"epoch": 4.76,
"learning_rate": 9.535015040913338e-07,
"loss": 2.5488,
"step": 464000
},
{
"epoch": 4.77,
"learning_rate": 9.329678340058111e-07,
"loss": 2.5455,
"step": 464500
},
{
"epoch": 4.77,
"learning_rate": 9.124752312604594e-07,
"loss": 2.5433,
"step": 465000
},
{
"epoch": 4.78,
"learning_rate": 8.919415611749367e-07,
"loss": 2.5511,
"step": 465500
},
{
"epoch": 4.78,
"learning_rate": 8.714078910894138e-07,
"loss": 2.5286,
"step": 466000
},
{
"epoch": 4.79,
"learning_rate": 8.508742210038911e-07,
"loss": 2.5607,
"step": 466500
},
{
"epoch": 4.79,
"learning_rate": 8.303816182585395e-07,
"loss": 2.5621,
"step": 467000
},
{
"epoch": 4.8,
"learning_rate": 8.098479481730168e-07,
"loss": 2.534,
"step": 467500
},
{
"epoch": 4.8,
"learning_rate": 7.893142780874941e-07,
"loss": 2.586,
"step": 468000
},
{
"epoch": 4.81,
"learning_rate": 7.687806080019714e-07,
"loss": 2.541,
"step": 468500
},
{
"epoch": 4.82,
"learning_rate": 7.482469379164485e-07,
"loss": 2.5375,
"step": 469000
},
{
"epoch": 4.82,
"learning_rate": 7.277132678309258e-07,
"loss": 2.5287,
"step": 469500
},
{
"epoch": 4.83,
"learning_rate": 7.071795977454031e-07,
"loss": 2.5211,
"step": 470000
},
{
"epoch": 4.83,
"learning_rate": 6.866869950000514e-07,
"loss": 2.5658,
"step": 470500
},
{
"epoch": 4.84,
"learning_rate": 6.661533249145287e-07,
"loss": 2.5567,
"step": 471000
},
{
"epoch": 4.84,
"learning_rate": 6.456196548290058e-07,
"loss": 2.535,
"step": 471500
},
{
"epoch": 4.85,
"learning_rate": 6.250859847434831e-07,
"loss": 2.5405,
"step": 472000
},
{
"epoch": 4.85,
"learning_rate": 6.045933819981315e-07,
"loss": 2.5291,
"step": 472500
},
{
"epoch": 4.86,
"learning_rate": 5.840597119126088e-07,
"loss": 2.5359,
"step": 473000
},
{
"epoch": 4.86,
"learning_rate": 5.635260418270861e-07,
"loss": 2.5548,
"step": 473500
},
{
"epoch": 4.87,
"learning_rate": 5.429923717415633e-07,
"loss": 2.5573,
"step": 474000
},
{
"epoch": 4.87,
"learning_rate": 5.224587016560406e-07,
"loss": 2.5402,
"step": 474500
},
{
"epoch": 4.88,
"learning_rate": 5.019250315705177e-07,
"loss": 2.526,
"step": 475000
},
{
"epoch": 4.88,
"learning_rate": 4.81391361484995e-07,
"loss": 2.5451,
"step": 475500
},
{
"epoch": 4.89,
"learning_rate": 4.608576913994723e-07,
"loss": 2.5159,
"step": 476000
},
{
"epoch": 4.89,
"learning_rate": 4.403650886541206e-07,
"loss": 2.5486,
"step": 476500
},
{
"epoch": 4.9,
"learning_rate": 4.198314185685979e-07,
"loss": 2.5529,
"step": 477000
},
{
"epoch": 4.9,
"learning_rate": 3.9929774848307514e-07,
"loss": 2.5502,
"step": 477500
},
{
"epoch": 4.91,
"learning_rate": 3.7876407839755243e-07,
"loss": 2.5564,
"step": 478000
},
{
"epoch": 4.91,
"learning_rate": 3.582714756522007e-07,
"loss": 2.5274,
"step": 478500
},
{
"epoch": 4.92,
"learning_rate": 3.37737805566678e-07,
"loss": 2.5686,
"step": 479000
},
{
"epoch": 4.92,
"learning_rate": 3.1720413548115524e-07,
"loss": 2.552,
"step": 479500
},
{
"epoch": 4.93,
"learning_rate": 2.9671153273580356e-07,
"loss": 2.546,
"step": 480000
},
{
"epoch": 4.93,
"learning_rate": 2.7617786265028085e-07,
"loss": 2.5279,
"step": 480500
},
{
"epoch": 4.94,
"learning_rate": 2.556441925647581e-07,
"loss": 2.5537,
"step": 481000
},
{
"epoch": 4.94,
"learning_rate": 2.3511052247923537e-07,
"loss": 2.5668,
"step": 481500
},
{
"epoch": 4.95,
"learning_rate": 2.145768523937126e-07,
"loss": 2.5783,
"step": 482000
},
{
"epoch": 4.95,
"learning_rate": 1.9404318230818988e-07,
"loss": 2.5368,
"step": 482500
},
{
"epoch": 4.96,
"learning_rate": 1.7350951222266712e-07,
"loss": 2.5358,
"step": 483000
},
{
"epoch": 4.96,
"learning_rate": 1.529758421371444e-07,
"loss": 2.566,
"step": 483500
},
{
"epoch": 4.97,
"learning_rate": 1.3248323939179268e-07,
"loss": 2.5619,
"step": 484000
},
{
"epoch": 4.97,
"learning_rate": 1.1199063664644102e-07,
"loss": 2.5566,
"step": 484500
},
{
"epoch": 4.98,
"learning_rate": 9.145696656091827e-08,
"loss": 2.5638,
"step": 485000
},
{
"epoch": 4.98,
"learning_rate": 7.092329647539553e-08,
"loss": 2.5277,
"step": 485500
},
{
"epoch": 4.99,
"learning_rate": 5.038962638987279e-08,
"loss": 2.5254,
"step": 486000
},
{
"epoch": 4.99,
"learning_rate": 2.985595630435006e-08,
"loss": 2.5533,
"step": 486500
},
{
"epoch": 5.0,
"learning_rate": 9.363353558998369e-09,
"loss": 2.5433,
"step": 487000
},
{
"epoch": 5.0,
"eval_gen_len": 14.198,
"eval_loss": 2.459300994873047,
"eval_rouge1": 33.0901,
"eval_rouge2": 14.5749,
"eval_rougeL": 30.4267,
"eval_rougeLsum": 30.5438,
"eval_runtime": 65.3559,
"eval_samples_per_second": 15.301,
"eval_steps_per_second": 1.913,
"step": 487005
}
],
"max_steps": 487005,
"num_train_epochs": 5,
"total_flos": 1.0373616546436547e+18,
"trial_name": null,
"trial_params": null
}