|
{ |
|
"best_metric": 2.459300994873047, |
|
"best_model_checkpoint": "results/models/t5-small-NewsRoom1/checkpoint-487005", |
|
"epoch": 5.0, |
|
"global_step": 487005, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.9979630599275163e-05, |
|
"loss": 3.5575, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.995909692918964e-05, |
|
"loss": 3.2397, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.9938563259104117e-05, |
|
"loss": 3.2235, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.9918029589018594e-05, |
|
"loss": 3.1572, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.989749591893307e-05, |
|
"loss": 3.1626, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.987696224884755e-05, |
|
"loss": 3.1051, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.9856428578762026e-05, |
|
"loss": 3.1111, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.9835894908676503e-05, |
|
"loss": 3.0974, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.9815402305931152e-05, |
|
"loss": 3.0898, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.97949097031858e-05, |
|
"loss": 3.1047, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.9774376033100278e-05, |
|
"loss": 3.0322, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.9753842363014755e-05, |
|
"loss": 3.0404, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.9733308692929233e-05, |
|
"loss": 3.0298, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.971277502284371e-05, |
|
"loss": 3.0271, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.9692241352758187e-05, |
|
"loss": 3.015, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.9671707682672664e-05, |
|
"loss": 3.0293, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.965117401258714e-05, |
|
"loss": 2.9866, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.963068140984179e-05, |
|
"loss": 2.9875, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.9610147739756268e-05, |
|
"loss": 2.9972, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.9589614069670745e-05, |
|
"loss": 2.9819, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.9569080399585222e-05, |
|
"loss": 2.9796, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.9548628864180043e-05, |
|
"loss": 2.9545, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.952809519409452e-05, |
|
"loss": 2.95, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.9507561524008997e-05, |
|
"loss": 2.978, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 1.948702785392347e-05, |
|
"loss": 2.9684, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 1.9466494183837948e-05, |
|
"loss": 2.959, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1.9445960513752425e-05, |
|
"loss": 2.968, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1.9425426843666903e-05, |
|
"loss": 2.9581, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 1.940489317358138e-05, |
|
"loss": 2.9357, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 1.9384400570836032e-05, |
|
"loss": 2.9308, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1.9363866900750506e-05, |
|
"loss": 2.9518, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1.9343333230664983e-05, |
|
"loss": 2.9479, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1.932279956057946e-05, |
|
"loss": 2.9299, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1.930230695783411e-05, |
|
"loss": 2.9688, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 1.9281773287748587e-05, |
|
"loss": 2.9159, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 1.9261239617663067e-05, |
|
"loss": 2.9034, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 1.9240705947577544e-05, |
|
"loss": 2.9215, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.922021334483219e-05, |
|
"loss": 2.9202, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.9199679674746667e-05, |
|
"loss": 2.8881, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.9179146004661144e-05, |
|
"loss": 2.9043, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.915861233457562e-05, |
|
"loss": 2.8839, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.913811973183027e-05, |
|
"loss": 2.8857, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.9117586061744748e-05, |
|
"loss": 2.8636, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1.9097052391659225e-05, |
|
"loss": 2.8856, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1.9076559788913874e-05, |
|
"loss": 2.869, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.905602611882835e-05, |
|
"loss": 2.9347, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.9035492448742828e-05, |
|
"loss": 2.88, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 1.9014958778657305e-05, |
|
"loss": 2.8891, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 1.8994425108571783e-05, |
|
"loss": 2.8673, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1.897393250582643e-05, |
|
"loss": 2.8301, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1.895339883574091e-05, |
|
"loss": 2.8586, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 1.8932865165655386e-05, |
|
"loss": 2.8351, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 1.8912331495569863e-05, |
|
"loss": 2.8603, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 1.889179782548434e-05, |
|
"loss": 2.8528, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 1.8871264155398818e-05, |
|
"loss": 2.8671, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 1.8850730485313295e-05, |
|
"loss": 2.8824, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 1.8830196815227772e-05, |
|
"loss": 2.8712, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 1.880970421248242e-05, |
|
"loss": 2.8514, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 1.8789170542396898e-05, |
|
"loss": 2.8822, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 1.8768636872311375e-05, |
|
"loss": 2.868, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 1.8748103202225853e-05, |
|
"loss": 2.8742, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 1.8727610599480498e-05, |
|
"loss": 2.8513, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 1.8707076929394975e-05, |
|
"loss": 2.8506, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 1.8686543259309453e-05, |
|
"loss": 2.8392, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 1.8666009589223933e-05, |
|
"loss": 2.8767, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 1.864551698647858e-05, |
|
"loss": 2.8435, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 1.8625024383733228e-05, |
|
"loss": 2.8289, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 1.8604490713647705e-05, |
|
"loss": 2.8443, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 1.8583957043562182e-05, |
|
"loss": 2.8266, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 1.856342337347666e-05, |
|
"loss": 2.857, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 1.8542889703391136e-05, |
|
"loss": 2.8371, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 1.8522356033305614e-05, |
|
"loss": 2.8395, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 1.850182236322009e-05, |
|
"loss": 2.8286, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 1.8481288693134568e-05, |
|
"loss": 2.8074, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.8460796090389217e-05, |
|
"loss": 2.8464, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.8440262420303694e-05, |
|
"loss": 2.8489, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1.841972875021817e-05, |
|
"loss": 2.8259, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1.839919508013265e-05, |
|
"loss": 2.8229, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.8378702477387298e-05, |
|
"loss": 2.7956, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.8358209874641947e-05, |
|
"loss": 2.837, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.8337676204556424e-05, |
|
"loss": 2.8233, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.83171425344709e-05, |
|
"loss": 2.8293, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.8296608864385378e-05, |
|
"loss": 2.8451, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.8276075194299855e-05, |
|
"loss": 2.8166, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.8255541524214333e-05, |
|
"loss": 2.8388, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.823500785412881e-05, |
|
"loss": 2.8199, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.8214474184043287e-05, |
|
"loss": 2.829, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.8193981581297936e-05, |
|
"loss": 2.8076, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1.8173447911212413e-05, |
|
"loss": 2.8422, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1.815291424112689e-05, |
|
"loss": 2.8232, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.8132380571041364e-05, |
|
"loss": 2.8369, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.8111887968296017e-05, |
|
"loss": 2.8246, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.8091354298210494e-05, |
|
"loss": 2.8111, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.807082062812497e-05, |
|
"loss": 2.7974, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.8050286958039448e-05, |
|
"loss": 2.8035, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.8029794355294094e-05, |
|
"loss": 2.7829, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 1.800926068520857e-05, |
|
"loss": 2.8093, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 1.7988727015123048e-05, |
|
"loss": 2.7819, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 1.7968193345037525e-05, |
|
"loss": 2.8044, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 1.7947659674952006e-05, |
|
"loss": 2.7933, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 1.792716707220665e-05, |
|
"loss": 2.8061, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 1.790663340212113e-05, |
|
"loss": 2.8124, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 1.7886099732035606e-05, |
|
"loss": 2.7989, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 1.7865566061950083e-05, |
|
"loss": 2.7809, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 1.7845073459204732e-05, |
|
"loss": 2.7934, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 1.782453978911921e-05, |
|
"loss": 2.7905, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 1.7804006119033686e-05, |
|
"loss": 2.7978, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 1.7783472448948164e-05, |
|
"loss": 2.7977, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 1.7762979846202813e-05, |
|
"loss": 2.7847, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 1.774244617611729e-05, |
|
"loss": 2.8054, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 1.7721912506031767e-05, |
|
"loss": 2.7806, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 1.7701378835946244e-05, |
|
"loss": 2.788, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 1.7680886233200893e-05, |
|
"loss": 2.7815, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 1.766035256311537e-05, |
|
"loss": 2.7515, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 1.7639818893029848e-05, |
|
"loss": 2.7862, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 1.7619285222944325e-05, |
|
"loss": 2.7743, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 1.7598833687539142e-05, |
|
"loss": 2.7668, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.757830001745362e-05, |
|
"loss": 2.8068, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.75577663473681e-05, |
|
"loss": 2.7708, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.7537232677282577e-05, |
|
"loss": 2.7488, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.7516699007197054e-05, |
|
"loss": 2.8045, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.749616533711153e-05, |
|
"loss": 2.7621, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.747563166702601e-05, |
|
"loss": 2.7739, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.7455097996940486e-05, |
|
"loss": 2.7787, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.743460539419513e-05, |
|
"loss": 2.7455, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.741407172410961e-05, |
|
"loss": 2.7489, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.739353805402409e-05, |
|
"loss": 2.7444, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.7373045451278735e-05, |
|
"loss": 2.7777, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.7352511781193212e-05, |
|
"loss": 2.7936, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.733197811110769e-05, |
|
"loss": 2.7491, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.7311444441022167e-05, |
|
"loss": 2.7712, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.7290910770936644e-05, |
|
"loss": 2.7617, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.727037710085112e-05, |
|
"loss": 2.7422, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.7249843430765598e-05, |
|
"loss": 2.7406, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.722930976068008e-05, |
|
"loss": 2.7828, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.7208817157934724e-05, |
|
"loss": 2.7263, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.71882834878492e-05, |
|
"loss": 2.7545, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.716774981776368e-05, |
|
"loss": 2.7511, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.7147257215018328e-05, |
|
"loss": 2.7539, |
|
"step": 69500 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.7126723544932805e-05, |
|
"loss": 2.7397, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.7106189874847282e-05, |
|
"loss": 2.7778, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.708565620476176e-05, |
|
"loss": 2.7383, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.7065122534676236e-05, |
|
"loss": 2.7689, |
|
"step": 71500 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.7044588864590714e-05, |
|
"loss": 2.7666, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.702405519450519e-05, |
|
"loss": 2.7912, |
|
"step": 72500 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.7003521524419668e-05, |
|
"loss": 2.7468, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.698306998901449e-05, |
|
"loss": 2.7189, |
|
"step": 73500 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.6962536318928966e-05, |
|
"loss": 2.7461, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.6942002648843443e-05, |
|
"loss": 2.7588, |
|
"step": 74500 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.692146897875792e-05, |
|
"loss": 2.7358, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.6900935308672398e-05, |
|
"loss": 2.7604, |
|
"step": 75500 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.6880401638586875e-05, |
|
"loss": 2.7661, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.6859867968501352e-05, |
|
"loss": 2.75, |
|
"step": 76500 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.683933429841583e-05, |
|
"loss": 2.7526, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.6818800628330306e-05, |
|
"loss": 2.7351, |
|
"step": 77500 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.6798308025584955e-05, |
|
"loss": 2.7663, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.6777774355499433e-05, |
|
"loss": 2.7209, |
|
"step": 78500 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.675724068541391e-05, |
|
"loss": 2.762, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.6736707015328387e-05, |
|
"loss": 2.7819, |
|
"step": 79500 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.6716214412583033e-05, |
|
"loss": 2.7384, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 1.669568074249751e-05, |
|
"loss": 2.7595, |
|
"step": 80500 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 1.6675147072411987e-05, |
|
"loss": 2.7407, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 1.6654613402326468e-05, |
|
"loss": 2.7268, |
|
"step": 81500 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 1.6634120799581117e-05, |
|
"loss": 2.7237, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 1.661358712949559e-05, |
|
"loss": 2.7743, |
|
"step": 82500 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 1.6593053459410068e-05, |
|
"loss": 2.7322, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 1.6572519789324545e-05, |
|
"loss": 2.7411, |
|
"step": 83500 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 1.6552027186579194e-05, |
|
"loss": 2.779, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 1.6531493516493674e-05, |
|
"loss": 2.7452, |
|
"step": 84500 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 1.651095984640815e-05, |
|
"loss": 2.7376, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 1.6490426176322625e-05, |
|
"loss": 2.7268, |
|
"step": 85500 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 1.6469933573577274e-05, |
|
"loss": 2.7354, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.644939990349175e-05, |
|
"loss": 2.7492, |
|
"step": 86500 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.642886623340623e-05, |
|
"loss": 2.7243, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.6408332563320706e-05, |
|
"loss": 2.7543, |
|
"step": 87500 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.6387839960575355e-05, |
|
"loss": 2.7325, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.6367306290489832e-05, |
|
"loss": 2.7412, |
|
"step": 88500 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.634677262040431e-05, |
|
"loss": 2.7478, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.6326238950318786e-05, |
|
"loss": 2.7506, |
|
"step": 89500 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.6305746347573435e-05, |
|
"loss": 2.7484, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.6285212677487913e-05, |
|
"loss": 2.7401, |
|
"step": 90500 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.626467900740239e-05, |
|
"loss": 2.7286, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.6244145337316867e-05, |
|
"loss": 2.7509, |
|
"step": 91500 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.6223652734571516e-05, |
|
"loss": 2.7376, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.6203119064485993e-05, |
|
"loss": 2.709, |
|
"step": 92500 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.618258539440047e-05, |
|
"loss": 2.723, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.6162051724314948e-05, |
|
"loss": 2.7194, |
|
"step": 93500 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.6141559121569593e-05, |
|
"loss": 2.7385, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.612102545148407e-05, |
|
"loss": 2.6993, |
|
"step": 94500 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.610049178139855e-05, |
|
"loss": 2.7298, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.6079958111313028e-05, |
|
"loss": 2.718, |
|
"step": 95500 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.6059465508567674e-05, |
|
"loss": 2.752, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.603893183848215e-05, |
|
"loss": 2.7448, |
|
"step": 96500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.6018398168396628e-05, |
|
"loss": 2.724, |
|
"step": 97000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_gen_len": 14.378, |
|
"eval_loss": 2.594017744064331, |
|
"eval_rouge1": 32.3136, |
|
"eval_rouge2": 13.6987, |
|
"eval_rougeL": 29.7482, |
|
"eval_rougeLsum": 29.7976, |
|
"eval_runtime": 64.9599, |
|
"eval_samples_per_second": 15.394, |
|
"eval_steps_per_second": 1.924, |
|
"step": 97401 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.5997864498311105e-05, |
|
"loss": 2.7119, |
|
"step": 97500 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 1.5977371895565754e-05, |
|
"loss": 2.7325, |
|
"step": 98000 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 1.5956838225480235e-05, |
|
"loss": 2.6739, |
|
"step": 98500 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 1.593630455539471e-05, |
|
"loss": 2.6834, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 1.5915770885309186e-05, |
|
"loss": 2.7296, |
|
"step": 99500 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 1.5895278282563835e-05, |
|
"loss": 2.723, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 1.5874744612478312e-05, |
|
"loss": 2.6704, |
|
"step": 100500 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 1.585421094239279e-05, |
|
"loss": 2.6961, |
|
"step": 101000 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 1.5833677272307267e-05, |
|
"loss": 2.7187, |
|
"step": 101500 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 1.5813184669561916e-05, |
|
"loss": 2.6922, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 1.5792650999476393e-05, |
|
"loss": 2.7033, |
|
"step": 102500 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 1.577211732939087e-05, |
|
"loss": 2.7269, |
|
"step": 103000 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 1.5751583659305347e-05, |
|
"loss": 2.6851, |
|
"step": 103500 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 1.5731049989219824e-05, |
|
"loss": 2.6726, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 1.5710598453814642e-05, |
|
"loss": 2.7191, |
|
"step": 104500 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 1.5690064783729122e-05, |
|
"loss": 2.706, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 1.56695311136436e-05, |
|
"loss": 2.6976, |
|
"step": 105500 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 1.5648997443558077e-05, |
|
"loss": 2.7006, |
|
"step": 106000 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 1.5628463773472554e-05, |
|
"loss": 2.6924, |
|
"step": 106500 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 1.560793010338703e-05, |
|
"loss": 2.6632, |
|
"step": 107000 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 1.5587396433301508e-05, |
|
"loss": 2.7045, |
|
"step": 107500 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 1.5566862763215985e-05, |
|
"loss": 2.6816, |
|
"step": 108000 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 1.5546370160470634e-05, |
|
"loss": 2.6965, |
|
"step": 108500 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 1.552583649038511e-05, |
|
"loss": 2.6934, |
|
"step": 109000 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 1.550530282029959e-05, |
|
"loss": 2.7123, |
|
"step": 109500 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 1.5484769150214066e-05, |
|
"loss": 2.6684, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 1.546427654746871e-05, |
|
"loss": 2.6811, |
|
"step": 110500 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 1.544374287738319e-05, |
|
"loss": 2.6575, |
|
"step": 111000 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 1.5423209207297666e-05, |
|
"loss": 2.702, |
|
"step": 111500 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 1.5402675537212143e-05, |
|
"loss": 2.7073, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 1.5382182934466792e-05, |
|
"loss": 2.6536, |
|
"step": 112500 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 1.536164926438127e-05, |
|
"loss": 2.6625, |
|
"step": 113000 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 1.5341115594295747e-05, |
|
"loss": 2.7035, |
|
"step": 113500 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 1.5320622991550396e-05, |
|
"loss": 2.6826, |
|
"step": 114000 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 1.5300089321464873e-05, |
|
"loss": 2.7117, |
|
"step": 114500 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 1.527955565137935e-05, |
|
"loss": 2.6746, |
|
"step": 115000 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 1.5259021981293827e-05, |
|
"loss": 2.7035, |
|
"step": 115500 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 1.5238529378548476e-05, |
|
"loss": 2.6901, |
|
"step": 116000 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 1.5217995708462953e-05, |
|
"loss": 2.6582, |
|
"step": 116500 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 1.519746203837743e-05, |
|
"loss": 2.6899, |
|
"step": 117000 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 1.5176928368291908e-05, |
|
"loss": 2.677, |
|
"step": 117500 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 1.5156394698206385e-05, |
|
"loss": 2.7053, |
|
"step": 118000 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 1.5135861028120862e-05, |
|
"loss": 2.6964, |
|
"step": 118500 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 1.511532735803534e-05, |
|
"loss": 2.6845, |
|
"step": 119000 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 1.5094793687949818e-05, |
|
"loss": 2.687, |
|
"step": 119500 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 1.5074260017864295e-05, |
|
"loss": 2.6311, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 1.5053726347778773e-05, |
|
"loss": 2.6907, |
|
"step": 120500 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 1.503323374503342e-05, |
|
"loss": 2.6775, |
|
"step": 121000 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 1.5012700074947897e-05, |
|
"loss": 2.6635, |
|
"step": 121500 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 1.4992166404862374e-05, |
|
"loss": 2.6797, |
|
"step": 122000 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 1.4971632734776851e-05, |
|
"loss": 2.6788, |
|
"step": 122500 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 1.4951099064691329e-05, |
|
"loss": 2.6853, |
|
"step": 123000 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 1.4930565394605808e-05, |
|
"loss": 2.6984, |
|
"step": 123500 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 1.4910031724520285e-05, |
|
"loss": 2.7143, |
|
"step": 124000 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 1.4889498054434762e-05, |
|
"loss": 2.6955, |
|
"step": 124500 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 1.486900545168941e-05, |
|
"loss": 2.6878, |
|
"step": 125000 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 1.4848471781603886e-05, |
|
"loss": 2.6728, |
|
"step": 125500 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 1.4827938111518364e-05, |
|
"loss": 2.6604, |
|
"step": 126000 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 1.4807404441432839e-05, |
|
"loss": 2.6752, |
|
"step": 126500 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 1.478691183868749e-05, |
|
"loss": 2.6586, |
|
"step": 127000 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 1.4766378168601967e-05, |
|
"loss": 2.6779, |
|
"step": 127500 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 1.4745885565856614e-05, |
|
"loss": 2.6706, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 1.4725351895771092e-05, |
|
"loss": 2.6445, |
|
"step": 128500 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 1.4704818225685569e-05, |
|
"loss": 2.6967, |
|
"step": 129000 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 1.4684284555600046e-05, |
|
"loss": 2.657, |
|
"step": 129500 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 1.4663750885514523e-05, |
|
"loss": 2.6834, |
|
"step": 130000 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 1.4643258282769172e-05, |
|
"loss": 2.6453, |
|
"step": 130500 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 1.462272461268365e-05, |
|
"loss": 2.6937, |
|
"step": 131000 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 1.4602190942598126e-05, |
|
"loss": 2.6599, |
|
"step": 131500 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 1.4581657272512604e-05, |
|
"loss": 2.6725, |
|
"step": 132000 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 1.4561123602427081e-05, |
|
"loss": 2.6797, |
|
"step": 132500 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 1.4540589932341558e-05, |
|
"loss": 2.6563, |
|
"step": 133000 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 1.4520056262256035e-05, |
|
"loss": 2.6849, |
|
"step": 133500 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 1.4499522592170512e-05, |
|
"loss": 2.6705, |
|
"step": 134000 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 1.4479029989425161e-05, |
|
"loss": 2.6557, |
|
"step": 134500 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 1.4458496319339639e-05, |
|
"loss": 2.7131, |
|
"step": 135000 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 1.4437962649254116e-05, |
|
"loss": 2.6721, |
|
"step": 135500 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 1.4417428979168593e-05, |
|
"loss": 2.6652, |
|
"step": 136000 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 1.439693637642324e-05, |
|
"loss": 2.6549, |
|
"step": 136500 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 1.4376443773677891e-05, |
|
"loss": 2.6765, |
|
"step": 137000 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 1.4355910103592368e-05, |
|
"loss": 2.6764, |
|
"step": 137500 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 1.4335376433506845e-05, |
|
"loss": 2.6518, |
|
"step": 138000 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 1.4314842763421321e-05, |
|
"loss": 2.7014, |
|
"step": 138500 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 1.4294309093335798e-05, |
|
"loss": 2.6446, |
|
"step": 139000 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 1.4273775423250275e-05, |
|
"loss": 2.6551, |
|
"step": 139500 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 1.4253241753164752e-05, |
|
"loss": 2.6676, |
|
"step": 140000 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 1.423270808307923e-05, |
|
"loss": 2.6614, |
|
"step": 140500 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 1.421221548033388e-05, |
|
"loss": 2.6662, |
|
"step": 141000 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 1.4191681810248356e-05, |
|
"loss": 2.6525, |
|
"step": 141500 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 1.4171189207503005e-05, |
|
"loss": 2.6505, |
|
"step": 142000 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 1.4150655537417482e-05, |
|
"loss": 2.7146, |
|
"step": 142500 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 1.413012186733196e-05, |
|
"loss": 2.6776, |
|
"step": 143000 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 1.4109588197246435e-05, |
|
"loss": 2.6868, |
|
"step": 143500 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 1.4089054527160912e-05, |
|
"loss": 2.6372, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 1.4068520857075389e-05, |
|
"loss": 2.6477, |
|
"step": 144500 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 1.4047987186989868e-05, |
|
"loss": 2.6651, |
|
"step": 145000 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 1.4027453516904345e-05, |
|
"loss": 2.6542, |
|
"step": 145500 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 1.4006960914158994e-05, |
|
"loss": 2.6695, |
|
"step": 146000 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 1.398642724407347e-05, |
|
"loss": 2.6435, |
|
"step": 146500 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 1.3965893573987947e-05, |
|
"loss": 2.6432, |
|
"step": 147000 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 1.3945400971242596e-05, |
|
"loss": 2.6672, |
|
"step": 147500 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 1.3924867301157075e-05, |
|
"loss": 2.6579, |
|
"step": 148000 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 1.3904333631071552e-05, |
|
"loss": 2.6796, |
|
"step": 148500 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 1.388379996098603e-05, |
|
"loss": 2.6714, |
|
"step": 149000 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 1.3863266290900506e-05, |
|
"loss": 2.6398, |
|
"step": 149500 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 1.3842732620814982e-05, |
|
"loss": 2.7002, |
|
"step": 150000 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 1.3822240018069631e-05, |
|
"loss": 2.6604, |
|
"step": 150500 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 1.3801706347984108e-05, |
|
"loss": 2.6441, |
|
"step": 151000 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 1.3781172677898584e-05, |
|
"loss": 2.6678, |
|
"step": 151500 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 1.3760639007813064e-05, |
|
"loss": 2.6336, |
|
"step": 152000 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 1.3740105337727541e-05, |
|
"loss": 2.675, |
|
"step": 152500 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 1.3719571667642017e-05, |
|
"loss": 2.642, |
|
"step": 153000 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 1.3699037997556494e-05, |
|
"loss": 2.6444, |
|
"step": 153500 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 1.3678504327470971e-05, |
|
"loss": 2.6615, |
|
"step": 154000 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 1.365801172472562e-05, |
|
"loss": 2.6338, |
|
"step": 154500 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 1.3637478054640096e-05, |
|
"loss": 2.6943, |
|
"step": 155000 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 1.3616944384554573e-05, |
|
"loss": 2.6665, |
|
"step": 155500 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 1.3596410714469053e-05, |
|
"loss": 2.6514, |
|
"step": 156000 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 1.35759181117237e-05, |
|
"loss": 2.6531, |
|
"step": 156500 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 1.3555384441638178e-05, |
|
"loss": 2.6352, |
|
"step": 157000 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 1.3534850771552655e-05, |
|
"loss": 2.6602, |
|
"step": 157500 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 1.351431710146713e-05, |
|
"loss": 2.642, |
|
"step": 158000 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 1.349382449872178e-05, |
|
"loss": 2.6788, |
|
"step": 158500 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 1.3473290828636259e-05, |
|
"loss": 2.6793, |
|
"step": 159000 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 1.3452757158550736e-05, |
|
"loss": 2.6424, |
|
"step": 159500 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 1.3432223488465213e-05, |
|
"loss": 2.7017, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 1.341173088571986e-05, |
|
"loss": 2.6175, |
|
"step": 160500 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 1.3391197215634337e-05, |
|
"loss": 2.6276, |
|
"step": 161000 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 1.3370663545548815e-05, |
|
"loss": 2.6621, |
|
"step": 161500 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 1.3350170942803462e-05, |
|
"loss": 2.6435, |
|
"step": 162000 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 1.332963727271794e-05, |
|
"loss": 2.6881, |
|
"step": 162500 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 1.3309103602632418e-05, |
|
"loss": 2.6399, |
|
"step": 163000 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 1.3288569932546895e-05, |
|
"loss": 2.6619, |
|
"step": 163500 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 1.3268036262461372e-05, |
|
"loss": 2.6493, |
|
"step": 164000 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 1.324750259237585e-05, |
|
"loss": 2.6573, |
|
"step": 164500 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 1.3226968922290327e-05, |
|
"loss": 2.668, |
|
"step": 165000 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 1.3206435252204804e-05, |
|
"loss": 2.6345, |
|
"step": 165500 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 1.3185942649459451e-05, |
|
"loss": 2.665, |
|
"step": 166000 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 1.316540897937393e-05, |
|
"loss": 2.6491, |
|
"step": 166500 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 1.3144875309288407e-05, |
|
"loss": 2.6337, |
|
"step": 167000 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 1.3124341639202885e-05, |
|
"loss": 2.6246, |
|
"step": 167500 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 1.3103807969117362e-05, |
|
"loss": 2.6633, |
|
"step": 168000 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 1.3083274299031839e-05, |
|
"loss": 2.6581, |
|
"step": 168500 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 1.3062740628946316e-05, |
|
"loss": 2.6849, |
|
"step": 169000 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 1.3042206958860793e-05, |
|
"loss": 2.647, |
|
"step": 169500 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 1.3021714356115442e-05, |
|
"loss": 2.6492, |
|
"step": 170000 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 1.300118068602992e-05, |
|
"loss": 2.6167, |
|
"step": 170500 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 1.2980647015944397e-05, |
|
"loss": 2.6717, |
|
"step": 171000 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 1.2960113345858874e-05, |
|
"loss": 2.6636, |
|
"step": 171500 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 1.2939620743113521e-05, |
|
"loss": 2.6242, |
|
"step": 172000 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 1.2919087073027998e-05, |
|
"loss": 2.6444, |
|
"step": 172500 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 1.2898594470282646e-05, |
|
"loss": 2.6382, |
|
"step": 173000 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 1.2878060800197125e-05, |
|
"loss": 2.6251, |
|
"step": 173500 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 1.2857527130111602e-05, |
|
"loss": 2.6521, |
|
"step": 174000 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 1.2836993460026079e-05, |
|
"loss": 2.6421, |
|
"step": 174500 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 1.2816459789940556e-05, |
|
"loss": 2.6558, |
|
"step": 175000 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 1.2795926119855033e-05, |
|
"loss": 2.6497, |
|
"step": 175500 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 1.277539244976951e-05, |
|
"loss": 2.6507, |
|
"step": 176000 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 1.2754858779683988e-05, |
|
"loss": 2.6507, |
|
"step": 176500 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 1.2734366176938635e-05, |
|
"loss": 2.6641, |
|
"step": 177000 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 1.2713832506853114e-05, |
|
"loss": 2.6628, |
|
"step": 177500 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 1.2693298836767591e-05, |
|
"loss": 2.6347, |
|
"step": 178000 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 1.2672765166682068e-05, |
|
"loss": 2.6648, |
|
"step": 178500 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 1.2652272563936716e-05, |
|
"loss": 2.6192, |
|
"step": 179000 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 1.2631738893851193e-05, |
|
"loss": 2.6273, |
|
"step": 179500 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 1.261120522376567e-05, |
|
"loss": 2.638, |
|
"step": 180000 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 1.2590671553680147e-05, |
|
"loss": 2.6297, |
|
"step": 180500 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 1.2570178950934798e-05, |
|
"loss": 2.6306, |
|
"step": 181000 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 1.2549645280849273e-05, |
|
"loss": 2.6197, |
|
"step": 181500 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 1.252911161076375e-05, |
|
"loss": 2.6327, |
|
"step": 182000 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 1.2508577940678228e-05, |
|
"loss": 2.6493, |
|
"step": 182500 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 1.2488085337932877e-05, |
|
"loss": 2.6695, |
|
"step": 183000 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 1.2467551667847352e-05, |
|
"loss": 2.6641, |
|
"step": 183500 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 1.244701799776183e-05, |
|
"loss": 2.6565, |
|
"step": 184000 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 1.242648432767631e-05, |
|
"loss": 2.6743, |
|
"step": 184500 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 1.2405991724930957e-05, |
|
"loss": 2.6213, |
|
"step": 185000 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 1.2385499122185605e-05, |
|
"loss": 2.6396, |
|
"step": 185500 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 1.2364965452100082e-05, |
|
"loss": 2.6562, |
|
"step": 186000 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 1.2344431782014559e-05, |
|
"loss": 2.6467, |
|
"step": 186500 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 1.2323898111929036e-05, |
|
"loss": 2.6323, |
|
"step": 187000 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 1.2303364441843515e-05, |
|
"loss": 2.6235, |
|
"step": 187500 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 1.2282830771757992e-05, |
|
"loss": 2.6383, |
|
"step": 188000 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 1.226229710167247e-05, |
|
"loss": 2.6293, |
|
"step": 188500 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 1.2241763431586947e-05, |
|
"loss": 2.6463, |
|
"step": 189000 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 1.2221270828841594e-05, |
|
"loss": 2.639, |
|
"step": 189500 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 1.2200737158756071e-05, |
|
"loss": 2.6436, |
|
"step": 190000 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 1.2180203488670548e-05, |
|
"loss": 2.6464, |
|
"step": 190500 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 1.2159669818585026e-05, |
|
"loss": 2.6449, |
|
"step": 191000 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 1.2139177215839675e-05, |
|
"loss": 2.615, |
|
"step": 191500 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 1.2118643545754152e-05, |
|
"loss": 2.6306, |
|
"step": 192000 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 1.2098109875668629e-05, |
|
"loss": 2.6308, |
|
"step": 192500 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 1.2077576205583106e-05, |
|
"loss": 2.6156, |
|
"step": 193000 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 1.2057042535497583e-05, |
|
"loss": 2.6197, |
|
"step": 193500 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 1.203650886541206e-05, |
|
"loss": 2.596, |
|
"step": 194000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.2015975195326538e-05, |
|
"loss": 2.6856, |
|
"step": 194500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_gen_len": 14.239, |
|
"eval_loss": 2.5211517810821533, |
|
"eval_rouge1": 32.6384, |
|
"eval_rouge2": 14.1175, |
|
"eval_rougeL": 30.0462, |
|
"eval_rougeLsum": 30.1515, |
|
"eval_runtime": 63.6279, |
|
"eval_samples_per_second": 15.716, |
|
"eval_steps_per_second": 1.965, |
|
"step": 194802 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.1995441525241013e-05, |
|
"loss": 2.6536, |
|
"step": 195000 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 1.1974948922495664e-05, |
|
"loss": 2.5967, |
|
"step": 195500 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 1.1954415252410141e-05, |
|
"loss": 2.6196, |
|
"step": 196000 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 1.1933881582324618e-05, |
|
"loss": 2.6051, |
|
"step": 196500 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 1.1913347912239095e-05, |
|
"loss": 2.6344, |
|
"step": 197000 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 1.1892855309493743e-05, |
|
"loss": 2.6343, |
|
"step": 197500 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 1.187232163940822e-05, |
|
"loss": 2.5969, |
|
"step": 198000 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 1.1851787969322699e-05, |
|
"loss": 2.6364, |
|
"step": 198500 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 1.1831254299237176e-05, |
|
"loss": 2.5896, |
|
"step": 199000 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 1.1810761696491823e-05, |
|
"loss": 2.6468, |
|
"step": 199500 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 1.17902280264063e-05, |
|
"loss": 2.6083, |
|
"step": 200000 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 1.1769694356320778e-05, |
|
"loss": 2.6013, |
|
"step": 200500 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 1.1749160686235255e-05, |
|
"loss": 2.6303, |
|
"step": 201000 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 1.1728668083489902e-05, |
|
"loss": 2.616, |
|
"step": 201500 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 1.1708134413404381e-05, |
|
"loss": 2.6251, |
|
"step": 202000 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 1.1687600743318858e-05, |
|
"loss": 2.6259, |
|
"step": 202500 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 1.1667067073233336e-05, |
|
"loss": 2.6299, |
|
"step": 203000 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 1.1646574470487983e-05, |
|
"loss": 2.6205, |
|
"step": 203500 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 1.162604080040246e-05, |
|
"loss": 2.6091, |
|
"step": 204000 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 1.1605507130316937e-05, |
|
"loss": 2.6182, |
|
"step": 204500 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 1.1584973460231414e-05, |
|
"loss": 2.6144, |
|
"step": 205000 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 1.1564480857486065e-05, |
|
"loss": 2.6163, |
|
"step": 205500 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 1.1543947187400542e-05, |
|
"loss": 2.606, |
|
"step": 206000 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 1.1523413517315018e-05, |
|
"loss": 2.6081, |
|
"step": 206500 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 1.1502879847229495e-05, |
|
"loss": 2.6181, |
|
"step": 207000 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 1.1482387244484144e-05, |
|
"loss": 2.5952, |
|
"step": 207500 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 1.1461853574398621e-05, |
|
"loss": 2.6011, |
|
"step": 208000 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 1.1441319904313097e-05, |
|
"loss": 2.6163, |
|
"step": 208500 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 1.1420827301567747e-05, |
|
"loss": 2.6228, |
|
"step": 209000 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 1.1400293631482225e-05, |
|
"loss": 2.638, |
|
"step": 209500 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 1.1379759961396702e-05, |
|
"loss": 2.6202, |
|
"step": 210000 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 1.1359226291311179e-05, |
|
"loss": 2.5915, |
|
"step": 210500 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 1.1338692621225656e-05, |
|
"loss": 2.6379, |
|
"step": 211000 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 1.1318158951140132e-05, |
|
"loss": 2.5958, |
|
"step": 211500 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 1.1297625281054609e-05, |
|
"loss": 2.6, |
|
"step": 212000 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 1.1277091610969086e-05, |
|
"loss": 2.5956, |
|
"step": 212500 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 1.1256599008223737e-05, |
|
"loss": 2.6104, |
|
"step": 213000 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 1.1236065338138214e-05, |
|
"loss": 2.6338, |
|
"step": 213500 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 1.1215531668052691e-05, |
|
"loss": 2.6188, |
|
"step": 214000 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 1.1194997997967168e-05, |
|
"loss": 2.6236, |
|
"step": 214500 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 1.1174505395221816e-05, |
|
"loss": 2.5834, |
|
"step": 215000 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 1.1153971725136293e-05, |
|
"loss": 2.6136, |
|
"step": 215500 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 1.1133438055050772e-05, |
|
"loss": 2.6168, |
|
"step": 216000 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 1.1112904384965249e-05, |
|
"loss": 2.6097, |
|
"step": 216500 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 1.1092411782219896e-05, |
|
"loss": 2.5822, |
|
"step": 217000 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 1.1071878112134373e-05, |
|
"loss": 2.5863, |
|
"step": 217500 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 1.105134444204885e-05, |
|
"loss": 2.5925, |
|
"step": 218000 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 1.1030810771963328e-05, |
|
"loss": 2.5834, |
|
"step": 218500 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 1.1010318169217975e-05, |
|
"loss": 2.6185, |
|
"step": 219000 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 1.0989784499132454e-05, |
|
"loss": 2.5897, |
|
"step": 219500 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 1.0969250829046931e-05, |
|
"loss": 2.6144, |
|
"step": 220000 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 1.0948717158961408e-05, |
|
"loss": 2.5723, |
|
"step": 220500 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 1.0928224556216056e-05, |
|
"loss": 2.617, |
|
"step": 221000 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 1.0907690886130533e-05, |
|
"loss": 2.588, |
|
"step": 221500 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 1.088715721604501e-05, |
|
"loss": 2.6123, |
|
"step": 222000 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 1.0866623545959487e-05, |
|
"loss": 2.6046, |
|
"step": 222500 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 1.0846130943214138e-05, |
|
"loss": 2.6093, |
|
"step": 223000 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 1.0825597273128613e-05, |
|
"loss": 2.6046, |
|
"step": 223500 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 1.080506360304309e-05, |
|
"loss": 2.6244, |
|
"step": 224000 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 1.0784529932957568e-05, |
|
"loss": 2.5966, |
|
"step": 224500 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 1.0764037330212215e-05, |
|
"loss": 2.6191, |
|
"step": 225000 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 1.0743503660126692e-05, |
|
"loss": 2.6419, |
|
"step": 225500 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 1.072296999004117e-05, |
|
"loss": 2.6193, |
|
"step": 226000 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 1.0702436319955648e-05, |
|
"loss": 2.6015, |
|
"step": 226500 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 1.0681943717210297e-05, |
|
"loss": 2.6035, |
|
"step": 227000 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 1.0661410047124775e-05, |
|
"loss": 2.5822, |
|
"step": 227500 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 1.0640876377039252e-05, |
|
"loss": 2.6084, |
|
"step": 228000 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 1.0620342706953727e-05, |
|
"loss": 2.5711, |
|
"step": 228500 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 1.0599850104208376e-05, |
|
"loss": 2.5969, |
|
"step": 229000 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 1.0579316434122855e-05, |
|
"loss": 2.5911, |
|
"step": 229500 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 1.0558782764037332e-05, |
|
"loss": 2.608, |
|
"step": 230000 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 1.053824909395181e-05, |
|
"loss": 2.63, |
|
"step": 230500 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 1.0517756491206457e-05, |
|
"loss": 2.5886, |
|
"step": 231000 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 1.0497222821120934e-05, |
|
"loss": 2.6097, |
|
"step": 231500 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 1.0476689151035411e-05, |
|
"loss": 2.6133, |
|
"step": 232000 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 1.0456155480949888e-05, |
|
"loss": 2.6208, |
|
"step": 232500 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 1.0435662878204537e-05, |
|
"loss": 2.6012, |
|
"step": 233000 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 1.0415129208119015e-05, |
|
"loss": 2.632, |
|
"step": 233500 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 1.0394595538033492e-05, |
|
"loss": 2.5796, |
|
"step": 234000 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 1.0374061867947969e-05, |
|
"loss": 2.6075, |
|
"step": 234500 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 1.0353610332542786e-05, |
|
"loss": 2.6212, |
|
"step": 235000 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 1.0333076662457264e-05, |
|
"loss": 2.5728, |
|
"step": 235500 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 1.0312542992371744e-05, |
|
"loss": 2.5845, |
|
"step": 236000 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 1.0292009322286221e-05, |
|
"loss": 2.5888, |
|
"step": 236500 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 1.0271475652200697e-05, |
|
"loss": 2.5924, |
|
"step": 237000 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 1.0250941982115174e-05, |
|
"loss": 2.6477, |
|
"step": 237500 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 1.0230408312029651e-05, |
|
"loss": 2.613, |
|
"step": 238000 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 1.0209874641944128e-05, |
|
"loss": 2.576, |
|
"step": 238500 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 1.0189382039198776e-05, |
|
"loss": 2.617, |
|
"step": 239000 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 1.0168848369113253e-05, |
|
"loss": 2.5819, |
|
"step": 239500 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 1.0148314699027732e-05, |
|
"loss": 2.5933, |
|
"step": 240000 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 1.0127781028942209e-05, |
|
"loss": 2.6146, |
|
"step": 240500 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 1.0107247358856686e-05, |
|
"loss": 2.6096, |
|
"step": 241000 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 1.0086754756111335e-05, |
|
"loss": 2.609, |
|
"step": 241500 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 1.006622108602581e-05, |
|
"loss": 2.5756, |
|
"step": 242000 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 1.0045687415940288e-05, |
|
"loss": 2.5681, |
|
"step": 242500 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 1.0025153745854765e-05, |
|
"loss": 2.5871, |
|
"step": 243000 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 1.0004661143109416e-05, |
|
"loss": 2.602, |
|
"step": 243500 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 9.984127473023891e-06, |
|
"loss": 2.6109, |
|
"step": 244000 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 9.96363487027854e-06, |
|
"loss": 2.5935, |
|
"step": 244500 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 9.943101200193017e-06, |
|
"loss": 2.5964, |
|
"step": 245000 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 9.922567530107495e-06, |
|
"loss": 2.5973, |
|
"step": 245500 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 9.902033860021972e-06, |
|
"loss": 2.5965, |
|
"step": 246000 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 9.881500189936449e-06, |
|
"loss": 2.6214, |
|
"step": 246500 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 9.860966519850926e-06, |
|
"loss": 2.5965, |
|
"step": 247000 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 9.840432849765403e-06, |
|
"loss": 2.5984, |
|
"step": 247500 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 9.81989917967988e-06, |
|
"loss": 2.6208, |
|
"step": 248000 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 9.79940657693453e-06, |
|
"loss": 2.584, |
|
"step": 248500 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 9.778872906849007e-06, |
|
"loss": 2.614, |
|
"step": 249000 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 9.758339236763484e-06, |
|
"loss": 2.5768, |
|
"step": 249500 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 9.737805566677961e-06, |
|
"loss": 2.5877, |
|
"step": 250000 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 9.717312963932609e-06, |
|
"loss": 2.6115, |
|
"step": 250500 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 9.696779293847086e-06, |
|
"loss": 2.5976, |
|
"step": 251000 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 9.676245623761565e-06, |
|
"loss": 2.5741, |
|
"step": 251500 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 9.655711953676042e-06, |
|
"loss": 2.597, |
|
"step": 252000 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 9.635219350930689e-06, |
|
"loss": 2.6006, |
|
"step": 252500 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 9.614685680845168e-06, |
|
"loss": 2.5908, |
|
"step": 253000 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 9.594152010759643e-06, |
|
"loss": 2.6164, |
|
"step": 253500 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 9.57361834067412e-06, |
|
"loss": 2.6051, |
|
"step": 254000 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 9.55312573792877e-06, |
|
"loss": 2.5831, |
|
"step": 254500 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 9.532592067843247e-06, |
|
"loss": 2.5701, |
|
"step": 255000 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 9.512058397757724e-06, |
|
"loss": 2.613, |
|
"step": 255500 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 9.491524727672201e-06, |
|
"loss": 2.6147, |
|
"step": 256000 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 9.47103212492685e-06, |
|
"loss": 2.606, |
|
"step": 256500 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 9.450498454841327e-06, |
|
"loss": 2.5975, |
|
"step": 257000 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 9.429964784755805e-06, |
|
"loss": 2.5789, |
|
"step": 257500 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 9.409431114670282e-06, |
|
"loss": 2.5324, |
|
"step": 258000 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 9.388938511924929e-06, |
|
"loss": 2.6155, |
|
"step": 258500 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 9.368404841839406e-06, |
|
"loss": 2.5899, |
|
"step": 259000 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 9.347871171753883e-06, |
|
"loss": 2.6095, |
|
"step": 259500 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 9.32733750166836e-06, |
|
"loss": 2.6055, |
|
"step": 260000 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 9.30684489892301e-06, |
|
"loss": 2.6007, |
|
"step": 260500 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 9.286311228837487e-06, |
|
"loss": 2.6287, |
|
"step": 261000 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 9.265777558751964e-06, |
|
"loss": 2.6304, |
|
"step": 261500 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 9.245243888666441e-06, |
|
"loss": 2.5703, |
|
"step": 262000 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 9.22475128592109e-06, |
|
"loss": 2.5771, |
|
"step": 262500 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 9.204217615835567e-06, |
|
"loss": 2.5947, |
|
"step": 263000 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 9.183683945750045e-06, |
|
"loss": 2.6205, |
|
"step": 263500 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 9.163150275664522e-06, |
|
"loss": 2.5745, |
|
"step": 264000 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 9.142616605578999e-06, |
|
"loss": 2.6126, |
|
"step": 264500 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 9.122124002833648e-06, |
|
"loss": 2.5981, |
|
"step": 265000 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 9.101590332748125e-06, |
|
"loss": 2.5772, |
|
"step": 265500 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 9.081056662662602e-06, |
|
"loss": 2.6235, |
|
"step": 266000 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 9.06052299257708e-06, |
|
"loss": 2.5577, |
|
"step": 266500 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 9.040030389831727e-06, |
|
"loss": 2.6086, |
|
"step": 267000 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 9.019496719746204e-06, |
|
"loss": 2.5849, |
|
"step": 267500 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 8.998963049660681e-06, |
|
"loss": 2.5951, |
|
"step": 268000 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 8.978429379575158e-06, |
|
"loss": 2.5432, |
|
"step": 268500 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 8.957936776829808e-06, |
|
"loss": 2.596, |
|
"step": 269000 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 8.937403106744285e-06, |
|
"loss": 2.6096, |
|
"step": 269500 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 8.916869436658762e-06, |
|
"loss": 2.6013, |
|
"step": 270000 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 8.896335766573239e-06, |
|
"loss": 2.5926, |
|
"step": 270500 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 8.875843163827888e-06, |
|
"loss": 2.5741, |
|
"step": 271000 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 8.855309493742365e-06, |
|
"loss": 2.5585, |
|
"step": 271500 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 8.83477582365684e-06, |
|
"loss": 2.6125, |
|
"step": 272000 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 8.81424215357132e-06, |
|
"loss": 2.5581, |
|
"step": 272500 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 8.793749550825967e-06, |
|
"loss": 2.5723, |
|
"step": 273000 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 8.773215880740444e-06, |
|
"loss": 2.5726, |
|
"step": 273500 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 8.752682210654923e-06, |
|
"loss": 2.6084, |
|
"step": 274000 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 8.7321485405694e-06, |
|
"loss": 2.5814, |
|
"step": 274500 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 8.711655937824048e-06, |
|
"loss": 2.6171, |
|
"step": 275000 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 8.691122267738525e-06, |
|
"loss": 2.6089, |
|
"step": 275500 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 8.670588597653002e-06, |
|
"loss": 2.5841, |
|
"step": 276000 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 8.650054927567479e-06, |
|
"loss": 2.5735, |
|
"step": 276500 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 8.629562324822128e-06, |
|
"loss": 2.5961, |
|
"step": 277000 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 8.609028654736605e-06, |
|
"loss": 2.5913, |
|
"step": 277500 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 8.588494984651082e-06, |
|
"loss": 2.5947, |
|
"step": 278000 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 8.56796131456556e-06, |
|
"loss": 2.6138, |
|
"step": 278500 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 8.547468711820209e-06, |
|
"loss": 2.584, |
|
"step": 279000 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 8.526935041734686e-06, |
|
"loss": 2.5909, |
|
"step": 279500 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 8.506401371649163e-06, |
|
"loss": 2.5691, |
|
"step": 280000 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 8.485867701563639e-06, |
|
"loss": 2.5852, |
|
"step": 280500 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 8.46541616615846e-06, |
|
"loss": 2.5937, |
|
"step": 281000 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 8.444882496072937e-06, |
|
"loss": 2.5956, |
|
"step": 281500 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 8.424348825987414e-06, |
|
"loss": 2.627, |
|
"step": 282000 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 8.403815155901891e-06, |
|
"loss": 2.6027, |
|
"step": 282500 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 8.383281485816368e-06, |
|
"loss": 2.5815, |
|
"step": 283000 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 8.362747815730845e-06, |
|
"loss": 2.5685, |
|
"step": 283500 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 8.342214145645323e-06, |
|
"loss": 2.6211, |
|
"step": 284000 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 8.321721542899972e-06, |
|
"loss": 2.6028, |
|
"step": 284500 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 8.301187872814449e-06, |
|
"loss": 2.5959, |
|
"step": 285000 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 8.280654202728924e-06, |
|
"loss": 2.5821, |
|
"step": 285500 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 8.260120532643403e-06, |
|
"loss": 2.5694, |
|
"step": 286000 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 8.23958686255788e-06, |
|
"loss": 2.5806, |
|
"step": 286500 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 8.219053192472357e-06, |
|
"loss": 2.5853, |
|
"step": 287000 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 8.198519522386835e-06, |
|
"loss": 2.561, |
|
"step": 287500 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 8.177985852301312e-06, |
|
"loss": 2.5861, |
|
"step": 288000 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 8.15749324955596e-06, |
|
"loss": 2.5743, |
|
"step": 288500 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 8.136959579470436e-06, |
|
"loss": 2.5937, |
|
"step": 289000 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 8.116425909384914e-06, |
|
"loss": 2.5558, |
|
"step": 289500 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 8.095892239299392e-06, |
|
"loss": 2.644, |
|
"step": 290000 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 8.07539963655404e-06, |
|
"loss": 2.576, |
|
"step": 290500 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 8.054865966468517e-06, |
|
"loss": 2.614, |
|
"step": 291000 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 8.034332296382996e-06, |
|
"loss": 2.5867, |
|
"step": 291500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 8.013798626297471e-06, |
|
"loss": 2.5847, |
|
"step": 292000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_gen_len": 14.18, |
|
"eval_loss": 2.4842422008514404, |
|
"eval_rouge1": 32.8849, |
|
"eval_rouge2": 14.1714, |
|
"eval_rougeL": 30.3938, |
|
"eval_rougeLsum": 30.5032, |
|
"eval_runtime": 64.3777, |
|
"eval_samples_per_second": 15.533, |
|
"eval_steps_per_second": 1.942, |
|
"step": 292203 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 7.99330602355212e-06, |
|
"loss": 2.6098, |
|
"step": 292500 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 7.972772353466598e-06, |
|
"loss": 2.5892, |
|
"step": 293000 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 7.952238683381075e-06, |
|
"loss": 2.6083, |
|
"step": 293500 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 7.931705013295552e-06, |
|
"loss": 2.5594, |
|
"step": 294000 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 7.911212410550201e-06, |
|
"loss": 2.578, |
|
"step": 294500 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 7.890678740464678e-06, |
|
"loss": 2.5775, |
|
"step": 295000 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 7.870145070379155e-06, |
|
"loss": 2.5704, |
|
"step": 295500 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 7.849611400293632e-06, |
|
"loss": 2.5723, |
|
"step": 296000 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 7.829118797548281e-06, |
|
"loss": 2.5669, |
|
"step": 296500 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 7.808626194802929e-06, |
|
"loss": 2.588, |
|
"step": 297000 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 7.788092524717406e-06, |
|
"loss": 2.5423, |
|
"step": 297500 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"learning_rate": 7.767558854631883e-06, |
|
"loss": 2.6014, |
|
"step": 298000 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"learning_rate": 7.74702518454636e-06, |
|
"loss": 2.5682, |
|
"step": 298500 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"learning_rate": 7.726491514460838e-06, |
|
"loss": 2.5787, |
|
"step": 299000 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"learning_rate": 7.705957844375315e-06, |
|
"loss": 2.5539, |
|
"step": 299500 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"learning_rate": 7.685424174289794e-06, |
|
"loss": 2.5781, |
|
"step": 300000 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"learning_rate": 7.664890504204269e-06, |
|
"loss": 2.5441, |
|
"step": 300500 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"learning_rate": 7.644397901458918e-06, |
|
"loss": 2.5906, |
|
"step": 301000 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 7.6238642313733945e-06, |
|
"loss": 2.5589, |
|
"step": 301500 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 7.6033305612878725e-06, |
|
"loss": 2.5654, |
|
"step": 302000 |
|
}, |
|
{ |
|
"epoch": 3.11, |
|
"learning_rate": 7.58279689120235e-06, |
|
"loss": 2.5716, |
|
"step": 302500 |
|
}, |
|
{ |
|
"epoch": 3.11, |
|
"learning_rate": 7.562304288456998e-06, |
|
"loss": 2.5695, |
|
"step": 303000 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 7.541770618371476e-06, |
|
"loss": 2.5949, |
|
"step": 303500 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 7.521236948285952e-06, |
|
"loss": 2.5542, |
|
"step": 304000 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"learning_rate": 7.5007032782004294e-06, |
|
"loss": 2.5522, |
|
"step": 304500 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"learning_rate": 7.4802106754550785e-06, |
|
"loss": 2.5402, |
|
"step": 305000 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"learning_rate": 7.459677005369556e-06, |
|
"loss": 2.5999, |
|
"step": 305500 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"learning_rate": 7.439143335284033e-06, |
|
"loss": 2.5563, |
|
"step": 306000 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"learning_rate": 7.418609665198509e-06, |
|
"loss": 2.5637, |
|
"step": 306500 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"learning_rate": 7.398075995112986e-06, |
|
"loss": 2.5705, |
|
"step": 307000 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"learning_rate": 7.377583392367635e-06, |
|
"loss": 2.5639, |
|
"step": 307500 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"learning_rate": 7.3570497222821126e-06, |
|
"loss": 2.5505, |
|
"step": 308000 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"learning_rate": 7.33651605219659e-06, |
|
"loss": 2.5897, |
|
"step": 308500 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"learning_rate": 7.315982382111068e-06, |
|
"loss": 2.5572, |
|
"step": 309000 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"learning_rate": 7.295489779365715e-06, |
|
"loss": 2.5474, |
|
"step": 309500 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"learning_rate": 7.274956109280192e-06, |
|
"loss": 2.5405, |
|
"step": 310000 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"learning_rate": 7.25442243919467e-06, |
|
"loss": 2.5853, |
|
"step": 310500 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"learning_rate": 7.2338887691091475e-06, |
|
"loss": 2.5544, |
|
"step": 311000 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 7.213396166363795e-06, |
|
"loss": 2.5801, |
|
"step": 311500 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 7.192862496278274e-06, |
|
"loss": 2.6023, |
|
"step": 312000 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"learning_rate": 7.17232882619275e-06, |
|
"loss": 2.5458, |
|
"step": 312500 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"learning_rate": 7.151795156107227e-06, |
|
"loss": 2.5735, |
|
"step": 313000 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"learning_rate": 7.131302553361876e-06, |
|
"loss": 2.547, |
|
"step": 313500 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"learning_rate": 7.1107688832763534e-06, |
|
"loss": 2.59, |
|
"step": 314000 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"learning_rate": 7.090235213190831e-06, |
|
"loss": 2.5819, |
|
"step": 314500 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"learning_rate": 7.069701543105307e-06, |
|
"loss": 2.5353, |
|
"step": 315000 |
|
}, |
|
{ |
|
"epoch": 3.24, |
|
"learning_rate": 7.049250007700127e-06, |
|
"loss": 2.5629, |
|
"step": 315500 |
|
}, |
|
{ |
|
"epoch": 3.24, |
|
"learning_rate": 7.028716337614604e-06, |
|
"loss": 2.5804, |
|
"step": 316000 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"learning_rate": 7.008182667529081e-06, |
|
"loss": 2.5637, |
|
"step": 316500 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"learning_rate": 6.987648997443559e-06, |
|
"loss": 2.6034, |
|
"step": 317000 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"learning_rate": 6.967115327358036e-06, |
|
"loss": 2.5929, |
|
"step": 317500 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"learning_rate": 6.946581657272513e-06, |
|
"loss": 2.5733, |
|
"step": 318000 |
|
}, |
|
{ |
|
"epoch": 3.27, |
|
"learning_rate": 6.92604798718699e-06, |
|
"loss": 2.5497, |
|
"step": 318500 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"learning_rate": 6.905514317101468e-06, |
|
"loss": 2.5831, |
|
"step": 319000 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"learning_rate": 6.884980647015945e-06, |
|
"loss": 2.5796, |
|
"step": 319500 |
|
}, |
|
{ |
|
"epoch": 3.29, |
|
"learning_rate": 6.864488044270593e-06, |
|
"loss": 2.5689, |
|
"step": 320000 |
|
}, |
|
{ |
|
"epoch": 3.29, |
|
"learning_rate": 6.84395437418507e-06, |
|
"loss": 2.5814, |
|
"step": 320500 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"learning_rate": 6.823420704099548e-06, |
|
"loss": 2.5869, |
|
"step": 321000 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"learning_rate": 6.802887034014025e-06, |
|
"loss": 2.5768, |
|
"step": 321500 |
|
}, |
|
{ |
|
"epoch": 3.31, |
|
"learning_rate": 6.782394431268673e-06, |
|
"loss": 2.5517, |
|
"step": 322000 |
|
}, |
|
{ |
|
"epoch": 3.31, |
|
"learning_rate": 6.761860761183151e-06, |
|
"loss": 2.5505, |
|
"step": 322500 |
|
}, |
|
{ |
|
"epoch": 3.32, |
|
"learning_rate": 6.7413270910976284e-06, |
|
"loss": 2.5703, |
|
"step": 323000 |
|
}, |
|
{ |
|
"epoch": 3.32, |
|
"learning_rate": 6.720793421012105e-06, |
|
"loss": 2.5399, |
|
"step": 323500 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"learning_rate": 6.700300818266754e-06, |
|
"loss": 2.5217, |
|
"step": 324000 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"learning_rate": 6.679767148181231e-06, |
|
"loss": 2.5858, |
|
"step": 324500 |
|
}, |
|
{ |
|
"epoch": 3.34, |
|
"learning_rate": 6.659233478095708e-06, |
|
"loss": 2.5503, |
|
"step": 325000 |
|
}, |
|
{ |
|
"epoch": 3.34, |
|
"learning_rate": 6.638699808010185e-06, |
|
"loss": 2.5654, |
|
"step": 325500 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"learning_rate": 6.6182482726050045e-06, |
|
"loss": 2.5682, |
|
"step": 326000 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"learning_rate": 6.597714602519482e-06, |
|
"loss": 2.5831, |
|
"step": 326500 |
|
}, |
|
{ |
|
"epoch": 3.36, |
|
"learning_rate": 6.577180932433959e-06, |
|
"loss": 2.5548, |
|
"step": 327000 |
|
}, |
|
{ |
|
"epoch": 3.36, |
|
"learning_rate": 6.556647262348437e-06, |
|
"loss": 2.5693, |
|
"step": 327500 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"learning_rate": 6.536113592262914e-06, |
|
"loss": 2.5505, |
|
"step": 328000 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"learning_rate": 6.5155799221773904e-06, |
|
"loss": 2.5547, |
|
"step": 328500 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"learning_rate": 6.495046252091868e-06, |
|
"loss": 2.5548, |
|
"step": 329000 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"learning_rate": 6.474512582006346e-06, |
|
"loss": 2.5613, |
|
"step": 329500 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"learning_rate": 6.454019979260994e-06, |
|
"loss": 2.5908, |
|
"step": 330000 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"learning_rate": 6.433486309175471e-06, |
|
"loss": 2.5816, |
|
"step": 330500 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"learning_rate": 6.412952639089949e-06, |
|
"loss": 2.581, |
|
"step": 331000 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"learning_rate": 6.392418969004425e-06, |
|
"loss": 2.5566, |
|
"step": 331500 |
|
}, |
|
{ |
|
"epoch": 3.41, |
|
"learning_rate": 6.371885298918903e-06, |
|
"loss": 2.5769, |
|
"step": 332000 |
|
}, |
|
{ |
|
"epoch": 3.41, |
|
"learning_rate": 6.35135162883338e-06, |
|
"loss": 2.5591, |
|
"step": 332500 |
|
}, |
|
{ |
|
"epoch": 3.42, |
|
"learning_rate": 6.330817958747857e-06, |
|
"loss": 2.5916, |
|
"step": 333000 |
|
}, |
|
{ |
|
"epoch": 3.42, |
|
"learning_rate": 6.310284288662335e-06, |
|
"loss": 2.5778, |
|
"step": 333500 |
|
}, |
|
{ |
|
"epoch": 3.43, |
|
"learning_rate": 6.289791685916982e-06, |
|
"loss": 2.5726, |
|
"step": 334000 |
|
}, |
|
{ |
|
"epoch": 3.43, |
|
"learning_rate": 6.2692580158314595e-06, |
|
"loss": 2.5671, |
|
"step": 334500 |
|
}, |
|
{ |
|
"epoch": 3.44, |
|
"learning_rate": 6.2487243457459376e-06, |
|
"loss": 2.5546, |
|
"step": 335000 |
|
}, |
|
{ |
|
"epoch": 3.44, |
|
"learning_rate": 6.228190675660415e-06, |
|
"loss": 2.5633, |
|
"step": 335500 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"learning_rate": 6.207698072915063e-06, |
|
"loss": 2.557, |
|
"step": 336000 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"learning_rate": 6.187164402829541e-06, |
|
"loss": 2.5664, |
|
"step": 336500 |
|
}, |
|
{ |
|
"epoch": 3.46, |
|
"learning_rate": 6.166630732744018e-06, |
|
"loss": 2.5559, |
|
"step": 337000 |
|
}, |
|
{ |
|
"epoch": 3.47, |
|
"learning_rate": 6.1460970626584945e-06, |
|
"loss": 2.5573, |
|
"step": 337500 |
|
}, |
|
{ |
|
"epoch": 3.47, |
|
"learning_rate": 6.125604459913143e-06, |
|
"loss": 2.5808, |
|
"step": 338000 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"learning_rate": 6.105070789827621e-06, |
|
"loss": 2.5853, |
|
"step": 338500 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"learning_rate": 6.084537119742098e-06, |
|
"loss": 2.5694, |
|
"step": 339000 |
|
}, |
|
{ |
|
"epoch": 3.49, |
|
"learning_rate": 6.064003449656575e-06, |
|
"loss": 2.5655, |
|
"step": 339500 |
|
}, |
|
{ |
|
"epoch": 3.49, |
|
"learning_rate": 6.043510846911223e-06, |
|
"loss": 2.5472, |
|
"step": 340000 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"learning_rate": 6.0229771768257e-06, |
|
"loss": 2.5605, |
|
"step": 340500 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"learning_rate": 6.002443506740178e-06, |
|
"loss": 2.5799, |
|
"step": 341000 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"learning_rate": 5.981909836654655e-06, |
|
"loss": 2.5311, |
|
"step": 341500 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"learning_rate": 5.961376166569133e-06, |
|
"loss": 2.5579, |
|
"step": 342000 |
|
}, |
|
{ |
|
"epoch": 3.52, |
|
"learning_rate": 5.94088356382378e-06, |
|
"loss": 2.5476, |
|
"step": 342500 |
|
}, |
|
{ |
|
"epoch": 3.52, |
|
"learning_rate": 5.920390961078429e-06, |
|
"loss": 2.5484, |
|
"step": 343000 |
|
}, |
|
{ |
|
"epoch": 3.53, |
|
"learning_rate": 5.899857290992906e-06, |
|
"loss": 2.5756, |
|
"step": 343500 |
|
}, |
|
{ |
|
"epoch": 3.53, |
|
"learning_rate": 5.8793236209073835e-06, |
|
"loss": 2.5241, |
|
"step": 344000 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"learning_rate": 5.858789950821861e-06, |
|
"loss": 2.5444, |
|
"step": 344500 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"learning_rate": 5.838256280736337e-06, |
|
"loss": 2.5398, |
|
"step": 345000 |
|
}, |
|
{ |
|
"epoch": 3.55, |
|
"learning_rate": 5.817722610650816e-06, |
|
"loss": 2.5498, |
|
"step": 345500 |
|
}, |
|
{ |
|
"epoch": 3.55, |
|
"learning_rate": 5.797188940565292e-06, |
|
"loss": 2.566, |
|
"step": 346000 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"learning_rate": 5.7766552704797695e-06, |
|
"loss": 2.574, |
|
"step": 346500 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"learning_rate": 5.7561626677344185e-06, |
|
"loss": 2.5921, |
|
"step": 347000 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"learning_rate": 5.735628997648896e-06, |
|
"loss": 2.5325, |
|
"step": 347500 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"learning_rate": 5.715095327563373e-06, |
|
"loss": 2.5733, |
|
"step": 348000 |
|
}, |
|
{ |
|
"epoch": 3.58, |
|
"learning_rate": 5.694561657477849e-06, |
|
"loss": 2.5372, |
|
"step": 348500 |
|
}, |
|
{ |
|
"epoch": 3.58, |
|
"learning_rate": 5.674069054732498e-06, |
|
"loss": 2.5424, |
|
"step": 349000 |
|
}, |
|
{ |
|
"epoch": 3.59, |
|
"learning_rate": 5.653535384646975e-06, |
|
"loss": 2.5745, |
|
"step": 349500 |
|
}, |
|
{ |
|
"epoch": 3.59, |
|
"learning_rate": 5.633001714561453e-06, |
|
"loss": 2.5407, |
|
"step": 350000 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"learning_rate": 5.61246804447593e-06, |
|
"loss": 2.5545, |
|
"step": 350500 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"learning_rate": 5.591975441730578e-06, |
|
"loss": 2.5532, |
|
"step": 351000 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"learning_rate": 5.571441771645055e-06, |
|
"loss": 2.5653, |
|
"step": 351500 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"learning_rate": 5.550908101559532e-06, |
|
"loss": 2.5588, |
|
"step": 352000 |
|
}, |
|
{ |
|
"epoch": 3.62, |
|
"learning_rate": 5.53037443147401e-06, |
|
"loss": 2.5499, |
|
"step": 352500 |
|
}, |
|
{ |
|
"epoch": 3.62, |
|
"learning_rate": 5.5098407613884875e-06, |
|
"loss": 2.5769, |
|
"step": 353000 |
|
}, |
|
{ |
|
"epoch": 3.63, |
|
"learning_rate": 5.489348158643135e-06, |
|
"loss": 2.5594, |
|
"step": 353500 |
|
}, |
|
{ |
|
"epoch": 3.63, |
|
"learning_rate": 5.468814488557614e-06, |
|
"loss": 2.5792, |
|
"step": 354000 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"learning_rate": 5.44828081847209e-06, |
|
"loss": 2.5614, |
|
"step": 354500 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"learning_rate": 5.427747148386567e-06, |
|
"loss": 2.5843, |
|
"step": 355000 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"learning_rate": 5.4072545456412154e-06, |
|
"loss": 2.5558, |
|
"step": 355500 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"learning_rate": 5.3867208755556935e-06, |
|
"loss": 2.5836, |
|
"step": 356000 |
|
}, |
|
{ |
|
"epoch": 3.66, |
|
"learning_rate": 5.366187205470171e-06, |
|
"loss": 2.5505, |
|
"step": 356500 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"learning_rate": 5.345653535384647e-06, |
|
"loss": 2.5709, |
|
"step": 357000 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"learning_rate": 5.325160932639296e-06, |
|
"loss": 2.5546, |
|
"step": 357500 |
|
}, |
|
{ |
|
"epoch": 3.68, |
|
"learning_rate": 5.304627262553773e-06, |
|
"loss": 2.5987, |
|
"step": 358000 |
|
}, |
|
{ |
|
"epoch": 3.68, |
|
"learning_rate": 5.28409359246825e-06, |
|
"loss": 2.5515, |
|
"step": 358500 |
|
}, |
|
{ |
|
"epoch": 3.69, |
|
"learning_rate": 5.263559922382728e-06, |
|
"loss": 2.5559, |
|
"step": 359000 |
|
}, |
|
{ |
|
"epoch": 3.69, |
|
"learning_rate": 5.243067319637376e-06, |
|
"loss": 2.5584, |
|
"step": 359500 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"learning_rate": 5.222533649551853e-06, |
|
"loss": 2.5676, |
|
"step": 360000 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"learning_rate": 5.20199997946633e-06, |
|
"loss": 2.5724, |
|
"step": 360500 |
|
}, |
|
{ |
|
"epoch": 3.71, |
|
"learning_rate": 5.181507376720979e-06, |
|
"loss": 2.5489, |
|
"step": 361000 |
|
}, |
|
{ |
|
"epoch": 3.71, |
|
"learning_rate": 5.160973706635456e-06, |
|
"loss": 2.5659, |
|
"step": 361500 |
|
}, |
|
{ |
|
"epoch": 3.72, |
|
"learning_rate": 5.140440036549933e-06, |
|
"loss": 2.5627, |
|
"step": 362000 |
|
}, |
|
{ |
|
"epoch": 3.72, |
|
"learning_rate": 5.11990636646441e-06, |
|
"loss": 2.5339, |
|
"step": 362500 |
|
}, |
|
{ |
|
"epoch": 3.73, |
|
"learning_rate": 5.099372696378888e-06, |
|
"loss": 2.5638, |
|
"step": 363000 |
|
}, |
|
{ |
|
"epoch": 3.73, |
|
"learning_rate": 5.078839026293365e-06, |
|
"loss": 2.5694, |
|
"step": 363500 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"learning_rate": 5.058305356207842e-06, |
|
"loss": 2.5592, |
|
"step": 364000 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"learning_rate": 5.0377716861223195e-06, |
|
"loss": 2.5519, |
|
"step": 364500 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"learning_rate": 5.017279083376968e-06, |
|
"loss": 2.5775, |
|
"step": 365000 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"learning_rate": 4.996745413291445e-06, |
|
"loss": 2.5731, |
|
"step": 365500 |
|
}, |
|
{ |
|
"epoch": 3.76, |
|
"learning_rate": 4.976211743205922e-06, |
|
"loss": 2.5421, |
|
"step": 366000 |
|
}, |
|
{ |
|
"epoch": 3.76, |
|
"learning_rate": 4.955678073120399e-06, |
|
"loss": 2.5494, |
|
"step": 366500 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"learning_rate": 4.935185470375048e-06, |
|
"loss": 2.5624, |
|
"step": 367000 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"learning_rate": 4.9146518002895245e-06, |
|
"loss": 2.5722, |
|
"step": 367500 |
|
}, |
|
{ |
|
"epoch": 3.78, |
|
"learning_rate": 4.894118130204003e-06, |
|
"loss": 2.56, |
|
"step": 368000 |
|
}, |
|
{ |
|
"epoch": 3.78, |
|
"learning_rate": 4.87358446011848e-06, |
|
"loss": 2.5629, |
|
"step": 368500 |
|
}, |
|
{ |
|
"epoch": 3.79, |
|
"learning_rate": 4.853091857373128e-06, |
|
"loss": 2.5359, |
|
"step": 369000 |
|
}, |
|
{ |
|
"epoch": 3.79, |
|
"learning_rate": 4.832558187287605e-06, |
|
"loss": 2.5635, |
|
"step": 369500 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"learning_rate": 4.812024517202082e-06, |
|
"loss": 2.5447, |
|
"step": 370000 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"learning_rate": 4.79149084711656e-06, |
|
"loss": 2.5798, |
|
"step": 370500 |
|
}, |
|
{ |
|
"epoch": 3.81, |
|
"learning_rate": 4.7709982443712085e-06, |
|
"loss": 2.5582, |
|
"step": 371000 |
|
}, |
|
{ |
|
"epoch": 3.81, |
|
"learning_rate": 4.750464574285686e-06, |
|
"loss": 2.5558, |
|
"step": 371500 |
|
}, |
|
{ |
|
"epoch": 3.82, |
|
"learning_rate": 4.729930904200163e-06, |
|
"loss": 2.5458, |
|
"step": 372000 |
|
}, |
|
{ |
|
"epoch": 3.82, |
|
"learning_rate": 4.70939723411464e-06, |
|
"loss": 2.5491, |
|
"step": 372500 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"learning_rate": 4.688904631369288e-06, |
|
"loss": 2.5444, |
|
"step": 373000 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"learning_rate": 4.6683709612837654e-06, |
|
"loss": 2.5637, |
|
"step": 373500 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"learning_rate": 4.647837291198243e-06, |
|
"loss": 2.5462, |
|
"step": 374000 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"learning_rate": 4.62730362111272e-06, |
|
"loss": 2.5801, |
|
"step": 374500 |
|
}, |
|
{ |
|
"epoch": 3.85, |
|
"learning_rate": 4.606811018367368e-06, |
|
"loss": 2.5811, |
|
"step": 375000 |
|
}, |
|
{ |
|
"epoch": 3.86, |
|
"learning_rate": 4.586277348281846e-06, |
|
"loss": 2.5696, |
|
"step": 375500 |
|
}, |
|
{ |
|
"epoch": 3.86, |
|
"learning_rate": 4.565743678196322e-06, |
|
"loss": 2.5353, |
|
"step": 376000 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"learning_rate": 4.5452100081108e-06, |
|
"loss": 2.5476, |
|
"step": 376500 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"learning_rate": 4.5246763380252776e-06, |
|
"loss": 2.5637, |
|
"step": 377000 |
|
}, |
|
{ |
|
"epoch": 3.88, |
|
"learning_rate": 4.504183735279926e-06, |
|
"loss": 2.5978, |
|
"step": 377500 |
|
}, |
|
{ |
|
"epoch": 3.88, |
|
"learning_rate": 4.483650065194403e-06, |
|
"loss": 2.5526, |
|
"step": 378000 |
|
}, |
|
{ |
|
"epoch": 3.89, |
|
"learning_rate": 4.46311639510888e-06, |
|
"loss": 2.5606, |
|
"step": 378500 |
|
}, |
|
{ |
|
"epoch": 3.89, |
|
"learning_rate": 4.442582725023357e-06, |
|
"loss": 2.5618, |
|
"step": 379000 |
|
}, |
|
{ |
|
"epoch": 3.9, |
|
"learning_rate": 4.4220901222780055e-06, |
|
"loss": 2.5707, |
|
"step": 379500 |
|
}, |
|
{ |
|
"epoch": 3.9, |
|
"learning_rate": 4.401556452192483e-06, |
|
"loss": 2.5567, |
|
"step": 380000 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"learning_rate": 4.38102278210696e-06, |
|
"loss": 2.5751, |
|
"step": 380500 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"learning_rate": 4.360489112021438e-06, |
|
"loss": 2.5543, |
|
"step": 381000 |
|
}, |
|
{ |
|
"epoch": 3.92, |
|
"learning_rate": 4.339996509276086e-06, |
|
"loss": 2.5537, |
|
"step": 381500 |
|
}, |
|
{ |
|
"epoch": 3.92, |
|
"learning_rate": 4.319462839190563e-06, |
|
"loss": 2.5465, |
|
"step": 382000 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"learning_rate": 4.29892916910504e-06, |
|
"loss": 2.5502, |
|
"step": 382500 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"learning_rate": 4.278395499019518e-06, |
|
"loss": 2.5531, |
|
"step": 383000 |
|
}, |
|
{ |
|
"epoch": 3.94, |
|
"learning_rate": 4.257902896274166e-06, |
|
"loss": 2.5419, |
|
"step": 383500 |
|
}, |
|
{ |
|
"epoch": 3.94, |
|
"learning_rate": 4.237369226188644e-06, |
|
"loss": 2.5731, |
|
"step": 384000 |
|
}, |
|
{ |
|
"epoch": 3.95, |
|
"learning_rate": 4.21683555610312e-06, |
|
"loss": 2.5539, |
|
"step": 384500 |
|
}, |
|
{ |
|
"epoch": 3.95, |
|
"learning_rate": 4.196301886017597e-06, |
|
"loss": 2.5286, |
|
"step": 385000 |
|
}, |
|
{ |
|
"epoch": 3.96, |
|
"learning_rate": 4.1758092832722455e-06, |
|
"loss": 2.5725, |
|
"step": 385500 |
|
}, |
|
{ |
|
"epoch": 3.96, |
|
"learning_rate": 4.1552756131867235e-06, |
|
"loss": 2.553, |
|
"step": 386000 |
|
}, |
|
{ |
|
"epoch": 3.97, |
|
"learning_rate": 4.134741943101201e-06, |
|
"loss": 2.5295, |
|
"step": 386500 |
|
}, |
|
{ |
|
"epoch": 3.97, |
|
"learning_rate": 4.114208273015678e-06, |
|
"loss": 2.5762, |
|
"step": 387000 |
|
}, |
|
{ |
|
"epoch": 3.98, |
|
"learning_rate": 4.093715670270326e-06, |
|
"loss": 2.5528, |
|
"step": 387500 |
|
}, |
|
{ |
|
"epoch": 3.98, |
|
"learning_rate": 4.073182000184803e-06, |
|
"loss": 2.5377, |
|
"step": 388000 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"learning_rate": 4.0526483300992805e-06, |
|
"loss": 2.5382, |
|
"step": 388500 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"learning_rate": 4.032114660013758e-06, |
|
"loss": 2.5778, |
|
"step": 389000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 4.011622057268406e-06, |
|
"loss": 2.5578, |
|
"step": 389500 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_gen_len": 14.226, |
|
"eval_loss": 2.4658281803131104, |
|
"eval_rouge1": 33.1247, |
|
"eval_rouge2": 14.4487, |
|
"eval_rougeL": 30.3793, |
|
"eval_rougeLsum": 30.5158, |
|
"eval_runtime": 65.1503, |
|
"eval_samples_per_second": 15.349, |
|
"eval_steps_per_second": 1.919, |
|
"step": 389604 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 3.991088387182884e-06, |
|
"loss": 2.5441, |
|
"step": 390000 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"learning_rate": 3.970554717097361e-06, |
|
"loss": 2.5623, |
|
"step": 390500 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"learning_rate": 3.950021047011837e-06, |
|
"loss": 2.5435, |
|
"step": 391000 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"learning_rate": 3.929528444266486e-06, |
|
"loss": 2.5428, |
|
"step": 391500 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"learning_rate": 3.908994774180964e-06, |
|
"loss": 2.51, |
|
"step": 392000 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"learning_rate": 3.888461104095441e-06, |
|
"loss": 2.5402, |
|
"step": 392500 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"learning_rate": 3.867927434009918e-06, |
|
"loss": 2.5662, |
|
"step": 393000 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"learning_rate": 3.847434831264566e-06, |
|
"loss": 2.5443, |
|
"step": 393500 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"learning_rate": 3.826901161179043e-06, |
|
"loss": 2.5292, |
|
"step": 394000 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"learning_rate": 3.806367491093521e-06, |
|
"loss": 2.54, |
|
"step": 394500 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"learning_rate": 3.785833821007998e-06, |
|
"loss": 2.5755, |
|
"step": 395000 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"learning_rate": 3.7653412182626463e-06, |
|
"loss": 2.5804, |
|
"step": 395500 |
|
}, |
|
{ |
|
"epoch": 4.07, |
|
"learning_rate": 3.744807548177124e-06, |
|
"loss": 2.5354, |
|
"step": 396000 |
|
}, |
|
{ |
|
"epoch": 4.07, |
|
"learning_rate": 3.724273878091601e-06, |
|
"loss": 2.5347, |
|
"step": 396500 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"learning_rate": 3.703740208006078e-06, |
|
"loss": 2.5257, |
|
"step": 397000 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"learning_rate": 3.6832476052607264e-06, |
|
"loss": 2.5587, |
|
"step": 397500 |
|
}, |
|
{ |
|
"epoch": 4.09, |
|
"learning_rate": 3.662713935175204e-06, |
|
"loss": 2.5545, |
|
"step": 398000 |
|
}, |
|
{ |
|
"epoch": 4.09, |
|
"learning_rate": 3.642180265089681e-06, |
|
"loss": 2.5325, |
|
"step": 398500 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"learning_rate": 3.6216465950041584e-06, |
|
"loss": 2.5691, |
|
"step": 399000 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"learning_rate": 3.601153992258807e-06, |
|
"loss": 2.556, |
|
"step": 399500 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"learning_rate": 3.5806203221732838e-06, |
|
"loss": 2.5294, |
|
"step": 400000 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"learning_rate": 3.5600866520877614e-06, |
|
"loss": 2.5724, |
|
"step": 400500 |
|
}, |
|
{ |
|
"epoch": 4.12, |
|
"learning_rate": 3.5395529820022386e-06, |
|
"loss": 2.5741, |
|
"step": 401000 |
|
}, |
|
{ |
|
"epoch": 4.12, |
|
"learning_rate": 3.5190603792568867e-06, |
|
"loss": 2.5525, |
|
"step": 401500 |
|
}, |
|
{ |
|
"epoch": 4.13, |
|
"learning_rate": 3.4985267091713643e-06, |
|
"loss": 2.5642, |
|
"step": 402000 |
|
}, |
|
{ |
|
"epoch": 4.13, |
|
"learning_rate": 3.477993039085841e-06, |
|
"loss": 2.5298, |
|
"step": 402500 |
|
}, |
|
{ |
|
"epoch": 4.14, |
|
"learning_rate": 3.4574593690003183e-06, |
|
"loss": 2.5205, |
|
"step": 403000 |
|
}, |
|
{ |
|
"epoch": 4.14, |
|
"learning_rate": 3.4369667662549665e-06, |
|
"loss": 2.5503, |
|
"step": 403500 |
|
}, |
|
{ |
|
"epoch": 4.15, |
|
"learning_rate": 3.416433096169444e-06, |
|
"loss": 2.5291, |
|
"step": 404000 |
|
}, |
|
{ |
|
"epoch": 4.15, |
|
"learning_rate": 3.3958994260839213e-06, |
|
"loss": 2.5487, |
|
"step": 404500 |
|
}, |
|
{ |
|
"epoch": 4.16, |
|
"learning_rate": 3.375365755998399e-06, |
|
"loss": 2.536, |
|
"step": 405000 |
|
}, |
|
{ |
|
"epoch": 4.16, |
|
"learning_rate": 3.354873153253047e-06, |
|
"loss": 2.5518, |
|
"step": 405500 |
|
}, |
|
{ |
|
"epoch": 4.17, |
|
"learning_rate": 3.3343394831675242e-06, |
|
"loss": 2.5262, |
|
"step": 406000 |
|
}, |
|
{ |
|
"epoch": 4.17, |
|
"learning_rate": 3.313805813082002e-06, |
|
"loss": 2.544, |
|
"step": 406500 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"learning_rate": 3.2932721429964786e-06, |
|
"loss": 2.5418, |
|
"step": 407000 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"learning_rate": 3.272779540251127e-06, |
|
"loss": 2.5587, |
|
"step": 407500 |
|
}, |
|
{ |
|
"epoch": 4.19, |
|
"learning_rate": 3.2522458701656044e-06, |
|
"loss": 2.5786, |
|
"step": 408000 |
|
}, |
|
{ |
|
"epoch": 4.19, |
|
"learning_rate": 3.2317122000800816e-06, |
|
"loss": 2.5272, |
|
"step": 408500 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"learning_rate": 3.2111785299945588e-06, |
|
"loss": 2.556, |
|
"step": 409000 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"learning_rate": 3.190685927249207e-06, |
|
"loss": 2.5468, |
|
"step": 409500 |
|
}, |
|
{ |
|
"epoch": 4.21, |
|
"learning_rate": 3.1701522571636845e-06, |
|
"loss": 2.5786, |
|
"step": 410000 |
|
}, |
|
{ |
|
"epoch": 4.21, |
|
"learning_rate": 3.1496185870781613e-06, |
|
"loss": 2.5543, |
|
"step": 410500 |
|
}, |
|
{ |
|
"epoch": 4.22, |
|
"learning_rate": 3.129084916992639e-06, |
|
"loss": 2.5549, |
|
"step": 411000 |
|
}, |
|
{ |
|
"epoch": 4.22, |
|
"learning_rate": 3.108551246907116e-06, |
|
"loss": 2.5092, |
|
"step": 411500 |
|
}, |
|
{ |
|
"epoch": 4.23, |
|
"learning_rate": 3.0880175768215937e-06, |
|
"loss": 2.5441, |
|
"step": 412000 |
|
}, |
|
{ |
|
"epoch": 4.24, |
|
"learning_rate": 3.0674839067360705e-06, |
|
"loss": 2.5641, |
|
"step": 412500 |
|
}, |
|
{ |
|
"epoch": 4.24, |
|
"learning_rate": 3.046950236650548e-06, |
|
"loss": 2.5564, |
|
"step": 413000 |
|
}, |
|
{ |
|
"epoch": 4.25, |
|
"learning_rate": 3.0264576339051967e-06, |
|
"loss": 2.5076, |
|
"step": 413500 |
|
}, |
|
{ |
|
"epoch": 4.25, |
|
"learning_rate": 3.0059239638196735e-06, |
|
"loss": 2.5449, |
|
"step": 414000 |
|
}, |
|
{ |
|
"epoch": 4.26, |
|
"learning_rate": 2.9853902937341506e-06, |
|
"loss": 2.5608, |
|
"step": 414500 |
|
}, |
|
{ |
|
"epoch": 4.26, |
|
"learning_rate": 2.9648566236486283e-06, |
|
"loss": 2.5437, |
|
"step": 415000 |
|
}, |
|
{ |
|
"epoch": 4.27, |
|
"learning_rate": 2.9443640209032764e-06, |
|
"loss": 2.547, |
|
"step": 415500 |
|
}, |
|
{ |
|
"epoch": 4.27, |
|
"learning_rate": 2.9238303508177536e-06, |
|
"loss": 2.5329, |
|
"step": 416000 |
|
}, |
|
{ |
|
"epoch": 4.28, |
|
"learning_rate": 2.9032966807322312e-06, |
|
"loss": 2.5452, |
|
"step": 416500 |
|
}, |
|
{ |
|
"epoch": 4.28, |
|
"learning_rate": 2.8828040779868794e-06, |
|
"loss": 2.5384, |
|
"step": 417000 |
|
}, |
|
{ |
|
"epoch": 4.29, |
|
"learning_rate": 2.862270407901356e-06, |
|
"loss": 2.5517, |
|
"step": 417500 |
|
}, |
|
{ |
|
"epoch": 4.29, |
|
"learning_rate": 2.8417367378158338e-06, |
|
"loss": 2.546, |
|
"step": 418000 |
|
}, |
|
{ |
|
"epoch": 4.3, |
|
"learning_rate": 2.821203067730311e-06, |
|
"loss": 2.5606, |
|
"step": 418500 |
|
}, |
|
{ |
|
"epoch": 4.3, |
|
"learning_rate": 2.8006693976447886e-06, |
|
"loss": 2.5544, |
|
"step": 419000 |
|
}, |
|
{ |
|
"epoch": 4.31, |
|
"learning_rate": 2.7801357275592653e-06, |
|
"loss": 2.5416, |
|
"step": 419500 |
|
}, |
|
{ |
|
"epoch": 4.31, |
|
"learning_rate": 2.7596020574737425e-06, |
|
"loss": 2.5364, |
|
"step": 420000 |
|
}, |
|
{ |
|
"epoch": 4.32, |
|
"learning_rate": 2.73906838738822e-06, |
|
"loss": 2.541, |
|
"step": 420500 |
|
}, |
|
{ |
|
"epoch": 4.32, |
|
"learning_rate": 2.7185757846428683e-06, |
|
"loss": 2.5646, |
|
"step": 421000 |
|
}, |
|
{ |
|
"epoch": 4.33, |
|
"learning_rate": 2.6980421145573455e-06, |
|
"loss": 2.5587, |
|
"step": 421500 |
|
}, |
|
{ |
|
"epoch": 4.33, |
|
"learning_rate": 2.677508444471823e-06, |
|
"loss": 2.5614, |
|
"step": 422000 |
|
}, |
|
{ |
|
"epoch": 4.34, |
|
"learning_rate": 2.6569747743863e-06, |
|
"loss": 2.5268, |
|
"step": 422500 |
|
}, |
|
{ |
|
"epoch": 4.34, |
|
"learning_rate": 2.6364821716409484e-06, |
|
"loss": 2.5468, |
|
"step": 423000 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"learning_rate": 2.615948501555426e-06, |
|
"loss": 2.5255, |
|
"step": 423500 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"learning_rate": 2.595414831469903e-06, |
|
"loss": 2.5398, |
|
"step": 424000 |
|
}, |
|
{ |
|
"epoch": 4.36, |
|
"learning_rate": 2.5748811613843804e-06, |
|
"loss": 2.5561, |
|
"step": 424500 |
|
}, |
|
{ |
|
"epoch": 4.36, |
|
"learning_rate": 2.5543885586390286e-06, |
|
"loss": 2.5396, |
|
"step": 425000 |
|
}, |
|
{ |
|
"epoch": 4.37, |
|
"learning_rate": 2.533854888553506e-06, |
|
"loss": 2.5366, |
|
"step": 425500 |
|
}, |
|
{ |
|
"epoch": 4.37, |
|
"learning_rate": 2.513362285808154e-06, |
|
"loss": 2.5748, |
|
"step": 426000 |
|
}, |
|
{ |
|
"epoch": 4.38, |
|
"learning_rate": 2.4928286157226316e-06, |
|
"loss": 2.5557, |
|
"step": 426500 |
|
}, |
|
{ |
|
"epoch": 4.38, |
|
"learning_rate": 2.4722949456371088e-06, |
|
"loss": 2.5178, |
|
"step": 427000 |
|
}, |
|
{ |
|
"epoch": 4.39, |
|
"learning_rate": 2.451761275551586e-06, |
|
"loss": 2.557, |
|
"step": 427500 |
|
}, |
|
{ |
|
"epoch": 4.39, |
|
"learning_rate": 2.431227605466063e-06, |
|
"loss": 2.5357, |
|
"step": 428000 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"learning_rate": 2.4106939353805403e-06, |
|
"loss": 2.5228, |
|
"step": 428500 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"learning_rate": 2.3901602652950175e-06, |
|
"loss": 2.5376, |
|
"step": 429000 |
|
}, |
|
{ |
|
"epoch": 4.41, |
|
"learning_rate": 2.3696265952094947e-06, |
|
"loss": 2.5418, |
|
"step": 429500 |
|
}, |
|
{ |
|
"epoch": 4.41, |
|
"learning_rate": 2.3491339924641433e-06, |
|
"loss": 2.5534, |
|
"step": 430000 |
|
}, |
|
{ |
|
"epoch": 4.42, |
|
"learning_rate": 2.3286003223786205e-06, |
|
"loss": 2.5183, |
|
"step": 430500 |
|
}, |
|
{ |
|
"epoch": 4.43, |
|
"learning_rate": 2.3080666522930977e-06, |
|
"loss": 2.5207, |
|
"step": 431000 |
|
}, |
|
{ |
|
"epoch": 4.43, |
|
"learning_rate": 2.2875329822075753e-06, |
|
"loss": 2.5431, |
|
"step": 431500 |
|
}, |
|
{ |
|
"epoch": 4.44, |
|
"learning_rate": 2.2670403794622234e-06, |
|
"loss": 2.5484, |
|
"step": 432000 |
|
}, |
|
{ |
|
"epoch": 4.44, |
|
"learning_rate": 2.2465067093767006e-06, |
|
"loss": 2.5322, |
|
"step": 432500 |
|
}, |
|
{ |
|
"epoch": 4.45, |
|
"learning_rate": 2.225973039291178e-06, |
|
"loss": 2.5424, |
|
"step": 433000 |
|
}, |
|
{ |
|
"epoch": 4.45, |
|
"learning_rate": 2.2054393692056554e-06, |
|
"loss": 2.5249, |
|
"step": 433500 |
|
}, |
|
{ |
|
"epoch": 4.46, |
|
"learning_rate": 2.184905699120132e-06, |
|
"loss": 2.532, |
|
"step": 434000 |
|
}, |
|
{ |
|
"epoch": 4.46, |
|
"learning_rate": 2.164413096374781e-06, |
|
"loss": 2.5406, |
|
"step": 434500 |
|
}, |
|
{ |
|
"epoch": 4.47, |
|
"learning_rate": 2.143879426289258e-06, |
|
"loss": 2.5651, |
|
"step": 435000 |
|
}, |
|
{ |
|
"epoch": 4.47, |
|
"learning_rate": 2.123345756203735e-06, |
|
"loss": 2.5343, |
|
"step": 435500 |
|
}, |
|
{ |
|
"epoch": 4.48, |
|
"learning_rate": 2.1028120861182124e-06, |
|
"loss": 2.5554, |
|
"step": 436000 |
|
}, |
|
{ |
|
"epoch": 4.48, |
|
"learning_rate": 2.082319483372861e-06, |
|
"loss": 2.5519, |
|
"step": 436500 |
|
}, |
|
{ |
|
"epoch": 4.49, |
|
"learning_rate": 2.061785813287338e-06, |
|
"loss": 2.5579, |
|
"step": 437000 |
|
}, |
|
{ |
|
"epoch": 4.49, |
|
"learning_rate": 2.0412521432018153e-06, |
|
"loss": 2.5515, |
|
"step": 437500 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"learning_rate": 2.0207184731162925e-06, |
|
"loss": 2.5662, |
|
"step": 438000 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"learning_rate": 2.000225870370941e-06, |
|
"loss": 2.5343, |
|
"step": 438500 |
|
}, |
|
{ |
|
"epoch": 4.51, |
|
"learning_rate": 1.9796922002854183e-06, |
|
"loss": 2.5598, |
|
"step": 439000 |
|
}, |
|
{ |
|
"epoch": 4.51, |
|
"learning_rate": 1.9591585301998955e-06, |
|
"loss": 2.5431, |
|
"step": 439500 |
|
}, |
|
{ |
|
"epoch": 4.52, |
|
"learning_rate": 1.9386248601143727e-06, |
|
"loss": 2.5741, |
|
"step": 440000 |
|
}, |
|
{ |
|
"epoch": 4.52, |
|
"learning_rate": 1.9181322573690213e-06, |
|
"loss": 2.5596, |
|
"step": 440500 |
|
}, |
|
{ |
|
"epoch": 4.53, |
|
"learning_rate": 1.8975985872834982e-06, |
|
"loss": 2.5372, |
|
"step": 441000 |
|
}, |
|
{ |
|
"epoch": 4.53, |
|
"learning_rate": 1.8770649171979754e-06, |
|
"loss": 2.5461, |
|
"step": 441500 |
|
}, |
|
{ |
|
"epoch": 4.54, |
|
"learning_rate": 1.8565312471124528e-06, |
|
"loss": 2.564, |
|
"step": 442000 |
|
}, |
|
{ |
|
"epoch": 4.54, |
|
"learning_rate": 1.836038644367101e-06, |
|
"loss": 2.5384, |
|
"step": 442500 |
|
}, |
|
{ |
|
"epoch": 4.55, |
|
"learning_rate": 1.8155049742815784e-06, |
|
"loss": 2.5388, |
|
"step": 443000 |
|
}, |
|
{ |
|
"epoch": 4.55, |
|
"learning_rate": 1.7949713041960556e-06, |
|
"loss": 2.5461, |
|
"step": 443500 |
|
}, |
|
{ |
|
"epoch": 4.56, |
|
"learning_rate": 1.774437634110533e-06, |
|
"loss": 2.5616, |
|
"step": 444000 |
|
}, |
|
{ |
|
"epoch": 4.56, |
|
"learning_rate": 1.7539450313651811e-06, |
|
"loss": 2.5392, |
|
"step": 444500 |
|
}, |
|
{ |
|
"epoch": 4.57, |
|
"learning_rate": 1.7334113612796585e-06, |
|
"loss": 2.5661, |
|
"step": 445000 |
|
}, |
|
{ |
|
"epoch": 4.57, |
|
"learning_rate": 1.7128776911941357e-06, |
|
"loss": 2.5373, |
|
"step": 445500 |
|
}, |
|
{ |
|
"epoch": 4.58, |
|
"learning_rate": 1.6923440211086131e-06, |
|
"loss": 2.5193, |
|
"step": 446000 |
|
}, |
|
{ |
|
"epoch": 4.58, |
|
"learning_rate": 1.6718514183632615e-06, |
|
"loss": 2.517, |
|
"step": 446500 |
|
}, |
|
{ |
|
"epoch": 4.59, |
|
"learning_rate": 1.6513177482777385e-06, |
|
"loss": 2.5266, |
|
"step": 447000 |
|
}, |
|
{ |
|
"epoch": 4.59, |
|
"learning_rate": 1.6307840781922157e-06, |
|
"loss": 2.5429, |
|
"step": 447500 |
|
}, |
|
{ |
|
"epoch": 4.6, |
|
"learning_rate": 1.610250408106693e-06, |
|
"loss": 2.5305, |
|
"step": 448000 |
|
}, |
|
{ |
|
"epoch": 4.6, |
|
"learning_rate": 1.5897167380211703e-06, |
|
"loss": 2.5723, |
|
"step": 448500 |
|
}, |
|
{ |
|
"epoch": 4.61, |
|
"learning_rate": 1.5692241352758186e-06, |
|
"loss": 2.5435, |
|
"step": 449000 |
|
}, |
|
{ |
|
"epoch": 4.61, |
|
"learning_rate": 1.5486904651902958e-06, |
|
"loss": 2.5542, |
|
"step": 449500 |
|
}, |
|
{ |
|
"epoch": 4.62, |
|
"learning_rate": 1.5281567951047732e-06, |
|
"loss": 2.5342, |
|
"step": 450000 |
|
}, |
|
{ |
|
"epoch": 4.63, |
|
"learning_rate": 1.5076231250192504e-06, |
|
"loss": 2.5589, |
|
"step": 450500 |
|
}, |
|
{ |
|
"epoch": 4.63, |
|
"learning_rate": 1.4871305222738988e-06, |
|
"loss": 2.5397, |
|
"step": 451000 |
|
}, |
|
{ |
|
"epoch": 4.64, |
|
"learning_rate": 1.4665968521883762e-06, |
|
"loss": 2.5361, |
|
"step": 451500 |
|
}, |
|
{ |
|
"epoch": 4.64, |
|
"learning_rate": 1.4460631821028534e-06, |
|
"loss": 2.5638, |
|
"step": 452000 |
|
}, |
|
{ |
|
"epoch": 4.65, |
|
"learning_rate": 1.4255295120173304e-06, |
|
"loss": 2.532, |
|
"step": 452500 |
|
}, |
|
{ |
|
"epoch": 4.65, |
|
"learning_rate": 1.4050369092719787e-06, |
|
"loss": 2.5609, |
|
"step": 453000 |
|
}, |
|
{ |
|
"epoch": 4.66, |
|
"learning_rate": 1.384503239186456e-06, |
|
"loss": 2.5257, |
|
"step": 453500 |
|
}, |
|
{ |
|
"epoch": 4.66, |
|
"learning_rate": 1.3639695691009333e-06, |
|
"loss": 2.5254, |
|
"step": 454000 |
|
}, |
|
{ |
|
"epoch": 4.67, |
|
"learning_rate": 1.3434358990154105e-06, |
|
"loss": 2.5407, |
|
"step": 454500 |
|
}, |
|
{ |
|
"epoch": 4.67, |
|
"learning_rate": 1.3229843636102299e-06, |
|
"loss": 2.5279, |
|
"step": 455000 |
|
}, |
|
{ |
|
"epoch": 4.68, |
|
"learning_rate": 1.3024506935247073e-06, |
|
"loss": 2.5276, |
|
"step": 455500 |
|
}, |
|
{ |
|
"epoch": 4.68, |
|
"learning_rate": 1.2819170234391844e-06, |
|
"loss": 2.5704, |
|
"step": 456000 |
|
}, |
|
{ |
|
"epoch": 4.69, |
|
"learning_rate": 1.2613833533536618e-06, |
|
"loss": 2.5349, |
|
"step": 456500 |
|
}, |
|
{ |
|
"epoch": 4.69, |
|
"learning_rate": 1.240849683268139e-06, |
|
"loss": 2.5449, |
|
"step": 457000 |
|
}, |
|
{ |
|
"epoch": 4.7, |
|
"learning_rate": 1.2203160131826162e-06, |
|
"loss": 2.5408, |
|
"step": 457500 |
|
}, |
|
{ |
|
"epoch": 4.7, |
|
"learning_rate": 1.1997823430970936e-06, |
|
"loss": 2.5165, |
|
"step": 458000 |
|
}, |
|
{ |
|
"epoch": 4.71, |
|
"learning_rate": 1.1792486730115708e-06, |
|
"loss": 2.563, |
|
"step": 458500 |
|
}, |
|
{ |
|
"epoch": 4.71, |
|
"learning_rate": 1.1587560702662192e-06, |
|
"loss": 2.5678, |
|
"step": 459000 |
|
}, |
|
{ |
|
"epoch": 4.72, |
|
"learning_rate": 1.1382224001806964e-06, |
|
"loss": 2.5404, |
|
"step": 459500 |
|
}, |
|
{ |
|
"epoch": 4.72, |
|
"learning_rate": 1.1176887300951736e-06, |
|
"loss": 2.5691, |
|
"step": 460000 |
|
}, |
|
{ |
|
"epoch": 4.73, |
|
"learning_rate": 1.097155060009651e-06, |
|
"loss": 2.516, |
|
"step": 460500 |
|
}, |
|
{ |
|
"epoch": 4.73, |
|
"learning_rate": 1.0766624572642991e-06, |
|
"loss": 2.5326, |
|
"step": 461000 |
|
}, |
|
{ |
|
"epoch": 4.74, |
|
"learning_rate": 1.0561287871787765e-06, |
|
"loss": 2.5426, |
|
"step": 461500 |
|
}, |
|
{ |
|
"epoch": 4.74, |
|
"learning_rate": 1.0355951170932537e-06, |
|
"loss": 2.51, |
|
"step": 462000 |
|
}, |
|
{ |
|
"epoch": 4.75, |
|
"learning_rate": 1.0150614470077311e-06, |
|
"loss": 2.5328, |
|
"step": 462500 |
|
}, |
|
{ |
|
"epoch": 4.75, |
|
"learning_rate": 9.945688442623793e-07, |
|
"loss": 2.5352, |
|
"step": 463000 |
|
}, |
|
{ |
|
"epoch": 4.76, |
|
"learning_rate": 9.740351741768565e-07, |
|
"loss": 2.4885, |
|
"step": 463500 |
|
}, |
|
{ |
|
"epoch": 4.76, |
|
"learning_rate": 9.535015040913338e-07, |
|
"loss": 2.5488, |
|
"step": 464000 |
|
}, |
|
{ |
|
"epoch": 4.77, |
|
"learning_rate": 9.329678340058111e-07, |
|
"loss": 2.5455, |
|
"step": 464500 |
|
}, |
|
{ |
|
"epoch": 4.77, |
|
"learning_rate": 9.124752312604594e-07, |
|
"loss": 2.5433, |
|
"step": 465000 |
|
}, |
|
{ |
|
"epoch": 4.78, |
|
"learning_rate": 8.919415611749367e-07, |
|
"loss": 2.5511, |
|
"step": 465500 |
|
}, |
|
{ |
|
"epoch": 4.78, |
|
"learning_rate": 8.714078910894138e-07, |
|
"loss": 2.5286, |
|
"step": 466000 |
|
}, |
|
{ |
|
"epoch": 4.79, |
|
"learning_rate": 8.508742210038911e-07, |
|
"loss": 2.5607, |
|
"step": 466500 |
|
}, |
|
{ |
|
"epoch": 4.79, |
|
"learning_rate": 8.303816182585395e-07, |
|
"loss": 2.5621, |
|
"step": 467000 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"learning_rate": 8.098479481730168e-07, |
|
"loss": 2.534, |
|
"step": 467500 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"learning_rate": 7.893142780874941e-07, |
|
"loss": 2.586, |
|
"step": 468000 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"learning_rate": 7.687806080019714e-07, |
|
"loss": 2.541, |
|
"step": 468500 |
|
}, |
|
{ |
|
"epoch": 4.82, |
|
"learning_rate": 7.482469379164485e-07, |
|
"loss": 2.5375, |
|
"step": 469000 |
|
}, |
|
{ |
|
"epoch": 4.82, |
|
"learning_rate": 7.277132678309258e-07, |
|
"loss": 2.5287, |
|
"step": 469500 |
|
}, |
|
{ |
|
"epoch": 4.83, |
|
"learning_rate": 7.071795977454031e-07, |
|
"loss": 2.5211, |
|
"step": 470000 |
|
}, |
|
{ |
|
"epoch": 4.83, |
|
"learning_rate": 6.866869950000514e-07, |
|
"loss": 2.5658, |
|
"step": 470500 |
|
}, |
|
{ |
|
"epoch": 4.84, |
|
"learning_rate": 6.661533249145287e-07, |
|
"loss": 2.5567, |
|
"step": 471000 |
|
}, |
|
{ |
|
"epoch": 4.84, |
|
"learning_rate": 6.456196548290058e-07, |
|
"loss": 2.535, |
|
"step": 471500 |
|
}, |
|
{ |
|
"epoch": 4.85, |
|
"learning_rate": 6.250859847434831e-07, |
|
"loss": 2.5405, |
|
"step": 472000 |
|
}, |
|
{ |
|
"epoch": 4.85, |
|
"learning_rate": 6.045933819981315e-07, |
|
"loss": 2.5291, |
|
"step": 472500 |
|
}, |
|
{ |
|
"epoch": 4.86, |
|
"learning_rate": 5.840597119126088e-07, |
|
"loss": 2.5359, |
|
"step": 473000 |
|
}, |
|
{ |
|
"epoch": 4.86, |
|
"learning_rate": 5.635260418270861e-07, |
|
"loss": 2.5548, |
|
"step": 473500 |
|
}, |
|
{ |
|
"epoch": 4.87, |
|
"learning_rate": 5.429923717415633e-07, |
|
"loss": 2.5573, |
|
"step": 474000 |
|
}, |
|
{ |
|
"epoch": 4.87, |
|
"learning_rate": 5.224587016560406e-07, |
|
"loss": 2.5402, |
|
"step": 474500 |
|
}, |
|
{ |
|
"epoch": 4.88, |
|
"learning_rate": 5.019250315705177e-07, |
|
"loss": 2.526, |
|
"step": 475000 |
|
}, |
|
{ |
|
"epoch": 4.88, |
|
"learning_rate": 4.81391361484995e-07, |
|
"loss": 2.5451, |
|
"step": 475500 |
|
}, |
|
{ |
|
"epoch": 4.89, |
|
"learning_rate": 4.608576913994723e-07, |
|
"loss": 2.5159, |
|
"step": 476000 |
|
}, |
|
{ |
|
"epoch": 4.89, |
|
"learning_rate": 4.403650886541206e-07, |
|
"loss": 2.5486, |
|
"step": 476500 |
|
}, |
|
{ |
|
"epoch": 4.9, |
|
"learning_rate": 4.198314185685979e-07, |
|
"loss": 2.5529, |
|
"step": 477000 |
|
}, |
|
{ |
|
"epoch": 4.9, |
|
"learning_rate": 3.9929774848307514e-07, |
|
"loss": 2.5502, |
|
"step": 477500 |
|
}, |
|
{ |
|
"epoch": 4.91, |
|
"learning_rate": 3.7876407839755243e-07, |
|
"loss": 2.5564, |
|
"step": 478000 |
|
}, |
|
{ |
|
"epoch": 4.91, |
|
"learning_rate": 3.582714756522007e-07, |
|
"loss": 2.5274, |
|
"step": 478500 |
|
}, |
|
{ |
|
"epoch": 4.92, |
|
"learning_rate": 3.37737805566678e-07, |
|
"loss": 2.5686, |
|
"step": 479000 |
|
}, |
|
{ |
|
"epoch": 4.92, |
|
"learning_rate": 3.1720413548115524e-07, |
|
"loss": 2.552, |
|
"step": 479500 |
|
}, |
|
{ |
|
"epoch": 4.93, |
|
"learning_rate": 2.9671153273580356e-07, |
|
"loss": 2.546, |
|
"step": 480000 |
|
}, |
|
{ |
|
"epoch": 4.93, |
|
"learning_rate": 2.7617786265028085e-07, |
|
"loss": 2.5279, |
|
"step": 480500 |
|
}, |
|
{ |
|
"epoch": 4.94, |
|
"learning_rate": 2.556441925647581e-07, |
|
"loss": 2.5537, |
|
"step": 481000 |
|
}, |
|
{ |
|
"epoch": 4.94, |
|
"learning_rate": 2.3511052247923537e-07, |
|
"loss": 2.5668, |
|
"step": 481500 |
|
}, |
|
{ |
|
"epoch": 4.95, |
|
"learning_rate": 2.145768523937126e-07, |
|
"loss": 2.5783, |
|
"step": 482000 |
|
}, |
|
{ |
|
"epoch": 4.95, |
|
"learning_rate": 1.9404318230818988e-07, |
|
"loss": 2.5368, |
|
"step": 482500 |
|
}, |
|
{ |
|
"epoch": 4.96, |
|
"learning_rate": 1.7350951222266712e-07, |
|
"loss": 2.5358, |
|
"step": 483000 |
|
}, |
|
{ |
|
"epoch": 4.96, |
|
"learning_rate": 1.529758421371444e-07, |
|
"loss": 2.566, |
|
"step": 483500 |
|
}, |
|
{ |
|
"epoch": 4.97, |
|
"learning_rate": 1.3248323939179268e-07, |
|
"loss": 2.5619, |
|
"step": 484000 |
|
}, |
|
{ |
|
"epoch": 4.97, |
|
"learning_rate": 1.1199063664644102e-07, |
|
"loss": 2.5566, |
|
"step": 484500 |
|
}, |
|
{ |
|
"epoch": 4.98, |
|
"learning_rate": 9.145696656091827e-08, |
|
"loss": 2.5638, |
|
"step": 485000 |
|
}, |
|
{ |
|
"epoch": 4.98, |
|
"learning_rate": 7.092329647539553e-08, |
|
"loss": 2.5277, |
|
"step": 485500 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"learning_rate": 5.038962638987279e-08, |
|
"loss": 2.5254, |
|
"step": 486000 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"learning_rate": 2.985595630435006e-08, |
|
"loss": 2.5533, |
|
"step": 486500 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 9.363353558998369e-09, |
|
"loss": 2.5433, |
|
"step": 487000 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_gen_len": 14.198, |
|
"eval_loss": 2.459300994873047, |
|
"eval_rouge1": 33.0901, |
|
"eval_rouge2": 14.5749, |
|
"eval_rougeL": 30.4267, |
|
"eval_rougeLsum": 30.5438, |
|
"eval_runtime": 65.3559, |
|
"eval_samples_per_second": 15.301, |
|
"eval_steps_per_second": 1.913, |
|
"step": 487005 |
|
} |
|
], |
|
"max_steps": 487005, |
|
"num_train_epochs": 5, |
|
"total_flos": 1.0373616546436547e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|