{ "best_metric": 2.459300994873047, "best_model_checkpoint": "results/models/t5-small-NewsRoom1/checkpoint-487005", "epoch": 5.0, "global_step": 487005, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 1.9979630599275163e-05, "loss": 3.5575, "step": 500 }, { "epoch": 0.01, "learning_rate": 1.995909692918964e-05, "loss": 3.2397, "step": 1000 }, { "epoch": 0.02, "learning_rate": 1.9938563259104117e-05, "loss": 3.2235, "step": 1500 }, { "epoch": 0.02, "learning_rate": 1.9918029589018594e-05, "loss": 3.1572, "step": 2000 }, { "epoch": 0.03, "learning_rate": 1.989749591893307e-05, "loss": 3.1626, "step": 2500 }, { "epoch": 0.03, "learning_rate": 1.987696224884755e-05, "loss": 3.1051, "step": 3000 }, { "epoch": 0.04, "learning_rate": 1.9856428578762026e-05, "loss": 3.1111, "step": 3500 }, { "epoch": 0.04, "learning_rate": 1.9835894908676503e-05, "loss": 3.0974, "step": 4000 }, { "epoch": 0.05, "learning_rate": 1.9815402305931152e-05, "loss": 3.0898, "step": 4500 }, { "epoch": 0.05, "learning_rate": 1.97949097031858e-05, "loss": 3.1047, "step": 5000 }, { "epoch": 0.06, "learning_rate": 1.9774376033100278e-05, "loss": 3.0322, "step": 5500 }, { "epoch": 0.06, "learning_rate": 1.9753842363014755e-05, "loss": 3.0404, "step": 6000 }, { "epoch": 0.07, "learning_rate": 1.9733308692929233e-05, "loss": 3.0298, "step": 6500 }, { "epoch": 0.07, "learning_rate": 1.971277502284371e-05, "loss": 3.0271, "step": 7000 }, { "epoch": 0.08, "learning_rate": 1.9692241352758187e-05, "loss": 3.015, "step": 7500 }, { "epoch": 0.08, "learning_rate": 1.9671707682672664e-05, "loss": 3.0293, "step": 8000 }, { "epoch": 0.09, "learning_rate": 1.965117401258714e-05, "loss": 2.9866, "step": 8500 }, { "epoch": 0.09, "learning_rate": 1.963068140984179e-05, "loss": 2.9875, "step": 9000 }, { "epoch": 0.1, "learning_rate": 1.9610147739756268e-05, "loss": 2.9972, "step": 9500 }, { "epoch": 0.1, "learning_rate": 1.9589614069670745e-05, "loss": 2.9819, "step": 10000 }, { "epoch": 0.11, "learning_rate": 1.9569080399585222e-05, "loss": 2.9796, "step": 10500 }, { "epoch": 0.11, "learning_rate": 1.9548628864180043e-05, "loss": 2.9545, "step": 11000 }, { "epoch": 0.12, "learning_rate": 1.952809519409452e-05, "loss": 2.95, "step": 11500 }, { "epoch": 0.12, "learning_rate": 1.9507561524008997e-05, "loss": 2.978, "step": 12000 }, { "epoch": 0.13, "learning_rate": 1.948702785392347e-05, "loss": 2.9684, "step": 12500 }, { "epoch": 0.13, "learning_rate": 1.9466494183837948e-05, "loss": 2.959, "step": 13000 }, { "epoch": 0.14, "learning_rate": 1.9445960513752425e-05, "loss": 2.968, "step": 13500 }, { "epoch": 0.14, "learning_rate": 1.9425426843666903e-05, "loss": 2.9581, "step": 14000 }, { "epoch": 0.15, "learning_rate": 1.940489317358138e-05, "loss": 2.9357, "step": 14500 }, { "epoch": 0.15, "learning_rate": 1.9384400570836032e-05, "loss": 2.9308, "step": 15000 }, { "epoch": 0.16, "learning_rate": 1.9363866900750506e-05, "loss": 2.9518, "step": 15500 }, { "epoch": 0.16, "learning_rate": 1.9343333230664983e-05, "loss": 2.9479, "step": 16000 }, { "epoch": 0.17, "learning_rate": 1.932279956057946e-05, "loss": 2.9299, "step": 16500 }, { "epoch": 0.17, "learning_rate": 1.930230695783411e-05, "loss": 2.9688, "step": 17000 }, { "epoch": 0.18, "learning_rate": 1.9281773287748587e-05, "loss": 2.9159, "step": 17500 }, { "epoch": 0.18, "learning_rate": 1.9261239617663067e-05, "loss": 2.9034, "step": 18000 }, { "epoch": 0.19, "learning_rate": 1.9240705947577544e-05, "loss": 2.9215, "step": 18500 }, { "epoch": 0.2, "learning_rate": 1.922021334483219e-05, "loss": 2.9202, "step": 19000 }, { "epoch": 0.2, "learning_rate": 1.9199679674746667e-05, "loss": 2.8881, "step": 19500 }, { "epoch": 0.21, "learning_rate": 1.9179146004661144e-05, "loss": 2.9043, "step": 20000 }, { "epoch": 0.21, "learning_rate": 1.915861233457562e-05, "loss": 2.8839, "step": 20500 }, { "epoch": 0.22, "learning_rate": 1.913811973183027e-05, "loss": 2.8857, "step": 21000 }, { "epoch": 0.22, "learning_rate": 1.9117586061744748e-05, "loss": 2.8636, "step": 21500 }, { "epoch": 0.23, "learning_rate": 1.9097052391659225e-05, "loss": 2.8856, "step": 22000 }, { "epoch": 0.23, "learning_rate": 1.9076559788913874e-05, "loss": 2.869, "step": 22500 }, { "epoch": 0.24, "learning_rate": 1.905602611882835e-05, "loss": 2.9347, "step": 23000 }, { "epoch": 0.24, "learning_rate": 1.9035492448742828e-05, "loss": 2.88, "step": 23500 }, { "epoch": 0.25, "learning_rate": 1.9014958778657305e-05, "loss": 2.8891, "step": 24000 }, { "epoch": 0.25, "learning_rate": 1.8994425108571783e-05, "loss": 2.8673, "step": 24500 }, { "epoch": 0.26, "learning_rate": 1.897393250582643e-05, "loss": 2.8301, "step": 25000 }, { "epoch": 0.26, "learning_rate": 1.895339883574091e-05, "loss": 2.8586, "step": 25500 }, { "epoch": 0.27, "learning_rate": 1.8932865165655386e-05, "loss": 2.8351, "step": 26000 }, { "epoch": 0.27, "learning_rate": 1.8912331495569863e-05, "loss": 2.8603, "step": 26500 }, { "epoch": 0.28, "learning_rate": 1.889179782548434e-05, "loss": 2.8528, "step": 27000 }, { "epoch": 0.28, "learning_rate": 1.8871264155398818e-05, "loss": 2.8671, "step": 27500 }, { "epoch": 0.29, "learning_rate": 1.8850730485313295e-05, "loss": 2.8824, "step": 28000 }, { "epoch": 0.29, "learning_rate": 1.8830196815227772e-05, "loss": 2.8712, "step": 28500 }, { "epoch": 0.3, "learning_rate": 1.880970421248242e-05, "loss": 2.8514, "step": 29000 }, { "epoch": 0.3, "learning_rate": 1.8789170542396898e-05, "loss": 2.8822, "step": 29500 }, { "epoch": 0.31, "learning_rate": 1.8768636872311375e-05, "loss": 2.868, "step": 30000 }, { "epoch": 0.31, "learning_rate": 1.8748103202225853e-05, "loss": 2.8742, "step": 30500 }, { "epoch": 0.32, "learning_rate": 1.8727610599480498e-05, "loss": 2.8513, "step": 31000 }, { "epoch": 0.32, "learning_rate": 1.8707076929394975e-05, "loss": 2.8506, "step": 31500 }, { "epoch": 0.33, "learning_rate": 1.8686543259309453e-05, "loss": 2.8392, "step": 32000 }, { "epoch": 0.33, "learning_rate": 1.8666009589223933e-05, "loss": 2.8767, "step": 32500 }, { "epoch": 0.34, "learning_rate": 1.864551698647858e-05, "loss": 2.8435, "step": 33000 }, { "epoch": 0.34, "learning_rate": 1.8625024383733228e-05, "loss": 2.8289, "step": 33500 }, { "epoch": 0.35, "learning_rate": 1.8604490713647705e-05, "loss": 2.8443, "step": 34000 }, { "epoch": 0.35, "learning_rate": 1.8583957043562182e-05, "loss": 2.8266, "step": 34500 }, { "epoch": 0.36, "learning_rate": 1.856342337347666e-05, "loss": 2.857, "step": 35000 }, { "epoch": 0.36, "learning_rate": 1.8542889703391136e-05, "loss": 2.8371, "step": 35500 }, { "epoch": 0.37, "learning_rate": 1.8522356033305614e-05, "loss": 2.8395, "step": 36000 }, { "epoch": 0.37, "learning_rate": 1.850182236322009e-05, "loss": 2.8286, "step": 36500 }, { "epoch": 0.38, "learning_rate": 1.8481288693134568e-05, "loss": 2.8074, "step": 37000 }, { "epoch": 0.39, "learning_rate": 1.8460796090389217e-05, "loss": 2.8464, "step": 37500 }, { "epoch": 0.39, "learning_rate": 1.8440262420303694e-05, "loss": 2.8489, "step": 38000 }, { "epoch": 0.4, "learning_rate": 1.841972875021817e-05, "loss": 2.8259, "step": 38500 }, { "epoch": 0.4, "learning_rate": 1.839919508013265e-05, "loss": 2.8229, "step": 39000 }, { "epoch": 0.41, "learning_rate": 1.8378702477387298e-05, "loss": 2.7956, "step": 39500 }, { "epoch": 0.41, "learning_rate": 1.8358209874641947e-05, "loss": 2.837, "step": 40000 }, { "epoch": 0.42, "learning_rate": 1.8337676204556424e-05, "loss": 2.8233, "step": 40500 }, { "epoch": 0.42, "learning_rate": 1.83171425344709e-05, "loss": 2.8293, "step": 41000 }, { "epoch": 0.43, "learning_rate": 1.8296608864385378e-05, "loss": 2.8451, "step": 41500 }, { "epoch": 0.43, "learning_rate": 1.8276075194299855e-05, "loss": 2.8166, "step": 42000 }, { "epoch": 0.44, "learning_rate": 1.8255541524214333e-05, "loss": 2.8388, "step": 42500 }, { "epoch": 0.44, "learning_rate": 1.823500785412881e-05, "loss": 2.8199, "step": 43000 }, { "epoch": 0.45, "learning_rate": 1.8214474184043287e-05, "loss": 2.829, "step": 43500 }, { "epoch": 0.45, "learning_rate": 1.8193981581297936e-05, "loss": 2.8076, "step": 44000 }, { "epoch": 0.46, "learning_rate": 1.8173447911212413e-05, "loss": 2.8422, "step": 44500 }, { "epoch": 0.46, "learning_rate": 1.815291424112689e-05, "loss": 2.8232, "step": 45000 }, { "epoch": 0.47, "learning_rate": 1.8132380571041364e-05, "loss": 2.8369, "step": 45500 }, { "epoch": 0.47, "learning_rate": 1.8111887968296017e-05, "loss": 2.8246, "step": 46000 }, { "epoch": 0.48, "learning_rate": 1.8091354298210494e-05, "loss": 2.8111, "step": 46500 }, { "epoch": 0.48, "learning_rate": 1.807082062812497e-05, "loss": 2.7974, "step": 47000 }, { "epoch": 0.49, "learning_rate": 1.8050286958039448e-05, "loss": 2.8035, "step": 47500 }, { "epoch": 0.49, "learning_rate": 1.8029794355294094e-05, "loss": 2.7829, "step": 48000 }, { "epoch": 0.5, "learning_rate": 1.800926068520857e-05, "loss": 2.8093, "step": 48500 }, { "epoch": 0.5, "learning_rate": 1.7988727015123048e-05, "loss": 2.7819, "step": 49000 }, { "epoch": 0.51, "learning_rate": 1.7968193345037525e-05, "loss": 2.8044, "step": 49500 }, { "epoch": 0.51, "learning_rate": 1.7947659674952006e-05, "loss": 2.7933, "step": 50000 }, { "epoch": 0.52, "learning_rate": 1.792716707220665e-05, "loss": 2.8061, "step": 50500 }, { "epoch": 0.52, "learning_rate": 1.790663340212113e-05, "loss": 2.8124, "step": 51000 }, { "epoch": 0.53, "learning_rate": 1.7886099732035606e-05, "loss": 2.7989, "step": 51500 }, { "epoch": 0.53, "learning_rate": 1.7865566061950083e-05, "loss": 2.7809, "step": 52000 }, { "epoch": 0.54, "learning_rate": 1.7845073459204732e-05, "loss": 2.7934, "step": 52500 }, { "epoch": 0.54, "learning_rate": 1.782453978911921e-05, "loss": 2.7905, "step": 53000 }, { "epoch": 0.55, "learning_rate": 1.7804006119033686e-05, "loss": 2.7978, "step": 53500 }, { "epoch": 0.55, "learning_rate": 1.7783472448948164e-05, "loss": 2.7977, "step": 54000 }, { "epoch": 0.56, "learning_rate": 1.7762979846202813e-05, "loss": 2.7847, "step": 54500 }, { "epoch": 0.56, "learning_rate": 1.774244617611729e-05, "loss": 2.8054, "step": 55000 }, { "epoch": 0.57, "learning_rate": 1.7721912506031767e-05, "loss": 2.7806, "step": 55500 }, { "epoch": 0.57, "learning_rate": 1.7701378835946244e-05, "loss": 2.788, "step": 56000 }, { "epoch": 0.58, "learning_rate": 1.7680886233200893e-05, "loss": 2.7815, "step": 56500 }, { "epoch": 0.59, "learning_rate": 1.766035256311537e-05, "loss": 2.7515, "step": 57000 }, { "epoch": 0.59, "learning_rate": 1.7639818893029848e-05, "loss": 2.7862, "step": 57500 }, { "epoch": 0.6, "learning_rate": 1.7619285222944325e-05, "loss": 2.7743, "step": 58000 }, { "epoch": 0.6, "learning_rate": 1.7598833687539142e-05, "loss": 2.7668, "step": 58500 }, { "epoch": 0.61, "learning_rate": 1.757830001745362e-05, "loss": 2.8068, "step": 59000 }, { "epoch": 0.61, "learning_rate": 1.75577663473681e-05, "loss": 2.7708, "step": 59500 }, { "epoch": 0.62, "learning_rate": 1.7537232677282577e-05, "loss": 2.7488, "step": 60000 }, { "epoch": 0.62, "learning_rate": 1.7516699007197054e-05, "loss": 2.8045, "step": 60500 }, { "epoch": 0.63, "learning_rate": 1.749616533711153e-05, "loss": 2.7621, "step": 61000 }, { "epoch": 0.63, "learning_rate": 1.747563166702601e-05, "loss": 2.7739, "step": 61500 }, { "epoch": 0.64, "learning_rate": 1.7455097996940486e-05, "loss": 2.7787, "step": 62000 }, { "epoch": 0.64, "learning_rate": 1.743460539419513e-05, "loss": 2.7455, "step": 62500 }, { "epoch": 0.65, "learning_rate": 1.741407172410961e-05, "loss": 2.7489, "step": 63000 }, { "epoch": 0.65, "learning_rate": 1.739353805402409e-05, "loss": 2.7444, "step": 63500 }, { "epoch": 0.66, "learning_rate": 1.7373045451278735e-05, "loss": 2.7777, "step": 64000 }, { "epoch": 0.66, "learning_rate": 1.7352511781193212e-05, "loss": 2.7936, "step": 64500 }, { "epoch": 0.67, "learning_rate": 1.733197811110769e-05, "loss": 2.7491, "step": 65000 }, { "epoch": 0.67, "learning_rate": 1.7311444441022167e-05, "loss": 2.7712, "step": 65500 }, { "epoch": 0.68, "learning_rate": 1.7290910770936644e-05, "loss": 2.7617, "step": 66000 }, { "epoch": 0.68, "learning_rate": 1.727037710085112e-05, "loss": 2.7422, "step": 66500 }, { "epoch": 0.69, "learning_rate": 1.7249843430765598e-05, "loss": 2.7406, "step": 67000 }, { "epoch": 0.69, "learning_rate": 1.722930976068008e-05, "loss": 2.7828, "step": 67500 }, { "epoch": 0.7, "learning_rate": 1.7208817157934724e-05, "loss": 2.7263, "step": 68000 }, { "epoch": 0.7, "learning_rate": 1.71882834878492e-05, "loss": 2.7545, "step": 68500 }, { "epoch": 0.71, "learning_rate": 1.716774981776368e-05, "loss": 2.7511, "step": 69000 }, { "epoch": 0.71, "learning_rate": 1.7147257215018328e-05, "loss": 2.7539, "step": 69500 }, { "epoch": 0.72, "learning_rate": 1.7126723544932805e-05, "loss": 2.7397, "step": 70000 }, { "epoch": 0.72, "learning_rate": 1.7106189874847282e-05, "loss": 2.7778, "step": 70500 }, { "epoch": 0.73, "learning_rate": 1.708565620476176e-05, "loss": 2.7383, "step": 71000 }, { "epoch": 0.73, "learning_rate": 1.7065122534676236e-05, "loss": 2.7689, "step": 71500 }, { "epoch": 0.74, "learning_rate": 1.7044588864590714e-05, "loss": 2.7666, "step": 72000 }, { "epoch": 0.74, "learning_rate": 1.702405519450519e-05, "loss": 2.7912, "step": 72500 }, { "epoch": 0.75, "learning_rate": 1.7003521524419668e-05, "loss": 2.7468, "step": 73000 }, { "epoch": 0.75, "learning_rate": 1.698306998901449e-05, "loss": 2.7189, "step": 73500 }, { "epoch": 0.76, "learning_rate": 1.6962536318928966e-05, "loss": 2.7461, "step": 74000 }, { "epoch": 0.76, "learning_rate": 1.6942002648843443e-05, "loss": 2.7588, "step": 74500 }, { "epoch": 0.77, "learning_rate": 1.692146897875792e-05, "loss": 2.7358, "step": 75000 }, { "epoch": 0.78, "learning_rate": 1.6900935308672398e-05, "loss": 2.7604, "step": 75500 }, { "epoch": 0.78, "learning_rate": 1.6880401638586875e-05, "loss": 2.7661, "step": 76000 }, { "epoch": 0.79, "learning_rate": 1.6859867968501352e-05, "loss": 2.75, "step": 76500 }, { "epoch": 0.79, "learning_rate": 1.683933429841583e-05, "loss": 2.7526, "step": 77000 }, { "epoch": 0.8, "learning_rate": 1.6818800628330306e-05, "loss": 2.7351, "step": 77500 }, { "epoch": 0.8, "learning_rate": 1.6798308025584955e-05, "loss": 2.7663, "step": 78000 }, { "epoch": 0.81, "learning_rate": 1.6777774355499433e-05, "loss": 2.7209, "step": 78500 }, { "epoch": 0.81, "learning_rate": 1.675724068541391e-05, "loss": 2.762, "step": 79000 }, { "epoch": 0.82, "learning_rate": 1.6736707015328387e-05, "loss": 2.7819, "step": 79500 }, { "epoch": 0.82, "learning_rate": 1.6716214412583033e-05, "loss": 2.7384, "step": 80000 }, { "epoch": 0.83, "learning_rate": 1.669568074249751e-05, "loss": 2.7595, "step": 80500 }, { "epoch": 0.83, "learning_rate": 1.6675147072411987e-05, "loss": 2.7407, "step": 81000 }, { "epoch": 0.84, "learning_rate": 1.6654613402326468e-05, "loss": 2.7268, "step": 81500 }, { "epoch": 0.84, "learning_rate": 1.6634120799581117e-05, "loss": 2.7237, "step": 82000 }, { "epoch": 0.85, "learning_rate": 1.661358712949559e-05, "loss": 2.7743, "step": 82500 }, { "epoch": 0.85, "learning_rate": 1.6593053459410068e-05, "loss": 2.7322, "step": 83000 }, { "epoch": 0.86, "learning_rate": 1.6572519789324545e-05, "loss": 2.7411, "step": 83500 }, { "epoch": 0.86, "learning_rate": 1.6552027186579194e-05, "loss": 2.779, "step": 84000 }, { "epoch": 0.87, "learning_rate": 1.6531493516493674e-05, "loss": 2.7452, "step": 84500 }, { "epoch": 0.87, "learning_rate": 1.651095984640815e-05, "loss": 2.7376, "step": 85000 }, { "epoch": 0.88, "learning_rate": 1.6490426176322625e-05, "loss": 2.7268, "step": 85500 }, { "epoch": 0.88, "learning_rate": 1.6469933573577274e-05, "loss": 2.7354, "step": 86000 }, { "epoch": 0.89, "learning_rate": 1.644939990349175e-05, "loss": 2.7492, "step": 86500 }, { "epoch": 0.89, "learning_rate": 1.642886623340623e-05, "loss": 2.7243, "step": 87000 }, { "epoch": 0.9, "learning_rate": 1.6408332563320706e-05, "loss": 2.7543, "step": 87500 }, { "epoch": 0.9, "learning_rate": 1.6387839960575355e-05, "loss": 2.7325, "step": 88000 }, { "epoch": 0.91, "learning_rate": 1.6367306290489832e-05, "loss": 2.7412, "step": 88500 }, { "epoch": 0.91, "learning_rate": 1.634677262040431e-05, "loss": 2.7478, "step": 89000 }, { "epoch": 0.92, "learning_rate": 1.6326238950318786e-05, "loss": 2.7506, "step": 89500 }, { "epoch": 0.92, "learning_rate": 1.6305746347573435e-05, "loss": 2.7484, "step": 90000 }, { "epoch": 0.93, "learning_rate": 1.6285212677487913e-05, "loss": 2.7401, "step": 90500 }, { "epoch": 0.93, "learning_rate": 1.626467900740239e-05, "loss": 2.7286, "step": 91000 }, { "epoch": 0.94, "learning_rate": 1.6244145337316867e-05, "loss": 2.7509, "step": 91500 }, { "epoch": 0.94, "learning_rate": 1.6223652734571516e-05, "loss": 2.7376, "step": 92000 }, { "epoch": 0.95, "learning_rate": 1.6203119064485993e-05, "loss": 2.709, "step": 92500 }, { "epoch": 0.95, "learning_rate": 1.618258539440047e-05, "loss": 2.723, "step": 93000 }, { "epoch": 0.96, "learning_rate": 1.6162051724314948e-05, "loss": 2.7194, "step": 93500 }, { "epoch": 0.97, "learning_rate": 1.6141559121569593e-05, "loss": 2.7385, "step": 94000 }, { "epoch": 0.97, "learning_rate": 1.612102545148407e-05, "loss": 2.6993, "step": 94500 }, { "epoch": 0.98, "learning_rate": 1.610049178139855e-05, "loss": 2.7298, "step": 95000 }, { "epoch": 0.98, "learning_rate": 1.6079958111313028e-05, "loss": 2.718, "step": 95500 }, { "epoch": 0.99, "learning_rate": 1.6059465508567674e-05, "loss": 2.752, "step": 96000 }, { "epoch": 0.99, "learning_rate": 1.603893183848215e-05, "loss": 2.7448, "step": 96500 }, { "epoch": 1.0, "learning_rate": 1.6018398168396628e-05, "loss": 2.724, "step": 97000 }, { "epoch": 1.0, "eval_gen_len": 14.378, "eval_loss": 2.594017744064331, "eval_rouge1": 32.3136, "eval_rouge2": 13.6987, "eval_rougeL": 29.7482, "eval_rougeLsum": 29.7976, "eval_runtime": 64.9599, "eval_samples_per_second": 15.394, "eval_steps_per_second": 1.924, "step": 97401 }, { "epoch": 1.0, "learning_rate": 1.5997864498311105e-05, "loss": 2.7119, "step": 97500 }, { "epoch": 1.01, "learning_rate": 1.5977371895565754e-05, "loss": 2.7325, "step": 98000 }, { "epoch": 1.01, "learning_rate": 1.5956838225480235e-05, "loss": 2.6739, "step": 98500 }, { "epoch": 1.02, "learning_rate": 1.593630455539471e-05, "loss": 2.6834, "step": 99000 }, { "epoch": 1.02, "learning_rate": 1.5915770885309186e-05, "loss": 2.7296, "step": 99500 }, { "epoch": 1.03, "learning_rate": 1.5895278282563835e-05, "loss": 2.723, "step": 100000 }, { "epoch": 1.03, "learning_rate": 1.5874744612478312e-05, "loss": 2.6704, "step": 100500 }, { "epoch": 1.04, "learning_rate": 1.585421094239279e-05, "loss": 2.6961, "step": 101000 }, { "epoch": 1.04, "learning_rate": 1.5833677272307267e-05, "loss": 2.7187, "step": 101500 }, { "epoch": 1.05, "learning_rate": 1.5813184669561916e-05, "loss": 2.6922, "step": 102000 }, { "epoch": 1.05, "learning_rate": 1.5792650999476393e-05, "loss": 2.7033, "step": 102500 }, { "epoch": 1.06, "learning_rate": 1.577211732939087e-05, "loss": 2.7269, "step": 103000 }, { "epoch": 1.06, "learning_rate": 1.5751583659305347e-05, "loss": 2.6851, "step": 103500 }, { "epoch": 1.07, "learning_rate": 1.5731049989219824e-05, "loss": 2.6726, "step": 104000 }, { "epoch": 1.07, "learning_rate": 1.5710598453814642e-05, "loss": 2.7191, "step": 104500 }, { "epoch": 1.08, "learning_rate": 1.5690064783729122e-05, "loss": 2.706, "step": 105000 }, { "epoch": 1.08, "learning_rate": 1.56695311136436e-05, "loss": 2.6976, "step": 105500 }, { "epoch": 1.09, "learning_rate": 1.5648997443558077e-05, "loss": 2.7006, "step": 106000 }, { "epoch": 1.09, "learning_rate": 1.5628463773472554e-05, "loss": 2.6924, "step": 106500 }, { "epoch": 1.1, "learning_rate": 1.560793010338703e-05, "loss": 2.6632, "step": 107000 }, { "epoch": 1.1, "learning_rate": 1.5587396433301508e-05, "loss": 2.7045, "step": 107500 }, { "epoch": 1.11, "learning_rate": 1.5566862763215985e-05, "loss": 2.6816, "step": 108000 }, { "epoch": 1.11, "learning_rate": 1.5546370160470634e-05, "loss": 2.6965, "step": 108500 }, { "epoch": 1.12, "learning_rate": 1.552583649038511e-05, "loss": 2.6934, "step": 109000 }, { "epoch": 1.12, "learning_rate": 1.550530282029959e-05, "loss": 2.7123, "step": 109500 }, { "epoch": 1.13, "learning_rate": 1.5484769150214066e-05, "loss": 2.6684, "step": 110000 }, { "epoch": 1.13, "learning_rate": 1.546427654746871e-05, "loss": 2.6811, "step": 110500 }, { "epoch": 1.14, "learning_rate": 1.544374287738319e-05, "loss": 2.6575, "step": 111000 }, { "epoch": 1.14, "learning_rate": 1.5423209207297666e-05, "loss": 2.702, "step": 111500 }, { "epoch": 1.15, "learning_rate": 1.5402675537212143e-05, "loss": 2.7073, "step": 112000 }, { "epoch": 1.16, "learning_rate": 1.5382182934466792e-05, "loss": 2.6536, "step": 112500 }, { "epoch": 1.16, "learning_rate": 1.536164926438127e-05, "loss": 2.6625, "step": 113000 }, { "epoch": 1.17, "learning_rate": 1.5341115594295747e-05, "loss": 2.7035, "step": 113500 }, { "epoch": 1.17, "learning_rate": 1.5320622991550396e-05, "loss": 2.6826, "step": 114000 }, { "epoch": 1.18, "learning_rate": 1.5300089321464873e-05, "loss": 2.7117, "step": 114500 }, { "epoch": 1.18, "learning_rate": 1.527955565137935e-05, "loss": 2.6746, "step": 115000 }, { "epoch": 1.19, "learning_rate": 1.5259021981293827e-05, "loss": 2.7035, "step": 115500 }, { "epoch": 1.19, "learning_rate": 1.5238529378548476e-05, "loss": 2.6901, "step": 116000 }, { "epoch": 1.2, "learning_rate": 1.5217995708462953e-05, "loss": 2.6582, "step": 116500 }, { "epoch": 1.2, "learning_rate": 1.519746203837743e-05, "loss": 2.6899, "step": 117000 }, { "epoch": 1.21, "learning_rate": 1.5176928368291908e-05, "loss": 2.677, "step": 117500 }, { "epoch": 1.21, "learning_rate": 1.5156394698206385e-05, "loss": 2.7053, "step": 118000 }, { "epoch": 1.22, "learning_rate": 1.5135861028120862e-05, "loss": 2.6964, "step": 118500 }, { "epoch": 1.22, "learning_rate": 1.511532735803534e-05, "loss": 2.6845, "step": 119000 }, { "epoch": 1.23, "learning_rate": 1.5094793687949818e-05, "loss": 2.687, "step": 119500 }, { "epoch": 1.23, "learning_rate": 1.5074260017864295e-05, "loss": 2.6311, "step": 120000 }, { "epoch": 1.24, "learning_rate": 1.5053726347778773e-05, "loss": 2.6907, "step": 120500 }, { "epoch": 1.24, "learning_rate": 1.503323374503342e-05, "loss": 2.6775, "step": 121000 }, { "epoch": 1.25, "learning_rate": 1.5012700074947897e-05, "loss": 2.6635, "step": 121500 }, { "epoch": 1.25, "learning_rate": 1.4992166404862374e-05, "loss": 2.6797, "step": 122000 }, { "epoch": 1.26, "learning_rate": 1.4971632734776851e-05, "loss": 2.6788, "step": 122500 }, { "epoch": 1.26, "learning_rate": 1.4951099064691329e-05, "loss": 2.6853, "step": 123000 }, { "epoch": 1.27, "learning_rate": 1.4930565394605808e-05, "loss": 2.6984, "step": 123500 }, { "epoch": 1.27, "learning_rate": 1.4910031724520285e-05, "loss": 2.7143, "step": 124000 }, { "epoch": 1.28, "learning_rate": 1.4889498054434762e-05, "loss": 2.6955, "step": 124500 }, { "epoch": 1.28, "learning_rate": 1.486900545168941e-05, "loss": 2.6878, "step": 125000 }, { "epoch": 1.29, "learning_rate": 1.4848471781603886e-05, "loss": 2.6728, "step": 125500 }, { "epoch": 1.29, "learning_rate": 1.4827938111518364e-05, "loss": 2.6604, "step": 126000 }, { "epoch": 1.3, "learning_rate": 1.4807404441432839e-05, "loss": 2.6752, "step": 126500 }, { "epoch": 1.3, "learning_rate": 1.478691183868749e-05, "loss": 2.6586, "step": 127000 }, { "epoch": 1.31, "learning_rate": 1.4766378168601967e-05, "loss": 2.6779, "step": 127500 }, { "epoch": 1.31, "learning_rate": 1.4745885565856614e-05, "loss": 2.6706, "step": 128000 }, { "epoch": 1.32, "learning_rate": 1.4725351895771092e-05, "loss": 2.6445, "step": 128500 }, { "epoch": 1.32, "learning_rate": 1.4704818225685569e-05, "loss": 2.6967, "step": 129000 }, { "epoch": 1.33, "learning_rate": 1.4684284555600046e-05, "loss": 2.657, "step": 129500 }, { "epoch": 1.33, "learning_rate": 1.4663750885514523e-05, "loss": 2.6834, "step": 130000 }, { "epoch": 1.34, "learning_rate": 1.4643258282769172e-05, "loss": 2.6453, "step": 130500 }, { "epoch": 1.34, "learning_rate": 1.462272461268365e-05, "loss": 2.6937, "step": 131000 }, { "epoch": 1.35, "learning_rate": 1.4602190942598126e-05, "loss": 2.6599, "step": 131500 }, { "epoch": 1.36, "learning_rate": 1.4581657272512604e-05, "loss": 2.6725, "step": 132000 }, { "epoch": 1.36, "learning_rate": 1.4561123602427081e-05, "loss": 2.6797, "step": 132500 }, { "epoch": 1.37, "learning_rate": 1.4540589932341558e-05, "loss": 2.6563, "step": 133000 }, { "epoch": 1.37, "learning_rate": 1.4520056262256035e-05, "loss": 2.6849, "step": 133500 }, { "epoch": 1.38, "learning_rate": 1.4499522592170512e-05, "loss": 2.6705, "step": 134000 }, { "epoch": 1.38, "learning_rate": 1.4479029989425161e-05, "loss": 2.6557, "step": 134500 }, { "epoch": 1.39, "learning_rate": 1.4458496319339639e-05, "loss": 2.7131, "step": 135000 }, { "epoch": 1.39, "learning_rate": 1.4437962649254116e-05, "loss": 2.6721, "step": 135500 }, { "epoch": 1.4, "learning_rate": 1.4417428979168593e-05, "loss": 2.6652, "step": 136000 }, { "epoch": 1.4, "learning_rate": 1.439693637642324e-05, "loss": 2.6549, "step": 136500 }, { "epoch": 1.41, "learning_rate": 1.4376443773677891e-05, "loss": 2.6765, "step": 137000 }, { "epoch": 1.41, "learning_rate": 1.4355910103592368e-05, "loss": 2.6764, "step": 137500 }, { "epoch": 1.42, "learning_rate": 1.4335376433506845e-05, "loss": 2.6518, "step": 138000 }, { "epoch": 1.42, "learning_rate": 1.4314842763421321e-05, "loss": 2.7014, "step": 138500 }, { "epoch": 1.43, "learning_rate": 1.4294309093335798e-05, "loss": 2.6446, "step": 139000 }, { "epoch": 1.43, "learning_rate": 1.4273775423250275e-05, "loss": 2.6551, "step": 139500 }, { "epoch": 1.44, "learning_rate": 1.4253241753164752e-05, "loss": 2.6676, "step": 140000 }, { "epoch": 1.44, "learning_rate": 1.423270808307923e-05, "loss": 2.6614, "step": 140500 }, { "epoch": 1.45, "learning_rate": 1.421221548033388e-05, "loss": 2.6662, "step": 141000 }, { "epoch": 1.45, "learning_rate": 1.4191681810248356e-05, "loss": 2.6525, "step": 141500 }, { "epoch": 1.46, "learning_rate": 1.4171189207503005e-05, "loss": 2.6505, "step": 142000 }, { "epoch": 1.46, "learning_rate": 1.4150655537417482e-05, "loss": 2.7146, "step": 142500 }, { "epoch": 1.47, "learning_rate": 1.413012186733196e-05, "loss": 2.6776, "step": 143000 }, { "epoch": 1.47, "learning_rate": 1.4109588197246435e-05, "loss": 2.6868, "step": 143500 }, { "epoch": 1.48, "learning_rate": 1.4089054527160912e-05, "loss": 2.6372, "step": 144000 }, { "epoch": 1.48, "learning_rate": 1.4068520857075389e-05, "loss": 2.6477, "step": 144500 }, { "epoch": 1.49, "learning_rate": 1.4047987186989868e-05, "loss": 2.6651, "step": 145000 }, { "epoch": 1.49, "learning_rate": 1.4027453516904345e-05, "loss": 2.6542, "step": 145500 }, { "epoch": 1.5, "learning_rate": 1.4006960914158994e-05, "loss": 2.6695, "step": 146000 }, { "epoch": 1.5, "learning_rate": 1.398642724407347e-05, "loss": 2.6435, "step": 146500 }, { "epoch": 1.51, "learning_rate": 1.3965893573987947e-05, "loss": 2.6432, "step": 147000 }, { "epoch": 1.51, "learning_rate": 1.3945400971242596e-05, "loss": 2.6672, "step": 147500 }, { "epoch": 1.52, "learning_rate": 1.3924867301157075e-05, "loss": 2.6579, "step": 148000 }, { "epoch": 1.52, "learning_rate": 1.3904333631071552e-05, "loss": 2.6796, "step": 148500 }, { "epoch": 1.53, "learning_rate": 1.388379996098603e-05, "loss": 2.6714, "step": 149000 }, { "epoch": 1.53, "learning_rate": 1.3863266290900506e-05, "loss": 2.6398, "step": 149500 }, { "epoch": 1.54, "learning_rate": 1.3842732620814982e-05, "loss": 2.7002, "step": 150000 }, { "epoch": 1.55, "learning_rate": 1.3822240018069631e-05, "loss": 2.6604, "step": 150500 }, { "epoch": 1.55, "learning_rate": 1.3801706347984108e-05, "loss": 2.6441, "step": 151000 }, { "epoch": 1.56, "learning_rate": 1.3781172677898584e-05, "loss": 2.6678, "step": 151500 }, { "epoch": 1.56, "learning_rate": 1.3760639007813064e-05, "loss": 2.6336, "step": 152000 }, { "epoch": 1.57, "learning_rate": 1.3740105337727541e-05, "loss": 2.675, "step": 152500 }, { "epoch": 1.57, "learning_rate": 1.3719571667642017e-05, "loss": 2.642, "step": 153000 }, { "epoch": 1.58, "learning_rate": 1.3699037997556494e-05, "loss": 2.6444, "step": 153500 }, { "epoch": 1.58, "learning_rate": 1.3678504327470971e-05, "loss": 2.6615, "step": 154000 }, { "epoch": 1.59, "learning_rate": 1.365801172472562e-05, "loss": 2.6338, "step": 154500 }, { "epoch": 1.59, "learning_rate": 1.3637478054640096e-05, "loss": 2.6943, "step": 155000 }, { "epoch": 1.6, "learning_rate": 1.3616944384554573e-05, "loss": 2.6665, "step": 155500 }, { "epoch": 1.6, "learning_rate": 1.3596410714469053e-05, "loss": 2.6514, "step": 156000 }, { "epoch": 1.61, "learning_rate": 1.35759181117237e-05, "loss": 2.6531, "step": 156500 }, { "epoch": 1.61, "learning_rate": 1.3555384441638178e-05, "loss": 2.6352, "step": 157000 }, { "epoch": 1.62, "learning_rate": 1.3534850771552655e-05, "loss": 2.6602, "step": 157500 }, { "epoch": 1.62, "learning_rate": 1.351431710146713e-05, "loss": 2.642, "step": 158000 }, { "epoch": 1.63, "learning_rate": 1.349382449872178e-05, "loss": 2.6788, "step": 158500 }, { "epoch": 1.63, "learning_rate": 1.3473290828636259e-05, "loss": 2.6793, "step": 159000 }, { "epoch": 1.64, "learning_rate": 1.3452757158550736e-05, "loss": 2.6424, "step": 159500 }, { "epoch": 1.64, "learning_rate": 1.3432223488465213e-05, "loss": 2.7017, "step": 160000 }, { "epoch": 1.65, "learning_rate": 1.341173088571986e-05, "loss": 2.6175, "step": 160500 }, { "epoch": 1.65, "learning_rate": 1.3391197215634337e-05, "loss": 2.6276, "step": 161000 }, { "epoch": 1.66, "learning_rate": 1.3370663545548815e-05, "loss": 2.6621, "step": 161500 }, { "epoch": 1.66, "learning_rate": 1.3350170942803462e-05, "loss": 2.6435, "step": 162000 }, { "epoch": 1.67, "learning_rate": 1.332963727271794e-05, "loss": 2.6881, "step": 162500 }, { "epoch": 1.67, "learning_rate": 1.3309103602632418e-05, "loss": 2.6399, "step": 163000 }, { "epoch": 1.68, "learning_rate": 1.3288569932546895e-05, "loss": 2.6619, "step": 163500 }, { "epoch": 1.68, "learning_rate": 1.3268036262461372e-05, "loss": 2.6493, "step": 164000 }, { "epoch": 1.69, "learning_rate": 1.324750259237585e-05, "loss": 2.6573, "step": 164500 }, { "epoch": 1.69, "learning_rate": 1.3226968922290327e-05, "loss": 2.668, "step": 165000 }, { "epoch": 1.7, "learning_rate": 1.3206435252204804e-05, "loss": 2.6345, "step": 165500 }, { "epoch": 1.7, "learning_rate": 1.3185942649459451e-05, "loss": 2.665, "step": 166000 }, { "epoch": 1.71, "learning_rate": 1.316540897937393e-05, "loss": 2.6491, "step": 166500 }, { "epoch": 1.71, "learning_rate": 1.3144875309288407e-05, "loss": 2.6337, "step": 167000 }, { "epoch": 1.72, "learning_rate": 1.3124341639202885e-05, "loss": 2.6246, "step": 167500 }, { "epoch": 1.72, "learning_rate": 1.3103807969117362e-05, "loss": 2.6633, "step": 168000 }, { "epoch": 1.73, "learning_rate": 1.3083274299031839e-05, "loss": 2.6581, "step": 168500 }, { "epoch": 1.74, "learning_rate": 1.3062740628946316e-05, "loss": 2.6849, "step": 169000 }, { "epoch": 1.74, "learning_rate": 1.3042206958860793e-05, "loss": 2.647, "step": 169500 }, { "epoch": 1.75, "learning_rate": 1.3021714356115442e-05, "loss": 2.6492, "step": 170000 }, { "epoch": 1.75, "learning_rate": 1.300118068602992e-05, "loss": 2.6167, "step": 170500 }, { "epoch": 1.76, "learning_rate": 1.2980647015944397e-05, "loss": 2.6717, "step": 171000 }, { "epoch": 1.76, "learning_rate": 1.2960113345858874e-05, "loss": 2.6636, "step": 171500 }, { "epoch": 1.77, "learning_rate": 1.2939620743113521e-05, "loss": 2.6242, "step": 172000 }, { "epoch": 1.77, "learning_rate": 1.2919087073027998e-05, "loss": 2.6444, "step": 172500 }, { "epoch": 1.78, "learning_rate": 1.2898594470282646e-05, "loss": 2.6382, "step": 173000 }, { "epoch": 1.78, "learning_rate": 1.2878060800197125e-05, "loss": 2.6251, "step": 173500 }, { "epoch": 1.79, "learning_rate": 1.2857527130111602e-05, "loss": 2.6521, "step": 174000 }, { "epoch": 1.79, "learning_rate": 1.2836993460026079e-05, "loss": 2.6421, "step": 174500 }, { "epoch": 1.8, "learning_rate": 1.2816459789940556e-05, "loss": 2.6558, "step": 175000 }, { "epoch": 1.8, "learning_rate": 1.2795926119855033e-05, "loss": 2.6497, "step": 175500 }, { "epoch": 1.81, "learning_rate": 1.277539244976951e-05, "loss": 2.6507, "step": 176000 }, { "epoch": 1.81, "learning_rate": 1.2754858779683988e-05, "loss": 2.6507, "step": 176500 }, { "epoch": 1.82, "learning_rate": 1.2734366176938635e-05, "loss": 2.6641, "step": 177000 }, { "epoch": 1.82, "learning_rate": 1.2713832506853114e-05, "loss": 2.6628, "step": 177500 }, { "epoch": 1.83, "learning_rate": 1.2693298836767591e-05, "loss": 2.6347, "step": 178000 }, { "epoch": 1.83, "learning_rate": 1.2672765166682068e-05, "loss": 2.6648, "step": 178500 }, { "epoch": 1.84, "learning_rate": 1.2652272563936716e-05, "loss": 2.6192, "step": 179000 }, { "epoch": 1.84, "learning_rate": 1.2631738893851193e-05, "loss": 2.6273, "step": 179500 }, { "epoch": 1.85, "learning_rate": 1.261120522376567e-05, "loss": 2.638, "step": 180000 }, { "epoch": 1.85, "learning_rate": 1.2590671553680147e-05, "loss": 2.6297, "step": 180500 }, { "epoch": 1.86, "learning_rate": 1.2570178950934798e-05, "loss": 2.6306, "step": 181000 }, { "epoch": 1.86, "learning_rate": 1.2549645280849273e-05, "loss": 2.6197, "step": 181500 }, { "epoch": 1.87, "learning_rate": 1.252911161076375e-05, "loss": 2.6327, "step": 182000 }, { "epoch": 1.87, "learning_rate": 1.2508577940678228e-05, "loss": 2.6493, "step": 182500 }, { "epoch": 1.88, "learning_rate": 1.2488085337932877e-05, "loss": 2.6695, "step": 183000 }, { "epoch": 1.88, "learning_rate": 1.2467551667847352e-05, "loss": 2.6641, "step": 183500 }, { "epoch": 1.89, "learning_rate": 1.244701799776183e-05, "loss": 2.6565, "step": 184000 }, { "epoch": 1.89, "learning_rate": 1.242648432767631e-05, "loss": 2.6743, "step": 184500 }, { "epoch": 1.9, "learning_rate": 1.2405991724930957e-05, "loss": 2.6213, "step": 185000 }, { "epoch": 1.9, "learning_rate": 1.2385499122185605e-05, "loss": 2.6396, "step": 185500 }, { "epoch": 1.91, "learning_rate": 1.2364965452100082e-05, "loss": 2.6562, "step": 186000 }, { "epoch": 1.91, "learning_rate": 1.2344431782014559e-05, "loss": 2.6467, "step": 186500 }, { "epoch": 1.92, "learning_rate": 1.2323898111929036e-05, "loss": 2.6323, "step": 187000 }, { "epoch": 1.93, "learning_rate": 1.2303364441843515e-05, "loss": 2.6235, "step": 187500 }, { "epoch": 1.93, "learning_rate": 1.2282830771757992e-05, "loss": 2.6383, "step": 188000 }, { "epoch": 1.94, "learning_rate": 1.226229710167247e-05, "loss": 2.6293, "step": 188500 }, { "epoch": 1.94, "learning_rate": 1.2241763431586947e-05, "loss": 2.6463, "step": 189000 }, { "epoch": 1.95, "learning_rate": 1.2221270828841594e-05, "loss": 2.639, "step": 189500 }, { "epoch": 1.95, "learning_rate": 1.2200737158756071e-05, "loss": 2.6436, "step": 190000 }, { "epoch": 1.96, "learning_rate": 1.2180203488670548e-05, "loss": 2.6464, "step": 190500 }, { "epoch": 1.96, "learning_rate": 1.2159669818585026e-05, "loss": 2.6449, "step": 191000 }, { "epoch": 1.97, "learning_rate": 1.2139177215839675e-05, "loss": 2.615, "step": 191500 }, { "epoch": 1.97, "learning_rate": 1.2118643545754152e-05, "loss": 2.6306, "step": 192000 }, { "epoch": 1.98, "learning_rate": 1.2098109875668629e-05, "loss": 2.6308, "step": 192500 }, { "epoch": 1.98, "learning_rate": 1.2077576205583106e-05, "loss": 2.6156, "step": 193000 }, { "epoch": 1.99, "learning_rate": 1.2057042535497583e-05, "loss": 2.6197, "step": 193500 }, { "epoch": 1.99, "learning_rate": 1.203650886541206e-05, "loss": 2.596, "step": 194000 }, { "epoch": 2.0, "learning_rate": 1.2015975195326538e-05, "loss": 2.6856, "step": 194500 }, { "epoch": 2.0, "eval_gen_len": 14.239, "eval_loss": 2.5211517810821533, "eval_rouge1": 32.6384, "eval_rouge2": 14.1175, "eval_rougeL": 30.0462, "eval_rougeLsum": 30.1515, "eval_runtime": 63.6279, "eval_samples_per_second": 15.716, "eval_steps_per_second": 1.965, "step": 194802 }, { "epoch": 2.0, "learning_rate": 1.1995441525241013e-05, "loss": 2.6536, "step": 195000 }, { "epoch": 2.01, "learning_rate": 1.1974948922495664e-05, "loss": 2.5967, "step": 195500 }, { "epoch": 2.01, "learning_rate": 1.1954415252410141e-05, "loss": 2.6196, "step": 196000 }, { "epoch": 2.02, "learning_rate": 1.1933881582324618e-05, "loss": 2.6051, "step": 196500 }, { "epoch": 2.02, "learning_rate": 1.1913347912239095e-05, "loss": 2.6344, "step": 197000 }, { "epoch": 2.03, "learning_rate": 1.1892855309493743e-05, "loss": 2.6343, "step": 197500 }, { "epoch": 2.03, "learning_rate": 1.187232163940822e-05, "loss": 2.5969, "step": 198000 }, { "epoch": 2.04, "learning_rate": 1.1851787969322699e-05, "loss": 2.6364, "step": 198500 }, { "epoch": 2.04, "learning_rate": 1.1831254299237176e-05, "loss": 2.5896, "step": 199000 }, { "epoch": 2.05, "learning_rate": 1.1810761696491823e-05, "loss": 2.6468, "step": 199500 }, { "epoch": 2.05, "learning_rate": 1.17902280264063e-05, "loss": 2.6083, "step": 200000 }, { "epoch": 2.06, "learning_rate": 1.1769694356320778e-05, "loss": 2.6013, "step": 200500 }, { "epoch": 2.06, "learning_rate": 1.1749160686235255e-05, "loss": 2.6303, "step": 201000 }, { "epoch": 2.07, "learning_rate": 1.1728668083489902e-05, "loss": 2.616, "step": 201500 }, { "epoch": 2.07, "learning_rate": 1.1708134413404381e-05, "loss": 2.6251, "step": 202000 }, { "epoch": 2.08, "learning_rate": 1.1687600743318858e-05, "loss": 2.6259, "step": 202500 }, { "epoch": 2.08, "learning_rate": 1.1667067073233336e-05, "loss": 2.6299, "step": 203000 }, { "epoch": 2.09, "learning_rate": 1.1646574470487983e-05, "loss": 2.6205, "step": 203500 }, { "epoch": 2.09, "learning_rate": 1.162604080040246e-05, "loss": 2.6091, "step": 204000 }, { "epoch": 2.1, "learning_rate": 1.1605507130316937e-05, "loss": 2.6182, "step": 204500 }, { "epoch": 2.1, "learning_rate": 1.1584973460231414e-05, "loss": 2.6144, "step": 205000 }, { "epoch": 2.11, "learning_rate": 1.1564480857486065e-05, "loss": 2.6163, "step": 205500 }, { "epoch": 2.11, "learning_rate": 1.1543947187400542e-05, "loss": 2.606, "step": 206000 }, { "epoch": 2.12, "learning_rate": 1.1523413517315018e-05, "loss": 2.6081, "step": 206500 }, { "epoch": 2.13, "learning_rate": 1.1502879847229495e-05, "loss": 2.6181, "step": 207000 }, { "epoch": 2.13, "learning_rate": 1.1482387244484144e-05, "loss": 2.5952, "step": 207500 }, { "epoch": 2.14, "learning_rate": 1.1461853574398621e-05, "loss": 2.6011, "step": 208000 }, { "epoch": 2.14, "learning_rate": 1.1441319904313097e-05, "loss": 2.6163, "step": 208500 }, { "epoch": 2.15, "learning_rate": 1.1420827301567747e-05, "loss": 2.6228, "step": 209000 }, { "epoch": 2.15, "learning_rate": 1.1400293631482225e-05, "loss": 2.638, "step": 209500 }, { "epoch": 2.16, "learning_rate": 1.1379759961396702e-05, "loss": 2.6202, "step": 210000 }, { "epoch": 2.16, "learning_rate": 1.1359226291311179e-05, "loss": 2.5915, "step": 210500 }, { "epoch": 2.17, "learning_rate": 1.1338692621225656e-05, "loss": 2.6379, "step": 211000 }, { "epoch": 2.17, "learning_rate": 1.1318158951140132e-05, "loss": 2.5958, "step": 211500 }, { "epoch": 2.18, "learning_rate": 1.1297625281054609e-05, "loss": 2.6, "step": 212000 }, { "epoch": 2.18, "learning_rate": 1.1277091610969086e-05, "loss": 2.5956, "step": 212500 }, { "epoch": 2.19, "learning_rate": 1.1256599008223737e-05, "loss": 2.6104, "step": 213000 }, { "epoch": 2.19, "learning_rate": 1.1236065338138214e-05, "loss": 2.6338, "step": 213500 }, { "epoch": 2.2, "learning_rate": 1.1215531668052691e-05, "loss": 2.6188, "step": 214000 }, { "epoch": 2.2, "learning_rate": 1.1194997997967168e-05, "loss": 2.6236, "step": 214500 }, { "epoch": 2.21, "learning_rate": 1.1174505395221816e-05, "loss": 2.5834, "step": 215000 }, { "epoch": 2.21, "learning_rate": 1.1153971725136293e-05, "loss": 2.6136, "step": 215500 }, { "epoch": 2.22, "learning_rate": 1.1133438055050772e-05, "loss": 2.6168, "step": 216000 }, { "epoch": 2.22, "learning_rate": 1.1112904384965249e-05, "loss": 2.6097, "step": 216500 }, { "epoch": 2.23, "learning_rate": 1.1092411782219896e-05, "loss": 2.5822, "step": 217000 }, { "epoch": 2.23, "learning_rate": 1.1071878112134373e-05, "loss": 2.5863, "step": 217500 }, { "epoch": 2.24, "learning_rate": 1.105134444204885e-05, "loss": 2.5925, "step": 218000 }, { "epoch": 2.24, "learning_rate": 1.1030810771963328e-05, "loss": 2.5834, "step": 218500 }, { "epoch": 2.25, "learning_rate": 1.1010318169217975e-05, "loss": 2.6185, "step": 219000 }, { "epoch": 2.25, "learning_rate": 1.0989784499132454e-05, "loss": 2.5897, "step": 219500 }, { "epoch": 2.26, "learning_rate": 1.0969250829046931e-05, "loss": 2.6144, "step": 220000 }, { "epoch": 2.26, "learning_rate": 1.0948717158961408e-05, "loss": 2.5723, "step": 220500 }, { "epoch": 2.27, "learning_rate": 1.0928224556216056e-05, "loss": 2.617, "step": 221000 }, { "epoch": 2.27, "learning_rate": 1.0907690886130533e-05, "loss": 2.588, "step": 221500 }, { "epoch": 2.28, "learning_rate": 1.088715721604501e-05, "loss": 2.6123, "step": 222000 }, { "epoch": 2.28, "learning_rate": 1.0866623545959487e-05, "loss": 2.6046, "step": 222500 }, { "epoch": 2.29, "learning_rate": 1.0846130943214138e-05, "loss": 2.6093, "step": 223000 }, { "epoch": 2.29, "learning_rate": 1.0825597273128613e-05, "loss": 2.6046, "step": 223500 }, { "epoch": 2.3, "learning_rate": 1.080506360304309e-05, "loss": 2.6244, "step": 224000 }, { "epoch": 2.3, "learning_rate": 1.0784529932957568e-05, "loss": 2.5966, "step": 224500 }, { "epoch": 2.31, "learning_rate": 1.0764037330212215e-05, "loss": 2.6191, "step": 225000 }, { "epoch": 2.32, "learning_rate": 1.0743503660126692e-05, "loss": 2.6419, "step": 225500 }, { "epoch": 2.32, "learning_rate": 1.072296999004117e-05, "loss": 2.6193, "step": 226000 }, { "epoch": 2.33, "learning_rate": 1.0702436319955648e-05, "loss": 2.6015, "step": 226500 }, { "epoch": 2.33, "learning_rate": 1.0681943717210297e-05, "loss": 2.6035, "step": 227000 }, { "epoch": 2.34, "learning_rate": 1.0661410047124775e-05, "loss": 2.5822, "step": 227500 }, { "epoch": 2.34, "learning_rate": 1.0640876377039252e-05, "loss": 2.6084, "step": 228000 }, { "epoch": 2.35, "learning_rate": 1.0620342706953727e-05, "loss": 2.5711, "step": 228500 }, { "epoch": 2.35, "learning_rate": 1.0599850104208376e-05, "loss": 2.5969, "step": 229000 }, { "epoch": 2.36, "learning_rate": 1.0579316434122855e-05, "loss": 2.5911, "step": 229500 }, { "epoch": 2.36, "learning_rate": 1.0558782764037332e-05, "loss": 2.608, "step": 230000 }, { "epoch": 2.37, "learning_rate": 1.053824909395181e-05, "loss": 2.63, "step": 230500 }, { "epoch": 2.37, "learning_rate": 1.0517756491206457e-05, "loss": 2.5886, "step": 231000 }, { "epoch": 2.38, "learning_rate": 1.0497222821120934e-05, "loss": 2.6097, "step": 231500 }, { "epoch": 2.38, "learning_rate": 1.0476689151035411e-05, "loss": 2.6133, "step": 232000 }, { "epoch": 2.39, "learning_rate": 1.0456155480949888e-05, "loss": 2.6208, "step": 232500 }, { "epoch": 2.39, "learning_rate": 1.0435662878204537e-05, "loss": 2.6012, "step": 233000 }, { "epoch": 2.4, "learning_rate": 1.0415129208119015e-05, "loss": 2.632, "step": 233500 }, { "epoch": 2.4, "learning_rate": 1.0394595538033492e-05, "loss": 2.5796, "step": 234000 }, { "epoch": 2.41, "learning_rate": 1.0374061867947969e-05, "loss": 2.6075, "step": 234500 }, { "epoch": 2.41, "learning_rate": 1.0353610332542786e-05, "loss": 2.6212, "step": 235000 }, { "epoch": 2.42, "learning_rate": 1.0333076662457264e-05, "loss": 2.5728, "step": 235500 }, { "epoch": 2.42, "learning_rate": 1.0312542992371744e-05, "loss": 2.5845, "step": 236000 }, { "epoch": 2.43, "learning_rate": 1.0292009322286221e-05, "loss": 2.5888, "step": 236500 }, { "epoch": 2.43, "learning_rate": 1.0271475652200697e-05, "loss": 2.5924, "step": 237000 }, { "epoch": 2.44, "learning_rate": 1.0250941982115174e-05, "loss": 2.6477, "step": 237500 }, { "epoch": 2.44, "learning_rate": 1.0230408312029651e-05, "loss": 2.613, "step": 238000 }, { "epoch": 2.45, "learning_rate": 1.0209874641944128e-05, "loss": 2.576, "step": 238500 }, { "epoch": 2.45, "learning_rate": 1.0189382039198776e-05, "loss": 2.617, "step": 239000 }, { "epoch": 2.46, "learning_rate": 1.0168848369113253e-05, "loss": 2.5819, "step": 239500 }, { "epoch": 2.46, "learning_rate": 1.0148314699027732e-05, "loss": 2.5933, "step": 240000 }, { "epoch": 2.47, "learning_rate": 1.0127781028942209e-05, "loss": 2.6146, "step": 240500 }, { "epoch": 2.47, "learning_rate": 1.0107247358856686e-05, "loss": 2.6096, "step": 241000 }, { "epoch": 2.48, "learning_rate": 1.0086754756111335e-05, "loss": 2.609, "step": 241500 }, { "epoch": 2.48, "learning_rate": 1.006622108602581e-05, "loss": 2.5756, "step": 242000 }, { "epoch": 2.49, "learning_rate": 1.0045687415940288e-05, "loss": 2.5681, "step": 242500 }, { "epoch": 2.49, "learning_rate": 1.0025153745854765e-05, "loss": 2.5871, "step": 243000 }, { "epoch": 2.5, "learning_rate": 1.0004661143109416e-05, "loss": 2.602, "step": 243500 }, { "epoch": 2.51, "learning_rate": 9.984127473023891e-06, "loss": 2.6109, "step": 244000 }, { "epoch": 2.51, "learning_rate": 9.96363487027854e-06, "loss": 2.5935, "step": 244500 }, { "epoch": 2.52, "learning_rate": 9.943101200193017e-06, "loss": 2.5964, "step": 245000 }, { "epoch": 2.52, "learning_rate": 9.922567530107495e-06, "loss": 2.5973, "step": 245500 }, { "epoch": 2.53, "learning_rate": 9.902033860021972e-06, "loss": 2.5965, "step": 246000 }, { "epoch": 2.53, "learning_rate": 9.881500189936449e-06, "loss": 2.6214, "step": 246500 }, { "epoch": 2.54, "learning_rate": 9.860966519850926e-06, "loss": 2.5965, "step": 247000 }, { "epoch": 2.54, "learning_rate": 9.840432849765403e-06, "loss": 2.5984, "step": 247500 }, { "epoch": 2.55, "learning_rate": 9.81989917967988e-06, "loss": 2.6208, "step": 248000 }, { "epoch": 2.55, "learning_rate": 9.79940657693453e-06, "loss": 2.584, "step": 248500 }, { "epoch": 2.56, "learning_rate": 9.778872906849007e-06, "loss": 2.614, "step": 249000 }, { "epoch": 2.56, "learning_rate": 9.758339236763484e-06, "loss": 2.5768, "step": 249500 }, { "epoch": 2.57, "learning_rate": 9.737805566677961e-06, "loss": 2.5877, "step": 250000 }, { "epoch": 2.57, "learning_rate": 9.717312963932609e-06, "loss": 2.6115, "step": 250500 }, { "epoch": 2.58, "learning_rate": 9.696779293847086e-06, "loss": 2.5976, "step": 251000 }, { "epoch": 2.58, "learning_rate": 9.676245623761565e-06, "loss": 2.5741, "step": 251500 }, { "epoch": 2.59, "learning_rate": 9.655711953676042e-06, "loss": 2.597, "step": 252000 }, { "epoch": 2.59, "learning_rate": 9.635219350930689e-06, "loss": 2.6006, "step": 252500 }, { "epoch": 2.6, "learning_rate": 9.614685680845168e-06, "loss": 2.5908, "step": 253000 }, { "epoch": 2.6, "learning_rate": 9.594152010759643e-06, "loss": 2.6164, "step": 253500 }, { "epoch": 2.61, "learning_rate": 9.57361834067412e-06, "loss": 2.6051, "step": 254000 }, { "epoch": 2.61, "learning_rate": 9.55312573792877e-06, "loss": 2.5831, "step": 254500 }, { "epoch": 2.62, "learning_rate": 9.532592067843247e-06, "loss": 2.5701, "step": 255000 }, { "epoch": 2.62, "learning_rate": 9.512058397757724e-06, "loss": 2.613, "step": 255500 }, { "epoch": 2.63, "learning_rate": 9.491524727672201e-06, "loss": 2.6147, "step": 256000 }, { "epoch": 2.63, "learning_rate": 9.47103212492685e-06, "loss": 2.606, "step": 256500 }, { "epoch": 2.64, "learning_rate": 9.450498454841327e-06, "loss": 2.5975, "step": 257000 }, { "epoch": 2.64, "learning_rate": 9.429964784755805e-06, "loss": 2.5789, "step": 257500 }, { "epoch": 2.65, "learning_rate": 9.409431114670282e-06, "loss": 2.5324, "step": 258000 }, { "epoch": 2.65, "learning_rate": 9.388938511924929e-06, "loss": 2.6155, "step": 258500 }, { "epoch": 2.66, "learning_rate": 9.368404841839406e-06, "loss": 2.5899, "step": 259000 }, { "epoch": 2.66, "learning_rate": 9.347871171753883e-06, "loss": 2.6095, "step": 259500 }, { "epoch": 2.67, "learning_rate": 9.32733750166836e-06, "loss": 2.6055, "step": 260000 }, { "epoch": 2.67, "learning_rate": 9.30684489892301e-06, "loss": 2.6007, "step": 260500 }, { "epoch": 2.68, "learning_rate": 9.286311228837487e-06, "loss": 2.6287, "step": 261000 }, { "epoch": 2.68, "learning_rate": 9.265777558751964e-06, "loss": 2.6304, "step": 261500 }, { "epoch": 2.69, "learning_rate": 9.245243888666441e-06, "loss": 2.5703, "step": 262000 }, { "epoch": 2.7, "learning_rate": 9.22475128592109e-06, "loss": 2.5771, "step": 262500 }, { "epoch": 2.7, "learning_rate": 9.204217615835567e-06, "loss": 2.5947, "step": 263000 }, { "epoch": 2.71, "learning_rate": 9.183683945750045e-06, "loss": 2.6205, "step": 263500 }, { "epoch": 2.71, "learning_rate": 9.163150275664522e-06, "loss": 2.5745, "step": 264000 }, { "epoch": 2.72, "learning_rate": 9.142616605578999e-06, "loss": 2.6126, "step": 264500 }, { "epoch": 2.72, "learning_rate": 9.122124002833648e-06, "loss": 2.5981, "step": 265000 }, { "epoch": 2.73, "learning_rate": 9.101590332748125e-06, "loss": 2.5772, "step": 265500 }, { "epoch": 2.73, "learning_rate": 9.081056662662602e-06, "loss": 2.6235, "step": 266000 }, { "epoch": 2.74, "learning_rate": 9.06052299257708e-06, "loss": 2.5577, "step": 266500 }, { "epoch": 2.74, "learning_rate": 9.040030389831727e-06, "loss": 2.6086, "step": 267000 }, { "epoch": 2.75, "learning_rate": 9.019496719746204e-06, "loss": 2.5849, "step": 267500 }, { "epoch": 2.75, "learning_rate": 8.998963049660681e-06, "loss": 2.5951, "step": 268000 }, { "epoch": 2.76, "learning_rate": 8.978429379575158e-06, "loss": 2.5432, "step": 268500 }, { "epoch": 2.76, "learning_rate": 8.957936776829808e-06, "loss": 2.596, "step": 269000 }, { "epoch": 2.77, "learning_rate": 8.937403106744285e-06, "loss": 2.6096, "step": 269500 }, { "epoch": 2.77, "learning_rate": 8.916869436658762e-06, "loss": 2.6013, "step": 270000 }, { "epoch": 2.78, "learning_rate": 8.896335766573239e-06, "loss": 2.5926, "step": 270500 }, { "epoch": 2.78, "learning_rate": 8.875843163827888e-06, "loss": 2.5741, "step": 271000 }, { "epoch": 2.79, "learning_rate": 8.855309493742365e-06, "loss": 2.5585, "step": 271500 }, { "epoch": 2.79, "learning_rate": 8.83477582365684e-06, "loss": 2.6125, "step": 272000 }, { "epoch": 2.8, "learning_rate": 8.81424215357132e-06, "loss": 2.5581, "step": 272500 }, { "epoch": 2.8, "learning_rate": 8.793749550825967e-06, "loss": 2.5723, "step": 273000 }, { "epoch": 2.81, "learning_rate": 8.773215880740444e-06, "loss": 2.5726, "step": 273500 }, { "epoch": 2.81, "learning_rate": 8.752682210654923e-06, "loss": 2.6084, "step": 274000 }, { "epoch": 2.82, "learning_rate": 8.7321485405694e-06, "loss": 2.5814, "step": 274500 }, { "epoch": 2.82, "learning_rate": 8.711655937824048e-06, "loss": 2.6171, "step": 275000 }, { "epoch": 2.83, "learning_rate": 8.691122267738525e-06, "loss": 2.6089, "step": 275500 }, { "epoch": 2.83, "learning_rate": 8.670588597653002e-06, "loss": 2.5841, "step": 276000 }, { "epoch": 2.84, "learning_rate": 8.650054927567479e-06, "loss": 2.5735, "step": 276500 }, { "epoch": 2.84, "learning_rate": 8.629562324822128e-06, "loss": 2.5961, "step": 277000 }, { "epoch": 2.85, "learning_rate": 8.609028654736605e-06, "loss": 2.5913, "step": 277500 }, { "epoch": 2.85, "learning_rate": 8.588494984651082e-06, "loss": 2.5947, "step": 278000 }, { "epoch": 2.86, "learning_rate": 8.56796131456556e-06, "loss": 2.6138, "step": 278500 }, { "epoch": 2.86, "learning_rate": 8.547468711820209e-06, "loss": 2.584, "step": 279000 }, { "epoch": 2.87, "learning_rate": 8.526935041734686e-06, "loss": 2.5909, "step": 279500 }, { "epoch": 2.87, "learning_rate": 8.506401371649163e-06, "loss": 2.5691, "step": 280000 }, { "epoch": 2.88, "learning_rate": 8.485867701563639e-06, "loss": 2.5852, "step": 280500 }, { "epoch": 2.88, "learning_rate": 8.46541616615846e-06, "loss": 2.5937, "step": 281000 }, { "epoch": 2.89, "learning_rate": 8.444882496072937e-06, "loss": 2.5956, "step": 281500 }, { "epoch": 2.9, "learning_rate": 8.424348825987414e-06, "loss": 2.627, "step": 282000 }, { "epoch": 2.9, "learning_rate": 8.403815155901891e-06, "loss": 2.6027, "step": 282500 }, { "epoch": 2.91, "learning_rate": 8.383281485816368e-06, "loss": 2.5815, "step": 283000 }, { "epoch": 2.91, "learning_rate": 8.362747815730845e-06, "loss": 2.5685, "step": 283500 }, { "epoch": 2.92, "learning_rate": 8.342214145645323e-06, "loss": 2.6211, "step": 284000 }, { "epoch": 2.92, "learning_rate": 8.321721542899972e-06, "loss": 2.6028, "step": 284500 }, { "epoch": 2.93, "learning_rate": 8.301187872814449e-06, "loss": 2.5959, "step": 285000 }, { "epoch": 2.93, "learning_rate": 8.280654202728924e-06, "loss": 2.5821, "step": 285500 }, { "epoch": 2.94, "learning_rate": 8.260120532643403e-06, "loss": 2.5694, "step": 286000 }, { "epoch": 2.94, "learning_rate": 8.23958686255788e-06, "loss": 2.5806, "step": 286500 }, { "epoch": 2.95, "learning_rate": 8.219053192472357e-06, "loss": 2.5853, "step": 287000 }, { "epoch": 2.95, "learning_rate": 8.198519522386835e-06, "loss": 2.561, "step": 287500 }, { "epoch": 2.96, "learning_rate": 8.177985852301312e-06, "loss": 2.5861, "step": 288000 }, { "epoch": 2.96, "learning_rate": 8.15749324955596e-06, "loss": 2.5743, "step": 288500 }, { "epoch": 2.97, "learning_rate": 8.136959579470436e-06, "loss": 2.5937, "step": 289000 }, { "epoch": 2.97, "learning_rate": 8.116425909384914e-06, "loss": 2.5558, "step": 289500 }, { "epoch": 2.98, "learning_rate": 8.095892239299392e-06, "loss": 2.644, "step": 290000 }, { "epoch": 2.98, "learning_rate": 8.07539963655404e-06, "loss": 2.576, "step": 290500 }, { "epoch": 2.99, "learning_rate": 8.054865966468517e-06, "loss": 2.614, "step": 291000 }, { "epoch": 2.99, "learning_rate": 8.034332296382996e-06, "loss": 2.5867, "step": 291500 }, { "epoch": 3.0, "learning_rate": 8.013798626297471e-06, "loss": 2.5847, "step": 292000 }, { "epoch": 3.0, "eval_gen_len": 14.18, "eval_loss": 2.4842422008514404, "eval_rouge1": 32.8849, "eval_rouge2": 14.1714, "eval_rougeL": 30.3938, "eval_rougeLsum": 30.5032, "eval_runtime": 64.3777, "eval_samples_per_second": 15.533, "eval_steps_per_second": 1.942, "step": 292203 }, { "epoch": 3.0, "learning_rate": 7.99330602355212e-06, "loss": 2.6098, "step": 292500 }, { "epoch": 3.01, "learning_rate": 7.972772353466598e-06, "loss": 2.5892, "step": 293000 }, { "epoch": 3.01, "learning_rate": 7.952238683381075e-06, "loss": 2.6083, "step": 293500 }, { "epoch": 3.02, "learning_rate": 7.931705013295552e-06, "loss": 2.5594, "step": 294000 }, { "epoch": 3.02, "learning_rate": 7.911212410550201e-06, "loss": 2.578, "step": 294500 }, { "epoch": 3.03, "learning_rate": 7.890678740464678e-06, "loss": 2.5775, "step": 295000 }, { "epoch": 3.03, "learning_rate": 7.870145070379155e-06, "loss": 2.5704, "step": 295500 }, { "epoch": 3.04, "learning_rate": 7.849611400293632e-06, "loss": 2.5723, "step": 296000 }, { "epoch": 3.04, "learning_rate": 7.829118797548281e-06, "loss": 2.5669, "step": 296500 }, { "epoch": 3.05, "learning_rate": 7.808626194802929e-06, "loss": 2.588, "step": 297000 }, { "epoch": 3.05, "learning_rate": 7.788092524717406e-06, "loss": 2.5423, "step": 297500 }, { "epoch": 3.06, "learning_rate": 7.767558854631883e-06, "loss": 2.6014, "step": 298000 }, { "epoch": 3.06, "learning_rate": 7.74702518454636e-06, "loss": 2.5682, "step": 298500 }, { "epoch": 3.07, "learning_rate": 7.726491514460838e-06, "loss": 2.5787, "step": 299000 }, { "epoch": 3.07, "learning_rate": 7.705957844375315e-06, "loss": 2.5539, "step": 299500 }, { "epoch": 3.08, "learning_rate": 7.685424174289794e-06, "loss": 2.5781, "step": 300000 }, { "epoch": 3.09, "learning_rate": 7.664890504204269e-06, "loss": 2.5441, "step": 300500 }, { "epoch": 3.09, "learning_rate": 7.644397901458918e-06, "loss": 2.5906, "step": 301000 }, { "epoch": 3.1, "learning_rate": 7.6238642313733945e-06, "loss": 2.5589, "step": 301500 }, { "epoch": 3.1, "learning_rate": 7.6033305612878725e-06, "loss": 2.5654, "step": 302000 }, { "epoch": 3.11, "learning_rate": 7.58279689120235e-06, "loss": 2.5716, "step": 302500 }, { "epoch": 3.11, "learning_rate": 7.562304288456998e-06, "loss": 2.5695, "step": 303000 }, { "epoch": 3.12, "learning_rate": 7.541770618371476e-06, "loss": 2.5949, "step": 303500 }, { "epoch": 3.12, "learning_rate": 7.521236948285952e-06, "loss": 2.5542, "step": 304000 }, { "epoch": 3.13, "learning_rate": 7.5007032782004294e-06, "loss": 2.5522, "step": 304500 }, { "epoch": 3.13, "learning_rate": 7.4802106754550785e-06, "loss": 2.5402, "step": 305000 }, { "epoch": 3.14, "learning_rate": 7.459677005369556e-06, "loss": 2.5999, "step": 305500 }, { "epoch": 3.14, "learning_rate": 7.439143335284033e-06, "loss": 2.5563, "step": 306000 }, { "epoch": 3.15, "learning_rate": 7.418609665198509e-06, "loss": 2.5637, "step": 306500 }, { "epoch": 3.15, "learning_rate": 7.398075995112986e-06, "loss": 2.5705, "step": 307000 }, { "epoch": 3.16, "learning_rate": 7.377583392367635e-06, "loss": 2.5639, "step": 307500 }, { "epoch": 3.16, "learning_rate": 7.3570497222821126e-06, "loss": 2.5505, "step": 308000 }, { "epoch": 3.17, "learning_rate": 7.33651605219659e-06, "loss": 2.5897, "step": 308500 }, { "epoch": 3.17, "learning_rate": 7.315982382111068e-06, "loss": 2.5572, "step": 309000 }, { "epoch": 3.18, "learning_rate": 7.295489779365715e-06, "loss": 2.5474, "step": 309500 }, { "epoch": 3.18, "learning_rate": 7.274956109280192e-06, "loss": 2.5405, "step": 310000 }, { "epoch": 3.19, "learning_rate": 7.25442243919467e-06, "loss": 2.5853, "step": 310500 }, { "epoch": 3.19, "learning_rate": 7.2338887691091475e-06, "loss": 2.5544, "step": 311000 }, { "epoch": 3.2, "learning_rate": 7.213396166363795e-06, "loss": 2.5801, "step": 311500 }, { "epoch": 3.2, "learning_rate": 7.192862496278274e-06, "loss": 2.6023, "step": 312000 }, { "epoch": 3.21, "learning_rate": 7.17232882619275e-06, "loss": 2.5458, "step": 312500 }, { "epoch": 3.21, "learning_rate": 7.151795156107227e-06, "loss": 2.5735, "step": 313000 }, { "epoch": 3.22, "learning_rate": 7.131302553361876e-06, "loss": 2.547, "step": 313500 }, { "epoch": 3.22, "learning_rate": 7.1107688832763534e-06, "loss": 2.59, "step": 314000 }, { "epoch": 3.23, "learning_rate": 7.090235213190831e-06, "loss": 2.5819, "step": 314500 }, { "epoch": 3.23, "learning_rate": 7.069701543105307e-06, "loss": 2.5353, "step": 315000 }, { "epoch": 3.24, "learning_rate": 7.049250007700127e-06, "loss": 2.5629, "step": 315500 }, { "epoch": 3.24, "learning_rate": 7.028716337614604e-06, "loss": 2.5804, "step": 316000 }, { "epoch": 3.25, "learning_rate": 7.008182667529081e-06, "loss": 2.5637, "step": 316500 }, { "epoch": 3.25, "learning_rate": 6.987648997443559e-06, "loss": 2.6034, "step": 317000 }, { "epoch": 3.26, "learning_rate": 6.967115327358036e-06, "loss": 2.5929, "step": 317500 }, { "epoch": 3.26, "learning_rate": 6.946581657272513e-06, "loss": 2.5733, "step": 318000 }, { "epoch": 3.27, "learning_rate": 6.92604798718699e-06, "loss": 2.5497, "step": 318500 }, { "epoch": 3.28, "learning_rate": 6.905514317101468e-06, "loss": 2.5831, "step": 319000 }, { "epoch": 3.28, "learning_rate": 6.884980647015945e-06, "loss": 2.5796, "step": 319500 }, { "epoch": 3.29, "learning_rate": 6.864488044270593e-06, "loss": 2.5689, "step": 320000 }, { "epoch": 3.29, "learning_rate": 6.84395437418507e-06, "loss": 2.5814, "step": 320500 }, { "epoch": 3.3, "learning_rate": 6.823420704099548e-06, "loss": 2.5869, "step": 321000 }, { "epoch": 3.3, "learning_rate": 6.802887034014025e-06, "loss": 2.5768, "step": 321500 }, { "epoch": 3.31, "learning_rate": 6.782394431268673e-06, "loss": 2.5517, "step": 322000 }, { "epoch": 3.31, "learning_rate": 6.761860761183151e-06, "loss": 2.5505, "step": 322500 }, { "epoch": 3.32, "learning_rate": 6.7413270910976284e-06, "loss": 2.5703, "step": 323000 }, { "epoch": 3.32, "learning_rate": 6.720793421012105e-06, "loss": 2.5399, "step": 323500 }, { "epoch": 3.33, "learning_rate": 6.700300818266754e-06, "loss": 2.5217, "step": 324000 }, { "epoch": 3.33, "learning_rate": 6.679767148181231e-06, "loss": 2.5858, "step": 324500 }, { "epoch": 3.34, "learning_rate": 6.659233478095708e-06, "loss": 2.5503, "step": 325000 }, { "epoch": 3.34, "learning_rate": 6.638699808010185e-06, "loss": 2.5654, "step": 325500 }, { "epoch": 3.35, "learning_rate": 6.6182482726050045e-06, "loss": 2.5682, "step": 326000 }, { "epoch": 3.35, "learning_rate": 6.597714602519482e-06, "loss": 2.5831, "step": 326500 }, { "epoch": 3.36, "learning_rate": 6.577180932433959e-06, "loss": 2.5548, "step": 327000 }, { "epoch": 3.36, "learning_rate": 6.556647262348437e-06, "loss": 2.5693, "step": 327500 }, { "epoch": 3.37, "learning_rate": 6.536113592262914e-06, "loss": 2.5505, "step": 328000 }, { "epoch": 3.37, "learning_rate": 6.5155799221773904e-06, "loss": 2.5547, "step": 328500 }, { "epoch": 3.38, "learning_rate": 6.495046252091868e-06, "loss": 2.5548, "step": 329000 }, { "epoch": 3.38, "learning_rate": 6.474512582006346e-06, "loss": 2.5613, "step": 329500 }, { "epoch": 3.39, "learning_rate": 6.454019979260994e-06, "loss": 2.5908, "step": 330000 }, { "epoch": 3.39, "learning_rate": 6.433486309175471e-06, "loss": 2.5816, "step": 330500 }, { "epoch": 3.4, "learning_rate": 6.412952639089949e-06, "loss": 2.581, "step": 331000 }, { "epoch": 3.4, "learning_rate": 6.392418969004425e-06, "loss": 2.5566, "step": 331500 }, { "epoch": 3.41, "learning_rate": 6.371885298918903e-06, "loss": 2.5769, "step": 332000 }, { "epoch": 3.41, "learning_rate": 6.35135162883338e-06, "loss": 2.5591, "step": 332500 }, { "epoch": 3.42, "learning_rate": 6.330817958747857e-06, "loss": 2.5916, "step": 333000 }, { "epoch": 3.42, "learning_rate": 6.310284288662335e-06, "loss": 2.5778, "step": 333500 }, { "epoch": 3.43, "learning_rate": 6.289791685916982e-06, "loss": 2.5726, "step": 334000 }, { "epoch": 3.43, "learning_rate": 6.2692580158314595e-06, "loss": 2.5671, "step": 334500 }, { "epoch": 3.44, "learning_rate": 6.2487243457459376e-06, "loss": 2.5546, "step": 335000 }, { "epoch": 3.44, "learning_rate": 6.228190675660415e-06, "loss": 2.5633, "step": 335500 }, { "epoch": 3.45, "learning_rate": 6.207698072915063e-06, "loss": 2.557, "step": 336000 }, { "epoch": 3.45, "learning_rate": 6.187164402829541e-06, "loss": 2.5664, "step": 336500 }, { "epoch": 3.46, "learning_rate": 6.166630732744018e-06, "loss": 2.5559, "step": 337000 }, { "epoch": 3.47, "learning_rate": 6.1460970626584945e-06, "loss": 2.5573, "step": 337500 }, { "epoch": 3.47, "learning_rate": 6.125604459913143e-06, "loss": 2.5808, "step": 338000 }, { "epoch": 3.48, "learning_rate": 6.105070789827621e-06, "loss": 2.5853, "step": 338500 }, { "epoch": 3.48, "learning_rate": 6.084537119742098e-06, "loss": 2.5694, "step": 339000 }, { "epoch": 3.49, "learning_rate": 6.064003449656575e-06, "loss": 2.5655, "step": 339500 }, { "epoch": 3.49, "learning_rate": 6.043510846911223e-06, "loss": 2.5472, "step": 340000 }, { "epoch": 3.5, "learning_rate": 6.0229771768257e-06, "loss": 2.5605, "step": 340500 }, { "epoch": 3.5, "learning_rate": 6.002443506740178e-06, "loss": 2.5799, "step": 341000 }, { "epoch": 3.51, "learning_rate": 5.981909836654655e-06, "loss": 2.5311, "step": 341500 }, { "epoch": 3.51, "learning_rate": 5.961376166569133e-06, "loss": 2.5579, "step": 342000 }, { "epoch": 3.52, "learning_rate": 5.94088356382378e-06, "loss": 2.5476, "step": 342500 }, { "epoch": 3.52, "learning_rate": 5.920390961078429e-06, "loss": 2.5484, "step": 343000 }, { "epoch": 3.53, "learning_rate": 5.899857290992906e-06, "loss": 2.5756, "step": 343500 }, { "epoch": 3.53, "learning_rate": 5.8793236209073835e-06, "loss": 2.5241, "step": 344000 }, { "epoch": 3.54, "learning_rate": 5.858789950821861e-06, "loss": 2.5444, "step": 344500 }, { "epoch": 3.54, "learning_rate": 5.838256280736337e-06, "loss": 2.5398, "step": 345000 }, { "epoch": 3.55, "learning_rate": 5.817722610650816e-06, "loss": 2.5498, "step": 345500 }, { "epoch": 3.55, "learning_rate": 5.797188940565292e-06, "loss": 2.566, "step": 346000 }, { "epoch": 3.56, "learning_rate": 5.7766552704797695e-06, "loss": 2.574, "step": 346500 }, { "epoch": 3.56, "learning_rate": 5.7561626677344185e-06, "loss": 2.5921, "step": 347000 }, { "epoch": 3.57, "learning_rate": 5.735628997648896e-06, "loss": 2.5325, "step": 347500 }, { "epoch": 3.57, "learning_rate": 5.715095327563373e-06, "loss": 2.5733, "step": 348000 }, { "epoch": 3.58, "learning_rate": 5.694561657477849e-06, "loss": 2.5372, "step": 348500 }, { "epoch": 3.58, "learning_rate": 5.674069054732498e-06, "loss": 2.5424, "step": 349000 }, { "epoch": 3.59, "learning_rate": 5.653535384646975e-06, "loss": 2.5745, "step": 349500 }, { "epoch": 3.59, "learning_rate": 5.633001714561453e-06, "loss": 2.5407, "step": 350000 }, { "epoch": 3.6, "learning_rate": 5.61246804447593e-06, "loss": 2.5545, "step": 350500 }, { "epoch": 3.6, "learning_rate": 5.591975441730578e-06, "loss": 2.5532, "step": 351000 }, { "epoch": 3.61, "learning_rate": 5.571441771645055e-06, "loss": 2.5653, "step": 351500 }, { "epoch": 3.61, "learning_rate": 5.550908101559532e-06, "loss": 2.5588, "step": 352000 }, { "epoch": 3.62, "learning_rate": 5.53037443147401e-06, "loss": 2.5499, "step": 352500 }, { "epoch": 3.62, "learning_rate": 5.5098407613884875e-06, "loss": 2.5769, "step": 353000 }, { "epoch": 3.63, "learning_rate": 5.489348158643135e-06, "loss": 2.5594, "step": 353500 }, { "epoch": 3.63, "learning_rate": 5.468814488557614e-06, "loss": 2.5792, "step": 354000 }, { "epoch": 3.64, "learning_rate": 5.44828081847209e-06, "loss": 2.5614, "step": 354500 }, { "epoch": 3.64, "learning_rate": 5.427747148386567e-06, "loss": 2.5843, "step": 355000 }, { "epoch": 3.65, "learning_rate": 5.4072545456412154e-06, "loss": 2.5558, "step": 355500 }, { "epoch": 3.65, "learning_rate": 5.3867208755556935e-06, "loss": 2.5836, "step": 356000 }, { "epoch": 3.66, "learning_rate": 5.366187205470171e-06, "loss": 2.5505, "step": 356500 }, { "epoch": 3.67, "learning_rate": 5.345653535384647e-06, "loss": 2.5709, "step": 357000 }, { "epoch": 3.67, "learning_rate": 5.325160932639296e-06, "loss": 2.5546, "step": 357500 }, { "epoch": 3.68, "learning_rate": 5.304627262553773e-06, "loss": 2.5987, "step": 358000 }, { "epoch": 3.68, "learning_rate": 5.28409359246825e-06, "loss": 2.5515, "step": 358500 }, { "epoch": 3.69, "learning_rate": 5.263559922382728e-06, "loss": 2.5559, "step": 359000 }, { "epoch": 3.69, "learning_rate": 5.243067319637376e-06, "loss": 2.5584, "step": 359500 }, { "epoch": 3.7, "learning_rate": 5.222533649551853e-06, "loss": 2.5676, "step": 360000 }, { "epoch": 3.7, "learning_rate": 5.20199997946633e-06, "loss": 2.5724, "step": 360500 }, { "epoch": 3.71, "learning_rate": 5.181507376720979e-06, "loss": 2.5489, "step": 361000 }, { "epoch": 3.71, "learning_rate": 5.160973706635456e-06, "loss": 2.5659, "step": 361500 }, { "epoch": 3.72, "learning_rate": 5.140440036549933e-06, "loss": 2.5627, "step": 362000 }, { "epoch": 3.72, "learning_rate": 5.11990636646441e-06, "loss": 2.5339, "step": 362500 }, { "epoch": 3.73, "learning_rate": 5.099372696378888e-06, "loss": 2.5638, "step": 363000 }, { "epoch": 3.73, "learning_rate": 5.078839026293365e-06, "loss": 2.5694, "step": 363500 }, { "epoch": 3.74, "learning_rate": 5.058305356207842e-06, "loss": 2.5592, "step": 364000 }, { "epoch": 3.74, "learning_rate": 5.0377716861223195e-06, "loss": 2.5519, "step": 364500 }, { "epoch": 3.75, "learning_rate": 5.017279083376968e-06, "loss": 2.5775, "step": 365000 }, { "epoch": 3.75, "learning_rate": 4.996745413291445e-06, "loss": 2.5731, "step": 365500 }, { "epoch": 3.76, "learning_rate": 4.976211743205922e-06, "loss": 2.5421, "step": 366000 }, { "epoch": 3.76, "learning_rate": 4.955678073120399e-06, "loss": 2.5494, "step": 366500 }, { "epoch": 3.77, "learning_rate": 4.935185470375048e-06, "loss": 2.5624, "step": 367000 }, { "epoch": 3.77, "learning_rate": 4.9146518002895245e-06, "loss": 2.5722, "step": 367500 }, { "epoch": 3.78, "learning_rate": 4.894118130204003e-06, "loss": 2.56, "step": 368000 }, { "epoch": 3.78, "learning_rate": 4.87358446011848e-06, "loss": 2.5629, "step": 368500 }, { "epoch": 3.79, "learning_rate": 4.853091857373128e-06, "loss": 2.5359, "step": 369000 }, { "epoch": 3.79, "learning_rate": 4.832558187287605e-06, "loss": 2.5635, "step": 369500 }, { "epoch": 3.8, "learning_rate": 4.812024517202082e-06, "loss": 2.5447, "step": 370000 }, { "epoch": 3.8, "learning_rate": 4.79149084711656e-06, "loss": 2.5798, "step": 370500 }, { "epoch": 3.81, "learning_rate": 4.7709982443712085e-06, "loss": 2.5582, "step": 371000 }, { "epoch": 3.81, "learning_rate": 4.750464574285686e-06, "loss": 2.5558, "step": 371500 }, { "epoch": 3.82, "learning_rate": 4.729930904200163e-06, "loss": 2.5458, "step": 372000 }, { "epoch": 3.82, "learning_rate": 4.70939723411464e-06, "loss": 2.5491, "step": 372500 }, { "epoch": 3.83, "learning_rate": 4.688904631369288e-06, "loss": 2.5444, "step": 373000 }, { "epoch": 3.83, "learning_rate": 4.6683709612837654e-06, "loss": 2.5637, "step": 373500 }, { "epoch": 3.84, "learning_rate": 4.647837291198243e-06, "loss": 2.5462, "step": 374000 }, { "epoch": 3.84, "learning_rate": 4.62730362111272e-06, "loss": 2.5801, "step": 374500 }, { "epoch": 3.85, "learning_rate": 4.606811018367368e-06, "loss": 2.5811, "step": 375000 }, { "epoch": 3.86, "learning_rate": 4.586277348281846e-06, "loss": 2.5696, "step": 375500 }, { "epoch": 3.86, "learning_rate": 4.565743678196322e-06, "loss": 2.5353, "step": 376000 }, { "epoch": 3.87, "learning_rate": 4.5452100081108e-06, "loss": 2.5476, "step": 376500 }, { "epoch": 3.87, "learning_rate": 4.5246763380252776e-06, "loss": 2.5637, "step": 377000 }, { "epoch": 3.88, "learning_rate": 4.504183735279926e-06, "loss": 2.5978, "step": 377500 }, { "epoch": 3.88, "learning_rate": 4.483650065194403e-06, "loss": 2.5526, "step": 378000 }, { "epoch": 3.89, "learning_rate": 4.46311639510888e-06, "loss": 2.5606, "step": 378500 }, { "epoch": 3.89, "learning_rate": 4.442582725023357e-06, "loss": 2.5618, "step": 379000 }, { "epoch": 3.9, "learning_rate": 4.4220901222780055e-06, "loss": 2.5707, "step": 379500 }, { "epoch": 3.9, "learning_rate": 4.401556452192483e-06, "loss": 2.5567, "step": 380000 }, { "epoch": 3.91, "learning_rate": 4.38102278210696e-06, "loss": 2.5751, "step": 380500 }, { "epoch": 3.91, "learning_rate": 4.360489112021438e-06, "loss": 2.5543, "step": 381000 }, { "epoch": 3.92, "learning_rate": 4.339996509276086e-06, "loss": 2.5537, "step": 381500 }, { "epoch": 3.92, "learning_rate": 4.319462839190563e-06, "loss": 2.5465, "step": 382000 }, { "epoch": 3.93, "learning_rate": 4.29892916910504e-06, "loss": 2.5502, "step": 382500 }, { "epoch": 3.93, "learning_rate": 4.278395499019518e-06, "loss": 2.5531, "step": 383000 }, { "epoch": 3.94, "learning_rate": 4.257902896274166e-06, "loss": 2.5419, "step": 383500 }, { "epoch": 3.94, "learning_rate": 4.237369226188644e-06, "loss": 2.5731, "step": 384000 }, { "epoch": 3.95, "learning_rate": 4.21683555610312e-06, "loss": 2.5539, "step": 384500 }, { "epoch": 3.95, "learning_rate": 4.196301886017597e-06, "loss": 2.5286, "step": 385000 }, { "epoch": 3.96, "learning_rate": 4.1758092832722455e-06, "loss": 2.5725, "step": 385500 }, { "epoch": 3.96, "learning_rate": 4.1552756131867235e-06, "loss": 2.553, "step": 386000 }, { "epoch": 3.97, "learning_rate": 4.134741943101201e-06, "loss": 2.5295, "step": 386500 }, { "epoch": 3.97, "learning_rate": 4.114208273015678e-06, "loss": 2.5762, "step": 387000 }, { "epoch": 3.98, "learning_rate": 4.093715670270326e-06, "loss": 2.5528, "step": 387500 }, { "epoch": 3.98, "learning_rate": 4.073182000184803e-06, "loss": 2.5377, "step": 388000 }, { "epoch": 3.99, "learning_rate": 4.0526483300992805e-06, "loss": 2.5382, "step": 388500 }, { "epoch": 3.99, "learning_rate": 4.032114660013758e-06, "loss": 2.5778, "step": 389000 }, { "epoch": 4.0, "learning_rate": 4.011622057268406e-06, "loss": 2.5578, "step": 389500 }, { "epoch": 4.0, "eval_gen_len": 14.226, "eval_loss": 2.4658281803131104, "eval_rouge1": 33.1247, "eval_rouge2": 14.4487, "eval_rougeL": 30.3793, "eval_rougeLsum": 30.5158, "eval_runtime": 65.1503, "eval_samples_per_second": 15.349, "eval_steps_per_second": 1.919, "step": 389604 }, { "epoch": 4.0, "learning_rate": 3.991088387182884e-06, "loss": 2.5441, "step": 390000 }, { "epoch": 4.01, "learning_rate": 3.970554717097361e-06, "loss": 2.5623, "step": 390500 }, { "epoch": 4.01, "learning_rate": 3.950021047011837e-06, "loss": 2.5435, "step": 391000 }, { "epoch": 4.02, "learning_rate": 3.929528444266486e-06, "loss": 2.5428, "step": 391500 }, { "epoch": 4.02, "learning_rate": 3.908994774180964e-06, "loss": 2.51, "step": 392000 }, { "epoch": 4.03, "learning_rate": 3.888461104095441e-06, "loss": 2.5402, "step": 392500 }, { "epoch": 4.03, "learning_rate": 3.867927434009918e-06, "loss": 2.5662, "step": 393000 }, { "epoch": 4.04, "learning_rate": 3.847434831264566e-06, "loss": 2.5443, "step": 393500 }, { "epoch": 4.05, "learning_rate": 3.826901161179043e-06, "loss": 2.5292, "step": 394000 }, { "epoch": 4.05, "learning_rate": 3.806367491093521e-06, "loss": 2.54, "step": 394500 }, { "epoch": 4.06, "learning_rate": 3.785833821007998e-06, "loss": 2.5755, "step": 395000 }, { "epoch": 4.06, "learning_rate": 3.7653412182626463e-06, "loss": 2.5804, "step": 395500 }, { "epoch": 4.07, "learning_rate": 3.744807548177124e-06, "loss": 2.5354, "step": 396000 }, { "epoch": 4.07, "learning_rate": 3.724273878091601e-06, "loss": 2.5347, "step": 396500 }, { "epoch": 4.08, "learning_rate": 3.703740208006078e-06, "loss": 2.5257, "step": 397000 }, { "epoch": 4.08, "learning_rate": 3.6832476052607264e-06, "loss": 2.5587, "step": 397500 }, { "epoch": 4.09, "learning_rate": 3.662713935175204e-06, "loss": 2.5545, "step": 398000 }, { "epoch": 4.09, "learning_rate": 3.642180265089681e-06, "loss": 2.5325, "step": 398500 }, { "epoch": 4.1, "learning_rate": 3.6216465950041584e-06, "loss": 2.5691, "step": 399000 }, { "epoch": 4.1, "learning_rate": 3.601153992258807e-06, "loss": 2.556, "step": 399500 }, { "epoch": 4.11, "learning_rate": 3.5806203221732838e-06, "loss": 2.5294, "step": 400000 }, { "epoch": 4.11, "learning_rate": 3.5600866520877614e-06, "loss": 2.5724, "step": 400500 }, { "epoch": 4.12, "learning_rate": 3.5395529820022386e-06, "loss": 2.5741, "step": 401000 }, { "epoch": 4.12, "learning_rate": 3.5190603792568867e-06, "loss": 2.5525, "step": 401500 }, { "epoch": 4.13, "learning_rate": 3.4985267091713643e-06, "loss": 2.5642, "step": 402000 }, { "epoch": 4.13, "learning_rate": 3.477993039085841e-06, "loss": 2.5298, "step": 402500 }, { "epoch": 4.14, "learning_rate": 3.4574593690003183e-06, "loss": 2.5205, "step": 403000 }, { "epoch": 4.14, "learning_rate": 3.4369667662549665e-06, "loss": 2.5503, "step": 403500 }, { "epoch": 4.15, "learning_rate": 3.416433096169444e-06, "loss": 2.5291, "step": 404000 }, { "epoch": 4.15, "learning_rate": 3.3958994260839213e-06, "loss": 2.5487, "step": 404500 }, { "epoch": 4.16, "learning_rate": 3.375365755998399e-06, "loss": 2.536, "step": 405000 }, { "epoch": 4.16, "learning_rate": 3.354873153253047e-06, "loss": 2.5518, "step": 405500 }, { "epoch": 4.17, "learning_rate": 3.3343394831675242e-06, "loss": 2.5262, "step": 406000 }, { "epoch": 4.17, "learning_rate": 3.313805813082002e-06, "loss": 2.544, "step": 406500 }, { "epoch": 4.18, "learning_rate": 3.2932721429964786e-06, "loss": 2.5418, "step": 407000 }, { "epoch": 4.18, "learning_rate": 3.272779540251127e-06, "loss": 2.5587, "step": 407500 }, { "epoch": 4.19, "learning_rate": 3.2522458701656044e-06, "loss": 2.5786, "step": 408000 }, { "epoch": 4.19, "learning_rate": 3.2317122000800816e-06, "loss": 2.5272, "step": 408500 }, { "epoch": 4.2, "learning_rate": 3.2111785299945588e-06, "loss": 2.556, "step": 409000 }, { "epoch": 4.2, "learning_rate": 3.190685927249207e-06, "loss": 2.5468, "step": 409500 }, { "epoch": 4.21, "learning_rate": 3.1701522571636845e-06, "loss": 2.5786, "step": 410000 }, { "epoch": 4.21, "learning_rate": 3.1496185870781613e-06, "loss": 2.5543, "step": 410500 }, { "epoch": 4.22, "learning_rate": 3.129084916992639e-06, "loss": 2.5549, "step": 411000 }, { "epoch": 4.22, "learning_rate": 3.108551246907116e-06, "loss": 2.5092, "step": 411500 }, { "epoch": 4.23, "learning_rate": 3.0880175768215937e-06, "loss": 2.5441, "step": 412000 }, { "epoch": 4.24, "learning_rate": 3.0674839067360705e-06, "loss": 2.5641, "step": 412500 }, { "epoch": 4.24, "learning_rate": 3.046950236650548e-06, "loss": 2.5564, "step": 413000 }, { "epoch": 4.25, "learning_rate": 3.0264576339051967e-06, "loss": 2.5076, "step": 413500 }, { "epoch": 4.25, "learning_rate": 3.0059239638196735e-06, "loss": 2.5449, "step": 414000 }, { "epoch": 4.26, "learning_rate": 2.9853902937341506e-06, "loss": 2.5608, "step": 414500 }, { "epoch": 4.26, "learning_rate": 2.9648566236486283e-06, "loss": 2.5437, "step": 415000 }, { "epoch": 4.27, "learning_rate": 2.9443640209032764e-06, "loss": 2.547, "step": 415500 }, { "epoch": 4.27, "learning_rate": 2.9238303508177536e-06, "loss": 2.5329, "step": 416000 }, { "epoch": 4.28, "learning_rate": 2.9032966807322312e-06, "loss": 2.5452, "step": 416500 }, { "epoch": 4.28, "learning_rate": 2.8828040779868794e-06, "loss": 2.5384, "step": 417000 }, { "epoch": 4.29, "learning_rate": 2.862270407901356e-06, "loss": 2.5517, "step": 417500 }, { "epoch": 4.29, "learning_rate": 2.8417367378158338e-06, "loss": 2.546, "step": 418000 }, { "epoch": 4.3, "learning_rate": 2.821203067730311e-06, "loss": 2.5606, "step": 418500 }, { "epoch": 4.3, "learning_rate": 2.8006693976447886e-06, "loss": 2.5544, "step": 419000 }, { "epoch": 4.31, "learning_rate": 2.7801357275592653e-06, "loss": 2.5416, "step": 419500 }, { "epoch": 4.31, "learning_rate": 2.7596020574737425e-06, "loss": 2.5364, "step": 420000 }, { "epoch": 4.32, "learning_rate": 2.73906838738822e-06, "loss": 2.541, "step": 420500 }, { "epoch": 4.32, "learning_rate": 2.7185757846428683e-06, "loss": 2.5646, "step": 421000 }, { "epoch": 4.33, "learning_rate": 2.6980421145573455e-06, "loss": 2.5587, "step": 421500 }, { "epoch": 4.33, "learning_rate": 2.677508444471823e-06, "loss": 2.5614, "step": 422000 }, { "epoch": 4.34, "learning_rate": 2.6569747743863e-06, "loss": 2.5268, "step": 422500 }, { "epoch": 4.34, "learning_rate": 2.6364821716409484e-06, "loss": 2.5468, "step": 423000 }, { "epoch": 4.35, "learning_rate": 2.615948501555426e-06, "loss": 2.5255, "step": 423500 }, { "epoch": 4.35, "learning_rate": 2.595414831469903e-06, "loss": 2.5398, "step": 424000 }, { "epoch": 4.36, "learning_rate": 2.5748811613843804e-06, "loss": 2.5561, "step": 424500 }, { "epoch": 4.36, "learning_rate": 2.5543885586390286e-06, "loss": 2.5396, "step": 425000 }, { "epoch": 4.37, "learning_rate": 2.533854888553506e-06, "loss": 2.5366, "step": 425500 }, { "epoch": 4.37, "learning_rate": 2.513362285808154e-06, "loss": 2.5748, "step": 426000 }, { "epoch": 4.38, "learning_rate": 2.4928286157226316e-06, "loss": 2.5557, "step": 426500 }, { "epoch": 4.38, "learning_rate": 2.4722949456371088e-06, "loss": 2.5178, "step": 427000 }, { "epoch": 4.39, "learning_rate": 2.451761275551586e-06, "loss": 2.557, "step": 427500 }, { "epoch": 4.39, "learning_rate": 2.431227605466063e-06, "loss": 2.5357, "step": 428000 }, { "epoch": 4.4, "learning_rate": 2.4106939353805403e-06, "loss": 2.5228, "step": 428500 }, { "epoch": 4.4, "learning_rate": 2.3901602652950175e-06, "loss": 2.5376, "step": 429000 }, { "epoch": 4.41, "learning_rate": 2.3696265952094947e-06, "loss": 2.5418, "step": 429500 }, { "epoch": 4.41, "learning_rate": 2.3491339924641433e-06, "loss": 2.5534, "step": 430000 }, { "epoch": 4.42, "learning_rate": 2.3286003223786205e-06, "loss": 2.5183, "step": 430500 }, { "epoch": 4.43, "learning_rate": 2.3080666522930977e-06, "loss": 2.5207, "step": 431000 }, { "epoch": 4.43, "learning_rate": 2.2875329822075753e-06, "loss": 2.5431, "step": 431500 }, { "epoch": 4.44, "learning_rate": 2.2670403794622234e-06, "loss": 2.5484, "step": 432000 }, { "epoch": 4.44, "learning_rate": 2.2465067093767006e-06, "loss": 2.5322, "step": 432500 }, { "epoch": 4.45, "learning_rate": 2.225973039291178e-06, "loss": 2.5424, "step": 433000 }, { "epoch": 4.45, "learning_rate": 2.2054393692056554e-06, "loss": 2.5249, "step": 433500 }, { "epoch": 4.46, "learning_rate": 2.184905699120132e-06, "loss": 2.532, "step": 434000 }, { "epoch": 4.46, "learning_rate": 2.164413096374781e-06, "loss": 2.5406, "step": 434500 }, { "epoch": 4.47, "learning_rate": 2.143879426289258e-06, "loss": 2.5651, "step": 435000 }, { "epoch": 4.47, "learning_rate": 2.123345756203735e-06, "loss": 2.5343, "step": 435500 }, { "epoch": 4.48, "learning_rate": 2.1028120861182124e-06, "loss": 2.5554, "step": 436000 }, { "epoch": 4.48, "learning_rate": 2.082319483372861e-06, "loss": 2.5519, "step": 436500 }, { "epoch": 4.49, "learning_rate": 2.061785813287338e-06, "loss": 2.5579, "step": 437000 }, { "epoch": 4.49, "learning_rate": 2.0412521432018153e-06, "loss": 2.5515, "step": 437500 }, { "epoch": 4.5, "learning_rate": 2.0207184731162925e-06, "loss": 2.5662, "step": 438000 }, { "epoch": 4.5, "learning_rate": 2.000225870370941e-06, "loss": 2.5343, "step": 438500 }, { "epoch": 4.51, "learning_rate": 1.9796922002854183e-06, "loss": 2.5598, "step": 439000 }, { "epoch": 4.51, "learning_rate": 1.9591585301998955e-06, "loss": 2.5431, "step": 439500 }, { "epoch": 4.52, "learning_rate": 1.9386248601143727e-06, "loss": 2.5741, "step": 440000 }, { "epoch": 4.52, "learning_rate": 1.9181322573690213e-06, "loss": 2.5596, "step": 440500 }, { "epoch": 4.53, "learning_rate": 1.8975985872834982e-06, "loss": 2.5372, "step": 441000 }, { "epoch": 4.53, "learning_rate": 1.8770649171979754e-06, "loss": 2.5461, "step": 441500 }, { "epoch": 4.54, "learning_rate": 1.8565312471124528e-06, "loss": 2.564, "step": 442000 }, { "epoch": 4.54, "learning_rate": 1.836038644367101e-06, "loss": 2.5384, "step": 442500 }, { "epoch": 4.55, "learning_rate": 1.8155049742815784e-06, "loss": 2.5388, "step": 443000 }, { "epoch": 4.55, "learning_rate": 1.7949713041960556e-06, "loss": 2.5461, "step": 443500 }, { "epoch": 4.56, "learning_rate": 1.774437634110533e-06, "loss": 2.5616, "step": 444000 }, { "epoch": 4.56, "learning_rate": 1.7539450313651811e-06, "loss": 2.5392, "step": 444500 }, { "epoch": 4.57, "learning_rate": 1.7334113612796585e-06, "loss": 2.5661, "step": 445000 }, { "epoch": 4.57, "learning_rate": 1.7128776911941357e-06, "loss": 2.5373, "step": 445500 }, { "epoch": 4.58, "learning_rate": 1.6923440211086131e-06, "loss": 2.5193, "step": 446000 }, { "epoch": 4.58, "learning_rate": 1.6718514183632615e-06, "loss": 2.517, "step": 446500 }, { "epoch": 4.59, "learning_rate": 1.6513177482777385e-06, "loss": 2.5266, "step": 447000 }, { "epoch": 4.59, "learning_rate": 1.6307840781922157e-06, "loss": 2.5429, "step": 447500 }, { "epoch": 4.6, "learning_rate": 1.610250408106693e-06, "loss": 2.5305, "step": 448000 }, { "epoch": 4.6, "learning_rate": 1.5897167380211703e-06, "loss": 2.5723, "step": 448500 }, { "epoch": 4.61, "learning_rate": 1.5692241352758186e-06, "loss": 2.5435, "step": 449000 }, { "epoch": 4.61, "learning_rate": 1.5486904651902958e-06, "loss": 2.5542, "step": 449500 }, { "epoch": 4.62, "learning_rate": 1.5281567951047732e-06, "loss": 2.5342, "step": 450000 }, { "epoch": 4.63, "learning_rate": 1.5076231250192504e-06, "loss": 2.5589, "step": 450500 }, { "epoch": 4.63, "learning_rate": 1.4871305222738988e-06, "loss": 2.5397, "step": 451000 }, { "epoch": 4.64, "learning_rate": 1.4665968521883762e-06, "loss": 2.5361, "step": 451500 }, { "epoch": 4.64, "learning_rate": 1.4460631821028534e-06, "loss": 2.5638, "step": 452000 }, { "epoch": 4.65, "learning_rate": 1.4255295120173304e-06, "loss": 2.532, "step": 452500 }, { "epoch": 4.65, "learning_rate": 1.4050369092719787e-06, "loss": 2.5609, "step": 453000 }, { "epoch": 4.66, "learning_rate": 1.384503239186456e-06, "loss": 2.5257, "step": 453500 }, { "epoch": 4.66, "learning_rate": 1.3639695691009333e-06, "loss": 2.5254, "step": 454000 }, { "epoch": 4.67, "learning_rate": 1.3434358990154105e-06, "loss": 2.5407, "step": 454500 }, { "epoch": 4.67, "learning_rate": 1.3229843636102299e-06, "loss": 2.5279, "step": 455000 }, { "epoch": 4.68, "learning_rate": 1.3024506935247073e-06, "loss": 2.5276, "step": 455500 }, { "epoch": 4.68, "learning_rate": 1.2819170234391844e-06, "loss": 2.5704, "step": 456000 }, { "epoch": 4.69, "learning_rate": 1.2613833533536618e-06, "loss": 2.5349, "step": 456500 }, { "epoch": 4.69, "learning_rate": 1.240849683268139e-06, "loss": 2.5449, "step": 457000 }, { "epoch": 4.7, "learning_rate": 1.2203160131826162e-06, "loss": 2.5408, "step": 457500 }, { "epoch": 4.7, "learning_rate": 1.1997823430970936e-06, "loss": 2.5165, "step": 458000 }, { "epoch": 4.71, "learning_rate": 1.1792486730115708e-06, "loss": 2.563, "step": 458500 }, { "epoch": 4.71, "learning_rate": 1.1587560702662192e-06, "loss": 2.5678, "step": 459000 }, { "epoch": 4.72, "learning_rate": 1.1382224001806964e-06, "loss": 2.5404, "step": 459500 }, { "epoch": 4.72, "learning_rate": 1.1176887300951736e-06, "loss": 2.5691, "step": 460000 }, { "epoch": 4.73, "learning_rate": 1.097155060009651e-06, "loss": 2.516, "step": 460500 }, { "epoch": 4.73, "learning_rate": 1.0766624572642991e-06, "loss": 2.5326, "step": 461000 }, { "epoch": 4.74, "learning_rate": 1.0561287871787765e-06, "loss": 2.5426, "step": 461500 }, { "epoch": 4.74, "learning_rate": 1.0355951170932537e-06, "loss": 2.51, "step": 462000 }, { "epoch": 4.75, "learning_rate": 1.0150614470077311e-06, "loss": 2.5328, "step": 462500 }, { "epoch": 4.75, "learning_rate": 9.945688442623793e-07, "loss": 2.5352, "step": 463000 }, { "epoch": 4.76, "learning_rate": 9.740351741768565e-07, "loss": 2.4885, "step": 463500 }, { "epoch": 4.76, "learning_rate": 9.535015040913338e-07, "loss": 2.5488, "step": 464000 }, { "epoch": 4.77, "learning_rate": 9.329678340058111e-07, "loss": 2.5455, "step": 464500 }, { "epoch": 4.77, "learning_rate": 9.124752312604594e-07, "loss": 2.5433, "step": 465000 }, { "epoch": 4.78, "learning_rate": 8.919415611749367e-07, "loss": 2.5511, "step": 465500 }, { "epoch": 4.78, "learning_rate": 8.714078910894138e-07, "loss": 2.5286, "step": 466000 }, { "epoch": 4.79, "learning_rate": 8.508742210038911e-07, "loss": 2.5607, "step": 466500 }, { "epoch": 4.79, "learning_rate": 8.303816182585395e-07, "loss": 2.5621, "step": 467000 }, { "epoch": 4.8, "learning_rate": 8.098479481730168e-07, "loss": 2.534, "step": 467500 }, { "epoch": 4.8, "learning_rate": 7.893142780874941e-07, "loss": 2.586, "step": 468000 }, { "epoch": 4.81, "learning_rate": 7.687806080019714e-07, "loss": 2.541, "step": 468500 }, { "epoch": 4.82, "learning_rate": 7.482469379164485e-07, "loss": 2.5375, "step": 469000 }, { "epoch": 4.82, "learning_rate": 7.277132678309258e-07, "loss": 2.5287, "step": 469500 }, { "epoch": 4.83, "learning_rate": 7.071795977454031e-07, "loss": 2.5211, "step": 470000 }, { "epoch": 4.83, "learning_rate": 6.866869950000514e-07, "loss": 2.5658, "step": 470500 }, { "epoch": 4.84, "learning_rate": 6.661533249145287e-07, "loss": 2.5567, "step": 471000 }, { "epoch": 4.84, "learning_rate": 6.456196548290058e-07, "loss": 2.535, "step": 471500 }, { "epoch": 4.85, "learning_rate": 6.250859847434831e-07, "loss": 2.5405, "step": 472000 }, { "epoch": 4.85, "learning_rate": 6.045933819981315e-07, "loss": 2.5291, "step": 472500 }, { "epoch": 4.86, "learning_rate": 5.840597119126088e-07, "loss": 2.5359, "step": 473000 }, { "epoch": 4.86, "learning_rate": 5.635260418270861e-07, "loss": 2.5548, "step": 473500 }, { "epoch": 4.87, "learning_rate": 5.429923717415633e-07, "loss": 2.5573, "step": 474000 }, { "epoch": 4.87, "learning_rate": 5.224587016560406e-07, "loss": 2.5402, "step": 474500 }, { "epoch": 4.88, "learning_rate": 5.019250315705177e-07, "loss": 2.526, "step": 475000 }, { "epoch": 4.88, "learning_rate": 4.81391361484995e-07, "loss": 2.5451, "step": 475500 }, { "epoch": 4.89, "learning_rate": 4.608576913994723e-07, "loss": 2.5159, "step": 476000 }, { "epoch": 4.89, "learning_rate": 4.403650886541206e-07, "loss": 2.5486, "step": 476500 }, { "epoch": 4.9, "learning_rate": 4.198314185685979e-07, "loss": 2.5529, "step": 477000 }, { "epoch": 4.9, "learning_rate": 3.9929774848307514e-07, "loss": 2.5502, "step": 477500 }, { "epoch": 4.91, "learning_rate": 3.7876407839755243e-07, "loss": 2.5564, "step": 478000 }, { "epoch": 4.91, "learning_rate": 3.582714756522007e-07, "loss": 2.5274, "step": 478500 }, { "epoch": 4.92, "learning_rate": 3.37737805566678e-07, "loss": 2.5686, "step": 479000 }, { "epoch": 4.92, "learning_rate": 3.1720413548115524e-07, "loss": 2.552, "step": 479500 }, { "epoch": 4.93, "learning_rate": 2.9671153273580356e-07, "loss": 2.546, "step": 480000 }, { "epoch": 4.93, "learning_rate": 2.7617786265028085e-07, "loss": 2.5279, "step": 480500 }, { "epoch": 4.94, "learning_rate": 2.556441925647581e-07, "loss": 2.5537, "step": 481000 }, { "epoch": 4.94, "learning_rate": 2.3511052247923537e-07, "loss": 2.5668, "step": 481500 }, { "epoch": 4.95, "learning_rate": 2.145768523937126e-07, "loss": 2.5783, "step": 482000 }, { "epoch": 4.95, "learning_rate": 1.9404318230818988e-07, "loss": 2.5368, "step": 482500 }, { "epoch": 4.96, "learning_rate": 1.7350951222266712e-07, "loss": 2.5358, "step": 483000 }, { "epoch": 4.96, "learning_rate": 1.529758421371444e-07, "loss": 2.566, "step": 483500 }, { "epoch": 4.97, "learning_rate": 1.3248323939179268e-07, "loss": 2.5619, "step": 484000 }, { "epoch": 4.97, "learning_rate": 1.1199063664644102e-07, "loss": 2.5566, "step": 484500 }, { "epoch": 4.98, "learning_rate": 9.145696656091827e-08, "loss": 2.5638, "step": 485000 }, { "epoch": 4.98, "learning_rate": 7.092329647539553e-08, "loss": 2.5277, "step": 485500 }, { "epoch": 4.99, "learning_rate": 5.038962638987279e-08, "loss": 2.5254, "step": 486000 }, { "epoch": 4.99, "learning_rate": 2.985595630435006e-08, "loss": 2.5533, "step": 486500 }, { "epoch": 5.0, "learning_rate": 9.363353558998369e-09, "loss": 2.5433, "step": 487000 }, { "epoch": 5.0, "eval_gen_len": 14.198, "eval_loss": 2.459300994873047, "eval_rouge1": 33.0901, "eval_rouge2": 14.5749, "eval_rougeL": 30.4267, "eval_rougeLsum": 30.5438, "eval_runtime": 65.3559, "eval_samples_per_second": 15.301, "eval_steps_per_second": 1.913, "step": 487005 } ], "max_steps": 487005, "num_train_epochs": 5, "total_flos": 1.0373616546436547e+18, "trial_name": null, "trial_params": null }