{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.999985976918007, "global_step": 106965, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 2.9985976721357454e-05, "loss": 3.2989, "step": 50 }, { "epoch": 0.0, "learning_rate": 2.9971953442714907e-05, "loss": 2.6398, "step": 100 }, { "epoch": 0.0, "learning_rate": 2.995793016407236e-05, "loss": 2.3782, "step": 150 }, { "epoch": 0.01, "learning_rate": 2.9943906885429814e-05, "loss": 2.2384, "step": 200 }, { "epoch": 0.01, "learning_rate": 2.9929883606787268e-05, "loss": 2.1279, "step": 250 }, { "epoch": 0.01, "learning_rate": 2.991586032814472e-05, "loss": 1.9667, "step": 300 }, { "epoch": 0.01, "learning_rate": 2.9901837049502174e-05, "loss": 1.9143, "step": 350 }, { "epoch": 0.01, "learning_rate": 2.9887813770859628e-05, "loss": 1.8277, "step": 400 }, { "epoch": 0.01, "learning_rate": 2.987379049221708e-05, "loss": 1.6988, "step": 450 }, { "epoch": 0.01, "learning_rate": 2.9859767213574534e-05, "loss": 1.637, "step": 500 }, { "epoch": 0.02, "learning_rate": 2.9845743934931988e-05, "loss": 1.5378, "step": 550 }, { "epoch": 0.02, "learning_rate": 2.983172065628944e-05, "loss": 1.4824, "step": 600 }, { "epoch": 0.02, "learning_rate": 2.9817697377646894e-05, "loss": 1.4092, "step": 650 }, { "epoch": 0.02, "learning_rate": 2.9803674099004348e-05, "loss": 1.294, "step": 700 }, { "epoch": 0.02, "learning_rate": 2.97896508203618e-05, "loss": 1.2394, "step": 750 }, { "epoch": 0.02, "learning_rate": 2.9775627541719254e-05, "loss": 1.1931, "step": 800 }, { "epoch": 0.02, "learning_rate": 2.9761604263076708e-05, "loss": 1.1169, "step": 850 }, { "epoch": 0.03, "learning_rate": 2.974758098443416e-05, "loss": 1.0487, "step": 900 }, { "epoch": 0.03, "learning_rate": 2.9733557705791615e-05, "loss": 0.9585, "step": 950 }, { "epoch": 0.03, "learning_rate": 2.9719534427149068e-05, "loss": 0.9143, "step": 1000 }, { "epoch": 0.03, "eval_bleu": 79.3692, "eval_gen_len": 61.919, "eval_loss": 0.5720869302749634, "eval_rouge1": 87.2325, "eval_rouge2": 78.8924, "eval_rougeL": 85.959, "eval_rougeLsum": 86.2188, "eval_runtime": 3277.2108, "eval_samples_per_second": 0.915, "eval_steps_per_second": 0.229, "step": 1000 }, { "epoch": 0.03, "learning_rate": 2.970551114850652e-05, "loss": 0.8543, "step": 1050 }, { "epoch": 0.03, "learning_rate": 2.9691487869863975e-05, "loss": 0.813, "step": 1100 }, { "epoch": 0.03, "learning_rate": 2.9677464591221428e-05, "loss": 0.7565, "step": 1150 }, { "epoch": 0.03, "learning_rate": 2.966344131257888e-05, "loss": 0.6924, "step": 1200 }, { "epoch": 0.04, "learning_rate": 2.9649418033936335e-05, "loss": 0.6407, "step": 1250 }, { "epoch": 0.04, "learning_rate": 2.9635394755293788e-05, "loss": 0.6138, "step": 1300 }, { "epoch": 0.04, "learning_rate": 2.962137147665124e-05, "loss": 0.5659, "step": 1350 }, { "epoch": 0.04, "learning_rate": 2.9607348198008695e-05, "loss": 0.5688, "step": 1400 }, { "epoch": 0.04, "learning_rate": 2.9593324919366148e-05, "loss": 0.516, "step": 1450 }, { "epoch": 0.04, "learning_rate": 2.95793016407236e-05, "loss": 0.4945, "step": 1500 }, { "epoch": 0.04, "learning_rate": 2.9565278362081055e-05, "loss": 0.4414, "step": 1550 }, { "epoch": 0.04, "learning_rate": 2.9551255083438508e-05, "loss": 0.4591, "step": 1600 }, { "epoch": 0.05, "learning_rate": 2.9537231804795965e-05, "loss": 0.4378, "step": 1650 }, { "epoch": 0.05, "learning_rate": 2.9523208526153415e-05, "loss": 0.4287, "step": 1700 }, { "epoch": 0.05, "learning_rate": 2.9509185247510868e-05, "loss": 0.4494, "step": 1750 }, { "epoch": 0.05, "learning_rate": 2.949516196886832e-05, "loss": 0.4241, "step": 1800 }, { "epoch": 0.05, "learning_rate": 2.9481138690225775e-05, "loss": 0.4092, "step": 1850 }, { "epoch": 0.05, "learning_rate": 2.946711541158323e-05, "loss": 0.4149, "step": 1900 }, { "epoch": 0.05, "learning_rate": 2.9453092132940682e-05, "loss": 0.3603, "step": 1950 }, { "epoch": 0.06, "learning_rate": 2.9439068854298135e-05, "loss": 0.3801, "step": 2000 }, { "epoch": 0.06, "eval_bleu": 93.6507, "eval_gen_len": 64.828, "eval_loss": 0.4033011794090271, "eval_rouge1": 91.7405, "eval_rouge2": 86.6147, "eval_rougeL": 91.4332, "eval_rougeLsum": 91.4208, "eval_runtime": 3199.5288, "eval_samples_per_second": 0.938, "eval_steps_per_second": 0.234, "step": 2000 }, { "epoch": 0.06, "learning_rate": 2.9425045575655592e-05, "loss": 0.3768, "step": 2050 }, { "epoch": 0.06, "learning_rate": 2.9411022297013042e-05, "loss": 0.3616, "step": 2100 }, { "epoch": 0.06, "learning_rate": 2.9396999018370495e-05, "loss": 0.3987, "step": 2150 }, { "epoch": 0.06, "learning_rate": 2.938297573972795e-05, "loss": 0.3601, "step": 2200 }, { "epoch": 0.06, "learning_rate": 2.9368952461085402e-05, "loss": 0.3416, "step": 2250 }, { "epoch": 0.06, "learning_rate": 2.9354929182442855e-05, "loss": 0.3517, "step": 2300 }, { "epoch": 0.07, "learning_rate": 2.934090590380031e-05, "loss": 0.3315, "step": 2350 }, { "epoch": 0.07, "learning_rate": 2.9326882625157762e-05, "loss": 0.3501, "step": 2400 }, { "epoch": 0.07, "learning_rate": 2.931285934651522e-05, "loss": 0.3275, "step": 2450 }, { "epoch": 0.07, "learning_rate": 2.929883606787267e-05, "loss": 0.3098, "step": 2500 }, { "epoch": 0.07, "learning_rate": 2.9284812789230122e-05, "loss": 0.324, "step": 2550 }, { "epoch": 0.07, "learning_rate": 2.9270789510587575e-05, "loss": 0.3129, "step": 2600 }, { "epoch": 0.07, "learning_rate": 2.925676623194503e-05, "loss": 0.3394, "step": 2650 }, { "epoch": 0.08, "learning_rate": 2.9242742953302482e-05, "loss": 0.2813, "step": 2700 }, { "epoch": 0.08, "learning_rate": 2.9228719674659935e-05, "loss": 0.3168, "step": 2750 }, { "epoch": 0.08, "learning_rate": 2.921469639601739e-05, "loss": 0.3266, "step": 2800 }, { "epoch": 0.08, "learning_rate": 2.9200673117374846e-05, "loss": 0.2833, "step": 2850 }, { "epoch": 0.08, "learning_rate": 2.9186649838732296e-05, "loss": 0.3025, "step": 2900 }, { "epoch": 0.08, "learning_rate": 2.917262656008975e-05, "loss": 0.3095, "step": 2950 }, { "epoch": 0.08, "learning_rate": 2.9158603281447202e-05, "loss": 0.2844, "step": 3000 }, { "epoch": 0.08, "eval_bleu": 94.365, "eval_gen_len": 64.905, "eval_loss": 0.3663557171821594, "eval_rouge1": 91.9207, "eval_rouge2": 87.0399, "eval_rougeL": 91.6573, "eval_rougeLsum": 91.6307, "eval_runtime": 3129.011, "eval_samples_per_second": 0.959, "eval_steps_per_second": 0.24, "step": 3000 }, { "epoch": 0.09, "learning_rate": 2.9144580002804656e-05, "loss": 0.3001, "step": 3050 }, { "epoch": 0.09, "learning_rate": 2.9130556724162112e-05, "loss": 0.2903, "step": 3100 }, { "epoch": 0.09, "learning_rate": 2.9116533445519562e-05, "loss": 0.2751, "step": 3150 }, { "epoch": 0.09, "learning_rate": 2.9102510166877016e-05, "loss": 0.2725, "step": 3200 }, { "epoch": 0.09, "learning_rate": 2.9088486888234472e-05, "loss": 0.2897, "step": 3250 }, { "epoch": 0.09, "learning_rate": 2.9074463609591922e-05, "loss": 0.2878, "step": 3300 }, { "epoch": 0.09, "learning_rate": 2.9060440330949376e-05, "loss": 0.2966, "step": 3350 }, { "epoch": 0.1, "learning_rate": 2.904641705230683e-05, "loss": 0.3079, "step": 3400 }, { "epoch": 0.1, "learning_rate": 2.9032393773664282e-05, "loss": 0.2598, "step": 3450 }, { "epoch": 0.1, "learning_rate": 2.901837049502174e-05, "loss": 0.2982, "step": 3500 }, { "epoch": 0.1, "learning_rate": 2.900434721637919e-05, "loss": 0.2634, "step": 3550 }, { "epoch": 0.1, "learning_rate": 2.8990323937736643e-05, "loss": 0.2896, "step": 3600 }, { "epoch": 0.1, "learning_rate": 2.89763006590941e-05, "loss": 0.2822, "step": 3650 }, { "epoch": 0.1, "learning_rate": 2.896227738045155e-05, "loss": 0.278, "step": 3700 }, { "epoch": 0.11, "learning_rate": 2.8948254101809003e-05, "loss": 0.2818, "step": 3750 }, { "epoch": 0.11, "learning_rate": 2.8934230823166456e-05, "loss": 0.2788, "step": 3800 }, { "epoch": 0.11, "learning_rate": 2.892020754452391e-05, "loss": 0.2989, "step": 3850 }, { "epoch": 0.11, "learning_rate": 2.8906184265881366e-05, "loss": 0.3021, "step": 3900 }, { "epoch": 0.11, "learning_rate": 2.8892160987238816e-05, "loss": 0.281, "step": 3950 }, { "epoch": 0.11, "learning_rate": 2.887813770859627e-05, "loss": 0.2612, "step": 4000 }, { "epoch": 0.11, "eval_bleu": 94.6072, "eval_gen_len": 64.9013, "eval_loss": 0.3458440899848938, "eval_rouge1": 92.2699, "eval_rouge2": 87.5817, "eval_rougeL": 92.003, "eval_rougeLsum": 91.9759, "eval_runtime": 3105.6443, "eval_samples_per_second": 0.966, "eval_steps_per_second": 0.241, "step": 4000 }, { "epoch": 0.11, "learning_rate": 2.8864114429953726e-05, "loss": 0.2749, "step": 4050 }, { "epoch": 0.11, "learning_rate": 2.8850091151311176e-05, "loss": 0.2948, "step": 4100 }, { "epoch": 0.12, "learning_rate": 2.883606787266863e-05, "loss": 0.2429, "step": 4150 }, { "epoch": 0.12, "learning_rate": 2.8822044594026083e-05, "loss": 0.2793, "step": 4200 }, { "epoch": 0.12, "learning_rate": 2.8808021315383536e-05, "loss": 0.2899, "step": 4250 }, { "epoch": 0.12, "learning_rate": 2.8793998036740993e-05, "loss": 0.2384, "step": 4300 }, { "epoch": 0.12, "learning_rate": 2.8779974758098443e-05, "loss": 0.2783, "step": 4350 }, { "epoch": 0.12, "learning_rate": 2.8765951479455896e-05, "loss": 0.2709, "step": 4400 }, { "epoch": 0.12, "learning_rate": 2.8751928200813353e-05, "loss": 0.2453, "step": 4450 }, { "epoch": 0.13, "learning_rate": 2.8737904922170803e-05, "loss": 0.2626, "step": 4500 }, { "epoch": 0.13, "learning_rate": 2.872388164352826e-05, "loss": 0.2476, "step": 4550 }, { "epoch": 0.13, "learning_rate": 2.870985836488571e-05, "loss": 0.2207, "step": 4600 }, { "epoch": 0.13, "learning_rate": 2.8695835086243163e-05, "loss": 0.2553, "step": 4650 }, { "epoch": 0.13, "learning_rate": 2.868181180760062e-05, "loss": 0.2624, "step": 4700 }, { "epoch": 0.13, "learning_rate": 2.866778852895807e-05, "loss": 0.2505, "step": 4750 }, { "epoch": 0.13, "learning_rate": 2.8653765250315523e-05, "loss": 0.2356, "step": 4800 }, { "epoch": 0.14, "learning_rate": 2.863974197167298e-05, "loss": 0.2529, "step": 4850 }, { "epoch": 0.14, "learning_rate": 2.862571869303043e-05, "loss": 0.2331, "step": 4900 }, { "epoch": 0.14, "learning_rate": 2.8611695414387887e-05, "loss": 0.2487, "step": 4950 }, { "epoch": 0.14, "learning_rate": 2.8597672135745337e-05, "loss": 0.2609, "step": 5000 }, { "epoch": 0.14, "eval_bleu": 94.6745, "eval_gen_len": 64.809, "eval_loss": 0.33044180274009705, "eval_rouge1": 92.3236, "eval_rouge2": 87.7132, "eval_rougeL": 92.0699, "eval_rougeLsum": 92.0344, "eval_runtime": 3150.3076, "eval_samples_per_second": 0.952, "eval_steps_per_second": 0.238, "step": 5000 }, { "epoch": 0.14, "learning_rate": 2.858364885710279e-05, "loss": 0.2444, "step": 5050 }, { "epoch": 0.14, "learning_rate": 2.8569625578460247e-05, "loss": 0.2717, "step": 5100 }, { "epoch": 0.14, "learning_rate": 2.8555602299817697e-05, "loss": 0.2372, "step": 5150 }, { "epoch": 0.15, "learning_rate": 2.854157902117515e-05, "loss": 0.2448, "step": 5200 }, { "epoch": 0.15, "learning_rate": 2.8527555742532607e-05, "loss": 0.2752, "step": 5250 }, { "epoch": 0.15, "learning_rate": 2.8513532463890057e-05, "loss": 0.2762, "step": 5300 }, { "epoch": 0.15, "learning_rate": 2.8499509185247513e-05, "loss": 0.2355, "step": 5350 }, { "epoch": 0.15, "learning_rate": 2.8485485906604963e-05, "loss": 0.2565, "step": 5400 }, { "epoch": 0.15, "learning_rate": 2.8471462627962417e-05, "loss": 0.2795, "step": 5450 }, { "epoch": 0.15, "learning_rate": 2.8457439349319873e-05, "loss": 0.2305, "step": 5500 }, { "epoch": 0.16, "learning_rate": 2.8443416070677323e-05, "loss": 0.2515, "step": 5550 }, { "epoch": 0.16, "learning_rate": 2.8429392792034777e-05, "loss": 0.2473, "step": 5600 }, { "epoch": 0.16, "learning_rate": 2.8415369513392234e-05, "loss": 0.2531, "step": 5650 }, { "epoch": 0.16, "learning_rate": 2.8401346234749684e-05, "loss": 0.2243, "step": 5700 }, { "epoch": 0.16, "learning_rate": 2.838732295610714e-05, "loss": 0.2272, "step": 5750 }, { "epoch": 0.16, "learning_rate": 2.837329967746459e-05, "loss": 0.2601, "step": 5800 }, { "epoch": 0.16, "learning_rate": 2.8359276398822044e-05, "loss": 0.2539, "step": 5850 }, { "epoch": 0.17, "learning_rate": 2.83452531201795e-05, "loss": 0.2445, "step": 5900 }, { "epoch": 0.17, "learning_rate": 2.833122984153695e-05, "loss": 0.213, "step": 5950 }, { "epoch": 0.17, "learning_rate": 2.8317206562894407e-05, "loss": 0.2173, "step": 6000 }, { "epoch": 0.17, "eval_bleu": 94.886, "eval_gen_len": 64.737, "eval_loss": 0.32196304202079773, "eval_rouge1": 92.4893, "eval_rouge2": 87.9435, "eval_rougeL": 92.23, "eval_rougeLsum": 92.1972, "eval_runtime": 3194.0894, "eval_samples_per_second": 0.939, "eval_steps_per_second": 0.235, "step": 6000 }, { "epoch": 0.17, "learning_rate": 2.830318328425186e-05, "loss": 0.2123, "step": 6050 }, { "epoch": 0.17, "learning_rate": 2.828916000560931e-05, "loss": 0.2351, "step": 6100 }, { "epoch": 0.17, "learning_rate": 2.8275136726966767e-05, "loss": 0.2546, "step": 6150 }, { "epoch": 0.17, "learning_rate": 2.8261113448324217e-05, "loss": 0.2388, "step": 6200 }, { "epoch": 0.18, "learning_rate": 2.824709016968167e-05, "loss": 0.2309, "step": 6250 }, { "epoch": 0.18, "learning_rate": 2.8233066891039127e-05, "loss": 0.2379, "step": 6300 }, { "epoch": 0.18, "learning_rate": 2.8219043612396577e-05, "loss": 0.2269, "step": 6350 }, { "epoch": 0.18, "learning_rate": 2.8205020333754034e-05, "loss": 0.2362, "step": 6400 }, { "epoch": 0.18, "learning_rate": 2.8190997055111487e-05, "loss": 0.2325, "step": 6450 }, { "epoch": 0.18, "learning_rate": 2.8176973776468937e-05, "loss": 0.2519, "step": 6500 }, { "epoch": 0.18, "learning_rate": 2.8162950497826394e-05, "loss": 0.215, "step": 6550 }, { "epoch": 0.19, "learning_rate": 2.8148927219183844e-05, "loss": 0.2301, "step": 6600 }, { "epoch": 0.19, "learning_rate": 2.8134903940541297e-05, "loss": 0.247, "step": 6650 }, { "epoch": 0.19, "learning_rate": 2.8120880661898754e-05, "loss": 0.2261, "step": 6700 }, { "epoch": 0.19, "learning_rate": 2.8106857383256204e-05, "loss": 0.2349, "step": 6750 }, { "epoch": 0.19, "learning_rate": 2.809283410461366e-05, "loss": 0.2253, "step": 6800 }, { "epoch": 0.19, "learning_rate": 2.8078810825971114e-05, "loss": 0.2186, "step": 6850 }, { "epoch": 0.19, "learning_rate": 2.8064787547328564e-05, "loss": 0.238, "step": 6900 }, { "epoch": 0.19, "learning_rate": 2.805076426868602e-05, "loss": 0.2308, "step": 6950 }, { "epoch": 0.2, "learning_rate": 2.803674099004347e-05, "loss": 0.2252, "step": 7000 }, { "epoch": 0.2, "eval_bleu": 94.9526, "eval_gen_len": 64.719, "eval_loss": 0.3162732422351837, "eval_rouge1": 92.5629, "eval_rouge2": 88.0962, "eval_rougeL": 92.3033, "eval_rougeLsum": 92.2707, "eval_runtime": 3111.1893, "eval_samples_per_second": 0.964, "eval_steps_per_second": 0.241, "step": 7000 }, { "epoch": 0.2, "learning_rate": 2.8022717711400924e-05, "loss": 0.2267, "step": 7050 }, { "epoch": 0.2, "learning_rate": 2.800869443275838e-05, "loss": 0.2197, "step": 7100 }, { "epoch": 0.2, "learning_rate": 2.799467115411583e-05, "loss": 0.195, "step": 7150 }, { "epoch": 0.2, "learning_rate": 2.7980647875473288e-05, "loss": 0.1975, "step": 7200 }, { "epoch": 0.2, "learning_rate": 2.796662459683074e-05, "loss": 0.2261, "step": 7250 }, { "epoch": 0.2, "learning_rate": 2.795260131818819e-05, "loss": 0.2276, "step": 7300 }, { "epoch": 0.21, "learning_rate": 2.7938578039545648e-05, "loss": 0.2236, "step": 7350 }, { "epoch": 0.21, "learning_rate": 2.7924554760903098e-05, "loss": 0.2362, "step": 7400 }, { "epoch": 0.21, "learning_rate": 2.7910531482260554e-05, "loss": 0.1933, "step": 7450 }, { "epoch": 0.21, "learning_rate": 2.7896508203618008e-05, "loss": 0.256, "step": 7500 }, { "epoch": 0.21, "learning_rate": 2.7882484924975458e-05, "loss": 0.22, "step": 7550 }, { "epoch": 0.21, "learning_rate": 2.7868461646332915e-05, "loss": 0.2285, "step": 7600 }, { "epoch": 0.21, "learning_rate": 2.7854438367690368e-05, "loss": 0.2297, "step": 7650 }, { "epoch": 0.22, "learning_rate": 2.7840415089047818e-05, "loss": 0.2198, "step": 7700 }, { "epoch": 0.22, "learning_rate": 2.7826391810405275e-05, "loss": 0.219, "step": 7750 }, { "epoch": 0.22, "learning_rate": 2.7812368531762725e-05, "loss": 0.264, "step": 7800 }, { "epoch": 0.22, "learning_rate": 2.779834525312018e-05, "loss": 0.2218, "step": 7850 }, { "epoch": 0.22, "learning_rate": 2.7784321974477635e-05, "loss": 0.2343, "step": 7900 }, { "epoch": 0.22, "learning_rate": 2.7770298695835085e-05, "loss": 0.223, "step": 7950 }, { "epoch": 0.22, "learning_rate": 2.775627541719254e-05, "loss": 0.2111, "step": 8000 }, { "epoch": 0.22, "eval_bleu": 95.0762, "eval_gen_len": 64.6663, "eval_loss": 0.31111225485801697, "eval_rouge1": 92.7158, "eval_rouge2": 88.3325, "eval_rougeL": 92.4682, "eval_rougeLsum": 92.4352, "eval_runtime": 3140.711, "eval_samples_per_second": 0.955, "eval_steps_per_second": 0.239, "step": 8000 }, { "epoch": 0.23, "learning_rate": 2.7742252138549995e-05, "loss": 0.2171, "step": 8050 }, { "epoch": 0.23, "learning_rate": 2.7728228859907445e-05, "loss": 0.2263, "step": 8100 }, { "epoch": 0.23, "learning_rate": 2.77142055812649e-05, "loss": 0.2359, "step": 8150 }, { "epoch": 0.23, "learning_rate": 2.770018230262235e-05, "loss": 0.2058, "step": 8200 }, { "epoch": 0.23, "learning_rate": 2.7686159023979808e-05, "loss": 0.2044, "step": 8250 }, { "epoch": 0.23, "learning_rate": 2.767213574533726e-05, "loss": 0.2322, "step": 8300 }, { "epoch": 0.23, "learning_rate": 2.765811246669471e-05, "loss": 0.2289, "step": 8350 }, { "epoch": 0.24, "learning_rate": 2.7644089188052168e-05, "loss": 0.2228, "step": 8400 }, { "epoch": 0.24, "learning_rate": 2.763006590940962e-05, "loss": 0.2211, "step": 8450 }, { "epoch": 0.24, "learning_rate": 2.761604263076707e-05, "loss": 0.2297, "step": 8500 }, { "epoch": 0.24, "learning_rate": 2.760201935212453e-05, "loss": 0.2087, "step": 8550 }, { "epoch": 0.24, "learning_rate": 2.7587996073481978e-05, "loss": 0.2266, "step": 8600 }, { "epoch": 0.24, "learning_rate": 2.7573972794839435e-05, "loss": 0.2259, "step": 8650 }, { "epoch": 0.24, "learning_rate": 2.755994951619689e-05, "loss": 0.1985, "step": 8700 }, { "epoch": 0.25, "learning_rate": 2.754592623755434e-05, "loss": 0.2023, "step": 8750 }, { "epoch": 0.25, "learning_rate": 2.7531902958911795e-05, "loss": 0.2297, "step": 8800 }, { "epoch": 0.25, "learning_rate": 2.751787968026925e-05, "loss": 0.2179, "step": 8850 }, { "epoch": 0.25, "learning_rate": 2.7503856401626702e-05, "loss": 0.1993, "step": 8900 }, { "epoch": 0.25, "learning_rate": 2.7489833122984155e-05, "loss": 0.1946, "step": 8950 }, { "epoch": 0.25, "learning_rate": 2.7475809844341605e-05, "loss": 0.195, "step": 9000 }, { "epoch": 0.25, "eval_bleu": 95.1941, "eval_gen_len": 64.634, "eval_loss": 0.3037899434566498, "eval_rouge1": 92.7974, "eval_rouge2": 88.5146, "eval_rougeL": 92.5398, "eval_rougeLsum": 92.5031, "eval_runtime": 3164.9424, "eval_samples_per_second": 0.948, "eval_steps_per_second": 0.237, "step": 9000 }, { "epoch": 0.25, "learning_rate": 2.7461786565699062e-05, "loss": 0.2171, "step": 9050 }, { "epoch": 0.26, "learning_rate": 2.7447763287056515e-05, "loss": 0.2282, "step": 9100 }, { "epoch": 0.26, "learning_rate": 2.7433740008413965e-05, "loss": 0.2159, "step": 9150 }, { "epoch": 0.26, "learning_rate": 2.7419716729771422e-05, "loss": 0.221, "step": 9200 }, { "epoch": 0.26, "learning_rate": 2.7405693451128875e-05, "loss": 0.2357, "step": 9250 }, { "epoch": 0.26, "learning_rate": 2.739167017248633e-05, "loss": 0.221, "step": 9300 }, { "epoch": 0.26, "learning_rate": 2.7377646893843782e-05, "loss": 0.2064, "step": 9350 }, { "epoch": 0.26, "learning_rate": 2.7363623615201232e-05, "loss": 0.2112, "step": 9400 }, { "epoch": 0.27, "learning_rate": 2.734960033655869e-05, "loss": 0.2489, "step": 9450 }, { "epoch": 0.27, "learning_rate": 2.7335577057916142e-05, "loss": 0.2209, "step": 9500 }, { "epoch": 0.27, "learning_rate": 2.7321553779273592e-05, "loss": 0.2435, "step": 9550 }, { "epoch": 0.27, "learning_rate": 2.730753050063105e-05, "loss": 0.2135, "step": 9600 }, { "epoch": 0.27, "learning_rate": 2.7293507221988502e-05, "loss": 0.2398, "step": 9650 }, { "epoch": 0.27, "learning_rate": 2.7279483943345956e-05, "loss": 0.2005, "step": 9700 }, { "epoch": 0.27, "learning_rate": 2.726546066470341e-05, "loss": 0.2122, "step": 9750 }, { "epoch": 0.27, "learning_rate": 2.725143738606086e-05, "loss": 0.2027, "step": 9800 }, { "epoch": 0.28, "learning_rate": 2.7237414107418316e-05, "loss": 0.199, "step": 9850 }, { "epoch": 0.28, "learning_rate": 2.722339082877577e-05, "loss": 0.222, "step": 9900 }, { "epoch": 0.28, "learning_rate": 2.7209367550133222e-05, "loss": 0.2185, "step": 9950 }, { "epoch": 0.28, "learning_rate": 2.7195344271490676e-05, "loss": 0.1982, "step": 10000 }, { "epoch": 0.28, "eval_bleu": 95.2522, "eval_gen_len": 64.626, "eval_loss": 0.3007320463657379, "eval_rouge1": 92.7899, "eval_rouge2": 88.4907, "eval_rougeL": 92.5338, "eval_rougeLsum": 92.4971, "eval_runtime": 3103.217, "eval_samples_per_second": 0.967, "eval_steps_per_second": 0.242, "step": 10000 }, { "epoch": 0.28, "learning_rate": 2.718132099284813e-05, "loss": 0.2071, "step": 10050 }, { "epoch": 0.28, "learning_rate": 2.7167297714205582e-05, "loss": 0.204, "step": 10100 }, { "epoch": 0.28, "learning_rate": 2.7153274435563036e-05, "loss": 0.2138, "step": 10150 }, { "epoch": 0.29, "learning_rate": 2.7139251156920486e-05, "loss": 0.2045, "step": 10200 }, { "epoch": 0.29, "learning_rate": 2.7125227878277942e-05, "loss": 0.2005, "step": 10250 }, { "epoch": 0.29, "learning_rate": 2.7111204599635396e-05, "loss": 0.2392, "step": 10300 }, { "epoch": 0.29, "learning_rate": 2.709718132099285e-05, "loss": 0.2126, "step": 10350 }, { "epoch": 0.29, "learning_rate": 2.7083158042350303e-05, "loss": 0.2097, "step": 10400 }, { "epoch": 0.29, "learning_rate": 2.7069134763707756e-05, "loss": 0.2197, "step": 10450 }, { "epoch": 0.29, "learning_rate": 2.705511148506521e-05, "loss": 0.2013, "step": 10500 }, { "epoch": 0.3, "learning_rate": 2.7041088206422663e-05, "loss": 0.2208, "step": 10550 }, { "epoch": 0.3, "learning_rate": 2.7027064927780113e-05, "loss": 0.1938, "step": 10600 }, { "epoch": 0.3, "learning_rate": 2.701304164913757e-05, "loss": 0.2048, "step": 10650 }, { "epoch": 0.3, "learning_rate": 2.6999018370495023e-05, "loss": 0.2065, "step": 10700 }, { "epoch": 0.3, "learning_rate": 2.6984995091852476e-05, "loss": 0.2075, "step": 10750 }, { "epoch": 0.3, "learning_rate": 2.697097181320993e-05, "loss": 0.2279, "step": 10800 }, { "epoch": 0.3, "learning_rate": 2.6956948534567383e-05, "loss": 0.2072, "step": 10850 }, { "epoch": 0.31, "learning_rate": 2.6942925255924836e-05, "loss": 0.1998, "step": 10900 }, { "epoch": 0.31, "learning_rate": 2.692890197728229e-05, "loss": 0.1904, "step": 10950 }, { "epoch": 0.31, "learning_rate": 2.691487869863974e-05, "loss": 0.1762, "step": 11000 }, { "epoch": 0.31, "eval_bleu": 95.3107, "eval_gen_len": 64.546, "eval_loss": 0.29688259959220886, "eval_rouge1": 92.939, "eval_rouge2": 88.7461, "eval_rougeL": 92.6735, "eval_rougeLsum": 92.6483, "eval_runtime": 3120.8781, "eval_samples_per_second": 0.961, "eval_steps_per_second": 0.24, "step": 11000 }, { "epoch": 0.31, "learning_rate": 2.6900855419997196e-05, "loss": 0.2241, "step": 11050 }, { "epoch": 0.31, "learning_rate": 2.688683214135465e-05, "loss": 0.2087, "step": 11100 }, { "epoch": 0.31, "learning_rate": 2.6872808862712103e-05, "loss": 0.1918, "step": 11150 }, { "epoch": 0.31, "learning_rate": 2.6858785584069556e-05, "loss": 0.2117, "step": 11200 }, { "epoch": 0.32, "learning_rate": 2.684476230542701e-05, "loss": 0.213, "step": 11250 }, { "epoch": 0.32, "learning_rate": 2.6830739026784463e-05, "loss": 0.2338, "step": 11300 }, { "epoch": 0.32, "learning_rate": 2.6816715748141916e-05, "loss": 0.2056, "step": 11350 }, { "epoch": 0.32, "learning_rate": 2.680269246949937e-05, "loss": 0.2087, "step": 11400 }, { "epoch": 0.32, "learning_rate": 2.6788669190856823e-05, "loss": 0.2014, "step": 11450 }, { "epoch": 0.32, "learning_rate": 2.6774645912214276e-05, "loss": 0.2464, "step": 11500 }, { "epoch": 0.32, "learning_rate": 2.676062263357173e-05, "loss": 0.2132, "step": 11550 }, { "epoch": 0.33, "learning_rate": 2.6746599354929183e-05, "loss": 0.2199, "step": 11600 }, { "epoch": 0.33, "learning_rate": 2.6732576076286636e-05, "loss": 0.187, "step": 11650 }, { "epoch": 0.33, "learning_rate": 2.671855279764409e-05, "loss": 0.2053, "step": 11700 }, { "epoch": 0.33, "learning_rate": 2.6704529519001543e-05, "loss": 0.1936, "step": 11750 }, { "epoch": 0.33, "learning_rate": 2.6690506240358997e-05, "loss": 0.2214, "step": 11800 }, { "epoch": 0.33, "learning_rate": 2.667648296171645e-05, "loss": 0.2056, "step": 11850 }, { "epoch": 0.33, "learning_rate": 2.6662459683073903e-05, "loss": 0.181, "step": 11900 }, { "epoch": 0.34, "learning_rate": 2.6648436404431357e-05, "loss": 0.2127, "step": 11950 }, { "epoch": 0.34, "learning_rate": 2.663441312578881e-05, "loss": 0.1798, "step": 12000 }, { "epoch": 0.34, "eval_bleu": 95.3546, "eval_gen_len": 64.4707, "eval_loss": 0.2918665111064911, "eval_rouge1": 92.9828, "eval_rouge2": 88.8459, "eval_rougeL": 92.7418, "eval_rougeLsum": 92.6996, "eval_runtime": 3099.2674, "eval_samples_per_second": 0.968, "eval_steps_per_second": 0.242, "step": 12000 }, { "epoch": 0.34, "learning_rate": 2.6620389847146263e-05, "loss": 0.207, "step": 12050 }, { "epoch": 0.34, "learning_rate": 2.6606366568503717e-05, "loss": 0.1874, "step": 12100 }, { "epoch": 0.34, "learning_rate": 2.659234328986117e-05, "loss": 0.1933, "step": 12150 }, { "epoch": 0.34, "learning_rate": 2.6578320011218623e-05, "loss": 0.2117, "step": 12200 }, { "epoch": 0.34, "learning_rate": 2.6564296732576077e-05, "loss": 0.2147, "step": 12250 }, { "epoch": 0.34, "learning_rate": 2.655027345393353e-05, "loss": 0.2206, "step": 12300 }, { "epoch": 0.35, "learning_rate": 2.6536250175290983e-05, "loss": 0.2034, "step": 12350 }, { "epoch": 0.35, "learning_rate": 2.6522226896648437e-05, "loss": 0.2019, "step": 12400 }, { "epoch": 0.35, "learning_rate": 2.650820361800589e-05, "loss": 0.1982, "step": 12450 }, { "epoch": 0.35, "learning_rate": 2.6494180339363344e-05, "loss": 0.1851, "step": 12500 }, { "epoch": 0.35, "learning_rate": 2.6480157060720797e-05, "loss": 0.2089, "step": 12550 }, { "epoch": 0.35, "learning_rate": 2.646613378207825e-05, "loss": 0.1985, "step": 12600 }, { "epoch": 0.35, "learning_rate": 2.6452110503435704e-05, "loss": 0.1879, "step": 12650 }, { "epoch": 0.36, "learning_rate": 2.6438087224793157e-05, "loss": 0.2285, "step": 12700 }, { "epoch": 0.36, "learning_rate": 2.642406394615061e-05, "loss": 0.2068, "step": 12750 }, { "epoch": 0.36, "learning_rate": 2.6410040667508064e-05, "loss": 0.2248, "step": 12800 }, { "epoch": 0.36, "learning_rate": 2.6396017388865517e-05, "loss": 0.2017, "step": 12850 }, { "epoch": 0.36, "learning_rate": 2.638199411022297e-05, "loss": 0.2064, "step": 12900 }, { "epoch": 0.36, "learning_rate": 2.6367970831580424e-05, "loss": 0.1967, "step": 12950 }, { "epoch": 0.36, "learning_rate": 2.6353947552937877e-05, "loss": 0.2037, "step": 13000 }, { "epoch": 0.36, "eval_bleu": 95.472, "eval_gen_len": 64.5277, "eval_loss": 0.28798067569732666, "eval_rouge1": 93.1329, "eval_rouge2": 89.0529, "eval_rougeL": 92.8733, "eval_rougeLsum": 92.8293, "eval_runtime": 3148.5331, "eval_samples_per_second": 0.953, "eval_steps_per_second": 0.238, "step": 13000 }, { "epoch": 0.37, "learning_rate": 2.633992427429533e-05, "loss": 0.1902, "step": 13050 }, { "epoch": 0.37, "learning_rate": 2.6325900995652784e-05, "loss": 0.1787, "step": 13100 }, { "epoch": 0.37, "learning_rate": 2.6311877717010237e-05, "loss": 0.1822, "step": 13150 }, { "epoch": 0.37, "learning_rate": 2.629785443836769e-05, "loss": 0.2024, "step": 13200 }, { "epoch": 0.37, "learning_rate": 2.6283831159725147e-05, "loss": 0.2068, "step": 13250 }, { "epoch": 0.37, "learning_rate": 2.6269807881082597e-05, "loss": 0.2084, "step": 13300 }, { "epoch": 0.37, "learning_rate": 2.625578460244005e-05, "loss": 0.1967, "step": 13350 }, { "epoch": 0.38, "learning_rate": 2.6241761323797504e-05, "loss": 0.1902, "step": 13400 }, { "epoch": 0.38, "learning_rate": 2.6227738045154957e-05, "loss": 0.1975, "step": 13450 }, { "epoch": 0.38, "learning_rate": 2.621371476651241e-05, "loss": 0.2069, "step": 13500 }, { "epoch": 0.38, "learning_rate": 2.6199691487869864e-05, "loss": 0.2188, "step": 13550 }, { "epoch": 0.38, "learning_rate": 2.6185668209227317e-05, "loss": 0.2088, "step": 13600 }, { "epoch": 0.38, "learning_rate": 2.6171644930584774e-05, "loss": 0.1875, "step": 13650 }, { "epoch": 0.38, "learning_rate": 2.6157621651942224e-05, "loss": 0.1881, "step": 13700 }, { "epoch": 0.39, "learning_rate": 2.6143598373299678e-05, "loss": 0.1907, "step": 13750 }, { "epoch": 0.39, "learning_rate": 2.612957509465713e-05, "loss": 0.1968, "step": 13800 }, { "epoch": 0.39, "learning_rate": 2.6115551816014584e-05, "loss": 0.2087, "step": 13850 }, { "epoch": 0.39, "learning_rate": 2.6101528537372038e-05, "loss": 0.1805, "step": 13900 }, { "epoch": 0.39, "learning_rate": 2.608750525872949e-05, "loss": 0.1897, "step": 13950 }, { "epoch": 0.39, "learning_rate": 2.6073481980086944e-05, "loss": 0.2091, "step": 14000 }, { "epoch": 0.39, "eval_bleu": 95.499, "eval_gen_len": 64.423, "eval_loss": 0.2838546335697174, "eval_rouge1": 93.0724, "eval_rouge2": 89.0302, "eval_rougeL": 92.8261, "eval_rougeLsum": 92.7827, "eval_runtime": 3139.4968, "eval_samples_per_second": 0.956, "eval_steps_per_second": 0.239, "step": 14000 }, { "epoch": 0.39, "learning_rate": 2.60594587014444e-05, "loss": 0.1831, "step": 14050 }, { "epoch": 0.4, "learning_rate": 2.604543542280185e-05, "loss": 0.1918, "step": 14100 }, { "epoch": 0.4, "learning_rate": 2.6031412144159304e-05, "loss": 0.1973, "step": 14150 }, { "epoch": 0.4, "learning_rate": 2.6017388865516758e-05, "loss": 0.1948, "step": 14200 }, { "epoch": 0.4, "learning_rate": 2.600336558687421e-05, "loss": 0.202, "step": 14250 }, { "epoch": 0.4, "learning_rate": 2.5989342308231664e-05, "loss": 0.2092, "step": 14300 }, { "epoch": 0.4, "learning_rate": 2.5975319029589118e-05, "loss": 0.189, "step": 14350 }, { "epoch": 0.4, "learning_rate": 2.596129575094657e-05, "loss": 0.2014, "step": 14400 }, { "epoch": 0.41, "learning_rate": 2.5947272472304028e-05, "loss": 0.1835, "step": 14450 }, { "epoch": 0.41, "learning_rate": 2.5933249193661478e-05, "loss": 0.1941, "step": 14500 }, { "epoch": 0.41, "learning_rate": 2.591922591501893e-05, "loss": 0.2027, "step": 14550 }, { "epoch": 0.41, "learning_rate": 2.5905202636376385e-05, "loss": 0.1794, "step": 14600 }, { "epoch": 0.41, "learning_rate": 2.5891179357733838e-05, "loss": 0.2, "step": 14650 }, { "epoch": 0.41, "learning_rate": 2.5877156079091295e-05, "loss": 0.2091, "step": 14700 }, { "epoch": 0.41, "learning_rate": 2.5863132800448745e-05, "loss": 0.1813, "step": 14750 }, { "epoch": 0.42, "learning_rate": 2.5849109521806198e-05, "loss": 0.1793, "step": 14800 }, { "epoch": 0.42, "learning_rate": 2.5835086243163655e-05, "loss": 0.1852, "step": 14850 }, { "epoch": 0.42, "learning_rate": 2.5821062964521105e-05, "loss": 0.2122, "step": 14900 }, { "epoch": 0.42, "learning_rate": 2.5807039685878558e-05, "loss": 0.1758, "step": 14950 }, { "epoch": 0.42, "learning_rate": 2.579301640723601e-05, "loss": 0.1998, "step": 15000 }, { "epoch": 0.42, "eval_bleu": 95.5491, "eval_gen_len": 64.419, "eval_loss": 0.28141552209854126, "eval_rouge1": 93.1634, "eval_rouge2": 89.1339, "eval_rougeL": 92.9186, "eval_rougeLsum": 92.904, "eval_runtime": 3158.2947, "eval_samples_per_second": 0.95, "eval_steps_per_second": 0.237, "step": 15000 }, { "epoch": 0.42, "learning_rate": 2.5778993128593465e-05, "loss": 0.1902, "step": 15050 }, { "epoch": 0.42, "learning_rate": 2.576496984995092e-05, "loss": 0.1829, "step": 15100 }, { "epoch": 0.42, "learning_rate": 2.575094657130837e-05, "loss": 0.2212, "step": 15150 }, { "epoch": 0.43, "learning_rate": 2.5736923292665825e-05, "loss": 0.1878, "step": 15200 }, { "epoch": 0.43, "learning_rate": 2.572290001402328e-05, "loss": 0.2088, "step": 15250 }, { "epoch": 0.43, "learning_rate": 2.570887673538073e-05, "loss": 0.217, "step": 15300 }, { "epoch": 0.43, "learning_rate": 2.5694853456738185e-05, "loss": 0.1859, "step": 15350 }, { "epoch": 0.43, "learning_rate": 2.568083017809564e-05, "loss": 0.1975, "step": 15400 }, { "epoch": 0.43, "learning_rate": 2.566680689945309e-05, "loss": 0.1848, "step": 15450 }, { "epoch": 0.43, "learning_rate": 2.565278362081055e-05, "loss": 0.2097, "step": 15500 }, { "epoch": 0.44, "learning_rate": 2.5638760342168e-05, "loss": 0.1984, "step": 15550 }, { "epoch": 0.44, "learning_rate": 2.5624737063525452e-05, "loss": 0.1905, "step": 15600 }, { "epoch": 0.44, "learning_rate": 2.561071378488291e-05, "loss": 0.2155, "step": 15650 }, { "epoch": 0.44, "learning_rate": 2.559669050624036e-05, "loss": 0.1933, "step": 15700 }, { "epoch": 0.44, "learning_rate": 2.5582667227597815e-05, "loss": 0.1853, "step": 15750 }, { "epoch": 0.44, "learning_rate": 2.5568643948955265e-05, "loss": 0.1849, "step": 15800 }, { "epoch": 0.44, "learning_rate": 2.555462067031272e-05, "loss": 0.179, "step": 15850 }, { "epoch": 0.45, "learning_rate": 2.5540597391670175e-05, "loss": 0.1907, "step": 15900 }, { "epoch": 0.45, "learning_rate": 2.5526574113027625e-05, "loss": 0.2032, "step": 15950 }, { "epoch": 0.45, "learning_rate": 2.551255083438508e-05, "loss": 0.1855, "step": 16000 }, { "epoch": 0.45, "eval_bleu": 95.5711, "eval_gen_len": 64.4307, "eval_loss": 0.27981552481651306, "eval_rouge1": 93.2066, "eval_rouge2": 89.2588, "eval_rougeL": 92.9732, "eval_rougeLsum": 92.9506, "eval_runtime": 3083.9167, "eval_samples_per_second": 0.973, "eval_steps_per_second": 0.243, "step": 16000 }, { "epoch": 0.45, "learning_rate": 2.5498527555742535e-05, "loss": 0.19, "step": 16050 }, { "epoch": 0.45, "learning_rate": 2.5484504277099985e-05, "loss": 0.1877, "step": 16100 }, { "epoch": 0.45, "learning_rate": 2.5470480998457442e-05, "loss": 0.1868, "step": 16150 }, { "epoch": 0.45, "learning_rate": 2.5456457719814892e-05, "loss": 0.179, "step": 16200 }, { "epoch": 0.46, "learning_rate": 2.5442434441172345e-05, "loss": 0.2247, "step": 16250 }, { "epoch": 0.46, "learning_rate": 2.5428411162529802e-05, "loss": 0.1745, "step": 16300 }, { "epoch": 0.46, "learning_rate": 2.5414387883887252e-05, "loss": 0.2015, "step": 16350 }, { "epoch": 0.46, "learning_rate": 2.5400364605244705e-05, "loss": 0.212, "step": 16400 }, { "epoch": 0.46, "learning_rate": 2.5386341326602162e-05, "loss": 0.2018, "step": 16450 }, { "epoch": 0.46, "learning_rate": 2.5372318047959612e-05, "loss": 0.1888, "step": 16500 }, { "epoch": 0.46, "learning_rate": 2.535829476931707e-05, "loss": 0.1806, "step": 16550 }, { "epoch": 0.47, "learning_rate": 2.534427149067452e-05, "loss": 0.1791, "step": 16600 }, { "epoch": 0.47, "learning_rate": 2.5330248212031972e-05, "loss": 0.1949, "step": 16650 }, { "epoch": 0.47, "learning_rate": 2.531622493338943e-05, "loss": 0.1858, "step": 16700 }, { "epoch": 0.47, "learning_rate": 2.530220165474688e-05, "loss": 0.1879, "step": 16750 }, { "epoch": 0.47, "learning_rate": 2.5288178376104332e-05, "loss": 0.1936, "step": 16800 }, { "epoch": 0.47, "learning_rate": 2.527415509746179e-05, "loss": 0.1692, "step": 16850 }, { "epoch": 0.47, "learning_rate": 2.526013181881924e-05, "loss": 0.1852, "step": 16900 }, { "epoch": 0.48, "learning_rate": 2.5246108540176696e-05, "loss": 0.2128, "step": 16950 }, { "epoch": 0.48, "learning_rate": 2.5232085261534146e-05, "loss": 0.184, "step": 17000 }, { "epoch": 0.48, "eval_bleu": 95.6325, "eval_gen_len": 64.3623, "eval_loss": 0.2761005759239197, "eval_rouge1": 93.269, "eval_rouge2": 89.3661, "eval_rougeL": 93.0637, "eval_rougeLsum": 93.0433, "eval_runtime": 3126.3939, "eval_samples_per_second": 0.96, "eval_steps_per_second": 0.24, "step": 17000 }, { "epoch": 0.48, "learning_rate": 2.52180619828916e-05, "loss": 0.2114, "step": 17050 }, { "epoch": 0.48, "learning_rate": 2.5204038704249056e-05, "loss": 0.1933, "step": 17100 }, { "epoch": 0.48, "learning_rate": 2.5190015425606506e-05, "loss": 0.1978, "step": 17150 }, { "epoch": 0.48, "learning_rate": 2.5175992146963963e-05, "loss": 0.2033, "step": 17200 }, { "epoch": 0.48, "learning_rate": 2.5161968868321416e-05, "loss": 0.1854, "step": 17250 }, { "epoch": 0.49, "learning_rate": 2.5147945589678866e-05, "loss": 0.1792, "step": 17300 }, { "epoch": 0.49, "learning_rate": 2.5133922311036323e-05, "loss": 0.1804, "step": 17350 }, { "epoch": 0.49, "learning_rate": 2.5119899032393773e-05, "loss": 0.1901, "step": 17400 }, { "epoch": 0.49, "learning_rate": 2.5105875753751226e-05, "loss": 0.2045, "step": 17450 }, { "epoch": 0.49, "learning_rate": 2.5091852475108683e-05, "loss": 0.1796, "step": 17500 }, { "epoch": 0.49, "learning_rate": 2.5077829196466133e-05, "loss": 0.1763, "step": 17550 }, { "epoch": 0.49, "learning_rate": 2.506380591782359e-05, "loss": 0.1882, "step": 17600 }, { "epoch": 0.5, "learning_rate": 2.5049782639181043e-05, "loss": 0.191, "step": 17650 }, { "epoch": 0.5, "learning_rate": 2.5035759360538493e-05, "loss": 0.1646, "step": 17700 }, { "epoch": 0.5, "learning_rate": 2.502173608189595e-05, "loss": 0.1898, "step": 17750 }, { "epoch": 0.5, "learning_rate": 2.50077128032534e-05, "loss": 0.1717, "step": 17800 }, { "epoch": 0.5, "learning_rate": 2.4993689524610853e-05, "loss": 0.1676, "step": 17850 }, { "epoch": 0.5, "learning_rate": 2.497966624596831e-05, "loss": 0.1978, "step": 17900 }, { "epoch": 0.5, "learning_rate": 2.496564296732576e-05, "loss": 0.216, "step": 17950 }, { "epoch": 0.5, "learning_rate": 2.4951619688683216e-05, "loss": 0.2025, "step": 18000 }, { "epoch": 0.5, "eval_bleu": 95.6412, "eval_gen_len": 64.4067, "eval_loss": 0.27226048707962036, "eval_rouge1": 93.2987, "eval_rouge2": 89.3773, "eval_rougeL": 93.084, "eval_rougeLsum": 93.0595, "eval_runtime": 3091.5775, "eval_samples_per_second": 0.97, "eval_steps_per_second": 0.243, "step": 18000 }, { "epoch": 0.51, "learning_rate": 2.493759641004067e-05, "loss": 0.1948, "step": 18050 }, { "epoch": 0.51, "learning_rate": 2.492357313139812e-05, "loss": 0.1896, "step": 18100 }, { "epoch": 0.51, "learning_rate": 2.4909549852755576e-05, "loss": 0.1767, "step": 18150 }, { "epoch": 0.51, "learning_rate": 2.4895526574113026e-05, "loss": 0.2107, "step": 18200 }, { "epoch": 0.51, "learning_rate": 2.488150329547048e-05, "loss": 0.1878, "step": 18250 }, { "epoch": 0.51, "learning_rate": 2.4867480016827936e-05, "loss": 0.1735, "step": 18300 }, { "epoch": 0.51, "learning_rate": 2.4853456738185386e-05, "loss": 0.1607, "step": 18350 }, { "epoch": 0.52, "learning_rate": 2.4839433459542843e-05, "loss": 0.1611, "step": 18400 }, { "epoch": 0.52, "learning_rate": 2.4825410180900297e-05, "loss": 0.1889, "step": 18450 }, { "epoch": 0.52, "learning_rate": 2.4811386902257746e-05, "loss": 0.1962, "step": 18500 }, { "epoch": 0.52, "learning_rate": 2.4797363623615203e-05, "loss": 0.1767, "step": 18550 }, { "epoch": 0.52, "learning_rate": 2.4783340344972653e-05, "loss": 0.2027, "step": 18600 }, { "epoch": 0.52, "learning_rate": 2.476931706633011e-05, "loss": 0.1994, "step": 18650 }, { "epoch": 0.52, "learning_rate": 2.4755293787687563e-05, "loss": 0.1725, "step": 18700 }, { "epoch": 0.53, "learning_rate": 2.4741270509045013e-05, "loss": 0.1931, "step": 18750 }, { "epoch": 0.53, "learning_rate": 2.472724723040247e-05, "loss": 0.1897, "step": 18800 }, { "epoch": 0.53, "learning_rate": 2.4713223951759923e-05, "loss": 0.191, "step": 18850 }, { "epoch": 0.53, "learning_rate": 2.4699200673117373e-05, "loss": 0.1868, "step": 18900 }, { "epoch": 0.53, "learning_rate": 2.468517739447483e-05, "loss": 0.1738, "step": 18950 }, { "epoch": 0.53, "learning_rate": 2.467115411583228e-05, "loss": 0.1808, "step": 19000 }, { "epoch": 0.53, "eval_bleu": 95.6749, "eval_gen_len": 64.317, "eval_loss": 0.2700382173061371, "eval_rouge1": 93.4064, "eval_rouge2": 89.5527, "eval_rougeL": 93.2047, "eval_rougeLsum": 93.1846, "eval_runtime": 3097.9008, "eval_samples_per_second": 0.968, "eval_steps_per_second": 0.242, "step": 19000 }, { "epoch": 0.53, "learning_rate": 2.4657130837189737e-05, "loss": 0.1747, "step": 19050 }, { "epoch": 0.54, "learning_rate": 2.464310755854719e-05, "loss": 0.189, "step": 19100 }, { "epoch": 0.54, "learning_rate": 2.462908427990464e-05, "loss": 0.172, "step": 19150 }, { "epoch": 0.54, "learning_rate": 2.4615061001262097e-05, "loss": 0.1793, "step": 19200 }, { "epoch": 0.54, "learning_rate": 2.460103772261955e-05, "loss": 0.1855, "step": 19250 }, { "epoch": 0.54, "learning_rate": 2.4587014443977e-05, "loss": 0.1845, "step": 19300 }, { "epoch": 0.54, "learning_rate": 2.4572991165334457e-05, "loss": 0.1907, "step": 19350 }, { "epoch": 0.54, "learning_rate": 2.4558967886691907e-05, "loss": 0.1713, "step": 19400 }, { "epoch": 0.55, "learning_rate": 2.4544944608049364e-05, "loss": 0.1983, "step": 19450 }, { "epoch": 0.55, "learning_rate": 2.4530921329406817e-05, "loss": 0.1654, "step": 19500 }, { "epoch": 0.55, "learning_rate": 2.4516898050764267e-05, "loss": 0.2049, "step": 19550 }, { "epoch": 0.55, "learning_rate": 2.4502874772121724e-05, "loss": 0.1826, "step": 19600 }, { "epoch": 0.55, "learning_rate": 2.4488851493479177e-05, "loss": 0.1622, "step": 19650 }, { "epoch": 0.55, "learning_rate": 2.4474828214836627e-05, "loss": 0.196, "step": 19700 }, { "epoch": 0.55, "learning_rate": 2.4460804936194084e-05, "loss": 0.1805, "step": 19750 }, { "epoch": 0.56, "learning_rate": 2.4446781657551534e-05, "loss": 0.176, "step": 19800 }, { "epoch": 0.56, "learning_rate": 2.443275837890899e-05, "loss": 0.1756, "step": 19850 }, { "epoch": 0.56, "learning_rate": 2.4418735100266444e-05, "loss": 0.2069, "step": 19900 }, { "epoch": 0.56, "learning_rate": 2.4404711821623894e-05, "loss": 0.2099, "step": 19950 }, { "epoch": 0.56, "learning_rate": 2.439068854298135e-05, "loss": 0.1844, "step": 20000 }, { "epoch": 0.56, "eval_bleu": 95.7094, "eval_gen_len": 64.34, "eval_loss": 0.2679564356803894, "eval_rouge1": 93.3936, "eval_rouge2": 89.5638, "eval_rougeL": 93.2067, "eval_rougeLsum": 93.1818, "eval_runtime": 3170.5236, "eval_samples_per_second": 0.946, "eval_steps_per_second": 0.237, "step": 20000 }, { "epoch": 0.56, "learning_rate": 2.4376665264338804e-05, "loss": 0.1718, "step": 20050 }, { "epoch": 0.56, "learning_rate": 2.4362641985696257e-05, "loss": 0.1938, "step": 20100 }, { "epoch": 0.57, "learning_rate": 2.434861870705371e-05, "loss": 0.1808, "step": 20150 }, { "epoch": 0.57, "learning_rate": 2.433459542841116e-05, "loss": 0.1805, "step": 20200 }, { "epoch": 0.57, "learning_rate": 2.4320572149768617e-05, "loss": 0.1761, "step": 20250 }, { "epoch": 0.57, "learning_rate": 2.430654887112607e-05, "loss": 0.1826, "step": 20300 }, { "epoch": 0.57, "learning_rate": 2.429252559248352e-05, "loss": 0.1822, "step": 20350 }, { "epoch": 0.57, "learning_rate": 2.4278502313840977e-05, "loss": 0.1838, "step": 20400 }, { "epoch": 0.57, "learning_rate": 2.426447903519843e-05, "loss": 0.1723, "step": 20450 }, { "epoch": 0.57, "learning_rate": 2.4250455756555884e-05, "loss": 0.1816, "step": 20500 }, { "epoch": 0.58, "learning_rate": 2.4236432477913338e-05, "loss": 0.1973, "step": 20550 }, { "epoch": 0.58, "learning_rate": 2.4222409199270788e-05, "loss": 0.2059, "step": 20600 }, { "epoch": 0.58, "learning_rate": 2.4208385920628244e-05, "loss": 0.1743, "step": 20650 }, { "epoch": 0.58, "learning_rate": 2.4194362641985698e-05, "loss": 0.2011, "step": 20700 }, { "epoch": 0.58, "learning_rate": 2.4180339363343148e-05, "loss": 0.1914, "step": 20750 }, { "epoch": 0.58, "learning_rate": 2.4166316084700604e-05, "loss": 0.1716, "step": 20800 }, { "epoch": 0.58, "learning_rate": 2.4152292806058058e-05, "loss": 0.179, "step": 20850 }, { "epoch": 0.59, "learning_rate": 2.413826952741551e-05, "loss": 0.2066, "step": 20900 }, { "epoch": 0.59, "learning_rate": 2.4124246248772964e-05, "loss": 0.1964, "step": 20950 }, { "epoch": 0.59, "learning_rate": 2.4110222970130414e-05, "loss": 0.1892, "step": 21000 }, { "epoch": 0.59, "eval_bleu": 95.7337, "eval_gen_len": 64.2923, "eval_loss": 0.2668701708316803, "eval_rouge1": 93.4236, "eval_rouge2": 89.643, "eval_rougeL": 93.2265, "eval_rougeLsum": 93.2057, "eval_runtime": 3109.7477, "eval_samples_per_second": 0.965, "eval_steps_per_second": 0.241, "step": 21000 }, { "epoch": 0.59, "learning_rate": 2.409619969148787e-05, "loss": 0.1852, "step": 21050 }, { "epoch": 0.59, "learning_rate": 2.4082176412845324e-05, "loss": 0.1833, "step": 21100 }, { "epoch": 0.59, "learning_rate": 2.4068153134202774e-05, "loss": 0.1668, "step": 21150 }, { "epoch": 0.59, "learning_rate": 2.405412985556023e-05, "loss": 0.1971, "step": 21200 }, { "epoch": 0.6, "learning_rate": 2.4040106576917685e-05, "loss": 0.1819, "step": 21250 }, { "epoch": 0.6, "learning_rate": 2.4026083298275138e-05, "loss": 0.1922, "step": 21300 }, { "epoch": 0.6, "learning_rate": 2.401206001963259e-05, "loss": 0.1799, "step": 21350 }, { "epoch": 0.6, "learning_rate": 2.399803674099004e-05, "loss": 0.1892, "step": 21400 }, { "epoch": 0.6, "learning_rate": 2.3984013462347498e-05, "loss": 0.1631, "step": 21450 }, { "epoch": 0.6, "learning_rate": 2.396999018370495e-05, "loss": 0.1728, "step": 21500 }, { "epoch": 0.6, "learning_rate": 2.3955966905062405e-05, "loss": 0.1833, "step": 21550 }, { "epoch": 0.61, "learning_rate": 2.3941943626419858e-05, "loss": 0.1988, "step": 21600 }, { "epoch": 0.61, "learning_rate": 2.392792034777731e-05, "loss": 0.176, "step": 21650 }, { "epoch": 0.61, "learning_rate": 2.3913897069134765e-05, "loss": 0.1664, "step": 21700 }, { "epoch": 0.61, "learning_rate": 2.3899873790492218e-05, "loss": 0.1749, "step": 21750 }, { "epoch": 0.61, "learning_rate": 2.3885850511849668e-05, "loss": 0.1856, "step": 21800 }, { "epoch": 0.61, "learning_rate": 2.3871827233207125e-05, "loss": 0.1845, "step": 21850 }, { "epoch": 0.61, "learning_rate": 2.3857803954564578e-05, "loss": 0.1823, "step": 21900 }, { "epoch": 0.62, "learning_rate": 2.384378067592203e-05, "loss": 0.1882, "step": 21950 }, { "epoch": 0.62, "learning_rate": 2.3829757397279485e-05, "loss": 0.1754, "step": 22000 }, { "epoch": 0.62, "eval_bleu": 95.7216, "eval_gen_len": 64.29, "eval_loss": 0.26628053188323975, "eval_rouge1": 93.494, "eval_rouge2": 89.7041, "eval_rougeL": 93.2775, "eval_rougeLsum": 93.255, "eval_runtime": 3108.636, "eval_samples_per_second": 0.965, "eval_steps_per_second": 0.241, "step": 22000 }, { "epoch": 0.62, "learning_rate": 2.3815734118636938e-05, "loss": 0.1727, "step": 22050 }, { "epoch": 0.62, "learning_rate": 2.380171083999439e-05, "loss": 0.2006, "step": 22100 }, { "epoch": 0.62, "learning_rate": 2.3787687561351845e-05, "loss": 0.1731, "step": 22150 }, { "epoch": 0.62, "learning_rate": 2.3773664282709295e-05, "loss": 0.1718, "step": 22200 }, { "epoch": 0.62, "learning_rate": 2.3759641004066752e-05, "loss": 0.1935, "step": 22250 }, { "epoch": 0.63, "learning_rate": 2.3745617725424205e-05, "loss": 0.1855, "step": 22300 }, { "epoch": 0.63, "learning_rate": 2.373159444678166e-05, "loss": 0.1788, "step": 22350 }, { "epoch": 0.63, "learning_rate": 2.3717571168139112e-05, "loss": 0.1722, "step": 22400 }, { "epoch": 0.63, "learning_rate": 2.3703547889496565e-05, "loss": 0.1685, "step": 22450 }, { "epoch": 0.63, "learning_rate": 2.368952461085402e-05, "loss": 0.1998, "step": 22500 }, { "epoch": 0.63, "learning_rate": 2.3675501332211472e-05, "loss": 0.1891, "step": 22550 }, { "epoch": 0.63, "learning_rate": 2.3661478053568922e-05, "loss": 0.1865, "step": 22600 }, { "epoch": 0.64, "learning_rate": 2.364745477492638e-05, "loss": 0.2003, "step": 22650 }, { "epoch": 0.64, "learning_rate": 2.3633431496283832e-05, "loss": 0.1833, "step": 22700 }, { "epoch": 0.64, "learning_rate": 2.3619408217641285e-05, "loss": 0.1709, "step": 22750 }, { "epoch": 0.64, "learning_rate": 2.360538493899874e-05, "loss": 0.1784, "step": 22800 }, { "epoch": 0.64, "learning_rate": 2.3591361660356192e-05, "loss": 0.1976, "step": 22850 }, { "epoch": 0.64, "learning_rate": 2.3577338381713645e-05, "loss": 0.2096, "step": 22900 }, { "epoch": 0.64, "learning_rate": 2.35633151030711e-05, "loss": 0.1795, "step": 22950 }, { "epoch": 0.65, "learning_rate": 2.3549291824428552e-05, "loss": 0.1843, "step": 23000 }, { "epoch": 0.65, "eval_bleu": 95.7705, "eval_gen_len": 64.281, "eval_loss": 0.263296902179718, "eval_rouge1": 93.5248, "eval_rouge2": 89.7445, "eval_rougeL": 93.3203, "eval_rougeLsum": 93.3217, "eval_runtime": 3090.4348, "eval_samples_per_second": 0.971, "eval_steps_per_second": 0.243, "step": 23000 }, { "epoch": 0.65, "learning_rate": 2.3535268545786005e-05, "loss": 0.171, "step": 23050 }, { "epoch": 0.65, "learning_rate": 2.352124526714346e-05, "loss": 0.1777, "step": 23100 }, { "epoch": 0.65, "learning_rate": 2.3507221988500912e-05, "loss": 0.2083, "step": 23150 }, { "epoch": 0.65, "learning_rate": 2.3493198709858365e-05, "loss": 0.163, "step": 23200 }, { "epoch": 0.65, "learning_rate": 2.347917543121582e-05, "loss": 0.1884, "step": 23250 }, { "epoch": 0.65, "learning_rate": 2.3465152152573272e-05, "loss": 0.1772, "step": 23300 }, { "epoch": 0.65, "learning_rate": 2.3451128873930726e-05, "loss": 0.1746, "step": 23350 }, { "epoch": 0.66, "learning_rate": 2.343710559528818e-05, "loss": 0.1971, "step": 23400 }, { "epoch": 0.66, "learning_rate": 2.3423082316645632e-05, "loss": 0.1697, "step": 23450 }, { "epoch": 0.66, "learning_rate": 2.3409059038003086e-05, "loss": 0.1635, "step": 23500 }, { "epoch": 0.66, "learning_rate": 2.339503575936054e-05, "loss": 0.1725, "step": 23550 }, { "epoch": 0.66, "learning_rate": 2.3381012480717992e-05, "loss": 0.1669, "step": 23600 }, { "epoch": 0.66, "learning_rate": 2.3366989202075446e-05, "loss": 0.1871, "step": 23650 }, { "epoch": 0.66, "learning_rate": 2.33529659234329e-05, "loss": 0.1641, "step": 23700 }, { "epoch": 0.67, "learning_rate": 2.3338942644790352e-05, "loss": 0.1909, "step": 23750 }, { "epoch": 0.67, "learning_rate": 2.3324919366147806e-05, "loss": 0.1782, "step": 23800 }, { "epoch": 0.67, "learning_rate": 2.331089608750526e-05, "loss": 0.1802, "step": 23850 }, { "epoch": 0.67, "learning_rate": 2.3296872808862713e-05, "loss": 0.1669, "step": 23900 }, { "epoch": 0.67, "learning_rate": 2.3282849530220166e-05, "loss": 0.1745, "step": 23950 }, { "epoch": 0.67, "learning_rate": 2.326882625157762e-05, "loss": 0.1607, "step": 24000 }, { "epoch": 0.67, "eval_bleu": 95.7802, "eval_gen_len": 64.2037, "eval_loss": 0.26373326778411865, "eval_rouge1": 93.5949, "eval_rouge2": 89.9557, "eval_rougeL": 93.4252, "eval_rougeLsum": 93.3955, "eval_runtime": 3090.8367, "eval_samples_per_second": 0.971, "eval_steps_per_second": 0.243, "step": 24000 }, { "epoch": 0.67, "learning_rate": 2.3254802972935073e-05, "loss": 0.1758, "step": 24050 }, { "epoch": 0.68, "learning_rate": 2.3240779694292526e-05, "loss": 0.1948, "step": 24100 }, { "epoch": 0.68, "learning_rate": 2.322675641564998e-05, "loss": 0.1836, "step": 24150 }, { "epoch": 0.68, "learning_rate": 2.3212733137007433e-05, "loss": 0.1999, "step": 24200 }, { "epoch": 0.68, "learning_rate": 2.3198709858364886e-05, "loss": 0.181, "step": 24250 }, { "epoch": 0.68, "learning_rate": 2.318468657972234e-05, "loss": 0.1729, "step": 24300 }, { "epoch": 0.68, "learning_rate": 2.3170663301079793e-05, "loss": 0.1814, "step": 24350 }, { "epoch": 0.68, "learning_rate": 2.3156640022437246e-05, "loss": 0.1751, "step": 24400 }, { "epoch": 0.69, "learning_rate": 2.3142616743794703e-05, "loss": 0.1655, "step": 24450 }, { "epoch": 0.69, "learning_rate": 2.3128593465152153e-05, "loss": 0.1742, "step": 24500 }, { "epoch": 0.69, "learning_rate": 2.3114570186509606e-05, "loss": 0.154, "step": 24550 }, { "epoch": 0.69, "learning_rate": 2.310054690786706e-05, "loss": 0.1758, "step": 24600 }, { "epoch": 0.69, "learning_rate": 2.3086523629224513e-05, "loss": 0.1921, "step": 24650 }, { "epoch": 0.69, "learning_rate": 2.3072500350581966e-05, "loss": 0.1803, "step": 24700 }, { "epoch": 0.69, "learning_rate": 2.305847707193942e-05, "loss": 0.2012, "step": 24750 }, { "epoch": 0.7, "learning_rate": 2.3044453793296873e-05, "loss": 0.163, "step": 24800 }, { "epoch": 0.7, "learning_rate": 2.303043051465433e-05, "loss": 0.1679, "step": 24850 }, { "epoch": 0.7, "learning_rate": 2.301640723601178e-05, "loss": 0.1846, "step": 24900 }, { "epoch": 0.7, "learning_rate": 2.3002383957369233e-05, "loss": 0.1813, "step": 24950 }, { "epoch": 0.7, "learning_rate": 2.2988360678726686e-05, "loss": 0.1583, "step": 25000 }, { "epoch": 0.7, "eval_bleu": 95.7853, "eval_gen_len": 64.1823, "eval_loss": 0.26211991906166077, "eval_rouge1": 93.6077, "eval_rouge2": 89.8774, "eval_rougeL": 93.4105, "eval_rougeLsum": 93.3911, "eval_runtime": 3088.3555, "eval_samples_per_second": 0.971, "eval_steps_per_second": 0.243, "step": 25000 }, { "epoch": 0.7, "learning_rate": 2.297433740008414e-05, "loss": 0.203, "step": 25050 }, { "epoch": 0.7, "learning_rate": 2.2960314121441593e-05, "loss": 0.1676, "step": 25100 }, { "epoch": 0.71, "learning_rate": 2.2946290842799046e-05, "loss": 0.1866, "step": 25150 }, { "epoch": 0.71, "learning_rate": 2.29322675641565e-05, "loss": 0.1928, "step": 25200 }, { "epoch": 0.71, "learning_rate": 2.2918244285513957e-05, "loss": 0.1836, "step": 25250 }, { "epoch": 0.71, "learning_rate": 2.2904221006871407e-05, "loss": 0.1706, "step": 25300 }, { "epoch": 0.71, "learning_rate": 2.289019772822886e-05, "loss": 0.2018, "step": 25350 }, { "epoch": 0.71, "learning_rate": 2.2876174449586313e-05, "loss": 0.1771, "step": 25400 }, { "epoch": 0.71, "learning_rate": 2.2862151170943767e-05, "loss": 0.1808, "step": 25450 }, { "epoch": 0.72, "learning_rate": 2.284812789230122e-05, "loss": 0.1968, "step": 25500 }, { "epoch": 0.72, "learning_rate": 2.2834104613658673e-05, "loss": 0.1776, "step": 25550 }, { "epoch": 0.72, "learning_rate": 2.2820081335016127e-05, "loss": 0.1657, "step": 25600 }, { "epoch": 0.72, "learning_rate": 2.2806058056373583e-05, "loss": 0.1627, "step": 25650 }, { "epoch": 0.72, "learning_rate": 2.2792034777731033e-05, "loss": 0.1687, "step": 25700 }, { "epoch": 0.72, "learning_rate": 2.2778011499088487e-05, "loss": 0.1691, "step": 25750 }, { "epoch": 0.72, "learning_rate": 2.276398822044594e-05, "loss": 0.1875, "step": 25800 }, { "epoch": 0.72, "learning_rate": 2.2749964941803393e-05, "loss": 0.1757, "step": 25850 }, { "epoch": 0.73, "learning_rate": 2.273594166316085e-05, "loss": 0.2009, "step": 25900 }, { "epoch": 0.73, "learning_rate": 2.27219183845183e-05, "loss": 0.1762, "step": 25950 }, { "epoch": 0.73, "learning_rate": 2.2707895105875754e-05, "loss": 0.1533, "step": 26000 }, { "epoch": 0.73, "eval_bleu": 95.8266, "eval_gen_len": 64.2703, "eval_loss": 0.26142334938049316, "eval_rouge1": 93.5866, "eval_rouge2": 89.888, "eval_rougeL": 93.3841, "eval_rougeLsum": 93.3821, "eval_runtime": 3090.7679, "eval_samples_per_second": 0.971, "eval_steps_per_second": 0.243, "step": 26000 }, { "epoch": 0.73, "learning_rate": 2.269387182723321e-05, "loss": 0.1756, "step": 26050 }, { "epoch": 0.73, "learning_rate": 2.267984854859066e-05, "loss": 0.1793, "step": 26100 }, { "epoch": 0.73, "learning_rate": 2.2665825269948114e-05, "loss": 0.1646, "step": 26150 }, { "epoch": 0.73, "learning_rate": 2.2651801991305567e-05, "loss": 0.1537, "step": 26200 }, { "epoch": 0.74, "learning_rate": 2.263777871266302e-05, "loss": 0.1593, "step": 26250 }, { "epoch": 0.74, "learning_rate": 2.2623755434020477e-05, "loss": 0.182, "step": 26300 }, { "epoch": 0.74, "learning_rate": 2.2609732155377927e-05, "loss": 0.1721, "step": 26350 }, { "epoch": 0.74, "learning_rate": 2.259570887673538e-05, "loss": 0.1922, "step": 26400 }, { "epoch": 0.74, "learning_rate": 2.2581685598092837e-05, "loss": 0.1703, "step": 26450 }, { "epoch": 0.74, "learning_rate": 2.2567662319450287e-05, "loss": 0.1731, "step": 26500 }, { "epoch": 0.74, "learning_rate": 2.255363904080774e-05, "loss": 0.1689, "step": 26550 }, { "epoch": 0.75, "learning_rate": 2.2539615762165194e-05, "loss": 0.1537, "step": 26600 }, { "epoch": 0.75, "learning_rate": 2.2525592483522647e-05, "loss": 0.161, "step": 26650 }, { "epoch": 0.75, "learning_rate": 2.2511569204880104e-05, "loss": 0.1812, "step": 26700 }, { "epoch": 0.75, "learning_rate": 2.2497545926237554e-05, "loss": 0.188, "step": 26750 }, { "epoch": 0.75, "learning_rate": 2.2483522647595007e-05, "loss": 0.2057, "step": 26800 }, { "epoch": 0.75, "learning_rate": 2.2469499368952464e-05, "loss": 0.1778, "step": 26850 }, { "epoch": 0.75, "learning_rate": 2.2455476090309914e-05, "loss": 0.1757, "step": 26900 }, { "epoch": 0.76, "learning_rate": 2.2441452811667367e-05, "loss": 0.196, "step": 26950 }, { "epoch": 0.76, "learning_rate": 2.242742953302482e-05, "loss": 0.1552, "step": 27000 }, { "epoch": 0.76, "eval_bleu": 95.8418, "eval_gen_len": 64.2587, "eval_loss": 0.2597999572753906, "eval_rouge1": 93.5569, "eval_rouge2": 89.8854, "eval_rougeL": 93.3668, "eval_rougeLsum": 93.3569, "eval_runtime": 3122.2549, "eval_samples_per_second": 0.961, "eval_steps_per_second": 0.24, "step": 27000 }, { "epoch": 0.76, "learning_rate": 2.2413406254382274e-05, "loss": 0.1963, "step": 27050 }, { "epoch": 0.76, "learning_rate": 2.239938297573973e-05, "loss": 0.1837, "step": 27100 }, { "epoch": 0.76, "learning_rate": 2.238535969709718e-05, "loss": 0.1722, "step": 27150 }, { "epoch": 0.76, "learning_rate": 2.2371336418454634e-05, "loss": 0.1698, "step": 27200 }, { "epoch": 0.76, "learning_rate": 2.235731313981209e-05, "loss": 0.1932, "step": 27250 }, { "epoch": 0.77, "learning_rate": 2.234328986116954e-05, "loss": 0.1519, "step": 27300 }, { "epoch": 0.77, "learning_rate": 2.2329266582526998e-05, "loss": 0.1744, "step": 27350 }, { "epoch": 0.77, "learning_rate": 2.2315243303884448e-05, "loss": 0.1672, "step": 27400 }, { "epoch": 0.77, "learning_rate": 2.23012200252419e-05, "loss": 0.1641, "step": 27450 }, { "epoch": 0.77, "learning_rate": 2.2287196746599358e-05, "loss": 0.1815, "step": 27500 }, { "epoch": 0.77, "learning_rate": 2.2273173467956808e-05, "loss": 0.1567, "step": 27550 }, { "epoch": 0.77, "learning_rate": 2.225915018931426e-05, "loss": 0.1621, "step": 27600 }, { "epoch": 0.78, "learning_rate": 2.2245126910671718e-05, "loss": 0.1489, "step": 27650 }, { "epoch": 0.78, "learning_rate": 2.2231103632029168e-05, "loss": 0.1692, "step": 27700 }, { "epoch": 0.78, "learning_rate": 2.2217080353386624e-05, "loss": 0.1856, "step": 27750 }, { "epoch": 0.78, "learning_rate": 2.2203057074744074e-05, "loss": 0.169, "step": 27800 }, { "epoch": 0.78, "learning_rate": 2.2189033796101528e-05, "loss": 0.1749, "step": 27850 }, { "epoch": 0.78, "learning_rate": 2.2175010517458985e-05, "loss": 0.1764, "step": 27900 }, { "epoch": 0.78, "learning_rate": 2.2160987238816434e-05, "loss": 0.1578, "step": 27950 }, { "epoch": 0.79, "learning_rate": 2.2146963960173888e-05, "loss": 0.1749, "step": 28000 }, { "epoch": 0.79, "eval_bleu": 95.8187, "eval_gen_len": 64.232, "eval_loss": 0.25857558846473694, "eval_rouge1": 93.7097, "eval_rouge2": 89.9991, "eval_rougeL": 93.5074, "eval_rougeLsum": 93.4927, "eval_runtime": 3115.4186, "eval_samples_per_second": 0.963, "eval_steps_per_second": 0.241, "step": 28000 }, { "epoch": 0.79, "learning_rate": 2.2132940681531345e-05, "loss": 0.1808, "step": 28050 }, { "epoch": 0.79, "learning_rate": 2.2118917402888795e-05, "loss": 0.1781, "step": 28100 }, { "epoch": 0.79, "learning_rate": 2.210489412424625e-05, "loss": 0.1684, "step": 28150 }, { "epoch": 0.79, "learning_rate": 2.20908708456037e-05, "loss": 0.1749, "step": 28200 }, { "epoch": 0.79, "learning_rate": 2.2076847566961155e-05, "loss": 0.1758, "step": 28250 }, { "epoch": 0.79, "learning_rate": 2.206282428831861e-05, "loss": 0.1811, "step": 28300 }, { "epoch": 0.8, "learning_rate": 2.204880100967606e-05, "loss": 0.1652, "step": 28350 }, { "epoch": 0.8, "learning_rate": 2.2034777731033515e-05, "loss": 0.1735, "step": 28400 }, { "epoch": 0.8, "learning_rate": 2.202075445239097e-05, "loss": 0.1735, "step": 28450 }, { "epoch": 0.8, "learning_rate": 2.200673117374842e-05, "loss": 0.1881, "step": 28500 }, { "epoch": 0.8, "learning_rate": 2.1992707895105878e-05, "loss": 0.1892, "step": 28550 }, { "epoch": 0.8, "learning_rate": 2.1978684616463328e-05, "loss": 0.1665, "step": 28600 }, { "epoch": 0.8, "learning_rate": 2.196466133782078e-05, "loss": 0.1866, "step": 28650 }, { "epoch": 0.8, "learning_rate": 2.1950638059178238e-05, "loss": 0.1756, "step": 28700 }, { "epoch": 0.81, "learning_rate": 2.1936614780535688e-05, "loss": 0.1945, "step": 28750 }, { "epoch": 0.81, "learning_rate": 2.1922591501893145e-05, "loss": 0.1882, "step": 28800 }, { "epoch": 0.81, "learning_rate": 2.1908568223250598e-05, "loss": 0.1693, "step": 28850 }, { "epoch": 0.81, "learning_rate": 2.1894544944608048e-05, "loss": 0.1653, "step": 28900 }, { "epoch": 0.81, "learning_rate": 2.1880521665965505e-05, "loss": 0.1688, "step": 28950 }, { "epoch": 0.81, "learning_rate": 2.1866498387322955e-05, "loss": 0.1725, "step": 29000 }, { "epoch": 0.81, "eval_bleu": 95.8531, "eval_gen_len": 64.207, "eval_loss": 0.25807130336761475, "eval_rouge1": 93.6865, "eval_rouge2": 90.02, "eval_rougeL": 93.4949, "eval_rougeLsum": 93.4886, "eval_runtime": 3103.2566, "eval_samples_per_second": 0.967, "eval_steps_per_second": 0.242, "step": 29000 }, { "epoch": 0.81, "learning_rate": 2.185247510868041e-05, "loss": 0.1597, "step": 29050 }, { "epoch": 0.82, "learning_rate": 2.1838451830037865e-05, "loss": 0.1639, "step": 29100 }, { "epoch": 0.82, "learning_rate": 2.1824428551395315e-05, "loss": 0.1748, "step": 29150 }, { "epoch": 0.82, "learning_rate": 2.1810405272752772e-05, "loss": 0.1734, "step": 29200 }, { "epoch": 0.82, "learning_rate": 2.1796381994110225e-05, "loss": 0.1783, "step": 29250 }, { "epoch": 0.82, "learning_rate": 2.1782358715467675e-05, "loss": 0.1872, "step": 29300 }, { "epoch": 0.82, "learning_rate": 2.1768335436825132e-05, "loss": 0.1727, "step": 29350 }, { "epoch": 0.82, "learning_rate": 2.1754312158182582e-05, "loss": 0.1607, "step": 29400 }, { "epoch": 0.83, "learning_rate": 2.1740288879540035e-05, "loss": 0.1702, "step": 29450 }, { "epoch": 0.83, "learning_rate": 2.1726265600897492e-05, "loss": 0.162, "step": 29500 }, { "epoch": 0.83, "learning_rate": 2.1712242322254942e-05, "loss": 0.2007, "step": 29550 }, { "epoch": 0.83, "learning_rate": 2.16982190436124e-05, "loss": 0.2098, "step": 29600 }, { "epoch": 0.83, "learning_rate": 2.1684195764969852e-05, "loss": 0.1707, "step": 29650 }, { "epoch": 0.83, "learning_rate": 2.1670172486327302e-05, "loss": 0.1658, "step": 29700 }, { "epoch": 0.83, "learning_rate": 2.165614920768476e-05, "loss": 0.1736, "step": 29750 }, { "epoch": 0.84, "learning_rate": 2.164212592904221e-05, "loss": 0.1759, "step": 29800 }, { "epoch": 0.84, "learning_rate": 2.1628102650399662e-05, "loss": 0.1915, "step": 29850 }, { "epoch": 0.84, "learning_rate": 2.161407937175712e-05, "loss": 0.1755, "step": 29900 }, { "epoch": 0.84, "learning_rate": 2.160005609311457e-05, "loss": 0.171, "step": 29950 }, { "epoch": 0.84, "learning_rate": 2.1586032814472026e-05, "loss": 0.1917, "step": 30000 }, { "epoch": 0.84, "eval_bleu": 95.8729, "eval_gen_len": 64.2307, "eval_loss": 0.2573055922985077, "eval_rouge1": 93.7203, "eval_rouge2": 90.0218, "eval_rougeL": 93.5266, "eval_rougeLsum": 93.517, "eval_runtime": 3112.1338, "eval_samples_per_second": 0.964, "eval_steps_per_second": 0.241, "step": 30000 }, { "epoch": 0.84, "learning_rate": 2.157200953582948e-05, "loss": 0.165, "step": 30050 }, { "epoch": 0.84, "learning_rate": 2.155798625718693e-05, "loss": 0.1712, "step": 30100 }, { "epoch": 0.85, "learning_rate": 2.1543962978544386e-05, "loss": 0.1821, "step": 30150 }, { "epoch": 0.85, "learning_rate": 2.1529939699901836e-05, "loss": 0.1696, "step": 30200 }, { "epoch": 0.85, "learning_rate": 2.1515916421259292e-05, "loss": 0.1666, "step": 30250 }, { "epoch": 0.85, "learning_rate": 2.1501893142616746e-05, "loss": 0.1935, "step": 30300 }, { "epoch": 0.85, "learning_rate": 2.1487869863974196e-05, "loss": 0.1819, "step": 30350 }, { "epoch": 0.85, "learning_rate": 2.1473846585331652e-05, "loss": 0.1606, "step": 30400 }, { "epoch": 0.85, "learning_rate": 2.1459823306689106e-05, "loss": 0.1591, "step": 30450 }, { "epoch": 0.86, "learning_rate": 2.1445800028046556e-05, "loss": 0.1602, "step": 30500 }, { "epoch": 0.86, "learning_rate": 2.1431776749404012e-05, "loss": 0.1569, "step": 30550 }, { "epoch": 0.86, "learning_rate": 2.1417753470761462e-05, "loss": 0.1721, "step": 30600 }, { "epoch": 0.86, "learning_rate": 2.140373019211892e-05, "loss": 0.1757, "step": 30650 }, { "epoch": 0.86, "learning_rate": 2.1389706913476373e-05, "loss": 0.192, "step": 30700 }, { "epoch": 0.86, "learning_rate": 2.1375683634833823e-05, "loss": 0.1957, "step": 30750 }, { "epoch": 0.86, "learning_rate": 2.136166035619128e-05, "loss": 0.1645, "step": 30800 }, { "epoch": 0.87, "learning_rate": 2.1347637077548733e-05, "loss": 0.1751, "step": 30850 }, { "epoch": 0.87, "learning_rate": 2.1333613798906183e-05, "loss": 0.1767, "step": 30900 }, { "epoch": 0.87, "learning_rate": 2.131959052026364e-05, "loss": 0.1697, "step": 30950 }, { "epoch": 0.87, "learning_rate": 2.130556724162109e-05, "loss": 0.1897, "step": 31000 }, { "epoch": 0.87, "eval_bleu": 95.8468, "eval_gen_len": 64.2037, "eval_loss": 0.25732484459877014, "eval_rouge1": 93.6811, "eval_rouge2": 89.9877, "eval_rougeL": 93.478, "eval_rougeLsum": 93.4745, "eval_runtime": 3156.1925, "eval_samples_per_second": 0.951, "eval_steps_per_second": 0.238, "step": 31000 }, { "epoch": 0.87, "learning_rate": 2.1291543962978546e-05, "loss": 0.1742, "step": 31050 }, { "epoch": 0.87, "learning_rate": 2.1277520684336e-05, "loss": 0.1663, "step": 31100 }, { "epoch": 0.87, "learning_rate": 2.126349740569345e-05, "loss": 0.1831, "step": 31150 }, { "epoch": 0.88, "learning_rate": 2.1249474127050906e-05, "loss": 0.1653, "step": 31200 }, { "epoch": 0.88, "learning_rate": 2.123545084840836e-05, "loss": 0.1739, "step": 31250 }, { "epoch": 0.88, "learning_rate": 2.122142756976581e-05, "loss": 0.1741, "step": 31300 }, { "epoch": 0.88, "learning_rate": 2.1207404291123266e-05, "loss": 0.1755, "step": 31350 }, { "epoch": 0.88, "learning_rate": 2.1193381012480716e-05, "loss": 0.164, "step": 31400 }, { "epoch": 0.88, "learning_rate": 2.1179357733838173e-05, "loss": 0.1624, "step": 31450 }, { "epoch": 0.88, "learning_rate": 2.1165334455195626e-05, "loss": 0.1716, "step": 31500 }, { "epoch": 0.88, "learning_rate": 2.1151311176553076e-05, "loss": 0.1759, "step": 31550 }, { "epoch": 0.89, "learning_rate": 2.1137287897910533e-05, "loss": 0.1845, "step": 31600 }, { "epoch": 0.89, "learning_rate": 2.1123264619267986e-05, "loss": 0.1637, "step": 31650 }, { "epoch": 0.89, "learning_rate": 2.110924134062544e-05, "loss": 0.1939, "step": 31700 }, { "epoch": 0.89, "learning_rate": 2.1095218061982893e-05, "loss": 0.1747, "step": 31750 }, { "epoch": 0.89, "learning_rate": 2.1081194783340343e-05, "loss": 0.1727, "step": 31800 }, { "epoch": 0.89, "learning_rate": 2.10671715046978e-05, "loss": 0.1626, "step": 31850 }, { "epoch": 0.89, "learning_rate": 2.1053148226055253e-05, "loss": 0.1885, "step": 31900 }, { "epoch": 0.9, "learning_rate": 2.1039124947412703e-05, "loss": 0.1812, "step": 31950 }, { "epoch": 0.9, "learning_rate": 2.102510166877016e-05, "loss": 0.1925, "step": 32000 }, { "epoch": 0.9, "eval_bleu": 95.8275, "eval_gen_len": 64.1743, "eval_loss": 0.2557416260242462, "eval_rouge1": 93.6863, "eval_rouge2": 89.9362, "eval_rougeL": 93.482, "eval_rougeLsum": 93.4648, "eval_runtime": 3095.3083, "eval_samples_per_second": 0.969, "eval_steps_per_second": 0.242, "step": 32000 }, { "epoch": 0.9, "learning_rate": 2.1011078390127613e-05, "loss": 0.1858, "step": 32050 }, { "epoch": 0.9, "learning_rate": 2.0997055111485067e-05, "loss": 0.1588, "step": 32100 }, { "epoch": 0.9, "learning_rate": 2.098303183284252e-05, "loss": 0.1797, "step": 32150 }, { "epoch": 0.9, "learning_rate": 2.096900855419997e-05, "loss": 0.1846, "step": 32200 }, { "epoch": 0.9, "learning_rate": 2.0954985275557427e-05, "loss": 0.185, "step": 32250 }, { "epoch": 0.91, "learning_rate": 2.094096199691488e-05, "loss": 0.1565, "step": 32300 }, { "epoch": 0.91, "learning_rate": 2.092693871827233e-05, "loss": 0.2177, "step": 32350 }, { "epoch": 0.91, "learning_rate": 2.0912915439629787e-05, "loss": 0.1861, "step": 32400 }, { "epoch": 0.91, "learning_rate": 2.089889216098724e-05, "loss": 0.1667, "step": 32450 }, { "epoch": 0.91, "learning_rate": 2.0884868882344693e-05, "loss": 0.1788, "step": 32500 }, { "epoch": 0.91, "learning_rate": 2.0870845603702147e-05, "loss": 0.2086, "step": 32550 }, { "epoch": 0.91, "learning_rate": 2.0856822325059597e-05, "loss": 0.1607, "step": 32600 }, { "epoch": 0.92, "learning_rate": 2.0842799046417053e-05, "loss": 0.1655, "step": 32650 }, { "epoch": 0.92, "learning_rate": 2.0828775767774507e-05, "loss": 0.1798, "step": 32700 }, { "epoch": 0.92, "learning_rate": 2.0814752489131957e-05, "loss": 0.1953, "step": 32750 }, { "epoch": 0.92, "learning_rate": 2.0800729210489414e-05, "loss": 0.166, "step": 32800 }, { "epoch": 0.92, "learning_rate": 2.0786705931846867e-05, "loss": 0.1718, "step": 32850 }, { "epoch": 0.92, "learning_rate": 2.077268265320432e-05, "loss": 0.1888, "step": 32900 }, { "epoch": 0.92, "learning_rate": 2.0758659374561774e-05, "loss": 0.1788, "step": 32950 }, { "epoch": 0.93, "learning_rate": 2.0744636095919224e-05, "loss": 0.1853, "step": 33000 }, { "epoch": 0.93, "eval_bleu": 95.8702, "eval_gen_len": 64.183, "eval_loss": 0.2551117241382599, "eval_rouge1": 93.7191, "eval_rouge2": 90.0474, "eval_rougeL": 93.5286, "eval_rougeLsum": 93.516, "eval_runtime": 3134.3467, "eval_samples_per_second": 0.957, "eval_steps_per_second": 0.239, "step": 33000 }, { "epoch": 0.93, "learning_rate": 2.073061281727668e-05, "loss": 0.1964, "step": 33050 }, { "epoch": 0.93, "learning_rate": 2.0716589538634134e-05, "loss": 0.178, "step": 33100 }, { "epoch": 0.93, "learning_rate": 2.0702566259991587e-05, "loss": 0.1911, "step": 33150 }, { "epoch": 0.93, "learning_rate": 2.068854298134904e-05, "loss": 0.1705, "step": 33200 }, { "epoch": 0.93, "learning_rate": 2.0674519702706494e-05, "loss": 0.1835, "step": 33250 }, { "epoch": 0.93, "learning_rate": 2.0660496424063947e-05, "loss": 0.1617, "step": 33300 }, { "epoch": 0.94, "learning_rate": 2.06464731454214e-05, "loss": 0.169, "step": 33350 }, { "epoch": 0.94, "learning_rate": 2.063244986677885e-05, "loss": 0.1738, "step": 33400 }, { "epoch": 0.94, "learning_rate": 2.0618426588136307e-05, "loss": 0.1648, "step": 33450 }, { "epoch": 0.94, "learning_rate": 2.060440330949376e-05, "loss": 0.1822, "step": 33500 }, { "epoch": 0.94, "learning_rate": 2.0590380030851214e-05, "loss": 0.1719, "step": 33550 }, { "epoch": 0.94, "learning_rate": 2.0576356752208667e-05, "loss": 0.1899, "step": 33600 }, { "epoch": 0.94, "learning_rate": 2.056233347356612e-05, "loss": 0.1754, "step": 33650 }, { "epoch": 0.95, "learning_rate": 2.0548310194923574e-05, "loss": 0.1693, "step": 33700 }, { "epoch": 0.95, "learning_rate": 2.0534286916281027e-05, "loss": 0.1578, "step": 33750 }, { "epoch": 0.95, "learning_rate": 2.0520263637638477e-05, "loss": 0.1757, "step": 33800 }, { "epoch": 0.95, "learning_rate": 2.0506240358995934e-05, "loss": 0.1735, "step": 33850 }, { "epoch": 0.95, "learning_rate": 2.0492217080353387e-05, "loss": 0.1732, "step": 33900 }, { "epoch": 0.95, "learning_rate": 2.047819380171084e-05, "loss": 0.1645, "step": 33950 }, { "epoch": 0.95, "learning_rate": 2.0464170523068294e-05, "loss": 0.166, "step": 34000 }, { "epoch": 0.95, "eval_bleu": 95.8966, "eval_gen_len": 64.227, "eval_loss": 0.2542245090007782, "eval_rouge1": 93.6823, "eval_rouge2": 89.9822, "eval_rougeL": 93.4675, "eval_rougeLsum": 93.4534, "eval_runtime": 3105.0348, "eval_samples_per_second": 0.966, "eval_steps_per_second": 0.242, "step": 34000 }, { "epoch": 0.95, "learning_rate": 2.0450147244425747e-05, "loss": 0.1736, "step": 34050 }, { "epoch": 0.96, "learning_rate": 2.04361239657832e-05, "loss": 0.1888, "step": 34100 }, { "epoch": 0.96, "learning_rate": 2.0422100687140654e-05, "loss": 0.1701, "step": 34150 }, { "epoch": 0.96, "learning_rate": 2.0408077408498108e-05, "loss": 0.151, "step": 34200 }, { "epoch": 0.96, "learning_rate": 2.039405412985556e-05, "loss": 0.1776, "step": 34250 }, { "epoch": 0.96, "learning_rate": 2.0380030851213014e-05, "loss": 0.1928, "step": 34300 }, { "epoch": 0.96, "learning_rate": 2.0366007572570468e-05, "loss": 0.1762, "step": 34350 }, { "epoch": 0.96, "learning_rate": 2.035198429392792e-05, "loss": 0.1631, "step": 34400 }, { "epoch": 0.97, "learning_rate": 2.0337961015285374e-05, "loss": 0.1913, "step": 34450 }, { "epoch": 0.97, "learning_rate": 2.0323937736642828e-05, "loss": 0.1662, "step": 34500 }, { "epoch": 0.97, "learning_rate": 2.030991445800028e-05, "loss": 0.1691, "step": 34550 }, { "epoch": 0.97, "learning_rate": 2.0295891179357734e-05, "loss": 0.1664, "step": 34600 }, { "epoch": 0.97, "learning_rate": 2.0281867900715188e-05, "loss": 0.1526, "step": 34650 }, { "epoch": 0.97, "learning_rate": 2.026784462207264e-05, "loss": 0.1816, "step": 34700 }, { "epoch": 0.97, "learning_rate": 2.0253821343430095e-05, "loss": 0.1892, "step": 34750 }, { "epoch": 0.98, "learning_rate": 2.0239798064787548e-05, "loss": 0.1632, "step": 34800 }, { "epoch": 0.98, "learning_rate": 2.0225774786145e-05, "loss": 0.1542, "step": 34850 }, { "epoch": 0.98, "learning_rate": 2.0211751507502455e-05, "loss": 0.1529, "step": 34900 }, { "epoch": 0.98, "learning_rate": 2.0197728228859908e-05, "loss": 0.1597, "step": 34950 }, { "epoch": 0.98, "learning_rate": 2.018370495021736e-05, "loss": 0.1795, "step": 35000 }, { "epoch": 0.98, "eval_bleu": 95.9192, "eval_gen_len": 64.2247, "eval_loss": 0.2535455822944641, "eval_rouge1": 93.7504, "eval_rouge2": 90.1079, "eval_rougeL": 93.5482, "eval_rougeLsum": 93.5289, "eval_runtime": 3098.1952, "eval_samples_per_second": 0.968, "eval_steps_per_second": 0.242, "step": 35000 }, { "epoch": 0.98, "learning_rate": 2.0169681671574815e-05, "loss": 0.1634, "step": 35050 }, { "epoch": 0.98, "learning_rate": 2.0155658392932268e-05, "loss": 0.1723, "step": 35100 }, { "epoch": 0.99, "learning_rate": 2.014163511428972e-05, "loss": 0.1624, "step": 35150 }, { "epoch": 0.99, "learning_rate": 2.0127611835647175e-05, "loss": 0.1437, "step": 35200 }, { "epoch": 0.99, "learning_rate": 2.0113588557004628e-05, "loss": 0.1638, "step": 35250 }, { "epoch": 0.99, "learning_rate": 2.009956527836208e-05, "loss": 0.153, "step": 35300 }, { "epoch": 0.99, "learning_rate": 2.0085541999719535e-05, "loss": 0.1749, "step": 35350 }, { "epoch": 0.99, "learning_rate": 2.0071518721076988e-05, "loss": 0.1802, "step": 35400 }, { "epoch": 0.99, "learning_rate": 2.005749544243444e-05, "loss": 0.1794, "step": 35450 }, { "epoch": 1.0, "learning_rate": 2.0043472163791895e-05, "loss": 0.1778, "step": 35500 }, { "epoch": 1.0, "learning_rate": 2.0029448885149348e-05, "loss": 0.1608, "step": 35550 }, { "epoch": 1.0, "learning_rate": 2.00154256065068e-05, "loss": 0.1722, "step": 35600 }, { "epoch": 1.0, "learning_rate": 2.0001402327864255e-05, "loss": 0.1674, "step": 35650 }, { "epoch": 1.0, "learning_rate": 1.9987379049221708e-05, "loss": 0.1786, "step": 35700 }, { "epoch": 1.0, "learning_rate": 1.997335577057916e-05, "loss": 0.1597, "step": 35750 }, { "epoch": 1.0, "learning_rate": 1.9959332491936615e-05, "loss": 0.1684, "step": 35800 }, { "epoch": 1.01, "learning_rate": 1.994530921329407e-05, "loss": 0.1526, "step": 35850 }, { "epoch": 1.01, "learning_rate": 1.9931285934651522e-05, "loss": 0.1523, "step": 35900 }, { "epoch": 1.01, "learning_rate": 1.9917262656008975e-05, "loss": 0.1743, "step": 35950 }, { "epoch": 1.01, "learning_rate": 1.990323937736643e-05, "loss": 0.152, "step": 36000 }, { "epoch": 1.01, "eval_bleu": 95.9457, "eval_gen_len": 64.2747, "eval_loss": 0.2530899941921234, "eval_rouge1": 93.7477, "eval_rouge2": 90.0914, "eval_rougeL": 93.5409, "eval_rougeLsum": 93.5214, "eval_runtime": 3128.4817, "eval_samples_per_second": 0.959, "eval_steps_per_second": 0.24, "step": 36000 }, { "epoch": 1.01, "learning_rate": 1.9889216098723885e-05, "loss": 0.1547, "step": 36050 }, { "epoch": 1.01, "learning_rate": 1.9875192820081335e-05, "loss": 0.1586, "step": 36100 }, { "epoch": 1.01, "learning_rate": 1.986116954143879e-05, "loss": 0.1778, "step": 36150 }, { "epoch": 1.02, "learning_rate": 1.9847146262796242e-05, "loss": 0.1466, "step": 36200 }, { "epoch": 1.02, "learning_rate": 1.9833122984153695e-05, "loss": 0.1561, "step": 36250 }, { "epoch": 1.02, "learning_rate": 1.981909970551115e-05, "loss": 0.1611, "step": 36300 }, { "epoch": 1.02, "learning_rate": 1.9805076426868602e-05, "loss": 0.1547, "step": 36350 }, { "epoch": 1.02, "learning_rate": 1.9791053148226055e-05, "loss": 0.1413, "step": 36400 }, { "epoch": 1.02, "learning_rate": 1.9777029869583512e-05, "loss": 0.1641, "step": 36450 }, { "epoch": 1.02, "learning_rate": 1.9763006590940962e-05, "loss": 0.1591, "step": 36500 }, { "epoch": 1.03, "learning_rate": 1.9748983312298415e-05, "loss": 0.1604, "step": 36550 }, { "epoch": 1.03, "learning_rate": 1.973496003365587e-05, "loss": 0.163, "step": 36600 }, { "epoch": 1.03, "learning_rate": 1.9720936755013322e-05, "loss": 0.1517, "step": 36650 }, { "epoch": 1.03, "learning_rate": 1.9706913476370775e-05, "loss": 0.1675, "step": 36700 }, { "epoch": 1.03, "learning_rate": 1.969289019772823e-05, "loss": 0.1729, "step": 36750 }, { "epoch": 1.03, "learning_rate": 1.9678866919085682e-05, "loss": 0.1564, "step": 36800 }, { "epoch": 1.03, "learning_rate": 1.966484364044314e-05, "loss": 0.1807, "step": 36850 }, { "epoch": 1.03, "learning_rate": 1.965082036180059e-05, "loss": 0.161, "step": 36900 }, { "epoch": 1.04, "learning_rate": 1.9636797083158042e-05, "loss": 0.1514, "step": 36950 }, { "epoch": 1.04, "learning_rate": 1.9622773804515496e-05, "loss": 0.1548, "step": 37000 }, { "epoch": 1.04, "eval_bleu": 95.9217, "eval_gen_len": 64.223, "eval_loss": 0.25345203280448914, "eval_rouge1": 93.7839, "eval_rouge2": 90.1221, "eval_rougeL": 93.5635, "eval_rougeLsum": 93.554, "eval_runtime": 3095.5847, "eval_samples_per_second": 0.969, "eval_steps_per_second": 0.242, "step": 37000 }, { "epoch": 1.04, "learning_rate": 1.960875052587295e-05, "loss": 0.1647, "step": 37050 }, { "epoch": 1.04, "learning_rate": 1.9594727247230402e-05, "loss": 0.1629, "step": 37100 }, { "epoch": 1.04, "learning_rate": 1.9580703968587856e-05, "loss": 0.1805, "step": 37150 }, { "epoch": 1.04, "learning_rate": 1.956668068994531e-05, "loss": 0.1727, "step": 37200 }, { "epoch": 1.04, "learning_rate": 1.9552657411302766e-05, "loss": 0.1967, "step": 37250 }, { "epoch": 1.05, "learning_rate": 1.9538634132660216e-05, "loss": 0.1551, "step": 37300 }, { "epoch": 1.05, "learning_rate": 1.952461085401767e-05, "loss": 0.1554, "step": 37350 }, { "epoch": 1.05, "learning_rate": 1.9510587575375122e-05, "loss": 0.1491, "step": 37400 }, { "epoch": 1.05, "learning_rate": 1.9496564296732576e-05, "loss": 0.1572, "step": 37450 }, { "epoch": 1.05, "learning_rate": 1.9482541018090033e-05, "loss": 0.1706, "step": 37500 }, { "epoch": 1.05, "learning_rate": 1.9468517739447483e-05, "loss": 0.1632, "step": 37550 }, { "epoch": 1.05, "learning_rate": 1.9454494460804936e-05, "loss": 0.1609, "step": 37600 }, { "epoch": 1.06, "learning_rate": 1.9440471182162393e-05, "loss": 0.1672, "step": 37650 }, { "epoch": 1.06, "learning_rate": 1.9426447903519843e-05, "loss": 0.1535, "step": 37700 }, { "epoch": 1.06, "learning_rate": 1.9412424624877296e-05, "loss": 0.1545, "step": 37750 }, { "epoch": 1.06, "learning_rate": 1.939840134623475e-05, "loss": 0.1606, "step": 37800 }, { "epoch": 1.06, "learning_rate": 1.9384378067592203e-05, "loss": 0.1475, "step": 37850 }, { "epoch": 1.06, "learning_rate": 1.937035478894966e-05, "loss": 0.1663, "step": 37900 }, { "epoch": 1.06, "learning_rate": 1.935633151030711e-05, "loss": 0.1698, "step": 37950 }, { "epoch": 1.07, "learning_rate": 1.9342308231664563e-05, "loss": 0.1565, "step": 38000 }, { "epoch": 1.07, "eval_bleu": 95.9359, "eval_gen_len": 64.2767, "eval_loss": 0.25277698040008545, "eval_rouge1": 93.7483, "eval_rouge2": 90.0899, "eval_rougeL": 93.5433, "eval_rougeLsum": 93.5171, "eval_runtime": 3125.498, "eval_samples_per_second": 0.96, "eval_steps_per_second": 0.24, "step": 38000 }, { "epoch": 1.07, "learning_rate": 1.932828495302202e-05, "loss": 0.1686, "step": 38050 }, { "epoch": 1.07, "learning_rate": 1.931426167437947e-05, "loss": 0.1466, "step": 38100 }, { "epoch": 1.07, "learning_rate": 1.9300238395736923e-05, "loss": 0.1817, "step": 38150 }, { "epoch": 1.07, "learning_rate": 1.9286215117094376e-05, "loss": 0.1765, "step": 38200 }, { "epoch": 1.07, "learning_rate": 1.927219183845183e-05, "loss": 0.1642, "step": 38250 }, { "epoch": 1.07, "learning_rate": 1.9258168559809286e-05, "loss": 0.1732, "step": 38300 }, { "epoch": 1.08, "learning_rate": 1.9244145281166736e-05, "loss": 0.1462, "step": 38350 }, { "epoch": 1.08, "learning_rate": 1.923012200252419e-05, "loss": 0.167, "step": 38400 }, { "epoch": 1.08, "learning_rate": 1.9216098723881646e-05, "loss": 0.1484, "step": 38450 }, { "epoch": 1.08, "learning_rate": 1.9202075445239096e-05, "loss": 0.1661, "step": 38500 }, { "epoch": 1.08, "learning_rate": 1.918805216659655e-05, "loss": 0.1457, "step": 38550 }, { "epoch": 1.08, "learning_rate": 1.9174028887954003e-05, "loss": 0.1445, "step": 38600 }, { "epoch": 1.08, "learning_rate": 1.9160005609311456e-05, "loss": 0.1499, "step": 38650 }, { "epoch": 1.09, "learning_rate": 1.9145982330668913e-05, "loss": 0.1495, "step": 38700 }, { "epoch": 1.09, "learning_rate": 1.9131959052026363e-05, "loss": 0.1752, "step": 38750 }, { "epoch": 1.09, "learning_rate": 1.9117935773383816e-05, "loss": 0.174, "step": 38800 }, { "epoch": 1.09, "learning_rate": 1.9103912494741273e-05, "loss": 0.1513, "step": 38850 }, { "epoch": 1.09, "learning_rate": 1.9089889216098723e-05, "loss": 0.1727, "step": 38900 }, { "epoch": 1.09, "learning_rate": 1.907586593745618e-05, "loss": 0.1738, "step": 38950 }, { "epoch": 1.09, "learning_rate": 1.906184265881363e-05, "loss": 0.1695, "step": 39000 }, { "epoch": 1.09, "eval_bleu": 95.9169, "eval_gen_len": 64.224, "eval_loss": 0.2530539035797119, "eval_rouge1": 93.7543, "eval_rouge2": 90.0728, "eval_rougeL": 93.5572, "eval_rougeLsum": 93.5471, "eval_runtime": 3118.1985, "eval_samples_per_second": 0.962, "eval_steps_per_second": 0.241, "step": 39000 }, { "epoch": 1.1, "learning_rate": 1.9047819380171083e-05, "loss": 0.1575, "step": 39050 }, { "epoch": 1.1, "learning_rate": 1.903379610152854e-05, "loss": 0.1706, "step": 39100 }, { "epoch": 1.1, "learning_rate": 1.901977282288599e-05, "loss": 0.1483, "step": 39150 }, { "epoch": 1.1, "learning_rate": 1.9005749544243443e-05, "loss": 0.1535, "step": 39200 }, { "epoch": 1.1, "learning_rate": 1.89917262656009e-05, "loss": 0.1454, "step": 39250 }, { "epoch": 1.1, "learning_rate": 1.897770298695835e-05, "loss": 0.1614, "step": 39300 }, { "epoch": 1.1, "learning_rate": 1.8963679708315807e-05, "loss": 0.1584, "step": 39350 }, { "epoch": 1.11, "learning_rate": 1.8949656429673257e-05, "loss": 0.1733, "step": 39400 }, { "epoch": 1.11, "learning_rate": 1.893563315103071e-05, "loss": 0.1391, "step": 39450 }, { "epoch": 1.11, "learning_rate": 1.8921609872388167e-05, "loss": 0.1517, "step": 39500 }, { "epoch": 1.11, "learning_rate": 1.8907586593745617e-05, "loss": 0.152, "step": 39550 }, { "epoch": 1.11, "learning_rate": 1.889356331510307e-05, "loss": 0.1458, "step": 39600 }, { "epoch": 1.11, "learning_rate": 1.8879540036460527e-05, "loss": 0.1675, "step": 39650 }, { "epoch": 1.11, "learning_rate": 1.8865516757817977e-05, "loss": 0.1554, "step": 39700 }, { "epoch": 1.11, "learning_rate": 1.8851493479175434e-05, "loss": 0.169, "step": 39750 }, { "epoch": 1.12, "learning_rate": 1.8837470200532884e-05, "loss": 0.1841, "step": 39800 }, { "epoch": 1.12, "learning_rate": 1.8823446921890337e-05, "loss": 0.1717, "step": 39850 }, { "epoch": 1.12, "learning_rate": 1.8809423643247794e-05, "loss": 0.1386, "step": 39900 }, { "epoch": 1.12, "learning_rate": 1.8795400364605244e-05, "loss": 0.1642, "step": 39950 }, { "epoch": 1.12, "learning_rate": 1.87813770859627e-05, "loss": 0.1804, "step": 40000 }, { "epoch": 1.12, "eval_bleu": 95.9129, "eval_gen_len": 64.2003, "eval_loss": 0.2509741187095642, "eval_rouge1": 93.8069, "eval_rouge2": 90.1198, "eval_rougeL": 93.5952, "eval_rougeLsum": 93.5824, "eval_runtime": 3104.9942, "eval_samples_per_second": 0.966, "eval_steps_per_second": 0.242, "step": 40000 }, { "epoch": 1.12, "learning_rate": 1.8767353807320154e-05, "loss": 0.1632, "step": 40050 }, { "epoch": 1.12, "learning_rate": 1.8753330528677604e-05, "loss": 0.1432, "step": 40100 }, { "epoch": 1.13, "learning_rate": 1.873930725003506e-05, "loss": 0.1485, "step": 40150 }, { "epoch": 1.13, "learning_rate": 1.872528397139251e-05, "loss": 0.1397, "step": 40200 }, { "epoch": 1.13, "learning_rate": 1.8711260692749964e-05, "loss": 0.1595, "step": 40250 }, { "epoch": 1.13, "learning_rate": 1.869723741410742e-05, "loss": 0.1539, "step": 40300 }, { "epoch": 1.13, "learning_rate": 1.868321413546487e-05, "loss": 0.1635, "step": 40350 }, { "epoch": 1.13, "learning_rate": 1.8669190856822327e-05, "loss": 0.1709, "step": 40400 }, { "epoch": 1.13, "learning_rate": 1.865516757817978e-05, "loss": 0.1554, "step": 40450 }, { "epoch": 1.14, "learning_rate": 1.864114429953723e-05, "loss": 0.1597, "step": 40500 }, { "epoch": 1.14, "learning_rate": 1.8627121020894687e-05, "loss": 0.1652, "step": 40550 }, { "epoch": 1.14, "learning_rate": 1.8613097742252137e-05, "loss": 0.1794, "step": 40600 }, { "epoch": 1.14, "learning_rate": 1.859907446360959e-05, "loss": 0.159, "step": 40650 }, { "epoch": 1.14, "learning_rate": 1.8585051184967047e-05, "loss": 0.1611, "step": 40700 }, { "epoch": 1.14, "learning_rate": 1.8571027906324497e-05, "loss": 0.1545, "step": 40750 }, { "epoch": 1.14, "learning_rate": 1.8557004627681954e-05, "loss": 0.1458, "step": 40800 }, { "epoch": 1.15, "learning_rate": 1.8542981349039408e-05, "loss": 0.1745, "step": 40850 }, { "epoch": 1.15, "learning_rate": 1.8528958070396858e-05, "loss": 0.1612, "step": 40900 }, { "epoch": 1.15, "learning_rate": 1.8514934791754314e-05, "loss": 0.1589, "step": 40950 }, { "epoch": 1.15, "learning_rate": 1.8500911513111764e-05, "loss": 0.1421, "step": 41000 }, { "epoch": 1.15, "eval_bleu": 95.9507, "eval_gen_len": 64.2133, "eval_loss": 0.2519792318344116, "eval_rouge1": 93.805, "eval_rouge2": 90.1779, "eval_rougeL": 93.6222, "eval_rougeLsum": 93.6155, "eval_runtime": 3133.4577, "eval_samples_per_second": 0.957, "eval_steps_per_second": 0.239, "step": 41000 }, { "epoch": 1.15, "learning_rate": 1.8486888234469218e-05, "loss": 0.1424, "step": 41050 }, { "epoch": 1.15, "learning_rate": 1.8472864955826674e-05, "loss": 0.1578, "step": 41100 }, { "epoch": 1.15, "learning_rate": 1.8458841677184124e-05, "loss": 0.1553, "step": 41150 }, { "epoch": 1.16, "learning_rate": 1.844481839854158e-05, "loss": 0.1969, "step": 41200 }, { "epoch": 1.16, "learning_rate": 1.8430795119899034e-05, "loss": 0.1674, "step": 41250 }, { "epoch": 1.16, "learning_rate": 1.8416771841256484e-05, "loss": 0.1763, "step": 41300 }, { "epoch": 1.16, "learning_rate": 1.840274856261394e-05, "loss": 0.1751, "step": 41350 }, { "epoch": 1.16, "learning_rate": 1.838872528397139e-05, "loss": 0.1588, "step": 41400 }, { "epoch": 1.16, "learning_rate": 1.8374702005328848e-05, "loss": 0.173, "step": 41450 }, { "epoch": 1.16, "learning_rate": 1.83606787266863e-05, "loss": 0.1759, "step": 41500 }, { "epoch": 1.17, "learning_rate": 1.834665544804375e-05, "loss": 0.1614, "step": 41550 }, { "epoch": 1.17, "learning_rate": 1.8332632169401208e-05, "loss": 0.1463, "step": 41600 }, { "epoch": 1.17, "learning_rate": 1.831860889075866e-05, "loss": 0.1825, "step": 41650 }, { "epoch": 1.17, "learning_rate": 1.830458561211611e-05, "loss": 0.1483, "step": 41700 }, { "epoch": 1.17, "learning_rate": 1.8290562333473568e-05, "loss": 0.1651, "step": 41750 }, { "epoch": 1.17, "learning_rate": 1.8276539054831018e-05, "loss": 0.1602, "step": 41800 }, { "epoch": 1.17, "learning_rate": 1.8262515776188475e-05, "loss": 0.1462, "step": 41850 }, { "epoch": 1.18, "learning_rate": 1.8248492497545928e-05, "loss": 0.1686, "step": 41900 }, { "epoch": 1.18, "learning_rate": 1.8234469218903378e-05, "loss": 0.1474, "step": 41950 }, { "epoch": 1.18, "learning_rate": 1.8220445940260835e-05, "loss": 0.1594, "step": 42000 }, { "epoch": 1.18, "eval_bleu": 95.9473, "eval_gen_len": 64.235, "eval_loss": 0.2515351176261902, "eval_rouge1": 93.7671, "eval_rouge2": 90.1013, "eval_rougeL": 93.575, "eval_rougeLsum": 93.5369, "eval_runtime": 3108.2504, "eval_samples_per_second": 0.965, "eval_steps_per_second": 0.241, "step": 42000 }, { "epoch": 1.18, "learning_rate": 1.8206422661618288e-05, "loss": 0.1723, "step": 42050 }, { "epoch": 1.18, "learning_rate": 1.8192399382975738e-05, "loss": 0.1553, "step": 42100 }, { "epoch": 1.18, "learning_rate": 1.8178376104333195e-05, "loss": 0.1709, "step": 42150 }, { "epoch": 1.18, "learning_rate": 1.8164352825690645e-05, "loss": 0.1861, "step": 42200 }, { "epoch": 1.18, "learning_rate": 1.81503295470481e-05, "loss": 0.1631, "step": 42250 }, { "epoch": 1.19, "learning_rate": 1.8136306268405555e-05, "loss": 0.1657, "step": 42300 }, { "epoch": 1.19, "learning_rate": 1.8122282989763005e-05, "loss": 0.1516, "step": 42350 }, { "epoch": 1.19, "learning_rate": 1.810825971112046e-05, "loss": 0.1532, "step": 42400 }, { "epoch": 1.19, "learning_rate": 1.8094236432477915e-05, "loss": 0.1428, "step": 42450 }, { "epoch": 1.19, "learning_rate": 1.8080213153835365e-05, "loss": 0.1493, "step": 42500 }, { "epoch": 1.19, "learning_rate": 1.806618987519282e-05, "loss": 0.1378, "step": 42550 }, { "epoch": 1.19, "learning_rate": 1.805216659655027e-05, "loss": 0.144, "step": 42600 }, { "epoch": 1.2, "learning_rate": 1.803814331790773e-05, "loss": 0.1685, "step": 42650 }, { "epoch": 1.2, "learning_rate": 1.8024120039265182e-05, "loss": 0.1612, "step": 42700 }, { "epoch": 1.2, "learning_rate": 1.8010096760622632e-05, "loss": 0.1678, "step": 42750 }, { "epoch": 1.2, "learning_rate": 1.799607348198009e-05, "loss": 0.1843, "step": 42800 }, { "epoch": 1.2, "learning_rate": 1.7982050203337542e-05, "loss": 0.1592, "step": 42850 }, { "epoch": 1.2, "learning_rate": 1.7968026924694995e-05, "loss": 0.1376, "step": 42900 }, { "epoch": 1.2, "learning_rate": 1.795400364605245e-05, "loss": 0.1777, "step": 42950 }, { "epoch": 1.21, "learning_rate": 1.79399803674099e-05, "loss": 0.1343, "step": 43000 }, { "epoch": 1.21, "eval_bleu": 95.9595, "eval_gen_len": 64.2177, "eval_loss": 0.25027555227279663, "eval_rouge1": 93.7701, "eval_rouge2": 90.1732, "eval_rougeL": 93.607, "eval_rougeLsum": 93.6046, "eval_runtime": 3107.9422, "eval_samples_per_second": 0.965, "eval_steps_per_second": 0.241, "step": 43000 }, { "epoch": 1.21, "learning_rate": 1.7925957088767355e-05, "loss": 0.1539, "step": 43050 }, { "epoch": 1.21, "learning_rate": 1.791193381012481e-05, "loss": 0.1527, "step": 43100 }, { "epoch": 1.21, "learning_rate": 1.789791053148226e-05, "loss": 0.1821, "step": 43150 }, { "epoch": 1.21, "learning_rate": 1.7883887252839715e-05, "loss": 0.1645, "step": 43200 }, { "epoch": 1.21, "learning_rate": 1.786986397419717e-05, "loss": 0.1596, "step": 43250 }, { "epoch": 1.21, "learning_rate": 1.7855840695554622e-05, "loss": 0.1394, "step": 43300 }, { "epoch": 1.22, "learning_rate": 1.7841817416912075e-05, "loss": 0.1526, "step": 43350 }, { "epoch": 1.22, "learning_rate": 1.7827794138269525e-05, "loss": 0.1529, "step": 43400 }, { "epoch": 1.22, "learning_rate": 1.7813770859626982e-05, "loss": 0.1662, "step": 43450 }, { "epoch": 1.22, "learning_rate": 1.7799747580984435e-05, "loss": 0.1572, "step": 43500 }, { "epoch": 1.22, "learning_rate": 1.7785724302341885e-05, "loss": 0.168, "step": 43550 }, { "epoch": 1.22, "learning_rate": 1.7771701023699342e-05, "loss": 0.1442, "step": 43600 }, { "epoch": 1.22, "learning_rate": 1.7757677745056796e-05, "loss": 0.15, "step": 43650 }, { "epoch": 1.23, "learning_rate": 1.774365446641425e-05, "loss": 0.1666, "step": 43700 }, { "epoch": 1.23, "learning_rate": 1.7729631187771702e-05, "loss": 0.1609, "step": 43750 }, { "epoch": 1.23, "learning_rate": 1.7715607909129152e-05, "loss": 0.1732, "step": 43800 }, { "epoch": 1.23, "learning_rate": 1.770158463048661e-05, "loss": 0.163, "step": 43850 }, { "epoch": 1.23, "learning_rate": 1.7687561351844062e-05, "loss": 0.1482, "step": 43900 }, { "epoch": 1.23, "learning_rate": 1.7673538073201512e-05, "loss": 0.1489, "step": 43950 }, { "epoch": 1.23, "learning_rate": 1.765951479455897e-05, "loss": 0.1435, "step": 44000 }, { "epoch": 1.23, "eval_bleu": 95.9319, "eval_gen_len": 64.194, "eval_loss": 0.2497410923242569, "eval_rouge1": 93.7912, "eval_rouge2": 90.1359, "eval_rougeL": 93.5982, "eval_rougeLsum": 93.5886, "eval_runtime": 3092.2489, "eval_samples_per_second": 0.97, "eval_steps_per_second": 0.243, "step": 44000 }, { "epoch": 1.24, "learning_rate": 1.7645491515916422e-05, "loss": 0.1686, "step": 44050 }, { "epoch": 1.24, "learning_rate": 1.7631468237273876e-05, "loss": 0.1699, "step": 44100 }, { "epoch": 1.24, "learning_rate": 1.761744495863133e-05, "loss": 0.1564, "step": 44150 }, { "epoch": 1.24, "learning_rate": 1.760342167998878e-05, "loss": 0.1613, "step": 44200 }, { "epoch": 1.24, "learning_rate": 1.7589398401346236e-05, "loss": 0.1526, "step": 44250 }, { "epoch": 1.24, "learning_rate": 1.757537512270369e-05, "loss": 0.1549, "step": 44300 }, { "epoch": 1.24, "learning_rate": 1.7561351844061143e-05, "loss": 0.181, "step": 44350 }, { "epoch": 1.25, "learning_rate": 1.7547328565418596e-05, "loss": 0.1665, "step": 44400 }, { "epoch": 1.25, "learning_rate": 1.753330528677605e-05, "loss": 0.1805, "step": 44450 }, { "epoch": 1.25, "learning_rate": 1.7519282008133503e-05, "loss": 0.1414, "step": 44500 }, { "epoch": 1.25, "learning_rate": 1.7505258729490956e-05, "loss": 0.1581, "step": 44550 }, { "epoch": 1.25, "learning_rate": 1.7491235450848406e-05, "loss": 0.15, "step": 44600 }, { "epoch": 1.25, "learning_rate": 1.7477212172205863e-05, "loss": 0.1401, "step": 44650 }, { "epoch": 1.25, "learning_rate": 1.7463188893563316e-05, "loss": 0.144, "step": 44700 }, { "epoch": 1.26, "learning_rate": 1.744916561492077e-05, "loss": 0.1459, "step": 44750 }, { "epoch": 1.26, "learning_rate": 1.7435142336278223e-05, "loss": 0.164, "step": 44800 }, { "epoch": 1.26, "learning_rate": 1.7421119057635676e-05, "loss": 0.1428, "step": 44850 }, { "epoch": 1.26, "learning_rate": 1.740709577899313e-05, "loss": 0.1529, "step": 44900 }, { "epoch": 1.26, "learning_rate": 1.7393072500350583e-05, "loss": 0.1631, "step": 44950 }, { "epoch": 1.26, "learning_rate": 1.7379049221708033e-05, "loss": 0.1613, "step": 45000 }, { "epoch": 1.26, "eval_bleu": 95.9702, "eval_gen_len": 64.2347, "eval_loss": 0.24982018768787384, "eval_rouge1": 93.7924, "eval_rouge2": 90.1761, "eval_rougeL": 93.6063, "eval_rougeLsum": 93.5896, "eval_runtime": 3101.0672, "eval_samples_per_second": 0.967, "eval_steps_per_second": 0.242, "step": 45000 }, { "epoch": 1.26, "learning_rate": 1.736502594306549e-05, "loss": 0.165, "step": 45050 }, { "epoch": 1.26, "learning_rate": 1.7351002664422943e-05, "loss": 0.1467, "step": 45100 }, { "epoch": 1.27, "learning_rate": 1.7336979385780396e-05, "loss": 0.1935, "step": 45150 }, { "epoch": 1.27, "learning_rate": 1.732295610713785e-05, "loss": 0.1451, "step": 45200 }, { "epoch": 1.27, "learning_rate": 1.7308932828495303e-05, "loss": 0.1428, "step": 45250 }, { "epoch": 1.27, "learning_rate": 1.7294909549852756e-05, "loss": 0.1494, "step": 45300 }, { "epoch": 1.27, "learning_rate": 1.728088627121021e-05, "loss": 0.1626, "step": 45350 }, { "epoch": 1.27, "learning_rate": 1.726686299256766e-05, "loss": 0.1591, "step": 45400 }, { "epoch": 1.27, "learning_rate": 1.7252839713925116e-05, "loss": 0.1611, "step": 45450 }, { "epoch": 1.28, "learning_rate": 1.723881643528257e-05, "loss": 0.1549, "step": 45500 }, { "epoch": 1.28, "learning_rate": 1.7224793156640023e-05, "loss": 0.1818, "step": 45550 }, { "epoch": 1.28, "learning_rate": 1.7210769877997477e-05, "loss": 0.1738, "step": 45600 }, { "epoch": 1.28, "learning_rate": 1.719674659935493e-05, "loss": 0.1516, "step": 45650 }, { "epoch": 1.28, "learning_rate": 1.7182723320712383e-05, "loss": 0.1508, "step": 45700 }, { "epoch": 1.28, "learning_rate": 1.7168700042069837e-05, "loss": 0.1578, "step": 45750 }, { "epoch": 1.28, "learning_rate": 1.715467676342729e-05, "loss": 0.1601, "step": 45800 }, { "epoch": 1.29, "learning_rate": 1.7140653484784743e-05, "loss": 0.1603, "step": 45850 }, { "epoch": 1.29, "learning_rate": 1.7126630206142197e-05, "loss": 0.1698, "step": 45900 }, { "epoch": 1.29, "learning_rate": 1.711260692749965e-05, "loss": 0.1406, "step": 45950 }, { "epoch": 1.29, "learning_rate": 1.7098583648857103e-05, "loss": 0.1636, "step": 46000 }, { "epoch": 1.29, "eval_bleu": 95.9319, "eval_gen_len": 64.244, "eval_loss": 0.25013232231140137, "eval_rouge1": 93.7318, "eval_rouge2": 90.1123, "eval_rougeL": 93.5419, "eval_rougeLsum": 93.5341, "eval_runtime": 3080.401, "eval_samples_per_second": 0.974, "eval_steps_per_second": 0.243, "step": 46000 }, { "epoch": 1.29, "learning_rate": 1.7084560370214557e-05, "loss": 0.1589, "step": 46050 }, { "epoch": 1.29, "learning_rate": 1.707053709157201e-05, "loss": 0.1574, "step": 46100 }, { "epoch": 1.29, "learning_rate": 1.7056513812929463e-05, "loss": 0.1478, "step": 46150 }, { "epoch": 1.3, "learning_rate": 1.7042490534286917e-05, "loss": 0.1528, "step": 46200 }, { "epoch": 1.3, "learning_rate": 1.702846725564437e-05, "loss": 0.1469, "step": 46250 }, { "epoch": 1.3, "learning_rate": 1.7014443977001824e-05, "loss": 0.1489, "step": 46300 }, { "epoch": 1.3, "learning_rate": 1.7000420698359277e-05, "loss": 0.1464, "step": 46350 }, { "epoch": 1.3, "learning_rate": 1.698639741971673e-05, "loss": 0.144, "step": 46400 }, { "epoch": 1.3, "learning_rate": 1.6972374141074184e-05, "loss": 0.1694, "step": 46450 }, { "epoch": 1.3, "learning_rate": 1.6958350862431637e-05, "loss": 0.1569, "step": 46500 }, { "epoch": 1.31, "learning_rate": 1.694432758378909e-05, "loss": 0.1658, "step": 46550 }, { "epoch": 1.31, "learning_rate": 1.6930304305146544e-05, "loss": 0.166, "step": 46600 }, { "epoch": 1.31, "learning_rate": 1.6916281026503997e-05, "loss": 0.1837, "step": 46650 }, { "epoch": 1.31, "learning_rate": 1.690225774786145e-05, "loss": 0.1858, "step": 46700 }, { "epoch": 1.31, "learning_rate": 1.6888234469218904e-05, "loss": 0.149, "step": 46750 }, { "epoch": 1.31, "learning_rate": 1.6874211190576357e-05, "loss": 0.1679, "step": 46800 }, { "epoch": 1.31, "learning_rate": 1.686018791193381e-05, "loss": 0.1571, "step": 46850 }, { "epoch": 1.32, "learning_rate": 1.6846164633291264e-05, "loss": 0.1439, "step": 46900 }, { "epoch": 1.32, "learning_rate": 1.6832141354648717e-05, "loss": 0.1661, "step": 46950 }, { "epoch": 1.32, "learning_rate": 1.681811807600617e-05, "loss": 0.1661, "step": 47000 }, { "epoch": 1.32, "eval_bleu": 95.9777, "eval_gen_len": 64.2717, "eval_loss": 0.24935156106948853, "eval_rouge1": 93.831, "eval_rouge2": 90.2688, "eval_rougeL": 93.6413, "eval_rougeLsum": 93.6338, "eval_runtime": 3079.0307, "eval_samples_per_second": 0.974, "eval_steps_per_second": 0.244, "step": 47000 }, { "epoch": 1.32, "learning_rate": 1.6804094797363624e-05, "loss": 0.1605, "step": 47050 }, { "epoch": 1.32, "learning_rate": 1.6790071518721077e-05, "loss": 0.1488, "step": 47100 }, { "epoch": 1.32, "learning_rate": 1.677604824007853e-05, "loss": 0.1515, "step": 47150 }, { "epoch": 1.32, "learning_rate": 1.6762024961435984e-05, "loss": 0.1651, "step": 47200 }, { "epoch": 1.33, "learning_rate": 1.674800168279344e-05, "loss": 0.166, "step": 47250 }, { "epoch": 1.33, "learning_rate": 1.673397840415089e-05, "loss": 0.1507, "step": 47300 }, { "epoch": 1.33, "learning_rate": 1.6719955125508344e-05, "loss": 0.1538, "step": 47350 }, { "epoch": 1.33, "learning_rate": 1.6705931846865797e-05, "loss": 0.1574, "step": 47400 }, { "epoch": 1.33, "learning_rate": 1.669190856822325e-05, "loss": 0.1534, "step": 47450 }, { "epoch": 1.33, "learning_rate": 1.6677885289580704e-05, "loss": 0.1617, "step": 47500 }, { "epoch": 1.33, "learning_rate": 1.6663862010938157e-05, "loss": 0.1506, "step": 47550 }, { "epoch": 1.34, "learning_rate": 1.664983873229561e-05, "loss": 0.1554, "step": 47600 }, { "epoch": 1.34, "learning_rate": 1.6635815453653068e-05, "loss": 0.1405, "step": 47650 }, { "epoch": 1.34, "learning_rate": 1.6621792175010518e-05, "loss": 0.162, "step": 47700 }, { "epoch": 1.34, "learning_rate": 1.660776889636797e-05, "loss": 0.1456, "step": 47750 }, { "epoch": 1.34, "learning_rate": 1.6593745617725424e-05, "loss": 0.1664, "step": 47800 }, { "epoch": 1.34, "learning_rate": 1.6579722339082878e-05, "loss": 0.1626, "step": 47850 }, { "epoch": 1.34, "learning_rate": 1.656569906044033e-05, "loss": 0.1568, "step": 47900 }, { "epoch": 1.34, "learning_rate": 1.6551675781797784e-05, "loss": 0.154, "step": 47950 }, { "epoch": 1.35, "learning_rate": 1.6537652503155238e-05, "loss": 0.1484, "step": 48000 }, { "epoch": 1.35, "eval_bleu": 95.9573, "eval_gen_len": 64.25, "eval_loss": 0.24856652319431305, "eval_rouge1": 93.8022, "eval_rouge2": 90.2009, "eval_rougeL": 93.6171, "eval_rougeLsum": 93.5939, "eval_runtime": 3131.2312, "eval_samples_per_second": 0.958, "eval_steps_per_second": 0.24, "step": 48000 }, { "epoch": 1.35, "learning_rate": 1.6523629224512694e-05, "loss": 0.1558, "step": 48050 }, { "epoch": 1.35, "learning_rate": 1.6509605945870144e-05, "loss": 0.144, "step": 48100 }, { "epoch": 1.35, "learning_rate": 1.6495582667227598e-05, "loss": 0.16, "step": 48150 }, { "epoch": 1.35, "learning_rate": 1.648155938858505e-05, "loss": 0.1812, "step": 48200 }, { "epoch": 1.35, "learning_rate": 1.6467536109942504e-05, "loss": 0.1516, "step": 48250 }, { "epoch": 1.35, "learning_rate": 1.6453512831299958e-05, "loss": 0.1506, "step": 48300 }, { "epoch": 1.36, "learning_rate": 1.643948955265741e-05, "loss": 0.1515, "step": 48350 }, { "epoch": 1.36, "learning_rate": 1.6425466274014865e-05, "loss": 0.1418, "step": 48400 }, { "epoch": 1.36, "learning_rate": 1.641144299537232e-05, "loss": 0.162, "step": 48450 }, { "epoch": 1.36, "learning_rate": 1.639741971672977e-05, "loss": 0.1552, "step": 48500 }, { "epoch": 1.36, "learning_rate": 1.6383396438087225e-05, "loss": 0.1613, "step": 48550 }, { "epoch": 1.36, "learning_rate": 1.6369373159444678e-05, "loss": 0.1621, "step": 48600 }, { "epoch": 1.36, "learning_rate": 1.635534988080213e-05, "loss": 0.1594, "step": 48650 }, { "epoch": 1.37, "learning_rate": 1.6341326602159588e-05, "loss": 0.1447, "step": 48700 }, { "epoch": 1.37, "learning_rate": 1.6327303323517038e-05, "loss": 0.1508, "step": 48750 }, { "epoch": 1.37, "learning_rate": 1.631328004487449e-05, "loss": 0.1663, "step": 48800 }, { "epoch": 1.37, "learning_rate": 1.6299256766231948e-05, "loss": 0.1546, "step": 48850 }, { "epoch": 1.37, "learning_rate": 1.6285233487589398e-05, "loss": 0.1435, "step": 48900 }, { "epoch": 1.37, "learning_rate": 1.627121020894685e-05, "loss": 0.1645, "step": 48950 }, { "epoch": 1.37, "learning_rate": 1.6257186930304305e-05, "loss": 0.1532, "step": 49000 }, { "epoch": 1.37, "eval_bleu": 95.9496, "eval_gen_len": 64.2453, "eval_loss": 0.24891996383666992, "eval_rouge1": 93.7774, "eval_rouge2": 90.1343, "eval_rougeL": 93.5686, "eval_rougeLsum": 93.5586, "eval_runtime": 3102.2649, "eval_samples_per_second": 0.967, "eval_steps_per_second": 0.242, "step": 49000 }, { "epoch": 1.38, "learning_rate": 1.6243163651661758e-05, "loss": 0.1465, "step": 49050 }, { "epoch": 1.38, "learning_rate": 1.6229140373019215e-05, "loss": 0.1652, "step": 49100 }, { "epoch": 1.38, "learning_rate": 1.6215117094376665e-05, "loss": 0.1538, "step": 49150 }, { "epoch": 1.38, "learning_rate": 1.6201093815734118e-05, "loss": 0.1608, "step": 49200 }, { "epoch": 1.38, "learning_rate": 1.6187070537091575e-05, "loss": 0.1735, "step": 49250 }, { "epoch": 1.38, "learning_rate": 1.6173047258449025e-05, "loss": 0.1752, "step": 49300 }, { "epoch": 1.38, "learning_rate": 1.615902397980648e-05, "loss": 0.1812, "step": 49350 }, { "epoch": 1.39, "learning_rate": 1.614500070116393e-05, "loss": 0.1568, "step": 49400 }, { "epoch": 1.39, "learning_rate": 1.6130977422521385e-05, "loss": 0.1543, "step": 49450 }, { "epoch": 1.39, "learning_rate": 1.6116954143878842e-05, "loss": 0.1625, "step": 49500 }, { "epoch": 1.39, "learning_rate": 1.6102930865236292e-05, "loss": 0.1547, "step": 49550 }, { "epoch": 1.39, "learning_rate": 1.6088907586593745e-05, "loss": 0.1455, "step": 49600 }, { "epoch": 1.39, "learning_rate": 1.6074884307951202e-05, "loss": 0.1623, "step": 49650 }, { "epoch": 1.39, "learning_rate": 1.6060861029308652e-05, "loss": 0.1532, "step": 49700 }, { "epoch": 1.4, "learning_rate": 1.6046837750666105e-05, "loss": 0.1539, "step": 49750 }, { "epoch": 1.4, "learning_rate": 1.603281447202356e-05, "loss": 0.1541, "step": 49800 }, { "epoch": 1.4, "learning_rate": 1.6018791193381012e-05, "loss": 0.1538, "step": 49850 }, { "epoch": 1.4, "learning_rate": 1.600476791473847e-05, "loss": 0.1656, "step": 49900 }, { "epoch": 1.4, "learning_rate": 1.599074463609592e-05, "loss": 0.1598, "step": 49950 }, { "epoch": 1.4, "learning_rate": 1.5976721357453372e-05, "loss": 0.1523, "step": 50000 }, { "epoch": 1.4, "eval_bleu": 95.9721, "eval_gen_len": 64.2117, "eval_loss": 0.2481638789176941, "eval_rouge1": 93.8515, "eval_rouge2": 90.2059, "eval_rougeL": 93.6666, "eval_rougeLsum": 93.6523, "eval_runtime": 3096.96, "eval_samples_per_second": 0.969, "eval_steps_per_second": 0.242, "step": 50000 }, { "epoch": 1.4, "learning_rate": 1.596269807881083e-05, "loss": 0.1373, "step": 50050 }, { "epoch": 1.41, "learning_rate": 1.594867480016828e-05, "loss": 0.1535, "step": 50100 }, { "epoch": 1.41, "learning_rate": 1.5934651521525735e-05, "loss": 0.1483, "step": 50150 }, { "epoch": 1.41, "learning_rate": 1.5920628242883185e-05, "loss": 0.1558, "step": 50200 }, { "epoch": 1.41, "learning_rate": 1.590660496424064e-05, "loss": 0.1669, "step": 50250 }, { "epoch": 1.41, "learning_rate": 1.5892581685598096e-05, "loss": 0.1654, "step": 50300 }, { "epoch": 1.41, "learning_rate": 1.5878558406955545e-05, "loss": 0.178, "step": 50350 }, { "epoch": 1.41, "learning_rate": 1.5864535128313e-05, "loss": 0.1556, "step": 50400 }, { "epoch": 1.41, "learning_rate": 1.5850511849670456e-05, "loss": 0.1705, "step": 50450 }, { "epoch": 1.42, "learning_rate": 1.5836488571027906e-05, "loss": 0.1585, "step": 50500 }, { "epoch": 1.42, "learning_rate": 1.5822465292385362e-05, "loss": 0.1307, "step": 50550 }, { "epoch": 1.42, "learning_rate": 1.5808442013742812e-05, "loss": 0.1471, "step": 50600 }, { "epoch": 1.42, "learning_rate": 1.5794418735100266e-05, "loss": 0.1732, "step": 50650 }, { "epoch": 1.42, "learning_rate": 1.5780395456457722e-05, "loss": 0.1465, "step": 50700 }, { "epoch": 1.42, "learning_rate": 1.5766372177815172e-05, "loss": 0.1387, "step": 50750 }, { "epoch": 1.42, "learning_rate": 1.5752348899172626e-05, "loss": 0.1711, "step": 50800 }, { "epoch": 1.43, "learning_rate": 1.5738325620530082e-05, "loss": 0.1361, "step": 50850 }, { "epoch": 1.43, "learning_rate": 1.5724302341887532e-05, "loss": 0.1503, "step": 50900 }, { "epoch": 1.43, "learning_rate": 1.571027906324499e-05, "loss": 0.1615, "step": 50950 }, { "epoch": 1.43, "learning_rate": 1.569625578460244e-05, "loss": 0.1597, "step": 51000 }, { "epoch": 1.43, "eval_bleu": 96.0124, "eval_gen_len": 64.2523, "eval_loss": 0.24883191287517548, "eval_rouge1": 93.8575, "eval_rouge2": 90.2696, "eval_rougeL": 93.6678, "eval_rougeLsum": 93.6672, "eval_runtime": 3113.1853, "eval_samples_per_second": 0.964, "eval_steps_per_second": 0.241, "step": 51000 }, { "epoch": 1.43, "learning_rate": 1.5682232505959892e-05, "loss": 0.1595, "step": 51050 }, { "epoch": 1.43, "learning_rate": 1.566820922731735e-05, "loss": 0.1589, "step": 51100 }, { "epoch": 1.43, "learning_rate": 1.56541859486748e-05, "loss": 0.1436, "step": 51150 }, { "epoch": 1.44, "learning_rate": 1.5640162670032253e-05, "loss": 0.1417, "step": 51200 }, { "epoch": 1.44, "learning_rate": 1.562613939138971e-05, "loss": 0.1727, "step": 51250 }, { "epoch": 1.44, "learning_rate": 1.561211611274716e-05, "loss": 0.1563, "step": 51300 }, { "epoch": 1.44, "learning_rate": 1.5598092834104616e-05, "loss": 0.1521, "step": 51350 }, { "epoch": 1.44, "learning_rate": 1.5584069555462066e-05, "loss": 0.1583, "step": 51400 }, { "epoch": 1.44, "learning_rate": 1.557004627681952e-05, "loss": 0.1627, "step": 51450 }, { "epoch": 1.44, "learning_rate": 1.5556022998176976e-05, "loss": 0.142, "step": 51500 }, { "epoch": 1.45, "learning_rate": 1.5541999719534426e-05, "loss": 0.165, "step": 51550 }, { "epoch": 1.45, "learning_rate": 1.5527976440891883e-05, "loss": 0.1666, "step": 51600 }, { "epoch": 1.45, "learning_rate": 1.5513953162249336e-05, "loss": 0.1522, "step": 51650 }, { "epoch": 1.45, "learning_rate": 1.5499929883606786e-05, "loss": 0.1576, "step": 51700 }, { "epoch": 1.45, "learning_rate": 1.5485906604964243e-05, "loss": 0.1435, "step": 51750 }, { "epoch": 1.45, "learning_rate": 1.5471883326321693e-05, "loss": 0.1809, "step": 51800 }, { "epoch": 1.45, "learning_rate": 1.5457860047679146e-05, "loss": 0.1621, "step": 51850 }, { "epoch": 1.46, "learning_rate": 1.5443836769036603e-05, "loss": 0.1644, "step": 51900 }, { "epoch": 1.46, "learning_rate": 1.5429813490394053e-05, "loss": 0.1629, "step": 51950 }, { "epoch": 1.46, "learning_rate": 1.541579021175151e-05, "loss": 0.1564, "step": 52000 }, { "epoch": 1.46, "eval_bleu": 96.0177, "eval_gen_len": 64.2583, "eval_loss": 0.24838809669017792, "eval_rouge1": 93.8642, "eval_rouge2": 90.2554, "eval_rougeL": 93.6654, "eval_rougeLsum": 93.6673, "eval_runtime": 3134.0792, "eval_samples_per_second": 0.957, "eval_steps_per_second": 0.239, "step": 52000 }, { "epoch": 1.46, "learning_rate": 1.5401766933108963e-05, "loss": 0.1697, "step": 52050 }, { "epoch": 1.46, "learning_rate": 1.5387743654466413e-05, "loss": 0.1629, "step": 52100 }, { "epoch": 1.46, "learning_rate": 1.537372037582387e-05, "loss": 0.1769, "step": 52150 }, { "epoch": 1.46, "learning_rate": 1.535969709718132e-05, "loss": 0.1569, "step": 52200 }, { "epoch": 1.47, "learning_rate": 1.5345673818538773e-05, "loss": 0.158, "step": 52250 }, { "epoch": 1.47, "learning_rate": 1.533165053989623e-05, "loss": 0.1486, "step": 52300 }, { "epoch": 1.47, "learning_rate": 1.531762726125368e-05, "loss": 0.1517, "step": 52350 }, { "epoch": 1.47, "learning_rate": 1.5303603982611137e-05, "loss": 0.1496, "step": 52400 }, { "epoch": 1.47, "learning_rate": 1.528958070396859e-05, "loss": 0.1511, "step": 52450 }, { "epoch": 1.47, "learning_rate": 1.527555742532604e-05, "loss": 0.1435, "step": 52500 }, { "epoch": 1.47, "learning_rate": 1.5261534146683497e-05, "loss": 0.1342, "step": 52550 }, { "epoch": 1.48, "learning_rate": 1.5247510868040947e-05, "loss": 0.1369, "step": 52600 }, { "epoch": 1.48, "learning_rate": 1.5233487589398402e-05, "loss": 0.1393, "step": 52650 }, { "epoch": 1.48, "learning_rate": 1.5219464310755857e-05, "loss": 0.1537, "step": 52700 }, { "epoch": 1.48, "learning_rate": 1.5205441032113307e-05, "loss": 0.1514, "step": 52750 }, { "epoch": 1.48, "learning_rate": 1.5191417753470762e-05, "loss": 0.1517, "step": 52800 }, { "epoch": 1.48, "learning_rate": 1.5177394474828217e-05, "loss": 0.1507, "step": 52850 }, { "epoch": 1.48, "learning_rate": 1.5163371196185668e-05, "loss": 0.1845, "step": 52900 }, { "epoch": 1.49, "learning_rate": 1.5149347917543123e-05, "loss": 0.1449, "step": 52950 }, { "epoch": 1.49, "learning_rate": 1.5135324638900573e-05, "loss": 0.1488, "step": 53000 }, { "epoch": 1.49, "eval_bleu": 96.0359, "eval_gen_len": 64.297, "eval_loss": 0.2471029907464981, "eval_rouge1": 93.848, "eval_rouge2": 90.2745, "eval_rougeL": 93.6537, "eval_rougeLsum": 93.6498, "eval_runtime": 3110.5388, "eval_samples_per_second": 0.964, "eval_steps_per_second": 0.241, "step": 53000 }, { "epoch": 1.49, "learning_rate": 1.5121301360258028e-05, "loss": 0.1479, "step": 53050 }, { "epoch": 1.49, "learning_rate": 1.5107278081615484e-05, "loss": 0.1713, "step": 53100 }, { "epoch": 1.49, "learning_rate": 1.5093254802972935e-05, "loss": 0.1583, "step": 53150 }, { "epoch": 1.49, "learning_rate": 1.5079231524330389e-05, "loss": 0.159, "step": 53200 }, { "epoch": 1.49, "learning_rate": 1.5065208245687844e-05, "loss": 0.1429, "step": 53250 }, { "epoch": 1.49, "learning_rate": 1.5051184967045295e-05, "loss": 0.1513, "step": 53300 }, { "epoch": 1.5, "learning_rate": 1.503716168840275e-05, "loss": 0.1605, "step": 53350 }, { "epoch": 1.5, "learning_rate": 1.50231384097602e-05, "loss": 0.1537, "step": 53400 }, { "epoch": 1.5, "learning_rate": 1.5009115131117655e-05, "loss": 0.1573, "step": 53450 }, { "epoch": 1.5, "learning_rate": 1.4995091852475109e-05, "loss": 0.1678, "step": 53500 }, { "epoch": 1.5, "learning_rate": 1.4981068573832564e-05, "loss": 0.1602, "step": 53550 }, { "epoch": 1.5, "learning_rate": 1.4967045295190015e-05, "loss": 0.1313, "step": 53600 }, { "epoch": 1.5, "learning_rate": 1.4953022016547469e-05, "loss": 0.1377, "step": 53650 }, { "epoch": 1.51, "learning_rate": 1.4938998737904922e-05, "loss": 0.1509, "step": 53700 }, { "epoch": 1.51, "learning_rate": 1.4924975459262377e-05, "loss": 0.1717, "step": 53750 }, { "epoch": 1.51, "learning_rate": 1.4910952180619829e-05, "loss": 0.1518, "step": 53800 }, { "epoch": 1.51, "learning_rate": 1.4896928901977282e-05, "loss": 0.1576, "step": 53850 }, { "epoch": 1.51, "learning_rate": 1.4882905623334736e-05, "loss": 0.1741, "step": 53900 }, { "epoch": 1.51, "learning_rate": 1.486888234469219e-05, "loss": 0.1457, "step": 53950 }, { "epoch": 1.51, "learning_rate": 1.4854859066049642e-05, "loss": 0.1285, "step": 54000 }, { "epoch": 1.51, "eval_bleu": 96.0361, "eval_gen_len": 64.2367, "eval_loss": 0.2483721524477005, "eval_rouge1": 93.8768, "eval_rouge2": 90.3035, "eval_rougeL": 93.698, "eval_rougeLsum": 93.6976, "eval_runtime": 3128.3254, "eval_samples_per_second": 0.959, "eval_steps_per_second": 0.24, "step": 54000 }, { "epoch": 1.52, "learning_rate": 1.4840835787407096e-05, "loss": 0.1327, "step": 54050 }, { "epoch": 1.52, "learning_rate": 1.4826812508764549e-05, "loss": 0.1455, "step": 54100 }, { "epoch": 1.52, "learning_rate": 1.4812789230122004e-05, "loss": 0.1336, "step": 54150 }, { "epoch": 1.52, "learning_rate": 1.4798765951479456e-05, "loss": 0.1497, "step": 54200 }, { "epoch": 1.52, "learning_rate": 1.4784742672836909e-05, "loss": 0.1545, "step": 54250 }, { "epoch": 1.52, "learning_rate": 1.4770719394194362e-05, "loss": 0.1488, "step": 54300 }, { "epoch": 1.52, "learning_rate": 1.4756696115551817e-05, "loss": 0.1591, "step": 54350 }, { "epoch": 1.53, "learning_rate": 1.4742672836909271e-05, "loss": 0.1515, "step": 54400 }, { "epoch": 1.53, "learning_rate": 1.4728649558266723e-05, "loss": 0.1538, "step": 54450 }, { "epoch": 1.53, "learning_rate": 1.4714626279624176e-05, "loss": 0.1652, "step": 54500 }, { "epoch": 1.53, "learning_rate": 1.4700603000981631e-05, "loss": 0.1561, "step": 54550 }, { "epoch": 1.53, "learning_rate": 1.4686579722339084e-05, "loss": 0.1644, "step": 54600 }, { "epoch": 1.53, "learning_rate": 1.4672556443696536e-05, "loss": 0.1596, "step": 54650 }, { "epoch": 1.53, "learning_rate": 1.465853316505399e-05, "loss": 0.1585, "step": 54700 }, { "epoch": 1.54, "learning_rate": 1.4644509886411444e-05, "loss": 0.1557, "step": 54750 }, { "epoch": 1.54, "learning_rate": 1.4630486607768898e-05, "loss": 0.137, "step": 54800 }, { "epoch": 1.54, "learning_rate": 1.461646332912635e-05, "loss": 0.1783, "step": 54850 }, { "epoch": 1.54, "learning_rate": 1.4602440050483803e-05, "loss": 0.1731, "step": 54900 }, { "epoch": 1.54, "learning_rate": 1.4588416771841258e-05, "loss": 0.1552, "step": 54950 }, { "epoch": 1.54, "learning_rate": 1.4574393493198711e-05, "loss": 0.1628, "step": 55000 }, { "epoch": 1.54, "eval_bleu": 96.033, "eval_gen_len": 64.2217, "eval_loss": 0.2469196319580078, "eval_rouge1": 93.9233, "eval_rouge2": 90.3508, "eval_rougeL": 93.7332, "eval_rougeLsum": 93.7184, "eval_runtime": 3083.8837, "eval_samples_per_second": 0.973, "eval_steps_per_second": 0.243, "step": 55000 }, { "epoch": 1.54, "learning_rate": 1.4560370214556163e-05, "loss": 0.1606, "step": 55050 }, { "epoch": 1.55, "learning_rate": 1.4546346935913616e-05, "loss": 0.1534, "step": 55100 }, { "epoch": 1.55, "learning_rate": 1.4532323657271071e-05, "loss": 0.1516, "step": 55150 }, { "epoch": 1.55, "learning_rate": 1.4518300378628525e-05, "loss": 0.1674, "step": 55200 }, { "epoch": 1.55, "learning_rate": 1.4504277099985976e-05, "loss": 0.1562, "step": 55250 }, { "epoch": 1.55, "learning_rate": 1.449025382134343e-05, "loss": 0.1509, "step": 55300 }, { "epoch": 1.55, "learning_rate": 1.4476230542700885e-05, "loss": 0.1476, "step": 55350 }, { "epoch": 1.55, "learning_rate": 1.4462207264058338e-05, "loss": 0.1514, "step": 55400 }, { "epoch": 1.56, "learning_rate": 1.444818398541579e-05, "loss": 0.1497, "step": 55450 }, { "epoch": 1.56, "learning_rate": 1.4434160706773243e-05, "loss": 0.1728, "step": 55500 }, { "epoch": 1.56, "learning_rate": 1.4420137428130698e-05, "loss": 0.1499, "step": 55550 }, { "epoch": 1.56, "learning_rate": 1.4406114149488151e-05, "loss": 0.1588, "step": 55600 }, { "epoch": 1.56, "learning_rate": 1.4392090870845603e-05, "loss": 0.1589, "step": 55650 }, { "epoch": 1.56, "learning_rate": 1.4378067592203056e-05, "loss": 0.1332, "step": 55700 }, { "epoch": 1.56, "learning_rate": 1.4364044313560512e-05, "loss": 0.15, "step": 55750 }, { "epoch": 1.56, "learning_rate": 1.4350021034917965e-05, "loss": 0.1731, "step": 55800 }, { "epoch": 1.57, "learning_rate": 1.4335997756275418e-05, "loss": 0.1468, "step": 55850 }, { "epoch": 1.57, "learning_rate": 1.432197447763287e-05, "loss": 0.1464, "step": 55900 }, { "epoch": 1.57, "learning_rate": 1.4307951198990325e-05, "loss": 0.1597, "step": 55950 }, { "epoch": 1.57, "learning_rate": 1.4293927920347778e-05, "loss": 0.155, "step": 56000 }, { "epoch": 1.57, "eval_bleu": 96.0287, "eval_gen_len": 64.226, "eval_loss": 0.24680346250534058, "eval_rouge1": 93.8476, "eval_rouge2": 90.2618, "eval_rougeL": 93.6663, "eval_rougeLsum": 93.6463, "eval_runtime": 3086.3631, "eval_samples_per_second": 0.972, "eval_steps_per_second": 0.243, "step": 56000 }, { "epoch": 1.57, "learning_rate": 1.4279904641705232e-05, "loss": 0.1392, "step": 56050 }, { "epoch": 1.57, "learning_rate": 1.4265881363062683e-05, "loss": 0.1546, "step": 56100 }, { "epoch": 1.57, "learning_rate": 1.4251858084420138e-05, "loss": 0.1596, "step": 56150 }, { "epoch": 1.58, "learning_rate": 1.4237834805777592e-05, "loss": 0.1574, "step": 56200 }, { "epoch": 1.58, "learning_rate": 1.4223811527135045e-05, "loss": 0.1593, "step": 56250 }, { "epoch": 1.58, "learning_rate": 1.4209788248492497e-05, "loss": 0.1497, "step": 56300 }, { "epoch": 1.58, "learning_rate": 1.4195764969849952e-05, "loss": 0.1447, "step": 56350 }, { "epoch": 1.58, "learning_rate": 1.4181741691207405e-05, "loss": 0.1399, "step": 56400 }, { "epoch": 1.58, "learning_rate": 1.4167718412564859e-05, "loss": 0.1392, "step": 56450 }, { "epoch": 1.58, "learning_rate": 1.415369513392231e-05, "loss": 0.1587, "step": 56500 }, { "epoch": 1.59, "learning_rate": 1.4139671855279765e-05, "loss": 0.1763, "step": 56550 }, { "epoch": 1.59, "learning_rate": 1.4125648576637219e-05, "loss": 0.1542, "step": 56600 }, { "epoch": 1.59, "learning_rate": 1.4111625297994672e-05, "loss": 0.1422, "step": 56650 }, { "epoch": 1.59, "learning_rate": 1.4097602019352124e-05, "loss": 0.158, "step": 56700 }, { "epoch": 1.59, "learning_rate": 1.4083578740709579e-05, "loss": 0.1511, "step": 56750 }, { "epoch": 1.59, "learning_rate": 1.4069555462067032e-05, "loss": 0.1572, "step": 56800 }, { "epoch": 1.59, "learning_rate": 1.4055532183424485e-05, "loss": 0.1677, "step": 56850 }, { "epoch": 1.6, "learning_rate": 1.4041508904781937e-05, "loss": 0.1425, "step": 56900 }, { "epoch": 1.6, "learning_rate": 1.4027485626139392e-05, "loss": 0.1744, "step": 56950 }, { "epoch": 1.6, "learning_rate": 1.4013462347496845e-05, "loss": 0.1514, "step": 57000 }, { "epoch": 1.6, "eval_bleu": 96.0359, "eval_gen_len": 64.2267, "eval_loss": 0.24612654745578766, "eval_rouge1": 93.9162, "eval_rouge2": 90.3455, "eval_rougeL": 93.7454, "eval_rougeLsum": 93.7264, "eval_runtime": 3113.4814, "eval_samples_per_second": 0.964, "eval_steps_per_second": 0.241, "step": 57000 }, { "epoch": 1.6, "learning_rate": 1.3999439068854299e-05, "loss": 0.149, "step": 57050 }, { "epoch": 1.6, "learning_rate": 1.3985415790211752e-05, "loss": 0.1457, "step": 57100 }, { "epoch": 1.6, "learning_rate": 1.3971392511569206e-05, "loss": 0.1426, "step": 57150 }, { "epoch": 1.6, "learning_rate": 1.3957369232926659e-05, "loss": 0.1574, "step": 57200 }, { "epoch": 1.61, "learning_rate": 1.3943345954284112e-05, "loss": 0.1566, "step": 57250 }, { "epoch": 1.61, "learning_rate": 1.3929322675641566e-05, "loss": 0.1459, "step": 57300 }, { "epoch": 1.61, "learning_rate": 1.3915299396999019e-05, "loss": 0.1431, "step": 57350 }, { "epoch": 1.61, "learning_rate": 1.3901276118356472e-05, "loss": 0.154, "step": 57400 }, { "epoch": 1.61, "learning_rate": 1.3887252839713926e-05, "loss": 0.1513, "step": 57450 }, { "epoch": 1.61, "learning_rate": 1.3873229561071379e-05, "loss": 0.1571, "step": 57500 }, { "epoch": 1.61, "learning_rate": 1.3859206282428832e-05, "loss": 0.1499, "step": 57550 }, { "epoch": 1.62, "learning_rate": 1.3845183003786286e-05, "loss": 0.1563, "step": 57600 }, { "epoch": 1.62, "learning_rate": 1.3831159725143739e-05, "loss": 0.1719, "step": 57650 }, { "epoch": 1.62, "learning_rate": 1.3817136446501192e-05, "loss": 0.1507, "step": 57700 }, { "epoch": 1.62, "learning_rate": 1.3803113167858646e-05, "loss": 0.1387, "step": 57750 }, { "epoch": 1.62, "learning_rate": 1.37890898892161e-05, "loss": 0.1439, "step": 57800 }, { "epoch": 1.62, "learning_rate": 1.3775066610573553e-05, "loss": 0.1434, "step": 57850 }, { "epoch": 1.62, "learning_rate": 1.3761043331931006e-05, "loss": 0.1719, "step": 57900 }, { "epoch": 1.63, "learning_rate": 1.374702005328846e-05, "loss": 0.1824, "step": 57950 }, { "epoch": 1.63, "learning_rate": 1.3732996774645913e-05, "loss": 0.1393, "step": 58000 }, { "epoch": 1.63, "eval_bleu": 96.0528, "eval_gen_len": 64.256, "eval_loss": 0.24644368886947632, "eval_rouge1": 93.9108, "eval_rouge2": 90.3273, "eval_rougeL": 93.7332, "eval_rougeLsum": 93.7113, "eval_runtime": 3121.7481, "eval_samples_per_second": 0.961, "eval_steps_per_second": 0.24, "step": 58000 }, { "epoch": 1.63, "learning_rate": 1.3718973496003366e-05, "loss": 0.1601, "step": 58050 }, { "epoch": 1.63, "learning_rate": 1.370495021736082e-05, "loss": 0.1494, "step": 58100 }, { "epoch": 1.63, "learning_rate": 1.3690926938718273e-05, "loss": 0.1413, "step": 58150 }, { "epoch": 1.63, "learning_rate": 1.3676903660075726e-05, "loss": 0.1609, "step": 58200 }, { "epoch": 1.63, "learning_rate": 1.366288038143318e-05, "loss": 0.1683, "step": 58250 }, { "epoch": 1.64, "learning_rate": 1.3648857102790633e-05, "loss": 0.1556, "step": 58300 }, { "epoch": 1.64, "learning_rate": 1.3634833824148086e-05, "loss": 0.1479, "step": 58350 }, { "epoch": 1.64, "learning_rate": 1.362081054550554e-05, "loss": 0.169, "step": 58400 }, { "epoch": 1.64, "learning_rate": 1.3606787266862993e-05, "loss": 0.1521, "step": 58450 }, { "epoch": 1.64, "learning_rate": 1.3592763988220446e-05, "loss": 0.16, "step": 58500 }, { "epoch": 1.64, "learning_rate": 1.35787407095779e-05, "loss": 0.1453, "step": 58550 }, { "epoch": 1.64, "learning_rate": 1.3564717430935353e-05, "loss": 0.1439, "step": 58600 }, { "epoch": 1.64, "learning_rate": 1.3550694152292806e-05, "loss": 0.1546, "step": 58650 }, { "epoch": 1.65, "learning_rate": 1.353667087365026e-05, "loss": 0.157, "step": 58700 }, { "epoch": 1.65, "learning_rate": 1.3522647595007715e-05, "loss": 0.1769, "step": 58750 }, { "epoch": 1.65, "learning_rate": 1.3508624316365166e-05, "loss": 0.1591, "step": 58800 }, { "epoch": 1.65, "learning_rate": 1.349460103772262e-05, "loss": 0.1492, "step": 58850 }, { "epoch": 1.65, "learning_rate": 1.3480577759080073e-05, "loss": 0.1474, "step": 58900 }, { "epoch": 1.65, "learning_rate": 1.3466554480437528e-05, "loss": 0.1622, "step": 58950 }, { "epoch": 1.65, "learning_rate": 1.345253120179498e-05, "loss": 0.1691, "step": 59000 }, { "epoch": 1.65, "eval_bleu": 96.0335, "eval_gen_len": 64.2307, "eval_loss": 0.24554915726184845, "eval_rouge1": 93.8976, "eval_rouge2": 90.3009, "eval_rougeL": 93.7085, "eval_rougeLsum": 93.6965, "eval_runtime": 3154.4266, "eval_samples_per_second": 0.951, "eval_steps_per_second": 0.238, "step": 59000 }, { "epoch": 1.66, "learning_rate": 1.3438507923152433e-05, "loss": 0.157, "step": 59050 }, { "epoch": 1.66, "learning_rate": 1.3424484644509886e-05, "loss": 0.1341, "step": 59100 }, { "epoch": 1.66, "learning_rate": 1.3410461365867342e-05, "loss": 0.1638, "step": 59150 }, { "epoch": 1.66, "learning_rate": 1.3396438087224793e-05, "loss": 0.1523, "step": 59200 }, { "epoch": 1.66, "learning_rate": 1.3382414808582247e-05, "loss": 0.1301, "step": 59250 }, { "epoch": 1.66, "learning_rate": 1.33683915299397e-05, "loss": 0.1456, "step": 59300 }, { "epoch": 1.66, "learning_rate": 1.3354368251297155e-05, "loss": 0.1592, "step": 59350 }, { "epoch": 1.67, "learning_rate": 1.3340344972654607e-05, "loss": 0.1464, "step": 59400 }, { "epoch": 1.67, "learning_rate": 1.332632169401206e-05, "loss": 0.1383, "step": 59450 }, { "epoch": 1.67, "learning_rate": 1.3312298415369513e-05, "loss": 0.1372, "step": 59500 }, { "epoch": 1.67, "learning_rate": 1.3298275136726968e-05, "loss": 0.1515, "step": 59550 }, { "epoch": 1.67, "learning_rate": 1.328425185808442e-05, "loss": 0.1714, "step": 59600 }, { "epoch": 1.67, "learning_rate": 1.3270228579441873e-05, "loss": 0.1572, "step": 59650 }, { "epoch": 1.67, "learning_rate": 1.3256205300799327e-05, "loss": 0.1468, "step": 59700 }, { "epoch": 1.68, "learning_rate": 1.3242182022156782e-05, "loss": 0.16, "step": 59750 }, { "epoch": 1.68, "learning_rate": 1.3228158743514233e-05, "loss": 0.1402, "step": 59800 }, { "epoch": 1.68, "learning_rate": 1.3214135464871687e-05, "loss": 0.1427, "step": 59850 }, { "epoch": 1.68, "learning_rate": 1.320011218622914e-05, "loss": 0.1508, "step": 59900 }, { "epoch": 1.68, "learning_rate": 1.3186088907586595e-05, "loss": 0.1613, "step": 59950 }, { "epoch": 1.68, "learning_rate": 1.3172065628944049e-05, "loss": 0.1623, "step": 60000 }, { "epoch": 1.68, "eval_bleu": 96.0534, "eval_gen_len": 64.217, "eval_loss": 0.245611310005188, "eval_rouge1": 93.9428, "eval_rouge2": 90.3851, "eval_rougeL": 93.7507, "eval_rougeLsum": 93.731, "eval_runtime": 3078.7009, "eval_samples_per_second": 0.974, "eval_steps_per_second": 0.244, "step": 60000 }, { "epoch": 1.68, "learning_rate": 1.31580423503015e-05, "loss": 0.1554, "step": 60050 }, { "epoch": 1.69, "learning_rate": 1.3144019071658954e-05, "loss": 0.1618, "step": 60100 }, { "epoch": 1.69, "learning_rate": 1.3129995793016409e-05, "loss": 0.164, "step": 60150 }, { "epoch": 1.69, "learning_rate": 1.3115972514373862e-05, "loss": 0.1601, "step": 60200 }, { "epoch": 1.69, "learning_rate": 1.3101949235731314e-05, "loss": 0.1497, "step": 60250 }, { "epoch": 1.69, "learning_rate": 1.3087925957088767e-05, "loss": 0.1414, "step": 60300 }, { "epoch": 1.69, "learning_rate": 1.3073902678446222e-05, "loss": 0.1665, "step": 60350 }, { "epoch": 1.69, "learning_rate": 1.3059879399803675e-05, "loss": 0.1615, "step": 60400 }, { "epoch": 1.7, "learning_rate": 1.3045856121161127e-05, "loss": 0.1411, "step": 60450 }, { "epoch": 1.7, "learning_rate": 1.303183284251858e-05, "loss": 0.1527, "step": 60500 }, { "epoch": 1.7, "learning_rate": 1.3017809563876036e-05, "loss": 0.147, "step": 60550 }, { "epoch": 1.7, "learning_rate": 1.3003786285233489e-05, "loss": 0.1346, "step": 60600 }, { "epoch": 1.7, "learning_rate": 1.298976300659094e-05, "loss": 0.1512, "step": 60650 }, { "epoch": 1.7, "learning_rate": 1.2975739727948394e-05, "loss": 0.1363, "step": 60700 }, { "epoch": 1.7, "learning_rate": 1.2961716449305849e-05, "loss": 0.1592, "step": 60750 }, { "epoch": 1.71, "learning_rate": 1.2947693170663302e-05, "loss": 0.1357, "step": 60800 }, { "epoch": 1.71, "learning_rate": 1.2933669892020754e-05, "loss": 0.1617, "step": 60850 }, { "epoch": 1.71, "learning_rate": 1.2919646613378207e-05, "loss": 0.1425, "step": 60900 }, { "epoch": 1.71, "learning_rate": 1.2905623334735662e-05, "loss": 0.1334, "step": 60950 }, { "epoch": 1.71, "learning_rate": 1.2891600056093116e-05, "loss": 0.1577, "step": 61000 }, { "epoch": 1.71, "eval_bleu": 96.0539, "eval_gen_len": 64.2317, "eval_loss": 0.24573621153831482, "eval_rouge1": 93.8833, "eval_rouge2": 90.3011, "eval_rougeL": 93.7003, "eval_rougeLsum": 93.6749, "eval_runtime": 3094.1804, "eval_samples_per_second": 0.97, "eval_steps_per_second": 0.242, "step": 61000 }, { "epoch": 1.71, "learning_rate": 1.2877576777450567e-05, "loss": 0.1561, "step": 61050 }, { "epoch": 1.71, "learning_rate": 1.286355349880802e-05, "loss": 0.1526, "step": 61100 }, { "epoch": 1.72, "learning_rate": 1.2849530220165476e-05, "loss": 0.1516, "step": 61150 }, { "epoch": 1.72, "learning_rate": 1.283550694152293e-05, "loss": 0.1604, "step": 61200 }, { "epoch": 1.72, "learning_rate": 1.2821483662880381e-05, "loss": 0.1393, "step": 61250 }, { "epoch": 1.72, "learning_rate": 1.2807460384237834e-05, "loss": 0.1522, "step": 61300 }, { "epoch": 1.72, "learning_rate": 1.279343710559529e-05, "loss": 0.1605, "step": 61350 }, { "epoch": 1.72, "learning_rate": 1.2779413826952743e-05, "loss": 0.1483, "step": 61400 }, { "epoch": 1.72, "learning_rate": 1.2765390548310196e-05, "loss": 0.1866, "step": 61450 }, { "epoch": 1.72, "learning_rate": 1.2751367269667648e-05, "loss": 0.1714, "step": 61500 }, { "epoch": 1.73, "learning_rate": 1.2737343991025103e-05, "loss": 0.1616, "step": 61550 }, { "epoch": 1.73, "learning_rate": 1.2723320712382556e-05, "loss": 0.1377, "step": 61600 }, { "epoch": 1.73, "learning_rate": 1.270929743374001e-05, "loss": 0.1322, "step": 61650 }, { "epoch": 1.73, "learning_rate": 1.2695274155097461e-05, "loss": 0.1655, "step": 61700 }, { "epoch": 1.73, "learning_rate": 1.2681250876454916e-05, "loss": 0.1752, "step": 61750 }, { "epoch": 1.73, "learning_rate": 1.266722759781237e-05, "loss": 0.1452, "step": 61800 }, { "epoch": 1.73, "learning_rate": 1.2653204319169823e-05, "loss": 0.1592, "step": 61850 }, { "epoch": 1.74, "learning_rate": 1.2639181040527274e-05, "loss": 0.1676, "step": 61900 }, { "epoch": 1.74, "learning_rate": 1.262515776188473e-05, "loss": 0.1578, "step": 61950 }, { "epoch": 1.74, "learning_rate": 1.2611134483242183e-05, "loss": 0.1543, "step": 62000 }, { "epoch": 1.74, "eval_bleu": 96.0497, "eval_gen_len": 64.2223, "eval_loss": 0.2444319725036621, "eval_rouge1": 93.9499, "eval_rouge2": 90.3622, "eval_rougeL": 93.7659, "eval_rougeLsum": 93.7507, "eval_runtime": 3090.9521, "eval_samples_per_second": 0.971, "eval_steps_per_second": 0.243, "step": 62000 }, { "epoch": 1.74, "learning_rate": 1.2597111204599636e-05, "loss": 0.1429, "step": 62050 }, { "epoch": 1.74, "learning_rate": 1.2583087925957088e-05, "loss": 0.1425, "step": 62100 }, { "epoch": 1.74, "learning_rate": 1.2569064647314543e-05, "loss": 0.1501, "step": 62150 }, { "epoch": 1.74, "learning_rate": 1.2555041368671996e-05, "loss": 0.1417, "step": 62200 }, { "epoch": 1.75, "learning_rate": 1.254101809002945e-05, "loss": 0.1495, "step": 62250 }, { "epoch": 1.75, "learning_rate": 1.2526994811386901e-05, "loss": 0.1493, "step": 62300 }, { "epoch": 1.75, "learning_rate": 1.2512971532744356e-05, "loss": 0.1437, "step": 62350 }, { "epoch": 1.75, "learning_rate": 1.249894825410181e-05, "loss": 0.1497, "step": 62400 }, { "epoch": 1.75, "learning_rate": 1.2484924975459263e-05, "loss": 0.1659, "step": 62450 }, { "epoch": 1.75, "learning_rate": 1.2470901696816715e-05, "loss": 0.1702, "step": 62500 }, { "epoch": 1.75, "learning_rate": 1.245687841817417e-05, "loss": 0.1591, "step": 62550 }, { "epoch": 1.76, "learning_rate": 1.2442855139531623e-05, "loss": 0.1472, "step": 62600 }, { "epoch": 1.76, "learning_rate": 1.2428831860889077e-05, "loss": 0.1638, "step": 62650 }, { "epoch": 1.76, "learning_rate": 1.2414808582246528e-05, "loss": 0.1669, "step": 62700 }, { "epoch": 1.76, "learning_rate": 1.2400785303603983e-05, "loss": 0.1435, "step": 62750 }, { "epoch": 1.76, "learning_rate": 1.2386762024961437e-05, "loss": 0.1513, "step": 62800 }, { "epoch": 1.76, "learning_rate": 1.237273874631889e-05, "loss": 0.1455, "step": 62850 }, { "epoch": 1.76, "learning_rate": 1.2358715467676343e-05, "loss": 0.1578, "step": 62900 }, { "epoch": 1.77, "learning_rate": 1.2344692189033797e-05, "loss": 0.1554, "step": 62950 }, { "epoch": 1.77, "learning_rate": 1.233066891039125e-05, "loss": 0.1544, "step": 63000 }, { "epoch": 1.77, "eval_bleu": 96.0681, "eval_gen_len": 64.241, "eval_loss": 0.245001420378685, "eval_rouge1": 93.9489, "eval_rouge2": 90.383, "eval_rougeL": 93.7697, "eval_rougeLsum": 93.7483, "eval_runtime": 3098.5549, "eval_samples_per_second": 0.968, "eval_steps_per_second": 0.242, "step": 63000 }, { "epoch": 1.77, "learning_rate": 1.2316645631748703e-05, "loss": 0.1269, "step": 63050 }, { "epoch": 1.77, "learning_rate": 1.2302622353106157e-05, "loss": 0.1689, "step": 63100 }, { "epoch": 1.77, "learning_rate": 1.228859907446361e-05, "loss": 0.1512, "step": 63150 }, { "epoch": 1.77, "learning_rate": 1.2274575795821063e-05, "loss": 0.1584, "step": 63200 }, { "epoch": 1.77, "learning_rate": 1.2260552517178517e-05, "loss": 0.142, "step": 63250 }, { "epoch": 1.78, "learning_rate": 1.224652923853597e-05, "loss": 0.1424, "step": 63300 }, { "epoch": 1.78, "learning_rate": 1.2232505959893424e-05, "loss": 0.144, "step": 63350 }, { "epoch": 1.78, "learning_rate": 1.2218482681250877e-05, "loss": 0.1608, "step": 63400 }, { "epoch": 1.78, "learning_rate": 1.220445940260833e-05, "loss": 0.1376, "step": 63450 }, { "epoch": 1.78, "learning_rate": 1.2190436123965784e-05, "loss": 0.17, "step": 63500 }, { "epoch": 1.78, "learning_rate": 1.2176412845323237e-05, "loss": 0.1587, "step": 63550 }, { "epoch": 1.78, "learning_rate": 1.216238956668069e-05, "loss": 0.1572, "step": 63600 }, { "epoch": 1.79, "learning_rate": 1.2148366288038144e-05, "loss": 0.1501, "step": 63650 }, { "epoch": 1.79, "learning_rate": 1.2134343009395597e-05, "loss": 0.1514, "step": 63700 }, { "epoch": 1.79, "learning_rate": 1.212031973075305e-05, "loss": 0.1656, "step": 63750 }, { "epoch": 1.79, "learning_rate": 1.2106296452110504e-05, "loss": 0.1684, "step": 63800 }, { "epoch": 1.79, "learning_rate": 1.2092273173467957e-05, "loss": 0.1481, "step": 63850 }, { "epoch": 1.79, "learning_rate": 1.207824989482541e-05, "loss": 0.1528, "step": 63900 }, { "epoch": 1.79, "learning_rate": 1.2064226616182864e-05, "loss": 0.1433, "step": 63950 }, { "epoch": 1.79, "learning_rate": 1.2050203337540317e-05, "loss": 0.148, "step": 64000 }, { "epoch": 1.79, "eval_bleu": 96.1034, "eval_gen_len": 64.2707, "eval_loss": 0.24430668354034424, "eval_rouge1": 93.969, "eval_rouge2": 90.4279, "eval_rougeL": 93.7797, "eval_rougeLsum": 93.7478, "eval_runtime": 3170.7735, "eval_samples_per_second": 0.946, "eval_steps_per_second": 0.237, "step": 64000 }, { "epoch": 1.8, "learning_rate": 1.203618005889777e-05, "loss": 0.1517, "step": 64050 }, { "epoch": 1.8, "learning_rate": 1.2022156780255224e-05, "loss": 0.1383, "step": 64100 }, { "epoch": 1.8, "learning_rate": 1.2008133501612677e-05, "loss": 0.1335, "step": 64150 }, { "epoch": 1.8, "learning_rate": 1.199411022297013e-05, "loss": 0.1427, "step": 64200 }, { "epoch": 1.8, "learning_rate": 1.1980086944327584e-05, "loss": 0.1617, "step": 64250 }, { "epoch": 1.8, "learning_rate": 1.1966063665685037e-05, "loss": 0.1247, "step": 64300 }, { "epoch": 1.8, "learning_rate": 1.1952040387042492e-05, "loss": 0.1574, "step": 64350 }, { "epoch": 1.81, "learning_rate": 1.1938017108399944e-05, "loss": 0.1431, "step": 64400 }, { "epoch": 1.81, "learning_rate": 1.1923993829757397e-05, "loss": 0.1423, "step": 64450 }, { "epoch": 1.81, "learning_rate": 1.190997055111485e-05, "loss": 0.1508, "step": 64500 }, { "epoch": 1.81, "learning_rate": 1.1895947272472306e-05, "loss": 0.1516, "step": 64550 }, { "epoch": 1.81, "learning_rate": 1.1881923993829758e-05, "loss": 0.1392, "step": 64600 }, { "epoch": 1.81, "learning_rate": 1.1867900715187211e-05, "loss": 0.1487, "step": 64650 }, { "epoch": 1.81, "learning_rate": 1.1853877436544664e-05, "loss": 0.1772, "step": 64700 }, { "epoch": 1.82, "learning_rate": 1.183985415790212e-05, "loss": 0.1416, "step": 64750 }, { "epoch": 1.82, "learning_rate": 1.1825830879259571e-05, "loss": 0.1644, "step": 64800 }, { "epoch": 1.82, "learning_rate": 1.1811807600617024e-05, "loss": 0.1524, "step": 64850 }, { "epoch": 1.82, "learning_rate": 1.1797784321974478e-05, "loss": 0.1573, "step": 64900 }, { "epoch": 1.82, "learning_rate": 1.1783761043331933e-05, "loss": 0.1497, "step": 64950 }, { "epoch": 1.82, "learning_rate": 1.1769737764689384e-05, "loss": 0.1676, "step": 65000 }, { "epoch": 1.82, "eval_bleu": 96.0602, "eval_gen_len": 64.2243, "eval_loss": 0.24451076984405518, "eval_rouge1": 93.9544, "eval_rouge2": 90.3539, "eval_rougeL": 93.7721, "eval_rougeLsum": 93.7482, "eval_runtime": 3116.5293, "eval_samples_per_second": 0.963, "eval_steps_per_second": 0.241, "step": 65000 }, { "epoch": 1.82, "learning_rate": 1.1755714486046838e-05, "loss": 0.1516, "step": 65050 }, { "epoch": 1.83, "learning_rate": 1.1741691207404291e-05, "loss": 0.1279, "step": 65100 }, { "epoch": 1.83, "learning_rate": 1.1727667928761746e-05, "loss": 0.1641, "step": 65150 }, { "epoch": 1.83, "learning_rate": 1.1713644650119198e-05, "loss": 0.1234, "step": 65200 }, { "epoch": 1.83, "learning_rate": 1.1699621371476651e-05, "loss": 0.1391, "step": 65250 }, { "epoch": 1.83, "learning_rate": 1.1685598092834105e-05, "loss": 0.1653, "step": 65300 }, { "epoch": 1.83, "learning_rate": 1.167157481419156e-05, "loss": 0.1587, "step": 65350 }, { "epoch": 1.83, "learning_rate": 1.1657551535549011e-05, "loss": 0.1459, "step": 65400 }, { "epoch": 1.84, "learning_rate": 1.1643528256906465e-05, "loss": 0.1583, "step": 65450 }, { "epoch": 1.84, "learning_rate": 1.1629504978263918e-05, "loss": 0.1477, "step": 65500 }, { "epoch": 1.84, "learning_rate": 1.1615481699621373e-05, "loss": 0.1549, "step": 65550 }, { "epoch": 1.84, "learning_rate": 1.1601458420978825e-05, "loss": 0.1531, "step": 65600 }, { "epoch": 1.84, "learning_rate": 1.1587435142336278e-05, "loss": 0.1594, "step": 65650 }, { "epoch": 1.84, "learning_rate": 1.1573411863693731e-05, "loss": 0.1635, "step": 65700 }, { "epoch": 1.84, "learning_rate": 1.1559388585051186e-05, "loss": 0.1508, "step": 65750 }, { "epoch": 1.85, "learning_rate": 1.154536530640864e-05, "loss": 0.1314, "step": 65800 }, { "epoch": 1.85, "learning_rate": 1.1531342027766091e-05, "loss": 0.1398, "step": 65850 }, { "epoch": 1.85, "learning_rate": 1.1517318749123545e-05, "loss": 0.1427, "step": 65900 }, { "epoch": 1.85, "learning_rate": 1.1503295470481e-05, "loss": 0.1338, "step": 65950 }, { "epoch": 1.85, "learning_rate": 1.1489272191838453e-05, "loss": 0.1542, "step": 66000 }, { "epoch": 1.85, "eval_bleu": 96.0707, "eval_gen_len": 64.2427, "eval_loss": 0.24484632909297943, "eval_rouge1": 93.9242, "eval_rouge2": 90.3577, "eval_rougeL": 93.7378, "eval_rougeLsum": 93.7199, "eval_runtime": 3063.6568, "eval_samples_per_second": 0.979, "eval_steps_per_second": 0.245, "step": 66000 }, { "epoch": 1.85, "learning_rate": 1.1475248913195905e-05, "loss": 0.1571, "step": 66050 }, { "epoch": 1.85, "learning_rate": 1.1461225634553358e-05, "loss": 0.1345, "step": 66100 }, { "epoch": 1.86, "learning_rate": 1.1447202355910813e-05, "loss": 0.1669, "step": 66150 }, { "epoch": 1.86, "learning_rate": 1.1433179077268267e-05, "loss": 0.1534, "step": 66200 }, { "epoch": 1.86, "learning_rate": 1.1419155798625718e-05, "loss": 0.1729, "step": 66250 }, { "epoch": 1.86, "learning_rate": 1.1405132519983172e-05, "loss": 0.1338, "step": 66300 }, { "epoch": 1.86, "learning_rate": 1.1391109241340627e-05, "loss": 0.1464, "step": 66350 }, { "epoch": 1.86, "learning_rate": 1.137708596269808e-05, "loss": 0.1445, "step": 66400 }, { "epoch": 1.86, "learning_rate": 1.1363062684055532e-05, "loss": 0.1372, "step": 66450 }, { "epoch": 1.87, "learning_rate": 1.1349039405412985e-05, "loss": 0.1819, "step": 66500 }, { "epoch": 1.87, "learning_rate": 1.133501612677044e-05, "loss": 0.1483, "step": 66550 }, { "epoch": 1.87, "learning_rate": 1.1320992848127894e-05, "loss": 0.1391, "step": 66600 }, { "epoch": 1.87, "learning_rate": 1.1306969569485345e-05, "loss": 0.1555, "step": 66650 }, { "epoch": 1.87, "learning_rate": 1.1292946290842799e-05, "loss": 0.1748, "step": 66700 }, { "epoch": 1.87, "learning_rate": 1.1278923012200254e-05, "loss": 0.1608, "step": 66750 }, { "epoch": 1.87, "learning_rate": 1.1264899733557707e-05, "loss": 0.1399, "step": 66800 }, { "epoch": 1.87, "learning_rate": 1.1250876454915159e-05, "loss": 0.1399, "step": 66850 }, { "epoch": 1.88, "learning_rate": 1.1236853176272612e-05, "loss": 0.1753, "step": 66900 }, { "epoch": 1.88, "learning_rate": 1.1222829897630067e-05, "loss": 0.1494, "step": 66950 }, { "epoch": 1.88, "learning_rate": 1.120880661898752e-05, "loss": 0.1507, "step": 67000 }, { "epoch": 1.88, "eval_bleu": 96.0481, "eval_gen_len": 64.2317, "eval_loss": 0.24457640945911407, "eval_rouge1": 93.9262, "eval_rouge2": 90.3319, "eval_rougeL": 93.7352, "eval_rougeLsum": 93.7164, "eval_runtime": 3094.0136, "eval_samples_per_second": 0.97, "eval_steps_per_second": 0.242, "step": 67000 }, { "epoch": 1.88, "learning_rate": 1.1194783340344972e-05, "loss": 0.1775, "step": 67050 }, { "epoch": 1.88, "learning_rate": 1.1180760061702425e-05, "loss": 0.1619, "step": 67100 }, { "epoch": 1.88, "learning_rate": 1.116673678305988e-05, "loss": 0.1589, "step": 67150 }, { "epoch": 1.88, "learning_rate": 1.1152713504417334e-05, "loss": 0.1598, "step": 67200 }, { "epoch": 1.89, "learning_rate": 1.1138690225774787e-05, "loss": 0.1652, "step": 67250 }, { "epoch": 1.89, "learning_rate": 1.1124666947132239e-05, "loss": 0.1336, "step": 67300 }, { "epoch": 1.89, "learning_rate": 1.1110643668489694e-05, "loss": 0.1574, "step": 67350 }, { "epoch": 1.89, "learning_rate": 1.1096620389847147e-05, "loss": 0.1644, "step": 67400 }, { "epoch": 1.89, "learning_rate": 1.10825971112046e-05, "loss": 0.1637, "step": 67450 }, { "epoch": 1.89, "learning_rate": 1.1068573832562052e-05, "loss": 0.1396, "step": 67500 }, { "epoch": 1.89, "learning_rate": 1.1054550553919507e-05, "loss": 0.1304, "step": 67550 }, { "epoch": 1.9, "learning_rate": 1.104052727527696e-05, "loss": 0.153, "step": 67600 }, { "epoch": 1.9, "learning_rate": 1.1026503996634414e-05, "loss": 0.1324, "step": 67650 }, { "epoch": 1.9, "learning_rate": 1.1012480717991866e-05, "loss": 0.1581, "step": 67700 }, { "epoch": 1.9, "learning_rate": 1.099845743934932e-05, "loss": 0.1593, "step": 67750 }, { "epoch": 1.9, "learning_rate": 1.0984434160706774e-05, "loss": 0.1422, "step": 67800 }, { "epoch": 1.9, "learning_rate": 1.0970410882064227e-05, "loss": 0.1491, "step": 67850 }, { "epoch": 1.9, "learning_rate": 1.0956387603421679e-05, "loss": 0.1727, "step": 67900 }, { "epoch": 1.91, "learning_rate": 1.0942364324779134e-05, "loss": 0.1576, "step": 67950 }, { "epoch": 1.91, "learning_rate": 1.0928341046136588e-05, "loss": 0.1701, "step": 68000 }, { "epoch": 1.91, "eval_bleu": 96.0842, "eval_gen_len": 64.2387, "eval_loss": 0.24398915469646454, "eval_rouge1": 93.9618, "eval_rouge2": 90.4097, "eval_rougeL": 93.7775, "eval_rougeLsum": 93.7665, "eval_runtime": 3135.9742, "eval_samples_per_second": 0.957, "eval_steps_per_second": 0.239, "step": 68000 }, { "epoch": 1.91, "learning_rate": 1.0914317767494041e-05, "loss": 0.1359, "step": 68050 }, { "epoch": 1.91, "learning_rate": 1.0900294488851493e-05, "loss": 0.1762, "step": 68100 }, { "epoch": 1.91, "learning_rate": 1.0886271210208948e-05, "loss": 0.1596, "step": 68150 }, { "epoch": 1.91, "learning_rate": 1.0872247931566401e-05, "loss": 0.1574, "step": 68200 }, { "epoch": 1.91, "learning_rate": 1.0858224652923854e-05, "loss": 0.1334, "step": 68250 }, { "epoch": 1.92, "learning_rate": 1.0844201374281306e-05, "loss": 0.1456, "step": 68300 }, { "epoch": 1.92, "learning_rate": 1.0830178095638761e-05, "loss": 0.1625, "step": 68350 }, { "epoch": 1.92, "learning_rate": 1.0816154816996214e-05, "loss": 0.1715, "step": 68400 }, { "epoch": 1.92, "learning_rate": 1.0802131538353668e-05, "loss": 0.1444, "step": 68450 }, { "epoch": 1.92, "learning_rate": 1.078810825971112e-05, "loss": 0.1748, "step": 68500 }, { "epoch": 1.92, "learning_rate": 1.0774084981068574e-05, "loss": 0.1522, "step": 68550 }, { "epoch": 1.92, "learning_rate": 1.0760061702426028e-05, "loss": 0.1364, "step": 68600 }, { "epoch": 1.93, "learning_rate": 1.0746038423783481e-05, "loss": 0.1557, "step": 68650 }, { "epoch": 1.93, "learning_rate": 1.0732015145140935e-05, "loss": 0.1399, "step": 68700 }, { "epoch": 1.93, "learning_rate": 1.0717991866498388e-05, "loss": 0.1478, "step": 68750 }, { "epoch": 1.93, "learning_rate": 1.0703968587855841e-05, "loss": 0.1444, "step": 68800 }, { "epoch": 1.93, "learning_rate": 1.0689945309213295e-05, "loss": 0.1419, "step": 68850 }, { "epoch": 1.93, "learning_rate": 1.0675922030570748e-05, "loss": 0.1642, "step": 68900 }, { "epoch": 1.93, "learning_rate": 1.0661898751928201e-05, "loss": 0.1646, "step": 68950 }, { "epoch": 1.94, "learning_rate": 1.0647875473285655e-05, "loss": 0.161, "step": 69000 }, { "epoch": 1.94, "eval_bleu": 96.1059, "eval_gen_len": 64.2643, "eval_loss": 0.24390248954296112, "eval_rouge1": 93.9311, "eval_rouge2": 90.376, "eval_rougeL": 93.746, "eval_rougeLsum": 93.7269, "eval_runtime": 3190.0005, "eval_samples_per_second": 0.94, "eval_steps_per_second": 0.235, "step": 69000 }, { "epoch": 1.94, "learning_rate": 1.0633852194643108e-05, "loss": 0.1595, "step": 69050 }, { "epoch": 1.94, "learning_rate": 1.0619828916000561e-05, "loss": 0.1468, "step": 69100 }, { "epoch": 1.94, "learning_rate": 1.0605805637358015e-05, "loss": 0.1417, "step": 69150 }, { "epoch": 1.94, "learning_rate": 1.0591782358715468e-05, "loss": 0.1555, "step": 69200 }, { "epoch": 1.94, "learning_rate": 1.0577759080072921e-05, "loss": 0.1611, "step": 69250 }, { "epoch": 1.94, "learning_rate": 1.0563735801430375e-05, "loss": 0.1587, "step": 69300 }, { "epoch": 1.95, "learning_rate": 1.0549712522787828e-05, "loss": 0.1521, "step": 69350 }, { "epoch": 1.95, "learning_rate": 1.0535689244145282e-05, "loss": 0.1356, "step": 69400 }, { "epoch": 1.95, "learning_rate": 1.0521665965502735e-05, "loss": 0.1494, "step": 69450 }, { "epoch": 1.95, "learning_rate": 1.0507642686860188e-05, "loss": 0.1391, "step": 69500 }, { "epoch": 1.95, "learning_rate": 1.0493619408217642e-05, "loss": 0.1631, "step": 69550 }, { "epoch": 1.95, "learning_rate": 1.0479596129575095e-05, "loss": 0.1526, "step": 69600 }, { "epoch": 1.95, "learning_rate": 1.0465572850932548e-05, "loss": 0.1431, "step": 69650 }, { "epoch": 1.95, "learning_rate": 1.0451549572290002e-05, "loss": 0.161, "step": 69700 }, { "epoch": 1.96, "learning_rate": 1.0437526293647455e-05, "loss": 0.1628, "step": 69750 }, { "epoch": 1.96, "learning_rate": 1.0423503015004908e-05, "loss": 0.1529, "step": 69800 }, { "epoch": 1.96, "learning_rate": 1.0409479736362362e-05, "loss": 0.1452, "step": 69850 }, { "epoch": 1.96, "learning_rate": 1.0395456457719815e-05, "loss": 0.1459, "step": 69900 }, { "epoch": 1.96, "learning_rate": 1.0381433179077268e-05, "loss": 0.1444, "step": 69950 }, { "epoch": 1.96, "learning_rate": 1.0367409900434722e-05, "loss": 0.1316, "step": 70000 }, { "epoch": 1.96, "eval_bleu": 96.0919, "eval_gen_len": 64.248, "eval_loss": 0.24350972473621368, "eval_rouge1": 93.9235, "eval_rouge2": 90.3844, "eval_rougeL": 93.751, "eval_rougeLsum": 93.7278, "eval_runtime": 3089.9712, "eval_samples_per_second": 0.971, "eval_steps_per_second": 0.243, "step": 70000 }, { "epoch": 1.96, "learning_rate": 1.0353386621792175e-05, "loss": 0.1485, "step": 70050 }, { "epoch": 1.97, "learning_rate": 1.0339363343149629e-05, "loss": 0.1559, "step": 70100 }, { "epoch": 1.97, "learning_rate": 1.0325340064507084e-05, "loss": 0.1483, "step": 70150 }, { "epoch": 1.97, "learning_rate": 1.0311316785864535e-05, "loss": 0.1845, "step": 70200 }, { "epoch": 1.97, "learning_rate": 1.0297293507221989e-05, "loss": 0.1417, "step": 70250 }, { "epoch": 1.97, "learning_rate": 1.0283270228579442e-05, "loss": 0.1341, "step": 70300 }, { "epoch": 1.97, "learning_rate": 1.0269246949936897e-05, "loss": 0.1462, "step": 70350 }, { "epoch": 1.97, "learning_rate": 1.0255223671294349e-05, "loss": 0.1721, "step": 70400 }, { "epoch": 1.98, "learning_rate": 1.0241200392651802e-05, "loss": 0.1549, "step": 70450 }, { "epoch": 1.98, "learning_rate": 1.0227177114009255e-05, "loss": 0.16, "step": 70500 }, { "epoch": 1.98, "learning_rate": 1.021315383536671e-05, "loss": 0.1455, "step": 70550 }, { "epoch": 1.98, "learning_rate": 1.0199130556724162e-05, "loss": 0.1312, "step": 70600 }, { "epoch": 1.98, "learning_rate": 1.0185107278081615e-05, "loss": 0.1607, "step": 70650 }, { "epoch": 1.98, "learning_rate": 1.0171083999439069e-05, "loss": 0.1483, "step": 70700 }, { "epoch": 1.98, "learning_rate": 1.0157060720796524e-05, "loss": 0.1725, "step": 70750 }, { "epoch": 1.99, "learning_rate": 1.0143037442153976e-05, "loss": 0.1598, "step": 70800 }, { "epoch": 1.99, "learning_rate": 1.0129014163511429e-05, "loss": 0.1384, "step": 70850 }, { "epoch": 1.99, "learning_rate": 1.0114990884868882e-05, "loss": 0.1448, "step": 70900 }, { "epoch": 1.99, "learning_rate": 1.0100967606226337e-05, "loss": 0.1534, "step": 70950 }, { "epoch": 1.99, "learning_rate": 1.0086944327583789e-05, "loss": 0.136, "step": 71000 }, { "epoch": 1.99, "eval_bleu": 96.0747, "eval_gen_len": 64.2193, "eval_loss": 0.24425600469112396, "eval_rouge1": 93.9676, "eval_rouge2": 90.4404, "eval_rougeL": 93.7902, "eval_rougeLsum": 93.7617, "eval_runtime": 3086.9294, "eval_samples_per_second": 0.972, "eval_steps_per_second": 0.243, "step": 71000 }, { "epoch": 1.99, "learning_rate": 1.0072921048941242e-05, "loss": 0.163, "step": 71050 }, { "epoch": 1.99, "learning_rate": 1.0058897770298696e-05, "loss": 0.1373, "step": 71100 }, { "epoch": 2.0, "learning_rate": 1.004487449165615e-05, "loss": 0.1452, "step": 71150 }, { "epoch": 2.0, "learning_rate": 1.0030851213013602e-05, "loss": 0.1583, "step": 71200 }, { "epoch": 2.0, "learning_rate": 1.0016827934371056e-05, "loss": 0.1396, "step": 71250 }, { "epoch": 2.0, "learning_rate": 1.0002804655728509e-05, "loss": 0.1309, "step": 71300 }, { "epoch": 2.0, "learning_rate": 9.988781377085964e-06, "loss": 0.1628, "step": 71350 }, { "epoch": 2.0, "learning_rate": 9.974758098443416e-06, "loss": 0.1354, "step": 71400 }, { "epoch": 2.0, "learning_rate": 9.96073481980087e-06, "loss": 0.1565, "step": 71450 }, { "epoch": 2.01, "learning_rate": 9.946711541158323e-06, "loss": 0.1411, "step": 71500 }, { "epoch": 2.01, "learning_rate": 9.932688262515778e-06, "loss": 0.1681, "step": 71550 }, { "epoch": 2.01, "learning_rate": 9.918664983873231e-06, "loss": 0.1406, "step": 71600 }, { "epoch": 2.01, "learning_rate": 9.904641705230683e-06, "loss": 0.1308, "step": 71650 }, { "epoch": 2.01, "learning_rate": 9.890618426588136e-06, "loss": 0.1221, "step": 71700 }, { "epoch": 2.01, "learning_rate": 9.876595147945591e-06, "loss": 0.1325, "step": 71750 }, { "epoch": 2.01, "learning_rate": 9.862571869303044e-06, "loss": 0.1377, "step": 71800 }, { "epoch": 2.02, "learning_rate": 9.848548590660496e-06, "loss": 0.1437, "step": 71850 }, { "epoch": 2.02, "learning_rate": 9.83452531201795e-06, "loss": 0.153, "step": 71900 }, { "epoch": 2.02, "learning_rate": 9.820502033375404e-06, "loss": 0.1462, "step": 71950 }, { "epoch": 2.02, "learning_rate": 9.806478754732858e-06, "loss": 0.1651, "step": 72000 }, { "epoch": 2.02, "eval_bleu": 96.1009, "eval_gen_len": 64.2463, "eval_loss": 0.24310755729675293, "eval_rouge1": 94.0005, "eval_rouge2": 90.4979, "eval_rougeL": 93.8136, "eval_rougeLsum": 93.7931, "eval_runtime": 3096.8678, "eval_samples_per_second": 0.969, "eval_steps_per_second": 0.242, "step": 72000 }, { "epoch": 2.02, "learning_rate": 9.79245547609031e-06, "loss": 0.1675, "step": 72050 }, { "epoch": 2.02, "learning_rate": 9.778432197447763e-06, "loss": 0.15, "step": 72100 }, { "epoch": 2.02, "learning_rate": 9.764408918805218e-06, "loss": 0.1351, "step": 72150 }, { "epoch": 2.02, "learning_rate": 9.750385640162671e-06, "loss": 0.1424, "step": 72200 }, { "epoch": 2.03, "learning_rate": 9.736362361520123e-06, "loss": 0.1351, "step": 72250 }, { "epoch": 2.03, "learning_rate": 9.722339082877576e-06, "loss": 0.1265, "step": 72300 }, { "epoch": 2.03, "learning_rate": 9.708315804235031e-06, "loss": 0.1696, "step": 72350 }, { "epoch": 2.03, "learning_rate": 9.694292525592485e-06, "loss": 0.1451, "step": 72400 }, { "epoch": 2.03, "learning_rate": 9.680269246949936e-06, "loss": 0.1417, "step": 72450 }, { "epoch": 2.03, "learning_rate": 9.66624596830739e-06, "loss": 0.1383, "step": 72500 }, { "epoch": 2.03, "learning_rate": 9.652222689664845e-06, "loss": 0.1393, "step": 72550 }, { "epoch": 2.04, "learning_rate": 9.638199411022298e-06, "loss": 0.1378, "step": 72600 }, { "epoch": 2.04, "learning_rate": 9.62417613237975e-06, "loss": 0.1632, "step": 72650 }, { "epoch": 2.04, "learning_rate": 9.610152853737203e-06, "loss": 0.152, "step": 72700 }, { "epoch": 2.04, "learning_rate": 9.596129575094658e-06, "loss": 0.1392, "step": 72750 }, { "epoch": 2.04, "learning_rate": 9.582106296452112e-06, "loss": 0.1381, "step": 72800 }, { "epoch": 2.04, "learning_rate": 9.568083017809565e-06, "loss": 0.1341, "step": 72850 }, { "epoch": 2.04, "learning_rate": 9.554059739167017e-06, "loss": 0.1372, "step": 72900 }, { "epoch": 2.05, "learning_rate": 9.540036460524472e-06, "loss": 0.1521, "step": 72950 }, { "epoch": 2.05, "learning_rate": 9.526013181881925e-06, "loss": 0.1552, "step": 73000 }, { "epoch": 2.05, "eval_bleu": 96.0944, "eval_gen_len": 64.2553, "eval_loss": 0.2440294772386551, "eval_rouge1": 93.9391, "eval_rouge2": 90.4451, "eval_rougeL": 93.7575, "eval_rougeLsum": 93.7421, "eval_runtime": 3104.3345, "eval_samples_per_second": 0.966, "eval_steps_per_second": 0.242, "step": 73000 }, { "epoch": 2.05, "learning_rate": 9.511989903239378e-06, "loss": 0.1374, "step": 73050 }, { "epoch": 2.05, "learning_rate": 9.49796662459683e-06, "loss": 0.1416, "step": 73100 }, { "epoch": 2.05, "learning_rate": 9.483943345954285e-06, "loss": 0.1511, "step": 73150 }, { "epoch": 2.05, "learning_rate": 9.469920067311738e-06, "loss": 0.1287, "step": 73200 }, { "epoch": 2.05, "learning_rate": 9.455896788669192e-06, "loss": 0.154, "step": 73250 }, { "epoch": 2.06, "learning_rate": 9.441873510026643e-06, "loss": 0.1399, "step": 73300 }, { "epoch": 2.06, "learning_rate": 9.427850231384098e-06, "loss": 0.1371, "step": 73350 }, { "epoch": 2.06, "learning_rate": 9.413826952741552e-06, "loss": 0.1424, "step": 73400 }, { "epoch": 2.06, "learning_rate": 9.399803674099005e-06, "loss": 0.1441, "step": 73450 }, { "epoch": 2.06, "learning_rate": 9.385780395456457e-06, "loss": 0.1276, "step": 73500 }, { "epoch": 2.06, "learning_rate": 9.371757116813912e-06, "loss": 0.1268, "step": 73550 }, { "epoch": 2.06, "learning_rate": 9.357733838171365e-06, "loss": 0.1587, "step": 73600 }, { "epoch": 2.07, "learning_rate": 9.343710559528819e-06, "loss": 0.1475, "step": 73650 }, { "epoch": 2.07, "learning_rate": 9.32968728088627e-06, "loss": 0.1288, "step": 73700 }, { "epoch": 2.07, "learning_rate": 9.315664002243725e-06, "loss": 0.1432, "step": 73750 }, { "epoch": 2.07, "learning_rate": 9.301640723601179e-06, "loss": 0.148, "step": 73800 }, { "epoch": 2.07, "learning_rate": 9.287617444958632e-06, "loss": 0.1506, "step": 73850 }, { "epoch": 2.07, "learning_rate": 9.273594166316084e-06, "loss": 0.1345, "step": 73900 }, { "epoch": 2.07, "learning_rate": 9.259570887673539e-06, "loss": 0.1276, "step": 73950 }, { "epoch": 2.08, "learning_rate": 9.245547609030992e-06, "loss": 0.1491, "step": 74000 }, { "epoch": 2.08, "eval_bleu": 96.07, "eval_gen_len": 64.2557, "eval_loss": 0.24399851262569427, "eval_rouge1": 93.9143, "eval_rouge2": 90.3682, "eval_rougeL": 93.7204, "eval_rougeLsum": 93.712, "eval_runtime": 3103.8893, "eval_samples_per_second": 0.967, "eval_steps_per_second": 0.242, "step": 74000 }, { "epoch": 2.08, "learning_rate": 9.231524330388445e-06, "loss": 0.1277, "step": 74050 }, { "epoch": 2.08, "learning_rate": 9.217501051745897e-06, "loss": 0.1306, "step": 74100 }, { "epoch": 2.08, "learning_rate": 9.203477773103352e-06, "loss": 0.1371, "step": 74150 }, { "epoch": 2.08, "learning_rate": 9.189454494460806e-06, "loss": 0.1384, "step": 74200 }, { "epoch": 2.08, "learning_rate": 9.175431215818259e-06, "loss": 0.1405, "step": 74250 }, { "epoch": 2.08, "learning_rate": 9.161407937175712e-06, "loss": 0.1373, "step": 74300 }, { "epoch": 2.09, "learning_rate": 9.147384658533166e-06, "loss": 0.1418, "step": 74350 }, { "epoch": 2.09, "learning_rate": 9.133361379890619e-06, "loss": 0.126, "step": 74400 }, { "epoch": 2.09, "learning_rate": 9.119338101248072e-06, "loss": 0.1738, "step": 74450 }, { "epoch": 2.09, "learning_rate": 9.105314822605526e-06, "loss": 0.1466, "step": 74500 }, { "epoch": 2.09, "learning_rate": 9.091291543962979e-06, "loss": 0.1381, "step": 74550 }, { "epoch": 2.09, "learning_rate": 9.077268265320432e-06, "loss": 0.1384, "step": 74600 }, { "epoch": 2.09, "learning_rate": 9.063244986677886e-06, "loss": 0.1442, "step": 74650 }, { "epoch": 2.1, "learning_rate": 9.049221708035339e-06, "loss": 0.1371, "step": 74700 }, { "epoch": 2.1, "learning_rate": 9.035198429392793e-06, "loss": 0.1331, "step": 74750 }, { "epoch": 2.1, "learning_rate": 9.021175150750246e-06, "loss": 0.136, "step": 74800 }, { "epoch": 2.1, "learning_rate": 9.0071518721077e-06, "loss": 0.1422, "step": 74850 }, { "epoch": 2.1, "learning_rate": 8.993128593465153e-06, "loss": 0.1386, "step": 74900 }, { "epoch": 2.1, "learning_rate": 8.979105314822606e-06, "loss": 0.1247, "step": 74950 }, { "epoch": 2.1, "learning_rate": 8.96508203618006e-06, "loss": 0.1567, "step": 75000 }, { "epoch": 2.1, "eval_bleu": 96.1043, "eval_gen_len": 64.258, "eval_loss": 0.24418920278549194, "eval_rouge1": 93.9448, "eval_rouge2": 90.3671, "eval_rougeL": 93.7538, "eval_rougeLsum": 93.7435, "eval_runtime": 3102.9976, "eval_samples_per_second": 0.967, "eval_steps_per_second": 0.242, "step": 75000 }, { "epoch": 2.1, "learning_rate": 8.951058757537513e-06, "loss": 0.1308, "step": 75050 }, { "epoch": 2.11, "learning_rate": 8.937035478894966e-06, "loss": 0.1387, "step": 75100 }, { "epoch": 2.11, "learning_rate": 8.92301220025242e-06, "loss": 0.1356, "step": 75150 }, { "epoch": 2.11, "learning_rate": 8.908988921609873e-06, "loss": 0.1397, "step": 75200 }, { "epoch": 2.11, "learning_rate": 8.894965642967326e-06, "loss": 0.1276, "step": 75250 }, { "epoch": 2.11, "learning_rate": 8.88094236432478e-06, "loss": 0.1354, "step": 75300 }, { "epoch": 2.11, "learning_rate": 8.866919085682233e-06, "loss": 0.1537, "step": 75350 }, { "epoch": 2.11, "learning_rate": 8.852895807039686e-06, "loss": 0.1508, "step": 75400 }, { "epoch": 2.12, "learning_rate": 8.83887252839714e-06, "loss": 0.1659, "step": 75450 }, { "epoch": 2.12, "learning_rate": 8.824849249754593e-06, "loss": 0.1433, "step": 75500 }, { "epoch": 2.12, "learning_rate": 8.810825971112046e-06, "loss": 0.1315, "step": 75550 }, { "epoch": 2.12, "learning_rate": 8.7968026924695e-06, "loss": 0.1257, "step": 75600 }, { "epoch": 2.12, "learning_rate": 8.782779413826953e-06, "loss": 0.1226, "step": 75650 }, { "epoch": 2.12, "learning_rate": 8.768756135184406e-06, "loss": 0.1397, "step": 75700 }, { "epoch": 2.12, "learning_rate": 8.754732856541861e-06, "loss": 0.1245, "step": 75750 }, { "epoch": 2.13, "learning_rate": 8.740709577899313e-06, "loss": 0.1508, "step": 75800 }, { "epoch": 2.13, "learning_rate": 8.726686299256766e-06, "loss": 0.152, "step": 75850 }, { "epoch": 2.13, "learning_rate": 8.71266302061422e-06, "loss": 0.1475, "step": 75900 }, { "epoch": 2.13, "learning_rate": 8.698639741971675e-06, "loss": 0.1448, "step": 75950 }, { "epoch": 2.13, "learning_rate": 8.684616463329126e-06, "loss": 0.1444, "step": 76000 }, { "epoch": 2.13, "eval_bleu": 96.0981, "eval_gen_len": 64.2277, "eval_loss": 0.24386686086654663, "eval_rouge1": 93.9613, "eval_rouge2": 90.4186, "eval_rougeL": 93.7791, "eval_rougeLsum": 93.7578, "eval_runtime": 3114.9073, "eval_samples_per_second": 0.963, "eval_steps_per_second": 0.241, "step": 76000 }, { "epoch": 2.13, "learning_rate": 8.67059318468658e-06, "loss": 0.1552, "step": 76050 }, { "epoch": 2.13, "learning_rate": 8.656569906044033e-06, "loss": 0.151, "step": 76100 }, { "epoch": 2.14, "learning_rate": 8.642546627401488e-06, "loss": 0.1493, "step": 76150 }, { "epoch": 2.14, "learning_rate": 8.62852334875894e-06, "loss": 0.1279, "step": 76200 }, { "epoch": 2.14, "learning_rate": 8.614500070116393e-06, "loss": 0.1521, "step": 76250 }, { "epoch": 2.14, "learning_rate": 8.600476791473847e-06, "loss": 0.1565, "step": 76300 }, { "epoch": 2.14, "learning_rate": 8.586453512831302e-06, "loss": 0.1412, "step": 76350 }, { "epoch": 2.14, "learning_rate": 8.572430234188753e-06, "loss": 0.138, "step": 76400 }, { "epoch": 2.14, "learning_rate": 8.558406955546207e-06, "loss": 0.1302, "step": 76450 }, { "epoch": 2.15, "learning_rate": 8.54438367690366e-06, "loss": 0.1542, "step": 76500 }, { "epoch": 2.15, "learning_rate": 8.530360398261115e-06, "loss": 0.1341, "step": 76550 }, { "epoch": 2.15, "learning_rate": 8.516337119618567e-06, "loss": 0.1643, "step": 76600 }, { "epoch": 2.15, "learning_rate": 8.50231384097602e-06, "loss": 0.1574, "step": 76650 }, { "epoch": 2.15, "learning_rate": 8.488290562333473e-06, "loss": 0.1382, "step": 76700 }, { "epoch": 2.15, "learning_rate": 8.474267283690929e-06, "loss": 0.1355, "step": 76750 }, { "epoch": 2.15, "learning_rate": 8.46024400504838e-06, "loss": 0.1396, "step": 76800 }, { "epoch": 2.16, "learning_rate": 8.446220726405834e-06, "loss": 0.1244, "step": 76850 }, { "epoch": 2.16, "learning_rate": 8.432197447763287e-06, "loss": 0.1291, "step": 76900 }, { "epoch": 2.16, "learning_rate": 8.418174169120742e-06, "loss": 0.1402, "step": 76950 }, { "epoch": 2.16, "learning_rate": 8.404150890478194e-06, "loss": 0.151, "step": 77000 }, { "epoch": 2.16, "eval_bleu": 96.1028, "eval_gen_len": 64.2047, "eval_loss": 0.243386372923851, "eval_rouge1": 93.9323, "eval_rouge2": 90.4104, "eval_rougeL": 93.7353, "eval_rougeLsum": 93.7303, "eval_runtime": 3082.1904, "eval_samples_per_second": 0.973, "eval_steps_per_second": 0.243, "step": 77000 }, { "epoch": 2.16, "learning_rate": 8.390127611835647e-06, "loss": 0.1471, "step": 77050 }, { "epoch": 2.16, "learning_rate": 8.3761043331931e-06, "loss": 0.1363, "step": 77100 }, { "epoch": 2.16, "learning_rate": 8.362081054550555e-06, "loss": 0.1174, "step": 77150 }, { "epoch": 2.17, "learning_rate": 8.348057775908009e-06, "loss": 0.1249, "step": 77200 }, { "epoch": 2.17, "learning_rate": 8.33403449726546e-06, "loss": 0.1323, "step": 77250 }, { "epoch": 2.17, "learning_rate": 8.320011218622914e-06, "loss": 0.1441, "step": 77300 }, { "epoch": 2.17, "learning_rate": 8.305987939980369e-06, "loss": 0.1354, "step": 77350 }, { "epoch": 2.17, "learning_rate": 8.291964661337822e-06, "loss": 0.1299, "step": 77400 }, { "epoch": 2.17, "learning_rate": 8.277941382695274e-06, "loss": 0.16, "step": 77450 }, { "epoch": 2.17, "learning_rate": 8.263918104052727e-06, "loss": 0.1471, "step": 77500 }, { "epoch": 2.18, "learning_rate": 8.249894825410182e-06, "loss": 0.1507, "step": 77550 }, { "epoch": 2.18, "learning_rate": 8.235871546767636e-06, "loss": 0.1345, "step": 77600 }, { "epoch": 2.18, "learning_rate": 8.221848268125087e-06, "loss": 0.1597, "step": 77650 }, { "epoch": 2.18, "learning_rate": 8.20782498948254e-06, "loss": 0.1361, "step": 77700 }, { "epoch": 2.18, "learning_rate": 8.193801710839996e-06, "loss": 0.1437, "step": 77750 }, { "epoch": 2.18, "learning_rate": 8.179778432197449e-06, "loss": 0.1438, "step": 77800 }, { "epoch": 2.18, "learning_rate": 8.1657551535549e-06, "loss": 0.1442, "step": 77850 }, { "epoch": 2.18, "learning_rate": 8.151731874912354e-06, "loss": 0.1451, "step": 77900 }, { "epoch": 2.19, "learning_rate": 8.137708596269809e-06, "loss": 0.1455, "step": 77950 }, { "epoch": 2.19, "learning_rate": 8.123685317627262e-06, "loss": 0.1331, "step": 78000 }, { "epoch": 2.19, "eval_bleu": 96.0923, "eval_gen_len": 64.236, "eval_loss": 0.24420137703418732, "eval_rouge1": 93.9515, "eval_rouge2": 90.4414, "eval_rougeL": 93.7733, "eval_rougeLsum": 93.7553, "eval_runtime": 3102.8715, "eval_samples_per_second": 0.967, "eval_steps_per_second": 0.242, "step": 78000 }, { "epoch": 2.19, "learning_rate": 8.109662038984714e-06, "loss": 0.1332, "step": 78050 }, { "epoch": 2.19, "learning_rate": 8.095638760342167e-06, "loss": 0.1361, "step": 78100 }, { "epoch": 2.19, "learning_rate": 8.081615481699623e-06, "loss": 0.1394, "step": 78150 }, { "epoch": 2.19, "learning_rate": 8.067592203057076e-06, "loss": 0.1387, "step": 78200 }, { "epoch": 2.19, "learning_rate": 8.053568924414528e-06, "loss": 0.1264, "step": 78250 }, { "epoch": 2.2, "learning_rate": 8.039545645771981e-06, "loss": 0.1464, "step": 78300 }, { "epoch": 2.2, "learning_rate": 8.025522367129436e-06, "loss": 0.1574, "step": 78350 }, { "epoch": 2.2, "learning_rate": 8.01149908848689e-06, "loss": 0.1449, "step": 78400 }, { "epoch": 2.2, "learning_rate": 7.997475809844341e-06, "loss": 0.1361, "step": 78450 }, { "epoch": 2.2, "learning_rate": 7.983452531201794e-06, "loss": 0.1254, "step": 78500 }, { "epoch": 2.2, "learning_rate": 7.96942925255925e-06, "loss": 0.1396, "step": 78550 }, { "epoch": 2.2, "learning_rate": 7.955405973916703e-06, "loss": 0.1437, "step": 78600 }, { "epoch": 2.21, "learning_rate": 7.941382695274156e-06, "loss": 0.1434, "step": 78650 }, { "epoch": 2.21, "learning_rate": 7.927359416631608e-06, "loss": 0.1273, "step": 78700 }, { "epoch": 2.21, "learning_rate": 7.913336137989063e-06, "loss": 0.1393, "step": 78750 }, { "epoch": 2.21, "learning_rate": 7.899312859346516e-06, "loss": 0.1356, "step": 78800 }, { "epoch": 2.21, "learning_rate": 7.88528958070397e-06, "loss": 0.1554, "step": 78850 }, { "epoch": 2.21, "learning_rate": 7.871266302061421e-06, "loss": 0.1372, "step": 78900 }, { "epoch": 2.21, "learning_rate": 7.857243023418876e-06, "loss": 0.1407, "step": 78950 }, { "epoch": 2.22, "learning_rate": 7.84321974477633e-06, "loss": 0.1658, "step": 79000 }, { "epoch": 2.22, "eval_bleu": 96.0964, "eval_gen_len": 64.256, "eval_loss": 0.24366088211536407, "eval_rouge1": 93.9674, "eval_rouge2": 90.4223, "eval_rougeL": 93.7734, "eval_rougeLsum": 93.7712, "eval_runtime": 3096.7975, "eval_samples_per_second": 0.969, "eval_steps_per_second": 0.242, "step": 79000 }, { "epoch": 2.22, "learning_rate": 7.829196466133783e-06, "loss": 0.1379, "step": 79050 }, { "epoch": 2.22, "learning_rate": 7.815173187491235e-06, "loss": 0.1474, "step": 79100 }, { "epoch": 2.22, "learning_rate": 7.80114990884869e-06, "loss": 0.1297, "step": 79150 }, { "epoch": 2.22, "learning_rate": 7.787126630206143e-06, "loss": 0.145, "step": 79200 }, { "epoch": 2.22, "learning_rate": 7.773103351563596e-06, "loss": 0.1223, "step": 79250 }, { "epoch": 2.22, "learning_rate": 7.759080072921048e-06, "loss": 0.1425, "step": 79300 }, { "epoch": 2.23, "learning_rate": 7.745056794278503e-06, "loss": 0.1358, "step": 79350 }, { "epoch": 2.23, "learning_rate": 7.731033515635956e-06, "loss": 0.1391, "step": 79400 }, { "epoch": 2.23, "learning_rate": 7.71701023699341e-06, "loss": 0.1581, "step": 79450 }, { "epoch": 2.23, "learning_rate": 7.702986958350861e-06, "loss": 0.1425, "step": 79500 }, { "epoch": 2.23, "learning_rate": 7.688963679708317e-06, "loss": 0.15, "step": 79550 }, { "epoch": 2.23, "learning_rate": 7.67494040106577e-06, "loss": 0.1496, "step": 79600 }, { "epoch": 2.23, "learning_rate": 7.660917122423223e-06, "loss": 0.1609, "step": 79650 }, { "epoch": 2.24, "learning_rate": 7.646893843780675e-06, "loss": 0.1502, "step": 79700 }, { "epoch": 2.24, "learning_rate": 7.63287056513813e-06, "loss": 0.1345, "step": 79750 }, { "epoch": 2.24, "learning_rate": 7.618847286495583e-06, "loss": 0.1429, "step": 79800 }, { "epoch": 2.24, "learning_rate": 7.604824007853036e-06, "loss": 0.1408, "step": 79850 }, { "epoch": 2.24, "learning_rate": 7.590800729210489e-06, "loss": 0.1613, "step": 79900 }, { "epoch": 2.24, "learning_rate": 7.576777450567943e-06, "loss": 0.1605, "step": 79950 }, { "epoch": 2.24, "learning_rate": 7.562754171925397e-06, "loss": 0.1396, "step": 80000 }, { "epoch": 2.24, "eval_bleu": 96.1134, "eval_gen_len": 64.2447, "eval_loss": 0.2430116832256317, "eval_rouge1": 93.9967, "eval_rouge2": 90.463, "eval_rougeL": 93.817, "eval_rougeLsum": 93.8046, "eval_runtime": 3075.4099, "eval_samples_per_second": 0.975, "eval_steps_per_second": 0.244, "step": 80000 }, { "epoch": 2.25, "learning_rate": 7.54873089328285e-06, "loss": 0.147, "step": 80050 }, { "epoch": 2.25, "learning_rate": 7.534707614640303e-06, "loss": 0.1501, "step": 80100 }, { "epoch": 2.25, "learning_rate": 7.520684335997758e-06, "loss": 0.1215, "step": 80150 }, { "epoch": 2.25, "learning_rate": 7.50666105735521e-06, "loss": 0.1323, "step": 80200 }, { "epoch": 2.25, "learning_rate": 7.4926377787126635e-06, "loss": 0.1307, "step": 80250 }, { "epoch": 2.25, "learning_rate": 7.478614500070117e-06, "loss": 0.1369, "step": 80300 }, { "epoch": 2.25, "learning_rate": 7.46459122142757e-06, "loss": 0.1502, "step": 80350 }, { "epoch": 2.25, "learning_rate": 7.450567942785024e-06, "loss": 0.14, "step": 80400 }, { "epoch": 2.26, "learning_rate": 7.436544664142477e-06, "loss": 0.1411, "step": 80450 }, { "epoch": 2.26, "learning_rate": 7.42252138549993e-06, "loss": 0.1401, "step": 80500 }, { "epoch": 2.26, "learning_rate": 7.408498106857384e-06, "loss": 0.1713, "step": 80550 }, { "epoch": 2.26, "learning_rate": 7.394474828214837e-06, "loss": 0.1678, "step": 80600 }, { "epoch": 2.26, "learning_rate": 7.38045154957229e-06, "loss": 0.1203, "step": 80650 }, { "epoch": 2.26, "learning_rate": 7.366428270929744e-06, "loss": 0.1635, "step": 80700 }, { "epoch": 2.26, "learning_rate": 7.352404992287197e-06, "loss": 0.1318, "step": 80750 }, { "epoch": 2.27, "learning_rate": 7.3383817136446505e-06, "loss": 0.1292, "step": 80800 }, { "epoch": 2.27, "learning_rate": 7.324358435002104e-06, "loss": 0.1433, "step": 80850 }, { "epoch": 2.27, "learning_rate": 7.310335156359557e-06, "loss": 0.1572, "step": 80900 }, { "epoch": 2.27, "learning_rate": 7.2963118777170105e-06, "loss": 0.1338, "step": 80950 }, { "epoch": 2.27, "learning_rate": 7.282288599074464e-06, "loss": 0.147, "step": 81000 }, { "epoch": 2.27, "eval_bleu": 96.1115, "eval_gen_len": 64.2613, "eval_loss": 0.24394148588180542, "eval_rouge1": 93.9741, "eval_rouge2": 90.4331, "eval_rougeL": 93.775, "eval_rougeLsum": 93.7668, "eval_runtime": 3119.4921, "eval_samples_per_second": 0.962, "eval_steps_per_second": 0.24, "step": 81000 }, { "epoch": 2.27, "learning_rate": 7.268265320431917e-06, "loss": 0.1329, "step": 81050 }, { "epoch": 2.27, "learning_rate": 7.254242041789371e-06, "loss": 0.1416, "step": 81100 }, { "epoch": 2.28, "learning_rate": 7.240218763146824e-06, "loss": 0.1443, "step": 81150 }, { "epoch": 2.28, "learning_rate": 7.226195484504277e-06, "loss": 0.1519, "step": 81200 }, { "epoch": 2.28, "learning_rate": 7.212172205861731e-06, "loss": 0.1501, "step": 81250 }, { "epoch": 2.28, "learning_rate": 7.198148927219184e-06, "loss": 0.1465, "step": 81300 }, { "epoch": 2.28, "learning_rate": 7.184125648576637e-06, "loss": 0.1639, "step": 81350 }, { "epoch": 2.28, "learning_rate": 7.170102369934091e-06, "loss": 0.1335, "step": 81400 }, { "epoch": 2.28, "learning_rate": 7.156079091291544e-06, "loss": 0.1588, "step": 81450 }, { "epoch": 2.29, "learning_rate": 7.142055812648998e-06, "loss": 0.1513, "step": 81500 }, { "epoch": 2.29, "learning_rate": 7.128032534006451e-06, "loss": 0.1523, "step": 81550 }, { "epoch": 2.29, "learning_rate": 7.114009255363905e-06, "loss": 0.1477, "step": 81600 }, { "epoch": 2.29, "learning_rate": 7.0999859767213576e-06, "loss": 0.1269, "step": 81650 }, { "epoch": 2.29, "learning_rate": 7.085962698078812e-06, "loss": 0.1501, "step": 81700 }, { "epoch": 2.29, "learning_rate": 7.071939419436264e-06, "loss": 0.1421, "step": 81750 }, { "epoch": 2.29, "learning_rate": 7.0579161407937185e-06, "loss": 0.151, "step": 81800 }, { "epoch": 2.3, "learning_rate": 7.043892862151171e-06, "loss": 0.1383, "step": 81850 }, { "epoch": 2.3, "learning_rate": 7.029869583508625e-06, "loss": 0.142, "step": 81900 }, { "epoch": 2.3, "learning_rate": 7.015846304866078e-06, "loss": 0.1574, "step": 81950 }, { "epoch": 2.3, "learning_rate": 7.001823026223532e-06, "loss": 0.1601, "step": 82000 }, { "epoch": 2.3, "eval_bleu": 96.0955, "eval_gen_len": 64.251, "eval_loss": 0.24331849813461304, "eval_rouge1": 93.9796, "eval_rouge2": 90.4277, "eval_rougeL": 93.7849, "eval_rougeLsum": 93.7712, "eval_runtime": 3157.1975, "eval_samples_per_second": 0.95, "eval_steps_per_second": 0.238, "step": 82000 }, { "epoch": 2.3, "learning_rate": 6.987799747580984e-06, "loss": 0.1371, "step": 82050 }, { "epoch": 2.3, "learning_rate": 6.973776468938439e-06, "loss": 0.1392, "step": 82100 }, { "epoch": 2.3, "learning_rate": 6.959753190295891e-06, "loss": 0.1309, "step": 82150 }, { "epoch": 2.31, "learning_rate": 6.945729911653345e-06, "loss": 0.1614, "step": 82200 }, { "epoch": 2.31, "learning_rate": 6.931706633010798e-06, "loss": 0.1377, "step": 82250 }, { "epoch": 2.31, "learning_rate": 6.917683354368251e-06, "loss": 0.1473, "step": 82300 }, { "epoch": 2.31, "learning_rate": 6.9036600757257046e-06, "loss": 0.145, "step": 82350 }, { "epoch": 2.31, "learning_rate": 6.889636797083158e-06, "loss": 0.1601, "step": 82400 }, { "epoch": 2.31, "learning_rate": 6.875613518440611e-06, "loss": 0.1367, "step": 82450 }, { "epoch": 2.31, "learning_rate": 6.861590239798065e-06, "loss": 0.1492, "step": 82500 }, { "epoch": 2.32, "learning_rate": 6.847566961155518e-06, "loss": 0.1403, "step": 82550 }, { "epoch": 2.32, "learning_rate": 6.833543682512971e-06, "loss": 0.1419, "step": 82600 }, { "epoch": 2.32, "learning_rate": 6.819520403870425e-06, "loss": 0.1287, "step": 82650 }, { "epoch": 2.32, "learning_rate": 6.805497125227878e-06, "loss": 0.14, "step": 82700 }, { "epoch": 2.32, "learning_rate": 6.7914738465853314e-06, "loss": 0.1348, "step": 82750 }, { "epoch": 2.32, "learning_rate": 6.777450567942785e-06, "loss": 0.1375, "step": 82800 }, { "epoch": 2.32, "learning_rate": 6.763427289300239e-06, "loss": 0.1324, "step": 82850 }, { "epoch": 2.33, "learning_rate": 6.7494040106576915e-06, "loss": 0.1366, "step": 82900 }, { "epoch": 2.33, "learning_rate": 6.735380732015146e-06, "loss": 0.1333, "step": 82950 }, { "epoch": 2.33, "learning_rate": 6.721357453372598e-06, "loss": 0.1389, "step": 83000 }, { "epoch": 2.33, "eval_bleu": 96.1354, "eval_gen_len": 64.263, "eval_loss": 0.24297164380550385, "eval_rouge1": 93.9976, "eval_rouge2": 90.4522, "eval_rougeL": 93.8079, "eval_rougeLsum": 93.7903, "eval_runtime": 3153.994, "eval_samples_per_second": 0.951, "eval_steps_per_second": 0.238, "step": 83000 }, { "epoch": 2.33, "learning_rate": 6.707334174730052e-06, "loss": 0.1571, "step": 83050 }, { "epoch": 2.33, "learning_rate": 6.693310896087505e-06, "loss": 0.1383, "step": 83100 }, { "epoch": 2.33, "learning_rate": 6.679287617444959e-06, "loss": 0.1524, "step": 83150 }, { "epoch": 2.33, "learning_rate": 6.665264338802412e-06, "loss": 0.1568, "step": 83200 }, { "epoch": 2.33, "learning_rate": 6.651241060159866e-06, "loss": 0.1226, "step": 83250 }, { "epoch": 2.34, "learning_rate": 6.637217781517318e-06, "loss": 0.1259, "step": 83300 }, { "epoch": 2.34, "learning_rate": 6.6231945028747726e-06, "loss": 0.1466, "step": 83350 }, { "epoch": 2.34, "learning_rate": 6.609171224232225e-06, "loss": 0.1464, "step": 83400 }, { "epoch": 2.34, "learning_rate": 6.595147945589679e-06, "loss": 0.1355, "step": 83450 }, { "epoch": 2.34, "learning_rate": 6.581124666947132e-06, "loss": 0.1387, "step": 83500 }, { "epoch": 2.34, "learning_rate": 6.567101388304586e-06, "loss": 0.1477, "step": 83550 }, { "epoch": 2.34, "learning_rate": 6.5530781096620385e-06, "loss": 0.1507, "step": 83600 }, { "epoch": 2.35, "learning_rate": 6.539054831019493e-06, "loss": 0.1477, "step": 83650 }, { "epoch": 2.35, "learning_rate": 6.525031552376945e-06, "loss": 0.1483, "step": 83700 }, { "epoch": 2.35, "learning_rate": 6.5110082737343994e-06, "loss": 0.1524, "step": 83750 }, { "epoch": 2.35, "learning_rate": 6.496984995091852e-06, "loss": 0.1522, "step": 83800 }, { "epoch": 2.35, "learning_rate": 6.482961716449306e-06, "loss": 0.1347, "step": 83850 }, { "epoch": 2.35, "learning_rate": 6.468938437806759e-06, "loss": 0.1562, "step": 83900 }, { "epoch": 2.35, "learning_rate": 6.454915159164213e-06, "loss": 0.1554, "step": 83950 }, { "epoch": 2.36, "learning_rate": 6.440891880521665e-06, "loss": 0.1416, "step": 84000 }, { "epoch": 2.36, "eval_bleu": 96.1053, "eval_gen_len": 64.255, "eval_loss": 0.2427954524755478, "eval_rouge1": 93.9492, "eval_rouge2": 90.4232, "eval_rougeL": 93.7656, "eval_rougeLsum": 93.7482, "eval_runtime": 3082.0924, "eval_samples_per_second": 0.973, "eval_steps_per_second": 0.243, "step": 84000 }, { "epoch": 2.36, "learning_rate": 6.4268686018791196e-06, "loss": 0.1348, "step": 84050 }, { "epoch": 2.36, "learning_rate": 6.412845323236572e-06, "loss": 0.1689, "step": 84100 }, { "epoch": 2.36, "learning_rate": 6.398822044594026e-06, "loss": 0.139, "step": 84150 }, { "epoch": 2.36, "learning_rate": 6.384798765951479e-06, "loss": 0.1409, "step": 84200 }, { "epoch": 2.36, "learning_rate": 6.370775487308933e-06, "loss": 0.1619, "step": 84250 }, { "epoch": 2.36, "learning_rate": 6.356752208666386e-06, "loss": 0.1342, "step": 84300 }, { "epoch": 2.37, "learning_rate": 6.34272893002384e-06, "loss": 0.1418, "step": 84350 }, { "epoch": 2.37, "learning_rate": 6.328705651381293e-06, "loss": 0.1504, "step": 84400 }, { "epoch": 2.37, "learning_rate": 6.3146823727387464e-06, "loss": 0.1529, "step": 84450 }, { "epoch": 2.37, "learning_rate": 6.3006590940962e-06, "loss": 0.1363, "step": 84500 }, { "epoch": 2.37, "learning_rate": 6.286635815453653e-06, "loss": 0.1557, "step": 84550 }, { "epoch": 2.37, "learning_rate": 6.2726125368111065e-06, "loss": 0.14, "step": 84600 }, { "epoch": 2.37, "learning_rate": 6.25858925816856e-06, "loss": 0.139, "step": 84650 }, { "epoch": 2.38, "learning_rate": 6.244565979526013e-06, "loss": 0.1357, "step": 84700 }, { "epoch": 2.38, "learning_rate": 6.230542700883467e-06, "loss": 0.154, "step": 84750 }, { "epoch": 2.38, "learning_rate": 6.21651942224092e-06, "loss": 0.1294, "step": 84800 }, { "epoch": 2.38, "learning_rate": 6.202496143598373e-06, "loss": 0.1392, "step": 84850 }, { "epoch": 2.38, "learning_rate": 6.188472864955827e-06, "loss": 0.1495, "step": 84900 }, { "epoch": 2.38, "learning_rate": 6.17444958631328e-06, "loss": 0.147, "step": 84950 }, { "epoch": 2.38, "learning_rate": 6.160426307670733e-06, "loss": 0.1411, "step": 85000 }, { "epoch": 2.38, "eval_bleu": 96.1115, "eval_gen_len": 64.2543, "eval_loss": 0.24300958216190338, "eval_rouge1": 93.9688, "eval_rouge2": 90.4201, "eval_rougeL": 93.7772, "eval_rougeLsum": 93.7569, "eval_runtime": 3128.8052, "eval_samples_per_second": 0.959, "eval_steps_per_second": 0.24, "step": 85000 }, { "epoch": 2.39, "learning_rate": 6.146403029028187e-06, "loss": 0.1596, "step": 85050 }, { "epoch": 2.39, "learning_rate": 6.13237975038564e-06, "loss": 0.1376, "step": 85100 }, { "epoch": 2.39, "learning_rate": 6.1183564717430934e-06, "loss": 0.1409, "step": 85150 }, { "epoch": 2.39, "learning_rate": 6.104333193100547e-06, "loss": 0.1576, "step": 85200 }, { "epoch": 2.39, "learning_rate": 6.090309914458e-06, "loss": 0.1442, "step": 85250 }, { "epoch": 2.39, "learning_rate": 6.0762866358154535e-06, "loss": 0.1338, "step": 85300 }, { "epoch": 2.39, "learning_rate": 6.062263357172907e-06, "loss": 0.1561, "step": 85350 }, { "epoch": 2.4, "learning_rate": 6.04824007853036e-06, "loss": 0.1302, "step": 85400 }, { "epoch": 2.4, "learning_rate": 6.034216799887814e-06, "loss": 0.1793, "step": 85450 }, { "epoch": 2.4, "learning_rate": 6.020193521245267e-06, "loss": 0.1602, "step": 85500 }, { "epoch": 2.4, "learning_rate": 6.00617024260272e-06, "loss": 0.143, "step": 85550 }, { "epoch": 2.4, "learning_rate": 5.992146963960174e-06, "loss": 0.1253, "step": 85600 }, { "epoch": 2.4, "learning_rate": 5.978123685317627e-06, "loss": 0.141, "step": 85650 }, { "epoch": 2.4, "learning_rate": 5.96410040667508e-06, "loss": 0.1298, "step": 85700 }, { "epoch": 2.4, "learning_rate": 5.950077128032535e-06, "loss": 0.1495, "step": 85750 }, { "epoch": 2.41, "learning_rate": 5.936053849389987e-06, "loss": 0.1397, "step": 85800 }, { "epoch": 2.41, "learning_rate": 5.922030570747441e-06, "loss": 0.118, "step": 85850 }, { "epoch": 2.41, "learning_rate": 5.908007292104894e-06, "loss": 0.1418, "step": 85900 }, { "epoch": 2.41, "learning_rate": 5.893984013462348e-06, "loss": 0.1633, "step": 85950 }, { "epoch": 2.41, "learning_rate": 5.8799607348198005e-06, "loss": 0.1351, "step": 86000 }, { "epoch": 2.41, "eval_bleu": 96.1109, "eval_gen_len": 64.2517, "eval_loss": 0.24320924282073975, "eval_rouge1": 93.9905, "eval_rouge2": 90.4711, "eval_rougeL": 93.7884, "eval_rougeLsum": 93.7735, "eval_runtime": 3131.4139, "eval_samples_per_second": 0.958, "eval_steps_per_second": 0.24, "step": 86000 }, { "epoch": 2.41, "learning_rate": 5.865937456177255e-06, "loss": 0.1365, "step": 86050 }, { "epoch": 2.41, "learning_rate": 5.851914177534707e-06, "loss": 0.1397, "step": 86100 }, { "epoch": 2.42, "learning_rate": 5.8378908988921614e-06, "loss": 0.1393, "step": 86150 }, { "epoch": 2.42, "learning_rate": 5.823867620249614e-06, "loss": 0.1381, "step": 86200 }, { "epoch": 2.42, "learning_rate": 5.809844341607068e-06, "loss": 0.1503, "step": 86250 }, { "epoch": 2.42, "learning_rate": 5.795821062964521e-06, "loss": 0.1352, "step": 86300 }, { "epoch": 2.42, "learning_rate": 5.781797784321975e-06, "loss": 0.1371, "step": 86350 }, { "epoch": 2.42, "learning_rate": 5.767774505679427e-06, "loss": 0.1468, "step": 86400 }, { "epoch": 2.42, "learning_rate": 5.753751227036882e-06, "loss": 0.1402, "step": 86450 }, { "epoch": 2.43, "learning_rate": 5.739727948394334e-06, "loss": 0.1472, "step": 86500 }, { "epoch": 2.43, "learning_rate": 5.725704669751788e-06, "loss": 0.1517, "step": 86550 }, { "epoch": 2.43, "learning_rate": 5.711681391109241e-06, "loss": 0.1518, "step": 86600 }, { "epoch": 2.43, "learning_rate": 5.697658112466695e-06, "loss": 0.1366, "step": 86650 }, { "epoch": 2.43, "learning_rate": 5.6836348338241475e-06, "loss": 0.1325, "step": 86700 }, { "epoch": 2.43, "learning_rate": 5.669611555181602e-06, "loss": 0.1415, "step": 86750 }, { "epoch": 2.43, "learning_rate": 5.655588276539054e-06, "loss": 0.155, "step": 86800 }, { "epoch": 2.44, "learning_rate": 5.6415649978965084e-06, "loss": 0.1388, "step": 86850 }, { "epoch": 2.44, "learning_rate": 5.627541719253961e-06, "loss": 0.1539, "step": 86900 }, { "epoch": 2.44, "learning_rate": 5.613518440611415e-06, "loss": 0.1373, "step": 86950 }, { "epoch": 2.44, "learning_rate": 5.599495161968868e-06, "loss": 0.1272, "step": 87000 }, { "epoch": 2.44, "eval_bleu": 96.107, "eval_gen_len": 64.2437, "eval_loss": 0.2423429638147354, "eval_rouge1": 93.9715, "eval_rouge2": 90.4552, "eval_rougeL": 93.783, "eval_rougeLsum": 93.7701, "eval_runtime": 3108.876, "eval_samples_per_second": 0.965, "eval_steps_per_second": 0.241, "step": 87000 }, { "epoch": 2.44, "learning_rate": 5.585471883326322e-06, "loss": 0.1392, "step": 87050 }, { "epoch": 2.44, "learning_rate": 5.571448604683775e-06, "loss": 0.1521, "step": 87100 }, { "epoch": 2.44, "learning_rate": 5.557425326041229e-06, "loss": 0.1498, "step": 87150 }, { "epoch": 2.45, "learning_rate": 5.543402047398682e-06, "loss": 0.1395, "step": 87200 }, { "epoch": 2.45, "learning_rate": 5.529378768756135e-06, "loss": 0.1411, "step": 87250 }, { "epoch": 2.45, "learning_rate": 5.515355490113589e-06, "loss": 0.141, "step": 87300 }, { "epoch": 2.45, "learning_rate": 5.501332211471042e-06, "loss": 0.1477, "step": 87350 }, { "epoch": 2.45, "learning_rate": 5.487308932828495e-06, "loss": 0.1756, "step": 87400 }, { "epoch": 2.45, "learning_rate": 5.473285654185949e-06, "loss": 0.1696, "step": 87450 }, { "epoch": 2.45, "learning_rate": 5.459262375543402e-06, "loss": 0.146, "step": 87500 }, { "epoch": 2.46, "learning_rate": 5.4452390969008555e-06, "loss": 0.1241, "step": 87550 }, { "epoch": 2.46, "learning_rate": 5.431215818258309e-06, "loss": 0.147, "step": 87600 }, { "epoch": 2.46, "learning_rate": 5.417192539615762e-06, "loss": 0.1288, "step": 87650 }, { "epoch": 2.46, "learning_rate": 5.4031692609732155e-06, "loss": 0.14, "step": 87700 }, { "epoch": 2.46, "learning_rate": 5.389145982330669e-06, "loss": 0.1577, "step": 87750 }, { "epoch": 2.46, "learning_rate": 5.375122703688122e-06, "loss": 0.1302, "step": 87800 }, { "epoch": 2.46, "learning_rate": 5.361099425045576e-06, "loss": 0.1343, "step": 87850 }, { "epoch": 2.47, "learning_rate": 5.347076146403029e-06, "loss": 0.1218, "step": 87900 }, { "epoch": 2.47, "learning_rate": 5.333052867760482e-06, "loss": 0.1592, "step": 87950 }, { "epoch": 2.47, "learning_rate": 5.319029589117936e-06, "loss": 0.1501, "step": 88000 }, { "epoch": 2.47, "eval_bleu": 96.0986, "eval_gen_len": 64.2647, "eval_loss": 0.2428046017885208, "eval_rouge1": 93.9542, "eval_rouge2": 90.3776, "eval_rougeL": 93.7639, "eval_rougeLsum": 93.7476, "eval_runtime": 3098.6862, "eval_samples_per_second": 0.968, "eval_steps_per_second": 0.242, "step": 88000 }, { "epoch": 2.47, "learning_rate": 5.305006310475389e-06, "loss": 0.1199, "step": 88050 }, { "epoch": 2.47, "learning_rate": 5.290983031832842e-06, "loss": 0.1412, "step": 88100 }, { "epoch": 2.47, "learning_rate": 5.276959753190296e-06, "loss": 0.1489, "step": 88150 }, { "epoch": 2.47, "learning_rate": 5.262936474547749e-06, "loss": 0.1319, "step": 88200 }, { "epoch": 2.48, "learning_rate": 5.2489131959052025e-06, "loss": 0.1308, "step": 88250 }, { "epoch": 2.48, "learning_rate": 5.234889917262656e-06, "loss": 0.116, "step": 88300 }, { "epoch": 2.48, "learning_rate": 5.220866638620109e-06, "loss": 0.1496, "step": 88350 }, { "epoch": 2.48, "learning_rate": 5.2068433599775625e-06, "loss": 0.1404, "step": 88400 }, { "epoch": 2.48, "learning_rate": 5.192820081335016e-06, "loss": 0.1378, "step": 88450 }, { "epoch": 2.48, "learning_rate": 5.178796802692469e-06, "loss": 0.1391, "step": 88500 }, { "epoch": 2.48, "learning_rate": 5.1647735240499235e-06, "loss": 0.1307, "step": 88550 }, { "epoch": 2.48, "learning_rate": 5.150750245407376e-06, "loss": 0.1454, "step": 88600 }, { "epoch": 2.49, "learning_rate": 5.13672696676483e-06, "loss": 0.1421, "step": 88650 }, { "epoch": 2.49, "learning_rate": 5.122703688122283e-06, "loss": 0.1483, "step": 88700 }, { "epoch": 2.49, "learning_rate": 5.108680409479737e-06, "loss": 0.1311, "step": 88750 }, { "epoch": 2.49, "learning_rate": 5.094657130837189e-06, "loss": 0.1403, "step": 88800 }, { "epoch": 2.49, "learning_rate": 5.080633852194644e-06, "loss": 0.137, "step": 88850 }, { "epoch": 2.49, "learning_rate": 5.066610573552096e-06, "loss": 0.1562, "step": 88900 }, { "epoch": 2.49, "learning_rate": 5.05258729490955e-06, "loss": 0.1512, "step": 88950 }, { "epoch": 2.5, "learning_rate": 5.038564016267003e-06, "loss": 0.1537, "step": 89000 }, { "epoch": 2.5, "eval_bleu": 96.0983, "eval_gen_len": 64.2437, "eval_loss": 0.2420242875814438, "eval_rouge1": 93.9781, "eval_rouge2": 90.4393, "eval_rougeL": 93.7904, "eval_rougeLsum": 93.78, "eval_runtime": 3102.7126, "eval_samples_per_second": 0.967, "eval_steps_per_second": 0.242, "step": 89000 }, { "epoch": 2.5, "learning_rate": 5.024540737624457e-06, "loss": 0.1406, "step": 89050 }, { "epoch": 2.5, "learning_rate": 5.0105174589819095e-06, "loss": 0.1533, "step": 89100 }, { "epoch": 2.5, "learning_rate": 4.996494180339364e-06, "loss": 0.1394, "step": 89150 }, { "epoch": 2.5, "learning_rate": 4.982470901696816e-06, "loss": 0.1335, "step": 89200 }, { "epoch": 2.5, "learning_rate": 4.9684476230542705e-06, "loss": 0.126, "step": 89250 }, { "epoch": 2.5, "learning_rate": 4.954424344411723e-06, "loss": 0.1383, "step": 89300 }, { "epoch": 2.51, "learning_rate": 4.940401065769177e-06, "loss": 0.1488, "step": 89350 }, { "epoch": 2.51, "learning_rate": 4.92637778712663e-06, "loss": 0.151, "step": 89400 }, { "epoch": 2.51, "learning_rate": 4.912354508484084e-06, "loss": 0.137, "step": 89450 }, { "epoch": 2.51, "learning_rate": 4.898331229841536e-06, "loss": 0.1622, "step": 89500 }, { "epoch": 2.51, "learning_rate": 4.884307951198991e-06, "loss": 0.1332, "step": 89550 }, { "epoch": 2.51, "learning_rate": 4.870284672556443e-06, "loss": 0.159, "step": 89600 }, { "epoch": 2.51, "learning_rate": 4.856261393913897e-06, "loss": 0.1437, "step": 89650 }, { "epoch": 2.52, "learning_rate": 4.84223811527135e-06, "loss": 0.1521, "step": 89700 }, { "epoch": 2.52, "learning_rate": 4.828214836628804e-06, "loss": 0.1497, "step": 89750 }, { "epoch": 2.52, "learning_rate": 4.8141915579862566e-06, "loss": 0.1333, "step": 89800 }, { "epoch": 2.52, "learning_rate": 4.800168279343711e-06, "loss": 0.1464, "step": 89850 }, { "epoch": 2.52, "learning_rate": 4.786145000701163e-06, "loss": 0.1362, "step": 89900 }, { "epoch": 2.52, "learning_rate": 4.7721217220586175e-06, "loss": 0.1379, "step": 89950 }, { "epoch": 2.52, "learning_rate": 4.758098443416071e-06, "loss": 0.1404, "step": 90000 }, { "epoch": 2.52, "eval_bleu": 96.1002, "eval_gen_len": 64.232, "eval_loss": 0.2419024407863617, "eval_rouge1": 94.0091, "eval_rouge2": 90.4435, "eval_rougeL": 93.8185, "eval_rougeLsum": 93.8034, "eval_runtime": 3103.5189, "eval_samples_per_second": 0.967, "eval_steps_per_second": 0.242, "step": 90000 }, { "epoch": 2.53, "learning_rate": 4.744075164773524e-06, "loss": 0.1659, "step": 90050 }, { "epoch": 2.53, "learning_rate": 4.7300518861309775e-06, "loss": 0.1377, "step": 90100 }, { "epoch": 2.53, "learning_rate": 4.716028607488431e-06, "loss": 0.1398, "step": 90150 }, { "epoch": 2.53, "learning_rate": 4.702005328845884e-06, "loss": 0.1551, "step": 90200 }, { "epoch": 2.53, "learning_rate": 4.687982050203338e-06, "loss": 0.1371, "step": 90250 }, { "epoch": 2.53, "learning_rate": 4.673958771560791e-06, "loss": 0.1332, "step": 90300 }, { "epoch": 2.53, "learning_rate": 4.659935492918244e-06, "loss": 0.1401, "step": 90350 }, { "epoch": 2.54, "learning_rate": 4.645912214275698e-06, "loss": 0.1191, "step": 90400 }, { "epoch": 2.54, "learning_rate": 4.631888935633151e-06, "loss": 0.1421, "step": 90450 }, { "epoch": 2.54, "learning_rate": 4.617865656990604e-06, "loss": 0.1362, "step": 90500 }, { "epoch": 2.54, "learning_rate": 4.603842378348058e-06, "loss": 0.1488, "step": 90550 }, { "epoch": 2.54, "learning_rate": 4.589819099705511e-06, "loss": 0.1591, "step": 90600 }, { "epoch": 2.54, "learning_rate": 4.5757958210629645e-06, "loss": 0.1331, "step": 90650 }, { "epoch": 2.54, "learning_rate": 4.561772542420418e-06, "loss": 0.1337, "step": 90700 }, { "epoch": 2.55, "learning_rate": 4.547749263777871e-06, "loss": 0.1487, "step": 90750 }, { "epoch": 2.55, "learning_rate": 4.5337259851353246e-06, "loss": 0.1401, "step": 90800 }, { "epoch": 2.55, "learning_rate": 4.519702706492778e-06, "loss": 0.1403, "step": 90850 }, { "epoch": 2.55, "learning_rate": 4.505679427850231e-06, "loss": 0.141, "step": 90900 }, { "epoch": 2.55, "learning_rate": 4.491656149207685e-06, "loss": 0.1422, "step": 90950 }, { "epoch": 2.55, "learning_rate": 4.477632870565138e-06, "loss": 0.1415, "step": 91000 }, { "epoch": 2.55, "eval_bleu": 96.1065, "eval_gen_len": 64.2577, "eval_loss": 0.24246443808078766, "eval_rouge1": 94.0034, "eval_rouge2": 90.453, "eval_rougeL": 93.8071, "eval_rougeLsum": 93.7875, "eval_runtime": 3093.3898, "eval_samples_per_second": 0.97, "eval_steps_per_second": 0.242, "step": 91000 }, { "epoch": 2.55, "learning_rate": 4.463609591922591e-06, "loss": 0.1411, "step": 91050 }, { "epoch": 2.56, "learning_rate": 4.449586313280045e-06, "loss": 0.1617, "step": 91100 }, { "epoch": 2.56, "learning_rate": 4.435563034637498e-06, "loss": 0.1657, "step": 91150 }, { "epoch": 2.56, "learning_rate": 4.421539755994951e-06, "loss": 0.1337, "step": 91200 }, { "epoch": 2.56, "learning_rate": 4.407516477352405e-06, "loss": 0.1298, "step": 91250 }, { "epoch": 2.56, "learning_rate": 4.393493198709858e-06, "loss": 0.1551, "step": 91300 }, { "epoch": 2.56, "learning_rate": 4.3794699200673115e-06, "loss": 0.1551, "step": 91350 }, { "epoch": 2.56, "learning_rate": 4.365446641424765e-06, "loss": 0.1484, "step": 91400 }, { "epoch": 2.56, "learning_rate": 4.351423362782219e-06, "loss": 0.1345, "step": 91450 }, { "epoch": 2.57, "learning_rate": 4.3374000841396716e-06, "loss": 0.1532, "step": 91500 }, { "epoch": 2.57, "learning_rate": 4.323376805497126e-06, "loss": 0.1451, "step": 91550 }, { "epoch": 2.57, "learning_rate": 4.309353526854578e-06, "loss": 0.1437, "step": 91600 }, { "epoch": 2.57, "learning_rate": 4.2953302482120325e-06, "loss": 0.1572, "step": 91650 }, { "epoch": 2.57, "learning_rate": 4.281306969569485e-06, "loss": 0.1351, "step": 91700 }, { "epoch": 2.57, "learning_rate": 4.267283690926939e-06, "loss": 0.134, "step": 91750 }, { "epoch": 2.57, "learning_rate": 4.253260412284392e-06, "loss": 0.1358, "step": 91800 }, { "epoch": 2.58, "learning_rate": 4.239237133641846e-06, "loss": 0.138, "step": 91850 }, { "epoch": 2.58, "learning_rate": 4.225213854999298e-06, "loss": 0.1377, "step": 91900 }, { "epoch": 2.58, "learning_rate": 4.211190576356753e-06, "loss": 0.1473, "step": 91950 }, { "epoch": 2.58, "learning_rate": 4.197167297714205e-06, "loss": 0.1288, "step": 92000 }, { "epoch": 2.58, "eval_bleu": 96.1036, "eval_gen_len": 64.2483, "eval_loss": 0.24192480742931366, "eval_rouge1": 93.9846, "eval_rouge2": 90.4605, "eval_rougeL": 93.8057, "eval_rougeLsum": 93.7865, "eval_runtime": 3083.7598, "eval_samples_per_second": 0.973, "eval_steps_per_second": 0.243, "step": 92000 }, { "epoch": 2.58, "learning_rate": 4.183144019071659e-06, "loss": 0.1448, "step": 92050 }, { "epoch": 2.58, "learning_rate": 4.169120740429112e-06, "loss": 0.1436, "step": 92100 }, { "epoch": 2.58, "learning_rate": 4.155097461786566e-06, "loss": 0.1567, "step": 92150 }, { "epoch": 2.59, "learning_rate": 4.1410741831440186e-06, "loss": 0.1441, "step": 92200 }, { "epoch": 2.59, "learning_rate": 4.127050904501473e-06, "loss": 0.1365, "step": 92250 }, { "epoch": 2.59, "learning_rate": 4.113027625858925e-06, "loss": 0.1466, "step": 92300 }, { "epoch": 2.59, "learning_rate": 4.0990043472163795e-06, "loss": 0.1398, "step": 92350 }, { "epoch": 2.59, "learning_rate": 4.084981068573832e-06, "loss": 0.1404, "step": 92400 }, { "epoch": 2.59, "learning_rate": 4.070957789931286e-06, "loss": 0.149, "step": 92450 }, { "epoch": 2.59, "learning_rate": 4.056934511288739e-06, "loss": 0.1322, "step": 92500 }, { "epoch": 2.6, "learning_rate": 4.042911232646193e-06, "loss": 0.1344, "step": 92550 }, { "epoch": 2.6, "learning_rate": 4.0288879540036454e-06, "loss": 0.1471, "step": 92600 }, { "epoch": 2.6, "learning_rate": 4.0148646753611e-06, "loss": 0.1535, "step": 92650 }, { "epoch": 2.6, "learning_rate": 4.000841396718552e-06, "loss": 0.1403, "step": 92700 }, { "epoch": 2.6, "learning_rate": 3.986818118076006e-06, "loss": 0.1265, "step": 92750 }, { "epoch": 2.6, "learning_rate": 3.972794839433459e-06, "loss": 0.1376, "step": 92800 }, { "epoch": 2.6, "learning_rate": 3.958771560790913e-06, "loss": 0.1421, "step": 92850 }, { "epoch": 2.61, "learning_rate": 3.944748282148366e-06, "loss": 0.1379, "step": 92900 }, { "epoch": 2.61, "learning_rate": 3.93072500350582e-06, "loss": 0.1493, "step": 92950 }, { "epoch": 2.61, "learning_rate": 3.916701724863273e-06, "loss": 0.1392, "step": 93000 }, { "epoch": 2.61, "eval_bleu": 96.1104, "eval_gen_len": 64.2603, "eval_loss": 0.24193215370178223, "eval_rouge1": 94.0378, "eval_rouge2": 90.4824, "eval_rougeL": 93.8304, "eval_rougeLsum": 93.8203, "eval_runtime": 3103.7806, "eval_samples_per_second": 0.967, "eval_steps_per_second": 0.242, "step": 93000 }, { "epoch": 2.61, "learning_rate": 3.9026784462207265e-06, "loss": 0.1626, "step": 93050 }, { "epoch": 2.61, "learning_rate": 3.88865516757818e-06, "loss": 0.1596, "step": 93100 }, { "epoch": 2.61, "learning_rate": 3.874631888935633e-06, "loss": 0.1582, "step": 93150 }, { "epoch": 2.61, "learning_rate": 3.8606086102930866e-06, "loss": 0.1369, "step": 93200 }, { "epoch": 2.62, "learning_rate": 3.84658533165054e-06, "loss": 0.1522, "step": 93250 }, { "epoch": 2.62, "learning_rate": 3.832562053007993e-06, "loss": 0.1698, "step": 93300 }, { "epoch": 2.62, "learning_rate": 3.818538774365447e-06, "loss": 0.1352, "step": 93350 }, { "epoch": 2.62, "learning_rate": 3.8045154957228996e-06, "loss": 0.1307, "step": 93400 }, { "epoch": 2.62, "learning_rate": 3.7904922170803534e-06, "loss": 0.1264, "step": 93450 }, { "epoch": 2.62, "learning_rate": 3.7764689384378063e-06, "loss": 0.1512, "step": 93500 }, { "epoch": 2.62, "learning_rate": 3.7624456597952605e-06, "loss": 0.1284, "step": 93550 }, { "epoch": 2.63, "learning_rate": 3.748422381152714e-06, "loss": 0.1413, "step": 93600 }, { "epoch": 2.63, "learning_rate": 3.734399102510167e-06, "loss": 0.1374, "step": 93650 }, { "epoch": 2.63, "learning_rate": 3.7203758238676206e-06, "loss": 0.144, "step": 93700 }, { "epoch": 2.63, "learning_rate": 3.706352545225074e-06, "loss": 0.1633, "step": 93750 }, { "epoch": 2.63, "learning_rate": 3.6923292665825273e-06, "loss": 0.1542, "step": 93800 }, { "epoch": 2.63, "learning_rate": 3.6783059879399806e-06, "loss": 0.1286, "step": 93850 }, { "epoch": 2.63, "learning_rate": 3.664282709297434e-06, "loss": 0.1526, "step": 93900 }, { "epoch": 2.63, "learning_rate": 3.6502594306548874e-06, "loss": 0.1431, "step": 93950 }, { "epoch": 2.64, "learning_rate": 3.6362361520123407e-06, "loss": 0.141, "step": 94000 }, { "epoch": 2.64, "eval_bleu": 96.1146, "eval_gen_len": 64.2413, "eval_loss": 0.24184368550777435, "eval_rouge1": 94.0178, "eval_rouge2": 90.4805, "eval_rougeL": 93.8232, "eval_rougeLsum": 93.8125, "eval_runtime": 3081.3777, "eval_samples_per_second": 0.974, "eval_steps_per_second": 0.243, "step": 94000 }, { "epoch": 2.64, "learning_rate": 3.622212873369794e-06, "loss": 0.1481, "step": 94050 }, { "epoch": 2.64, "learning_rate": 3.6081895947272474e-06, "loss": 0.1359, "step": 94100 }, { "epoch": 2.64, "learning_rate": 3.5941663160847008e-06, "loss": 0.1494, "step": 94150 }, { "epoch": 2.64, "learning_rate": 3.580143037442154e-06, "loss": 0.1372, "step": 94200 }, { "epoch": 2.64, "learning_rate": 3.5661197587996075e-06, "loss": 0.1474, "step": 94250 }, { "epoch": 2.64, "learning_rate": 3.552096480157061e-06, "loss": 0.1232, "step": 94300 }, { "epoch": 2.65, "learning_rate": 3.5380732015145142e-06, "loss": 0.1355, "step": 94350 }, { "epoch": 2.65, "learning_rate": 3.5240499228719676e-06, "loss": 0.1243, "step": 94400 }, { "epoch": 2.65, "learning_rate": 3.510026644229421e-06, "loss": 0.1414, "step": 94450 }, { "epoch": 2.65, "learning_rate": 3.4960033655868743e-06, "loss": 0.1344, "step": 94500 }, { "epoch": 2.65, "learning_rate": 3.4819800869443276e-06, "loss": 0.1251, "step": 94550 }, { "epoch": 2.65, "learning_rate": 3.467956808301781e-06, "loss": 0.1469, "step": 94600 }, { "epoch": 2.65, "learning_rate": 3.4539335296592344e-06, "loss": 0.1671, "step": 94650 }, { "epoch": 2.66, "learning_rate": 3.4399102510166877e-06, "loss": 0.1343, "step": 94700 }, { "epoch": 2.66, "learning_rate": 3.425886972374141e-06, "loss": 0.1493, "step": 94750 }, { "epoch": 2.66, "learning_rate": 3.4118636937315944e-06, "loss": 0.1354, "step": 94800 }, { "epoch": 2.66, "learning_rate": 3.397840415089048e-06, "loss": 0.1397, "step": 94850 }, { "epoch": 2.66, "learning_rate": 3.383817136446501e-06, "loss": 0.1304, "step": 94900 }, { "epoch": 2.66, "learning_rate": 3.3697938578039545e-06, "loss": 0.1198, "step": 94950 }, { "epoch": 2.66, "learning_rate": 3.3557705791614083e-06, "loss": 0.1372, "step": 95000 }, { "epoch": 2.66, "eval_bleu": 96.1034, "eval_gen_len": 64.2807, "eval_loss": 0.24183472990989685, "eval_rouge1": 93.9826, "eval_rouge2": 90.409, "eval_rougeL": 93.7782, "eval_rougeLsum": 93.7723, "eval_runtime": 3083.1749, "eval_samples_per_second": 0.973, "eval_steps_per_second": 0.243, "step": 95000 }, { "epoch": 2.67, "learning_rate": 3.3417473005188616e-06, "loss": 0.1558, "step": 95050 }, { "epoch": 2.67, "learning_rate": 3.327724021876315e-06, "loss": 0.1282, "step": 95100 }, { "epoch": 2.67, "learning_rate": 3.3137007432337684e-06, "loss": 0.1595, "step": 95150 }, { "epoch": 2.67, "learning_rate": 3.2996774645912217e-06, "loss": 0.1403, "step": 95200 }, { "epoch": 2.67, "learning_rate": 3.285654185948675e-06, "loss": 0.1429, "step": 95250 }, { "epoch": 2.67, "learning_rate": 3.2716309073061284e-06, "loss": 0.1364, "step": 95300 }, { "epoch": 2.67, "learning_rate": 3.257607628663582e-06, "loss": 0.1406, "step": 95350 }, { "epoch": 2.68, "learning_rate": 3.243584350021035e-06, "loss": 0.1495, "step": 95400 }, { "epoch": 2.68, "learning_rate": 3.2295610713784885e-06, "loss": 0.1608, "step": 95450 }, { "epoch": 2.68, "learning_rate": 3.215537792735942e-06, "loss": 0.1503, "step": 95500 }, { "epoch": 2.68, "learning_rate": 3.2015145140933952e-06, "loss": 0.1371, "step": 95550 }, { "epoch": 2.68, "learning_rate": 3.1874912354508486e-06, "loss": 0.1465, "step": 95600 }, { "epoch": 2.68, "learning_rate": 3.173467956808302e-06, "loss": 0.1321, "step": 95650 }, { "epoch": 2.68, "learning_rate": 3.1594446781657553e-06, "loss": 0.1382, "step": 95700 }, { "epoch": 2.69, "learning_rate": 3.1454213995232087e-06, "loss": 0.1441, "step": 95750 }, { "epoch": 2.69, "learning_rate": 3.131398120880662e-06, "loss": 0.1558, "step": 95800 }, { "epoch": 2.69, "learning_rate": 3.1173748422381154e-06, "loss": 0.1395, "step": 95850 }, { "epoch": 2.69, "learning_rate": 3.1033515635955687e-06, "loss": 0.1308, "step": 95900 }, { "epoch": 2.69, "learning_rate": 3.089328284953022e-06, "loss": 0.1322, "step": 95950 }, { "epoch": 2.69, "learning_rate": 3.0753050063104754e-06, "loss": 0.1299, "step": 96000 }, { "epoch": 2.69, "eval_bleu": 96.1016, "eval_gen_len": 64.2387, "eval_loss": 0.24173256754875183, "eval_rouge1": 93.9954, "eval_rouge2": 90.4732, "eval_rougeL": 93.8053, "eval_rougeLsum": 93.7909, "eval_runtime": 3082.3137, "eval_samples_per_second": 0.973, "eval_steps_per_second": 0.243, "step": 96000 }, { "epoch": 2.69, "learning_rate": 3.061281727667929e-06, "loss": 0.1424, "step": 96050 }, { "epoch": 2.7, "learning_rate": 3.047258449025382e-06, "loss": 0.1332, "step": 96100 }, { "epoch": 2.7, "learning_rate": 3.0332351703828355e-06, "loss": 0.1298, "step": 96150 }, { "epoch": 2.7, "learning_rate": 3.019211891740289e-06, "loss": 0.1383, "step": 96200 }, { "epoch": 2.7, "learning_rate": 3.0051886130977422e-06, "loss": 0.1434, "step": 96250 }, { "epoch": 2.7, "learning_rate": 2.9911653344551956e-06, "loss": 0.1406, "step": 96300 }, { "epoch": 2.7, "learning_rate": 2.977142055812649e-06, "loss": 0.1343, "step": 96350 }, { "epoch": 2.7, "learning_rate": 2.9631187771701023e-06, "loss": 0.1335, "step": 96400 }, { "epoch": 2.71, "learning_rate": 2.949095498527556e-06, "loss": 0.1457, "step": 96450 }, { "epoch": 2.71, "learning_rate": 2.9350722198850094e-06, "loss": 0.1458, "step": 96500 }, { "epoch": 2.71, "learning_rate": 2.921048941242463e-06, "loss": 0.1494, "step": 96550 }, { "epoch": 2.71, "learning_rate": 2.907025662599916e-06, "loss": 0.1453, "step": 96600 }, { "epoch": 2.71, "learning_rate": 2.8930023839573695e-06, "loss": 0.1452, "step": 96650 }, { "epoch": 2.71, "learning_rate": 2.878979105314823e-06, "loss": 0.1607, "step": 96700 }, { "epoch": 2.71, "learning_rate": 2.8649558266722762e-06, "loss": 0.149, "step": 96750 }, { "epoch": 2.71, "learning_rate": 2.8509325480297296e-06, "loss": 0.1419, "step": 96800 }, { "epoch": 2.72, "learning_rate": 2.836909269387183e-06, "loss": 0.1308, "step": 96850 }, { "epoch": 2.72, "learning_rate": 2.8228859907446363e-06, "loss": 0.143, "step": 96900 }, { "epoch": 2.72, "learning_rate": 2.8088627121020897e-06, "loss": 0.134, "step": 96950 }, { "epoch": 2.72, "learning_rate": 2.794839433459543e-06, "loss": 0.1375, "step": 97000 }, { "epoch": 2.72, "eval_bleu": 96.1201, "eval_gen_len": 64.2793, "eval_loss": 0.24178442358970642, "eval_rouge1": 93.9882, "eval_rouge2": 90.4529, "eval_rougeL": 93.7934, "eval_rougeLsum": 93.7813, "eval_runtime": 3072.8346, "eval_samples_per_second": 0.976, "eval_steps_per_second": 0.244, "step": 97000 }, { "epoch": 2.72, "learning_rate": 2.7808161548169964e-06, "loss": 0.1406, "step": 97050 }, { "epoch": 2.72, "learning_rate": 2.7667928761744497e-06, "loss": 0.1449, "step": 97100 }, { "epoch": 2.72, "learning_rate": 2.752769597531903e-06, "loss": 0.137, "step": 97150 }, { "epoch": 2.73, "learning_rate": 2.7387463188893565e-06, "loss": 0.1525, "step": 97200 }, { "epoch": 2.73, "learning_rate": 2.72472304024681e-06, "loss": 0.1456, "step": 97250 }, { "epoch": 2.73, "learning_rate": 2.710699761604263e-06, "loss": 0.1472, "step": 97300 }, { "epoch": 2.73, "learning_rate": 2.6966764829617165e-06, "loss": 0.1447, "step": 97350 }, { "epoch": 2.73, "learning_rate": 2.68265320431917e-06, "loss": 0.1359, "step": 97400 }, { "epoch": 2.73, "learning_rate": 2.6686299256766232e-06, "loss": 0.1593, "step": 97450 }, { "epoch": 2.73, "learning_rate": 2.6546066470340766e-06, "loss": 0.136, "step": 97500 }, { "epoch": 2.74, "learning_rate": 2.64058336839153e-06, "loss": 0.1496, "step": 97550 }, { "epoch": 2.74, "learning_rate": 2.6265600897489833e-06, "loss": 0.1401, "step": 97600 }, { "epoch": 2.74, "learning_rate": 2.6125368111064367e-06, "loss": 0.1427, "step": 97650 }, { "epoch": 2.74, "learning_rate": 2.59851353246389e-06, "loss": 0.1495, "step": 97700 }, { "epoch": 2.74, "learning_rate": 2.5844902538213434e-06, "loss": 0.1276, "step": 97750 }, { "epoch": 2.74, "learning_rate": 2.5704669751787967e-06, "loss": 0.1434, "step": 97800 }, { "epoch": 2.74, "learning_rate": 2.5564436965362505e-06, "loss": 0.13, "step": 97850 }, { "epoch": 2.75, "learning_rate": 2.542420417893704e-06, "loss": 0.1309, "step": 97900 }, { "epoch": 2.75, "learning_rate": 2.5283971392511572e-06, "loss": 0.1659, "step": 97950 }, { "epoch": 2.75, "learning_rate": 2.5143738606086106e-06, "loss": 0.1282, "step": 98000 }, { "epoch": 2.75, "eval_bleu": 96.1244, "eval_gen_len": 64.2753, "eval_loss": 0.24166151881217957, "eval_rouge1": 93.9974, "eval_rouge2": 90.4493, "eval_rougeL": 93.8008, "eval_rougeLsum": 93.7908, "eval_runtime": 3089.0272, "eval_samples_per_second": 0.971, "eval_steps_per_second": 0.243, "step": 98000 }, { "epoch": 2.75, "learning_rate": 2.500350581966064e-06, "loss": 0.1406, "step": 98050 }, { "epoch": 2.75, "learning_rate": 2.4863273033235173e-06, "loss": 0.1402, "step": 98100 }, { "epoch": 2.75, "learning_rate": 2.4723040246809707e-06, "loss": 0.1543, "step": 98150 }, { "epoch": 2.75, "learning_rate": 2.458280746038424e-06, "loss": 0.1434, "step": 98200 }, { "epoch": 2.76, "learning_rate": 2.4442574673958774e-06, "loss": 0.1338, "step": 98250 }, { "epoch": 2.76, "learning_rate": 2.4302341887533307e-06, "loss": 0.1202, "step": 98300 }, { "epoch": 2.76, "learning_rate": 2.416210910110784e-06, "loss": 0.1641, "step": 98350 }, { "epoch": 2.76, "learning_rate": 2.4021876314682375e-06, "loss": 0.1365, "step": 98400 }, { "epoch": 2.76, "learning_rate": 2.388164352825691e-06, "loss": 0.1366, "step": 98450 }, { "epoch": 2.76, "learning_rate": 2.374141074183144e-06, "loss": 0.1485, "step": 98500 }, { "epoch": 2.76, "learning_rate": 2.3601177955405975e-06, "loss": 0.1366, "step": 98550 }, { "epoch": 2.77, "learning_rate": 2.346094516898051e-06, "loss": 0.1644, "step": 98600 }, { "epoch": 2.77, "learning_rate": 2.3320712382555042e-06, "loss": 0.1359, "step": 98650 }, { "epoch": 2.77, "learning_rate": 2.3180479596129576e-06, "loss": 0.1452, "step": 98700 }, { "epoch": 2.77, "learning_rate": 2.304024680970411e-06, "loss": 0.1391, "step": 98750 }, { "epoch": 2.77, "learning_rate": 2.2900014023278643e-06, "loss": 0.1375, "step": 98800 }, { "epoch": 2.77, "learning_rate": 2.2759781236853177e-06, "loss": 0.1517, "step": 98850 }, { "epoch": 2.77, "learning_rate": 2.261954845042771e-06, "loss": 0.149, "step": 98900 }, { "epoch": 2.78, "learning_rate": 2.2479315664002244e-06, "loss": 0.1239, "step": 98950 }, { "epoch": 2.78, "learning_rate": 2.2339082877576778e-06, "loss": 0.1374, "step": 99000 }, { "epoch": 2.78, "eval_bleu": 96.1272, "eval_gen_len": 64.262, "eval_loss": 0.24160811305046082, "eval_rouge1": 93.9828, "eval_rouge2": 90.4481, "eval_rougeL": 93.7941, "eval_rougeLsum": 93.7877, "eval_runtime": 3114.2761, "eval_samples_per_second": 0.963, "eval_steps_per_second": 0.241, "step": 99000 }, { "epoch": 2.78, "learning_rate": 2.219885009115131e-06, "loss": 0.1295, "step": 99050 }, { "epoch": 2.78, "learning_rate": 2.2058617304725845e-06, "loss": 0.1544, "step": 99100 }, { "epoch": 2.78, "learning_rate": 2.191838451830038e-06, "loss": 0.1506, "step": 99150 }, { "epoch": 2.78, "learning_rate": 2.177815173187491e-06, "loss": 0.1497, "step": 99200 }, { "epoch": 2.78, "learning_rate": 2.1637918945449445e-06, "loss": 0.1591, "step": 99250 }, { "epoch": 2.79, "learning_rate": 2.1497686159023983e-06, "loss": 0.1467, "step": 99300 }, { "epoch": 2.79, "learning_rate": 2.1357453372598517e-06, "loss": 0.131, "step": 99350 }, { "epoch": 2.79, "learning_rate": 2.121722058617305e-06, "loss": 0.1436, "step": 99400 }, { "epoch": 2.79, "learning_rate": 2.1076987799747584e-06, "loss": 0.1561, "step": 99450 }, { "epoch": 2.79, "learning_rate": 2.0936755013322118e-06, "loss": 0.1469, "step": 99500 }, { "epoch": 2.79, "learning_rate": 2.079652222689665e-06, "loss": 0.1375, "step": 99550 }, { "epoch": 2.79, "learning_rate": 2.0656289440471185e-06, "loss": 0.1539, "step": 99600 }, { "epoch": 2.79, "learning_rate": 2.051605665404572e-06, "loss": 0.1314, "step": 99650 }, { "epoch": 2.8, "learning_rate": 2.037582386762025e-06, "loss": 0.1277, "step": 99700 }, { "epoch": 2.8, "learning_rate": 2.0235591081194785e-06, "loss": 0.1627, "step": 99750 }, { "epoch": 2.8, "learning_rate": 2.009535829476932e-06, "loss": 0.139, "step": 99800 }, { "epoch": 2.8, "learning_rate": 1.9955125508343853e-06, "loss": 0.137, "step": 99850 }, { "epoch": 2.8, "learning_rate": 1.9814892721918386e-06, "loss": 0.125, "step": 99900 }, { "epoch": 2.8, "learning_rate": 1.967465993549292e-06, "loss": 0.1275, "step": 99950 }, { "epoch": 2.8, "learning_rate": 1.9534427149067453e-06, "loss": 0.1327, "step": 100000 }, { "epoch": 2.8, "eval_bleu": 96.1239, "eval_gen_len": 64.253, "eval_loss": 0.2416970431804657, "eval_rouge1": 94.0048, "eval_rouge2": 90.4634, "eval_rougeL": 93.8114, "eval_rougeLsum": 93.8025, "eval_runtime": 3102.0331, "eval_samples_per_second": 0.967, "eval_steps_per_second": 0.242, "step": 100000 }, { "epoch": 2.81, "learning_rate": 1.9394194362641987e-06, "loss": 0.1341, "step": 100050 }, { "epoch": 2.81, "learning_rate": 1.925396157621652e-06, "loss": 0.13, "step": 100100 }, { "epoch": 2.81, "learning_rate": 1.9113728789791054e-06, "loss": 0.1335, "step": 100150 }, { "epoch": 2.81, "learning_rate": 1.8973496003365588e-06, "loss": 0.1611, "step": 100200 }, { "epoch": 2.81, "learning_rate": 1.8833263216940121e-06, "loss": 0.1226, "step": 100250 }, { "epoch": 2.81, "learning_rate": 1.8693030430514655e-06, "loss": 0.1349, "step": 100300 }, { "epoch": 2.81, "learning_rate": 1.855279764408919e-06, "loss": 0.1462, "step": 100350 }, { "epoch": 2.82, "learning_rate": 1.8412564857663724e-06, "loss": 0.1378, "step": 100400 }, { "epoch": 2.82, "learning_rate": 1.8272332071238258e-06, "loss": 0.1636, "step": 100450 }, { "epoch": 2.82, "learning_rate": 1.8132099284812791e-06, "loss": 0.1499, "step": 100500 }, { "epoch": 2.82, "learning_rate": 1.7991866498387325e-06, "loss": 0.1489, "step": 100550 }, { "epoch": 2.82, "learning_rate": 1.7851633711961858e-06, "loss": 0.1582, "step": 100600 }, { "epoch": 2.82, "learning_rate": 1.7711400925536392e-06, "loss": 0.1305, "step": 100650 }, { "epoch": 2.82, "learning_rate": 1.7571168139110925e-06, "loss": 0.1596, "step": 100700 }, { "epoch": 2.83, "learning_rate": 1.743093535268546e-06, "loss": 0.1275, "step": 100750 }, { "epoch": 2.83, "learning_rate": 1.7290702566259993e-06, "loss": 0.1398, "step": 100800 }, { "epoch": 2.83, "learning_rate": 1.7150469779834526e-06, "loss": 0.1348, "step": 100850 }, { "epoch": 2.83, "learning_rate": 1.701023699340906e-06, "loss": 0.147, "step": 100900 }, { "epoch": 2.83, "learning_rate": 1.6870004206983593e-06, "loss": 0.1471, "step": 100950 }, { "epoch": 2.83, "learning_rate": 1.6729771420558127e-06, "loss": 0.1433, "step": 101000 }, { "epoch": 2.83, "eval_bleu": 96.1231, "eval_gen_len": 64.2653, "eval_loss": 0.24147701263427734, "eval_rouge1": 93.9973, "eval_rouge2": 90.4389, "eval_rougeL": 93.8007, "eval_rougeLsum": 93.7953, "eval_runtime": 3076.5825, "eval_samples_per_second": 0.975, "eval_steps_per_second": 0.244, "step": 101000 }, { "epoch": 2.83, "learning_rate": 1.6589538634132663e-06, "loss": 0.1294, "step": 101050 }, { "epoch": 2.84, "learning_rate": 1.6449305847707196e-06, "loss": 0.1555, "step": 101100 }, { "epoch": 2.84, "learning_rate": 1.630907306128173e-06, "loss": 0.1633, "step": 101150 }, { "epoch": 2.84, "learning_rate": 1.6168840274856263e-06, "loss": 0.1383, "step": 101200 }, { "epoch": 2.84, "learning_rate": 1.6028607488430797e-06, "loss": 0.1397, "step": 101250 }, { "epoch": 2.84, "learning_rate": 1.588837470200533e-06, "loss": 0.1473, "step": 101300 }, { "epoch": 2.84, "learning_rate": 1.5748141915579864e-06, "loss": 0.1379, "step": 101350 }, { "epoch": 2.84, "learning_rate": 1.5607909129154398e-06, "loss": 0.1373, "step": 101400 }, { "epoch": 2.85, "learning_rate": 1.5467676342728931e-06, "loss": 0.1438, "step": 101450 }, { "epoch": 2.85, "learning_rate": 1.5327443556303465e-06, "loss": 0.146, "step": 101500 }, { "epoch": 2.85, "learning_rate": 1.5187210769877998e-06, "loss": 0.1479, "step": 101550 }, { "epoch": 2.85, "learning_rate": 1.5046977983452532e-06, "loss": 0.1447, "step": 101600 }, { "epoch": 2.85, "learning_rate": 1.4906745197027066e-06, "loss": 0.1461, "step": 101650 }, { "epoch": 2.85, "learning_rate": 1.47665124106016e-06, "loss": 0.1559, "step": 101700 }, { "epoch": 2.85, "learning_rate": 1.4626279624176133e-06, "loss": 0.162, "step": 101750 }, { "epoch": 2.86, "learning_rate": 1.4486046837750668e-06, "loss": 0.1347, "step": 101800 }, { "epoch": 2.86, "learning_rate": 1.4345814051325202e-06, "loss": 0.1394, "step": 101850 }, { "epoch": 2.86, "learning_rate": 1.4205581264899736e-06, "loss": 0.1429, "step": 101900 }, { "epoch": 2.86, "learning_rate": 1.406534847847427e-06, "loss": 0.1298, "step": 101950 }, { "epoch": 2.86, "learning_rate": 1.3925115692048803e-06, "loss": 0.1688, "step": 102000 }, { "epoch": 2.86, "eval_bleu": 96.1287, "eval_gen_len": 64.2707, "eval_loss": 0.24163706600666046, "eval_rouge1": 94.0131, "eval_rouge2": 90.4767, "eval_rougeL": 93.8162, "eval_rougeLsum": 93.8057, "eval_runtime": 3096.7851, "eval_samples_per_second": 0.969, "eval_steps_per_second": 0.242, "step": 102000 }, { "epoch": 2.86, "learning_rate": 1.3784882905623336e-06, "loss": 0.162, "step": 102050 }, { "epoch": 2.86, "learning_rate": 1.364465011919787e-06, "loss": 0.1441, "step": 102100 }, { "epoch": 2.86, "learning_rate": 1.3504417332772403e-06, "loss": 0.1396, "step": 102150 }, { "epoch": 2.87, "learning_rate": 1.3364184546346937e-06, "loss": 0.1423, "step": 102200 }, { "epoch": 2.87, "learning_rate": 1.322395175992147e-06, "loss": 0.1324, "step": 102250 }, { "epoch": 2.87, "learning_rate": 1.3083718973496004e-06, "loss": 0.16, "step": 102300 }, { "epoch": 2.87, "learning_rate": 1.2943486187070538e-06, "loss": 0.135, "step": 102350 }, { "epoch": 2.87, "learning_rate": 1.2803253400645071e-06, "loss": 0.1298, "step": 102400 }, { "epoch": 2.87, "learning_rate": 1.2663020614219605e-06, "loss": 0.1523, "step": 102450 }, { "epoch": 2.87, "learning_rate": 1.252278782779414e-06, "loss": 0.1286, "step": 102500 }, { "epoch": 2.88, "learning_rate": 1.2382555041368674e-06, "loss": 0.1439, "step": 102550 }, { "epoch": 2.88, "learning_rate": 1.2242322254943208e-06, "loss": 0.1283, "step": 102600 }, { "epoch": 2.88, "learning_rate": 1.2102089468517741e-06, "loss": 0.1216, "step": 102650 }, { "epoch": 2.88, "learning_rate": 1.1961856682092275e-06, "loss": 0.1485, "step": 102700 }, { "epoch": 2.88, "learning_rate": 1.1821623895666808e-06, "loss": 0.1455, "step": 102750 }, { "epoch": 2.88, "learning_rate": 1.1681391109241342e-06, "loss": 0.1384, "step": 102800 }, { "epoch": 2.88, "learning_rate": 1.1541158322815876e-06, "loss": 0.14, "step": 102850 }, { "epoch": 2.89, "learning_rate": 1.1400925536390407e-06, "loss": 0.1367, "step": 102900 }, { "epoch": 2.89, "learning_rate": 1.126069274996494e-06, "loss": 0.1558, "step": 102950 }, { "epoch": 2.89, "learning_rate": 1.1120459963539474e-06, "loss": 0.1377, "step": 103000 }, { "epoch": 2.89, "eval_bleu": 96.1203, "eval_gen_len": 64.258, "eval_loss": 0.24138423800468445, "eval_rouge1": 94.0261, "eval_rouge2": 90.4778, "eval_rougeL": 93.8237, "eval_rougeLsum": 93.815, "eval_runtime": 3149.2273, "eval_samples_per_second": 0.953, "eval_steps_per_second": 0.238, "step": 103000 }, { "epoch": 2.89, "learning_rate": 1.0980227177114008e-06, "loss": 0.1283, "step": 103050 }, { "epoch": 2.89, "learning_rate": 1.0839994390688541e-06, "loss": 0.1487, "step": 103100 }, { "epoch": 2.89, "learning_rate": 1.0699761604263075e-06, "loss": 0.144, "step": 103150 }, { "epoch": 2.89, "learning_rate": 1.055952881783761e-06, "loss": 0.1369, "step": 103200 }, { "epoch": 2.9, "learning_rate": 1.0419296031412144e-06, "loss": 0.1451, "step": 103250 }, { "epoch": 2.9, "learning_rate": 1.0279063244986678e-06, "loss": 0.1536, "step": 103300 }, { "epoch": 2.9, "learning_rate": 1.0138830458561211e-06, "loss": 0.1434, "step": 103350 }, { "epoch": 2.9, "learning_rate": 9.998597672135745e-07, "loss": 0.144, "step": 103400 }, { "epoch": 2.9, "learning_rate": 9.858364885710279e-07, "loss": 0.142, "step": 103450 }, { "epoch": 2.9, "learning_rate": 9.718132099284812e-07, "loss": 0.1209, "step": 103500 }, { "epoch": 2.9, "learning_rate": 9.577899312859346e-07, "loss": 0.127, "step": 103550 }, { "epoch": 2.91, "learning_rate": 9.437666526433879e-07, "loss": 0.1505, "step": 103600 }, { "epoch": 2.91, "learning_rate": 9.297433740008414e-07, "loss": 0.1377, "step": 103650 }, { "epoch": 2.91, "learning_rate": 9.157200953582947e-07, "loss": 0.1458, "step": 103700 }, { "epoch": 2.91, "learning_rate": 9.016968167157482e-07, "loss": 0.1445, "step": 103750 }, { "epoch": 2.91, "learning_rate": 8.876735380732016e-07, "loss": 0.1363, "step": 103800 }, { "epoch": 2.91, "learning_rate": 8.736502594306549e-07, "loss": 0.152, "step": 103850 }, { "epoch": 2.91, "learning_rate": 8.596269807881083e-07, "loss": 0.1565, "step": 103900 }, { "epoch": 2.92, "learning_rate": 8.456037021455616e-07, "loss": 0.142, "step": 103950 }, { "epoch": 2.92, "learning_rate": 8.31580423503015e-07, "loss": 0.1482, "step": 104000 }, { "epoch": 2.92, "eval_bleu": 96.1208, "eval_gen_len": 64.2503, "eval_loss": 0.24138183891773224, "eval_rouge1": 94.0225, "eval_rouge2": 90.488, "eval_rougeL": 93.8289, "eval_rougeLsum": 93.8131, "eval_runtime": 3071.8283, "eval_samples_per_second": 0.977, "eval_steps_per_second": 0.244, "step": 104000 }, { "epoch": 2.92, "learning_rate": 8.175571448604684e-07, "loss": 0.1504, "step": 104050 }, { "epoch": 2.92, "learning_rate": 8.035338662179218e-07, "loss": 0.1263, "step": 104100 }, { "epoch": 2.92, "learning_rate": 7.895105875753752e-07, "loss": 0.1346, "step": 104150 }, { "epoch": 2.92, "learning_rate": 7.754873089328285e-07, "loss": 0.1471, "step": 104200 }, { "epoch": 2.92, "learning_rate": 7.614640302902819e-07, "loss": 0.1356, "step": 104250 }, { "epoch": 2.93, "learning_rate": 7.474407516477353e-07, "loss": 0.1457, "step": 104300 }, { "epoch": 2.93, "learning_rate": 7.334174730051886e-07, "loss": 0.1458, "step": 104350 }, { "epoch": 2.93, "learning_rate": 7.19394194362642e-07, "loss": 0.1373, "step": 104400 }, { "epoch": 2.93, "learning_rate": 7.053709157200954e-07, "loss": 0.1362, "step": 104450 }, { "epoch": 2.93, "learning_rate": 6.913476370775488e-07, "loss": 0.1548, "step": 104500 }, { "epoch": 2.93, "learning_rate": 6.773243584350021e-07, "loss": 0.1398, "step": 104550 }, { "epoch": 2.93, "learning_rate": 6.633010797924555e-07, "loss": 0.1604, "step": 104600 }, { "epoch": 2.94, "learning_rate": 6.492778011499089e-07, "loss": 0.146, "step": 104650 }, { "epoch": 2.94, "learning_rate": 6.352545225073622e-07, "loss": 0.1272, "step": 104700 }, { "epoch": 2.94, "learning_rate": 6.212312438648156e-07, "loss": 0.1495, "step": 104750 }, { "epoch": 2.94, "learning_rate": 6.07207965222269e-07, "loss": 0.1408, "step": 104800 }, { "epoch": 2.94, "learning_rate": 5.931846865797224e-07, "loss": 0.1502, "step": 104850 }, { "epoch": 2.94, "learning_rate": 5.791614079371758e-07, "loss": 0.1471, "step": 104900 }, { "epoch": 2.94, "learning_rate": 5.651381292946291e-07, "loss": 0.16, "step": 104950 }, { "epoch": 2.94, "learning_rate": 5.511148506520825e-07, "loss": 0.1286, "step": 105000 }, { "epoch": 2.94, "eval_bleu": 96.128, "eval_gen_len": 64.2513, "eval_loss": 0.24144940078258514, "eval_rouge1": 94.0054, "eval_rouge2": 90.4653, "eval_rougeL": 93.8155, "eval_rougeLsum": 93.8029, "eval_runtime": 3085.2312, "eval_samples_per_second": 0.972, "eval_steps_per_second": 0.243, "step": 105000 }, { "epoch": 2.95, "learning_rate": 5.370915720095358e-07, "loss": 0.1509, "step": 105050 }, { "epoch": 2.95, "learning_rate": 5.230682933669892e-07, "loss": 0.1342, "step": 105100 }, { "epoch": 2.95, "learning_rate": 5.090450147244427e-07, "loss": 0.144, "step": 105150 }, { "epoch": 2.95, "learning_rate": 4.95021736081896e-07, "loss": 0.166, "step": 105200 }, { "epoch": 2.95, "learning_rate": 4.809984574393494e-07, "loss": 0.1405, "step": 105250 }, { "epoch": 2.95, "learning_rate": 4.669751787968027e-07, "loss": 0.1551, "step": 105300 }, { "epoch": 2.95, "learning_rate": 4.529519001542561e-07, "loss": 0.1417, "step": 105350 }, { "epoch": 2.96, "learning_rate": 4.3892862151170944e-07, "loss": 0.1233, "step": 105400 }, { "epoch": 2.96, "learning_rate": 4.2490534286916285e-07, "loss": 0.1406, "step": 105450 }, { "epoch": 2.96, "learning_rate": 4.108820642266162e-07, "loss": 0.1347, "step": 105500 }, { "epoch": 2.96, "learning_rate": 3.9685878558406956e-07, "loss": 0.1533, "step": 105550 }, { "epoch": 2.96, "learning_rate": 3.82835506941523e-07, "loss": 0.1462, "step": 105600 }, { "epoch": 2.96, "learning_rate": 3.6881222829897633e-07, "loss": 0.1404, "step": 105650 }, { "epoch": 2.96, "learning_rate": 3.547889496564297e-07, "loss": 0.1323, "step": 105700 }, { "epoch": 2.97, "learning_rate": 3.4076567101388305e-07, "loss": 0.1564, "step": 105750 }, { "epoch": 2.97, "learning_rate": 3.2674239237133646e-07, "loss": 0.1508, "step": 105800 }, { "epoch": 2.97, "learning_rate": 3.127191137287898e-07, "loss": 0.1467, "step": 105850 }, { "epoch": 2.97, "learning_rate": 2.986958350862432e-07, "loss": 0.1574, "step": 105900 }, { "epoch": 2.97, "learning_rate": 2.846725564436966e-07, "loss": 0.132, "step": 105950 }, { "epoch": 2.97, "learning_rate": 2.7064927780114994e-07, "loss": 0.1392, "step": 106000 }, { "epoch": 2.97, "eval_bleu": 96.1228, "eval_gen_len": 64.2473, "eval_loss": 0.2414444237947464, "eval_rouge1": 94.0048, "eval_rouge2": 90.4742, "eval_rougeL": 93.8171, "eval_rougeLsum": 93.8047, "eval_runtime": 3077.7276, "eval_samples_per_second": 0.975, "eval_steps_per_second": 0.244, "step": 106000 }, { "epoch": 2.97, "learning_rate": 2.566259991586033e-07, "loss": 0.1544, "step": 106050 }, { "epoch": 2.98, "learning_rate": 2.4260272051605666e-07, "loss": 0.1498, "step": 106100 }, { "epoch": 2.98, "learning_rate": 2.2857944187351002e-07, "loss": 0.1291, "step": 106150 }, { "epoch": 2.98, "learning_rate": 2.145561632309634e-07, "loss": 0.1411, "step": 106200 }, { "epoch": 2.98, "learning_rate": 2.0053288458841676e-07, "loss": 0.1445, "step": 106250 }, { "epoch": 2.98, "learning_rate": 1.8650960594587014e-07, "loss": 0.1499, "step": 106300 }, { "epoch": 2.98, "learning_rate": 1.7248632730332353e-07, "loss": 0.1442, "step": 106350 }, { "epoch": 2.98, "learning_rate": 1.5846304866077688e-07, "loss": 0.1387, "step": 106400 }, { "epoch": 2.99, "learning_rate": 1.4443977001823027e-07, "loss": 0.138, "step": 106450 }, { "epoch": 2.99, "learning_rate": 1.3041649137568363e-07, "loss": 0.1559, "step": 106500 }, { "epoch": 2.99, "learning_rate": 1.1639321273313701e-07, "loss": 0.1437, "step": 106550 }, { "epoch": 2.99, "learning_rate": 1.023699340905904e-07, "loss": 0.1597, "step": 106600 }, { "epoch": 2.99, "learning_rate": 8.834665544804376e-08, "loss": 0.1455, "step": 106650 }, { "epoch": 2.99, "learning_rate": 7.432337680549714e-08, "loss": 0.1505, "step": 106700 }, { "epoch": 2.99, "learning_rate": 6.030009816295049e-08, "loss": 0.1633, "step": 106750 }, { "epoch": 3.0, "learning_rate": 4.627681952040387e-08, "loss": 0.135, "step": 106800 }, { "epoch": 3.0, "learning_rate": 3.225354087785724e-08, "loss": 0.1483, "step": 106850 }, { "epoch": 3.0, "learning_rate": 1.8230262235310616e-08, "loss": 0.1375, "step": 106900 }, { "epoch": 3.0, "learning_rate": 4.2069835927639876e-09, "loss": 0.1489, "step": 106950 }, { "epoch": 3.0, "step": 106965, "total_flos": 7.416839339283579e+17, "train_loss": 0.1832969891809344, "train_runtime": 592063.0002, "train_samples_per_second": 2.891, "train_steps_per_second": 0.181 } ], "max_steps": 106965, "num_train_epochs": 3, "total_flos": 7.416839339283579e+17, "trial_name": null, "trial_params": null }