{ "best_metric": 14.437, "best_model_checkpoint": "ckpt_mt5_calm/google/mt5-small/wmt14_de_en/lr0.0005_e5/checkpoint-500000", "epoch": 1.8137594282459693, "global_step": 510000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 0.0005, "loss": 5.1585, "step": 500 }, { "epoch": 0.0, "learning_rate": 0.0005, "loss": 3.9783, "step": 1000 }, { "epoch": 0.01, "learning_rate": 0.0005, "loss": 3.7381, "step": 1500 }, { "epoch": 0.01, "learning_rate": 0.0005, "loss": 3.605, "step": 2000 }, { "epoch": 0.01, "learning_rate": 0.0005, "loss": 3.4827, "step": 2500 }, { "epoch": 0.01, "learning_rate": 0.0005, "loss": 3.4053, "step": 3000 }, { "epoch": 0.01, "learning_rate": 0.0005, "loss": 3.363, "step": 3500 }, { "epoch": 0.01, "learning_rate": 0.0005, "loss": 3.2694, "step": 4000 }, { "epoch": 0.02, "learning_rate": 0.0005, "loss": 3.1942, "step": 4500 }, { "epoch": 0.02, "learning_rate": 0.0005, "loss": 3.1715, "step": 5000 }, { "epoch": 0.02, "learning_rate": 0.0005, "loss": 3.1428, "step": 5500 }, { "epoch": 0.02, "learning_rate": 0.0005, "loss": 3.1076, "step": 6000 }, { "epoch": 0.02, "learning_rate": 0.0005, "loss": 3.0584, "step": 6500 }, { "epoch": 0.02, "learning_rate": 0.0005, "loss": 3.0355, "step": 7000 }, { "epoch": 0.03, "learning_rate": 0.0005, "loss": 3.0121, "step": 7500 }, { "epoch": 0.03, "learning_rate": 0.0005, "loss": 2.9444, "step": 8000 }, { "epoch": 0.03, "learning_rate": 0.0005, "loss": 2.9554, "step": 8500 }, { "epoch": 0.03, "learning_rate": 0.0005, "loss": 2.9541, "step": 9000 }, { "epoch": 0.03, "learning_rate": 0.0005, "loss": 2.9076, "step": 9500 }, { "epoch": 0.04, "learning_rate": 0.0005, "loss": 2.9165, "step": 10000 }, { "epoch": 0.04, "eval_bleu": 9.5968, "eval_gen_len": 17.2947, "eval_runtime": 210.2888, "eval_samples_per_second": 14.266, "eval_steps_per_second": 1.783, "step": 10000 }, { "epoch": 0.04, "learning_rate": 0.0005, "loss": 2.9083, "step": 10500 }, { "epoch": 0.04, "learning_rate": 0.0005, "loss": 2.8379, "step": 11000 }, { "epoch": 0.04, "learning_rate": 0.0005, "loss": 2.8713, "step": 11500 }, { "epoch": 0.04, "learning_rate": 0.0005, "loss": 2.8174, "step": 12000 }, { "epoch": 0.04, "learning_rate": 0.0005, "loss": 2.8014, "step": 12500 }, { "epoch": 0.05, "learning_rate": 0.0005, "loss": 2.8065, "step": 13000 }, { "epoch": 0.05, "learning_rate": 0.0005, "loss": 2.7795, "step": 13500 }, { "epoch": 0.05, "learning_rate": 0.0005, "loss": 2.765, "step": 14000 }, { "epoch": 0.05, "learning_rate": 0.0005, "loss": 2.7456, "step": 14500 }, { "epoch": 0.05, "learning_rate": 0.0005, "loss": 2.7718, "step": 15000 }, { "epoch": 0.06, "learning_rate": 0.0005, "loss": 2.7426, "step": 15500 }, { "epoch": 0.06, "learning_rate": 0.0005, "loss": 2.7086, "step": 16000 }, { "epoch": 0.06, "learning_rate": 0.0005, "loss": 2.72, "step": 16500 }, { "epoch": 0.06, "learning_rate": 0.0005, "loss": 2.6916, "step": 17000 }, { "epoch": 0.06, "learning_rate": 0.0005, "loss": 2.7049, "step": 17500 }, { "epoch": 0.06, "learning_rate": 0.0005, "loss": 2.7094, "step": 18000 }, { "epoch": 0.07, "learning_rate": 0.0005, "loss": 2.691, "step": 18500 }, { "epoch": 0.07, "learning_rate": 0.0005, "loss": 2.7057, "step": 19000 }, { "epoch": 0.07, "learning_rate": 0.0005, "loss": 2.6476, "step": 19500 }, { "epoch": 0.07, "learning_rate": 0.0005, "loss": 2.6388, "step": 20000 }, { "epoch": 0.07, "eval_bleu": 10.7385, "eval_gen_len": 17.1993, "eval_runtime": 210.0921, "eval_samples_per_second": 14.279, "eval_steps_per_second": 1.785, "step": 20000 }, { "epoch": 0.07, "learning_rate": 0.0005, "loss": 2.6002, "step": 20500 }, { "epoch": 0.07, "learning_rate": 0.0005, "loss": 2.6225, "step": 21000 }, { "epoch": 0.08, "learning_rate": 0.0005, "loss": 2.6422, "step": 21500 }, { "epoch": 0.08, "learning_rate": 0.0005, "loss": 2.6291, "step": 22000 }, { "epoch": 0.08, "learning_rate": 0.0005, "loss": 2.612, "step": 22500 }, { "epoch": 0.08, "learning_rate": 0.0005, "loss": 2.5761, "step": 23000 }, { "epoch": 0.08, "learning_rate": 0.0005, "loss": 2.5875, "step": 23500 }, { "epoch": 0.09, "learning_rate": 0.0005, "loss": 2.5992, "step": 24000 }, { "epoch": 0.09, "learning_rate": 0.0005, "loss": 2.5601, "step": 24500 }, { "epoch": 0.09, "learning_rate": 0.0005, "loss": 2.5567, "step": 25000 }, { "epoch": 0.09, "learning_rate": 0.0005, "loss": 2.5571, "step": 25500 }, { "epoch": 0.09, "learning_rate": 0.0005, "loss": 2.5457, "step": 26000 }, { "epoch": 0.09, "learning_rate": 0.0005, "loss": 2.524, "step": 26500 }, { "epoch": 0.1, "learning_rate": 0.0005, "loss": 2.545, "step": 27000 }, { "epoch": 0.1, "learning_rate": 0.0005, "loss": 2.5502, "step": 27500 }, { "epoch": 0.1, "learning_rate": 0.0005, "loss": 2.5193, "step": 28000 }, { "epoch": 0.1, "learning_rate": 0.0005, "loss": 2.518, "step": 28500 }, { "epoch": 0.1, "learning_rate": 0.0005, "loss": 2.5476, "step": 29000 }, { "epoch": 0.1, "learning_rate": 0.0005, "loss": 2.5378, "step": 29500 }, { "epoch": 0.11, "learning_rate": 0.0005, "loss": 2.5397, "step": 30000 }, { "epoch": 0.11, "eval_bleu": 11.2222, "eval_gen_len": 17.2613, "eval_runtime": 209.8126, "eval_samples_per_second": 14.298, "eval_steps_per_second": 1.787, "step": 30000 }, { "epoch": 0.11, "learning_rate": 0.0005, "loss": 2.4887, "step": 30500 }, { "epoch": 0.11, "learning_rate": 0.0005, "loss": 2.4931, "step": 31000 }, { "epoch": 0.11, "learning_rate": 0.0005, "loss": 2.514, "step": 31500 }, { "epoch": 0.11, "learning_rate": 0.0005, "loss": 2.4727, "step": 32000 }, { "epoch": 0.12, "learning_rate": 0.0005, "loss": 2.4937, "step": 32500 }, { "epoch": 0.12, "learning_rate": 0.0005, "loss": 2.4849, "step": 33000 }, { "epoch": 0.12, "learning_rate": 0.0005, "loss": 2.4951, "step": 33500 }, { "epoch": 0.12, "learning_rate": 0.0005, "loss": 2.4652, "step": 34000 }, { "epoch": 0.12, "learning_rate": 0.0005, "loss": 2.4523, "step": 34500 }, { "epoch": 0.12, "learning_rate": 0.0005, "loss": 2.4862, "step": 35000 }, { "epoch": 0.13, "learning_rate": 0.0005, "loss": 2.4779, "step": 35500 }, { "epoch": 0.13, "learning_rate": 0.0005, "loss": 2.4616, "step": 36000 }, { "epoch": 0.13, "learning_rate": 0.0005, "loss": 2.4679, "step": 36500 }, { "epoch": 0.13, "learning_rate": 0.0005, "loss": 2.4274, "step": 37000 }, { "epoch": 0.13, "learning_rate": 0.0005, "loss": 2.4542, "step": 37500 }, { "epoch": 0.14, "learning_rate": 0.0005, "loss": 2.4711, "step": 38000 }, { "epoch": 0.14, "learning_rate": 0.0005, "loss": 2.4417, "step": 38500 }, { "epoch": 0.14, "learning_rate": 0.0005, "loss": 2.444, "step": 39000 }, { "epoch": 0.14, "learning_rate": 0.0005, "loss": 2.4355, "step": 39500 }, { "epoch": 0.14, "learning_rate": 0.0005, "loss": 2.4444, "step": 40000 }, { "epoch": 0.14, "eval_bleu": 11.6796, "eval_gen_len": 17.225, "eval_runtime": 208.6699, "eval_samples_per_second": 14.377, "eval_steps_per_second": 1.797, "step": 40000 }, { "epoch": 0.14, "learning_rate": 0.0005, "loss": 2.4323, "step": 40500 }, { "epoch": 0.15, "learning_rate": 0.0005, "loss": 2.4094, "step": 41000 }, { "epoch": 0.15, "learning_rate": 0.0005, "loss": 2.415, "step": 41500 }, { "epoch": 0.15, "learning_rate": 0.0005, "loss": 2.3979, "step": 42000 }, { "epoch": 0.15, "learning_rate": 0.0005, "loss": 2.3854, "step": 42500 }, { "epoch": 0.15, "learning_rate": 0.0005, "loss": 2.3759, "step": 43000 }, { "epoch": 0.15, "learning_rate": 0.0005, "loss": 2.3881, "step": 43500 }, { "epoch": 0.16, "learning_rate": 0.0005, "loss": 2.3981, "step": 44000 }, { "epoch": 0.16, "learning_rate": 0.0005, "loss": 2.4224, "step": 44500 }, { "epoch": 0.16, "learning_rate": 0.0005, "loss": 2.3761, "step": 45000 }, { "epoch": 0.16, "learning_rate": 0.0005, "loss": 2.4399, "step": 45500 }, { "epoch": 0.16, "learning_rate": 0.0005, "loss": 2.3923, "step": 46000 }, { "epoch": 0.17, "learning_rate": 0.0005, "loss": 2.4098, "step": 46500 }, { "epoch": 0.17, "learning_rate": 0.0005, "loss": 2.3789, "step": 47000 }, { "epoch": 0.17, "learning_rate": 0.0005, "loss": 2.4062, "step": 47500 }, { "epoch": 0.17, "learning_rate": 0.0005, "loss": 2.3814, "step": 48000 }, { "epoch": 0.17, "learning_rate": 0.0005, "loss": 2.3801, "step": 48500 }, { "epoch": 0.17, "learning_rate": 0.0005, "loss": 2.3709, "step": 49000 }, { "epoch": 0.18, "learning_rate": 0.0005, "loss": 2.3442, "step": 49500 }, { "epoch": 0.18, "learning_rate": 0.0005, "loss": 2.3634, "step": 50000 }, { "epoch": 0.18, "eval_bleu": 12.1549, "eval_gen_len": 17.2763, "eval_runtime": 207.6227, "eval_samples_per_second": 14.449, "eval_steps_per_second": 1.806, "step": 50000 }, { "epoch": 0.18, "learning_rate": 0.0005, "loss": 2.3769, "step": 50500 }, { "epoch": 0.18, "learning_rate": 0.0005, "loss": 2.3646, "step": 51000 }, { "epoch": 0.18, "learning_rate": 0.0005, "loss": 2.3628, "step": 51500 }, { "epoch": 0.18, "learning_rate": 0.0005, "loss": 2.3378, "step": 52000 }, { "epoch": 0.19, "learning_rate": 0.0005, "loss": 2.3609, "step": 52500 }, { "epoch": 0.19, "learning_rate": 0.0005, "loss": 2.3754, "step": 53000 }, { "epoch": 0.19, "learning_rate": 0.0005, "loss": 2.3555, "step": 53500 }, { "epoch": 0.19, "learning_rate": 0.0005, "loss": 2.3596, "step": 54000 }, { "epoch": 0.19, "learning_rate": 0.0005, "loss": 2.3222, "step": 54500 }, { "epoch": 0.2, "learning_rate": 0.0005, "loss": 2.3257, "step": 55000 }, { "epoch": 0.2, "learning_rate": 0.0005, "loss": 2.3631, "step": 55500 }, { "epoch": 0.2, "learning_rate": 0.0005, "loss": 2.3317, "step": 56000 }, { "epoch": 0.2, "learning_rate": 0.0005, "loss": 2.3404, "step": 56500 }, { "epoch": 0.2, "learning_rate": 0.0005, "loss": 2.3737, "step": 57000 }, { "epoch": 0.2, "learning_rate": 0.0005, "loss": 2.3384, "step": 57500 }, { "epoch": 0.21, "learning_rate": 0.0005, "loss": 2.3218, "step": 58000 }, { "epoch": 0.21, "learning_rate": 0.0005, "loss": 2.3527, "step": 58500 }, { "epoch": 0.21, "learning_rate": 0.0005, "loss": 2.3268, "step": 59000 }, { "epoch": 0.21, "learning_rate": 0.0005, "loss": 2.3099, "step": 59500 }, { "epoch": 0.21, "learning_rate": 0.0005, "loss": 2.3032, "step": 60000 }, { "epoch": 0.21, "eval_bleu": 12.3766, "eval_gen_len": 17.197, "eval_runtime": 210.5803, "eval_samples_per_second": 14.246, "eval_steps_per_second": 1.781, "step": 60000 }, { "epoch": 0.22, "learning_rate": 0.0005, "loss": 2.309, "step": 60500 }, { "epoch": 0.22, "learning_rate": 0.0005, "loss": 2.3257, "step": 61000 }, { "epoch": 0.22, "learning_rate": 0.0005, "loss": 2.3094, "step": 61500 }, { "epoch": 0.22, "learning_rate": 0.0005, "loss": 2.2655, "step": 62000 }, { "epoch": 0.22, "learning_rate": 0.0005, "loss": 2.2845, "step": 62500 }, { "epoch": 0.22, "learning_rate": 0.0005, "loss": 2.264, "step": 63000 }, { "epoch": 0.23, "learning_rate": 0.0005, "loss": 2.2825, "step": 63500 }, { "epoch": 0.23, "learning_rate": 0.0005, "loss": 2.3123, "step": 64000 }, { "epoch": 0.23, "learning_rate": 0.0005, "loss": 2.3179, "step": 64500 }, { "epoch": 0.23, "learning_rate": 0.0005, "loss": 2.2915, "step": 65000 }, { "epoch": 0.23, "learning_rate": 0.0005, "loss": 2.2926, "step": 65500 }, { "epoch": 0.23, "learning_rate": 0.0005, "loss": 2.2963, "step": 66000 }, { "epoch": 0.24, "learning_rate": 0.0005, "loss": 2.3041, "step": 66500 }, { "epoch": 0.24, "learning_rate": 0.0005, "loss": 2.2735, "step": 67000 }, { "epoch": 0.24, "learning_rate": 0.0005, "loss": 2.2915, "step": 67500 }, { "epoch": 0.24, "learning_rate": 0.0005, "loss": 2.2724, "step": 68000 }, { "epoch": 0.24, "learning_rate": 0.0005, "loss": 2.2816, "step": 68500 }, { "epoch": 0.25, "learning_rate": 0.0005, "loss": 2.2798, "step": 69000 }, { "epoch": 0.25, "learning_rate": 0.0005, "loss": 2.2817, "step": 69500 }, { "epoch": 0.25, "learning_rate": 0.0005, "loss": 2.2821, "step": 70000 }, { "epoch": 0.25, "eval_bleu": 0, "eval_gen_len": 17.236, "eval_runtime": 205.7096, "eval_samples_per_second": 14.584, "eval_steps_per_second": 1.823, "step": 70000 }, { "epoch": 0.25, "learning_rate": 0.0005, "loss": 2.2764, "step": 70500 }, { "epoch": 0.25, "learning_rate": 0.0005, "loss": 2.277, "step": 71000 }, { "epoch": 0.25, "learning_rate": 0.0005, "loss": 2.248, "step": 71500 }, { "epoch": 0.26, "learning_rate": 0.0005, "loss": 2.2635, "step": 72000 }, { "epoch": 0.26, "learning_rate": 0.0005, "loss": 2.2778, "step": 72500 }, { "epoch": 0.26, "learning_rate": 0.0005, "loss": 2.274, "step": 73000 }, { "epoch": 0.26, "learning_rate": 0.0005, "loss": 2.2447, "step": 73500 }, { "epoch": 0.26, "learning_rate": 0.0005, "loss": 2.2507, "step": 74000 }, { "epoch": 0.26, "learning_rate": 0.0005, "loss": 2.2641, "step": 74500 }, { "epoch": 0.27, "learning_rate": 0.0005, "loss": 2.2654, "step": 75000 }, { "epoch": 0.27, "learning_rate": 0.0005, "loss": 2.2361, "step": 75500 }, { "epoch": 0.27, "learning_rate": 0.0005, "loss": 2.2509, "step": 76000 }, { "epoch": 0.27, "learning_rate": 0.0005, "loss": 2.2538, "step": 76500 }, { "epoch": 0.27, "learning_rate": 0.0005, "loss": 2.2482, "step": 77000 }, { "epoch": 0.28, "learning_rate": 0.0005, "loss": 2.2958, "step": 77500 }, { "epoch": 0.28, "learning_rate": 0.0005, "loss": 2.2403, "step": 78000 }, { "epoch": 0.28, "learning_rate": 0.0005, "loss": 2.2521, "step": 78500 }, { "epoch": 0.28, "learning_rate": 0.0005, "loss": 2.2556, "step": 79000 }, { "epoch": 0.28, "learning_rate": 0.0005, "loss": 2.2235, "step": 79500 }, { "epoch": 0.28, "learning_rate": 0.0005, "loss": 2.2452, "step": 80000 }, { "epoch": 0.28, "eval_bleu": 12.4778, "eval_gen_len": 17.1983, "eval_runtime": 210.1956, "eval_samples_per_second": 14.272, "eval_steps_per_second": 1.784, "step": 80000 }, { "epoch": 0.29, "learning_rate": 0.0005, "loss": 2.2499, "step": 80500 }, { "epoch": 0.29, "learning_rate": 0.0005, "loss": 2.2461, "step": 81000 }, { "epoch": 0.29, "learning_rate": 0.0005, "loss": 2.2056, "step": 81500 }, { "epoch": 0.29, "learning_rate": 0.0005, "loss": 2.2334, "step": 82000 }, { "epoch": 0.29, "learning_rate": 0.0005, "loss": 2.217, "step": 82500 }, { "epoch": 0.3, "learning_rate": 0.0005, "loss": 2.2491, "step": 83000 }, { "epoch": 0.3, "learning_rate": 0.0005, "loss": 2.229, "step": 83500 }, { "epoch": 0.3, "learning_rate": 0.0005, "loss": 2.2197, "step": 84000 }, { "epoch": 0.3, "learning_rate": 0.0005, "loss": 2.2652, "step": 84500 }, { "epoch": 0.3, "learning_rate": 0.0005, "loss": 2.2195, "step": 85000 }, { "epoch": 0.3, "learning_rate": 0.0005, "loss": 2.2357, "step": 85500 }, { "epoch": 0.31, "learning_rate": 0.0005, "loss": 2.2303, "step": 86000 }, { "epoch": 0.31, "learning_rate": 0.0005, "loss": 2.2241, "step": 86500 }, { "epoch": 0.31, "learning_rate": 0.0005, "loss": 2.2229, "step": 87000 }, { "epoch": 0.31, "learning_rate": 0.0005, "loss": 2.2029, "step": 87500 }, { "epoch": 0.31, "learning_rate": 0.0005, "loss": 2.2047, "step": 88000 }, { "epoch": 0.31, "learning_rate": 0.0005, "loss": 2.2189, "step": 88500 }, { "epoch": 0.32, "learning_rate": 0.0005, "loss": 2.2186, "step": 89000 }, { "epoch": 0.32, "learning_rate": 0.0005, "loss": 2.1882, "step": 89500 }, { "epoch": 0.32, "learning_rate": 0.0005, "loss": 2.2099, "step": 90000 }, { "epoch": 0.32, "eval_bleu": 12.9363, "eval_gen_len": 17.2027, "eval_runtime": 211.5928, "eval_samples_per_second": 14.178, "eval_steps_per_second": 1.772, "step": 90000 }, { "epoch": 0.32, "learning_rate": 0.0005, "loss": 2.2175, "step": 90500 }, { "epoch": 0.32, "learning_rate": 0.0005, "loss": 2.2035, "step": 91000 }, { "epoch": 0.33, "learning_rate": 0.0005, "loss": 2.2162, "step": 91500 }, { "epoch": 0.33, "learning_rate": 0.0005, "loss": 2.189, "step": 92000 }, { "epoch": 0.33, "learning_rate": 0.0005, "loss": 2.2124, "step": 92500 }, { "epoch": 0.33, "learning_rate": 0.0005, "loss": 2.2191, "step": 93000 }, { "epoch": 0.33, "learning_rate": 0.0005, "loss": 2.2225, "step": 93500 }, { "epoch": 0.33, "learning_rate": 0.0005, "loss": 2.1997, "step": 94000 }, { "epoch": 0.34, "learning_rate": 0.0005, "loss": 2.1941, "step": 94500 }, { "epoch": 0.34, "learning_rate": 0.0005, "loss": 2.2108, "step": 95000 }, { "epoch": 0.34, "learning_rate": 0.0005, "loss": 2.1919, "step": 95500 }, { "epoch": 0.34, "learning_rate": 0.0005, "loss": 2.197, "step": 96000 }, { "epoch": 0.34, "learning_rate": 0.0005, "loss": 2.2065, "step": 96500 }, { "epoch": 0.35, "learning_rate": 0.0005, "loss": 2.1878, "step": 97000 }, { "epoch": 0.35, "learning_rate": 0.0005, "loss": 2.1815, "step": 97500 }, { "epoch": 0.35, "learning_rate": 0.0005, "loss": 2.2084, "step": 98000 }, { "epoch": 0.35, "learning_rate": 0.0005, "loss": 2.1968, "step": 98500 }, { "epoch": 0.35, "learning_rate": 0.0005, "loss": 2.1875, "step": 99000 }, { "epoch": 0.35, "learning_rate": 0.0005, "loss": 2.1867, "step": 99500 }, { "epoch": 0.36, "learning_rate": 0.0005, "loss": 2.1648, "step": 100000 }, { "epoch": 0.36, "eval_bleu": 12.6533, "eval_gen_len": 17.2053, "eval_runtime": 210.3408, "eval_samples_per_second": 14.263, "eval_steps_per_second": 1.783, "step": 100000 }, { "epoch": 0.36, "learning_rate": 0.0005, "loss": 2.1614, "step": 100500 }, { "epoch": 0.36, "learning_rate": 0.0005, "loss": 2.1697, "step": 101000 }, { "epoch": 0.36, "learning_rate": 0.0005, "loss": 2.1723, "step": 101500 }, { "epoch": 0.36, "learning_rate": 0.0005, "loss": 2.1693, "step": 102000 }, { "epoch": 0.36, "learning_rate": 0.0005, "loss": 2.1926, "step": 102500 }, { "epoch": 0.37, "learning_rate": 0.0005, "loss": 2.1633, "step": 103000 }, { "epoch": 0.37, "learning_rate": 0.0005, "loss": 2.191, "step": 103500 }, { "epoch": 0.37, "learning_rate": 0.0005, "loss": 2.1545, "step": 104000 }, { "epoch": 0.37, "learning_rate": 0.0005, "loss": 2.1767, "step": 104500 }, { "epoch": 0.37, "learning_rate": 0.0005, "loss": 2.1776, "step": 105000 }, { "epoch": 0.38, "learning_rate": 0.0005, "loss": 2.1842, "step": 105500 }, { "epoch": 0.38, "learning_rate": 0.0005, "loss": 2.1691, "step": 106000 }, { "epoch": 0.38, "learning_rate": 0.0005, "loss": 2.1628, "step": 106500 }, { "epoch": 0.38, "learning_rate": 0.0005, "loss": 2.1709, "step": 107000 }, { "epoch": 0.38, "learning_rate": 0.0005, "loss": 2.1679, "step": 107500 }, { "epoch": 0.38, "learning_rate": 0.0005, "loss": 2.1679, "step": 108000 }, { "epoch": 0.39, "learning_rate": 0.0005, "loss": 2.1696, "step": 108500 }, { "epoch": 0.39, "learning_rate": 0.0005, "loss": 2.1717, "step": 109000 }, { "epoch": 0.39, "learning_rate": 0.0005, "loss": 2.1893, "step": 109500 }, { "epoch": 0.39, "learning_rate": 0.0005, "loss": 2.1695, "step": 110000 }, { "epoch": 0.39, "eval_bleu": 12.8902, "eval_gen_len": 17.2487, "eval_runtime": 211.7604, "eval_samples_per_second": 14.167, "eval_steps_per_second": 1.771, "step": 110000 }, { "epoch": 0.39, "learning_rate": 0.0005, "loss": 2.1338, "step": 110500 }, { "epoch": 0.39, "learning_rate": 0.0005, "loss": 2.2026, "step": 111000 }, { "epoch": 0.4, "learning_rate": 0.0005, "loss": 2.155, "step": 111500 }, { "epoch": 0.4, "learning_rate": 0.0005, "loss": 2.1416, "step": 112000 }, { "epoch": 0.4, "learning_rate": 0.0005, "loss": 2.1372, "step": 112500 }, { "epoch": 0.4, "learning_rate": 0.0005, "loss": 2.1753, "step": 113000 }, { "epoch": 0.4, "learning_rate": 0.0005, "loss": 2.1391, "step": 113500 }, { "epoch": 0.41, "learning_rate": 0.0005, "loss": 2.1305, "step": 114000 }, { "epoch": 0.41, "learning_rate": 0.0005, "loss": 2.099, "step": 114500 }, { "epoch": 0.41, "learning_rate": 0.0005, "loss": 2.1598, "step": 115000 }, { "epoch": 0.41, "learning_rate": 0.0005, "loss": 2.1628, "step": 115500 }, { "epoch": 0.41, "learning_rate": 0.0005, "loss": 2.1374, "step": 116000 }, { "epoch": 0.41, "learning_rate": 0.0005, "loss": 2.1699, "step": 116500 }, { "epoch": 0.42, "learning_rate": 0.0005, "loss": 2.1612, "step": 117000 }, { "epoch": 0.42, "learning_rate": 0.0005, "loss": 2.1554, "step": 117500 }, { "epoch": 0.42, "learning_rate": 0.0005, "loss": 2.1399, "step": 118000 }, { "epoch": 0.42, "learning_rate": 0.0005, "loss": 2.139, "step": 118500 }, { "epoch": 0.42, "learning_rate": 0.0005, "loss": 2.1397, "step": 119000 }, { "epoch": 0.43, "learning_rate": 0.0005, "loss": 2.1328, "step": 119500 }, { "epoch": 0.43, "learning_rate": 0.0005, "loss": 2.1567, "step": 120000 }, { "epoch": 0.43, "eval_bleu": 12.8997, "eval_gen_len": 17.247, "eval_runtime": 210.8317, "eval_samples_per_second": 14.229, "eval_steps_per_second": 1.779, "step": 120000 }, { "epoch": 0.43, "learning_rate": 0.0005, "loss": 2.137, "step": 120500 }, { "epoch": 0.43, "learning_rate": 0.0005, "loss": 2.1359, "step": 121000 }, { "epoch": 0.43, "learning_rate": 0.0005, "loss": 2.1319, "step": 121500 }, { "epoch": 0.43, "learning_rate": 0.0005, "loss": 2.1224, "step": 122000 }, { "epoch": 0.44, "learning_rate": 0.0005, "loss": 2.1555, "step": 122500 }, { "epoch": 0.44, "learning_rate": 0.0005, "loss": 2.1085, "step": 123000 }, { "epoch": 0.44, "learning_rate": 0.0005, "loss": 2.1293, "step": 123500 }, { "epoch": 0.44, "learning_rate": 0.0005, "loss": 2.1237, "step": 124000 }, { "epoch": 0.44, "learning_rate": 0.0005, "loss": 2.1444, "step": 124500 }, { "epoch": 0.44, "learning_rate": 0.0005, "loss": 2.1284, "step": 125000 }, { "epoch": 0.45, "learning_rate": 0.0005, "loss": 2.1433, "step": 125500 }, { "epoch": 0.45, "learning_rate": 0.0005, "loss": 2.1435, "step": 126000 }, { "epoch": 0.45, "learning_rate": 0.0005, "loss": 2.1512, "step": 126500 }, { "epoch": 0.45, "learning_rate": 0.0005, "loss": 2.1306, "step": 127000 }, { "epoch": 0.45, "learning_rate": 0.0005, "loss": 2.1311, "step": 127500 }, { "epoch": 0.46, "learning_rate": 0.0005, "loss": 2.1175, "step": 128000 }, { "epoch": 0.46, "learning_rate": 0.0005, "loss": 2.1288, "step": 128500 }, { "epoch": 0.46, "learning_rate": 0.0005, "loss": 2.102, "step": 129000 }, { "epoch": 0.46, "learning_rate": 0.0005, "loss": 2.1171, "step": 129500 }, { "epoch": 0.46, "learning_rate": 0.0005, "loss": 2.1323, "step": 130000 }, { "epoch": 0.46, "eval_bleu": 13.1654, "eval_gen_len": 17.275, "eval_runtime": 209.873, "eval_samples_per_second": 14.294, "eval_steps_per_second": 1.787, "step": 130000 }, { "epoch": 0.46, "learning_rate": 0.0005, "loss": 2.1132, "step": 130500 }, { "epoch": 0.47, "learning_rate": 0.0005, "loss": 2.1226, "step": 131000 }, { "epoch": 0.47, "learning_rate": 0.0005, "loss": 2.0993, "step": 131500 }, { "epoch": 0.47, "learning_rate": 0.0005, "loss": 2.1318, "step": 132000 }, { "epoch": 0.47, "learning_rate": 0.0005, "loss": 2.1263, "step": 132500 }, { "epoch": 0.47, "learning_rate": 0.0005, "loss": 2.1316, "step": 133000 }, { "epoch": 0.47, "learning_rate": 0.0005, "loss": 2.1146, "step": 133500 }, { "epoch": 0.48, "learning_rate": 0.0005, "loss": 2.1242, "step": 134000 }, { "epoch": 0.48, "learning_rate": 0.0005, "loss": 2.1184, "step": 134500 }, { "epoch": 0.48, "learning_rate": 0.0005, "loss": 2.1291, "step": 135000 }, { "epoch": 0.48, "learning_rate": 0.0005, "loss": 2.0707, "step": 135500 }, { "epoch": 0.48, "learning_rate": 0.0005, "loss": 2.0974, "step": 136000 }, { "epoch": 0.49, "learning_rate": 0.0005, "loss": 2.1385, "step": 136500 }, { "epoch": 0.49, "learning_rate": 0.0005, "loss": 2.1038, "step": 137000 }, { "epoch": 0.49, "learning_rate": 0.0005, "loss": 2.1058, "step": 137500 }, { "epoch": 0.49, "learning_rate": 0.0005, "loss": 2.1388, "step": 138000 }, { "epoch": 0.49, "learning_rate": 0.0005, "loss": 2.1259, "step": 138500 }, { "epoch": 0.49, "learning_rate": 0.0005, "loss": 2.1081, "step": 139000 }, { "epoch": 0.5, "learning_rate": 0.0005, "loss": 2.1273, "step": 139500 }, { "epoch": 0.5, "learning_rate": 0.0005, "loss": 2.0737, "step": 140000 }, { "epoch": 0.5, "eval_bleu": 13.3266, "eval_gen_len": 17.2593, "eval_runtime": 210.3807, "eval_samples_per_second": 14.26, "eval_steps_per_second": 1.782, "step": 140000 }, { "epoch": 0.5, "learning_rate": 0.0005, "loss": 2.1144, "step": 140500 }, { "epoch": 0.5, "learning_rate": 0.0005, "loss": 2.0973, "step": 141000 }, { "epoch": 0.5, "learning_rate": 0.0005, "loss": 2.1156, "step": 141500 }, { "epoch": 0.51, "learning_rate": 0.0005, "loss": 2.1344, "step": 142000 }, { "epoch": 0.51, "learning_rate": 0.0005, "loss": 2.1184, "step": 142500 }, { "epoch": 0.51, "learning_rate": 0.0005, "loss": 2.1114, "step": 143000 }, { "epoch": 0.51, "learning_rate": 0.0005, "loss": 2.0971, "step": 143500 }, { "epoch": 0.51, "learning_rate": 0.0005, "loss": 2.092, "step": 144000 }, { "epoch": 0.51, "learning_rate": 0.0005, "loss": 2.101, "step": 144500 }, { "epoch": 0.52, "learning_rate": 0.0005, "loss": 2.0607, "step": 145000 }, { "epoch": 0.52, "learning_rate": 0.0005, "loss": 2.1177, "step": 145500 }, { "epoch": 0.52, "learning_rate": 0.0005, "loss": 2.1142, "step": 146000 }, { "epoch": 0.52, "learning_rate": 0.0005, "loss": 2.1028, "step": 146500 }, { "epoch": 0.52, "learning_rate": 0.0005, "loss": 2.1253, "step": 147000 }, { "epoch": 0.52, "learning_rate": 0.0005, "loss": 2.0934, "step": 147500 }, { "epoch": 0.53, "learning_rate": 0.0005, "loss": 2.1133, "step": 148000 }, { "epoch": 0.53, "learning_rate": 0.0005, "loss": 2.0671, "step": 148500 }, { "epoch": 0.53, "learning_rate": 0.0005, "loss": 2.0993, "step": 149000 }, { "epoch": 0.53, "learning_rate": 0.0005, "loss": 2.1436, "step": 149500 }, { "epoch": 0.53, "learning_rate": 0.0005, "loss": 2.0909, "step": 150000 }, { "epoch": 0.53, "eval_bleu": 13.1593, "eval_gen_len": 17.2983, "eval_runtime": 212.1864, "eval_samples_per_second": 14.139, "eval_steps_per_second": 1.767, "step": 150000 }, { "epoch": 0.54, "learning_rate": 0.0005, "loss": 2.0918, "step": 150500 }, { "epoch": 0.54, "learning_rate": 0.0005, "loss": 2.1168, "step": 151000 }, { "epoch": 0.54, "learning_rate": 0.0005, "loss": 2.0985, "step": 151500 }, { "epoch": 0.54, "learning_rate": 0.0005, "loss": 2.0956, "step": 152000 }, { "epoch": 0.54, "learning_rate": 0.0005, "loss": 2.1099, "step": 152500 }, { "epoch": 0.54, "learning_rate": 0.0005, "loss": 2.0837, "step": 153000 }, { "epoch": 0.55, "learning_rate": 0.0005, "loss": 2.098, "step": 153500 }, { "epoch": 0.55, "learning_rate": 0.0005, "loss": 2.1008, "step": 154000 }, { "epoch": 0.55, "learning_rate": 0.0005, "loss": 2.1101, "step": 154500 }, { "epoch": 0.55, "learning_rate": 0.0005, "loss": 2.0986, "step": 155000 }, { "epoch": 0.55, "learning_rate": 0.0005, "loss": 2.1101, "step": 155500 }, { "epoch": 0.55, "learning_rate": 0.0005, "loss": 2.0842, "step": 156000 }, { "epoch": 0.56, "learning_rate": 0.0005, "loss": 2.1082, "step": 156500 }, { "epoch": 0.56, "learning_rate": 0.0005, "loss": 2.0941, "step": 157000 }, { "epoch": 0.56, "learning_rate": 0.0005, "loss": 2.0751, "step": 157500 }, { "epoch": 0.56, "learning_rate": 0.0005, "loss": 2.0916, "step": 158000 }, { "epoch": 0.56, "learning_rate": 0.0005, "loss": 2.0802, "step": 158500 }, { "epoch": 0.57, "learning_rate": 0.0005, "loss": 2.0879, "step": 159000 }, { "epoch": 0.57, "learning_rate": 0.0005, "loss": 2.0932, "step": 159500 }, { "epoch": 0.57, "learning_rate": 0.0005, "loss": 2.0947, "step": 160000 }, { "epoch": 0.57, "eval_bleu": 13.2815, "eval_gen_len": 17.229, "eval_runtime": 211.8324, "eval_samples_per_second": 14.162, "eval_steps_per_second": 1.77, "step": 160000 }, { "epoch": 0.57, "learning_rate": 0.0005, "loss": 2.0464, "step": 160500 }, { "epoch": 0.57, "learning_rate": 0.0005, "loss": 2.0811, "step": 161000 }, { "epoch": 0.57, "learning_rate": 0.0005, "loss": 2.0989, "step": 161500 }, { "epoch": 0.58, "learning_rate": 0.0005, "loss": 2.0888, "step": 162000 }, { "epoch": 0.58, "learning_rate": 0.0005, "loss": 2.0881, "step": 162500 }, { "epoch": 0.58, "learning_rate": 0.0005, "loss": 2.0728, "step": 163000 }, { "epoch": 0.58, "learning_rate": 0.0005, "loss": 2.0462, "step": 163500 }, { "epoch": 0.58, "learning_rate": 0.0005, "loss": 2.0762, "step": 164000 }, { "epoch": 0.59, "learning_rate": 0.0005, "loss": 2.0919, "step": 164500 }, { "epoch": 0.59, "learning_rate": 0.0005, "loss": 2.0736, "step": 165000 }, { "epoch": 0.59, "learning_rate": 0.0005, "loss": 2.0688, "step": 165500 }, { "epoch": 0.59, "learning_rate": 0.0005, "loss": 2.0949, "step": 166000 }, { "epoch": 0.59, "learning_rate": 0.0005, "loss": 2.0647, "step": 166500 }, { "epoch": 0.59, "learning_rate": 0.0005, "loss": 2.0942, "step": 167000 }, { "epoch": 0.6, "learning_rate": 0.0005, "loss": 2.0911, "step": 167500 }, { "epoch": 0.6, "learning_rate": 0.0005, "loss": 2.0855, "step": 168000 }, { "epoch": 0.6, "learning_rate": 0.0005, "loss": 2.0829, "step": 168500 }, { "epoch": 0.6, "learning_rate": 0.0005, "loss": 2.0626, "step": 169000 }, { "epoch": 0.6, "learning_rate": 0.0005, "loss": 2.0737, "step": 169500 }, { "epoch": 0.6, "learning_rate": 0.0005, "loss": 2.088, "step": 170000 }, { "epoch": 0.6, "eval_bleu": 13.2294, "eval_gen_len": 17.237, "eval_runtime": 211.594, "eval_samples_per_second": 14.178, "eval_steps_per_second": 1.772, "step": 170000 }, { "epoch": 0.61, "learning_rate": 0.0005, "loss": 2.0452, "step": 170500 }, { "epoch": 0.61, "learning_rate": 0.0005, "loss": 2.0526, "step": 171000 }, { "epoch": 0.61, "learning_rate": 0.0005, "loss": 2.0784, "step": 171500 }, { "epoch": 0.61, "learning_rate": 0.0005, "loss": 2.0585, "step": 172000 }, { "epoch": 0.61, "learning_rate": 0.0005, "loss": 2.0615, "step": 172500 }, { "epoch": 0.62, "learning_rate": 0.0005, "loss": 2.0605, "step": 173000 }, { "epoch": 0.62, "learning_rate": 0.0005, "loss": 2.0624, "step": 173500 }, { "epoch": 0.62, "learning_rate": 0.0005, "loss": 2.0654, "step": 174000 }, { "epoch": 0.62, "learning_rate": 0.0005, "loss": 2.0464, "step": 174500 }, { "epoch": 0.62, "learning_rate": 0.0005, "loss": 2.0611, "step": 175000 }, { "epoch": 0.62, "learning_rate": 0.0005, "loss": 2.094, "step": 175500 }, { "epoch": 0.63, "learning_rate": 0.0005, "loss": 2.0565, "step": 176000 }, { "epoch": 0.63, "learning_rate": 0.0005, "loss": 2.057, "step": 176500 }, { "epoch": 0.63, "learning_rate": 0.0005, "loss": 2.0681, "step": 177000 }, { "epoch": 0.63, "learning_rate": 0.0005, "loss": 2.0316, "step": 177500 }, { "epoch": 0.63, "learning_rate": 0.0005, "loss": 2.0713, "step": 178000 }, { "epoch": 0.63, "learning_rate": 0.0005, "loss": 2.0786, "step": 178500 }, { "epoch": 0.64, "learning_rate": 0.0005, "loss": 2.0669, "step": 179000 }, { "epoch": 0.64, "learning_rate": 0.0005, "loss": 2.0505, "step": 179500 }, { "epoch": 0.64, "learning_rate": 0.0005, "loss": 2.0823, "step": 180000 }, { "epoch": 0.64, "eval_bleu": 13.3117, "eval_gen_len": 17.3163, "eval_runtime": 210.849, "eval_samples_per_second": 14.228, "eval_steps_per_second": 1.779, "step": 180000 }, { "epoch": 0.64, "learning_rate": 0.0005, "loss": 2.0561, "step": 180500 }, { "epoch": 0.64, "learning_rate": 0.0005, "loss": 2.0381, "step": 181000 }, { "epoch": 0.65, "learning_rate": 0.0005, "loss": 2.0503, "step": 181500 }, { "epoch": 0.65, "learning_rate": 0.0005, "loss": 2.0706, "step": 182000 }, { "epoch": 0.65, "learning_rate": 0.0005, "loss": 2.0668, "step": 182500 }, { "epoch": 0.65, "learning_rate": 0.0005, "loss": 2.0657, "step": 183000 }, { "epoch": 0.65, "learning_rate": 0.0005, "loss": 2.0382, "step": 183500 }, { "epoch": 0.65, "learning_rate": 0.0005, "loss": 2.052, "step": 184000 }, { "epoch": 0.66, "learning_rate": 0.0005, "loss": 2.0755, "step": 184500 }, { "epoch": 0.66, "learning_rate": 0.0005, "loss": 2.0571, "step": 185000 }, { "epoch": 0.66, "learning_rate": 0.0005, "loss": 2.0192, "step": 185500 }, { "epoch": 0.66, "learning_rate": 0.0005, "loss": 2.0534, "step": 186000 }, { "epoch": 0.66, "learning_rate": 0.0005, "loss": 2.0545, "step": 186500 }, { "epoch": 0.67, "learning_rate": 0.0005, "loss": 2.0608, "step": 187000 }, { "epoch": 0.67, "learning_rate": 0.0005, "loss": 2.0693, "step": 187500 }, { "epoch": 0.67, "learning_rate": 0.0005, "loss": 2.0451, "step": 188000 }, { "epoch": 0.67, "learning_rate": 0.0005, "loss": 2.061, "step": 188500 }, { "epoch": 0.67, "learning_rate": 0.0005, "loss": 2.0575, "step": 189000 }, { "epoch": 0.67, "learning_rate": 0.0005, "loss": 2.0487, "step": 189500 }, { "epoch": 0.68, "learning_rate": 0.0005, "loss": 2.0386, "step": 190000 }, { "epoch": 0.68, "eval_bleu": 13.3541, "eval_gen_len": 17.222, "eval_runtime": 211.7397, "eval_samples_per_second": 14.168, "eval_steps_per_second": 1.771, "step": 190000 }, { "epoch": 0.68, "learning_rate": 0.0005, "loss": 2.0442, "step": 190500 }, { "epoch": 0.68, "learning_rate": 0.0005, "loss": 2.0464, "step": 191000 }, { "epoch": 0.68, "learning_rate": 0.0005, "loss": 2.0499, "step": 191500 }, { "epoch": 0.68, "learning_rate": 0.0005, "loss": 2.0559, "step": 192000 }, { "epoch": 0.68, "learning_rate": 0.0005, "loss": 2.0488, "step": 192500 }, { "epoch": 0.69, "learning_rate": 0.0005, "loss": 2.0179, "step": 193000 }, { "epoch": 0.69, "learning_rate": 0.0005, "loss": 2.0409, "step": 193500 }, { "epoch": 0.69, "learning_rate": 0.0005, "loss": 2.0508, "step": 194000 }, { "epoch": 0.69, "learning_rate": 0.0005, "loss": 2.0173, "step": 194500 }, { "epoch": 0.69, "learning_rate": 0.0005, "loss": 2.0401, "step": 195000 }, { "epoch": 0.7, "learning_rate": 0.0005, "loss": 2.0539, "step": 195500 }, { "epoch": 0.7, "learning_rate": 0.0005, "loss": 2.0666, "step": 196000 }, { "epoch": 0.7, "learning_rate": 0.0005, "loss": 2.0347, "step": 196500 }, { "epoch": 0.7, "learning_rate": 0.0005, "loss": 2.0508, "step": 197000 }, { "epoch": 0.7, "learning_rate": 0.0005, "loss": 2.0324, "step": 197500 }, { "epoch": 0.7, "learning_rate": 0.0005, "loss": 2.0521, "step": 198000 }, { "epoch": 0.71, "learning_rate": 0.0005, "loss": 2.0399, "step": 198500 }, { "epoch": 0.71, "learning_rate": 0.0005, "loss": 2.052, "step": 199000 }, { "epoch": 0.71, "learning_rate": 0.0005, "loss": 2.0318, "step": 199500 }, { "epoch": 0.71, "learning_rate": 0.0005, "loss": 2.0431, "step": 200000 }, { "epoch": 0.71, "eval_bleu": 13.6742, "eval_gen_len": 17.2567, "eval_runtime": 210.4095, "eval_samples_per_second": 14.258, "eval_steps_per_second": 1.782, "step": 200000 }, { "epoch": 0.71, "learning_rate": 0.0005, "loss": 2.0626, "step": 200500 }, { "epoch": 0.71, "learning_rate": 0.0005, "loss": 2.0585, "step": 201000 }, { "epoch": 0.72, "learning_rate": 0.0005, "loss": 2.0146, "step": 201500 }, { "epoch": 0.72, "learning_rate": 0.0005, "loss": 2.0478, "step": 202000 }, { "epoch": 0.72, "learning_rate": 0.0005, "loss": 2.0633, "step": 202500 }, { "epoch": 0.72, "learning_rate": 0.0005, "loss": 2.0407, "step": 203000 }, { "epoch": 0.72, "learning_rate": 0.0005, "loss": 2.0512, "step": 203500 }, { "epoch": 0.73, "learning_rate": 0.0005, "loss": 2.0373, "step": 204000 }, { "epoch": 0.73, "learning_rate": 0.0005, "loss": 2.0143, "step": 204500 }, { "epoch": 0.73, "learning_rate": 0.0005, "loss": 2.0394, "step": 205000 }, { "epoch": 0.73, "learning_rate": 0.0005, "loss": 2.0208, "step": 205500 }, { "epoch": 0.73, "learning_rate": 0.0005, "loss": 2.0145, "step": 206000 }, { "epoch": 0.73, "learning_rate": 0.0005, "loss": 2.0213, "step": 206500 }, { "epoch": 0.74, "learning_rate": 0.0005, "loss": 2.0238, "step": 207000 }, { "epoch": 0.74, "learning_rate": 0.0005, "loss": 2.0331, "step": 207500 }, { "epoch": 0.74, "learning_rate": 0.0005, "loss": 2.0456, "step": 208000 }, { "epoch": 0.74, "learning_rate": 0.0005, "loss": 2.0109, "step": 208500 }, { "epoch": 0.74, "learning_rate": 0.0005, "loss": 2.0352, "step": 209000 }, { "epoch": 0.75, "learning_rate": 0.0005, "loss": 2.0131, "step": 209500 }, { "epoch": 0.75, "learning_rate": 0.0005, "loss": 2.0419, "step": 210000 }, { "epoch": 0.75, "eval_bleu": 13.2481, "eval_gen_len": 17.2613, "eval_runtime": 212.253, "eval_samples_per_second": 14.134, "eval_steps_per_second": 1.767, "step": 210000 }, { "epoch": 0.75, "learning_rate": 0.0005, "loss": 2.0275, "step": 210500 }, { "epoch": 0.75, "learning_rate": 0.0005, "loss": 2.0378, "step": 211000 }, { "epoch": 0.75, "learning_rate": 0.0005, "loss": 2.042, "step": 211500 }, { "epoch": 0.75, "learning_rate": 0.0005, "loss": 2.0247, "step": 212000 }, { "epoch": 0.76, "learning_rate": 0.0005, "loss": 2.0291, "step": 212500 }, { "epoch": 0.76, "learning_rate": 0.0005, "loss": 2.0191, "step": 213000 }, { "epoch": 0.76, "learning_rate": 0.0005, "loss": 2.0086, "step": 213500 }, { "epoch": 0.76, "learning_rate": 0.0005, "loss": 2.0508, "step": 214000 }, { "epoch": 0.76, "learning_rate": 0.0005, "loss": 2.0122, "step": 214500 }, { "epoch": 0.76, "learning_rate": 0.0005, "loss": 2.0123, "step": 215000 }, { "epoch": 0.77, "learning_rate": 0.0005, "loss": 2.0295, "step": 215500 }, { "epoch": 0.77, "learning_rate": 0.0005, "loss": 2.0152, "step": 216000 }, { "epoch": 0.77, "learning_rate": 0.0005, "loss": 2.0275, "step": 216500 }, { "epoch": 0.77, "learning_rate": 0.0005, "loss": 2.0317, "step": 217000 }, { "epoch": 0.77, "learning_rate": 0.0005, "loss": 1.9995, "step": 217500 }, { "epoch": 0.78, "learning_rate": 0.0005, "loss": 2.0266, "step": 218000 }, { "epoch": 0.78, "learning_rate": 0.0005, "loss": 2.0337, "step": 218500 }, { "epoch": 0.78, "learning_rate": 0.0005, "loss": 2.0399, "step": 219000 }, { "epoch": 0.78, "learning_rate": 0.0005, "loss": 2.0263, "step": 219500 }, { "epoch": 0.78, "learning_rate": 0.0005, "loss": 2.0097, "step": 220000 }, { "epoch": 0.78, "eval_bleu": 13.6269, "eval_gen_len": 17.29, "eval_runtime": 210.1335, "eval_samples_per_second": 14.277, "eval_steps_per_second": 1.785, "step": 220000 }, { "epoch": 0.78, "learning_rate": 0.0005, "loss": 1.9963, "step": 220500 }, { "epoch": 0.79, "learning_rate": 0.0005, "loss": 2.0173, "step": 221000 }, { "epoch": 0.79, "learning_rate": 0.0005, "loss": 2.0184, "step": 221500 }, { "epoch": 0.79, "learning_rate": 0.0005, "loss": 1.99, "step": 222000 }, { "epoch": 0.79, "learning_rate": 0.0005, "loss": 2.0159, "step": 222500 }, { "epoch": 0.79, "learning_rate": 0.0005, "loss": 2.015, "step": 223000 }, { "epoch": 0.79, "learning_rate": 0.0005, "loss": 2.0349, "step": 223500 }, { "epoch": 0.8, "learning_rate": 0.0005, "loss": 2.015, "step": 224000 }, { "epoch": 0.8, "learning_rate": 0.0005, "loss": 2.0145, "step": 224500 }, { "epoch": 0.8, "learning_rate": 0.0005, "loss": 2.0142, "step": 225000 }, { "epoch": 0.8, "learning_rate": 0.0005, "loss": 2.0085, "step": 225500 }, { "epoch": 0.8, "learning_rate": 0.0005, "loss": 2.0099, "step": 226000 }, { "epoch": 0.81, "learning_rate": 0.0005, "loss": 2.0267, "step": 226500 }, { "epoch": 0.81, "learning_rate": 0.0005, "loss": 2.0294, "step": 227000 }, { "epoch": 0.81, "learning_rate": 0.0005, "loss": 2.0256, "step": 227500 }, { "epoch": 0.81, "learning_rate": 0.0005, "loss": 1.9883, "step": 228000 }, { "epoch": 0.81, "learning_rate": 0.0005, "loss": 2.0429, "step": 228500 }, { "epoch": 0.81, "learning_rate": 0.0005, "loss": 1.9926, "step": 229000 }, { "epoch": 0.82, "learning_rate": 0.0005, "loss": 2.0223, "step": 229500 }, { "epoch": 0.82, "learning_rate": 0.0005, "loss": 2.0184, "step": 230000 }, { "epoch": 0.82, "eval_bleu": 13.5436, "eval_gen_len": 17.262, "eval_runtime": 211.2793, "eval_samples_per_second": 14.199, "eval_steps_per_second": 1.775, "step": 230000 }, { "epoch": 0.82, "learning_rate": 0.0005, "loss": 2.0164, "step": 230500 }, { "epoch": 0.82, "learning_rate": 0.0005, "loss": 2.01, "step": 231000 }, { "epoch": 0.82, "learning_rate": 0.0005, "loss": 2.0199, "step": 231500 }, { "epoch": 0.83, "learning_rate": 0.0005, "loss": 2.0162, "step": 232000 }, { "epoch": 0.83, "learning_rate": 0.0005, "loss": 1.9946, "step": 232500 }, { "epoch": 0.83, "learning_rate": 0.0005, "loss": 2.0271, "step": 233000 }, { "epoch": 0.83, "learning_rate": 0.0005, "loss": 2.0181, "step": 233500 }, { "epoch": 0.83, "learning_rate": 0.0005, "loss": 1.9774, "step": 234000 }, { "epoch": 0.83, "learning_rate": 0.0005, "loss": 1.9995, "step": 234500 }, { "epoch": 0.84, "learning_rate": 0.0005, "loss": 2.0043, "step": 235000 }, { "epoch": 0.84, "learning_rate": 0.0005, "loss": 1.9856, "step": 235500 }, { "epoch": 0.84, "learning_rate": 0.0005, "loss": 2.019, "step": 236000 }, { "epoch": 0.84, "learning_rate": 0.0005, "loss": 2.0151, "step": 236500 }, { "epoch": 0.84, "learning_rate": 0.0005, "loss": 1.9811, "step": 237000 }, { "epoch": 0.84, "learning_rate": 0.0005, "loss": 1.976, "step": 237500 }, { "epoch": 0.85, "learning_rate": 0.0005, "loss": 2.0171, "step": 238000 }, { "epoch": 0.85, "learning_rate": 0.0005, "loss": 2.0226, "step": 238500 }, { "epoch": 0.85, "learning_rate": 0.0005, "loss": 1.9925, "step": 239000 }, { "epoch": 0.85, "learning_rate": 0.0005, "loss": 2.0103, "step": 239500 }, { "epoch": 0.85, "learning_rate": 0.0005, "loss": 2.0321, "step": 240000 }, { "epoch": 0.85, "eval_bleu": 13.691, "eval_gen_len": 17.2677, "eval_runtime": 211.7437, "eval_samples_per_second": 14.168, "eval_steps_per_second": 1.771, "step": 240000 }, { "epoch": 0.86, "learning_rate": 0.0005, "loss": 1.9975, "step": 240500 }, { "epoch": 0.86, "learning_rate": 0.0005, "loss": 1.9957, "step": 241000 }, { "epoch": 0.86, "learning_rate": 0.0005, "loss": 2.0085, "step": 241500 }, { "epoch": 0.86, "learning_rate": 0.0005, "loss": 2.0071, "step": 242000 }, { "epoch": 0.86, "learning_rate": 0.0005, "loss": 2.0153, "step": 242500 }, { "epoch": 0.86, "learning_rate": 0.0005, "loss": 2.0103, "step": 243000 }, { "epoch": 0.87, "learning_rate": 0.0005, "loss": 2.0012, "step": 243500 }, { "epoch": 0.87, "learning_rate": 0.0005, "loss": 1.9858, "step": 244000 }, { "epoch": 0.87, "learning_rate": 0.0005, "loss": 1.9878, "step": 244500 }, { "epoch": 0.87, "learning_rate": 0.0005, "loss": 2.0115, "step": 245000 }, { "epoch": 0.87, "learning_rate": 0.0005, "loss": 2.0136, "step": 245500 }, { "epoch": 0.87, "learning_rate": 0.0005, "loss": 2.0159, "step": 246000 }, { "epoch": 0.88, "learning_rate": 0.0005, "loss": 1.9778, "step": 246500 }, { "epoch": 0.88, "learning_rate": 0.0005, "loss": 1.976, "step": 247000 }, { "epoch": 0.88, "learning_rate": 0.0005, "loss": 2.0054, "step": 247500 }, { "epoch": 0.88, "learning_rate": 0.0005, "loss": 2.0282, "step": 248000 }, { "epoch": 0.88, "learning_rate": 0.0005, "loss": 1.9811, "step": 248500 }, { "epoch": 0.89, "learning_rate": 0.0005, "loss": 1.9773, "step": 249000 }, { "epoch": 0.89, "learning_rate": 0.0005, "loss": 1.9856, "step": 249500 }, { "epoch": 0.89, "learning_rate": 0.0005, "loss": 1.9836, "step": 250000 }, { "epoch": 0.89, "eval_bleu": 13.8262, "eval_gen_len": 17.2433, "eval_runtime": 209.5455, "eval_samples_per_second": 14.317, "eval_steps_per_second": 1.79, "step": 250000 }, { "epoch": 0.89, "learning_rate": 0.0005, "loss": 2.0082, "step": 250500 }, { "epoch": 0.89, "learning_rate": 0.0005, "loss": 2.0006, "step": 251000 }, { "epoch": 0.89, "learning_rate": 0.0005, "loss": 2.0035, "step": 251500 }, { "epoch": 0.9, "learning_rate": 0.0005, "loss": 1.9641, "step": 252000 }, { "epoch": 0.9, "learning_rate": 0.0005, "loss": 1.9869, "step": 252500 }, { "epoch": 0.9, "learning_rate": 0.0005, "loss": 1.9964, "step": 253000 }, { "epoch": 0.9, "learning_rate": 0.0005, "loss": 2.0015, "step": 253500 }, { "epoch": 0.9, "learning_rate": 0.0005, "loss": 2.0152, "step": 254000 }, { "epoch": 0.91, "learning_rate": 0.0005, "loss": 1.9973, "step": 254500 }, { "epoch": 0.91, "learning_rate": 0.0005, "loss": 1.9884, "step": 255000 }, { "epoch": 0.91, "learning_rate": 0.0005, "loss": 2.007, "step": 255500 }, { "epoch": 0.91, "learning_rate": 0.0005, "loss": 2.0142, "step": 256000 }, { "epoch": 0.91, "learning_rate": 0.0005, "loss": 2.0124, "step": 256500 }, { "epoch": 0.91, "learning_rate": 0.0005, "loss": 2.0111, "step": 257000 }, { "epoch": 0.92, "learning_rate": 0.0005, "loss": 1.979, "step": 257500 }, { "epoch": 0.92, "learning_rate": 0.0005, "loss": 1.9947, "step": 258000 }, { "epoch": 0.92, "learning_rate": 0.0005, "loss": 1.985, "step": 258500 }, { "epoch": 0.92, "learning_rate": 0.0005, "loss": 1.9802, "step": 259000 }, { "epoch": 0.92, "learning_rate": 0.0005, "loss": 2.0062, "step": 259500 }, { "epoch": 0.92, "learning_rate": 0.0005, "loss": 1.9921, "step": 260000 }, { "epoch": 0.92, "eval_bleu": 13.6876, "eval_gen_len": 17.254, "eval_runtime": 210.6894, "eval_samples_per_second": 14.239, "eval_steps_per_second": 1.78, "step": 260000 }, { "epoch": 0.93, "learning_rate": 0.0005, "loss": 1.9817, "step": 260500 }, { "epoch": 0.93, "learning_rate": 0.0005, "loss": 1.9949, "step": 261000 }, { "epoch": 0.93, "learning_rate": 0.0005, "loss": 1.9769, "step": 261500 }, { "epoch": 0.93, "learning_rate": 0.0005, "loss": 1.9957, "step": 262000 }, { "epoch": 0.93, "learning_rate": 0.0005, "loss": 2.0072, "step": 262500 }, { "epoch": 0.94, "learning_rate": 0.0005, "loss": 1.9858, "step": 263000 }, { "epoch": 0.94, "learning_rate": 0.0005, "loss": 1.9857, "step": 263500 }, { "epoch": 0.94, "learning_rate": 0.0005, "loss": 1.9634, "step": 264000 }, { "epoch": 0.94, "learning_rate": 0.0005, "loss": 1.9949, "step": 264500 }, { "epoch": 0.94, "learning_rate": 0.0005, "loss": 1.9898, "step": 265000 }, { "epoch": 0.94, "learning_rate": 0.0005, "loss": 2.0065, "step": 265500 }, { "epoch": 0.95, "learning_rate": 0.0005, "loss": 1.9879, "step": 266000 }, { "epoch": 0.95, "learning_rate": 0.0005, "loss": 1.9811, "step": 266500 }, { "epoch": 0.95, "learning_rate": 0.0005, "loss": 1.9486, "step": 267000 }, { "epoch": 0.95, "learning_rate": 0.0005, "loss": 1.9941, "step": 267500 }, { "epoch": 0.95, "learning_rate": 0.0005, "loss": 1.9992, "step": 268000 }, { "epoch": 0.95, "learning_rate": 0.0005, "loss": 2.0114, "step": 268500 }, { "epoch": 0.96, "learning_rate": 0.0005, "loss": 1.9735, "step": 269000 }, { "epoch": 0.96, "learning_rate": 0.0005, "loss": 1.9851, "step": 269500 }, { "epoch": 0.96, "learning_rate": 0.0005, "loss": 1.9978, "step": 270000 }, { "epoch": 0.96, "eval_bleu": 13.5384, "eval_gen_len": 17.2433, "eval_runtime": 210.4598, "eval_samples_per_second": 14.255, "eval_steps_per_second": 1.782, "step": 270000 }, { "epoch": 0.96, "learning_rate": 0.0005, "loss": 2.0171, "step": 270500 }, { "epoch": 0.96, "learning_rate": 0.0005, "loss": 1.9665, "step": 271000 }, { "epoch": 0.97, "learning_rate": 0.0005, "loss": 1.977, "step": 271500 }, { "epoch": 0.97, "learning_rate": 0.0005, "loss": 1.9822, "step": 272000 }, { "epoch": 0.97, "learning_rate": 0.0005, "loss": 1.994, "step": 272500 }, { "epoch": 0.97, "learning_rate": 0.0005, "loss": 1.9753, "step": 273000 }, { "epoch": 0.97, "learning_rate": 0.0005, "loss": 1.975, "step": 273500 }, { "epoch": 0.97, "learning_rate": 0.0005, "loss": 1.9895, "step": 274000 }, { "epoch": 0.98, "learning_rate": 0.0005, "loss": 1.9764, "step": 274500 }, { "epoch": 0.98, "learning_rate": 0.0005, "loss": 1.9882, "step": 275000 }, { "epoch": 0.98, "learning_rate": 0.0005, "loss": 1.9879, "step": 275500 }, { "epoch": 0.98, "learning_rate": 0.0005, "loss": 1.9878, "step": 276000 }, { "epoch": 0.98, "learning_rate": 0.0005, "loss": 1.9501, "step": 276500 }, { "epoch": 0.99, "learning_rate": 0.0005, "loss": 1.9841, "step": 277000 }, { "epoch": 0.99, "learning_rate": 0.0005, "loss": 1.9862, "step": 277500 }, { "epoch": 0.99, "learning_rate": 0.0005, "loss": 1.9768, "step": 278000 }, { "epoch": 0.99, "learning_rate": 0.0005, "loss": 1.9893, "step": 278500 }, { "epoch": 0.99, "learning_rate": 0.0005, "loss": 1.9812, "step": 279000 }, { "epoch": 0.99, "learning_rate": 0.0005, "loss": 1.9696, "step": 279500 }, { "epoch": 1.0, "learning_rate": 0.0005, "loss": 1.9842, "step": 280000 }, { "epoch": 1.0, "eval_bleu": 13.7593, "eval_gen_len": 17.2593, "eval_runtime": 208.654, "eval_samples_per_second": 14.378, "eval_steps_per_second": 1.797, "step": 280000 }, { "epoch": 1.0, "learning_rate": 0.0005, "loss": 1.979, "step": 280500 }, { "epoch": 1.0, "learning_rate": 0.0005, "loss": 1.9768, "step": 281000 }, { "epoch": 1.0, "learning_rate": 0.0005, "loss": 1.9605, "step": 281500 }, { "epoch": 1.0, "learning_rate": 0.0005, "loss": 1.9358, "step": 282000 }, { "epoch": 1.0, "learning_rate": 0.0005, "loss": 1.9724, "step": 282500 }, { "epoch": 1.01, "learning_rate": 0.0005, "loss": 1.935, "step": 283000 }, { "epoch": 1.01, "learning_rate": 0.0005, "loss": 1.93, "step": 283500 }, { "epoch": 1.01, "learning_rate": 0.0005, "loss": 1.9566, "step": 284000 }, { "epoch": 1.01, "learning_rate": 0.0005, "loss": 1.9272, "step": 284500 }, { "epoch": 1.01, "learning_rate": 0.0005, "loss": 1.9274, "step": 285000 }, { "epoch": 1.02, "learning_rate": 0.0005, "loss": 1.965, "step": 285500 }, { "epoch": 1.02, "learning_rate": 0.0005, "loss": 1.942, "step": 286000 }, { "epoch": 1.02, "learning_rate": 0.0005, "loss": 1.9553, "step": 286500 }, { "epoch": 1.02, "learning_rate": 0.0005, "loss": 1.9291, "step": 287000 }, { "epoch": 1.02, "learning_rate": 0.0005, "loss": 1.9528, "step": 287500 }, { "epoch": 1.02, "learning_rate": 0.0005, "loss": 1.9503, "step": 288000 }, { "epoch": 1.03, "learning_rate": 0.0005, "loss": 1.9189, "step": 288500 }, { "epoch": 1.03, "learning_rate": 0.0005, "loss": 1.9322, "step": 289000 }, { "epoch": 1.03, "learning_rate": 0.0005, "loss": 1.9466, "step": 289500 }, { "epoch": 1.03, "learning_rate": 0.0005, "loss": 1.9481, "step": 290000 }, { "epoch": 1.03, "eval_bleu": 13.7266, "eval_gen_len": 17.2537, "eval_runtime": 210.3298, "eval_samples_per_second": 14.263, "eval_steps_per_second": 1.783, "step": 290000 }, { "epoch": 1.03, "learning_rate": 0.0005, "loss": 1.9061, "step": 290500 }, { "epoch": 1.03, "learning_rate": 0.0005, "loss": 1.9063, "step": 291000 }, { "epoch": 1.04, "learning_rate": 0.0005, "loss": 1.9396, "step": 291500 }, { "epoch": 1.04, "learning_rate": 0.0005, "loss": 1.9513, "step": 292000 }, { "epoch": 1.04, "learning_rate": 0.0005, "loss": 1.9753, "step": 292500 }, { "epoch": 1.04, "learning_rate": 0.0005, "loss": 1.9343, "step": 293000 }, { "epoch": 1.04, "learning_rate": 0.0005, "loss": 1.9548, "step": 293500 }, { "epoch": 1.05, "learning_rate": 0.0005, "loss": 1.9354, "step": 294000 }, { "epoch": 1.05, "learning_rate": 0.0005, "loss": 1.9324, "step": 294500 }, { "epoch": 1.05, "learning_rate": 0.0005, "loss": 1.9457, "step": 295000 }, { "epoch": 1.05, "learning_rate": 0.0005, "loss": 1.9278, "step": 295500 }, { "epoch": 1.05, "learning_rate": 0.0005, "loss": 1.901, "step": 296000 }, { "epoch": 1.05, "learning_rate": 0.0005, "loss": 1.9599, "step": 296500 }, { "epoch": 1.06, "learning_rate": 0.0005, "loss": 1.9593, "step": 297000 }, { "epoch": 1.06, "learning_rate": 0.0005, "loss": 1.9276, "step": 297500 }, { "epoch": 1.06, "learning_rate": 0.0005, "loss": 1.9408, "step": 298000 }, { "epoch": 1.06, "learning_rate": 0.0005, "loss": 1.9509, "step": 298500 }, { "epoch": 1.06, "learning_rate": 0.0005, "loss": 1.9509, "step": 299000 }, { "epoch": 1.07, "learning_rate": 0.0005, "loss": 1.9437, "step": 299500 }, { "epoch": 1.07, "learning_rate": 0.0005, "loss": 1.9808, "step": 300000 }, { "epoch": 1.07, "eval_bleu": 13.9025, "eval_gen_len": 17.217, "eval_runtime": 210.0366, "eval_samples_per_second": 14.283, "eval_steps_per_second": 1.785, "step": 300000 }, { "epoch": 1.07, "learning_rate": 0.0005, "loss": 1.9484, "step": 300500 }, { "epoch": 1.07, "learning_rate": 0.0005, "loss": 1.9574, "step": 301000 }, { "epoch": 1.07, "learning_rate": 0.0005, "loss": 1.9477, "step": 301500 }, { "epoch": 1.07, "learning_rate": 0.0005, "loss": 1.9743, "step": 302000 }, { "epoch": 1.08, "learning_rate": 0.0005, "loss": 1.93, "step": 302500 }, { "epoch": 1.08, "learning_rate": 0.0005, "loss": 1.9117, "step": 303000 }, { "epoch": 1.08, "learning_rate": 0.0005, "loss": 1.9505, "step": 303500 }, { "epoch": 1.08, "learning_rate": 0.0005, "loss": 1.9717, "step": 304000 }, { "epoch": 1.08, "learning_rate": 0.0005, "loss": 1.9508, "step": 304500 }, { "epoch": 1.08, "learning_rate": 0.0005, "loss": 1.9408, "step": 305000 }, { "epoch": 1.09, "learning_rate": 0.0005, "loss": 1.9244, "step": 305500 }, { "epoch": 1.09, "learning_rate": 0.0005, "loss": 1.9406, "step": 306000 }, { "epoch": 1.09, "learning_rate": 0.0005, "loss": 1.9396, "step": 306500 }, { "epoch": 1.09, "learning_rate": 0.0005, "loss": 1.9509, "step": 307000 }, { "epoch": 1.09, "learning_rate": 0.0005, "loss": 1.9056, "step": 307500 }, { "epoch": 1.1, "learning_rate": 0.0005, "loss": 1.931, "step": 308000 }, { "epoch": 1.1, "learning_rate": 0.0005, "loss": 1.9528, "step": 308500 }, { "epoch": 1.1, "learning_rate": 0.0005, "loss": 1.9154, "step": 309000 }, { "epoch": 1.1, "learning_rate": 0.0005, "loss": 1.93, "step": 309500 }, { "epoch": 1.1, "learning_rate": 0.0005, "loss": 1.929, "step": 310000 }, { "epoch": 1.1, "eval_bleu": 13.7188, "eval_gen_len": 17.2703, "eval_runtime": 209.7351, "eval_samples_per_second": 14.304, "eval_steps_per_second": 1.788, "step": 310000 }, { "epoch": 1.1, "learning_rate": 0.0005, "loss": 1.95, "step": 310500 }, { "epoch": 1.11, "learning_rate": 0.0005, "loss": 1.9401, "step": 311000 }, { "epoch": 1.11, "learning_rate": 0.0005, "loss": 1.943, "step": 311500 }, { "epoch": 1.11, "learning_rate": 0.0005, "loss": 1.9197, "step": 312000 }, { "epoch": 1.11, "learning_rate": 0.0005, "loss": 1.9421, "step": 312500 }, { "epoch": 1.11, "learning_rate": 0.0005, "loss": 1.9385, "step": 313000 }, { "epoch": 1.11, "learning_rate": 0.0005, "loss": 1.9182, "step": 313500 }, { "epoch": 1.12, "learning_rate": 0.0005, "loss": 1.9445, "step": 314000 }, { "epoch": 1.12, "learning_rate": 0.0005, "loss": 1.9132, "step": 314500 }, { "epoch": 1.12, "learning_rate": 0.0005, "loss": 1.9134, "step": 315000 }, { "epoch": 1.12, "learning_rate": 0.0005, "loss": 1.9551, "step": 315500 }, { "epoch": 1.12, "learning_rate": 0.0005, "loss": 1.9572, "step": 316000 }, { "epoch": 1.13, "learning_rate": 0.0005, "loss": 1.9672, "step": 316500 }, { "epoch": 1.13, "learning_rate": 0.0005, "loss": 1.9485, "step": 317000 }, { "epoch": 1.13, "learning_rate": 0.0005, "loss": 1.9575, "step": 317500 }, { "epoch": 1.13, "learning_rate": 0.0005, "loss": 1.9296, "step": 318000 }, { "epoch": 1.13, "learning_rate": 0.0005, "loss": 1.9572, "step": 318500 }, { "epoch": 1.13, "learning_rate": 0.0005, "loss": 1.9276, "step": 319000 }, { "epoch": 1.14, "learning_rate": 0.0005, "loss": 1.9512, "step": 319500 }, { "epoch": 1.14, "learning_rate": 0.0005, "loss": 1.9493, "step": 320000 }, { "epoch": 1.14, "eval_bleu": 13.8795, "eval_gen_len": 17.252, "eval_runtime": 210.6151, "eval_samples_per_second": 14.244, "eval_steps_per_second": 1.78, "step": 320000 }, { "epoch": 1.14, "learning_rate": 0.0005, "loss": 1.9136, "step": 320500 }, { "epoch": 1.14, "learning_rate": 0.0005, "loss": 1.9206, "step": 321000 }, { "epoch": 1.14, "learning_rate": 0.0005, "loss": 1.9541, "step": 321500 }, { "epoch": 1.15, "learning_rate": 0.0005, "loss": 1.9498, "step": 322000 }, { "epoch": 1.15, "learning_rate": 0.0005, "loss": 1.9422, "step": 322500 }, { "epoch": 1.15, "learning_rate": 0.0005, "loss": 1.9356, "step": 323000 }, { "epoch": 1.15, "learning_rate": 0.0005, "loss": 1.9289, "step": 323500 }, { "epoch": 1.15, "learning_rate": 0.0005, "loss": 1.9355, "step": 324000 }, { "epoch": 1.15, "learning_rate": 0.0005, "loss": 1.9501, "step": 324500 }, { "epoch": 1.16, "learning_rate": 0.0005, "loss": 1.9092, "step": 325000 }, { "epoch": 1.16, "learning_rate": 0.0005, "loss": 1.9402, "step": 325500 }, { "epoch": 1.16, "learning_rate": 0.0005, "loss": 1.9522, "step": 326000 }, { "epoch": 1.16, "learning_rate": 0.0005, "loss": 1.9143, "step": 326500 }, { "epoch": 1.16, "learning_rate": 0.0005, "loss": 1.9259, "step": 327000 }, { "epoch": 1.16, "learning_rate": 0.0005, "loss": 1.9324, "step": 327500 }, { "epoch": 1.17, "learning_rate": 0.0005, "loss": 1.9349, "step": 328000 }, { "epoch": 1.17, "learning_rate": 0.0005, "loss": 1.9538, "step": 328500 }, { "epoch": 1.17, "learning_rate": 0.0005, "loss": 1.9365, "step": 329000 }, { "epoch": 1.17, "learning_rate": 0.0005, "loss": 1.9075, "step": 329500 }, { "epoch": 1.17, "learning_rate": 0.0005, "loss": 1.9176, "step": 330000 }, { "epoch": 1.17, "eval_bleu": 13.8571, "eval_gen_len": 17.2563, "eval_runtime": 210.0443, "eval_samples_per_second": 14.283, "eval_steps_per_second": 1.785, "step": 330000 }, { "epoch": 1.18, "learning_rate": 0.0005, "loss": 1.9322, "step": 330500 }, { "epoch": 1.18, "learning_rate": 0.0005, "loss": 1.9333, "step": 331000 }, { "epoch": 1.18, "learning_rate": 0.0005, "loss": 1.9324, "step": 331500 }, { "epoch": 1.18, "learning_rate": 0.0005, "loss": 1.93, "step": 332000 }, { "epoch": 1.18, "learning_rate": 0.0005, "loss": 1.8769, "step": 332500 }, { "epoch": 1.18, "learning_rate": 0.0005, "loss": 1.9198, "step": 333000 }, { "epoch": 1.19, "learning_rate": 0.0005, "loss": 1.9084, "step": 333500 }, { "epoch": 1.19, "learning_rate": 0.0005, "loss": 1.9196, "step": 334000 }, { "epoch": 1.19, "learning_rate": 0.0005, "loss": 1.9354, "step": 334500 }, { "epoch": 1.19, "learning_rate": 0.0005, "loss": 1.9473, "step": 335000 }, { "epoch": 1.19, "learning_rate": 0.0005, "loss": 1.9315, "step": 335500 }, { "epoch": 1.19, "learning_rate": 0.0005, "loss": 1.9479, "step": 336000 }, { "epoch": 1.2, "learning_rate": 0.0005, "loss": 1.9078, "step": 336500 }, { "epoch": 1.2, "learning_rate": 0.0005, "loss": 1.9488, "step": 337000 }, { "epoch": 1.2, "learning_rate": 0.0005, "loss": 1.9399, "step": 337500 }, { "epoch": 1.2, "learning_rate": 0.0005, "loss": 1.9284, "step": 338000 }, { "epoch": 1.2, "learning_rate": 0.0005, "loss": 1.9436, "step": 338500 }, { "epoch": 1.21, "learning_rate": 0.0005, "loss": 1.9619, "step": 339000 }, { "epoch": 1.21, "learning_rate": 0.0005, "loss": 1.9424, "step": 339500 }, { "epoch": 1.21, "learning_rate": 0.0005, "loss": 1.933, "step": 340000 }, { "epoch": 1.21, "eval_bleu": 13.951, "eval_gen_len": 17.2883, "eval_runtime": 210.4415, "eval_samples_per_second": 14.256, "eval_steps_per_second": 1.782, "step": 340000 }, { "epoch": 1.21, "learning_rate": 0.0005, "loss": 1.9235, "step": 340500 }, { "epoch": 1.21, "learning_rate": 0.0005, "loss": 1.9515, "step": 341000 }, { "epoch": 1.21, "learning_rate": 0.0005, "loss": 1.946, "step": 341500 }, { "epoch": 1.22, "learning_rate": 0.0005, "loss": 1.9255, "step": 342000 }, { "epoch": 1.22, "learning_rate": 0.0005, "loss": 1.9137, "step": 342500 }, { "epoch": 1.22, "learning_rate": 0.0005, "loss": 1.9264, "step": 343000 }, { "epoch": 1.22, "learning_rate": 0.0005, "loss": 1.9196, "step": 343500 }, { "epoch": 1.22, "learning_rate": 0.0005, "loss": 1.9168, "step": 344000 }, { "epoch": 1.23, "learning_rate": 0.0005, "loss": 1.9151, "step": 344500 }, { "epoch": 1.23, "learning_rate": 0.0005, "loss": 1.9499, "step": 345000 }, { "epoch": 1.23, "learning_rate": 0.0005, "loss": 1.9131, "step": 345500 }, { "epoch": 1.23, "learning_rate": 0.0005, "loss": 1.9427, "step": 346000 }, { "epoch": 1.23, "learning_rate": 0.0005, "loss": 1.9444, "step": 346500 }, { "epoch": 1.23, "learning_rate": 0.0005, "loss": 1.93, "step": 347000 }, { "epoch": 1.24, "learning_rate": 0.0005, "loss": 1.9173, "step": 347500 }, { "epoch": 1.24, "learning_rate": 0.0005, "loss": 1.9276, "step": 348000 }, { "epoch": 1.24, "learning_rate": 0.0005, "loss": 1.926, "step": 348500 }, { "epoch": 1.24, "learning_rate": 0.0005, "loss": 1.9242, "step": 349000 }, { "epoch": 1.24, "learning_rate": 0.0005, "loss": 1.9495, "step": 349500 }, { "epoch": 1.24, "learning_rate": 0.0005, "loss": 1.9164, "step": 350000 }, { "epoch": 1.24, "eval_bleu": 13.9097, "eval_gen_len": 17.2873, "eval_runtime": 209.6162, "eval_samples_per_second": 14.312, "eval_steps_per_second": 1.789, "step": 350000 }, { "epoch": 1.25, "learning_rate": 0.0005, "loss": 1.9098, "step": 350500 }, { "epoch": 1.25, "learning_rate": 0.0005, "loss": 1.913, "step": 351000 }, { "epoch": 1.25, "learning_rate": 0.0005, "loss": 1.8882, "step": 351500 }, { "epoch": 1.25, "learning_rate": 0.0005, "loss": 1.8997, "step": 352000 }, { "epoch": 1.25, "learning_rate": 0.0005, "loss": 1.9239, "step": 352500 }, { "epoch": 1.26, "learning_rate": 0.0005, "loss": 1.9193, "step": 353000 }, { "epoch": 1.26, "learning_rate": 0.0005, "loss": 1.9476, "step": 353500 }, { "epoch": 1.26, "learning_rate": 0.0005, "loss": 1.9254, "step": 354000 }, { "epoch": 1.26, "learning_rate": 0.0005, "loss": 1.9267, "step": 354500 }, { "epoch": 1.26, "learning_rate": 0.0005, "loss": 1.9152, "step": 355000 }, { "epoch": 1.26, "learning_rate": 0.0005, "loss": 1.9012, "step": 355500 }, { "epoch": 1.27, "learning_rate": 0.0005, "loss": 1.9133, "step": 356000 }, { "epoch": 1.27, "learning_rate": 0.0005, "loss": 1.9281, "step": 356500 }, { "epoch": 1.27, "learning_rate": 0.0005, "loss": 1.9086, "step": 357000 }, { "epoch": 1.27, "learning_rate": 0.0005, "loss": 1.927, "step": 357500 }, { "epoch": 1.27, "learning_rate": 0.0005, "loss": 1.9396, "step": 358000 }, { "epoch": 1.27, "learning_rate": 0.0005, "loss": 1.9091, "step": 358500 }, { "epoch": 1.28, "learning_rate": 0.0005, "loss": 1.8993, "step": 359000 }, { "epoch": 1.28, "learning_rate": 0.0005, "loss": 1.9232, "step": 359500 }, { "epoch": 1.28, "learning_rate": 0.0005, "loss": 1.9021, "step": 360000 }, { "epoch": 1.28, "eval_bleu": 13.9218, "eval_gen_len": 17.2557, "eval_runtime": 209.7441, "eval_samples_per_second": 14.303, "eval_steps_per_second": 1.788, "step": 360000 }, { "epoch": 1.28, "learning_rate": 0.0005, "loss": 1.9423, "step": 360500 }, { "epoch": 1.28, "learning_rate": 0.0005, "loss": 1.9263, "step": 361000 }, { "epoch": 1.29, "learning_rate": 0.0005, "loss": 1.8766, "step": 361500 }, { "epoch": 1.29, "learning_rate": 0.0005, "loss": 1.8903, "step": 362000 }, { "epoch": 1.29, "learning_rate": 0.0005, "loss": 1.9004, "step": 362500 }, { "epoch": 1.29, "learning_rate": 0.0005, "loss": 1.9262, "step": 363000 }, { "epoch": 1.29, "learning_rate": 0.0005, "loss": 1.9132, "step": 363500 }, { "epoch": 1.29, "learning_rate": 0.0005, "loss": 1.9317, "step": 364000 }, { "epoch": 1.3, "learning_rate": 0.0005, "loss": 1.9104, "step": 364500 }, { "epoch": 1.3, "learning_rate": 0.0005, "loss": 1.9239, "step": 365000 }, { "epoch": 1.3, "learning_rate": 0.0005, "loss": 1.932, "step": 365500 }, { "epoch": 1.3, "learning_rate": 0.0005, "loss": 1.9195, "step": 366000 }, { "epoch": 1.3, "learning_rate": 0.0005, "loss": 1.9197, "step": 366500 }, { "epoch": 1.31, "learning_rate": 0.0005, "loss": 1.8899, "step": 367000 }, { "epoch": 1.31, "learning_rate": 0.0005, "loss": 1.906, "step": 367500 }, { "epoch": 1.31, "learning_rate": 0.0005, "loss": 1.9475, "step": 368000 }, { "epoch": 1.31, "learning_rate": 0.0005, "loss": 1.9302, "step": 368500 }, { "epoch": 1.31, "learning_rate": 0.0005, "loss": 1.9071, "step": 369000 }, { "epoch": 1.31, "learning_rate": 0.0005, "loss": 1.9249, "step": 369500 }, { "epoch": 1.32, "learning_rate": 0.0005, "loss": 1.9229, "step": 370000 }, { "epoch": 1.32, "eval_bleu": 13.9626, "eval_gen_len": 17.2193, "eval_runtime": 210.3966, "eval_samples_per_second": 14.259, "eval_steps_per_second": 1.782, "step": 370000 }, { "epoch": 1.32, "learning_rate": 0.0005, "loss": 1.9169, "step": 370500 }, { "epoch": 1.32, "learning_rate": 0.0005, "loss": 1.9311, "step": 371000 }, { "epoch": 1.32, "learning_rate": 0.0005, "loss": 1.9314, "step": 371500 }, { "epoch": 1.32, "learning_rate": 0.0005, "loss": 1.8933, "step": 372000 }, { "epoch": 1.32, "learning_rate": 0.0005, "loss": 1.915, "step": 372500 }, { "epoch": 1.33, "learning_rate": 0.0005, "loss": 1.9196, "step": 373000 }, { "epoch": 1.33, "learning_rate": 0.0005, "loss": 1.9201, "step": 373500 }, { "epoch": 1.33, "learning_rate": 0.0005, "loss": 1.9214, "step": 374000 }, { "epoch": 1.33, "learning_rate": 0.0005, "loss": 1.9269, "step": 374500 }, { "epoch": 1.33, "learning_rate": 0.0005, "loss": 1.9359, "step": 375000 }, { "epoch": 1.34, "learning_rate": 0.0005, "loss": 1.903, "step": 375500 }, { "epoch": 1.34, "learning_rate": 0.0005, "loss": 1.9154, "step": 376000 }, { "epoch": 1.34, "learning_rate": 0.0005, "loss": 1.9057, "step": 376500 }, { "epoch": 1.34, "learning_rate": 0.0005, "loss": 1.9249, "step": 377000 }, { "epoch": 1.34, "learning_rate": 0.0005, "loss": 1.9091, "step": 377500 }, { "epoch": 1.34, "learning_rate": 0.0005, "loss": 1.8887, "step": 378000 }, { "epoch": 1.35, "learning_rate": 0.0005, "loss": 1.9062, "step": 378500 }, { "epoch": 1.35, "learning_rate": 0.0005, "loss": 1.9382, "step": 379000 }, { "epoch": 1.35, "learning_rate": 0.0005, "loss": 1.9201, "step": 379500 }, { "epoch": 1.35, "learning_rate": 0.0005, "loss": 1.9052, "step": 380000 }, { "epoch": 1.35, "eval_bleu": 13.9355, "eval_gen_len": 17.1907, "eval_runtime": 210.2658, "eval_samples_per_second": 14.268, "eval_steps_per_second": 1.783, "step": 380000 }, { "epoch": 1.35, "learning_rate": 0.0005, "loss": 1.9128, "step": 380500 }, { "epoch": 1.35, "learning_rate": 0.0005, "loss": 1.8844, "step": 381000 }, { "epoch": 1.36, "learning_rate": 0.0005, "loss": 1.9122, "step": 381500 }, { "epoch": 1.36, "learning_rate": 0.0005, "loss": 1.8976, "step": 382000 }, { "epoch": 1.36, "learning_rate": 0.0005, "loss": 1.9251, "step": 382500 }, { "epoch": 1.36, "learning_rate": 0.0005, "loss": 1.9067, "step": 383000 }, { "epoch": 1.36, "learning_rate": 0.0005, "loss": 1.9106, "step": 383500 }, { "epoch": 1.37, "learning_rate": 0.0005, "loss": 1.9069, "step": 384000 }, { "epoch": 1.37, "learning_rate": 0.0005, "loss": 1.9251, "step": 384500 }, { "epoch": 1.37, "learning_rate": 0.0005, "loss": 1.9339, "step": 385000 }, { "epoch": 1.37, "learning_rate": 0.0005, "loss": 1.9444, "step": 385500 }, { "epoch": 1.37, "learning_rate": 0.0005, "loss": 1.9154, "step": 386000 }, { "epoch": 1.37, "learning_rate": 0.0005, "loss": 1.9127, "step": 386500 }, { "epoch": 1.38, "learning_rate": 0.0005, "loss": 1.9214, "step": 387000 }, { "epoch": 1.38, "learning_rate": 0.0005, "loss": 1.8855, "step": 387500 }, { "epoch": 1.38, "learning_rate": 0.0005, "loss": 1.927, "step": 388000 }, { "epoch": 1.38, "learning_rate": 0.0005, "loss": 1.9108, "step": 388500 }, { "epoch": 1.38, "learning_rate": 0.0005, "loss": 1.9182, "step": 389000 }, { "epoch": 1.39, "learning_rate": 0.0005, "loss": 1.9008, "step": 389500 }, { "epoch": 1.39, "learning_rate": 0.0005, "loss": 1.9159, "step": 390000 }, { "epoch": 1.39, "eval_bleu": 14.1, "eval_gen_len": 17.2573, "eval_runtime": 210.1633, "eval_samples_per_second": 14.275, "eval_steps_per_second": 1.784, "step": 390000 }, { "epoch": 1.39, "learning_rate": 0.0005, "loss": 1.8991, "step": 390500 }, { "epoch": 1.39, "learning_rate": 0.0005, "loss": 1.9076, "step": 391000 }, { "epoch": 1.39, "learning_rate": 0.0005, "loss": 1.9183, "step": 391500 }, { "epoch": 1.39, "learning_rate": 0.0005, "loss": 1.9114, "step": 392000 }, { "epoch": 1.4, "learning_rate": 0.0005, "loss": 1.9254, "step": 392500 }, { "epoch": 1.4, "learning_rate": 0.0005, "loss": 1.901, "step": 393000 }, { "epoch": 1.4, "learning_rate": 0.0005, "loss": 1.8843, "step": 393500 }, { "epoch": 1.4, "learning_rate": 0.0005, "loss": 1.9133, "step": 394000 }, { "epoch": 1.4, "learning_rate": 0.0005, "loss": 1.9348, "step": 394500 }, { "epoch": 1.4, "learning_rate": 0.0005, "loss": 1.9238, "step": 395000 }, { "epoch": 1.41, "learning_rate": 0.0005, "loss": 1.9106, "step": 395500 }, { "epoch": 1.41, "learning_rate": 0.0005, "loss": 1.9159, "step": 396000 }, { "epoch": 1.41, "learning_rate": 0.0005, "loss": 1.9189, "step": 396500 }, { "epoch": 1.41, "learning_rate": 0.0005, "loss": 1.9114, "step": 397000 }, { "epoch": 1.41, "learning_rate": 0.0005, "loss": 1.904, "step": 397500 }, { "epoch": 1.42, "learning_rate": 0.0005, "loss": 1.8842, "step": 398000 }, { "epoch": 1.42, "learning_rate": 0.0005, "loss": 1.903, "step": 398500 }, { "epoch": 1.42, "learning_rate": 0.0005, "loss": 1.9161, "step": 399000 }, { "epoch": 1.42, "learning_rate": 0.0005, "loss": 1.939, "step": 399500 }, { "epoch": 1.42, "learning_rate": 0.0005, "loss": 1.8946, "step": 400000 }, { "epoch": 1.42, "eval_bleu": 14.2056, "eval_gen_len": 17.2387, "eval_runtime": 209.8848, "eval_samples_per_second": 14.294, "eval_steps_per_second": 1.787, "step": 400000 }, { "epoch": 1.42, "learning_rate": 0.0005, "loss": 1.9221, "step": 400500 }, { "epoch": 1.43, "learning_rate": 0.0005, "loss": 1.9273, "step": 401000 }, { "epoch": 1.43, "learning_rate": 0.0005, "loss": 1.9062, "step": 401500 }, { "epoch": 1.43, "learning_rate": 0.0005, "loss": 1.8952, "step": 402000 }, { "epoch": 1.43, "learning_rate": 0.0005, "loss": 1.9129, "step": 402500 }, { "epoch": 1.43, "learning_rate": 0.0005, "loss": 1.9197, "step": 403000 }, { "epoch": 1.44, "learning_rate": 0.0005, "loss": 1.8995, "step": 403500 }, { "epoch": 1.44, "learning_rate": 0.0005, "loss": 1.9148, "step": 404000 }, { "epoch": 1.44, "learning_rate": 0.0005, "loss": 1.8738, "step": 404500 }, { "epoch": 1.44, "learning_rate": 0.0005, "loss": 1.9039, "step": 405000 }, { "epoch": 1.44, "learning_rate": 0.0005, "loss": 1.8796, "step": 405500 }, { "epoch": 1.44, "learning_rate": 0.0005, "loss": 1.8763, "step": 406000 }, { "epoch": 1.45, "learning_rate": 0.0005, "loss": 1.8879, "step": 406500 }, { "epoch": 1.45, "learning_rate": 0.0005, "loss": 1.9255, "step": 407000 }, { "epoch": 1.45, "learning_rate": 0.0005, "loss": 1.8904, "step": 407500 }, { "epoch": 1.45, "learning_rate": 0.0005, "loss": 1.9185, "step": 408000 }, { "epoch": 1.45, "learning_rate": 0.0005, "loss": 1.9233, "step": 408500 }, { "epoch": 1.45, "learning_rate": 0.0005, "loss": 1.9053, "step": 409000 }, { "epoch": 1.46, "learning_rate": 0.0005, "loss": 1.9257, "step": 409500 }, { "epoch": 1.46, "learning_rate": 0.0005, "loss": 1.8702, "step": 410000 }, { "epoch": 1.46, "eval_bleu": 14.1317, "eval_gen_len": 17.2767, "eval_runtime": 209.9792, "eval_samples_per_second": 14.287, "eval_steps_per_second": 1.786, "step": 410000 }, { "epoch": 1.46, "learning_rate": 0.0005, "loss": 1.9006, "step": 410500 }, { "epoch": 1.46, "learning_rate": 0.0005, "loss": 1.9165, "step": 411000 }, { "epoch": 1.46, "learning_rate": 0.0005, "loss": 1.8969, "step": 411500 }, { "epoch": 1.47, "learning_rate": 0.0005, "loss": 1.9103, "step": 412000 }, { "epoch": 1.47, "learning_rate": 0.0005, "loss": 1.9102, "step": 412500 }, { "epoch": 1.47, "learning_rate": 0.0005, "loss": 1.8816, "step": 413000 }, { "epoch": 1.47, "learning_rate": 0.0005, "loss": 1.8814, "step": 413500 }, { "epoch": 1.47, "learning_rate": 0.0005, "loss": 1.9083, "step": 414000 }, { "epoch": 1.47, "learning_rate": 0.0005, "loss": 1.903, "step": 414500 }, { "epoch": 1.48, "learning_rate": 0.0005, "loss": 1.9214, "step": 415000 }, { "epoch": 1.48, "learning_rate": 0.0005, "loss": 1.8792, "step": 415500 }, { "epoch": 1.48, "learning_rate": 0.0005, "loss": 1.9242, "step": 416000 }, { "epoch": 1.48, "learning_rate": 0.0005, "loss": 1.8963, "step": 416500 }, { "epoch": 1.48, "learning_rate": 0.0005, "loss": 1.9194, "step": 417000 }, { "epoch": 1.48, "learning_rate": 0.0005, "loss": 1.9021, "step": 417500 }, { "epoch": 1.49, "learning_rate": 0.0005, "loss": 1.8832, "step": 418000 }, { "epoch": 1.49, "learning_rate": 0.0005, "loss": 1.8885, "step": 418500 }, { "epoch": 1.49, "learning_rate": 0.0005, "loss": 1.9143, "step": 419000 }, { "epoch": 1.49, "learning_rate": 0.0005, "loss": 1.8917, "step": 419500 }, { "epoch": 1.49, "learning_rate": 0.0005, "loss": 1.8957, "step": 420000 }, { "epoch": 1.49, "eval_bleu": 13.8621, "eval_gen_len": 17.2537, "eval_runtime": 210.8259, "eval_samples_per_second": 14.23, "eval_steps_per_second": 1.779, "step": 420000 }, { "epoch": 1.5, "learning_rate": 0.0005, "loss": 1.9073, "step": 420500 }, { "epoch": 1.5, "learning_rate": 0.0005, "loss": 1.8913, "step": 421000 }, { "epoch": 1.5, "learning_rate": 0.0005, "loss": 1.8717, "step": 421500 }, { "epoch": 1.5, "learning_rate": 0.0005, "loss": 1.8888, "step": 422000 }, { "epoch": 1.5, "learning_rate": 0.0005, "loss": 1.8941, "step": 422500 }, { "epoch": 1.5, "learning_rate": 0.0005, "loss": 1.892, "step": 423000 }, { "epoch": 1.51, "learning_rate": 0.0005, "loss": 1.8733, "step": 423500 }, { "epoch": 1.51, "learning_rate": 0.0005, "loss": 1.909, "step": 424000 }, { "epoch": 1.51, "learning_rate": 0.0005, "loss": 1.8916, "step": 424500 }, { "epoch": 1.51, "learning_rate": 0.0005, "loss": 1.8855, "step": 425000 }, { "epoch": 1.51, "learning_rate": 0.0005, "loss": 1.9027, "step": 425500 }, { "epoch": 1.52, "learning_rate": 0.0005, "loss": 1.8949, "step": 426000 }, { "epoch": 1.52, "learning_rate": 0.0005, "loss": 1.9005, "step": 426500 }, { "epoch": 1.52, "learning_rate": 0.0005, "loss": 1.9224, "step": 427000 }, { "epoch": 1.52, "learning_rate": 0.0005, "loss": 1.886, "step": 427500 }, { "epoch": 1.52, "learning_rate": 0.0005, "loss": 1.8956, "step": 428000 }, { "epoch": 1.52, "learning_rate": 0.0005, "loss": 1.8748, "step": 428500 }, { "epoch": 1.53, "learning_rate": 0.0005, "loss": 1.8819, "step": 429000 }, { "epoch": 1.53, "learning_rate": 0.0005, "loss": 1.9058, "step": 429500 }, { "epoch": 1.53, "learning_rate": 0.0005, "loss": 1.8796, "step": 430000 }, { "epoch": 1.53, "eval_bleu": 14.1927, "eval_gen_len": 17.2637, "eval_runtime": 209.2327, "eval_samples_per_second": 14.338, "eval_steps_per_second": 1.792, "step": 430000 }, { "epoch": 1.53, "learning_rate": 0.0005, "loss": 1.9126, "step": 430500 }, { "epoch": 1.53, "learning_rate": 0.0005, "loss": 1.9029, "step": 431000 }, { "epoch": 1.53, "learning_rate": 0.0005, "loss": 1.8999, "step": 431500 }, { "epoch": 1.54, "learning_rate": 0.0005, "loss": 1.9121, "step": 432000 }, { "epoch": 1.54, "learning_rate": 0.0005, "loss": 1.9198, "step": 432500 }, { "epoch": 1.54, "learning_rate": 0.0005, "loss": 1.8766, "step": 433000 }, { "epoch": 1.54, "learning_rate": 0.0005, "loss": 1.9225, "step": 433500 }, { "epoch": 1.54, "learning_rate": 0.0005, "loss": 1.92, "step": 434000 }, { "epoch": 1.55, "learning_rate": 0.0005, "loss": 1.9178, "step": 434500 }, { "epoch": 1.55, "learning_rate": 0.0005, "loss": 1.88, "step": 435000 }, { "epoch": 1.55, "learning_rate": 0.0005, "loss": 1.8936, "step": 435500 }, { "epoch": 1.55, "learning_rate": 0.0005, "loss": 1.9063, "step": 436000 }, { "epoch": 1.55, "learning_rate": 0.0005, "loss": 1.8763, "step": 436500 }, { "epoch": 1.55, "learning_rate": 0.0005, "loss": 1.9093, "step": 437000 }, { "epoch": 1.56, "learning_rate": 0.0005, "loss": 1.9274, "step": 437500 }, { "epoch": 1.56, "learning_rate": 0.0005, "loss": 1.8963, "step": 438000 }, { "epoch": 1.56, "learning_rate": 0.0005, "loss": 1.9107, "step": 438500 }, { "epoch": 1.56, "learning_rate": 0.0005, "loss": 1.9044, "step": 439000 }, { "epoch": 1.56, "learning_rate": 0.0005, "loss": 1.893, "step": 439500 }, { "epoch": 1.56, "learning_rate": 0.0005, "loss": 1.895, "step": 440000 }, { "epoch": 1.56, "eval_bleu": 14.0582, "eval_gen_len": 17.2333, "eval_runtime": 208.5065, "eval_samples_per_second": 14.388, "eval_steps_per_second": 1.799, "step": 440000 }, { "epoch": 1.57, "learning_rate": 0.0005, "loss": 1.9065, "step": 440500 }, { "epoch": 1.57, "learning_rate": 0.0005, "loss": 1.8946, "step": 441000 }, { "epoch": 1.57, "learning_rate": 0.0005, "loss": 1.9015, "step": 441500 }, { "epoch": 1.57, "learning_rate": 0.0005, "loss": 1.8853, "step": 442000 }, { "epoch": 1.57, "learning_rate": 0.0005, "loss": 1.8862, "step": 442500 }, { "epoch": 1.58, "learning_rate": 0.0005, "loss": 1.8777, "step": 443000 }, { "epoch": 1.58, "learning_rate": 0.0005, "loss": 1.8956, "step": 443500 }, { "epoch": 1.58, "learning_rate": 0.0005, "loss": 1.8855, "step": 444000 }, { "epoch": 1.58, "learning_rate": 0.0005, "loss": 1.8992, "step": 444500 }, { "epoch": 1.58, "learning_rate": 0.0005, "loss": 1.8788, "step": 445000 }, { "epoch": 1.58, "learning_rate": 0.0005, "loss": 1.8861, "step": 445500 }, { "epoch": 1.59, "learning_rate": 0.0005, "loss": 1.9067, "step": 446000 }, { "epoch": 1.59, "learning_rate": 0.0005, "loss": 1.9006, "step": 446500 }, { "epoch": 1.59, "learning_rate": 0.0005, "loss": 1.9093, "step": 447000 }, { "epoch": 1.59, "learning_rate": 0.0005, "loss": 1.876, "step": 447500 }, { "epoch": 1.59, "learning_rate": 0.0005, "loss": 1.8703, "step": 448000 }, { "epoch": 1.6, "learning_rate": 0.0005, "loss": 1.86, "step": 448500 }, { "epoch": 1.6, "learning_rate": 0.0005, "loss": 1.8887, "step": 449000 }, { "epoch": 1.6, "learning_rate": 0.0005, "loss": 1.8746, "step": 449500 }, { "epoch": 1.6, "learning_rate": 0.0005, "loss": 1.8869, "step": 450000 }, { "epoch": 1.6, "eval_bleu": 14.0195, "eval_gen_len": 17.265, "eval_runtime": 208.4219, "eval_samples_per_second": 14.394, "eval_steps_per_second": 1.799, "step": 450000 }, { "epoch": 1.6, "learning_rate": 0.0005, "loss": 1.9001, "step": 450500 }, { "epoch": 1.6, "learning_rate": 0.0005, "loss": 1.8942, "step": 451000 }, { "epoch": 1.61, "learning_rate": 0.0005, "loss": 1.8915, "step": 451500 }, { "epoch": 1.61, "learning_rate": 0.0005, "loss": 1.8738, "step": 452000 }, { "epoch": 1.61, "learning_rate": 0.0005, "loss": 1.894, "step": 452500 }, { "epoch": 1.61, "learning_rate": 0.0005, "loss": 1.9108, "step": 453000 }, { "epoch": 1.61, "learning_rate": 0.0005, "loss": 1.8666, "step": 453500 }, { "epoch": 1.61, "learning_rate": 0.0005, "loss": 1.8989, "step": 454000 }, { "epoch": 1.62, "learning_rate": 0.0005, "loss": 1.8975, "step": 454500 }, { "epoch": 1.62, "learning_rate": 0.0005, "loss": 1.8896, "step": 455000 }, { "epoch": 1.62, "learning_rate": 0.0005, "loss": 1.903, "step": 455500 }, { "epoch": 1.62, "learning_rate": 0.0005, "loss": 1.9147, "step": 456000 }, { "epoch": 1.62, "learning_rate": 0.0005, "loss": 1.8979, "step": 456500 }, { "epoch": 1.63, "learning_rate": 0.0005, "loss": 1.8686, "step": 457000 }, { "epoch": 1.63, "learning_rate": 0.0005, "loss": 1.9064, "step": 457500 }, { "epoch": 1.63, "learning_rate": 0.0005, "loss": 1.891, "step": 458000 }, { "epoch": 1.63, "learning_rate": 0.0005, "loss": 1.9, "step": 458500 }, { "epoch": 1.63, "learning_rate": 0.0005, "loss": 1.8841, "step": 459000 }, { "epoch": 1.63, "learning_rate": 0.0005, "loss": 1.8899, "step": 459500 }, { "epoch": 1.64, "learning_rate": 0.0005, "loss": 1.8935, "step": 460000 }, { "epoch": 1.64, "eval_bleu": 14.0753, "eval_gen_len": 17.2433, "eval_runtime": 207.8742, "eval_samples_per_second": 14.432, "eval_steps_per_second": 1.804, "step": 460000 }, { "epoch": 1.64, "learning_rate": 0.0005, "loss": 1.8822, "step": 460500 }, { "epoch": 1.64, "learning_rate": 0.0005, "loss": 1.8702, "step": 461000 }, { "epoch": 1.64, "learning_rate": 0.0005, "loss": 1.8791, "step": 461500 }, { "epoch": 1.64, "learning_rate": 0.0005, "loss": 1.8787, "step": 462000 }, { "epoch": 1.64, "learning_rate": 0.0005, "loss": 1.8697, "step": 462500 }, { "epoch": 1.65, "learning_rate": 0.0005, "loss": 1.9087, "step": 463000 }, { "epoch": 1.65, "learning_rate": 0.0005, "loss": 1.8696, "step": 463500 }, { "epoch": 1.65, "learning_rate": 0.0005, "loss": 1.8831, "step": 464000 }, { "epoch": 1.65, "learning_rate": 0.0005, "loss": 1.913, "step": 464500 }, { "epoch": 1.65, "learning_rate": 0.0005, "loss": 1.8784, "step": 465000 }, { "epoch": 1.66, "learning_rate": 0.0005, "loss": 1.8686, "step": 465500 }, { "epoch": 1.66, "learning_rate": 0.0005, "loss": 1.8773, "step": 466000 }, { "epoch": 1.66, "learning_rate": 0.0005, "loss": 1.8849, "step": 466500 }, { "epoch": 1.66, "learning_rate": 0.0005, "loss": 1.8775, "step": 467000 }, { "epoch": 1.66, "learning_rate": 0.0005, "loss": 1.8864, "step": 467500 }, { "epoch": 1.66, "learning_rate": 0.0005, "loss": 1.9001, "step": 468000 }, { "epoch": 1.67, "learning_rate": 0.0005, "loss": 1.9055, "step": 468500 }, { "epoch": 1.67, "learning_rate": 0.0005, "loss": 1.8874, "step": 469000 }, { "epoch": 1.67, "learning_rate": 0.0005, "loss": 1.8834, "step": 469500 }, { "epoch": 1.67, "learning_rate": 0.0005, "loss": 1.8775, "step": 470000 }, { "epoch": 1.67, "eval_bleu": 14.1, "eval_gen_len": 17.2593, "eval_runtime": 208.9726, "eval_samples_per_second": 14.356, "eval_steps_per_second": 1.794, "step": 470000 }, { "epoch": 1.67, "learning_rate": 0.0005, "loss": 1.8792, "step": 470500 }, { "epoch": 1.68, "learning_rate": 0.0005, "loss": 1.8794, "step": 471000 }, { "epoch": 1.68, "learning_rate": 0.0005, "loss": 1.8797, "step": 471500 }, { "epoch": 1.68, "learning_rate": 0.0005, "loss": 1.8709, "step": 472000 }, { "epoch": 1.68, "learning_rate": 0.0005, "loss": 1.9154, "step": 472500 }, { "epoch": 1.68, "learning_rate": 0.0005, "loss": 1.8894, "step": 473000 }, { "epoch": 1.68, "learning_rate": 0.0005, "loss": 1.8758, "step": 473500 }, { "epoch": 1.69, "learning_rate": 0.0005, "loss": 1.8954, "step": 474000 }, { "epoch": 1.69, "learning_rate": 0.0005, "loss": 1.8666, "step": 474500 }, { "epoch": 1.69, "learning_rate": 0.0005, "loss": 1.8768, "step": 475000 }, { "epoch": 1.69, "learning_rate": 0.0005, "loss": 1.8835, "step": 475500 }, { "epoch": 1.69, "learning_rate": 0.0005, "loss": 1.8768, "step": 476000 }, { "epoch": 1.69, "learning_rate": 0.0005, "loss": 1.8924, "step": 476500 }, { "epoch": 1.7, "learning_rate": 0.0005, "loss": 1.8866, "step": 477000 }, { "epoch": 1.7, "learning_rate": 0.0005, "loss": 1.8736, "step": 477500 }, { "epoch": 1.7, "learning_rate": 0.0005, "loss": 1.883, "step": 478000 }, { "epoch": 1.7, "learning_rate": 0.0005, "loss": 1.8977, "step": 478500 }, { "epoch": 1.7, "learning_rate": 0.0005, "loss": 1.8602, "step": 479000 }, { "epoch": 1.71, "learning_rate": 0.0005, "loss": 1.8827, "step": 479500 }, { "epoch": 1.71, "learning_rate": 0.0005, "loss": 1.8683, "step": 480000 }, { "epoch": 1.71, "eval_bleu": 14.2598, "eval_gen_len": 17.2647, "eval_runtime": 209.7872, "eval_samples_per_second": 14.3, "eval_steps_per_second": 1.788, "step": 480000 }, { "epoch": 1.71, "learning_rate": 0.0005, "loss": 1.8754, "step": 480500 }, { "epoch": 1.71, "learning_rate": 0.0005, "loss": 1.8634, "step": 481000 }, { "epoch": 1.71, "learning_rate": 0.0005, "loss": 1.9076, "step": 481500 }, { "epoch": 1.71, "learning_rate": 0.0005, "loss": 1.8956, "step": 482000 }, { "epoch": 1.72, "learning_rate": 0.0005, "loss": 1.8705, "step": 482500 }, { "epoch": 1.72, "learning_rate": 0.0005, "loss": 1.8754, "step": 483000 }, { "epoch": 1.72, "learning_rate": 0.0005, "loss": 1.8641, "step": 483500 }, { "epoch": 1.72, "learning_rate": 0.0005, "loss": 1.8731, "step": 484000 }, { "epoch": 1.72, "learning_rate": 0.0005, "loss": 1.8836, "step": 484500 }, { "epoch": 1.72, "learning_rate": 0.0005, "loss": 1.8857, "step": 485000 }, { "epoch": 1.73, "learning_rate": 0.0005, "loss": 1.8813, "step": 485500 }, { "epoch": 1.73, "learning_rate": 0.0005, "loss": 1.8884, "step": 486000 }, { "epoch": 1.73, "learning_rate": 0.0005, "loss": 1.9007, "step": 486500 }, { "epoch": 1.73, "learning_rate": 0.0005, "loss": 1.8875, "step": 487000 }, { "epoch": 1.73, "learning_rate": 0.0005, "loss": 1.8734, "step": 487500 }, { "epoch": 1.74, "learning_rate": 0.0005, "loss": 1.8457, "step": 488000 }, { "epoch": 1.74, "learning_rate": 0.0005, "loss": 1.9094, "step": 488500 }, { "epoch": 1.74, "learning_rate": 0.0005, "loss": 1.8856, "step": 489000 }, { "epoch": 1.74, "learning_rate": 0.0005, "loss": 1.8534, "step": 489500 }, { "epoch": 1.74, "learning_rate": 0.0005, "loss": 1.8511, "step": 490000 }, { "epoch": 1.74, "eval_bleu": 14.3516, "eval_gen_len": 17.2557, "eval_runtime": 208.2762, "eval_samples_per_second": 14.404, "eval_steps_per_second": 1.8, "step": 490000 }, { "epoch": 1.74, "learning_rate": 0.0005, "loss": 1.9023, "step": 490500 }, { "epoch": 1.75, "learning_rate": 0.0005, "loss": 1.8887, "step": 491000 }, { "epoch": 1.75, "learning_rate": 0.0005, "loss": 1.8824, "step": 491500 }, { "epoch": 1.75, "learning_rate": 0.0005, "loss": 1.8949, "step": 492000 }, { "epoch": 1.75, "learning_rate": 0.0005, "loss": 1.8723, "step": 492500 }, { "epoch": 1.75, "learning_rate": 0.0005, "loss": 1.8601, "step": 493000 }, { "epoch": 1.76, "learning_rate": 0.0005, "loss": 1.8848, "step": 493500 }, { "epoch": 1.76, "learning_rate": 0.0005, "loss": 1.8977, "step": 494000 }, { "epoch": 1.76, "learning_rate": 0.0005, "loss": 1.8697, "step": 494500 }, { "epoch": 1.76, "learning_rate": 0.0005, "loss": 1.8754, "step": 495000 }, { "epoch": 1.76, "learning_rate": 0.0005, "loss": 1.8617, "step": 495500 }, { "epoch": 1.76, "learning_rate": 0.0005, "loss": 1.8668, "step": 496000 }, { "epoch": 1.77, "learning_rate": 0.0005, "loss": 1.8823, "step": 496500 }, { "epoch": 1.77, "learning_rate": 0.0005, "loss": 1.8749, "step": 497000 }, { "epoch": 1.77, "learning_rate": 0.0005, "loss": 1.8653, "step": 497500 }, { "epoch": 1.77, "learning_rate": 0.0005, "loss": 1.8878, "step": 498000 }, { "epoch": 1.77, "learning_rate": 0.0005, "loss": 1.878, "step": 498500 }, { "epoch": 1.77, "learning_rate": 0.0005, "loss": 1.8723, "step": 499000 }, { "epoch": 1.78, "learning_rate": 0.0005, "loss": 1.8935, "step": 499500 }, { "epoch": 1.78, "learning_rate": 0.0005, "loss": 1.898, "step": 500000 }, { "epoch": 1.78, "eval_bleu": 14.437, "eval_gen_len": 17.2627, "eval_runtime": 207.3898, "eval_samples_per_second": 14.466, "eval_steps_per_second": 1.808, "step": 500000 }, { "epoch": 1.78, "learning_rate": 0.0005, "loss": 1.8867, "step": 500500 }, { "epoch": 1.78, "learning_rate": 0.0005, "loss": 1.8817, "step": 501000 }, { "epoch": 1.78, "learning_rate": 0.0005, "loss": 1.8661, "step": 501500 }, { "epoch": 1.79, "learning_rate": 0.0005, "loss": 1.8818, "step": 502000 }, { "epoch": 1.79, "learning_rate": 0.0005, "loss": 1.8633, "step": 502500 }, { "epoch": 1.79, "learning_rate": 0.0005, "loss": 1.8829, "step": 503000 }, { "epoch": 1.79, "learning_rate": 0.0005, "loss": 1.8839, "step": 503500 }, { "epoch": 1.79, "learning_rate": 0.0005, "loss": 1.8755, "step": 504000 }, { "epoch": 1.79, "learning_rate": 0.0005, "loss": 1.8812, "step": 504500 }, { "epoch": 1.8, "learning_rate": 0.0005, "loss": 1.8726, "step": 505000 }, { "epoch": 1.8, "learning_rate": 0.0005, "loss": 1.8681, "step": 505500 }, { "epoch": 1.8, "learning_rate": 0.0005, "loss": 1.89, "step": 506000 }, { "epoch": 1.8, "learning_rate": 0.0005, "loss": 1.8702, "step": 506500 }, { "epoch": 1.8, "learning_rate": 0.0005, "loss": 1.8828, "step": 507000 }, { "epoch": 1.8, "learning_rate": 0.0005, "loss": 1.8632, "step": 507500 }, { "epoch": 1.81, "learning_rate": 0.0005, "loss": 1.89, "step": 508000 }, { "epoch": 1.81, "learning_rate": 0.0005, "loss": 1.8465, "step": 508500 }, { "epoch": 1.81, "learning_rate": 0.0005, "loss": 1.8747, "step": 509000 }, { "epoch": 1.81, "learning_rate": 0.0005, "loss": 1.8718, "step": 509500 }, { "epoch": 1.81, "learning_rate": 0.0005, "loss": 1.8686, "step": 510000 }, { "epoch": 1.81, "eval_bleu": 14.1132, "eval_gen_len": 17.263, "eval_runtime": 207.7131, "eval_samples_per_second": 14.443, "eval_steps_per_second": 1.805, "step": 510000 } ], "max_steps": 1408995, "num_train_epochs": 5, "total_flos": 6.566535498761687e+17, "trial_name": null, "trial_params": null }