{ "best_metric": null, "best_model_checkpoint": null, "epoch": 300.0, "global_step": 11700, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 12.82, "learning_rate": 1.24e-05, "loss": 4.0279, "step": 500 }, { "epoch": 12.82, "eval_bleu": 49.841, "eval_em": 0.0, "eval_gen_len": 51.6403, "eval_loss": 2.4031472206115723, "eval_rm": 0.0, "eval_runtime": 118.4236, "eval_samples_per_second": 3.521, "eval_steps_per_second": 0.448, "step": 500 }, { "epoch": 25.64, "learning_rate": 2.4900000000000002e-05, "loss": 1.3442, "step": 1000 }, { "epoch": 25.64, "eval_bleu": 85.0177, "eval_em": 0.0, "eval_gen_len": 57.9784, "eval_loss": 0.501366138458252, "eval_rm": 0.0, "eval_runtime": 163.8536, "eval_samples_per_second": 2.545, "eval_steps_per_second": 0.323, "step": 1000 }, { "epoch": 38.46, "learning_rate": 3.74e-05, "loss": 0.2522, "step": 1500 }, { "epoch": 38.46, "eval_bleu": 94.0714, "eval_em": 0.0168, "eval_gen_len": 57.9137, "eval_loss": 0.3293180763721466, "eval_rm": 0.0216, "eval_runtime": 113.0226, "eval_samples_per_second": 3.69, "eval_steps_per_second": 0.469, "step": 1500 }, { "epoch": 51.28, "learning_rate": 4.99e-05, "loss": 0.1534, "step": 2000 }, { "epoch": 51.28, "eval_bleu": 94.4328, "eval_em": 0.0024, "eval_gen_len": 58.9448, "eval_loss": 0.320736825466156, "eval_rm": 0.0072, "eval_runtime": 116.3746, "eval_samples_per_second": 3.583, "eval_steps_per_second": 0.455, "step": 2000 }, { "epoch": 64.1, "learning_rate": 4.744329896907217e-05, "loss": 0.1305, "step": 2500 }, { "epoch": 64.1, "eval_bleu": 94.0708, "eval_em": 0.0, "eval_gen_len": 59.6115, "eval_loss": 0.3247060477733612, "eval_rm": 0.0, "eval_runtime": 117.0173, "eval_samples_per_second": 3.564, "eval_steps_per_second": 0.453, "step": 2500 }, { "epoch": 76.92, "learning_rate": 4.48659793814433e-05, "loss": 0.1226, "step": 3000 }, { "epoch": 76.92, "eval_bleu": 94.3143, "eval_em": 0.0024, "eval_gen_len": 58.235, "eval_loss": 0.33251264691352844, "eval_rm": 0.0024, "eval_runtime": 119.1624, "eval_samples_per_second": 3.499, "eval_steps_per_second": 0.445, "step": 3000 }, { "epoch": 89.74, "learning_rate": 4.228865979381443e-05, "loss": 0.1131, "step": 3500 }, { "epoch": 89.74, "eval_bleu": 94.5678, "eval_em": 0.0048, "eval_gen_len": 59.6811, "eval_loss": 0.3400600850582123, "eval_rm": 0.0144, "eval_runtime": 116.7251, "eval_samples_per_second": 3.572, "eval_steps_per_second": 0.454, "step": 3500 }, { "epoch": 102.56, "learning_rate": 3.971134020618557e-05, "loss": 0.1053, "step": 4000 }, { "epoch": 102.56, "eval_bleu": 94.4738, "eval_em": 0.0168, "eval_gen_len": 59.0288, "eval_loss": 0.3373829424381256, "eval_rm": 0.0552, "eval_runtime": 118.4954, "eval_samples_per_second": 3.519, "eval_steps_per_second": 0.447, "step": 4000 }, { "epoch": 115.38, "learning_rate": 3.71340206185567e-05, "loss": 0.0999, "step": 4500 }, { "epoch": 115.38, "eval_bleu": 94.6291, "eval_em": 0.0336, "eval_gen_len": 58.6283, "eval_loss": 0.3437003791332245, "eval_rm": 0.0624, "eval_runtime": 119.5949, "eval_samples_per_second": 3.487, "eval_steps_per_second": 0.443, "step": 4500 }, { "epoch": 128.21, "learning_rate": 3.455670103092783e-05, "loss": 0.0941, "step": 5000 }, { "epoch": 128.21, "eval_bleu": 94.7896, "eval_em": 0.0695, "eval_gen_len": 58.4149, "eval_loss": 0.351246178150177, "eval_rm": 0.1271, "eval_runtime": 121.3634, "eval_samples_per_second": 3.436, "eval_steps_per_second": 0.437, "step": 5000 }, { "epoch": 141.03, "learning_rate": 3.197938144329897e-05, "loss": 0.0904, "step": 5500 }, { "epoch": 141.03, "eval_bleu": 94.4101, "eval_em": 0.0719, "eval_gen_len": 58.2518, "eval_loss": 0.34235823154449463, "eval_rm": 0.1439, "eval_runtime": 118.818, "eval_samples_per_second": 3.51, "eval_steps_per_second": 0.446, "step": 5500 }, { "epoch": 153.85, "learning_rate": 2.9402061855670106e-05, "loss": 0.0833, "step": 6000 }, { "epoch": 153.85, "eval_bleu": 94.7141, "eval_em": 0.0887, "eval_gen_len": 59.0312, "eval_loss": 0.3461511433124542, "eval_rm": 0.1775, "eval_runtime": 116.2495, "eval_samples_per_second": 3.587, "eval_steps_per_second": 0.456, "step": 6000 }, { "epoch": 166.67, "learning_rate": 2.6824742268041237e-05, "loss": 0.0772, "step": 6500 }, { "epoch": 166.67, "eval_bleu": 94.6758, "eval_em": 0.0911, "eval_gen_len": 59.0767, "eval_loss": 0.34671926498413086, "eval_rm": 0.2062, "eval_runtime": 116.1647, "eval_samples_per_second": 3.59, "eval_steps_per_second": 0.456, "step": 6500 }, { "epoch": 179.49, "learning_rate": 2.4247422680412372e-05, "loss": 0.0722, "step": 7000 }, { "epoch": 179.49, "eval_bleu": 94.5698, "eval_em": 0.1055, "eval_gen_len": 58.1415, "eval_loss": 0.3461613953113556, "eval_rm": 0.2398, "eval_runtime": 119.2771, "eval_samples_per_second": 3.496, "eval_steps_per_second": 0.444, "step": 7000 }, { "epoch": 192.31, "learning_rate": 2.1670103092783507e-05, "loss": 0.0669, "step": 7500 }, { "epoch": 192.31, "eval_bleu": 95.0365, "eval_em": 0.1223, "eval_gen_len": 58.7794, "eval_loss": 0.35367459058761597, "eval_rm": 0.2782, "eval_runtime": 115.018, "eval_samples_per_second": 3.626, "eval_steps_per_second": 0.461, "step": 7500 }, { "epoch": 205.13, "learning_rate": 1.9092783505154642e-05, "loss": 0.062, "step": 8000 }, { "epoch": 205.13, "eval_bleu": 94.8694, "eval_em": 0.1247, "eval_gen_len": 58.211, "eval_loss": 0.35051023960113525, "eval_rm": 0.2686, "eval_runtime": 113.7476, "eval_samples_per_second": 3.666, "eval_steps_per_second": 0.466, "step": 8000 }, { "epoch": 217.95, "learning_rate": 1.6515463917525774e-05, "loss": 0.0576, "step": 8500 }, { "epoch": 217.95, "eval_bleu": 94.8168, "eval_em": 0.1271, "eval_gen_len": 59.0791, "eval_loss": 0.3510896563529968, "eval_rm": 0.2926, "eval_runtime": 117.1223, "eval_samples_per_second": 3.56, "eval_steps_per_second": 0.453, "step": 8500 }, { "epoch": 230.77, "learning_rate": 1.3938144329896907e-05, "loss": 0.0539, "step": 9000 }, { "epoch": 230.77, "eval_bleu": 95.1935, "eval_em": 0.1367, "eval_gen_len": 58.6787, "eval_loss": 0.34899094700813293, "eval_rm": 0.3046, "eval_runtime": 117.1796, "eval_samples_per_second": 3.559, "eval_steps_per_second": 0.452, "step": 9000 }, { "epoch": 243.59, "learning_rate": 1.1360824742268042e-05, "loss": 0.0502, "step": 9500 }, { "epoch": 243.59, "eval_bleu": 95.1882, "eval_em": 0.1319, "eval_gen_len": 58.5228, "eval_loss": 0.3490062654018402, "eval_rm": 0.3141, "eval_runtime": 118.559, "eval_samples_per_second": 3.517, "eval_steps_per_second": 0.447, "step": 9500 }, { "epoch": 256.41, "learning_rate": 8.783505154639175e-06, "loss": 0.0473, "step": 10000 }, { "epoch": 256.41, "eval_bleu": 95.1198, "eval_em": 0.1319, "eval_gen_len": 58.4245, "eval_loss": 0.3504057824611664, "eval_rm": 0.307, "eval_runtime": 118.462, "eval_samples_per_second": 3.52, "eval_steps_per_second": 0.447, "step": 10000 }, { "epoch": 269.23, "learning_rate": 6.206185567010309e-06, "loss": 0.045, "step": 10500 }, { "epoch": 269.23, "eval_bleu": 95.047, "eval_em": 0.1343, "eval_gen_len": 58.3213, "eval_loss": 0.35046613216400146, "eval_rm": 0.307, "eval_runtime": 118.1147, "eval_samples_per_second": 3.53, "eval_steps_per_second": 0.449, "step": 10500 }, { "epoch": 282.05, "learning_rate": 3.6288659793814435e-06, "loss": 0.0429, "step": 11000 }, { "epoch": 282.05, "eval_bleu": 95.2397, "eval_em": 0.1391, "eval_gen_len": 58.7242, "eval_loss": 0.3522409200668335, "eval_rm": 0.3046, "eval_runtime": 119.4326, "eval_samples_per_second": 3.492, "eval_steps_per_second": 0.444, "step": 11000 }, { "epoch": 294.87, "learning_rate": 1.0515463917525774e-06, "loss": 0.0416, "step": 11500 }, { "epoch": 294.87, "eval_bleu": 95.2821, "eval_em": 0.1415, "eval_gen_len": 58.7746, "eval_loss": 0.3522770404815674, "eval_rm": 0.3046, "eval_runtime": 119.2922, "eval_samples_per_second": 3.496, "eval_steps_per_second": 0.444, "step": 11500 }, { "epoch": 300.0, "step": 11700, "total_flos": 9455798374608960.0, "train_loss": 0.006238417584671934, "train_runtime": 1626.7385, "train_samples_per_second": 226.097, "train_steps_per_second": 7.192 } ], "max_steps": 11700, "num_train_epochs": 300, "total_flos": 9455798374608960.0, "trial_name": null, "trial_params": null }