{ "best_metric": 1.7596173286437988, "best_model_checkpoint": "./checkpoints/barthez-deft-linguistique/checkpoint-756", "epoch": 20.0, "global_step": 2160, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.93, "learning_rate": 2.8722222222222222e-05, "loss": 3.0569, "step": 100 }, { "epoch": 1.0, "eval_gen_len": 18.3803, "eval_loss": 2.0281970500946045, "eval_rouge1": 31.6993, "eval_rouge2": 14.9483, "eval_rougeL": 25.5565, "eval_rougeLsum": 25.4379, "eval_runtime": 3.4728, "eval_samples_per_second": 20.444, "eval_steps_per_second": 5.183, "step": 108 }, { "epoch": 1.85, "learning_rate": 2.7333333333333335e-05, "loss": 2.2892, "step": 200 }, { "epoch": 2.0, "eval_gen_len": 18.507, "eval_loss": 1.8552746772766113, "eval_rouge1": 35.2563, "eval_rouge2": 18.019, "eval_rougeL": 28.3135, "eval_rougeLsum": 28.2927, "eval_runtime": 3.4017, "eval_samples_per_second": 20.872, "eval_steps_per_second": 5.291, "step": 216 }, { "epoch": 2.78, "learning_rate": 2.5944444444444444e-05, "loss": 1.9062, "step": 300 }, { "epoch": 3.0, "eval_gen_len": 19.5352, "eval_loss": 1.7696171998977661, "eval_rouge1": 37.4613, "eval_rouge2": 18.1488, "eval_rougeL": 28.9959, "eval_rougeLsum": 29.0134, "eval_runtime": 3.4439, "eval_samples_per_second": 20.616, "eval_steps_per_second": 5.227, "step": 324 }, { "epoch": 3.7, "learning_rate": 2.4555555555555557e-05, "loss": 1.716, "step": 400 }, { "epoch": 4.0, "eval_gen_len": 18.9577, "eval_loss": 1.7640784978866577, "eval_rouge1": 37.6903, "eval_rouge2": 18.7496, "eval_rougeL": 30.1097, "eval_rougeLsum": 30.1027, "eval_runtime": 3.4032, "eval_samples_per_second": 20.863, "eval_steps_per_second": 5.289, "step": 432 }, { "epoch": 4.63, "learning_rate": 2.316666666666667e-05, "loss": 1.5722, "step": 500 }, { "epoch": 5.0, "eval_gen_len": 19.169, "eval_loss": 1.7780805826187134, "eval_rouge1": 38.1013, "eval_rouge2": 19.8291, "eval_rougeL": 29.8142, "eval_rougeLsum": 29.802, "eval_runtime": 3.5765, "eval_samples_per_second": 19.852, "eval_steps_per_second": 5.033, "step": 540 }, { "epoch": 5.56, "learning_rate": 2.177777777777778e-05, "loss": 1.4655, "step": 600 }, { "epoch": 6.0, "eval_gen_len": 19.3662, "eval_loss": 1.7661303281784058, "eval_rouge1": 38.3557, "eval_rouge2": 20.3309, "eval_rougeL": 30.5068, "eval_rougeLsum": 30.4728, "eval_runtime": 3.4442, "eval_samples_per_second": 20.614, "eval_steps_per_second": 5.226, "step": 648 }, { "epoch": 6.48, "learning_rate": 2.038888888888889e-05, "loss": 1.3507, "step": 700 }, { "epoch": 7.0, "eval_gen_len": 19.3944, "eval_loss": 1.7596173286437988, "eval_rouge1": 39.7409, "eval_rouge2": 20.2998, "eval_rougeL": 31.0849, "eval_rougeLsum": 31.1152, "eval_runtime": 3.408, "eval_samples_per_second": 20.833, "eval_steps_per_second": 5.282, "step": 756 }, { "epoch": 7.41, "learning_rate": 1.9e-05, "loss": 1.2874, "step": 800 }, { "epoch": 8.0, "eval_gen_len": 19.4789, "eval_loss": 1.77056884765625, "eval_rouge1": 37.7846, "eval_rouge2": 20.3457, "eval_rougeL": 30.6826, "eval_rougeLsum": 30.6321, "eval_runtime": 3.4054, "eval_samples_per_second": 20.849, "eval_steps_per_second": 5.286, "step": 864 }, { "epoch": 8.33, "learning_rate": 1.7652777777777777e-05, "loss": 1.2641, "step": 900 }, { "epoch": 9.0, "eval_gen_len": 19.3944, "eval_loss": 1.784759521484375, "eval_rouge1": 38.7421, "eval_rouge2": 19.5701, "eval_rougeL": 30.5798, "eval_rougeLsum": 30.6305, "eval_runtime": 3.4074, "eval_samples_per_second": 20.837, "eval_steps_per_second": 5.283, "step": 972 }, { "epoch": 9.26, "learning_rate": 1.626388888888889e-05, "loss": 1.1192, "step": 1000 }, { "epoch": 10.0, "eval_gen_len": 19.5493, "eval_loss": 1.8008346557617188, "eval_rouge1": 40.3313, "eval_rouge2": 20.3378, "eval_rougeL": 31.8325, "eval_rougeLsum": 31.8648, "eval_runtime": 3.4325, "eval_samples_per_second": 20.684, "eval_steps_per_second": 5.244, "step": 1080 }, { "epoch": 10.19, "learning_rate": 1.4875e-05, "loss": 1.0724, "step": 1100 }, { "epoch": 11.0, "eval_gen_len": 19.8592, "eval_loss": 1.8450435400009155, "eval_rouge1": 38.9612, "eval_rouge2": 20.5719, "eval_rougeL": 31.4496, "eval_rougeLsum": 31.3144, "eval_runtime": 3.4687, "eval_samples_per_second": 20.469, "eval_steps_per_second": 5.189, "step": 1188 }, { "epoch": 11.11, "learning_rate": 1.348611111111111e-05, "loss": 1.0077, "step": 1200 }, { "epoch": 12.0, "eval_gen_len": 19.7324, "eval_loss": 1.8364313840866089, "eval_rouge1": 36.5997, "eval_rouge2": 18.46, "eval_rougeL": 29.1808, "eval_rougeLsum": 29.1705, "eval_runtime": 3.4932, "eval_samples_per_second": 20.325, "eval_steps_per_second": 5.153, "step": 1296 }, { "epoch": 12.04, "learning_rate": 1.2097222222222223e-05, "loss": 0.9743, "step": 1300 }, { "epoch": 12.96, "learning_rate": 1.0708333333333334e-05, "loss": 0.9362, "step": 1400 }, { "epoch": 13.0, "eval_gen_len": 19.6338, "eval_loss": 1.867732286453247, "eval_rouge1": 38.0371, "eval_rouge2": 19.2321, "eval_rougeL": 30.3893, "eval_rougeLsum": 30.3926, "eval_runtime": 3.4487, "eval_samples_per_second": 20.588, "eval_steps_per_second": 5.219, "step": 1404 }, { "epoch": 13.89, "learning_rate": 9.319444444444445e-06, "loss": 0.8868, "step": 1500 }, { "epoch": 14.0, "eval_gen_len": 19.6479, "eval_loss": 1.9153633117675781, "eval_rouge1": 36.4737, "eval_rouge2": 18.5314, "eval_rougeL": 29.325, "eval_rougeLsum": 29.3634, "eval_runtime": 3.4736, "eval_samples_per_second": 20.44, "eval_steps_per_second": 5.182, "step": 1512 }, { "epoch": 14.81, "learning_rate": 7.930555555555556e-06, "loss": 0.8335, "step": 1600 }, { "epoch": 15.0, "eval_gen_len": 19.8028, "eval_loss": 1.93436598777771, "eval_rouge1": 35.7583, "eval_rouge2": 18.0687, "eval_rougeL": 27.9666, "eval_rougeLsum": 27.8675, "eval_runtime": 3.3929, "eval_samples_per_second": 20.926, "eval_steps_per_second": 5.305, "step": 1620 }, { "epoch": 15.74, "learning_rate": 6.541666666666667e-06, "loss": 0.8305, "step": 1700 }, { "epoch": 16.0, "eval_gen_len": 19.9577, "eval_loss": 1.9556257724761963, "eval_rouge1": 37.2137, "eval_rouge2": 18.2199, "eval_rougeL": 29.5959, "eval_rougeLsum": 29.5799, "eval_runtime": 3.4517, "eval_samples_per_second": 20.57, "eval_steps_per_second": 5.215, "step": 1728 }, { "epoch": 16.67, "learning_rate": 5.152777777777778e-06, "loss": 0.8057, "step": 1800 }, { "epoch": 17.0, "eval_gen_len": 19.7324, "eval_loss": 1.9793369770050049, "eval_rouge1": 36.6834, "eval_rouge2": 17.8505, "eval_rougeL": 28.6701, "eval_rougeLsum": 28.7145, "eval_runtime": 3.4482, "eval_samples_per_second": 20.59, "eval_steps_per_second": 5.22, "step": 1836 }, { "epoch": 17.59, "learning_rate": 3.763888888888889e-06, "loss": 0.7869, "step": 1900 }, { "epoch": 18.0, "eval_gen_len": 19.7606, "eval_loss": 1.9994447231292725, "eval_rouge1": 37.5918, "eval_rouge2": 19.1984, "eval_rougeL": 28.8569, "eval_rougeLsum": 28.8278, "eval_runtime": 3.4143, "eval_samples_per_second": 20.795, "eval_steps_per_second": 5.272, "step": 1944 }, { "epoch": 18.52, "learning_rate": 2.375e-06, "loss": 0.7549, "step": 2000 }, { "epoch": 19.0, "eval_gen_len": 19.8028, "eval_loss": 2.011744737625122, "eval_rouge1": 37.3278, "eval_rouge2": 18.5169, "eval_rougeL": 28.778, "eval_rougeLsum": 28.7737, "eval_runtime": 3.4309, "eval_samples_per_second": 20.694, "eval_steps_per_second": 5.246, "step": 2052 }, { "epoch": 19.44, "learning_rate": 9.861111111111112e-07, "loss": 0.7497, "step": 2100 }, { "epoch": 20.0, "eval_gen_len": 19.6901, "eval_loss": 2.018871784210205, "eval_rouge1": 37.7513, "eval_rouge2": 19.1813, "eval_rougeL": 29.3675, "eval_rougeLsum": 29.402, "eval_runtime": 3.4324, "eval_samples_per_second": 20.685, "eval_steps_per_second": 5.244, "step": 2160 }, { "epoch": 20.0, "step": 2160, "total_flos": 1555682356666368.0, "train_loss": 1.2554297270598236, "train_runtime": 336.8512, "train_samples_per_second": 25.471, "train_steps_per_second": 6.412 } ], "max_steps": 2160, "num_train_epochs": 20, "total_flos": 1555682356666368.0, "trial_name": null, "trial_params": null }