{ "best_metric": 2.025848865509033, "best_model_checkpoint": "./checkpoints/barthez-deft-sciences_de_l_information/checkpoint-424", "epoch": 20.0, "global_step": 2120, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.94, "learning_rate": 2.869811320754717e-05, "loss": 3.3405, "step": 100 }, { "epoch": 1.0, "eval_gen_len": 14.9714, "eval_loss": 2.368244171142578, "eval_rouge1": 31.3511, "eval_rouge2": 12.1973, "eval_rougeL": 25.6977, "eval_rougeLsum": 25.6851, "eval_runtime": 3.4096, "eval_samples_per_second": 20.53, "eval_steps_per_second": 5.279, "step": 106 }, { "epoch": 1.89, "learning_rate": 2.728301886792453e-05, "loss": 2.4219, "step": 200 }, { "epoch": 2.0, "eval_gen_len": 14.0429, "eval_loss": 2.1890599727630615, "eval_rouge1": 30.1154, "eval_rouge2": 13.3459, "eval_rougeL": 25.4854, "eval_rougeLsum": 25.5403, "eval_runtime": 3.216, "eval_samples_per_second": 21.766, "eval_steps_per_second": 5.597, "step": 212 }, { "epoch": 2.83, "learning_rate": 2.586792452830189e-05, "loss": 2.0789, "step": 300 }, { "epoch": 3.0, "eval_gen_len": 15.2, "eval_loss": 2.0993804931640625, "eval_rouge1": 32.153, "eval_rouge2": 15.3865, "eval_rougeL": 26.1859, "eval_rougeLsum": 26.1672, "eval_runtime": 3.3607, "eval_samples_per_second": 20.829, "eval_steps_per_second": 5.356, "step": 318 }, { "epoch": 3.77, "learning_rate": 2.4452830188679248e-05, "loss": 1.869, "step": 400 }, { "epoch": 4.0, "eval_gen_len": 16.9857, "eval_loss": 2.025848865509033, "eval_rouge1": 34.5797, "eval_rouge2": 16.4194, "eval_rougeL": 27.6909, "eval_rougeLsum": 27.7201, "eval_runtime": 3.3358, "eval_samples_per_second": 20.985, "eval_steps_per_second": 5.396, "step": 424 }, { "epoch": 4.72, "learning_rate": 2.3037735849056604e-05, "loss": 1.6569, "step": 500 }, { "epoch": 5.0, "eval_gen_len": 15.2429, "eval_loss": 2.0417497158050537, "eval_rouge1": 34.3854, "eval_rouge2": 16.5237, "eval_rougeL": 28.7036, "eval_rougeLsum": 28.8258, "eval_runtime": 3.3091, "eval_samples_per_second": 21.154, "eval_steps_per_second": 5.44, "step": 530 }, { "epoch": 5.66, "learning_rate": 2.162264150943396e-05, "loss": 1.5414, "step": 600 }, { "epoch": 6.0, "eval_gen_len": 16.0143, "eval_loss": 2.050337791442871, "eval_rouge1": 33.1768, "eval_rouge2": 15.4851, "eval_rougeL": 27.2818, "eval_rougeLsum": 27.2884, "eval_runtime": 3.4107, "eval_samples_per_second": 20.524, "eval_steps_per_second": 5.278, "step": 636 }, { "epoch": 6.6, "learning_rate": 2.020754716981132e-05, "loss": 1.4461, "step": 700 }, { "epoch": 7.0, "eval_gen_len": 16.6857, "eval_loss": 2.029313802719116, "eval_rouge1": 35.4273, "eval_rouge2": 16.118, "eval_rougeL": 27.3622, "eval_rougeLsum": 27.393, "eval_runtime": 3.3635, "eval_samples_per_second": 20.812, "eval_steps_per_second": 5.352, "step": 742 }, { "epoch": 7.55, "learning_rate": 1.879245283018868e-05, "loss": 1.3435, "step": 800 }, { "epoch": 8.0, "eval_gen_len": 17.2, "eval_loss": 2.033561944961548, "eval_rouge1": 35.3471, "eval_rouge2": 15.9695, "eval_rougeL": 27.668, "eval_rougeLsum": 27.6749, "eval_runtime": 3.4311, "eval_samples_per_second": 20.402, "eval_steps_per_second": 5.246, "step": 848 }, { "epoch": 8.49, "learning_rate": 1.737735849056604e-05, "loss": 1.2624, "step": 900 }, { "epoch": 9.0, "eval_gen_len": 17.1857, "eval_loss": 2.077875852584839, "eval_rouge1": 35.9201, "eval_rouge2": 17.2547, "eval_rougeL": 27.409, "eval_rougeLsum": 27.3293, "eval_runtime": 3.4, "eval_samples_per_second": 20.588, "eval_steps_per_second": 5.294, "step": 954 }, { "epoch": 9.43, "learning_rate": 1.5962264150943395e-05, "loss": 1.1807, "step": 1000 }, { "epoch": 10.0, "eval_gen_len": 17.1286, "eval_loss": 2.130077600479126, "eval_rouge1": 35.7061, "eval_rouge2": 15.9138, "eval_rougeL": 27.3968, "eval_rougeLsum": 27.4716, "eval_runtime": 3.3605, "eval_samples_per_second": 20.83, "eval_steps_per_second": 5.356, "step": 1060 }, { "epoch": 10.38, "learning_rate": 1.4547169811320755e-05, "loss": 1.0972, "step": 1100 }, { "epoch": 11.0, "eval_gen_len": 17.1429, "eval_loss": 2.1725852489471436, "eval_rouge1": 34.3194, "eval_rouge2": 16.1313, "eval_rougeL": 27.0367, "eval_rougeLsum": 27.0737, "eval_runtime": 3.3905, "eval_samples_per_second": 20.646, "eval_steps_per_second": 5.309, "step": 1166 }, { "epoch": 11.32, "learning_rate": 1.3132075471698114e-05, "loss": 1.0224, "step": 1200 }, { "epoch": 12.0, "eval_gen_len": 16.6571, "eval_loss": 2.1703763008117676, "eval_rouge1": 34.9278, "eval_rouge2": 16.7958, "eval_rougeL": 27.8754, "eval_rougeLsum": 27.932, "eval_runtime": 3.3798, "eval_samples_per_second": 20.711, "eval_steps_per_second": 5.326, "step": 1272 }, { "epoch": 12.26, "learning_rate": 1.1716981132075474e-05, "loss": 1.0181, "step": 1300 }, { "epoch": 13.0, "eval_gen_len": 16.7571, "eval_loss": 2.245802879333496, "eval_rouge1": 34.472, "eval_rouge2": 15.9111, "eval_rougeL": 28.2938, "eval_rougeLsum": 28.2946, "eval_runtime": 3.367, "eval_samples_per_second": 20.79, "eval_steps_per_second": 5.346, "step": 1378 }, { "epoch": 13.21, "learning_rate": 1.030188679245283e-05, "loss": 0.9769, "step": 1400 }, { "epoch": 14.0, "eval_gen_len": 16.5429, "eval_loss": 2.3404934406280518, "eval_rouge1": 35.1592, "eval_rouge2": 16.3135, "eval_rougeL": 29.0956, "eval_rougeLsum": 29.0858, "eval_runtime": 3.3904, "eval_samples_per_second": 20.647, "eval_steps_per_second": 5.309, "step": 1484 }, { "epoch": 14.15, "learning_rate": 8.900943396226416e-06, "loss": 0.8866, "step": 1500 }, { "epoch": 15.0, "eval_gen_len": 16.2429, "eval_loss": 2.3303470611572266, "eval_rouge1": 34.8732, "eval_rouge2": 15.6709, "eval_rougeL": 27.5858, "eval_rougeLsum": 27.6169, "eval_runtime": 3.4313, "eval_samples_per_second": 20.401, "eval_steps_per_second": 5.246, "step": 1590 }, { "epoch": 15.09, "learning_rate": 7.485849056603774e-06, "loss": 0.8888, "step": 1600 }, { "epoch": 16.0, "eval_gen_len": 17.5143, "eval_loss": 2.297647476196289, "eval_rouge1": 35.3034, "eval_rouge2": 16.8011, "eval_rougeL": 27.7988, "eval_rougeLsum": 27.7569, "eval_runtime": 3.3934, "eval_samples_per_second": 20.628, "eval_steps_per_second": 5.304, "step": 1696 }, { "epoch": 16.04, "learning_rate": 6.070754716981133e-06, "loss": 0.8194, "step": 1700 }, { "epoch": 16.98, "learning_rate": 4.6556603773584905e-06, "loss": 0.8358, "step": 1800 }, { "epoch": 17.0, "eval_gen_len": 16.8143, "eval_loss": 2.334933042526245, "eval_rouge1": 35.505, "eval_rouge2": 16.8851, "eval_rougeL": 28.3651, "eval_rougeLsum": 28.413, "eval_runtime": 3.4202, "eval_samples_per_second": 20.467, "eval_steps_per_second": 5.263, "step": 1802 }, { "epoch": 17.92, "learning_rate": 3.240566037735849e-06, "loss": 0.8026, "step": 1900 }, { "epoch": 18.0, "eval_gen_len": 16.6143, "eval_loss": 2.373809814453125, "eval_rouge1": 35.2328, "eval_rouge2": 17.0358, "eval_rougeL": 28.544, "eval_rougeLsum": 28.6211, "eval_runtime": 3.4477, "eval_samples_per_second": 20.303, "eval_steps_per_second": 5.221, "step": 1908 }, { "epoch": 18.87, "learning_rate": 1.8254716981132076e-06, "loss": 0.7487, "step": 2000 }, { "epoch": 19.0, "eval_gen_len": 16.7286, "eval_loss": 2.4102871417999268, "eval_rouge1": 34.0793, "eval_rouge2": 15.4468, "eval_rougeL": 27.8057, "eval_rougeLsum": 27.8586, "eval_runtime": 3.4317, "eval_samples_per_second": 20.398, "eval_steps_per_second": 5.245, "step": 2014 }, { "epoch": 19.81, "learning_rate": 4.1037735849056606e-07, "loss": 0.7722, "step": 2100 }, { "epoch": 20.0, "eval_gen_len": 16.9286, "eval_loss": 2.3990561962127686, "eval_rouge1": 34.8116, "eval_rouge2": 15.8706, "eval_rougeL": 27.9173, "eval_rougeLsum": 27.983, "eval_runtime": 3.3605, "eval_samples_per_second": 20.83, "eval_steps_per_second": 5.356, "step": 2120 }, { "epoch": 20.0, "step": 2120, "total_flos": 1197078338174976.0, "train_loss": 1.328409050995449, "train_runtime": 326.8588, "train_samples_per_second": 25.944, "train_steps_per_second": 6.486 } ], "max_steps": 2120, "num_train_epochs": 20, "total_flos": 1197078338174976.0, "trial_name": null, "trial_params": null }