José Ángel González
add model
b0ec8dc
{
"best_metric": 2.025848865509033,
"best_model_checkpoint": "./checkpoints/barthez-deft-sciences_de_l_information/checkpoint-424",
"epoch": 20.0,
"global_step": 2120,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.94,
"learning_rate": 2.869811320754717e-05,
"loss": 3.3405,
"step": 100
},
{
"epoch": 1.0,
"eval_gen_len": 14.9714,
"eval_loss": 2.368244171142578,
"eval_rouge1": 31.3511,
"eval_rouge2": 12.1973,
"eval_rougeL": 25.6977,
"eval_rougeLsum": 25.6851,
"eval_runtime": 3.4096,
"eval_samples_per_second": 20.53,
"eval_steps_per_second": 5.279,
"step": 106
},
{
"epoch": 1.89,
"learning_rate": 2.728301886792453e-05,
"loss": 2.4219,
"step": 200
},
{
"epoch": 2.0,
"eval_gen_len": 14.0429,
"eval_loss": 2.1890599727630615,
"eval_rouge1": 30.1154,
"eval_rouge2": 13.3459,
"eval_rougeL": 25.4854,
"eval_rougeLsum": 25.5403,
"eval_runtime": 3.216,
"eval_samples_per_second": 21.766,
"eval_steps_per_second": 5.597,
"step": 212
},
{
"epoch": 2.83,
"learning_rate": 2.586792452830189e-05,
"loss": 2.0789,
"step": 300
},
{
"epoch": 3.0,
"eval_gen_len": 15.2,
"eval_loss": 2.0993804931640625,
"eval_rouge1": 32.153,
"eval_rouge2": 15.3865,
"eval_rougeL": 26.1859,
"eval_rougeLsum": 26.1672,
"eval_runtime": 3.3607,
"eval_samples_per_second": 20.829,
"eval_steps_per_second": 5.356,
"step": 318
},
{
"epoch": 3.77,
"learning_rate": 2.4452830188679248e-05,
"loss": 1.869,
"step": 400
},
{
"epoch": 4.0,
"eval_gen_len": 16.9857,
"eval_loss": 2.025848865509033,
"eval_rouge1": 34.5797,
"eval_rouge2": 16.4194,
"eval_rougeL": 27.6909,
"eval_rougeLsum": 27.7201,
"eval_runtime": 3.3358,
"eval_samples_per_second": 20.985,
"eval_steps_per_second": 5.396,
"step": 424
},
{
"epoch": 4.72,
"learning_rate": 2.3037735849056604e-05,
"loss": 1.6569,
"step": 500
},
{
"epoch": 5.0,
"eval_gen_len": 15.2429,
"eval_loss": 2.0417497158050537,
"eval_rouge1": 34.3854,
"eval_rouge2": 16.5237,
"eval_rougeL": 28.7036,
"eval_rougeLsum": 28.8258,
"eval_runtime": 3.3091,
"eval_samples_per_second": 21.154,
"eval_steps_per_second": 5.44,
"step": 530
},
{
"epoch": 5.66,
"learning_rate": 2.162264150943396e-05,
"loss": 1.5414,
"step": 600
},
{
"epoch": 6.0,
"eval_gen_len": 16.0143,
"eval_loss": 2.050337791442871,
"eval_rouge1": 33.1768,
"eval_rouge2": 15.4851,
"eval_rougeL": 27.2818,
"eval_rougeLsum": 27.2884,
"eval_runtime": 3.4107,
"eval_samples_per_second": 20.524,
"eval_steps_per_second": 5.278,
"step": 636
},
{
"epoch": 6.6,
"learning_rate": 2.020754716981132e-05,
"loss": 1.4461,
"step": 700
},
{
"epoch": 7.0,
"eval_gen_len": 16.6857,
"eval_loss": 2.029313802719116,
"eval_rouge1": 35.4273,
"eval_rouge2": 16.118,
"eval_rougeL": 27.3622,
"eval_rougeLsum": 27.393,
"eval_runtime": 3.3635,
"eval_samples_per_second": 20.812,
"eval_steps_per_second": 5.352,
"step": 742
},
{
"epoch": 7.55,
"learning_rate": 1.879245283018868e-05,
"loss": 1.3435,
"step": 800
},
{
"epoch": 8.0,
"eval_gen_len": 17.2,
"eval_loss": 2.033561944961548,
"eval_rouge1": 35.3471,
"eval_rouge2": 15.9695,
"eval_rougeL": 27.668,
"eval_rougeLsum": 27.6749,
"eval_runtime": 3.4311,
"eval_samples_per_second": 20.402,
"eval_steps_per_second": 5.246,
"step": 848
},
{
"epoch": 8.49,
"learning_rate": 1.737735849056604e-05,
"loss": 1.2624,
"step": 900
},
{
"epoch": 9.0,
"eval_gen_len": 17.1857,
"eval_loss": 2.077875852584839,
"eval_rouge1": 35.9201,
"eval_rouge2": 17.2547,
"eval_rougeL": 27.409,
"eval_rougeLsum": 27.3293,
"eval_runtime": 3.4,
"eval_samples_per_second": 20.588,
"eval_steps_per_second": 5.294,
"step": 954
},
{
"epoch": 9.43,
"learning_rate": 1.5962264150943395e-05,
"loss": 1.1807,
"step": 1000
},
{
"epoch": 10.0,
"eval_gen_len": 17.1286,
"eval_loss": 2.130077600479126,
"eval_rouge1": 35.7061,
"eval_rouge2": 15.9138,
"eval_rougeL": 27.3968,
"eval_rougeLsum": 27.4716,
"eval_runtime": 3.3605,
"eval_samples_per_second": 20.83,
"eval_steps_per_second": 5.356,
"step": 1060
},
{
"epoch": 10.38,
"learning_rate": 1.4547169811320755e-05,
"loss": 1.0972,
"step": 1100
},
{
"epoch": 11.0,
"eval_gen_len": 17.1429,
"eval_loss": 2.1725852489471436,
"eval_rouge1": 34.3194,
"eval_rouge2": 16.1313,
"eval_rougeL": 27.0367,
"eval_rougeLsum": 27.0737,
"eval_runtime": 3.3905,
"eval_samples_per_second": 20.646,
"eval_steps_per_second": 5.309,
"step": 1166
},
{
"epoch": 11.32,
"learning_rate": 1.3132075471698114e-05,
"loss": 1.0224,
"step": 1200
},
{
"epoch": 12.0,
"eval_gen_len": 16.6571,
"eval_loss": 2.1703763008117676,
"eval_rouge1": 34.9278,
"eval_rouge2": 16.7958,
"eval_rougeL": 27.8754,
"eval_rougeLsum": 27.932,
"eval_runtime": 3.3798,
"eval_samples_per_second": 20.711,
"eval_steps_per_second": 5.326,
"step": 1272
},
{
"epoch": 12.26,
"learning_rate": 1.1716981132075474e-05,
"loss": 1.0181,
"step": 1300
},
{
"epoch": 13.0,
"eval_gen_len": 16.7571,
"eval_loss": 2.245802879333496,
"eval_rouge1": 34.472,
"eval_rouge2": 15.9111,
"eval_rougeL": 28.2938,
"eval_rougeLsum": 28.2946,
"eval_runtime": 3.367,
"eval_samples_per_second": 20.79,
"eval_steps_per_second": 5.346,
"step": 1378
},
{
"epoch": 13.21,
"learning_rate": 1.030188679245283e-05,
"loss": 0.9769,
"step": 1400
},
{
"epoch": 14.0,
"eval_gen_len": 16.5429,
"eval_loss": 2.3404934406280518,
"eval_rouge1": 35.1592,
"eval_rouge2": 16.3135,
"eval_rougeL": 29.0956,
"eval_rougeLsum": 29.0858,
"eval_runtime": 3.3904,
"eval_samples_per_second": 20.647,
"eval_steps_per_second": 5.309,
"step": 1484
},
{
"epoch": 14.15,
"learning_rate": 8.900943396226416e-06,
"loss": 0.8866,
"step": 1500
},
{
"epoch": 15.0,
"eval_gen_len": 16.2429,
"eval_loss": 2.3303470611572266,
"eval_rouge1": 34.8732,
"eval_rouge2": 15.6709,
"eval_rougeL": 27.5858,
"eval_rougeLsum": 27.6169,
"eval_runtime": 3.4313,
"eval_samples_per_second": 20.401,
"eval_steps_per_second": 5.246,
"step": 1590
},
{
"epoch": 15.09,
"learning_rate": 7.485849056603774e-06,
"loss": 0.8888,
"step": 1600
},
{
"epoch": 16.0,
"eval_gen_len": 17.5143,
"eval_loss": 2.297647476196289,
"eval_rouge1": 35.3034,
"eval_rouge2": 16.8011,
"eval_rougeL": 27.7988,
"eval_rougeLsum": 27.7569,
"eval_runtime": 3.3934,
"eval_samples_per_second": 20.628,
"eval_steps_per_second": 5.304,
"step": 1696
},
{
"epoch": 16.04,
"learning_rate": 6.070754716981133e-06,
"loss": 0.8194,
"step": 1700
},
{
"epoch": 16.98,
"learning_rate": 4.6556603773584905e-06,
"loss": 0.8358,
"step": 1800
},
{
"epoch": 17.0,
"eval_gen_len": 16.8143,
"eval_loss": 2.334933042526245,
"eval_rouge1": 35.505,
"eval_rouge2": 16.8851,
"eval_rougeL": 28.3651,
"eval_rougeLsum": 28.413,
"eval_runtime": 3.4202,
"eval_samples_per_second": 20.467,
"eval_steps_per_second": 5.263,
"step": 1802
},
{
"epoch": 17.92,
"learning_rate": 3.240566037735849e-06,
"loss": 0.8026,
"step": 1900
},
{
"epoch": 18.0,
"eval_gen_len": 16.6143,
"eval_loss": 2.373809814453125,
"eval_rouge1": 35.2328,
"eval_rouge2": 17.0358,
"eval_rougeL": 28.544,
"eval_rougeLsum": 28.6211,
"eval_runtime": 3.4477,
"eval_samples_per_second": 20.303,
"eval_steps_per_second": 5.221,
"step": 1908
},
{
"epoch": 18.87,
"learning_rate": 1.8254716981132076e-06,
"loss": 0.7487,
"step": 2000
},
{
"epoch": 19.0,
"eval_gen_len": 16.7286,
"eval_loss": 2.4102871417999268,
"eval_rouge1": 34.0793,
"eval_rouge2": 15.4468,
"eval_rougeL": 27.8057,
"eval_rougeLsum": 27.8586,
"eval_runtime": 3.4317,
"eval_samples_per_second": 20.398,
"eval_steps_per_second": 5.245,
"step": 2014
},
{
"epoch": 19.81,
"learning_rate": 4.1037735849056606e-07,
"loss": 0.7722,
"step": 2100
},
{
"epoch": 20.0,
"eval_gen_len": 16.9286,
"eval_loss": 2.3990561962127686,
"eval_rouge1": 34.8116,
"eval_rouge2": 15.8706,
"eval_rougeL": 27.9173,
"eval_rougeLsum": 27.983,
"eval_runtime": 3.3605,
"eval_samples_per_second": 20.83,
"eval_steps_per_second": 5.356,
"step": 2120
},
{
"epoch": 20.0,
"step": 2120,
"total_flos": 1197078338174976.0,
"train_loss": 1.328409050995449,
"train_runtime": 326.8588,
"train_samples_per_second": 25.944,
"train_steps_per_second": 6.486
}
],
"max_steps": 2120,
"num_train_epochs": 20,
"total_flos": 1197078338174976.0,
"trial_name": null,
"trial_params": null
}