daT5-summariser / trainer_state.json
sarakolding's picture
initial commit
9ab896f
raw
history blame
11.7 kB
{
"best_metric": 2.132361888885498,
"best_model_checkpoint": "./26-125356_megasuperkanin/checkpoint-100000",
"epoch": 0.9769822970807769,
"global_step": 100000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.02,
"learning_rate": 5e-05,
"loss": 2.6761,
"step": 2500
},
{
"epoch": 0.05,
"learning_rate": 5e-05,
"loss": 2.551,
"step": 5000
},
{
"epoch": 0.05,
"eval_gen_len": 28.4674,
"eval_loss": 2.423037052154541,
"eval_rouge1": 0.214,
"eval_rouge2": 0.0668,
"eval_rougeL": 0.1717,
"eval_rougeLsum": 0.1777,
"eval_runtime": 1015.6418,
"eval_samples_per_second": 2.265,
"eval_steps_per_second": 0.284,
"step": 5000
},
{
"epoch": 0.07,
"learning_rate": 5e-05,
"loss": 2.5186,
"step": 7500
},
{
"epoch": 0.1,
"learning_rate": 5e-05,
"loss": 2.4717,
"step": 10000
},
{
"epoch": 0.1,
"eval_gen_len": 25.6604,
"eval_loss": 2.3709843158721924,
"eval_rouge1": 0.2071,
"eval_rouge2": 0.0634,
"eval_rougeL": 0.1686,
"eval_rougeLsum": 0.1745,
"eval_runtime": 951.1096,
"eval_samples_per_second": 2.418,
"eval_steps_per_second": 0.303,
"step": 10000
},
{
"epoch": 0.12,
"learning_rate": 5e-05,
"loss": 2.4593,
"step": 12500
},
{
"epoch": 0.15,
"learning_rate": 5e-05,
"loss": 2.4281,
"step": 15000
},
{
"epoch": 0.15,
"eval_gen_len": 28.8296,
"eval_loss": 2.3228819370269775,
"eval_rouge1": 0.2137,
"eval_rouge2": 0.0662,
"eval_rougeL": 0.1711,
"eval_rougeLsum": 0.1768,
"eval_runtime": 1022.9494,
"eval_samples_per_second": 2.248,
"eval_steps_per_second": 0.282,
"step": 15000
},
{
"epoch": 0.17,
"learning_rate": 5e-05,
"loss": 2.4049,
"step": 17500
},
{
"epoch": 0.2,
"learning_rate": 5e-05,
"loss": 2.3735,
"step": 20000
},
{
"epoch": 0.2,
"eval_gen_len": 29.9183,
"eval_loss": 2.2881429195404053,
"eval_rouge1": 0.2164,
"eval_rouge2": 0.0668,
"eval_rougeL": 0.1735,
"eval_rougeLsum": 0.1808,
"eval_runtime": 1036.2984,
"eval_samples_per_second": 2.219,
"eval_steps_per_second": 0.278,
"step": 20000
},
{
"epoch": 0.22,
"learning_rate": 5e-05,
"loss": 2.3732,
"step": 22500
},
{
"epoch": 0.24,
"learning_rate": 5e-05,
"loss": 2.377,
"step": 25000
},
{
"epoch": 0.24,
"eval_gen_len": 29.5183,
"eval_loss": 2.2759358882904053,
"eval_rouge1": 0.2209,
"eval_rouge2": 0.0694,
"eval_rougeL": 0.1782,
"eval_rougeLsum": 0.1851,
"eval_runtime": 1036.1071,
"eval_samples_per_second": 2.22,
"eval_steps_per_second": 0.278,
"step": 25000
},
{
"epoch": 0.27,
"learning_rate": 5e-05,
"loss": 2.3513,
"step": 27500
},
{
"epoch": 0.29,
"learning_rate": 5e-05,
"loss": 2.3444,
"step": 30000
},
{
"epoch": 0.29,
"eval_gen_len": 29.3183,
"eval_loss": 2.2552034854888916,
"eval_rouge1": 0.2194,
"eval_rouge2": 0.0679,
"eval_rougeL": 0.1757,
"eval_rougeLsum": 0.1829,
"eval_runtime": 1037.4604,
"eval_samples_per_second": 2.217,
"eval_steps_per_second": 0.278,
"step": 30000
},
{
"epoch": 0.32,
"learning_rate": 5e-05,
"loss": 2.3504,
"step": 32500
},
{
"epoch": 0.34,
"learning_rate": 5e-05,
"loss": 2.3203,
"step": 35000
},
{
"epoch": 0.34,
"eval_gen_len": 32.2061,
"eval_loss": 2.235518455505371,
"eval_rouge1": 0.2284,
"eval_rouge2": 0.0722,
"eval_rougeL": 0.1819,
"eval_rougeLsum": 0.1892,
"eval_runtime": 1121.1561,
"eval_samples_per_second": 2.051,
"eval_steps_per_second": 0.257,
"step": 35000
},
{
"epoch": 0.37,
"learning_rate": 5e-05,
"loss": 2.3087,
"step": 37500
},
{
"epoch": 0.39,
"learning_rate": 5e-05,
"loss": 2.3132,
"step": 40000
},
{
"epoch": 0.39,
"eval_gen_len": 29.5452,
"eval_loss": 2.2289836406707764,
"eval_rouge1": 0.2183,
"eval_rouge2": 0.0673,
"eval_rougeL": 0.1759,
"eval_rougeLsum": 0.1827,
"eval_runtime": 1055.2895,
"eval_samples_per_second": 2.179,
"eval_steps_per_second": 0.273,
"step": 40000
},
{
"epoch": 0.42,
"learning_rate": 5e-05,
"loss": 2.3063,
"step": 42500
},
{
"epoch": 0.44,
"learning_rate": 5e-05,
"loss": 2.3116,
"step": 45000
},
{
"epoch": 0.44,
"eval_gen_len": 30.2935,
"eval_loss": 2.218207359313965,
"eval_rouge1": 0.2239,
"eval_rouge2": 0.07,
"eval_rougeL": 0.1798,
"eval_rougeLsum": 0.1879,
"eval_runtime": 1063.5185,
"eval_samples_per_second": 2.163,
"eval_steps_per_second": 0.271,
"step": 45000
},
{
"epoch": 0.46,
"learning_rate": 5e-05,
"loss": 2.3014,
"step": 47500
},
{
"epoch": 0.49,
"learning_rate": 5e-05,
"loss": 2.2852,
"step": 50000
},
{
"epoch": 0.49,
"eval_gen_len": 28.6443,
"eval_loss": 2.2090706825256348,
"eval_rouge1": 0.2251,
"eval_rouge2": 0.0703,
"eval_rougeL": 0.1812,
"eval_rougeLsum": 0.1887,
"eval_runtime": 1045.7282,
"eval_samples_per_second": 2.199,
"eval_steps_per_second": 0.275,
"step": 50000
},
{
"epoch": 0.51,
"learning_rate": 5e-05,
"loss": 2.2963,
"step": 52500
},
{
"epoch": 0.54,
"learning_rate": 5e-05,
"loss": 2.2683,
"step": 55000
},
{
"epoch": 0.54,
"eval_gen_len": 29.9661,
"eval_loss": 2.1879115104675293,
"eval_rouge1": 0.2257,
"eval_rouge2": 0.0716,
"eval_rougeL": 0.1806,
"eval_rougeLsum": 0.1876,
"eval_runtime": 1061.3075,
"eval_samples_per_second": 2.167,
"eval_steps_per_second": 0.271,
"step": 55000
},
{
"epoch": 0.56,
"learning_rate": 5e-05,
"loss": 2.2735,
"step": 57500
},
{
"epoch": 0.59,
"learning_rate": 5e-05,
"loss": 2.2614,
"step": 60000
},
{
"epoch": 0.59,
"eval_gen_len": 30.4435,
"eval_loss": 2.1871089935302734,
"eval_rouge1": 0.2316,
"eval_rouge2": 0.075,
"eval_rougeL": 0.1863,
"eval_rougeLsum": 0.1936,
"eval_runtime": 1083.7377,
"eval_samples_per_second": 2.122,
"eval_steps_per_second": 0.266,
"step": 60000
},
{
"epoch": 0.61,
"learning_rate": 5e-05,
"loss": 2.2735,
"step": 62500
},
{
"epoch": 0.64,
"learning_rate": 5e-05,
"loss": 2.252,
"step": 65000
},
{
"epoch": 0.64,
"eval_gen_len": 30.6239,
"eval_loss": 2.175469160079956,
"eval_rouge1": 0.226,
"eval_rouge2": 0.0729,
"eval_rougeL": 0.1834,
"eval_rougeLsum": 0.1914,
"eval_runtime": 1080.4009,
"eval_samples_per_second": 2.129,
"eval_steps_per_second": 0.267,
"step": 65000
},
{
"epoch": 0.66,
"learning_rate": 5e-05,
"loss": 2.2509,
"step": 67500
},
{
"epoch": 0.68,
"learning_rate": 5e-05,
"loss": 2.262,
"step": 70000
},
{
"epoch": 0.68,
"eval_gen_len": 30.9983,
"eval_loss": 2.16789174079895,
"eval_rouge1": 0.2256,
"eval_rouge2": 0.0716,
"eval_rougeL": 0.1815,
"eval_rougeLsum": 0.1889,
"eval_runtime": 1104.0224,
"eval_samples_per_second": 2.083,
"eval_steps_per_second": 0.261,
"step": 70000
},
{
"epoch": 0.71,
"learning_rate": 5e-05,
"loss": 2.2398,
"step": 72500
},
{
"epoch": 0.73,
"learning_rate": 5e-05,
"loss": 2.228,
"step": 75000
},
{
"epoch": 0.73,
"eval_gen_len": 29.9704,
"eval_loss": 2.1669178009033203,
"eval_rouge1": 0.2253,
"eval_rouge2": 0.0725,
"eval_rougeL": 0.1822,
"eval_rougeLsum": 0.1894,
"eval_runtime": 1052.7669,
"eval_samples_per_second": 2.185,
"eval_steps_per_second": 0.274,
"step": 75000
},
{
"epoch": 0.76,
"learning_rate": 5e-05,
"loss": 2.25,
"step": 77500
},
{
"epoch": 0.78,
"learning_rate": 5e-05,
"loss": 2.234,
"step": 80000
},
{
"epoch": 0.78,
"eval_gen_len": 29.4826,
"eval_loss": 2.1604671478271484,
"eval_rouge1": 0.2283,
"eval_rouge2": 0.0747,
"eval_rougeL": 0.1855,
"eval_rougeLsum": 0.1937,
"eval_runtime": 1075.8159,
"eval_samples_per_second": 2.138,
"eval_steps_per_second": 0.268,
"step": 80000
},
{
"epoch": 0.81,
"learning_rate": 5e-05,
"loss": 2.236,
"step": 82500
},
{
"epoch": 0.83,
"learning_rate": 5e-05,
"loss": 2.2289,
"step": 85000
},
{
"epoch": 0.83,
"eval_gen_len": 30.0213,
"eval_loss": 2.1517326831817627,
"eval_rouge1": 0.2226,
"eval_rouge2": 0.0705,
"eval_rougeL": 0.1801,
"eval_rougeLsum": 0.1873,
"eval_runtime": 1072.8178,
"eval_samples_per_second": 2.144,
"eval_steps_per_second": 0.268,
"step": 85000
},
{
"epoch": 0.85,
"learning_rate": 5e-05,
"loss": 2.2214,
"step": 87500
},
{
"epoch": 0.88,
"learning_rate": 5e-05,
"loss": 2.2043,
"step": 90000
},
{
"epoch": 0.88,
"eval_gen_len": 29.5361,
"eval_loss": 2.1455490589141846,
"eval_rouge1": 0.2265,
"eval_rouge2": 0.075,
"eval_rougeL": 0.1838,
"eval_rougeLsum": 0.1908,
"eval_runtime": 1058.731,
"eval_samples_per_second": 2.172,
"eval_steps_per_second": 0.272,
"step": 90000
},
{
"epoch": 0.9,
"learning_rate": 5e-05,
"loss": 2.2419,
"step": 92500
},
{
"epoch": 0.93,
"learning_rate": 5e-05,
"loss": 2.2259,
"step": 95000
},
{
"epoch": 0.93,
"eval_gen_len": 29.6874,
"eval_loss": 2.1389129161834717,
"eval_rouge1": 0.2287,
"eval_rouge2": 0.0713,
"eval_rougeL": 0.1844,
"eval_rougeLsum": 0.1911,
"eval_runtime": 1069.2344,
"eval_samples_per_second": 2.151,
"eval_steps_per_second": 0.269,
"step": 95000
},
{
"epoch": 0.95,
"learning_rate": 5e-05,
"loss": 2.2202,
"step": 97500
},
{
"epoch": 0.98,
"learning_rate": 5e-05,
"loss": 2.2307,
"step": 100000
},
{
"epoch": 0.98,
"eval_gen_len": 30.7513,
"eval_loss": 2.132361888885498,
"eval_rouge1": 0.2293,
"eval_rouge2": 0.0741,
"eval_rougeL": 0.1845,
"eval_rougeLsum": 0.1924,
"eval_runtime": 1089.9927,
"eval_samples_per_second": 2.11,
"eval_steps_per_second": 0.264,
"step": 100000
}
],
"max_steps": 102356,
"num_train_epochs": 1,
"total_flos": 1.8696291573252096e+17,
"trial_name": null,
"trial_params": null
}