longt5_xl_summ_screen_20 / trainer_state.json
learn3r's picture
Upload 14 files
594471a
{
"best_metric": 3.1917154788970947,
"best_model_checkpoint": "/exports/eddie/scratch/s1970716/models/summarization/longt5_xl_summ_screen_20/checkpoint-28",
"epoch": 6.956521739130435,
"eval_steps": 500,
"global_step": 100,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.14,
"learning_rate": 0.001,
"loss": 0.411,
"step": 2
},
{
"epoch": 0.28,
"learning_rate": 0.001,
"loss": 0.4642,
"step": 4
},
{
"epoch": 0.42,
"learning_rate": 0.001,
"loss": 0.4965,
"step": 6
},
{
"epoch": 0.56,
"learning_rate": 0.001,
"loss": 0.7455,
"step": 8
},
{
"epoch": 0.7,
"learning_rate": 0.001,
"loss": 0.4501,
"step": 10
},
{
"epoch": 0.83,
"learning_rate": 0.001,
"loss": 0.3804,
"step": 12
},
{
"epoch": 0.97,
"learning_rate": 0.001,
"loss": 0.4063,
"step": 14
},
{
"epoch": 0.97,
"eval_gen_len": 71.90828402366864,
"eval_loss": 3.7384819984436035,
"eval_rouge1": 27.9171,
"eval_rouge2": 6.7215,
"eval_rougeL": 17.9315,
"eval_rougeLsum": 24.363,
"eval_runtime": 823.0541,
"eval_samples_per_second": 0.411,
"eval_steps_per_second": 0.205,
"step": 14
},
{
"epoch": 1.11,
"learning_rate": 0.001,
"loss": 0.3201,
"step": 16
},
{
"epoch": 1.25,
"learning_rate": 0.001,
"loss": 0.3253,
"step": 18
},
{
"epoch": 1.39,
"learning_rate": 0.001,
"loss": 0.3215,
"step": 20
},
{
"epoch": 1.53,
"learning_rate": 0.001,
"loss": 0.3175,
"step": 22
},
{
"epoch": 1.67,
"learning_rate": 0.001,
"loss": 0.3331,
"step": 24
},
{
"epoch": 1.81,
"learning_rate": 0.001,
"loss": 0.2811,
"step": 26
},
{
"epoch": 1.95,
"learning_rate": 0.001,
"loss": 0.3125,
"step": 28
},
{
"epoch": 1.95,
"eval_gen_len": 96.20414201183432,
"eval_loss": 3.1917154788970947,
"eval_rouge1": 28.1708,
"eval_rouge2": 6.6895,
"eval_rougeL": 18.1637,
"eval_rougeLsum": 24.3987,
"eval_runtime": 1069.4844,
"eval_samples_per_second": 0.316,
"eval_steps_per_second": 0.158,
"step": 28
},
{
"epoch": 2.09,
"learning_rate": 0.001,
"loss": 0.2621,
"step": 30
},
{
"epoch": 2.23,
"learning_rate": 0.001,
"loss": 0.2194,
"step": 32
},
{
"epoch": 2.37,
"learning_rate": 0.001,
"loss": 0.2386,
"step": 34
},
{
"epoch": 2.5,
"learning_rate": 0.001,
"loss": 0.2264,
"step": 36
},
{
"epoch": 2.64,
"learning_rate": 0.001,
"loss": 0.2002,
"step": 38
},
{
"epoch": 2.78,
"learning_rate": 0.001,
"loss": 0.2477,
"step": 40
},
{
"epoch": 2.92,
"learning_rate": 0.001,
"loss": 0.2177,
"step": 42
},
{
"epoch": 2.99,
"eval_gen_len": 198.0473372781065,
"eval_loss": 3.9997544288635254,
"eval_rouge1": 29.3167,
"eval_rouge2": 5.9,
"eval_rougeL": 17.3608,
"eval_rougeLsum": 25.6945,
"eval_runtime": 1900.1301,
"eval_samples_per_second": 0.178,
"eval_steps_per_second": 0.089,
"step": 43
},
{
"epoch": 3.06,
"learning_rate": 0.001,
"loss": 0.2069,
"step": 44
},
{
"epoch": 3.2,
"learning_rate": 0.001,
"loss": 0.164,
"step": 46
},
{
"epoch": 3.34,
"learning_rate": 0.001,
"loss": 0.1679,
"step": 48
},
{
"epoch": 3.48,
"learning_rate": 0.001,
"loss": 0.1736,
"step": 50
},
{
"epoch": 3.62,
"learning_rate": 0.001,
"loss": 0.1688,
"step": 52
},
{
"epoch": 3.76,
"learning_rate": 0.001,
"loss": 0.1749,
"step": 54
},
{
"epoch": 3.9,
"learning_rate": 0.001,
"loss": 0.1753,
"step": 56
},
{
"epoch": 3.97,
"eval_gen_len": 158.6508875739645,
"eval_loss": 4.228714466094971,
"eval_rouge1": 29.0605,
"eval_rouge2": 6.2534,
"eval_rougeL": 17.5744,
"eval_rougeLsum": 25.6415,
"eval_runtime": 1492.9623,
"eval_samples_per_second": 0.226,
"eval_steps_per_second": 0.113,
"step": 57
},
{
"epoch": 4.03,
"learning_rate": 0.001,
"loss": 0.1656,
"step": 58
},
{
"epoch": 4.17,
"learning_rate": 0.001,
"loss": 0.1144,
"step": 60
},
{
"epoch": 4.31,
"learning_rate": 0.001,
"loss": 0.161,
"step": 62
},
{
"epoch": 4.45,
"learning_rate": 0.001,
"loss": 0.2169,
"step": 64
},
{
"epoch": 4.59,
"learning_rate": 0.001,
"loss": 0.1943,
"step": 66
},
{
"epoch": 4.73,
"learning_rate": 0.001,
"loss": 0.1777,
"step": 68
},
{
"epoch": 4.87,
"learning_rate": 0.001,
"loss": 0.2747,
"step": 70
},
{
"epoch": 4.94,
"eval_gen_len": 118.44378698224853,
"eval_loss": 4.102721214294434,
"eval_rouge1": 31.2245,
"eval_rouge2": 6.5663,
"eval_rougeL": 18.1588,
"eval_rougeLsum": 26.8996,
"eval_runtime": 1188.6007,
"eval_samples_per_second": 0.284,
"eval_steps_per_second": 0.142,
"step": 71
},
{
"epoch": 5.01,
"learning_rate": 0.001,
"loss": 0.1399,
"step": 72
},
{
"epoch": 5.15,
"learning_rate": 0.001,
"loss": 0.0986,
"step": 74
},
{
"epoch": 5.29,
"learning_rate": 0.001,
"loss": 0.1051,
"step": 76
},
{
"epoch": 5.43,
"learning_rate": 0.001,
"loss": 0.1288,
"step": 78
},
{
"epoch": 5.57,
"learning_rate": 0.001,
"loss": 0.1097,
"step": 80
},
{
"epoch": 5.7,
"learning_rate": 0.001,
"loss": 0.1163,
"step": 82
},
{
"epoch": 5.84,
"learning_rate": 0.001,
"loss": 0.1205,
"step": 84
},
{
"epoch": 5.98,
"learning_rate": 0.001,
"loss": 0.1045,
"step": 86
},
{
"epoch": 5.98,
"eval_gen_len": 92.98816568047337,
"eval_loss": 5.058135986328125,
"eval_rouge1": 30.6056,
"eval_rouge2": 6.8892,
"eval_rougeL": 18.4933,
"eval_rougeLsum": 26.4027,
"eval_runtime": 984.3965,
"eval_samples_per_second": 0.343,
"eval_steps_per_second": 0.172,
"step": 86
},
{
"epoch": 6.12,
"learning_rate": 0.001,
"loss": 0.0767,
"step": 88
},
{
"epoch": 6.26,
"learning_rate": 0.001,
"loss": 0.0678,
"step": 90
},
{
"epoch": 6.4,
"learning_rate": 0.001,
"loss": 0.0759,
"step": 92
},
{
"epoch": 6.54,
"learning_rate": 0.001,
"loss": 0.0714,
"step": 94
},
{
"epoch": 6.68,
"learning_rate": 0.001,
"loss": 0.0822,
"step": 96
},
{
"epoch": 6.82,
"learning_rate": 0.001,
"loss": 0.0843,
"step": 98
},
{
"epoch": 6.96,
"learning_rate": 0.001,
"loss": 0.0875,
"step": 100
},
{
"epoch": 6.96,
"eval_gen_len": 160.89644970414201,
"eval_loss": 4.59414529800415,
"eval_rouge1": 32.5234,
"eval_rouge2": 7.3736,
"eval_rougeL": 18.8958,
"eval_rougeLsum": 28.4738,
"eval_runtime": 1504.7392,
"eval_samples_per_second": 0.225,
"eval_steps_per_second": 0.112,
"step": 100
}
],
"logging_steps": 2,
"max_steps": 140,
"num_train_epochs": 10,
"save_steps": 500,
"total_flos": 1.7591398064550052e+18,
"trial_name": null,
"trial_params": null
}