|
{ |
|
"best_metric": 3.1917154788970947, |
|
"best_model_checkpoint": "/exports/eddie/scratch/s1970716/models/summarization/longt5_xl_summ_screen_20/checkpoint-28", |
|
"epoch": 6.956521739130435, |
|
"eval_steps": 500, |
|
"global_step": 100, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.001, |
|
"loss": 0.411, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.001, |
|
"loss": 0.4642, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.001, |
|
"loss": 0.4965, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.001, |
|
"loss": 0.7455, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.001, |
|
"loss": 0.4501, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 0.001, |
|
"loss": 0.3804, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 0.001, |
|
"loss": 0.4063, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"eval_gen_len": 71.90828402366864, |
|
"eval_loss": 3.7384819984436035, |
|
"eval_rouge1": 27.9171, |
|
"eval_rouge2": 6.7215, |
|
"eval_rougeL": 17.9315, |
|
"eval_rougeLsum": 24.363, |
|
"eval_runtime": 823.0541, |
|
"eval_samples_per_second": 0.411, |
|
"eval_steps_per_second": 0.205, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 0.001, |
|
"loss": 0.3201, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 0.001, |
|
"loss": 0.3253, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 0.001, |
|
"loss": 0.3215, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 0.001, |
|
"loss": 0.3175, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 0.001, |
|
"loss": 0.3331, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 0.001, |
|
"loss": 0.2811, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 0.001, |
|
"loss": 0.3125, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"eval_gen_len": 96.20414201183432, |
|
"eval_loss": 3.1917154788970947, |
|
"eval_rouge1": 28.1708, |
|
"eval_rouge2": 6.6895, |
|
"eval_rougeL": 18.1637, |
|
"eval_rougeLsum": 24.3987, |
|
"eval_runtime": 1069.4844, |
|
"eval_samples_per_second": 0.316, |
|
"eval_steps_per_second": 0.158, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 0.001, |
|
"loss": 0.2621, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 0.001, |
|
"loss": 0.2194, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 0.001, |
|
"loss": 0.2386, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 0.001, |
|
"loss": 0.2264, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 0.001, |
|
"loss": 0.2002, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 0.001, |
|
"loss": 0.2477, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 0.001, |
|
"loss": 0.2177, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"eval_gen_len": 198.0473372781065, |
|
"eval_loss": 3.9997544288635254, |
|
"eval_rouge1": 29.3167, |
|
"eval_rouge2": 5.9, |
|
"eval_rougeL": 17.3608, |
|
"eval_rougeLsum": 25.6945, |
|
"eval_runtime": 1900.1301, |
|
"eval_samples_per_second": 0.178, |
|
"eval_steps_per_second": 0.089, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"learning_rate": 0.001, |
|
"loss": 0.2069, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 0.001, |
|
"loss": 0.164, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 3.34, |
|
"learning_rate": 0.001, |
|
"loss": 0.1679, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"learning_rate": 0.001, |
|
"loss": 0.1736, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 3.62, |
|
"learning_rate": 0.001, |
|
"loss": 0.1688, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 3.76, |
|
"learning_rate": 0.001, |
|
"loss": 0.1749, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 3.9, |
|
"learning_rate": 0.001, |
|
"loss": 0.1753, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 3.97, |
|
"eval_gen_len": 158.6508875739645, |
|
"eval_loss": 4.228714466094971, |
|
"eval_rouge1": 29.0605, |
|
"eval_rouge2": 6.2534, |
|
"eval_rougeL": 17.5744, |
|
"eval_rougeLsum": 25.6415, |
|
"eval_runtime": 1492.9623, |
|
"eval_samples_per_second": 0.226, |
|
"eval_steps_per_second": 0.113, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"learning_rate": 0.001, |
|
"loss": 0.1656, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 4.17, |
|
"learning_rate": 0.001, |
|
"loss": 0.1144, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 4.31, |
|
"learning_rate": 0.001, |
|
"loss": 0.161, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 4.45, |
|
"learning_rate": 0.001, |
|
"loss": 0.2169, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 4.59, |
|
"learning_rate": 0.001, |
|
"loss": 0.1943, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 4.73, |
|
"learning_rate": 0.001, |
|
"loss": 0.1777, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 4.87, |
|
"learning_rate": 0.001, |
|
"loss": 0.2747, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 4.94, |
|
"eval_gen_len": 118.44378698224853, |
|
"eval_loss": 4.102721214294434, |
|
"eval_rouge1": 31.2245, |
|
"eval_rouge2": 6.5663, |
|
"eval_rougeL": 18.1588, |
|
"eval_rougeLsum": 26.8996, |
|
"eval_runtime": 1188.6007, |
|
"eval_samples_per_second": 0.284, |
|
"eval_steps_per_second": 0.142, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 5.01, |
|
"learning_rate": 0.001, |
|
"loss": 0.1399, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 5.15, |
|
"learning_rate": 0.001, |
|
"loss": 0.0986, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 5.29, |
|
"learning_rate": 0.001, |
|
"loss": 0.1051, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 5.43, |
|
"learning_rate": 0.001, |
|
"loss": 0.1288, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 5.57, |
|
"learning_rate": 0.001, |
|
"loss": 0.1097, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 5.7, |
|
"learning_rate": 0.001, |
|
"loss": 0.1163, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 5.84, |
|
"learning_rate": 0.001, |
|
"loss": 0.1205, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 5.98, |
|
"learning_rate": 0.001, |
|
"loss": 0.1045, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 5.98, |
|
"eval_gen_len": 92.98816568047337, |
|
"eval_loss": 5.058135986328125, |
|
"eval_rouge1": 30.6056, |
|
"eval_rouge2": 6.8892, |
|
"eval_rougeL": 18.4933, |
|
"eval_rougeLsum": 26.4027, |
|
"eval_runtime": 984.3965, |
|
"eval_samples_per_second": 0.343, |
|
"eval_steps_per_second": 0.172, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 6.12, |
|
"learning_rate": 0.001, |
|
"loss": 0.0767, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 6.26, |
|
"learning_rate": 0.001, |
|
"loss": 0.0678, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"learning_rate": 0.001, |
|
"loss": 0.0759, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 6.54, |
|
"learning_rate": 0.001, |
|
"loss": 0.0714, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 6.68, |
|
"learning_rate": 0.001, |
|
"loss": 0.0822, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 6.82, |
|
"learning_rate": 0.001, |
|
"loss": 0.0843, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 6.96, |
|
"learning_rate": 0.001, |
|
"loss": 0.0875, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 6.96, |
|
"eval_gen_len": 160.89644970414201, |
|
"eval_loss": 4.59414529800415, |
|
"eval_rouge1": 32.5234, |
|
"eval_rouge2": 7.3736, |
|
"eval_rougeL": 18.8958, |
|
"eval_rougeLsum": 28.4738, |
|
"eval_runtime": 1504.7392, |
|
"eval_samples_per_second": 0.225, |
|
"eval_steps_per_second": 0.112, |
|
"step": 100 |
|
} |
|
], |
|
"logging_steps": 2, |
|
"max_steps": 140, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"total_flos": 1.7591398064550052e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|