german-jeopardy-longt5-base-256 / trainer_state.json
Marvin
Initial commit
36f2b9a unverified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 19.776824034334766,
"eval_steps": 500,
"global_step": 720,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.99,
"learning_rate": 0.0001,
"loss": 3.6024,
"step": 36
},
{
"epoch": 0.99,
"eval_bleu": 4.4454,
"eval_bp": 0.6832,
"eval_counts_1": 5645,
"eval_counts_2": 1343,
"eval_counts_3": 424,
"eval_counts_4": 109,
"eval_exact_match": 0.0005,
"eval_f1": 0.2236,
"eval_gen_len": 11.6338,
"eval_loss": 2.468198776245117,
"eval_precisions_1": 36.6844,
"eval_precisions_2": 10.1866,
"eval_precisions_3": 3.8616,
"eval_precisions_4": 1.242,
"eval_ref_len": 21250,
"eval_rouge1": 0.2285,
"eval_rouge2": 0.0824,
"eval_rougeL": 0.2192,
"eval_rougeLsum": 0.2188,
"eval_runtime": 813.9917,
"eval_samples_per_second": 2.708,
"eval_steps_per_second": 0.677,
"eval_sys_len": 15388,
"eval_totals_1": 15388,
"eval_totals_2": 13184,
"eval_totals_3": 10980,
"eval_totals_4": 8776,
"step": 36
},
{
"epoch": 1.98,
"learning_rate": 0.0001,
"loss": 2.9671,
"step": 72
},
{
"epoch": 1.98,
"eval_bleu": 5.7163,
"eval_bp": 0.7259,
"eval_counts_1": 5988,
"eval_counts_2": 1562,
"eval_counts_3": 569,
"eval_counts_4": 179,
"eval_exact_match": 0.0018,
"eval_f1": 0.2401,
"eval_gen_len": 12.314,
"eval_loss": 2.244511842727661,
"eval_precisions_1": 37.2064,
"eval_precisions_2": 11.2455,
"eval_precisions_3": 4.8691,
"eval_precisions_4": 1.8878,
"eval_ref_len": 21250,
"eval_rouge1": 0.2465,
"eval_rouge2": 0.0971,
"eval_rougeL": 0.2371,
"eval_rougeLsum": 0.2371,
"eval_runtime": 802.4783,
"eval_samples_per_second": 2.746,
"eval_steps_per_second": 0.687,
"eval_sys_len": 16094,
"eval_totals_1": 16094,
"eval_totals_2": 13890,
"eval_totals_3": 11686,
"eval_totals_4": 9482,
"step": 72
},
{
"epoch": 2.99,
"learning_rate": 0.0001,
"loss": 2.6324,
"step": 109
},
{
"epoch": 2.99,
"eval_bleu": 6.9028,
"eval_bp": 0.7887,
"eval_counts_1": 6539,
"eval_counts_2": 1846,
"eval_counts_3": 702,
"eval_counts_4": 240,
"eval_exact_match": 0.0027,
"eval_f1": 0.2663,
"eval_gen_len": 13.2319,
"eval_loss": 2.122749090194702,
"eval_precisions_1": 38.0772,
"eval_precisions_2": 12.3322,
"eval_precisions_3": 5.4994,
"eval_precisions_4": 2.2725,
"eval_ref_len": 21250,
"eval_rouge1": 0.2729,
"eval_rouge2": 0.1154,
"eval_rougeL": 0.2601,
"eval_rougeLsum": 0.2604,
"eval_runtime": 822.9261,
"eval_samples_per_second": 2.678,
"eval_steps_per_second": 0.67,
"eval_sys_len": 17173,
"eval_totals_1": 17173,
"eval_totals_2": 14969,
"eval_totals_3": 12765,
"eval_totals_4": 10561,
"step": 109
},
{
"epoch": 3.98,
"learning_rate": 0.0001,
"loss": 2.5557,
"step": 145
},
{
"epoch": 3.98,
"eval_bleu": 7.3331,
"eval_bp": 0.7179,
"eval_counts_1": 6491,
"eval_counts_2": 1923,
"eval_counts_3": 752,
"eval_counts_4": 275,
"eval_exact_match": 0.0059,
"eval_f1": 0.2729,
"eval_gen_len": 12.0962,
"eval_loss": 2.035691022872925,
"eval_precisions_1": 40.6679,
"eval_precisions_2": 13.9783,
"eval_precisions_3": 6.5091,
"eval_precisions_4": 2.9415,
"eval_ref_len": 21250,
"eval_rouge1": 0.2783,
"eval_rouge2": 0.1214,
"eval_rougeL": 0.2676,
"eval_rougeLsum": 0.2678,
"eval_runtime": 786.7967,
"eval_samples_per_second": 2.801,
"eval_steps_per_second": 0.7,
"eval_sys_len": 15961,
"eval_totals_1": 15961,
"eval_totals_2": 13757,
"eval_totals_3": 11553,
"eval_totals_4": 9349,
"step": 145
},
{
"epoch": 5.0,
"learning_rate": 0.0001,
"loss": 2.3785,
"step": 182
},
{
"epoch": 5.0,
"eval_bleu": 8.2007,
"eval_bp": 0.7463,
"eval_counts_1": 6808,
"eval_counts_2": 2113,
"eval_counts_3": 855,
"eval_counts_4": 328,
"eval_exact_match": 0.0064,
"eval_f1": 0.2892,
"eval_gen_len": 12.6819,
"eval_loss": 1.9824347496032715,
"eval_precisions_1": 41.4137,
"eval_precisions_2": 14.8437,
"eval_precisions_3": 7.1066,
"eval_precisions_4": 3.3377,
"eval_ref_len": 21250,
"eval_rouge1": 0.2948,
"eval_rouge2": 0.1326,
"eval_rougeL": 0.2825,
"eval_rougeLsum": 0.2825,
"eval_runtime": 806.3535,
"eval_samples_per_second": 2.733,
"eval_steps_per_second": 0.683,
"eval_sys_len": 16439,
"eval_totals_1": 16439,
"eval_totals_2": 14235,
"eval_totals_3": 12031,
"eval_totals_4": 9827,
"step": 182
},
{
"epoch": 5.99,
"learning_rate": 0.0001,
"loss": 2.3396,
"step": 218
},
{
"epoch": 5.99,
"eval_bleu": 8.639,
"eval_bp": 0.7702,
"eval_counts_1": 7033,
"eval_counts_2": 2194,
"eval_counts_3": 886,
"eval_counts_4": 364,
"eval_exact_match": 0.0086,
"eval_f1": 0.3,
"eval_gen_len": 13.0254,
"eval_loss": 1.9448895454406738,
"eval_precisions_1": 41.7364,
"eval_precisions_2": 14.9792,
"eval_precisions_3": 7.1205,
"eval_precisions_4": 3.555,
"eval_ref_len": 21250,
"eval_rouge1": 0.3044,
"eval_rouge2": 0.1373,
"eval_rougeL": 0.292,
"eval_rougeLsum": 0.2922,
"eval_runtime": 473.2306,
"eval_samples_per_second": 4.657,
"eval_steps_per_second": 1.164,
"eval_sys_len": 16851,
"eval_totals_1": 16851,
"eval_totals_2": 14647,
"eval_totals_3": 12443,
"eval_totals_4": 10239,
"step": 218
},
{
"epoch": 6.98,
"learning_rate": 0.0001,
"loss": 2.2557,
"step": 254
},
{
"epoch": 6.98,
"eval_bleu": 9.049,
"eval_bp": 0.7515,
"eval_counts_1": 7167,
"eval_counts_2": 2285,
"eval_counts_3": 939,
"eval_counts_4": 389,
"eval_exact_match": 0.0095,
"eval_f1": 0.3119,
"eval_gen_len": 12.7119,
"eval_loss": 1.8937886953353882,
"eval_precisions_1": 43.3602,
"eval_precisions_2": 15.9511,
"eval_precisions_3": 7.7469,
"eval_precisions_4": 3.9226,
"eval_ref_len": 21250,
"eval_rouge1": 0.3166,
"eval_rouge2": 0.1428,
"eval_rougeL": 0.3043,
"eval_rougeLsum": 0.3046,
"eval_runtime": 453.3958,
"eval_samples_per_second": 4.861,
"eval_steps_per_second": 1.215,
"eval_sys_len": 16529,
"eval_totals_1": 16529,
"eval_totals_2": 14325,
"eval_totals_3": 12121,
"eval_totals_4": 9917,
"step": 254
},
{
"epoch": 7.99,
"learning_rate": 0.0001,
"loss": 2.1168,
"step": 291
},
{
"epoch": 7.99,
"eval_bleu": 9.6447,
"eval_bp": 0.7708,
"eval_counts_1": 7347,
"eval_counts_2": 2425,
"eval_counts_3": 1021,
"eval_counts_4": 425,
"eval_exact_match": 0.0104,
"eval_f1": 0.3211,
"eval_gen_len": 12.9374,
"eval_loss": 1.857459306716919,
"eval_precisions_1": 43.5765,
"eval_precisions_2": 16.5461,
"eval_precisions_3": 8.1995,
"eval_precisions_4": 4.1472,
"eval_ref_len": 21250,
"eval_rouge1": 0.3258,
"eval_rouge2": 0.1505,
"eval_rougeL": 0.3137,
"eval_rougeLsum": 0.3142,
"eval_runtime": 457.8255,
"eval_samples_per_second": 4.814,
"eval_steps_per_second": 1.204,
"eval_sys_len": 16860,
"eval_totals_1": 16860,
"eval_totals_2": 14656,
"eval_totals_3": 12452,
"eval_totals_4": 10248,
"step": 291
},
{
"epoch": 8.98,
"learning_rate": 0.0001,
"loss": 2.1105,
"step": 327
},
{
"epoch": 8.98,
"eval_bleu": 9.9436,
"eval_bp": 0.7807,
"eval_counts_1": 7460,
"eval_counts_2": 2461,
"eval_counts_3": 1061,
"eval_counts_4": 449,
"eval_exact_match": 0.0095,
"eval_f1": 0.3267,
"eval_gen_len": 13.1828,
"eval_loss": 1.8283559083938599,
"eval_precisions_1": 43.7948,
"eval_precisions_2": 16.5947,
"eval_precisions_3": 8.4033,
"eval_precisions_4": 4.3082,
"eval_ref_len": 21250,
"eval_rouge1": 0.3317,
"eval_rouge2": 0.1521,
"eval_rougeL": 0.3187,
"eval_rougeLsum": 0.3191,
"eval_runtime": 464.6,
"eval_samples_per_second": 4.744,
"eval_steps_per_second": 1.186,
"eval_sys_len": 17034,
"eval_totals_1": 17034,
"eval_totals_2": 14830,
"eval_totals_3": 12626,
"eval_totals_4": 10422,
"step": 327
},
{
"epoch": 10.0,
"learning_rate": 0.0001,
"loss": 1.9913,
"step": 364
},
{
"epoch": 10.0,
"eval_bleu": 10.3601,
"eval_bp": 0.7791,
"eval_counts_1": 7547,
"eval_counts_2": 2537,
"eval_counts_3": 1105,
"eval_counts_4": 487,
"eval_exact_match": 0.0113,
"eval_f1": 0.3316,
"eval_gen_len": 13.0358,
"eval_loss": 1.8056522607803345,
"eval_precisions_1": 44.3811,
"eval_precisions_2": 17.1407,
"eval_precisions_3": 8.7719,
"eval_precisions_4": 4.6858,
"eval_ref_len": 21250,
"eval_rouge1": 0.335,
"eval_rouge2": 0.1566,
"eval_rougeL": 0.323,
"eval_rougeLsum": 0.3233,
"eval_runtime": 492.674,
"eval_samples_per_second": 4.474,
"eval_steps_per_second": 1.118,
"eval_sys_len": 17005,
"eval_totals_1": 17005,
"eval_totals_2": 14801,
"eval_totals_3": 12597,
"eval_totals_4": 10393,
"step": 364
},
{
"epoch": 10.99,
"learning_rate": 0.0001,
"loss": 1.9943,
"step": 400
},
{
"epoch": 10.99,
"eval_bleu": 10.5378,
"eval_bp": 0.7697,
"eval_counts_1": 7629,
"eval_counts_2": 2574,
"eval_counts_3": 1131,
"eval_counts_4": 496,
"eval_exact_match": 0.0113,
"eval_f1": 0.3385,
"eval_gen_len": 13.0154,
"eval_loss": 1.7973003387451172,
"eval_precisions_1": 45.2975,
"eval_precisions_2": 17.5844,
"eval_precisions_3": 9.096,
"eval_precisions_4": 4.8485,
"eval_ref_len": 21250,
"eval_rouge1": 0.343,
"eval_rouge2": 0.1594,
"eval_rougeL": 0.3296,
"eval_rougeLsum": 0.33,
"eval_runtime": 454.7448,
"eval_samples_per_second": 4.847,
"eval_steps_per_second": 1.212,
"eval_sys_len": 16842,
"eval_totals_1": 16842,
"eval_totals_2": 14638,
"eval_totals_3": 12434,
"eval_totals_4": 10230,
"step": 400
},
{
"epoch": 11.98,
"learning_rate": 0.0001,
"loss": 1.941,
"step": 436
},
{
"epoch": 11.98,
"eval_bleu": 10.8273,
"eval_bp": 0.7848,
"eval_counts_1": 7681,
"eval_counts_2": 2606,
"eval_counts_3": 1164,
"eval_counts_4": 528,
"eval_exact_match": 0.0132,
"eval_f1": 0.3385,
"eval_gen_len": 13.1361,
"eval_loss": 1.777303695678711,
"eval_precisions_1": 44.905,
"eval_precisions_2": 17.4888,
"eval_precisions_3": 9.1675,
"eval_precisions_4": 5.0319,
"eval_ref_len": 21250,
"eval_rouge1": 0.3421,
"eval_rouge2": 0.1607,
"eval_rougeL": 0.3295,
"eval_rougeLsum": 0.3294,
"eval_runtime": 458.5033,
"eval_samples_per_second": 4.807,
"eval_steps_per_second": 1.202,
"eval_sys_len": 17105,
"eval_totals_1": 17105,
"eval_totals_2": 14901,
"eval_totals_3": 12697,
"eval_totals_4": 10493,
"step": 436
},
{
"epoch": 12.99,
"learning_rate": 0.0001,
"loss": 1.8453,
"step": 473
},
{
"epoch": 12.99,
"eval_bleu": 11.2687,
"eval_bp": 0.7972,
"eval_counts_1": 7817,
"eval_counts_2": 2700,
"eval_counts_3": 1224,
"eval_counts_4": 560,
"eval_exact_match": 0.0127,
"eval_f1": 0.3447,
"eval_gen_len": 13.5018,
"eval_loss": 1.7595148086547852,
"eval_precisions_1": 45.1224,
"eval_precisions_2": 17.8571,
"eval_precisions_3": 9.4766,
"eval_precisions_4": 5.2278,
"eval_ref_len": 21250,
"eval_rouge1": 0.3492,
"eval_rouge2": 0.1662,
"eval_rougeL": 0.3367,
"eval_rougeLsum": 0.3367,
"eval_runtime": 465.5444,
"eval_samples_per_second": 4.734,
"eval_steps_per_second": 1.184,
"eval_sys_len": 17324,
"eval_totals_1": 17324,
"eval_totals_2": 15120,
"eval_totals_3": 12916,
"eval_totals_4": 10712,
"step": 473
},
{
"epoch": 13.98,
"learning_rate": 0.0001,
"loss": 1.85,
"step": 509
},
{
"epoch": 13.98,
"eval_bleu": 10.9825,
"eval_bp": 0.8025,
"eval_counts_1": 7792,
"eval_counts_2": 2642,
"eval_counts_3": 1182,
"eval_counts_4": 537,
"eval_exact_match": 0.0127,
"eval_f1": 0.3416,
"eval_gen_len": 13.5395,
"eval_loss": 1.7414402961730957,
"eval_precisions_1": 44.7379,
"eval_precisions_2": 17.3667,
"eval_precisions_3": 9.086,
"eval_precisions_4": 4.9699,
"eval_ref_len": 21250,
"eval_rouge1": 0.3458,
"eval_rouge2": 0.1632,
"eval_rougeL": 0.3322,
"eval_rougeLsum": 0.3322,
"eval_runtime": 468.8552,
"eval_samples_per_second": 4.701,
"eval_steps_per_second": 1.175,
"eval_sys_len": 17417,
"eval_totals_1": 17417,
"eval_totals_2": 15213,
"eval_totals_3": 13009,
"eval_totals_4": 10805,
"step": 509
},
{
"epoch": 15.0,
"learning_rate": 0.0001,
"loss": 1.7588,
"step": 546
},
{
"epoch": 15.0,
"eval_bleu": 11.3189,
"eval_bp": 0.7939,
"eval_counts_1": 7827,
"eval_counts_2": 2702,
"eval_counts_3": 1223,
"eval_counts_4": 569,
"eval_exact_match": 0.015,
"eval_f1": 0.3446,
"eval_gen_len": 13.3026,
"eval_loss": 1.7346255779266357,
"eval_precisions_1": 45.3345,
"eval_precisions_2": 17.9404,
"eval_precisions_3": 9.5123,
"eval_precisions_4": 5.3412,
"eval_ref_len": 21250,
"eval_rouge1": 0.3487,
"eval_rouge2": 0.1661,
"eval_rougeL": 0.3355,
"eval_rougeLsum": 0.3354,
"eval_runtime": 464.8491,
"eval_samples_per_second": 4.741,
"eval_steps_per_second": 1.185,
"eval_sys_len": 17265,
"eval_totals_1": 17265,
"eval_totals_2": 15061,
"eval_totals_3": 12857,
"eval_totals_4": 10653,
"step": 546
},
{
"epoch": 15.99,
"learning_rate": 0.0001,
"loss": 1.7663,
"step": 582
},
{
"epoch": 15.99,
"eval_bleu": 11.5245,
"eval_bp": 0.8032,
"eval_counts_1": 7946,
"eval_counts_2": 2757,
"eval_counts_3": 1245,
"eval_counts_4": 581,
"eval_exact_match": 0.0154,
"eval_f1": 0.3501,
"eval_gen_len": 13.4515,
"eval_loss": 1.7190728187561035,
"eval_precisions_1": 45.5855,
"eval_precisions_2": 18.106,
"eval_precisions_3": 9.56,
"eval_precisions_4": 5.3702,
"eval_ref_len": 21250,
"eval_rouge1": 0.3544,
"eval_rouge2": 0.1695,
"eval_rougeL": 0.3418,
"eval_rougeLsum": 0.3416,
"eval_runtime": 465.8123,
"eval_samples_per_second": 4.732,
"eval_steps_per_second": 1.183,
"eval_sys_len": 17431,
"eval_totals_1": 17431,
"eval_totals_2": 15227,
"eval_totals_3": 13023,
"eval_totals_4": 10819,
"step": 582
},
{
"epoch": 16.98,
"learning_rate": 0.0001,
"loss": 1.7317,
"step": 618
},
{
"epoch": 16.98,
"eval_bleu": 12.0845,
"eval_bp": 0.8212,
"eval_counts_1": 8068,
"eval_counts_2": 2844,
"eval_counts_3": 1325,
"eval_counts_4": 633,
"eval_exact_match": 0.0163,
"eval_f1": 0.3527,
"eval_gen_len": 13.77,
"eval_loss": 1.7133468389511108,
"eval_precisions_1": 45.4484,
"eval_precisions_2": 18.2917,
"eval_precisions_3": 9.9296,
"eval_precisions_4": 5.6822,
"eval_ref_len": 21250,
"eval_rouge1": 0.3575,
"eval_rouge2": 0.1746,
"eval_rougeL": 0.3445,
"eval_rougeLsum": 0.3447,
"eval_runtime": 458.8154,
"eval_samples_per_second": 4.804,
"eval_steps_per_second": 1.201,
"eval_sys_len": 17752,
"eval_totals_1": 17752,
"eval_totals_2": 15548,
"eval_totals_3": 13344,
"eval_totals_4": 11140,
"step": 618
},
{
"epoch": 17.99,
"learning_rate": 0.0001,
"loss": 1.6421,
"step": 655
},
{
"epoch": 17.99,
"eval_bleu": 11.877,
"eval_bp": 0.8091,
"eval_counts_1": 8003,
"eval_counts_2": 2823,
"eval_counts_3": 1301,
"eval_counts_4": 609,
"eval_exact_match": 0.015,
"eval_f1": 0.353,
"eval_gen_len": 13.4669,
"eval_loss": 1.719835877418518,
"eval_precisions_1": 45.6401,
"eval_precisions_2": 18.4137,
"eval_precisions_3": 9.9109,
"eval_precisions_4": 5.5754,
"eval_ref_len": 21250,
"eval_rouge1": 0.3576,
"eval_rouge2": 0.1737,
"eval_rougeL": 0.3447,
"eval_rougeLsum": 0.3448,
"eval_runtime": 467.8501,
"eval_samples_per_second": 4.711,
"eval_steps_per_second": 1.178,
"eval_sys_len": 17535,
"eval_totals_1": 17535,
"eval_totals_2": 15331,
"eval_totals_3": 13127,
"eval_totals_4": 10923,
"step": 655
},
{
"epoch": 18.98,
"learning_rate": 0.0001,
"loss": 1.6543,
"step": 691
},
{
"epoch": 18.98,
"eval_bleu": 11.8679,
"eval_bp": 0.824,
"eval_counts_1": 8031,
"eval_counts_2": 2817,
"eval_counts_3": 1294,
"eval_counts_4": 612,
"eval_exact_match": 0.015,
"eval_f1": 0.351,
"eval_gen_len": 13.8648,
"eval_loss": 1.715085506439209,
"eval_precisions_1": 45.1104,
"eval_precisions_2": 18.0588,
"eval_precisions_3": 9.6603,
"eval_precisions_4": 5.4687,
"eval_ref_len": 21250,
"eval_rouge1": 0.3567,
"eval_rouge2": 0.1734,
"eval_rougeL": 0.3435,
"eval_rougeLsum": 0.3431,
"eval_runtime": 748.2265,
"eval_samples_per_second": 2.946,
"eval_steps_per_second": 0.736,
"eval_sys_len": 17803,
"eval_totals_1": 17803,
"eval_totals_2": 15599,
"eval_totals_3": 13395,
"eval_totals_4": 11191,
"step": 691
},
{
"epoch": 19.78,
"learning_rate": 0.0001,
"loss": 1.5702,
"step": 720
},
{
"epoch": 19.78,
"eval_bleu": 12.1229,
"eval_bp": 0.7945,
"eval_counts_1": 7996,
"eval_counts_2": 2850,
"eval_counts_3": 1330,
"eval_counts_4": 639,
"eval_exact_match": 0.0168,
"eval_f1": 0.3569,
"eval_gen_len": 13.3367,
"eval_loss": 1.7079344987869263,
"eval_precisions_1": 46.2865,
"eval_precisions_2": 18.9105,
"eval_precisions_3": 10.3365,
"eval_precisions_4": 5.9927,
"eval_ref_len": 21250,
"eval_rouge1": 0.3618,
"eval_rouge2": 0.1769,
"eval_rougeL": 0.3485,
"eval_rougeLsum": 0.348,
"eval_runtime": 880.8231,
"eval_samples_per_second": 2.502,
"eval_steps_per_second": 0.626,
"eval_sys_len": 17275,
"eval_totals_1": 17275,
"eval_totals_2": 15071,
"eval_totals_3": 12867,
"eval_totals_4": 10663,
"step": 720
},
{
"epoch": 19.78,
"step": 720,
"total_flos": 2.52283256045568e+17,
"train_loss": 2.1398978657192655,
"train_runtime": 23260.8504,
"train_samples_per_second": 8.008,
"train_steps_per_second": 0.031
}
],
"logging_steps": 500,
"max_steps": 720,
"num_train_epochs": 20,
"save_steps": 500,
"total_flos": 2.52283256045568e+17,
"trial_name": null,
"trial_params": null
}