german-jeopardy-longt5-large-128 / trainer_state.json
Marvin
Initial commit
f35cffd unverified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 19.789564097058193,
"eval_steps": 500,
"global_step": 1440,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.99,
"learning_rate": 0.0001,
"loss": 7.5882,
"step": 72
},
{
"epoch": 0.99,
"eval_bleu": 0.0872,
"eval_bp": 0.6461,
"eval_counts_1": 3993,
"eval_counts_2": 105,
"eval_counts_3": 0,
"eval_counts_4": 0,
"eval_exact_match": 0.0,
"eval_f1": 0.1155,
"eval_gen_len": 9.7105,
"eval_loss": 5.682333469390869,
"eval_precisions_1": 26.998,
"eval_precisions_2": 0.8343,
"eval_precisions_3": 0.0048,
"eval_precisions_4": 0.0031,
"eval_ref_len": 21250,
"eval_rouge1": 0.1101,
"eval_rouge2": 0.0077,
"eval_rougeL": 0.1078,
"eval_rougeLsum": 0.1076,
"eval_runtime": 1951.3051,
"eval_samples_per_second": 1.13,
"eval_steps_per_second": 0.565,
"eval_sys_len": 14790,
"eval_totals_1": 14790,
"eval_totals_2": 12586,
"eval_totals_3": 10382,
"eval_totals_4": 8178,
"step": 72
},
{
"epoch": 1.99,
"learning_rate": 0.0001,
"loss": 5.2903,
"step": 145
},
{
"epoch": 1.99,
"eval_bleu": 0.351,
"eval_bp": 0.8828,
"eval_counts_1": 3827,
"eval_counts_2": 229,
"eval_counts_3": 32,
"eval_counts_4": 0,
"eval_exact_match": 0.0,
"eval_f1": 0.0964,
"eval_gen_len": 16.7005,
"eval_loss": 4.872079372406006,
"eval_precisions_1": 20.2551,
"eval_precisions_2": 1.3721,
"eval_precisions_3": 0.2209,
"eval_precisions_4": 0.0041,
"eval_ref_len": 21250,
"eval_rouge1": 0.0924,
"eval_rouge2": 0.015,
"eval_rougeL": 0.091,
"eval_rougeLsum": 0.0909,
"eval_runtime": 3438.1674,
"eval_samples_per_second": 0.641,
"eval_steps_per_second": 0.321,
"eval_sys_len": 18894,
"eval_totals_1": 18894,
"eval_totals_2": 16690,
"eval_totals_3": 14486,
"eval_totals_4": 12282,
"step": 145
},
{
"epoch": 3.0,
"learning_rate": 0.0001,
"loss": 4.6636,
"step": 218
},
{
"epoch": 3.0,
"eval_bleu": 0.2933,
"eval_bp": 0.6758,
"eval_counts_1": 3638,
"eval_counts_2": 174,
"eval_counts_3": 21,
"eval_counts_4": 0,
"eval_exact_match": 0.0,
"eval_f1": 0.0925,
"eval_gen_len": 8.9197,
"eval_loss": 4.280586242675781,
"eval_precisions_1": 23.8276,
"eval_precisions_2": 1.3319,
"eval_precisions_3": 0.1934,
"eval_precisions_4": 0.0058,
"eval_ref_len": 21250,
"eval_rouge1": 0.0884,
"eval_rouge2": 0.012,
"eval_rougeL": 0.0876,
"eval_rougeLsum": 0.0874,
"eval_runtime": 2326.5895,
"eval_samples_per_second": 0.947,
"eval_steps_per_second": 0.474,
"eval_sys_len": 15268,
"eval_totals_1": 15268,
"eval_totals_2": 13064,
"eval_totals_3": 10860,
"eval_totals_4": 8656,
"step": 218
},
{
"epoch": 4.0,
"learning_rate": 0.0001,
"loss": 4.2229,
"step": 291
},
{
"epoch": 4.0,
"eval_bleu": 0.2288,
"eval_bp": 1.0,
"eval_counts_1": 4274,
"eval_counts_2": 240,
"eval_counts_3": 24,
"eval_counts_4": 0,
"eval_exact_match": 0.0,
"eval_f1": 0.1023,
"eval_gen_len": 24.7015,
"eval_loss": 3.9210410118103027,
"eval_precisions_1": 14.583,
"eval_precisions_2": 0.8855,
"eval_precisions_3": 0.0964,
"eval_precisions_4": 0.0022,
"eval_ref_len": 21250,
"eval_rouge1": 0.0894,
"eval_rouge2": 0.0109,
"eval_rougeL": 0.0849,
"eval_rougeLsum": 0.0849,
"eval_runtime": 2975.0462,
"eval_samples_per_second": 0.741,
"eval_steps_per_second": 0.37,
"eval_sys_len": 29308,
"eval_totals_1": 29308,
"eval_totals_2": 27104,
"eval_totals_3": 24900,
"eval_totals_4": 22696,
"step": 291
},
{
"epoch": 4.99,
"learning_rate": 0.0001,
"loss": 3.9434,
"step": 363
},
{
"epoch": 4.99,
"eval_bleu": 0.4204,
"eval_bp": 0.7465,
"eval_counts_1": 3652,
"eval_counts_2": 218,
"eval_counts_3": 35,
"eval_counts_4": 1,
"eval_exact_match": 0.0,
"eval_f1": 0.0898,
"eval_gen_len": 12.3049,
"eval_loss": 3.690653085708618,
"eval_precisions_1": 22.2114,
"eval_precisions_2": 1.5311,
"eval_precisions_3": 0.2908,
"eval_precisions_4": 0.0102,
"eval_ref_len": 21250,
"eval_rouge1": 0.0856,
"eval_rouge2": 0.0141,
"eval_rougeL": 0.0843,
"eval_rougeLsum": 0.0842,
"eval_runtime": 3036.8902,
"eval_samples_per_second": 0.726,
"eval_steps_per_second": 0.363,
"eval_sys_len": 16442,
"eval_totals_1": 16442,
"eval_totals_2": 14238,
"eval_totals_3": 12034,
"eval_totals_4": 9830,
"step": 363
},
{
"epoch": 5.99,
"learning_rate": 0.0001,
"loss": 3.6152,
"step": 436
},
{
"epoch": 5.99,
"eval_bleu": 1.0505,
"eval_bp": 0.968,
"eval_counts_1": 4103,
"eval_counts_2": 341,
"eval_counts_3": 77,
"eval_counts_4": 11,
"eval_exact_match": 0.0,
"eval_f1": 0.112,
"eval_gen_len": 14.3607,
"eval_loss": 3.460298538208008,
"eval_precisions_1": 19.9359,
"eval_precisions_2": 1.8556,
"eval_precisions_3": 0.4761,
"eval_precisions_4": 0.0787,
"eval_ref_len": 21250,
"eval_rouge1": 0.107,
"eval_rouge2": 0.019,
"eval_rougeL": 0.1023,
"eval_rougeLsum": 0.1024,
"eval_runtime": 3225.717,
"eval_samples_per_second": 0.683,
"eval_steps_per_second": 0.342,
"eval_sys_len": 20581,
"eval_totals_1": 20581,
"eval_totals_2": 18377,
"eval_totals_3": 16173,
"eval_totals_4": 13969,
"step": 436
},
{
"epoch": 7.0,
"learning_rate": 0.0001,
"loss": 3.3814,
"step": 509
},
{
"epoch": 7.0,
"eval_bleu": 2.3489,
"eval_bp": 0.8218,
"eval_counts_1": 4342,
"eval_counts_2": 675,
"eval_counts_3": 218,
"eval_counts_4": 43,
"eval_exact_match": 0.0005,
"eval_f1": 0.1308,
"eval_gen_len": 10.2418,
"eval_loss": 3.2883455753326416,
"eval_precisions_1": 24.4441,
"eval_precisions_2": 4.3383,
"eval_precisions_3": 1.6323,
"eval_precisions_4": 0.3856,
"eval_ref_len": 21250,
"eval_rouge1": 0.1264,
"eval_rouge2": 0.0353,
"eval_rougeL": 0.1234,
"eval_rougeLsum": 0.1234,
"eval_runtime": 2402.3288,
"eval_samples_per_second": 0.917,
"eval_steps_per_second": 0.459,
"eval_sys_len": 17763,
"eval_totals_1": 17763,
"eval_totals_2": 15559,
"eval_totals_3": 13355,
"eval_totals_4": 11151,
"step": 509
},
{
"epoch": 8.0,
"learning_rate": 0.0001,
"loss": 3.1711,
"step": 582
},
{
"epoch": 8.0,
"eval_bleu": 2.6207,
"eval_bp": 0.9273,
"eval_counts_1": 4820,
"eval_counts_2": 856,
"eval_counts_3": 246,
"eval_counts_4": 44,
"eval_exact_match": 0.0005,
"eval_f1": 0.1547,
"eval_gen_len": 14.3249,
"eval_loss": 3.0987935066223145,
"eval_precisions_1": 24.3939,
"eval_precisions_2": 4.8761,
"eval_precisions_3": 1.6025,
"eval_precisions_4": 0.3347,
"eval_ref_len": 21250,
"eval_rouge1": 0.1503,
"eval_rouge2": 0.0465,
"eval_rougeL": 0.1455,
"eval_rougeLsum": 0.1457,
"eval_runtime": 2969.3248,
"eval_samples_per_second": 0.742,
"eval_steps_per_second": 0.371,
"eval_sys_len": 19759,
"eval_totals_1": 19759,
"eval_totals_2": 17555,
"eval_totals_3": 15351,
"eval_totals_4": 13147,
"step": 582
},
{
"epoch": 8.99,
"learning_rate": 0.0001,
"loss": 3.0147,
"step": 654
},
{
"epoch": 8.99,
"eval_bleu": 3.4764,
"eval_bp": 0.8739,
"eval_counts_1": 5167,
"eval_counts_2": 1066,
"eval_counts_3": 321,
"eval_counts_4": 76,
"eval_exact_match": 0.0018,
"eval_f1": 0.1816,
"eval_gen_len": 14.3067,
"eval_loss": 2.9539589881896973,
"eval_precisions_1": 27.5941,
"eval_precisions_2": 6.4524,
"eval_precisions_3": 2.2421,
"eval_precisions_4": 0.6274,
"eval_ref_len": 21250,
"eval_rouge1": 0.1773,
"eval_rouge2": 0.0588,
"eval_rougeL": 0.1721,
"eval_rougeLsum": 0.1721,
"eval_runtime": 2825.1201,
"eval_samples_per_second": 0.78,
"eval_steps_per_second": 0.39,
"eval_sys_len": 18725,
"eval_totals_1": 18725,
"eval_totals_2": 16521,
"eval_totals_3": 14317,
"eval_totals_4": 12113,
"step": 654
},
{
"epoch": 9.99,
"learning_rate": 0.0001,
"loss": 2.7829,
"step": 727
},
{
"epoch": 9.99,
"eval_bleu": 4.5099,
"eval_bp": 0.7974,
"eval_counts_1": 5625,
"eval_counts_2": 1267,
"eval_counts_3": 420,
"eval_counts_4": 124,
"eval_exact_match": 0.0045,
"eval_f1": 0.2159,
"eval_gen_len": 12.9741,
"eval_loss": 2.8288302421569824,
"eval_precisions_1": 32.4638,
"eval_precisions_2": 8.378,
"eval_precisions_3": 3.251,
"eval_precisions_4": 1.1573,
"eval_ref_len": 21250,
"eval_rouge1": 0.2127,
"eval_rouge2": 0.0741,
"eval_rougeL": 0.2067,
"eval_rougeLsum": 0.2065,
"eval_runtime": 2709.6941,
"eval_samples_per_second": 0.813,
"eval_steps_per_second": 0.407,
"eval_sys_len": 17327,
"eval_totals_1": 17327,
"eval_totals_2": 15123,
"eval_totals_3": 12919,
"eval_totals_4": 10715,
"step": 727
},
{
"epoch": 10.99,
"learning_rate": 0.0001,
"loss": 2.6093,
"step": 800
},
{
"epoch": 10.99,
"eval_bleu": 5.5051,
"eval_bp": 0.8685,
"eval_counts_1": 6005,
"eval_counts_2": 1469,
"eval_counts_3": 528,
"eval_counts_4": 181,
"eval_exact_match": 0.0064,
"eval_f1": 0.231,
"eval_gen_len": 14.4791,
"eval_loss": 2.7177300453186035,
"eval_precisions_1": 32.2416,
"eval_precisions_2": 8.9459,
"eval_precisions_3": 3.7139,
"eval_precisions_4": 1.5067,
"eval_ref_len": 21250,
"eval_rouge1": 0.229,
"eval_rouge2": 0.0827,
"eval_rougeL": 0.2215,
"eval_rougeLsum": 0.2213,
"eval_runtime": 1457.0803,
"eval_samples_per_second": 1.513,
"eval_steps_per_second": 0.756,
"eval_sys_len": 18625,
"eval_totals_1": 18625,
"eval_totals_2": 16421,
"eval_totals_3": 14217,
"eval_totals_4": 12013,
"step": 800
},
{
"epoch": 12.0,
"learning_rate": 0.0001,
"loss": 2.453,
"step": 873
},
{
"epoch": 12.0,
"eval_bleu": 6.6865,
"eval_bp": 0.8515,
"eval_counts_1": 6396,
"eval_counts_2": 1744,
"eval_counts_3": 664,
"eval_counts_4": 246,
"eval_exact_match": 0.0059,
"eval_f1": 0.2565,
"eval_gen_len": 13.7142,
"eval_loss": 2.591360330581665,
"eval_precisions_1": 34.9375,
"eval_precisions_2": 10.8303,
"eval_precisions_3": 4.7773,
"eval_precisions_4": 2.1035,
"eval_ref_len": 21250,
"eval_rouge1": 0.2553,
"eval_rouge2": 0.0998,
"eval_rougeL": 0.2479,
"eval_rougeLsum": 0.2478,
"eval_runtime": 1377.6536,
"eval_samples_per_second": 1.6,
"eval_steps_per_second": 0.8,
"eval_sys_len": 18307,
"eval_totals_1": 18307,
"eval_totals_2": 16103,
"eval_totals_3": 13899,
"eval_totals_4": 11695,
"step": 873
},
{
"epoch": 12.99,
"learning_rate": 0.0001,
"loss": 2.3329,
"step": 945
},
{
"epoch": 12.99,
"eval_bleu": 7.383,
"eval_bp": 0.8592,
"eval_counts_1": 6673,
"eval_counts_2": 1888,
"eval_counts_3": 741,
"eval_counts_4": 291,
"eval_exact_match": 0.0091,
"eval_f1": 0.2749,
"eval_gen_len": 14.1751,
"eval_loss": 2.499257802963257,
"eval_precisions_1": 36.1661,
"eval_precisions_2": 11.6206,
"eval_precisions_3": 5.2767,
"eval_precisions_4": 2.458,
"eval_ref_len": 21250,
"eval_rouge1": 0.2747,
"eval_rouge2": 0.1114,
"eval_rougeL": 0.2652,
"eval_rougeLsum": 0.2652,
"eval_runtime": 1427.0765,
"eval_samples_per_second": 1.544,
"eval_steps_per_second": 0.772,
"eval_sys_len": 18451,
"eval_totals_1": 18451,
"eval_totals_2": 16247,
"eval_totals_3": 14043,
"eval_totals_4": 11839,
"step": 945
},
{
"epoch": 13.99,
"learning_rate": 0.0001,
"loss": 2.1663,
"step": 1018
},
{
"epoch": 13.99,
"eval_bleu": 8.1343,
"eval_bp": 0.8635,
"eval_counts_1": 6953,
"eval_counts_2": 2052,
"eval_counts_3": 834,
"eval_counts_4": 337,
"eval_exact_match": 0.0082,
"eval_f1": 0.2889,
"eval_gen_len": 14.6783,
"eval_loss": 2.4196276664733887,
"eval_precisions_1": 37.5209,
"eval_precisions_2": 12.5681,
"eval_precisions_3": 5.9053,
"eval_precisions_4": 2.8274,
"eval_ref_len": 21250,
"eval_rouge1": 0.2886,
"eval_rouge2": 0.1215,
"eval_rougeL": 0.2773,
"eval_rougeLsum": 0.277,
"eval_runtime": 1443.0194,
"eval_samples_per_second": 1.527,
"eval_steps_per_second": 0.764,
"eval_sys_len": 18531,
"eval_totals_1": 18531,
"eval_totals_2": 16327,
"eval_totals_3": 14123,
"eval_totals_4": 11919,
"step": 1018
},
{
"epoch": 14.99,
"learning_rate": 0.0001,
"loss": 2.0422,
"step": 1091
},
{
"epoch": 14.99,
"eval_bleu": 8.4322,
"eval_bp": 0.8339,
"eval_counts_1": 6968,
"eval_counts_2": 2089,
"eval_counts_3": 862,
"eval_counts_4": 365,
"eval_exact_match": 0.0113,
"eval_f1": 0.2951,
"eval_gen_len": 13.6987,
"eval_loss": 2.3703055381774902,
"eval_precisions_1": 38.7456,
"eval_precisions_2": 13.2383,
"eval_precisions_3": 6.3494,
"eval_precisions_4": 3.2096,
"eval_ref_len": 21250,
"eval_rouge1": 0.2961,
"eval_rouge2": 0.1268,
"eval_rougeL": 0.2858,
"eval_rougeLsum": 0.2857,
"eval_runtime": 1381.8523,
"eval_samples_per_second": 1.595,
"eval_steps_per_second": 0.797,
"eval_sys_len": 17984,
"eval_totals_1": 17984,
"eval_totals_2": 15780,
"eval_totals_3": 13576,
"eval_totals_4": 11372,
"step": 1091
},
{
"epoch": 16.0,
"learning_rate": 0.0001,
"loss": 1.9245,
"step": 1164
},
{
"epoch": 16.0,
"eval_bleu": 9.5973,
"eval_bp": 0.8892,
"eval_counts_1": 7500,
"eval_counts_2": 2353,
"eval_counts_3": 999,
"eval_counts_4": 446,
"eval_exact_match": 0.0132,
"eval_f1": 0.314,
"eval_gen_len": 14.77,
"eval_loss": 2.3217406272888184,
"eval_precisions_1": 39.4384,
"eval_precisions_2": 13.9951,
"eval_precisions_3": 6.8383,
"eval_precisions_4": 3.5953,
"eval_ref_len": 21250,
"eval_rouge1": 0.3149,
"eval_rouge2": 0.1407,
"eval_rougeL": 0.3017,
"eval_rougeLsum": 0.3017,
"eval_runtime": 1430.5793,
"eval_samples_per_second": 1.541,
"eval_steps_per_second": 0.77,
"eval_sys_len": 19017,
"eval_totals_1": 19017,
"eval_totals_2": 16813,
"eval_totals_3": 14609,
"eval_totals_4": 12405,
"step": 1164
},
{
"epoch": 17.0,
"learning_rate": 0.0001,
"loss": 1.8216,
"step": 1237
},
{
"epoch": 17.0,
"eval_bleu": 9.9557,
"eval_bp": 0.8467,
"eval_counts_1": 7444,
"eval_counts_2": 2357,
"eval_counts_3": 1044,
"eval_counts_4": 488,
"eval_exact_match": 0.0132,
"eval_f1": 0.3181,
"eval_gen_len": 13.8031,
"eval_loss": 2.27047061920166,
"eval_precisions_1": 40.8584,
"eval_precisions_2": 14.7175,
"eval_precisions_3": 7.5592,
"eval_precisions_4": 4.2044,
"eval_ref_len": 21250,
"eval_rouge1": 0.3201,
"eval_rouge2": 0.1437,
"eval_rougeL": 0.3081,
"eval_rougeLsum": 0.3077,
"eval_runtime": 1357.6078,
"eval_samples_per_second": 1.623,
"eval_steps_per_second": 0.812,
"eval_sys_len": 18219,
"eval_totals_1": 18219,
"eval_totals_2": 16015,
"eval_totals_3": 13811,
"eval_totals_4": 11607,
"step": 1237
},
{
"epoch": 17.99,
"learning_rate": 0.0001,
"loss": 1.7503,
"step": 1309
},
{
"epoch": 17.99,
"eval_bleu": 10.4354,
"eval_bp": 0.8498,
"eval_counts_1": 7571,
"eval_counts_2": 2487,
"eval_counts_3": 1114,
"eval_counts_4": 515,
"eval_exact_match": 0.0145,
"eval_f1": 0.3265,
"eval_gen_len": 13.9106,
"eval_loss": 2.238603353500366,
"eval_precisions_1": 41.4282,
"eval_precisions_2": 15.4751,
"eval_precisions_3": 8.0335,
"eval_precisions_4": 4.4157,
"eval_ref_len": 21250,
"eval_rouge1": 0.3289,
"eval_rouge2": 0.1512,
"eval_rougeL": 0.3153,
"eval_rougeLsum": 0.3151,
"eval_runtime": 1353.0462,
"eval_samples_per_second": 1.629,
"eval_steps_per_second": 0.814,
"eval_sys_len": 18275,
"eval_totals_1": 18275,
"eval_totals_2": 16071,
"eval_totals_3": 13867,
"eval_totals_4": 11663,
"step": 1309
},
{
"epoch": 18.99,
"learning_rate": 0.0001,
"loss": 1.6342,
"step": 1382
},
{
"epoch": 18.99,
"eval_bleu": 10.7447,
"eval_bp": 0.8418,
"eval_counts_1": 7697,
"eval_counts_2": 2536,
"eval_counts_3": 1155,
"eval_counts_4": 537,
"eval_exact_match": 0.0177,
"eval_f1": 0.3313,
"eval_gen_len": 13.8494,
"eval_loss": 2.2182679176330566,
"eval_precisions_1": 42.4568,
"eval_precisions_2": 15.9246,
"eval_precisions_3": 8.4178,
"eval_precisions_4": 4.6627,
"eval_ref_len": 21250,
"eval_rouge1": 0.3342,
"eval_rouge2": 0.1559,
"eval_rougeL": 0.3224,
"eval_rougeLsum": 0.3222,
"eval_runtime": 1333.2607,
"eval_samples_per_second": 1.653,
"eval_steps_per_second": 0.827,
"eval_sys_len": 18129,
"eval_totals_1": 18129,
"eval_totals_2": 15925,
"eval_totals_3": 13721,
"eval_totals_4": 11517,
"step": 1382
},
{
"epoch": 19.79,
"learning_rate": 0.0001,
"loss": 1.5474,
"step": 1440
},
{
"epoch": 19.79,
"eval_bleu": 11.1066,
"eval_bp": 0.8786,
"eval_counts_1": 7879,
"eval_counts_2": 2632,
"eval_counts_3": 1187,
"eval_counts_4": 570,
"eval_exact_match": 0.0177,
"eval_f1": 0.3375,
"eval_gen_len": 14.5136,
"eval_loss": 2.1956045627593994,
"eval_precisions_1": 41.8762,
"eval_precisions_2": 15.8449,
"eval_precisions_3": 8.2391,
"eval_precisions_4": 4.671,
"eval_ref_len": 21250,
"eval_rouge1": 0.3398,
"eval_rouge2": 0.1607,
"eval_rougeL": 0.326,
"eval_rougeLsum": 0.326,
"eval_runtime": 1394.5803,
"eval_samples_per_second": 1.58,
"eval_steps_per_second": 0.79,
"eval_sys_len": 18815,
"eval_totals_1": 18815,
"eval_totals_2": 16611,
"eval_totals_3": 14407,
"eval_totals_4": 12203,
"step": 1440
},
{
"epoch": 19.79,
"step": 1440,
"total_flos": 8.496574887886848e+17,
"train_loss": 3.111723126305474,
"train_runtime": 93678.1212,
"train_samples_per_second": 1.989,
"train_steps_per_second": 0.015
}
],
"logging_steps": 500,
"max_steps": 1440,
"num_train_epochs": 20,
"save_steps": 500,
"total_flos": 8.496574887886848e+17,
"trial_name": null,
"trial_params": null
}