german-jeopardy-longt5-large / trainer_state.json
Marvin
Initial commit
dae8b58 unverified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 19.926991625509984,
"eval_steps": 500,
"global_step": 2900,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"learning_rate": 0.0001,
"loss": 6.5987,
"step": 145
},
{
"epoch": 1.0,
"eval_bleu": 0.1374,
"eval_bp": 1.0,
"eval_counts_1": 3804,
"eval_counts_2": 134,
"eval_counts_3": 2,
"eval_counts_4": 0,
"eval_exact_match": 0.0,
"eval_f1": 0.0814,
"eval_gen_len": 16.2899,
"eval_loss": 5.069606304168701,
"eval_precisions_1": 16.6019,
"eval_precisions_2": 0.6471,
"eval_precisions_3": 0.0108,
"eval_precisions_4": 0.0031,
"eval_ref_len": 21250,
"eval_rouge1": 0.0783,
"eval_rouge2": 0.007,
"eval_rougeL": 0.0769,
"eval_rougeLsum": 0.0768,
"eval_runtime": 2008.1612,
"eval_samples_per_second": 1.098,
"eval_steps_per_second": 0.549,
"eval_sys_len": 22913,
"eval_totals_1": 22913,
"eval_totals_2": 20709,
"eval_totals_3": 18505,
"eval_totals_4": 16301,
"step": 145
},
{
"epoch": 2.0,
"learning_rate": 0.0001,
"loss": 4.7443,
"step": 291
},
{
"epoch": 2.0,
"eval_bleu": 0.303,
"eval_bp": 0.7996,
"eval_counts_1": 4022,
"eval_counts_2": 188,
"eval_counts_3": 20,
"eval_counts_4": 0,
"eval_exact_match": 0.0,
"eval_f1": 0.1073,
"eval_gen_len": 12.9038,
"eval_loss": 4.227029323577881,
"eval_precisions_1": 23.1602,
"eval_precisions_2": 1.2399,
"eval_precisions_3": 0.1543,
"eval_precisions_4": 0.0046,
"eval_ref_len": 21250,
"eval_rouge1": 0.1028,
"eval_rouge2": 0.012,
"eval_rougeL": 0.0991,
"eval_rougeLsum": 0.099,
"eval_runtime": 2942.0368,
"eval_samples_per_second": 0.749,
"eval_steps_per_second": 0.375,
"eval_sys_len": 17366,
"eval_totals_1": 17366,
"eval_totals_2": 15162,
"eval_totals_3": 12958,
"eval_totals_4": 10754,
"step": 291
},
{
"epoch": 3.0,
"learning_rate": 0.0001,
"loss": 4.1412,
"step": 436
},
{
"epoch": 3.0,
"eval_bleu": 0.4488,
"eval_bp": 0.7507,
"eval_counts_1": 3723,
"eval_counts_2": 187,
"eval_counts_3": 26,
"eval_counts_4": 2,
"eval_exact_match": 0.0,
"eval_f1": 0.0938,
"eval_gen_len": 12.4769,
"eval_loss": 3.7837560176849365,
"eval_precisions_1": 22.5431,
"eval_precisions_2": 1.3067,
"eval_precisions_3": 0.2148,
"eval_precisions_4": 0.0202,
"eval_ref_len": 21250,
"eval_rouge1": 0.0899,
"eval_rouge2": 0.0124,
"eval_rougeL": 0.0886,
"eval_rougeLsum": 0.0884,
"eval_runtime": 2963.118,
"eval_samples_per_second": 0.744,
"eval_steps_per_second": 0.372,
"eval_sys_len": 16515,
"eval_totals_1": 16515,
"eval_totals_2": 14311,
"eval_totals_3": 12107,
"eval_totals_4": 9903,
"step": 436
},
{
"epoch": 4.0,
"learning_rate": 0.0001,
"loss": 3.6791,
"step": 582
},
{
"epoch": 4.0,
"eval_bleu": 1.6623,
"eval_bp": 1.0,
"eval_counts_1": 4576,
"eval_counts_2": 549,
"eval_counts_3": 134,
"eval_counts_4": 26,
"eval_exact_match": 0.0,
"eval_f1": 0.1323,
"eval_gen_len": 14.5676,
"eval_loss": 3.4246089458465576,
"eval_precisions_1": 20.9227,
"eval_precisions_2": 2.7915,
"eval_precisions_3": 0.7673,
"eval_precisions_4": 0.1704,
"eval_ref_len": 21250,
"eval_rouge1": 0.1259,
"eval_rouge2": 0.0296,
"eval_rougeL": 0.1204,
"eval_rougeLsum": 0.1201,
"eval_runtime": 3118.2455,
"eval_samples_per_second": 0.707,
"eval_steps_per_second": 0.353,
"eval_sys_len": 21871,
"eval_totals_1": 21871,
"eval_totals_2": 19667,
"eval_totals_3": 17463,
"eval_totals_4": 15259,
"step": 582
},
{
"epoch": 5.0,
"learning_rate": 0.0001,
"loss": 3.3523,
"step": 727
},
{
"epoch": 5.0,
"eval_bleu": 2.4472,
"eval_bp": 0.9085,
"eval_counts_1": 4900,
"eval_counts_2": 796,
"eval_counts_3": 210,
"eval_counts_4": 41,
"eval_exact_match": 0.0005,
"eval_f1": 0.1585,
"eval_gen_len": 14.3943,
"eval_loss": 3.172255277633667,
"eval_precisions_1": 25.2721,
"eval_precisions_2": 4.6319,
"eval_precisions_3": 1.4018,
"eval_precisions_4": 0.3209,
"eval_ref_len": 21250,
"eval_rouge1": 0.1542,
"eval_rouge2": 0.0449,
"eval_rougeL": 0.1486,
"eval_rougeLsum": 0.1484,
"eval_runtime": 3087.9672,
"eval_samples_per_second": 0.714,
"eval_steps_per_second": 0.357,
"eval_sys_len": 19389,
"eval_totals_1": 19389,
"eval_totals_2": 17185,
"eval_totals_3": 14981,
"eval_totals_4": 12777,
"step": 727
},
{
"epoch": 6.0,
"learning_rate": 0.0001,
"loss": 3.0161,
"step": 873
},
{
"epoch": 6.0,
"eval_bleu": 4.1987,
"eval_bp": 0.8907,
"eval_counts_1": 5633,
"eval_counts_2": 1182,
"eval_counts_3": 390,
"eval_counts_4": 111,
"eval_exact_match": 0.0045,
"eval_f1": 0.2074,
"eval_gen_len": 14.5789,
"eval_loss": 2.926840305328369,
"eval_precisions_1": 29.5773,
"eval_precisions_2": 7.0186,
"eval_precisions_3": 2.6645,
"eval_precisions_4": 0.8928,
"eval_ref_len": 21250,
"eval_rouge1": 0.204,
"eval_rouge2": 0.069,
"eval_rougeL": 0.196,
"eval_rougeLsum": 0.1961,
"eval_runtime": 3093.3528,
"eval_samples_per_second": 0.712,
"eval_steps_per_second": 0.356,
"eval_sys_len": 19045,
"eval_totals_1": 19045,
"eval_totals_2": 16841,
"eval_totals_3": 14637,
"eval_totals_4": 12433,
"step": 873
},
{
"epoch": 7.0,
"learning_rate": 0.0001,
"loss": 2.7639,
"step": 1018
},
{
"epoch": 7.0,
"eval_bleu": 5.3362,
"eval_bp": 0.8306,
"eval_counts_1": 6100,
"eval_counts_2": 1461,
"eval_counts_3": 499,
"eval_counts_4": 165,
"eval_exact_match": 0.0073,
"eval_f1": 0.2431,
"eval_gen_len": 13.8553,
"eval_loss": 2.760089635848999,
"eval_precisions_1": 34.0326,
"eval_precisions_2": 9.2939,
"eval_precisions_3": 3.6919,
"eval_precisions_4": 1.4586,
"eval_ref_len": 21250,
"eval_rouge1": 0.2409,
"eval_rouge2": 0.0885,
"eval_rougeL": 0.2332,
"eval_rougeLsum": 0.2331,
"eval_runtime": 2991.0063,
"eval_samples_per_second": 0.737,
"eval_steps_per_second": 0.368,
"eval_sys_len": 17924,
"eval_totals_1": 17924,
"eval_totals_2": 15720,
"eval_totals_3": 13516,
"eval_totals_4": 11312,
"step": 1018
},
{
"epoch": 8.0,
"learning_rate": 0.0001,
"loss": 2.5036,
"step": 1164
},
{
"epoch": 8.0,
"eval_bleu": 7.0633,
"eval_bp": 0.9483,
"eval_counts_1": 6765,
"eval_counts_2": 1845,
"eval_counts_3": 701,
"eval_counts_4": 273,
"eval_exact_match": 0.0059,
"eval_f1": 0.2689,
"eval_gen_len": 15.7232,
"eval_loss": 2.572913885116577,
"eval_precisions_1": 33.525,
"eval_precisions_2": 10.2643,
"eval_precisions_3": 4.4449,
"eval_precisions_4": 2.0122,
"eval_ref_len": 21250,
"eval_rouge1": 0.2682,
"eval_rouge2": 0.1079,
"eval_rougeL": 0.2589,
"eval_rougeLsum": 0.259,
"eval_runtime": 3343.9439,
"eval_samples_per_second": 0.659,
"eval_steps_per_second": 0.33,
"eval_sys_len": 20179,
"eval_totals_1": 20179,
"eval_totals_2": 17975,
"eval_totals_3": 15771,
"eval_totals_4": 13567,
"step": 1164
},
{
"epoch": 8.99,
"learning_rate": 0.0001,
"loss": 2.307,
"step": 1309
},
{
"epoch": 8.99,
"eval_bleu": 8.1681,
"eval_bp": 0.8911,
"eval_counts_1": 7018,
"eval_counts_2": 2047,
"eval_counts_3": 826,
"eval_counts_4": 348,
"eval_exact_match": 0.0095,
"eval_f1": 0.2907,
"eval_gen_len": 14.8076,
"eval_loss": 2.4636850357055664,
"eval_precisions_1": 36.8322,
"eval_precisions_2": 12.1484,
"eval_precisions_3": 5.6398,
"eval_precisions_4": 2.797,
"eval_ref_len": 21250,
"eval_rouge1": 0.2907,
"eval_rouge2": 0.1218,
"eval_rougeL": 0.2799,
"eval_rougeLsum": 0.2798,
"eval_runtime": 3082.8011,
"eval_samples_per_second": 0.715,
"eval_steps_per_second": 0.357,
"eval_sys_len": 19054,
"eval_totals_1": 19054,
"eval_totals_2": 16850,
"eval_totals_3": 14646,
"eval_totals_4": 12442,
"step": 1309
},
{
"epoch": 10.0,
"learning_rate": 0.0001,
"loss": 2.1012,
"step": 1455
},
{
"epoch": 10.0,
"eval_bleu": 8.6921,
"eval_bp": 0.8604,
"eval_counts_1": 7147,
"eval_counts_2": 2127,
"eval_counts_3": 883,
"eval_counts_4": 389,
"eval_exact_match": 0.0118,
"eval_f1": 0.3008,
"eval_gen_len": 14.2736,
"eval_loss": 2.361370325088501,
"eval_precisions_1": 38.6889,
"eval_precisions_2": 13.0739,
"eval_precisions_3": 6.278,
"eval_precisions_4": 3.2797,
"eval_ref_len": 21250,
"eval_rouge1": 0.3003,
"eval_rouge2": 0.1275,
"eval_rougeL": 0.289,
"eval_rougeLsum": 0.2888,
"eval_runtime": 2980.6044,
"eval_samples_per_second": 0.739,
"eval_steps_per_second": 0.37,
"eval_sys_len": 18473,
"eval_totals_1": 18473,
"eval_totals_2": 16269,
"eval_totals_3": 14065,
"eval_totals_4": 11861,
"step": 1455
},
{
"epoch": 10.99,
"learning_rate": 0.0001,
"loss": 1.9538,
"step": 1600
},
{
"epoch": 10.99,
"eval_bleu": 9.67,
"eval_bp": 0.8632,
"eval_counts_1": 7481,
"eval_counts_2": 2339,
"eval_counts_3": 997,
"eval_counts_4": 459,
"eval_exact_match": 0.0127,
"eval_f1": 0.3167,
"eval_gen_len": 14.3757,
"eval_loss": 2.297987461090088,
"eval_precisions_1": 40.3854,
"eval_precisions_2": 14.3321,
"eval_precisions_3": 7.0629,
"eval_precisions_4": 3.8533,
"eval_ref_len": 21250,
"eval_rouge1": 0.3192,
"eval_rouge2": 0.1423,
"eval_rougeL": 0.3064,
"eval_rougeLsum": 0.3068,
"eval_runtime": 1745.8738,
"eval_samples_per_second": 1.262,
"eval_steps_per_second": 0.631,
"eval_sys_len": 18524,
"eval_totals_1": 18524,
"eval_totals_2": 16320,
"eval_totals_3": 14116,
"eval_totals_4": 11912,
"step": 1600
},
{
"epoch": 12.0,
"learning_rate": 0.0001,
"loss": 1.7909,
"step": 1746
},
{
"epoch": 12.0,
"eval_bleu": 10.724,
"eval_bp": 0.8804,
"eval_counts_1": 7675,
"eval_counts_2": 2546,
"eval_counts_3": 1144,
"eval_counts_4": 546,
"eval_exact_match": 0.015,
"eval_f1": 0.3279,
"eval_gen_len": 14.583,
"eval_loss": 2.2389414310455322,
"eval_precisions_1": 40.7183,
"eval_precisions_2": 15.2959,
"eval_precisions_3": 7.9219,
"eval_precisions_4": 4.4619,
"eval_ref_len": 21250,
"eval_rouge1": 0.3299,
"eval_rouge2": 0.1528,
"eval_rougeL": 0.3174,
"eval_rougeLsum": 0.3175,
"eval_runtime": 1768.3367,
"eval_samples_per_second": 1.246,
"eval_steps_per_second": 0.623,
"eval_sys_len": 18849,
"eval_totals_1": 18849,
"eval_totals_2": 16645,
"eval_totals_3": 14441,
"eval_totals_4": 12237,
"step": 1746
},
{
"epoch": 12.99,
"learning_rate": 0.0001,
"loss": 1.6691,
"step": 1891
},
{
"epoch": 12.99,
"eval_bleu": 11.1241,
"eval_bp": 0.8695,
"eval_counts_1": 7858,
"eval_counts_2": 2635,
"eval_counts_3": 1179,
"eval_counts_4": 576,
"eval_exact_match": 0.0163,
"eval_f1": 0.3395,
"eval_gen_len": 14.3848,
"eval_loss": 2.181286096572876,
"eval_precisions_1": 42.1499,
"eval_precisions_2": 16.029,
"eval_precisions_3": 8.2824,
"eval_precisions_4": 4.7876,
"eval_ref_len": 21250,
"eval_rouge1": 0.344,
"eval_rouge2": 0.1626,
"eval_rougeL": 0.33,
"eval_rougeLsum": 0.33,
"eval_runtime": 1475.7204,
"eval_samples_per_second": 1.494,
"eval_steps_per_second": 0.747,
"eval_sys_len": 18643,
"eval_totals_1": 18643,
"eval_totals_2": 16439,
"eval_totals_3": 14235,
"eval_totals_4": 12031,
"step": 1891
},
{
"epoch": 14.0,
"learning_rate": 0.0001,
"loss": 1.5361,
"step": 2037
},
{
"epoch": 14.0,
"eval_bleu": 11.5803,
"eval_bp": 0.8754,
"eval_counts_1": 8016,
"eval_counts_2": 2729,
"eval_counts_3": 1249,
"eval_counts_4": 606,
"eval_exact_match": 0.0163,
"eval_f1": 0.3462,
"eval_gen_len": 14.564,
"eval_loss": 2.15460205078125,
"eval_precisions_1": 42.7429,
"eval_precisions_2": 16.4894,
"eval_precisions_3": 8.7063,
"eval_precisions_4": 4.9909,
"eval_ref_len": 21250,
"eval_rouge1": 0.3494,
"eval_rouge2": 0.1664,
"eval_rougeL": 0.3349,
"eval_rougeLsum": 0.3351,
"eval_runtime": 2521.9472,
"eval_samples_per_second": 0.874,
"eval_steps_per_second": 0.437,
"eval_sys_len": 18754,
"eval_totals_1": 18754,
"eval_totals_2": 16550,
"eval_totals_3": 14346,
"eval_totals_4": 12142,
"step": 2037
},
{
"epoch": 14.99,
"learning_rate": 0.0001,
"loss": 1.4365,
"step": 2182
},
{
"epoch": 14.99,
"eval_bleu": 12.1055,
"eval_bp": 0.856,
"eval_counts_1": 8112,
"eval_counts_2": 2839,
"eval_counts_3": 1316,
"eval_counts_4": 647,
"eval_exact_match": 0.02,
"eval_f1": 0.3538,
"eval_gen_len": 14.1656,
"eval_loss": 2.1357789039611816,
"eval_precisions_1": 44.1109,
"eval_precisions_2": 17.5398,
"eval_precisions_3": 9.4121,
"eval_precisions_4": 5.4933,
"eval_ref_len": 21250,
"eval_rouge1": 0.3581,
"eval_rouge2": 0.1761,
"eval_rougeL": 0.3448,
"eval_rougeLsum": 0.3448,
"eval_runtime": 2133.8974,
"eval_samples_per_second": 1.033,
"eval_steps_per_second": 0.516,
"eval_sys_len": 18390,
"eval_totals_1": 18390,
"eval_totals_2": 16186,
"eval_totals_3": 13982,
"eval_totals_4": 11778,
"step": 2182
},
{
"epoch": 16.0,
"learning_rate": 0.0001,
"loss": 1.3263,
"step": 2328
},
{
"epoch": 16.0,
"eval_bleu": 12.9765,
"eval_bp": 0.8827,
"eval_counts_1": 8381,
"eval_counts_2": 2990,
"eval_counts_3": 1430,
"eval_counts_4": 731,
"eval_exact_match": 0.0209,
"eval_f1": 0.363,
"eval_gen_len": 14.5445,
"eval_loss": 2.1189985275268555,
"eval_precisions_1": 44.3627,
"eval_precisions_2": 17.9171,
"eval_precisions_3": 9.873,
"eval_precisions_4": 5.9528,
"eval_ref_len": 21250,
"eval_rouge1": 0.3681,
"eval_rouge2": 0.1831,
"eval_rougeL": 0.3532,
"eval_rougeLsum": 0.3534,
"eval_runtime": 1849.5796,
"eval_samples_per_second": 1.192,
"eval_steps_per_second": 0.596,
"eval_sys_len": 18892,
"eval_totals_1": 18892,
"eval_totals_2": 16688,
"eval_totals_3": 14484,
"eval_totals_4": 12280,
"step": 2328
},
{
"epoch": 17.0,
"learning_rate": 0.0001,
"loss": 1.2329,
"step": 2474
},
{
"epoch": 17.0,
"eval_bleu": 13.5903,
"eval_bp": 0.8678,
"eval_counts_1": 8449,
"eval_counts_2": 3101,
"eval_counts_3": 1520,
"eval_counts_4": 786,
"eval_exact_match": 0.0227,
"eval_f1": 0.3692,
"eval_gen_len": 14.1779,
"eval_loss": 2.1201868057250977,
"eval_precisions_1": 45.3954,
"eval_precisions_2": 18.8993,
"eval_precisions_3": 10.7012,
"eval_precisions_4": 6.55,
"eval_ref_len": 21250,
"eval_rouge1": 0.3743,
"eval_rouge2": 0.1901,
"eval_rougeL": 0.3603,
"eval_rougeLsum": 0.3603,
"eval_runtime": 1363.814,
"eval_samples_per_second": 1.616,
"eval_steps_per_second": 0.808,
"eval_sys_len": 18612,
"eval_totals_1": 18612,
"eval_totals_2": 16408,
"eval_totals_3": 14204,
"eval_totals_4": 12000,
"step": 2474
},
{
"epoch": 18.0,
"learning_rate": 0.0001,
"loss": 1.1557,
"step": 2619
},
{
"epoch": 18.0,
"eval_bleu": 13.8388,
"eval_bp": 0.8325,
"eval_counts_1": 8406,
"eval_counts_2": 3154,
"eval_counts_3": 1558,
"eval_counts_4": 804,
"eval_exact_match": 0.0277,
"eval_f1": 0.371,
"eval_gen_len": 13.677,
"eval_loss": 2.1282455921173096,
"eval_precisions_1": 46.8092,
"eval_precisions_2": 20.0203,
"eval_precisions_3": 11.4982,
"eval_precisions_4": 7.0862,
"eval_ref_len": 21250,
"eval_rouge1": 0.3761,
"eval_rouge2": 0.194,
"eval_rougeL": 0.3633,
"eval_rougeLsum": 0.3636,
"eval_runtime": 1323.8829,
"eval_samples_per_second": 1.665,
"eval_steps_per_second": 0.832,
"eval_sys_len": 17958,
"eval_totals_1": 17958,
"eval_totals_2": 15754,
"eval_totals_3": 13550,
"eval_totals_4": 11346,
"step": 2619
},
{
"epoch": 19.0,
"learning_rate": 0.0001,
"loss": 1.0658,
"step": 2765
},
{
"epoch": 19.0,
"eval_bleu": 14.2084,
"eval_bp": 0.886,
"eval_counts_1": 8614,
"eval_counts_2": 3241,
"eval_counts_3": 1610,
"eval_counts_4": 839,
"eval_exact_match": 0.0272,
"eval_f1": 0.3749,
"eval_gen_len": 14.3816,
"eval_loss": 2.123244524002075,
"eval_precisions_1": 45.4445,
"eval_precisions_2": 19.3481,
"eval_precisions_3": 11.0676,
"eval_precisions_4": 6.7974,
"eval_ref_len": 21250,
"eval_rouge1": 0.3803,
"eval_rouge2": 0.196,
"eval_rougeL": 0.3654,
"eval_rougeLsum": 0.3656,
"eval_runtime": 1378.8855,
"eval_samples_per_second": 1.598,
"eval_steps_per_second": 0.799,
"eval_sys_len": 18955,
"eval_totals_1": 18955,
"eval_totals_2": 16751,
"eval_totals_3": 14547,
"eval_totals_4": 12343,
"step": 2765
},
{
"epoch": 19.93,
"learning_rate": 0.0001,
"loss": 0.9944,
"step": 2900
},
{
"epoch": 19.93,
"eval_bleu": 14.3883,
"eval_bp": 0.8806,
"eval_counts_1": 8658,
"eval_counts_2": 3273,
"eval_counts_3": 1625,
"eval_counts_4": 859,
"eval_exact_match": 0.0268,
"eval_f1": 0.3775,
"eval_gen_len": 14.2881,
"eval_loss": 2.1203458309173584,
"eval_precisions_1": 45.9237,
"eval_precisions_2": 19.6588,
"eval_precisions_3": 11.2496,
"eval_precisions_4": 7.0174,
"eval_ref_len": 21250,
"eval_rouge1": 0.3833,
"eval_rouge2": 0.1977,
"eval_rougeL": 0.369,
"eval_rougeLsum": 0.3691,
"eval_runtime": 1364.695,
"eval_samples_per_second": 1.615,
"eval_steps_per_second": 0.808,
"eval_sys_len": 18853,
"eval_totals_1": 18853,
"eval_totals_2": 16649,
"eval_totals_3": 14445,
"eval_totals_4": 12241,
"step": 2900
},
{
"epoch": 19.93,
"step": 2900,
"total_flos": 8.55557888016384e+17,
"train_loss": 2.472949571280644,
"train_runtime": 103540.1577,
"train_samples_per_second": 1.799,
"train_steps_per_second": 0.028
}
],
"logging_steps": 500,
"max_steps": 2900,
"num_train_epochs": 20,
"save_steps": 500,
"total_flos": 8.55557888016384e+17,
"trial_name": null,
"trial_params": null
}