german-jeopardy-longt5-large-256 / trainer_state.json
Marvin
Initial commit
cb0ed1f unverified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 19.789564097058193,
"eval_steps": 500,
"global_step": 720,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.99,
"learning_rate": 0.0001,
"loss": 8.8727,
"step": 36
},
{
"epoch": 0.99,
"eval_bleu": 0.0,
"eval_bp": 0.0002,
"eval_counts_1": 2198,
"eval_counts_2": 0,
"eval_counts_3": 0,
"eval_counts_4": 0,
"eval_exact_match": 0.0,
"eval_f1": 0.0,
"eval_gen_len": 2.0,
"eval_loss": 6.380987644195557,
"eval_precisions_1": 99.7278,
"eval_precisions_2": 0.0,
"eval_precisions_3": 0.0,
"eval_precisions_4": 0.0,
"eval_ref_len": 21250,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 575.1236,
"eval_samples_per_second": 3.832,
"eval_steps_per_second": 1.916,
"eval_sys_len": 2204,
"eval_totals_1": 2204,
"eval_totals_2": 0,
"eval_totals_3": 0,
"eval_totals_4": 0,
"step": 36
},
{
"epoch": 1.98,
"learning_rate": 0.0001,
"loss": 6.0165,
"step": 72
},
{
"epoch": 1.98,
"eval_bleu": 0.0851,
"eval_bp": 1.0,
"eval_counts_1": 3587,
"eval_counts_2": 137,
"eval_counts_3": 0,
"eval_counts_4": 0,
"eval_exact_match": 0.0,
"eval_f1": 0.073,
"eval_gen_len": 15.0091,
"eval_loss": 5.386419773101807,
"eval_precisions_1": 16.3342,
"eval_precisions_2": 0.6935,
"eval_precisions_3": 0.0028,
"eval_precisions_4": 0.0016,
"eval_ref_len": 21250,
"eval_rouge1": 0.0702,
"eval_rouge2": 0.0079,
"eval_rougeL": 0.07,
"eval_rougeLsum": 0.07,
"eval_runtime": 2466.4687,
"eval_samples_per_second": 0.894,
"eval_steps_per_second": 0.447,
"eval_sys_len": 21960,
"eval_totals_1": 21960,
"eval_totals_2": 19756,
"eval_totals_3": 17552,
"eval_totals_4": 15348,
"step": 72
},
{
"epoch": 3.0,
"learning_rate": 0.0001,
"loss": 5.1537,
"step": 109
},
{
"epoch": 3.0,
"eval_bleu": 0.13,
"eval_bp": 0.6246,
"eval_counts_1": 3601,
"eval_counts_2": 145,
"eval_counts_3": 1,
"eval_counts_4": 0,
"eval_exact_match": 0.0,
"eval_f1": 0.0926,
"eval_gen_len": 9.5309,
"eval_loss": 4.961660861968994,
"eval_precisions_1": 24.9221,
"eval_precisions_2": 1.1842,
"eval_precisions_3": 0.01,
"eval_precisions_4": 0.0064,
"eval_ref_len": 21250,
"eval_rouge1": 0.0882,
"eval_rouge2": 0.0107,
"eval_rougeL": 0.0877,
"eval_rougeLsum": 0.0876,
"eval_runtime": 2425.7875,
"eval_samples_per_second": 0.909,
"eval_steps_per_second": 0.454,
"eval_sys_len": 14449,
"eval_totals_1": 14449,
"eval_totals_2": 12245,
"eval_totals_3": 10041,
"eval_totals_4": 7837,
"step": 109
},
{
"epoch": 3.99,
"learning_rate": 0.0001,
"loss": 4.863,
"step": 145
},
{
"epoch": 3.99,
"eval_bleu": 0.1468,
"eval_bp": 1.0,
"eval_counts_1": 4590,
"eval_counts_2": 229,
"eval_counts_3": 19,
"eval_counts_4": 0,
"eval_exact_match": 0.0,
"eval_f1": 0.0836,
"eval_gen_len": 29.4528,
"eval_loss": 4.553112030029297,
"eval_precisions_1": 11.0141,
"eval_precisions_2": 0.5802,
"eval_precisions_3": 0.051,
"eval_precisions_4": 0.0014,
"eval_ref_len": 21250,
"eval_rouge1": 0.0811,
"eval_rouge2": 0.0081,
"eval_rougeL": 0.0768,
"eval_rougeLsum": 0.0767,
"eval_runtime": 4425.6343,
"eval_samples_per_second": 0.498,
"eval_steps_per_second": 0.249,
"eval_sys_len": 41674,
"eval_totals_1": 41674,
"eval_totals_2": 39470,
"eval_totals_3": 37266,
"eval_totals_4": 35062,
"step": 145
},
{
"epoch": 4.97,
"learning_rate": 0.0001,
"loss": 4.5201,
"step": 181
},
{
"epoch": 4.97,
"eval_bleu": 0.2845,
"eval_bp": 0.7265,
"eval_counts_1": 3643,
"eval_counts_2": 169,
"eval_counts_3": 19,
"eval_counts_4": 0,
"eval_exact_match": 0.0,
"eval_f1": 0.0907,
"eval_gen_len": 12.5077,
"eval_loss": 4.201998233795166,
"eval_precisions_1": 22.6217,
"eval_precisions_2": 1.2158,
"eval_precisions_3": 0.1624,
"eval_precisions_4": 0.0053,
"eval_ref_len": 21250,
"eval_rouge1": 0.0865,
"eval_rouge2": 0.0115,
"eval_rougeL": 0.0856,
"eval_rougeLsum": 0.0855,
"eval_runtime": 2637.7264,
"eval_samples_per_second": 0.836,
"eval_steps_per_second": 0.418,
"eval_sys_len": 16104,
"eval_totals_1": 16104,
"eval_totals_2": 13900,
"eval_totals_3": 11696,
"eval_totals_4": 9492,
"step": 181
},
{
"epoch": 5.99,
"learning_rate": 0.0001,
"loss": 4.1347,
"step": 218
},
{
"epoch": 5.99,
"eval_bleu": 0.2878,
"eval_bp": 0.7671,
"eval_counts_1": 3670,
"eval_counts_2": 167,
"eval_counts_3": 20,
"eval_counts_4": 0,
"eval_exact_match": 0.0,
"eval_f1": 0.0917,
"eval_gen_len": 13.1656,
"eval_loss": 3.9352548122406006,
"eval_precisions_1": 21.8504,
"eval_precisions_2": 1.1445,
"eval_precisions_3": 0.1614,
"eval_precisions_4": 0.0049,
"eval_ref_len": 21250,
"eval_rouge1": 0.087,
"eval_rouge2": 0.0114,
"eval_rougeL": 0.0859,
"eval_rougeLsum": 0.0858,
"eval_runtime": 3199.7795,
"eval_samples_per_second": 0.689,
"eval_steps_per_second": 0.344,
"eval_sys_len": 16796,
"eval_totals_1": 16796,
"eval_totals_2": 14592,
"eval_totals_3": 12388,
"eval_totals_4": 10184,
"step": 218
},
{
"epoch": 6.98,
"learning_rate": 0.0001,
"loss": 4.012,
"step": 254
},
{
"epoch": 6.98,
"eval_bleu": 0.4139,
"eval_bp": 0.7546,
"eval_counts_1": 3780,
"eval_counts_2": 198,
"eval_counts_3": 35,
"eval_counts_4": 1,
"eval_exact_match": 0.0,
"eval_f1": 0.0968,
"eval_gen_len": 12.2931,
"eval_loss": 3.759270191192627,
"eval_precisions_1": 22.7958,
"eval_precisions_2": 1.3771,
"eval_precisions_3": 0.2875,
"eval_precisions_4": 0.01,
"eval_ref_len": 21250,
"eval_rouge1": 0.0916,
"eval_rouge2": 0.0128,
"eval_rougeL": 0.0903,
"eval_rougeLsum": 0.0902,
"eval_runtime": 2449.2398,
"eval_samples_per_second": 0.9,
"eval_steps_per_second": 0.45,
"eval_sys_len": 16582,
"eval_totals_1": 16582,
"eval_totals_2": 14378,
"eval_totals_3": 12174,
"eval_totals_4": 9970,
"step": 254
},
{
"epoch": 8.0,
"learning_rate": 0.0001,
"loss": 3.7048,
"step": 291
},
{
"epoch": 8.0,
"eval_bleu": 0.5493,
"eval_bp": 0.7297,
"eval_counts_1": 3668,
"eval_counts_2": 205,
"eval_counts_3": 36,
"eval_counts_4": 3,
"eval_exact_match": 0.0,
"eval_f1": 0.0923,
"eval_gen_len": 11.7568,
"eval_loss": 3.603360414505005,
"eval_precisions_1": 22.7008,
"eval_precisions_2": 1.4691,
"eval_precisions_3": 0.3064,
"eval_precisions_4": 0.0314,
"eval_ref_len": 21250,
"eval_rouge1": 0.0882,
"eval_rouge2": 0.0134,
"eval_rougeL": 0.0873,
"eval_rougeLsum": 0.0872,
"eval_runtime": 2487.6786,
"eval_samples_per_second": 0.886,
"eval_steps_per_second": 0.443,
"eval_sys_len": 16158,
"eval_totals_1": 16158,
"eval_totals_2": 13954,
"eval_totals_3": 11750,
"eval_totals_4": 9546,
"step": 291
},
{
"epoch": 8.99,
"learning_rate": 0.0001,
"loss": 3.6284,
"step": 327
},
{
"epoch": 8.99,
"eval_bleu": 1.8083,
"eval_bp": 0.8048,
"eval_counts_1": 4070,
"eval_counts_2": 527,
"eval_counts_3": 160,
"eval_counts_4": 28,
"eval_exact_match": 0.0,
"eval_f1": 0.1152,
"eval_gen_len": 9.7777,
"eval_loss": 3.4566922187805176,
"eval_precisions_1": 23.3118,
"eval_precisions_2": 3.4546,
"eval_precisions_3": 1.226,
"eval_precisions_4": 0.2581,
"eval_ref_len": 21250,
"eval_rouge1": 0.1109,
"eval_rouge2": 0.0281,
"eval_rougeL": 0.1083,
"eval_rougeLsum": 0.1082,
"eval_runtime": 2473.1224,
"eval_samples_per_second": 0.891,
"eval_steps_per_second": 0.446,
"eval_sys_len": 17459,
"eval_totals_1": 17459,
"eval_totals_2": 15255,
"eval_totals_3": 13051,
"eval_totals_4": 10847,
"step": 327
},
{
"epoch": 9.98,
"learning_rate": 0.0001,
"loss": 3.4605,
"step": 363
},
{
"epoch": 9.98,
"eval_bleu": 1.6972,
"eval_bp": 0.8793,
"eval_counts_1": 4325,
"eval_counts_2": 512,
"eval_counts_3": 128,
"eval_counts_4": 27,
"eval_exact_match": 0.0,
"eval_f1": 0.1254,
"eval_gen_len": 12.6729,
"eval_loss": 3.339012622833252,
"eval_precisions_1": 22.9699,
"eval_precisions_2": 3.0797,
"eval_precisions_3": 0.8876,
"eval_precisions_4": 0.221,
"eval_ref_len": 21250,
"eval_rouge1": 0.1206,
"eval_rouge2": 0.0288,
"eval_rougeL": 0.1168,
"eval_rougeLsum": 0.1167,
"eval_runtime": 2821.508,
"eval_samples_per_second": 0.781,
"eval_steps_per_second": 0.391,
"eval_sys_len": 18829,
"eval_totals_1": 18829,
"eval_totals_2": 16625,
"eval_totals_3": 14421,
"eval_totals_4": 12217,
"step": 363
},
{
"epoch": 10.99,
"learning_rate": 0.0001,
"loss": 3.2267,
"step": 400
},
{
"epoch": 10.99,
"eval_bleu": 2.5735,
"eval_bp": 0.8779,
"eval_counts_1": 4498,
"eval_counts_2": 774,
"eval_counts_3": 237,
"eval_counts_4": 49,
"eval_exact_match": 0.0005,
"eval_f1": 0.1381,
"eval_gen_len": 11.5009,
"eval_loss": 3.199504852294922,
"eval_precisions_1": 23.923,
"eval_precisions_2": 4.6632,
"eval_precisions_3": 1.6465,
"eval_precisions_4": 0.402,
"eval_ref_len": 21250,
"eval_rouge1": 0.1348,
"eval_rouge2": 0.0405,
"eval_rougeL": 0.132,
"eval_rougeLsum": 0.1319,
"eval_runtime": 2727.7858,
"eval_samples_per_second": 0.808,
"eval_steps_per_second": 0.404,
"eval_sys_len": 18802,
"eval_totals_1": 18802,
"eval_totals_2": 16598,
"eval_totals_3": 14394,
"eval_totals_4": 12190,
"step": 400
},
{
"epoch": 11.98,
"learning_rate": 0.0001,
"loss": 3.1761,
"step": 436
},
{
"epoch": 11.98,
"eval_bleu": 2.7554,
"eval_bp": 0.7767,
"eval_counts_1": 4578,
"eval_counts_2": 866,
"eval_counts_3": 260,
"eval_counts_4": 50,
"eval_exact_match": 0.0005,
"eval_f1": 0.1492,
"eval_gen_len": 10.5172,
"eval_loss": 3.116502523422241,
"eval_precisions_1": 26.9882,
"eval_precisions_2": 5.8676,
"eval_precisions_3": 2.0709,
"eval_precisions_4": 0.483,
"eval_ref_len": 21250,
"eval_rouge1": 0.1454,
"eval_rouge2": 0.0464,
"eval_rougeL": 0.1426,
"eval_rougeLsum": 0.1427,
"eval_runtime": 2404.4354,
"eval_samples_per_second": 0.917,
"eval_steps_per_second": 0.458,
"eval_sys_len": 16963,
"eval_totals_1": 16963,
"eval_totals_2": 14759,
"eval_totals_3": 12555,
"eval_totals_4": 10351,
"step": 436
},
{
"epoch": 12.97,
"learning_rate": 0.0001,
"loss": 3.0323,
"step": 472
},
{
"epoch": 12.97,
"eval_bleu": 3.2318,
"eval_bp": 0.839,
"eval_counts_1": 5019,
"eval_counts_2": 1048,
"eval_counts_3": 319,
"eval_counts_4": 59,
"eval_exact_match": 0.0009,
"eval_f1": 0.1729,
"eval_gen_len": 12.8294,
"eval_loss": 3.0074305534362793,
"eval_precisions_1": 27.7646,
"eval_precisions_2": 6.6024,
"eval_precisions_3": 2.3337,
"eval_precisions_4": 0.5146,
"eval_ref_len": 21250,
"eval_rouge1": 0.1691,
"eval_rouge2": 0.0557,
"eval_rougeL": 0.1648,
"eval_rougeLsum": 0.1647,
"eval_runtime": 1416.83,
"eval_samples_per_second": 1.556,
"eval_steps_per_second": 0.778,
"eval_sys_len": 18077,
"eval_totals_1": 18077,
"eval_totals_2": 15873,
"eval_totals_3": 13669,
"eval_totals_4": 11465,
"step": 472
},
{
"epoch": 13.99,
"learning_rate": 0.0001,
"loss": 2.8223,
"step": 509
},
{
"epoch": 13.99,
"eval_bleu": 3.7161,
"eval_bp": 0.783,
"eval_counts_1": 5257,
"eval_counts_2": 1120,
"eval_counts_3": 341,
"eval_counts_4": 85,
"eval_exact_match": 0.0018,
"eval_f1": 0.1929,
"eval_gen_len": 12.6824,
"eval_loss": 2.891127109527588,
"eval_precisions_1": 30.7895,
"eval_precisions_2": 7.5319,
"eval_precisions_3": 2.6922,
"eval_precisions_4": 0.8125,
"eval_ref_len": 21250,
"eval_rouge1": 0.189,
"eval_rouge2": 0.0635,
"eval_rougeL": 0.1841,
"eval_rougeLsum": 0.184,
"eval_runtime": 2341.3383,
"eval_samples_per_second": 0.941,
"eval_steps_per_second": 0.471,
"eval_sys_len": 17074,
"eval_totals_1": 17074,
"eval_totals_2": 14870,
"eval_totals_3": 12666,
"eval_totals_4": 10462,
"step": 509
},
{
"epoch": 14.98,
"learning_rate": 0.0001,
"loss": 2.7732,
"step": 545
},
{
"epoch": 14.98,
"eval_bleu": 4.3667,
"eval_bp": 0.8229,
"eval_counts_1": 5616,
"eval_counts_2": 1271,
"eval_counts_3": 407,
"eval_counts_4": 113,
"eval_exact_match": 0.0045,
"eval_f1": 0.217,
"eval_gen_len": 13.0944,
"eval_loss": 2.8103041648864746,
"eval_precisions_1": 31.5789,
"eval_precisions_2": 8.1579,
"eval_precisions_3": 3.0428,
"eval_precisions_4": 1.0115,
"eval_ref_len": 21250,
"eval_rouge1": 0.2122,
"eval_rouge2": 0.0731,
"eval_rougeL": 0.2063,
"eval_rougeLsum": 0.2061,
"eval_runtime": 2877.1799,
"eval_samples_per_second": 0.766,
"eval_steps_per_second": 0.383,
"eval_sys_len": 17784,
"eval_totals_1": 17784,
"eval_totals_2": 15580,
"eval_totals_3": 13376,
"eval_totals_4": 11172,
"step": 545
},
{
"epoch": 16.0,
"learning_rate": 0.0001,
"loss": 2.58,
"step": 582
},
{
"epoch": 16.0,
"eval_bleu": 5.357,
"eval_bp": 0.8782,
"eval_counts_1": 5959,
"eval_counts_2": 1461,
"eval_counts_3": 510,
"eval_counts_4": 171,
"eval_exact_match": 0.0064,
"eval_f1": 0.2316,
"eval_gen_len": 13.9174,
"eval_loss": 2.718313694000244,
"eval_precisions_1": 31.6833,
"eval_precisions_2": 8.7991,
"eval_precisions_3": 3.5417,
"eval_precisions_4": 1.4021,
"eval_ref_len": 21250,
"eval_rouge1": 0.2286,
"eval_rouge2": 0.0822,
"eval_rougeL": 0.2214,
"eval_rougeLsum": 0.2212,
"eval_runtime": 2978.2825,
"eval_samples_per_second": 0.74,
"eval_steps_per_second": 0.37,
"eval_sys_len": 18808,
"eval_totals_1": 18808,
"eval_totals_2": 16604,
"eval_totals_3": 14400,
"eval_totals_4": 12196,
"step": 582
},
{
"epoch": 16.99,
"learning_rate": 0.0001,
"loss": 2.5368,
"step": 618
},
{
"epoch": 16.99,
"eval_bleu": 5.8686,
"eval_bp": 0.7744,
"eval_counts_1": 5935,
"eval_counts_2": 1543,
"eval_counts_3": 576,
"eval_counts_4": 201,
"eval_exact_match": 0.0059,
"eval_f1": 0.2377,
"eval_gen_len": 12.3185,
"eval_loss": 2.6629750728607178,
"eval_precisions_1": 35.0706,
"eval_precisions_2": 10.483,
"eval_precisions_3": 4.6025,
"eval_precisions_4": 1.9494,
"eval_ref_len": 21250,
"eval_rouge1": 0.2365,
"eval_rouge2": 0.089,
"eval_rougeL": 0.2309,
"eval_rougeLsum": 0.2307,
"eval_runtime": 2677.1671,
"eval_samples_per_second": 0.823,
"eval_steps_per_second": 0.412,
"eval_sys_len": 16923,
"eval_totals_1": 16923,
"eval_totals_2": 14719,
"eval_totals_3": 12515,
"eval_totals_4": 10311,
"step": 618
},
{
"epoch": 17.98,
"learning_rate": 0.0001,
"loss": 2.4325,
"step": 654
},
{
"epoch": 17.98,
"eval_bleu": 6.8664,
"eval_bp": 0.8277,
"eval_counts_1": 6305,
"eval_counts_2": 1756,
"eval_counts_3": 685,
"eval_counts_4": 265,
"eval_exact_match": 0.0059,
"eval_f1": 0.2537,
"eval_gen_len": 13.1688,
"eval_loss": 2.579846143722534,
"eval_precisions_1": 35.2826,
"eval_precisions_2": 11.209,
"eval_precisions_3": 5.0884,
"eval_precisions_4": 2.3539,
"eval_ref_len": 21250,
"eval_rouge1": 0.2518,
"eval_rouge2": 0.0982,
"eval_rougeL": 0.2452,
"eval_rougeLsum": 0.2452,
"eval_runtime": 2086.3742,
"eval_samples_per_second": 1.056,
"eval_steps_per_second": 0.528,
"eval_sys_len": 17870,
"eval_totals_1": 17870,
"eval_totals_2": 15666,
"eval_totals_3": 13462,
"eval_totals_4": 11258,
"step": 654
},
{
"epoch": 18.99,
"learning_rate": 0.0001,
"loss": 2.2632,
"step": 691
},
{
"epoch": 18.99,
"eval_bleu": 7.5129,
"eval_bp": 0.823,
"eval_counts_1": 6577,
"eval_counts_2": 1888,
"eval_counts_3": 762,
"eval_counts_4": 304,
"eval_exact_match": 0.0086,
"eval_f1": 0.2702,
"eval_gen_len": 13.2373,
"eval_loss": 2.515482187271118,
"eval_precisions_1": 36.9806,
"eval_precisions_2": 12.1173,
"eval_precisions_3": 5.6963,
"eval_precisions_4": 2.7208,
"eval_ref_len": 21250,
"eval_rouge1": 0.2689,
"eval_rouge2": 0.1102,
"eval_rougeL": 0.261,
"eval_rougeLsum": 0.2611,
"eval_runtime": 2869.1423,
"eval_samples_per_second": 0.768,
"eval_steps_per_second": 0.384,
"eval_sys_len": 17785,
"eval_totals_1": 17785,
"eval_totals_2": 15581,
"eval_totals_3": 13377,
"eval_totals_4": 11173,
"step": 691
},
{
"epoch": 19.79,
"learning_rate": 0.0001,
"loss": 2.2026,
"step": 720
},
{
"epoch": 19.79,
"eval_bleu": 7.1987,
"eval_bp": 0.8159,
"eval_counts_1": 6644,
"eval_counts_2": 1853,
"eval_counts_3": 720,
"eval_counts_4": 273,
"eval_exact_match": 0.0073,
"eval_f1": 0.2742,
"eval_gen_len": 13.6343,
"eval_loss": 2.499704122543335,
"eval_precisions_1": 37.626,
"eval_precisions_2": 11.9904,
"eval_precisions_3": 5.434,
"eval_precisions_4": 2.4715,
"eval_ref_len": 21250,
"eval_rouge1": 0.2717,
"eval_rouge2": 0.1097,
"eval_rougeL": 0.2628,
"eval_rougeLsum": 0.2625,
"eval_runtime": 2871.4974,
"eval_samples_per_second": 0.768,
"eval_steps_per_second": 0.384,
"eval_sys_len": 17658,
"eval_totals_1": 17658,
"eval_totals_2": 15454,
"eval_totals_3": 13250,
"eval_totals_4": 11046,
"step": 720
},
{
"epoch": 19.79,
"step": 720,
"total_flos": 8.496574887886848e+17,
"train_loss": 3.782369862662421,
"train_runtime": 109409.7996,
"train_samples_per_second": 1.703,
"train_steps_per_second": 0.007
}
],
"logging_steps": 500,
"max_steps": 720,
"num_train_epochs": 20,
"save_steps": 500,
"total_flos": 8.496574887886848e+17,
"trial_name": null,
"trial_params": null
}