german-jeopardy-longt5-base-128 / trainer_state.json
Marvin
Initial commit
3051fb5 unverified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 19.776824034334766,
"eval_steps": 500,
"global_step": 1440,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.99,
"learning_rate": 0.0001,
"loss": 3.458,
"step": 72
},
{
"epoch": 0.99,
"eval_bleu": 4.6384,
"eval_bp": 0.6642,
"eval_counts_1": 5618,
"eval_counts_2": 1383,
"eval_counts_3": 463,
"eval_counts_4": 116,
"eval_exact_match": 0.0005,
"eval_f1": 0.2226,
"eval_gen_len": 11.3013,
"eval_loss": 2.3696436882019043,
"eval_precisions_1": 37.2546,
"eval_precisions_2": 10.7409,
"eval_precisions_3": 4.3385,
"eval_precisions_4": 1.3699,
"eval_ref_len": 21250,
"eval_rouge1": 0.2266,
"eval_rouge2": 0.0841,
"eval_rougeL": 0.2197,
"eval_rougeLsum": 0.2196,
"eval_runtime": 433.2426,
"eval_samples_per_second": 5.087,
"eval_steps_per_second": 1.272,
"eval_sys_len": 15080,
"eval_totals_1": 15080,
"eval_totals_2": 12876,
"eval_totals_3": 10672,
"eval_totals_4": 8468,
"step": 72
},
{
"epoch": 1.99,
"learning_rate": 0.0001,
"loss": 2.7548,
"step": 145
},
{
"epoch": 1.99,
"eval_bleu": 6.9183,
"eval_bp": 0.728,
"eval_counts_1": 6361,
"eval_counts_2": 1807,
"eval_counts_3": 700,
"eval_counts_4": 254,
"eval_exact_match": 0.0036,
"eval_f1": 0.2635,
"eval_gen_len": 12.206,
"eval_loss": 2.1310036182403564,
"eval_precisions_1": 39.4358,
"eval_precisions_2": 12.9757,
"eval_precisions_3": 5.9717,
"eval_precisions_4": 2.6686,
"eval_ref_len": 21250,
"eval_rouge1": 0.2706,
"eval_rouge2": 0.1122,
"eval_rougeL": 0.2596,
"eval_rougeLsum": 0.2596,
"eval_runtime": 445.5518,
"eval_samples_per_second": 4.947,
"eval_steps_per_second": 1.237,
"eval_sys_len": 16130,
"eval_totals_1": 16130,
"eval_totals_2": 13926,
"eval_totals_3": 11722,
"eval_totals_4": 9518,
"step": 145
},
{
"epoch": 2.99,
"learning_rate": 0.0001,
"loss": 2.5084,
"step": 218
},
{
"epoch": 2.99,
"eval_bleu": 7.616,
"eval_bp": 0.7714,
"eval_counts_1": 6758,
"eval_counts_2": 2001,
"eval_counts_3": 780,
"eval_counts_4": 285,
"eval_exact_match": 0.0045,
"eval_f1": 0.2832,
"eval_gen_len": 12.8825,
"eval_loss": 2.0244088172912598,
"eval_precisions_1": 40.0569,
"eval_precisions_2": 13.6429,
"eval_precisions_3": 6.2585,
"eval_precisions_4": 2.778,
"eval_ref_len": 21250,
"eval_rouge1": 0.2888,
"eval_rouge2": 0.1258,
"eval_rougeL": 0.2766,
"eval_rougeLsum": 0.2767,
"eval_runtime": 693.3228,
"eval_samples_per_second": 3.179,
"eval_steps_per_second": 0.795,
"eval_sys_len": 16871,
"eval_totals_1": 16871,
"eval_totals_2": 14667,
"eval_totals_3": 12463,
"eval_totals_4": 10259,
"step": 218
},
{
"epoch": 4.0,
"learning_rate": 0.0001,
"loss": 2.3562,
"step": 291
},
{
"epoch": 4.0,
"eval_bleu": 8.6611,
"eval_bp": 0.7671,
"eval_counts_1": 7011,
"eval_counts_2": 2193,
"eval_counts_3": 908,
"eval_counts_4": 360,
"eval_exact_match": 0.0077,
"eval_f1": 0.2978,
"eval_gen_len": 12.9142,
"eval_loss": 1.950147032737732,
"eval_precisions_1": 41.7421,
"eval_precisions_2": 15.0288,
"eval_precisions_3": 7.3297,
"eval_precisions_4": 3.535,
"eval_ref_len": 21250,
"eval_rouge1": 0.303,
"eval_rouge2": 0.1375,
"eval_rougeL": 0.2892,
"eval_rougeLsum": 0.2894,
"eval_runtime": 807.954,
"eval_samples_per_second": 2.728,
"eval_steps_per_second": 0.682,
"eval_sys_len": 16796,
"eval_totals_1": 16796,
"eval_totals_2": 14592,
"eval_totals_3": 12388,
"eval_totals_4": 10184,
"step": 291
},
{
"epoch": 5.0,
"learning_rate": 0.0001,
"loss": 2.2383,
"step": 364
},
{
"epoch": 5.0,
"eval_bleu": 9.6159,
"eval_bp": 0.762,
"eval_counts_1": 7245,
"eval_counts_2": 2386,
"eval_counts_3": 1015,
"eval_counts_4": 435,
"eval_exact_match": 0.0113,
"eval_f1": 0.3155,
"eval_gen_len": 12.8417,
"eval_loss": 1.8873705863952637,
"eval_precisions_1": 43.3625,
"eval_precisions_2": 16.4506,
"eval_precisions_3": 8.252,
"eval_precisions_4": 4.3086,
"eval_ref_len": 21250,
"eval_rouge1": 0.3198,
"eval_rouge2": 0.1498,
"eval_rougeL": 0.3077,
"eval_rougeLsum": 0.3079,
"eval_runtime": 789.9213,
"eval_samples_per_second": 2.79,
"eval_steps_per_second": 0.698,
"eval_sys_len": 16708,
"eval_totals_1": 16708,
"eval_totals_2": 14504,
"eval_totals_3": 12300,
"eval_totals_4": 10096,
"step": 364
},
{
"epoch": 5.99,
"learning_rate": 0.0001,
"loss": 2.1576,
"step": 436
},
{
"epoch": 5.99,
"eval_bleu": 9.5745,
"eval_bp": 0.7796,
"eval_counts_1": 7378,
"eval_counts_2": 2382,
"eval_counts_3": 997,
"eval_counts_4": 429,
"eval_exact_match": 0.0109,
"eval_f1": 0.3215,
"eval_gen_len": 13.2187,
"eval_loss": 1.859336018562317,
"eval_precisions_1": 43.3643,
"eval_precisions_2": 16.0837,
"eval_precisions_3": 7.9089,
"eval_precisions_4": 4.1242,
"eval_ref_len": 21250,
"eval_rouge1": 0.326,
"eval_rouge2": 0.1497,
"eval_rougeL": 0.3132,
"eval_rougeLsum": 0.3132,
"eval_runtime": 806.6141,
"eval_samples_per_second": 2.732,
"eval_steps_per_second": 0.683,
"eval_sys_len": 17014,
"eval_totals_1": 17014,
"eval_totals_2": 14810,
"eval_totals_3": 12606,
"eval_totals_4": 10402,
"step": 436
},
{
"epoch": 6.99,
"learning_rate": 0.0001,
"loss": 2.0356,
"step": 509
},
{
"epoch": 6.99,
"eval_bleu": 10.3053,
"eval_bp": 0.7787,
"eval_counts_1": 7570,
"eval_counts_2": 2520,
"eval_counts_3": 1097,
"eval_counts_4": 482,
"eval_exact_match": 0.0123,
"eval_f1": 0.3339,
"eval_gen_len": 13.0368,
"eval_loss": 1.8132530450820923,
"eval_precisions_1": 44.532,
"eval_precisions_2": 17.0328,
"eval_precisions_3": 8.7126,
"eval_precisions_4": 4.6404,
"eval_ref_len": 21250,
"eval_rouge1": 0.3384,
"eval_rouge2": 0.158,
"eval_rougeL": 0.3258,
"eval_rougeLsum": 0.3257,
"eval_runtime": 454.2622,
"eval_samples_per_second": 4.852,
"eval_steps_per_second": 1.213,
"eval_sys_len": 16999,
"eval_totals_1": 16999,
"eval_totals_2": 14795,
"eval_totals_3": 12591,
"eval_totals_4": 10387,
"step": 509
},
{
"epoch": 7.99,
"learning_rate": 0.0001,
"loss": 1.9575,
"step": 582
},
{
"epoch": 7.99,
"eval_bleu": 10.993,
"eval_bp": 0.8003,
"eval_counts_1": 7764,
"eval_counts_2": 2637,
"eval_counts_3": 1175,
"eval_counts_4": 545,
"eval_exact_match": 0.0136,
"eval_f1": 0.3407,
"eval_gen_len": 13.4719,
"eval_loss": 1.7855687141418457,
"eval_precisions_1": 44.6746,
"eval_precisions_2": 17.3773,
"eval_precisions_3": 9.0587,
"eval_precisions_4": 5.0618,
"eval_ref_len": 21250,
"eval_rouge1": 0.345,
"eval_rouge2": 0.1625,
"eval_rougeL": 0.3322,
"eval_rougeLsum": 0.3324,
"eval_runtime": 470.7972,
"eval_samples_per_second": 4.681,
"eval_steps_per_second": 1.17,
"eval_sys_len": 17379,
"eval_totals_1": 17379,
"eval_totals_2": 15175,
"eval_totals_3": 12971,
"eval_totals_4": 10767,
"step": 582
},
{
"epoch": 9.0,
"learning_rate": 0.0001,
"loss": 1.8889,
"step": 655
},
{
"epoch": 9.0,
"eval_bleu": 10.9637,
"eval_bp": 0.7846,
"eval_counts_1": 7766,
"eval_counts_2": 2644,
"eval_counts_3": 1184,
"eval_counts_4": 532,
"eval_exact_match": 0.0123,
"eval_f1": 0.3438,
"eval_gen_len": 13.2164,
"eval_loss": 1.7666170597076416,
"eval_precisions_1": 45.4099,
"eval_precisions_2": 17.7473,
"eval_precisions_3": 9.3272,
"eval_precisions_4": 5.0715,
"eval_ref_len": 21250,
"eval_rouge1": 0.3487,
"eval_rouge2": 0.1636,
"eval_rougeL": 0.3348,
"eval_rougeLsum": 0.335,
"eval_runtime": 461.5052,
"eval_samples_per_second": 4.776,
"eval_steps_per_second": 1.194,
"eval_sys_len": 17102,
"eval_totals_1": 17102,
"eval_totals_2": 14898,
"eval_totals_3": 12694,
"eval_totals_4": 10490,
"step": 655
},
{
"epoch": 10.0,
"learning_rate": 0.0001,
"loss": 1.8201,
"step": 728
},
{
"epoch": 10.0,
"eval_bleu": 11.3891,
"eval_bp": 0.7877,
"eval_counts_1": 7737,
"eval_counts_2": 2680,
"eval_counts_3": 1238,
"eval_counts_4": 587,
"eval_exact_match": 0.0163,
"eval_f1": 0.3406,
"eval_gen_len": 13.1388,
"eval_loss": 1.7414668798446655,
"eval_precisions_1": 45.0979,
"eval_precisions_2": 17.924,
"eval_precisions_3": 9.7113,
"eval_precisions_4": 5.5671,
"eval_ref_len": 21250,
"eval_rouge1": 0.3453,
"eval_rouge2": 0.1666,
"eval_rougeL": 0.3332,
"eval_rougeLsum": 0.3333,
"eval_runtime": 457.4345,
"eval_samples_per_second": 4.818,
"eval_steps_per_second": 1.205,
"eval_sys_len": 17156,
"eval_totals_1": 17156,
"eval_totals_2": 14952,
"eval_totals_3": 12748,
"eval_totals_4": 10544,
"step": 728
},
{
"epoch": 10.99,
"learning_rate": 0.0001,
"loss": 1.7882,
"step": 800
},
{
"epoch": 10.99,
"eval_bleu": 11.4047,
"eval_bp": 0.7995,
"eval_counts_1": 7859,
"eval_counts_2": 2722,
"eval_counts_3": 1241,
"eval_counts_4": 572,
"eval_exact_match": 0.0145,
"eval_f1": 0.3473,
"eval_gen_len": 13.4052,
"eval_loss": 1.7331299781799316,
"eval_precisions_1": 45.2603,
"eval_precisions_2": 17.9551,
"eval_precisions_3": 9.5786,
"eval_precisions_4": 5.3199,
"eval_ref_len": 21250,
"eval_rouge1": 0.3524,
"eval_rouge2": 0.1673,
"eval_rougeL": 0.3387,
"eval_rougeLsum": 0.3385,
"eval_runtime": 470.5412,
"eval_samples_per_second": 4.684,
"eval_steps_per_second": 1.171,
"eval_sys_len": 17364,
"eval_totals_1": 17364,
"eval_totals_2": 15160,
"eval_totals_3": 12956,
"eval_totals_4": 10752,
"step": 800
},
{
"epoch": 11.99,
"learning_rate": 0.0001,
"loss": 1.7095,
"step": 873
},
{
"epoch": 11.99,
"eval_bleu": 11.8807,
"eval_bp": 0.8053,
"eval_counts_1": 7968,
"eval_counts_2": 2783,
"eval_counts_3": 1292,
"eval_counts_4": 625,
"eval_exact_match": 0.0154,
"eval_f1": 0.3495,
"eval_gen_len": 13.4437,
"eval_loss": 1.7193998098373413,
"eval_precisions_1": 45.6175,
"eval_precisions_2": 18.2336,
"eval_precisions_3": 9.8936,
"eval_precisions_4": 5.7577,
"eval_ref_len": 21250,
"eval_rouge1": 0.3547,
"eval_rouge2": 0.1708,
"eval_rougeL": 0.3418,
"eval_rougeLsum": 0.3414,
"eval_runtime": 472.913,
"eval_samples_per_second": 4.66,
"eval_steps_per_second": 1.165,
"eval_sys_len": 17467,
"eval_totals_1": 17467,
"eval_totals_2": 15263,
"eval_totals_3": 13059,
"eval_totals_4": 10855,
"step": 873
},
{
"epoch": 12.99,
"learning_rate": 0.0001,
"loss": 1.6619,
"step": 946
},
{
"epoch": 12.99,
"eval_bleu": 11.7968,
"eval_bp": 0.8034,
"eval_counts_1": 8011,
"eval_counts_2": 2796,
"eval_counts_3": 1286,
"eval_counts_4": 604,
"eval_exact_match": 0.0154,
"eval_f1": 0.3526,
"eval_gen_len": 13.4964,
"eval_loss": 1.703238606452942,
"eval_precisions_1": 45.9531,
"eval_precisions_2": 18.3597,
"eval_precisions_3": 9.8733,
"eval_precisions_4": 5.5817,
"eval_ref_len": 21250,
"eval_rouge1": 0.3584,
"eval_rouge2": 0.1736,
"eval_rougeL": 0.3454,
"eval_rougeLsum": 0.3454,
"eval_runtime": 460.9308,
"eval_samples_per_second": 4.782,
"eval_steps_per_second": 1.195,
"eval_sys_len": 17433,
"eval_totals_1": 17433,
"eval_totals_2": 15229,
"eval_totals_3": 13025,
"eval_totals_4": 10821,
"step": 946
},
{
"epoch": 13.99,
"learning_rate": 0.0001,
"loss": 1.6103,
"step": 1019
},
{
"epoch": 13.99,
"eval_bleu": 12.235,
"eval_bp": 0.8163,
"eval_counts_1": 8154,
"eval_counts_2": 2891,
"eval_counts_3": 1347,
"eval_counts_4": 636,
"eval_exact_match": 0.015,
"eval_f1": 0.3602,
"eval_gen_len": 13.7223,
"eval_loss": 1.7027523517608643,
"eval_precisions_1": 46.1591,
"eval_precisions_2": 18.6987,
"eval_precisions_3": 10.1607,
"eval_precisions_4": 5.7541,
"eval_ref_len": 21250,
"eval_rouge1": 0.3659,
"eval_rouge2": 0.1795,
"eval_rougeL": 0.3509,
"eval_rougeLsum": 0.3508,
"eval_runtime": 461.3951,
"eval_samples_per_second": 4.777,
"eval_steps_per_second": 1.194,
"eval_sys_len": 17665,
"eval_totals_1": 17665,
"eval_totals_2": 15461,
"eval_totals_3": 13257,
"eval_totals_4": 11053,
"step": 1019
},
{
"epoch": 15.0,
"learning_rate": 0.0001,
"loss": 1.565,
"step": 1092
},
{
"epoch": 15.0,
"eval_bleu": 12.4116,
"eval_bp": 0.8088,
"eval_counts_1": 8135,
"eval_counts_2": 2897,
"eval_counts_3": 1362,
"eval_counts_4": 665,
"eval_exact_match": 0.02,
"eval_f1": 0.3603,
"eval_gen_len": 13.6107,
"eval_loss": 1.6954691410064697,
"eval_precisions_1": 46.4062,
"eval_precisions_2": 18.9025,
"eval_precisions_3": 10.3795,
"eval_precisions_4": 6.0909,
"eval_ref_len": 21250,
"eval_rouge1": 0.3668,
"eval_rouge2": 0.1808,
"eval_rougeL": 0.3518,
"eval_rougeLsum": 0.3516,
"eval_runtime": 457.9806,
"eval_samples_per_second": 4.812,
"eval_steps_per_second": 1.203,
"eval_sys_len": 17530,
"eval_totals_1": 17530,
"eval_totals_2": 15326,
"eval_totals_3": 13122,
"eval_totals_4": 10918,
"step": 1092
},
{
"epoch": 16.0,
"learning_rate": 0.0001,
"loss": 1.522,
"step": 1165
},
{
"epoch": 16.0,
"eval_bleu": 12.8008,
"eval_bp": 0.8318,
"eval_counts_1": 8271,
"eval_counts_2": 2982,
"eval_counts_3": 1414,
"eval_counts_4": 697,
"eval_exact_match": 0.0191,
"eval_f1": 0.3632,
"eval_gen_len": 13.9192,
"eval_loss": 1.6792546510696411,
"eval_precisions_1": 46.0883,
"eval_precisions_2": 18.943,
"eval_precisions_3": 10.4447,
"eval_precisions_4": 6.1496,
"eval_ref_len": 21250,
"eval_rouge1": 0.3695,
"eval_rouge2": 0.1828,
"eval_rougeL": 0.354,
"eval_rougeLsum": 0.354,
"eval_runtime": 476.6232,
"eval_samples_per_second": 4.624,
"eval_steps_per_second": 1.156,
"eval_sys_len": 17946,
"eval_totals_1": 17946,
"eval_totals_2": 15742,
"eval_totals_3": 13538,
"eval_totals_4": 11334,
"step": 1165
},
{
"epoch": 16.99,
"learning_rate": 0.0001,
"loss": 1.5022,
"step": 1237
},
{
"epoch": 16.99,
"eval_bleu": 12.6672,
"eval_bp": 0.8077,
"eval_counts_1": 8244,
"eval_counts_2": 2967,
"eval_counts_3": 1392,
"eval_counts_4": 680,
"eval_exact_match": 0.0191,
"eval_f1": 0.366,
"eval_gen_len": 13.6243,
"eval_loss": 1.684873104095459,
"eval_precisions_1": 47.0817,
"eval_precisions_2": 19.3846,
"eval_precisions_3": 10.6243,
"eval_precisions_4": 6.2397,
"eval_ref_len": 21250,
"eval_rouge1": 0.3728,
"eval_rouge2": 0.184,
"eval_rougeL": 0.3569,
"eval_rougeLsum": 0.3569,
"eval_runtime": 453.2,
"eval_samples_per_second": 4.863,
"eval_steps_per_second": 1.216,
"eval_sys_len": 17510,
"eval_totals_1": 17510,
"eval_totals_2": 15306,
"eval_totals_3": 13102,
"eval_totals_4": 10898,
"step": 1237
},
{
"epoch": 17.99,
"learning_rate": 0.0001,
"loss": 1.4359,
"step": 1310
},
{
"epoch": 17.99,
"eval_bleu": 13.0683,
"eval_bp": 0.8278,
"eval_counts_1": 8328,
"eval_counts_2": 3050,
"eval_counts_3": 1448,
"eval_counts_4": 717,
"eval_exact_match": 0.0181,
"eval_f1": 0.3671,
"eval_gen_len": 13.7255,
"eval_loss": 1.686221718788147,
"eval_precisions_1": 46.5954,
"eval_precisions_2": 19.4652,
"eval_precisions_3": 10.7538,
"eval_precisions_4": 6.3671,
"eval_ref_len": 21250,
"eval_rouge1": 0.3742,
"eval_rouge2": 0.1866,
"eval_rougeL": 0.3582,
"eval_rougeLsum": 0.3583,
"eval_runtime": 451.92,
"eval_samples_per_second": 4.877,
"eval_steps_per_second": 1.219,
"eval_sys_len": 17873,
"eval_totals_1": 17873,
"eval_totals_2": 15669,
"eval_totals_3": 13465,
"eval_totals_4": 11261,
"step": 1310
},
{
"epoch": 18.99,
"learning_rate": 0.0001,
"loss": 1.3994,
"step": 1383
},
{
"epoch": 18.99,
"eval_bleu": 12.8728,
"eval_bp": 0.8152,
"eval_counts_1": 8272,
"eval_counts_2": 2998,
"eval_counts_3": 1417,
"eval_counts_4": 704,
"eval_exact_match": 0.0213,
"eval_f1": 0.3673,
"eval_gen_len": 13.6956,
"eval_loss": 1.6775314807891846,
"eval_precisions_1": 46.8801,
"eval_precisions_2": 19.4158,
"eval_precisions_3": 10.7048,
"eval_precisions_4": 6.3809,
"eval_ref_len": 21250,
"eval_rouge1": 0.3739,
"eval_rouge2": 0.1866,
"eval_rougeL": 0.3583,
"eval_rougeLsum": 0.3581,
"eval_runtime": 818.5079,
"eval_samples_per_second": 2.693,
"eval_steps_per_second": 0.673,
"eval_sys_len": 17645,
"eval_totals_1": 17645,
"eval_totals_2": 15441,
"eval_totals_3": 13237,
"eval_totals_4": 11033,
"step": 1383
},
{
"epoch": 19.78,
"learning_rate": 0.0001,
"loss": 1.3609,
"step": 1440
},
{
"epoch": 19.78,
"eval_bleu": 13.1569,
"eval_bp": 0.8251,
"eval_counts_1": 8347,
"eval_counts_2": 3062,
"eval_counts_3": 1465,
"eval_counts_4": 723,
"eval_exact_match": 0.0204,
"eval_f1": 0.3692,
"eval_gen_len": 13.7328,
"eval_loss": 1.688394546508789,
"eval_precisions_1": 46.8327,
"eval_precisions_2": 19.6043,
"eval_precisions_3": 10.9206,
"eval_precisions_4": 6.449,
"eval_ref_len": 21250,
"eval_rouge1": 0.3761,
"eval_rouge2": 0.1886,
"eval_rougeL": 0.3601,
"eval_rougeLsum": 0.3596,
"eval_runtime": 834.1703,
"eval_samples_per_second": 2.642,
"eval_steps_per_second": 0.661,
"eval_sys_len": 17823,
"eval_totals_1": 17823,
"eval_totals_2": 15619,
"eval_totals_3": 13415,
"eval_totals_4": 11211,
"step": 1440
},
{
"epoch": 19.78,
"step": 1440,
"total_flos": 2.52283256045568e+17,
"train_loss": 1.9421327537960476,
"train_runtime": 22435.6962,
"train_samples_per_second": 8.303,
"train_steps_per_second": 0.064
}
],
"logging_steps": 500,
"max_steps": 1440,
"num_train_epochs": 20,
"save_steps": 500,
"total_flos": 2.52283256045568e+17,
"trial_name": null,
"trial_params": null
}