german-jeopardy-longt5-base / trainer_state.json
Marvin
Initial commit
c80d287 unverified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 19.914163090128756,
"eval_steps": 500,
"global_step": 2900,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"learning_rate": 0.0001,
"loss": 3.1671,
"step": 145
},
{
"epoch": 1.0,
"eval_bleu": 5.9441,
"eval_bp": 0.7156,
"eval_counts_1": 6177,
"eval_counts_2": 1669,
"eval_counts_3": 604,
"eval_counts_4": 179,
"eval_exact_match": 0.0023,
"eval_f1": 0.2528,
"eval_gen_len": 12.0218,
"eval_loss": 2.190216541290283,
"eval_precisions_1": 38.7954,
"eval_precisions_2": 12.1665,
"eval_precisions_3": 5.2458,
"eval_precisions_4": 1.9227,
"eval_ref_len": 21250,
"eval_rouge1": 0.2595,
"eval_rouge2": 0.1035,
"eval_rougeL": 0.2491,
"eval_rougeLsum": 0.2492,
"eval_runtime": 793.0147,
"eval_samples_per_second": 2.779,
"eval_steps_per_second": 0.695,
"eval_sys_len": 15922,
"eval_totals_1": 15922,
"eval_totals_2": 13718,
"eval_totals_3": 11514,
"eval_totals_4": 9310,
"step": 145
},
{
"epoch": 2.0,
"learning_rate": 0.0001,
"loss": 2.5597,
"step": 291
},
{
"epoch": 2.0,
"eval_bleu": 7.7787,
"eval_bp": 0.7556,
"eval_counts_1": 6785,
"eval_counts_2": 2044,
"eval_counts_3": 804,
"eval_counts_4": 293,
"eval_exact_match": 0.0064,
"eval_f1": 0.2864,
"eval_gen_len": 12.6084,
"eval_loss": 2.016404151916504,
"eval_precisions_1": 40.876,
"eval_precisions_2": 14.1994,
"eval_precisions_3": 6.595,
"eval_precisions_4": 2.9338,
"eval_ref_len": 21250,
"eval_rouge1": 0.2931,
"eval_rouge2": 0.1291,
"eval_rougeL": 0.2817,
"eval_rougeLsum": 0.2818,
"eval_runtime": 817.9822,
"eval_samples_per_second": 2.694,
"eval_steps_per_second": 0.674,
"eval_sys_len": 16599,
"eval_totals_1": 16599,
"eval_totals_2": 14395,
"eval_totals_3": 12191,
"eval_totals_4": 9987,
"step": 291
},
{
"epoch": 2.99,
"learning_rate": 0.0001,
"loss": 2.3464,
"step": 436
},
{
"epoch": 2.99,
"eval_bleu": 9.2407,
"eval_bp": 0.7935,
"eval_counts_1": 7251,
"eval_counts_2": 2326,
"eval_counts_3": 969,
"eval_counts_4": 400,
"eval_exact_match": 0.0073,
"eval_f1": 0.3114,
"eval_gen_len": 13.2296,
"eval_loss": 1.9138075113296509,
"eval_precisions_1": 42.0129,
"eval_precisions_2": 15.45,
"eval_precisions_3": 7.5403,
"eval_precisions_4": 3.7569,
"eval_ref_len": 21250,
"eval_rouge1": 0.3162,
"eval_rouge2": 0.1456,
"eval_rougeL": 0.3031,
"eval_rougeLsum": 0.3031,
"eval_runtime": 765.0466,
"eval_samples_per_second": 2.881,
"eval_steps_per_second": 0.72,
"eval_sys_len": 17259,
"eval_totals_1": 17259,
"eval_totals_2": 15055,
"eval_totals_3": 12851,
"eval_totals_4": 10647,
"step": 436
},
{
"epoch": 4.0,
"learning_rate": 0.0001,
"loss": 2.1679,
"step": 582
},
{
"epoch": 4.0,
"eval_bleu": 9.6363,
"eval_bp": 0.7795,
"eval_counts_1": 7382,
"eval_counts_2": 2393,
"eval_counts_3": 1006,
"eval_counts_4": 434,
"eval_exact_match": 0.0109,
"eval_f1": 0.3226,
"eval_gen_len": 13.1207,
"eval_loss": 1.8524010181427002,
"eval_precisions_1": 43.3903,
"eval_precisions_2": 16.1591,
"eval_precisions_3": 7.981,
"eval_precisions_4": 4.1727,
"eval_ref_len": 21250,
"eval_rouge1": 0.3272,
"eval_rouge2": 0.1504,
"eval_rougeL": 0.3147,
"eval_rougeLsum": 0.3149,
"eval_runtime": 882.4242,
"eval_samples_per_second": 2.498,
"eval_steps_per_second": 0.624,
"eval_sys_len": 17013,
"eval_totals_1": 17013,
"eval_totals_2": 14809,
"eval_totals_3": 12605,
"eval_totals_4": 10401,
"step": 582
},
{
"epoch": 5.0,
"learning_rate": 0.0001,
"loss": 2.0454,
"step": 728
},
{
"epoch": 5.0,
"eval_bleu": 10.3812,
"eval_bp": 0.7665,
"eval_counts_1": 7581,
"eval_counts_2": 2555,
"eval_counts_3": 1111,
"eval_counts_4": 482,
"eval_exact_match": 0.0132,
"eval_f1": 0.3357,
"eval_gen_len": 12.9782,
"eval_loss": 1.7996737957000732,
"eval_precisions_1": 45.1599,
"eval_precisions_2": 17.5204,
"eval_precisions_3": 8.9749,
"eval_precisions_4": 4.7371,
"eval_ref_len": 21250,
"eval_rouge1": 0.3401,
"eval_rouge2": 0.1606,
"eval_rougeL": 0.3278,
"eval_rougeLsum": 0.3279,
"eval_runtime": 519.8377,
"eval_samples_per_second": 4.24,
"eval_steps_per_second": 1.06,
"eval_sys_len": 16787,
"eval_totals_1": 16787,
"eval_totals_2": 14583,
"eval_totals_3": 12379,
"eval_totals_4": 10175,
"step": 728
},
{
"epoch": 5.99,
"learning_rate": 0.0001,
"loss": 1.9502,
"step": 873
},
{
"epoch": 5.99,
"eval_bleu": 10.7668,
"eval_bp": 0.7992,
"eval_counts_1": 7759,
"eval_counts_2": 2618,
"eval_counts_3": 1162,
"eval_counts_4": 511,
"eval_exact_match": 0.0127,
"eval_f1": 0.3406,
"eval_gen_len": 13.4841,
"eval_loss": 1.7696163654327393,
"eval_precisions_1": 44.6973,
"eval_precisions_2": 17.2748,
"eval_precisions_3": 8.9723,
"eval_precisions_4": 4.7548,
"eval_ref_len": 21250,
"eval_rouge1": 0.3452,
"eval_rouge2": 0.1631,
"eval_rougeL": 0.3321,
"eval_rougeLsum": 0.3319,
"eval_runtime": 542.6731,
"eval_samples_per_second": 4.061,
"eval_steps_per_second": 1.015,
"eval_sys_len": 17359,
"eval_totals_1": 17359,
"eval_totals_2": 15155,
"eval_totals_3": 12951,
"eval_totals_4": 10747,
"step": 873
},
{
"epoch": 7.0,
"learning_rate": 0.0001,
"loss": 1.8414,
"step": 1019
},
{
"epoch": 7.0,
"eval_bleu": 11.3408,
"eval_bp": 0.7721,
"eval_counts_1": 7791,
"eval_counts_2": 2693,
"eval_counts_3": 1236,
"eval_counts_4": 570,
"eval_exact_match": 0.015,
"eval_f1": 0.347,
"eval_gen_len": 13.0563,
"eval_loss": 1.7471755743026733,
"eval_precisions_1": 46.147,
"eval_precisions_2": 18.3459,
"eval_precisions_3": 9.9078,
"eval_precisions_4": 5.5496,
"eval_ref_len": 21250,
"eval_rouge1": 0.3513,
"eval_rouge2": 0.1679,
"eval_rougeL": 0.3391,
"eval_rougeLsum": 0.3391,
"eval_runtime": 455.2485,
"eval_samples_per_second": 4.841,
"eval_steps_per_second": 1.21,
"eval_sys_len": 16883,
"eval_totals_1": 16883,
"eval_totals_2": 14679,
"eval_totals_3": 12475,
"eval_totals_4": 10271,
"step": 1019
},
{
"epoch": 8.0,
"learning_rate": 0.0001,
"loss": 1.7614,
"step": 1165
},
{
"epoch": 8.0,
"eval_bleu": 11.8447,
"eval_bp": 0.8198,
"eval_counts_1": 8024,
"eval_counts_2": 2799,
"eval_counts_3": 1296,
"eval_counts_4": 610,
"eval_exact_match": 0.0145,
"eval_f1": 0.352,
"eval_gen_len": 13.515,
"eval_loss": 1.7203415632247925,
"eval_precisions_1": 45.2643,
"eval_precisions_2": 18.0313,
"eval_precisions_3": 9.7305,
"eval_precisions_4": 5.4881,
"eval_ref_len": 21250,
"eval_rouge1": 0.3565,
"eval_rouge2": 0.1711,
"eval_rougeL": 0.3422,
"eval_rougeLsum": 0.3423,
"eval_runtime": 457.6091,
"eval_samples_per_second": 4.816,
"eval_steps_per_second": 1.204,
"eval_sys_len": 17727,
"eval_totals_1": 17727,
"eval_totals_2": 15523,
"eval_totals_3": 13319,
"eval_totals_4": 11115,
"step": 1165
},
{
"epoch": 9.0,
"learning_rate": 0.0001,
"loss": 1.6997,
"step": 1310
},
{
"epoch": 9.0,
"eval_bleu": 11.9689,
"eval_bp": 0.8027,
"eval_counts_1": 8046,
"eval_counts_2": 2835,
"eval_counts_3": 1314,
"eval_counts_4": 615,
"eval_exact_match": 0.0168,
"eval_f1": 0.3568,
"eval_gen_len": 13.4306,
"eval_loss": 1.7166661024093628,
"eval_precisions_1": 46.183,
"eval_precisions_2": 18.6293,
"eval_precisions_3": 10.0968,
"eval_precisions_4": 5.6892,
"eval_ref_len": 21250,
"eval_rouge1": 0.3613,
"eval_rouge2": 0.1746,
"eval_rougeL": 0.3466,
"eval_rougeLsum": 0.3466,
"eval_runtime": 543.9804,
"eval_samples_per_second": 4.052,
"eval_steps_per_second": 1.013,
"eval_sys_len": 17422,
"eval_totals_1": 17422,
"eval_totals_2": 15218,
"eval_totals_3": 13014,
"eval_totals_4": 10810,
"step": 1310
},
{
"epoch": 10.0,
"learning_rate": 0.0001,
"loss": 1.6159,
"step": 1456
},
{
"epoch": 10.0,
"eval_bleu": 12.5678,
"eval_bp": 0.8182,
"eval_counts_1": 8087,
"eval_counts_2": 2928,
"eval_counts_3": 1395,
"eval_counts_4": 681,
"eval_exact_match": 0.0181,
"eval_f1": 0.3564,
"eval_gen_len": 13.5268,
"eval_loss": 1.689180612564087,
"eval_precisions_1": 45.6944,
"eval_precisions_2": 18.8976,
"eval_precisions_3": 10.4966,
"eval_precisions_4": 6.1429,
"eval_ref_len": 21250,
"eval_rouge1": 0.3612,
"eval_rouge2": 0.1795,
"eval_rougeL": 0.3485,
"eval_rougeLsum": 0.3482,
"eval_runtime": 661.754,
"eval_samples_per_second": 3.331,
"eval_steps_per_second": 0.833,
"eval_sys_len": 17698,
"eval_totals_1": 17698,
"eval_totals_2": 15494,
"eval_totals_3": 13290,
"eval_totals_4": 11086,
"step": 1456
},
{
"epoch": 10.99,
"learning_rate": 0.0001,
"loss": 1.5681,
"step": 1601
},
{
"epoch": 10.99,
"eval_bleu": 12.497,
"eval_bp": 0.813,
"eval_counts_1": 8154,
"eval_counts_2": 2933,
"eval_counts_3": 1383,
"eval_counts_4": 664,
"eval_exact_match": 0.0168,
"eval_f1": 0.3605,
"eval_gen_len": 13.6044,
"eval_loss": 1.6923038959503174,
"eval_precisions_1": 46.3164,
"eval_precisions_2": 19.0442,
"eval_precisions_3": 10.4797,
"eval_precisions_4": 6.0402,
"eval_ref_len": 21250,
"eval_rouge1": 0.3654,
"eval_rouge2": 0.1789,
"eval_rougeL": 0.3506,
"eval_rougeLsum": 0.3505,
"eval_runtime": 528.2815,
"eval_samples_per_second": 4.172,
"eval_steps_per_second": 1.043,
"eval_sys_len": 17605,
"eval_totals_1": 17605,
"eval_totals_2": 15401,
"eval_totals_3": 13197,
"eval_totals_4": 10993,
"step": 1601
},
{
"epoch": 12.0,
"learning_rate": 0.0001,
"loss": 1.4987,
"step": 1747
},
{
"epoch": 12.0,
"eval_bleu": 12.8959,
"eval_bp": 0.8169,
"eval_counts_1": 8295,
"eval_counts_2": 3011,
"eval_counts_3": 1432,
"eval_counts_4": 697,
"eval_exact_match": 0.0181,
"eval_f1": 0.3675,
"eval_gen_len": 13.6134,
"eval_loss": 1.6824951171875,
"eval_precisions_1": 46.928,
"eval_precisions_2": 19.461,
"eval_precisions_3": 10.7929,
"eval_precisions_4": 6.2997,
"eval_ref_len": 21250,
"eval_rouge1": 0.3734,
"eval_rouge2": 0.1846,
"eval_rougeL": 0.3576,
"eval_rougeLsum": 0.3577,
"eval_runtime": 636.4551,
"eval_samples_per_second": 3.463,
"eval_steps_per_second": 0.866,
"eval_sys_len": 17676,
"eval_totals_1": 17676,
"eval_totals_2": 15472,
"eval_totals_3": 13268,
"eval_totals_4": 11064,
"step": 1747
},
{
"epoch": 13.0,
"learning_rate": 0.0001,
"loss": 1.4461,
"step": 1893
},
{
"epoch": 13.0,
"eval_bleu": 12.8688,
"eval_bp": 0.8139,
"eval_counts_1": 8246,
"eval_counts_2": 3005,
"eval_counts_3": 1424,
"eval_counts_4": 700,
"eval_exact_match": 0.0191,
"eval_f1": 0.3658,
"eval_gen_len": 13.5812,
"eval_loss": 1.6783509254455566,
"eval_precisions_1": 46.7964,
"eval_precisions_2": 19.4915,
"eval_precisions_3": 10.7773,
"eval_precisions_4": 6.3584,
"eval_ref_len": 21250,
"eval_rouge1": 0.3725,
"eval_rouge2": 0.1857,
"eval_rougeL": 0.358,
"eval_rougeLsum": 0.3576,
"eval_runtime": 521.7174,
"eval_samples_per_second": 4.225,
"eval_steps_per_second": 1.056,
"eval_sys_len": 17621,
"eval_totals_1": 17621,
"eval_totals_2": 15417,
"eval_totals_3": 13213,
"eval_totals_4": 11009,
"step": 1893
},
{
"epoch": 13.99,
"learning_rate": 0.0001,
"loss": 1.4002,
"step": 2038
},
{
"epoch": 13.99,
"eval_bleu": 13.4526,
"eval_bp": 0.8329,
"eval_counts_1": 8457,
"eval_counts_2": 3130,
"eval_counts_3": 1504,
"eval_counts_4": 745,
"eval_exact_match": 0.02,
"eval_f1": 0.3727,
"eval_gen_len": 13.9179,
"eval_loss": 1.6725177764892578,
"eval_precisions_1": 47.0749,
"eval_precisions_2": 19.8591,
"eval_precisions_3": 11.0939,
"eval_precisions_4": 6.5621,
"eval_ref_len": 21250,
"eval_rouge1": 0.3797,
"eval_rouge2": 0.1915,
"eval_rougeL": 0.3637,
"eval_rougeLsum": 0.3634,
"eval_runtime": 592.5507,
"eval_samples_per_second": 3.72,
"eval_steps_per_second": 0.93,
"eval_sys_len": 17965,
"eval_totals_1": 17965,
"eval_totals_2": 15761,
"eval_totals_3": 13557,
"eval_totals_4": 11353,
"step": 2038
},
{
"epoch": 15.0,
"learning_rate": 0.0001,
"loss": 1.3391,
"step": 2184
},
{
"epoch": 15.0,
"eval_bleu": 13.211,
"eval_bp": 0.8283,
"eval_counts_1": 8443,
"eval_counts_2": 3091,
"eval_counts_3": 1468,
"eval_counts_4": 719,
"eval_exact_match": 0.0204,
"eval_f1": 0.3737,
"eval_gen_len": 13.9133,
"eval_loss": 1.6783130168914795,
"eval_precisions_1": 47.2177,
"eval_precisions_2": 19.7168,
"eval_precisions_3": 10.8959,
"eval_precisions_4": 6.3803,
"eval_ref_len": 21250,
"eval_rouge1": 0.3804,
"eval_rouge2": 0.1901,
"eval_rougeL": 0.3634,
"eval_rougeLsum": 0.363,
"eval_runtime": 547.4964,
"eval_samples_per_second": 4.026,
"eval_steps_per_second": 1.006,
"eval_sys_len": 17881,
"eval_totals_1": 17881,
"eval_totals_2": 15677,
"eval_totals_3": 13473,
"eval_totals_4": 11269,
"step": 2184
},
{
"epoch": 16.0,
"learning_rate": 0.0001,
"loss": 1.2921,
"step": 2330
},
{
"epoch": 16.0,
"eval_bleu": 13.4907,
"eval_bp": 0.8373,
"eval_counts_1": 8457,
"eval_counts_2": 3147,
"eval_counts_3": 1511,
"eval_counts_4": 747,
"eval_exact_match": 0.0195,
"eval_f1": 0.3716,
"eval_gen_len": 13.9882,
"eval_loss": 1.6737552881240845,
"eval_precisions_1": 46.8662,
"eval_precisions_2": 19.8662,
"eval_precisions_3": 11.0801,
"eval_precisions_4": 6.5337,
"eval_ref_len": 21250,
"eval_rouge1": 0.3782,
"eval_rouge2": 0.1902,
"eval_rougeL": 0.3624,
"eval_rougeLsum": 0.3624,
"eval_runtime": 652.072,
"eval_samples_per_second": 3.38,
"eval_steps_per_second": 0.845,
"eval_sys_len": 18045,
"eval_totals_1": 18045,
"eval_totals_2": 15841,
"eval_totals_3": 13637,
"eval_totals_4": 11433,
"step": 2330
},
{
"epoch": 17.0,
"learning_rate": 0.0001,
"loss": 1.2572,
"step": 2475
},
{
"epoch": 17.0,
"eval_bleu": 13.8581,
"eval_bp": 0.8267,
"eval_counts_1": 8473,
"eval_counts_2": 3219,
"eval_counts_3": 1561,
"eval_counts_4": 783,
"eval_exact_match": 0.02,
"eval_f1": 0.3753,
"eval_gen_len": 13.7618,
"eval_loss": 1.676971435546875,
"eval_precisions_1": 47.4598,
"eval_precisions_2": 20.57,
"eval_precisions_3": 11.6103,
"eval_precisions_4": 6.9656,
"eval_ref_len": 21250,
"eval_rouge1": 0.3821,
"eval_rouge2": 0.1948,
"eval_rougeL": 0.3669,
"eval_rougeLsum": 0.3665,
"eval_runtime": 452.0799,
"eval_samples_per_second": 4.875,
"eval_steps_per_second": 1.219,
"eval_sys_len": 17853,
"eval_totals_1": 17853,
"eval_totals_2": 15649,
"eval_totals_3": 13445,
"eval_totals_4": 11241,
"step": 2475
},
{
"epoch": 18.0,
"learning_rate": 0.0001,
"loss": 1.199,
"step": 2621
},
{
"epoch": 18.0,
"eval_bleu": 13.7496,
"eval_bp": 0.8326,
"eval_counts_1": 8484,
"eval_counts_2": 3190,
"eval_counts_3": 1551,
"eval_counts_4": 771,
"eval_exact_match": 0.0186,
"eval_f1": 0.3745,
"eval_gen_len": 13.8798,
"eval_loss": 1.6934301853179932,
"eval_precisions_1": 47.2409,
"eval_precisions_2": 20.2475,
"eval_precisions_3": 11.4456,
"eval_precisions_4": 6.7947,
"eval_ref_len": 21250,
"eval_rouge1": 0.3812,
"eval_rouge2": 0.1922,
"eval_rougeL": 0.3657,
"eval_rougeLsum": 0.3658,
"eval_runtime": 869.0302,
"eval_samples_per_second": 2.536,
"eval_steps_per_second": 0.634,
"eval_sys_len": 17959,
"eval_totals_1": 17959,
"eval_totals_2": 15755,
"eval_totals_3": 13551,
"eval_totals_4": 11347,
"step": 2621
},
{
"epoch": 18.99,
"learning_rate": 0.0001,
"loss": 1.1668,
"step": 2766
},
{
"epoch": 18.99,
"eval_bleu": 13.7379,
"eval_bp": 0.8395,
"eval_counts_1": 8504,
"eval_counts_2": 3179,
"eval_counts_3": 1541,
"eval_counts_4": 776,
"eval_exact_match": 0.0204,
"eval_f1": 0.376,
"eval_gen_len": 13.9256,
"eval_loss": 1.6926020383834839,
"eval_precisions_1": 47.0198,
"eval_precisions_2": 20.0164,
"eval_precisions_3": 11.2663,
"eval_precisions_4": 6.7631,
"eval_ref_len": 21250,
"eval_rouge1": 0.3828,
"eval_rouge2": 0.1939,
"eval_rougeL": 0.3665,
"eval_rougeLsum": 0.3665,
"eval_runtime": 580.7372,
"eval_samples_per_second": 3.795,
"eval_steps_per_second": 0.949,
"eval_sys_len": 18086,
"eval_totals_1": 18086,
"eval_totals_2": 15882,
"eval_totals_3": 13678,
"eval_totals_4": 11474,
"step": 2766
},
{
"epoch": 19.91,
"learning_rate": 0.0001,
"loss": 1.1164,
"step": 2900
},
{
"epoch": 19.91,
"eval_bleu": 14.1906,
"eval_bp": 0.8529,
"eval_counts_1": 8625,
"eval_counts_2": 3250,
"eval_counts_3": 1609,
"eval_counts_4": 820,
"eval_exact_match": 0.0204,
"eval_f1": 0.3803,
"eval_gen_len": 14.069,
"eval_loss": 1.7026218175888062,
"eval_precisions_1": 47.0463,
"eval_precisions_2": 20.15,
"eval_precisions_3": 11.5548,
"eval_precisions_4": 6.996,
"eval_ref_len": 21250,
"eval_rouge1": 0.3874,
"eval_rouge2": 0.1964,
"eval_rougeL": 0.3716,
"eval_rougeLsum": 0.3715,
"eval_runtime": 462.8982,
"eval_samples_per_second": 4.761,
"eval_steps_per_second": 1.19,
"eval_sys_len": 18333,
"eval_totals_1": 18333,
"eval_totals_2": 16129,
"eval_totals_3": 13925,
"eval_totals_4": 11721,
"step": 2900
},
{
"epoch": 19.91,
"step": 2900,
"total_flos": 2.54036307345408e+17,
"train_loss": 1.724,
"train_runtime": 25476.0,
"train_samples_per_second": 7.312,
"train_steps_per_second": 0.114
}
],
"logging_steps": 500,
"max_steps": 2900,
"num_train_epochs": 20,
"save_steps": 500,
"total_flos": 2.54036307345408e+17,
"trial_name": null,
"trial_params": null
}