german-jeopardy-mt5-large / trainer_state.json
Marvin
Initial commit
8f02e9b unverified
raw
history blame contribute delete
No virus
20.8 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 19.934292462958986,
"eval_steps": 500,
"global_step": 2900,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"learning_rate": 0.0001,
"loss": 2.732,
"step": 145
},
{
"epoch": 1.0,
"eval_bleu": 12.4473,
"eval_bp": 0.7805,
"eval_counts_1": 7779,
"eval_counts_2": 2893,
"eval_counts_3": 1393,
"eval_counts_4": 685,
"eval_exact_match": 0.0168,
"eval_f1": 0.3393,
"eval_gen_len": 12.2523,
"eval_loss": 1.2989141941070557,
"eval_precisions_1": 45.6809,
"eval_precisions_2": 19.5143,
"eval_precisions_3": 11.0372,
"eval_precisions_4": 6.5758,
"eval_ref_len": 21250,
"eval_rouge1": 0.3487,
"eval_rouge2": 0.1796,
"eval_rougeL": 0.3329,
"eval_rougeLsum": 0.3327,
"eval_runtime": 2048.1193,
"eval_samples_per_second": 1.076,
"eval_steps_per_second": 1.076,
"eval_sys_len": 17029,
"eval_totals_1": 17029,
"eval_totals_2": 14825,
"eval_totals_3": 12621,
"eval_totals_4": 10417,
"step": 145
},
{
"epoch": 2.0,
"learning_rate": 0.0001,
"loss": 1.5514,
"step": 291
},
{
"epoch": 2.0,
"eval_bleu": 14.7663,
"eval_bp": 0.7871,
"eval_counts_1": 8297,
"eval_counts_2": 3336,
"eval_counts_3": 1711,
"eval_counts_4": 899,
"eval_exact_match": 0.025,
"eval_f1": 0.3743,
"eval_gen_len": 12.441,
"eval_loss": 1.20997154712677,
"eval_precisions_1": 48.3931,
"eval_precisions_2": 22.3278,
"eval_precisions_3": 13.4333,
"eval_precisions_4": 8.5351,
"eval_ref_len": 21250,
"eval_rouge1": 0.3839,
"eval_rouge2": 0.2089,
"eval_rougeL": 0.3688,
"eval_rougeLsum": 0.369,
"eval_runtime": 2570.171,
"eval_samples_per_second": 0.858,
"eval_steps_per_second": 0.858,
"eval_sys_len": 17145,
"eval_totals_1": 17145,
"eval_totals_2": 14941,
"eval_totals_3": 12737,
"eval_totals_4": 10533,
"step": 291
},
{
"epoch": 3.0,
"learning_rate": 0.0001,
"loss": 1.3546,
"step": 435
},
{
"epoch": 3.0,
"eval_bleu": 16.3903,
"eval_bp": 0.7798,
"eval_counts_1": 8930,
"eval_counts_2": 3713,
"eval_counts_3": 1905,
"eval_counts_4": 1022,
"eval_exact_match": 0.034,
"eval_f1": 0.4155,
"eval_gen_len": 12.6021,
"eval_loss": 1.142831563949585,
"eval_precisions_1": 52.4739,
"eval_precisions_2": 25.0641,
"eval_precisions_3": 15.1071,
"eval_precisions_4": 9.8213,
"eval_ref_len": 21250,
"eval_rouge1": 0.4225,
"eval_rouge2": 0.2345,
"eval_rougeL": 0.4075,
"eval_rougeLsum": 0.4074,
"eval_runtime": 2923.7087,
"eval_samples_per_second": 0.754,
"eval_steps_per_second": 0.754,
"eval_sys_len": 17018,
"eval_totals_1": 17018,
"eval_totals_2": 14814,
"eval_totals_3": 12610,
"eval_totals_4": 10406,
"step": 435
},
{
"epoch": 4.0,
"learning_rate": 0.0001,
"loss": 1.1969,
"step": 581
},
{
"epoch": 4.0,
"eval_bleu": 17.8161,
"eval_bp": 0.8441,
"eval_counts_1": 9456,
"eval_counts_2": 3994,
"eval_counts_3": 2096,
"eval_counts_4": 1157,
"eval_exact_match": 0.0386,
"eval_f1": 0.4334,
"eval_gen_len": 13.4061,
"eval_loss": 1.1113450527191162,
"eval_precisions_1": 52.039,
"eval_precisions_2": 25.0141,
"eval_precisions_3": 15.2292,
"eval_precisions_4": 10.0095,
"eval_ref_len": 21250,
"eval_rouge1": 0.4409,
"eval_rouge2": 0.246,
"eval_rougeL": 0.4251,
"eval_rougeLsum": 0.4251,
"eval_runtime": 2741.9646,
"eval_samples_per_second": 0.804,
"eval_steps_per_second": 0.804,
"eval_sys_len": 18171,
"eval_totals_1": 18171,
"eval_totals_2": 15967,
"eval_totals_3": 13763,
"eval_totals_4": 11559,
"step": 581
},
{
"epoch": 5.0,
"learning_rate": 0.0001,
"loss": 1.0876,
"step": 726
},
{
"epoch": 5.0,
"eval_bleu": 18.6911,
"eval_bp": 0.8446,
"eval_counts_1": 9606,
"eval_counts_2": 4162,
"eval_counts_3": 2233,
"eval_counts_4": 1243,
"eval_exact_match": 0.0377,
"eval_f1": 0.443,
"eval_gen_len": 13.5599,
"eval_loss": 1.1031831502914429,
"eval_precisions_1": 52.8412,
"eval_precisions_2": 26.0532,
"eval_precisions_3": 16.2152,
"eval_precisions_4": 10.7461,
"eval_ref_len": 21250,
"eval_rouge1": 0.4504,
"eval_rouge2": 0.2571,
"eval_rougeL": 0.4356,
"eval_rougeLsum": 0.4357,
"eval_runtime": 3812.6899,
"eval_samples_per_second": 0.578,
"eval_steps_per_second": 0.578,
"eval_sys_len": 18179,
"eval_totals_1": 18179,
"eval_totals_2": 15975,
"eval_totals_3": 13771,
"eval_totals_4": 11567,
"step": 726
},
{
"epoch": 6.0,
"learning_rate": 0.0001,
"loss": 0.9881,
"step": 872
},
{
"epoch": 6.0,
"eval_bleu": 18.7071,
"eval_bp": 0.8481,
"eval_counts_1": 9608,
"eval_counts_2": 4167,
"eval_counts_3": 2235,
"eval_counts_4": 1246,
"eval_exact_match": 0.044,
"eval_f1": 0.4429,
"eval_gen_len": 13.6978,
"eval_loss": 1.1118519306182861,
"eval_precisions_1": 52.661,
"eval_precisions_2": 25.9772,
"eval_precisions_3": 16.1523,
"eval_precisions_4": 10.7109,
"eval_ref_len": 21250,
"eval_rouge1": 0.4505,
"eval_rouge2": 0.2567,
"eval_rougeL": 0.4348,
"eval_rougeLsum": 0.4349,
"eval_runtime": 2020.0708,
"eval_samples_per_second": 1.091,
"eval_steps_per_second": 1.091,
"eval_sys_len": 18245,
"eval_totals_1": 18245,
"eval_totals_2": 16041,
"eval_totals_3": 13837,
"eval_totals_4": 11633,
"step": 872
},
{
"epoch": 7.0,
"learning_rate": 0.0001,
"loss": 0.9142,
"step": 1017
},
{
"epoch": 7.0,
"eval_bleu": 19.3053,
"eval_bp": 0.8506,
"eval_counts_1": 9757,
"eval_counts_2": 4285,
"eval_counts_3": 2311,
"eval_counts_4": 1310,
"eval_exact_match": 0.0495,
"eval_f1": 0.451,
"eval_gen_len": 13.5826,
"eval_loss": 1.1105936765670776,
"eval_precisions_1": 53.3432,
"eval_precisions_2": 26.6364,
"eval_precisions_3": 16.6463,
"eval_precisions_4": 11.2167,
"eval_ref_len": 21250,
"eval_rouge1": 0.4587,
"eval_rouge2": 0.2641,
"eval_rougeL": 0.4427,
"eval_rougeLsum": 0.443,
"eval_runtime": 1991.0459,
"eval_samples_per_second": 1.107,
"eval_steps_per_second": 1.107,
"eval_sys_len": 18291,
"eval_totals_1": 18291,
"eval_totals_2": 16087,
"eval_totals_3": 13883,
"eval_totals_4": 11679,
"step": 1017
},
{
"epoch": 8.0,
"learning_rate": 0.0001,
"loss": 0.8323,
"step": 1163
},
{
"epoch": 8.0,
"eval_bleu": 19.4102,
"eval_bp": 0.8507,
"eval_counts_1": 9757,
"eval_counts_2": 4300,
"eval_counts_3": 2341,
"eval_counts_4": 1317,
"eval_exact_match": 0.0472,
"eval_f1": 0.4513,
"eval_gen_len": 13.6239,
"eval_loss": 1.1327157020568848,
"eval_precisions_1": 53.3373,
"eval_precisions_2": 26.7263,
"eval_precisions_3": 16.8599,
"eval_precisions_4": 11.2747,
"eval_ref_len": 21250,
"eval_rouge1": 0.4587,
"eval_rouge2": 0.2662,
"eval_rougeL": 0.4429,
"eval_rougeLsum": 0.4426,
"eval_runtime": 1972.0648,
"eval_samples_per_second": 1.118,
"eval_steps_per_second": 1.118,
"eval_sys_len": 18293,
"eval_totals_1": 18293,
"eval_totals_2": 16089,
"eval_totals_3": 13885,
"eval_totals_4": 11681,
"step": 1163
},
{
"epoch": 9.0,
"learning_rate": 0.0001,
"loss": 0.7742,
"step": 1308
},
{
"epoch": 9.0,
"eval_bleu": 19.3574,
"eval_bp": 0.8497,
"eval_counts_1": 9757,
"eval_counts_2": 4273,
"eval_counts_3": 2324,
"eval_counts_4": 1320,
"eval_exact_match": 0.049,
"eval_f1": 0.451,
"eval_gen_len": 13.5944,
"eval_loss": 1.1574428081512451,
"eval_precisions_1": 53.3957,
"eval_precisions_2": 26.5916,
"eval_precisions_3": 16.7616,
"eval_precisions_4": 11.3198,
"eval_ref_len": 21250,
"eval_rouge1": 0.4585,
"eval_rouge2": 0.2653,
"eval_rougeL": 0.4431,
"eval_rougeLsum": 0.443,
"eval_runtime": 1991.8737,
"eval_samples_per_second": 1.106,
"eval_steps_per_second": 1.106,
"eval_sys_len": 18273,
"eval_totals_1": 18273,
"eval_totals_2": 16069,
"eval_totals_3": 13865,
"eval_totals_4": 11661,
"step": 1308
},
{
"epoch": 10.0,
"learning_rate": 0.0001,
"loss": 0.7101,
"step": 1454
},
{
"epoch": 10.0,
"eval_bleu": 20.1003,
"eval_bp": 0.8694,
"eval_counts_1": 9861,
"eval_counts_2": 4403,
"eval_counts_3": 2438,
"eval_counts_4": 1416,
"eval_exact_match": 0.0531,
"eval_f1": 0.4525,
"eval_gen_len": 13.9133,
"eval_loss": 1.167409896850586,
"eval_precisions_1": 52.8995,
"eval_precisions_2": 26.7871,
"eval_precisions_3": 17.1292,
"eval_precisions_4": 11.7716,
"eval_ref_len": 21250,
"eval_rouge1": 0.4594,
"eval_rouge2": 0.2689,
"eval_rougeL": 0.444,
"eval_rougeLsum": 0.4435,
"eval_runtime": 2025.3437,
"eval_samples_per_second": 1.088,
"eval_steps_per_second": 1.088,
"eval_sys_len": 18641,
"eval_totals_1": 18641,
"eval_totals_2": 16437,
"eval_totals_3": 14233,
"eval_totals_4": 12029,
"step": 1454
},
{
"epoch": 10.99,
"learning_rate": 0.0001,
"loss": 0.6642,
"step": 1599
},
{
"epoch": 10.99,
"eval_bleu": 19.655,
"eval_bp": 0.8558,
"eval_counts_1": 9868,
"eval_counts_2": 4380,
"eval_counts_3": 2358,
"eval_counts_4": 1337,
"eval_exact_match": 0.0476,
"eval_f1": 0.4551,
"eval_gen_len": 13.9142,
"eval_loss": 1.1888612508773804,
"eval_precisions_1": 53.6713,
"eval_precisions_2": 27.0671,
"eval_precisions_3": 16.8694,
"eval_precisions_4": 11.3555,
"eval_ref_len": 21250,
"eval_rouge1": 0.4622,
"eval_rouge2": 0.2694,
"eval_rougeL": 0.4469,
"eval_rougeLsum": 0.4466,
"eval_runtime": 2020.9205,
"eval_samples_per_second": 1.091,
"eval_steps_per_second": 1.091,
"eval_sys_len": 18386,
"eval_totals_1": 18386,
"eval_totals_2": 16182,
"eval_totals_3": 13978,
"eval_totals_4": 11774,
"step": 1599
},
{
"epoch": 12.0,
"learning_rate": 0.0001,
"loss": 0.6067,
"step": 1745
},
{
"epoch": 12.0,
"eval_bleu": 19.9169,
"eval_bp": 0.8828,
"eval_counts_1": 9872,
"eval_counts_2": 4384,
"eval_counts_3": 2408,
"eval_counts_4": 1395,
"eval_exact_match": 0.0472,
"eval_f1": 0.4489,
"eval_gen_len": 14.2482,
"eval_loss": 1.2207266092300415,
"eval_precisions_1": 52.2494,
"eval_precisions_2": 26.2672,
"eval_precisions_3": 16.6229,
"eval_precisions_4": 11.3581,
"eval_ref_len": 21250,
"eval_rouge1": 0.4569,
"eval_rouge2": 0.2667,
"eval_rougeL": 0.441,
"eval_rougeLsum": 0.4408,
"eval_runtime": 2047.2616,
"eval_samples_per_second": 1.077,
"eval_steps_per_second": 1.077,
"eval_sys_len": 18894,
"eval_totals_1": 18894,
"eval_totals_2": 16690,
"eval_totals_3": 14486,
"eval_totals_4": 12282,
"step": 1745
},
{
"epoch": 12.99,
"learning_rate": 0.0001,
"loss": 0.5684,
"step": 1890
},
{
"epoch": 12.99,
"eval_bleu": 19.5451,
"eval_bp": 0.8831,
"eval_counts_1": 9870,
"eval_counts_2": 4356,
"eval_counts_3": 2360,
"eval_counts_4": 1329,
"eval_exact_match": 0.0485,
"eval_f1": 0.4506,
"eval_gen_len": 14.2432,
"eval_loss": 1.2586854696273804,
"eval_precisions_1": 52.2195,
"eval_precisions_2": 26.0885,
"eval_precisions_3": 16.2837,
"eval_precisions_4": 10.8145,
"eval_ref_len": 21250,
"eval_rouge1": 0.4581,
"eval_rouge2": 0.2651,
"eval_rougeL": 0.4414,
"eval_rougeLsum": 0.4409,
"eval_runtime": 2126.3316,
"eval_samples_per_second": 1.037,
"eval_steps_per_second": 1.037,
"eval_sys_len": 18901,
"eval_totals_1": 18901,
"eval_totals_2": 16697,
"eval_totals_3": 14493,
"eval_totals_4": 12289,
"step": 1890
},
{
"epoch": 14.0,
"learning_rate": 0.0001,
"loss": 0.5288,
"step": 2036
},
{
"epoch": 14.0,
"eval_bleu": 19.6648,
"eval_bp": 0.8547,
"eval_counts_1": 9815,
"eval_counts_2": 4360,
"eval_counts_3": 2389,
"eval_counts_4": 1335,
"eval_exact_match": 0.0454,
"eval_f1": 0.4504,
"eval_gen_len": 13.7432,
"eval_loss": 1.2803738117218018,
"eval_precisions_1": 53.4382,
"eval_precisions_2": 26.9752,
"eval_precisions_3": 17.1144,
"eval_precisions_4": 11.3569,
"eval_ref_len": 21250,
"eval_rouge1": 0.4592,
"eval_rouge2": 0.2671,
"eval_rougeL": 0.4443,
"eval_rougeLsum": 0.4436,
"eval_runtime": 3989.4275,
"eval_samples_per_second": 0.552,
"eval_steps_per_second": 0.552,
"eval_sys_len": 18367,
"eval_totals_1": 18367,
"eval_totals_2": 16163,
"eval_totals_3": 13959,
"eval_totals_4": 11755,
"step": 2036
},
{
"epoch": 14.99,
"learning_rate": 0.0001,
"loss": 0.4902,
"step": 2181
},
{
"epoch": 14.99,
"eval_bleu": 19.8138,
"eval_bp": 0.8766,
"eval_counts_1": 9886,
"eval_counts_2": 4407,
"eval_counts_3": 2398,
"eval_counts_4": 1359,
"eval_exact_match": 0.0495,
"eval_f1": 0.451,
"eval_gen_len": 14.1225,
"eval_loss": 1.321104884147644,
"eval_precisions_1": 52.6495,
"eval_precisions_2": 26.5914,
"eval_precisions_3": 16.6887,
"eval_precisions_4": 11.1714,
"eval_ref_len": 21250,
"eval_rouge1": 0.4582,
"eval_rouge2": 0.2674,
"eval_rougeL": 0.4426,
"eval_rougeLsum": 0.4421,
"eval_runtime": 2190.6068,
"eval_samples_per_second": 1.006,
"eval_steps_per_second": 1.006,
"eval_sys_len": 18777,
"eval_totals_1": 18777,
"eval_totals_2": 16573,
"eval_totals_3": 14369,
"eval_totals_4": 12165,
"step": 2181
},
{
"epoch": 16.0,
"learning_rate": 0.0001,
"loss": 0.4498,
"step": 2327
},
{
"epoch": 16.0,
"eval_bleu": 20.0703,
"eval_bp": 0.909,
"eval_counts_1": 10008,
"eval_counts_2": 4477,
"eval_counts_3": 2456,
"eval_counts_4": 1381,
"eval_exact_match": 0.0476,
"eval_f1": 0.4491,
"eval_gen_len": 14.3725,
"eval_loss": 1.3620938062667847,
"eval_precisions_1": 51.5903,
"eval_precisions_2": 26.0366,
"eval_precisions_3": 16.3832,
"eval_precisions_4": 10.8,
"eval_ref_len": 21250,
"eval_rouge1": 0.4569,
"eval_rouge2": 0.2679,
"eval_rougeL": 0.4415,
"eval_rougeLsum": 0.4412,
"eval_runtime": 4080.8757,
"eval_samples_per_second": 0.54,
"eval_steps_per_second": 0.54,
"eval_sys_len": 19399,
"eval_totals_1": 19399,
"eval_totals_2": 17195,
"eval_totals_3": 14991,
"eval_totals_4": 12787,
"step": 2327
},
{
"epoch": 16.99,
"learning_rate": 0.0001,
"loss": 0.4216,
"step": 2472
},
{
"epoch": 16.99,
"eval_bleu": 20.1319,
"eval_bp": 0.8948,
"eval_counts_1": 10016,
"eval_counts_2": 4483,
"eval_counts_3": 2455,
"eval_counts_4": 1385,
"eval_exact_match": 0.0481,
"eval_f1": 0.4531,
"eval_gen_len": 14.3008,
"eval_loss": 1.3966974020004272,
"eval_precisions_1": 52.3712,
"eval_precisions_2": 26.4937,
"eval_precisions_3": 16.6814,
"eval_precisions_4": 11.0685,
"eval_ref_len": 21250,
"eval_rouge1": 0.4615,
"eval_rouge2": 0.2705,
"eval_rougeL": 0.4457,
"eval_rougeLsum": 0.4451,
"eval_runtime": 3311.0939,
"eval_samples_per_second": 0.666,
"eval_steps_per_second": 0.666,
"eval_sys_len": 19125,
"eval_totals_1": 19125,
"eval_totals_2": 16921,
"eval_totals_3": 14717,
"eval_totals_4": 12513,
"step": 2472
},
{
"epoch": 18.0,
"learning_rate": 0.0001,
"loss": 0.3829,
"step": 2618
},
{
"epoch": 18.0,
"eval_bleu": 19.8508,
"eval_bp": 0.9123,
"eval_counts_1": 9976,
"eval_counts_2": 4407,
"eval_counts_3": 2412,
"eval_counts_4": 1374,
"eval_exact_match": 0.0476,
"eval_f1": 0.4479,
"eval_gen_len": 14.7046,
"eval_loss": 1.4460452795028687,
"eval_precisions_1": 51.2536,
"eval_precisions_2": 25.533,
"eval_precisions_3": 16.0202,
"eval_precisions_4": 10.6909,
"eval_ref_len": 21250,
"eval_rouge1": 0.4556,
"eval_rouge2": 0.2627,
"eval_rougeL": 0.4387,
"eval_rougeLsum": 0.4385,
"eval_runtime": 3748.4463,
"eval_samples_per_second": 0.588,
"eval_steps_per_second": 0.588,
"eval_sys_len": 19464,
"eval_totals_1": 19464,
"eval_totals_2": 17260,
"eval_totals_3": 15056,
"eval_totals_4": 12852,
"step": 2618
},
{
"epoch": 19.0,
"learning_rate": 0.0001,
"loss": 0.3551,
"step": 2764
},
{
"epoch": 19.0,
"eval_bleu": 20.0572,
"eval_bp": 0.8952,
"eval_counts_1": 10010,
"eval_counts_2": 4451,
"eval_counts_3": 2438,
"eval_counts_4": 1385,
"eval_exact_match": 0.0463,
"eval_f1": 0.4523,
"eval_gen_len": 14.3807,
"eval_loss": 1.4725110530853271,
"eval_precisions_1": 52.3235,
"eval_precisions_2": 26.2953,
"eval_precisions_3": 16.5591,
"eval_precisions_4": 11.0632,
"eval_ref_len": 21250,
"eval_rouge1": 0.4606,
"eval_rouge2": 0.2672,
"eval_rougeL": 0.4438,
"eval_rougeLsum": 0.4434,
"eval_runtime": 2215.2029,
"eval_samples_per_second": 0.995,
"eval_steps_per_second": 0.995,
"eval_sys_len": 19131,
"eval_totals_1": 19131,
"eval_totals_2": 16927,
"eval_totals_3": 14723,
"eval_totals_4": 12519,
"step": 2764
},
{
"epoch": 19.93,
"learning_rate": 0.0001,
"loss": 0.3301,
"step": 2900
},
{
"epoch": 19.93,
"eval_bleu": 19.8047,
"eval_bp": 0.8816,
"eval_counts_1": 9858,
"eval_counts_2": 4378,
"eval_counts_3": 2406,
"eval_counts_4": 1368,
"eval_exact_match": 0.0495,
"eval_f1": 0.4483,
"eval_gen_len": 14.2795,
"eval_loss": 1.5030488967895508,
"eval_precisions_1": 52.2361,
"eval_precisions_2": 26.2659,
"eval_precisions_3": 16.6344,
"eval_precisions_4": 11.1582,
"eval_ref_len": 21250,
"eval_rouge1": 0.4569,
"eval_rouge2": 0.2644,
"eval_rougeL": 0.4412,
"eval_rougeLsum": 0.4405,
"eval_runtime": 2181.7432,
"eval_samples_per_second": 1.01,
"eval_steps_per_second": 1.01,
"eval_sys_len": 18872,
"eval_totals_1": 18872,
"eval_totals_2": 16668,
"eval_totals_3": 14464,
"eval_totals_4": 12260,
"step": 2900
},
{
"epoch": 19.93,
"step": 2900,
"total_flos": 1.1100924470624256e+18,
"train_loss": 0.6333936349276839,
"train_runtime": 110329.1002,
"train_samples_per_second": 1.688,
"train_steps_per_second": 0.026
}
],
"logging_steps": 500,
"max_steps": 2900,
"num_train_epochs": 20,
"save_steps": 500,
"total_flos": 1.1100924470624256e+18,
"trial_name": null,
"trial_params": null
}