DGSlow_T5-small / trainer_state.json
shenkha's picture
Upload 12 files
fe68760 verified
raw
history blame contribute delete
No virus
17.6 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 49.99777942264989,
"global_step": 16850,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"eval_bleu": 0.8119,
"eval_gen_len": 15.295,
"eval_loss": 3.3328945636749268,
"eval_runtime": 64.1416,
"eval_samples_per_second": 88.102,
"eval_steps_per_second": 4.412,
"step": 337
},
{
"epoch": 1.48,
"learning_rate": 4.851632047477745e-05,
"loss": 3.528,
"step": 500
},
{
"epoch": 2.0,
"eval_bleu": 0.9197,
"eval_gen_len": 15.3056,
"eval_loss": 3.283737897872925,
"eval_runtime": 63.9504,
"eval_samples_per_second": 88.365,
"eval_steps_per_second": 4.425,
"step": 674
},
{
"epoch": 2.97,
"learning_rate": 4.70326409495549e-05,
"loss": 3.3932,
"step": 1000
},
{
"epoch": 3.0,
"eval_bleu": 0.9536,
"eval_gen_len": 15.6238,
"eval_loss": 3.2553622722625732,
"eval_runtime": 62.6609,
"eval_samples_per_second": 90.184,
"eval_steps_per_second": 4.516,
"step": 1011
},
{
"epoch": 4.0,
"eval_bleu": 1.0082,
"eval_gen_len": 15.5254,
"eval_loss": 3.236884117126465,
"eval_runtime": 62.9428,
"eval_samples_per_second": 89.78,
"eval_steps_per_second": 4.496,
"step": 1348
},
{
"epoch": 4.45,
"learning_rate": 4.554896142433235e-05,
"loss": 3.3394,
"step": 1500
},
{
"epoch": 5.0,
"eval_bleu": 1.088,
"eval_gen_len": 15.7466,
"eval_loss": 3.222892999649048,
"eval_runtime": 64.2741,
"eval_samples_per_second": 87.92,
"eval_steps_per_second": 4.403,
"step": 1685
},
{
"epoch": 5.93,
"learning_rate": 4.4065281899109794e-05,
"loss": 3.2982,
"step": 2000
},
{
"epoch": 6.0,
"eval_bleu": 1.1295,
"eval_gen_len": 15.7866,
"eval_loss": 3.210378885269165,
"eval_runtime": 64.1493,
"eval_samples_per_second": 88.091,
"eval_steps_per_second": 4.412,
"step": 2022
},
{
"epoch": 7.0,
"eval_bleu": 1.1648,
"eval_gen_len": 15.6852,
"eval_loss": 3.200908899307251,
"eval_runtime": 63.0315,
"eval_samples_per_second": 89.654,
"eval_steps_per_second": 4.49,
"step": 2359
},
{
"epoch": 7.42,
"learning_rate": 4.258160237388724e-05,
"loss": 3.2658,
"step": 2500
},
{
"epoch": 8.0,
"eval_bleu": 1.1548,
"eval_gen_len": 15.9358,
"eval_loss": 3.191558361053467,
"eval_runtime": 64.0034,
"eval_samples_per_second": 88.292,
"eval_steps_per_second": 4.422,
"step": 2696
},
{
"epoch": 8.9,
"learning_rate": 4.109792284866469e-05,
"loss": 3.2397,
"step": 3000
},
{
"epoch": 9.0,
"eval_bleu": 1.0651,
"eval_gen_len": 16.0241,
"eval_loss": 3.1847681999206543,
"eval_runtime": 64.3166,
"eval_samples_per_second": 87.862,
"eval_steps_per_second": 4.4,
"step": 3033
},
{
"epoch": 10.0,
"eval_bleu": 1.1443,
"eval_gen_len": 16.0409,
"eval_loss": 3.1797027587890625,
"eval_runtime": 62.6803,
"eval_samples_per_second": 90.156,
"eval_steps_per_second": 4.515,
"step": 3370
},
{
"epoch": 10.38,
"learning_rate": 3.961424332344214e-05,
"loss": 3.2203,
"step": 3500
},
{
"epoch": 11.0,
"eval_bleu": 1.1562,
"eval_gen_len": 15.9648,
"eval_loss": 3.1735124588012695,
"eval_runtime": 64.1808,
"eval_samples_per_second": 88.048,
"eval_steps_per_second": 4.409,
"step": 3707
},
{
"epoch": 11.87,
"learning_rate": 3.8130563798219586e-05,
"loss": 3.1935,
"step": 4000
},
{
"epoch": 12.0,
"eval_bleu": 1.145,
"eval_gen_len": 15.7811,
"eval_loss": 3.1689915657043457,
"eval_runtime": 62.6765,
"eval_samples_per_second": 90.161,
"eval_steps_per_second": 4.515,
"step": 4044
},
{
"epoch": 13.0,
"eval_bleu": 1.1534,
"eval_gen_len": 15.9582,
"eval_loss": 3.1667888164520264,
"eval_runtime": 64.2839,
"eval_samples_per_second": 87.907,
"eval_steps_per_second": 4.402,
"step": 4381
},
{
"epoch": 13.35,
"learning_rate": 3.664688427299703e-05,
"loss": 3.1791,
"step": 4500
},
{
"epoch": 14.0,
"eval_bleu": 1.1476,
"eval_gen_len": 15.9605,
"eval_loss": 3.1629080772399902,
"eval_runtime": 63.0085,
"eval_samples_per_second": 89.686,
"eval_steps_per_second": 4.491,
"step": 4718
},
{
"epoch": 14.83,
"learning_rate": 3.516320474777448e-05,
"loss": 3.1562,
"step": 5000
},
{
"epoch": 15.0,
"eval_bleu": 1.1504,
"eval_gen_len": 15.9747,
"eval_loss": 3.1592817306518555,
"eval_runtime": 63.8036,
"eval_samples_per_second": 88.569,
"eval_steps_per_second": 4.435,
"step": 5055
},
{
"epoch": 16.0,
"eval_bleu": 1.2003,
"eval_gen_len": 15.6548,
"eval_loss": 3.1569018363952637,
"eval_runtime": 63.7567,
"eval_samples_per_second": 88.634,
"eval_steps_per_second": 4.439,
"step": 5392
},
{
"epoch": 16.32,
"learning_rate": 3.3679525222551934e-05,
"loss": 3.1461,
"step": 5500
},
{
"epoch": 17.0,
"eval_bleu": 1.187,
"eval_gen_len": 16.2831,
"eval_loss": 3.1528096199035645,
"eval_runtime": 62.8317,
"eval_samples_per_second": 89.939,
"eval_steps_per_second": 4.504,
"step": 5729
},
{
"epoch": 17.8,
"learning_rate": 3.219584569732938e-05,
"loss": 3.1301,
"step": 6000
},
{
"epoch": 18.0,
"eval_bleu": 1.1908,
"eval_gen_len": 16.2078,
"eval_loss": 3.15116024017334,
"eval_runtime": 62.7949,
"eval_samples_per_second": 89.991,
"eval_steps_per_second": 4.507,
"step": 6066
},
{
"epoch": 19.0,
"eval_bleu": 1.2164,
"eval_gen_len": 16.1816,
"eval_loss": 3.149709463119507,
"eval_runtime": 63.3277,
"eval_samples_per_second": 89.234,
"eval_steps_per_second": 4.469,
"step": 6403
},
{
"epoch": 19.29,
"learning_rate": 3.071216617210683e-05,
"loss": 3.1199,
"step": 6500
},
{
"epoch": 20.0,
"eval_bleu": 1.2493,
"eval_gen_len": 16.0138,
"eval_loss": 3.147409200668335,
"eval_runtime": 62.5603,
"eval_samples_per_second": 90.329,
"eval_steps_per_second": 4.524,
"step": 6740
},
{
"epoch": 20.77,
"learning_rate": 2.9228486646884274e-05,
"loss": 3.1012,
"step": 7000
},
{
"epoch": 21.0,
"eval_bleu": 1.2189,
"eval_gen_len": 15.9667,
"eval_loss": 3.1471338272094727,
"eval_runtime": 62.6224,
"eval_samples_per_second": 90.239,
"eval_steps_per_second": 4.519,
"step": 7077
},
{
"epoch": 22.0,
"eval_bleu": 1.2535,
"eval_gen_len": 16.0239,
"eval_loss": 3.145897150039673,
"eval_runtime": 62.7278,
"eval_samples_per_second": 90.088,
"eval_steps_per_second": 4.512,
"step": 7414
},
{
"epoch": 22.25,
"learning_rate": 2.774480712166172e-05,
"loss": 3.0953,
"step": 7500
},
{
"epoch": 23.0,
"eval_bleu": 1.2554,
"eval_gen_len": 15.7225,
"eval_loss": 3.143968343734741,
"eval_runtime": 63.12,
"eval_samples_per_second": 89.528,
"eval_steps_per_second": 4.484,
"step": 7751
},
{
"epoch": 23.74,
"learning_rate": 2.6261127596439174e-05,
"loss": 3.0761,
"step": 8000
},
{
"epoch": 24.0,
"eval_bleu": 1.2086,
"eval_gen_len": 16.0069,
"eval_loss": 3.141495943069458,
"eval_runtime": 62.7045,
"eval_samples_per_second": 90.121,
"eval_steps_per_second": 4.513,
"step": 8088
},
{
"epoch": 25.0,
"eval_bleu": 1.305,
"eval_gen_len": 15.9912,
"eval_loss": 3.1416375637054443,
"eval_runtime": 62.5621,
"eval_samples_per_second": 90.326,
"eval_steps_per_second": 4.524,
"step": 8425
},
{
"epoch": 25.22,
"learning_rate": 2.4777448071216618e-05,
"loss": 3.0822,
"step": 8500
},
{
"epoch": 26.0,
"eval_bleu": 1.248,
"eval_gen_len": 15.8406,
"eval_loss": 3.1405673027038574,
"eval_runtime": 62.6069,
"eval_samples_per_second": 90.262,
"eval_steps_per_second": 4.52,
"step": 8762
},
{
"epoch": 26.7,
"learning_rate": 2.3293768545994066e-05,
"loss": 3.0614,
"step": 9000
},
{
"epoch": 27.0,
"eval_bleu": 1.2888,
"eval_gen_len": 16.3189,
"eval_loss": 3.140249252319336,
"eval_runtime": 62.6544,
"eval_samples_per_second": 90.193,
"eval_steps_per_second": 4.517,
"step": 9099
},
{
"epoch": 28.0,
"eval_bleu": 1.3016,
"eval_gen_len": 16.0096,
"eval_loss": 3.1379010677337646,
"eval_runtime": 62.5617,
"eval_samples_per_second": 90.327,
"eval_steps_per_second": 4.524,
"step": 9436
},
{
"epoch": 28.19,
"learning_rate": 2.1810089020771514e-05,
"loss": 3.0594,
"step": 9500
},
{
"epoch": 29.0,
"eval_bleu": 1.2268,
"eval_gen_len": 16.3702,
"eval_loss": 3.139193058013916,
"eval_runtime": 62.7397,
"eval_samples_per_second": 90.071,
"eval_steps_per_second": 4.511,
"step": 9773
},
{
"epoch": 29.67,
"learning_rate": 2.0326409495548962e-05,
"loss": 3.0453,
"step": 10000
},
{
"epoch": 30.0,
"eval_bleu": 1.278,
"eval_gen_len": 15.9722,
"eval_loss": 3.1378531455993652,
"eval_runtime": 62.634,
"eval_samples_per_second": 90.223,
"eval_steps_per_second": 4.518,
"step": 10110
},
{
"epoch": 31.0,
"eval_bleu": 1.2832,
"eval_gen_len": 15.9759,
"eval_loss": 3.136270046234131,
"eval_runtime": 62.6958,
"eval_samples_per_second": 90.134,
"eval_steps_per_second": 4.514,
"step": 10447
},
{
"epoch": 31.16,
"learning_rate": 1.884272997032641e-05,
"loss": 3.0425,
"step": 10500
},
{
"epoch": 32.0,
"eval_bleu": 1.3089,
"eval_gen_len": 16.1499,
"eval_loss": 3.136704206466675,
"eval_runtime": 62.6194,
"eval_samples_per_second": 90.244,
"eval_steps_per_second": 4.519,
"step": 10784
},
{
"epoch": 32.64,
"learning_rate": 1.7359050445103858e-05,
"loss": 3.0339,
"step": 11000
},
{
"epoch": 33.0,
"eval_bleu": 1.2718,
"eval_gen_len": 16.1051,
"eval_loss": 3.1367685794830322,
"eval_runtime": 62.5638,
"eval_samples_per_second": 90.324,
"eval_steps_per_second": 4.523,
"step": 11121
},
{
"epoch": 34.0,
"eval_bleu": 1.2683,
"eval_gen_len": 16.0159,
"eval_loss": 3.1349706649780273,
"eval_runtime": 62.675,
"eval_samples_per_second": 90.164,
"eval_steps_per_second": 4.515,
"step": 11458
},
{
"epoch": 34.12,
"learning_rate": 1.5875370919881306e-05,
"loss": 3.0301,
"step": 11500
},
{
"epoch": 35.0,
"eval_bleu": 1.286,
"eval_gen_len": 16.1807,
"eval_loss": 3.135624408721924,
"eval_runtime": 62.61,
"eval_samples_per_second": 90.257,
"eval_steps_per_second": 4.52,
"step": 11795
},
{
"epoch": 35.61,
"learning_rate": 1.4391691394658754e-05,
"loss": 3.0197,
"step": 12000
},
{
"epoch": 36.0,
"eval_bleu": 1.267,
"eval_gen_len": 16.0966,
"eval_loss": 3.1348326206207275,
"eval_runtime": 62.637,
"eval_samples_per_second": 90.218,
"eval_steps_per_second": 4.518,
"step": 12132
},
{
"epoch": 37.0,
"eval_bleu": 1.3013,
"eval_gen_len": 15.9283,
"eval_loss": 3.1351914405822754,
"eval_runtime": 62.7389,
"eval_samples_per_second": 90.072,
"eval_steps_per_second": 4.511,
"step": 12469
},
{
"epoch": 37.09,
"learning_rate": 1.29080118694362e-05,
"loss": 3.0204,
"step": 12500
},
{
"epoch": 38.0,
"eval_bleu": 1.282,
"eval_gen_len": 16.0099,
"eval_loss": 3.1345412731170654,
"eval_runtime": 62.6541,
"eval_samples_per_second": 90.194,
"eval_steps_per_second": 4.517,
"step": 12806
},
{
"epoch": 38.57,
"learning_rate": 1.142433234421365e-05,
"loss": 3.0169,
"step": 13000
},
{
"epoch": 39.0,
"eval_bleu": 1.3076,
"eval_gen_len": 16.1136,
"eval_loss": 3.1348047256469727,
"eval_runtime": 62.7366,
"eval_samples_per_second": 90.075,
"eval_steps_per_second": 4.511,
"step": 13143
},
{
"epoch": 40.0,
"eval_bleu": 1.2947,
"eval_gen_len": 16.0117,
"eval_loss": 3.135272979736328,
"eval_runtime": 62.894,
"eval_samples_per_second": 89.85,
"eval_steps_per_second": 4.5,
"step": 13480
},
{
"epoch": 40.06,
"learning_rate": 9.940652818991098e-06,
"loss": 3.0093,
"step": 13500
},
{
"epoch": 41.0,
"eval_bleu": 1.3047,
"eval_gen_len": 16.0662,
"eval_loss": 3.134445905685425,
"eval_runtime": 62.7291,
"eval_samples_per_second": 90.086,
"eval_steps_per_second": 4.511,
"step": 13817
},
{
"epoch": 41.54,
"learning_rate": 8.456973293768548e-06,
"loss": 3.0009,
"step": 14000
},
{
"epoch": 42.0,
"eval_bleu": 1.3017,
"eval_gen_len": 16.1531,
"eval_loss": 3.134453535079956,
"eval_runtime": 62.7311,
"eval_samples_per_second": 90.083,
"eval_steps_per_second": 4.511,
"step": 14154
},
{
"epoch": 43.0,
"eval_bleu": 1.2762,
"eval_gen_len": 16.0741,
"eval_loss": 3.1347758769989014,
"eval_runtime": 62.6827,
"eval_samples_per_second": 90.152,
"eval_steps_per_second": 4.515,
"step": 14491
},
{
"epoch": 43.03,
"learning_rate": 6.973293768545995e-06,
"loss": 3.0066,
"step": 14500
},
{
"epoch": 44.0,
"eval_bleu": 1.2743,
"eval_gen_len": 16.069,
"eval_loss": 3.1343321800231934,
"eval_runtime": 62.7369,
"eval_samples_per_second": 90.075,
"eval_steps_per_second": 4.511,
"step": 14828
},
{
"epoch": 44.51,
"learning_rate": 5.489614243323442e-06,
"loss": 3.0014,
"step": 15000
},
{
"epoch": 45.0,
"eval_bleu": 1.264,
"eval_gen_len": 16.0232,
"eval_loss": 3.1345059871673584,
"eval_runtime": 62.8103,
"eval_samples_per_second": 89.969,
"eval_steps_per_second": 4.506,
"step": 15165
},
{
"epoch": 45.99,
"learning_rate": 4.005934718100891e-06,
"loss": 2.9948,
"step": 15500
},
{
"epoch": 46.0,
"eval_bleu": 1.2856,
"eval_gen_len": 16.1014,
"eval_loss": 3.134138345718384,
"eval_runtime": 62.6814,
"eval_samples_per_second": 90.154,
"eval_steps_per_second": 4.515,
"step": 15502
},
{
"epoch": 47.0,
"eval_bleu": 1.2889,
"eval_gen_len": 16.0956,
"eval_loss": 3.1343159675598145,
"eval_runtime": 62.7663,
"eval_samples_per_second": 90.032,
"eval_steps_per_second": 4.509,
"step": 15839
},
{
"epoch": 47.48,
"learning_rate": 2.5222551928783385e-06,
"loss": 3.0016,
"step": 16000
},
{
"epoch": 48.0,
"eval_bleu": 1.2823,
"eval_gen_len": 16.0342,
"eval_loss": 3.13439679145813,
"eval_runtime": 62.7228,
"eval_samples_per_second": 90.095,
"eval_steps_per_second": 4.512,
"step": 16176
},
{
"epoch": 48.96,
"learning_rate": 1.0385756676557863e-06,
"loss": 2.992,
"step": 16500
},
{
"epoch": 49.0,
"eval_bleu": 1.2903,
"eval_gen_len": 16.0894,
"eval_loss": 3.134230136871338,
"eval_runtime": 62.7414,
"eval_samples_per_second": 90.068,
"eval_steps_per_second": 4.511,
"step": 16513
},
{
"epoch": 50.0,
"eval_bleu": 1.279,
"eval_gen_len": 16.0568,
"eval_loss": 3.1342360973358154,
"eval_runtime": 62.8254,
"eval_samples_per_second": 89.948,
"eval_steps_per_second": 4.505,
"step": 16850
},
{
"epoch": 50.0,
"step": 16850,
"total_flos": 8.49568285776937e+16,
"train_loss": 3.1156047950088093,
"train_runtime": 18325.9577,
"train_samples_per_second": 73.715,
"train_steps_per_second": 0.919
}
],
"max_steps": 16850,
"num_train_epochs": 50,
"total_flos": 8.49568285776937e+16,
"trial_name": null,
"trial_params": null
}