arxiv-summarization-t5-small / trainer_state.json
farleyknight's picture
End of training
4b000ae
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"global_step": 76095,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.04,
"learning_rate": 4.9342926604901765e-05,
"loss": 2.907,
"step": 1000
},
{
"epoch": 0.08,
"learning_rate": 4.868585320980354e-05,
"loss": 2.7863,
"step": 2000
},
{
"epoch": 0.12,
"learning_rate": 4.8028779814705304e-05,
"loss": 2.7315,
"step": 3000
},
{
"epoch": 0.16,
"learning_rate": 4.737170641960707e-05,
"loss": 2.7056,
"step": 4000
},
{
"epoch": 0.2,
"learning_rate": 4.671463302450884e-05,
"loss": 2.6726,
"step": 5000
},
{
"epoch": 0.24,
"learning_rate": 4.6057559629410605e-05,
"loss": 2.6651,
"step": 6000
},
{
"epoch": 0.28,
"learning_rate": 4.5400486234312375e-05,
"loss": 2.625,
"step": 7000
},
{
"epoch": 0.32,
"learning_rate": 4.4743412839214144e-05,
"loss": 2.6331,
"step": 8000
},
{
"epoch": 0.35,
"learning_rate": 4.408633944411591e-05,
"loss": 2.6084,
"step": 9000
},
{
"epoch": 0.39,
"learning_rate": 4.3429266049017676e-05,
"loss": 2.5925,
"step": 10000
},
{
"epoch": 0.39,
"eval_gen_len": 19.0,
"eval_loss": 2.45664119720459,
"eval_rouge1": 17.8432,
"eval_rouge2": 6.6779,
"eval_rougeL": 14.2303,
"eval_rougeLsum": 16.1952,
"eval_runtime": 446.6007,
"eval_samples_per_second": 14.411,
"eval_steps_per_second": 1.803,
"step": 10000
},
{
"epoch": 0.43,
"learning_rate": 4.2772192653919446e-05,
"loss": 2.5905,
"step": 11000
},
{
"epoch": 0.47,
"learning_rate": 4.2115119258821215e-05,
"loss": 2.5838,
"step": 12000
},
{
"epoch": 0.51,
"learning_rate": 4.145804586372298e-05,
"loss": 2.565,
"step": 13000
},
{
"epoch": 0.55,
"learning_rate": 4.080097246862475e-05,
"loss": 2.5687,
"step": 14000
},
{
"epoch": 0.59,
"learning_rate": 4.0143899073526516e-05,
"loss": 2.5542,
"step": 15000
},
{
"epoch": 0.63,
"learning_rate": 3.948682567842828e-05,
"loss": 2.5402,
"step": 16000
},
{
"epoch": 0.67,
"learning_rate": 3.882975228333005e-05,
"loss": 2.5368,
"step": 17000
},
{
"epoch": 0.71,
"learning_rate": 3.817267888823182e-05,
"loss": 2.5261,
"step": 18000
},
{
"epoch": 0.75,
"learning_rate": 3.751560549313358e-05,
"loss": 2.5257,
"step": 19000
},
{
"epoch": 0.79,
"learning_rate": 3.6858532098035357e-05,
"loss": 2.518,
"step": 20000
},
{
"epoch": 0.79,
"eval_gen_len": 19.0,
"eval_loss": 2.3868377208709717,
"eval_rouge1": 18.0354,
"eval_rouge2": 6.8565,
"eval_rougeL": 14.3552,
"eval_rougeLsum": 16.3664,
"eval_runtime": 450.4559,
"eval_samples_per_second": 14.288,
"eval_steps_per_second": 1.787,
"step": 20000
},
{
"epoch": 0.83,
"learning_rate": 3.620145870293712e-05,
"loss": 2.5159,
"step": 21000
},
{
"epoch": 0.87,
"learning_rate": 3.554438530783889e-05,
"loss": 2.4994,
"step": 22000
},
{
"epoch": 0.91,
"learning_rate": 3.488731191274066e-05,
"loss": 2.4993,
"step": 23000
},
{
"epoch": 0.95,
"learning_rate": 3.423023851764242e-05,
"loss": 2.5103,
"step": 24000
},
{
"epoch": 0.99,
"learning_rate": 3.357316512254419e-05,
"loss": 2.4925,
"step": 25000
},
{
"epoch": 1.03,
"learning_rate": 3.291609172744596e-05,
"loss": 2.4812,
"step": 26000
},
{
"epoch": 1.06,
"learning_rate": 3.225901833234772e-05,
"loss": 2.4806,
"step": 27000
},
{
"epoch": 1.1,
"learning_rate": 3.160194493724949e-05,
"loss": 2.4638,
"step": 28000
},
{
"epoch": 1.14,
"learning_rate": 3.094487154215126e-05,
"loss": 2.4676,
"step": 29000
},
{
"epoch": 1.18,
"learning_rate": 3.0287798147053027e-05,
"loss": 2.4587,
"step": 30000
},
{
"epoch": 1.18,
"eval_gen_len": 19.0,
"eval_loss": 2.3600151538848877,
"eval_rouge1": 18.2076,
"eval_rouge2": 6.9618,
"eval_rougeL": 14.5349,
"eval_rougeLsum": 16.5626,
"eval_runtime": 450.2674,
"eval_samples_per_second": 14.294,
"eval_steps_per_second": 1.788,
"step": 30000
},
{
"epoch": 1.22,
"learning_rate": 2.9630724751954796e-05,
"loss": 2.453,
"step": 31000
},
{
"epoch": 1.26,
"learning_rate": 2.8973651356856562e-05,
"loss": 2.452,
"step": 32000
},
{
"epoch": 1.3,
"learning_rate": 2.831657796175833e-05,
"loss": 2.4553,
"step": 33000
},
{
"epoch": 1.34,
"learning_rate": 2.7659504566660098e-05,
"loss": 2.4527,
"step": 34000
},
{
"epoch": 1.38,
"learning_rate": 2.7002431171561864e-05,
"loss": 2.4478,
"step": 35000
},
{
"epoch": 1.42,
"learning_rate": 2.634535777646363e-05,
"loss": 2.4563,
"step": 36000
},
{
"epoch": 1.46,
"learning_rate": 2.5688284381365403e-05,
"loss": 2.4402,
"step": 37000
},
{
"epoch": 1.5,
"learning_rate": 2.503121098626717e-05,
"loss": 2.447,
"step": 38000
},
{
"epoch": 1.54,
"learning_rate": 2.4374137591168935e-05,
"loss": 2.4436,
"step": 39000
},
{
"epoch": 1.58,
"learning_rate": 2.37170641960707e-05,
"loss": 2.4365,
"step": 40000
},
{
"epoch": 1.58,
"eval_gen_len": 19.0,
"eval_loss": 2.329491138458252,
"eval_rouge1": 18.3579,
"eval_rouge2": 7.0312,
"eval_rougeL": 14.6145,
"eval_rougeLsum": 16.6845,
"eval_runtime": 446.3631,
"eval_samples_per_second": 14.419,
"eval_steps_per_second": 1.803,
"step": 40000
},
{
"epoch": 1.62,
"learning_rate": 2.305999080097247e-05,
"loss": 2.4393,
"step": 41000
},
{
"epoch": 1.66,
"learning_rate": 2.240291740587424e-05,
"loss": 2.4266,
"step": 42000
},
{
"epoch": 1.7,
"learning_rate": 2.1745844010776002e-05,
"loss": 2.4337,
"step": 43000
},
{
"epoch": 1.73,
"learning_rate": 2.108877061567777e-05,
"loss": 2.4238,
"step": 44000
},
{
"epoch": 1.77,
"learning_rate": 2.043169722057954e-05,
"loss": 2.4304,
"step": 45000
},
{
"epoch": 1.81,
"learning_rate": 1.9774623825481307e-05,
"loss": 2.4267,
"step": 46000
},
{
"epoch": 1.85,
"learning_rate": 1.9117550430383073e-05,
"loss": 2.4263,
"step": 47000
},
{
"epoch": 1.89,
"learning_rate": 1.8460477035284842e-05,
"loss": 2.4328,
"step": 48000
},
{
"epoch": 1.93,
"learning_rate": 1.7803403640186608e-05,
"loss": 2.4215,
"step": 49000
},
{
"epoch": 1.97,
"learning_rate": 1.7146330245088378e-05,
"loss": 2.4306,
"step": 50000
},
{
"epoch": 1.97,
"eval_gen_len": 19.0,
"eval_loss": 2.3189666271209717,
"eval_rouge1": 18.4551,
"eval_rouge2": 7.0861,
"eval_rougeL": 14.6879,
"eval_rougeLsum": 16.7627,
"eval_runtime": 447.1376,
"eval_samples_per_second": 14.394,
"eval_steps_per_second": 1.8,
"step": 50000
},
{
"epoch": 2.01,
"learning_rate": 1.6489256849990144e-05,
"loss": 2.4224,
"step": 51000
},
{
"epoch": 2.05,
"learning_rate": 1.5832183454891913e-05,
"loss": 2.414,
"step": 52000
},
{
"epoch": 2.09,
"learning_rate": 1.5175110059793679e-05,
"loss": 2.4029,
"step": 53000
},
{
"epoch": 2.13,
"learning_rate": 1.4518036664695447e-05,
"loss": 2.4104,
"step": 54000
},
{
"epoch": 2.17,
"learning_rate": 1.3860963269597216e-05,
"loss": 2.4117,
"step": 55000
},
{
"epoch": 2.21,
"learning_rate": 1.320388987449898e-05,
"loss": 2.4096,
"step": 56000
},
{
"epoch": 2.25,
"learning_rate": 1.254681647940075e-05,
"loss": 2.4072,
"step": 57000
},
{
"epoch": 2.29,
"learning_rate": 1.1889743084302516e-05,
"loss": 2.4062,
"step": 58000
},
{
"epoch": 2.33,
"learning_rate": 1.1232669689204285e-05,
"loss": 2.4014,
"step": 59000
},
{
"epoch": 2.37,
"learning_rate": 1.0575596294106051e-05,
"loss": 2.4005,
"step": 60000
},
{
"epoch": 2.37,
"eval_gen_len": 19.0,
"eval_loss": 2.3056259155273438,
"eval_rouge1": 18.3521,
"eval_rouge2": 7.0496,
"eval_rougeL": 14.6413,
"eval_rougeLsum": 16.6832,
"eval_runtime": 444.8978,
"eval_samples_per_second": 14.466,
"eval_steps_per_second": 1.809,
"step": 60000
},
{
"epoch": 2.4,
"learning_rate": 9.918522899007819e-06,
"loss": 2.4054,
"step": 61000
},
{
"epoch": 2.44,
"learning_rate": 9.261449503909587e-06,
"loss": 2.4074,
"step": 62000
},
{
"epoch": 2.48,
"learning_rate": 8.604376108811355e-06,
"loss": 2.3965,
"step": 63000
},
{
"epoch": 2.52,
"learning_rate": 7.947302713713122e-06,
"loss": 2.3964,
"step": 64000
},
{
"epoch": 2.56,
"learning_rate": 7.29022931861489e-06,
"loss": 2.3965,
"step": 65000
},
{
"epoch": 2.6,
"learning_rate": 6.633155923516657e-06,
"loss": 2.3934,
"step": 66000
},
{
"epoch": 2.64,
"learning_rate": 5.9760825284184245e-06,
"loss": 2.4052,
"step": 67000
},
{
"epoch": 2.68,
"learning_rate": 5.319009133320192e-06,
"loss": 2.397,
"step": 68000
},
{
"epoch": 2.72,
"learning_rate": 4.66193573822196e-06,
"loss": 2.3875,
"step": 69000
},
{
"epoch": 2.76,
"learning_rate": 4.004862343123727e-06,
"loss": 2.396,
"step": 70000
},
{
"epoch": 2.76,
"eval_gen_len": 19.0,
"eval_loss": 2.3012354373931885,
"eval_rouge1": 18.348,
"eval_rouge2": 7.0439,
"eval_rougeL": 14.6509,
"eval_rougeLsum": 16.6994,
"eval_runtime": 450.1278,
"eval_samples_per_second": 14.298,
"eval_steps_per_second": 1.788,
"step": 70000
},
{
"epoch": 2.8,
"learning_rate": 3.3477889480254945e-06,
"loss": 2.3949,
"step": 71000
},
{
"epoch": 2.84,
"learning_rate": 2.6907155529272622e-06,
"loss": 2.4087,
"step": 72000
},
{
"epoch": 2.88,
"learning_rate": 2.0336421578290295e-06,
"loss": 2.3957,
"step": 73000
},
{
"epoch": 2.92,
"learning_rate": 1.376568762730797e-06,
"loss": 2.3948,
"step": 74000
},
{
"epoch": 2.96,
"learning_rate": 7.194953676325646e-07,
"loss": 2.3925,
"step": 75000
},
{
"epoch": 3.0,
"learning_rate": 6.242197253433209e-08,
"loss": 2.3883,
"step": 76000
},
{
"epoch": 3.0,
"step": 76095,
"total_flos": 1.6477647782333645e+17,
"train_loss": 2.480459571265385,
"train_runtime": 37543.9323,
"train_samples_per_second": 16.214,
"train_steps_per_second": 2.027
}
],
"max_steps": 76095,
"num_train_epochs": 3,
"total_flos": 1.6477647782333645e+17,
"trial_name": null,
"trial_params": null
}