cola / trainer_state.json
ShengdingHu's picture
Training in progress, step 100
9c9ea3a
raw history blame
No virus
16.1 kB
{
"best_metric": 57.44529093644265,
"best_model_checkpoint": "outputs/bitfit/t5-base/cola/checkpoint-4000",
"epoch": 20.0,
"global_step": 5360,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.37,
"eval_average_metrics": 27.46135364047117,
"eval_loss": 0.3107774257659912,
"eval_matthews_correlation": 27.46135364047117,
"eval_runtime": 2.9522,
"eval_samples_per_second": 176.481,
"step": 100
},
{
"epoch": 0.75,
"eval_average_metrics": 43.62200421342928,
"eval_loss": 0.2646695375442505,
"eval_matthews_correlation": 43.62200421342928,
"eval_runtime": 3.3847,
"eval_samples_per_second": 153.928,
"step": 200
},
{
"epoch": 1.12,
"eval_average_metrics": 51.31099678726934,
"eval_loss": 0.2175331562757492,
"eval_matthews_correlation": 51.31099678726934,
"eval_runtime": 2.9782,
"eval_samples_per_second": 174.94,
"step": 300
},
{
"epoch": 1.49,
"eval_average_metrics": 42.848826643479434,
"eval_loss": 0.2941688299179077,
"eval_matthews_correlation": 42.848826643479434,
"eval_runtime": 2.9687,
"eval_samples_per_second": 175.497,
"step": 400
},
{
"epoch": 1.87,
"learning_rate": 0.0002720149253731343,
"loss": 0.2517,
"step": 500
},
{
"epoch": 1.87,
"eval_average_metrics": 50.68713663164287,
"eval_loss": 0.21350961923599243,
"eval_matthews_correlation": 50.68713663164287,
"eval_runtime": 3.4719,
"eval_samples_per_second": 150.063,
"step": 500
},
{
"epoch": 2.24,
"eval_average_metrics": 54.06763660847515,
"eval_loss": 0.2589772045612335,
"eval_matthews_correlation": 54.06763660847515,
"eval_runtime": 3.055,
"eval_samples_per_second": 170.539,
"step": 600
},
{
"epoch": 2.61,
"eval_average_metrics": 54.418704464065094,
"eval_loss": 0.2500777244567871,
"eval_matthews_correlation": 54.418704464065094,
"eval_runtime": 2.2921,
"eval_samples_per_second": 227.305,
"step": 700
},
{
"epoch": 2.99,
"eval_average_metrics": 44.23948334299169,
"eval_loss": 0.27688324451446533,
"eval_matthews_correlation": 44.23948334299169,
"eval_runtime": 3.2049,
"eval_samples_per_second": 162.565,
"step": 800
},
{
"epoch": 3.36,
"eval_average_metrics": 55.40444800370546,
"eval_loss": 0.22049109637737274,
"eval_matthews_correlation": 55.40444800370546,
"eval_runtime": 2.8578,
"eval_samples_per_second": 182.307,
"step": 900
},
{
"epoch": 3.73,
"learning_rate": 0.00024402985074626864,
"loss": 0.1579,
"step": 1000
},
{
"epoch": 3.73,
"eval_average_metrics": 55.43262482569132,
"eval_loss": 0.21021293103694916,
"eval_matthews_correlation": 55.43262482569132,
"eval_runtime": 2.1787,
"eval_samples_per_second": 239.129,
"step": 1000
},
{
"epoch": 4.1,
"eval_average_metrics": 53.94791613066161,
"eval_loss": 0.29473602771759033,
"eval_matthews_correlation": 53.94791613066161,
"eval_runtime": 2.097,
"eval_samples_per_second": 248.449,
"step": 1100
},
{
"epoch": 4.48,
"eval_average_metrics": 54.06763660847515,
"eval_loss": 0.29593780636787415,
"eval_matthews_correlation": 54.06763660847515,
"eval_runtime": 3.848,
"eval_samples_per_second": 135.396,
"step": 1200
},
{
"epoch": 4.85,
"eval_average_metrics": 53.83714743780037,
"eval_loss": 0.20928645133972168,
"eval_matthews_correlation": 53.83714743780037,
"eval_runtime": 3.5965,
"eval_samples_per_second": 144.863,
"step": 1300
},
{
"epoch": 5.22,
"eval_average_metrics": 56.53240387686201,
"eval_loss": 0.20188479125499725,
"eval_matthews_correlation": 56.53240387686201,
"eval_runtime": 3.5287,
"eval_samples_per_second": 147.648,
"step": 1400
},
{
"epoch": 5.6,
"learning_rate": 0.00021604477611940296,
"loss": 0.1323,
"step": 1500
},
{
"epoch": 5.6,
"eval_average_metrics": 51.82186256208148,
"eval_loss": 0.2522253096103668,
"eval_matthews_correlation": 51.82186256208148,
"eval_runtime": 3.8704,
"eval_samples_per_second": 134.61,
"step": 1500
},
{
"epoch": 5.97,
"eval_average_metrics": 50.583530922188714,
"eval_loss": 0.2909224331378937,
"eval_matthews_correlation": 50.583530922188714,
"eval_runtime": 3.3986,
"eval_samples_per_second": 153.3,
"step": 1600
},
{
"epoch": 6.34,
"eval_average_metrics": 54.97554166332294,
"eval_loss": 0.2564501464366913,
"eval_matthews_correlation": 54.97554166332294,
"eval_runtime": 2.9165,
"eval_samples_per_second": 178.641,
"step": 1700
},
{
"epoch": 6.72,
"eval_average_metrics": 55.88010902837207,
"eval_loss": 0.2708810269832611,
"eval_matthews_correlation": 55.88010902837207,
"eval_runtime": 3.5399,
"eval_samples_per_second": 147.178,
"step": 1800
},
{
"epoch": 7.09,
"eval_average_metrics": 55.50804902976887,
"eval_loss": 0.22388166189193726,
"eval_matthews_correlation": 55.50804902976887,
"eval_runtime": 3.2756,
"eval_samples_per_second": 159.054,
"step": 1900
},
{
"epoch": 7.46,
"learning_rate": 0.0001880597014925373,
"loss": 0.1072,
"step": 2000
},
{
"epoch": 7.46,
"eval_average_metrics": 53.431088355542556,
"eval_loss": 0.29746949672698975,
"eval_matthews_correlation": 53.431088355542556,
"eval_runtime": 3.5394,
"eval_samples_per_second": 147.2,
"step": 2000
},
{
"epoch": 7.84,
"eval_average_metrics": 52.16870923231859,
"eval_loss": 0.2395256757736206,
"eval_matthews_correlation": 52.16870923231859,
"eval_runtime": 3.8667,
"eval_samples_per_second": 134.741,
"step": 2100
},
{
"epoch": 8.21,
"eval_average_metrics": 52.805058715954964,
"eval_loss": 0.26214492321014404,
"eval_matthews_correlation": 52.805058715954964,
"eval_runtime": 3.163,
"eval_samples_per_second": 164.718,
"step": 2200
},
{
"epoch": 8.58,
"eval_average_metrics": 54.89797128262298,
"eval_loss": 0.27266305685043335,
"eval_matthews_correlation": 54.89797128262298,
"eval_runtime": 3.2482,
"eval_samples_per_second": 160.398,
"step": 2300
},
{
"epoch": 8.96,
"eval_average_metrics": 57.042628378400074,
"eval_loss": 0.24082112312316895,
"eval_matthews_correlation": 57.042628378400074,
"eval_runtime": 3.4083,
"eval_samples_per_second": 152.861,
"step": 2400
},
{
"epoch": 9.33,
"learning_rate": 0.00016007462686567163,
"loss": 0.0851,
"step": 2500
},
{
"epoch": 9.33,
"eval_average_metrics": 55.361147823719584,
"eval_loss": 0.33417803049087524,
"eval_matthews_correlation": 55.361147823719584,
"eval_runtime": 3.5689,
"eval_samples_per_second": 145.983,
"step": 2500
},
{
"epoch": 9.7,
"eval_average_metrics": 55.361147823719584,
"eval_loss": 0.28497520089149475,
"eval_matthews_correlation": 55.361147823719584,
"eval_runtime": 3.5734,
"eval_samples_per_second": 145.799,
"step": 2600
},
{
"epoch": 10.07,
"eval_average_metrics": 57.398346484757035,
"eval_loss": 0.30509620904922485,
"eval_matthews_correlation": 57.398346484757035,
"eval_runtime": 3.4023,
"eval_samples_per_second": 153.13,
"step": 2700
},
{
"epoch": 10.45,
"eval_average_metrics": 52.283190960824186,
"eval_loss": 0.36139407753944397,
"eval_matthews_correlation": 52.283190960824186,
"eval_runtime": 2.5435,
"eval_samples_per_second": 204.835,
"step": 2800
},
{
"epoch": 10.82,
"eval_average_metrics": 53.35094771244464,
"eval_loss": 0.295946329832077,
"eval_matthews_correlation": 53.35094771244464,
"eval_runtime": 3.7591,
"eval_samples_per_second": 138.596,
"step": 2900
},
{
"epoch": 11.19,
"learning_rate": 0.00013208955223880596,
"loss": 0.0698,
"step": 3000
},
{
"epoch": 11.19,
"eval_average_metrics": 51.86777415841536,
"eval_loss": 0.3895832896232605,
"eval_matthews_correlation": 51.86777415841536,
"eval_runtime": 3.4344,
"eval_samples_per_second": 151.698,
"step": 3000
},
{
"epoch": 11.57,
"eval_average_metrics": 55.87264481190009,
"eval_loss": 0.29485803842544556,
"eval_matthews_correlation": 55.87264481190009,
"eval_runtime": 2.9553,
"eval_samples_per_second": 176.296,
"step": 3100
},
{
"epoch": 11.94,
"eval_average_metrics": 54.38420414410802,
"eval_loss": 0.3401205539703369,
"eval_matthews_correlation": 54.38420414410802,
"eval_runtime": 3.2955,
"eval_samples_per_second": 158.095,
"step": 3200
},
{
"epoch": 12.31,
"eval_average_metrics": 55.92006900638419,
"eval_loss": 0.339764803647995,
"eval_matthews_correlation": 55.92006900638419,
"eval_runtime": 3.7048,
"eval_samples_per_second": 140.627,
"step": 3300
},
{
"epoch": 12.69,
"eval_average_metrics": 56.456262266607325,
"eval_loss": 0.3011990487575531,
"eval_matthews_correlation": 56.456262266607325,
"eval_runtime": 3.3867,
"eval_samples_per_second": 153.835,
"step": 3400
},
{
"epoch": 13.06,
"learning_rate": 0.00010410447761194029,
"loss": 0.0584,
"step": 3500
},
{
"epoch": 13.06,
"eval_average_metrics": 52.86688288819721,
"eval_loss": 0.3278854191303253,
"eval_matthews_correlation": 52.86688288819721,
"eval_runtime": 3.5162,
"eval_samples_per_second": 148.171,
"step": 3500
},
{
"epoch": 13.43,
"eval_average_metrics": 54.85489612485023,
"eval_loss": 0.35325002670288086,
"eval_matthews_correlation": 54.85489612485023,
"eval_runtime": 2.3667,
"eval_samples_per_second": 220.136,
"step": 3600
},
{
"epoch": 13.81,
"eval_average_metrics": 53.48615572958882,
"eval_loss": 0.36301782727241516,
"eval_matthews_correlation": 53.48615572958882,
"eval_runtime": 2.5827,
"eval_samples_per_second": 201.728,
"step": 3700
},
{
"epoch": 14.18,
"eval_average_metrics": 56.389461061844216,
"eval_loss": 0.31869834661483765,
"eval_matthews_correlation": 56.389461061844216,
"eval_runtime": 3.4446,
"eval_samples_per_second": 151.25,
"step": 3800
},
{
"epoch": 14.55,
"eval_average_metrics": 55.92006900638419,
"eval_loss": 0.37227579951286316,
"eval_matthews_correlation": 55.92006900638419,
"eval_runtime": 3.5335,
"eval_samples_per_second": 147.446,
"step": 3900
},
{
"epoch": 14.93,
"learning_rate": 7.611940298507463e-05,
"loss": 0.0487,
"step": 4000
},
{
"epoch": 14.93,
"eval_average_metrics": 57.44529093644265,
"eval_loss": 0.3295034170150757,
"eval_matthews_correlation": 57.44529093644265,
"eval_runtime": 2.9916,
"eval_samples_per_second": 174.155,
"step": 4000
},
{
"epoch": 15.3,
"eval_average_metrics": 54.8564185833095,
"eval_loss": 0.3849255442619324,
"eval_matthews_correlation": 54.8564185833095,
"eval_runtime": 3.2493,
"eval_samples_per_second": 160.342,
"step": 4100
},
{
"epoch": 15.67,
"eval_average_metrics": 52.77755663379653,
"eval_loss": 0.37294790148735046,
"eval_matthews_correlation": 52.77755663379653,
"eval_runtime": 3.3801,
"eval_samples_per_second": 154.137,
"step": 4200
},
{
"epoch": 16.04,
"eval_average_metrics": 52.32534052948778,
"eval_loss": 0.36685994267463684,
"eval_matthews_correlation": 52.32534052948778,
"eval_runtime": 3.873,
"eval_samples_per_second": 134.522,
"step": 4300
},
{
"epoch": 16.42,
"eval_average_metrics": 54.437572540197074,
"eval_loss": 0.36487194895744324,
"eval_matthews_correlation": 54.437572540197074,
"eval_runtime": 3.2085,
"eval_samples_per_second": 162.381,
"step": 4400
},
{
"epoch": 16.79,
"learning_rate": 4.813432835820895e-05,
"loss": 0.0385,
"step": 4500
},
{
"epoch": 16.79,
"eval_average_metrics": 54.87289928629483,
"eval_loss": 0.38992583751678467,
"eval_matthews_correlation": 54.87289928629483,
"eval_runtime": 3.4485,
"eval_samples_per_second": 151.079,
"step": 4500
},
{
"epoch": 17.16,
"eval_average_metrics": 54.847754008390616,
"eval_loss": 0.38383200764656067,
"eval_matthews_correlation": 54.847754008390616,
"eval_runtime": 3.3769,
"eval_samples_per_second": 154.283,
"step": 4600
},
{
"epoch": 17.54,
"eval_average_metrics": 53.816197444380734,
"eval_loss": 0.37992334365844727,
"eval_matthews_correlation": 53.816197444380734,
"eval_runtime": 3.4693,
"eval_samples_per_second": 150.175,
"step": 4700
},
{
"epoch": 17.91,
"eval_average_metrics": 54.89061573818697,
"eval_loss": 0.3707120716571808,
"eval_matthews_correlation": 54.89061573818697,
"eval_runtime": 3.351,
"eval_samples_per_second": 155.474,
"step": 4800
},
{
"epoch": 18.28,
"eval_average_metrics": 53.8685492100217,
"eval_loss": 0.4207901954650879,
"eval_matthews_correlation": 53.8685492100217,
"eval_runtime": 3.7792,
"eval_samples_per_second": 137.86,
"step": 4900
},
{
"epoch": 18.66,
"learning_rate": 2.014925373134328e-05,
"loss": 0.0327,
"step": 5000
},
{
"epoch": 18.66,
"eval_average_metrics": 56.40477323211171,
"eval_loss": 0.4069698750972748,
"eval_matthews_correlation": 56.40477323211171,
"eval_runtime": 3.0613,
"eval_samples_per_second": 170.192,
"step": 5000
},
{
"epoch": 19.03,
"eval_average_metrics": 55.87248612624084,
"eval_loss": 0.38561907410621643,
"eval_matthews_correlation": 55.87248612624084,
"eval_runtime": 3.0206,
"eval_samples_per_second": 172.481,
"step": 5100
},
{
"epoch": 19.4,
"eval_average_metrics": 55.88027222130694,
"eval_loss": 0.3679342567920685,
"eval_matthews_correlation": 55.88027222130694,
"eval_runtime": 3.4222,
"eval_samples_per_second": 152.242,
"step": 5200
},
{
"epoch": 19.78,
"eval_average_metrics": 56.389461061844216,
"eval_loss": 0.38738054037094116,
"eval_matthews_correlation": 56.389461061844216,
"eval_runtime": 2.4581,
"eval_samples_per_second": 211.949,
"step": 5300
},
{
"epoch": 20.0,
"step": 5360,
"total_flos": 5846454037239552.0,
"train_loss": 0.09369400846424387,
"train_runtime": 1943.2968,
"train_samples_per_second": 88.005,
"train_steps_per_second": 2.758
}
],
"max_steps": 5360,
"num_train_epochs": 20,
"total_flos": 5846454037239552.0,
"trial_name": null,
"trial_params": null
}