|
{ |
|
"best_metric": 57.44529093644265, |
|
"best_model_checkpoint": "outputs/bitfit/t5-base/cola/checkpoint-4000", |
|
"epoch": 20.0, |
|
"global_step": 5360, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.37, |
|
"eval_average_metrics": 27.46135364047117, |
|
"eval_loss": 0.3107774257659912, |
|
"eval_matthews_correlation": 27.46135364047117, |
|
"eval_runtime": 2.9522, |
|
"eval_samples_per_second": 176.481, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"eval_average_metrics": 43.62200421342928, |
|
"eval_loss": 0.2646695375442505, |
|
"eval_matthews_correlation": 43.62200421342928, |
|
"eval_runtime": 3.3847, |
|
"eval_samples_per_second": 153.928, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"eval_average_metrics": 51.31099678726934, |
|
"eval_loss": 0.2175331562757492, |
|
"eval_matthews_correlation": 51.31099678726934, |
|
"eval_runtime": 2.9782, |
|
"eval_samples_per_second": 174.94, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"eval_average_metrics": 42.848826643479434, |
|
"eval_loss": 0.2941688299179077, |
|
"eval_matthews_correlation": 42.848826643479434, |
|
"eval_runtime": 2.9687, |
|
"eval_samples_per_second": 175.497, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 0.0002720149253731343, |
|
"loss": 0.2517, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"eval_average_metrics": 50.68713663164287, |
|
"eval_loss": 0.21350961923599243, |
|
"eval_matthews_correlation": 50.68713663164287, |
|
"eval_runtime": 3.4719, |
|
"eval_samples_per_second": 150.063, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"eval_average_metrics": 54.06763660847515, |
|
"eval_loss": 0.2589772045612335, |
|
"eval_matthews_correlation": 54.06763660847515, |
|
"eval_runtime": 3.055, |
|
"eval_samples_per_second": 170.539, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"eval_average_metrics": 54.418704464065094, |
|
"eval_loss": 0.2500777244567871, |
|
"eval_matthews_correlation": 54.418704464065094, |
|
"eval_runtime": 2.2921, |
|
"eval_samples_per_second": 227.305, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"eval_average_metrics": 44.23948334299169, |
|
"eval_loss": 0.27688324451446533, |
|
"eval_matthews_correlation": 44.23948334299169, |
|
"eval_runtime": 3.2049, |
|
"eval_samples_per_second": 162.565, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 3.36, |
|
"eval_average_metrics": 55.40444800370546, |
|
"eval_loss": 0.22049109637737274, |
|
"eval_matthews_correlation": 55.40444800370546, |
|
"eval_runtime": 2.8578, |
|
"eval_samples_per_second": 182.307, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 3.73, |
|
"learning_rate": 0.00024402985074626864, |
|
"loss": 0.1579, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.73, |
|
"eval_average_metrics": 55.43262482569132, |
|
"eval_loss": 0.21021293103694916, |
|
"eval_matthews_correlation": 55.43262482569132, |
|
"eval_runtime": 2.1787, |
|
"eval_samples_per_second": 239.129, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"eval_average_metrics": 53.94791613066161, |
|
"eval_loss": 0.29473602771759033, |
|
"eval_matthews_correlation": 53.94791613066161, |
|
"eval_runtime": 2.097, |
|
"eval_samples_per_second": 248.449, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 4.48, |
|
"eval_average_metrics": 54.06763660847515, |
|
"eval_loss": 0.29593780636787415, |
|
"eval_matthews_correlation": 54.06763660847515, |
|
"eval_runtime": 3.848, |
|
"eval_samples_per_second": 135.396, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 4.85, |
|
"eval_average_metrics": 53.83714743780037, |
|
"eval_loss": 0.20928645133972168, |
|
"eval_matthews_correlation": 53.83714743780037, |
|
"eval_runtime": 3.5965, |
|
"eval_samples_per_second": 144.863, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 5.22, |
|
"eval_average_metrics": 56.53240387686201, |
|
"eval_loss": 0.20188479125499725, |
|
"eval_matthews_correlation": 56.53240387686201, |
|
"eval_runtime": 3.5287, |
|
"eval_samples_per_second": 147.648, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 5.6, |
|
"learning_rate": 0.00021604477611940296, |
|
"loss": 0.1323, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 5.6, |
|
"eval_average_metrics": 51.82186256208148, |
|
"eval_loss": 0.2522253096103668, |
|
"eval_matthews_correlation": 51.82186256208148, |
|
"eval_runtime": 3.8704, |
|
"eval_samples_per_second": 134.61, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 5.97, |
|
"eval_average_metrics": 50.583530922188714, |
|
"eval_loss": 0.2909224331378937, |
|
"eval_matthews_correlation": 50.583530922188714, |
|
"eval_runtime": 3.3986, |
|
"eval_samples_per_second": 153.3, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 6.34, |
|
"eval_average_metrics": 54.97554166332294, |
|
"eval_loss": 0.2564501464366913, |
|
"eval_matthews_correlation": 54.97554166332294, |
|
"eval_runtime": 2.9165, |
|
"eval_samples_per_second": 178.641, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 6.72, |
|
"eval_average_metrics": 55.88010902837207, |
|
"eval_loss": 0.2708810269832611, |
|
"eval_matthews_correlation": 55.88010902837207, |
|
"eval_runtime": 3.5399, |
|
"eval_samples_per_second": 147.178, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 7.09, |
|
"eval_average_metrics": 55.50804902976887, |
|
"eval_loss": 0.22388166189193726, |
|
"eval_matthews_correlation": 55.50804902976887, |
|
"eval_runtime": 3.2756, |
|
"eval_samples_per_second": 159.054, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 7.46, |
|
"learning_rate": 0.0001880597014925373, |
|
"loss": 0.1072, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 7.46, |
|
"eval_average_metrics": 53.431088355542556, |
|
"eval_loss": 0.29746949672698975, |
|
"eval_matthews_correlation": 53.431088355542556, |
|
"eval_runtime": 3.5394, |
|
"eval_samples_per_second": 147.2, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 7.84, |
|
"eval_average_metrics": 52.16870923231859, |
|
"eval_loss": 0.2395256757736206, |
|
"eval_matthews_correlation": 52.16870923231859, |
|
"eval_runtime": 3.8667, |
|
"eval_samples_per_second": 134.741, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 8.21, |
|
"eval_average_metrics": 52.805058715954964, |
|
"eval_loss": 0.26214492321014404, |
|
"eval_matthews_correlation": 52.805058715954964, |
|
"eval_runtime": 3.163, |
|
"eval_samples_per_second": 164.718, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 8.58, |
|
"eval_average_metrics": 54.89797128262298, |
|
"eval_loss": 0.27266305685043335, |
|
"eval_matthews_correlation": 54.89797128262298, |
|
"eval_runtime": 3.2482, |
|
"eval_samples_per_second": 160.398, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 8.96, |
|
"eval_average_metrics": 57.042628378400074, |
|
"eval_loss": 0.24082112312316895, |
|
"eval_matthews_correlation": 57.042628378400074, |
|
"eval_runtime": 3.4083, |
|
"eval_samples_per_second": 152.861, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 9.33, |
|
"learning_rate": 0.00016007462686567163, |
|
"loss": 0.0851, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 9.33, |
|
"eval_average_metrics": 55.361147823719584, |
|
"eval_loss": 0.33417803049087524, |
|
"eval_matthews_correlation": 55.361147823719584, |
|
"eval_runtime": 3.5689, |
|
"eval_samples_per_second": 145.983, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 9.7, |
|
"eval_average_metrics": 55.361147823719584, |
|
"eval_loss": 0.28497520089149475, |
|
"eval_matthews_correlation": 55.361147823719584, |
|
"eval_runtime": 3.5734, |
|
"eval_samples_per_second": 145.799, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 10.07, |
|
"eval_average_metrics": 57.398346484757035, |
|
"eval_loss": 0.30509620904922485, |
|
"eval_matthews_correlation": 57.398346484757035, |
|
"eval_runtime": 3.4023, |
|
"eval_samples_per_second": 153.13, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 10.45, |
|
"eval_average_metrics": 52.283190960824186, |
|
"eval_loss": 0.36139407753944397, |
|
"eval_matthews_correlation": 52.283190960824186, |
|
"eval_runtime": 2.5435, |
|
"eval_samples_per_second": 204.835, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 10.82, |
|
"eval_average_metrics": 53.35094771244464, |
|
"eval_loss": 0.295946329832077, |
|
"eval_matthews_correlation": 53.35094771244464, |
|
"eval_runtime": 3.7591, |
|
"eval_samples_per_second": 138.596, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 11.19, |
|
"learning_rate": 0.00013208955223880596, |
|
"loss": 0.0698, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 11.19, |
|
"eval_average_metrics": 51.86777415841536, |
|
"eval_loss": 0.3895832896232605, |
|
"eval_matthews_correlation": 51.86777415841536, |
|
"eval_runtime": 3.4344, |
|
"eval_samples_per_second": 151.698, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 11.57, |
|
"eval_average_metrics": 55.87264481190009, |
|
"eval_loss": 0.29485803842544556, |
|
"eval_matthews_correlation": 55.87264481190009, |
|
"eval_runtime": 2.9553, |
|
"eval_samples_per_second": 176.296, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 11.94, |
|
"eval_average_metrics": 54.38420414410802, |
|
"eval_loss": 0.3401205539703369, |
|
"eval_matthews_correlation": 54.38420414410802, |
|
"eval_runtime": 3.2955, |
|
"eval_samples_per_second": 158.095, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 12.31, |
|
"eval_average_metrics": 55.92006900638419, |
|
"eval_loss": 0.339764803647995, |
|
"eval_matthews_correlation": 55.92006900638419, |
|
"eval_runtime": 3.7048, |
|
"eval_samples_per_second": 140.627, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 12.69, |
|
"eval_average_metrics": 56.456262266607325, |
|
"eval_loss": 0.3011990487575531, |
|
"eval_matthews_correlation": 56.456262266607325, |
|
"eval_runtime": 3.3867, |
|
"eval_samples_per_second": 153.835, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 13.06, |
|
"learning_rate": 0.00010410447761194029, |
|
"loss": 0.0584, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 13.06, |
|
"eval_average_metrics": 52.86688288819721, |
|
"eval_loss": 0.3278854191303253, |
|
"eval_matthews_correlation": 52.86688288819721, |
|
"eval_runtime": 3.5162, |
|
"eval_samples_per_second": 148.171, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 13.43, |
|
"eval_average_metrics": 54.85489612485023, |
|
"eval_loss": 0.35325002670288086, |
|
"eval_matthews_correlation": 54.85489612485023, |
|
"eval_runtime": 2.3667, |
|
"eval_samples_per_second": 220.136, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 13.81, |
|
"eval_average_metrics": 53.48615572958882, |
|
"eval_loss": 0.36301782727241516, |
|
"eval_matthews_correlation": 53.48615572958882, |
|
"eval_runtime": 2.5827, |
|
"eval_samples_per_second": 201.728, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 14.18, |
|
"eval_average_metrics": 56.389461061844216, |
|
"eval_loss": 0.31869834661483765, |
|
"eval_matthews_correlation": 56.389461061844216, |
|
"eval_runtime": 3.4446, |
|
"eval_samples_per_second": 151.25, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 14.55, |
|
"eval_average_metrics": 55.92006900638419, |
|
"eval_loss": 0.37227579951286316, |
|
"eval_matthews_correlation": 55.92006900638419, |
|
"eval_runtime": 3.5335, |
|
"eval_samples_per_second": 147.446, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 14.93, |
|
"learning_rate": 7.611940298507463e-05, |
|
"loss": 0.0487, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 14.93, |
|
"eval_average_metrics": 57.44529093644265, |
|
"eval_loss": 0.3295034170150757, |
|
"eval_matthews_correlation": 57.44529093644265, |
|
"eval_runtime": 2.9916, |
|
"eval_samples_per_second": 174.155, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 15.3, |
|
"eval_average_metrics": 54.8564185833095, |
|
"eval_loss": 0.3849255442619324, |
|
"eval_matthews_correlation": 54.8564185833095, |
|
"eval_runtime": 3.2493, |
|
"eval_samples_per_second": 160.342, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 15.67, |
|
"eval_average_metrics": 52.77755663379653, |
|
"eval_loss": 0.37294790148735046, |
|
"eval_matthews_correlation": 52.77755663379653, |
|
"eval_runtime": 3.3801, |
|
"eval_samples_per_second": 154.137, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 16.04, |
|
"eval_average_metrics": 52.32534052948778, |
|
"eval_loss": 0.36685994267463684, |
|
"eval_matthews_correlation": 52.32534052948778, |
|
"eval_runtime": 3.873, |
|
"eval_samples_per_second": 134.522, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 16.42, |
|
"eval_average_metrics": 54.437572540197074, |
|
"eval_loss": 0.36487194895744324, |
|
"eval_matthews_correlation": 54.437572540197074, |
|
"eval_runtime": 3.2085, |
|
"eval_samples_per_second": 162.381, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 16.79, |
|
"learning_rate": 4.813432835820895e-05, |
|
"loss": 0.0385, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 16.79, |
|
"eval_average_metrics": 54.87289928629483, |
|
"eval_loss": 0.38992583751678467, |
|
"eval_matthews_correlation": 54.87289928629483, |
|
"eval_runtime": 3.4485, |
|
"eval_samples_per_second": 151.079, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 17.16, |
|
"eval_average_metrics": 54.847754008390616, |
|
"eval_loss": 0.38383200764656067, |
|
"eval_matthews_correlation": 54.847754008390616, |
|
"eval_runtime": 3.3769, |
|
"eval_samples_per_second": 154.283, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 17.54, |
|
"eval_average_metrics": 53.816197444380734, |
|
"eval_loss": 0.37992334365844727, |
|
"eval_matthews_correlation": 53.816197444380734, |
|
"eval_runtime": 3.4693, |
|
"eval_samples_per_second": 150.175, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 17.91, |
|
"eval_average_metrics": 54.89061573818697, |
|
"eval_loss": 0.3707120716571808, |
|
"eval_matthews_correlation": 54.89061573818697, |
|
"eval_runtime": 3.351, |
|
"eval_samples_per_second": 155.474, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 18.28, |
|
"eval_average_metrics": 53.8685492100217, |
|
"eval_loss": 0.4207901954650879, |
|
"eval_matthews_correlation": 53.8685492100217, |
|
"eval_runtime": 3.7792, |
|
"eval_samples_per_second": 137.86, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 18.66, |
|
"learning_rate": 2.014925373134328e-05, |
|
"loss": 0.0327, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 18.66, |
|
"eval_average_metrics": 56.40477323211171, |
|
"eval_loss": 0.4069698750972748, |
|
"eval_matthews_correlation": 56.40477323211171, |
|
"eval_runtime": 3.0613, |
|
"eval_samples_per_second": 170.192, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 19.03, |
|
"eval_average_metrics": 55.87248612624084, |
|
"eval_loss": 0.38561907410621643, |
|
"eval_matthews_correlation": 55.87248612624084, |
|
"eval_runtime": 3.0206, |
|
"eval_samples_per_second": 172.481, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 19.4, |
|
"eval_average_metrics": 55.88027222130694, |
|
"eval_loss": 0.3679342567920685, |
|
"eval_matthews_correlation": 55.88027222130694, |
|
"eval_runtime": 3.4222, |
|
"eval_samples_per_second": 152.242, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 19.78, |
|
"eval_average_metrics": 56.389461061844216, |
|
"eval_loss": 0.38738054037094116, |
|
"eval_matthews_correlation": 56.389461061844216, |
|
"eval_runtime": 2.4581, |
|
"eval_samples_per_second": 211.949, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 5360, |
|
"total_flos": 5846454037239552.0, |
|
"train_loss": 0.09369400846424387, |
|
"train_runtime": 1943.2968, |
|
"train_samples_per_second": 88.005, |
|
"train_steps_per_second": 2.758 |
|
} |
|
], |
|
"max_steps": 5360, |
|
"num_train_epochs": 20, |
|
"total_flos": 5846454037239552.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|