|
{ |
|
"best_metric": 80.30581039755351, |
|
"best_model_checkpoint": "outputs/bitfit/t5-base/superglue-boolq/checkpoint-2600", |
|
"epoch": 20.0, |
|
"global_step": 5900, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.68, |
|
"eval_accuracy": 74.6788990825688, |
|
"eval_average_metrics": 74.6788990825688, |
|
"eval_loss": 0.21743574738502502, |
|
"eval_runtime": 16.9831, |
|
"eval_samples_per_second": 96.272, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"eval_accuracy": 75.71865443425077, |
|
"eval_average_metrics": 75.71865443425077, |
|
"eval_loss": 0.21506452560424805, |
|
"eval_runtime": 14.6109, |
|
"eval_samples_per_second": 111.903, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 0.00027457627118644066, |
|
"loss": 0.374, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"eval_accuracy": 77.18654434250764, |
|
"eval_average_metrics": 77.18654434250764, |
|
"eval_loss": 0.20255930721759796, |
|
"eval_runtime": 17.1645, |
|
"eval_samples_per_second": 95.254, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"eval_accuracy": 76.5137614678899, |
|
"eval_average_metrics": 76.5137614678899, |
|
"eval_loss": 0.220754012465477, |
|
"eval_runtime": 17.1929, |
|
"eval_samples_per_second": 95.097, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"learning_rate": 0.00024915254237288135, |
|
"loss": 0.2114, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"eval_accuracy": 78.1651376146789, |
|
"eval_average_metrics": 78.1651376146789, |
|
"eval_loss": 0.19822736084461212, |
|
"eval_runtime": 14.5865, |
|
"eval_samples_per_second": 112.09, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.07, |
|
"eval_accuracy": 78.71559633027523, |
|
"eval_average_metrics": 78.71559633027523, |
|
"eval_loss": 0.19769148528575897, |
|
"eval_runtime": 17.1866, |
|
"eval_samples_per_second": 95.132, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 4.75, |
|
"eval_accuracy": 78.2262996941896, |
|
"eval_average_metrics": 78.2262996941896, |
|
"eval_loss": 0.1974276602268219, |
|
"eval_runtime": 17.1722, |
|
"eval_samples_per_second": 95.212, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 5.08, |
|
"learning_rate": 0.000223728813559322, |
|
"loss": 0.1997, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 5.42, |
|
"eval_accuracy": 78.71559633027523, |
|
"eval_average_metrics": 78.71559633027523, |
|
"eval_loss": 0.19954562187194824, |
|
"eval_runtime": 14.5679, |
|
"eval_samples_per_second": 112.233, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 6.1, |
|
"eval_accuracy": 78.2262996941896, |
|
"eval_average_metrics": 78.2262996941896, |
|
"eval_loss": 0.20299072563648224, |
|
"eval_runtime": 17.2624, |
|
"eval_samples_per_second": 94.714, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 6.78, |
|
"learning_rate": 0.0001983050847457627, |
|
"loss": 0.1934, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 6.78, |
|
"eval_accuracy": 79.63302752293579, |
|
"eval_average_metrics": 79.63302752293579, |
|
"eval_loss": 0.19578830897808075, |
|
"eval_runtime": 17.2808, |
|
"eval_samples_per_second": 94.614, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 7.46, |
|
"eval_accuracy": 79.26605504587157, |
|
"eval_average_metrics": 79.26605504587157, |
|
"eval_loss": 0.1937599629163742, |
|
"eval_runtime": 15.3794, |
|
"eval_samples_per_second": 106.311, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 8.14, |
|
"eval_accuracy": 77.92048929663609, |
|
"eval_average_metrics": 77.92048929663609, |
|
"eval_loss": 0.2019716054201126, |
|
"eval_runtime": 17.5057, |
|
"eval_samples_per_second": 93.398, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 8.47, |
|
"learning_rate": 0.0001728813559322034, |
|
"loss": 0.1907, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 8.81, |
|
"eval_accuracy": 80.30581039755351, |
|
"eval_average_metrics": 80.30581039755351, |
|
"eval_loss": 0.19154316186904907, |
|
"eval_runtime": 17.3782, |
|
"eval_samples_per_second": 94.083, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 9.49, |
|
"eval_accuracy": 79.93883792048929, |
|
"eval_average_metrics": 79.93883792048929, |
|
"eval_loss": 0.19677455723285675, |
|
"eval_runtime": 15.9661, |
|
"eval_samples_per_second": 102.404, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 10.17, |
|
"learning_rate": 0.00014745762711864405, |
|
"loss": 0.183, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 10.17, |
|
"eval_accuracy": 79.44954128440367, |
|
"eval_average_metrics": 79.44954128440367, |
|
"eval_loss": 0.19117017090320587, |
|
"eval_runtime": 17.2749, |
|
"eval_samples_per_second": 94.646, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 10.85, |
|
"eval_accuracy": 78.10397553516819, |
|
"eval_average_metrics": 78.10397553516819, |
|
"eval_loss": 0.20411182940006256, |
|
"eval_runtime": 17.1181, |
|
"eval_samples_per_second": 95.513, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 11.53, |
|
"eval_accuracy": 79.02140672782875, |
|
"eval_average_metrics": 79.02140672782875, |
|
"eval_loss": 0.194900244474411, |
|
"eval_runtime": 17.1955, |
|
"eval_samples_per_second": 95.083, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 11.86, |
|
"learning_rate": 0.00012203389830508474, |
|
"loss": 0.181, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 12.2, |
|
"eval_accuracy": 79.38837920489297, |
|
"eval_average_metrics": 79.38837920489297, |
|
"eval_loss": 0.20091596245765686, |
|
"eval_runtime": 16.9905, |
|
"eval_samples_per_second": 96.23, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 12.88, |
|
"eval_accuracy": 79.81651376146789, |
|
"eval_average_metrics": 79.81651376146789, |
|
"eval_loss": 0.18894420564174652, |
|
"eval_runtime": 17.2706, |
|
"eval_samples_per_second": 94.669, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 13.56, |
|
"learning_rate": 9.661016949152541e-05, |
|
"loss": 0.1786, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 13.56, |
|
"eval_accuracy": 78.77675840978593, |
|
"eval_average_metrics": 78.77675840978593, |
|
"eval_loss": 0.20160046219825745, |
|
"eval_runtime": 17.0941, |
|
"eval_samples_per_second": 95.647, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 14.24, |
|
"eval_accuracy": 79.57186544342507, |
|
"eval_average_metrics": 79.57186544342507, |
|
"eval_loss": 0.19864365458488464, |
|
"eval_runtime": 17.0536, |
|
"eval_samples_per_second": 95.874, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 14.92, |
|
"eval_accuracy": 79.51070336391437, |
|
"eval_average_metrics": 79.51070336391437, |
|
"eval_loss": 0.19150203466415405, |
|
"eval_runtime": 17.2063, |
|
"eval_samples_per_second": 95.024, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 15.25, |
|
"learning_rate": 7.11864406779661e-05, |
|
"loss": 0.1769, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 15.59, |
|
"eval_accuracy": 78.77675840978593, |
|
"eval_average_metrics": 78.77675840978593, |
|
"eval_loss": 0.19904659688472748, |
|
"eval_runtime": 17.1806, |
|
"eval_samples_per_second": 95.165, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 16.27, |
|
"eval_accuracy": 79.20489296636084, |
|
"eval_average_metrics": 79.20489296636084, |
|
"eval_loss": 0.19741013646125793, |
|
"eval_runtime": 17.2538, |
|
"eval_samples_per_second": 94.762, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 16.95, |
|
"learning_rate": 4.576271186440678e-05, |
|
"loss": 0.1741, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 16.95, |
|
"eval_accuracy": 79.51070336391437, |
|
"eval_average_metrics": 79.51070336391437, |
|
"eval_loss": 0.19429509341716766, |
|
"eval_runtime": 17.4899, |
|
"eval_samples_per_second": 93.483, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 17.63, |
|
"eval_accuracy": 78.89908256880734, |
|
"eval_average_metrics": 78.89908256880734, |
|
"eval_loss": 0.20053960382938385, |
|
"eval_runtime": 17.2615, |
|
"eval_samples_per_second": 94.719, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 18.31, |
|
"eval_accuracy": 79.32721712538226, |
|
"eval_average_metrics": 79.32721712538226, |
|
"eval_loss": 0.1975349634885788, |
|
"eval_runtime": 17.2849, |
|
"eval_samples_per_second": 94.591, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 18.64, |
|
"learning_rate": 2.0338983050847455e-05, |
|
"loss": 0.1717, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 18.98, |
|
"eval_accuracy": 78.77675840978593, |
|
"eval_average_metrics": 78.77675840978593, |
|
"eval_loss": 0.20098499953746796, |
|
"eval_runtime": 17.3626, |
|
"eval_samples_per_second": 94.168, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 19.66, |
|
"eval_accuracy": 79.20489296636084, |
|
"eval_average_metrics": 79.20489296636084, |
|
"eval_loss": 0.19755637645721436, |
|
"eval_runtime": 17.3825, |
|
"eval_samples_per_second": 94.06, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 5900, |
|
"total_flos": 5.74047486286578e+16, |
|
"train_loss": 0.20100380073159427, |
|
"train_runtime": 3071.6757, |
|
"train_samples_per_second": 61.38, |
|
"train_steps_per_second": 1.921 |
|
} |
|
], |
|
"max_steps": 5900, |
|
"num_train_epochs": 20, |
|
"total_flos": 5.74047486286578e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|