|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 80.0, |
|
"global_step": 2000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.4, |
|
"eval_loss": 0.7262819409370422, |
|
"eval_runtime": 2.9351, |
|
"eval_samples_per_second": 34.071, |
|
"eval_steps_per_second": 4.429, |
|
"step": 25 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.4, |
|
"epoch": 1.0, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.6, |
|
"eval_loss": 0.6114884614944458, |
|
"eval_runtime": 2.9878, |
|
"eval_samples_per_second": 33.47, |
|
"eval_steps_per_second": 4.351, |
|
"step": 50 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.6, |
|
"epoch": 2.0, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.62, |
|
"eval_loss": 0.5427446365356445, |
|
"eval_runtime": 3.0364, |
|
"eval_samples_per_second": 32.934, |
|
"eval_steps_per_second": 4.281, |
|
"step": 75 |
|
}, |
|
{ |
|
"best_epoch": 2, |
|
"best_eval_accuracy": 0.62, |
|
"epoch": 3.0, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.61, |
|
"eval_loss": 0.5318807363510132, |
|
"eval_runtime": 3.0643, |
|
"eval_samples_per_second": 32.634, |
|
"eval_steps_per_second": 4.242, |
|
"step": 100 |
|
}, |
|
{ |
|
"best_epoch": 2, |
|
"best_eval_accuracy": 0.62, |
|
"epoch": 4.0, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.55, |
|
"eval_loss": 0.5817937254905701, |
|
"eval_runtime": 3.0801, |
|
"eval_samples_per_second": 32.467, |
|
"eval_steps_per_second": 4.221, |
|
"step": 125 |
|
}, |
|
{ |
|
"best_epoch": 2, |
|
"best_eval_accuracy": 0.62, |
|
"epoch": 5.0, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.68, |
|
"eval_loss": 0.5093362331390381, |
|
"eval_runtime": 3.0872, |
|
"eval_samples_per_second": 32.391, |
|
"eval_steps_per_second": 4.211, |
|
"step": 150 |
|
}, |
|
{ |
|
"best_epoch": 5, |
|
"best_eval_accuracy": 0.68, |
|
"epoch": 6.0, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.63, |
|
"eval_loss": 0.7841365337371826, |
|
"eval_runtime": 3.0946, |
|
"eval_samples_per_second": 32.315, |
|
"eval_steps_per_second": 4.201, |
|
"step": 175 |
|
}, |
|
{ |
|
"best_epoch": 5, |
|
"best_eval_accuracy": 0.68, |
|
"epoch": 7.0, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.68, |
|
"eval_loss": 0.7628840804100037, |
|
"eval_runtime": 3.096, |
|
"eval_samples_per_second": 32.3, |
|
"eval_steps_per_second": 4.199, |
|
"step": 200 |
|
}, |
|
{ |
|
"best_epoch": 5, |
|
"best_eval_accuracy": 0.68, |
|
"epoch": 8.0, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.69, |
|
"eval_loss": 0.5873813629150391, |
|
"eval_runtime": 3.1038, |
|
"eval_samples_per_second": 32.218, |
|
"eval_steps_per_second": 4.188, |
|
"step": 225 |
|
}, |
|
{ |
|
"best_epoch": 8, |
|
"best_eval_accuracy": 0.69, |
|
"epoch": 9.0, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.71, |
|
"eval_loss": 0.5227637887001038, |
|
"eval_runtime": 3.1015, |
|
"eval_samples_per_second": 32.242, |
|
"eval_steps_per_second": 4.192, |
|
"step": 250 |
|
}, |
|
{ |
|
"best_epoch": 9, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 10.0, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.74, |
|
"eval_loss": 0.843931257724762, |
|
"eval_runtime": 3.1122, |
|
"eval_samples_per_second": 32.132, |
|
"eval_steps_per_second": 4.177, |
|
"step": 275 |
|
}, |
|
{ |
|
"best_epoch": 10, |
|
"best_eval_accuracy": 0.74, |
|
"epoch": 11.0, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.71, |
|
"eval_loss": 0.8242893218994141, |
|
"eval_runtime": 3.1122, |
|
"eval_samples_per_second": 32.131, |
|
"eval_steps_per_second": 4.177, |
|
"step": 300 |
|
}, |
|
{ |
|
"best_epoch": 10, |
|
"best_eval_accuracy": 0.74, |
|
"epoch": 12.0, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.5669903755187988, |
|
"eval_runtime": 3.112, |
|
"eval_samples_per_second": 32.134, |
|
"eval_steps_per_second": 4.177, |
|
"step": 325 |
|
}, |
|
{ |
|
"best_epoch": 10, |
|
"best_eval_accuracy": 0.74, |
|
"epoch": 13.0, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.61, |
|
"eval_loss": 0.5600945353507996, |
|
"eval_runtime": 3.1143, |
|
"eval_samples_per_second": 32.11, |
|
"eval_steps_per_second": 4.174, |
|
"step": 350 |
|
}, |
|
{ |
|
"best_epoch": 10, |
|
"best_eval_accuracy": 0.74, |
|
"epoch": 14.0, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.6452310085296631, |
|
"eval_runtime": 3.1101, |
|
"eval_samples_per_second": 32.153, |
|
"eval_steps_per_second": 4.18, |
|
"step": 375 |
|
}, |
|
{ |
|
"best_epoch": 10, |
|
"best_eval_accuracy": 0.74, |
|
"epoch": 15.0, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.69, |
|
"eval_loss": 0.523855984210968, |
|
"eval_runtime": 3.1099, |
|
"eval_samples_per_second": 32.155, |
|
"eval_steps_per_second": 4.18, |
|
"step": 400 |
|
}, |
|
{ |
|
"best_epoch": 10, |
|
"best_eval_accuracy": 0.74, |
|
"epoch": 16.0, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.66, |
|
"eval_loss": 0.7315166592597961, |
|
"eval_runtime": 3.1123, |
|
"eval_samples_per_second": 32.13, |
|
"eval_steps_per_second": 4.177, |
|
"step": 425 |
|
}, |
|
{ |
|
"best_epoch": 10, |
|
"best_eval_accuracy": 0.74, |
|
"epoch": 17.0, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.67, |
|
"eval_loss": 0.6651241779327393, |
|
"eval_runtime": 3.1136, |
|
"eval_samples_per_second": 32.117, |
|
"eval_steps_per_second": 4.175, |
|
"step": 450 |
|
}, |
|
{ |
|
"best_epoch": 10, |
|
"best_eval_accuracy": 0.74, |
|
"epoch": 18.0, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.72, |
|
"eval_loss": 0.9039706587791443, |
|
"eval_runtime": 3.1111, |
|
"eval_samples_per_second": 32.143, |
|
"eval_steps_per_second": 4.179, |
|
"step": 475 |
|
}, |
|
{ |
|
"best_epoch": 10, |
|
"best_eval_accuracy": 0.74, |
|
"epoch": 19.0, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"learning_rate": 0.037500000000000006, |
|
"loss": 1.3727, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.73, |
|
"eval_loss": 0.5786058306694031, |
|
"eval_runtime": 3.1123, |
|
"eval_samples_per_second": 32.131, |
|
"eval_steps_per_second": 4.177, |
|
"step": 500 |
|
}, |
|
{ |
|
"best_epoch": 10, |
|
"best_eval_accuracy": 0.74, |
|
"epoch": 20.0, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.69, |
|
"eval_loss": 0.7332864999771118, |
|
"eval_runtime": 3.1117, |
|
"eval_samples_per_second": 32.137, |
|
"eval_steps_per_second": 4.178, |
|
"step": 525 |
|
}, |
|
{ |
|
"best_epoch": 10, |
|
"best_eval_accuracy": 0.74, |
|
"epoch": 21.0, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.7, |
|
"eval_loss": 0.7584354877471924, |
|
"eval_runtime": 3.1101, |
|
"eval_samples_per_second": 32.154, |
|
"eval_steps_per_second": 4.18, |
|
"step": 550 |
|
}, |
|
{ |
|
"best_epoch": 10, |
|
"best_eval_accuracy": 0.74, |
|
"epoch": 22.0, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.71, |
|
"eval_loss": 0.9900915026664734, |
|
"eval_runtime": 3.1096, |
|
"eval_samples_per_second": 32.158, |
|
"eval_steps_per_second": 4.181, |
|
"step": 575 |
|
}, |
|
{ |
|
"best_epoch": 10, |
|
"best_eval_accuracy": 0.74, |
|
"epoch": 23.0, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.7, |
|
"eval_loss": 0.5710880160331726, |
|
"eval_runtime": 3.1075, |
|
"eval_samples_per_second": 32.18, |
|
"eval_steps_per_second": 4.183, |
|
"step": 600 |
|
}, |
|
{ |
|
"best_epoch": 10, |
|
"best_eval_accuracy": 0.74, |
|
"epoch": 24.0, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.67, |
|
"eval_loss": 0.5870327353477478, |
|
"eval_runtime": 3.112, |
|
"eval_samples_per_second": 32.134, |
|
"eval_steps_per_second": 4.177, |
|
"step": 625 |
|
}, |
|
{ |
|
"best_epoch": 10, |
|
"best_eval_accuracy": 0.74, |
|
"epoch": 25.0, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.7, |
|
"eval_loss": 0.5832374691963196, |
|
"eval_runtime": 3.1121, |
|
"eval_samples_per_second": 32.132, |
|
"eval_steps_per_second": 4.177, |
|
"step": 650 |
|
}, |
|
{ |
|
"best_epoch": 10, |
|
"best_eval_accuracy": 0.74, |
|
"epoch": 26.0, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.72, |
|
"eval_loss": 0.9776791930198669, |
|
"eval_runtime": 3.1127, |
|
"eval_samples_per_second": 32.126, |
|
"eval_steps_per_second": 4.176, |
|
"step": 675 |
|
}, |
|
{ |
|
"best_epoch": 10, |
|
"best_eval_accuracy": 0.74, |
|
"epoch": 27.0, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.71, |
|
"eval_loss": 0.6447842121124268, |
|
"eval_runtime": 3.1119, |
|
"eval_samples_per_second": 32.135, |
|
"eval_steps_per_second": 4.178, |
|
"step": 700 |
|
}, |
|
{ |
|
"best_epoch": 10, |
|
"best_eval_accuracy": 0.74, |
|
"epoch": 28.0, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.71, |
|
"eval_loss": 0.8738675117492676, |
|
"eval_runtime": 3.1117, |
|
"eval_samples_per_second": 32.136, |
|
"eval_steps_per_second": 4.178, |
|
"step": 725 |
|
}, |
|
{ |
|
"best_epoch": 10, |
|
"best_eval_accuracy": 0.74, |
|
"epoch": 29.0, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.68, |
|
"eval_loss": 0.6709777116775513, |
|
"eval_runtime": 3.112, |
|
"eval_samples_per_second": 32.134, |
|
"eval_steps_per_second": 4.177, |
|
"step": 750 |
|
}, |
|
{ |
|
"best_epoch": 10, |
|
"best_eval_accuracy": 0.74, |
|
"epoch": 30.0, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.71, |
|
"eval_loss": 0.5918809771537781, |
|
"eval_runtime": 3.11, |
|
"eval_samples_per_second": 32.154, |
|
"eval_steps_per_second": 4.18, |
|
"step": 775 |
|
}, |
|
{ |
|
"best_epoch": 10, |
|
"best_eval_accuracy": 0.74, |
|
"epoch": 31.0, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.7, |
|
"eval_loss": 0.7616005539894104, |
|
"eval_runtime": 3.1114, |
|
"eval_samples_per_second": 32.14, |
|
"eval_steps_per_second": 4.178, |
|
"step": 800 |
|
}, |
|
{ |
|
"best_epoch": 10, |
|
"best_eval_accuracy": 0.74, |
|
"epoch": 32.0, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_accuracy": 0.72, |
|
"eval_loss": 0.5836986303329468, |
|
"eval_runtime": 3.1103, |
|
"eval_samples_per_second": 32.151, |
|
"eval_steps_per_second": 4.18, |
|
"step": 825 |
|
}, |
|
{ |
|
"best_epoch": 10, |
|
"best_eval_accuracy": 0.74, |
|
"epoch": 33.0, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.74, |
|
"eval_loss": 1.0103265047073364, |
|
"eval_runtime": 3.1122, |
|
"eval_samples_per_second": 32.131, |
|
"eval_steps_per_second": 4.177, |
|
"step": 850 |
|
}, |
|
{ |
|
"best_epoch": 10, |
|
"best_eval_accuracy": 0.74, |
|
"epoch": 34.0, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_accuracy": 0.73, |
|
"eval_loss": 0.7007672786712646, |
|
"eval_runtime": 3.1105, |
|
"eval_samples_per_second": 32.149, |
|
"eval_steps_per_second": 4.179, |
|
"step": 875 |
|
}, |
|
{ |
|
"best_epoch": 10, |
|
"best_eval_accuracy": 0.74, |
|
"epoch": 35.0, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.72, |
|
"eval_loss": 1.016095519065857, |
|
"eval_runtime": 3.1114, |
|
"eval_samples_per_second": 32.14, |
|
"eval_steps_per_second": 4.178, |
|
"step": 900 |
|
}, |
|
{ |
|
"best_epoch": 10, |
|
"best_eval_accuracy": 0.74, |
|
"epoch": 36.0, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_accuracy": 0.75, |
|
"eval_loss": 0.6910789012908936, |
|
"eval_runtime": 3.1125, |
|
"eval_samples_per_second": 32.128, |
|
"eval_steps_per_second": 4.177, |
|
"step": 925 |
|
}, |
|
{ |
|
"best_epoch": 36, |
|
"best_eval_accuracy": 0.75, |
|
"epoch": 37.0, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.75, |
|
"eval_loss": 0.6450607776641846, |
|
"eval_runtime": 3.1144, |
|
"eval_samples_per_second": 32.109, |
|
"eval_steps_per_second": 4.174, |
|
"step": 950 |
|
}, |
|
{ |
|
"best_epoch": 36, |
|
"best_eval_accuracy": 0.75, |
|
"epoch": 38.0, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_accuracy": 0.74, |
|
"eval_loss": 0.7189579010009766, |
|
"eval_runtime": 3.1147, |
|
"eval_samples_per_second": 32.106, |
|
"eval_steps_per_second": 4.174, |
|
"step": 975 |
|
}, |
|
{ |
|
"best_epoch": 36, |
|
"best_eval_accuracy": 0.75, |
|
"epoch": 39.0, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"learning_rate": 0.025, |
|
"loss": 0.7534, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.74, |
|
"eval_loss": 0.516425609588623, |
|
"eval_runtime": 3.1107, |
|
"eval_samples_per_second": 32.147, |
|
"eval_steps_per_second": 4.179, |
|
"step": 1000 |
|
}, |
|
{ |
|
"best_epoch": 36, |
|
"best_eval_accuracy": 0.75, |
|
"epoch": 40.0, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_accuracy": 0.72, |
|
"eval_loss": 0.4994637668132782, |
|
"eval_runtime": 3.1112, |
|
"eval_samples_per_second": 32.142, |
|
"eval_steps_per_second": 4.178, |
|
"step": 1025 |
|
}, |
|
{ |
|
"best_epoch": 36, |
|
"best_eval_accuracy": 0.75, |
|
"epoch": 41.0, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_accuracy": 0.75, |
|
"eval_loss": 0.584023654460907, |
|
"eval_runtime": 3.1124, |
|
"eval_samples_per_second": 32.129, |
|
"eval_steps_per_second": 4.177, |
|
"step": 1050 |
|
}, |
|
{ |
|
"best_epoch": 36, |
|
"best_eval_accuracy": 0.75, |
|
"epoch": 42.0, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_accuracy": 0.75, |
|
"eval_loss": 0.7394511699676514, |
|
"eval_runtime": 3.1125, |
|
"eval_samples_per_second": 32.129, |
|
"eval_steps_per_second": 4.177, |
|
"step": 1075 |
|
}, |
|
{ |
|
"best_epoch": 36, |
|
"best_eval_accuracy": 0.75, |
|
"epoch": 43.0, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.72, |
|
"eval_loss": 0.6374080181121826, |
|
"eval_runtime": 3.1131, |
|
"eval_samples_per_second": 32.122, |
|
"eval_steps_per_second": 4.176, |
|
"step": 1100 |
|
}, |
|
{ |
|
"best_epoch": 36, |
|
"best_eval_accuracy": 0.75, |
|
"epoch": 44.0, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_accuracy": 0.73, |
|
"eval_loss": 0.7466642260551453, |
|
"eval_runtime": 3.1096, |
|
"eval_samples_per_second": 32.158, |
|
"eval_steps_per_second": 4.181, |
|
"step": 1125 |
|
}, |
|
{ |
|
"best_epoch": 36, |
|
"best_eval_accuracy": 0.75, |
|
"epoch": 45.0, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_accuracy": 0.74, |
|
"eval_loss": 0.6875578165054321, |
|
"eval_runtime": 3.1103, |
|
"eval_samples_per_second": 32.151, |
|
"eval_steps_per_second": 4.18, |
|
"step": 1150 |
|
}, |
|
{ |
|
"best_epoch": 36, |
|
"best_eval_accuracy": 0.75, |
|
"epoch": 46.0, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_accuracy": 0.74, |
|
"eval_loss": 0.5958933234214783, |
|
"eval_runtime": 3.11, |
|
"eval_samples_per_second": 32.155, |
|
"eval_steps_per_second": 4.18, |
|
"step": 1175 |
|
}, |
|
{ |
|
"best_epoch": 36, |
|
"best_eval_accuracy": 0.75, |
|
"epoch": 47.0, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.74, |
|
"eval_loss": 0.5625195503234863, |
|
"eval_runtime": 3.1122, |
|
"eval_samples_per_second": 32.131, |
|
"eval_steps_per_second": 4.177, |
|
"step": 1200 |
|
}, |
|
{ |
|
"best_epoch": 36, |
|
"best_eval_accuracy": 0.75, |
|
"epoch": 48.0, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_accuracy": 0.75, |
|
"eval_loss": 0.6837201714515686, |
|
"eval_runtime": 3.1116, |
|
"eval_samples_per_second": 32.137, |
|
"eval_steps_per_second": 4.178, |
|
"step": 1225 |
|
}, |
|
{ |
|
"best_epoch": 36, |
|
"best_eval_accuracy": 0.75, |
|
"epoch": 49.0, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_accuracy": 0.76, |
|
"eval_loss": 0.6766077280044556, |
|
"eval_runtime": 3.1132, |
|
"eval_samples_per_second": 32.122, |
|
"eval_steps_per_second": 4.176, |
|
"step": 1250 |
|
}, |
|
{ |
|
"best_epoch": 49, |
|
"best_eval_accuracy": 0.76, |
|
"epoch": 50.0, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"eval_accuracy": 0.75, |
|
"eval_loss": 0.6265960335731506, |
|
"eval_runtime": 3.1128, |
|
"eval_samples_per_second": 32.125, |
|
"eval_steps_per_second": 4.176, |
|
"step": 1275 |
|
}, |
|
{ |
|
"best_epoch": 49, |
|
"best_eval_accuracy": 0.76, |
|
"epoch": 51.0, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_accuracy": 0.74, |
|
"eval_loss": 0.6641672253608704, |
|
"eval_runtime": 3.1146, |
|
"eval_samples_per_second": 32.106, |
|
"eval_steps_per_second": 4.174, |
|
"step": 1300 |
|
}, |
|
{ |
|
"best_epoch": 49, |
|
"best_eval_accuracy": 0.76, |
|
"epoch": 52.0, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"eval_accuracy": 0.74, |
|
"eval_loss": 0.6202273368835449, |
|
"eval_runtime": 3.1106, |
|
"eval_samples_per_second": 32.148, |
|
"eval_steps_per_second": 4.179, |
|
"step": 1325 |
|
}, |
|
{ |
|
"best_epoch": 49, |
|
"best_eval_accuracy": 0.76, |
|
"epoch": 53.0, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_accuracy": 0.75, |
|
"eval_loss": 0.6397892236709595, |
|
"eval_runtime": 3.1097, |
|
"eval_samples_per_second": 32.157, |
|
"eval_steps_per_second": 4.18, |
|
"step": 1350 |
|
}, |
|
{ |
|
"best_epoch": 49, |
|
"best_eval_accuracy": 0.76, |
|
"epoch": 54.0, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"eval_accuracy": 0.75, |
|
"eval_loss": 0.6689194440841675, |
|
"eval_runtime": 3.1153, |
|
"eval_samples_per_second": 32.1, |
|
"eval_steps_per_second": 4.173, |
|
"step": 1375 |
|
}, |
|
{ |
|
"best_epoch": 49, |
|
"best_eval_accuracy": 0.76, |
|
"epoch": 55.0, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_accuracy": 0.76, |
|
"eval_loss": 0.6628773212432861, |
|
"eval_runtime": 3.1124, |
|
"eval_samples_per_second": 32.13, |
|
"eval_steps_per_second": 4.177, |
|
"step": 1400 |
|
}, |
|
{ |
|
"best_epoch": 49, |
|
"best_eval_accuracy": 0.76, |
|
"epoch": 56.0, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"eval_accuracy": 0.76, |
|
"eval_loss": 0.5903107523918152, |
|
"eval_runtime": 3.1113, |
|
"eval_samples_per_second": 32.141, |
|
"eval_steps_per_second": 4.178, |
|
"step": 1425 |
|
}, |
|
{ |
|
"best_epoch": 49, |
|
"best_eval_accuracy": 0.76, |
|
"epoch": 57.0, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_accuracy": 0.77, |
|
"eval_loss": 0.6132528185844421, |
|
"eval_runtime": 3.1127, |
|
"eval_samples_per_second": 32.127, |
|
"eval_steps_per_second": 4.177, |
|
"step": 1450 |
|
}, |
|
{ |
|
"best_epoch": 57, |
|
"best_eval_accuracy": 0.77, |
|
"epoch": 58.0, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"eval_accuracy": 0.76, |
|
"eval_loss": 0.6884872317314148, |
|
"eval_runtime": 3.1145, |
|
"eval_samples_per_second": 32.108, |
|
"eval_steps_per_second": 4.174, |
|
"step": 1475 |
|
}, |
|
{ |
|
"best_epoch": 57, |
|
"best_eval_accuracy": 0.77, |
|
"epoch": 59.0, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"learning_rate": 0.0125, |
|
"loss": 0.4477, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_accuracy": 0.76, |
|
"eval_loss": 0.5950008630752563, |
|
"eval_runtime": 3.1114, |
|
"eval_samples_per_second": 32.14, |
|
"eval_steps_per_second": 4.178, |
|
"step": 1500 |
|
}, |
|
{ |
|
"best_epoch": 57, |
|
"best_eval_accuracy": 0.77, |
|
"epoch": 60.0, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 61.0, |
|
"eval_accuracy": 0.75, |
|
"eval_loss": 0.5714966058731079, |
|
"eval_runtime": 3.1122, |
|
"eval_samples_per_second": 32.131, |
|
"eval_steps_per_second": 4.177, |
|
"step": 1525 |
|
}, |
|
{ |
|
"best_epoch": 57, |
|
"best_eval_accuracy": 0.77, |
|
"epoch": 61.0, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 62.0, |
|
"eval_accuracy": 0.76, |
|
"eval_loss": 0.6111324429512024, |
|
"eval_runtime": 3.1175, |
|
"eval_samples_per_second": 32.077, |
|
"eval_steps_per_second": 4.17, |
|
"step": 1550 |
|
}, |
|
{ |
|
"best_epoch": 57, |
|
"best_eval_accuracy": 0.77, |
|
"epoch": 62.0, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 63.0, |
|
"eval_accuracy": 0.76, |
|
"eval_loss": 0.6023278832435608, |
|
"eval_runtime": 3.113, |
|
"eval_samples_per_second": 32.124, |
|
"eval_steps_per_second": 4.176, |
|
"step": 1575 |
|
}, |
|
{ |
|
"best_epoch": 57, |
|
"best_eval_accuracy": 0.77, |
|
"epoch": 63.0, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_accuracy": 0.76, |
|
"eval_loss": 0.5792566537857056, |
|
"eval_runtime": 3.1121, |
|
"eval_samples_per_second": 32.133, |
|
"eval_steps_per_second": 4.177, |
|
"step": 1600 |
|
}, |
|
{ |
|
"best_epoch": 57, |
|
"best_eval_accuracy": 0.77, |
|
"epoch": 64.0, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 65.0, |
|
"eval_accuracy": 0.74, |
|
"eval_loss": 0.5727165341377258, |
|
"eval_runtime": 3.1107, |
|
"eval_samples_per_second": 32.147, |
|
"eval_steps_per_second": 4.179, |
|
"step": 1625 |
|
}, |
|
{ |
|
"best_epoch": 57, |
|
"best_eval_accuracy": 0.77, |
|
"epoch": 65.0, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 66.0, |
|
"eval_accuracy": 0.76, |
|
"eval_loss": 0.5606411099433899, |
|
"eval_runtime": 3.1095, |
|
"eval_samples_per_second": 32.159, |
|
"eval_steps_per_second": 4.181, |
|
"step": 1650 |
|
}, |
|
{ |
|
"best_epoch": 57, |
|
"best_eval_accuracy": 0.77, |
|
"epoch": 66.0, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 67.0, |
|
"eval_accuracy": 0.76, |
|
"eval_loss": 0.5970374345779419, |
|
"eval_runtime": 3.1149, |
|
"eval_samples_per_second": 32.103, |
|
"eval_steps_per_second": 4.173, |
|
"step": 1675 |
|
}, |
|
{ |
|
"best_epoch": 57, |
|
"best_eval_accuracy": 0.77, |
|
"epoch": 67.0, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"eval_accuracy": 0.76, |
|
"eval_loss": 0.5601862072944641, |
|
"eval_runtime": 3.1113, |
|
"eval_samples_per_second": 32.14, |
|
"eval_steps_per_second": 4.178, |
|
"step": 1700 |
|
}, |
|
{ |
|
"best_epoch": 57, |
|
"best_eval_accuracy": 0.77, |
|
"epoch": 68.0, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 69.0, |
|
"eval_accuracy": 0.75, |
|
"eval_loss": 0.578113317489624, |
|
"eval_runtime": 3.1132, |
|
"eval_samples_per_second": 32.121, |
|
"eval_steps_per_second": 4.176, |
|
"step": 1725 |
|
}, |
|
{ |
|
"best_epoch": 57, |
|
"best_eval_accuracy": 0.77, |
|
"epoch": 69.0, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"eval_accuracy": 0.76, |
|
"eval_loss": 0.6141662001609802, |
|
"eval_runtime": 3.1126, |
|
"eval_samples_per_second": 32.127, |
|
"eval_steps_per_second": 4.177, |
|
"step": 1750 |
|
}, |
|
{ |
|
"best_epoch": 57, |
|
"best_eval_accuracy": 0.77, |
|
"epoch": 70.0, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 71.0, |
|
"eval_accuracy": 0.76, |
|
"eval_loss": 0.5757777690887451, |
|
"eval_runtime": 3.1115, |
|
"eval_samples_per_second": 32.139, |
|
"eval_steps_per_second": 4.178, |
|
"step": 1775 |
|
}, |
|
{ |
|
"best_epoch": 57, |
|
"best_eval_accuracy": 0.77, |
|
"epoch": 71.0, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"eval_accuracy": 0.75, |
|
"eval_loss": 0.5649742484092712, |
|
"eval_runtime": 3.1127, |
|
"eval_samples_per_second": 32.127, |
|
"eval_steps_per_second": 4.176, |
|
"step": 1800 |
|
}, |
|
{ |
|
"best_epoch": 57, |
|
"best_eval_accuracy": 0.77, |
|
"epoch": 72.0, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 73.0, |
|
"eval_accuracy": 0.76, |
|
"eval_loss": 0.5823287963867188, |
|
"eval_runtime": 3.112, |
|
"eval_samples_per_second": 32.134, |
|
"eval_steps_per_second": 4.177, |
|
"step": 1825 |
|
}, |
|
{ |
|
"best_epoch": 57, |
|
"best_eval_accuracy": 0.77, |
|
"epoch": 73.0, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 74.0, |
|
"eval_accuracy": 0.76, |
|
"eval_loss": 0.5546900033950806, |
|
"eval_runtime": 3.111, |
|
"eval_samples_per_second": 32.144, |
|
"eval_steps_per_second": 4.179, |
|
"step": 1850 |
|
}, |
|
{ |
|
"best_epoch": 57, |
|
"best_eval_accuracy": 0.77, |
|
"epoch": 74.0, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"eval_accuracy": 0.76, |
|
"eval_loss": 0.5636573433876038, |
|
"eval_runtime": 3.1109, |
|
"eval_samples_per_second": 32.145, |
|
"eval_steps_per_second": 4.179, |
|
"step": 1875 |
|
}, |
|
{ |
|
"best_epoch": 57, |
|
"best_eval_accuracy": 0.77, |
|
"epoch": 75.0, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"eval_accuracy": 0.76, |
|
"eval_loss": 0.5805944204330444, |
|
"eval_runtime": 3.1125, |
|
"eval_samples_per_second": 32.129, |
|
"eval_steps_per_second": 4.177, |
|
"step": 1900 |
|
}, |
|
{ |
|
"best_epoch": 57, |
|
"best_eval_accuracy": 0.77, |
|
"epoch": 76.0, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 77.0, |
|
"eval_accuracy": 0.76, |
|
"eval_loss": 0.5601564645767212, |
|
"eval_runtime": 3.1215, |
|
"eval_samples_per_second": 32.036, |
|
"eval_steps_per_second": 4.165, |
|
"step": 1925 |
|
}, |
|
{ |
|
"best_epoch": 57, |
|
"best_eval_accuracy": 0.77, |
|
"epoch": 77.0, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 78.0, |
|
"eval_accuracy": 0.76, |
|
"eval_loss": 0.570804238319397, |
|
"eval_runtime": 3.1168, |
|
"eval_samples_per_second": 32.084, |
|
"eval_steps_per_second": 4.171, |
|
"step": 1950 |
|
}, |
|
{ |
|
"best_epoch": 57, |
|
"best_eval_accuracy": 0.77, |
|
"epoch": 78.0, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 79.0, |
|
"eval_accuracy": 0.76, |
|
"eval_loss": 0.5623500347137451, |
|
"eval_runtime": 3.11, |
|
"eval_samples_per_second": 32.155, |
|
"eval_steps_per_second": 4.18, |
|
"step": 1975 |
|
}, |
|
{ |
|
"best_epoch": 57, |
|
"best_eval_accuracy": 0.77, |
|
"epoch": 79.0, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"learning_rate": 0.0, |
|
"loss": 0.3287, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_accuracy": 0.76, |
|
"eval_loss": 0.5641293525695801, |
|
"eval_runtime": 3.1089, |
|
"eval_samples_per_second": 32.166, |
|
"eval_steps_per_second": 4.182, |
|
"step": 2000 |
|
}, |
|
{ |
|
"best_epoch": 57, |
|
"best_eval_accuracy": 0.77, |
|
"epoch": 80.0, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"step": 2000, |
|
"total_flos": 2.9821702864896e+16, |
|
"train_loss": 0.7256226806640625, |
|
"train_runtime": 1655.9889, |
|
"train_samples_per_second": 19.324, |
|
"train_steps_per_second": 1.208 |
|
} |
|
], |
|
"max_steps": 2000, |
|
"num_train_epochs": 80, |
|
"total_flos": 2.9821702864896e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|