|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 80.0, |
|
"global_step": 2000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.61, |
|
"eval_loss": 0.6769059896469116, |
|
"eval_runtime": 2.9412, |
|
"eval_samples_per_second": 34.0, |
|
"eval_steps_per_second": 4.42, |
|
"step": 25 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.61, |
|
"epoch": 1.0, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.59, |
|
"eval_loss": 0.5348830223083496, |
|
"eval_runtime": 3.003, |
|
"eval_samples_per_second": 33.3, |
|
"eval_steps_per_second": 4.329, |
|
"step": 50 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.61, |
|
"epoch": 2.0, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.58, |
|
"eval_loss": 0.6615317463874817, |
|
"eval_runtime": 3.0496, |
|
"eval_samples_per_second": 32.791, |
|
"eval_steps_per_second": 4.263, |
|
"step": 75 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.61, |
|
"epoch": 3.0, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.6595781445503235, |
|
"eval_runtime": 3.0782, |
|
"eval_samples_per_second": 32.487, |
|
"eval_steps_per_second": 4.223, |
|
"step": 100 |
|
}, |
|
{ |
|
"best_epoch": 3, |
|
"best_eval_accuracy": 0.64, |
|
"epoch": 4.0, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.71, |
|
"eval_loss": 0.5522617101669312, |
|
"eval_runtime": 3.0964, |
|
"eval_samples_per_second": 32.296, |
|
"eval_steps_per_second": 4.198, |
|
"step": 125 |
|
}, |
|
{ |
|
"best_epoch": 4, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 5.0, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.67, |
|
"eval_loss": 0.8447214365005493, |
|
"eval_runtime": 3.1114, |
|
"eval_samples_per_second": 32.14, |
|
"eval_steps_per_second": 4.178, |
|
"step": 150 |
|
}, |
|
{ |
|
"best_epoch": 4, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 6.0, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.66, |
|
"eval_loss": 0.7505703568458557, |
|
"eval_runtime": 3.125, |
|
"eval_samples_per_second": 32.0, |
|
"eval_steps_per_second": 4.16, |
|
"step": 175 |
|
}, |
|
{ |
|
"best_epoch": 4, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 7.0, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.68, |
|
"eval_loss": 0.8462668657302856, |
|
"eval_runtime": 3.1276, |
|
"eval_samples_per_second": 31.974, |
|
"eval_steps_per_second": 4.157, |
|
"step": 200 |
|
}, |
|
{ |
|
"best_epoch": 4, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 8.0, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.56, |
|
"eval_loss": 0.9063791036605835, |
|
"eval_runtime": 3.1352, |
|
"eval_samples_per_second": 31.896, |
|
"eval_steps_per_second": 4.146, |
|
"step": 225 |
|
}, |
|
{ |
|
"best_epoch": 4, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 9.0, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.58, |
|
"eval_loss": 0.553296685218811, |
|
"eval_runtime": 3.1339, |
|
"eval_samples_per_second": 31.909, |
|
"eval_steps_per_second": 4.148, |
|
"step": 250 |
|
}, |
|
{ |
|
"best_epoch": 4, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 10.0, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.41, |
|
"eval_loss": 0.5700768232345581, |
|
"eval_runtime": 3.1372, |
|
"eval_samples_per_second": 31.876, |
|
"eval_steps_per_second": 4.144, |
|
"step": 275 |
|
}, |
|
{ |
|
"best_epoch": 4, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 11.0, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.51, |
|
"eval_loss": 0.5593132972717285, |
|
"eval_runtime": 3.1365, |
|
"eval_samples_per_second": 31.883, |
|
"eval_steps_per_second": 4.145, |
|
"step": 300 |
|
}, |
|
{ |
|
"best_epoch": 4, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 12.0, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.52, |
|
"eval_loss": 0.559903621673584, |
|
"eval_runtime": 3.1404, |
|
"eval_samples_per_second": 31.843, |
|
"eval_steps_per_second": 4.14, |
|
"step": 325 |
|
}, |
|
{ |
|
"best_epoch": 4, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 13.0, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.37, |
|
"eval_loss": 0.5619108080863953, |
|
"eval_runtime": 3.1427, |
|
"eval_samples_per_second": 31.82, |
|
"eval_steps_per_second": 4.137, |
|
"step": 350 |
|
}, |
|
{ |
|
"best_epoch": 4, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 14.0, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.56, |
|
"eval_loss": 0.5590865612030029, |
|
"eval_runtime": 3.1432, |
|
"eval_samples_per_second": 31.815, |
|
"eval_steps_per_second": 4.136, |
|
"step": 375 |
|
}, |
|
{ |
|
"best_epoch": 4, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 15.0, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.55, |
|
"eval_loss": 0.5568958520889282, |
|
"eval_runtime": 3.1419, |
|
"eval_samples_per_second": 31.828, |
|
"eval_steps_per_second": 4.138, |
|
"step": 400 |
|
}, |
|
{ |
|
"best_epoch": 4, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 16.0, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.56, |
|
"eval_loss": 0.5511047840118408, |
|
"eval_runtime": 3.1417, |
|
"eval_samples_per_second": 31.83, |
|
"eval_steps_per_second": 4.138, |
|
"step": 425 |
|
}, |
|
{ |
|
"best_epoch": 4, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 17.0, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.52, |
|
"eval_loss": 0.5599001049995422, |
|
"eval_runtime": 3.1397, |
|
"eval_samples_per_second": 31.85, |
|
"eval_steps_per_second": 4.14, |
|
"step": 450 |
|
}, |
|
{ |
|
"best_epoch": 4, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 18.0, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.59, |
|
"eval_loss": 0.556063175201416, |
|
"eval_runtime": 3.1378, |
|
"eval_samples_per_second": 31.869, |
|
"eval_steps_per_second": 4.143, |
|
"step": 475 |
|
}, |
|
{ |
|
"best_epoch": 4, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 19.0, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"learning_rate": 0.037500000000000006, |
|
"loss": 1.4827, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.57, |
|
"eval_loss": 0.5576848983764648, |
|
"eval_runtime": 3.1369, |
|
"eval_samples_per_second": 31.879, |
|
"eval_steps_per_second": 4.144, |
|
"step": 500 |
|
}, |
|
{ |
|
"best_epoch": 4, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 20.0, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.58, |
|
"eval_loss": 0.5536676049232483, |
|
"eval_runtime": 3.1341, |
|
"eval_samples_per_second": 31.907, |
|
"eval_steps_per_second": 4.148, |
|
"step": 525 |
|
}, |
|
{ |
|
"best_epoch": 4, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 21.0, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.43, |
|
"eval_loss": 0.5615798234939575, |
|
"eval_runtime": 3.1306, |
|
"eval_samples_per_second": 31.942, |
|
"eval_steps_per_second": 4.153, |
|
"step": 550 |
|
}, |
|
{ |
|
"best_epoch": 4, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 22.0, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.34, |
|
"eval_loss": 0.5607187747955322, |
|
"eval_runtime": 3.1284, |
|
"eval_samples_per_second": 31.965, |
|
"eval_steps_per_second": 4.155, |
|
"step": 575 |
|
}, |
|
{ |
|
"best_epoch": 4, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 23.0, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.39, |
|
"eval_loss": 0.561591625213623, |
|
"eval_runtime": 3.1313, |
|
"eval_samples_per_second": 31.936, |
|
"eval_steps_per_second": 4.152, |
|
"step": 600 |
|
}, |
|
{ |
|
"best_epoch": 4, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 24.0, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.56, |
|
"eval_loss": 0.559656023979187, |
|
"eval_runtime": 3.131, |
|
"eval_samples_per_second": 31.939, |
|
"eval_steps_per_second": 4.152, |
|
"step": 625 |
|
}, |
|
{ |
|
"best_epoch": 4, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 25.0, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.41, |
|
"eval_loss": 0.5622934103012085, |
|
"eval_runtime": 3.1312, |
|
"eval_samples_per_second": 31.936, |
|
"eval_steps_per_second": 4.152, |
|
"step": 650 |
|
}, |
|
{ |
|
"best_epoch": 4, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 26.0, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.43, |
|
"eval_loss": 0.5612391829490662, |
|
"eval_runtime": 3.1355, |
|
"eval_samples_per_second": 31.893, |
|
"eval_steps_per_second": 4.146, |
|
"step": 675 |
|
}, |
|
{ |
|
"best_epoch": 4, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 27.0, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.57, |
|
"eval_loss": 0.5573248863220215, |
|
"eval_runtime": 3.1325, |
|
"eval_samples_per_second": 31.924, |
|
"eval_steps_per_second": 4.15, |
|
"step": 700 |
|
}, |
|
{ |
|
"best_epoch": 4, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 28.0, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.42, |
|
"eval_loss": 0.563093900680542, |
|
"eval_runtime": 3.1349, |
|
"eval_samples_per_second": 31.899, |
|
"eval_steps_per_second": 4.147, |
|
"step": 725 |
|
}, |
|
{ |
|
"best_epoch": 4, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 29.0, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.51, |
|
"eval_loss": 0.5594209432601929, |
|
"eval_runtime": 3.1326, |
|
"eval_samples_per_second": 31.922, |
|
"eval_steps_per_second": 4.15, |
|
"step": 750 |
|
}, |
|
{ |
|
"best_epoch": 4, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 30.0, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.56, |
|
"eval_loss": 0.5593081116676331, |
|
"eval_runtime": 3.1339, |
|
"eval_samples_per_second": 31.909, |
|
"eval_steps_per_second": 4.148, |
|
"step": 775 |
|
}, |
|
{ |
|
"best_epoch": 4, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 31.0, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.43, |
|
"eval_loss": 0.5646340250968933, |
|
"eval_runtime": 3.139, |
|
"eval_samples_per_second": 31.857, |
|
"eval_steps_per_second": 4.141, |
|
"step": 800 |
|
}, |
|
{ |
|
"best_epoch": 4, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 32.0, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_accuracy": 0.44, |
|
"eval_loss": 0.5664234757423401, |
|
"eval_runtime": 3.1355, |
|
"eval_samples_per_second": 31.893, |
|
"eval_steps_per_second": 4.146, |
|
"step": 825 |
|
}, |
|
{ |
|
"best_epoch": 4, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 33.0, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.56, |
|
"eval_loss": 0.559657633304596, |
|
"eval_runtime": 3.1412, |
|
"eval_samples_per_second": 31.835, |
|
"eval_steps_per_second": 4.139, |
|
"step": 850 |
|
}, |
|
{ |
|
"best_epoch": 4, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 34.0, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_accuracy": 0.41, |
|
"eval_loss": 0.5628995299339294, |
|
"eval_runtime": 3.1345, |
|
"eval_samples_per_second": 31.903, |
|
"eval_steps_per_second": 4.147, |
|
"step": 875 |
|
}, |
|
{ |
|
"best_epoch": 4, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 35.0, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.43, |
|
"eval_loss": 0.5609996914863586, |
|
"eval_runtime": 3.1283, |
|
"eval_samples_per_second": 31.966, |
|
"eval_steps_per_second": 4.156, |
|
"step": 900 |
|
}, |
|
{ |
|
"best_epoch": 4, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 36.0, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_accuracy": 0.58, |
|
"eval_loss": 0.557191789150238, |
|
"eval_runtime": 3.1213, |
|
"eval_samples_per_second": 32.038, |
|
"eval_steps_per_second": 4.165, |
|
"step": 925 |
|
}, |
|
{ |
|
"best_epoch": 4, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 37.0, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.6, |
|
"eval_loss": 0.5591793656349182, |
|
"eval_runtime": 3.1252, |
|
"eval_samples_per_second": 31.998, |
|
"eval_steps_per_second": 4.16, |
|
"step": 950 |
|
}, |
|
{ |
|
"best_epoch": 4, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 38.0, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_accuracy": 0.59, |
|
"eval_loss": 0.5553145408630371, |
|
"eval_runtime": 3.119, |
|
"eval_samples_per_second": 32.061, |
|
"eval_steps_per_second": 4.168, |
|
"step": 975 |
|
}, |
|
{ |
|
"best_epoch": 4, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 39.0, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"learning_rate": 0.025, |
|
"loss": 1.1505, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.58, |
|
"eval_loss": 0.5597114562988281, |
|
"eval_runtime": 3.1206, |
|
"eval_samples_per_second": 32.045, |
|
"eval_steps_per_second": 4.166, |
|
"step": 1000 |
|
}, |
|
{ |
|
"best_epoch": 4, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 40.0, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_accuracy": 0.62, |
|
"eval_loss": 0.5569581389427185, |
|
"eval_runtime": 3.1177, |
|
"eval_samples_per_second": 32.075, |
|
"eval_steps_per_second": 4.17, |
|
"step": 1025 |
|
}, |
|
{ |
|
"best_epoch": 4, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 41.0, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_accuracy": 0.6, |
|
"eval_loss": 0.5582298040390015, |
|
"eval_runtime": 3.1194, |
|
"eval_samples_per_second": 32.058, |
|
"eval_steps_per_second": 4.168, |
|
"step": 1050 |
|
}, |
|
{ |
|
"best_epoch": 4, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 42.0, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_accuracy": 0.46, |
|
"eval_loss": 0.5601411461830139, |
|
"eval_runtime": 3.118, |
|
"eval_samples_per_second": 32.071, |
|
"eval_steps_per_second": 4.169, |
|
"step": 1075 |
|
}, |
|
{ |
|
"best_epoch": 4, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 43.0, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.55, |
|
"eval_loss": 0.5598447918891907, |
|
"eval_runtime": 3.1172, |
|
"eval_samples_per_second": 32.08, |
|
"eval_steps_per_second": 4.17, |
|
"step": 1100 |
|
}, |
|
{ |
|
"best_epoch": 4, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 44.0, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_accuracy": 0.59, |
|
"eval_loss": 0.5573525428771973, |
|
"eval_runtime": 3.1161, |
|
"eval_samples_per_second": 32.092, |
|
"eval_steps_per_second": 4.172, |
|
"step": 1125 |
|
}, |
|
{ |
|
"best_epoch": 4, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 45.0, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_accuracy": 0.52, |
|
"eval_loss": 0.5591351985931396, |
|
"eval_runtime": 3.1166, |
|
"eval_samples_per_second": 32.086, |
|
"eval_steps_per_second": 4.171, |
|
"step": 1150 |
|
}, |
|
{ |
|
"best_epoch": 4, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 46.0, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_accuracy": 0.5, |
|
"eval_loss": 0.5601411461830139, |
|
"eval_runtime": 3.1125, |
|
"eval_samples_per_second": 32.128, |
|
"eval_steps_per_second": 4.177, |
|
"step": 1175 |
|
}, |
|
{ |
|
"best_epoch": 4, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 47.0, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.56, |
|
"eval_loss": 0.5592981576919556, |
|
"eval_runtime": 3.1137, |
|
"eval_samples_per_second": 32.116, |
|
"eval_steps_per_second": 4.175, |
|
"step": 1200 |
|
}, |
|
{ |
|
"best_epoch": 4, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 48.0, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_accuracy": 0.48, |
|
"eval_loss": 0.559969425201416, |
|
"eval_runtime": 3.1115, |
|
"eval_samples_per_second": 32.139, |
|
"eval_steps_per_second": 4.178, |
|
"step": 1225 |
|
}, |
|
{ |
|
"best_epoch": 4, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 49.0, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_accuracy": 0.39, |
|
"eval_loss": 0.5619819164276123, |
|
"eval_runtime": 3.1131, |
|
"eval_samples_per_second": 32.123, |
|
"eval_steps_per_second": 4.176, |
|
"step": 1250 |
|
}, |
|
{ |
|
"best_epoch": 4, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 50.0, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"eval_accuracy": 0.51, |
|
"eval_loss": 0.559766948223114, |
|
"eval_runtime": 3.1128, |
|
"eval_samples_per_second": 32.126, |
|
"eval_steps_per_second": 4.176, |
|
"step": 1275 |
|
}, |
|
{ |
|
"best_epoch": 4, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 51.0, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_accuracy": 0.39, |
|
"eval_loss": 0.5615789890289307, |
|
"eval_runtime": 3.1133, |
|
"eval_samples_per_second": 32.12, |
|
"eval_steps_per_second": 4.176, |
|
"step": 1300 |
|
}, |
|
{ |
|
"best_epoch": 4, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 52.0, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"eval_accuracy": 0.43, |
|
"eval_loss": 0.5600858926773071, |
|
"eval_runtime": 3.1127, |
|
"eval_samples_per_second": 32.127, |
|
"eval_steps_per_second": 4.176, |
|
"step": 1325 |
|
}, |
|
{ |
|
"best_epoch": 4, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 53.0, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_accuracy": 0.4, |
|
"eval_loss": 0.5617396831512451, |
|
"eval_runtime": 3.11, |
|
"eval_samples_per_second": 32.155, |
|
"eval_steps_per_second": 4.18, |
|
"step": 1350 |
|
}, |
|
{ |
|
"best_epoch": 4, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 54.0, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"eval_accuracy": 0.41, |
|
"eval_loss": 0.5618742108345032, |
|
"eval_runtime": 3.1122, |
|
"eval_samples_per_second": 32.132, |
|
"eval_steps_per_second": 4.177, |
|
"step": 1375 |
|
}, |
|
{ |
|
"best_epoch": 4, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 55.0, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_accuracy": 0.39, |
|
"eval_loss": 0.5625372529029846, |
|
"eval_runtime": 3.1101, |
|
"eval_samples_per_second": 32.154, |
|
"eval_steps_per_second": 4.18, |
|
"step": 1400 |
|
}, |
|
{ |
|
"best_epoch": 4, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 56.0, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"eval_accuracy": 0.56, |
|
"eval_loss": 0.5591417551040649, |
|
"eval_runtime": 3.1109, |
|
"eval_samples_per_second": 32.145, |
|
"eval_steps_per_second": 4.179, |
|
"step": 1425 |
|
}, |
|
{ |
|
"best_epoch": 4, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 57.0, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_accuracy": 0.59, |
|
"eval_loss": 0.5587807297706604, |
|
"eval_runtime": 3.1112, |
|
"eval_samples_per_second": 32.142, |
|
"eval_steps_per_second": 4.178, |
|
"step": 1450 |
|
}, |
|
{ |
|
"best_epoch": 4, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 58.0, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"eval_accuracy": 0.59, |
|
"eval_loss": 0.558001697063446, |
|
"eval_runtime": 3.1086, |
|
"eval_samples_per_second": 32.169, |
|
"eval_steps_per_second": 4.182, |
|
"step": 1475 |
|
}, |
|
{ |
|
"best_epoch": 4, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 59.0, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"learning_rate": 0.0125, |
|
"loss": 0.9071, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_accuracy": 0.62, |
|
"eval_loss": 0.5583775639533997, |
|
"eval_runtime": 3.1114, |
|
"eval_samples_per_second": 32.14, |
|
"eval_steps_per_second": 4.178, |
|
"step": 1500 |
|
}, |
|
{ |
|
"best_epoch": 4, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 60.0, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 61.0, |
|
"eval_accuracy": 0.58, |
|
"eval_loss": 0.5589589476585388, |
|
"eval_runtime": 3.1104, |
|
"eval_samples_per_second": 32.15, |
|
"eval_steps_per_second": 4.179, |
|
"step": 1525 |
|
}, |
|
{ |
|
"best_epoch": 4, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 61.0, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 62.0, |
|
"eval_accuracy": 0.57, |
|
"eval_loss": 0.5584514737129211, |
|
"eval_runtime": 3.1081, |
|
"eval_samples_per_second": 32.174, |
|
"eval_steps_per_second": 4.183, |
|
"step": 1550 |
|
}, |
|
{ |
|
"best_epoch": 4, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 62.0, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 63.0, |
|
"eval_accuracy": 0.59, |
|
"eval_loss": 0.5586316585540771, |
|
"eval_runtime": 3.1099, |
|
"eval_samples_per_second": 32.156, |
|
"eval_steps_per_second": 4.18, |
|
"step": 1575 |
|
}, |
|
{ |
|
"best_epoch": 4, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 63.0, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_accuracy": 0.57, |
|
"eval_loss": 0.5588961243629456, |
|
"eval_runtime": 3.1063, |
|
"eval_samples_per_second": 32.193, |
|
"eval_steps_per_second": 4.185, |
|
"step": 1600 |
|
}, |
|
{ |
|
"best_epoch": 4, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 64.0, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 65.0, |
|
"eval_accuracy": 0.59, |
|
"eval_loss": 0.5587146282196045, |
|
"eval_runtime": 3.1074, |
|
"eval_samples_per_second": 32.181, |
|
"eval_steps_per_second": 4.184, |
|
"step": 1625 |
|
}, |
|
{ |
|
"best_epoch": 4, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 65.0, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 66.0, |
|
"eval_accuracy": 0.61, |
|
"eval_loss": 0.5588311553001404, |
|
"eval_runtime": 3.1077, |
|
"eval_samples_per_second": 32.178, |
|
"eval_steps_per_second": 4.183, |
|
"step": 1650 |
|
}, |
|
{ |
|
"best_epoch": 4, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 66.0, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 67.0, |
|
"eval_accuracy": 0.57, |
|
"eval_loss": 0.5592451095581055, |
|
"eval_runtime": 3.1096, |
|
"eval_samples_per_second": 32.159, |
|
"eval_steps_per_second": 4.181, |
|
"step": 1675 |
|
}, |
|
{ |
|
"best_epoch": 4, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 67.0, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"eval_accuracy": 0.58, |
|
"eval_loss": 0.5579473972320557, |
|
"eval_runtime": 3.1069, |
|
"eval_samples_per_second": 32.186, |
|
"eval_steps_per_second": 4.184, |
|
"step": 1700 |
|
}, |
|
{ |
|
"best_epoch": 4, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 68.0, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 69.0, |
|
"eval_accuracy": 0.56, |
|
"eval_loss": 0.5586199760437012, |
|
"eval_runtime": 3.1094, |
|
"eval_samples_per_second": 32.161, |
|
"eval_steps_per_second": 4.181, |
|
"step": 1725 |
|
}, |
|
{ |
|
"best_epoch": 4, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 69.0, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"eval_accuracy": 0.57, |
|
"eval_loss": 0.5590428709983826, |
|
"eval_runtime": 3.1092, |
|
"eval_samples_per_second": 32.163, |
|
"eval_steps_per_second": 4.181, |
|
"step": 1750 |
|
}, |
|
{ |
|
"best_epoch": 4, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 70.0, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 71.0, |
|
"eval_accuracy": 0.57, |
|
"eval_loss": 0.5589808821678162, |
|
"eval_runtime": 3.1069, |
|
"eval_samples_per_second": 32.186, |
|
"eval_steps_per_second": 4.184, |
|
"step": 1775 |
|
}, |
|
{ |
|
"best_epoch": 4, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 71.0, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"eval_accuracy": 0.59, |
|
"eval_loss": 0.5589616298675537, |
|
"eval_runtime": 3.1072, |
|
"eval_samples_per_second": 32.183, |
|
"eval_steps_per_second": 4.184, |
|
"step": 1800 |
|
}, |
|
{ |
|
"best_epoch": 4, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 72.0, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 73.0, |
|
"eval_accuracy": 0.56, |
|
"eval_loss": 0.5590981841087341, |
|
"eval_runtime": 3.1117, |
|
"eval_samples_per_second": 32.136, |
|
"eval_steps_per_second": 4.178, |
|
"step": 1825 |
|
}, |
|
{ |
|
"best_epoch": 4, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 73.0, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 74.0, |
|
"eval_accuracy": 0.56, |
|
"eval_loss": 0.5585782527923584, |
|
"eval_runtime": 3.1081, |
|
"eval_samples_per_second": 32.174, |
|
"eval_steps_per_second": 4.183, |
|
"step": 1850 |
|
}, |
|
{ |
|
"best_epoch": 4, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 74.0, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"eval_accuracy": 0.56, |
|
"eval_loss": 0.5589650869369507, |
|
"eval_runtime": 3.109, |
|
"eval_samples_per_second": 32.165, |
|
"eval_steps_per_second": 4.181, |
|
"step": 1875 |
|
}, |
|
{ |
|
"best_epoch": 4, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 75.0, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"eval_accuracy": 0.57, |
|
"eval_loss": 0.5591500401496887, |
|
"eval_runtime": 3.1193, |
|
"eval_samples_per_second": 32.058, |
|
"eval_steps_per_second": 4.168, |
|
"step": 1900 |
|
}, |
|
{ |
|
"best_epoch": 4, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 76.0, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 77.0, |
|
"eval_accuracy": 0.53, |
|
"eval_loss": 0.558722734451294, |
|
"eval_runtime": 3.1185, |
|
"eval_samples_per_second": 32.066, |
|
"eval_steps_per_second": 4.169, |
|
"step": 1925 |
|
}, |
|
{ |
|
"best_epoch": 4, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 77.0, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 78.0, |
|
"eval_accuracy": 0.56, |
|
"eval_loss": 0.5588061213493347, |
|
"eval_runtime": 3.1073, |
|
"eval_samples_per_second": 32.182, |
|
"eval_steps_per_second": 4.184, |
|
"step": 1950 |
|
}, |
|
{ |
|
"best_epoch": 4, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 78.0, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 79.0, |
|
"eval_accuracy": 0.58, |
|
"eval_loss": 0.5589128732681274, |
|
"eval_runtime": 3.1054, |
|
"eval_samples_per_second": 32.202, |
|
"eval_steps_per_second": 4.186, |
|
"step": 1975 |
|
}, |
|
{ |
|
"best_epoch": 4, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 79.0, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"learning_rate": 0.0, |
|
"loss": 0.7248, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_accuracy": 0.56, |
|
"eval_loss": 0.5588445663452148, |
|
"eval_runtime": 3.1071, |
|
"eval_samples_per_second": 32.184, |
|
"eval_steps_per_second": 4.184, |
|
"step": 2000 |
|
}, |
|
{ |
|
"best_epoch": 4, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 80.0, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"step": 2000, |
|
"total_flos": 2.9821702864896e+16, |
|
"train_loss": 1.066298309326172, |
|
"train_runtime": 1658.6269, |
|
"train_samples_per_second": 19.293, |
|
"train_steps_per_second": 1.206 |
|
} |
|
], |
|
"max_steps": 2000, |
|
"num_train_epochs": 80, |
|
"total_flos": 2.9821702864896e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|