|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 80.0, |
|
"global_step": 2000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.4, |
|
"eval_loss": 0.6447609663009644, |
|
"eval_runtime": 2.9283, |
|
"eval_samples_per_second": 34.15, |
|
"eval_steps_per_second": 4.44, |
|
"step": 25 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.4, |
|
"epoch": 1.0, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.7949573993682861, |
|
"eval_runtime": 2.9849, |
|
"eval_samples_per_second": 33.502, |
|
"eval_steps_per_second": 4.355, |
|
"step": 50 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.65, |
|
"epoch": 2.0, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.54, |
|
"eval_loss": 0.6180824041366577, |
|
"eval_runtime": 3.0424, |
|
"eval_samples_per_second": 32.869, |
|
"eval_steps_per_second": 4.273, |
|
"step": 75 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.65, |
|
"epoch": 3.0, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.6, |
|
"eval_loss": 0.5600907206535339, |
|
"eval_runtime": 3.0801, |
|
"eval_samples_per_second": 32.467, |
|
"eval_steps_per_second": 4.221, |
|
"step": 100 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.65, |
|
"epoch": 4.0, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.42, |
|
"eval_loss": 0.5816247463226318, |
|
"eval_runtime": 3.1022, |
|
"eval_samples_per_second": 32.235, |
|
"eval_steps_per_second": 4.191, |
|
"step": 125 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.65, |
|
"epoch": 5.0, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.43, |
|
"eval_loss": 0.5956963300704956, |
|
"eval_runtime": 3.123, |
|
"eval_samples_per_second": 32.02, |
|
"eval_steps_per_second": 4.163, |
|
"step": 150 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.65, |
|
"epoch": 6.0, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.61, |
|
"eval_loss": 0.5331138968467712, |
|
"eval_runtime": 3.1336, |
|
"eval_samples_per_second": 31.912, |
|
"eval_steps_per_second": 4.149, |
|
"step": 175 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.65, |
|
"epoch": 7.0, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.61, |
|
"eval_loss": 0.550652027130127, |
|
"eval_runtime": 3.1424, |
|
"eval_samples_per_second": 31.822, |
|
"eval_steps_per_second": 4.137, |
|
"step": 200 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.65, |
|
"epoch": 8.0, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.62, |
|
"eval_loss": 0.5438417196273804, |
|
"eval_runtime": 3.1439, |
|
"eval_samples_per_second": 31.808, |
|
"eval_steps_per_second": 4.135, |
|
"step": 225 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.65, |
|
"epoch": 9.0, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.5454716682434082, |
|
"eval_runtime": 3.1529, |
|
"eval_samples_per_second": 31.717, |
|
"eval_steps_per_second": 4.123, |
|
"step": 250 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.65, |
|
"epoch": 10.0, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.5140843391418457, |
|
"eval_runtime": 3.1585, |
|
"eval_samples_per_second": 31.661, |
|
"eval_steps_per_second": 4.116, |
|
"step": 275 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.65, |
|
"epoch": 11.0, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.71, |
|
"eval_loss": 0.5018832683563232, |
|
"eval_runtime": 3.1501, |
|
"eval_samples_per_second": 31.745, |
|
"eval_steps_per_second": 4.127, |
|
"step": 300 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 12.0, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.7, |
|
"eval_loss": 0.6824415326118469, |
|
"eval_runtime": 3.1461, |
|
"eval_samples_per_second": 31.786, |
|
"eval_steps_per_second": 4.132, |
|
"step": 325 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 13.0, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.73, |
|
"eval_loss": 0.5734787583351135, |
|
"eval_runtime": 3.1445, |
|
"eval_samples_per_second": 31.802, |
|
"eval_steps_per_second": 4.134, |
|
"step": 350 |
|
}, |
|
{ |
|
"best_epoch": 13, |
|
"best_eval_accuracy": 0.73, |
|
"epoch": 14.0, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.69, |
|
"eval_loss": 0.5578454732894897, |
|
"eval_runtime": 3.1437, |
|
"eval_samples_per_second": 31.809, |
|
"eval_steps_per_second": 4.135, |
|
"step": 375 |
|
}, |
|
{ |
|
"best_epoch": 13, |
|
"best_eval_accuracy": 0.73, |
|
"epoch": 15.0, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.72, |
|
"eval_loss": 0.560732901096344, |
|
"eval_runtime": 3.1447, |
|
"eval_samples_per_second": 31.799, |
|
"eval_steps_per_second": 4.134, |
|
"step": 400 |
|
}, |
|
{ |
|
"best_epoch": 13, |
|
"best_eval_accuracy": 0.73, |
|
"epoch": 16.0, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.71, |
|
"eval_loss": 0.5974356532096863, |
|
"eval_runtime": 3.1435, |
|
"eval_samples_per_second": 31.812, |
|
"eval_steps_per_second": 4.136, |
|
"step": 425 |
|
}, |
|
{ |
|
"best_epoch": 13, |
|
"best_eval_accuracy": 0.73, |
|
"epoch": 17.0, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.71, |
|
"eval_loss": 0.8101874589920044, |
|
"eval_runtime": 3.141, |
|
"eval_samples_per_second": 31.837, |
|
"eval_steps_per_second": 4.139, |
|
"step": 450 |
|
}, |
|
{ |
|
"best_epoch": 13, |
|
"best_eval_accuracy": 0.73, |
|
"epoch": 18.0, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.73, |
|
"eval_loss": 0.6756836175918579, |
|
"eval_runtime": 3.14, |
|
"eval_samples_per_second": 31.847, |
|
"eval_steps_per_second": 4.14, |
|
"step": 475 |
|
}, |
|
{ |
|
"best_epoch": 13, |
|
"best_eval_accuracy": 0.73, |
|
"epoch": 19.0, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"learning_rate": 0.015, |
|
"loss": 0.7598, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.74, |
|
"eval_loss": 0.5266308188438416, |
|
"eval_runtime": 3.1409, |
|
"eval_samples_per_second": 31.838, |
|
"eval_steps_per_second": 4.139, |
|
"step": 500 |
|
}, |
|
{ |
|
"best_epoch": 19, |
|
"best_eval_accuracy": 0.74, |
|
"epoch": 20.0, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.69, |
|
"eval_loss": 0.6270546913146973, |
|
"eval_runtime": 3.1418, |
|
"eval_samples_per_second": 31.829, |
|
"eval_steps_per_second": 4.138, |
|
"step": 525 |
|
}, |
|
{ |
|
"best_epoch": 19, |
|
"best_eval_accuracy": 0.74, |
|
"epoch": 21.0, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.7, |
|
"eval_loss": 0.6341220736503601, |
|
"eval_runtime": 3.1406, |
|
"eval_samples_per_second": 31.842, |
|
"eval_steps_per_second": 4.139, |
|
"step": 550 |
|
}, |
|
{ |
|
"best_epoch": 19, |
|
"best_eval_accuracy": 0.74, |
|
"epoch": 22.0, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.7, |
|
"eval_loss": 0.6874021887779236, |
|
"eval_runtime": 3.1434, |
|
"eval_samples_per_second": 31.812, |
|
"eval_steps_per_second": 4.136, |
|
"step": 575 |
|
}, |
|
{ |
|
"best_epoch": 19, |
|
"best_eval_accuracy": 0.74, |
|
"epoch": 23.0, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.72, |
|
"eval_loss": 0.5264467000961304, |
|
"eval_runtime": 3.1446, |
|
"eval_samples_per_second": 31.801, |
|
"eval_steps_per_second": 4.134, |
|
"step": 600 |
|
}, |
|
{ |
|
"best_epoch": 19, |
|
"best_eval_accuracy": 0.74, |
|
"epoch": 24.0, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.73, |
|
"eval_loss": 0.5148030519485474, |
|
"eval_runtime": 3.1438, |
|
"eval_samples_per_second": 31.809, |
|
"eval_steps_per_second": 4.135, |
|
"step": 625 |
|
}, |
|
{ |
|
"best_epoch": 19, |
|
"best_eval_accuracy": 0.74, |
|
"epoch": 25.0, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.77, |
|
"eval_loss": 0.5760267972946167, |
|
"eval_runtime": 3.1417, |
|
"eval_samples_per_second": 31.83, |
|
"eval_steps_per_second": 4.138, |
|
"step": 650 |
|
}, |
|
{ |
|
"best_epoch": 25, |
|
"best_eval_accuracy": 0.77, |
|
"epoch": 26.0, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.71, |
|
"eval_loss": 0.6580688953399658, |
|
"eval_runtime": 3.1419, |
|
"eval_samples_per_second": 31.828, |
|
"eval_steps_per_second": 4.138, |
|
"step": 675 |
|
}, |
|
{ |
|
"best_epoch": 25, |
|
"best_eval_accuracy": 0.77, |
|
"epoch": 27.0, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.71, |
|
"eval_loss": 0.6479081511497498, |
|
"eval_runtime": 3.145, |
|
"eval_samples_per_second": 31.797, |
|
"eval_steps_per_second": 4.134, |
|
"step": 700 |
|
}, |
|
{ |
|
"best_epoch": 25, |
|
"best_eval_accuracy": 0.77, |
|
"epoch": 28.0, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.69, |
|
"eval_loss": 0.696037232875824, |
|
"eval_runtime": 3.1444, |
|
"eval_samples_per_second": 31.803, |
|
"eval_steps_per_second": 4.134, |
|
"step": 725 |
|
}, |
|
{ |
|
"best_epoch": 25, |
|
"best_eval_accuracy": 0.77, |
|
"epoch": 29.0, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.7, |
|
"eval_loss": 0.6919419765472412, |
|
"eval_runtime": 3.1427, |
|
"eval_samples_per_second": 31.82, |
|
"eval_steps_per_second": 4.137, |
|
"step": 750 |
|
}, |
|
{ |
|
"best_epoch": 25, |
|
"best_eval_accuracy": 0.77, |
|
"epoch": 30.0, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.68, |
|
"eval_loss": 0.642117977142334, |
|
"eval_runtime": 3.1402, |
|
"eval_samples_per_second": 31.845, |
|
"eval_steps_per_second": 4.14, |
|
"step": 775 |
|
}, |
|
{ |
|
"best_epoch": 25, |
|
"best_eval_accuracy": 0.77, |
|
"epoch": 31.0, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.68, |
|
"eval_loss": 0.5680819153785706, |
|
"eval_runtime": 3.1395, |
|
"eval_samples_per_second": 31.852, |
|
"eval_steps_per_second": 4.141, |
|
"step": 800 |
|
}, |
|
{ |
|
"best_epoch": 25, |
|
"best_eval_accuracy": 0.77, |
|
"epoch": 32.0, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_accuracy": 0.68, |
|
"eval_loss": 0.5631085634231567, |
|
"eval_runtime": 3.1383, |
|
"eval_samples_per_second": 31.864, |
|
"eval_steps_per_second": 4.142, |
|
"step": 825 |
|
}, |
|
{ |
|
"best_epoch": 25, |
|
"best_eval_accuracy": 0.77, |
|
"epoch": 33.0, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.66, |
|
"eval_loss": 0.5676264762878418, |
|
"eval_runtime": 3.1421, |
|
"eval_samples_per_second": 31.826, |
|
"eval_steps_per_second": 4.137, |
|
"step": 850 |
|
}, |
|
{ |
|
"best_epoch": 25, |
|
"best_eval_accuracy": 0.77, |
|
"epoch": 34.0, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_accuracy": 0.68, |
|
"eval_loss": 0.5388873815536499, |
|
"eval_runtime": 3.1413, |
|
"eval_samples_per_second": 31.834, |
|
"eval_steps_per_second": 4.138, |
|
"step": 875 |
|
}, |
|
{ |
|
"best_epoch": 25, |
|
"best_eval_accuracy": 0.77, |
|
"epoch": 35.0, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.68, |
|
"eval_loss": 0.6266981363296509, |
|
"eval_runtime": 3.1411, |
|
"eval_samples_per_second": 31.836, |
|
"eval_steps_per_second": 4.139, |
|
"step": 900 |
|
}, |
|
{ |
|
"best_epoch": 25, |
|
"best_eval_accuracy": 0.77, |
|
"epoch": 36.0, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.6107380390167236, |
|
"eval_runtime": 3.1415, |
|
"eval_samples_per_second": 31.832, |
|
"eval_steps_per_second": 4.138, |
|
"step": 925 |
|
}, |
|
{ |
|
"best_epoch": 25, |
|
"best_eval_accuracy": 0.77, |
|
"epoch": 37.0, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.66, |
|
"eval_loss": 0.53592449426651, |
|
"eval_runtime": 3.1409, |
|
"eval_samples_per_second": 31.838, |
|
"eval_steps_per_second": 4.139, |
|
"step": 950 |
|
}, |
|
{ |
|
"best_epoch": 25, |
|
"best_eval_accuracy": 0.77, |
|
"epoch": 38.0, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_accuracy": 0.67, |
|
"eval_loss": 0.5741092562675476, |
|
"eval_runtime": 3.1433, |
|
"eval_samples_per_second": 31.814, |
|
"eval_steps_per_second": 4.136, |
|
"step": 975 |
|
}, |
|
{ |
|
"best_epoch": 25, |
|
"best_eval_accuracy": 0.77, |
|
"epoch": 39.0, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"learning_rate": 0.01, |
|
"loss": 0.4266, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.69, |
|
"eval_loss": 0.5928249955177307, |
|
"eval_runtime": 3.1459, |
|
"eval_samples_per_second": 31.788, |
|
"eval_steps_per_second": 4.132, |
|
"step": 1000 |
|
}, |
|
{ |
|
"best_epoch": 25, |
|
"best_eval_accuracy": 0.77, |
|
"epoch": 40.0, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_accuracy": 0.68, |
|
"eval_loss": 0.5306583046913147, |
|
"eval_runtime": 3.1398, |
|
"eval_samples_per_second": 31.849, |
|
"eval_steps_per_second": 4.14, |
|
"step": 1025 |
|
}, |
|
{ |
|
"best_epoch": 25, |
|
"best_eval_accuracy": 0.77, |
|
"epoch": 41.0, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_accuracy": 0.66, |
|
"eval_loss": 0.5909176468849182, |
|
"eval_runtime": 3.1393, |
|
"eval_samples_per_second": 31.854, |
|
"eval_steps_per_second": 4.141, |
|
"step": 1050 |
|
}, |
|
{ |
|
"best_epoch": 25, |
|
"best_eval_accuracy": 0.77, |
|
"epoch": 42.0, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_accuracy": 0.66, |
|
"eval_loss": 0.5732528567314148, |
|
"eval_runtime": 3.1401, |
|
"eval_samples_per_second": 31.846, |
|
"eval_steps_per_second": 4.14, |
|
"step": 1075 |
|
}, |
|
{ |
|
"best_epoch": 25, |
|
"best_eval_accuracy": 0.77, |
|
"epoch": 43.0, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.66, |
|
"eval_loss": 0.5560975074768066, |
|
"eval_runtime": 3.1413, |
|
"eval_samples_per_second": 31.834, |
|
"eval_steps_per_second": 4.138, |
|
"step": 1100 |
|
}, |
|
{ |
|
"best_epoch": 25, |
|
"best_eval_accuracy": 0.77, |
|
"epoch": 44.0, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_accuracy": 0.69, |
|
"eval_loss": 0.5600245594978333, |
|
"eval_runtime": 3.1394, |
|
"eval_samples_per_second": 31.854, |
|
"eval_steps_per_second": 4.141, |
|
"step": 1125 |
|
}, |
|
{ |
|
"best_epoch": 25, |
|
"best_eval_accuracy": 0.77, |
|
"epoch": 45.0, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_accuracy": 0.66, |
|
"eval_loss": 0.5228108763694763, |
|
"eval_runtime": 3.1374, |
|
"eval_samples_per_second": 31.874, |
|
"eval_steps_per_second": 4.144, |
|
"step": 1150 |
|
}, |
|
{ |
|
"best_epoch": 25, |
|
"best_eval_accuracy": 0.77, |
|
"epoch": 46.0, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_accuracy": 0.7, |
|
"eval_loss": 0.5382905602455139, |
|
"eval_runtime": 3.1397, |
|
"eval_samples_per_second": 31.85, |
|
"eval_steps_per_second": 4.14, |
|
"step": 1175 |
|
}, |
|
{ |
|
"best_epoch": 25, |
|
"best_eval_accuracy": 0.77, |
|
"epoch": 47.0, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.69, |
|
"eval_loss": 0.5643246173858643, |
|
"eval_runtime": 3.1398, |
|
"eval_samples_per_second": 31.849, |
|
"eval_steps_per_second": 4.14, |
|
"step": 1200 |
|
}, |
|
{ |
|
"best_epoch": 25, |
|
"best_eval_accuracy": 0.77, |
|
"epoch": 48.0, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_accuracy": 0.7, |
|
"eval_loss": 0.5492831468582153, |
|
"eval_runtime": 3.1396, |
|
"eval_samples_per_second": 31.851, |
|
"eval_steps_per_second": 4.141, |
|
"step": 1225 |
|
}, |
|
{ |
|
"best_epoch": 25, |
|
"best_eval_accuracy": 0.77, |
|
"epoch": 49.0, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_accuracy": 0.7, |
|
"eval_loss": 0.5576255321502686, |
|
"eval_runtime": 3.1404, |
|
"eval_samples_per_second": 31.843, |
|
"eval_steps_per_second": 4.14, |
|
"step": 1250 |
|
}, |
|
{ |
|
"best_epoch": 25, |
|
"best_eval_accuracy": 0.77, |
|
"epoch": 50.0, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"eval_accuracy": 0.68, |
|
"eval_loss": 0.5543303489685059, |
|
"eval_runtime": 3.14, |
|
"eval_samples_per_second": 31.847, |
|
"eval_steps_per_second": 4.14, |
|
"step": 1275 |
|
}, |
|
{ |
|
"best_epoch": 25, |
|
"best_eval_accuracy": 0.77, |
|
"epoch": 51.0, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_accuracy": 0.69, |
|
"eval_loss": 0.56154465675354, |
|
"eval_runtime": 3.1398, |
|
"eval_samples_per_second": 31.849, |
|
"eval_steps_per_second": 4.14, |
|
"step": 1300 |
|
}, |
|
{ |
|
"best_epoch": 25, |
|
"best_eval_accuracy": 0.77, |
|
"epoch": 52.0, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"eval_accuracy": 0.67, |
|
"eval_loss": 0.5357893109321594, |
|
"eval_runtime": 3.1385, |
|
"eval_samples_per_second": 31.862, |
|
"eval_steps_per_second": 4.142, |
|
"step": 1325 |
|
}, |
|
{ |
|
"best_epoch": 25, |
|
"best_eval_accuracy": 0.77, |
|
"epoch": 53.0, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_accuracy": 0.69, |
|
"eval_loss": 0.5404526591300964, |
|
"eval_runtime": 3.1375, |
|
"eval_samples_per_second": 31.873, |
|
"eval_steps_per_second": 4.143, |
|
"step": 1350 |
|
}, |
|
{ |
|
"best_epoch": 25, |
|
"best_eval_accuracy": 0.77, |
|
"epoch": 54.0, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"eval_accuracy": 0.69, |
|
"eval_loss": 0.53267902135849, |
|
"eval_runtime": 3.138, |
|
"eval_samples_per_second": 31.868, |
|
"eval_steps_per_second": 4.143, |
|
"step": 1375 |
|
}, |
|
{ |
|
"best_epoch": 25, |
|
"best_eval_accuracy": 0.77, |
|
"epoch": 55.0, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_accuracy": 0.67, |
|
"eval_loss": 0.5644922852516174, |
|
"eval_runtime": 3.1383, |
|
"eval_samples_per_second": 31.864, |
|
"eval_steps_per_second": 4.142, |
|
"step": 1400 |
|
}, |
|
{ |
|
"best_epoch": 25, |
|
"best_eval_accuracy": 0.77, |
|
"epoch": 56.0, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"eval_accuracy": 0.67, |
|
"eval_loss": 0.5240322947502136, |
|
"eval_runtime": 3.1384, |
|
"eval_samples_per_second": 31.863, |
|
"eval_steps_per_second": 4.142, |
|
"step": 1425 |
|
}, |
|
{ |
|
"best_epoch": 25, |
|
"best_eval_accuracy": 0.77, |
|
"epoch": 57.0, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_accuracy": 0.67, |
|
"eval_loss": 0.5401843786239624, |
|
"eval_runtime": 3.1408, |
|
"eval_samples_per_second": 31.839, |
|
"eval_steps_per_second": 4.139, |
|
"step": 1450 |
|
}, |
|
{ |
|
"best_epoch": 25, |
|
"best_eval_accuracy": 0.77, |
|
"epoch": 58.0, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"eval_accuracy": 0.68, |
|
"eval_loss": 0.549487829208374, |
|
"eval_runtime": 3.1454, |
|
"eval_samples_per_second": 31.793, |
|
"eval_steps_per_second": 4.133, |
|
"step": 1475 |
|
}, |
|
{ |
|
"best_epoch": 25, |
|
"best_eval_accuracy": 0.77, |
|
"epoch": 59.0, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"learning_rate": 0.005, |
|
"loss": 0.3249, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_accuracy": 0.66, |
|
"eval_loss": 0.5624470710754395, |
|
"eval_runtime": 3.142, |
|
"eval_samples_per_second": 31.827, |
|
"eval_steps_per_second": 4.137, |
|
"step": 1500 |
|
}, |
|
{ |
|
"best_epoch": 25, |
|
"best_eval_accuracy": 0.77, |
|
"epoch": 60.0, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 61.0, |
|
"eval_accuracy": 0.67, |
|
"eval_loss": 0.5512508749961853, |
|
"eval_runtime": 3.1363, |
|
"eval_samples_per_second": 31.885, |
|
"eval_steps_per_second": 4.145, |
|
"step": 1525 |
|
}, |
|
{ |
|
"best_epoch": 25, |
|
"best_eval_accuracy": 0.77, |
|
"epoch": 61.0, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 62.0, |
|
"eval_accuracy": 0.68, |
|
"eval_loss": 0.5536577701568604, |
|
"eval_runtime": 3.1364, |
|
"eval_samples_per_second": 31.884, |
|
"eval_steps_per_second": 4.145, |
|
"step": 1550 |
|
}, |
|
{ |
|
"best_epoch": 25, |
|
"best_eval_accuracy": 0.77, |
|
"epoch": 62.0, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 63.0, |
|
"eval_accuracy": 0.68, |
|
"eval_loss": 0.5443692207336426, |
|
"eval_runtime": 3.1379, |
|
"eval_samples_per_second": 31.868, |
|
"eval_steps_per_second": 4.143, |
|
"step": 1575 |
|
}, |
|
{ |
|
"best_epoch": 25, |
|
"best_eval_accuracy": 0.77, |
|
"epoch": 63.0, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_accuracy": 0.68, |
|
"eval_loss": 0.5552690029144287, |
|
"eval_runtime": 3.1344, |
|
"eval_samples_per_second": 31.904, |
|
"eval_steps_per_second": 4.148, |
|
"step": 1600 |
|
}, |
|
{ |
|
"best_epoch": 25, |
|
"best_eval_accuracy": 0.77, |
|
"epoch": 64.0, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 65.0, |
|
"eval_accuracy": 0.68, |
|
"eval_loss": 0.522087812423706, |
|
"eval_runtime": 3.1353, |
|
"eval_samples_per_second": 31.895, |
|
"eval_steps_per_second": 4.146, |
|
"step": 1625 |
|
}, |
|
{ |
|
"best_epoch": 25, |
|
"best_eval_accuracy": 0.77, |
|
"epoch": 65.0, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 66.0, |
|
"eval_accuracy": 0.68, |
|
"eval_loss": 0.5135818123817444, |
|
"eval_runtime": 3.1354, |
|
"eval_samples_per_second": 31.894, |
|
"eval_steps_per_second": 4.146, |
|
"step": 1650 |
|
}, |
|
{ |
|
"best_epoch": 25, |
|
"best_eval_accuracy": 0.77, |
|
"epoch": 66.0, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 67.0, |
|
"eval_accuracy": 0.69, |
|
"eval_loss": 0.5231194496154785, |
|
"eval_runtime": 3.1365, |
|
"eval_samples_per_second": 31.883, |
|
"eval_steps_per_second": 4.145, |
|
"step": 1675 |
|
}, |
|
{ |
|
"best_epoch": 25, |
|
"best_eval_accuracy": 0.77, |
|
"epoch": 67.0, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"eval_accuracy": 0.69, |
|
"eval_loss": 0.5305217504501343, |
|
"eval_runtime": 3.1343, |
|
"eval_samples_per_second": 31.905, |
|
"eval_steps_per_second": 4.148, |
|
"step": 1700 |
|
}, |
|
{ |
|
"best_epoch": 25, |
|
"best_eval_accuracy": 0.77, |
|
"epoch": 68.0, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 69.0, |
|
"eval_accuracy": 0.68, |
|
"eval_loss": 0.5278050303459167, |
|
"eval_runtime": 3.1328, |
|
"eval_samples_per_second": 31.92, |
|
"eval_steps_per_second": 4.15, |
|
"step": 1725 |
|
}, |
|
{ |
|
"best_epoch": 25, |
|
"best_eval_accuracy": 0.77, |
|
"epoch": 69.0, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"eval_accuracy": 0.66, |
|
"eval_loss": 0.5439716577529907, |
|
"eval_runtime": 3.1334, |
|
"eval_samples_per_second": 31.915, |
|
"eval_steps_per_second": 4.149, |
|
"step": 1750 |
|
}, |
|
{ |
|
"best_epoch": 25, |
|
"best_eval_accuracy": 0.77, |
|
"epoch": 70.0, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 71.0, |
|
"eval_accuracy": 0.67, |
|
"eval_loss": 0.5410908460617065, |
|
"eval_runtime": 3.1331, |
|
"eval_samples_per_second": 31.918, |
|
"eval_steps_per_second": 4.149, |
|
"step": 1775 |
|
}, |
|
{ |
|
"best_epoch": 25, |
|
"best_eval_accuracy": 0.77, |
|
"epoch": 71.0, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"eval_accuracy": 0.69, |
|
"eval_loss": 0.5346353650093079, |
|
"eval_runtime": 3.1316, |
|
"eval_samples_per_second": 31.933, |
|
"eval_steps_per_second": 4.151, |
|
"step": 1800 |
|
}, |
|
{ |
|
"best_epoch": 25, |
|
"best_eval_accuracy": 0.77, |
|
"epoch": 72.0, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 73.0, |
|
"eval_accuracy": 0.67, |
|
"eval_loss": 0.5240665078163147, |
|
"eval_runtime": 3.1322, |
|
"eval_samples_per_second": 31.926, |
|
"eval_steps_per_second": 4.15, |
|
"step": 1825 |
|
}, |
|
{ |
|
"best_epoch": 25, |
|
"best_eval_accuracy": 0.77, |
|
"epoch": 73.0, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 74.0, |
|
"eval_accuracy": 0.67, |
|
"eval_loss": 0.5424520969390869, |
|
"eval_runtime": 3.1337, |
|
"eval_samples_per_second": 31.911, |
|
"eval_steps_per_second": 4.148, |
|
"step": 1850 |
|
}, |
|
{ |
|
"best_epoch": 25, |
|
"best_eval_accuracy": 0.77, |
|
"epoch": 74.0, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"eval_accuracy": 0.67, |
|
"eval_loss": 0.521338701248169, |
|
"eval_runtime": 3.1447, |
|
"eval_samples_per_second": 31.799, |
|
"eval_steps_per_second": 4.134, |
|
"step": 1875 |
|
}, |
|
{ |
|
"best_epoch": 25, |
|
"best_eval_accuracy": 0.77, |
|
"epoch": 75.0, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"eval_accuracy": 0.66, |
|
"eval_loss": 0.540505588054657, |
|
"eval_runtime": 3.1395, |
|
"eval_samples_per_second": 31.853, |
|
"eval_steps_per_second": 4.141, |
|
"step": 1900 |
|
}, |
|
{ |
|
"best_epoch": 25, |
|
"best_eval_accuracy": 0.77, |
|
"epoch": 76.0, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 77.0, |
|
"eval_accuracy": 0.67, |
|
"eval_loss": 0.525080144405365, |
|
"eval_runtime": 3.1248, |
|
"eval_samples_per_second": 32.002, |
|
"eval_steps_per_second": 4.16, |
|
"step": 1925 |
|
}, |
|
{ |
|
"best_epoch": 25, |
|
"best_eval_accuracy": 0.77, |
|
"epoch": 77.0, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 78.0, |
|
"eval_accuracy": 0.67, |
|
"eval_loss": 0.5299803614616394, |
|
"eval_runtime": 3.122, |
|
"eval_samples_per_second": 32.031, |
|
"eval_steps_per_second": 4.164, |
|
"step": 1950 |
|
}, |
|
{ |
|
"best_epoch": 25, |
|
"best_eval_accuracy": 0.77, |
|
"epoch": 78.0, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 79.0, |
|
"eval_accuracy": 0.67, |
|
"eval_loss": 0.5285055637359619, |
|
"eval_runtime": 3.129, |
|
"eval_samples_per_second": 31.959, |
|
"eval_steps_per_second": 4.155, |
|
"step": 1975 |
|
}, |
|
{ |
|
"best_epoch": 25, |
|
"best_eval_accuracy": 0.77, |
|
"epoch": 79.0, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"learning_rate": 0.0, |
|
"loss": 0.2946, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_accuracy": 0.67, |
|
"eval_loss": 0.5294349789619446, |
|
"eval_runtime": 3.1201, |
|
"eval_samples_per_second": 32.05, |
|
"eval_steps_per_second": 4.166, |
|
"step": 2000 |
|
}, |
|
{ |
|
"best_epoch": 25, |
|
"best_eval_accuracy": 0.77, |
|
"epoch": 80.0, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"step": 2000, |
|
"total_flos": 2.9821702864896e+16, |
|
"train_loss": 0.4514722671508789, |
|
"train_runtime": 1665.4633, |
|
"train_samples_per_second": 19.214, |
|
"train_steps_per_second": 1.201 |
|
} |
|
], |
|
"max_steps": 2000, |
|
"num_train_epochs": 80, |
|
"total_flos": 2.9821702864896e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|