|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 80.0, |
|
"global_step": 2000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.57, |
|
"eval_loss": 0.627579391002655, |
|
"eval_runtime": 2.9468, |
|
"eval_samples_per_second": 33.935, |
|
"eval_steps_per_second": 4.412, |
|
"step": 25 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.57, |
|
"epoch": 1.0, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.63, |
|
"eval_loss": 0.6135770678520203, |
|
"eval_runtime": 3.0105, |
|
"eval_samples_per_second": 33.217, |
|
"eval_steps_per_second": 4.318, |
|
"step": 50 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.63, |
|
"epoch": 2.0, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.66, |
|
"eval_loss": 0.6773912906646729, |
|
"eval_runtime": 3.0612, |
|
"eval_samples_per_second": 32.667, |
|
"eval_steps_per_second": 4.247, |
|
"step": 75 |
|
}, |
|
{ |
|
"best_epoch": 2, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 3.0, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.5963658690452576, |
|
"eval_runtime": 3.096, |
|
"eval_samples_per_second": 32.3, |
|
"eval_steps_per_second": 4.199, |
|
"step": 100 |
|
}, |
|
{ |
|
"best_epoch": 2, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 4.0, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.62, |
|
"eval_loss": 0.5315800309181213, |
|
"eval_runtime": 3.1161, |
|
"eval_samples_per_second": 32.091, |
|
"eval_steps_per_second": 4.172, |
|
"step": 125 |
|
}, |
|
{ |
|
"best_epoch": 2, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 5.0, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.62, |
|
"eval_loss": 0.5231013298034668, |
|
"eval_runtime": 3.1314, |
|
"eval_samples_per_second": 31.935, |
|
"eval_steps_per_second": 4.151, |
|
"step": 150 |
|
}, |
|
{ |
|
"best_epoch": 2, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 6.0, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.63, |
|
"eval_loss": 0.5155523419380188, |
|
"eval_runtime": 3.1398, |
|
"eval_samples_per_second": 31.849, |
|
"eval_steps_per_second": 4.14, |
|
"step": 175 |
|
}, |
|
{ |
|
"best_epoch": 2, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 7.0, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.6215571761131287, |
|
"eval_runtime": 3.1438, |
|
"eval_samples_per_second": 31.809, |
|
"eval_steps_per_second": 4.135, |
|
"step": 200 |
|
}, |
|
{ |
|
"best_epoch": 2, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 8.0, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.71, |
|
"eval_loss": 0.5012997388839722, |
|
"eval_runtime": 3.1482, |
|
"eval_samples_per_second": 31.764, |
|
"eval_steps_per_second": 4.129, |
|
"step": 225 |
|
}, |
|
{ |
|
"best_epoch": 8, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 9.0, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.7, |
|
"eval_loss": 0.5733979940414429, |
|
"eval_runtime": 3.1518, |
|
"eval_samples_per_second": 31.728, |
|
"eval_steps_per_second": 4.125, |
|
"step": 250 |
|
}, |
|
{ |
|
"best_epoch": 8, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 10.0, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.66, |
|
"eval_loss": 0.4682706594467163, |
|
"eval_runtime": 3.1601, |
|
"eval_samples_per_second": 31.645, |
|
"eval_steps_per_second": 4.114, |
|
"step": 275 |
|
}, |
|
{ |
|
"best_epoch": 8, |
|
"best_eval_accuracy": 0.71, |
|
"epoch": 11.0, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.73, |
|
"eval_loss": 0.5332651734352112, |
|
"eval_runtime": 3.1621, |
|
"eval_samples_per_second": 31.624, |
|
"eval_steps_per_second": 4.111, |
|
"step": 300 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.73, |
|
"epoch": 12.0, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.69, |
|
"eval_loss": 0.6740408539772034, |
|
"eval_runtime": 3.1473, |
|
"eval_samples_per_second": 31.773, |
|
"eval_steps_per_second": 4.131, |
|
"step": 325 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.73, |
|
"epoch": 13.0, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.71, |
|
"eval_loss": 0.5184910893440247, |
|
"eval_runtime": 3.1461, |
|
"eval_samples_per_second": 31.786, |
|
"eval_steps_per_second": 4.132, |
|
"step": 350 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.73, |
|
"epoch": 14.0, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.71, |
|
"eval_loss": 0.5030907392501831, |
|
"eval_runtime": 3.1466, |
|
"eval_samples_per_second": 31.78, |
|
"eval_steps_per_second": 4.131, |
|
"step": 375 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.73, |
|
"epoch": 15.0, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.71, |
|
"eval_loss": 0.5397804379463196, |
|
"eval_runtime": 3.1405, |
|
"eval_samples_per_second": 31.843, |
|
"eval_steps_per_second": 4.14, |
|
"step": 400 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.73, |
|
"epoch": 16.0, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.73, |
|
"eval_loss": 0.5245677828788757, |
|
"eval_runtime": 3.1417, |
|
"eval_samples_per_second": 31.83, |
|
"eval_steps_per_second": 4.138, |
|
"step": 425 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.73, |
|
"epoch": 17.0, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.69, |
|
"eval_loss": 0.7413635849952698, |
|
"eval_runtime": 3.149, |
|
"eval_samples_per_second": 31.756, |
|
"eval_steps_per_second": 4.128, |
|
"step": 450 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.73, |
|
"epoch": 18.0, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.72, |
|
"eval_loss": 0.6816568970680237, |
|
"eval_runtime": 3.1399, |
|
"eval_samples_per_second": 31.849, |
|
"eval_steps_per_second": 4.14, |
|
"step": 475 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.73, |
|
"epoch": 19.0, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"learning_rate": 0.015, |
|
"loss": 0.7352, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.71, |
|
"eval_loss": 0.6655824184417725, |
|
"eval_runtime": 3.1414, |
|
"eval_samples_per_second": 31.833, |
|
"eval_steps_per_second": 4.138, |
|
"step": 500 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.73, |
|
"epoch": 20.0, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.76, |
|
"eval_loss": 0.5838807225227356, |
|
"eval_runtime": 3.1408, |
|
"eval_samples_per_second": 31.839, |
|
"eval_steps_per_second": 4.139, |
|
"step": 525 |
|
}, |
|
{ |
|
"best_epoch": 20, |
|
"best_eval_accuracy": 0.76, |
|
"epoch": 21.0, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.76, |
|
"eval_loss": 0.6626308560371399, |
|
"eval_runtime": 3.1413, |
|
"eval_samples_per_second": 31.834, |
|
"eval_steps_per_second": 4.138, |
|
"step": 550 |
|
}, |
|
{ |
|
"best_epoch": 20, |
|
"best_eval_accuracy": 0.76, |
|
"epoch": 22.0, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.75, |
|
"eval_loss": 0.5017469525337219, |
|
"eval_runtime": 3.1427, |
|
"eval_samples_per_second": 31.82, |
|
"eval_steps_per_second": 4.137, |
|
"step": 575 |
|
}, |
|
{ |
|
"best_epoch": 20, |
|
"best_eval_accuracy": 0.76, |
|
"epoch": 23.0, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.74, |
|
"eval_loss": 0.5168166160583496, |
|
"eval_runtime": 3.1452, |
|
"eval_samples_per_second": 31.795, |
|
"eval_steps_per_second": 4.133, |
|
"step": 600 |
|
}, |
|
{ |
|
"best_epoch": 20, |
|
"best_eval_accuracy": 0.76, |
|
"epoch": 24.0, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.78, |
|
"eval_loss": 0.5911619067192078, |
|
"eval_runtime": 3.1431, |
|
"eval_samples_per_second": 31.816, |
|
"eval_steps_per_second": 4.136, |
|
"step": 625 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.78, |
|
"epoch": 25.0, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.77, |
|
"eval_loss": 0.5595811605453491, |
|
"eval_runtime": 3.1433, |
|
"eval_samples_per_second": 31.813, |
|
"eval_steps_per_second": 4.136, |
|
"step": 650 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.78, |
|
"epoch": 26.0, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.77, |
|
"eval_loss": 0.48836490511894226, |
|
"eval_runtime": 3.1431, |
|
"eval_samples_per_second": 31.816, |
|
"eval_steps_per_second": 4.136, |
|
"step": 675 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.78, |
|
"epoch": 27.0, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.73, |
|
"eval_loss": 0.47384893894195557, |
|
"eval_runtime": 3.142, |
|
"eval_samples_per_second": 31.826, |
|
"eval_steps_per_second": 4.137, |
|
"step": 700 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.78, |
|
"epoch": 28.0, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.76, |
|
"eval_loss": 0.5052289366722107, |
|
"eval_runtime": 3.1453, |
|
"eval_samples_per_second": 31.794, |
|
"eval_steps_per_second": 4.133, |
|
"step": 725 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.78, |
|
"epoch": 29.0, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.74, |
|
"eval_loss": 0.6162938475608826, |
|
"eval_runtime": 3.1453, |
|
"eval_samples_per_second": 31.793, |
|
"eval_steps_per_second": 4.133, |
|
"step": 750 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.78, |
|
"epoch": 30.0, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.74, |
|
"eval_loss": 0.5823907256126404, |
|
"eval_runtime": 3.143, |
|
"eval_samples_per_second": 31.817, |
|
"eval_steps_per_second": 4.136, |
|
"step": 775 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.78, |
|
"epoch": 31.0, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.72, |
|
"eval_loss": 0.4995167553424835, |
|
"eval_runtime": 3.1421, |
|
"eval_samples_per_second": 31.826, |
|
"eval_steps_per_second": 4.137, |
|
"step": 800 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.78, |
|
"epoch": 32.0, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_accuracy": 0.71, |
|
"eval_loss": 0.4935573935508728, |
|
"eval_runtime": 3.1423, |
|
"eval_samples_per_second": 31.824, |
|
"eval_steps_per_second": 4.137, |
|
"step": 825 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.78, |
|
"epoch": 33.0, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.72, |
|
"eval_loss": 0.5463616847991943, |
|
"eval_runtime": 3.1442, |
|
"eval_samples_per_second": 31.804, |
|
"eval_steps_per_second": 4.135, |
|
"step": 850 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.78, |
|
"epoch": 34.0, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_accuracy": 0.74, |
|
"eval_loss": 0.5164341926574707, |
|
"eval_runtime": 3.1446, |
|
"eval_samples_per_second": 31.801, |
|
"eval_steps_per_second": 4.134, |
|
"step": 875 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.78, |
|
"epoch": 35.0, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.75, |
|
"eval_loss": 0.5088481307029724, |
|
"eval_runtime": 3.1452, |
|
"eval_samples_per_second": 31.795, |
|
"eval_steps_per_second": 4.133, |
|
"step": 900 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.78, |
|
"epoch": 36.0, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_accuracy": 0.75, |
|
"eval_loss": 0.599052906036377, |
|
"eval_runtime": 3.1458, |
|
"eval_samples_per_second": 31.788, |
|
"eval_steps_per_second": 4.132, |
|
"step": 925 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.78, |
|
"epoch": 37.0, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.73, |
|
"eval_loss": 0.49629107117652893, |
|
"eval_runtime": 3.1523, |
|
"eval_samples_per_second": 31.723, |
|
"eval_steps_per_second": 4.124, |
|
"step": 950 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.78, |
|
"epoch": 38.0, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_accuracy": 0.72, |
|
"eval_loss": 0.5085676312446594, |
|
"eval_runtime": 3.1502, |
|
"eval_samples_per_second": 31.744, |
|
"eval_steps_per_second": 4.127, |
|
"step": 975 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.78, |
|
"epoch": 39.0, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"learning_rate": 0.01, |
|
"loss": 0.411, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.73, |
|
"eval_loss": 0.5202763080596924, |
|
"eval_runtime": 3.1439, |
|
"eval_samples_per_second": 31.808, |
|
"eval_steps_per_second": 4.135, |
|
"step": 1000 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.78, |
|
"epoch": 40.0, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_accuracy": 0.74, |
|
"eval_loss": 0.5843695998191833, |
|
"eval_runtime": 3.1411, |
|
"eval_samples_per_second": 31.836, |
|
"eval_steps_per_second": 4.139, |
|
"step": 1025 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.78, |
|
"epoch": 41.0, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_accuracy": 0.74, |
|
"eval_loss": 0.5285233855247498, |
|
"eval_runtime": 3.1423, |
|
"eval_samples_per_second": 31.824, |
|
"eval_steps_per_second": 4.137, |
|
"step": 1050 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.78, |
|
"epoch": 42.0, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_accuracy": 0.74, |
|
"eval_loss": 0.5552850961685181, |
|
"eval_runtime": 3.1448, |
|
"eval_samples_per_second": 31.799, |
|
"eval_steps_per_second": 4.134, |
|
"step": 1075 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.78, |
|
"epoch": 43.0, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.71, |
|
"eval_loss": 0.5588386058807373, |
|
"eval_runtime": 3.1454, |
|
"eval_samples_per_second": 31.793, |
|
"eval_steps_per_second": 4.133, |
|
"step": 1100 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.78, |
|
"epoch": 44.0, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_accuracy": 0.72, |
|
"eval_loss": 0.5391697883605957, |
|
"eval_runtime": 3.1435, |
|
"eval_samples_per_second": 31.812, |
|
"eval_steps_per_second": 4.136, |
|
"step": 1125 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.78, |
|
"epoch": 45.0, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_accuracy": 0.72, |
|
"eval_loss": 0.5494285225868225, |
|
"eval_runtime": 3.1433, |
|
"eval_samples_per_second": 31.814, |
|
"eval_steps_per_second": 4.136, |
|
"step": 1150 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.78, |
|
"epoch": 46.0, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_accuracy": 0.76, |
|
"eval_loss": 0.4982169270515442, |
|
"eval_runtime": 3.1427, |
|
"eval_samples_per_second": 31.819, |
|
"eval_steps_per_second": 4.137, |
|
"step": 1175 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.78, |
|
"epoch": 47.0, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.72, |
|
"eval_loss": 0.5374172329902649, |
|
"eval_runtime": 3.1407, |
|
"eval_samples_per_second": 31.84, |
|
"eval_steps_per_second": 4.139, |
|
"step": 1200 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.78, |
|
"epoch": 48.0, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_accuracy": 0.73, |
|
"eval_loss": 0.5730433464050293, |
|
"eval_runtime": 3.142, |
|
"eval_samples_per_second": 31.827, |
|
"eval_steps_per_second": 4.137, |
|
"step": 1225 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.78, |
|
"epoch": 49.0, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_accuracy": 0.72, |
|
"eval_loss": 0.51490718126297, |
|
"eval_runtime": 3.1446, |
|
"eval_samples_per_second": 31.8, |
|
"eval_steps_per_second": 4.134, |
|
"step": 1250 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.78, |
|
"epoch": 50.0, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"eval_accuracy": 0.72, |
|
"eval_loss": 0.49485138058662415, |
|
"eval_runtime": 3.1403, |
|
"eval_samples_per_second": 31.844, |
|
"eval_steps_per_second": 4.14, |
|
"step": 1275 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.78, |
|
"epoch": 51.0, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_accuracy": 0.73, |
|
"eval_loss": 0.5295071601867676, |
|
"eval_runtime": 3.1397, |
|
"eval_samples_per_second": 31.851, |
|
"eval_steps_per_second": 4.141, |
|
"step": 1300 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.78, |
|
"epoch": 52.0, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"eval_accuracy": 0.72, |
|
"eval_loss": 0.5223097801208496, |
|
"eval_runtime": 3.1423, |
|
"eval_samples_per_second": 31.824, |
|
"eval_steps_per_second": 4.137, |
|
"step": 1325 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.78, |
|
"epoch": 53.0, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_accuracy": 0.71, |
|
"eval_loss": 0.5616713762283325, |
|
"eval_runtime": 3.141, |
|
"eval_samples_per_second": 31.837, |
|
"eval_steps_per_second": 4.139, |
|
"step": 1350 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.78, |
|
"epoch": 54.0, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"eval_accuracy": 0.72, |
|
"eval_loss": 0.5373037457466125, |
|
"eval_runtime": 3.1394, |
|
"eval_samples_per_second": 31.853, |
|
"eval_steps_per_second": 4.141, |
|
"step": 1375 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.78, |
|
"epoch": 55.0, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_accuracy": 0.73, |
|
"eval_loss": 0.48570069670677185, |
|
"eval_runtime": 3.1398, |
|
"eval_samples_per_second": 31.849, |
|
"eval_steps_per_second": 4.14, |
|
"step": 1400 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.78, |
|
"epoch": 56.0, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"eval_accuracy": 0.72, |
|
"eval_loss": 0.49541863799095154, |
|
"eval_runtime": 3.1394, |
|
"eval_samples_per_second": 31.853, |
|
"eval_steps_per_second": 4.141, |
|
"step": 1425 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.78, |
|
"epoch": 57.0, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_accuracy": 0.72, |
|
"eval_loss": 0.5024493932723999, |
|
"eval_runtime": 3.1403, |
|
"eval_samples_per_second": 31.844, |
|
"eval_steps_per_second": 4.14, |
|
"step": 1450 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.78, |
|
"epoch": 58.0, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"eval_accuracy": 0.74, |
|
"eval_loss": 0.497109591960907, |
|
"eval_runtime": 3.1489, |
|
"eval_samples_per_second": 31.757, |
|
"eval_steps_per_second": 4.128, |
|
"step": 1475 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.78, |
|
"epoch": 59.0, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"learning_rate": 0.005, |
|
"loss": 0.318, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_accuracy": 0.73, |
|
"eval_loss": 0.5264896154403687, |
|
"eval_runtime": 3.1479, |
|
"eval_samples_per_second": 31.767, |
|
"eval_steps_per_second": 4.13, |
|
"step": 1500 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.78, |
|
"epoch": 60.0, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 61.0, |
|
"eval_accuracy": 0.71, |
|
"eval_loss": 0.4966976046562195, |
|
"eval_runtime": 3.1464, |
|
"eval_samples_per_second": 31.783, |
|
"eval_steps_per_second": 4.132, |
|
"step": 1525 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.78, |
|
"epoch": 61.0, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 62.0, |
|
"eval_accuracy": 0.73, |
|
"eval_loss": 0.497234046459198, |
|
"eval_runtime": 3.1465, |
|
"eval_samples_per_second": 31.782, |
|
"eval_steps_per_second": 4.132, |
|
"step": 1550 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.78, |
|
"epoch": 62.0, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 63.0, |
|
"eval_accuracy": 0.72, |
|
"eval_loss": 0.4908214509487152, |
|
"eval_runtime": 3.1342, |
|
"eval_samples_per_second": 31.906, |
|
"eval_steps_per_second": 4.148, |
|
"step": 1575 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.78, |
|
"epoch": 63.0, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_accuracy": 0.74, |
|
"eval_loss": 0.5055844783782959, |
|
"eval_runtime": 3.1392, |
|
"eval_samples_per_second": 31.855, |
|
"eval_steps_per_second": 4.141, |
|
"step": 1600 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.78, |
|
"epoch": 64.0, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 65.0, |
|
"eval_accuracy": 0.74, |
|
"eval_loss": 0.5230618119239807, |
|
"eval_runtime": 3.1268, |
|
"eval_samples_per_second": 31.981, |
|
"eval_steps_per_second": 4.158, |
|
"step": 1625 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.78, |
|
"epoch": 65.0, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 66.0, |
|
"eval_accuracy": 0.75, |
|
"eval_loss": 0.47373998165130615, |
|
"eval_runtime": 3.1252, |
|
"eval_samples_per_second": 31.998, |
|
"eval_steps_per_second": 4.16, |
|
"step": 1650 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.78, |
|
"epoch": 66.0, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 67.0, |
|
"eval_accuracy": 0.72, |
|
"eval_loss": 0.5015537142753601, |
|
"eval_runtime": 3.1238, |
|
"eval_samples_per_second": 32.012, |
|
"eval_steps_per_second": 4.162, |
|
"step": 1675 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.78, |
|
"epoch": 67.0, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"eval_accuracy": 0.73, |
|
"eval_loss": 0.49883437156677246, |
|
"eval_runtime": 3.1204, |
|
"eval_samples_per_second": 32.047, |
|
"eval_steps_per_second": 4.166, |
|
"step": 1700 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.78, |
|
"epoch": 68.0, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 69.0, |
|
"eval_accuracy": 0.74, |
|
"eval_loss": 0.5276287794113159, |
|
"eval_runtime": 3.1254, |
|
"eval_samples_per_second": 31.996, |
|
"eval_steps_per_second": 4.16, |
|
"step": 1725 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.78, |
|
"epoch": 69.0, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"eval_accuracy": 0.73, |
|
"eval_loss": 0.4912046790122986, |
|
"eval_runtime": 3.1233, |
|
"eval_samples_per_second": 32.018, |
|
"eval_steps_per_second": 4.162, |
|
"step": 1750 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.78, |
|
"epoch": 70.0, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 71.0, |
|
"eval_accuracy": 0.72, |
|
"eval_loss": 0.48646289110183716, |
|
"eval_runtime": 3.1257, |
|
"eval_samples_per_second": 31.993, |
|
"eval_steps_per_second": 4.159, |
|
"step": 1775 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.78, |
|
"epoch": 71.0, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"eval_accuracy": 0.73, |
|
"eval_loss": 0.475396066904068, |
|
"eval_runtime": 3.1256, |
|
"eval_samples_per_second": 31.994, |
|
"eval_steps_per_second": 4.159, |
|
"step": 1800 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.78, |
|
"epoch": 72.0, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 73.0, |
|
"eval_accuracy": 0.73, |
|
"eval_loss": 0.49221473932266235, |
|
"eval_runtime": 3.1249, |
|
"eval_samples_per_second": 32.001, |
|
"eval_steps_per_second": 4.16, |
|
"step": 1825 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.78, |
|
"epoch": 73.0, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 74.0, |
|
"eval_accuracy": 0.74, |
|
"eval_loss": 0.4884068965911865, |
|
"eval_runtime": 3.1256, |
|
"eval_samples_per_second": 31.994, |
|
"eval_steps_per_second": 4.159, |
|
"step": 1850 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.78, |
|
"epoch": 74.0, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"eval_accuracy": 0.73, |
|
"eval_loss": 0.48677849769592285, |
|
"eval_runtime": 3.1234, |
|
"eval_samples_per_second": 32.017, |
|
"eval_steps_per_second": 4.162, |
|
"step": 1875 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.78, |
|
"epoch": 75.0, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"eval_accuracy": 0.73, |
|
"eval_loss": 0.48715740442276, |
|
"eval_runtime": 3.1231, |
|
"eval_samples_per_second": 32.02, |
|
"eval_steps_per_second": 4.163, |
|
"step": 1900 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.78, |
|
"epoch": 76.0, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 77.0, |
|
"eval_accuracy": 0.72, |
|
"eval_loss": 0.4847962260246277, |
|
"eval_runtime": 3.1233, |
|
"eval_samples_per_second": 32.017, |
|
"eval_steps_per_second": 4.162, |
|
"step": 1925 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.78, |
|
"epoch": 77.0, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 78.0, |
|
"eval_accuracy": 0.72, |
|
"eval_loss": 0.4922550320625305, |
|
"eval_runtime": 3.1248, |
|
"eval_samples_per_second": 32.002, |
|
"eval_steps_per_second": 4.16, |
|
"step": 1950 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.78, |
|
"epoch": 78.0, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 79.0, |
|
"eval_accuracy": 0.73, |
|
"eval_loss": 0.4888269007205963, |
|
"eval_runtime": 3.1211, |
|
"eval_samples_per_second": 32.04, |
|
"eval_steps_per_second": 4.165, |
|
"step": 1975 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.78, |
|
"epoch": 79.0, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"learning_rate": 0.0, |
|
"loss": 0.287, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_accuracy": 0.72, |
|
"eval_loss": 0.4912576377391815, |
|
"eval_runtime": 3.1223, |
|
"eval_samples_per_second": 32.028, |
|
"eval_steps_per_second": 4.164, |
|
"step": 2000 |
|
}, |
|
{ |
|
"best_epoch": 24, |
|
"best_eval_accuracy": 0.78, |
|
"epoch": 80.0, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"step": 2000, |
|
"total_flos": 2.9821702864896e+16, |
|
"train_loss": 0.43781012725830076, |
|
"train_runtime": 1665.2419, |
|
"train_samples_per_second": 19.216, |
|
"train_steps_per_second": 1.201 |
|
} |
|
], |
|
"max_steps": 2000, |
|
"num_train_epochs": 80, |
|
"total_flos": 2.9821702864896e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|